diff --git a/DB/blastDB.nhr b/DB/blastDB.nhr
new file mode 100644
index 0000000..15d8333
Binary files /dev/null and b/DB/blastDB.nhr differ
diff --git a/DB/blastDB.nin b/DB/blastDB.nin
new file mode 100644
index 0000000..f3e3fbb
Binary files /dev/null and b/DB/blastDB.nin differ
diff --git a/DB/blastDB.nsq b/DB/blastDB.nsq
new file mode 100644
index 0000000..e3a56cd
Binary files /dev/null and b/DB/blastDB.nsq differ
diff --git a/dirlisting b/dirlisting
new file mode 100644
index 0000000..db7eeb0
--- /dev/null
+++ b/dirlisting
@@ -0,0 +1,8 @@
+DB
+README.md
+dirlisting
+input.fasta
+main.nf
+nextflow.config
+out_dir
+test
diff --git a/input.fasta b/input.fasta
new file mode 100644
index 0000000..fdc0c85
--- /dev/null
+++ b/input.fasta
@@ -0,0 +1,10 @@
+>Scaffold_1_1..100
+CAGGCAAAATGTGGCACAAAAACAACAAATTGTTTAGTAGATACAGGGGCATCCATTTGTTGTATTTCGTCTGCTTTTCTGAGCACAGCTTTTGAAAACC
+>Scaffold_1_101..200
+TTACTCTTGGAAACTCACCCTTTCCACAGGTAAAAGGTGTTGGCGGCGAATTGCATAAAGTGTTAGGTTCAGTTGTGTTAGATTTTGTCATTGAGGATCA
+>Scaffold_1_201..300
+GGAATTTTCTCAAAGATTCTATGTACTGCCTACACTGCCGAAGGCAGTGATACTAGGTGAGAACTTCCTTAATGACAATGATGCAGTCTTAGATTATAGC
+>Scaffold_1_301..400
+TGTCATTCCTTGATACTCAACAACAGCACCTCAGATAGGCAATATATCAATTTCATAGCCAATTCAGTGCATGAGATTAGTGGATTAGCAAAAACACTAG
+>Scaffold_1_401..500
+ATCAGATTTACATCCCCCCTCAGAGTGAAATTCATTTCAAGGTCAGACTATCAGAGACCAAAGAGGATTCCCTCATCCTCATTGAACCCATTGCTTCCCT
diff --git a/main.nf b/main.nf
index 7f65404..31a169b 100644
--- a/main.nf
+++ b/main.nf
@@ -1,6 +1,60 @@
 #! /usr/bin/env nextflow
 
-blastdb="myBlastDatabase"
-params.query="file.fasta"
+println "\nI want to BLAST $params.query to $params.dbDir/$params.dbName using $params.threads CPUs and output it to $params.outdir"
+
+// Print the usage statement; the bracketed defaults mirror the values set in nextflow.config.
+def helpMessage() {
+    log.info """
+        Usage:
+        The typical command for running the pipeline is as follows:
+        nextflow run main.nf --query QUERY.fasta --dbDir "blastDatabaseDirectory" --dbName "blastPrefixName"
+
+        Mandatory arguments:
+        --query                        Query fasta file of sequences you wish to BLAST
+        --dbDir                        BLAST database directory (full path required)
+        --dbName                       Prefix name of the BLAST database
+
+        Optional arguments:
+        --outdir                       Output directory to place final BLAST output [out_dir]
+        --outfmt                       Output format ['6']
+        --options                      Additional options for BLAST command [-evalue 1e-3]
+        --outFileName                  Prefix name for BLAST output [input.blastout]
+        --threads                      Number of CPUs to use during blast job [2]
+        --chunkSize                    Number of fasta records to use when splitting the query fasta file [1]
+        --app                          BLAST program to use: blastn, blastp, tblastn, blastx [blastn]
+        --help                         This usage statement.
+        """
+}
+
+// Show help message
+if (params.help) {
+    helpMessage()
+    exit 0
+}
+
+// Split the query fasta into files of params.chunkSize records; each file is one item on queryFile_ch.
+Channel
+    .fromPath(params.query)
+    .splitFasta(by: params.chunkSize, file: true)
+    .set { queryFile_ch }
+
+// Run the BLAST program selected by params.app on one chunk of the query.
+process runBlast {
+
+    input:
+    path(queryFile) from queryFile_ch
+
+    output:
+    path(params.outFileName) into blast_output_ch
+
+    script:
+    """
+    ${params.app} -num_threads $params.threads -db $params.dbDir/$params.dbName -query $queryFile -outfmt $params.outfmt $params.options -out $params.outFileName
+    """
+
+}
+
+// Concatenate the per-chunk BLAST results into a single file stored under params.outdir.
+blast_output_ch
+    .collectFile(name: 'blast_output_combined.txt', storeDir: params.outdir)
 
-println "I will BLAST $params.query against $blastdb"
diff --git a/nextflow.config b/nextflow.config
index e69de29..a2b7c7e 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -0,0 +1,14 @@
+// Default pipeline parameters; each can be overridden on the command line with --<name>.
+params {
+    query = "$PWD/input.fasta"
+    dbDir = "$PWD/DB/"
+    dbName = "blastDB"
+    threads = 2
+    outdir = "out_dir"
+    outFileName = "input.blastout"
+    options = "-evalue 1e-3"
+    outfmt = "'6'"
+    app = "blastn"
+    chunkSize = 1
+    help = false
+}
diff --git a/out_dir/blast_output_combined.txt b/out_dir/blast_output_combined.txt
new file mode 100644
index 0000000..4fadc77
--- /dev/null
+++ b/out_dir/blast_output_combined.txt
@@ -0,0 +1,5 @@
+Scaffold_1_401..500 Scaffold_1_401..500 100.000 100 0 0 1 100 1 100 4.90e-52 185
+Scaffold_1_301..400 Scaffold_1_301..400 100.000 100 0 0 1 100 1 100 4.90e-52 185
+Scaffold_1_201..300 Scaffold_1_201..300 100.000 100 0 0 1 100 1 100 4.90e-52 185
+Scaffold_1_101..200 Scaffold_1_101..200 100.000 100 0 0 1 100 1 100 4.90e-52 185
+Scaffold_1_1..100 Scaffold_1_1..100 100.000 100 0 0 1 100 1 100 4.90e-52 185
diff --git a/out_dir/blastout/input.blastout b/out_dir/blastout/input.blastout
new file mode 120000
index 0000000..8dc94f0
--- /dev/null
+++ b/out_dir/blastout/input.blastout
@@ -0,0 +1 @@
+/mnt/d/Work/nextflow_stuff/nftutorial/work/7f/c96665018e757eb6f4c0765d024062/input.blastout
\ No newline at end of file
diff --git a/test/test.txt b/test/test.txt
new file mode 100644
index 0000000..610f2cb
--- /dev/null
+++ b/test/test.txt
@@ -0,0 +1 @@
+Content of test.txt
diff --git a/test/test2.txt b/test/test2.txt
new file mode 100644
index 0000000..e69de29