From fa8a7bf15aca2cf8393d2e9cf8c7f599f3200271 Mon Sep 17 00:00:00 2001 From: moorembioinfo <6642283+moorembioinfo@users.noreply.github.com> Date: Mon, 27 Jun 2022 17:01:21 +0000 Subject: [PATCH 1/6] Add LocusName to SNP table --- lib/Modules/PanGenome/PanGenome.pm | 8 +++++--- t/genomes.batch | 10 ++++++++++ t/plasmids.batch | 14 ++++++++++++++ t/query.batch | 12 ++++++++++++ 4 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 t/genomes.batch create mode 100644 t/plasmids.batch create mode 100644 t/query.batch diff --git a/lib/Modules/PanGenome/PanGenome.pm b/lib/Modules/PanGenome/PanGenome.pm index a95a3a9..c966b07 100755 --- a/lib/Modules/PanGenome/PanGenome.pm +++ b/lib/Modules/PanGenome/PanGenome.pm @@ -714,7 +714,7 @@ sub _printResults{ foreach my $snpId(sort keys %{$finalResult->{genomeResults}->{$genome}->{snp}}){ $snpStringHash{$snpId}->{$genomeCounter} = $finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{value}; - + $snpStringHash{$snpId}->{'loci'} = $finalResult->{locusInformation}->{name}; $self->_printCoreSnpData( snpId=>$snpId ,locusName=>$finalResult->{locusInformation}->{name} @@ -846,9 +846,10 @@ sub _printSnpTable{ foreach my $id(sort keys %{$snpStringHash}){ my $printLine = "\n"; foreach my $genome(1..scalar(@{$self->settings->orderedGenomeNames})){ - $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}) + $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}) #. $snpStringHash->{$id}->{$genome}) } - push @output, $printLine; + my $snploci = $printLine . "\t" . $snpStringHash->{$id}->{'loci'}; + push @output, $snploci; #, $printLine; } $self->_printFH->{snpTableFH}->print(@output); } @@ -1158,6 +1159,7 @@ sub _getCoreResult { return $snpDataArrayRef; } + 1; diff --git a/t/genomes.batch b/t/genomes.batch new file mode 100644 index 0000000..1a6e6c4 --- /dev/null +++ b/t/genomes.batch @@ -0,0 +1,10 @@ +queryDirectory t/data/genomes/ +percentIdentityCutoff 90 +coreGenomeThreshold 3 +minimumNovelRegionSize 1000 +fragmentationSize 1000 +overwrite 1 +nameOrId name +baseDirectory t/genomes/ +numberOfCores 1 +runMode pan diff --git a/t/plasmids.batch b/t/plasmids.batch new file mode 100644 index 0000000..56c02e0 --- /dev/null +++ b/t/plasmids.batch @@ -0,0 +1,14 @@ +percentIdentityCutoff 90 +queryDirectory t/data/plasmids/ +coreGenomeThreshold 2 +cdhitDirectory /mnt/gvl/apps/linuxbrew/bin/ +minimumNovelRegionSize 500 +fragmentationSize 500 +allelesToKeep 2 +nameOrId name +overwrite 1 +runMode pan +storeAlleles 1 +numberOfCores 1 +cdhit 1 +baseDirectory t/plasmids/ diff --git a/t/query.batch b/t/query.batch new file mode 100644 index 0000000..4e2a125 --- /dev/null +++ b/t/query.batch @@ -0,0 +1,12 @@ +storeAlleles 1 +runMode pan +queryFile t/data/testfragments.fasta +baseDirectory t/query/ +numberOfCores 1 +overwrite 1 +nameOrId name +minimumNovelRegionSize 1 +fragmentationSize 0 +coreGenomeThreshold 2 +percentIdentityCutoff 90 +queryDirectory t/data/genomes/ From 88b8f70748d50d411a964505470c726eacb7abc1 Mon Sep 17 00:00:00 2001 From: moorembioinfo <6642283+moorembioinfo@users.noreply.github.com> Date: Tue, 28 Jun 2022 11:30:27 +0100 Subject: [PATCH 2/6] Update README.md --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ab440fa..be9bec3 100755 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ [![Master branch build status](https://travis-ci.org/chadlaing/Panseq.svg?branch=master "Master Build Status")](https://travis-ci.org/chadlaing/Panseq) +# Panseq-LC: fork of Panseq, with output modified to include LocusName in SNP table ## OVERVIEW From eb5856ebff6fd1967524ab3b519501cc101f4ea3 Mon Sep 17 00:00:00 2001 From: moorembioinfo <6642283+moorembioinfo@users.noreply.github.com> Date: Sun, 10 Jul 2022 16:59:53 +0000 Subject: [PATCH 3/6] Dummy value for inclusion of ref startbp attempt --- lib/Modules/PanGenome/PanGenome.pm | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/Modules/PanGenome/PanGenome.pm b/lib/Modules/PanGenome/PanGenome.pm index c966b07..5f09f69 100755 --- a/lib/Modules/PanGenome/PanGenome.pm +++ b/lib/Modules/PanGenome/PanGenome.pm @@ -845,10 +845,15 @@ sub _printSnpTable{ my @output; foreach my $id(sort keys %{$snpStringHash}){ my $printLine = "\n"; + my $startbp = ''; #get ref start ID + foreach my $genome(1..scalar(@{$self->settings->orderedGenomeNames})){ - $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}) #. $snpStringHash->{$id}->{$genome}) + $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}); + my $lociname = $snpStringHash->{$id}->{'loci'}; + my $refgenome = (split /|/, $lociname)[1]; + $startbp = 150 if $genome==$refgenome; } - my $snploci = $printLine . "\t" . $snpStringHash->{$id}->{'loci'}; + my $snploci = $printLine . "\t" . $snpStringHash->{$id}->{'loci'} . "\t" . $startbp; push @output, $snploci; #, $printLine; } $self->_printFH->{snpTableFH}->print(@output); From 6ed8afe7622586390ba228bbe6792d6b270f964e Mon Sep 17 00:00:00 2001 From: moorembioinfo <6642283+moorembioinfo@users.noreply.github.com> Date: Mon, 11 Jul 2022 10:56:02 +0000 Subject: [PATCH 4/6] output snp position but of final genome in order --- lib/Modules/PanGenome/PanGenome.pm | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/Modules/PanGenome/PanGenome.pm b/lib/Modules/PanGenome/PanGenome.pm index 5f09f69..d4c20d7 100755 --- a/lib/Modules/PanGenome/PanGenome.pm +++ b/lib/Modules/PanGenome/PanGenome.pm @@ -722,6 +722,7 @@ sub _printResults{ ,snpChar=>$finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{value} ,startBp=>$finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} ,contigId=>$finalResult->{genomeResults}->{$genome}->{binary}->[0]->{contig_id} + ,$snpStringHash{$snpId}->{'snp_position'} = $finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} ); } @@ -841,7 +842,7 @@ sub _printCoreSnpData{ sub _printSnpTable{ my $self=shift; my $snpStringHash = shift; - + #my $finalResult = shift; my @output; foreach my $id(sort keys %{$snpStringHash}){ my $printLine = "\n"; @@ -851,7 +852,10 @@ sub _printSnpTable{ $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}); my $lociname = $snpStringHash->{$id}->{'loci'}; my $refgenome = (split /|/, $lociname)[1]; - $startbp = 150 if $genome==$refgenome; + $startbp = $snpStringHash->{$id}->{'snp_position'} if $snpStringHash->{$id}->{$genome}==$refgenome; + #print "$snpStringHash->{$id}" + #print $params->{$id}->{$startBp} + #print"$finalResult->{$genome}->{$id}->{$start_bp}"; } my $snploci = $printLine . "\t" . $snpStringHash->{$id}->{'loci'} . "\t" . $startbp; push @output, $snploci; #, $printLine; From 284a3fb9716c64db6ddf38603799988ae905ef70 Mon Sep 17 00:00:00 2001 From: moorembioinfo <6642283+moorembioinfo@users.noreply.github.com> Date: Mon, 11 Jul 2022 15:43:21 +0000 Subject: [PATCH 5/6] LocusName and snp_position from reference now output to snp_table.txt for large core_snps.txt memory workaround --- lib/Modules/PanGenome/PanGenome.pm | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/Modules/PanGenome/PanGenome.pm b/lib/Modules/PanGenome/PanGenome.pm index d4c20d7..ffd1887 100755 --- a/lib/Modules/PanGenome/PanGenome.pm +++ b/lib/Modules/PanGenome/PanGenome.pm @@ -722,8 +722,19 @@ sub _printResults{ ,snpChar=>$finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{value} ,startBp=>$finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} ,contigId=>$finalResult->{genomeResults}->{$genome}->{binary}->[0]->{contig_id} - ,$snpStringHash{$snpId}->{'snp_position'} = $finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} + #,$lname= $finalResult->{locusInformation}->{name}; + #,$refgenome = (split /|/, $lname)[1]; + #,$snpStringHash{$snpId}->{'snp_position'} = $finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} #if $genome==$refgenome; ); + my $lname = "" . $snpStringHash{$snpId}->{'loci'}; + #print"$lname\n"; + #my $delim = '|'; + my @refgenome = (split(/[|]/, $lname)); + my $refgenome_ = $refgenome[1]; + #print"@refgenome\n"; + #print "$refgenome_"; + #print"\n"; + $snpStringHash{$snpId}->{'snp_position'} = $finalResult->{genomeResults}->{$refgenome_}->{snp}->{$snpId}->{start_bp}; #if $refgenome==$genome; } #if there are SNPs in a region, add dashes to the snp string @@ -852,7 +863,7 @@ sub _printSnpTable{ $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}); my $lociname = $snpStringHash->{$id}->{'loci'}; my $refgenome = (split /|/, $lociname)[1]; - $startbp = $snpStringHash->{$id}->{'snp_position'} if $snpStringHash->{$id}->{$genome}==$refgenome; + $startbp = $snpStringHash->{$id}->{'snp_position'}; #if $snpStringHash->{$id}->{$genome}==$refgenome; #print "$snpStringHash->{$id}" #print $params->{$id}->{$startBp} #print"$finalResult->{$genome}->{$id}->{$start_bp}"; From 45ffd65197f349cce4fb206afa83db0230194fca Mon Sep 17 00:00:00 2001 From: moorembioinfo <6642283+moorembioinfo@users.noreply.github.com> Date: Mon, 11 Jul 2022 15:59:23 +0000 Subject: [PATCH 6/6] Cleanup --- lib/Modules/PanGenome/PanGenome.pm | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/lib/Modules/PanGenome/PanGenome.pm b/lib/Modules/PanGenome/PanGenome.pm index ffd1887..9efc744 100755 --- a/lib/Modules/PanGenome/PanGenome.pm +++ b/lib/Modules/PanGenome/PanGenome.pm @@ -722,18 +722,10 @@ sub _printResults{ ,snpChar=>$finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{value} ,startBp=>$finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} ,contigId=>$finalResult->{genomeResults}->{$genome}->{binary}->[0]->{contig_id} - #,$lname= $finalResult->{locusInformation}->{name}; - #,$refgenome = (split /|/, $lname)[1]; - #,$snpStringHash{$snpId}->{'snp_position'} = $finalResult->{genomeResults}->{$genome}->{snp}->{$snpId}->{start_bp} #if $genome==$refgenome; ); my $lname = "" . $snpStringHash{$snpId}->{'loci'}; - #print"$lname\n"; - #my $delim = '|'; my @refgenome = (split(/[|]/, $lname)); my $refgenome_ = $refgenome[1]; - #print"@refgenome\n"; - #print "$refgenome_"; - #print"\n"; $snpStringHash{$snpId}->{'snp_position'} = $finalResult->{genomeResults}->{$refgenome_}->{snp}->{$snpId}->{start_bp}; #if $refgenome==$genome; } @@ -863,13 +855,10 @@ sub _printSnpTable{ $printLine .= ("\t" . $snpStringHash->{$id}->{$genome}); my $lociname = $snpStringHash->{$id}->{'loci'}; my $refgenome = (split /|/, $lociname)[1]; - $startbp = $snpStringHash->{$id}->{'snp_position'}; #if $snpStringHash->{$id}->{$genome}==$refgenome; - #print "$snpStringHash->{$id}" - #print $params->{$id}->{$startBp} - #print"$finalResult->{$genome}->{$id}->{$start_bp}"; + $startbp = $snpStringHash->{$id}->{'snp_position'}; } my $snploci = $printLine . "\t" . $snpStringHash->{$id}->{'loci'} . "\t" . $startbp; - push @output, $snploci; #, $printLine; + push @output, $snploci; } $self->_printFH->{snpTableFH}->print(@output); }