Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 27 additions & 9 deletions CPU/juicer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,11 @@ singleend=0
sampleName="HiC_sample"
# library name for RG tag
libraryName="HiC_library"
# samtools sort memory per thread
memory="1G"

## Read arguments
usageHelp="Usage: ${0##*/} [-g genomeID] [-d topDir] [-s site]\n [-a about] [-S stage] [-p chrom.sizes path]\n [-y restriction site file] [-z reference genome file]\n [-D Juicer scripts parent dir] [-b ligation] [-t threads]\n [-T threadsHic] [-i sample] [-k library] [-w wobble]\n [-e] [-h] [-f] [-j] [-u] [-m] [--assembly] [--cleanup] [--qc]"
usageHelp="Usage: ${0##*/} [-g genomeID] [-d topDir] [-s site]\n [-a about] [-S stage] [-p chrom.sizes path]\n [-y restriction site file] [-z reference genome file]\n [-D Juicer scripts parent dir] [-b ligation] [-t threads] [--mem memory]\n [-T threadsHic] [-i sample] [-k library] [-w wobble]\n [-e] [-h] [-f] [-j] [-u] [-m] [--assembly] [--cleanup] [--qc]"
genomeHelp="* [genomeID] must be defined in the script, e.g. \"hg19\" or \"mm10\" (default \n \"$genomeID\"); alternatively, it can be defined using the -z command"
dirHelp="* [topDir] is the top level directory (default\n \"$topDir\")\n [topDir]/fastq must contain the fastq files\n [topDir]/splits will be created to contain the temporary split files\n [topDir]/aligned will be created for the final alignment"
siteHelp="* [site] must be defined in the script, e.g. \"HindIII\" or \"MboI\" \n (default \"$site\")"
Expand All @@ -119,7 +121,8 @@ siteFileHelp="* [restriction site file]: enter path for restriction site file (l
scriptDirHelp="* [Juicer scripts directory]: set the Juicer directory,\n which should have scripts/ references/ and restriction_sites/ underneath it\n (default ${juiceDir})"
refSeqHelp="* [reference genome file]: enter path for reference sequence file, BWA index\n files must be in same directory"
ligationHelp="* [ligation junction]: use this string when counting ligation junctions"
threadsHelp="* [threads]: number of threads when running BWA alignment"
threadsHelp="* [threads]: number of threads when running BWA alignment and samtools view/sort/merge"
memoryHelp="* [memory]: maximum memory per thread when running samtools sort (default 1G).\n Suffix K/M/G recognized, gigabyte (G) is used if suffix is not provided"
threadsHicHelp="* [threads for hic file creation]: number of threads when building hic file"
sampleHelp="* [sample name]: will be added to the SM portion of the read group (RG) tag"
libraryHelp="* [library name]: will be added to the LB portion of the read group (RG) tag"
Expand Down Expand Up @@ -149,6 +152,7 @@ printHelpAndExit() {
echo -e "$scriptDirHelp"
echo -e "$ligationHelp"
echo -e "$threadsHelp"
echo -e "$memoryHelp"
echo -e "$threadsHicHelp"
echo -e "$sampleHelp"
echo -e "$libraryHelp"
Expand Down Expand Up @@ -192,7 +196,8 @@ while getopts "d:g:a:hs:p:y:z:S:D:b:t:jfuecT:1:2:i:-:w:k:m" opt; do
m) methylation=1 ;;
1) read1files=$OPTARG ;;
2) read2files=$OPTARG ;;
-) case "${OPTARG}" in
-) case "${OPTARG}" in
mem) memory=$2 ;;
assembly) earlyexit=1; assembly=1 ;;
cleanup) cleanup=1 ;;
qc) qc=1 ;;
Expand Down Expand Up @@ -311,7 +316,7 @@ then
echo "Using $site_file as site file"
fi

## Set threads for sending appropriate parameters to cluster and string for BWA call
## Set threads for sending appropriate parameters to cluster and string for BWA and samtools calls
if [ -z "$threads" ]
then
threads="$(getconf _NPROCESSORS_ONLN)"
Expand All @@ -322,6 +327,19 @@ else
sthreadstring="-@ $threads"
fi

## Set memory for sending appropriate parameters to cluster and string for samtools sort calls
if [[ $memory =~ ^[0-9]+$ ]]
then
smemorystring="-m ${memory}G"
elif [[ $memory =~ ^[0-9]+[KMG]$ ]]
then
smemorystring="-m ${memory}"
else
echo "Memory unit suffix not recognised. Use default setting: 1G"
echo $memory
smemorystring="-m 1G"
fi

if [ -n "$read2files" ] && [ -z "$read1files" ]
then
echo "***! When fastqs for read2 are specified with \"-2\", corresponding read1 fastqs must be specified with \"-1\" "
Expand Down Expand Up @@ -542,17 +560,17 @@ then
then
if [ $singleend -eq 1 ]
then
awk -v stem=${name}${ext}_norm -v site_file=$site_file -v singleend=$singleend -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v stem=${name}${ext}_norm -v site_file=$site_file -v singleend=$singleend -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
else
awk -v stem=${name}${ext}_norm -v site_file=$site_file -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v stem=${name}${ext}_norm -v site_file=$site_file -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
fi
else
if [ $singleend -eq 1 ]
then
awk -v stem=${name}${ext}_norm -v singleend=$singleend -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v stem=${name}${ext}_norm -v singleend=$singleend -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
else
awk -v stem=${name}${ext}_norm -f $juiceDir/scripts/common/chimeric_sam.awk $name$ext.sam > $name$ext.sam2
awk -v avgInsertFile=${name}${ext}_norm.txt.res.txt -f $juiceDir/scripts/common/adjust_insert_size.awk $name$ext.sam2 | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v avgInsertFile=${name}${ext}_norm.txt.res.txt -f $juiceDir/scripts/common/adjust_insert_size.awk $name$ext.sam2 | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
fi
fi

Expand Down Expand Up @@ -679,7 +697,7 @@ if [ -z $postproc ]
if [ "$methylation" = 1 ]
then
$load_methyl
samtools sort $sthreadstring ${outputdir}/merged_dedup.bam > ${outputdir}/merged_dedup_sort.bam
samtools sort $sthreadstring $smemorystring ${outputdir}/merged_dedup.bam > ${outputdir}/merged_dedup_sort.bam
$call_methyl extract -F 1024 --keepSingleton --keepDiscordant $refSeq ${outputdir}/merged_dedup_sort.bam
$call_methyl extract -F 1024 --keepSingleton --keepDiscordant --cytosine_report --CHH --CHG $refSeq ${outputdir}/merged_dedup_sort.bam
${juiceDir}/scripts/common/conversion.sh ${outputdir}/merged_dedup_sort.cytosine_report.txt > ${outputdir}/conversion_report.txt
Expand Down
32 changes: 27 additions & 5 deletions CPU/mega_from_bams_diploid.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ USAGE="
*****************************************************
Simplified mega script for ENCODE DCC hic-pipeline.

USAGE: ./mega.sh -c|--chrom-sizes <path_to_chrom_sizes_file> [-g|--genome-id genome_id] [-r|--resolutions resolutions_string] [-v|--vcf <path_to_vcf>] [-C|--exclude-chr-from-diploid chr_list] [--separate-homologs] [-p|--psf <path_to_psf>] [--reads-to-homologs <path_to_reads_to_homologs_file>] [--juicer-dir <path_to_juicer_dir>] [--phaser-dir <path_to_phaser_dir>] [-t|--threads thread_count] [-T|--threads-hic hic_thread_count] [--from-stage stage] [--to-stage stage] <path_to_merged_dedup_bam_1> ... <path_to_merged_dedup_bam_N>
USAGE: ./mega.sh -c|--chrom-sizes <path_to_chrom_sizes_file> [-g|--genome-id genome_id] [-r|--resolutions resolutions_string] [-v|--vcf <path_to_vcf>] [-C|--exclude-chr-from-diploid chr_list] [--separate-homologs] [-p|--psf <path_to_psf>] [--reads-to-homologs <path_to_reads_to_homologs_file>] [--juicer-dir <path_to_juicer_dir>] [--phaser-dir <path_to_phaser_dir>] [-t|--threads thread_count] [--mem memory] [-T|--threads-hic hic_thread_count] [--from-stage stage] [--to-stage stage] <path_to_merged_dedup_bam_1> ... <path_to_merged_dedup_bam_N>

DESCRIPTION:
This is a simplied mega.sh script to produce aggregate Hi-C maps and chromatic accessibility tracks from multiple experiments. The pipeline includes optional diploid steps which produce diploid versions of the contact maps and chromatin accessibility tracks.
Expand Down Expand Up @@ -58,6 +58,10 @@ WORKFLOW CONTROL:
-t|--threads [num]
Indicate how many threads to use. Default: half of available cores as calculated by parallel --number-of-cores.

--mem [num]
Indicate the maximum memory per thread when running samtools sort (default 1G). Suffix K/M/G recognized, gigabyte (G) is used if suffix is not provided.


-T|--threads-hic [num]
Indicate how many threads to use when generating the Hi-C file. Default: 24.

Expand All @@ -81,9 +85,10 @@ resolutionsToBuildString="-r 2500000,1000000,500000,250000,100000,50000,25000,10
exclude_chr="Y|chrY|MT|chrM"
separate_homologs=false
mapq=1 ##lowest mapq of interest
memory="1G" # samtools sort memory per thread

# multithreading
threads=`parallel --number-of-cores`
threads="$(getconf _NPROCESSORS_ONLN)"
threads=$((threads/2))
# adjust for mem usage
tmp=`awk '/MemTotal/ {threads=int($2/1024/1024/2/6-1)}END{print threads+0}' /proc/meminfo 2>/dev/null`
Expand Down Expand Up @@ -190,6 +195,10 @@ while :; do
fi
shift
;;
--mem) OPTARG=$2
memory=$OPTARG
shift
;;
-T|--threads-hic) OPTARG=$2
re='^[0-9]+$'
if [[ $OPTARG =~ $re ]]; then
Expand Down Expand Up @@ -266,6 +275,19 @@ fi

[ -z $genome_id ] && { echo >&2 ":| Warning: no genome_id is provided. Please provide a genome_id if using one of the common references to be able to run the motif finder. Ignoring motif finder!"; }

## Set memory for sending appropriate parameters to cluster and string for samtools sort calls
if [[ $memory =~ ^[0-9]+$ ]]
then
smemorystring="-m ${memory}G"
elif [[ $memory =~ ^[0-9]+[KMG]$ ]]
then
smemorystring="-m ${memory}"
else
echo "Memory unit suffix not recognised. Use default setting: 1G"
echo $memory
smemorystring="-m 1G"
fi

############### HANDLE DEPENDENCIES ###############

## Juicer & Phaser
Expand Down Expand Up @@ -306,7 +328,7 @@ if [ "$first_stage" == "prep" ]; then
samtools merge --no-PG -f mega_header.bam ${header_list}
rm ${header_list}

samtools cat -@ $((threads * 2)) -h mega_header.bam $bam | samtools view -u -d "rt:0" -d "rt:1" -d "rt:2" -d "rt:3" -d "rt:4" -d "rt:5" -@ $((threads * 2)) -F 0x400 -q $mapq - | samtools sort -@ $threads -m 6G -o reads.sorted.bam
samtools cat -@ $((threads * 2)) -h mega_header.bam $bam | samtools view -u -d "rt:0" -d "rt:1" -d "rt:2" -d "rt:3" -d "rt:4" -d "rt:5" -@ $((threads * 2)) -F 0x400 -q $mapq - | samtools sort -@ $threads $smemorystring -o reads.sorted.bam
[ `echo "${PIPESTATUS[@]}" | tr -s ' ' + | bc` -eq 0 ] || { echo ":( Pipeline failed at bam sorting. See stderr for more info. Exiting!" | tee -a /dev/stderr && exit 1; }
rm mega_header.bam

Expand Down Expand Up @@ -522,7 +544,7 @@ if [ "$first_stage" == "diploid_hic" ]; then
export pipeline=${phaser_dir}
export reads_to_homologs=$reads_to_homologs
doit () {
samtools view -@ 2 -h reads.sorted.bam $1 | awk -v chr=$1 'BEGIN{OFS="\t"}FILENAME==ARGV[1]{if($2==chr"-r"||$2==chr"-a"){if(keep[$1]&&keep[$1]!=$2){delete keep[$1]}else{keep[$1]=$2}};next}$0~/^@SQ/{$2=$2"-r"; print; $2=substr($2,1,length($2)-2)"-a";print;next}$0~/^@/{print;next}($1 in keep)&&($7=="="||$7=="*"){$3=keep[$1];print}' $reads_to_homologs - | samtools sort -n -m 1G -O sam | awk '$0~/^@/{next}($1!=prev){if(n==2){sub("\t","",str); print str}; str=""; n=0}{for(i=12;i<=NF;i++){if($i~/^ip:i:/){$4=substr($i,6);break;}};str=str"\t"n"\t"$3"\t"$4"\t"n; n++; prev=$1}END{if(n==2){sub("\t","",str); print str}}' | sort -k 2,2 -S 6G
samtools view -@ 2 -h reads.sorted.bam $1 | awk -v chr=$1 'BEGIN{OFS="\t"}FILENAME==ARGV[1]{if($2==chr"-r"||$2==chr"-a"){if(keep[$1]&&keep[$1]!=$2){delete keep[$1]}else{keep[$1]=$2}};next}$0~/^@SQ/{$2=$2"-r"; print; $2=substr($2,1,length($2)-2)"-a";print;next}$0~/^@/{print;next}($1 in keep)&&($7=="="||$7=="*"){$3=keep[$1];print}' $reads_to_homologs - | samtools sort -@ 2 -n $smemorystring -O sam | awk '$0~/^@/{next}($1!=prev){if(n==2){sub("\t","",str); print str}; str=""; n=0}{for(i=12;i<=NF;i++){if($i~/^ip:i:/){$4=substr($i,6);break;}};str=str"\t"n"\t"$3"\t"$4"\t"n; n++; prev=$1}END{if(n==2){sub("\t","",str); print str}}' | sort -k 2,2 -S 6G

}
export -f doit
Expand Down Expand Up @@ -576,7 +598,7 @@ if [ "$first_stage" == "diploid_dhs" ]; then
export junction_rt_string=${junction_rt_string}
export reads_to_homologs=${reads_to_homologs}
doit () {
samtools view -@2 ${junction_rt_string} -h reads.sorted.bam $1 | awk -v chr=$1 'BEGIN{
samtools view -@ 2 ${junction_rt_string} -h reads.sorted.bam $1 | awk -v chr=$1 'BEGIN{
OFS="\t"}FILENAME==ARGV[1]{
if($2==chr"-r"||$2==chr"-a"){
if(keep[$1]&&keep[$1]!=$2){
Expand Down
13 changes: 11 additions & 2 deletions PBS/scripts/juicer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -621,13 +621,22 @@ ALGNR1
date +"%Y-%m-%d %H:%M:%S"
$load_samtools
export LC_ALL=C

# Set memory for samtools sort (maximum memory is set above with 'mem=24gb')
smemory=$(echo 24 / $threads | bc)
if [ "$smemory" -le 1 ]
then
smemory=1
fi
smemorystring="-m ${smemory}G"

# call chimeric_blacklist.awk to deal with chimeric reads; sorted file is sorted by read name at this point
if [ "$site" != "none" ] && [ -e "$site_file" ]
then
awk -v stem=${name}${ext}_norm -v site_file=$site_file -f $juiceDir/scripts/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v stem=${name}${ext}_norm -v site_file=$site_file -f $juiceDir/scripts/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
else
awk -v stem=${name}${ext}_norm -f $juiceDir/scripts/chimeric_sam.awk $name$ext.sam > $name$ext.sam2
awk -v avgInsertFile=${name}${ext}_norm.txt.res.txt -f $juiceDir/scripts/adjust_insert_size.awk $name$ext.sam2 | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v avgInsertFile=${name}${ext}_norm.txt.res.txt -f $juiceDir/scripts/adjust_insert_size.awk $name$ext.sam2 | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
fi
if [ \$? -ne 0 ]
then
Expand Down
13 changes: 11 additions & 2 deletions PBS_without_launch/scripts/juicer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -608,13 +608,22 @@ ALGNR1
date +"%Y-%m-%d %H:%M:%S"
$load_samtools
export LC_ALL=C

# Set memory for samtools sort (maximum memory is set above with 'mem=24gb')
smemory=$(echo 24 / $threads | bc)
if [ "$smemory" -le 1 ]
then
smemory=1
fi
smemorystring="-m ${smemory}G"

# call chimeric_blacklist.awk to deal with chimeric reads; sorted file is sorted by read name at this point
if [ "$site" != "none" ] && [ -e "$site_file" ]
then
awk -v stem=${name}${ext}_norm -v site_file=$site_file -f $juiceDir/scripts/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v stem=${name}${ext}_norm -v site_file=$site_file -f $juiceDir/scripts/chimeric_sam.awk $name$ext.sam | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
else
awk -v stem=${name}${ext}_norm -f $juiceDir/scripts/chimeric_sam.awk $name$ext.sam > $name$ext.sam2
awk -v avgInsertFile=${name}${ext}_norm.txt.res.txt -f $juiceDir/scripts/adjust_insert_size.awk $name$ext.sam2 | samtools sort -t cb -n $sthreadstring > ${name}${ext}.bam
awk -v avgInsertFile=${name}${ext}_norm.txt.res.txt -f $juiceDir/scripts/adjust_insert_size.awk $name$ext.sam2 | samtools sort -t cb -n $sthreadstring $smemorystring > ${name}${ext}.bam
fi
if [ $? -ne 0 ]
then
Expand Down
4 changes: 2 additions & 2 deletions SLURM/scripts/juicer.sh
Original file line number Diff line number Diff line change
Expand Up @@ -964,7 +964,7 @@ MRGALL3`
$userstring
${load_samtools}
#we should probably set the -m based on memory / num of threads
if time samtools sort -t cb -n -O SAM -@ $sortthreads -l 0 -m 2G $name$ext.sam3 > ${name}${ext}.sam
if time samtools sort -t cb -n -O SAM $sthreadstring -l 0 -m 2G $name$ext.sam3 > ${name}${ext}.sam
then
rm -f $name$ext.sam2 $name$ext.sam3
touch $touchfile
Expand Down Expand Up @@ -1326,7 +1326,7 @@ BAMRM`
$userstring
${load_samtools}
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/gpfs0/home/neva/lib
samtools sort $sthreadstring ${outputdir}/merged_dedup.bam > ${outputdir}/merged_dedup_sort.bam
samtools sort $sthreadstring -m 10G ${outputdir}/merged_dedup.bam > ${outputdir}/merged_dedup_sort.bam
/gpfs0/home/neva/bin/MethylDackel extract -F 1024 --keepSingleton --keepDiscordant $refSeq ${outputdir}/merged_dedup_sort.bam
/gpfs0/home/neva/bin/MethylDackel extract -F 1024 --keepSingleton --keepDiscordant --cytosine_report --CHH --CHG $refSeq ${outputdir}/merged_dedup_sort.bam
${juiceDir}/scripts/conversion.sh ${outputdir}/merged_dedup_sort.cytosine_report.txt > ${outputdir}/conversion_report.txt
Expand Down