version update for LRSDAY: v1.4.0 -> v1.5.0

yjx1217 · May 14, 2019 · 00a3519 · 00a3519
1 parent 5c35d3d
commit 00a3519
Show file tree

Hide file tree

Showing 59 changed files with 808 additions and 434 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+## [1.5.0] - 2019-05-13
+### Added
+- Supports for native ONT nanopore basecalling, demultiplexing, and read profile plotting.
+- An lite-weight bash script for generating dotplot for any pairs of fasta files.
+### Changed
+- Better robustness for the dependency installation script. 
+- Reducing the requirements of setting output prefix by assuming the same prefix was used across different modules.
+- Setting adjustment for long-read filtering and downsampling.
+- Applying read filtering/downsampling for the testing example.
+- Software version updates for a number of dependencies.
+### Fixed
+- A bug that might lead to missing gene annotation in certain genomic region.
+- Typos in the installation script.
+- Typos in the manual.
+
 ## [1.4.0] - 2019-03-21
 ### Changed
 - Supports for multi-round assembly polishing using both long and short reads.

diff --git a/Example_Outputs/SK1.assembly.final.fa.gz b/Example_Outputs/SK1.assembly.final.fa.gz
diff --git a/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.INDEL.vcf.gz b/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.INDEL.vcf.gz
diff --git a/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.SNP.vcf.gz b/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.SNP.vcf.gz
diff --git a/Example_Outputs/SK1.assembly.final.filter.pdf b/Example_Outputs/SK1.assembly.final.filter.pdf
diff --git a/Example_Outputs/SK1.assembly.final.stats.txt b/Example_Outputs/SK1.assembly.final.stats.txt
@@ -1,17 +1,17 @@
 total sequence count: 34
-total sequence length: 12448003
-min sequence length: 1248
-max sequence length: 1480301
-mean sequence length: 366117.74
-median sequence length: 60826.50
-N50: 923676
+total sequence length: 12473902
+min sequence length: 12836
+max sequence length: 1480337
+mean sequence length: 366879.47
+median sequence length: 57378.00
+N50: 923760
 L50: 6
-N90: 341518
+N90: 328166
 L90: 14
-A%: 30.89
+A%: 30.88
 T%: 30.81
-G%: 19.14
-C%: 19.13
-AT%: 61.70
-GC%: 38.26
+G%: 19.12
+C%: 19.14
+AT%: 61.69
+GC%: 38.27
 N%: 0.04
diff --git a/Example_Outputs/SK1.final.cds.fa.gz b/Example_Outputs/SK1.final.cds.fa.gz
diff --git a/Example_Outputs/SK1.final.gff3.gz b/Example_Outputs/SK1.final.gff3.gz
diff --git a/Example_Outputs/SK1.final.manual_check.list b/Example_Outputs/SK1.final.manual_check.list
diff --git a/Example_Outputs/SK1.final.pep.fa.gz b/Example_Outputs/SK1.final.pep.fa.gz
diff --git a/Example_Outputs/SK1.final.trimmed_cds.fa.gz b/Example_Outputs/SK1.final.trimmed_cds.fa.gz
diff --git a/Manual.pdf b/Manual.pdf
diff --git a/Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh b/Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh
@@ -7,13 +7,13 @@ source ./../../env.sh
 #######################################
 # set project-specific variables
 
-prefix="YGL3210" # The file name prefix for the output files
-reads="./../00.Long_Reads/YGL3210.fq.gz" # The file path of the long reads file (in fastq or fastq.gz format).
-reads_type="nanopore-raw" # The long reads data type: "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected".
-run_filtering="yes" # Whether to filter the reads: "yes" or "no". Default = "yes".
+prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example.
+reads="./../00.Long_Reads/$prefix.filtered_subreads.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format).
+reads_type="pacbio-raw" # The long reads data type: "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected".
+run_filtering="yes" # Whether to filter and downsample the reads: "yes" or "no". Default = "yes".
 genome_size="12500000" # The haploid genome size (in bp) of sequenced organism. Default = "12500000" (i.e. 12.5 Mb for the budding yeast S. cereviaie genome). This is used to calculate targeted sequencing coverage after read filtering (see below). 
-post_filtering_coverage="40" # Targeted sequencing coverage after read filtering. Default = "40" (i.e. 40x coverage).
-threads=1 # The number of threads to use. Default = "1".
+post_filtering_coverage="60" # Targeted sequencing coverage after read filtering and downsampling. Default = "60" (i.e. 60x coverage).
+threads=4 # The number of threads to use. Default = "1".
 
 #######################################
 # process the pipeline

diff --git a/Project_Template/00.Long_Reads/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh b/Project_Template/00.Long_Reads/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+set -e -o pipefail
+#######################################
+# load environment variables for LRSDAY
+source ./../../env.sh
+
+#######################################
+# set project-specific variables
+project_name="Project_Example" # LRSDAY Project name. Default = "Project_Example".
+run_basecalling="yes" # Whether to perform basecalling: "yes" or "no". Default = "yes". 
+run_demultiplexing="yes" # Whether to perform demultiplexing: "yes" or "no". Default = "yes". 
+run_nanoplotting="yes" # Whether to perform nanoplotting: "yes" or "no". Default = "yes". 
+
+flowcell_id="FAKXXXXX" # The flowcell ID of the nanopore run. Default = "FAKXXXXX".
+flowcell_version="FLO-MIN106" # The flowcell version of the nanopore run. Default = "FLO-MIN106".
+sequencing_kit_version="SQK-LSK108" # The sequencing kit version of the nanopore run. Default = "SQK-LSK108".
+barcode_kit_version="EXP-NBD103" # The barcode kit version of the nanopore run. Default = "EXP-NBD103".
+
+raw_reads_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_raw_fast5_files" # The directory containing the raw nanopore reads before basecalling
+basecalling_output_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_basecalled_fast5_files" # The directory containing the basecalled nanopore reads. This directory will be automatically generated when running basecalling.
+threads=8 # The number of threads to use. Default = 8.
+
+#############################
+# normally no need to change the following
+qual=5 # read quality filter for guppy basecalling
+num_callers=$threads # num_callers for guppy
+threads_per_caller=1 # threads_per_caller for guppy
+demultiplexing_threads=$threads # threads to use for demultiplexing
+demultiplexing_output_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_demultiplexed_fastq_files" # The directory containing the demultiplexed basecalled nanopore reads. This directory will be automatically generated when running demultiplexing. 
+
+if [[ "$run_basecalling" == "yes" ]]
+then
+    echo "Check if $basecalling_output_directory is empty for running basecalling."
+    if [[ "$(ls $basecalling_output_directory)" ]]
+    then
+	echo "Warning! The basecalling directory is not empty! Please empty its content if you want to run basecalling."
+	echo "Exit!!!"
+	exit
+    else
+	echo "Running basecalling."
+	$guppy_dir/guppy_basecaller \
+	    --flowcell $flowcell_version \
+	    --kit $sequencing_kit_version \
+	    --recursive \
+	    --input_path $raw_reads_directory \
+	    --save_path $basecalling_output_directory \
+	    --fast5_out \
+	    --qscore_filtering \
+	    --min_qscore $qual \
+	    --num_callers $num_callers \
+	    --cpu_threads_per_caller $threads_per_caller 
+	cd $basecalling_output_directory
+	cat ./pass/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.fastq.gz
+	cat ./fail/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.fail.fastq.gz
+    fi
+fi
+
+if [[ "$run_demultiplexing" == "yes" ]]
+then
+    echo "Check if $basecalling_output_directory/pass has basecalled reads for running demultiplexing."
+    if [[ "$(ls $basecalling_output_directory/pass)" ]]
+    then
+	echo "Running demultiplexing."
+	$guppy_dir/guppy_barcoder \
+	    --barcode_kit $barcode_kit_version \
+	    --recursive \
+	    --input_path $basecalling_output_directory/pass \
+	    --save_path $demultiplexing_output_directory \
+	    --worker_threads $demultiplexing_threads
+
+	cd $demultiplexing_output_directory
+	for b in barcode*
+	do 
+	    echo "for demultiplexing: barcode=$b"
+	    cat ./$b/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.$b.fastq.gz
+	done
+	cat ./unclassified/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.unclassified.fastq.gz
+    else
+	echo "There is no reads in $basecalling_output_directory/pass!"
+	echo "Please put the basecalled reads in $basecalling_output_directory/pass for demultiplexing!"
+	echo "Exit!!!"
+	exit
+    fi
+fi
+
+set +oe pipefail 
+
+if [[ "$run_nanoplotting" == "yes" ]]
+then
+    echo "Check if $basecalling_output_directory/pass has basecalled reads for running nanoplotting."
+    if [[ "$(ls $basecalling_output_directory/pass)" ]]
+    then
+	echo "Running nanoplotting."
+	cd $basecalling_output_directory
+	fastq_input="$project_name.basecalled_reads.Q${qual}.pass.fastq.gz"
+	source $nanoplot_dir/activate
+	$nanoplot_dir/NanoPlot \
+	    --threads $threads \
+	    --fastq $fastq_input \
+	    --N50 \
+	    -o "${project_name}_Q${qual}_pass_NanoPlot_out"
+    fi
+    if [[ "$run_demultiplexing" == "yes" ]]
+    then
+	cd $demultiplexing_output_directory
+	for b in barcode*
+	do
+	    echo "for nanoplotting: barcode=$b"
+	    fastq_input="$project_name.basecalled_reads.Q${qual}.pass.$b.fastq.gz"
+	    source $nanoplot_dir/activate
+	    $nanoplot_dir/NanoPlot \
+		--threads $threads \
+		--fastq $fastq_input \
+		--N50 \
+		-o "${project_name}_Q${qual}_pass_${b}_NanoPlot_out"
+	done
+	echo "for nanoplotting: unclassified"
+	fastq_input="$project_name.basecalled_reads.Q${qual}.pass.unclassified.fastq.gz"
+	$nanoplot_dir/NanoPlot \
+            --threads $threads \
+            --fastq $fastq_input \
+            --N50 \
+            -o "${project_name}_Q${qual}_pass_unclassified_NanoPlot_out" 
+    fi
+fi
+
+
+
+############################
+# checking bash exit status
+if [[ $? -eq 0 ]]
+then
+    echo "" 
+    echo "LRSDAY message: This bash script has been successfully processed! :)"
+    echo ""
+    echo ""
+    exit 0
+fi
+############################
diff --git a/....Long_Reads/nanopore_fast5_files/.gitkeep → .../nanopore_basecalled_fast5_files/.gitkeep b/....Long_Reads/nanopore_fast5_files/.gitkeep → .../nanopore_basecalled_fast5_files/.gitkeep
diff --git a/Project_Template/00.Long_Reads/nanopore_raw_fast5_files/.gitkeep b/Project_Template/00.Long_Reads/nanopore_raw_fast5_files/.gitkeep
diff --git a/..._Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh b/..._Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh
@@ -7,13 +7,13 @@ PATH=$gnuplot_dir:$PATH
 
 ###########################################
 # set project-specific variables
-prefix="SK1" # The file name prefix for the output files.
-long_reads="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format).
+prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example.
+long_reads="./../00.Long_Reads/$prefix.filtlong.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format).
 long_reads_type="pacbio-raw" # The long reads data type. Use "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". Default = "pacbio-raw" for the testing example
 genome_size="12.5m" # The estimated genome size with the format of <number>[g|m|k], e.g. 12.5m for 12.5 Mb. Default = "12.5m".
 assembler="canu" # The long-read assembler: Use "canu" or "flye" or "wtdbg2" or "smartdenovo" or "canu-flye" or "canu-wtdbg2" or "canu-smartdenovo". For "canu-flye", "canu-wtdbg2", and "canu-smartdenovo", the assembler canu is used first to generate error-corrected reads from the raw reads and then the assembler flye/wtdbg2/smartdenovo is used to assemble the genome. Based on our test, assembler="canu" generally gives the best result but will take substantially longer time than the other options.
-customized_canu_parameters="correctedErrorRate=0.04" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you can could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collasping haplotypes. As shown in these examples, more than one customized parameters can be set here as long as they are separeted by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "correctedErrorRate=0.04" for the testing example.
-threads=2 # The number of threads to use. Default = 2.
+customized_canu_parameters="" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you can could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collasping haplotypes. As shown in these examples, more than one customized parameters can be set here as long as they are separeted by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "" for the testing example.
+threads=4 # The number of threads to use. Default = 4.
 vcf="yes" # Use "yes" if prefer to have vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome for their uniquely alignable regions. Otherwise use "no". Default = "yes".
 dotplot="yes" # Use "yes" if prefer to plot genome-wide dotplot based on the comparison with the reference genome below. Otherwise use "no". Default = "yes".
 ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome. This is only needed when the option "dotplot=" or "vcf=" has been set as "yes".

diff --git a/...ate/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh b/...ate/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh
@@ -7,19 +7,19 @@ source ./../../env.sh
 ###########################################
 # set project-specific variables
 
-input_assembly="./../01.Long-read-based_Genome_Assembly/SK1.assembly.raw.fa" # The file path of the input raw long-read-based assembly for polishing.
-long_reads_in_fastq="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path of the long-read fastq file. 
+prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example.
+
+input_assembly="./../01.Long-read-based_Genome_Assembly/$prefix.assembly.raw.fa" # The file path of the input raw long-read-based assembly for polishing.
+long_reads_in_fastq="./../00.Long_Reads/$prefix.filtlong.fastq.gz" # The file path of the long-read fastq file. 
 long_read_technology="pacbio" # The used long-read sequencing technology. Use "pacbio" or "nanopore". Default = "pacbio" for the testing example.
 
 ### When long_read_technology="pacbio" ####
-pacbio_bam_fofn_file="./../00.Long_Reads/pacbio_fofn_files/SK1.merged.bam.fofn" # The file path to the fofn file containing the absolute path to the PacBio bam files. BAM file is the native output format for PacBio Sequel platform but this is not the case for the RSII platform. For RSII data, the bax2bam file conversion is needed. This can be done by running the LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh script in the 00.Long_Reads directory.
+pacbio_bam_fofn_file="./../00.Long_Reads/pacbio_fofn_files/$prefix.merged.bam.fofn" # The file path to the fofn file containing the absolute path to the PacBio bam files. BAM file is the native output format for PacBio Sequel platform but this is not the case for the RSII platform. For RSII data, the bax2bam file conversion is needed. This can be done by running the LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh script in the 00.Long_Reads directory.
 pacbio_reads_type="RSII" # The sequencing machine used to generate the input PacBio reads . Use "RSII" or "Sequel". Default = "RSII" for the testing example. 
 
 ### When long_read_technology="nanopore" ###
-nanopore_fast5_files="./../00.Long_Reads/nanopore_fast5_files" # The file path to the directory containing raw Oxford Nanopore FAST5 files.
-nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_albacore_sequencing_summary="".
-
-prefix="SK1" # The file name prefix for the output files. Default = "SK1" for the testing example.
+nanopore_basecalled_fast5_files="./../00.Long_Reads/nanopore_basecalled_fast5_files" # The file path to the directory containing the basecalled Oxford Nanopore FAST5 files.
+nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_basecalled_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_albacore_sequencing_summary="".
 
 threads=1  # The number of threads to use. Default = "1".
 ploidy=1 # The ploidy status of the sequenced genome. use "1" for haploid genome and "2" for diploid genome. Default = "1" for the testing example.
@@ -82,15 +82,15 @@ else
     source $nanopolish_dir/py3_virtualenv_nanopolish/bin/activate
     if [[ -z "$nanopore_basecalling_sequencing_summary" ]]
     then
-	$nanopolish_dir/nanopolish index -d $nanopore_fast5_files $long_reads_in_fastq
+	$nanopolish_dir/nanopolish index -d $nanopore_basecalled_fast5_files $long_reads_in_fastq
     else
-	$nanopolish_dir/nanopolish index -d $nanopore_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq
+	$nanopolish_dir/nanopolish index -d $nanopore_basecalled_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq
     fi
     for i in $(seq 1 1 $rounds_of_successive_polishing)
     do
 	java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar CreateSequenceDictionary -REFERENCE $prefix.assembly.tmp.fa -OUTPUT $prefix.assembly.tmp.dict
 	$minimap2_dir/minimap2 -ax map-ont $prefix.assembly.tmp.fa $long_reads_in_fastq > $prefix.minimap2.round_${i}.sam
-	java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar SortSam -INPUT $prefix.minimap2.round_${i}.sam -OUTPUT $prefix.minimap2.round_${i}.bam -SORT_ORDER coordinate
+	java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar SortSam -INPUT $prefix.minimap2.round_${i}.sam -OUTPUT $prefix.minimap2.round_${i}.bam -SORT_ORDER coordinate -VALIDATION_STRINGENCY LENIENT -MAX_RECORDS_IN_RAM 50000  
 	$samtools_dir/samtools index $prefix.minimap2.round_${i}.bam
 	rm $prefix.minimap2.round_${i}.sam
 	python3 $nanopolish_dir/scripts/nanopolish_makerange.py $prefix.assembly.tmp.fa | $parallel_dir/parallel --results ${prefix}_nanopolish_round_${i}_results -P 1 \

diff --git a/...llumina-read-based_Assembly_Polishing/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh b/...llumina-read-based_Assembly_Polishing/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh
@@ -7,8 +7,8 @@ source ./../../env.sh
 ###########################################
 # set project-specific variables
 
-input_assembly="./../02.Long-read-based_Assembly_Polishing/SK1.assembly.long_read_polished.fa" # The file path of the input assembly before Illumina-based correction
-prefix="SK1" # The file name prefix for the output files.
+prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example.
+input_assembly="./../02.Long-read-based_Assembly_Polishing/$prefix.assembly.long_read_polished.fa" # The file path of the input assembly before Illumina-based correction
 trim_illumina_reads="yes" # Whether to trim the input Illumina reads. Use "yes" if prefer to perform trimming, otherwise use "no". Default = "yes". 
 rounds_of_successive_polishing=1 # The number of total rounds of Illumina-read-based assembly polishing. Default = "1" for the testing example.
 threads=1 # The number of threads to use. Default = "1".
@@ -117,13 +117,13 @@ do
 
     # GATK local realign
     # find realigner targets
-    java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk_dir/GenomeAnalysisTK.jar \
+    java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk3_dir/GenomeAnalysisTK.jar \
 	-R refseq.tmp.fa \
 	-T RealignerTargetCreator \
 	-I $prefix.round_${i}.dedup.bam \
 	-o $prefix.round_${i}.realn.intervals
     # run realigner
-    java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk_dir/GenomeAnalysisTK.jar \
+    java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk3_dir/GenomeAnalysisTK.jar \
 	-R refseq.tmp.fa \
 	-T IndelRealigner \
 	-I $prefix.round_${i}.dedup.bam \