diff --git a/CHANGELOG.md b/CHANGELOG.md index 347aed3..a1c74c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,21 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +## [1.5.0] - 2019-05-13 +### Added +- Support for native ONT nanopore basecalling, demultiplexing, and read profile plotting. +- A lightweight bash script for generating a dotplot for any pair of FASTA files. +### Changed +- Better robustness for the dependency installation script. +- Reduced the need to set the output prefix by assuming the same prefix is used across different modules. +- Adjusted settings for long-read filtering and downsampling. +- Applied read filtering/downsampling to the testing example. +- Software version updates for a number of dependencies. +### Fixed +- A bug that might lead to missing gene annotations in certain genomic regions. +- Typos in the installation script. +- Typos in the manual. + ## [1.4.0] - 2019-03-21 ### Changed - Supports for multi-round assembly polishing using both long and short reads. 
diff --git a/Example_Outputs/SK1.assembly.final.fa.gz b/Example_Outputs/SK1.assembly.final.fa.gz index dcb7c82..36992ac 100644 Binary files a/Example_Outputs/SK1.assembly.final.fa.gz and b/Example_Outputs/SK1.assembly.final.fa.gz differ diff --git a/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.INDEL.vcf.gz b/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.INDEL.vcf.gz index 49c8378..78a0130 100644 Binary files a/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.INDEL.vcf.gz and b/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.INDEL.vcf.gz differ diff --git a/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.SNP.vcf.gz b/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.SNP.vcf.gz index 93fe8cd..c71920d 100644 Binary files a/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.SNP.vcf.gz and b/Example_Outputs/SK1.assembly.final.filter.mummer2vcf.SNP.vcf.gz differ diff --git a/Example_Outputs/SK1.assembly.final.filter.pdf b/Example_Outputs/SK1.assembly.final.filter.pdf index 48ac43d..01c74ed 100644 Binary files a/Example_Outputs/SK1.assembly.final.filter.pdf and b/Example_Outputs/SK1.assembly.final.filter.pdf differ diff --git a/Example_Outputs/SK1.assembly.final.stats.txt b/Example_Outputs/SK1.assembly.final.stats.txt index b286074..2067a0d 100644 --- a/Example_Outputs/SK1.assembly.final.stats.txt +++ b/Example_Outputs/SK1.assembly.final.stats.txt @@ -1,17 +1,17 @@ total sequence count: 34 -total sequence length: 12448003 -min sequence length: 1248 -max sequence length: 1480301 -mean sequence length: 366117.74 -median sequence length: 60826.50 -N50: 923676 +total sequence length: 12473902 +min sequence length: 12836 +max sequence length: 1480337 +mean sequence length: 366879.47 +median sequence length: 57378.00 +N50: 923760 L50: 6 -N90: 341518 +N90: 328166 L90: 14 -A%: 30.89 +A%: 30.88 T%: 30.81 -G%: 19.14 -C%: 19.13 -AT%: 61.70 -GC%: 38.26 +G%: 19.12 +C%: 19.14 +AT%: 61.69 +GC%: 38.27 N%: 0.04 diff --git 
a/Example_Outputs/SK1.final.cds.fa.gz b/Example_Outputs/SK1.final.cds.fa.gz index 0075582..44a7c1e 100644 Binary files a/Example_Outputs/SK1.final.cds.fa.gz and b/Example_Outputs/SK1.final.cds.fa.gz differ diff --git a/Example_Outputs/SK1.final.gff3.gz b/Example_Outputs/SK1.final.gff3.gz index af1e457..8192eae 100644 Binary files a/Example_Outputs/SK1.final.gff3.gz and b/Example_Outputs/SK1.final.gff3.gz differ diff --git a/Example_Outputs/SK1.final.manual_check.list b/Example_Outputs/SK1.final.manual_check.list index a0482e6..259614f 100644 --- a/Example_Outputs/SK1.final.manual_check.list +++ b/Example_Outputs/SK1.final.manual_check.list @@ -1,110 +1,105 @@ -SK1_G0000050|SK1_G0000050.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0000710|SK1_G0000710.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0000810|SK1_G0000810.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0000950|SK1_G0000950.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0000970|SK1_G0000970.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0001060|SK1_G0001060.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0001170|SK1_G0001170.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0001220|SK1_G0001220.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0001420|SK1_G0001420.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
-SK1_G0001600|SK1_G0001600.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0001750|SK1_G0001750.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0001850|SK1_G0001850.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0002640|SK1_G0002640.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0002830|SK1_G0002830.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0002860|SK1_G0002860.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0002920|SK1_G0002920.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0003770|SK1_G0003770.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0004150|SK1_G0004150.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0004490|SK1_G0004490.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0004900|SK1_G0004900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005440|SK1_G0005440.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005520|SK1_G0005520.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005600|SK1_G0005600.mRNA.1 unexpected start & end codons based on 
standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005800|SK1_G0005800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005830|SK1_G0005830.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0000020|SK1_G0000020.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0000680|SK1_G0000680.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0000780|SK1_G0000780.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0000920|SK1_G0000920.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0001070|SK1_G0001070.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0001180|SK1_G0001180.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0001230|SK1_G0001230.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0001430|SK1_G0001430.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0001610|SK1_G0001610.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0001760|SK1_G0001760.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0001860|SK1_G0001860.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
+SK1_G0002650|SK1_G0002650.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0002840|SK1_G0002840.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0002870|SK1_G0002870.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0002930|SK1_G0002930.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0003780|SK1_G0003780.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0004160|SK1_G0004160.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0004500|SK1_G0004500.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0004910|SK1_G0004910.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0005420|SK1_G0005420.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0005500|SK1_G0005500.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0005580|SK1_G0005580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0005780|SK1_G0005780.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0005810|SK1_G0005810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0005820|SK1_G0005820.mRNA.1 unexpected start & end 
codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0005840|SK1_G0005840.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005860|SK1_G0005860.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0005930|SK1_G0005930.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0006010|SK1_G0006010.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0006430|SK1_G0006430.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0006480|SK1_G0006480.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 -SK1_G0006490|SK1_G0006490.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1 -SK1_G0006630|SK1_G0006630.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0007710|SK1_G0007710.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0007800|SK1_G0007800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0008100|SK1_G0008100.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0008130|SK1_G0008130.mRNA.1 incorrect CDS length +SK1_G0006000|SK1_G0006000.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0006160|SK1_G0006160.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 
1;internal stop codon(s) +SK1_G0006410|SK1_G0006410.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0006460|SK1_G0006460.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 +SK1_G0006470|SK1_G0006470.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1 +SK1_G0006620|SK1_G0006620.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0007700|SK1_G0007700.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0007790|SK1_G0007790.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0008090|SK1_G0008090.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0008120|SK1_G0008120.mRNA.1 incorrect CDS length +SK1_G0008130|SK1_G0008130.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0008140|SK1_G0008140.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0008150|SK1_G0008150.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0009340|SK1_G0009340.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0010000|SK1_G0010000.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0010080|SK1_G0010080.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0010370|SK1_G0010370.mRNA.1 unexpected 
start codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0011120|SK1_G0011120.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0011610|SK1_G0011610.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0012360|SK1_G0012360.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0012510|SK1_G0012510.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0009330|SK1_G0009330.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0009990|SK1_G0009990.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0010070|SK1_G0010070.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0010360|SK1_G0010360.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0011110|SK1_G0011110.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0011600|SK1_G0011600.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0012350|SK1_G0012350.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0012500|SK1_G0012500.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0012710|SK1_G0012710.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 
1;internal stop codon(s) SK1_G0012720|SK1_G0012720.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0012730|SK1_G0012730.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0012910|SK1_G0012910.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0013390|SK1_G0013390.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0013410|SK1_G0013410.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0013710|SK1_G0013710.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 -SK1_G0013750|SK1_G0013750.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0013760|SK1_G0013760.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0012900|SK1_G0012900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0013380|SK1_G0013380.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0013400|SK1_G0013400.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0013690|SK1_G0013690.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 +SK1_G0013730|SK1_G0013730.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0013740|SK1_G0013740.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect 
CDS length SK1_G0013810|SK1_G0013810.mRNA.1 incorrect CDS length -SK1_G0013820|SK1_G0013820.mRNA.1 incorrect CDS length -SK1_G0013830|SK1_G0013830.mRNA.1 incorrect CDS length -SK1_G0013850|SK1_G0013850.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0014570|SK1_G0014570.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0014620|SK1_G0014620.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0015020|SK1_G0015020.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0015160|SK1_G0015160.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0015550|SK1_G0015550.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0015580|SK1_G0015580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0016490|SK1_G0016490.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1 -SK1_G0016520|SK1_G0016520.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0016650|SK1_G0016650.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0016870|SK1_G0016870.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0016880|SK1_G0016880.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0016980|SK1_G0016980.mRNA.1 unexpected start & end 
codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0016990|SK1_G0016990.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0017460|SK1_G0017460.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0017540|SK1_G0017540.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0017800|SK1_G0017800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0017810|SK1_G0017810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0017820|SK1_G0017820.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0017970|SK1_G0017970.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0018200|SK1_G0018200.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0018580|SK1_G0018580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0019130|SK1_G0019130.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0019420|SK1_G0019420.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0019530|SK1_G0019530.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0020180|SK1_G0020180.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table 
is 1;internal stop codon(s) -SK1_G0021590|SK1_G0021590.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0021830|SK1_G0021830.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0022800|SK1_G0022800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0013830|SK1_G0013830.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0014550|SK1_G0014550.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0014600|SK1_G0014600.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0015000|SK1_G0015000.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0015140|SK1_G0015140.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0015530|SK1_G0015530.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0015560|SK1_G0015560.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0016490|SK1_G0016490.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0016620|SK1_G0016620.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0016840|SK1_G0016840.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0016850|SK1_G0016850.mRNA.1 
unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0016950|SK1_G0016950.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0016960|SK1_G0016960.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0017430|SK1_G0017430.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0017510|SK1_G0017510.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0017770|SK1_G0017770.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0017780|SK1_G0017780.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0017790|SK1_G0017790.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0017940|SK1_G0017940.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0018170|SK1_G0018170.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0018550|SK1_G0018550.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0019100|SK1_G0019100.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0019390|SK1_G0019390.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0019500|SK1_G0019500.mRNA.1 unexpected start & end codons based on standard genentic 
code;your selected code table is 1;internal stop codon(s) +SK1_G0020150|SK1_G0020150.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0021560|SK1_G0021560.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0021800|SK1_G0021800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0022770|SK1_G0022770.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0022810|SK1_G0022810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0022840|SK1_G0022840.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0022870|SK1_G0022870.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0022920|SK1_G0022920.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0022950|SK1_G0022950.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0023560|SK1_G0023560.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0023610|SK1_G0023610.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0023800|SK1_G0023800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0023820|SK1_G0023820.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
-SK1_G0023880|SK1_G0023880.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0024180|SK1_G0024180.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0024210|SK1_G0024210.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0024390|SK1_G0024390.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0024430|SK1_G0024430.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0024820|SK1_G0024820.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0025260|SK1_G0025260.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0025400|SK1_G0025400.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0025440|SK1_G0025440.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0025600|SK1_G0025600.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0025620|SK1_G0025620.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0025880|SK1_G0025880.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 +SK1_G0022910|SK1_G0022910.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0022940|SK1_G0022940.mRNA.1 unexpected start & end codons based on standard genentic code;your 
selected code table is 1;internal stop codon(s) +SK1_G0023550|SK1_G0023550.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0023600|SK1_G0023600.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0023790|SK1_G0023790.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0023810|SK1_G0023810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0023870|SK1_G0023870.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0024170|SK1_G0024170.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0024200|SK1_G0024200.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0024380|SK1_G0024380.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0024420|SK1_G0024420.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0024810|SK1_G0024810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0025250|SK1_G0025250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0025390|SK1_G0025390.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0025430|SK1_G0025430.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
+SK1_G0025610|SK1_G0025610.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0025630|SK1_G0025630.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0025970|SK1_G0025970.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0026260|SK1_G0026260.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0026410|SK1_G0026410.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) @@ -112,128 +107,144 @@ SK1_G0026500|SK1_G0026500.mRNA.1 unexpected start & end codons based on standard SK1_G0026540|SK1_G0026540.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0026690|SK1_G0026690.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0026890|SK1_G0026890.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0026920|SK1_G0026920.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0027190|SK1_G0027190.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0027640|SK1_G0027640.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0027710|SK1_G0027710.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 -SK1_G0027720|SK1_G0027720.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
-SK1_G0027730|SK1_G0027730.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0027750|SK1_G0027750.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0027780|SK1_G0027780.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0027930|SK1_G0027930.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0028520|SK1_G0028520.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0029470|SK1_G0029470.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0029540|SK1_G0029540.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0029900|SK1_G0029900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0030030|SK1_G0030030.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0031040|SK1_G0031040.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0031080|SK1_G0031080.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0031140|SK1_G0031140.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0031200|SK1_G0031200.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 -SK1_G0031210|SK1_G0031210.mRNA.1 unexpected start & end codons based on standard genentic code;your 
selected code table is 1;internal stop codon(s) -SK1_G0031820|SK1_G0031820.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0032230|SK1_G0032230.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0032390|SK1_G0032390.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0033300|SK1_G0033300.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0027180|SK1_G0027180.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0027630|SK1_G0027630.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0027660|SK1_G0027660.mRNA.1 incorrect CDS length +SK1_G0027700|SK1_G0027700.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0027740|SK1_G0027740.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0027770|SK1_G0027770.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0027920|SK1_G0027920.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0028510|SK1_G0028510.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0029460|SK1_G0029460.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0029530|SK1_G0029530.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 
1;internal stop codon(s) +SK1_G0029890|SK1_G0029890.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0030020|SK1_G0030020.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0031030|SK1_G0031030.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0031130|SK1_G0031130.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0031200|SK1_G0031200.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0031810|SK1_G0031810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0032220|SK1_G0032220.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0032380|SK1_G0032380.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0033290|SK1_G0033290.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0033390|SK1_G0033390.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0033400|SK1_G0033400.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0033410|SK1_G0033410.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0034240|SK1_G0034240.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0034390|SK1_G0034390.mRNA.1 unexpected start & end 
codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0034900|SK1_G0034900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0035500|SK1_G0035500.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0035720|SK1_G0035720.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0035870|SK1_G0035870.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0036790|SK1_G0036790.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0036900|SK1_G0036900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0037780|SK1_G0037780.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0034230|SK1_G0034230.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0034380|SK1_G0034380.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0034890|SK1_G0034890.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0035490|SK1_G0035490.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0035710|SK1_G0035710.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0035860|SK1_G0035860.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
+SK1_G0036800|SK1_G0036800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0036910|SK1_G0036910.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0038600|SK1_G0038600.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0038690|SK1_G0038690.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0039140|SK1_G0039140.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039220|SK1_G0039220.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039320|SK1_G0039320.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039340|SK1_G0039340.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039650|SK1_G0039650.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039690|SK1_G0039690.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039800|SK1_G0039800.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039860|SK1_G0039860.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0039980|SK1_G0039980.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0040240|SK1_G0040240.mRNA.1 unexpected start & end codons 
based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0040250|SK1_G0040250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0040550|SK1_G0040550.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0041560|SK1_G0041560.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0041790|SK1_G0041790.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0042000|SK1_G0042000.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0042310|SK1_G0042310.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0042380|SK1_G0042380.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0042620|SK1_G0042620.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0043700|SK1_G0043700.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0043730|SK1_G0043730.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0043750|SK1_G0043750.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0044050|SK1_G0044050.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0044060|SK1_G0044060.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop 
codon(s) +SK1_G0039270|SK1_G0039270.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0039290|SK1_G0039290.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0039600|SK1_G0039600.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0039640|SK1_G0039640.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0039750|SK1_G0039750.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0039810|SK1_G0039810.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0039930|SK1_G0039930.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0040190|SK1_G0040190.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0040200|SK1_G0040200.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0040500|SK1_G0040500.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0041510|SK1_G0041510.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0041740|SK1_G0041740.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0041950|SK1_G0041950.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0042260|SK1_G0042260.mRNA.1 unexpected start & 
end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0042330|SK1_G0042330.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0042570|SK1_G0042570.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0043650|SK1_G0043650.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0043680|SK1_G0043680.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0043700|SK1_G0043700.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0044000|SK1_G0044000.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0044010|SK1_G0044010.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0044280|SK1_G0044280.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0044330|SK1_G0044330.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0044380|SK1_G0044380.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0044970|SK1_G0044970.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0045150|SK1_G0045150.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0045520|SK1_G0045520.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1 
-SK1_G0045610|SK1_G0045610.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0045900|SK1_G0045900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0046150|SK1_G0046150.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0046310|SK1_G0046310.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047290|SK1_G0047290.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047300|SK1_G0047300.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047380|SK1_G0047380.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047410|SK1_G0047410.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047520|SK1_G0047520.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047920|SK1_G0047920.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0047930|SK1_G0047930.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0048620|SK1_G0048620.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0048630|SK1_G0048630.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0049290|SK1_G0049290.mRNA.1 unexpected start & end codons based 
on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0050170|SK1_G0050170.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0051020|SK1_G0051020.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0051180|SK1_G0051180.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0051250|SK1_G0051250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0051580|SK1_G0051580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0052280|SK1_G0052280.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0052520|SK1_G0052520.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0052530|SK1_G0052530.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0044920|SK1_G0044920.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0045100|SK1_G0045100.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0045470|SK1_G0045470.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1 +SK1_G0045560|SK1_G0045560.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0045850|SK1_G0045850.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0046100|SK1_G0046100.mRNA.1 
unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0046260|SK1_G0046260.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047240|SK1_G0047240.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047250|SK1_G0047250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047330|SK1_G0047330.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047360|SK1_G0047360.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047470|SK1_G0047470.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047880|SK1_G0047880.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0047890|SK1_G0047890.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0048580|SK1_G0048580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0048590|SK1_G0048590.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0049250|SK1_G0049250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0050130|SK1_G0050130.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0050980|SK1_G0050980.mRNA.1 unexpected start & end codons based on standard genentic code;your 
selected code table is 1;internal stop codon(s) +SK1_G0051140|SK1_G0051140.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0051210|SK1_G0051210.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0051540|SK1_G0051540.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0052240|SK1_G0052240.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0052480|SK1_G0052480.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0052490|SK1_G0052490.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0052630|SK1_G0052630.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0052670|SK1_G0052670.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0052700|SK1_G0052700.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0052710|SK1_G0052710.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0052750|SK1_G0052750.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0053050|SK1_G0053050.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0053140|SK1_G0053140.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0053670|SK1_G0053670.mRNA.1 unexpected start 
& end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0053850|SK1_G0053850.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0054290|SK1_G0054290.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0054390|SK1_G0054390.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0052680|SK1_G0052680.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0052700|SK1_G0052700.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0052730|SK1_G0052730.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0053020|SK1_G0053020.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0053110|SK1_G0053110.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0053120|SK1_G0053120.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0053520|SK1_G0053520.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1 +SK1_G0053650|SK1_G0053650.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0053820|SK1_G0053820.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0054250|SK1_G0054250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
+SK1_G0054350|SK1_G0054350.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0054540|SK1_G0054540.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0054580|SK1_G0054580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0055120|SK1_G0055120.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0055180|SK1_G0055180.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0055260|SK1_G0055260.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0055520|SK1_G0055520.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0055990|SK1_G0055990.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0056290|SK1_G0056290.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057300|SK1_G0057300.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0054900|SK1_G0054900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055070|SK1_G0055070.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055130|SK1_G0055130.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055210|SK1_G0055210.mRNA.1 unexpected start & end codons based on 
standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055220|SK1_G0055220.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0055480|SK1_G0055480.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055490|SK1_G0055490.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055760|SK1_G0055760.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0055840|SK1_G0055840.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0055850|SK1_G0055850.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0055940|SK1_G0055940.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0056240|SK1_G0056240.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0056440|SK1_G0056440.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0056590|SK1_G0056590.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057200|SK1_G0057200.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1 +SK1_G0057230|SK1_G0057230.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057280|SK1_G0057280.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057310|SK1_G0057310.mRNA.1 unexpected 
start codon based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057370|SK1_G0057370.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1 SK1_G0057390|SK1_G0057390.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057400|SK1_G0057400.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0057410|SK1_G0057410.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057470|SK1_G0057470.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057490|SK1_G0057490.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057500|SK1_G0057500.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057560|SK1_G0057560.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057580|SK1_G0057580.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057770|SK1_G0057770.mRNA.1 incorrect CDS length -SK1_G0057950|SK1_G0057950.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0057960|SK1_G0057960.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057440|SK1_G0057440.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057650|SK1_G0057650.mRNA.1 unexpected start & end codons based on standard genentic 
code;your selected code table is 1;internal stop codon(s) +SK1_G0057730|SK1_G0057730.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057830|SK1_G0057830.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057870|SK1_G0057870.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057900|SK1_G0057900.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0057930|SK1_G0057930.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) SK1_G0057970|SK1_G0057970.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0058000|SK1_G0058000.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0058010|SK1_G0058010.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0058020|SK1_G0058020.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0058060|SK1_G0058060.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0058150|SK1_G0058150.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length -SK1_G0058250|SK1_G0058250.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) -SK1_G0058270|SK1_G0058270.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) 
+SK1_G0058000|SK1_G0058000.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0058010|SK1_G0058010.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0058060|SK1_G0058060.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0058100|SK1_G0058100.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1 +SK1_G0058190|SK1_G0058190.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058200|SK1_G0058200.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058220|SK1_G0058220.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058230|SK1_G0058230.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058240|SK1_G0058240.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058370|SK1_G0058370.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058410|SK1_G0058410.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length +SK1_G0058460|SK1_G0058460.mRNA.1 unexpected start & end codons based on standard genentic code;your selected code table is 1;internal stop codon(s) +SK1_G0058640|SK1_G0058640.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 1;incorrect CDS length cox2|cox2.mRNA.1 unexpected stop codon based on standard genentic code;your selected code table is 3;incorrect CDS length 
orf474|orf474.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 3;incorrect CDS length orf90|orf90.mRNA.1 unexpected start codon based on standard genentic code;your selected code table is 3 diff --git a/Example_Outputs/SK1.final.pep.fa.gz b/Example_Outputs/SK1.final.pep.fa.gz index 2f2d82a..dd63a38 100644 Binary files a/Example_Outputs/SK1.final.pep.fa.gz and b/Example_Outputs/SK1.final.pep.fa.gz differ diff --git a/Example_Outputs/SK1.final.trimmed_cds.fa.gz b/Example_Outputs/SK1.final.trimmed_cds.fa.gz index f5a1c49..4daab96 100644 Binary files a/Example_Outputs/SK1.final.trimmed_cds.fa.gz and b/Example_Outputs/SK1.final.trimmed_cds.fa.gz differ diff --git a/Manual.pdf b/Manual.pdf index f9ddc97..b680654 100644 Binary files a/Manual.pdf and b/Manual.pdf differ diff --git a/Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh b/Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh index 2fdbe69..926db3f 100755 --- a/Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh +++ b/Project_Template/00.Long_Reads/LRSDAY.00.Long_Reads_Preprocessing.sh @@ -7,13 +7,13 @@ source ./../../env.sh ####################################### # set project-specific variables -prefix="YGL3210" # The file name prefix for the output files -reads="./../00.Long_Reads/YGL3210.fq.gz" # The file path of the long reads file (in fastq or fastq.gz format). -reads_type="nanopore-raw" # The long reads data type: "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". -run_filtering="yes" # Whether to filter the reads: "yes" or "no". Default = "yes". +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +reads="./../00.Long_Reads/$prefix.filtered_subreads.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format). 
+reads_type="pacbio-raw" # The long reads data type: "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". +run_filtering="yes" # Whether to filter and downsample the reads: "yes" or "no". Default = "yes". genome_size="12500000" # The haploid genome size (in bp) of sequenced organism. Default = "12500000" (i.e. 12.5 Mb for the budding yeast S. cereviaie genome). This is used to calculate targeted sequencing coverage after read filtering (see below). -post_filtering_coverage="40" # Targeted sequencing coverage after read filtering. Default = "40" (i.e. 40x coverage). -threads=1 # The number of threads to use. Default = "1". +post_filtering_coverage="60" # Targeted sequencing coverage after read filtering and downsampling. Default = "60" (i.e. 60x coverage). +threads=4 # The number of threads to use. Default = "4". ####################################### # process the pipeline diff --git a/Project_Template/00.Long_Reads/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh b/Project_Template/00.Long_Reads/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh new file mode 100755 index 0000000..6c1c640 --- /dev/null +++ b/Project_Template/00.Long_Reads/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh @@ -0,0 +1,139 @@ +#!/bin/bash +set -e -o pipefail +####################################### +# load environment variables for LRSDAY +source ./../../env.sh + +####################################### +# set project-specific variables +project_name="Project_Example" # LRSDAY Project name. Default = "Project_Example". +run_basecalling="yes" # Whether to perform basecalling: "yes" or "no". Default = "yes". +run_demultiplexing="yes" # Whether to perform demultiplexing: "yes" or "no". Default = "yes". +run_nanoplotting="yes" # Whether to perform nanoplotting: "yes" or "no". Default = "yes". + +flowcell_id="FAKXXXXX" # The flowcell ID of the nanopore run. Default = "FAKXXXXX". 
+flowcell_version="FLO-MIN106" # The flowcell version of the nanopore run. Default = "FLO-MIN106". +sequencing_kit_version="SQK-LSK108" # The sequencing kit version of the nanopore run. Default = "SQK-LSK108". +barcode_kit_version="EXP-NBD103" # The barcode kit version of the nanopore run. Default = "EXP-NBD103". + +raw_reads_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_raw_fast5_files" # The directory containing the raw nanopore reads before basecalling +basecalling_output_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_basecalled_fast5_files" # The directory containing the basecalled nanopore reads. This directory will be automatically generated when running basecalling. +threads=8 # The number of threads to use. Default = 8. + +############################# +# normally no need to change the following +qual=5 # read quality filter for guppy basecalling +num_callers=$threads # num_callers for guppy +threads_per_caller=1 # threads_per_caller for guppy +demultiplexing_threads=$threads # threads to use for demultiplexing +demultiplexing_output_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_demultiplexed_fastq_files" # The directory containing the demultiplexed basecalled nanopore reads. This directory will be automatically generated when running demultiplexing. + +if [[ "$run_basecalling" == "yes" ]] +then + echo "Check if $basecalling_output_directory is empty for running basecalling." + if [[ "$(ls $basecalling_output_directory)" ]] + then + echo "Warning! The basecalling directory is not empty! Please empty its content if you want to run basecalling." + echo "Exit!!!" + exit + else + echo "Running basecalling." 
+ $guppy_dir/guppy_basecaller \ + --flowcell $flowcell_version \ + --kit $sequencing_kit_version \ + --recursive \ + --input_path $raw_reads_directory \ + --save_path $basecalling_output_directory \ + --fast5_out \ + --qscore_filtering \ + --min_qscore $qual \ + --num_callers $num_callers \ + --cpu_threads_per_caller $threads_per_caller + cd $basecalling_output_directory + cat ./pass/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.fastq.gz + cat ./fail/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.fail.fastq.gz + fi +fi + +if [[ "$run_demultiplexing" == "yes" ]] +then + echo "Check if $basecalling_output_directory/pass has basecalled reads for running demultiplexing." + if [[ "$(ls $basecalling_output_directory/pass)" ]] + then + echo "Running demultiplexing." + $guppy_dir/guppy_barcoder \ + --barcode_kit $barcode_kit_version \ + --recursive \ + --input_path $basecalling_output_directory/pass \ + --save_path $demultiplexing_output_directory \ + --worker_threads $demultiplexing_threads + + cd $demultiplexing_output_directory + for b in barcode* + do + echo "for demultiplexing: barcode=$b" + cat ./$b/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.$b.fastq.gz + done + cat ./unclassified/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.unclassified.fastq.gz + else + echo "There is no reads in $basecalling_output_directory/pass!" + echo "Please put the basecalled reads in $basecalling_output_directory/pass for demultiplexing!" + echo "Exit!!!" + exit + fi +fi + +set +oe pipefail + +if [[ "$run_nanoplotting" == "yes" ]] +then + echo "Check if $basecalling_output_directory/pass has basecalled reads for running nanoplotting." + if [[ "$(ls $basecalling_output_directory/pass)" ]] + then + echo "Running nanoplotting." 
+ cd $basecalling_output_directory + fastq_input="$project_name.basecalled_reads.Q${qual}.pass.fastq.gz" + source $nanoplot_dir/activate + $nanoplot_dir/NanoPlot \ + --threads $threads \ + --fastq $fastq_input \ + --N50 \ + -o "${project_name}_Q${qual}_pass_NanoPlot_out" + fi + if [[ "$run_demultiplexing" == "yes" ]] + then + cd $demultiplexing_output_directory + for b in barcode* + do + echo "for nanoplotting: barcode=$b" + fastq_input="$project_name.basecalled_reads.Q${qual}.pass.$b.fastq.gz" + source $nanoplot_dir/activate + $nanoplot_dir/NanoPlot \ + --threads $threads \ + --fastq $fastq_input \ + --N50 \ + -o "${project_name}_Q${qual}_pass_${b}_NanoPlot_out" + done + echo "for nanoplotting: unclassified" + fastq_input="$project_name.basecalled_reads.Q${qual}.pass.unclassified.fastq.gz" + $nanoplot_dir/NanoPlot \ + --threads $threads \ + --fastq $fastq_input \ + --N50 \ + -o "${project_name}_Q${qual}_pass_unclassified_NanoPlot_out" + fi +fi + + + +############################ +# checking bash exit status +if [[ $? -eq 0 ]] +then + echo "" + echo "LRSDAY message: This bash script has been successfully processed! 
:)" + echo "" + echo "" + exit 0 +fi +############################ diff --git a/Project_Template/00.Long_Reads/nanopore_fast5_files/.gitkeep b/Project_Template/00.Long_Reads/nanopore_basecalled_fast5_files/.gitkeep similarity index 100% rename from Project_Template/00.Long_Reads/nanopore_fast5_files/.gitkeep rename to Project_Template/00.Long_Reads/nanopore_basecalled_fast5_files/.gitkeep diff --git a/Project_Template/00.Long_Reads/nanopore_raw_fast5_files/.gitkeep b/Project_Template/00.Long_Reads/nanopore_raw_fast5_files/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/Project_Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh b/Project_Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh index bda2793..3710e12 100755 --- a/Project_Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh +++ b/Project_Template/01.Long-read-based_Genome_Assembly/LRSDAY.01.Long-read-based_Genome_Assembly.sh @@ -7,13 +7,13 @@ PATH=$gnuplot_dir:$PATH ########################################### # set project-specific variables -prefix="SK1" # The file name prefix for the output files. -long_reads="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format). +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +long_reads="./../00.Long_Reads/$prefix.filtlong.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format). long_reads_type="pacbio-raw" # The long reads data type. Use "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". Default = "pacbio-raw" for the testing example genome_size="12.5m" # The estimated genome size with the format of [g|m|k], e.g. 12.5m for 12.5 Mb. Default = "12.5m". 
assembler="canu" # The long-read assembler: Use "canu" or "flye" or "wtdbg2" or "smartdenovo" or "canu-flye" or "canu-wtdbg2" or "canu-smartdenovo". For "canu-flye", "canu-wtdbg2", and "canu-smartdenovo", the assembler canu is used first to generate error-corrected reads from the raw reads and then the assembler flye/wtdbg2/smartdenovo is used to assemble the genome. Based on our test, assembler="canu" generally gives the best result but will take substantially longer time than the other options. -customized_canu_parameters="correctedErrorRate=0.04" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you can could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collasping haplotypes. As shown in these examples, more than one customized parameters can be set here as long as they are separeted by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "correctedErrorRate=0.04" for the testing example. -threads=2 # The number of threads to use. Default = 2. +customized_canu_parameters="" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. 
For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collapsing haplotypes. As shown in these examples, more than one customized parameter can be set here as long as they are separated by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "" for the testing example. +threads=4 # The number of threads to use. Default = 4. vcf="yes" # Use "yes" if prefer to have vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome for their uniquely alignable regions. Otherwise use "no". Default = "yes". dotplot="yes" # Use "yes" if prefer to plot genome-wide dotplot based on the comparison with the reference genome below. Otherwise use "no". Default = "yes". ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome. This is only needed when the option "dotplot=" or "vcf=" has been set as "yes". 
diff --git a/Project_Template/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh b/Project_Template/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh index d03f79c..7b4c1b4 100755 --- a/Project_Template/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh +++ b/Project_Template/02.Long-read-based_Assembly_Polishing/LRSDAY.02.Long-read-based_Assembly_Polishing.sh @@ -7,19 +7,19 @@ source ./../../env.sh ########################################### # set project-specific variables -input_assembly="./../01.Long-read-based_Genome_Assembly/SK1.assembly.raw.fa" # The file path of the input raw long-read-based assembly for polishing. -long_reads_in_fastq="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path of the long-read fastq file. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. + +input_assembly="./../01.Long-read-based_Genome_Assembly/$prefix.assembly.raw.fa" # The file path of the input raw long-read-based assembly for polishing. +long_reads_in_fastq="./../00.Long_Reads/$prefix.filtlong.fastq.gz" # The file path of the long-read fastq file. long_read_technology="pacbio" # The used long-read sequencing technology. Use "pacbio" or "nanopore". Default = "pacbio" for the testing example. ### When long_read_technology="pacbio" #### -pacbio_bam_fofn_file="./../00.Long_Reads/pacbio_fofn_files/SK1.merged.bam.fofn" # The file path to the fofn file containing the absolute path to the PacBio bam files. BAM file is the native output format for PacBio Sequel platform but this is not the case for the RSII platform. For RSII data, the bax2bam file conversion is needed. This can be done by running the LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh script in the 00.Long_Reads directory. 
+pacbio_bam_fofn_file="./../00.Long_Reads/pacbio_fofn_files/$prefix.merged.bam.fofn" # The file path to the fofn file containing the absolute path to the PacBio bam files. BAM file is the native output format for PacBio Sequel platform but this is not the case for the RSII platform. For RSII data, the bax2bam file conversion is needed. This can be done by running the LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh script in the 00.Long_Reads directory. pacbio_reads_type="RSII" # The sequencing machine used to generate the input PacBio reads . Use "RSII" or "Sequel". Default = "RSII" for the testing example. ### When long_read_technology="nanopore" ### -nanopore_fast5_files="./../00.Long_Reads/nanopore_fast5_files" # The file path to the directory containing raw Oxford Nanopore FAST5 files. -nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_albacore_sequencing_summary="". - -prefix="SK1" # The file name prefix for the output files. Default = "SK1" for the testing example. +nanopore_basecalled_fast5_files="./../00.Long_Reads/nanopore_basecalled_fast5_files" # The file path to the directory containing the basecalled Oxford Nanopore FAST5 files. +nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_basecalled_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_basecalling_sequencing_summary="". threads=1 # The number of threads to use. Default = "1". ploidy=1 # The ploidy status of the sequenced genome. use "1" for haploid genome and "2" for diploid genome. 
Default = "1" for the testing example. @@ -82,15 +82,15 @@ else source $nanopolish_dir/py3_virtualenv_nanopolish/bin/activate if [[ -z "$nanopore_basecalling_sequencing_summary" ]] then - $nanopolish_dir/nanopolish index -d $nanopore_fast5_files $long_reads_in_fastq + $nanopolish_dir/nanopolish index -d $nanopore_basecalled_fast5_files $long_reads_in_fastq else - $nanopolish_dir/nanopolish index -d $nanopore_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq + $nanopolish_dir/nanopolish index -d $nanopore_basecalled_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq fi for i in $(seq 1 1 $rounds_of_successive_polishing) do java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar CreateSequenceDictionary -REFERENCE $prefix.assembly.tmp.fa -OUTPUT $prefix.assembly.tmp.dict $minimap2_dir/minimap2 -ax map-ont $prefix.assembly.tmp.fa $long_reads_in_fastq > $prefix.minimap2.round_${i}.sam - java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar SortSam -INPUT $prefix.minimap2.round_${i}.sam -OUTPUT $prefix.minimap2.round_${i}.bam -SORT_ORDER coordinate + java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar SortSam -INPUT $prefix.minimap2.round_${i}.sam -OUTPUT $prefix.minimap2.round_${i}.bam -SORT_ORDER coordinate -VALIDATION_STRINGENCY LENIENT -MAX_RECORDS_IN_RAM 50000 $samtools_dir/samtools index $prefix.minimap2.round_${i}.bam rm $prefix.minimap2.round_${i}.sam python3 $nanopolish_dir/scripts/nanopolish_makerange.py $prefix.assembly.tmp.fa | $parallel_dir/parallel --results ${prefix}_nanopolish_round_${i}_results -P 1 \ diff --git a/Project_Template/03.Illumina-read-based_Assembly_Polishing/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh 
b/Project_Template/03.Illumina-read-based_Assembly_Polishing/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh index a14b0b8..bfe2117 100755 --- a/Project_Template/03.Illumina-read-based_Assembly_Polishing/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh +++ b/Project_Template/03.Illumina-read-based_Assembly_Polishing/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh @@ -7,8 +7,8 @@ source ./../../env.sh ########################################### # set project-specific variables -input_assembly="./../02.Long-read-based_Assembly_Polishing/SK1.assembly.long_read_polished.fa" # The file path of the input assembly before Illumina-based correction -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +input_assembly="./../02.Long-read-based_Assembly_Polishing/$prefix.assembly.long_read_polished.fa" # The file path of the input assembly before Illumina-based correction trim_illumina_reads="yes" # Whether to trim the input Illumina reads. Use "yes" if prefer to perform trimming, otherwise use "no". Default = "yes". rounds_of_successive_polishing=1 # The number of total rounds of Illumina-read-based assembly polishing. Default = "1" for the testing example. threads=1 # The number of threads to use. Default = "1". 
@@ -117,13 +117,13 @@ do # GATK local realign # find realigner targets - java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk_dir/GenomeAnalysisTK.jar \ + java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk3_dir/GenomeAnalysisTK.jar \ -R refseq.tmp.fa \ -T RealignerTargetCreator \ -I $prefix.round_${i}.dedup.bam \ -o $prefix.round_${i}.realn.intervals # run realigner - java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk_dir/GenomeAnalysisTK.jar \ + java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk3_dir/GenomeAnalysisTK.jar \ -R refseq.tmp.fa \ -T IndelRealigner \ -I $prefix.round_${i}.dedup.bam \ diff --git a/Project_Template/04.Reference-guided_Assembly_Scaffolding/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh b/Project_Template/04.Reference-guided_Assembly_Scaffolding/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh index 1a011ef..bb32eff 100755 --- a/Project_Template/04.Reference-guided_Assembly_Scaffolding/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh +++ b/Project_Template/04.Reference-guided_Assembly_Scaffolding/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh @@ -7,8 +7,8 @@ PATH=$gnuplot_dir:$hal_dir:$PATH ####################################### # set project-specific variables -input_assembly="./../03.Illumina-read-based_Assembly_Polishing/SK1.assembly.illumina_read_polished.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +input_assembly="./../03.Illumina-read-based_Assembly_Polishing/$prefix.assembly.illumina_read_polished.fa" # The file path of the input genome assembly. ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome. 
ref_genome_noncore_masked="./../00.Ref_Genome/S288C.ASM205763v1.noncore_masked.fa" # The file path of the specially masked reference genome where subtelomeres and chromosome-ends were hard masked. When the subtelomere/chromosome-end information is unavailable for the organism that you are interested in, you can just put the path of the raw reference genome assembly here. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the raw reference genome file, if there are multiple reference mitochondrial genomes that you want to check, use a single ';' to separate them. e.g. "Sc_chrMT;Sp_chrMT". Default = "chrMT". diff --git a/Project_Template/05.Centromere_Identity_Profiling/LRSDAY.05.Centromere_Identity_Profiling.sh b/Project_Template/05.Centromere_Identity_Profiling/LRSDAY.05.Centromere_Identity_Profiling.sh index 440d741..663c004 100755 --- a/Project_Template/05.Centromere_Identity_Profiling/LRSDAY.05.Centromere_Identity_Profiling.sh +++ b/Project_Template/05.Centromere_Identity_Profiling/LRSDAY.05.Centromere_Identity_Profiling.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../04.Reference-guided_Assembly_Scaffolding/SK1.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly -prefix="SK1" # The file name prefix for the output files +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../04.Reference-guided_Assembly_Scaffolding/$prefix.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly query="$LRSDAY_HOME/data/S288C.centromere.fa" # S. cerevisiae S288C reference centromere sequences based on Yue et al. (2017) Nature Genetics. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". 
diff --git a/Project_Template/06.Mitochondrial_Genome_Assembly_Improvement/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh b/Project_Template/06.Mitochondrial_Genome_Assembly_Improvement/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh index f936551..e0d5574 100755 --- a/Project_Template/06.Mitochondrial_Genome_Assembly_Improvement/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh +++ b/Project_Template/06.Mitochondrial_Genome_Assembly_Improvement/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh @@ -7,9 +7,9 @@ PATH=$bwa_dir:$samtools_dir:$gnuplot_dir:$canu_dir:$mummer_dir:$spades_dir:$prod ########################################### # set project-specific variables -genome="./../04.Reference-guided_Assembly_Scaffolding/SK1.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly -prefix="SK1" # The file name prefix for the output files -mt_contig_list="./../04.Reference-guided_Assembly_Scaffolding/SK1.assembly.ref_based_scaffolded.mt_contig.list" # The mitochodnrial contig list generated by the module 04.Reference-guided_Assembly_Scaffolding. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../04.Reference-guided_Assembly_Scaffolding/$prefix.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly +mt_contig_list="./../04.Reference-guided_Assembly_Scaffolding/$prefix.assembly.ref_based_scaffolded.mt_contig.list" # The mitochodnrial contig list generated by the module 04.Reference-guided_Assembly_Scaffolding. gene_start="$LRSDAY_HOME/data/ATP6.cds.fa" # A FASTA file containing the CDS sequence of the mitochodnrial gene to be used as the starting point of the mitochondrial assembly. This can be set to any gene as long as a fasta file containing the DNA sequence of the gene is provided. 
ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the raw reference genome file, if there are multiple reference mitochondrial genomes that you want to check, use a single ';' to separate them. e.g. "Sc_chrMT;Sp_chrMT". Default = "chrMT". diff --git a/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.1.sh b/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.1.sh index 6601cce..f1a95ab 100755 --- a/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.1.sh +++ b/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.1.sh @@ -6,8 +6,9 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../06.Mitochondrial_Genome_Assembly_Improvement/SK1.assembly.mt_improved.fa" # The file name of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../06.Mitochondrial_Genome_Assembly_Improvement/$prefix.assembly.mt_improved.fa" # The file name of the input genome assembly. 
+ ####################################### # process the pipeline diff --git a/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.2.sh b/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.2.sh index a7e3aec..4332ef0 100755 --- a/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.2.sh +++ b/Project_Template/07.Supervised_Final_Assembly/LRSDAY.07.Supervised_Final_Assembly.2.sh @@ -7,8 +7,8 @@ PATH=$gnuplot_dir:$PATH ####################################### # set project-specific variables -genome="./../06.Mitochondrial_Genome_Assembly_Improvement/SK1.assembly.mt_improved.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../06.Mitochondrial_Genome_Assembly_Improvement/$prefix.assembly.mt_improved.fa" # The file path of the input genome assembly. vcf="yes" # Whether to generate a vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome for their uniquely alignable regions. Use "yes" if prefer to have vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome. Default = "yes". dotplot="yes" # Whether to plot genome-wide dotplot based on the comparison with the reference genome below. Use "yes" if prefer to plot, otherwise use "no". Default = "yes". ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The path of the raw reference genome, only needed when dotplot="yes" or vcf="yes". 
diff --git a/Project_Template/08.Centromere_Annotation/LRSDAY.08.Centromere_Annotation.sh b/Project_Template/08.Centromere_Annotation/LRSDAY.08.Centromere_Annotation.sh index 658b9da..8a731c7 100755 --- a/Project_Template/08.Centromere_Annotation/LRSDAY.08.Centromere_Annotation.sh +++ b/Project_Template/08.Centromere_Annotation/LRSDAY.08.Centromere_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The path of the input genome assembly. query="$LRSDAY_HOME/data/S288C.centromere.fa" # The S. cerevisiae S288C reference centromere sequences based on Yue et al. (2017) Nature Genetics. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". diff --git a/Project_Template/09.Nuclear_Gene_Annotation/LRSDAY.09.Nuclear_Gene_Annotation.sh b/Project_Template/09.Nuclear_Gene_Annotation/LRSDAY.09.Nuclear_Gene_Annotation.sh index 8558a81..444ef7a 100755 --- a/Project_Template/09.Nuclear_Gene_Annotation/LRSDAY.09.Nuclear_Gene_Annotation.sh +++ b/Project_Template/09.Nuclear_Gene_Annotation/LRSDAY.09.Nuclear_Gene_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome_assembly="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -genome_tag="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. 
+genome_assembly="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the final assembly. If there are multiple sequences, use a single ';' to separate them. e.g. "chrMT_part1;chrMT_part2". Default = "chrMT". threads=1 # The number of threads to use. Default = "1". maker_opts="$LRSDAY_HOME/misc/maker_opts.customized.ctl" # The configuration file for MAKER. You can edit this file if you have native transciptome/EST data for the strain/species that you sequenced or if you want to adapt it to annotate other eukaryotic organisms. Otherwise, please keep it unchanged. Please note that if this file is in the same directory where this bash script is executed, the file name cannot be "maker_opts.ctl". @@ -16,8 +16,10 @@ debug="no" # use "yes" if prefer to keep intermediate files, otherwise use "no". ####################################### # process the pipeline -echo "genome_assembly=$genome_assembly" +genome_tag="$prefix" + echo "genome_tag=$genome_tag" +echo "genome_assembly=$genome_assembly" # convert the genome assembly file to all uppercases diff --git a/Project_Template/10.Mitochondrial_Gene_Annotation/LRSDAY.10.Mitochondrial_Gene_Annotation.sh b/Project_Template/10.Mitochondrial_Gene_Annotation/LRSDAY.10.Mitochondrial_Gene_Annotation.sh index 2839f07..b37fec6 100755 --- a/Project_Template/10.Mitochondrial_Gene_Annotation/LRSDAY.10.Mitochondrial_Gene_Annotation.sh +++ b/Project_Template/10.Mitochondrial_Gene_Annotation/LRSDAY.10.Mitochondrial_Gene_Annotation.sh @@ -18,11 +18,10 @@ export PATH="$flip_dir:$blast_dir:$muscle_dir:$umac_dir:$hmmer_dir:$erpin_dir:$t ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. 
+genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the input genome assembly, if there are multiple corresponding contigs/scaffolds, use a single ';' to separate them. e.g. "chrMT_1;chrMT_2". Default = "chrMT". genetic_code_table=3 # The NCBI genetic code table (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for the annotated mitochondrial genome. Default = 3 (i.e. Yeast Mitochondria) - -prefix="SK1" # The file name prefix for the output files. debug="no" # Whehter to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". ###################################### diff --git a/Project_Template/11.TE_Annotation/LRSDAY.11.TE_Annotation.sh b/Project_Template/11.TE_Annotation/LRSDAY.11.TE_Annotation.sh index d4db5cc..520fd87 100755 --- a/Project_Template/11.TE_Annotation/LRSDAY.11.TE_Annotation.sh +++ b/Project_Template/11.TE_Annotation/LRSDAY.11.TE_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. threads=1 # The number of threads to use. Default = "1". debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". 
diff --git a/Project_Template/12.Core_X_Element_Annotation/LRSDAY.12.Core_X_Element_Annotation.sh b/Project_Template/12.Core_X_Element_Annotation/LRSDAY.12.Core_X_Element_Annotation.sh index 7b01a17..5c4f6f6 100755 --- a/Project_Template/12.Core_X_Element_Annotation/LRSDAY.12.Core_X_Element_Annotation.sh +++ b/Project_Template/12.Core_X_Element_Annotation/LRSDAY.12.Core_X_Element_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". ####################################### diff --git a/Project_Template/13.Y_Prime_Element_Annotation/LRSDAY.13.Y_Prime_Element_Annotation.sh b/Project_Template/13.Y_Prime_Element_Annotation/LRSDAY.13.Y_Prime_Element_Annotation.sh index eec3831..52fcfd5 100755 --- a/Project_Template/13.Y_Prime_Element_Annotation/LRSDAY.13.Y_Prime_Element_Annotation.sh +++ b/Project_Template/13.Y_Prime_Element_Annotation/LRSDAY.13.Y_Prime_Element_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. 
debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". ####################################### diff --git a/Project_Template/14.Gene_Orthology_Identification/LRSDAY.14.Gene_Orthology_Identification.sh b/Project_Template/14.Gene_Orthology_Identification/LRSDAY.14.Gene_Orthology_Identification.sh index 67e886c..2fc5a3d 100755 --- a/Project_Template/14.Gene_Orthology_Identification/LRSDAY.14.Gene_Orthology_Identification.sh +++ b/Project_Template/14.Gene_Orthology_Identification/LRSDAY.14.Gene_Orthology_Identification.sh @@ -6,16 +6,16 @@ source ./../../env.sh ####################################### # set project-specific variables -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. threads=1 # The number of threads to use. Default = "1". -input_nuclear_gene_gff="./../09.Nuclear_Gene_Annotation/SK1.nuclear_genome.EVM.gff3" # The file path of the input nuclear gene gff3 file generated in the task 09.Nuclear_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the nuclear gene annotation. -query_nuclear_gene_PoFF_faa="./../09.Nuclear_Gene_Annotation/SK1.nuclear_genome.EVM.PoFF.faa" # The file path of the PoFF.faa file generated in the task 09.Nuclear_Gene_Annotation. -query_nuclear_gene_PoFF_gff="./../09.Nuclear_Gene_Annotation/SK1.nuclear_genome.EVM.PoFF.gff" # The file path of the PoFF.gff file generated in the task 09.Nuclear_Gene_Annotation. +input_nuclear_gene_gff="./../09.Nuclear_Gene_Annotation/$prefix.nuclear_genome.EVM.gff3" # The file path of the input nuclear gene gff3 file generated in the task 09.Nuclear_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the nuclear gene annotation. 
+query_nuclear_gene_PoFF_faa="./../09.Nuclear_Gene_Annotation/$prefix.nuclear_genome.EVM.PoFF.faa" # The file path of the PoFF.faa file generated in the task 09.Nuclear_Gene_Annotation. +query_nuclear_gene_PoFF_gff="./../09.Nuclear_Gene_Annotation/$prefix.nuclear_genome.EVM.PoFF.gff" # The file path of the PoFF.gff file generated in the task 09.Nuclear_Gene_Annotation. -input_mitochondrial_gene_gff="./../10.Mitochondrial_Gene_Annotation/SK1.mitochondrial_genome.mfannot.gff3" # The file path of the input mitochondrial gene gff3 file generated in the task 10.Mitochondrial_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the mitochondrial gene annotation. -query_mitochondrial_gene_PoFF_faa="./../10.Mitochondrial_Gene_Annotation/SK1.mitochondrial_genome.mfannot.PoFF.faa" # The file path of the PoFF.faa file generated in the task 10.Mitochondrial_Gene_Annotation. -query_mitochondrial_gene_PoFF_gff="./../10.Mitochondrial_Gene_Annotation/SK1.mitochondrial_genome.mfannot.PoFF.gff" # The file path of the PoFF.gff file generated in the task 10.Mitochondrial_Gene_Annotation. +input_mitochondrial_gene_gff="./../10.Mitochondrial_Gene_Annotation/$prefix.mitochondrial_genome.mfannot.gff3" # The file path of the input mitochondrial gene gff3 file generated in the task 10.Mitochondrial_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the mitochondrial gene annotation. +query_mitochondrial_gene_PoFF_faa="./../10.Mitochondrial_Gene_Annotation/$prefix.mitochondrial_genome.mfannot.PoFF.faa" # The file path of the PoFF.faa file generated in the task 10.Mitochondrial_Gene_Annotation. +query_mitochondrial_gene_PoFF_gff="./../10.Mitochondrial_Gene_Annotation/$prefix.mitochondrial_genome.mfannot.PoFF.gff" # The file path of the PoFF.gff file generated in the task 10.Mitochondrial_Gene_Annotation. 
ref_PoFF_faa="$LRSDAY_HOME/data/SGDref.PoFF.faa" # The file path of the reference proteome file in FASTA format: for S. cerevisiae and its close relatives, you can directly use the pre-shipped file: SGDref.PoFF.faa; if you work with other organisms, you can check ProteinOrtho's manual for details on how to prepare such file. ref_PoFF_gff="$LRSDAY_HOME/data/SGDref.PoFF.gff" # The path of the reference gene GFF file in GFF format: for S. cerevisiae and its close relatives, you can directly use the pre-shipped file: SGDref.PoFF.gff; if you work with other organisms, you can check ProteinOrtho's manual for details on how to prepare such file. diff --git a/Project_Template/15.Annotation_Integration/LRSDAY.15.Annotation_Integration.sh b/Project_Template/15.Annotation_Integration/LRSDAY.15.Annotation_Integration.sh index e2930f3..306f7e8 100755 --- a/Project_Template/15.Annotation_Integration/LRSDAY.15.Annotation_Integration.sh +++ b/Project_Template/15.Annotation_Integration/LRSDAY.15.Annotation_Integration.sh @@ -6,14 +6,14 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output file -centromere_gff3="./../08.Centromere_Annotation/SK1.centromere.gff3" # The path of the final centromere annotation gff3 generated by task 08.Centromere_Annotation_for_FinalAssembly, use "" if not existing. -TE_gff3="./../11.TE_Annotation/SK1.TE.gff3" # The file path of the TE annotation gff3 generated by task 11.TE_Annotation, use "" if not existing. -X_element_gff3="./../12.Core_X_Element_Annotation/SK1.X_element.gff3" # The file path of the X_element annotation gff3 generated by task 12.CoreX_Annotation, use "" if not existing. 
-Y_prime_element_gff3="./../13.Y_Prime_Element_Annotation/SK1.Y_prime_element.gff3" # The file path of the Y_prime_element annotation gff3 generated by task 13.Yprime_Annotation, use "" if not existing. -nuclear_gene_gff3="./../14.Gene_Orthology_Identification/SK1.nuclear_gene.updated.gff3" # The file path of the nuclear gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 09.Nuclear_Gene_Annotation, use "" if not existing. -mitochondrial_gene_gff3="./../14.Gene_Orthology_Identification/SK1.mitochondrial_gene.updated.gff3" # The file path of the mitochondrial gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 10.Mitochondrial_Gene_Annotation, use "" if not existing. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. +centromere_gff3="./../08.Centromere_Annotation/$prefix.centromere.gff3" # The path of the final centromere annotation gff3 generated by task 08.Centromere_Annotation_for_FinalAssembly, use "" if not existing. +TE_gff3="./../11.TE_Annotation/$prefix.TE.gff3" # The file path of the TE annotation gff3 generated by task 11.TE_Annotation, use "" if not existing. +X_element_gff3="./../12.Core_X_Element_Annotation/$prefix.X_element.gff3" # The file path of the X_element annotation gff3 generated by task 12.CoreX_Annotation, use "" if not existing. +Y_prime_element_gff3="./../13.Y_Prime_Element_Annotation/$prefix.Y_prime_element.gff3" # The file path of the Y_prime_element annotation gff3 generated by task 13.Yprime_Annotation, use "" if not existing. +nuclear_gene_gff3="./../14.Gene_Orthology_Identification/$prefix.nuclear_gene.updated.gff3" # The file path of the nuclear gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 09.Nuclear_Gene_Annotation, use "" if not existing. 
+mitochondrial_gene_gff3="./../14.Gene_Orthology_Identification/$prefix.mitochondrial_gene.updated.gff3" # The file path of the mitochondrial gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 10.Mitochondrial_Gene_Annotation, use "" if not existing. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the input genome assembly, if there are multiple corresponding contigs/scaffolds, use a single ';' to separate them. e.g. "chrMT_1;chrMT_2". Default = "chrMT". chrMT_genetic_code_table=3 # The NCBI genetic code table (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for the annotated mitochondrial genome. Default = 3 (i.e. Yeast Mitochondria) diff --git a/README.md b/README.md index ec125f6..ffb8994 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ Jia-Xing Yue & Gianni Liti. (2018) Long-read sequencing data analysis for yeasts Jia-Xing Yue, Jing Li, Louise Aigrain, Johan Hallin, Karl Persson, Karen Oliver, Anders Bergström, Paul Coupland, Jonas Warringer, Marco Cosentino Lagomarsino, Gilles Fischer, Richard Durbin, Gianni Liti. (2017) Contrasting evolutionary genome dynamics between domesticated and wild yeasts. *Nature Genetics*, 49:913-924. ## Release history +* v1.5.0 Released on 2019/05/13 * v1.4.0 Released on 2019/03/21 * v1.3.1 Released on 2019/01/22 * v1.3.0 Released on 2018/11/13 @@ -35,26 +36,27 @@ LRSDAY itself is distributed under the MIT license. A number of LRSDAY's depende ## Requirements ### Hardware, operating system and network -This protocol is designed for a desktop or computing server running an x86-64-bit Linux operating system. Multithreaded processors are preferred to speed up the process since many steps can be configured to use multiple threads in parallel. For assembling and analyzing the budding yeast genomes (genome size = ~12.5 Mb), at least 16 Gb of RAM and 100 Gb of free disk space are recomended. 
When adapted for other eukaryotic organisms with larger genome sizes, the RAM and disk space consumption will scale up, majorly during *de novo* genome assembly (performed by [Canu](https://github.com/marbl/canu) in default. Plese refer to [Canu’s manual](http://canu.readthedocs.io/en/latest/) for suggested RAM and disk space consumption for assembling large genomes. Stable Internet connection is required for the installation and configuration of LRSDAY as well as for retrieving the test data. +This protocol is designed for a desktop or computing server running an x86-64-bit Linux operating system. Multithreaded processors are preferred to speed up the process since many steps can be configured to use multiple threads in parallel. For assembling and analyzing the budding yeast genomes (genome size = ~12.5 Mb), at least 16 GB of RAM and 100 GB of free disk space are recommended. When adapted for other eukaryotic organisms with larger genome sizes, the RAM and disk space consumption will scale up, mainly during *de novo* genome assembly (performed by [Canu](https://github.com/marbl/canu) by default). Please refer to [Canu’s manual](http://canu.readthedocs.io/en/latest/) for suggested RAM and disk space consumption for assembling large genomes. A stable Internet connection is required for the installation and configuration of LRSDAY as well as for retrieving the test data.
### Software or library requirements -* Bash (https://www.gnu.org/software/bash/) -* Bzip2 (http://www.bzip.org/) -* Cmake (https://cmake.org/) -* GCC and G++ v4.9.1 or newer (https://gcc.gnu.org/) -* Ghostscript (https://www.ghostscript.com) -* Git (https://git-scm.com/) -* GNU make (https://www.gnu.org/software/make/) -* Gzip (https://www.gnu.org/software/gzip/) -* Java runtime environment (JRE) v1.8.0 (https://www.java.com) -* Perl v5.12 or newer (https://www.perl.org/) -* Python v2.7.9 or newer (https://www.python.org/) -* Python v3.4 or newer (https://www.python.org/) -* Tar (https://www.gnu.org/software/tar/) -* Unzip (http://infozip.sourceforge.net/UnZip.html) -* Virtualenv v15.1.0 or newer (https://virtualenv.pypa.io) -* Wget v1.14 or newer (https://www.gnu.org/software/wget/) -* Zlib (https://zlib.net/) - +* bash (https://www.gnu.org/software/bash/) +* bzip2 and libbz2-dev (http://www.bzip.org/) +* cmake (https://cmake.org/) +* gcc and g++ v4.9.1 or newer (https://gcc.gnu.org/) +* ghostscript (https://www.ghostscript.com) +* git (https://git-scm.com/) +* gnu make (https://www.gnu.org/software/make/) +* gzip (https://www.gnu.org/software/gzip/) +* java runtime environment (JRE) v1.8.0 (https://www.java.com) +* perl v5.12 or newer (https://www.perl.org/) +* python v2.7.9 or newer (https://www.python.org/) +* python-devel +* python v3.4 or newer (https://www.python.org/) +* python3-devel +* tar (https://www.gnu.org/software/tar/) +* unzip (http://infozip.sourceforge.net/UnZip.html) +* virtualenv v15.1.0 or newer (https://virtualenv.pypa.io) +* wget (https://www.gnu.org/software/wget/) +* zlib and zlib-devel (https://zlib.net/) diff --git a/data/Proteome_DB_for_annotation.CDhit_I95.fa.gz b/data/Proteome_DB_for_annotation.CDhit_I95.fa.gz index bd079d2..eee4cab 100644 Binary files a/data/Proteome_DB_for_annotation.CDhit_I95.fa.gz and b/data/Proteome_DB_for_annotation.CDhit_I95.fa.gz differ diff --git a/data/SGDref.PoFF.faa.gz b/data/SGDref.PoFF.faa.gz index 
9ecf6fe..bc36e84 100644 Binary files a/data/SGDref.PoFF.faa.gz and b/data/SGDref.PoFF.faa.gz differ diff --git a/data/SGDref.PoFF.ffn.gz b/data/SGDref.PoFF.ffn.gz index b93c93f..a0c2124 100644 Binary files a/data/SGDref.PoFF.ffn.gz and b/data/SGDref.PoFF.ffn.gz differ diff --git a/data/te_proteins.fasta.gz b/data/te_proteins.fasta.gz index d62689b..1941590 100644 Binary files a/data/te_proteins.fasta.gz and b/data/te_proteins.fasta.gz differ diff --git a/install_dependencies.sh b/install_dependencies.sh index b7f58c5..cc5490b 100755 --- a/install_dependencies.sh +++ b/install_dependencies.sh @@ -1,5 +1,5 @@ #!/bin/bash -# last update: 2019/03/07 +# last update: 2019/05/13 set -e -o pipefail @@ -19,7 +19,7 @@ if [ ! -z "$INSTALL_DEPS" ]; then xargs -a debiandeps sudo apt-get install -y fi -SRA_VERSION="2.9.2" # released on 2018.09.26 +SRA_VERSION="2.9.6" # released on 2019.03.18 SRA_DOWNLOAD_URL="https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRA_VERSION}/sratoolkit.${SRA_VERSION}-centos_linux64.tar.gz" PORECHOP_VERSION="0.2.4" # @@ -36,11 +36,11 @@ MINIMAP2_DOWNLOAD_URL="https://github.com/lh3/minimap2/releases/download/v${MINI CANU_VERSION="1.8" # released on 2018.10.23 CANU_DOWNLOAD_URL="https://github.com/marbl/canu/releases/download/v${CANU_VERSION}/canu-${CANU_VERSION}.Linux-amd64.tar.xz" -FLYE_VERSION="2.4.1" # released on 2019.03.07 +FLYE_VERSION="2.4.2" # released on 2019.04.07 FLYE_DOWNLOAD_URL="https://github.com/fenderglass/Flye/archive/${FLYE_VERSION}.tar.gz" -WTDBG2_VERSION="2.3" # -WTDBG2_GITHUB_COMMIT_VERSION="59a39a6" # committed on 2019.03.06 +WTDBG2_VERSION="2.4" # +WTDBG2_GITHUB_COMMIT_VERSION="f460eee" # committed on 2019.04.17 WTDBG2_DOWNLOAD_URL="https://github.com/ruanjue/wtdbg2.git" SMARTDENOVO_VERSION="" # @@ -49,7 +49,13 @@ SMARTDENOVO_DOWNLOAD_URL="https://github.com/ruanjue/smartdenovo" RAGOUT_VERSION="2.1.1" # released on 2018.07.30 RAGOUT_DOWNLOAD_URL="https://github.com/fenderglass/Ragout/archive/${RAGOUT_VERSION}.tar.gz" -# 
GUPPY_VERSION="2.3.5" # released on 2019.02.26 + +GUPPY_VERSION="2.3.5" # released on 2019.02.26 +GUPPY_DOWNLOAD_URL="https://mirror.oxfordnanoportal.com/software/analysis/ont-guppy-cpu_${GUPPY_VERSION}_linux64.tar.gz" + +NANOPLOT_VERSION="1.0.0" # released on 2017.11.04 +NANOPLOT_DOWNLOAD_URL="https://github.com/wdecoster/NanoPlot.git" + # QUAST_VERSION="5.0.1" # one of its dependency needs "csh" to be pre-installed HDF_VERSION="1.10.1" # @@ -94,8 +100,8 @@ CIRCLATOR_DOWNLOAD_URL="https://github.com/sanger-pathogens/circlator/archive/v$ TRIMMOMATIC_VERSION="0.38" # TRIMMOMATIC_DOWNLOAD_URL="http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-${TRIMMOMATIC_VERSION}.zip" -#GATK_VERSION="3.6-6" # -#GATK_DOWNLOAD_URL="https://github.com/broadgsa/gatk/archive/${GATK_VERSION}.tar.gz" +GATK3_VERSION="3.6-6" # +GATK3_DOWNLOAD_URL="https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRA_VERSION}/GenomeAnalysisTK.jar" PICARD_VERSION="2.18.23" # released on 2019.02.25 PICARD_DOWNLOAD_URL="https://github.com/broadinstitute/picard/releases/download/${PICARD_VERSION}/picard.jar" @@ -240,14 +246,14 @@ download () { url=$1 download_location=$2 echo "Downloading $url to $download_location" - wget -nv --no-check-certificate $url -O $download_location + wget --no-check-certificate $url -O $download_location } download_and_extract() { url=$1 download_location=$2 echo "Downloading $url to $download_location" - wget -nv --no-check-certificate $url -O $download_location + wget --no-check-certificate $url -O $download_location if [[ $download_location =~ \.bz2$ ]]; then extract_command="tar -xjf" elif [[ $download_location =~ \.xz$ || $download_location =~ \.tar$ ]]; then @@ -281,7 +287,7 @@ cpanm_dir=$build_dir/cpanm if [ ! 
-e "$build_dir/cpanm" ]; then mkdir -p $build_dir/cpanm cd $cpanm_dir - wget -nv --no-check-certificate -O - https://cpanmin.us/ > cpanm + wget --no-check-certificate -O - https://cpanmin.us/ > cpanm chmod +x cpanm mkdir -p perlmods fi @@ -319,7 +325,6 @@ if [ -z $(check_installed $porechop_dir) ]; then note_installed $porechop_dir fi - # --------------- Filtlong ------------------ filtlong_dir="$build_dir/Filtlong/bin" if [ -z $(check_installed $filtlong_dir) ]; then @@ -433,7 +438,6 @@ if [ -z $(check_installed "sonLib") ]; then note_installed "$build_dir/sonLib" fi - # ---------------- HAL ------------------- hal_dir="$build_dir/hal/bin" if [ -z $(check_installed $hal_dir) ]; then @@ -475,30 +479,45 @@ if [ -z $(check_installed $gnuplot_dir) ]; then fi PATH="$gnuplot_dir:${PATH}" -# # --------------- Guppy -------------------- -# cd $build_dir -# echo "Download Guppy-v${GUPPY_VERSION}" -# download $GUPPY_DOWNLOAD_URL "ont-guppy-cpu_${GUPPY_VERSION}_linux64.tar.gz" -# tar -xzf ont-guppy-cpu_${GUPPY_VERSION}_linux64.tar.gz -# guppy_dir="$build_dir/ont-guppy-cpu/bin" -# rm "ont-guppy-cpu_${GUPPY_VERSION}_linux64.tar.gz" +# --------------- Guppy -------------------- +guppy_dir="$build_dir/ont-guppy-cpu/bin" +if [ -z $(check_installed $guppy_dir) ]; then + cd $build_dir + echo "Download Guppy-v${GUPPY_VERSION}" + download_and_extract $GUPPY_DOWNLOAD_URL "ont-guppy-cpu_${GUPPY_VERSION}_linux64.tar.gz" + note_installed $guppy_dir +fi -# # ------------- QUAST -------------------- -# cd $build_dir -# echo "Download QUAST-v${QUAST_VERSION}" -# download $QUAST_DOWNLOAD_URL "QUAST-${QUAST_VERSION}.tar.gz" -# tar -xzf QUAST-${QUAST_VERSION}.tar.gz +# --------------- Nanoplot -------------------- +nanoplot_dir="$build_dir/py3_virtualenv_nanoplot/bin" +if [ -z $(check_installed $nanoplot_dir) ]; then + cd $build_dir + virtualenv -p $(which python3) py3_virtualenv_nanoplot + source py3_virtualenv_nanoplot/bin/activate + py3_virtualenv_nanoplot/bin/pip install --upgrade pip + 
py3_virtualenv_nanoplot/bin/pip install --upgrade setuptools + py3_virtualenv_nanoplot/bin/pip install numpy + py3_virtualenv_nanoplot/bin/pip install NanoPlot + deactivate + note_installed $nanoplot_dir +fi + +# ------------- QUAST -------------------- # quast_dir="$build_dir/quast-${QUAST_VERSION}" -# cd $quast_dir -# virtualenv -p $(which python3) py3_virtualenv_quast -# source py3_virtualenv_quast/bin/activate -# py3_virtualenv_quast/bin/pip install joblib -# py3_virtualenv_quast/bin/pip install simplejson -# py3_virtualenv_quast/bin/python3 -mpip install -U matplotlib -# py3_virtualenv_quast/bin/python3 ./setup.py install -# deactivate -# cd .. -# rm QUAST-${QUAST_VERSION}.tar.gz +# if [ -z $(check_installed $quast_dir) ]; then +# cd $build_dir +# echo "Download QUAST-v${QUAST_VERSION}" +# download_and_extract $QUAST_DOWNLOAD_URL "QUAST-${QUAST_VERSION}.tar.gz" +# cd $quast_dir +# virtualenv -p $(which python3) py3_virtualenv_quast +# source py3_virtualenv_quast/bin/activate +# py3_virtualenv_quast/bin/pip install joblib +# py3_virtualenv_quast/bin/pip install simplejson +# py3_virtualenv_quast/bin/python3 -mpip install -U matplotlib +# py3_virtualenv_quast/bin/python3 ./setup.py install +# deactivate +# note_installed $quast_dir +# fi # --------------- mummer ------------------ mummer_dir="$build_dir/mummer-${MUMMER_VERSION}" @@ -866,19 +885,18 @@ if [ -z $(check_installed $proteinortho_dir) ]; then note_installed $proteinortho_dir fi -# --------------- GATK ------------------ -gatk_dir="$build_dir/GATK3" -if [ -z $(check_installed $gatk_dir) ]; then +# --------------- GATK3 ------------------ +gatk3_dir="$build_dir/GATK3" +if [ -z $(check_installed $gatk3_dir) ]; then cd $build_dir - echo "Create GATK3 folder for users' manual installation" + echo "Create the GATK3 folder for installation" mkdir GATK3 cd GATK3 - wget -nv --no-check-certificate https://ftp-trace.ncbi.nlm.nih.gov/sra/sdk/${SRA_VERSION}/GenomeAnalysisTK.jar + wget -nv --no-check-certificate 
$GATK3_DOWNLOAD_URL chmod 755 GenomeAnalysisTK.jar - note_installed $gatk_dir + note_installed $gatk3_dir fi - # --------------- MAKER ----------------- maker_dir="$build_dir/maker/bin" if [ -z $(check_installed $maker_dir) ]; then @@ -1135,7 +1153,8 @@ echo "export canu_dir=${canu_dir}" >> env.sh echo "export flye_dir=${flye_dir}" >> env.sh echo "export wtdbg2_dir=${wtdbg2_dir}" >> env.sh echo "export smartdenovo_dir=${smartdenovo_dir}" >> env.sh -# echo "export guppy_dir=${guppy_dir}" >> env.sh +echo "export guppy_dir=${guppy_dir}" >> env.sh +echo "export nanoplot_dir=${nanoplot_dir}" >> env.sh # echo "export quast_dir=${quast_dir}" >> env.sh echo "export ragout_dir=${ragout_dir}" >> env.sh echo "export hdf_dir=${hdf_dir}" >> env.sh @@ -1175,7 +1194,7 @@ echo "export evm_dir=${evm_dir}" >> env.sh echo "export EVM_HOME=${evm_dir}" >> env.sh echo "export maker_dir=${maker_dir}" >> env.sh echo "export proteinortho_dir=${proteinortho_dir}" >> env.sh -echo "export gatk_dir=${gatk_dir}" >> env.sh +echo "export gatk3_dir=${gatk3_dir}" >> env.sh echo "export ucsc_dir=${ucsc_dir}" >> env.sh echo "export miniconda2_dir=${miniconda2_dir}" >> env.sh echo "export conda_pacbio_dir=${conda_pacbio_dir}" >> env.sh diff --git a/pipelines/LRSDAY.00.Long_Reads_Preprocessing.sh b/pipelines/LRSDAY.00.Long_Reads_Preprocessing.sh index 2fdbe69..926db3f 100755 --- a/pipelines/LRSDAY.00.Long_Reads_Preprocessing.sh +++ b/pipelines/LRSDAY.00.Long_Reads_Preprocessing.sh @@ -7,13 +7,13 @@ source ./../../env.sh ####################################### # set project-specific variables -prefix="YGL3210" # The file name prefix for the output files -reads="./../00.Long_Reads/YGL3210.fq.gz" # The file path of the long reads file (in fastq or fastq.gz format). -reads_type="nanopore-raw" # The long reads data type: "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". -run_filtering="yes" # Whether to filter the reads: "yes" or "no". Default = "yes". 
+prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +reads="./../00.Long_Reads/$prefix.filtered_subreads.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format). +reads_type="pacbio-raw" # The long reads data type: "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". +run_filtering="yes" # Whether to filter and downsample the reads: "yes" or "no". Default = "yes". genome_size="12500000" # The haploid genome size (in bp) of sequenced organism. Default = "12500000" (i.e. 12.5 Mb for the budding yeast S. cereviaie genome). This is used to calculate targeted sequencing coverage after read filtering (see below). -post_filtering_coverage="40" # Targeted sequencing coverage after read filtering. Default = "40" (i.e. 40x coverage). -threads=1 # The number of threads to use. Default = "1". +post_filtering_coverage="60" # Targeted sequencing coverage after read filtering and downsampling. Default = "60" (i.e. 60x coverage). +threads=4 # The number of threads to use. Default = "1". ####################################### # process the pipeline diff --git a/pipelines/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh b/pipelines/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh new file mode 100755 index 0000000..6c1c640 --- /dev/null +++ b/pipelines/LRSDAY.00.Nanopore_Reads_Basecalling_and_Demultiplexing.sh @@ -0,0 +1,139 @@ +#!/bin/bash +set -e -o pipefail +####################################### +# load environment variables for LRSDAY +source ./../../env.sh + +####################################### +# set project-specific variables +project_name="Project_Example" # LRSDAY Project name. Default = "Project_Example". +run_basecalling="yes" # Whether to perform basecalling: "yes" or "no". Default = "yes". +run_demultiplexing="yes" # Whether to perform demultiplexing: "yes" or "no". Default = "yes". 
+run_nanoplotting="yes" # Whether to perform nanoplotting: "yes" or "no". Default = "yes". + +flowcell_id="FAKXXXXX" # The flowcell ID of the nanopore run. Default = "FAKXXXXX". +flowcell_version="FLO-MIN106" # The flowcell version of the nanopore run. Default = "FLO-MIN106". +sequencing_kit_version="SQK-LSK108" # The sequencing kit version of the nanopore run. Default = "SQK-LSK108". +barcode_kit_version="EXP-NBD103" # The barcode kit version of the nanopore run. Default = "EXP-NBD103". + +raw_reads_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_raw_fast5_files" # The directory containing the raw nanopore reads before basecalling +basecalling_output_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_basecalled_fast5_files" # The directory containing the basecalled nanopore reads. This directory will be automatically generated when running basecalling. +threads=8 # The number of threads to use. Default = 8. + +############################# +# normally no need to change the following +qual=5 # read quality filter for guppy basecalling +num_callers=$threads # num_callers for guppy +threads_per_caller=1 # threads_per_caller for guppy +demultiplexing_threads=$threads # threads to use for demultiplexing +demultiplexing_output_directory="$LRSDAY_HOME/$project_name/00.Long_Reads/nanopore_demultiplexed_fastq_files" # The directory containing the demultiplexed basecalled nanopore reads. This directory will be automatically generated when running demultiplexing. + +if [[ "$run_basecalling" == "yes" ]] +then + echo "Check if $basecalling_output_directory is empty for running basecalling." + if [[ "$(ls $basecalling_output_directory)" ]] + then + echo "Warning! The basecalling directory is not empty! Please empty its content if you want to run basecalling." + echo "Exit!!!" + exit + else + echo "Running basecalling." 
+ $guppy_dir/guppy_basecaller \ + --flowcell $flowcell_version \ + --kit $sequencing_kit_version \ + --recursive \ + --input_path $raw_reads_directory \ + --save_path $basecalling_output_directory \ + --fast5_out \ + --qscore_filtering \ + --min_qscore $qual \ + --num_callers $num_callers \ + --cpu_threads_per_caller $threads_per_caller + cd $basecalling_output_directory + cat ./pass/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.fastq.gz + cat ./fail/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.fail.fastq.gz + fi +fi + +if [[ "$run_demultiplexing" == "yes" ]] +then + echo "Check if $basecalling_output_directory/pass has basecalled reads for running demultiplexing." + if [[ "$(ls $basecalling_output_directory/pass)" ]] + then + echo "Running demultiplexing." + $guppy_dir/guppy_barcoder \ + --barcode_kit $barcode_kit_version \ + --recursive \ + --input_path $basecalling_output_directory/pass \ + --save_path $demultiplexing_output_directory \ + --worker_threads $demultiplexing_threads + + cd $demultiplexing_output_directory + for b in barcode* + do + echo "for demultiplexing: barcode=$b" + cat ./$b/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.$b.fastq.gz + done + cat ./unclassified/*.fastq |gzip -c > $project_name.basecalled_reads.Q${qual}.pass.unclassified.fastq.gz + else + echo "There is no reads in $basecalling_output_directory/pass!" + echo "Please put the basecalled reads in $basecalling_output_directory/pass for demultiplexing!" + echo "Exit!!!" + exit + fi +fi + +set +oe pipefail + +if [[ "$run_nanoplotting" == "yes" ]] +then + echo "Check if $basecalling_output_directory/pass has basecalled reads for running nanoplotting." + if [[ "$(ls $basecalling_output_directory/pass)" ]] + then + echo "Running nanoplotting." 
+ cd $basecalling_output_directory + fastq_input="$project_name.basecalled_reads.Q${qual}.pass.fastq.gz" + source $nanoplot_dir/activate + $nanoplot_dir/NanoPlot \ + --threads $threads \ + --fastq $fastq_input \ + --N50 \ + -o "${project_name}_Q${qual}_pass_NanoPlot_out" + fi + if [[ "$run_demultiplexing" == "yes" ]] + then + cd $demultiplexing_output_directory + for b in barcode* + do + echo "for nanoplotting: barcode=$b" + fastq_input="$project_name.basecalled_reads.Q${qual}.pass.$b.fastq.gz" + source $nanoplot_dir/activate + $nanoplot_dir/NanoPlot \ + --threads $threads \ + --fastq $fastq_input \ + --N50 \ + -o "${project_name}_Q${qual}_pass_${b}_NanoPlot_out" + done + echo "for nanoplotting: unclassified" + fastq_input="$project_name.basecalled_reads.Q${qual}.pass.unclassified.fastq.gz" + $nanoplot_dir/NanoPlot \ + --threads $threads \ + --fastq $fastq_input \ + --N50 \ + -o "${project_name}_Q${qual}_pass_unclassified_NanoPlot_out" + fi +fi + + + +############################ +# checking bash exit status +if [[ $? -eq 0 ]] +then + echo "" + echo "LRSDAY message: This bash script has been successfully processed! :)" + echo "" + echo "" + exit 0 +fi +############################ diff --git a/pipelines/LRSDAY.01.Long-read-based_Genome_Assembly.sh b/pipelines/LRSDAY.01.Long-read-based_Genome_Assembly.sh index bda2793..3710e12 100755 --- a/pipelines/LRSDAY.01.Long-read-based_Genome_Assembly.sh +++ b/pipelines/LRSDAY.01.Long-read-based_Genome_Assembly.sh @@ -7,13 +7,13 @@ PATH=$gnuplot_dir:$PATH ########################################### # set project-specific variables -prefix="SK1" # The file name prefix for the output files. -long_reads="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format). +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. 
+long_reads="./../00.Long_Reads/$prefix.filtlong.fastq.gz" # The file path of the long reads file (in fastq or fastq.gz format). long_reads_type="pacbio-raw" # The long reads data type. Use "pacbio-raw" or "pacbio-corrected" or "nanopore-raw" or "nanopore-corrected". Default = "pacbio-raw" for the testing example genome_size="12.5m" # The estimated genome size with the format of [g|m|k], e.g. 12.5m for 12.5 Mb. Default = "12.5m". assembler="canu" # The long-read assembler: Use "canu" or "flye" or "wtdbg2" or "smartdenovo" or "canu-flye" or "canu-wtdbg2" or "canu-smartdenovo". For "canu-flye", "canu-wtdbg2", and "canu-smartdenovo", the assembler canu is used first to generate error-corrected reads from the raw reads and then the assembler flye/wtdbg2/smartdenovo is used to assemble the genome. Based on our test, assembler="canu" generally gives the best result but will take substantially longer time than the other options. -customized_canu_parameters="correctedErrorRate=0.04" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you can could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collasping haplotypes. As shown in these examples, more than one customized parameters can be set here as long as they are separeted by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. 
Default = "correctedErrorRate=0.04" for the testing example. -threads=2 # The number of threads to use. Default = 2. +customized_canu_parameters="" # For assembler="canu" only. Users can set customized Canu assembly parameters here or simply leave it empty like customized_canu_parameters="" to use Canu's default assembly parameter. For example you could set customized_canu_parameters="correctedErrorRate=0.04" for high coverage (>60X) PacBio data and customized_canu_parameters="overlapper=mhap;utgReAlign=true" for high coverage (>60X) Nanopore data to improve the assembly speed. When assembling genomes with high heterozygosity, you could set customized_canu_parameters="corOutCoverage=200;batOptions=-dg 3 -db 3 -dr 1 -ca 500 -cp 50" to avoid collapsing haplotypes. As shown in these examples, more than one customized parameter can be set here as long as they are separated by a semicolon and contained in a pair of double quotes (e.g. customized_canu_parameters="option1=XXX;option2=YYY;option3=ZZZ"). Please consult Canu's manual "http://canu.readthedocs.io/en/latest/faq.html#what-parameters-can-i-tweak" for advanced customization settings. Default = "" for the testing example. +threads=4 # The number of threads to use. Default = 4. vcf="yes" # Use "yes" if prefer to have vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome for their uniquely alignable regions. Otherwise use "no". Default = "yes". dotplot="yes" # Use "yes" if prefer to plot genome-wide dotplot based on the comparison with the reference genome below. Otherwise use "no". Default = "yes". ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome. This is only needed when the option "dotplot=" or "vcf=" has been set as "yes". 
diff --git a/pipelines/LRSDAY.02.Long-read-based_Assembly_Polishing.sh b/pipelines/LRSDAY.02.Long-read-based_Assembly_Polishing.sh index d03f79c..7b4c1b4 100755 --- a/pipelines/LRSDAY.02.Long-read-based_Assembly_Polishing.sh +++ b/pipelines/LRSDAY.02.Long-read-based_Assembly_Polishing.sh @@ -7,19 +7,19 @@ source ./../../env.sh ########################################### # set project-specific variables -input_assembly="./../01.Long-read-based_Genome_Assembly/SK1.assembly.raw.fa" # The file path of the input raw long-read-based assembly for polishing. -long_reads_in_fastq="./../00.Long_Reads/SK1.filtered_subreads.fastq.gz" # The file path of the long-read fastq file. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. + +input_assembly="./../01.Long-read-based_Genome_Assembly/$prefix.assembly.raw.fa" # The file path of the input raw long-read-based assembly for polishing. +long_reads_in_fastq="./../00.Long_Reads/$prefix.filtlong.fastq.gz" # The file path of the long-read fastq file. long_read_technology="pacbio" # The used long-read sequencing technology. Use "pacbio" or "nanopore". Default = "pacbio" for the testing example. ### When long_read_technology="pacbio" #### -pacbio_bam_fofn_file="./../00.Long_Reads/pacbio_fofn_files/SK1.merged.bam.fofn" # The file path to the fofn file containing the absolute path to the PacBio bam files. BAM file is the native output format for PacBio Sequel platform but this is not the case for the RSII platform. For RSII data, the bax2bam file conversion is needed. This can be done by running the LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh script in the 00.Long_Reads directory. +pacbio_bam_fofn_file="./../00.Long_Reads/pacbio_fofn_files/$prefix.merged.bam.fofn" # The file path to the fofn file containing the absolute path to the PacBio bam files. BAM file is the native output format for PacBio Sequel platform but this is not the case for the RSII platform. 
For RSII data, the bax2bam file conversion is needed. This can be done by running the LRSDAY.00.Retrieve_Sample_PacBio_Reads.sh script in the 00.Long_Reads directory. pacbio_reads_type="RSII" # The sequencing machine used to generate the input PacBio reads . Use "RSII" or "Sequel". Default = "RSII" for the testing example. ### When long_read_technology="nanopore" ### -nanopore_fast5_files="./../00.Long_Reads/nanopore_fast5_files" # The file path to the directory containing raw Oxford Nanopore FAST5 files. -nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_albacore_sequencing_summary="". - -prefix="SK1" # The file name prefix for the output files. Default = "SK1" for the testing example. +nanopore_basecalled_fast5_files="./../00.Long_Reads/nanopore_basecalled_fast5_files" # The file path to the directory containing the basecalled Oxford Nanopore FAST5 files. +nanopore_basecalling_sequencing_summary="./../00.Long_Reads/nanopore_basecalled_fast5_files/sequencing_summary.txt" # The file path to the nanopore albacore/guppy basecaller sequencing summary output. This summary file is not necessary but it can help the polishing step to run much faster when available. When this file is unavailable, set nanopore_basecalling_sequencing_summary="". threads=1 # The number of threads to use. Default = "1". ploidy=1 # The ploidy status of the sequenced genome. use "1" for haploid genome and "2" for diploid genome. Default = "1" for the testing example. 
@@ -82,15 +82,15 @@ else source $nanopolish_dir/py3_virtualenv_nanopolish/bin/activate if [[ -z "$nanopore_basecalling_sequencing_summary" ]] then - $nanopolish_dir/nanopolish index -d $nanopore_fast5_files $long_reads_in_fastq + $nanopolish_dir/nanopolish index -d $nanopore_basecalled_fast5_files $long_reads_in_fastq else - $nanopolish_dir/nanopolish index -d $nanopore_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq + $nanopolish_dir/nanopolish index -d $nanopore_basecalled_fast5_files -s $nanopore_basecalling_sequencing_summary $long_reads_in_fastq fi for i in $(seq 1 1 $rounds_of_successive_polishing) do java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar CreateSequenceDictionary -REFERENCE $prefix.assembly.tmp.fa -OUTPUT $prefix.assembly.tmp.dict $minimap2_dir/minimap2 -ax map-ont $prefix.assembly.tmp.fa $long_reads_in_fastq > $prefix.minimap2.round_${i}.sam - java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar SortSam -INPUT $prefix.minimap2.round_${i}.sam -OUTPUT $prefix.minimap2.round_${i}.bam -SORT_ORDER coordinate + java -Djava.io.tmpdir=./tmp -Dpicard.useLegacyParser=false -XX:ParallelGCThreads=$threads -jar $picard_dir/picard.jar SortSam -INPUT $prefix.minimap2.round_${i}.sam -OUTPUT $prefix.minimap2.round_${i}.bam -SORT_ORDER coordinate -VALIDATION_STRINGENCY LENIENT -MAX_RECORDS_IN_RAM 50000 $samtools_dir/samtools index $prefix.minimap2.round_${i}.bam rm $prefix.minimap2.round_${i}.sam python3 $nanopolish_dir/scripts/nanopolish_makerange.py $prefix.assembly.tmp.fa | $parallel_dir/parallel --results ${prefix}_nanopolish_round_${i}_results -P 1 \ diff --git a/pipelines/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh b/pipelines/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh index a14b0b8..bfe2117 100755 --- a/pipelines/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh +++ 
b/pipelines/LRSDAY.03.Illumina-read-based_Assembly_Polishing.sh @@ -7,8 +7,8 @@ source ./../../env.sh ########################################### # set project-specific variables -input_assembly="./../02.Long-read-based_Assembly_Polishing/SK1.assembly.long_read_polished.fa" # The file path of the input assembly before Illumina-based correction -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +input_assembly="./../02.Long-read-based_Assembly_Polishing/$prefix.assembly.long_read_polished.fa" # The file path of the input assembly before Illumina-based correction trim_illumina_reads="yes" # Whether to trim the input Illumina reads. Use "yes" if prefer to perform trimming, otherwise use "no". Default = "yes". rounds_of_successive_polishing=1 # The number of total rounds of Illumina-read-based assembly polishing. Default = "1" for the testing example. threads=1 # The number of threads to use. Default = "1". 
@@ -117,13 +117,13 @@ do # GATK local realign # find realigner targets - java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk_dir/GenomeAnalysisTK.jar \ + java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk3_dir/GenomeAnalysisTK.jar \ -R refseq.tmp.fa \ -T RealignerTargetCreator \ -I $prefix.round_${i}.dedup.bam \ -o $prefix.round_${i}.realn.intervals # run realigner - java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk_dir/GenomeAnalysisTK.jar \ + java -Djava.io.tmpdir=./tmp -XX:ParallelGCThreads=$threads -jar $gatk3_dir/GenomeAnalysisTK.jar \ -R refseq.tmp.fa \ -T IndelRealigner \ -I $prefix.round_${i}.dedup.bam \ diff --git a/pipelines/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh b/pipelines/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh index 1a011ef..bb32eff 100755 --- a/pipelines/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh +++ b/pipelines/LRSDAY.04.Reference-guided_Assembly_Scaffolding.sh @@ -7,8 +7,8 @@ PATH=$gnuplot_dir:$hal_dir:$PATH ####################################### # set project-specific variables -input_assembly="./../03.Illumina-read-based_Assembly_Polishing/SK1.assembly.illumina_read_polished.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for processing sample. Default = "SK1" for the testing example. +input_assembly="./../03.Illumina-read-based_Assembly_Polishing/$prefix.assembly.illumina_read_polished.fa" # The file path of the input genome assembly. ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome. ref_genome_noncore_masked="./../00.Ref_Genome/S288C.ASM205763v1.noncore_masked.fa" # The file path of the specially masked reference genome where subtelomeres and chromosome-ends were hard masked. 
When the subtelomere/chromosome-end information is unavailable for the organism that you are interested in, you can just put the path of the raw reference genome assembly here. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the raw reference genome file, if there are multiple reference mitochondrial genomes that you want to check, use a single ';' to separate them. e.g. "Sc_chrMT;Sp_chrMT". Default = "chrMT". diff --git a/pipelines/LRSDAY.05.Centromere_Identity_Profiling.sh b/pipelines/LRSDAY.05.Centromere_Identity_Profiling.sh index 440d741..663c004 100755 --- a/pipelines/LRSDAY.05.Centromere_Identity_Profiling.sh +++ b/pipelines/LRSDAY.05.Centromere_Identity_Profiling.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../04.Reference-guided_Assembly_Scaffolding/SK1.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly -prefix="SK1" # The file name prefix for the output files +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../04.Reference-guided_Assembly_Scaffolding/$prefix.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly query="$LRSDAY_HOME/data/S288C.centromere.fa" # S. cerevisiae S288C reference centromere sequences based on Yue et al. (2017) Nature Genetics. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". 
diff --git a/pipelines/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh b/pipelines/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh index f936551..e0d5574 100755 --- a/pipelines/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh +++ b/pipelines/LRSDAY.06.Mitochondrial_Genome_Assembly_Improvement.sh @@ -7,9 +7,9 @@ PATH=$bwa_dir:$samtools_dir:$gnuplot_dir:$canu_dir:$mummer_dir:$spades_dir:$prod ########################################### # set project-specific variables -genome="./../04.Reference-guided_Assembly_Scaffolding/SK1.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly -prefix="SK1" # The file name prefix for the output files -mt_contig_list="./../04.Reference-guided_Assembly_Scaffolding/SK1.assembly.ref_based_scaffolded.mt_contig.list" # The mitochodnrial contig list generated by the module 04.Reference-guided_Assembly_Scaffolding. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../04.Reference-guided_Assembly_Scaffolding/$prefix.assembly.ref_based_scaffolded.fa" # The file path of the input genome assembly +mt_contig_list="./../04.Reference-guided_Assembly_Scaffolding/$prefix.assembly.ref_based_scaffolded.mt_contig.list" # The mitochondrial contig list generated by the module 04.Reference-guided_Assembly_Scaffolding. gene_start="$LRSDAY_HOME/data/ATP6.cds.fa" # A FASTA file containing the CDS sequence of the mitochodnrial gene to be used as the starting point of the mitochondrial assembly. This can be set to any gene as long as a fasta file containing the DNA sequence of the gene is provided. ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The file path of the raw reference genome chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the raw reference genome file, if there are multiple reference mitochondrial genomes that you want to check, use a single ';' to separate them. e.g. "Sc_chrMT;Sp_chrMT". 
Default = "chrMT". diff --git a/pipelines/LRSDAY.07.Supervised_Final_Assembly.1.sh b/pipelines/LRSDAY.07.Supervised_Final_Assembly.1.sh index 6601cce..f1a95ab 100755 --- a/pipelines/LRSDAY.07.Supervised_Final_Assembly.1.sh +++ b/pipelines/LRSDAY.07.Supervised_Final_Assembly.1.sh @@ -6,8 +6,9 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../06.Mitochondrial_Genome_Assembly_Improvement/SK1.assembly.mt_improved.fa" # The file name of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../06.Mitochondrial_Genome_Assembly_Improvement/$prefix.assembly.mt_improved.fa" # The file name of the input genome assembly. + ####################################### # process the pipeline diff --git a/pipelines/LRSDAY.07.Supervised_Final_Assembly.2.sh b/pipelines/LRSDAY.07.Supervised_Final_Assembly.2.sh index a7e3aec..4332ef0 100755 --- a/pipelines/LRSDAY.07.Supervised_Final_Assembly.2.sh +++ b/pipelines/LRSDAY.07.Supervised_Final_Assembly.2.sh @@ -7,8 +7,8 @@ PATH=$gnuplot_dir:$PATH ####################################### # set project-specific variables -genome="./../06.Mitochondrial_Genome_Assembly_Improvement/SK1.assembly.mt_improved.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../06.Mitochondrial_Genome_Assembly_Improvement/$prefix.assembly.mt_improved.fa" # The file path of the input genome assembly. vcf="yes" # Whether to generate a vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome for their uniquely alignable regions. 
Use "yes" if prefer to have vcf file generated to show SNP and INDEL differences between the assembled genome and the reference genome. Default = "yes". dotplot="yes" # Whether to plot genome-wide dotplot based on the comparison with the reference genome below. Use "yes" if prefer to plot, otherwise use "no". Default = "yes". ref_genome_raw="./../00.Ref_Genome/S288C.ASM205763v1.fa" # The path of the raw reference genome, only needed when dotplot="yes" or vcf="yes". diff --git a/pipelines/LRSDAY.08.Centromere_Annotation.sh b/pipelines/LRSDAY.08.Centromere_Annotation.sh index 658b9da..8a731c7 100755 --- a/pipelines/LRSDAY.08.Centromere_Annotation.sh +++ b/pipelines/LRSDAY.08.Centromere_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The path of the input genome assembly. query="$LRSDAY_HOME/data/S288C.centromere.fa" # The S. cerevisiae S288C reference centromere sequences based on Yue et al. (2017) Nature Genetics. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". diff --git a/pipelines/LRSDAY.09.Nuclear_Gene_Annotation.sh b/pipelines/LRSDAY.09.Nuclear_Gene_Annotation.sh index 8558a81..444ef7a 100755 --- a/pipelines/LRSDAY.09.Nuclear_Gene_Annotation.sh +++ b/pipelines/LRSDAY.09.Nuclear_Gene_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome_assembly="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. 
-genome_tag="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome_assembly="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the final assembly. If there are multiple sequences, use a single ';' to separate them. e.g. "chrMT_part1;chrMT_part2". Default = "chrMT". threads=1 # The number of threads to use. Default = "1". maker_opts="$LRSDAY_HOME/misc/maker_opts.customized.ctl" # The configuration file for MAKER. You can edit this file if you have native transciptome/EST data for the strain/species that you sequenced or if you want to adapt it to annotate other eukaryotic organisms. Otherwise, please keep it unchanged. Please note that if this file is in the same directory where this bash script is executed, the file name cannot be "maker_opts.ctl". @@ -16,8 +16,10 @@ debug="no" # use "yes" if prefer to keep intermediate files, otherwise use "no". ####################################### # process the pipeline -echo "genome_assembly=$genome_assembly" +genome_tag="$prefix" + echo "genome_tag=$genome_tag" +echo "genome_assembly=$genome_assembly" # convert the genome assembly file to all uppercases diff --git a/pipelines/LRSDAY.10.Mitochondrial_Gene_Annotation.sh b/pipelines/LRSDAY.10.Mitochondrial_Gene_Annotation.sh index 2839f07..b37fec6 100755 --- a/pipelines/LRSDAY.10.Mitochondrial_Gene_Annotation.sh +++ b/pipelines/LRSDAY.10.Mitochondrial_Gene_Annotation.sh @@ -18,11 +18,10 @@ export PATH="$flip_dir:$blast_dir:$muscle_dir:$umac_dir:$hmmer_dir:$erpin_dir:$t ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. +prefix="SK1" # The file name prefix for the processing sample. 
Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the input genome assembly, if there are multiple corresponding contigs/scaffolds, use a single ';' to separate them. e.g. "chrMT_1;chrMT_2". Default = "chrMT". genetic_code_table=3 # The NCBI genetic code table (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for the annotated mitochondrial genome. Default = 3 (i.e. Yeast Mitochondria) - -prefix="SK1" # The file name prefix for the output files. debug="no" # Whehter to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". ###################################### diff --git a/pipelines/LRSDAY.11.TE_Annotation.sh b/pipelines/LRSDAY.11.TE_Annotation.sh index d4db5cc..520fd87 100755 --- a/pipelines/LRSDAY.11.TE_Annotation.sh +++ b/pipelines/LRSDAY.11.TE_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. threads=1 # The number of threads to use. Default = "1". debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". 
diff --git a/pipelines/LRSDAY.12.Core_X_Element_Annotation.sh b/pipelines/LRSDAY.12.Core_X_Element_Annotation.sh index 7b01a17..5c4f6f6 100755 --- a/pipelines/LRSDAY.12.Core_X_Element_Annotation.sh +++ b/pipelines/LRSDAY.12.Core_X_Element_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". ####################################### diff --git a/pipelines/LRSDAY.13.Y_Prime_Element_Annotation.sh b/pipelines/LRSDAY.13.Y_Prime_Element_Annotation.sh index eec3831..52fcfd5 100755 --- a/pipelines/LRSDAY.13.Y_Prime_Element_Annotation.sh +++ b/pipelines/LRSDAY.13.Y_Prime_Element_Annotation.sh @@ -6,8 +6,8 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. debug="no" # Whether to keep intermediate files for debugging. Use "yes" if prefer to keep intermediate files, otherwise use "no". Default = "no". 
####################################### diff --git a/pipelines/LRSDAY.14.Gene_Orthology_Identification.sh b/pipelines/LRSDAY.14.Gene_Orthology_Identification.sh index 67e886c..2fc5a3d 100755 --- a/pipelines/LRSDAY.14.Gene_Orthology_Identification.sh +++ b/pipelines/LRSDAY.14.Gene_Orthology_Identification.sh @@ -6,16 +6,16 @@ source ./../../env.sh ####################################### # set project-specific variables -prefix="SK1" # The file name prefix for the output files. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. threads=1 # The number of threads to use. Default = "1". -input_nuclear_gene_gff="./../09.Nuclear_Gene_Annotation/SK1.nuclear_genome.EVM.gff3" # The file path of the input nuclear gene gff3 file generated in the task 09.Nuclear_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the nuclear gene annotation. -query_nuclear_gene_PoFF_faa="./../09.Nuclear_Gene_Annotation/SK1.nuclear_genome.EVM.PoFF.faa" # The file path of the PoFF.faa file generated in the task 09.Nuclear_Gene_Annotation. -query_nuclear_gene_PoFF_gff="./../09.Nuclear_Gene_Annotation/SK1.nuclear_genome.EVM.PoFF.gff" # The file path of the PoFF.gff file generated in the task 09.Nuclear_Gene_Annotation. +input_nuclear_gene_gff="./../09.Nuclear_Gene_Annotation/$prefix.nuclear_genome.EVM.gff3" # The file path of the input nuclear gene gff3 file generated in the task 09.Nuclear_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the nuclear gene annotation. +query_nuclear_gene_PoFF_faa="./../09.Nuclear_Gene_Annotation/$prefix.nuclear_genome.EVM.PoFF.faa" # The file path of the PoFF.faa file generated in the task 09.Nuclear_Gene_Annotation. 
+query_nuclear_gene_PoFF_gff="./../09.Nuclear_Gene_Annotation/$prefix.nuclear_genome.EVM.PoFF.gff" # The file path of the PoFF.gff file generated in the task 09.Nuclear_Gene_Annotation. -input_mitochondrial_gene_gff="./../10.Mitochondrial_Gene_Annotation/SK1.mitochondrial_genome.mfannot.gff3" # The file path of the input mitochondrial gene gff3 file generated in the task 10.Mitochondrial_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the mitochondrial gene annotation. -query_mitochondrial_gene_PoFF_faa="./../10.Mitochondrial_Gene_Annotation/SK1.mitochondrial_genome.mfannot.PoFF.faa" # The file path of the PoFF.faa file generated in the task 10.Mitochondrial_Gene_Annotation. -query_mitochondrial_gene_PoFF_gff="./../10.Mitochondrial_Gene_Annotation/SK1.mitochondrial_genome.mfannot.PoFF.gff" # The file path of the PoFF.gff file generated in the task 10.Mitochondrial_Gene_Annotation. +input_mitochondrial_gene_gff="./../10.Mitochondrial_Gene_Annotation/$prefix.mitochondrial_genome.mfannot.gff3" # The file path of the input mitochondrial gene gff3 file generated in the task 10.Mitochondrial_Gene_Annotation. Set this variable as well as the following two variables to "" if you want to skip this step for the mitochondrial gene annotation. +query_mitochondrial_gene_PoFF_faa="./../10.Mitochondrial_Gene_Annotation/$prefix.mitochondrial_genome.mfannot.PoFF.faa" # The file path of the PoFF.faa file generated in the task 10.Mitochondrial_Gene_Annotation. +query_mitochondrial_gene_PoFF_gff="./../10.Mitochondrial_Gene_Annotation/$prefix.mitochondrial_genome.mfannot.PoFF.gff" # The file path of the PoFF.gff file generated in the task 10.Mitochondrial_Gene_Annotation. ref_PoFF_faa="$LRSDAY_HOME/data/SGDref.PoFF.faa" # The file path of the reference proteome file in FASTA format: for S. 
cerevisiae and its close relatives, you can directly use the pre-shipped file: SGDref.PoFF.faa; if you work with other organisms, you can check ProteinOrtho's manual for details on how to prepare such file. ref_PoFF_gff="$LRSDAY_HOME/data/SGDref.PoFF.gff" # The path of the reference gene GFF file in GFF format: for S. cerevisiae and its close relatives, you can directly use the pre-shipped file: SGDref.PoFF.gff; if you work with other organisms, you can check ProteinOrtho's manual for details on how to prepare such file. diff --git a/pipelines/LRSDAY.15.Annotation_Integration.sh b/pipelines/LRSDAY.15.Annotation_Integration.sh index e2930f3..306f7e8 100755 --- a/pipelines/LRSDAY.15.Annotation_Integration.sh +++ b/pipelines/LRSDAY.15.Annotation_Integration.sh @@ -6,14 +6,14 @@ source ./../../env.sh ####################################### # set project-specific variables -genome="./../07.Supervised_Final_Assembly/SK1.assembly.final.fa" # The file path of the input genome assembly. -prefix="SK1" # The file name prefix for the output file -centromere_gff3="./../08.Centromere_Annotation/SK1.centromere.gff3" # The path of the final centromere annotation gff3 generated by task 08.Centromere_Annotation_for_FinalAssembly, use "" if not existing. -TE_gff3="./../11.TE_Annotation/SK1.TE.gff3" # The file path of the TE annotation gff3 generated by task 11.TE_Annotation, use "" if not existing. -X_element_gff3="./../12.Core_X_Element_Annotation/SK1.X_element.gff3" # The file path of the X_element annotation gff3 generated by task 12.CoreX_Annotation, use "" if not existing. -Y_prime_element_gff3="./../13.Y_Prime_Element_Annotation/SK1.Y_prime_element.gff3" # The file path of the Y_prime_element annotation gff3 generated by task 13.Yprime_Annotation, use "" if not existing. 
-nuclear_gene_gff3="./../14.Gene_Orthology_Identification/SK1.nuclear_gene.updated.gff3" # The file path of the nuclear gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 09.Nuclear_Gene_Annotation, use "" if not existing. -mitochondrial_gene_gff3="./../14.Gene_Orthology_Identification/SK1.mitochondrial_gene.updated.gff3" # The file path of the mitochondrial gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 10.Mitochondrial_Gene_Annotation, use "" if not existing. +prefix="SK1" # The file name prefix for the processing sample. Default = "SK1" for the testing example. +genome="./../07.Supervised_Final_Assembly/$prefix.assembly.final.fa" # The file path of the input genome assembly. +centromere_gff3="./../08.Centromere_Annotation/$prefix.centromere.gff3" # The path of the final centromere annotation gff3 generated by task 08.Centromere_Annotation_for_FinalAssembly, use "" if not existing. +TE_gff3="./../11.TE_Annotation/$prefix.TE.gff3" # The file path of the TE annotation gff3 generated by task 11.TE_Annotation, use "" if not existing. +X_element_gff3="./../12.Core_X_Element_Annotation/$prefix.X_element.gff3" # The file path of the X_element annotation gff3 generated by task 12.CoreX_Annotation, use "" if not existing. +Y_prime_element_gff3="./../13.Y_Prime_Element_Annotation/$prefix.Y_prime_element.gff3" # The file path of the Y_prime_element annotation gff3 generated by task 13.Yprime_Annotation, use "" if not existing. +nuclear_gene_gff3="./../14.Gene_Orthology_Identification/$prefix.nuclear_gene.updated.gff3" # The file path of the nuclear gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 09.Nuclear_Gene_Annotation, use "" if not existing. 
+mitochondrial_gene_gff3="./../14.Gene_Orthology_Identification/$prefix.mitochondrial_gene.updated.gff3" # The file path of the mitochondrial gene annotation gff3 generated by task 14.GeneOrthology_Identification (which is better) or 10.Mitochondrial_Gene_Annotation, use "" if not existing. chrMT_tag="chrMT" # The sequence name for the mitochondrial genome in the input genome assembly, if there are multiple corresponding contigs/scaffolds, use a single ';' to separate them. e.g. "chrMT_1;chrMT_2". Default = "chrMT". chrMT_genetic_code_table=3 # The NCBI genetic code table (https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi) for the annotated mitochondrial genome. Default = 3 (i.e. Yeast Mitochondria) diff --git a/prerequisite.txt b/prerequisite.txt index d7deaf4..85a0240 100644 --- a/prerequisite.txt +++ b/prerequisite.txt @@ -8,21 +8,23 @@ This protocol is designed for a desktop or computing server running an x86-64-bi ######################## # Software requirements ######################## -● Bash (https://www.gnu.org/software/bash/) -● Bzip2 (http://www.bzip.org/) -● Cmake (https://cmake.org/) -● GCC and G++ v4.9.1 or newer (https://gcc.gnu.org/) -● Ghostscript (https://www.ghostscript.com) -● Git (https://git-scm.com/) -● GNU make (https://www.gnu.org/software/make/) -● Gzip (https://www.gnu.org/software/gzip/) -● Java runtime environment (JRE) v1.8.0 (https://www.java.com) -● Perl v5.12 or newer (https://www.perl.org/) -● Python v2.7.9 or newer (https://www.python.org/) -● Python v3.4 or newer (https://www.python.org/) -● Tar (https://www.gnu.org/software/tar/) -● Unzip (http://infozip.sourceforge.net/UnZip.html) -● Virtualenv v15.1.0 or newer (https://virtualenv.pypa.io) -● Wget (https://www.gnu.org/software/wget/) -● Zlib (https://zlib.net/) +● bash (https://www.gnu.org/software/bash/) +● bzip2 and libbz2-dev (http://www.bzip.org/) +● cmake (https://cmake.org/) +● gcc and g++ v4.9.1 or newer (https://gcc.gnu.org/) +● ghostscript 
(https://www.ghostscript.com) +● git (https://git-scm.com/) +● gnu make (https://www.gnu.org/software/make/) +● gzip (https://www.gnu.org/software/gzip/) +● java runtime environment (JRE) v1.8.0 (https://www.java.com) +● perl v5.12 or newer (https://www.perl.org/) +● python v2.7.9 or newer (https://www.python.org/) +● python-devel +● python v3.4 or newer (https://www.python.org/) +● python3-devel +● tar (https://www.gnu.org/software/tar/) +● unzip (http://infozip.sourceforge.net/UnZip.html) +● virtualenv v15.1.0 or newer (https://virtualenv.pypa.io) +● wget (https://www.gnu.org/software/wget/) +● zlib and zlib-devel (https://zlib.net/) diff --git a/scripts/run_mummerplot.sh b/scripts/run_mummerplot.sh new file mode 100755 index 0000000..8d460f4 --- /dev/null +++ b/scripts/run_mummerplot.sh @@ -0,0 +1,43 @@ +#!/bin/bash +set -e -o pipefail +########################################## +# load environment variables for LRSDAY +source ./../../env.sh +PATH=$gnuplot_dir:$PATH + +########################################### +query_genome="SK1.assembly.raw.fa" +ref_genome="S288C.ASM205763v1.fa" +prefix="SK1.assembly.raw" +threads=4 + +########################################### +# process the pipeline + +# make the comparison between the assembled genome and the reference genome +$mummer_dir/nucmer -t $threads --maxmatch --nosimplify -p $prefix.mummerplot $ref_genome $query_genome +$mummer_dir/delta-filter -m $prefix.mummerplot.delta > $prefix.mummerplot.delta_filter +$mummer_dir/mummerplot --large --postscript $prefix.mummerplot.delta_filter -p $prefix.mummerplot.filter +perl $LRSDAY_HOME/scripts/fine_tune_gnuplot.pl -i $prefix.mummerplot.filter.gp -o $prefix.mummerplot.filter_adjust.gp -r $ref_genome -q $query_genome +$gnuplot_dir/gnuplot < $prefix.mummerplot.filter_adjust.gp +rm *.delta +rm *.delta_filter +rm *.filter.fplot +rm *.filter.rplot +rm *.filter.gp +rm *.filter_adjust.gp +rm *.filter.ps + +############################ +# checking bash exit status +if [[ $? 
-eq 0 ]] +then + echo "" + echo "LRSDAY message: This bash script has been successfully processed! :)" + echo "" + echo "" + exit 0 +fi +############################ + + diff --git a/scripts/subsampling_seqeunces.pl b/scripts/subsampling_sequences.pl similarity index 100% rename from scripts/subsampling_seqeunces.pl rename to scripts/subsampling_sequences.pl