Marcus Davy 12/13/2019
The pycoQC was used for quality control and filtering.
#` albacore pycoQC call
bsub -o $WKDIR/01.pycoQC/log/QC1.out -e $WKDIR/01.pycoQC/log/QC1.err -J ONT_QC1 \"pycoQC -f $WKDIR/albacore2/Red_flesh_ON_run1_Cas9/all_summary_run1.txt -o $WKDIR/01.pycoQC/pycoQC_summary_run1
#` Guppy pycoQC call
bsub -o $WKDIR/01.pycoQC/log/QC1.out -e $WKDIR/01.pycoQC/log/QC1.err -J ONT_QC1 \"pycoQC -f $WKDIR/sequencing_summary.txt $WKDIR/01.pycoQC/pycoQC_summary_run1
./check_dates.sh
## -rw-rw-r--. 1 hrpelg powerplant 1595833 May 7 2019 /workspace/hrpelg/Red_Flesh_ON/albacore2/Red_flesh_ON_run1_Cas9/all_summary_run1.txt
## -rw-rw-r--. 1 hrpelg powerplant 103551626 May 7 2019 /workspace/hrpelg/Red_Flesh_ON/02.poreChop/All_DS_RedFlesh_ON_run1_cas_after_porechop_dis.fastq.gz
## -rwxr-xr-x. 1 hrpelg powerplant 1763876 Oct 4 2019 /workspace/hrpelg/Red_Flesh_ON/Guppy_basecalling/sequencing_summary.txt
## -rwxr-xr-x. 1 hrpelg powerplant 116048737 Oct 7 2019 /workspace/hrpelg/Red_Flesh_ON/Guppy_basecalling/02.poreChop/After_PoreCHOP_RedFlesh_ON_GUPPY_cas.fastq.gz
MYB10 coordinates;
myb10_coords <- import(Sys.getenv("BEDFILE"), format="bed")
print(myb10_coords)
## GRanges object with 1 range and 2 metadata columns:
## seqnames ranges strand | name score
## <Rle> <IRanges> <Rle> | <character> <numeric>
## [1] Chr09 35542701-35551878 + | IRHS-2017 0
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
Summary statistics from albacore2
, and guppy
base callers. The
summary files have changed slightly between the two base callers, they
do not have identical column
fields;
albacore_summary <- read.table("/workspace/hrpelg/Red_Flesh_ON/albacore2/Red_flesh_ON_run1_Cas9/all_summary_run1.txt", header=TRUE, as.is = TRUE)
# albacore_summary <- read.table("/workspace/hramwd/github/analysis-workflows/Malus/Red_Flesh_ON/albacore2/Red_flesh_ON_run1_Cas9/all_summary_run1.txt", header=TRUE, as.is = TRUE)
guppy_summary <- read.table("/workspace/hrpelg/Red_Flesh_ON/Guppy_basecalling/sequencing_summary.txt", header=TRUE, as.is = FALSE)
dim(albacore_summary)
## [1] 7056 19
dim(guppy_summary)
## [1] 7056 18
table(albacore_summary$passes_filtering)
##
## False True
## 974 6082
table(guppy_summary$passes_filtering)
##
## FALSE TRUE
## 678 6378
#`
#` Create filtered summaries
#`
albacore_summary_pass <- albacore_summary[albacore_summary$passes_filtering%in%"True", ]
albacore_summary_fail <- albacore_summary[albacore_summary$passes_filtering%in%"True", ]
guppy_summary_pass <- guppy_summary[guppy_summary$passes_filtering%in%TRUE, ]
guppy_summary_fail <- guppy_summary[guppy_summary$passes_filtering%in%FALSE, ]
There are a few subtle changes in the reported summary statistics fields;
## [ Field names: intersection ]
## [1] "filename" "read_id"
## [3] "run_id" "channel"
## [5] "start_time" "duration"
## [7] "num_events" "passes_filtering"
## [9] "template_start" "num_events_template"
## [11] "template_duration" "sequence_length_template"
## [13] "mean_qscore_template" "strand_score_template"
## [ Field names: Unique to albacore ]
## [1] "num_called_template"
## [2] "calibration_strand_genome_template"
## [3] "calibration_strand_identity_template"
## [4] "calibration_strand_accuracy_template"
## [5] "calibration_strand_speed_bps_template"
## [ Field names: Unique to guppy ]
## [1] "batch_id" "mux" "median_template" "mad_template"
Base calling using albacore were independently run against the same
fast5
files, checking the statistics summary files generated
illustrates that the records returned may be in a different row position
probably due to differences in results return order from
parallelisation;
basecall_stats_versions()
## [ hramwd run albacore dimensions ]
## [1] 7056 19
## [ hrpelg run albacore dimensions ]
## [1] 7056 19
The Fastq files generated by each base caller should be the same length;
#` Albacore fastq reads
if(PORECHOP) {
#` Porechop adapter trimmed fastq files
albacore.fq <- readFastq("/workspace/hrpelg/Red_Flesh_ON/02.poreChop/All_DS_RedFlesh_ON_run1_cas_after_porechop_dis.fastq.gz")
guppy.fq <- readFastq("/workspace/hrpelg/Red_Flesh_ON/Guppy_basecalling/02.poreChop/After_PoreCHOP_RedFlesh_ON_GUPPY_cas.fastq.gz")
} else {
#` Raw fastq files
albacore.fq <- readFastq("/workspace/hrpelg/Red_Flesh_ON/albacore2/All_DS_RedFlesh_ON_run1_cas.fastq")
guppy.fq <- readFastq("/workspace/hrpelg/Red_Flesh_ON/Guppy_basecalling/02.poreChop/Merged_RedFlesh_ON_GUPPY_cas.fastq.gz")
}
length(albacore.fq)
## [1] 6106
length(guppy.fq)
## [1] 7052
however the albacore
base caller returns the filtered reads, while the
guppy
base caller returns 7052 unfiltered reads.
We appear to have 24 duplicate sequences in the albacore.fq
file;
## [ albacore duplicate table ]
##
## 1 2
## 6058 24
## [ albacore duplicate ids ]
## [1] "00986955-624b-462e-a76b-2811435e51ba"
## [2] "00cfb4bb-b78c-499d-b44f-4bdfcc09ede5"
## [3] "01531616-9556-4c00-b80d-8aec50c4ab34"
## [4] "00c6ac2c-8ff3-4a0c-80c8-91d2abcd7bda"
## [5] "00cf0df9-b459-4be9-b70f-64cd1df2a977"
## [6] "012f00b6-c3fb-4347-add7-b26f6da478a2"
## [7] "00a27f19-9368-4eb0-b5cd-5c62b18bed36"
## [8] "006c95c5-d9a6-43f7-9961-4ee6f4de65e8"
## [9] "0154cb43-d436-4fc1-a822-104f70afde8f"
## [10] "01fd669c-4160-44ab-aa47-220f2f230437"
## [11] "0211976e-086b-4083-83c4-ed38843c17b7"
## [12] "01d9ab3e-5ffc-4d47-9b01-a5e500605115"
## [13] "01455441-afa9-499b-a1b1-38c3de5de68e"
## [14] "0088bc8f-f225-4987-a4e0-1ed2d51368b0"
## [15] "0017be96-2a39-4547-a255-ed9ba9bc0b2c"
## [16] "00d003bd-04ed-4616-9626-646b1b66bc8b"
## [17] "00ce4d57-e8dc-473b-9b41-f7820e02f1bc"
## [18] "00212653-5459-481c-abac-9fcda80ce46e"
## [19] "00cf9db4-03ce-41dd-a9ba-51d3a0c6e029"
## [20] "001a41f1-0b7a-4f08-bac8-95d876f69ef6"
## [21] "007a9957-614d-4014-a2e7-1750e3c67449"
## [22] "001976f0-de53-4ae6-85e8-48829ee3b787"
## [23] "0112ba23-9ad3-4d41-832b-ee55811a4a62"
## [24] "00297bd2-5e2d-44c5-b75b-e6ff7f499e9d"
## [ Sorted albacore sequences ]
## A DNAStringSet instance of length 48
## width seq
## [1] 1002 AAAAACTACAATAAAAGCCTCGAATGTTGGACG...TAAAATATCGGTCATATTCATAGCAATACGTAA
## [2] 1002 AAAAACTACAATAAAAGCCTCGAATGTTGGACG...TAAAATATCGGTCATATTCATAGCAATACGTAA
## [3] 12593 AAATTGTATTTCTTTTTAAAATATGTTTTAGGA...AAAGGCACACTGGCTTCTCCTCTTTGACCGCAG
## [4] 12593 AAATTGTATTTCTTTTTAAAATATGTTTTAGGA...AAAGGCACACTGGCTTCTCCTCTTTGACCGCAG
## [5] 4726 AAGCCGGTAAACAAGCTCTTTGTTGTTGGCTAA...TATGCTTGATTTGAAGGCTGGTGAATACAACGA
## ... ... ...
## [44] 2041 TTCATGTGGACACATAAAACAAATTACTTTAAA...AATCTCCGTCTCCTCTTGTGGGTCCATTGGATC
## [45] 1008 TTCTAATAACGTCCTGCGGCAACAATATAGAGC...TCCACGATACGGAAGAGCGTTATAATTCAACGA
## [46] 1008 TTCTAATAACGTCCTGCGGCAACAATATAGAGC...TCCACGATACGGAAGAGCGTTATAATTCAACGA
## [47] 11386 TTTTGAGAAAATTATACGTTTATGTCTCATTTA...CGAAGAAAATGGGTGAAAACGAAGGAGAATGGC
## [48] 11386 TTTTGAGAAAATTATACGTTTATGTCTCATTTA...CGAAGAAAATGGGTGAAAACGAAGGAGAATGGC
## [ guppy duplicate table ]
##
## 1
## 7052
Fix the albacore.fq data set by removing the duplicates;
#` Subset unique albacore sequences
albacore_unique.fq <- albacore.fq[!albacore_dups$dup_ind]
length(albacore_unique.fq)
## [1] 6082
The albacore
base caller filters the sequences, so the number of
sequences should match the number of passed sequences in the table.
expect_equal(
sort(split_ids(ShortRead::id(albacore_unique.fq))),
sort(albacore_summary$read_id[albacore_summary$passes_filtering%in%"True"])
)
The unfiltered guppy
fastq file should have the same number of
sequences as records in the statistics summary, however there appears to
be 4 additional records in the summary not present in the guppy
fastq
file;
## [ Unfiltered guppy fastq sequences ]
## [1] 7052
## [ guppy statistics records ]
## [1] 7056
What are the 4 additional guppy records?
cat("[ missing ids ]\n")
## [ missing ids ]
setdiff(guppy_summary$read_id, split_ids(ShortRead::id(guppy.fq)))
## [1] "56772c11-431c-4081-a4f2-3434868945ac"
## [2] "fbfb526a-9101-4ea3-b550-8930954c01a2"
## [3] "62152ec8-2fbc-4a62-b6a9-0ba9d03265a3"
## [4] "e91965ce-3454-43d3-94c8-bf1100b483f5"
missing_ind <- which(!guppy_summary$read_id%in%split_ids(ShortRead::id(guppy.fq)))
cat("[ widths of missing sequences: sequence_length_template field ]\n")
## [ widths of missing sequences: sequence_length_template field ]
guppy_summary[missing_ind, "sequence_length_template"]
## [1] 4 39 46991 134
knitr::kable(guppy_summary[missing_ind,])
filename | read_id | run_id | batch_id | channel | mux | start_time | duration | num_events | passes_filtering | template_start | num_events_template | template_duration | sequence_length_template | mean_qscore_template | strand_score_template | median_template | mad_template | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1257 | FAK77153_381a627fa445f6ab43dcbb0542d4cc06427d04b3_1.fast5 | 56772c11-431c-4081-a4f2-3434868945ac | 381a627fa445f6ab43dcbb0542d4cc06427d04b3 | 0 | 77 | 2 | 46477.84 | 3.00600 | 6012 | FALSE | 46477.84 | 6012 | 3.00600 | 4 | 3.619523 | 0 | 62.81604 | 2.684446 |
2637 | FAK77153_381a627fa445f6ab43dcbb0542d4cc06427d04b3_1.fast5 | fbfb526a-9101-4ea3-b550-8930954c01a2 | 381a627fa445f6ab43dcbb0542d4cc06427d04b3 | 0 | 77 | 2 | 51581.49 | 4.51125 | 9022 | FALSE | 51581.49 | 9022 | 4.51125 | 39 | 2.550976 | 0 | 64.06878 | 2.326520 |
2860 | FAK77153_381a627fa445f6ab43dcbb0542d4cc06427d04b3_1.fast5 | 62152ec8-2fbc-4a62-b6a9-0ba9d03265a3 | 381a627fa445f6ab43dcbb0542d4cc06427d04b3 | 0 | 164 | 3 | 44760.41 | 126.10750 | 252215 | TRUE | 44760.70 | 251650 | 125.82500 | 46991 | 11.535305 | 0 | 96.81902 | 12.169489 |
3049 | FAK77153_381a627fa445f6ab43dcbb0542d4cc06427d04b3_1.fast5 | e91965ce-3454-43d3-94c8-bf1100b483f5 | 381a627fa445f6ab43dcbb0542d4cc06427d04b3 | 0 | 213 | 3 | 65784.55 | 2.38850 | 4777 | FALSE | 65784.55 | 4777 | 2.38850 | 134 | 5.128243 | 0 | 42.23528 | 8.053338 |
Using Rsamtools to extract coverage information from the bam files, note: no flag filtering is currently used to remove poor quality reads.
bamfile1 <- Sys.getenv("GUPPY_BAM")
bamfile2 <- Sys.getenv("ALBACORE_BAM")
guppy_all <- bamCoverage(1, myb10_coords, bamfile1)
albacore_all <- bamCoverage(1, myb10_coords, bamfile2)
guppy_fwd <- bamCoverage(1, myb10_coords, bamfile1, orient="fwd")
albacore_fwd <- bamCoverage(1, myb10_coords, bamfile2, orient="fwd")
guppy_rev <- bamCoverage(1, myb10_coords, bamfile1, orient="revcomp")
albacore_rev <- bamCoverage(1, myb10_coords, bamfile2, orient="revcomp")
Note: The field passes_filtering
logical has changed to upper case for
guppy
, probably for downstream analysis using R.
Try to replicate the figures in table 3;
table3 <- data.frame(
Method = c("Albacore2","Guppy"),
All_reads = c(nrow(albacore_summary), nrow(guppy_summary)),
Pass_reads = c(nrow(albacore_summary_pass), nrow(guppy_summary_pass)),
bases_all = c(sum(albacore_summary$sequence_length_template), sum(guppy_summary$sequence_length_template)),
bases_pass = c(sum(albacore_summary_pass$sequence_length_template), sum(guppy_summary_pass$sequence_length_template)),
median_all = c(median(albacore_summary$sequence_length_template), median(guppy_summary$sequence_length_template)),
median_pass = c(median(albacore_summary_pass$sequence_length_template), median(guppy_summary_pass$sequence_length_template)),
N50_all = c(N50(albacore_summary$sequence_length_template), N50(guppy_summary$sequence_length_template)),
N50_pass = c(N50(albacore_summary_pass$sequence_length_template), N50(guppy_summary_pass$sequence_length_template)),
median_qual_all = c(round(median(albacore_summary$mean_qscore_template), 2), round(median(guppy_summary$mean_qscore_template), 2)),
median_qual_pass = c(round(median(albacore_summary_pass$mean_qscore_template), 2), round(median(guppy_summary_pass$mean_qscore_template), 2))
)
knitr::kable(table3)
Method | All_reads | Pass_reads | bases_all | bases_pass | median_all | median_pass | N50_all | N50_pass | median_qual_all | median_qual_pass |
---|---|---|---|---|---|---|---|---|---|---|
Albacore2 | 7056 | 6082 | 112578716 | 106589486 | 9783.5 | 11359.0 | 30416 | 30910 | 9.11 | 9.28 |
Guppy | 7056 | 6378 | 113972292 | 109526241 | 9891.5 | 11008.5 | 30635 | 30825 | 11.22 | 11.39 |
There is one read ID which is present and a pass in the guppy summary file, however it is missing from the guppy fastq file.
setdiff(guppy_summary_pass$read_id, split_ids(ShortRead::id(guppy.fq)))
## [1] "62152ec8-2fbc-4a62-b6a9-0ba9d03265a3"
This record should be removed for an amended version of Table 3;
missing_id <- setdiff(guppy_summary_pass$read_id, split_ids(ShortRead::id(guppy.fq)))
guppy_summary_pass <- guppy_summary[guppy_summary$passes_filtering%in%TRUE & (!guppy_summary$read_id%in%missing_id), ]
guppy_summary_fail <- guppy_summary[guppy_summary$passes_filtering%in%FALSE & (!guppy_summary$read_id%in%missing_id), ]
table3_amended <- data.frame(
Method = c("Albacore2","Guppy"),
All_reads = c(nrow(albacore_summary), nrow(guppy_summary)),
Pass_reads = c(nrow(albacore_summary_pass), nrow(guppy_summary_pass)),
bases_all = c(sum(albacore_summary$sequence_length_template), sum(guppy_summary$sequence_length_template)),
bases_pass = c(sum(albacore_summary_pass$sequence_length_template), sum(guppy_summary_pass$sequence_length_template)),
median_all = c(median(albacore_summary$sequence_length_template), median(guppy_summary$sequence_length_template)),
median_pass = c(median(albacore_summary_pass$sequence_length_template), median(guppy_summary_pass$sequence_length_template)),
N50_all = c(N50(albacore_summary$sequence_length_template), N50(guppy_summary$sequence_length_template)),
N50_pass = c(N50(albacore_summary_pass$sequence_length_template), N50(guppy_summary_pass$sequence_length_template)),
median_qual_all = c(round(median(albacore_summary$mean_qscore_template), 2), round(median(guppy_summary$mean_qscore_template), 2)),
median_qual_pass = c(round(median(albacore_summary_pass$mean_qscore_template), 2), round(median(guppy_summary_pass$mean_qscore_template), 2))
)
knitr::kable(table3_amended)
Method | All_reads | Pass_reads | bases_all | bases_pass | median_all | median_pass | N50_all | N50_pass | median_qual_all | median_qual_pass |
---|---|---|---|---|---|---|---|---|---|---|
Albacore2 | 7056 | 6082 | 112578716 | 106589486 | 9783.5 | 11359 | 30416 | 30910 | 9.11 | 9.28 |
Guppy | 7056 | 6377 | 113972292 | 109479250 | 9891.5 | 11008 | 30635 | 30825 | 11.22 | 11.39 |
#`
#` Comparing sequence length to `sequence_length_template` field
#` ind <- 1 fails for albacore...
ind <- 2
expect_equal(width(sread(albacore_unique.fq[grepl( albacore_summary$read_id[ind], id(albacore_unique.fq))])), albacore_summary[ind, "sequence_length_template"])
expect_equal(width(sread(guppy.fq[grepl( guppy_summary$read_id[ind], id(guppy.fq))])), guppy_summary[ind, "sequence_length_template"])
## Quality
albacore_summary[ind, ]
## filename
## 2 00cfb4bb-b78c-499d-b44f-4bdfcc09ede5.fast5
## read_id run_id
## 2 00cfb4bb-b78c-499d-b44f-4bdfcc09ede5 381a627fa445f6ab43dcbb0542d4cc06427d04b3
## channel start_time duration num_events passes_filtering template_start
## 2 99 50990.59 4.89375 3915 True 0
## num_events_template template_duration num_called_template
## 2 3915 4.89375 3915
## sequence_length_template mean_qscore_template strand_score_template
## 2 2041 8.759 -4e-04
## calibration_strand_genome_template calibration_strand_identity_template
## 2 filtered_out -1
## calibration_strand_accuracy_template calibration_strand_speed_bps_template
## 2 -1 0
summary(as(quality(albacore_unique.fq[grepl( albacore_summary$read_id[ind], id(albacore_unique.fq))]), "numeric"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 7.00 12.00 12.55 18.00 27.00
guppy_summary[ind, ]
## filename
## 2 FAK77153_381a627fa445f6ab43dcbb0542d4cc06427d04b3_1.fast5
## read_id run_id
## 2 11b7b0ee-0a4c-46f9-88ee-58db0cd81f12 381a627fa445f6ab43dcbb0542d4cc06427d04b3
## batch_id channel mux start_time duration num_events passes_filtering
## 2 0 56 1 16549.63 53.36375 106727 TRUE
## template_start num_events_template template_duration sequence_length_template
## 2 16549.67 106646 53.32325 11570
## mean_qscore_template strand_score_template median_template mad_template
## 2 10.54434 0 81.07027 11.09571
summary(as(quality(guppy.fq[grepl( guppy_summary$read_id[ind], id(guppy.fq))]), "numeric"))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 11.00 21.00 20.61 29.00 66.00
#`
#` Sanity check read IDs to subset
#`
setdiff(guppy_summary$read_id, split_ids(ShortRead::id(guppy.fq)))
## [1] "56772c11-431c-4081-a4f2-3434868945ac"
## [2] "fbfb526a-9101-4ea3-b550-8930954c01a2"
## [3] "62152ec8-2fbc-4a62-b6a9-0ba9d03265a3"
## [4] "e91965ce-3454-43d3-94c8-bf1100b483f5"
#` The missing long read is not in the Fastq file
setdiff(guppy_summary_pass$read_id, split_ids(ShortRead::id(guppy.fq)))
## character(0)
table(guppy_summary_pass$read_id %in% split_ids(ShortRead::id(guppy.fq)))
##
## TRUE
## 6377
length(intersect(split_ids(ShortRead::id(guppy.fq)), guppy_summary_pass$read_id))
## [1] 6377
#`
#` Filter guppy.fq for passed reads only
#`
guppy_subset_ind <- split_ids(ShortRead::id(guppy.fq))%in%guppy_summary_pass$read_id
guppy_pass.fq <- guppy.fq[guppy_subset_ind]
if(PORECHOP) {
expect_equal(length(guppy_pass.fq), 6377)
} else {
expect_equal(length(guppy_pass.fq), 6378)
}
dset <- rbind(
data.frame(Method="Albacore2", width=width(albacore_unique.fq)),
data.frame(Method="Guppy", width=width(guppy_pass.fq))
)
#`
#` Summaries in same order as Fastq files
#`
ind <- match(split_ids(ShortRead::id(albacore_unique.fq)), albacore_summary$read_id)
albacore_summary_pass_sort <- albacore_summary[ind,]
ind <- match(split_ids(ShortRead::id(guppy_pass.fq)), guppy_summary$read_id)
guppy_summary_pass_sort <- guppy_summary[ind,]
## Sanity check
expect_equal(as.character(albacore_summary_pass_sort$read_id), split_ids(ShortRead::id(albacore_unique.fq)))
expect_equal(as.character(guppy_summary_pass_sort$read_id), split_ids(ShortRead::id(guppy_pass.fq)))
if(!grepl("aklppr31", Sys.getenv("HOSTNAME"))) {
tmpl <- system.file(package="batchtools", "templates", "openlava-simple.tmpl")
bparam <- MulticoreParam(workers=WORKERS) ## , template=tmpl)
register(bparam)
}
#`
#` Extract quality scores
#`
metric <- mean
if(DEBUG) {
system.time(guppy_qual <- unlist(bplapply(1:n, filter_quality, quality(guppy_pass.fq), fun=metric, BPPARAM = bpparam("MulticoreParam"))))
system.time(albacore_qual <- unlist(bplapply(1:n, filter_quality, quality(albacore_unique.fq), fun=metric, BPPARAM = bpparam("MulticoreParam"))))
#` Sanity check
#` system.time(median_quality(, quality(guppy.fq)[1:n]))
} else {
system.time(guppy_qual <- unlist(bplapply(seq(guppy_pass.fq), filter_quality, quality(guppy_pass.fq), fun=metric, BPPARAM = bpparam("MulticoreParam"))))
system.time(albacore_qual <- unlist(bplapply(seq(albacore_unique.fq), filter_quality, quality(albacore_unique.fq), fun=metric, BPPARAM = bpparam("MulticoreParam"))))
}
## user system elapsed
## 1.316 0.880 4.032
We have base calling summaries which have a sequence_length_template
field, and filtered Fastq which have sequence widths, we neeed to check
they are equal;
#` Albacore
albacore_width <- data.frame(read_id = albacore_summary_pass_sort$read_id,
width_sum = albacore_summary_pass_sort$sequence_length_template,
width_fq = width(albacore_unique.fq)) # porechoped adapters
albacore_width_issues <- albacore_width[albacore_width$width_sum!=albacore_width$width_fq,]
head(albacore_width_issues)
## read_id width_sum width_fq
## 1 00986955-624b-462e-a76b-2811435e51ba 2599 2568
## 2 00c6ac2c-8ff3-4a0c-80c8-91d2abcd7bda 6075 6022
## 4 006c95c5-d9a6-43f7-9961-4ee6f4de65e8 12628 12593
## 5 00a27f19-9368-4eb0-b5cd-5c62b18bed36 11418 11386
## 6 00cf0df9-b459-4be9-b70f-64cd1df2a977 7521 7462
## 7 01531616-9556-4c00-b80d-8aec50c4ab34 1054 1008
dim(albacore_width_issues)
## [1] 5343 3
#` One read longer than
table(albacore_width$width_sum>=albacore_width$width_fq)
##
## TRUE
## 6082
plot(albacore_summary_pass_sort$sequence_length_template, width(albacore_unique.fq), pch=16, cex=0.3, log="xy")
#` Guppy
guppy_width <- data.frame(read_id = guppy_summary_pass_sort$read_id,
width_sum = guppy_summary_pass_sort$sequence_length_template,
width_fq = width(guppy_pass.fq)) # porechoped adapters
guppy_width_issues <- guppy_width[guppy_width$width_sum!=guppy_width$width_fq,]
head(guppy_width_issues)
## read_id width_sum width_fq
## 4 6f6da979-2c68-47b2-81e4-0186af5f19f8 4301 4288
## 8 d8ecb2ec-3b5e-43cc-918a-e19dbc893632 41917 41910
## 9 dee664c2-734c-46c8-8405-83d90182af58 899 873
## 11 2fc75e38-0e94-40ba-9da6-c02d92b3e587 172 157
## 13 4f0a06a4-4ec9-4365-88fd-df3a724ed916 6687 6673
## 14 aede5be0-233e-4560-90f2-bad448827c53 32439 32422
dim(guppy_width_issues)
## [1] 1624 3
table(guppy_width$width_sum>=guppy_width$width_fq)
##
## TRUE
## 6377
plot(guppy_summary_pass_sort$sequence_length_template, width(guppy_pass.fq), pch=16, cex=0.3, log="xy")
There are discrepancies.
albacore_summary_qual <- albacore_summary_pass_sort$mean_qscore_template
guppy_summary_qual <- guppy_summary_pass_sort$mean_qscore_template
if(DEBUG) {
dcols <- densCols(guppy_qual, width(guppy.fq)[1:n])
plot(guppy_qual, width(guppy.fq)[1:n], pch=16, cex=0.4, col=dcols,
main = "Guppy", xlab="Quality", ylab="Read Length")
dcols <- densCols(albacore_qual, width(albacore.fq)[1:n])
plot(albacore_qual, width(albacore.fq)[1:n], pch=16, cex=0.4, col=dcols,
main = "Albacore2", xlab="Quality", ylab="Read Length")
} else {
dcols <- densCols(guppy_qual, width(guppy.fq))
plot(guppy_qual, width(guppy.fq), pch=16, cex=0.4, col=dcols,
main = "Guppy", xlab="Quality", ylab="Read Length")
dcols <- densCols(albacore_qual, width(albacore.fq))
plot(albacore_qual, width(albacore.fq), pch=16, cex=0.4, col=dcols,
main = "Albacore2", xlab="Quality", ylab="Read Length")
}
#`
#` https://github.com/Actinidia/GBS/blob/98b36d3bc5fcb2cdc41b506dc17ffd21261397f0/RandomTagvsStandard/paper/figureGeneration.Rmd
#`
nBins <- seq(0, log10(max(dset$width)), length=70+1)
xAxis <- 0:7
plotObj1 <- histogram( ~ log10(width) | Method, data=dset, col="white", breaks=nBins,
aspect=1, layout=c(2,1), xlab=expression(log[10]("Read Length")),
type="count",
scales = list(x = list(alternating=FALSE, at=xAxis, rot=90, labels=(10^xAxis))),
between=list(x=0.3, y=0.3), panel=function(x,y, ...){
panel.histogram(x, ...)
panel.abline(v=N50(x), lty=3, col="red")
panel.text(x=N50(x), y=470, "N50", cex=0.75)
})
print(plotObj1)
f <- seq(0,1, length=201)
dset_Nf <- rbind(
data.frame(Method="Albacore2", f = f, Nf = sapply(f, function(x)Nf(albacore_summary_pass_sort$sequence_length_template, x))),
data.frame(Method="Guppy", f = f, Nf = sapply(f, function(x)Nf(guppy_summary_pass_sort$sequence_length_template, x)))
#` data.frame(Method="Guppy - Albacore", f=f,
#` Nf = sapply(f, function(x)Nf(width(guppy.fq), x)) - sapply(f, function(x)Nf(width(albacore.fq), x)))
)
#` contig length needed to cover 50% of the genome
xAxis <- seq(0, 120000, length=4)
plotObj3 <- xyplot(f ~ Nf | Method, data=dset_Nf, layout=c(2,1), between=list(x=0.3, y=0.3),
xlab=expression(paste(N[f], ", contig length to cover fraction (f) of the genome")),
scales = list(x = list(alternating=FALSE, at=xAxis, rot=0)),
ylab="fraction (f)",
panel=function(x,y, ...) {
panel.lines(x,y, ...)
ind <- which(y==0.5)
panel.text(x[ind], y[ind], label=bquote(N[50] == .(x[ind])))
})
print(plotObj3)
The total number of quality scores based on raw fastq data;
sum(width(quality(albacore.fq)))
## [1] 106571236
sum(width(quality(guppy.fq)))
## [1] 113899787
These are too many quality scores to visualize in R, and the files are reflective of all derived reads, not the ones in the region of interest only.
Extracting Base quality scores in region of interest;
#` Albacore fastq reads
albacore_region.fq <- readFastq("fastq_region/All_DS_RedFlesh_ON_run1_cas_after_porechop_dis.fastq.gz")
guppy_region.fq <- readFastq("fastq_region/After_PoreCHOP_RedFlesh_ON_GUPPY_cas.fastq.gz")
# dset<- rbind(
# data.frame(Method="Albacore2", quality=as(quality(albacore_region.fq), "numeric")),
# data.frame(Method="Guppy", quality=as(quality(guppy_region.fq), "numeric"))
# )
#`
#` Summaries in same order as Fastq files
#`
ind <- match(split_ids(ShortRead::id(albacore_region.fq)), albacore_summary$read_id)
albacore_summary_region_sort <- albacore_summary[ind,]
ind <- match(split_ids(ShortRead::id(guppy_region.fq)), guppy_summary$read_id)
guppy_summary_region_sort <- guppy_summary[ind,]
## Sanity check
expect_equal(as.character(albacore_summary_region_sort$read_id), split_ids(ShortRead::id(albacore_region.fq)))
expect_equal(as.character(guppy_summary_region_sort$read_id), split_ids(ShortRead::id(guppy_region.fq)))
dset<- rbind(
data.frame(Method="Albacore2", quality=albacore_summary_region_sort$mean_qscore_template),
data.frame(Method="Guppy", quality=guppy_summary_region_sort$mean_qscore_template)
)
dim(dset)
## [1] 312 2
#` Summary statistics (Text above Table 3)
with(dset, tapply(quality, Method, function(x)round(summary(x),2)))
## $Albacore2
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.05 9.34 9.95 9.72 10.24 11.07
##
## $Guppy
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.69 11.86 12.36 12.31 12.83 13.88
#`
#` https://github.com/Actinidia/GBS/blob/98b36d3bc5fcb2cdc41b506dc17ffd21261397f0/RandomTagvsStandard/paper/figureGeneration.Rmd
#`
nBins <- seq(0, max(dset$quality), length=90+1)
xAxis <- 0:7
#` Histogram
plotObj4 <- histogram( ~ quality | Method, data=dset, col="white", breaks=nBins,
aspect=1, layout=c(2,1), xlab=expression("Quality Score"),
type="count", main=which_coords(myb10_coords),
scales = list(x = list(alternating=FALSE)), #, at=xAxis, rot=90, labels=(xAxis))),
between=list(x=0.3, y=0.3), panel=function(x,y, ...){
panel.histogram(x, ...)
})
print(plotObj4)
#` Violin plot - see example(panel.violin)
#`set.seed(42)
#`dset_sub <- dset[sample(nrow(dset), 1000000),]
plotObj5 <- bwplot( quality ~ Method, data=dset, xlab="Base Caller", ylim=c(-0.5, 16),
aspect=1, ylab=expression("Base Call Quality Scores"),
type="count", main=which_coords(myb10_coords),
scales = list(x = list(alternating=FALSE)),
between=list(x=0.3, y=0.3),
panel = function(..., box.ratio) {
panel.violin(..., width=2, col = "transparent",
varwidth = FALSE, box.ratio = box.ratio)
panel.bwplot(..., fill = NULL, box.ratio = .1)
} )
print(plotObj5)
dat1 <- data.frame(
pos = seq(length(guppy_all$cov)) + guppy_all$start - 1,
coverage = as.integer(guppy_all$cov),
type = "Guppy",
orient = "all"
)
dat2 <- data.frame(
pos = seq(length(albacore_all$cov)) + albacore_all$start - 1,
coverage = as.integer(albacore_all$cov),
type = "Albacore2",
orient = "all"
)
dat3 <- data.frame(
pos = seq(length(guppy_fwd$cov)) + guppy_fwd$start - 1,
coverage = as.integer(guppy_fwd$cov),
type = "Guppy",
orient = "forward"
)
dat4 <- data.frame(
pos = seq(length(albacore_fwd$cov)) + albacore_fwd$start - 1,
coverage = as.integer(albacore_fwd$cov),
type = "Albacore2",
orient = "forward"
)
dat5 <- data.frame(
pos = seq(length(guppy_rev$cov)) + guppy_rev$start - 1,
coverage = as.integer(guppy_rev$cov),
type = "Guppy",
orient = "reverse complement"
)
dat6 <- data.frame(
pos = seq(length(albacore_rev$cov)) + albacore_rev$start - 1,
coverage = as.integer(albacore_rev$cov),
type = "Albacore2",
orient = "reverse complement"
)
#` Forward difference
cov1 <- guppy_fwd
cov2 <- albacore_fwd
if(length(cov2$cov) == length(cov1$cov)) {
delta_start <- cov2$start
}
if(length(cov2$cov) < length(cov1$cov)) {
toPad <- rep(0, abs(length(cov2$cov) - length(cov1$cov)))
cov2$cov <- append(cov2$cov, toPad)
delta_start <- cov2$start
}
if(length(cov1$cov) < length(cov2$cov)) {
toPad <- rep(0, abs(length(cov2$cov) - length(cov1$cov)))
cov1$cov <- append(cov1$cov, toPad)
delta_start <- cov1$start
}
delta <- cov1$cov - cov2$cov
dat7 <- data.frame(
pos = seq(length(delta)) + delta_start - 1,
coverage = as.integer(delta),
type = "Guppy-Albacore2",
orient = "forward"
)
#` Reverse complement difference
cov1 <- guppy_rev
cov2 <- albacore_rev
if(length(cov2$cov) == length(cov1$cov)) {
delta_start <- cov2$start
}
if(length(cov2$cov) < length(cov1$cov)) {
toPad <- rep(0, abs(length(cov2$cov) - length(cov1$cov)))
cov2$cov <- append(cov2$cov, toPad)
delta_start <- cov2$start
}
if(length(cov1$cov) < length(cov2$cov)) {
toPad <- rep(0, abs(length(cov2$cov) - length(cov1$cov)))
cov1$cov <- append(cov1$cov, toPad)
delta_start <- cov1$start
}
delta <- cov1$cov - cov2$cov
dat8 <- data.frame(
pos = seq(length(delta)) + delta_start - 1,
coverage = as.integer(delta),
type = "Guppy-Albacore2",
orient = "reverse complement"
)
dat <- rbind(dat3, dat4, dat5, dat6, dat7, dat8)
KRISPR RNA oligo matches to Genome;
#`crRNA genome coordinates;
patterns <- krispr_rna_oligos()
ref <- readDNAStringSet(Sys.getenv("REFERENCE"))
#` Sort wrt chromosome names
ref <- ref[sort(names(ref))]
#` Sequence matching
mindex1 <- vmatchPattern(patterns[[1]], ref)$Chr09
mindex2 <- vmatchPattern(patterns[[2]], ref)$Chr09
mindex3 <- vmatchPattern(reverseComplement(patterns[[3]]), ref)$Chr09
mindex4 <- vmatchPattern(patterns[[4]], ref)$Chr09
flank <- 10000
myb10_region <- myb10_coords + flank
ref_region <- narrow(ref["Chr09"], start(myb10_region), end(myb10_region))
window <- 500
#`compute the GC content in a sliding window (as a fraction) for a sequence no. 364
gc <- rowSums(letterFrequencyInSlidingView(ref_region[[1]], window, c("G", "C")))/window
plot(gc, type = 'l', main=paste(which_coords(myb10_coords), " (red lines)"))
window_start <- start(myb10_coords) - start(myb10_region) - window
window_end <- window_start + width(myb10_coords)
abline(v=window_start, col="red", lty=3)
abline(v=window_end, col="red", lty=3)
Coverage including difference;
Coverage excluding difference;
Examining coverage difference;
Examining coverage difference start region;
Examining coverage difference start region (magnified);
Visualizing Albacore start spike region samtools tview
;
samtools view -F 16 -u $ALBACORE_BAM Chr09:35542848-35542888 | samtools sort -o albacore_region_fwd.bam
samtools index albacore_region_fwd.bam
samtools view -f 16 -u $ALBACORE_BAM Chr09:35542848-35542888 | samtools sort -o albacore_region_rc.bam
samtools index albacore_region_rc.bam
echo "[ Albacore tview forward reads ]"
samtools tview -d T -p Chr09:35542848-35542888 albacore_region_fwd.bam $REFERENCE
echo "[ Albacore tview reverse complement reads ]"
samtools tview -d T -p Chr09:35542848-35542888 albacore_region_rc.bam $REFERENCE
## [ Albacore tview forward reads ]
## 35542851 35542861 35542871 35542881 35542891
## TCTGT*A*CTC*CG**T**C*TGTC**GGT*CGGTCT*CTCCTAT*CT**C**GA**ATT*C**C*GAAAGGC*A*TTGC
## ..... . ... .. . . .... ... ...... ....... .. . .. ... . . ....... . ....
## .....***...*AA**.AG.A..****...AAAT...*..*....*..**.**..**...*.**.*.......*.*..**
## .....C.*...*..**.**.*....**...*.....**A.*....*..**.GA..**...*.**.*.......*.T...T
## .T...*.*...*..**.**.*....**...*.A....*..*....*..**.**..**...*.**.*.......*.*....
## .A.TC*.C...*..**.CA.*CA..CG...**A....*......**..**.**AG**...*.**.*.......*.*....
## .....*.*...AA.**.**.*....**...*......*.......*..**.***.**...****.*.**....*.*....
## .....*.*...*.***A**.*....**...*......*.......*..**.***.**...*.**.*.*.....*.*....
## .....*.*...*..TC.**.*....**...*......*..*....*..**.**..**...*.**.*..***..G.****.
## ....*****.G*..**.**.*....**...*......*.......*..**.**..**...*.**.*.......*.*....
## .....GC*...*..**.**.*....**...C.A....*.......*..**.***.**...*.**.*.......*.*....
## ...*......*.......*..**.**..**...****.*.......G.****.
## ...*......*.......A..CC.**..**...****.*.......*.*....
## ...*......*..*....*..**.CA..**...****.*A......*.*....
## ...*.A....C.......*..**.**..**...*.**.*.*.....***G...
## ...*.A....*.......*..**.**..**...****.*.....A.*.*....
## ..*.A....*.......*..**.**..**...*.**.*.......*.*....
## ..*.A....*.......*..**.**..**...*.**.*.......*.*....
## ..*.A....*.......*..**.**AG**...*.**.*..G.*..*.*....
## .....*..*....*..C*.**..**...*.**A*AG.....*.*....
## ....*..*....*..**.**..**...*.**.*.......*.*....
## .....*..**.***.**...****.*.......*.*....
## ...*..**.**..AG...*.**.*.......*.*....
## ...*..**.**..**...*.**.*....****.*....
## ...*..**.***.**...****.*.......*.*..*A
## ...*..**.**..**...*.**.*.*.....*.*....
## ...*..**.**AG**...*.**.*.*.....***G...
## .*..**.**..**...*.**.*.......*.*....
## .*..**.**..**...*.**.*.......*G*.**A
## .*..**.**..**...*.*****......*.*....
## ..**.**..**...*.CA.*..***..*.*....
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...*.**.*.......***G.A.
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...*.**.A.......*.*....
## ..**.**..**...*.**.*AG.....*.*....
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...*.**.*.**....*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...****.*.......*.*....
## ..**.**..**...*.**.*...GA..***G.A.
## ..**.**..**...*.*****......*.*....
## ..**.**..**...****.*.......*.*....
## ..**.**..**...****.*.......*.*....
## ..**.**..**...*.**.*.......***G.A.
## ..**.**..**...****.*.......*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...*.**.*.*.....***G...
## ..**.**..**...****.*.*.....*.*....
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...****.*.*.....*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...*.**.*.**....*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...*.**.*.......***G...
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...****.*.*.....*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...*.**A*.......*.*....
## ..**.**..**...****.*.......*.*....
## ..**.**..**...*.**.*.......*.*....
## ..**.**..**...*.**.*.*.....*.*....
## ..**.**..**...*.*****......*.*....
## .**..**...*.**.*.*.....*G*....
## .**..**...*.**.*.......*.*....
## .**..**...*.**.*.*.....*C*....
## .**..**...****.*.*.....*.*..A.
## .**..**...*.**.*.......*.*....
## .**..**...*.**.*.*.....*.*....
## .**..**...*.**.*.......*.*....
## .**..**...*.**.*.......*.*....
## .**..**...****.*..***..*.*....
## .**..**...*.**.*.......*.*....
## ..**...A.**.*.......*.*....
## ..**...****.*.......****...
## .**...*.**.*.......***G...
## .**...*.**.*.......*.*....
## .**...*.**.*.......*.*....
## .**...****.*.......*.*....
## .**...*.**.*.......*.*....
## .**...*.**.*.*.....*.*..**
## .**...*.**.*.......***G.A.
## ...*.**.*.......*.*....
## ...*.**.*..***..*.*....
## ...*.**.*.*.....*.*....
## ...*.**.*.......***G.A.
## ...*.**.**......*.*....
## ...*.**.*.*.....*.*..TA
## ...*.**.*.......*.*....
## ...*.**.*.*.....*.*....
## ..*.**A*.......*.*....
## [ Albacore tview reverse complement reads ]
## 35542851 35542861 35542871 35542881 35542891
## TCT*GTA**CTCCGTCT**G**TCGGTC*G*GTC**TCTCC**T**A**T*C*TC***GA**ATTCC*G*AAA****GGC
## ... ... K....... . ...... . ..K ..... . . . . .. .. ..... . ... ...
## ,,,*,,,**,,,,,,,,**,**,,,,,,*a*,,,**,,,,,**,**,**,***,,***,,**,,,,,*,*,,,****,,,
## ,,,*,,,**,,,,,,,,**,**,,,,*,*t*,,,ag,,,,,**,**,**,*,*,,***,,**,,,,,***,,,****,,,
## ,,,*,,,**,*,,,,,c**,**,,,,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,,,**,,,,,,,,**,**,,,,,,aa*,,,**,,,,,**,**,**,*,*,,***,,**,*,,t*,*,,,****,,,
## ,,,*,,,**,,,,,,,,**,**,,,,,,*a*,ct**,,,,,**,**,**,***,,***,,**,,,,,*,*,,,****,,,
## ,,,*,,,**,,,,,,,,**,**,,,,,,*a*,****,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,,,**a,t,,,,,**,**,,,,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,,,**,,,,,,,,**,tt,,*,,,*a*,,,**,,,,,**,**,**,*t*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,,,**,,,,,,,,**,**,t,,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,g*a*,,,****,,,
## ,,,*,,,**,,*,,,,,**,**,,,,,,*a*,****,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## *,,*,,,at,,t,,c,c**,**,,,,,,*a*,****,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,c,,,**,c,a,,tc**,**,,,,,,*,a,,t**c,,,,**,**,**,*,*,t***a,**,,,*t*a*,,,****,,,
## ,,,*,,,**,,,,,,,,**,**,,,,,,*a*,****,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*ta,**a,t,,,,,ac,**,,,,,,*a*,,***,,,,,t*,**,**,*,*,,***,,**,,,,,*,a,,,****,,,
## ,**,**,t,,,,*a*,,***,,,,,**,**,**,*,*,****,,**,,,,,*,*,,,****,,,
## ,**,**,t,,,,*,*,****,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,*****,,
## ,**,**,t,,,,*a*,,,**,,,,,**,**,**,*,*,,t**,,**,,,,,*,*,,,****,,,
## ,,,,*a*,,,**,***,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,,*,*,,,t*,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,,*,*,,,**,,,,,**,**,**,c,*,t***,,**,*,,t*,*,,,****,,,
## ,,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,a,,,****,,,
## ,,,,*,*,,,**,,,,,ct,**,**,*,t,,***,,**,,,,,*,*,,,****,,t
## ,,,,***,,,**,,,,,**,**,**,***,,***,,**,,,,,*,*,,,****,,,
## ,,,,*,*,,,**,*,,,**,**,**,*,*,,***,,ag,a,,t*,*,,,****,,,
## ,,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,,***,,,,,**,**,**,*,*,,***,,**,,,*,*,*,gt****t,,
## ,,,*,*,****,,,,,*****,**,***,,***,,**,,,*,*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,*,*,,,**,*,,,**c**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,tcg,,**,,ctt*,*,,,****,,,
## ,,,*,*,,t**c,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,gc,**,***,,***,,**,,,*,*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,*******,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,*,*,,***,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,,,**,,,*,**,**,**,*,*,,***,,**,,,,,*,*,,,gttg,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,****,,**,,,,,*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,*,*,,,**,,,*,**,**,tc,*,*,,***,,**,*,,t*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,***********,,**,,,,,*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,****,,,,,**,**,**,*,*,,t**,,**,*,,t*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,g,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,,**,*,,t*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*,*,,,**,,,,,**,**,**,*,*,,***,****,,,,*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,*a*,,,**,,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,*,*,,,**,,,,,*****,**,*t*c,***,,**,,,,,*,*,,,****,,,
## ,*,*,,***,,,,,**c**,**,***,,***,,**,,,,,*,*,,,****,,,
## ,*,,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,**,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,**,,,,,**,**,**,*,*,,***,,**,,,,,g,*,,,****,,,
## ,,,,,**,**,**,*,*,,***,,**,,,,,g,*,,,****,,,
## ,,,,,**,**,t*,*,*,****,,**,,,,,*,a,,,****,,,
## ,,,,,**,**,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,,,,,**,**,**,*,*,,***,,**,*,,t*,*,,,****,,,
## ,,,,**,**,**,*,*,,***,,**,,,*,*,*,,,****,,,
## ,,,,*****,**,*,*,,***,,**,,,,,*,*,,,****,,,
## ,**,**,*,*,,***,,**,,,,,*,*,,,****,,t
Visualizing Guppy start spike region samtools tview
;
samtools view -F 16 -u $GUPPY_BAM Chr09:35542848-35542888 | samtools sort -o albacore_region_fwd.bam
samtools index albacore_region_fwd.bam
samtools view -f 16 -u $GUPPY_BAM Chr09:35542848-35542888 | samtools sort -o albacore_region_rc.bam
samtools index albacore_region_rc.bam
echo "[ Guppy tview forward reads ]"
samtools tview -d T -p Chr09:35542848-35542888 albacore_region_fwd.bam $REFERENCE
echo "[ Guppy tview reverse complement reads ]"
samtools tview -d T -p Chr09:35542848-35542888 albacore_region_rc.bam $REFERENCE
## [ Guppy tview forward reads ]
## 35542851 35542861 35542871 35542881 35542891
## TCTG*T*A*CTC*C****GTC*TGT*C*GGTCGGTCTCT*CCTA**TCT**C****G*A*ATT*CCG*A*AAG*GC*ATT
## .... . . ... . ... ... . ....R...... .... ... . . . ... ... . ... .. ...
## ...A*.*.TA..*A****A..*...*.A.C..A......*....**...**.****A*.G...*..A*.G...*..*...
## ....*.*.*...AA****...*...*.*...........**...**...C*.****.*.*...**..****..*..*...
## .*AA*.*.C...*.****...*...C.*.....A.....*....****.**.****A*.*...*...*.*...*..*...
## ....*.*.*...*.****...*...*.*....A....****...**...**.****.*.*...*...*.*...*..*...
## ....*.****G.*.****...*...*.*...........*....**...**.****.*.*...*...*.*...*..*...
## ....CC*.*...*.****...*...*.*.......A.A.**...**...**.GA**.*.*...*...***...*C.*...
## ....*.*.*...*.*****A.*...*.*...........*....**...**.******.*...*...***...*..*...
## ....*.***...*A****A.A*ACA*T*....A......**...**...**.****.*.*...*...*.*...*..*...
## ....*.*.*...*.AAATC..*...*.*...........**...**...C*.****.*.*...*...*.*****..*...
## ....*.*.*...*A****A..AC..*G*.A..A......**...**...**.******.*...**..***...*..*...
## ....*.GC*...*.****...*...*.*....A......*....**...**.****C*.*...*...*.*...*..*...
## ..*.*.*..T*.****..****.*.*A..........*....**...**.****.*.*...*...*.*...*..*...
## ...........*....**...**.****.A.*...*...*.*...*..*...
## ....A......**...**...CC.****.*.*...*...*.*...*..*...
## ...........*....**...**.****.*.*...*...*.*...*..**G.
## ....A.....**....**...**.****.*.*...*...*.*..**..*C..
## ...........*....**...**.****.*.*...*...*.*...*..*...
## ...........*....**...**.******.*...*.GA*.*...*.T*...
## ...........*....**...**.****.*.*...*...*.*...*..*...
## ...........*....**...**.****.*.*...*...*.*.GA*..*G..
## ...A......*....**...**.****.*.*...*...*.*...*..*...
## ..........*....**...**.****.*.*...*...*.*...*..*...
## ..........*....**...**.****.*.*...*...*.*...*..*...
## .....**...*....**...**.******.*...*...*.*...*..*...
## ..........*...G**...**.****A*.*...*...***...*..*...
## ..........*....**...**.****.*.*...**..*.*...*..*...
## ..........*....**...**.AGAA.*.*...**.A*.*...*..*...
## ..........*....**...C*.****.*.*...**..*.*...*..*...
## ...A......*....**..***.****.*.*...*...*.*...*..*...
## ..........**...**...**.****.*.*...*...*.*.***..**G.
## ...A......*....**...**.******.*...*..**.*...*..*...
## ...A......**...**...**.****.*.*...*...*.*...*..*...
## ...A......*....**...**.****.*.*...*...*.*G.**..*...
## ..........*....**...**.****.*.*...*...*.*...*..*...
## ...A......**...**...**.****.*.*...*..A*.*...*..*...
## ...A......*....**...**.****.*.*...*...*.*...*..*...
## ..........*....**...**.****.*.*...*...***...*..*...
## .........*....**...**.****.*.*...*...*.*...*..**G.
## .........*....**...C*.****.*.*...*...*.*...*..*...
## .........*....**...**.****.*.*...*...*.*...*..*...
## .........*....**...**.****.*.*...**..****..*..*...
## .........**...**...**.****.*.*...**..*.*...*..*...
## .........C....**...**.****.*.*...*...*.*.GA*..**G.
## .........**...**...*********.*...*...***...*..*C..
## .........*....**...**.****.*.*...*...*.*...*..*...
## .........*....TC...**.****.*.*...*.A.***...*..*GC.
## .........*....**...**.****.*.*...**..*.*...*..G.**
## .........*....**...**.****A*G*...*T..*.*...*..*...
## .........*....**...**.****.*.*...*...***...*..*...
## .........*....**...**.****.*.*...*...***...*..*...
## .........*....**...**.******.*...*...*.*...*..*...
## ........**...**...**.****.*.*...*...*.*...*..*...
## .......C....TC...**.****.*.*...*...*.*...*****..
## .......**...**...CC.****.*.*...*...*.*...*..*.*.
## .......**...**...**.****.*.*...*...*.*...*..*...
## .......*....TC...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...**..*.*...*..*...
## ......*....**...**.****.*.*...*...***...*..*...
## ......*....**...**.GG**.*.*...*...*.*...*..**G.
## ......*....**...**.T***C*.*G..*******...*..*...
## ......**...**...**.****.*.*...*...*.*.***..**G.
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...***...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*G..
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......**...**...**.****.*.*...*...***...*..*...
## ......**...**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*..*****.**..**G.
## ......**...**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...**.A*.*...*..*...
## ......**...**...C*.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...***...*..*...
## ......**...**...C*.****.*.*...*...*.*****..**G.
## ......*....**...**.****.*.*...*...*.*...*..*.*.
## ......C....**...C*.****.*.*...*...*.*...*..*...
## ......**...**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......**...**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...**..*.*...*..*...
## ......*....**...**.****.*.*...*...*.*****..*...
## ......**...**...**.****.*.*...*...*.*...*..*...
## ......*....**...**.****.*.*...*...*.*...*..*...
## .....*....**...**.****.*.*...*...***...*..*...
## .....*....**...**.****.*.*...*...***...*..*...
## .....*.*****...**.****T*.*...*AA.*.*.***..*C..
## .....*....**...CC.****.*.*...*.***.*...A..**G.
## .....**...**...**.****.*.*...**..*.*...*..*...
## .....**...**...**.****.*.*...*...****..*..*...
## .....**...**...**.****.*.*...*...*.*...*..*...
## .....C....**...**.****.***...**..***...*..*.**
## ....*...G**...**.******.*G..**..*.*...*..*...
## ...*....**...**.****.*.*...C...*.*...*..*...
## ...*....**...**.********...*...A.*...A..*G.*
## ...*....**...**.****A*G*...*...*.*...*..*...
## ...*....**...**.****.*.*...*.*******.*..*...
## ...*....**...**.****.*.*...**.A*.*...*..*...
## ...*....**...**.****.*.*...**..***...*..*...
## ....**...**.****.*.*...**..*.*...*..*...
## ...**...**.****.*.*...*...***...*..*...
## ...**...**.****.*.*...*...***...*..*...
## ...**...**.****.*.*...*...*.*...*..*...
## ...**...**.****.*.*...*...*.*...*..*...
## ...**...**.****.*.*...*...*.*...*..*...
## ..**...**.****.*.*...*...*.*...*..*...
## .*...*...A.*...*..*...
## [ Guppy tview reverse complement reads ]
## 35542851 35542861 35542871 35542881 35542891 35542901
## TCTGTACTCCGTC***TGTCGGTC*GGTCT*CT*CC*TA**T**C*TCGAATTCCG*AAAGGCATT**GC*CTC*TTCAT
## ............. ........ R.... .. .. .. . . .......... ......... .. ... .....
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,**,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,t,,,,*
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,c,**,,*,*,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,**,*,,*,,*,,**,**,*,,,,,,ctt,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,**,,,*,,,,*
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,ct,*,,*,,*,,**,tc,*,*,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,g*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,g,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,,,,,,,,,***,,,tt,,,g,,,,,*,,*,,*,,tc,**,*,,,,,*,,t,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,c,*,,a,,t***c,,,,,,,*,,,tc*,,*,,**,**,****,,,,,,ctt,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,,,,,,,,,,,***,,,,,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,t,,,,*
## ,,,,,,,,,,,,,tat,,,,,,,,*a,,,,**,*,,t,,**,**,*,,,,,,,,,,a,,,,,,,,,**,,*,,,*,**,,
## ,,,t,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,****,,,,
## ,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*a,,,,*,,*,,*,,**,**,*,,,,,*,,t,*,***,,,,,**,,*,,,*,,,,,
## ,,,,*a,,,,*****,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*,,,,,*,,*,,t,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*tc*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,*,,,,,,,,*,,,,,,,,,g*,g*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,g*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,g,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,*********,,,,,,,,*,,,,,,,,,**,,***,*,,,,,
## ,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,c,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,gtta,,,,**,,*,,,*,,,,*
## ,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,**,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,*,,,,,*,,*,,*,,**,**,*,,,**,,,,,*,,,,,,,,,**,,t,,,*,,,,,
## ,,,*a,,,,**,*,,*,,**,**,*,,,,,*,,t,*,,,,,,,,,**,,*,,,*,,,,*
## ,,,*a,,,,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,*,,*,,**,**,****,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,,c,,c,,*,,**,**,t,,,,,,,,,,*,,,,,,,,,gg,t*t,,*,,,,*
## ,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,,,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,*,,*,,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,*
## ,*,,*,,*,,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,*
## ,,*,,**,**,**,*,,,,,,,,,,*,,,,,,,,,**,,*,,,*,,,,,
## ,*,,**,**,*,,,,,,,*,,*,,,*,,,,,**,,*,,,*,,,,,
## ,*,,**,**,*,,,,,,,*,,*,,,,,,,,,**,,*,,,*,,,,,
## ,,**,**,*,,,,,,,,,,*,,,,,t,,,**,,*,,,*,,,,,
## ,,,,,*,,t,*,,,*,,,,,**,,*,,,*,,,,,
Examining coverage difference end region;
start_region <- start(mindex2)+45
end_region <- start(mindex3)-100
dat_region <- dat[start_region <= dat$pos & dat$pos <= end_region, ]
plotObj11 <- xyplot(coverage ~ pos | type, data=dat_region, subset = (dat_region$type=="Guppy-Albacore2"),
xlab="Physical position on Chr09", ylab="Coverage", aspect=1,
groups=orient, between=list(x=0.3, y=0.3), layout=c(1, 1),
main=paste(which_coords(myb10_coords), "end region"), xlim = c(start_region-200, end_region+200),
key = simpleKey(text = c("Forward", "Reverse"),
columns = 2, space = "top", lines=TRUE, points=FALSE),
scales=list(alternating=FALSE), panel=function(x,y, ...) {
panel.abline(v = start(myb10_coords), col = "darkgrey", lty = 1, cex=0.2)
panel.abline(v = end(myb10_coords), col = "darkgrey", lty = 1, cex=0.2)
if(plot_kripr_oligos) {
panel.abline(v = start(mindex2)+45, col = "green", lty = 8, cex = 0.9)
panel.abline(v = start(mindex3)-100, col = "red", lty = 8, cex = 0.2)
}
panel.superpose(x,y, type="l", ...)
})
print(plotObj11)
#` Summary statistics (forward/revcomp regions)
cat(paste0("[ Region ", "Chr09:", start_region,"-", end_region), "]\n")
## [ Region Chr09:35542893-35550589 ]
with(dat_region, tapply(coverage, list(type, orient), function(x)round(mean(x),2)))
## forward reverse complement
## Guppy 106.19 69.04
## Albacore2 98.29 68.97
## Guppy-Albacore2 7.89 0.08
with(dat_region, tapply(coverage, list(type, orient), function(x)round(sd(x),2)))
## forward reverse complement
## Guppy 5.51 1.92
## Albacore2 5.12 2.19
## Guppy-Albacore2 0.87 0.57
In the coordinate range Chr09:35542893-35550589 which is 98% of the 7,841 bp red flesh MYB10 gene locus region of interest, the difference in forward read coverage between base callers is relatively uniform where the coverage depth of the Guppy base caller is on average 7.9X higher than the Albacore2 base caller with standard deviation 0.87X.
Comparing on target coverage to off target coverage;
#` myb10 coverage
myb10_mean_cov <- with(dat_region, tapply(coverage, list(type), function(x)round(mean(x),2)))
#` myb10 region
myb10_width <- width(myb10_coords)
nsamples <- 1000
set.seed(42)
for( chr in names(ref)[10]) {
start_samples <- sort(sample((width(ref[1]) - 2*myb10_width), nsamples))
which_sample <- GRanges(seqnames = names(ref[chr]), ranges = IRanges(start=start_samples, width=myb10_width))
cov_sample <- bplapply(seq(nsamples), bamCoverage, which_sample, bamfile1, BPPARAM = bpparam("MulticoreParam"))
cvg_means <- sapply(cov_sample, function(x)ifelse(!is.null(x$cov), mean(x$cov), NA))
print(summary(cvg_means))
hist(cvg_means,
main=paste0(chr, ": Off target vs on myb10 site"),
xlab="Average coverage",
xlim=c(0, round(max(myb10_mean_cov), -2)))
points(x=myb10_mean_cov[["Guppy"]], y=1, col="green", pch=16, cex=0.5)
points(x=myb10_mean_cov[["Albacore2"]], y=1, col="red", pch=16, cex=0.5)
legend(x=70, y=125, legend=c("Guppy","Albacore2"), col=c("green","red"), pch=16, cex=0.8)
}
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.5533 1.0000 1.0000 1.1827 1.1497 4.6340 746