Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Finish draft assembly track #12

Merged
merged 46 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
ed9d0dc
added output
hkaspersen Jun 7, 2023
a97d439
minor changes
hkaspersen Jun 7, 2023
5cd8e46
updated workflow
hkaspersen Jun 7, 2023
79d9c33
added script for generating input samplesheet
hkaspersen Jan 3, 2024
6f005af
further work
hkaspersen Jan 3, 2024
2bd6c95
added polypolish channel from bwa
hkaspersen Jan 3, 2024
075ec1b
further work
hkaspersen Jan 3, 2024
e31d1d9
minor changes
hkaspersen Jan 3, 2024
dee762c
updated the version of medaka and fixed channels
hkaspersen Jan 3, 2024
d64e1dd
added medaka process
hkaspersen Jan 3, 2024
78e8b27
renamed assembly track
hkaspersen Jan 3, 2024
7bef53b
added skeleton for long-read assembly
hkaspersen Jan 3, 2024
eb2e245
added skeleton for long read assembly
hkaspersen Jan 3, 2024
3d0f069
renamed track workflow
hkaspersen Jan 3, 2024
95117e0
updated process for simplicity
hkaspersen Jan 27, 2024
9e5768a
added etx.args for unicycler
hkaspersen Jan 27, 2024
1525b73
updated channels and input
hkaspersen Jan 27, 2024
b485a7d
corrections to input and parameters
hkaspersen Mar 5, 2024
5f83e71
fixed input channels
hkaspersen Mar 5, 2024
c6bb440
corrected errors with ext args for unicycler
hkaspersen Mar 5, 2024
e188fc0
added default parameter for ext args for unicycler
hkaspersen Mar 5, 2024
99f2dc4
updated channel
hkaspersen Apr 8, 2024
9620d98
set back to old style channel
hkaspersen Apr 8, 2024
d64fb87
changed default value of channel
hkaspersen Apr 8, 2024
dbc86e1
upadted workflow with combined QC and assembly
hkaspersen Apr 8, 2024
6fcffc5
added draft assembly workflow
hkaspersen Apr 8, 2024
e49954f
corrected samplesheet script
hkaspersen Apr 22, 2024
9bde988
updated with new parameters
hkaspersen Apr 22, 2024
5b3c40e
adjusted output
hkaspersen Apr 22, 2024
5300883
updated the workflow
hkaspersen Apr 22, 2024
1d027d6
added scripts for reporting
hkaspersen Apr 22, 2024
5c602b0
added scripts for merging reports
hkaspersen Apr 22, 2024
07ffd38
added rasusa module
hkaspersen Apr 22, 2024
2dfd41b
added scripts for reporting
hkaspersen Apr 22, 2024
54907e4
corrections to reporting
hkaspersen Apr 23, 2024
9f4aa1e
split merging script to two
hkaspersen Apr 23, 2024
d5addfe
added html output for reporting
hkaspersen Apr 23, 2024
e9bec70
added channels for merging input
hkaspersen Apr 23, 2024
d8d4bc8
split merging into two
hkaspersen Apr 23, 2024
e5fe833
increased default coverage value to 100
hkaspersen Apr 23, 2024
817d97c
added reporting processes
hkaspersen Apr 23, 2024
e452d29
corrected filename
hkaspersen Apr 24, 2024
5496fa8
corrected some output
hkaspersen Apr 24, 2024
180d7d6
cleaned up report
hkaspersen Apr 24, 2024
8593873
added more figure text
hkaspersen Apr 24, 2024
c8dbd4d
removed redundant parameter
hkaspersen Apr 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions bin/gen_report.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env Rscript

args <- commandArgs(trailingOnly = TRUE)
workflow <- args[1]
genome_size <- args[2]
species_name <- args[3]

# Generate rmarkdown report for the relevant track
if (workflow == "draft") {
rmarkdown::render(
input = 'report_draft_assembly.Rmd',
params = list(
quast_report = "transposed_report.tsv",
kraken_report = "kraken_reports.txt",
coverage_report = "coverage_reports.txt",
genome_size_val = genome_size,
species_name = species_name
)
)
}
89 changes: 89 additions & 0 deletions bin/generate_input.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env Rscript

args <- commandArgs(trailingOnly = TRUE)

option <- args[1]

if (option == "illumina") {
path <- args[2]
pattern <- args[3]

if (grepl("/$", path) == FALSE) {
path <- paste0(path, "/")
}

files <- list.files(path = path,
pattern = pattern)

files_path <- paste0(path, files)

filenames <- unique(sub(pattern, "", files))
df <- data.frame(sample = filenames)
forward <- grep("_R1_", files_path, value = TRUE)
reverse <- grep("_R2_", files_path, value = TRUE)

df$R1 <- forward
df$R2 <- reverse

write.table(
x = df,
file = "samplesheet.csv",
quote = FALSE,
row.names = FALSE,
sep = ","
)

} else if (option == "hybrid") {
path1 <- args[2]
path2 <- args[3]
pattern <- args[4]

if (grepl("/$", path1) == FALSE) {
path1 <- paste0(path1, "/")
}

if (grepl("/$", path2) == FALSE) {
path2 <- paste0(path2, "/")
}

files <- list.files(path = path1,
pattern = pattern)

np_files <- list.files(path = path2,
pattern = ".fastq.gz")

filenames <- sub(pattern, "", files)
filenames_np <- sub(".fastq.gz", "", np_files)

df <- data.frame(sample = filenames)
df$path <- paste0(path1, files)
df$read <- ifelse(grepl("_R1.fastq.gz", df$path), "R1", "R2")

df_np <- data.frame(sample = filenames_np,
NP = paste0(path2, np_files))

il_samplesheet <- reshape(
data = df,
direction = "wide",
v.names = "path",
idvar = "sample",
timevar = "read"
)

samplesheet <- merge(
x = il_samplesheet,
y = df_np,
by = "sample",
all = TRUE
)

names(samplesheet) <- c("sample","R1","R2","NP")

write.table(
x = samplesheet,
file = "samplesheet.csv",
quote = FALSE,
row.names = FALSE,
sep = ","
)
}
59 changes: 59 additions & 0 deletions bin/merge_cov_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env Rscript

library(impoRt)
library(dplyr)
library(formattable)
library(sparkline)

# Import and merge coverage reports
func_paste <- function(x, collapse = ",") paste(unique(x[!is.na(x)]), collapse = collapse)

calc_cov <- function(df) {
df %>%
rename("reference" = X1,
"pos" = X2,
"reads" = X3) %>%
mutate(reads_no_unmapped = ifelse(reads == 0, NA, reads),
test = ifelse(reads == 0, 0, 1)) %>%
group_by(reference) %>%
summarise(reference = unique(reference),
cov_perc = round(sum(test)/max(pos)*100, 2),
mean_reads_per_base = round(mean(reads_no_unmapped, na.rm = TRUE), 2),
median_reads_per_base = median(reads_no_unmapped, na.rm = TRUE),
sd_reads_per_base = round(sd(reads_no_unmapped, na.rm = TRUE), 2)) %>%
ungroup() %>%
mutate(mean_cov = round(mean(mean_reads_per_base),2),
median_cov = median(median_reads_per_base),
mean_perc_cov = round(mean(cov_perc),2),
Coverage = spk_chr(mean_reads_per_base, width = 180, height = 25))
}

coverage_reports <- get_data(
".",
pattern = "genomecov.txt",
col_names = FALSE,
df = FALSE
) %>%
lapply(., calc_cov) %>%
bind_rows(.id = "sample") %>%
group_by(sample) %>%
summarise_all(list(func_paste)) %>%
mutate(sample = sub("_genomecov.txt", "", sample),
`Mean coverage` = color_bar(color = "#d9d9d9")(mean_cov),
`Median coverage` = color_bar(color = "#ccebc5")(median_cov),
`Mean percent coverage` = color_bar(color = "#fccde5")(mean_perc_cov)) %>%
select(sample,
`Mean coverage`,
`Median coverage`,
`Mean percent coverage`,
Coverage) %>%
rename("Assembly" = sample) %>%
mutate(Coverage = gsub("\n", "", Coverage))

write.table(
coverage_reports,
"coverage_reports.txt",
sep = "\t",
quote = FALSE,
row.names = FALSE
)
28 changes: 28 additions & 0 deletions bin/merge_kraken_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env Rscript

library(impoRt)
library(dplyr)

# Import and merge kraken2 reports
kraken_reports <- get_data(
".",
pattern = "report",
col_names = FALSE,
convert = TRUE
) %>%
rename("perc_fragments" = X1,
"num_fragments_root" = X2,
"num_fragments_direct" = X3,
"rank_code" = X4,
"NCBI_taxon" = X5,
"scientific_name" = X6) %>%
mutate_all(~trimws(., which = "both")) %>%
mutate(ref = sub("\\.kr2\\.report", "", ref))

write.table(
kraken_reports,
"kraken_reports.txt",
sep = "\t",
quote = FALSE,
row.names = FALSE
)
Loading