Skip to content

Commit

Permalink
Add option to show all coverage plots from several sequencing runs in…
Browse files Browse the repository at this point in the history
…to the same coverage plot
  • Loading branch information
judithbergada committed Mar 6, 2025
1 parent 94ff3cb commit 860db52
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 21 deletions.
50 changes: 32 additions & 18 deletions cov_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,33 +9,47 @@ library(cowplot)

# set path to data (terminal slash is important!)
path <- paste0(args[1],"/")

files = list.files(path, pattern = "depth")
data = data.frame()

for (i in files) {

if (file.info(paste0(path, i))$size == 0) {
depth_i = c(0) #if depth file is empty
names(depth_i) = c(1)
} else {
depth_i = read_delim(paste0(path, i), "\t", col_names = FALSE, trim_ws = TRUE, col_types = "cii") %>% pull(X3)
names(depth_i) = read_delim(paste0(path, i), "\t", col_names = FALSE, trim_ws = TRUE) %>% pull(X2)
}
data_i = data.frame(pos = 1:max(as.numeric(names(depth_i)))) %>%
mutate(cov = ifelse(is.na(depth_i[as.character(pos)]), 0, depth_i[as.character(pos)])) %>%
mutate(file = str_sub(i, 1, -9)) %>%
mutate(iteration = str_sub(i, -7, -7))
data = rbind(data, data_i)
get_data <- function(path, data) {
files = list.files(path, pattern = "depth")

for (i in files) {

if (file.info(paste0(path, i))$size == 0) {
depth_i = c(0) #if depth file is empty
names(depth_i) = c(1)
} else {
depth_i = read_delim(paste0(path, i), "\t", col_names = FALSE, trim_ws = TRUE, col_types = "cii") %>% pull(X3)
names(depth_i) = read_delim(paste0(path, i), "\t", col_names = FALSE, trim_ws = TRUE) %>% pull(X2)
}
data_i = data.frame(pos = 1:max(as.numeric(names(depth_i)))) %>%
mutate(cov = ifelse(is.na(depth_i[as.character(pos)]), 0, depth_i[as.character(pos)])) %>%
mutate(file = str_sub(i, 1, -9)) %>%
mutate(iteration = str_sub(i, -7, -7))
data = rbind(data, data_i)
}
return(data)
}

if (length(args)==1) {
data = get_data(path, data)
} else if (length(args)==2) {
subdirs = strsplit(args[2], split = " ")[[1]]
for (out_dir in subdirs) {
new_path = paste0(path, out_dir, "/")
data = get_data(new_path, data)
}
data <- data %>% group_by(pos, iteration) %>% summarise(cov=sum(cov))


}
plot = data %>%
ggplot(aes(x = pos, y = cov, color = iteration)) +
geom_line(size = .2) +
geom_line(linewidth = .2) +
xlab("genome position") +
ylab("coverage (reads)") +
scale_y_log10(breaks = c(1,10,100,1000,10000,100000)) +
facet_wrap( ~ file) +
panel_border() +
background_grid(major = "xy") +
theme(axis.title = element_text(size = 7.5)) +
Expand Down
22 changes: 19 additions & 3 deletions smaltalign.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Options:
[-c or --mincov]
[-o or --outdir]
[-d or --indels]
[-m or --mergecov]
"""
}
# Describe usage of the tool and provide help
Expand Down Expand Up @@ -46,6 +47,9 @@ Options:
-d, --indels:
If required, extract indels as well.
Default: don't perform this step.
-m, --mergecov:
If required, generate an additional covplot merging all results.
Default: don't perform this step.
Required arguments:
-r, --reference:
Path to the reference file needed for the analysis.
Expand All @@ -68,17 +72,18 @@ for ARGS in "$@"; do
"--mincov") set -- "$@" "-c" ;;
"--outdir") set -- "$@" "-o" ;;
"--indels") set -- "$@" "-d" ;;
"--mergecov") set -- "$@" "-m" ;;
"--help") set -- "$@" "-h" ;;
*) set - "$@" "$ARGS"
esac
done

# Define defaults
outdir="./"; n_reads=200000; iterations=4; indels=0
varthres=15; mincov=3
varthres=15; mincov=3; mergecov=0

# Define all parameters
while getopts 'r:n::i::t::c::o::dh' flag; do
while getopts 'r:n::i::t::c::o::dmh' flag; do
case "${flag}" in
r) reference=${OPTARG} ;;
n) n_reads=${OPTARG} ;;
Expand All @@ -87,6 +92,7 @@ while getopts 'r:n::i::t::c::o::dh' flag; do
c) mincov=${OPTARG} ;;
o) outdir=${OPTARG} ;;
d) indels=${OPTARG} ;;
m) mergecov=${OPTARG} ;;
h) print_help
exit 1;;
*) print_usage
Expand Down Expand Up @@ -119,6 +125,7 @@ echo -e 'varthres: ' $varthres
echo -e 'mincov: ' $mincov
echo -e 'outdir: ' $outdir
echo -e 'indels: ' $indels
echo -e 'mergecov: ' $mergecov


### loop over list of files to analyse
Expand All @@ -129,6 +136,9 @@ for i in $list; do

ref=$reference
name=$(basename $i | sed 's/_L001_R.*//' | sed 's/.fastq.gz//'| sed 's/.fastq//')
if [[ $mergecov != 0 ]]; then
all_names="${all_names}${name} "
fi

if [ $num_files -gt 1 ]; then
mkdir ${outdir}/${name}
Expand Down Expand Up @@ -258,4 +268,10 @@ for i in $list; do

Rscript ${script_dir}/cov_plot.R ${new_outdir}
Rscript ${script_dir}/wts.R ${new_outdir} $varthres $mincov
done
done

if [[ $mergecov != 0 ]]; then
echo ${outdir}
echo "${all_names}"
Rscript ${script_dir}/cov_plot.R ${outdir} "${all_names}"
fi

0 comments on commit 860db52

Please sign in to comment.