Skip to content

Commit

Permalink
feat: allowing only FW reads with a UMI (#152)
Browse files Browse the repository at this point in the history
---------

Co-authored-by: Max Schubach <[email protected]>
  • Loading branch information
visze and visze authored Dec 17, 2024
1 parent e317560 commit 56b2254
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 17 deletions.
47 changes: 30 additions & 17 deletions workflow/rules/common.smk
Original file line number Diff line number Diff line change
Expand Up @@ -605,42 +605,49 @@ def getUMIBamFile(project, condition, replicate, type):
)


def useUMI(project):
def useUMI(project, type="DNA"):
"""
helper to check if UMI should be used
"""
return "UMI" in experiments[project] or "DNA_UMI" in experiments[project]
return "UMI" in experiments[project] or f"{type}_UMI" in experiments[project]


def noUMI(project):
def onlyFW(project, type="DNA"):
    """
    helper to check if only forward reads are configured for the given type.

    Returns True when no ``<type>_BC_R`` entry (e.g. ``DNA_BC_R``) is present
    in ``experiments[project]``, and False otherwise.
    """
    reverse_bc_key = f"{type}_BC_R"
    has_reverse_bc = reverse_bc_key in experiments[project]
    return not has_reverse_bc


def noUMI(project, type="DNA"):
"""
helper to check if UMI should not be used
"""
return (
"UMI" not in experiments[project]
and "DNA_UMI" not in experiments[project]
and "DNA_BC_R" in experiments[project]
and f"{type}_UMI" not in experiments[project]
and f"{type}_BC_R" in experiments[project]
)


def onlyFWByLength(project):
def onlyFWByLength(project, type="DNA"):
"""
helper to check if only forward reads should be used (length option)
"""
return (
"UMI" not in experiments[project]
and "DNA_BC_R" not in experiments[project]
and f"{type}_BC_R" not in experiments[project]
and "adapter" not in config["experiments"][project]
)


def onlyFWbyCutadapt(project):
def onlyFWbyCutadapt(project, type="DNA"):
"""
helper to check if only forward reads should be used (cutadapt option)
"""
return (
"UMI" not in experiments[project]
and "DNA_BC_R" not in experiments[project]
and f"{type}_BC_R" not in experiments[project]
and "adapter" in config["experiments"][project]
)

Expand All @@ -649,22 +656,28 @@ def getRawCounts(project, type):
"""
Helper to get the correct raw counts file (umi/noUMI or just FW read)
"""
if useUMI(project):
return (
"results/experiments/{project}/counts/useUMI.{condition}_{replicate}_%s_raw_counts.tsv.gz"
% type
)
elif noUMI(project):
if useUMI(project, type):
if onlyFW(project, type):
return (
"results/experiments/{project}/counts/onlyFWUMI.{condition}_{replicate}_%s_raw_counts.tsv.gz"
% type
)
else:
return (
"results/experiments/{project}/counts/useUMI.{condition}_{replicate}_%s_raw_counts.tsv.gz"
% type
)
elif noUMI(project, type):
return (
"results/experiments/{project}/counts/noUMI.{condition}_{replicate}_%s_raw_counts.tsv.gz"
% type
)
elif onlyFWByLength(project):
elif onlyFWByLength(project, type):
return (
"results/experiments/{project}/counts/onlyFWByLength.{condition}_{replicate}_%s_raw_counts.tsv.gz"
% type
)
elif onlyFWbyCutadapt(project):
elif onlyFWbyCutadapt(project, type):
return (
"results/experiments/{project}/counts/onlyFWByCutadapt.{condition}_{replicate}_%s_raw_counts.tsv.gz"
% type
Expand Down
1 change: 1 addition & 0 deletions workflow/rules/counts.smk
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ include: "counts/counts_demultiplex.smk"
include: "counts/counts_umi.smk"
include: "counts/counts_noUMI.smk"
include: "counts/counts_onlyFW.smk"
include: "counts/counts_onlyFWWithUMI.smk"


rule counts_filter_counts:
Expand Down
34 changes: 34 additions & 0 deletions workflow/rules/counts/counts_onlyFWWithUMI.smk
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
######################################
### Everything before assigning BC ###
######################################


### Raw BC/UMI counts from FW and UMI reads (no BAM, no demultiplexing) ###


rule counts_onlyFWUMI_raw_counts:
    """
    Count barcode/UMI pairs when only a forward read and a UMI read are used.

    The sequence line of each FW read is pasted next to the sequence line of
    the UMI read at the same position (this presumes both FASTQ files list
    the reads in the same order). Pairs whose UMI is not exactly
    `umi_length` characters long are dropped. The output is a gzipped,
    tab-separated table with one line per unique pair:
    FW sequence, UMI sequence, number of occurrences.
    """
    conda:
        "../../envs/default.yaml"
    input:
        fw_fastq=lambda wc: getFW(wc.project, wc.condition, wc.replicate, wc.type),
        umi_fastq=lambda wc: getUMI(wc.project, wc.condition, wc.replicate, wc.type),
    output:
        "results/experiments/{project}/counts/onlyFWUMI.{condition}_{replicate}_{type}_raw_counts.tsv.gz",
    params:
        umi_length=lambda wc: config["experiments"][wc.project]["umi_length"],
    log:
        temp(
            "results/logs/counts/onlyFW/onlyFWUMI_raw_counts_by_length.{project}.{condition}.{replicate}.{type}.log"
        ),
    # The pipeline runs inside a subshell so that stderr of every stage
    # (zcat, awk, sort, uniq, gzip) is written to the log; a trailing
    # `2> {log}` on the last command would capture gzip's stderr only.
    shell:
        """
        (
        paste <(zcat {input.fw_fastq} | awk 'NR%4==2 {{print $1}}') \
        <(zcat {input.umi_fastq} | awk 'NR%4==2 {{print $1}}') | \
        awk -v 'OFS=\\t' 'length($2) == {params.umi_length} {{print $0}}' | \
        sort | uniq -c | \
        awk -v 'OFS=\\t' '{{ print $2,$3,$1 }}' | \
        gzip -c > {output}
        ) 2> {log}
        """
5 changes: 5 additions & 0 deletions workflow/schemas/experiment_file.schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ anyOf:
- DNA_BC_R
- RNA_UMI
- RNA_BC_R
- required:
- DNA_BC_F
- RNA_BC_F
- DNA_UMI
- RNA_UMI
- required:
- DNA_BC_F
- RNA_BC_F
Expand Down

0 comments on commit 56b2254

Please sign in to comment.