Skip to content

Commit 0111773

Browse files
committed
use one ref when the sample is from pure strain
1 parent 4c49ba4 commit 0111773

File tree

6 files changed

+66
-11
lines changed

6 files changed

+66
-11
lines changed

README.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@ In this benchmarking study: variants callers `BCFtools` (v1.9), `VarScan` (v2.4.
99

1010
To reproduce the output, you need to use `Bioconda`.
1111

12-
Please follow the instruction [here](https://bioconda.github.io) to install `Bioconda`. And then you need to install `snakemake` and Python package `click`:
12+
Please follow the instructions [here](https://bioconda.github.io) to install `Bioconda`. Then install `snakemake`, `csvtk`, and the Python package `click`:
1313

1414
```shell
1515
conda install snakemake=5.3.0
16+
conda install csvtk=0.18.2
1617
conda install click=7.0
18+
1719
```
1820

1921
After this has been done, download the pipeline onto your system:

config/config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ MerlinRef: ref/Merlin.BAC.fa
33
TB40ERef: ref/TB40E.GFP.fa
44
AD169Ref: ref/AD169.BAC.fa
55
PhixRef: ref/Phix.fa
6-
outpath: ../revision_output_3
6+
outpath: ../revision_output_4
77
threads: 20
88
runOnReads: false
99
rmHumanEcoli: true

eval_assembly.smk

+28-6
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,23 @@ metaquast_criteria = ["num_contigs", "Largest_contig", "Genome_fraction",
2020
def make_mix():
2121
return ["{}/{}".format(sample.split("-")[0], sample) for sample in sample_list]
2222

23+
def get_assembly_ref(wc):
    """Return the comma-separated reference list for a sample.

    `wc` is the wildcards object passed by the rule's `params`; only
    `wc.sample` is read. Samples whose name starts with 'TA' pair the
    TB40E reference with AD169, all others pair it with Merlin. A sample
    name ending in '-1-0' gets only the TB40E reference, one ending in
    '-0-1' gets only the partner reference, and anything else gets both
    (TB40E first).
    """
    # The partner reference is the only thing that depends on the 'TA' prefix.
    partner_ref = ad_ref if wc.sample.startswith('TA') else merlin_ref

    # Suffixes '-1-0' / '-0-1' presumably mark pure-strain samples, which
    # are evaluated against a single reference.
    if wc.sample.endswith('-1-0'):
        return ','.join([tb_ref])
    if wc.sample.endswith('-0-1'):
        return ','.join([partner_ref])
    return ','.join([tb_ref, partner_ref])
39+
2340

2441
onsuccess:
2542
print("The assembly evaluation is done!")
@@ -85,8 +102,9 @@ rule metaquast:
85102
threads: threads
86103
params:
87104
metaquast_outdir = metaquast_dir + "/{mix}/{sample}",
88-
ref = lambda wc: ",".join(
89-
[tb_ref, ad_ref]) if wc.mix == "TA" else ",".join([tb_ref, merlin_ref])
105+
ref = get_assembly_ref
106+
# ref = lambda wc: ",".join(
107+
# [tb_ref, ad_ref]) if wc.mix == "TA" else ",".join([tb_ref, merlin_ref])
90108
shell:
91109
"""
92110
metaquast.py --unique-mapping -o {params.metaquast_outdir} -R {params.ref} {input.scaffolds} -t {threads}
@@ -99,16 +117,20 @@ rule summarize:
99117
strain_sample=make_mix())
100118
output:
101119
metaquast_dir + "/summary_for_figure/{mix}.{criteria}.merged.tsv"
102-
conda:
103-
"config/conda_env.yaml"
120+
# conda:
121+
# "config/conda_env.yaml"
104122
params:
105-
input_files = metaquast_dir + "/{mix}/*/summary/TSV/{criteria}.tsv"
123+
input_files = metaquast_dir + "/{mix}/*/summary/TSV/{criteria}.tsv",
124+
joiner = cd + '/program/join_tsv.py'
106125
shell:
107126
"""
108-
paste -d"\t" {params.input_files}|sed '1s/\.scaffolds//g' |csvtk transpose -Tt -|\
127+
python {params.joiner} {params.input_files}|sed '1s/\.scaffolds//g' |csvtk transpose -Tt -|\
109128
awk 'NR==1{{print}}$1!="Assemblies"{{print}}'|sed '1s/\.GFP\|\.BAC//g' > {output}
110129
"""
111130

131+
# paste -d"\t" {params.input_files}|sed '1s/\.scaffolds//g' |csvtk transpose -Tt -|\
132+
# awk 'NR==1{{print}}$1!="Assemblies"{{print}}'|sed '1s/\.GFP\|\.BAC//g' > {output}
133+
112134
# Visualize the evaluation
113135
rule visualize:
114136
input:

program/join_tsv.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import click
2+
from sys import stdout
3+
import pandas as pd
4+
5+
6+
@click.command()
@click.argument('tsv', nargs=-1, required=True)
@click.option('-o', '--out', help='Output joined tsv')
def join_tsv(tsv, out):
    """Join TSV files side by side on their first column.

    Each file in TSV is read as tab-separated text with its first column
    used as the row index. The tables are concatenated column-wise, the
    row index is written back as a leading 'Assemblies' column, and the
    result is written tab-separated to --out, or to standard output when
    --out is not given.
    """
    # At least one input is required: pd.concat raises ValueError on an
    # empty list, so click rejects the call with a usage error instead.
    frames = [pd.read_csv(path, sep='\t', index_col=0) for path in tsv]

    joined_df = pd.concat(frames, axis=1, sort=False)

    # Turn the shared row index into an ordinary, named first column.
    joined_df.index.name = 'Assemblies'
    joined_df.reset_index(level=0, inplace=True)

    out_file = out if out else stdout
    joined_df.to_csv(out_file, sep='\t', index=False)
25+
26+
27+
if __name__ == '__main__':
28+
join_tsv()

run_benchmark.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import snakemake
1616

1717
wd = os.path.dirname(os.path.realpath(__file__))
18-
VERSION = '0.2'
18+
VERSION = '0.3'
1919

2020

2121
class SpecialHelpOrder(click.Group):

scripts/metaquast_visualize.R

+5-2
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,11 @@ read_metaquast <- function(file_name, dir = input_dir) {
2525

2626
metaquast <- read.table(file, sep = "\t", header = T, na.strings = "-", check.names = F) %>%
2727
separate(Assemblies, c("sample", "assembler"), sep = "\\.")
28-
ref1 <- 3
29-
ref2 <- 4
28+
ref1 <- 'TB40E'
29+
ref2 <- ifelse(startsWith(basename(file_name), 'TA'), 'AD169', 'Merlin')
30+
31+
# ref1 <- 3
32+
# ref2 <- 4
3033
if (grepl("NGA50", file_name)) {
3134
metaquast[, ref1] <- ifelse(endsWith(metaquast$sample, "_0_1"), -1, metaquast[, ref1])
3235
metaquast[, ref2] <- ifelse(endsWith(metaquast$sample, "_1_0"), -1, metaquast[, ref2])

0 commit comments

Comments
 (0)