Skip to content

Commit 20455d4

Browse files
committed
add virgena
1 parent 632b5fd commit 20455d4

20 files changed

+4183
-4173
lines changed

config/config.yaml

+3-3
Original file line number | Diff line number | Diff line change
@@ -3,9 +3,9 @@ MerlinRef: ref/Merlin.BAC.fa
33
TB40ERef: ref/TB40E.GFP.fa
44
AD169Ref: ref/AD169.BAC.fa
55
PhixRef: ref/Phix.fa
6-
outpath: ../revision_output_1
7-
threads: 20
8-
runOnReads: true
6+
outpath: ../revision_output_5
7+
threads: 24
8+
runOnReads: false
99
rmHumanEcoli: true
1010
HumanRefBWAIdx: /net/sgi/viral_genomics/MHH/human_genome/hg19.genome.bwa
1111
EcoliRefBWAIdx: ref/Ecoli.NC_000913.fa

data/assembly.tar.gz

655 KB
Binary file not shown.

eval_assembly.smk

+11-18
Original file line number | Diff line number | Diff line change
@@ -3,15 +3,9 @@ include: "rules/load_config.smk"
33

44
assembly_dir = "/".join([project_dir, "results/assembly"])
55
metaquast_dir = "/".join([project_dir, "results/metaquast"])
6-
# assemblers = ["spades", "metaspades", "tadpole", "abyss",
7-
# "megahit", "ray", "idba", "vicuna", "iva", "savage"] # "haploflow", "pehaplo", "quasirecomb",
6+
assemblers = ["spades", "metaspades", "tadpole", "abyss",
7+
"megahit", "ray", "idba", "vicuna", "iva", "savage", "virgena"] # "haploflow", "pehaplo", "quasirecomb",
88

9-
assemblers = ['virgena']
10-
11-
v_samples_1 = ['TM-1-0', 'TA-1-1', 'TA-1-10', 'TA-1-0', 'TA-1-50', 'TM-1-1']
12-
v_samples_2 = ['TM-1-10', 'TA-0-1']
13-
v_samples_3 = ['TM-1-50']
14-
v_samples_4 = ['TM-0-1']
159

1610
metaquast_criteria = ["num_contigs", "Largest_contig", "Genome_fraction",
1711
"Duplication_ratio", "Largest_alignment", "LGA50",
@@ -51,12 +45,12 @@ onsuccess:
5145

5246
rule all:
5347
input:
54-
# metaquast_report = expand(metaquast_dir + "/{strain_sample}/report.html",
55-
# strain_sample=make_mix()),
56-
# all_sample_metaquast_table = results_dir + "/final_tables/all_sample_metaquast.tsv",
57-
# figure = results_dir + "/final_figures/assembly_metaquast_evaluation.pdf"
58-
expand("{assemblyDir}/{assembler}/{sample}/contigs.fasta",
59-
assemblyDir=assembly_dir, sample=v_samples_1, assembler=assemblers),
48+
metaquast_report = expand(metaquast_dir + "/{strain_sample}/report.html",
49+
strain_sample=make_mix()),
50+
all_sample_metaquast_table = results_dir + "/final_tables/all_sample_metaquast.tsv",
51+
figure = results_dir + "/final_figures/assembly_metaquast_evaluation.pdf"
52+
# expand("{assemblyDir}/{assembler}/{sample}/contig.done",
53+
# assemblyDir=assembly_dir, sample=sample_list, assembler=assemblers),
6054

6155
# expand(metaquast_dir + "/summary_for_figure/{mix}.{criteria}.merged.tsv",
6256
# mix=["TM", "TA"], criteria=metaquast_criteria),
@@ -98,12 +92,11 @@ rule metaquast:
9892
input:
9993
scaffolds = lambda wc: expand(assembly_dir + "/{assembler}/{{sample}}.{assembler}.scaffolds.fa",
10094
assembler=assemblers),
101-
ref_fai = lambda wc: [tb_ref + ".fai", ad_ref + ".fai"] if
102-
wc.mix == "TA" else [tb_ref + ".fai", merlin_ref + ".fai"]
95+
ref_fai = lambda wc: [tb_ref + ".fai", ad_ref + ".fai"] if \
96+
wc.mix == "TA" else [tb_ref + ".fai", merlin_ref + ".fai"]
10397
output:
104-
report = metaquast_dir + "/{mix}/{sample, [A-Z]+-[0-9\-]+}/report.html",
98+
reports = metaquast_dir + "/{mix}/{sample, [A-Z]+-[0-9\-]+}/report.html",
10599
tsv_report = metaquast_dir + "/{mix}/{sample, [A-Z]+-[0-9\-]+}/combined_reference/report.tsv"
106-
107100
conda:
108101
"config/conda_env.yaml"
109102
threads: threads

libs/virgena/tools/vsearch

100644 → 100755
File mode changed.

program/virgena_config_generator.py

+4-3
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
@click.argument("r2", type=click.Path(exists=True))
1818
@click.argument("ref", type=click.Path(exists=True))
1919
@click.argument("outdir", type=str)
20+
@click.argument("cd", type=click.Path(exists=True))
2021
@click.option('-l', '--insert', type=int, default=800, help='Insrtion length')
2122
@click.option('-t', '--threads', type=int, default=8, help='The number of threads')
2223
def config_generator(r1, r2, ref, outdir, insert, threads):
@@ -30,12 +31,12 @@ def config_generator(r1, r2, ref, outdir, insert, threads):
3031
<Reference>{ref}</Reference>
3132
<OutPath>{outdir}</OutPath>
3233
<ThreadNumber>{threads}</ThreadNumber>
33-
<BatchSize>10000</BatchSize>
34+
<BatchSize>1000</BatchSize>
3435
<ReferenceSelector>
3536
<Enabled>false</Enabled>
3637
<UseMajor>false</UseMajor>
3738
<ReferenceMSA></ReferenceMSA>
38-
<PathToUsearch>vsearch</PathToUsearch>
39+
<PathToUsearch>{cd}/libs/virgena/tools/vsearch</PathToUsearch>
3940
<UclustIdentity>0.98</UclustIdentity>
4041
<MinReadLength>50</MinReadLength>
4142
<MinContigLength>1000</MinContigLength>
@@ -95,7 +96,7 @@ def config_generator(r1, r2, ref, outdir, insert, threads):
9596
<MinFragmentCoverage>0.99</MinFragmentCoverage>
9697
<Debug>false</Debug>
9798
</Postprocessor>
98-
</config>'''.format(r1=r1, r2=r2, ref=ref, outdir=outdir, insert=insert, threads=threads)
99+
</config>'''.format(r1=r1, r2=r2, ref=ref, outdir=outdir, cd=cd, insert=insert, threads=threads)
99100

100101
print(out_config)
101102

0 commit comments

Comments
 (0)