@@ -27,6 +27,11 @@ def get_onehot_tag(wildcards):
27
27
i = SAMPLES .index .get_loc (wildcards .sample ) % 32
28
28
return 1 << i
29
29
30
def get_anchor_mb(wc, input):
    """Estimate the memory, in whole MB, to reserve for one ``anchor`` job.

    Used as a Snakemake ``resources`` callable (``mem_mb = get_anchor_mb``).
    The second parameter must keep the name ``input`` even though it shadows
    the builtin: Snakemake matches resource-function arguments by name.

    Args:
        wc: Wildcards of the job (unused).
        input: The job's inputs; must provide ``kmc_pre``, ``kmc_suf`` and
            ``fasta``. Each file object is read through its ``size``
            attribute (assumed to be a size in bytes, as for Snakemake
            input files -- TODO confirm).

    Returns:
        int: 1000 MB of headroom plus
        ``1.5 * (total kmc_pre + kmc_suf size) + fasta size * len(kmc_pre)``
        converted to MB (10**6 bytes) and rounded up.
    """
    import math

    kmc_bytes = sum(f.size for f in input.kmc_pre + input.kmc_suf)
    fasta_bytes = input.fasta.size
    estimate_bytes = 1.5 * kmc_bytes + fasta_bytes * len(input.kmc_pre)
    # Resource callables should yield an int; round up so the estimate is
    # never truncated below what the formula asks for.
    return 1000 + math.ceil(estimate_bytes / 10**6)
34
+
30
35
rule anchor :
31
36
input :
32
37
kmc_pre = expand ("kmc/bitvec{i}.kmc_pre" , i = range (index .kmc_bitvec_count )),
@@ -36,29 +41,51 @@ rule anchor:
36
41
expand ("anchor/{{sample}}/bitmap.{step}.{ext}" , step = index .steps , ext = ["gz" ,"gzi" ]),
37
42
"anchor/{sample}/chrs.tsv" ,
38
43
log :
39
- logfile = "logs/anchor.{sample}.txt"
44
+ log = "logs/anchor.{sample}.log.txt"
45
+ benchmark :
46
+ "logs/anchor.{sample}.benchmark.txt"
47
+ resources :
48
+ mem_mb = get_anchor_mb ,
40
49
run :
41
- index [wildcards .sample ].run_anchor (index .bitvec_prefixes , log .logfile )
50
+ index [wildcards .sample ].run_anchor (index .bitvec_prefixes , log .log )
51
+
52
def get_bitvec_mb(wc, input):
    """Estimate the memory, in whole MB, to reserve for one ``kmc_bitvec`` job.

    Used as a Snakemake ``resources`` callable; the parameter name ``input``
    is required by Snakemake, which matches these arguments by name.

    Args:
        wc: Wildcards of the job (unused).
        input: The job's inputs; must provide ``dbs``, an iterable of file
            objects with a ``size`` attribute (assumed bytes -- TODO confirm).

    Returns:
        int: 1000 MB of headroom plus the size of the largest entry in
        ``input.dbs`` converted to MB (10**6 bytes) and rounded up. With no
        databases the result is the bare 1000 MB headroom.
    """
    import math

    # default=0 keeps an empty database list from raising ValueError.
    largest_bytes = max((db.size for db in input.dbs), default=0)
    return 1000 + math.ceil(largest_bytes / 10**6)
42
55
43
56
# Run `kmc_tools complex` on the operation-definition file kmc/opdef{i}.txt to
# produce the kmc/bitvec{i}.* database files.
# The per-sample one-hot databases are declared as inputs so the job waits for
# them and so get_bitvec_mb can size mem_mb from them (presumably they are the
# databases referenced inside the opdef file -- confirm against init_opdefs).
rule kmc_bitvec:
    input:
        opdef = "kmc/opdef{i}.txt",
        dbs = expand("kmc/{sample}.onehot.{ext}", sample=list(SAMPLES.index), ext=KMC_EXTS)
    output:
        expand("kmc/bitvec{{i}}.{ext}", ext=KMC_EXTS)
    log:
        "logs/kmc.bitvec{i}.log.txt"
    benchmark:
        "logs/kmc.bitvec{i}.benchmark.txt"
    resources:
        mem_mb = get_bitvec_mb,
    threads: 2
    shell:
        # Pass the declared thread count through to kmc_tools; otherwise the
        # `threads: 2` reservation above is not communicated to the tool.
        f"{EXTRA_DIR}/kmc_tools -t{{threads}} complex {{input.opdef}} > {{log}} 2>&1"
52
72
53
73
# Produce all kmc/opdef{i}.txt files in a single job by calling
# index.init_opdefs() (presumably the call that writes them -- its body is not
# visible here). Requires every sample's one-hot KMC database to exist first.
rule opdefs:
    input:
        expand(
            "kmc/{sample}.onehot.{ext}",
            sample=list(SAMPLES.index),
            ext=KMC_EXTS,
        )
    output:
        expand(
            "kmc/opdef{i}.txt",
            i=range(index.kmc_bitvec_count),
        )
    benchmark:
        "logs/kmc.opdef.benchmark.txt"
    run:
        index.init_opdefs()
60
82
61
- rule kmc_sample :
83
def get_kmc_mb(wc, input):
    """Estimate memory, in whole MB, from the total size of ``input.dbs``.

    Written as a Snakemake ``resources`` callable; the parameter name
    ``input`` is required by Snakemake, which matches these arguments by name.

    Args:
        wc: Wildcards of the job (unused).
        input: The job's inputs; must provide ``dbs``, an iterable of file
            objects with a ``size`` attribute (assumed bytes -- TODO confirm).

    Returns:
        int: 1000 MB of headroom plus the combined size of ``input.dbs``
        converted to MB (10**6 bytes) and rounded up.
    """
    import math

    # NOTE(review): the configured KMC memory limit (config["kmc"]["memory"])
    # is not part of this estimate; the kmc_count rule budgets it directly in
    # its own mem_mb expression. Confirm whether it should be added here.
    total_bytes = sum(db.size for db in input.dbs)
    return 1000 + math.ceil(total_bytes / 10**6)
87
+
88
+ rule kmc_count :
62
89
input :
63
90
get_fasta #"{fasta}"
64
91
output :
@@ -67,15 +94,19 @@ rule kmc_sample:
67
94
tag = get_onehot_tag
68
95
log :
69
96
"logs/kmc.{sample}.txt"
97
+ benchmark :
98
+ "logs/kmc.{sample}.benchmark.txt"
70
99
threads : config ["kmc" ]["threads" ]
100
+ resources :
101
+ mem_mb = 500 + config ["kmc" ]["memory" ]* 1000 ,
71
102
shell :
72
103
f"mkdir -p { TMPDIR } {{wildcards.sample}}; "
73
104
74
105
f"{ EXTRA_DIR } /kmc -k{{config[k]}} -t{{threads}} -m{{config[kmc][memory]}} "
75
106
f"-ci1 -cs1000 -fm {{input}} kmc/{{wildcards.sample}}.count { TMPDIR } {{wildcards.sample}} "
76
107
"> {log} 2>&1;"
77
108
78
- f"{ EXTRA_DIR } /kmc_tools -t4 transform kmc/{{wildcards.sample}}.count "
109
+ f"{ EXTRA_DIR } /kmc_tools -t{{threads}} transform kmc/{{wildcards.sample}}.count "
79
110
f"set_counts {{params.tag}} kmc/{{wildcards.sample}}.onehot "
80
111
">> {log} 2>&1;"
81
112
@@ -85,36 +116,40 @@ rule faidx:
85
116
output :
86
117
"{fasta}.fai"
87
118
log :
88
- "logs/faidx.{fasta}.txt"
119
+ "logs/faidx.{fasta}.log.txt"
120
+ benchmark :
121
+ "logs/faidx.{fasta}.benchmark.txt"
89
122
shell :
90
123
"samtools faidx {input} > {log} 2>&1"
91
124
92
125
# Sketch one sample's FASTA with `mash sketch`, writing tmp/{sample}.msh.
# The sketch path must be spelled identically in `output`, in the `-o`
# argument, and in mash_triangle's input, and must contain no whitespace:
# a space after "tmp/" would split the `-o` value into two shell arguments.
rule mash_sample:
    input:
        get_fasta
    output:
        "tmp/{sample}.msh"
    log:
        "logs/mash.sketch.{sample}.log.txt"
    benchmark:
        "logs/mash.sketch.{sample}.benchmark.txt"
    shell:
        "{EXTRA_DIR}/mash "
        "sketch -C {wildcards.sample} -o tmp/{wildcards.sample}.msh -r -s 10000 {input} "
        "> {log} 2>&1;"

# Run `mash triangle` over every sample's sketch and store its standard output
# in genome_dist.tsv; standard error goes to the log file.
rule mash_triangle:
    input:
        expand("tmp/{sample}.msh", sample=SAMPLES.index)
    output:
        "genome_dist.tsv"
    log:
        "logs/mash.triangle.log.txt"
    benchmark:
        "logs/mash.triangle.benchmark.txt"
    shell:
        "{EXTRA_DIR}/mash "
        "triangle -C -E {input} > {output} 2> {log}"
115
151
116
152
# Aggregate target: requests the genome distance table plus the chrs.tsv
# anchor output of every genome listed in index.anchor_genomes.
rule all:
    input:
        "genome_dist.tsv",
        expand(
            "anchor/{sample}/chrs.tsv",
            sample=index.anchor_genomes,
        )
0 commit comments