Skip to content

Commit 1373ffc

Browse files
committed
Stats fixed
1 parent a20e09b commit 1373ffc

File tree

1 file changed

+24
-21
lines changed

1 file changed

+24
-21
lines changed

src/stats_main.cc

+24-21
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include <bitset>
1212
#include <unordered_set>
1313

14+
#include <boost/dynamic_bitset.hpp>
15+
1416
#include "align.h"
1517
#include "common.h"
1618
#include "fasta.h"
@@ -166,36 +168,35 @@ void stats(const string &ref_path, const string &bed_path)
166168

167169
/******************************************************************************/
168170

169-
#include <boost/dynamic_bitset.hpp>
170-
171-
void get_differences()
171+
void get_differences(const string &ref_path, const string &bed_path,
172+
const string &wgac_path)
172173
{
173174
map<string, boost::dynamic_bitset<>> sedef;
174175
map<string, boost::dynamic_bitset<>> wgac;
175176

176177
string s;
177-
ifstream fin("results/out.hg19.bed", ifstream::in);
178+
ifstream fin(bed_path);
178179
while (getline(fin, s)) {
179180
string cigar;
180181
Hit h = Hit::from_bed(s, &cigar);
181182

182-
auto c1 = fmt::format("{}{}", h.query->name, "+-"[h.query->is_rc]);
183-
auto c2 = fmt::format("{}{}", h.ref->name, "+-"[h.ref->is_rc]);
183+
auto c1 = fmt::format("{}", h.query->name, "+-"[h.query->is_rc]);
184+
auto c2 = fmt::format("{}", h.ref->name, "+-"[h.ref->is_rc]);
184185
if (sedef.find(c1)==sedef.end()) sedef[c1]=boost::dynamic_bitset<>(250000000);
185186
if (sedef.find(c2)==sedef.end()) sedef[c2]=boost::dynamic_bitset<>(250000000);
186187
for (int i = h.query_start; i < h.query_end; i++) sedef[c1].set(i);
187188
for (int i = h.ref_start; i < h.ref_end; i++) sedef[c2].set(i);
188189
}
189190

190-
eprn("sedef done");
191+
eprn("SEDEF reading done!");
191192

192-
ifstream fiw("data/GRCh37GenomicSuperDup.tab");
193+
ifstream fiw(wgac_path);
193194
getline(fiw, s);
194195
unordered_set<string> seen;
195196
while (getline(fiw, s)) {
196197
Hit h = Hit::from_wgac(s);
197-
auto c1 = fmt::format("{}{}", h.query->name, "+-"[h.query->is_rc]);
198-
auto c2 = fmt::format("{}{}", h.ref->name, "+-"[h.ref->is_rc]);
198+
auto c1 = fmt::format("{}", h.query->name, "+-"[h.query->is_rc]);
199+
auto c2 = fmt::format("{}", h.ref->name, "+-"[h.ref->is_rc]);
199200
if (c1.size() > 6 || c2.size() > 6)
200201
continue;
201202

@@ -208,9 +209,9 @@ void get_differences()
208209
}
209210
}
210211

211-
eprn("wgac done");
212+
eprn("WGAC reading done!");
212213

213-
FastaReference fr("data/hg19/hg19.fa");
214+
FastaReference fr(ref_path);
214215

215216
int intersect = 0, wgac_only = 0, wgac_span = 0, sedef_only = 0, sedef_span = 0;
216217

@@ -221,7 +222,7 @@ void get_differences()
221222
auto &s = p.second;
222223
auto &w = wgac[p.first];
223224

224-
auto seq = fr.get_sequence(p.first.substr(0, p.first.size()-1));
225+
auto seq = fr.get_sequence(p.first);
225226

226227
for (int i = 0; i < seq.size(); i++) {
227228
if ((s[i] & (~w[i])) && isupper(seq[i]) && seq[i] != 'N') {
@@ -239,13 +240,15 @@ void get_differences()
239240
wgac_span += w.count();
240241
}
241242

242-
eprn("SEDEF: span {:12n}\n"
243-
" only {:12n}\n"
244-
" on/u {:12n}\n"
245-
" miss {:12n}\n"
246-
" mi/u {:12n}\n"
247-
"WGAC: span {:12n}\n"
248-
" intr {:12n}", sedef_span, sedef_only, sedef_extra_upper, wgac_only, miss_upper, wgac_span, intersect);
243+
eprn("SEDEF: spans {:12n}\n"
244+
" unique {:12n}\n"
245+
" unique (uppercase) {:12n}\n"
246+
" misses {:12n}\n"
247+
" misses (uppercase) {:12n}\n"
248+
"WGAC: spans {:12n}\n"
249+
" intersects {:12n}",
250+
sedef_span, sedef_only, sedef_extra_upper, wgac_only,
251+
miss_upper, wgac_span, intersect);
249252
}
250253

251254
/******************************************************************************/
@@ -260,7 +263,7 @@ void stats_main(int argc, char **argv)
260263
if (command == "generate") {
261264
stats(argv[1], argv[2]);
262265
} else if (command == "diff") {
263-
get_differences(); //(argv[1], argv[2], atoi(argv[3]));
266+
get_differences(argv[1], argv[2], argv[3]);
264267
} else {
265268
throw fmt::format("Unknown stats command");
266269
}

0 commit comments

Comments
 (0)