11
11
#include < bitset>
12
12
#include < unordered_set>
13
13
14
+ #include < boost/dynamic_bitset.hpp>
15
+
14
16
#include " align.h"
15
17
#include " common.h"
16
18
#include " fasta.h"
@@ -166,36 +168,35 @@ void stats(const string &ref_path, const string &bed_path)
166
168
167
169
/* *****************************************************************************/
168
170
169
- #include < boost/dynamic_bitset.hpp>
170
-
171
- void get_differences ()
171
+ void get_differences (const string &ref_path, const string &bed_path,
172
+ const string &wgac_path)
172
173
{
173
174
map<string, boost::dynamic_bitset<>> sedef;
174
175
map<string, boost::dynamic_bitset<>> wgac;
175
176
176
177
string s;
177
- ifstream fin (" results/out.hg19.bed " , ifstream::in );
178
+ ifstream fin (bed_path );
178
179
while (getline (fin, s)) {
179
180
string cigar;
180
181
Hit h = Hit::from_bed (s, &cigar);
181
182
182
- auto c1 = fmt::format (" {}{} " , h.query ->name , " +-" [h.query ->is_rc ]);
183
- auto c2 = fmt::format (" {}{} " , h.ref ->name , " +-" [h.ref ->is_rc ]);
183
+ auto c1 = fmt::format (" {}" , h.query ->name , " +-" [h.query ->is_rc ]);
184
+ auto c2 = fmt::format (" {}" , h.ref ->name , " +-" [h.ref ->is_rc ]);
184
185
if (sedef.find (c1)==sedef.end ()) sedef[c1]=boost::dynamic_bitset<>(250000000 );
185
186
if (sedef.find (c2)==sedef.end ()) sedef[c2]=boost::dynamic_bitset<>(250000000 );
186
187
for (int i = h.query_start ; i < h.query_end ; i++) sedef[c1].set (i);
187
188
for (int i = h.ref_start ; i < h.ref_end ; i++) sedef[c2].set (i);
188
189
}
189
190
190
- eprn (" sedef done" );
191
+ eprn (" SEDEF reading done! " );
191
192
192
- ifstream fiw (" data/GRCh37GenomicSuperDup.tab " );
193
+ ifstream fiw (wgac_path );
193
194
getline (fiw, s);
194
195
unordered_set<string> seen;
195
196
while (getline (fiw, s)) {
196
197
Hit h = Hit::from_wgac (s);
197
- auto c1 = fmt::format (" {}{} " , h.query ->name , " +-" [h.query ->is_rc ]);
198
- auto c2 = fmt::format (" {}{} " , h.ref ->name , " +-" [h.ref ->is_rc ]);
198
+ auto c1 = fmt::format (" {}" , h.query ->name , " +-" [h.query ->is_rc ]);
199
+ auto c2 = fmt::format (" {}" , h.ref ->name , " +-" [h.ref ->is_rc ]);
199
200
if (c1.size () > 6 || c2.size () > 6 )
200
201
continue ;
201
202
@@ -208,9 +209,9 @@ void get_differences()
208
209
}
209
210
}
210
211
211
- eprn (" wgac done" );
212
+ eprn (" WGAC reading done! " );
212
213
213
- FastaReference fr (" data/hg19/hg19.fa " );
214
+ FastaReference fr (ref_path );
214
215
215
216
int intersect = 0 , wgac_only = 0 , wgac_span = 0 , sedef_only = 0 , sedef_span = 0 ;
216
217
@@ -221,7 +222,7 @@ void get_differences()
221
222
auto &s = p.second ;
222
223
auto &w = wgac[p.first ];
223
224
224
- auto seq = fr.get_sequence (p.first . substr ( 0 , p. first . size ()- 1 ) );
225
+ auto seq = fr.get_sequence (p.first );
225
226
226
227
for (int i = 0 ; i < seq.size (); i++) {
227
228
if ((s[i] & (~w[i])) && isupper (seq[i]) && seq[i] != ' N' ) {
@@ -239,13 +240,15 @@ void get_differences()
239
240
wgac_span += w.count ();
240
241
}
241
242
242
- eprn (" SEDEF: span {:12n}\n "
243
- " only {:12n}\n "
244
- " on/u {:12n}\n "
245
- " miss {:12n}\n "
246
- " mi/u {:12n}\n "
247
- " WGAC: span {:12n}\n "
248
- " intr {:12n}" , sedef_span, sedef_only, sedef_extra_upper, wgac_only, miss_upper, wgac_span, intersect);
243
+ eprn (" SEDEF: spans {:12n}\n "
244
+ " unique {:12n}\n "
245
+ " unique (uppercase) {:12n}\n "
246
+ " misses {:12n}\n "
247
+ " misses (uppercase) {:12n}\n "
248
+ " WGAC: spans {:12n}\n "
249
+ " intersects {:12n}" ,
250
+ sedef_span, sedef_only, sedef_extra_upper, wgac_only,
251
+ miss_upper, wgac_span, intersect);
249
252
}
250
253
251
254
/* *****************************************************************************/
@@ -260,7 +263,7 @@ void stats_main(int argc, char **argv)
260
263
if (command == " generate" ) {
261
264
stats (argv[1 ], argv[2 ]);
262
265
} else if (command == " diff" ) {
263
- get_differences (); // ( argv[1], argv[2], atoi( argv[3]) );
266
+ get_differences (argv[1 ], argv[2 ], argv[3 ]);
264
267
} else {
265
268
throw fmt::format (" Unknown stats command" );
266
269
}
0 commit comments