10
10
#include < limits>
11
11
#include < vector>
12
12
#include < algorithm>
13
+ #include < iterator>
13
14
#include < unordered_map>
14
15
#include < fstream>
15
16
#include < zlib.h>
@@ -356,17 +357,49 @@ namespace skch
356
357
// Filter over reference axis and report the mappings
357
358
if (param.filterMode == filter::ONETOONE)
358
359
{
359
- skch::Filter::ref::filterMappings (allReadMappings, this ->refSketch ,
360
- param.numMappingsForSegment - 1
361
- // (input->len < param.segLength ? param.shortSecondaryToKeep : param.secondaryToKeep)
362
- );
360
+ // how many secondary mappings to keep
361
+ int n_mappings = param.numMappingsForSegment - 1 ;
362
+
363
+ // Group sequences by query prefix, then pass to ref filter
364
+ auto subrange_begin = allReadMappings.begin ();
365
+ auto subrange_end = allReadMappings.begin ();
366
+ MappingResultsVector_t tmpMappings;
367
+ MappingResultsVector_t filteredMappings;
368
+
369
+ while (subrange_end != allReadMappings.end ())
370
+ {
371
+ if (param.skip_prefix )
372
+ {
373
+ int currGroup = this ->getRefGroup (qmetadata[subrange_begin->querySeqId ].name );
374
+ subrange_end = std::find_if_not (subrange_begin, allReadMappings.end (), [this , currGroup] (const auto & allReadMappings_candidate) {
375
+ return currGroup == this ->getRefGroup (this ->qmetadata [allReadMappings_candidate.querySeqId ].name );
376
+ });
377
+ }
378
+ else
379
+ {
380
+ subrange_end = allReadMappings.end ();
381
+ }
382
+ tmpMappings.insert (
383
+ tmpMappings.end (),
384
+ std::make_move_iterator (subrange_begin),
385
+ std::make_move_iterator (subrange_end));
386
+
387
+ // tmpMappings now contains mappings from one group of query sequences to all reference groups
388
+ // we now run filterByGroup, which filters based on the reference group.
389
+ filterByGroup (tmpMappings, filteredMappings, n_mappings, true );
390
+ tmpMappings.clear ();
391
+ subrange_begin = subrange_end;
392
+ }
393
+ allReadMappings = std::move (filteredMappings);
363
394
364
395
// Re-sort mappings by input order of query sequences
365
396
// This order may be needed for any post analysis of output
366
- std::sort (allReadMappings.begin (), allReadMappings.end (), [](const MappingResult &a, const MappingResult &b)
367
- {
368
- return (a.querySeqId < b.querySeqId );
369
- });
397
+ std::sort (
398
+ allReadMappings.begin (), allReadMappings.end (),
399
+ [](const MappingResult &a, const MappingResult &b) {
400
+ return std::tie (a.querySeqId , a.queryStartPos , a.refSeqId , a.refStartPos )
401
+ < std::tie (b.querySeqId , b.queryStartPos , b.refSeqId , b.refStartPos );
402
+ });
370
403
371
404
reportReadMappings (allReadMappings, " " , outstrm);
372
405
}
@@ -460,17 +493,19 @@ namespace skch
460
493
}
461
494
462
495
/* *
463
- * @brief helper to main filtering function
464
- * @details filters mappings by group
465
- * @param[in] input unfiltered mappings
466
- * @param[in] output filtered mappings
467
- * @param[in] input num mappings per segment
468
- * @return void
496
+ * @brief helper to main filtering function
497
+ * @details filters mappings by group
498
+ * @param[in] input unfiltered mappings
499
+ * @param[in] output filtered mappings
500
+ * @param[in] n_mappings num mappings per segment
501
+ * @param[in] filter_ref use Filter::ref instead of Filter::query
502
+ * @return void
469
503
*/
470
504
void filterByGroup (
471
505
MappingResultsVector_t &unfilteredMappings,
472
506
MappingResultsVector_t &filteredMappings,
473
- int n_mappings)
507
+ int n_mappings,
508
+ bool filter_ref)
474
509
{
475
510
filteredMappings.reserve (unfilteredMappings.size ());
476
511
@@ -480,6 +515,7 @@ namespace skch
480
515
auto subrange_end = unfilteredMappings.begin ();
481
516
if (param.filterMode == filter::MAP || param.filterMode == filter::ONETOONE)
482
517
{
518
+ std::vector<skch::MappingResult> tmpMappings;
483
519
while (subrange_end != unfilteredMappings.end ())
484
520
{
485
521
if (param.skip_prefix )
@@ -493,13 +529,25 @@ namespace skch
493
529
{
494
530
subrange_end = unfilteredMappings.end ();
495
531
}
496
- // TODO why are we filtering these before merging?
497
- std::vector<skch::MappingResult> tmpMappings (std::distance (subrange_begin, subrange_end));
498
- std::move (subrange_begin, subrange_end, tmpMappings.begin ());
532
+ tmpMappings.insert (
533
+ tmpMappings.end (),
534
+ std::make_move_iterator (subrange_begin),
535
+ std::make_move_iterator (subrange_end));
499
536
std::sort (tmpMappings.begin (), tmpMappings.end (), [](const auto & a, const auto & b)
500
537
{ return std::tie (a.queryStartPos , a.refSeqId , a.refStartPos ) < std::tie (b.queryStartPos , b.refSeqId , b.refStartPos ); });
501
- skch::Filter::query::filterMappings (tmpMappings, n_mappings);
502
- std::move (tmpMappings.begin (), tmpMappings.end (), std::back_inserter (filteredMappings));
538
+ if (filter_ref)
539
+ {
540
+ skch::Filter::ref::filterMappings (tmpMappings, this ->refSketch , n_mappings);
541
+ }
542
+ else
543
+ {
544
+ skch::Filter::query::filterMappings (tmpMappings, n_mappings);
545
+ }
546
+ filteredMappings.insert (
547
+ filteredMappings.end (),
548
+ std::make_move_iterator (tmpMappings.begin ()),
549
+ std::make_move_iterator (tmpMappings.end ()));
550
+ tmpMappings.clear ();
503
551
subrange_begin = subrange_end;
504
552
}
505
553
}
@@ -509,8 +557,6 @@ namespace skch
509
557
[](const MappingResult &a, const MappingResult &b) {
510
558
return std::tie (a.queryStartPos , a.refSeqId , a.refStartPos )
511
559
< std::tie (b.queryStartPos , b.refSeqId , b.refStartPos );
512
- // return std::tie(a.refSeqId, a.refStartPos, a.queryStartPos)
513
- // < std::tie(b.refSeqId, b.refStartPos, b.queryStartPos);
514
560
});
515
561
}
516
562
@@ -644,10 +690,10 @@ namespace skch
644
690
}
645
691
646
692
if (param.filterMode == filter::MAP || param.filterMode == filter::ONETOONE) {
647
- MappingResultsVector_t tempMappings ;
648
- tempMappings .reserve (output->readMappings .size ());
649
- filterByGroup (unfilteredMappings, tempMappings , n_mappings);
650
- std::swap (tempMappings, unfilteredMappings );
693
+ MappingResultsVector_t tmpMappings ;
694
+ tmpMappings .reserve (output->readMappings .size ());
695
+ filterByGroup (unfilteredMappings, tmpMappings , n_mappings, false );
696
+ unfilteredMappings = std::move (tmpMappings );
651
697
}
652
698
653
699
std::swap (output->readMappings , unfilteredMappings);
0 commit comments