-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #197 from pangenome/avoid_2_graphs_in_memory
Avoid 2 graphs in memory, sample decompressed blocks, parallelize path embedding
- Loading branch information
Showing
12 changed files
with
1,252 additions
and
611 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
#include "dna.hpp" | ||
|
||
namespace smoothxg {

// Complement lookup table, indexed by the *unsigned* byte value of a character.
//
// - A/C/G/T/U and the IUPAC ambiguity codes map to their complements, with
//   case preserved ('U'/'u' complement to 'A'/'a').
// - 'S' (C or G) and 'W' (A or T) are self-complementary under IUPAC and map
//   to themselves.
// - '#' and '$' (GCSA start/stop markers) are swapped; '-' maps to itself.
// - Every other byte, including unlisted lowercase letters, maps to 'N'.
static const char dna_complement[256] = {'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 8
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 16
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 24
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 32
                                         'N', 'N', 'N', '$', '#', 'N', 'N', 'N', // 40 GCSA stop/start characters
                                         'N', 'N', 'N', 'N', 'N', '-', 'N', 'N', // 48
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 56
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 64
                                         'N', 'T', 'V', 'G', 'H', 'N', 'N', 'C', // 72
                                         'D', 'N', 'N', 'M', 'N', 'K', 'N', 'N', // 80
                                         'N', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', // 88
                                         'N', 'R', 'N', 'N', 'N', 'N', 'N', 'N', // 96
                                         'N', 't', 'v', 'g', 'h', 'N', 'N', 'c', // 104
                                         'd', 'N', 'N', 'm', 'N', 'k', 'n', 'N', // 112
                                         'N', 'q', 'y', 's', 'a', 'a', 'b', 'w', // 120
                                         'N', 'r', 'N', 'N', 'N', 'N', 'N', 'N', // 128
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 136
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 144
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 152
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 160
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 168
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 176
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 184
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 192
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 200
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 208
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 216
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 224
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 232
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 240
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N', // 248
                                         'N', 'N', 'N', 'N', 'N', 'N', 'N', 'N'};// 256

// Looks up the complement of one character.
// The index goes through `unsigned char` because plain `char` may be signed:
// a byte >= 0x80 would otherwise become a negative index and read outside
// the table.
static inline char complement_base(char c) {
    return dna_complement[static_cast<unsigned char>(c)];
}

/// Returns the complement of a single base (see dna_complement for the mapping).
char dna_reverse_complement(const char& c) {
    return complement_base(c);
}

/// Returns a new string holding the reverse complement of `seq`.
/// `seq` itself is left untouched; an empty input yields an empty result.
std::string dna_reverse_complement(const std::string& seq) {
    std::string rc(seq.rbegin(), seq.rend());
    for (auto& base : rc) {
        base = complement_base(base);
    }
    return rc;
}

/// Reverse-complements `seq` in place, without allocating a second string.
void dna_reverse_complement_in_place(std::string& seq) {
    const size_t half = seq.size() / 2;
    // Walk inwards from both ends, swapping and complementing each pair.
    // For an empty string `half` is 0, so the wrapped-around initial value
    // of `right` is never used.
    for (size_t left = 0, right = seq.size() - 1; left < half; ++left, --right) {
        const char tmp = seq[left];
        seq[left] = complement_base(seq[right]);
        seq[right] = complement_base(tmp);
    }

    // An odd-length sequence has a middle base that stays put but still
    // needs complementing.
    if (seq.size() % 2) {
        seq[half] = complement_base(seq[half]);
    }
}

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#ifndef DNA_HPP_INCLUDED | ||
#define DNA_HPP_INCLUDED | ||
|
||
#include <string> | ||
|
||
namespace smoothxg { | ||
|
||
/// Returns the complement of the single character `c`, looked up in the
/// complement table defined in dna.cpp. Characters with no specific entry
/// in that table come back as 'N'.
char dna_reverse_complement(const char& c); | ||
/// Returns a new string containing `seq` reversed, with every character
/// replaced by its complement. `seq` is not modified.
std::string dna_reverse_complement(const std::string& seq); | ||
/// Reverse-complements `seq` in place: characters are swapped pairwise from
/// both ends and complemented, and the middle character of an odd-length
/// string is complemented where it stands.
void dna_reverse_complement_in_place(std::string& seq); | ||
|
||
} | ||
|
||
#endif |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
#include "pos.hpp" | ||
|
||
namespace smoothxg { | ||
|
||
bool operator<(const aln_pos_t& a, const aln_pos_t& b) { | ||
return a.pos < b.pos && a.aln_length < b.aln_length; | ||
} | ||
|
||
bool operator==(const aln_pos_t& a, const aln_pos_t& b) { | ||
return a.pos == b.pos && a.aln_length == b.aln_length; | ||
} | ||
|
||
pos_t make_pos_t(uint64_t offset, bool is_rev) { | ||
// top bit is reserved for is_rev flag | ||
// the rest is our offset in the input sequence vector | ||
uint64_t rev_mask = (uint64_t)1; // the bit mask | ||
pos_t pos = offset<<1; | ||
// https://graphics.stanford.edu/~seander/bithacks.html#ConditionalSetOrClearBitsWithoutBranching | ||
pos = (pos & ~rev_mask) | (-is_rev & rev_mask); | ||
return pos; | ||
} | ||
|
||
uint64_t offset(const pos_t& pos) { | ||
//return (pos & ~(uint64_t)1) >> 1; | ||
return pos >> 1; | ||
} | ||
|
||
bool is_rev(const pos_t& pos) { | ||
return pos & (uint64_t)1; | ||
} | ||
|
||
void incr_pos(pos_t& pos) { | ||
if (is_rev(pos)) { | ||
pos -= 2; | ||
} else { | ||
pos += 2; | ||
} | ||
} | ||
|
||
void incr_pos(pos_t& pos, size_t by) { | ||
if (is_rev(pos)) { | ||
pos -= 2*by; | ||
} else { | ||
pos += 2*by; | ||
} | ||
} | ||
|
||
void decr_pos(pos_t& pos) { | ||
if (!is_rev(pos)) { | ||
pos -= 2; | ||
} else { | ||
pos += 2; | ||
} | ||
} | ||
|
||
void decr_pos(pos_t& pos, size_t by) { | ||
if (!is_rev(pos)) { | ||
pos -= 2*by; | ||
} else { | ||
pos += 2*by; | ||
} | ||
} | ||
|
||
pos_t rev_pos_t(const pos_t& pos) { | ||
return make_pos_t(offset(pos), !is_rev(pos)); | ||
} | ||
|
||
std::string pos_to_string(const pos_t& pos) { | ||
return std::to_string(offset(pos)) + (is_rev(pos)?"-":"+"); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#pragma once | ||
|
||
#include <cstdint> | ||
#include <string> | ||
|
||
namespace smoothxg { | ||
|
||
/// Packed position: bit 0 is the orientation flag (set = reverse strand),
/// the remaining upper bits hold the offset (see make_pos_t in pos.cpp).
typedef uint64_t pos_t; | ||
/// A packed position paired with an alignment length.
struct aln_pos_t { pos_t pos; uint64_t aln_length; }; | ||
/// Less-than comparison over both fields of aln_pos_t; defined in pos.cpp.
bool operator<(const aln_pos_t& a, const aln_pos_t& b); | ||
/// True when both `pos` and `aln_length` are equal.
bool operator==(const aln_pos_t& a, const aln_pos_t& b); | ||
/// Builds a packed position from an offset and an orientation flag.
pos_t make_pos_t(uint64_t offset, bool is_rev); | ||
/// Returns the offset stored in a packed position.
uint64_t offset(const pos_t& pos); | ||
/// Returns true when the packed position is on the reverse strand.
bool is_rev(const pos_t& pos); | ||
/// Advances `pos` by one base in its own orientation: the offset increases
/// on the forward strand and decreases on the reverse strand.
void incr_pos(pos_t& pos); | ||
/// Advances `pos` by `by` bases in its own orientation.
void incr_pos(pos_t& pos, size_t by); | ||
/// Moves `pos` back by one base in its own orientation (inverse of incr_pos).
void decr_pos(pos_t& pos); | ||
/// Moves `pos` back by `by` bases in its own orientation.
void decr_pos(pos_t& pos, size_t by); | ||
/// Returns the same offset with the orientation flag flipped.
pos_t rev_pos_t(const pos_t& pos); | ||
/// Formats a position as its decimal offset followed by '+' or '-'.
std::string pos_to_string(const pos_t& pos); | ||
|
||
} |
Oops, something went wrong.