Skip to content

Commit d024c53

Browse files
committed
enable multiply input
1 parent 194ded3 commit d024c53

File tree

6 files changed

+160
-75
lines changed

6 files changed

+160
-75
lines changed

Diff for: CMakeLists.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ aux_source_directory(./barcode_extractor BARCODE_SRC)
2424

2525
add_executable(SpecHap ${SPEC_SRC})
2626
add_executable(BarcodeExtract ${BARCODE_SRC})
27-
target_link_libraries(SpecHap ${HTSlib_LIBRARIES} ${ARPACK_LIBRARIES})
28-
target_link_libraries(BarcodeExtract ${HTSlib_LIBRARIES})
27+
target_link_libraries(SpecHap -lhts -larpack)
28+
target_link_libraries(BarcodeExtract -lhts)
2929

3030
#add_custom_target(extractHAIRS
3131
# ALL COMMAND make

Diff for: main.cpp

+60-23
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,16 @@
22
#include "phaser.h"
33
#include "results.h"
44
#include "type.h"
5+
#include <string>
6+
#include <sstream>
57

68

9+
#define PROTOCOL_HIC "hic"
10+
#define PROTOCOL_NGS "ngs"
11+
#define PROTOCOL_TENX "tenx"
12+
#define PROTOCOL_PACBIO "pacbio"
13+
#define PROTOCOL_NANOPORE "nanopore"
14+
#define PROTOCOL_HYBRID "hybrid"
715
bool HYBRID = false;
816
bool NEW_FORMAT = false;
917
bool KEEP_PS = false;
@@ -14,14 +22,18 @@ int MAX_BARCODE_SPANNING = 60000;
1422
int MAX_HIC_INSERTION= 40000000;
1523
int RECURSIVE_LIMIT = 15;
1624
int OPERATION = MODE_PE;
25+
bool HAS_HIC;
26+
bool HAS_TENX;
27+
std::vector<int> OPERATIONS;
1728

1829

1930
enum optionIndex
2031
{
2132
UNKNOWN, HELP, VCF, FRAGMENT, OUT, TENX, HIC, _WINDOW_SIZE, COVERAGE, _RECURSIVE_LIMIT, NANOPORE, PACBIO, NOSORT,
22-
_MAX_BARCODE_SPANNING_LENGTH, _WINDOW_OVERLAP, STATS, _NEWFORMAT, USESECONDARY, _KEEP_PHASING_INFO, _BASEOFFSET, _HYBRID
33+
_MAX_BARCODE_SPANNING_LENGTH, _WINDOW_OVERLAP, STATS, _NEWFORMAT, USESECONDARY, _KEEP_PHASING_INFO, _BASEOFFSET, _HYBRID,PROTOCOLS
2334
};
2435

36+
2537
struct Arg : public option::Arg
2638
{
2739
static void printError(const char *msg1, const option::Option &opt, const char *msg2)
@@ -62,7 +74,7 @@ const option::Descriptor usage[] =
6274
{UNKNOWN, 0, "", "", Arg::Unknown, "Usage: "},
6375
{HELP, 0, "h", "help", Arg::None, "\t--help\tPrint this message and exit."},
6476
{VCF, 0, "v", "vcf", Arg::Required, "\t-v <arg>,\t--vcf=<arg>\tSorted Heterozygous VCF File, gzipped, tbi or csi index required."},
65-
{FRAGMENT, 0, "f", "frag", Arg::Required, "\t-f <arg>,\t--frag=<arg>\tFragment File Generated by ExtractHairs, sort by SNP position is required."},
77+
{FRAGMENT, 0, "f", "frag", Arg::Required, "\t-f <arg>,\t--frag=<arg>\tFragment File Generated by ExtractHairs, sort by SNP position is required, multiply files with comma split is permitted"},
6678
{STATS, 0, "s", "frag_stat", Arg::Required, "\t-s <arg>,\t--frag_stat=<arg>\tFragment file statistic, in bed format, required for 10x."},
6779
{OUT, 0, "o", "out", Arg::Required, "\t-o <arg>,\t--out=<arg>\tOutput VCF File."},
6880
{_WINDOW_SIZE, 0, "", "window_size", Arg::Numeric, "\t-w [<arg>],\t--window_size [<arg>]\tPhasing Window Size, default=250."},
@@ -71,6 +83,7 @@ const option::Descriptor usage[] =
7183
{HIC, 0, "H", "hic", Arg::None, "\t-H,\t--hic\tSpecified for HiC data."},
7284
{PACBIO, 0, "P", "pacbio", Arg::None, "\t-P,\t--pacbio\tSpecified for Pacbio data."},
7385
{NANOPORE, 0, "N", "nanopore", Arg::None, "\t-N,\t--nanopore\tSpecified for Nanopore data."},
86+
{PROTOCOLS, 0,"p","protocols", Arg::Required, "\t-p,\t--protocols\t Sequence protocols for corresponding to frags, split with comma, hic,ngs, tenx, pacbio, nanopore is supported"},
7487
{_HYBRID, 0, "", "hybrid", Arg::None, "\t--hybrid\tSpecified for hybrid data type."},
7588
{_NEWFORMAT, 0, "", "new_format", Arg::None, "\t--new_format\tSpecified when using new_format with extractHair"},
7689
{_BASEOFFSET, 0, "", "base_offset", Arg::Numeric, "\t--base_offset\tQuality of set for read base, default is 33."},
@@ -138,30 +151,54 @@ int main(int argc, char *argv[])
138151
WINDOW_SIZE = int(atoi(options[_WINDOW_SIZE].arg));
139152

140153
std::string fnbed;
141-
if (options[TENX])
142-
{
143-
OPERATION = MODE_10X;
144-
fnbed = options[STATS].arg;
145-
}
146-
else if (options[HIC])
147-
OPERATION = MODE_HIC;
148-
else if (options[PACBIO])
149-
OPERATION = MODE_PACBIO;
150-
else if (options[NANOPORE])
151-
OPERATION = MODE_NANOPORE;
152-
153-
if (options[_HYBRID]) //hybrid mode, using pacbio/oxford nanopore, ngs and hic.
154-
{
155-
HYBRID = true;
156-
OPERATION = MODE_HYBRID;
157-
}
158-
154+
// if (options[TENX])
155+
// {
156+
// OPERATION = MODE_10X;
157+
// fnbed = options[STATS].arg;
158+
// }
159+
// else if (options[HIC])
160+
// OPERATION = MODE_HIC;
161+
// else if (options[PACBIO])
162+
// OPERATION = MODE_PACBIO;
163+
// else if (options[NANOPORE])
164+
// OPERATION = MODE_NANOPORE;
165+
//
166+
// if (options[_HYBRID]) //hybrid mode, using pacbio/oxford nanopore, ngs and hic.
167+
// {
168+
// HYBRID = true;
169+
// OPERATION = MODE_HYBRID;
170+
// }
171+
//
159172
std::string invcf = options[VCF].arg;
160173
std::string out = options[OUT].arg;
161174
std::string frag = options[FRAGMENT].arg;
162-
163-
164-
Phaser *phaser = new Phaser(invcf, out, frag, fnbed);
175+
// multiply frags file support
176+
std::vector<std::string> frags;
177+
std::string token;
178+
std::istringstream iss(options[FRAGMENT].arg);
179+
while(std::getline(iss, token, ','))
180+
frags.push_back(token);
181+
// protocols
182+
std::istringstream protocols(options[PROTOCOLS].arg);
183+
while(std::getline(protocols, token, ',')) {
184+
if (token == PROTOCOL_HIC){
185+
OPERATIONS.push_back(MODE_HIC);
186+
HAS_HIC = true;
187+
}
188+
else if (token == PROTOCOL_NGS) OPERATIONS.push_back(MODE_PE);
189+
else if (token == PROTOCOL_TENX) {
190+
HAS_TENX = true;
191+
OPERATIONS.push_back(MODE_10X);
192+
fnbed = options[STATS].arg;
193+
} else if (token == PROTOCOL_NANOPORE) OPERATIONS.push_back(MODE_NANOPORE);
194+
else if (token == PROTOCOL_PACBIO) OPERATIONS.push_back(MODE_PACBIO);
195+
else if (token == PROTOCOL_HYBRID) OPERATIONS.push_back(MODE_HYBRID);
196+
else {
197+
std::cerr << "SpecHap: Error, --protocols muste be split with comma, and only hic, ngs, tenx, pacbio and nanopore are supported\n";
198+
exit(1);
199+
}
200+
}
201+
auto *phaser = new Phaser(invcf, out, frags, fnbed);
165202
phaser->phasing();
166203
delete phaser;
167204
return 0;

Diff for: phaser.cpp

+32-12
Original file line numberDiff line numberDiff line change
@@ -12,32 +12,46 @@
1212

1313
// TODO clarify between variant count and block count
1414

15-
Phaser::Phaser(const std::string &fnvcf, const std::string &fnout, const std::string &fnfrag, const std::string &fnbed)
15+
Phaser::Phaser(const std::string &fnvcf, const std::string &fnout, std::vector<std::string> & fnfrags, const std::string &fnbed)
1616
{
17-
17+
for (int i = 0; i < fnfrags.size(); i++) {
18+
auto item = fnfrags[i];
19+
frfrags.push_back(new FragmentReader(item.data()));
20+
if(OPERATIONS[i] == MODE_10X) {
21+
frbed = new BEDReader(fnbed.data());
22+
}
23+
}
24+
for (const auto& item : fnfrags) {
25+
frfrags.push_back(new FragmentReader(item.data()));
26+
}
1827
frvcf = new VCFReader(fnvcf.data());
1928
fwvcf = new VCFWriter(frvcf->header, fnout.data());
20-
frfrag = new FragmentReader(fnfrag.data());
29+
// frfrag = new FragmentReader(fnfrag.data());
2130
frbed = nullptr;
2231
coverage = 30; //deprecated
2332

24-
if (OPERATION == MODE_10X)
33+
if (HAS_TENX)
2534
frbed = new BEDReader(fnbed.data());
2635

2736
bool use_secondary = false;
2837
threshold = 1e-5;
2938

30-
spectral = new Spectral(frfrag, frbed, threshold, coverage, use_secondary);
39+
spectral = new Spectral(frfrags, frbed, threshold, coverage, use_secondary);
3140
}
3241

3342
Phaser::~Phaser()
3443
{
3544
delete frvcf;
3645
delete fwvcf;
37-
delete frfrag;
46+
// delete frfrag;
47+
for (auto & item : frfrags) {
48+
delete item;
49+
item = nullptr;
50+
}
51+
frfrags.clear();
52+
frfrags.shrink_to_fit();
3853
delete spectral;
39-
if (frbed != nullptr)
40-
delete frbed;
54+
delete frbed;
4155
}
4256

4357

@@ -127,7 +141,10 @@ void Phaser::phasing()
127141
else
128142
load_contig_records(chromo_phaser);
129143
chromo_phaser->construct_phasing_window_initialize();
130-
frfrag->set_prev_chr_var_count(prev_variant_count);
144+
for (auto item : frfrags) {
145+
item->set_prev_chr_var_count(prev_variant_count);
146+
}
147+
// frfrag->set_prev_chr_var_count(prev_variant_count);
131148
spectral->set_chromo_phaser(chromo_phaser);
132149
phasing_by_chrom(chromo_phaser->variant_count, chromo_phaser);
133150

@@ -142,13 +159,16 @@ void Phaser::phasing()
142159

143160
void Phaser::phasing_by_chrom(uint var_count, ChromoPhaser *chromo_phaser)
144161
{
145-
frfrag->set_curr_chr_var_count(var_count);
162+
for (auto item : frfrags) {
163+
item->set_curr_chr_var_count(var_count);
164+
}
165+
// frfrag->set_curr_chr_var_count(var_count);
146166

147167
while (chromo_phaser->phased->rest_blk_count > 0)
148168
{
149169
if (chromo_phaser->phased->rest_blk_count > chromo_phaser->init_block_count)
150170
break;
151-
if (OPERATION == MODE_10X)
171+
if (HAS_TENX)
152172
chromo_phaser->phased->update_phasing_info(MAX_BARCODE_SPANNING);
153173
else
154174
{
@@ -160,7 +180,7 @@ void Phaser::phasing_by_chrom(uint var_count, ChromoPhaser *chromo_phaser)
160180
spectral->solver();
161181
}
162182
//std::cout << chromo_phaser->phased->phased_blk_count << std::endl;
163-
if (OPERATION == MODE_HIC)
183+
if (HAS_HIC)
164184
{
165185
phase_HiC_poss(chromo_phaser);
166186
}

Diff for: phaser.h

+3-2
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class Phaser
2424
{
2525
public:
2626
Phaser() = default;
27-
explicit Phaser(const std::string & fnvcf, const std::string & fnout, const std::string & fnfrag, const std::string &fnbed);
27+
explicit Phaser(const std::string & fnvcf, const std::string & fnout, std::vector<std::string> & fnfrags, const std::string &fnbed);
2828
~Phaser();
2929
void phasing();
3030

@@ -33,7 +33,8 @@ class Phaser
3333
double threshold;
3434
VCFReader *frvcf;
3535
VCFWriter *fwvcf;
36-
FragmentReader *frfrag;
36+
std::vector<FragmentReader* > frfrags;
37+
// FragmentReader *frfrag;
3738
BEDReader *frbed;
3839
Spectral *spectral;
3940
int coverage;

0 commit comments

Comments
 (0)