Skip to content

Commit

Permalink
support duplex_only option
Browse files Browse the repository at this point in the history
  • Loading branch information
sfchen committed Oct 11, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 47c5864 commit ad54f0a
Showing 5 changed files with 10 additions and 3 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -192,11 +192,12 @@ options:
-o, --out output bam/sam file. STDOUT will be written to if it's not specified (string [=-])
-r, --ref reference fasta file name (should be an uncompressed .fa/.fasta file) (string)
-b, --bed bed file to specify the capturing region, none by default (string [=])
-x, --duplex_only only output duplex consensus sequences, which means single stranded consensus sequences will be discarded.
-u, --umi_prefix the prefix for UMI, if it has one. None by default. Check the README for the details of UMI formats. (string [=auto])
-s, --supporting_reads only output consensus reads/pairs that are merged from >= <supporting_reads> reads/pairs. The value should be 1~10, and the default value is 1. (int [=1])
-a, --ratio_threshold if the ratio of the major base in a cluster is less than <ratio_threshold>, it will be further compared to the reference. The value should be 0.5~1.0, and the default value is 0.8 (double [=0.8])
-c, --score_threshold if the score of the major base in a cluster is less than <score_threshold>, it will be further compared to the reference. The value should be 1~20, and the default value is 6 (int [=6])
-d, --umi_diff_threshold if two reads with identical mapping position have UMI difference <= <umi_diff_threshold>, then they will be merged to generate a consensus read. Default value is 2. (int [=2])
-d, --umi_diff_threshold if two reads with identical mapping position have UMI difference <= <umi_diff_threshold>, then they will be merged to generate a consensus read. Default value is 1. (int [=1])
-D, --duplex_diff_threshold if the forward consensus and reverse consensus sequences have <= <duplex_diff_threshold> mismatches, then they will be merged to generate a duplex consensus sequence, otherwise they will be discarded. Default value is 2. (int [=2])
--high_qual the threshold for a quality score to be considered as high quality. Default 30 means Q30. (int [=30])
--moderate_qual the threshold for a quality score to be considered as moderate quality. Default 20 means Q20. (int [=20])
4 changes: 2 additions & 2 deletions src/cluster.cpp
Original file line number Diff line number Diff line change
@@ -154,7 +154,7 @@ vector<Pair*> Cluster::clusterByUMI(int umiDiffThreshold, Stats* preStats, Stats
}
// no duplex found, treat it as sscs
if(!foundDuplex) {
if(p1->mMergeReads >= mOptions->clusterSizeReq) {
if(!mOptions->duplexOnly && p1->mMergeReads >= mOptions->clusterSizeReq) {
singleConsesusCount++;
p1->writeSscsDcsTag();
postStats->addSSCS();
@@ -168,7 +168,7 @@ vector<Pair*> Cluster::clusterByUMI(int umiDiffThreshold, Stats* preStats, Stats
// no umi, no duplex
for(int i=0;i<singleConsensusPairs.size(); i++) {
Pair* p = singleConsensusPairs[i];
if(p->mMergeReads >= mOptions->clusterSizeReq) {
if(!mOptions->duplexOnly && p->mMergeReads >= mOptions->clusterSizeReq) {
singleConsesusCount++;
p->writeSscsDcsTag();
postStats->addSSCS();
2 changes: 2 additions & 0 deletions src/main.cpp
Original file line number Diff line number Diff line change
@@ -32,6 +32,7 @@ int main(int argc, char* argv[]){
cmd.add<string>("out", 'o', "output bam/sam file. STDOUT will be written to if it's not specified", false, "-");
cmd.add<string>("ref", 'r', "reference fasta file name (should be an uncompressed .fa/.fasta file)", true, "");
cmd.add<string>("bed", 'b', "bed file to specify the capturing region, none by default", false, "");
cmd.add("duplex_only", 'x', "only output duplex consensus sequences, which means single stranded consensus sequences will be discarded.");

// UMI
cmd.add<string>("umi_prefix", 'u', "the prefix for UMI, if it has. None by default. Check the README for the defails of UMI formats.", false, "auto");
@@ -74,6 +75,7 @@ int main(int argc, char* argv[]){
opt.properReadsUmiDiffThreshold = cmd.get<int>("umi_diff_threshold");
opt.duplexMismatchThreshold = cmd.get<int>("duplex_diff_threshold");
opt.debug = cmd.exist("debug");
opt.duplexOnly = cmd.exist("duplex_only");

// reporting
opt.jsonFile = cmd.get<string>("json");
2 changes: 2 additions & 0 deletions src/options.cpp
Original file line number Diff line number Diff line change
@@ -38,6 +38,8 @@ Options::Options(){

bedCoverageStep = 10;
coverageStep = 10000;

duplexOnly = false;
}

bool Options::validate() {
2 changes: 2 additions & 0 deletions src/options.h
Original file line number Diff line number Diff line change
@@ -59,6 +59,8 @@ class Options{

int coverageStep;
int bedCoverageStep;

bool duplexOnly;
};

#endif

0 comments on commit ad54f0a

Please sign in to comment.