Skip to content

Commit

Permalink
1. effi-mem, 2. bwt_aln
Browse files Browse the repository at this point in the history
  • Loading branch information
Gao Yan committed Mar 21, 2016
1 parent 53082ce commit 0b61fff
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 89 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ src/*.o
# Executables
lamsa
gdb_lamsa
debug_lamsa

# Project files
.cproject
Expand Down
7 changes: 4 additions & 3 deletions src/bwt_aln.c
Original file line number Diff line number Diff line change
Expand Up @@ -372,8 +372,10 @@ int bwt_aln_core(bwt_t *bwt, bntseq_t *bns, uint8_t *pac, uint8_t *read_bseq, ui

bwt_set_bound(*seed_v, line[i], node_n[i], seed_len, reg_len, &left_bound, &right_bound);
bwt_aln_res((*seed_v)[line[i][0].x].loc[line[i][0].y].ref_id, (*seed_v)[line[i][0].x].loc[line[i][0].y].is_rev, bns, pac, read_bseq, read_rbseq, reg_beg, reg_len, &left_bound, &right_bound, AP, seqs, re_res->la+re_res->l_n);
re_res->la[re_res->l_n].line_score = 0;
re_res->l_n++;
if (readInCigar(re_res->la[re_res->l_n].res[0].cigar, re_res->la[re_res->l_n].res[0].cigar_len) > 1/2 * reg_len) {
re_res->la[re_res->l_n].line_score = 0;
re_res->l_n++;
}
}
}
// free
Expand All @@ -391,7 +393,6 @@ void bwt_aln_remain(aln_reg *a_reg, aln_res *re_res, bwt_t *bwt, bntseq_t *bns,
re_res->l_n = 0;
for (i = 0; i < re_reg->reg_n; ++i)
bwt_aln_core(bwt, bns, pac, read_bseq, read_rbseq, re_reg->reg[i], AP, seqs, re_res);

End:
aln_free_reg(re_reg);
}
75 changes: 45 additions & 30 deletions src/frag_check.c
Original file line number Diff line number Diff line change
Expand Up @@ -16,41 +16,26 @@
// for debug
char READ_NAME[1024];

void frag_init_msg(frag_msg *f_msg, int frag_max)
void frag_init_msg(frag_msg *f_msg)
{
f_msg->frag_max = frag_max;
f_msg->frag_max = 1;
f_msg->frag_num = 0;
f_msg->per_seed_max = frag_max;

f_msg->fa_msg = (frag_aln_msg*)malloc(frag_max*sizeof(frag_aln_msg));
f_msg->fa_msg = (frag_aln_msg*)malloc(sizeof(frag_aln_msg));
int i;
for (i = 0; i < frag_max; ++i)
for (i = 0; i < 1; ++i)
{
f_msg->fa_msg[i].cigar_max = CIGAR_LEN_M;
f_msg->fa_msg[i].cigar = (cigar32_t*)malloc(CIGAR_LEN_M * sizeof(cigar32_t));
f_msg->fa_msg[i].cigar_len = 0;

f_msg->fa_msg[i].seed_i = (int*)malloc(f_msg->per_seed_max*sizeof(int));
f_msg->fa_msg[i].seed_aln_i = (int*)malloc(f_msg->per_seed_max*sizeof(int));
f_msg->fa_msg[i].seed_i = (int*)malloc(sizeof(int));
f_msg->fa_msg[i].seed_aln_i = (int*)malloc(sizeof(int));
f_msg->fa_msg[i].seed_num = 0;
f_msg->fa_msg[i].seed_max = 1;
}
}

int frag_copy_msg(frag_msg *ff_msg, frag_msg *tf_msg)
{
tf_msg->frag_max = ff_msg->frag_max;
tf_msg->frag_num = 0;
tf_msg->per_seed_max = ff_msg->per_seed_max;
int i;
for (i = 0; i < tf_msg->frag_max; ++i) {
tf_msg->fa_msg[i].cigar_max = CIGAR_LEN_M;
tf_msg->fa_msg[i].cigar_len = 0;

tf_msg->fa_msg[i].seed_num = 0;
}
return 0;
}

void frag_free_msg(frag_msg *f_msg, int line_num)
{
int i, j;
Expand All @@ -71,18 +56,38 @@ int frag_set_msg(map_msg *m_msg, int seed_i, int aln_i,
int FLAG, frag_msg *f_msg, int frag_i)//FLAG 0:start / 1:end / 2:seed
{
if (FLAG == FRAG_END) { //end
if (f_msg->frag_num == f_msg->frag_max) {
// realloc
f_msg->frag_max <<= 1;
f_msg->fa_msg = (frag_aln_msg*)realloc(f_msg->fa_msg, f_msg->frag_max * sizeof(frag_aln_msg));
int i;
for (i = f_msg->frag_num; i < f_msg->frag_max; ++i) {
f_msg->fa_msg[i].cigar_max = CIGAR_LEN_M;
f_msg->fa_msg[i].cigar = (cigar32_t*)malloc(CIGAR_LEN_M * sizeof(cigar32_t));
f_msg->fa_msg[i].cigar_len = 0;

f_msg->fa_msg[i].seed_i = (int*)malloc(sizeof(int));
f_msg->fa_msg[i].seed_aln_i = (int*)malloc(sizeof(int));
f_msg->fa_msg[i].seed_num = 0;
f_msg->fa_msg[i].seed_max = 1;
}
}
f_msg->fa_msg[frag_i].chr = m_msg[seed_i].map[aln_i].nchr;
f_msg->fa_msg[frag_i].strand = m_msg[seed_i].map[aln_i].nstrand; //+:1/-:-1
f_msg->fa_msg[frag_i].seed_i[0] = seed_i;
f_msg->fa_msg[frag_i].seed_aln_i[0] = aln_i;
f_msg->fa_msg[frag_i].seed_num = 1;
f_msg->fa_msg[frag_i].flag = UNCOVERED;
f_msg->fa_msg[frag_i].cigar_len = 0;
}
else if(FLAG==FRAG_START) { //start
} else if(FLAG==FRAG_START) { //start
f_msg->frag_num = frag_i + 1;
}
else { //seed
} else { //seed
if (f_msg->fa_msg[frag_i].seed_num == f_msg->fa_msg[frag_i].seed_max) {
// realloc
f_msg->fa_msg[frag_i].seed_max <<= 1;
f_msg->fa_msg[frag_i].seed_i = (int*)realloc(f_msg->fa_msg[frag_i].seed_i, f_msg->fa_msg[frag_i].seed_max * sizeof(int));
f_msg->fa_msg[frag_i].seed_aln_i = (int*)realloc(f_msg->fa_msg[frag_i].seed_aln_i, f_msg->fa_msg[frag_i].seed_max * sizeof(int));
}
f_msg->fa_msg[frag_i].seed_i[f_msg->fa_msg[frag_i].seed_num] = seed_i;
f_msg->fa_msg[frag_i].seed_aln_i[f_msg->fa_msg[frag_i].seed_num] = aln_i;
f_msg->fa_msg[frag_i].seed_num++;
Expand Down Expand Up @@ -201,6 +206,9 @@ void push_res(line_aln_res *la)
int i;
if (la->cur_res_n == la->res_m-1) {
la->res_m <<= 1;
#ifdef __DEBUG__
fprintf(stderr, "la->res_m: %d\n", la->res_m);
#endif
la->res = (res_t*)realloc(la->res, la->res_m * sizeof(res_t));
if (la->res == NULL) { fprintf(stderr, "[lamsa_res_split] Not enough memory.\n"); exit(0); }
for (i = la->cur_res_n+1; i < la->res_m; ++i) {
Expand All @@ -221,8 +229,8 @@ void merge_cigar(cigar32_t **c1, int *c1_n, int *c1_m, uint64_t *c1_refend, int
bntseq_t *bns, uint8_t *pac, uint8_t *read_bseq, lamsa_aln_para *AP)
{
if (c2_n == 0) return;
if (*c1_n > 1 && (((((*c1)[*c1_n-1] & 0xf) == CINS || ((*c1)[*c1_n-1] & 0xf) == CDEL) && (_c2[0] & 0xf) != CSOFT_CLIP && (_c2[0] & 0xf) != CHARD_CLIP)
|| (((_c2[0] & 0xf) == CINS || (_c2[0] & 0xf) == CDEL) && ((*c1)[*c1_n-1] & 0xf) != CSOFT_CLIP && ((*c1)[*c1_n-1] & 0xf) != CHARD_CLIP))) { // repair boundary
if (*c1_n > 1 && ((((((*c1)[*c1_n-1] & 0xf) == CINS || ((*c1)[*c1_n-1] & 0xf) == CDEL) && (((*c1)[*c1_n-1]>>4) <= 3)) && (_c2[0] & 0xf) != CSOFT_CLIP && (_c2[0] & 0xf) != CHARD_CLIP)
|| ((((_c2[0] & 0xf) == CINS || (_c2[0] & 0xf) == CDEL) && ((_c2[0]>>4) <= 3)) && ((*c1)[*c1_n-1] & 0xf) != CSOFT_CLIP && ((*c1)[*c1_n-1] & 0xf) != CHARD_CLIP))) { // repair boundary
/* seq1, ref */ /* seq2, read */
int len1, len11=0, len2, len21=0, len22=0, len_dif1=0, len_dif2=0;
int bd_cigar_len=0, b=0, min_b, ci=0; cigar32_t *bd_cigar = 0;
Expand Down Expand Up @@ -378,6 +386,9 @@ int frag_extend(frag_msg *f_msg, map_msg *m_msg, int f_i,
}
}
merge_cigar(&(la->res[la->cur_res_n].cigar), &(la->res[la->cur_res_n].cigar_len), &(la->res[la->cur_res_n].c_m), &(la->res[la->cur_res_n].refend), &(la->res[la->cur_res_n].readend), fa_msg->chr, fa_msg->cigar, fa_msg->cigar_len, fa_msg->cigar_ref_end-fa_msg->cigar_ref_start+1, fa_msg->cigar_read_end-fa_msg->cigar_read_start+1, bns, pac, read_bseq, AP);
#ifdef __DEBUG__
printcigar(stderr, la->res[la->cur_res_n].cigar, la->res[la->cur_res_n].cigar_len);
#endif
return 0;
}

Expand Down Expand Up @@ -455,6 +466,9 @@ void split_mapping(bntseq_t *bns, uint8_t *pac,
free(_cigar);
} else {
res = split_indel_map(&s_cigar, &s_clen, &s_cm, s_qseq, s_qlen, s_tseq, s_tlen, 0, AP, hash_num, hash_node);
#ifdef __DEBUG__
printcigar(stderr, s_cigar, s_clen);
#endif
}
} else if (dis < -match_dis) { //INS
s_tlen = s_qlen + dis;
Expand Down Expand Up @@ -520,8 +534,8 @@ void split_mapping(bntseq_t *bns, uint8_t *pac,
res = ksw_bi_extend(s_qlen, s_qseq, s_tlen, s_tseq, 5, AP->sc_mat, 100, 100, AP, &_cigar, &_cigar_n, &_cigar_m);
_push_cigar(&s_cigar, &s_clen, &s_cm, _cigar, _cigar_n);
#ifdef __DEBUG__
printcigar(stdout, _cigar, _cigar_n);
printf("\n");
printcigar(stderr, _cigar, _cigar_n);
fprintf(stderr, "\n");
#endif
free(_cigar);
}
Expand Down Expand Up @@ -924,5 +938,6 @@ void frag_check(map_msg *m_msg, frag_msg **f_msg, aln_res *a_res,
// filter line-aln-res in merged-lines, get opt-res OR multi-sub-opt-res
//res_filter(a_res);

frag_free_msg(*f_msg, line_n);
free(bseq1); free(bseq2);
}
8 changes: 4 additions & 4 deletions src/frag_check.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ typedef struct {
int flag;
int b_f;

int seed_num; //seed number of per frag
int seed_max, seed_num; //seed number of per frag
int *seed_i;
int *seed_aln_i;
} frag_aln_msg;
Expand All @@ -37,7 +37,6 @@ typedef struct {
int frag_max;
int frag_num;

int per_seed_max;
frag_aln_msg *fa_msg;
int line_score;
int frag_left_bound; //read_id of previous line's last seed, if NO pre line, this will be 0
Expand All @@ -47,7 +46,7 @@ typedef struct {
typedef struct {
uint64_t offset; //1-based
int chr;
int nstrand; //1:'+' 0:'-'
int8_t nstrand; //1:'+' 0:'-'
cigar32_t *cigar;
int c_m; int cigar_len;
uint64_t refend; int readend; // for merge_cigar
Expand Down Expand Up @@ -201,7 +200,8 @@ static inline int _push_cigar_e(cigar32_t **cigar, int *cigar_n, int *cigar_m, u
//int _push_cigar1(cigar32_t **cigar, int *cigar_len, int *cigar_m, cigar32_t _cigar);
//int _push_cigar0(cigar32_t **cigar, int *cigar_len, int *cigar_m, cigar32_t _cigar);
//void _invert_cigar(cigar32_t **cigar, int cigar_n);
void frag_init_msg(frag_msg *f_msg, int frag_max);
//void frag_init_msg(frag_msg *f_msg, int frag_max);
void frag_init_msg(frag_msg *f_msg);
void frag_free_msg(frag_msg *f_msg, int line_num);
int frag_set_msg(map_msg *m_msg, int seed_i, int aln_i, int FLAG, frag_msg *f_msg, int frag_i);//FLAG 0: start/1:end / 2:seed
int frag_copy_msg(frag_msg *ff_msg, frag_msg *tf_msg);
Expand Down
20 changes: 9 additions & 11 deletions src/gem_parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void printcigar(FILE *outp, cigar32_t *cigar, int cigar_len)
}

void _push_cigar1(cigar32_t **cigar, int *cigar_n, int *cigar_m, cigar32_t _cigar) {
if (_cigar >> 4 == 0) return;
if ((_cigar >> 4) == 0) return;
int i;
i = *cigar_n;
if (((i-1) >=0) && (((*cigar)[i-1] & 0xf) == (_cigar & 0xf)))
Expand All @@ -28,6 +28,9 @@ void _push_cigar1(cigar32_t **cigar, int *cigar_n, int *cigar_m, cigar32_t _ciga
{
if (i == *cigar_m) {
(*cigar_m) <<= 1;
#ifdef __DEBUG__
fprintf(stderr, "[gem_parse]cigar_m: %d\n", *cigar_m);
#endif
(*cigar) = (cigar32_t*)realloc(*cigar, (*cigar_m) * sizeof (cigar32_t));
if ((*cigar) == NULL) {fprintf(stderr, "\n[frag_check] Memory is not enougy.\n");exit(1);}
}
Expand Down Expand Up @@ -233,14 +236,6 @@ int gem_map_msg(map_msg *m_msg, int max_n)
char chr[1024], strand, md[1024];
long long offset; char os[100];

//sscanf(gem_line, "%*[^\t]\t%*[^\t]\t%*[^\t]\t%[^\n]\n", aln_msg);
//if (strcmp(aln_msg, "-") == 0) {
//#ifdef __DEBUG__
//fprintf(stderr, "-\n");
//#endif
//goto RET;
//}

msg_len = strlen(aln_msg);
t = strtok(aln_msg, ",");
t_i = strlen(t)+1;
Expand All @@ -251,6 +246,9 @@ int gem_map_msg(map_msg *m_msg, int max_n)
}
if (m_msg->map_n == m_msg->map_m) {
m_msg->map_m <<= 1;
#ifdef __DEBUG__
fprintf(stderr, "[gem_parse]map_m: %d\n", m_msg->map_m);
#endif
m_msg->map = (map_t*)realloc(m_msg->map, m_msg->map_m * sizeof(map_t));
if (m_msg->map == NULL) {
fprintf(stderr, "[gem_map_read] Error: not enough memory.\n"); exit(0);
Expand All @@ -276,8 +274,8 @@ int gem_map_msg(map_msg *m_msg, int max_n)
printcigar(stderr, m_msg->map[m_msg->map_n].cigar->cigar, m_msg->map[m_msg->map_n].cigar->cigar_n);
fprintf(stderr, "\n");
#endif
if (strcmp(chr, "chrM")!=0)
++(m_msg->map_n);
///if (strcmp(chr, "chrM")!=0)
++(m_msg->map_n);
if (t_i >= msg_len) break;
t = strtok(aln_msg+t_i, ",");
if (t == NULL) break;
Expand Down
Loading

0 comments on commit 0b61fff

Please sign in to comment.