forked from lh3/kmer-cnt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkc-py1.py
executable file
·46 lines (40 loc) · 945 Bytes
/
kc-py1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
import sys
base_for = "ACGT"
base_rev = "TGCA"
comp_tab = str.maketrans(base_for, base_rev)
def count_kmer(h, k, seq):
l = len(seq)
if l < k: return
for i in range(l - k + 1):
kmer_for = seq[i:(i+k)]
if 'N' in kmer_for: continue
kmer_rev = kmer_for.translate(comp_tab)[::-1]
if kmer_for < kmer_rev: kmer = kmer_for
else: kmer = kmer_rev
if kmer in h:
h[kmer] += 1
else: h[kmer] = 1
def count_stdin(k):
counter = {}
seq = []
for line in sys.stdin:
if line[0] == '>':
if len(seq) > 0:
count_kmer(counter, k, ''.join(seq))
seq = []
else:
seq.append(line[:-1])
if len(seq) > 0:
count_kmer(counter, k, ''.join(seq).upper())
return counter
def print_hist(counter):
hist = [0] * 256
for kmer in counter:
cnt = counter[kmer]
if cnt > 255: cnt = 255
hist[cnt] += 1
for i in range(1, 256):
print("{}\t{}".format(i, hist[i]))
counter = count_stdin(31)
print_hist(counter)