Skip to content

Commit 852845b

Browse files
committed
First commit
0 parents  commit 852845b

File tree

9 files changed

+1975
-0
lines changed

9 files changed

+1975
-0
lines changed

Diff for: README.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# imrep

Diff for: cast.py

+102
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
2+
3+
4+
from numpy import mean
5+
import networkx as nx
6+
from jellyfish import levenshtein_distance as edit_distance
7+
8+
9+
10+
11+
class Cast(object):
    """Greedy clustering of CDR3 sequences driven by pairwise affinities.

    Every key of the input mapping becomes a graph node whose ``weight``
    attribute is the integer value stored under that key.  The affinity of
    two different keys is ``1.0 / edit_distance``; the affinity of a key
    with itself is fixed at ``2.0``.  Pairs with affinity ``>= 0.5`` (edit
    distance 1 or 2, and every key with itself) are joined by an edge.
    """

    def __init__(self, raw_cdr3_dict):
        """Build the affinity table and the graph.

        :param raw_cdr3_dict: mapping of sequence -> count; counts are
            converted with ``int()``.
        """
        # ``unicode`` exists only on Python 2; on Python 3 ``str`` is the
        # text type, so fall back to it instead of raising NameError.
        try:
            to_text = unicode
        except NameError:
            to_text = str

        self.graph = nx.Graph()
        for cdr3, count in raw_cdr3_dict.items():
            self.graph.add_node(cdr3, weight=int(count))

        self._dist_dict = {}
        # Materialise the keys: a Python 3 dict view cannot be indexed.
        keys = list(raw_cdr3_dict)

        for i, x in enumerate(keys):
            # Start at i so each unordered pair is visited once, including
            # the (x, x) pair that receives the fixed self-affinity.
            for u in keys[i:]:
                if x != u:
                    w = 1.0 / int(edit_distance(to_text(x), to_text(u)))
                else:
                    w = 2.0
                # Stored in both orientations so lookups need no ordering.
                self._dist_dict[(x, u)] = w
                self._dist_dict[(u, x)] = w
                if w >= 0.5:
                    self.graph.add_edge(x, u, weight=w)

    def __close(self, nodes, cluster, threshold):
        """Return the unassigned node with the highest mean affinity to
        ``cluster``, considering only nodes whose mean is strictly above
        ``threshold``; return ``None`` when no node qualifies.
        """
        candidates = []
        for node1 in nodes:
            affinities = [self._dist_dict[(node1, node2)] for node2 in cluster]
            mean_affinity = mean(affinities)
            if mean_affinity > threshold:
                candidates.append((node1, mean_affinity))
        if candidates:
            return max(candidates, key=lambda z: z[1])[0]
        return None

    def __distant(self, cluster, threshold):
        """Return the cluster member with the lowest mean affinity to the
        cluster (its own self-affinity is part of the mean), considering
        only members whose mean is strictly below ``threshold``; return
        ``None`` when no member qualifies.
        """
        candidates = []
        for node1 in cluster:
            affinities = [self._dist_dict[(node1, node2)] for node2 in cluster]
            mean_affinity = mean(affinities)
            if mean_affinity < threshold:
                candidates.append((node1, mean_affinity))
        if candidates:
            return min(candidates, key=lambda z: z[1])[0]
        return None

    def __cast(self, nodes, threshold):
        """Partition ``nodes`` into clusters.

        Each round seeds a cluster with the highest-degree node left in
        the graph, then alternately adds the closest outside node and
        drops the most distant member until neither exists.  Finished
        clusters are removed from ``self.graph`` and ``nodes`` is consumed.

        :returns: list of clusters, each a list of ``(node, weight)`` tuples.
        :raises ValueError: if a cluster ends up empty, which would
            otherwise keep the outer loop running forever.
        """
        # Read the weights once through an accessor available in every
        # networkx release; ``Graph.node`` was removed in networkx 2.4.
        weights = nx.get_node_attributes(self.graph, "weight")
        partition = []
        while nodes:
            seed = max(self.graph.nodes(),
                       key=lambda v: self.graph.degree(v))
            cluster = set([seed])
            nodes.remove(seed)
            cl = self.__close(nodes, cluster, threshold)
            dist = self.__distant(cluster, threshold)
            # Compare with None explicitly so a falsy node key (such as an
            # empty string) is not mistaken for "no candidate".
            # NOTE(review): there is no iteration cap here; termination of
            # the add/remove alternation is not enforced by this code.
            while cl is not None or dist is not None:
                if cl is not None:
                    cluster.add(cl)
                    nodes.remove(cl)
                if dist is not None:
                    cluster.remove(dist)
                    nodes.add(dist)
                cl = self.__close(nodes, cluster, threshold)
                dist = self.__distant(cluster, threshold)
            if not cluster:
                # The seed itself was dropped (its self-affinity of 2.0 is
                # below the threshold), so nothing would be removed from
                # the graph and ``nodes`` would never shrink.
                raise ValueError(
                    "threshold %r produced an empty cluster; it must not "
                    "exceed the self-affinity of 2.0" % (threshold,))
            partition.append([(node, weights[node]) for node in cluster])
            for vert in cluster:
                self.graph.remove_node(vert)
        return partition

    def doCast(self, threshold):
        """Cluster all sequences and return one representative per cluster.

        The representative is the cluster member with the largest weight.
        Clustering removes every clustered node from ``self.graph``, so a
        second call on the same instance returns an empty list.

        :param threshold: mean-affinity cut-off used for both adding nodes
            to and removing nodes from a cluster.
        :returns: list of representative sequences, one per cluster.
        """
        nodes = set(self.graph.nodes())
        partition = self.__cast(nodes, threshold)
        return [max(part, key=lambda z: z[1])[0] for part in partition]
99+
100+
101+
102+

Diff for: db/IGHJ.faa

+26
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
>J00256|IGHJ1*01|Homo sapiens|F|J-REGION|723..774|52 nt|1| | | |17 AA|17+0=17| | |
2+
AEYFQHWGQGTLVTVSS
3+
>J00256|IGHJ2*01|Homo sapiens|F|J-REGION|932..984|53 nt|2| | | |17 AA|17+0=17| | |
4+
YWYFDLWGRGTLVTVSS
5+
>J00256|IGHJ3*01|Homo sapiens|F|J-REGION|1537..1586|50 nt|2| | | |16 AA|16+0=16| | |
6+
DAFDVWGQGTMVTVSS
7+
>X86355|IGHJ3*02|Homo sapiens|F|J-REGION|1107..1156|50 nt|2| | | |16 AA|16+0=16| | |
8+
DAFDIWGQGTMVTVSS
9+
>J00256|IGHJ4*01|Homo sapiens|F|J-REGION|1912..1959|48 nt|3| | | |15 AA|15+0=15| | |
10+
YFDYWGQGTLVTVSS
11+
>X86355|IGHJ4*02|Homo sapiens|F|J-REGION|1480..1527|48 nt|3| | | |15 AA|15+0=15| | |
12+
YFDYWGQGTLVTVSS
13+
>M25625|IGHJ4*03|Homo sapiens|F|J-REGION|446..493|48 nt|3| | | |15 AA|15+0=15| | |
14+
YFDYWGQGTLVTVSS
15+
>J00256|IGHJ5*01|Homo sapiens|F|J-REGION|2354..2404|51 nt|3| | | |16 AA|16+0=16| | |
16+
NWFDSWGQGTLVTVSS
17+
>X86355|IGHJ5*02|Homo sapiens|F|J-REGION|1878..1928|51 nt|3| | | |16 AA|16+0=16| | |
18+
NWFDPWGQGTLVTVSS
19+
>J00256|IGHJ6*01|Homo sapiens|F|J-REGION|2947..3009|63 nt|3| | | |20 AA|20+0=20| | |
20+
YYYYYGMDVWGQGTTVTVSS
21+
>X86355|IGHJ6*02|Homo sapiens|F|J-REGION|2482..2543|62 nt|3| | | |20 AA|20+0=20| | |
22+
YYYYYGMDVWGQGTTVTVSS
23+
>X86356|IGHJ6*03|Homo sapiens|F|J-REGION|2482..2543|62 nt|3| | | |20 AA|20+0=20| | |
24+
YYYYYYMDVWGKGTTVTVSS
25+
>AJ879487|IGHJ6*04|Homo sapiens|F|J-REGION|39..101|63 nt|3| | | |20 AA|20+0=20| | |
26+
YYYYYGMDVWGKGTTVTVSS

Diff for: db/IGHV.faa

+1,050
Large diffs are not rendered by default.

Diff for: db/TRAJ.faa

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
>X02884|TRAJ1*01|Homo sapiens|ORF|J-REGION|2363..2424|62 nt|2| | | |20 AA|20+0=20| | |
2+
YESITSQLQFGKGTRVSTSP
3+
>M94081|TRAJ10*01|Homo sapiens|F|J-REGION|73699..73762|64 nt|1| | | |21 AA|21+0=21| | |
4+
ILTGGGNKLTFGTGTQLKVEL
5+
>M94081|TRAJ11*01|Homo sapiens|F|J-REGION|72706..72765|60 nt|3| | | |19 AA|19+0=19| | |
6+
NSGYSTLTFGKGTMLLVSP
7+
>X02885|TRAJ12*01|Homo sapiens|F|J-REGION|53..112|60 nt|3| | | |19 AA|19+0=19| | |
8+
MDSSYKLIFGSGTRLLVRP
9+
>M94081|TRAJ13*01|Homo sapiens|F|J-REGION|71280..71342|63 nt|3| | | |20 AA|20+0=20| | |
10+
NSGGYQKVTFGIGTKLQVIP
11+
>AC023226|TRAJ13*02|Homo sapiens|F|J-REGION|51292..51354|63 nt|3| | | |20 AA|20+0=20| |rev-compl|
12+
NSGGYQKVTFGTGTKLQVIP
13+
>M94081|TRAJ14*01|Homo sapiens|F|J-REGION|70532..70583|52 nt|1| | | |17 AA|17+0=17| | |
14+
IYSTFIFGSGTRLSVKP
15+
>X05775|TRAJ15*01|Homo sapiens|F|J-REGION|80..139|60 nt|3| | | |19 AA|19+0=19| | |
16+
NQAGTALIFGKGTTLSVSS
17+
>M94081|TRAJ15*02|Homo sapiens|F|J-REGION|69836..69895|60 nt|3| | | |19 AA|19+0=19| | |
18+
NQAGTALIFGKGTHLSVSS
19+
>M94081|TRAJ16*01|Homo sapiens|F|J-REGION|68743..68802|60 nt|3| | | |19 AA|19+0=19| | |
20+
FSDGQKLLFARGTMLKVDL
21+
>X05773|TRAJ17*01|Homo sapiens|F|J-REGION|173..235|63 nt|3| | | |20 AA|20+0=20| | |
22+
IKAAGNKLTFGGGTRVLVKP
23+
>M94081|TRAJ18*01|Homo sapiens|F|J-REGION|65876..65941|66 nt|3| | | |21 AA|21+0=21| | |
24+
DRGSTLGRLYFGRGTQLTVWP
25+
>M94081|TRAJ19*01|Homo sapiens|ORF|J-REGION|65489..65548|60 nt|3| | | |19 AA|19+0=19| | |
26+
YQRFYNFTFGKGSKHNVTP
27+
>X02884|TRAJ2*01|Homo sapiens|ORF|J-REGION|1397..1462|66 nt|3| | | |21 AA|21+0=21| | |
28+
NTGGTIDKLTFGKGTHVFIIS
29+
>M94081|TRAJ20*01|Homo sapiens|F|J-REGION|64552..64608|57 nt|3| | | |18 AA|18+0=18| | |
30+
SNDYKLSFGAGTTVTVRA
31+
>M94081|TRAJ21*01|Homo sapiens|F|J-REGION|63829..63883|55 nt|1| | | |18 AA|18+0=18| | |
32+
YNFNKFYFGSGTKLNVKP
33+
>X02886|TRAJ22*01|Homo sapiens|F|J-REGION|405..467|63 nt|3| | | |20 AA|20+0=20| | |
34+
SSGSARQLTFGSGTQLTVLP
35+
>M94081|TRAJ23*01|Homo sapiens|F|J-REGION|60650..60712|63 nt|3| | | |20 AA|20+0=20| | |
36+
IYNQGGKLIFGQGTELSVKP
37+
>X58763|TRAJ23*02|Homo sapiens|(F)|J-REGION|61..123|63 nt|3| | | |20 AA|20+0=20| | |
38+
IYNQGGKLIFGQGTELSVKP
39+
>X02887|TRAJ24*01|Homo sapiens|F|J-REGION|124..186|63 nt|3| | | |20 AA|20+0=20| | |
40+
TTDSWGKFEFGAGTQVVVTP
41+
>M94081|TRAJ24*02|Homo sapiens|F|J-REGION|60203..60265|63 nt|3| | | |20 AA|20+0=20| | |
42+
TTDSWGKLQFGAGTQVVVTP
43+
>X02888|TRAJ25*01|Homo sapiens|ORF|J-REGION|35..94|60 nt|3| | | |19 AA|19+0=19| | |
44+
EGQGFSFIFGKGTRLLVKP
45+
>M94081|TRAJ26*01|Homo sapiens|F|J-REGION|58680..58739|60 nt|3| | | |19 AA|19+0=19| | |
46+
DNYGQNFVFGPGTRLSVLP
47+
>M94081|TRAJ27*01|Homo sapiens|F|J-REGION|56507..56565|59 nt|2| | | |19 AA|19+0=19| | |
48+
NTNAGKSTFGDGTTLTVKP
49+
>M94081|TRAJ28*01|Homo sapiens|F|J-REGION|55857..55922|66 nt|3| | | |21 AA|21+0=21| | |
50+
YSGAGSYQLTFGKGTKLSVIP
51+
>M94081|TRAJ29*01|Homo sapiens|F|J-REGION|54177..54236|60 nt|3| | | |19 AA|19+0=19| | |
52+
NSGNTPLVFGKGTRLSVIA
53+
>X02884|TRAJ3*01|Homo sapiens|F|J-REGION|504..565|62 nt|2| | | |20 AA|20+0=20| | |
54+
GYSSASKIIFGSGTRLSIRP
55+
>M94081|TRAJ30*01|Homo sapiens|F|J-REGION|53090..53146|57 nt|3| | | |18 AA|18+0=18| | |
56+
NRDDKIIFGKGTRLHILP
57+
>M14905|TRAJ31*01|Homo sapiens|F|J-REGION|77..132|56 nt|2| | | |18 AA|18+0=18| | |
58+
NNNARLMFGDGTQLVVKP
59+
>M94081|TRAJ32*01|Homo sapiens|F|J-REGION|49581..49646|66 nt|3| | | |21 AA|21+0=21| | |
60+
NYGGATNKLIFGTGTLLAVQP
61+
>AF532854|TRAJ32*02|Homo sapiens|(F)|J-REGION|240..299,a|60 nt|3| |1| |19 AA|19+0=19|partial in 5'| |
62+
GGATNKLIFGTGTLLAVQP
63+
>M94081|TRAJ33*01|Homo sapiens|F|J-REGION|48843..48899|57 nt|3| | | |18 AA|18+0=18| | |
64+
DSNYQLIWGAGTKLIIKP
65+
>M35622|TRAJ34*01|Homo sapiens|F|J-REGION|30..87|58 nt|1| | | |19 AA|19+0=19| | |
66+
SYNTDKLIFGTGTRLQVFP
67+
>M94081|TRAJ35*01|Homo sapiens|ORF|J-REGION|46885..46943|59 nt|2| | | |19 AA|19+0=19| | |
68+
IGFGNVLHCGSGTQVIVLP
69+
>M94081|TRAJ36*01|Homo sapiens|F|J-REGION|45353..45411|59 nt|2| | | |19 AA|19+0=19| | |
70+
QTGANNLFFGTGTRLTVIP
71+
>M94081|TRAJ37*01|Homo sapiens|F|J-REGION|43993..44054|62 nt|2| | | |20 AA|20+0=20| | |
72+
GSGNTGKLIFGQGTTLQVKP
73+
>AJ007774|TRAJ37*02|Homo sapiens|(F)|J-REGION|336..397,g|62 nt|2| |1| |20 AA|20+0=20| | |
74+
GSSNTGKLIFGQGTTLQVKP
75+
>M94081|TRAJ38*01|Homo sapiens|F|J-REGION|42473..42534|62 nt|2| | | |20 AA|20+0=20| | |
76+
NAGNNRKLIWGLGTSLAVNP
77+
>M94081|TRAJ39*01|Homo sapiens|F|J-REGION|41843..41905|63 nt|3| | | |20 AA|20+0=20| | |
78+
NNNAGNMLTFGGGTRLMVKP
79+
>M94081|TRAJ4*01|Homo sapiens|F|J-REGION|82396..82458|63 nt|3| | | |20 AA|20+0=20| | |
80+
FSGGYNKLIFGAGTRLAVHP
81+
>M35620|TRAJ40*01|Homo sapiens|F|J-REGION|35..95|61 nt|1| | | |20 AA|20+0=20| | |
82+
TTSGTYKYIFGTGTRLKVLA
83+
>M94081|TRAJ41*01|Homo sapiens|F|J-REGION|37900..37961|62 nt|2| | | |20 AA|20+0=20| | |
84+
NSNSGYALNFGKGTSLLVTP
85+
>M94081|TRAJ42*01|Homo sapiens|F|J-REGION|37130..37195|66 nt|3| | | |21 AA|21+0=21| | |
86+
NYGGSQGNLIFGKGTKLSVKP
87+
>M94081|TRAJ43*01|Homo sapiens|F|J-REGION|36154..36207|54 nt|3| | | |17 AA|17+0=17| | |
88+
NNNDMRFGAGTRLTVKP
89+
>M35619|TRAJ44*01|Homo sapiens|F|J-REGION|30..92|63 nt|3| | | |20 AA|20+0=20| | |
90+
NTGTASKLTFGTGTRLQVTL
91+
>M94081|TRAJ45*01|Homo sapiens|F|J-REGION|34169..34234|66 nt|3| | | |21 AA|21+0=21| | |
92+
YSGGGADGLTFGKGTHLIIQP
93+
>M94081|TRAJ46*01|Homo sapiens|F|J-REGION|33647..33709|63 nt|3| | | |20 AA|20+0=20| | |
94+
KKSSGDKLTFGTGTRLAVRP
95+
>M94081|TRAJ47*01|Homo sapiens|F|J-REGION|33096..33152|57 nt|3| | | |18 AA|18+0=18| | |
96+
EYGNKLVFGAGTILRVKS
97+
>AF033825|TRAJ47*02|Homo sapiens|(F)|J-REGION|312..368,t|57 nt|3| |1| |18 AA|18+0=18| | |
98+
EYGNKLVFGAGTILRVKS
99+
>M94081|TRAJ48*01|Homo sapiens|F|J-REGION|30737..30799|63 nt|3| | | |20 AA|20+0=20| | |
100+
SNFGNEKLTFGTGTRLTIIP
101+
>M94081|TRAJ49*01|Homo sapiens|F|J-REGION|29734..29789|56 nt|2| | | |18 AA|18+0=18| | |
102+
NTGNQFYFGTGTSLTVIP
103+
>M94081|TRAJ5*01|Homo sapiens|F|J-REGION|80444..80503|60 nt|3| | | |19 AA|19+0=19| | |
104+
DTGRRALTFGSGTRLQVQP
105+
>M94081|TRAJ50*01|Homo sapiens|F|J-REGION|28839..28898|60 nt|3| | | |19 AA|19+0=19| | |
106+
KTSYDKVIFGPGTSLSVIP
107+
>M94081|TRAJ51*01|Homo sapiens|P|J-REGION|27429..27491|63 nt|3| | | |20 AA|20+0=20| | |
108+
MRDSYEKLIFGKET*LTVKP
109+
>M94081|TRAJ52*01|Homo sapiens|F|J-REGION|26474..26542|69 nt|3| | | |22 AA|22+0=22| | |
110+
NAGGTSYGKLTFGQGTILTVHP
111+
>M94081|TRAJ53*01|Homo sapiens|F|J-REGION|23251..23316|66 nt|3| | | |21 AA|21+0=21| | |
112+
NSGGSNYKLTFGKGTLLTVNP
113+
>M94081|TRAJ54*01|Homo sapiens|F|J-REGION|22534..22593|60 nt|3| | | |19 AA|19+0=19| | |
114+
IQGAQKLVFGQGTRLTINP
115+
>M94081|TRAJ55*01|Homo sapiens|P|J-REGION|21944..22000|57 nt|3| | | |18 AA|18+0=18| | |
116+
KCW*CSCWGKGMSTKINP
117+
>M94081|TRAJ56*01|Homo sapiens|F|J-REGION|19768..19829|62 nt|2| | | |20 AA|20+0=20| | |
118+
YTGANSKLTFGKGITLSVRP
119+
>M94081|TRAJ57*01|Homo sapiens|F|J-REGION|19119..19181|63 nt|3| | | |20 AA|20+0=20| | |
120+
TQGGSEKLVFGKGTKLTVNP
121+
>M94081|TRAJ58*01|Homo sapiens|ORF|J-REGION|17954..18016|63 nt|3| | | |20 AA|20+0=20| | |
122+
*ETSGSRLTFGEGTQLTVNP
123+
>M94081|TRAJ59*01|Homo sapiens|ORF|J-REGION|16801..16854|54 nt|3| | | |17 AA|17+0=17| | |
124+
KEGNRKFTFGMGTQVRV
125+
>M16747|TRAJ6*01|Homo sapiens|F|J-REGION|31..92|62 nt|2| | | |20 AA|20+0=20| | |
126+
ASGGSYIPTFGRGTSLIVHP
127+
>M94081|TRAJ60*01|Homo sapiens|P|J-REGION|16554..16610|57 nt|3| | | |18 AA|18+0=18| | |
128+
KIT*MLNFGKGTELIVSL
129+
>M94081|TRAJ61*01|Homo sapiens|ORF|J-REGION|15564..15623|60 nt|3| | | |19 AA|19+0=19| | |
130+
YRVNRKLTFGANTRGIMKL
131+
>M94081|TRAJ7*01|Homo sapiens|F|J-REGION|77821..77879|59 nt|2| | | |19 AA|19+0=19| | |
132+
DYGNNRLAFGKGNQVVVIP
133+
>M94081|TRAJ8*01|Homo sapiens|F|J-REGION|76346..76405|60 nt|3| | | |19 AA|19+0=19| | |
134+
NTGFQKLVFGTGTRLLVSP
135+
>M94081|TRAJ9*01|Homo sapiens|F|J-REGION|75756..75816|61 nt|1| | | |20 AA|20+0=20| | |
136+
GNTGGFKTIFGAGTRLFVKA

0 commit comments

Comments
 (0)