4
4
from api .models .annotations_lookup import AgiAlias
5
5
from api .models .eplant2 import Isoforms as EPlant2Isoforms
6
6
from api .models .eplant2 import Publications as EPlant2Publications
7
+ from api .models .eplant2 import TAIR10GFF3 as EPlant2TAIR10GFF3
8
+ from api .models .eplant2 import AgiAlias as EPlant2AgiAlias
9
+ from api .models .eplant2 import AgiAnnotation as EPlant2AgiAnnotation
7
10
from api .models .eplant_poplar import Isoforms as EPlantPoplarIsoforms
8
11
from api .models .eplant_tomato import Isoforms as EPlantTomatoIsoforms
9
12
from api .models .eplant_soybean import Isoforms as EPlantSoybeanIsoforms
14
17
15
18
gene_information = Namespace ("Gene Information" , description = "Information about Genes" , path = "/gene_information" )
16
19
20
# Request parser for a repeatable "terms" argument (one gene ID per occurrence).
parser = gene_information.parser()
parser.add_argument(
    "terms",
    # Each occurrence is converted individually and action="append" collects
    # the converted values into a list. With type=list every gene ID would be
    # passed through list() and split into single characters, so the element
    # type has to be str.
    type=str,
    action="append",
    required=True,
    help="Gene IDs, format example: AT1G01010",
    default=["AT1G01020", "AT1G01030"],
)
29
+
17
30
# I think this is only needed for Swagger UI POST
18
31
gene_information_request_fields = gene_information .model (
19
32
"GeneInformation" ,
27
40
},
28
41
)
29
42
43
# Request body expected by the POST /gene_query endpoint: a species name and
# a list of gene terms.
# The model name has to be unique inside the namespace. "GeneInformation" is
# already used by gene_information_request_fields, and registering a second
# model under the same name would make the two schemas collide.
query_genes_request_fields = gene_information.model(
    "GeneQuery",
    {
        "species": fields.String(required=True, example="arabidopsis"),
        "terms": fields.List(
            required=True,
            example=["AT1G01010", "AT1G01020"],
            cls_or_instance=fields.String,
        ),
    },
)
54
+
30
55
31
56
# Validation is done in a different way to keep things simple
32
57
class GeneInformationSchema (Schema ):
@@ -99,16 +124,13 @@ def get(self, species="", gene_id=""):
99
124
species = escape (species )
100
125
gene_id = escape (gene_id )
101
126
102
- # truncate gene ID
103
- for i in range (len (gene_id )):
104
- if gene_id [i ] == "." :
105
- gene_id = gene_id [0 :i ]
106
- break
107
-
108
127
# Set the database and check if genes are valid
109
128
if species == "arabidopsis" :
110
129
database = EPlant2Publications
111
130
131
+ # Remove Arabidopsis isoforms
132
+ gene_id = gene_id .split ("." )[0 ]
133
+
112
134
if not BARUtils .is_arabidopsis_gene_valid (gene_id ):
113
135
return BARUtils .error_exit ("Invalid gene id" ), 400
114
136
else :
@@ -135,6 +157,277 @@ def get(self, species="", gene_id=""):
135
157
return BARUtils .error_exit ("There are no data found for the given gene" )
136
158
137
159
160
@gene_information.route(
    "/genes_by_position/<string:species>/<string:chromosome>/<string:start_param>/<string:end_param>"
)
class GeneTair10Gff3(Resource):
    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("chromosome", _in="path", default="1")
    @gene_information.param("start_param", _in="path", default=3000)
    @gene_information.param("end_param", _in="path", default=6000)
    def get(self, species="", chromosome="", start_param="", end_param=""):
        """This end point provides genes given position.

        Returns every record of type "gene" on the given chromosome whose
        [Start, End] span overlaps the requested [start_param, end_param]
        interval, together with its aliases and annotation.

        :param species: species name; only "arabidopsis" is handled here
        :param chromosome: one of "1"-"5", "C" or "M"
        :param start_param: interval start, an integer passed as a path string
        :param end_param: interval end, an integer passed as a path string
        :return: a success payload holding a list of gene dictionaries, or an
            error payload with HTTP status 400 for invalid input
        """

        # Check if all parameters are provided
        if not chromosome or not start_param or not end_param:
            return BARUtils.error_exit("Missing parameters"), 400

        # Validate the coordinates BEFORE comparing them. Path parameters
        # arrive as strings, and comparing strings is lexicographic
        # ("900" >= "1000" is True), which would reject valid ranges.
        if not BARUtils.is_integer(start_param) or not BARUtils.is_integer(end_param):
            return BARUtils.error_exit("At least one parameter is not valid"), 400

        try:
            start = int(start_param)
            end = int(end_param)
        except (TypeError, ValueError):
            # Safety net in case BARUtils.is_integer accepts something int() does not.
            return BARUtils.error_exit("At least one parameter is not valid"), 400

        # Check if the start param is smaller than end param
        if start >= end:
            return BARUtils.error_exit("Start location should be smaller than the end location"), 400

        # Escape the remaining free-text input (coordinates are plain ints now)
        species = escape(species)
        chromosome = escape(chromosome)

        # Set database
        if species == "arabidopsis":
            gff3_database = EPlant2TAIR10GFF3
            alias_database = EPlant2AgiAlias
            annotation_database = EPlant2AgiAnnotation

            if chromosome not in ["1", "2", "3", "4", "5", "C", "M"]:
                return BARUtils.error_exit("Invalid chromosome"), 400

            # Arabidopsis gene IDs start with AT<chromosome>G
            gene_id_prefix = "AT" + str(chromosome) + "G"
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # A gene overlaps the interval when it starts inside it, ends inside
        # it, or spans it completely.
        gene_query = db.select(
            gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand
        ).where(
            gff3_database.Type == "gene",
            gff3_database.geneId.startswith(gene_id_prefix),
            (
                gff3_database.Start.between(start, end)
                | gff3_database.End.between(start, end)
                | ((gff3_database.Start < start) & (gff3_database.End > end))
            ),
        )
        gene_rows = db.session.execute(gene_query).all()
        gene_ids = [row[0] for row in gene_rows]

        # Get aliases, grouped by gene ID
        alias_query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi.in_(gene_ids))
        all_aliases = {}
        for agi, alias in db.session.execute(alias_query).all():
            all_aliases.setdefault(agi, []).append(alias)

        # Get annotations. When the text contains "__", the part after the
        # first separator is used.
        annotation_query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi.in_(gene_ids)
        )
        all_annotations = {}
        for agi, annotation in db.session.execute(annotation_query).all():
            parts = annotation.split("__")
            all_annotations[agi.upper()] = parts[1] if len(parts) > 1 else parts[0]

        genes = [
            {
                "id": row[0],
                "start": row[1],
                "end": row[2],
                "strand": row[3],
                "aliases": all_aliases.get(row[0], []),
                # Annotations are stored under upper-cased keys, so look them up the same way.
                "annotation": all_annotations.get(row[0].upper(), None),
            }
            for row in gene_rows
        ]

        return BARUtils.success_exit(genes)
253
+
254
+
255
@gene_information.route("/gene_query")
class GeneQueryGene(Resource):
    @gene_information.expect(query_genes_request_fields)
    def post(self):
        """This end point provides gene information for multiple genes given multiple terms.

        Expects a JSON body with a "species" string and a "terms" list of gene
        IDs. Returns a success payload mapping each gene ID that was found to
        its chromosome, start, end, strand, aliases and annotation, or an
        error payload with HTTP status 400 for invalid input.
        """

        # Reject a missing or malformed body instead of failing with a KeyError
        data = request.get_json()
        if not isinstance(data, dict) or "species" not in data or "terms" not in data:
            return BARUtils.error_exit("Missing parameters"), 400

        species = data["species"]
        terms = data["terms"]
        if not isinstance(terms, list) or not all(isinstance(one_term, str) for one_term in terms):
            return BARUtils.error_exit("Input list contains invalid term"), 400

        # Normalise the terms. str.upper() returns a new string, so the result
        # has to be kept; calling it in a bare loop has no effect.
        terms = [one_term.upper() for one_term in terms]

        # Species check
        if species == "arabidopsis":
            # Term check
            for one_term in terms:
                if not BARUtils.is_arabidopsis_gene_valid(one_term):
                    return BARUtils.error_exit("Input list contains invalid term"), 400

            alias_database = EPlant2AgiAlias
            gff3_database = EPlant2TAIR10GFF3
            annotation_database = EPlant2AgiAnnotation
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # Resolve every term to a gene ID, trying the alias table first
        gene_ids = []
        gene_fail = []
        for one_term in terms:
            query = db.select(alias_database.agi).where(alias_database.agi.contains(one_term)).limit(1)
            result = db.session.execute(query).fetchone()
            if result is not None:
                gene_ids.append(result[0])
            else:
                gene_fail.append(one_term)

        # Terms unknown to the alias table are looked up in the GFF3 table
        for fail_term in gene_fail:
            query = (
                db.select(gff3_database.geneId)
                .where(
                    ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
                    gff3_database.geneId.contains(fail_term),
                )
                .limit(1)
            )
            result = db.session.execute(query).fetchone()
            if result:
                gene_ids.append(result[0])

        # Nothing resolved: the answer is an empty mapping, no further queries needed
        if not gene_ids:
            return BARUtils.success_exit({})

        # Find position information for each resolved gene
        query = db.select(gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand).where(
            ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
            gff3_database.Source == "TAIR10",
            gff3_database.geneId.in_(gene_ids),
        )
        genes_info = {}
        for row in db.session.execute(query).all():
            # Keep the first record seen for a gene ID
            if row[0] not in genes_info:
                genes_info[row[0]] = {
                    "id": row[0],
                    # Third character of the ID (AT<chromosome>G...) names the chromosome
                    "chromosome": "Chr" + row[0][2:3],
                    "start": row[1],
                    "end": row[2],
                    "strand": row[3],
                    "aliases": [],
                    "annotation": None,
                }

        # Get aliases
        query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi.in_(gene_ids))
        for agi, alias in db.session.execute(query).all():
            if agi in genes_info:
                genes_info[agi]["aliases"].append(alias)

        # Get annotations. When the text contains "__", the part after the
        # first separator is used.
        query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi.in_(gene_ids)
        )
        for agi, annotation in db.session.execute(query).all():
            key = agi.upper()
            if key in genes_info:
                parts = annotation.split("__")
                genes_info[key]["annotation"] = parts[1] if len(parts) > 1 else parts[0]

        return BARUtils.success_exit(genes_info)
347
+
348
+
349
@gene_information.route("/single_gene_query/<string:species>/<string:term>")
class SingleGeneQueryGene(Resource):
    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("term", _in="path", default="AT1G01010")
    def get(self, species="", term=""):
        """This end point provides gene information for a single gene given one term.

        :param species: species name; only "arabidopsis" is handled here
        :param term: gene ID, upper-cased before use
        :return: a success payload mapping the gene ID to its chromosome,
            start, end, strand, aliases and annotation (an empty mapping when
            nothing is found), or an error payload with HTTP status 400 for
            invalid input
        """

        # Escape input
        species = escape(species)
        term = escape(term).upper()

        # Species check
        if species == "arabidopsis":
            alias_database = EPlant2AgiAlias
            gff3_database = EPlant2TAIR10GFF3
            annotation_database = EPlant2AgiAnnotation

            # Term check
            if not BARUtils.is_arabidopsis_gene_valid(term):
                return BARUtils.error_exit("Input term invalid"), 400
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # Is the term known at all? Try the alias table first, then the GFF3 table.
        query = db.select(alias_database.agi).where(alias_database.agi == term).limit(1)
        result = db.session.execute(query).fetchone()

        if not result:
            query = (
                db.select(gff3_database.geneId)
                .where(
                    ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
                    gff3_database.geneId == term,
                )
                .limit(1)
            )
            result = db.session.execute(query).fetchone()

        genes_info = {}
        if not result:
            return BARUtils.success_exit(genes_info)

        # Find information for the term
        query = db.select(gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand).where(
            ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
            gff3_database.Source == "TAIR10",
            gff3_database.geneId == term,
        )
        record = db.session.execute(query).fetchone()

        # The term can be present in the alias table (or in the GFF3 table
        # under another source) without having a TAIR10 gene record. Indexing
        # the missing row would raise a TypeError, so report "nothing found".
        if record is None:
            return BARUtils.success_exit(genes_info)

        # This is Arabidopsis specific: the third character of the ID
        # (AT<chromosome>G...) names the chromosome.
        gene = {
            "id": record[0],
            "chromosome": "Chr" + record[0][2:3],
            "start": record[1],
            "end": record[2],
            "strand": record[3],
            "aliases": [],
            "annotation": None,
        }
        genes_info[record[0]] = gene

        # Get aliases, skipping duplicates
        query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi == term)
        for row in db.session.execute(query).all():
            if row[1] not in gene["aliases"]:
                gene["aliases"].append(row[1])

        # Get annotations. When the text contains "__", the part after the
        # first separator is used; with several rows the last one wins.
        query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi == term
        )
        for row in db.session.execute(query).all():
            parts = row[1].split("__")
            gene["annotation"] = parts[1] if len(parts) > 1 else parts[0]

        return BARUtils.success_exit(genes_info)
429
+
430
+
138
431
@gene_information .route ("/gene_isoforms/<string:species>/<string:gene_id>" )
139
432
class GeneIsoforms (Resource ):
140
433
@gene_information .param ("species" , _in = "path" , default = "arabidopsis" )
0 commit comments