44from api .models .annotations_lookup import AgiAlias
55from api .models .eplant2 import Isoforms as EPlant2Isoforms
66from api .models .eplant2 import Publications as EPlant2Publications
7+ from api .models .eplant2 import TAIR10GFF3 as EPlant2TAIR10GFF3
8+ from api .models .eplant2 import AgiAlias as EPlant2AgiAlias
9+ from api .models .eplant2 import AgiAnnotation as EPlant2AgiAnnotation
710from api .models .eplant_poplar import Isoforms as EPlantPoplarIsoforms
811from api .models .eplant_tomato import Isoforms as EPlantTomatoIsoforms
912from api .models .eplant_soybean import Isoforms as EPlantSoybeanIsoforms
1417
gene_information = Namespace("Gene Information", description="Information about Genes", path="/gene_information")

# Request parser describing a repeatable "terms" argument (one gene ID per occurrence).
parser = gene_information.parser()
parser.add_argument(
    "terms",
    # With action="append" the type is applied to each individual value, so it
    # must be the item type. Using `list` here would turn "AT1G01010" into a
    # list of single characters.
    type=str,
    action="append",
    required=True,
    help="Gene IDs, format example: AT1G01010",
    default=["AT1G01020", "AT1G01030"],
)
29+
1730# I think this is only needed for Swagger UI POST
1831gene_information_request_fields = gene_information .model (
1932 "GeneInformation" ,
2740 },
2841)
2942
# Request body of the multi-gene query endpoint (used for Swagger UI / expect()).
# The model needs its own name: "GeneInformation" is already taken by
# gene_information_request_fields, and models are registered by name, so
# reusing it would make one definition overwrite the other in the API schema.
query_genes_request_fields = gene_information.model(
    "GeneQuery",
    {
        "species": fields.String(required=True, example="arabidopsis"),
        "terms": fields.List(
            required=True,
            example=["AT1G01010", "AT1G01020"],
            cls_or_instance=fields.String,
        ),
    },
)
54+
3055
3156# Validation is done in a different way to keep things simple
3257class GeneInformationSchema (Schema ):
@@ -99,16 +124,13 @@ def get(self, species="", gene_id=""):
99124 species = escape (species )
100125 gene_id = escape (gene_id )
101126
102- # truncate gene ID
103- for i in range (len (gene_id )):
104- if gene_id [i ] == "." :
105- gene_id = gene_id [0 :i ]
106- break
107-
108127 # Set the database and check if genes are valid
109128 if species == "arabidopsis" :
110129 database = EPlant2Publications
111130
131+ # Remove Arabidopsis isoforms
132+ gene_id = gene_id .split ("." )[0 ]
133+
112134 if not BARUtils .is_arabidopsis_gene_valid (gene_id ):
113135 return BARUtils .error_exit ("Invalid gene id" ), 400
114136 else :
@@ -135,6 +157,277 @@ def get(self, species="", gene_id=""):
135157 return BARUtils .error_exit ("There are no data found for the given gene" )
136158
137159
@gene_information.route(
    "/genes_by_position/<string:species>/<string:chromosome>/<string:start_param>/<string:end_param>"
)
class GeneTair10Gff3(Resource):
    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("chromosome", _in="path", default="1")
    @gene_information.param("start_param", _in="path", default=3000)
    @gene_information.param("end_param", _in="path", default=6000)
    def get(self, species="", chromosome="", start_param="", end_param=""):
        """This end point provides genes given position.

        :param species: species name; only "arabidopsis" is handled here
        :param chromosome: chromosome label, one of 1-5, C or M for Arabidopsis
        :param start_param: start of the region (integer passed as a path string)
        :param end_param: end of the region (integer passed as a path string)
        :returns: ``BARUtils.success_exit`` with a list of gene dictionaries
            (id, start, end, strand, aliases, annotation), or
            ``BARUtils.error_exit`` with HTTP status 400 on invalid input
        """

        # Check if all parameters are provided
        if not chromosome or not start_param or not end_param:
            return BARUtils.error_exit("Missing parameters"), 400

        # Check if both positions are valid figures. This has to happen before
        # they are compared: the path values arrive as strings.
        if not BARUtils.is_integer(start_param) or not BARUtils.is_integer(end_param):
            return BARUtils.error_exit("At least one parameter is not valid"), 400

        try:
            start_position = int(start_param)
            end_position = int(end_param)
        except ValueError:
            # Defensive: is_integer accepted something int() cannot parse
            return BARUtils.error_exit("At least one parameter is not valid"), 400

        # Compare numerically; comparing the raw strings would order "900" after "1000"
        if start_position >= end_position:
            return BARUtils.error_exit("Start location should be smaller than the end location"), 400

        # Escape the remaining string input
        species = escape(species)
        chromosome = escape(chromosome)

        # Set database
        if species == "arabidopsis":
            gff3_database = EPlant2TAIR10GFF3
            alias_database = EPlant2AgiAlias
            annotation_database = EPlant2AgiAnnotation

            if chromosome not in ["1", "2", "3", "4", "5", "C", "M"]:
                return BARUtils.error_exit("Invalid chromosome"), 400

            # Arabidopsis gene format: AT<chromosome>G...
            gene_id_prefix = "AT" + str(chromosome) + "G"
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # Genes on the chromosome that overlap the region: they start inside it,
        # end inside it, or span it completely.
        gene_query = db.select(
            gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand
        ).where(
            gff3_database.Type == "gene",
            gff3_database.geneId.startswith(gene_id_prefix),
            (
                gff3_database.Start.between(start_position, end_position)
                | gff3_database.End.between(start_position, end_position)
                | ((gff3_database.Start < start_position) & (gff3_database.End > end_position))
            ),
        )
        gene_rows = db.session.execute(gene_query).all()
        gene_ids = [row[0] for row in gene_rows]

        # Get aliases, grouped per gene
        alias_query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi.in_(gene_ids))
        all_aliases = {}
        for agi, alias in db.session.execute(alias_query).all():
            all_aliases.setdefault(agi, []).append(alias)

        # Get annotations; when the stored text contains "__", the part after
        # the first separator is the one reported.
        annotation_query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi.in_(gene_ids)
        )
        all_annotations = {}
        for agi, annotation in db.session.execute(annotation_query).all():
            parts = annotation.split("__")
            all_annotations[agi.upper()] = parts[1] if len(parts) > 1 else parts[0]

        genes = [
            {
                "id": row[0],
                "start": row[1],
                "end": row[2],
                "strand": row[3],
                "aliases": all_aliases.get(row[0], []),
                "annotation": all_annotations.get(row[0], None),
            }
            for row in gene_rows
        ]

        return BARUtils.success_exit(genes)
253+
254+
@gene_information.route("/gene_query")
class GeneQueryGene(Resource):
    @gene_information.expect(query_genes_request_fields)
    def post(self):
        """This end point provides gene information for multiple genes given multiple terms.

        Expects a JSON body with ``species`` (string) and ``terms`` (list of
        gene ID strings).

        :returns: ``BARUtils.success_exit`` with a dictionary keyed by gene ID
            (id, chromosome, start, end, strand, aliases, annotation), or
            ``BARUtils.error_exit`` with HTTP status 400 on invalid input
        """

        # Read and sanity-check the request body so malformed input yields a
        # 400 instead of an unhandled exception.
        data = request.get_json()
        if not isinstance(data, dict) or "species" not in data or "terms" not in data:
            return BARUtils.error_exit("Missing parameters"), 400

        species = data["species"]
        terms = data["terms"]
        if not isinstance(terms, list) or not all(isinstance(one_term, str) for one_term in terms):
            return BARUtils.error_exit("Input list contains invalid term"), 400

        # str.upper() returns a new string, so the normalised terms must be kept
        terms = [one_term.upper() for one_term in terms]

        # Species check
        if species == "arabidopsis":
            # Term check
            for one_term in terms:
                if not BARUtils.is_arabidopsis_gene_valid(one_term):
                    return BARUtils.error_exit("Input list contains invalid term"), 400

            alias_database = EPlant2AgiAlias
            gff3_database = EPlant2TAIR10GFF3
            annotation_database = EPlant2AgiAnnotation
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # Resolve each term to a gene ID, first through the alias table
        gene_ids = []
        gene_fail = []
        for one_term in terms:
            query = db.select(alias_database.agi).where(alias_database.agi.contains(one_term)).limit(1)
            result = db.session.execute(query).fetchone()
            if result is not None:
                gene_ids.append(result[0])
            else:
                gene_fail.append(one_term)

        # Terms without an alias entry are looked up in the GFF3 table instead
        for fail_term in gene_fail:
            query = (
                db.select(gff3_database.geneId)
                .where(
                    ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
                    gff3_database.geneId.contains(fail_term),
                )
                .limit(1)
            )
            result = db.session.execute(query).fetchone()
            if result:
                gene_ids.append(result[0])

        # Nothing resolved: skip the remaining queries, the answer is empty
        if not gene_ids:
            return BARUtils.success_exit({})

        # Find information for each gene; only the first row per gene ID is kept
        query = db.select(gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand).where(
            ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
            gff3_database.Source == "TAIR10",
            gff3_database.geneId.in_(gene_ids),
        )
        genes_info = {}
        for row in db.session.execute(query).all():
            if row[0] not in genes_info:
                genes_info[row[0]] = {
                    "id": row[0],
                    # Arabidopsis specific: third character of the ID is the chromosome
                    "chromosome": "Chr" + row[0][2:3],
                    "start": row[1],
                    "end": row[2],
                    "strand": row[3],
                    "aliases": [],
                    "annotation": None,
                }

        # Get aliases
        query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi.in_(gene_ids))
        for row in db.session.execute(query).all():
            if row[0] in genes_info:
                genes_info[row[0]]["aliases"].append(row[1])

        # Get annotations; when the stored text contains "__", the part after
        # the first separator is the one reported.
        query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi.in_(gene_ids)
        )
        for row in db.session.execute(query):
            agi = row[0].upper()
            if agi in genes_info:
                parts = row[1].split("__")
                genes_info[agi]["annotation"] = parts[1] if len(parts) > 1 else parts[0]

        return BARUtils.success_exit(genes_info)
347+
348+
@gene_information.route("/single_gene_query/<string:species>/<string:term>")
class SingleGeneQueryGene(Resource):
    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("term", _in="path", default="AT1G01010")
    def get(self, species="", term=""):
        """This end point provides gene information for a single gene given one term.

        :param species: species name; only "arabidopsis" is handled here
        :param term: gene ID, for example AT1G01010 (upper-cased before use)
        :returns: ``BARUtils.success_exit`` with a dictionary keyed by gene ID
            (empty when the gene is not found), or ``BARUtils.error_exit`` with
            HTTP status 400 on invalid input
        """

        # Escape input
        species = escape(species)
        term = escape(term).upper()

        # Species check
        if species == "arabidopsis":
            alias_database = EPlant2AgiAlias
            gff3_database = EPlant2TAIR10GFF3
            annotation_database = EPlant2AgiAnnotation

            # Term check
            if not BARUtils.is_arabidopsis_gene_valid(term):
                return BARUtils.error_exit("Input term invalid"), 400
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # The gene is known if it appears in the alias table ...
        query = db.select(alias_database.agi).where(alias_database.agi == term).limit(1)
        known = db.session.execute(query).fetchone()

        # ... or, failing that, in the GFF3 table
        if not known:
            query = (
                db.select(gff3_database.geneId)
                .where(
                    ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
                    gff3_database.geneId == term,
                )
                .limit(1)
            )
            known = db.session.execute(query).fetchone()

        genes_info = {}
        if not known:
            return BARUtils.success_exit(genes_info)

        # Find information for the term
        query = db.select(gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand).where(
            ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
            gff3_database.Source == "TAIR10",
            gff3_database.geneId == term,
        )
        gene_row = db.session.execute(query).fetchone()

        # A gene can be present in the alias table yet have no TAIR10 row here;
        # report it as not found instead of failing on a None row.
        if gene_row is None:
            return BARUtils.success_exit(genes_info)

        # This is Arabidopsis specific: third character of the ID is the chromosome
        gene = {
            "id": gene_row[0],
            "chromosome": "Chr" + gene_row[0][2:3],
            "start": gene_row[1],
            "end": gene_row[2],
            "strand": gene_row[3],
            "aliases": [],
            "annotation": None,
        }
        genes_info[gene_row[0]] = gene

        # Get aliases, without duplicates
        query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi == term)
        for row in db.session.execute(query).all():
            if row[1] not in gene["aliases"]:
                gene["aliases"].append(row[1])

        # Get annotations; when the stored text contains "__", the part after
        # the first separator is the one reported. The last row wins.
        query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi == term
        )
        for row in db.session.execute(query).all():
            parts = row[1].split("__")
            gene["annotation"] = parts[1] if len(parts) > 1 else parts[0]

        return BARUtils.success_exit(genes_info)
429+
430+
138431@gene_information .route ("/gene_isoforms/<string:species>/<string:gene_id>" )
139432class GeneIsoforms (Resource ):
140433 @gene_information .param ("species" , _in = "path" , default = "arabidopsis" )
0 commit comments