Skip to content

Commit a195141

Browse files
authored
Merge pull request #241 from BioAnalyticResource/dev
Update main branch
2 parents acf7770 + e2cd22e commit a195141

File tree

14 files changed

+1056
-58
lines changed

14 files changed

+1056
-58
lines changed

api/models/eplant2.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,29 @@ class Publications(db.Model):
4949
journal: db.Mapped[str] = db.mapped_column(db.String(64), nullable=False, primary_key=True)
5050
title: db.Mapped[str] = db.mapped_column(TEXT(), nullable=False, primary_key=True)
5151
pubmed: db.Mapped[str] = db.mapped_column(db.String(16), nullable=False, primary_key=True)
52+
53+
54+
class TAIR10GFF3(db.Model):
    """ORM mapping for the ``tair10_gff3`` table on the ``eplant2`` bind.

    Every column is flagged as part of the primary key, so a row is identified by the
    combination of all of its fields.

    NOTE(review): the column names look like the standard GFF3 fields plus the Id, geneId and
    Parent values pulled out of the attributes column -- confirm against the database schema.
    """

    __bind_key__ = "eplant2"
    __tablename__ = "tair10_gff3"

    # Columns that must always be populated
    SeqID: db.Mapped[str] = db.mapped_column(db.String(20), primary_key=True, nullable=False)
    Source: db.Mapped[str] = db.mapped_column(db.String(10), primary_key=True, nullable=False)
    Type: db.Mapped[str] = db.mapped_column(db.String(30), primary_key=True, nullable=False)
    Start: db.Mapped[int] = db.mapped_column(db.Integer, primary_key=True, nullable=False)
    End: db.Mapped[int] = db.mapped_column(db.Integer, primary_key=True, nullable=False)

    # Columns that may be NULL
    Score: db.Mapped[float] = db.mapped_column(db.Float, primary_key=True, nullable=True)
    Strand: db.Mapped[str] = db.mapped_column(db.String(1), primary_key=True, nullable=True)
    Phase: db.Mapped[str] = db.mapped_column(db.String(1), primary_key=True, nullable=True)
    Id: db.Mapped[str] = db.mapped_column(db.String(20), primary_key=True, nullable=True)
    geneId: db.Mapped[str] = db.mapped_column(db.String(20), primary_key=True, nullable=True)
    Parent: db.Mapped[str] = db.mapped_column(db.String(40), primary_key=True, nullable=True)
    Attributes: db.Mapped[str] = db.mapped_column(db.String(256), primary_key=True, nullable=True)
70+
71+
72+
class AgiAlias(db.Model):
    """ORM mapping for the ``agi_alias`` table on the ``eplant2`` bind.

    The primary key is the (agi, alias) pair, so one ``agi`` value can appear in several rows,
    each with a different alias.
    """

    __bind_key__ = "eplant2"
    __tablename__ = "agi_alias"

    agi: db.Mapped[str] = db.mapped_column(db.String(30), primary_key=True, nullable=False)
    alias: db.Mapped[str] = db.mapped_column(db.String(30), primary_key=True, nullable=False)

api/resources/efp_image.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ def get(self):
2525
"efp_arachis",
2626
"efp_cannabis",
2727
"efp_maize",
28+
"efp_rice",
2829
"efp_sorghum",
2930
"efp_soybean",
3031
]

api/resources/gene_information.py

Lines changed: 299 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
from api.models.annotations_lookup import AgiAlias
55
from api.models.eplant2 import Isoforms as EPlant2Isoforms
66
from api.models.eplant2 import Publications as EPlant2Publications
7+
from api.models.eplant2 import TAIR10GFF3 as EPlant2TAIR10GFF3
8+
from api.models.eplant2 import AgiAlias as EPlant2AgiAlias
9+
from api.models.eplant2 import AgiAnnotation as EPlant2AgiAnnotation
710
from api.models.eplant_poplar import Isoforms as EPlantPoplarIsoforms
811
from api.models.eplant_tomato import Isoforms as EPlantTomatoIsoforms
912
from api.models.eplant_soybean import Isoforms as EPlantSoybeanIsoforms
@@ -14,6 +17,16 @@
1417

1518
gene_information = Namespace("Gene Information", description="Information about Genes", path="/gene_information")
1619

20+
# Request parser accepting one or more gene IDs under the repeated "terms" argument.
# action="append" already collects the values into a list, so the per-value type must be
# str: with type=list every value would be passed through list(...) and split into
# single characters.
parser = gene_information.parser()
parser.add_argument(
    "terms",
    type=str,
    action="append",
    required=True,
    help="Gene IDs, format example: AT1G01010",
    default=["AT1G01020", "AT1G01030"],
)
29+
1730
# I think this is only needed for Swagger UI POST
1831
gene_information_request_fields = gene_information.model(
1932
"GeneInformation",
@@ -27,6 +40,18 @@
2740
},
2841
)
2942

43+
# Swagger model describing the JSON body of the /gene_query POST end point.
# It is registered under its own name: "GeneInformation" is already used by
# gene_information_request_fields in this namespace, and registering two models under one
# name makes one definition overwrite the other in the generated API documentation.
query_genes_request_fields = gene_information.model(
    "GeneQuery",
    {
        "species": fields.String(required=True, example="arabidopsis"),
        "terms": fields.List(
            fields.String,
            required=True,
            example=["AT1G01010", "AT1G01020"],
        ),
    },
)
54+
3055

3156
# Validation is done in a different way to keep things simple
3257
class GeneInformationSchema(Schema):
@@ -99,16 +124,13 @@ def get(self, species="", gene_id=""):
99124
species = escape(species)
100125
gene_id = escape(gene_id)
101126

102-
# truncate gene ID
103-
for i in range(len(gene_id)):
104-
if gene_id[i] == ".":
105-
gene_id = gene_id[0:i]
106-
break
107-
108127
# Set the database and check if genes are valid
109128
if species == "arabidopsis":
110129
database = EPlant2Publications
111130

131+
# Remove Arabidopsis isoforms
132+
gene_id = gene_id.split(".")[0]
133+
112134
if not BARUtils.is_arabidopsis_gene_valid(gene_id):
113135
return BARUtils.error_exit("Invalid gene id"), 400
114136
else:
@@ -135,6 +157,277 @@ def get(self, species="", gene_id=""):
135157
return BARUtils.error_exit("There are no data found for the given gene")
136158

137159

160+
@gene_information.route(
    "/genes_by_position/<string:species>/<string:chromosome>/<string:start_param>/<string:end_param>"
)
class GeneTair10Gff3(Resource):
    """Resource listing the genes that overlap a chromosome interval."""

    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("chromosome", _in="path", default="1")
    @gene_information.param("start_param", _in="path", default=3000)
    @gene_information.param("end_param", _in="path", default=6000)
    def get(self, species="", chromosome="", start_param="", end_param=""):
        """This end point provides genes given position.

        :param species: species name; only "arabidopsis" is supported
        :param chromosome: one of "1", "2", "3", "4", "5", "C" or "M"
        :param start_param: interval start, an integer passed as a path string
        :param end_param: interval end, an integer passed as a path string
        :returns: a BARUtils success payload holding a list of dictionaries with the keys
            id, start, end, strand, aliases and annotation, or a BARUtils error payload
            with HTTP status 400 when the input is rejected
        """

        # Check if all parameters are provided
        if not chromosome or not start_param or not end_param:
            return BARUtils.error_exit("Missing parameters"), 400

        # Check if both parameters are valid figures. This has to happen before the two
        # values are compared, otherwise non-numeric input reaches the comparison.
        if not BARUtils.is_integer(start_param) or not BARUtils.is_integer(end_param):
            return BARUtils.error_exit("At lease one parameter is not valid"), 400

        # Path parameters arrive as strings. Comparing them directly is lexicographic
        # ("999" >= "1000" is True), so convert to integers first.
        try:
            start = int(start_param)
            end = int(end_param)
        except ValueError:
            return BARUtils.error_exit("At lease one parameter is not valid"), 400

        # Check if the start param is smaller than end param
        if start >= end:
            return BARUtils.error_exit("Start location should be smaller than the end location"), 400

        # Escape input
        species = escape(species)
        chromosome = escape(chromosome)

        # Set database
        if species == "arabidopsis":
            gff3_database = EPlant2TAIR10GFF3
            alias_database = EPlant2AgiAlias
            annotation_database = EPlant2AgiAnnotation

            if chromosome not in ["1", "2", "3", "4", "5", "C", "M"]:
                return BARUtils.error_exit("Invalid chromosome"), 400

            # Arabidopsis gene IDs on a chromosome start with AT<chromosome>G
            gene_id_prefix = "AT" + str(chromosome) + "G"
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # Genes that start inside the interval, end inside it, or span it entirely
        gene_query = db.select(
            gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand
        ).where(
            gff3_database.Type == "gene",
            gff3_database.geneId.startswith(gene_id_prefix),
            (
                gff3_database.Start.between(start, end)
                | gff3_database.End.between(start, end)
                | ((gff3_database.Start < start) & (gff3_database.End > end))
            ),
        )
        gene_rows = db.session.execute(gene_query).all()
        gene_ids = [row[0] for row in gene_rows]

        # Get aliases, grouped per gene
        alias_query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi.in_(gene_ids))
        all_aliases = {}
        for agi, alias in db.session.execute(alias_query).all():
            all_aliases.setdefault(agi, []).append(alias)

        # Get annotation
        annotation_query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi.in_(gene_ids)
        )
        all_annotations = {}
        for agi, annotation in db.session.execute(annotation_query).all():
            # Keep the segment after the first "__" separator when there is one
            parts = annotation.split("__")
            all_annotations[agi.upper()] = parts[1] if len(parts) > 1 else parts[0]

        genes = []
        for gene_id, gene_start, gene_end, strand in gene_rows:
            genes.append(
                {
                    "id": gene_id,
                    "start": gene_start,
                    "end": gene_end,
                    "strand": strand,
                    "aliases": all_aliases.get(gene_id, []),
                    # Annotations are stored under the upper-cased gene ID, so look them up the same way
                    "annotation": all_annotations.get(gene_id.upper(), None),
                }
            )

        return BARUtils.success_exit(genes)
253+
254+
255+
@gene_information.route("/gene_query")
class GeneQueryGene(Resource):
    """Resource returning position, aliases and annotation for a list of gene terms."""

    @gene_information.expect(query_genes_request_fields)
    def post(self):
        """This end point provides gene information for multiple genes given multiple terms.

        Expects a JSON object with a "species" string and a "terms" list of strings.

        :returns: a BARUtils success payload mapping each gene ID that was found to a
            dictionary with the keys id, chromosome, start, end, strand, aliases and
            annotation, or a BARUtils error payload with HTTP status 400 when the input
            is rejected
        """

        # expect() is used here without validate=True, so unless validation is enabled
        # elsewhere the shape of the body has to be checked by hand.
        data = request.get_json()
        if not isinstance(data, dict):
            return BARUtils.error_exit("Invalid request body"), 400

        species = data.get("species")
        terms = data.get("terms")

        # Species check
        if species == "arabidopsis":
            alias_database = EPlant2AgiAlias
            gff3_database = EPlant2TAIR10GFF3
            annotation_database = EPlant2AgiAnnotation
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # Term check
        if not isinstance(terms, list) or not all(isinstance(one_term, str) for one_term in terms):
            return BARUtils.error_exit("Input list contains invalid term"), 400

        # str.upper() returns a new string, so the normalised values have to be stored
        terms = [one_term.upper() for one_term in terms]

        for one_term in terms:
            if not BARUtils.is_arabidopsis_gene_valid(one_term):
                return BARUtils.error_exit("Input list contains invalid term"), 400

        # Resolve each term against the alias table first
        gene_ids = []
        gene_fail = []
        for one_term in terms:
            query = db.select(alias_database.agi).where(alias_database.agi.contains(one_term)).limit(1)
            result = db.session.execute(query).fetchone()
            if result is not None:
                gene_ids.append(result[0])
            else:
                gene_fail.append(one_term)

        # For terms that do not have results, fall back to the GFF3 table
        for fail_term in gene_fail:
            query = (
                db.select(gff3_database.geneId)
                .where(
                    ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
                    gff3_database.geneId.contains(fail_term),
                )
                .limit(1)
            )
            result = db.session.execute(query).fetchone()
            if result:
                gene_ids.append(result[0])

        # Find information for each term
        query = db.select(gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand).where(
            ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
            gff3_database.Source == "TAIR10",
            gff3_database.geneId.in_(gene_ids),
        )
        genes_info = {}
        for row in db.session.execute(query).all():
            # Only the first row seen for a gene ID is kept
            if row[0] not in genes_info:
                genes_info[row[0]] = {
                    "id": row[0],
                    # The character at index 2 of the gene ID is used as the chromosome (Arabidopsis specific)
                    "chromosome": "Chr" + row[0][2:3],
                    "start": row[1],
                    "end": row[2],
                    "strand": row[3],
                    "aliases": [],
                    "annotation": None,
                }

        # Get aliases
        query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi.in_(gene_ids))
        for row in db.session.execute(query).all():
            if row[0] in genes_info:
                genes_info[row[0]]["aliases"].append(row[1])

        # Get annotations
        query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi.in_(gene_ids)
        )
        for row in db.session.execute(query):
            agi = row[0].upper()
            if agi in genes_info:
                # Keep the segment after the first "__" separator when there is one
                parts = row[1].split("__")
                genes_info[agi]["annotation"] = parts[1] if len(parts) > 1 else parts[0]

        return BARUtils.success_exit(genes_info)
347+
348+
349+
@gene_information.route("/single_gene_query/<string:species>/<string:term>")
class SingleGeneQueryGene(Resource):
    """Resource returning position, aliases and annotation for one gene term."""

    @gene_information.param("species", _in="path", default="arabidopsis")
    @gene_information.param("term", _in="path", default="AT1G01010")
    def get(self, species="", term=""):
        """This end point provides gene information for a single gene given one term.

        :param species: species name; only "arabidopsis" is supported
        :param term: gene ID, upper-cased before it is validated and looked up
        :returns: a BARUtils success payload mapping the gene ID to a dictionary with the
            keys id, chromosome, start, end, strand, aliases and annotation (an empty
            mapping when nothing is found), or a BARUtils error payload with HTTP status
            400 when the input is rejected
        """

        # Escape input
        species = escape(species)
        term = escape(term).upper()

        # Species check
        if species == "arabidopsis":
            alias_database = EPlant2AgiAlias
            gff3_database = EPlant2TAIR10GFF3
            annotation_database = EPlant2AgiAnnotation

            # Term check
            if not BARUtils.is_arabidopsis_gene_valid(term):
                return BARUtils.error_exit("Input term invalid"), 400
        else:
            return BARUtils.error_exit("No data for the given species"), 400

        # The term is known if it appears in the alias table ...
        query = db.select(alias_database.agi).where(alias_database.agi == term).limit(1)
        result = db.session.execute(query).fetchone()

        # ... or, failing that, as a gene in the GFF3 table
        if not result:
            query = (
                db.select(gff3_database.geneId)
                .where(
                    ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
                    gff3_database.geneId == term,
                )
                .limit(1)
            )
            result = db.session.execute(query).fetchone()

        genes_info = {}
        if not result:
            return BARUtils.success_exit(genes_info)

        # Find information for the term
        query = db.select(gff3_database.geneId, gff3_database.Start, gff3_database.End, gff3_database.Strand).where(
            ((gff3_database.Type == "gene") | (gff3_database.Type == "transposable_element_gene")),
            gff3_database.Source == "TAIR10",
            gff3_database.geneId == term,
        )
        result = db.session.execute(query).fetchone()

        # This query is stricter than the existence checks above (it also filters on
        # Source), so it can come back empty; report "nothing found" instead of failing
        # on result[0].
        if result is None:
            return BARUtils.success_exit(genes_info)

        # This Arabidopsis specific.
        gene = {
            "id": result[0],
            "chromosome": "Chr" + result[0][2:3],
            "start": result[1],
            "end": result[2],
            "strand": result[3],
            "aliases": [],
            "annotation": None,
        }
        genes_info[result[0]] = gene

        # Get aliases, skipping duplicates
        query = db.select(alias_database.agi, alias_database.alias).where(alias_database.agi == term)
        for row in db.session.execute(query).all():
            if row[1] not in gene["aliases"]:
                gene["aliases"].append(row[1])

        # Get annotations; when several rows match, the last one wins
        query = db.select(annotation_database.agi, annotation_database.annotation).where(
            annotation_database.agi == term
        )
        for row in db.session.execute(query).all():
            # Keep the segment after the first "__" separator when there is one
            parts = row[1].split("__")
            gene["annotation"] = parts[1] if len(parts) > 1 else parts[0]

        return BARUtils.success_exit(genes_info)
429+
430+
138431
@gene_information.route("/gene_isoforms/<string:species>/<string:gene_id>")
139432
class GeneIsoforms(Resource):
140433
@gene_information.param("species", _in="path", default="arabidopsis")

0 commit comments

Comments
 (0)