Skip to content

Commit d36c4cf

Browse files
committed
misc_jobs
1 parent 7154443 commit d36c4cf

File tree

3 files changed

+85
-18
lines changed

3 files changed

+85
-18
lines changed

biocontainers/annotations_yml_generator.py renamed to biocontainers/misc_jobs.py

Lines changed: 83 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
import logging
2+
3+
import yaml
24
from pymodm import connect
35
from biocontainers.common.models import MongoTool
46
import click
5-
import yaml
7+
from ruamel.yaml import YAML
68
import requests
79

8-
logger = logging.getLogger('annotations_yml_generator')
10+
logger = logging.getLogger('misc_jobs')
911

1012

1113
def print_help(ctx, value):
@@ -22,18 +24,25 @@ def get_database_uri(param):
2224

2325

2426
@click.command()
27+
@click.option('--find-missing-annotations', '-ma', help='Find missing annotations', is_flag=True)
28+
@click.option('--find-duplicate-tools', '-dt', help='Find duplicate tools', is_flag=True)
29+
@click.option('--find-invalid-annotations', '-ia', help='invalid_annotations', is_flag=True)
30+
@click.option('-ay', '--annotations-yml-url', help='Annotations Yaml file')
2531
@click.option('-db', '--db-name', help="Name of the database", envvar='BIOCONT_DB_NAME')
2632
@click.option('-h', '--db-host', help='Host the database', envvar='MONGODB_HOST')
2733
@click.option('-a', '--db-auth-database', help='Authentication database in Mongo', envvar='MONGODB_ADMIN_DB')
2834
@click.option('-u', '--db-user', help='Database root user', envvar='MONGODB_USER', default='admin')
2935
@click.option('-pw', '--db-password', help='Database password', envvar='MONGODB_PASS')
3036
@click.option('-p', '--db-port', help='Database port', envvar='MONGO_PORT', default='27017')
31-
@click.option('-ay', '--annotations-yml-url', help='Annotations Yaml file')
3237
@click.option('-st', '--slack-token', help='Slack token')
3338
@click.pass_context
34-
def main(ctx, db_name, db_host, db_auth_database, db_user, db_password, db_port, annotations_yml_url, slack_token):
39+
def main(ctx, find_missing_annotations, find_duplicate_tools, find_invalid_annotations, annotations_yml_url, db_name,
40+
db_host, db_auth_database,
41+
db_user, db_password, db_port, slack_token):
3542
config = {}
36-
if (db_name is None) or (db_host is None) or (db_user is None) or (annotations_yml_url is None):
43+
if (db_name is None) or (db_host is None) or (db_user is None):
44+
print_help(ctx, value=True)
45+
elif ((find_missing_annotations is True) or (find_invalid_annotations is True)) and (annotations_yml_url is None):
3746
print_help(ctx, value=True)
3847
else:
3948
config['BIOCONT_DB_NAME'] = db_name
@@ -44,38 +53,95 @@ def main(ctx, db_name, db_host, db_auth_database, db_user, db_password, db_port,
4453
config['MONGODB_PASS'] = db_password
4554
config['DATABASE_URI'] = get_database_uri(config)
4655

47-
db_uri = get_database_uri(config)
48-
connect(db_uri)
49-
tools = list(MongoTool.get_all_tools())
56+
tools = []
57+
if find_missing_annotations is True or find_duplicate_tools is True:
58+
db_uri = get_database_uri(config)
59+
connect(db_uri)
60+
tools = list(MongoTool.get_all_tools())
61+
i = len(tools)
62+
print("Total tools: {}".format(i))
5063

51-
i = len(tools)
52-
print("Total tools: {}".format(i))
64+
if find_missing_annotations is True:
65+
missing_annotations(annotations_yml_url, tools, slack_token)
66+
67+
if find_duplicate_tools is True:
68+
duplicate_tools(tools, slack_token)
69+
70+
if find_invalid_annotations is True:
71+
invalid_annotations(annotations_yml_url)
72+
73+
74+
def duplicate_tools(tools, slack_token):
    """Print every ``biotools:`` identifier that is referenced by more than one tool.

    Args:
        tools: Iterable of tool objects. Each one is read for ``id`` and
            ``additional_identifiers`` (which may be empty or ``None``).
        slack_token: Currently unused; no report file is produced by this job
            yet, so nothing is sent to Slack. Kept so the call site in
            ``main`` stays unchanged.

    Returns:
        dict: Maps each duplicated identifier to the list of distinct tool ids
        that reference it, in first-seen order. Empty when nothing is
        duplicated.
    """
    tool_ids_by_identifier = {}
    for tool in tools:
        # ``additional_identifiers`` may be None or empty for a tool.
        for identifier in tool.additional_identifiers or ():
            if not str(identifier).startswith("biotools:"):
                continue
            tool_ids = tool_ids_by_identifier.setdefault(identifier, [])
            # A tool listing the same identifier twice is not a duplicate.
            if tool.id not in tool_ids:
                tool_ids.append(tool.id)

    duplicates = {
        identifier: tool_ids
        for identifier, tool_ids in tool_ids_by_identifier.items()
        if len(tool_ids) > 1
    }
    for identifier in duplicates:
        print(identifier)
    return duplicates
5390

54-
yml_tools = set()
5591

92+
def invalid_annotations(annotations_yml_url):
    """Report annotation entries that lack a ``home_url`` or ``license`` key.

    Downloads the YAML document at ``annotations_yml_url`` and prints, in
    order: the number of entries, the names of entries without ``home_url``,
    and the names of entries without ``license``.

    Args:
        annotations_yml_url: URL of the annotations YAML file. The document is
            iterated with ``.items()``, so it is expected to be a top-level
            mapping of tool key -> mapping of fields.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.get(annotations_yml_url, timeout=60)
    # Fail loudly instead of trying to parse an HTML error page as YAML.
    response.raise_for_status()

    # NOTE(review): this parses remote content with yaml.FullLoader. If the URL
    # is not fully trusted, switch to yaml.safe_load.
    # An empty document loads as None; treat it as "no entries".
    yml_contents = yaml.load(response.text, Loader=yaml.FullLoader) or {}
    print("total yaml tools: {}".format(len(yml_contents)))

    missing_home_urls = []
    missing_licenses = []
    for key, tool in yml_contents.items():
        # An entry with no body (or a non-mapping body) has none of the fields.
        fields = tool if isinstance(tool, dict) else {}
        # Fall back to the YAML key so an entry without 'name' is still reported.
        name = fields.get('name', key)
        if 'home_url' not in fields:
            missing_home_urls.append(name)
        if 'license' not in fields:
            missing_licenses.append(name)

    print(missing_home_urls)
    print(missing_licenses)
58111

112+
113+
def add_tool(biotools, identifier, tool_id):
    """Record ``tool_id`` under ``identifier`` in the ``biotools`` mapping.

    Args:
        biotools: Mapping of identifier -> list of tool ids; modified in place.
        identifier: Key to file the tool under; created with an empty list on
            first use.
        tool_id: Value appended to the identifier's list.
    """
    biotools.setdefault(identifier, []).append(tool_id)
117+
118+
119+
def missing_annotations(annotations_yml_url, tools, slack_token):
    """Write the tools that have no entry in the annotations YAML to a report file.

    Downloads the annotations YAML, collects every tool whose id is neither
    prefixed with ``mulled-`` nor present as a key in that document, dumps
    them to ``missing_annotations.yml`` in the working directory, and hands
    the file to ``slack_notify``.

    Args:
        annotations_yml_url: URL of the annotations YAML file; its top-level
            keys are compared (as strings) against the tool ids.
        tools: Iterable of tool objects. Each one is read for ``id``,
            ``description``, ``license``, ``home_url``, ``total_pulls``,
            ``additional_identifiers`` and ``tool_tags``.
        slack_token: Passed through to ``slack_notify`` unchanged.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Named yaml_io so the module-level ``yaml`` import is not shadowed.
    yaml_io = YAML()

    response = requests.get(annotations_yml_url, timeout=60)
    # Fail loudly instead of trying to parse an HTML error page as YAML.
    response.raise_for_status()

    # An empty document loads as None; treat it as "no annotated tools".
    yml_contents = yaml_io.load(response.text) or {}
    yml_tools = {str(key) for key in yml_contents}
    print("total yaml tools: {}".format(len(yml_tools)))

    missing_tools = []
    for tool in tools:
        # Keys were stringified above, so compare the id as a string too.
        tool_id = str(tool.id)
        if tool_id.startswith("mulled-") or tool_id in yml_tools:
            continue
        missing_tools.append(
            {tool.id: {'name': tool.id, 'description': tool.description, 'license': tool.license,
                       'home_url': tool.home_url, 'total_pulls': tool.total_pulls, 'manually_check': False,
                       'identifiers': tool.additional_identifiers, 'keywords': tool.tool_tags}})

    out_yml = 'missing_annotations.yml'
    with open(out_yml, 'w', encoding='utf-8') as report:
        yaml_io.dump(missing_tools, report)

    slack_notify(out_yml, slack_token)
142+
143+
144+
def slack_notify(out_yml, slack_token):
79145
if slack_token is None:
80146
logger.info("Slack token is empty: skipping sending to slack")
81147
else:
@@ -88,5 +154,6 @@ def main(ctx, db_name, db_host, db_auth_database, db_user, db_password, db_port,
88154
print(x.text)
89155
logger.info("response sending to slack: {} {}".format(x.status_code, x.text))
90156

157+
91158
if __name__ == "__main__":
92159
main()

dockerfiles/annotations_yml_generator/run.sh renamed to dockerfiles/misc_jobs/run.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
#!/bin/bash
# Runs misc_jobs.py with the job flags from PIPELINE_ARGS and the database
# connection settings taken from the environment.

# Abort if the working directory is missing, so python is not started elsewhere.
cd /biocontainers-backend/biocontainers/ || exit 1

# PIPELINE_ARGS is left unquoted on purpose: it carries several flags that must
# be word-split. -st is only passed when SLACK_TOKEN is non-empty; otherwise
# the option would swallow the next flag as its value.
# shellcheck disable=SC2086
python misc_jobs.py \
    $PIPELINE_ARGS ${SLACK_TOKEN:+-st "$SLACK_TOKEN"} \
    --db-password "$MONGODB_PASS" --db-host "$MONGODB_HOST" --db-auth-database "$MONGODB_ADMIN_DB" \
    --db-name "$BIOCONT_DB_NAME" --db-user "$MONGODB_USER"
88

0 commit comments

Comments
 (0)