1
1
import logging
2
+
3
+ import yaml
2
4
from pymodm import connect
3
5
from biocontainers .common .models import MongoTool
4
6
import click
5
- import yaml
7
+ from ruamel . yaml import YAML
6
8
import requests
7
9
8
- logger = logging .getLogger ('annotations_yml_generator ' )
10
+ logger = logging .getLogger ('misc_jobs ' )
9
11
10
12
11
13
def print_help (ctx , value ):
@@ -22,18 +24,25 @@ def get_database_uri(param):
22
24
23
25
24
26
@click .command ()
27
+ @click .option ('--find-missing-annotations' , '-ma' , help = 'Find missing annotations' , is_flag = True )
28
+ @click .option ('--find-duplicate-tools' , '-dt' , help = 'Find duplicate tools' , is_flag = True )
29
+ @click .option ('--find-invalid-annotations' , '-ia' , help = 'invalid_annotations' , is_flag = True )
30
+ @click .option ('-ay' , '--annotations-yml-url' , help = 'Annotations Yaml file' )
25
31
@click .option ('-db' , '--db-name' , help = "Name of the database" , envvar = 'BIOCONT_DB_NAME' )
26
32
@click .option ('-h' , '--db-host' , help = 'Host the database' , envvar = 'MONGODB_HOST' )
27
33
@click .option ('-a' , '--db-auth-database' , help = 'Authentication database in Mongo' , envvar = 'MONGODB_ADMIN_DB' )
28
34
@click .option ('-u' , '--db-user' , help = 'Database root user' , envvar = 'MONGODB_USER' , default = 'admin' )
29
35
@click .option ('-pw' , '--db-password' , help = 'Database password' , envvar = 'MONGODB_PASS' )
30
36
@click .option ('-p' , '--db-port' , help = 'Database port' , envvar = 'MONGO_PORT' , default = '27017' )
31
- @click .option ('-ay' , '--annotations-yml-url' , help = 'Annotations Yaml file' )
32
37
@click .option ('-st' , '--slack-token' , help = 'Slack token' )
33
38
@click .pass_context
34
- def main (ctx , db_name , db_host , db_auth_database , db_user , db_password , db_port , annotations_yml_url , slack_token ):
39
+ def main (ctx , find_missing_annotations , find_duplicate_tools , find_invalid_annotations , annotations_yml_url , db_name ,
40
+ db_host , db_auth_database ,
41
+ db_user , db_password , db_port , slack_token ):
35
42
config = {}
36
- if (db_name is None ) or (db_host is None ) or (db_user is None ) or (annotations_yml_url is None ):
43
+ if (db_name is None ) or (db_host is None ) or (db_user is None ):
44
+ print_help (ctx , value = True )
45
+ elif ((find_missing_annotations is True ) or (find_invalid_annotations is True )) and (annotations_yml_url is None ):
37
46
print_help (ctx , value = True )
38
47
else :
39
48
config ['BIOCONT_DB_NAME' ] = db_name
@@ -44,38 +53,95 @@ def main(ctx, db_name, db_host, db_auth_database, db_user, db_password, db_port,
44
53
config ['MONGODB_PASS' ] = db_password
45
54
config ['DATABASE_URI' ] = get_database_uri (config )
46
55
47
- db_uri = get_database_uri (config )
48
- connect (db_uri )
49
- tools = list (MongoTool .get_all_tools ())
56
+ tools = []
57
+ if find_missing_annotations is True or find_duplicate_tools is True :
58
+ db_uri = get_database_uri (config )
59
+ connect (db_uri )
60
+ tools = list (MongoTool .get_all_tools ())
61
+ i = len (tools )
62
+ print ("Total tools: {}" .format (i ))
50
63
51
- i = len (tools )
52
- print ("Total tools: {}" .format (i ))
64
+ if find_missing_annotations is True :
65
+ missing_annotations (annotations_yml_url , tools , slack_token )
66
+
67
+ if find_duplicate_tools is True :
68
+ duplicate_tools (tools , slack_token )
69
+
70
+ if find_invalid_annotations is True :
71
+ invalid_annotations (annotations_yml_url )
72
+
73
+
74
def duplicate_tools(tools, slack_token):
    """Report ``biotools:`` identifiers that are attached to more than one tool.

    Every entry of a tool's ``additional_identifiers`` whose string form starts
    with ``biotools:`` is grouped with the ids of the tools that carry it.
    Only identifiers shared by at least two distinct tool ids are printed,
    one ``identifier: [tool ids]`` line each.

    :param tools: iterable of tool objects exposing ``id`` and
        ``additional_identifiers`` (which may be empty or ``None``).
    :param slack_token: accepted for symmetry with the other jobs; currently
        unused because the report is not yet sent to Slack.
    :return: dict mapping each duplicated identifier to the list of tool ids
        that reference it (empty when there are no duplicates).
    """
    biotools = {}
    for tool in tools:
        # Tools without any additional identifiers are simply skipped.
        for identifier in tool.additional_identifiers or ():
            if not str(identifier).startswith("biotools:"):
                continue
            tool_ids = biotools.setdefault(identifier, [])
            # A tool listing the same identifier twice is not a duplicate.
            if tool.id not in tool_ids:
                tool_ids.append(tool.id)

    duplicates = {
        identifier: tool_ids
        for identifier, tool_ids in biotools.items()
        if len(tool_ids) > 1
    }
    for identifier, tool_ids in duplicates.items():
        print("{}: {}".format(identifier, tool_ids))

    # TODO: dump the duplicates to a yml file and send it to Slack.
    return duplicates
53
90
54
- yml_tools = set ()
55
91
92
def invalid_annotations(annotations_yml_url):
    """Print the annotated tools that lack a ``home_url`` or a ``license``.

    Downloads the annotations YAML, prints how many top-level entries it
    holds, then prints two lists: the names of the entries without a
    ``home_url`` key and the names of the entries without a ``license`` key.
    An entry without a ``name`` key is reported under its top-level YAML key.

    :param annotations_yml_url: URL of the annotations YAML document.
    :raises requests.HTTPError: if the download answers with an error status.
    """
    response = requests.get(annotations_yml_url)
    # Fail loudly instead of trying to parse an HTTP error page as YAML.
    response.raise_for_status()
    # NOTE(review): FullLoader is applied to remotely fetched content; if the
    # annotations only use plain YAML types, yaml.safe_load would be safer.
    # An empty document loads as None, hence the fallback to an empty mapping.
    yml_contents = yaml.load(response.text, Loader=yaml.FullLoader) or {}
    print("total yaml tools: {}".format(len(yml_contents)))

    missing_home_urls = []
    missing_licenses = []
    for key, tool in yml_contents.items():
        # An entry that is not a mapping (e.g. an empty one) has no fields.
        if not isinstance(tool, dict):
            tool = {}
        # Fall back to the YAML key so an entry without 'name' is still
        # reported instead of aborting the whole check with a KeyError.
        name = tool.get('name', key)
        if 'home_url' not in tool:
            missing_home_urls.append(name)
        if 'license' not in tool:
            missing_licenses.append(name)

    print(missing_home_urls)
    print(missing_licenses)
58
111
112
+
113
def add_tool(biotools, identifier, tool_id):
    """Record ``tool_id`` under ``identifier`` in the ``biotools`` mapping.

    The mapping is modified in place: an identifier seen for the first time
    gets a new list, and ``tool_id`` is appended to that identifier's list.
    """
    biotools.setdefault(identifier, []).append(tool_id)
117
+
118
+
119
def missing_annotations(annotations_yml_url, tools, slack_token):
    """Write the tools that have no entry in the annotations YAML to a file.

    Downloads the annotations YAML and compares its top-level keys with the
    ids of ``tools``. Every tool whose id is absent from the YAML (ids
    starting with ``mulled-`` are skipped) is written to
    ``missing_annotations.yml`` as a one-key mapping holding a skeleton
    annotation, and that file is then handed to ``slack_notify``.

    :param annotations_yml_url: URL of the annotations YAML document.
    :param tools: iterable of tool objects exposing ``id``, ``description``,
        ``license``, ``home_url``, ``total_pulls``, ``additional_identifiers``
        and ``tool_tags``.
    :param slack_token: token forwarded to ``slack_notify``.
    :raises requests.HTTPError: if the download answers with an error status.
    """
    # Named yaml_parser so the module-level ``yaml`` import is not shadowed.
    yaml_parser = YAML()
    response = requests.get(annotations_yml_url)
    # Fail loudly instead of trying to parse an HTTP error page as YAML.
    response.raise_for_status()
    # An empty document loads as None, hence the fallback to an empty mapping.
    yml_contents = yaml_parser.load(response.text) or {}
    yml_tools = {str(key) for key in yml_contents}
    print("total yaml tools: {}".format(len(yml_tools)))

    missing_tools = []
    for tool in tools:
        # Compare string forms on both sides: the YAML keys were stringified.
        tool_id = str(tool.id)
        if tool_id.startswith("mulled-") or tool_id in yml_tools:
            continue
        missing_tools.append({
            tool.id: {
                'name': tool.id,
                'description': tool.description,
                'license': tool.license,
                'home_url': tool.home_url,
                'total_pulls': tool.total_pulls,
                'manually_check': False,
                'identifiers': tool.additional_identifiers,
                'keywords': tool.tool_tags,
            }
        })

    out_yml = 'missing_annotations.yml'
    with open(out_yml, 'w') as yml_file:
        yaml_parser.dump(missing_tools, yml_file)

    slack_notify(out_yml, slack_token)
142
+
143
+
144
+ def slack_notify (out_yml , slack_token ):
79
145
if slack_token is None :
80
146
logger .info ("Slack token is empty: skipping sending to slack" )
81
147
else :
@@ -88,5 +154,6 @@ def main(ctx, db_name, db_host, db_auth_database, db_user, db_password, db_port,
88
154
print (x .text )
89
155
logger .info ("response sending to slack: {} {}" .format (x .status_code , x .text ))
90
156
157
+
91
158
# Invoke the click command only when this file is executed as a script.
if __name__ == "__main__":
    main()
0 commit comments