-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathgbMetaCheck.py
340 lines (285 loc) · 18.1 KB
/
gbMetaCheck.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
import os
import sys
import zipfile
import subprocess
import datetime
import gbHelpers
def _loadValidISOs(csvPath):
    """Return the third comma-separated column of every non-blank line in csvPath."""
    validISO = []
    with open(csvPath) as isoCsv:
        for line in isoCsv:
            # A blank (e.g. trailing) line has no third column; skip it.
            if not line.strip():
                continue
            validISO.append(line.split(',')[2])
    return validISO


def _loadLicenses(csvPath):
    """Read the license CSV and return four lists of lower-cased license names.

    Returns (all, open, authoritative, humanitarian). A name is added to the
    last three lists when columns 2, 3 and 4 of its row equal "Yes".
    Names are taken verbatim from column 0 (lower-cased and stripped), so any
    quoting present in the file is kept.
    """
    validLicense = []
    validOpenLicense = []
    validAuthLicense = []
    validHumLicense = []
    with open(csvPath) as lCsv:
        for line in lCsv:
            if not line.strip():
                continue
            data = line.split(',')
            name = data[0].lower().strip()
            validLicense.append(name)
            if str(data[2]).strip() == "Yes":
                validOpenLicense.append(name)
            if str(data[3]).strip() == "Yes":
                validAuthLicense.append(name)
            if str(data[4]).strip() == "Yes":
                validHumLicense.append(name)
    return validLicense, validOpenLicense, validAuthLicense, validHumLicense


def _splitMetaLine(rawLine):
    """Split one raw meta.txt line (bytes) into a stripped (key, value) pair.

    Only the first ":" separates key from value, so values that themselves
    contain colons (URLs, for instance) are kept intact.
    Returns None when the line is not valid UTF-8 or contains no ":".
    """
    try:
        text = rawLine.decode("utf-8")
    except UnicodeDecodeError:
        return None
    key, separator, val = text.partition(":")
    if not separator:
        return None
    return key.strip(), val.strip()


def _hasValue(val):
    """Return True when val is non-empty and is not one of na / nan / null."""
    return len(val.replace(" ", "")) > 0 and val.lower() not in ["na", "nan", "null"]


def _licenseImageInZip(archive):
    """Return True when license.png or license.jpg can be read from the open archive."""
    for imageName in ("license.png", "license.jpg"):
        try:
            archive.read(imageName)
        except Exception:
            # Best-effort optional check: a missing or unreadable image simply
            # counts as "no image".
            continue
        return True
    return False


def metaCheck(ws):
    """Validate the meta.txt file inside every zip listed in ws["zips"].

    ws is the workspace dictionary. This function reads ws["working"],
    ws["zips"] and ws["checkType"], and updates ws["zipTotal"],
    ws["zipFailures"] and ws["zipSuccess"]. Progress is written with
    gbHelpers.logWrite and the overall outcome with gbHelpers.gbEnvVars("RESULT", ...).

    Returns:
        [opt, req, ws["zipSuccess"]] for the last zip processed, where opt and
        req are the optional / required check dictionaries;
        [opt, req, 0] immediately if a zip file cannot be opened;
        None when ws["zips"] is empty.
    """
    checkType = ws["checkType"]

    def log(message):
        gbHelpers.logWrite(checkType, message)

    #Load ISOs for later checks
    validISO = _loadValidISOs(ws["working"] + "/geoBoundaryBot/dta/iso_3166_1_alpha_3.csv")
    print(validISO)

    #Load licenses for later checks
    validLicense, validOpenLicense, validAuthLicense, validHumLicense = _loadLicenses(
        ws["working"] + "/geoBoundaryBot/dta/gbLicenses.csv")

    if len(ws["zips"]) == 0:
        log("CRITICAL ERROR: No modified zip files found.")
        gbHelpers.gbEnvVars("RESULT", "You didn't submit a zip file.", "w")
        return None

    log("Modified zip files found. Checking meta.txt validity.")
    log("")
    # NOTE(review): this is incremented once per call, not once per zip - confirm that is intended.
    ws["zipTotal"] = ws["zipTotal"] + 1

    #May add other valid types in the future, but for now ADMs only.
    validTypes = ["ADM0", "ADM1", "ADM2", "ADM3", "ADM4", "ADM5"]
    validReleaseTypes = ["gbopen", "gbauthoritative", "gbhumanitarian"]

    # release type -> (licenses allowed for it, success message, failure message)
    licenseRules = {
        "gbopen": (
            validOpenLicense,
            "License type is valid license for the gbOpen product.",
            "CRITICAL ERROR: The license you have specified is not valid for the gbOpen product.",
        ),
        "gbauthoritative": (
            validAuthLicense,
            "License type is a valid license for the gbAuthoritative product.",
            "CRITICAL ERROR: The license you have specified is not valid for the gbAuthoritative product.",
        ),
        "gbhumanitarian": (
            validHumLicense,
            "License type is a valid license for the gbHumanitarian product.",
            "CRITICAL ERROR: The license you have specified is not valid for the gbHumanitarian product.",
        ),
    }

    for z in ws["zips"]:
        gbHelpers.checkRetrieveLFSFiles(z, ws['working'])

        # Insertion order is the order of the result tables logged below.
        req = {
            "year": 0,
            "iso": 0,
            "bType": 0,
            "source": 0,
            "releaseType": 0,
            "releaseTypeName": "",
            "license": 0,
            "licenseName": "",
            "licenseSource": 0,
            "dataSource": 0,
            "releaseTypeFolder": 0,
        }
        opt = {
            "canonical": 0,
            "licenseImage": 0,
            "licenseNotes": 0,
            "otherNotes": 0,
        }
        checkFail = 0

        log("Metadata Check: " + z)

        try:
            bZip = zipfile.ZipFile(ws["working"] + "/" + z)
        except (OSError, zipfile.BadZipFile):
            print("A zipfile didn't open. " + str(z))
            return [opt, req, 0]

        # Open the archive once, pull out everything needed, and close it.
        meta = None
        hasLicenseImage = False
        with bZip:
            if "meta.txt" in bZip.namelist():
                log("")
                log("============================")
                log("Metadata file exists in " + z)
                meta = bZip.read('meta.txt')
                hasLicenseImage = _licenseImageInZip(bZip)

        if meta is not None:
            for m in meta.splitlines():
                log("")
                parsed = _splitMetaLine(m)
                if parsed is None:
                    checkFail = 1
                    log("WARN: At least one line of the meta.txt failed to be read correctly: " + str(m))
                    key = "readError"
                    val = "readError"
                else:
                    key, val = parsed

                log("Detected Key / Value: " + key + " / " + val)
                lowKey = key.lower()

                if ("Year" in key) or ("year" in key):
                    try:
                        if "to" in val:
                            # Date range: both ends must parse as DD-MM-YYYY.
                            date1, date2 = val.split(" to ")
                            datetime.datetime.strptime(date1, "%d-%m-%Y")
                            datetime.datetime.strptime(date2, "%d-%m-%Y")
                            log("Valid date range " + str(val) + " detected.")
                            req["year"] = 1
                        else:
                            year = int(float(val))
                            currentYear = datetime.datetime.now().year
                            if (year > 1950) and (year <= currentYear):
                                log("Valid year " + str(year) + " detected.")
                                req["year"] = 1
                            else:
                                log("CRITICAL ERROR: The year in the meta.txt file is invalid: " + str(year))
                                log("We expect a value between 1950 and " + str(currentYear))
                                checkFail = 1
                    except (ValueError, OverflowError):
                        log("CRITICAL ERROR: The year in the meta.txt file is invalid.")
                        checkFail = 1

                if "boundary type" in lowKey and "name" not in lowKey:
                    if val.upper().replace(" ", "") in validTypes:
                        log("Valid Boundary Type detected: " + val + ".")
                        req["bType"] = 1
                    else:
                        log("CRITICAL ERROR: The boundary type in the meta.txt file is invalid: " + val)
                        log("We expect one of: " + str(validTypes))
                        checkFail = 1

                if "iso" in lowKey:
                    if len(val) != 3:
                        log("CRITICAL ERROR: ISO is invalid - we expect a 3-character ISO code following ISO-3166-1 (Alpha 3).")
                        checkFail = 1
                    if val not in validISO:
                        log("CRITICAL ERROR: ISO is not on our list of valid ISO-3 codes. See https://github.com/wmgeolab/geoBoundaryBot/blob/main/dta/iso_3166_1_alpha_3.csv for all valid codes this script checks against.")
                        checkFail = 1
                    else:
                        log("Valid ISO detected: " + val)
                        req["iso"] = 1

                if "canonical" in lowKey:
                    if _hasValue(val):
                        log("Canonical name detected: " + val)
                        opt["canonical"] = 1
                    else:
                        log("WARN: No canonical name detected. This field is optional.")

                if "source" in lowKey and "license" not in lowKey and "data" not in lowKey:
                    if _hasValue(val):
                        log("Source detected: " + val)
                        req["source"] = 1

                if "release type" in lowKey:
                    if val.lower() not in validReleaseTypes:
                        log("Invalid release type detected: " + val)
                        log("We expect one of three values: gbOpen, gbAuthoritative, and gbHumanitarian")
                        checkFail = 1
                    else:
                        req["releaseType"] = 1
                        req["releaseTypeName"] = val.lower().strip()
                        # The release type must also appear in the zip's path.
                        if val.lower() not in z.lower():
                            req["releaseTypeFolder"] = 0
                            log("CRITICAL ERROR: The zip file is in the incorrect subdirectory - according to meta.txt you are submitting a " + val + " boundary, but have the zip file in the folder " + z + ".")
                            checkFail = 1
                        else:
                            req["releaseTypeFolder"] = 1

                if "license" == lowKey:
                    # Names in validLicense carry the double quotes used in the CSV.
                    if ('"' + val.lower().strip() + '"') not in validLicense:
                        log("CRITICAL ERROR: Invalid license detected: " + val)
                        log("We expect one of the licenses in https://github.com/wmgeolab/geoBoundaryBot/blob/main/dta/gbLicenses.csv. It must exactly match one of these; we do no fuzzy matching to ensure accuracy. If you believe your license should be included, please open a ticket.")
                        checkFail = 1
                    else:
                        req["license"] = 1
                        req["licenseName"] = val.lower().strip()
                        log("Valid license type detected: " + val)

                if "license notes" in lowKey:
                    if _hasValue(val):
                        log("License notes detected: " + val)
                        opt["licenseNotes"] = 1
                    else:
                        log("WARN: No license notes detected. This field is optional.")

                if "license source" in lowKey:
                    if _hasValue(val):
                        log("License source detected: " + val)
                        req["licenseSource"] = 1
                        #Check for a png image of the license source.
                        #Any png or jpg with the name "license" is accepted.
                        if hasLicenseImage:
                            log("License image found.")
                            opt["licenseImage"] = 1
                        else:
                            log("WARN: No license image found. This is not required. We check for license.png and license.jpg.")
                    else:
                        log("CRITICAL ERROR: No license source detected.")
                        checkFail = 1

                if "link to source data" in lowKey:
                    if _hasValue(val):
                        req["dataSource"] = 1
                        log("Data Source Found: " + val)
                    else:
                        log("CRITICAL ERROR: No link to source data detected.")
                        checkFail = 1

                if "other notes" in lowKey:
                    if _hasValue(val):
                        log("Other notes detected: " + val)
                        opt["otherNotes"] = 1
                    else:
                        log("WARN: No other notes detected. This field is optional.")

            # Cross-field checks, run once every line has been read.
            if (req["license"] == 1) and (req["releaseType"] == 1):
                log("")
                log("Both a license and release type are defined. Checking for compatability.")
                rule = licenseRules.get(req["releaseTypeName"])
                if rule is not None:
                    allowedLicenses, passMessage, failMessage = rule
                    if ('"' + req["licenseName"] + '"') in allowedLicenses:
                        log(passMessage)
                    else:
                        log(failMessage)
                        checkFail = 1

            if req["source"] == 0:
                log("CRITICAL ERROR: No data source was provided in the metadata.")
                checkFail = 1

            log("")
            log("Metadata checks complete for " + z)
            log("")
            log("----------------------------")
            log(" OPTIONAL TESTS ")
            log("----------------------------")
            for i in opt:
                # A flag of 1, or a string longer than one character, counts as passed.
                if opt[i] == 1 or len(str(opt[i])) > 1:
                    log('%-20s%-12s' % (i, "PASSED"))
                else:
                    log('%-20s%-12s' % (i, "FAILED"))
            log("")
            log("----------------------------")
            log(" REQUIRED TESTS ")
            log("----------------------------")
            for i in req:
                if req[i] == 1 or len(str(req[i])) > 1:
                    log('%-20s%-12s' % (i, "PASSED"))
                else:
                    log('%-20s%-12s' % (i, "FAILED"))
                    checkFail = 1
            log("==========================")
        else:
            log("CRITICAL ERROR: Metadata file does not exist in " + z)
            gbHelpers.gbEnvVars("RESULT", "CRITICAL ERROR: Metadata file does not exist in " + z, "w")
            checkFail = 1

        if checkFail == 1:
            ws["zipFailures"] = ws["zipFailures"] + 1
        else:
            ws["zipSuccess"] = ws["zipSuccess"] + 1
            log("Metadata checks passed for " + z)

    log("")
    log("====================")
    log("All metadata checks complete.")
    log("Successes: " + str(ws["zipSuccess"]))
    log("Failures: " + str(ws["zipFailures"]))

    if ws["zipFailures"] > 0:
        log("CRITICAL ERROR: At least one Metadata check failed; check the log to see what's wrong.")
        gbHelpers.gbEnvVars("RESULT", "It looks like your metadata has one or more errors - take a look at the logs to see what you need to fix.", "w")
    else:
        gbHelpers.gbEnvVars("RESULT", "PASSED", "w")

    #Return of the last element for overall build
    return [opt, req, ws["zipSuccess"]]
# Script entry point: build the "metaChecks" workspace, then run the metadata checks on it.
if __name__ == "__main__":
    ws = gbHelpers.initiateWorkspace("metaChecks")
    metaCheck(ws)