16
16
import argparse
17
17
import csv
18
18
import logging
19
- import json
20
19
import os
21
20
import pathlib
22
21
import re
23
22
import sys
24
23
25
24
import pandas
26
25
26
+ from utils import utilities as utils
27
+
27
28
28
29
def parse_args ():
29
30
"""
@@ -63,6 +64,26 @@ def string_compare(str1, str2):
63
64
return str1 == str2
64
65
65
66
67
+ #
68
def access_rights_compare(str1, str2):
    """
    Check whether str2 is the access-rights label mapped from str1.

    str1 is looked up in a fixed table of visibility URIs; the label found
    there must equal str2. A str1 that is not a string, or is a string
    absent from the table, never matches.
    """
    visibility_to_label = {
        "http://terms.library.ualberta.ca/public": "open.access",
        "http://terms.library.ualberta.ca/embargo": "metadata.only",
        "http://terms.library.ualberta.ca/authenticated": "restricted",
    }

    logging.debug("%s ---- %s", str1, str2)

    # Guard first: only a known string key can ever produce a match.
    if not isinstance(str1, str) or str1 not in visibility_to_label:
        return False

    return str2 == visibility_to_label[str1]
85
+
86
+
66
87
# Scholaris removes trailing linebreaks
67
88
def string_compare_ignore_whitespace (str1 , str2 ):
68
89
"""
@@ -94,16 +115,48 @@ def member_of_list_compare(list1, list2):
94
115
return list1 == list2
95
116
96
117
118
+ #
119
def value_in_string_list_compare(str1, list2):
    """
    Test whether a plain value appears in a list that arrives as a string.

    list2 is decoded with utils.convert_string_list_representation_to_list
    before the membership check. When str1 and the decoded list are both
    falsy the two sides are treated as matching.
    """
    logging.debug("%s ---- %s", str1, list2)
    candidates = utils.convert_string_list_representation_to_list(list2)
    logging.debug("%s ---- %s", str1, candidates)

    # Nothing on either side counts as agreement.
    if not str1 and not candidates:
        return True

    return str1 in candidates
128
+
129
+
130
+ #
131
def string_lists_compare(list1, list2):
    """
    Compare the contents of two lists that are each represented as a string.

    Both arguments are decoded with
    utils.convert_string_list_representation_to_list and the decoded values
    are compared with ==.
    """
    logging.debug("%s ---- %s", list1, list2)
    # Decode left then right, in the same order as the arguments.
    decoded_left, decoded_right = (
        utils.convert_string_list_representation_to_list(raw)
        for raw in (list1, list2)
    )
    logging.debug("%s ---- %s", decoded_left, decoded_right)
    return decoded_left == decoded_right
140
+
141
+
97
142
#
98
143
def collection_parent_compare(list1, list2):
    """
    Compare the collection ids taken from list1 with the ids in list2.

    list1: string decoded with utils.convert_string_to_json and iterated as
        "/"-separated paths; the second segment of every non-empty path is
        kept. A non-string value (the nan seen when the item is not in
        Jupiter) is treated as an empty list.
    list2: string decoded with
        utils.convert_string_list_representation_to_list.

    Returns the result of comparing the extracted ids with the decoded
    list2 using ==.
    """
    logging.debug("%s ---- %s", list1, list2)

    # list1 is nan if item not in Jupiter
    if not isinstance(list1, str):
        list1 = "[]"

    # list1 is always a str from here on, so the per-path isinstance check
    # the comprehension used to carry was redundant and has been dropped.
    # NOTE(review): a path without "/" still raises IndexError, as before --
    # confirm every path has at least two segments.
    list1_collection_ids = [
        path.split("/")[1]
        for path in utils.convert_string_to_json(list1)
        if path
    ]
    return list1_collection_ids == utils.convert_string_list_representation_to_list(
        list2
    )
107
160
108
161
109
162
#
@@ -128,17 +181,19 @@ def language_compare(list1, list2):
128
181
}
129
182
logging .debug ("member_of_list_compare: %s ---- %s" , list1 , list2 )
130
183
conversion_result = list (
131
- easy_language_mapping [language ] for language in json .loads (list1 ) if language
184
+ easy_language_mapping [language ]
185
+ for language in utils .convert_string_list_representation_to_list (list1 )
186
+ if language
132
187
)
133
- return conversion_result == json . loads (list2 )
188
+ return conversion_result == utils . convert_string_list_representation_to_list (list2 )
134
189
135
190
136
191
#
137
192
def special_type_compare (row , key , value ):
138
193
"""
139
194
Special type comparision
140
195
"""
141
- logging .debug ("special_type_compare: [%s] %s ---- %s " , key , value , row )
196
+ logging .debug ("special_type_compare: [%s] %s" , key , value )
142
197
143
198
# Adapted from the original migration
144
199
# https://gist.github.com/lagoan/839cf8ce997fa17b529d84776b91cdac
@@ -162,12 +217,26 @@ def special_type_compare(row, key, value):
162
217
"http://terms.library.ualberta.ca/learningObject" : "http://purl.org/coar/resource_type/c_e059" ,
163
218
}
164
219
165
- list1 = [row [value ["columns" ]["jupiter" ][0 ]]] + json .loads (
166
- row [value ["columns" ]["jupiter" ][1 ]]
220
+ # nan float if jupiter item not found
221
+ list1 = (
222
+ [row [value ["columns" ]["jupiter" ][0 ]]]
223
+ if isinstance (row [value ["columns" ]["jupiter" ][0 ]], str )
224
+ else []
167
225
)
168
- # str1 = " ".join(easy_item_type_mapping[type] for type in list1 if type)
226
+ if (
227
+ isinstance (row [value ["columns" ]["jupiter" ][1 ]], str )
228
+ and row [value ["columns" ]["jupiter" ][1 ]]
229
+ ):
230
+ list1 = list1 + utils .convert_string_list_representation_to_list (
231
+ row [value ["columns" ]["jupiter" ][1 ]]
232
+ )
233
+
234
+ logging .debug ("special_type_compare: %s" , list1 )
235
+
169
236
list1 = list (easy_item_type_mapping [type ] for type in list1 if type )
170
- list2 = json .loads (row [value ["columns" ]["dspace" ]])
237
+ list2 = utils .convert_string_list_representation_to_list (
238
+ row [value ["columns" ]["dspace" ]]
239
+ )
171
240
172
241
logging .debug ("special_type_compare: %s ---- %s" , list1 , list2 )
173
242
@@ -341,7 +410,7 @@ def special_type_compare(row, key, value):
341
410
"description" : {
342
411
"columns" : {
343
412
"jupiter" : "description" ,
344
- "dspace" : "metadata.dc.description.0.value " ,
413
+ "dspace" : "metadata.dc.description" ,
345
414
},
346
415
"comparison_function" : string_compare_ignore_whitespace ,
347
416
},
@@ -353,22 +422,22 @@ def special_type_compare(row, key, value):
353
422
"comparison_function" : collection_parent_compare ,
354
423
},
355
424
"dc.title" : {
356
- "columns" : {"jupiter" : "title" , "dspace" : "metadata.dc.title.0.value " },
425
+ "columns" : {"jupiter" : "title" , "dspace" : "metadata.dc.title" },
357
426
"comparison_function" : string_compare ,
358
427
},
359
428
"dc.contributor.author" : {
360
429
"columns" : {
361
430
"jupiter" : "creators" "" ,
362
431
"dspace" : "metadata.dc.contributor.author" ,
363
432
},
364
- "comparison_function" : member_of_list_compare ,
433
+ "comparison_function" : string_lists_compare ,
365
434
},
366
435
"dc.contributor.other" : {
367
436
"columns" : {
368
437
"jupiter" : "contributors" "" ,
369
438
"dspace" : "metadata.dc.contributor.other" ,
370
439
},
371
- "comparison_function" : member_of_list_compare ,
440
+ "comparison_function" : string_lists_compare ,
372
441
},
373
442
"dc.type" : {
374
443
"columns" : {
@@ -383,19 +452,19 @@ def special_type_compare(row, key, value):
383
452
},
384
453
"dc.subject" : {
385
454
"columns" : {"jupiter" : "subject" , "dspace" : "metadata.dc.subject" },
386
- "comparison_function" : member_of_list_compare ,
455
+ "comparison_function" : string_lists_compare ,
387
456
},
388
457
"dc.date.issued" : {
389
458
"columns" : {"jupiter" : "created" , "dspace" : "metadata.dc.date.issued" },
390
- "comparison_function" : string_compare ,
459
+ "comparison_function" : value_in_string_list_compare ,
391
460
},
392
461
"dc.rights" : {
393
462
"columns" : {"jupiter" : "rights" , "dspace" : "metadata.dc.rights" },
394
- "comparison_function" : member_of_list_compare ,
463
+ "comparison_function" : value_in_string_list_compare ,
395
464
},
396
465
"dc.rights.license" : {
397
466
"columns" : {"jupiter" : "license" , "dspace" : "metadata.dc.rights.license" },
398
- "comparison_function" : member_of_list_compare ,
467
+ "comparison_function" : value_in_string_list_compare ,
399
468
},
400
469
# "dissertant": {
401
470
# "columns": {"jupiter": "", "dspace": "metadata.dissertant"},
@@ -409,6 +478,10 @@ def special_type_compare(row, key, value):
409
478
# "columns": {"jupiter": "", "dspace": "metadata.graduation_date"},
410
479
# "comparison_function": string_compare,
411
480
# },
481
+ "access_rights" : {
482
+ "columns" : {"jupiter" : "visibility" , "dspace" : "access_rights" },
483
+ "comparison_function" : access_rights_compare ,
484
+ },
412
485
},
413
486
}
414
487
@@ -424,12 +497,22 @@ def process_row(row, columns_to_compare):
424
497
dspace_column = f"{ value ['columns' ]['dspace' ]} "
425
498
comparison_function = value ["comparison_function" ]
426
499
500
+ logging .debug (
501
+ "comparison [%s]: jupiter_column [%s] --- dspace_column [%s]" ,
502
+ comparison_function .__name__ ,
503
+ jupiter_column ,
504
+ dspace_column ,
505
+ )
506
+
427
507
if key == "dc.type" :
428
508
comparison_output [key ] = comparison_function (row , key , value )
429
509
elif comparison_function (row [jupiter_column ], row [dspace_column ]):
430
510
comparison_output [key ] = "PASS"
431
511
else :
432
512
comparison_output [key ] = "FAIL"
513
+
514
+ logging .debug ("key: [%s] status:[%s]" , key , comparison_output [key ])
515
+
433
516
return comparison_output
434
517
435
518
@@ -478,6 +561,9 @@ def process_input(
478
561
479
562
# Iterate over the rows in the aligned dataframe and compare the columns
480
563
for index , row in aligned_df .iterrows ():
564
+
565
+ logging .debug ("ID [%s]" , index )
566
+
481
567
comparison_output = {
482
568
"index (empty if no ERA obj)" : index ,
483
569
"label" : row [comparison_config ["label_column" ]],
@@ -509,6 +595,7 @@ def process_input(
509
595
comparison_output .update (
510
596
process_row (row , comparison_config ["comparison_types" ])
511
597
)
598
+ logging .debug ("output: [%s]" , comparison_output )
512
599
writer .writerow (comparison_output )
513
600
514
601
0 commit comments