@@ -234,6 +234,128 @@ def language_compare(list1, list2):
234
234
return conversion_result == utils .convert_string_list_representation_to_list (list2 )
235
235
236
236
237
+ #
238
def item_or_thesis_jupiter_strings_to_single_dspace(row, key, value):
    """
    Special dc.date.issued comparison: a Jupiter item and a Jupiter thesis
    store the value in two different string columns, both of which were
    migrated into a single DSpace field.

    Args:
        row: record indexed by column name.
        key: name of the comparison being run (only used for logging).
        value: comparison configuration; value["columns"]["jupiter"] holds the
            two Jupiter column names and value["columns"]["dspace"] the DSpace
            column name.

    Returns:
        "PASS" when all three values are empty, or when either Jupiter string
        is an element of the DSpace list; "FAIL" otherwise.
    """
    logging.debug(": [%s] %s", key, value)

    columns = value["columns"]
    first_jupiter = row[columns["jupiter"][0]]
    second_jupiter = row[columns["jupiter"][1]]
    # The DSpace cell is run through the project helper, which presumably
    # turns a stringified list into a real list -- confirm against utils.
    dspace_values = utils.convert_string_list_representation_to_list(
        row[columns["dspace"]]
    )

    logging.debug(
        ": %s[%s] %s[%s] %s[%s]",
        columns["jupiter"][0],
        first_jupiter,
        columns["jupiter"][1],
        second_jupiter,
        columns["dspace"],
        dspace_values,
    )

    # Nothing on either side counts as a match.
    if not (first_jupiter or second_jupiter or dspace_values):
        return "PASS"

    # Otherwise one of the two Jupiter strings must appear in the DSpace list.
    if first_jupiter in dspace_values or second_jupiter in dspace_values:
        return "PASS"

    return "FAIL"
267
+
268
+
269
+ #
270
def item_or_thesis_jupiter_list_and_string_to_single_dspace(row, key, value):
    """
    Special comparison: a Jupiter item and a Jupiter thesis store the value in
    different columns -- one holding a list, the other a single string -- and
    both were migrated into a single Scholaris (DSpace) field.

    Args:
        row: record indexed by column name.
        key: name of the comparison being run (only used for logging).
        value: comparison configuration; value["columns"]["jupiter"][0] names
            the list column, value["columns"]["jupiter"][1] names the string
            column and value["columns"]["dspace"] names the DSpace column.

    Returns:
        "PASS" when all three values are empty, or when the DSpace list is
        non-empty and either equals the Jupiter list or contains the Jupiter
        string; "FAIL" otherwise.
    """
    logging.debug(": [%s] %s", key, value)

    columns = value["columns"]
    list_column = columns["jupiter"][0]
    string_column = columns["jupiter"][1]
    dspace_column = columns["dspace"]

    # Raw cell values, before any list conversion.
    logging.debug(
        ": %s[%s] %s[%s] %s[%s]",
        list_column,
        row[list_column],
        string_column,
        row[string_column],
        dspace_column,
        row[dspace_column],
    )

    # The helper presumably turns a stringified list into a real list --
    # confirm against utils.
    list_1 = utils.convert_string_list_representation_to_list(row[list_column])
    str_1 = row[string_column]
    list_dspace = utils.convert_string_list_representation_to_list(
        row[dspace_column]
    )

    # Values as they will actually be compared.
    logging.debug(
        ": %s[%s] %s[%s] %s[%s]",
        list_column,
        list_1,
        string_column,
        str_1,
        dspace_column,
        list_dspace,
    )

    # Nothing on either side counts as a match.
    if not list_1 and not str_1 and not list_dspace:
        return "PASS"

    # At least one Jupiter value is populated here, so an empty DSpace field
    # means the value is missing on the DSpace side. Without this guard an
    # empty Jupiter list would compare equal to the empty DSpace list and
    # report PASS although the string column holds data.
    if not list_dspace:
        return "FAIL"

    if list_1 == list_dspace or str_1 in list_dspace:
        return "PASS"

    return "FAIL"
311
+
312
+
313
+ #
314
def item_or_thesis_jupiter_lists_to_single_dspace(row, key, value):
    """
    Special comparison: a Jupiter item and a Jupiter thesis store the value in
    two different list columns, both of which were migrated into a single
    DSpace field.

    Args:
        row: record indexed by column name.
        key: name of the comparison being run (only used for logging).
        value: comparison configuration; value["columns"]["jupiter"] holds the
            two Jupiter column names and value["columns"]["dspace"] the DSpace
            column name.

    Returns:
        "PASS" when all three lists are empty, or when the DSpace list is
        non-empty and equal to either Jupiter list; "FAIL" otherwise.
    """
    logging.debug(": [%s] %s", key, value)

    columns = value["columns"]
    first_column = columns["jupiter"][0]
    second_column = columns["jupiter"][1]
    dspace_column = columns["dspace"]

    # Raw cell values, before any list conversion.
    logging.debug(
        ": %s[%s] %s[%s] %s[%s]",
        first_column,
        row[first_column],
        second_column,
        row[second_column],
        dspace_column,
        row[dspace_column],
    )

    # The helper presumably turns a stringified list into a real list --
    # confirm against utils.
    list_1 = utils.convert_string_list_representation_to_list(row[first_column])
    list_2 = utils.convert_string_list_representation_to_list(row[second_column])
    list_dspace = utils.convert_string_list_representation_to_list(
        row[dspace_column]
    )

    # Values as they will actually be compared.
    logging.debug(
        ": %s[%s] %s[%s] %s[%s]",
        first_column,
        list_1,
        second_column,
        list_2,
        dspace_column,
        list_dspace,
    )

    # Nothing on either side counts as a match.
    if not list_1 and not list_2 and not list_dspace:
        return "PASS"

    # At least one Jupiter list is populated here, so an empty DSpace field
    # means the value is missing on the DSpace side. Without this guard the
    # empty DSpace list would compare equal to whichever Jupiter list is
    # empty and report PASS although the other list holds data.
    if not list_dspace:
        return "FAIL"

    if list_dspace in (list_1, list_2):
        return "PASS"

    return "FAIL"
357
+
358
+
237
359
#
238
360
def special_type_compare (row , key , value ):
239
361
"""
@@ -286,7 +408,12 @@ def special_type_compare(row, key, value):
286
408
287
409
logging .debug ("special_type_compare: %s ---- %s" , list1 , list2 )
288
410
289
- return "PASS" if list1 == list2 else "FAIL"
411
+ ret = "FAIL"
412
+ if list1 == list2 :
413
+ ret = "PASS"
414
+ elif not list1 and list2 == ["http://purl.org/coar/resource_type/c_46ec" ]:
415
+ ret = "STATIC VALUE ADDED (thesis?)"
416
+ return ret
290
417
291
418
292
419
# Define the columns to compare and how to compare them
@@ -311,18 +438,18 @@ def special_type_compare(row, key, value):
311
438
"jupiter" : "description" ,
312
439
"dspace" : "metadata.dc.description" ,
313
440
},
314
- "comparison_function" : string_compare_ignore_whitespace ,
441
+ "comparison_function" : string_in_list_compare_ignore_whitespace ,
315
442
},
316
443
"abstract" : {
317
444
"columns" : {
318
445
"jupiter" : "description" ,
319
446
"dspace" : "metadata.dc.description.abstract" ,
320
447
},
321
- "comparison_function" : string_compare_ignore_whitespace ,
448
+ "comparison_function" : string_in_list_compare_ignore_whitespace ,
322
449
},
323
450
"dc.title" : {
324
451
"columns" : {"jupiter" : "title" , "dspace" : "metadata.dc.title" },
325
- "comparison_function" : string_compare ,
452
+ "comparison_function" : value_in_string_list_compare ,
326
453
},
327
454
},
328
455
}
@@ -352,18 +479,18 @@ def special_type_compare(row, key, value):
352
479
"jupiter" : "description" ,
353
480
"dspace" : "metadata.dc.description" ,
354
481
},
355
- "comparison_function" : string_compare_ignore_whitespace ,
482
+ "comparison_function" : string_in_list_compare_ignore_whitespace ,
356
483
},
357
484
"abstract" : {
358
485
"columns" : {
359
486
"jupiter" : "description" ,
360
487
"dspace" : "metadata.dc.description.abstract" ,
361
488
},
362
- "comparison_function" : string_compare_ignore_whitespace ,
489
+ "comparison_function" : string_in_list_compare_ignore_whitespace ,
363
490
},
364
491
"dc.title" : {
365
492
"columns" : {"jupiter" : "title" , "dspace" : "metadata.dc.title" },
366
- "comparison_function" : string_compare ,
493
+ "comparison_function" : value_in_string_list_compare ,
367
494
},
368
495
"collection_parent_expect_to_fail_due_to_lack_of_community_provenance" : {
369
496
"columns" : {
@@ -471,19 +598,19 @@ def special_type_compare(row, key, value):
471
598
"columns" : {"jupiter" : "title" , "dspace" : "metadata.dc.title" },
472
599
"comparison_function" : value_in_string_list_compare ,
473
600
},
474
- "dc.contributor" : {
601
+ "dc.contributor.other " : {
475
602
"columns" : {
476
- "jupiter" : "contributors" ,
603
+ "jupiter" : [ "contributors" , "committee_members" ] ,
477
604
"dspace" : "metadata.dc.contributor.other" ,
478
605
},
479
- "comparison_function" : string_lists_compare ,
606
+ "comparison_function" : item_or_thesis_jupiter_lists_to_single_dspace ,
480
607
},
481
- "dc.creator " : {
608
+ "dc.contributor.author " : {
482
609
"columns" : {
483
- "jupiter" : "creators" ,
610
+ "jupiter" : [ "creators" , "dissertant" ] ,
484
611
"dspace" : "metadata.dc.contributor.author" ,
485
612
},
486
- "comparison_function" : string_lists_compare ,
613
+ "comparison_function" : item_or_thesis_jupiter_list_and_string_to_single_dspace ,
487
614
},
488
615
"dc.type" : {
489
616
"columns" : {
@@ -501,8 +628,11 @@ def special_type_compare(row, key, value):
501
628
"comparison_function" : string_lists_compare ,
502
629
},
503
630
"dc.date.issued" : {
504
- "columns" : {"jupiter" : "created" , "dspace" : "metadata.dc.date.issued" },
505
- "comparison_function" : value_in_string_list_compare ,
631
+ "columns" : {
632
+ "jupiter" : ["created" , "graduation_date" ],
633
+ "dspace" : "metadata.dc.date.issued" ,
634
+ },
635
+ "comparison_function" : item_or_thesis_jupiter_strings_to_single_dspace ,
506
636
},
507
637
"dc.rights" : {
508
638
"columns" : {"jupiter" : "rights" , "dspace" : "metadata.dc.rights" },
@@ -527,27 +657,27 @@ def special_type_compare(row, key, value):
527
657
# "columns": {"jupiter": "", "dspace": "metadata.thesis.degree.discipline"},
528
658
# "comparison_function": value_in_string_list_compare,
529
659
# },
530
- "if_thesis_dissertant" : {
531
- "columns" : {
532
- "jupiter" : "dissertant" ,
533
- "dspace" : "metadata.dc.contributor.author" ,
534
- },
535
- "comparison_function" : value_in_string_list_compare ,
536
- },
660
+ # "if_thesis_dissertant": {
661
+ # "columns": {
662
+ # "jupiter": "dissertant",
663
+ # "dspace": "metadata.dc.contributor.author",
664
+ # },
665
+ # "comparison_function": value_in_string_list_compare,
666
+ # },
537
667
"if_thesis_supervisor" : {
538
668
"columns" : {
539
669
"jupiter" : "supervisors" ,
540
670
"dspace" : "metadata.dc.contributor.advisor" ,
541
671
},
542
672
"comparison_function" : string_lists_compare ,
543
673
},
544
- "if_thesis_committee_members" : {
545
- "columns" : {
546
- "jupiter" : "committee_members" ,
547
- "dspace" : "metadata.dc.contributor.other" ,
548
- },
549
- "comparison_function" : string_lists_compare ,
550
- },
674
+ # "if_thesis_committee_members": {
675
+ # "columns": {
676
+ # "jupiter": "committee_members",
677
+ # "dspace": "metadata.dc.contributor.other",
678
+ # },
679
+ # "comparison_function": string_lists_compare,
680
+ # },
551
681
"if_thesis_degree.grantor" : {
552
682
"columns" : {
553
683
"jupiter" : "institution" ,
@@ -575,7 +705,7 @@ def special_type_compare(row, key, value):
575
705
},
576
706
"if_thesis_ual.department" : {
577
707
"columns" : {"jupiter" : "departments" , "dspace" : "metadata.ual.department" },
578
- "comparison_function" : value_in_string_list_compare ,
708
+ "comparison_function" : string_lists_compare ,
579
709
},
580
710
},
581
711
}
@@ -599,7 +729,13 @@ def process_row(row, columns_to_compare):
599
729
dspace_column ,
600
730
)
601
731
602
- if key == "dc.type" :
732
+ if comparison_function .__name__ in [
733
+ "special_type_compare" ,
734
+ "item_or_thesis_jupiter_strings_to_single_dspace" ,
735
+ "item_or_thesis_jupiter_list_and_string_to_single_dspace" ,
736
+ "item_or_thesis_jupiter_lists_to_single_dspace" ,
737
+ ]:
738
+ # special comparison function
603
739
comparison_output [key ] = comparison_function (row , key , value )
604
740
elif comparison_function (row [jupiter_column ], row [dspace_column ]):
605
741
comparison_output [key ] = "PASS"
0 commit comments