Skip to content

Commit aa031f4

Browse files
committed
Adjust some Item comparison mappings & enhance tests.
1 parent a645beb commit aa031f4

File tree

4 files changed

+82
-15
lines changed

4 files changed

+82
-15
lines changed

src/compare_csv.py

+25-13
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ def string_compare(str1, str2):
6060
"""
6161
Compare two strings
6262
"""
63-
logging.debug("string_compare: %s ---- %s", str1, str2)
63+
logging.debug("%s ---- %s", str1, str2)
6464
return str1 == str2
6565

6666

@@ -98,7 +98,7 @@ def string_compare_ignore_whitespace(str1, str2):
9898
else:
9999
ret = regex.sub("", str(str1)) == regex.sub("", str(str2))
100100
logging.debug(
101-
"string_compare_ignore_whitespace: %s ---- %s %s",
101+
"%s ---- %s %s",
102102
str(str1),
103103
str(str2),
104104
str(ret),
@@ -111,7 +111,7 @@ def member_of_list_compare(list1, list2):
111111
"""
112112
Compare two lists
113113
"""
114-
logging.debug("member_of_list_compare: %s ---- %s", list1, list2)
114+
logging.debug("%s ---- %s", list1, list2)
115115
return list1 == list2
116116

117117

@@ -147,13 +147,18 @@ def collection_parent_compare(list1, list2):
147147
logging.debug("%s ---- %s", list1, list2)
148148

149149
# list 1 is nan if item not in Jupiter
150-
list1 = "[]" if isinstance(list1, str) is False else list1
150+
list1 = "['']" if isinstance(list1, str) is False else list1
151+
152+
logging.debug("%s ---- %s", list1, list2)
151153

152154
list1_collection_ids = list(
153155
path.split("/")[1]
154156
for path in utils.convert_string_to_json(list1)
155157
if path and isinstance(list1, str)
156158
)
159+
160+
logging.debug("%s ---- %s", list1_collection_ids, list2)
161+
157162
return list1_collection_ids == utils.convert_string_list_representation_to_list(
158163
list2
159164
)
@@ -179,7 +184,7 @@ def language_compare(list1, list2):
179184
"http://id.loc.gov/vocabulary/iso639-2/zxx": "No linguistic content",
180185
"http://terms.library.ualberta.ca/other": "other",
181186
}
182-
logging.debug("member_of_list_compare: %s ---- %s", list1, list2)
187+
logging.debug("%s ---- %s", list1, list2)
183188
conversion_result = list(
184189
easy_language_mapping[language]
185190
for language in utils.convert_string_list_representation_to_list(list1)
@@ -347,7 +352,7 @@ def special_type_compare(row, key, value):
347352
"dspace": ["provenance.ual.jupiterId.item", "bitstream.sequenceId"],
348353
},
349354
"label_column": "item.name",
350-
"identifier": {"jupiter": "provenance.ual.jupiterId.item", "dspace": "item.uuid"},
355+
"identifier": {"jupiter": "item.id", "dspace": "item.uuid"},
351356
"last_modified": {"jupiter": "created_at", "dspace": None},
352357
"comparison_types": {
353358
"name": {
@@ -371,7 +376,7 @@ def special_type_compare(row, key, value):
371376
"parent_item_id": {
372377
"columns": {
373378
"jupiter": "provenance.ual.jupiterId.item_jupiter",
374-
"dspace": "provenance.ual.jupiterId.item_jupiter",
379+
"dspace": "provenance.ual.jupiterId.item_dspace",
375380
},
376381
"comparison_function": string_compare,
377382
},
@@ -423,11 +428,11 @@ def special_type_compare(row, key, value):
423428
},
424429
"dc.title": {
425430
"columns": {"jupiter": "title", "dspace": "metadata.dc.title"},
426-
"comparison_function": string_compare,
431+
"comparison_function": value_in_string_list_compare,
427432
},
428433
"dc.contributor.author": {
429434
"columns": {
430-
"jupiter": "creators" "",
435+
"jupiter": "creators",
431436
"dspace": "metadata.dc.contributor.author",
432437
},
433438
"comparison_function": string_lists_compare,
@@ -439,6 +444,13 @@ def special_type_compare(row, key, value):
439444
},
440445
"comparison_function": string_lists_compare,
441446
},
447+
"dc.creator": {
448+
"columns": {
449+
"jupiter": "creators",
450+
"dspace": "metadata.dc.creator",
451+
},
452+
"comparison_function": string_lists_compare,
453+
},
442454
"dc.type": {
443455
"columns": {
444456
"jupiter": ["item_type", "publication_status"],
@@ -584,13 +596,13 @@ def process_input(
584596
]
585597
}
586598
)
587-
if comparison_config["identifier"]["dspace"] is not None:
599+
if comparison_config["identifier"]["jupiter"] is not None:
588600
comparison_output.update(
589-
{"dspace_id": row[comparison_config["identifier"]["dspace"]]}
601+
{"jupiter_id": row[comparison_config["identifier"]["jupiter"]]}
590602
)
591-
if comparison_config["identifier"]["jupiter"] is not None:
603+
if comparison_config["identifier"]["dspace"] is not None:
592604
comparison_output.update(
593-
{"jupiter_id": row[comparison_config["identifier"]["dspace"]]}
605+
{"dspace_id": row[comparison_config["identifier"]["dspace"]]}
594606
)
595607
comparison_output.update(
596608
process_row(row, comparison_config["comparison_types"])

src/tests/assets/dspace_item.csv

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
uuid,name,metadata.dc.description,metadata.dc.title,metadata.ual.jupiterId,metadata.ual.jupiterCollection,provenance.ual.jupiterId.collection,lastModified,metadata.dc.contributor.author,metadata.dc.contributor.other,metadata.dc.type,metadata.dc.language.iso,metadata.dc.subject,metadata.ual.itemType,metadata.ual.publicationStatus,metadata.dc.date.issued,metadata.dc.rights,metadata.dc.rights.license,access_rights
2-
1234,Test,Test description,Test,123456789.1,"[""collection_2""]",['collection_2'],9999-01-02T03:04:05Z,"[""cre 1"",""cre 2""]","[""con 1"",""con 2""]","[""http://purl.org/coar/resource_type/c_2f33"",""http://purl.org/coar/version/c_970fb48d4fbd8a85""]","[""en""]","[""sub 1""]",http://purl.org/ontology/bibo/Book,"[""http://purl.org/ontology/bibo/status#published""]",['9999-01-02'],['http://creativecommons.org/licenses/by-nc-nd/3.0/'],['asdf'],open.access
1+
uuid,name,metadata.dc.description,metadata.dc.title,metadata.ual.jupiterId,metadata.ual.jupiterCollection,provenance.ual.jupiterId.collection,lastModified,metadata.dc.contributor.author,metadata.dc.contributor.other,metadata.dc.creator,metadata.dc.type,metadata.dc.language.iso,metadata.dc.subject,metadata.ual.itemType,metadata.ual.publicationStatus,metadata.dc.date.issued,metadata.dc.rights,metadata.dc.rights.license,access_rights
2+
1234,Test,Test description,"['Test']",123456789.1,"[""collection_2""]",['collection_2'],9999-01-02T03:04:05Z,"[""cre 1"",""cre 2""]","[""con 1"",""con 2""]","[""cre 1"",""cre 2""]","[""http://purl.org/coar/resource_type/c_2f33"",""http://purl.org/coar/version/c_970fb48d4fbd8a85""]","[""en""]","[""sub 1""]",http://purl.org/ontology/bibo/Book,"[""http://purl.org/ontology/bibo/status#published""]",['9999-01-02'],['http://creativecommons.org/licenses/by-nc-nd/3.0/'],['asdf'],open.access

src/tests/test_integration.py

+46
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,52 @@ def test_member_of_list_compare():
4949
assert compare.member_of_list_compare(["a", "b", "c"], ["1"]) is False
5050

5151

52+
def test_value_in_string_list_compare():
53+
"""
54+
Given a single string value, compare it to the string representation of a list
55+
"""
56+
assert compare.value_in_string_list_compare("a", "['a']") is True
57+
assert compare.value_in_string_list_compare("", "[]") is True
58+
59+
60+
def test_string_lists_compare():
61+
"""
62+
Compare two string representations of lists
63+
"""
64+
assert compare.string_lists_compare("['a','b']", "['a','b']") is True
65+
assert compare.string_lists_compare("['a','b']", "['b','a']") is False
66+
assert compare.string_lists_compare("", "[]") is True
67+
68+
69+
def test_collection_parent_compare():
70+
"""
71+
Test the Jupiter member_of_path compared to the Scholaris collection id
72+
"""
73+
assert compare.collection_parent_compare('["a/b"]', "['b']") is True
74+
assert compare.collection_parent_compare('["a/b"]', "['c']") is False
75+
assert compare.collection_parent_compare("[]", "[]") is True
76+
77+
78+
def test_language_compare():
79+
"""
80+
Language tests
81+
"""
82+
assert (
83+
compare.language_compare(
84+
'["http://id.loc.gov/vocabulary/iso639-2/jpn", "http://id.loc.gov/vocabulary/iso639-2/fre"]',
85+
"['ja', 'fr']",
86+
)
87+
is True
88+
)
89+
assert (
90+
compare.language_compare(
91+
'["http://id.loc.gov/vocabulary/iso639-2/zxx", "http://id.loc.gov/vocabulary/iso639-2/ukr"]',
92+
"['No linguistic content', 'uk']",
93+
)
94+
is True
95+
)
96+
97+
5298
def test_input_process_community_valid(tmp_path):
5399
"""
54100
Test process_input for community with valid data

src/utils/utilities.py

+9
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@
281281
"ual.sortYear",
282282
"ual.stats.jupiterDownloads",
283283
"ual.stats.jupiterViews",
284+
"provenance.ual.jupiterId.item",
284285
# "provenance.ual.jupiterId.collection",
285286
]
286287

@@ -301,6 +302,14 @@ def deconstruct_list_of_dicts_to_list_of_values(list_of_dicts):
301302
def deconstruct_list_of_dicts_to_a_single_value(list_of_dicts):
302303
"""
303304
Deconstruct a list of dictionaries to a single value
305+
Example, we only want the "value":
306+
"dc.contributor.author" : [ {
307+
"value" : "Item - Test Creator 1",
308+
"language" : null,
309+
"authority" : null,
310+
"confidence" : -1,
311+
"place" : 0
312+
} ]
304313
"""
305314
return list_of_dicts[0]["value"]
306315

0 commit comments

Comments
 (0)