Skip to content

Commit d21ab9f

Browse files
authored
Merge pull request #5 from jefferya/community_collection_json_flatten
Fix flattening of community and collection JSON output
2 parents 650ee4b + 1f630e1 commit d21ab9f

13 files changed

+29
-30
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -211,20 +211,20 @@ The steps to set up a validation run.
211211
--input_dspace ~/Downloads/scholaris_collections.csv \
212212
--output /tmp/migration_audit_collections_$(date +%Y-%m-%d_%H:%M:%S).csv \
213213
--type collections
214-
214+
215215
# Item audit results
216216
venv/bin/python src/compare_csv.py \
217217
--input_jupiter ~/Downloads/era_export/jupiter_items_2025-03-06_12-08-01.csv \
218218
--input_dspace ~/Downloads/scholaris_items.csv \
219-
--output /tmp/migration_audit_bitstreams_$(date +%Y-%m-%d_%H:%M:%S).csv \
220-
--type bitstreams
221-
219+
--output /tmp/migration_audit_items_$(date +%Y-%m-%d_%H:%M:%S).csv \
220+
--type items
221+
222222
# Bitstream audit results
223223
venv/bin/python src/compare_csv.py \
224224
--input_jupiter ~/Downloads/era_export/jupiter_items_2025-03-06_12-08-01.csv \
225225
--input_dspace ~/Downloads/scholaris_bitstreams.csv \
226226
--output /tmp/migration_audit_bitstreams_$(date +%Y-%m-%d_%H:%M:%S).csv \
227-
--type bitstreams
227+
--type bitstreams
228228
```
229229

230230
5. Review the results for PASS/FAIL notices on the validated columns.

src/compare_csv.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,8 @@ def abstract_compare(str1, list2):
125125
remove from comparison
126126
"""
127127
logging.debug("%s ---- %s", str1, list2)
128+
str1 = "" if isinstance(str1, float) else str1
129+
list2 = "[]" if isinstance(list2, float) else list2
128130
list2 = list2.replace("<p>", "").replace("</p>", "")
129131
list2 = utils.convert_string_list_representation_to_list(list2)
130132
logging.debug("%s ---- %s", str1, list2)
@@ -307,19 +309,19 @@ def special_type_compare(row, key, value):
307309
"description": {
308310
"columns": {
309311
"jupiter": "description",
310-
"dspace": "metadata.dc.description.0.value",
312+
"dspace": "metadata.dc.description",
311313
},
312314
"comparison_function": string_compare_ignore_whitespace,
313315
},
314316
"abstract": {
315317
"columns": {
316318
"jupiter": "description",
317-
"dspace": "metadata.dc.description.abstract.0.value",
319+
"dspace": "metadata.dc.description.abstract",
318320
},
319321
"comparison_function": string_compare_ignore_whitespace,
320322
},
321323
"dc.title": {
322-
"columns": {"jupiter": "title", "dspace": "metadata.dc.title.0.value"},
324+
"columns": {"jupiter": "title", "dspace": "metadata.dc.title"},
323325
"comparison_function": string_compare,
324326
},
325327
},
@@ -348,24 +350,24 @@ def special_type_compare(row, key, value):
348350
"description": {
349351
"columns": {
350352
"jupiter": "description",
351-
"dspace": "metadata.dc.description.0.value",
353+
"dspace": "metadata.dc.description",
352354
},
353355
"comparison_function": string_compare_ignore_whitespace,
354356
},
355357
"abstract": {
356358
"columns": {
357359
"jupiter": "description",
358-
"dspace": "metadata.dc.description.abstract.0.value",
360+
"dspace": "metadata.dc.description.abstract",
359361
},
360362
"comparison_function": string_compare_ignore_whitespace,
361363
},
362364
"dc.title": {
363-
"columns": {"jupiter": "title", "dspace": "metadata.dc.title.0.value"},
365+
"columns": {"jupiter": "title", "dspace": "metadata.dc.title"},
364366
"comparison_function": string_compare,
365367
},
366368
"collection_parent_expect_to_fail_due_to_lack_of_community_provenance": {
367369
"columns": {
368-
"jupiter": "community.label",
370+
"jupiter": "community.title",
369371
"dspace": "provenance.ual.jupiterId.community",
370372
},
371373
"comparison_function": string_compare,

src/dspace_api_exports.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -157,7 +157,7 @@ def process_bitstreams(dspace_client, output_file):
157157
if "dc.title" in bitstream.metadata:
158158
tmp_dict.update(
159159
{
160-
"bitstream.metadata.dc.title.0.value": bitstream.metadata[
160+
"bitstream.metadata.dc.title": bitstream.metadata[
161161
"dc.title"
162162
][0]["value"]
163163
}
@@ -173,13 +173,9 @@ def process_bitstreams(dspace_client, output_file):
173173
if "dc.description" in bitstream.metadata:
174174
tmp_dict.update(
175175
{
176-
"bitstream.metadata.dc.description.0.value": bitstream.metadata[
176+
"bitstream.metadata.dc.description": bitstream.metadata[
177177
"dc.description"
178-
][
179-
0
180-
][
181-
"value"
182-
],
178+
][0]["value"],
183179
}
184180
)
185181

src/tests/assets/dspace_bitstream.csv

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,3 @@
1-
item.handle,item.uuid,item.name,provenance.ual.jupiterId.item,bitstream.bundleName,bitstream.sizeBytes,bitstream.id,bitstream.name,bitstream.sequenceId,bitstream.checksum.value,bitstream.checksum_algorithm,bitstream.uuid,bitstream.metadata.dc.title.0.value,bitstream.metadata.dc.source.0.value,bitstream.metadata.dc.description.0.value,bundle.name
1+
item.handle,item.uuid,item.name,provenance.ual.jupiterId.item,bitstream.bundleName,bitstream.sizeBytes,bitstream.id,bitstream.name,bitstream.sequenceId,bitstream.checksum.value,bitstream.checksum_algorithm,bitstream.uuid,bitstream.metadata.dc.title,bitstream.metadata.dc.source.0.value,bitstream.metadata.dc.description,bundle.name
22
,1,Test 1,123456789.1,,,,test_1.a.pdf,1,8e82dffa09e62a70efe3f6525108c3f6,,,,,,
33
,1,Test 1,123456789.1,,,,test_1.b.pdf,2,8e82dffa09e62a70efe3f6525108c3f6,,,,,,
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
uuid,name,metadata.dc.description.0.value,metadata.dc.description.abstract.0.value,metadata.dc.title.0.value,provenance.ual.jupiterId.collection,provenance.ual.jupiterId.community,lastModified
1+
uuid,name,metadata.dc.description,metadata.dc.description.abstract,metadata.dc.title,provenance.ual.jupiterId.collection,provenance.ual.jupiterId.community,lastModified
22
1234,Test,Test description,Test description,Test,123456789.1,community_2,9999-01-02T03:04:05Z
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
uuid,name,metadata.dc.description.0.value,metadata.dc.description.abstract.0.value,metadata.dc.title.0.value,provenance.ual.jupiterId.collection,provenance.ual.jupiterId.community,lastModified
1+
uuid,name,metadata.dc.description,metadata.dc.description.abstract,metadata.dc.title,provenance.ual.jupiterId.collection,provenance.ual.jupiterId.community,lastModified
Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
uuid,name,metadata.dc.description.0.value,metadata.dc.description.abstract.0.value,metadata.dc.title.0.value,provenance.ual.jupiterId.collection,provenance.ual.jupiterId.community,languages.iso,metadata.dc.identifier.0.value,lastModified
2-
1234,Test,Test description 2,Test description 2,Test 2,123456789.1,community_2,9999-01-02T00:00:00Z
1+
uuid,name,metadata.dc.description,metadata.dc.description.abstract,metadata.dc.title,provenance.ual.jupiterId.collection,provenance.ual.jupiterId.community,languages.iso,metadata.dc.identifier.0.value,lastModified
2+
1234,Test,Test description 2,Test description 2,Test 2,123456789.1,community_2,9999-01-02T00:00:00Z,a,b

src/tests/assets/dspace_community.csv

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
uuid,name,metadata.dc.description.0.value,metadata.dc.description.abstract.0.value,metadata.dc.title.0.value,lastModified
1+
uuid,name,metadata.dc.description,metadata.dc.description.abstract,metadata.dc.title,lastModified
22
1234,Test,Test description,Test description,Test,9999-01-02T03:04:05Z
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
uuid,name,metadata.dc.description.0.value,metadata.dc.description.abstract.0.value,metadata.dc.title.0.value,lastModified
1+
uuid,name,metadata.dc.description,metadata.dc.description.abstract,metadata.dc.title,lastModified
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
uuid,name,metadata.dc.description.0.value,metadata.dc.description.abstract.0.value,metadata.dc.title.0.value,lastModified
1+
uuid,name,metadata.dc.description,metadata.dc.description.abstract,metadata.dc.title,lastModified
22
i1234,Test,Test description 2,Test description 2,Test 2,9999-01-02T03:04:05Z
Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,2 +1,2 @@
1-
title,description,id,updated_at,community_id,community.label
1+
title,description,id,updated_at,community_id,community.title
22
Test,Test description,123456789.1,9999-01-02T03:04:05Z,abcdef,comm_label

src/tests/test_integration.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@ def test_abstract_compare():
6464
assert compare.abstract_compare("a", "['a']") is True
6565
assert compare.abstract_compare("a", "['<p>a</p>']") is True
6666
assert compare.abstract_compare("a. ", "['<p>a. </p>']") is True
67+
assert compare.abstract_compare(float("NaN"), "['']") is True
6768

6869

6970
def test_string_in_list_compare_ignore_whitespace():

src/utils/utilities.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -142,9 +142,9 @@
142142
"bitstream.checksum.value",
143143
"bitstream.checksum_algorithm",
144144
"bitstream.uuid",
145-
"bitstream.metadata.dc.title.0.value",
145+
"bitstream.metadata.dc.title",
146146
"bitstream.metadata.dc.source.0.value",
147-
"bitstream.metadata.dc.description.0.value",
147+
"bitstream.metadata.dc.description",
148148
"bundle.name",
149149
],
150150
"user": [

0 commit comments

Comments
 (0)