|
| 1 | +# Subset of Jupiter content via a Ruby irb script to output the metadata in CSV format |
| 2 | +# Usage: irb -r ./juptiter_collection_metadata_to_CSV.rb |
| 3 | + |
# Ids of the Jupiter items exported by the "subset" exporters below.
#
# Deliberately excluded because they were not found in prd:
#   3424cccc-3bfb-4ba1-992e-37c5adfd52bf
#   c3bcf748-5676-4b58-bdbe-168590b0bbbc
ITEM_SUBSET = %w[
  0e790a8e-a263-4a99-9a77-418d91b700c0
  204c4969-4b26-4737-9482-39981fca2ded
  285a73a4-fa9d-41f8-9df3-c92956a338cd
  297d92b8-f255-49ff-9835-b52c7c1a0b6a
  2c5219bd-01e4-4b1b-8c9f-2c3bddf12c0e
  2e7f1207-79dd-4620-9a01-127f66c81def
  2f3407d5-193a-4a9e-b2cb-633ffd7c8a3f
  369dad5d-f221-4973-92aa-e9ce673cd58f
  3d6b31c2-686c-4c62-a598-eda1cf159125
  5a0aad85-bffa-4686-bae3-d589a64361dc
  69c8b80b-2251-4ed3-9fd4-5f433c30e521
  8d2ccf90-1cec-474b-9d83-b6d7bc02704c
  90fb3051-2a0f-4217-89a4-fc0ce2bfd672
  930915e9-aad9-46e6-9b98-72c379789869
  a20df6be-4fc3-4a19-aeea-6609474bec3f
  b464a3dc-510d-4625-a009-75dddaac1c91
  be7d39d9-9561-4485-a6f2-0f69f8084f83
  c8d55e58-b23f-421c-a75a-7c26c67a1c48
  c97c1a89-c2aa-49ae-9c26-d7560d8dfdc0
  c9f40f8c-56d1-4acd-80d6-dca64bd3e068
  cc9de8a2-ec54-46e2-b1a3-f3e34e7c762c
  d0973c15-8cff-431c-94e5-c21738e09594
  d18f9dd3-afda-4108-b32c-fff2ccbf93cf
  d3a154a1-cdb5-4a27-8074-c598448ea5df
  d8ab19e4-1f58-404d-a7f1-e7ac071d0a74
  db0339cf-5faf-4f28-9a25-5360bad5e8d9
  dd5e240c-533e-482c-9141-fcbab6362d50
  e22bb1bc-cdc5-4cc7-b907-8cc723599ec2
  f3678283-1807-4016-90e4-07f5dde4efd3
].freeze
| 37 | + |
| 38 | +#ITEM_SUBSET = [ |
| 39 | + #"047630f2-beba-4895-b7da-78c1a0219a92" |
| 40 | +#].freeze |
| 41 | + |
# Base exporter: writes one CSV row per record of a model class, using the
# attribute keys of a new, decorated record as the header row.
#
# Subclasses set @output_file (path of the CSV to write) and @instance (the
# class whose records are exported); they may override #enumerable to change
# which records are exported.
class JupiterBasicMetadataToCSV
  def initialize
    @output_file = ''
    @instance = nil
  end

  # Yields each record produced by @instance.find_each.
  #
  # Returns an Enumerator over the same records when called without a block.
  def enumerable(&block)
    return enum_for(:enumerable) unless block

    @instance.find_each(&block)
  end

  # Writes the header row, then one row of decorated attribute values per
  # record yielded by #enumerable, to @output_file.
  #
  # Raises RuntimeError ("Instance not set") unless @instance has been set.
  def run
    raise 'Instance not set' unless @instance

    headers = @instance.new.decorate.attributes.keys
    CSV.open(@output_file, 'wb', write_headers: true, headers: headers) do |csv|
      enumerable { |record| csv << record.decorate.attributes.values }
    end
  end
end
| 62 | + |
# Jupiter Item metadata: exports every Item via the base exporter.
class JupiterItemMetadataToCSV < JupiterBasicMetadataToCSV
  # output_directory - directory in which the timestamped CSV file is created;
  #                    a trailing path separator is optional.
  def initialize(output_directory)
    super()
    @date_time = Time.now.strftime('%Y-%m-%d_%H-%M-%S')
    # File.join rather than string concatenation, so "/tmp" and "/tmp/" both
    # place the file inside the directory.
    @output_file = File.join(output_directory, "jupiter_item_#{@date_time}.csv")
    @instance = Item
  end
end
| 72 | + |
# Subset of Jupiter Item metadata: exports only the Items listed in ITEM_SUBSET.
class JupiterItemMetadataToCSVSubset < JupiterBasicMetadataToCSV
  # output_directory - directory in which the timestamped CSV file is created;
  #                    a trailing path separator is optional.
  def initialize(output_directory)
    super()
    @date_time = Time.now.strftime('%Y-%m-%d_%H-%M-%S')
    # File.join rather than string concatenation, so "/tmp" and "/tmp/" both
    # place the file inside the directory.
    @output_file = File.join(output_directory, "jupiter_item_subset_#{@date_time}.csv")
    @instance = Item
  end

  # Yields the record looked up by @instance.find for each id in ITEM_SUBSET,
  # in list order. Returns an Enumerator when called without a block.
  #
  # NOTE(review): if @instance.find raises for an unknown id, a single missing
  # id aborts the whole export - confirm this is the intended behaviour.
  def enumerable
    return enum_for(:enumerable) unless block_given?

    ITEM_SUBSET.each { |id| yield @instance.find(id) }
  end
end
| 87 | + |
| 88 | + |
# Jupiter Active Storage Blob and Item metadata: writes one CSV row per file
# of every Item, combining item fields with the fields of the file's blob.
class JupiterActiveStorageBlobMetadataToCSV
  # "provenance.ual.jupiterId.item" & "bitstream.sequenceId" labels need to align
  # with the DSpace CSV for use with pandas dataframe multi-index join
  # in the comparison script
  HEADERS = [
    'item.id',
    'item.title',
    'provenance.ual.jupiterId.item',
    'bitstream.sequenceId',
    'key',
    'filename',
    'content_type',
    'metadata',
    'byte_size',
    'checksum',
    'created_at'
  ].freeze

  # output_directory - directory in which the timestamped CSV file is created;
  #                    a trailing path separator is optional.
  def initialize(output_directory)
    @date_time = Time.now.strftime('%Y-%m-%d_%H-%M-%S')
    # File.join rather than string concatenation, so "/tmp" and "/tmp/" both
    # place the file inside the directory.
    @output_file = File.join(output_directory, "jupiter_activestorage_#{@date_time}.csv")
    @instance = Item
  end

  # Yields each record produced by @instance.find_each.
  #
  # Returns an Enumerator over the same records when called without a block.
  def enumerable(&block)
    return enum_for(:enumerable) unless block

    @instance.find_each(&block)
  end

  # Writes the header row, then one row per entry of each item's
  # ordered_files, to @output_file.
  def run
    CSV.open(@output_file, 'wb', write_headers: true, headers: HEADERS) do |csv|
      enumerable do |item|
        # The sequence number restarts at 1 for every item and follows the
        # order in which ordered_files returns the files.
        item.ordered_files.each.with_index(1) do |file, sequence_num|
          blob = file.blob
          csv << [
            item.id,
            item.title,
            item.id, # written twice: also fills the "provenance.ual.jupiterId.item" column
            sequence_num,
            blob.key,
            blob.filename,
            blob.content_type,
            blob.metadata,
            blob.byte_size,
            blob.checksum,
            blob.created_at
          ]
        end
      end
    end
  end
end
| 137 | + |
# Subset of Jupiter Active Storage Blob and Item metadata: same CSV layout as
# the parent exporter, restricted to the Items listed in ITEM_SUBSET.
class JupiterActiveStorageBlobMetadataToCSVSubset < JupiterActiveStorageBlobMetadataToCSV
  # output_directory - directory in which the timestamped CSV file is created;
  #                    a trailing path separator is optional.
  #
  # Sets all state itself and does not call the parent initializer.
  def initialize(output_directory)
    @date_time = Time.now.strftime('%Y-%m-%d_%H-%M-%S')
    # File.join rather than string concatenation, so "/tmp" and "/tmp/" both
    # place the file inside the directory.
    @output_file = File.join(output_directory, "jupiter_activestorage_subset_#{@date_time}.csv")
    @instance = Item
  end

  # Yields the record looked up by @instance.find for each id in ITEM_SUBSET,
  # in list order. Returns an Enumerator when called without a block.
  #
  # NOTE(review): if @instance.find raises for an unknown id, a single missing
  # id aborts the whole export - confirm this is the intended behaviour.
  def enumerable
    return enum_for(:enumerable) unless block_given?

    ITEM_SUBSET.each { |id| yield @instance.find(id) }
  end
end
| 151 | + |
| 152 | +#JupiterCommunityMetadataToCSV.new("/era_tmp/delete_me_by_2025-04-15/").run |
| 153 | +#JupiterCollectionMetadataToCSV.new("/era_tmp/delete_me_by_2025-04-15/").run |
| 154 | +#JupiterItemMetadataToCSV.new("/era_tmp/delete_me_by_2025-04-15/").run |
| 155 | +#JupiterActiveStorageBlobMetadataToCSV.new("/era_tmp/delete_me_by_2025-04-15/").run |
| 156 | + |
# Run the two subset exports, writing their CSV files under /tmp/.
[
  JupiterItemMetadataToCSVSubset,
  JupiterActiveStorageBlobMetadataToCSVSubset
].each do |exporter_class|
  exporter_class.new("/tmp/").run
end
0 commit comments