From 0778d1cf4dbb996033ac5babcedb72a5e9bd045d Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Thu, 30 Jan 2025 14:04:44 -0800 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=A7=B9=20fix=20tenant=20rake?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ref: - https://github.com/notch8/palni-palci/pull/1064 --- lib/tasks/tenants.rake | 85 ++++++++++++++++++++++-------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/lib/tasks/tenants.rake b/lib/tasks/tenants.rake index bb0c0d888..69ffe5add 100644 --- a/lib/tasks/tenants.rake +++ b/lib/tasks/tenants.rake @@ -1,58 +1,61 @@ # frozen_string_literal: true namespace :tenants do - # how much space, works, files, per each tenant? - # rubocop:disable Metrics/BlockNesting + # How much space, works, files, per each tenant? task calculate_usage: :environment do @results = [] Account.where(search_only: false).find_each do |account| - if account.cname.present? - AccountElevator.switch!(account.cname) - puts "---------------#{account.cname}-------------------------" - models = Hyrax.config.curation_concerns.map { |m| "\"#{m}\"" } - works = ActiveFedora::SolrService.query("has_model_ssim:(#{models.join(' OR ')})", rows: 100_000) - if works&.any? - puts "#{works.count} works found" - @tenant_file_sizes = [] - works.each do |work| + next if account.cname.blank? + + AccountElevator.switch!(account.cname) + puts "---------------#{account.cname}-------------------------" + + models = Hyrax.config.curation_concerns.map { |m| "\"#{m}\"" } + works = ActiveFedora::SolrService.query("has_model_ssim:(#{models.join(' OR ')})", rows: 100_000) + + if works&.any? + puts "#{works.count} works found" + tenant_file_sizes = [] # Declare and initialize within the block + + works.each do |work| + begin document = SolrDocument.find(work.id) - files = document._source["file_set_ids_ssim"] - if files&.any? - file_sizes = [] - files.each do |file| - f = SolrDocument.find(file.to_s) - if file - file_sizes.push(f.to_h['file_size_lts']) unless f.to_h['file_size_lts'].nil? - else - files_sizes.push(0) + files = document._source["file_set_ids_ssim"] || [] + + if files.any? + file_sizes = files.map do |file| + begin + f = SolrDocument.find(file.to_s) + f.to_h['file_size_lts'] || 0 + rescue Blacklight::Exceptions::RecordNotFound => e + puts "Warning: File #{file} not found. Skipping. Error: #{e.message}" + 0 end end - if file_sizes.any? - file_sizes_total_bytes = file_sizes.inject(0, :+) - file_size_total = (file_sizes_total_bytes / 1.0.megabyte).round(2) - else - file_size_total = 0 - end - @tenant_file_sizes.push(file_size_total) + + total_file_size_bytes = file_sizes.inject(0, :+) + tenant_file_sizes << (total_file_size_bytes / 1.0.megabyte).round(2) else - @tenant_file_sizes.push(0) + tenant_file_sizes << 0 end + rescue Blacklight::Exceptions::RecordNotFound => e + puts "Warning: Work #{work.id} not found. Skipping. Error: #{e.message}" + tenant_file_sizes << 0 end - if @tenant_file_sizes - tenant_file_sizes_total_megabytes = @tenant_file_sizes.inject(0, :+) - @results.push("#{account.cname}: #{tenant_file_sizes_total_megabytes} Total MB / #{works.count} Works") - else - @results.push("#{account.cname}: 0 Total MB / #{works.count} Works") - end - else - @results.push("#{account.cname}: 0 Total MB / 0 Works") - end - puts "==================================================================" - @results.each do |result| - puts result end + + total_mb = tenant_file_sizes.inject(0, :+) + @results << "#{account.cname}: #{total_mb} Total MB / #{works.count} Works" + else + @results << "#{account.cname}: 0 Total MB / 0 Works" end + + puts "==================================================================" + end + + # Output results + @results.each do |result| + puts result end end - # rubocop:enable Metrics/BlockNesting end From 10394ec654973669e4f26afa0756f1d8e0b3b4dd Mon Sep 17 00:00:00 2001 From: Kirk Wang Date: Thu, 30 Jan 2025 14:48:34 -0800 Subject: [PATCH 2/2] =?UTF-8?q?=F0=9F=A7=B9=20Remove=20commented=20out=20l?= =?UTF-8?q?ogic?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit will remove the commented out logic for the csv_parser_decorator since it was not being used. It seems the missing fields check is working fine in Bulkrax so we'll use that. --- app/parsers/bulkrax/csv_parser_decorator.rb | 60 --------------------- lib/tasks/tenants.rake | 38 ++++++------- 2 files changed, 17 insertions(+), 81 deletions(-) diff --git a/app/parsers/bulkrax/csv_parser_decorator.rb b/app/parsers/bulkrax/csv_parser_decorator.rb index b438c3e1e..6c0d9a469 100644 --- a/app/parsers/bulkrax/csv_parser_decorator.rb +++ b/app/parsers/bulkrax/csv_parser_decorator.rb @@ -4,66 +4,6 @@ module Bulkrax module CsvParserDecorator include OerCsvParser - - # TODO: We need to revisit this for Valkyrie - # - # def valid_import? - # missing_fields_by_model = records.each_with_object({}) do |record, hash| - # record.compact! - # record.transform_keys!(&:downcase).transform_keys!(&:to_sym) - # missing_fields = missing_fields_for(record) - # hash[record[:model]] = missing_fields if missing_fields.present? - # end - - # raise_error_for_missing_fields(missing_fields_by_model) if missing_fields_by_model.keys.present? - - # file_paths.is_a?(Array) - # rescue StandardError => e - # set_status_info(e) - # false - # end - - # private - - # def missing_fields_for(record) - # required_fields = determine_required_fields_for(record[:model]) - # required_fields.select do |field| - # # checks the field itself - # # any parser_mappings fields terms from `config/initializers/bulkrax.rb` - # # or any keys that has sequential numbers like creator_1 - # (record[field] || - # mapped_from(field).map { |f| record[f] }.any? || - # handle_keys_with_numbers(field, record)).blank? - # end - # end - - # def determine_required_fields_for(model) - # # TODO: Revisit when Valkyrized as we can use Hyrax::ModelRegistry - # case model - # when 'Collection' then Hyrax::Forms::CollectionForm.required_fields - # when 'FileSet' then [] - # else "Hyrax::#{model}Form".constantize.required_fields - # end - # end - - # def mapped_from(field) - # Bulkrax.config.field_mappings[self.class.to_s][field.to_s]&.fetch(:from, [])&.map(&:to_sym) - # end - - # def handle_keys_with_numbers(_field, record) - # keys_with_numbers = record.keys.select { |k| k.to_s.match(/(.+)_\d+/) } - # keys_with_numbers.each do |key| - # return record[key] - # end - # end - - # def raise_error_for_missing_fields(missing_fields_by_model) - # error_alert = missing_fields_by_model.keys.map do |model| - # "#{model} missing: #{missing_fields_by_model[model].join(', ')}" - # end.join('; ') - # # example alert: 'Collection missing: title; GenericWork missing: title, creator' - # raise StandardError, error_alert - # end end end diff --git a/lib/tasks/tenants.rake b/lib/tasks/tenants.rake index 69ffe5add..5f8942df3 100644 --- a/lib/tasks/tenants.rake +++ b/lib/tasks/tenants.rake @@ -18,30 +18,26 @@ namespace :tenants do tenant_file_sizes = [] # Declare and initialize within the block works.each do |work| - begin - document = SolrDocument.find(work.id) - files = document._source["file_set_ids_ssim"] || [] - - if files.any? - file_sizes = files.map do |file| - begin - f = SolrDocument.find(file.to_s) - f.to_h['file_size_lts'] || 0 - rescue Blacklight::Exceptions::RecordNotFound => e - puts "Warning: File #{file} not found. Skipping. Error: #{e.message}" - 0 - end - end - - total_file_size_bytes = file_sizes.inject(0, :+) - tenant_file_sizes << (total_file_size_bytes / 1.0.megabyte).round(2) - else - tenant_file_sizes << 0 + document = SolrDocument.find(work.id) + files = document._source["file_set_ids_ssim"] || [] + + if files.any? + file_sizes = files.map do |file| + f = SolrDocument.find(file.to_s) + f.to_h['file_size_lts'] || 0 + rescue Blacklight::Exceptions::RecordNotFound => e + puts "Warning: File #{file} not found. Skipping. Error: #{e.message}" + 0 end - rescue Blacklight::Exceptions::RecordNotFound => e - puts "Warning: Work #{work.id} not found. Skipping. Error: #{e.message}" + + total_file_size_bytes = file_sizes.inject(0, :+) + tenant_file_sizes << (total_file_size_bytes / 1.0.megabyte).round(2) + else tenant_file_sizes << 0 end + rescue Blacklight::Exceptions::RecordNotFound => e + puts "Warning: Work #{work.id} not found. Skipping. Error: #{e.message}" + tenant_file_sizes << 0 end total_mb = tenant_file_sizes.inject(0, :+)