Skip to content

Commit

Permalink
Merge pull request #2441 from samvera/i117-pals-vs-pals-knapsack-comp
Browse files Browse the repository at this point in the history
I117 pals vs pals knapsack comp
  • Loading branch information
kirkkwang authored Jan 31, 2025
2 parents d0fa2af + 10394ec commit b9f5d43
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 104 deletions.
60 changes: 0 additions & 60 deletions app/parsers/bulkrax/csv_parser_decorator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,66 +4,6 @@
module Bulkrax
# Decorator module for the Bulkrax CSV parser.
#
# The only active behaviour is mixing in OerCsvParser. Everything below the
# include is a disabled (commented-out) override of `valid_import?` plus its
# private helpers, kept for reference until it is reworked for Valkyrie.
# NOTE(review): presumably this module is prepended onto Bulkrax::CsvParser
# elsewhere in the file -- confirm, that line is not visible here.
module CsvParserDecorator
include OerCsvParser

# TODO: We need to revisit this for Valkyrie
#
# Disabled override: collects, per record model, the required fields that are
# missing from the CSV row and raises a single combined error if any are found.
# Any StandardError is recorded via set_status_info and reported as `false`.
#
# def valid_import?
# missing_fields_by_model = records.each_with_object({}) do |record, hash|
# record.compact!
# record.transform_keys!(&:downcase).transform_keys!(&:to_sym)
# missing_fields = missing_fields_for(record)
# hash[record[:model]] = missing_fields if missing_fields.present?
# end

# raise_error_for_missing_fields(missing_fields_by_model) if missing_fields_by_model.keys.present?

# file_paths.is_a?(Array)
# rescue StandardError => e
# set_status_info(e)
# false
# end

# private

# def missing_fields_for(record)
# required_fields = determine_required_fields_for(record[:model])
# required_fields.select do |field|
# # checks the field itself
# # any parser_mappings fields terms from `config/initializers/bulkrax.rb`
# # or any keys that has sequential numbers like creator_1
# (record[field] ||
# mapped_from(field).map { |f| record[f] }.any? ||
# handle_keys_with_numbers(field, record)).blank?
# end
# end

# def determine_required_fields_for(model)
# # TODO: Revisit when Valkyrized as we can use Hyrax::ModelRegistry
# case model
# when 'Collection' then Hyrax::Forms::CollectionForm.required_fields
# when 'FileSet' then []
# else "Hyrax::#{model}Form".constantize.required_fields
# end
# end

# def mapped_from(field)
# Bulkrax.config.field_mappings[self.class.to_s][field.to_s]&.fetch(:from, [])&.map(&:to_sym)
# end

# NOTE(review): as written, this helper ignores `_field` entirely and returns
# the value of the first key that looks like `<anything>_<digits>`, so a
# numbered key for an unrelated field (e.g. `subject_1`) would satisfy a
# required `creator`. When no numbered key exists it returns the empty array
# from `each`. Match on the field name if this code is ever re-enabled.
#
# def handle_keys_with_numbers(_field, record)
# keys_with_numbers = record.keys.select { |k| k.to_s.match(/(.+)_\d+/) }
# keys_with_numbers.each do |key|
# return record[key]
# end
# end

# def raise_error_for_missing_fields(missing_fields_by_model)
# error_alert = missing_fields_by_model.keys.map do |model|
# "#{model} missing: #{missing_fields_by_model[model].join(', ')}"
# end.join('; ')
# # example alert: 'Collection missing: title; GenericWork missing: title, creator'
# raise StandardError, error_alert
# end
end
end

Expand Down
87 changes: 43 additions & 44 deletions lib/tasks/tenants.rake
Original file line number Diff line number Diff line change
@@ -1,58 +1,57 @@
# frozen_string_literal: true

namespace :tenants do
  # How much space, works, files, per each tenant?
  #
  # Usage: `rake tenants:calculate_usage`
  #
  # For every Account that is not search-only and has a cname, switches into
  # that tenant, queries Solr for all works of the configured curation concern
  # models (capped at 100_000 rows), and adds up the `file_size_lts` of each
  # work's file sets. A banner is printed while each tenant is processed; the
  # summary lines ("<cname>: <MB> Total MB / <n> Works") are printed once,
  # after every tenant has been visited.
  #
  # Works or file sets that cannot be found in Solr are reported with a
  # warning and counted as 0 bytes instead of aborting the whole run.
  task calculate_usage: :environment do
    # Local rather than an instance variable so nothing leaks onto the Rake
    # main object shared by other tasks.
    results = []

    Account.where(search_only: false).find_each do |account|
      next if account.cname.blank?

      AccountElevator.switch!(account.cname)
      puts "---------------#{account.cname}-------------------------"

      models = Hyrax.config.curation_concerns.map { |m| "\"#{m}\"" }
      works = ActiveFedora::SolrService.query("has_model_ssim:(#{models.join(' OR ')})", rows: 100_000)

      if works&.any?
        puts "#{works.count} works found"

        # One entry per work: combined size of its file sets, in megabytes.
        work_sizes_mb = works.map do |work|
          document = SolrDocument.find(work.id)
          file_set_ids = document._source["file_set_ids_ssim"] || []

          if file_set_ids.any?
            file_sizes = file_set_ids.map do |file_set_id|
              # A file set indexed without a size contributes 0 bytes.
              SolrDocument.find(file_set_id.to_s).to_h['file_size_lts'] || 0
            rescue Blacklight::Exceptions::RecordNotFound => e
              puts "Warning: File #{file_set_id} not found. Skipping. Error: #{e.message}"
              0
            end

            (file_sizes.sum / 1.0.megabyte).round(2)
          else
            0
          end
        rescue Blacklight::Exceptions::RecordNotFound => e
          puts "Warning: Work #{work.id} not found. Skipping. Error: #{e.message}"
          0
        end

        # Round again: adding already-rounded floats can reintroduce
        # representation noise (e.g. 0.30000000000000004).
        total_mb = work_sizes_mb.sum.round(2)
        results << "#{account.cname}: #{total_mb} Total MB / #{works.count} Works"
      else
        results << "#{account.cname}: 0 Total MB / 0 Works"
      end

      puts "=================================================================="
    end

    # Output results
    results.each do |result|
      puts result
    end
  end
end

0 comments on commit b9f5d43

Please sign in to comment.