require 'logger'
require 'optparse'

module Fix
  # Base class for batch "fix" scripts that operate on a list of AAPB IDs read
  # from a file, one ID per line. Subclasses override #run and may register
  # extra command line options through .option_parser.
  class BatchProcess
    attr_reader :ids, :log, :cli_options, :log_level

    # @param ids_file [String] Path to a text file containing one AAPB ID per line.
    # @param log_level [Integer] Logger severity (0=DEBUG .. 5=UNKNOWN).
    def initialize(ids_file:, log_level: Logger::INFO)
      # Strip surrounding whitespace and drop blank lines so that a trailing
      # newline or stray spaces in the file do not become bogus ID lookups.
      @ids = File.readlines(ids_file, chomp: true).map(&:strip).reject(&:empty?)
      @cli_options = {}
      @log_level = log_level
      @log = Logger.new($stdout)
      @log.level = log_level
    end

    # asset_resources Returns an array of AssetResource instances for the given ids.
    # IDs whose lookup raises are logged via #log_error and left out of the result.
    # @return [Array] An array of AssetResource instances.
    def asset_resources
      @asset_resources ||= ids.map do |id|
        log.info "Finding Asset Resource '#{id}'..."
        begin
          AssetResource.find(id)
        rescue => e
          log_error(e)
          nil
        end
      end.compact
    end

    # log_error Logs the error class and message at ERROR level and the
    # backtrace at DEBUG level.
    # @param e [Exception] The error to log.
    def log_error(e)
      log.error "#{e.class}: #{e.message}"
      # Block form: the string is only built when DEBUG is enabled. Array()
      # guards against exceptions that were never raised (backtrace is nil).
      log.debug { "Backtrace:\n\t\t#{Array(e.backtrace).join("\n\t\t")}\n\n" }
    end

    # run is the main method to be implemented by subclasses.
    def run
      log.warn "No action taken. Put your logic in the #{self.class}#run method"
    end

    # self.cli_options A hash to store command line options. It is stored in a
    # class-level instance variable, so each subclass has its own hash.
    def self.cli_options
      @cli_options ||= {}
    end

    # self.option_parser Creates a default OptionParser for cli options and allows subclasses
    # to add their own options.
    # @param block [Proc] A block that takes an OptionParser instance as an argument.
    # @return [OptionParser] The OptionParser instance.
    # Usage:
    #   class MyBatchProcess < BatchProcess
    #     def initialize(my_option:, **args)
    #       super(**args)
    #       @my_option = my_option
    #     end
    #
    #     option_parser do |opts|
    #       opts.on("-m", "--my-option VALUE", "My custom option") do |my_option_val|
    #         # Assign option values to the cli_options hash.
    #         cli_options[:my_option] = my_option_val
    #       end
    #     end
    #   end
    def self.option_parser(&block)
      # Set default options for all BatchProcess classes.
      @option_parser ||= OptionParser.new do |opts|
        # Allow verbose output. The value is optional, so the block may receive
        # nil (bare "-l"); fall back to INFO when no digits are supplied.
        opts.on('-l', '--log-level [0-5]', '0=DEBUG, 1=INFO, 2=WARN, 3=ERROR, 4=FATAL, 5=UNKNOWN') do |level|
          digits = level.to_s[/\d+/]
          cli_options[:log_level] = digits ? digits.to_i : Logger::INFO
        end

        # Allow file input of AAPB IDs
        opts.on('-f', '--file FILE', 'List of AAPB IDs, one per line') do |file|
          cli_options[:ids_file] = file
        end
      end

      # Call the passed block with option parser instance if a block was given.
      block.call(@option_parser) if block

      # Return the option parser.
      @option_parser
    end

    # self.run_cli Parses command line options and runs the batch process.
    def self.run_cli
      # Call option_parser.parse! to set cli_options from ARGV
      option_parser.parse!

      # Run the batch process with cli_options
      new(**cli_options).run
    end
  end
end
- asset_resources.each do |asset_resource| - pi = asset_resource.physical_instantiation_resources.detect { |pi| pi.media_type != 'Moving Image' } + pi = ar.physical_instantiation_resources.detect { |pi| pi.media_type != media_type } if !pi - puts "Nothing to fix for AssetResource #{asset_resource.id}, skipping ..." + log.warn "Asset Resource #{ar.id} has no Physical Instantiations without media type of #{media_type}, skipping." next end # Change the metadata - pi.media_type = 'Moving Image' + pi.media_type = media_type begin pi.save - puts "PhysicalInstantiationResource #{pi.id} saved with media_type 'Moving Image'" + log.info "Physical Instantiation #{pi.id} for Asset Resource #{ar.id} saved with media_type '#{media_type}'" rescue => e - puts "Error saving PhysicalInstantiationResource #{pi.id}: #{e.class} -- #{e.message}" + log_error e end end + log.info "Done." end end end if __FILE__ == $0 - Fix::ChangeMediaType.new.run! + Fix::ChangeMediaType.run_cli end diff --git a/lib/fix/data/nebraska_public_media_ids.txt b/lib/fix/data/nebraska_public_media_ids.txt deleted file mode 100644 index f562be0d1..000000000 --- a/lib/fix/data/nebraska_public_media_ids.txt +++ /dev/null @@ -1,184 +0,0 @@ -cpb-aacip-294f75638b3 -cpb-aacip-c32e1318cc7 -cpb-aacip-f2b99432175 -cpb-aacip-24fb6d82fcb -cpb-aacip-b7b8d470a72 -cpb-aacip-238cdfc89fb -cpb-aacip-02be757f06a -cpb-aacip-fef056eafe4 -cpb-aacip-fb2b1e20cae -cpb-aacip-bb4be95f74e -cpb-aacip-7d3138cbc98 -cpb-aacip-612808b0608 -cpb-aacip-13074c81579 -cpb-aacip-a8c07eb1c16 -cpb-aacip-137dcaccab9 -cpb-aacip-7a203c3da70 -cpb-aacip-27597f5b0ed -cpb-aacip-3b4860945ce -cpb-aacip-0f315b5280b -cpb-aacip-87f657701d4 -cpb-aacip-69af623df9c -cpb-aacip-1ca1e4a08d8 -cpb-aacip-7b46a69b242 -cpb-aacip-33f510ab40d -cpb-aacip-00421efb2b0 -cpb-aacip-afe58622be9 -cpb-aacip-08675066c04 -cpb-aacip-d3f2542d7df -cpb-aacip-4968e4f808d -cpb-aacip-554f6fcb6b2 -cpb-aacip-076167f1ca0 -cpb-aacip-fd1e44d94e6 -cpb-aacip-62e9a7747b6 -cpb-aacip-4d8c3199ee8 
-cpb-aacip-c320a08c526 -cpb-aacip-4a90240a09a -cpb-aacip-aa9bfded5c7 -cpb-aacip-afa7e93a600 -cpb-aacip-cc301924ee9 -cpb-aacip-4c3fbb2aaf7 -cpb-aacip-50a746fac18 -cpb-aacip-f8a9b505e20 -cpb-aacip-b9688cf50ed -cpb-aacip-b08aff600aa -cpb-aacip-927ec03e4e4 -cpb-aacip-19b34393d8a -cpb-aacip-7cc14165ee9 -cpb-aacip-235f7c22567 -cpb-aacip-e302b9a5ac0 -cpb-aacip-ac6964df90a -cpb-aacip-e0459b39a92 -cpb-aacip-f2b8dd63357 -cpb-aacip-81597012922 -cpb-aacip-5ad8ae9c053 -cpb-aacip-fa1c856ac7e -cpb-aacip-e6407f3804c -cpb-aacip-5a3eb7860c7 -cpb-aacip-831258383f0 -cpb-aacip-df04686ff28 -cpb-aacip-d7d55b1745c -cpb-aacip-81995f80c5d -cpb-aacip-03bef5df681 -cpb-aacip-b9b88420ee2 -cpb-aacip-4bff7b980f6 -cpb-aacip-2169696e881 -cpb-aacip-63cdf88d538 -cpb-aacip-fd3c57d4802 -cpb-aacip-2e0188aa873 -cpb-aacip-cb8bdaf0513 -cpb-aacip-c32c4b4f54a -cpb-aacip-fb2d43f32c7 -cpb-aacip-5fc9a5a17eb -cpb-aacip-2dc1049e7a7 -cpb-aacip-b30a6e8be3a -cpb-aacip-7652b312a94 -cpb-aacip-14baff9861d -cpb-aacip-7258a3f072e -cpb-aacip-3fd03a4a992 -cpb-aacip-0ce41f22310 -cpb-aacip-ac37704cc9c -cpb-aacip-e2143726821 -cpb-aacip-23291e45341 -cpb-aacip-78e99f5d48f -cpb-aacip-e3fea70c819 -cpb-aacip-d3dd261a113 -cpb-aacip-e22ca58aa7a -cpb-aacip-c055582d672 -cpb-aacip-43c913ae953 -cpb-aacip-b4506a9329c -cpb-aacip-b9fc73214a9 -cpb-aacip-c3c7a1cb288 -cpb-aacip-7d0865d1edd -cpb-aacip-ad2b0d9eb34 -cpb-aacip-39c0714877f -cpb-aacip-6dc11d896e1 -cpb-aacip-4903c325751 -cpb-aacip-32a2f2d3bab -cpb-aacip-1b82bc253ae -cpb-aacip-51decbb01f0 -cpb-aacip-30e983081f2 -cpb-aacip-4dca4885f1f -cpb-aacip-01042c99044 -cpb-aacip-cfd64c5e00e -cpb-aacip-ff36a94f53f -cpb-aacip-60b57c1e2fd -cpb-aacip-8a8d8a7e538 -cpb-aacip-19c828e679f -cpb-aacip-6ad5aa76df7 -cpb-aacip-1199b291859 -cpb-aacip-1aa95eb993b -cpb-aacip-ca91fb5a460 -cpb-aacip-587d0b59034 -cpb-aacip-36863145ceb -cpb-aacip-d39c402f6ce -cpb-aacip-af2c6141754 -cpb-aacip-95e39785494 -cpb-aacip-bac022e4b04 -cpb-aacip-ff36471c048 -cpb-aacip-8e07558b829 -cpb-aacip-3828675b84c 
-cpb-aacip-d3c8f80b52c -cpb-aacip-c6fd2fd44cc -cpb-aacip-61ab5395f2c -cpb-aacip-06cc34335d0 -cpb-aacip-6dfae4fbe22 -cpb-aacip-76697efd567 -cpb-aacip-f552aafccf6 -cpb-aacip-861e985a21a -cpb-aacip-6ecd8a3257e -cpb-aacip-acad2369ea8 -cpb-aacip-0f193a2292a -cpb-aacip-3cb234d049e -cpb-aacip-4f42e38eb80 -cpb-aacip-6cef7787164 -cpb-aacip-376a7e7dca4 -cpb-aacip-7afafa04f81 -cpb-aacip-7f9196d5375 -cpb-aacip-ae45802f2d9 -cpb-aacip-208b7e67cf8 -cpb-aacip-78bda899de8 -cpb-aacip-e647587264e -cpb-aacip-9fa78df5409 -cpb-aacip-08d22ec00aa -cpb-aacip-3af06cd3ee8 -cpb-aacip-98ffbb0b81b -cpb-aacip-0c988783bfc -cpb-aacip-9979eceacb2 -cpb-aacip-8b876080369 -cpb-aacip-9725a780ef6 -cpb-aacip-f856b0f0713 -cpb-aacip-47720e19ac4 -cpb-aacip-003a721989d -cpb-aacip-a2bd7e73631 -cpb-aacip-d0cadad274d -cpb-aacip-027545a40c9 -cpb-aacip-cd8a9c5f19c -cpb-aacip-df677b6d1de -cpb-aacip-033e86e4555 -cpb-aacip-fe4c325c262 -cpb-aacip-cb33354c038 -cpb-aacip-7a928d4b385 -cpb-aacip-cc4f2c2ad47 -cpb-aacip-67f4b006aed -cpb-aacip-e9a491d8a91 -cpb-aacip-6cd320707ec -cpb-aacip-6ce061d3f88 -cpb-aacip-0ca84291b09 -cpb-aacip-2cf96b857f7 -cpb-aacip-8156b991b6a -cpb-aacip-3f7518bba67 -cpb-aacip-cefcf4fbf38 -cpb-aacip-30a15ad3cfa -cpb-aacip-450dfe2a949 -cpb-aacip-242bd7c076e -cpb-aacip-e40b39c823a -cpb-aacip-6bb32940393 -cpb-aacip-5a959dc4fb1 -cpb-aacip-f874cf4c4d9 -cpb-aacip-4404c0eaf3d -cpb-aacip-28ec850180b -cpb-aacip-c32c68ea5f8 -cpb-aacip-94dbb10a51c -cpb-aacip-1bfeadcc053 -cpb-aacip-c741bf6082d \ No newline at end of file diff --git a/lib/fix/delete_asset_resources.rb b/lib/fix/delete_asset_resources.rb new file mode 100644 index 000000000..f0c1254e3 --- /dev/null +++ b/lib/fix/delete_asset_resources.rb @@ -0,0 +1,25 @@ +require_relative '../../config/environment' +require_relative 'batch_process' + +module Fix + class DeleteAssetResources < BatchProcess + def run + asset_resources.each do |ar| + log.info "Destroying Asset Resource #{ar.id}" + begin + Hyrax.persister.delete(resource: ar) + 
require 'optparse'
require_relative '../../config/environment'
require_relative 'batch_process'

module Fix
  # Deletes the Physical Instantiations that have a given media type from the
  # Assets whose IDs are listed in the IDs file.
  class DeletePhysicalInstantiations < BatchProcess
    MEDIA_TYPES = MediaTypeService.new.select_all_options.to_h.values

    attr_reader :media_type

    # @param media_type [String] Media type of the Physical Instantiations to
    #   delete; must be one of MEDIA_TYPES.
    # @param args [Hash] Remaining keyword arguments, passed on to BatchProcess.
    # @raise [ArgumentError] if media_type is not one of MEDIA_TYPES.
    def initialize(media_type:, **args)
      super(**args)
      raise ArgumentError, "media_type must be one of '#{MEDIA_TYPES.join("', '")}', but '#{media_type}' was given" unless MEDIA_TYPES.include?(media_type)
      @media_type = media_type
    end

    option_parser do |opts|
      opts.banner = "Usage: ruby lib/fix/delete_physical_instantiations.rb [options]"

      opts.on("-t", "--media-type MEDIA_TYPE", "Either 'Sound' or 'Moving Image'") do |media_type|
        cli_options[:media_type] = media_type
      end
    end

    # Deletes each matching Physical Instantiation from persistence and from
    # the index, then re-indexes the parent Asset. Errors are logged per
    # instantiation so one failure does not stop the rest of the batch.
    def run
      log.info "Running fix #{self.class.name} ..."
      asset_resources.each do |ar|
        pis = ar.physical_instantiation_resources.select { |pi| pi.media_type == media_type }
        if pis.empty?
          log.warn "No physical instantiations with media type '#{media_type}' were found for Asset #{ar.id}, skipping."
          next
        end

        pis.each do |pi|
          begin
            log.info "Deleting Physical Instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}..."
            Hyrax.persister.delete(resource: pi)
            Hyrax.index_adapter.delete(resource: pi)
            log.info "Deleted physical instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}."
            # NOTE(review): `ar` was loaded before the delete; presumably
            # re-indexing it drops the removed instantiation from the Asset's
            # index document — confirm against the indexer.
            Hyrax.index_adapter.save(resource: ar)
            log.info "Asset Resource #{ar.id} saved."
          rescue => e
            log_error(e)
          end
        end
      end
      log.info "Done."
    end
  end
end

if __FILE__ == $0
  Fix::DeletePhysicalInstantiations.run_cli
end
+ end + after(:all) { ActiveJob::Base.queue_adapter = :sidekiq } + + + let(:pbcore_description_documents) { build_list(:pbcore_description_document, rand(2..4), :full_aapb) } + let(:zipped_batch) { make_aapb_pbcore_zipped_batch(pbcore_description_documents) } + let(:batch) do + user, admin_set = create_user_and_admin_set_for_deposit + run_batch_ingest( + ingest_file_path: zipped_batch, + ingest_type: 'aapb_pbcore_zipped', + admin_set: admin_set, + submitter: user + ) + end + + let(:ids) do + batch.batch_items.map do |batch_item| + batch_item.repo_object_id.to_s + end + end + + let(:ids_file) do + f = Tempfile.new + f.write(ids.join("\n")) + f.flush + f.path + end + + # Non-memoized helper for fetching Asset by ID. + def asset_resource_results + ids.map do |id| + begin + Hyrax.query_service.find_by(id: id) + rescue Valkyrie::Persistence::ObjectNotFoundError + nil + end + end.compact + end + + it 'deletes the AssetResources' do + expect(asset_resource_results.count).to be > 0 + Fix::DeleteAssetResources.new(ids_file: ids_file).run + expect(asset_resource_results.count).to eq 0 + end +end diff --git a/spec/support/batch_ingest_helpers.rb b/spec/support/batch_ingest_helpers.rb index 309a0133f..70de17b11 100644 --- a/spec/support/batch_ingest_helpers.rb +++ b/spec/support/batch_ingest_helpers.rb @@ -17,8 +17,25 @@ def run_batch_ingest(ingest_file_path:, ingest_type:, admin_set:, submitter:) submitter_email: submitter.email, status: 'received') runner = Hyrax::BatchIngest::BatchRunner.new(batch: batch) + + puts "Starting batch ingest." runner.run + + # spin for up to 30 seconds while we wait for all items to get processed + enqueued = runner.batch.batch_items.select { |bi| bi.status == 'enqueued' } + max_time = Time.now.to_i + 30 + while enqueued.count > 0 && max_time > Time.now.to_i + puts "#{enqueued.count} items remaining." 
+ sleep 3 + runner.batch.batch_items.each { |bi| bi.reload if bi.status == 'enqueued' } + enqueued = runner.batch.batch_items.select { |bi| bi.status == 'enqueued' } + end + + raise "Batch ingest timed out. #{enqueued.count} items still enqueued." if enqueued.count > 0 + + # If we didn't raise, then we're done! # Return the batch so we can run expectations on it in tests. + puts "Done." runner.batch end