Skip to content

Commit

Permalink
Adds Fix for deleting PhysicalInstantiations from Assets based on Med…
Browse files Browse the repository at this point in the history
…ia Type

Refactors some one-time scripts into more re-usable batch processing methods that can be run
from the ruby console or CLI.

* Delete Assets by ID list from file.
* Delete Physical Instantiations by Media Type and IDs from file.
* Change Media Type for instantiations by IDs from file.
  • Loading branch information
afred committed Sep 24, 2024
1 parent fed8d72 commit ac9a3fa
Show file tree
Hide file tree
Showing 11 changed files with 2,022 additions and 26 deletions.
89 changes: 89 additions & 0 deletions lib/fix/batch_process.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
require 'logger'
require 'optparse'

module Fix
  # Base class for batch fixes that operate on a list of IDs read from a text
  # file (one ID per line). Subclasses override #run and may register extra
  # command line options through .option_parser.
  class BatchProcess
    attr_reader :ids, :log, :cli_options, :log_level

    # @param ids_file [String] Path to a text file containing one ID per line.
    # @param log_level [Integer] Logger severity threshold (Logger::DEBUG..Logger::UNKNOWN).
    def initialize(ids_file:, log_level: Logger::INFO)
      # Strip surrounding whitespace and drop blank lines so that a trailing
      # newline or stray spaces in the file do not turn into bogus IDs.
      @ids = File.readlines(ids_file, chomp: true).map(&:strip).reject(&:empty?)
      @cli_options = {}
      @log_level = log_level
      @log = Logger.new(STDOUT)
      @log.level = log_level
    end

    # asset_resources Returns an array of AssetResource instances for the given ids.
    # IDs whose lookup raises are logged and left out of the result. The result
    # is memoized.
    # @return [Array<AssetResource>] An array of AssetResource instances.
    def asset_resources
      @asset_resources ||= ids.map do |id|
        log.info "Finding Asset Resource '#{id}'..."
        begin
          AssetResource.find(id)
        rescue => e
          log_error(e)
          nil
        end
      end.compact
    end

    # log_error Logs the class and message of an exception at ERROR level and
    # its backtrace at DEBUG level.
    # @param e [Exception] The exception to log.
    def log_error(e)
      log.error "#{e.class}: #{e.message}"
      # Exception#backtrace is nil for exceptions that were never raised.
      log.debug "Backtrace:\n#{Array(e.backtrace).join("\n\t\t")}\n\n"
    end

    # run is the main method to be implemented by subclasses.
    def run
      log.warn "No action taken. Put your logic in the #{self.class}#run method"
    end

    # self.cli_options A hash to store command line options. It is kept per
    # class (class-level instance variable), so each subclass has its own hash.
    # @return [Hash]
    def self.cli_options
      @cli_options ||= {}
    end

    # self.option_parser Creates a default OptionParser for cli options and allows subclasses
    # to add their own options.
    # @param block [Proc] A block that takes an OptionParser instance as an argument.
    # @return [OptionParser] The OptionParser instance.
    # Usage:
    #   class MyBatchProcess < BatchProcess
    #     def initialize(my_option:, **args)
    #       super(**args)
    #       @my_option = my_option
    #     end
    #
    #     option_parser do |opts|
    #       opts.on("-m", "--my-option", "My custom option") do |my_option_val|
    #         # Assign option values to the cli_options hash.
    #         cli_options[:my_option] = my_option_val
    #       end
    #     end
    #   end
    def self.option_parser(&block)
      # Set default options for all BatchProcess classes
      @option_parser ||= OptionParser.new do |opts|
        # Allow verbose output
        opts.on('-l', '--log-level [0-5]', '0=DEBUG, 1=INFO, 2=WARN, 3=ERROR, 4=FATAL, 5=UNKNOWN') do |log_level|
          # The argument is optional, so log_level may be nil; fall back to INFO
          # when no digits were supplied and keep the value within Logger's range.
          digits = log_level.to_s[/\d+/]
          cli_options[:log_level] = digits ? digits.to_i.clamp(Logger::DEBUG, Logger::UNKNOWN) : Logger::INFO
        end

        # Allow file input of AAPB IDs
        opts.on("-f", "--file FILE", "List of AAPB IDs, one per line") do |file|
          cli_options[:ids_file] = file
        end
      end

      # Call the passed block with option parser instance if a block was given.
      block.call(@option_parser) if block_given?

      # Return the option parser.
      @option_parser
    end

    # self.run_cli Parses command line options and runs the batch process.
    def self.run_cli
      # Call option_parser.parse! to set cli_options from $ARGV
      option_parser.parse!

      # Run the batch process with cli_options
      new(**cli_options).run
    end
  end
end
53 changes: 28 additions & 25 deletions lib/fix/change_media_type.rb
Original file line number Diff line number Diff line change
@@ -1,53 +1,56 @@
require_relative '../../config/environment'
require_relative 'batch_process'

# NOTE(review): this span is a rendered diff of lib/fix/change_media_type.rb --
# removed and added lines are interleaved without +/- markers, so it is not
# runnable Ruby as shown. Comments below describe only what the visible lines do.
module Fix
class ChangeMediaType
MEDIA_TYPES = MediaTypeService.new.select_all_options
class ChangeMediaType < BatchProcess
MEDIA_TYPES = MediaTypeService.new.select_all_options.to_h.values

attr_reader :aapb_ids_filename, :aapb_ids
def initialize
@aapb_ids_filename = "#{File.dirname(File.expand_path(__FILE__))}/data/nebraska_public_media_ids.txt"
attr_reader :media_type

# Passes the remaining keyword arguments to the superclass initializer, then
# raises ArgumentError unless media_type is included in MEDIA_TYPES.
def initialize(media_type:, **args)
super(**args)
raise ArgumentError, "media_type must be one of '#{MEDIA_TYPES.join("', '")}', but '#{media_type}' was given" unless MEDIA_TYPES.include?(media_type)
@media_type = media_type
end

# Registers the -t/--media-type option; its value is stored in cli_options[:media_type].
option_parser do |opts|
opts.banner = "Usage: ruby lib/fix/change_media_type.rb [options]"

def aapb_ids
@aapb_ids ||= File.readlines(aapb_ids_filename).map(&:strip)
# NOTE(review): 'Movind Image' in the help text looks like a typo for
# 'Moving Image' (the literal used elsewhere in this span) -- confirm and fix.
opts.on("-t", "--media-type MEDIA_TYPE", "Either 'Sound' or 'Movind Image'") do |media_type|
cli_options[:media_type] = media_type
end
end

def asset_resources
@asset_resources ||= aapb_ids.map do |aapb_id|
puts "Looking up Asset #{aapb_id} ..."
begin
AssetResource.find(aapb_id)
rescue => e
puts "Error looking up Asset #{aapb_id}: #{e.class} -- #{e.message}"
def run
log.info "Running fix #{self.class.name} ..."
asset_resources.each do |ar|
if ar.physical_instantiation_resources.count == 0
log.warn "No Physical Instantiations for Asset Resource #{ar.id}, skipping."
next
end
end.compact
end

def run!
puts "Running Hotfix #{self.class.name} ..."
asset_resources.each do |asset_resource|
pi = asset_resource.physical_instantiation_resources.detect { |pi| pi.media_type != 'Moving Image' }
# detect returns only the first physical instantiation whose media_type
# differs, so at most one instantiation per asset is changed per run.
pi = ar.physical_instantiation_resources.detect { |pi| pi.media_type != media_type }
if !pi
puts "Nothing to fix for AssetResource #{asset_resource.id}, skipping ..."
log.warn "Asset Resource #{ar.id} has no Physical Instantiations without media type of #{media_type}, skipping."
next
end

# Change the metadata
pi.media_type = 'Moving Image'
pi.media_type = media_type

# Errors raised while saving are logged and the loop moves on to the next asset.
begin
pi.save
puts "PhysicalInstantiationResource #{pi.id} saved with media_type 'Moving Image'"
log.info "Physical Instantiation #{pi.id} for Asset Resource #{ar.id} saved with media_type '#{media_type}'"
rescue => e
puts "Error saving PhysicalInstantiationResource #{pi.id}: #{e.class} -- #{e.message}"
log_error e
end
end
log.info "Done."
end
end
end


# Runs the fix only when this file is executed directly as a script.
if __FILE__ == $0
Fix::ChangeMediaType.new.run!
Fix::ChangeMediaType.run_cli
end
43 changes: 43 additions & 0 deletions lib/fix/data/wbgo_ids.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
cpb-aacip-c88b33d7358
cpb-aacip-f1ae8d53434
cpb-aacip-019e7cac605
cpb-aacip-f3b09f6678e
cpb-aacip-80f53d5da69
cpb-aacip-cc15b90b384
cpb-aacip-1ff29659b46
cpb-aacip-482bb33c1bc
cpb-aacip-c6031b0e935
cpb-aacip-6ebc10485e7
cpb-aacip-51648e75603
cpb-aacip-51c3bba4f79
cpb-aacip-575a10227a1
cpb-aacip-2a9f06c0128
cpb-aacip-9cd2835a6b5
cpb-aacip-8cb4e400b0c
cpb-aacip-fb82f5dd309
cpb-aacip-bef9b6d7456
cpb-aacip-e8780d9a0a9
cpb-aacip-7dfc2c698cb
cpb-aacip-8decc2181c0
cpb-aacip-1412ec8f88b
cpb-aacip-d1cf98896c3
cpb-aacip-485d84c5dc5
cpb-aacip-e12313137cc
cpb-aacip-c3ac4f7577a
cpb-aacip-82ba318c312
cpb-aacip-a05b2a13b73
cpb-aacip-e27b5e518b7
cpb-aacip-ad6d1abfc06
cpb-aacip-02260da6323
cpb-aacip-bd0f2156ebb
cpb-aacip-ec27eb2e293
cpb-aacip-fae9fbdd91d
cpb-aacip-3af55f5a09d
cpb-aacip-015140162e8
cpb-aacip-223e1efcea9
cpb-aacip-2f035514157
cpb-aacip-9c3bdc57501
cpb-aacip-56dc3735f35
cpb-aacip-6d008ca547c
cpb-aacip-0d4f8c17e1c
cpb-aacip-5a17ba408d8
10 changes: 10 additions & 0 deletions lib/fix/data/wbgo_ids_subset_10.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
cpb-aacip-00b688f25ac
cpb-aacip-0a4c28651ac
cpb-aacip-0a662172bed
cpb-aacip-0ade34b9d0e
cpb-aacip-0ae4c109ad3
cpb-aacip-0ae5a11b411
cpb-aacip-0b06a81a25f
cpb-aacip-0b43b33322e
cpb-aacip-0b8f4f2a08c
cpb-aacip-0b8f510383f
25 changes: 25 additions & 0 deletions lib/fix/delete_asset_resources.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
require_relative '../../config/environment'
require_relative 'batch_process'

module Fix
  # Batch fix that deletes every Asset Resource returned by #asset_resources,
  # removing it from both the persister and the index.
  class DeleteAssetResources < BatchProcess
    # Deletes each Asset Resource and commits the index after every deletion.
    # A failure for one asset is logged and does not stop the remaining ones.
    def run
      asset_resources.each do |ar|
        log.info "Destroying Asset Resource #{ar.id}"
        begin
          Hyrax.persister.delete(resource: ar)
          Hyrax.index_adapter.delete(resource: ar)
          Hyrax.index_adapter.connection.commit
          log.info "Asset Resource #{ar.id} destroyed."
        rescue => e
          log_error e
        end
      end
      # Report completion through the logger, like every other message here,
      # so it respects the configured log level and format.
      log.info "Done."
    end
  end
end

# Run the batch process from the command line only when this file is the
# script being executed (not when it is required).
Fix::DeleteAssetResources.run_cli if __FILE__ == $PROGRAM_NAME
55 changes: 55 additions & 0 deletions lib/fix/delete_physical_instantiations.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
require 'optparse'
require_relative '../../config/environment'
require_relative 'batch_process'

module Fix
  # Batch fix that deletes, for each Asset Resource found for the given ids,
  # the Physical Instantiations whose media type equals the requested one.
  class DeletePhysicalInstantiations < BatchProcess
    MEDIA_TYPES = MediaTypeService.new.select_all_options.to_h.values.freeze

    attr_reader :media_type

    # @param media_type [String] Media type of the instantiations to delete;
    #   must be one of MEDIA_TYPES.
    # @param args [Hash] Remaining keyword arguments, passed to BatchProcess#initialize.
    # @raise [ArgumentError] if media_type is not one of MEDIA_TYPES.
    def initialize(media_type:, **args)
      super(**args)
      raise ArgumentError, "media_type must be one of '#{MEDIA_TYPES.join("', '")}', but '#{media_type}' was given" unless MEDIA_TYPES.include?(media_type)
      @media_type = media_type
    end

    option_parser do |opts|
      opts.banner = "Usage: ruby lib/fix/delete_physical_instantiations.rb [options]"

      opts.on("-t", "--media-type MEDIA_TYPE", "Either 'Sound' or 'Moving Image'") do |media_type|
        cli_options[:media_type] = media_type
      end
    end

    # Deletes the matching Physical Instantiations from the persister and the
    # index, re-indexing the parent asset after each deletion. A failure for
    # one instantiation is logged and does not stop the remaining ones.
    # media_type was already validated in #initialize and is read-only, so it
    # is not re-validated here.
    def run
      asset_resources.each do |ar|
        pis = ar.physical_instantiation_resources.select { |pi| pi.media_type == media_type }
        if pis.count == 0
          log.warn "No physical instantiations with media type '#{media_type}' were found for Asset #{ar.id}, skipping."
          next
        end

        pis.each do |pi|
          begin
            log.info "Deleting Physical Instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}..."
            Hyrax.persister.delete(resource: pi)
            Hyrax.index_adapter.delete(resource: pi)
            log.info "Deleted physical instantiation #{pi.id} with media type '#{media_type}' from Asset #{ar.id}."
            # NOTE(review): no explicit index commit is issued here, unlike
            # DeleteAssetResources -- confirm whether one is needed.
            Hyrax.index_adapter.save(resource: ar)
            log.info "Asset Resource #{ar.id} saved."
          rescue => e
            log_error(e)
          end
        end
      end
      log.info "Done."
    end
  end
end


# Run the batch process from the command line only when this file is the
# script being executed (not when it is required).
Fix::DeletePhysicalInstantiations.run_cli if __FILE__ == $PROGRAM_NAME
6 changes: 6 additions & 0 deletions ops/demo-deploy.tmpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,12 @@ redis:
cluster:
enabled: false
password: demo
master:
livenessProbe:
initialDelaySeconds: 180
readinessProbe:
initialDelaySeconds: 180

solr:
enabled: false

Expand Down
2 changes: 1 addition & 1 deletion spec/factories/pbcore_xml/instantiation/identifier.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

trait :ams do
source { "ams" }
value { "cpb-blah-blah-blah" }
value { IdentifierService.mint }
end

initialize_with { new(attributes) }
Expand Down
56 changes: 56 additions & 0 deletions spec/fix/delete_asset_resources_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
require 'rails_helper'
require 'fix/delete_asset_resources'
require 'sidekiq/testing'

RSpec.describe 'Delete Asset Resources' do
  # Temporarily set ActiveJob queue adapter to :sidekiq for this test, since
  # it's an integration test that involves running ingest jobs. The adapter
  # that was active beforehand is remembered and put back afterwards so other
  # specs are not affected.
  before(:all) do
    @original_queue_adapter = ActiveJob::Base.queue_adapter
    ActiveJob::Base.queue_adapter = :sidekiq
    # NOTE(review): inline mode stays enabled after this spec (unchanged from
    # before) -- confirm whether it should be reset as well.
    Sidekiq::Testing.inline!
  end
  after(:all) { ActiveJob::Base.queue_adapter = @original_queue_adapter }


  let(:pbcore_description_documents) { build_list(:pbcore_description_document, rand(2..4), :full_aapb) }
  let(:zipped_batch) { make_aapb_pbcore_zipped_batch(pbcore_description_documents) }
  let(:batch) do
    user, admin_set = create_user_and_admin_set_for_deposit
    run_batch_ingest(
      ingest_file_path: zipped_batch,
      ingest_type: 'aapb_pbcore_zipped',
      admin_set: admin_set,
      submitter: user
    )
  end

  let(:ids) do
    batch.batch_items.map do |batch_item|
      batch_item.repo_object_id.to_s
    end
  end

  # Keep the Tempfile object memoized for the duration of the example: a
  # Tempfile is unlinked when its object is garbage collected, so returning
  # only the path could leave the fix reading a file that no longer exists.
  let(:ids_tempfile) do
    Tempfile.new.tap do |f|
      f.write(ids.join("\n"))
      f.flush
    end
  end

  let(:ids_file) { ids_tempfile.path }

  # Non-memoized helper for fetching Asset by ID.
  def asset_resource_results
    ids.map do |id|
      begin
        Hyrax.query_service.find_by(id: id)
      rescue Valkyrie::Persistence::ObjectNotFoundError
        nil
      end
    end.compact
  end

  it 'deletes the AssetResources' do
    expect(asset_resource_results.count).to be > 0
    Fix::DeleteAssetResources.new(ids_file: ids_file).run
    expect(asset_resource_results.count).to eq 0
  end
end
Loading

0 comments on commit ac9a3fa

Please sign in to comment.