From 7b18c514839e16eb2c55a1cafbbb96db65e38b7a Mon Sep 17 00:00:00 2001 From: Frederic Merizen Date: Sat, 8 Dec 2018 15:38:45 +0100 Subject: [PATCH 1/3] [#2180] Add separate notification for (slow) PJ listing --- lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake index fd8991ef3..f6b78f025 100644 --- a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake +++ b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake @@ -67,12 +67,13 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do # This task ports them to the new storage after the switch, while being careful not to # overwrite attachments that may have changed in the new storage after the switch. def refresh_outdated_files - rake_puts "Refresh outdated attachments" - refreshed_keys = [] missing_keys = [] old_pj_adapter.session do |old_pjs| + rake_puts "List old PJs" keys = old_pjs.list_prefixed('') + + rake_puts "Refresh outdated attachments" progress = ProgressReport.new(keys.count) keys.each do |key| new_pj_metadata = new_pjs.files.head(key) From 270c7568fa507030027120f52d0e248349df5c18 Mon Sep 17 00:00:00 2001 From: Frederic Merizen Date: Sat, 8 Dec 2018 15:55:40 +0100 Subject: [PATCH 2/3] [#2180] Avoid fetching old PJ modification times separately Speeds up transfer --- lib/active_storage/service/cellar_service.rb | 2 +- lib/cellar/cellar_adapter.rb | 11 ++++++++--- ...2018_12_03_finish_piece_jointe_transfer.rake | 17 ++++------------- spec/lib/cellar/cellar_adapter_spec.rb | 14 ++++++++++++-- 4 files changed, 25 insertions(+), 19 deletions(-) diff --git a/lib/active_storage/service/cellar_service.rb b/lib/active_storage/service/cellar_service.rb index 43f2d0632..56b00e24b 100644 --- a/lib/active_storage/service/cellar_service.rb +++ b/lib/active_storage/service/cellar_service.rb @@ -37,7 +37,7 @@ module ActiveStorage def delete_prefixed(prefix) instrument :delete_prefixed, prefix: prefix do @adapter.session do |s| - keys = s.list_prefixed(prefix) + keys = s.list_prefixed(prefix).map(&:first) s.delete_keys(keys) end end diff --git a/lib/cellar/cellar_adapter.rb b/lib/cellar/cellar_adapter.rb index 1a1ed400c..0cd7e88d0 100644 --- a/lib/cellar/cellar_adapter.rb +++ b/lib/cellar/cellar_adapter.rb @@ -91,7 +91,7 @@ module Cellar if response.is_a?(Net::HTTPSuccess) (listing, truncated) = parse_bucket_listing(response.body) result += listing - marker = listing.last + marker = listing.last.first else # TODO: error handling return nil @@ -139,8 +139,13 @@ module Cellar def parse_bucket_listing(bucket_listing_xml) doc = Nokogiri::XML(bucket_listing_xml) listing = doc - .xpath('//xmlns:Contents/xmlns:Key') - .map(&:text) + .xpath('//xmlns:Contents') + .map do |node| + [ + node.xpath('xmlns:Key').text, + DateTime.iso8601(node.xpath('xmlns:LastModified').text) + ] + end truncated = doc.xpath('//xmlns:IsTruncated').text == 'true' [listing, truncated] end diff --git a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake index f6b78f025..1c1b2d66b 100644 --- a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake +++ b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake @@ -68,25 +68,19 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do # overwrite attachments that may have changed in the new storage after the switch. def refresh_outdated_files refreshed_keys = [] - missing_keys = [] old_pj_adapter.session do |old_pjs| rake_puts "List old PJs" - keys = old_pjs.list_prefixed('') + old_pj_listing = old_pjs.list_prefixed('') rake_puts "Refresh outdated attachments" - progress = ProgressReport.new(keys.count) - keys.each do |key| + progress = ProgressReport.new(old_pj_listing.count) + old_pj_listing.each do |key, old_pj_last_modified| new_pj_metadata = new_pjs.files.head(key) refresh_needed = new_pj_metadata.nil? if !refresh_needed new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone - old_pj_last_modified = old_pjs.last_modified(key) - if old_pj_last_modified.nil? - missing_keys.push(key) - else - refresh_needed = new_pj_last_modified < old_pj_last_modified - end + refresh_needed = new_pj_last_modified < old_pj_last_modified end if refresh_needed @@ -115,9 +109,6 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do if verbose? rake_puts "Refreshed #{refreshed_keys.count} attachments\n#{refreshed_keys.join(', ')}" end - if missing_keys.present? - rake_puts "Failed to refresh #{missing_keys.count} attachments\n#{missing_keys.join(', ')}" - end end # For OpenStack, the content type cannot be forced dynamically from a direct download URL. diff --git a/spec/lib/cellar/cellar_adapter_spec.rb b/spec/lib/cellar/cellar_adapter_spec.rb index 312a8cb77..c385a5ecf 100644 --- a/spec/lib/cellar/cellar_adapter_spec.rb +++ b/spec/lib/cellar/cellar_adapter_spec.rb @@ -43,7 +43,7 @@ describe 'CellarAdapter' do sample2.jpg - 2011-02-26T01:56:20.000Z + 2014-03-21T17:44:07.000Z "bf1d737a4d46a19f3bced6905cc8b902" 142863 STANDARD @@ -54,7 +54,17 @@ describe 'CellarAdapter' do subject { session.send(:parse_bucket_listing, response) } - it { is_expected.to eq([["sample1.jpg", "sample2.jpg"], false]) } + it do + is_expected.to eq( + [ + [ + ["sample1.jpg", DateTime.new(2011, 2, 26, 1, 56, 20, 0)], + ["sample2.jpg", DateTime.new(2014, 3, 21, 17, 44, 7, 0)] + ], + false + ] + ) + end end describe 'bulk_deletion_request_body' do From c72216aafd43001249e182ce5e9899a51c782d57 Mon Sep 17 00:00:00 2001 From: Frederic Merizen Date: Sat, 8 Dec 2018 15:58:32 +0100 Subject: [PATCH 3/3] [#2180] Batch fetch new PJ modification times To speed up transfer --- ...18_12_03_finish_piece_jointe_transfer.rake | 24 +++++++++++++++---- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake index 1c1b2d66b..c11db190f 100644 --- a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake +++ b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake @@ -72,15 +72,29 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do rake_puts "List old PJs" old_pj_listing = old_pjs.list_prefixed('') + rake_puts "List new PJs" + new_pj_listing = {} + progress = ProgressReport.new(new_pjs.count.to_i) + new_pjs.files.each do |f| + new_pj_listing[f.key] = f.last_modified.in_time_zone + progress.inc + end + progress.finish + rake_puts "Refresh outdated attachments" progress = ProgressReport.new(old_pj_listing.count) old_pj_listing.each do |key, old_pj_last_modified| - new_pj_metadata = new_pjs.files.head(key) + new_pj_last_modified = new_pj_listing[key] - refresh_needed = new_pj_metadata.nil? - if !refresh_needed - new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone - refresh_needed = new_pj_last_modified < old_pj_last_modified + if new_pj_last_modified.nil? || new_pj_last_modified < old_pj_last_modified + # Looks like we need to refresh this PJ. + # Fetch fresh metadata to avoid overwriting a last-minute change + new_pj_metadata = new_pjs.files.head(key) + refresh_needed = new_pj_metadata.nil? + if !refresh_needed + new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone + refresh_needed = new_pj_last_modified < old_pj_last_modified + end end if refresh_needed