From c6c8bea0957e5bb8738e40a819a60f5f33ab02ee Mon Sep 17 00:00:00 2001 From: Frederic Merizen Date: Thu, 6 Dec 2018 17:02:08 +0100 Subject: [PATCH 1/3] [#2180] Progress reporter that doesn't crash --- ...18_12_03_finish_piece_jointe_transfer.rake | 18 +++---- lib/tasks/task_helper.rb | 52 +++++++++++++++++++ 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake index c32501d9d..ba8aac5af 100644 --- a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake +++ b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake @@ -69,7 +69,7 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do def refresh_outdated_files rake_puts "Refresh outdated attachments" - bar = RakeProgressbar.new(ActiveStorage::Blob.count) + progress = ProgressReport.new(ActiveStorage::Blob.count) refreshed_keys = [] missing_keys = [] old_pj_adapter.session do |old_pjs| @@ -105,10 +105,10 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do file.unlink end end - bar.inc + progress.inc end end - bar.finished + progress.finish if verbose? rake_puts "Refreshed #{refreshed_keys.count} attachments\n#{refreshed_keys.join(', ')}" @@ -132,7 +132,7 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do end rake_puts "Fix MIME types" - bar = RakeProgressbar.new(ActiveStorage::Blob.count) + progress = ProgressReport.new(ActiveStorage::Blob.count) failed_keys = [] updated_keys = [] ActiveStorage::Blob.find_each do |blob| @@ -144,9 +144,9 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do end end end - bar.inc + progress.inc end - bar.finished + progress.finish if verbose? rake_puts "Updated MIME Type for #{updated_keys.count} keys\n#{updated_keys.join(', ')}" @@ -160,7 +160,7 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do def remove_unused_openstack_objects rake_puts "Remove unused files" - bar = RakeProgressbar.new(new_pjs.count.to_i) + progress = ProgressReport.new(new_pjs.count.to_i) removed_keys = [] new_pjs.files.each do |file| if !ActiveStorage::Blob.exists?(key: file.key) @@ -170,9 +170,9 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do end end - bar.inc + progress.inc end - bar.finished + progress.finish if verbose? rake_puts "Removed #{removed_keys.count} unused objects\n#{removed_keys.join(', ')}" diff --git a/lib/tasks/task_helper.rb b/lib/tasks/task_helper.rb index 238ea9ced..5ed9d6b7a 100644 --- a/lib/tasks/task_helper.rb +++ b/lib/tasks/task_helper.rb @@ -8,3 +8,55 @@ def rake_puts(*args) puts(*args) end end + +def rake_print(*args) + if Rake.verbose + print(*args) + end +end + +class ProgressReport + def initialize(total) + @start = Time.zone.now + rake_puts + set_progress(total: total, count: 0) + end + + def inc + set_progress(count: @count + 1) + if @per_10_000 % 10 == 0 + print_progress + end + end + + def finish + if @count > 0 && @per_10_000 != 10_000 + set_progress(total: @count) + print_progress + end + rake_puts + end + + def set_progress(total: nil, count: nil) + if total.present? + @total = total + end + if count.present? + @count = count + @total = [@count, @total].max + end + @per_10_000 = 10_000 * @count / @total + end + + def print_progress + elapsed = Time.zone.now - @start + percent = sprintf('%5.1f%%', @per_10_000 / 100.0) + total = @total.to_s + count = @count.to_s.rjust(total.length) + rake_print("\r#{percent} (#{count}/#{total}) [#{format_duration(elapsed)}/#{format_duration(elapsed * 10_000 / @per_10_000)}]") + end + + def format_duration(seconds) + Time.at(seconds).utc.strftime('%H:%M:%S') + end +end From 907e87809b77ba10c67d82e20b7e1229616c4bce Mon Sep 17 00:00:00 2001 From: Frederic Merizen Date: Fri, 7 Dec 2018 13:45:23 +0100 Subject: [PATCH 2/3] [#2180] Iterate over storage keys rather than blobs To create less confusion on dev --- lib/cellar/cellar_adapter.rb | 28 ++++++++++---- ...18_12_03_finish_piece_jointe_transfer.rake | 38 ++++++++++--------- spec/lib/cellar/cellar_adapter_spec.rb | 2 +- 3 files changed, 43 insertions(+), 25 deletions(-) diff --git a/lib/cellar/cellar_adapter.rb b/lib/cellar/cellar_adapter.rb index 00b3e1f74..97369c2c5 100644 --- a/lib/cellar/cellar_adapter.rb +++ b/lib/cellar/cellar_adapter.rb @@ -81,12 +81,24 @@ module Cellar end def list_prefixed(prefix) - request = Net::HTTP::Get.new("/?prefix=#{prefix}") - @signer.sign(request, "") - response = @http.request(request) - if response.is_a?(Net::HTTPSuccess) - parse_bucket_listing(response.body) - end + result = [] + marker = '' + + begin + request = Net::HTTP::Get.new("/?prefix=#{prefix}&marker=#{marker}") + @signer.sign(request, "") + response = @http.request(request) + if response.is_a?(Net::HTTPSuccess) + (listing, truncated) = parse_bucket_listing(response.body) + result += listing + marker = listing.last + else + # TODO: error handling + return nil + end + end while truncated + + result end def delete_keys(keys) @@ -126,9 +138,11 @@ module Cellar def parse_bucket_listing(bucket_listing_xml) doc = Nokogiri::XML(bucket_listing_xml) - doc + listing = doc .xpath('//xmlns:Contents/xmlns:Key') .map(&:text) + truncated = doc.xpath('//xmlns:IsTruncated').text == 'true' + [listing, truncated] end def bulk_deletion_request_body(keys) diff --git a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake index ba8aac5af..c52c76ae3 100644 --- a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake +++ b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake @@ -69,35 +69,36 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do def refresh_outdated_files rake_puts "Refresh outdated attachments" - progress = ProgressReport.new(ActiveStorage::Blob.count) refreshed_keys = [] missing_keys = [] old_pj_adapter.session do |old_pjs| - ActiveStorage::Blob.find_each do |blob| - new_pj_metadata = new_pjs.files.head(blob.key) + keys = old_pjs.list_prefixed('') + progress = ProgressReport.new(keys.count) + keys.each do |key| + new_pj_metadata = new_pjs.files.head(key) refresh_needed = new_pj_metadata.nil? if !refresh_needed new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone - old_pj_last_modified = old_pjs.last_modified(blob.key) + old_pj_last_modified = old_pjs.last_modified(key) if old_pj_last_modified.nil? - missing_keys.push(blob.key) + missing_keys.push(key) else refresh_needed = new_pj_last_modified < old_pj_last_modified end end if refresh_needed - refreshed_keys.push(blob.key) + refreshed_keys.push(key) if force? - file = Tempfile.new(blob.key) + file = Tempfile.new(key) file.binmode - old_pjs.download(blob.key) do |chunk| + old_pjs.download(key) do |chunk| file.write(chunk) end file.rewind new_pjs.files.create( - :key => blob.key, + :key => key, :body => file, :public => false ) @@ -107,8 +108,8 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do end progress.inc end + progress.finish end - progress.finish if verbose? rake_puts "Refreshed #{refreshed_keys.count} attachments\n#{refreshed_keys.join(', ')}" @@ -132,15 +133,18 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do end rake_puts "Fix MIME types" - progress = ProgressReport.new(ActiveStorage::Blob.count) + progress = ProgressReport.new(new_pjs.count.to_i) failed_keys = [] updated_keys = [] - ActiveStorage::Blob.find_each do |blob| - if blob.identified? && blob.content_type.present? - updated_keys.push(blob.key) + new_pjs.files.each do |file| + blob = ActiveStorage::Blob.find_by(key: file.key) + if blob.nil? + failed_keys.push(file.key) + elsif blob.identified? && blob.content_type.present? + updated_keys.push(file.key) if force? - if !blob.service.change_content_type(blob.key, blob.content_type) - failed_keys.push(blob.key) + if !blob.service.change_content_type(file.key, blob.content_type) + failed_keys.push(file.key) end end end @@ -152,7 +156,7 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do rake_puts "Updated MIME Type for #{updated_keys.count} keys\n#{updated_keys.join(', ')}" end if failed_keys.present? - rake_puts "failed to update #{failed_keys.count} keys (dangling blob?)\n#{failed_keys.join(', ')}" + rake_puts "failed to update #{failed_keys.count} keys\n#{failed_keys.join(', ')}" end end diff --git a/spec/lib/cellar/cellar_adapter_spec.rb b/spec/lib/cellar/cellar_adapter_spec.rb index 65d7c5dc0..312a8cb77 100644 --- a/spec/lib/cellar/cellar_adapter_spec.rb +++ b/spec/lib/cellar/cellar_adapter_spec.rb @@ -54,7 +54,7 @@ describe 'CellarAdapter' do subject { session.send(:parse_bucket_listing, response) } - it { is_expected.to eq(["sample1.jpg", "sample2.jpg"]) } + it { is_expected.to eq([["sample1.jpg", "sample2.jpg"], false]) } end describe 'bulk_deletion_request_body' do From 8c64ab9995798aec77725ef7977f2194ead31ded Mon Sep 17 00:00:00 2001 From: Frederic Merizen Date: Fri, 7 Dec 2018 13:57:42 +0100 Subject: [PATCH 3/3] [#2180] Remove dead files before fixing mime types To avoid trying to fix the mime type of non-existing files --- lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake index c52c76ae3..fd8991ef3 100644 --- a/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake +++ b/lib/tasks/2018_12_03_finish_piece_jointe_transfer.rake @@ -4,8 +4,8 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do def run notify_dry_run refresh_outdated_files - fix_openstack_mime_types remove_unused_openstack_objects + fix_openstack_mime_types notify_dry_run end