Merge pull request #3141 from betagouv/frederic/fix_2180/improve_migration_task

Improve PJ migration task
This commit is contained in:
Frederic Merizen 2018-12-07 15:31:22 +01:00 committed by GitHub
commit 1820008a31
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 102 additions and 32 deletions

View file

@ -81,12 +81,24 @@ module Cellar
end
# Collects the keys that parse_bucket_listing extracts from the bucket
# listing requested for `prefix`.
# NOTE(review): this span reads like a rendered diff whose +/- markers were
# stripped. The first six body lines look like the former single-request
# version and the lines from `result = []` onwards like its paginated
# replacement; confirm against the real file before relying on either.
def list_prefixed(prefix)
request = Net::HTTP::Get.new("/?prefix=#{prefix}")
@signer.sign(request, "")
response = @http.request(request)
if response.is_a?(Net::HTTPSuccess)
parse_bucket_listing(response.body)
end
result = []
marker = ''
# do-while form: the body runs once before `truncated` is tested at the bottom.
begin
# NOTE(review): prefix and marker are interpolated into the query string
# without URL-escaping; confirm keys never contain characters that need it.
request = Net::HTTP::Get.new("/?prefix=#{prefix}&marker=#{marker}")
@signer.sign(request, "")
response = @http.request(request)
if response.is_a?(Net::HTTPSuccess)
(listing, truncated) = parse_bucket_listing(response.body)
result += listing
# The last key of this page is sent as the `marker` of the next request.
marker = listing.last
else
# TODO: error handling
# Any non-success response aborts the loop and returns nil, discarding the
# keys accumulated from earlier pages.
return nil
end
end while truncated
result
end
def delete_keys(keys)
@ -126,9 +138,11 @@ module Cellar
# Parses a bucket listing XML document.
# Returns a two-element array: the text of every Contents/Key element, and
# whether the text of the IsTruncated element equals 'true'.
# NOTE(review): the bare `doc` line directly above `listing = doc` looks like
# the removed half of a diff line pair shown without +/- markers; confirm
# against the real file.
def parse_bucket_listing(bucket_listing_xml)
doc = Nokogiri::XML(bucket_listing_xml)
doc
listing = doc
.xpath('//xmlns:Contents/xmlns:Key')
.map(&:text)
truncated = doc.xpath('//xmlns:IsTruncated').text == 'true'
[listing, truncated]
end
def bulk_deletion_request_body(keys)

View file

@ -4,8 +4,8 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
# Entry point of the task: calls each step in sequence, with notify_dry_run
# invoked both before the first step and after the last one.
# NOTE(review): fix_openstack_mime_types is listed both before and after
# remove_unused_openstack_objects. This looks like the removed and added lines
# of a diff displayed together; confirm which position is the intended one.
def run
notify_dry_run
refresh_outdated_files
fix_openstack_mime_types
remove_unused_openstack_objects
fix_openstack_mime_types
notify_dry_run
end
@ -69,35 +69,36 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
def refresh_outdated_files
rake_puts "Refresh outdated attachments"
bar = RakeProgressbar.new(ActiveStorage::Blob.count)
refreshed_keys = []
missing_keys = []
old_pj_adapter.session do |old_pjs|
ActiveStorage::Blob.find_each do |blob|
new_pj_metadata = new_pjs.files.head(blob.key)
keys = old_pjs.list_prefixed('')
progress = ProgressReport.new(keys.count)
keys.each do |key|
new_pj_metadata = new_pjs.files.head(key)
refresh_needed = new_pj_metadata.nil?
if !refresh_needed
new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone
old_pj_last_modified = old_pjs.last_modified(blob.key)
old_pj_last_modified = old_pjs.last_modified(key)
if old_pj_last_modified.nil?
missing_keys.push(blob.key)
missing_keys.push(key)
else
refresh_needed = new_pj_last_modified < old_pj_last_modified
end
end
if refresh_needed
refreshed_keys.push(blob.key)
refreshed_keys.push(key)
if force?
file = Tempfile.new(blob.key)
file = Tempfile.new(key)
file.binmode
old_pjs.download(blob.key) do |chunk|
old_pjs.download(key) do |chunk|
file.write(chunk)
end
file.rewind
new_pjs.files.create(
:key => blob.key,
:key => key,
:body => file,
:public => false
)
@ -105,10 +106,10 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
file.unlink
end
end
bar.inc
progress.inc
end
progress.finish
end
bar.finished
if verbose?
rake_puts "Refreshed #{refreshed_keys.count} attachments\n#{refreshed_keys.join(', ')}"
@ -132,27 +133,30 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
end
rake_puts "Fix MIME types"
bar = RakeProgressbar.new(ActiveStorage::Blob.count)
progress = ProgressReport.new(new_pjs.count.to_i)
failed_keys = []
updated_keys = []
ActiveStorage::Blob.find_each do |blob|
if blob.identified? && blob.content_type.present?
updated_keys.push(blob.key)
new_pjs.files.each do |file|
blob = ActiveStorage::Blob.find_by(key: file.key)
if blob.nil?
failed_keys.push(file.key)
elsif blob.identified? && blob.content_type.present?
updated_keys.push(file.key)
if force?
if !blob.service.change_content_type(blob.key, blob.content_type)
failed_keys.push(blob.key)
if !blob.service.change_content_type(file.key, blob.content_type)
failed_keys.push(file.key)
end
end
end
bar.inc
progress.inc
end
bar.finished
progress.finish
if verbose?
rake_puts "Updated MIME Type for #{updated_keys.count} keys\n#{updated_keys.join(', ')}"
end
if failed_keys.present?
rake_puts "failed to update #{failed_keys.count} keys (dangling blob?)\n#{failed_keys.join(', ')}"
rake_puts "failed to update #{failed_keys.count} keys\n#{failed_keys.join(', ')}"
end
end
@ -160,7 +164,7 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
def remove_unused_openstack_objects
rake_puts "Remove unused files"
bar = RakeProgressbar.new(new_pjs.count.to_i)
progress = ProgressReport.new(new_pjs.count.to_i)
removed_keys = []
new_pjs.files.each do |file|
if !ActiveStorage::Blob.exists?(key: file.key)
@ -170,9 +174,9 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
end
end
bar.inc
progress.inc
end
bar.finished
progress.finish
if verbose?
rake_puts "Removed #{removed_keys.count} unused objects\n#{removed_keys.join(', ')}"

View file

@ -8,3 +8,55 @@ def rake_puts(*args)
puts(*args)
end
end
# Writes the arguments with Kernel#print (no trailing newline), but only when
# Rake.verbose is truthy; otherwise does nothing.
def rake_print(*args)
  return unless Rake.verbose

  print(*args)
end
# Single-line progress indicator for long-running rake tasks.
#
# Output goes through rake_puts / rake_print. Each refresh rewrites the current
# line (leading "\r") with the percentage, the item counts, the elapsed time
# and an estimate of the total duration.
class ProgressReport
  # Shown instead of the estimated duration while no estimate can be computed.
  UNKNOWN_DURATION = '--:--:--'

  # @param total [Integer, nil] expected number of items; nil is treated as 0,
  #   and the total grows automatically if more items end up being counted
  def initialize(total)
    # Monotonic clock: elapsed time stays correct across wall-clock adjustments.
    @start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    rake_puts
    set_progress(total: total, count: 0)
  end

  # Counts one more processed item and refreshes the display whenever the
  # progress, expressed in ten-thousandths, is a multiple of 10 (i.e. 0.1%).
  def inc
    set_progress(count: @count + 1)
    print_progress if (@per_10_000 % 10).zero?
  end

  # Ends the report. If items were counted but 100% was never reached, the
  # total is lowered to the actual count so the last line shows 100%.
  def finish
    if @count.positive? && @per_10_000 != 10_000
      set_progress(total: @count)
      print_progress
    end
    rake_puts
  end

  # Updates the total and/or the count, then recomputes the progress ratio.
  #
  # @param total [Integer, nil] new total; left unchanged when nil
  # @param count [Integer, nil] new count; left unchanged when nil
  def set_progress(total: nil, count: nil)
    @total = total unless total.nil?
    unless count.nil?
      @count = count
      # Never let the count exceed the total (a missing total counts as 0).
      @total = [@count, @total || 0].max
    end
    # An empty work list has no meaningful ratio; avoid dividing by zero.
    @per_10_000 = @total.zero? ? 0 : 10_000 * @count / @total
  end

  # Rewrites the current terminal line with the latest figures.
  def print_progress
    elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - @start
    percent = format('%5.1f%%', @per_10_000 / 100.0)
    total = @total.to_s
    count = @count.to_s.rjust(total.length)
    # Below 0.01% there is nothing to extrapolate from: dividing by zero here
    # would yield Infinity, which cannot be formatted as a duration.
    estimate = @per_10_000.zero? ? nil : elapsed * 10_000 / @per_10_000
    rake_print("\r#{percent} (#{count}/#{total}) [#{format_duration(elapsed)}/#{format_duration(estimate)}]")
  end

  # Formats a duration as HH:MM:SS. Hours are not wrapped at 24, so a run of
  # 25 hours reads "25:00:00".
  #
  # @param seconds [Numeric, nil] duration in seconds; nil means "unknown"
  # @return [String]
  def format_duration(seconds)
    return UNKNOWN_DURATION if seconds.nil?

    hours, remainder = seconds.to_i.divmod(3600)
    minutes, secs = remainder.divmod(60)
    format('%02d:%02d:%02d', hours, minutes, secs)
  end
end

View file

@ -54,7 +54,7 @@ describe 'CellarAdapter' do
subject { session.send(:parse_bucket_listing, response) }
it { is_expected.to eq(["sample1.jpg", "sample2.jpg"]) }
it { is_expected.to eq([["sample1.jpg", "sample2.jpg"], false]) }
end
describe 'bulk_deletion_request_body' do