Merge pull request #3148 from betagouv/frederic/fix_2180/speed_up_migration

#2180 speed up migration
This commit is contained in:
Frederic Merizen 2018-12-10 10:31:41 +01:00 committed by GitHub
commit 1ff1a0b4e9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 44 additions and 23 deletions

View file

@ -37,7 +37,7 @@ module ActiveStorage
def delete_prefixed(prefix)
instrument :delete_prefixed, prefix: prefix do
@adapter.session do |s|
keys = s.list_prefixed(prefix)
keys = s.list_prefixed(prefix).map(&:first)
s.delete_keys(keys)
end
end

View file

@ -91,7 +91,7 @@ module Cellar
if response.is_a?(Net::HTTPSuccess)
(listing, truncated) = parse_bucket_listing(response.body)
result += listing
marker = listing.last
marker = listing.last.first
else
# TODO: error handling
return nil
@ -139,8 +139,13 @@ module Cellar
# Parses the XML body of a bucket listing response with Nokogiri.
#
# Returns a two-element array [listing, truncated]:
# - listing: one [key, last_modified] pair per namespace-qualified Contents element,
#   where key is the text of its Key child and last_modified is the text of its
#   LastModified child parsed with DateTime.iso8601;
# - truncated: true when the text of the IsTruncated element is exactly 'true'.
#
# NOTE(review): this hunk (-139,8 +139,13) shows removed and added lines together.
# The `.xpath('//xmlns:Contents/xmlns:Key')` / `.map(&:text)` pair is the removed
# keys-only version; the `.xpath('//xmlns:Contents')` mapping is what replaces it.
# The description above covers the added version only.
def parse_bucket_listing(bucket_listing_xml)
doc = Nokogiri::XML(bucket_listing_xml)
listing = doc
.xpath('//xmlns:Contents/xmlns:Key')
.map(&:text)
.xpath('//xmlns:Contents')
.map do |node|
[
node.xpath('xmlns:Key').text,
DateTime.iso8601(node.xpath('xmlns:LastModified').text)
]
end
truncated = doc.xpath('//xmlns:IsTruncated').text == 'true'
[listing, truncated]
end

View file

@ -67,23 +67,32 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
# This task ports them to the new storage after the switch, while being careful not to
# overwrite attachments that may have changed in the new storage after the switch.
def refresh_outdated_files
rake_puts "Refresh outdated attachments"
refreshed_keys = []
missing_keys = []
old_pj_adapter.session do |old_pjs|
keys = old_pjs.list_prefixed('')
progress = ProgressReport.new(keys.count)
keys.each do |key|
new_pj_metadata = new_pjs.files.head(key)
rake_puts "List old PJs"
old_pj_listing = old_pjs.list_prefixed('')
refresh_needed = new_pj_metadata.nil?
if !refresh_needed
new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone
old_pj_last_modified = old_pjs.last_modified(key)
if old_pj_last_modified.nil?
missing_keys.push(key)
else
rake_puts "List new PJs"
new_pj_listing = {}
progress = ProgressReport.new(new_pjs.count.to_i)
new_pjs.files.each do |f|
new_pj_listing[f.key] = f.last_modified.in_time_zone
progress.inc
end
progress.finish
rake_puts "Refresh outdated attachments"
progress = ProgressReport.new(old_pj_listing.count)
old_pj_listing.each do |key, old_pj_last_modified|
new_pj_last_modified = new_pj_listing[key]
if new_pj_last_modified.nil? || new_pj_last_modified < old_pj_last_modified
# Looks like we need to refresh this PJ.
# Fetch fresh metadata to avoid overwriting a last-minute change
new_pj_metadata = new_pjs.files.head(key)
refresh_needed = new_pj_metadata.nil?
if !refresh_needed
new_pj_last_modified = new_pj_metadata.last_modified.in_time_zone
refresh_needed = new_pj_last_modified < old_pj_last_modified
end
end
@ -114,9 +123,6 @@ namespace :'2018_12_03_finish_piece_jointe_transfer' do
if verbose?
rake_puts "Refreshed #{refreshed_keys.count} attachments\n#{refreshed_keys.join(', ')}"
end
if missing_keys.present?
rake_puts "Failed to refresh #{missing_keys.count} attachments\n#{missing_keys.join(', ')}"
end
end
# For OpenStack, the content type cannot be forced dynamically from a direct download URL.

View file

@ -43,7 +43,7 @@ describe 'CellarAdapter' do
</Contents>
<Contents>
<Key>sample2.jpg</Key>
<LastModified>2011-02-26T01:56:20.000Z</LastModified>
<LastModified>2014-03-21T17:44:07.000Z</LastModified>
<ETag>&quot;bf1d737a4d46a19f3bced6905cc8b902&quot;</ETag>
<Size>142863</Size>
<StorageClass>STANDARD</StorageClass>
@ -54,7 +54,17 @@ describe 'CellarAdapter' do
subject { session.send(:parse_bucket_listing, response) }
it { is_expected.to eq([["sample1.jpg", "sample2.jpg"], false]) }
it do
is_expected.to eq(
[
[
["sample1.jpg", DateTime.new(2011, 2, 26, 1, 56, 20, 0)],
["sample2.jpg", DateTime.new(2014, 3, 21, 17, 44, 7, 0)]
],
false
]
)
end
end
describe 'bulk_deletion_request_body' do