perf(export): download files >= 10 MB in chunks

This commit is contained in:
Colin Darie 2024-04-02 12:54:24 +02:00
parent b245d9b063
commit e23e2d9c31
No known key found for this signature in database
GPG key ID: 8C76CADD40253590
2 changed files with 93 additions and 23 deletions

View file

@ -15,18 +15,14 @@ module DownloadManager
hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)
attachments.each do |attachment, path|
begin
download_one(attachment: attachment,
path_in_download_dir: path,
http_client: hydra)
rescue => e
on_error.call(attachment, path, e)
end
download_one(attachment: attachment,
path_in_download_dir: path,
http_client: hydra)
rescue => e
on_error.call(attachment, path, e)
end
hydra.run
GC.start
end
# can't be used with typhoeus, otherwise block is closed before the request is run by hydra
@ -38,20 +34,17 @@ module DownloadManager
if attachment.is_a?(ActiveStorage::FakeAttachment)
attachment_path.write(attachment.file.read, mode: 'wb')
else
request = Typhoeus::Request.new(attachment.url)
request.on_complete do |response|
if response.success?
attachment_path.open(mode: "wb") do |fd|
fd.write(response.body)
end
else
attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file
on_error.call(attachment, path_in_download_dir, response.code)
end
end
http_client.queue(request)
return
end
request = Typhoeus::Request.new(attachment.url)
if attachment.blob.byte_size < 10.megabytes
request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
else
request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
end
http_client.queue(request)
end
private
@ -66,5 +59,38 @@ module DownloadManager
basename + ext
end
# Registers a completion callback on +request+ that stores the whole response
# body at +attachment_path+ in one write (binary mode).
#
# On an unsuccessful response nothing is written and the failure is delegated
# to handle_response_error.
#
# @param request [#on_complete] request the callback is attached to
# @param attachment [Object] forwarded to handle_response_error on failure
# @param attachment_path [#write] destination file (presumably a Pathname — confirm against caller)
# @param path_in_download_dir [Object] forwarded to handle_response_error on failure
def request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
  request.on_complete do |response|
    if !response.success?
      handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
    else
      # the body is fully buffered by the HTTP client at this point
      attachment_path.write(response.body, mode: 'wb')
    end
  end
end
# Registers callbacks on +request+ that stream the response body to
# +attachment_path+ chunk by chunk, instead of buffering the whole body.
#
# The destination file is opened lazily, on the first body chunk: this method
# is called when the request is queued, so opening the file here would keep
# one file descriptor open per queued request until each of them is run.
#
# On an unsuccessful response the failure is delegated to
# handle_response_error after the file has been closed.
#
# @param request [#on_body, #on_complete] request the callbacks are attached to
# @param attachment [Object] forwarded to handle_response_error on failure
# @param attachment_path [#open, #write] destination file (presumably a Pathname — confirm against caller)
# @param path_in_download_dir [Object] forwarded to handle_response_error on failure
def request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
  downloaded_file = nil

  request.on_body do |chunk|
    downloaded_file ||= attachment_path.open(mode: 'wb')
    downloaded_file.write(chunk)
  end

  request.on_complete do |response|
    # nil when no body chunk was ever received
    downloaded_file&.close

    if response.success?
      # a successful empty body still yields an (empty) file, as before
      attachment_path.write('', mode: 'wb') if downloaded_file.nil?
    else
      handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
    end
  end
end
# Cleans up after a failed download and reports it through +on_error+.
#
# @param response [#code] the unsuccessful response; its code is passed to on_error
# @param attachment [Object] passed through to on_error
# @param attachment_path [#exist?, #delete] destination file to clean up
# @param path_in_download_dir [Object] passed through to on_error
def handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
  # when retries have failed, a partially downloaded file may be left behind: remove it
  if attachment_path.exist?
    attachment_path.delete
  end

  status = response.code
  on_error.call(attachment, path_in_download_dir, status)
end
end
end