# frozen_string_literal: true

module DownloadManager
  # Downloads many attachments concurrently into a destination directory
  # using a Typhoeus::Hydra request queue.
  #
  # Usage:
  #   queue = ParallelDownloadQueue.new(attachments, dir)
  #   queue.on_error = ->(attachment, path, error_or_code) { ... }
  #   queue.download_all
  class ParallelDownloadQueue
    # Maximum number of requests run concurrently by the hydra.
    # Integer() keeps the type consistent whether the value comes from the
    # environment (a String) or from the default: Typhoeus compares
    # max_concurrency against integer counters, so a raw String from ENV
    # would be a latent runtime error. Fails fast on non-numeric values.
    DOWNLOAD_MAX_PARALLEL = Integer(ENV.fetch('DOWNLOAD_MAX_PARALLEL', 10))

    attr_accessor :attachments,
      :destination,
      :on_error

    # attachments: enumerable of [attachment, path_in_download_dir] pairs
    # destination: root directory the downloaded files are written under
    def initialize(attachments, destination)
      @attachments = attachments
      @destination = Pathname.new(destination)
    end

    # Queues every attachment on a hydra and runs the downloads in parallel.
    # A failure while queuing one attachment is reported through +on_error+
    # and does not abort the rest of the batch.
    def download_all
      hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)

      attachments.each do |attachment, path|
        download_one(attachment: attachment,
          path_in_download_dir: path,
          http_client: hydra)
      rescue => e
        on_error.call(attachment, path, e)
      end

      hydra.run
    end

    # Prepares and queues a single download.
    # NOTE: can't be used with a block with typhoeus, otherwise the block is
    # closed before the request is run by the hydra.
    def download_one(attachment:, path_in_download_dir:, http_client:)
      path = Pathname.new(path_in_download_dir)
      attachment_path = destination.join(path.dirname, sanitize_filename(path.basename.to_s))

      attachment_path.dirname.mkpath # defensive, do not write in undefined dir

      # Fake attachments carry their content in memory: write it directly,
      # no HTTP request needed.
      if attachment.is_a?(ActiveStorage::FakeAttachment)
        attachment_path.write(attachment.file.read, mode: 'wb')
        return
      end

      request = Typhoeus::Request.new(attachment.url)

      # Small files are buffered whole in memory; bigger ones are streamed
      # to disk chunk by chunk to bound memory usage.
      if attachment.blob.byte_size < 10.megabytes
        request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
      else
        request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
      end

      http_client.queue(request)
    end

    private

    # Sanitizes +original_filename+ and truncates it to at most 255 bytes
    # (the usual filesystem limit), keeping the extension intact.
    def sanitize_filename(original_filename)
      filename = ActiveStorage::Filename.new(original_filename).sanitized

      return filename if filename.bytesize <= 255

      ext = File.extname(filename)
      basename = File.basename(filename, ext).byteslice(0, 255 - ext.bytesize)

      basename + ext
    end

    # Buffers the whole response body in memory, then writes it to
    # +attachment_path+ on success; failures go to handle_response_error.
    def request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
      request.on_complete do |response|
        if response.success?
          attachment_path.open(mode: 'wb') do |fd|
            fd.write(response.body)
          end
        else
          handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
        end
      end
    end

    # Streams the response body to disk as chunks arrive, closing the file
    # on completion; failures go to handle_response_error.
    def request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
      downloaded_file = attachment_path.open(mode: 'wb')

      request.on_body do |chunk|
        downloaded_file.write(chunk)
      end

      request.on_complete do |response|
        downloaded_file.close

        unless response.success?
          handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
        end
      end
    end

    # Removes any partially downloaded file and reports the HTTP status code
    # through +on_error+.
    def handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
      attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file
      on_error.call(attachment, path_in_download_dir, response.code)
    end
  end
end