perf(export): download files >= 10 MB in chunks

This commit is contained in:
Colin Darie 2024-04-02 12:54:24 +02:00
parent b245d9b063
commit e23e2d9c31
No known key found for this signature in database
GPG key ID: 8C76CADD40253590
2 changed files with 93 additions and 23 deletions

View file

@ -15,18 +15,14 @@ module DownloadManager
hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL) hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)
attachments.each do |attachment, path| attachments.each do |attachment, path|
begin
download_one(attachment: attachment, download_one(attachment: attachment,
path_in_download_dir: path, path_in_download_dir: path,
http_client: hydra) http_client: hydra)
rescue => e rescue => e
on_error.call(attachment, path, e) on_error.call(attachment, path, e)
end end
end
hydra.run hydra.run
GC.start
end end
# can't be used with typhoeus, otherwise block is closed before the request is run by hydra # can't be used with typhoeus, otherwise block is closed before the request is run by hydra
@ -38,21 +34,18 @@ module DownloadManager
if attachment.is_a?(ActiveStorage::FakeAttachment) if attachment.is_a?(ActiveStorage::FakeAttachment)
attachment_path.write(attachment.file.read, mode: 'wb') attachment_path.write(attachment.file.read, mode: 'wb')
else return
end
request = Typhoeus::Request.new(attachment.url) request = Typhoeus::Request.new(attachment.url)
request.on_complete do |response| if attachment.blob.byte_size < 10.megabytes
if response.success? request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
attachment_path.open(mode: "wb") do |fd|
fd.write(response.body)
end
else else
attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
on_error.call(attachment, path_in_download_dir, response.code)
end
end end
http_client.queue(request) http_client.queue(request)
end end
end
private private
@ -66,5 +59,38 @@ module DownloadManager
basename + ext basename + ext
end end
# Configures +request+ so that, once completed, its whole response body is
# written in one go to +attachment_path+ (binary mode).
#
# A response that is not successful is delegated to handle_response_error
# together with the attachment and its path in the download dir.
def request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
  request.on_complete do |http_response|
    succeeded = http_response.success?
    next handle_response_error(http_response, attachment:, attachment_path:, path_in_download_dir:) unless succeeded

    # The body is fully buffered at this point: dump it as-is.
    attachment_path.binwrite(http_response.body)
  end
end
# Configures +request+ so that its response body is streamed to
# +attachment_path+ chunk by chunk instead of being buffered in memory.
#
# The destination file is opened lazily (on the first received chunk, or on
# completion when no chunk was received) rather than when the request is
# configured. Requests are configured and queued first and only run later by
# the HTTP client, so opening eagerly would hold one open file descriptor per
# pending download for the whole duration of the queue.
#
# Once the request completes the file is closed; a response that is not
# successful is delegated to handle_response_error together with the
# attachment and its path in the download dir.
def request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
  downloaded_file = nil

  request.on_body do |chunk|
    downloaded_file ||= attachment_path.open(mode: 'wb')
    downloaded_file.write(chunk)
  end

  request.on_complete do |response|
    # No chunk received (e.g. empty body): still create/truncate the
    # destination, exactly as an eager open would have done.
    downloaded_file ||= attachment_path.open(mode: 'wb')
    downloaded_file.close

    unless response.success?
      handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
    end
  end
end
# Shared failure path for a download whose response was not successful:
# removes the file at +attachment_path+ if there is one, then reports the
# attachment, its path in the download dir and the response code to on_error.
def handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
  # -> case of retries failed: the partially downloaded file must be cleaned up
  attachment_path.unlink if attachment_path.exist?

  status_code = response.code
  on_error.call(attachment, path_in_download_dir, status_code)
end
end end
end end

View file

@ -8,8 +8,10 @@ describe DownloadManager::ParallelDownloadQueue do
after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) } after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) }
let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) } let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) }
let(:http_client) { instance_double(Typhoeus::Hydra) }
describe '#download_one' do describe '#download_one' do
subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client: double) } subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client:) }
let(:destination) { 'lol.png' } let(:destination) { 'lol.png' }
let(:attachment) do let(:attachment) do
@ -73,5 +75,47 @@ describe DownloadManager::ParallelDownloadQueue do
end end
end end
end end
# Exercises both download code paths end to end: the request is configured by
# the enclosing subject, then actually run through a real Typhoeus::Hydra.
context "download strategies" do
# Run the enclosing subject first, then run the hydra so the request
# callbacks are actually executed.
subject { super(); http_client.run }
# Size advertised by the blob; defaults to a small file, overridden below
# for the chunked scenario.
let(:byte_size) { 1.kilobyte }
let(:file_url) { 'http://example.com/test_file' }
let(:destination) { 'test_file.txt' }
# A real hydra replaces the instance_double used by the other examples.
let(:http_client) { Typhoeus::Hydra.new }
let(:blob) { instance_double('ActiveStorage::Blob', byte_size:, url: file_url) }
let(:attachment) { double('ActiveStorage::Attachment', blob: blob) }
before do
allow(attachment).to receive(:url).and_return(file_url)
# file_content is provided by each nested context.
# NOTE(review): stub_request presumably comes from WebMock - confirm.
stub_request(:get, file_url).to_return(body: file_content, status: 200)
end
context 'for small files using request_in_whole method' do
let(:file_content) { 'downloaded content' }
it 'downloads the file in whole' do
target = Pathname.new(download_to_dir).join(destination)
expect { subject }.to change { target.exist? }.from(false).to(true)
expect(File.read(target)).to eq(file_content)
end
end
context 'for large files using request_in_chunks method' do
# Only the advertised byte_size is large; the served body stays small so
# the example remains fast.
let(:byte_size) { 20.megabytes } # Adjust byte size for large file scenario
let(:file_content) { 'downloaded content' * 1000 }
before do
# Spy on the private method while keeping its real behavior.
allow(downloadable_manager).to receive(:request_in_chunks).and_call_original
end
it 'downloads the file in chunks' do
target = Pathname.new(download_to_dir).join(destination)
expect { subject }.to change { target.exist? }.from(false).to(true)
expect(File.read(target)).to eq(file_content)
expect(downloadable_manager).to have_received(:request_in_chunks) # ensure we're taking the chunks code path
end
end
end
end end
end end