perf(export): download in chunks files >= 10 mb
This commit is contained in:
parent
b245d9b063
commit
e23e2d9c31
2 changed files with 93 additions and 23 deletions
|
@ -15,18 +15,14 @@ module DownloadManager
|
|||
hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)
|
||||
|
||||
attachments.each do |attachment, path|
|
||||
begin
|
||||
download_one(attachment: attachment,
|
||||
path_in_download_dir: path,
|
||||
http_client: hydra)
|
||||
rescue => e
|
||||
on_error.call(attachment, path, e)
|
||||
end
|
||||
download_one(attachment: attachment,
|
||||
path_in_download_dir: path,
|
||||
http_client: hydra)
|
||||
rescue => e
|
||||
on_error.call(attachment, path, e)
|
||||
end
|
||||
|
||||
hydra.run
|
||||
|
||||
GC.start
|
||||
end
|
||||
|
||||
# can't be used with typhoeus, otherwise block is closed before the request is run by hydra
|
||||
|
@ -38,20 +34,17 @@ module DownloadManager
|
|||
|
||||
if attachment.is_a?(ActiveStorage::FakeAttachment)
|
||||
attachment_path.write(attachment.file.read, mode: 'wb')
|
||||
else
|
||||
request = Typhoeus::Request.new(attachment.url)
|
||||
request.on_complete do |response|
|
||||
if response.success?
|
||||
attachment_path.open(mode: "wb") do |fd|
|
||||
fd.write(response.body)
|
||||
end
|
||||
else
|
||||
attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file
|
||||
on_error.call(attachment, path_in_download_dir, response.code)
|
||||
end
|
||||
end
|
||||
http_client.queue(request)
|
||||
return
|
||||
end
|
||||
|
||||
request = Typhoeus::Request.new(attachment.url)
|
||||
if attachment.blob.byte_size < 10.megabytes
|
||||
request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
|
||||
else
|
||||
request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
|
||||
end
|
||||
|
||||
http_client.queue(request)
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -66,5 +59,38 @@ module DownloadManager
|
|||
|
||||
basename + ext
|
||||
end
|
||||
|
||||
def request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
|
||||
request.on_complete do |response|
|
||||
if response.success?
|
||||
attachment_path.open(mode: 'wb') do |fd|
|
||||
fd.write(response.body)
|
||||
end
|
||||
else
|
||||
handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
|
||||
downloaded_file = attachment_path.open(mode: 'wb')
|
||||
|
||||
request.on_body do |chunk|
|
||||
downloaded_file.write(chunk)
|
||||
end
|
||||
|
||||
request.on_complete do |response|
|
||||
downloaded_file.close
|
||||
|
||||
if !response.success?
|
||||
handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
|
||||
attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file
|
||||
on_error.call(attachment, path_in_download_dir, response.code)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
@ -8,8 +8,10 @@ describe DownloadManager::ParallelDownloadQueue do
|
|||
after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) }
|
||||
|
||||
let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) }
|
||||
let(:http_client) { instance_double(Typhoeus::Hydra) }
|
||||
|
||||
describe '#download_one' do
|
||||
subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client: double) }
|
||||
subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client:) }
|
||||
|
||||
let(:destination) { 'lol.png' }
|
||||
let(:attachment) do
|
||||
|
@ -73,5 +75,47 @@ describe DownloadManager::ParallelDownloadQueue do
|
|||
end
|
||||
end
|
||||
end
|
||||
|
||||
context "download strategies" do
|
||||
subject { super(); http_client.run }
|
||||
let(:byte_size) { 1.kilobyte }
|
||||
let(:file_url) { 'http://example.com/test_file' }
|
||||
let(:destination) { 'test_file.txt' }
|
||||
let(:http_client) { Typhoeus::Hydra.new }
|
||||
let(:blob) { instance_double('ActiveStorage::Blob', byte_size:, url: file_url) }
|
||||
let(:attachment) { double('ActiveStorage::Attachment', blob: blob) }
|
||||
|
||||
before do
|
||||
allow(attachment).to receive(:url).and_return(file_url)
|
||||
stub_request(:get, file_url).to_return(body: file_content, status: 200)
|
||||
end
|
||||
|
||||
context 'for small files using request_in_whole method' do
|
||||
let(:file_content) { 'downloaded content' }
|
||||
it 'downloads the file in whole' do
|
||||
target = Pathname.new(download_to_dir).join(destination)
|
||||
expect { subject }.to change { target.exist? }.from(false).to(true)
|
||||
|
||||
expect(File.read(target)).to eq(file_content)
|
||||
end
|
||||
end
|
||||
|
||||
context 'for large files using request_in_chunks method' do
|
||||
let(:byte_size) { 20.megabytes } # Adjust byte size for large file scenario
|
||||
let(:file_content) { 'downloaded content' * 1000 }
|
||||
|
||||
before do
|
||||
allow(downloadable_manager).to receive(:request_in_chunks).and_call_original
|
||||
end
|
||||
|
||||
it 'downloads the file in chunks' do
|
||||
target = Pathname.new(download_to_dir).join(destination)
|
||||
expect { subject }.to change { target.exist? }.from(false).to(true)
|
||||
|
||||
expect(File.read(target)).to eq(file_content)
|
||||
expect(downloadable_manager).to have_received(:request_in_chunks) # ensure we're taking the chunks code path
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue