perf(export): download files >= 10 MB in chunks
This commit is contained in:
parent
b245d9b063
commit
e23e2d9c31
2 changed files with 93 additions and 23 deletions
|
@ -15,18 +15,14 @@ module DownloadManager
|
||||||
hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)
|
hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)
|
||||||
|
|
||||||
attachments.each do |attachment, path|
|
attachments.each do |attachment, path|
|
||||||
begin
|
|
||||||
download_one(attachment: attachment,
|
download_one(attachment: attachment,
|
||||||
path_in_download_dir: path,
|
path_in_download_dir: path,
|
||||||
http_client: hydra)
|
http_client: hydra)
|
||||||
rescue => e
|
rescue => e
|
||||||
on_error.call(attachment, path, e)
|
on_error.call(attachment, path, e)
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
hydra.run
|
hydra.run
|
||||||
|
|
||||||
GC.start
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# can't be used with typhoeus, otherwise block is closed before the request is run by hydra
|
# can't be used with typhoeus, otherwise block is closed before the request is run by hydra
|
||||||
|
@ -38,21 +34,18 @@ module DownloadManager
|
||||||
|
|
||||||
if attachment.is_a?(ActiveStorage::FakeAttachment)
|
if attachment.is_a?(ActiveStorage::FakeAttachment)
|
||||||
attachment_path.write(attachment.file.read, mode: 'wb')
|
attachment_path.write(attachment.file.read, mode: 'wb')
|
||||||
else
|
return
|
||||||
|
end
|
||||||
|
|
||||||
request = Typhoeus::Request.new(attachment.url)
|
request = Typhoeus::Request.new(attachment.url)
|
||||||
request.on_complete do |response|
|
if attachment.blob.byte_size < 10.megabytes
|
||||||
if response.success?
|
request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
|
||||||
attachment_path.open(mode: "wb") do |fd|
|
|
||||||
fd.write(response.body)
|
|
||||||
end
|
|
||||||
else
|
else
|
||||||
attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file
|
request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
|
||||||
on_error.call(attachment, path_in_download_dir, response.code)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
http_client.queue(request)
|
http_client.queue(request)
|
||||||
end
|
end
|
||||||
end
|
|
||||||
|
|
||||||
private
|
private
|
||||||
|
|
||||||
|
@ -66,5 +59,38 @@ module DownloadManager
|
||||||
|
|
||||||
basename + ext
|
basename + ext
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Registers a completion callback on +request+ that stores the whole response
# body at +attachment_path+ in one binary write. Nothing is written until the
# callback fires.
#
# request              - object accepting an +on_complete+ block
# attachment           - passed through to handle_response_error on failure
# attachment_path      - Pathname of the file to create
# path_in_download_dir - passed through to handle_response_error on failure
def request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:)
  request.on_complete do |response|
    # Failure path first: delegate cleanup and error reporting, keep its result as the block value.
    next handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:) unless response.success?

    attachment_path.binwrite(response.body)
  end
end
|
||||||
|
|
||||||
|
# Registers callbacks on +request+ that stream the response body to
# +attachment_path+ chunk by chunk instead of buffering it in memory.
#
# The destination file is opened when the first chunk arrives, not when this
# method is called. This method runs while requests are being queued, so an
# eager open would keep one file descriptor per queued attachment open until
# its transfer completes.
#
# request              - object accepting +on_body+ and +on_complete+ blocks
# attachment           - passed through to handle_response_error on failure
# attachment_path      - Pathname of the file to create
# path_in_download_dir - passed through to handle_response_error on failure
def request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:)
  downloaded_file = nil

  request.on_body do |chunk|
    downloaded_file ||= attachment_path.open(mode: 'wb')
    downloaded_file.write(chunk)
  end

  request.on_complete do |response|
    # Close first so buffered data is flushed and the handle is released
    # before the file is inspected or deleted.
    downloaded_file&.close

    if response.success?
      # If no chunk was ever delivered, still leave an (empty, truncated) file
      # behind, as a successful download always produced one.
      attachment_path.binwrite('') if downloaded_file.nil?
    else
      handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
    end
  end
end
|
||||||
|
|
||||||
|
# Common failure path for both download strategies: removes whatever was
# written to +attachment_path+ and reports the HTTP status through +on_error+.
#
# response             - object responding to +code+
# attachment           - forwarded to +on_error+ as first argument
# attachment_path      - Pathname that may hold a partially downloaded file
# path_in_download_dir - forwarded to +on_error+ as second argument
def handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:)
  # When retries have failed, a partially downloaded file may be left on disk; clean it up.
  leftover_present = attachment_path.exist?
  attachment_path.delete if leftover_present

  on_error.call(attachment, path_in_download_dir, response.code)
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -8,8 +8,10 @@ describe DownloadManager::ParallelDownloadQueue do
|
||||||
after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) }
|
after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) }
|
||||||
|
|
||||||
let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) }
|
let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) }
|
||||||
|
let(:http_client) { instance_double(Typhoeus::Hydra) }
|
||||||
|
|
||||||
describe '#download_one' do
|
describe '#download_one' do
|
||||||
subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client: double) }
|
subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client:) }
|
||||||
|
|
||||||
let(:destination) { 'lol.png' }
|
let(:destination) { 'lol.png' }
|
||||||
let(:attachment) do
|
let(:attachment) do
|
||||||
|
@ -73,5 +75,47 @@ describe DownloadManager::ParallelDownloadQueue do
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Runs a real Typhoeus::Hydra against a stubbed HTTP endpoint and checks, for
# each strategy, both the file written to disk and which private method was
# used, so a change to the size threshold cannot silently swap the code paths.
context "download strategies" do
  # Queue the request through the parent subject, then actually run the hydra.
  subject { super(); http_client.run }

  let(:byte_size) { 1.kilobyte }
  let(:file_url) { 'http://example.com/test_file' }
  let(:destination) { 'test_file.txt' }
  let(:http_client) { Typhoeus::Hydra.new }
  let(:blob) { instance_double('ActiveStorage::Blob', byte_size:, url: file_url) }
  let(:attachment) { double('ActiveStorage::Attachment', blob: blob) }
  let(:target) { Pathname.new(download_to_dir).join(destination) }

  before do
    allow(attachment).to receive(:url).and_return(file_url)
    stub_request(:get, file_url).to_return(body: file_content, status: 200)

    # Spy on both strategies so every example can assert the path it took.
    allow(downloadable_manager).to receive(:request_in_whole).and_call_original
    allow(downloadable_manager).to receive(:request_in_chunks).and_call_original
  end

  context 'for small files using request_in_whole method' do
    let(:file_content) { 'downloaded content' }

    it 'downloads the file in whole' do
      expect { subject }.to change { target.exist? }.from(false).to(true)

      expect(File.read(target)).to eq(file_content)
      expect(downloadable_manager).to have_received(:request_in_whole)
      expect(downloadable_manager).not_to have_received(:request_in_chunks)
    end
  end

  context 'for large files using request_in_chunks method' do
    # Only the declared blob size selects the strategy; the stubbed body can stay small.
    let(:byte_size) { 20.megabytes }
    let(:file_content) { 'downloaded content' * 1000 }

    it 'downloads the file in chunks' do
      expect { subject }.to change { target.exist? }.from(false).to(true)

      expect(File.read(target)).to eq(file_content)
      expect(downloadable_manager).to have_received(:request_in_chunks) # ensure we're taking the chunks code path
      expect(downloadable_manager).not_to have_received(:request_in_whole)
    end
  end
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue