diff --git a/app/lib/download_manager/parallel_download_queue.rb b/app/lib/download_manager/parallel_download_queue.rb index 2a769aa14..16dcbd762 100644 --- a/app/lib/download_manager/parallel_download_queue.rb +++ b/app/lib/download_manager/parallel_download_queue.rb @@ -15,18 +15,14 @@ module DownloadManager hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL) attachments.each do |attachment, path| - begin - download_one(attachment: attachment, - path_in_download_dir: path, - http_client: hydra) - rescue => e - on_error.call(attachment, path, e) - end + download_one(attachment: attachment, + path_in_download_dir: path, + http_client: hydra) + rescue => e + on_error.call(attachment, path, e) end hydra.run - - GC.start end # can't be used with typhoeus, otherwise block is closed before the request is run by hydra @@ -38,20 +34,17 @@ module DownloadManager if attachment.is_a?(ActiveStorage::FakeAttachment) attachment_path.write(attachment.file.read, mode: 'wb') - else - request = Typhoeus::Request.new(attachment.url) - request.on_complete do |response| - if response.success? - attachment_path.open(mode: "wb") do |fd| - fd.write(response.body) - end - else - attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file - on_error.call(attachment, path_in_download_dir, response.code) - end - end - http_client.queue(request) + return end + + request = Typhoeus::Request.new(attachment.url) + if attachment.blob.byte_size < 10.megabytes + request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:) + else + request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:) + end + + http_client.queue(request) end private @@ -66,5 +59,38 @@ module DownloadManager basename + ext end + + def request_in_whole(request, attachment:, attachment_path:, path_in_download_dir:) + request.on_complete do |response| + if response.success? + attachment_path.open(mode: 'wb') do |fd| + fd.write(response.body) + end + else + handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:) + end + end + end + + def request_in_chunks(request, attachment:, attachment_path:, path_in_download_dir:) + downloaded_file = attachment_path.open(mode: 'wb') + + request.on_body do |chunk| + downloaded_file.write(chunk) + end + + request.on_complete do |response| + downloaded_file.close + + if !response.success? + handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:) + end + end + end + + def handle_response_error(response, attachment:, attachment_path:, path_in_download_dir:) + attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file + on_error.call(attachment, path_in_download_dir, response.code) + end end end diff --git a/spec/lib/download_manager/parallel_download_queue_spec.rb b/spec/lib/download_manager/parallel_download_queue_spec.rb index fa63da50f..12c489abf 100644 --- a/spec/lib/download_manager/parallel_download_queue_spec.rb +++ b/spec/lib/download_manager/parallel_download_queue_spec.rb @@ -8,8 +8,10 @@ describe DownloadManager::ParallelDownloadQueue do after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) } let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) } + let(:http_client) { instance_double(Typhoeus::Hydra) } + describe '#download_one' do - subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client: double) } + subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client:) } let(:destination) { 'lol.png' } let(:attachment) do @@ -73,5 +75,47 @@ describe DownloadManager::ParallelDownloadQueue do end end end + + context "download strategies" do + subject { super(); http_client.run } + let(:byte_size) { 1.kilobyte } + let(:file_url) { 'http://example.com/test_file' } + let(:destination) { 'test_file.txt' } + let(:http_client) { Typhoeus::Hydra.new } + let(:blob) { instance_double('ActiveStorage::Blob', byte_size:, url: file_url) } + let(:attachment) { double('ActiveStorage::Attachment', blob: blob) } + + before do + allow(attachment).to receive(:url).and_return(file_url) + stub_request(:get, file_url).to_return(body: file_content, status: 200) + end + + context 'for small files using request_in_whole method' do + let(:file_content) { 'downloaded content' } + it 'downloads the file in whole' do + target = Pathname.new(download_to_dir).join(destination) + expect { subject }.to change { target.exist? }.from(false).to(true) + + expect(File.read(target)).to eq(file_content) + end + end + + context 'for large files using request_in_chunks method' do + let(:byte_size) { 20.megabytes } # Adjust byte size for large file scenario + let(:file_content) { 'downloaded content' * 1000 } + + before do + allow(downloadable_manager).to receive(:request_in_chunks).and_call_original + end + + it 'downloads the file in chunks' do + target = Pathname.new(download_to_dir).join(destination) + expect { subject }.to change { target.exist? }.from(false).to(true) + + expect(File.read(target)).to eq(file_content) + expect(downloadable_manager).to have_received(:request_in_chunks) # ensure we're taking the chunks code path + end + end + end end end