diff --git a/app/lib/download_manager/parallel_download_queue.rb b/app/lib/download_manager/parallel_download_queue.rb index 5c5d549da..79f0fa923 100644 --- a/app/lib/download_manager/parallel_download_queue.rb +++ b/app/lib/download_manager/parallel_download_queue.rb @@ -8,7 +8,7 @@ module DownloadManager def initialize(attachments, destination) @attachments = attachments - @destination = destination + @destination = Pathname.new(destination) end def download_all @@ -28,26 +28,40 @@ module DownloadManager # can't be used with typhoeus, otherwise block is closed before the request is run by hydra def download_one(attachment:, path_in_download_dir:, http_client:) - attachment_path = File.join(destination, path_in_download_dir) - attachment_dir = File.dirname(attachment_path) + path = Pathname.new(path_in_download_dir) + attachment_path = destination.join(path.dirname, sanitize_filename(path.basename.to_s)) + + attachment_path.dirname.mkpath # defensive, do not write in undefined dir - FileUtils.mkdir_p(attachment_dir) if !Dir.exist?(attachment_dir) # defensive, do not write in undefined dir if attachment.is_a?(ActiveStorage::FakeAttachment) - File.write(attachment_path, attachment.file.read, mode: 'wb') + attachment_path.write(attachment.file.read, mode: 'wb') else request = Typhoeus::Request.new(attachment.url) request.on_complete do |response| if response.success? - File.open(attachment_path, mode: "wb") do |fd| + attachment_path.open(mode: "wb") do |fd| fd.write(response.body) end else - File.delete(attachment_path) if File.exist?(attachment_path) # -> case of retries failed, must cleanup partialy downloaded file + attachment_path.delete if attachment_path.exist? # -> case of retries failed, must cleanup partialy downloaded file on_error.call(attachment, path_in_download_dir, response.code) end end http_client.queue(request) end end + + private + + def sanitize_filename(original_filename) + filename = ActiveStorage::Filename.new(original_filename).sanitized + + return filename if filename.bytesize <= 255 + + ext = File.extname(filename) + basename = File.basename(filename, ext).byteslice(0, 255 - ext.bytesize) + + basename + ext + end end end diff --git a/spec/lib/download_manager/parallel_download_queue_spec.rb b/spec/lib/download_manager/parallel_download_queue_spec.rb index cdc67e820..fa63da50f 100644 --- a/spec/lib/download_manager/parallel_download_queue_spec.rb +++ b/spec/lib/download_manager/parallel_download_queue_spec.rb @@ -40,5 +40,38 @@ describe DownloadManager::ParallelDownloadQueue do # expect(downloadable_manager.errors).to have_key(destination) end end + + context 'with a destination filename too long' do + let(:destination) { 'a' * 252 + '.txt' } + + it 'limit the file path to 255 bytes' do + target = File.join(download_to_dir, 'a' * 251 + '.txt') + expect { subject }.to change { File.exist?(target) } + attachment.file.rewind + expect(attachment.file.read).to eq(File.read(target)) + end + end + + context 'with filename containing unsafe characters for storage' do + let(:destination) { "file:éà\u{202e} K.txt" } + + it 'sanitize the problematic chars' do + target = File.join(download_to_dir, 'file-éà- K.txt') + expect { subject }.to change { File.exist?(target) } + attachment.file.rewind + expect(attachment.file.read).to eq(File.read(target)) + end + + context 'with a destination tree' do + let(:destination) { 'subdir/file.txt' } + + it 'preserves the destination tree' do + target = File.join(download_to_dir, 'subdir/file.txt') + expect { subject }.to change { File.exist?(target) } + attachment.file.rewind + expect(attachment.file.read).to eq(File.read(target)) + end + end + end end end