Switch to the Unix zip binary to create archives. Also use a dedicated queue for DelayedJob.

use dedicated archives queue

As disk usage will increase, we want fine-grained control over it.

move zip logic in dedicated method

zip

wip

wip

fix(spec): pass spec in green

tech(improvements): avoid File.delete(folder); favor FileUtils.remove_entry_secure, which is safer. Also wrap most of the code that opens files in blocks so they are cleaned up when the block ends. Lastly, use attachment.download to avoid heavy memory pressure [download in chunks, write in chunks]; otherwise big files [124>1GO] are loaded into memory. What if we run multiple jobs/downloads in parallel?

fix(spec): try to retry with grace

clean(procedure_archive_service_spec.rb): better retry [avoid rewriting an open file]

lint(things): everything
This commit is contained in:
simon lehericey 2021-11-29 15:43:51 +01:00 committed by Martin
parent 68a0b6f474
commit f0b0e7fd9a
6 changed files with 164 additions and 40 deletions

View file

@ -1,6 +1,10 @@
require 'tempfile'
require 'utils/retryable'
class ProcedureArchiveService
include Utils::Retryable
ARCHIVE_CREATION_DIR = ENV.fetch('ARCHIVE_CREATION_DIR') { '/tmp' }
# Keeps the given procedure in @procedure so the other instance methods
# (archive collection, zip folder naming) can read it.
#
# @param procedure the procedure this service instance works on
def initialize(procedure)
@procedure = procedure
end
@ -14,39 +18,22 @@ class ProcedureArchiveService
end
# Builds the archive file for `archive`, attaches it, marks the archive as
# available and queues the notification mail to `instructeur`.
#
# NOTE(review): this span looks like a rendered diff in which the removed
# implementation (Tempfile + Zip::OutputStream) and the added one
# (zip(attachments) { ... }) are interleaved without +/- markers — confirm
# against the real file before relying on the statement order shown here.
#
# @param archive the archive record; its time_span_type, month, filename and file are used
# @param instructeur recipient passed to InstructeurMailer.send_archive
def collect_files_archive(archive, instructeur)
## wrong: this should only take certain instructeur groups
# 'everything' selects all terminated dossiers; any other span type selects
# the dossiers processed during archive.month.
if archive.time_span_type == 'everything'
dossiers = @procedure.dossiers.state_termine
else
dossiers = @procedure.dossiers.processed_in_month(archive.month)
end
files = create_list_of_attachments(dossiers)
tmp_file = Tempfile.new(['tc', '.zip'])
Zip::OutputStream.open(tmp_file) do |zipfile|
# Accumulates one line per attachment that could not be downloaded.
bug_reports = ''
files.each do |attachment, pj_filename|
zipfile.put_next_entry("#{zip_root_folder(@procedure)}/#{pj_filename}")
begin
# attachment.download returns the whole content, which is then written in one go.
zipfile.puts(attachment.download)
rescue
bug_reports += "Impossible de récupérer le fichier #{pj_filename}\n"
end
end
# Only add the LISEZMOI.txt entry when at least one download failed.
if !bug_reports.empty?
zipfile.put_next_entry("#{zip_root_folder(@procedure)}/LISEZMOI.txt")
zipfile.puts(bug_reports)
end
attachments = create_list_of_attachments(dossiers)
# zip yields the path of the generated zip file, which is attached from inside the block.
zip(attachments) do |zip_file|
archive.file.attach(
io: File.open(zip_file),
filename: archive.filename(@procedure),
# we don't want to run virus scanner on this file
metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
)
end
archive.file.attach(
io: File.open(tmp_file),
filename: archive.filename(@procedure),
# we don't want to run virus scanner on this file
metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
)
tmp_file.delete
archive.make_available!
# deliver_later: the mail is enqueued rather than sent inline.
InstructeurMailer.send_archive(instructeur, @procedure, archive).deliver_later
end
@ -63,7 +50,49 @@ class ProcedureArchiveService
private
def zip_root_folder(procedure)
# Downloads every attachment into a private scratch directory, zips that tree
# with the system `zip` binary and yields the path of the resulting archive.
#
# The scratch directory is created under ARCHIVE_CREATION_DIR and holds both
# the downloaded tree and the zip file. Because the directory name is unique
# per call, two runs (even for the same procedure) never share a path and
# cannot delete or overwrite each other's archive. Dir.mktmpdir removes the
# whole directory when its block returns, including on error.
#
# Attachments that cannot be downloaded are logged, skipped, and listed in a
# LISEZMOI.txt file at the root of the archive.
#
# @param attachments [Enumerable] pairs of [attachment, path relative to the archive root]
# @yieldparam zip_path [String] path of the zip file; only valid inside the block
def zip(attachments, &block)
  Dir.mktmpdir(nil, ARCHIVE_CREATION_DIR) do |tmp_dir|
    archive_dir = File.join(tmp_dir, zip_root_folder)
    zip_name = "#{zip_root_folder}.zip"
    zip_path = File.join(tmp_dir, zip_name)
    Dir.mkdir(archive_dir)

    bug_reports = []
    attachments.each do |attachment, path|
      attachment_path = File.join(archive_dir, path)
      FileUtils.mkdir_p(File.dirname(attachment_path))
      begin
        with_retry(max_attempt: 1) do
          ActiveStorage::DownloadableFile.download(attachment: attachment,
                                                   destination_path: attachment_path,
                                                   in_chunk: true)
        end
      rescue => e
        Rails.logger.error("Fail to download filename #{File.basename(attachment_path)} in procedure##{@procedure.id}, reason: #{e}")
        # Drop any partially written file so it does not end up in the archive.
        File.delete(attachment_path) if File.exist?(attachment_path)
        bug_reports << "Impossible de récupérer le fichier #{File.basename(attachment_path)}\n"
      end
    end

    if bug_reports.any?
      File.write(File.join(archive_dir, 'LISEZMOI.txt'), bug_reports.join)
    end

    # Argument-vector form: no shell is involved, so paths containing spaces or
    # metacharacters are safe. Names are relative to tmp_dir (the child's cwd),
    # which also keeps a relative ARCHIVE_CREATION_DIR working.
    zipped = system('zip', '-r', zip_name, zip_root_folder, chdir: tmp_dir)
    if !zipped
      Rails.logger.error("Fail to zip #{zip_root_folder} in procedure##{@procedure.id}")
    end

    yield(zip_path)
  end
end
# Name of the top-level folder inside the archive: "procedure-" followed by
# the id of the procedure held in @procedure.
#
# @return [String]
def zip_root_folder
  'procedure-' + @procedure.id.to_s
end