Switch to usage of zip unix binary to create archive. Also use a dedicated queue for DelayedJob

use dedicated archives queue

As the used disk space will increase, we want a fined grain control

move zip logic in dedicated method

zip

wip

wip

fix(spec): pass spec in green

tech(improvements): avoid File.delete(folder), favor FileUtils.remove_entry_secure which is safer. Also wrap most of code that open file within blocks so it is cleaned when the block ends. Lastly use  attachement.download to avoid big memory pressure [download in chunk, write in chunk] otherwise big file [124>1GO] are loaded in memory. what if we run multiple jobs/download in parallel ?

fix(spec): try to retry with grace

clean(procedure_archive_service_spec.rb): better retry [avoid to rewrite on open file]

lint(things): everything
This commit is contained in:
simon lehericey 2021-11-29 15:43:51 +01:00 committed by Martin
parent 68a0b6f474
commit f0b0e7fd9a
6 changed files with 164 additions and 40 deletions

View file

@ -1,5 +1,5 @@
class ArchiveCreationJob < ApplicationJob
queue_as :exports
queue_as :archives
def perform(procedure, archive, instructeur)
ProcedureArchiveService

View file

@ -1,4 +1,20 @@
class ActiveStorage::DownloadableFile
# https://edgeapi.rubyonrails.org/classes/ActiveStorage/Blob.html#method-i-download
def self.download(attachment:, destination_path:, in_chunk: true)
byte_written = 0
File.open(destination_path, mode: 'wb') do |fd| # we expact a path as string, so we can recreate the file (ex: failure/retry on former existing fd)
if in_chunk
attachment.download do |chunk|
byte_written += fd.write(chunk)
end
else
byte_written = fd.write(attachment.download)
end
end
byte_written
end
def self.create_list_from_dossier(dossier, for_expert = false)
dossier_export = PiecesJustificativesService.generate_dossier_export(dossier)
pjs = [dossier_export] + PiecesJustificativesService.liste_documents(dossier, for_expert)

View file

@ -0,0 +1,20 @@
module Utils
module Retryable
# usage:
# max_attempt : retry count
# errors : only retry those errors
# with_retry(max_attempt: 10, errors: [StandardError]) do
# do_something_which_can_fail
# end
def with_retry(max_attempt: 1, errors: [StandardError], &block)
limiter = 0
begin
yield
rescue *errors
limiter += 1
retry if limiter <= max_attempt
raise
end
end
end
end

View file

@ -1,6 +1,10 @@
require 'tempfile'
require 'utils/retryable'
class ProcedureArchiveService
include Utils::Retryable
ARCHIVE_CREATION_DIR = ENV.fetch('ARCHIVE_CREATION_DIR') { '/tmp' }
def initialize(procedure)
@procedure = procedure
end
@ -14,39 +18,22 @@ class ProcedureArchiveService
end
def collect_files_archive(archive, instructeur)
## faux, ca ne doit prendre que certains groupe instructeur
if archive.time_span_type == 'everything'
dossiers = @procedure.dossiers.state_termine
else
dossiers = @procedure.dossiers.processed_in_month(archive.month)
end
files = create_list_of_attachments(dossiers)
tmp_file = Tempfile.new(['tc', '.zip'])
Zip::OutputStream.open(tmp_file) do |zipfile|
bug_reports = ''
files.each do |attachment, pj_filename|
zipfile.put_next_entry("#{zip_root_folder(@procedure)}/#{pj_filename}")
begin
zipfile.puts(attachment.download)
rescue
bug_reports += "Impossible de récupérer le fichier #{pj_filename}\n"
end
end
if !bug_reports.empty?
zipfile.put_next_entry("#{zip_root_folder(@procedure)}/LISEZMOI.txt")
zipfile.puts(bug_reports)
end
attachments = create_list_of_attachments(dossiers)
zip(attachments) do |zip_file|
archive.file.attach(
io: File.open(zip_file),
filename: archive.filename(@procedure),
# we don't want to run virus scanner on this file
metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
)
end
archive.file.attach(
io: File.open(tmp_file),
filename: archive.filename(@procedure),
# we don't want to run virus scanner on this file
metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
)
tmp_file.delete
archive.make_available!
InstructeurMailer.send_archive(instructeur, @procedure, archive).deliver_later
end
@ -63,7 +50,49 @@ class ProcedureArchiveService
private
def zip_root_folder(procedure)
def zip(attachments, &block)
Dir.mktmpdir(nil, ARCHIVE_CREATION_DIR) do |tmp_dir|
archive_dir = File.join(tmp_dir, zip_root_folder)
zip_path = File.join(ARCHIVE_CREATION_DIR, "#{zip_root_folder}.zip")
begin
FileUtils.remove_entry_secure(archive_dir) if Dir.exist?(archive_dir)
Dir.mkdir(archive_dir)
bug_reports = ''
attachments.each do |attachment, path|
attachment_path = File.join(archive_dir, path)
attachment_dir = File.dirname(attachment_path)
FileUtils.mkdir_p(attachment_dir) if !Dir.exist?(attachment_dir)
begin
with_retry(max_attempt: 1) do
ActiveStorage::DownloadableFile.download(attachment: attachment,
destination_path: attachment_path,
in_chunk: true)
end
rescue => e
Rails.logger.error("Fail to download filename #{File.basename(attachment_path)} in procedure##{@procedure.id}, reason: #{e}")
File.delete(attachment_path) if File.exist?(attachment_path)
bug_reports += "Impossible de récupérer le fichier #{File.basename(attachment_path)}\n"
end
end
if !bug_reports.empty?
File.write(File.join(archive_dir, 'LISEZMOI.txt'), bug_reports)
end
File.delete(zip_path) if File.exist?(zip_path)
puts `cd #{tmp_dir} && zip -r #{zip_path} #{zip_root_folder}`
yield(zip_path)
ensure
FileUtils.remove_entry_secure(archive_dir) if Dir.exist?(archive_dir)
File.delete(zip_path) if File.exist?(zip_path)
end
end
end
def zip_root_folder
"procedure-#{@procedure.id}"
end

View file

@ -0,0 +1,36 @@
describe Utils::Retryable do
Includer = Struct.new(:something) do
include Utils::Retryable
def caller(max_attempt:, errors:)
with_retry(max_attempt: max_attempt, errors: errors) do
yield
end
end
end
subject { Includer.new("test") }
let(:spy) { double() }
describe '#with_retry' do
it 'works while retry count is less than max attempts' do
divider_that_raise_error = 0
divider_that_works = 1
expect(spy).to receive(:divider).and_return(divider_that_raise_error, divider_that_works)
result = subject.caller(max_attempt: 2, errors: [ZeroDivisionError]) { 10 / spy.divider }
expect(result).to eq(10 / divider_that_works)
end
it 're raise error if it occures more than max_attempt' do
expect(spy).to receive(:divider).and_return(0, 0)
expect { subject.caller(max_attempt: 1, errors: [ZeroDivisionError]) { 0 / spy.divider } }
.to raise_error(ZeroDivisionError)
end
it 'does not retry other errors' do
expect(spy).to receive(:divider).and_raise(StandardError).once
expect { subject.caller(max_attempt: 2, errors: [ZeroDivisionError]) { 0 / spy.divider } }
.to raise_error(StandardError)
end
end
end

View file

@ -1,3 +1,4 @@
describe ProcedureArchiveService do
let(:procedure) { create(:procedure, :published) }
let(:instructeur) { create(:instructeur) }
@ -5,6 +6,7 @@ describe ProcedureArchiveService do
let(:year) { 2020 }
let(:month) { 3 }
let(:date_month) { Date.strptime("#{year}-#{month}", "%Y-%m") }
describe '#create_pending_archive' do
context 'for a specific month' do
it 'creates a pending archive' do
@ -28,10 +30,8 @@ describe ProcedureArchiveService do
end
describe '#collect_files_archive' do
before do
create_dossier_for_month(year, month)
create_dossier_for_month(2020, month)
end
let!(:dossier) { create_dossier_for_month(year, month) }
let!(:dossier_2020) { create_dossier_for_month(2020, month) }
after { Timecop.return }
@ -42,14 +42,19 @@ describe ProcedureArchiveService do
it 'collect files' do
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
service.collect_files_archive(archive, instructeur)
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
expect(files.size).to be 2
expect(files.first.filename).to include("export")
expect(files.last.filename).to include("attestation")
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/attestation-dossier--05-03-2021-00-00-#{dossier.attestation.pdf.id % 10000}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2021-00-00-#{dossier.id}.pdf"
]
expect(files.map(&:filename)).to match_array(structure)
end
expect(archive.file.attached?).to be_truthy
end
@ -89,9 +94,16 @@ describe ProcedureArchiveService do
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
expect(files.size).to be 4
expect(files.last.filename).to eq("procedure-#{procedure.id}/LISEZMOI.txt")
expect(extract(f, files.last)).to match(/Impossible de .*cni.*png/)
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-dossier-05-03-2020-00-00-1.pdf",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2021-00-00-#{dossier.id}.pdf",
"procedure-#{procedure.id}/LISEZMOI.txt"
]
expect(files.map(&:filename)).to match_array(structure)
expect(extract(f, files.last)).to match(/Impossible de .* .*cni.*png/)
end
end
end
@ -109,7 +121,18 @@ describe ProcedureArchiveService do
archive = Archive.last
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
expect(files.size).to be 4
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/attestation-dossier--05-03-2020-00-00-#{dossier.attestation.pdf.id % 10000}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2020-00-00-#{dossier.id}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/export-#{dossier_2020.id}-05-03-2020-00-00-#{dossier_2020.id}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/pieces_justificatives/attestation-dossier--05-03-2020-00-00-#{dossier_2020.attestation.pdf.id % 10000}.pdf"
]
expect(files.map(&:filename)).to match_array(structure)
end
expect(archive.file.attached?).to be_truthy
end