Merge pull request #6735 from betagouv/zip_using_zip

Zip using zip
This commit is contained in:
mfo 2021-12-13 17:50:29 +01:00 committed by GitHub
commit 776f531804
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 267 additions and 7 deletions

View file

@ -1,5 +1,5 @@
class ArchiveCreationJob < ApplicationJob
queue_as :exports
queue_as :archives
def perform(procedure, archive, instructeur)
ProcedureArchiveService

View file

@ -1,4 +1,20 @@
class ActiveStorage::DownloadableFile
# https://edgeapi.rubyonrails.org/classes/ActiveStorage/Blob.html#method-i-download
def self.download(attachment:, destination_path:, in_chunk: true)
byte_written = 0
File.open(destination_path, mode: 'wb') do |fd| # we expact a path as string, so we can recreate the file (ex: failure/retry on former existing fd)
if in_chunk
attachment.download do |chunk|
byte_written += fd.write(chunk)
end
else
byte_written = fd.write(attachment.download)
end
end
byte_written
end
def self.create_list_from_dossier(dossier, for_expert = false)
dossier_export = PiecesJustificativesService.generate_dossier_export(dossier)
pjs = [dossier_export] + PiecesJustificativesService.liste_documents(dossier, for_expert)

View file

@ -0,0 +1,20 @@
module Utils
module Retryable
# usage:
# max_attempt : retry count
# errors : only retry those errors
# with_retry(max_attempt: 10, errors: [StandardError]) do
# do_something_which_can_fail
# end
def with_retry(max_attempt: 1, errors: [StandardError], &block)
limiter = 0
begin
yield
rescue *errors
limiter += 1
retry if limiter <= max_attempt
raise
end
end
end
end

View file

@ -1,6 +1,9 @@
require 'tempfile'
class ProcedureArchiveService
include Utils::Retryable
ARCHIVE_CREATION_DIR = ENV.fetch('ARCHIVE_CREATION_DIR') { '/tmp' }
def initialize(procedure)
@procedure = procedure
end
@ -14,6 +17,35 @@ class ProcedureArchiveService
end
def collect_files_archive(archive, instructeur)
if Flipper.enabled?(:zip_using_binary, @procedure)
new_collect_files_archive(archive, instructeur)
else
old_collect_files_archive(archive, instructeur)
end
end
def new_collect_files_archive(archive, instructeur)
## faux, ca ne doit prendre que certains groupe instructeur ?
if archive.time_span_type == 'everything'
dossiers = @procedure.dossiers.state_termine
else
dossiers = @procedure.dossiers.processed_in_month(archive.month)
end
attachments = create_list_of_attachments(dossiers)
download_and_zip(attachments) do |zip_file|
archive.file.attach(
io: File.open(zip_file),
filename: archive.filename(@procedure),
# we don't want to run virus scanner on this file
metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
)
end
archive.make_available!
InstructeurMailer.send_archive(instructeur, @procedure, archive).deliver_later
end
def old_collect_files_archive(archive, instructeur)
if archive.time_span_type == 'everything'
dossiers = @procedure.dossiers.state_termine
else
@ -27,7 +59,7 @@ class ProcedureArchiveService
Zip::OutputStream.open(tmp_file) do |zipfile|
bug_reports = ''
files.each do |attachment, pj_filename|
zipfile.put_next_entry("#{zip_root_folder(@procedure)}/#{pj_filename}")
zipfile.put_next_entry("#{zip_root_folder}/#{pj_filename}")
begin
zipfile.puts(attachment.download)
rescue
@ -35,7 +67,7 @@ class ProcedureArchiveService
end
end
if !bug_reports.empty?
zipfile.put_next_entry("#{zip_root_folder(@procedure)}/LISEZMOI.txt")
zipfile.put_next_entry("#{zip_root_folder}/LISEZMOI.txt")
zipfile.puts(bug_reports)
end
end
@ -63,7 +95,51 @@ class ProcedureArchiveService
private
def zip_root_folder(procedure)
def download_and_zip(attachments, &block)
Dir.mktmpdir(nil, ARCHIVE_CREATION_DIR) do |tmp_dir|
archive_dir = File.join(tmp_dir, zip_root_folder)
zip_path = File.join(ARCHIVE_CREATION_DIR, "#{zip_root_folder}.zip")
begin
FileUtils.remove_entry_secure(archive_dir) if Dir.exist?(archive_dir)
Dir.mkdir(archive_dir)
bug_reports = ''
attachments.each do |attachment, path|
attachment_path = File.join(archive_dir, path)
attachment_dir = File.dirname(attachment_path)
FileUtils.mkdir_p(attachment_dir) if !Dir.exist?(attachment_dir)
begin
with_retry(max_attempt: 1) do
ActiveStorage::DownloadableFile.download(attachment: attachment,
destination_path: attachment_path,
in_chunk: true)
end
rescue => e
Rails.logger.error("Fail to download filename #{File.basename(attachment_path)} in procedure##{@procedure.id}, reason: #{e}")
File.delete(attachment_path) if File.exist?(attachment_path)
bug_reports += "Impossible de récupérer le fichier #{File.basename(attachment_path)}\n"
end
end
if !bug_reports.empty?
File.write(File.join(archive_dir, 'LISEZMOI.txt'), bug_reports)
end
File.delete(zip_path) if File.exist?(zip_path)
Dir.chdir(tmp_dir) do
system 'zip', '-r', zip_path, zip_root_folder
end
yield(zip_path)
ensure
FileUtils.remove_entry_secure(archive_dir) if Dir.exist?(archive_dir)
File.delete(zip_path) if File.exist?(zip_path)
end
end
end
def zip_root_folder
"procedure-#{@procedure.id}"
end

View file

@ -109,7 +109,7 @@
"check_name": "Redirect",
"message": "Possible unprotected redirect",
"file": "app/controllers/instructeurs/procedures_controller.rb",
"line": 190,
"line": 195,
"link": "https://brakemanscanner.org/docs/warning_types/redirect/",
"code": "redirect_to(Export.find_or_create_export(params[:export_format], (params[:time_span_type] or \"everything\"), current_instructeur.groupe_instructeurs.where(:procedure => procedure)).file.service_url)",
"render_path": null,
@ -143,6 +143,6 @@
"note": "no user input, fixed value"
}
],
"updated": "2021-11-26 13:22:41 +0100",
"updated": "2021-12-13 17:09:07 +0100",
"brakeman_version": "5.1.1"
}

View file

@ -0,0 +1,36 @@
describe Utils::Retryable do
Includer = Struct.new(:something) do
include Utils::Retryable
def caller(max_attempt:, errors:)
with_retry(max_attempt: max_attempt, errors: errors) do
yield
end
end
end
subject { Includer.new("test") }
let(:spy) { double() }
describe '#with_retry' do
it 'works while retry count is less than max attempts' do
divider_that_raise_error = 0
divider_that_works = 1
expect(spy).to receive(:divider).and_return(divider_that_raise_error, divider_that_works)
result = subject.caller(max_attempt: 2, errors: [ZeroDivisionError]) { 10 / spy.divider }
expect(result).to eq(10 / divider_that_works)
end
it 're raise error if it occures more than max_attempt' do
expect(spy).to receive(:divider).and_return(0, 0)
expect { subject.caller(max_attempt: 1, errors: [ZeroDivisionError]) { 0 / spy.divider } }
.to raise_error(ZeroDivisionError)
end
it 'does not retry other errors' do
expect(spy).to receive(:divider).and_raise(StandardError).once
expect { subject.caller(max_attempt: 2, errors: [ZeroDivisionError]) { 0 / spy.divider } }
.to raise_error(StandardError)
end
end
end

View file

@ -5,6 +5,7 @@ describe ProcedureArchiveService do
let(:year) { 2020 }
let(:month) { 3 }
let(:date_month) { Date.strptime("#{year}-#{month}", "%Y-%m") }
describe '#create_pending_archive' do
context 'for a specific month' do
it 'creates a pending archive' do
@ -27,7 +28,7 @@ describe ProcedureArchiveService do
end
end
describe '#collect_files_archive' do
describe '#old_collect_files_archive' do
before do
create_dossier_for_month(year, month)
create_dossier_for_month(2020, month)
@ -116,6 +117,117 @@ describe ProcedureArchiveService do
end
end
describe '#new_collect_files_archive' do
before { Flipper.enable_actor(:zip_using_binary, procedure) }
let!(:dossier) { create_dossier_for_month(year, month) }
let!(:dossier_2020) { create_dossier_for_month(2020, month) }
after { Timecop.return }
context 'for a specific month' do
let(:archive) { create(:archive, time_span_type: 'monthly', status: 'pending', month: date_month) }
let(:year) { 2021 }
let(:mailer) { double('mailer', deliver_later: true) }
it 'collect files' do
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
service.collect_files_archive(archive, instructeur)
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/attestation-dossier--05-03-2021-00-00-#{dossier.attestation.pdf.id % 10000}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2021-00-00-#{dossier.id}.pdf"
]
expect(files.map(&:filename)).to match_array(structure)
end
expect(archive.file.attached?).to be_truthy
end
context 'with a missing file' do
let(:pj) do
PiecesJustificativesService::FakeAttachment.new(
file: StringIO.new('coucou'),
filename: "export-dossier.pdf",
name: 'pdf_export_for_instructeur',
id: 1,
created_at: Time.zone.now
)
end
let(:bad_pj) do
PiecesJustificativesService::FakeAttachment.new(
file: nil,
filename: "cni.png",
name: 'cni.png',
id: 2,
created_at: Time.zone.now
)
end
let(:documents) { [pj, bad_pj] }
before do
allow(PiecesJustificativesService).to receive(:liste_documents).and_return(documents)
end
it 'collect files without raising exception' do
expect { service.collect_files_archive(archive, instructeur) }.not_to raise_exception
end
it 'add bug report to archive' do
service.collect_files_archive(archive, instructeur)
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-dossier-05-03-2020-00-00-1.pdf",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2021-00-00-#{dossier.id}.pdf",
"procedure-#{procedure.id}/LISEZMOI.txt"
]
expect(files.map(&:filename)).to match_array(structure)
expect(extract(f, files.last)).to match(/Impossible de .* .*cni.*png/)
end
end
end
end
context 'for all months' do
let(:archive) { create(:archive, time_span_type: 'everything', status: 'pending') }
let(:mailer) { double('mailer', deliver_later: true) }
it 'collect files' do
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
service.collect_files_archive(archive, instructeur)
archive = Archive.last
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/attestation-dossier--05-03-2020-00-00-#{dossier.attestation.pdf.id % 10000}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2020-00-00-#{dossier.id}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/export-#{dossier_2020.id}-05-03-2020-00-00-#{dossier_2020.id}.pdf",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier_2020.id}/pieces_justificatives/attestation-dossier--05-03-2020-00-00-#{dossier_2020.attestation.pdf.id % 10000}.pdf"
]
expect(files.map(&:filename)).to match_array(structure)
end
expect(archive.file.attached?).to be_truthy
end
end
end
private
def create_dossier_for_month(year, month)