Merge pull request #6750 from betagouv/US/download_parallel
6688-ETQ instructeur mes archives sont lentes à générer
This commit is contained in:
commit
f9f2bd8bce
8 changed files with 383 additions and 46 deletions
|
@ -1,20 +1,5 @@
|
|||
|
||||
class ActiveStorage::DownloadableFile
|
||||
# https://edgeapi.rubyonrails.org/classes/ActiveStorage/Blob.html#method-i-download
|
||||
# Writes the content of +attachment+ to +destination_path+ and returns the
# number of bytes written.
#
# See https://edgeapi.rubyonrails.org/classes/ActiveStorage/Blob.html#method-i-download
#
# @param attachment [#download] object whose +download+ either yields chunks
#   (when given a block) or returns the whole content (without a block)
# @param destination_path [String] path of the file to (re)create; a path is
#   expected rather than an open IO so the file can be recreated from scratch
#   (e.g. on a retry after a failure on a formerly existing descriptor)
# @param in_chunk [Boolean] stream chunk by chunk when true, otherwise write
#   the whole content in a single call
# @return [Integer] number of bytes written
def self.download(attachment:, destination_path:, in_chunk: true)
  File.open(destination_path, mode: 'wb') do |output|
    if in_chunk
      total = 0
      attachment.download { |chunk| total += output.write(chunk) }
      total
    else
      output.write(attachment.download)
    end
  end
end
|
||||
|
||||
def self.create_list_from_dossier(dossier, for_expert = false)
|
||||
dossier_export = PiecesJustificativesService.generate_dossier_export(dossier)
|
||||
pjs = [dossier_export] + PiecesJustificativesService.liste_documents(dossier, for_expert)
|
||||
|
|
61
app/lib/download_manager/parallel_download_queue.rb
Normal file
61
app/lib/download_manager/parallel_download_queue.rb
Normal file
|
@ -0,0 +1,61 @@
|
|||
module DownloadManager
  # Downloads a list of attachments into a destination directory.
  #
  # +attachments+ is iterated as (attachment, relative_path) pairs.
  # PiecesJustificativesService::FakeAttachment instances are written straight
  # from their in-memory +file+; every other attachment is fetched from its
  # +url+ through a shared Typhoeus::Hydra so requests run concurrently.
  #
  # Failures are reported through the +on_error+ callable, which is invoked
  # with (attachment, path, error). It must be assigned before #download_all.
  class ParallelDownloadQueue
    include Utils::Retryable

    # ENV values are Strings: coerce to an Integer before handing it to Hydra.
    DOWNLOAD_MAX_PARALLEL = Integer(ENV.fetch('DOWNLOAD_MAX_PARALLEL', 10))

    attr_accessor :attachments,
                  :destination,
                  :on_error

    # @param attachments [Enumerable] (attachment, path relative to destination) pairs
    # @param destination [String] directory the files are written into
    def initialize(attachments, destination)
      @attachments = attachments
      @destination = destination
    end

    # Prepares every attachment (writing fake ones, queuing remote ones) and
    # then runs the queued HTTP requests.
    #
    # Errors raised while preparing an attachment are passed to +on_error+;
    # HTTP failures are passed to +on_error+ from the request callback (see
    # #download_one), so a single failed file does not abort the batch.
    #
    # @return the value returned by Typhoeus::Hydra#run
    def download_all
      hydra = Typhoeus::Hydra.new(max_concurrency: DOWNLOAD_MAX_PARALLEL)

      attachments.each do |attachment, path|
        begin
          with_retry(max_attempt: 1) do
            download_one(attachment: attachment,
                         path_in_download_dir: path,
                         http_client: hydra)
          end
        rescue => e
          on_error.call(attachment, path, e)
        end
      end

      hydra.run
    end

    # rubocop:disable Style/AutoResourceCleanup
    # The block form of File.open can't be used with typhoeus, otherwise the
    # file is closed before the request is run by hydra.
    #
    # Writes a FakeAttachment immediately, or queues an HTTP request on
    # +http_client+ whose body is streamed to disk when the client runs.
    #
    # @param attachment [#url, PiecesJustificativesService::FakeAttachment]
    # @param path_in_download_dir [String] path relative to +destination+
    # @param http_client [#queue] client the request is queued on
    # @raise [StandardError] re-raises any error hit while preparing the
    #   download, after removing the partially written file
    def download_one(attachment:, path_in_download_dir:, http_client:)
      fd = nil
      attachment_path = File.join(destination, path_in_download_dir)
      attachment_dir = File.dirname(attachment_path)

      FileUtils.mkdir_p(attachment_dir) if !Dir.exist?(attachment_dir) # defensive, do not write in undefined dir
      if attachment.is_a?(PiecesJustificativesService::FakeAttachment)
        File.write(attachment_path, attachment.file.read, mode: 'wb')
      else
        request = Typhoeus::Request.new(attachment.url)
        fd = File.open(attachment_path, mode: 'wb')
        request.on_body do |chunk|
          fd.write(chunk)
        end
        request.on_complete do |response|
          fd.close
          unless response.success?
            # This callback runs from hydra.run, outside of the rescue in
            # #download_all: raising here would abort every pending download
            # and skip on_error, so clean up and report explicitly instead.
            File.delete(attachment_path) if File.exist?(attachment_path)
            failure = RuntimeError.new("download failed (HTTP status #{response.code})")
            on_error.call(attachment, path_in_download_dir, failure)
          end
        end
        http_client.queue(request)
      end
    rescue
      fd.close if fd && !fd.closed? # do not leak the descriptor when queuing failed
      # case of failed retries: the partially downloaded file must be cleaned up
      File.delete(attachment_path) if attachment_path && File.exist?(attachment_path)
      raise
    end
    # rubocop:enable Style/AutoResourceCleanup
  end
end
|
34
app/lib/download_manager/procedure_attachments_export.rb
Normal file
34
app/lib/download_manager/procedure_attachments_export.rb
Normal file
|
@ -0,0 +1,34 @@
|
|||
module DownloadManager
  # Downloads the attachments of a procedure through a ParallelDownloadQueue,
  # records which paths failed, and writes a LISEZMOI.txt report in the
  # destination directory when at least one download failed.
  class ProcedureAttachmentsExport
    delegate :destination, to: :@queue

    attr_reader :queue
    attr_accessor :errors

    # @param procedure [#id] procedure whose id is used in the error log
    # @param attachments passed as-is to ParallelDownloadQueue
    # @param destination directory passed as-is to ParallelDownloadQueue
    def initialize(procedure, attachments, destination)
      @procedure = procedure
      @errors = {}
      @queue = ParallelDownloadQueue.new(attachments, destination)
      # Every failure reported by the queue is remembered by path and logged.
      @queue.on_error = proc do |_attachment, path, error|
        errors[path] = true
        Rails.logger.error("Fail to download filename #{path} in procedure##{@procedure.id}, reason: #{error}")
      end
    end

    # Runs the queue, then writes the failure report if anything failed.
    def download_all
      @queue.download_all
      write_report unless errors.empty?
    end

    private

    # Writes one line per failed path into LISEZMOI.txt under +destination+.
    def write_report
      report_lines = errors.keys.map do |failed_path|
        "Impossible de récupérer le fichier #{failed_path}"
      end

      File.write(File.join(destination, 'LISEZMOI.txt'), report_lines.join("\n"))
    end
  end
end
|
|
@ -1,7 +1,6 @@
|
|||
require 'tempfile'
|
||||
|
||||
class ProcedureArchiveService
|
||||
include Utils::Retryable
|
||||
ARCHIVE_CREATION_DIR = ENV.fetch('ARCHIVE_CREATION_DIR') { '/tmp' }
|
||||
|
||||
def initialize(procedure)
|
||||
|
@ -37,7 +36,6 @@ class ProcedureArchiveService
|
|||
archive.file.attach(
|
||||
io: File.open(zip_file),
|
||||
filename: archive.filename(@procedure),
|
||||
# we don't want to run virus scanner on this file
|
||||
metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
|
||||
)
|
||||
end
|
||||
|
@ -104,32 +102,12 @@ class ProcedureArchiveService
|
|||
FileUtils.remove_entry_secure(archive_dir) if Dir.exist?(archive_dir)
|
||||
Dir.mkdir(archive_dir)
|
||||
|
||||
bug_reports = ''
|
||||
attachments.each do |attachment, path|
|
||||
attachment_path = File.join(archive_dir, path)
|
||||
attachment_dir = File.dirname(attachment_path)
|
||||
download_manager = DownloadManager::ProcedureAttachmentsExport.new(@procedure, attachments, archive_dir)
|
||||
download_manager.download_all
|
||||
|
||||
FileUtils.mkdir_p(attachment_dir) if !Dir.exist?(attachment_dir)
|
||||
begin
|
||||
with_retry(max_attempt: 1) do
|
||||
ActiveStorage::DownloadableFile.download(attachment: attachment,
|
||||
destination_path: attachment_path,
|
||||
in_chunk: true)
|
||||
end
|
||||
rescue => e
|
||||
Rails.logger.error("Fail to download filename #{File.basename(attachment_path)} in procedure##{@procedure.id}, reason: #{e}")
|
||||
File.delete(attachment_path) if File.exist?(attachment_path)
|
||||
bug_reports += "Impossible de récupérer le fichier #{File.basename(attachment_path)}\n"
|
||||
end
|
||||
end
|
||||
|
||||
if !bug_reports.empty?
|
||||
File.write(File.join(archive_dir, 'LISEZMOI.txt'), bug_reports)
|
||||
end
|
||||
|
||||
File.delete(zip_path) if File.exist?(zip_path)
|
||||
Dir.chdir(tmp_dir) do
|
||||
system 'zip', '-r', zip_path, zip_root_folder
|
||||
File.delete(zip_path) if File.exist?(zip_path)
|
||||
system 'zip', '-0', '-r', zip_path, zip_root_folder
|
||||
end
|
||||
yield(zip_path)
|
||||
ensure
|
||||
|
@ -152,8 +130,8 @@ class ProcedureArchiveService
|
|||
# Returns the piece_justificative_file of every champ of type
# piece_justificative, keeping only the files that are attached.
#
# @param champs [Enumerable] objects responding to +type_champ+ and
#   +piece_justificative_file+
# @return [Array] the attached files
def self.attachments_from_champs_piece_justificative(champs)
  # Looked up once instead of once per champ.
  piece_justificative_type = TypeDeChamp.type_champs.fetch(:piece_justificative)

  champs
    .filter { |c| c.type_champ == piece_justificative_type }
    .map(&:piece_justificative_file)
    .filter(&:attached?) # single attachment check (it was previously done both before and after the map)
end
|
||||
|
||||
def self.liste_pieces_justificatives_for_archive(dossier)
|
||||
|
|
51
spec/fixtures/cassettes/archive/file_to_get.yml
vendored
Normal file
51
spec/fixtures/cassettes/archive/file_to_get.yml
vendored
Normal file
|
@ -0,0 +1,51 @@
|
|||
---
|
||||
http_interactions:
|
||||
- request:
|
||||
method: get
|
||||
uri: http://file.to/get.ext
|
||||
body:
|
||||
encoding: US-ASCII
|
||||
string: ''
|
||||
headers:
|
||||
User-Agent:
|
||||
- demarches-simplifiees.fr
|
||||
Expect:
|
||||
- ''
|
||||
response:
|
||||
status:
|
||||
code: 200
|
||||
message: ''
|
||||
headers:
|
||||
Last-Modified:
|
||||
- Thu, 16 Dec 2021 13:04:07 GMT
|
||||
X-Trans-Id:
|
||||
- tx62bf43b03d7e4b60b3f25-0061d45dbd
|
||||
Accept-Ranges:
|
||||
- bytes
|
||||
Expires:
|
||||
- Tue, 04 Jan 2022 15:20:54 GMT
|
||||
X-Openstack-Request-Id:
|
||||
- tx62bf43b03d7e4b60b3f25-0061d45dbd
|
||||
Content-Type:
|
||||
- image/png
|
||||
Date:
|
||||
- Tue, 04 Jan 2022 14:46:21 GMT
|
||||
X-Iplb-Request-Id:
|
||||
- 877D6D0C:B8E4_5762BBC9:01BB_61D45DBD_104936A5:293F4
|
||||
X-Timestamp:
|
||||
- '1639659846.52947'
|
||||
Etag:
|
||||
- 49961feab1c277af65fcb876c379cebf
|
||||
X-Iplb-Instance:
|
||||
- '42085'
|
||||
Content-Length:
|
||||
- '494761'
|
||||
Content-Disposition:
|
||||
- inline; filename="Screen Shot 2021-12-09 at 9.42.44 AM.png"; filename*=UTF-8''Screen%20Shot%202021-12-09%20at%209.42.44%20AM.png
|
||||
Strict-Transport-Security:
|
||||
- max-age=63072000
|
||||
body:
|
||||
encoding: ASCII-8BIT
|
||||
string: ''
|
||||
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
|
||||
recorded_with: VCR 6.0.0
|
145
spec/fixtures/cassettes/archive/file_to_get_typhoeus.yml
vendored
Normal file
145
spec/fixtures/cassettes/archive/file_to_get_typhoeus.yml
vendored
Normal file
|
@ -0,0 +1,145 @@
|
|||
---
|
||||
http_interactions:
|
||||
- request:
|
||||
method: get
|
||||
uri: https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg
|
||||
body:
|
||||
encoding: US-ASCII
|
||||
string: ''
|
||||
headers:
|
||||
User-Agent:
|
||||
- demarches-simplifiees.fr
|
||||
Expect:
|
||||
- ''
|
||||
response:
|
||||
status:
|
||||
code: 200
|
||||
message: ''
|
||||
headers:
|
||||
Cache-Control:
|
||||
- public, max-age=365000000, immutable
|
||||
Content-Type:
|
||||
- image/jpeg
|
||||
Etag:
|
||||
- '"J9Qt3QnUKZIbmKehfH+pmv/rYxafyM81ENfBKsCN6Qw"'
|
||||
Expires:
|
||||
- Mon, 02 Jan 2023 05:24:30 GMT
|
||||
Fastly-Io-Info:
|
||||
- ifsz=14677 idim=600x600 ifmt=jpeg ofsz=43339 odim=1588x1588 ofmt=jpeg
|
||||
Fastly-Stats:
|
||||
- io=1
|
||||
Server:
|
||||
- UploadServer
|
||||
X-Goog-Generation:
|
||||
- '1514228438620441'
|
||||
X-Goog-Hash:
|
||||
- crc32c=ZohETA==
|
||||
- md5=iKPGsaOoUN0hhNZYYzYDLQ==
|
||||
X-Goog-Metageneration:
|
||||
- '1'
|
||||
X-Goog-Storage-Class:
|
||||
- MULTI_REGIONAL
|
||||
X-Goog-Stored-Content-Encoding:
|
||||
- identity
|
||||
X-Goog-Stored-Content-Length:
|
||||
- '14677'
|
||||
X-Guploader-Uploadid:
|
||||
- ADPycdvXmKF1KUStMVeN1v5TUKBQA_YezSueBDRwp4qiVKTn5IDoW7f3_t6_tyvJwjkOoUE4lO1cC_NxSl-LkM5ukthJcv6JkA
|
||||
Via:
|
||||
- 1.1 varnish, 1.1 varnish
|
||||
Accept-Ranges:
|
||||
- bytes
|
||||
Date:
|
||||
- Tue, 04 Jan 2022 14:59:40 GMT
|
||||
Age:
|
||||
- '207310'
|
||||
X-Served-By:
|
||||
- cache-mdw17340-MDW, cache-cdg20755-CDG
|
||||
X-Cache:
|
||||
- HIT, HIT
|
||||
X-Cache-Hits:
|
||||
- 1, 1
|
||||
X-Timer:
|
||||
- S1641308380.238785,VS0,VE1
|
||||
Vary:
|
||||
- Accept
|
||||
Strict-Transport-Security:
|
||||
- max-age=300
|
||||
Content-Length:
|
||||
- '43339'
|
||||
body:
|
||||
encoding: ASCII-8BIT
|
||||
string: ''
|
||||
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
|
||||
- request:
|
||||
method: get
|
||||
uri: https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg
|
||||
body:
|
||||
encoding: US-ASCII
|
||||
string: ''
|
||||
headers:
|
||||
User-Agent:
|
||||
- demarches-simplifiees.fr
|
||||
Expect:
|
||||
- ''
|
||||
response:
|
||||
status:
|
||||
code: 200
|
||||
message: ''
|
||||
headers:
|
||||
Cache-Control:
|
||||
- public, max-age=365000000, immutable
|
||||
Content-Type:
|
||||
- image/jpeg
|
||||
Etag:
|
||||
- '"J9Qt3QnUKZIbmKehfH+pmv/rYxafyM81ENfBKsCN6Qw"'
|
||||
Expires:
|
||||
- Mon, 02 Jan 2023 05:24:30 GMT
|
||||
Fastly-Io-Info:
|
||||
- ifsz=14677 idim=600x600 ifmt=jpeg ofsz=43339 odim=1588x1588 ofmt=jpeg
|
||||
Fastly-Stats:
|
||||
- io=1
|
||||
Server:
|
||||
- UploadServer
|
||||
X-Goog-Generation:
|
||||
- '1514228438620441'
|
||||
X-Goog-Hash:
|
||||
- crc32c=ZohETA==
|
||||
- md5=iKPGsaOoUN0hhNZYYzYDLQ==
|
||||
X-Goog-Metageneration:
|
||||
- '1'
|
||||
X-Goog-Storage-Class:
|
||||
- MULTI_REGIONAL
|
||||
X-Goog-Stored-Content-Encoding:
|
||||
- identity
|
||||
X-Goog-Stored-Content-Length:
|
||||
- '14677'
|
||||
X-Guploader-Uploadid:
|
||||
- ADPycdvXmKF1KUStMVeN1v5TUKBQA_YezSueBDRwp4qiVKTn5IDoW7f3_t6_tyvJwjkOoUE4lO1cC_NxSl-LkM5ukthJcv6JkA
|
||||
Via:
|
||||
- 1.1 varnish, 1.1 varnish
|
||||
Accept-Ranges:
|
||||
- bytes
|
||||
Date:
|
||||
- Tue, 04 Jan 2022 14:59:40 GMT
|
||||
Age:
|
||||
- '207310'
|
||||
X-Served-By:
|
||||
- cache-mdw17340-MDW, cache-cdg20737-CDG
|
||||
X-Cache:
|
||||
- HIT, HIT
|
||||
X-Cache-Hits:
|
||||
- 1, 1
|
||||
X-Timer:
|
||||
- S1641308380.241689,VS0,VE1
|
||||
Vary:
|
||||
- Accept
|
||||
Strict-Transport-Security:
|
||||
- max-age=300
|
||||
Content-Length:
|
||||
- '43339'
|
||||
body:
|
||||
encoding: ASCII-8BIT
|
||||
string: ''
|
||||
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
|
||||
recorded_with: VCR 6.0.0
|
44
spec/lib/download_manager/parallel_download_queue_spec.rb
Normal file
44
spec/lib/download_manager/parallel_download_queue_spec.rb
Normal file
|
@ -0,0 +1,44 @@
|
|||
# Specs for DownloadManager::ParallelDownloadQueue#download_one with an
# in-memory PiecesJustificativesService::FakeAttachment (no HTTP involved).
describe DownloadManager::ParallelDownloadQueue do
  let(:test_dir) { Dir.mktmpdir(nil, Dir.tmpdir) }
  let(:download_to_dir) { test_dir }
  let(:downloadable_manager) { DownloadManager::ParallelDownloadQueue.new([attachment], download_to_dir) }

  before do
    # download_all reports failures through on_error: install a no-op handler
    downloadable_manager.on_error = proc { |_, _, _| }
  end

  after { FileUtils.remove_entry_secure(test_dir) if Dir.exist?(test_dir) }

  describe '#download_one' do
    subject { downloadable_manager.download_one(attachment: attachment, path_in_download_dir: destination, http_client: double) }

    let(:destination) { 'lol.png' }
    let(:attachment) do
      PiecesJustificativesService::FakeAttachment.new(
        file: StringIO.new('coucou'),
        filename: "export-dossier.pdf",
        name: 'pdf_export_for_instructeur',
        id: 1,
        created_at: Time.zone.now
      )
    end

    context 'with a PiecesJustificativesService::FakeAttachment and it works' do
      it 'write attachment.file to disk' do
        target = File.join(download_to_dir, destination)
        expect { subject }.to change { File.exist?(target) }.from(false).to(true)
        # the file was consumed by the download: rewind before comparing
        attachment.file.rewind
        expect(attachment.file.read).to eq(File.read(target))
      end
    end

    context 'with a PiecesJustificativesService::FakeAttachment and it fails' do
      it 'raises and does not leave a file on disk' do
        expect(attachment.file).to receive(:read).and_raise("boom")
        target = File.join(download_to_dir, destination)
        expect { subject }.to raise_error(StandardError)
        expect(File.exist?(target)).to be_falsey
      end
    end
  end
end
|
|
@ -128,10 +128,14 @@ describe ProcedureArchiveService do
|
|||
let(:archive) { create(:archive, time_span_type: 'monthly', status: 'pending', month: date_month) }
|
||||
let(:year) { 2021 }
|
||||
let(:mailer) { double('mailer', deliver_later: true) }
|
||||
|
||||
before do
|
||||
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("http://file.to/get.ext")
|
||||
end
|
||||
it 'collect files' do
|
||||
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
|
||||
service.collect_files_archive(archive, instructeur)
|
||||
VCR.use_cassette('archive/file_to_get') do
|
||||
service.collect_files_archive(archive, instructeur)
|
||||
end
|
||||
|
||||
archive.file.open do |f|
|
||||
files = ZipTricks::FileReader.read_zip_structure(io: f)
|
||||
|
@ -207,11 +211,16 @@ describe ProcedureArchiveService do
|
|||
context 'for all months' do
|
||||
let(:archive) { create(:archive, time_span_type: 'everything', status: 'pending') }
|
||||
let(:mailer) { double('mailer', deliver_later: true) }
|
||||
before do
|
||||
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg")
|
||||
end
|
||||
|
||||
it 'collect files' do
|
||||
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
|
||||
|
||||
service.collect_files_archive(archive, instructeur)
|
||||
VCR.use_cassette('archive/file_to_get_typhoeus') do
|
||||
service.collect_files_archive(archive, instructeur)
|
||||
end
|
||||
|
||||
archive = Archive.last
|
||||
archive.file.open do |f|
|
||||
|
@ -234,6 +243,36 @@ describe ProcedureArchiveService do
|
|||
end
|
||||
end
|
||||
|
||||
# Specs for the private #download_and_zip helper, called with no attachments:
# they cover the working directory lifecycle and the zip command invocation.
describe '#download_and_zip' do
  it 'create a tmpdir while block is running' do
    entries_before = Dir.entries(ProcedureArchiveService::ARCHIVE_CREATION_DIR)

    service.send(:download_and_zip, []) do |_zip_file|
      entries_during = Dir.entries(ProcedureArchiveService::ARCHIVE_CREATION_DIR)
      expect(entries_before).not_to eq(entries_during)
    end
  end

  it 'cleans up its tmpdir after block execution' do
    run_download = -> { service.send(:download_and_zip, []) { |_zip_file| } }

    expect { run_download.call }
      .not_to change { Dir.entries(ProcedureArchiveService::ARCHIVE_CREATION_DIR) }
  end

  it 'creates a zip with zip utility' do
    zip_name = "#{service.send(:zip_root_folder)}.zip"
    expected_zip_path = File.join(ProcedureArchiveService::ARCHIVE_CREATION_DIR, zip_name)

    expect(service).to receive(:system).with('zip', '-0', '-r', expected_zip_path, an_instance_of(String))

    service.send(:download_and_zip, []) { |_zip_path| }
  end

  it 'cleans up its generated zip' do
    zip_name = "#{service.send(:zip_root_folder)}.zip"
    expected_zip_path = File.join(ProcedureArchiveService::ARCHIVE_CREATION_DIR, zip_name)

    # the zip must exist while the block runs...
    service.send(:download_and_zip, []) do |_zip_path|
      expect(File.exist?(expected_zip_path)).to be_truthy
    end

    # ...and be removed once the block has returned
    expect(File.exist?(expected_zip_path)).to be_falsey
  end
end
|
||||
|
||||
private
|
||||
|
||||
def create_dossier_for_month(year, month)
|
||||
|
|
Loading…
Reference in a new issue