diff --git a/app/lib/download_manager/parallel_download_queue.rb b/app/lib/download_manager/parallel_download_queue.rb index ab734e30d..f7c693200 100644 --- a/app/lib/download_manager/parallel_download_queue.rb +++ b/app/lib/download_manager/parallel_download_queue.rb @@ -1,6 +1,5 @@ module DownloadManager class ParallelDownloadQueue - include Utils::Retryable DOWNLOAD_MAX_PARALLEL = ENV.fetch('DOWNLOAD_MAX_PARALLEL') { 10 } attr_accessor :attachments, @@ -17,11 +16,9 @@ module DownloadManager attachments.map do |attachment, path| begin - with_retry(max_attempt: 1) do - download_one(attachment: attachment, - path_in_download_dir: path, - http_client: hydra) - end + download_one(attachment: attachment, + path_in_download_dir: path, + http_client: hydra) rescue => e on_error.call(attachment, path, e) end @@ -47,14 +44,12 @@ module DownloadManager request.on_complete do |response| fd.close unless response.success? - raise 'ko' + File.delete(attachment_path) if File.exist?(attachment_path) # -> case of retries failed, must cleanup partialy downloaded file + on_error.call(attachment, path_in_download_dir, response.code) end end http_client.queue(request) end - rescue - File.delete(attachment_path) if File.exist?(attachment_path) # -> case of retries failed, must cleanup partialy downloaded file - raise end # rubocop:enable Style/AutoResourceCleanup end diff --git a/app/lib/download_manager/procedure_attachments_export.rb b/app/lib/download_manager/procedure_attachments_export.rb index d343c343d..bae43a39a 100644 --- a/app/lib/download_manager/procedure_attachments_export.rb +++ b/app/lib/download_manager/procedure_attachments_export.rb @@ -9,18 +9,20 @@ module DownloadManager @procedure = procedure @errors = {} @queue = ParallelDownloadQueue.new(attachments, destination) - @queue.on_error = proc do |_attachment, path, error| - errors[path] = true + @queue.on_error = proc do |attachment, path, error| + errors[path] = [attachment, path] Rails.logger.error("Fail to download filename #{path} in procedure##{@procedure.id}, reason: #{error}") end - end - - def download_all - @queue.download_all - write_report if !errors.empty? end - private + def download_all(attempt_left: 1) + @queue.download_all + if !errors.empty? && attempt_left.positive? + retryable_queue = self.class.new(@procedure, errors.values, destination) + retryable_queue.download_all(attempt_left: 0) + retryable_queue.write_report if !retryable_queue.errors.empty? + end + end def write_report manifest_path = File.join(destination, 'LISEZMOI.txt') diff --git a/app/lib/utils/retryable.rb b/app/lib/utils/retryable.rb deleted file mode 100644 index d4ab13fca..000000000 --- a/app/lib/utils/retryable.rb +++ /dev/null @@ -1,20 +0,0 @@ -module Utils - module Retryable - # usage: - # max_attempt : retry count - # errors : only retry those errors - # with_retry(max_attempt: 10, errors: [StandardError]) do - # do_something_which_can_fail - # end - def with_retry(max_attempt: 1, errors: [StandardError], &block) - limiter = 0 - begin - yield - rescue *errors - limiter += 1 - retry if limiter <= max_attempt - raise - end - end - end -end diff --git a/spec/fixtures/cassettes/archive/file_to_get.yml b/spec/fixtures/cassettes/archive/file_to_get.yml deleted file mode 100644 index 3e4f18e24..000000000 --- a/spec/fixtures/cassettes/archive/file_to_get.yml +++ /dev/null @@ -1,51 +0,0 @@ ---- -http_interactions: -- request: - method: get - uri: http://file.to/get.ext - body: - encoding: US-ASCII - string: '' - headers: - User-Agent: - - demarches-simplifiees.fr - Expect: - - '' - response: - status: - code: 200 - message: '' - headers: - Last-Modified: - - Thu, 16 Dec 2021 13:04:07 GMT - X-Trans-Id: - - tx62bf43b03d7e4b60b3f25-0061d45dbd - Accept-Ranges: - - bytes - Expires: - - Tue, 04 Jan 2022 15:20:54 GMT - X-Openstack-Request-Id: - - tx62bf43b03d7e4b60b3f25-0061d45dbd - Content-Type: - - image/png - Date: - - Tue, 04 Jan 2022 14:46:21 GMT - X-Iplb-Request-Id: - - 877D6D0C:B8E4_5762BBC9:01BB_61D45DBD_104936A5:293F4 - X-Timestamp: - - '1639659846.52947' - Etag: - - 49961feab1c277af65fcb876c379cebf - X-Iplb-Instance: - - '42085' - Content-Length: - - '494761' - Content-Disposition: - - inline; filename="Screen Shot 2021-12-09 at 9.42.44 AM.png"; filename*=UTF-8''Screen%20Shot%202021-12-09%20at%209.42.44%20AM.png - Strict-Transport-Security: - - max-age=63072000 - body: - encoding: ASCII-8BIT - string: '' - recorded_at: Wed, 04 Mar 2020 23:00:00 GMT -recorded_with: VCR 6.0.0 diff --git a/spec/fixtures/cassettes/archive/file_to_get_typhoeus.yml b/spec/fixtures/cassettes/archive/file_to_get_typhoeus.yml deleted file mode 100644 index 557f10345..000000000 --- a/spec/fixtures/cassettes/archive/file_to_get_typhoeus.yml +++ /dev/null @@ -1,145 +0,0 @@ ---- -http_interactions: -- request: - method: get - uri: https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg - body: - encoding: US-ASCII - string: '' - headers: - User-Agent: - - demarches-simplifiees.fr - Expect: - - '' - response: - status: - code: 200 - message: '' - headers: - Cache-Control: - - public, max-age=365000000, immutable - Content-Type: - - image/jpeg - Etag: - - '"J9Qt3QnUKZIbmKehfH+pmv/rYxafyM81ENfBKsCN6Qw"' - Expires: - - Mon, 02 Jan 2023 05:24:30 GMT - Fastly-Io-Info: - - ifsz=14677 idim=600x600 ifmt=jpeg ofsz=43339 odim=1588x1588 ofmt=jpeg - Fastly-Stats: - - io=1 - Server: - - UploadServer - X-Goog-Generation: - - '1514228438620441' - X-Goog-Hash: - - crc32c=ZohETA== - - md5=iKPGsaOoUN0hhNZYYzYDLQ== - X-Goog-Metageneration: - - '1' - X-Goog-Storage-Class: - - MULTI_REGIONAL - X-Goog-Stored-Content-Encoding: - - identity - X-Goog-Stored-Content-Length: - - '14677' - X-Guploader-Uploadid: - - ADPycdvXmKF1KUStMVeN1v5TUKBQA_YezSueBDRwp4qiVKTn5IDoW7f3_t6_tyvJwjkOoUE4lO1cC_NxSl-LkM5ukthJcv6JkA - Via: - - 1.1 varnish, 1.1 varnish - Accept-Ranges: - - bytes - Date: - - Tue, 04 Jan 2022 14:59:40 GMT - Age: - - '207310' - X-Served-By: - - cache-mdw17340-MDW, cache-cdg20755-CDG - X-Cache: - - HIT, HIT - X-Cache-Hits: - - 1, 1 - X-Timer: - - S1641308380.238785,VS0,VE1 - Vary: - - Accept - Strict-Transport-Security: - - max-age=300 - Content-Length: - - '43339' - body: - encoding: ASCII-8BIT - string: '' - recorded_at: Wed, 04 Mar 2020 23:00:00 GMT -- request: - method: get - uri: https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg - body: - encoding: US-ASCII - string: '' - headers: - User-Agent: - - demarches-simplifiees.fr - Expect: - - '' - response: - status: - code: 200 - message: '' - headers: - Cache-Control: - - public, max-age=365000000, immutable - Content-Type: - - image/jpeg - Etag: - - '"J9Qt3QnUKZIbmKehfH+pmv/rYxafyM81ENfBKsCN6Qw"' - Expires: - - Mon, 02 Jan 2023 05:24:30 GMT - Fastly-Io-Info: - - ifsz=14677 idim=600x600 ifmt=jpeg ofsz=43339 odim=1588x1588 ofmt=jpeg - Fastly-Stats: - - io=1 - Server: - - UploadServer - X-Goog-Generation: - - '1514228438620441' - X-Goog-Hash: - - crc32c=ZohETA== - - md5=iKPGsaOoUN0hhNZYYzYDLQ== - X-Goog-Metageneration: - - '1' - X-Goog-Storage-Class: - - MULTI_REGIONAL - X-Goog-Stored-Content-Encoding: - - identity - X-Goog-Stored-Content-Length: - - '14677' - X-Guploader-Uploadid: - - ADPycdvXmKF1KUStMVeN1v5TUKBQA_YezSueBDRwp4qiVKTn5IDoW7f3_t6_tyvJwjkOoUE4lO1cC_NxSl-LkM5ukthJcv6JkA - Via: - - 1.1 varnish, 1.1 varnish - Accept-Ranges: - - bytes - Date: - - Tue, 04 Jan 2022 14:59:40 GMT - Age: - - '207310' - X-Served-By: - - cache-mdw17340-MDW, cache-cdg20737-CDG - X-Cache: - - HIT, HIT - X-Cache-Hits: - - 1, 1 - X-Timer: - - S1641308380.241689,VS0,VE1 - Vary: - - Accept - Strict-Transport-Security: - - max-age=300 - Content-Length: - - '43339' - body: - encoding: ASCII-8BIT - string: '' - recorded_at: Wed, 04 Mar 2020 23:00:00 GMT -recorded_with: VCR 6.0.0 diff --git a/spec/fixtures/cassettes/archive/new_file_to_get_200.yml b/spec/fixtures/cassettes/archive/new_file_to_get_200.yml new file mode 100644 index 000000000..b8e968d95 --- /dev/null +++ b/spec/fixtures/cassettes/archive/new_file_to_get_200.yml @@ -0,0 +1,80 @@ +--- +http_interactions: +- request: + method: get + uri: https://opengraph.githubassets.com/d0e7862b24d8026a3c03516d865b28151eb3859029c6c6c2e86605891fbdcd7a/socketry/async-io + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - demarches-simplifiees.fr + Expect: + - '' + response: + status: + code: 200 + message: '' + headers: + X-Ratelimit-Limit: + - '100' + X-Ratelimit-Remaining: + - '31' + X-Ratelimit-Reset: + - '1641406997' + Access-Control-Allow-Origin: + - "*" + Content-Security-Policy: + - default-src 'none';style-src 'unsafe-inline';font-src https://github.github.com;img-src + https://avatars.githubusercontent.com https://github.githubassets.com https://camo.githubusercontent.com + X-Dns-Prefetch-Control: + - 'off' + Expect-Ct: + - max-age=0 + X-Frame-Options: + - SAMEORIGIN + X-Download-Options: + - noopen + X-Content-Type-Options: + - nosniff + X-Permitted-Cross-Domain-Policies: + - none + Referrer-Policy: + - no-referrer + X-Xss-Protection: + - '0' + Cache-Control: + - public, max-age=21600, immutable + Content-Type: + - image/png + Etag: + - W/"106ec-qZkXmv4Ygfd8LZzQoW8mwToCU7k" + X-Github-Backend: + - Kubernetes + X-Github-Request-Id: + - 2BDC:1C4E:5B580:368C45:61D5E1B1 + Via: + - 1.1 varnish, 1.1 varnish + Accept-Ranges: + - bytes + Date: + - Thu, 06 Jan 2022 14:38:48 GMT + Age: + - '3435' + X-Served-By: + - cache-iad-kiad7000127-IAD, cache-cdg20776-CDG + X-Cache: + - HIT, HIT + X-Cache-Hits: + - 1, 1 + Strict-Transport-Security: + - max-age=31536000 + X-Fastly-Request-Id: + - '064738cfe513e9a05fc3af47b513b4f63b3f199c' + Content-Length: + - '67308' + body: + encoding: ASCII-8BIT + string: '' + recorded_at: Wed, 04 Mar 2020 23:00:00 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/fixtures/cassettes/archive/new_file_to_get_400_html.yml b/spec/fixtures/cassettes/archive/new_file_to_get_400_html.yml new file mode 100644 index 000000000..91bb6092c --- /dev/null +++ b/spec/fixtures/cassettes/archive/new_file_to_get_400_html.yml @@ -0,0 +1,105 @@ +--- +http_interactions: +- request: + method: get + uri: https://www.demarches-simplifiees.fr/error_2 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - demarches-simplifiees.fr + Expect: + - '' + response: + status: + code: 404 + message: '' + headers: + Server: + - nginx + Date: + - Thu, 06 Jan 2022 14:39:31 GMT + Content-Type: + - text/html; charset=UTF-8 + Content-Length: + - '1583' + X-Request-Id: + - 8881aea9-7dfc-442b-b818-a2988a8a39ee + X-Runtime: + - '0.003713' + Strict-Transport-Security: + - max-age=63072000 + body: + encoding: ASCII-8BIT + string: '' + recorded_at: Wed, 04 Mar 2020 23:00:00 GMT +- request: + method: get + uri: https://www.demarches-simplifiees.fr/error_1 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - demarches-simplifiees.fr + Expect: + - '' + response: + status: + code: 404 + message: '' + headers: + Server: + - nginx + Date: + - Thu, 06 Jan 2022 14:53:09 GMT + Content-Type: + - text/html; charset=UTF-8 + Content-Length: + - '1583' + X-Request-Id: + - ea764501-134e-49a4-bd65-7fb96f772908 + X-Runtime: + - '0.005202' + Strict-Transport-Security: + - max-age=63072000 + body: + encoding: ASCII-8BIT + string: '' + recorded_at: Wed, 04 Mar 2020 23:00:00 GMT +- request: + method: get + uri: https://www.demarches-simplifiees.fr/error_1 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - demarches-simplifiees.fr + Expect: + - '' + response: + status: + code: 404 + message: '' + headers: + Server: + - nginx + Date: + - Thu, 06 Jan 2022 14:53:09 GMT + Content-Type: + - text/html; charset=UTF-8 + Content-Length: + - '1583' + X-Request-Id: + - 27892c10-cc06-4b63-80ea-b14428d4585c + X-Runtime: + - '0.003955' + Strict-Transport-Security: + - max-age=63072000 + body: + encoding: ASCII-8BIT + string: '' + recorded_at: Wed, 04 Mar 2020 23:00:00 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/fixtures/cassettes/archive/old_file_to_get_200.yml b/spec/fixtures/cassettes/archive/old_file_to_get_200.yml new file mode 100644 index 000000000..b35f5e1f5 --- /dev/null +++ b/spec/fixtures/cassettes/archive/old_file_to_get_200.yml @@ -0,0 +1,157 @@ +--- +http_interactions: +- request: + method: get + uri: https://opengraph.githubassets.com/5e61989aecb78e369c93674f877d7bf4ecde378850114a9563cdf8b6a2472536/typhoeus/typhoeus/issues/110 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - demarches-simplifiees.fr + Expect: + - '' + response: + status: + code: 200 + message: '' + headers: + X-Ratelimit-Limit: + - '100' + X-Ratelimit-Remaining: + - '83' + X-Ratelimit-Reset: + - '1641476165' + Access-Control-Allow-Origin: + - "*" + Content-Security-Policy: + - default-src 'none';style-src 'unsafe-inline';font-src https://github.github.com;img-src + https://avatars.githubusercontent.com https://github.githubassets.com https://camo.githubusercontent.com + X-Dns-Prefetch-Control: + - 'off' + Expect-Ct: + - max-age=0 + X-Frame-Options: + - SAMEORIGIN + X-Download-Options: + - noopen + X-Content-Type-Options: + - nosniff + X-Permitted-Cross-Domain-Policies: + - none + Referrer-Policy: + - no-referrer + X-Xss-Protection: + - '0' + Cache-Control: + - public, max-age=21600, immutable + Content-Type: + - image/png + Etag: + - W/"1066a-R45DSLyb/5W3DaEyAWdsUpLVmr4" + X-Github-Backend: + - Kubernetes + X-Github-Request-Id: + - CB64:6748:E2B78:86C62A:61D6EE15 + Via: + - 1.1 varnish, 1.1 varnish + Accept-Ranges: + - bytes + Date: + - Thu, 06 Jan 2022 14:38:47 GMT + Age: + - '4322' + X-Served-By: + - cache-iad-kiad7000110-IAD, cache-cdg20721-CDG + X-Cache: + - MISS, HIT + X-Cache-Hits: + - 0, 1 + Strict-Transport-Security: + - max-age=31536000 + X-Fastly-Request-Id: + - b6b7ae28f1e40734296daed0187e36df9f25de8d + Content-Length: + - '67178' + body: + encoding: ASCII-8BIT + string: '' + recorded_at: Wed, 04 Mar 2020 23:00:00 GMT +- request: + method: get + uri: https://opengraph.githubassets.com/5e61989aecb78e369c93674f877d7bf4ecde378850114a9563cdf8b6a2472536/typhoeus/typhoeus/issues/110 + body: + encoding: US-ASCII + string: '' + headers: + User-Agent: + - demarches-simplifiees.fr + Expect: + - '' + response: + status: + code: 200 + message: '' + headers: + X-Ratelimit-Limit: + - '100' + X-Ratelimit-Remaining: + - '83' + X-Ratelimit-Reset: + - '1641476165' + Access-Control-Allow-Origin: + - "*" + Content-Security-Policy: + - default-src 'none';style-src 'unsafe-inline';font-src https://github.github.com;img-src + https://avatars.githubusercontent.com https://github.githubassets.com https://camo.githubusercontent.com + X-Dns-Prefetch-Control: + - 'off' + Expect-Ct: + - max-age=0 + X-Frame-Options: + - SAMEORIGIN + X-Download-Options: + - noopen + X-Content-Type-Options: + - nosniff + X-Permitted-Cross-Domain-Policies: + - none + Referrer-Policy: + - no-referrer + X-Xss-Protection: + - '0' + Cache-Control: + - public, max-age=21600, immutable + Content-Type: + - image/png + Etag: + - W/"1066a-R45DSLyb/5W3DaEyAWdsUpLVmr4" + X-Github-Backend: + - Kubernetes + X-Github-Request-Id: + - CB64:6748:E2B78:86C62A:61D6EE15 + Via: + - 1.1 varnish, 1.1 varnish + Accept-Ranges: + - bytes + Date: + - Thu, 06 Jan 2022 14:38:47 GMT + Age: + - '4322' + X-Served-By: + - cache-iad-kiad7000110-IAD, cache-cdg20767-CDG + X-Cache: + - MISS, HIT + X-Cache-Hits: + - 0, 1 + Strict-Transport-Security: + - max-age=31536000 + X-Fastly-Request-Id: + - 58c062baca0e760a7a6c348ab9c64cceb965dd1a + Content-Length: + - '67178' + body: + encoding: ASCII-8BIT + string: '' + recorded_at: Wed, 04 Mar 2020 23:00:00 GMT +recorded_with: VCR 6.0.0 diff --git a/spec/lib/utils/retryable_spec.rb b/spec/lib/utils/retryable_spec.rb deleted file mode 100644 index 5e556713d..000000000 --- a/spec/lib/utils/retryable_spec.rb +++ /dev/null @@ -1,36 +0,0 @@ -describe Utils::Retryable do - Includer = Struct.new(:something) do - include Utils::Retryable - - def caller(max_attempt:, errors:) - with_retry(max_attempt: max_attempt, errors: errors) do - yield - end - end - end - - subject { Includer.new("test") } - let(:spy) { double() } - - describe '#with_retry' do - it 'works while retry count is less than max attempts' do - divider_that_raise_error = 0 - divider_that_works = 1 - expect(spy).to receive(:divider).and_return(divider_that_raise_error, divider_that_works) - result = subject.caller(max_attempt: 2, errors: [ZeroDivisionError]) { 10 / spy.divider } - expect(result).to eq(10 / divider_that_works) - end - - it 're raise error if it occures more than max_attempt' do - expect(spy).to receive(:divider).and_return(0, 0) - expect { subject.caller(max_attempt: 1, errors: [ZeroDivisionError]) { 0 / spy.divider } } - .to raise_error(ZeroDivisionError) - end - - it 'does not retry other errors' do - expect(spy).to receive(:divider).and_raise(StandardError).once - expect { subject.caller(max_attempt: 2, errors: [ZeroDivisionError]) { 0 / spy.divider } } - .to raise_error(StandardError) - end - end -end diff --git a/spec/services/procedure_archive_service_spec.rb b/spec/services/procedure_archive_service_spec.rb index 43f5efedf..97ab7b735 100644 --- a/spec/services/procedure_archive_service_spec.rb +++ b/spec/services/procedure_archive_service_spec.rb @@ -128,12 +128,11 @@ describe ProcedureArchiveService do let(:archive) { create(:archive, time_span_type: 'monthly', status: 'pending', month: date_month) } let(:year) { 2021 } let(:mailer) { double('mailer', deliver_later: true) } - before do - allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("http://file.to/get.ext") - end - it 'collect files' do + + it 'collects files with success' do + allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://opengraph.githubassets.com/d0e7862b24d8026a3c03516d865b28151eb3859029c6c6c2e86605891fbdcd7a/socketry/async-io") expect(InstructeurMailer).to receive(:send_archive).and_return(mailer) - VCR.use_cassette('archive/file_to_get') do + VCR.use_cassette('archive/new_file_to_get_200') do service.collect_files_archive(archive, instructeur) end @@ -152,6 +151,26 @@ describe ProcedureArchiveService do expect(archive.file.attached?).to be_truthy end + it 'retry errors files with errors' do + allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://www.demarches-simplifiees.fr/error_1") + expect(InstructeurMailer).to receive(:send_archive).and_return(mailer) + VCR.use_cassette('archive/new_file_to_get_400.html') do + service.collect_files_archive(archive, instructeur) + end + archive.file.open do |f| + files = ZipTricks::FileReader.read_zip_structure(io: f) + structure = [ + "procedure-#{procedure.id}/", + "procedure-#{procedure.id}/dossier-#{dossier.id}/", + "procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/", + "procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2021-00-00-#{dossier.id}.pdf", + "procedure-#{procedure.id}/LISEZMOI.txt" + ] + expect(files.map(&:filename)).to match_array(structure) + end + expect(archive.file.attached?).to be_truthy + end + context 'with a missing file' do let(:pj) do PiecesJustificativesService::FakeAttachment.new( @@ -211,14 +230,12 @@ describe ProcedureArchiveService do context 'for all months' do let(:archive) { create(:archive, time_span_type: 'everything', status: 'pending') } let(:mailer) { double('mailer', deliver_later: true) } - before do - allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg") - end it 'collect files' do + allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://opengraph.githubassets.com/5e61989aecb78e369c93674f877d7bf4ecde378850114a9563cdf8b6a2472536/typhoeus/typhoeus/issues/110") expect(InstructeurMailer).to receive(:send_archive).and_return(mailer) - VCR.use_cassette('archive/file_to_get_typhoeus') do + VCR.use_cassette('archive/old_file_to_get_200') do service.collect_files_archive(archive, instructeur) end