fix(parallel_download_queue): typhoeus does not like a raise from a request handler [crashes straight to the first frame]

This commit is contained in:
Martin 2022-01-06 15:42:56 +01:00
parent e4755c5433
commit 383a92bcd9
10 changed files with 383 additions and 279 deletions

View file

@ -1,6 +1,5 @@
module DownloadManager
class ParallelDownloadQueue
include Utils::Retryable
DOWNLOAD_MAX_PARALLEL = ENV.fetch('DOWNLOAD_MAX_PARALLEL') { 10 }
attr_accessor :attachments,
@ -17,11 +16,9 @@ module DownloadManager
attachments.map do |attachment, path|
begin
with_retry(max_attempt: 1) do
download_one(attachment: attachment,
path_in_download_dir: path,
http_client: hydra)
end
download_one(attachment: attachment,
path_in_download_dir: path,
http_client: hydra)
rescue => e
on_error.call(attachment, path, e)
end
@ -47,14 +44,12 @@ module DownloadManager
request.on_complete do |response|
fd.close
unless response.success?
raise 'ko'
File.delete(attachment_path) if File.exist?(attachment_path) # -> case of retries failed, must cleanup partialy downloaded file
on_error.call(attachment, path_in_download_dir, response.code)
end
end
http_client.queue(request)
end
rescue
File.delete(attachment_path) if File.exist?(attachment_path) # -> case of retries failed, must cleanup partialy downloaded file
raise
end
# rubocop:enable Style/AutoResourceCleanup
end

View file

@ -9,18 +9,20 @@ module DownloadManager
@procedure = procedure
@errors = {}
@queue = ParallelDownloadQueue.new(attachments, destination)
@queue.on_error = proc do |_attachment, path, error|
errors[path] = true
@queue.on_error = proc do |attachment, path, error|
errors[path] = [attachment, path]
Rails.logger.error("Fail to download filename #{path} in procedure##{@procedure.id}, reason: #{error}")
end
end
def download_all
@queue.download_all
write_report if !errors.empty?
end
private
def download_all(attempt_left: 1)
@queue.download_all
if !errors.empty? && attempt_left.positive?
retryable_queue = self.class.new(@procedure, errors.values, destination)
retryable_queue.download_all(attempt_left: 0)
retryable_queue.write_report if !retryable_queue.errors.empty?
end
end
def write_report
manifest_path = File.join(destination, 'LISEZMOI.txt')

View file

@ -1,20 +0,0 @@
module Utils
  # Mixin that re-runs a block when it raises one of a configurable set of errors.
  module Retryable
    # Runs the given block and re-runs it when it raises one of +errors+.
    #
    # +max_attempt+ counts retries, not total executions: the block is run at
    # most <tt>max_attempt + 1</tt> times.
    #
    # @param max_attempt [Integer] maximum number of retries after a failure
    # @param errors [Array<Class>] exception classes that trigger a retry;
    #   any other exception propagates on its first occurrence
    # @yield the operation to attempt
    # @return [Object] the value returned by the first run that does not raise
    # @raise the last rescued exception once the retries are exhausted
    #
    # @example
    #   with_retry(max_attempt: 10, errors: [StandardError]) do
    #     do_something_which_can_fail
    #   end
    def with_retry(max_attempt: 1, errors: [StandardError])
      retries = 0
      begin
        yield
      rescue *errors
        retries += 1
        # `retry` restarts the begin block; the counter keeps the loop bounded.
        retry if retries <= max_attempt
        raise
      end
    end
  end
end

View file

@ -1,51 +0,0 @@
---
http_interactions:
- request:
method: get
uri: http://file.to/get.ext
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 200
message: ''
headers:
Last-Modified:
- Thu, 16 Dec 2021 13:04:07 GMT
X-Trans-Id:
- tx62bf43b03d7e4b60b3f25-0061d45dbd
Accept-Ranges:
- bytes
Expires:
- Tue, 04 Jan 2022 15:20:54 GMT
X-Openstack-Request-Id:
- tx62bf43b03d7e4b60b3f25-0061d45dbd
Content-Type:
- image/png
Date:
- Tue, 04 Jan 2022 14:46:21 GMT
X-Iplb-Request-Id:
- 877D6D0C:B8E4_5762BBC9:01BB_61D45DBD_104936A5:293F4
X-Timestamp:
- '1639659846.52947'
Etag:
- 49961feab1c277af65fcb876c379cebf
X-Iplb-Instance:
- '42085'
Content-Length:
- '494761'
Content-Disposition:
- inline; filename="Screen Shot 2021-12-09 at 9.42.44 AM.png"; filename*=UTF-8''Screen%20Shot%202021-12-09%20at%209.42.44%20AM.png
Strict-Transport-Security:
- max-age=63072000
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
recorded_with: VCR 6.0.0

View file

@ -1,145 +0,0 @@
---
http_interactions:
- request:
method: get
uri: https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 200
message: ''
headers:
Cache-Control:
- public, max-age=365000000, immutable
Content-Type:
- image/jpeg
Etag:
- '"J9Qt3QnUKZIbmKehfH+pmv/rYxafyM81ENfBKsCN6Qw"'
Expires:
- Mon, 02 Jan 2023 05:24:30 GMT
Fastly-Io-Info:
- ifsz=14677 idim=600x600 ifmt=jpeg ofsz=43339 odim=1588x1588 ofmt=jpeg
Fastly-Stats:
- io=1
Server:
- UploadServer
X-Goog-Generation:
- '1514228438620441'
X-Goog-Hash:
- crc32c=ZohETA==
- md5=iKPGsaOoUN0hhNZYYzYDLQ==
X-Goog-Metageneration:
- '1'
X-Goog-Storage-Class:
- MULTI_REGIONAL
X-Goog-Stored-Content-Encoding:
- identity
X-Goog-Stored-Content-Length:
- '14677'
X-Guploader-Uploadid:
- ADPycdvXmKF1KUStMVeN1v5TUKBQA_YezSueBDRwp4qiVKTn5IDoW7f3_t6_tyvJwjkOoUE4lO1cC_NxSl-LkM5ukthJcv6JkA
Via:
- 1.1 varnish, 1.1 varnish
Accept-Ranges:
- bytes
Date:
- Tue, 04 Jan 2022 14:59:40 GMT
Age:
- '207310'
X-Served-By:
- cache-mdw17340-MDW, cache-cdg20755-CDG
X-Cache:
- HIT, HIT
X-Cache-Hits:
- 1, 1
X-Timer:
- S1641308380.238785,VS0,VE1
Vary:
- Accept
Strict-Transport-Security:
- max-age=300
Content-Length:
- '43339'
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
- request:
method: get
uri: https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 200
message: ''
headers:
Cache-Control:
- public, max-age=365000000, immutable
Content-Type:
- image/jpeg
Etag:
- '"J9Qt3QnUKZIbmKehfH+pmv/rYxafyM81ENfBKsCN6Qw"'
Expires:
- Mon, 02 Jan 2023 05:24:30 GMT
Fastly-Io-Info:
- ifsz=14677 idim=600x600 ifmt=jpeg ofsz=43339 odim=1588x1588 ofmt=jpeg
Fastly-Stats:
- io=1
Server:
- UploadServer
X-Goog-Generation:
- '1514228438620441'
X-Goog-Hash:
- crc32c=ZohETA==
- md5=iKPGsaOoUN0hhNZYYzYDLQ==
X-Goog-Metageneration:
- '1'
X-Goog-Storage-Class:
- MULTI_REGIONAL
X-Goog-Stored-Content-Encoding:
- identity
X-Goog-Stored-Content-Length:
- '14677'
X-Guploader-Uploadid:
- ADPycdvXmKF1KUStMVeN1v5TUKBQA_YezSueBDRwp4qiVKTn5IDoW7f3_t6_tyvJwjkOoUE4lO1cC_NxSl-LkM5ukthJcv6JkA
Via:
- 1.1 varnish, 1.1 varnish
Accept-Ranges:
- bytes
Date:
- Tue, 04 Jan 2022 14:59:40 GMT
Age:
- '207310'
X-Served-By:
- cache-mdw17340-MDW, cache-cdg20737-CDG
X-Cache:
- HIT, HIT
X-Cache-Hits:
- 1, 1
X-Timer:
- S1641308380.241689,VS0,VE1
Vary:
- Accept
Strict-Transport-Security:
- max-age=300
Content-Length:
- '43339'
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
recorded_with: VCR 6.0.0

View file

@ -0,0 +1,80 @@
---
http_interactions:
- request:
method: get
uri: https://opengraph.githubassets.com/d0e7862b24d8026a3c03516d865b28151eb3859029c6c6c2e86605891fbdcd7a/socketry/async-io
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 200
message: ''
headers:
X-Ratelimit-Limit:
- '100'
X-Ratelimit-Remaining:
- '31'
X-Ratelimit-Reset:
- '1641406997'
Access-Control-Allow-Origin:
- "*"
Content-Security-Policy:
- default-src 'none';style-src 'unsafe-inline';font-src https://github.github.com;img-src
https://avatars.githubusercontent.com https://github.githubassets.com https://camo.githubusercontent.com
X-Dns-Prefetch-Control:
- 'off'
Expect-Ct:
- max-age=0
X-Frame-Options:
- SAMEORIGIN
X-Download-Options:
- noopen
X-Content-Type-Options:
- nosniff
X-Permitted-Cross-Domain-Policies:
- none
Referrer-Policy:
- no-referrer
X-Xss-Protection:
- '0'
Cache-Control:
- public, max-age=21600, immutable
Content-Type:
- image/png
Etag:
- W/"106ec-qZkXmv4Ygfd8LZzQoW8mwToCU7k"
X-Github-Backend:
- Kubernetes
X-Github-Request-Id:
- 2BDC:1C4E:5B580:368C45:61D5E1B1
Via:
- 1.1 varnish, 1.1 varnish
Accept-Ranges:
- bytes
Date:
- Thu, 06 Jan 2022 14:38:48 GMT
Age:
- '3435'
X-Served-By:
- cache-iad-kiad7000127-IAD, cache-cdg20776-CDG
X-Cache:
- HIT, HIT
X-Cache-Hits:
- 1, 1
Strict-Transport-Security:
- max-age=31536000
X-Fastly-Request-Id:
- '064738cfe513e9a05fc3af47b513b4f63b3f199c'
Content-Length:
- '67308'
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
recorded_with: VCR 6.0.0

View file

@ -0,0 +1,105 @@
---
http_interactions:
- request:
method: get
uri: https://www.demarches-simplifiees.fr/error_2
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 404
message: ''
headers:
Server:
- nginx
Date:
- Thu, 06 Jan 2022 14:39:31 GMT
Content-Type:
- text/html; charset=UTF-8
Content-Length:
- '1583'
X-Request-Id:
- 8881aea9-7dfc-442b-b818-a2988a8a39ee
X-Runtime:
- '0.003713'
Strict-Transport-Security:
- max-age=63072000
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
- request:
method: get
uri: https://www.demarches-simplifiees.fr/error_1
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 404
message: ''
headers:
Server:
- nginx
Date:
- Thu, 06 Jan 2022 14:53:09 GMT
Content-Type:
- text/html; charset=UTF-8
Content-Length:
- '1583'
X-Request-Id:
- ea764501-134e-49a4-bd65-7fb96f772908
X-Runtime:
- '0.005202'
Strict-Transport-Security:
- max-age=63072000
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
- request:
method: get
uri: https://www.demarches-simplifiees.fr/error_1
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 404
message: ''
headers:
Server:
- nginx
Date:
- Thu, 06 Jan 2022 14:53:09 GMT
Content-Type:
- text/html; charset=UTF-8
Content-Length:
- '1583'
X-Request-Id:
- 27892c10-cc06-4b63-80ea-b14428d4585c
X-Runtime:
- '0.003955'
Strict-Transport-Security:
- max-age=63072000
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
recorded_with: VCR 6.0.0

View file

@ -0,0 +1,157 @@
---
http_interactions:
- request:
method: get
uri: https://opengraph.githubassets.com/5e61989aecb78e369c93674f877d7bf4ecde378850114a9563cdf8b6a2472536/typhoeus/typhoeus/issues/110
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 200
message: ''
headers:
X-Ratelimit-Limit:
- '100'
X-Ratelimit-Remaining:
- '83'
X-Ratelimit-Reset:
- '1641476165'
Access-Control-Allow-Origin:
- "*"
Content-Security-Policy:
- default-src 'none';style-src 'unsafe-inline';font-src https://github.github.com;img-src
https://avatars.githubusercontent.com https://github.githubassets.com https://camo.githubusercontent.com
X-Dns-Prefetch-Control:
- 'off'
Expect-Ct:
- max-age=0
X-Frame-Options:
- SAMEORIGIN
X-Download-Options:
- noopen
X-Content-Type-Options:
- nosniff
X-Permitted-Cross-Domain-Policies:
- none
Referrer-Policy:
- no-referrer
X-Xss-Protection:
- '0'
Cache-Control:
- public, max-age=21600, immutable
Content-Type:
- image/png
Etag:
- W/"1066a-R45DSLyb/5W3DaEyAWdsUpLVmr4"
X-Github-Backend:
- Kubernetes
X-Github-Request-Id:
- CB64:6748:E2B78:86C62A:61D6EE15
Via:
- 1.1 varnish, 1.1 varnish
Accept-Ranges:
- bytes
Date:
- Thu, 06 Jan 2022 14:38:47 GMT
Age:
- '4322'
X-Served-By:
- cache-iad-kiad7000110-IAD, cache-cdg20721-CDG
X-Cache:
- MISS, HIT
X-Cache-Hits:
- 0, 1
Strict-Transport-Security:
- max-age=31536000
X-Fastly-Request-Id:
- b6b7ae28f1e40734296daed0187e36df9f25de8d
Content-Length:
- '67178'
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
- request:
method: get
uri: https://opengraph.githubassets.com/5e61989aecb78e369c93674f877d7bf4ecde378850114a9563cdf8b6a2472536/typhoeus/typhoeus/issues/110
body:
encoding: US-ASCII
string: ''
headers:
User-Agent:
- demarches-simplifiees.fr
Expect:
- ''
response:
status:
code: 200
message: ''
headers:
X-Ratelimit-Limit:
- '100'
X-Ratelimit-Remaining:
- '83'
X-Ratelimit-Reset:
- '1641476165'
Access-Control-Allow-Origin:
- "*"
Content-Security-Policy:
- default-src 'none';style-src 'unsafe-inline';font-src https://github.github.com;img-src
https://avatars.githubusercontent.com https://github.githubassets.com https://camo.githubusercontent.com
X-Dns-Prefetch-Control:
- 'off'
Expect-Ct:
- max-age=0
X-Frame-Options:
- SAMEORIGIN
X-Download-Options:
- noopen
X-Content-Type-Options:
- nosniff
X-Permitted-Cross-Domain-Policies:
- none
Referrer-Policy:
- no-referrer
X-Xss-Protection:
- '0'
Cache-Control:
- public, max-age=21600, immutable
Content-Type:
- image/png
Etag:
- W/"1066a-R45DSLyb/5W3DaEyAWdsUpLVmr4"
X-Github-Backend:
- Kubernetes
X-Github-Request-Id:
- CB64:6748:E2B78:86C62A:61D6EE15
Via:
- 1.1 varnish, 1.1 varnish
Accept-Ranges:
- bytes
Date:
- Thu, 06 Jan 2022 14:38:47 GMT
Age:
- '4322'
X-Served-By:
- cache-iad-kiad7000110-IAD, cache-cdg20767-CDG
X-Cache:
- MISS, HIT
X-Cache-Hits:
- 0, 1
Strict-Transport-Security:
- max-age=31536000
X-Fastly-Request-Id:
- 58c062baca0e760a7a6c348ab9c64cceb965dd1a
Content-Length:
- '67178'
body:
encoding: ASCII-8BIT
string: ''
recorded_at: Wed, 04 Mar 2020 23:00:00 GMT
recorded_with: VCR 6.0.0

View file

@ -1,36 +0,0 @@
describe Utils::Retryable do
  # Anonymous host class for the mixin. It is built through `let` instead of
  # being assigned to a constant: a constant assigned inside a `describe`
  # block is defined at the top level and would leak into other spec files.
  let(:includer_class) do
    Struct.new(:something) do
      include Utils::Retryable

      # Forwards its block to #with_retry so the examples can drive the mixin.
      # Deliberately not named `caller`, which would shadow Kernel#caller.
      def run_with_retry(max_attempt:, errors:)
        with_retry(max_attempt: max_attempt, errors: errors) do
          yield
        end
      end
    end
  end

  subject { includer_class.new("test") }

  # Stand-in whose #divider return values are scripted per example.
  let(:spy) { double() }

  describe '#with_retry' do
    it 'works while retry count is less than max attempts' do
      divider_that_raise_error = 0
      divider_that_works = 1
      # First call divides by zero, the retry divides by one.
      expect(spy).to receive(:divider).and_return(divider_that_raise_error, divider_that_works)
      result = subject.run_with_retry(max_attempt: 2, errors: [ZeroDivisionError]) { 10 / spy.divider }
      expect(result).to eq(10 / divider_that_works)
    end

    it 're raise error if it occures more than max_attempt' do
      # Initial run plus the single allowed retry both divide by zero.
      expect(spy).to receive(:divider).and_return(0, 0)
      expect { subject.run_with_retry(max_attempt: 1, errors: [ZeroDivisionError]) { 0 / spy.divider } }
        .to raise_error(ZeroDivisionError)
    end

    it 'does not retry other errors' do
      # `.once` asserts the block is not run again for an unlisted error class.
      expect(spy).to receive(:divider).and_raise(StandardError).once
      expect { subject.run_with_retry(max_attempt: 2, errors: [ZeroDivisionError]) { 0 / spy.divider } }
        .to raise_error(StandardError)
    end
  end
end

View file

@ -128,12 +128,11 @@ describe ProcedureArchiveService do
let(:archive) { create(:archive, time_span_type: 'monthly', status: 'pending', month: date_month) }
let(:year) { 2021 }
let(:mailer) { double('mailer', deliver_later: true) }
before do
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("http://file.to/get.ext")
end
it 'collect files' do
it 'collects files with success' do
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://opengraph.githubassets.com/d0e7862b24d8026a3c03516d865b28151eb3859029c6c6c2e86605891fbdcd7a/socketry/async-io")
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
VCR.use_cassette('archive/file_to_get') do
VCR.use_cassette('archive/new_file_to_get_200') do
service.collect_files_archive(archive, instructeur)
end
@ -152,6 +151,26 @@ describe ProcedureArchiveService do
expect(archive.file.attached?).to be_truthy
end
it 'retry errors files with errors' do
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://www.demarches-simplifiees.fr/error_1")
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
VCR.use_cassette('archive/new_file_to_get_400.html') do
service.collect_files_archive(archive, instructeur)
end
archive.file.open do |f|
files = ZipTricks::FileReader.read_zip_structure(io: f)
structure = [
"procedure-#{procedure.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/pieces_justificatives/",
"procedure-#{procedure.id}/dossier-#{dossier.id}/export-#{dossier.id}-05-03-2021-00-00-#{dossier.id}.pdf",
"procedure-#{procedure.id}/LISEZMOI.txt"
]
expect(files.map(&:filename)).to match_array(structure)
end
expect(archive.file.attached?).to be_truthy
end
context 'with a missing file' do
let(:pj) do
PiecesJustificativesService::FakeAttachment.new(
@ -211,14 +230,12 @@ describe ProcedureArchiveService do
context 'for all months' do
let(:archive) { create(:archive, time_span_type: 'everything', status: 'pending') }
let(:mailer) { double('mailer', deliver_later: true) }
before do
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://i.etsystatic.com/6212702/r/il/744d2c/470726480/il_1588xN.470726480_bpk5.jpg")
end
it 'collect files' do
allow_any_instance_of(ActiveStorage::Attached::One).to receive(:url).and_return("https://opengraph.githubassets.com/5e61989aecb78e369c93674f877d7bf4ecde378850114a9563cdf8b6a2472536/typhoeus/typhoeus/issues/110")
expect(InstructeurMailer).to receive(:send_archive).and_return(mailer)
VCR.use_cassette('archive/file_to_get_typhoeus') do
VCR.use_cassette('archive/old_file_to_get_200') do
service.collect_files_archive(archive, instructeur)
end