From 75a104631525d1da558f0d0ea30e052663e29be6 Mon Sep 17 00:00:00 2001 From: Pierre de La Morinerie Date: Thu, 11 Mar 2021 13:42:57 +0000 Subject: [PATCH 1/2] active_storage: refactor concerns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to #5953. Refactor the concerns with two goals: - Getting closer to the way ActiveStorage adds its own hooks. Usually ActiveStorage does this using an `Attachment#after_create` hook, which then delegates to the blob to enqueue the job. - Enqueuing each job only once. By hooking on `Attachment#after_create`, we guarantee each job will be added only once. We then let the jobs themselves check if they are relevant or not, and retry or discard themselves if necessary. We also need to update the tests a bit, because Rails' `perform_enqueued_jobs(&block)` test helper doesn't honor the `retry_on` clause of jobs. Instead it forwards the exception to the caller – which makes the test fail. To work around this, we use the inline version of `perform_enqueued_jobs()`, without a block, which properly ignores errors caught by `retry_on`. 
--- app/jobs/titre_identite_watermark_job.rb | 14 +++++ app/jobs/virus_scanner_job.rb | 9 ++- ...chment_titre_identite_watermark_concern.rb | 17 ++++++ .../attachment_virus_scanner_concern.rb | 20 ++++++ .../blob_titre_identite_watermark_concern.rb | 37 +++-------- .../concerns/blob_virus_scanner_concern.rb | 25 ++------ config/initializers/active_storage.rb | 9 ++- .../instructeurs/avis_controller_spec.rb | 7 +-- spec/features/instructeurs/expert_spec.rb | 8 +-- spec/jobs/virus_scanner_job_spec.rb | 61 ++++++++++--------- spec/models/champ_spec.rb | 11 ++-- spec/models/dossier_spec.rb | 16 ++--- spec/services/commentaire_service_spec.rb | 11 +--- 13 files changed, 130 insertions(+), 115 deletions(-) create mode 100644 app/models/concerns/attachment_titre_identite_watermark_concern.rb create mode 100644 app/models/concerns/attachment_virus_scanner_concern.rb diff --git a/app/jobs/titre_identite_watermark_job.rb b/app/jobs/titre_identite_watermark_job.rb index c3faae73d..bf7e7d3eb 100644 --- a/app/jobs/titre_identite_watermark_job.rb +++ b/app/jobs/titre_identite_watermark_job.rb @@ -1,9 +1,23 @@ class TitreIdentiteWatermarkJob < ApplicationJob + class FileNotScannedYetError < StandardError + end + + # If by the time the job runs the blob has been deleted, ignore the error + discard_on ActiveRecord::RecordNotFound + # If the file is deleted during the scan, ignore the error + discard_on ActiveStorage::FileNotFoundError + # If the file is not analyzed or scanned for viruses yet, retry later + # (to avoid modifying the file while it is being scanned). + retry_on FileNotScannedYetError, wait: :exponentially_longer, attempts: 10 + MAX_IMAGE_SIZE = 1500 SCALE = 0.9 WATERMARK = Rails.root.join("app/assets/images/#{WATERMARK_FILE}") def perform(blob) + if blob.virus_scanner.pending? then raise FileNotScannedYetError end + if blob.watermark_done? 
then return end + blob.open do |file| watermark = resize_watermark(file) diff --git a/app/jobs/virus_scanner_job.rb b/app/jobs/virus_scanner_job.rb index 3907ee4b6..71b859dc0 100644 --- a/app/jobs/virus_scanner_job.rb +++ b/app/jobs/virus_scanner_job.rb @@ -1,15 +1,22 @@ class VirusScannerJob < ApplicationJob + class FileNotAnalyzedYetError < StandardError + end + queue_as :active_storage_analysis # If by the time the job runs the blob has been deleted, ignore the error discard_on ActiveRecord::RecordNotFound # If the file is deleted during the scan, ignore the error discard_on ActiveStorage::FileNotFoundError - + # If the file is not analyzed yet, retry later (to avoid clobbering metadata) + retry_on FileNotAnalyzedYetError, wait: :exponentially_longer, attempts: 10 # If for some reason the file appears invalid, retry for a while retry_on ActiveStorage::IntegrityError, attempts: 10, wait: 5.seconds def perform(blob) + if !blob.analyzed? then raise FileNotAnalyzedYetError end + if blob.virus_scanner.done? then return end + metadata = extract_metadata_via_virus_scanner(blob) blob.update!(metadata: blob.metadata.merge(metadata)) end diff --git a/app/models/concerns/attachment_titre_identite_watermark_concern.rb b/app/models/concerns/attachment_titre_identite_watermark_concern.rb new file mode 100644 index 000000000..2091a850d --- /dev/null +++ b/app/models/concerns/attachment_titre_identite_watermark_concern.rb @@ -0,0 +1,17 @@ +# Request a watermark on files attached to a `Champs::TitreIdentiteChamp`. +# +# We're using a class extension here, but we could as well have a periodic +# job that watermarks relevant attachments. 
+module AttachmentTitreIdentiteWatermarkConcern + extend ActiveSupport::Concern + + included do + after_create_commit :watermark_later + end + + private + + def watermark_later + blob&.watermark_later + end +end diff --git a/app/models/concerns/attachment_virus_scanner_concern.rb b/app/models/concerns/attachment_virus_scanner_concern.rb new file mode 100644 index 000000000..217d5e920 --- /dev/null +++ b/app/models/concerns/attachment_virus_scanner_concern.rb @@ -0,0 +1,20 @@ +# Run a virus scan on all attachments after they are analyzed. +# +# We're using a class extension to ensure that all attachments get scanned, +# regardless on how they were created. This could be an ActiveStorage::Analyzer, +# but as of Rails 6.1 only the first matching analyzer is ever run on +# a blob (and we may want to analyze the dimension of a picture as well +# as scanning it). +module AttachmentVirusScannerConcern + extend ActiveSupport::Concern + + included do + after_create_commit :scan_for_virus_later + end + + private + + def scan_for_virus_later + blob&.scan_for_virus_later + end +end diff --git a/app/models/concerns/blob_titre_identite_watermark_concern.rb b/app/models/concerns/blob_titre_identite_watermark_concern.rb index 85ae35263..598a23d05 100644 --- a/app/models/concerns/blob_titre_identite_watermark_concern.rb +++ b/app/models/concerns/blob_titre_identite_watermark_concern.rb @@ -1,38 +1,21 @@ -# Request a watermark on blobs attached to a `Champs::TitreIdentiteChamp` -# after the virus scan has run. -# -# We're using a class extension here, but we could as well have a periodic -# job that watermarks relevant attachments. -# -# The `after_commit` hook is triggered, among other cases, when -# the analyzer or virus scan updates the blob metadata. When both the analyzer -# and the virus scan have run, it is now safe to start the watermarking, -# without risking to replace the picture while it is being scanned in a -# concurrent job. 
module BlobTitreIdentiteWatermarkConcern - extend ActiveSupport::Concern - - included do - after_commit :enqueue_watermark_job - end - def watermark_pending? watermark_required? && !watermark_done? end + def watermark_done? + metadata[:watermark] + end + + def watermark_later + if watermark_required? + TitreIdentiteWatermarkJob.perform_later(self) + end + end + private def watermark_required? attachments.any? { |attachment| attachment.record.class.name == 'Champs::TitreIdentiteChamp' } end - - def watermark_done? - metadata[:watermark] - end - - def enqueue_watermark_job - if analyzed? && virus_scanner.done? && watermark_pending? - TitreIdentiteWatermarkJob.perform_later(self) - end - end end diff --git a/app/models/concerns/blob_virus_scanner_concern.rb b/app/models/concerns/blob_virus_scanner_concern.rb index 25c922e70..fbe64ffb0 100644 --- a/app/models/concerns/blob_virus_scanner_concern.rb +++ b/app/models/concerns/blob_virus_scanner_concern.rb @@ -1,36 +1,21 @@ -# Run a virus scan on all blobs after they are analyzed. -# -# We're using a class extension to ensure that all blobs get scanned, -# regardless on how they were created. This could be an ActiveStorage::Analyzer, -# but as of Rails 6.1 only the first matching analyzer is ever run on -# a blob (and we may want to analyze the dimension of a picture as well -# as scanning it). -# -# The `after_commit` hook is triggered, among other cases, when -# the analyzer updates the blob metadata. When the analyzer has run, -# it is now safe to start our own scanning, without risking to have -# two concurrent jobs overwriting the metadata of the blob. 
module BlobVirusScannerConcern extend ActiveSupport::Concern included do before_create :set_pending - after_commit :enqueue_virus_scan end def virus_scanner ActiveStorage::VirusScanner.new(self) end + def scan_for_virus_later + VirusScannerJob.perform_later(self) + end + private def set_pending - self.metadata[:virus_scan_result] ||= ActiveStorage::VirusScanner::PENDING - end - - def enqueue_virus_scan - if analyzed? && !virus_scanner.done? - VirusScannerJob.perform_later(self) - end + metadata[:virus_scan_result] ||= ActiveStorage::VirusScanner::PENDING end end diff --git a/config/initializers/active_storage.rb b/config/initializers/active_storage.rb index 9e06230bf..45d529258 100644 --- a/config/initializers/active_storage.rb +++ b/config/initializers/active_storage.rb @@ -4,9 +4,14 @@ Rails.application.config.active_storage.analyzers.delete ActiveStorage::Analyzer Rails.application.config.active_storage.analyzers.delete ActiveStorage::Analyzer::VideoAnalyzer ActiveSupport.on_load(:active_storage_blob) do - include BlobSignedIdConcern - include BlobVirusScannerConcern include BlobTitreIdentiteWatermarkConcern + include BlobVirusScannerConcern + include BlobSignedIdConcern +end + +ActiveSupport.on_load(:active_storage_attachment) do + include AttachmentTitreIdentiteWatermarkConcern + include AttachmentVirusScannerConcern end # When an OpenStack service is initialized it makes a request to fetch diff --git a/spec/controllers/instructeurs/avis_controller_spec.rb b/spec/controllers/instructeurs/avis_controller_spec.rb index 720578b19..cce22cd01 100644 --- a/spec/controllers/instructeurs/avis_controller_spec.rb +++ b/spec/controllers/instructeurs/avis_controller_spec.rb @@ -98,14 +98,10 @@ describe Instructeurs::AvisController, type: :controller do end context 'with attachment' do - include ActiveJob::TestHelper let(:file) { fixture_file_upload('spec/fixtures/files/piece_justificative_0.pdf', 'application/pdf') } before do - expect(ClamavService).to 
receive(:safe_file?).and_return(true) - perform_enqueued_jobs do - post :update, params: { id: avis_without_answer.id, procedure_id: procedure.id, avis: { answer: 'answer', piece_justificative_file: file } } - end + post :update, params: { id: avis_without_answer.id, procedure_id: procedure.id, avis: { answer: 'answer', piece_justificative_file: file } } avis_without_answer.reload end @@ -126,7 +122,6 @@ describe Instructeurs::AvisController, type: :controller do subject { post :create_commentaire, params: { id: avis_without_answer.id, procedure_id: procedure.id, commentaire: { body: 'commentaire body', piece_jointe: file } } } before do - allow(ClamavService).to receive(:safe_file?).and_return(scan_result) Timecop.freeze(now) end diff --git a/spec/features/instructeurs/expert_spec.rb b/spec/features/instructeurs/expert_spec.rb index 8accab080..442020333 100644 --- a/spec/features/instructeurs/expert_spec.rb +++ b/spec/features/instructeurs/expert_spec.rb @@ -25,9 +25,8 @@ feature 'Inviting an expert:' do check 'avis_invite_linked_dossiers' page.select 'confidentiel', from: 'avis_confidentiel' - perform_enqueued_jobs do - click_on 'Demander un avis' - end + click_on 'Demander un avis' + perform_enqueued_jobs expect(page).to have_content('Une demande d\'avis a été envoyée') expect(page).to have_content('Avis des invités') @@ -38,7 +37,8 @@ feature 'Inviting an expert:' do end expect(Avis.count).to eq(4) - expect(all_emails.size).to eq(2) + expect(emails_sent_to('expert1@exemple.fr').size).to eq(1) + expect(emails_sent_to('expert2@exemple.fr').size).to eq(1) invitation_email = open_email('expert2@exemple.fr') avis = Avis.find_by(email: 'expert2@exemple.fr', dossier: dossier) diff --git a/spec/jobs/virus_scanner_job_spec.rb b/spec/jobs/virus_scanner_job_spec.rb index 0550f9f1b..887fd3260 100644 --- a/spec/jobs/virus_scanner_job_spec.rb +++ b/spec/jobs/virus_scanner_job_spec.rb @@ -1,48 +1,49 @@ -RSpec.describe VirusScannerJob, type: :job do - include 
ActiveJob::TestHelper - - let(:champ) do - champ = create(:champ_piece_justificative) - champ.piece_justificative_file.attach(io: StringIO.new("toto"), filename: "toto.txt", content_type: "text/plain") - champ.save - champ +describe VirusScannerJob, type: :job do + let(:blob) do + ActiveStorage::Blob.create_and_upload!(io: StringIO.new("toto"), filename: "toto.txt", content_type: "text/plain") end subject do - perform_enqueued_jobs do - VirusScannerJob.perform_later(champ.piece_justificative_file.blob) + VirusScannerJob.perform_now(blob) + end + + context "when the blob is not analyzed yet" do + it "retries the job later" do + expect { subject }.to have_enqueued_job(VirusScannerJob) end end - context "when no virus is found" do - let(:virus_found?) { true } - + context "when the blob has been analyzed" do before do - allow(ClamavService).to receive(:safe_file?).and_return(virus_found?) - subject + blob.analyze end - it { expect(champ.reload.piece_justificative_file.virus_scanner.safe?).to be_truthy } - end + context "when no virus is found" do + before do + allow(ClamavService).to receive(:safe_file?).and_return(true) + subject + end - context "when a virus is found" do - let(:virus_found?) { false } - - before do - allow(ClamavService).to receive(:safe_file?).and_return(virus_found?) 
- subject + it { expect(blob.virus_scanner.safe?).to be_truthy } end - it { expect(champ.reload.piece_justificative_file.virus_scanner.infected?).to be_truthy } - end + context "when a virus is found" do + before do + allow(ClamavService).to receive(:safe_file?).and_return(false) + subject + end - context "when the blob has been deleted" do - before do - Champ.find(champ.id).piece_justificative_file.purge + it { expect(blob.virus_scanner.infected?).to be_truthy } end - it "ignores the error" do - expect { subject }.not_to raise_error + context "when the blob has been deleted" do + before do + ActiveStorage::Blob.find(blob.id).purge + end + + it "ignores the error" do + expect { subject }.not_to raise_error + end end end end diff --git a/spec/models/champ_spec.rb b/spec/models/champ_spec.rb index 526538213..917b4a974 100644 --- a/spec/models/champ_spec.rb +++ b/spec/models/champ_spec.rb @@ -458,7 +458,8 @@ describe Champ do end it 'marks the file as safe once the scan completes' do - perform_enqueued_jobs { subject } + subject + perform_enqueued_jobs expect(champ.reload.piece_justificative_file.virus_scanner.safe?).to be_truthy end end @@ -480,13 +481,15 @@ describe Champ do champ end - it 'enqueues a watermark job on file attachment' do + it 'marks the file as needing watermarking' do expect(subject.piece_justificative_file.watermark_pending?).to be_truthy end it 'watermarks the file' do - perform_enqueued_jobs { subject } - expect(champ.reload.piece_justificative_file.blob.metadata[:watermark]).to be_truthy + subject + perform_enqueued_jobs + expect(champ.reload.piece_justificative_file.watermark_pending?).to be_falsy + expect(champ.reload.piece_justificative_file.blob.watermark_done?).to be_truthy end end end diff --git a/spec/models/dossier_spec.rb b/spec/models/dossier_spec.rb index fcdc624d9..e12ff5879 100644 --- a/spec/models/dossier_spec.rb +++ b/spec/models/dossier_spec.rb @@ -1386,27 +1386,21 @@ describe Dossier do it "clean up titres identite on accepter" 
do expect(champ_titre_identite.piece_justificative_file.attached?).to be_truthy expect(champ_titre_identite_vide.piece_justificative_file.attached?).to be_falsey - perform_enqueued_jobs do - dossier.accepter!(dossier.followers_instructeurs.first, "yolo!") - end + dossier.accepter!(dossier.followers_instructeurs.first, "yolo!") expect(champ_titre_identite.piece_justificative_file.attached?).to be_falsey end it "clean up titres identite on refuser" do expect(champ_titre_identite.piece_justificative_file.attached?).to be_truthy expect(champ_titre_identite_vide.piece_justificative_file.attached?).to be_falsey - perform_enqueued_jobs do - dossier.refuser!(dossier.followers_instructeurs.first, "yolo!") - end + dossier.refuser!(dossier.followers_instructeurs.first, "yolo!") expect(champ_titre_identite.piece_justificative_file.attached?).to be_falsey end it "clean up titres identite on classer_sans_suite" do expect(champ_titre_identite.piece_justificative_file.attached?).to be_truthy expect(champ_titre_identite_vide.piece_justificative_file.attached?).to be_falsey - perform_enqueued_jobs do - dossier.classer_sans_suite!(dossier.followers_instructeurs.first, "yolo!") - end + dossier.classer_sans_suite!(dossier.followers_instructeurs.first, "yolo!") expect(champ_titre_identite.piece_justificative_file.attached?).to be_falsey end @@ -1416,9 +1410,7 @@ describe Dossier do it "clean up titres identite on accepter_automatiquement" do expect(champ_titre_identite.piece_justificative_file.attached?).to be_truthy expect(champ_titre_identite_vide.piece_justificative_file.attached?).to be_falsey - perform_enqueued_jobs do - dossier.accepter_automatiquement! - end + dossier.accepter_automatiquement! 
expect(champ_titre_identite.piece_justificative_file.attached?).to be_falsey end end diff --git a/spec/services/commentaire_service_spec.rb b/spec/services/commentaire_service_spec.rb index 09801029d..a84bb8500 100644 --- a/spec/services/commentaire_service_spec.rb +++ b/spec/services/commentaire_service_spec.rb @@ -29,15 +29,8 @@ describe CommentaireService do context 'when it has a file' do let(:file) { fixture_file_upload('spec/fixtures/files/piece_justificative_0.pdf', 'application/pdf') } - before do - expect(ClamavService).to receive(:safe_file?).and_return(true) - end - - it 'saves the attached file' do - perform_enqueued_jobs do - commentaire.save - expect(commentaire.piece_jointe.attached?).to be_truthy - end + it 'attaches the file' do + expect(commentaire.piece_jointe.attached?).to be_truthy end end end From be58d82016504a4d74fe0bb5ebfb7d7d7cff12a6 Mon Sep 17 00:00:00 2001 From: Pierre de La Morinerie Date: Thu, 11 Mar 2021 15:01:05 +0100 Subject: [PATCH 2/2] specs: clear jobs queue before each test This prevents `perform_enqueued_jobs` from executing jobs left over from previous tests. --- spec/support/active_job.rb | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 spec/support/active_job.rb diff --git a/spec/support/active_job.rb b/spec/support/active_job.rb new file mode 100644 index 000000000..bd030c81c --- /dev/null +++ b/spec/support/active_job.rb @@ -0,0 +1,9 @@ +RSpec.configure do |config| + config.include ActiveJob::TestHelper + + config.before(:each) do + clear_enqueued_jobs + end +end + +ActiveJob::Base.queue_adapter = :test