From 6e18090fb3201c45161759999ec79e4e63016946 Mon Sep 17 00:00:00 2001 From: Paul Chavard Date: Thu, 22 Dec 2022 18:20:58 +0100 Subject: [PATCH] refactor(virus_scan_result): use column instead of metadata on blob --- .../fix_missing_antivirus_analysis_job.rb | 4 +-- app/jobs/migrations/.keep | 0 .../backfill_virus_scan_blobs_job.rb | 6 ++++ app/jobs/virus_scanner_job.rb | 24 ++------------ app/lib/active_storage/virus_scanner.rb | 24 ++++++++------ .../concerns/blob_virus_scanner_concern.rb | 2 +- ...d_virus_scanned_at_active_storage_blobs.rb | 6 ++++ db/schema.rb | 4 ++- ...21222173733_backfill_virus_scan_blobs.rake | 32 +++++++++++++++++++ .../attachment/edit_component_spec.rb | 2 +- .../attachment/multiple_component_spec.rb | 2 +- .../attachment/pending_poll_component_spec.rb | 2 +- .../attachment/show_component_spec.rb | 2 +- spec/jobs/virus_scanner_job_spec.rb | 4 +-- .../champs/piece_justificative_champ_spec.rb | 9 +++--- spec/system/users/brouillon_spec.rb | 2 +- .../dossiers/_state_button.html.haml_spec.rb | 2 +- 17 files changed, 80 insertions(+), 47 deletions(-) create mode 100644 app/jobs/migrations/.keep create mode 100644 app/jobs/migrations/backfill_virus_scan_blobs_job.rb create mode 100644 db/migrate/20221222170319_add_virus_scanned_at_active_storage_blobs.rb create mode 100644 lib/tasks/deployment/20221222173733_backfill_virus_scan_blobs.rake diff --git a/app/jobs/cron/fix_missing_antivirus_analysis_job.rb b/app/jobs/cron/fix_missing_antivirus_analysis_job.rb index b12a0445c..f9f4a9dc9 100644 --- a/app/jobs/cron/fix_missing_antivirus_analysis_job.rb +++ b/app/jobs/cron/fix_missing_antivirus_analysis_job.rb @@ -2,9 +2,9 @@ class Cron::FixMissingAntivirusAnalysisJob < Cron::CronJob self.schedule_expression = "every day at 2 am" def perform - ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"pending%'").each do |b| + ActiveStorage::Blob.where(virus_scan_result: ActiveStorage::VirusScanner::PENDING).find_each do |blob| begin - VirusScannerJob.perform_now(b) + VirusScannerJob.perform_now(blob) rescue ActiveStorage::IntegrityError end end diff --git a/app/jobs/migrations/.keep b/app/jobs/migrations/.keep new file mode 100644 index 000000000..e69de29bb diff --git a/app/jobs/migrations/backfill_virus_scan_blobs_job.rb b/app/jobs/migrations/backfill_virus_scan_blobs_job.rb new file mode 100644 index 000000000..f9ba1b002 --- /dev/null +++ b/app/jobs/migrations/backfill_virus_scan_blobs_job.rb @@ -0,0 +1,6 @@ +class Migrations::BackfillVirusScanBlobsJob < ApplicationJob + def perform(batch) + ActiveStorage::Blob.where(id: batch) + .update_all(virus_scan_result: ActiveStorage::VirusScanner::SAFE) + end +end diff --git a/app/jobs/virus_scanner_job.rb b/app/jobs/virus_scanner_job.rb index 0d5f0ba24..3d2ea0944 100644 --- a/app/jobs/virus_scanner_job.rb +++ b/app/jobs/virus_scanner_job.rb @@ -6,30 +6,12 @@ class VirusScannerJob < ApplicationJob # If for some reason the file appears invalid, retry for a while retry_on(ActiveStorage::IntegrityError, attempts: 5, wait: 5.seconds) do |job, _error| blob = job.arguments.first - - metadata = { - virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR, - scanned_at: Time.zone.now - } - - merge_and_update_metadata(blob, metadata) + blob.update_columns(virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR, virus_scanned_at: Time.zone.now) end def perform(blob) - if blob.virus_scanner.done? then return end + return if blob.virus_scanner.done? - metadata = extract_metadata_via_virus_scanner(blob) - - VirusScannerJob.merge_and_update_metadata(blob, metadata) - end - - def extract_metadata_via_virus_scanner(blob) - ActiveStorage::VirusScanner.new(blob).metadata - end - - private - - def self.merge_and_update_metadata(blob, metadata) - blob.update!(metadata: blob.metadata.merge(metadata)) + blob.update_columns(ActiveStorage::VirusScanner.new(blob).attributes) end end diff --git a/app/lib/active_storage/virus_scanner.rb b/app/lib/active_storage/virus_scanner.rb index 90cd0a528..6e5d26d01 100644 --- a/app/lib/active_storage/virus_scanner.rb +++ b/app/lib/active_storage/virus_scanner.rb @@ -11,36 +11,42 @@ class ActiveStorage::VirusScanner INTEGRITY_ERROR = 'integrity_error' def pending? - blob.metadata[:virus_scan_result] == PENDING + virus_scan_result == PENDING end def infected? - blob.metadata[:virus_scan_result] == INFECTED + virus_scan_result == INFECTED end def safe? - blob.metadata[:virus_scan_result] == SAFE + virus_scan_result == SAFE end def corrupt? - blob.metadata[:virus_scan_result] == INTEGRITY_ERROR + virus_scan_result == INTEGRITY_ERROR end def done? - started? && blob.metadata[:virus_scan_result] != PENDING + started? && virus_scan_result != PENDING end def started? - blob.metadata[:virus_scan_result].present? + virus_scan_result.present? end - def metadata + def attributes blob.open do |file| if ClamavService.safe_file?(file.path) - { virus_scan_result: SAFE, scanned_at: Time.zone.now } + { virus_scan_result: SAFE, virus_scanned_at: Time.zone.now } else - { virus_scan_result: INFECTED, scanned_at: Time.zone.now } + { virus_scan_result: INFECTED, virus_scanned_at: Time.zone.now } end end end + + private + + def virus_scan_result + blob.virus_scan_result || blob.metadata[:virus_scan_result] + end end diff --git a/app/models/concerns/blob_virus_scanner_concern.rb b/app/models/concerns/blob_virus_scanner_concern.rb index 685c0f73e..b77a55d3b 100644 --- a/app/models/concerns/blob_virus_scanner_concern.rb +++ b/app/models/concerns/blob_virus_scanner_concern.rb @@ -23,6 +23,6 @@ module BlobVirusScannerConcern private def set_pending - metadata[:virus_scan_result] ||= ActiveStorage::VirusScanner::PENDING + self.virus_scan_result = metadata[:virus_scan_result] || ActiveStorage::VirusScanner::PENDING end end diff --git a/db/migrate/20221222170319_add_virus_scanned_at_active_storage_blobs.rb b/db/migrate/20221222170319_add_virus_scanned_at_active_storage_blobs.rb new file mode 100644 index 000000000..36b536fcf --- /dev/null +++ b/db/migrate/20221222170319_add_virus_scanned_at_active_storage_blobs.rb @@ -0,0 +1,6 @@ +class AddVirusScannedAtActiveStorageBlobs < ActiveRecord::Migration[6.1] + def change + add_column :active_storage_blobs, :virus_scan_result, :string + add_column :active_storage_blobs, :virus_scanned_at, :datetime + end +end diff --git a/db/schema.rb b/db/schema.rb index 037c0a890..059e989e7 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,7 +10,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 2022_12_13_084442) do +ActiveRecord::Schema.define(version: 2022_12_27_084442) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" @@ -47,6 +47,8 @@ ActiveRecord::Schema.define(version: 2022_12_13_084442) do t.integer "lock_version" t.text "metadata" t.string "service_name", null: false + t.string "virus_scan_result" + t.datetime "virus_scanned_at" t.datetime "watermarked_at" t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true end diff --git a/lib/tasks/deployment/20221222173733_backfill_virus_scan_blobs.rake b/lib/tasks/deployment/20221222173733_backfill_virus_scan_blobs.rake new file mode 100644 index 000000000..7b42af031 --- /dev/null +++ b/lib/tasks/deployment/20221222173733_backfill_virus_scan_blobs.rake @@ -0,0 +1,32 @@ +namespace :after_party do + desc 'Deployment task: backfill_virus_scan_blobs' + task backfill_virus_scan_blobs: :environment do + puts "Running deploy task 'backfill_virus_scan_blobs'" + + pending_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::PENDING}%'").where(virus_scan_result: nil) + infected_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::INFECTED}%'").where(virus_scan_result: nil) + integrity_error_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::INTEGRITY_ERROR}%'").where(virus_scan_result: nil) + safe_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::SAFE}%'").where(virus_scan_result: nil) + + pp "pending blobs: #{pending_blobs.count}" + pp "infected blobs: #{infected_blobs.count}" + pp "with integrity error blobs: #{integrity_error_blobs.count}" + + pending_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::PENDING) + infected_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::INFECTED) + integrity_error_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR) + + safe_blobs_ids = safe_blobs.pluck(:id) + progress = ProgressReport.new(safe_blobs_ids.size) + safe_blobs_ids.in_groups_of(10_000) do |batch| + Migrations::BackfillVirusScanBlobsJob.perform_later(batch.compact) + progress.inc(batch.compact.size) + end + progress.finish + + # Update task as completed. If you remove the line below, the task will + # run with every deploy (or every time you call after_party:run). + AfterParty::TaskRecord + .create version: AfterParty::TaskRecorder.new(__FILE__).timestamp + end +end diff --git a/spec/components/attachment/edit_component_spec.rb b/spec/components/attachment/edit_component_spec.rb index bcae3b6ec..40851b3aa 100644 --- a/spec/components/attachment/edit_component_spec.rb +++ b/spec/components/attachment/edit_component_spec.rb @@ -127,7 +127,7 @@ RSpec.describe Attachment::EditComponent, type: :component do context 'with non nominal or final antivirus status' do before do - champ.piece_justificative_file[0].blob.update(metadata: attachment.blob.metadata.merge(virus_scan_result: virus_scan_result)) + champ.piece_justificative_file[0].blob.update(virus_scan_result:) end context 'when the anti-virus scan is pending' do diff --git a/spec/components/attachment/multiple_component_spec.rb b/spec/components/attachment/multiple_component_spec.rb index 2ac7396be..54b8782b4 100644 --- a/spec/components/attachment/multiple_component_spec.rb +++ b/spec/components/attachment/multiple_component_spec.rb @@ -84,7 +84,7 @@ RSpec.describe Attachment::MultipleComponent, type: :component do let(:created_at) { 1.second.ago } before do - attached_file.attachments[0].blob.update(metadata: { virus_scan_result: ActiveStorage::VirusScanner::PENDING }) + attached_file.attachments[0].blob.update(virus_scan_result: ActiveStorage::VirusScanner::PENDING) attached_file.attachments[0].update!(created_at:) end diff --git a/spec/components/attachment/pending_poll_component_spec.rb b/spec/components/attachment/pending_poll_component_spec.rb index 1bd9f9f5f..0ca07eddf 100644 --- a/spec/components/attachment/pending_poll_component_spec.rb +++ b/spec/components/attachment/pending_poll_component_spec.rb @@ -48,7 +48,7 @@ RSpec.describe Attachment::PendingPollComponent, type: :component do context "when antivirus is in progress" do before do - attachment.blob[:metadata] = { virus_scan_result: ActiveStorage::VirusScanner::PENDING } + attachment.blob.virus_scan_result = ActiveStorage::VirusScanner::PENDING end it "renders" do diff --git a/spec/components/attachment/show_component_spec.rb b/spec/components/attachment/show_component_spec.rb index 658aa5308..adcf36ff3 100644 --- a/spec/components/attachment/show_component_spec.rb +++ b/spec/components/attachment/show_component_spec.rb @@ -15,7 +15,7 @@ RSpec.describe Attachment::ShowComponent, type: :component do subject { render_inline(component).to_html } before do - champ.piece_justificative_file[0].blob.update(metadata: champ.piece_justificative_file[0].blob.metadata.merge(virus_scan_result: virus_scan_result)) + attachment.blob.update(virus_scan_result:, metadata: attachment.blob.metadata.merge(virus_scan_result:)) end context 'when there is no anti-virus scan' do diff --git a/spec/jobs/virus_scanner_job_spec.rb b/spec/jobs/virus_scanner_job_spec.rb index 2ea97bff6..9de830373 100644 --- a/spec/jobs/virus_scanner_job_spec.rb +++ b/spec/jobs/virus_scanner_job_spec.rb @@ -21,8 +21,8 @@ describe VirusScannerJob, type: :job do context "should raise ActiveRecord::StaleObjectError" do let(:blob_2) { ActiveStorage::Blob.find(blob.id) } before do - blob_2.metadata[:virus_scan_result] = "infected" - blob.metadata[:virus_scan_result] = "safe" + blob_2.virus_scan_result = ActiveStorage::VirusScanner::INFECTED + blob.virus_scan_result = ActiveStorage::VirusScanner::SAFE blob.save end diff --git a/spec/models/champs/piece_justificative_champ_spec.rb b/spec/models/champs/piece_justificative_champ_spec.rb index 0073793db..d3462235d 100644 --- a/spec/models/champs/piece_justificative_champ_spec.rb +++ b/spec/models/champs/piece_justificative_champ_spec.rb @@ -53,24 +53,23 @@ describe Champs::PieceJustificativeChamp do describe '#for_api' do let(:champ_pj) { create(:champ_piece_justificative) } - let(:metadata) { champ_pj.piece_justificative_file.first.blob.metadata } - before { champ_pj.piece_justificative_file.first.blob.update(metadata: metadata.merge(virus_scan_result: status)) } + before { champ_pj.piece_justificative_file.first.blob.update(virus_scan_result:) } subject { champ_pj.for_api } context 'when file is safe' do - let(:status) { ActiveStorage::VirusScanner::SAFE } + let(:virus_scan_result) { ActiveStorage::VirusScanner::SAFE } it { is_expected.to include("/rails/active_storage/disk/") } end context 'when file is not scanned' do - let(:status) { ActiveStorage::VirusScanner::PENDING } + let(:virus_scan_result) { ActiveStorage::VirusScanner::PENDING } it { is_expected.to include("/rails/active_storage/disk/") } end context 'when file is infected' do - let(:status) { ActiveStorage::VirusScanner::INFECTED } + let(:virus_scan_result) { ActiveStorage::VirusScanner::INFECTED } it { is_expected.to be_nil } end end diff --git a/spec/system/users/brouillon_spec.rb b/spec/system/users/brouillon_spec.rb index cc9dc92a8..b36abd2f3 100644 --- a/spec/system/users/brouillon_spec.rb +++ b/spec/system/users/brouillon_spec.rb @@ -280,7 +280,7 @@ describe 'The user' do end attachments.each { - _1.blob.metadata = { virus_scan_result: ActiveStorage::VirusScanner::SAFE } + _1.blob.virus_scan_result = ActiveStorage::VirusScanner::SAFE _1.save! } expect(page).not_to have_text('Analyse antivirus en cours', wait: 10) diff --git a/spec/views/instructeur/dossiers/_state_button.html.haml_spec.rb b/spec/views/instructeur/dossiers/_state_button.html.haml_spec.rb index d44baf449..5c71fc460 100644 --- a/spec/views/instructeur/dossiers/_state_button.html.haml_spec.rb +++ b/spec/views/instructeur/dossiers/_state_button.html.haml_spec.rb @@ -95,7 +95,7 @@ describe 'instructeurs/dossiers/state_button.html.haml', type: :view do context 'with a justificatif' do let(:dossier) do dossier = create(:dossier, state, :with_justificatif) - dossier.justificatif_motivation.blob.update(metadata: dossier.justificatif_motivation.blob.metadata.merge(virus_scan_result: ActiveStorage::VirusScanner::SAFE)) + dossier.justificatif_motivation.blob.update(virus_scan_result: ActiveStorage::VirusScanner::SAFE) dossier end