refactor(virus_scan_result): use column instead of metadata on blob

This commit is contained in:
Paul Chavard 2022-12-22 18:20:58 +01:00
parent d469bca0ae
commit 6e18090fb3
17 changed files with 80 additions and 47 deletions

View file

@ -2,9 +2,9 @@ class Cron::FixMissingAntivirusAnalysisJob < Cron::CronJob
self.schedule_expression = "every day at 2 am" self.schedule_expression = "every day at 2 am"
def perform def perform
ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"pending%'").each do |b| ActiveStorage::Blob.where(virus_scan_result: ActiveStorage::VirusScanner::PENDING).find_each do |blob|
begin begin
VirusScannerJob.perform_now(b) VirusScannerJob.perform_now(blob)
rescue ActiveStorage::IntegrityError rescue ActiveStorage::IntegrityError
end end
end end

View file

View file

@ -0,0 +1,6 @@
class Migrations::BackfillVirusScanBlobsJob < ApplicationJob
def perform(batch)
ActiveStorage::Blob.where(id: batch)
.update_all(virus_scan_result: ActiveStorage::VirusScanner::SAFE)
end
end

View file

@ -6,30 +6,12 @@ class VirusScannerJob < ApplicationJob
# If for some reason the file appears invalid, retry for a while # If for some reason the file appears invalid, retry for a while
retry_on(ActiveStorage::IntegrityError, attempts: 5, wait: 5.seconds) do |job, _error| retry_on(ActiveStorage::IntegrityError, attempts: 5, wait: 5.seconds) do |job, _error|
blob = job.arguments.first blob = job.arguments.first
blob.update_columns(virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR, virus_scanned_at: Time.zone.now)
metadata = {
virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR,
scanned_at: Time.zone.now
}
merge_and_update_metadata(blob, metadata)
end end
def perform(blob) def perform(blob)
if blob.virus_scanner.done? then return end return if blob.virus_scanner.done?
metadata = extract_metadata_via_virus_scanner(blob) blob.update_columns(ActiveStorage::VirusScanner.new(blob).attributes)
VirusScannerJob.merge_and_update_metadata(blob, metadata)
end
def extract_metadata_via_virus_scanner(blob)
ActiveStorage::VirusScanner.new(blob).metadata
end
private
def self.merge_and_update_metadata(blob, metadata)
blob.update!(metadata: blob.metadata.merge(metadata))
end end
end end

View file

@ -11,36 +11,42 @@ class ActiveStorage::VirusScanner
INTEGRITY_ERROR = 'integrity_error' INTEGRITY_ERROR = 'integrity_error'
def pending? def pending?
blob.metadata[:virus_scan_result] == PENDING virus_scan_result == PENDING
end end
def infected? def infected?
blob.metadata[:virus_scan_result] == INFECTED virus_scan_result == INFECTED
end end
def safe? def safe?
blob.metadata[:virus_scan_result] == SAFE virus_scan_result == SAFE
end end
def corrupt? def corrupt?
blob.metadata[:virus_scan_result] == INTEGRITY_ERROR virus_scan_result == INTEGRITY_ERROR
end end
def done? def done?
started? && blob.metadata[:virus_scan_result] != PENDING started? && virus_scan_result != PENDING
end end
def started? def started?
blob.metadata[:virus_scan_result].present? virus_scan_result.present?
end end
def metadata def attributes
blob.open do |file| blob.open do |file|
if ClamavService.safe_file?(file.path) if ClamavService.safe_file?(file.path)
{ virus_scan_result: SAFE, scanned_at: Time.zone.now } { virus_scan_result: SAFE, virus_scanned_at: Time.zone.now }
else else
{ virus_scan_result: INFECTED, scanned_at: Time.zone.now } { virus_scan_result: INFECTED, virus_scanned_at: Time.zone.now }
end end
end end
end end
private
def virus_scan_result
blob.virus_scan_result || blob.metadata[:virus_scan_result]
end
end end

View file

@ -23,6 +23,6 @@ module BlobVirusScannerConcern
private private
def set_pending def set_pending
metadata[:virus_scan_result] ||= ActiveStorage::VirusScanner::PENDING self.virus_scan_result = metadata[:virus_scan_result] || ActiveStorage::VirusScanner::PENDING
end end
end end

View file

@ -0,0 +1,6 @@
class AddVirusScannedAtActiveStorageBlobs < ActiveRecord::Migration[6.1]
def change
add_column :active_storage_blobs, :virus_scan_result, :string
add_column :active_storage_blobs, :virus_scanned_at, :datetime
end
end

View file

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2022_12_13_084442) do ActiveRecord::Schema.define(version: 2022_12_27_084442) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "pgcrypto" enable_extension "pgcrypto"
@ -47,6 +47,8 @@ ActiveRecord::Schema.define(version: 2022_12_13_084442) do
t.integer "lock_version" t.integer "lock_version"
t.text "metadata" t.text "metadata"
t.string "service_name", null: false t.string "service_name", null: false
t.string "virus_scan_result"
t.datetime "virus_scanned_at"
t.datetime "watermarked_at" t.datetime "watermarked_at"
t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true
end end

View file

@ -0,0 +1,32 @@
namespace :after_party do
desc 'Deployment task: backfill_virus_scan_blobs'
task backfill_virus_scan_blobs: :environment do
puts "Running deploy task 'backfill_virus_scan_blobs'"
pending_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::PENDING}%'").where(virus_scan_result: nil)
infected_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::INFECTED}%'").where(virus_scan_result: nil)
integrity_error_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::INTEGRITY_ERROR}%'").where(virus_scan_result: nil)
safe_blobs = ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"#{ActiveStorage::VirusScanner::SAFE}%'").where(virus_scan_result: nil)
pp "pending blobs: #{pending_blobs.count}"
pp "infected blobs: #{infected_blobs.count}"
pp "with integrity error blobs: #{integrity_error_blobs.count}"
pending_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::PENDING)
infected_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::INFECTED)
integrity_error_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR)
safe_blobs_ids = safe_blobs.pluck(:id)
progress = ProgressReport.new(safe_blobs_ids.size)
safe_blobs_ids.in_groups_of(10_000) do |batch|
Migrations::BackfillVirusScanBlobsJob.perform_later(batch.compact)
progress.inc(batch.compact.size)
end
progress.finish
# Update task as completed. If you remove the line below, the task will
# run with every deploy (or every time you call after_party:run).
AfterParty::TaskRecord
.create version: AfterParty::TaskRecorder.new(__FILE__).timestamp
end
end

View file

@ -127,7 +127,7 @@ RSpec.describe Attachment::EditComponent, type: :component do
context 'with non nominal or final antivirus status' do context 'with non nominal or final antivirus status' do
before do before do
champ.piece_justificative_file[0].blob.update(metadata: attachment.blob.metadata.merge(virus_scan_result: virus_scan_result)) champ.piece_justificative_file[0].blob.update(virus_scan_result:)
end end
context 'when the anti-virus scan is pending' do context 'when the anti-virus scan is pending' do

View file

@ -84,7 +84,7 @@ RSpec.describe Attachment::MultipleComponent, type: :component do
let(:created_at) { 1.second.ago } let(:created_at) { 1.second.ago }
before do before do
attached_file.attachments[0].blob.update(metadata: { virus_scan_result: ActiveStorage::VirusScanner::PENDING }) attached_file.attachments[0].blob.update(virus_scan_result: ActiveStorage::VirusScanner::PENDING)
attached_file.attachments[0].update!(created_at:) attached_file.attachments[0].update!(created_at:)
end end

View file

@ -48,7 +48,7 @@ RSpec.describe Attachment::PendingPollComponent, type: :component do
context "when antivirus is in progress" do context "when antivirus is in progress" do
before do before do
attachment.blob[:metadata] = { virus_scan_result: ActiveStorage::VirusScanner::PENDING } attachment.blob.virus_scan_result = ActiveStorage::VirusScanner::PENDING
end end
it "renders" do it "renders" do

View file

@ -15,7 +15,7 @@ RSpec.describe Attachment::ShowComponent, type: :component do
subject { render_inline(component).to_html } subject { render_inline(component).to_html }
before do before do
champ.piece_justificative_file[0].blob.update(metadata: champ.piece_justificative_file[0].blob.metadata.merge(virus_scan_result: virus_scan_result)) attachment.blob.update(virus_scan_result:, metadata: attachment.blob.metadata.merge(virus_scan_result:))
end end
context 'when there is no anti-virus scan' do context 'when there is no anti-virus scan' do

View file

@ -21,8 +21,8 @@ describe VirusScannerJob, type: :job do
context "should raise ActiveRecord::StaleObjectError" do context "should raise ActiveRecord::StaleObjectError" do
let(:blob_2) { ActiveStorage::Blob.find(blob.id) } let(:blob_2) { ActiveStorage::Blob.find(blob.id) }
before do before do
blob_2.metadata[:virus_scan_result] = "infected" blob_2.virus_scan_result = ActiveStorage::VirusScanner::INFECTED
blob.metadata[:virus_scan_result] = "safe" blob.virus_scan_result = ActiveStorage::VirusScanner::SAFE
blob.save blob.save
end end

View file

@ -53,24 +53,23 @@ describe Champs::PieceJustificativeChamp do
describe '#for_api' do describe '#for_api' do
let(:champ_pj) { create(:champ_piece_justificative) } let(:champ_pj) { create(:champ_piece_justificative) }
let(:metadata) { champ_pj.piece_justificative_file.first.blob.metadata }
before { champ_pj.piece_justificative_file.first.blob.update(metadata: metadata.merge(virus_scan_result: status)) } before { champ_pj.piece_justificative_file.first.blob.update(virus_scan_result:) }
subject { champ_pj.for_api } subject { champ_pj.for_api }
context 'when file is safe' do context 'when file is safe' do
let(:status) { ActiveStorage::VirusScanner::SAFE } let(:virus_scan_result) { ActiveStorage::VirusScanner::SAFE }
it { is_expected.to include("/rails/active_storage/disk/") } it { is_expected.to include("/rails/active_storage/disk/") }
end end
context 'when file is not scanned' do context 'when file is not scanned' do
let(:status) { ActiveStorage::VirusScanner::PENDING } let(:virus_scan_result) { ActiveStorage::VirusScanner::PENDING }
it { is_expected.to include("/rails/active_storage/disk/") } it { is_expected.to include("/rails/active_storage/disk/") }
end end
context 'when file is infected' do context 'when file is infected' do
let(:status) { ActiveStorage::VirusScanner::INFECTED } let(:virus_scan_result) { ActiveStorage::VirusScanner::INFECTED }
it { is_expected.to be_nil } it { is_expected.to be_nil }
end end
end end

View file

@ -280,7 +280,7 @@ describe 'The user' do
end end
attachments.each { attachments.each {
_1.blob.metadata = { virus_scan_result: ActiveStorage::VirusScanner::SAFE } _1.blob.virus_scan_result = ActiveStorage::VirusScanner::SAFE
_1.save! _1.save!
} }
expect(page).not_to have_text('Analyse antivirus en cours', wait: 10) expect(page).not_to have_text('Analyse antivirus en cours', wait: 10)

View file

@ -95,7 +95,7 @@ describe 'instructeurs/dossiers/state_button.html.haml', type: :view do
context 'with a justificatif' do context 'with a justificatif' do
let(:dossier) do let(:dossier) do
dossier = create(:dossier, state, :with_justificatif) dossier = create(:dossier, state, :with_justificatif)
dossier.justificatif_motivation.blob.update(metadata: dossier.justificatif_motivation.blob.metadata.merge(virus_scan_result: ActiveStorage::VirusScanner::SAFE)) dossier.justificatif_motivation.blob.update(virus_scan_result: ActiveStorage::VirusScanner::SAFE)
dossier dossier
end end