Merge pull request #8331 from tchak/refactor-blob-virus-scanner

refactor(virus_scan_result): use column instead of metadata on blob
This commit is contained in:
Paul Chavard 2023-01-04 12:07:51 +00:00 committed by GitHub
commit 6bb4e98329
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
25 changed files with 119 additions and 83 deletions

View file

@ -10,7 +10,7 @@ class Champs::PieceJustificativeController < ApplicationController
end
def update
if attach_piece_justificative_or_retry
if attach_piece_justificative
render :show
else
render json: { errors: @champ.errors.full_messages }, status: 422
@ -46,10 +46,4 @@ class Champs::PieceJustificativeController < ApplicationController
save_succeed
end
def attach_piece_justificative_or_retry
attach_piece_justificative
rescue ActiveRecord::StaleObjectError
attach_piece_justificative
end
end

View file

@ -2,9 +2,9 @@ class Cron::FixMissingAntivirusAnalysisJob < Cron::CronJob
self.schedule_expression = "every day at 2 am"
def perform
ActiveStorage::Blob.where("metadata like '%\"virus_scan_result\":\"pending%'").each do |b|
ActiveStorage::Blob.where(virus_scan_result: ActiveStorage::VirusScanner::PENDING).find_each do |blob|
begin
VirusScannerJob.perform_now(b)
VirusScannerJob.perform_now(blob)
rescue ActiveStorage::IntegrityError
end
end

View file

View file

@ -0,0 +1,6 @@
# One-off backfill job: writes the "safe" virus scan status into the
# `virus_scan_result` column for a batch of blobs.
class Migrations::BackfillVirusScanBlobsJob < ApplicationJob
  # @param batch [Array] ids of the ActiveStorage::Blob rows to update
  # @return [Integer] whatever `update_all` returns for the matched rows
  def perform(batch)
    blobs = ActiveStorage::Blob.where(id: batch)
    # Single UPDATE statement; model callbacks and validations are not run.
    blobs.update_all(virus_scan_result: ActiveStorage::VirusScanner::SAFE)
  end
end

View file

@ -15,8 +15,8 @@ class TitreIdentiteWatermarkJob < ApplicationJob
WATERMARK = URI.parse(WATERMARK_FILE).is_a?(URI::HTTP) ? WATERMARK_FILE : Rails.root.join("app/assets/images/#{WATERMARK_FILE}")
def perform(blob)
if blob.virus_scanner.pending? then raise FileNotScannedYetError end
if blob.watermark_done? then return end
return if blob.watermark_done?
raise FileNotScannedYetError if blob.virus_scanner.pending?
blob.open do |file|
watermark = resize_watermark(file)
@ -24,12 +24,8 @@ class TitreIdentiteWatermarkJob < ApplicationJob
if watermark.present?
processed = watermark_image(file, watermark)
blob.metadata[:watermark] = true
blob.upload(processed)
blob.save
else
blob.metadata[:watermark_invalid] = true
blob.save
blob.touch(:watermarked_at)
end
end
end

View file

@ -6,30 +6,12 @@ class VirusScannerJob < ApplicationJob
# If for some reason the file appears invalid, retry for a while
retry_on(ActiveStorage::IntegrityError, attempts: 5, wait: 5.seconds) do |job, _error|
blob = job.arguments.first
metadata = {
virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR,
scanned_at: Time.zone.now
}
merge_and_update_metadata(blob, metadata)
blob.update_columns(virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR, virus_scanned_at: Time.zone.now)
end
def perform(blob)
if blob.virus_scanner.done? then return end
return if blob.virus_scanner.done?
metadata = extract_metadata_via_virus_scanner(blob)
VirusScannerJob.merge_and_update_metadata(blob, metadata)
end
def extract_metadata_via_virus_scanner(blob)
ActiveStorage::VirusScanner.new(blob).metadata
end
private
def self.merge_and_update_metadata(blob, metadata)
blob.update!(metadata: blob.metadata.merge(metadata))
blob.update_columns(ActiveStorage::VirusScanner.new(blob).attributes)
end
end

View file

@ -5,8 +5,7 @@ module ActiveJob::RetryOnTransientErrors
Excon::Error::InternalServerError,
Excon::Error::GatewayTimeout,
Excon::Error::BadRequest,
Excon::Error::Socket,
ActiveRecord::StaleObjectError
Excon::Error::Socket
]
included do

View file

@ -11,36 +11,42 @@ class ActiveStorage::VirusScanner
INTEGRITY_ERROR = 'integrity_error'
def pending?
blob.metadata[:virus_scan_result] == PENDING
virus_scan_result == PENDING
end
def infected?
blob.metadata[:virus_scan_result] == INFECTED
virus_scan_result == INFECTED
end
def safe?
blob.metadata[:virus_scan_result] == SAFE
virus_scan_result == SAFE
end
def corrupt?
blob.metadata[:virus_scan_result] == INTEGRITY_ERROR
virus_scan_result == INTEGRITY_ERROR
end
def done?
started? && blob.metadata[:virus_scan_result] != PENDING
started? && virus_scan_result != PENDING
end
def started?
blob.metadata[:virus_scan_result].present?
virus_scan_result.present?
end
def metadata
def attributes
blob.open do |file|
if ClamavService.safe_file?(file.path)
{ virus_scan_result: SAFE, scanned_at: Time.zone.now }
{ virus_scan_result: SAFE, virus_scanned_at: Time.zone.now }
else
{ virus_scan_result: INFECTED, scanned_at: Time.zone.now }
{ virus_scan_result: INFECTED, virus_scanned_at: Time.zone.now }
end
end
end
private
def virus_scan_result
blob.virus_scan_result || blob.metadata[:virus_scan_result]
end
end

View file

@ -4,11 +4,11 @@ module BlobTitreIdentiteWatermarkConcern
end
def watermark_done?
metadata[:watermark]
watermarked_at.present?
end
def watermark_later
if watermark_required?
if watermark_pending?
TitreIdentiteWatermarkJob.perform_later(self)
end
end
@ -16,6 +16,6 @@ module BlobTitreIdentiteWatermarkConcern
private
def watermark_required?
attachments.any? { |attachment| attachment.record.class.name == 'Champs::TitreIdentiteChamp' }
attachments.any? { _1.record.class == Champs::TitreIdentiteChamp }
end
end

View file

@ -2,6 +2,7 @@ module BlobVirusScannerConcern
extend ActiveSupport::Concern
included do
self.ignored_columns = [:lock_version]
before_create :set_pending
end
@ -23,6 +24,6 @@ module BlobVirusScannerConcern
private
def set_pending
metadata[:virus_scan_result] ||= ActiveStorage::VirusScanner::PENDING
self.virus_scan_result = metadata[:virus_scan_result] || ActiveStorage::VirusScanner::PENDING
end
end

View file

@ -36,14 +36,10 @@ class CommentaireService
def self.save(dossier, params)
build_and_save(dossier, params)
rescue ActiveRecord::StaleObjectError
build_and_save(dossier, params)
end
def self.save!(dossier, params)
build_and_save(dossier, params, raise_exception: true)
rescue ActiveRecord::StaleObjectError
build_and_save(dossier, params, raise_exception: true)
end
def self.build_and_save(dossier, params, raise_exception: false)

View file

@ -0,0 +1,5 @@
# Adds a `watermarked_at` datetime column to `active_storage_blobs`.
# No default and no NOT NULL constraint are declared, so existing rows
# start out with an empty value.
class AddWatermarkedAtActiveStorageBlobs < ActiveRecord::Migration[6.1]
def change
add_column :active_storage_blobs, :watermarked_at, :datetime
end
end

View file

@ -0,0 +1,6 @@
# Adds the virus scan columns to `active_storage_blobs`.
# Despite the class name, two columns are created: the scan status
# (`virus_scan_result`, string) and the scan time (`virus_scanned_at`, datetime).
class AddVirusScannedAtActiveStorageBlobs < ActiveRecord::Migration[6.1]
def change
add_column :active_storage_blobs, :virus_scan_result, :string
add_column :active_storage_blobs, :virus_scanned_at, :datetime
end
end

View file

@ -0,0 +1,7 @@
# Adds an index on `active_storage_blobs.virus_scan_result`.
class AddVirusScanResultIndex < ActiveRecord::Migration[6.1]
# The index is built with `algorithm: :concurrently`, so the migration must
# not be wrapped in a DDL transaction (PostgreSQL refuses concurrent index
# builds inside a transaction block).
disable_ddl_transaction!
def change
add_index :active_storage_blobs, :virus_scan_result, algorithm: :concurrently
end
end

View file

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2022_12_13_084442) do
ActiveRecord::Schema.define(version: 2022_12_27_084442) do
# These are extensions that must be enabled in order to support this database
enable_extension "pgcrypto"
@ -47,6 +47,9 @@ ActiveRecord::Schema.define(version: 2022_12_13_084442) do
t.integer "lock_version"
t.text "metadata"
t.string "service_name", null: false
t.string "virus_scan_result"
t.datetime "virus_scanned_at"
t.datetime "watermarked_at"
t.index ["key"], name: "index_active_storage_blobs_on_key", unique: true
end

View file

@ -0,0 +1,16 @@
namespace :after_party do
  desc 'Deployment task: backfill_watermarked_blobs'
  task backfill_watermarked_blobs: :environment do
    puts "Running deploy task 'backfill_watermarked_blobs'"

    # Blobs whose serialized metadata carries the watermark flag but whose
    # `watermarked_at` column has not been filled in yet.
    unmigrated_blobs = ActiveStorage::Blob
      .where("metadata like '%\"watermark\":true%'")
      .where(watermarked_at: nil)

    # Copy each blob's creation time into `watermarked_at`, batch by batch.
    unmigrated_blobs.in_batches.update_all('watermarked_at = created_at')

    # Update task as completed. If you remove the line below, the task will
    # run with every deploy (or every time you call after_party:run).
    completed_version = AfterParty::TaskRecorder.new(__FILE__).timestamp
    AfterParty::TaskRecord.create(version: completed_version)
  end
end

View file

@ -0,0 +1,32 @@
namespace :after_party do
  desc 'Deployment task: backfill_virus_scan_blobs'
  task backfill_virus_scan_blobs: :environment do
    puts "Running deploy task 'backfill_virus_scan_blobs'"

    # Blobs whose serialized metadata records the given scan status but whose
    # `virus_scan_result` column is still empty.
    blobs_to_backfill = lambda do |status|
      ActiveStorage::Blob
        .where("metadata like '%\"virus_scan_result\":\"#{status}%'")
        .where(virus_scan_result: nil)
    end

    pending_blobs = blobs_to_backfill.call(ActiveStorage::VirusScanner::PENDING)
    infected_blobs = blobs_to_backfill.call(ActiveStorage::VirusScanner::INFECTED)
    integrity_error_blobs = blobs_to_backfill.call(ActiveStorage::VirusScanner::INTEGRITY_ERROR)
    safe_blobs = blobs_to_backfill.call(ActiveStorage::VirusScanner::SAFE)

    pp "pending blobs: #{pending_blobs.count}"
    pp "infected blobs: #{infected_blobs.count}"
    pp "with integrity error blobs: #{integrity_error_blobs.count}"

    # These three statuses are updated inline, in batches.
    pending_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::PENDING)
    infected_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::INFECTED)
    integrity_error_blobs.in_batches.update_all(virus_scan_result: ActiveStorage::VirusScanner::INTEGRITY_ERROR)

    # Safe blobs are handed to background jobs in slices of 10 000 ids
    # (presumably because they are by far the largest group; confirm).
    safe_blobs_ids = safe_blobs.pluck(:id)
    progress = ProgressReport.new(safe_blobs_ids.size)
    safe_blobs_ids.each_slice(10_000) do |ids|
      Migrations::BackfillVirusScanBlobsJob.perform_later(ids)
      progress.inc(ids.size)
    end
    progress.finish

    # Update task as completed. If you remove the line below, the task will
    # run with every deploy (or every time you call after_party:run).
    completed_version = AfterParty::TaskRecorder.new(__FILE__).timestamp
    AfterParty::TaskRecord.create(version: completed_version)
  end
end

View file

@ -88,7 +88,7 @@ RSpec.describe Attachment::EditComponent, type: :component do
context 'when watermarking is done' do
before do
attachment.metadata['watermark'] = true
attachment.blob.touch(:watermarked_at)
end
it 'renders a complete downlaod interface with details to download the file' do
@ -115,7 +115,7 @@ RSpec.describe Attachment::EditComponent, type: :component do
context 'when watermarking is done' do
before do
attachment.metadata['watermark'] = true
attachment.blob.touch(:watermarked_at)
end
it 'renders a simple link to view file' do
@ -127,7 +127,7 @@ RSpec.describe Attachment::EditComponent, type: :component do
context 'with non nominal or final antivirus status' do
before do
champ.piece_justificative_file[0].blob.update(metadata: attachment.blob.metadata.merge(virus_scan_result: virus_scan_result))
champ.piece_justificative_file[0].blob.update(virus_scan_result:)
end
context 'when the anti-virus scan is pending' do

View file

@ -84,7 +84,7 @@ RSpec.describe Attachment::MultipleComponent, type: :component do
let(:created_at) { 1.second.ago }
before do
attached_file.attachments[0].blob.update(metadata: { virus_scan_result: ActiveStorage::VirusScanner::PENDING })
attached_file.attachments[0].blob.update(virus_scan_result: ActiveStorage::VirusScanner::PENDING)
attached_file.attachments[0].update!(created_at:)
end

View file

@ -39,7 +39,7 @@ RSpec.describe Attachment::PendingPollComponent, type: :component do
context "when waterkmark is done" do
before do
attachment.blob[:metadata] = { watermark: true }
attachment.blob.touch(:watermarked_at)
end
it "does not render" do
@ -48,7 +48,7 @@ RSpec.describe Attachment::PendingPollComponent, type: :component do
context "when antivirus is in progress" do
before do
attachment.blob[:metadata] = { virus_scan_result: ActiveStorage::VirusScanner::PENDING }
attachment.blob.virus_scan_result = ActiveStorage::VirusScanner::PENDING
end
it "renders" do

View file

@ -15,7 +15,7 @@ RSpec.describe Attachment::ShowComponent, type: :component do
subject { render_inline(component).to_html }
before do
champ.piece_justificative_file[0].blob.update(metadata: champ.piece_justificative_file[0].blob.metadata.merge(virus_scan_result: virus_scan_result))
attachment.blob.update(virus_scan_result:, metadata: attachment.blob.metadata.merge(virus_scan_result:))
end
context 'when there is no anti-virus scan' do

View file

@ -15,18 +15,6 @@ describe VirusScannerJob, type: :job do
end
it { expect(blob.virus_scanner.safe?).to be_truthy }
it { expect(blob.analyzed?).to be_truthy }
it { expect(blob.lock_version).to eq(2) }
end
context "should raise ActiveRecord::StaleObjectError" do
let(:blob_2) { ActiveStorage::Blob.find(blob.id) }
before do
blob_2.metadata[:virus_scan_result] = "infected"
blob.metadata[:virus_scan_result] = "safe"
blob.save
end
it { expect { blob_2.save }.to raise_error(ActiveRecord::StaleObjectError) }
end
context "when there is an integrity error" do

View file

@ -53,24 +53,23 @@ describe Champs::PieceJustificativeChamp do
describe '#for_api' do
let(:champ_pj) { create(:champ_piece_justificative) }
let(:metadata) { champ_pj.piece_justificative_file.first.blob.metadata }
before { champ_pj.piece_justificative_file.first.blob.update(metadata: metadata.merge(virus_scan_result: status)) }
before { champ_pj.piece_justificative_file.first.blob.update(virus_scan_result:) }
subject { champ_pj.for_api }
context 'when file is safe' do
let(:status) { ActiveStorage::VirusScanner::SAFE }
let(:virus_scan_result) { ActiveStorage::VirusScanner::SAFE }
it { is_expected.to include("/rails/active_storage/disk/") }
end
context 'when file is not scanned' do
let(:status) { ActiveStorage::VirusScanner::PENDING }
let(:virus_scan_result) { ActiveStorage::VirusScanner::PENDING }
it { is_expected.to include("/rails/active_storage/disk/") }
end
context 'when file is infected' do
let(:status) { ActiveStorage::VirusScanner::INFECTED }
let(:virus_scan_result) { ActiveStorage::VirusScanner::INFECTED }
it { is_expected.to be_nil }
end
end

View file

@ -280,7 +280,7 @@ describe 'The user' do
end
attachments.each {
_1.blob.metadata = { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
_1.blob.virus_scan_result = ActiveStorage::VirusScanner::SAFE
_1.save!
}
expect(page).not_to have_text('Analyse antivirus en cours', wait: 10)

View file

@ -95,7 +95,7 @@ describe 'instructeurs/dossiers/state_button.html.haml', type: :view do
context 'with a justificatif' do
let(:dossier) do
dossier = create(:dossier, state, :with_justificatif)
dossier.justificatif_motivation.blob.update(metadata: dossier.justificatif_motivation.blob.metadata.merge(virus_scan_result: ActiveStorage::VirusScanner::SAFE))
dossier.justificatif_motivation.blob.update(virus_scan_result: ActiveStorage::VirusScanner::SAFE)
dossier
end