From 5739150f158bc22b90e429ae2c04b26a3e0ea6fb Mon Sep 17 00:00:00 2001
From: Martin
Date: Mon, 28 Feb 2022 13:16:27 +0100
Subject: [PATCH] feat(service/archive_uploader): add an archive uploader
 class to upload files through a custom script which handles encryption of
 massive files (bigger than 4 GB)

Update doc/object-storange-and-data-encryption.md

Co-authored-by: LeSim

Update app/services/archive_uploader.rb

Co-authored-by: LeSim

Update doc/object-storange-and-data-encryption.md

Co-authored-by: Pierre de La Morinerie

clean(doc): align document file name and document h1

clean(review): refactor based on various comments
---
 app/models/archive.rb                      |  2 +-
 app/services/archive_uploader.rb           | 88 ++++++++++++++++++++++
 app/services/procedure_archive_service.rb  |  9 +--
 config/env.example.optional                | 15 ++++
 doc/object-storange-and-data-encryption.md | 97 +++++++++++++++++++++
 spec/services/archive_uploader_spec.rb     | 70 ++++++++++++++++
 6 files changed, 274 insertions(+), 7 deletions(-)
 create mode 100644 app/services/archive_uploader.rb
 create mode 100644 doc/object-storange-and-data-encryption.md
 create mode 100644 spec/services/archive_uploader_spec.rb

diff --git a/app/models/archive.rb b/app/models/archive.rb
index 7ee93720f..42e0085ab 100644
--- a/app/models/archive.rb
+++ b/app/models/archive.rb
@@ -13,7 +13,7 @@ class Archive < ApplicationRecord
   include AASM
 
-  RETENTION_DURATION = 1.week
+  RETENTION_DURATION = 4.days
 
   has_and_belongs_to_many :groupe_instructeurs
 
diff --git a/app/services/archive_uploader.rb b/app/services/archive_uploader.rb
new file mode 100644
index 000000000..1fb039600
--- /dev/null
+++ b/app/services/archive_uploader.rb
@@ -0,0 +1,88 @@
+class ArchiveUploader
+  # see: https://docs.ovh.com/fr/storage/pcs/capabilities-and-limitations/#max_file_size-5368709122-5gb
+  # officially the limit is 5 GB, but let's not get too close to it.
+  # when the file is bigger, the object storage expects the chunks + a manifest.
+  MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING = ENV.fetch('ACTIVE_STORAGE_FILE_SIZE_THRESHOLD_BEFORE_CUSTOM_UPLOAD') { 4.gigabytes }.to_i
+
+  def upload
+    uploaded_blob = create_and_upload_blob
+    begin
+      archive.file.purge if archive.file.attached?
+    rescue ActiveStorage::FileNotFoundError
+      archive.file.destroy
+      archive.file.detach
+    end
+    archive.reload
+    ActiveStorage::Attachment.create(
+      name: 'file',
+      record_type: 'Archive',
+      record_id: archive.id,
+      blob_id: uploaded_blob.id
+    )
+  end
+
+  private
+
+  attr_reader :procedure, :archive, :filepath
+
+  def create_and_upload_blob
+    if active_storage_service_local? || File.size(filepath) < MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING
+      upload_with_active_storage
+    else
+      upload_with_chunking_wrapper
+    end
+  end
+
+  def active_storage_service_local?
+    Rails.application.config.active_storage.service == :local
+  end
+
+  def upload_with_active_storage
+    params = blob_default_params(filepath).merge(io: File.open(filepath),
+                                                 identify: false)
+    blob = ActiveStorage::Blob.create_and_upload!(**params)
+    return blob
+  end
+
+  def upload_with_chunking_wrapper
+    params = blob_default_params(filepath).merge(byte_size: File.size(filepath),
+                                                 checksum: Digest::SHA256.file(filepath).hexdigest)
+    blob = ActiveStorage::Blob.create_before_direct_upload!(**params)
+    if syscall_to_custom_uploader(blob)
+      return blob
+    else
+      blob.purge
+      fail "custom archive attachment failed; should it be retried?"
+    end
+  end
+
+  # keeps consistency between both ActiveStorage code paths (otherwise archives are not stored under 'archives/'):
+  # - upload_with_active_storage: the blob is uploaded by ActiveStorage
+  # - upload_with_chunking_wrapper: the blob is uploaded by the custom script
+  def blob_default_params(filepath)
+    {
+      key: namespaced_object_key,
+      filename: archive.filename(procedure),
+      content_type: 'application/zip',
+      metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
+    }
+  end
+
+  # explicitly memoized so it stays consistent across many calls (ex: retry)
+  def namespaced_object_key
+    @namespaced_object_key ||= "archives/#{Date.today.strftime("%Y-%m-%d")}/#{SecureRandom.uuid}"
+  end
+
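+  # contract: the custom script receives the local file path and the destination
+  # object key as arguments, and `system` returns false/nil if the script fails
+  # or can't be spawned, letting us purge the orphaned blob before raising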
+  def syscall_to_custom_uploader(blob)
+    system(ENV.fetch('ACTIVE_STORAGE_BIG_FILE_UPLOADER_WITH_ENCRYPTION_PATH').to_s, filepath, blob.key)
+  end
+
+  def initialize(procedure:, archive:, filepath:)
+    @procedure = procedure
+    @archive = archive
+    @filepath = filepath
+  end
+end
diff --git a/app/services/procedure_archive_service.rb b/app/services/procedure_archive_service.rb
index f5561d426..3fd4c7211 100644
--- a/app/services/procedure_archive_service.rb
+++ b/app/services/procedure_archive_service.rb
@@ -34,12 +34,9 @@
     end
 
     attachments = create_list_of_attachments(dossiers)
-    download_and_zip(attachments) do |zip_file|
-      archive.file.attach(
-        io: File.open(zip_file),
-        filename: archive.filename(@procedure),
-        metadata: { virus_scan_result: ActiveStorage::VirusScanner::SAFE }
-      )
+    download_and_zip(attachments) do |zip_filepath|
+      ArchiveUploader.new(procedure: @procedure, archive: archive, filepath: zip_filepath)
+        .upload
     end
     archive.make_available!
     InstructeurMailer.send_archive(instructeur, @procedure, archive).deliver_later
diff --git a/config/env.example.optional b/config/env.example.optional
index 615162f9e..51b05c16d 100644
--- a/config/env.example.optional
+++ b/config/env.example.optional
@@ -99,3 +99,18 @@ MATOMO_IFRAME_URL="https://matomo.example.org/index.php?module=CoreAdminHome&act
 # Landing page sections
 # LANDING_TESTIMONIALS_ENABLED="enabled"
 # LANDING_USERS_ENABLED="enabled"
+
+# Archive creation options
+# when we create an archive of a Procedure, the worker uses this directory as the root in which archives are built (each archive is built within a tmp dir inside this dir)
+# ARCHIVE_CREATION_DIR='/tmp'
+# max parallel downloads when creating an archive
+# ARCHIVE_DOWNLOAD_MAX_PARALLEL=10
+
+# Massive archive file encryption options
+# depending on your object storage backend (e.g. aws::s3 / ovh::object_storage), a custom upload strategy may be required for big files if you encrypt your files to guard against a data breach
+# suggested value is 4.gigabytes (4294967296)
+# ACTIVE_STORAGE_FILE_SIZE_THRESHOLD_BEFORE_CUSTOM_UPLOAD=4294967296
+# a custom script handling the upload of big files
+# ACTIVE_STORAGE_BIG_FILE_UPLOADER_WITH_ENCRYPTION_PATH='/usr/local/bin/swift'
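+# the script is invoked as: $ACTIVE_STORAGE_BIG_FILE_UPLOADER_WITH_ENCRYPTION_PATH <local_file_path> <object_key>
+# and must exit with status 0 on success (see doc/object-storange-and-data-encryption.md for a sample implementation)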
diff --git a/doc/object-storange-and-data-encryption.md b/doc/object-storange-and-data-encryption.md
new file mode 100644
index 000000000..f3bf4a395
--- /dev/null
+++ b/doc/object-storange-and-data-encryption.md
@@ -0,0 +1,97 @@
+# Object Storange And Data Encryption
+
+## Object Storage
+
+By default, demarches-simplifiees.fr uses an [OVH Object Storage](https://www.ovhcloud.com/en/public-cloud/object-storage/) backend.
+The hard drives are encrypted at rest, but to protect user files even better, demarches-simplifiees.fr can also use an external encryption proxy that encrypts and decrypts files on the fly:
+
+* Encryption is done via our [proxy](https://github.com/betagouv/ds_proxy) when the file is uploaded by a client.
+* Decryption is done via the same proxy when the file is downloaded by a client.
+
+### Object Storage limitations
+
+As an S3-compatible object storage backend, OVH Object Storage suffers from the same limitations.
+
+One of them is that a file bigger than 5 GB must be chunked into segments when uploaded (see the [documentation](https://docs.ovh.com/fr/storage/pcs/capabilities-and-limitations/#max_file_size-5368709122-5gb)).
+
+This process chunks the file into segments, then reassembles them via a manifest. Unfortunately, encryption can't work with this use case: each segment would be encrypted independently, so the reassembled file could not be decrypted as a single stream.
+
+So we use a custom script that wraps two calls to our proxy in order to buffer all the chunks and encrypt/decrypt the file as a whole. Here is an example:
+
+```
+#!/usr/bin/env bash
+# wrapper script to encrypt and upload a file received from the archive worker
+
+set -o errexit
+set -o pipefail
+set -o nounset
+
+# params
+# 1: filename
+# 2: key
+if ! [ "$#" -eq 2 ]; then
+  echo "usage: $0 <filename> <key>"
+  exit 1
+fi
+
+local_file_path=$1
+remote_basename=$(basename "$local_file_path")
+key=$2
+
+# encrypt
+curl -s -XPUT http://ds_proxy_host:ds_proxy_port/local/encrypt/${remote_basename} --data-binary @${local_file_path}
+
+# get back the encrypted file
+encrypted_filename="${local_file_path}.enc"
+curl -s http://ds_proxy_host:ds_proxy_port/local/encrypt/${remote_basename} -o ${encrypted_filename}
+
+# OVH openstack params
+os_tenant_name=os_tenant_name
+os_username=os_username
+os_password=os_password
+os_region_name=GRA
+
+# auth = https://auth.cloud.ovh.net/v3/
+# use the haproxy endpoint, not the direct internet URL
+os_auth_url="os_auth_url"
+os_storage_url="os_storage_url"
+container_name=container_name
+
+expiring_delay="$((60 * 60 * 24 * 4))" # 4 days
+
+# upload (capture the exit code manually: with errexit on, a failing swift call would abort the script before the cleanup below)
+swift_exit_code=0
+/usr/local/bin/swift \
+  --auth-version 3 \
+  --os-auth-url "$os_auth_url" \
+  --os-storage-url "$os_storage_url" \
+  --os-region-name "$os_region_name" \
+  --os-tenant-name "$os_tenant_name" \
+  --os-username "$os_username" \
+  --os-password "$os_password" \
+  upload \
+  --header "X-Delete-After: ${expiring_delay}" \
+  --segment-size "$((3 * 1024 * 1024 * 1024))" \
+  --header "Content-Disposition: filename=${remote_basename}" \
+  --object-name "${key}" \
+  "${container_name}" "${encrypted_filename}" || swift_exit_code=$?
+
+# cleanup
+rm ${encrypted_filename}
+
+# return swift's exit code
+exit ${swift_exit_code}
+```
+
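+## How the Rails app chooses an upload strategy
+
+For reference, here is how the `ArchiveUploader` service decides when to shell out to the script above (a condensed sketch of `ArchiveUploader#create_and_upload_blob` from this patch):
+
+```ruby
+# local disk storage never needs the wrapper; remote storage needs it for big files
+if active_storage_service_local? || File.size(filepath) < MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING
+  upload_with_active_storage      # regular ActiveStorage upload
+else
+  upload_with_chunking_wrapper    # delegates to the custom script
+end
+```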
diff --git a/spec/services/archive_uploader_spec.rb b/spec/services/archive_uploader_spec.rb
new file mode 100644
index 000000000..c4b4bcfac
--- /dev/null
+++ b/spec/services/archive_uploader_spec.rb
@@ -0,0 +1,70 @@
+describe ArchiveUploader do
+  let(:procedure) { build(:procedure) }
+  let(:archive) { create(:archive) }
+  let(:file) { Tempfile.new }
+  let(:fixture_blob) { ActiveStorage::Blob.create_before_direct_upload!(filename: File.basename(file.path), byte_size: file.size, checksum: 'osf') }
+
+  let(:uploader) { ArchiveUploader.new(procedure: procedure, archive: archive, filepath: file.path) }
+
+  describe '#upload' do
+    context 'when active storage service is local' do
+      it 'uploads with upload_with_active_storage' do
+        expect(uploader).to receive(:active_storage_service_local?).and_return(true)
+        expect(uploader).to receive(:upload_with_active_storage).and_return(fixture_blob)
+        uploader.upload
+      end
+
+      it 'links the created blob as an attachment to the current archive instance' do
+        expect { uploader.upload }
+          .to change { ActiveStorage::Attachment.where(name: 'file', record_type: 'Archive', record_id: archive.id).count }.by(1)
+      end
+    end
+
+    context 'when active storage service is not local' do
+      before do
+        expect(uploader).to receive(:active_storage_service_local?).and_return(false)
+        expect(File).to receive(:size).with(file.path).and_return(filesize)
+      end
+
+      context 'when the file is smaller than MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING' do
+        let(:filesize) { ArchiveUploader::MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING - 1 }
+
+        it 'uploads with upload_with_active_storage' do
+          expect(uploader).to receive(:upload_with_active_storage).and_return(fixture_blob)
+          uploader.upload
+        end
+      end
+
+      context 'when the file is bigger than MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING' do
+        let(:filesize) { ArchiveUploader::MAX_FILE_SIZE_FOR_BACKEND_BEFORE_CHUNKING + 1 }
+
+        it 'uploads with upload_with_chunking_wrapper' do
+          expect(uploader).to receive(:upload_with_chunking_wrapper).and_return(fixture_blob)
+          uploader.upload
+        end
+
+        it 'links the created blob as an attachment to the current archive instance' do
+          expect(uploader).to receive(:upload_with_chunking_wrapper).and_return(fixture_blob)
+          expect { uploader.upload }
+            .to change { ActiveStorage::Attachment.where(name: 'file', record_type: 'Archive', record_id: archive.id).count }.by(1)
+        end
+      end
+    end
+  end
+
+  describe '#upload_with_chunking_wrapper' do
+    let(:fake_blob_checksum) { Digest::SHA256.file(file.path) }
+    let(:fake_blob_bytesize) { 100.gigabytes }
+
+    before do
+      expect(uploader).to receive(:syscall_to_custom_uploader).and_return(true)
+      expect(File).to receive(:size).with(file.path).and_return(fake_blob_bytesize)
+      expect(Digest::SHA256).to receive(:file).with(file.path).and_return(double(hexdigest: fake_blob_checksum.hexdigest))
+    end
+
+    it 'creates a blob' do
+      expect { uploader.send(:upload_with_chunking_wrapper) }
+        .to change { ActiveStorage::Blob.where(checksum: fake_blob_checksum.hexdigest, byte_size: fake_blob_bytesize).count }.by(1)
+    end
+  end
+end