Merge pull request #7576 from betagouv/opendata-publish

[opendata] job that publishes opendata demarches to datagouv
This commit is contained in:
krichtof 2022-07-21 22:02:14 +02:00 committed by GitHub
commit df7e5256ea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 208 additions and 22 deletions

View file

@ -3,6 +3,10 @@ class Cron::CronJob < ApplicationJob
class_attribute :schedule_expression
class << self
# Default scheduling policy: every cron job is schedulable (returns true).
# Subclasses can override this class method to opt out, e.g. based on
# configuration.
def schedulable?
true
end
def schedule
remove if cron_expression_changed?
set(cron: cron_expression).perform_later if !scheduled?

View file

@ -0,0 +1,22 @@
# Cron job that exports the public demarches to a gzipped JSON file and
# uploads that file to datagouv.
class Cron::Datagouv::ExportAndPublishDemarchesPubliquesJob < Cron::CronJob
  self.schedule_expression = "every month at 3:00"

  # Generates the export under tmp/, uploads it, then removes the local file.
  #
  # Any error raised by the export service or by the upload propagates to the
  # caller; the temporary archive is removed in every case.
  #
  # @return [String] the upload response body, as returned by APIDatagouv::API.upload
  def perform(*args)
    gzip_filepath = [
      'tmp/',
      Time.zone.now.to_formatted_s(:number),
      '-demarches.json.gz'
    ].join

    begin
      DemarchesPubliquesExportService.new(gzip_filepath).call
      APIDatagouv::API.upload(gzip_filepath)
    ensure
      # rm_f instead of rm: if the archive was never created (e.g. the export
      # failed before opening it), rm would raise Errno::ENOENT from this
      # ensure clause and hide the original error.
      FileUtils.rm_f(gzip_filepath)
    end
  end

  # This job is only schedulable when the OPENDATA_ENABLED environment
  # variable is exactly 'enabled'.
  def self.schedulable?
    ENV.fetch('OPENDATA_ENABLED', nil) == 'enabled'
  end
end

View file

@ -0,0 +1,47 @@
# Small client for the datagouv HTTP API. Connection settings (API URL, API
# key, dataset and resource identifiers) are read from
# Rails.application.secrets.datagouv.
class APIDatagouv::API
  # Raised when datagouv answers an upload with a non-successful response.
  class RequestFailed < StandardError
    # @param url [String] the requested URL (accepted for call-site
    #   compatibility; it is not included in the message)
    # @param response [#code, #body] the failed HTTP response
    def initialize(url, response)
      msg = <<~TEXT
        HTTP error code: #{response.code}
        #{response.body}
      TEXT

      super(msg)
    end
  end

  class << self
    # Uploads the file at +path+ to the configured datagouv resource.
    #
    # @param path [String] path of the file to send
    # @return [String] the response body when the request succeeds
    # @raise [RequestFailed] when the response is not successful
    def upload(path)
      # Block form of File.open so the handle is closed even when the HTTP
      # call raises; a manual close after the request would be skipped.
      response = File.open(path, 'r') do |io|
        Typhoeus.post(
          datagouv_upload_url,
          body: {
            file: io
          },
          headers: { "X-Api-Key" => datagouv_secret[:api_key] }
        )
      end

      raise RequestFailed.new(datagouv_upload_url, response) unless response.success?

      response.body
    end

    private

    # Builds the upload endpoint from the configured API URL, dataset id and
    # resource id.
    def datagouv_upload_url
      [
        datagouv_secret[:api_url],
        "/datasets/", datagouv_secret[:descriptif_demarches_dataset],
        "/resources/", datagouv_secret[:descriptif_demarches_resource],
        "/upload/"
      ].join
    end

    # datagouv section of the application secrets.
    def datagouv_secret
      Rails.application.secrets.datagouv
    end
  end
end

View file

@ -1,27 +1,34 @@
class DemarchesPubliquesExportService
attr_reader :io
def initialize(io)
@io = io
attr_reader :gzip_filename
def initialize(gzip_filename)
@gzip_filename = gzip_filename
end
# Writes the JSON export into a gzip archive located at gzip_filename.
# The archive is opened in block form and handed to generate_json.
def call
  Zlib::GzipWriter.open(gzip_filename) { |archive| generate_json(archive) }
end
private
def generate_json(io)
end_cursor = nil
first = true
write_array_opening
write_array_opening(io)
loop do
write_demarches_separator if !first
write_demarches_separator(io) if !first
execute_query(cursor: end_cursor)
end_cursor = last_cursor
io.write(jsonify(demarches))
first = false
break if !has_next_page?
end
write_array_closing
write_array_closing(io)
io.close
end
private
def execute_query(cursor: nil)
result = API::V2::Schema.execute(query, variables: { cursor: cursor }, context: { internal_use: true })
raise DemarchesPubliquesExportService::Error.new(result["errors"]) if result["errors"]
@ -83,15 +90,15 @@ class DemarchesPubliquesExportService
demarches.map(&:to_json).join(',')
end
def write_array_opening
def write_array_opening(io)
io.write('[')
end
def write_array_closing
def write_array_closing(io)
io.write(']')
end
def write_demarches_separator
def write_demarches_separator(io)
io.write(',')
end
end

View file

@ -140,3 +140,11 @@ VITE_LEGACY=""
# Around July 2022 we changed duree_conservation_dossiers_dans_ds; this setting lets instances choose their own duration
NEW_MAX_DUREE_CONSERVATION=12
#
OPENDATA_ENABLED="enabled"
# Publish to datagouv
DATAGOUV_API_KEY="thisisasecret"
DATAGOUV_API_URL="https://www.data.gouv.fr/api/1"
DATAGOUV_DESCRIPTIF_DEMARCHES_DATASET="datasetid"
DATAGOUV_DESCRIPTIF_DEMARCHES_RESOURCE="resourceid"

View file

@ -78,7 +78,11 @@ defaults: &defaults
api_geo_url: <%= ENV['API_GEO_URL'] %>
api_adresse_url: <%= ENV['API_ADRESSE_URL'] %>
api_education_url: <%= ENV['API_EDUCATION_URL'] %>
datagouv:
api_key: <%= ENV['DATAGOUV_API_KEY'] %>
api_url: <%= ENV['DATAGOUV_API_URL'] %>
descriptif_demarches_dataset: <%= ENV['DATAGOUV_DESCRIPTIF_DEMARCHES_DATASET'] %>
descriptif_demarches_resource: <%= ENV['DATAGOUV_DESCRIPTIF_DEMARCHES_RESOURCE'] %>
development:
@ -109,6 +113,11 @@ test:
userpwd: 'fake:fake'
autocomplete:
api_geo_url: /test/api_geo
datagouv:
api_key: "clesecrete"
api_url: "https://www.data.gouv.fr/api/1"
descriptif_demarches_dataset: "ethopundataset"
descriptif_demarches_resource: "etbimuneressource"
# Do not keep production secrets in the repository,
# instead read values from the environment.

View file

@ -1,15 +1,17 @@
namespace :jobs do
desc 'Schedule all cron jobs'
desc 'Schedule all schedulable cron jobs'
task schedule: :environment do
glob = Rails.root.join('app', 'jobs', '**', '*_job.rb')
Dir.glob(glob).each { |f| require f }
Cron::CronJob.subclasses.each(&:schedule)
schedulable_jobs.each(&:schedule)
end
desc 'Display schedule for all cron jobs'
desc 'Display schedule for all schedulable cron jobs'
task display_schedule: :environment do
schedulable_jobs.each(&:display_schedule)
end
def schedulable_jobs
glob = Rails.root.join('app', 'jobs', '**', '*_job.rb')
Dir.glob(glob).each { |f| require f }
Cron::CronJob.subclasses.each(&:display_schedule)
Cron::CronJob.subclasses.filter(&:schedulable?)
end
end

View file

@ -0,0 +1,7 @@
# The examples below exercise the base class default, so the group describes
# Cron::CronJob itself rather than one of its subclasses.
RSpec.describe Cron::CronJob, type: :job do
  describe '.schedulable?' do
    it 'is schedulable by default' do
      expect(described_class.schedulable?).to be_truthy
    end
  end
end

View file

@ -0,0 +1,43 @@
RSpec.describe Cron::Datagouv::ExportAndPublishDemarchesPubliquesJob, type: :job do
  let!(:procedure) { create(:procedure, :published, :with_service, :with_type_de_champ) }
  let(:status) { 200 }
  let(:body) { "ok" }
  # Stubbed datagouv upload endpoint, answering with the status/body above.
  let(:stub) do
    stub_request(:post, %r{https://www\.data\.gouv\.fr/api/.*/upload/})
      .to_return(body: body, status: status)
  end

  describe 'perform' do
    before do
      stub
    end

    subject { described_class.perform_now }

    it 'sends a POST request to datagouv' do
      subject
      expect(stub).to have_been_requested
    end

    it 'removes gzip file even if an error occurred' do
      # A procedure without libelle makes the export fail.
      procedure.libelle = nil
      procedure.save(validate: false)

      expect { subject }.to raise_error(StandardError)
      expect(Dir.glob("*demarches.json.gz", base: 'tmp')).to be_empty
    end
  end

  describe '.schedulable?' do
    # ENV is process-wide: restore the previous value so these examples do
    # not leak their setting into the rest of the suite.
    around do |example|
      previous = ENV['OPENDATA_ENABLED']
      begin
        example.run
      ensure
        ENV['OPENDATA_ENABLED'] = previous
      end
    end

    context "when ENV['OPENDATA_ENABLED'] == 'enabled'" do
      it 'is schedulable' do
        ENV['OPENDATA_ENABLED'] = 'enabled'
        expect(described_class.schedulable?).to be_truthy
      end
    end

    context "when ENV['OPENDATA_ENABLED'] != 'enabled'" do
      it 'is not schedulable' do
        ENV['OPENDATA_ENABLED'] = nil
        expect(described_class.schedulable?).to be_falsy
      end
    end
  end
end

View file

@ -0,0 +1,28 @@
describe APIDatagouv::API do
  describe '.upload' do
    # Keep a reference to the Tempfile for the whole example: an unreferenced
    # Tempfile may be garbage-collected, which unlinks the file before the
    # upload opens it.
    let(:tempfile) { Tempfile.new }

    subject { described_class.upload(tempfile.path) }

    before do
      stub_request(:post, %r{https://www\.data\.gouv\.fr/api})
        .to_return(body: body, status: status)
    end

    after { tempfile.close! }

    context "when response ok" do
      let(:status) { 200 }
      let(:body) { "ok" }

      it 'returns body response' do
        expect(subject).to eq body
      end
    end

    context "when responds with error" do
      let(:status) { 400 }
      let(:body) { "oops ! There is a problem..." }

      it 'raises an error' do
        expect { subject }.to raise_error(APIDatagouv::API::RequestFailed)
      end
    end
  end
end

View file

@ -1,7 +1,9 @@
describe DemarchesPubliquesExportService do
let(:procedure) { create(:procedure, :published, :with_service, :with_type_de_champ) }
let!(:dossier) { create(:dossier, procedure: procedure) }
let(:io) { StringIO.new }
let(:gzip_filename) { "demarches.json.gz" }
after { FileUtils.rm(gzip_filename) }
describe 'call' do
it 'generate json for all closed procedures' do
@ -31,17 +33,24 @@ describe DemarchesPubliquesExportService do
]
}
}
DemarchesPubliquesExportService.new(gzip_filename).call
DemarchesPubliquesExportService.new(io).call
expect(JSON.parse(io.string)[0]
expect(JSON.parse(deflat_gzip(gzip_filename))[0]
.deep_symbolize_keys)
.to eq(expected_result)
end
it 'raises exception when procedure with bad data' do
procedure.libelle = nil
procedure.save(validate: false)
expect { DemarchesPubliquesExportService.new(io).call }.to raise_error(DemarchesPubliquesExportService::Error)
expect { DemarchesPubliquesExportService.new(gzip_filename).call }.to raise_error(DemarchesPubliquesExportService::Error)
end
end
# Returns the decompressed content of the gzip archive at +gzip_filename+.
def deflat_gzip(gzip_filename)
  Zlib::GzipReader.open(gzip_filename, &:read)
end
end