feat(tags): replace regexp-based parser with a parser combinator
This commit is contained in:
parent
32cdff820f
commit
432c4690b5
5 changed files with 137 additions and 52 deletions
1
Gemfile
1
Gemfile
|
@ -59,6 +59,7 @@ gem 'net-imap', require: false # See https://github.com/mikel/mail/pull/1439
|
|||
gem 'net-pop', require: false # same
|
||||
gem 'net-smtp', require: false # same
|
||||
gem 'openid_connect'
|
||||
gem 'parsby'
|
||||
gem 'pg'
|
||||
gem 'phonelib'
|
||||
gem 'prawn-rails' # PDF Generation
|
||||
|
|
|
@ -467,6 +467,7 @@ GEM
|
|||
webfinger (>= 1.0.1)
|
||||
orm_adapter (0.5.0)
|
||||
parallel (1.22.1)
|
||||
parsby (1.1.1)
|
||||
parser (3.1.2.0)
|
||||
ast (~> 2.4.1)
|
||||
pdf-core (0.9.0)
|
||||
|
@ -877,6 +878,7 @@ DEPENDENCIES
|
|||
net-pop
|
||||
net-smtp
|
||||
openid_connect
|
||||
parsby
|
||||
pg
|
||||
phonelib
|
||||
prawn-rails
|
||||
|
|
|
@ -4,6 +4,45 @@ module TagsSubstitutionConcern
|
|||
include Rails.application.routes.url_helpers
|
||||
include ActionView::Helpers::UrlHelper
|
||||
|
||||
# Tokenizer for templates that contain `--tag--` placeholders, written with
# Parsby combinators.
#
# `parse` returns an array of hashes, each either `{ text: String }` or
# `{ tag: String }`, in document order. For example
# "un test--yolo-- fin" yields
# [{ text: "un test" }, { tag: "yolo" }, { text: " fin" }].
module TagsParser
  include Parsby::Combinators
  extend self

  # Runs the +doc+ grammar over +io+ (the template source) and returns its tokens.
  def parse(io)
    doc.parse(io)
  end

  # Whole document: any mix of tags and text runs, followed by end of input.
  define_combinator(:doc) { many(tag | text) < eof }

  # A run of plain text: characters are taken one by one for as long as no tag
  # can be parsed at the current position.
  define_combinator :text do
    plain_chars = many(any_char.that_fail(tag))

    # The joined string is re-tagged as UTF-8 before being handed back
    # (presumably the parser yields binary-encoded strings; to be confirmed).
    join(plain_chars).fmap { |raw| { text: raw.force_encoding('utf-8').encode } }
  end

  # A tag: its name enclosed between two `--` delimiters.
  define_combinator :tag do
    between(tag_delimiter, tag_delimiter, tag_text).fmap { |name| { tag: name } }
  end

  # The literal marker that opens and closes a tag.
  define_combinator(:tag_delimiter) { lit('--') }

  # The tag name: every character up to the next delimiter or end of line,
  # so a tag never spans several lines.
  define_combinator :tag_text do
    name_chars = many(any_char.that_fail(tag_delimiter | eol))

    join(name_chars).fmap { |raw| raw.force_encoding('utf-8').encode }
  end

  # End of line, in either CRLF or LF form.
  define_combinator(:eol) { lit("\r\n") | lit("\n") }
end
|
||||
|
||||
DOSSIER_TAGS = [
|
||||
{
|
||||
libelle: 'motivation',
|
||||
|
@ -141,8 +180,6 @@ module TagsSubstitutionConcern
|
|||
|
||||
SHARED_TAG_LIBELLES = (DOSSIER_TAGS + DOSSIER_TAGS_FOR_MAIL + INDIVIDUAL_TAGS + ENTREPRISE_TAGS + ROUTAGE_TAGS).map { |tag| tag[:libelle] }
|
||||
|
||||
TAG_DELIMITERS_REGEX = /--(?<capture>((?!--).)*)--/
|
||||
|
||||
def tags
|
||||
if procedure.for_individual?
|
||||
identity_tags = INDIVIDUAL_TAGS
|
||||
|
@ -159,7 +196,7 @@ module TagsSubstitutionConcern
|
|||
end
|
||||
|
||||
def used_type_de_champ_tags(text)
|
||||
used_tags_for(text, with_libelle: true).filter_map do |(tag, libelle)|
|
||||
used_tags_and_libelle_for(text).filter_map do |(tag, libelle)|
|
||||
if !tag.in?(SHARED_TAG_LIBELLES)
|
||||
if tag.start_with?('tdc')
|
||||
[libelle, tag.gsub('tdc', '').to_i]
|
||||
|
@ -170,19 +207,8 @@ module TagsSubstitutionConcern
|
|||
end
|
||||
end
|
||||
|
||||
def used_tags_for(text, with_libelle: false)
|
||||
text, tags = normalize_tags(text)
|
||||
text
|
||||
.scan(TAG_DELIMITERS_REGEX)
|
||||
.flatten
|
||||
.map do |tag_str|
|
||||
if with_libelle
|
||||
tag = tags.find { |tag| tag[:id] == tag_str }
|
||||
[tag_str, tag ? tag[:libelle] : nil]
|
||||
else
|
||||
tag_str
|
||||
end
|
||||
end
|
||||
def used_tags_for(text)
|
||||
used_tags_and_libelle_for(text).map { |(tag, _)| tag }
|
||||
end
|
||||
|
||||
private
|
||||
|
@ -250,7 +276,7 @@ module TagsSubstitutionConcern
|
|||
return ''
|
||||
end
|
||||
|
||||
text, _ = normalize_tags(text)
|
||||
tokens = parse_tags(text)
|
||||
|
||||
tags_and_datas = [
|
||||
[champ_public_tags(dossier: dossier), dossier.champs],
|
||||
|
@ -259,51 +285,68 @@ module TagsSubstitutionConcern
|
|||
[ROUTAGE_TAGS, dossier],
|
||||
[INDIVIDUAL_TAGS, dossier.individual],
|
||||
[ENTREPRISE_TAGS, dossier.etablissement&.entreprise]
|
||||
]
|
||||
|
||||
tags_and_datas
|
||||
.map { |(tags, data)| [filter_tags(tags), data] }
|
||||
.reduce(text) { |acc, (tags, data)| replace_tags_with_values_from_data(acc, tags, data) }
|
||||
].filter_map do |(tags, data)|
|
||||
data && [filter_tags(tags).index_by { _1[:id].presence || _1[:libelle] }, data]
|
||||
end
|
||||
|
||||
def replace_tags_with_values_from_data(text, tags, data)
|
||||
if data.present?
|
||||
tags.reduce(text) do |acc, tag|
|
||||
replace_tag(acc, tag, data)
|
||||
end
|
||||
tags_and_datas.reduce(tokens) do |tokens, (tags, data)|
|
||||
# Replace tags with their value
|
||||
tokens.map do |token|
|
||||
case token
|
||||
in { tag: _, id: id } if tags.key?(id)
|
||||
{ text: replace_tag(tags.fetch(id), data) }
|
||||
in { tag: tag } if tags.key?(tag)
|
||||
{ text: replace_tag(tags.fetch(tag), data) }
|
||||
else
|
||||
token
|
||||
end
|
||||
end
|
||||
end.map do |token|
|
||||
# Get tokens text representation
|
||||
case token
|
||||
in { tag: tag }
|
||||
"--#{tag}--"
|
||||
in { text: text }
|
||||
text
|
||||
end
|
||||
end.join('')
|
||||
end
|
||||
|
||||
def replace_tag(text, tag, data)
|
||||
libelle = Regexp.quote(tag[:id].presence || tag[:libelle])
|
||||
|
||||
# allow any kind of space (non-breaking or other) in the tag’s libellé to match any kind of space in the template
|
||||
# (the '\\ |' is there because plain ASCII spaces were escaped by preceding Regexp.quote)
|
||||
libelle.gsub!(/\\ |[[:blank:]]/, "[[:blank:]]")
|
||||
|
||||
def replace_tag(tag, data)
|
||||
if tag.key?(:target)
|
||||
value = data.send(tag[:target])
|
||||
data.public_send(tag[:target])
|
||||
else
|
||||
value = instance_exec(data, &tag[:lambda])
|
||||
instance_exec(data, &tag[:lambda])
|
||||
end
|
||||
end
|
||||
|
||||
text.gsub(/--#{libelle}--/, value.to_s)
|
||||
# Tags built from the procedure's types de champ: the public ones (built with
# Dossier::SOUMIS) followed by the private ones (built with
# Dossier::INSTRUCTION_COMMENCEE), passed through +filter_tags+.
def procedure_types_de_champ_tags
  public_tags = types_de_champ_tags(procedure.types_de_champ_public_for_tags, Dossier::SOUMIS)
  private_tags = types_de_champ_tags(procedure.types_de_champ_private_for_tags, Dossier::INSTRUCTION_COMMENCEE)

  filter_tags(public_tags + private_tags)
end
|
||||
|
||||
def normalize_tags(text)
|
||||
tags = types_de_champ_tags(procedure.types_de_champ_public_for_tags, Dossier::SOUMIS) + types_de_champ_tags(procedure.types_de_champ_private_for_tags, Dossier::INSTRUCTION_COMMENCEE)
|
||||
[filter_tags(tags).reduce(text) { |text, tag| normalize_tag(text, tag) }, tags]
|
||||
# Tokenizes +text+ with TagsParser and annotates type-de-champ tags.
#
# A `{ tag: name }` token whose name is exactly the +:libelle+ of one of
# +procedure_types_de_champ_tags+ becomes `{ tag: name, id: <that tag's :id> }`.
# Every other token (text, or a tag with an unknown name) is returned as is.
def parse_tags(text)
  tags_by_libelle = procedure_types_de_champ_tags.index_by { |tdc_tag| tdc_tag[:libelle] }

  TagsParser.parse(text).map do |token|
    case token
    in { tag: libelle } if tags_by_libelle.key?(libelle)
      matching_tag = tags_by_libelle.fetch(libelle)
      { tag: libelle, id: matching_tag.fetch(:id) }
    else
      token
    end
  end
end
|
||||
|
||||
def normalize_tag(text, tag)
|
||||
libelle = Regexp.quote(tag[:libelle])
|
||||
|
||||
# allow any kind of space (non-breaking or other) in the tag’s libellé to match any kind of space in the template
|
||||
# (the '\\ |' is there because plain ASCII spaces were escaped by preceding Regexp.quote)
|
||||
libelle.gsub!(/\\ |[[:blank:]]/, "[[:blank:]]")
|
||||
|
||||
text.gsub(/--#{libelle}--/, "--#{tag[:id]}--")
|
||||
# Lists the tags found in +text+, in order of appearance.
#
# Each entry is `[id, name]` when the token carries an +:id+ (see +parse_tags+),
# or the one-element array `[name]` otherwise. Text tokens are dropped.
def used_tags_and_libelle_for(text)
  parse_tags(text).filter_map do |token|
    case token
    in { tag: libelle, id: tag_id } then [tag_id, libelle]
    in { tag: name } then [name]
    else nil # plain text: filtered out by filter_map
    end
  end
end
|
||||
end
|
||||
|
|
|
@ -29,4 +29,28 @@ namespace :benchmarks do
|
|||
x.report("Démarche 55824") { PiecesJustificativesService.generate_dossier_export(p_55824.dossiers.limit(10_000)) }
|
||||
end
|
||||
end
|
||||
|
||||
# Round-trip check of TagsParser against every AttestationTemplate: each body is
# parsed, the tokens are serialized back to a string, and the task aborts on the
# first template whose serialization differs from its original body.
desc 'Attestation Template parser'
task attestation_template_parser: :environment do
  progress = ProgressReport.new(AttestationTemplate.count)

  AttestationTemplate.find_each do |template|
    parsed = TagsSubstitutionConcern::TagsParser.parse(template.body)

    # Rebuild the template text from its tokens: tags get their delimiters back,
    # text is emitted verbatim.
    serialized = parsed.map do |token|
      case token
      in { tag: tag }
        "--#{tag}--"
      in { text: text }
        text
      end
    end.join('')

    if serialized != template.body
      # `raise`, not `throw`: with no matching `catch`, `throw` only surfaces as
      # an UncaughtThrowError instead of an error carrying this message.
      raise "Template '#{serialized}' is not eq '#{template.body}' with attestation template #{template.id}"
    end

    progress.inc
  rescue StandardError
    pp "Error with attestation template #{template.id}"
    # Bare `raise` re-raises the rescued exception with its class and backtrace intact.
    raise
  end

  progress.finish
end
|
||||
end
|
||||
|
|
|
@ -383,13 +383,13 @@ describe TagsSubstitutionConcern, type: :model do
|
|||
|
||||
context 'when generating a document for a dossier that is not termine' do
|
||||
let(:dossier) { create(:dossier) }
|
||||
let(:template) { '--motivation-- --date de décision--' }
|
||||
let(:template) { 'text --motivation-- --date de décision--' }
|
||||
let(:state) { Dossier.states.fetch(:en_instruction) }
|
||||
|
||||
subject { template_concern.send(:replace_tags, template, dossier) }
|
||||
|
||||
it "does not treat motivation or date de décision as tags" do
|
||||
is_expected.to eq('--motivation-- --date de décision--')
|
||||
is_expected.to eq('text --motivation-- --date de décision--')
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -495,4 +495,19 @@ describe TagsSubstitutionConcern, type: :model do
|
|||
|
||||
it { is_expected.to eq([["public", procedure.draft_revision.types_de_champ.first.stable_id], ['yolo']]) }
|
||||
end
|
||||
|
||||
describe 'parser' do
  # Covers tags surrounded by text, a tag glued to the preceding word, and a
  # `---` line, which must stay plain text because a tag cannot span a line break.
  let(:source) { "hello world --public--, --numéro du dossier--, un test--yolo-- encore du text\n---\n encore du text" }

  let(:expected_tokens) do
    [
      { text: "hello world " },
      { tag: "public" },
      { text: ", " },
      { tag: "numéro du dossier" },
      { text: ", un test" },
      { tag: "yolo" },
      { text: " encore du text\n" + "---\n" + " encore du text" }
    ]
  end

  it do
    expect(TagsSubstitutionConcern::TagsParser.parse(source)).to eq(expected_tokens)
  end
end
|
||||
end
|
||||
|
|
Loading…
Reference in a new issue