openstreetmap-website/script/locale/diff
2009-06-26 10:48:30 +00:00

286 lines
7.1 KiB
Perl
Executable file

#!/usr/bin/env perl
use feature ':5.10';
use strict;
use warnings;
use YAML::Syck qw(Load LoadFile);
use Test::Differences;
use Pod::Usage ();
use Getopt::Long ();
=head1 NAME
locale-diff - Compare two YAML files and print how their datastructures differ
=head1 SYNOPSIS
# --keys is the default
diff en.yml is.yml
diff --keys en.yml is.yml
# --untranslated-values prints keys whose values don't differ
diff --untranslated-values en.yml is.yml
# --untranslated-values-all prints keys whose values don't
# differ, ignoring the blacklist which prunes things
# unlikely to be translated
diff --untranslated-values-all en.yml is.yml
# Check that interpolated variables ({{var}} and [[var]]) are the same
diff --validate-variables en.yml is.yml
=head1 DESCRIPTION
This utility prints the differences between two YAML files using
L<Test::Differences>. Its purpose is to diff the files in
F<config/locales> to find out what keys need to be added to the
translated files when F<en.yml> changes.
=head1 OPTIONS
=over
=item -h, --help
Print this help message.
=item --keys
Show the hash keys that differ between the two files, useful for
merging new entries from F<en.yml> to a local file.
=item --untranslated-values
Show keys whose values are either exactly the same between the two
files, or don't exist in the target file (the latter file
specified). The values are pruned according to global and language
specific blacklists found in the C<__DATA__> section of this script.
This helps to find untranslated values.
=item --untranslated-values-all
Like C<--untranslated-values> but ignores blacklists.
=item --validate-variables
Check that interpolated Ruby i18n variables (C<{{foo}}> and
C<[[foo]]>) are equivalent in the two provided files.
=back
=head1 AUTHOR
E<AElig>var ArnfjE<ouml>rE<eth> Bjarmason <avar@f-prot.com>
=cut
# Get the command-line options
Getopt::Long::Parser->new(
config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
'h|help' => \my $help,
'keys' => \my $keys,
'untranslated-values' => \my $untranslated_values,
'untranslated-values-all' => \my $untranslated_values_all,
'validate-variables' => \my $validate_variables,
) or help();
# --keys is the default
$keys = 1 if not $untranslated_values_all and not $untranslated_values and not $validate_variables;
# On --help
help() if $help;
# If we're not given two .yml files
help() if @ARGV != 2 or (!-f $ARGV[0] or !-f $ARGV[1]);
my ($from, $to) = @ARGV;
my $from_data = LoadFile($from);
my $to_data = LoadFile($to);
my $from_parsed = { iterate($from_data->{basename($from)}) };
my $to_parsed = { iterate($to_data->{basename($to)}) };
if ($keys)
{
print_key_differences($from_parsed, $to_parsed);
}
elsif ($untranslated_values or $untranslated_values_all)
{
my @untranslated = untranslated_keys($from_parsed, $to_parsed);
# Prune according to blacklist
if ($untranslated_values) {
@untranslated = prune_untranslated_with_blacklist(basename($to), @untranslated);
}
say for @untranslated;
} elsif ($validate_variables)
{
print_validate_variables($from_parsed, $to_parsed);
}
exit 0;
# Print how the key sets of two flattened locale hashes differ.
#
# Takes two hash references (source first, target second) and hands
# their sorted key lists to Test::Differences' eq_or_diff.
sub print_key_differences
{
    my ($source, $target) = @_;

    # Hack around Test::Differences wanting a Test::* module loaded:
    # claim Test.pm is loaded and supply a Test::ok that just prints
    # its first argument
    $INC{"Test.pm"} = 1;
    sub Test::ok { print shift }

    my @source_keys = sort keys %$source;
    my @target_keys = sort keys %$target;

    # Diff the tree
    eq_or_diff(\@source_keys, \@target_keys);
}
# Return, sorted, the keys of the source hash that look untranslated:
# those that are missing from the target hash or whose value is the
# same string in both hashes.
sub untranslated_keys
{
    my ($from_parsed, $to_parsed) = @_;

    my @candidates;
    for my $key (keys %$from_parsed)
    {
        if (not exists $to_parsed->{$key})
        {
            # Not present in the target file at all
            push @candidates, $key;
        }
        elsif ($from_parsed->{$key} eq $to_parsed->{$key})
        {
            # Present, but identical to the source text
            push @candidates, $key;
        }
    }

    return sort @candidates;
}
# Remove blacklisted keys from a list of untranslated keys.
#
# $language selects the per-language list in the YAML document stored
# in this script's __DATA__ section; its entries are merged over the
# "default" list. Returns the surviving keys, sorted.
sub prune_untranslated_with_blacklist
{
    my ($language, @keys) = @_;

    # Keep the candidate keys as a set so they can be deleted by name
    my %keys = map { $_ => undef } @keys;

    my $yaml_text = join '', <DATA>;
    my $end_yaml = Load($yaml_text);
    my $untranslated_values = $end_yaml->{untranslated_values};

    # Later pairs win, so a language entry overrides the default one
    my %bw_list = (
        %{ $untranslated_values->{default} },
        %{ $untranslated_values->{$language} || {} },
    );

    # FIXME: Does syck actually support true/false booleans in yaml?
    for my $key (keys %bw_list)
    {
        next unless $bw_list{$key} eq 'true';
        delete $keys{$key};
    }

    return sort keys %keys;
}
# Report keys whose interpolated variables differ between two files.
#
# Takes two hash references of flattened "dotted.key" => string pairs
# (source first, target second). For every key present in both, the
# variables found by parse_variables_from_string() are compared and a
# line is printed when they differ. Keys missing from the target are
# skipped. The file-level $from and $to are used in the message text.
sub print_validate_variables
{
    my ($f, $t) = @_;

    # Walk the keys in sorted order so the report is stable between runs
    for my $key (sort keys %$f)
    {
        next if not exists $t->{$key};

        my @from_var = parse_variables_from_string($f->{$key});
        my @to_var   = parse_variables_from_string($t->{$key});

        # Both lists come back sorted and variable names cannot contain
        # a space, so comparing the space-joined lists is an exact
        # element-by-element string comparison. This avoids the
        # deprecated smartmatch operator (~~).
        my $from_names = join ' ', @from_var;
        my $to_names   = join ' ', @to_var;

        if ($from_names ne $to_names) {
            say "$key in $from has (@from_var) and $to has (@to_var)";
        }
    }
}
# Extract the names of interpolated variables, written as {{name}} or
# [[name]], from a string. Returns the names sorted, or nothing when
# the string contains none.
sub parse_variables_from_string
{
    my ($string) = @_;

    # This probably matches most of the variables
    my $var = qr/ [a-z0-9_]+? /xs;

    # Every match yields two captures, one per bracket style; the one
    # belonging to the alternative that did not match is undef
    my @captures = $string =~ m/ \{\{ ($var) \}\} | \[\[ ($var) \]\] /gsx;
    return unless @captures;

    my @names = grep { defined } @captures;
    return sort @names;
}
# Flatten a nested hash into a list of ("dotted.key.path", value) pairs.
#
# $hash is the (sub)hash to walk; @path holds the keys that lead down
# to it and becomes the prefix of every generated key. Values that are
# hash references are descended into; anything else is emitted as is.
sub iterate
{
    my ($hash, @path) = @_;

    my @ret;
    for my $k (keys %$hash)
    {
        my $v = $hash->{$k};

        if (ref($v) eq 'HASH')
        {
            # Recurse with this key appended to the path
            push @ret, iterate($v, @path, $k);
            next;
        }

        push @ret, join(".", @path, $k), $v;
    }

    return @ret;
}
# Derive the locale name from a locale file name.
#
# Drops any leading directory components and then everything from the
# first dot of the file name onwards, so "en.yml", "./en.yml" and
# "config/locales/en.yml" all give "en".
sub basename
{
    my $name = shift;

    # Strip the directory part first; otherwise the directories stay in
    # the result, and a dot in the path (as in "./en.yml") is taken for
    # the start of the extension
    $name =~ s{\A.*/}{}s;

    # Remove the extension(s): everything from the first dot to the end
    $name =~ s{\..*\z}{}s;

    return $name;
}
# Show the usage text via Pod::Usage::pod2usage.
#
# Accepts optional named arguments:
#   verbose - passed through unchanged as pod2usage's -verbose
#   exitval - passed as pod2usage's -exitval; 0 is used when it is
#             missing or false
sub help
{
    my %arg = @_;

    my $exit_status = $arg{ exitval } || 0;
    my @usage_options = (
        -verbose => $arg{ verbose },
        -exitval => $exit_status,
    );

    Pod::Usage::pod2usage(@usage_options);
}
__DATA__
untranslated_values:
# Default/Per language blacklist/whitelist for the
# --untranslated-values switch. "true" as a value indicates that the
# key is to be blacklisted, and "false" that it's to be
# whitelisted. "false" is only required to whitelist a key
# blacklisted by default on a per-language basis.
default:
html.dir: true
layouts.intro_3_bytemark: true
layouts.intro_3_ucl: true
layouts.project_name.h1: true
layouts.project_name.title: true
site.index.license.project_url: true
de:
activerecord.attributes.message.sender: true
activerecord.attributes.trace.name: true
activerecord.models.changeset: true
activerecord.models.relation: true
browse.changeset.changeset: true
browse.changeset.changesetxml: true
browse.changeset.osmchangexml: true
browse.changeset.title: true
browse.common_details.version: true
browse.containing_relation.relation: true
browse.relation.relation: true
browse.relation.relation_title: true
browse.start_rjs.details: true
browse.start_rjs.object_list.details: true
browse.tag_details.tags: true
changeset.changesets.id: true
export.start.export_button: true
export.start.format: true
export.start.output: true
export.start.zoom: true
export.start_rjs.export: true
layouts.export: true
layouts.shop: true
site.edit.anon_edits: true
site.index.license.license_name: true
site.index.permalink: true
site.key.table.entry.park: true
site.search.submit_text: true
trace.edit.tags: true
trace.trace.in: true
trace.trace_form.tags: true
trace.trace_optionals.tags: true
trace.view.tags: true
user.account.public editing.enabled link: true