openstreetmap-website/script/misc/update-wiki-pages
Ævar Arnfjörð Bjarmason 3a54a20af6 Implicitly update config/wiki_pages.yml if it exists, and chmod +x
This makes:

    script/misc/update-wiki-pages

from the top-level just work.
2010-05-08 18:09:49 +00:00

152 lines
3.8 KiB
Perl
Executable file

#!/usr/bin/env perl
use 5.010;
use strict;
use warnings;
use Pod::Usage ();
use Getopt::Long ();
BEGIN {
    # Check at compile time that the non-core modules can be loaded, so a
    # missing dependency yields install instructions instead of a raw
    # "Can't locate ..." failure from the `use` lines further down.
    my $have_modules = eval "require MediaWiki::API; require YAML::XS;";
    unless ($have_modules) {
        print "You have to install some modules via CPAN to run this:\n";
        print " sudo cpanp MediaWiki::API YAML::XS\n";
        exit 1;
    }
}
use MediaWiki::API;
use YAML::XS qw(Dump);
use Test::More 'no_plan';
=head1 NAME

update-wiki-pages - Scrape the wiki for key/value wiki description pages

=head1 SYNOPSIS

    perl script/misc/update-wiki-pages config/wiki_pages.yml

Or with prove(1):

    prove -e 'perl script/misc/update-wiki-pages' config/wiki_pages.yml

=cut
# Get the command-line options
Getopt::Long::Parser->new(
    config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
    'h|help' => \my $help,
) or help(exitval => 1); # Option parsing failed: print usage, exit non-zero

# On --help
help() if $help;

# The output file is the first positional argument; without one, fall back
# to the path that is right when the script is run from the top-level
# directory.
my $out_file = $ARGV[0] // 'config/wiki_pages.yml';

# Only an existing regular file is ever overwritten. Anything else is a
# usage error, so print the usage text and exit non-zero.
help(exitval => 1) unless -f $out_file;

# Get a API interface
my $mw = MediaWiki::API->new();
ok($mw, "Got a MediaWiki API");
$mw->{config}->{api_url} = 'http://wiki.openstreetmap.org/w/api.php';

# All our goodies:
#   %feature - language => { key|tag => { name => wiki page title } }
#   %count   - running totals of the key and value pages seen
my (%feature, %count);

# This is what you get on:
## http://wiki.openstreetmap.org/w/index.php?search=Template:KeyDescription&fulltext=Search&fulltext=Search
#
# The empty prefix selects the un-prefixed (English) templates; every other
# entry becomes a "Xx:" prefix inside the template title.
for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) {
    ok(1, " Templates for language '$lang'");

    # Key pages
    ok(1, " Getting key pages");
    my $cnt = stick_content_in_hash("key", "Template:${lang}KeyDescription", \%feature);
    ok(1, " Got $cnt key pages");
    $count{key} += $cnt;

    # Value pages
    ok(1, " Getting value pages");
    $cnt = stick_content_in_hash("tag", "Template:${lang}ValueDescription", \%feature);
    ok(1, " Got $cnt value pages");
    $count{value} += $cnt;
}

ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ];

# Dump to .yml file. The file is opened (and truncated) only after all the
# scraping above has finished.
open my $out, ">", $out_file
    or die "Can't open file '$out_file' for writing: $!";
say $out "# THIS FILE IS AUTOGENERATED WITH THE script/misc/update-wiki-pages";
say $out "# PROGRAM DO NOT MANUALLY EDIT IT";
say $out "";
say $out Dump(\%feature);

# Buffered write errors (e.g. a full disk) only surface when closing.
close $out
    or die "Can't close file '$out_file': $!";

exit 0;
# Record every page embedding the template $title in the nested hash $hash.
#
#   $key   - "key" or "tag"; its ucfirst form ("Key"/"Tag") is the page
#            title prefix that is matched, and $key itself is the
#            second-level hash key
#   $title - template title handed to get_embeddedin()
#   $hash  - hash ref filled in as $hash->{language}{$key}{name} = page title
#
# Returns the number of links seen, including those whose title matched
# neither pattern and were therefore not stored.
sub stick_content_in_hash
{
    my ($key, $title, $hash) = @_;

    my $ukey  = ucfirst $key;
    my $count = 0;

    # Invoked by get_embeddedin() with an array ref of link records.
    my $store_links = sub {
        my ($links) = @_;
        my @batch = @{$links};

        ok(1, " ... got " . scalar(@batch) . " more links");

        for my $link (@batch) {
            $count++;

            my $page = $link->{title};
            my ($lang, $name);

            if ($page =~ /^$ukey:(?<key_name>.*?)$/) {
                # No language prefix: English by default
                ($lang, $name) = ('en', $+{key_name});
            } elsif ($page =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) {
                ($lang, $name) = (lc $+{lang}, $+{key_name});
            } else {
                # Counted, but not a key/tag description page
                next;
            }

            # Names are stored with underscores instead of spaces
            $name =~ tr/ /_/;
            $hash->{$lang}->{$key}->{$name} = $page;
        }
    };

    get_embeddedin($title, $store_links);

    return $count;
}
# Run an "embeddedin" list query for $title (redirects filtered out) through
# the file-level $mw API object, passing $callback as the list hook. In this
# file the hook receives an array ref of link records.
#
# Dies with the API error code and details when the list call returns a
# false value; otherwise returns whatever the list call returned.
sub get_embeddedin
{
    my ($title, $callback) = @_;

    my %query = (
        action        => 'query',
        list          => 'embeddedin',
        eititle       => $title,
        eifilterredir => 'nonredirects',
        # Doesn't work for De:* and anything non-en. Odd.
        # einamespace => '0|8',
        eilimit       => '200',
    );

    # NOTE(review): max => 0 presumably means "no cap on the number of
    # follow-up queries" - confirm against the MediaWiki::API documentation.
    my %options = (
        max           => '0',
        hook          => $callback,
        skip_encoding => 1,
    );

    my $articles = $mw->list(\%query, \%options);
    die $mw->{error}->{code} . ': ' . $mw->{error}->{details}
        unless $articles;

    return $articles;
}
# Print the usage text taken from this file's POD via Pod::Usage.
#
# Named arguments (both optional):
#   verbose - passed through unchanged as pod2usage's -verbose
#   exitval - passed as pod2usage's -exitval; any false value becomes 0
sub help
{
    my %arg = @_;

    my $exit_status = $arg{exitval} ? $arg{exitval} : 0;

    Pod::Usage::pod2usage(
        '-verbose' => $arg{verbose},
        '-exitval' => $exit_status,
    );
}