openstreetmap-website/script/misc/update-wiki-pages
Tom Hughes 6d68c0b22d Stop wiki update script trying to be "helpful"
Instead of trying to advise how to import some (but not all) of
the modules, just require them in the normal way.

Closes #1763
2018-09-26 22:43:30 +01:00

190 lines
4.7 KiB
Perl
Executable file

#!/usr/bin/env perl
use 5.010;
use strict;
use warnings;
use Getopt::Long;
use Pod::Usage;
use MediaWiki::API;
use Test::More qw(no_plan);
use YAML::XS qw(Dump);
=head1 NAME
update-wiki-pages - Scrape the wiki for key/value wiki description pages
=head1 SYNOPSIS
perl script/misc/update-wiki-pages config/wiki_pages.yml
Or with prove(1):
prove -e 'perl script/misc/update-wiki-pages' config/wiki_pages.yml
=cut
# Get the command-line options
Getopt::Long::Parser->new(
config => [ qw< bundling no_ignore_case no_require_order pass_through > ],
)->getoptions(
'h|help' => \my $help,
) or help();
# On --help
help() if $help;
my $out_file = $ARGV[0];
$out_file //= 'config/wiki_pages.yml';
help() unless -f $out_file;
# Get a API interface
my $mw = MediaWiki::API->new();
ok($mw, "Got a MediaWiki API");
$mw->{config}->{api_url} = 'https://wiki.openstreetmap.org/w/api.php';
# All our goodies
my (%feature, %count);
# This is what you get on:
## http://wiki.openstreetmap.org/w/index.php?search=Template:KeyDescription&fulltext=Search&fulltext=Search
for my $lang ('', map { "${_}:" } qw[ Pt Fi De It HU Cz Fr RU Pl ]) {
ok(1, " Templates for language '$lang'");
# Key pages
ok(1, " Getting key pages");
my $cnt = stick_content_in_hash("key", "Template:${lang}KeyDescription", \%feature);
$cnt += stick_content_in_hash("key", "Template:${lang}Feature", \%feature);
ok(1, " Got $cnt key pages");
$count{key} += $cnt;
# Value pages
ok(1, " Getting value pages");
$cnt = stick_content_in_hash("tag", "Template:${lang}ValueDescription", \%feature);
ok(1, " Got $cnt value pages");
$count{value} += $cnt;
}
ok(1, "Got a total of $count{$_} ${_}s") for qw[ key value ];
# Dump to .yml file
open my $out, ">", $out_file or die "Can't open file '$out_file' supplied on the command line";
say $out "# THIS FILE IS AUTOGENERATED WITH THE script/misc/update-wiki-pages";
say $out "# PROGRAM DO NOT MANUALLY EDIT IT";
say $out "";
say $out Dump(\%feature);
close $out;
exit 0;
sub stick_content_in_hash
{
my ($key, $title, $hash) = @_;
my $ukey = ucfirst $key;
my $space_to_underscore = sub {
my $txt = shift;
$txt =~ s/ /_/g;
$txt;
};
my $count = 0;
my $process_link = sub {
my $link = shift;
$count++;
ok(1, " ... got $count links") if $count % 200 == 0;
my $title = $link->{title};
my $lang;
my $key_name;
if ($title =~ /^$ukey:(?<key_name>.*?)$/) {
# English by default
$lang = "en";
$key_name = $space_to_underscore->($+{key_name});
} elsif ($title =~ /^(?<lang>[^:]+):$ukey:(?<key_name>.*?)$/) {
$lang = lc $+{lang};
$key_name = $space_to_underscore->($+{key_name});
}
if ($lang && !exists($hash->{$lang}->{$key}->{$key_name})) {
$hash->{$lang}->{$key}->{$key_name} = $title;
}
};
get_embeddedin(
$title,
sub {
my $link = shift;
$process_link->($link);
get_redirects(
$link->{title},
sub {
my $link = shift;
$process_link->($link) if exists($link->{redirect});
}
);
}
);
return $count;
}
sub process_list
{
my $callback = shift;
my $links = shift;
for my $link (@$links) {
$callback->($link);
}
}
sub get_embeddedin
{
my ($title, $callback) = @_;
my $articles = $mw->list(
{
action => 'query',
list => 'embeddedin',
eititle => $title,
eifilterredir => 'nonredirects',
# Doesn't work for De:* and anything non-en. Odd.
# einamespace => '0|8',
eilimit => '200',
},
{
max => '0',
hook => sub { process_list($callback, @_) },
skip_encoding => 1,
}
) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
sub get_redirects
{
my ($title, $callback) = @_;
my $articles = $mw->list(
{
action => 'query',
list => 'backlinks',
bltitle => $title,
blfilterredir => 'redirects',
# Doesn't work for De:* and anything non-en. Odd.
# einamespace => '0|8',
bllimit => '200',
},
{
max => '0',
hook => sub { process_list($callback, @_) },
skip_encoding => 1,
}
) || die $mw->{error}->{code} . ': ' . $mw->{error}->{details};
}
sub help
{
my %arg = @_;
Pod::Usage::pod2usage(
-verbose => $arg{ verbose },
-exitval => $arg{ exitval } || 0,
);
}