tvl-depot/scripts/download-using-manifests.pl.in

377 lines
11 KiB
Perl
Raw Normal View History

#! @perl@ -w @perlFlags@
2014-08-20 17:00:17 +02:00
use utf8;
use strict;
use Nix::Config;
use Nix::Manifest;
use Nix::Store;
2012-07-30 23:09:36 +02:00
use Nix::Utils;
2006-10-04 20:58:11 +02:00
use POSIX qw(strftime);
STDOUT->autoflush(1);
binmode STDERR, ":encoding(utf8)";
my $logFile = "$Nix::Config::logDir/downloads";
2004-12-30 17:34:54 +01:00
# For queries, skip expensive calls to nix-hash etc. We're just
# estimating the expected download size.
my $fast = 1;
my $curl = "$Nix::Config::curl --fail --location";
# Open the manifest cache and update it if necessary.
my $dbh = updateManifestDB();
exit 0 unless defined $dbh; # exit if there are no manifests
print "\n";
# $hashCache->{$algo}->{$path} yields the $algo-hash of $path.
my $hashCache;
sub parseHash {
my $hash = shift;
if ($hash =~ /^(.+):(.+)$/) {
return ($1, $2);
} else {
return ("md5", $hash);
}
}
# Compute the most efficient sequence of downloads to produce the
# given path.
sub computeSmallestDownload {
my $targetPath = shift;
# Build a graph of all store paths that might contribute to the
# construction of $targetPath, and the special node "start". The
# edges are either patch operations, or downloads of full NAR
# files. The latter edges only occur between "start" and a store
# path.
my %graph;
$graph{"start"} = {d => 0, pred => undef, edges => []};
my @queue = ();
my $queueFront = 0;
my %done;
sub addNode {
my $graph = shift;
my $u = shift;
$$graph{$u} = {d => 999999999999, pred => undef, edges => []}
unless defined $$graph{$u};
}
sub addEdge {
my $graph = shift;
my $u = shift;
my $v = shift;
my $w = shift;
my $type = shift;
my $info = shift;
addNode $graph, $u;
push @{$$graph{$u}->{edges}},
{weight => $w, start => $u, end => $v, type => $type, info => $info};
my $n = scalar @{$$graph{$u}->{edges}};
}
push @queue, $targetPath;
while ($queueFront < scalar @queue) {
my $u = $queue[$queueFront++];
2010-11-17 18:54:49 +01:00
next if defined $done{$u};
$done{$u} = 1;
addNode \%graph, $u;
# If the path already exists, it has distance 0 from the
# "start" node.
if (isValidPath($u)) {
addEdge \%graph, "start", $u, 0, "present", undef;
}
else {
# Add patch edges.
my $patchList = $dbh->selectall_arrayref(
"select * from Patches where storePath = ?",
{ Slice => {} }, $u);
foreach my $patch (@{$patchList}) {
if (isValidPath($patch->{basePath})) {
my ($baseHashAlgo, $baseHash) = parseHash $patch->{baseHash};
my $hash = $hashCache->{$baseHashAlgo}->{$patch->{basePath}};
if (!defined $hash) {
$hash = $fast && $baseHashAlgo eq "sha256"
? queryPathHash($patch->{basePath})
: hashPath($baseHashAlgo, $baseHashAlgo ne "md5", $patch->{basePath});
$hash =~ s/.*://;
$hashCache->{$baseHashAlgo}->{$patch->{basePath}} = $hash;
}
next if $hash ne $baseHash;
}
push @queue, $patch->{basePath};
addEdge \%graph, $patch->{basePath}, $u, $patch->{size}, "patch", $patch;
}
# Add NAR file edges to the start node.
my $narFileList = $dbh->selectall_arrayref(
"select * from NARs where storePath = ?",
{ Slice => {} }, $u);
foreach my $narFile (@{$narFileList}) {
# !!! how to handle files whose size is not known in advance?
# For now, assume some arbitrary size (1 GB).
# This has the side-effect of preferring non-Hydra downloads.
addEdge \%graph, "start", $u, ($narFile->{size} || 1000000000), "narfile", $narFile;
}
}
}
# Run Dijkstra's shortest path algorithm to determine the shortest
# sequence of download and/or patch actions that will produce
# $targetPath.
my @todo = keys %graph;
while (scalar @todo > 0) {
# Remove the closest element from the todo list.
# !!! inefficient, use a priority queue
@todo = sort { -($graph{$a}->{d} <=> $graph{$b}->{d}) } @todo;
my $u = pop @todo;
my $u_ = $graph{$u};
foreach my $edge (@{$u_->{edges}}) {
my $v_ = $graph{$edge->{end}};
if ($v_->{d} > $u_->{d} + $edge->{weight}) {
$v_->{d} = $u_->{d} + $edge->{weight};
# Store the edge; to edge->start is actually the
# predecessor.
$v_->{pred} = $edge;
}
}
}
# Retrieve the shortest path from "start" to $targetPath.
my @path = ();
my $cur = $targetPath;
return () unless defined $graph{$targetPath}->{pred};
while ($cur ne "start") {
push @path, $graph{$cur}->{pred};
$cur = $graph{$cur}->{pred}->{start};
}
return @path;
}
# Parse the arguments.
if ($ARGV[0] eq "--query") {
while (<STDIN>) {
2012-07-12 00:52:09 +02:00
chomp;
my ($cmd, @args) = split " ", $_;
if ($cmd eq "have") {
2012-07-12 00:52:09 +02:00
foreach my $storePath (@args) {
print "$storePath\n" if scalar @{$dbh->selectcol_arrayref("select 1 from NARs where storePath = ?", {}, $storePath)} > 0;
}
print "\n";
}
elsif ($cmd eq "info") {
2012-07-12 00:52:09 +02:00
foreach my $storePath (@args) {
2012-07-12 00:52:09 +02:00
my $infos = $dbh->selectall_arrayref(
"select * from NARs where storePath = ?",
{ Slice => {} }, $storePath);
2012-07-12 00:52:09 +02:00
next unless scalar @{$infos} > 0;
my $info = @{$infos}[0];
print "$storePath\n";
print "$info->{deriver}\n";
my @references = split " ", $info->{refs};
print scalar @references, "\n";
print "$_\n" foreach @references;
my @path = computeSmallestDownload $storePath;
my $downloadSize = 0;
while (scalar @path > 0) {
my $edge = pop @path;
my $u = $edge->{start};
my $v = $edge->{end};
if ($edge->{type} eq "patch") {
$downloadSize += $edge->{info}->{size} || 0;
}
elsif ($edge->{type} eq "narfile") {
$downloadSize += $edge->{info}->{size} || 0;
}
}
2012-07-12 00:52:09 +02:00
print "$downloadSize\n";
2012-07-12 00:52:09 +02:00
my $narSize = $info->{narSize} || 0;
print "$narSize\n";
}
print "\n";
}
2014-08-20 17:00:17 +02:00
else { die "unknown command $cmd"; }
}
exit 0;
}
elsif ($ARGV[0] ne "--substitute") {
2010-02-04 10:38:09 +01:00
die;
}
die unless scalar @ARGV == 3;
my $targetPath = $ARGV[1];
my $destPath = $ARGV[2];
$fast = 0;
# Create a temporary directory.
my $tmpDir = mkTempDir("nix-download");
my $tmpNar = "$tmpDir/nar";
my $tmpNar2 = "$tmpDir/nar2";
open LOGFILE, ">>$logFile" or die "cannot open log file $logFile";
my $date = strftime ("%F %H:%M:%S UTC", gmtime (time));
print LOGFILE "$$ get $targetPath $date\n";
2014-08-20 17:00:17 +02:00
print STDERR "\n*** Trying to download/patch $targetPath\n";
# Compute the shortest path.
my @path = computeSmallestDownload $targetPath;
die "don't know how to produce $targetPath\n" if scalar @path == 0;
# We don't need the manifest anymore, so close it as an optimisation:
# if we still have SQLite locks blocking other processes (we
# shouldn't), this gets rid of them.
$dbh->disconnect;
# Traverse the shortest path, perform the actions described by the
# edges.
my $curStep = 1;
my $maxStep = scalar @path;
my $finalNarHash;
while (scalar @path > 0) {
my $edge = pop @path;
my $u = $edge->{start};
my $v = $edge->{end};
print STDERR "\n*** Step $curStep/$maxStep: ";
if ($edge->{type} eq "present") {
2014-08-20 17:00:17 +02:00
print STDERR "using already present path $v\n";
2004-12-30 17:34:54 +01:00
print LOGFILE "$$ present $v\n";
if ($curStep < $maxStep) {
# Since this is not the last step, the path will be used
# as a base to one or more patches. So turn the base path
# into a NAR archive, to which we can apply the patch.
print STDERR " packing base path...\n";
system("$Nix::Config::binDir/nix-store --dump $v > $tmpNar") == 0
2014-08-20 17:00:17 +02:00
or die "cannot dump $v";
}
}
elsif ($edge->{type} eq "patch") {
my $patch = $edge->{info};
2014-08-20 17:00:17 +02:00
print STDERR "applying patch $patch->{url} to $u to create $v\n";
2004-12-30 17:34:54 +01:00
print LOGFILE "$$ patch $patch->{url} $patch->{size} $patch->{baseHash} $u $v\n";
# Download the patch.
print STDERR " downloading patch...\n";
my $patchPath = "$tmpDir/patch";
checkURL $patch->{url};
system("$curl '$patch->{url}' -o $patchPath") == 0
2014-08-20 17:00:17 +02:00
or die "cannot download patch $patch->{url}\n";
# Apply the patch to the NAR archive produced in step 1 (for
# the already present path) or a later step (for patch sequences).
print STDERR " applying patch...\n";
system("$Nix::Config::libexecDir/nix/bspatch $tmpNar $tmpNar2 $patchPath") == 0
2014-08-20 17:00:17 +02:00
or die "cannot apply patch $patchPath to $tmpNar\n";
if ($curStep < $maxStep) {
# The archive will be used as the base of the next patch.
2005-09-15 17:21:35 +02:00
rename "$tmpNar2", "$tmpNar" or die "cannot rename NAR archive: $!";
} else {
# This was the last patch. Unpack the final NAR archive
# into the target path.
print STDERR " unpacking patched archive...\n";
system("$Nix::Config::binDir/nix-store --restore $destPath < $tmpNar2") == 0
2014-08-20 17:00:17 +02:00
or die "cannot unpack $tmpNar2 to $v\n";
}
$finalNarHash = $patch->{narHash};
}
elsif ($edge->{type} eq "narfile") {
my $narFile = $edge->{info};
2014-08-20 17:00:17 +02:00
print STDERR "downloading $narFile->{url} to $v\n";
my $size = $narFile->{size} || -1;
print LOGFILE "$$ narfile $narFile->{url} $size $v\n";
checkURL $narFile->{url};
my $decompressor =
$narFile->{compressionType} eq "bzip2" ? "| $Nix::Config::bzip2 -d" :
$narFile->{compressionType} eq "xz" ? "| $Nix::Config::xz -d" :
$narFile->{compressionType} eq "none" ? "" :
2014-08-20 17:00:17 +02:00
die "unknown compression type $narFile->{compressionType}";
if ($curStep < $maxStep) {
# The archive will be used a base to a patch.
system("$curl '$narFile->{url}' $decompressor > $tmpNar") == 0
2014-08-20 17:00:17 +02:00
or die "cannot download and unpack $narFile->{url} to $v\n";
} else {
2012-09-21 21:02:33 +02:00
# Unpack the archive to the target path.
system("$curl '$narFile->{url}' $decompressor | $Nix::Config::binDir/nix-store --restore '$destPath'") == 0
2014-08-20 17:00:17 +02:00
or die "cannot download and unpack $narFile->{url} to $v\n";
}
$finalNarHash = $narFile->{narHash};
}
$curStep++;
}
2004-12-30 17:34:54 +01:00
# Tell Nix about the expected hash so it can verify it.
die "cannot check integrity of the downloaded path since its hash is not known\n"
unless defined $finalNarHash;
print "$finalNarHash\n";
print STDERR "\n";
2004-12-30 17:34:54 +01:00
print LOGFILE "$$ success\n";
close LOGFILE;