2007-11-15 15:28:08 +01:00
|
|
|
#! @perl@ -w
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
use Fcntl ':flock';
|
|
|
|
use English '-no_match_vars';
|
2009-03-29 20:40:44 +02:00
|
|
|
use IO::Handle;
|
2007-11-15 15:28:08 +01:00
|
|
|
|
|
|
|
# General operation:
|
|
|
|
#
|
|
|
|
# Try to find a free machine of type $neededSystem. We do this as
|
|
|
|
# follows:
|
|
|
|
# - We acquire an exclusive lock on $currentLoad/main-lock.
|
|
|
|
# - For each machine $machine of type $neededSystem and for each $slot
|
|
|
|
# less than the maximum load for that machine, we try to get an
|
|
|
|
# exclusive lock on $currentLoad/$machine-$slot (without blocking).
|
|
|
|
# If we get such a lock, we send "accept" to the caller. Otherwise,
|
|
|
|
# we send "postpone" and exit.
|
|
|
|
# - We release the exclusive lock on $currentLoad/main-lock.
|
|
|
|
# - We perform the build on $neededSystem.
|
|
|
|
# - We release the exclusive lock on $currentLoad/$machine-$slot.
|
|
|
|
#
|
|
|
|
# The nice thing about this scheme is that if we die prematurely, the
|
|
|
|
# locks are released automatically.
|
|
|
|
|
|
|
|
# Initialised to 0 here; nothing in the visible part of the script changes it.
my $loadIncreased = 0;

# Positional command-line arguments.  The fifth one (the maximum silent
# time, later passed on as `--max-silent-time') is optional and falls
# back to 0 when absent.
my (
    $amWilling,
    $localSystem,
    $neededSystem,
    $drvPath,
    $maxSilentTime,
) = @ARGV;
if (!defined $maxSilentTime) {
    $maxSilentTime = 0;
}
|
2007-11-15 15:28:08 +01:00
|
|
|
|
|
|
|
# Send a one-word reply to the caller.  The reply is written to standard
# error as a single line of the form "# <reply>".
sub sendReply {
    my ($reply) = @_;
    print STDERR "# $reply\n";
}
|
|
|
|
|
|
|
|
# Reply "decline" to the caller and terminate with exit status 0.
# This sub never returns.
sub decline {
    sendReply("decline");
    exit(0);
}
|
|
|
|
|
|
|
|
# Directory holding the lock files used for load bookkeeping.  If it is
# not configured we cannot do anything useful, so decline the build.
my $currentLoad = $ENV{"NIX_CURRENT_LOAD"};
decline() unless defined $currentLoad;

# Create the directory on first use.  Several hooks may start at the same
# moment, so a failed mkdir is only fatal when the directory still does
# not exist afterwards (i.e. nobody else created it in the meantime).
if (!-d $currentLoad && !mkdir($currentLoad, 0777)) {
    my $mkdirError = $!;
    die "cannot create directory `$currentLoad': $mkdirError\n"
        unless -d $currentLoad;
}

# File describing the remote machines.  Decline when it is not configured
# or does not exist.
my $conf = $ENV{"NIX_REMOTE_SYSTEMS"};
decline() if !defined $conf || ! -e $conf;

# True when the caller is willing to build locally and the local system
# type is the one the build needs.
my $canBuildLocally = $amWilling && ($localSystem eq $neededSystem);
|
2007-11-15 15:28:08 +01:00
|
|
|
|
|
|
|
|
|
|
|
# Read the list of machines.  After `#' comments are stripped, every
# non-blank line of $conf must have the form
#
#   hostName systemType[,systemType...] sshKeys maxJobs [speedFactor]
#
# and is turned into one hash pushed onto @machines.  A line that does
# not match is a fatal error.
my @machines;
open my $confFh, '<', $conf or die "cannot open `$conf': $!\n";

while (my $line = <$confFh>) {
    chomp $line;
    $line =~ s/\#.*$//;
    next if $line =~ /^\s*$/;

    # The fifth capture group only wraps the optional speed factor
    # together with its leading whitespace, so it is discarded.
    my ($hostName, $systemTypes, $sshKeys, $maxJobs, undef, $speed) =
        $line =~ /^\s*(\S+)\s+(\S+)\s+(\S+)\s+(\d+)(\s+([0-9\.]+))?\s*$/
        or die "$conf:$.: cannot parse machine specification `$line'\n";

    # A missing or non-positive speed factor counts as 1.  The factor is
    # used as a divisor when the machines are ranked, so it must never
    # be 0 (which a value such as "0.0" would otherwise produce).
    my $speedFactor = (defined $speed && $speed > 0) ? 1.0 * $speed : 1.0;

    push @machines,
        { hostName    => $hostName
        , systemTypes => [split(/,/, $systemTypes)]
        , sshKeys     => $sshKeys
        , maxJobs     => $maxJobs
        , speedFactor => $speedFactor
        };
}

close $confFh;
|
|
|
|
|
|
|
|
|
|
|
|
# Acquire the exclusive lock on $currentLoad/main-lock.  The file is
# opened in append mode so that it is created when missing and never
# truncated.  flock is called without LOCK_NB, so it waits until the lock
# can be taken.  The MAINLOCK handle stays open; closing it later is what
# releases the lock.
my $mainLock = "$currentLoad/main-lock";
open MAINLOCK, '>>', $mainLock or die "cannot open `$mainLock': $!\n";
flock(MAINLOCK, LOCK_EX) or die "cannot lock `$mainLock': $!\n";
|
|
|
|
|
|
|
|
|
2009-09-17 17:48:17 +02:00
|
|
|
# Open the lock file for job slot $slot of $machine, creating it when it
# does not exist yet, and return the file handle.
#
# The file lives in $currentLoad and is named
# "<system types joined by `+'>-<hostName>-<slot>".  The handle is
# returned without any lock held; callers flock it themselves.  Dies
# with a diagnostic when the file cannot be opened.
sub openSlotLock {
    my ($machine, $slot) = @_;
    my $slotLockFn = join '-',
        "$currentLoad/" . join('+', @{ $machine->{systemTypes} }),
        $machine->{hostName},
        $slot;
    open my $slotLock, '>>', $slotLockFn
        or die "cannot open `$slotLockFn': $!\n";
    return $slotLock;
}
|
|
|
|
|
|
|
|
|
|
|
|
# Find all machines that can execute this build, i.e., that support
# builds for the given platform and are not at their job limit.
# $rightType records whether any machine of the needed type exists at
# all, regardless of its load.
my $rightType = 0;
my @available = ();
foreach my $cur (@machines) {
    next unless grep { $neededSystem eq $_ } @{$cur->{systemTypes}};
    $rightType = 1;

    # Probe every slot of this machine.  A slot whose lock cannot be
    # taken without blocking is in use and counts towards the load; the
    # lowest-numbered slot we could lock is remembered in $free.  The
    # probe lock is released again right away.
    my $load = 0;
    my $free;
    for (my $slot = 0; $slot < $cur->{maxJobs}; $slot++) {
        my $slotLock = openSlotLock($cur, $slot);
        if (!flock($slotLock, LOCK_EX | LOCK_NB)) {
            $load++;
        } else {
            $free = $slot unless defined $free;
            flock($slotLock, LOCK_UN) or die;
        }
        close $slotLock;
    }

    if ($load < $cur->{maxJobs}) {
        push @available, { machine => $cur, load => $load, free => $free };
    }
}
|
|
|
|
|
2009-09-17 17:48:17 +02:00
|
|
|
# When NIX_DEBUG_HOOK is set, report the measured load of every candidate
# machine on stderr.
if (defined $ENV{NIX_DEBUG_HOOK}) {
    foreach my $candidate (@available) {
        print STDERR "load on " . $candidate->{machine}->{hostName} . " = " . $candidate->{load} . "\n";
    }
}
|
2007-11-15 15:28:08 +01:00
|
|
|
|
|
|
|
|
2009-09-17 17:48:17 +02:00
|
|
|
# Didn't find any available machine?  Then decline or postpone.
if (!@available) {
    # Decline when no machine of the right type exists at all, or when
    # the local system can and wants to do the build itself.  decline()
    # does not return.
    decline() if $canBuildLocally || !$rightType;

    # Otherwise a suitable machine exists but is busy: ask the caller to
    # postpone.
    sendReply("postpone");
    exit 0;
}
|
|
|
|
|
2009-09-17 17:48:17 +02:00
|
|
|
|
|
|
|
# Prioritise the available machines as follows:
|
|
|
|
# - First by load divided by speed factor, rounded to the nearest
|
|
|
|
# integer. This causes fast machines to be preferred over slow
|
|
|
|
# machines with similar loads.
|
|
|
|
# - Then by speed factor.
|
|
|
|
# - Finally by load.
|
|
|
|
# Load factor of one @available entry: its load divided by the speed
# factor of its machine, as an integer.  The rounding adds 0.4999 before
# truncating, so a fraction of exactly one half is rounded down.
sub lf {
    my ($entry) = @_;
    my $ratio = $entry->{load} / $entry->{machine}->{speedFactor};
    return int($ratio + 0.4999);
}
|
|
|
|
# Order the candidates: lowest load factor first, then highest speed
# factor, then lowest raw load.  Each later criterion is consulted only
# when all earlier ones compare equal.
@available = sort {
    my $order = lf($a) <=> lf($b);
    $order = $b->{machine}->{speedFactor} <=> $a->{machine}->{speedFactor}
        if $order == 0;
    $order = $a->{load} <=> $b->{load}
        if $order == 0;
    $order;
} @available;
|
|
|
|
|
|
|
|
|
|
|
|
# Select the best available machine (the first entry of @available) and
# lock the free slot recorded for it.  Failing to get that lock is fatal.
my ($selected) = @available;
my $machine = $$selected{machine};

my $slotLock = openSlotLock($machine, $$selected{free});
die unless flock($slotLock, LOCK_EX | LOCK_NB);

# Set the access and modification times of the slot lock file to now.
utime undef, undef, $slotLock;

# The slot is ours; closing MAINLOCK gives up the main lock.  $slotLock
# is kept open so the slot stays locked.
close MAINLOCK;
|
|
|
|
|
|
|
|
|
|
|
|
# Tell Nix we've accepted the build.
sendReply("accept");

# Debug mode: only report which machine and slot were chosen, hold the
# slot for ten seconds, and exit without building anything.
if (defined $ENV{NIX_DEBUG_HOOK}) {
    my ($hostName, $sp) = @{$machine}{qw(hostName speedFactor)};
    print STDERR "building `$drvPath' on `$hostName' - $sp - " . $selected->{free} . "\n";
    sleep 10;
    exit 0;
}
|
2009-03-28 20:29:55 +01:00
|
|
|
# Wait for the caller's answer on stdin.  Anything other than "okay"
# means we should not build, so exit quietly.  End-of-file (the caller
# closed the pipe) is treated the same way; checking for it explicitly
# avoids uninitialized-value warnings on stderr.
my $x = <STDIN>;
exit 0 unless defined $x;
chomp $x;
exit 0 if $x ne "okay";


# Do the actual build.
my $hostName = $machine->{hostName};
print STDERR "building `$drvPath' on `$hostName'\n";
|
2007-11-15 15:28:08 +01:00
|
|
|
|
|
|
|
# Make sure that we don't get any SSH passphrase or host key popups -
# if there is any problem it should fail, not do something
# interactive.  Note that the keys are plain variable names: a trailing
# "=" would set a differently named variable and leave the inherited
# SSH_ASKPASS / SSH_PASSWORD_FILE untouched.
$ENV{"DISPLAY"} = "";
$ENV{"SSH_PASSWORD_FILE"} = "";
$ENV{"SSH_ASKPASS"} = "";
|
|
|
|
|
2008-12-04 13:20:06 +01:00
|
|
|
# Options for every ssh invocation: by default, authenticate with the
# machine's key file and disable X11 forwarding.
my $sshOpts = "-i " . $machine->{sshKeys} . " -x";

# Hack to support Cygwin: if we login without a password, we don't
# have exactly the same rights as when we do.  This causes the
# Microsoft C compiler to fail with certain flags:
#
# http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99676
#
# So as a workaround, we pass a verbatim password.  ssh tries to make
# this very hard; the trick is to make it call SSH_ASKPASS to get the
# password.  (It only calls this command when there is no controlling
# terminal, but Nix ensures that this is the case.  When doing this
# manually, use setsid(1).)
if ($machine->{sshKeys} =~ /^password:/) {
    my $passwordFile = $machine->{sshKeys};
    $passwordFile =~ s/^password://;

    # $sshOpts holds options only.  In this script it is always spliced
    # in right after the `ssh' command name, so it must not itself start
    # with "ssh" (ssh would take that word as the host name).
    $sshOpts = "-x";

    # Write the askpass helper to a freshly created, uniquely named
    # temporary file that only we can access, instead of a fixed path in
    # a world-writable directory.  File::Temp removes it when the script
    # exits.  The handle is closed before the helper is ever executed.
    require File::Temp;
    my ($askPassFh, $askPass) =
        File::Temp::tempfile("nix-askpass-XXXXXX", TMPDIR => 1, UNLINK => 1);
    print {$askPassFh} "#! /bin/sh\ncat \"\$SSH_PASSWORD_FILE\""
        or die "cannot write `$askPass': $!\n";
    close $askPassFh or die "cannot write `$askPass': $!\n";
    chmod 0700, $askPass or die "cannot make `$askPass' executable: $!\n";

    $ENV{"SSH_PASSWORD_FILE"} = $passwordFile;
    $ENV{"SSH_ASKPASS"} = $askPass;
}
|
|
|
|
|
|
|
|
# Read the files `inputs' and `outputs' from the current directory with
# cat(1), dying if cat fails, and turn each into a single line by
# replacing every newline with a space.
my $inputs = `cat inputs`;
$? == 0 or die;
$inputs =~ tr/\n/ /;

my $outputs = `cat outputs`;
$? == 0 or die;
$outputs =~ tr/\n/ /;
|
|
|
|
|
2009-03-28 17:12:46 +01:00
|
|
|
print "copying inputs...\n";

# Pass `--sign' to nix-copy-closure only when the signing key file exists.
my $maybeSign = (-e "/nix/etc/nix/signing-key.sec") ? "--sign" : "";

# Copy the derivation and its inputs to the remote machine; the ssh
# options are handed over through NIX_SSHOPTS.  Any non-zero status is fatal.
unless (system("NIX_SSHOPTS=\"$sshOpts\" @bindir@/nix-copy-closure --gzip $hostName $maybeSign $drvPath $inputs") == 0) {
    die "cannot copy inputs to $hostName: $?";
}
|
2007-11-15 15:28:08 +01:00
|
|
|
|
2009-03-28 17:12:46 +01:00
|
|
|
print "building...\n";

my $buildFlags = "--max-silent-time $maxSilentTime";

# `-tt' forces allocation of a pseudo-terminal.  This is required to
# make the remote nix-store process receive a signal when the
# connection dies.  Without it, the remote process might continue to
# run indefinitely (that is, until it next tries to write to
# stdout/stderr).
if (system("ssh -tt $sshOpts $hostName 'nix-store --realise $buildFlags $drvPath > /dev/null'") != 0) {
    # Save the wait status and errno before anything can overwrite them.
    my ($status, $startError) = ($?, "$!");

    # If we couldn't run ssh or there was an ssh problem (indicated by
    # exit code 255), then we return exit code 1; otherwise we assume
    # that the builder failed, which we indicate to Nix using exit
    # code 100.  It's important to distinguish between the two because
    # the first is a transient failure and the latter is permanent.
    my $res = ($status == -1 || ($status >> 8) == 255) ? 1 : 100;

    # $? is a raw wait status, not an exit code: decode it so the
    # message shows the real exit code (or the signal / start failure).
    my $outcome =
          $status == -1   ? "could not be started: $startError"
        : ($status & 127) ? "was killed by signal " . ($status & 127)
        :                   "failed with exit code " . ($status >> 8);
    print STDERR "build of `$drvPath' on `$hostName' $outcome\n";
    exit $res;
}

print "build of `$drvPath' on `$hostName' succeeded\n";
|
2007-11-15 15:28:08 +01:00
|
|
|
|
|
|
|
# Copy the build results back: export each item of $outputs on the remote
# machine, compress it for the transfer, and import it into the local
# store.  The remote side is asked to sign the export unless we run as
# root ($UID == 0).
# NOTE(review): $outputs is split on newlines here, but the code that
# reads it replaces newlines with spaces first, so this loop presumably
# runs once with all paths in a single command - confirm before changing.
for my $output (split '\n', $outputs) {
    my $maybeSignRemote = ($UID != 0) ? "--sign" : "";
    my $rc = system("ssh $sshOpts $hostName 'nix-store --export $maybeSignRemote $output | gzip' | gunzip | @bindir@/nix-store --import > /dev/null");
    $rc == 0 or die "cannot copy $output from $hostName: $?";
}
|