* In the build hook, if connecting to a machine fails, try the other
machines of the right type (if available). This makes the build farm more robust to failures.
This commit is contained in:
parent
f56a039775
commit
d0c32dc135
3 changed files with 83 additions and 79 deletions
|
@ -71,6 +71,7 @@ while (<CONF>) {
|
|||
, sshKeys => $3
|
||||
, maxJobs => $4
|
||||
, speedFactor => 1.0 * ($6 || 1)
|
||||
, enabled => 1
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -92,12 +93,16 @@ sub openSlotLock {
|
|||
}
|
||||
|
||||
|
||||
my $hostName;
|
||||
|
||||
while (1) {
|
||||
|
||||
# Find all machines that can execute this build, i.e., that support
|
||||
# builds for the given platform and are not at their job limit.
|
||||
my $rightType = 0;
|
||||
my @available = ();
|
||||
LOOP: foreach my $cur (@machines) {
|
||||
if (grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
|
||||
if ($cur->{enabled} && grep { $neededSystem eq $_ } @{$cur->{systemTypes}}) {
|
||||
$rightType = 1;
|
||||
|
||||
# We have a machine of the right type. Determine the load on
|
||||
|
@ -166,15 +171,18 @@ utime undef, undef, $slotLock;
|
|||
close MAINLOCK;
|
||||
|
||||
|
||||
# Connect to the selected machine.
|
||||
@sshOpts = ("-i", $machine->{sshKeys}, "-x");
|
||||
$hostName = $machine->{hostName};
|
||||
last if openSSHConnection $hostName;
|
||||
|
||||
warn "unable to open SSH connection to $hostName, trying other available machines...\n";
|
||||
$machine->{enabled} = 0;
|
||||
}
|
||||
|
||||
|
||||
# Tell Nix we've accepted the build.
|
||||
sendReply "accept";
|
||||
if (defined $ENV{NIX_DEBUG_HOOK}) {
|
||||
my $hostName = $machine->{hostName};
|
||||
my $sp = $machine->{speedFactor};
|
||||
print STDERR "building `$drvPath' on `$hostName' - $sp - " . $selected->{free} . "\n";
|
||||
sleep 10;
|
||||
exit 0;
|
||||
}
|
||||
my $x = <STDIN>;
|
||||
chomp $x;
|
||||
|
||||
|
@ -184,13 +192,8 @@ if ($x ne "okay") {
|
|||
|
||||
|
||||
# Do the actual build.
|
||||
my $hostName = $machine->{hostName};
|
||||
print STDERR "building `$drvPath' on `$hostName'\n";
|
||||
|
||||
push @sshOpts, "-i", $machine->{sshKeys}, "-x";
|
||||
|
||||
openSSHConnection $hostName;
|
||||
|
||||
my $inputs = `cat inputs`; die if ($? != 0);
|
||||
$inputs =~ s/\n/ /g;
|
||||
|
||||
|
|
|
@ -53,7 +53,7 @@ while (@ARGV) {
|
|||
}
|
||||
|
||||
|
||||
openSSHConnection $sshHost;
|
||||
openSSHConnection $sshHost or die "$0: unable to start SSH\n";
|
||||
|
||||
|
||||
if ($toMode) { # Copy TO the remote machine.
|
||||
|
|
|
@ -12,15 +12,16 @@ sub openSSHConnection {
|
|||
my ($host) = @_;
|
||||
die if $sshStarted;
|
||||
$sshHost = $host;
|
||||
return if system("ssh $sshHost @sshOpts -O check 2> /dev/null") == 0;
|
||||
return 1 if system("ssh $sshHost @sshOpts -O check 2> /dev/null") == 0;
|
||||
|
||||
my $tmpDir = tempdir("nix-ssh.XXXXXX", CLEANUP => 1, TMPDIR => 1)
|
||||
or die "cannot create a temporary directory";
|
||||
|
||||
push @sshOpts, "-S", "$tmpDir/control";
|
||||
system("ssh $sshHost @sshOpts -M -N -f") == 0
|
||||
or die "unable to start SSH: $?";
|
||||
or return 0;
|
||||
$sshStarted = 1;
|
||||
return 1;
|
||||
}
|
||||
|
||||
# Tell the master SSH client to exit.
|
||||
|
|
Loading…
Reference in a new issue