From 00d732195812234f578a9513b32010fbe6469cd1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Tue, 15 Oct 2019 12:24:09 +0200 Subject: [PATCH] offload: Set a longer SSH session timeout. Fixes . * guix/scripts/offload.scm (open-ssh-session): Add 'max-silent-time' parameter. Add call to 'session-set!' before returning SESSION. (transfer-and-offload): Pass MAX-SILENT-TIME to 'open-ssh-session'. (%short-timeout): New variable. (choose-build-machine): Pass %SHORT-TIMEOUT to 'open-ssh-session'. (check-machine-availability): Likewise. (check-machine-status): Likewise. --- guix/scripts/offload.scm | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/guix/scripts/offload.scm b/guix/scripts/offload.scm index bb307cefd1..1384f6b41d 100644 --- a/guix/scripts/offload.scm +++ b/guix/scripts/offload.scm @@ -174,7 +174,7 @@ (define (private-key-from-file* file) private key from '~a': ~a") file str)))))))) -(define (open-ssh-session machine) +(define* (open-ssh-session machine #:optional (max-silent-time -1)) "Open an SSH session for MACHINE and return it. Throw an error on failure." (let ((private (private-key-from-file* (build-machine-private-key machine))) (public (public-key-from-file @@ -183,7 +183,7 @@ (define (open-ssh-session machine) (session (make-session #:user (build-machine-user machine) #:host (build-machine-name machine) #:port (build-machine-port machine) - #:timeout 10 ;seconds + #:timeout 10 ;initial timeout (seconds) ;; #:log-verbosity 'protocol #:identity (build-machine-private-key machine) @@ -225,6 +225,10 @@ (define (open-ssh-session machine) (leave (G_ "SSH public key authentication failed for '~a': ~a~%") (build-machine-name machine) (get-error session)))) + ;; From then on use MAX-SILENT-TIME as the absolute timeout when + ;; reading from or write to a channel for this session. + (session-set! session 'timeout max-silent-time) + session) (x ;; Connection failed or timeout expired. @@ -313,7 +317,7 @@ (define* (transfer-and-offload drv machine INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from MACHINE." (define session - (open-ssh-session machine)) + (open-ssh-session machine max-silent-time)) (define store (connect-to-remote-daemon session @@ -472,7 +476,8 @@ (define (machine-faster? m1 m2) ;; Return the best machine unless it's already overloaded. ;; Note: We call 'node-load' only as a last resort because it is ;; too costly to call it once for every machine. - (let* ((session (false-if-exception (open-ssh-session best))) + (let* ((session (false-if-exception (open-ssh-session best + %short-timeout))) (node (and session (remote-inferior session))) (load (and node (normalized-load best (node-load node)))) (space (and node (node-free-disk-space node)))) @@ -573,6 +578,11 @@ (define* (process-request wants-local? system drv features ;;; Installation tests. ;;; +(define %short-timeout + ;; Timeout in seconds used on SSH connections where reads and writes + ;; shouldn't take long. + 15) + (define (assert-node-repl node name) "Bail out if NODE is not running Guile." (match (node-guile-version node) @@ -658,7 +668,7 @@ (define (build-machine=? m1 m2) (length machines) machine-file) (let* ((names (map build-machine-name machines)) (sockets (map build-machine-daemon-socket machines)) - (sessions (map open-ssh-session machines)) + (sessions (map (cut open-ssh-session <> %short-timeout) machines)) (nodes (map remote-inferior sessions))) (for-each assert-node-has-guix nodes names) (for-each assert-node-repl nodes names) @@ -682,7 +692,7 @@ (define (build-machine=? m1 m2) (length machines) machine-file) (for-each (lambda (machine) (define session - (open-ssh-session machine)) + (open-ssh-session machine %short-timeout)) (match (remote-inferior session) (#f