offload: Set a longer SSH session timeout.

Fixes <https://bugs.gnu.org/37762>.

* guix/scripts/offload.scm (open-ssh-session): Add 'max-silent-time'
parameter.  Add call to 'session-set!' before returning SESSION.
(transfer-and-offload): Pass MAX-SILENT-TIME to 'open-ssh-session'.
(%short-timeout): New variable.
(choose-build-machine): Pass %SHORT-TIMEOUT to 'open-ssh-session'.
(check-machine-availability): Likewise.
(check-machine-status): Likewise.
This commit is contained in:
Ludovic Courtès 2019-10-15 12:24:09 +02:00
parent e464ac6672
commit 00d7321958
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -174,7 +174,7 @@ (define (private-key-from-file* file)
private key from '~a': ~a") private key from '~a': ~a")
file str)))))))) file str))))))))
(define (open-ssh-session machine) (define* (open-ssh-session machine #:optional (max-silent-time -1))
"Open an SSH session for MACHINE and return it. Throw an error on failure." "Open an SSH session for MACHINE and return it. Throw an error on failure."
(let ((private (private-key-from-file* (build-machine-private-key machine))) (let ((private (private-key-from-file* (build-machine-private-key machine)))
(public (public-key-from-file (public (public-key-from-file
@ -183,7 +183,7 @@ (define (open-ssh-session machine)
(session (make-session #:user (build-machine-user machine) (session (make-session #:user (build-machine-user machine)
#:host (build-machine-name machine) #:host (build-machine-name machine)
#:port (build-machine-port machine) #:port (build-machine-port machine)
#:timeout 10 ;seconds #:timeout 10 ;initial timeout (seconds)
;; #:log-verbosity 'protocol ;; #:log-verbosity 'protocol
#:identity (build-machine-private-key machine) #:identity (build-machine-private-key machine)
@ -225,6 +225,10 @@ (define (open-ssh-session machine)
(leave (G_ "SSH public key authentication failed for '~a': ~a~%") (leave (G_ "SSH public key authentication failed for '~a': ~a~%")
(build-machine-name machine) (get-error session)))) (build-machine-name machine) (get-error session))))
;; From then on use MAX-SILENT-TIME as the absolute timeout when
;; reading from or write to a channel for this session.
(session-set! session 'timeout max-silent-time)
session) session)
(x (x
;; Connection failed or timeout expired. ;; Connection failed or timeout expired.
@ -313,7 +317,7 @@ (define* (transfer-and-offload drv machine
INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from
MACHINE." MACHINE."
(define session (define session
(open-ssh-session machine)) (open-ssh-session machine max-silent-time))
(define store (define store
(connect-to-remote-daemon session (connect-to-remote-daemon session
@ -472,7 +476,8 @@ (define (machine-faster? m1 m2)
;; Return the best machine unless it's already overloaded. ;; Return the best machine unless it's already overloaded.
;; Note: We call 'node-load' only as a last resort because it is ;; Note: We call 'node-load' only as a last resort because it is
;; too costly to call it once for every machine. ;; too costly to call it once for every machine.
(let* ((session (false-if-exception (open-ssh-session best))) (let* ((session (false-if-exception (open-ssh-session best
%short-timeout)))
(node (and session (remote-inferior session))) (node (and session (remote-inferior session)))
(load (and node (normalized-load best (node-load node)))) (load (and node (normalized-load best (node-load node))))
(space (and node (node-free-disk-space node)))) (space (and node (node-free-disk-space node))))
@ -573,6 +578,11 @@ (define* (process-request wants-local? system drv features
;;; Installation tests. ;;; Installation tests.
;;; ;;;
(define %short-timeout
;; Timeout in seconds used on SSH connections where reads and writes
;; shouldn't take long.
15)
(define (assert-node-repl node name) (define (assert-node-repl node name)
"Bail out if NODE is not running Guile." "Bail out if NODE is not running Guile."
(match (node-guile-version node) (match (node-guile-version node)
@ -658,7 +668,7 @@ (define (build-machine=? m1 m2)
(length machines) machine-file) (length machines) machine-file)
(let* ((names (map build-machine-name machines)) (let* ((names (map build-machine-name machines))
(sockets (map build-machine-daemon-socket machines)) (sockets (map build-machine-daemon-socket machines))
(sessions (map open-ssh-session machines)) (sessions (map (cut open-ssh-session <> %short-timeout) machines))
(nodes (map remote-inferior sessions))) (nodes (map remote-inferior sessions)))
(for-each assert-node-has-guix nodes names) (for-each assert-node-has-guix nodes names)
(for-each assert-node-repl nodes names) (for-each assert-node-repl nodes names)
@ -682,7 +692,7 @@ (define (build-machine=? m1 m2)
(length machines) machine-file) (length machines) machine-file)
(for-each (lambda (machine) (for-each (lambda (machine)
(define session (define session
(open-ssh-session machine)) (open-ssh-session machine %short-timeout))
(match (remote-inferior session) (match (remote-inferior session)
(#f (#f