offload: Use (guix inferior) instead of (ssh dist node).
Using inferiors and thus 'guix repl' simplifies setup on build machines (no need to worry about GUILE_LOAD_PATH etc.) Furthermore, the 'guix repl -t machine' protocol running in a remote pipe addresses several issues with the current implementation of nodes and RREPLs in Guile-SSH: fewer round trips, doesn't leave a 'guile --listen' process behind it, stateless (since a new process is started each time), more efficient (the SSH channel can be reused), more reliable (no 'pgrep', 'pkill', and shellology; see <https://github.com/artyom-poptsov/guile-ssh/issues/11> as an example.) * guix/ssh.scm (inferior-remote-eval): New procedure. (send-files): Use it instead of 'make-node' and 'node-eval'. * guix/scripts/offload.scm (node-guile-version): New procedure. (node-free-disk-space, transfer-and-offload, node-load) (choose-build-machine, assert-node-has-guix): Use 'remote-inferior' instead of 'make-node' and 'inferior-eval' instead of 'node-eval'. (assert-node-can-import, assert-node-can-export): Likewise, and add 'session' parameter. (check-machine-availability): Likewise, and add calls to 'close-inferior' and 'disconnect!'. (check-machine-status): Likewise. * doc/guix.texi (Daemon Offload Setup): Remove bit related to 'guile' in $PATH and $GUILE_LOAD_PATH; mention 'guix' alone.
This commit is contained in:
parent
af15fe13b6
commit
ed7b44370f
@ -1051,13 +1051,11 @@ name, and they will be scheduled on matching build machines.
|
||||
@end table
|
||||
@end deftp
|
||||
|
||||
The @code{guile} command must be in the search path on the build
|
||||
machines. In addition, the Guix modules must be in
|
||||
@code{$GUILE_LOAD_PATH} on the build machine---you can check whether
|
||||
this is the case by running:
|
||||
The @command{guix} command must be in the search path on the build
|
||||
machines. You can check whether this is the case by running:
|
||||
|
||||
@example
|
||||
ssh build-machine guile -c "'(use-modules (guix config))'"
|
||||
ssh build-machine guix repl --version
|
||||
@end example
|
||||
|
||||
There is one last thing to do once @file{machines.scm} is in place. As
|
||||
|
@ -23,13 +23,12 @@
|
||||
#:use-module (ssh session)
|
||||
#:use-module (ssh channel)
|
||||
#:use-module (ssh popen)
|
||||
#:use-module (ssh dist)
|
||||
#:use-module (ssh dist node)
|
||||
#:use-module (ssh version)
|
||||
#:use-module (guix config)
|
||||
#:use-module (guix records)
|
||||
#:use-module (guix ssh)
|
||||
#:use-module (guix store)
|
||||
#:use-module (guix inferior)
|
||||
#:use-module (guix derivations)
|
||||
#:use-module ((guix serialization)
|
||||
#:select (nar-error? nar-error-file))
|
||||
@ -321,12 +320,15 @@ hook."
|
||||
(set-port-revealed! port 1)
|
||||
port))
|
||||
|
||||
(define (node-guile-version node)
|
||||
(inferior-eval '(version) node))
|
||||
|
||||
(define (node-free-disk-space node)
|
||||
"Return the free disk space, in bytes, in NODE's store."
|
||||
(node-eval node
|
||||
`(begin
|
||||
(use-modules (guix build syscalls))
|
||||
(free-disk-space ,(%store-prefix)))))
|
||||
(inferior-eval `(begin
|
||||
(use-modules (guix build syscalls))
|
||||
(free-disk-space ,(%store-prefix)))
|
||||
node))
|
||||
|
||||
(define* (transfer-and-offload drv machine
|
||||
#:key
|
||||
@ -367,8 +369,12 @@ MACHINE."
|
||||
(derivation-file-name drv)
|
||||
(build-machine-name machine)
|
||||
(nix-protocol-error-message c))
|
||||
(let* ((space (false-if-exception
|
||||
(node-free-disk-space (make-node session)))))
|
||||
(let* ((inferior (false-if-exception (remote-inferior session)))
|
||||
(space (false-if-exception
|
||||
(node-free-disk-space inferior))))
|
||||
|
||||
(when inferior
|
||||
(close-inferior inferior))
|
||||
|
||||
;; Use exit code 100 for a permanent build failure. The daemon
|
||||
;; interprets other non-zero codes as transient build failures.
|
||||
@ -417,11 +423,11 @@ of free disk space on '~a'~%")
|
||||
|
||||
(define (node-load node)
|
||||
"Return the load on NODE. Return +∞ if NODE is misbehaving."
|
||||
(let ((line (node-eval node
|
||||
'(begin
|
||||
(use-modules (ice-9 rdelim))
|
||||
(call-with-input-file "/proc/loadavg"
|
||||
read-string)))))
|
||||
(let ((line (inferior-eval '(begin
|
||||
(use-modules (ice-9 rdelim))
|
||||
(call-with-input-file "/proc/loadavg"
|
||||
read-string))
|
||||
node)))
|
||||
(if (eof-object? line)
|
||||
+inf.0 ;MACHINE does not respond, so assume it is infinitely loaded
|
||||
(match (string-tokenize line)
|
||||
@ -508,9 +514,10 @@ slot (which must later be released with 'release-build-slot'), or #f and #f."
|
||||
;; Note: We call 'node-load' only as a last resort because it is
|
||||
;; too costly to call it once for every machine.
|
||||
(let* ((session (false-if-exception (open-ssh-session best)))
|
||||
(node (and session (make-node session)))
|
||||
(node (and session (remote-inferior session)))
|
||||
(load (and node (normalized-load best (node-load node))))
|
||||
(space (and node (node-free-disk-space node))))
|
||||
(when node (close-inferior node))
|
||||
(when session (disconnect! session))
|
||||
(if (and node (< load 2.) (>= space %minimum-disk-space))
|
||||
(match others
|
||||
@ -613,18 +620,17 @@ If TIMEOUT is #f, simply evaluate EXP..."
|
||||
(#f
|
||||
(report-guile-error name))
|
||||
((? string? version)
|
||||
;; Note: The version string already contains the word "Guile".
|
||||
(info (G_ "'~a' is running ~a~%")
|
||||
(info (G_ "'~a' is running GNU Guile ~a~%")
|
||||
name (node-guile-version node)))))
|
||||
|
||||
(define (assert-node-has-guix node name)
|
||||
"Bail out if NODE lacks the (guix) module, or if its daemon is not running."
|
||||
(catch 'node-repl-error
|
||||
(lambda ()
|
||||
(match (node-eval node
|
||||
'(begin
|
||||
(use-modules (guix))
|
||||
(and add-text-to-store 'alright)))
|
||||
(match (inferior-eval '(begin
|
||||
(use-modules (guix))
|
||||
(and add-text-to-store 'alright))
|
||||
node)
|
||||
('alright #t)
|
||||
(_ (report-module-error name))))
|
||||
(lambda (key . args)
|
||||
@ -632,12 +638,12 @@ If TIMEOUT is #f, simply evaluate EXP..."
|
||||
|
||||
(catch 'node-repl-error
|
||||
(lambda ()
|
||||
(match (node-eval node
|
||||
'(begin
|
||||
(use-modules (guix))
|
||||
(with-store store
|
||||
(add-text-to-store store "test"
|
||||
"Hello, build machine!"))))
|
||||
(match (inferior-eval '(begin
|
||||
(use-modules (guix))
|
||||
(with-store store
|
||||
(add-text-to-store store "test"
|
||||
"Hello, build machine!")))
|
||||
node)
|
||||
((? string? str)
|
||||
(info (G_ "Guix is usable on '~a' (test returned ~s)~%")
|
||||
name str))
|
||||
@ -656,25 +662,23 @@ If TIMEOUT is #f, simply evaluate EXP..."
|
||||
(string-append name "-"
|
||||
(number->string (random 1000000 (force %random-state)))))
|
||||
|
||||
(define (assert-node-can-import node name daemon-socket)
|
||||
(define (assert-node-can-import session node name daemon-socket)
|
||||
"Bail out if NODE refuses to import our archives."
|
||||
(let ((session (node-session node)))
|
||||
(with-store store
|
||||
(let* ((item (add-text-to-store store "export-test" (nonce)))
|
||||
(remote (connect-to-remote-daemon session daemon-socket)))
|
||||
(with-store local
|
||||
(send-files local (list item) remote))
|
||||
(with-store store
|
||||
(let* ((item (add-text-to-store store "export-test" (nonce)))
|
||||
(remote (connect-to-remote-daemon session daemon-socket)))
|
||||
(with-store local
|
||||
(send-files local (list item) remote))
|
||||
|
||||
(if (valid-path? remote item)
|
||||
(info (G_ "'~a' successfully imported '~a'~%")
|
||||
name item)
|
||||
(leave (G_ "'~a' was not properly imported on '~a'~%")
|
||||
item name))))))
|
||||
(if (valid-path? remote item)
|
||||
(info (G_ "'~a' successfully imported '~a'~%")
|
||||
name item)
|
||||
(leave (G_ "'~a' was not properly imported on '~a'~%")
|
||||
item name)))))
|
||||
|
||||
(define (assert-node-can-export node name daemon-socket)
|
||||
(define (assert-node-can-export session node name daemon-socket)
|
||||
"Bail out if we cannot import signed archives from NODE."
|
||||
(let* ((session (node-session node))
|
||||
(remote (connect-to-remote-daemon session daemon-socket))
|
||||
(let* ((remote (connect-to-remote-daemon session daemon-socket))
|
||||
(item (add-text-to-store remote "import-test" (nonce name))))
|
||||
(with-store store
|
||||
(if (and (retrieve-files store (list item) remote)
|
||||
@ -701,11 +705,13 @@ machine."
|
||||
(let* ((names (map build-machine-name machines))
|
||||
(sockets (map build-machine-daemon-socket machines))
|
||||
(sessions (map open-ssh-session machines))
|
||||
(nodes (map make-node sessions)))
|
||||
(nodes (map remote-inferior sessions)))
|
||||
(for-each assert-node-repl nodes names)
|
||||
(for-each assert-node-has-guix nodes names)
|
||||
(for-each assert-node-can-import nodes names sockets)
|
||||
(for-each assert-node-can-export nodes names sockets))))
|
||||
(for-each assert-node-can-import sessions nodes names sockets)
|
||||
(for-each assert-node-can-export sessions nodes names sockets)
|
||||
(for-each close-inferior nodes)
|
||||
(for-each disconnect! sessions))))
|
||||
|
||||
(define (check-machine-status machine-file pred)
|
||||
"Print the load of each machine matching PRED in MACHINE-FILE."
|
||||
@ -722,10 +728,11 @@ machine."
|
||||
(length machines) machine-file)
|
||||
(for-each (lambda (machine)
|
||||
(let* ((session (open-ssh-session machine))
|
||||
(node (make-node session))
|
||||
(uts (node-eval node '(uname)))
|
||||
(load (node-load node))
|
||||
(free (node-free-disk-space node)))
|
||||
(inferior (remote-inferior session))
|
||||
(uts (inferior-eval '(uname) inferior))
|
||||
(load (node-load inferior))
|
||||
(free (node-free-disk-space inferior)))
|
||||
(close-inferior inferior)
|
||||
(disconnect! session)
|
||||
(format #t "~a~% kernel: ~a ~a~% architecture: ~a~%\
|
||||
host name: ~a~% normalized load: ~a~% free disk space: ~,2f MiB~%"
|
||||
|
32
guix/ssh.scm
32
guix/ssh.scm
@ -27,8 +27,6 @@
|
||||
#:use-module (ssh channel)
|
||||
#:use-module (ssh popen)
|
||||
#:use-module (ssh session)
|
||||
#:use-module (ssh dist)
|
||||
#:use-module (ssh dist node)
|
||||
#:use-module (srfi srfi-1)
|
||||
#:use-module (srfi srfi-11)
|
||||
#:use-module (srfi srfi-26)
|
||||
@ -102,6 +100,20 @@ Throw an error on failure."
|
||||
"guix" "repl" "-t" "machine")))
|
||||
(port->inferior pipe)))
|
||||
|
||||
(define (inferior-remote-eval exp session)
|
||||
"Evaluate EXP in a new inferior running in SESSION, and close the inferior
|
||||
right away."
|
||||
(let ((inferior (remote-inferior session)))
|
||||
(dynamic-wind
|
||||
(const #t)
|
||||
(lambda ()
|
||||
(inferior-eval exp inferior))
|
||||
(lambda ()
|
||||
;; Close INFERIOR right away to prevent finalization from happening in
|
||||
;; another thread at the wrong time (see
|
||||
;; <https://bugs.gnu.org/26976>.)
|
||||
(close-inferior inferior)))))
|
||||
|
||||
(define* (remote-daemon-channel session
|
||||
#:optional
|
||||
(socket-name
|
||||
@ -277,15 +289,15 @@ Return the list of store items actually sent."
|
||||
;; Compute the subset of FILES missing on SESSION and send them.
|
||||
(let* ((files (if recursive? (requisites local files) files))
|
||||
(session (channel-get-session (nix-server-socket remote)))
|
||||
(node (make-node session))
|
||||
(missing (node-eval node
|
||||
`(begin
|
||||
(use-modules (guix)
|
||||
(srfi srfi-1) (srfi srfi-26))
|
||||
(missing (inferior-remote-eval
|
||||
`(begin
|
||||
(use-modules (guix)
|
||||
(srfi srfi-1) (srfi srfi-26))
|
||||
|
||||
(with-store store
|
||||
(remove (cut valid-path? store <>)
|
||||
',files)))))
|
||||
(with-store store
|
||||
(remove (cut valid-path? store <>)
|
||||
',files)))
|
||||
session))
|
||||
(count (length missing))
|
||||
(sizes (map (lambda (item)
|
||||
(path-info-nar-size (query-path-info local item)))
|
||||
|
Loading…
Reference in New Issue
Block a user