union: Rewrite to be faster; handle symlink/directory conflicts.

* guix/build/union.scm: Rewrite; only 'file=?' remains unchanged.  Remove
  'tree-union' and 'delete-duplicate-leaves' exports.  Merge inputs in a
  breadth-first fashion.  Follow symlinks for purposes of making decisions
  about the merge.

* tests/union.scm: Remove tests of 'tree-union' and 'delete-duplicate-leaves'.
This commit is contained in:
Mark H Weaver 2014-03-28 03:54:01 -04:00
parent 8ead71b4b0
commit 1212999868
2 changed files with 78 additions and 201 deletions

@ -1,5 +1,6 @@
;;; GNU Guix --- Functional package management for GNU ;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2012, 2013, 2014 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2012, 2013, 2014 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2014 Mark H Weaver <mhw@netris.org>
;;; ;;;
;;; This file is part of GNU Guix. ;;; This file is part of GNU Guix.
;;; ;;;
@ -17,16 +18,13 @@
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
(define-module (guix build union) (define-module (guix build union)
#:use-module (ice-9 ftw)
#:use-module (ice-9 match) #:use-module (ice-9 match)
#:use-module (ice-9 format) #:use-module (ice-9 format)
#:use-module (srfi srfi-1) #:use-module (srfi srfi-1)
#:use-module (srfi srfi-26) #:use-module (srfi srfi-26)
#:use-module (rnrs bytevectors) #:use-module (rnrs bytevectors)
#:use-module (rnrs io ports) #:use-module (rnrs io ports)
#:export (tree-union #:export (union-build))
delete-duplicate-leaves
union-build))
;;; Commentary: ;;; Commentary:
;;; ;;;
@ -35,72 +33,20 @@
;;; ;;;
;;; Code: ;;; Code:
(define (tree-union trees) (define (files-in-directory dirname)
"Return a tree that is the union of the trees listed in TREES. Each (let ((dir (opendir dirname)))
tree has the form (PARENT LEAVES ...) or just LEAF, where each leaf is (let loop ((files '()))
itself a tree. " (match (readdir dir)
(let loop ((trees trees)) ((or "." "..")
(match trees (loop files))
(() ; nothing left ((? eof-object?)
'()) (closedir dir)
(_ (sort files string<?))
(let ((dirs (filter pair? trees)) (file
(leaves (remove pair? trees))) (loop (cons file files)))))))
`(,@leaves
,@(fold (lambda (dir result)
(cons `(,dir
,@(loop
(concatenate
(filter-map (match-lambda
((head children ...)
(and (equal? head dir)
children)))
dirs))))
result))
'()
(delete-duplicates (map car dirs)))))))))
(define* (delete-duplicate-leaves tree (define (file-is-directory? file)
#:optional (eq? 'directory (stat:type (stat file))))
(leaf=? equal?)
(delete-duplicates (match-lambda
((head _ ...) head))))
"Delete duplicate leaves from TREE. Two leaves are considered equal
when LEAF=? applied to them returns #t. Each collision (list of leaves
that are LEAF=?) is passed to DELETE-DUPLICATES, which must return a
single leaf."
(let loop ((tree tree))
(match tree
((dir children ...)
(let ((dirs (filter pair? children))
(leaves (remove pair? children)))
(define collisions
(fold (lambda (leaf result)
(define same?
(cut leaf=? leaf <>))
(if (any (cut find same? <>) result)
result
(match (filter same? leaves)
((_)
result)
((collision ...)
(cons collision result)))))
'()
leaves))
(define non-collisions
(filter (lambda (leaf)
(match (filter (cut leaf=? leaf <>) leaves)
((_) #t)
((_ _ ..1) #f)))
leaves))
`(,dir
,@non-collisions
,@(map delete-duplicates collisions)
,@(map loop dirs))))
(leaf leaf))))
(define (file=? file1 file2) (define (file=? file1 file2)
"Return #t if FILE1 and FILE2 are regular files and their contents are "Return #t if FILE1 and FILE2 are regular files and their contents are
@ -124,110 +70,82 @@ identical, #f otherwise."
(or (eof-object? n1) (or (eof-object? n1)
(loop)))))))))))) (loop))))))))))))
(define* (union-build output directories (define* (union-build output inputs
#:key (log-port (current-error-port))) #:key (log-port (current-error-port)))
"Build in the OUTPUT directory a symlink tree that is the union of all "Build in the OUTPUT directory a symlink tree that is the union of all
the DIRECTORIES." the INPUTS."
(define (file-tree dir)
;; Return the contents of DIR as a tree.
(define (others-have-it? subdir) (define (symlink* input output)
;; Return #t if other elements of DIRECTORIES have SUBDIR. (format log-port "`~a' ~~> `~a'~%" input output)
(let ((subdir (substring subdir (string-length dir)))) (symlink input output))
(any (lambda (other)
(and (not (string=? other dir))
(file-exists? (string-append other "/" subdir))))
directories)))
(match (file-system-fold (lambda (subdir stat result) ; enter? (define (resolve-collisions output dirs files)
;; No need to traverse DIR since there's (cond ((null? dirs)
;; nothing to union it with. Thus, we avoid ;; The inputs are all files.
;; creating a gazillon symlinks (think (format (current-error-port)
;; share/emacs/24.3, share/texmf, etc.) "warning: collision encountered: ~{~a ~}~%"
(or (string=? subdir dir) files)
(others-have-it? subdir)))
(lambda (file stat result) ; leaf
(match result
(((siblings ...) rest ...)
`((,file ,@siblings) ,@rest))))
(lambda (dir stat result) ; down
`(() ,@result))
(lambda (dir stat result) ; up
(match result
(((leaves ...) (siblings ...) rest ...)
`(((,(basename dir) ,@leaves) ,@siblings)
,@rest))))
(lambda (dir stat result) ; skip
;; DIR is not available elsewhere, so treat it
;; as a leaf.
(match result
(((siblings ...) rest ...)
`((,dir ,@siblings) ,@rest))))
(lambda (file stat errno result)
(format (current-error-port) "union-build: ~a: ~a~%"
file (strerror errno)))
'(())
dir)
(((tree)) tree)
(() #f)))
(define tree-leaves (let ((file (first files)))
;; Return the leaves of the given tree. ;; TODO: Implement smarter strategies.
(match-lambda (format (current-error-port)
(((? string?) leaves ...) "warning: arbitrarily choosing ~a~%"
leaves))) file)
(define (leaf=? a b) (symlink* file output)))
(equal? (basename a) (basename b)))
(define (resolve-collision leaves) (else
;; LEAVES all have the same basename, so choose one of them. ;; The inputs are a mixture of files and directories
(match (delete-duplicates leaves string=?) (error "union-build: collision between file and directories"
((one-and-the-same) `((files ,files) (dirs ,dirs))))))
;; LEAVES all actually point to the same file, so nothing to worry
;; about.
one-and-the-same)
((and lst (head rest ...))
;; A real collision, unless those files are all identical.
(unless (every (cut file=? head <>) rest)
(format (current-error-port) "warning: collision encountered: ~{~a ~}~%"
lst)
;; TODO: Implement smarter strategies. (define (union output inputs)
(format (current-error-port) "warning: arbitrarily choosing ~a~%" (match inputs
head)) ((input)
head))) ;; There's only one input, so just make a link.
(symlink* input output))
(_
(call-with-values (lambda () (partition file-is-directory? inputs))
(match-lambda*
((dirs ())
;; All inputs are directories. Create a new directory
;; where we will merge the input directories.
(mkdir output)
;; Build a hash table mapping each file to a list of input
;; directories containing that file.
(let ((table (make-hash-table)))
(define (add-to-table! file dir)
(hash-set! table file (cons dir (hash-ref table file '()))))
;; Populate the table.
(for-each (lambda (dir)
(for-each (cut add-to-table! <> dir)
(files-in-directory dir)))
dirs)
;; Now iterate over the table and recursively
;; perform a union for each entry.
(hash-for-each (lambda (file dirs-with-file)
(union (string-append output "/" file)
(map (cut string-append <> "/" file)
(reverse dirs-with-file))))
table)))
((() (file (? (cut file=? <> file)) ...))
;; There are no directories, and all files have the same contents,
;; so there's no conflict.
(symlink* file output))
((dirs files)
(resolve-collisions output dirs files)))))))
(setvbuf (current-output-port) _IOLBF) (setvbuf (current-output-port) _IOLBF)
(setvbuf (current-error-port) _IOLBF) (setvbuf (current-error-port) _IOLBF)
(when (file-port? log-port) (when (file-port? log-port)
(setvbuf log-port _IOLBF)) (setvbuf log-port _IOLBF))
(mkdir output) (union output (delete-duplicates inputs)))
(let loop ((tree (delete-duplicate-leaves
(cons "."
(tree-union
(append-map (compose tree-leaves file-tree)
(delete-duplicates directories))))
leaf=?
resolve-collision))
(dir '()))
(match tree
((? string?)
;; A leaf: create a symlink.
(let* ((dir (string-join dir "/"))
(target (string-append output "/" dir "/" (basename tree))))
(format log-port "`~a' ~~> `~a'~%" tree target)
(symlink tree target)))
(((? string? subdir) leaves ...)
;; A sub-directory: create it in OUTPUT, and iterate over LEAVES.
(unless (string=? subdir ".")
(let ((dir (string-join dir "/")))
(mkdir (string-append output "/" dir "/" subdir))))
(for-each (cute loop <> `(,@dir ,subdir))
leaves))
((leaves ...)
;; A series of leaves: iterate over them.
(for-each (cut loop <> dir) leaves)))))
;;; union.scm ends here ;;; union.scm ends here

@ -43,47 +43,6 @@
(test-begin "union") (test-begin "union")
(test-equal "tree-union, empty"
'()
(tree-union '()))
(test-equal "tree-union, leaves only"
'(a b c d)
(tree-union '(a b c d)))
(test-equal "tree-union, simple"
'((bin ls touch make awk gawk))
(tree-union '((bin ls touch)
(bin make)
(bin awk gawk))))
(test-equal "tree-union, several levels"
'((share (doc (make README) (coreutils README)))
(bin ls touch make))
(tree-union '((bin ls touch)
(share (doc (coreutils README)))
(bin make)
(share (doc (make README))))))
(test-equal "delete-duplicate-leaves, default"
'(bin make touch ls)
(delete-duplicate-leaves '(bin ls make touch ls)))
(test-equal "delete-duplicate-leaves, file names"
'("doc" ("info"
"/binutils/ld.info"
"/gcc/gcc.info"
"/binutils/standards.info"))
(let ((leaf=? (lambda (a b)
(string=? (basename a) (basename b)))))
(delete-duplicate-leaves '("doc"
("info"
"/binutils/ld.info"
"/binutils/standards.info"
"/gcc/gcc.info"
"/gcc/standards.info"))
leaf=?)))
(test-skip (if (and %store (test-skip (if (and %store
(false-if-exception (false-if-exception
(getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV))) (getaddrinfo "www.gnu.org" "80" AI_NUMERICSERV)))