deduplicate: Avoid traversing directories twice.

Until now, we'd call (nar-sha256 file) unconditionally.  Thus, if FILE
was a directory, we would traverse it for no reason, and then call
'deduplicate' on FILE, which would again traverse it.

This change also removes redundant (mkdir-p store) calls from the loop,
and avoids 'lstat' calls by using 'scandir*'.

* guix/store/deduplication.scm (deduplicate): Add named loop.  Move
'mkdir-p' outside the loop.  Use 'scandir*' instead of 'scandir'.  Do
not call 'nar-sha256' when FILE has type 'directory.
This commit is contained in:
Ludovic Courtès 2020-06-22 12:29:15 +02:00
parent a0d419e639
commit fe5de925aa
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -23,10 +23,12 @@
(define-module (guix store deduplication)
#:use-module (gcrypt hash)
#:use-module (guix build utils)
#:use-module (guix build syscalls)
#:use-module (guix base32)
#:use-module (srfi srfi-11)
#:use-module (rnrs io ports)
#:use-module (ice-9 ftw)
#:use-module (ice-9 match)
#:use-module (guix serialization)
#:export (nar-sha256
deduplicate))
@ -138,40 +140,47 @@ (define* (deduplicate path hash #:key (store %store-directory))
replace PATH with a hardlink to the already-existing one. If not, register
PATH so that future duplicates can hardlink to it. PATH is assumed to be
under STORE."
(let* ((links-directory (string-append store "/.links"))
(link-file (string-append links-directory "/"
(bytevector->nix-base32-string hash))))
(mkdir-p links-directory)
(if (eq? 'directory (stat:type (lstat path)))
(define links-directory
(string-append store "/.links"))
(mkdir-p links-directory)
(let loop ((path path)
(type (stat:type (lstat path)))
(hash hash))
(if (eq? 'directory type)
;; Can't hardlink directories, so hardlink their atoms.
(for-each (lambda (file)
(unless (or (member file '("." ".."))
(and (string=? path store)
(string=? file ".links")))
(let ((file (string-append path "/" file)))
(deduplicate file (nar-sha256 file)
#:store store))))
(scandir path))
(if (file-exists? link-file)
(replace-with-link link-file path
#:swap-directory links-directory)
(catch 'system-error
(lambda ()
(link path link-file))
(lambda args
(let ((errno (system-error-errno args)))
(cond ((= errno EEXIST)
;; Someone else put an entry for PATH in
;; LINKS-DIRECTORY before we could. Let's use it.
(replace-with-link path link-file
#:swap-directory links-directory))
((= errno ENOSPC)
;; There's not enough room in the directory index for
;; more entries in .links, but that's fine: we can
;; just stop.
#f)
((= errno EMLINK)
;; PATH has reached the maximum number of links, but
;; that's OK: we just can't deduplicate it more.
#f)
(else (apply throw args))))))))))
(for-each (match-lambda
((file . properties)
(unless (member file '("." ".."))
(let* ((file (string-append path "/" file))
(type (or (assq-ref properties 'type)
(stat:type (lstat file)))))
(loop file type
(and (not (eq? 'directory type))
(nar-sha256 file)))))))
(scandir* path))
(let ((link-file (string-append links-directory "/"
(bytevector->nix-base32-string hash))))
(if (file-exists? link-file)
(replace-with-link link-file path
#:swap-directory links-directory)
(catch 'system-error
(lambda ()
(link path link-file))
(lambda args
(let ((errno (system-error-errno args)))
(cond ((= errno EEXIST)
;; Someone else put an entry for PATH in
;; LINKS-DIRECTORY before we could. Let's use it.
(replace-with-link path link-file
#:swap-directory links-directory))
((= errno ENOSPC)
;; There's not enough room in the directory index for
;; more entries in .links, but that's fine: we can
;; just stop.
#f)
((= errno EMLINK)
;; PATH has reached the maximum number of links, but
;; that's OK: we just can't deduplicate it more.
#f)
(else (apply throw args)))))))))))