deduplicate: Avoid traversing directories twice.

Until now, we'd call (nar-sha256 file) unconditionally.  Thus, if FILE
was a directory, we would traverse it for no reason, and then call
'deduplicate' on FILE, which would again traverse it.

This change also removes redundant (mkdir-p store) calls from the loop,
and avoids 'lstat' calls by using 'scandir*'.

* guix/store/deduplication.scm (deduplicate): Add named loop.  Move
'mkdir-p' outside the loop.  Use 'scandir*' instead of 'scandir'.  Do
not call 'nar-sha256' when FILE has type 'directory.
This commit is contained in:
Ludovic Courtès 2020-06-22 12:29:15 +02:00
parent a0d419e639
commit fe5de925aa
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -23,10 +23,12 @@
(define-module (guix store deduplication) (define-module (guix store deduplication)
#:use-module (gcrypt hash) #:use-module (gcrypt hash)
#:use-module (guix build utils) #:use-module (guix build utils)
#:use-module (guix build syscalls)
#:use-module (guix base32) #:use-module (guix base32)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (rnrs io ports) #:use-module (rnrs io ports)
#:use-module (ice-9 ftw) #:use-module (ice-9 ftw)
#:use-module (ice-9 match)
#:use-module (guix serialization) #:use-module (guix serialization)
#:export (nar-sha256 #:export (nar-sha256
deduplicate)) deduplicate))
@ -138,40 +140,47 @@ (define* (deduplicate path hash #:key (store %store-directory))
replace PATH with a hardlink to the already-existing one. If not, register replace PATH with a hardlink to the already-existing one. If not, register
PATH so that future duplicates can hardlink to it. PATH is assumed to be PATH so that future duplicates can hardlink to it. PATH is assumed to be
under STORE." under STORE."
(let* ((links-directory (string-append store "/.links")) (define links-directory
(link-file (string-append links-directory "/" (string-append store "/.links"))
(bytevector->nix-base32-string hash))))
(mkdir-p links-directory) (mkdir-p links-directory)
(if (eq? 'directory (stat:type (lstat path))) (let loop ((path path)
(type (stat:type (lstat path)))
(hash hash))
(if (eq? 'directory type)
;; Can't hardlink directories, so hardlink their atoms. ;; Can't hardlink directories, so hardlink their atoms.
(for-each (lambda (file) (for-each (match-lambda
(unless (or (member file '("." "..")) ((file . properties)
(and (string=? path store) (unless (member file '("." ".."))
(string=? file ".links"))) (let* ((file (string-append path "/" file))
(let ((file (string-append path "/" file))) (type (or (assq-ref properties 'type)
(deduplicate file (nar-sha256 file) (stat:type (lstat file)))))
#:store store)))) (loop file type
(scandir path)) (and (not (eq? 'directory type))
(if (file-exists? link-file) (nar-sha256 file)))))))
(replace-with-link link-file path (scandir* path))
#:swap-directory links-directory) (let ((link-file (string-append links-directory "/"
(catch 'system-error (bytevector->nix-base32-string hash))))
(lambda () (if (file-exists? link-file)
(link path link-file)) (replace-with-link link-file path
(lambda args #:swap-directory links-directory)
(let ((errno (system-error-errno args))) (catch 'system-error
(cond ((= errno EEXIST) (lambda ()
;; Someone else put an entry for PATH in (link path link-file))
;; LINKS-DIRECTORY before we could. Let's use it. (lambda args
(replace-with-link path link-file (let ((errno (system-error-errno args)))
#:swap-directory links-directory)) (cond ((= errno EEXIST)
((= errno ENOSPC) ;; Someone else put an entry for PATH in
;; There's not enough room in the directory index for ;; LINKS-DIRECTORY before we could. Let's use it.
;; more entries in .links, but that's fine: we can (replace-with-link path link-file
;; just stop. #:swap-directory links-directory))
#f) ((= errno ENOSPC)
((= errno EMLINK) ;; There's not enough room in the directory index for
;; PATH has reached the maximum number of links, but ;; more entries in .links, but that's fine: we can
;; that's OK: we just can't deduplicate it more. ;; just stop.
#f) #f)
(else (apply throw args)))))))))) ((= errno EMLINK)
;; PATH has reached the maximum number of links, but
;; that's OK: we just can't deduplicate it more.
#f)
(else (apply throw args)))))))))))