deduplicate: Avoid traversing directories twice.

Until now, we'd call (nar-sha256 file) unconditionally.  Thus, if FILE
was a directory, we would traverse it for no reason, and then call
'deduplicate' on FILE, which would again traverse it.

This change also removes redundant (mkdir-p store) calls from the loop,
and avoids 'lstat' calls by using 'scandir*'.

* guix/store/deduplication.scm (deduplicate): Add named loop.  Move
'mkdir-p' outside the loop.  Use 'scandir*' instead of 'scandir'.  Do
not call 'nar-sha256' when FILE has type 'directory.
This commit is contained in:
Ludovic Courtès 2020-06-22 12:29:15 +02:00
parent a0d419e639
commit fe5de925aa
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -23,10 +23,12 @@
(define-module (guix store deduplication) (define-module (guix store deduplication)
#:use-module (gcrypt hash) #:use-module (gcrypt hash)
#:use-module (guix build utils) #:use-module (guix build utils)
#:use-module (guix build syscalls)
#:use-module (guix base32) #:use-module (guix base32)
#:use-module (srfi srfi-11) #:use-module (srfi srfi-11)
#:use-module (rnrs io ports) #:use-module (rnrs io ports)
#:use-module (ice-9 ftw) #:use-module (ice-9 ftw)
#:use-module (ice-9 match)
#:use-module (guix serialization) #:use-module (guix serialization)
#:export (nar-sha256 #:export (nar-sha256
deduplicate)) deduplicate))
@ -138,20 +140,27 @@ (define* (deduplicate path hash #:key (store %store-directory))
replace PATH with a hardlink to the already-existing one. If not, register replace PATH with a hardlink to the already-existing one. If not, register
PATH so that future duplicates can hardlink to it. PATH is assumed to be PATH so that future duplicates can hardlink to it. PATH is assumed to be
under STORE." under STORE."
(let* ((links-directory (string-append store "/.links")) (define links-directory
(link-file (string-append links-directory "/" (string-append store "/.links"))
(bytevector->nix-base32-string hash))))
(mkdir-p links-directory) (mkdir-p links-directory)
(if (eq? 'directory (stat:type (lstat path))) (let loop ((path path)
(type (stat:type (lstat path)))
(hash hash))
(if (eq? 'directory type)
;; Can't hardlink directories, so hardlink their atoms. ;; Can't hardlink directories, so hardlink their atoms.
(for-each (lambda (file) (for-each (match-lambda
(unless (or (member file '("." "..")) ((file . properties)
(and (string=? path store) (unless (member file '("." ".."))
(string=? file ".links"))) (let* ((file (string-append path "/" file))
(let ((file (string-append path "/" file))) (type (or (assq-ref properties 'type)
(deduplicate file (nar-sha256 file) (stat:type (lstat file)))))
#:store store)))) (loop file type
(scandir path)) (and (not (eq? 'directory type))
(nar-sha256 file)))))))
(scandir* path))
(let ((link-file (string-append links-directory "/"
(bytevector->nix-base32-string hash))))
(if (file-exists? link-file) (if (file-exists? link-file)
(replace-with-link link-file path (replace-with-link link-file path
#:swap-directory links-directory) #:swap-directory links-directory)
@ -174,4 +183,4 @@ (define* (deduplicate path hash #:key (store %store-directory))
;; PATH has reached the maximum number of links, but ;; PATH has reached the maximum number of links, but
;; that's OK: we just can't deduplicate it more. ;; that's OK: we just can't deduplicate it more.
#f) #f)
(else (apply throw args)))))))))) (else (apply throw args)))))))))))