From fe5de925aa0f2854a679cebdea02b03cac561c8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ludovic=20Court=C3=A8s?= Date: Mon, 22 Jun 2020 12:29:15 +0200 Subject: [PATCH] deduplicate: Avoid traversing directories twice. Until now, we'd call (nar-sha256 file) unconditionally. Thus, if FILE was a directory, we would traverse it for no reason, and then call 'deduplicate' on FILE, which would again traverse it. This change also removes redundant (mkdir-p store) calls from the loop, and avoids 'lstat' calls by using 'scandir*'. * guix/store/deduplication.scm (deduplicate): Add named loop. Move 'mkdir-p' outside the loop. Use 'scandir*' instead of 'scandir'. Do not call 'nar-sha256' when FILE has type 'directory. --- guix/store/deduplication.scm | 81 ++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/guix/store/deduplication.scm b/guix/store/deduplication.scm index 80868692c0..6784ee0b92 100644 --- a/guix/store/deduplication.scm +++ b/guix/store/deduplication.scm @@ -23,10 +23,12 @@ (define-module (guix store deduplication) #:use-module (gcrypt hash) #:use-module (guix build utils) + #:use-module (guix build syscalls) #:use-module (guix base32) #:use-module (srfi srfi-11) #:use-module (rnrs io ports) #:use-module (ice-9 ftw) + #:use-module (ice-9 match) #:use-module (guix serialization) #:export (nar-sha256 deduplicate)) @@ -138,40 +140,47 @@ (define* (deduplicate path hash #:key (store %store-directory)) replace PATH with a hardlink to the already-existing one. If not, register PATH so that future duplicates can hardlink to it. PATH is assumed to be under STORE." - (let* ((links-directory (string-append store "/.links")) - (link-file (string-append links-directory "/" - (bytevector->nix-base32-string hash)))) - (mkdir-p links-directory) - (if (eq? 'directory (stat:type (lstat path))) + (define links-directory + (string-append store "/.links")) + + (mkdir-p links-directory) + (let loop ((path path) + (type (stat:type (lstat path))) + (hash hash)) + (if (eq? 'directory type) ;; Can't hardlink directories, so hardlink their atoms. - (for-each (lambda (file) - (unless (or (member file '("." "..")) - (and (string=? path store) - (string=? file ".links"))) - (let ((file (string-append path "/" file))) - (deduplicate file (nar-sha256 file) - #:store store)))) - (scandir path)) - (if (file-exists? link-file) - (replace-with-link link-file path - #:swap-directory links-directory) - (catch 'system-error - (lambda () - (link path link-file)) - (lambda args - (let ((errno (system-error-errno args))) - (cond ((= errno EEXIST) - ;; Someone else put an entry for PATH in - ;; LINKS-DIRECTORY before we could. Let's use it. - (replace-with-link path link-file - #:swap-directory links-directory)) - ((= errno ENOSPC) - ;; There's not enough room in the directory index for - ;; more entries in .links, but that's fine: we can - ;; just stop. - #f) - ((= errno EMLINK) - ;; PATH has reached the maximum number of links, but - ;; that's OK: we just can't deduplicate it more. - #f) - (else (apply throw args)))))))))) + (for-each (match-lambda + ((file . properties) + (unless (member file '("." "..")) + (let* ((file (string-append path "/" file)) + (type (or (assq-ref properties 'type) + (stat:type (lstat file))))) + (loop file type + (and (not (eq? 'directory type)) + (nar-sha256 file))))))) + (scandir* path)) + (let ((link-file (string-append links-directory "/" + (bytevector->nix-base32-string hash)))) + (if (file-exists? link-file) + (replace-with-link link-file path + #:swap-directory links-directory) + (catch 'system-error + (lambda () + (link path link-file)) + (lambda args + (let ((errno (system-error-errno args))) + (cond ((= errno EEXIST) + ;; Someone else put an entry for PATH in + ;; LINKS-DIRECTORY before we could. Let's use it. + (replace-with-link path link-file + #:swap-directory links-directory)) + ((= errno ENOSPC) + ;; There's not enough room in the directory index for + ;; more entries in .links, but that's fine: we can + ;; just stop. + #f) + ((= errno EMLINK) + ;; PATH has reached the maximum number of links, but + ;; that's OK: we just can't deduplicate it more. + #f) + (else (apply throw args)))))))))))