lint: archival: Lookup content in Disarchive database.

* guix/lint.scm (lookup-disarchive-spec): New procedure.
(check-archival): When 'lookup-content' returns #f, call
'lookup-disarchive-spec'.  Call 'lookup-directory' on the result of
'lookup-directory'.
* guix/download.scm (%disarchive-mirrors): Make public.
* tests/lint.scm ("archival: missing content"): Set
'%disarchive-mirrors'.
("archival: content unavailable but disarchive available"): New test.
This commit is contained in:
Ludovic Courtès 2021-05-15 12:19:03 +02:00
parent dac6c21623
commit bc4d81d267
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5
3 changed files with 89 additions and 8 deletions

View file

@ -35,6 +35,7 @@ (define-module (guix download)
#:use-module (srfi srfi-1)
#:use-module (srfi srfi-26)
#:export (%mirrors
%disarchive-mirrors
(url-fetch* . url-fetch)
url-fetch/executable
url-fetch/tarbomb

View file

@ -30,6 +30,7 @@
(define-module (guix lint)
#:use-module (guix store)
#:autoload (guix base16) (bytevector->base16-string)
#:use-module (guix base32)
#:use-module (guix diagnostics)
#:use-module (guix download)
@ -1227,6 +1228,43 @@ (define (check-for-updates package)
#:field 'source)))))))
(define (lookup-disarchive-spec hash)
"If Disarchive mirrors have a spec for HASH, return the list of SWH
directory identifiers the spec refers to. Otherwise return #f."
(define (extract-swh-id spec)
;; Return the list of SWH directory identifiers SPEC refers to, where SPEC
;; is a Disarchive sexp. Instead of attempting to parse it, traverse it
;; in a pretty unintelligent fashion.
(let loop ((sexp spec)
(ids '()))
(match sexp
((? string? str)
(let ((prefix "swh:1:dir:"))
(if (string-prefix? prefix str)
(cons (string-drop str (string-length prefix)) ids)
ids)))
((head tail ...)
(loop tail (loop head ids)))
(_ ids))))
(any (lambda (mirror)
(with-networking-fail-safe
(format #f (G_ "failed to access Disarchive database at ~a")
mirror)
#f
(guard (c ((http-get-error? c) #f))
(let* ((url (string-append mirror
(symbol->string
(content-hash-algorithm hash))
"/"
(bytevector->base16-string
(content-hash-value hash))))
(port (http-fetch (string->uri url) #:text? #t))
(spec (read port)))
(close-port port)
(extract-swh-id spec)))))
%disarchive-mirrors))
(define (check-archival package)
"Check whether PACKAGE's source code is archived on Software Heritage. If
it's not, and if its source code is a VCS snapshot, then send a \"save\"
@ -1302,10 +1340,26 @@ (define commit
(symbol->string
(content-hash-algorithm hash)))
(#f
(list (make-warning package
(G_ "source not archived on Software \
Heritage")
#:field 'source)))
;; If SWH doesn't have HASH as is, it may be because it's
;; a hand-crafted tarball. In that case, check whether
;; the Disarchive database has an entry for that tarball.
(match (lookup-disarchive-spec hash)
(#f
(list (make-warning package
(G_ "source not archived on Software \
Heritage and missing from the Disarchive database")
#:field 'source)))
(directory-ids
(match (find (lambda (id)
(not (lookup-directory id)))
directory-ids)
(#f '())
(id
(list (make-warning package
(G_ "
Disarchive entry refers to non-existent SWH directory '~a'")
(list id)
#:field 'source)))))))
((? content?)
'())))
'()))))

View file

@ -1,7 +1,7 @@
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2012, 2013 Cyril Roelandt <tipecaml@gmail.com>
;;; Copyright © 2014, 2015, 2016 Eric Bavier <bavier@member.fsf.org>
;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2015, 2016 Mathieu Lirzin <mthl@gnu.org>
;;; Copyright © 2016 Hartmut Goebel <h.goebel@crazy-compilers.com>
;;; Copyright © 2017 Alex Kost <alezost@gmail.com>
@ -1008,10 +1008,13 @@ (define (warning-contains? str warnings)
(method url-fetch)
(uri "http://example.org/foo.tgz")
(sha256 (make-bytevector 32))))
(warnings (with-http-server '((404 "Not archived."))
(warnings (with-http-server '((404 "Not archived.")
(404 "Not in Disarchive database."))
(parameterize ((%swh-base-url (%local-url)))
(check-archival (dummy-package "x"
(source origin)))))))
(mock ((guix download) %disarchive-mirrors
(list (%local-url)))
(check-archival (dummy-package "x"
(source origin))))))))
(warning-contains? "not archived" warnings)))
(test-equal "archival: content available"
@ -1027,6 +1030,29 @@ (define (warning-contains? str warnings)
(parameterize ((%swh-base-url (%local-url)))
(check-archival (dummy-package "x" (source origin)))))))
(test-equal "archival: content unavailable but disarchive available"
'()
(let* ((origin (origin
(method url-fetch)
(uri "http://example.org/foo.tgz")
(sha256 (make-bytevector 32))))
(disarchive (object->string
'(disarchive (version 0)
...
"swh:1:dir:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")))
;; https://archive.softwareheritage.org/api/1/directory/
(directory "[ { \"checksums\": {},
\"dir_id\": \"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\",
\"type\": \"file\",
\"name\": \"README\"
\"length\": 42 } ]"))
(with-http-server `((404 "") ;lookup-content
(200 ,disarchive) ;Disarchive database lookup
(200 ,directory)) ;lookup-directory
(mock ((guix download) %disarchive-mirrors (list (%local-url)))
(parameterize ((%swh-base-url (%local-url)))
(check-archival (dummy-package "x" (source origin))))))))
(test-assert "archival: missing revision"
(let* ((origin (origin
(method git-fetch)