import: pypi: Improve parsing of requirement specifications.

The previous solution was fragile and could leave unwanted characters in a
requirement name, such as '[' or ']'.

Partially fixes <https://bugs.gnu.org/33047>.

* guix/import/pypi.scm (define-module): Export SPECIFICATION->REQUIREMENT-NAME.
(%requirement-name-regexp): New variable.
(clean-requirement): Rename to...
(specification->requirement-name): this, which now uses
%requirement-name-regexp to select the requirement name from the requirement
specification.
(parse-requires.txt): Adapt.
This commit is contained in:
Maxim Cournoyer 2019-03-28 00:26:01 -04:00
parent c4797121be
commit 803fb336d6
No known key found for this signature in database
GPG key ID: 1260E46482E63562
2 changed files with 52 additions and 14 deletions

View file

@ -48,6 +48,7 @@ (define-module (guix import pypi)
#:use-module ((guix licenses) #:prefix license:) #:use-module ((guix licenses) #:prefix license:)
#:use-module (guix build-system python) #:use-module (guix build-system python)
#:export (parse-requires.txt #:export (parse-requires.txt
specification->requirement-name
guix-package->pypi-name guix-package->pypi-name
pypi-recursive-import pypi-recursive-import
pypi->guix-package pypi->guix-package
@ -118,22 +119,47 @@ (define (maybe-inputs package-inputs)
((package-inputs ...) ((package-inputs ...)
`((propagated-inputs (,'quasiquote ,package-inputs)))))) `((propagated-inputs (,'quasiquote ,package-inputs))))))
(define (clean-requirement s) (define %requirement-name-regexp
;; Given a requirement LINE, as can be found in a setuptools requires.txt ;; Regexp to match the requirement name in a requirement specification.
;; file, remove everything other than the actual name of the required
;; package, and return it. ;; Some grammar, taken from PEP-0508 (see:
(cond ;; https://www.python.org/dev/peps/pep-0508/).
((string-index s (char-set #\space #\> #\= #\<)) => (cut string-take s <>))
(else s))) ;; Using this grammar makes the PEP-0508 regexp easier to understand for
;; humans. The use of a regexp is preferred to more primitive string
;; manipulations because we can more directly match what upstream uses
;; (again, per PEP-0508). The regexp approach is also easier to extend,
;; should we want to implement more completely the grammar of PEP-0508.
;; The unified rule can be expressed as:
;; specification = wsp* ( url_req | name_req ) wsp*
;; where url_req is:
;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?
;; and where name_req is:
;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?
;; Thus, we only need to match NAME, which is expressed as:
;; identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
;; identifier = letterOrDigit identifier_end*
;; name = identifier
(let* ((letter-or-digit "[A-Za-z0-9]")
(identifier-end (string-append "(" letter-or-digit "|"
"[-_.]*" letter-or-digit ")"))
(identifier (string-append "^" letter-or-digit identifier-end "*"))
(name identifier))
(make-regexp name)))
(define (specification->requirement-name spec)
  "Given a requirement specification SPEC, as defined by PEP 508, return the
requirement name it starts with.  Leading whitespace in SPEC is ignored, since
the PEP 508 grammar allows it.  Raise an error when no requirement name can be
extracted from SPEC."
  ;; %requirement-name-regexp is anchored at the start of the string, so the
  ;; optional leading whitespace must be removed before attempting the match.
  (let ((name-match (regexp-exec %requirement-name-regexp
                                 (string-trim spec))))
    (if name-match
        ;; Without a group index, 'match:substring' returns the whole match,
        ;; which is the requirement name alone.
        (match:substring name-match)
        ;; Report the specification exactly as it was received.
        (error (G_ "Could not extract requirement name in spec:") spec))))
(define (parse-requires.txt requires.txt) (define (parse-requires.txt requires.txt)
"Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of
requirement names." requirement names."
;; This is a very incomplete parser, whose job is to select the non-optional
;; dependencies and strip them out of any version information.
;; Alternatively, we could implement a PEG parser with the (ice-9 peg)
;; library and the requirements grammar defined by PEP-0508
;; (https://www.python.org/dev/peps/pep-0508/).
(define (comment? line) (define (comment? line)
;; Return #t if the given LINE is a comment, #f otherwise. ;; Return #t if the given LINE is a comment, #f otherwise.
@ -156,7 +182,7 @@ (define (section-header? line)
((or (string-null? line) (comment? line)) ((or (string-null? line) (comment? line))
(loop result)) (loop result))
(else (else
(loop (cons (clean-requirement line) (loop (cons (specification->requirement-name line)
result)))))))))) result))))))))))
(define (guess-requirements source-url wheel-url tarball) (define (guess-requirements source-url wheel-url tarball)
@ -198,7 +224,7 @@ (define (read-wheel-metadata wheel-archive)
(hash-ref (list-ref run_requires 0) (hash-ref (list-ref run_requires 0)
"requires") "requires")
'()))) '())))
(map clean-requirement requirements))))) (map specification->requirement-name requirements)))))
(lambda () (lambda ()
(delete-file json-file) (delete-file json-file)
(rmdir dirname)))))) (rmdir dirname))))))

View file

@ -55,6 +55,14 @@ (define test-json
(define test-source-hash (define test-source-hash
"") "")
;; Requirement specifications used to exercise specification->requirement-name.
;; They cover extras, version constraints, environment markers and a URL
;; reference, with and without separating whitespace.
(define test-specifications
;; Name followed by whitespace and a bracketed list of extras.
'("Fizzy [foo, bar]"
;; Name immediately followed by several version constraints.
"PickyThing<1.6,>1.9,!=1.9.6,<2.0a0,==2.4c1"
;; Extras, a version constraint and a marker, without any whitespace.
"SomethingWithMarker[foo]>1.0;python_version<\"2.7\""
;; Extras, version constraints and a marker, separated by whitespace.
"requests [security,tests] >= 2.8.1, == 2.8.* ; python_version < \"2.7\""
;; URL-based requirement of the form "name @ url".
"pip @ https://github.com/pypa/pip/archive/1.3.1.zip#\
sha1=da9234ee9982d4bbb3c72346a6de940a148ea686"))
(define test-requires.txt "\ (define test-requires.txt "\
# A comment # A comment
# A comment after a space # A comment after a space
@ -109,6 +117,10 @@ (define test-metadata
(uri (list "https://bitheap.org/cram/cram-0.7.tar.gz" (uri (list "https://bitheap.org/cram/cram-0.7.tar.gz"
(pypi-uri "cram" "0.7")))))))) (pypi-uri "cram" "0.7"))))))))
;; Each entry of test-specifications must be reduced to its bare requirement
;; name, in the same order as the specifications are listed.
(test-equal "specification->requirement-name"
'("Fizzy" "PickyThing" "SomethingWithMarker" "requests" "pip")
(map specification->requirement-name test-specifications))
(test-equal "parse-requires.txt, with sections" (test-equal "parse-requires.txt, with sections"
'("foo" "bar") '("foo" "bar")
(mock ((ice-9 ports) call-with-input-file (mock ((ice-9 ports) call-with-input-file