mirror of
https://git.in.rschanz.org/ryan77627/guix.git
synced 2025-01-23 19:19:20 -05:00
utils: Allow text substitution even in the presence of NUL characters.
Fixes <https://issues.guix.gnu.org/30116>. Before this change, the presence of a NUL character on a line meant that the (glibc) regexp engine used by Guile would either 1. stop scanning the string or 2. crash with the error "string contains #\\nul character", depending on the locale used. This change works around this limitation by first replacing the NUL character by an unused Unicode code point, doing the substitution, then reverting the replacement. * guix/build/utils.scm (unused-private-use-code-point) (replace-char): New procedures. (substitute): Make use of the above procedures to work around the NUL character regexp engine limitation. * tests/build-utils.scm: Add tests. Co-authored-by: Maxim Cournoyer <maxim.cournoyer@gmail.com> Signed-off-by: Maxim Cournoyer <maxim.cournoyer@gmail.com>
This commit is contained in:
parent
8027711ee8
commit
485ac28235
2 changed files with 69 additions and 12 deletions
|
@ -2,7 +2,7 @@
|
|||
;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
|
||||
;;; Copyright © 2013 Andreas Enge <andreas@enge.fr>
|
||||
;;; Copyright © 2013 Nikita Karetnikov <nikita@karetnikov.org>
|
||||
;;; Copyright © 2015, 2018 Mark H Weaver <mhw@netris.org>
|
||||
;;; Copyright © 2015, 2018, 2021 Mark H Weaver <mhw@netris.org>
|
||||
;;; Copyright © 2018 Arun Isaac <arunisaac@systemreboot.net>
|
||||
;;; Copyright © 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
|
||||
;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
|
||||
|
@ -784,6 +784,31 @@ (define (with-atomic-file-replacement file proc)
|
|||
(lambda (key . args)
|
||||
(false-if-exception (delete-file template))))))
|
||||
|
||||
(define (unused-private-use-code-point s)
|
||||
"Find a code point within a Unicode Private Use Area that is not
|
||||
present in S, and return the corresponding character object. If one
|
||||
cannot be found, return false."
|
||||
(define (scan lo hi)
|
||||
(and (<= lo hi)
|
||||
(let ((c (integer->char lo)))
|
||||
(if (string-index s c)
|
||||
(scan (+ lo 1) hi)
|
||||
c))))
|
||||
(or (scan #xE000 #xF8FF)
|
||||
(scan #xF0000 #xFFFFD)
|
||||
(scan #x100000 #x10FFFD)))
|
||||
|
||||
(define (replace-char c1 c2 s)
|
||||
"Return a string which is equal to S except with all instances of C1
|
||||
replaced by C2. If C1 and C2 are equal, return S."
|
||||
(if (char=? c1 c2)
|
||||
s
|
||||
(string-map (lambda (c)
|
||||
(if (char=? c c1)
|
||||
c2
|
||||
c))
|
||||
s)))
|
||||
|
||||
(define (substitute file pattern+procs)
|
||||
"PATTERN+PROCS is a list of regexp/two-argument-procedure pairs. For each
|
||||
line of FILE, and for each PATTERN that it matches, call the corresponding
|
||||
|
@ -802,16 +827,26 @@ (define (substitute file pattern+procs)
|
|||
(let loop ((line (read-line in 'concat)))
|
||||
(if (eof-object? line)
|
||||
#t
|
||||
(let ((line (fold (lambda (r+p line)
|
||||
(match r+p
|
||||
((regexp . proc)
|
||||
(match (list-matches regexp line)
|
||||
((and m+ (_ _ ...))
|
||||
(proc line m+))
|
||||
(_ line)))))
|
||||
line
|
||||
rx+proc)))
|
||||
(display line out)
|
||||
;; Work around the fact that Guile's regexp-exec does not handle
|
||||
;; NUL characters (a limitation of the underlying GNU libc's
|
||||
;; regexec) by temporarily replacing them by an unused private
|
||||
;; Unicode code point.
|
||||
;; TODO: Use SRFI-115 instead, once available in Guile.
|
||||
(let* ((nul* (or (and (string-index line #\nul)
|
||||
(unused-private-use-code-point line))
|
||||
#\nul))
|
||||
(line* (replace-char #\nul nul* line))
|
||||
(line1* (fold (lambda (r+p line)
|
||||
(match r+p
|
||||
((regexp . proc)
|
||||
(match (list-matches regexp line)
|
||||
((and m+ (_ _ ...))
|
||||
(proc line m+))
|
||||
(_ line)))))
|
||||
line*
|
||||
rx+proc))
|
||||
(line1 (replace-char nul* #\nul line1*)))
|
||||
(display line1 out)
|
||||
(loop (read-line in 'concat)))))))))
|
||||
|
||||
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
;;; GNU Guix --- Functional package management for GNU
|
||||
;;; Copyright © 2012, 2015, 2016, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
|
||||
;;; Copyright © 2019 Ricardo Wurmus <rekado@elephly.net>
|
||||
;;; Copyright © 2021 Maxim Cournoyer <maxim.cournoyer@gmail.com>
|
||||
;;;
|
||||
;;; This file is part of GNU Guix.
|
||||
;;;
|
||||
|
@ -18,7 +19,7 @@
|
|||
;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
|
||||
(define-module (test-build-utils)
|
||||
(define-module (test build-utils)
|
||||
#:use-module (guix tests)
|
||||
#:use-module (guix build utils)
|
||||
#:use-module ((guix utils)
|
||||
|
@ -241,4 +242,25 @@ (define-module (test-build-utils)
|
|||
"/some/other/path")))
|
||||
#f)))))
|
||||
|
||||
(test-equal "substitute*, text contains a NUL byte, UTF-8"
|
||||
"c\0d"
|
||||
(with-fluids ((%default-port-encoding "UTF-8")
|
||||
(%default-port-conversion-strategy 'error))
|
||||
;; The GNU libc is locale sensitive. Depending on the value of LANG, the
|
||||
;; test could fail with "string contains #\\nul character: ~S" or "cannot
|
||||
;; convert wide string to output locale".
|
||||
(setlocale LC_ALL "en_US.UTF-8")
|
||||
(call-with-temporary-output-file
|
||||
(lambda (file port)
|
||||
(format port "a\0b")
|
||||
(flush-output-port port)
|
||||
|
||||
(substitute* file
|
||||
(("a") "c")
|
||||
(("b") "d"))
|
||||
|
||||
(with-input-from-file file
|
||||
(lambda _
|
||||
(get-string-all (current-input-port))))))))
|
||||
|
||||
(test-end)
|
||||
|
|
Loading…
Reference in a new issue