self: Speed up Texinfo cross-reference translation.

Building guix-translated-texinfo.drv goes from 11mn to 1mn50s, most of
which is taken by po4a.

* guix/self.scm (translate-texi-manuals)[build](make-ref-regex): Remove.
(canonicalize-whitespace): New procedure.
(xref-regexp): New variable.
(translate-cross-references): Rewrite to iterate over the
cross-references rather than iterating over the msgids.  Update caller.
This commit is contained in:
Ludovic Courtès 2020-06-22 22:57:26 +02:00
parent 2f562699ea
commit a524a31de4
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -290,6 +290,7 @@ (define build
#~(begin #~(begin
(use-modules (guix build utils) (guix build po) (use-modules (guix build utils) (guix build po)
(ice-9 match) (ice-9 regex) (ice-9 textual-ports) (ice-9 match) (ice-9 regex) (ice-9 textual-ports)
(ice-9 vlist)
(srfi srfi-1)) (srfi srfi-1))
(mkdir #$output) (mkdir #$output)
@ -315,38 +316,69 @@ (define (translate-tmp-texi po source output)
"-M" "UTF-8" "-L" "UTF-8" "-k" "0" "-f" "texinfo" "-M" "UTF-8" "-L" "UTF-8" "-k" "0" "-f" "texinfo"
"-m" source "-p" po "-l" output)) "-m" source "-p" po "-l" output))
(define (make-ref-regex msgid end) (define (canonicalize-whitespace str)
(make-regexp (string-append ;; Change whitespace (newlines, etc.) in STR to #\space.
"ref\\{" (string-map (lambda (chr)
(string-join (string-split (regexp-quote msgid) #\ ) (if (char-set-contains? char-set:whitespace chr)
"[ \n]+") #\space
end))) chr))
str))
(define (translate-cross-references content translations) (define xref-regexp
"Take CONTENT, a string representing a .texi file and translate any ;; Texinfo cross-reference regexp.
cross-reference in it (@ref, @xref and @pxref) that have a translation in (make-regexp "@(px|x)?ref\\{([^,}]+)"))
TRANSLATIONS, an alist of msgid and msgstr."
(fold (define (translate-cross-references texi translations)
(lambda (elem content) ;; Translate the cross-references that appear in TEXI, a Texinfo
(match elem ;; file, using the msgid/msgstr pairs from TRANSLATIONS.
((msgid . msgstr) (define content
;; Empty translations and strings containing some special characters (call-with-input-file texi get-string-all))
;; cannot be the name of a section.
(if (or (equal? msgstr "") (define matches
(string-any (lambda (chr) (list-matches xref-regexp content))
(member chr '(#\{ #\} #\( #\) #\newline #\,)))
msgid)) (define translation-map
content (fold (match-lambda*
;; Otherwise, they might be the name of a section, so we (((msgid . str) result)
;; need to translate any occurence in @(p?x?)ref{...}. (vhash-cons msgid str result)))
(let ((regexp1 (make-ref-regex msgid ",")) vlist-null
(regexp2 (make-ref-regex msgid "\\}"))) translations))
(regexp-substitute/global
#f regexp2 (define translated
(regexp-substitute/global ;; Iterate over MATCHES and replace cross-references with their
#f regexp1 content 'pre "ref{" msgstr "," 'post) ;; translation found in TRANSLATION-MAP. (We can't use
'pre "ref{" msgstr "}" 'post)))))) ;; 'substitute*' because matches can span multiple lines.)
content translations)) (let loop ((matches matches)
(offset 0)
(result '()))
(match matches
(()
(string-concatenate-reverse
(cons (string-drop content offset) result)))
((head . tail)
(let ((prefix (match:substring head 1))
(ref (canonicalize-whitespace (match:substring head 2))))
(define translated
(string-append "@" (or prefix "")
"ref{"
(match (vhash-assoc ref translation-map)
(#f ref)
((_ . str) str))))
(loop tail
(match:end head)
(append (list translated
(string-take
(string-drop content offset)
(- (match:start head) offset)))
result)))))))
(format (current-error-port)
"translated ~a cross-references in '~a'~%"
(length matches) texi)
(call-with-output-file texi
(lambda (port)
(display translated port))))
(define* (translate-texi prefix po lang (define* (translate-texi prefix po lang
#:key (extras '())) #:key (extras '()))
@ -363,12 +395,9 @@ (define* (translate-texi prefix po lang
(for-each (lambda (file) (for-each (lambda (file)
(let* ((texi (string-append file "." lang ".texi")) (let* ((texi (string-append file "." lang ".texi"))
(tmp (string-append texi ".tmp"))) (tmp (string-append texi ".tmp")))
(with-output-to-file texi (copy-file tmp texi)
(lambda () (translate-cross-references texi
(display translations)))
(translate-cross-references
(call-with-input-file tmp get-string-all)
translations))))))
(cons prefix extras)))) (cons prefix extras))))
(define (available-translations directory domain) (define (available-translations directory domain)