gnu: Add docx2txt.

* gnu/packages/textutils.scm (docx2txt): New variable.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>
This commit is contained in:
Pierre Neidhardt 2018-06-25 23:22:32 +02:00 committed by Ludovic Courtès
parent a586d1d2bf
commit 74fa77e936
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5

View file

@ -14,6 +14,7 @@
;;; Copyright © 2017 Kei Kebreau <kkebreau@posteo.net>
;;; Copyright © 2017 Alex Vong <alexvong1995@gmail.com>
;;; Copyright © 2018 Tobias Geerinckx-Rice <me@tobias.gr>
;;; Copyright © 2018 Pierre Neidhardt <ambrevar@gmail.com>
;;;
;;; This file is part of GNU Guix.
;;;
@ -675,3 +676,68 @@ (define-public go-github.com-mattn-go-runewidth
measuring and checking the width of strings, with support east asian text.") measuring and checking the width of strings, with support east asian text.")
(home-page "https://github.com/jessevdk/go-flags") (home-page "https://github.com/jessevdk/go-flags")
(license license:expat))) (license license:expat)))
;; Package definition for docx2txt.  There is no configure step and no
;; test suite: the build is driven by make flags, and a post-install phase
;; tidies up what the Makefile installs.
(define-public docx2txt
  (package
    (name "docx2txt")
    (version "1.4")
    (source (origin
              (method url-fetch)
              (uri (string-append
                    "mirror://sourceforge/docx2txt/docx2txt/v"
                    version "/docx2txt-" version ".tgz"))
              (sha256
               (base32
                "06vdikjvpj6qdb41d8wzfnyj44jpnknmlgbhbr1w215420lpb5xj"))))
    (build-system gnu-build-system)
    (inputs
     `(("unzip" ,unzip)
       ("perl" ,perl)))
    (arguments
     `(#:tests? #f                      ; No tests.
       #:make-flags (list (string-append "BINDIR="
                                         (assoc-ref %outputs "out") "/bin")
                          (string-append "CONFIGDIR="
                                         (assoc-ref %outputs "out") "/etc")
                          ;; Makefile seems to be a bit dumb at guessing, so
                          ;; name the tools explicitly.
                          "INSTALL=install"
                          "PERL=perl")
       #:phases
       (modify-phases %standard-phases
         ;; The 'configure phase is not wanted for this package.
         (delete 'configure)
         ;; Tidy up the installed files: drop the shell wrapper, install the
         ;; Perl script under its plain name, and point the configuration
         ;; file at the 'unzip' input instead of /usr/bin/unzip.
         (add-after 'install 'fix-install
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let* ((out (assoc-ref outputs "out"))
                    (bin (string-append out "/bin"))
                    (config (string-append out "/etc/docx2txt.config"))
                    (unzip (assoc-ref inputs "unzip")))
               ;; According to INSTALL, the .sh wrapper can be skipped.
               (delete-file (string-append bin "/docx2txt.sh"))
               (rename-file (string-append bin "/docx2txt.pl")
                            (string-append bin "/docx2txt"))
               ;; Match the 'config_unzip' entry regardless of how the "=>"
               ;; is aligned: 'substitute*' does not complain when nothing
               ;; matches, so an exact-spacing pattern could silently leave
               ;; /usr/bin/unzip in place.  The captured prefix keeps the
               ;; original alignment in the rewritten line.
               (substitute* config
                 (("(config_unzip[[:space:]]*=>[[:space:]]*)'/usr/bin/unzip',"
                   _ prefix)
                  (string-append prefix "'" unzip "/bin/unzip',")))
               ;; Makefile is wrong: give the configuration file mode 644
               ;; (rw-r--r--).
               (chmod config #o644)
               ;; 'chmod' returns an unspecified value; report success
               ;; explicitly, as build phases are expected to.
               #t))))))
    (synopsis "Recover text from @file{.docx} files, with good formatting")
    (description
     "@command{docx2txt} is a Perl based command line utility to convert
Microsoft Office @file{.docx} documents to equivalent text documents. Latest
version supports following features during text extraction.
@itemize
@item Character conversions; currency characters are converted to respective
names like Euro.
@item Capitalisation of text blocks.
@item Center and right justification of text fitting in a line of
(configurable) 80 columns.
@item Horizontal ruler, line breaks, paragraphs separation, tabs.
@item Indicating hyperlinked text along with the hyperlink (configurable).
@item Handling (bullet, decimal, letter, roman) lists along with (attempt at)
indentation.
@end itemize\n")
    (home-page "http://docx2txt.sourceforge.net")
    (license license:gpl3+)))