gnu: Add uniutils.

* gnu/packages/textutils.scm (uniutils): New variable.
This commit is contained in:
Hartmut Goebel 2019-07-27 23:22:02 +02:00
parent 7577ab5556
commit a6baae7486
No known key found for this signature in database
GPG key ID: 634A8DFFD3F631DF

View file

@ -45,6 +45,7 @@ (define-module (gnu packages textutils)
#:use-module (guix build-system python)
#:use-module (gnu packages)
#:use-module (gnu packages autotools)
#:use-module (gnu packages base)
#:use-module (gnu packages compression)
#:use-module (gnu packages gettext)
#:use-module (gnu packages java)
@ -362,6 +363,68 @@ (define-public ascii2binary
of floating point numbers, just treat the input as a sequence of unsigned
characters.)
@end itemize")
(license license:gpl3)))
;; Package definition for uniutils 2.27: a set of command-line tools for
;; inspecting Unicode text (uniname, unidesc, unihist, ExplicateUTF8,
;; utf8lookup, unireverse).
(define-public uniutils
  (package
    (name "uniutils")
    (version "2.27")
    (source
     (origin
       (method url-fetch)
       (uri (string-append "http://billposer.org/Software/Downloads/"
                           "uniutils-" version ".tar.bz2"))
       (sha256
        (base32 "19w1510w87gx7n4qy3zsb0m467a4rn5scvh4ajajg7jh6x5xri08"))))
    (build-system gnu-build-system)
    (arguments
     '(#:configure-flags '("--disable-dependency-tracking")
       #:phases
       (modify-phases %standard-phases
         ;; The "utf8lookup" file invokes ascii2binary, uniname and iconv by
         ;; bare name.  Rewrite those invocations to absolute store paths so
         ;; the installed script does not depend on the user's PATH.
         ;; NOTE(review): the patterns are anchored with "^", so they only
         ;; match a command name at the very start of a line -- confirm this
         ;; against the layout of the upstream script.
         (add-after 'build 'fix-paths
           (lambda* (#:key outputs inputs #:allow-other-keys)
             (let ((out (assoc-ref outputs "out"))
                   (a2b (assoc-ref inputs "ascii2binary"))
                   (iconv (assoc-ref inputs "libiconv")))
               (substitute* "utf8lookup"
                 (("^ascii2binary ") (string-append a2b "/bin/ascii2binary "))
                 ;; uniname is provided by this very package, hence "out".
                 (("^uniname ") (string-append out "/bin/uniname "))
                 (("^iconv ") (string-append iconv "/bin/iconv ")))
               ;; Return #t to signal that the phase succeeded.
               #t))))))
    ;; Both inputs are looked up by label in the 'fix-paths phase above.
    (inputs
     `(("ascii2binary" ,ascii2binary)
       ("libiconv" ,libiconv)))
    (home-page "https://billposer.org/Software/unidesc.html")
    (synopsis "Find out what is in a Unicode file")
    (description "Useful tools when working with Unicode files when one
doesn't know the writing system, doesn't have the necessary font, needs to
inspect invisible characters, needs to find out whether characters have been
combined or in what order they occur, or needs statistics on which characters
occur.
@itemize
@item @command{uniname} defaults to printing the character offset of each
character, its byte offset, its hex code value, its encoding, the glyph
itself, and its name. It may also be used to validate UTF-8 input.
@item @command{unidesc} reports the character ranges to which different
portions of the text belong. It can also be used to identify Unicode encodings
(e.g. UTF-16be) flagged by magic numbers.
@item @command{unihist} generates a histogram of the characters in its input.
@item @command{ExplicateUTF8} is intended for debugging or for learning about
Unicode. It determines and explains the validity of a sequence of bytes as a
UTF8 encoding.
@item @command{utf8lookup} provides a handy way to look up Unicode characters
from the command line.
@item @command{unireverse} reverses each line of UTF-8 input
character-by-character.
@end itemize")
    (license license:gpl3)))