gnu: Add mecab.

* gnu/packages/language.scm (mecab): New variable.
* gnu/packages/patches/mecab-variable-param.patch: New file.
* gnu/local.mk (dist_patch_DATA): Add it.
This commit is contained in:
Julien Lepiller 2023-04-01 16:13:13 +02:00 committed by Maxim Cournoyer
parent 15c7b48333
commit d06bcd6031
No known key found for this signature in database
GPG key ID: 1260E46482E63562

View file

@ -979,55 +979,3 @@ (define-public mecab
collaboration between the Kyoto university and Nippon Telegraph and Telephone
Corporation. The engine is independent of any language, dictionary or corpus.")
(license (list license:gpl2+ license:lgpl2.1+ license:bsd-3))))
;; Dictionary package for MeCab, built from the "mecab-ipadic" directory of
;; the source that the `mecab' package uses.
(define-public mecab-ipadic
  (package
    (name "mecab-ipadic")
    (version "2.7.0")
    ;; Reuse the source of `mecab'; the 'chdir phase below enters the
    ;; dictionary's own directory after unpacking.
    (source (package-source mecab))
    (build-system gnu-build-system)
    (arguments
     `(#:configure-flags
       ;; Install the dictionary under this package's own output and select
       ;; the utf8 charset.
       (list (string-append "--with-dicdir=" (assoc-ref %outputs "out")
                            "/lib/mecab/dic")
             "--with-charset=utf8")
       #:phases
       (modify-phases %standard-phases
         (add-after 'unpack 'chdir
           (lambda _
             (chdir "mecab-ipadic")))
         ;; Export MECAB_DICDIR, pointing at this package's output, before
         ;; the configure phase runs.
         (add-before 'configure 'set-mecab-dir
           (lambda* (#:key outputs #:allow-other-keys)
             (setenv "MECAB_DICDIR" (string-append (assoc-ref outputs "out")
                                                   "/lib/mecab/dic")))))))
    (native-inputs (list mecab)); for mecab-config
    (home-page "https://taku910.github.io/mecab")
    (synopsis "Dictionary data for MeCab")
    (description "This package contains dictionary data derived from
ipadic for use with MeCab.")
    (license (license:non-copyleft "mecab-ipadic/COPYING"))))
;; UniDic dictionary data for MeCab: selected files from the downloaded zip
;; archive are copied into lib/mecab/dic.
(define-public mecab-unidic
  (package
    (name "mecab-unidic")
    (version "3.1.0")
    (source
     (origin
       (method url-fetch)
       ;; The version number appears both as a directory component and in
       ;; the archive file name.
       (uri (string-append
             "https://clrd.ninjal.ac.jp/unidic_archive/cwj/"
             version "/unidic-cwj-" version ".zip"))
       (sha256
        (base32 "1z132p2q3bgchiw529j2d7dari21kn0fhkgrj3vcl0ncg2m521il"))))
    (build-system copy-build-system)
    (arguments
     ;; Only files whose names match one of the listed regexps are copied.
     '(#:install-plan
       '(("."
          "lib/mecab/dic"
          #:include-regexp ("\\.bin$" "\\.def$" "\\.dic$" "dicrc")))))
    ;; NOTE(review): presumably needed to unpack the .zip source -- confirm.
    (native-inputs (list unzip))
    (home-page "https://clrd.ninjal.ac.jp/unidic/en/")
    (synopsis "Dictionary data for MeCab")
    (description "UniDic for morphological analysis is a dictionary for
analysis with the morphological analyser MeCab, where the short units exported
from the database are used as entries (heading terms).")
    ;; Triple-licensed: the user may choose any one of the licenses below.
    (license (list license:gpl2+ license:lgpl2.1 license:bsd-3))))