From 3ab24ba216ce91210b93ec61554b3343fbc3aaab Mon Sep 17 00:00:00 2001 From: Julien Lepiller Date: Sat, 1 Apr 2023 16:13:13 +0200 Subject: [PATCH] gnu: Add mecab. * gnu/packages/language.scm (mecab): New variable. * gnu/packages/patches/mecab-variable-param.patch: New file. * gnu/local.mk (dist_patch_DATA): Add it. --- gnu/local.mk | 1 + gnu/packages/language.scm | 51 ++++++++++++++++++- .../patches/mecab-variable-param.patch | 30 +++++++++++ 3 files changed, 81 insertions(+), 1 deletion(-) create mode 100644 gnu/packages/patches/mecab-variable-param.patch diff --git a/gnu/local.mk b/gnu/local.mk index 3a93ab50dd..47fe4daaff 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -1540,6 +1540,7 @@ dist_patch_DATA = \ %D%/packages/patches/libmemcached-build-with-gcc7.patch \ %D%/packages/patches/libmhash-hmac-fix-uaf.patch \ %D%/packages/patches/libsigrokdecode-python3.9-fix.patch \ + %D%/packages/patches/mecab-variable-param.patch \ %D%/packages/patches/memtest86+-build-reproducibly.patch \ %D%/packages/patches/mercurial-hg-extension-path.patch \ %D%/packages/patches/mercurial-openssl-compat.patch \ diff --git a/gnu/packages/language.scm b/gnu/packages/language.scm index f7e3ea6cd6..5dda0c1eeb 100644 --- a/gnu/packages/language.scm +++ b/gnu/packages/language.scm @@ -4,7 +4,7 @@ ;;; Copyright © 2018 Nikita ;;; Copyright © 2019 Alex Vong ;;; Copyright © 2020 Ricardo Wurmus -;;; Copyright © 2020 Julien Lepiller +;;; Copyright © 2020, 2022 Julien Lepiller ;;; Copyright © 2022 Milran ;;; ;;; This file is part of GNU Guix. 
@@ -928,3 +928,52 @@ (define-public libskk
     (description "libskk is a library to deal with Japanese kana-to-kanji
 conversion method.")
     (license license:gpl3+)))
+
+(define-public mecab
+  (package
+    (name "mecab")
+    (version "0.996")
+    (source (origin
+              (method git-fetch)
+              (uri (git-reference
+                    (url "https://github.com/taku910/mecab")
+                    ;; Latest upstream commit at packaging time.
+                    (commit "046fa78b2ed56fbd4fac312040f6d62fc1bc31e3")))
+              (file-name (git-file-name name version))
+              (sha256
+               (base32
+                "1hdv7rgn8j0ym9gsbigydwrbxa8cx2fb0qngg1ya15vvbw0lk4aa"))
+              (patches
+               (search-patches
+                "mecab-variable-param.patch"))))  ;expands "$VAR" values in mecabrc
+    (build-system gnu-build-system)
+    (native-search-paths
+     (list (search-path-specification
+            (variable "MECAB_DICDIR")
+            (separator #f)              ;single directory, not a search list
+            (files '("lib/mecab/dic")))))
+    (arguments
+     `(#:phases
+       (modify-phases %standard-phases
+         (add-after 'unpack 'chdir
+           (lambda _
+             (chdir "mecab")))          ;sources live in the "mecab" sub-directory
+         (add-before 'build 'add-mecab-dicdir-variable
+           (lambda _
+             (substitute* "mecabrc.in"
+               (("dicdir = .*")         ;'.*' also matches the trailing newline
+                "dicdir = $MECAB_DICDIR\n"))
+             (substitute* "mecab-config.in"
+               (("echo @libdir@/mecab/dic")
+                "if [ -z \"$MECAB_DICDIR\" ]; then
+  echo @libdir@/mecab/dic
+else
+  echo \"$MECAB_DICDIR\"
+fi")))))))
+    (inputs (list libiconv))
+    (home-page "https://taku910.github.io/mecab")
+    (synopsis "Morphological analysis engine for texts")
+    (description "MeCab is a morphological analysis engine developed as a
+collaboration between Kyoto University and Nippon Telegraph and Telephone
+Corporation.  The engine is independent of any language, dictionary or corpus.")
+    (license (list license:gpl2+ license:lgpl2.1+ license:bsd-3))))
diff --git a/gnu/packages/patches/mecab-variable-param.patch b/gnu/packages/patches/mecab-variable-param.patch
new file mode 100644
index 0000000000..4457cf3f44
--- /dev/null
+++ b/gnu/packages/patches/mecab-variable-param.patch
@@ -0,0 +1,30 @@
+From 2396e90056706ef897acab3aaa081289c7336483 Mon Sep 17 00:00:00 2001
+From: LEPILLER Julien
+Date: Fri, 19 Apr 2019 11:48:39 +0200
+Subject: [PATCH] Allow variable parameters
+
+---
+ mecab/src/param.cpp | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/mecab/src/param.cpp b/mecab/src/param.cpp
+index 65328a2..006b1b5 100644
+--- a/mecab/src/param.cpp
++++ b/mecab/src/param.cpp
+@@ -79,8 +79,12 @@ bool Param::load(const char *filename) {
+     size_t s1, s2;
+     for (s1 = pos+1; s1 < line.size() && isspace(line[s1]); s1++);
+     for (s2 = pos-1; static_cast<long>(s2) >= 0 && isspace(line[s2]); s2--);
+-    const std::string value = line.substr(s1, line.size() - s1);
++    std::string value = line.substr(s1, line.size() - s1);
+     const std::string key   = line.substr(0, s2 + 1);
++    if (value.find('$') == 0) {  // "$NAME": take the value from the environment
++      const char *env = std::getenv(value.substr(1).c_str());
++      value = env ? env : "";  // getenv() returns NULL when NAME is unset
++    }
+     set<std::string>(key.c_str(), value, false);
+   }
+
+--
+2.20.1
+