gnu: libextractor: Enable tidy-html support.

* gnu/packages/gnunet.scm (libextractor)
[source]: Apply patch.
[phases] <force-reconfigure>: New phase.
[inputs]: Add tidy-html.  Remove associated FIXME comment, along with the
/dev/shm warning comment that preceded it.
* gnu/packages/patches/libextractor-tidy-support.patch: New file.
* gnu/local.mk (dist_patch_DATA): Register it.

Change-Id: Ic812e09504d522ec87410bbbb03ccd3d6e48dd71
This commit is contained in:
Maxim Cournoyer 2024-03-12 21:56:17 -04:00
parent e1e3536ece
commit 43d6d5e646
No known key found for this signature in database
GPG key ID: 1260E46482E63562
3 changed files with 89 additions and 7 deletions

View file

@@ -1514,6 +1514,7 @@ dist_patch_DATA = \
%D%/packages/patches/julia-SOURCE_DATE_EPOCH-mtime.patch \ %D%/packages/patches/julia-SOURCE_DATE_EPOCH-mtime.patch \
%D%/packages/patches/julia-Use-MPFR-4.2.patch \ %D%/packages/patches/julia-Use-MPFR-4.2.patch \
%D%/packages/patches/libcss-check-format.patch \ %D%/packages/patches/libcss-check-format.patch \
%D%/packages/patches/libextractor-tidy-support.patch \
%D%/packages/patches/libftdi-fix-paths-when-FTDIPP-set.patch \ %D%/packages/patches/libftdi-fix-paths-when-FTDIPP-set.patch \
%D%/packages/patches/libgeotiff-fix-tests-with-proj-9.1.1.patch \ %D%/packages/patches/libgeotiff-fix-tests-with-proj-9.1.1.patch \
%D%/packages/patches/libgeotiff-fix-tests-with-proj-9.3.0.patch \ %D%/packages/patches/libgeotiff-fix-tests-with-proj-9.3.0.patch \

View file

@@ -93,7 +93,9 @@ (define-public libextractor
version ".tar.gz")) version ".tar.gz"))
(sha256 (sha256
(base32 (base32
"0mgprmwdhdwq9xhfxfhcncd304425nvcc4zi8ci5f0nja4n333xv")))) "0mgprmwdhdwq9xhfxfhcncd304425nvcc4zi8ci5f0nja4n333xv"))
(patches
(search-patches "libextractor-tidy-support.patch"))))
(build-system gnu-build-system) (build-system gnu-build-system)
(outputs '("out" (outputs '("out"
"static")) ; 420 KiB .a files "static")) ; 420 KiB .a files
@@ -103,6 +105,9 @@ (define-public libextractor
#$(this-package-input "libltdl"))) #$(this-package-input "libltdl")))
#:phases #:phases
#~(modify-phases %standard-phases #~(modify-phases %standard-phases
(add-after 'unpack 'force-reconfigure
(lambda _
(delete-file "configure")))
(add-after 'install 'move-static-libraries (add-after 'install 'move-static-libraries
(lambda* (#:key outputs #:allow-other-keys) (lambda* (#:key outputs #:allow-other-keys)
;; Move static libraries to the "static" output. ;; Move static libraries to the "static" output.
@@ -114,12 +119,6 @@ (define-public libextractor
(install-file file slib) (install-file file slib)
(delete-file file)) (delete-file file))
(find-files lib "\\.a$")))))))) (find-files lib "\\.a$"))))))))
;; WARNING: Checks require /dev/shm to be in the build chroot, especially
;; not to be a symbolic link to /run/shm.
;; FIXME:
;; The following dependency is optional, but should be
;; available for maximum coverage:
;; * libtidy-html (tidy-html) ; investigate failure
(native-inputs (native-inputs
(list autoconf-2.71 (list autoconf-2.71
automake automake
@@ -149,6 +148,7 @@ (define-public libextractor
libtiff libtiff
libvorbis libvorbis
rpm rpm
tidy-html
zlib)) zlib))
(synopsis "Library to extract meta-data from media files") (synopsis "Library to extract meta-data from media files")
(description (description

View file

@@ -0,0 +1,81 @@
Upstream status: submitted to bug-libextractor@gnu.org.
From 1fc6daaeaf829fb941a176831c011888a73c43b9 Mon Sep 17 00:00:00 2001
From: Maxim Cournoyer <maxim.cournoyer@gmail.com>
Date: Mon, 11 Mar 2024 09:36:26 -0400
Subject: [PATCH] html_extractor: Add support for modern tidy-html.
* configure.ac: Use PKG_PROG_PKG_CONFIG to initialize pkg-config detection.
<tidy>: Check for library via pkg-config.
* src/plugins/html_extractor.c: Standardize tidy include file names.
---
configure.ac | 28 +++++++++-------------------
src/plugins/html_extractor.c | 4 ++--
2 files changed, 11 insertions(+), 21 deletions(-)
diff --git a/configure.ac b/configure.ac
index d17ff39..e89d70c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -176,6 +176,8 @@ AS_CASE(["$target_os"],
AM_ICONV
+PKG_PROG_PKG_CONFIG()
+
# We define the paths here, because MinGW/GCC expands paths
# passed through the command line ("-DLOCALEDIR=..."). This would
# lead to hard-coded paths ("C:\mingw\mingw\bin...") that do
@@ -424,25 +426,13 @@ AC_CHECK_LIB(magic, magic_open,
AM_CONDITIONAL(HAVE_MAGIC, false))],
AM_CONDITIONAL(HAVE_MAGIC, false))
-AC_MSG_CHECKING(for tidyNodeGetValue -ltidy)
-AC_LANG_PUSH(C++)
-SAVED_LIBS=$LIBS
-LIBS="$LIBS -ltidy"
-AC_LINK_IFELSE(
- [AC_LANG_PROGRAM([[#include <tidy/tidy.h>]],
- [[ Bool b = tidyNodeGetValue (NULL, NULL, NULL); ]])],
- [AC_MSG_RESULT(yes)
- AM_CONDITIONAL(HAVE_TIDY, true)
- AC_DEFINE(HAVE_TIDY,1,[Have tidyNodeGetValue in libtidy])],
- [AC_MSG_RESULT(no)
- AM_CONDITIONAL(HAVE_TIDY, false)])
-LIBS=$SAVED_LIBS
-AC_LANG_POP(C++)
-
-# restore LIBS
-LIBS=$LIBSOLD
-
-
+dnl tidyNodeGetValue was already available in 5.0.0, released in 2015.
+PKG_CHECK_MODULES([TIDY], [tidy >= 5.0.0],
+ [AC_DEFINE(HAVE_TIDY, 1, [Have tidy])
+ AM_CONDITIONAL(HAVE_TIDY, true)],
+ [AM_CONDITIONAL(HAVE_TIDY, false)])
+CFLAGS="$CFLAGS $TIDY_CFLAGS"
+LIBS="$LIBS $TIDY_LIBS"
# should 'make check' run tests?
AC_MSG_CHECKING(whether to run tests)
diff --git a/src/plugins/html_extractor.c b/src/plugins/html_extractor.c
index 5ebf97b..88100d3 100644
--- a/src/plugins/html_extractor.c
+++ b/src/plugins/html_extractor.c
@@ -26,8 +26,8 @@
#include "platform.h"
#include "extractor.h"
#include <magic.h>
-#include <tidy/tidy.h>
-#include <tidy/tidybuffio.h>
+#include <tidy.h>
+#include <tidybuffio.h>
/**
* Mapping of HTML META names to LE types.
base-commit: a75f40b64b5868967c95ea214e8eaac4f7088b23
--
2.41.0