From f00f56514d90ebba5d9e08ec786c8118e437097c Mon Sep 17 00:00:00 2001
From: Troy Figiel
Date: Thu, 25 Jan 2024 21:45:20 +0100
Subject: [PATCH] gnu: Add python-fastparquet.

* gnu/packages/databases.scm (python-fastparquet): New variable.

Co-authored-by: Sharlatan Hellseher
Signed-off-by: Sharlatan Hellseher
Change-Id: Ib3c39167c0d82aab9b271fdba181daa311f692a8
---
 gnu/packages/databases.scm | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 1 deletion(-)

diff --git a/gnu/packages/databases.scm b/gnu/packages/databases.scm
index db4fe0b447..33e7327ebb 100644
--- a/gnu/packages/databases.scm
+++ b/gnu/packages/databases.scm
@@ -45,7 +45,7 @@
 ;;; Copyright © 2020 Michael Rohleder
 ;;; Copyright © 2020 Vinicius Monego
 ;;; Copyright © 2020 Vincent Legoll
-;;; Copyright © 2021 Sharlatan Hellseher
+;;; Copyright © 2021, 2024 Sharlatan Hellseher
 ;;; Copyright © 2021, 2024 Greg Hogan
 ;;; Copyright © 2021 David Larsson
 ;;; Copyright © 2021 Pjotr Prins
@@ -62,6 +62,7 @@
 ;;; Copyright © 2023 Felix Gruber
 ;;; Copyright © 2023 Giacomo Leidi
+;;; Copyright © 2024 Troy Figiel
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -143,6 +144,7 @@ (define-module (gnu packages databases)
   #:use-module (gnu packages python)
   #:use-module (gnu packages python-build)
   #:use-module (gnu packages python-check)
+  #:use-module (gnu packages python-compression)
   #:use-module (gnu packages python-crypto)
   #:use-module (gnu packages python-science)
   #:use-module (gnu packages python-web)
@@ -4984,6 +4986,77 @@ (define-public python-pyarrow-0.16
 other traditional Python scientific computing packages.")
     (license license:asl2.0)))
 
+(define-public python-fastparquet
+  (package
+    (name "python-fastparquet")
+    (version "2024.2.0")
+    (source
+     (origin
+       ;; Fastparquet uses setuptools-scm to find the current version.  This
+       ;; only works when we use the PyPI tarball, which does not contain
+       ;; tests.  Instead, we use the git-fetch method and set the version via
+       ;; envar.
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/dask/fastparquet")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32 "0f32dj1xvd11l0siznqd33dpjlhg9siylcjcfkcdlqfcy45jfj3v"))))
+    (build-system pyproject-build-system)
+    (arguments
+     (list
+      #:test-flags
+      #~(list "-n" (number->string (parallel-job-count))) ;honor --cores
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'relax-requirements
+            (lambda _
+              (substitute* "setup.py"
+                ;; Remove dependencies on git.
+                (("^.*\"git\", \"status\".*$") "")
+                ;; Guix is only compatible with a single version of numpy
+                ;; at a time.  We can safely remove this dependency.
+                (("'oldest-supported-numpy'") ""))))
+          (add-before 'build 'pretend-version
+            ;; The version string is usually derived via setuptools-scm, but
+            ;; without the git metadata available, the version string is set
+            ;; to '0.0.0'.
+            (lambda _
+              (setenv "SETUPTOOLS_SCM_PRETEND_VERSION" #$version)))
+          (add-before 'check 'build-cython-extensions
+            ;; Cython extensions need to be built for the check phase.
+            (lambda _
+              (invoke "python" "setup.py" "build_ext" "--inplace"))))))
+    (propagated-inputs
+     (list python-cramjam
+           python-fsspec
+           python-lzo
+           python-numpy
+           python-packaging
+           python-pandas))
+    (native-inputs
+     (list python-cython
+           python-pytest-runner
+           python-pytest-xdist
+           python-setuptools-scm))
+    (home-page "https://github.com/dask/fastparquet")
+    (synopsis "Python implementation of the Parquet file format")
+    (description
+     "@code{fastparquet} is a Python implementation of the Parquet file
+format.  @code{fastparquet} is used implicitly by @code{dask}, @code{pandas}
+and @code{intake-parquet}.  It supports the following compression algorithms:
+
+@itemize
+@item Gzip
+@item Snappy
+@item Brotli
+@item LZ4
+@item Zstd
+@item LZO (optionally)
+@end itemize")
+    (license license:asl2.0)))
+
 (define-public python-crate
   (package
     (name "python-crate")