gnu: apache-arrow: Update to 0.17.1.

* gnu/packages/databases.scm (apache-arrow): Update to 0.17.1.
* gnu/packages/databases.scm (python-pyarrow): Update to 0.17.1.
* gnu/packages/serialization.scm (python-feather-format): Update to 0.4.1.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>
This commit is contained in:
Katherine Cox-Buday 2020-06-02 16:33:36 -05:00 committed by Ludovic Courtès
parent fbc1cbb527
commit a50628bbe0
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5
2 changed files with 98 additions and 47 deletions

View file

@ -88,7 +88,9 @@ (define-module (gnu packages databases)
#:use-module (gnu packages language)
#:use-module (gnu packages libevent)
#:use-module (gnu packages linux)
#:use-module (gnu packages logging)
#:use-module (gnu packages man)
#:use-module (gnu packages maths)
#:use-module (gnu packages ncurses)
#:use-module (gnu packages onc-rpc)
#:use-module (gnu packages parallel)
@ -98,6 +100,7 @@ (define-module (gnu packages databases)
#:use-module (gnu packages perl-web)
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages popt)
#:use-module (gnu packages protobuf)
#:use-module (gnu packages python)
#:use-module (gnu packages python-crypto)
#:use-module (gnu packages python-web)
@ -105,6 +108,8 @@ (define-module (gnu packages databases)
#:use-module (gnu packages python-xyz)
#:use-module (gnu packages rdf)
#:use-module (gnu packages readline)
#:use-module (gnu packages regex)
#:use-module (gnu packages rpc)
#:use-module (gnu packages ruby)
#:use-module (gnu packages serialization)
#:use-module (gnu packages sphinx)
@ -3216,10 +3221,12 @@ (define-public mongo-tools
@end table")
(license license:asl2.0)))
;; There are many wrappers for this in other languages.  When touching this
;; package, please make sure that all dependent packages continue to build.
(define-public apache-arrow
(package
(name "apache-arrow")
(version "0.10.0")
(version "0.17.1")
(source
(origin
(method git-fetch)
@ -3229,7 +3236,7 @@ (define-public apache-arrow
(file-name (git-file-name name version))
(sha256
(base32
"04xkp922b8qrrnpvv9ixxnvk7151n1plzx6aqdff6frn9651zvxs"))))
"02r6yx3yhywzikd3b0vfkjgddhfiriyx2vpm3jf5880wq59x798a"))))
(build-system cmake-build-system)
(arguments
`(#:tests? #f
@ -3242,40 +3249,91 @@ (define-public apache-arrow
(setenv "BOOST_ROOT" (assoc-ref %build-inputs "boost"))
(setenv "BROTLI_HOME" (assoc-ref %build-inputs "brotli"))
(setenv "FLATBUFFERS_HOME" (assoc-ref %build-inputs "flatbuffers"))
(setenv "JEMALLOC_HOME" (assoc-ref %build-inputs "jemalloc"))
(setenv "RAPIDJSON_HOME" (assoc-ref %build-inputs "rapidjson"))
#t)))
#:build-type "Release"
#:configure-flags
(list "-DARROW_PYTHON=ON"
"-DARROW_GLOG=ON"
;; Parquet options
"-DARROW_PARQUET=ON"
"-DPARQUET_BUILD_EXECUTABLES=ON"
;; The maintainers disallow using system versions of
;; jemalloc:
;; https://issues.apache.org/jira/browse/ARROW-3507. This
;; is unfortunate because jemalloc increases performance:
;; https://arrow.apache.org/blog/2018/07/20/jemalloc/.
"-DARROW_JEMALLOC=OFF"
;; Install to PREFIX/lib (the default is
;; PREFIX/lib64).
(string-append "-DCMAKE_INSTALL_LIBDIR="
(assoc-ref %outputs "out")
;; The CMake option ARROW_DEPENDENCY_SOURCE is a global
;; option that instructs the build system how to resolve
;; each dependency. SYSTEM = Finding the dependency in
;; system paths using CMake's built-in find_package
;; function, or using pkg-config for packages that do not
;; have this feature
"-DARROW_DEPENDENCY_SOURCE=SYSTEM"
;; Split output into its component packages.
(string-append "-DCMAKE_INSTALL_PREFIX="
(assoc-ref %outputs "lib"))
(string-append "-DCMAKE_INSTALL_RPATH="
(assoc-ref %outputs "lib")
"/lib")
(string-append "-DCMAKE_INSTALL_BINDIR="
(assoc-ref %outputs "out")
"/bin")
(string-append "-DCMAKE_INSTALL_INCLUDEDIR="
(assoc-ref %outputs "include")
"/share/include")
;; XXX These Guix packages offer static
;; libraries that are not position independent,
;; and ld fails to link them into the arrow .so
"-DARROW_WITH_SNAPPY=OFF"
"-DARROW_WITH_ZLIB=OFF"
"-DARROW_WITH_ZSTD=OFF"
"-DARROW_WITH_LZ4=OFF"
"-DARROW_WITH_SNAPPY=ON"
"-DARROW_WITH_ZLIB=ON"
"-DARROW_WITH_ZSTD=ON"
"-DARROW_WITH_LZ4=ON"
"-DARROW_COMPUTE=ON"
"-DARROW_CSV=ON"
"-DARROW_DATASET=ON"
"-DARROW_FILESYSTEM=ON"
"-DARROW_HDFS=ON"
"-DARROW_JSON=ON"
;; Arrow Python C++ integration library (required for
;; building pyarrow). This library must be built against
;; the same Python version for which you are building
;; pyarrow. NumPy must also be installed. Enabling this
;; option also enables ARROW_COMPUTE, ARROW_CSV,
;; ARROW_DATASET, ARROW_FILESYSTEM, ARROW_HDFS, and
;; ARROW_JSON.
"-DARROW_PYTHON=ON"
;; Building the tests forces on all the
;; optional features and the use of static
;; libraries.
"-DARROW_BUILD_TESTS=OFF"
"-DBENCHMARK_ENABLE_GTEST_TESTS=OFF"
;;"-DBENCHMARK_ENABLE_TESTING=OFF"
"-DARROW_BUILD_STATIC=OFF")))
(inputs
`(("boost" ,boost)
("rapidjson" ,rapidjson)
("brotli" ,google-brotli)
("flatbuffers" ,flatbuffers)
("jemalloc" ,jemalloc)
("double-conversion" ,double-conversion)
("snappy" ,snappy)
("gflags" ,gflags)
("glog" ,glog)
("apache-thrift" ,apache-thrift "lib")
("protobuf" ,protobuf)
("rapidjson" ,rapidjson)
("zlib" ,zlib)
("bzip2" ,bzip2)
("lz4" ,lz4)
("zstd" ,zstd "lib")
("re2" ,re2)
("grpc" ,grpc)
("python-3" ,python)
("python-numpy" ,python-numpy)))
(native-inputs
`(("pkg-config" ,pkg-config)))
(outputs '("out" "lib" "include"))
(home-page "https://arrow.apache.org/")
(synopsis "Columnar in-memory analytics")
(description "Apache Arrow is a columnar in-memory analytics layer
@ -3287,18 +3345,8 @@ (define-public apache-arrow
(define-public python-pyarrow
(package
(inherit apache-arrow)
(name "python-pyarrow")
(version "0.10.0")
(source
(origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/apache/arrow")
(commit (string-append "apache-arrow-" version))))
(file-name (git-file-name name version))
(sha256
(base32
"04xkp922b8qrrnpvv9ixxnvk7151n1plzx6aqdff6frn9651zvxs"))))
(build-system python-build-system)
(arguments
'(#:tests? #f ; XXX There are no tests in the "python" directory
@ -3307,26 +3355,29 @@ (define-public python-pyarrow
(delete 'build) ; XXX the build is performed again during the install phase
(add-after 'unpack 'enter-source-directory
(lambda _ (chdir "python") #t))
(add-after 'unpack 'set-env
(add-after 'unpack 'make-git-checkout-writable
(lambda _
(setenv "ARROW_HOME" (assoc-ref %build-inputs "apache-arrow"))
(for-each make-file-writable (find-files "."))
#t)))))
(propagated-inputs
`(("apache-arrow" ,apache-arrow)
`(("apache-arrow" ,apache-arrow "lib")
("python-numpy" ,python-numpy)
("python-pandas" ,python-pandas)
("python-six" ,python-six)))
(native-inputs
`(("cmake" ,cmake-minimal)
("pkg-config" ,pkg-config)
("python-cython" ,python-cython)
("python-pytest" ,python-pytest)
("python-pytest-runner" ,python-pytest-runner)
("python-setuptools-scm" ,python-setuptools-scm)))
(outputs '("out"))
(home-page "https://arrow.apache.org/docs/python/")
(synopsis "Python bindings for Apache Arrow")
(description "This library provides a Pythonic API wrapper for the reference
Arrow C++ implementation, along with tools for interoperability with pandas,
NumPy, and other traditional Python scientific computing packages.")
(description
"This library provides a Pythonic API wrapper for the reference Arrow C++
implementation, along with tools for interoperability with pandas, NumPy, and
other traditional Python scientific computing packages.")
(license license:asl2.0)))
(define-public python2-pyarrow

View file

@ -467,14 +467,14 @@ (define-public flatbuffers
(define-public python-feather-format
(package
(name "python-feather-format")
(version "0.4.0")
(version "0.4.1")
(source
(origin
(method url-fetch)
(uri (pypi-uri "feather-format" version))
(sha256
(base32
"1adivm5w5ji4qv7hq7942vqlk8l2wgw87bdlsia771z14z3zp857"))))
"00w9hwz7sj3fkdjc378r066vdy6lpxmn6vfac3qx956k8lvpxxj5"))))
(build-system python-build-system)
(propagated-inputs
`(("python-pandas" ,python-pandas)