From 4fa16c9ae639c9891359527edc8c0b235e3987f2 Mon Sep 17 00:00:00 2001 From: Ricardo Wurmus Date: Mon, 8 Apr 2019 14:45:47 +0200 Subject: [PATCH] gnu: Add tensorflow. * gnu/packages/machine-learning.scm (tensorflow): New variable. --- gnu/packages/machine-learning.scm | 451 ++++++++++++++++++++++++++++++ 1 file changed, 451 insertions(+) diff --git a/gnu/packages/machine-learning.scm b/gnu/packages/machine-learning.scm index 5373020a77..00662fb26d 100644 --- a/gnu/packages/machine-learning.scm +++ b/gnu/packages/machine-learning.scm @@ -49,6 +49,7 @@ (define-module (gnu packages machine-learning) #:use-module (gnu packages check) #:use-module (gnu packages compression) #:use-module (gnu packages cran) + #:use-module (gnu packages databases) #:use-module (gnu packages dejagnu) #:use-module (gnu packages gcc) #:use-module (gnu packages glib) @@ -65,7 +66,9 @@ (define-module (gnu packages machine-learning) #:use-module (gnu packages python) #:use-module (gnu packages python-web) #:use-module (gnu packages python-xyz) + #:use-module (gnu packages serialization) #:use-module (gnu packages statistics) + #:use-module (gnu packages sqlite) #:use-module (gnu packages swig) #:use-module (gnu packages tls) #:use-module (gnu packages web) @@ -1303,3 +1306,451 @@ (define-public grpc applicable in last mile of distributed computing to connect devices, mobile applications and browsers to backend services.") (license license:asl2.0))) + +;; Note that Tensorflow includes a "third_party" directory, which seems to not +;; only contain modified subsets of upstream library source code, but also +;; adapter headers provided by Google (such as the fft.h header, which is not +;; part of the upstream project code). The Tensorflow code includes headers +;; from the "third_party" directory. It does not look like we can replace +;; these headers with unmodified upstream files, so we keep them. 
+;;
+;; The package below builds TensorFlow with the CMake files found in
+;; "tensorflow/contrib/cmake".  The build phases patch those files so that
+;; nothing is downloaded at build time: dependencies come either from Guix
+;; inputs or from the "*-src" origins listed in NATIVE-INPUTS.
+(define-public tensorflow
+  (package
+    (name "tensorflow")
+    (version "1.9.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/tensorflow/tensorflow.git")
+             (commit (string-append "v" version))))
+       (file-name (string-append "tensorflow-" version "-checkout"))
+       (sha256
+        (base32
+         "0a9kwha395g3wgxfwln5j8vn9nkspmd75xldrlqdq540w996g8xa"))))
+    (build-system cmake-build-system)
+    (arguments
+     `(#:tests? #f                      ; no "check" target
+       #:build-type "Release"
+       #:configure-flags
+       (let ((protobuf (assoc-ref %build-inputs "protobuf"))
+             (protobuf:native (assoc-ref %build-inputs "protobuf:native"))
+             (jsoncpp (assoc-ref %build-inputs "jsoncpp"))
+             (snappy (assoc-ref %build-inputs "snappy"))
+             (sqlite (assoc-ref %build-inputs "sqlite")))
+         (list
+          ;; Use protobuf from Guix
+          (string-append "-Dprotobuf_STATIC_LIBRARIES="
+                         protobuf "/lib/libprotobuf.so")
+          (string-append "-DPROTOBUF_PROTOC_EXECUTABLE="
+                         protobuf:native "/bin/protoc")
+
+          ;; Use snappy from Guix
+          (string-append "-Dsnappy_STATIC_LIBRARIES="
+                         snappy "/lib/libsnappy.so")
+          ;; Yes, this is not actually the include directory but a prefix...
+          (string-append "-Dsnappy_INCLUDE_DIR=" snappy)
+
+          ;; Use jsoncpp from Guix
+          (string-append "-Djsoncpp_STATIC_LIBRARIES="
+                         jsoncpp "/lib/libjsoncpp.so")
+          ;; Yes, this is not actually the include directory but a prefix...
+          (string-append "-Djsoncpp_INCLUDE_DIR=" jsoncpp)
+
+          ;; Use sqlite from Guix.  This is the same library path that the
+          ;; 'disable-downloads' phase substitutes into CMakeLists.txt.
+          (string-append "-Dsqlite_STATIC_LIBRARIES="
+                         sqlite "/lib/libsqlite3.so")
+
+          ;; Use system libraries wherever possible.  Currently, this
+          ;; only affects zlib.
+          "-Dsystemlib_ALL=ON"
+          "-Dtensorflow_ENABLE_POSITION_INDEPENDENT_CODE=ON"
+          "-Dtensorflow_BUILD_SHARED_LIB=ON"
+          "-Dtensorflow_OPTIMIZE_FOR_NATIVE_ARCH=OFF"
+          "-Dtensorflow_ENABLE_SSL_SUPPORT=OFF"
+          "-Dtensorflow_BUILD_CONTRIB_KERNELS=OFF"))
+       #:make-flags
+       (list "CC=gcc")
+       #:modules ((ice-9 ftw)
+                  (guix build utils)
+                  (guix build cmake-build-system))
+       #:phases
+       (modify-phases %standard-phases
+         ;; Despite its name, this phase touches no files; it only sets
+         ;; SOURCE_DATE_EPOCH for the later wheel build.
+         (add-after 'unpack 'set-source-file-times-to-1980
+           ;; At the end of the tf_python_build_pip_package target, a ZIP
+           ;; archive should be generated via bdist_wheel, but it fails with
+           ;; "ZIP does not support timestamps before 1980".  Luckily,
+           ;; SOURCE_DATE_EPOCH is respected, which we set to some time in
+           ;; 1980.
+           (lambda _ (setenv "SOURCE_DATE_EPOCH" "315532800") #t))
+         ;; See https://github.com/tensorflow/tensorflow/issues/20517#issuecomment-406373913
+         (add-after 'unpack 'python3.7-compatibility
+           (lambda _
+             (substitute* '("tensorflow/python/eager/pywrap_tfe_src.cc"
+                            "tensorflow/python/lib/core/ndarray_tensor.cc"
+                            "tensorflow/python/lib/core/py_func.cc")
+               (("PyUnicode_AsUTF8") "(char *)PyUnicode_AsUTF8"))
+             (substitute* "tensorflow/c/eager/c_api.h"
+               (("unsigned char async")
+                "unsigned char is_async"))
+
+             ;; Remove dependency on tensorboard, a complicated but probably
+             ;; optional package.
+             (substitute* "tensorflow/tools/pip_package/setup.py"
+               ((".*'tensorboard >.*") ""))
+             #t))
+         ;; The CMake build files live in this sub-directory; the phases
+         ;; added after this one use paths relative to it.
+         (add-after 'python3.7-compatibility 'chdir
+           (lambda _ (chdir "tensorflow/contrib/cmake") #t))
+         (add-after 'chdir 'disable-downloads
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; Strip the git coordinates of all external projects and give
+             ;; them an empty download command.
+             (substitute* (find-files "external" "\\.cmake$")
+               (("GIT_REPOSITORY.*") "")
+               (("GIT_TAG.*") "")
+               (("PREFIX ")
+                "DOWNLOAD_COMMAND \"\"\nPREFIX "))
+
+             ;; Use packages from Guix
+             (let ((grpc (assoc-ref inputs "grpc")))
+               (substitute* "CMakeLists.txt"
+                 ;; Sqlite
+                 (("include\\(sqlite\\)") "")
+                 (("\\$\\{sqlite_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "sqlite")
+                                 "/lib/libsqlite3.so"))
+                 (("sqlite_copy_headers_to_destination") "")
+
+                 ;; PNG
+                 (("include\\(png\\)") "")
+                 (("\\$\\{png_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "libpng")
+                                 "/lib/libpng16.so"))
+                 (("png_copy_headers_to_destination") "")
+
+                 ;; JPEG
+                 (("include\\(jpeg\\)") "")
+                 (("\\$\\{jpeg_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "libjpeg")
+                                 "/lib/libjpeg.so"))
+                 (("jpeg_copy_headers_to_destination") "")
+
+                 ;; GIF
+                 (("include\\(gif\\)") "")
+                 (("\\$\\{gif_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "giflib")
+                                 "/lib/libgif.so"))
+                 (("gif_copy_headers_to_destination") "")
+
+                 ;; lmdb
+                 (("include\\(lmdb\\)") "")
+                 (("\\$\\{lmdb_STATIC_LIBRARIES\\}")
+                  (string-append (assoc-ref inputs "lmdb")
+                                 "/lib/liblmdb.so"))
+                 (("lmdb_copy_headers_to_destination") "")
+
+                 ;; Protobuf
+                 (("include\\(protobuf\\)") "")
+                 (("protobuf_copy_headers_to_destination") "")
+                 (("^ +protobuf") "")
+
+                 ;; gRPC
+                 (("include\\(grpc\\)")
+                  "find_package(grpc REQUIRED NAMES gRPC)")
+                 (("list\\(APPEND tensorflow_EXTERNAL_DEPENDENCIES grpc\\)") "")
+
+                 ;; Eigen
+                 (("include\\(eigen\\)")
+                  (string-append "find_package(eigen REQUIRED NAMES Eigen3)
+set(eigen_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/external/eigen_archive "
+                                 (assoc-ref inputs "eigen") "/include/eigen3)"))
+                 (("^ +eigen") "")
+
+                 ;; snappy
+                 (("include\\(snappy\\)")
+                  "add_definitions(-DTF_USE_SNAPPY)")
+                 (("list\\(APPEND tensorflow_EXTERNAL_DEPENDENCIES snappy\\)") "")
+
+                 ;; jsoncpp
+                 (("include\\(jsoncpp\\)") "")
+                 (("^ +jsoncpp") ""))
+
+               (substitute* "tf_core_framework.cmake"
+                 ((" grpc") "")
+                 (("\\$\\{GRPC_BUILD\\}/grpc_cpp_plugin")
+                  (which "grpc_cpp_plugin"))
+                 ;; Link with gRPC libraries
+                 (("add_library\\(tf_protos_cc.*" m)
+                  (string-append m
+                                 (format #f "\ntarget_link_libraries(tf_protos_cc PRIVATE \
+~a/lib/libgrpc++_unsecure.a \
+~a/lib/libgrpc_unsecure.a \
+~a/lib/libaddress_sorting.a \
+~a/lib/libgpr.a \
+~a//lib/libcares.so
+)\n"
+                                         grpc grpc grpc grpc
+                                         (assoc-ref inputs "c-ares"))))))
+             (substitute* "tf_tools.cmake"
+               (("add_dependencies\\(\\$\\{proto_text.*") ""))
+             ;; Remove dependency on bundled grpc
+             (substitute* "tf_core_distributed_runtime.cmake"
+               (("tf_core_cpu grpc") "tf_core_cpu"))
+
+             ;; This directory is a dependency of many targets.
+             (mkdir-p "protobuf")
+             #t))
+         (add-after 'configure 'unpack-third-party-sources
+           (lambda* (#:key inputs #:allow-other-keys)
+             ;; This is needed to configure bundled packages properly.
+             (setenv "CONFIG_SHELL" (which "bash"))
+             ;; For each NAME, take the "NAME-src" input and place its
+             ;; contents in "../build/NAME*/src/NAME*", where NAME* is NAME
+             ;; with dashes replaced by underscores.  How the input is
+             ;; unpacked depends on its file name suffix.
+             (for-each
+              (lambda (name)
+                (let* ((what (assoc-ref inputs (string-append name "-src")))
+                       (name* (string-map (lambda (c)
+                                            (if (char=? c #\-)
+                                                #\_ c)) name))
+                       (where (string-append "../build/" name* "/src/" name*)))
+                  (cond
+                   ((string-suffix? ".zip" what)
+                    (mkdir-p where)
+                    (with-directory-excursion where
+                      (invoke "unzip" what)))
+                   ((string-suffix? ".tar.gz" what)
+                    (mkdir-p where)
+                    (invoke "tar" "xf" what
+                            "-C" where "--strip-components=1"))
+                   (else
+                    ;; Anything else is copied as a directory tree,
+                    ;; replacing whatever is already at the destination.
+                    (let ((parent (dirname where)))
+                      (mkdir-p parent)
+                      (with-directory-excursion parent
+                        (when (file-exists? name*)
+                          (delete-file-recursively name*))
+                        (copy-recursively what name*)
+                        ;; Only the side effect matters here, hence
+                        ;; 'for-each' rather than 'map'.
+                        (for-each make-file-writable
+                                  (find-files name* ".*"))))))))
+              (list "boringssl"
+                    "cub"
+                    "double-conversion"
+                    "farmhash"
+                    "fft2d"
+                    "highwayhash"
+                    "nsync"
+                    "re2"))
+
+             ;; The directory name embeds the version of the "cub-src"
+             ;; input; keep both in sync.
+             (rename-file "../build/cub/src/cub/cub-1.8.0/"
+                          "../build/cub/src/cub/cub/")
+             #t))
+         ;; This phase is inserted directly after 'unpack, so it runs before
+         ;; 'chdir; its file names are relative to the top of the source
+         ;; tree.
+         (add-after 'unpack 'fix-python-build
+           (lambda* (#:key inputs outputs #:allow-other-keys)
+             (mkdir-p "protobuf-src")
+             (invoke "tar" "xf" (assoc-ref inputs "protobuf:src")
+                     "-C" "protobuf-src" "--strip-components=1")
+             (mkdir-p "eigen-src")
+             (invoke "tar" "xf" (assoc-ref inputs "eigen:src")
+                     "-C" "eigen-src" "--strip-components=1")
+
+             (substitute* "tensorflow/contrib/cmake/tf_python.cmake"
+               ;; Ensure that all Python dependencies can be found at build time.
+               (("PYTHONPATH=\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/tf_python" m)
+                (string-append m ":" (getenv "PYTHONPATH")))
+               ;; Take protobuf source files from our source package.
+               (("\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/protobuf/src/protobuf/src/google")
+                (string-append (getcwd) "/protobuf-src/src/google")))
+
+             (substitute* '("tensorflow/contrib/cmake/tf_shared_lib.cmake"
+                            "tensorflow/contrib/cmake/tf_python.cmake")
+               ;; Take Eigen source files from our source package.
+               (("\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/eigen/src/eigen/")
+                (string-append (getcwd) "/eigen-src/"))
+               ;; Take Eigen headers from our own package.
+               (("\\$\\{CMAKE_CURRENT_BINARY_DIR\\}/external/eigen_archive")
+                (string-append (assoc-ref inputs "eigen") "/include/eigen3")))
+
+             ;; Correct the RUNPATH of ops libraries generated for Python.
+             ;; TODO: this doesn't work :(
+             ;; /gnu/store/...-tensorflow-1.9.0/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/lib_beam_search_ops.so:
+             ;; warning: RUNPATH contains bogus entries: ("/tmp/guix-build-tensorflow-1.9.0.drv-0/source/tensorflow/contrib/build")
+             ;; /gnu/store/...-tensorflow-1.9.0/lib/python3.7/site-packages/tensorflow/contrib/seq2seq/python/ops/lib_beam_search_ops.so:
+             ;; error: depends on 'libpywrap_tensorflow_internal.so', which
+             ;; cannot be found in RUNPATH ...
+             (substitute* "tensorflow/contrib/cmake/tf_cc_ops.cmake"
+               (("set_target_properties.*")
+                (string-append "set_target_properties(${_AT_TARGET} PROPERTIES \
+COMPILE_FLAGS ${target_compile_flags} \
+INSTALL_RPATH_USE_LINK_PATH TRUE \
+INSTALL_RPATH " (assoc-ref outputs "out") "/lib)\n")))
+             #t))
+         (add-after 'build 'build-pip-package
+           (lambda* (#:key outputs #:allow-other-keys)
+             (setenv "LDFLAGS"
+                     (string-append "-Wl,-rpath="
+                                    (assoc-ref outputs "out") "/lib"))
+             (invoke "make" "tf_python_build_pip_package")
+             #t))
+         (add-after 'build-pip-package 'install-python
+           (lambda* (#:key outputs #:allow-other-keys)
+             (let ((out (assoc-ref outputs "out"))
+                   (wheel (car (find-files "../build/tf_python/dist/" "\\.whl$"))))
+               (invoke "python" "-m" "pip" "install" wheel
+                       (string-append "--prefix=" out))
+
+               ;; XXX: broken RUNPATH, see fix-python-build phase.
+               ;; Note that the Python version is hard-coded in this path.
+               (delete-file
+                (string-append
+                 out "/lib/python3.7/site-packages/tensorflow/contrib/"
+                 "seq2seq/python/ops/lib_beam_search_ops.so"))
+               #t))))))
+    (native-inputs
+     `(("pkg-config" ,pkg-config)
+       ("protobuf:native" ,protobuf-next) ; protoc
+       ("protobuf:src" ,(package-source protobuf-next))
+       ("eigen:src" ,(package-source eigen-for-tensorflow))
+       ;; The commit hashes and URLs for third-party source code are taken
+       ;; from "tensorflow/workspace.bzl".
+       ;;
+       ;; The labels of the following inputs must end in "-src": the
+       ;; 'unpack-third-party-sources' phase looks them up by that suffix.
+       ("boringssl-src"
+        ,(let ((commit "ee7aa02")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://boringssl.googlesource.com/boringssl")
+                   (commit commit)))
+             (file-name
+              (git-file-name "boringssl"
+                             (string-append "0-" revision "."
+                                            (string-take commit 7))))
+             (sha256
+              (base32
+               "1jf693q0nw0adsic6cgmbdx6g7wr4rj4vxa8j1hpn792fqhd8wgw")))))
+       ("cub-src"
+        ,(let ((version "1.8.0"))
+           (origin
+             (method url-fetch)
+             (uri (string-append "https://mirror.bazel.build/github.com/NVlabs/"
+                                 "cub/archive/" version ".zip"))
+             (file-name (string-append "cub-" version ".zip"))
+             (sha256
+              (base32
+               "1hsqikqridb90dkxkjr2918dcry6pfh46ccnwrzawl56aamhdykb")))))
+       ("double-conversion-src"
+        ,(let ((commit "5664746")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/double-conversion.git")
+                   (commit commit)))
+             (file-name
+              (git-file-name "double-conversion"
+                             (string-append "0-" revision "."
+                                            (string-take commit 7))))
+             (sha256
+              (base32
+               "1h5lppqqxcvdg5jq42i5msgwx20ryij3apvmndflngrgdpc04gn1")))))
+       ("farmhash-src"
+        ,(let ((commit "816a4ae622e964763ca0862d9dbd19324a1eaf45"))
+           (origin
+             (method url-fetch)
+             (uri (string-append
+                   "https://mirror.bazel.build/github.com/google/farmhash/archive/"
+                   commit ".tar.gz"))
+             (file-name (string-append "farmhash-0-" (string-take commit 7)
+                                       ".tar.gz"))
+             (sha256
+              (base32
+               "185b2xdxl4d4cnsnv6abg8s22gxvx8673jq2yaq85bz4cdy58q35")))))
+       ;; The license notice on the home page at
+       ;; http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html says:
+       ;;   Copyright Takuya OOURA, 1996-2001
+       ;;
+       ;;   You may use, copy, modify and distribute this code for any purpose
+       ;;   (include commercial use) and without fee.  Please refer to this
+       ;;   package when you modify this code.
+       ;;
+       ;; We take the identical tarball from the Bazel mirror, because the URL
+       ;; at the home page is not versioned and might change.
+       ("fft2d-src"
+        ,(origin
+           (method url-fetch)
+           (uri "https://mirror.bazel.build/www.kurims.kyoto-u.ac.jp/~ooura/fft.tgz")
+           (file-name "fft2d.tar.gz")
+           (sha256
+            (base32
+             "15jjkfvhqvl2c0753d2di8hz0pyzn598g74wqy79awdrf1y67fsj"))))
+       ("highwayhash-src"
+        ,(let ((commit "be5edafc2e1a455768e260ccd68ae7317b6690ee")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/highwayhash.git")
+                   (commit commit)))
+             (file-name
+              (git-file-name "highwayhash"
+                             (string-append "0-" revision "."
+                                            (string-take commit 7))))
+             (sha256
+              (base32
+               "154jwf98cyy54hldr94pgjn85zynly3abpnc1avmb8a18lzwjyb6")))))
+       ("nsync-src"
+        ,(let ((version "0559ce013feac8db639ee1bf776aca0325d28777")
+               (revision "1"))
+           (origin
+             (method url-fetch)
+             (uri (string-append "https://mirror.bazel.build/"
+                                 "github.com/google/nsync/archive/"
+                                 version ".tar.gz"))
+             (file-name (string-append "nsync-0." revision
+                                       "-" (string-take version 7)
+                                       ".tar.gz"))
+             (sha256
+              (base32
+               "0qdkyqym34x739mmzv97ah5r7ph462v5xkxqxvidmcfqbi64b132")))))
+       ("re2-src"
+        ,(let ((commit "e7efc48")
+               (revision "1"))
+           (origin
+             (method git-fetch)
+             (uri (git-reference
+                   (url "https://github.com/google/re2")
+                   (commit commit)))
+             (file-name
+              (git-file-name "re2"
+                             (string-append "0-" revision "."
+                                            (string-take commit 7))))
+             (sha256
+              (base32
+               "161g9841rjfsy5pn52fcis0s9hdr7rxvb06pad38j5rppfihvign")))))
+       ("googletest" ,googletest)
+       ("swig" ,swig)
+       ("unzip" ,unzip)))
+    (propagated-inputs
+     `(("python-absl-py" ,python-absl-py)
+       ("python-astor" ,python-astor)
+       ("python-gast" ,python-gast)
+       ("python-grpcio" ,python-grpcio)
+       ("python-numpy" ,python-numpy)
+       ("python-protobuf" ,python-protobuf-next)
+       ("python-six" ,python-six)
+       ("python-termcolor" ,python-termcolor)
+       ("python-wheel" ,python-wheel)))
+    (inputs
+     `(("c-ares" ,c-ares-next)
+       ("eigen" ,eigen-for-tensorflow)
+       ("gemmlowp" ,gemmlowp-for-tensorflow)
+       ("lmdb" ,lmdb)
+       ("libjpeg" ,libjpeg)
+       ("libpng" ,libpng)
+       ("giflib" ,giflib)
+       ("grpc" ,grpc)
+       ("jsoncpp" ,jsoncpp-for-tensorflow)
+       ("snappy" ,snappy)
+       ("sqlite" ,sqlite)
+       ("protobuf" ,protobuf-next)
+       ("python" ,python-wrapper)
+       ("zlib" ,zlib)))
+    (home-page "https://tensorflow.org")
+    (synopsis "Machine learning framework")
+    (description
+     "TensorFlow is a flexible platform for building and training machine
+learning models.  It provides a library for high performance numerical
+computation and includes high level Python APIs, including both a sequential
+API for beginners that allows users to build models quickly by plugging
+together building blocks and a subclassing API with an imperative style for
+advanced research.")
+    (license license:asl2.0)))