gnu: python-pytorch: Update to 2.2.1 and unbundle dependencies.

Autogenerated files are also regenerated.  The tests can be run, but are
disabled by default, as they take a very long time.
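
For reference, the disabled suite corresponds to running "python3
test/run_test.py --core" from the 'check phase.  A package variant that turns
the tests back on could look roughly like the following sketch (the variant
name is hypothetical and not part of this commit):

  (define-public python-pytorch-with-tests
    (package
      (inherit python-pytorch)
      (arguments
       (substitute-keyword-arguments (package-arguments python-pytorch)
         ;; Re-enable the core test suite; expect a very long build.
         ((#:tests? _ #f) #t)))))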

* gnu/packages/machine-learning.scm (python-pytorch): Update to 2.2.1.
[version]: Use %python-pytorch-version.
[source]: Use %python-pytorch-src.
[arguments]: Remove 'make-things-writable phase.  Add 'cmake-patches,
'disable-avx-dependencies, 'set-max-jobs, 'codegen1, 'codegen2, 'build2,
'install2 phases. Adjust 'use-system-libraries and 'check phases.
[native-inputs]: Add doxygen, ideep-pytorch, pocketfft-cpp, python-expecttest,
python-pytest-flakefinder, python-pytest-rerunfailures-13,
python-pytest-shard, python-pytest-xdist, python-hypothesis,
python-types-dataclasses, python-typing-extensions-4.10 and valgrind.
[inputs]: Add asmjit, clog, flatbuffers-next, foxi, fxdiv, libuv,
miniz-for-pytorch, qnnpack, qnnpack-pytorch and oneapi-dnnl. Use nnpack,
oneapi-dnnl, qnnpack, qnnpack-pytorch and xnnpack only for supported systems.
[propagated-inputs]: Add python-filelock, python-fsspec, python-jinja2,
python-networkx, python-opt-einsum, python-optree, python-packaging,
python-psutil and python-sympy. Remove python-cffi and python-six. Use cpuinfo
only for supported systems.
(%python-pytorch-src)[source]: Add patches.
(python-pytorch2): Remove variable.
* gnu/packages/patches/python-pytorch-runpath.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-system-libraries.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch: Remove file.
* gnu/packages/patches/python-pytorch-fix-codegen.patch: New file.
* gnu/packages/patches/python-pytorch-without-kineto.patch: New file.
* gnu/local.mk (dist_patch_DATA): Register them.
David Elsing 2024-03-23 22:05:05 +00:00 committed by Ludovic Courtès
parent b77c772a3e
commit 7b62d614e7
7 changed files with 768 additions and 356 deletions

gnu/local.mk

@@ -1971,9 +1971,10 @@ dist_patch_DATA = \
%D%/packages/patches/python-pyan3-fix-absolute-path-bug.patch \
%D%/packages/patches/python-pyan3-fix-positional-arguments.patch \
%D%/packages/patches/python-pygpgme-fix-pinentry-tests.patch \
%D%/packages/patches/python-pytorch-fix-codegen.patch \
%D%/packages/patches/python-pytorch-runpath.patch \
%D%/packages/patches/python-pytorch-system-libraries.patch \
%D%/packages/patches/python-pytorch-1.9.0-system-libraries.patch \
%D%/packages/patches/python-pytorch-without-kineto.patch \
%D%/packages/patches/python-robotframework-atest.patch \
%D%/packages/patches/python-robotframework-source-date-epoch.patch \
%D%/packages/patches/python-robotframework-sshlibrary-rf5-compat.patch \

gnu/packages/machine-learning.scm

@@ -105,6 +105,7 @@ (define-module (gnu packages machine-learning)
#:use-module (gnu packages parallel)
#:use-module (gnu packages perl)
#:use-module (gnu packages pkg-config)
#:use-module (gnu packages pretty-print)
#:use-module (gnu packages protobuf)
#:use-module (gnu packages pulseaudio)
#:use-module (gnu packages python)
@@ -123,6 +124,7 @@ (define-module (gnu packages machine-learning)
#:use-module (gnu packages swig)
#:use-module (gnu packages time)
#:use-module (gnu packages tls)
#:use-module (gnu packages valgrind)
#:use-module (gnu packages vulkan)
#:use-module (gnu packages video)
#:use-module (gnu packages web)
@@ -4346,6 +4348,13 @@ (define %python-pytorch-src
(sha256
(base32
"03mm0pwwb5lxdsmmiw3cch9fijgjw81kmmc4ln9rlyazkm7l1r48"))
(patches (search-patches "python-pytorch-system-libraries.patch"
"python-pytorch-runpath.patch"
"python-pytorch-without-kineto.patch"
;; Some autogeneration scripts depend on the
;; compiled PyTorch library. Therefore, we create
;; dummy versions which are regenerated later.
"python-pytorch-fix-codegen.patch"))
(modules '((guix build utils)))
(snippet
'(begin
@@ -4465,135 +4474,250 @@ (define-public qnnpack-pytorch
(define-public python-pytorch
(package
(name "python-pytorch")
(version "1.13.1")
(source (origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/pytorch/pytorch")
(commit (string-append "v" version))
(recursive? #t)))
(file-name (git-file-name name version))
(sha256
(base32
"17yxjzwp4zp75fz7czgz9acijzw7dpyqcza50v8y1x7hfg2gw369"))
(patches (search-patches "python-pytorch-system-libraries.patch"
"python-pytorch-runpath.patch"))
(modules '((guix build utils)))
(snippet
'(begin
;; XXX: Let's be clear: this package is a bundling fest. We
;; delete as much as we can, but there's still a lot left.
(for-each (lambda (directory)
(delete-file-recursively
(string-append "third_party/" directory)))
'("benchmark" "cpuinfo" "eigen"
;; FIXME: QNNPACK (of which XNNPACK is a fork)
;; needs these.
;; "FP16" "FXdiv" "gemmlowp" "psimd"
"gloo" "googletest" "ios-cmake" "NNPACK"
"onnx" "protobuf" "pthreadpool"
"pybind11" "python-enum" "python-peachpy"
"python-six" "tbb" "XNNPACK" "zstd"))
(substitute* "functorch/CMakeLists.txt"
(("\\$\\{_rpath_portable_origin\\}/../torch/lib")
"$ORIGIN/../torch/lib"))))))
(version %python-pytorch-version)
(source %python-pytorch-src)
(build-system python-build-system)
(arguments
'(#:phases (modify-phases %standard-phases
(add-before 'build 'use-system-libraries
(lambda* (#:key outputs #:allow-other-keys)
;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
;; want to use "system libraries" instead of the bundled
;; ones.
(setenv "USE_SYSTEM_LIBS" "1")
(list
#:phases
#~(modify-phases %standard-phases
(add-after 'unpack 'cmake-patches
(lambda _
(substitute* "cmake/Dependencies.cmake"
(("#POCKETFFT_INCLUDE_DIR")
(string-append
#$(this-package-native-input "pocketfft-cpp") "/include"))
(("#FP16_INCLUDE_DIR")
(string-append
#$(this-package-input "fp16") "/include")))))
(add-before 'build 'use-system-libraries
(lambda _
(substitute* '("caffe2/serialize/crc.cc"
"caffe2/serialize/inline_container.cc")
(("\"miniz\\.h\"") "<miniz/miniz.h>"))
(substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
(("<include/vk_mem_alloc.h>")
"<vk_mem_alloc.h>"))
;; For Vulkan
(substitute* "CMakeLists.txt"
(("append_cxx_flag.*-Werror=(return-type|range-loop-construct).*") ""))
(substitute*
(cons*
"torch/csrc/Module.cpp"
(map
(lambda (name)
(string-append
"torch/utils/benchmark/utils/valgrind_wrapper/"
name))
'("compat_bindings.cpp" "timer_callgrind_template.cpp")))
(("<callgrind.h>") "<valgrind/callgrind.h>"))
(setenv "USE_FFMPEG" "1")
(setenv "USE_VULKAN" "1")
(setenv "USE_OPENCV" "1")
;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
;; want to use "system libraries" instead of the bundled
;; ones.
(setenv "USE_SYSTEM_LIBS" "1")
;; For oneDNN
(setenv "USE_MKLDNN" "1")
;; Only works with CUPTI
(setenv "USE_KINETO" "0")
;; Prevent CMake error by disabling explicitly
(setenv "USE_ITT" "0")
;; Disable on unsupported systems
(if #$(not (member
(or (%current-target-system)
(%current-system))
(package-transitive-supported-systems qnnpack)))
(setenv "USE_QNNPACK" "0")
(setenv "USE_PYTORCH_QNNPACK" "0"))))
;; PyTorch is still built with AVX2 and AVX-512 support selected at
;; runtime, but these dependencies require it (nnpack only for
;; x86_64).
(add-before 'build 'disable-avx-dependencies
(lambda _
(setenv "USE_FBGEMM" "0")
(if #$(not
(member (or (%current-target-system)
(%current-system))
'("armhf-linux" "aarch64-linux")))
(setenv "USE_NNPACK" "0"))))
(add-after 'use-system-libraries 'set-max-jobs
(lambda _
(setenv "MAX_JOBS" (number->string (parallel-job-count)))))
(add-after 'set-max-jobs 'codegen1
(lambda _
(with-directory-excursion "torch/csrc/jit/tensorexpr"
(setenv "PYTHONPATH" "../../../..")
(invoke "python3" "codegen_external.py")
(setenv "PYTHONPATH" #f))
(substitute* "cmake/Dependencies.cmake"
(("if\\(USE_SYSTEM_BIND11\\)")
"if(TRUE)"))
(invoke "python3" "aten/src/ATen/nnapi/codegen.py")
;; XXX: Disable that for simplicity for now.
(setenv "USE_FBGEMM" "0")))
(add-before 'build 'make-things-writable
(lambda _
;; The 'build_caffe2' function in
;; 'tools/build_pytorch_libs.py', called from the
;; top-level 'setup.py', needs write access to this
;; directory.
(for-each make-file-writable
(find-files "caffe2/proto" "."
#:directories? #t))))
(replace 'check
(lambda* (#:key inputs outputs tests? #:allow-other-keys)
;; Run the test suite following the instructions in
;; 'CONTRIBUTING.md'. XXX: Unfortunately this doesn't
;; work, unless you set GUIX_PYTHONPATH presumably.
(when tests?
(add-installed-pythonpath inputs outputs)
(invoke "python" "test/run_test.py"))))
(add-after 'install 'remove-test-executables
(lambda* (#:key inputs outputs #:allow-other-keys)
;; Remove test executables, but keep other executables
;; such as 'torch_shm_manager' and .so files such as
;; 'libtorch_global_deps.so'.
(let ((python-site (site-packages inputs outputs)))
(for-each delete-file
(find-files python-site
"(^test_cpp_rpc|_test)$")))))
(add-after 'install 'remove-caffe2-onnx-scripts
(lambda* (#:key outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
(bin (string-append out "/bin")))
;; Remove 'convert-caffe2-to-onnx' and
;; 'convert-onnx-to-caffe2': they seem to be
;; deprecated and they cause a failure of the
;; 'sanity-check' phase:
;;
;; ImportError: cannot import name 'metanet_pb2' from partially initialized module 'caffe2.proto' (most likely due to a circular import)
(for-each delete-file
(find-files bin "^convert-.*caffe2"))
(invoke "bash" "tools/gen_flatbuffers.sh")
(substitute* (find-files out "^entry_points\\.txt$")
(("^convert-.*" all)
(string-append "# " all "\n")))))))
;; Generate dummy files as the generation depends on the compiled
;; library. They are regenerated later.
(setenv "PYTHONPATH" ".")
(invoke "python3"
"torchgen/operator_versions/gen_mobile_upgraders.py"
"dummy")
(setenv "PYTHONPATH" #f)
;; XXX: Tests attempt to download data such as
;; <https://raw.githubusercontent.com/pytorch/test-infra/master/stats/slow-tests.json>.
;; We're also missing some Python modules, such as expecttest.
#:tests? #f))
(invoke "python3"
"torchgen/shape_functions/gen_jit_shape_functions.py"
"dummy")
(invoke "python3"
"torchgen/decompositions/gen_jit_decompositions.py"
"dummy")))
;; Properly generate autogenerated files ...
(add-after 'install 'codegen2
(lambda* (#:key inputs outputs #:allow-other-keys)
(add-installed-pythonpath inputs outputs)
(invoke "python3"
"torchgen/operator_versions/gen_mobile_upgraders.py")
(invoke "python3"
"torchgen/shape_functions/gen_jit_shape_functions.py")
(invoke "python3"
"torchgen/decompositions/gen_jit_decompositions.py")))
;; ... rebuild their dependencies ...
(add-after 'codegen2 'build2
(lambda _
(invoke "python3" "setup.py" "build")))
;; ... and install again.
(add-after 'build2 'install2
(lambda _
(invoke "python3" "setup.py" "install" (string-append "--prefix=" #$output)
"--no-compile" "--single-version-externally-managed" "--root=/")
(invoke "python" "-m" "compileall"
"--invalidation-mode=unchecked-hash" #$output)))
(replace 'check
(lambda* (#:key tests? #:allow-other-keys)
;; Run the test suite following the instructions in
;; 'CONTRIBUTING.md'. Unfortunately this doesn't work, unless
;; you set PYTHONPATH or GUIX_PYTHONPATH, but this is done in
;; the codegen2 phase already.
(when tests?
(invoke "python3" "test/run_test.py" "--core"))))
(add-after 'install2 'remove-test-executables
(lambda* (#:key inputs outputs #:allow-other-keys)
;; Remove test executables, but keep other executables
;; such as 'torch_shm_manager' and .so files such as
;; 'libtorch_global_deps.so'.
(let ((python-site (site-packages inputs outputs)))
(for-each delete-file
(find-files python-site
"(^test_cpp_rpc|_test)$")))))
(add-after 'install2 'remove-caffe2-onnx-scripts
(lambda* (#:key outputs #:allow-other-keys)
(let* ((out (assoc-ref outputs "out"))
(bin (string-append out "/bin")))
;; Remove 'convert-caffe2-to-onnx' and
;; 'convert-onnx-to-caffe2': they seem to be
;; deprecated and they cause a failure of the
;; 'sanity-check' phase:
;;
;; ImportError: cannot import name 'metanet_pb2' from
;; partially initialized module 'caffe2.proto' (most likely
;; due to a circular import)
(for-each delete-file
(find-files bin "^convert-.*caffe2"))
(substitute* (find-files out "^entry_points\\.txt$")
(("^convert-.*" all)
(string-append "# " all "\n")))))))
;; Even the core tests alone take a very long time to run.
#:tests? #f))
(native-inputs
(list cmake ninja))
(list cmake
doxygen
ideep-pytorch
ninja
pocketfft-cpp
python-expecttest
python-pytest-flakefinder
python-pytest-rerunfailures-13
python-pytest-shard
python-pytest-xdist
python-hypothesis
python-types-dataclasses
python-typing-extensions-4.10
shaderc
valgrind))
(inputs
(list eigen
;; ("fmt" ,fmt)
fp16
gemmlowp
googletest
googlebenchmark
gloo
nnpack
openblas
openmpi
pthreadpool
protobuf
pybind11
sleef
xnnpack
zstd))
(append
(list asmjit
clog
eigen
ffmpeg
flatbuffers-next
fmt
foxi
fp16
fxdiv
gemmlowp
gloo
googletest
googlebenchmark
libuv
miniz-for-pytorch
openblas
opencv
openmpi
pthreadpool
protobuf
pybind11
sleef
tensorpipe
vulkan-headers
vulkan-loader
vulkan-memory-allocator
zstd)
;; TODO: fix build on 32 bit systems once Rust is available.
(filter
(lambda (pkg)
(member (or (%current-target-system)
(%current-system))
(package-transitive-supported-systems pkg)))
(list oneapi-dnnl
qnnpack
qnnpack-pytorch
xnnpack))
;; nnpack requires AVX2 for x86_64-linux
(filter
(lambda (pkg)
(member (or (%current-target-system)
(%current-system))
'("armhf-linux" "aarch64-linux")))
(list nnpack))))
(propagated-inputs
(list python-astunparse
python-click
python-numpy
python-pyyaml
python-cffi
python-typing-extensions
python-future
python-six
python-requests
onnx ;propagated for its Python modules
onnx-optimizer
cpuinfo))
(append
(list onnx ;propagated for its Python modules
onnx-optimizer
python-astunparse
python-click
python-filelock
python-fsspec
python-future
python-jinja2
python-networkx
python-numpy
python-opt-einsum
python-optree
python-packaging
python-psutil
python-pyyaml
python-requests
python-sympy
python-typing-extensions)
(filter
(lambda (pkg)
(member (or (%current-target-system)
(%current-system))
(package-transitive-supported-systems pkg)))
(list cpuinfo))))
(home-page "https://pytorch.org/")
(synopsis "Python library for tensor computation and deep neural networks")
(description
@@ -4610,61 +4734,6 @@ (define-public python-pytorch
Note: currently this package does not provide GPU support.")
(license license:bsd-3)))
(define-public python-pytorch2
(package
(inherit python-pytorch)
(name "python-pytorch")
(version "2.2.1")
(source (origin
(method git-fetch)
(uri (git-reference
(url "https://github.com/pytorch/pytorch")
(commit (string-append "v" version))
(recursive? #t)))
(file-name (git-file-name name version))
(sha256
(base32
"0hdr0d6n072qd0nq2dkxhc9pva6vggj9hpzc0glpc60vfgk0cgzb"))
(patches (search-patches "python-pytorch2-system-libraries.patch"
"python-pytorch-runpath.patch"))
(modules '((guix build utils)))
(snippet
'(begin
;; XXX: Let's be clear: this package is a bundling fest. We
;; delete as much as we can, but there's still a lot left.
(for-each (lambda (directory)
(delete-file-recursively
(string-append "third_party/" directory)))
'("benchmark" "cpuinfo" "eigen"
;; FIXME: QNNPACK (of which XNNPACK is a fork)
;; needs these.
;; "FP16" "FXdiv" "gemmlowp" "psimd"
"gloo" "googletest" "ios-cmake" "NNPACK"
"onnx" "protobuf" "pthreadpool"
"pybind11" "python-peachpy"
"tbb" "XNNPACK" "zstd"))
(substitute* "caffe2/CMakeLists.txt"
(("target_link_libraries\\(\\$\\{test_name\\}_\\$\\{CPU_CAPABILITY\\} c10 sleef gtest_main\\)")
"target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest gtest_main)"))
(substitute* "functorch/CMakeLists.txt"
(("\\$\\{_rpath_portable_origin\\}/../torch/lib")
"$ORIGIN/../torch/lib"))))))
(inputs
(modify-inputs (package-inputs python-pytorch)
(replace "xnnpack" xnnpack-for-torch2)))
(propagated-inputs
(modify-inputs (package-propagated-inputs python-pytorch)
(append python-filelock
python-fsspec
python-jinja2
python-networkx
python-opt-einsum
python-sympy)
(replace "onnx" onnx-for-torch2)
(replace "onnx-optimizer" onnx-optimizer-for-torch2)))))
(define-public python-pytorch-for-r-torch
(package
(inherit python-pytorch)

gnu/packages/patches/python-pytorch-1.9.0-system-libraries.patch (deleted)

@@ -1,139 +0,0 @@
Use our own googletest rather than the bundled one.
Get NNPACK to use our own PeachPy rather than the bundled one.
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 5d57b9ca78..620cca4e60 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -644,11 +644,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
# this shouldn't be necessary anymore.
get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest)
- set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES ${INC_DIR_temp})
-
- include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include)
- include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googlemock/include)
# We will not need to test benchmark lib itself.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
@@ -1485,7 +1480,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
endif()
set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
- list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
+ list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
endif()
include_directories(${FOXI_INCLUDE_DIRS})
list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 50ebb224ce..5953d9ddf7 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1632,7 +1632,7 @@ if(BUILD_TEST)
if(NOT MSVC)
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/affine_quantizer_base.cpp)
# TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest)
if(USE_FBGEMM)
target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
endif()
@@ -1655,7 +1655,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_CPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
@@ -1673,7 +1673,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_GPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
cuda_add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1691,7 +1691,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1709,7 +1709,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_HIP_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
diff --git a/torch/lib/c10d/test/CMakeLists.txt b/torch/lib/c10d/test/CMakeLists.txt
index b74d4b65f7..fc7c207505 100644
--- a/torch/lib/c10d/test/CMakeLists.txt
+++ b/torch/lib/c10d/test/CMakeLists.txt
@@ -16,24 +16,24 @@ function(c10d_add_test test_src)
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
endfunction()
-c10d_add_test(FileStoreTest.cpp c10d gtest_main)
-c10d_add_test(TCPStoreTest.cpp c10d gtest_main)
+c10d_add_test(FileStoreTest.cpp c10d gtest_main gtest)
+c10d_add_test(TCPStoreTest.cpp c10d gtest_main gtest)
if(NOT WIN32)
- c10d_add_test(HashStoreTest.cpp c10d gtest_main)
+ c10d_add_test(HashStoreTest.cpp c10d gtest_main gtest)
endif()
if(USE_CUDA)
if(USE_C10D_GLOO)
- c10d_add_test(ProcessGroupGlooTest.cpp c10d c10d_cuda_test gtest_main)
- c10d_add_test(ProcessGroupGlooAsyncTest.cpp c10d c10d_cuda_test gtest_main)
+ c10d_add_test(ProcessGroupGlooTest.cpp c10d c10d_cuda_test gtest_main gtest)
+ c10d_add_test(ProcessGroupGlooAsyncTest.cpp c10d c10d_cuda_test gtest_main gtest)
endif()
if(USE_C10D_NCCL)
- c10d_add_test(ProcessGroupNCCLTest.cpp c10d c10d_cuda_test gtest_main)
+ c10d_add_test(ProcessGroupNCCLTest.cpp c10d c10d_cuda_test gtest_main gtest)
c10d_add_test(ProcessGroupNCCLErrorsTest.cpp c10d c10d_cuda_test
- gtest_main)
+ gtest_main gtest)
endif()
else()
if(USE_C10D_GLOO)
- c10d_add_test(ProcessGroupGlooTest.cpp c10d gtest_main)
+ c10d_add_test(ProcessGroupGlooTest.cpp c10d gtest_main gtest)
endif()
endif()
diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
index a41343cbb5..6075bdd0a4 100644
--- a/cmake/External/nnpack.cmake
+++ b/cmake/External/nnpack.cmake
@@ -40,7 +40,7 @@ endif()
# (3) Android, iOS, Linux, macOS - supported
##############################################################################
-if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
+if(FALSE)
message(STATUS "Brace yourself, we are building NNPACK")
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
@@ -114,6 +114,5 @@ endif()
# (4) Catch-all: not supported.
##############################################################################
-message(WARNING "Unknown platform - I don't know how to build NNPACK. "
- "See cmake/External/nnpack.cmake for details.")
-set(USE_NNPACK OFF)
+set(NNPACK_FOUND TRUE)
+set(USE_NNPACK ON)

gnu/packages/patches/python-pytorch-fix-codegen.patch (new file)

@@ -0,0 +1,167 @@
This patch fixes some scripts for generating source files.  For
gen_jit_decompositions.py, gen_mobile_upgraders.py and
gen_jit_shape_functions.py, which depend on the compiled PyTorch library, an
option to generate "dummy" source files is added for the initial build; these
files are regenerated properly later.  codegen_external.py is patched to avoid
duplicate functions and to add the static keyword, as in the existing
generated file.
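The fallback applied to each of these scripts looks roughly like the sketch
below (illustration only, not part of the patch itself): when the script is
invoked with a "dummy" argument, the import of the not-yet-built torch module
is skipped and an empty table is emitted instead.

  import sys

  # During the first build pass the scripts are called with "dummy", so the
  # real torch import (which needs the compiled library) is skipped.
  if len(sys.argv) < 2 or sys.argv[1] != "dummy":
      from torch.jit._decompositions import decomposition_table
  else:
      decomposition_table = {}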
diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
index cc0263dbbf..ac34e84b82 100644
--- a/tools/gen_flatbuffers.sh
+++ b/tools/gen_flatbuffers.sh
@@ -1,13 +1,13 @@
#!/bin/bash
ROOT=$(pwd)
-FF_LOCATION="$ROOT/third_party/flatbuffers"
-cd "$FF_LOCATION" || exit
-mkdir build
-cd build || exit
-cmake ..
-cmake --build . --target flatc
-mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
-./flatc --cpp --gen-mutable --scoped-enums \
+#FF_LOCATION="$ROOT/third_party/flatbuffers"
+#cd "$FF_LOCATION" || exit
+#mkdir build
+#cd build || exit
+#cmake ..
+#cmake --build . --target flatc
+#mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
+flatc --cpp --gen-mutable --scoped-enums \
-o "$ROOT/torch/csrc/jit/serialization" \
-c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
index bc69b05162..0f8df81de3 100644
--- a/torch/csrc/jit/tensorexpr/codegen_external.py
+++ b/torch/csrc/jit/tensorexpr/codegen_external.py
@@ -20,9 +20,14 @@ def gen_external(native_functions_path, tags_path, external_path):
native_functions = parse_native_yaml(native_functions_path, tags_path)
func_decls = []
func_registrations = []
- for func in native_functions:
+ done_names = set()
+ for func in native_functions[0]:
schema = func.func
name = schema.name.name.base
+ if name in done_names:
+ continue
+ else:
+ done_names.add(name)
args = schema.arguments
# Only supports extern calls for functions with out variants
if not schema.is_out_fn():
@@ -62,7 +67,7 @@ def gen_external(native_functions_path, tags_path, external_path):
# print(tensor_decls, name, arg_names)
func_decl = f"""\
-void nnc_aten_{name}(
+static void nnc_aten_{name}(
int64_t bufs_num,
void** buf_data,
int64_t* buf_ranks,
diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
index 7cfbb803f9..2e69bb1868 100644
--- a/torchgen/decompositions/gen_jit_decompositions.py
+++ b/torchgen/decompositions/gen_jit_decompositions.py
@@ -1,8 +1,12 @@
#!/usr/bin/env python3
import os
from pathlib import Path
+import sys
-from torch.jit._decompositions import decomposition_table
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ from torch.jit._decompositions import decomposition_table
+else:
+ decomposition_table = {}
# from torchgen.code_template import CodeTemplate
@@ -85,7 +89,7 @@ def write_decomposition_util_file(path: str) -> None:
def main() -> None:
- pytorch_dir = Path(__file__).resolve().parents[3]
+ pytorch_dir = Path(__file__).resolve().parents[2]
upgrader_path = pytorch_dir / "torch" / "csrc" / "jit" / "runtime"
write_decomposition_util_file(str(upgrader_path))
diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
index dab1568580..55c58715fc 100644
--- a/torchgen/operator_versions/gen_mobile_upgraders.py
+++ b/torchgen/operator_versions/gen_mobile_upgraders.py
@@ -2,10 +2,12 @@
import os
from enum import Enum
from pathlib import Path
+import sys
from typing import Any, Dict, List
-import torch
-from torch.jit.generate_bytecode import generate_upgraders_bytecode
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ import torch
+ from torch.jit.generate_bytecode import generate_upgraders_bytecode
from torchgen.code_template import CodeTemplate
from torchgen.operator_versions.gen_mobile_upgraders_constant import (
@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
def construct_version_maps(
upgrader_bytecode_function_to_index_map: Dict[str, Any]
) -> str:
- version_map = torch._C._get_operator_version_map()
+ if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ version_map = torch._C._get_operator_version_map()
+ else:
+ version_map = {}
sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0]) # type: ignore[no-any-return]
sorted_version_map = dict(sorted_version_map_)
@@ -378,7 +383,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
def main() -> None:
- upgrader_list = generate_upgraders_bytecode()
+ if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ upgrader_list = generate_upgraders_bytecode()
+ else:
+ upgrader_list = []
sorted_upgrader_list = sort_upgrader(upgrader_list)
for up in sorted_upgrader_list:
print("after sort upgrader : ", next(iter(up)))
diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
index c6336a6951..34e394d818 100644
--- a/torchgen/shape_functions/gen_jit_shape_functions.py
+++ b/torchgen/shape_functions/gen_jit_shape_functions.py
@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo"""
if not file_path.exists():
raise Exception(err_msg)
-spec = importlib.util.spec_from_file_location(module_name, file_path)
-assert spec is not None
-module = importlib.util.module_from_spec(spec)
-sys.modules[module_name] = module
-assert spec.loader is not None
-assert module is not None
-spec.loader.exec_module(module)
-
-bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
-shape_compute_graph_mapping = module.shape_compute_graph_mapping
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
+ assert spec is not None
+ module = importlib.util.module_from_spec(spec)
+ sys.modules[module_name] = module
+ assert spec.loader is not None
+ assert module is not None
+ spec.loader.exec_module(module)
+
+ bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
+ shape_compute_graph_mapping = module.shape_compute_graph_mapping
+else:
+ bounded_compute_graph_mapping = {}
+ shape_compute_graph_mapping = {}
SHAPE_HEADER = r"""

gnu/packages/patches/python-pytorch-runpath.patch

@@ -3,10 +3,10 @@ get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
Make sure RUNPATH matches that.
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index 5b5622f0..30d27e57 100644
index 74836372..c8eb69d1 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1909,7 +1909,7 @@ if(BUILD_PYTHON)
@@ -1910,7 +1910,7 @@ if(BUILD_PYTHON)
if(${BUILDING_WITH_TORCH_LIBS})
# site-packages/caffe2/python/caffe2_pybind11_state
# site-packages/torch/lib
@@ -16,7 +16,7 @@ index 5b5622f0..30d27e57 100644
# Must also include `CMAKE_SHARED_LINKER_FLAGS` in linker flags for
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ee9cf410..f190e69b 100644
index acc95842..8f8fb7d7 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -4,7 +4,7 @@ if(APPLE)
@@ -28,3 +28,16 @@ index ee9cf410..f190e69b 100644
endif(APPLE)
# Use separate rpaths during build and install phases
set(CMAKE_SKIP_BUILD_RPATH FALSE)
diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
index f2f32745..db21b656 100644
--- a/functorch/CMakeLists.txt
+++ b/functorch/CMakeLists.txt
@@ -21,7 +21,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/functorch)
-set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib")
+set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/../torch/lib")
# Copy-pasted prefix/suffix logic for Python extensions from
# https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975

gnu/packages/patches/python-pytorch-system-libraries.patch

@@ -1,38 +1,104 @@
Use our own googletest rather than the bundled one.
Get NNPACK to use our own PeachPy rather than the bundled one.
Patch build files to also use system libraries instead of bundled ones for the
libraries that are not supported or do not work when only USE_SYSTEM_LIBS is
specified.  This includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv,
googletest, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
qnnpack, qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
For QNNPACK, two versions were bundled and both are required: the upstream one
and an internal fork (now in the qnnpack-pytorch package).
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 2c2b967..5ac5fa6 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -371,9 +371,9 @@ if(AT_NNPACK_ENABLED)
list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif()
-if(MKLDNN_FOUND)
- list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
-endif(MKLDNN_FOUND)
+if(USE_MKLDNN)
+ list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
+endif(USE_MKLDNN)
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index d57d7ebb..5b5622f0 100644
index 7483637..093de40 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -1736,7 +1736,7 @@ if(BUILD_TEST)
@@ -111,9 +111,6 @@ if(NOT MSVC AND USE_XNNPACK)
if(NOT TARGET fxdiv)
set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
- add_subdirectory(
- "${FXDIV_SOURCE_DIR}"
- "${CMAKE_BINARY_DIR}/FXdiv")
endif()
endif()
@@ -1055,7 +1052,6 @@ elseif(USE_CUDA)
endif()
if(NOT MSVC AND USE_XNNPACK)
- TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
endif()
# ==========================================================
@@ -1396,6 +1392,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
+target_link_libraries(torch_cpu PRIVATE miniz clog)
if(USE_MPI)
target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
endif()
@@ -1653,7 +1650,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
target_link_libraries(static_runtime_bench torch_library benchmark)
- target_link_libraries(static_runtime_test torch_library gtest_main)
+ target_link_libraries(static_runtime_test torch_library gtest_main gtest)
endif()
if(BUILD_TENSOREXPR_BENCHMARK)
@@ -1680,7 +1677,7 @@ if(BUILD_MOBILE_TEST)
foreach(test_src ${ATen_MOBILE_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
@@ -1701,7 +1698,7 @@ if(BUILD_TEST)
if(NOT MSVC)
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/affine_quantizer_base.cpp)
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
# TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest)
if(USE_FBGEMM)
target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
endif()
@@ -1759,7 +1759,7 @@ if(BUILD_TEST)
@@ -1715,7 +1712,7 @@ if(BUILD_TEST)
endif()
else()
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest)
endif()
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
@@ -1732,7 +1729,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_CPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
if(USE_OPENMP)
# -fopenmp is a compile time flag and as result not guaranteed
# to link executable against OpenMP runtime library
@@ -1785,7 +1785,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_GPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1803,7 +1803,7 @@ if(BUILD_TEST)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
@@ -1795,7 +1792,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
@@ -41,20 +107,66 @@ index d57d7ebb..5b5622f0 100644
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1821,7 +1821,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_HIP_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
index 1552b59..67e1a9a 100644
--- a/caffe2/serialize/CMakeLists.txt
+++ b/caffe2/serialize/CMakeLists.txt
@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
list(APPEND Caffe2_CPU_SRCS
- ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index 557ab649..ee9cf410 100644
index acc9584..97275bf 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -732,11 +732,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
@@ -283,7 +283,7 @@ endif()
# --- [ PocketFFT
set(AT_POCKETFFT_ENABLED 0)
if(NOT AT_MKL_ENABLED)
- set(POCKETFFT_INCLUDE_DIR "${Torch_SOURCE_DIR}/third_party/pocketfft/")
+ set(POCKETFFT_INCLUDE_DIR "#POCKETFFT_INCLUDE_DIR")
if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
@@ -489,19 +489,6 @@ if(USE_QNNPACK)
set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
- add_subdirectory(
- "${QNNPACK_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
-
- # TODO: See https://github.com/pytorch/pytorch/issues/56285
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
- endif()
-
- # We build static versions of QNNPACK and pthreadpool but link
- # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
if(QNNPACK_CUSTOM_THREADPOOL)
target_compile_definitions(
@@ -550,13 +537,6 @@ if(USE_PYTORCH_QNNPACK)
set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
- add_subdirectory(
- "${PYTORCH_QNNPACK_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/pytorch_qnnpack")
- # We build static versions of QNNPACK and pthreadpool but link
- # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
target_compile_definitions(
@@ -728,11 +708,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
# this shouldn't be necessary anymore.
get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ -66,7 +178,49 @@ index 557ab649..ee9cf410 100644
# We will not need to test benchmark lib itself.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
@@ -1543,7 +1538,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
@@ -810,16 +785,6 @@ if(USE_FBGEMM)
if(USE_ASAN)
set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
endif()
- add_subdirectory("${FBGEMM_SOURCE_DIR}")
- set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET fbgemm_avx2 PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET fbgemm_avx512 PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET fbgemm PROPERTY POSITION_INDEPENDENT_CODE ON)
- if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.0.0)
- # See https://github.com/pytorch/pytorch/issues/74352
- target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
- target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
- endif()
endif()
if(USE_FBGEMM)
@@ -979,7 +944,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
"${FP16_SOURCE_DIR}"
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
- add_library(fp16 STATIC "/usr/include/fp16.h")
+ add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
endif()
list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
@@ -1362,7 +1327,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
# Tensorpipe uses cuda_add_library
torch_update_find_cuda_flags()
- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
if(USE_CUDA)
@@ -1529,7 +1493,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif()
endif()
- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
if(NOT USE_SYSTEM_ONNX)
@@ -1560,7 +1523,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
endif()
set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
@@ -75,8 +229,36 @@ index 557ab649..ee9cf410 100644
endif()
include_directories(${FOXI_INCLUDE_DIRS})
list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
@@ -1739,9 +1702,8 @@ if(NOT INTERN_BUILD_MOBILE)
endif()
if(USE_MKLDNN)
include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
- if(MKLDNN_FOUND)
+ if(DNNL_FOUND)
set(AT_MKLDNN_ENABLED 1)
- include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
if(BUILD_CAFFE2_OPS)
list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn)
endif(BUILD_CAFFE2_OPS)
@@ -1796,7 +1758,7 @@ endif()
#
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+find_package(fmt)
# Disable compiler feature checks for `fmt`.
#
@@ -1805,7 +1767,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks.
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
index a41343cb..6075bdd0 100644
index 9d5f064..c3624e5 100644
--- a/cmake/External/nnpack.cmake
+++ b/cmake/External/nnpack.cmake
@@ -40,7 +40,7 @@ endif()
@@ -88,7 +270,7 @@ index a41343cb..6075bdd0 100644
message(STATUS "Brace yourself, we are building NNPACK")
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
@@ -114,6 +114,5 @@ endif()
@@ -112,6 +112,5 @@ endif()
# (4) Catch-all: not supported.
##############################################################################
@@ -97,8 +279,45 @@ index a41343cb..6075bdd0 100644
-set(USE_NNPACK OFF)
+set(NNPACK_FOUND TRUE)
+set(USE_NNPACK ON)
diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
index 8793562..9f8fa3d 100644
--- a/cmake/public/mkldnn.cmake
+++ b/cmake/public/mkldnn.cmake
@@ -4,7 +4,7 @@ if(CPU_AARCH64)
include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
endif()
-find_package(MKLDNN QUIET)
+find_package(DNNL REQUIRED)
if(NOT TARGET caffe2::mkldnn)
add_library(caffe2::mkldnn INTERFACE IMPORTED)
@@ -15,4 +15,4 @@ set_property(
${MKLDNN_INCLUDE_DIR})
set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
- ${MKLDNN_LIBRARIES})
+ DNNL::dnnl)
diff --git a/setup.py b/setup.py
index 81f3c6c..3251cab 100644
--- a/setup.py
+++ b/setup.py
@@ -482,13 +482,9 @@ def build_deps():
# Windows has very poor support for them.
sym_files = [
"tools/shared/_utils_internal.py",
- "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
- "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
]
orig_files = [
"torch/_utils_internal.py",
- "third_party/valgrind-headers/callgrind.h",
- "third_party/valgrind-headers/valgrind.h",
]
for sym_file, orig_file in zip(sym_files, orig_files):
same = False
diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
index bf91460c..ef56948f 100644
index 5c89748..ef84c57 100644
--- a/test/cpp/c10d/CMakeLists.txt
+++ b/test/cpp/c10d/CMakeLists.txt
@@ -16,14 +16,14 @@ function(c10d_add_test test_src)
@@ -133,7 +352,29 @@ index bf91460c..ef56948f 100644
endif()
if(USE_NCCL AND USE_C10D_NCCL)
# NCCL is a private dependency of libtorch, but the tests include some
@@ -56,7 +56,7 @@ if(USE_CUDA)
@@ -44,10 +44,10 @@ if(USE_CUDA)
# a private dependency of the tests as well.
c10d_add_test(
ProcessGroupNCCLTest.cpp
- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
c10d_add_test(
ProcessGroupNCCLErrorsTest.cpp
- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
if(INSTALL_TEST)
install(TARGETS ProcessGroupNCCLTest DESTINATION bin)
install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin)
@@ -61,7 +61,7 @@ if(USE_CUDA)
# a private dependency of the tests as well.
c10d_add_test(
ProcessGroupUCCTest.cpp
- torch_cpu c10d_cuda_test gtest_main __caffe2_ucc)
+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc)
if(INSTALL_TEST)
install(TARGETS ProcessGroupUCCTest DESTINATION bin)
install(TARGETS c10d_cuda_test DESTINATION lib)
@@ -69,7 +69,7 @@ if(USE_CUDA)
endif()
else()
if(USE_GLOO AND USE_C10D_GLOO)
@ -143,10 +384,10 @@ index bf91460c..ef56948f 100644
endif()
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
index 8fc5a0a1..643202f6 100644
index 012471d..d39b625 100644
--- a/test/cpp/tensorexpr/CMakeLists.txt
+++ b/test/cpp/tensorexpr/CMakeLists.txt
@@ -53,7 +53,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
# pthreadpool header. For some build environment we need add the dependency
# explicitly.
if(USE_PTHREADPOOL)
@@ -154,4 +395,4 @@ index 8fc5a0a1..643202f6 100644
+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
endif()
if(USE_CUDA)
target_link_libraries(test_tensorexpr PRIVATE
target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)

gnu/packages/patches/python-pytorch-without-kineto.patch (new file)

@@ -0,0 +1,60 @@
Even when building without Kineto, the <ActivityType.h> header is still
imported and the ActivityType type is used. This patch was copied from
https://github.com/pytorch/pytorch/pull/111048.
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
index e92cbf00..68985ab7 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,51 @@
#undef USE_KINETO
#endif
+#ifdef USE_KINETO
#include <ActivityType.h>
+#else
+namespace libkineto {
+// copied from header
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Note : All activity types are not enabled by default. Please add them
+// at correct position in the enum
+enum class ActivityType {
+ // Activity types enabled by default
+ CPU_OP = 0, // cpu side ops
+ USER_ANNOTATION,
+ GPU_USER_ANNOTATION,
+ GPU_MEMCPY,
+ GPU_MEMSET,
+ CONCURRENT_KERNEL, // on-device kernels
+ EXTERNAL_CORRELATION,
+ CUDA_RUNTIME, // host side cuda runtime events
+ CUDA_DRIVER, // host side cuda driver events
+ CPU_INSTANT_EVENT, // host side point-like events
+ PYTHON_FUNCTION,
+ OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
+
+ // Optional Activity types
+ CUDA_SYNC, // synchronization events between runtime and kernels
+ GLOW_RUNTIME, // host side glow runtime events
+ MTIA_RUNTIME, // host side MTIA runtime events
+ CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+ HPU_OP, // HPU host side runtime event
+ XPU_RUNTIME, // host side xpu runtime events
+
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,
+};
+}
+
+#endif
#include <torch/csrc/Export.h>
#include <torch/csrc/profiler/api.h>