gnu: python-pytorch: Update to 2.4.0.

This also updates the qnnpack-pytorch package.

* gnu/packages/machine-learning.scm (%python-pytorch-version): Set to 2.4.0.
(%python-pytorch-src): Adjust hash accordingly.
[source]: Remove obsolete file deletions in snippet.
(python-pytorch)[arguments]<#:phases>: Disable telemetry and set Python
install directory.  Remove obsolete USE_FFMPEG, USE_OPENCV and
USE_PYTORCH_QNNPACK environment variables.
[native-inputs]: Remove python-typing-extensions.
[inputs]: Add brotli, cpp-httplib and zlib.  Remove qnnpack.  Use oneapi-dnnl,
qnnpack-pytorch and xnnpack for all systems.
[propagated-inputs]: Add onnx and python-typing-extensions.  Use cpuinfo for
all systems.
[supported-systems]: New field.
(python-pytorch-for-r-torch)[inputs]: Add qnnpack.
* gnu/packages/patches/python-pytorch-fix-codegen.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-runpath.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-system-libraries.patch: Adjust patch.
* gnu/packages/patches/python-pytorch-without-kineto.patch: Adjust patch.
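
As a hedged sketch of how the new origin hash can be re-derived (the v2.4.0 tag
name, the nar serializer flag, and a plain non-recursive checkout are
assumptions, not part of this commit):

  git clone --depth 1 --branch v2.4.0 https://github.com/pytorch/pytorch
  cd pytorch
  # Hash the checkout, excluding .git; under the above assumptions this
  # should print the base32 value recorded in %python-pytorch-src below.
  guix hash --serializer=nar -x .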
Author: David Elsing
Date: 2024-08-04 22:15:58 +00:00
Committer: Ludovic Courtès
Commit: b44b2e346c (parent 87fd4d217c)
GPG key ID: 090B11993D9AEBB5
5 changed files with 119 additions and 134 deletions

gnu/packages/machine-learning.scm

@@ -4466,7 +4466,7 @@ (define-public ideep-pytorch-for-r-torch
(base32
"0hdpkhcjry22fjx2zg2r48v7f4ljrclzj0li2pgk76kvyblfbyvm"))))))
-(define %python-pytorch-version "2.2.1")
+(define %python-pytorch-version "2.4.0")
(define %python-pytorch-src
(origin
@@ -4477,7 +4477,7 @@ (define %python-pytorch-src
(file-name (git-file-name "python-pytorch" %python-pytorch-version))
(sha256
(base32
-"03mm0pwwb5lxdsmmiw3cch9fijgjw81kmmc4ln9rlyazkm7l1r48"))
+"18hdhzr12brj0b7ppyiscax0dbra30207qx0cckw78midfkcn7cn"))
(patches (search-patches "python-pytorch-system-libraries.patch"
"python-pytorch-runpath.patch"
"python-pytorch-without-kineto.patch"
@@ -4505,14 +4505,6 @@ (define %python-pytorch-src
delete-file
'("aten/src/ATen/nnapi/nnapi_wrapper.cpp"
"aten/src/ATen/nnapi/nnapi_wrapper.h"
-"caffe2/mobile/contrib/ios/mpscnn/mpscnn_kernels.h"
-"caffe2/proto/caffe2_legacy_pb2.pyi"
-"caffe2/proto/caffe2_pb2.pyi"
-"caffe2/proto/hsm_pb2.pyi"
-"caffe2/proto/metanet_pb2.pyi"
-"caffe2/proto/predictor_consts_pb2.pyi"
-"caffe2/proto/prof_dag_pb2.pyi"
-"caffe2/proto/torch_pb2.pyi"
;; These files contain just lists of floating point values and
;; might be as well hand-written.
;; "test/cpp/api/init_baseline.h"
@@ -4619,7 +4611,18 @@ (define-public python-pytorch
#$(this-package-native-input "pocketfft-cpp") "/include"))
(("#FP16_INCLUDE_DIR")
(string-append
-#$(this-package-input "fp16") "/include")))))
+#$(this-package-input "fp16") "/include"))
+;; Disable opentelemetry
+((".*(add_library|target_include_directories).*opentelemetry.*")
+""))
+(substitute* "torch/CMakeLists.txt"
+((".*opentelemetry.*") ""))
+;; Fix Python install directory
+(substitute* "caffe2/CMakeLists.txt"
+(("\\$\\{Python_SITELIB\\}")
+(string-append #$output "/lib/python"
+#$(version-major+minor (package-version python))
+"/site-packages")))))
(add-before 'build 'use-system-libraries
(lambda _
(substitute* '("caffe2/serialize/crc.cc"
@@ -4641,9 +4644,7 @@ (define-public python-pytorch
name))
'("compat_bindings.cpp" "timer_callgrind_template.cpp")))
(("<callgrind.h>") "<valgrind/callgrind.h>"))
-(setenv "USE_FFMPEG" "1")
(setenv "USE_VULKAN" "1")
-(setenv "USE_OPENCV" "1")
;; Tell 'setup.py' to let 'CMakeLists.txt' know that we
;; want to use "system libraries" instead of the bundled
;; ones.
@@ -4659,8 +4660,7 @@ (define-public python-pytorch
(or (%current-target-system)
(%current-system))
(package-transitive-supported-systems qnnpack)))
-(setenv "USE_QNNPACK" "0")
-(setenv "USE_PYTORCH_QNNPACK" "0"))))
+(setenv "USE_QNNPACK" "0"))))
;; PyTorch is still built with AVX2 and AVX-512 support selected at
;; runtime, but these dependencies require it (nnpack only for
;; x86_64).
@@ -4773,15 +4773,15 @@ (define-public python-pytorch
python-pytest-xdist
python-hypothesis
python-types-dataclasses
-python-typing-extensions
shaderc
valgrind))
(inputs
(append
(list asmjit
+brotli ; for cpp-httplib
clog
+cpp-httplib
eigen
-ffmpeg
flatbuffers-next
fmt
foxi
@@ -4793,38 +4793,33 @@ (define-public python-pytorch
googlebenchmark
libuv
miniz-for-pytorch
+oneapi-dnnl
openblas
-opencv
openmpi
+openssl ; for cpp-httplib
pthreadpool
protobuf
pybind11
+;; qnnpack
+qnnpack-pytorch
sleef
tensorpipe
vulkan-headers
vulkan-loader
vulkan-memory-allocator
+xnnpack
+zlib ; for cpp-httplib
zstd)
-;; TODO: fix build on 32 bit systems once Rust is available.
-(filter
-(lambda (pkg)
-(member (or (%current-target-system)
-(%current-system))
-(package-transitive-supported-systems pkg)))
-(list oneapi-dnnl
-qnnpack
-qnnpack-pytorch
-xnnpack))
;; nnpack requires AVX2 for x86_64-linux
-(filter
-(lambda (pkg)
-(member (or (%current-target-system)
-(%current-system)))
-'("armhf-linux" "aarch64-linux")))
-(list nnpack))))
+(if (equal? (or (%current-target-system)
+(%current-system))
+'("aarch64-linux")
+(list nnpack)
+'())))
(propagated-inputs
(append
-(list onnx ;propagated for its Python modules
+(list cpuinfo
+onnx ;propagated for its Python modules
onnx-optimizer
python-astunparse
python-click
@@ -4841,15 +4836,11 @@ (define-public python-pytorch
python-pyyaml
python-requests
python-sympy
-python-typing-extensions)
-(filter
-(lambda (pkg)
-(member (or (%current-target-system)
-(%current-system))
-(package-transitive-supported-systems pkg)))
-(list cpuinfo))))
+python-typing-extensions)))
(home-page "https://pytorch.org/")
(synopsis "Python library for tensor computation and deep neural networks")
+;; TODO: Support other 64-bit systems.
+(supported-systems '("x86_64-linux" "aarch64-linux"))
(description
"PyTorch is a Python package that provides two high-level features:
@@ -4927,6 +4918,7 @@ (define-public python-pytorch-for-r-torch
(replace "ideep-pytorch" ideep-pytorch-for-r-torch)))
(inputs
(modify-inputs (package-inputs python-pytorch)
+(prepend qnnpack)
(replace "qnnpack-pytorch" qnnpack-pytorch-for-r-torch)
(replace "oneapi-dnnl" oneapi-dnnl-for-r-torch)
(replace "xnnpack" xnnpack-for-r-torch)))

gnu/packages/patches/python-pytorch-fix-codegen.patch

@@ -6,7 +6,7 @@ is later corrected. codegen_external.py is patched to avoid duplicate
functions and add the static keyword as in the existing generated file.
diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
-index cc0263dbbf..ac34e84b82 100644
+index cc0263d..ac34e84 100644
--- a/tools/gen_flatbuffers.sh
+++ b/tools/gen_flatbuffers.sh
@@ -1,13 +1,13 @@
@@ -32,10 +32,10 @@ index cc0263dbbf..ac34e84b82 100644
-c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
-index bc69b05162..0f8df81de3 100644
+index 5dcf1b2..0e20b0c 100644
--- a/torch/csrc/jit/tensorexpr/codegen_external.py
+++ b/torch/csrc/jit/tensorexpr/codegen_external.py
-@@ -20,9 +20,14 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
native_functions = parse_native_yaml(native_functions_path, tags_path)
func_decls = []
func_registrations = []
@@ -51,7 +51,7 @@ index bc69b05162..0f8df81de3 100644
args = schema.arguments
# Only supports extern calls for functions with out variants
if not schema.is_out_fn():
-@@ -62,7 +67,7 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
# print(tensor_decls, name, arg_names)
func_decl = f"""\
@@ -61,7 +61,7 @@ index bc69b05162..0f8df81de3 100644
void** buf_data,
int64_t* buf_ranks,
diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
-index 7cfbb803f9..2e69bb1868 100644
+index 7a0024f..6b2445f 100644
--- a/torchgen/decompositions/gen_jit_decompositions.py
+++ b/torchgen/decompositions/gen_jit_decompositions.py
@@ -1,8 +1,12 @@
@@ -88,12 +88,12 @@ index 7cfbb803f9..2e69bb1868 100644
write_decomposition_util_file(str(upgrader_path))
diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
-index dab1568580..55c58715fc 100644
+index 2907076..6866332 100644
--- a/torchgen/operator_versions/gen_mobile_upgraders.py
+++ b/torchgen/operator_versions/gen_mobile_upgraders.py
-@@ -2,10 +2,12 @@
-import os
+@@ -3,10 +3,12 @@ import os
from enum import Enum
+from operator import itemgetter
from pathlib import Path
+import sys
from typing import Any, Dict, List
@@ -106,7 +106,7 @@ index dab1568580..55c58715fc 100644
from torchgen.code_template import CodeTemplate
from torchgen.operator_versions.gen_mobile_upgraders_constant import (
-@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
+@@ -263,7 +265,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
def construct_version_maps(
upgrader_bytecode_function_to_index_map: Dict[str, Any]
) -> str:
@@ -115,10 +115,10 @@ index dab1568580..55c58715fc 100644
+ version_map = torch._C._get_operator_version_map()
+ else:
+ version_map = {}
-sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0]) # type: ignore[no-any-return]
+sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0)) # type: ignore[no-any-return]
sorted_version_map = dict(sorted_version_map_)
-@@ -378,7 +383,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+@@ -379,7 +384,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
def main() -> None:
@@ -131,12 +131,12 @@ index dab1568580..55c58715fc 100644
for up in sorted_upgrader_list:
print("after sort upgrader : ", next(iter(up)))
diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
-index c6336a6951..34e394d818 100644
+index bdfd5c7..72b237a 100644
--- a/torchgen/shape_functions/gen_jit_shape_functions.py
+++ b/torchgen/shape_functions/gen_jit_shape_functions.py
@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo"""
if not file_path.exists():
-raise Exception(err_msg)
+raise Exception(err_msg) # noqa: TRY002
-spec = importlib.util.spec_from_file_location(module_name, file_path)
-assert spec is not None

gnu/packages/patches/python-pytorch-runpath.patch

@@ -2,21 +2,8 @@ Libraries (such as 'libtorch_cpu.so') and executables (such as 'torch_shm_manage
get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
Make sure RUNPATH matches that.
-diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 74836372..c8eb69d1 100644
---- a/caffe2/CMakeLists.txt
-+++ b/caffe2/CMakeLists.txt
-@@ -1910,7 +1910,7 @@ if(BUILD_PYTHON)
-if(${BUILDING_WITH_TORCH_LIBS})
-# site-packages/caffe2/python/caffe2_pybind11_state
-# site-packages/torch/lib
-- set(caffe2_pybind11_rpath "${_rpath_portable_origin}/../../torch/lib")
-+ set(caffe2_pybind11_rpath $ORIGIN/../../torch/lib)
-endif(${BUILDING_WITH_TORCH_LIBS})
-# Must also include `CMAKE_SHARED_LINKER_FLAGS` in linker flags for
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index acc95842..8f8fb7d7 100644
+index f1f2eb7..cb5caea 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -4,7 +4,7 @@ if(APPLE)
@@ -29,10 +16,10 @@ index acc95842..8f8fb7d7 100644
# Use separate rpaths during build and install phases
set(CMAKE_SKIP_BUILD_RPATH FALSE)
diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
-index f2f32745..db21b656 100644
+index bdfa4bf..2a75e38 100644
--- a/functorch/CMakeLists.txt
+++ b/functorch/CMakeLists.txt
-@@ -21,7 +21,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
+@@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${CMAKE_BINARY_DIR}/functorch)

gnu/packages/patches/python-pytorch-system-libraries.patch

@@ -1,16 +1,14 @@
Patch build files to also system libraries instead of bundled ones for the
libraries not supported or working only by specifying USE_SYSTEM_LIBS. This
includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
-ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack,
+httlib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
-For QNNPACK, two versions were bundled and are required: The upstream one and
-an internal fork (now in the package qnnpack-pytorch).
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 2c2b967..5ac5fa6 100644
+index 0087dd9..0235fa1 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
-@@ -371,9 +371,9 @@ if(AT_NNPACK_ENABLED)
+@@ -419,9 +419,9 @@ if(AT_NNPACK_ENABLED)
list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif()
@@ -24,10 +22,10 @@ index 2c2b967..5ac5fa6 100644
if(NOT CMAKE_SYSTEM_PROCESSOR MATCHES "^(s390x|ppc64le)$")
list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 7483637..093de40 100644
+index 89c31fa..e6d9ef1 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
-@@ -111,9 +111,6 @@ if(NOT MSVC AND USE_XNNPACK)
+@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
if(NOT TARGET fxdiv)
set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ -37,7 +35,7 @@ index 7483637..093de40 100644
endif()
endif()
-@@ -1055,7 +1052,6 @@ elseif(USE_CUDA)
+@@ -1075,7 +1072,6 @@ if(USE_XPU)
endif()
if(NOT MSVC AND USE_XNNPACK)
@@ -45,7 +43,17 @@ index 7483637..093de40 100644
endif()
# ==========================================================
-@@ -1396,6 +1392,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+@@ -1189,6 +1185,9 @@ endif()
+target_include_directories(torch_cpu PRIVATE
+${TORCH_ROOT}/third_party/cpp-httplib)
++find_package(httplib REQUIRED)
++target_link_libraries(torch_cpu PUBLIC httplib::httplib)
++
+install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
+DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
+FILES_MATCHING PATTERN "*.h" PATTERN "*.hpp")
+@@ -1417,6 +1416,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
+target_link_libraries(torch_cpu PRIVATE miniz clog)
if(USE_MPI)
target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
endif()
@@ -1653,7 +1650,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK) @@ -1694,7 +1694,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}") add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}") add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
target_link_libraries(static_runtime_bench torch_library benchmark) target_link_libraries(static_runtime_bench torch_library benchmark)
@ -61,8 +69,8 @@ index 7483637..093de40 100644
+ target_link_libraries(static_runtime_test torch_library gtest_main gtest) + target_link_libraries(static_runtime_test torch_library gtest_main gtest)
endif() endif()
if(BUILD_TENSOREXPR_BENCHMARK) if(BUILD_MOBILE_BENCHMARK)
@@ -1680,7 +1677,7 @@ if(BUILD_MOBILE_TEST) @@ -1713,7 +1713,7 @@ if(BUILD_MOBILE_TEST)
foreach(test_src ${ATen_MOBILE_TEST_SRCS}) foreach(test_src ${ATen_MOBILE_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE) get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}") add_executable(${test_name} "${test_src}")
@ -71,7 +79,7 @@ index 7483637..093de40 100644
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE}) target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
@@ -1701,7 +1698,7 @@ if(BUILD_TEST) @@ -1734,7 +1734,7 @@ if(BUILD_TEST)
if(NOT MSVC) if(NOT MSVC)
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp) add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
# TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR) # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
@ -80,25 +88,25 @@ index 7483637..093de40 100644
if(USE_FBGEMM) if(USE_FBGEMM)
target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm) target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
endif() endif()
@@ -1715,7 +1712,7 @@ if(BUILD_TEST) @@ -1748,7 +1748,7 @@ if(BUILD_TEST)
endif() endif()
else() else()
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}") add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main) - target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
+ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest) + target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
endif() endif()
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>) target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
@@ -1732,7 +1729,7 @@ if(BUILD_TEST) @@ -1765,7 +1765,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_CPU_TEST_SRCS}) foreach(test_src ${Caffe2_CPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE) get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}") add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main) - target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest) + target_link_libraries(${test_name} torch_library gtest_main gtest)
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>) if(NOT MSVC)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>) target_link_libraries(${test_name} stdc++)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE}) endif()
@@ -1795,7 +1792,7 @@ if(BUILD_TEST) @@ -1845,7 +1845,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_VULKAN_TEST_SRCS}) foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE) get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}") add_executable(${test_name} "${test_src}")
@ -120,10 +128,10 @@ index 1552b59..67e1a9a 100644
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index acc9584..97275bf 100644 index f1f2eb7..b4d2033 100644
--- a/cmake/Dependencies.cmake --- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake
@@ -283,7 +283,7 @@ endif() @@ -263,7 +263,7 @@ endif()
# --- [ PocketFFT # --- [ PocketFFT
set(AT_POCKETFFT_ENABLED 0) set(AT_POCKETFFT_ENABLED 0)
if(NOT AT_MKL_ENABLED) if(NOT AT_MKL_ENABLED)
@ -132,27 +140,7 @@ index acc9584..97275bf 100644
if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}") if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}") message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h") elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
@@ -489,19 +489,6 @@ if(USE_QNNPACK) @@ -458,15 +458,6 @@ if(USE_PYTORCH_QNNPACK)
set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
- add_subdirectory(
- "${QNNPACK_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
-
- # TODO: See https://github.com/pytorch/pytorch/issues/56285
- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
- target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
- endif()
-
- # We build static versions of QNNPACK and pthreadpool but link
- # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
if(QNNPACK_CUSTOM_THREADPOOL)
target_compile_definitions(
@@ -550,13 +537,6 @@ if(USE_PYTORCH_QNNPACK)
set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "") set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "") set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@ -163,10 +151,12 @@ index acc9584..97275bf 100644
- # them into a shared library for Caffe2, so they need PIC. - # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON) - set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON) - set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
- # QNNPACK depends on gemmlowp headers
- target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL) if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
target_compile_definitions( target_compile_definitions(
@@ -728,11 +708,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST) @@ -653,11 +644,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
# this shouldn't be necessary anymore. # this shouldn't be necessary anymore.
get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES) get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "") set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@ -178,7 +168,7 @@ index acc9584..97275bf 100644
# We will not need to test benchmark lib itself. # We will not need to test benchmark lib itself.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.") set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
@@ -810,16 +785,6 @@ if(USE_FBGEMM) @@ -735,16 +721,6 @@ if(USE_FBGEMM)
if(USE_ASAN) if(USE_ASAN)
set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM") set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
endif() endif()
@ -195,7 +185,7 @@ index acc9584..97275bf 100644
endif() endif()
if(USE_FBGEMM) if(USE_FBGEMM)
@@ -979,7 +944,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16) @@ -812,7 +788,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
"${FP16_SOURCE_DIR}" "${FP16_SOURCE_DIR}"
"${CONFU_DEPENDENCIES_BINARY_DIR}/FP16") "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16) elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
@ -204,7 +194,7 @@ index acc9584..97275bf 100644
set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C) set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
endif() endif()
list(APPEND Caffe2_DEPENDENCY_LIBS fp16) list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
@@ -1362,7 +1327,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE) @@ -1159,7 +1135,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
# Tensorpipe uses cuda_add_library # Tensorpipe uses cuda_add_library
torch_update_find_cuda_flags() torch_update_find_cuda_flags()
@ -212,7 +202,7 @@ index acc9584..97275bf 100644
list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe) list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
if(USE_CUDA) if(USE_CUDA)
@@ -1529,7 +1493,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) @@ -1288,7 +1263,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17) set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
endif() endif()
endif() endif()
@ -220,7 +210,7 @@ index acc9584..97275bf 100644
add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE}) add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
if(NOT USE_SYSTEM_ONNX) if(NOT USE_SYSTEM_ONNX)
@@ -1560,7 +1523,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX) @@ -1319,7 +1293,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
endif() endif()
set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY}) set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}") message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
@ -229,7 +219,7 @@ index acc9584..97275bf 100644
endif() endif()
include_directories(${FOXI_INCLUDE_DIRS}) include_directories(${FOXI_INCLUDE_DIRS})
list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader) list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
@@ -1739,9 +1702,8 @@ if(NOT INTERN_BUILD_MOBILE) @@ -1476,9 +1450,8 @@ if(NOT INTERN_BUILD_MOBILE)
endif() endif()
if(USE_MKLDNN) if(USE_MKLDNN)
include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake) include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
@ -237,10 +227,10 @@ index acc9584..97275bf 100644
+ if(DNNL_FOUND) + if(DNNL_FOUND)
set(AT_MKLDNN_ENABLED 1) set(AT_MKLDNN_ENABLED 1)
- include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR}) - include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
if(BUILD_CAFFE2_OPS) else()
list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn) message(WARNING "MKLDNN could not be found.")
endif(BUILD_CAFFE2_OPS) caffe2_update_option(USE_MKLDNN OFF)
@@ -1796,7 +1758,7 @@ endif() @@ -1530,7 +1503,7 @@ endif()
# #
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}) set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@ -249,7 +239,7 @@ index acc9584..97275bf 100644
# Disable compiler feature checks for `fmt`. # Disable compiler feature checks for `fmt`.
# #
@@ -1805,7 +1767,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt) @@ -1539,7 +1512,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it # `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks. # shouldn't be too bad to just disable the checks.
@ -299,10 +289,10 @@ index 8793562..9f8fa3d 100644
- ${MKLDNN_LIBRARIES}) - ${MKLDNN_LIBRARIES})
+ DNNL::dnnl) + DNNL::dnnl)
diff --git a/setup.py b/setup.py diff --git a/setup.py b/setup.py
index 81f3c6c..3251cab 100644 index 9ec41cd..1f505fe 100644
--- a/setup.py --- a/setup.py
+++ b/setup.py +++ b/setup.py
@@ -482,13 +482,9 @@ def build_deps(): @@ -494,13 +494,9 @@ def build_deps():
# Windows has very poor support for them. # Windows has very poor support for them.
sym_files = [ sym_files = [
"tools/shared/_utils_internal.py", "tools/shared/_utils_internal.py",
@ -384,10 +374,10 @@ index 5c89748..ef84c57 100644
endif() endif()
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
index 012471d..d39b625 100644 index 179270c..72f5582 100644
--- a/test/cpp/tensorexpr/CMakeLists.txt --- a/test/cpp/tensorexpr/CMakeLists.txt
+++ b/test/cpp/tensorexpr/CMakeLists.txt +++ b/test/cpp/tensorexpr/CMakeLists.txt
@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE}) @@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
# pthreadpool header. For some build environment we need add the dependency # pthreadpool header. For some build environment we need add the dependency
# explicitly. # explicitly.
if(USE_PTHREADPOOL) if(USE_PTHREADPOOL)
@ -396,3 +386,15 @@ index 012471d..d39b625 100644
endif() endif()
if(USE_CUDA) if(USE_CUDA)
target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA) target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 10a44af..33e2df4 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -81,7 +81,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
Python::Module
pybind::pybind11
opentelemetry::api
- httplib
shm
fmt::fmt-header-only
ATEN_CPU_FILES_GEN_LIB)

gnu/packages/patches/python-pytorch-without-kineto.patch

@@ -1,12 +1,12 @@
Even when building without Kineto, the <ActivityType.h> header is still
imported and the ActivityType type is used. This patch was copied from
-https://github.com/pytorch/pytorch/pull/111048.
+https://github.com/pytorch/pytorch/pull/111048 and adapted.
diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
-index e92cbf00..68985ab7 100644
+index 7a3b788..7f49d18 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
-@@ -12,7 +12,51 @@
+@@ -12,7 +12,55 @@
#undef USE_KINETO
#endif
@@ -48,6 +48,10 @@ index e92cbf00..68985ab7 100644
+ MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+ HPU_OP, // HPU host side runtime event
+ XPU_RUNTIME, // host side xpu runtime events
++ MTIA_WORKLOADD,
++
++ PRIVATEUSE1_RUNTIME,
++ PRIVATEUSE1_DRIVER,
+
+ ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+ OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC,