diff --git a/gnu/local.mk b/gnu/local.mk index 40d10db02c..629234d584 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -1390,6 +1390,7 @@ dist_patch_DATA = \ %D%/packages/patches/glib-appinfo-watch.patch \ %D%/packages/patches/glib-skip-failing-test.patch \ %D%/packages/patches/glibc-2.33-riscv64-miscompilation.patch \ + %D%/packages/patches/glibc-2.39-git-updates.patch \ %D%/packages/patches/glibc-CVE-2019-7309.patch \ %D%/packages/patches/glibc-CVE-2019-9169.patch \ %D%/packages/patches/glibc-CVE-2019-19126.patch \ diff --git a/gnu/packages/base.scm b/gnu/packages/base.scm index 1a18be4416..b1599817a6 100644 --- a/gnu/packages/base.scm +++ b/gnu/packages/base.scm @@ -830,7 +830,8 @@ (define-public glibc (sha256 (base32 "09nrwb0ksbah9k35jchd28xxp2hidilqdgz7b8v5f30pz1yd8yzp")) - (patches (search-patches "glibc-ldd-powerpc.patch" + (patches (search-patches "glibc-2.39-git-updates.patch" + "glibc-ldd-powerpc.patch" "glibc-2.38-ldd-x86_64.patch" "glibc-dl-cache.patch" "glibc-2.37-versioned-locpath.patch" @@ -842,6 +843,8 @@ (define-public glibc "glibc-hurd-mach-print.patch" "glibc-hurd-gettyent.patch" "glibc-hurd-getauxval.patch")))) + (properties `((lint-hidden-cve . ("CVE-2024-33601" "CVE-2024-33602" + "CVE-2024-33600" "CVE-2024-33599")))) (build-system gnu-build-system) ;; Glibc's refers to , for instance, so glibc diff --git a/gnu/packages/patches/glibc-2.39-git-updates.patch b/gnu/packages/patches/glibc-2.39-git-updates.patch new file mode 100644 index 0000000000..251ff36872 --- /dev/null +++ b/gnu/packages/patches/glibc-2.39-git-updates.patch @@ -0,0 +1,9653 @@ +Updates from upstream's release/2.39/master branch, up to commit c7c3f5bf80ae86b34501f473f1a9fc545c911b7f. + +diff --git a/ADVISORIES b/ADVISORIES +new file mode 100644 +index 0000000000..d4e33f2df3 +--- /dev/null ++++ b/ADVISORIES +@@ -0,0 +1,2 @@ ++For the GNU C Library Security Advisories, see the git master branch: ++https://sourceware.org/git/?p=glibc.git;a=tree;f=advisories;hb=HEAD +diff --git a/Makeconfig b/Makeconfig +index 85e00cef94..522182abdc 100644 +--- a/Makeconfig ++++ b/Makeconfig +@@ -586,10 +586,13 @@ link-libc-rpath-link = -Wl,-rpath-link=$(rpath-link) + # before the expansion of LDLIBS-* variables). + + # Tests use -Wl,-rpath instead of -Wl,-rpath-link for +-# build-hardcoded-path-in-tests. ++# build-hardcoded-path-in-tests. Add -Wl,--disable-new-dtags to force ++# DT_RPATH instead of DT_RUNPATH which only applies to DT_NEEDED entries ++# in the executable and doesn't applies to DT_NEEDED entries in shared ++# libraries which are loaded via DT_NEEDED entries in the executable. + ifeq (yes,$(build-hardcoded-path-in-tests)) +-link-libc-tests-rpath-link = $(link-libc-rpath) +-link-test-modules-rpath-link = $(link-libc-rpath) ++link-libc-tests-rpath-link = $(link-libc-rpath) -Wl,--disable-new-dtags ++link-test-modules-rpath-link = $(link-libc-rpath) -Wl,--disable-new-dtags + else + link-libc-tests-rpath-link = $(link-libc-rpath-link) + link-test-modules-rpath-link = +diff --git a/Makefile b/Makefile +index 7052b46df8..2e351c0321 100644 +--- a/Makefile ++++ b/Makefile +@@ -577,6 +577,13 @@ $(objpfx)lint-makefiles.out: scripts/lint-makefiles.sh + $(SHELL) $< "$(PYTHON)" `pwd` > $@ ; \ + $(evaluate-test) + ++# Link libc.a as a whole to verify that it does not contain multiple ++# definitions of any symbols. ++tests-special += $(objpfx)link-static-libc.out ++$(objpfx)link-static-libc.out: ++ $(LINK.o) $(whole-archive) -r $(objpfx)libc.a -o /dev/null > $@ 2>&1; \ ++ $(evaluate-test) ++ + # Print test summary for tests in $1 .sum file; + # $2 is optional test identifier. + # Fail if there are unexpected failures in the test results. +diff --git a/advisories/GLIBC-SA-2023-0001 b/advisories/GLIBC-SA-2023-0001 +deleted file mode 100644 +index 3d19c91b6a..0000000000 +--- a/advisories/GLIBC-SA-2023-0001 ++++ /dev/null +@@ -1,14 +0,0 @@ +-printf: incorrect output for integers with thousands separator and width field +- +-When the printf family of functions is called with a format specifier +-that uses an (enable grouping) and a minimum width +-specifier, the resulting output could be larger than reasonably expected +-by a caller that computed a tight bound on the buffer size. The +-resulting larger than expected output could result in a buffer overflow +-in the printf family of functions. +- +-CVE-Id: CVE-2023-25139 +-Public-Date: 2023-02-02 +-Vulnerable-Commit: e88b9f0e5cc50cab57a299dc7efe1a4eb385161d (2.37) +-Fix-Commit: c980549cc6a1c03c23cc2fe3e7b0fe626a0364b0 (2.38) +-Fix-Commit: 07b9521fc6369d000216b96562ff7c0ed32a16c4 (2.37-4) +diff --git a/advisories/GLIBC-SA-2023-0002 b/advisories/GLIBC-SA-2023-0002 +deleted file mode 100644 +index 5122669a64..0000000000 +--- a/advisories/GLIBC-SA-2023-0002 ++++ /dev/null +@@ -1,15 +0,0 @@ +-getaddrinfo: Stack read overflow in no-aaaa mode +- +-If the system is configured in no-aaaa mode via /etc/resolv.conf, +-getaddrinfo is called for the AF_UNSPEC address family, and a DNS +-response is received over TCP that is larger than 2048 bytes, +-getaddrinfo may potentially disclose stack contents via the returned +-address data, or crash. +- +-CVE-Id: CVE-2023-4527 +-Public-Date: 2023-09-12 +-Vulnerable-Commit: f282cdbe7f436c75864e5640a409a10485e9abb2 (2.36) +-Fix-Commit: bd77dd7e73e3530203be1c52c8a29d08270cb25d (2.39) +-Fix-Commit: 4ea972b7edd7e36610e8cde18bf7a8149d7bac4f (2.36-113) +-Fix-Commit: b7529346025a130fee483d42178b5c118da971bb (2.37-38) +-Fix-Commit: b25508dd774b617f99419bdc3cf2ace4560cd2d6 (2.38-19) +diff --git a/advisories/GLIBC-SA-2023-0003 b/advisories/GLIBC-SA-2023-0003 +deleted file mode 100644 +index d3aef80348..0000000000 +--- a/advisories/GLIBC-SA-2023-0003 ++++ /dev/null +@@ -1,15 +0,0 @@ +-getaddrinfo: Potential use-after-free +- +-When an NSS plugin only implements the _gethostbyname2_r and +-_getcanonname_r callbacks, getaddrinfo could use memory that was freed +-during buffer resizing, potentially causing a crash or read or write to +-arbitrary memory. +- +-CVE-Id: CVE-2023-4806 +-Public-Date: 2023-09-12 +-Fix-Commit: 973fe93a5675c42798b2161c6f29c01b0e243994 (2.39) +-Fix-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) +-Fix-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) +-Fix-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) +-Fix-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) +-Fix-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) +diff --git a/advisories/GLIBC-SA-2023-0004 b/advisories/GLIBC-SA-2023-0004 +deleted file mode 100644 +index 5286a7aa54..0000000000 +--- a/advisories/GLIBC-SA-2023-0004 ++++ /dev/null +@@ -1,16 +0,0 @@ +-tunables: local privilege escalation through buffer overflow +- +-If a tunable of the form NAME=NAME=VAL is passed in the environment of a +-setuid program and NAME is valid, it may result in a buffer overflow, +-which could be exploited to achieve escalated privileges. This flaw was +-introduced in glibc 2.34. +- +-CVE-Id: CVE-2023-4911 +-Public-Date: 2023-10-03 +-Vulnerable-Commit: 2ed18c5b534d9e92fc006202a5af0df6b72e7aca (2.34) +-Fix-Commit: 1056e5b4c3f2d90ed2b4a55f96add28da2f4c8fa (2.39) +-Fix-Commit: dcc367f148bc92e7f3778a125f7a416b093964d9 (2.34-423) +-Fix-Commit: c84018a05aec80f5ee6f682db0da1130b0196aef (2.35-274) +-Fix-Commit: 22955ad85186ee05834e47e665056148ca07699c (2.36-118) +-Fix-Commit: b4e23c75aea756b4bddc4abcf27a1c6dca8b6bd3 (2.37-45) +-Fix-Commit: 750a45a783906a19591fb8ff6b7841470f1f5701 (2.38-27) +diff --git a/advisories/GLIBC-SA-2023-0005 b/advisories/GLIBC-SA-2023-0005 +deleted file mode 100644 +index cc4eb90b82..0000000000 +--- a/advisories/GLIBC-SA-2023-0005 ++++ /dev/null +@@ -1,18 +0,0 @@ +-getaddrinfo: DoS due to memory leak +- +-The fix for CVE-2023-4806 introduced a memory leak when an application +-calls getaddrinfo for AF_INET6 with AI_CANONNAME, AI_ALL and AI_V4MAPPED +-flags set. +- +-CVE-Id: CVE-2023-5156 +-Public-Date: 2023-09-25 +-Vulnerable-Commit: e09ee267c03e3150c2c9ba28625ab130705a485e (2.34-420) +-Vulnerable-Commit: e3ccb230a961b4797510e6a1f5f21fd9021853e7 (2.35-270) +-Vulnerable-Commit: a9728f798ec7f05454c95637ee6581afaa9b487d (2.36-115) +-Vulnerable-Commit: 6529a7466c935f36e9006b854d6f4e1d4876f942 (2.37-39) +-Vulnerable-Commit: 00ae4f10b504bc4564e9f22f00907093f1ab9338 (2.38-20) +-Fix-Commit: 8006457ab7e1cd556b919f477348a96fe88f2e49 (2.34-421) +-Fix-Commit: 17092c0311f954e6f3c010f73ce3a78c24ac279a (2.35-272) +-Fix-Commit: 856bac55f98dc840e7c27cfa82262b933385de90 (2.36-116) +-Fix-Commit: 4473d1b87d04b25cdd0e0354814eeaa421328268 (2.37-42) +-Fix-Commit: 5ee59ca371b99984232d7584fe2b1a758b4421d3 (2.38-24) +diff --git a/advisories/GLIBC-SA-2024-0001 b/advisories/GLIBC-SA-2024-0001 +deleted file mode 100644 +index 28931c75ae..0000000000 +--- a/advisories/GLIBC-SA-2024-0001 ++++ /dev/null +@@ -1,15 +0,0 @@ +-syslog: Heap buffer overflow in __vsyslog_internal +- +-__vsyslog_internal did not handle a case where printing a SYSLOG_HEADER +-containing a long program name failed to update the required buffer +-size, leading to the allocation and overflow of a too-small buffer on +-the heap. +- +-CVE-Id: CVE-2023-6246 +-Public-Date: 2024-01-30 +-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) +-Fix-Commit: 6bd0e4efcc78f3c0115e5ea9739a1642807450da (2.39) +-Fix-Commit: 23514c72b780f3da097ecf33a793b7ba9c2070d2 (2.38-42) +-Fix-Commit: 97a4292aa4a2642e251472b878d0ec4c46a0e59a (2.37-57) +-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) +-Fix-Commit: d1a83b6767f68b3cb5b4b4ea2617254acd040c82 (2.36-126) +diff --git a/advisories/GLIBC-SA-2024-0002 b/advisories/GLIBC-SA-2024-0002 +deleted file mode 100644 +index 940bfcf2fc..0000000000 +--- a/advisories/GLIBC-SA-2024-0002 ++++ /dev/null +@@ -1,15 +0,0 @@ +-syslog: Heap buffer overflow in __vsyslog_internal +- +-__vsyslog_internal used the return value of snprintf/vsnprintf to +-calculate buffer sizes for memory allocation. If these functions (for +-any reason) failed and returned -1, the resulting buffer would be too +-small to hold output. +- +-CVE-Id: CVE-2023-6779 +-Public-Date: 2024-01-30 +-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) +-Fix-Commit: 7e5a0c286da33159d47d0122007aac016f3e02cd (2.39) +-Fix-Commit: d0338312aace5bbfef85e03055e1212dd0e49578 (2.38-43) +-Fix-Commit: 67062eccd9a65d7fda9976a56aeaaf6c25a80214 (2.37-58) +-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) +-Fix-Commit: 2bc9d7c002bdac38b5c2a3f11b78e309d7765b83 (2.36-127) +diff --git a/advisories/GLIBC-SA-2024-0003 b/advisories/GLIBC-SA-2024-0003 +deleted file mode 100644 +index b43a5150ab..0000000000 +--- a/advisories/GLIBC-SA-2024-0003 ++++ /dev/null +@@ -1,13 +0,0 @@ +-syslog: Integer overflow in __vsyslog_internal +- +-__vsyslog_internal calculated a buffer size by adding two integers, but +-did not first check if the addition would overflow. +- +-CVE-Id: CVE-2023-6780 +-Public-Date: 2024-01-30 +-Vulnerable-Commit: 52a5be0df411ef3ff45c10c7c308cb92993d15b1 (2.37) +-Fix-Commit: ddf542da94caf97ff43cc2875c88749880b7259b (2.39) +-Fix-Commit: d37c2b20a4787463d192b32041c3406c2bd91de0 (2.38-44) +-Fix-Commit: 2b58cba076e912961ceaa5fa58588e4b10f791c0 (2.37-59) +-Vulnerable-Commit: b0e7888d1fa2dbd2d9e1645ec8c796abf78880b9 (2.36-16) +-Fix-Commit: b9b7d6a27aa0632f334352fa400771115b3c69b7 (2.36-128) +diff --git a/advisories/README b/advisories/README +deleted file mode 100644 +index 94e68b1350..0000000000 +--- a/advisories/README ++++ /dev/null +@@ -1,73 +0,0 @@ +-GNU C Library Security Advisory Format +-====================================== +- +-Security advisories in this directory follow a simple git commit log +-format, with a heading and free-format description augmented with tags +-to allow parsing key information. References to code changes are +-specific to the glibc repository and follow a specific format: +- +- Tag-name: (release-version) +- +-The indicates a specific commit in the repository. The +-release-version indicates the publicly consumable release in which this +-commit is known to exist. The release-version is derived from the +-git-describe format, (i.e. stripped out from glibc-2.34.NNN-gxxxx) and +-is of the form 2.34-NNN. If the -NNN suffix is absent, it means that +-the change is in that release tarball, otherwise the change is on the +-release/2.YY/master branch and not in any released tarball. +- +-The following tags are currently being used: +- +-CVE-Id: +-This is the CVE-Id assigned under the CVE Program +-(https://www.cve.org/). +- +-Public-Date: +-The date this issue became publicly known. +- +-Vulnerable-Commit: +-The commit that introduced this vulnerability. There could be multiple +-entries, one for each release branch in the glibc repository; the +-release-version portion of this tag should tell you which branch this is +-on. +- +-Fix-Commit: +-The commit that fixed this vulnerability. There could be multiple +-entries for each release branch in the glibc repository, indicating that +-all of those commits contributed to fixing that issue in each of those +-branches. +- +-Adding an Advisory +------------------- +- +-An advisory for a CVE needs to be added on the master branch in two steps: +- +-1. Add the text of the advisory without any Fix-Commit tags along with +- the fix for the CVE. Add the Vulnerable-Commit tag, if applicable. +- The advisories directory does not exist in release branches, so keep +- the advisory text commit distinct from the code changes, to ease +- backports. Ask for the GLIBC-SA advisory number from the security +- team. +- +-2. Finish all backports on release branches and then back on the msater +- branch, add all commit refs to the advisory using the Fix-Commit +- tags. Don't bother adding the release-version subscript since the +- next step will overwrite it. +- +-3. Run the process-advisories.sh script in the scripts directory on the +- advisory: +- +- scripts/process-advisories.sh update GLIBC-SA-YYYY-NNNN +- +- (replace YYYY-NNNN with the actual advisory number). +- +-4. Verify the updated advisory and push the result. +- +-Getting a NEWS snippet from advisories +--------------------------------------- +- +-Run: +- +- scripts/process-advisories.sh news +- +-and copy the content into the NEWS file. +diff --git a/bits/wordsize.h b/bits/wordsize.h +index 14edae3a11..53013a9275 100644 +--- a/bits/wordsize.h ++++ b/bits/wordsize.h +@@ -21,7 +21,9 @@ + #define __WORDSIZE32_PTRDIFF_LONG + + /* Set to 1 in order to force time types to be 32 bits instead of 64 bits in +- struct lastlog and struct utmp{,x} on 64-bit ports. This may be done in ++ struct lastlog and struct utmp{,x}. This may be done in + order to make 64-bit ports compatible with 32-bit ports. Set to 0 for +- 64-bit ports where the time types are 64-bits or for any 32-bit ports. */ ++ 64-bit ports where the time types are 64-bits and new 32-bit ports ++ where time_t is 64 bits, and there is no companion architecture with ++ 32-bit time_t. */ + #define __WORDSIZE_TIME64_COMPAT32 +diff --git a/config.h.in b/config.h.in +index 44a34072a4..1e647de585 100644 +--- a/config.h.in ++++ b/config.h.in +@@ -141,6 +141,9 @@ + /* LOONGARCH floating-point ABI for ld.so. */ + #undef LOONGARCH_ABI_FRLEN + ++/* Define whether ARM used hard-float and support VFPvX-D32. */ ++#undef HAVE_ARM_PCS_VFP_D32 ++ + /* Linux specific: minimum supported kernel version. */ + #undef __LINUX_KERNEL_VERSION + +@@ -283,6 +286,9 @@ + /* Define if x86 ISA level should be included in shared libraries. */ + #undef INCLUDE_X86_ISA_LEVEL + ++/* The x86 ISA level. 1 for baseline. Undefined on non-x86. */ ++#undef MINIMUM_X86_ISA_LEVEL ++ + /* Define if -msahf is enabled by default on x86. */ + #undef HAVE_X86_LAHF_SAHF + +diff --git a/configure b/configure +index 59ff1e415d..432e40a592 100755 +--- a/configure ++++ b/configure +@@ -653,7 +653,7 @@ LIBGD + libc_cv_cc_loop_to_function + libc_cv_cc_submachine + libc_cv_cc_nofma +-libc_cv_mtls_dialect_gnu2 ++libc_cv_mtls_descriptor + libc_cv_has_glob_dat + libc_cv_fpie + libc_cv_z_execstack +@@ -4760,6 +4760,9 @@ libc_config_ok=no + # whether to use such directories. + with_fp_cond=1 + ++# A preconfigure script may define another name to TLS descriptor variant ++mtls_descriptor=gnu2 ++ + if frags=`ls -d $srcdir/sysdeps/*/preconfigure 2> /dev/null` + then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysdeps preconfigure fragments" >&5 +@@ -7006,9 +7009,9 @@ fi + printf "%s\n" "$libc_cv_has_glob_dat" >&6; } + + +-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -mtls-dialect=gnu2" >&5 +-printf %s "checking for -mtls-dialect=gnu2... " >&6; } +-if test ${libc_cv_mtls_dialect_gnu2+y} ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for tls descriptor support" >&5 ++printf %s "checking for tls descriptor support... " >&6; } ++if test ${libc_cv_mtls_descriptor+y} + then : + printf %s "(cached) " >&6 + else $as_nop +@@ -7019,25 +7022,25 @@ void foo (void) + i = 10; + } + EOF +-if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=gnu2 -nostdlib -nostartfiles +- conftest.c -o conftest 1>&5' ++if { ac_try='${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles ++ -shared conftest.c -o conftest 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then +- libc_cv_mtls_dialect_gnu2=yes ++ libc_cv_mtls_descriptor=$mtls_descriptor + else +- libc_cv_mtls_dialect_gnu2=no ++ libc_cv_mtls_descriptor=no + fi + rm -f conftest* + fi +-{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_dialect_gnu2" >&5 +-printf "%s\n" "$libc_cv_mtls_dialect_gnu2" >&6; } ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_mtls_descriptor" >&5 ++printf "%s\n" "$libc_cv_mtls_descriptor" >&6; } + + config_vars="$config_vars +-have-mtls-dialect-gnu2 = $libc_cv_mtls_dialect_gnu2" ++have-mtls-descriptor = $libc_cv_mtls_descriptor" + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if -Wno-ignored-attributes is required for aliases" >&5 + printf %s "checking if -Wno-ignored-attributes is required for aliases... " >&6; } +diff --git a/configure.ac b/configure.ac +index 65799e5685..bdc385d03c 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -442,6 +442,9 @@ libc_config_ok=no + # whether to use such directories. + with_fp_cond=1 + ++# A preconfigure script may define another name to TLS descriptor variant ++mtls_descriptor=gnu2 ++ + dnl Let sysdeps/*/preconfigure act here. + LIBC_PRECONFIGURE([$srcdir], [for sysdeps]) + +@@ -1287,7 +1290,7 @@ fi + rm -f conftest*]) + AC_SUBST(libc_cv_has_glob_dat) + +-AC_CACHE_CHECK([for -mtls-dialect=gnu2], libc_cv_mtls_dialect_gnu2, ++AC_CACHE_CHECK([for tls descriptor support], libc_cv_mtls_descriptor, + [dnl + cat > conftest.c <&AS_MESSAGE_LOG_FD]) ++if AC_TRY_COMMAND([${CC-cc} $CFLAGS $CPPFLAGS -fPIC -mtls-dialect=$mtls_descriptor -nostdlib -nostartfiles ++ -shared conftest.c -o conftest 1>&AS_MESSAGE_LOG_FD]) + then +- libc_cv_mtls_dialect_gnu2=yes ++ libc_cv_mtls_descriptor=$mtls_descriptor + else +- libc_cv_mtls_dialect_gnu2=no ++ libc_cv_mtls_descriptor=no + fi + rm -f conftest*]) +-AC_SUBST(libc_cv_mtls_dialect_gnu2) +-LIBC_CONFIG_VAR([have-mtls-dialect-gnu2], [$libc_cv_mtls_dialect_gnu2]) ++AC_SUBST(libc_cv_mtls_descriptor) ++LIBC_CONFIG_VAR([have-mtls-descriptor], [$libc_cv_mtls_descriptor]) + + dnl clang emits an warning for a double alias redirection, to warn the + dnl original symbol is sed even when weak definition overrides it. +diff --git a/elf/Makefile b/elf/Makefile +index 5d78b659ce..a50a988e73 100644 +--- a/elf/Makefile ++++ b/elf/Makefile +@@ -170,6 +170,7 @@ CFLAGS-.op += $(call elide-stack-protector,.op,$(elide-routines.os)) + CFLAGS-.os += $(call elide-stack-protector,.os,$(all-rtld-routines)) + + # Add the requested compiler flags to the early startup code. ++CFLAGS-dl-misc.os += $(rtld-early-cflags) + CFLAGS-dl-printf.os += $(rtld-early-cflags) + CFLAGS-dl-setup_hash.os += $(rtld-early-cflags) + CFLAGS-dl-sysdep.os += $(rtld-early-cflags) +@@ -424,6 +425,7 @@ tests += \ + tst-glibc-hwcaps-prepend \ + tst-global1 \ + tst-global2 \ ++ tst-gnu2-tls2 \ + tst-initfinilazyfail \ + tst-initorder \ + tst-initorder2 \ +@@ -846,6 +848,9 @@ modules-names += \ + tst-filterobj-flt \ + tst-finilazyfailmod \ + tst-globalmod2 \ ++ tst-gnu2-tls2mod0 \ ++ tst-gnu2-tls2mod1 \ ++ tst-gnu2-tls2mod2 \ + tst-initlazyfailmod \ + tst-initorder2a \ + tst-initorder2b \ +@@ -995,13 +1000,13 @@ modules-names-tests = $(filter-out ifuncmod% tst-tlsmod%,\ + # For +depfiles in Makerules. + extra-test-objs += tst-auditmod17.os + +-ifeq (yes,$(have-mtls-dialect-gnu2)) ++ifneq (no,$(have-mtls-descriptor)) + tests += tst-gnu2-tls1 + modules-names += tst-gnu2-tls1mod + $(objpfx)tst-gnu2-tls1: $(objpfx)tst-gnu2-tls1mod.so + tst-gnu2-tls1mod.so-no-z-defs = yes +-CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=gnu2 +-endif # $(have-mtls-dialect-gnu2) ++CFLAGS-tst-gnu2-tls1mod.c += -mtls-dialect=$(have-mtls-descriptor) ++endif # $(have-mtls-descriptor) + + ifeq (yes,$(have-protected-data)) + modules-names += tst-protected1moda tst-protected1modb +@@ -2968,11 +2973,11 @@ $(objpfx)tst-tls-allocation-failure-static-patched.out: \ + $(objpfx)tst-audit-tlsdesc: $(objpfx)tst-audit-tlsdesc-mod1.so \ + $(objpfx)tst-audit-tlsdesc-mod2.so \ + $(shared-thread-library) +-ifeq (yes,$(have-mtls-dialect-gnu2)) ++ifneq (no,$(have-mtls-descriptor)) + # The test is valid for all TLS types, but we want to exercise GNU2 + # TLS if possible. +-CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=gnu2 +-CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=gnu2 ++CFLAGS-tst-audit-tlsdesc-mod1.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-audit-tlsdesc-mod2.c += -mtls-dialect=$(have-mtls-descriptor) + endif + $(objpfx)tst-audit-tlsdesc-dlopen: $(shared-thread-library) + $(objpfx)tst-audit-tlsdesc-dlopen.out: $(objpfx)tst-audit-tlsdesc-mod1.so \ +@@ -3044,8 +3049,18 @@ $(objpfx)tst-tlsgap.out: \ + $(objpfx)tst-tlsgap-mod0.so \ + $(objpfx)tst-tlsgap-mod1.so \ + $(objpfx)tst-tlsgap-mod2.so +-ifeq (yes,$(have-mtls-dialect-gnu2)) +-CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=gnu2 +-CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=gnu2 +-CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=gnu2 ++ ++$(objpfx)tst-gnu2-tls2: $(shared-thread-library) ++$(objpfx)tst-gnu2-tls2.out: \ ++ $(objpfx)tst-gnu2-tls2mod0.so \ ++ $(objpfx)tst-gnu2-tls2mod1.so \ ++ $(objpfx)tst-gnu2-tls2mod2.so ++ ++ifneq (no,$(have-mtls-descriptor)) ++CFLAGS-tst-tlsgap-mod0.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-tlsgap-mod1.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-tlsgap-mod2.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-gnu2-tls2mod0.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-gnu2-tls2mod1.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-gnu2-tls2mod2.c += -mtls-dialect=$(have-mtls-descriptor) + endif +diff --git a/elf/dl-diagnostics.c b/elf/dl-diagnostics.c +index 7345ebc4e5..aaf67b87e8 100644 +--- a/elf/dl-diagnostics.c ++++ b/elf/dl-diagnostics.c +@@ -235,6 +235,8 @@ _dl_print_diagnostics (char **environ) + _dl_diagnostics_print_labeled_value ("dl_hwcap", GLRO (dl_hwcap)); + _dl_diagnostics_print_labeled_value ("dl_hwcap_important", HWCAP_IMPORTANT); + _dl_diagnostics_print_labeled_value ("dl_hwcap2", GLRO (dl_hwcap2)); ++ _dl_diagnostics_print_labeled_value ("dl_hwcap3", GLRO (dl_hwcap3)); ++ _dl_diagnostics_print_labeled_value ("dl_hwcap4", GLRO (dl_hwcap4)); + _dl_diagnostics_print_labeled_string + ("dl_hwcaps_subdirs", _dl_hwcaps_subdirs); + _dl_diagnostics_print_labeled_value +diff --git a/elf/dl-support.c b/elf/dl-support.c +index 2f502c8b0d..451932dd03 100644 +--- a/elf/dl-support.c ++++ b/elf/dl-support.c +@@ -158,6 +158,8 @@ const ElfW(Phdr) *_dl_phdr; + size_t _dl_phnum; + uint64_t _dl_hwcap; + uint64_t _dl_hwcap2; ++uint64_t _dl_hwcap3; ++uint64_t _dl_hwcap4; + + enum dso_sort_algorithm _dl_dso_sort_algo; + +diff --git a/elf/dl-tunables.c b/elf/dl-tunables.c +index 03e1a68675..614ac9c047 100644 +--- a/elf/dl-tunables.c ++++ b/elf/dl-tunables.c +@@ -32,6 +32,7 @@ + #include + #include + #include ++#include + + #define TUNABLES_INTERNAL 1 + #include "dl-tunables.h" +@@ -223,6 +224,7 @@ parse_tunables_string (const char *valstring, struct tunable_toset_t *tunables) + { + tunables[ntunables++] = + (struct tunable_toset_t) { cur, value, p - value }; ++ + break; + } + } +@@ -234,23 +236,27 @@ parse_tunables_string (const char *valstring, struct tunable_toset_t *tunables) + static void + parse_tunables (const char *valstring) + { +- struct tunable_toset_t tunables[tunables_list_size]; +- int ntunables = parse_tunables_string (valstring, tunables); +- if (ntunables == -1) ++ struct tunable_toset_t tunables[tunables_list_size] = { 0 }; ++ if (parse_tunables_string (valstring, tunables) == -1) + { + _dl_error_printf ( + "WARNING: ld.so: invalid GLIBC_TUNABLES `%s': ignored.\n", valstring); + return; + } + +- for (int i = 0; i < ntunables; i++) +- if (!tunable_initialize (tunables[i].t, tunables[i].value, +- tunables[i].len)) +- _dl_error_printf ("WARNING: ld.so: invalid GLIBC_TUNABLES value `%.*s' " +- "for option `%s': ignored.\n", +- (int) tunables[i].len, +- tunables[i].value, +- tunables[i].t->name); ++ for (int i = 0; i < tunables_list_size; i++) ++ { ++ if (tunables[i].t == NULL) ++ continue; ++ ++ if (!tunable_initialize (tunables[i].t, tunables[i].value, ++ tunables[i].len)) ++ _dl_error_printf ("WARNING: ld.so: invalid GLIBC_TUNABLES value `%.*s' " ++ "for option `%s': ignored.\n", ++ (int) tunables[i].len, ++ tunables[i].value, ++ tunables[i].t->name); ++ } + } + + /* Initialize the tunables list from the environment. For now we only use the +diff --git a/elf/elf.h b/elf/elf.h +index 455731663c..1c394c64cd 100644 +--- a/elf/elf.h ++++ b/elf/elf.h +@@ -1234,6 +1234,10 @@ typedef struct + #define AT_RSEQ_FEATURE_SIZE 27 /* rseq supported feature size. */ + #define AT_RSEQ_ALIGN 28 /* rseq allocation alignment. */ + ++/* More machine-dependent hints about processor capabilities. */ ++#define AT_HWCAP3 29 /* extension of AT_HWCAP. */ ++#define AT_HWCAP4 30 /* extension of AT_HWCAP. */ ++ + #define AT_EXECFN 31 /* Filename of executable. */ + + /* Pointer to the global system page used for system calls and other +diff --git a/elf/tst-gnu2-tls2.c b/elf/tst-gnu2-tls2.c +new file mode 100644 +index 0000000000..7ac04d7f33 +--- /dev/null ++++ b/elf/tst-gnu2-tls2.c +@@ -0,0 +1,122 @@ ++/* Test TLSDESC relocation. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "tst-gnu2-tls2.h" ++ ++#ifndef IS_SUPPORTED ++# define IS_SUPPORTED() true ++#endif ++ ++/* An architecture can define it to clobber caller-saved registers in ++ malloc below to verify that the implicit TLSDESC call won't change ++ caller-saved registers. */ ++#ifndef PREPARE_MALLOC ++# define PREPARE_MALLOC() ++#endif ++ ++extern void * __libc_malloc (size_t); ++ ++size_t malloc_counter = 0; ++ ++void * ++malloc (size_t n) ++{ ++ PREPARE_MALLOC (); ++ malloc_counter++; ++ return __libc_malloc (n); ++} ++ ++static void *mod[3]; ++#ifndef MOD ++# define MOD(i) "tst-gnu2-tls2mod" #i ".so" ++#endif ++static const char *modname[3] = { MOD(0), MOD(1), MOD(2) }; ++#undef MOD ++ ++static void ++open_mod (int i) ++{ ++ mod[i] = xdlopen (modname[i], RTLD_LAZY); ++ printf ("open %s\n", modname[i]); ++} ++ ++static void ++close_mod (int i) ++{ ++ xdlclose (mod[i]); ++ mod[i] = NULL; ++ printf ("close %s\n", modname[i]); ++} ++ ++static void ++access_mod (int i, const char *sym) ++{ ++ struct tls var = { -1, -1, -1, -1 }; ++ struct tls *(*f) (struct tls *) = xdlsym (mod[i], sym); ++ /* Check that our malloc is called. */ ++ malloc_counter = 0; ++ struct tls *p = f (&var); ++ TEST_VERIFY (malloc_counter != 0); ++ printf ("access %s: %s() = %p\n", modname[i], sym, p); ++ TEST_VERIFY_EXIT (memcmp (p, &var, sizeof (var)) == 0); ++ ++(p->a); ++} ++ ++static void * ++start (void *arg) ++{ ++ /* The DTV generation is at the last dlopen of mod0 and the ++ entry for mod1 is NULL. */ ++ ++ open_mod (1); /* Reuse modid of mod1. Uses dynamic TLS. */ ++ ++ /* Force the slow path in GNU2 TLS descriptor call. */ ++ access_mod (1, "apply_tls"); ++ ++ return arg; ++} ++ ++static int ++do_test (void) ++{ ++ if (!IS_SUPPORTED ()) ++ return EXIT_UNSUPPORTED; ++ ++ open_mod (0); ++ open_mod (1); ++ open_mod (2); ++ close_mod (0); ++ close_mod (1); /* Create modid gap at mod1. */ ++ open_mod (0); /* Reuse modid of mod0, bump generation count. */ ++ ++ /* Create a thread where DTV of mod1 is NULL. */ ++ pthread_t t = xpthread_create (NULL, start, NULL); ++ xpthread_join (t); ++ return 0; ++} ++ ++#include +diff --git a/elf/tst-gnu2-tls2.h b/elf/tst-gnu2-tls2.h +new file mode 100644 +index 0000000000..1ade8151e2 +--- /dev/null ++++ b/elf/tst-gnu2-tls2.h +@@ -0,0 +1,40 @@ ++/* Test TLSDESC relocation. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++struct tls ++{ ++ int64_t a, b, c, d; ++}; ++ ++extern struct tls *apply_tls (struct tls *); ++ ++/* An architecture can define them to verify that clobber caller-saved ++ registers aren't changed by the implicit TLSDESC call. */ ++#ifndef INIT_TLSDESC_CALL ++# define INIT_TLSDESC_CALL() ++#endif ++ ++#ifndef BEFORE_TLSDESC_CALL ++# define BEFORE_TLSDESC_CALL() ++#endif ++ ++#ifndef AFTER_TLSDESC_CALL ++# define AFTER_TLSDESC_CALL() ++#endif +diff --git a/elf/tst-gnu2-tls2mod0.c b/elf/tst-gnu2-tls2mod0.c +new file mode 100644 +index 0000000000..3fe3c14277 +--- /dev/null ++++ b/elf/tst-gnu2-tls2mod0.c +@@ -0,0 +1,32 @@ ++/* DSO used by tst-gnu2-tls2. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++__thread struct tls tls_var0 __attribute__ ((visibility ("hidden"))); ++ ++struct tls * ++apply_tls (struct tls *p) ++{ ++ INIT_TLSDESC_CALL (); ++ BEFORE_TLSDESC_CALL (); ++ tls_var0 = *p; ++ struct tls *ret = &tls_var0; ++ AFTER_TLSDESC_CALL (); ++ return ret; ++} +diff --git a/elf/tst-gnu2-tls2mod1.c b/elf/tst-gnu2-tls2mod1.c +new file mode 100644 +index 0000000000..e210538468 +--- /dev/null ++++ b/elf/tst-gnu2-tls2mod1.c +@@ -0,0 +1,32 @@ ++/* DSO used by tst-gnu2-tls2. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++__thread struct tls tls_var1[100] __attribute__ ((visibility ("hidden"))); ++ ++struct tls * ++apply_tls (struct tls *p) ++{ ++ INIT_TLSDESC_CALL (); ++ BEFORE_TLSDESC_CALL (); ++ tls_var1[1] = *p; ++ struct tls *ret = &tls_var1[1]; ++ AFTER_TLSDESC_CALL (); ++ return ret; ++} +diff --git a/elf/tst-gnu2-tls2mod2.c b/elf/tst-gnu2-tls2mod2.c +new file mode 100644 +index 0000000000..6d3031dc5f +--- /dev/null ++++ b/elf/tst-gnu2-tls2mod2.c +@@ -0,0 +1,32 @@ ++/* DSO used by tst-gnu2-tls2. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++__thread struct tls tls_var2 __attribute__ ((visibility ("hidden"))); ++ ++struct tls * ++apply_tls (struct tls *p) ++{ ++ INIT_TLSDESC_CALL (); ++ BEFORE_TLSDESC_CALL (); ++ tls_var2 = *p; ++ struct tls *ret = &tls_var2; ++ AFTER_TLSDESC_CALL (); ++ return ret; ++} +diff --git a/elf/tst-tunables.c b/elf/tst-tunables.c +index 095b5c81d9..dff34ed748 100644 +--- a/elf/tst-tunables.c ++++ b/elf/tst-tunables.c +@@ -17,6 +17,10 @@ + . */ + + #include ++/* The test uses the tunable_list size, which is only exported for ++ ld.so. This will result in a copy of tunable_list, which is ununsed by ++ the test itself. */ ++#define TUNABLES_INTERNAL 1 + #include + #include + #include +@@ -24,12 +28,13 @@ + #include + #include + #include ++#include + + static int restart; + #define CMDLINE_OPTIONS \ + { "restart", no_argument, &restart, 1 }, + +-static const struct test_t ++static struct test_t + { + const char *name; + const char *value; +@@ -284,6 +289,29 @@ static const struct test_t + 0, + 0, + }, ++ /* Also check for repeated tunables with a count larger than the total number ++ of tunables. */ ++ { ++ "GLIBC_TUNABLES", ++ NULL, ++ 2, ++ 0, ++ 0, ++ }, ++ { ++ "GLIBC_TUNABLES", ++ NULL, ++ 1, ++ 0, ++ 0, ++ }, ++ { ++ "GLIBC_TUNABLES", ++ NULL, ++ 0, ++ 0, ++ 0, ++ }, + }; + + static int +@@ -327,6 +355,37 @@ do_test (int argc, char *argv[]) + spargv[i] = NULL; + } + ++ /* Create a tunable line with the duplicate values with a total number ++ larger than the different number of tunables. */ ++ { ++ enum { tunables_list_size = array_length (tunable_list) }; ++ const char *value = ""; ++ for (int i = 0; i < tunables_list_size; i++) ++ value = xasprintf ("%sglibc.malloc.check=2%c", ++ value, ++ i == (tunables_list_size - 1) ? '\0' : ':'); ++ tests[33].value = value; ++ } ++ /* Same as before, but the last tunable values is differen than the ++ rest. */ ++ { ++ enum { tunables_list_size = array_length (tunable_list) }; ++ const char *value = ""; ++ for (int i = 0; i < tunables_list_size - 1; i++) ++ value = xasprintf ("%sglibc.malloc.check=2:", value); ++ value = xasprintf ("%sglibc.malloc.check=1", value); ++ tests[34].value = value; ++ } ++ /* Same as before, but with an invalid last entry. */ ++ { ++ enum { tunables_list_size = array_length (tunable_list) }; ++ const char *value = ""; ++ for (int i = 0; i < tunables_list_size - 1; i++) ++ value = xasprintf ("%sglibc.malloc.check=2:", value); ++ value = xasprintf ("%sglibc.malloc.check=1=1", value); ++ tests[35].value = value; ++ } ++ + for (int i = 0; i < array_length (tests); i++) + { + snprintf (nteststr, sizeof nteststr, "%d", i); +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index ea019ce5c0..7196a8744b 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -75,7 +75,8 @@ ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ + bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 \ +- bug-iconv13 bug-iconv14 bug-iconv15 ++ bug-iconv13 bug-iconv14 bug-iconv15 \ ++ tst-iconv-iso-2022-cn-ext + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -330,6 +331,8 @@ $(objpfx)bug-iconv14.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) + $(objpfx)bug-iconv15.out: $(addprefix $(objpfx), $(gconv-modules)) \ + $(addprefix $(objpfx),$(modules.so)) ++$(objpfx)tst-iconv-iso-2022-cn-ext.out: $(addprefix $(objpfx), $(gconv-modules)) \ ++ $(addprefix $(objpfx),$(modules.so)) + + $(objpfx)iconv-test.out: run-iconv-test.sh \ + $(addprefix $(objpfx), $(gconv-modules)) \ +diff --git a/iconvdata/iso-2022-cn-ext.c b/iconvdata/iso-2022-cn-ext.c +index b34c8a36f4..cce29b1969 100644 +--- a/iconvdata/iso-2022-cn-ext.c ++++ b/iconvdata/iso-2022-cn-ext.c +@@ -574,6 +574,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); + { \ + const char *escseq; \ + \ ++ if (outptr + 4 > outend) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ + assert (used == CNS11643_2_set); /* XXX */ \ + escseq = "*H"; \ + *outptr++ = ESC; \ +@@ -587,6 +593,12 @@ DIAG_IGNORE_Os_NEEDS_COMMENT (5, "-Wmaybe-uninitialized"); + { \ + const char *escseq; \ + \ ++ if (outptr + 4 > outend) \ ++ { \ ++ result = __GCONV_FULL_OUTPUT; \ ++ break; \ ++ } \ ++ \ + assert ((used >> 5) >= 3 && (used >> 5) <= 7); \ + escseq = "+I+J+K+L+M" + ((used >> 5) - 3) * 2; \ + *outptr++ = ESC; \ +diff --git a/iconvdata/tst-iconv-iso-2022-cn-ext.c b/iconvdata/tst-iconv-iso-2022-cn-ext.c +new file mode 100644 +index 0000000000..96a8765fd5 +--- /dev/null ++++ b/iconvdata/tst-iconv-iso-2022-cn-ext.c +@@ -0,0 +1,128 @@ ++/* Verify ISO-2022-CN-EXT does not write out of the bounds. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++ ++/* The test sets up a two memory page buffer with the second page marked ++ PROT_NONE to trigger a fault if the conversion writes beyond the exact ++ expected amount. Then we carry out various conversions and precisely ++ place the start of the output buffer in order to trigger a SIGSEGV if the ++ process writes anywhere between 1 and page sized bytes more (only one ++ PROT_NONE page is setup as a canary) than expected. These tests exercise ++ all three of the cases in ISO-2022-CN-EXT where the converter must switch ++ character sets and may run out of buffer space while doing the ++ operation. */ ++ ++static int ++do_test (void) ++{ ++ iconv_t cd = iconv_open ("ISO-2022-CN-EXT", "UTF-8"); ++ TEST_VERIFY_EXIT (cd != (iconv_t) -1); ++ ++ char *ntf; ++ size_t ntfsize; ++ char *outbufbase; ++ { ++ int pgz = getpagesize (); ++ TEST_VERIFY_EXIT (pgz > 0); ++ ntfsize = 2 * pgz; ++ ++ ntf = xmmap (NULL, ntfsize, PROT_READ | PROT_WRITE, MAP_PRIVATE ++ | MAP_ANONYMOUS, -1); ++ xmprotect (ntf + pgz, pgz, PROT_NONE); ++ ++ outbufbase = ntf + pgz; ++ } ++ ++ /* Check if SOdesignation escape sequence does not trigger an OOB write. */ ++ { ++ char inbuf[] = "\xe4\xba\xa4\xe6\x8d\xa2"; ++ ++ for (int i = 0; i < 9; i++) ++ { ++ char *inp = inbuf; ++ size_t inleft = sizeof (inbuf) - 1; ++ ++ char *outp = outbufbase - i; ++ size_t outleft = i; ++ ++ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) ++ == (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ ++ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); ++ } ++ } ++ ++ /* Same as before for SS2designation. */ ++ { ++ char inbuf[] = "㴽 \xe3\xb4\xbd"; ++ ++ for (int i = 0; i < 14; i++) ++ { ++ char *inp = inbuf; ++ size_t inleft = sizeof (inbuf) - 1; ++ ++ char *outp = outbufbase - i; ++ size_t outleft = i; ++ ++ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) ++ == (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ ++ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); ++ } ++ } ++ ++ /* Same as before for SS3designation. */ ++ { ++ char inbuf[] = "劄 \xe5\x8a\x84"; ++ ++ for (int i = 0; i < 14; i++) ++ { ++ char *inp = inbuf; ++ size_t inleft = sizeof (inbuf) - 1; ++ ++ char *outp = outbufbase - i; ++ size_t outleft = i; ++ ++ TEST_VERIFY_EXIT (iconv (cd, &inp, &inleft, &outp, &outleft) ++ == (size_t) -1); ++ TEST_COMPARE (errno, E2BIG); ++ ++ TEST_VERIFY_EXIT (iconv (cd, NULL, NULL, NULL, NULL) == 0); ++ } ++ } ++ ++ TEST_VERIFY_EXIT (iconv_close (cd) != -1); ++ ++ xmunmap (ntf, ntfsize); ++ ++ return 0; ++} ++ ++#include +diff --git a/login/Makefile b/login/Makefile +index 1e22008a61..f91190e3dc 100644 +--- a/login/Makefile ++++ b/login/Makefile +@@ -44,7 +44,9 @@ subdir-dirs = programs + vpath %.c programs + + tests := tst-utmp tst-utmpx tst-grantpt tst-ptsname tst-getlogin tst-updwtmpx \ +- tst-pututxline-lockfail tst-pututxline-cache ++ tst-pututxline-lockfail tst-pututxline-cache tst-utmp-size tst-utmp-size-64 ++ ++CFLAGS-tst-utmp-size-64.c += -D_FILE_OFFSET_BITS=64 -D_TIME_BITS=64 + + # Empty compatibility library for old binaries. + extra-libs := libutil +diff --git a/login/tst-utmp-size-64.c b/login/tst-utmp-size-64.c +new file mode 100644 +index 0000000000..7a581a4c12 +--- /dev/null ++++ b/login/tst-utmp-size-64.c +@@ -0,0 +1,2 @@ ++/* The on-disk layout must not change in time64 mode. */ ++#include "tst-utmp-size.c" +diff --git a/login/tst-utmp-size.c b/login/tst-utmp-size.c +new file mode 100644 +index 0000000000..1b7f7ff042 +--- /dev/null ++++ b/login/tst-utmp-size.c +@@ -0,0 +1,33 @@ ++/* Check expected sizes of struct utmp, struct utmpx, struct lastlog. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++static int ++do_test (void) ++{ ++ _Static_assert (sizeof (struct utmp) == UTMP_SIZE, "struct utmp size"); ++ _Static_assert (sizeof (struct utmpx) == UTMP_SIZE, "struct utmpx size"); ++ _Static_assert (sizeof (struct lastlog) == LASTLOG_SIZE, ++ "struct lastlog size"); ++ return 0; ++} ++ ++#include +diff --git a/manual/stdbit.texi b/manual/stdbit.texi +index fe41c671d8..6c75ed9a20 100644 +--- a/manual/stdbit.texi ++++ b/manual/stdbit.texi +@@ -32,7 +32,13 @@ and @code{unsigned long long int}. In addition, there is a + corresponding type-generic macro (not listed below), named the same as + the functions but without any suffix such as @samp{_uc}. The + type-generic macro can only be used with an argument of an unsigned +-integer type with a width of 8, 16, 32 or 64 bits. ++integer type with a width of 8, 16, 32 or 64 bits, or when using ++a compiler with support for ++@uref{https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html,@code{__builtin_stdc_bit_ceil}}, ++etc.@:, built-in functions such as GCC 14.1 or later ++any unsigned integer type those built-in functions support. ++In GCC 14.1 that includes support for @code{unsigned __int128} and ++@code{unsigned _BitInt(@var{n})} if supported by the target. + + @deftypefun {unsigned int} stdc_leading_zeros_uc (unsigned char @var{x}) + @deftypefunx {unsigned int} stdc_leading_zeros_us (unsigned short @var{x}) +diff --git a/nscd/netgroupcache.c b/nscd/netgroupcache.c +index 0c6e46f15c..01d554af9c 100644 +--- a/nscd/netgroupcache.c ++++ b/nscd/netgroupcache.c +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + + #include "../nss/netgroup.h" + #include "nscd.h" +@@ -65,6 +66,16 @@ struct dataset + char strdata[0]; + }; + ++/* Send a notfound response to FD. Always returns -1 to indicate an ++ ephemeral error. */ ++static time_t ++send_notfound (int fd) ++{ ++ if (fd != -1) ++ TEMP_FAILURE_RETRY (send (fd, ¬found, sizeof (notfound), MSG_NOSIGNAL)); ++ return -1; ++} ++ + /* Sends a notfound message and prepares a notfound dataset to write to the + cache. Returns true if there was enough memory to allocate the dataset and + returns the dataset in DATASETP, total bytes to write in TOTALP and the +@@ -83,8 +94,7 @@ do_notfound (struct database_dyn *db, int fd, request_header *req, + total = sizeof (notfound); + timeout = time (NULL) + db->negtimeout; + +- if (fd != -1) +- TEMP_FAILURE_RETRY (send (fd, ¬found, total, MSG_NOSIGNAL)); ++ send_notfound (fd); + + dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len, 1); + /* If we cannot permanently store the result, so be it. */ +@@ -109,11 +119,78 @@ do_notfound (struct database_dyn *db, int fd, request_header *req, + return cacheable; + } + ++struct addgetnetgrentX_scratch ++{ ++ /* This is the result that the caller should use. It can be NULL, ++ point into buffer, or it can be in the cache. */ ++ struct dataset *dataset; ++ ++ struct scratch_buffer buffer; ++ ++ /* Used internally in addgetnetgrentX as a staging area. */ ++ struct scratch_buffer tmp; ++ ++ /* Number of bytes in buffer that are actually used. */ ++ size_t buffer_used; ++}; ++ ++static void ++addgetnetgrentX_scratch_init (struct addgetnetgrentX_scratch *scratch) ++{ ++ scratch->dataset = NULL; ++ scratch_buffer_init (&scratch->buffer); ++ scratch_buffer_init (&scratch->tmp); ++ ++ /* Reserve space for the header. */ ++ scratch->buffer_used = sizeof (struct dataset); ++ static_assert (sizeof (struct dataset) < sizeof (scratch->tmp.__space), ++ "initial buffer space"); ++ memset (scratch->tmp.data, 0, sizeof (struct dataset)); ++} ++ ++static void ++addgetnetgrentX_scratch_free (struct addgetnetgrentX_scratch *scratch) ++{ ++ scratch_buffer_free (&scratch->buffer); ++ scratch_buffer_free (&scratch->tmp); ++} ++ ++/* Copy LENGTH bytes from S into SCRATCH. Returns NULL if SCRATCH ++ could not be resized, otherwise a pointer to the copy. */ ++static char * ++addgetnetgrentX_append_n (struct addgetnetgrentX_scratch *scratch, ++ const char *s, size_t length) ++{ ++ while (true) ++ { ++ size_t remaining = scratch->buffer.length - scratch->buffer_used; ++ if (remaining >= length) ++ break; ++ if (!scratch_buffer_grow_preserve (&scratch->buffer)) ++ return NULL; ++ } ++ char *copy = scratch->buffer.data + scratch->buffer_used; ++ memcpy (copy, s, length); ++ scratch->buffer_used += length; ++ return copy; ++} ++ ++/* Copy S into SCRATCH, including its null terminator. Returns false ++ if SCRATCH could not be resized. */ ++static bool ++addgetnetgrentX_append (struct addgetnetgrentX_scratch *scratch, const char *s) ++{ ++ if (s == NULL) ++ s = ""; ++ return addgetnetgrentX_append_n (scratch, s, strlen (s) + 1) != NULL; ++} ++ ++/* Caller must initialize and free *SCRATCH. If the return value is ++ negative, this function has sent a notfound response. */ + static time_t + addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + const char *key, uid_t uid, struct hashentry *he, +- struct datahead *dh, struct dataset **resultp, +- void **tofreep) ++ struct datahead *dh, struct addgetnetgrentX_scratch *scratch) + { + if (__glibc_unlikely (debug_level > 0)) + { +@@ -132,14 +209,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + + char *key_copy = NULL; + struct __netgrent data; +- size_t buflen = MAX (1024, sizeof (*dataset) + req->key_len); +- size_t buffilled = sizeof (*dataset); +- char *buffer = NULL; + size_t nentries = 0; + size_t group_len = strlen (key) + 1; + struct name_list *first_needed + = alloca (sizeof (struct name_list) + group_len); +- *tofreep = NULL; + + if (netgroup_database == NULL + && !__nss_database_get (nss_database_netgroup, &netgroup_database)) +@@ -147,12 +220,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + /* No such service. */ + cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout, + &key_copy); +- goto writeout; ++ goto maybe_cache_add; + } + + memset (&data, '\0', sizeof (data)); +- buffer = xmalloc (buflen); +- *tofreep = buffer; + first_needed->next = first_needed; + memcpy (first_needed->name, key, group_len); + data.needed_groups = first_needed; +@@ -195,8 +266,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + while (1) + { + int e; +- status = getfct.f (&data, buffer + buffilled, +- buflen - buffilled - req->key_len, &e); ++ status = getfct.f (&data, scratch->tmp.data, ++ scratch->tmp.length, &e); + if (status == NSS_STATUS_SUCCESS) + { + if (data.type == triple_val) +@@ -204,68 +275,10 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + const char *nhost = data.val.triple.host; + const char *nuser = data.val.triple.user; + const char *ndomain = data.val.triple.domain; +- +- size_t hostlen = strlen (nhost ?: "") + 1; +- size_t userlen = strlen (nuser ?: "") + 1; +- size_t domainlen = strlen (ndomain ?: "") + 1; +- +- if (nhost == NULL || nuser == NULL || ndomain == NULL +- || nhost > nuser || nuser > ndomain) +- { +- const char *last = nhost; +- if (last == NULL +- || (nuser != NULL && nuser > last)) +- last = nuser; +- if (last == NULL +- || (ndomain != NULL && ndomain > last)) +- last = ndomain; +- +- size_t bufused +- = (last == NULL +- ? buffilled +- : last + strlen (last) + 1 - buffer); +- +- /* We have to make temporary copies. */ +- size_t needed = hostlen + userlen + domainlen; +- +- if (buflen - req->key_len - bufused < needed) +- { +- buflen += MAX (buflen, 2 * needed); +- /* Save offset in the old buffer. We don't +- bother with the NULL check here since +- we'll do that later anyway. */ +- size_t nhostdiff = nhost - buffer; +- size_t nuserdiff = nuser - buffer; +- size_t ndomaindiff = ndomain - buffer; +- +- char *newbuf = xrealloc (buffer, buflen); +- /* Fix up the triplet pointers into the new +- buffer. */ +- nhost = (nhost ? newbuf + nhostdiff +- : NULL); +- nuser = (nuser ? newbuf + nuserdiff +- : NULL); +- ndomain = (ndomain ? newbuf + ndomaindiff +- : NULL); +- *tofreep = buffer = newbuf; +- } +- +- nhost = memcpy (buffer + bufused, +- nhost ?: "", hostlen); +- nuser = memcpy ((char *) nhost + hostlen, +- nuser ?: "", userlen); +- ndomain = memcpy ((char *) nuser + userlen, +- ndomain ?: "", domainlen); +- } +- +- char *wp = buffer + buffilled; +- wp = memmove (wp, nhost ?: "", hostlen); +- wp += hostlen; +- wp = memmove (wp, nuser ?: "", userlen); +- wp += userlen; +- wp = memmove (wp, ndomain ?: "", domainlen); +- wp += domainlen; +- buffilled = wp - buffer; ++ if (!(addgetnetgrentX_append (scratch, nhost) ++ && addgetnetgrentX_append (scratch, nuser) ++ && addgetnetgrentX_append (scratch, ndomain))) ++ return send_notfound (fd); + ++nentries; + } + else +@@ -317,8 +330,8 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + } + else if (status == NSS_STATUS_TRYAGAIN && e == ERANGE) + { +- buflen *= 2; +- *tofreep = buffer = xrealloc (buffer, buflen); ++ if (!scratch_buffer_grow (&scratch->tmp)) ++ return send_notfound (fd); + } + else if (status == NSS_STATUS_RETURN + || status == NSS_STATUS_NOTFOUND +@@ -348,13 +361,20 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + { + cacheable = do_notfound (db, fd, req, key, &dataset, &total, &timeout, + &key_copy); +- goto writeout; ++ goto maybe_cache_add; + } + +- total = buffilled; ++ /* Capture the result size without the key appended. */ ++ total = scratch->buffer_used; ++ ++ /* Make a copy of the key. The scratch buffer must not move after ++ this point. */ ++ key_copy = addgetnetgrentX_append_n (scratch, key, req->key_len); ++ if (key_copy == NULL) ++ return send_notfound (fd); + + /* Fill in the dataset. */ +- dataset = (struct dataset *) buffer; ++ dataset = scratch->buffer.data; + timeout = datahead_init_pos (&dataset->head, total + req->key_len, + total - offsetof (struct dataset, resp), + he == NULL ? 0 : dh->nreloads + 1, +@@ -363,11 +383,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + dataset->resp.version = NSCD_VERSION; + dataset->resp.found = 1; + dataset->resp.nresults = nentries; +- dataset->resp.result_len = buffilled - sizeof (*dataset); +- +- assert (buflen - buffilled >= req->key_len); +- key_copy = memcpy (buffer + buffilled, key, req->key_len); +- buffilled += req->key_len; ++ dataset->resp.result_len = total - sizeof (*dataset); + + /* Now we can determine whether on refill we have to create a new + record or not. */ +@@ -398,7 +414,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + if (__glibc_likely (newp != NULL)) + { + /* Adjust pointer into the memory block. */ +- key_copy = (char *) newp + (key_copy - buffer); ++ key_copy = (char *) newp + (key_copy - (char *) dataset); + + dataset = memcpy (newp, dataset, total + req->key_len); + cacheable = true; +@@ -410,14 +426,12 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + } + + if (he == NULL && fd != -1) +- { +- /* We write the dataset before inserting it to the database +- since while inserting this thread might block and so would +- unnecessarily let the receiver wait. */ +- writeout: ++ /* We write the dataset before inserting it to the database since ++ while inserting this thread might block and so would ++ unnecessarily let the receiver wait. */ + writeall (fd, &dataset->resp, dataset->head.recsize); +- } + ++ maybe_cache_add: + if (cacheable) + { + /* If necessary, we also propagate the data to disk. */ +@@ -441,7 +455,7 @@ addgetnetgrentX (struct database_dyn *db, int fd, request_header *req, + } + + out: +- *resultp = dataset; ++ scratch->dataset = dataset; + + return timeout; + } +@@ -462,6 +476,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, + if (user != NULL) + key = strchr (key, '\0') + 1; + const char *domain = *key++ ? key : NULL; ++ struct addgetnetgrentX_scratch scratch; ++ ++ addgetnetgrentX_scratch_init (&scratch); + + if (__glibc_unlikely (debug_level > 0)) + { +@@ -477,12 +494,8 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, + group, group_len, + db, uid); + time_t timeout; +- void *tofree; + if (result != NULL) +- { +- timeout = result->head.timeout; +- tofree = NULL; +- } ++ timeout = result->head.timeout; + else + { + request_header req_get = +@@ -491,7 +504,10 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, + .key_len = group_len + }; + timeout = addgetnetgrentX (db, -1, &req_get, group, uid, NULL, NULL, +- &result, &tofree); ++ &scratch); ++ result = scratch.dataset; ++ if (timeout < 0) ++ goto out; + } + + struct indataset +@@ -502,24 +518,26 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, + = (struct indataset *) mempool_alloc (db, + sizeof (*dataset) + req->key_len, + 1); +- struct indataset dataset_mem; + bool cacheable = true; + if (__glibc_unlikely (dataset == NULL)) + { + cacheable = false; +- dataset = &dataset_mem; ++ /* The alloca is safe because nscd_run_worker verfies that ++ key_len is not larger than MAXKEYLEN. */ ++ dataset = alloca (sizeof (*dataset) + req->key_len); + } + + datahead_init_pos (&dataset->head, sizeof (*dataset) + req->key_len, + sizeof (innetgroup_response_header), +- he == NULL ? 0 : dh->nreloads + 1, result->head.ttl); ++ he == NULL ? 0 : dh->nreloads + 1, ++ result == NULL ? db->negtimeout : result->head.ttl); + /* Set the notfound status and timeout based on the result from + getnetgrent. */ +- dataset->head.notfound = result->head.notfound; ++ dataset->head.notfound = result == NULL || result->head.notfound; + dataset->head.timeout = timeout; + + dataset->resp.version = NSCD_VERSION; +- dataset->resp.found = result->resp.found; ++ dataset->resp.found = result != NULL && result->resp.found; + /* Until we find a matching entry the result is 0. */ + dataset->resp.result = 0; + +@@ -567,7 +585,9 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, + goto out; + } + +- if (he == NULL) ++ /* addgetnetgrentX may have already sent a notfound response. Do ++ not send another one. */ ++ if (he == NULL && dataset->resp.found) + { + /* We write the dataset before inserting it to the database + since while inserting this thread might block and so would +@@ -601,7 +621,7 @@ addinnetgrX (struct database_dyn *db, int fd, request_header *req, + } + + out: +- free (tofree); ++ addgetnetgrentX_scratch_free (&scratch); + return timeout; + } + +@@ -611,11 +631,12 @@ addgetnetgrentX_ignore (struct database_dyn *db, int fd, request_header *req, + const char *key, uid_t uid, struct hashentry *he, + struct datahead *dh) + { +- struct dataset *ignore; +- void *tofree; +- time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, +- &ignore, &tofree); +- free (tofree); ++ struct addgetnetgrentX_scratch scratch; ++ addgetnetgrentX_scratch_init (&scratch); ++ time_t timeout = addgetnetgrentX (db, fd, req, key, uid, he, dh, &scratch); ++ addgetnetgrentX_scratch_free (&scratch); ++ if (timeout < 0) ++ timeout = 0; + return timeout; + } + +@@ -659,5 +680,9 @@ readdinnetgr (struct database_dyn *db, struct hashentry *he, + .key_len = he->len + }; + +- return addinnetgrX (db, -1, &req, db->data + he->key, he->owner, he, dh); ++ time_t timeout = addinnetgrX (db, -1, &req, db->data + he->key, he->owner, ++ he, dh); ++ if (timeout < 0) ++ timeout = 0; ++ return timeout; + } +diff --git a/socket/Makefile b/socket/Makefile +index 74ca5b8452..fc1bd0a260 100644 +--- a/socket/Makefile ++++ b/socket/Makefile +@@ -70,6 +70,7 @@ tests := \ + tst-accept4 \ + tst-cmsg_cloexec \ + tst-cmsghdr \ ++ tst-connect \ + tst-sockopt \ + # tests + +diff --git a/socket/tst-connect.c b/socket/tst-connect.c +new file mode 100644 +index 0000000000..ec2fdd92c0 +--- /dev/null ++++ b/socket/tst-connect.c +@@ -0,0 +1,113 @@ ++/* Test the connect function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++static struct sockaddr_in server_address; ++ ++int ++open_socket_inet_tcp (void) ++{ ++ int fd = socket (AF_INET, SOCK_STREAM, IPPROTO_TCP); ++ if (fd < 0) ++ { ++ if (errno == EAFNOSUPPORT) ++ FAIL_UNSUPPORTED ("The host does not support IPv4"); ++ else ++ FAIL_EXIT1 ("socket (AF_INET, SOCK_STREAM, IPPROTO_TCP): %m\n"); ++ } ++ return fd; ++} ++ ++static pid_t ++start_server (void) ++{ ++ server_address.sin_family = AF_INET; ++ server_address.sin_port = 0; ++ server_address.sin_addr.s_addr = htonl (INADDR_LOOPBACK); ++ ++ int server_sock = open_socket_inet_tcp (); ++ ++ xbind (server_sock, (struct sockaddr *) &server_address, ++ sizeof (server_address)); ++ ++ socklen_t sa_len = sizeof (server_address); ++ xgetsockname (server_sock, (struct sockaddr *) &server_address, &sa_len); ++ xlisten (server_sock, 5); ++ ++ pid_t my_pid = xfork (); ++ if (my_pid > 0) ++ { ++ xclose (server_sock); ++ return my_pid; ++ } ++ ++ struct sockaddr_in client_address; ++ socklen_t ca_len = sizeof (server_address); ++ int client_sock = xaccept (server_sock, (struct sockaddr *) &client_address, ++ &ca_len); ++ printf ("socket accepted %d\n", client_sock); ++ ++ _exit (0); ++} ++ ++static int ++do_test (void) ++{ ++ pid_t serv_pid; ++ struct sockaddr_in peer; ++ socklen_t peer_len; ++ ++ serv_pid = start_server (); ++ int client_sock = open_socket_inet_tcp (); ++ xconnect (client_sock, (const struct sockaddr *) &server_address, ++ sizeof (server_address)); ++ ++ /* A second connect with same arguments should fail with EISCONN. */ ++ int result = connect (client_sock, ++ (const struct sockaddr *) &server_address, ++ sizeof (server_address)); ++ if (result == 0 || errno != EISCONN) ++ FAIL_EXIT1 ("Second connect (%d), should fail with EISCONN: %m", ++ client_sock); ++ ++ peer_len = sizeof (peer); ++ xgetpeername (client_sock, (struct sockaddr *) &peer, &peer_len); ++ TEST_COMPARE (peer_len, sizeof (peer)); ++ TEST_COMPARE (peer.sin_port, server_address.sin_port); ++ TEST_COMPARE_BLOB (&peer.sin_addr, sizeof (peer.sin_addr), ++ &server_address.sin_addr, ++ sizeof (server_address.sin_addr)); ++ ++ int status; ++ xwaitpid (serv_pid, &status, 0); ++ TEST_COMPARE (status, 0); ++ ++ return 0; ++} ++ ++#include +diff --git a/stdlib/Makefile b/stdlib/Makefile +index d587f054d1..9898cc5d8a 100644 +--- a/stdlib/Makefile ++++ b/stdlib/Makefile +@@ -308,6 +308,7 @@ tests := \ + tst-setcontext10 \ + tst-setcontext11 \ + tst-stdbit-Wconversion \ ++ tst-stdbit-builtins \ + tst-stdc_bit_ceil \ + tst-stdc_bit_floor \ + tst-stdc_bit_width \ +diff --git a/stdlib/stdbit.h b/stdlib/stdbit.h +index f334eb174d..2801590c63 100644 +--- a/stdlib/stdbit.h ++++ b/stdlib/stdbit.h +@@ -64,9 +64,13 @@ extern unsigned int stdc_leading_zeros_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_leading_zeros_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_leading_zeros(x) \ ++#if __glibc_has_builtin (__builtin_stdc_leading_zeros) ++# define stdc_leading_zeros(x) (__builtin_stdc_leading_zeros (x)) ++#else ++# define stdc_leading_zeros(x) \ + (stdc_leading_zeros_ull (x) \ + - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x)))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline unsigned int +@@ -116,9 +120,13 @@ extern unsigned int stdc_leading_ones_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_leading_ones_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_leading_ones(x) \ ++#if __glibc_has_builtin (__builtin_stdc_leading_ones) ++# define stdc_leading_ones(x) (__builtin_stdc_leading_ones (x)) ++#else ++# define stdc_leading_ones(x) \ + (stdc_leading_ones_ull ((unsigned long long int) (x) \ + << 8 * (sizeof (0ULL) - sizeof (x)))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline unsigned int +@@ -168,11 +176,15 @@ extern unsigned int stdc_trailing_zeros_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_trailing_zeros_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_trailing_zeros(x) \ ++#if __glibc_has_builtin (__builtin_stdc_trailing_zeros) ++# define stdc_trailing_zeros(x) (__builtin_stdc_trailing_zeros (x)) ++#else ++# define stdc_trailing_zeros(x) \ + (sizeof (x) == 8 ? stdc_trailing_zeros_ull (x) \ + : sizeof (x) == 4 ? stdc_trailing_zeros_ui (x) \ + : sizeof (x) == 2 ? stdc_trailing_zeros_us (__pacify_uint16 (x)) \ + : stdc_trailing_zeros_uc (__pacify_uint8 (x))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) + static __always_inline unsigned int +@@ -222,7 +234,11 @@ extern unsigned int stdc_trailing_ones_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_trailing_ones_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x)) ++#if __glibc_has_builtin (__builtin_stdc_trailing_ones) ++# define stdc_trailing_ones(x) (__builtin_stdc_trailing_ones (x)) ++#else ++# define stdc_trailing_ones(x) (stdc_trailing_ones_ull (x)) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) + static __always_inline unsigned int +@@ -272,11 +288,15 @@ extern unsigned int stdc_first_leading_zero_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_first_leading_zero_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_first_leading_zero(x) \ ++#if __glibc_has_builtin (__builtin_stdc_first_leading_zero) ++# define stdc_first_leading_zero(x) (__builtin_stdc_first_leading_zero (x)) ++#else ++# define stdc_first_leading_zero(x) \ + (sizeof (x) == 8 ? stdc_first_leading_zero_ull (x) \ + : sizeof (x) == 4 ? stdc_first_leading_zero_ui (x) \ + : sizeof (x) == 2 ? stdc_first_leading_zero_us (__pacify_uint16 (x)) \ + : stdc_first_leading_zero_uc (__pacify_uint8 (x))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline unsigned int +@@ -326,11 +346,15 @@ extern unsigned int stdc_first_leading_one_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_first_leading_one_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_first_leading_one(x) \ ++#if __glibc_has_builtin (__builtin_stdc_first_leading_one) ++# define stdc_first_leading_one(x) (__builtin_stdc_first_leading_one (x)) ++#else ++# define stdc_first_leading_one(x) \ + (sizeof (x) == 8 ? stdc_first_leading_one_ull (x) \ + : sizeof (x) == 4 ? stdc_first_leading_one_ui (x) \ + : sizeof (x) == 2 ? stdc_first_leading_one_us (__pacify_uint16 (x)) \ + : stdc_first_leading_one_uc (__pacify_uint8 (x))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline unsigned int +@@ -380,11 +404,15 @@ extern unsigned int stdc_first_trailing_zero_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_first_trailing_zero_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_first_trailing_zero(x) \ ++#if __glibc_has_builtin (__builtin_stdc_first_trailing_zero) ++# define stdc_first_trailing_zero(x) (__builtin_stdc_first_trailing_zero (x)) ++#else ++# define stdc_first_trailing_zero(x) \ + (sizeof (x) == 8 ? stdc_first_trailing_zero_ull (x) \ + : sizeof (x) == 4 ? stdc_first_trailing_zero_ui (x) \ + : sizeof (x) == 2 ? stdc_first_trailing_zero_us (__pacify_uint16 (x)) \ + : stdc_first_trailing_zero_uc (__pacify_uint8 (x))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) + static __always_inline unsigned int +@@ -434,11 +462,15 @@ extern unsigned int stdc_first_trailing_one_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_first_trailing_one_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_first_trailing_one(x) \ ++#if __glibc_has_builtin (__builtin_stdc_first_trailing_one) ++# define stdc_first_trailing_one(x) (__builtin_stdc_first_trailing_one (x)) ++#else ++# define stdc_first_trailing_one(x) \ + (sizeof (x) == 8 ? stdc_first_trailing_one_ull (x) \ + : sizeof (x) == 4 ? stdc_first_trailing_one_ui (x) \ + : sizeof (x) == 2 ? stdc_first_trailing_one_us (__pacify_uint16 (x)) \ + : stdc_first_trailing_one_uc (__pacify_uint8 (x))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_ctzll) + static __always_inline unsigned int +@@ -488,9 +520,13 @@ extern unsigned int stdc_count_zeros_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_count_zeros_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_count_zeros(x) \ ++#if __glibc_has_builtin (__builtin_stdc_count_zeros) ++# define stdc_count_zeros(x) (__builtin_stdc_count_zeros (x)) ++#else ++# define stdc_count_zeros(x) \ + (stdc_count_zeros_ull (x) \ + - (unsigned int) (8 * (sizeof (0ULL) - sizeof (x)))) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll) + static __always_inline unsigned int +@@ -540,7 +576,11 @@ extern unsigned int stdc_count_ones_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_count_ones_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_count_ones(x) (stdc_count_ones_ull (x)) ++#if __glibc_has_builtin (__builtin_stdc_count_ones) ++# define stdc_count_ones(x) (__builtin_stdc_count_ones (x)) ++#else ++# define stdc_count_ones(x) (stdc_count_ones_ull (x)) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_popcountll) + static __always_inline unsigned int +@@ -590,10 +630,14 @@ extern bool stdc_has_single_bit_ul (unsigned long int __x) + __extension__ + extern bool stdc_has_single_bit_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_has_single_bit(x) \ ++#if __glibc_has_builtin (__builtin_stdc_has_single_bit) ++# define stdc_has_single_bit(x) (__builtin_stdc_has_single_bit (x)) ++#else ++# define stdc_has_single_bit(x) \ + ((bool) (sizeof (x) <= sizeof (unsigned int) \ + ? stdc_has_single_bit_ui (x) \ + : stdc_has_single_bit_ull (x))) ++#endif + + static __always_inline bool + __hsb64_inline (uint64_t __x) +@@ -641,7 +685,11 @@ extern unsigned int stdc_bit_width_ul (unsigned long int __x) + __extension__ + extern unsigned int stdc_bit_width_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_bit_width(x) (stdc_bit_width_ull (x)) ++#if __glibc_has_builtin (__builtin_stdc_bit_width) ++# define stdc_bit_width(x) (__builtin_stdc_bit_width (x)) ++#else ++# define stdc_bit_width(x) (stdc_bit_width_ull (x)) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline unsigned int +@@ -691,7 +739,11 @@ extern unsigned long int stdc_bit_floor_ul (unsigned long int __x) + __extension__ + extern unsigned long long int stdc_bit_floor_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x)) ++#if __glibc_has_builtin (__builtin_stdc_bit_floor) ++# define stdc_bit_floor(x) (__builtin_stdc_bit_floor (x)) ++#else ++# define stdc_bit_floor(x) ((__typeof (x)) stdc_bit_floor_ull (x)) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline uint64_t +@@ -743,7 +795,11 @@ extern unsigned long int stdc_bit_ceil_ul (unsigned long int __x) + __extension__ + extern unsigned long long int stdc_bit_ceil_ull (unsigned long long int __x) + __THROW __attribute_const__; +-#define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x)) ++#if __glibc_has_builtin (__builtin_stdc_bit_ceil) ++# define stdc_bit_ceil(x) (__builtin_stdc_bit_ceil (x)) ++#else ++# define stdc_bit_ceil(x) ((__typeof (x)) stdc_bit_ceil_ull (x)) ++#endif + + #if __GNUC_PREREQ (3, 4) || __glibc_has_builtin (__builtin_clzll) + static __always_inline uint64_t +diff --git a/stdlib/tst-stdbit-builtins.c b/stdlib/tst-stdbit-builtins.c +new file mode 100644 +index 0000000000..536841ca8a +--- /dev/null ++++ b/stdlib/tst-stdbit-builtins.c +@@ -0,0 +1,778 @@ ++/* Test type-generic macros with compiler __builtin_stdc_* support. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++ ++#if __glibc_has_builtin (__builtin_stdc_leading_zeros) \ ++ && __glibc_has_builtin (__builtin_stdc_leading_ones) \ ++ && __glibc_has_builtin (__builtin_stdc_trailing_zeros) \ ++ && __glibc_has_builtin (__builtin_stdc_trailing_ones) \ ++ && __glibc_has_builtin (__builtin_stdc_first_leading_zero) \ ++ && __glibc_has_builtin (__builtin_stdc_first_leading_one) \ ++ && __glibc_has_builtin (__builtin_stdc_first_trailing_zero) \ ++ && __glibc_has_builtin (__builtin_stdc_first_trailing_one) \ ++ && __glibc_has_builtin (__builtin_stdc_count_zeros) \ ++ && __glibc_has_builtin (__builtin_stdc_count_ones) \ ++ && __glibc_has_builtin (__builtin_stdc_has_single_bit) \ ++ && __glibc_has_builtin (__builtin_stdc_bit_width) \ ++ && __glibc_has_builtin (__builtin_stdc_bit_floor) \ ++ && __glibc_has_builtin (__builtin_stdc_bit_ceil) ++ ++# if !defined (BITINT_MAXWIDTH) && defined (__BITINT_MAXWIDTH__) ++# define BITINT_MAXWIDTH __BITINT_MAXWIDTH__ ++# endif ++ ++typedef unsigned char uc; ++typedef unsigned short us; ++typedef unsigned int ui; ++typedef unsigned long int ul; ++typedef unsigned long long int ull; ++ ++# define expr_has_type(e, t) _Generic (e, default : 0, t : 1) ++ ++static int ++do_test (void) ++{ ++ TEST_COMPARE (stdc_leading_zeros ((uc) 0), CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_leading_zeros ((us) 0), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_leading_zeros (0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0U), ui), 1); ++ TEST_COMPARE (stdc_leading_zeros (0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0UL), ui), 1); ++ TEST_COMPARE (stdc_leading_zeros (0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0ULL), ui), 1); ++ TEST_COMPARE (stdc_leading_zeros ((uc) ~0U), 0); ++ TEST_COMPARE (stdc_leading_zeros ((us) ~0U), 0); ++ TEST_COMPARE (stdc_leading_zeros (~0U), 0); ++ TEST_COMPARE (stdc_leading_zeros (~0UL), 0); ++ TEST_COMPARE (stdc_leading_zeros (~0ULL), 0); ++ TEST_COMPARE (stdc_leading_zeros ((uc) 3), CHAR_BIT - 2); ++ TEST_COMPARE (stdc_leading_zeros ((us) 9), sizeof (short) * CHAR_BIT - 4); ++ TEST_COMPARE (stdc_leading_zeros (34U), sizeof (int) * CHAR_BIT - 6); ++ TEST_COMPARE (stdc_leading_zeros (130UL), sizeof (long int) * CHAR_BIT - 8); ++ TEST_COMPARE (stdc_leading_zeros (512ULL), ++ sizeof (long long int) * CHAR_BIT - 10); ++ TEST_COMPARE (stdc_leading_ones ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_leading_ones ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_leading_ones (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones (0U), ui), 1); ++ TEST_COMPARE (stdc_leading_ones (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones (0UL), ui), 1); ++ TEST_COMPARE (stdc_leading_ones (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones (0ULL), ui), 1); ++ TEST_COMPARE (stdc_leading_ones ((uc) ~0U), CHAR_BIT); ++ TEST_COMPARE (stdc_leading_ones ((us) ~0U), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (stdc_leading_ones (~0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (stdc_leading_ones (~0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_leading_ones (~0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_leading_ones ((uc) ~3), CHAR_BIT - 2); ++ TEST_COMPARE (stdc_leading_ones ((us) ~9), sizeof (short) * CHAR_BIT - 4); ++ TEST_COMPARE (stdc_leading_ones (~34U), sizeof (int) * CHAR_BIT - 6); ++ TEST_COMPARE (stdc_leading_ones (~130UL), sizeof (long int) * CHAR_BIT - 8); ++ TEST_COMPARE (stdc_leading_ones (~512ULL), ++ sizeof (long long int) * CHAR_BIT - 10); ++ TEST_COMPARE (stdc_trailing_zeros ((uc) 0), CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros ((us) 0), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0U), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0UL), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0ULL), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros ((uc) ~0U), 0); ++ TEST_COMPARE (stdc_trailing_zeros ((us) ~0U), 0); ++ TEST_COMPARE (stdc_trailing_zeros (~0U), 0); ++ TEST_COMPARE (stdc_trailing_zeros (~0UL), 0); ++ TEST_COMPARE (stdc_trailing_zeros (~0ULL), 0); ++ TEST_COMPARE (stdc_trailing_zeros ((uc) 2), 1); ++ TEST_COMPARE (stdc_trailing_zeros ((us) 24), 3); ++ TEST_COMPARE (stdc_trailing_zeros (32U), 5); ++ TEST_COMPARE (stdc_trailing_zeros (128UL), 7); ++ TEST_COMPARE (stdc_trailing_zeros (512ULL), 9); ++ TEST_COMPARE (stdc_trailing_ones ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0U), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0UL), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0ULL), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones ((uc) ~0U), CHAR_BIT); ++ TEST_COMPARE (stdc_trailing_ones ((us) ~0U), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (stdc_trailing_ones (~0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (stdc_trailing_ones (~0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_trailing_ones (~0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_trailing_ones ((uc) 5), 1); ++ TEST_COMPARE (stdc_trailing_ones ((us) 15), 4); ++ TEST_COMPARE (stdc_trailing_ones (127U), 7); ++ TEST_COMPARE (stdc_trailing_ones (511UL), 9); ++ TEST_COMPARE (stdc_trailing_ones (~0ULL >> 2), ++ sizeof (long long int) * CHAR_BIT - 2); ++ TEST_COMPARE (stdc_first_leading_zero ((uc) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero ((us) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (0U), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0U), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (0UL), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0UL), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (0ULL), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0ULL), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero ((uc) ~0U), 0); ++ TEST_COMPARE (stdc_first_leading_zero ((us) ~0U), 0); ++ TEST_COMPARE (stdc_first_leading_zero (~0U), 0); ++ TEST_COMPARE (stdc_first_leading_zero (~0UL), 0); ++ TEST_COMPARE (stdc_first_leading_zero (~0ULL), 0); ++ TEST_COMPARE (stdc_first_leading_zero ((uc) ~3U), CHAR_BIT - 1); ++ TEST_COMPARE (stdc_first_leading_zero ((us) ~15U), ++ sizeof (short) * CHAR_BIT - 3); ++ TEST_COMPARE (stdc_first_leading_zero (~63U), sizeof (int) * CHAR_BIT - 5); ++ TEST_COMPARE (stdc_first_leading_zero (~255UL), ++ sizeof (long int) * CHAR_BIT - 7); ++ TEST_COMPARE (stdc_first_leading_zero (~1023ULL), ++ sizeof (long long int) * CHAR_BIT - 9); ++ TEST_COMPARE (stdc_first_leading_one ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0U), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0UL), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0ULL), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one ((uc) ~0U), 1); ++ TEST_COMPARE (stdc_first_leading_one ((us) ~0U), 1); ++ TEST_COMPARE (stdc_first_leading_one (~0U), 1); ++ TEST_COMPARE (stdc_first_leading_one (~0UL), 1); ++ TEST_COMPARE (stdc_first_leading_one (~0ULL), 1); ++ TEST_COMPARE (stdc_first_leading_one ((uc) 3), CHAR_BIT - 1); ++ TEST_COMPARE (stdc_first_leading_one ((us) 9), ++ sizeof (short) * CHAR_BIT - 3); ++ TEST_COMPARE (stdc_first_leading_one (34U), sizeof (int) * CHAR_BIT - 5); ++ TEST_COMPARE (stdc_first_leading_one (130UL), ++ sizeof (long int) * CHAR_BIT - 7); ++ TEST_COMPARE (stdc_first_leading_one (512ULL), ++ sizeof (long long int) * CHAR_BIT - 9); ++ TEST_COMPARE (stdc_first_trailing_zero ((uc) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero ((us) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (0U), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0U), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (0UL), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0UL), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (0ULL), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0ULL), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero ((uc) ~0U), 0); ++ TEST_COMPARE (stdc_first_trailing_zero ((us) ~0U), 0); ++ TEST_COMPARE (stdc_first_trailing_zero (~0U), 0); ++ TEST_COMPARE (stdc_first_trailing_zero (~0UL), 0); ++ TEST_COMPARE (stdc_first_trailing_zero (~0ULL), 0); ++ TEST_COMPARE (stdc_first_trailing_zero ((uc) 2), 1); ++ TEST_COMPARE (stdc_first_trailing_zero ((us) 15), 5); ++ TEST_COMPARE (stdc_first_trailing_zero (63U), 7); ++ TEST_COMPARE (stdc_first_trailing_zero (128UL), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (511ULL), 10); ++ TEST_COMPARE (stdc_first_trailing_one ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0U), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0UL), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0ULL), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one ((uc) ~0U), 1); ++ TEST_COMPARE (stdc_first_trailing_one ((us) ~0U), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~0U), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~0UL), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~0ULL), 1); ++ TEST_COMPARE (stdc_first_trailing_one ((uc) 4), 3); ++ TEST_COMPARE (stdc_first_trailing_one ((us) 96), 6); ++ TEST_COMPARE (stdc_first_trailing_one (127U), 1); ++ TEST_COMPARE (stdc_first_trailing_one (511UL), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~0ULL << 12), 13); ++ TEST_COMPARE (stdc_count_zeros ((uc) 0), CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_count_zeros ((us) 0), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_count_zeros (0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros (0U), ui), 1); ++ TEST_COMPARE (stdc_count_zeros (0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros (0UL), ui), 1); ++ TEST_COMPARE (stdc_count_zeros (0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros (0ULL), ui), 1); ++ TEST_COMPARE (stdc_count_zeros ((uc) ~0U), 0); ++ TEST_COMPARE (stdc_count_zeros ((us) ~0U), 0); ++ TEST_COMPARE (stdc_count_zeros (~0U), 0); ++ TEST_COMPARE (stdc_count_zeros (~0UL), 0); ++ TEST_COMPARE (stdc_count_zeros (~0ULL), 0); ++ TEST_COMPARE (stdc_count_zeros ((uc) 1U), CHAR_BIT - 1); ++ TEST_COMPARE (stdc_count_zeros ((us) 42), sizeof (short) * CHAR_BIT - 3); ++ TEST_COMPARE (stdc_count_zeros (291U), sizeof (int) * CHAR_BIT - 4); ++ TEST_COMPARE (stdc_count_zeros (~1315UL), 5); ++ TEST_COMPARE (stdc_count_zeros (3363ULL), ++ sizeof (long long int) * CHAR_BIT - 6); ++ TEST_COMPARE (stdc_count_ones ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_count_ones ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_count_ones (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones (0U), ui), 1); ++ TEST_COMPARE (stdc_count_ones (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones (0UL), ui), 1); ++ TEST_COMPARE (stdc_count_ones (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones (0ULL), ui), 1); ++ TEST_COMPARE (stdc_count_ones ((uc) ~0U), CHAR_BIT); ++ TEST_COMPARE (stdc_count_ones ((us) ~0U), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (stdc_count_ones (~0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (stdc_count_ones (~0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_count_ones (~0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_count_ones ((uc) ~1U), CHAR_BIT - 1); ++ TEST_COMPARE (stdc_count_ones ((us) ~42), sizeof (short) * CHAR_BIT - 3); ++ TEST_COMPARE (stdc_count_ones (~291U), sizeof (int) * CHAR_BIT - 4); ++ TEST_COMPARE (stdc_count_ones (1315UL), 5); ++ TEST_COMPARE (stdc_count_ones (~3363ULL), ++ sizeof (long long int) * CHAR_BIT - 6); ++ TEST_COMPARE (stdc_has_single_bit ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((uc) 0), _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((us) 0), _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0U), _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0UL), _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0ULL), _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit ((uc) 2), 1); ++ TEST_COMPARE (stdc_has_single_bit ((us) 8), 1); ++ TEST_COMPARE (stdc_has_single_bit (32U), 1); ++ TEST_COMPARE (stdc_has_single_bit (128UL), 1); ++ TEST_COMPARE (stdc_has_single_bit (512ULL), 1); ++ TEST_COMPARE (stdc_has_single_bit ((uc) 7), 0); ++ TEST_COMPARE (stdc_has_single_bit ((us) 96), 0); ++ TEST_COMPARE (stdc_has_single_bit (513U), 0); ++ TEST_COMPARE (stdc_has_single_bit (1022UL), 0); ++ TEST_COMPARE (stdc_has_single_bit (12ULL), 0); ++ TEST_COMPARE (stdc_bit_width ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width ((uc) 0), ui), 1); ++ TEST_COMPARE (stdc_bit_width ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width ((us) 0), ui), 1); ++ TEST_COMPARE (stdc_bit_width (0U), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width (0U), ui), 1); ++ TEST_COMPARE (stdc_bit_width (0UL), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width (0UL), ui), 1); ++ TEST_COMPARE (stdc_bit_width (0ULL), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width (0ULL), ui), 1); ++ TEST_COMPARE (stdc_bit_width ((uc) ~0U), CHAR_BIT); ++ TEST_COMPARE (stdc_bit_width ((us) ~0U), sizeof (short) * CHAR_BIT); ++ TEST_COMPARE (stdc_bit_width (~0U), sizeof (int) * CHAR_BIT); ++ TEST_COMPARE (stdc_bit_width (~0UL), sizeof (long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_bit_width (~0ULL), sizeof (long long int) * CHAR_BIT); ++ TEST_COMPARE (stdc_bit_width ((uc) ((uc) ~0U >> 1)), CHAR_BIT - 1); ++ TEST_COMPARE (stdc_bit_width ((uc) 6), 3); ++ TEST_COMPARE (stdc_bit_width ((us) 12U), 4); ++ TEST_COMPARE (stdc_bit_width ((us) ((us) ~0U >> 5)), ++ sizeof (short) * CHAR_BIT - 5); ++ TEST_COMPARE (stdc_bit_width (137U), 8); ++ TEST_COMPARE (stdc_bit_width (269U), 9); ++ TEST_COMPARE (stdc_bit_width (39UL), 6); ++ TEST_COMPARE (stdc_bit_width (~0UL >> 2), sizeof (long int) * CHAR_BIT - 2); ++ TEST_COMPARE (stdc_bit_width (1023ULL), 10); ++ TEST_COMPARE (stdc_bit_width (1024ULL), 11); ++ TEST_COMPARE (stdc_bit_floor ((uc) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor ((uc) 0), uc), 1); ++ TEST_COMPARE (stdc_bit_floor ((us) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor ((us) 0), us), 1); ++ TEST_COMPARE (stdc_bit_floor (0U), 0U); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor (0U), ui), 1); ++ TEST_COMPARE (stdc_bit_floor (0UL), 0UL); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor (0UL), ul), 1); ++ TEST_COMPARE (stdc_bit_floor (0ULL), 0ULL); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor (0ULL), ull), 1); ++ TEST_COMPARE (stdc_bit_floor ((uc) ~0U), (1U << (CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_floor ((us) ~0U), ++ (1U << (sizeof (short) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_floor (~0U), (1U << (sizeof (int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_floor (~0UL), ++ (1UL << (sizeof (long int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_floor (~0ULL), ++ (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_floor ((uc) 4), 4); ++ TEST_COMPARE (stdc_bit_floor ((uc) 7), 4); ++ TEST_COMPARE (stdc_bit_floor ((us) 8U), 8); ++ TEST_COMPARE (stdc_bit_floor ((us) 31U), 16); ++ TEST_COMPARE (stdc_bit_floor (137U), 128U); ++ TEST_COMPARE (stdc_bit_floor (269U), 256U); ++ TEST_COMPARE (stdc_bit_floor (511UL), 256UL); ++ TEST_COMPARE (stdc_bit_floor (512UL), 512UL); ++ TEST_COMPARE (stdc_bit_floor (513UL), 512ULL); ++ TEST_COMPARE (stdc_bit_floor (1024ULL), 1024ULL); ++ TEST_COMPARE (stdc_bit_ceil ((uc) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((uc) 0), uc), 1); ++ TEST_COMPARE (stdc_bit_ceil ((us) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((us) 0), us), 1); ++ TEST_COMPARE (stdc_bit_ceil (0U), 1U); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0U), ui), 1); ++ TEST_COMPARE (stdc_bit_ceil (0UL), 1UL); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0UL), ul), 1); ++ TEST_COMPARE (stdc_bit_ceil (0ULL), 1ULL); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0ULL), ull), 1); ++ TEST_COMPARE (stdc_bit_ceil ((uc) ~0U), 0); ++ TEST_COMPARE (stdc_bit_ceil ((us) ~0U), 0); ++ TEST_COMPARE (stdc_bit_ceil (~0U), 0U); ++ TEST_COMPARE (stdc_bit_ceil (~0UL), 0UL); ++ TEST_COMPARE (stdc_bit_ceil (~0ULL), 0ULL); ++ TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil ((uc) ((uc) ~0U >> 1)), (1U << (CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)), ++ (1U << (sizeof (short) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil ((us) ((us) ~0U >> 1)), ++ (1U << (sizeof (short) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil (~0U >> 1), ++ (1U << (sizeof (int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil (1U << (sizeof (int) * CHAR_BIT - 1)), ++ (1U << (sizeof (int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil (~0UL >> 1), ++ (1UL << (sizeof (long int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil (~0UL >> 1), ++ (1UL << (sizeof (long int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil (1ULL ++ << (sizeof (long long int) * CHAR_BIT - 1)), ++ (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil (~0ULL >> 1), ++ (1ULL << (sizeof (long long int) * CHAR_BIT - 1))); ++ TEST_COMPARE (stdc_bit_ceil ((uc) 1), 1); ++ TEST_COMPARE (stdc_bit_ceil ((uc) 2), 2); ++ TEST_COMPARE (stdc_bit_ceil ((us) 3U), 4); ++ TEST_COMPARE (stdc_bit_ceil ((us) 4U), 4); ++ TEST_COMPARE (stdc_bit_ceil (5U), 8U); ++ TEST_COMPARE (stdc_bit_ceil (269U), 512U); ++ TEST_COMPARE (stdc_bit_ceil (511UL), 512UL); ++ TEST_COMPARE (stdc_bit_ceil (512UL), 512UL); ++ TEST_COMPARE (stdc_bit_ceil (513ULL), 1024ULL); ++ TEST_COMPARE (stdc_bit_ceil (1025ULL), 2048ULL); ++# ifdef __SIZEOF_INT128__ ++ TEST_COMPARE (stdc_leading_zeros ((unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned __int128) 0), ui), ++ 1); ++ TEST_COMPARE (stdc_leading_zeros (~(unsigned __int128) 0), 0); ++ TEST_COMPARE (stdc_leading_ones ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned __int128) 0), ui), ++ 1); ++ TEST_COMPARE (stdc_leading_ones (~(unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (stdc_trailing_zeros ((unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned __int128) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (~(unsigned __int128) 0), 0); ++ TEST_COMPARE (stdc_trailing_ones ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned __int128) 0), ui), ++ 1); ++ TEST_COMPARE (stdc_trailing_ones (~(unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (stdc_first_leading_zero ((unsigned __int128) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned __int128) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (~(unsigned __int128) 0), 0); ++ TEST_COMPARE (stdc_first_leading_one ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned __int128) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (~(unsigned __int128) 0), 1); ++ TEST_COMPARE (stdc_first_trailing_zero ((unsigned __int128) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned __int128) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (~(unsigned __int128) 0), 0); ++ TEST_COMPARE (stdc_first_trailing_one ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned __int128) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~(unsigned __int128) 0), 1); ++ TEST_COMPARE (stdc_count_zeros ((unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned __int128) 0), ui), ++ 1); ++ TEST_COMPARE (stdc_count_zeros (~(unsigned __int128) 0), 0); ++ TEST_COMPARE (stdc_count_ones ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned __int128) 0), ui), ++ 1); ++ TEST_COMPARE (stdc_count_ones (~(unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (stdc_has_single_bit ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned __int128) 0), ++ _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit (~(unsigned __int128) 0), 0); ++ TEST_COMPARE (stdc_bit_width ((unsigned __int128) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned __int128) 0), ui), 1); ++ TEST_COMPARE (stdc_bit_width (~(unsigned __int128) 0), ++ sizeof (__int128) * CHAR_BIT); ++ TEST_COMPARE (stdc_bit_floor ((unsigned __int128) 0) != 0, 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned __int128) 0), ++ unsigned __int128), 1); ++ TEST_COMPARE (stdc_bit_floor (~(unsigned __int128) 0) ++ != ((unsigned __int128) 1) << (sizeof (__int128) ++ * CHAR_BIT - 1), 0); ++ TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 0) != 1, 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned __int128) 0), ++ unsigned __int128), 1); ++ TEST_COMPARE (stdc_bit_ceil ((unsigned __int128) 1) != 1, 0); ++ TEST_COMPARE (stdc_bit_ceil ((~(unsigned __int128) 0) >> 1) ++ != ((unsigned __int128) 1) << (sizeof (__int128) ++ * CHAR_BIT - 1), 0); ++ TEST_COMPARE (stdc_bit_ceil (~(unsigned __int128) 0) != 0, 0); ++# endif ++ uc a = 0; ++ TEST_COMPARE (stdc_bit_width (a++), 0); ++ TEST_COMPARE (a, 1); ++ ull b = 0; ++ TEST_COMPARE (stdc_bit_width (b++), 0); ++ TEST_COMPARE (b, 1); ++ TEST_COMPARE (stdc_bit_floor (a++), 1); ++ TEST_COMPARE (a, 2); ++ TEST_COMPARE (stdc_bit_floor (b++), 1); ++ TEST_COMPARE (b, 2); ++ TEST_COMPARE (stdc_bit_ceil (a++), 2); ++ TEST_COMPARE (a, 3); ++ TEST_COMPARE (stdc_bit_ceil (b++), 2); ++ TEST_COMPARE (b, 3); ++ TEST_COMPARE (stdc_leading_zeros (a++), CHAR_BIT - 2); ++ TEST_COMPARE (a, 4); ++ TEST_COMPARE (stdc_leading_zeros (b++), ++ sizeof (long long int) * CHAR_BIT - 2); ++ TEST_COMPARE (b, 4); ++ TEST_COMPARE (stdc_leading_ones (a++), 0); ++ TEST_COMPARE (a, 5); ++ TEST_COMPARE (stdc_leading_ones (b++), 0); ++ TEST_COMPARE (b, 5); ++ TEST_COMPARE (stdc_trailing_zeros (a++), 0); ++ TEST_COMPARE (a, 6); ++ TEST_COMPARE (stdc_trailing_zeros (b++), 0); ++ TEST_COMPARE (b, 6); ++ TEST_COMPARE (stdc_trailing_ones (a++), 0); ++ TEST_COMPARE (a, 7); ++ TEST_COMPARE (stdc_trailing_ones (b++), 0); ++ TEST_COMPARE (b, 7); ++ TEST_COMPARE (stdc_first_leading_zero (a++), 1); ++ TEST_COMPARE (a, 8); ++ TEST_COMPARE (stdc_first_leading_zero (b++), 1); ++ TEST_COMPARE (b, 8); ++ TEST_COMPARE (stdc_first_leading_one (a++), CHAR_BIT - 3); ++ TEST_COMPARE (a, 9); ++ TEST_COMPARE (stdc_first_leading_one (b++), ++ sizeof (long long int) * CHAR_BIT - 3); ++ TEST_COMPARE (b, 9); ++ TEST_COMPARE (stdc_first_trailing_zero (a++), 2); ++ TEST_COMPARE (a, 10); ++ TEST_COMPARE (stdc_first_trailing_zero (b++), 2); ++ TEST_COMPARE (b, 10); ++ TEST_COMPARE (stdc_first_trailing_one (a++), 2); ++ TEST_COMPARE (a, 11); ++ TEST_COMPARE (stdc_first_trailing_one (b++), 2); ++ TEST_COMPARE (b, 11); ++ TEST_COMPARE (stdc_count_zeros (a++), CHAR_BIT - 3); ++ TEST_COMPARE (a, 12); ++ TEST_COMPARE (stdc_count_zeros (b++), ++ sizeof (long long int) * CHAR_BIT - 3); ++ TEST_COMPARE (b, 12); ++ TEST_COMPARE (stdc_count_ones (a++), 2); ++ TEST_COMPARE (a, 13); ++ TEST_COMPARE (stdc_count_ones (b++), 2); ++ TEST_COMPARE (b, 13); ++ TEST_COMPARE (stdc_has_single_bit (a++), 0); ++ TEST_COMPARE (a, 14); ++ TEST_COMPARE (stdc_has_single_bit (b++), 0); ++ TEST_COMPARE (b, 14); ++# ifdef BITINT_MAXWIDTH ++# if BITINT_MAXWIDTH >= 64 ++ TEST_COMPARE (stdc_leading_zeros (0uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros (0uwb), ui), 1); ++ TEST_COMPARE (stdc_leading_zeros (1uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros (1uwb), ui), 1); ++ TEST_COMPARE (stdc_leading_ones (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones (0uwb), ui), 1); ++ TEST_COMPARE (stdc_leading_ones (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones (1uwb), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (0uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (0uwb), ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (1uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros (1uwb), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones (0uwb), ui), 1); ++ TEST_COMPARE (stdc_trailing_ones (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones (1uwb), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (0uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (0uwb), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (1uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero (1uwb), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one (0uwb), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one (1uwb), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (0uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (0uwb), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (1uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero (1uwb), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (0uwb), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one (1uwb), ui), 1); ++ TEST_COMPARE (stdc_count_zeros (0uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros (0uwb), ui), 1); ++ TEST_COMPARE (stdc_count_zeros (1uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros (1uwb), ui), 1); ++ TEST_COMPARE (stdc_count_ones (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones (0uwb), ui), 1); ++ TEST_COMPARE (stdc_count_ones (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_count_ones (1uwb), ui), 1); ++ TEST_COMPARE (stdc_has_single_bit (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit (0uwb), _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit (1uwb), _Bool), 1); ++ TEST_COMPARE (stdc_bit_width (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width (0uwb), ui), 1); ++ TEST_COMPARE (stdc_bit_width (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_bit_width (1uwb), ui), 1); ++ TEST_COMPARE (stdc_bit_floor (0uwb), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor (0uwb), unsigned _BitInt(1)), 1); ++ TEST_COMPARE (stdc_bit_floor (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor (1uwb), unsigned _BitInt(1)), 1); ++ TEST_COMPARE (stdc_bit_ceil (0uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil (0uwb), unsigned _BitInt(1)), 1); ++ TEST_COMPARE (stdc_bit_ceil (1uwb), 1); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil (1uwb), unsigned _BitInt(1)), 1); ++ unsigned _BitInt(1) c = 0; ++ TEST_COMPARE (stdc_bit_floor (c++), 0); ++ TEST_COMPARE (c, 1); ++ TEST_COMPARE (stdc_bit_floor (c++), 1); ++ TEST_COMPARE (c, 0); ++ TEST_COMPARE (stdc_bit_ceil (c++), 1); ++ TEST_COMPARE (c, 1); ++ TEST_COMPARE (stdc_bit_ceil (c++), 1); ++ TEST_COMPARE (c, 0); ++# endif ++# if BITINT_MAXWIDTH >= 512 ++ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (expr_has_type (stdc_leading_zeros ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (stdc_leading_zeros (~(unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(512)) 275), 512 - 9); ++ TEST_COMPARE (stdc_leading_zeros ((unsigned _BitInt(373)) 512), 373 - 10); ++ TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_leading_ones ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_leading_ones ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(512)) 275), 512 - 9); ++ TEST_COMPARE (stdc_leading_ones (~(unsigned _BitInt(373)) 512), 373 - 10); ++ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (expr_has_type (stdc_trailing_zeros ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (stdc_trailing_zeros (~(unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(512)) 256), 8); ++ TEST_COMPARE (stdc_trailing_zeros ((unsigned _BitInt(373)) 512), 9); ++ TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_trailing_ones ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (stdc_trailing_ones (~(unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (stdc_trailing_ones ((unsigned _BitInt(512)) 255), 8); ++ TEST_COMPARE (stdc_trailing_ones ((~(unsigned _BitInt(373)) 0) >> 2), ++ 373 - 2); ++ TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(512)) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(512)) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero ((unsigned _BitInt(373)) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_zero ((unsigned _BitInt(373)) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(512)) 511), ++ 512 - 8); ++ TEST_COMPARE (stdc_first_leading_zero (~(unsigned _BitInt(373)) 1023), ++ 373 - 9); ++ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(512)) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_leading_one ((unsigned _BitInt(373)) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(512)) 0), 1); ++ TEST_COMPARE (stdc_first_leading_one (~(unsigned _BitInt(373)) 0), 1); ++ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(512)) 275), 512 - 8); ++ TEST_COMPARE (stdc_first_leading_one ((unsigned _BitInt(373)) 512), 373 - 9); ++ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned ++ _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 0), 1); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_zero ((unsigned ++ _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (stdc_first_trailing_zero (~(unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(512)) 255), 9); ++ TEST_COMPARE (stdc_first_trailing_zero ((unsigned _BitInt(373)) 511), 10); ++ TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(512)) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_first_trailing_one ((unsigned _BitInt(373)) ++ 0), ui), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(512)) 0), 1); ++ TEST_COMPARE (stdc_first_trailing_one (~(unsigned _BitInt(373)) 0), 1); ++ TEST_COMPARE (stdc_first_trailing_one (((unsigned _BitInt(512)) 255) << 175), ++ 176); ++ TEST_COMPARE (stdc_first_trailing_one ((~(unsigned _BitInt(373)) 0) << 311), ++ 312); ++ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (expr_has_type (stdc_count_zeros ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (stdc_count_zeros (~(unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(512)) 1315), 512 - 5); ++ TEST_COMPARE (stdc_count_zeros ((unsigned _BitInt(373)) 3363), 373 - 6); ++ TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_count_ones ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_count_ones ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(512)) 1315), 512 - 5); ++ TEST_COMPARE (stdc_count_ones (~(unsigned _BitInt(373)) 3363), 373 - 6); ++ TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(512)) 0), ++ _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_has_single_bit ((unsigned _BitInt(373)) 0), ++ _Bool), 1); ++ TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (stdc_has_single_bit (~(unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(512)) 1022) << 279), ++ 0); ++ TEST_COMPARE (stdc_has_single_bit (((unsigned _BitInt(373)) 12) << 305), 0); ++ TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(512)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(512)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_bit_width ((unsigned _BitInt(373)) 0), 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_width ((unsigned _BitInt(373)) 0), ++ ui), 1); ++ TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(512)) 0), 512); ++ TEST_COMPARE (stdc_bit_width (~(unsigned _BitInt(373)) 0), 373); ++ TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(512)) 1023) << 405), ++ 405 + 10); ++ TEST_COMPARE (stdc_bit_width (((unsigned _BitInt(373)) 1024) << 242), ++ 242 + 11); ++ TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(512)) 0) != 0, 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(512)) 0), ++ unsigned _BitInt(512)), 1); ++ TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(373)) 0) != 0, 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_floor ((unsigned _BitInt(373)) 0), ++ unsigned _BitInt(373)), 1); ++ TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(512)) 0) ++ != ((unsigned _BitInt(512)) 1) << (512 - 1), 0); ++ TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(373)) 0) ++ != ((unsigned _BitInt(373)) 1) << (373 - 1), 0); ++ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(512)) 511) << 405) ++ != (((unsigned _BitInt(512)) 256) << 405), 0); ++ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(373)) 512) << 242) ++ != (((unsigned _BitInt(512)) 512) << 242), 0); ++ TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(512)) 0) != 1, 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(512)) 0), ++ unsigned _BitInt(512)), 1); ++ TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(373)) 0) != 1, 0); ++ TEST_COMPARE (expr_has_type (stdc_bit_ceil ((unsigned _BitInt(373)) 0), ++ unsigned _BitInt(373)), 1); ++ TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(512)) 0) != 0, 0); ++ TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(373)) 0) != 0, 0); ++ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 1) << (512 - 1)) ++ != ((unsigned _BitInt(512)) 1) << (512 - 1), 0); ++ TEST_COMPARE (stdc_bit_ceil ((~(unsigned _BitInt(373)) 0) >> 1) ++ != ((unsigned _BitInt(373)) 1) << (373 - 1), 0); ++ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(512)) 512) << 405) ++ != (((unsigned _BitInt(512)) 512) << 405), 0); ++ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(373)) 513) << 242) ++ != (((unsigned _BitInt(512)) 1024) << 242), 0); ++ TEST_COMPARE (stdc_bit_floor ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0, ++ 0); ++ TEST_COMPARE (stdc_bit_floor (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) ++ != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH ++ - 1), 0); ++ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 511) ++ << 405) ++ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 256) << 405), 0); ++ TEST_COMPARE (stdc_bit_floor (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) ++ << 405) ++ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0); ++ TEST_COMPARE (stdc_bit_ceil ((unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 1, 0); ++ TEST_COMPARE (stdc_bit_ceil (~(unsigned _BitInt(BITINT_MAXWIDTH)) 0) != 0, ++ 0); ++ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 1) ++ << (BITINT_MAXWIDTH - 1)) ++ != ((unsigned _BitInt(BITINT_MAXWIDTH)) 1) << (BITINT_MAXWIDTH ++ - 1), 0); ++ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) ++ << 405) ++ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 512) << 405), 0); ++ TEST_COMPARE (stdc_bit_ceil (((unsigned _BitInt(BITINT_MAXWIDTH)) 513) ++ << 405) ++ != (((unsigned _BitInt(BITINT_MAXWIDTH)) 1024) << 405), 0); ++# endif ++# endif ++ return 0; ++} ++#else ++static int ++do_test (void) ++{ ++ return 0; ++} ++#endif ++ ++#include +diff --git a/support/Makefile b/support/Makefile +index 362a51f882..aa57207bdc 100644 +--- a/support/Makefile ++++ b/support/Makefile +@@ -131,6 +131,7 @@ libsupport-routines = \ + xfreopen \ + xftruncate \ + xgetline \ ++ xgetpeername \ + xgetsockname \ + xlisten \ + xlseek \ +diff --git a/support/xgetpeername.c b/support/xgetpeername.c +new file mode 100644 +index 0000000000..6f448e456a +--- /dev/null ++++ b/support/xgetpeername.c +@@ -0,0 +1,30 @@ ++/* getpeername with error checking. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++#include ++#include ++#include ++ ++void ++xgetpeername (int fd, struct sockaddr *sa, socklen_t *plen) ++{ ++ if (getpeername (fd, sa, plen) != 0) ++ FAIL_EXIT1 ("getpeername (%d): %m", fd); ++} +diff --git a/support/xsocket.h b/support/xsocket.h +index 3e44103546..4ac0e1f5ff 100644 +--- a/support/xsocket.h ++++ b/support/xsocket.h +@@ -26,6 +26,7 @@ + int xsocket (int, int, int); + void xsetsockopt (int, int, int, const void *, socklen_t); + void xgetsockname (int, struct sockaddr *, socklen_t *); ++void xgetpeername (int, struct sockaddr *, socklen_t *); + void xconnect (int, const struct sockaddr *, socklen_t); + void xbind (int, const struct sockaddr *, socklen_t); + void xlisten (int, int); +diff --git a/sysdeps/aarch64/configure b/sysdeps/aarch64/configure +old mode 100644 +new mode 100755 +index ca57edce47..9606137e8d +--- a/sysdeps/aarch64/configure ++++ b/sysdeps/aarch64/configure +@@ -325,9 +325,10 @@ then : + printf %s "(cached) " >&6 + else $as_nop + cat > conftest.s <<\EOF +- ptrue p0.b ++ .arch armv8.2-a+sve ++ ptrue p0.b + EOF +-if { ac_try='${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&5' ++if { ac_try='${CC-cc} -c conftest.s 1>&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? +diff --git a/sysdeps/aarch64/configure.ac b/sysdeps/aarch64/configure.ac +index 27874eceb4..56d12d661d 100644 +--- a/sysdeps/aarch64/configure.ac ++++ b/sysdeps/aarch64/configure.ac +@@ -90,9 +90,10 @@ LIBC_CONFIG_VAR([aarch64-variant-pcs], [$libc_cv_aarch64_variant_pcs]) + # Check if asm support armv8.2-a+sve + AC_CACHE_CHECK([for SVE support in assembler], [libc_cv_aarch64_sve_asm], [dnl + cat > conftest.s <<\EOF +- ptrue p0.b ++ .arch armv8.2-a+sve ++ ptrue p0.b + EOF +-if AC_TRY_COMMAND(${CC-cc} -c -march=armv8.2-a+sve conftest.s 1>&AS_MESSAGE_LOG_FD); then ++if AC_TRY_COMMAND(${CC-cc} -c conftest.s 1>&AS_MESSAGE_LOG_FD); then + libc_cv_aarch64_sve_asm=yes + else + libc_cv_aarch64_sve_asm=no +diff --git a/sysdeps/aarch64/cpu-features.h b/sysdeps/aarch64/cpu-features.h +index 77a782422a..5f2da91ebb 100644 +--- a/sysdeps/aarch64/cpu-features.h ++++ b/sysdeps/aarch64/cpu-features.h +@@ -71,6 +71,7 @@ struct cpu_features + /* Currently, the GLIBC memory tagging tunable only defines 8 bits. */ + uint8_t mte_state; + bool sve; ++ bool prefer_sve_ifuncs; + bool mops; + }; + +diff --git a/sysdeps/aarch64/fpu/acos_advsimd.c b/sysdeps/aarch64/fpu/acos_advsimd.c +index a8eabb5e71..0a86c9823a 100644 +--- a/sysdeps/aarch64/fpu/acos_advsimd.c ++++ b/sysdeps/aarch64/fpu/acos_advsimd.c +@@ -40,8 +40,8 @@ static const struct data + }; + + #define AllMask v_u64 (0xffffffffffffffff) +-#define Oneu (0x3ff0000000000000) +-#define Small (0x3e50000000000000) /* 2^-53. */ ++#define Oneu 0x3ff0000000000000 ++#define Small 0x3e50000000000000 /* 2^-53. */ + + #if WANT_SIMD_EXCEPT + static float64x2_t VPCS_ATTR NOINLINE +diff --git a/sysdeps/aarch64/fpu/asin_advsimd.c b/sysdeps/aarch64/fpu/asin_advsimd.c +index 141646e954..2de6eff407 100644 +--- a/sysdeps/aarch64/fpu/asin_advsimd.c ++++ b/sysdeps/aarch64/fpu/asin_advsimd.c +@@ -39,8 +39,8 @@ static const struct data + }; + + #define AllMask v_u64 (0xffffffffffffffff) +-#define One (0x3ff0000000000000) +-#define Small (0x3e50000000000000) /* 2^-12. */ ++#define One 0x3ff0000000000000 ++#define Small 0x3e50000000000000 /* 2^-12. */ + + #if WANT_SIMD_EXCEPT + static float64x2_t VPCS_ATTR NOINLINE +diff --git a/sysdeps/aarch64/fpu/atan2_sve.c b/sysdeps/aarch64/fpu/atan2_sve.c +index 09a4c559b8..04fa71fa37 100644 +--- a/sysdeps/aarch64/fpu/atan2_sve.c ++++ b/sysdeps/aarch64/fpu/atan2_sve.c +@@ -37,9 +37,6 @@ static const struct data + .pi_over_2 = 0x1.921fb54442d18p+0, + }; + +-/* Useful constants. */ +-#define SignMask sv_u64 (0x8000000000000000) +- + /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ + static svfloat64_t NOINLINE + special_case (svfloat64_t y, svfloat64_t x, svfloat64_t ret, +@@ -72,14 +69,15 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + svbool_t cmp_y = zeroinfnan (iy, pg); + svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); + +- svuint64_t sign_x = svand_x (pg, ix, SignMask); +- svuint64_t sign_y = svand_x (pg, iy, SignMask); +- svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); +- + svfloat64_t ax = svabs_x (pg, x); + svfloat64_t ay = svabs_x (pg, y); ++ svuint64_t iax = svreinterpret_u64 (ax); ++ svuint64_t iay = svreinterpret_u64 (ay); ++ ++ svuint64_t sign_x = sveor_x (pg, ix, iax); ++ svuint64_t sign_y = sveor_x (pg, iy, iay); ++ svuint64_t sign_xy = sveor_x (pg, sign_x, sign_y); + +- svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + + /* Set up z for call to atan. */ +@@ -88,8 +86,9 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + svfloat64_t z = svdiv_x (pg, n, d); + + /* Work out the correct shift. */ +- svfloat64_t shift = svsel (pred_xlt0, sv_f64 (-2.0), sv_f64 (0.0)); +- shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); ++ svfloat64_t shift = svreinterpret_f64 (svlsr_x (pg, sign_x, 1)); ++ shift = svsel (pred_aygtax, sv_f64 (1.0), shift); ++ shift = svreinterpret_f64 (svorr_x (pg, sign_x, svreinterpret_u64 (shift))); + shift = svmul_x (pg, shift, data_ptr->pi_over_2); + + /* Use split Estrin scheme for P(z^2) with deg(P)=19. */ +@@ -109,10 +108,10 @@ svfloat64_t SV_NAME_D2 (atan2) (svfloat64_t y, svfloat64_t x, const svbool_t pg) + ret = svadd_m (pg, ret, shift); + + /* Account for the sign of x and y. */ +- ret = svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); +- + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) +- return special_case (y, x, ret, cmp_xy); +- +- return ret; ++ return special_case ( ++ y, x, ++ svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)), ++ cmp_xy); ++ return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (ret), sign_xy)); + } +diff --git a/sysdeps/aarch64/fpu/atan2f_sve.c b/sysdeps/aarch64/fpu/atan2f_sve.c +index b92f83cdea..9ea197147c 100644 +--- a/sysdeps/aarch64/fpu/atan2f_sve.c ++++ b/sysdeps/aarch64/fpu/atan2f_sve.c +@@ -32,10 +32,8 @@ static const struct data + .pi_over_2 = 0x1.921fb6p+0f, + }; + +-#define SignMask sv_u32 (0x80000000) +- + /* Special cases i.e. 0, infinity, nan (fall back to scalar calls). */ +-static inline svfloat32_t ++static svfloat32_t NOINLINE + special_case (svfloat32_t y, svfloat32_t x, svfloat32_t ret, + const svbool_t cmp) + { +@@ -67,14 +65,15 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + svbool_t cmp_y = zeroinfnan (iy, pg); + svbool_t cmp_xy = svorr_z (pg, cmp_x, cmp_y); + +- svuint32_t sign_x = svand_x (pg, ix, SignMask); +- svuint32_t sign_y = svand_x (pg, iy, SignMask); +- svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); +- + svfloat32_t ax = svabs_x (pg, x); + svfloat32_t ay = svabs_x (pg, y); ++ svuint32_t iax = svreinterpret_u32 (ax); ++ svuint32_t iay = svreinterpret_u32 (ay); ++ ++ svuint32_t sign_x = sveor_x (pg, ix, iax); ++ svuint32_t sign_y = sveor_x (pg, iy, iay); ++ svuint32_t sign_xy = sveor_x (pg, sign_x, sign_y); + +- svbool_t pred_xlt0 = svcmplt (pg, x, 0.0); + svbool_t pred_aygtax = svcmpgt (pg, ay, ax); + + /* Set up z for call to atan. */ +@@ -83,11 +82,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + svfloat32_t z = svdiv_x (pg, n, d); + + /* Work out the correct shift. */ +- svfloat32_t shift = svsel (pred_xlt0, sv_f32 (-2.0), sv_f32 (0.0)); +- shift = svsel (pred_aygtax, svadd_x (pg, shift, 1.0), shift); ++ svfloat32_t shift = svreinterpret_f32 (svlsr_x (pg, sign_x, 1)); ++ shift = svsel (pred_aygtax, sv_f32 (1.0), shift); ++ shift = svreinterpret_f32 (svorr_x (pg, sign_x, svreinterpret_u32 (shift))); + shift = svmul_x (pg, shift, sv_f32 (data_ptr->pi_over_2)); + +- /* Use split Estrin scheme for P(z^2) with deg(P)=7. */ ++ /* Use pure Estrin scheme for P(z^2) with deg(P)=7. */ + svfloat32_t z2 = svmul_x (pg, z, z); + svfloat32_t z4 = svmul_x (pg, z2, z2); + svfloat32_t z8 = svmul_x (pg, z4, z4); +@@ -101,10 +101,12 @@ svfloat32_t SV_NAME_F2 (atan2) (svfloat32_t y, svfloat32_t x, const svbool_t pg) + ret = svadd_m (pg, ret, shift); + + /* Account for the sign of x and y. */ +- ret = svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); + + if (__glibc_unlikely (svptest_any (pg, cmp_xy))) +- return special_case (y, x, ret, cmp_xy); ++ return special_case ( ++ y, x, ++ svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)), ++ cmp_xy); + +- return ret; ++ return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (ret), sign_xy)); + } +diff --git a/sysdeps/aarch64/fpu/cos_advsimd.c b/sysdeps/aarch64/fpu/cos_advsimd.c +index 2897e8b909..3924c9ce44 100644 +--- a/sysdeps/aarch64/fpu/cos_advsimd.c ++++ b/sysdeps/aarch64/fpu/cos_advsimd.c +@@ -63,8 +63,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (cos) (float64x2_t x) + special-case handler later. */ + r = vbslq_f64 (cmp, v_f64 (1.0), r); + #else +- cmp = vcageq_f64 (d->range_val, x); +- cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f64 (x, d->range_val); + r = x; + #endif + +diff --git a/sysdeps/aarch64/fpu/cosf_advsimd.c b/sysdeps/aarch64/fpu/cosf_advsimd.c +index 60abc8dfcf..d0c285b03a 100644 +--- a/sysdeps/aarch64/fpu/cosf_advsimd.c ++++ b/sysdeps/aarch64/fpu/cosf_advsimd.c +@@ -64,8 +64,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (cos) (float32x4_t x) + special-case handler later. */ + r = vbslq_f32 (cmp, v_f32 (1.0f), r); + #else +- cmp = vcageq_f32 (d->range_val, x); +- cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f32 (x, d->range_val); + r = x; + #endif + +diff --git a/sysdeps/aarch64/fpu/exp10_advsimd.c b/sysdeps/aarch64/fpu/exp10_advsimd.c +index fe7149b191..eeb31ca839 100644 +--- a/sysdeps/aarch64/fpu/exp10_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp10_advsimd.c +@@ -57,7 +57,7 @@ const static struct data + # define BigBound v_u64 (0x4070000000000000) /* asuint64 (0x1p8). */ + # define Thres v_u64 (0x2070000000000000) /* BigBound - TinyBound. */ + +-static inline float64x2_t VPCS_ATTR ++static float64x2_t VPCS_ATTR NOINLINE + special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + { + /* If fenv exceptions are to be triggered correctly, fall back to the scalar +@@ -72,7 +72,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ + # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ + +-static float64x2_t VPCS_ATTR NOINLINE ++static inline float64x2_t VPCS_ATTR + special_case (float64x2_t s, float64x2_t y, float64x2_t n, + const struct data *d) + { +diff --git a/sysdeps/aarch64/fpu/exp10f_advsimd.c b/sysdeps/aarch64/fpu/exp10f_advsimd.c +index 7ee0c90948..ab117b69da 100644 +--- a/sysdeps/aarch64/fpu/exp10f_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp10f_advsimd.c +@@ -25,7 +25,8 @@ + static const struct data + { + float32x4_t poly[5]; +- float32x4_t shift, log10_2, log2_10_hi, log2_10_lo; ++ float32x4_t log10_2_and_inv, shift; ++ + #if !WANT_SIMD_EXCEPT + float32x4_t scale_thresh; + #endif +@@ -38,9 +39,9 @@ static const struct data + .poly = { V4 (0x1.26bb16p+1f), V4 (0x1.5350d2p+1f), V4 (0x1.04744ap+1f), + V4 (0x1.2d8176p+0f), V4 (0x1.12b41ap-1f) }, + .shift = V4 (0x1.8p23f), +- .log10_2 = V4 (0x1.a934fp+1), +- .log2_10_hi = V4 (0x1.344136p-2), +- .log2_10_lo = V4 (-0x1.ec10cp-27), ++ ++ /* Stores constants 1/log10(2), log10(2)_high, log10(2)_low, 0. */ ++ .log10_2_and_inv = { 0x1.a934fp+1, 0x1.344136p-2, -0x1.ec10cp-27, 0 }, + #if !WANT_SIMD_EXCEPT + .scale_thresh = V4 (ScaleBound) + #endif +@@ -98,24 +99,22 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (exp10) (float32x4_t x) + #if WANT_SIMD_EXCEPT + /* asuint(x) - TinyBound >= BigBound - TinyBound. */ + uint32x4_t cmp = vcgeq_u32 ( +- vsubq_u32 (vandq_u32 (vreinterpretq_u32_f32 (x), v_u32 (0x7fffffff)), +- TinyBound), +- Thres); ++ vsubq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (x)), TinyBound), Thres); + float32x4_t xm = x; + /* If any lanes are special, mask them with 1 and retain a copy of x to allow + special case handler to fix special lanes later. This is only necessary if + fenv exceptions are to be triggered correctly. */ + if (__glibc_unlikely (v_any_u32 (cmp))) +- x = vbslq_f32 (cmp, v_f32 (1), x); ++ x = v_zerofy_f32 (x, cmp); + #endif + + /* exp10(x) = 2^n * 10^r = 2^n * (1 + poly (r)), + with poly(r) in [1/sqrt(2), sqrt(2)] and + x = r + n * log10 (2), with r in [-log10(2)/2, log10(2)/2]. */ +- float32x4_t z = vfmaq_f32 (d->shift, x, d->log10_2); ++ float32x4_t z = vfmaq_laneq_f32 (d->shift, x, d->log10_2_and_inv, 0); + float32x4_t n = vsubq_f32 (z, d->shift); +- float32x4_t r = vfmsq_f32 (x, n, d->log2_10_hi); +- r = vfmsq_f32 (r, n, d->log2_10_lo); ++ float32x4_t r = vfmsq_laneq_f32 (x, n, d->log10_2_and_inv, 1); ++ r = vfmsq_laneq_f32 (r, n, d->log10_2_and_inv, 2); + uint32x4_t e = vshlq_n_u32 (vreinterpretq_u32_f32 (z), 23); + + float32x4_t scale = vreinterpretq_f32_u32 (vaddq_u32 (e, ExponentBias)); +diff --git a/sysdeps/aarch64/fpu/exp2_advsimd.c b/sysdeps/aarch64/fpu/exp2_advsimd.c +index 391a93180c..ae1e63d503 100644 +--- a/sysdeps/aarch64/fpu/exp2_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp2_advsimd.c +@@ -24,6 +24,7 @@ + #define IndexMask (N - 1) + #define BigBound 1022.0 + #define UOFlowBound 1280.0 ++#define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ + + static const struct data + { +@@ -48,14 +49,13 @@ lookup_sbits (uint64x2_t i) + + #if WANT_SIMD_EXCEPT + +-# define TinyBound 0x2000000000000000 /* asuint64(0x1p-511). */ + # define Thres 0x2080000000000000 /* asuint64(512.0) - TinyBound. */ + + /* Call scalar exp2 as a fallback. */ + static float64x2_t VPCS_ATTR NOINLINE +-special_case (float64x2_t x) ++special_case (float64x2_t x, float64x2_t y, uint64x2_t is_special) + { +- return v_call_f64 (exp2, x, x, v_u64 (0xffffffffffffffff)); ++ return v_call_f64 (exp2, x, y, is_special); + } + + #else +@@ -65,7 +65,7 @@ special_case (float64x2_t x) + # define SpecialBias1 0x7000000000000000 /* 0x1p769. */ + # define SpecialBias2 0x3010000000000000 /* 0x1p-254. */ + +-static float64x2_t VPCS_ATTR ++static inline float64x2_t VPCS_ATTR + special_case (float64x2_t s, float64x2_t y, float64x2_t n, + const struct data *d) + { +@@ -94,10 +94,10 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) + #if WANT_SIMD_EXCEPT + uint64x2_t ia = vreinterpretq_u64_f64 (vabsq_f64 (x)); + cmp = vcgeq_u64 (vsubq_u64 (ia, v_u64 (TinyBound)), v_u64 (Thres)); +- /* If any special case (inf, nan, small and large x) is detected, +- fall back to scalar for all lanes. */ +- if (__glibc_unlikely (v_any_u64 (cmp))) +- return special_case (x); ++ /* Mask special lanes and retain a copy of x for passing to special-case ++ handler. */ ++ float64x2_t xc = x; ++ x = v_zerofy_f64 (x, cmp); + #else + cmp = vcagtq_f64 (x, d->scale_big_bound); + #endif +@@ -120,9 +120,11 @@ float64x2_t V_NAME_D1 (exp2) (float64x2_t x) + float64x2_t y = v_pairwise_poly_3_f64 (r, r2, d->poly); + y = vmulq_f64 (r, y); + +-#if !WANT_SIMD_EXCEPT + if (__glibc_unlikely (v_any_u64 (cmp))) ++#if !WANT_SIMD_EXCEPT + return special_case (s, y, n, d); ++#else ++ return special_case (xc, vfmaq_f64 (s, s, y), cmp); + #endif + return vfmaq_f64 (s, s, y); + } +diff --git a/sysdeps/aarch64/fpu/exp2f_sve.c b/sysdeps/aarch64/fpu/exp2f_sve.c +index 9a5a523a10..8a686e3e05 100644 +--- a/sysdeps/aarch64/fpu/exp2f_sve.c ++++ b/sysdeps/aarch64/fpu/exp2f_sve.c +@@ -20,6 +20,8 @@ + #include "sv_math.h" + #include "poly_sve_f32.h" + ++#define Thres 0x1.5d5e2ap+6f ++ + static const struct data + { + float poly[5]; +@@ -33,7 +35,7 @@ static const struct data + .shift = 0x1.903f8p17f, + /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled + correctly by FEXPA. */ +- .thres = 0x1.5d5e2ap+6f, ++ .thres = Thres, + }; + + static svfloat32_t NOINLINE +diff --git a/sysdeps/aarch64/fpu/exp_advsimd.c b/sysdeps/aarch64/fpu/exp_advsimd.c +index fd215f1d2c..5e3a9a0d44 100644 +--- a/sysdeps/aarch64/fpu/exp_advsimd.c ++++ b/sysdeps/aarch64/fpu/exp_advsimd.c +@@ -54,7 +54,7 @@ const static volatile struct + # define BigBound v_u64 (0x4080000000000000) /* asuint64 (0x1p9). */ + # define SpecialBound v_u64 (0x2080000000000000) /* BigBound - TinyBound. */ + +-static inline float64x2_t VPCS_ATTR ++static float64x2_t VPCS_ATTR NOINLINE + special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + { + /* If fenv exceptions are to be triggered correctly, fall back to the scalar +@@ -69,7 +69,7 @@ special_case (float64x2_t x, float64x2_t y, uint64x2_t cmp) + # define SpecialBias1 v_u64 (0x7000000000000000) /* 0x1p769. */ + # define SpecialBias2 v_u64 (0x3010000000000000) /* 0x1p-254. */ + +-static float64x2_t VPCS_ATTR NOINLINE ++static inline float64x2_t VPCS_ATTR + special_case (float64x2_t s, float64x2_t y, float64x2_t n) + { + /* 2^(n/N) may overflow, break it up into s1*s2. */ +diff --git a/sysdeps/aarch64/fpu/expm1_advsimd.c b/sysdeps/aarch64/fpu/expm1_advsimd.c +index 0b85bd06f3..3628398674 100644 +--- a/sysdeps/aarch64/fpu/expm1_advsimd.c ++++ b/sysdeps/aarch64/fpu/expm1_advsimd.c +@@ -23,7 +23,7 @@ + static const struct data + { + float64x2_t poly[11]; +- float64x2_t invln2, ln2_lo, ln2_hi, shift; ++ float64x2_t invln2, ln2, shift; + int64x2_t exponent_bias; + #if WANT_SIMD_EXCEPT + uint64x2_t thresh, tiny_bound; +@@ -38,8 +38,7 @@ static const struct data + V2 (0x1.71ddf82db5bb4p-19), V2 (0x1.27e517fc0d54bp-22), + V2 (0x1.af5eedae67435p-26), V2 (0x1.1f143d060a28ap-29) }, + .invln2 = V2 (0x1.71547652b82fep0), +- .ln2_hi = V2 (0x1.62e42fefa39efp-1), +- .ln2_lo = V2 (0x1.abc9e3b39803fp-56), ++ .ln2 = { 0x1.62e42fefa39efp-1, 0x1.abc9e3b39803fp-56 }, + .shift = V2 (0x1.8p52), + .exponent_bias = V2 (0x3ff0000000000000), + #if WANT_SIMD_EXCEPT +@@ -83,7 +82,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) + x = v_zerofy_f64 (x, special); + #else + /* Large input, NaNs and Infs. */ +- uint64x2_t special = vceqzq_u64 (vcaltq_f64 (x, d->oflow_bound)); ++ uint64x2_t special = vcageq_f64 (x, d->oflow_bound); + #endif + + /* Reduce argument to smaller range: +@@ -93,8 +92,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (expm1) (float64x2_t x) + where 2^i is exact because i is an integer. */ + float64x2_t n = vsubq_f64 (vfmaq_f64 (d->shift, d->invln2, x), d->shift); + int64x2_t i = vcvtq_s64_f64 (n); +- float64x2_t f = vfmsq_f64 (x, n, d->ln2_hi); +- f = vfmsq_f64 (f, n, d->ln2_lo); ++ float64x2_t f = vfmsq_laneq_f64 (x, n, d->ln2, 0); ++ f = vfmsq_laneq_f64 (f, n, d->ln2, 1); + + /* Approximate expm1(f) using polynomial. + Taylor expansion for expm1(x) has the form: +diff --git a/sysdeps/aarch64/fpu/expm1f_advsimd.c b/sysdeps/aarch64/fpu/expm1f_advsimd.c +index 8d4c9a2193..93db200f61 100644 +--- a/sysdeps/aarch64/fpu/expm1f_advsimd.c ++++ b/sysdeps/aarch64/fpu/expm1f_advsimd.c +@@ -23,7 +23,8 @@ + static const struct data + { + float32x4_t poly[5]; +- float32x4_t invln2, ln2_lo, ln2_hi, shift; ++ float32x4_t invln2_and_ln2; ++ float32x4_t shift; + int32x4_t exponent_bias; + #if WANT_SIMD_EXCEPT + uint32x4_t thresh; +@@ -34,9 +35,8 @@ static const struct data + /* Generated using fpminimax with degree=5 in [-log(2)/2, log(2)/2]. */ + .poly = { V4 (0x1.fffffep-2), V4 (0x1.5554aep-3), V4 (0x1.555736p-5), + V4 (0x1.12287cp-7), V4 (0x1.6b55a2p-10) }, +- .invln2 = V4 (0x1.715476p+0f), +- .ln2_hi = V4 (0x1.62e4p-1f), +- .ln2_lo = V4 (0x1.7f7d1cp-20f), ++ /* Stores constants: invln2, ln2_hi, ln2_lo, 0. */ ++ .invln2_and_ln2 = { 0x1.715476p+0f, 0x1.62e4p-1f, 0x1.7f7d1cp-20f, 0 }, + .shift = V4 (0x1.8p23f), + .exponent_bias = V4 (0x3f800000), + #if !WANT_SIMD_EXCEPT +@@ -80,7 +80,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) + x = v_zerofy_f32 (x, special); + #else + /* Handles very large values (+ve and -ve), +/-NaN, +/-Inf. */ +- uint32x4_t special = vceqzq_u32 (vcaltq_f32 (x, d->oflow_bound)); ++ uint32x4_t special = vcagtq_f32 (x, d->oflow_bound); + #endif + + /* Reduce argument to smaller range: +@@ -88,10 +88,11 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (expm1) (float32x4_t x) + and f = x - i * ln2, then f is in [-ln2/2, ln2/2]. + exp(x) - 1 = 2^i * (expm1(f) + 1) - 1 + where 2^i is exact because i is an integer. */ +- float32x4_t j = vsubq_f32 (vfmaq_f32 (d->shift, d->invln2, x), d->shift); ++ float32x4_t j = vsubq_f32 ( ++ vfmaq_laneq_f32 (d->shift, x, d->invln2_and_ln2, 0), d->shift); + int32x4_t i = vcvtq_s32_f32 (j); +- float32x4_t f = vfmsq_f32 (x, j, d->ln2_hi); +- f = vfmsq_f32 (f, j, d->ln2_lo); ++ float32x4_t f = vfmsq_laneq_f32 (x, j, d->invln2_and_ln2, 1); ++ f = vfmsq_laneq_f32 (f, j, d->invln2_and_ln2, 2); + + /* Approximate expm1(f) using polynomial. + Taylor expansion for expm1(x) has the form: +diff --git a/sysdeps/aarch64/fpu/log_advsimd.c b/sysdeps/aarch64/fpu/log_advsimd.c +index 067ae79613..21df61728c 100644 +--- a/sysdeps/aarch64/fpu/log_advsimd.c ++++ b/sysdeps/aarch64/fpu/log_advsimd.c +@@ -58,8 +58,13 @@ lookup (uint64x2_t i) + uint64_t i1 = (i[1] >> (52 - V_LOG_TABLE_BITS)) & IndexMask; + float64x2_t e0 = vld1q_f64 (&__v_log_data.table[i0].invc); + float64x2_t e1 = vld1q_f64 (&__v_log_data.table[i1].invc); ++#if __BYTE_ORDER == __LITTLE_ENDIAN + e.invc = vuzp1q_f64 (e0, e1); + e.logc = vuzp2q_f64 (e0, e1); ++#else ++ e.invc = vuzp1q_f64 (e1, e0); ++ e.logc = vuzp2q_f64 (e1, e0); ++#endif + return e; + } + +diff --git a/sysdeps/aarch64/fpu/sin_advsimd.c b/sysdeps/aarch64/fpu/sin_advsimd.c +index efce183e86..a0d9d3b819 100644 +--- a/sysdeps/aarch64/fpu/sin_advsimd.c ++++ b/sysdeps/aarch64/fpu/sin_advsimd.c +@@ -75,8 +75,7 @@ float64x2_t VPCS_ATTR V_NAME_D1 (sin) (float64x2_t x) + r = vbslq_f64 (cmp, vreinterpretq_f64_u64 (cmp), x); + #else + r = x; +- cmp = vcageq_f64 (d->range_val, x); +- cmp = vceqzq_u64 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f64 (x, d->range_val); + #endif + + /* n = rint(|x|/pi). */ +diff --git a/sysdeps/aarch64/fpu/sinf_advsimd.c b/sysdeps/aarch64/fpu/sinf_advsimd.c +index 60cf3f2ca1..375dfc3331 100644 +--- a/sysdeps/aarch64/fpu/sinf_advsimd.c ++++ b/sysdeps/aarch64/fpu/sinf_advsimd.c +@@ -67,8 +67,7 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (sin) (float32x4_t x) + r = vbslq_f32 (cmp, vreinterpretq_f32_u32 (cmp), x); + #else + r = x; +- cmp = vcageq_f32 (d->range_val, x); +- cmp = vceqzq_u32 (cmp); /* cmp = ~cmp. */ ++ cmp = vcageq_f32 (x, d->range_val); + #endif + + /* n = rint(|x|/pi) */ +diff --git a/sysdeps/aarch64/fpu/tan_advsimd.c b/sysdeps/aarch64/fpu/tan_advsimd.c +index d7e5ba7b1a..0459821ab2 100644 +--- a/sysdeps/aarch64/fpu/tan_advsimd.c ++++ b/sysdeps/aarch64/fpu/tan_advsimd.c +@@ -23,7 +23,7 @@ + static const struct data + { + float64x2_t poly[9]; +- float64x2_t half_pi_hi, half_pi_lo, two_over_pi, shift; ++ float64x2_t half_pi, two_over_pi, shift; + #if !WANT_SIMD_EXCEPT + float64x2_t range_val; + #endif +@@ -34,8 +34,7 @@ static const struct data + V2 (0x1.226e5e5ecdfa3p-7), V2 (0x1.d6c7ddbf87047p-9), + V2 (0x1.7ea75d05b583ep-10), V2 (0x1.289f22964a03cp-11), + V2 (0x1.4e4fd14147622p-12) }, +- .half_pi_hi = V2 (0x1.921fb54442d18p0), +- .half_pi_lo = V2 (0x1.1a62633145c07p-54), ++ .half_pi = { 0x1.921fb54442d18p0, 0x1.1a62633145c07p-54 }, + .two_over_pi = V2 (0x1.45f306dc9c883p-1), + .shift = V2 (0x1.8p52), + #if !WANT_SIMD_EXCEPT +@@ -56,15 +55,15 @@ special_case (float64x2_t x) + + /* Vector approximation for double-precision tan. + Maximum measured error is 3.48 ULP: +- __v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 +- want -0x1.f6ccd8ecf7deap+37. */ ++ _ZGVnN2v_tan(0x1.4457047ef78d8p+20) got -0x1.f6ccd8ecf7dedp+37 ++ want -0x1.f6ccd8ecf7deap+37. */ + float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + { + const struct data *dat = ptr_barrier (&data); +- /* Our argument reduction cannot calculate q with sufficient accuracy for very +- large inputs. Fall back to scalar routine for all lanes if any are too +- large, or Inf/NaN. If fenv exceptions are expected, also fall back for tiny +- input to avoid underflow. */ ++ /* Our argument reduction cannot calculate q with sufficient accuracy for ++ very large inputs. Fall back to scalar routine for all lanes if any are ++ too large, or Inf/NaN. If fenv exceptions are expected, also fall back for ++ tiny input to avoid underflow. */ + #if WANT_SIMD_EXCEPT + uint64x2_t iax = vreinterpretq_u64_f64 (vabsq_f64 (x)); + /* iax - tiny_bound > range_val - tiny_bound. */ +@@ -82,8 +81,8 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + /* Use q to reduce x to r in [-pi/4, pi/4], by: + r = x - q * pi/2, in extended precision. */ + float64x2_t r = x; +- r = vfmsq_f64 (r, q, dat->half_pi_hi); +- r = vfmsq_f64 (r, q, dat->half_pi_lo); ++ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 0); ++ r = vfmsq_laneq_f64 (r, q, dat->half_pi, 1); + /* Further reduce r to [-pi/8, pi/8], to be reconstructed using double angle + formula. */ + r = vmulq_n_f64 (r, 0.5); +@@ -106,14 +105,15 @@ float64x2_t VPCS_ATTR V_NAME_D1 (tan) (float64x2_t x) + and reciprocity around pi/2: + tan(x) = 1 / (tan(pi/2 - x)) + to assemble result using change-of-sign and conditional selection of +- numerator/denominator, dependent on odd/even-ness of q (hence quadrant). */ ++ numerator/denominator, dependent on odd/even-ness of q (hence quadrant). ++ */ + float64x2_t n = vfmaq_f64 (v_f64 (-1), p, p); + float64x2_t d = vaddq_f64 (p, p); + + uint64x2_t no_recip = vtstq_u64 (vreinterpretq_u64_s64 (qi), v_u64 (1)); + + #if !WANT_SIMD_EXCEPT +- uint64x2_t special = vceqzq_u64 (vcaleq_f64 (x, dat->range_val)); ++ uint64x2_t special = vcageq_f64 (x, dat->range_val); + if (__glibc_unlikely (v_any_u64 (special))) + return special_case (x); + #endif +diff --git a/sysdeps/aarch64/fpu/tanf_advsimd.c b/sysdeps/aarch64/fpu/tanf_advsimd.c +index 1f16103f8a..5a7489390a 100644 +--- a/sysdeps/aarch64/fpu/tanf_advsimd.c ++++ b/sysdeps/aarch64/fpu/tanf_advsimd.c +@@ -23,7 +23,8 @@ + static const struct data + { + float32x4_t poly[6]; +- float32x4_t neg_half_pi_1, neg_half_pi_2, neg_half_pi_3, two_over_pi, shift; ++ float32x4_t pi_consts; ++ float32x4_t shift; + #if !WANT_SIMD_EXCEPT + float32x4_t range_val; + #endif +@@ -31,10 +32,9 @@ static const struct data + /* Coefficients generated using FPMinimax. */ + .poly = { V4 (0x1.55555p-2f), V4 (0x1.11166p-3f), V4 (0x1.b88a78p-5f), + V4 (0x1.7b5756p-6f), V4 (0x1.4ef4cep-8f), V4 (0x1.0e1e74p-7f) }, +- .neg_half_pi_1 = V4 (-0x1.921fb6p+0f), +- .neg_half_pi_2 = V4 (0x1.777a5cp-25f), +- .neg_half_pi_3 = V4 (0x1.ee59dap-50f), +- .two_over_pi = V4 (0x1.45f306p-1f), ++ /* Stores constants: (-pi/2)_high, (-pi/2)_mid, (-pi/2)_low, and 2/pi. */ ++ .pi_consts ++ = { -0x1.921fb6p+0f, 0x1.777a5cp-25f, 0x1.ee59dap-50f, 0x1.45f306p-1f }, + .shift = V4 (0x1.8p+23f), + #if !WANT_SIMD_EXCEPT + .range_val = V4 (0x1p15f), +@@ -58,10 +58,11 @@ eval_poly (float32x4_t z, const struct data *d) + { + float32x4_t z2 = vmulq_f32 (z, z); + #if WANT_SIMD_EXCEPT +- /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. If fp exceptions +- are to be triggered correctly, sidestep this by fixing such lanes to 0. */ ++ /* Tiny z (<= 0x1p-31) will underflow when calculating z^4. ++ If fp exceptions are to be triggered correctly, ++ sidestep this by fixing such lanes to 0. */ + uint32x4_t will_uflow +- = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); ++ = vcleq_u32 (vreinterpretq_u32_f32 (vabsq_f32 (z)), TinyBound); + if (__glibc_unlikely (v_any_u32 (will_uflow))) + z2 = vbslq_f32 (will_uflow, v_f32 (0), z2); + #endif +@@ -94,16 +95,16 @@ float32x4_t VPCS_ATTR NOINLINE V_NAME_F1 (tan) (float32x4_t x) + #endif + + /* n = rint(x/(pi/2)). */ +- float32x4_t q = vfmaq_f32 (d->shift, d->two_over_pi, x); ++ float32x4_t q = vfmaq_laneq_f32 (d->shift, x, d->pi_consts, 3); + float32x4_t n = vsubq_f32 (q, d->shift); + /* Determine if x lives in an interval, where |tan(x)| grows to infinity. */ + uint32x4_t pred_alt = vtstq_u32 (vreinterpretq_u32_f32 (q), v_u32 (1)); + + /* r = x - n * (pi/2) (range reduction into -pi./4 .. pi/4). */ + float32x4_t r; +- r = vfmaq_f32 (x, d->neg_half_pi_1, n); +- r = vfmaq_f32 (r, d->neg_half_pi_2, n); +- r = vfmaq_f32 (r, d->neg_half_pi_3, n); ++ r = vfmaq_laneq_f32 (x, n, d->pi_consts, 0); ++ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 1); ++ r = vfmaq_laneq_f32 (r, n, d->pi_consts, 2); + + /* If x lives in an interval, where |tan(x)| + - is finite, then use a polynomial approximation of the form +diff --git a/sysdeps/aarch64/multiarch/init-arch.h b/sysdeps/aarch64/multiarch/init-arch.h +index c52860efb2..61dc40088f 100644 +--- a/sysdeps/aarch64/multiarch/init-arch.h ++++ b/sysdeps/aarch64/multiarch/init-arch.h +@@ -36,5 +36,7 @@ + MTE_ENABLED (); \ + bool __attribute__((unused)) sve = \ + GLRO(dl_aarch64_cpu_features).sve; \ ++ bool __attribute__((unused)) prefer_sve_ifuncs = \ ++ GLRO(dl_aarch64_cpu_features).prefer_sve_ifuncs; \ + bool __attribute__((unused)) mops = \ + GLRO(dl_aarch64_cpu_features).mops; +diff --git a/sysdeps/aarch64/multiarch/memcpy.c b/sysdeps/aarch64/multiarch/memcpy.c +index d12eccfca5..ce53567dab 100644 +--- a/sysdeps/aarch64/multiarch/memcpy.c ++++ b/sysdeps/aarch64/multiarch/memcpy.c +@@ -47,7 +47,7 @@ select_memcpy_ifunc (void) + { + if (IS_A64FX (midr)) + return __memcpy_a64fx; +- return __memcpy_sve; ++ return prefer_sve_ifuncs ? __memcpy_sve : __memcpy_generic; + } + + if (IS_THUNDERX (midr)) +diff --git a/sysdeps/aarch64/multiarch/memmove.c b/sysdeps/aarch64/multiarch/memmove.c +index 2081eeb4d4..fe95037be3 100644 +--- a/sysdeps/aarch64/multiarch/memmove.c ++++ b/sysdeps/aarch64/multiarch/memmove.c +@@ -47,7 +47,7 @@ select_memmove_ifunc (void) + { + if (IS_A64FX (midr)) + return __memmove_a64fx; +- return __memmove_sve; ++ return prefer_sve_ifuncs ? __memmove_sve : __memmove_generic; + } + + if (IS_THUNDERX (midr)) +diff --git a/sysdeps/aarch64/multiarch/memset_generic.S b/sysdeps/aarch64/multiarch/memset_generic.S +index 81748bdbce..e125a5ed85 100644 +--- a/sysdeps/aarch64/multiarch/memset_generic.S ++++ b/sysdeps/aarch64/multiarch/memset_generic.S +@@ -33,3 +33,7 @@ + #endif + + #include <../memset.S> ++ ++#if IS_IN (rtld) ++strong_alias (memset, __memset_generic) ++#endif +diff --git a/sysdeps/aarch64/preconfigure b/sysdeps/aarch64/preconfigure +index d9bd1f8558..19657b627b 100644 +--- a/sysdeps/aarch64/preconfigure ++++ b/sysdeps/aarch64/preconfigure +@@ -2,5 +2,6 @@ case "$machine" in + aarch64*) + base_machine=aarch64 + machine=aarch64 ++ mtls_descriptor=desc + ;; + esac +diff --git a/sysdeps/arc/utmp-size.h b/sysdeps/arc/utmp-size.h +new file mode 100644 +index 0000000000..a247fcd3da +--- /dev/null ++++ b/sysdeps/arc/utmp-size.h +@@ -0,0 +1,3 @@ ++/* arc has less padding than other architectures with 64-bit time_t. */ ++#define UTMP_SIZE 392 ++#define LASTLOG_SIZE 296 +diff --git a/sysdeps/arm/Makefile b/sysdeps/arm/Makefile +index d5cea717a9..619474eca9 100644 +--- a/sysdeps/arm/Makefile ++++ b/sysdeps/arm/Makefile +@@ -13,15 +13,15 @@ $(objpfx)libgcc-stubs.a: $(objpfx)aeabi_unwind_cpp_pr1.os + lib-noranlib: $(objpfx)libgcc-stubs.a + + ifeq ($(build-shared),yes) +-ifeq (yes,$(have-mtls-dialect-gnu2)) ++ifneq (no,$(have-mtls-descriptor)) + tests += tst-armtlsdescloc tst-armtlsdescextnow tst-armtlsdescextlazy + modules-names += tst-armtlsdesclocmod + modules-names += tst-armtlsdescextlazymod tst-armtlsdescextnowmod + CPPFLAGS-tst-armtlsdescextnowmod.c += -Dstatic= + CPPFLAGS-tst-armtlsdescextlazymod.c += -Dstatic= +-CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=gnu2 +-CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=gnu2 +-CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=gnu2 ++CFLAGS-tst-armtlsdesclocmod.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-armtlsdescextnowmod.c += -mtls-dialect=$(have-mtls-descriptor) ++CFLAGS-tst-armtlsdescextlazymod.c += -mtls-dialect=$(have-mtls-descriptor) + LDFLAGS-tst-armtlsdescextnowmod.so += -Wl,-z,now + tst-armtlsdescloc-ENV = LD_BIND_NOW=1 + tst-armtlsdescextnow-ENV = LD_BIND_NOW=1 +diff --git a/sysdeps/arm/bits/wordsize.h b/sysdeps/arm/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/arm/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/arm/configure b/sysdeps/arm/configure +index 35e2918922..4ef4d46cbd 100644 +--- a/sysdeps/arm/configure ++++ b/sysdeps/arm/configure +@@ -187,6 +187,38 @@ else + default-abi = soft" + fi + ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether VFP supports 32 registers" >&5 ++printf %s "checking whether VFP supports 32 registers... " >&6; } ++if test ${libc_cv_arm_pcs_vfp_d32+y} ++then : ++ printf %s "(cached) " >&6 ++else $as_nop ++ ++cat confdefs.h - <<_ACEOF >conftest.$ac_ext ++/* end confdefs.h. */ ++ ++void foo (void) ++{ ++ asm volatile ("vldr d16,=17" : : : "d16"); ++} ++ ++_ACEOF ++if ac_fn_c_try_compile "$LINENO" ++then : ++ libc_cv_arm_pcs_vfp_d32=yes ++else $as_nop ++ libc_cv_arm_pcs_vfp_d32=no ++fi ++rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext ++fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_arm_pcs_vfp_d32" >&5 ++printf "%s\n" "$libc_cv_arm_pcs_vfp_d32" >&6; } ++if test "$libc_cv_arm_pcs_vfp_d32" = yes ; ++then ++ printf "%s\n" "#define HAVE_ARM_PCS_VFP_D32 1" >>confdefs.h ++ ++fi ++ + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PC-relative relocs in movw/movt work properly" >&5 + printf %s "checking whether PC-relative relocs in movw/movt work properly... " >&6; } + if test ${libc_cv_arm_pcrel_movw+y} +diff --git a/sysdeps/arm/configure.ac b/sysdeps/arm/configure.ac +index 5172e30bbe..cd00ddc9d9 100644 +--- a/sysdeps/arm/configure.ac ++++ b/sysdeps/arm/configure.ac +@@ -21,6 +21,21 @@ else + LIBC_CONFIG_VAR([default-abi], [soft]) + fi + ++AC_CACHE_CHECK([whether VFP supports 32 registers], ++ libc_cv_arm_pcs_vfp_d32, [ ++AC_COMPILE_IFELSE([AC_LANG_SOURCE([[ ++void foo (void) ++{ ++ asm volatile ("vldr d16,=17" : : : "d16"); ++} ++]])], ++ [libc_cv_arm_pcs_vfp_d32=yes], ++ [libc_cv_arm_pcs_vfp_d32=no])]) ++if test "$libc_cv_arm_pcs_vfp_d32" = yes ; ++then ++ AC_DEFINE(HAVE_ARM_PCS_VFP_D32) ++fi ++ + AC_CACHE_CHECK([whether PC-relative relocs in movw/movt work properly], + libc_cv_arm_pcrel_movw, [ + cat > conftest.s <<\EOF +diff --git a/sysdeps/arm/dl-machine.h b/sysdeps/arm/dl-machine.h +index b857bbc868..dd1a0f6b6e 100644 +--- a/sysdeps/arm/dl-machine.h ++++ b/sysdeps/arm/dl-machine.h +@@ -139,7 +139,6 @@ _start:\n\ + _dl_start_user:\n\ + adr r6, .L_GET_GOT\n\ + add sl, sl, r6\n\ +- ldr r4, [sl, r4]\n\ + @ save the entry point in another register\n\ + mov r6, r0\n\ + @ get the original arg count\n\ +diff --git a/sysdeps/arm/dl-tlsdesc.S b/sysdeps/arm/dl-tlsdesc.S +index 764c56e70f..ada106521d 100644 +--- a/sysdeps/arm/dl-tlsdesc.S ++++ b/sysdeps/arm/dl-tlsdesc.S +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include + #include "tlsdesc.h" + + .text +@@ -83,14 +84,20 @@ _dl_tlsdesc_dynamic(struct tlsdesc *tdp) + .align 2 + _dl_tlsdesc_dynamic: + /* Our calling convention is to clobber r0, r1 and the processor +- flags. All others that are modified must be saved */ +- eabi_save ({r2,r3,r4,lr}) +- push {r2,r3,r4,lr} +- cfi_adjust_cfa_offset (16) ++ flags. All others that are modified must be saved. r5 is ++ used as the hwcap value to avoid reload after __tls_get_addr ++ call. If required we will save the vector register on the slow ++ path. */ ++ eabi_save ({r2,r3,r4,r5,ip,lr}) ++ push {r2,r3,r4,r5,ip,lr} ++ cfi_adjust_cfa_offset (24) + cfi_rel_offset (r2,0) + cfi_rel_offset (r3,4) + cfi_rel_offset (r4,8) +- cfi_rel_offset (lr,12) ++ cfi_rel_offset (r5,12) ++ cfi_rel_offset (ip,16) ++ cfi_rel_offset (lr,20) ++ + ldr r1, [r0] /* td */ + GET_TLS (lr) + mov r4, r0 /* r4 = tp */ +@@ -113,22 +120,69 @@ _dl_tlsdesc_dynamic: + rsbne r0, r4, r3 + bne 2f + 1: mov r0, r1 ++ ++ /* Load the hwcap to check for vector support. */ ++ ldr r2, 3f ++ ldr r1, .Lrtld_global_ro ++0: add r2, pc, r2 ++ ldr r2, [r2, r1] ++ ldr r5, [r2, #RTLD_GLOBAL_RO_DL_HWCAP_OFFSET] ++ ++#ifdef __SOFTFP__ ++ tst r5, #HWCAP_ARM_VFP ++ beq .Lno_vfp ++#endif ++ ++ /* Store the VFP registers. Don't use VFP instructions directly ++ because this code is used in non-VFP multilibs. */ ++#define VFP_STACK_REQ (32*8 + 8) ++ sub sp, sp, VFP_STACK_REQ ++ cfi_adjust_cfa_offset (VFP_STACK_REQ) ++ mov r3, sp ++ .inst 0xeca30b20 /* vstmia r3!, {d0-d15} */ ++ tst r5, #HWCAP_ARM_VFPD32 ++ beq 4f ++ .inst 0xece30b20 /* vstmia r3!, {d16-d31} */ ++ /* Store the floating-point status register. */ ++4: .inst 0xeef12a10 /* vmrs r2, fpscr */ ++ str r2, [r3] ++.Lno_vfp: + bl __tls_get_addr + rsb r0, r4, r0 ++#ifdef __SOFTFP__ ++ tst r5, #HWCAP_ARM_VFP ++ beq 2f ++#endif ++ mov r3, sp ++ .inst 0xecb30b20 /* vldmia r3!, {d0-d15} */ ++ tst r5, #HWCAP_ARM_VFPD32 ++ beq 5f ++ .inst 0xecf30b20 /* vldmia r3!, {d16-d31} */ ++ ldr r4, [r3] ++5: .inst 0xeee14a10 /* vmsr fpscr, r4 */ ++ add sp, sp, VFP_STACK_REQ ++ cfi_adjust_cfa_offset (-VFP_STACK_REQ) ++ + 2: + #if ((defined (__ARM_ARCH_4T__) && defined (__THUMB_INTERWORK__)) \ + || defined (ARM_ALWAYS_BX)) +- pop {r2,r3,r4, lr} +- cfi_adjust_cfa_offset (-16) ++ pop {r2,r3,r4,r5,ip, lr} ++ cfi_adjust_cfa_offset (-20) + cfi_restore (lr) ++ cfi_restore (ip) ++ cfi_restore (r5) + cfi_restore (r4) + cfi_restore (r3) + cfi_restore (r2) + bx lr + #else +- pop {r2,r3,r4, pc} ++ pop {r2,r3,r4,r5,ip, pc} + #endif + eabi_fnend + cfi_endproc + .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic ++ ++3: .long _GLOBAL_OFFSET_TABLE_ - 0b - PC_OFS ++.Lrtld_global_ro: ++ .long C_SYMBOL_NAME(_rtld_global_ro)(GOT) + #endif /* SHARED */ +diff --git a/sysdeps/arm/tst-gnu2-tls2.h b/sysdeps/arm/tst-gnu2-tls2.h +new file mode 100644 +index 0000000000..e413ac21fb +--- /dev/null ++++ b/sysdeps/arm/tst-gnu2-tls2.h +@@ -0,0 +1,128 @@ ++/* Test TLSDESC relocation. ARM version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++#include ++ ++#ifndef __SOFTFP__ ++ ++# ifdef HAVE_ARM_PCS_VFP_D32 ++# define SAVE_VFP_D32 \ ++ asm volatile ("vldr d16,=17" : : : "d16"); \ ++ asm volatile ("vldr d17,=18" : : : "d17"); \ ++ asm volatile ("vldr d18,=19" : : : "d18"); \ ++ asm volatile ("vldr d19,=20" : : : "d19"); \ ++ asm volatile ("vldr d20,=21" : : : "d20"); \ ++ asm volatile ("vldr d21,=22" : : : "d21"); \ ++ asm volatile ("vldr d22,=23" : : : "d22"); \ ++ asm volatile ("vldr d23,=24" : : : "d23"); \ ++ asm volatile ("vldr d24,=25" : : : "d24"); \ ++ asm volatile ("vldr d25,=26" : : : "d25"); \ ++ asm volatile ("vldr d26,=27" : : : "d26"); \ ++ asm volatile ("vldr d27,=28" : : : "d27"); \ ++ asm volatile ("vldr d28,=29" : : : "d28"); \ ++ asm volatile ("vldr d29,=30" : : : "d29"); \ ++ asm volatile ("vldr d30,=31" : : : "d30"); \ ++ asm volatile ("vldr d31,=32" : : : "d31"); ++# else ++# define SAVE_VFP_D32 ++# endif ++ ++# define INIT_TLSDESC_CALL() \ ++ unsigned long hwcap = getauxval (AT_HWCAP) ++ ++/* Set each vector register to a value from 1 to 32 before the TLS access, ++ dump to memory after TLS access, and compare with the expected values. */ ++ ++# define BEFORE_TLSDESC_CALL() \ ++ if (hwcap & HWCAP_ARM_VFP) \ ++ { \ ++ asm volatile ("vldr d0,=1" : : : "d0"); \ ++ asm volatile ("vldr d1,=2" : : : "d1"); \ ++ asm volatile ("vldr d2,=3" : : : "d1"); \ ++ asm volatile ("vldr d3,=4" : : : "d3"); \ ++ asm volatile ("vldr d4,=5" : : : "d4"); \ ++ asm volatile ("vldr d5,=6" : : : "d5"); \ ++ asm volatile ("vldr d6,=7" : : : "d6"); \ ++ asm volatile ("vldr d7,=8" : : : "d7"); \ ++ asm volatile ("vldr d8,=9" : : : "d8"); \ ++ asm volatile ("vldr d9,=10" : : : "d9"); \ ++ asm volatile ("vldr d10,=11" : : : "d10"); \ ++ asm volatile ("vldr d11,=12" : : : "d11"); \ ++ asm volatile ("vldr d12,=13" : : : "d12"); \ ++ asm volatile ("vldr d13,=14" : : : "d13"); \ ++ asm volatile ("vldr d14,=15" : : : "d14"); \ ++ asm volatile ("vldr d15,=16" : : : "d15"); \ ++ } \ ++ if (hwcap & HWCAP_ARM_VFPD32) \ ++ { \ ++ SAVE_VFP_D32 \ ++ } ++ ++# define VFP_STACK_REQ (16*8) ++# if __BYTE_ORDER == __BIG_ENDIAN ++# define DISP 7 ++# else ++# define DISP 0 ++# endif ++ ++# ifdef HAVE_ARM_PCS_VFP_D32 ++# define CHECK_VFP_D32 \ ++ char vfp[VFP_STACK_REQ]; \ ++ asm volatile ("vstmia %0, {d16-d31}\n" \ ++ : \ ++ : "r" (vfp) \ ++ : "memory"); \ ++ \ ++ char expected[VFP_STACK_REQ] = { 0 }; \ ++ for (int i = 0; i < 16; ++i) \ ++ expected[i * 8 + DISP] = i + 17; \ ++ \ ++ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \ ++ abort (); ++# else ++# define CHECK_VFP_D32 ++# endif ++ ++# define AFTER_TLSDESC_CALL() \ ++ if (hwcap & HWCAP_ARM_VFP) \ ++ { \ ++ char vfp[VFP_STACK_REQ]; \ ++ asm volatile ("vstmia %0, {d0-d15}\n" \ ++ : \ ++ : "r" (vfp) \ ++ : "memory"); \ ++ \ ++ char expected[VFP_STACK_REQ] = { 0 }; \ ++ for (int i = 0; i < 16; ++i) \ ++ expected[i * 8 + DISP] = i + 1; \ ++ \ ++ if (memcmp (vfp, expected, VFP_STACK_REQ) != 0) \ ++ abort (); \ ++ } \ ++ if (hwcap & HWCAP_ARM_VFPD32) \ ++ { \ ++ CHECK_VFP_D32 \ ++ } ++ ++#endif /* __SOFTFP__ */ ++ ++#include_next +diff --git a/sysdeps/arm/utmp-size.h b/sysdeps/arm/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/arm/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/csky/bits/wordsize.h b/sysdeps/csky/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/csky/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/csky/utmp-size.h b/sysdeps/csky/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/csky/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h +index 117c901ccc..50f58a60e3 100644 +--- a/sysdeps/generic/ldsodefs.h ++++ b/sysdeps/generic/ldsodefs.h +@@ -646,6 +646,8 @@ struct rtld_global_ro + /* Mask for more hardware capabilities that are available on some + platforms. */ + EXTERN uint64_t _dl_hwcap2; ++ EXTERN uint64_t _dl_hwcap3; ++ EXTERN uint64_t _dl_hwcap4; + + EXTERN enum dso_sort_algorithm _dl_dso_sort_algo; + +diff --git a/sysdeps/generic/utmp-size.h b/sysdeps/generic/utmp-size.h +new file mode 100644 +index 0000000000..89dbe878b0 +--- /dev/null ++++ b/sysdeps/generic/utmp-size.h +@@ -0,0 +1,23 @@ ++/* Expected sizes of utmp-related structures stored in files. 64-bit version. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++/* Expected size, in bytes, of struct utmp and struct utmpx. */ ++#define UTMP_SIZE 400 ++ ++/* Expected size, in bytes, of struct lastlog. */ ++#define LASTLOG_SIZE 296 +diff --git a/sysdeps/hppa/utmp-size.h b/sysdeps/hppa/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/hppa/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/i386/dl-machine.h b/sysdeps/i386/dl-machine.h +index fc1ef96587..50d74fe6e9 100644 +--- a/sysdeps/i386/dl-machine.h ++++ b/sysdeps/i386/dl-machine.h +@@ -347,7 +347,7 @@ and creates an unsatisfiable circular dependency.\n", + { + td->arg = _dl_make_tlsdesc_dynamic + (sym_map, sym->st_value + (ElfW(Word))td->arg); +- td->entry = _dl_tlsdesc_dynamic; ++ td->entry = GLRO(dl_x86_tlsdesc_dynamic); + } + else + # endif +diff --git a/sysdeps/i386/dl-tlsdesc-dynamic.h b/sysdeps/i386/dl-tlsdesc-dynamic.h +new file mode 100644 +index 0000000000..3627028577 +--- /dev/null ++++ b/sysdeps/i386/dl-tlsdesc-dynamic.h +@@ -0,0 +1,190 @@ ++/* Thread-local storage handling in the ELF dynamic linker. i386 version. ++ Copyright (C) 2004-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#undef REGISTER_SAVE_AREA ++ ++#if !defined USE_FNSAVE && (STATE_SAVE_ALIGNMENT % 16) != 0 ++# error STATE_SAVE_ALIGNMENT must be multiple of 16 ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++# ifdef USE_FNSAVE ++# error USE_FNSAVE shouldn't be defined ++# endif ++# ifdef USE_FXSAVE ++/* Use fxsave to save all registers. */ ++# define REGISTER_SAVE_AREA 512 ++# endif ++#else ++# ifdef USE_FNSAVE ++/* Use fnsave to save x87 FPU stack registers. */ ++# define REGISTER_SAVE_AREA 108 ++# else ++# ifndef USE_FXSAVE ++# error USE_FXSAVE must be defined ++# endif ++/* Use fxsave to save all registers. Add 12 bytes to align the stack ++ to 16 bytes. */ ++# define REGISTER_SAVE_AREA (512 + 12) ++# endif ++#endif ++ ++ .hidden _dl_tlsdesc_dynamic ++ .global _dl_tlsdesc_dynamic ++ .type _dl_tlsdesc_dynamic,@function ++ ++ /* This function is used for symbols that need dynamic TLS. ++ ++ %eax points to the TLS descriptor, such that 0(%eax) points to ++ _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct ++ tlsdesc_dynamic_arg object. It must return in %eax the offset ++ between the thread pointer and the object denoted by the ++ argument, without clobbering any registers. ++ ++ The assembly code that follows is a rendition of the following ++ C code, hand-optimized a little bit. ++ ++ptrdiff_t ++__attribute__ ((__regparm__ (1))) ++_dl_tlsdesc_dynamic (struct tlsdesc *tdp) ++{ ++ struct tlsdesc_dynamic_arg *td = tdp->arg; ++ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); ++ if (__builtin_expect (td->gen_count <= dtv[0].counter ++ && (dtv[td->tlsinfo.ti_module].pointer.val ++ != TLS_DTV_UNALLOCATED), ++ 1)) ++ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset ++ - __thread_pointer; ++ ++ return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; ++} ++*/ ++ cfi_startproc ++ .align 16 ++_dl_tlsdesc_dynamic: ++ /* Like all TLS resolvers, preserve call-clobbered registers. ++ We need two scratch regs anyway. */ ++ subl $32, %esp ++ cfi_adjust_cfa_offset (32) ++ movl %ecx, 20(%esp) ++ movl %edx, 24(%esp) ++ movl TLSDESC_ARG(%eax), %eax ++ movl %gs:DTV_OFFSET, %edx ++ movl TLSDESC_GEN_COUNT(%eax), %ecx ++ cmpl (%edx), %ecx ++ ja 2f ++ movl TLSDESC_MODID(%eax), %ecx ++ movl (%edx,%ecx,8), %edx ++ cmpl $-1, %edx ++ je 2f ++ movl TLSDESC_MODOFF(%eax), %eax ++ addl %edx, %eax ++1: ++ movl 20(%esp), %ecx ++ subl %gs:0, %eax ++ movl 24(%esp), %edx ++ addl $32, %esp ++ cfi_adjust_cfa_offset (-32) ++ ret ++ .p2align 4,,7 ++2: ++ cfi_adjust_cfa_offset (32) ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ movl %ebx, -28(%esp) ++ movl %esp, %ebx ++ cfi_def_cfa_register(%ebx) ++ and $-STATE_SAVE_ALIGNMENT, %esp ++#endif ++#ifdef REGISTER_SAVE_AREA ++ subl $REGISTER_SAVE_AREA, %esp ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++# endif ++#else ++# if !DL_RUNTIME_RESOLVE_REALIGN_STACK ++# error DL_RUNTIME_RESOLVE_REALIGN_STACK must be true ++# endif ++ /* Allocate stack space of the required size to save the state. */ ++ LOAD_PIC_REG (cx) ++ subl RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET+_rtld_local_ro@GOTOFF(%ecx), %esp ++#endif ++#ifdef USE_FNSAVE ++ fnsave (%esp) ++#elif defined USE_FXSAVE ++ fxsave (%esp) ++#else ++ /* Save the argument for ___tls_get_addr in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ /* Clear the XSAVE Header. */ ++# ifdef USE_XSAVE ++ movl %edx, (512)(%esp) ++ movl %edx, (512 + 4 * 1)(%esp) ++ movl %edx, (512 + 4 * 2)(%esp) ++ movl %edx, (512 + 4 * 3)(%esp) ++# endif ++ movl %edx, (512 + 4 * 4)(%esp) ++ movl %edx, (512 + 4 * 5)(%esp) ++ movl %edx, (512 + 4 * 6)(%esp) ++ movl %edx, (512 + 4 * 7)(%esp) ++ movl %edx, (512 + 4 * 8)(%esp) ++ movl %edx, (512 + 4 * 9)(%esp) ++ movl %edx, (512 + 4 * 10)(%esp) ++ movl %edx, (512 + 4 * 11)(%esp) ++ movl %edx, (512 + 4 * 12)(%esp) ++ movl %edx, (512 + 4 * 13)(%esp) ++ movl %edx, (512 + 4 * 14)(%esp) ++ movl %edx, (512 + 4 * 15)(%esp) ++# ifdef USE_XSAVE ++ xsave (%esp) ++# else ++ xsavec (%esp) ++# endif ++ /* Restore the argument for ___tls_get_addr in EAX. */ ++ movl %ecx, %eax ++#endif ++ call HIDDEN_JUMPTARGET (___tls_get_addr) ++ /* Get register content back. */ ++#ifdef USE_FNSAVE ++ frstor (%esp) ++#elif defined USE_FXSAVE ++ fxrstor (%esp) ++#else ++ /* Save and retore ___tls_get_addr return value stored in EAX. */ ++ movl %eax, %ecx ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ xrstor (%esp) ++ movl %ecx, %eax ++#endif ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ mov %ebx, %esp ++ cfi_def_cfa_register(%esp) ++ movl -28(%esp), %ebx ++ cfi_restore(%ebx) ++#else ++ addl $REGISTER_SAVE_AREA, %esp ++ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) ++#endif ++ jmp 1b ++ cfi_endproc ++ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic ++ ++#undef STATE_SAVE_ALIGNMENT +diff --git a/sysdeps/i386/dl-tlsdesc.S b/sysdeps/i386/dl-tlsdesc.S +index 90d93caa0c..f002feee56 100644 +--- a/sysdeps/i386/dl-tlsdesc.S ++++ b/sysdeps/i386/dl-tlsdesc.S +@@ -18,8 +18,27 @@ + + #include + #include ++#include ++#include + #include "tlsdesc.h" + ++#ifndef DL_STACK_ALIGNMENT ++/* Due to GCC bug: ++ ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++ ++ __tls_get_addr may be called with 4-byte stack alignment. Although ++ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume ++ that stack will be always aligned at 16 bytes. */ ++# define DL_STACK_ALIGNMENT 4 ++#endif ++ ++/* True if _dl_tlsdesc_dynamic should align stack for STATE_SAVE or align ++ stack to MINIMUM_ALIGNMENT bytes before calling ___tls_get_addr. */ ++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ ++ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ ++ || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT) ++ + .text + + /* This function is used to compute the TP offset for symbols in +@@ -65,69 +84,35 @@ _dl_tlsdesc_undefweak: + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + + #ifdef SHARED +- .hidden _dl_tlsdesc_dynamic +- .global _dl_tlsdesc_dynamic +- .type _dl_tlsdesc_dynamic,@function +- +- /* This function is used for symbols that need dynamic TLS. +- +- %eax points to the TLS descriptor, such that 0(%eax) points to +- _dl_tlsdesc_dynamic itself, and 4(%eax) points to a struct +- tlsdesc_dynamic_arg object. It must return in %eax the offset +- between the thread pointer and the object denoted by the +- argument, without clobbering any registers. +- +- The assembly code that follows is a rendition of the following +- C code, hand-optimized a little bit. +- +-ptrdiff_t +-__attribute__ ((__regparm__ (1))) +-_dl_tlsdesc_dynamic (struct tlsdesc *tdp) +-{ +- struct tlsdesc_dynamic_arg *td = tdp->arg; +- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); +- if (__builtin_expect (td->gen_count <= dtv[0].counter +- && (dtv[td->tlsinfo.ti_module].pointer.val +- != TLS_DTV_UNALLOCATED), +- 1)) +- return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset +- - __thread_pointer; +- +- return ___tls_get_addr (&td->tlsinfo) - __thread_pointer; +-} +-*/ +- cfi_startproc +- .align 16 +-_dl_tlsdesc_dynamic: +- /* Like all TLS resolvers, preserve call-clobbered registers. +- We need two scratch regs anyway. */ +- subl $28, %esp +- cfi_adjust_cfa_offset (28) +- movl %ecx, 20(%esp) +- movl %edx, 24(%esp) +- movl TLSDESC_ARG(%eax), %eax +- movl %gs:DTV_OFFSET, %edx +- movl TLSDESC_GEN_COUNT(%eax), %ecx +- cmpl (%edx), %ecx +- ja .Lslow +- movl TLSDESC_MODID(%eax), %ecx +- movl (%edx,%ecx,8), %edx +- cmpl $-1, %edx +- je .Lslow +- movl TLSDESC_MODOFF(%eax), %eax +- addl %edx, %eax +-.Lret: +- movl 20(%esp), %ecx +- subl %gs:0, %eax +- movl 24(%esp), %edx +- addl $28, %esp +- cfi_adjust_cfa_offset (-28) +- ret +- .p2align 4,,7 +-.Lslow: +- cfi_adjust_cfa_offset (28) +- call HIDDEN_JUMPTARGET (___tls_get_addr) +- jmp .Lret +- cfi_endproc +- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic ++# define USE_FNSAVE ++# define MINIMUM_ALIGNMENT 4 ++# define STATE_SAVE_ALIGNMENT 4 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fnsave ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef MINIMUM_ALIGNMENT ++# undef USE_FNSAVE ++ ++# define MINIMUM_ALIGNMENT 16 ++ ++# define USE_FXSAVE ++# define STATE_SAVE_ALIGNMENT 16 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_FXSAVE ++ ++# define USE_XSAVE ++# define STATE_SAVE_ALIGNMENT 64 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_XSAVE ++ ++# define USE_XSAVEC ++# define STATE_SAVE_ALIGNMENT 64 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_XSAVEC + #endif /* SHARED */ +diff --git a/sysdeps/i386/fpu/libm-test-ulps b/sysdeps/i386/fpu/libm-test-ulps +index 84e6686eba..f2139fc172 100644 +--- a/sysdeps/i386/fpu/libm-test-ulps ++++ b/sysdeps/i386/fpu/libm-test-ulps +@@ -1232,6 +1232,7 @@ ldouble: 6 + + Function: "hypot": + double: 1 ++float: 1 + float128: 1 + ldouble: 1 + +diff --git a/sysdeps/i386/i586/memcpy.S b/sysdeps/i386/i586/memcpy.S +index 3e26f112d6..79856d498a 100644 +--- a/sysdeps/i386/i586/memcpy.S ++++ b/sysdeps/i386/i586/memcpy.S +@@ -26,7 +26,7 @@ + #define LEN SRC+4 + + .text +-#if defined PIC && IS_IN (libc) ++#if defined SHARED && IS_IN (libc) + ENTRY (__memcpy_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) +diff --git a/sysdeps/i386/i686/memmove.S b/sysdeps/i386/i686/memmove.S +index f230359ad6..effd958120 100644 +--- a/sysdeps/i386/i686/memmove.S ++++ b/sysdeps/i386/i686/memmove.S +@@ -29,7 +29,7 @@ + #define SRC DEST+4 + #define LEN SRC+4 + +-#if defined PIC && IS_IN (libc) ++#if defined SHARED && IS_IN (libc) + ENTRY_CHK (__memmove_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) +diff --git a/sysdeps/i386/i686/memset.S b/sysdeps/i386/i686/memset.S +index f02f5a6df7..ab06771ea0 100644 +--- a/sysdeps/i386/i686/memset.S ++++ b/sysdeps/i386/i686/memset.S +@@ -27,7 +27,7 @@ + #define LEN CHR+4 + + .text +-#if defined PIC && IS_IN (libc) ++#if defined SHARED && IS_IN (libc) + ENTRY_CHK (__memset_chk) + movl 12(%esp), %eax + cmpl %eax, 16(%esp) +diff --git a/sysdeps/i386/i686/multiarch/memrchr-c.c b/sysdeps/i386/i686/multiarch/memrchr-c.c +index ef7bbbe792..20bfdf3af3 100644 +--- a/sysdeps/i386/i686/multiarch/memrchr-c.c ++++ b/sysdeps/i386/i686/multiarch/memrchr-c.c +@@ -5,3 +5,4 @@ extern void *__memrchr_ia32 (const void *, int, size_t); + #endif + + #include "string/memrchr.c" ++strong_alias (__memrchr_ia32, __GI___memrchr) +diff --git a/sysdeps/i386/i686/multiarch/memrchr-sse2.S b/sysdeps/i386/i686/multiarch/memrchr-sse2.S +index d9dae04171..e123f87435 100644 +--- a/sysdeps/i386/i686/multiarch/memrchr-sse2.S ++++ b/sysdeps/i386/i686/multiarch/memrchr-sse2.S +@@ -720,5 +720,4 @@ L(ret_null): + ret + + END (__memrchr_sse2) +-strong_alias (__memrchr_sse2, __GI___memrchr) + #endif +diff --git a/sysdeps/loongarch/fpu/e_scalbf.c b/sysdeps/loongarch/fpu/e_scalbf.c +index 9f05485236..7c0395fbb5 100644 +--- a/sysdeps/loongarch/fpu/e_scalbf.c ++++ b/sysdeps/loongarch/fpu/e_scalbf.c +@@ -57,4 +57,4 @@ __ieee754_scalbf (float x, float fn) + + return x; + } +-libm_alias_finite (__ieee754_scalb, __scalb) ++libm_alias_finite (__ieee754_scalbf, __scalbf) +diff --git a/sysdeps/loongarch/lp64/multiarch/Makefile b/sysdeps/loongarch/lp64/multiarch/Makefile +index fe863e1ba4..01762ef526 100644 +--- a/sysdeps/loongarch/lp64/multiarch/Makefile ++++ b/sysdeps/loongarch/lp64/multiarch/Makefile +@@ -1,52 +1,52 @@ + ifeq ($(subdir),string) + sysdep_routines += \ +- strlen-aligned \ +- strlen-lsx \ +- strlen-lasx \ +- strnlen-aligned \ +- strnlen-lsx \ +- strnlen-lasx \ ++ memchr-aligned \ ++ memchr-lasx \ ++ memchr-lsx \ ++ memcmp-aligned \ ++ memcmp-lasx \ ++ memcmp-lsx \ ++ memcpy-aligned \ ++ memcpy-unaligned \ ++ memmove-lasx \ ++ memmove-lsx \ ++ memmove-unaligned \ ++ memrchr-generic \ ++ memrchr-lasx \ ++ memrchr-lsx \ ++ memset-aligned \ ++ memset-lasx \ ++ memset-lsx \ ++ memset-unaligned \ ++ rawmemchr-aligned \ ++ rawmemchr-lasx \ ++ rawmemchr-lsx \ ++ stpcpy-aligned \ ++ stpcpy-lasx \ ++ stpcpy-lsx \ ++ stpcpy-unaligned \ + strchr-aligned \ +- strchr-lsx \ + strchr-lasx \ +- strrchr-aligned \ +- strrchr-lsx \ +- strrchr-lasx \ ++ strchr-lsx \ + strchrnul-aligned \ +- strchrnul-lsx \ + strchrnul-lasx \ ++ strchrnul-lsx \ + strcmp-aligned \ + strcmp-lsx \ +- strncmp-aligned \ +- strncmp-lsx \ + strcpy-aligned \ +- strcpy-unaligned \ +- strcpy-lsx \ + strcpy-lasx \ +- stpcpy-aligned \ +- stpcpy-unaligned \ +- stpcpy-lsx \ +- stpcpy-lasx \ +- memcpy-aligned \ +- memcpy-unaligned \ +- memmove-unaligned \ +- memmove-lsx \ +- memmove-lasx \ +- rawmemchr-aligned \ +- rawmemchr-lsx \ +- rawmemchr-lasx \ +- memchr-aligned \ +- memchr-lsx \ +- memchr-lasx \ +- memrchr-generic \ +- memrchr-lsx \ +- memrchr-lasx \ +- memset-aligned \ +- memset-unaligned \ +- memset-lsx \ +- memset-lasx \ +- memcmp-aligned \ +- memcmp-lsx \ +- memcmp-lasx \ ++ strcpy-lsx \ ++ strcpy-unaligned \ ++ strlen-aligned \ ++ strlen-lasx \ ++ strlen-lsx \ ++ strncmp-aligned \ ++ strncmp-lsx \ ++ strnlen-aligned \ ++ strnlen-lasx \ ++ strnlen-lsx \ ++ strrchr-aligned \ ++ strrchr-lasx \ ++ strrchr-lsx \ + # sysdep_routines + endif +diff --git a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +index cb640d77b7..a73390b12f 100644 +--- a/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h ++++ b/sysdeps/loongarch/lp64/multiarch/dl-symbol-redir-ifunc.h +@@ -19,6 +19,9 @@ + #ifndef _DL_IFUNC_GENERIC_H + #define _DL_IFUNC_GENERIC_H + ++#ifndef SHARED + asm ("memset = __memset_aligned"); ++asm ("memcmp = __memcmp_aligned"); ++#endif + + #endif +diff --git a/sysdeps/m68k/bits/wordsize.h b/sysdeps/m68k/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/m68k/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/m68k/utmp-size.h b/sysdeps/m68k/utmp-size.h +new file mode 100644 +index 0000000000..5946685819 +--- /dev/null ++++ b/sysdeps/m68k/utmp-size.h +@@ -0,0 +1,3 @@ ++/* m68k has 2-byte alignment. */ ++#define UTMP_SIZE 382 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/microblaze/bits/wordsize.h b/sysdeps/microblaze/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/microblaze/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/microblaze/utmp-size.h b/sysdeps/microblaze/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/microblaze/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/mips/bits/wordsize.h b/sysdeps/mips/bits/wordsize.h +index 57f0f2a22f..30dd3fd85d 100644 +--- a/sysdeps/mips/bits/wordsize.h ++++ b/sysdeps/mips/bits/wordsize.h +@@ -19,11 +19,7 @@ + + #define __WORDSIZE _MIPS_SZPTR + +-#if _MIPS_SIM == _ABI64 +-# define __WORDSIZE_TIME64_COMPAT32 1 +-#else +-# define __WORDSIZE_TIME64_COMPAT32 0 +-#endif ++#define __WORDSIZE_TIME64_COMPAT32 1 + + #if __WORDSIZE == 32 + #define __WORDSIZE32_SIZE_ULONG 0 +diff --git a/sysdeps/mips/mips64/libm-test-ulps b/sysdeps/mips/mips64/libm-test-ulps +index 78969745b2..933aba4735 100644 +--- a/sysdeps/mips/mips64/libm-test-ulps ++++ b/sysdeps/mips/mips64/libm-test-ulps +@@ -1066,17 +1066,17 @@ double: 1 + ldouble: 1 + + Function: "j0": +-double: 2 ++double: 3 + float: 9 + ldouble: 2 + + Function: "j0_downward": +-double: 5 ++double: 6 + float: 9 + ldouble: 9 + + Function: "j0_towardzero": +-double: 6 ++double: 7 + float: 9 + ldouble: 9 + +@@ -1146,6 +1146,7 @@ float: 6 + ldouble: 8 + + Function: "log": ++double: 1 + float: 1 + ldouble: 1 + +diff --git a/sysdeps/mips/utmp-size.h b/sysdeps/mips/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/mips/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/nios2/bits/wordsize.h b/sysdeps/nios2/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/nios2/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/nios2/utmp-size.h b/sysdeps/nios2/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/nios2/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/or1k/utmp-size.h b/sysdeps/or1k/utmp-size.h +new file mode 100644 +index 0000000000..6b3653aa4d +--- /dev/null ++++ b/sysdeps/or1k/utmp-size.h +@@ -0,0 +1,3 @@ ++/* or1k has less padding than other architectures with 64-bit time_t. */ ++#define UTMP_SIZE 392 ++#define LASTLOG_SIZE 296 +diff --git a/sysdeps/powerpc/dl-procinfo.c b/sysdeps/powerpc/dl-procinfo.c +index a76bb6e5b0..8cf00aa7e3 100644 +--- a/sysdeps/powerpc/dl-procinfo.c ++++ b/sysdeps/powerpc/dl-procinfo.c +@@ -38,6 +38,10 @@ + needed. + */ + ++/* The total number of available bits (including those prior to ++ _DL_HWCAP_FIRST). Some of these bits might not be used. */ ++#define _DL_HWCAP_COUNT 128 ++ + #ifndef PROCINFO_CLASS + # define PROCINFO_CLASS + #endif +@@ -61,7 +65,7 @@ PROCINFO_CLASS struct cpu_features _dl_powerpc_cpu_features + #if !defined PROCINFO_DECL && defined SHARED + ._dl_powerpc_cap_flags + #else +-PROCINFO_CLASS const char _dl_powerpc_cap_flags[64][15] ++PROCINFO_CLASS const char _dl_powerpc_cap_flags[_DL_HWCAP_COUNT][15] + #endif + #ifndef PROCINFO_DECL + = { +diff --git a/sysdeps/powerpc/dl-procinfo.h b/sysdeps/powerpc/dl-procinfo.h +index 68f4241095..b36697ba44 100644 +--- a/sysdeps/powerpc/dl-procinfo.h ++++ b/sysdeps/powerpc/dl-procinfo.h +@@ -22,22 +22,23 @@ + #include + #include /* This defines the PPC_FEATURE[2]_* macros. */ + +-/* The total number of available bits (including those prior to +- _DL_HWCAP_FIRST). Some of these bits might not be used. */ +-#define _DL_HWCAP_COUNT 64 ++/* Feature masks are all 32-bits in size. */ ++#define _DL_HWCAP_SIZE 32 + +-/* Features started at bit 31 and decremented as new features were added. */ +-#define _DL_HWCAP_LAST 31 ++/* AT_HWCAP2 feature strings follow the AT_HWCAP feature strings. */ ++#define _DL_HWCAP2_OFFSET _DL_HWCAP_SIZE + +-/* AT_HWCAP2 features started at bit 31 and decremented as new features were +- added. HWCAP2 feature bits start at bit 0. */ +-#define _DL_HWCAP2_LAST 31 ++/* AT_HWCAP3 feature strings follow the AT_HWCAP2 feature strings. */ ++#define _DL_HWCAP3_OFFSET (_DL_HWCAP2_OFFSET + _DL_HWCAP_SIZE) ++ ++/* AT_HWCAP4 feature strings follow the AT_HWCAP3 feature strings. */ ++#define _DL_HWCAP4_OFFSET (_DL_HWCAP3_OFFSET + _DL_HWCAP_SIZE) + + /* These bits influence library search. */ + #define HWCAP_IMPORTANT (PPC_FEATURE_HAS_ALTIVEC \ + + PPC_FEATURE_HAS_DFP) + +-#define _DL_PLATFORMS_COUNT 16 ++#define _DL_PLATFORMS_COUNT 17 + + #define _DL_FIRST_PLATFORM 32 + /* Mask to filter out platforms. */ +@@ -61,6 +62,7 @@ + #define PPC_PLATFORM_POWER8 13 + #define PPC_PLATFORM_POWER9 14 + #define PPC_PLATFORM_POWER10 15 ++#define PPC_PLATFORM_POWER11 16 + + static inline const char * + __attribute__ ((unused)) +@@ -88,6 +90,11 @@ _dl_string_platform (const char *str) + ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER10; + str++; + } ++ else if (str[1] == '1') ++ { ++ ret = _DL_FIRST_PLATFORM + PPC_PLATFORM_POWER11; ++ str++; ++ } + else + return -1; + break; +@@ -187,21 +194,42 @@ _dl_procinfo (unsigned int type, unsigned long int word) + case AT_HWCAP: + _dl_printf ("AT_HWCAP: "); + +- for (int i = 0; i <= _DL_HWCAP_LAST; ++i) ++ for (int i = 0; i < _DL_HWCAP_SIZE; ++i) + if (word & (1 << i)) + _dl_printf (" %s", _dl_hwcap_string (i)); + break; + case AT_HWCAP2: + { +- unsigned int offset = _DL_HWCAP_LAST + 1; + + _dl_printf ("AT_HWCAP2: "); + +- /* We have to go through them all because the kernel added the +- AT_HWCAP2 features starting with the high bits. */ +- for (int i = 0; i <= _DL_HWCAP2_LAST; ++i) +- if (word & (1 << i)) +- _dl_printf (" %s", _dl_hwcap_string (offset + i)); ++ /* We have to go through them all because the kernel added the ++ AT_HWCAP2 features starting with the high bits. */ ++ for (int i = 0; i < _DL_HWCAP_SIZE; ++i) ++ if (word & (1 << i)) ++ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP2_OFFSET + i)); ++ break; ++ } ++ case AT_HWCAP3: ++ { ++ _dl_printf ("AT_HWCAP3: "); ++ ++ /* We have to go through them all because the kernel added the ++ AT_HWCAP3 features starting with the high bits. */ ++ for (int i = 0; i < _DL_HWCAP_SIZE; ++i) ++ if (word & (1 << i)) ++ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP3_OFFSET + i)); ++ break; ++ } ++ case AT_HWCAP4: ++ { ++ _dl_printf ("AT_HWCAP4: "); ++ ++ /* We have to go through them all because the kernel added the ++ AT_HWCAP4 features starting with the high bits. */ ++ for (int i = 0; i <= _DL_HWCAP_SIZE; ++i) ++ if (word & (1 << i)) ++ _dl_printf (" %s", _dl_hwcap_string (_DL_HWCAP4_OFFSET + i)); + break; + } + case AT_L1I_CACHEGEOMETRY: +diff --git a/sysdeps/powerpc/hwcapinfo.c b/sysdeps/powerpc/hwcapinfo.c +index 76344f285a..f6fede15a7 100644 +--- a/sysdeps/powerpc/hwcapinfo.c ++++ b/sysdeps/powerpc/hwcapinfo.c +@@ -31,7 +31,7 @@ void + __tcb_parse_hwcap_and_convert_at_platform (void) + { + +- uint64_t h1, h2; ++ uint64_t h1, h2, h3, h4; + + /* Read AT_PLATFORM string from auxv and convert it to a number. */ + __tcb.at_platform = _dl_string_platform (GLRO (dl_platform)); +@@ -39,6 +39,8 @@ __tcb_parse_hwcap_and_convert_at_platform (void) + /* Read HWCAP and HWCAP2 from auxv. */ + h1 = GLRO (dl_hwcap); + h2 = GLRO (dl_hwcap2); ++ h3 = GLRO (dl_hwcap3); ++ h4 = GLRO (dl_hwcap4); + + /* hwcap contains only the latest supported ISA, the code checks which is + and fills the previous supported ones. */ +@@ -64,13 +66,16 @@ __tcb_parse_hwcap_and_convert_at_platform (void) + else if (h1 & PPC_FEATURE_POWER5) + h1 |= PPC_FEATURE_POWER4; + +- uint64_t array_hwcaps[] = { h1, h2 }; ++ uint64_t array_hwcaps[] = { h1, h2, h3, h4 }; + init_cpu_features (&GLRO(dl_powerpc_cpu_features), array_hwcaps); + + /* Consolidate both HWCAP and HWCAP2 into a single doubleword so that + we can read both in a single load later. */ + __tcb.hwcap = (h1 << 32) | (h2 & 0xffffffff); +- __tcb.hwcap_extn = 0x0; ++ ++ /* Consolidate both HWCAP3 and HWCAP4 into a single doubleword so that ++ we can read both in a single load later. */ ++ __tcb.hwcap_extn = (h3 << 32) | (h4 & 0xffffffff); + + } + #if IS_IN (rtld) +diff --git a/sysdeps/powerpc/powerpc32/bits/wordsize.h b/sysdeps/powerpc/powerpc32/bits/wordsize.h +index 04ca9debf0..6993fb6b29 100644 +--- a/sysdeps/powerpc/powerpc32/bits/wordsize.h ++++ b/sysdeps/powerpc/powerpc32/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __powerpc64__ + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/powerpc/powerpc32/power11/Implies b/sysdeps/powerpc/powerpc32/power11/Implies +new file mode 100644 +index 0000000000..051cbe0f79 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc32/power11/Implies +@@ -0,0 +1,2 @@ ++powerpc/powerpc32/power10/fpu ++powerpc/powerpc32/power10 +diff --git a/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies +new file mode 100644 +index 0000000000..58edb2861d +--- /dev/null ++++ b/sysdeps/powerpc/powerpc32/power11/fpu/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc32/power10/fpu/multiarch +diff --git a/sysdeps/powerpc/powerpc32/power11/multiarch/Implies b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies +new file mode 100644 +index 0000000000..c70f0428ba +--- /dev/null ++++ b/sysdeps/powerpc/powerpc32/power11/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc32/power10/multiarch +diff --git a/sysdeps/powerpc/powerpc64/be/power11/Implies b/sysdeps/powerpc/powerpc64/be/power11/Implies +new file mode 100644 +index 0000000000..de481d1c13 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/be/power11/Implies +@@ -0,0 +1,2 @@ ++powerpc/powerpc64/be/power10/fpu ++powerpc/powerpc64/be/power10 +diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies +new file mode 100644 +index 0000000000..dff0e13064 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/be/power10/fpu +diff --git a/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies +new file mode 100644 +index 0000000000..c3f259e009 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/be/power11/fpu/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/be/power10/fpu/multiarch +diff --git a/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies +new file mode 100644 +index 0000000000..9491a394c9 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/be/power11/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/be/power10/multiarch +diff --git a/sysdeps/powerpc/powerpc64/bits/wordsize.h b/sysdeps/powerpc/powerpc64/bits/wordsize.h +index 04ca9debf0..6993fb6b29 100644 +--- a/sysdeps/powerpc/powerpc64/bits/wordsize.h ++++ b/sysdeps/powerpc/powerpc64/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __powerpc64__ + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/powerpc/powerpc64/dl-machine.h b/sysdeps/powerpc/powerpc64/dl-machine.h +index c6682f3445..2b6f5d2b08 100644 +--- a/sysdeps/powerpc/powerpc64/dl-machine.h ++++ b/sysdeps/powerpc/powerpc64/dl-machine.h +@@ -78,6 +78,7 @@ elf_host_tolerates_class (const Elf64_Ehdr *ehdr) + static inline Elf64_Addr + elf_machine_load_address (void) __attribute__ ((const)); + ++#ifndef __PCREL__ + static inline Elf64_Addr + elf_machine_load_address (void) + { +@@ -105,6 +106,24 @@ elf_machine_dynamic (void) + /* Then subtract off the load address offset. */ + return runtime_dynamic - elf_machine_load_address() ; + } ++#else /* __PCREL__ */ ++/* In PCREL mode, r2 may have been clobbered. Rely on relative ++ relocations instead. */ ++ ++static inline ElfW(Addr) ++elf_machine_load_address (void) ++{ ++ extern const ElfW(Ehdr) __ehdr_start attribute_hidden; ++ return (ElfW(Addr)) &__ehdr_start; ++} ++ ++static inline ElfW(Addr) ++elf_machine_dynamic (void) ++{ ++ extern ElfW(Dyn) _DYNAMIC[] attribute_hidden; ++ return (ElfW(Addr)) _DYNAMIC - elf_machine_load_address (); ++} ++#endif /* __PCREL__ */ + + /* The PLT uses Elf64_Rela relocs. */ + #define elf_machine_relplt elf_machine_rela +diff --git a/sysdeps/powerpc/powerpc64/le/power11/Implies b/sysdeps/powerpc/powerpc64/le/power11/Implies +new file mode 100644 +index 0000000000..e18182dcc1 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power11/Implies +@@ -0,0 +1,2 @@ ++powerpc/powerpc64/le/power10/fpu ++powerpc/powerpc64/le/power10 +diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies +new file mode 100644 +index 0000000000..e41bd55684 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/le/power10/fpu +diff --git a/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies +new file mode 100644 +index 0000000000..c838d50931 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power11/fpu/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/le/power10/fpu/multiarch +diff --git a/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies +new file mode 100644 +index 0000000000..687248c3c2 +--- /dev/null ++++ b/sysdeps/powerpc/powerpc64/le/power11/multiarch/Implies +@@ -0,0 +1 @@ ++powerpc/powerpc64/le/power10/multiarch +diff --git a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c +index 77465d9133..65d3e69303 100644 +--- a/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c ++++ b/sysdeps/powerpc/powerpc64/le/tst-glibc-hwcaps.c +@@ -36,9 +36,11 @@ compute_level (void) + return 9; + if (strcmp (platform, "power10") == 0) + return 10; ++ if (strcmp (platform, "power11") == 0) ++ return 11; + printf ("warning: unrecognized AT_PLATFORM value: %s\n", platform); +- /* Assume that the new platform supports POWER10. */ +- return 10; ++ /* Assume that the new platform supports POWER11. */ ++ return 11; + } + + static int +diff --git a/sysdeps/powerpc/preconfigure b/sysdeps/powerpc/preconfigure +index 4de94089a3..9e5a07ab6d 100644 +--- a/sysdeps/powerpc/preconfigure ++++ b/sysdeps/powerpc/preconfigure +@@ -58,7 +58,7 @@ fi + + ;; + +- a2|970|power[4-9]|power5x|power6+|power10) ++ a2|970|power[4-9]|power5x|power6+|power10|power11) + submachine=${archcpu} + if test ${libc_cv_cc_submachine+y} + then : +diff --git a/sysdeps/powerpc/preconfigure.ac b/sysdeps/powerpc/preconfigure.ac +index 6c63bd8257..14b6dafd4a 100644 +--- a/sysdeps/powerpc/preconfigure.ac ++++ b/sysdeps/powerpc/preconfigure.ac +@@ -46,7 +46,7 @@ case "${machine}:${submachine}" in + AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="") + ;; + +- a2|970|power[[4-9]]|power5x|power6+|power10) ++ a2|970|power[[4-9]]|power5x|power6+|power10|power11) + submachine=${archcpu} + AC_CACHE_VAL(libc_cv_cc_submachine,libc_cv_cc_submachine="") + ;; +diff --git a/sysdeps/powerpc/utmp-size.h b/sysdeps/powerpc/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/powerpc/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/pthread/tst-cancel30.c b/sysdeps/pthread/tst-cancel30.c +index 3030660e5f..94ad6281bc 100644 +--- a/sysdeps/pthread/tst-cancel30.c ++++ b/sysdeps/pthread/tst-cancel30.c +@@ -18,6 +18,7 @@ + License along with the GNU C Library; if not, see + . */ + ++#include + #include + #include + #include +@@ -46,13 +47,19 @@ tf (void *arg) + + /* Wait indefinitely for cancellation, which only works if asynchronous + cancellation is enabled. */ +-#if defined SYS_ppoll || defined SYS_ppoll_time64 +-# ifndef SYS_ppoll_time64 +-# define SYS_ppoll_time64 SYS_ppoll ++#ifdef SYS_ppoll_time64 ++ long int ret = syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL); ++ (void) ret; ++# ifdef SYS_ppoll ++ if (ret == -1 && errno == ENOSYS) ++ syscall (SYS_ppoll, NULL, 0, NULL, NULL); + # endif +- syscall (SYS_ppoll_time64, NULL, 0, NULL, NULL); + #else ++# ifdef SYS_ppoll ++ syscall (SYS_ppoll, NULL, 0, NULL, NULL); ++# else + for (;;); ++# endif + #endif + + return 0; +diff --git a/sysdeps/riscv/utmp-size.h b/sysdeps/riscv/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/riscv/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/sh/bits/wordsize.h b/sysdeps/sh/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/sh/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/sh/utmp-size.h b/sysdeps/sh/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/sh/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/sparc/sparc32/bits/wordsize.h b/sysdeps/sparc/sparc32/bits/wordsize.h +index 4bbd2e63b4..a2e79e0fa9 100644 +--- a/sysdeps/sparc/sparc32/bits/wordsize.h ++++ b/sysdeps/sparc/sparc32/bits/wordsize.h +@@ -1,6 +1,6 @@ + /* Determine the wordsize from the preprocessor defines. */ + + #define __WORDSIZE 32 +-#define __WORDSIZE_TIME64_COMPAT32 0 ++#define __WORDSIZE_TIME64_COMPAT32 1 + #define __WORDSIZE32_SIZE_ULONG 0 + #define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/sparc/sparc64/bits/wordsize.h b/sysdeps/sparc/sparc64/bits/wordsize.h +index 2f66f10d72..ea103e5970 100644 +--- a/sysdeps/sparc/sparc64/bits/wordsize.h ++++ b/sysdeps/sparc/sparc64/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __arch64__ || defined __sparcv9 + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/sparc/sparc64/rtld-memset.c b/sysdeps/sparc/sparc64/rtld-memset.c +index 55f3835790..a19202a620 100644 +--- a/sysdeps/sparc/sparc64/rtld-memset.c ++++ b/sysdeps/sparc/sparc64/rtld-memset.c +@@ -1 +1,4 @@ + #include ++#if IS_IN(rtld) ++strong_alias (memset, __memset_ultra1) ++#endif +diff --git a/sysdeps/sparc/utmp-size.h b/sysdeps/sparc/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/sparc/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +index b1a3f673f0..c0b047bc0d 100644 +--- a/sysdeps/unix/sysv/linux/aarch64/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/aarch64/cpu-features.c +@@ -21,6 +21,7 @@ + #include + #include + #include ++#include + #include + + #define DCZID_DZP_MASK (1 << 4) +@@ -62,6 +63,46 @@ get_midr_from_mcpu (const struct tunable_str_t *mcpu) + return UINT64_MAX; + } + ++#if __LINUX_KERNEL_VERSION < 0x060200 ++ ++/* Return true if we prefer using SVE in string ifuncs. Old kernels disable ++ SVE after every system call which results in unnecessary traps if memcpy ++ uses SVE. This is true for kernels between 4.15.0 and before 6.2.0, except ++ for 5.14.0 which was patched. For these versions return false to avoid using ++ SVE ifuncs. ++ Parse the kernel version into a 24-bit kernel.major.minor value without ++ calling any library functions. If uname() is not supported or if the version ++ format is not recognized, assume the kernel is modern and return true. */ ++ ++static inline bool ++prefer_sve_ifuncs (void) ++{ ++ struct utsname buf; ++ const char *p = &buf.release[0]; ++ int kernel = 0; ++ int val; ++ ++ if (__uname (&buf) < 0) ++ return true; ++ ++ for (int shift = 16; shift >= 0; shift -= 8) ++ { ++ for (val = 0; *p >= '0' && *p <= '9'; p++) ++ val = val * 10 + *p - '0'; ++ kernel |= (val & 255) << shift; ++ if (*p++ != '.') ++ break; ++ } ++ ++ if (kernel >= 0x060200 || kernel == 0x050e00) ++ return true; ++ if (kernel >= 0x040f00) ++ return false; ++ return true; ++} ++ ++#endif ++ + static inline void + init_cpu_features (struct cpu_features *cpu_features) + { +@@ -126,6 +167,13 @@ init_cpu_features (struct cpu_features *cpu_features) + /* Check if SVE is supported. */ + cpu_features->sve = GLRO (dl_hwcap) & HWCAP_SVE; + ++ cpu_features->prefer_sve_ifuncs = cpu_features->sve; ++ ++#if __LINUX_KERNEL_VERSION < 0x060200 ++ if (cpu_features->sve) ++ cpu_features->prefer_sve_ifuncs = prefer_sve_ifuncs (); ++#endif ++ + /* Check if MOPS is supported. */ + cpu_features->mops = GLRO (dl_hwcap2) & HWCAP2_MOPS; + } +diff --git a/sysdeps/unix/sysv/linux/dl-parse_auxv.h b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +index e3d758b163..ea2a58ecb1 100644 +--- a/sysdeps/unix/sysv/linux/dl-parse_auxv.h ++++ b/sysdeps/unix/sysv/linux/dl-parse_auxv.h +@@ -47,6 +47,8 @@ void _dl_parse_auxv (ElfW(auxv_t) *av, dl_parse_auxv_t auxv_values) + GLRO(dl_platform) = (void *) auxv_values[AT_PLATFORM]; + GLRO(dl_hwcap) = auxv_values[AT_HWCAP]; + GLRO(dl_hwcap2) = auxv_values[AT_HWCAP2]; ++ GLRO(dl_hwcap3) = auxv_values[AT_HWCAP3]; ++ GLRO(dl_hwcap4) = auxv_values[AT_HWCAP4]; + GLRO(dl_clktck) = auxv_values[AT_CLKTCK]; + GLRO(dl_fpu_control) = auxv_values[AT_FPUCW]; + _dl_random = (void *) auxv_values[AT_RANDOM]; +diff --git a/sysdeps/unix/sysv/linux/dl-sysdep.c b/sysdeps/unix/sysv/linux/dl-sysdep.c +index ad3692d738..e1b14e9eb3 100644 +--- a/sysdeps/unix/sysv/linux/dl-sysdep.c ++++ b/sysdeps/unix/sysv/linux/dl-sysdep.c +@@ -197,6 +197,8 @@ _dl_show_auxv (void) + [AT_SYSINFO_EHDR - 2] = { "SYSINFO_EHDR: 0x", hex }, + [AT_RANDOM - 2] = { "RANDOM: 0x", hex }, + [AT_HWCAP2 - 2] = { "HWCAP2: 0x", hex }, ++ [AT_HWCAP3 - 2] = { "HWCAP3: 0x", hex }, ++ [AT_HWCAP4 - 2] = { "HWCAP4: 0x", hex }, + [AT_MINSIGSTKSZ - 2] = { "MINSIGSTKSZ: ", dec }, + [AT_L1I_CACHESIZE - 2] = { "L1I_CACHESIZE: ", dec }, + [AT_L1I_CACHEGEOMETRY - 2] = { "L1I_CACHEGEOMETRY: 0x", hex }, +diff --git a/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h +new file mode 100644 +index 0000000000..6ecbfe7c86 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/hppa/bits/wordsize.h +@@ -0,0 +1,21 @@ ++/* Copyright (C) 1999-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#define __WORDSIZE 32 ++#define __WORDSIZE_TIME64_COMPAT32 1 ++#define __WORDSIZE32_SIZE_ULONG 0 ++#define __WORDSIZE32_PTRDIFF_LONG 0 +diff --git a/sysdeps/unix/sysv/linux/mips/clone3.S b/sysdeps/unix/sysv/linux/mips/clone3.S +index e9fec2fa47..481b8ae963 100644 +--- a/sysdeps/unix/sysv/linux/mips/clone3.S ++++ b/sysdeps/unix/sysv/linux/mips/clone3.S +@@ -37,11 +37,6 @@ + + .text + .set nomips16 +-#if _MIPS_SIM == _ABIO32 +-# define EXTRA_LOCALS 1 +-#else +-# define EXTRA_LOCALS 0 +-#endif + #define FRAMESZ ((NARGSAVE*SZREG)+ALSZ)&ALMASK + GPOFF= FRAMESZ-(1*SZREG) + NESTED(__clone3, SZREG, sp) +@@ -68,8 +63,31 @@ NESTED(__clone3, SZREG, sp) + beqz a0, L(error) /* No NULL cl_args pointer. */ + beqz a2, L(error) /* No NULL function pointer. */ + ++#if _MIPS_SIM == _ABIO32 ++ /* Both stack and stack_size on clone_args are defined as uint64_t, and ++ there is no need to handle values larger than to 32 bits for o32. */ ++# if __BYTE_ORDER == __BIG_ENDIAN ++# define CL_STACKPOINTER_OFFSET 44 ++# define CL_STACKSIZE_OFFSET 52 ++# else ++# define CL_STACKPOINTER_OFFSET 40 ++# define CL_STACKSIZE_OFFSET 48 ++# endif ++ ++ /* For o32 we need to setup a minimal stack frame to allow cprestore ++ on __thread_start_clone3. Also there is no guarantee by kABI that ++ $8 will be preserved after syscall execution (so we need to save it ++ on the provided stack). */ ++ lw t0, CL_STACKPOINTER_OFFSET(a0) /* Load the stack pointer. */ ++ lw t1, CL_STACKSIZE_OFFSET(a0) /* Load the stack_size. */ ++ addiu t1, -32 /* Update the stack size. */ ++ addu t2, t1, t0 /* Calculate the thread stack. */ ++ sw a3, 0(t2) /* Save argument pointer. */ ++ sw t1, CL_STACKSIZE_OFFSET(a0) /* Save the new stack size. */ ++#else + move $8, a3 /* a3 is set to 0/1 for syscall success/error + while a4/$8 is returned unmodified. */ ++#endif + + /* Do the system call, the kernel expects: + v0: system call number +@@ -125,7 +143,11 @@ L(thread_start_clone3): + + /* Restore the arg for user's function. */ + move t9, a2 /* Function pointer. */ ++#if _MIPS_SIM == _ABIO32 ++ PTR_L a0, 0(sp) ++#else + move a0, $8 /* Argument pointer. */ ++#endif + + /* Call the user's function. */ + jal t9 +diff --git a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h +index 04ca9debf0..6993fb6b29 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h ++++ b/sysdeps/unix/sysv/linux/powerpc/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __powerpc64__ + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 +-# define __WORDSIZE_TIME64_COMPAT32 0 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c +index 8e8a5ec2ea..a947d62db6 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.c ++++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.c +@@ -94,6 +94,8 @@ init_cpu_features (struct cpu_features *cpu_features, uint64_t hwcaps[]) + which are set by __tcb_parse_hwcap_and_convert_at_platform. */ + cpu_features->hwcap = hwcaps[0]; + cpu_features->hwcap2 = hwcaps[1]; ++ cpu_features->hwcap3 = hwcaps[2]; ++ cpu_features->hwcap4 = hwcaps[3]; + /* Default is to use aligned memory access on optimized function unless + tunables is enable, since for this case user can explicit disable + unaligned optimizations. */ +diff --git a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h +index 1294f0b601..e9eb6a13c8 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/cpu-features.h ++++ b/sysdeps/unix/sysv/linux/powerpc/cpu-features.h +@@ -26,6 +26,8 @@ struct cpu_features + bool use_cached_memopt; + unsigned long int hwcap; + unsigned long int hwcap2; ++ unsigned long int hwcap3; ++ unsigned long int hwcap4; + }; + + static const char hwcap_names[] = { +diff --git a/sysdeps/unix/sysv/linux/powerpc/libc-start.c b/sysdeps/unix/sysv/linux/powerpc/libc-start.c +index a4705daf1c..6a00cd88cd 100644 +--- a/sysdeps/unix/sysv/linux/powerpc/libc-start.c ++++ b/sysdeps/unix/sysv/linux/powerpc/libc-start.c +@@ -87,6 +87,12 @@ __libc_start_main_impl (int argc, char **argv, + case AT_HWCAP2: + _dl_hwcap2 = (unsigned long int) av->a_un.a_val; + break; ++ case AT_HWCAP3: ++ _dl_hwcap3 = (unsigned long int) av->a_un.a_val; ++ break; ++ case AT_HWCAP4: ++ _dl_hwcap4 = (unsigned long int) av->a_un.a_val; ++ break; + case AT_PLATFORM: + _dl_platform = (void *) av->a_un.a_val; + break; +diff --git a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S +index 4c882ef2ee..a7a863242c 100644 +--- a/sysdeps/unix/sysv/linux/s390/s390-32/clone.S ++++ b/sysdeps/unix/sysv/linux/s390/s390-32/clone.S +@@ -53,6 +53,7 @@ ENTRY(__clone) + br %r14 + error: + lhi %r2,-EINVAL ++ lm %r6,%r7,24(%r15) /* Load registers. */ + j SYSCALL_ERROR_LABEL + PSEUDO_END (__clone) + +diff --git a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S +index 4eb104be71..c552a6b8de 100644 +--- a/sysdeps/unix/sysv/linux/s390/s390-64/clone.S ++++ b/sysdeps/unix/sysv/linux/s390/s390-64/clone.S +@@ -54,6 +54,7 @@ ENTRY(__clone) + br %r14 + error: + lghi %r2,-EINVAL ++ lmg %r6,%r7,48(%r15) /* Restore registers. */ + jg SYSCALL_ERROR_LABEL + PSEUDO_END (__clone) + +diff --git a/sysdeps/unix/sysv/linux/sched_getcpu.c b/sysdeps/unix/sysv/linux/sched_getcpu.c +index dfb884568d..72a3360550 100644 +--- a/sysdeps/unix/sysv/linux/sched_getcpu.c ++++ b/sysdeps/unix/sysv/linux/sched_getcpu.c +@@ -33,17 +33,9 @@ vsyscall_sched_getcpu (void) + return r == -1 ? r : cpu; + } + +-#ifdef RSEQ_SIG + int + sched_getcpu (void) + { + int cpu_id = THREAD_GETMEM_VOLATILE (THREAD_SELF, rseq_area.cpu_id); + return __glibc_likely (cpu_id >= 0) ? cpu_id : vsyscall_sched_getcpu (); + } +-#else /* RSEQ_SIG */ +-int +-sched_getcpu (void) +-{ +- return vsyscall_sched_getcpu (); +-} +-#endif /* RSEQ_SIG */ +diff --git a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h +index 7562875ee2..ea103e5970 100644 +--- a/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h ++++ b/sysdeps/unix/sysv/linux/sparc/bits/wordsize.h +@@ -2,10 +2,9 @@ + + #if defined __arch64__ || defined __sparcv9 + # define __WORDSIZE 64 +-# define __WORDSIZE_TIME64_COMPAT32 1 + #else + # define __WORDSIZE 32 + # define __WORDSIZE32_SIZE_ULONG 0 + # define __WORDSIZE32_PTRDIFF_LONG 0 +-# define __WORDSIZE_TIME64_COMPAT32 0 + #endif ++#define __WORDSIZE_TIME64_COMPAT32 1 +diff --git a/sysdeps/unix/sysv/linux/timespec_get.c b/sysdeps/unix/sysv/linux/timespec_get.c +index c6e5e66289..778d1e3354 100644 +--- a/sysdeps/unix/sysv/linux/timespec_get.c ++++ b/sysdeps/unix/sysv/linux/timespec_get.c +@@ -5,7 +5,7 @@ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either +- version 2.1 of the License. ++ version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of +diff --git a/sysdeps/unix/sysv/linux/timespec_getres.c b/sysdeps/unix/sysv/linux/timespec_getres.c +index 5acebe2a2c..2eef9e512c 100644 +--- a/sysdeps/unix/sysv/linux/timespec_getres.c ++++ b/sysdeps/unix/sysv/linux/timespec_getres.c +@@ -5,7 +5,7 @@ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either +- version 2.1 of the License. ++ version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of +diff --git a/sysdeps/unix/sysv/linux/tst-clone.c b/sysdeps/unix/sysv/linux/tst-clone.c +index 470676ab2b..2bc7124983 100644 +--- a/sysdeps/unix/sysv/linux/tst-clone.c ++++ b/sysdeps/unix/sysv/linux/tst-clone.c +@@ -16,12 +16,16 @@ + License along with the GNU C Library; if not, see + . */ + +-/* BZ #2386 */ ++/* BZ #2386, BZ #31402 */ + #include + #include + #include + #include + #include ++#include /* For _STACK_GROWS_{UP,DOWN}. */ ++#include ++ ++volatile unsigned v = 0xdeadbeef; + + int child_fn(void *arg) + { +@@ -30,22 +34,67 @@ int child_fn(void *arg) + } + + static int +-do_test (void) ++__attribute__((noinline)) ++do_clone (int (*fn)(void *), void *stack) + { + int result; ++ unsigned int a = v; ++ unsigned int b = v; ++ unsigned int c = v; ++ unsigned int d = v; ++ unsigned int e = v; ++ unsigned int f = v; ++ unsigned int g = v; ++ unsigned int h = v; ++ unsigned int i = v; ++ unsigned int j = v; ++ unsigned int k = v; ++ unsigned int l = v; ++ unsigned int m = v; ++ unsigned int n = v; ++ unsigned int o = v; ++ ++ result = clone (fn, stack, 0, NULL); ++ ++ /* Check that clone does not clobber call-saved registers. */ ++ TEST_VERIFY (a == v && b == v && c == v && d == v && e == v && f == v ++ && g == v && h == v && i == v && j == v && k == v && l == v ++ && m == v && n == v && o == v); ++ ++ return result; ++} ++ ++static void ++__attribute__((noinline)) ++do_test_single (int (*fn)(void *), void *stack) ++{ ++ printf ("%s (fn=%p, stack=%p)\n", __FUNCTION__, fn, stack); ++ errno = 0; ++ ++ int result = do_clone (fn, stack); ++ ++ TEST_COMPARE (errno, EINVAL); ++ TEST_COMPARE (result, -1); ++} + +- result = clone (child_fn, NULL, 0, NULL); ++static int ++do_test (void) ++{ ++ char st[128 * 1024] __attribute__ ((aligned)); ++ void *stack = NULL; ++#if _STACK_GROWS_DOWN ++ stack = st + sizeof (st); ++#elif _STACK_GROWS_UP ++ stack = st; ++#else ++# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP" ++#endif + +- if (errno != EINVAL || result != -1) +- { +- printf ("FAIL: clone()=%d (wanted -1) errno=%d (wanted %d)\n", +- result, errno, EINVAL); +- return 1; +- } ++ do_test_single (child_fn, NULL); ++ do_test_single (NULL, stack); ++ do_test_single (NULL, NULL); + +- puts ("All OK"); + return 0; + } + +-#define TEST_FUNCTION do_test () +-#include "../test-skeleton.c" ++#include +diff --git a/sysdeps/unix/sysv/linux/x86_64/Makefile b/sysdeps/unix/sysv/linux/x86_64/Makefile +index 4223feb95f..9a1e7aa646 100644 +--- a/sysdeps/unix/sysv/linux/x86_64/Makefile ++++ b/sysdeps/unix/sysv/linux/x86_64/Makefile +@@ -63,6 +63,33 @@ $(objpfx)libx86-64-isa-level%.os: $(..)/sysdeps/unix/sysv/linux/x86_64/x86-64-is + $(objpfx)libx86-64-isa-level.so: $(objpfx)libx86-64-isa-level-1.so + cp $< $@ + endif ++ ++ifeq (yes,$(have-mamx-tile)) ++tests += \ ++ tst-gnu2-tls2-amx \ ++# tests ++ ++modules-names += \ ++ tst-gnu2-tls2-amx-mod0 \ ++ tst-gnu2-tls2-amx-mod1 \ ++ tst-gnu2-tls2-amx-mod2 \ ++# modules-names ++ ++$(objpfx)tst-gnu2-tls2-amx: $(shared-thread-library) ++$(objpfx)tst-gnu2-tls2-amx.out: \ ++ $(objpfx)tst-gnu2-tls2-amx-mod0.so \ ++ $(objpfx)tst-gnu2-tls2-amx-mod1.so \ ++ $(objpfx)tst-gnu2-tls2-amx-mod2.so ++$(objpfx)tst-gnu2-tls2-amx-mod0.so: $(libsupport) ++$(objpfx)tst-gnu2-tls2-amx-mod1.so: $(libsupport) ++$(objpfx)tst-gnu2-tls2-amx-mod2.so: $(libsupport) ++ ++CFLAGS-tst-gnu2-tls2-amx.c += -mamx-tile ++CFLAGS-tst-gnu2-tls2-amx-mod0.c += -mamx-tile -mtls-dialect=gnu2 ++CFLAGS-tst-gnu2-tls2-amx-mod1.c += -mamx-tile -mtls-dialect=gnu2 ++CFLAGS-tst-gnu2-tls2-amx-mod2.c += -mamx-tile -mtls-dialect=gnu2 ++endif ++ + endif # $(subdir) == elf + + ifneq ($(enable-cet),no) +diff --git a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h +index 2f511321ad..ef4631bf4b 100644 +--- a/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h ++++ b/sysdeps/unix/sysv/linux/x86_64/include/asm/prctl.h +@@ -20,3 +20,8 @@ + # define ARCH_SHSTK_SHSTK 0x1 + # define ARCH_SHSTK_WRSS 0x2 + #endif ++ ++#ifndef ARCH_GET_XCOMP_PERM ++# define ARCH_GET_XCOMP_PERM 0x1022 ++# define ARCH_REQ_XCOMP_PERM 0x1023 ++#endif +diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c +new file mode 100644 +index 0000000000..2e0c7b91b7 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod0.c +@@ -0,0 +1,2 @@ ++#include "tst-gnu2-tls2-amx.h" ++#include +diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c +new file mode 100644 +index 0000000000..b8a8ccf1c1 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod1.c +@@ -0,0 +1,2 @@ ++#include "tst-gnu2-tls2-amx.h" ++#include +diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c +new file mode 100644 +index 0000000000..cdf4a8f363 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx-mod2.c +@@ -0,0 +1,2 @@ ++#include "tst-gnu2-tls2-amx.h" ++#include +diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c +new file mode 100644 +index 0000000000..ae4dd82556 +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.c +@@ -0,0 +1,83 @@ ++/* Test TLSDESC relocation with AMX. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include "tst-gnu2-tls2-amx.h" ++ ++extern int arch_prctl (int, ...); ++ ++#define X86_XSTATE_TILECFG_ID 17 ++#define X86_XSTATE_TILEDATA_ID 18 ++ ++/* Initialize tile config. */ ++__attribute__ ((noinline, noclone)) ++static void ++init_tile_config (__tilecfg *tileinfo) ++{ ++ int i; ++ tileinfo->palette_id = 1; ++ tileinfo->start_row = 0; ++ ++ tileinfo->colsb[0] = MAX_ROWS; ++ tileinfo->rows[0] = MAX_ROWS; ++ ++ for (i = 1; i < 4; ++i) ++ { ++ tileinfo->colsb[i] = MAX_COLS; ++ tileinfo->rows[i] = MAX_ROWS; ++ } ++ ++ _tile_loadconfig (tileinfo); ++} ++ ++static bool ++enable_amx (void) ++{ ++ uint64_t bitmask; ++ if (arch_prctl (ARCH_GET_XCOMP_PERM, &bitmask) != 0) ++ return false; ++ ++ if ((bitmask & (1 << X86_XSTATE_TILECFG_ID)) == 0) ++ return false; ++ ++ if (arch_prctl (ARCH_REQ_XCOMP_PERM, X86_XSTATE_TILEDATA_ID) != 0) ++ return false; ++ ++ /* Load tile configuration. */ ++ __tilecfg tile_data = { 0 }; ++ init_tile_config (&tile_data); ++ ++ return true; ++} ++ ++/* An architecture can define it to clobber caller-saved registers in ++ malloc below to verify that the implicit TLSDESC call won't change ++ caller-saved registers. */ ++static void ++clear_tile_register (void) ++{ ++ _tile_zero (2); ++} ++ ++#define MOD(i) "tst-gnu2-tls2-amx-mod" #i ".so" ++#define IS_SUPPORTED() enable_amx () ++#define PREPARE_MALLOC() clear_tile_register () ++ ++#include +diff --git a/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h +new file mode 100644 +index 0000000000..1845a3caba +--- /dev/null ++++ b/sysdeps/unix/sysv/linux/x86_64/tst-gnu2-tls2-amx.h +@@ -0,0 +1,63 @@ ++/* Test TLSDESC relocation with AMX. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++#include ++#include ++ ++#define MAX_ROWS 16 ++#define MAX_COLS 64 ++#define MAX 1024 ++#define STRIDE 64 ++ ++typedef struct __tile_config ++{ ++ uint8_t palette_id; ++ uint8_t start_row; ++ uint8_t reserved_0[14]; ++ uint16_t colsb[16]; ++ uint8_t rows[16]; ++} __tilecfg __attribute__ ((aligned (64))); ++ ++/* Initialize int8_t buffer */ ++static inline void ++init_buffer (int8_t *buf, int8_t value) ++{ ++ int rows, colsb, i, j; ++ rows = MAX_ROWS; ++ colsb = MAX_COLS; ++ ++ for (i = 0; i < rows; i++) ++ for (j = 0; j < colsb; j++) ++ buf[i * colsb + j] = value; ++} ++ ++#define BEFORE_TLSDESC_CALL() \ ++ int8_t src[MAX]; \ ++ int8_t res[MAX]; \ ++ /* Initialize src with data */ \ ++ init_buffer (src, 2); \ ++ /* Load tile rows from memory. */ \ ++ _tile_loadd (2, src, STRIDE); ++ ++#define AFTER_TLSDESC_CALL() \ ++ /* Store the tile data to memory. */ \ ++ _tile_stored (2, res, STRIDE); \ ++ _tile_release (); \ ++ TEST_VERIFY_EXIT (memcmp (src, res, sizeof (res)) == 0); +diff --git a/sysdeps/x86/Makefile b/sysdeps/x86/Makefile +index 4d50b327b5..5311b594af 100644 +--- a/sysdeps/x86/Makefile ++++ b/sysdeps/x86/Makefile +@@ -1,5 +1,5 @@ + ifeq ($(subdir),csu) +-gen-as-const-headers += cpu-features-offsets.sym ++gen-as-const-headers += cpu-features-offsets.sym features-offsets.sym + endif + + ifeq ($(subdir),elf) +@@ -15,18 +15,18 @@ CFLAGS-dl-get-cpu-features.os += $(rtld-early-cflags) + CFLAGS-get-cpuid-feature-leaf.o += $(no-stack-protector) + + tests += \ +- tst-get-cpu-features \ +- tst-get-cpu-features-static \ + tst-cpu-features-cpuinfo \ + tst-cpu-features-cpuinfo-static \ + tst-cpu-features-supports \ + tst-cpu-features-supports-static \ ++ tst-get-cpu-features \ ++ tst-get-cpu-features-static \ + tst-hwcap-tunables \ + # tests + tests-static += \ +- tst-get-cpu-features-static \ + tst-cpu-features-cpuinfo-static \ + tst-cpu-features-supports-static \ ++ tst-get-cpu-features-static \ + # tests-static + ifeq (yes,$(have-ifunc)) + ifeq (yes,$(have-gcc-ifunc)) +@@ -86,6 +86,11 @@ endif + tst-ifunc-isa-2-ENV = GLIBC_TUNABLES=glibc.cpu.hwcaps=-SSE4_2,-AVX,-AVX2,-AVX512F + tst-ifunc-isa-2-static-ENV = $(tst-ifunc-isa-2-ENV) + tst-hwcap-tunables-ARGS = -- $(host-test-program-cmd) ++ ++CFLAGS-tst-gnu2-tls2.c += -msse ++CFLAGS-tst-gnu2-tls2mod0.c += -msse2 -mtune=haswell ++CFLAGS-tst-gnu2-tls2mod1.c += -msse2 -mtune=haswell ++CFLAGS-tst-gnu2-tls2mod2.c += -msse2 -mtune=haswell + endif + + ifeq ($(subdir),math) +diff --git a/sysdeps/x86/bits/wordsize.h b/sysdeps/x86/bits/wordsize.h +index 70f652bca1..3f40aa76f9 100644 +--- a/sysdeps/x86/bits/wordsize.h ++++ b/sysdeps/x86/bits/wordsize.h +@@ -8,10 +8,9 @@ + #define __WORDSIZE32_PTRDIFF_LONG 0 + #endif + ++#define __WORDSIZE_TIME64_COMPAT32 1 ++ + #ifdef __x86_64__ +-# define __WORDSIZE_TIME64_COMPAT32 1 + /* Both x86-64 and x32 use the 64-bit system call interface. */ + # define __SYSCALL_WORDSIZE 64 +-#else +-# define __WORDSIZE_TIME64_COMPAT32 0 + #endif +diff --git a/sysdeps/x86/configure b/sysdeps/x86/configure +index 1f4c2d67fd..d28d9bcb29 100644 +--- a/sysdeps/x86/configure ++++ b/sysdeps/x86/configure +@@ -98,6 +98,7 @@ printf "%s\n" "$libc_cv_have_x86_lahf_sahf" >&6; } + if test $libc_cv_have_x86_lahf_sahf = yes; then + printf "%s\n" "#define HAVE_X86_LAHF_SAHF 1" >>confdefs.h + ++ ISAFLAG="-DHAVE_X86_LAHF_SAHF" + fi + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for MOVBE instruction support" >&5 + printf %s "checking for MOVBE instruction support... " >&6; } +@@ -120,8 +121,47 @@ printf "%s\n" "$libc_cv_have_x86_movbe" >&6; } + if test $libc_cv_have_x86_movbe = yes; then + printf "%s\n" "#define HAVE_X86_MOVBE 1" >>confdefs.h + ++ ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE" + fi ++ ++ # Check for ISA level support. ++ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ISA level support" >&5 ++printf %s "checking for ISA level support... " >&6; } ++if test ${libc_cv_have_x86_isa_level+y} ++then : ++ printf %s "(cached) " >&6 ++else $as_nop ++ cat > conftest.c < ++#if MINIMUM_X86_ISA_LEVEL >= 4 ++libc_cv_have_x86_isa_level=4 ++#elif MINIMUM_X86_ISA_LEVEL == 3 ++libc_cv_have_x86_isa_level=3 ++#elif MINIMUM_X86_ISA_LEVEL == 2 ++libc_cv_have_x86_isa_level=2 ++#else ++libc_cv_have_x86_isa_level=baseline ++#endif ++EOF ++ eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` ++ rm -rf conftest* + fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_have_x86_isa_level" >&5 ++printf "%s\n" "$libc_cv_have_x86_isa_level" >&6; } ++else ++ libc_cv_have_x86_isa_level=baseline ++fi ++if test $libc_cv_have_x86_isa_level = baseline; then ++ printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL 1" >>confdefs.h ++ ++else ++ printf "%s\n" "#define MINIMUM_X86_ISA_LEVEL $libc_cv_have_x86_isa_level" >>confdefs.h ++ ++fi ++config_vars="$config_vars ++have-x86-isa-level = $libc_cv_have_x86_isa_level" ++config_vars="$config_vars ++x86-isa-level-3-or-above = 3 4" + config_vars="$config_vars + enable-x86-isa-level = $libc_cv_include_x86_isa_level" + +diff --git a/sysdeps/x86/configure.ac b/sysdeps/x86/configure.ac +index 437a50623b..5b0acd03d2 100644 +--- a/sysdeps/x86/configure.ac ++++ b/sysdeps/x86/configure.ac +@@ -72,6 +72,7 @@ if test $libc_cv_include_x86_isa_level = yes; then + fi]) + if test $libc_cv_have_x86_lahf_sahf = yes; then + AC_DEFINE(HAVE_X86_LAHF_SAHF) ++ ISAFLAG="-DHAVE_X86_LAHF_SAHF" + fi + AC_CACHE_CHECK([for MOVBE instruction support], + libc_cv_have_x86_movbe, [dnl +@@ -81,8 +82,36 @@ if test $libc_cv_include_x86_isa_level = yes; then + fi]) + if test $libc_cv_have_x86_movbe = yes; then + AC_DEFINE(HAVE_X86_MOVBE) ++ ISAFLAG="$ISAFLAG -DHAVE_X86_MOVBE" + fi ++ ++ # Check for ISA level support. ++ AC_CACHE_CHECK([for ISA level support], ++ libc_cv_have_x86_isa_level, [dnl ++cat > conftest.c < ++#if MINIMUM_X86_ISA_LEVEL >= 4 ++libc_cv_have_x86_isa_level=4 ++#elif MINIMUM_X86_ISA_LEVEL == 3 ++libc_cv_have_x86_isa_level=3 ++#elif MINIMUM_X86_ISA_LEVEL == 2 ++libc_cv_have_x86_isa_level=2 ++#else ++libc_cv_have_x86_isa_level=baseline ++#endif ++EOF ++ eval `${CC-cc} $CFLAGS $CPPFLAGS $ISAFLAG -I$srcdir -E conftest.c | grep libc_cv_have_x86_isa_level` ++ rm -rf conftest*]) ++else ++ libc_cv_have_x86_isa_level=baseline ++fi ++if test $libc_cv_have_x86_isa_level = baseline; then ++ AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, 1) ++else ++ AC_DEFINE_UNQUOTED(MINIMUM_X86_ISA_LEVEL, $libc_cv_have_x86_isa_level) + fi ++LIBC_CONFIG_VAR([have-x86-isa-level], [$libc_cv_have_x86_isa_level]) ++LIBC_CONFIG_VAR([x86-isa-level-3-or-above], [3 4]) + LIBC_CONFIG_VAR([enable-x86-isa-level], [$libc_cv_include_x86_isa_level]) + + dnl Static PIE is supported. +diff --git a/sysdeps/x86/cpu-features-offsets.sym b/sysdeps/x86/cpu-features-offsets.sym +index 6a8fd29813..21fc88d651 100644 +--- a/sysdeps/x86/cpu-features-offsets.sym ++++ b/sysdeps/x86/cpu-features-offsets.sym +@@ -3,3 +3,4 @@ + #include + + XSAVE_STATE_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_size) ++XSAVE_STATE_FULL_SIZE_OFFSET offsetof (struct cpu_features, xsave_state_full_size) +diff --git a/sysdeps/x86/cpu-features.c b/sysdeps/x86/cpu-features.c +index 25e6622a79..3d7c2819d7 100644 +--- a/sysdeps/x86/cpu-features.c ++++ b/sysdeps/x86/cpu-features.c +@@ -18,6 +18,7 @@ + + #include + #include ++#include + #include + #include + #include +@@ -27,8 +28,13 @@ + extern void TUNABLE_CALLBACK (set_hwcaps) (tunable_val_t *) + attribute_hidden; + +-#if defined SHARED && defined __x86_64__ +-# include ++#if defined SHARED ++extern void _dl_tlsdesc_dynamic_fxsave (void) attribute_hidden; ++extern void _dl_tlsdesc_dynamic_xsave (void) attribute_hidden; ++extern void _dl_tlsdesc_dynamic_xsavec (void) attribute_hidden; ++ ++# ifdef __x86_64__ ++# include + + static void + TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp) +@@ -47,6 +53,15 @@ TUNABLE_CALLBACK (set_plt_rewrite) (tunable_val_t *valp) + : plt_rewrite_jmp); + } + } ++# else ++extern void _dl_tlsdesc_dynamic_fnsave (void) attribute_hidden; ++# endif ++#endif ++ ++#ifdef __x86_64__ ++extern void _dl_runtime_resolve_fxsave (void) attribute_hidden; ++extern void _dl_runtime_resolve_xsave (void) attribute_hidden; ++extern void _dl_runtime_resolve_xsavec (void) attribute_hidden; + #endif + + #ifdef __LP64__ +@@ -293,8 +308,10 @@ update_active (struct cpu_features *cpu_features) + __cpuid_count (0xd, 0, eax, ebx, ecx, edx); + if (ebx != 0) + { ++ /* NB: On AMX capable processors, ebx always includes AMX ++ states. */ + unsigned int xsave_state_full_size +- = ALIGN_UP (ebx + STATE_SAVE_OFFSET, 64); ++ = ALIGN_UP (ebx + TLSDESC_CALL_REGISTER_SAVE_AREA, 64); + + cpu_features->xsave_state_size + = xsave_state_full_size; +@@ -306,6 +323,11 @@ update_active (struct cpu_features *cpu_features) + { + unsigned int xstate_comp_offsets[32]; + unsigned int xstate_comp_sizes[32]; ++#ifdef __x86_64__ ++ unsigned int xstate_amx_comp_offsets[32]; ++ unsigned int xstate_amx_comp_sizes[32]; ++ unsigned int amx_ecx; ++#endif + unsigned int i; + + xstate_comp_offsets[0] = 0; +@@ -313,16 +335,39 @@ update_active (struct cpu_features *cpu_features) + xstate_comp_offsets[2] = 576; + xstate_comp_sizes[0] = 160; + xstate_comp_sizes[1] = 256; ++#ifdef __x86_64__ ++ xstate_amx_comp_offsets[0] = 0; ++ xstate_amx_comp_offsets[1] = 160; ++ xstate_amx_comp_offsets[2] = 576; ++ xstate_amx_comp_sizes[0] = 160; ++ xstate_amx_comp_sizes[1] = 256; ++#endif + + for (i = 2; i < 32; i++) + { +- if ((STATE_SAVE_MASK & (1 << i)) != 0) ++ if ((FULL_STATE_SAVE_MASK & (1 << i)) != 0) + { + __cpuid_count (0xd, i, eax, ebx, ecx, edx); +- xstate_comp_sizes[i] = eax; ++#ifdef __x86_64__ ++ /* Include this in xsave_state_full_size. */ ++ amx_ecx = ecx; ++ xstate_amx_comp_sizes[i] = eax; ++ if ((AMX_STATE_SAVE_MASK & (1 << i)) != 0) ++ { ++ /* Exclude this from xsave_state_size. */ ++ ecx = 0; ++ xstate_comp_sizes[i] = 0; ++ } ++ else ++#endif ++ xstate_comp_sizes[i] = eax; + } + else + { ++#ifdef __x86_64__ ++ amx_ecx = 0; ++ xstate_amx_comp_sizes[i] = 0; ++#endif + ecx = 0; + xstate_comp_sizes[i] = 0; + } +@@ -335,6 +380,15 @@ update_active (struct cpu_features *cpu_features) + if ((ecx & (1 << 1)) != 0) + xstate_comp_offsets[i] + = ALIGN_UP (xstate_comp_offsets[i], 64); ++#ifdef __x86_64__ ++ xstate_amx_comp_offsets[i] ++ = (xstate_amx_comp_offsets[i - 1] ++ + xstate_amx_comp_sizes[i - 1]); ++ if ((amx_ecx & (1 << 1)) != 0) ++ xstate_amx_comp_offsets[i] ++ = ALIGN_UP (xstate_amx_comp_offsets[i], ++ 64); ++#endif + } + } + +@@ -343,8 +397,23 @@ update_active (struct cpu_features *cpu_features) + = xstate_comp_offsets[31] + xstate_comp_sizes[31]; + if (size) + { ++#ifdef __x86_64__ ++ unsigned int amx_size ++ = (xstate_amx_comp_offsets[31] ++ + xstate_amx_comp_sizes[31]); ++ amx_size ++ = ALIGN_UP ((amx_size ++ + TLSDESC_CALL_REGISTER_SAVE_AREA), ++ 64); ++ /* Set xsave_state_full_size to the compact AMX ++ state size for XSAVEC. NB: xsave_state_full_size ++ is only used in _dl_tlsdesc_dynamic_xsave and ++ _dl_tlsdesc_dynamic_xsavec. */ ++ cpu_features->xsave_state_full_size = amx_size; ++#endif + cpu_features->xsave_state_size +- = ALIGN_UP (size + STATE_SAVE_OFFSET, 64); ++ = ALIGN_UP (size + TLSDESC_CALL_REGISTER_SAVE_AREA, ++ 64); + CPU_FEATURE_SET (cpu_features, XSAVEC); + } + } +@@ -1130,6 +1199,45 @@ no_cpuid: + TUNABLE_CALLBACK (set_x86_shstk)); + #endif + ++ if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL ++ || (GLRO(dl_x86_cpu_features).xsave_state_size != 0)) ++ { ++ if (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC)) ++ { ++#ifdef __x86_64__ ++ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsavec; ++#endif ++#ifdef SHARED ++ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsavec; ++#endif ++ } ++ else ++ { ++#ifdef __x86_64__ ++ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_xsave; ++#endif ++#ifdef SHARED ++ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_xsave; ++#endif ++ } ++ } ++ else ++ { ++#ifdef __x86_64__ ++ GLRO(dl_x86_64_runtime_resolve) = _dl_runtime_resolve_fxsave; ++# ifdef SHARED ++ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; ++# endif ++#else ++# ifdef SHARED ++ if (CPU_FEATURE_USABLE_P (cpu_features, FXSR)) ++ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fxsave; ++ else ++ GLRO(dl_x86_tlsdesc_dynamic) = _dl_tlsdesc_dynamic_fnsave; ++# endif ++#endif ++ } ++ + #ifdef SHARED + # ifdef __x86_64__ + TUNABLE_GET (plt_rewrite, tunable_val_t *, +diff --git a/sysdeps/x86/dl-cacheinfo.h b/sysdeps/x86/dl-cacheinfo.h +index d5101615e3..5a98f70364 100644 +--- a/sysdeps/x86/dl-cacheinfo.h ++++ b/sysdeps/x86/dl-cacheinfo.h +@@ -791,7 +791,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + long int data = -1; + long int shared = -1; + long int shared_per_thread = -1; +- long int core = -1; + unsigned int threads = 0; + unsigned long int level1_icache_size = -1; + unsigned long int level1_icache_linesize = -1; +@@ -809,7 +808,6 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + if (cpu_features->basic.kind == arch_kind_intel) + { + data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features); +- core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features); + shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features); + shared_per_thread = shared; + +@@ -822,7 +820,8 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + = handle_intel (_SC_LEVEL1_DCACHE_ASSOC, cpu_features); + level1_dcache_linesize + = handle_intel (_SC_LEVEL1_DCACHE_LINESIZE, cpu_features); +- level2_cache_size = core; ++ level2_cache_size ++ = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features); + level2_cache_assoc + = handle_intel (_SC_LEVEL2_CACHE_ASSOC, cpu_features); + level2_cache_linesize +@@ -835,12 +834,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level4_cache_size + = handle_intel (_SC_LEVEL4_CACHE_SIZE, cpu_features); + +- get_common_cache_info (&shared, &shared_per_thread, &threads, core); ++ get_common_cache_info (&shared, &shared_per_thread, &threads, ++ level2_cache_size); + } + else if (cpu_features->basic.kind == arch_kind_zhaoxin) + { + data = handle_zhaoxin (_SC_LEVEL1_DCACHE_SIZE); +- core = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE); + shared = handle_zhaoxin (_SC_LEVEL3_CACHE_SIZE); + shared_per_thread = shared; + +@@ -849,19 +848,19 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level1_dcache_size = data; + level1_dcache_assoc = handle_zhaoxin (_SC_LEVEL1_DCACHE_ASSOC); + level1_dcache_linesize = handle_zhaoxin (_SC_LEVEL1_DCACHE_LINESIZE); +- level2_cache_size = core; ++ level2_cache_size = handle_zhaoxin (_SC_LEVEL2_CACHE_SIZE); + level2_cache_assoc = handle_zhaoxin (_SC_LEVEL2_CACHE_ASSOC); + level2_cache_linesize = handle_zhaoxin (_SC_LEVEL2_CACHE_LINESIZE); + level3_cache_size = shared; + level3_cache_assoc = handle_zhaoxin (_SC_LEVEL3_CACHE_ASSOC); + level3_cache_linesize = handle_zhaoxin (_SC_LEVEL3_CACHE_LINESIZE); + +- get_common_cache_info (&shared, &shared_per_thread, &threads, core); ++ get_common_cache_info (&shared, &shared_per_thread, &threads, ++ level2_cache_size); + } + else if (cpu_features->basic.kind == arch_kind_amd) + { + data = handle_amd (_SC_LEVEL1_DCACHE_SIZE); +- core = handle_amd (_SC_LEVEL2_CACHE_SIZE); + shared = handle_amd (_SC_LEVEL3_CACHE_SIZE); + + level1_icache_size = handle_amd (_SC_LEVEL1_ICACHE_SIZE); +@@ -869,7 +868,7 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + level1_dcache_size = data; + level1_dcache_assoc = handle_amd (_SC_LEVEL1_DCACHE_ASSOC); + level1_dcache_linesize = handle_amd (_SC_LEVEL1_DCACHE_LINESIZE); +- level2_cache_size = core; ++ level2_cache_size = handle_amd (_SC_LEVEL2_CACHE_SIZE);; + level2_cache_assoc = handle_amd (_SC_LEVEL2_CACHE_ASSOC); + level2_cache_linesize = handle_amd (_SC_LEVEL2_CACHE_LINESIZE); + level3_cache_size = shared; +@@ -880,12 +879,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + if (shared <= 0) + { + /* No shared L3 cache. All we have is the L2 cache. */ +- shared = core; ++ shared = level2_cache_size; + } + else if (cpu_features->basic.family < 0x17) + { + /* Account for exclusive L2 and L3 caches. */ +- shared += core; ++ shared += level2_cache_size; + } + + shared_per_thread = shared; +@@ -987,6 +986,12 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + if (CPU_FEATURE_USABLE_P (cpu_features, FSRM)) + rep_movsb_threshold = 2112; + ++ /* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of ++ cases slower than the vectorized path (and for some alignments, ++ it is really slow, check BZ #30994). */ ++ if (cpu_features->basic.kind == arch_kind_amd) ++ rep_movsb_threshold = non_temporal_threshold; ++ + /* The default threshold to use Enhanced REP STOSB. */ + unsigned long int rep_stosb_threshold = 2048; + +@@ -1016,6 +1021,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + minimum value is fixed. */ + rep_stosb_threshold = TUNABLE_GET (x86_rep_stosb_threshold, + long int, NULL); ++ if (cpu_features->basic.kind == arch_kind_amd ++ && !TUNABLE_IS_INITIALIZED (x86_rep_stosb_threshold)) ++ /* For AMD Zen3+ architecture, the performance of the vectorized loop is ++ slightly better than ERMS. */ ++ rep_stosb_threshold = SIZE_MAX; + + TUNABLE_SET_WITH_BOUNDS (x86_data_cache_size, data, 0, SIZE_MAX); + TUNABLE_SET_WITH_BOUNDS (x86_shared_cache_size, shared, 0, SIZE_MAX); +@@ -1028,16 +1038,9 @@ dl_init_cacheinfo (struct cpu_features *cpu_features) + SIZE_MAX); + + unsigned long int rep_movsb_stop_threshold; +- /* ERMS feature is implemented from AMD Zen3 architecture and it is +- performing poorly for data above L2 cache size. Henceforth, adding +- an upper bound threshold parameter to limit the usage of Enhanced +- REP MOVSB operations and setting its value to L2 cache size. */ +- if (cpu_features->basic.kind == arch_kind_amd) +- rep_movsb_stop_threshold = core; + /* Setting the upper bound of ERMS to the computed value of +- non-temporal threshold for architectures other than AMD. */ +- else +- rep_movsb_stop_threshold = non_temporal_threshold; ++ non-temporal threshold for all architectures. */ ++ rep_movsb_stop_threshold = non_temporal_threshold; + + cpu_features->data_cache_size = data; + cpu_features->shared_cache_size = shared; +diff --git a/sysdeps/x86/dl-procinfo.c b/sysdeps/x86/dl-procinfo.c +index ee957b4d70..5920d4b320 100644 +--- a/sysdeps/x86/dl-procinfo.c ++++ b/sysdeps/x86/dl-procinfo.c +@@ -86,3 +86,19 @@ PROCINFO_CLASS const char _dl_x86_platforms[4][9] + #else + , + #endif ++ ++#if defined SHARED && !IS_IN (ldconfig) ++# if !defined PROCINFO_DECL ++ ._dl_x86_tlsdesc_dynamic ++# else ++PROCINFO_CLASS void * _dl_x86_tlsdesc_dynamic ++# endif ++# ifndef PROCINFO_DECL ++= NULL ++# endif ++# ifdef PROCINFO_DECL ++; ++# else ++, ++# endif ++#endif +diff --git a/sysdeps/x86_64/features-offsets.sym b/sysdeps/x86/features-offsets.sym +similarity index 89% +rename from sysdeps/x86_64/features-offsets.sym +rename to sysdeps/x86/features-offsets.sym +index 9e4be3393a..77e990c705 100644 +--- a/sysdeps/x86_64/features-offsets.sym ++++ b/sysdeps/x86/features-offsets.sym +@@ -3,4 +3,6 @@ + #include + + RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET offsetof (struct rtld_global_ro, _dl_x86_cpu_features) ++#ifdef __x86_64__ + RTLD_GLOBAL_DL_X86_FEATURE_1_OFFSET offsetof (struct rtld_global, _dl_x86_feature_1) ++#endif +diff --git a/sysdeps/x86/include/cpu-features.h b/sysdeps/x86/include/cpu-features.h +index b9bf3115b6..cd7bd27cf3 100644 +--- a/sysdeps/x86/include/cpu-features.h ++++ b/sysdeps/x86/include/cpu-features.h +@@ -934,6 +934,8 @@ struct cpu_features + /* The full state size for XSAVE when XSAVEC is disabled by + + GLIBC_TUNABLES=glibc.cpu.hwcaps=-XSAVEC ++ ++ and the AMX state size when XSAVEC is available. + */ + unsigned int xsave_state_full_size; + /* Data cache size for use in memory and string routines, typically +diff --git a/sysdeps/x86/isa-level.h b/sysdeps/x86/isa-level.h +index 11fe1ca90c..2c7f74212b 100644 +--- a/sysdeps/x86/isa-level.h ++++ b/sysdeps/x86/isa-level.h +@@ -61,8 +61,10 @@ + # define __X86_ISA_V4 0 + #endif + +-#define MINIMUM_X86_ISA_LEVEL \ ++#ifndef MINIMUM_X86_ISA_LEVEL ++# define MINIMUM_X86_ISA_LEVEL \ + (__X86_ISA_V1 + __X86_ISA_V2 + __X86_ISA_V3 + __X86_ISA_V4) ++#endif + + /* Depending on the minimum ISA level, a feature check result can be a + compile-time constant.. */ +diff --git a/sysdeps/x86/sysdep.h b/sysdeps/x86/sysdep.h +index 85d0a8c943..7359149e17 100644 +--- a/sysdeps/x86/sysdep.h ++++ b/sysdeps/x86/sysdep.h +@@ -21,14 +21,118 @@ + + #include + ++/* The extended state feature IDs in the state component bitmap. */ ++#define X86_XSTATE_X87_ID 0 ++#define X86_XSTATE_SSE_ID 1 ++#define X86_XSTATE_AVX_ID 2 ++#define X86_XSTATE_BNDREGS_ID 3 ++#define X86_XSTATE_BNDCFG_ID 4 ++#define X86_XSTATE_K_ID 5 ++#define X86_XSTATE_ZMM_H_ID 6 ++#define X86_XSTATE_ZMM_ID 7 ++#define X86_XSTATE_PKRU_ID 9 ++#define X86_XSTATE_TILECFG_ID 17 ++#define X86_XSTATE_TILEDATA_ID 18 ++#define X86_XSTATE_APX_F_ID 19 ++ ++#ifdef __x86_64__ + /* Offset for fxsave/xsave area used by _dl_runtime_resolve. Also need + space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX. It must be +- aligned to 16 bytes for fxsave and 64 bytes for xsave. */ +-#define STATE_SAVE_OFFSET (8 * 7 + 8) ++ aligned to 16 bytes for fxsave and 64 bytes for xsave. It is non-zero ++ because MOV, instead of PUSH, is used to save registers onto stack. ++ ++ +==================+<- stack frame start aligned at 8 or 16 bytes ++ | |<- paddings for stack realignment of 64 bytes ++ |------------------|<- xsave buffer end aligned at 64 bytes ++ | |<- ++ | |<- ++ | |<- ++ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) ++ | |<- 8-byte padding for 64-byte alignment ++ | |<- R9 ++ | |<- R8 ++ | |<- RDI ++ | |<- RSI ++ | |<- RDX ++ | |<- RCX ++ | |<- RAX ++ +==================+<- RSP aligned at 64 bytes ++ ++ */ ++# define STATE_SAVE_OFFSET (8 * 7 + 8) ++ ++/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning ++ stack. After realigning stack, it saves RCX, RDX, R8, R9, R10 and ++ R11. Allocate space for RDI, RSI and RBX to avoid clobbering saved ++ RDI, RSI and RBX values on stack by xsave. ++ ++ +==================+<- stack frame start aligned at 8 or 16 bytes ++ | |<- RDI saved in the red zone ++ | |<- RSI saved in the red zone ++ | |<- RBX saved in the red zone ++ | |<- paddings for stack realignment of 64 bytes ++ |------------------|<- xsave buffer end aligned at 64 bytes ++ | |<- ++ | |<- ++ | |<- ++ |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp) ++ | |<- 8-byte padding for 64-byte alignment ++ | |<- 8-byte padding for 64-byte alignment ++ | |<- R11 ++ | |<- R10 ++ | |<- R9 ++ | |<- R8 ++ | |<- RDX ++ | |<- RCX ++ +==================+<- RSP aligned at 64 bytes ++ ++ Define the total register save area size for all integer registers by ++ adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto ++ stack without adjusting stack pointer first, using the red-zone. */ ++# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24) ++ ++/* Save SSE, AVX, AVX512, mask, bound and APX registers. Bound and APX ++ registers are mutually exclusive. */ ++# define STATE_SAVE_MASK \ ++ ((1 << X86_XSTATE_SSE_ID) \ ++ | (1 << X86_XSTATE_AVX_ID) \ ++ | (1 << X86_XSTATE_BNDREGS_ID) \ ++ | (1 << X86_XSTATE_K_ID) \ ++ | (1 << X86_XSTATE_ZMM_H_ID) \ ++ | (1 << X86_XSTATE_ZMM_ID) \ ++ | (1 << X86_XSTATE_APX_F_ID)) ++ ++/* AMX state mask. */ ++# define AMX_STATE_SAVE_MASK \ ++ ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID)) ++ ++/* States to be included in xsave_state_full_size. */ ++# define FULL_STATE_SAVE_MASK \ ++ (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK) ++#else ++/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic. Since i386 ++ uses PUSH to save registers onto stack, use 0 here. */ ++# define STATE_SAVE_OFFSET 0 ++# define TLSDESC_CALL_REGISTER_SAVE_AREA 0 ++ ++/* Save SSE, AVX, AXV512, mask and bound registers. */ ++# define STATE_SAVE_MASK \ ++ ((1 << X86_XSTATE_SSE_ID) \ ++ | (1 << X86_XSTATE_AVX_ID) \ ++ | (1 << X86_XSTATE_BNDREGS_ID) \ ++ | (1 << X86_XSTATE_K_ID) \ ++ | (1 << X86_XSTATE_ZMM_H_ID)) ++ ++/* States to be included in xsave_state_size. */ ++# define FULL_STATE_SAVE_MASK STATE_SAVE_MASK ++#endif + +-/* Save SSE, AVX, AVX512, mask and bound registers. */ +-#define STATE_SAVE_MASK \ +- ((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)) ++/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL. ++ Compiler assumes that all registers, including AMX and x87 FPU ++ stack registers, are unchanged after CALL, except for EFLAGS and ++ RAX/EAX. */ ++#define TLSDESC_CALL_STATE_SAVE_MASK \ ++ (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID)) + + /* Constants for bits in __x86_string_control: */ + +diff --git a/sysdeps/x86/tst-gnu2-tls2.c b/sysdeps/x86/tst-gnu2-tls2.c +new file mode 100644 +index 0000000000..de900a423b +--- /dev/null ++++ b/sysdeps/x86/tst-gnu2-tls2.c +@@ -0,0 +1,20 @@ ++#ifndef __x86_64__ ++#include ++ ++#define IS_SUPPORTED() CPU_FEATURE_ACTIVE (SSE2) ++#endif ++ ++/* Clear XMM0...XMM7 */ ++#define PREPARE_MALLOC() \ ++{ \ ++ asm volatile ("xorps %%xmm0, %%xmm0" : : : "xmm0" ); \ ++ asm volatile ("xorps %%xmm1, %%xmm1" : : : "xmm1" ); \ ++ asm volatile ("xorps %%xmm2, %%xmm2" : : : "xmm2" ); \ ++ asm volatile ("xorps %%xmm3, %%xmm3" : : : "xmm3" ); \ ++ asm volatile ("xorps %%xmm4, %%xmm4" : : : "xmm4" ); \ ++ asm volatile ("xorps %%xmm5, %%xmm5" : : : "xmm5" ); \ ++ asm volatile ("xorps %%xmm6, %%xmm6" : : : "xmm6" ); \ ++ asm volatile ("xorps %%xmm7, %%xmm7" : : : "xmm7" ); \ ++} ++ ++#include +diff --git a/sysdeps/x86/utmp-size.h b/sysdeps/x86/utmp-size.h +new file mode 100644 +index 0000000000..8f21ebe1b6 +--- /dev/null ++++ b/sysdeps/x86/utmp-size.h +@@ -0,0 +1,2 @@ ++#define UTMP_SIZE 384 ++#define LASTLOG_SIZE 292 +diff --git a/sysdeps/x86_64/Makefile b/sysdeps/x86_64/Makefile +index 90f4ecfd26..0ede447405 100644 +--- a/sysdeps/x86_64/Makefile ++++ b/sysdeps/x86_64/Makefile +@@ -10,7 +10,7 @@ LDFLAGS-rtld += -Wl,-z,nomark-plt + endif + + ifeq ($(subdir),csu) +-gen-as-const-headers += features-offsets.sym link-defines.sym ++gen-as-const-headers += link-defines.sym + endif + + ifeq ($(subdir),gmon) +@@ -210,6 +210,8 @@ tst-plt-rewrite2-ENV = GLIBC_TUNABLES=glibc.cpu.plt_rewrite=2 + $(objpfx)tst-plt-rewrite2: $(objpfx)tst-plt-rewritemod2.so + endif + ++test-internal-extras += tst-gnu2-tls2mod1 ++ + endif # $(subdir) == elf + + ifeq ($(subdir),csu) +@@ -250,6 +252,10 @@ sysdep-dl-routines += dl-cet + + tests += \ + tst-cet-legacy-1 \ ++ tst-cet-legacy-10 \ ++ tst-cet-legacy-10-static \ ++ tst-cet-legacy-10a \ ++ tst-cet-legacy-10a-static \ + tst-cet-legacy-1a \ + tst-cet-legacy-2 \ + tst-cet-legacy-2a \ +@@ -261,15 +267,11 @@ tests += \ + tst-cet-legacy-8 \ + tst-cet-legacy-9 \ + tst-cet-legacy-9-static \ +- tst-cet-legacy-10 \ +- tst-cet-legacy-10-static \ +- tst-cet-legacy-10a \ +- tst-cet-legacy-10a-static \ + # tests + tests-static += \ +- tst-cet-legacy-9-static \ + tst-cet-legacy-10-static \ + tst-cet-legacy-10a-static \ ++ tst-cet-legacy-9-static \ + # tests-static + tst-cet-legacy-1a-ARGS = -- $(host-test-program-cmd) + +diff --git a/sysdeps/x86_64/configure b/sysdeps/x86_64/configure +index 418cc4a9b8..04a534fa12 100755 +--- a/sysdeps/x86_64/configure ++++ b/sysdeps/x86_64/configure +@@ -134,6 +134,34 @@ fi + config_vars="$config_vars + enable-cet = $enable_cet" + ++# Check if -mamx-tile works properly. ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -mamx-tile works properly" >&5 ++printf %s "checking whether -mamx-tile works properly... " >&6; } ++if test ${libc_cv_x86_have_amx_tile+y} ++then : ++ printf %s "(cached) " >&6 ++else $as_nop ++ cat > conftest.c < ++EOF ++ libc_cv_x86_have_amx_tile=no ++ if { ac_try='${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; }; then ++ if grep -q __builtin_ia32_ldtilecfg conftest.i; then ++ libc_cv_x86_have_amx_tile=yes ++ fi ++ fi ++ rm -rf conftest* ++fi ++{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $libc_cv_x86_have_amx_tile" >&5 ++printf "%s\n" "$libc_cv_x86_have_amx_tile" >&6; } ++config_vars="$config_vars ++have-mamx-tile = $libc_cv_x86_have_amx_tile" ++ + test -n "$critic_missing" && as_fn_error $? " + *** $critic_missing" "$LINENO" 5 + +diff --git a/sysdeps/x86_64/configure.ac b/sysdeps/x86_64/configure.ac +index d1f803c02e..c714c47351 100644 +--- a/sysdeps/x86_64/configure.ac ++++ b/sysdeps/x86_64/configure.ac +@@ -61,5 +61,20 @@ elif test $enable_cet = permissive; then + fi + LIBC_CONFIG_VAR([enable-cet], [$enable_cet]) + ++# Check if -mamx-tile works properly. ++AC_CACHE_CHECK(whether -mamx-tile works properly, ++ libc_cv_x86_have_amx_tile, [dnl ++cat > conftest.c < ++EOF ++ libc_cv_x86_have_amx_tile=no ++ if AC_TRY_COMMAND(${CC-cc} -E $CFLAGS -mamx-tile conftest.c > conftest.i); then ++ if grep -q __builtin_ia32_ldtilecfg conftest.i; then ++ libc_cv_x86_have_amx_tile=yes ++ fi ++ fi ++ rm -rf conftest*]) ++LIBC_CONFIG_VAR([have-mamx-tile], [$libc_cv_x86_have_amx_tile]) ++ + test -n "$critic_missing" && AC_MSG_ERROR([ + *** $critic_missing]) +diff --git a/sysdeps/x86_64/dl-machine.h b/sysdeps/x86_64/dl-machine.h +index 6d605d0d32..ff5d45f7cb 100644 +--- a/sysdeps/x86_64/dl-machine.h ++++ b/sysdeps/x86_64/dl-machine.h +@@ -71,9 +71,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + int lazy, int profile) + { + Elf64_Addr *got; +- extern void _dl_runtime_resolve_fxsave (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_xsave (ElfW(Word)) attribute_hidden; +- extern void _dl_runtime_resolve_xsavec (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_sse (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_avx (ElfW(Word)) attribute_hidden; + extern void _dl_runtime_profile_avx512 (ElfW(Word)) attribute_hidden; +@@ -96,8 +93,6 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* Identify this shared object. */ + *(ElfW(Addr) *) (got + 1) = (ElfW(Addr)) l; + +- const struct cpu_features* cpu_features = __get_cpu_features (); +- + #ifdef SHARED + /* The got[2] entry contains the address of a function which gets + called to get the address of a so far unresolved function and +@@ -107,6 +102,7 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + end in this function. */ + if (__glibc_unlikely (profile)) + { ++ const struct cpu_features* cpu_features = __get_cpu_features (); + if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX512F)) + *(ElfW(Addr) *) (got + 2) = (ElfW(Addr)) &_dl_runtime_profile_avx512; + else if (X86_ISA_CPU_FEATURE_USABLE_P (cpu_features, AVX)) +@@ -126,15 +122,8 @@ elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[], + /* This function will get called to fix up the GOT entry + indicated by the offset on the stack, and then jump to + the resolved address. */ +- if (MINIMUM_X86_ISA_LEVEL >= AVX_X86_ISA_LEVEL +- || GLRO(dl_x86_cpu_features).xsave_state_size != 0) +- *(ElfW(Addr) *) (got + 2) +- = (CPU_FEATURE_USABLE_P (cpu_features, XSAVEC) +- ? (ElfW(Addr)) &_dl_runtime_resolve_xsavec +- : (ElfW(Addr)) &_dl_runtime_resolve_xsave); +- else +- *(ElfW(Addr) *) (got + 2) +- = (ElfW(Addr)) &_dl_runtime_resolve_fxsave; ++ *(ElfW(Addr) *) (got + 2) ++ = (ElfW(Addr)) GLRO(dl_x86_64_runtime_resolve); + } + } + +@@ -383,7 +372,7 @@ and creates an unsatisfiable circular dependency.\n", + { + td->arg = _dl_make_tlsdesc_dynamic + (sym_map, sym->st_value + reloc->r_addend); +- td->entry = _dl_tlsdesc_dynamic; ++ td->entry = GLRO(dl_x86_tlsdesc_dynamic); + } + else + # endif +diff --git a/sysdeps/x86_64/dl-procinfo.c b/sysdeps/x86_64/dl-procinfo.c +index 4d1d790fbb..06637a8154 100644 +--- a/sysdeps/x86_64/dl-procinfo.c ++++ b/sysdeps/x86_64/dl-procinfo.c +@@ -41,5 +41,21 @@ + + #include + ++#if !IS_IN (ldconfig) ++# if !defined PROCINFO_DECL && defined SHARED ++ ._dl_x86_64_runtime_resolve ++# else ++PROCINFO_CLASS void * _dl_x86_64_runtime_resolve ++# endif ++# ifndef PROCINFO_DECL ++= NULL ++# endif ++# if !defined SHARED || defined PROCINFO_DECL ++; ++# else ++, ++# endif ++#endif ++ + #undef PROCINFO_DECL + #undef PROCINFO_CLASS +diff --git a/sysdeps/x86_64/dl-tlsdesc-dynamic.h b/sysdeps/x86_64/dl-tlsdesc-dynamic.h +new file mode 100644 +index 0000000000..9f02cfc3eb +--- /dev/null ++++ b/sysdeps/x86_64/dl-tlsdesc-dynamic.h +@@ -0,0 +1,166 @@ ++/* Thread-local storage handling in the ELF dynamic linker. x86_64 version. ++ Copyright (C) 2004-2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef SECTION ++# define SECTION(p) p ++#endif ++ ++#undef REGISTER_SAVE_AREA ++#undef LOCAL_STORAGE_AREA ++#undef BASE ++ ++#include "dl-trampoline-state.h" ++ ++ .section SECTION(.text),"ax",@progbits ++ ++ .hidden _dl_tlsdesc_dynamic ++ .global _dl_tlsdesc_dynamic ++ .type _dl_tlsdesc_dynamic,@function ++ ++ /* %rax points to the TLS descriptor, such that 0(%rax) points to ++ _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct ++ tlsdesc_dynamic_arg object. It must return in %rax the offset ++ between the thread pointer and the object denoted by the ++ argument, without clobbering any registers. ++ ++ The assembly code that follows is a rendition of the following ++ C code, hand-optimized a little bit. ++ ++ptrdiff_t ++_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax")) ++{ ++ struct tlsdesc_dynamic_arg *td = tdp->arg; ++ dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); ++ if (__builtin_expect (td->gen_count <= dtv[0].counter ++ && (dtv[td->tlsinfo.ti_module].pointer.val ++ != TLS_DTV_UNALLOCATED), ++ 1)) ++ return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset ++ - __thread_pointer; ++ ++ return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer; ++} ++*/ ++ cfi_startproc ++ .align 16 ++_dl_tlsdesc_dynamic: ++ _CET_ENDBR ++ /* Preserve call-clobbered registers that we modify. ++ We need two scratch regs anyway. */ ++ movq %rsi, -16(%rsp) ++ mov %fs:DTV_OFFSET, %RSI_LP ++ movq %rdi, -8(%rsp) ++ movq TLSDESC_ARG(%rax), %rdi ++ movq (%rsi), %rax ++ cmpq %rax, TLSDESC_GEN_COUNT(%rdi) ++ ja 2f ++ movq TLSDESC_MODID(%rdi), %rax ++ salq $4, %rax ++ movq (%rax,%rsi), %rax ++ cmpq $-1, %rax ++ je 2f ++ addq TLSDESC_MODOFF(%rdi), %rax ++1: ++ movq -16(%rsp), %rsi ++ sub %fs:0, %RAX_LP ++ movq -8(%rsp), %rdi ++ ret ++2: ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ movq %rbx, -24(%rsp) ++ mov %RSP_LP, %RBX_LP ++ cfi_def_cfa_register(%rbx) ++ and $-STATE_SAVE_ALIGNMENT, %RSP_LP ++#endif ++#ifdef REGISTER_SAVE_AREA ++# if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ /* STATE_SAVE_OFFSET has space for 8 integer registers. But we ++ need space for RCX, RDX, RSI, RDI, R8, R9, R10 and R11, plus ++ RBX above. */ ++ sub $(REGISTER_SAVE_AREA + STATE_SAVE_ALIGNMENT), %RSP_LP ++# else ++ sub $REGISTER_SAVE_AREA, %RSP_LP ++ cfi_adjust_cfa_offset(REGISTER_SAVE_AREA) ++# endif ++#else ++ /* Allocate stack space of the required size to save the state. */ ++ sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_FULL_SIZE_OFFSET(%rip), %RSP_LP ++#endif ++ /* Besides rdi and rsi, saved above, save rcx, rdx, r8, r9, ++ r10 and r11. */ ++ movq %rcx, REGISTER_SAVE_RCX(%rsp) ++ movq %rdx, REGISTER_SAVE_RDX(%rsp) ++ movq %r8, REGISTER_SAVE_R8(%rsp) ++ movq %r9, REGISTER_SAVE_R9(%rsp) ++ movq %r10, REGISTER_SAVE_R10(%rsp) ++ movq %r11, REGISTER_SAVE_R11(%rsp) ++#ifdef USE_FXSAVE ++ fxsave STATE_SAVE_OFFSET(%rsp) ++#else ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ /* Clear the XSAVE Header. */ ++# ifdef USE_XSAVE ++ movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp) ++# endif ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp) ++ movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp) ++# ifdef USE_XSAVE ++ xsave STATE_SAVE_OFFSET(%rsp) ++# else ++ xsavec STATE_SAVE_OFFSET(%rsp) ++# endif ++#endif ++ /* %rdi already points to the tlsinfo data structure. */ ++ call HIDDEN_JUMPTARGET (__tls_get_addr) ++ # Get register content back. ++#ifdef USE_FXSAVE ++ fxrstor STATE_SAVE_OFFSET(%rsp) ++#else ++ /* Save and retore __tls_get_addr return value stored in RAX. */ ++ mov %RAX_LP, %RCX_LP ++ movl $TLSDESC_CALL_STATE_SAVE_MASK, %eax ++ xorl %edx, %edx ++ xrstor STATE_SAVE_OFFSET(%rsp) ++ mov %RCX_LP, %RAX_LP ++#endif ++ movq REGISTER_SAVE_R11(%rsp), %r11 ++ movq REGISTER_SAVE_R10(%rsp), %r10 ++ movq REGISTER_SAVE_R9(%rsp), %r9 ++ movq REGISTER_SAVE_R8(%rsp), %r8 ++ movq REGISTER_SAVE_RDX(%rsp), %rdx ++ movq REGISTER_SAVE_RCX(%rsp), %rcx ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++ mov %RBX_LP, %RSP_LP ++ cfi_def_cfa_register(%rsp) ++ movq -24(%rsp), %rbx ++ cfi_restore(%rbx) ++#else ++ add $REGISTER_SAVE_AREA, %RSP_LP ++ cfi_adjust_cfa_offset(-REGISTER_SAVE_AREA) ++#endif ++ jmp 1b ++ cfi_endproc ++ .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic ++ ++#undef STATE_SAVE_ALIGNMENT +diff --git a/sysdeps/x86_64/dl-tlsdesc.S b/sysdeps/x86_64/dl-tlsdesc.S +index f748af2ece..057a10862a 100644 +--- a/sysdeps/x86_64/dl-tlsdesc.S ++++ b/sysdeps/x86_64/dl-tlsdesc.S +@@ -18,7 +18,20 @@ + + #include + #include ++#include ++#include ++#include + #include "tlsdesc.h" ++#include "dl-trampoline-save.h" ++ ++/* Area on stack to save and restore registers used for parameter ++ passing when calling _dl_tlsdesc_dynamic. */ ++#define REGISTER_SAVE_RCX 0 ++#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8) ++#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDX + 8) ++#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8) ++#define REGISTER_SAVE_R10 (REGISTER_SAVE_R9 + 8) ++#define REGISTER_SAVE_R11 (REGISTER_SAVE_R10 + 8) + + .text + +@@ -67,80 +80,26 @@ _dl_tlsdesc_undefweak: + .size _dl_tlsdesc_undefweak, .-_dl_tlsdesc_undefweak + + #ifdef SHARED +- .hidden _dl_tlsdesc_dynamic +- .global _dl_tlsdesc_dynamic +- .type _dl_tlsdesc_dynamic,@function +- +- /* %rax points to the TLS descriptor, such that 0(%rax) points to +- _dl_tlsdesc_dynamic itself, and 8(%rax) points to a struct +- tlsdesc_dynamic_arg object. It must return in %rax the offset +- between the thread pointer and the object denoted by the +- argument, without clobbering any registers. +- +- The assembly code that follows is a rendition of the following +- C code, hand-optimized a little bit. +- +-ptrdiff_t +-_dl_tlsdesc_dynamic (register struct tlsdesc *tdp asm ("%rax")) +-{ +- struct tlsdesc_dynamic_arg *td = tdp->arg; +- dtv_t *dtv = *(dtv_t **)((char *)__thread_pointer + DTV_OFFSET); +- if (__builtin_expect (td->gen_count <= dtv[0].counter +- && (dtv[td->tlsinfo.ti_module].pointer.val +- != TLS_DTV_UNALLOCATED), +- 1)) +- return dtv[td->tlsinfo.ti_module].pointer.val + td->tlsinfo.ti_offset +- - __thread_pointer; +- +- return __tls_get_addr_internal (&td->tlsinfo) - __thread_pointer; +-} +-*/ +- cfi_startproc +- .align 16 +-_dl_tlsdesc_dynamic: +- _CET_ENDBR +- /* Preserve call-clobbered registers that we modify. +- We need two scratch regs anyway. */ +- movq %rsi, -16(%rsp) +- mov %fs:DTV_OFFSET, %RSI_LP +- movq %rdi, -8(%rsp) +- movq TLSDESC_ARG(%rax), %rdi +- movq (%rsi), %rax +- cmpq %rax, TLSDESC_GEN_COUNT(%rdi) +- ja .Lslow +- movq TLSDESC_MODID(%rdi), %rax +- salq $4, %rax +- movq (%rax,%rsi), %rax +- cmpq $-1, %rax +- je .Lslow +- addq TLSDESC_MODOFF(%rdi), %rax +-.Lret: +- movq -16(%rsp), %rsi +- sub %fs:0, %RAX_LP +- movq -8(%rsp), %rdi +- ret +-.Lslow: +- /* Besides rdi and rsi, saved above, save rdx, rcx, r8, r9, +- r10 and r11. Also, align the stack, that's off by 8 bytes. */ +- subq $72, %rsp +- cfi_adjust_cfa_offset (72) +- movq %rdx, 8(%rsp) +- movq %rcx, 16(%rsp) +- movq %r8, 24(%rsp) +- movq %r9, 32(%rsp) +- movq %r10, 40(%rsp) +- movq %r11, 48(%rsp) +- /* %rdi already points to the tlsinfo data structure. */ +- call HIDDEN_JUMPTARGET (__tls_get_addr) +- movq 8(%rsp), %rdx +- movq 16(%rsp), %rcx +- movq 24(%rsp), %r8 +- movq 32(%rsp), %r9 +- movq 40(%rsp), %r10 +- movq 48(%rsp), %r11 +- addq $72, %rsp +- cfi_adjust_cfa_offset (-72) +- jmp .Lret +- cfi_endproc +- .size _dl_tlsdesc_dynamic, .-_dl_tlsdesc_dynamic ++# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL ++# define USE_FXSAVE ++# define STATE_SAVE_ALIGNMENT 16 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_fxsave ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_FXSAVE ++# endif ++ ++# define USE_XSAVE ++# define STATE_SAVE_ALIGNMENT 64 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsave ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_XSAVE ++ ++# define USE_XSAVEC ++# define STATE_SAVE_ALIGNMENT 64 ++# define _dl_tlsdesc_dynamic _dl_tlsdesc_dynamic_xsavec ++# include "dl-tlsdesc-dynamic.h" ++# undef _dl_tlsdesc_dynamic ++# undef USE_XSAVEC + #endif /* SHARED */ +diff --git a/sysdeps/x86_64/dl-trampoline-save.h b/sysdeps/x86_64/dl-trampoline-save.h +new file mode 100644 +index 0000000000..84eac4a8ac +--- /dev/null ++++ b/sysdeps/x86_64/dl-trampoline-save.h +@@ -0,0 +1,34 @@ ++/* x86-64 PLT trampoline register save macros. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#ifndef DL_STACK_ALIGNMENT ++/* Due to GCC bug: ++ ++ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 ++ ++ __tls_get_addr may be called with 8-byte stack alignment. Although ++ this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume ++ that stack will be always aligned at 16 bytes. */ ++# define DL_STACK_ALIGNMENT 8 ++#endif ++ ++/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align ++ stack to 16 bytes before calling _dl_fixup. */ ++#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ ++ (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ ++ || 16 > DL_STACK_ALIGNMENT) +diff --git a/sysdeps/x86_64/dl-trampoline-state.h b/sysdeps/x86_64/dl-trampoline-state.h +new file mode 100644 +index 0000000000..575f120797 +--- /dev/null ++++ b/sysdeps/x86_64/dl-trampoline-state.h +@@ -0,0 +1,51 @@ ++/* x86-64 PLT dl-trampoline state macros. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#if (STATE_SAVE_ALIGNMENT % 16) != 0 ++# error STATE_SAVE_ALIGNMENT must be multiple of 16 ++#endif ++ ++#if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 ++# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT ++#endif ++ ++#if DL_RUNTIME_RESOLVE_REALIGN_STACK ++/* Local stack area before jumping to function address: RBX. */ ++# define LOCAL_STORAGE_AREA 8 ++# define BASE rbx ++# ifdef USE_FXSAVE ++/* Use fxsave to save XMM registers. */ ++# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) ++# if (REGISTER_SAVE_AREA % 16) != 0 ++# error REGISTER_SAVE_AREA must be multiple of 16 ++# endif ++# endif ++#else ++# ifndef USE_FXSAVE ++# error USE_FXSAVE must be defined ++# endif ++/* Use fxsave to save XMM registers. */ ++# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8) ++/* Local stack area before jumping to function address: All saved ++ registers. */ ++# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA ++# define BASE rsp ++# if (REGISTER_SAVE_AREA % 16) != 8 ++# error REGISTER_SAVE_AREA must be odd multiple of 8 ++# endif ++#endif +diff --git a/sysdeps/x86_64/dl-trampoline.S b/sysdeps/x86_64/dl-trampoline.S +index b2e7e0f69b..87c5137837 100644 +--- a/sysdeps/x86_64/dl-trampoline.S ++++ b/sysdeps/x86_64/dl-trampoline.S +@@ -22,25 +22,7 @@ + #include + #include + #include +- +-#ifndef DL_STACK_ALIGNMENT +-/* Due to GCC bug: +- +- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066 +- +- __tls_get_addr may be called with 8-byte stack alignment. Although +- this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't assume +- that stack will be always aligned at 16 bytes. We use unaligned +- 16-byte move to load and store SSE registers, which has no penalty +- on modern processors if stack is 16-byte aligned. */ +-# define DL_STACK_ALIGNMENT 8 +-#endif +- +-/* True if _dl_runtime_resolve should align stack for STATE_SAVE or align +- stack to 16 bytes before calling _dl_fixup. */ +-#define DL_RUNTIME_RESOLVE_REALIGN_STACK \ +- (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \ +- || 16 > DL_STACK_ALIGNMENT) ++#include "dl-trampoline-save.h" + + /* Area on stack to save and restore registers used for parameter + passing when calling _dl_fixup. */ +diff --git a/sysdeps/x86_64/dl-trampoline.h b/sysdeps/x86_64/dl-trampoline.h +index f55c6ea040..d9ccfb40d4 100644 +--- a/sysdeps/x86_64/dl-trampoline.h ++++ b/sysdeps/x86_64/dl-trampoline.h +@@ -27,39 +27,7 @@ + # undef LOCAL_STORAGE_AREA + # undef BASE + +-# if (STATE_SAVE_ALIGNMENT % 16) != 0 +-# error STATE_SAVE_ALIGNMENT must be multiple of 16 +-# endif +- +-# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0 +-# error STATE_SAVE_OFFSET must be multiple of STATE_SAVE_ALIGNMENT +-# endif +- +-# if DL_RUNTIME_RESOLVE_REALIGN_STACK +-/* Local stack area before jumping to function address: RBX. */ +-# define LOCAL_STORAGE_AREA 8 +-# define BASE rbx +-# ifdef USE_FXSAVE +-/* Use fxsave to save XMM registers. */ +-# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET) +-# if (REGISTER_SAVE_AREA % 16) != 0 +-# error REGISTER_SAVE_AREA must be multiple of 16 +-# endif +-# endif +-# else +-# ifndef USE_FXSAVE +-# error USE_FXSAVE must be defined +-# endif +-/* Use fxsave to save XMM registers. */ +-# define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8) +-/* Local stack area before jumping to function address: All saved +- registers. */ +-# define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA +-# define BASE rsp +-# if (REGISTER_SAVE_AREA % 16) != 8 +-# error REGISTER_SAVE_AREA must be odd multiple of 8 +-# endif +-# endif ++# include "dl-trampoline-state.h" + + .globl _dl_runtime_resolve + .hidden _dl_runtime_resolve +diff --git a/sysdeps/x86_64/fpu/multiarch/Makefile b/sysdeps/x86_64/fpu/multiarch/Makefile +index ea81753b70..6ddd50240c 100644 +--- a/sysdeps/x86_64/fpu/multiarch/Makefile ++++ b/sysdeps/x86_64/fpu/multiarch/Makefile +@@ -1,49 +1,4 @@ + ifeq ($(subdir),math) +-libm-sysdep_routines += \ +- s_ceil-c \ +- s_ceilf-c \ +- s_floor-c \ +- s_floorf-c \ +- s_rint-c \ +- s_rintf-c \ +- s_nearbyint-c \ +- s_nearbyintf-c \ +- s_roundeven-c \ +- s_roundevenf-c \ +- s_trunc-c \ +- s_truncf-c \ +-# libm-sysdep_routines +- +-libm-sysdep_routines += \ +- s_ceil-sse4_1 \ +- s_ceilf-sse4_1 \ +- s_floor-sse4_1 \ +- s_floorf-sse4_1 \ +- s_nearbyint-sse4_1 \ +- s_nearbyintf-sse4_1 \ +- s_roundeven-sse4_1 \ +- s_roundevenf-sse4_1 \ +- s_rint-sse4_1 \ +- s_rintf-sse4_1 \ +- s_trunc-sse4_1 \ +- s_truncf-sse4_1 \ +-# libm-sysdep_routines +- +-libm-sysdep_routines += \ +- e_asin-fma \ +- e_atan2-fma \ +- e_exp-fma \ +- e_log-fma \ +- e_log2-fma \ +- e_pow-fma \ +- s_atan-fma \ +- s_expm1-fma \ +- s_log1p-fma \ +- s_sin-fma \ +- s_sincos-fma \ +- s_tan-fma \ +-# libm-sysdep_routines +- + CFLAGS-e_asin-fma.c = -mfma -mavx2 + CFLAGS-e_atan2-fma.c = -mfma -mavx2 + CFLAGS-e_exp-fma.c = -mfma -mavx2 +@@ -57,23 +12,6 @@ CFLAGS-s_sin-fma.c = -mfma -mavx2 + CFLAGS-s_tan-fma.c = -mfma -mavx2 + CFLAGS-s_sincos-fma.c = -mfma -mavx2 + +-libm-sysdep_routines += \ +- s_cosf-sse2 \ +- s_sincosf-sse2 \ +- s_sinf-sse2 \ +-# libm-sysdep_routines +- +-libm-sysdep_routines += \ +- e_exp2f-fma \ +- e_expf-fma \ +- e_log2f-fma \ +- e_logf-fma \ +- e_powf-fma \ +- s_cosf-fma \ +- s_sincosf-fma \ +- s_sinf-fma \ +-# libm-sysdep_routines +- + CFLAGS-e_exp2f-fma.c = -mfma -mavx2 + CFLAGS-e_expf-fma.c = -mfma -mavx2 + CFLAGS-e_log2f-fma.c = -mfma -mavx2 +@@ -83,17 +21,93 @@ CFLAGS-s_sinf-fma.c = -mfma -mavx2 + CFLAGS-s_cosf-fma.c = -mfma -mavx2 + CFLAGS-s_sincosf-fma.c = -mfma -mavx2 + ++# Check if ISA level is 3 or above. ++ifneq (,$(filter $(have-x86-isa-level),$(x86-isa-level-3-or-above))) ++libm-sysdep_routines += \ ++ s_ceil-avx \ ++ s_ceilf-avx \ ++ s_floor-avx \ ++ s_floorf-avx \ ++ s_nearbyint-avx \ ++ s_nearbyintf-avx \ ++ s_rint-avx \ ++ s_rintf-avx \ ++ s_roundeven-avx \ ++ s_roundevenf-avx \ ++ s_trunc-avx \ ++ s_truncf-avx \ ++# libm-sysdep_routines ++else + libm-sysdep_routines += \ ++ e_asin-fma \ ++ e_asin-fma4 \ ++ e_atan2-avx \ ++ e_atan2-fma \ ++ e_atan2-fma4 \ ++ e_exp-avx \ ++ e_exp-fma \ + e_exp-fma4 \ ++ e_exp2f-fma \ ++ e_expf-fma \ ++ e_log-avx \ ++ e_log-fma \ + e_log-fma4 \ ++ e_log2-fma \ ++ e_log2f-fma \ ++ e_logf-fma \ ++ e_pow-fma \ + e_pow-fma4 \ +- e_asin-fma4 \ ++ e_powf-fma \ ++ s_atan-avx \ ++ s_atan-fma \ + s_atan-fma4 \ +- e_atan2-fma4 \ ++ s_ceil-sse4_1 \ ++ s_ceilf-sse4_1 \ ++ s_cosf-fma \ ++ s_cosf-sse2 \ ++ s_expm1-fma \ ++ s_floor-sse4_1 \ ++ s_floorf-sse4_1 \ ++ s_log1p-fma \ ++ s_nearbyint-sse4_1 \ ++ s_nearbyintf-sse4_1 \ ++ s_rint-sse4_1 \ ++ s_rintf-sse4_1 \ ++ s_roundeven-sse4_1 \ ++ s_roundevenf-sse4_1 \ ++ s_sin-avx \ ++ s_sin-fma \ + s_sin-fma4 \ ++ s_sincos-avx \ ++ s_sincos-fma \ + s_sincos-fma4 \ ++ s_sincosf-fma \ ++ s_sincosf-sse2 \ ++ s_sinf-fma \ ++ s_sinf-sse2 \ ++ s_tan-avx \ ++ s_tan-fma \ + s_tan-fma4 \ ++ s_trunc-sse4_1 \ ++ s_truncf-sse4_1 \ ++# libm-sysdep_routines ++ifeq ($(have-x86-isa-level),baseline) ++libm-sysdep_routines += \ ++ s_ceil-c \ ++ s_ceilf-c \ ++ s_floor-c \ ++ s_floorf-c \ ++ s_nearbyint-c \ ++ s_nearbyintf-c \ ++ s_rint-c \ ++ s_rintf-c \ ++ s_roundeven-c \ ++ s_roundevenf-c \ ++ s_trunc-c \ ++ s_truncf-c \ + # libm-sysdep_routines ++endif ++endif + + CFLAGS-e_asin-fma4.c = -mfma4 + CFLAGS-e_atan2-fma4.c = -mfma4 +@@ -105,16 +119,6 @@ CFLAGS-s_sin-fma4.c = -mfma4 + CFLAGS-s_tan-fma4.c = -mfma4 + CFLAGS-s_sincos-fma4.c = -mfma4 + +-libm-sysdep_routines += \ +- e_exp-avx \ +- e_log-avx \ +- s_atan-avx \ +- e_atan2-avx \ +- s_sin-avx \ +- s_sincos-avx \ +- s_tan-avx \ +-# libm-sysdep_routines +- + CFLAGS-e_atan2-avx.c = -msse2avx -DSSE2AVX + CFLAGS-e_exp-avx.c = -msse2avx -DSSE2AVX + CFLAGS-e_log-avx.c = -msse2avx -DSSE2AVX +diff --git a/sysdeps/x86_64/fpu/multiarch/e_asin.c b/sysdeps/x86_64/fpu/multiarch/e_asin.c +index 2eaa6c2c04..d64fca2586 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_asin.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_asin.c +@@ -16,26 +16,29 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_ieee754_asin (double); + extern double __redirect_ieee754_acos (double); + +-#define SYMBOL_NAME ieee754_asin +-#include "ifunc-fma4.h" ++# define SYMBOL_NAME ieee754_asin ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_asin, __ieee754_asin, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_asin, __asin) + +-#undef SYMBOL_NAME +-#define SYMBOL_NAME ieee754_acos +-#include "ifunc-fma4.h" ++# undef SYMBOL_NAME ++# define SYMBOL_NAME ieee754_acos ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_acos, __ieee754_acos, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_acos, __acos) + +-#define __ieee754_acos __ieee754_acos_sse2 +-#define __ieee754_asin __ieee754_asin_sse2 ++# define __ieee754_acos __ieee754_acos_sse2 ++# define __ieee754_asin __ieee754_asin_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_atan2.c b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +index 17ee4f3c36..8a86c14ded 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_atan2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_atan2.c +@@ -16,16 +16,19 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_ieee754_atan2 (double, double); + +-#define SYMBOL_NAME ieee754_atan2 +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME ieee754_atan2 ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_atan2, + __ieee754_atan2, IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_atan2, __atan2) + +-#define __ieee754_atan2 __ieee754_atan2_sse2 ++# define __ieee754_atan2 __ieee754_atan2_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp.c b/sysdeps/x86_64/fpu/multiarch/e_exp.c +index 406b7ebd44..d56329291a 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_ieee754_exp (double); + +-#define SYMBOL_NAME ieee754_exp +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME ieee754_exp ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_exp, __ieee754_exp, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_exp, __exp) + +-#define __exp __ieee754_exp_sse2 ++# define __exp __ieee754_exp_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c +index 804fd6be85..06fe5028d6 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_exp2f.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_exp2f.c +@@ -16,25 +16,28 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_exp2f (float); + +-#define SYMBOL_NAME exp2f +-#include "ifunc-fma.h" ++# define SYMBOL_NAME exp2f ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_exp2f, __exp2f, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + versioned_symbol (libm, __ieee754_exp2f, exp2f, GLIBC_2_27); + libm_alias_float_other (__exp2, exp2) +-#else ++# else + libm_alias_float (__exp2, exp2) +-#endif ++# endif + + strong_alias (__exp2f, __ieee754_exp2f) + libm_alias_finite (__exp2f, __exp2f) + +-#define __exp2f __exp2f_sse2 ++# define __exp2f __exp2f_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_expf.c b/sysdeps/x86_64/fpu/multiarch/e_expf.c +index 4a7e2a5bce..19d767f636 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_expf.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_expf.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_expf (float); + +-#define SYMBOL_NAME expf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME expf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_expf, __expf, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__expf, __GI___expf, __redirect_expf) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_expf, expf, GLIBC_2_27); + libm_alias_float_other (__exp, exp) +-#else ++# else + libm_alias_float (__exp, exp) +-#endif ++# endif + + strong_alias (__expf, __ieee754_expf) + libm_alias_finite (__expf, __expf) + +-#define __expf __expf_sse2 ++# define __expf __expf_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log.c b/sysdeps/x86_64/fpu/multiarch/e_log.c +index 067fbf58c3..d80c1b1463 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_ieee754_log (double); + +-#define SYMBOL_NAME ieee754_log +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME ieee754_log ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_log, __ieee754_log, + IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_log, __log) + +-#define __log __ieee754_log_sse2 ++# define __log __ieee754_log_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2.c b/sysdeps/x86_64/fpu/multiarch/e_log2.c +index 9c57a2f6cc..9686782c09 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log2.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log2.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_log2 (double); + +-#define SYMBOL_NAME log2 +-#include "ifunc-fma.h" ++# define SYMBOL_NAME log2 ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_log2, __log2, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__log2, __GI___log2, __redirect_log2) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_log2, log2, GLIBC_2_29); + libm_alias_double_other (__log2, log2) +-#else ++# else + libm_alias_double (__log2, log2) +-#endif ++# endif + + strong_alias (__log2, __ieee754_log2) + libm_alias_finite (__log2, __log2) + +-#define __log2 __log2_sse2 ++# define __log2 __log2_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_log2f.c b/sysdeps/x86_64/fpu/multiarch/e_log2f.c +index 2b45c87f38..8ada46e11e 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_log2f.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_log2f.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_log2f (float); + +-#define SYMBOL_NAME log2f +-#include "ifunc-fma.h" ++# define SYMBOL_NAME log2f ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_log2f, __log2f, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__log2f, __GI___log2f, __redirect_log2f) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_log2f, log2f, GLIBC_2_27); + libm_alias_float_other (__log2, log2) +-#else ++# else + libm_alias_float (__log2, log2) +-#endif ++# endif + + strong_alias (__log2f, __ieee754_log2f) + libm_alias_finite (__log2f, __log2f) + +-#define __log2f __log2f_sse2 ++# define __log2f __log2f_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_logf.c b/sysdeps/x86_64/fpu/multiarch/e_logf.c +index 97e23c8fea..a3978d9a8e 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_logf.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_logf.c +@@ -16,28 +16,31 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern float __redirect_logf (float); + +-#define SYMBOL_NAME logf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME logf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_logf, __logf, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__logf, __GI___logf, __redirect_logf) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_logf, logf, GLIBC_2_27); + libm_alias_float_other (__log, log) +-#else ++# else + libm_alias_float (__log, log) +-#endif ++# endif + + strong_alias (__logf, __ieee754_logf) + libm_alias_finite (__logf, __logf) + +-#define __logf __logf_sse2 ++# define __logf __logf_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/multiarch/e_pow.c +index 42618e7112..f8f17aff9f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + + extern double __redirect_ieee754_pow (double, double); + +-#define SYMBOL_NAME ieee754_pow +-#include "ifunc-fma4.h" ++# define SYMBOL_NAME ieee754_pow ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_ieee754_pow, + __ieee754_pow, IFUNC_SELECTOR ()); + libm_alias_finite (__ieee754_pow, __pow) + +-#define __pow __ieee754_pow_sse2 ++# define __pow __ieee754_pow_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/e_powf.c b/sysdeps/x86_64/fpu/multiarch/e_powf.c +index 8e6ce13cc1..8b1a4c7d04 100644 +--- a/sysdeps/x86_64/fpu/multiarch/e_powf.c ++++ b/sysdeps/x86_64/fpu/multiarch/e_powf.c +@@ -16,31 +16,34 @@ + License along with the GNU C Library; if not, see + . */ + +-#include +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include ++# include + +-#define powf __redirect_powf +-#define __DECL_SIMD___redirect_powf +-#include +-#undef powf ++# define powf __redirect_powf ++# define __DECL_SIMD___redirect_powf ++# include ++# undef powf + +-#define SYMBOL_NAME powf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME powf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_powf, __powf, IFUNC_SELECTOR ()); + +-#ifdef SHARED ++# ifdef SHARED + __hidden_ver1 (__powf, __GI___powf, __redirect_powf) + __attribute__ ((visibility ("hidden"))); + + versioned_symbol (libm, __ieee754_powf, powf, GLIBC_2_27); + libm_alias_float_other (__pow, pow) +-#else ++# else + libm_alias_float (__pow, pow) +-#endif ++# endif + + strong_alias (__powf, __ieee754_powf) + libm_alias_finite (__powf, __powf) + +-#define __powf __powf_sse2 ++# define __powf __powf_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_atan.c b/sysdeps/x86_64/fpu/multiarch/s_atan.c +index 71bad096a9..4d2c6ce006 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_atan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_atan.c +@@ -16,15 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_atan (double); + +-#define SYMBOL_NAME atan +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME atan ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_atan, __atan, IFUNC_SELECTOR ()); + libm_alias_double (__atan, atan) + +-#define __atan __atan_sse2 ++# define __atan __atan_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S +new file mode 100644 +index 0000000000..e6c1106753 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of ceil function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__ceil) ++ vroundsd $10, %xmm0, %xmm0, %xmm0 ++ ret ++END(__ceil) ++ ++libm_alias_double (__ceil, ceil) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S +index 64119011ad..dba756c38f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __ceil_sse41 __ceil ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__ceil_sse41) + roundsd $10, %xmm0, %xmm0 + ret + END(__ceil_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceil.c b/sysdeps/x86_64/fpu/multiarch/s_ceil.c +index cc028addee..46c8e91e19 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceil.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceil.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define ceil __redirect_ceil +-#define __ceil __redirect___ceil +-#include +-#undef ceil +-#undef __ceil ++# define ceil __redirect_ceil ++# define __ceil __redirect___ceil ++# include ++# undef ceil ++# undef __ceil + +-#define SYMBOL_NAME ceil +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME ceil ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_ceil, __ceil, IFUNC_SELECTOR ()); + libm_alias_double (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S +new file mode 100644 +index 0000000000..b4d8ac0455 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of ceilf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__ceilf) ++ vroundss $10, %xmm0, %xmm0, %xmm0 ++ ret ++END(__ceilf) ++ ++libm_alias_float (__ceil, ceil) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S +index dd9a9f6b71..9abc87b91a 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __ceilf_sse41 __ceilf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__ceilf_sse41) + roundss $10, %xmm0, %xmm0 + ret + END(__ceilf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c +index 97a0ca7d19..bb53108f73 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_ceilf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_ceilf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define ceilf __redirect_ceilf +-#define __ceilf __redirect___ceilf +-#include +-#undef ceilf +-#undef __ceilf ++# define ceilf __redirect_ceilf ++# define __ceilf __redirect___ceilf ++# include ++# undef ceilf ++# undef __ceilf + +-#define SYMBOL_NAME ceilf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME ceilf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_ceilf, __ceilf, IFUNC_SELECTOR ()); + libm_alias_float (__ceil, ceil) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_cosf.c b/sysdeps/x86_64/fpu/multiarch/s_cosf.c +index 2703c576df..8a02e04538 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_cosf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_cosf.c +@@ -16,13 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern float __redirect_cosf (float); + +-#define SYMBOL_NAME cosf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME cosf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_cosf, __cosf, IFUNC_SELECTOR ()); + + libm_alias_float (__cos, cos) ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_expm1.c b/sysdeps/x86_64/fpu/multiarch/s_expm1.c +index 8a2d69f9b2..d58ef3d8f5 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_expm1.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_expm1.c +@@ -16,21 +16,24 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_expm1 (double); + +-#define SYMBOL_NAME expm1 +-#include "ifunc-fma.h" ++# define SYMBOL_NAME expm1 ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_expm1, __expm1, IFUNC_SELECTOR ()); + libm_alias_double (__expm1, expm1) + +-#define __expm1 __expm1_sse2 ++# define __expm1 __expm1_sse2 + + /* NB: __expm1 may be expanded to __expm1_sse2 in the following + prototypes. */ + extern long double __expm1l (long double); + extern long double __expm1f128 (long double); + ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S +new file mode 100644 +index 0000000000..ff74b5a8bf +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_floor-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of floor function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__floor) ++ vroundsd $9, %xmm0, %xmm0, %xmm0 ++ ret ++END(__floor) ++ ++libm_alias_double (__floor, floor) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S +index 2f7521f39f..c9b9b0639b 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_floor-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __floor_sse41 __floor ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__floor_sse41) + roundsd $9, %xmm0, %xmm0 + ret + END(__floor_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floor.c b/sysdeps/x86_64/fpu/multiarch/s_floor.c +index 8cebd48e10..2c87dd0056 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floor.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_floor.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define floor __redirect_floor +-#define __floor __redirect___floor +-#include +-#undef floor +-#undef __floor ++# define floor __redirect_floor ++# define __floor __redirect___floor ++# include ++# undef floor ++# undef __floor + +-#define SYMBOL_NAME floor +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME floor ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_floor, __floor, IFUNC_SELECTOR ()); + libm_alias_double (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S +new file mode 100644 +index 0000000000..c378baae8e +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of floorf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__floorf) ++ vroundss $9, %xmm0, %xmm0, %xmm0 ++ ret ++END(__floorf) ++ ++libm_alias_float (__floor, floor) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S +index 5f6020d27d..c2216899db 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __floorf_sse41 __floorf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__floorf_sse41) + roundss $9, %xmm0, %xmm0 + ret + END(__floorf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_floorf.c b/sysdeps/x86_64/fpu/multiarch/s_floorf.c +index a14e18b03c..a277802b6d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_floorf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_floorf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define floorf __redirect_floorf +-#define __floorf __redirect___floorf +-#include +-#undef floorf +-#undef __floorf ++# define floorf __redirect_floorf ++# define __floorf __redirect___floorf ++# include ++# undef floorf ++# undef __floorf + +-#define SYMBOL_NAME floorf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME floorf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_floorf, __floorf, IFUNC_SELECTOR ()); + libm_alias_float (__floor, floor) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_log1p.c b/sysdeps/x86_64/fpu/multiarch/s_log1p.c +index a8e1a3f21b..3fa1185d81 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_log1p.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_log1p.c +@@ -16,14 +16,17 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_log1p (double); + +-#define SYMBOL_NAME log1p +-#include "ifunc-fma.h" ++# define SYMBOL_NAME log1p ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_log1p, __log1p, IFUNC_SELECTOR ()); + +-#define __log1p __log1p_sse2 ++# define __log1p __log1p_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S +new file mode 100644 +index 0000000000..5bfdf73c28 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of nearbyint function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__nearbyint) ++ vroundsd $0xc, %xmm0, %xmm0, %xmm0 ++ ret ++END(__nearbyint) ++ ++libm_alias_double (__nearbyint, nearbyint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S +index 674f7eb40a..9d84410a1f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __nearbyint_sse41 __nearbyint ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__nearbyint_sse41) + roundsd $0xc, %xmm0, %xmm0 + ret + END(__nearbyint_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c +index 693e42dd4e..057a7ca60f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyint.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define nearbyint __redirect_nearbyint +-#define __nearbyint __redirect___nearbyint +-#include +-#undef nearbyint +-#undef __nearbyint ++# define nearbyint __redirect_nearbyint ++# define __nearbyint __redirect___nearbyint ++# include ++# undef nearbyint ++# undef __nearbyint + +-#define SYMBOL_NAME nearbyint +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME nearbyint ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_nearbyint, __nearbyint, + IFUNC_SELECTOR ()); + libm_alias_double (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S +new file mode 100644 +index 0000000000..1dbaed0324 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implmentation of nearbyintf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__nearbyintf) ++ vroundss $0xc, %xmm0, %xmm0, %xmm0 ++ ret ++END(__nearbyintf) ++ ++libm_alias_float (__nearbyint, nearbyint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S +index 5892bd7563..3cf35f92d6 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __nearbyintf_sse41 __nearbyintf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__nearbyintf_sse41) + roundss $0xc, %xmm0, %xmm0 + ret + END(__nearbyintf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c +index a0ac009f4b..41f374ba72 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_nearbyintf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define nearbyintf __redirect_nearbyintf +-#define __nearbyintf __redirect___nearbyintf +-#include +-#undef nearbyintf +-#undef __nearbyintf ++# define nearbyintf __redirect_nearbyintf ++# define __nearbyintf __redirect___nearbyintf ++# include ++# undef nearbyintf ++# undef __nearbyintf + +-#define SYMBOL_NAME nearbyintf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME nearbyintf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_nearbyintf, __nearbyintf, + IFUNC_SELECTOR ()); + libm_alias_float (__nearbyint, nearbyint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S +new file mode 100644 +index 0000000000..2b403b331f +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_rint-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of rint function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__rint) ++ vroundsd $4, %xmm0, %xmm0, %xmm0 ++ ret ++END(__rint) ++ ++libm_alias_double (__rint, rint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S +index 405372991b..8cd9cf759f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_rint-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __rint_sse41 __rint ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__rint_sse41) + roundsd $4, %xmm0, %xmm0 + ret + END(__rint_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rint.c b/sysdeps/x86_64/fpu/multiarch/s_rint.c +index 754c87e004..18623b7d99 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rint.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_rint.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define rint __redirect_rint +-#define __rint __redirect___rint +-#include +-#undef rint +-#undef __rint ++# define rint __redirect_rint ++# define __rint __redirect___rint ++# include ++# undef rint ++# undef __rint + +-#define SYMBOL_NAME rint +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME rint ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_rint, __rint, IFUNC_SELECTOR ()); + libm_alias_double (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S +new file mode 100644 +index 0000000000..171c2867f4 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of rintf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__rintf) ++ vroundss $4, %xmm0, %xmm0, %xmm0 ++ ret ++END(__rintf) ++ ++libm_alias_float (__rint, rint) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S +index 8ac67ce767..fc1e70f0c9 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __rintf_sse41 __rintf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__rintf_sse41) + roundss $4, %xmm0, %xmm0 + ret + END(__rintf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_rintf.c b/sysdeps/x86_64/fpu/multiarch/s_rintf.c +index e9d6b7a5f2..e275368dec 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_rintf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_rintf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define rintf __redirect_rintf +-#define __rintf __redirect___rintf +-#include +-#undef rintf +-#undef __rintf ++# define rintf __redirect_rintf ++# define __rintf __redirect___rintf ++# include ++# undef rintf ++# undef __rintf + +-#define SYMBOL_NAME rintf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME rintf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_rintf, __rintf, IFUNC_SELECTOR ()); + libm_alias_float (__rint, rint) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S +new file mode 100644 +index 0000000000..576790355c +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of roundeven function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__roundeven) ++ vroundsd $8, %xmm0, %xmm0, %xmm0 ++ ret ++END(__roundeven) ++ ++libm_alias_double (__roundeven, roundeven) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S +index 5ef102336b..f00be56c59 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __roundeven_sse41 __roundeven ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__roundeven_sse41) + roundsd $8, %xmm0, %xmm0 + ret + END(__roundeven_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c +index 8737b32e26..139aad088f 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundeven.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundeven.c +@@ -16,16 +16,19 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define roundeven __redirect_roundeven +-#define __roundeven __redirect___roundeven +-#include +-#undef roundeven +-#undef __roundeven ++# define roundeven __redirect_roundeven ++# define __roundeven __redirect___roundeven ++# include ++# undef roundeven ++# undef __roundeven + +-#define SYMBOL_NAME roundeven +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME roundeven ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_roundeven, __roundeven, IFUNC_SELECTOR ()); + libm_alias_double (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S +new file mode 100644 +index 0000000000..42c359f4cd +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of roundevenf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__roundevenf) ++ vroundss $8, %xmm0, %xmm0, %xmm0 ++ ret ++END(__roundevenf) ++ ++libm_alias_float (__roundeven, roundeven) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S +index 792c90ba07..6b148e4353 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf-sse4_1.S +@@ -17,8 +17,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __roundevenf_sse41 __roundevenf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__roundevenf_sse41) + roundss $8, %xmm0, %xmm0 + ret + END(__roundevenf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c +index e96016a4d5..2fb090075d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_roundevenf.c +@@ -16,16 +16,19 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# include + +-#define roundevenf __redirect_roundevenf +-#define __roundevenf __redirect___roundevenf +-#include +-#undef roundevenf +-#undef __roundevenf ++# define roundevenf __redirect_roundevenf ++# define __roundevenf __redirect___roundevenf ++# include ++# undef roundevenf ++# undef __roundevenf + +-#define SYMBOL_NAME roundevenf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME roundevenf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_roundevenf, __roundevenf, IFUNC_SELECTOR ()); + libm_alias_float (__roundeven, roundeven) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sin.c b/sysdeps/x86_64/fpu/multiarch/s_sin.c +index 355cc0092e..21e77943a3 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sin.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sin.c +@@ -16,24 +16,27 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_sin (double); + extern double __redirect_cos (double); + +-#define SYMBOL_NAME sin +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME sin ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_sin, __sin, IFUNC_SELECTOR ()); + libm_alias_double (__sin, sin) + +-#undef SYMBOL_NAME +-#define SYMBOL_NAME cos +-#include "ifunc-avx-fma4.h" ++# undef SYMBOL_NAME ++# define SYMBOL_NAME cos ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_cos, __cos, IFUNC_SELECTOR ()); + libm_alias_double (__cos, cos) + +-#define __cos __cos_sse2 +-#define __sin __sin_sse2 ++# define __cos __cos_sse2 ++# define __sin __sin_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincos.c b/sysdeps/x86_64/fpu/multiarch/s_sincos.c +index 70107e999c..b35757f8de 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sincos.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sincos.c +@@ -16,15 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern void __redirect_sincos (double, double *, double *); + +-#define SYMBOL_NAME sincos +-#include "ifunc-fma4.h" ++# define SYMBOL_NAME sincos ++# include "ifunc-fma4.h" + + libc_ifunc_redirected (__redirect_sincos, __sincos, IFUNC_SELECTOR ()); + libm_alias_double (__sincos, sincos) + +-#define __sincos __sincos_sse2 ++# define __sincos __sincos_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c +index 80bc028451..0ea9b40e84 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sincosf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sincosf.c +@@ -16,13 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern void __redirect_sincosf (float, float *, float *); + +-#define SYMBOL_NAME sincosf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME sincosf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_sincosf, __sincosf, IFUNC_SELECTOR ()); + + libm_alias_float (__sincos, sincos) ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_sinf.c b/sysdeps/x86_64/fpu/multiarch/s_sinf.c +index a32b9e9550..c61624e3ee 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_sinf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_sinf.c +@@ -16,13 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern float __redirect_sinf (float); + +-#define SYMBOL_NAME sinf +-#include "ifunc-fma.h" ++# define SYMBOL_NAME sinf ++# include "ifunc-fma.h" + + libc_ifunc_redirected (__redirect_sinf, __sinf, IFUNC_SELECTOR ()); + + libm_alias_float (__sin, sin) ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_tan.c b/sysdeps/x86_64/fpu/multiarch/s_tan.c +index f9a2474a13..125d992ba1 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_tan.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_tan.c +@@ -16,15 +16,18 @@ + License along with the GNU C Library; if not, see + . */ + +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < AVX2_X86_ISA_LEVEL ++# include + + extern double __redirect_tan (double); + +-#define SYMBOL_NAME tan +-#include "ifunc-avx-fma4.h" ++# define SYMBOL_NAME tan ++# include "ifunc-avx-fma4.h" + + libc_ifunc_redirected (__redirect_tan, __tan, IFUNC_SELECTOR ()); + libm_alias_double (__tan, tan) + +-#define __tan __tan_sse2 ++# define __tan __tan_sse2 ++#endif + #include +diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S +new file mode 100644 +index 0000000000..b3e87e9606 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of trunc function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__trunc) ++ vroundsd $11, %xmm0, %xmm0, %xmm0 ++ ret ++END(__trunc) ++ ++libm_alias_double (__trunc, trunc) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S +index b496a6ef49..2b79174eed 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc-sse4_1.S +@@ -18,8 +18,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __trunc_sse41 __trunc ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__trunc_sse41) + roundsd $11, %xmm0, %xmm0 + ret + END(__trunc_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_double (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_trunc.c b/sysdeps/x86_64/fpu/multiarch/s_trunc.c +index 9bc9df8744..ea89c4f85d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_trunc.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_trunc.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define trunc __redirect_trunc +-#define __trunc __redirect___trunc +-#include +-#undef trunc +-#undef __trunc ++# define trunc __redirect_trunc ++# define __trunc __redirect___trunc ++# include ++# undef trunc ++# undef __trunc + +-#define SYMBOL_NAME trunc +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME trunc ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_trunc, __trunc, IFUNC_SELECTOR ()); + libm_alias_double (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S +new file mode 100644 +index 0000000000..f31ac7d7f7 +--- /dev/null ++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-avx.S +@@ -0,0 +1,28 @@ ++/* AVX implementation of truncf function. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++#include ++ ++ .text ++ENTRY(__truncf) ++ vroundss $11, %xmm0, %xmm0, %xmm0 ++ ret ++END(__truncf) ++ ++libm_alias_float (__trunc, trunc) +diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S +index 22e9a83307..60498b2cb2 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S ++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf-sse4_1.S +@@ -18,8 +18,20 @@ + + #include + ++#include ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++# include ++# define __truncf_sse41 __truncf ++ .text ++#else + .section .text.sse4.1,"ax",@progbits ++#endif ++ + ENTRY(__truncf_sse41) + roundss $11, %xmm0, %xmm0 + ret + END(__truncf_sse41) ++ ++#if MINIMUM_X86_ISA_LEVEL == SSE4_1_X86_ISA_LEVEL ++libm_alias_float (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/s_truncf.c b/sysdeps/x86_64/fpu/multiarch/s_truncf.c +index dae01d166a..92435ce39d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/s_truncf.c ++++ b/sysdeps/x86_64/fpu/multiarch/s_truncf.c +@@ -16,17 +16,20 @@ + License along with the GNU C Library; if not, see + . */ + +-#define NO_MATH_REDIRECT +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL < SSE4_1_X86_ISA_LEVEL ++# define NO_MATH_REDIRECT ++# include + +-#define truncf __redirect_truncf +-#define __truncf __redirect___truncf +-#include +-#undef truncf +-#undef __truncf ++# define truncf __redirect_truncf ++# define __truncf __redirect___truncf ++# include ++# undef truncf ++# undef __truncf + +-#define SYMBOL_NAME truncf +-#include "ifunc-sse4_1.h" ++# define SYMBOL_NAME truncf ++# include "ifunc-sse4_1.h" + + libc_ifunc_redirected (__redirect_truncf, __truncf, IFUNC_SELECTOR ()); + libm_alias_float (__trunc, trunc) ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/w_exp.c b/sysdeps/x86_64/fpu/multiarch/w_exp.c +index 27eee98a0a..3584187e0e 100644 +--- a/sysdeps/x86_64/fpu/multiarch/w_exp.c ++++ b/sysdeps/x86_64/fpu/multiarch/w_exp.c +@@ -1 +1,6 @@ +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL ++# include ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/w_log.c b/sysdeps/x86_64/fpu/multiarch/w_log.c +index 9b2b018711..414ca3ca3d 100644 +--- a/sysdeps/x86_64/fpu/multiarch/w_log.c ++++ b/sysdeps/x86_64/fpu/multiarch/w_log.c +@@ -1 +1,6 @@ +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL ++# include ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/fpu/multiarch/w_pow.c b/sysdeps/x86_64/fpu/multiarch/w_pow.c +index b50c1988de..d5fcc4f871 100644 +--- a/sysdeps/x86_64/fpu/multiarch/w_pow.c ++++ b/sysdeps/x86_64/fpu/multiarch/w_pow.c +@@ -1 +1,6 @@ +-#include ++#include ++#if MINIMUM_X86_ISA_LEVEL >= AVX2_X86_ISA_LEVEL ++# include ++#else ++# include ++#endif +diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile +index e1e894c963..d3d2270394 100644 +--- a/sysdeps/x86_64/multiarch/Makefile ++++ b/sysdeps/x86_64/multiarch/Makefile +@@ -4,8 +4,8 @@ sysdep_routines += \ + memchr-avx2 \ + memchr-avx2-rtm \ + memchr-evex \ +- memchr-evex512 \ + memchr-evex-rtm \ ++ memchr-evex512 \ + memchr-sse2 \ + memcmp-avx2-movbe \ + memcmp-avx2-movbe-rtm \ +@@ -37,8 +37,8 @@ sysdep_routines += \ + rawmemchr-avx2 \ + rawmemchr-avx2-rtm \ + rawmemchr-evex \ +- rawmemchr-evex512 \ + rawmemchr-evex-rtm \ ++ rawmemchr-evex512 \ + rawmemchr-sse2 \ + stpcpy-avx2 \ + stpcpy-avx2-rtm \ +diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +index 9984c3ca0f..97839a2248 100644 +--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S ++++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S +@@ -21,7 +21,9 @@ + 2. If size is less than VEC, use integer register stores. + 3. If size is from VEC_SIZE to 2 * VEC_SIZE, use 2 VEC stores. + 4. If size is from 2 * VEC_SIZE to 4 * VEC_SIZE, use 4 VEC stores. +- 5. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with ++ 5. On machines ERMS feature, if size is greater or equal than ++ __x86_rep_stosb_threshold then REP STOSB will be used. ++ 6. If size is more to 4 * VEC_SIZE, align to 4 * VEC_SIZE with + 4 VEC stores and store 4 * VEC at a time until done. */ + + #include +diff --git a/sysdeps/x86_64/multiarch/wcsncat-evex.S b/sysdeps/x86_64/multiarch/wcsncat-evex.S +index 392215950a..10bfb0a531 100644 +--- a/sysdeps/x86_64/multiarch/wcsncat-evex.S ++++ b/sysdeps/x86_64/multiarch/wcsncat-evex.S +@@ -1,9 +1,9 @@ +-#ifndef WCSCAT +-# define WCSCAT __wcsncat_evex ++#ifndef WCSNCAT ++# define WCSNCAT __wcsncat_evex + #endif + + #define USE_AS_WCSCPY + #define USE_AS_STRCAT + +-#define STRNCAT WCSCAT ++#define STRNCAT WCSNCAT + #include "strncat-evex.S" +diff --git a/sysdeps/x86_64/tst-gnu2-tls2mod1.S b/sysdeps/x86_64/tst-gnu2-tls2mod1.S +new file mode 100644 +index 0000000000..1d636669ba +--- /dev/null ++++ b/sysdeps/x86_64/tst-gnu2-tls2mod1.S +@@ -0,0 +1,87 @@ ++/* Check if TLSDESC relocation preserves %rdi, %rsi and %rbx. ++ Copyright (C) 2024 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. ++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ . */ ++ ++#include ++ ++/* On AVX512 machines, OFFSET == 40 caused _dl_tlsdesc_dynamic_xsavec ++ to clobber %rdi, %rsi and %rbx. On Intel AVX CPUs, the state size ++ is 960 bytes and this test didn't fail. It may be due to the unused ++ last 128 bytes. On AMD AVX CPUs, the state size is 832 bytes and ++ this test might fail without the fix. */ ++#ifndef OFFSET ++# define OFFSET 40 ++#endif ++ ++ .text ++ .p2align 4 ++ .globl apply_tls ++ .type apply_tls, @function ++apply_tls: ++ cfi_startproc ++ _CET_ENDBR ++ pushq %rbp ++ cfi_def_cfa_offset (16) ++ cfi_offset (6, -16) ++ movdqu (%RDI_LP), %xmm0 ++ lea tls_var1@TLSDESC(%rip), %RAX_LP ++ mov %RSP_LP, %RBP_LP ++ cfi_def_cfa_register (6) ++ /* Align stack to 64 bytes. */ ++ and $-64, %RSP_LP ++ sub $OFFSET, %RSP_LP ++ pushq %rbx ++ /* Set %ebx to 0xbadbeef. */ ++ movl $0xbadbeef, %ebx ++ movl $0xbadbeef, %esi ++ movq %rdi, saved_rdi(%rip) ++ movq %rsi, saved_rsi(%rip) ++ call *tls_var1@TLSCALL(%RAX_LP) ++ /* Check if _dl_tlsdesc_dynamic preserves %rdi, %rsi and %rbx. */ ++ cmpq saved_rdi(%rip), %rdi ++ jne L(hlt) ++ cmpq saved_rsi(%rip), %rsi ++ jne L(hlt) ++ cmpl $0xbadbeef, %ebx ++ jne L(hlt) ++ add %fs:0, %RAX_LP ++ movups %xmm0, 32(%RAX_LP) ++ movdqu 16(%RDI_LP), %xmm1 ++ mov %RAX_LP, %RBX_LP ++ movups %xmm1, 48(%RAX_LP) ++ lea 32(%RBX_LP), %RAX_LP ++ pop %rbx ++ leave ++ cfi_def_cfa (7, 8) ++ ret ++L(hlt): ++ hlt ++ cfi_endproc ++ .size apply_tls, .-apply_tls ++ .hidden tls_var1 ++ .globl tls_var1 ++ .section .tbss,"awT",@nobits ++ .align 16 ++ .type tls_var1, @object ++ .size tls_var1, 3200 ++tls_var1: ++ .zero 3200 ++ .local saved_rdi ++ .comm saved_rdi,8,8 ++ .local saved_rsi ++ .comm saved_rsi,8,8 ++ .section .note.GNU-stack,"",@progbits +diff --git a/time/timespec_get.c b/time/timespec_get.c +index b031e42ca2..26a044bca6 100644 +--- a/time/timespec_get.c ++++ b/time/timespec_get.c +@@ -4,7 +4,7 @@ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either +- version 2.1 of the License. ++ version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of +diff --git a/time/timespec_getres.c b/time/timespec_getres.c +index edb397507c..2e18b8bcac 100644 +--- a/time/timespec_getres.c ++++ b/time/timespec_getres.c +@@ -5,7 +5,7 @@ + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either +- version 2.1 of the License. ++ version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of