gnu: ucx: Avoid relying on /sys/class/net.

This fixes test failures of packages that use Open MPI, whereby UCX
would error out due to /sys/class/net being unavailable in the build
chroot that the daemon sets up.

* gnu/packages/patches/ucx-tcp-iface-ioctl.patch: New file.
* gnu/packages/fabric-management.scm (ucx)[source]: Use it.
* gnu/local.mk (dist_patch_DATA): Add it.
This commit is contained in:
Ludovic Courtès 2019-11-16 17:15:14 +01:00
parent bd7df85ea4
commit 2401806bdb
No known key found for this signature in database
GPG key ID: 090B11993D9AEBB5
3 changed files with 112 additions and 1 deletions

View file

@ -1392,6 +1392,7 @@ dist_patch_DATA = \
%D%/packages/patches/tuxpaint-stamps-path.patch \
%D%/packages/patches/txr-shell.patch \
%D%/packages/patches/u-boot-fix-mkimage-header-verification.patch \
%D%/packages/patches/ucx-tcp-iface-ioctl.patch \
%D%/packages/patches/udiskie-no-appindicator.patch \
%D%/packages/patches/unzip-CVE-2014-8139.patch \
%D%/packages/patches/unzip-CVE-2014-8140.patch \

View file

@ -190,7 +190,8 @@ (define-public ucx
(file-name (git-file-name name version))
(sha256
(base32
"0x3clvy716i7va4m4adgx6ihjsfnzrkdizhxz5v52944dkglpc8n"))))
"0x3clvy716i7va4m4adgx6ihjsfnzrkdizhxz5v52944dkglpc8n"))
(patches (search-patches "ucx-tcp-iface-ioctl.patch"))))
(build-system gnu-build-system)
(arguments
'( ;; These are some of the flags found in 'contrib/configure-release'.

View file

@ -0,0 +1,109 @@
Since /sys is unavailable in build environments, the list of available
TCP network interfaces cannot be obtained via /sys/class/net. This patch
provides alternative code that uses the SIOCGIFCONF ioctl to get the
names of the available TCP network interfaces.
diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c
index 81ad459..10024a6 100644
--- a/src/uct/tcp/tcp_iface.c
+++ b/src/uct/tcp/tcp_iface.c
@@ -12,6 +12,8 @@
#include <sys/poll.h>
#include <netinet/tcp.h>
#include <dirent.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
static ucs_config_field_t uct_tcp_iface_config_table[] = {
{"", "MAX_SHORT=8k", NULL,
@@ -483,6 +485,70 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h,
uct_worker_h, const uct_iface_params_t*,
const uct_iface_config_t*);
+/* Fallback device query: list network interfaces with the SIOCGIFCONF ioctl
+ * and return the active ones as TCP resources in a ucs_realloc'ed array.
+ * At most 10 interfaces are examined; 'md' is unused. */
+static ucs_status_t query_devices_ioctl(uct_md_h md,
+                                        uct_tl_resource_desc_t **resource_p,
+                                        unsigned *num_resources_p)
+{
+    uct_tl_resource_desc_t *resources = NULL, *tmp, *rsc;
+    unsigned num_resources = 0;
+    ucs_status_t status;
+    struct ifreq reqs[10];
+    struct ifconf conf;
+    int sock, i;
+
+    conf.ifc_len = sizeof reqs;
+    conf.ifc_req = reqs;
+
+    /* socket(2) takes the domain first, then the type. */
+    sock = socket(AF_INET, SOCK_STREAM, 0);
+    if (sock < 0) {
+        ucs_error("socket(2) failed: %m");
+        return UCS_ERR_IO_ERROR;
+    }
+
+    if (ioctl(sock, SIOCGIFCONF, &conf) < 0) {
+        ucs_error("SIOCGIFCONF ioctl failed: %m");
+        status = UCS_ERR_IO_ERROR;
+        goto out;
+    }
+
+    for (i = 0; i < conf.ifc_len / sizeof(struct ifreq); i++) {
+        const char *name = reqs[i].ifr_name;
+
+        if (!ucs_netif_is_active(name)) {
+            continue;
+        }
+
+        tmp = ucs_realloc(resources, sizeof(*resources) * (num_resources + 1),
+                          "tcp resources");
+        if (tmp == NULL) {
+            ucs_free(resources);
+            status = UCS_ERR_NO_MEMORY;
+            goto out;
+        }
+        resources = tmp;
+
+        /* Fill the slot just appended; indexing by 'i' would write past the
+         * end of the array once an inactive interface has been skipped. */
+        rsc = &resources[num_resources++];
+        ucs_snprintf_zero(rsc->tl_name, sizeof(rsc->tl_name), "%s",
+                          UCT_TCP_NAME);
+        ucs_snprintf_zero(rsc->dev_name, sizeof(rsc->dev_name), "%s", name);
+        rsc->dev_type = UCT_DEVICE_TYPE_NET;
+    }
+
+    *num_resources_p = num_resources;
+    *resource_p      = resources;
+    status           = UCS_OK;
+
+out:
+    close(sock);
+    return status;
+}
+
static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md,
uct_tl_resource_desc_t **resource_p,
unsigned *num_resources_p)
@@ -496,9 +562,9 @@ static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md,
dir = opendir(netdev_dir);
if (dir == NULL) {
- ucs_error("opendir(%s) failed: %m", netdev_dir);
- status = UCS_ERR_IO_ERROR;
- goto out;
+ /* When /sys is unavailable, as can be the case in a container,
+ * resort to a good old 'ioctl'. */
+ return query_devices_ioctl(md, resource_p, num_resources_p);
}
resources = NULL;
@@ -543,6 +609,5 @@ static ucs_status_t uct_tcp_query_tl_resources(uct_md_h md,
out_closedir:
closedir(dir);
-out:
return status;
}