linux-numa.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Tim Pepper" <lnxninja@linux.vnet.ibm.com>
To: linux-numa@vger.kernel.org
Cc: Thomas Renninger <trenn@suse.de>,
	Anton Blanchard <anton@samba.org>,
	Amit Arora <aarora@linux.vnet.ibm.com>
Subject: [PATCH 5 of 5] numactl --hardware should handle sparse node numbering
Date: Wed, 22 Sep 2010 11:04:40 -0700	[thread overview]
Message-ID: <20100922180440.GH31877@tpepper-t61p.dolavim.us> (raw)

Author: Tim Pepper <lnxninja@linux.vnet.ibm.com>
Date:   Tue Sep 21 16:53:01 2010 -0700

    numactl --hardware should handle sparse node numbering
    
    Previously this had been enabled, but the patches by Amit Arora in
    2009 were partly undone by Thomas Renninger in 2010, because these
    past patches appear to have only considered parts of the problem.
    Reverting the patches fixed Thomas's problem but reintroduced Amit's.
    
    It is possible to have sparse node numbering as well as nodes with
    no memory, nodes with no cpus and nodes with neither cpus nor memory.
    All of these should be handled.  The existing node bitmasks and code also
    have conflated those possibilities with policy (ie: numa_all_nodes_ptr
    contains nodes which have memory and from which the calling process may
    allocate memory).  For this reason a new node bitmask, numa_nodes_ptr,
    is added and populated so that 'numactl --hardware' can truly inventory
    the available hardware as per the man page.
    
    This may or may not be correct in that the word "available" may or may not
    have been intended to include policy and actual usability of the nodes.
    But it seems that the command is meant to show a lower level inventory,
    in which case it seems reasonable to truly print out all the nodes which
    kernelspace has exposed to userspace.
    
    Signed-off-by: Tim Pepper <lnxninja@linux.vnet.ibm.com>
    Cc: Thomas Renninger <trenn@suse.de>
    Cc: Anton Blanchard <anton@samba.org>
    Cc: Amit Arora <aarora@linux.vnet.ibm.com>
---
 distance.c        |   26 ++++++++++++++++----------
 libnuma.c         |    9 +++++++--
 numa.h            |    3 +++
 numactl.c         |   51 +++++++++++++++++++++++++++++++++++++++++++++++----
 versions.ldscript |    1 +
 5 files changed, 74 insertions(+), 16 deletions(-)

diff --git a/distance.c b/distance.c
index 2aad2bb..2b48f97 100755
--- a/distance.c
+++ b/distance.c
@@ -26,14 +26,21 @@
 static int distance_numnodes;
 static int *distance_table;
 
-static void parse_numbers(char *s, int *iptr, int n)
+static void parse_numbers(char *s, int *iptr)
 {
 	int i, d, j;
 	char *end;
-	for (i = 0, j = 0; i < n; i++, j++) {
+	int maxnode = numa_max_node();
+	int numnodes = 0;
+
+	for (i = 0; i <= maxnode; i++)
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			numnodes++;
+
+	for (i = 0, j = 0; i <= maxnode; i++, j++) {
 		d = strtoul(s, &end, 0);
 		/* Skip unavailable nodes */
-		while (j<n &&  !numa_bitmask_isbitset(numa_all_nodes_ptr, j))
+		while (j<=maxnode && !numa_bitmask_isbitset(numa_nodes_ptr, j))
 			j++;
 		*(iptr+j) = d;
 		if (s == end)
@@ -47,10 +54,10 @@ static int read_distance_table(void)
 	int nd, len;
 	char *line = NULL;
 	size_t linelen = 0;
-	int numnodes = 0;
+	int maxnode = numa_max_node() + 1;
 	int *table = NULL;
 	int err = -1;
-	
+
 	for (nd = 0;; nd++) {
 		char fn[100];
 		FILE *dfh;
@@ -59,7 +66,7 @@ static int read_distance_table(void)
 		if (!dfh) {
 			if (errno == ENOENT && nd > 0)
 				err = 0;
-			if (!err && nd<=numa_max_node())
+			if (!err && nd<maxnode)
 				continue;
 			else
 				break;
@@ -70,15 +77,14 @@ static int read_distance_table(void)
 			break;
 
 		if (!table) {
-			numnodes = numa_num_configured_nodes();
-			table = calloc(numnodes * numnodes, sizeof(int));
+			table = calloc(maxnode * maxnode, sizeof(int));
 			if (!table) {
 				errno = ENOMEM;
 				break;
 			}
 		}
 
-		parse_numbers(line, table + nd * numnodes, numnodes);
+		parse_numbers(line, table + nd * maxnode);
 	}
 	free(line);
 	if (err)  {
@@ -96,7 +102,7 @@ static int read_distance_table(void)
 		free(table);
 		return 0;
 	}
-	distance_numnodes = numnodes;
+	distance_numnodes = maxnode;
 	distance_table = table;
 	return 0;		
 }
diff --git a/libnuma.c b/libnuma.c
index 641ad26..d40835d 100644
--- a/libnuma.c
+++ b/libnuma.c
@@ -51,6 +51,7 @@ struct bitmask *numa_all_cpus_ptr = NULL;
    of numa_no_nodes and numa_all_nodes, but the loader does not correctly
    handle versioning of BSS versus small data items */
 
+struct bitmask *numa_nodes_ptr = NULL;
 static struct bitmask *numa_memnode_ptr = NULL;
 static unsigned long *node_cpu_mask_v1[NUMA_NUM_NODES];
 struct bitmask **node_cpu_mask_v2;
@@ -105,6 +106,8 @@ numa_fini(void)
 		numa_bitmask_free(numa_no_nodes_ptr);
 	if (numa_memnode_ptr)
 		numa_bitmask_free(numa_memnode_ptr);
+	if (numa_nodes_ptr)
+		numa_bitmask_free(numa_nodes_ptr);
 }
 
 /*
@@ -292,8 +295,8 @@ int numa_pagesize(void)
 make_internal_alias(numa_pagesize);
 
 /*
- * Find nodes with memory (numa_memnode_ptr) and the highest numbered
- * existing node (maxconfigurednode).
+ * Find nodes (numa_nodes_ptr), nodes with memory (numa_memnode_ptr)
+ * and the highest numbered existing node (maxconfigurednode).
  */
 static void
 set_configured_nodes(void)
@@ -303,6 +306,7 @@ set_configured_nodes(void)
 	long long freep;
 
 	numa_memnode_ptr = numa_allocate_nodemask();
+	numa_nodes_ptr = numa_allocate_nodemask();
 
 	d = opendir("/sys/devices/system/node");
 	if (!d) {
@@ -313,6 +317,7 @@ set_configured_nodes(void)
 			if (strncmp(de->d_name, "node", 4))
 				continue;
 			nd = strtoul(de->d_name+4, NULL, 0);
+			numa_bitmask_setbit(numa_nodes_ptr, nd);
 			if (numa_node_size64(nd, &freep) > 0)
 				numa_bitmask_setbit(numa_memnode_ptr, nd);
 			if (maxconfigurednode < nd)
diff --git a/numa.h b/numa.h
index 989f4d7..9a6a644 100755
--- a/numa.h
+++ b/numa.h
@@ -150,6 +150,9 @@ int numa_pagesize(void);
    Only valid after numa_available. */
 extern struct bitmask *numa_all_nodes_ptr;
 
+/* Set with all nodes the kernel has exposed to userspace */
+extern struct bitmask *numa_nodes_ptr;
+
 /* For source compatibility */
 extern nodemask_t numa_all_nodes;
 
diff --git a/numactl.c b/numactl.c
index ce3a482..2e21ae8 100755
--- a/numactl.c
+++ b/numactl.c
@@ -188,12 +188,17 @@ static void print_distances(int maxnode)
 	printf("node distances:\n");
 	printf("node ");
 	for (i = 0; i <= maxnode; i++)
-		printf("% 3d ", i);
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			printf("% 3d ", i);
 	printf("\n");
 	for (i = 0; i <= maxnode; i++) {
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
+			continue;
 		printf("% 3d: ", i);
 		for (k = 0; k <= maxnode; k++)
-			printf("% 3d ", numa_distance(i,k));
+			if (numa_bitmask_isbitset(numa_nodes_ptr, i) &&
+			    numa_bitmask_isbitset(numa_nodes_ptr, k))
+				printf("% 3d ", numa_distance(i,k));
 		printf("\n");
 	}			
 }
@@ -216,14 +221,52 @@ void print_node_cpus(int node)
 void hardware(void)
 {
 	int i;
+	int numnodes=0;
+	int prevnode=-1;
+	int skip=0;
 	int maxnode = numa_max_node();
 
-	printf("available: %d nodes (0-%d)\n", 1+maxnode, maxnode);
+	for (i=0; i<=maxnode; i++)
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
+			numnodes++;
+	printf("available: %d nodes (", numnodes);
+	for (i=0; i<=maxnode; i++) {
+		if (numa_bitmask_isbitset(numa_nodes_ptr, i)) {
+			if (prevnode == -1) {
+				printf("%d", i);
+				prevnode=i;
+				continue;
+			}
+
+			if (i > prevnode + 1) {
+				if (skip) {
+					printf("%d", prevnode);
+					skip=0;
+				}
+				printf(",%d", i);
+				prevnode=i;
+				continue;
+			}
+
+			if (i == prevnode + 1) {
+				if (!skip) {
+					printf("-");
+					skip=1;
+				}
+				prevnode=i;
+			}
+
+			if ((i == maxnode) && skip)
+				printf("%d", prevnode);
+		}
+	}
+	printf(")\n");
+
 	for (i = 0; i <= maxnode; i++) {
 		char buf[64];
 		long long fr;
 		unsigned long long sz = numa_node_size64(i, &fr);
-		if (!numa_bitmask_isbitset(numa_all_nodes_ptr, i))
+		if (!numa_bitmask_isbitset(numa_nodes_ptr, i))
 			continue;
 
 		printf("node %d cpus:", i);
diff --git a/versions.ldscript b/versions.ldscript
index c2c88b6..e3389e0 100755
--- a/versions.ldscript
+++ b/versions.ldscript
@@ -119,6 +119,7 @@ libnuma_1.2 {
     numa_node_size;
     numa_node_to_cpus;
     numa_node_of_cpu;
+    numa_nodes_ptr;
     numa_num_configured_cpus;
     numa_num_configured_nodes;
     numa_num_possible_nodes;

                 reply	other threads:[~2010-09-22 18:04 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100922180440.GH31877@tpepper-t61p.dolavim.us \
    --to=lnxninja@linux.vnet.ibm.com \
    --cc=aarora@linux.vnet.ibm.com \
    --cc=anton@samba.org \
    --cc=linux-numa@vger.kernel.org \
    --cc=trenn@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).