All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Correct handling node with CPU populated but no memory populated
@ 2009-12-25 10:42 Jiang, Yunhong
  2009-12-26  7:57 ` Keir Fraser
  2010-01-04  8:20 ` Jiang, Yunhong
  0 siblings, 2 replies; 4+ messages in thread
From: Jiang, Yunhong @ 2009-12-25 10:42 UTC (permalink / raw
  To: Keir Fraser; +Cc: xen-devel@lists.xensource.com

[-- Attachment #1: Type: text/plain, Size: 4568 bytes --]

This patch fix one bug caused by changeset 20599.

BTW, when I was working on the patch, I'm really confused by srat_detect_node() and init_cpu_to_node().

Firstly,  Per my understanding to the code itself, these two functions achieve the same purpose, but I can't understanding why srat_detect_node need be called after the CPU is up. As this is backported from kernel, so I checked linux kernel. In linux kernel, the srat_detect_node is mainly for one quirk for AMD platform, but that quirk is not implemented in Xen side. So is the quirk is really needed?

Also, are there any special reason that init_cpu_to_node() can't be called right after numa_initmem_init()? I think it will be cleaner if we setup the whole numa information in one place. Originally I suspect it is related to generic_apic_probe(), but after checking the code, I didn't find anything related. Did I miss anything?

Thanks
--jyh

# HG changeset patch
# User Yunhong Jiang <yunhong.jiang@intel.com>
# Date 1261684998 -28800
# Node ID 986dae93aea54f575202dcbaacbf3f4ff05032d4
# Parent  98c4b2498415751f97091631907d9d173ac0092f

Correct handling node with CPU populated but no memory populated

In changset 20599, the node that has no memory populated is marked parsed, but not online. However, if there are CPU populated in this node, the corresponding CPU mapping (i.e. the cpu_to_node) is still setup to the offline node, this will cause trouble for memory allocation.

This patch changes the init_cpu_to_node() and srant_detect_node(), to considering the node is offlined situation.

Now the apicid_to_node is only used to keep the mapping between cpu/node provided by BIOS, and should not be used for memory allocation anymore.

One thing left is to update the cpu_to_node mapping after memory populated by memory hot-add.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>

diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/numa.c	Fri Dec 25 04:03:18 2009 +0800
@@ -35,6 +35,9 @@ unsigned char cpu_to_node[NR_CPUS] __rea
 unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
+/*
+ * Keep BIOS's CPU2node information, should not be used for memory allocaion
+ */
 unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
  	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
@@ -288,14 +291,16 @@ static __init int numa_setup(char *opt)
  */
 void __devinit init_cpu_to_node(void)
 {
-	int i;
+	int i, node;
  	for (i = 0; i < NR_CPUS; i++) {
 		u32 apicid = x86_cpu_to_apicid[i];
 		if (apicid == BAD_APICID)
 			continue;
-		if (apicid_to_node[apicid] == NUMA_NO_NODE)
-			continue;
-		numa_set_node(i,apicid_to_node[apicid]);
+        node = apicid_to_node[apicid];
+
+		if ( node == NUMA_NO_NODE || !node_online(node) )
+			node = 0;
+		numa_set_node(i, node);
 	}
 }
 
diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/setup.c	Fri Dec 25 04:03:18 2009 +0800
@@ -20,6 +20,7 @@
 #include <xen/rcupdate.h>
 #include <xen/vga.h>
 #include <xen/dmi.h>
+#include <xen/nodemask.h>
 #include <public/version.h>
 #ifdef CONFIG_COMPAT
 #include <compat/platform.h>
@@ -252,7 +253,7 @@ void __devinit srat_detect_node(int cpu)
     u32 apicid = x86_cpu_to_apicid[cpu];
 
     node = apicid_to_node[apicid];
-    if ( node == NUMA_NO_NODE )
+    if ( node == NUMA_NO_NODE || !node_online(node) )
         node = 0;
     numa_set_node(cpu, node);
 
diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/smpboot.c	Fri Dec 25 04:03:18 2009 +0800
@@ -913,7 +913,7 @@ static int __devinit do_boot_cpu(int api
 	}
 #else
 	if (!per_cpu(compat_arg_xlat, cpu))
-		setup_compat_arg_xlat(cpu, apicid_to_node[apicid]);
+		setup_compat_arg_xlat(cpu, cpu_to_node[cpu]);
 #endif
 
 	if (!idt_tables[cpu]) {
diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/x86_64/mm.c	Fri Dec 25 04:03:18 2009 +0800
@@ -997,7 +997,7 @@ void __init subarch_init_memory(void)
     }
 
     if ( setup_compat_arg_xlat(smp_processor_id(),
-                               apicid_to_node[boot_cpu_physical_apicid]) )
+                               cpu_to_node[0]) )
         panic("Could not setup argument translation area");
 }
 



[-- Attachment #2: node_online.patch --]
[-- Type: application/octet-stream, Size: 3552 bytes --]

# HG changeset patch
# User Yunhong Jiang <yunhong.jiang@intel.com>
# Date 1261684998 -28800
# Node ID 986dae93aea54f575202dcbaacbf3f4ff05032d4
# Parent  98c4b2498415751f97091631907d9d173ac0092f

Correct handling node with CPU populated but no memory populated

In changset 20599, the node that has no memory populated is marked parsed, but not online. However, if there are CPU populated in this node, the corresponding CPU mapping (i.e. the cpu_to_node) is still setup to the offline node, this will cause trouble for memory allocation.

This patch changes the init_cpu_to_node() and srant_detect_node(), to considering the node is offlined situation.

Now the apicid_to_node is only used to keep the mapping between cpu/node provided by BIOS, and should not be used for memory allocation anymore.

One thing left is to update the cpu_to_node mapping after memory populated by memory hot-add.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>

diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/numa.c
--- a/xen/arch/x86/numa.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/numa.c	Fri Dec 25 04:03:18 2009 +0800
@@ -35,6 +35,9 @@ unsigned char cpu_to_node[NR_CPUS] __rea
 unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
 };
+/*
+ * Keep BIOS's CPU2node information, should not be used for memory allocaion
+ */
 unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
  	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
 };
@@ -288,14 +291,16 @@ static __init int numa_setup(char *opt)
  */
 void __devinit init_cpu_to_node(void)
 {
-	int i;
+	int i, node;
  	for (i = 0; i < NR_CPUS; i++) {
 		u32 apicid = x86_cpu_to_apicid[i];
 		if (apicid == BAD_APICID)
 			continue;
-		if (apicid_to_node[apicid] == NUMA_NO_NODE)
-			continue;
-		numa_set_node(i,apicid_to_node[apicid]);
+        node = apicid_to_node[apicid];
+
+		if ( node == NUMA_NO_NODE || !node_online(node) )
+			node = 0;
+		numa_set_node(i, node);
 	}
 }
 
diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/setup.c
--- a/xen/arch/x86/setup.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/setup.c	Fri Dec 25 04:03:18 2009 +0800
@@ -20,6 +20,7 @@
 #include <xen/rcupdate.h>
 #include <xen/vga.h>
 #include <xen/dmi.h>
+#include <xen/nodemask.h>
 #include <public/version.h>
 #ifdef CONFIG_COMPAT
 #include <compat/platform.h>
@@ -252,7 +253,7 @@ void __devinit srat_detect_node(int cpu)
     u32 apicid = x86_cpu_to_apicid[cpu];
 
     node = apicid_to_node[apicid];
-    if ( node == NUMA_NO_NODE )
+    if ( node == NUMA_NO_NODE || !node_online(node) )
         node = 0;
     numa_set_node(cpu, node);
 
diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/smpboot.c
--- a/xen/arch/x86/smpboot.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/smpboot.c	Fri Dec 25 04:03:18 2009 +0800
@@ -913,7 +913,7 @@ static int __devinit do_boot_cpu(int api
 	}
 #else
 	if (!per_cpu(compat_arg_xlat, cpu))
-		setup_compat_arg_xlat(cpu, apicid_to_node[apicid]);
+		setup_compat_arg_xlat(cpu, cpu_to_node[cpu]);
 #endif
 
 	if (!idt_tables[cpu]) {
diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/x86_64/mm.c
--- a/xen/arch/x86/x86_64/mm.c	Thu Dec 24 15:59:44 2009 +0000
+++ b/xen/arch/x86/x86_64/mm.c	Fri Dec 25 04:03:18 2009 +0800
@@ -997,7 +997,7 @@ void __init subarch_init_memory(void)
     }
 
     if ( setup_compat_arg_xlat(smp_processor_id(),
-                               apicid_to_node[boot_cpu_physical_apicid]) )
+                               cpu_to_node[0]) )
         panic("Could not setup argument translation area");
 }
 

[-- Attachment #3: Type: text/plain, Size: 138 bytes --]

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xensource.com
http://lists.xensource.com/xen-devel

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] Correct handling node with CPU populated but no memory populated
  2009-12-25 10:42 [PATCH] Correct handling node with CPU populated but no memory populated Jiang, Yunhong
@ 2009-12-26  7:57 ` Keir Fraser
  2009-12-28 23:26   ` Jiang, Yunhong
  2010-01-04  8:20 ` Jiang, Yunhong
  1 sibling, 1 reply; 4+ messages in thread
From: Keir Fraser @ 2009-12-26  7:57 UTC (permalink / raw
  To: Jiang, Yunhong; +Cc: xen-devel@lists.xensource.com

On 25/12/2009 10:42, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:

> BTW, when I was working on the patch, I'm really confused by
> srat_detect_node() and init_cpu_to_node().
> 
> Firstly,  Per my understanding to the code itself, these two functions achieve
> the same purpose, but I can't understanding why srat_detect_node need be
> called after the CPU is up. As this is backported from kernel, so I checked
> linux kernel. In linux kernel, the srat_detect_node is mainly for one quirk
> for AMD platform, but that quirk is not implemented in Xen side. So is the
> quirk is really needed?
> 
> Also, are there any special reason that init_cpu_to_node() can't be called
> right after numa_initmem_init()? I think it will be cleaner if we setup the
> whole numa information in one place. Originally I suspect it is related to
> generic_apic_probe(), but after checking the code, I didn't find anything
> related. Did I miss anything?

I have no direct involvement in the NUMA code that got ported, and I think
the guy that originally did the port has moved on. If you think it can be
cleaned up and improved, please go ahead.

 -- Keir

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] Correct handling node with CPU populated but no memory populated
  2009-12-26  7:57 ` Keir Fraser
@ 2009-12-28 23:26   ` Jiang, Yunhong
  0 siblings, 0 replies; 4+ messages in thread
From: Jiang, Yunhong @ 2009-12-28 23:26 UTC (permalink / raw
  To: Keir Fraser; +Cc: xen-devel@lists.xensource.com

Ok, I will take that.

--jyh

>-----Original Message-----
>From: Keir Fraser [mailto:keir.fraser@eu.citrix.com]
>Sent: Saturday, December 26, 2009 3:57 PM
>To: Jiang, Yunhong
>Cc: xen-devel@lists.xensource.com
>Subject: Re: [PATCH] Correct handling node with CPU populated but no memory
>populated
>
>On 25/12/2009 10:42, "Jiang, Yunhong" <yunhong.jiang@intel.com> wrote:
>
>> BTW, when I was working on the patch, I'm really confused by
>> srat_detect_node() and init_cpu_to_node().
>>
>> Firstly,  Per my understanding to the code itself, these two functions achieve
>> the same purpose, but I can't understanding why srat_detect_node need be
>> called after the CPU is up. As this is backported from kernel, so I checked
>> linux kernel. In linux kernel, the srat_detect_node is mainly for one quirk
>> for AMD platform, but that quirk is not implemented in Xen side. So is the
>> quirk is really needed?
>>
>> Also, are there any special reason that init_cpu_to_node() can't be called
>> right after numa_initmem_init()? I think it will be cleaner if we setup the
>> whole numa information in one place. Originally I suspect it is related to
>> generic_apic_probe(), but after checking the code, I didn't find anything
>> related. Did I miss anything?
>
>I have no direct involvement in the NUMA code that got ported, and I think
>the guy that originally did the port has moved on. If you think it can be
>cleaned up and improved, please go ahead.
>
> -- Keir
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* RE: [PATCH] Correct handling node with CPU populated but no memory populated
  2009-12-25 10:42 [PATCH] Correct handling node with CPU populated but no memory populated Jiang, Yunhong
  2009-12-26  7:57 ` Keir Fraser
@ 2010-01-04  8:20 ` Jiang, Yunhong
  1 sibling, 0 replies; 4+ messages in thread
From: Jiang, Yunhong @ 2010-01-04  8:20 UTC (permalink / raw
  To: Jiang, Yunhong, Keir Fraser; +Cc: xen-devel@lists.xensource.com


>Firstly,  Per my understanding to the code itself, these two functions achieve the
>same purpose, but I can't understanding why srat_detect_node need be called after
>the CPU is up. As this is backported from kernel, so I checked linux kernel. In linux
>kernel, the srat_detect_node is mainly for one quirk for AMD platform, but that quirk
>is not implemented in Xen side. So is the quirk is really needed?
>
>Also, are there any special reason that init_cpu_to_node() can't be called right after
>numa_initmem_init()? I think it will be cleaner if we setup the whole numa
>information in one place. Originally I suspect it is related to generic_apic_probe(),
>but after checking the code, I didn't find anything related. Did I miss anything?

Seems I do missed something here after checking the code again. The init_cpu_to_node() depends on the the cpu_to_apicid information, which will be achieved only after the MADT table parser.
As for the srat_detect_node(), although it is in fact same to init_cpu_to_node() currently per my understanding, but it is really harmless, so I'd leave it, so that one day some quirks can be added after the CPU is identified already.
So seems it does not need any clean at all. Really sorry for the wrong alerm.

--jyh

>
>Thanks
>--jyh
>
># HG changeset patch
># User Yunhong Jiang <yunhong.jiang@intel.com>
># Date 1261684998 -28800
># Node ID 986dae93aea54f575202dcbaacbf3f4ff05032d4
># Parent  98c4b2498415751f97091631907d9d173ac0092f
>
>Correct handling node with CPU populated but no memory populated
>
>In changset 20599, the node that has no memory populated is marked parsed, but
>not online. However, if there are CPU populated in this node, the corresponding CPU
>mapping (i.e. the cpu_to_node) is still setup to the offline node, this will cause
>trouble for memory allocation.
>
>This patch changes the init_cpu_to_node() and srant_detect_node(), to considering
>the node is offlined situation.
>
>Now the apicid_to_node is only used to keep the mapping between cpu/node
>provided by BIOS, and should not be used for memory allocation anymore.
>
>One thing left is to update the cpu_to_node mapping after memory populated by
>memory hot-add.
>
>Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
>
>diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/numa.c
>--- a/xen/arch/x86/numa.c	Thu Dec 24 15:59:44 2009 +0000
>+++ b/xen/arch/x86/numa.c	Fri Dec 25 04:03:18 2009 +0800
>@@ -35,6 +35,9 @@ unsigned char cpu_to_node[NR_CPUS] __rea
> unsigned char cpu_to_node[NR_CPUS] __read_mostly = {
> 	[0 ... NR_CPUS-1] = NUMA_NO_NODE
> };
>+/*
>+ * Keep BIOS's CPU2node information, should not be used for memory allocaion
>+ */
> unsigned char apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = {
>  	[0 ... MAX_LOCAL_APIC-1] = NUMA_NO_NODE
> };
>@@ -288,14 +291,16 @@ static __init int numa_setup(char *opt)
>  */
> void __devinit init_cpu_to_node(void)
> {
>-	int i;
>+	int i, node;
>  	for (i = 0; i < NR_CPUS; i++) {
> 		u32 apicid = x86_cpu_to_apicid[i];
> 		if (apicid == BAD_APICID)
> 			continue;
>-		if (apicid_to_node[apicid] == NUMA_NO_NODE)
>-			continue;
>-		numa_set_node(i,apicid_to_node[apicid]);
>+        node = apicid_to_node[apicid];
>+
>+		if ( node == NUMA_NO_NODE || !node_online(node) )
>+			node = 0;
>+		numa_set_node(i, node);
> 	}
> }
>
>diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/setup.c
>--- a/xen/arch/x86/setup.c	Thu Dec 24 15:59:44 2009 +0000
>+++ b/xen/arch/x86/setup.c	Fri Dec 25 04:03:18 2009 +0800
>@@ -20,6 +20,7 @@
> #include <xen/rcupdate.h>
> #include <xen/vga.h>
> #include <xen/dmi.h>
>+#include <xen/nodemask.h>
> #include <public/version.h>
> #ifdef CONFIG_COMPAT
> #include <compat/platform.h>
>@@ -252,7 +253,7 @@ void __devinit srat_detect_node(int cpu)
>     u32 apicid = x86_cpu_to_apicid[cpu];
>
>     node = apicid_to_node[apicid];
>-    if ( node == NUMA_NO_NODE )
>+    if ( node == NUMA_NO_NODE || !node_online(node) )
>         node = 0;
>     numa_set_node(cpu, node);
>
>diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/smpboot.c
>--- a/xen/arch/x86/smpboot.c	Thu Dec 24 15:59:44 2009 +0000
>+++ b/xen/arch/x86/smpboot.c	Fri Dec 25 04:03:18 2009 +0800
>@@ -913,7 +913,7 @@ static int __devinit do_boot_cpu(int api
> 	}
> #else
> 	if (!per_cpu(compat_arg_xlat, cpu))
>-		setup_compat_arg_xlat(cpu, apicid_to_node[apicid]);
>+		setup_compat_arg_xlat(cpu, cpu_to_node[cpu]);
> #endif
>
> 	if (!idt_tables[cpu]) {
>diff -r 98c4b2498415 -r 986dae93aea5 xen/arch/x86/x86_64/mm.c
>--- a/xen/arch/x86/x86_64/mm.c	Thu Dec 24 15:59:44 2009 +0000
>+++ b/xen/arch/x86/x86_64/mm.c	Fri Dec 25 04:03:18 2009 +0800
>@@ -997,7 +997,7 @@ void __init subarch_init_memory(void)
>     }
>
>     if ( setup_compat_arg_xlat(smp_processor_id(),
>-                               apicid_to_node[boot_cpu_physical_apicid]) )
>+                               cpu_to_node[0]) )
>         panic("Could not setup argument translation area");
> }
>
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2010-01-04  8:20 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-12-25 10:42 [PATCH] Correct handling node with CPU populated but no memory populated Jiang, Yunhong
2009-12-26  7:57 ` Keir Fraser
2009-12-28 23:26   ` Jiang, Yunhong
2010-01-04  8:20 ` Jiang, Yunhong

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.