LKML Archive mirror
 help / color / mirror / Atom feed
* [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch
@ 2024-04-11  1:05 Tiezhu Yang
  2024-04-11  1:05 ` [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c Tiezhu Yang
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-11  1:05 UTC (permalink / raw
  To: Huacai Chen, Thomas Gleixner, Arnd Bergmann, Marc Zyngier
  Cc: loongarch, linux-kernel, loongson-kernel

The changes of irqchip have been merged into the irq/core branch of tip.

https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=42a7d887664b
https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=a64003da0ef8

This version only touches arch/loongarch and is based on 6.9-rc3;
the first 3 patches, each with a detailed commit message, are
preparations for patch #4.

Tested with the following configs:
(1) CONFIG_NUMA=n, CONFIG_SMP=n
(2) CONFIG_NUMA=n, CONFIG_SMP=y
(3) CONFIG_NUMA=y, CONFIG_SMP=n (not allowed due to NUMA select SMP)
(4) CONFIG_NUMA=y, CONFIG_SMP=y

Tiezhu Yang (4):
  LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
  LoongArch: Refactor get_acpi_id_for_cpu() related code
  LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state
  LoongArch: Give chance to build under !CONFIG_SMP

 arch/loongarch/Kconfig                |  4 +-
 arch/loongarch/include/asm/acpi.h     |  8 +++-
 arch/loongarch/include/asm/smp.h      |  5 +++
 arch/loongarch/kernel/acpi.c          |  9 +++-
 arch/loongarch/kernel/irq.c           |  2 +
 arch/loongarch/kernel/machine_kexec.c |  2 +-
 arch/loongarch/kernel/numa.c          | 58 --------------------------
 arch/loongarch/kernel/smp.c           | 59 +++++++++++++++++++++++++++
 arch/loongarch/power/suspend.c        |  4 +-
 9 files changed, 87 insertions(+), 64 deletions(-)

-- 
2.42.0


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
  2024-04-11  1:05 [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Tiezhu Yang
@ 2024-04-11  1:05 ` Tiezhu Yang
  2024-04-12  4:12   ` Huacai Chen
  2024-04-11  1:05 ` [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code Tiezhu Yang
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-11  1:05 UTC (permalink / raw
  To: Huacai Chen, Thomas Gleixner, Arnd Bergmann, Marc Zyngier
  Cc: loongarch, linux-kernel, loongson-kernel

Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the
arch-specific setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
actually used; this is not reasonable and does not work as intended.

Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
avoid this problem.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
---
 arch/loongarch/Kconfig       |  2 +-
 arch/loongarch/kernel/numa.c | 58 -----------------------------------
 arch/loongarch/kernel/smp.c  | 59 ++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index a5f300ec6f28..64052ae2c2af 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -151,7 +151,7 @@ config LOONGARCH
 	select HAVE_RUST
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
-	select HAVE_SETUP_PER_CPU_AREA if NUMA
+	select HAVE_SETUP_PER_CPU_AREA
 	select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 8fe21f868f72..49dc1d932ce2 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);
 
 nodemask_t numa_nodes_parsed __initdata;
 
-#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static int __init pcpu_cpu_to_node(int cpu)
-{
-	return early_cpu_to_node(cpu);
-}
-
-static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
-{
-	if (early_cpu_to_node(from) == early_cpu_to_node(to))
-		return LOCAL_DISTANCE;
-	else
-		return REMOTE_DISTANCE;
-}
-
-void __init pcpu_populate_pte(unsigned long addr)
-{
-	populate_kernel_pte(addr);
-}
-
-void __init setup_per_cpu_areas(void)
-{
-	unsigned long delta;
-	unsigned int cpu;
-	int rc = -EINVAL;
-
-	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
-		if (nr_node_ids >= 8)
-			pcpu_chosen_fc = PCPU_FC_PAGE;
-		else
-			pcpu_chosen_fc = PCPU_FC_EMBED;
-	}
-
-	/*
-	 * Always reserve area for module percpu variables.  That's
-	 * what the legacy allocator did.
-	 */
-	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
-		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
-					    pcpu_cpu_distance, pcpu_cpu_to_node);
-		if (rc < 0)
-			pr_warn("%s allocator failed (%d), falling back to page size\n",
-				pcpu_fc_names[pcpu_chosen_fc], rc);
-	}
-	if (rc < 0)
-		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
-	if (rc < 0)
-		panic("cannot initialize percpu area (err=%d)", rc);
-
-	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
-	for_each_possible_cpu(cpu)
-		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
-}
-#endif
-
 /*
  * Get nodeid by logical cpu number.
  * __cpuid_to_node maps phyical cpu id to node, so we
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index aabee0b280fe..88b9c6b68d1e 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -29,6 +29,7 @@
 #include <asm/loongson.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
+#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/time.h>
@@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
 	on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
 }
 EXPORT_SYMBOL(flush_tlb_one);
+
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return early_cpu_to_node(cpu);
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
+{
+	populate_kernel_pte(addr);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc = -EINVAL;
+
+	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
+		if (nr_node_ids >= 8)
+			pcpu_chosen_fc = PCPU_FC_PAGE;
+		else
+			pcpu_chosen_fc = PCPU_FC_EMBED;
+	}
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
+					    pcpu_cpu_distance, pcpu_cpu_to_node);
+		if (rc < 0)
+			pr_warn("%s allocator failed (%d), falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
+	if (rc < 0)
+		panic("cannot initialize percpu area (err=%d)", rc);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code
  2024-04-11  1:05 [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Tiezhu Yang
  2024-04-11  1:05 ` [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c Tiezhu Yang
@ 2024-04-11  1:05 ` Tiezhu Yang
  2024-04-12  4:17   ` Huacai Chen
  2024-04-11  1:05 ` [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state Tiezhu Yang
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-11  1:05 UTC (permalink / raw
  To: Huacai Chen, Thomas Gleixner, Arnd Bergmann, Marc Zyngier
  Cc: loongarch, linux-kernel, loongson-kernel

Currently, cpu_logical_map(cpu) is defined as __cpu_logical_map[cpu]
in arch/loongarch/include/asm/smp.h and __cpu_logical_map[] is defined
in arch/loongarch/kernel/smp.c, that is to say, cpu_logical_map(cpu) is
valid only under CONFIG_SMP, the implementation of get_acpi_id_for_cpu()
which calls cpu_logical_map(cpu) is not suitable for the case of non-SMP,
so refactor get_acpi_id_for_cpu() related code to make it work well for
both SMP and non-SMP.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
---
 arch/loongarch/include/asm/acpi.h | 7 ++++++-
 arch/loongarch/kernel/acpi.c      | 9 ++++++++-
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 49e29b29996f..a4ad3f75bd60 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -39,9 +39,14 @@ extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
 
 extern int __init parse_acpi_topology(void);
 
+static inline struct acpi_madt_core_pic *acpi_cpu_get_core_pic(int cpu)
+{
+	return &acpi_core_pic[cpu];
+}
+
 static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
 {
-	return acpi_core_pic[cpu_logical_map(cpu)].processor_id;
+	return acpi_cpu_get_core_pic(cpu)->processor_id;
 }
 
 #endif /* !CONFIG_ACPI */
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 5cf59c617126..ccfa90faf0ea 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -98,8 +98,15 @@ acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long en
 		return -EINVAL;
 
 	acpi_table_print_madt_entry(&header->common);
+
+	/*
+	 * When CONFIG_SMP is disabled, mapping won't be created for all cpus.
+	 * CPUs more than num_possible_cpus will be ignored.
+	 */
+	if (processor->core_id >= 0 && processor->core_id < num_possible_cpus())
+		acpi_core_pic[processor->core_id] = *processor;
+
 #ifdef CONFIG_SMP
-	acpi_core_pic[processor->core_id] = *processor;
 	set_processor_mask(processor->core_id, processor->flags);
 #endif
 
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state
  2024-04-11  1:05 [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Tiezhu Yang
  2024-04-11  1:05 ` [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c Tiezhu Yang
  2024-04-11  1:05 ` [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code Tiezhu Yang
@ 2024-04-11  1:05 ` Tiezhu Yang
  2024-04-12  4:18   ` Huacai Chen
  2024-04-11  1:05 ` [PATCH v3 4/4] LoongArch: Give chance to build under !CONFIG_SMP Tiezhu Yang
  2024-04-11  4:26 ` [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Huacai Chen
  4 siblings, 1 reply; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-11  1:05 UTC (permalink / raw
  To: Huacai Chen, Thomas Gleixner, Arnd Bergmann, Marc Zyngier
  Cc: loongarch, linux-kernel, loongson-kernel

Currently, per_cpu_offset(x) is defined as (__per_cpu_offset[x])
only under CONFIG_SMP in include/asm-generic/percpu.h, that is
to say, the implementation of loongarch_common_resume() which
calls per_cpu_offset(0) is not suitable for the case of non-SMP,
so do not write per_cpu_offset(0) to PERCPU_BASE_KS when resume,
just save the value of PERCPU_BASE_KS when suspend and restore
it when resume to make it work well for both SMP and non-SMP.

Tested with the command "rtcwake -d rtc1 -s 20 -m mem -v", dmesg
shows that "PM: suspend entry (deep)" and "PM: suspend exit".

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
---
 arch/loongarch/power/suspend.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c
index 166d9e06a64b..7a97949ddade 100644
--- a/arch/loongarch/power/suspend.c
+++ b/arch/loongarch/power/suspend.c
@@ -16,6 +16,7 @@
 #include <asm/tlbflush.h>
 
 u64 loongarch_suspend_addr;
+static u64 saved_pcpu_base;
 
 struct saved_registers {
 	u32 ecfg;
@@ -36,6 +37,7 @@ void loongarch_common_suspend(void)
 	saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1);
 	saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG);
 	saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN);
+	saved_pcpu_base = csr_read64(PERCPU_BASE_KS);
 
 	loongarch_suspend_addr = loongson_sysconf.suspend_addr;
 }
@@ -44,7 +46,7 @@ void loongarch_common_resume(void)
 {
 	sync_counter();
 	local_flush_tlb_all();
-	csr_write64(per_cpu_offset(0), PERCPU_BASE_KS);
+	csr_write64(saved_pcpu_base, PERCPU_BASE_KS);
 	csr_write64(eentry, LOONGARCH_CSR_EENTRY);
 	csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
 	csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v3 4/4] LoongArch: Give chance to build under !CONFIG_SMP
  2024-04-11  1:05 [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Tiezhu Yang
                   ` (2 preceding siblings ...)
  2024-04-11  1:05 ` [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state Tiezhu Yang
@ 2024-04-11  1:05 ` Tiezhu Yang
  2024-04-11  4:26 ` [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Huacai Chen
  4 siblings, 0 replies; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-11  1:05 UTC (permalink / raw
  To: Huacai Chen, Thomas Gleixner, Arnd Bergmann, Marc Zyngier
  Cc: loongarch, linux-kernel, loongson-kernel

In the current code, SMP is selected in Kconfig for LoongArch, the users
can not unset it, this is reasonable for a multiprocessor machine. But as
the help info of config SMP said, if you have a system with only one CPU,
say N. On a uniprocessor machine, the kernel will run faster if you say N
here.

The Loongson-2K0500 is a single-core CPU for applications like industrial
control, printing terminals, and BMC (Baseboard Management Controller),
there are many development boards, products and solutions on the market,
so it is better and necessary to give a chance to build under !CONFIG_SMP
for a uniprocessor machine.

First of all, do not select SMP for config LOONGARCH in Kconfig to make it
possible to unset CONFIG_SMP. Then, do some changes to fix the warnings and
errors if CONFIG_SMP is not set.

(1) Define get_ipi_irq() only if CONFIG_SMP is set to fix the warning:
arch/loongarch/kernel/irq.c:90:19: warning: 'get_ipi_irq' defined but not used [-Wunused-function]

(2) Add "#ifdef CONFIG_SMP" in asm/smp.h to fix the warning:
./arch/loongarch/include/asm/smp.h:49:9: warning: "raw_smp_processor_id" redefined
   49 | #define raw_smp_processor_id raw_smp_processor_id
      |         ^~~~~~~~~~~~~~~~~~~~
./include/linux/smp.h:198:9: note: this is the location of the previous definition
  198 | #define raw_smp_processor_id()                  0

(3) Define machine_shutdown() as empty under !CONFIG_SMP to fix the error:
arch/loongarch/kernel/machine_kexec.c: In function 'machine_shutdown':
arch/loongarch/kernel/machine_kexec.c:233:25: error: implicit declaration of function 'cpu_device_up'; did you mean 'put_device'? [-Wimplicit-function-declaration]

(4) Make config SCHED_SMT depend on SMP to fix many errors such as:
kernel/sched/core.c: In function 'sched_core_find':
kernel/sched/core.c:310:43: error: 'struct rq' has no member named 'cpu'

(5) Define cpu_logical_map(cpu) as read_csr_cpuid() under !CONFIG_SMP
in asm/smp.h and then include asm/smp.h in asm/acpi.h (because acpi.h
is included in linux/irq.h indirectly) to fix many build errors under
drivers/irqchip such as:
drivers/irqchip/irq-loongson-eiointc.c: In function 'cpu_to_eio_node':
drivers/irqchip/irq-loongson-eiointc.c:59:16: error: implicit declaration of function 'cpu_logical_map' [-Wimplicit-function-declaration]

When running the UnixBench tests with "-c 1" single-streamed pass,
the improvement in performance is about 9 percent with this patch.

By the way, it is helpful to debug and analyze kernel issues
of a multi-core system under !CONFIG_SMP.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
---
 arch/loongarch/Kconfig                | 2 +-
 arch/loongarch/include/asm/acpi.h     | 1 +
 arch/loongarch/include/asm/smp.h      | 5 +++++
 arch/loongarch/kernel/irq.c           | 2 ++
 arch/loongarch/kernel/machine_kexec.c | 2 +-
 5 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 64052ae2c2af..8946945daf34 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -174,7 +174,6 @@ config LOONGARCH
 	select PCI_QUIRKS
 	select PERF_USE_VMALLOC
 	select RTC_LIB
-	select SMP
 	select SPARSE_IRQ
 	select SYSCTL_ARCH_UNALIGN_ALLOW
 	select SYSCTL_ARCH_UNALIGN_NO_WARN
@@ -420,6 +419,7 @@ config EFI_STUB
 
 config SCHED_SMT
 	bool "SMT scheduler support"
+	depends on SMP
 	default y
 	help
 	  Improves scheduler's performance when there are multiple
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index a4ad3f75bd60..5174063ad08e 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -8,6 +8,7 @@
 #ifndef _ASM_LOONGARCH_ACPI_H
 #define _ASM_LOONGARCH_ACPI_H
 
+#include <asm/smp.h>
 #include <asm/suspend.h>
 
 #ifdef CONFIG_ACPI
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index f81e5f01d619..1ec11b57e60c 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -6,6 +6,7 @@
 #ifndef __ASM_SMP_H
 #define __ASM_SMP_H
 
+#ifdef CONFIG_SMP
 #include <linux/atomic.h>
 #include <linux/bitops.h>
 #include <linux/linkage.h>
@@ -101,4 +102,8 @@ static inline void __cpu_die(unsigned int cpu)
 }
 #endif
 
+#else /* !CONFIG_SMP */
+#define cpu_logical_map(cpu)	read_csr_cpuid()
+#endif /* CONFIG_SMP */
+
 #endif /* __ASM_SMP_H */
diff --git a/arch/loongarch/kernel/irq.c b/arch/loongarch/kernel/irq.c
index 883e5066ae44..e791fa275ec5 100644
--- a/arch/loongarch/kernel/irq.c
+++ b/arch/loongarch/kernel/irq.c
@@ -87,6 +87,7 @@ static void __init init_vec_parent_group(void)
 	acpi_table_parse(ACPI_SIG_MCFG, early_pci_mcfg_parse);
 }
 
+#ifdef CONFIG_SMP
 static int __init get_ipi_irq(void)
 {
 	struct irq_domain *d = irq_find_matching_fwnode(cpuintc_handle, DOMAIN_BUS_ANY);
@@ -96,6 +97,7 @@ static int __init get_ipi_irq(void)
 
 	return -EINVAL;
 }
+#endif
 
 void __init init_IRQ(void)
 {
diff --git a/arch/loongarch/kernel/machine_kexec.c b/arch/loongarch/kernel/machine_kexec.c
index 2dcb9e003657..8ae641dc53bb 100644
--- a/arch/loongarch/kernel/machine_kexec.c
+++ b/arch/loongarch/kernel/machine_kexec.c
@@ -225,6 +225,7 @@ void crash_smp_send_stop(void)
 
 void machine_shutdown(void)
 {
+#ifdef CONFIG_SMP
 	int cpu;
 
 	/* All CPUs go to reboot_code_buffer */
@@ -232,7 +233,6 @@ void machine_shutdown(void)
 		if (!cpu_online(cpu))
 			cpu_device_up(get_cpu_device(cpu));
 
-#ifdef CONFIG_SMP
 	smp_call_function(kexec_shutdown_secondary, NULL, 0);
 #endif
 }
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch
  2024-04-11  1:05 [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Tiezhu Yang
                   ` (3 preceding siblings ...)
  2024-04-11  1:05 ` [PATCH v3 4/4] LoongArch: Give chance to build under !CONFIG_SMP Tiezhu Yang
@ 2024-04-11  4:26 ` Huacai Chen
  2024-04-11  6:06   ` Arnd Bergmann
  4 siblings, 1 reply; 17+ messages in thread
From: Huacai Chen @ 2024-04-11  4:26 UTC (permalink / raw
  To: Marc Zyngier, Arnd Bergmann, Tiezhu Yang
  Cc: Thomas Gleixner, loongarch, linux-kernel, loongson-kernel

Hi, Marc and Arnd,

I remember that you both suggested not introducing NOSMP support for a
modern architecture which brings additional complexity. I wonder if
you still have the same attitude now. I will merge this series only if
you think it is worthy to introduce NOSMP now.

Huacai

On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> The changes of irqchip have been merged into the irq/core branch of tip.
>
> https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=42a7d887664b
> https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=a64003da0ef8
>
> This version is only related with arch/loongarch and based on 6.9-rc3,
> the first 3 patches with detailed commit message are preparations for
> patch #4.
>
> Tested with the following configs:
> (1) CONFIG_NUMA=n, CONFIG_SMP=n
> (2) CONFIG_NUMA=n, CONFIG_SMP=y
> (3) CONFIG_NUMA=y, CONFIG_SMP=n (not allowed due to NUMA select SMP)
> (4) CONFIG_NUMA=y, CONFIG_SMP=y
>
> Tiezhu Yang (4):
>   LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
>   LoongArch: Refactor get_acpi_id_for_cpu() related code
>   LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state
>   LoongArch: Give chance to build under !CONFIG_SMP
>
>  arch/loongarch/Kconfig                |  4 +-
>  arch/loongarch/include/asm/acpi.h     |  8 +++-
>  arch/loongarch/include/asm/smp.h      |  5 +++
>  arch/loongarch/kernel/acpi.c          |  9 +++-
>  arch/loongarch/kernel/irq.c           |  2 +
>  arch/loongarch/kernel/machine_kexec.c |  2 +-
>  arch/loongarch/kernel/numa.c          | 58 --------------------------
>  arch/loongarch/kernel/smp.c           | 59 +++++++++++++++++++++++++++
>  arch/loongarch/power/suspend.c        |  4 +-
>  9 files changed, 87 insertions(+), 64 deletions(-)
>
> --
> 2.42.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch
  2024-04-11  4:26 ` [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Huacai Chen
@ 2024-04-11  6:06   ` Arnd Bergmann
  2024-04-12  4:00     ` Huacai Chen
  2024-04-18 17:44     ` Xi Ruoyao
  0 siblings, 2 replies; 17+ messages in thread
From: Arnd Bergmann @ 2024-04-11  6:06 UTC (permalink / raw
  To: Huacai Chen, Marc Zyngier, Tiezhu Yang
  Cc: Thomas Gleixner, loongarch, linux-kernel, loongson-kernel

On Thu, Apr 11, 2024, at 06:26, Huacai Chen wrote:
>
> I remember that you both suggested not introducing NOSMP support for a
> modern architecture which brings additional complexity. I wonder if
> you still have the same attitude now. I will merge this series only if
> you think it is worthy to introduce NOSMP now.

It's an interesting question, as we have recently discussed two
opposite ideas and may end up doing both (or possibly neither)
in the future:

- On x86, there is no real reason to need non-SMP kernels as the
  memory savings are fairly small, and it tends to break because
  of lack of users testing it.

- On arm64, we have never supported non-SMP kernels, but I have
  looked at possibly adding this because there are still popular
  ARM based system-in-package products with less than 128MB of
  built-in RAM and only a single CPU. As these are moving from
  32-bit to 64-bit cores, it becomes more interesting to build
  a 64-bit UP kernel and save multiple megabytes.

I think loongarch64 is in the same place as arm64 and riscv64
(which does allow non-SMP builds) here, and there are good
reasons to allow it on all of them now, even if we previously
never had a need for it.

That said, both the 9% observed performance improvements that
Tiezhu Yang reported, and the memory savings that I saw are
probably higher than they should be, so we may also want to
investigate that further to see if we can improve the SMP
kernel to better support UP runs.

    Arnd

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch
  2024-04-11  6:06   ` Arnd Bergmann
@ 2024-04-12  4:00     ` Huacai Chen
  2024-04-18 17:44     ` Xi Ruoyao
  1 sibling, 0 replies; 17+ messages in thread
From: Huacai Chen @ 2024-04-12  4:00 UTC (permalink / raw
  To: Arnd Bergmann
  Cc: Marc Zyngier, Tiezhu Yang, Thomas Gleixner, loongarch,
	linux-kernel, loongson-kernel

Hi, Arnd,

On Thu, Apr 11, 2024 at 2:07 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Thu, Apr 11, 2024, at 06:26, Huacai Chen wrote:
> >
> > I remember that you both suggested not introducing NOSMP support for a
> > modern architecture which brings additional complexity. I wonder if
> > you still have the same attitude now. I will merge this series only if
> > you think it is worthy to introduce NOSMP now.
>
> It's an interesting question, as we have recently discussed two
> opposite ideas and may end up doing both (or possible neither)
> in the future:
>
> - On x86, there is no real reason to need non-SMP kernels as the
>   memory savings are fairly small, and it tends to break because
>   of lack of users testing it.
>
> - On arm64, we have never supported non-SMP kernels, but I have
>   looked at possibly adding this because there are still popular
>   ARM based system-in-package products with less than 128MB of
>   built-in RAM and only a single CPU. As these are moving from
>   32-bit to 64-bit cores, it becomes more interesting to build
>   a 64-bit UP kernel and save multiple megabytes.
>
> I think loongarch64 is in the same place as arm64 and riscv64
> (which does allow non-SMP builds) here, and there are good
> reasons to allow it on all of them now, even if we previously
> never had a need for it.
OK, thanks. This means you agree to support non-SMP on LoongArch now,
then I will review this series carefully.

Huacai
>
> That said, both the 9% observed performance improvements that
> Tiezhu Yang reported, and the memory savings that I saw are
> probably higher than they should be, so we may also want to
> investigate that further to see if we can improve the SMP
> kernel to better support UP runs.
>
>     Arnd
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
  2024-04-11  1:05 ` [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c Tiezhu Yang
@ 2024-04-12  4:12   ` Huacai Chen
  2024-04-12  9:27     ` Tiezhu Yang
  0 siblings, 1 reply; 17+ messages in thread
From: Huacai Chen @ 2024-04-12  4:12 UTC (permalink / raw
  To: Tiezhu Yang
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel

On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
> actually used, this is not reasonable and does not work as intended.
Why is the generic version not reasonable? We need a custom version
just because it can put the percpu variable in the best node. If NUMA
is disabled, software can only see one node, so how could it be optimized?

Huacai

>
> Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
> CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
> avoid this problem.
>
> Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
> ---
>  arch/loongarch/Kconfig       |  2 +-
>  arch/loongarch/kernel/numa.c | 58 -----------------------------------
>  arch/loongarch/kernel/smp.c  | 59 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 60 insertions(+), 59 deletions(-)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index a5f300ec6f28..64052ae2c2af 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -151,7 +151,7 @@ config LOONGARCH
>         select HAVE_RUST
>         select HAVE_SAMPLE_FTRACE_DIRECT
>         select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
> -       select HAVE_SETUP_PER_CPU_AREA if NUMA
> +       select HAVE_SETUP_PER_CPU_AREA
>         select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
>         select HAVE_STACKPROTECTOR
>         select HAVE_SYSCALL_TRACEPOINTS
> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> index 8fe21f868f72..49dc1d932ce2 100644
> --- a/arch/loongarch/kernel/numa.c
> +++ b/arch/loongarch/kernel/numa.c
> @@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);
>
>  nodemask_t numa_nodes_parsed __initdata;
>
> -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> -EXPORT_SYMBOL(__per_cpu_offset);
> -
> -static int __init pcpu_cpu_to_node(int cpu)
> -{
> -       return early_cpu_to_node(cpu);
> -}
> -
> -static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> -{
> -       if (early_cpu_to_node(from) == early_cpu_to_node(to))
> -               return LOCAL_DISTANCE;
> -       else
> -               return REMOTE_DISTANCE;
> -}
> -
> -void __init pcpu_populate_pte(unsigned long addr)
> -{
> -       populate_kernel_pte(addr);
> -}
> -
> -void __init setup_per_cpu_areas(void)
> -{
> -       unsigned long delta;
> -       unsigned int cpu;
> -       int rc = -EINVAL;
> -
> -       if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> -               if (nr_node_ids >= 8)
> -                       pcpu_chosen_fc = PCPU_FC_PAGE;
> -               else
> -                       pcpu_chosen_fc = PCPU_FC_EMBED;
> -       }
> -
> -       /*
> -        * Always reserve area for module percpu variables.  That's
> -        * what the legacy allocator did.
> -        */
> -       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> -               rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> -                                           PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> -                                           pcpu_cpu_distance, pcpu_cpu_to_node);
> -               if (rc < 0)
> -                       pr_warn("%s allocator failed (%d), falling back to page size\n",
> -                               pcpu_fc_names[pcpu_chosen_fc], rc);
> -       }
> -       if (rc < 0)
> -               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> -       if (rc < 0)
> -               panic("cannot initialize percpu area (err=%d)", rc);
> -
> -       delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> -       for_each_possible_cpu(cpu)
> -               __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> -}
> -#endif
> -
>  /*
>   * Get nodeid by logical cpu number.
>   * __cpuid_to_node maps phyical cpu id to node, so we
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index aabee0b280fe..88b9c6b68d1e 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -29,6 +29,7 @@
>  #include <asm/loongson.h>
>  #include <asm/mmu_context.h>
>  #include <asm/numa.h>
> +#include <asm/pgalloc.h>
>  #include <asm/processor.h>
>  #include <asm/setup.h>
>  #include <asm/time.h>
> @@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
>         on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
>  }
>  EXPORT_SYMBOL(flush_tlb_one);
> +
> +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> +EXPORT_SYMBOL(__per_cpu_offset);
> +
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> +       return early_cpu_to_node(cpu);
> +}
> +
> +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> +{
> +       if (early_cpu_to_node(from) == early_cpu_to_node(to))
> +               return LOCAL_DISTANCE;
> +       else
> +               return REMOTE_DISTANCE;
> +}
> +
> +void __init pcpu_populate_pte(unsigned long addr)
> +{
> +       populate_kernel_pte(addr);
> +}
> +
> +void __init setup_per_cpu_areas(void)
> +{
> +       unsigned long delta;
> +       unsigned int cpu;
> +       int rc = -EINVAL;
> +
> +       if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> +               if (nr_node_ids >= 8)
> +                       pcpu_chosen_fc = PCPU_FC_PAGE;
> +               else
> +                       pcpu_chosen_fc = PCPU_FC_EMBED;
> +       }
> +
> +       /*
> +        * Always reserve area for module percpu variables.  That's
> +        * what the legacy allocator did.
> +        */
> +       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> +               rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> +                                           PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> +                                           pcpu_cpu_distance, pcpu_cpu_to_node);
> +               if (rc < 0)
> +                       pr_warn("%s allocator failed (%d), falling back to page size\n",
> +                               pcpu_fc_names[pcpu_chosen_fc], rc);
> +       }
> +       if (rc < 0)
> +               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> +       if (rc < 0)
> +               panic("cannot initialize percpu area (err=%d)", rc);
> +
> +       delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> +       for_each_possible_cpu(cpu)
> +               __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> +}
> +#endif
> --
> 2.42.0
>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code
  2024-04-11  1:05 ` [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code Tiezhu Yang
@ 2024-04-12  4:17   ` Huacai Chen
  2024-04-12  9:28     ` Tiezhu Yang
  0 siblings, 1 reply; 17+ messages in thread
From: Huacai Chen @ 2024-04-12  4:17 UTC (permalink / raw
  To: Tiezhu Yang
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel

Hi, Tiezhu,

On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> Currently, cpu_logical_map(cpu) is defined as __cpu_logical_map[cpu]
> in arch/loongarch/include/asm/smp.h and __cpu_logical_map[] is defined
> in arch/loongarch/kernel/smp.c, that is to say, cpu_logical_map(cpu) is
> valid only under CONFIG_SMP, the implementation of get_acpi_id_for_cpu()
> which calls cpu_logical_map(cpu) is not suitable for the case of non-SMP,
> so refactor get_acpi_id_for_cpu() related code to make it work well for
> both SMP and non-SMP.
But you implement cpu_logical_map(cpu) for non-SMP in the 4th patch, right?

Huacai
>
> Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
> ---
>  arch/loongarch/include/asm/acpi.h | 7 ++++++-
>  arch/loongarch/kernel/acpi.c      | 9 ++++++++-
>  2 files changed, 14 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
> index 49e29b29996f..a4ad3f75bd60 100644
> --- a/arch/loongarch/include/asm/acpi.h
> +++ b/arch/loongarch/include/asm/acpi.h
> @@ -39,9 +39,14 @@ extern struct acpi_madt_core_pic acpi_core_pic[MAX_CORE_PIC];
>
>  extern int __init parse_acpi_topology(void);
>
> +static inline struct acpi_madt_core_pic *acpi_cpu_get_core_pic(int cpu)
> +{
> +       return &acpi_core_pic[cpu];
> +}
> +
>  static inline u32 get_acpi_id_for_cpu(unsigned int cpu)
>  {
> -       return acpi_core_pic[cpu_logical_map(cpu)].processor_id;
> +       return acpi_cpu_get_core_pic(cpu)->processor_id;
>  }
>
>  #endif /* !CONFIG_ACPI */
> diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
> index 5cf59c617126..ccfa90faf0ea 100644
> --- a/arch/loongarch/kernel/acpi.c
> +++ b/arch/loongarch/kernel/acpi.c
> @@ -98,8 +98,15 @@ acpi_parse_processor(union acpi_subtable_headers *header, const unsigned long en
>                 return -EINVAL;
>
>         acpi_table_print_madt_entry(&header->common);
> +
> +       /*
> +        * When CONFIG_SMP is disabled, mapping won't be created for all cpus.
> +        * CPUs more than num_possible_cpus will be ignored.
> +        */
> +       if (processor->core_id >= 0 && processor->core_id < num_possible_cpus())
> +               acpi_core_pic[processor->core_id] = *processor;
> +
>  #ifdef CONFIG_SMP
> -       acpi_core_pic[processor->core_id] = *processor;
>         set_processor_mask(processor->core_id, processor->flags);
>  #endif
>
> --
> 2.42.0
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state
  2024-04-11  1:05 ` [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state Tiezhu Yang
@ 2024-04-12  4:18   ` Huacai Chen
  2024-04-12  9:30     ` Tiezhu Yang
  0 siblings, 1 reply; 17+ messages in thread
From: Huacai Chen @ 2024-04-12  4:18 UTC (permalink / raw
  To: Tiezhu Yang
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel

Hi, Tiezhu,

On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> Currently, per_cpu_offset(x) is defined as __per_cpu_offset[x]
> only under CONFIG_SMP in include/asm-generic/percpu.h, that is
> to say, the implementation of loongarch_common_resume() which
> calls per_cpu_offset(0) is not suitable for the case of non-SMP,
> so do not write per_cpu_offset(0) to PERCPU_BASE_KS when resume,
> just save the value of PERCPU_BASE_KS when suspend and restore
> it when resume to make it work well for both SMP and non-SMP.
For non-SMP you need PERCPU_BASE_KS to do what?

Huacai

>
> Tested with the command "rtcwake -d rtc1 -s 20 -m mem -v", dmesg
> shows that "PM: suspend entry (deep)" and "PM: suspend exit".
>
> Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
> ---
>  arch/loongarch/power/suspend.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c
> index 166d9e06a64b..7a97949ddade 100644
> --- a/arch/loongarch/power/suspend.c
> +++ b/arch/loongarch/power/suspend.c
> @@ -16,6 +16,7 @@
>  #include <asm/tlbflush.h>
>
>  u64 loongarch_suspend_addr;
> +static u64 saved_pcpu_base;
>
>  struct saved_registers {
>         u32 ecfg;
> @@ -36,6 +37,7 @@ void loongarch_common_suspend(void)
>         saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1);
>         saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG);
>         saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN);
> +       saved_pcpu_base = csr_read64(PERCPU_BASE_KS);
>
>         loongarch_suspend_addr = loongson_sysconf.suspend_addr;
>  }
> @@ -44,7 +46,7 @@ void loongarch_common_resume(void)
>  {
>         sync_counter();
>         local_flush_tlb_all();
> -       csr_write64(per_cpu_offset(0), PERCPU_BASE_KS);
> +       csr_write64(saved_pcpu_base, PERCPU_BASE_KS);
>         csr_write64(eentry, LOONGARCH_CSR_EENTRY);
>         csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
>         csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);
> --
> 2.42.0
>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
  2024-04-12  4:12   ` Huacai Chen
@ 2024-04-12  9:27     ` Tiezhu Yang
  2024-04-13  7:20       ` Huacai Chen
  0 siblings, 1 reply; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-12  9:27 UTC (permalink / raw
  To: Huacai Chen
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel



On 04/12/2024 12:12 PM, Huacai Chen wrote:
> On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>>
>> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
>> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
>> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
>> actually used, this is not reasonable and does not work as intended.
> Why is the generic version not reasonable? We need a custom version
> just because it can put the percpu variable in the best node. If NUMA
> disabled, software can only see one node, how to optimize?

The initial aim is to use the arch specific setup_per_cpu_areas()
in any case under CONFIG_SMP, this patch can be dropped if it is
meaningless for the case of !CONFIG_NUMA and CONFIG_SMP.

Thanks,
Tiezhu


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code
  2024-04-12  4:17   ` Huacai Chen
@ 2024-04-12  9:28     ` Tiezhu Yang
  0 siblings, 0 replies; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-12  9:28 UTC (permalink / raw
  To: Huacai Chen
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel



On 04/12/2024 12:17 PM, Huacai Chen wrote:
> Hi, Tiezhu,
>
> On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>>
>> Currently, cpu_logical_map(cpu) is defined as __cpu_logical_map[cpu]
>> in arch/loongarch/include/asm/smp.h and __cpu_logical_map[] is defined
>> in arch/loongarch/kernel/smp.c, that is to say, cpu_logical_map(cpu) is
>> valid only under CONFIG_SMP, the implementation of get_acpi_id_for_cpu()
>> which calls cpu_logical_map(cpu) is not suitable for the case of non-SMP,
>> so refactor get_acpi_id_for_cpu() related code to make it work well for
>> both SMP and non-SMP.
> But you implement cpu_logical_map(cpu) for non-SMP in the 4th patch, right?

The initial aim was to make get_acpi_id_for_cpu() depend only on
ACPI data, modelled on the riscv code below. This patch can be
dropped if you think it is better to only define cpu_logical_map(cpu)
under !CONFIG_SMP, though I am not sure whether the changes in acpi.c
should be kept at least.

static int acpi_parse_madt_rintc(union acpi_subtable_headers *header, 
const unsigned long end)
{
	struct acpi_madt_rintc *rintc = (struct acpi_madt_rintc *)header;
	int cpuid;

	if (!(rintc->flags & ACPI_MADT_ENABLED))
		return 0;

	cpuid = riscv_hartid_to_cpuid(rintc->hart_id);
	/*
	 * When CONFIG_SMP is disabled, mapping won't be created for
	 * all cpus.
	 * CPUs more than num_possible_cpus, will be ignored.
	 */
	if (cpuid >= 0 && cpuid < num_possible_cpus())
		cpu_madt_rintc[cpuid] = *rintc;

	return 0;
}

struct acpi_madt_rintc *acpi_cpu_get_madt_rintc(int cpu)
{
	return &cpu_madt_rintc[cpu];
}

u32 get_acpi_id_for_cpu(int cpu)
{
	return acpi_cpu_get_madt_rintc(cpu)->uid;
}

https://elixir.bootlin.com/linux/v6.9-rc3/source/arch/riscv/kernel/acpi.c#L194

Thanks,
Tiezhu


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state
  2024-04-12  4:18   ` Huacai Chen
@ 2024-04-12  9:30     ` Tiezhu Yang
  2024-04-13  7:22       ` Huacai Chen
  0 siblings, 1 reply; 17+ messages in thread
From: Tiezhu Yang @ 2024-04-12  9:30 UTC (permalink / raw
  To: Huacai Chen
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel



On 04/12/2024 12:18 PM, Huacai Chen wrote:
> Hi, Tiezhu,
>
> On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>>
>> Currently, per_cpu_offset(x) is defined as __per_cpu_offset[x]
>> only under CONFIG_SMP in include/asm-generic/percpu.h, that is
>> to say, the implementation of loongarch_common_resume() which
>> calls per_cpu_offset(0) is not suitable for the case of non-SMP,
>> so do not write per_cpu_offset(0) to PERCPU_BASE_KS when resume,
>> just save the value of PERCPU_BASE_KS when suspend and restore
>> it when resume to make it work well for both SMP and non-SMP.
> For non-SMP you need PERCPU_BASE_KS to do what?

The initial aim is to avoid build error under !CONFIG_SMP
and it works well on both !CONFIG_SMP and CONFIG_SMP, the
changes are similar to the code in hibernate.c.

An alternative way is to do the following simple change,
but it seems a little ugly due to the ifdef, let me know
what is your preference.

diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c
index 166d9e06a64b..35191afefcda 100644
--- a/arch/loongarch/power/suspend.c
+++ b/arch/loongarch/power/suspend.c
@@ -44,7 +44,9 @@ void loongarch_common_resume(void)
  {
         sync_counter();
         local_flush_tlb_all();
+#ifdef CONFIG_SMP
         csr_write64(per_cpu_offset(0), PERCPU_BASE_KS);
+#endif
         csr_write64(eentry, LOONGARCH_CSR_EENTRY);
         csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
         csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);

Thanks,
Tiezhu


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
  2024-04-12  9:27     ` Tiezhu Yang
@ 2024-04-13  7:20       ` Huacai Chen
  0 siblings, 0 replies; 17+ messages in thread
From: Huacai Chen @ 2024-04-13  7:20 UTC (permalink / raw
  To: Tiezhu Yang
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel

On Fri, Apr 12, 2024 at 5:27 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
>
>
> On 04/12/2024 12:12 PM, Huacai Chen wrote:
> > On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
> >>
> >> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
> >> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
> >> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
> >> actually used, this is not reasonable and does not work as intended.
> > Why is the generic version not reasonable? We need a custom version
> > just because it can put the percpu variable in the best node. If NUMA
> > disabled, software can only see one node, how to optimize?
>
> The initial aim is to use the arch specific setup_per_cpu_areas()
> in any case under CONFIG_SMP, this patch can be dropped if it is
> meaningless for the case of !CONFIG_NUMA and CONFIG_SMP.
Yes, it is better to drop this patch.

Huacai
>
> Thanks,
> Tiezhu
>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state
  2024-04-12  9:30     ` Tiezhu Yang
@ 2024-04-13  7:22       ` Huacai Chen
  0 siblings, 0 replies; 17+ messages in thread
From: Huacai Chen @ 2024-04-13  7:22 UTC (permalink / raw
  To: Tiezhu Yang
  Cc: Thomas Gleixner, Arnd Bergmann, Marc Zyngier, loongarch,
	linux-kernel, loongson-kernel

On Fri, Apr 12, 2024 at 5:30 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
>
>
> On 04/12/2024 12:18 PM, Huacai Chen wrote:
> > Hi, Tiezhu,
> >
> > On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
> >>
> >> Currently, per_cpu_offset(x) is defined as __per_cpu_offset[x]
> >> only under CONFIG_SMP in include/asm-generic/percpu.h, that is
> >> to say, the implementation of loongarch_common_resume() which
> >> calls per_cpu_offset(0) is not suitable for the case of non-SMP,
> >> so do not write per_cpu_offset(0) to PERCPU_BASE_KS when resume,
> >> just save the value of PERCPU_BASE_KS when suspend and restore
> >> it when resume to make it work well for both SMP and non-SMP.
> > For non-SMP you need PERCPU_BASE_KS to do what?
>
> The initial aim is to avoid build error under !CONFIG_SMP
> and it works well on both !CONFIG_SMP and CONFIG_SMP, the
> changes are similar with the code in hibernate.c.
>
> An alternative way is to do the following simple change,
> but it seems a little ugly due to the ifdef, let me know
> what is your preference.
Adding "u64 pcpu_base" at the end of saved_registers is better than this
patch, and avoids the ifdef.

Huacai

>
> diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c
> index 166d9e06a64b..35191afefcda 100644
> --- a/arch/loongarch/power/suspend.c
> +++ b/arch/loongarch/power/suspend.c
> @@ -44,7 +44,9 @@ void loongarch_common_resume(void)
>   {
>          sync_counter();
>          local_flush_tlb_all();
> +#ifdef CONFIG_SMP
>          csr_write64(per_cpu_offset(0), PERCPU_BASE_KS);
> +#endif
>          csr_write64(eentry, LOONGARCH_CSR_EENTRY);
>          csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
>          csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);
>
> Thanks,
> Tiezhu
>
>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch
  2024-04-11  6:06   ` Arnd Bergmann
  2024-04-12  4:00     ` Huacai Chen
@ 2024-04-18 17:44     ` Xi Ruoyao
  1 sibling, 0 replies; 17+ messages in thread
From: Xi Ruoyao @ 2024-04-18 17:44 UTC (permalink / raw
  To: Arnd Bergmann, Huacai Chen, Marc Zyngier, Tiezhu Yang
  Cc: Thomas Gleixner, loongarch, linux-kernel, loongson-kernel

On Thu, 2024-04-11 at 08:06 +0200, Arnd Bergmann wrote:
> On Thu, Apr 11, 2024, at 06:26, Huacai Chen wrote:
> > 
> > I remember that you both suggested not introducing NOSMP support for a
> > modern architecture which brings additional complexity. I wonder if
> > you still have the same attitude now. I will merge this series only if
> > you think it is worthy to introduce NOSMP now.
> 
> It's an interesting question, as we have recently discussed two
> opposite ideas and may end up doing both (or possible neither)
> in the future:
> 
> - On x86, there is no real reason to need non-SMP kernels as the
>   memory savings are fairly small, and it tends to break because
>   of lack of users testing it.

FWIW I'm still running the latest Linux kernel on a Athlon 64 3000+
launched in 2004 with !CONFIG_SMP :).

No objection to this paragraph (and other paragraphs) though.

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2024-04-18 17:44 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-11  1:05 [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Tiezhu Yang
2024-04-11  1:05 ` [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c Tiezhu Yang
2024-04-12  4:12   ` Huacai Chen
2024-04-12  9:27     ` Tiezhu Yang
2024-04-13  7:20       ` Huacai Chen
2024-04-11  1:05 ` [PATCH v3 2/4] LoongArch: Refactor get_acpi_id_for_cpu() related code Tiezhu Yang
2024-04-12  4:17   ` Huacai Chen
2024-04-12  9:28     ` Tiezhu Yang
2024-04-11  1:05 ` [PATCH v3 3/4] LoongArch: Save and restore PERCPU_BASE_KS for ACPI S3 state Tiezhu Yang
2024-04-12  4:18   ` Huacai Chen
2024-04-12  9:30     ` Tiezhu Yang
2024-04-13  7:22       ` Huacai Chen
2024-04-11  1:05 ` [PATCH v3 4/4] LoongArch: Give chance to build under !CONFIG_SMP Tiezhu Yang
2024-04-11  4:26 ` [PATCH v3 0/4] Give chance to build under !CONFIG_SMP for LoongArch Huacai Chen
2024-04-11  6:06   ` Arnd Bergmann
2024-04-12  4:00     ` Huacai Chen
2024-04-18 17:44     ` Xi Ruoyao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).