Re: [PATCH v3 05/11] percpu: Wire up cmpxchg128

LKML Archive mirror
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: torvalds@linux-foundation.org
Cc: corbet@lwn.net, will@kernel.org, boqun.feng@gmail.com,
	mark.rutland@arm.com, catalin.marinas@arm.com, dennis@kernel.org,
	tj@kernel.org, cl@linux.com, hca@linux.ibm.com,
	gor@linux.ibm.com, agordeev@linux.ibm.com,
	borntraeger@linux.ibm.com, svens@linux.ibm.com,
	tglx@linutronix.de, mingo@redhat.com, bp@alien8.de,
	dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com,
	joro@8bytes.org, suravee.suthikulpanit@amd.com,
	robin.murphy@arm.com, dwmw2@infradead.org,
	baolu.lu@linux.intel.com, Arnd Bergmann <arnd@arndb.de>,
	Herbert Xu <herbert@gondor.apana.org.au>,
	davem@davemloft.net, penberg@kernel.org, rientjes@google.com,
	iamjoonsoo.kim@lge.com, Andrew Morton <akpm@linux-foundation.org>,
	vbabka@suse.cz, roman.gushchin@linux.dev, 42.hyeyoo@gmail.com,
	linux-doc@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, linux-s390@vger.kernel.org,
	iommu@lists.linux.dev, linux-arch@vger.kernel.org,
	linux-crypto@vger.kernel.org
Subject: Re: [PATCH v3 05/11] percpu: Wire up cmpxchg128
Date: Thu, 25 May 2023 14:49:55 +0200	[thread overview]
Message-ID: <20230525124955.GS83892@hirez.programming.kicks-ass.net> (raw)
In-Reply-To: <20230515080554.248739380@infradead.org>

On Mon, May 15, 2023 at 09:57:04AM +0200, Peter Zijlstra wrote:
> In order to replace cmpxchg_double() with the newly minted
> cmpxchg128() family of functions, wire it up in this_cpu_cmpxchg().
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>

> ---
> --- a/arch/x86/include/asm/percpu.h
> +++ b/arch/x86/include/asm/percpu.h
> @@ -210,6 +210,65 @@ do {									\
>  	(typeof(_var))(unsigned long) pco_old__;			\
>  })
>  
> +#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
> +#define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval)		\
> +({									\
> +	union {								\
> +		u64 var;						\
> +		struct {						\
> +			u32 low, high;					\
> +		};							\
> +	} old__, new__;							\
> +									\
> +	old__.var = _oval;						\
> +	new__.var = _nval;						\
> +									\
> +	asm qual ("cmpxchg8b " __percpu_arg([var])			\
> +		  : [var] "+m" (_var),					\
> +		    "+a" (old__.low),					\
> +		    "+d" (old__.high)					\
> +		  : "b" (new__.low),					\
> +		    "c" (new__.high)					\
> +		  : "memory");						\
> +									\
> +	old__.var;							\
> +})
> +
> +#define raw_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg64_op(8,         , pcp, oval, nval)
> +#define this_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg64_op(8, volatile, pcp, oval, nval)
> +#endif
> +
> +#ifdef CONFIG_X86_64
> +#define raw_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg_op(8,         , pcp, oval, nval);
> +#define this_cpu_cmpxchg64(pcp, oval, nval)	percpu_cmpxchg_op(8, volatile, pcp, oval, nval);
> +
> +#define percpu_cmpxchg128_op(size, qual, _var, _oval, _nval)		\
> +({									\
> +	union {								\
> +		u128 var;						\
> +		struct {						\
> +			u64 low, high;					\
> +		};							\
> +	} old__, new__;							\
> +									\
> +	old__.var = _oval;						\
> +	new__.var = _nval;						\
> +									\
> +	asm qual ("cmpxchg16b " __percpu_arg([var])			\
> +		  : [var] "+m" (_var),					\
> +		    "+a" (old__.low),					\
> +		    "+d" (old__.high)					\
> +		  : "b" (new__.low),					\
> +		    "c" (new__.high)					\
> +		  : "memory");						\
> +									\
> +	old__.var;							\
> +})
> +
> +#define raw_cpu_cmpxchg128(pcp, oval, nval)	percpu_cmpxchg128_op(16,         , pcp, oval, nval)
> +#define this_cpu_cmpxchg128(pcp, oval, nval)	percpu_cmpxchg128_op(16, volatile, pcp, oval, nval)
> +#endif
> +
>  /*
>   * this_cpu_read() makes gcc load the percpu variable every time it is
>   * accessed while this_cpu_read_stable() allows the value to be cached.

Since this_cpu_cmpxchg*() is assumed to always be present (their usage
is not guarded with system_has_cmpxchg*() it needs a fallback for then
the instruction is not present.

The below has been tested with clearcpuid=cx16; adding an obvious defect
in the fallback implemention crashes the kernel, with the code as
presented it boots.

Build tested i386-defconfig.

(the things we do for museum pieces :/)

---
 include/asm/percpu.h |   25 +++++++++++---------
 lib/Makefile         |    3 +-
 lib/cmpxchg16b_emu.S |   43 ++++++++++++++++++++--------------
 lib/cmpxchg8b_emu.S  |   63 +++++++++++++++++++++++++++++++++++++++------------
 4 files changed, 90 insertions(+), 44 deletions(-)

--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -210,7 +210,7 @@ do {									\
 	(typeof(_var))(unsigned long) pco_old__;			\
 })
 
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_CMPXCHG64)
+#ifdef CONFIG_X86_32
 #define percpu_cmpxchg64_op(size, qual, _var, _oval, _nval)		\
 ({									\
 	union {								\
@@ -223,13 +223,14 @@ do {									\
 	old__.var = _oval;						\
 	new__.var = _nval;						\
 									\
-	asm qual ("cmpxchg8b " __percpu_arg([var])			\
+	asm qual (ALTERNATIVE("leal %P[var], %%esi; call this_cpu_cmpxchg8b_emu", \
+			      "cmpxchg8b " __percpu_arg([var]), X86_FEATURE_CX8) \
 		  : [var] "+m" (_var),					\
 		    "+a" (old__.low),					\
 		    "+d" (old__.high)					\
 		  : "b" (new__.low),					\
 		    "c" (new__.high)					\
-		  : "memory");						\
+		  : "memory", "esi");					\
 									\
 	old__.var;							\
 })
@@ -254,13 +255,14 @@ do {									\
 	old__.var = _oval;						\
 	new__.var = _nval;						\
 									\
-	asm qual ("cmpxchg16b " __percpu_arg([var])			\
+	asm qual (ALTERNATIVE("leaq %P[var], %%rsi; call this_cpu_cmpxchg16b_emu", \
+			      "cmpxchg16b " __percpu_arg([var]), X86_FEATURE_CX16) \
 		  : [var] "+m" (_var),					\
 		    "+a" (old__.low),					\
 		    "+d" (old__.high)					\
 		  : "b" (new__.low),					\
 		    "c" (new__.high)					\
-		  : "memory");						\
+		  : "memory", "rsi");					\
 									\
 	old__.var;							\
 })
@@ -400,12 +402,13 @@ do {									\
 	bool __ret;							\
 	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
 	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
-	alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
-		       "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t",	\
-		       X86_FEATURE_CX16,				\
-		       ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),		\
-				   "+m" (pcp2), "+d" (__o2)),		\
-		       "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");	\
+	asm volatile (ALTERNATIVE("leaq %P1, %%rsi; call this_cpu_cmpxchg16b_emu", \
+				  "cmpxchg16b " __percpu_arg(1), X86_FEATURE_CX16) \
+			     "setz %0"					\
+			     : "=a" (__ret), "+m" (pcp1)		\
+			     : "b" (__n1), "c" (__n2),			\
+			       "a" (__o1), "d" (__o2)			\
+			     : "memory", "rsi");			\
 	__ret;								\
 })
 
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -61,8 +61,9 @@ ifeq ($(CONFIG_X86_32),y)
         lib-y += strstr_32.o
         lib-y += string_32.o
         lib-y += memmove_32.o
+        lib-y += cmpxchg8b_emu.o
 ifneq ($(CONFIG_X86_CMPXCHG64),y)
-        lib-y += cmpxchg8b_emu.o atomic64_386_32.o
+        lib-y += atomic64_386_32.o
 endif
 else
         obj-y += iomap_copy_64.o
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -1,47 +1,54 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 #include <linux/linkage.h>
 #include <asm/percpu.h>
+#include <asm/processor-flags.h>
 
 .text
 
 /*
+ * Emulate 'cmpxchg16b %gs:(%rsi)'
+ *
  * Inputs:
  * %rsi : memory location to compare
  * %rax : low 64 bits of old value
  * %rdx : high 64 bits of old value
  * %rbx : low 64 bits of new value
  * %rcx : high 64 bits of new value
- * %al  : Operation successful
+ *
+ * Notably this is not LOCK prefixed and is not safe against NMIs
  */
 SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
 
-#
-# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
-# via the ZF.  Caller will access %al to get result.
-#
-# Note that this is only useful for a cpuops operation.  Meaning that we
-# do *not* have a fully atomic operation but just an operation that is
-# *atomic* on a single cpu (as provided by the this_cpu_xx class of
-# macros).
-#
 	pushfq
 	cli
 
-	cmpq PER_CPU_VAR((%rsi)), %rax
-	jne .Lnot_same
-	cmpq PER_CPU_VAR(8(%rsi)), %rdx
-	jne .Lnot_same
+	/* if (*ptr == old) */
+	cmpq	PER_CPU_VAR(0(%rsi)), %rax
+	jne	.Lnot_same
+	cmpq	PER_CPU_VAR(8(%rsi)), %rdx
+	jne	.Lnot_same
+
+	/* *ptr = new */
+	movq	%rbx, PER_CPU_VAR(0(%rsi))
+	movq	%rcx, PER_CPU_VAR(8(%rsi))
 
-	movq %rbx, PER_CPU_VAR((%rsi))
-	movq %rcx, PER_CPU_VAR(8(%rsi))
+	/* set ZF in EFLAGS to indicate success */
+	orl	$X86_EFLAGS_ZF, (%rsp)
 
 	popfq
-	mov $1, %al
 	RET
 
 .Lnot_same:
+	/* *ptr != old */
+
+	/* old = *ptr */
+	movq	PER_CPU_VAR(0(%rsi)), %rax
+	movq	PER_CPU_VAR(8(%rsi)), %rdx
+
+	/* clear ZF in EFLAGS to indicate failure */
+	andl	$(~X86_EFLAGS_ZF), (%rsp)
+
 	popfq
-	xor %al,%al
 	RET
 
 SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -2,10 +2,16 @@
 
 #include <linux/linkage.h>
 #include <asm/export.h>
+#include <asm/percpu.h>
+#include <asm/processor-flags.h>
 
 .text
 
+#ifndef CONFIG_X86_CMPXCHG64
+
 /*
+ * Emulate 'cmpxchg8b (%esi)' on UP
+ *
  * Inputs:
  * %esi : memory location to compare
  * %eax : low 32 bits of old value
@@ -15,32 +21,61 @@
  */
 SYM_FUNC_START(cmpxchg8b_emu)
 
-#
-# Emulate 'cmpxchg8b (%esi)' on UP except we don't
-# set the whole ZF thing (caller will just compare
-# eax:edx with the expected value)
-#
 	pushfl
 	cli
 
-	cmpl  (%esi), %eax
-	jne .Lnot_same
-	cmpl 4(%esi), %edx
-	jne .Lhalf_same
+	cmpl	0(%esi), %eax
+	jne	.Lnot_same
+	cmpl	4(%esi), %edx
+	jne	.Lnot_same
+
+	movl	%ebx, 0(%esi)
+	movl	%ecx, 4(%esi)
 
-	movl %ebx,  (%esi)
-	movl %ecx, 4(%esi)
+	orl	$X86_EFLAGS_ZF, (%esp)
 
 	popfl
 	RET
 
 .Lnot_same:
-	movl  (%esi), %eax
-.Lhalf_same:
-	movl 4(%esi), %edx
+	movl	0(%esi), %eax
+	movl	4(%esi), %edx
+
+	andl	$(~X86_EFLAGS_ZF), (%esp)
 
 	popfl
 	RET
 
 SYM_FUNC_END(cmpxchg8b_emu)
 EXPORT_SYMBOL(cmpxchg8b_emu)
+
+#endif
+
+SYM_FUNC_START(this_cpu_cmpxchg8b_emu)
+
+	pushfl
+	cli
+
+	cmpl	PER_CPU_VAR(0(%esi)), %eax
+	jne	.Lnot_same2
+	cmpl	PER_CPU_VAR(4(%esi)), %edx
+	jne	.Lnot_same2
+
+	movl	%ebx, PER_CPU_VAR(0(%esi))
+	movl	%ecx, PER_CPU_VAR(4(%esi))
+
+	orl	$X86_EFLAGS_ZF, (%esp)
+
+	popfl
+	RET
+
+.Lnot_same2:
+	movl	PER_CPU_VAR(0(%esi)), %eax
+	movl	PER_CPU_VAR(4(%esi)), %edx
+
+	andl	$(~X86_EFLAGS_ZF), (%esp)
+
+	popfl
+	RET
+
+SYM_FUNC_END(this_cpu_cmpxchg8b_emu)

next prev parent reply	other threads:[~2023-05-25 12:51 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-05-15  7:56 [PATCH v3 00/11] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 01/11] cyrpto/b128ops: Remove struct u128 Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 02/11] types: Introduce [us]128 Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 03/11] arch: Introduce arch_{,try_}_cmpxchg128{,_local}() Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 04/11] instrumentation: Wire up cmpxchg128() Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 05/11] percpu: Wire up cmpxchg128 Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-25 12:49   ` Peter Zijlstra [this message]
2023-05-25 22:59     ` [PATCH v3 05/11] " Petr Tesařík
2023-05-15  7:57 ` [PATCH v3 06/11] x86,amd_iommu: Replace cmpxchg_double() Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 07/11] x86,intel_iommu: " Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 08/11] slub: " Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-24  9:32   ` [PATCH v3 08/11] " Peter Zijlstra
2023-05-24 10:13     ` Vlastimil Babka
2023-05-25 10:29     ` Peter Zijlstra
2023-05-25 10:52       ` Arnd Bergmann
2023-05-25 13:10         ` Peter Zijlstra
2023-05-30 14:22     ` Peter Zijlstra
2023-05-30 19:32       ` Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 09/11] mm/slub: Fold slab_update_freelist() Peter Zijlstra
2023-05-24 11:58   ` Vlastimil Babka
2023-05-15  7:57 ` [PATCH v3 10/11] arch: Remove cmpxchg_double Peter Zijlstra
2023-05-15  8:52   ` Heiko Carstens
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  7:57 ` [PATCH v3 11/11] s390/cpum_sf: Convert to cmpxchg128() Peter Zijlstra
2023-05-20 10:49   ` [tip: locking/core] " tip-bot2 for Peter Zijlstra
2023-05-22 10:27   ` tip-bot2 for Peter Zijlstra
2023-05-15  9:42 ` [PATCH v3 00/11] Introduce cmpxchg128() -- aka. the demise of cmpxchg_double() Arnd Bergmann
2023-05-24  9:39   ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230525124955.GS83892@hirez.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=42.hyeyoo@gmail.com \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=arnd@arndb.de \
    --cc=baolu.lu@linux.intel.com \
    --cc=boqun.feng@gmail.com \
    --cc=borntraeger@linux.ibm.com \
    --cc=bp@alien8.de \
    --cc=catalin.marinas@arm.com \
    --cc=cl@linux.com \
    --cc=corbet@lwn.net \
    --cc=dave.hansen@linux.intel.com \
    --cc=davem@davemloft.net \
    --cc=dennis@kernel.org \
    --cc=dwmw2@infradead.org \
    --cc=gor@linux.ibm.com \
    --cc=hca@linux.ibm.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=hpa@zytor.com \
    --cc=iamjoonsoo.kim@lge.com \
    --cc=iommu@lists.linux.dev \
    --cc=joro@8bytes.org \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-crypto@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=penberg@kernel.org \
    --cc=rientjes@google.com \
    --cc=robin.murphy@arm.com \
    --cc=roman.gushchin@linux.dev \
    --cc=suravee.suthikulpanit@amd.com \
    --cc=svens@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vbabka@suse.cz \
    --cc=will@kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).