From: guoren@kernel.org
To: paul.walmsley@sifive.com, anup@brainfault.org,
peterz@infradead.org, mingo@redhat.com, will@kernel.org,
palmer@rivosinc.com, longman@redhat.com, boqun.feng@gmail.com,
tglx@linutronix.de, paulmck@kernel.org, rostedt@goodmis.org,
rdunlap@infradead.org, catalin.marinas@arm.com,
conor.dooley@microchip.com, xiaoguang.xing@sophgo.com,
bjorn@rivosinc.com, alexghiti@rivosinc.com,
keescook@chromium.org, greentime.hu@sifive.com,
ajones@ventanamicro.com, jszhang@kernel.org, wefu@redhat.com,
wuwei2016@iscas.ac.cn
Cc: linux-arch@vger.kernel.org, linux-riscv@lists.infradead.org,
linux-doc@vger.kernel.org, kvm@vger.kernel.org,
virtualization@lists.linux-foundation.org,
linux-csky@vger.kernel.org, Guo Ren <guoren@linux.alibaba.com>,
Guo Ren <guoren@kernel.org>
Subject: [PATCH V10 14/19] RISC-V: paravirt: pvqspinlock: Add xchg8 & cmpxchg_small support
Date: Wed, 2 Aug 2023 12:46:56 -0400
Message-ID: <20230802164701.192791-15-guoren@kernel.org>
In-Reply-To: <20230802164701.192791-1-guoren@kernel.org>
From: Guo Ren <guoren@linux.alibaba.com>
The pvqspinlock needs additional sub-word atomic operations. Here
is the list:
- xchg8 (RCsc)
- cmpxchg8/16_relaxed
- cmpxchg8/16_release (RCpc)
- cmpxchg8_acquire (RCpc)
- cmpxchg8 (RCsc)
Although the paravirt qspinlock doesn't have native_qspinlock's
fairness, giving these atomic operations a strong forward-progress
guarantee prevents unnecessary retries, which would otherwise cause
cache-line bouncing.
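For illustration, the containing-word technique the new helpers use
can be sketched in portable C. This is an illustrative sketch only
(the function name is made up; it uses GCC/Clang __atomic builtins
instead of the patch's LR/SC assembly, and assumes little-endian
byte order, as on RISC-V):

  #include <stdint.h>

  /* Emulate cmpxchg on one byte via the aligned 32-bit word around it. */
  static uint8_t cmpxchg8_sketch(uint8_t *ptr, uint8_t old, uint8_t new)
  {
          uint32_t *word = (uint32_t *)((uintptr_t)ptr & ~(uintptr_t)0x3);
          unsigned int shift = ((uintptr_t)ptr & 0x3) * 8;
          uint32_t mask = (uint32_t)0xff << shift;
          uint32_t expected = __atomic_load_n(word, __ATOMIC_RELAXED);
          uint32_t desired;

          do {
                  /* Byte differs from 'old': fail, return what we saw. */
                  if ((uint8_t)((expected & mask) >> shift) != old)
                          return (expected & mask) >> shift;
                  /* Splice in the new byte, preserving its neighbours. */
                  desired = (expected & ~mask) | ((uint32_t)new << shift);
          } while (!__atomic_compare_exchange_n(word, &expected, desired,
                                                0, __ATOMIC_SEQ_CST,
                                                __ATOMIC_RELAXED));
          return old;
  }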
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
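(Editor's note, not part of the original patch: with the size-1 case
wired into __arch_xchg(), generic code can use the ordinary xchg()
macro on byte-sized fields. A hypothetical caller, assuming the
little-endian qspinlock layout where 'locked' is the low byte:)

  /* Hypothetical sketch: byte-granular unlock, served by __xchg8(). */
  static inline u8 sketch_unlock_byte(struct qspinlock *lock)
  {
          /* Fully ordered (RCsc) exchange of the locked byte with 0. */
          return xchg(&lock->locked, 0);
  }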
arch/riscv/include/asm/cmpxchg.h | 177 +++++++++++++++++++++++++++++++
1 file changed, 177 insertions(+)
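(Editor's note: the shift/mask arithmetic repeated throughout the
helpers below, worked through for one concrete address; compilable
as a standalone sanity check:)

  #include <assert.h>
  #include <stdint.h>

  int main(void)
  {
          /* A byte at offset 2 within its aligned 32-bit word. */
          uintptr_t p = 0x1002;
          unsigned int shift = (p & 0x3) * 8;      /* 2 * 8 = 16 */
          uint32_t mask = (uint32_t)0xff << shift; /* 0x00ff0000 */

          assert(shift == 16);
          assert(mask == 0x00ff0000u);
          assert((p & ~(uintptr_t)0x3) == 0x1000); /* containing word */
          return 0;
  }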
diff --git a/arch/riscv/include/asm/cmpxchg.h b/arch/riscv/include/asm/cmpxchg.h
index 3ab37215ed86..2fd797c04e7a 100644
--- a/arch/riscv/include/asm/cmpxchg.h
+++ b/arch/riscv/include/asm/cmpxchg.h
@@ -103,12 +103,37 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
_x_, sizeof(*(ptr))); \
})
+static inline ulong __xchg8(ulong new, void *ptr)
+{
+ ulong ret, tmp;
+ ulong shift = ((ulong)ptr & 3) * 8;
+ ulong mask = (ulong)0xff << shift;
+ ulong *__ptr = (ulong *)((ulong)ptr & ~3);
+
+ __asm__ __volatile__ (
+ "0: lr.w %0, %2\n"
+ " and %1, %0, %z3\n"
+ " or %1, %1, %z4\n"
+ " sc.w.rl %1, %1, %2\n"
+ " bnez %1, 0b\n"
+ " fence w, rw\n"
+ : "=&r" (ret), "=&r" (tmp), "+A" (*__ptr)
+ : "rJ" (~mask), "rJ" ((new & 0xff) << shift)
+ : "memory");
+
+ return (ulong)((ret & mask) >> shift);
+}
+
#define __arch_xchg(ptr, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
__typeof__(new) __new = (new); \
__typeof__(*(ptr)) __ret; \
switch (size) { \
+ case 1: \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg8((ulong)__new, __ptr); \
+ break; \
case 4: \
__asm__ __volatile__ ( \
" amoswap.w.aqrl %0, %2, %1\n" \
@@ -140,6 +165,51 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*/
+static inline ulong __cmpxchg_small_relaxed(void *ptr, ulong old,
+ ulong new, ulong size)
+{
+ ulong shift;
+ ulong ret, mask, temp;
+ volatile ulong *ptr32;
+
+ /* Mask inputs to the correct size. */
+ mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+ old &= mask;
+ new &= mask;
+
+ /*
+ * Calculate a shift & mask that correspond to the value we wish to
+ * compare & exchange within the naturally aligned 4 byte integer
+ * that includes it.
+ */
+ shift = (ulong)ptr & 0x3;
+ shift *= BITS_PER_BYTE;
+ old <<= shift;
+ new <<= shift;
+ mask <<= shift;
+
+ /*
+ * Calculate a pointer to the naturally aligned 4-byte integer that
+ * includes our byte of interest; its value is loaded by lr.w below.
+ */
+ ptr32 = (volatile ulong *)((ulong)ptr & ~0x3);
+
+ __asm__ __volatile__ (
+ "0: lr.w %0, %2\n"
+ " and %1, %0, %z3\n"
+ " bne %1, %z5, 1f\n"
+ " and %1, %0, %z4\n"
+ " or %1, %1, %z6\n"
+ " sc.w %1, %1, %2\n"
+ " bnez %1, 0b\n"
+ "1:\n"
+ : "=&r" (ret), "=&r" (temp), "+A" (*ptr32)
+ : "rJ" (mask), "rJ" (~mask), "rJ" (old), "rJ" (new)
+ : "memory");
+
+ return (ret & mask) >> shift;
+}
+
#define __cmpxchg_relaxed(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
@@ -148,6 +218,11 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
+ case 1: \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg_small_relaxed(__ptr, (ulong)__old, \
+ (ulong)__new, (ulong)size); \
+ break; \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
@@ -184,6 +259,52 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
_o_, _n_, sizeof(*(ptr))); \
})
+static inline ulong __cmpxchg_small_acquire(void *ptr, ulong old,
+ ulong new, ulong size)
+{
+ ulong shift;
+ ulong ret, mask, temp;
+ volatile ulong *ptr32;
+
+ /* Mask inputs to the correct size. */
+ mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+ old &= mask;
+ new &= mask;
+
+ /*
+ * Calculate a shift & mask that correspond to the value we wish to
+ * compare & exchange within the naturally aligned 4 byte integer
+ * that includes it.
+ */
+ shift = (ulong)ptr & 0x3;
+ shift *= BITS_PER_BYTE;
+ old <<= shift;
+ new <<= shift;
+ mask <<= shift;
+
+ /*
+ * Calculate a pointer to the naturally aligned 4-byte integer that
+ * includes our byte of interest; its value is loaded by lr.w below.
+ */
+ ptr32 = (volatile ulong *)((ulong)ptr & ~0x3);
+
+ __asm__ __volatile__ (
+ "0: lr.w %0, %2\n"
+ " and %1, %0, %z3\n"
+ " bne %1, %z5, 1f\n"
+ " and %1, %0, %z4\n"
+ " or %1, %1, %z6\n"
+ " sc.w %1, %1, %2\n"
+ " bnez %1, 0b\n"
+ RISCV_ACQUIRE_BARRIER
+ "1:\n"
+ : "=&r" (ret), "=&r" (temp), "+A" (*ptr32)
+ : "rJ" (mask), "rJ" (~mask), "rJ" (old), "rJ" (new)
+ : "memory");
+
+ return (ret & mask) >> shift;
+}
+
#define __cmpxchg_acquire(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
@@ -192,6 +313,12 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
+ case 1: \
+ case 2: \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg_small_acquire(__ptr, (ulong)__old, \
+ (ulong)__new, (ulong)size); \
+ break; \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
@@ -230,6 +357,51 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
_o_, _n_, sizeof(*(ptr))); \
})
+static inline ulong __cmpxchg_small(void *ptr, ulong old,
+ ulong new, ulong size)
+{
+ ulong shift;
+ ulong ret, mask, temp;
+ volatile ulong *ptr32;
+
+ /* Mask inputs to the correct size. */
+ mask = GENMASK((size * BITS_PER_BYTE) - 1, 0);
+ old &= mask;
+ new &= mask;
+
+ /*
+ * Calculate a shift & mask that correspond to the value we wish to
+ * compare & exchange within the naturally aligned 4 byte integer
+ * that includes it.
+ */
+ shift = (ulong)ptr & 0x3;
+ shift *= BITS_PER_BYTE;
+ old <<= shift;
+ new <<= shift;
+ mask <<= shift;
+
+ /*
+ * Calculate a pointer to the naturally aligned 4-byte integer that
+ * includes our byte of interest; its value is loaded by lr.w below.
+ */
+ ptr32 = (volatile ulong *)((ulong)ptr & ~0x3);
+
+ __asm__ __volatile__ (
+ "0: lr.w %0, %2\n"
+ " and %1, %0, %z3\n"
+ " bne %1, %z5, 1f\n"
+ " and %1, %0, %z4\n"
+ " or %1, %1, %z6\n"
+ " sc.w.rl %1, %1, %2\n"
+ " bnez %1, 0b\n"
+ " fence w, rw\n"
+ "1:\n"
+ : "=&r" (ret), "=&r" (temp), "+A" (*ptr32)
+ : "rJ" (mask), "rJ" (~mask), "rJ" (old), "rJ" (new)
+ : "memory");
+
+ return (ret & mask) >> shift;
+}
+
#define __cmpxchg(ptr, old, new, size) \
({ \
__typeof__(ptr) __ptr = (ptr); \
@@ -238,6 +410,11 @@ static inline ulong __xchg16_relaxed(ulong new, void *ptr)
__typeof__(*(ptr)) __ret; \
register unsigned int __rc; \
switch (size) { \
+ case 1: \
+ __ret = (__typeof__(*(ptr))) \
+ __cmpxchg_small(__ptr, (ulong)__old, \
+ (ulong)__new, (ulong)size); \
+ break; \
case 4: \
__asm__ __volatile__ ( \
"0: lr.w %0, %2\n" \
--
2.36.1