From: Christophe Leroy <christophe.leroy@csgroup.eu> To: Benjamin Herrenschmidt <benh@kernel.crashing.org>, Paul Mackerras <paulus@samba.org>, Michael Ellerman <mpe@ellerman.id.au> Cc: linux-kernel@vger.kernel.org, linuxppc-dev@lists.ozlabs.org Subject: [PATCH v1 1/2] powerpc/bitops: Use immediate operand when possible Date: Thu, 8 Apr 2021 15:33:44 +0000 (UTC) [thread overview] Message-ID: <09da6fec57792d6559d1ea64e00be9870b02dab4.1617896018.git.christophe.leroy@csgroup.eu> (raw) Today we get the following code generation for bitops like set or clear bit: c0009fe0: 39 40 08 00 li r10,2048 c0009fe4: 7c e0 40 28 lwarx r7,0,r8 c0009fe8: 7c e7 53 78 or r7,r7,r10 c0009fec: 7c e0 41 2d stwcx. r7,0,r8 c000c044: 39 40 20 00 li r10,8192 c000c048: 7c e0 40 28 lwarx r7,0,r8 c000c04c: 7c e7 50 78 andc r7,r7,r10 c000c050: 7c e0 41 2d stwcx. r7,0,r8 Most set bits are constant on lower 16 bits, so it can easily be replaced by the "immediate" version of the operation. Allow GCC to choose between the normal or immediate form. For clear bits, on 32 bits 'rlwinm' can be used instead or 'andc' for when all bits to be cleared are consecutive. For the time being only handle the single bit case, which we detect by checking whether the mask is a power of two. Can't use is_power_of_2() function because it is not included yet, but it is easy to code with (mask & (mask - 1)) and even the 0 case which is not a power of two is acceptable for us. On 64 bits we don't have any equivalent single operation, we'd need two 'rldicl' so it is not worth it. With this patch we get: c0009fe0: 7d 00 50 28 lwarx r8,0,r10 c0009fe4: 61 08 08 00 ori r8,r8,2048 c0009fe8: 7d 00 51 2d stwcx. r8,0,r10 c000c034: 7d 00 50 28 lwarx r8,0,r10 c000c038: 55 08 04 e2 rlwinm r8,r8,0,19,17 c000c03c: 7d 00 51 2d stwcx. r8,0,r10 On pmac32_defconfig, it reduces the text by approx 10 kbytes. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> --- arch/powerpc/include/asm/bitops.h | 77 +++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 299ab33505a6..0b0c6bdd9be9 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -71,19 +71,49 @@ static inline void fn(unsigned long mask, \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX(%0,0,%3,0) "\n" \ - stringify_in_c(op) "%0,%0,%2\n" \ + #op "%I2 %0,%0,%2\n" \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ : "=&r" (old), "+m" (*p) \ - : "r" (mask), "r" (p) \ + : "rK" (mask), "r" (p) \ : "cc", "memory"); \ } DEFINE_BITOP(set_bits, or, "") -DEFINE_BITOP(clear_bits, andc, "") -DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER) DEFINE_BITOP(change_bits, xor, "") +#define DEFINE_CLROP(fn, prefix) \ +static inline void fn(unsigned long mask, volatile unsigned long *_p) \ +{ \ + unsigned long old; \ + unsigned long *p = (unsigned long *)_p; \ + if (IS_ENABLED(CONFIG_PPC32) && \ + __builtin_constant_p(mask) && !(mask & (mask - 1))) { \ + asm volatile ( \ + prefix \ + "1:" "lwarx %0,0,%3\n" \ + "rlwinm %0,%0,0,%2\n" \ + "stwcx. %0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "i" (~mask), "r" (p) \ + : "cc", "memory"); \ + } else { \ + asm volatile ( \ + prefix \ + "1:" PPC_LLARX(%0,0,%3,0) "\n" \ + "andc %0,%0,%2\n" \ + PPC_STLCX "%0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "r" (mask), "r" (p) \ + : "cc", "memory"); \ + } \ +} + +DEFINE_CLROP(clear_bits, "") +DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER) + static inline void arch_set_bit(int nr, volatile unsigned long *addr) { set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)); @@ -116,12 +146,12 @@ static inline unsigned long fn( \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX(%0,0,%3,eh) "\n" \ - stringify_in_c(op) "%1,%0,%2\n" \ + #op "%I2 %1,%0,%2\n" \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p) \ + : "rK" (mask), "r" (p) \ : "cc", "memory"); \ return (old & mask); \ } @@ -130,11 +160,42 @@ DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_set_bits_lock, or, "", PPC_ACQUIRE_BARRIER, 1) -DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER, - PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) +static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p) +{ + unsigned long old, t; + unsigned long *p = (unsigned long *)_p; + + if (IS_ENABLED(CONFIG_PPC32) && + __builtin_constant_p(mask) && !(mask & (mask - 1))) { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" PPC_LLARX(%0,0,%3,0) "\n" + "rlwinm %1,%0,0,%2\n" + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "i" (~mask), "r" (p) + : "cc", "memory"); + } else { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" PPC_LLARX(%0,0,%3,0) "\n" + "andc %1,%0,%2\n" + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + } + + return (old & mask); +} + static inline int arch_test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { -- 2.25.0
WARNING: multiple messages have this Message-ID (diff)
From: Christophe Leroy <christophe.leroy@csgroup.eu> To: Benjamin Herrenschmidt <benh@kernel.crashing.org>, Paul Mackerras <paulus@samba.org>, Michael Ellerman <mpe@ellerman.id.au> Cc: linuxppc-dev@lists.ozlabs.org, linux-kernel@vger.kernel.org Subject: [PATCH v1 1/2] powerpc/bitops: Use immediate operand when possible Date: Thu, 8 Apr 2021 15:33:44 +0000 (UTC) [thread overview] Message-ID: <09da6fec57792d6559d1ea64e00be9870b02dab4.1617896018.git.christophe.leroy@csgroup.eu> (raw) Today we get the following code generation for bitops like set or clear bit: c0009fe0: 39 40 08 00 li r10,2048 c0009fe4: 7c e0 40 28 lwarx r7,0,r8 c0009fe8: 7c e7 53 78 or r7,r7,r10 c0009fec: 7c e0 41 2d stwcx. r7,0,r8 c000c044: 39 40 20 00 li r10,8192 c000c048: 7c e0 40 28 lwarx r7,0,r8 c000c04c: 7c e7 50 78 andc r7,r7,r10 c000c050: 7c e0 41 2d stwcx. r7,0,r8 Most set bits are constant on lower 16 bits, so it can easily be replaced by the "immediate" version of the operation. Allow GCC to choose between the normal or immediate form. For clear bits, on 32 bits 'rlwinm' can be used instead or 'andc' for when all bits to be cleared are consecutive. For the time being only handle the single bit case, which we detect by checking whether the mask is a power of two. Can't use is_power_of_2() function because it is not included yet, but it is easy to code with (mask & (mask - 1)) and even the 0 case which is not a power of two is acceptable for us. On 64 bits we don't have any equivalent single operation, we'd need two 'rldicl' so it is not worth it. With this patch we get: c0009fe0: 7d 00 50 28 lwarx r8,0,r10 c0009fe4: 61 08 08 00 ori r8,r8,2048 c0009fe8: 7d 00 51 2d stwcx. r8,0,r10 c000c034: 7d 00 50 28 lwarx r8,0,r10 c000c038: 55 08 04 e2 rlwinm r8,r8,0,19,17 c000c03c: 7d 00 51 2d stwcx. r8,0,r10 On pmac32_defconfig, it reduces the text by approx 10 kbytes. Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu> --- arch/powerpc/include/asm/bitops.h | 77 +++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/bitops.h b/arch/powerpc/include/asm/bitops.h index 299ab33505a6..0b0c6bdd9be9 100644 --- a/arch/powerpc/include/asm/bitops.h +++ b/arch/powerpc/include/asm/bitops.h @@ -71,19 +71,49 @@ static inline void fn(unsigned long mask, \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX(%0,0,%3,0) "\n" \ - stringify_in_c(op) "%0,%0,%2\n" \ + #op "%I2 %0,%0,%2\n" \ PPC_STLCX "%0,0,%3\n" \ "bne- 1b\n" \ : "=&r" (old), "+m" (*p) \ - : "r" (mask), "r" (p) \ + : "rK" (mask), "r" (p) \ : "cc", "memory"); \ } DEFINE_BITOP(set_bits, or, "") -DEFINE_BITOP(clear_bits, andc, "") -DEFINE_BITOP(clear_bits_unlock, andc, PPC_RELEASE_BARRIER) DEFINE_BITOP(change_bits, xor, "") +#define DEFINE_CLROP(fn, prefix) \ +static inline void fn(unsigned long mask, volatile unsigned long *_p) \ +{ \ + unsigned long old; \ + unsigned long *p = (unsigned long *)_p; \ + if (IS_ENABLED(CONFIG_PPC32) && \ + __builtin_constant_p(mask) && !(mask & (mask - 1))) { \ + asm volatile ( \ + prefix \ + "1:" "lwarx %0,0,%3\n" \ + "rlwinm %0,%0,0,%2\n" \ + "stwcx. %0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "i" (~mask), "r" (p) \ + : "cc", "memory"); \ + } else { \ + asm volatile ( \ + prefix \ + "1:" PPC_LLARX(%0,0,%3,0) "\n" \ + "andc %0,%0,%2\n" \ + PPC_STLCX "%0,0,%3\n" \ + "bne- 1b\n" \ + : "=&r" (old), "+m" (*p) \ + : "r" (mask), "r" (p) \ + : "cc", "memory"); \ + } \ +} + +DEFINE_CLROP(clear_bits, "") +DEFINE_CLROP(clear_bits_unlock, PPC_RELEASE_BARRIER) + static inline void arch_set_bit(int nr, volatile unsigned long *addr) { set_bits(BIT_MASK(nr), addr + BIT_WORD(nr)); @@ -116,12 +146,12 @@ static inline unsigned long fn( \ __asm__ __volatile__ ( \ prefix \ "1:" PPC_LLARX(%0,0,%3,eh) "\n" \ - stringify_in_c(op) "%1,%0,%2\n" \ + #op "%I2 %1,%0,%2\n" \ PPC_STLCX "%1,0,%3\n" \ "bne- 1b\n" \ postfix \ : "=&r" (old), "=&r" (t) \ - : "r" (mask), "r" (p) \ + : "rK" (mask), "r" (p) \ : "cc", "memory"); \ return (old & mask); \ } @@ -130,11 +160,42 @@ DEFINE_TESTOP(test_and_set_bits, or, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_set_bits_lock, or, "", PPC_ACQUIRE_BARRIER, 1) -DEFINE_TESTOP(test_and_clear_bits, andc, PPC_ATOMIC_ENTRY_BARRIER, - PPC_ATOMIC_EXIT_BARRIER, 0) DEFINE_TESTOP(test_and_change_bits, xor, PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, 0) +static inline unsigned long test_and_clear_bits(unsigned long mask, volatile unsigned long *_p) +{ + unsigned long old, t; + unsigned long *p = (unsigned long *)_p; + + if (IS_ENABLED(CONFIG_PPC32) && + __builtin_constant_p(mask) && !(mask & (mask - 1))) { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" PPC_LLARX(%0,0,%3,0) "\n" + "rlwinm %1,%0,0,%2\n" + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "i" (~mask), "r" (p) + : "cc", "memory"); + } else { + asm volatile ( + PPC_ATOMIC_ENTRY_BARRIER + "1:" PPC_LLARX(%0,0,%3,0) "\n" + "andc %1,%0,%2\n" + PPC_STLCX "%1,0,%3\n" + "bne- 1b\n" + PPC_ATOMIC_EXIT_BARRIER + : "=&r" (old), "=&r" (t) + : "r" (mask), "r" (p) + : "cc", "memory"); + } + + return (old & mask); +} + static inline int arch_test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { -- 2.25.0
next reply other threads:[~2021-04-08 15:33 UTC|newest] Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top 2021-04-08 15:33 Christophe Leroy [this message] 2021-04-08 15:33 ` [PATCH v1 1/2] powerpc/bitops: Use immediate operand when possible Christophe Leroy 2021-04-08 15:33 ` [PATCH v1 2/2] powerpc/atomics: " Christophe Leroy 2021-04-08 15:33 ` Christophe Leroy 2021-04-12 22:08 ` Segher Boessenkool 2021-04-12 22:08 ` Segher Boessenkool 2021-04-13 16:36 ` Christophe Leroy 2021-04-13 16:36 ` Christophe Leroy 2021-04-12 21:54 ` [PATCH v1 1/2] powerpc/bitops: " Segher Boessenkool 2021-04-12 21:54 ` Segher Boessenkool 2021-04-13 16:33 ` Christophe Leroy 2021-04-13 16:33 ` Christophe Leroy 2021-04-13 21:58 ` Segher Boessenkool 2021-04-13 21:58 ` Segher Boessenkool 2021-04-14 2:01 ` Nicholas Piggin 2021-04-14 2:01 ` Nicholas Piggin 2021-04-14 12:24 ` Segher Boessenkool 2021-04-14 12:24 ` Segher Boessenkool 2021-04-14 12:42 ` Christophe Leroy 2021-04-14 12:42 ` Christophe Leroy 2021-04-14 15:19 ` Segher Boessenkool 2021-04-14 15:19 ` Segher Boessenkool 2021-04-14 15:32 ` David Laight 2021-04-14 15:32 ` David Laight 2021-04-14 17:20 ` Segher Boessenkool 2021-04-14 17:20 ` Segher Boessenkool
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=09da6fec57792d6559d1ea64e00be9870b02dab4.1617896018.git.christophe.leroy@csgroup.eu \ --to=christophe.leroy@csgroup.eu \ --cc=benh@kernel.crashing.org \ --cc=linux-kernel@vger.kernel.org \ --cc=linuxppc-dev@lists.ozlabs.org \ --cc=mpe@ellerman.id.au \ --cc=paulus@samba.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.