From: Vineet Gupta <Vineet.Gupta1@synopsys.com> To: <linux-arch@vger.kernel.org>, <linux-kernel@vger.kernel.org> Cc: <arnd@arndb.de>, <arc-linux-dev@synopsys.com>, Vineet Gupta <Vineet.Gupta1@synopsys.com> Subject: [PATCH 10/28] ARCv2: Adhere to Zero Delay loop restriction Date: Tue, 9 Jun 2015 17:18:10 +0530 [thread overview] Message-ID: <1433850508-26317-11-git-send-email-vgupta@synopsys.com> (raw) In-Reply-To: <1433850508-26317-1-git-send-email-vgupta@synopsys.com> Branch insn can't be scheduled as last insn of Zero Overhead loop Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/asm/delay.h | 9 ++++----- arch/arc/include/asm/uaccess.h | 17 ++++++++--------- arch/arc/lib/memcmp.S | 30 +++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 43de30256981..08e7e2a16ac1 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -22,11 +22,10 @@ static inline void __delay(unsigned long loops) { __asm__ __volatile__( - "1: sub.f %0, %0, 1 \n" - " jpnz 1b \n" - : "+r"(loops) - : - : "cc"); + " lp 1f \n" + " nop \n" + "1: \n" + : "+l"(loops)); } extern void __bad_udelay(void); diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 30c9baffa96f..d1da6032b715 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) static inline long __arc_strncpy_from_user(char *dst, const char __user *src, long count) { - long res = count; + long res = 0; char val; - unsigned int hw_count; if (count == 0) return 0; __asm__ __volatile__( - " lp 2f \n" + " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" - " breq.d %3, 0, 2f \n" + " breq.d %3, 0, 3f \n" " stb.ab %3, [%1, 1] \n" - "2: sub %0, %6, %4 \n" - "3: ;nop \n" + " add %0, %0, 1 # Num of NON NULL bytes copied \n" + "3: \n" " .section .fixup, \"ax\" \n" " .align 4 \n" - "4: mov %0, %5 \n" + "4: mov %0, %4 # sets @res as -EFAULT \n" " j 3b \n" " .previous \n" " .section __ex_table, \"a\" \n" " .align 4 \n" " .word 1b, 4b \n" " .previous \n" - : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count) - : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */ + : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) + : "g"(-EFAULT), "l"(count) : "memory"); return res; diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S index 978bf8314dfb..a4015e7d9ab7 100644 --- a/arch/arc/lib/memcmp.S +++ b/arch/arc/lib/memcmp.S @@ -24,14 +24,32 @@ ENTRY(memcmp) ld r4,[r0,0] ld r5,[r1,0] lsr.f lp_count,r3,3 +#ifdef CONFIG_ISA_ARCV2 + /* In ARCv2 a branch can't be the last instruction in a zero overhead + * loop. + * So we move the branch to the start of the loop, duplicate it + * after the end, and set up r12 so that the branch isn't taken + * initially. + */ + mov_s r12,WORD2 + lpne .Loop_end + brne WORD2,r12,.Lodd + ld WORD2,[r0,4] +#else lpne .Loop_end ld_s WORD2,[r0,4] +#endif ld_s r12,[r1,4] brne r4,r5,.Leven ld.a r4,[r0,8] ld.a r5,[r1,8] +#ifdef CONFIG_ISA_ARCV2 +.Loop_end: + brne WORD2,r12,.Lodd +#else brne WORD2,r12,.Lodd .Loop_end: +#endif asl_s SHIFT,SHIFT,3 bhs_s .Last_cmp brne r4,r5,.Leven @@ -89,7 +107,6 @@ ENTRY(memcmp) bset.cs r0,r0,31 .Lodd: cmp_s WORD2,r12 - mov_s r0,1 j_s.d [blink] bset.cs r0,r0,31 @@ -100,14 +117,25 @@ ENTRY(memcmp) ldb r4,[r0,0] ldb r5,[r1,0] lsr.f lp_count,r3 +#ifdef CONFIG_ISA_ARCV2 + mov r12,r3 lpne .Lbyte_end + brne r3,r12,.Lbyte_odd +#else + lpne .Lbyte_end +#endif ldb_s r3,[r0,1] ldb r12,[r1,1] brne r4,r5,.Lbyte_even ldb.a r4,[r0,2] ldb.a r5,[r1,2] +#ifdef CONFIG_ISA_ARCV2 +.Lbyte_end: + brne r3,r12,.Lbyte_odd +#else brne r3,r12,.Lbyte_odd .Lbyte_end: +#endif bcc .Lbyte_even brne r4,r5,.Lbyte_even ldb_s r3,[r0,1] -- 1.9.1
WARNING: multiple messages have this Message-ID (diff)
From: Vineet Gupta <Vineet.Gupta1@synopsys.com> To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org Cc: arnd@arndb.de, arc-linux-dev@synopsys.com, Vineet Gupta <Vineet.Gupta1@synopsys.com> Subject: [PATCH 10/28] ARCv2: Adhere to Zero Delay loop restriction Date: Tue, 9 Jun 2015 17:18:10 +0530 [thread overview] Message-ID: <1433850508-26317-11-git-send-email-vgupta@synopsys.com> (raw) In-Reply-To: <1433850508-26317-1-git-send-email-vgupta@synopsys.com> Branch insn can't be scheduled as last insn of Zero Overhead loop Signed-off-by: Vineet Gupta <vgupta@synopsys.com> --- arch/arc/include/asm/delay.h | 9 ++++----- arch/arc/include/asm/uaccess.h | 17 ++++++++--------- arch/arc/lib/memcmp.S | 30 +++++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h index 43de30256981..08e7e2a16ac1 100644 --- a/arch/arc/include/asm/delay.h +++ b/arch/arc/include/asm/delay.h @@ -22,11 +22,10 @@ static inline void __delay(unsigned long loops) { __asm__ __volatile__( - "1: sub.f %0, %0, 1 \n" - " jpnz 1b \n" - : "+r"(loops) - : - : "cc"); + " lp 1f \n" + " nop \n" + "1: \n" + : "+l"(loops)); } extern void __bad_udelay(void); diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h index 30c9baffa96f..d1da6032b715 100644 --- a/arch/arc/include/asm/uaccess.h +++ b/arch/arc/include/asm/uaccess.h @@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n) static inline long __arc_strncpy_from_user(char *dst, const char __user *src, long count) { - long res = count; + long res = 0; char val; - unsigned int hw_count; if (count == 0) return 0; __asm__ __volatile__( - " lp 2f \n" + " lp 3f \n" "1: ldb.ab %3, [%2, 1] \n" - " breq.d %3, 0, 2f \n" + " breq.d %3, 0, 3f \n" " stb.ab %3, [%1, 1] \n" - "2: sub %0, %6, %4 \n" - "3: ;nop \n" + " add %0, %0, 1 # Num of NON NULL bytes copied \n" + "3: \n" " .section .fixup, \"ax\" \n" " .align 4 \n" - "4: mov %0, %5 \n" + "4: mov %0, %4 # sets @res as -EFAULT \n" " j 3b \n" " .previous \n" " .section __ex_table, \"a\" \n" " .align 4 \n" " .word 1b, 4b \n" " .previous \n" - : "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count) - : "g"(-EFAULT), "ir"(count), "4"(count) /* this "4" seeds lp_count */ + : "+r"(res), "+r"(dst), "+r"(src), "=r"(val) + : "g"(-EFAULT), "l"(count) : "memory"); return res; diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S index 978bf8314dfb..a4015e7d9ab7 100644 --- a/arch/arc/lib/memcmp.S +++ b/arch/arc/lib/memcmp.S @@ -24,14 +24,32 @@ ENTRY(memcmp) ld r4,[r0,0] ld r5,[r1,0] lsr.f lp_count,r3,3 +#ifdef CONFIG_ISA_ARCV2 + /* In ARCv2 a branch can't be the last instruction in a zero overhead + * loop. + * So we move the branch to the start of the loop, duplicate it + * after the end, and set up r12 so that the branch isn't taken + * initially. + */ + mov_s r12,WORD2 + lpne .Loop_end + brne WORD2,r12,.Lodd + ld WORD2,[r0,4] +#else lpne .Loop_end ld_s WORD2,[r0,4] +#endif ld_s r12,[r1,4] brne r4,r5,.Leven ld.a r4,[r0,8] ld.a r5,[r1,8] +#ifdef CONFIG_ISA_ARCV2 +.Loop_end: + brne WORD2,r12,.Lodd +#else brne WORD2,r12,.Lodd .Loop_end: +#endif asl_s SHIFT,SHIFT,3 bhs_s .Last_cmp brne r4,r5,.Leven @@ -89,7 +107,6 @@ ENTRY(memcmp) bset.cs r0,r0,31 .Lodd: cmp_s WORD2,r12 - mov_s r0,1 j_s.d [blink] bset.cs r0,r0,31 @@ -100,14 +117,25 @@ ENTRY(memcmp) ldb r4,[r0,0] ldb r5,[r1,0] lsr.f lp_count,r3 +#ifdef CONFIG_ISA_ARCV2 + mov r12,r3 lpne .Lbyte_end + brne r3,r12,.Lbyte_odd +#else + lpne .Lbyte_end +#endif ldb_s r3,[r0,1] ldb r12,[r1,1] brne r4,r5,.Lbyte_even ldb.a r4,[r0,2] ldb.a r5,[r1,2] +#ifdef CONFIG_ISA_ARCV2 +.Lbyte_end: + brne r3,r12,.Lbyte_odd +#else brne r3,r12,.Lbyte_odd .Lbyte_end: +#endif bcc .Lbyte_even brne r4,r5,.Lbyte_even ldb_s r3,[r0,1] -- 1.9.1
next prev parent reply other threads:[~2015-06-09 11:58 UTC|newest] Thread overview: 109+ messages / expand[flat|nested] mbox.gz Atom feed top 2015-06-09 11:48 [PATCH 00/28] ARCv2 port to Linux - (B) ISA / Core / platform support Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 01/28] ARCv2: [intc] HS38 core interrupt controller Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 02/28] ARCv2: Support for ARCv2 ISA and HS38x cores Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 03/28] ARCv2: STAR 9000793984: Handle return from intr to Delay Slot Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 04/28] ARCv2: STAR 9000808988: signals involving " Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 05/28] ARCv2: STAR 9000814690: Really Re-enable interrupts to avoid deadlocks Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 06/28] ARCv2: MMUv4: TLB programming Model changes Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 07/28] ARCv2: MMUv4: cache programming model changes Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 08/28] ARCv2: MMUv4: support aliasing icache config Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 09/28] ARCv2: optimised string/mem lib routines Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta [this message] 2015-06-09 11:48 ` [PATCH 10/28] ARCv2: Adhere to Zero Delay loop restriction Vineet Gupta 2015-06-09 11:48 ` [PATCH 11/28] ARCv2: extable: Enable sorting at build time Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-24 5:51 ` Vineet Gupta 2015-06-24 5:51 ` Vineet Gupta 2015-06-29 20:38 ` David Daney 2015-06-30 4:41 ` Vineet Gupta 2015-06-30 4:41 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 12/28] ARCv2: clocksource: Introduce 64bit local RTC counter Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 13/28] ARC: make plat_smp_ops weak to allow over-rides Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 14/28] ARCv2: SMP: ARConnect debug/robustness Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 15/28] ARCv2: SMP: clocksource: Enable Global Real Time counter Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 16/28] ARCv2: SMP: intc: IDU 2nd level intc for dynamic IRQ distribution Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 17/28] ARC: add compiler barrier to LLSC based cmpxchg Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 12:23 ` Peter Zijlstra 2015-06-09 11:48 ` [PATCH 18/28] ARC: add smp barriers around atomics per memory-barrriers.txt Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 12:30 ` Peter Zijlstra 2015-06-10 9:17 ` Vineet Gupta 2015-06-10 10:53 ` Peter Zijlstra 2015-06-11 13:03 ` Vineet Gupta 2015-06-12 12:15 ` [PATCH v2] ARC: add smp barriers around atomics per Documentation/atomic_ops.txt Vineet Gupta 2015-06-12 12:15 ` Vineet Gupta 2015-06-12 13:04 ` Peter Zijlstra 2015-06-12 13:16 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 19/28] arch: conditionally define smp_{mb,rmb,wmb} Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 12:32 ` Peter Zijlstra 2015-06-09 11:48 ` [PATCH 20/28] ARCv2: barriers Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 12:40 ` Peter Zijlstra 2015-06-10 9:34 ` Vineet Gupta 2015-06-10 10:58 ` Peter Zijlstra 2015-06-10 13:01 ` Will Deacon 2015-06-11 12:13 ` Vineet Gupta 2015-06-11 13:39 ` Will Deacon 2015-06-19 13:13 ` Vineet Gupta 2015-06-19 13:13 ` Vineet Gupta 2015-06-19 13:13 ` Vineet Gupta 2015-06-22 13:36 ` Will Deacon 2015-06-22 13:36 ` Will Deacon 2015-06-22 13:36 ` Will Deacon 2015-06-23 7:58 ` [PATCH v2 " Vineet Gupta 2015-06-23 7:58 ` Vineet Gupta 2015-06-23 8:49 ` Will Deacon 2015-06-23 9:03 ` Vineet Gupta 2015-06-23 9:26 ` Will Deacon 2015-06-23 9:52 ` [PATCH v3 22/28] " Vineet Gupta 2015-06-23 9:52 ` Vineet Gupta 2015-06-23 16:28 ` Will Deacon 2015-06-23 9:25 ` [PATCH v2 20/28] " Peter Zijlstra 2015-06-23 8:02 ` [PATCH " Vineet Gupta 2015-06-09 11:48 ` [PATCH 21/28] ARC: Reduce bitops lines of code using macros Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-12 12:20 ` [PATCH v2] " Vineet Gupta 2015-06-12 12:20 ` Vineet Gupta 2015-06-12 13:05 ` Peter Zijlstra 2015-06-09 11:48 ` [PATCH 22/28] ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 12:35 ` Peter Zijlstra 2015-06-10 10:01 ` Vineet Gupta 2015-06-10 11:02 ` Peter Zijlstra 2015-06-19 9:55 ` [PATCH v2 " Vineet Gupta 2015-06-19 9:55 ` Vineet Gupta 2015-06-19 9:59 ` Will Deacon 2015-06-19 10:09 ` Vineet Gupta 2015-06-23 7:59 ` Vineet Gupta 2015-06-23 7:59 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 23/28] ARCv2: SLC: Handle explcit flush for DMA ops (w/o IO-coherency) Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 24/28] ARCv2: All bits in place, allow ARCv2 builds Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 25/28] ARCv2: [nsim*hs*] Support simulation platforms for HS38x cores Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 26/28] ARC: [axs101] Prepare for AXS103 Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 27/28] ARCv2: [axs103] Support ARC SDP FPGA platform for HS38x cores Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta 2015-06-09 11:48 ` [PATCH 28/28] ARCv2: [vdk] dts files and defconfig for HS38 VDK Vineet Gupta 2015-06-09 11:48 ` Vineet Gupta
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=1433850508-26317-11-git-send-email-vgupta@synopsys.com \ --to=vineet.gupta1@synopsys.com \ --cc=arc-linux-dev@synopsys.com \ --cc=arnd@arndb.de \ --cc=linux-arch@vger.kernel.org \ --cc=linux-kernel@vger.kernel.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: linkBe sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.