All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
To: <linux-arch@vger.kernel.org>, <linux-kernel@vger.kernel.org>
Cc: <arnd@arndb.de>, <arc-linux-dev@synopsys.com>,
	Vineet Gupta <Vineet.Gupta1@synopsys.com>
Subject: [PATCH 10/28] ARCv2: Adhere to Zero Delay loop restriction
Date: Tue, 9 Jun 2015 17:18:10 +0530	[thread overview]
Message-ID: <1433850508-26317-11-git-send-email-vgupta@synopsys.com> (raw)
In-Reply-To: <1433850508-26317-1-git-send-email-vgupta@synopsys.com>

Branch insn can't be scheduled as last insn of Zero Overhead loop

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/delay.h   |  9 ++++-----
 arch/arc/include/asm/uaccess.h | 17 ++++++++---------
 arch/arc/lib/memcmp.S          | 30 +++++++++++++++++++++++++++++-
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index 43de30256981..08e7e2a16ac1 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -22,11 +22,10 @@
 static inline void __delay(unsigned long loops)
 {
 	__asm__ __volatile__(
-	"1:	sub.f %0, %0, 1	\n"
-	"	jpnz 1b		\n"
-	: "+r"(loops)
-	:
-	: "cc");
+	"	lp  1f	\n"
+	"	nop	\n"
+	"1:		\n"
+	: "+l"(loops));
 }
 
 extern void __bad_udelay(void);
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 30c9baffa96f..d1da6032b715 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
 static inline long
 __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 {
-	long res = count;
+	long res = 0;
 	char val;
-	unsigned int hw_count;
 
 	if (count == 0)
 		return 0;
 
 	__asm__ __volatile__(
-	"	lp 2f		\n"
+	"	lp	3f			\n"
 	"1:	ldb.ab  %3, [%2, 1]		\n"
-	"	breq.d  %3, 0, 2f		\n"
+	"	breq.d	%3, 0, 3f               \n"
 	"	stb.ab  %3, [%1, 1]		\n"
-	"2:	sub %0, %6, %4			\n"
-	"3:	;nop				\n"
+	"	add	%0, %0, 1	# Num of NON NULL bytes copied	\n"
+	"3:								\n"
 	"	.section .fixup, \"ax\"		\n"
 	"	.align 4			\n"
-	"4:	mov %0, %5			\n"
+	"4:	mov %0, %4		# sets @res as -EFAULT	\n"
 	"	j   3b				\n"
 	"	.previous			\n"
 	"	.section __ex_table, \"a\"	\n"
 	"	.align 4			\n"
 	"	.word   1b, 4b			\n"
 	"	.previous			\n"
-	: "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count)
-	: "g"(-EFAULT), "ir"(count), "4"(count)	/* this "4" seeds lp_count */
+	: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
+	: "g"(-EFAULT), "l"(count)
 	: "memory");
 
 	return res;
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
index 978bf8314dfb..a4015e7d9ab7 100644
--- a/arch/arc/lib/memcmp.S
+++ b/arch/arc/lib/memcmp.S
@@ -24,14 +24,32 @@ ENTRY(memcmp)
 	ld	r4,[r0,0]
 	ld	r5,[r1,0]
 	lsr.f	lp_count,r3,3
+#ifdef CONFIG_ISA_ARCV2
+	/* In ARCv2 a branch can't be the last instruction in a zero overhead
+	 * loop.
+	 * So we move the branch to the start of the loop, duplicate it
+	 * after the end, and set up r12 so that the branch isn't taken
+	 *  initially.
+	 */
+	mov_s	r12,WORD2
+	lpne	.Loop_end
+	brne	WORD2,r12,.Lodd
+	ld	WORD2,[r0,4]
+#else
 	lpne	.Loop_end
 	ld_s	WORD2,[r0,4]
+#endif
 	ld_s	r12,[r1,4]
 	brne	r4,r5,.Leven
 	ld.a	r4,[r0,8]
 	ld.a	r5,[r1,8]
+#ifdef CONFIG_ISA_ARCV2
+.Loop_end:
+	brne	WORD2,r12,.Lodd
+#else
 	brne	WORD2,r12,.Lodd
 .Loop_end:
+#endif
 	asl_s	SHIFT,SHIFT,3
 	bhs_s	.Last_cmp
 	brne	r4,r5,.Leven
@@ -89,7 +107,6 @@ ENTRY(memcmp)
 	bset.cs	r0,r0,31
 .Lodd:
 	cmp_s	WORD2,r12
-
 	mov_s	r0,1
 	j_s.d	[blink]
 	bset.cs	r0,r0,31
@@ -100,14 +117,25 @@ ENTRY(memcmp)
 	ldb	r4,[r0,0]
 	ldb	r5,[r1,0]
 	lsr.f	lp_count,r3
+#ifdef CONFIG_ISA_ARCV2
+	mov	r12,r3
 	lpne	.Lbyte_end
+	brne	r3,r12,.Lbyte_odd
+#else
+	lpne	.Lbyte_end
+#endif
 	ldb_s	r3,[r0,1]
 	ldb	r12,[r1,1]
 	brne	r4,r5,.Lbyte_even
 	ldb.a	r4,[r0,2]
 	ldb.a	r5,[r1,2]
+#ifdef CONFIG_ISA_ARCV2
+.Lbyte_end:
+	brne	r3,r12,.Lbyte_odd
+#else
 	brne	r3,r12,.Lbyte_odd
 .Lbyte_end:
+#endif
 	bcc	.Lbyte_even
 	brne	r4,r5,.Lbyte_even
 	ldb_s	r3,[r0,1]
-- 
1.9.1


WARNING: multiple messages have this Message-ID (diff)
From: Vineet Gupta <Vineet.Gupta1@synopsys.com>
To: linux-arch@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: arnd@arndb.de, arc-linux-dev@synopsys.com,
	Vineet Gupta <Vineet.Gupta1@synopsys.com>
Subject: [PATCH 10/28] ARCv2: Adhere to Zero Delay loop restriction
Date: Tue, 9 Jun 2015 17:18:10 +0530	[thread overview]
Message-ID: <1433850508-26317-11-git-send-email-vgupta@synopsys.com> (raw)
In-Reply-To: <1433850508-26317-1-git-send-email-vgupta@synopsys.com>

Branch insn can't be scheduled as last insn of Zero Overhead loop

Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
---
 arch/arc/include/asm/delay.h   |  9 ++++-----
 arch/arc/include/asm/uaccess.h | 17 ++++++++---------
 arch/arc/lib/memcmp.S          | 30 +++++++++++++++++++++++++++++-
 3 files changed, 41 insertions(+), 15 deletions(-)

diff --git a/arch/arc/include/asm/delay.h b/arch/arc/include/asm/delay.h
index 43de30256981..08e7e2a16ac1 100644
--- a/arch/arc/include/asm/delay.h
+++ b/arch/arc/include/asm/delay.h
@@ -22,11 +22,10 @@
 static inline void __delay(unsigned long loops)
 {
 	__asm__ __volatile__(
-	"1:	sub.f %0, %0, 1	\n"
-	"	jpnz 1b		\n"
-	: "+r"(loops)
-	:
-	: "cc");
+	"	lp  1f	\n"
+	"	nop	\n"
+	"1:		\n"
+	: "+l"(loops));
 }
 
 extern void __bad_udelay(void);
diff --git a/arch/arc/include/asm/uaccess.h b/arch/arc/include/asm/uaccess.h
index 30c9baffa96f..d1da6032b715 100644
--- a/arch/arc/include/asm/uaccess.h
+++ b/arch/arc/include/asm/uaccess.h
@@ -659,31 +659,30 @@ static inline unsigned long __arc_clear_user(void __user *to, unsigned long n)
 static inline long
 __arc_strncpy_from_user(char *dst, const char __user *src, long count)
 {
-	long res = count;
+	long res = 0;
 	char val;
-	unsigned int hw_count;
 
 	if (count == 0)
 		return 0;
 
 	__asm__ __volatile__(
-	"	lp 2f		\n"
+	"	lp	3f			\n"
 	"1:	ldb.ab  %3, [%2, 1]		\n"
-	"	breq.d  %3, 0, 2f		\n"
+	"	breq.d	%3, 0, 3f               \n"
 	"	stb.ab  %3, [%1, 1]		\n"
-	"2:	sub %0, %6, %4			\n"
-	"3:	;nop				\n"
+	"	add	%0, %0, 1	# Num of NON NULL bytes copied	\n"
+	"3:								\n"
 	"	.section .fixup, \"ax\"		\n"
 	"	.align 4			\n"
-	"4:	mov %0, %5			\n"
+	"4:	mov %0, %4		# sets @res as -EFAULT	\n"
 	"	j   3b				\n"
 	"	.previous			\n"
 	"	.section __ex_table, \"a\"	\n"
 	"	.align 4			\n"
 	"	.word   1b, 4b			\n"
 	"	.previous			\n"
-	: "=r"(res), "+r"(dst), "+r"(src), "=&r"(val), "=l"(hw_count)
-	: "g"(-EFAULT), "ir"(count), "4"(count)	/* this "4" seeds lp_count */
+	: "+r"(res), "+r"(dst), "+r"(src), "=r"(val)
+	: "g"(-EFAULT), "l"(count)
 	: "memory");
 
 	return res;
diff --git a/arch/arc/lib/memcmp.S b/arch/arc/lib/memcmp.S
index 978bf8314dfb..a4015e7d9ab7 100644
--- a/arch/arc/lib/memcmp.S
+++ b/arch/arc/lib/memcmp.S
@@ -24,14 +24,32 @@ ENTRY(memcmp)
 	ld	r4,[r0,0]
 	ld	r5,[r1,0]
 	lsr.f	lp_count,r3,3
+#ifdef CONFIG_ISA_ARCV2
+	/* In ARCv2 a branch can't be the last instruction in a zero overhead
+	 * loop.
+	 * So we move the branch to the start of the loop, duplicate it
+	 * after the end, and set up r12 so that the branch isn't taken
+	 *  initially.
+	 */
+	mov_s	r12,WORD2
+	lpne	.Loop_end
+	brne	WORD2,r12,.Lodd
+	ld	WORD2,[r0,4]
+#else
 	lpne	.Loop_end
 	ld_s	WORD2,[r0,4]
+#endif
 	ld_s	r12,[r1,4]
 	brne	r4,r5,.Leven
 	ld.a	r4,[r0,8]
 	ld.a	r5,[r1,8]
+#ifdef CONFIG_ISA_ARCV2
+.Loop_end:
+	brne	WORD2,r12,.Lodd
+#else
 	brne	WORD2,r12,.Lodd
 .Loop_end:
+#endif
 	asl_s	SHIFT,SHIFT,3
 	bhs_s	.Last_cmp
 	brne	r4,r5,.Leven
@@ -89,7 +107,6 @@ ENTRY(memcmp)
 	bset.cs	r0,r0,31
 .Lodd:
 	cmp_s	WORD2,r12
-
 	mov_s	r0,1
 	j_s.d	[blink]
 	bset.cs	r0,r0,31
@@ -100,14 +117,25 @@ ENTRY(memcmp)
 	ldb	r4,[r0,0]
 	ldb	r5,[r1,0]
 	lsr.f	lp_count,r3
+#ifdef CONFIG_ISA_ARCV2
+	mov	r12,r3
 	lpne	.Lbyte_end
+	brne	r3,r12,.Lbyte_odd
+#else
+	lpne	.Lbyte_end
+#endif
 	ldb_s	r3,[r0,1]
 	ldb	r12,[r1,1]
 	brne	r4,r5,.Lbyte_even
 	ldb.a	r4,[r0,2]
 	ldb.a	r5,[r1,2]
+#ifdef CONFIG_ISA_ARCV2
+.Lbyte_end:
+	brne	r3,r12,.Lbyte_odd
+#else
 	brne	r3,r12,.Lbyte_odd
 .Lbyte_end:
+#endif
 	bcc	.Lbyte_even
 	brne	r4,r5,.Lbyte_even
 	ldb_s	r3,[r0,1]
-- 
1.9.1

  parent reply	other threads:[~2015-06-09 11:58 UTC|newest]

Thread overview: 109+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-06-09 11:48 [PATCH 00/28] ARCv2 port to Linux - (B) ISA / Core / platform support Vineet Gupta
2015-06-09 11:48 ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 01/28] ARCv2: [intc] HS38 core interrupt controller Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 02/28] ARCv2: Support for ARCv2 ISA and HS38x cores Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 03/28] ARCv2: STAR 9000793984: Handle return from intr to Delay Slot Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 04/28] ARCv2: STAR 9000808988: signals involving " Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 05/28] ARCv2: STAR 9000814690: Really Re-enable interrupts to avoid deadlocks Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 06/28] ARCv2: MMUv4: TLB programming Model changes Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 07/28] ARCv2: MMUv4: cache programming model changes Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 08/28] ARCv2: MMUv4: support aliasing icache config Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 09/28] ARCv2: optimised string/mem lib routines Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` Vineet Gupta [this message]
2015-06-09 11:48   ` [PATCH 10/28] ARCv2: Adhere to Zero Delay loop restriction Vineet Gupta
2015-06-09 11:48 ` [PATCH 11/28] ARCv2: extable: Enable sorting at build time Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-24  5:51   ` Vineet Gupta
2015-06-24  5:51     ` Vineet Gupta
2015-06-29 20:38     ` David Daney
2015-06-30  4:41       ` Vineet Gupta
2015-06-30  4:41         ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 12/28] ARCv2: clocksource: Introduce 64bit local RTC counter Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 13/28] ARC: make plat_smp_ops weak to allow over-rides Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 14/28] ARCv2: SMP: ARConnect debug/robustness Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 15/28] ARCv2: SMP: clocksource: Enable Global Real Time counter Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 16/28] ARCv2: SMP: intc: IDU 2nd level intc for dynamic IRQ distribution Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 17/28] ARC: add compiler barrier to LLSC based cmpxchg Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 12:23   ` Peter Zijlstra
2015-06-09 11:48 ` [PATCH 18/28] ARC: add smp barriers around atomics per memory-barrriers.txt Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 12:30   ` Peter Zijlstra
2015-06-10  9:17     ` Vineet Gupta
2015-06-10 10:53       ` Peter Zijlstra
2015-06-11 13:03         ` Vineet Gupta
2015-06-12 12:15   ` [PATCH v2] ARC: add smp barriers around atomics per Documentation/atomic_ops.txt Vineet Gupta
2015-06-12 12:15     ` Vineet Gupta
2015-06-12 13:04     ` Peter Zijlstra
2015-06-12 13:16       ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 19/28] arch: conditionally define smp_{mb,rmb,wmb} Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 12:32   ` Peter Zijlstra
2015-06-09 11:48 ` [PATCH 20/28] ARCv2: barriers Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 12:40   ` Peter Zijlstra
2015-06-10  9:34     ` Vineet Gupta
2015-06-10 10:58       ` Peter Zijlstra
2015-06-10 13:01         ` Will Deacon
2015-06-11 12:13           ` Vineet Gupta
2015-06-11 13:39             ` Will Deacon
2015-06-19 13:13               ` Vineet Gupta
2015-06-19 13:13                 ` Vineet Gupta
2015-06-19 13:13                 ` Vineet Gupta
2015-06-22 13:36                 ` Will Deacon
2015-06-22 13:36                   ` Will Deacon
2015-06-22 13:36                   ` Will Deacon
2015-06-23  7:58                   ` [PATCH v2 " Vineet Gupta
2015-06-23  7:58                     ` Vineet Gupta
2015-06-23  8:49                     ` Will Deacon
2015-06-23  9:03                       ` Vineet Gupta
2015-06-23  9:26                         ` Will Deacon
2015-06-23  9:52                           ` [PATCH v3 22/28] " Vineet Gupta
2015-06-23  9:52                             ` Vineet Gupta
2015-06-23 16:28                             ` Will Deacon
2015-06-23  9:25                     ` [PATCH v2 20/28] " Peter Zijlstra
2015-06-23  8:02                   ` [PATCH " Vineet Gupta
2015-06-09 11:48 ` [PATCH 21/28] ARC: Reduce bitops lines of code using macros Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-12 12:20   ` [PATCH v2] " Vineet Gupta
2015-06-12 12:20     ` Vineet Gupta
2015-06-12 13:05     ` Peter Zijlstra
2015-06-09 11:48 ` [PATCH 22/28] ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 12:35   ` Peter Zijlstra
2015-06-10 10:01     ` Vineet Gupta
2015-06-10 11:02       ` Peter Zijlstra
2015-06-19  9:55         ` [PATCH v2 " Vineet Gupta
2015-06-19  9:55           ` Vineet Gupta
2015-06-19  9:59           ` Will Deacon
2015-06-19 10:09             ` Vineet Gupta
2015-06-23  7:59             ` Vineet Gupta
2015-06-23  7:59               ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 23/28] ARCv2: SLC: Handle explcit flush for DMA ops (w/o IO-coherency) Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 24/28] ARCv2: All bits in place, allow ARCv2 builds Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 25/28] ARCv2: [nsim*hs*] Support simulation platforms for HS38x cores Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 26/28] ARC: [axs101] Prepare for AXS103 Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 27/28] ARCv2: [axs103] Support ARC SDP FPGA platform for HS38x cores Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta
2015-06-09 11:48 ` [PATCH 28/28] ARCv2: [vdk] dts files and defconfig for HS38 VDK Vineet Gupta
2015-06-09 11:48   ` Vineet Gupta

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1433850508-26317-11-git-send-email-vgupta@synopsys.com \
    --to=vineet.gupta1@synopsys.com \
    --cc=arc-linux-dev@synopsys.com \
    --cc=arnd@arndb.de \
    --cc=linux-arch@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.