From: Andy Lutomirski <luto@kernel.org>
To: x86@kernel.org
Cc: Borislav Petkov <bp@suse.de>,
	Peter Zijlstra <peterz@infradead.org>,
	John Stultz <john.stultz@linaro.org>,
	linux-kernel@vger.kernel.org, Len Brown <lenb@kernel.org>,
	Huang Rui <ray.huang@amd.com>,
	Denys Vlasenko <dvlasenk@redhat.com>,
	kvm@vger.kernel.org, Ralf Baechle <ralf@linux-mips.org>,
	Andy Lutomirski <luto@kernel.org>
Subject: [PATCH v3 14/18] x86: Add rdtsc_ordered() and use it in trivial call sites
Date: Tue, 16 Jun 2015 17:36:02 -0700
Message-ID: <dddbf98a2af53312e9aa73a5a2b1622fe5d6f52b.1434501121.git.luto@kernel.org>
In-Reply-To: <cover.1434501120.git.luto@kernel.org>

rdtsc_barrier(); rdtsc() is an unnecessary mouthful and requires
more thought than should be necessary.  Add an rdtsc_ordered()
helper and replace the trivial call sites with it.

This should not change generated code.  The duplication of the fence
asm is temporary.
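
For illustration only (not part of the patch), the call-site
transformation amounts to the minimal sketch below; tsc_sample_old()
and tsc_sample_new() are hypothetical callers used purely for
demonstration:

	#include <linux/types.h>
	#include <asm/barrier.h>	/* rdtsc_barrier() */
	#include <asm/msr.h>		/* rdtsc(), rdtsc_ordered() */

	/* Old pattern: the barrier and the read are spelled out by hand
	 * at every call site. */
	static u64 tsc_sample_old(void)
	{
		rdtsc_barrier();
		return rdtsc();
	}

	/* New pattern: the fence is folded into a single helper. */
	static u64 tsc_sample_new(void)
	{
		return rdtsc_ordered();
	}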

Signed-off-by: Andy Lutomirski <luto@kernel.org>
---
 arch/x86/entry/vdso/vclock_gettime.c | 16 ++--------------
 arch/x86/include/asm/msr.h           | 26 ++++++++++++++++++++++++++
 arch/x86/kernel/trace_clock.c        |  7 +------
 arch/x86/kvm/x86.c                   | 16 ++--------------
 arch/x86/lib/delay.c                 |  9 +++------
 5 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c
index 0340d93c18ca..ca94fa649251 100644
--- a/arch/x86/entry/vdso/vclock_gettime.c
+++ b/arch/x86/entry/vdso/vclock_gettime.c
@@ -175,20 +175,8 @@ static notrace cycle_t vread_pvclock(int *mode)
 
 notrace static cycle_t vread_tsc(void)
 {
-	cycle_t ret;
-	u64 last;
-
-	/*
-	 * Empirically, a fence (of type that depends on the CPU)
-	 * before rdtsc is enough to ensure that rdtsc is ordered
-	 * with respect to loads.  The various CPU manuals are unclear
-	 * as to whether rdtsc can be reordered with later loads,
-	 * but no one has ever seen it happen.
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)rdtsc();
-
-	last = gtod->cycle_last;
+	cycle_t ret = (cycle_t)rdtsc_ordered();
+	u64 last = gtod->cycle_last;
 
 	if (likely(ret >= last))
 		return ret;
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index ff0c120dafe5..02bdd6c65017 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -127,6 +127,32 @@ static __always_inline unsigned long long rdtsc(void)
 	return EAX_EDX_VAL(val, low, high);
 }
 
+/**
+ * rdtsc_ordered() - read the current TSC in program order
+ *
+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
+ * It is ordered like a load to a global in-memory counter.  It should
+ * be impossible to observe non-monotonic rdtsc_ordered() behavior
+ * across multiple CPUs as long as the TSC is synced.
+ */
+static __always_inline unsigned long long rdtsc_ordered(void)
+{
+	/*
+	 * The RDTSC instruction is not ordered relative to memory
+	 * access.  The Intel SDM and the AMD APM are both vague on this
+	 * point, but empirically an RDTSC instruction can be
+	 * speculatively executed before prior loads.  An RDTSC
+	 * immediately after an appropriate barrier appears to be
+	 * ordered as a normal load, that is, it provides the same
+	 * ordering guarantees as reading from a global memory location
+	 * that some other imaginary CPU is updating continuously with a
+	 * time stamp.
+	 */
+	alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+			  "lfence", X86_FEATURE_LFENCE_RDTSC);
+	return rdtsc();
+}
+
 static inline unsigned long long native_read_pmc(int counter)
 {
 	DECLARE_ARGS(val, low, high);
diff --git a/arch/x86/kernel/trace_clock.c b/arch/x86/kernel/trace_clock.c
index 67efb8c96fc4..80bb24d9b880 100644
--- a/arch/x86/kernel/trace_clock.c
+++ b/arch/x86/kernel/trace_clock.c
@@ -12,10 +12,5 @@
  */
 u64 notrace trace_clock_x86_tsc(void)
 {
-	u64 ret;
-
-	rdtsc_barrier();
-	ret = rdtsc();
-
-	return ret;
+	return rdtsc_ordered();
 }
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b0afdc74c28a..dfccaf2f2e00 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1419,20 +1419,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc);
 
 static cycle_t read_tsc(void)
 {
-	cycle_t ret;
-	u64 last;
-
-	/*
-	 * Empirically, a fence (of type that depends on the CPU)
-	 * before rdtsc is enough to ensure that rdtsc is ordered
-	 * with respect to loads.  The various CPU manuals are unclear
-	 * as to whether rdtsc can be reordered with later loads,
-	 * but no one has ever seen it happen.
-	 */
-	rdtsc_barrier();
-	ret = (cycle_t)rdtsc();
-
-	last = pvclock_gtod_data.clock.cycle_last;
+	cycle_t ret = (cycle_t)rdtsc_ordered();
+	u64 last = pvclock_gtod_data.clock.cycle_last;
 
 	if (likely(ret >= last))
 		return ret;
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c
index f24bc59ab0a0..4453d52a143d 100644
--- a/arch/x86/lib/delay.c
+++ b/arch/x86/lib/delay.c
@@ -54,11 +54,9 @@ static void delay_tsc(unsigned long __loops)
 
 	preempt_disable();
 	cpu = smp_processor_id();
-	rdtsc_barrier();
-	bclock = rdtsc();
+	bclock = rdtsc_ordered();
 	for (;;) {
-		rdtsc_barrier();
-		now = rdtsc();
+		now = rdtsc_ordered();
 		if ((now - bclock) >= loops)
 			break;
 
@@ -79,8 +77,7 @@ static void delay_tsc(unsigned long __loops)
 		if (unlikely(cpu != smp_processor_id())) {
 			loops -= (now - bclock);
 			cpu = smp_processor_id();
-			rdtsc_barrier();
-			bclock = rdtsc();
+			bclock = rdtsc_ordered();
 		}
 	}
 	preempt_enable();
-- 
2.4.2

