LKML Archive mirror
* Updated MSR tracing patchkit v2
@ 2015-03-20  0:29 Andi Kleen
From: Andi Kleen @ 2015-03-20  0:29 UTC
  To: x86; +Cc: linux-kernel, peterz

This patchkit adds proper trace points for all MSR accesses. They are very
useful for PMU (perf) debugging, but also for other low-level CPU code.

Updated version:

- Now traces RDPMC too
- Added benchmarks on the overhead of moving MSR accesses out of line



* [PATCH 1/3] x86: Move msr accesses out of line
@ 2015-03-20  0:29 Andi Kleen
From: Andi Kleen @ 2015-03-20  0:29 UTC
  To: x86; +Cc: linux-kernel, peterz, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

To add trace points to MSR accesses we need to include
linux/tracepoint.h. Unfortunately this causes hellish include loops
with the MSR inlines in asm/msr.h, which is included all over the tree.
I tried to fix several of them, but eventually gave up.

This patch moves the MSR functions out of line. An MSR access typically
takes 40-100 cycles or more, while a call costs only a few cycles, so the
additional function call is not significant.

Kernel text size is essentially neutral (+24 bytes of text):

    text     data      bss       dec     hex  filename
11852945  1671656  1822720  15347321  ea2e79  vmlinux-no-msr
11852969  1671656  1822720  15347345  ea2e91  vmlinux-msr

As requested, here is some benchmarking of out-of-line versus inline MSR
access (including the trace points from the next patch):

The absolute differences are fairly low: 6-8 cycles (6-7%) on Haswell for
out of line plus trace point. On Avoton the percentages are higher
because the base costs are lower, but the absolute cycle deltas
(14-16 cycles) are still small. (A sketch of the kind of timing loop
that could produce such numbers follows the results below.)

I think it's reasonable to spend a few extra cycles per call for much
better debuggability. In fact, looking at the traces has already exposed
a number of opportunities to eliminate unnecessary accesses, which should
give much larger gains.

Haswell:
  wrmsr:  136 cycles out of line,  128 cycles inline   (~6% delta)
  rdmsr:   90 cycles out of line,   84 cycles inline   (~7% delta)

Avoton:
  wrmsr:   68 cycles out of line,   54 cycles inline  (~20% delta)
  rdmsr:   60 cycles out of line,   44 cycles inline  (~26% delta)
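
For reference, numbers in this ballpark can be produced with a throwaway
benchmark module that brackets a tight loop of MSR reads with get_cycles().
The sketch below is purely illustrative and is not the benchmark actually
used for the table above; MSR_IA32_APERF is only an example of a cheap,
always-present MSR on these parts.

/* Illustrative sketch only -- not the benchmark used for the numbers above. */
#include <linux/module.h>
#include <linux/irqflags.h>
#include <linux/printk.h>
#include <asm/msr.h>
#include <asm/timex.h>

#define ITERS 100000

static int __init msr_bench_init(void)
{
        cycles_t start, end;
        u64 val = 0;
        int i;

        local_irq_disable();
        start = get_cycles();
        for (i = 0; i < ITERS; i++)
                rdmsrl(MSR_IA32_APERF, val);    /* example MSR, assumed present */
        end = get_cycles();
        local_irq_enable();

        pr_info("rdmsr: %llu cycles/call (last value %llx)\n",
                (unsigned long long)(end - start) / ITERS, val);
        return -EAGAIN; /* print the result, then refuse to stay loaded */
}
module_init(msr_bench_init);
MODULE_LICENSE("GPL");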

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/include/asm/msr.h | 51 ++++----------------------------------------
 arch/x86/lib/msr.c         | 53 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+), 47 deletions(-)

diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index de36f22..99d6864 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -57,53 +57,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
 #define EAX_EDX_RET(val, low, high)	"=A" (val)
 #endif
 
-static inline unsigned long long native_read_msr(unsigned int msr)
-{
-	DECLARE_ARGS(val, low, high);
-
-	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
-	return EAX_EDX_VAL(val, low, high);
-}
-
-static inline unsigned long long native_read_msr_safe(unsigned int msr,
-						      int *err)
-{
-	DECLARE_ARGS(val, low, high);
-
-	asm volatile("2: rdmsr ; xor %[err],%[err]\n"
-		     "1:\n\t"
-		     ".section .fixup,\"ax\"\n\t"
-		     "3:  mov %[fault],%[err] ; jmp 1b\n\t"
-		     ".previous\n\t"
-		     _ASM_EXTABLE(2b, 3b)
-		     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
-		     : "c" (msr), [fault] "i" (-EIO));
-	return EAX_EDX_VAL(val, low, high);
-}
-
-static inline void native_write_msr(unsigned int msr,
-				    unsigned low, unsigned high)
-{
-	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
-}
-
-/* Can be uninlined because referenced by paravirt */
-notrace static inline int native_write_msr_safe(unsigned int msr,
-					unsigned low, unsigned high)
-{
-	int err;
-	asm volatile("2: wrmsr ; xor %[err],%[err]\n"
-		     "1:\n\t"
-		     ".section .fixup,\"ax\"\n\t"
-		     "3:  mov %[fault],%[err] ; jmp 1b\n\t"
-		     ".previous\n\t"
-		     _ASM_EXTABLE(2b, 3b)
-		     : [err] "=a" (err)
-		     : "c" (msr), "0" (low), "d" (high),
-		       [fault] "i" (-EIO)
-		     : "memory");
-	return err;
-}
+extern unsigned long long native_read_msr(unsigned int msr);
+extern unsigned long long native_read_msr_safe(unsigned int msr, int *err);
+extern int native_write_msr_safe(unsigned int msr, unsigned low, unsigned high);
+extern void native_write_msr(unsigned int msr, unsigned low, unsigned high);
 
 extern unsigned long long native_read_tsc(void);
 
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 4362373..7eed044 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -108,3 +108,56 @@ int msr_clear_bit(u32 msr, u8 bit)
 {
 	return __flip_bit(msr, bit, false);
 }
+
+inline unsigned long long native_read_msr(unsigned int msr)
+{
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+	return EAX_EDX_VAL(val, low, high);
+}
+EXPORT_SYMBOL(native_read_msr);
+
+inline unsigned long long native_read_msr_safe(unsigned int msr,
+						      int *err)
+{
+	DECLARE_ARGS(val, low, high);
+
+	asm volatile("2: rdmsr ; xor %[err],%[err]\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  mov %[fault],%[err] ; jmp 1b\n\t"
+		     ".previous\n\t"
+		     _ASM_EXTABLE(2b, 3b)
+		     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
+		     : "c" (msr), [fault] "i" (-EIO));
+	return EAX_EDX_VAL(val, low, high);
+}
+EXPORT_SYMBOL(native_read_msr_safe);
+
+inline void native_write_msr(unsigned int msr,
+				    unsigned low, unsigned high)
+{
+	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+}
+EXPORT_SYMBOL(native_write_msr);
+
+/* Can be uninlined because referenced by paravirt */
+notrace inline int native_write_msr_safe(unsigned int msr,
+					unsigned low, unsigned high)
+{
+	int err;
+
+	asm volatile("2: wrmsr ; xor %[err],%[err]\n"
+		     "1:\n\t"
+		     ".section .fixup,\"ax\"\n\t"
+		     "3:  mov %[fault],%[err] ; jmp 1b\n\t"
+		     ".previous\n\t"
+		     _ASM_EXTABLE(2b, 3b)
+		     : [err] "=a" (err)
+		     : "c" (msr), "0" (low), "d" (high),
+		       [fault] "i" (-EIO)
+		     : "memory");
+	return err;
+}
+EXPORT_SYMBOL(native_write_msr_safe);
-- 
1.9.3



* [PATCH 2/3] x86: Add trace point for MSR accesses
@ 2015-03-20  0:29 Andi Kleen
From: Andi Kleen @ 2015-03-20  0:29 UTC
  To: x86; +Cc: linux-kernel, peterz, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

For debugging low-level code that interacts with the CPU
it is often useful to trace MSR reads and writes. This gives
a concise summary of PMU and other operations.

perf has an ad-hoc way to do this using trace_printk,
but it is somewhat limited (and now also spews ugly
messages when enabled).

Instead define real trace points for all MSR accesses.

This adds two new trace points: read_msr and write_msr.
They also report whether the access faulted (when the *_safe variants
are used).

This enables filtering and triggering on specific
MSR values, which makes various more advanced debugging
techniques possible (an illustrative in-kernel example is sketched
at the end of this description).

All the values are well defined in the CPU documentation.

I only added the trace points to the native MSR accesses in C, not to the
paravirtualized ones or to the accesses in entry*.S (which are not very
interesting).
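
As a purely hypothetical illustration of the in-kernel triggering this
enables (not part of this patch): a probe can be attached to the new
write_msr trace point and react only to a single MSR of interest.
register_trace_write_msr() is generated by DEFINE_EVENT(); since this
patch does not add EXPORT_TRACEPOINT_SYMBOL for the new trace points,
such a consumer would have to be built into the kernel rather than be
a module.

/* Hypothetical built-in consumer: log writes to IA32_PERF_GLOBAL_CTRL. */
#include <linux/init.h>
#include <linux/printk.h>
#include <linux/types.h>
#include <trace/events/msr.h>

static void watch_write_msr(void *data, unsigned msr, u64 val, int failed)
{
        if (msr == 0x38f)       /* MSR_CORE_PERF_GLOBAL_CTRL */
                pr_info("PERF_GLOBAL_CTRL <- %llx%s\n",
                        val, failed ? " #GP" : "");
}

static int __init msr_watch_init(void)
{
        /* probe callbacks take a context pointer, then the TP_PROTO args */
        return register_trace_write_msr(watch_write_msr, NULL);
}
late_initcall(msr_watch_init);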

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/lib/msr.c         | 14 ++++++++++++--
 include/trace/events/msr.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/msr.h

diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 7eed044..29d1952 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,6 +1,8 @@
 #include <linux/module.h>
 #include <linux/preempt.h>
 #include <asm/msr.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/msr.h>
 
 struct msr *msrs_alloc(void)
 {
@@ -111,16 +113,20 @@ int msr_clear_bit(u32 msr, u8 bit)
 
 inline unsigned long long native_read_msr(unsigned int msr)
 {
+	unsigned long long lval;
 	DECLARE_ARGS(val, low, high);
 
 	asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
-	return EAX_EDX_VAL(val, low, high);
+	lval = EAX_EDX_VAL(val, low, high);
+	trace_read_msr(msr, lval, 0);
+	return lval;
 }
 EXPORT_SYMBOL(native_read_msr);
 
 inline unsigned long long native_read_msr_safe(unsigned int msr,
 						      int *err)
 {
+	unsigned long long lval;
 	DECLARE_ARGS(val, low, high);
 
 	asm volatile("2: rdmsr ; xor %[err],%[err]\n"
@@ -131,7 +137,9 @@ inline unsigned long long native_read_msr_safe(unsigned int msr,
 		     _ASM_EXTABLE(2b, 3b)
 		     : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
 		     : "c" (msr), [fault] "i" (-EIO));
-	return EAX_EDX_VAL(val, low, high);
+	lval = EAX_EDX_VAL(val, low, high);
+	trace_read_msr(msr, lval, *err);
+	return lval;
 }
 EXPORT_SYMBOL(native_read_msr_safe);
 
@@ -139,6 +147,7 @@ inline void native_write_msr(unsigned int msr,
 				    unsigned low, unsigned high)
 {
 	asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+	trace_write_msr(msr, ((u64)high << 32 | low), 0);
 }
 EXPORT_SYMBOL(native_write_msr);
 
@@ -158,6 +167,7 @@ notrace inline int native_write_msr_safe(unsigned int msr,
 		     : "c" (msr), "0" (low), "d" (high),
 		       [fault] "i" (-EIO)
 		     : "memory");
+	trace_write_msr(msr, ((u64)high << 32 | low), err);
 	return err;
 }
 EXPORT_SYMBOL(native_write_msr_safe);
diff --git a/include/trace/events/msr.h b/include/trace/events/msr.h
new file mode 100644
index 0000000..e1677e8
--- /dev/null
+++ b/include/trace/events/msr.h
@@ -0,0 +1,46 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM msr
+
+#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MSR_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Tracing for x86 model specific registers. Directly maps to the
+ * RDMSR/WRMSR instructions.
+ */
+
+DECLARE_EVENT_CLASS(msr_trace_class,
+	    TP_PROTO(unsigned msr, u64 val, int failed),
+	    TP_ARGS(msr, val, failed),
+	    TP_STRUCT__entry(
+		    __field(	unsigned,	msr )
+		    __field(    u64,		val )
+		    __field(    int,		failed )
+	    ),
+	    TP_fast_assign(
+		    __entry->msr = msr;
+		    __entry->val = val;
+		    __entry->failed = failed;
+	    ),
+	    TP_printk("%x, value %llx%s",
+		      __entry->msr,
+		      __entry->val,
+		      __entry->failed ? " #GP" : "")
+);
+
+DEFINE_EVENT(msr_trace_class, read_msr,
+	     TP_PROTO(unsigned msr, u64 val, int failed),
+	     TP_ARGS(msr, val, failed)
+);
+
+DEFINE_EVENT(msr_trace_class, write_msr,
+	     TP_PROTO(unsigned msr, u64 val, int failed),
+	     TP_ARGS(msr, val, failed)
+);
+
+#endif /* _TRACE_MSR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
1.9.3



* [PATCH 3/3] x86, perf: Trace rdpmc too
@ 2015-03-20  0:29 Andi Kleen
From: Andi Kleen @ 2015-03-20  0:29 UTC
  To: x86; +Cc: linux-kernel, peterz, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

perf uses RDPMC to read the performance counters, so it is useful to trace
those reads as well. Add a trace point for RDPMC, similar to the existing
MSR ones. Since there is only a single rdpmc call site in the whole kernel
(in perf), just add the trace statement to that call instead of moving the
rdpmcl inline out of line.
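
For context, rdpmcl() is only a thin wrapper around the native_read_pmc()
inline in asm/msr.h, roughly as sketched below (paraphrased from the
existing header, not changed by this patch), which is why tracing at the
single call site is simpler than moving it out of line:

/* Roughly what asm/msr.h already provides (unchanged by this patch): */
static inline unsigned long long native_read_pmc(int counter)
{
        DECLARE_ARGS(val, low, high);

        asm volatile("rdpmc" : EAX_EDX_RET(val, low, high) : "c" (counter));
        return EAX_EDX_VAL(val, low, high);
}

#define rdpmcl(counter, val) ((val) = native_read_pmc(counter))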

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c | 3 +++
 include/trace/events/msr.h       | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e0dab5c..9644d95 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -26,6 +26,8 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 
+#include <trace/events/msr.h>
+
 #include <asm/apic.h>
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
@@ -82,6 +84,7 @@ u64 x86_perf_event_update(struct perf_event *event)
 again:
 	prev_raw_count = local64_read(&hwc->prev_count);
 	rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+	trace_rdpmc(hwc->event_base_rdpmc, new_raw_count, 0);
 
 	if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
 					new_raw_count) != prev_raw_count)
diff --git a/include/trace/events/msr.h b/include/trace/events/msr.h
index e1677e8..4fa81b5 100644
--- a/include/trace/events/msr.h
+++ b/include/trace/events/msr.h
@@ -40,6 +40,12 @@ DEFINE_EVENT(msr_trace_class, write_msr,
 	     TP_ARGS(msr, val, failed)
 );
 
+DEFINE_EVENT(msr_trace_class, rdpmc,
+	     TP_PROTO(unsigned msr, u64 val, int failed),
+	     TP_ARGS(msr, val, failed)
+);
+
+
 #endif /* _TRACE_MSR_H */
 
 /* This part must be outside protection */
-- 
1.9.3

