* Updated MSR tracing patchkit v2
@ 2015-03-20 0:29 Andi Kleen
2015-03-20 0:29 ` [PATCH 1/3] x86: Move msr accesses out of line Andi Kleen
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Andi Kleen @ 2015-03-20 0:29 UTC (permalink / raw
To: x86; +Cc: linux-kernel, peterz
This patchkit adds proper trace points for all MSR accesses. Very useful
for PMU (perf) debugging, but also various other low-level CPU areas.
Updated version:
- Now traces RDPMC too
- Add benchmarks on the overhead of moving MSR access out of line
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] x86: Move msr accesses out of line
2015-03-20 0:29 Updated MSR tracing patchkit v2 Andi Kleen
@ 2015-03-20 0:29 ` Andi Kleen
2015-03-20 0:29 ` [PATCH 2/3] x86: Add trace point for MSR accesses Andi Kleen
2015-03-20 0:29 ` [PATCH 3/3] x86, perf: Trace rdpmc too Andi Kleen
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2015-03-20 0:29 UTC (permalink / raw
To: x86; +Cc: linux-kernel, peterz, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
To add trace points to msr accesses we need to include
linux/tracepoint.h. Unfortunately this causes hellish include loops
when with the msr inlines in asm/msr.h, which are included all over.
I tried to fix several of them, but eventually gave up.
This patch moves the MSR functions out of line. An MSR access is typically
40-100 cycles or even slower, a call is a few cycles at best, so the
additional function call is not really significant.
Kernel text size is neutral:
11852945 1671656 1822720 15347321 ea2e79 vmlinux-no-msr
11852969 1671656 1822720 15347345 ea2e91 vmlinux-msr
As requested, some benchmarking on the difference to inline MSR (including
the trace points from the next patch):
The absolute differences are fairly low, 6-8 cycles for out of line +
trace point. 6-7% on Haswell. On Avoton the percentages are higher
because the base costs are lower, but the absolute cycle deltas are
very low too and in the same range.
I think it's reasonable to spend 6-8 cycles/call for much better
debuggability. In fact, looking at the traces already exposed a number
of opportunities to eliminate unnecessary accesses, which should give
much higher gains.
haswell:
136 cycles ool wrmsr
128 cycles inline wrmsr 6%
90 cycles ool rdmsr
84 cycles inline rdmsr 7%
avoton:
68 cycles ool wrmsr
54 cycles inline wrmsr 20%
60 cycles ool rdmsr
44 cycles inline rdmsr 26%
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/include/asm/msr.h | 51 ++++----------------------------------------
arch/x86/lib/msr.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 57 insertions(+), 47 deletions(-)
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h
index de36f22..99d6864 100644
--- a/arch/x86/include/asm/msr.h
+++ b/arch/x86/include/asm/msr.h
@@ -57,53 +57,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux)
#define EAX_EDX_RET(val, low, high) "=A" (val)
#endif
-static inline unsigned long long native_read_msr(unsigned int msr)
-{
- DECLARE_ARGS(val, low, high);
-
- asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
- return EAX_EDX_VAL(val, low, high);
-}
-
-static inline unsigned long long native_read_msr_safe(unsigned int msr,
- int *err)
-{
- DECLARE_ARGS(val, low, high);
-
- asm volatile("2: rdmsr ; xor %[err],%[err]\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err] ; jmp 1b\n\t"
- ".previous\n\t"
- _ASM_EXTABLE(2b, 3b)
- : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
- : "c" (msr), [fault] "i" (-EIO));
- return EAX_EDX_VAL(val, low, high);
-}
-
-static inline void native_write_msr(unsigned int msr,
- unsigned low, unsigned high)
-{
- asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
-}
-
-/* Can be uninlined because referenced by paravirt */
-notrace static inline int native_write_msr_safe(unsigned int msr,
- unsigned low, unsigned high)
-{
- int err;
- asm volatile("2: wrmsr ; xor %[err],%[err]\n"
- "1:\n\t"
- ".section .fixup,\"ax\"\n\t"
- "3: mov %[fault],%[err] ; jmp 1b\n\t"
- ".previous\n\t"
- _ASM_EXTABLE(2b, 3b)
- : [err] "=a" (err)
- : "c" (msr), "0" (low), "d" (high),
- [fault] "i" (-EIO)
- : "memory");
- return err;
-}
+extern unsigned long long native_read_msr(unsigned int msr);
+extern unsigned long long native_read_msr_safe(unsigned int msr, int *err);
+extern int native_write_msr_safe(unsigned int msr, unsigned low, unsigned high);
+extern void native_write_msr(unsigned int msr, unsigned low, unsigned high);
extern unsigned long long native_read_tsc(void);
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 4362373..7eed044 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -108,3 +108,56 @@ int msr_clear_bit(u32 msr, u8 bit)
{
return __flip_bit(msr, bit, false);
}
+
+inline unsigned long long native_read_msr(unsigned int msr)
+{
+ DECLARE_ARGS(val, low, high);
+
+ asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
+ return EAX_EDX_VAL(val, low, high);
+}
+EXPORT_SYMBOL(native_read_msr);
+
+inline unsigned long long native_read_msr_safe(unsigned int msr,
+ int *err)
+{
+ DECLARE_ARGS(val, low, high);
+
+ asm volatile("2: rdmsr ; xor %[err],%[err]\n"
+ "1:\n\t"
+ ".section .fixup,\"ax\"\n\t"
+ "3: mov %[fault],%[err] ; jmp 1b\n\t"
+ ".previous\n\t"
+ _ASM_EXTABLE(2b, 3b)
+ : [err] "=r" (*err), EAX_EDX_RET(val, low, high)
+ : "c" (msr), [fault] "i" (-EIO));
+ return EAX_EDX_VAL(val, low, high);
+}
+EXPORT_SYMBOL(native_read_msr_safe);
+
+inline void native_write_msr(unsigned int msr,
+ unsigned low, unsigned high)
+{
+ asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+}
+EXPORT_SYMBOL(native_write_msr);
+
+/* Can be uninlined because referenced by paravirt */
+notrace inline int native_write_msr_safe(unsigned int msr,
+ unsigned low, unsigned high)
+{
+ int err;
+
+ asm volatile("2: wrmsr ; xor %[err],%[err]\n"
+ "1:\n\t"
+ ".section .fixup,\"ax\"\n\t"
+ "3: mov %[fault],%[err] ; jmp 1b\n\t"
+ ".previous\n\t"
+ _ASM_EXTABLE(2b, 3b)
+ : [err] "=a" (err)
+ : "c" (msr), "0" (low), "d" (high),
+ [fault] "i" (-EIO)
+ : "memory");
+ return err;
+}
+EXPORT_SYMBOL(native_write_msr_safe);
--
1.9.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] x86: Add trace point for MSR accesses
2015-03-20 0:29 Updated MSR tracing patchkit v2 Andi Kleen
2015-03-20 0:29 ` [PATCH 1/3] x86: Move msr accesses out of line Andi Kleen
@ 2015-03-20 0:29 ` Andi Kleen
2015-03-20 0:29 ` [PATCH 3/3] x86, perf: Trace rdpmc too Andi Kleen
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2015-03-20 0:29 UTC (permalink / raw
To: x86; +Cc: linux-kernel, peterz, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
For debugging low level code interacting with the CPU
it is often useful to trace the MSR read/writes. This gives
a concise summary of PMU and other operations.
perf has an ad-hoc way to do this using trace_printk,
but it's somewhat limited (and also now spews ugly
messages when enabled).
Instead define real trace points for all MSR accesses.
This adds two new trace points: read_msr and write_msr.
They also report if the access faulted (if *_safe is used).
This allows filtering and triggering on specific
MSR values, which allows various more advanced
debugging techniques.
All the values are well defined in the CPU documentation.
I only added it to native MSR accesses in C, not paravirtualized
or in entry*.S (which is not too interesting).
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/lib/msr.c | 14 ++++++++++++--
include/trace/events/msr.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 58 insertions(+), 2 deletions(-)
create mode 100644 include/trace/events/msr.h
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c
index 7eed044..29d1952 100644
--- a/arch/x86/lib/msr.c
+++ b/arch/x86/lib/msr.c
@@ -1,6 +1,8 @@
#include <linux/module.h>
#include <linux/preempt.h>
#include <asm/msr.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/msr.h>
struct msr *msrs_alloc(void)
{
@@ -111,16 +113,20 @@ int msr_clear_bit(u32 msr, u8 bit)
inline unsigned long long native_read_msr(unsigned int msr)
{
+ unsigned long long lval;
DECLARE_ARGS(val, low, high);
asm volatile("rdmsr" : EAX_EDX_RET(val, low, high) : "c" (msr));
- return EAX_EDX_VAL(val, low, high);
+ lval = EAX_EDX_VAL(val, low, high);
+ trace_read_msr(msr, lval, 0);
+ return lval;
}
EXPORT_SYMBOL(native_read_msr);
inline unsigned long long native_read_msr_safe(unsigned int msr,
int *err)
{
+ unsigned long long lval;
DECLARE_ARGS(val, low, high);
asm volatile("2: rdmsr ; xor %[err],%[err]\n"
@@ -131,7 +137,9 @@ inline unsigned long long native_read_msr_safe(unsigned int msr,
_ASM_EXTABLE(2b, 3b)
: [err] "=r" (*err), EAX_EDX_RET(val, low, high)
: "c" (msr), [fault] "i" (-EIO));
- return EAX_EDX_VAL(val, low, high);
+ lval = EAX_EDX_VAL(val, low, high);
+ trace_read_msr(msr, lval, *err);
+ return lval;
}
EXPORT_SYMBOL(native_read_msr_safe);
@@ -139,6 +147,7 @@ inline void native_write_msr(unsigned int msr,
unsigned low, unsigned high)
{
asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory");
+ trace_write_msr(msr, ((u64)high << 32 | low), 0);
}
EXPORT_SYMBOL(native_write_msr);
@@ -158,6 +167,7 @@ notrace inline int native_write_msr_safe(unsigned int msr,
: "c" (msr), "0" (low), "d" (high),
[fault] "i" (-EIO)
: "memory");
+ trace_write_msr(msr, ((u64)high << 32 | low), err);
return err;
}
EXPORT_SYMBOL(native_write_msr_safe);
diff --git a/include/trace/events/msr.h b/include/trace/events/msr.h
new file mode 100644
index 0000000..e1677e8
--- /dev/null
+++ b/include/trace/events/msr.h
@@ -0,0 +1,46 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM msr
+
+#if !defined(_TRACE_MSR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_MSR_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Tracing for x86 model specific registers. Directly maps to the
+ * RDMSR/WRMSR instructions.
+ */
+
+DECLARE_EVENT_CLASS(msr_trace_class,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed),
+ TP_STRUCT__entry(
+ __field( unsigned, msr )
+ __field( u64, val )
+ __field( int, failed )
+ ),
+ TP_fast_assign(
+ __entry->msr = msr;
+ __entry->val = val;
+ __entry->failed = failed;
+ ),
+ TP_printk("%x, value %llx%s",
+ __entry->msr,
+ __entry->val,
+ __entry->failed ? " #GP" : "")
+);
+
+DEFINE_EVENT(msr_trace_class, read_msr,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+DEFINE_EVENT(msr_trace_class, write_msr,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+#endif /* _TRACE_MSR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
--
1.9.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] x86, perf: Trace rdpmc too
2015-03-20 0:29 Updated MSR tracing patchkit v2 Andi Kleen
2015-03-20 0:29 ` [PATCH 1/3] x86: Move msr accesses out of line Andi Kleen
2015-03-20 0:29 ` [PATCH 2/3] x86: Add trace point for MSR accesses Andi Kleen
@ 2015-03-20 0:29 ` Andi Kleen
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2015-03-20 0:29 UTC (permalink / raw
To: x86; +Cc: linux-kernel, peterz, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
perf uses RDPMC to read the performance counters, so it's useful to trace
that too. Add a trace point for RDPMC too, similar to the existing
MSR ones. Since there is only a single call of rdpmc in the whole kernel
(in perf),
just add the trace statement to that call, instead of moving the rdpmcl
inline out of line.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event.c | 3 +++
include/trace/events/msr.h | 6 ++++++
2 files changed, 9 insertions(+)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index e0dab5c..9644d95 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -26,6 +26,8 @@
#include <linux/bitops.h>
#include <linux/device.h>
+#include <trace/events/msr.h>
+
#include <asm/apic.h>
#include <asm/stacktrace.h>
#include <asm/nmi.h>
@@ -82,6 +84,7 @@ u64 x86_perf_event_update(struct perf_event *event)
again:
prev_raw_count = local64_read(&hwc->prev_count);
rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ trace_rdpmc(hwc->event_base_rdpmc, new_raw_count, 0);
if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
diff --git a/include/trace/events/msr.h b/include/trace/events/msr.h
index e1677e8..4fa81b5 100644
--- a/include/trace/events/msr.h
+++ b/include/trace/events/msr.h
@@ -40,6 +40,12 @@ DEFINE_EVENT(msr_trace_class, write_msr,
TP_ARGS(msr, val, failed)
);
+DEFINE_EVENT(msr_trace_class, rdpmc,
+ TP_PROTO(unsigned msr, u64 val, int failed),
+ TP_ARGS(msr, val, failed)
+);
+
+
#endif /* _TRACE_MSR_H */
/* This part must be outside protection */
--
1.9.3
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-03-20 0:30 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-20 0:29 Updated MSR tracing patchkit v2 Andi Kleen
2015-03-20 0:29 ` [PATCH 1/3] x86: Move msr accesses out of line Andi Kleen
2015-03-20 0:29 ` [PATCH 2/3] x86: Add trace point for MSR accesses Andi Kleen
2015-03-20 0:29 ` [PATCH 3/3] x86, perf: Trace rdpmc too Andi Kleen
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).