All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Waiman Long <Waiman.Long@hpe.com>
To: Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	"H. Peter Anvin" <hpa@zytor.com>
Cc: x86@kernel.org, linux-kernel@vger.kernel.org,
	Scott J Norton <scott.norton@hp.com>,
	Douglas Hatch <doug.hatch@hp.com>,
	Davidlohr Bueso <dave@stgolabs.net>,
	Waiman Long <Waiman.Long@hpe.com>
Subject: [PATCH v6 4/6] locking/pvqspinlock: Collect slowpath lock statistics
Date: Fri, 11 Sep 2015 14:37:36 -0400	[thread overview]
Message-ID: <1441996658-62854-5-git-send-email-Waiman.Long@hpe.com> (raw)
In-Reply-To: <1441996658-62854-1-git-send-email-Waiman.Long@hpe.com>

This patch enables the accumulation of kicking and waiting related
PV qspinlock statistics when the new QUEUED_LOCK_STAT configuration
option is selected. It also enables the collection of kicking and
wakeup latencies which have a heavy dependency on the CPUs being used.

The measured latencies for different CPUs are:

	CPU		Wakeup		Kicking
	---		------		-------
	Haswell-EX	63.6us		 7.4us
	Westmere-EX	67.6us		 9.3us

The measured latencies varied a bit from run-to-run. The wakeup
latency is much higher than the kicking latency.

A sample of statistics counts after system bootup (with vCPU
overcommit) was:

hash_hops_count=9001
kick_latencies=138047878
kick_unlock_count=9001
kick_wait_count=9000
spurious_wakeup=3
wait_again_count=2
wait_head_count=10
wait_node_count=8994
wake_latencies=713195944

Signed-off-by: Waiman Long <Waiman.Long@hpe.com>
---
 arch/x86/Kconfig                    |    7 ++
 kernel/locking/qspinlock_paravirt.h |  171 ++++++++++++++++++++++++++++++++++-
 2 files changed, 173 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f37010f..d08828f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -719,6 +719,13 @@ config PARAVIRT_SPINLOCKS
 
 	  If you are unsure how to answer this question, answer Y.
 
+config QUEUED_LOCK_STAT
+	bool "Paravirt queued lock statistics"
+	depends on PARAVIRT && DEBUG_FS && QUEUED_SPINLOCKS
+	---help---
+	  Enable the collection of statistical data on the behavior of
+	  paravirtualized queued spinlocks and report them on debugfs.
+
 source "arch/x86/xen/Kconfig"
 
 config KVM_GUEST
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 4bd323d..2d71768 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -41,6 +41,147 @@ struct pv_node {
 };
 
 /*
+ * PV qspinlock statistics
+ */
+enum pv_qlock_stat {
+	pvstat_wait_head,
+	pvstat_wait_node,
+	pvstat_wait_again,
+	pvstat_kick_wait,
+	pvstat_kick_unlock,
+	pvstat_spurious,
+	pvstat_hops,
+	pvstat_num	/* Total number of statistics counts */
+};
+
+#ifdef CONFIG_QUEUED_LOCK_STAT
+/*
+ * Collect pvqspinlock statiatics
+ */
+#include <linux/debugfs.h>
+#include <linux/sched.h>
+
+static const char * const stat_fsnames[pvstat_num] = {
+	[pvstat_wait_head]   = "wait_head_count",
+	[pvstat_wait_node]   = "wait_node_count",
+	[pvstat_wait_again]  = "wait_again_count",
+	[pvstat_kick_wait]   = "kick_wait_count",
+	[pvstat_kick_unlock] = "kick_unlock_count",
+	[pvstat_spurious]    = "spurious_wakeup",
+	[pvstat_hops]	     = "hash_hops_count",
+};
+
+static atomic_t pvstats[pvstat_num];
+
+/*
+ * pv_kick_latencies = sum of all pv_kick latencies in ns
+ * pv_wake_latencies = sum of all wakeup latencies in ns
+ *
+ * Avg kick latency   = pv_kick_latencies/kick_unlock_count
+ * Avg wake latency   = pv_wake_latencies/kick_wait_count
+ * Avg # of hops/hash = hash_hops_count/kick_unlock_count
+ */
+static atomic64_t pv_kick_latencies, pv_wake_latencies;
+static DEFINE_PER_CPU(u64, pv_kick_time);
+
+/*
+ * Reset all the statistics counts if set
+ */
+static bool reset_cnts __read_mostly;
+
+/*
+ * Initialize debugfs for the PV qspinlock statistics
+ */
+static int __init pv_qspinlock_debugfs(void)
+{
+	struct dentry *d_pvqlock = debugfs_create_dir("pv-qspinlock", NULL);
+	int i;
+
+	if (!d_pvqlock)
+		pr_warn("Could not create 'pv-qspinlock' debugfs directory\n");
+
+	for (i = 0; i < pvstat_num; i++)
+		debugfs_create_u32(stat_fsnames[i], 0444, d_pvqlock,
+				  (u32 *)&pvstats[i]);
+	debugfs_create_u64("kick_latencies", 0444, d_pvqlock,
+			   (u64 *)&pv_kick_latencies);
+	debugfs_create_u64("wake_latencies", 0444, d_pvqlock,
+			   (u64 *)&pv_wake_latencies);
+	debugfs_create_bool("reset_cnts", 0644, d_pvqlock, (u32 *)&reset_cnts);
+	return 0;
+}
+fs_initcall(pv_qspinlock_debugfs);
+
+/*
+ * Reset all the counts
+ */
+static noinline void pvstat_reset(void)
+{
+	int i;
+
+	for (i = 0; i < pvstat_num; i++)
+		atomic_set(&pvstats[i], 0);
+	atomic64_set(&pv_kick_latencies, 0);
+	atomic64_set(&pv_wake_latencies, 0);
+	reset_cnts = 0;
+}
+
+/*
+ * Increment the PV qspinlock statistics counts
+ */
+static inline void pvstat_inc(enum pv_qlock_stat stat)
+{
+	atomic_inc(&pvstats[stat]);
+	if (unlikely(reset_cnts))
+		pvstat_reset();
+}
+
+/*
+ * PV hash hop count
+ */
+static inline void pvstat_hop(int hopcnt)
+{
+	atomic_add(hopcnt, &pvstats[pvstat_hops]);
+}
+
+/*
+ * Replacement function for pv_kick()
+ */
+static inline void __pv_kick(int cpu)
+{
+	u64 start = sched_clock();
+
+	*per_cpu_ptr(&pv_kick_time, cpu) = start;
+	pv_kick(cpu);
+	atomic64_add(sched_clock() - start, &pv_kick_latencies);
+}
+
+/*
+ * Replacement function for pv_wait()
+ */
+static inline void __pv_wait(u8 *ptr, u8 val)
+{
+	u64 *pkick_time = this_cpu_ptr(&pv_kick_time);
+
+	*pkick_time = 0;
+	pv_wait(ptr, val);
+	if (*pkick_time) {
+		atomic64_add(sched_clock() - *pkick_time, &pv_wake_latencies);
+		pvstat_inc(pvstat_kick_wait);
+	}
+}
+
+#define pv_kick(c)	__pv_kick(c)
+#define pv_wait(p, v)	__pv_wait(p, v)
+
+#else /* CONFIG_QUEUED_LOCK_STAT */
+
+static inline void pvstat_inc(enum pv_qlock_stat stat)	{ }
+static inline void pvstat_hop(int hopcnt)		{ }
+
+#endif /* CONFIG_QUEUED_LOCK_STAT */
+
+/*
  * Lock and MCS node addresses hash table for fast lookup
  *
  * Hashing is done on a per-cacheline basis to minimize the need to access
@@ -100,10 +241,13 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
 {
 	unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
 	struct pv_hash_entry *he;
+	int hopcnt = 0;
 
 	for_each_hash_entry(he, offset, hash) {
+		hopcnt++;
 		if (!cmpxchg(&he->lock, NULL, lock)) {
 			WRITE_ONCE(he->node, node);
+			pvstat_hop(hopcnt);
 			return &he->lock;
 		}
 	}
@@ -164,9 +308,10 @@ static void pv_init_node(struct mcs_spinlock *node)
 static void pv_wait_node(struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
+	int waitcnt = 0;
 	int loop;
 
-	for (;;) {
+	for (;; waitcnt++) {
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
 			if (READ_ONCE(node->locked))
 				return;
@@ -184,15 +329,22 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 */
 		smp_store_mb(pn->state, vcpu_halted);
 
-		if (!READ_ONCE(node->locked))
+		if (!READ_ONCE(node->locked)) {
+			pvstat_inc(pvstat_wait_node);
+			if (waitcnt)
+				pvstat_inc(pvstat_wait_again);
 			pv_wait(&pn->state, vcpu_halted);
+		}
 
 		/*
-		 * If pv_kick_node() changed us to vcpu_hashed, retain that value
-		 * so that pv_wait_head() knows to not also try to hash this lock.
+		 * If pv_kick_node() changed us to vcpu_hashed, retain that
+		 * value so that pv_wait_head() knows to not also try to hash
+		 * this lock.
 		 */
 		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
+		if (READ_ONCE(node->locked))
+			break;
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
 		 * spurious wakeup and the vCPU should wait again. However,
@@ -200,6 +352,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 * So it is better to spin for a while in the hope that the
 		 * MCS lock will be released soon.
 		 */
+		pvstat_inc(pvstat_spurious);
 	}
 
 	/*
@@ -250,6 +403,7 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 	struct pv_node *pn = (struct pv_node *)node;
 	struct __qspinlock *l = (void *)lock;
 	struct qspinlock **lp = NULL;
+	int waitcnt = 0;
 	int loop;
 
 	/*
@@ -259,7 +413,7 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 	if (READ_ONCE(pn->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;
 
-	for (;;) {
+	for (;; waitcnt++) {
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
 			if (!READ_ONCE(l->locked))
 				return;
@@ -290,14 +444,20 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 				return;
 			}
 		}
+		pvstat_inc(pvstat_wait_head);
+		if (waitcnt)
+			pvstat_inc(pvstat_wait_again);
 		pv_wait(&l->locked, _Q_SLOW_VAL);
 
+		if (!READ_ONCE(l->locked))
+			return;
 		/*
 		 * The unlocker should have freed the lock before kicking the
 		 * CPU. So if the lock is still not free, it is a spurious
 		 * wakeup and so the vCPU should wait again after spinning for
 		 * a while.
 		 */
+		pvstat_inc(pvstat_spurious);
 	}
 
 	/*
@@ -352,6 +512,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 	 * vCPU is harmless other than the additional latency in completing
 	 * the unlock.
 	 */
+	pvstat_inc(pvstat_kick_unlock);
 	pv_kick(node->cpu);
 }
 
-- 
1.7.1


  parent reply	other threads:[~2015-09-11 18:38 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-11 18:37 [PATCH v6 0/6] locking/qspinlock: Enhance pvqspinlock performance Waiman Long
2015-09-11 18:37 ` [PATCH v6 1/6] locking/qspinlock: relaxes cmpxchg & xchg ops in native code Waiman Long
2015-09-11 22:27   ` Davidlohr Bueso
2015-09-14 12:06     ` Peter Zijlstra
2015-09-14 18:40       ` Waiman Long
2015-09-14 15:16     ` Waiman Long
2015-09-11 18:37 ` [PATCH v6 2/6] locking/pvqspinlock: Unconditional PV kick with _Q_SLOW_VAL Waiman Long
2015-09-18  8:50   ` [tip:locking/core] locking/pvqspinlock: Kick the PV CPU unconditionally when _Q_SLOW_VAL tip-bot for Waiman Long
2015-09-11 18:37 ` [PATCH v6 3/6] locking/pvqspinlock, x86: Optimize PV unlock code path Waiman Long
2015-09-11 18:37 ` Waiman Long [this message]
2015-09-11 23:13   ` [PATCH v6 4/6] locking/pvqspinlock: Collect slowpath lock statistics Davidlohr Bueso
2015-09-14 15:25     ` Waiman Long
2015-09-14 21:41       ` Davidlohr Bueso
2015-09-15  3:47         ` Waiman Long
2015-09-11 18:37 ` [PATCH v6 5/6] locking/pvqspinlock: Allow 1 lock stealing attempt Waiman Long
2015-09-14 13:57   ` Peter Zijlstra
2015-09-14 19:02     ` Waiman Long
2015-09-14 14:00   ` Peter Zijlstra
2015-09-14 19:15     ` Waiman Long
2015-09-14 19:38       ` Waiman Long
2015-09-15  8:24       ` Peter Zijlstra
2015-09-15 15:29         ` Waiman Long
2015-09-16 15:01           ` Peter Zijlstra
2015-09-17 15:08             ` Waiman Long
2015-09-14 14:04   ` Peter Zijlstra
2015-09-14 19:19     ` Waiman Long
2015-09-11 18:37 ` [PATCH v6 6/6] locking/pvqspinlock: Queue node adaptive spinning Waiman Long
2015-09-14 14:10   ` Peter Zijlstra
2015-09-14 19:37     ` Waiman Long
2015-09-15  8:38       ` Peter Zijlstra
2015-09-15 15:32         ` Waiman Long
2015-09-16 15:03           ` Peter Zijlstra

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1441996658-62854-5-git-send-email-Waiman.Long@hpe.com \
    --to=waiman.long@hpe.com \
    --cc=dave@stgolabs.net \
    --cc=doug.hatch@hp.com \
    --cc=hpa@zytor.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=scott.norton@hp.com \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.