From: "tip-bot2 for Thomas Gleixner" <tip-bot2@linutronix.de>
To: linux-tip-commits@vger.kernel.org
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Jason Wang <jasowang@redhat.com>,
	Al Viro <viro@zeniv.linux.org.uk>,
	x86@kernel.org, linux-kernel@vger.kernel.org
Subject: [tip: sched/core] eventfd: Make signal recursion protection a task bit
Date: Fri, 27 Aug 2021 23:41:12 -0000	[thread overview]
Message-ID: <163010767256.25758.8600942642007356589.tip-bot2@tip-bot2> (raw)
In-Reply-To: <87wnp9idso.ffs@tglx>

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     b542e383d8c005f06a131e2b40d5889b812f19c6
Gitweb:        https://git.kernel.org/tip/b542e383d8c005f06a131e2b40d5889b812f19c6
Author:        Thomas Gleixner <tglx@linutronix.de>
AuthorDate:    Thu, 29 Jul 2021 13:01:59 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Sat, 28 Aug 2021 01:33:02 +02:00

eventfd: Make signal recursion protection a task bit

The recursion protection for eventfd_signal() is based on a per CPU
variable and relies on the !RT semantics of spin_lock_irqsave() to
protect that per CPU variable. On RT kernels spin_lock_irqsave()
disables neither preemption nor interrupts, which allows the lock held
section to be preempted. If the preempting task invokes
eventfd_signal() as well, then the recursion warning triggers.
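
For illustration, the kind of nesting the protection guards against
looks roughly like the sketch below: a custom wait queue wakeup handler
that forwards the event to another eventfd, so eventfd_signal()
re-enters itself through wake_up_locked_poll(). The fwd structure and
function names are made up for this sketch and are not part of the
patch; how such an entry ends up on the eventfd's wait queue (typically
via the file's poll callback) is left out.

#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/wait.h>

/* Hypothetical forwarder: an entry on eventfd A's wait queue that signals eventfd B. */
struct fwd {
	wait_queue_entry_t	wait;
	struct eventfd_ctx	*target;
};

/*
 * Called from wake_up_locked_poll() inside eventfd_signal(A).  Signalling
 * the next eventfd from here re-enters eventfd_signal() on the same task
 * stack:
 *
 *   eventfd_signal(A) -> wake_up_locked_poll() -> fwd_wake() -> eventfd_signal(B) -> ...
 */
static int fwd_wake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
{
	struct fwd *f = container_of(wait, struct fwd, wait);

	eventfd_signal(f->target, 1);
	return 0;
}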

Paolo suggested protecting the per CPU variable with a local lock, but
that is heavyweight and not actually necessary. The goal of this
protection is to prevent the task stack from overflowing, which can be
achieved with per task recursion protection as well.

Replace the per CPU variable with a per task bit, similar to other
recursion protection bits like task_struct::in_page_owner. This works
on both !RT and RT kernels and, as a side effect, removes the extra per
CPU storage.

No functional change for !RT kernels.
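
For callers the documented convention is unchanged apart from the
renamed, inverted helper: check eventfd_signal_allowed() before
signalling from a context that may already be inside eventfd_signal(),
and defer to a safe context when it returns false (see the comment in
eventfd_signal() below). A minimal sketch of that pattern, assuming a
hypothetical driver object with its own work item; my_notifier,
my_notify and friends are illustrative names, not from the patch:

#include <linux/eventfd.h>
#include <linux/kernel.h>
#include <linux/workqueue.h>

/* Hypothetical driver state: an eventfd to signal plus a deferral work item. */
struct my_notifier {
	struct eventfd_ctx	*ctx;
	struct work_struct	work;	/* set up with INIT_WORK(&work, my_deferred_signal) */
};

/* Runs in process context where no eventfd_signal() is active on this task. */
static void my_deferred_signal(struct work_struct *work)
{
	struct my_notifier *n = container_of(work, struct my_notifier, work);

	eventfd_signal(n->ctx, 1);
}

static void my_notify(struct my_notifier *n)
{
	if (eventfd_signal_allowed())
		eventfd_signal(n->ctx, 1);
	else
		schedule_work(&n->work);	/* defer to a safe context */
}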

Reported-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Daniel Bristot de Oliveira <bristot@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Link: https://lore.kernel.org/r/87wnp9idso.ffs@tglx

---
 fs/aio.c                |  2 +-
 fs/eventfd.c            | 12 +++++-------
 include/linux/eventfd.h | 11 +++++------
 include/linux/sched.h   |  4 ++++
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 76ce0cc..51b08ab 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1695,7 +1695,7 @@ static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
 		list_del(&iocb->ki_list);
 		iocb->ki_res.res = mangle_poll(mask);
 		req->done = true;
-		if (iocb->ki_eventfd && eventfd_signal_count()) {
+		if (iocb->ki_eventfd && eventfd_signal_allowed()) {
 			iocb = NULL;
 			INIT_WORK(&req->work, aio_poll_put_work);
 			schedule_work(&req->work);
diff --git a/fs/eventfd.c b/fs/eventfd.c
index e265b6d..3627dd7 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -25,8 +25,6 @@
 #include <linux/idr.h>
 #include <linux/uio.h>
 
-DEFINE_PER_CPU(int, eventfd_wake_count);
-
 static DEFINE_IDA(eventfd_ida);
 
 struct eventfd_ctx {
@@ -67,21 +65,21 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
 	 * Deadlock or stack overflow issues can happen if we recurse here
 	 * through waitqueue wakeup handlers. If the caller users potentially
 	 * nested waitqueues with custom wakeup handlers, then it should
-	 * check eventfd_signal_count() before calling this function. If
-	 * it returns true, the eventfd_signal() call should be deferred to a
+	 * check eventfd_signal_allowed() before calling this function. If
+	 * it returns false, the eventfd_signal() call should be deferred to a
 	 * safe context.
 	 */
-	if (WARN_ON_ONCE(this_cpu_read(eventfd_wake_count)))
+	if (WARN_ON_ONCE(current->in_eventfd_signal))
 		return 0;
 
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
-	this_cpu_inc(eventfd_wake_count);
+	current->in_eventfd_signal = 1;
 	if (ULLONG_MAX - ctx->count < n)
 		n = ULLONG_MAX - ctx->count;
 	ctx->count += n;
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, EPOLLIN);
-	this_cpu_dec(eventfd_wake_count);
+	current->in_eventfd_signal = 0;
 	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
 
 	return n;
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index fa0a524..305d5f1 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/percpu-defs.h>
 #include <linux/percpu.h>
+#include <linux/sched.h>
 
 /*
  * CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
@@ -43,11 +44,9 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w
 				  __u64 *cnt);
 void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
 
-DECLARE_PER_CPU(int, eventfd_wake_count);
-
-static inline bool eventfd_signal_count(void)
+static inline bool eventfd_signal_allowed(void)
 {
-	return this_cpu_read(eventfd_wake_count);
+	return !current->in_eventfd_signal;
 }
 
 #else /* CONFIG_EVENTFD */
@@ -78,9 +77,9 @@ static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx,
 	return -ENOSYS;
 }
 
-static inline bool eventfd_signal_count(void)
+static inline bool eventfd_signal_allowed(void)
 {
-	return false;
+	return true;
 }
 
 static inline void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3bb9fec..6421a9a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -864,6 +864,10 @@ struct task_struct {
 	/* Used by page_owner=on to detect recursion in page tracking. */
 	unsigned			in_page_owner:1;
 #endif
+#ifdef CONFIG_EVENTFD
+	/* Recursion prevention for eventfd_signal() */
+	unsigned			in_eventfd_signal:1;
+#endif
 
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
