All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Lucas Karpinski <lkarpins@redhat.com>
To: viro@zeniv.linux.org.uk, brauner@kernel.org, jack@suse.cz
Cc: linux-fsdevel@vger.kernel.org, linux-kernel@vger.kernel.org,
	alexl@redhat.com, echanude@redhat.com, ikent@redhat.com,
	Lucas Karpinski <lkarpins@redhat.com>
Subject: [RFC v2 1/1] fs/namespace: defer RCU sync for MNT_DETACH umount
Date: Fri, 26 Apr 2024 15:53:48 -0400	[thread overview]
Message-ID: <20240426195429.28547-2-lkarpins@redhat.com> (raw)
In-Reply-To: <20240426195429.28547-1-lkarpins@redhat.com>

Use call_rcu to defer releasing the detached filesystem when calling
namespace_unlock() during a lazy umount.

When detaching (MNT_DETACH) a filesystem, it should not be necessary to
wait for the grace period before completing the syscall. The
expectation that the filesystem is shut down by the time the syscall
returns does not apply in this case.

Calling synchronize_rcu_expedited() has a significant cost on RT kernels,
which default to rcupdate.rcu_normal_after_boot=1. The unmounted struct
mounts are instead queued on a separate list, where they are no longer
accessible to subsequent syscalls, and are released once the grace
period completes.

Without the patch, on a 6.9.0-rc2-rt kernel:
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount mnt
        0.02756 +- 0.00299 seconds time elapsed  ( +- 10.84% )
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount -l mnt
        0.04422 +- 0.00521 seconds time elapsed  ( +- 11.79% )

With the patch, on a 6.9.0-rc2-rt kernel:
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount mnt
        0.02852 +- 0.00377 seconds time elapsed  ( +- 13.20% )
perf stat -r 10 --null --pre 'mount -t tmpfs tmpfs mnt' -- umount -l mnt
        0.0030812 +- 0.0000524 seconds time elapsed  ( +-  1.70% )

Signed-off-by: Alexander Larsson <alexl@redhat.com>
Signed-off-by: Eric Chanudet <echanude@redhat.com>
Signed-off-by: Lucas Karpinski <lkarpins@redhat.com>
---
 fs/namespace.c | 51 ++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 45 insertions(+), 6 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 5a51315c6678..df03fc0d1990 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -45,6 +45,11 @@ static unsigned int m_hash_shift __ro_after_init;
 static unsigned int mp_hash_mask __ro_after_init;
 static unsigned int mp_hash_shift __ro_after_init;
 
+struct mount_delayed_release {
+	struct rcu_head rcu;
+	struct hlist_head release_list;
+};
+
 static __initdata unsigned long mhash_entries;
 static int __init set_mhash_entries(char *str)
 {
@@ -78,6 +83,7 @@ static struct kmem_cache *mnt_cache __ro_after_init;
 static DECLARE_RWSEM(namespace_sem);
 static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+static bool lazy_unlock = false; /* protected by namespace_sem */
 
 struct mount_kattr {
 	unsigned int attr_set;
@@ -1553,16 +1559,39 @@ int may_umount(struct vfsmount *mnt)
 
 EXPORT_SYMBOL(may_umount);
 
-static void namespace_unlock(void)
+static void free_mounts(struct hlist_head *mount_list)
 {
-	struct hlist_head head;
 	struct hlist_node *p;
 	struct mount *m;
+
+	hlist_for_each_entry_safe(m, p, mount_list, mnt_umount) {
+		hlist_del(&m->mnt_umount);
+		mntput(&m->mnt);
+	}
+}
+
+static void delayed_mount_release(struct rcu_head *head)
+{
+	struct mount_delayed_release *drelease =
+		container_of(head, struct mount_delayed_release, rcu);
+
+	free_mounts(&drelease->release_list);
+	kfree(drelease);
+}
+
+static void namespace_unlock(void)
+{
+	HLIST_HEAD(head);
 	LIST_HEAD(list);
+	bool lazy;
+
 
 	hlist_move_list(&unmounted, &head);
 	list_splice_init(&ex_mountpoints, &list);
 
+	lazy = lazy_unlock;
+	lazy_unlock = false;
+
 	up_write(&namespace_sem);
 
 	shrink_dentry_list(&list);
@@ -1570,12 +1599,21 @@ static void namespace_unlock(void)
 	if (likely(hlist_empty(&head)))
 		return;
 
-	synchronize_rcu_expedited();
+	if (lazy) {
+		struct mount_delayed_release *drelease =
+			kmalloc(sizeof(*drelease), GFP_KERNEL);
 
-	hlist_for_each_entry_safe(m, p, &head, mnt_umount) {
-		hlist_del(&m->mnt_umount);
-		mntput(&m->mnt);
+		if (unlikely(!drelease))
+			goto out;
+
+		hlist_move_list(&head, &drelease->release_list);
+		call_rcu(&drelease->rcu, delayed_mount_release);
+		return;
 	}
+
+out:
+	synchronize_rcu_expedited();
+	free_mounts(&head);
 }
 
 static inline void namespace_lock(void)
@@ -1798,6 +1836,7 @@ static int do_umount(struct mount *mnt, int flags)
 	}
 out:
 	unlock_mount_hash();
+	lazy_unlock = flags & MNT_DETACH ? true : false;
 	namespace_unlock();
 	return retval;
 }
-- 
2.44.0


  reply	other threads:[~2024-04-26 19:55 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-26 19:53 [RFC v2 0/1] fs/namespace: defer RCU sync for MNT_DETACH umount Lucas Karpinski
2024-04-26 19:53 ` Lucas Karpinski [this message]
2024-04-26 20:09   ` [RFC v2 1/1] " Al Viro
2024-04-30 13:25     ` Lucas Karpinski
2024-05-01 13:41       ` Ian Kent
2024-04-30 14:14   ` kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240426195429.28547-2-lkarpins@redhat.com \
    --to=lkarpins@redhat.com \
    --cc=alexl@redhat.com \
    --cc=brauner@kernel.org \
    --cc=echanude@redhat.com \
    --cc=ikent@redhat.com \
    --cc=jack@suse.cz \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.