From: Dave Chinner <david@fromorbit.com>
To: linux-fsdevel@vger.kernel.org
Cc: linux-block@vger.kernel.org, linux-cachefs@redhat.com,
dhowells@redhat.com, gfs2@lists.linux.dev,
dm-devel@lists.linux.dev, linux-security-module@vger.kernel.org,
selinux@vger.kernel.org, linux-kernel@vger.kernel.org
Subject: [PATCH 05/11] selinux: use dlist for isec inode list
Date: Wed, 6 Dec 2023 17:05:34 +1100 [thread overview]
Message-ID: <20231206060629.2827226-6-david@fromorbit.com> (raw)
In-Reply-To: <20231206060629.2827226-1-david@fromorbit.com>
From: Dave Chinner <dchinner@redhat.com>
Because it's a horrible point of lock contention under heavily
concurrent directory traversals...
- 12.14% d_instantiate
- 12.06% security_d_instantiate
- 12.13% selinux_d_instantiate
- 12.16% inode_doinit_with_dentry
- 15.45% _raw_spin_lock
- do_raw_spin_lock
14.68% __pv_queued_spin_lock_slowpath
Signed-off-by: Dave Chinner <dchinner@redhat.com>
---
include/linux/dlock-list.h | 9 ++++
security/selinux/hooks.c | 72 +++++++++++++++----------------
security/selinux/include/objsec.h | 6 +--
3 files changed, 47 insertions(+), 40 deletions(-)
diff --git a/include/linux/dlock-list.h b/include/linux/dlock-list.h
index 327cb9edc7e3..7ad933b8875d 100644
--- a/include/linux/dlock-list.h
+++ b/include/linux/dlock-list.h
@@ -132,6 +132,15 @@ extern void dlock_lists_add(struct dlock_list_node *node,
struct dlock_list_heads *dlist);
extern void dlock_lists_del(struct dlock_list_node *node);
+static inline void
+dlock_list_del_iter(struct dlock_list_iter *iter,
+ struct dlock_list_node *node)
+{
+ WARN_ON_ONCE((iter->entry != READ_ONCE(node->head)));
+ list_del_init(&node->list);
+ WRITE_ONCE(node->head, NULL);
+}
+
/*
* Find the first entry of the next available list.
*/
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index feda711c6b7b..0358d7c66949 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -340,26 +340,11 @@ static struct inode_security_struct *backing_inode_security(struct dentry *dentr
static void inode_free_security(struct inode *inode)
{
struct inode_security_struct *isec = selinux_inode(inode);
- struct superblock_security_struct *sbsec;
if (!isec)
return;
- sbsec = selinux_superblock(inode->i_sb);
- /*
- * As not all inode security structures are in a list, we check for
- * empty list outside of the lock to make sure that we won't waste
- * time taking a lock doing nothing.
- *
- * The list_del_init() function can be safely called more than once.
- * It should not be possible for this function to be called with
- * concurrent list_add(), but for better safety against future changes
- * in the code, we use list_empty_careful() here.
- */
- if (!list_empty_careful(&isec->list)) {
- spin_lock(&sbsec->isec_lock);
- list_del_init(&isec->list);
- spin_unlock(&sbsec->isec_lock);
- }
+ if (!list_empty(&isec->list.list))
+ dlock_lists_del(&isec->list);
}
struct selinux_mnt_opts {
@@ -547,6 +532,8 @@ static int sb_finish_set_opts(struct super_block *sb)
struct superblock_security_struct *sbsec = selinux_superblock(sb);
struct dentry *root = sb->s_root;
struct inode *root_inode = d_backing_inode(root);
+ struct dlock_list_iter iter;
+ struct inode_security_struct *isec, *n;
int rc = 0;
if (sbsec->behavior == SECURITY_FS_USE_XATTR) {
@@ -570,27 +557,28 @@ static int sb_finish_set_opts(struct super_block *sb)
/* Initialize the root inode. */
rc = inode_doinit_with_dentry(root_inode, root);
- /* Initialize any other inodes associated with the superblock, e.g.
- inodes created prior to initial policy load or inodes created
- during get_sb by a pseudo filesystem that directly
- populates itself. */
- spin_lock(&sbsec->isec_lock);
- while (!list_empty(&sbsec->isec_head)) {
- struct inode_security_struct *isec =
- list_first_entry(&sbsec->isec_head,
- struct inode_security_struct, list);
+ /*
+ * Initialize any other inodes associated with the superblock, e.g.
+ * inodes created prior to initial policy load or inodes created during
+ * get_sb by a pseudo filesystem that directly populates itself.
+ */
+ init_dlock_list_iter(&iter, &sbsec->isec_head);
+ dlist_for_each_entry_safe(isec, n, &iter, list) {
struct inode *inode = isec->inode;
- list_del_init(&isec->list);
- spin_unlock(&sbsec->isec_lock);
+
+ dlock_list_del_iter(&iter, &isec->list);
+ dlock_list_unlock(&iter);
+
inode = igrab(inode);
if (inode) {
if (!IS_PRIVATE(inode))
inode_doinit_with_dentry(inode, NULL);
iput(inode);
}
- spin_lock(&sbsec->isec_lock);
+
+ dlock_list_relock(&iter);
}
- spin_unlock(&sbsec->isec_lock);
+ WARN_ON_ONCE(!dlock_lists_empty(&sbsec->isec_head));
return rc;
}
@@ -1428,10 +1416,8 @@ static int inode_doinit_with_dentry(struct inode *inode, struct dentry *opt_dent
/* Defer initialization until selinux_complete_init,
after the initial policy is loaded and the security
server is ready to handle calls. */
- spin_lock(&sbsec->isec_lock);
- if (list_empty(&isec->list))
- list_add(&isec->list, &sbsec->isec_head);
- spin_unlock(&sbsec->isec_lock);
+ if (list_empty(&isec->list.list))
+ dlock_lists_add(&isec->list, &sbsec->isec_head);
goto out_unlock;
}
@@ -2548,9 +2534,10 @@ static int selinux_sb_alloc_security(struct super_block *sb)
{
struct superblock_security_struct *sbsec = selinux_superblock(sb);
+ if (alloc_dlock_list_heads(&sbsec->isec_head))
+ return -ENOMEM;
+
mutex_init(&sbsec->lock);
- INIT_LIST_HEAD(&sbsec->isec_head);
- spin_lock_init(&sbsec->isec_lock);
sbsec->sid = SECINITSID_UNLABELED;
sbsec->def_sid = SECINITSID_FILE;
sbsec->mntpoint_sid = SECINITSID_UNLABELED;
@@ -2558,6 +2545,15 @@ static int selinux_sb_alloc_security(struct super_block *sb)
return 0;
}
+static void selinux_sb_free_security(struct super_block *sb)
+{
+ struct superblock_security_struct *sbsec = selinux_superblock(sb);
+
+ if (!sbsec)
+ return;
+ free_dlock_list_heads(&sbsec->isec_head);
+}
+
static inline int opt_len(const char *s)
{
bool open_quote = false;
@@ -2841,7 +2837,7 @@ static int selinux_inode_alloc_security(struct inode *inode)
u32 sid = current_sid();
spin_lock_init(&isec->lock);
- INIT_LIST_HEAD(&isec->list);
+ init_dlock_list_node(&isec->list);
isec->inode = inode;
isec->sid = SECINITSID_UNLABELED;
isec->sclass = SECCLASS_FILE;
@@ -2920,6 +2916,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir,
if (rc)
return rc;
+
/* Possibly defer initialization to selinux_complete_init. */
if (sbsec->flags & SE_SBINITIALIZED) {
struct inode_security_struct *isec = selinux_inode(inode);
@@ -7215,6 +7212,7 @@ static struct security_hook_list selinux_hooks[] __ro_after_init = {
selinux_msg_queue_alloc_security),
LSM_HOOK_INIT(shm_alloc_security, selinux_shm_alloc_security),
LSM_HOOK_INIT(sb_alloc_security, selinux_sb_alloc_security),
+ LSM_HOOK_INIT(sb_free_security, selinux_sb_free_security),
LSM_HOOK_INIT(inode_alloc_security, selinux_inode_alloc_security),
LSM_HOOK_INIT(sem_alloc_security, selinux_sem_alloc_security),
LSM_HOOK_INIT(secid_to_secctx, selinux_secid_to_secctx),
diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h
index 8159fd53c3de..e0709f429c56 100644
--- a/security/selinux/include/objsec.h
+++ b/security/selinux/include/objsec.h
@@ -24,6 +24,7 @@
#include <linux/spinlock.h>
#include <linux/lsm_hooks.h>
#include <linux/msg.h>
+#include <linux/dlock-list.h>
#include <net/net_namespace.h>
#include "flask.h"
#include "avc.h"
@@ -45,7 +46,7 @@ enum label_initialized {
struct inode_security_struct {
struct inode *inode; /* back pointer to inode object */
- struct list_head list; /* list of inode_security_struct */
+ struct dlock_list_node list; /* list of inode_security_struct */
u32 task_sid; /* SID of creating task */
u32 sid; /* SID of this object */
u16 sclass; /* security class of this object */
@@ -67,8 +68,7 @@ struct superblock_security_struct {
unsigned short behavior; /* labeling behavior */
unsigned short flags; /* which mount options were specified */
struct mutex lock;
- struct list_head isec_head;
- spinlock_t isec_lock;
+ struct dlock_list_heads isec_head;
};
struct msg_security_struct {
--
2.42.0
next prev parent reply other threads:[~2023-12-06 6:06 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-12-06 6:05 [PATCH 0/11] vfs: inode cache scalability improvements Dave Chinner
2023-12-06 6:05 ` [PATCH 01/11] lib/dlock-list: Distributed and lock-protected lists Dave Chinner
2023-12-07 2:23 ` Al Viro
2023-12-06 6:05 ` [PATCH 02/11] vfs: Remove unnecessary list_for_each_entry_safe() variants Dave Chinner
2023-12-07 2:26 ` Al Viro
2023-12-07 4:18 ` Kent Overstreet
2023-12-06 6:05 ` [PATCH 03/11] vfs: Use dlock list for superblock's inode list Dave Chinner
2023-12-07 2:40 ` Al Viro
2023-12-07 4:59 ` Dave Chinner
2023-12-07 5:03 ` Kent Overstreet
2023-12-06 6:05 ` [PATCH 04/11] lib/dlock-list: Make sibling CPUs share the same linked list Dave Chinner
2023-12-07 4:31 ` Kent Overstreet
2023-12-07 5:42 ` Kent Overstreet
2023-12-07 6:25 ` Dave Chinner
2023-12-07 6:49 ` Al Viro
2023-12-06 6:05 ` Dave Chinner [this message]
2023-12-06 21:52 ` [PATCH 05/11] selinux: use dlist for isec inode list Paul Moore
2023-12-06 23:04 ` Dave Chinner
2023-12-07 0:36 ` Paul Moore
2023-12-06 6:05 ` [PATCH 06/11] vfs: factor out inode hash head calculation Dave Chinner
2023-12-07 3:02 ` Al Viro
2023-12-06 6:05 ` [PATCH 07/11] hlist-bl: add hlist_bl_fake() Dave Chinner
2023-12-07 3:05 ` Al Viro
2023-12-06 6:05 ` [PATCH 08/11] vfs: inode cache conversion to hash-bl Dave Chinner
2023-12-07 4:58 ` Kent Overstreet
2023-12-07 6:03 ` Dave Chinner
2023-12-07 6:42 ` Al Viro
2023-12-06 6:05 ` [PATCH 09/11] hash-bl: explicitly initialise hash-bl heads Dave Chinner
2023-12-07 3:15 ` Al Viro
2023-12-06 6:05 ` [PATCH 10/11] list_bl: don't use bit locks for PREEMPT_RT or lockdep Dave Chinner
2023-12-07 4:16 ` Kent Overstreet
2023-12-07 4:41 ` Dave Chinner
2023-12-06 6:05 ` [PATCH 11/11] hlist-bl: introduced nested locking for dm-snap Dave Chinner
2023-12-07 17:08 ` [PATCH 0/11] vfs: inode cache scalability improvements Kent Overstreet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20231206060629.2827226-6-david@fromorbit.com \
--to=david@fromorbit.com \
--cc=dhowells@redhat.com \
--cc=dm-devel@lists.linux.dev \
--cc=gfs2@lists.linux.dev \
--cc=linux-block@vger.kernel.org \
--cc=linux-cachefs@redhat.com \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-security-module@vger.kernel.org \
--cc=selinux@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).