From: "Darrick J. Wong" <djwong@kernel.org>
To: djwong@kernel.org, cem@kernel.org
Cc: Christoph Hellwig <hch@lst.de>,
cmaiolino@redhat.com, linux-xfs@vger.kernel.org,
hch@infradead.org
Subject: [PATCH 089/111] libxfs: add xfile support
Date: Mon, 15 Apr 2024 18:00:23 -0700 [thread overview]
Message-ID: <171322883501.211103.5837361141423551988.stgit@frogsfrogsfrogs> (raw)
In-Reply-To: <171322882240.211103.3776766269442402814.stgit@frogsfrogsfrogs>
From: Darrick J. Wong <djwong@kernel.org>
Port the xfile functionality (anonymous pageable file-index memory) from
the kernel. In userspace, we try to use memfd_create() to create tmpfs files
that are not in any namespace, matching the kernel.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
libxfs/Makefile | 2
libxfs/xfile.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++++++
libxfs/xfile.h | 21 +++++
repair/xfs_repair.c | 15 ++++
4 files changed, 248 insertions(+)
create mode 100644 libxfs/xfile.c
create mode 100644 libxfs/xfile.h
diff --git a/libxfs/Makefile b/libxfs/Makefile
index 6f688c0ad25a..43e8ae183229 100644
--- a/libxfs/Makefile
+++ b/libxfs/Makefile
@@ -26,6 +26,7 @@ HFILES = \
libxfs_priv.h \
linux-err.h \
topology.h \
+ xfile.h \
xfs_ag_resv.h \
xfs_alloc.h \
xfs_alloc_btree.h \
@@ -66,6 +67,7 @@ CFILES = cache.c \
topology.c \
trans.c \
util.c \
+ xfile.c \
xfs_ag.c \
xfs_ag_resv.c \
xfs_alloc.c \
diff --git a/libxfs/xfile.c b/libxfs/xfile.c
new file mode 100644
index 000000000000..cba173cc17f1
--- /dev/null
+++ b/libxfs/xfile.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "libxfs_priv.h"
+#include "libxfs.h"
+#include "libxfs/xfile.h"
+#include <linux/memfd.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/*
+ * Swappable Temporary Memory
+ * ==========================
+ *
+ * Offline checking sometimes needs to be able to stage a large amount of data
+ * in memory. This information might not fit in the available memory and it
+ * doesn't all need to be accessible at all times. In other words, we want an
+ * indexed data buffer to store data that can be paged out.
+ *
+ * memfd files meet those requirements. Therefore, the xfile mechanism uses
+ * one to store our staging data. The xfile must be freed with xfile_destroy.
+ *
+ * xfiles assume that the caller will handle all required concurrency
+ * management; file locks are not taken.
+ */
+
+/*
+ * Starting with Linux 6.3, there's a new MFD_NOEXEC_SEAL flag that disables
+ * the longstanding memfd behavior that files are created with the executable
+ * bit set, and seals the file against it being turned back on.
+ *
+ * Supply a fallback definition when the included headers do not provide one,
+ * so that this file still builds against headers that predate the flag.
+ */
+#ifndef MFD_NOEXEC_SEAL
+# define MFD_NOEXEC_SEAL (0x0008U)
+#endif
+
+/*
+ * Open a memory-backed fd to back an xfile.  We require close-on-exec here,
+ * because these memfd files function as windowed RAM and hence should never
+ * be shared with other processes.
+ *
+ * Several mechanisms are tried in order of preference: memfd_create() with
+ * and then without MFD_NOEXEC_SEAL, an O_TMPFILE file in /dev/shm or /tmp,
+ * and finally an unlinked mkostemp() file in the current directory.
+ *
+ * Returns the new fd, or -1 with errno set if every mechanism failed.
+ */
+static int
+xfile_create_fd(
+	const char		*description)
+{
+	/*
+	 * mkostemp rewrites the XXXXXX suffix in place, so the template must
+	 * live in writable storage.  Passing a string literal is undefined
+	 * behavior and crashes when literals are mapped read-only.
+	 */
+	char			tmpl[] = "libxfsXXXXXX";
+	int			fd = -1;
+	int			ret;
+
+	/*
+	 * memfd_create was added to kernel 3.17 (2014).  MFD_NOEXEC_SEAL
+	 * causes -EINVAL on old kernels, so fall back to omitting it so that
+	 * new xfs_repair can run on an older recovery cd kernel.
+	 */
+	fd = memfd_create(description, MFD_CLOEXEC | MFD_NOEXEC_SEAL);
+	if (fd >= 0)
+		goto got_fd;
+	fd = memfd_create(description, MFD_CLOEXEC);
+	if (fd >= 0)
+		goto got_fd;
+
+	/*
+	 * O_TMPFILE exists as of kernel 3.11 (2013), which means that if we
+	 * find it, we're pretty safe in assuming O_CLOEXEC exists too.
+	 */
+	fd = open("/dev/shm", O_TMPFILE | O_CLOEXEC | O_RDWR, 0600);
+	if (fd >= 0)
+		goto got_fd;
+
+	fd = open("/tmp", O_TMPFILE | O_CLOEXEC | O_RDWR, 0600);
+	if (fd >= 0)
+		goto got_fd;
+
+	/*
+	 * mkostemp exists as of glibc 2.7 (2007) and O_CLOEXEC exists as of
+	 * kernel 2.6.23 (2007).
+	 */
+	fd = mkostemp(tmpl, O_CLOEXEC);
+	if (fd >= 0) {
+		/*
+		 * Drop the name right away so that the backing file is
+		 * anonymous like the other variants and its space is released
+		 * when the fd is closed.  The fd stays usable even if the
+		 * unlink fails, so the result is deliberately not checked.
+		 */
+		unlink(tmpl);
+		goto got_fd;
+	}
+
+	/* Make sure the caller always sees a nonzero errno on failure. */
+	if (!errno)
+		errno = EOPNOTSUPP;
+	return -1;
+got_fd:
+	/*
+	 * Turn off mode bits we don't want -- group members and others should
+	 * not have access to the xfile, nor should it be executable.  memfds
+	 * are created with mode 0777, but we'll be careful just in case the
+	 * other implementations fail to set 0600.  A failure here is reported
+	 * but is not fatal; the fd is still returned.
+	 */
+	ret = fchmod(fd, 0600);
+	if (ret)
+		perror("disabling xfile executable bit");
+
+	return fd;
+}
+
+/*
+ * Allocate a new xfile and attach a freshly opened backing fd to it.  The
+ * description is passed through to xfile_create_fd().
+ *
+ * On success, stores the new xfile in *xfilep and returns 0.  On failure,
+ * returns a negative errno and leaves *xfilep untouched.
+ */
+int
+xfile_create(
+	const char		*description,
+	struct xfile		**xfilep)
+{
+	struct xfile		*new_xf = kmalloc(sizeof(struct xfile), 0);
+	int			saved_err;
+
+	if (new_xf == NULL)
+		return -ENOMEM;
+
+	new_xf->fd = xfile_create_fd(description);
+	if (new_xf->fd >= 0) {
+		*xfilep = new_xf;
+		return 0;
+	}
+
+	/* Capture errno before freeing, in case the free disturbs it. */
+	saved_err = -errno;
+	kfree(new_xf);
+	return saved_err;
+}
+
+/* Tear down an xfile: close its backing fd, then free the xfile itself. */
+void
+xfile_destroy(
+	struct xfile		*xf)
+{
+	int			backing_fd = xf->fd;
+
+	close(backing_fd);
+	kfree(xf);
+}
+
+/*
+ * Largest file offset representable in loff_t: LLONG_MAX when loff_t is
+ * eight bytes wide, LONG_MAX otherwise.  The xfile argument is not consulted.
+ */
+static inline loff_t
+xfile_maxbytes(
+	struct xfile		*xf)
+{
+	return (sizeof(loff_t) == 8) ? LLONG_MAX : LONG_MAX;
+}
+
+/*
+ * Load an object.  Since we're treating this file as "memory", an oversized
+ * request or a short read is treated as a failure to allocate memory.
+ *
+ * Reads exactly @count bytes from offset @pos of the backing file into @buf.
+ * Returns 0 on success; -ENOMEM if the request is too large or the read came
+ * up short; -EINVAL if @pos is negative; otherwise the negative errno
+ * reported by pread.
+ */
+ssize_t
+xfile_load(
+	struct xfile		*xf,
+	void			*buf,
+	size_t			count,
+	loff_t			pos)
+{
+	ssize_t			ret;
+
+	if (count > INT_MAX)
+		return -ENOMEM;
+	/*
+	 * Reject negative offsets before the range check below, because
+	 * subtracting a negative pos from the maximum offset would overflow
+	 * a signed type.  pread fails such offsets with EINVAL, so report
+	 * the same error here.
+	 */
+	if (pos < 0)
+		return -EINVAL;
+	if (xfile_maxbytes(xf) - pos < count)
+		return -ENOMEM;
+
+	ret = pread(xf->fd, buf, count, pos);
+	if (ret < 0)
+		return -errno;
+	/* ret is known to be non-negative here, so the cast is safe. */
+	if ((size_t)ret != count)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Store an object.  Since we're treating this file as "memory", a short
+ * write is treated as a failure to allocate memory.
+ *
+ * Writes exactly @count bytes from @buf at offset @pos of the backing file.
+ * Returns 0 on success; -E2BIG if @count exceeds INT_MAX; -EINVAL if @pos is
+ * negative; -EFBIG if the write would extend past the maximum file offset;
+ * -ENOMEM if the write came up short; otherwise the negative errno reported
+ * by pwrite.
+ */
+ssize_t
+xfile_store(
+	struct xfile		*xf,
+	const void		*buf,
+	size_t			count,
+	loff_t			pos)
+{
+	ssize_t			ret;
+
+	if (count > INT_MAX)
+		return -E2BIG;
+	/*
+	 * Reject negative offsets before the range check below, because
+	 * subtracting a negative pos from the maximum offset would overflow
+	 * a signed type.  pwrite fails such offsets with EINVAL, so report
+	 * the same error here.
+	 */
+	if (pos < 0)
+		return -EINVAL;
+	if (xfile_maxbytes(xf) - pos < count)
+		return -EFBIG;
+
+	ret = pwrite(xf->fd, buf, count, pos);
+	if (ret < 0)
+		return -errno;
+	/* ret is known to be non-negative here, so the cast is safe. */
+	if ((size_t)ret != count)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Compute the number of bytes used by a xfile.  This is derived from the
+ * st_blocks count that fstat reports for the backing file, shifted left by 9
+ * (i.e. treated as 512-byte units); it is not the file size.
+ *
+ * Returns 0 if the backing file cannot be queried.
+ */
+unsigned long long
+xfile_bytes(
+	struct xfile		*xf)
+{
+	struct stat		statbuf;
+
+	/*
+	 * The return type is unsigned, so handing back a negative errno would
+	 * be read by callers as an enormous byte count.  Report no usage
+	 * instead.
+	 */
+	if (fstat(xf->fd, &statbuf))
+		return 0;
+
+	return (unsigned long long)statbuf.st_blocks << 9;
+}
diff --git a/libxfs/xfile.h b/libxfs/xfile.h
new file mode 100644
index 000000000000..d60084011357
--- /dev/null
+++ b/libxfs/xfile.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __LIBXFS_XFILE_H__
+#define __LIBXFS_XFILE_H__
+
+/* An xfile is a handle to a single file descriptor used as pageable memory. */
+struct xfile {
+ /* Backing descriptor; opened by xfile_create, closed by xfile_destroy. */
+ int fd;
+};
+
+/* Allocate an xfile; returns 0 and sets *xfilep, or a negative errno. */
+int xfile_create(const char *description, struct xfile **xfilep);
+/* Close the backing descriptor and free the xfile. */
+void xfile_destroy(struct xfile *xf);
+
+/*
+ * Read or write exactly count bytes at offset pos of the backing file.
+ * Both return 0 on success and a negative errno on failure; a short transfer
+ * is reported as -ENOMEM.
+ */
+ssize_t xfile_load(struct xfile *xf, void *buf, size_t count, loff_t pos);
+ssize_t xfile_store(struct xfile *xf, const void *buf, size_t count, loff_t pos);
+
+/* Space used by the backing file, computed from its st_blocks count. */
+unsigned long long xfile_bytes(struct xfile *xf);
+
+#endif /* __LIBXFS_XFILE_H__ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index d4f99f36f71d..01f92e841f29 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -953,6 +953,20 @@ phase_end(
platform_crash();
}
+/*
+ * Try to allow as many memfds as possible by raising the soft limit on open
+ * files to the hard limit.  This is best effort: if either the query or the
+ * update fails, the limits are simply left as they were.
+ */
+static void
+bump_max_fds(void)
+{
+	struct rlimit		limits = { };
+
+	if (getrlimit(RLIMIT_NOFILE, &limits) != 0)
+		return;
+
+	limits.rlim_cur = limits.rlim_max;
+	setrlimit(RLIMIT_NOFILE, &limits);
+}
+
int
main(int argc, char **argv)
{
@@ -972,6 +986,7 @@ main(int argc, char **argv)
bindtextdomain(PACKAGE, LOCALEDIR);
textdomain(PACKAGE);
dinode_bmbt_translation_init();
+ bump_max_fds();
temp_mp = &xfs_m;
setbuf(stdout, NULL);
next prev parent reply other threads:[~2024-04-16 1:00 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-04-16 0:51 [PATCHBOMB v3] xfsprogs: everything headed towards 6.9 Darrick J. Wong
2024-04-16 0:57 ` [PATCHSET 1/4] xfsprogs: bug fixes for 6.8 Darrick J. Wong
2024-04-16 0:58 ` [PATCH 1/5] xfs_repair: double-check with shortform attr verifiers Darrick J. Wong
2024-04-16 0:59 ` [PATCH 2/5] xfs_db: improve number extraction in getbitval Darrick J. Wong
2024-04-16 4:53 ` Christoph Hellwig
2024-04-16 0:59 ` [PATCH 3/5] xfs_scrub: fix threadcount estimates for phase 6 Darrick J. Wong
2024-04-16 4:53 ` Christoph Hellwig
2024-04-16 0:59 ` [PATCH 4/5] xfs_scrub: don't fail while reporting media scan errors Darrick J. Wong
2024-04-16 0:59 ` [PATCH 5/5] xfs_io: add linux madvise advice codes Darrick J. Wong
2024-04-17 7:34 ` [PATCHSET 1/4] xfsprogs: bug fixes for 6.8 Carlos Maiolino
2024-04-17 15:30 ` Darrick J. Wong
2024-04-16 0:58 ` [PATCHSET 2/4] libxfs: sync with 6.9 Darrick J. Wong
2024-04-16 1:00 ` [PATCH 088/111] libxfs: teach buftargs to maintain their own buffer hashtable Darrick J. Wong
2024-04-16 1:00 ` Darrick J. Wong [this message]
2024-04-16 1:00 ` [PATCH 090/111] libxfs: partition memfd files to avoid using too many fds Darrick J. Wong
2024-04-16 4:55 ` Christoph Hellwig
2024-04-16 15:49 ` Darrick J. Wong
2024-04-16 16:29 ` Christoph Hellwig
2024-04-16 16:57 ` Darrick J. Wong
2024-04-16 18:47 ` Christoph Hellwig
2024-04-16 18:55 ` Darrick J. Wong
2024-04-24 17:20 ` [PATCH v3.1 " Darrick J. Wong
2024-04-16 1:00 ` [PATCH 091/111] xfs: teach buftargs to maintain their own buffer hashtable Darrick J. Wong
2024-04-16 1:01 ` [PATCH 092/111] libxfs: support in-memory buffer cache targets Darrick J. Wong
2024-04-16 0:58 ` [PATCHSET v30.3 3/4] xfsprogs: bmap log intent cleanups Darrick J. Wong
2024-04-16 1:01 ` [PATCH 1/4] libxfs: remove kmem_alloc, kmem_zalloc, and kmem_free Darrick J. Wong
2024-04-16 4:55 ` Christoph Hellwig
2024-04-16 1:01 ` [PATCH 2/4] libxfs: add a bi_entry helper Darrick J. Wong
2024-04-16 4:55 ` Christoph Hellwig
2024-04-16 1:01 ` [PATCH 3/4] libxfs: reuse xfs_bmap_update_cancel_item Darrick J. Wong
2024-04-16 4:55 ` Christoph Hellwig
2024-04-16 1:02 ` [PATCH 4/4] libxfs: add a xattr_entry helper Darrick J. Wong
2024-04-16 4:56 ` Christoph Hellwig
2024-04-16 0:58 ` [PATCHSET v30.3 4/4] xfs_repair: minor fixes Darrick J. Wong
2024-04-16 1:02 ` [PATCH 1/1] xfs_repair: check num before bplist[num] Darrick J. Wong
2024-04-16 4:56 ` Christoph Hellwig
-- strict thread matches above, loose matches on Subject: below --
2024-05-22 2:45 [PATCHSET v30.4 02/10] libxfs: sync with 6.9 Darrick J. Wong
2024-05-22 3:11 ` [PATCH 089/111] libxfs: add xfile support Darrick J. Wong
2024-06-03 18:49 [PATCHSET v30.5 02/10] libxfs: sync with 6.9 Darrick J. Wong
2024-06-03 19:15 ` [PATCH 089/111] libxfs: add xfile support Darrick J. Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=171322883501.211103.5837361141423551988.stgit@frogsfrogsfrogs \
--to=djwong@kernel.org \
--cc=cem@kernel.org \
--cc=cmaiolino@redhat.com \
--cc=hch@infradead.org \
--cc=hch@lst.de \
--cc=linux-xfs@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).