Linux-XFS Archive mirror
 help / color / mirror / Atom feed
* [RFC PATCH] xfs_io: add linux madvise advice codes
       [not found] <20240314161300.382526-1-david@redhat.com>
@ 2024-03-17 16:51 ` Darrick J. Wong
  2024-03-17 16:53   ` [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors Darrick J. Wong
  2024-03-17 21:14   ` [RFC PATCH] xfs_io: add linux madvise advice codes Christoph Hellwig
  0 siblings, 2 replies; 5+ messages in thread
From: Darrick J. Wong @ 2024-03-17 16:51 UTC (permalink / raw)
  To: David Hildenbrand, djwong; +Cc: linux-kernel, linux-mm, fstests, xfs

From: Darrick J. Wong <djwong@kernel.org>

Add all the Linux-specific madvise codes.  We're going to need
MADV_POPULATE_READ for a regression test.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 configure.ac          |    1 
 include/builddefs.in  |    1 
 io/Makefile           |    4 ++
 io/madvise.c          |  111 +++++++++++++++++++++++++++++++++++++++++++++++++
 m4/package_libcdev.m4 |   17 ++++++++
 5 files changed, 133 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index 3786e44db6fd..723bdca506d1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -187,6 +187,7 @@ AC_CONFIG_SYSTEMD_SYSTEM_UNIT_DIR
 AC_CONFIG_CROND_DIR
 AC_CONFIG_UDEV_DIR
 AC_HAVE_BLKID_TOPO
+AC_HAVE_KERNEL_MADVISE_FLAGS
 
 if test "$enable_ubsan" = "yes" || test "$enable_ubsan" = "probe"; then
         AC_PACKAGE_CHECK_UBSAN
diff --git a/include/builddefs.in b/include/builddefs.in
index 07428206da45..a04f3e70f19d 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -193,6 +193,7 @@ HAVE_O_TMPFILE = @have_o_tmpfile@
 HAVE_MKOSTEMP_CLOEXEC = @have_mkostemp_cloexec@
 USE_RADIX_TREE_FOR_INUMS = @use_radix_tree_for_inums@
 HAVE_FSVERITY_DESCR = @have_fsverity_descr@
+HAVE_KERNEL_MADVISE = @have_kernel_madvise@
 
 GCCFLAGS = -funsigned-char -fno-strict-aliasing -Wall -Werror -Wextra -Wno-unused-parameter
 #	   -Wbitwise -Wno-transparent-union -Wno-old-initializer -Wno-decl
diff --git a/io/Makefile b/io/Makefile
index 6f903e3df9a7..ce39fda0e82a 100644
--- a/io/Makefile
+++ b/io/Makefile
@@ -84,6 +84,10 @@ ifeq ($(HAVE_GETFSMAP),yes)
 CFILES += fsmap.c
 endif
 
+ifeq ($(HAVE_KERNEL_MADVISE),yes)
+LCFLAGS += -DHAVE_KERNEL_MADVISE
+endif
+
 default: depend $(LTCOMMAND)
 
 include $(BUILDRULES)
diff --git a/io/madvise.c b/io/madvise.c
index 6e9c5b121d72..081666f403bb 100644
--- a/io/madvise.c
+++ b/io/madvise.c
@@ -9,6 +9,9 @@
 #include <sys/mman.h>
 #include "init.h"
 #include "io.h"
+#ifdef HAVE_KERNEL_MADVISE
+# include <asm/mman.h>
+#endif
 
 static cmdinfo_t madvise_cmd;
 
@@ -26,6 +29,47 @@ madvise_help(void)
 " -r -- expect random page references (POSIX_MADV_RANDOM)\n"
 " -s -- expect sequential page references (POSIX_MADV_SEQUENTIAL)\n"
 " -w -- will need these pages (POSIX_MADV_WILLNEED) [*]\n"
+"\n"
+"The following Linux-specific advise values are available:\n"
+#ifdef MADV_COLLAPSE
+" -c -- try to collapse range into transparent hugepages (MADV_COLLAPSE)\n"
+#endif
+#ifdef MADV_COLD
+" -D -- deactivate the range (MADV_COLD)\n"
+#endif
+#ifdef MADV_FREE
+" -f -- free the range (MADV_FREE)\n"
+#endif
+#ifdef MADV_HUGEPAGE
+" -h -- enable transparent hugepages (MADV_HUGEPAGE)\n"
+#endif
+#ifdef MADV_NOHUGEPAGE
+" -H -- disable transparent hugepages (MADV_NOHUGEPAGE)\n"
+#endif
+#ifdef MADV_MERGEABLE
+" -m -- mark the range mergeable (MADV_MERGEABLE)\n"
+#endif
+#ifdef MADV_UNMERGEABLE
+" -M -- mark the range unmergeable (MADV_UNMERGEABLE)\n"
+#endif
+#ifdef MADV_SOFT_OFFLINE
+" -o -- mark the range offline (MADV_SOFT_OFFLINE)\n"
+#endif
+#ifdef MADV_REMOVE
+" -p -- punch a hole in the file (MADV_REMOVE)\n"
+#endif
+#ifdef MADV_HWPOISON
+" -P -- poison the page cache (MADV_HWPOISON)\n"
+#endif
+#ifdef MADV_POPULATE_READ
+" -R -- prefault in the range for read (MADV_POPULATE_READ)\n"
+#endif
+#ifdef MADV_POPULATE_WRITE
+" -W -- prefault in the range for write (MADV_POPULATE_WRITE)\n"
+#endif
+#ifdef MADV_PAGEOUT
+" -X -- reclaim the range (MADV_PAGEOUT)\n"
+#endif
 " Notes:\n"
 "   NORMAL sets the default readahead setting on the file.\n"
 "   RANDOM sets the readahead setting on the file to zero.\n"
@@ -45,20 +89,85 @@ madvise_f(
 	int		advise = MADV_NORMAL, c;
 	size_t		blocksize, sectsize;
 
-	while ((c = getopt(argc, argv, "drsw")) != EOF) {
+	while ((c = getopt(argc, argv, "cdDfhHmMopPrRswWX")) != EOF) {
 		switch (c) {
+#ifdef MADV_COLLAPSE
+		case 'c':	/* collapse to thp */
+			advise = MADV_COLLAPSE;
+			break;
+#endif
 		case 'd':	/* Don't need these pages */
 			advise = MADV_DONTNEED;
 			break;
+#ifdef MADV_COLD
+		case 'D':	/* make more likely to be reclaimed */
+			advise = MADV_COLD;
+			break;
+#endif
+#ifdef MADV_FREE
+		case 'f':	/* page range out of memory */
+			advise = MADV_FREE;
+			break;
+#endif
+#ifdef MADV_HUGEPAGE
+		case 'h':	/* enable thp memory */
+			advise = MADV_HUGEPAGE;
+			break;
+#endif
+#ifdef MADV_NOHUGEPAGE
+		case 'H':	/* disable thp memory */
+			advise = MADV_NOHUGEPAGE;
+			break;
+#endif
+#ifdef MADV_MERGEABLE
+		case 'm':	/* enable merging */
+			advise = MADV_MERGEABLE;
+			break;
+#endif
+#ifdef MADV_UNMERGEABLE
+		case 'M':	/* disable merging */
+			advise = MADV_UNMERGEABLE;
+			break;
+#endif
+#ifdef MADV_SOFT_OFFLINE
+		case 'o':	/* offline */
+			advise = MADV_SOFT_OFFLINE;
+			break;
+#endif
+#ifdef MADV_REMOVE
+		case 'p':	/* punch hole */
+			advise = MADV_REMOVE;
+			break;
+#endif
+#ifdef MADV_HWPOISON
+		case 'P':	/* poison */
+			advise = MADV_HWPOISON;
+			break;
+#endif
 		case 'r':	/* Expect random page references */
 			advise = MADV_RANDOM;
 			break;
+#ifdef MADV_POPULATE_READ
+		case 'R':	/* fault in pages for read */
+			advise = MADV_POPULATE_READ;
+			break;
+#endif
 		case 's':	/* Expect sequential page references */
 			advise = MADV_SEQUENTIAL;
 			break;
 		case 'w':	/* Will need these pages */
 			advise = MADV_WILLNEED;
 			break;
+#ifdef MADV_POPULATE_WRITE
+		case 'W':	/* fault in pages for write */
+			advise = MADV_POPULATE_WRITE;
+			break;
+#endif
+#ifdef MADV_PAGEOUT
+		case 'X':	/* reclaim memory */
+			advise = MADV_PAGEOUT;
+			break;
+#endif
 		default:
 			exitcode = 1;
 			return command_usage(&madvise_cmd);
diff --git a/m4/package_libcdev.m4 b/m4/package_libcdev.m4
index 84f288dfcfdb..064d050b2b55 100644
--- a/m4/package_libcdev.m4
+++ b/m4/package_libcdev.m4
@@ -322,3 +322,20 @@ struct fsverity_descriptor m = { };
     AC_SUBST(have_fsverity_descr)
   ])
 
+#
+# Check if asm/mman.h defines the newer Linux madvise flags (MADV_COLLAPSE)
+#
+AC_DEFUN([AC_HAVE_KERNEL_MADVISE_FLAGS],
+  [ AC_MSG_CHECKING([for kernel madvise flags in asm/mman.h ])
+    AC_COMPILE_IFELSE(
+    [	AC_LANG_PROGRAM([[
+#include <asm/mman.h>
+	]], [[
+int moo = MADV_COLLAPSE;
+	]])
+    ], have_kernel_madvise=yes
+       AC_MSG_RESULT(yes),
+       AC_MSG_RESULT(no))
+    AC_SUBST(have_kernel_madvise)
+  ])
+

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors
  2024-03-17 16:51 ` [RFC PATCH] xfs_io: add linux madvise advice codes Darrick J. Wong
@ 2024-03-17 16:53   ` Darrick J. Wong
  2024-03-17 21:14     ` Christoph Hellwig
  2024-03-19  8:59     ` David Hildenbrand
  2024-03-17 21:14   ` [RFC PATCH] xfs_io: add linux madvise advice codes Christoph Hellwig
  1 sibling, 2 replies; 5+ messages in thread
From: Darrick J. Wong @ 2024-03-17 16:53 UTC (permalink / raw)
  To: David Hildenbrand; +Cc: linux-kernel, linux-mm, fstests, xfs

From: Darrick J. Wong <djwong@kernel.org>

This is a regression test for "mm/madvise: make
MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly".

Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 tests/generic/1835     |   65 ++++++++++++++++++++++++++++++++++++++++++++++++
 tests/generic/1835.out |    4 +++
 2 files changed, 69 insertions(+)
 create mode 100755 tests/generic/1835
 create mode 100644 tests/generic/1835.out

diff --git a/tests/generic/1835 b/tests/generic/1835
new file mode 100755
index 0000000000..07479ab712
--- /dev/null
+++ b/tests/generic/1835
@@ -0,0 +1,65 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Oracle.  All Rights Reserved.
+#
+# FS QA Test 1835
+#
+# This is a regression test for a kernel hang that I saw when creating a memory
+# mapping, injecting EIO errors on the block device, and invoking
+# MADV_POPULATE_READ on the mapping to fault in the pages.
+#
+. ./common/preamble
+_begin_fstest auto rw
+
+# Override the default cleanup function.
+_cleanup()
+{
+	cd /
+	rm -f $tmp.*
+	_dmerror_unmount
+	_dmerror_cleanup
+}
+
+# Import common functions.
+. ./common/dmerror
+
+_fixed_by_kernel_commit XXXXXXXXXXXX \
+	"mm/madvise: make MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly"
+
+# real QA test starts here
+
+# Make sure everything this test depends on is available.
+_supported_fs generic
+_require_xfs_io_command madvise -R
+_require_scratch
+_require_dm_target error
+_require_command "$TIMEOUT_PROG" "timeout"
+
+_scratch_mkfs >> $seqres.full 2>&1
+_dmerror_init
+
+filesz=2m
+
+# Create a file that we'll read, then cycle mount to zap pagecache
+_dmerror_mount
+$XFS_IO_PROG -f -c "pwrite -S 0x58 0 $filesz" "$SCRATCH_MNT/a" >> $seqres.full
+_dmerror_unmount
+_dmerror_mount
+
+# Try to read the file data in a regular fashion just to prove that it works.
+echo read with no errors
+$TIMEOUT_PROG -s KILL 10s $XFS_IO_PROG -c "mmap -r 0 $filesz" -c "madvise -R 0 $filesz" "$SCRATCH_MNT/a"
+_dmerror_unmount
+_dmerror_mount
+
+# Load file metadata and induce EIO errors on read.  Try to provoke the kernel;
+# kill the process after 10s so we can clean up.
+stat "$SCRATCH_MNT/a" >> $seqres.full
+echo read with IO errors
+_dmerror_load_error_table
+$TIMEOUT_PROG -s KILL 10s $XFS_IO_PROG -c "mmap -r 0 $filesz" -c "madvise -R 0 $filesz" "$SCRATCH_MNT/a"
+_dmerror_load_working_table
+
+# success, all done
+status=0
+exit
diff --git a/tests/generic/1835.out b/tests/generic/1835.out
new file mode 100644
index 0000000000..1b03586e8c
--- /dev/null
+++ b/tests/generic/1835.out
@@ -0,0 +1,4 @@
+QA output created by 1835
+read with no errors
+read with IO errors
+madvise: Bad address

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] xfs_io: add linux madvise advice codes
  2024-03-17 16:51 ` [RFC PATCH] xfs_io: add linux madvise advice codes Darrick J. Wong
  2024-03-17 16:53   ` [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors Darrick J. Wong
@ 2024-03-17 21:14   ` Christoph Hellwig
  1 sibling, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2024-03-17 21:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: David Hildenbrand, linux-kernel, linux-mm, fstests, xfs

> +#
> +# Check if asm/mman.h can be included
> +#
> +AC_DEFUN([AC_HAVE_KERNEL_MADVISE_FLAGS],
> +  [ AC_MSG_CHECKING([for kernel madvise flags in asm/mman.h ])
> +    AC_COMPILE_IFELSE(
> +    [	AC_LANG_PROGRAM([[
> +#include <asm/mman.h>
> +	]], [[
> +int moo = MADV_COLLAPSE;
> +	]])
> +    ], have_kernel_madvise=yes
> +       AC_MSG_RESULT(yes),
> +       AC_MSG_RESULT(no))
> +    AC_SUBST(have_kernel_madvise)
> +  ])
> +

I don't think we really need this check, as madvise and asm/mman.h
have been around forever.  We can probably also drop most of the
actual flag ifdefs, probably for everything older than MADV_WIPEONFORK.

The rest looks good to me.

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors
  2024-03-17 16:53   ` [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors Darrick J. Wong
@ 2024-03-17 21:14     ` Christoph Hellwig
  2024-03-19  8:59     ` David Hildenbrand
  1 sibling, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2024-03-17 21:14 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: David Hildenbrand, linux-kernel, linux-mm, fstests, xfs

Looks good:

Reviewed-by: Christoph Hellwig <hch@lst.de>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors
  2024-03-17 16:53   ` [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors Darrick J. Wong
  2024-03-17 21:14     ` Christoph Hellwig
@ 2024-03-19  8:59     ` David Hildenbrand
  1 sibling, 0 replies; 5+ messages in thread
From: David Hildenbrand @ 2024-03-19  8:59 UTC (permalink / raw)
  To: Darrick J. Wong; +Cc: linux-kernel, linux-mm, fstests, xfs

On 17.03.24 17:53, Darrick J. Wong wrote:
> From: Darrick J. Wong <djwong@kernel.org>
> 
> This is a regression test for "mm/madvise: make
> MADV_POPULATE_(READ|WRITE) handle VM_FAULT_RETRY properly".
> 
> Cc: David Hildenbrand <david@redhat.com>
> Signed-off-by: Darrick J. Wong <djwong@kernel.org>
> ---

Thanks for including this test, very helpful!

It's my first time reading fstests code, so I cannot give any feedback 
that would be of much value. That said, nothing jumped out at me :)

-- 
Cheers,

David / dhildenb


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-03-19  9:00 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20240314161300.382526-1-david@redhat.com>
2024-03-17 16:51 ` [RFC PATCH] xfs_io: add linux madvise advice codes Darrick J. Wong
2024-03-17 16:53   ` [RFC PATCH] fstests: test MADV_POPULATE_READ with IO errors Darrick J. Wong
2024-03-17 21:14     ` Christoph Hellwig
2024-03-19  8:59     ` David Hildenbrand
2024-03-17 21:14   ` [RFC PATCH] xfs_io: add linux madvise advice codes Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).