From: Guillaume Morin <guillaume@morinfr.org>
To: David Hildenbrand <david@redhat.com>
Cc: Guillaume Morin <guillaume@morinfr.org>,
	oleg@redhat.com, linux-kernel@vger.kernel.org,
	linux-trace-kernel@vger.kernel.org, muchun.song@linux.dev
Subject: Re: [RFC][PATCH] uprobe: support for private hugetlb mappings
Date: Fri, 26 Apr 2024 21:55:14 +0200
Message-ID: <ZiwGovSHiVCF7z6y@bender.morinfr.org>
In-Reply-To: <385d3516-95bb-4ff9-9d60-ac4e46104130@redhat.com>

On 26 Apr  9:19, David Hildenbrand wrote:
> A couple of points:
> 
> a) Don't use page_mapcount(). Either use folio_mapcount(), though likely
> you want to check PageAnonExclusive.
> 
> b) If you're not following the can_follow_write_pte/_pmd model, you are
> doing something wrong :)
> 
> c) The code was heavily changed in mm/mm-unstable. It was merged with
> the common code.
> 
> Likely, in mm/mm-unstable, the existing can_follow_write_pte and
> can_follow_write_pmd checks will already cover what you want in most cases.
> 
> We'd need a can_follow_write_pud() to cover follow_huge_pud(), and
> (unfortunately) something similar to handle follow_hugepd() as well.
> 
> Copy-pasting what we do in can_follow_write_pte() and adjusting for
> different PTE types is the right thing to do. Maybe now it's time to factor
> out the common checks into a separate helper.

I tried to get the hugepd handling right, but this was the first time I
had heard of it :-) AFAICT, follow_huge_pmd() and friends were already
doing the right thing.
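
For context, a minimal sketch of the kind of forced write this is about
(not part of the patch: poke_text() is made up, but access_remote_vm()
and the FOLL_* flags are the real interfaces; this is essentially what
ptrace(PTRACE_POKETEXT) does under the hood):

	/*
	 * Poke a breakpoint byte into a read-only, MAP_PRIVATE text
	 * mapping of another process.  FOLL_FORCE|FOLL_WRITE tells GUP
	 * to break COW even though the vma is !VM_WRITE; the
	 * can_follow_write_*() checks below then accept the resulting
	 * exclusive anonymous page without ever mapping it writable.
	 */
	static int poke_text(struct mm_struct *mm, unsigned long vaddr, u8 insn)
	{
		int ret = access_remote_vm(mm, vaddr, &insn, sizeof(insn),
					   FOLL_FORCE | FOLL_WRITE);

		return ret == sizeof(insn) ? 0 : -EFAULT;
	}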

diff --git a/mm/gup.c b/mm/gup.c
index 2f7baf96f65..9df97ea555f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -517,6 +517,34 @@ static int record_subpages(struct page *page, unsigned long sz,
 }
 #endif	/* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
 
+/* Common code for can_follow_write_* */
+static inline bool can_follow_write_common(struct page *page,
+					   struct vm_area_struct *vma,
+					   unsigned int flags)
+{
+	/* Maybe FOLL_FORCE is set to override it? */
+	if (!(flags & FOLL_FORCE))
+		return false;
+
+	/* But FOLL_FORCE has no effect on shared mappings */
+	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+		return false;
+
+	/* ... or read-only private ones */
+	if (!(vma->vm_flags & VM_MAYWRITE))
+		return false;
+
+	/* ... or already writable ones that just need to take a write fault */
+	if (vma->vm_flags & VM_WRITE)
+		return false;
+
+	/*
+	 * See can_change_pte_writable(): we broke COW and could map the page
+	 * writable if we have an exclusive anonymous page ...
+	 */
+	return page && PageAnon(page) && PageAnonExclusive(page);
+}
+
 #ifdef CONFIG_ARCH_HAS_HUGEPD
 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 				      unsigned long sz)
@@ -525,6 +553,17 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 	return (__boundary - 1 < end - 1) ? __boundary : end;
 }
 
+static inline bool can_follow_write_hugepd(pte_t pte, struct page *page,
+					   struct vm_area_struct *vma,
+					   unsigned int flags)
+{
+	/* If the pte is writable, we can write to the page. */
+	if (pte_access_permitted(pte, flags & FOLL_WRITE))
+		return true;
+
+	return can_follow_write_common(page, vma, flags);
+}
+
 static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, unsigned int flags, struct page **pages,
 		int *nr)
@@ -541,13 +580,13 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 
 	pte = huge_ptep_get(ptep);
 
-	if (!pte_access_permitted(pte, flags & FOLL_WRITE))
-		return 0;
-
 	/* hugepages are never "special" */
 	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 
 	page = pte_page(pte);
+	if (!can_follow_write_hugepd(pte, page, vma, flags))
+		return 0;
+
 	refs = record_subpages(page, sz, addr, end, pages + *nr);
 
 	folio = try_grab_folio(page, refs, flags);
@@ -668,7 +707,25 @@ static struct page *no_page_table(struct vm_area_struct *vma,
 	return NULL;
 }
 
+
+
 #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
+/* FOLL_FORCE can write to even unwritable PUDs in COW mappings. */
+static inline bool can_follow_write_pud(pud_t pud, struct page *page,
+					struct vm_area_struct *vma,
+					unsigned int flags)
+{
+	/* If the pud is writable, we can write to the page. */
+	if (pud_write(pud))
+		return true;
+
+	if (!can_follow_write_common(page, vma, flags))
+		return false;
+
+	/* ... and a write-fault isn't required for other reasons. */
+	return !vma_soft_dirty_enabled(vma) || pud_soft_dirty(pud);
+}
+
 static struct page *follow_huge_pud(struct vm_area_struct *vma,
 				    unsigned long addr, pud_t *pudp,
 				    int flags, struct follow_page_context *ctx)
@@ -681,7 +738,8 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
 
 	assert_spin_locked(pud_lockptr(mm, pudp));
 
-	if ((flags & FOLL_WRITE) && !pud_write(pud))
+	if ((flags & FOLL_WRITE) &&
+	    !can_follow_write_pud(pud, page, vma, flags))
 		return NULL;
 
 	if (!pud_present(pud))
@@ -733,27 +791,7 @@ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
 	if (pmd_write(pmd))
 		return true;
 
-	/* Maybe FOLL_FORCE is set to override it? */
-	if (!(flags & FOLL_FORCE))
-		return false;
-
-	/* But FOLL_FORCE has no effect on shared mappings */
-	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
-		return false;
-
-	/* ... or read-only private ones */
-	if (!(vma->vm_flags & VM_MAYWRITE))
-		return false;
-
-	/* ... or already writable ones that just need to take a write fault */
-	if (vma->vm_flags & VM_WRITE)
-		return false;
-
-	/*
-	 * See can_change_pte_writable(): we broke COW and could map the page
-	 * writable if we have an exclusive anonymous page ...
-	 */
-	if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+	if (!can_follow_write_common(page, vma, flags))
 		return false;
 
 	/* ... and a write-fault isn't required for other reasons. */
@@ -854,27 +892,7 @@ static inline bool can_follow_write_pte(pte_t pte, struct page *page,
 	if (pte_write(pte))
 		return true;
 
-	/* Maybe FOLL_FORCE is set to override it? */
-	if (!(flags & FOLL_FORCE))
-		return false;
-
-	/* But FOLL_FORCE has no effect on shared mappings */
-	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
-		return false;
-
-	/* ... or read-only private ones */
-	if (!(vma->vm_flags & VM_MAYWRITE))
-		return false;
-
-	/* ... or already writable ones that just need to take a write fault */
-	if (vma->vm_flags & VM_WRITE)
-		return false;
-
-	/*
-	 * See can_change_pte_writable(): we broke COW and could map the page
-	 * writable if we have an exclusive anonymous page ...
-	 */
-	if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+	if (!can_follow_write_common(page, vma, flags))
 		return false;
 
 	/* ... and a write-fault isn't required for other reasons. */
@@ -1374,9 +1392,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
 		if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
 			if (!(gup_flags & FOLL_FORCE))
 				return -EFAULT;
-			/* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
-			if (is_vm_hugetlb_page(vma))
-				return -EFAULT;
+
 			/*
 			 * We used to let the write,force case do COW in a
 			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 417fc5cdb6e..099a71ee028 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5320,6 +5320,13 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 		update_mmu_cache(vma, address, ptep);
 }
 
+static void set_huge_ptep_maybe_writable(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{
+	if (vma->vm_flags & VM_WRITE)
+		set_huge_ptep_writable(vma, address, ptep);
+}
+
 bool is_hugetlb_entry_migration(pte_t pte)
 {
 	swp_entry_t swp;
@@ -5942,13 +5949,6 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 	if (!unshare && huge_pte_uffd_wp(pte))
 		return 0;
 
-	/*
-	 * hugetlb does not support FOLL_FORCE-style write faults that keep the
-	 * PTE mapped R/O such as maybe_mkwrite() would do.
-	 */
-	if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
-		return VM_FAULT_SIGSEGV;
-
 	/* Let's take out MAP_SHARED mappings first. */
 	if (vma->vm_flags & VM_MAYSHARE) {
 		set_huge_ptep_writable(vma, vmf->address, vmf->pte);
@@ -5970,7 +5970,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 			SetPageAnonExclusive(&old_folio->page);
 		}
 		if (likely(!unshare))
-			set_huge_ptep_writable(vma, vmf->address, vmf->pte);
+			set_huge_ptep_maybe_writable(vma, vmf->address, vmf->pte);
 
 		delayacct_wpcopy_end();
 		return 0;
@@ -6076,7 +6076,8 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 	spin_lock(vmf->ptl);
 	vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
 	if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
-		pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
+		const bool writable = !unshare && (vma->vm_flags & VM_WRITE);
+		pte_t newpte = make_huge_pte(vma, &new_folio->page, writable);
 
 		/* Break COW or unshare */
 		huge_ptep_clear_flush(vma, vmf->address, vmf->pte);

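The hugetlb_wp() changes are meant to mirror what the regular COW path
already does via maybe_mkwrite(). For comparison, this is the (real)
helper from include/linux/mm.h, with a comment added by me:

	static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
	{
		/*
		 * Map the page writable only if the vma itself is
		 * writable: after a FOLL_FORCE COW break in a read-only
		 * private mapping, the new page stays mapped R/O. This is
		 * what set_huge_ptep_maybe_writable() now reproduces for
		 * hugetlb.
		 */
		if (likely(vma->vm_flags & VM_WRITE))
			pte = pte_mkwrite(pte, vma);
		return pte;
	}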

-- 
Guillaume Morin <guillaume@morinfr.org>
