From: Guillaume Morin <guillaume@morinfr.org>
To: David Hildenbrand <david@redhat.com>
Cc: Guillaume Morin <guillaume@morinfr.org>,
	oleg@redhat.com, linux-kernel@vger.kernel.org,
	linux-trace-kernel@vger.kernel.org, muchun.song@linux.dev
Subject: Re: [RFC][PATCH] uprobe: support for private hugetlb mappings
Date: Fri, 26 Apr 2024 21:55:14 +0200
Message-ID: <ZiwGovSHiVCF7z6y@bender.morinfr.org>
In-Reply-To: <385d3516-95bb-4ff9-9d60-ac4e46104130@redhat.com>

On 26 Apr  9:19, David Hildenbrand wrote:
> A couple of points:
> 
> a) Don't use page_mapcount(). Use folio_mapcount() instead, but likely
> you want to check PageAnonExclusive.
> 
> b) If you're not following the can_follow_write_pte/_pmd model, you are
> doing something wrong :)
> 
> c) The code was heavily changed in mm/mm-unstable. It was merged with
> the common code.
> 
> Likely, in mm/mm-unstable, the existing can_follow_write_pte and
> can_follow_write_pmd checks will already cover what you want in most cases.
> 
> We'd need a can_follow_write_pud() to cover follow_huge_pud() and
> (unfortunately) something similar to handle follow_hugepd().
> 
> Copy-pasting what we do in can_follow_write_pte() and adjusting for
> different PTE types is the right thing to do. Maybe now it's time to factor
> out the common checks into a separate helper.

I tried to get the hugepd stuff right, but this was the first I had heard
of it :-) Afaict, follow_huge_pmd() and friends were already DTRT.
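
As a quick way to exercise the path this patch opens up, here is a rough
userspace sketch (untested; it assumes a 2 MiB default hugepage size and
at least one free hugepage). It writes through a read-only MAP_PRIVATE
hugetlb mapping via /proc/self/mem, which goes through GUP with
FOLL_FORCE|FOLL_WRITE, the same kind of access that uprobes and ptrace
need:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

#define LEN (2UL * 1024 * 1024)	/* one 2 MiB hugepage */

int main(void)
{
	char *p = mmap(NULL, LEN, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
	int fd;

	if (p == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* Fault the hugepage in so it is anonymous and exclusive to us. */
	p[0] = 'A';

	/* Drop PROT_WRITE; the vma keeps VM_MAYWRITE. */
	if (mprotect(p, LEN, PROT_READ)) {
		perror("mprotect");
		return 1;
	}

	fd = open("/proc/self/mem", O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* The file offset of /proc/<pid>/mem is the virtual address. */
	if (pwrite(fd, "B", 1, (off_t)(uintptr_t)p) != 1)
		perror("pwrite");	/* fails on unpatched kernels */

	printf("p[0] = %c\n", p[0]);	/* 'B' with the patch applied */

	close(fd);
	return 0;
}

With the patch below, the pwrite() should succeed (the page is already
anonymous and exclusive, so GUP can write to it directly) while the
hugetlb PTE stays read-only.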

diff --git a/mm/gup.c b/mm/gup.c
index 2f7baf96f65..9df97ea555f 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -517,6 +517,34 @@ static int record_subpages(struct page *page, unsigned long sz,
 }
 #endif	/* CONFIG_ARCH_HAS_HUGEPD || CONFIG_HAVE_GUP_FAST */
 
+/* Common code for can_follow_write_* */
+static inline bool can_follow_write_common(struct page *page,
+					   struct vm_area_struct *vma,
+					   unsigned int flags)
+{
+	/* Maybe FOLL_FORCE is set to override it? */
+	if (!(flags & FOLL_FORCE))
+		return false;
+
+	/* But FOLL_FORCE has no effect on shared mappings */
+	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
+		return false;
+
+	/* ... or read-only private ones */
+	if (!(vma->vm_flags & VM_MAYWRITE))
+		return false;
+
+	/* ... or already writable ones that just need to take a write fault */
+	if (vma->vm_flags & VM_WRITE)
+		return false;
+
+	/*
+	 * See can_change_pte_writable(): we broke COW and could map the page
+	 * writable if we have an exclusive anonymous page ...
+	 */
+	return page && PageAnon(page) && PageAnonExclusive(page);
+}
+
 #ifdef CONFIG_ARCH_HAS_HUGEPD
 static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 				      unsigned long sz)
@@ -525,6 +553,17 @@ static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
 	return (__boundary - 1 < end - 1) ? __boundary : end;
 }
 
+static inline bool can_follow_write_hugepd(pte_t pte, struct page *page,
+					   struct vm_area_struct *vma,
+					   unsigned int flags)
+{
+	/* If the pte is writable, we can write to the page. */
+	if (pte_access_permitted(pte, flags & FOLL_WRITE))
+		return true;
+
+	return can_follow_write_common(page, vma, flags);
+}
+
 static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		unsigned long end, unsigned int flags, struct page **pages,
 		int *nr)
@@ -541,13 +580,13 @@ static int gup_fast_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 
 	pte = huge_ptep_get(ptep);
 
-	if (!pte_access_permitted(pte, flags & FOLL_WRITE))
-		return 0;
-
 	/* hugepages are never "special" */
 	VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
 
 	page = pte_page(pte);
+	if (!can_follow_write_hugepd(pte, page, vma, flags))
+		return 0;
+
 	refs = record_subpages(page, sz, addr, end, pages + *nr);
 
 	folio = try_grab_folio(page, refs, flags);
@@ -668,7 +707,23 @@ static struct page *no_page_table(struct vm_area_struct *vma,
 	return NULL;
 }
 
 #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
+/* FOLL_FORCE can write to even unwritable PUDs in COW mappings. */
+static inline bool can_follow_write_pud(pud_t pud, struct page *page,
+					struct vm_area_struct *vma,
+					unsigned int flags)
+{
+	/* If the pud is writable, we can write to the page. */
+	if (pud_write(pud))
+		return true;
+
+	if (!can_follow_write_common(page, vma, flags))
+		return false;
+
+	/* ... and a write-fault isn't required for other reasons. */
+	return !vma_soft_dirty_enabled(vma) || pud_soft_dirty(pud);
+}
+
 static struct page *follow_huge_pud(struct vm_area_struct *vma,
 				    unsigned long addr, pud_t *pudp,
 				    int flags, struct follow_page_context *ctx)
@@ -681,7 +736,8 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma,
 
 	assert_spin_locked(pud_lockptr(mm, pudp));
 
-	if ((flags & FOLL_WRITE) && !pud_write(pud))
+	if ((flags & FOLL_WRITE) &&
+	    !can_follow_write_pud(pud, pfn_to_page(pfn), vma, flags))
 		return NULL;
 
 	if (!pud_present(pud))
@@ -733,27 +789,7 @@ static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
 	if (pmd_write(pmd))
 		return true;
 
-	/* Maybe FOLL_FORCE is set to override it? */
-	if (!(flags & FOLL_FORCE))
-		return false;
-
-	/* But FOLL_FORCE has no effect on shared mappings */
-	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
-		return false;
-
-	/* ... or read-only private ones */
-	if (!(vma->vm_flags & VM_MAYWRITE))
-		return false;
-
-	/* ... or already writable ones that just need to take a write fault */
-	if (vma->vm_flags & VM_WRITE)
-		return false;
-
-	/*
-	 * See can_change_pte_writable(): we broke COW and could map the page
-	 * writable if we have an exclusive anonymous page ...
-	 */
-	if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+	if (!can_follow_write_common(page, vma, flags))
 		return false;
 
 	/* ... and a write-fault isn't required for other reasons. */
@@ -854,27 +890,7 @@ static inline bool can_follow_write_pte(pte_t pte, struct page *page,
 	if (pte_write(pte))
 		return true;
 
-	/* Maybe FOLL_FORCE is set to override it? */
-	if (!(flags & FOLL_FORCE))
-		return false;
-
-	/* But FOLL_FORCE has no effect on shared mappings */
-	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
-		return false;
-
-	/* ... or read-only private ones */
-	if (!(vma->vm_flags & VM_MAYWRITE))
-		return false;
-
-	/* ... or already writable ones that just need to take a write fault */
-	if (vma->vm_flags & VM_WRITE)
-		return false;
-
-	/*
-	 * See can_change_pte_writable(): we broke COW and could map the page
-	 * writable if we have an exclusive anonymous page ...
-	 */
-	if (!page || !PageAnon(page) || !PageAnonExclusive(page))
+	if (!can_follow_write_common(page, vma, flags))
 		return false;
 
 	/* ... and a write-fault isn't required for other reasons. */
@@ -1374,9 +1390,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
 		if (!(vm_flags & VM_WRITE) || (vm_flags & VM_SHADOW_STACK)) {
 			if (!(gup_flags & FOLL_FORCE))
 				return -EFAULT;
-			/* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
-			if (is_vm_hugetlb_page(vma))
-				return -EFAULT;
+
 			/*
 			 * We used to let the write,force case do COW in a
 			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 417fc5cdb6e..099a71ee028 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5320,6 +5320,13 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
 		update_mmu_cache(vma, address, ptep);
 }
 
+static void set_huge_ptep_maybe_writable(struct vm_area_struct *vma,
+					 unsigned long address, pte_t *ptep)
+{
+	if (vma->vm_flags & VM_WRITE)
+		set_huge_ptep_writable(vma, address, ptep);
+}
+
 bool is_hugetlb_entry_migration(pte_t pte)
 {
 	swp_entry_t swp;
@@ -5942,13 +5949,6 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 	if (!unshare && huge_pte_uffd_wp(pte))
 		return 0;
 
-	/*
-	 * hugetlb does not support FOLL_FORCE-style write faults that keep the
-	 * PTE mapped R/O such as maybe_mkwrite() would do.
-	 */
-	if (WARN_ON_ONCE(!unshare && !(vma->vm_flags & VM_WRITE)))
-		return VM_FAULT_SIGSEGV;
-
 	/* Let's take out MAP_SHARED mappings first. */
 	if (vma->vm_flags & VM_MAYSHARE) {
 		set_huge_ptep_writable(vma, vmf->address, vmf->pte);
@@ -5970,7 +5970,7 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 			SetPageAnonExclusive(&old_folio->page);
 		}
 		if (likely(!unshare))
-			set_huge_ptep_writable(vma, vmf->address, vmf->pte);
+			set_huge_ptep_maybe_writable(vma, vmf->address, vmf->pte);
 
 		delayacct_wpcopy_end();
 		return 0;
@@ -6076,7 +6076,8 @@ static vm_fault_t hugetlb_wp(struct folio *pagecache_folio,
 	spin_lock(vmf->ptl);
 	vmf->pte = hugetlb_walk(vma, vmf->address, huge_page_size(h));
 	if (likely(vmf->pte && pte_same(huge_ptep_get(vmf->pte), pte))) {
-		pte_t newpte = make_huge_pte(vma, &new_folio->page, !unshare);
+		const bool writable = !unshare && (vma->vm_flags & VM_WRITE);
+		pte_t newpte = make_huge_pte(vma, &new_folio->page, writable);
 
 		/* Break COW or unshare */
 		huge_ptep_clear_flush(vma, vmf->address, vmf->pte);
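
For reference, set_huge_ptep_maybe_writable() above does for hugetlb what
maybe_mkwrite() already does for regular PTEs (quoting include/linux/mm.h
from memory, so double-check the exact form there):

static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
	if (likely(vma->vm_flags & VM_WRITE))
		pte = pte_mkwrite(pte, vma);
	return pte;
}

i.e. breaking COW only maps the page writable when the vma itself is
VM_WRITE; for a FOLL_FORCE-style write fault the PTE stays R/O, which is
exactly the case the removed WARN_ON_ONCE in hugetlb_wp() used to reject.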


-- 
Guillaume Morin <guillaume@morinfr.org>
