Linux-PCI Archive mirror
 help / color / mirror / Atom feed
From: Koichiro Den <den@valinux.co.jp>
To: "Jon Mason" <jdmason@kudzu.us>,
	"Dave Jiang" <dave.jiang@intel.com>,
	"Allen Hubbe" <allenbh@gmail.com>,
	"Manivannan Sadhasivam" <mani@kernel.org>,
	"Krzysztof Wilczyński" <kwilczynski@kernel.org>,
	"Kishon Vijay Abraham I" <kishon@kernel.org>,
	"Bjorn Helgaas" <bhelgaas@google.com>,
	"Frank Li" <Frank.Li@nxp.com>,
	"Jerome Brunet" <jbrunet@baylibre.com>,
	"Lorenzo Pieralisi" <lpieralisi@kernel.org>,
	"Niklas Cassel" <cassel@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-pci@vger.kernel.org,
	ntb@lists.linux.dev
Subject: [PATCH v3 02/10] PCI: endpoint: pci-epf-vntb: Defer pci_epc_raise_irq() out of atomic context
Date: Mon, 23 Mar 2026 12:15:36 +0900	[thread overview]
Message-ID: <20260323031544.2598111-3-den@valinux.co.jp> (raw)
In-Reply-To: <20260323031544.2598111-1-den@valinux.co.jp>

The NTB .peer_db_set() callback may be invoked from atomic context.
pci-epf-vntb currently calls pci_epc_raise_irq() directly from that
callback, but pci_epc_raise_irq() may sleep (it takes the epc->lock
mutex).

Avoid sleeping in atomic context by coalescing doorbell bits into an
atomic64 pending mask and raising MSIs from a work item. Limit each work
run to VNTB_PEER_DB_WORK_BUDGET snapshots of the pending mask, and
requeue the work if bits are still pending afterwards, to avoid
monopolizing the workqueue under a doorbell storm.

Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP")
Signed-off-by: Koichiro Den <den@valinux.co.jp>
---
Changes since v2:
  - Resolved a trivial context-only conflict after
    d799984233a5 ("PCI: endpoint: pci-epf-vntb: Stop cmd_handler work in epf_ntb_epc_cleanup")
    landed in pci/endpoint.

 drivers/pci/endpoint/functions/pci-epf-vntb.c | 106 +++++++++++++-----
 1 file changed, 78 insertions(+), 28 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index a75f8a30f8dc..bc3b3df53ddb 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -69,6 +69,9 @@ static struct workqueue_struct *kpcintb_workqueue;
 #define MAX_DB_COUNT			32
 #define MAX_MW				4
 
+/* Limit per-work execution to avoid monopolizing kworker on doorbell storms. */
+#define VNTB_PEER_DB_WORK_BUDGET	5
+
 enum epf_ntb_bar {
 	BAR_CONFIG,
 	BAR_DB,
@@ -129,6 +132,8 @@ struct epf_ntb {
 	u32 spad_count;
 	u64 mws_size[MAX_MW];
 	atomic64_t db;
+	atomic64_t peer_db_pending;
+	struct work_struct peer_db_work;
 	u32 vbus_number;
 	u16 vntb_pid;
 	u16 vntb_vid;
@@ -920,6 +925,8 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
 	INIT_DELAYED_WORK(&ntb->cmd_handler, epf_ntb_cmd_handler);
 	queue_work(kpcintb_workqueue, &ntb->cmd_handler.work);
 
+	enable_work(&ntb->peer_db_work);
+
 	return 0;
 
 err_write_header:
@@ -943,6 +950,7 @@ static int epf_ntb_epc_init(struct epf_ntb *ntb)
 static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
 {
 	disable_delayed_work_sync(&ntb->cmd_handler);
+	disable_work_sync(&ntb->peer_db_work);
 	epf_ntb_mw_bar_clear(ntb, ntb->num_mws);
 	epf_ntb_db_bar_clear(ntb);
 	epf_ntb_config_sspad_bar_clear(ntb);
@@ -1357,41 +1365,79 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32
 	return 0;
 }
 
+static void vntb_epf_peer_db_work(struct work_struct *work)
+{
+	struct epf_ntb *ntb = container_of(work, struct epf_ntb, peer_db_work);
+	struct pci_epf *epf = ntb->epf;
+	unsigned int budget = VNTB_PEER_DB_WORK_BUDGET;
+	u8 func_no, vfunc_no;
+	u32 interrupt_num;
+	u64 db_bits;
+	int ret;
+
+	if (!epf || !epf->epc)
+		return;
+
+	func_no = epf->func_no;
+	vfunc_no = epf->vfunc_no;
+
+	/*
+	 * Drain peer_db_pending in snapshots (atomic64_xchg()). Handle at most
+	 * VNTB_PEER_DB_WORK_BUDGET snapshots per run, then requeue if bits are
+	 * still pending, so a doorbell storm cannot monopolize the workqueue.
+	 */
+	while (budget--) {
+		db_bits = atomic64_xchg(&ntb->peer_db_pending, 0);
+		if (!db_bits)
+			return;
+
+		while (db_bits) {
+			/*
+			 * pci_epc_raise_irq() for MSI expects a 1-based
+			 * interrupt number: ffs() is 1-based (bit 0 -> 1) and
+			 * the historical +2 offset is added on top.
+			 *
+			 * Legacy mapping, kept unchanged so that older peers
+			 * keep interoperating:
+			 *
+			 *   MSI #1 : link event (reserved)
+			 *   MSI #2 : unused (historical offset)
+			 *   MSI #3 : doorbell bit 0 (DB#0)
+			 *   MSI #4 : doorbell bit 1 (DB#1)
+			 *   ...
+			 *
+			 * NOTE(review): db_bits is u64 but ffs() presumably
+			 * takes an int - confirm bits >= 32 are never set.
+			 */
+			interrupt_num = ffs(db_bits) + 2;
+			db_bits &= db_bits - 1;
+
+			ret = pci_epc_raise_irq(epf->epc, func_no, vfunc_no,
+						PCI_IRQ_MSI, interrupt_num);
+			if (ret)
+				dev_err(&ntb->ntb.dev,
+					"Failed to raise IRQ for interrupt_num %u: %d\n",
+					interrupt_num, ret);
+		}
+	}
+
+	if (atomic64_read(&ntb->peer_db_pending))
+		queue_work(kpcintb_workqueue, &ntb->peer_db_work);
+}
+
 static int vntb_epf_peer_db_set(struct ntb_dev *ndev, u64 db_bits)
 {
-	u32 interrupt_num = ffs(db_bits) + 1;
 	struct epf_ntb *ntb = ntb_ndev(ndev);
-	u8 func_no, vfunc_no;
-	int ret;
-
-	func_no = ntb->epf->func_no;
-	vfunc_no = ntb->epf->vfunc_no;
 
 	/*
-	 * pci_epc_raise_irq() for MSI expects a 1-based interrupt number.
-	 * ffs() returns a 1-based index (bit 0 -> 1). interrupt_num has already
-	 * been computed as ffs(db_bits) + 1 above. Adding one more +1 when
-	 * calling pci_epc_raise_irq() therefore results in:
-	 *
-	 *   doorbell bit 0 -> MSI #3
-	 *
-	 * Legacy mapping (kept for compatibility):
-	 *
-	 *   MSI #1 : link event (reserved)
-	 *   MSI #2 : unused (historical offset)
-	 *   MSI #3 : doorbell bit 0 (DB#0)
-	 *   MSI #4 : doorbell bit 1 (DB#1)
-	 *   ...
-	 *
-	 * Do not change this mapping to avoid breaking interoperability with
-	 * older peers.
+	 * .peer_db_set() may be called from atomic context. pci_epc_raise_irq()
+	 * can sleep (it takes epc->lock), so defer MSI raising to process
+	 * context. Doorbell requests are coalesced in peer_db_pending.
 	 */
-	ret = pci_epc_raise_irq(ntb->epf->epc, func_no, vfunc_no,
-				PCI_IRQ_MSI, interrupt_num + 1);
-	if (ret)
-		dev_err(&ntb->ntb.dev, "Failed to raise IRQ\n");
+	atomic64_or(db_bits, &ntb->peer_db_pending);
+	queue_work(kpcintb_workqueue, &ntb->peer_db_work);
 
-	return ret;
+	return 0;
 }
 
 static u64 vntb_epf_db_read(struct ntb_dev *ndev)
@@ -1629,6 +1675,10 @@ static int epf_ntb_probe(struct pci_epf *epf,
 	ntb->epf = epf;
 	ntb->vbus_number = 0xff;
 
+	INIT_WORK(&ntb->peer_db_work, vntb_epf_peer_db_work);
+	disable_work(&ntb->peer_db_work);
+	atomic64_set(&ntb->peer_db_pending, 0);
+
 	/* Initially, no bar is assigned */
 	for (i = 0; i < VNTB_BAR_NUM; i++)
 		ntb->epf_ntb_bar[i] = NO_BAR;
-- 
2.51.0


  parent reply	other threads:[~2026-03-23  3:16 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-03-23  3:15 [PATCH v3 00/10] PCI: endpoint: pci-epf-vntb / NTB: epf: Enable per-doorbell bit handling Koichiro Den
2026-03-23  3:15 ` [PATCH v3 01/10] PCI: endpoint: pci-epf-vntb: Document legacy MSI doorbell offset Koichiro Den
2026-03-23  3:15 ` Koichiro Den [this message]
2026-03-23 18:27   ` [PATCH v3 02/10] PCI: endpoint: pci-epf-vntb: Defer pci_epc_raise_irq() out of atomic context Frank Li
2026-05-01 16:26   ` Manivannan Sadhasivam
2026-05-11  7:54     ` Koichiro Den
2026-03-23  3:15 ` [PATCH v3 03/10] PCI: endpoint: pci-epf-vntb: Report 0-based doorbell vector via ntb_db_event() Koichiro Den
2026-03-23  3:15 ` [PATCH v3 04/10] PCI: endpoint: pci-epf-vntb: Exclude reserved slots from db_valid_mask Koichiro Den
2026-03-23  3:15 ` [PATCH v3 05/10] PCI: endpoint: pci-epf-vntb: Implement db_vector_count/mask for doorbells Koichiro Den
2026-03-23  3:15 ` [PATCH v3 06/10] NTB: epf: Document legacy doorbell slot offset in ntb_epf_peer_db_set() Koichiro Den
2026-03-23  3:15 ` [PATCH v3 07/10] NTB: epf: Make db_valid_mask cover only real doorbell bits Koichiro Den
2026-03-23  3:15 ` [PATCH v3 08/10] NTB: epf: Report 0-based doorbell vector via ntb_db_event() Koichiro Den
2026-03-23  3:15 ` [PATCH v3 09/10] NTB: epf: Fix doorbell bitmask handling in db_read/db_clear Koichiro Den
2026-05-01 16:29   ` Manivannan Sadhasivam
2026-05-11  9:15     ` Koichiro Den
2026-03-23  3:15 ` [PATCH v3 10/10] NTB: epf: Implement db_vector_count/mask for doorbells Koichiro Den
2026-03-23 15:43 ` [PATCH v3 00/10] PCI: endpoint: pci-epf-vntb / NTB: epf: Enable per-doorbell bit handling Koichiro Den
2026-03-25  6:23   ` Niklas Cassel
2026-03-25  8:44     ` Koichiro Den
2026-04-07 15:21   ` Koichiro Den

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260323031544.2598111-3-den@valinux.co.jp \
    --to=den@valinux.co.jp \
    --cc=Frank.Li@nxp.com \
    --cc=allenbh@gmail.com \
    --cc=bhelgaas@google.com \
    --cc=cassel@kernel.org \
    --cc=dave.jiang@intel.com \
    --cc=jbrunet@baylibre.com \
    --cc=jdmason@kudzu.us \
    --cc=kishon@kernel.org \
    --cc=kwilczynski@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-pci@vger.kernel.org \
    --cc=lpieralisi@kernel.org \
    --cc=mani@kernel.org \
    --cc=ntb@lists.linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).