All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: "Michał Pecio" <michal.pecio@gmail.com>
To: Paul Menzel <pmenzel@molgen.mpg.de>
Cc: linux-usb@vger.kernel.org,
	Mathias Nyman <mathias.nyman@intel.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: xhci_hcd 0000:00:14.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 1 comp_code 1
Date: Fri, 5 Apr 2024 11:32:47 +0200	[thread overview]
Message-ID: <20240405113247.743e34b2@foxbook> (raw)
In-Reply-To: <58bca6f2-797a-4e20-a476-2294309afdd5@molgen.mpg.de>

[-- Attachment #1: Type: text/plain, Size: 1252 bytes --]

Hi,

> On a Dell XPS 13 9360/0596KF, BIOS 2.21.0 06/02/2022 with Debian 
> sid/unstable, at least Linux 6.8-rc7 and 6.9-rc1+ log the error
> below, when unplugging a USB headset from the USB Type A port of the
> laptop.
Can you reproduce this on other computers or with other USB devices?
(I just tested a few random audio dongles on my PC, nothing happens).

Is there any kernel version known to be free of this problem?

>      xhci_hcd 0000:00:14.0: ERROR Transfer event TRB DMA ptr not part
> of current TD ep_index 1 comp_code 1
>      xhci_hcd 0000:00:14.0: Looking for event-dma 0000000128897080 
> trb-start 0000000128897090 trb-end 0000000128897090 seg-start 
> 0000000128897000 seg-end 0000000128897ff0
This looks like the xHCI driver freed data structures related to a USB
transfer concurrently with the hardware executing this transfer. It's
a bad thing which should never happen, but sometimes it does because of
bugs in USB controllers or the driver.

Can you run patched kernels on the affected laptop? I attach a patch
I used to track down similar issues on UVC webcams. It doesn't fix
anything, but adds some dmesg noise which may be helpful in figuring
out what goes wrong. The patch should work on v6.9-rc1 or -rc2.

Regards,
Michal

[-- Attachment #2: xhci-noise.patch --]
[-- Type: text/x-patch, Size: 6207 bytes --]

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 52278afea94b..6472b2554ed3 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -843,11 +843,16 @@ static void xhci_unmap_td_bounce_buffer(struct xhci_hcd *xhci,
 	seg->bounce_offs = 0;
 }
 
-static int xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
-			   struct xhci_ring *ep_ring, int status)
+static int __xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
+			   struct xhci_ring *ep_ring, int status, const char *from)
 {
 	struct urb *urb = NULL;
 
+	ep_ring->cleanup_first_dma = xhci_trb_virt_to_dma(ep_ring->deq_seg, td->first_trb);
+	ep_ring->cleanup_last_dma = xhci_trb_virt_to_dma(ep_ring->deq_seg, td->last_trb);
+	ep_ring->cleanup_status = status;
+	ep_ring->cleanup_from = from;
+
 	/* Clean up the endpoint's TD list */
 	urb = td->urb;
 
@@ -891,6 +896,8 @@ static int xhci_td_cleanup(struct xhci_hcd *xhci, struct xhci_td *td,
 	return 0;
 }
 
+#define xhci_td_cleanup(xhci, td, ep_ring, status) __xhci_td_cleanup(xhci, td, ep_ring, status, __func__)
+
 
 /* Complete the cancelled URBs we unlinked from td_list. */
 static void xhci_giveback_invalidated_tds(struct xhci_virt_ep *ep)
@@ -2179,6 +2186,15 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 		     u32 trb_comp_code)
 {
 	struct xhci_ep_ctx *ep_ctx;
+	struct urb_priv *urb_priv;
+	struct usb_iso_packet_descriptor *frame;
+
+	urb_priv = td->urb->hcpriv;
+	frame = &td->urb->iso_frame_desc[urb_priv->num_tds_done];
+
+	if (ep_ring->interesting)
+		xhci_info(xhci, "finish_td td_status %d comp_code %d frame_status %d frame_actual_length %d\n",
+				td->status, trb_comp_code, frame->status, frame->actual_length);
 
 	ep_ctx = xhci_get_ep_ctx(xhci, ep->vdev->out_ctx, ep->ep_index);
 
@@ -2391,12 +2407,17 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 	short_framestatus = td->urb->transfer_flags & URB_SHORT_NOT_OK ?
 		-EREMOTEIO : 0;
 
+	if (ep_ring->interesting)
+		xhci_info(xhci, "process_isoc_td requested %u ep_trb_len %u remaining %u\n", requested, ep_trb_len, remaining);
+
 	/* handle completion code */
 	switch (trb_comp_code) {
 	case COMP_SUCCESS:
 		/* Don't overwrite status if TD had an error, see xHCI 4.9.1 */
-		if (td->error_mid_td)
+		if (td->error_mid_td) {
+			xhci_info(xhci, "Got SUCCESS after mid TD error\n");
 			break;
+		}
 		if (remaining) {
 			frame->status = short_framestatus;
 			if (xhci->quirks & XHCI_TRUST_TX_LENGTH)
@@ -2462,7 +2483,7 @@ static int process_isoc_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 finish_td:
 	/* Don't give back TD yet if we encountered an error mid TD */
 	if (td->error_mid_td && ep_trb != td->last_trb) {
-		xhci_dbg(xhci, "Error mid isoc TD, wait for final completion event\n");
+		xhci_info(xhci, "Error mid isoc TD, wait for final completion event\n");
 		td->urb_length_set = true;
 		return 0;
 	}
@@ -2597,6 +2618,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	trb_comp_code = GET_COMP_CODE(le32_to_cpu(event->transfer_len));
 	ep_trb_dma = le64_to_cpu(event->buffer);
 
+	bool interesting = trb_comp_code != COMP_SUCCESS && trb_comp_code != COMP_SHORT_PACKET;
+	if (interesting)
+		xhci_info(xhci, "handle_tx_event interesting ep_trb_dma %llx comp_code %d slot %u ep %d", ep_trb_dma, trb_comp_code, slot_id, ep_index);
+
 	ep = xhci_get_virt_ep(xhci, slot_id, ep_index);
 	if (!ep) {
 		xhci_err(xhci, "ERROR Invalid Transfer event\n");
@@ -2640,6 +2665,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		}
 	}
 
+	if (!interesting && ep_ring->interesting)
+		xhci_info(xhci, "handle_tx_event uninteresting ep_trb_dma %llx comp_code %d slot %u ep %d", ep_trb_dma, trb_comp_code, slot_id, ep_index);
+	ep_ring->interesting |= interesting;
+
 	/* Count current td numbers if ep->skip is set */
 	if (ep->skip)
 		td_num += list_count_nodes(&ep_ring->td_list);
@@ -2831,6 +2860,12 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		ep_seg = trb_in_td(xhci, td->start_seg, td->first_trb,
 				td->last_trb, ep_trb_dma, false);
 
+		if (ep_ring->interesting)
+			xhci_info(xhci, "handle_tx_event first_trb %llx last_trb %llx ep_seg %llx\n",
+					xhci_trb_virt_to_dma(ep_ring->deq_seg, td->first_trb),
+					xhci_trb_virt_to_dma(ep_ring->deq_seg, td->last_trb),
+					ep_seg? ep_seg->dma: -1);
+
 		/*
 		 * Skip the Force Stopped Event. The event_trb(event_dma) of FSE
 		 * is not in the current TD pointed by ep_ring->dequeue because
@@ -2880,7 +2915,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 						   td_next->last_trb, ep_trb_dma, false);
 				if (ep_seg) {
 					/* give back previous TD, start handling new */
-					xhci_dbg(xhci, "Missing TD completion event after mid TD error\n");
+					xhci_info(xhci, "Missing TD completion event after mid TD error\n");
 					ep_ring->dequeue = td->last_trb;
 					ep_ring->deq_seg = td->last_trb_seg;
 					inc_deq(xhci, ep_ring);
@@ -2898,9 +2933,14 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 					trb_comp_code);
 				trb_in_td(xhci, td->start_seg, td->first_trb,
 					  td->last_trb, ep_trb_dma, true);
+				xhci_info(xhci, "last xhci_td_cleanup: first_dma %llx last_dma %llx status %d from %s\n",
+						ep_ring->cleanup_first_dma, ep_ring->cleanup_last_dma,
+						ep_ring->cleanup_status, ep_ring->cleanup_from);
+				ep_ring->interesting = true;
 				return -ESHUTDOWN;
 			}
 		}
+
 		if (trb_comp_code == COMP_SHORT_PACKET)
 			ep_ring->last_td_was_short = true;
 		else
@@ -2958,6 +2998,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	 */
 	} while (handling_skipped_tds);
 
+	ep_ring->interesting = interesting;
+
 	return 0;
 
 err_out:
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index 6f4bf98a6282..1e6ccb644279 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -1360,7 +1360,12 @@ struct xhci_ring {
 	unsigned int		bounce_buf_len;
 	enum xhci_ring_type	type;
 	bool			last_td_was_short;
+	bool			interesting;
 	struct radix_tree_root	*trb_address_map;
+	dma_addr_t		cleanup_first_dma;
+	dma_addr_t		cleanup_last_dma;
+	const char		*cleanup_from;
+	int			cleanup_status;
 };
 
 struct xhci_erst_entry {

  reply	other threads:[~2024-04-05  9:32 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-29 10:47 xhci_hcd 0000:00:14.0: ERROR Transfer event TRB DMA ptr not part of current TD ep_index 1 comp_code 1 Paul Menzel
2024-04-05  9:32 ` Michał Pecio [this message]
2024-04-06 14:37   ` Paul Menzel
2024-04-06 16:36     ` Michał Pecio
2024-04-06 17:01       ` Paul Menzel
2024-04-07 12:25         ` Michał Pecio
2024-04-08  7:17           ` Mathias Nyman
2024-04-08 16:37             ` Paul Menzel
2024-04-08 16:42               ` Paul Menzel
2024-04-08 19:05             ` Michał Pecio
2024-04-09 11:22               ` Mathias Nyman
2024-04-10  7:59                 ` Paul Menzel
2024-04-11  7:18                   ` Mathias Nyman
2024-04-11 19:55                     ` Paul Menzel
2024-04-13  9:17                   ` Michał Pecio
2024-04-10  9:46                 ` Michał Pecio

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240405113247.743e34b2@foxbook \
    --to=michal.pecio@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-usb@vger.kernel.org \
    --cc=mathias.nyman@intel.com \
    --cc=pmenzel@molgen.mpg.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.