LKML Archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic
@ 2015-05-19 15:06 Richard Watts
  2015-07-23 22:08 ` Greg KH
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Watts @ 2015-05-19 15:06 UTC (permalink / raw
  To: linux-kernel, gregkh

Avoid usb reset crashes by making tty_io cdevs truly dynamic

Signed-off-by: Richard Watts <rrw@kynesim.co.uk>
Reported-by: Duncan Mackintosh <DMackintosh@cbnl.com>
---
  drivers/tty/tty_io.c       | 24 ++++++++++++++++--------
  include/linux/tty_driver.h |  2 +-
  2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index e569546..699cf20 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3168,9 +3168,12 @@ static int tty_cdev_add(struct tty_driver 
*driver, dev_t dev,
  		unsigned int index, unsigned int count)
  {
  	/* init here, since reused cdevs cause crashes */
-	cdev_init(&driver->cdevs[index], &tty_fops);
-	driver->cdevs[index].owner = driver->owner;
-	return cdev_add(&driver->cdevs[index], dev, count);
+	driver->cdevs[index] = cdev_alloc();
+	if (!driver->cdevs[index])
+		return -ENOMEM;
+	cdev_init(driver->cdevs[index], &tty_fops);
+	driver->cdevs[index]->owner = driver->owner;
+	return cdev_add(driver->cdevs[index], dev, count);
  }

  /**
@@ -3276,8 +3279,10 @@ struct device *tty_register_device_attr(struct 
tty_driver *driver,

  error:
  	put_device(dev);
-	if (cdev)
-		cdev_del(&driver->cdevs[index]);
+	if (cdev) {
+		cdev_del(driver->cdevs[index]);
+		driver->cdevs[index] = NULL;
+	}
  	return ERR_PTR(retval);
  }
  EXPORT_SYMBOL_GPL(tty_register_device_attr);
@@ -3297,8 +3302,10 @@ void tty_unregister_device(struct tty_driver 
*driver, unsigned index)
  {
  	device_destroy(tty_class,
  		MKDEV(driver->major, driver->minor_start) + index);
-	if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC))
-		cdev_del(&driver->cdevs[index]);
+	if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) {
+		cdev_del(driver->cdevs[index]);
+		driver->cdevs[index] = NULL;
+	}
  }
  EXPORT_SYMBOL(tty_unregister_device);

@@ -3363,6 +3370,7 @@ err_free_all:
  	kfree(driver->ports);
  	kfree(driver->ttys);
  	kfree(driver->termios);
+	kfree(driver->cdevs);
  	kfree(driver);
  	return ERR_PTR(err);
  }
@@ -3391,7 +3399,7 @@ static void destruct_tty_driver(struct kref *kref)
  		}
  		proc_tty_unregister_driver(driver);
  		if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)
-			cdev_del(&driver->cdevs[0]);
+			cdev_del(driver->cdevs[0]);
  	}
  	kfree(driver->cdevs);
  	kfree(driver->ports);
diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
index 92e337c..1610524 100644
--- a/include/linux/tty_driver.h
+++ b/include/linux/tty_driver.h
@@ -296,7 +296,7 @@ struct tty_operations {
  struct tty_driver {
  	int	magic;		/* magic number for this structure */
  	struct kref kref;	/* Reference management */
-	struct cdev *cdevs;
+	struct cdev **cdevs;
  	struct module	*owner;
  	const char	*driver_name;
  	const char	*name;
-- 
1.9.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic
  2015-05-19 15:06 [PATCH 1/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic Richard Watts
@ 2015-07-23 22:08 ` Greg KH
  2015-07-23 22:46   ` Richard Watts
  0 siblings, 1 reply; 4+ messages in thread
From: Greg KH @ 2015-07-23 22:08 UTC (permalink / raw
  To: Richard Watts; +Cc: linux-serial, linux-usb, linux-kernel

On Tue, May 19, 2015 at 04:06:53PM +0100, Richard Watts wrote:
> Avoid usb reset crashes by making tty_io cdevs truly dynamic

What USB reset crashes are you referring to here?

> 
> Signed-off-by: Richard Watts <rrw@kynesim.co.uk>
> Reported-by: Duncan Mackintosh <DMackintosh@cbnl.com>
> ---
>  drivers/tty/tty_io.c       | 24 ++++++++++++++++--------
>  include/linux/tty_driver.h |  2 +-
>  2 files changed, 17 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
> index e569546..699cf20 100644
> --- a/drivers/tty/tty_io.c
> +++ b/drivers/tty/tty_io.c
> @@ -3168,9 +3168,12 @@ static int tty_cdev_add(struct tty_driver *driver,
> dev_t dev,
>  		unsigned int index, unsigned int count)
>  {
>  	/* init here, since reused cdevs cause crashes */
> -	cdev_init(&driver->cdevs[index], &tty_fops);
> -	driver->cdevs[index].owner = driver->owner;
> -	return cdev_add(&driver->cdevs[index], dev, count);
> +	driver->cdevs[index] = cdev_alloc();
> +	if (!driver->cdevs[index])
> +		return -ENOMEM;
> +	cdev_init(driver->cdevs[index], &tty_fops);
> +	driver->cdevs[index]->owner = driver->owner;
> +	return cdev_add(driver->cdevs[index], dev, count);
>  }
> 
>  /**
> @@ -3276,8 +3279,10 @@ struct device *tty_register_device_attr(struct
> tty_driver *driver,
> 
>  error:
>  	put_device(dev);
> -	if (cdev)
> -		cdev_del(&driver->cdevs[index]);
> +	if (cdev) {
> +		cdev_del(driver->cdevs[index]);
> +		driver->cdevs[index] = NULL;
> +	}
>  	return ERR_PTR(retval);
>  }
>  EXPORT_SYMBOL_GPL(tty_register_device_attr);
> @@ -3297,8 +3302,10 @@ void tty_unregister_device(struct tty_driver *driver,
> unsigned index)
>  {
>  	device_destroy(tty_class,
>  		MKDEV(driver->major, driver->minor_start) + index);
> -	if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC))
> -		cdev_del(&driver->cdevs[index]);
> +	if (!(driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)) {
> +		cdev_del(driver->cdevs[index]);
> +		driver->cdevs[index] = NULL;
> +	}
>  }
>  EXPORT_SYMBOL(tty_unregister_device);
> 
> @@ -3363,6 +3370,7 @@ err_free_all:
>  	kfree(driver->ports);
>  	kfree(driver->ttys);
>  	kfree(driver->termios);
> +	kfree(driver->cdevs);
>  	kfree(driver);
>  	return ERR_PTR(err);
>  }
> @@ -3391,7 +3399,7 @@ static void destruct_tty_driver(struct kref *kref)
>  		}
>  		proc_tty_unregister_driver(driver);
>  		if (driver->flags & TTY_DRIVER_DYNAMIC_ALLOC)
> -			cdev_del(&driver->cdevs[0]);
> +			cdev_del(driver->cdevs[0]);
>  	}
>  	kfree(driver->cdevs);
>  	kfree(driver->ports);
> diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h
> index 92e337c..1610524 100644
> --- a/include/linux/tty_driver.h
> +++ b/include/linux/tty_driver.h
> @@ -296,7 +296,7 @@ struct tty_operations {
>  struct tty_driver {
>  	int	magic;		/* magic number for this structure */
>  	struct kref kref;	/* Reference management */
> -	struct cdev *cdevs;
> +	struct cdev **cdevs;
>  	struct module	*owner;
>  	const char	*driver_name;
>  	const char	*name;

I don't understand what bug this patch is trying to solve, care to help
describe it better?

thanks,

greg k-h

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic
  2015-07-23 22:08 ` Greg KH
@ 2015-07-23 22:46   ` Richard Watts
  2015-07-24 13:12     ` Alan Stern
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Watts @ 2015-07-23 22:46 UTC (permalink / raw
  To: Greg KH; +Cc: linux-serial, linux-usb, linux-kernel

[-- Attachment #1: Type: text/plain, Size: 2638 bytes --]

Hi,

  Sure - sorry, my description was a little .. basic.

  So, I have a client who was having problems with machines hanging in
the field. Very rare, associated with a h/w change that introduced
more cores. Kernel dumps implied that the timer list was getting
corrupted.

  This configuration of machine is an SBC on a board which communicates
with the SBC (partly) via a USB CDC device, which pops up as
/dev/ttyACM0.

  So one of the things we turned on was CONFIG_DEBUG_KOBJECT_RELEASE.
One of the side-effects of this is to delay kobject destruction.

  When we did that, we could reproduce the crash by performing a
USB reset on the CDC device -  and logs suggest that this was
happening in the field too.

  When the USB reset happens, we get a bunch of complaints from the
kernel.

  Some of these are to do with races on the kobjects associated with the
sysfs entries for the ttyACM0 device. They turn out not to be fatal,
and have their own patch series ('Attempt to cope with device changes
and delayed kobject deallocation' on linux-kernel).

  The fatal one turns out to be an execution path that goes like this:

  1 USB device declares itself to be CDC
  2 tty driver fires up and allocates a cdev for the relevant tty.
  3 driver->cdevs[0].kobj gets initialised as part of the cdev_alloc()
  4 USB reset happens, queueing driver->cdevs[0].kobj for release.
  5 The tty driver calls cdev_init(&driver->cdevs[0]), which
      reinitialises driver->cdevs[0].kobj with a refcount of 1.
  6 tty driver starts using that new cdev, queueing an operation on it.
     This causes a timer entry to be added including the kobj.
  7 At this point, the release we scheduled in (4) happens and the
     members of kobj are deallocated.
  8 Someone allocates the newly released memory for one of the members of
      cdriver->cdevs[0].kobj somewhere else and overwrites it.
  9 The timer goes off.
10 Boom

  My patch (ham-fistedly) fixes this by ensuring that because we
never reuse the cdev pointer, we are never fooled into reinitialising
a kobject queued for deletion.

  I'm not all that familiar with how the locking should go here, and
there is a definite argument that under non CONFIG_DEBUG_KOBJECT_RELEASE
conditions, the kobject_release() would have happened by 5, and
therefore this situation should never exist "for real".

  .. but (a) that makes it rather hard to test kernels with
CONFIG_DEBUG_KOBJECT_RELEASE, and (b) my customer's crashes have
(allegedly) now gone away even without CONFIG_DEBUG_KOBJECT_RELEASE
set.

  Does that help at all? I've attached my 0/1, just in case that
got lost somewhere.


Richard.




[-- Attachment #2: Attached Message --]
[-- Type: message/rfc822, Size: 1554 bytes --]

From: Richard Watts <rrw@kynesim.co.uk>
To: linux-kernel@vger.kernel.org, gregkh@linuxfoundation.org
Subject: [PATCH 0/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic
Date: Tue, 19 May 2015 16:04:32 +0100
Message-ID: <555B5100.8010708@kynesim.co.uk>

Sometimes, usb buses on which CDC ACM devices sit encounter a usb reset.

When this happens, particularly when CONFIG_DEBUG_KOBJECT_RELEASE is on,
we attempt to destroy the cdev for the associated tty and then
rapidly re-initialise it. Since kobject destruction is not immediate,
this potentially leaves us with cdev_init() calling kobject_init() on a
kobject that is about to be destroyed.

This turns out not to be such a good thing and this patch solves the
problem by making the cdevs tty_operations->cdevs dynamically
allocated.

This may not be a problem in the wild (though I have some circumstantial
evidence that it is), but I submit that we might want to think about
fixing it anyway, since it makes debugging on systems with
CONFIG_DEBUG_KOBJECT_RELEASE=y and USB resets rather difficult
(guess what I have been doing lately .. ).

Patch is against e26081808edadfd257c6c9d81014e3b25e9a6118 (head of
git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git ).

  (in fact, you will still get an oops - which is the subject of
another, more controversial, patchset ..)



Richard.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH 1/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic
  2015-07-23 22:46   ` Richard Watts
@ 2015-07-24 13:12     ` Alan Stern
  0 siblings, 0 replies; 4+ messages in thread
From: Alan Stern @ 2015-07-24 13:12 UTC (permalink / raw
  To: Richard Watts; +Cc: Greg KH, linux-serial, linux-usb, linux-kernel

On Thu, 23 Jul 2015, Richard Watts wrote:

>   When the USB reset happens, we get a bunch of complaints from the
> kernel.
> 
>   Some of these are to do with races on the kobjects associated with the
> sysfs entries for the ttyACM0 device. They turn out not to be fatal,
> and have their own patch series ('Attempt to cope with device changes
> and delayed kobject deallocation' on linux-kernel).
> 
>   The fatal one turns out to be an execution path that goes like this:
> 
>   1 USB device declares itself to be CDC
>   2 tty driver fires up and allocates a cdev for the relevant tty.

Does this happen in the same thread as 1?

>   3 driver->cdevs[0].kobj gets initialised as part of the cdev_alloc()
>   4 USB reset happens, queueing driver->cdevs[0].kobj for release.

1 and 4 should be mutually exclusive.  Probing and reset both acquire
the USB device's mutex.

>   5 The tty driver calls cdev_init(&driver->cdevs[0]), which
>       reinitialises driver->cdevs[0].kobj with a refcount of 1.

Presumably this happens in the same thread as 1, 2, and 3?  Which means 
it should be mutually exclusive with 4 -- it should happen _before_ 4, 
not after.

By the way, kobjects should _never_ be reinitialized.  They are 
initialized just once, when they are created.  If something initializes 
them again afterward, that's a bug.

>   6 tty driver starts using that new cdev, queueing an operation on it.
>      This causes a timer entry to be added including the kobj.
>   7 At this point, the release we scheduled in (4) happens and the
>      members of kobj are deallocated.
>   8 Someone allocates the newly released memory for one of the members of
>       cdriver->cdevs[0].kobj somewhere else and overwrites it.
>   9 The timer goes off.
> 10 Boom
> 
>   My patch (ham-fistedly) fixes this by ensuring that because we
> never reuse the cdev pointer, we are never fooled into reinitialising
> a kobject queued for deletion.
> 
>   I'm not all that familiar with how the locking should go here, and
> there is a definite argument that under non CONFIG_DEBUG_KOBJECT_RELEASE
> conditions, the kobject_release() would have happened by 5, and
> therefore this situation should never exist "for real".

I can tell you a little about locking in the USB subsystem (don't know
about the TTY subsystem).  In particular, USB probing and reset are
mutually exclusive.

>   .. but (a) that makes it rather hard to test kernels with
> CONFIG_DEBUG_KOBJECT_RELEASE, and (b) my customer's crashes have
> (allegedly) now gone away even without CONFIG_DEBUG_KOBJECT_RELEASE
> set.

In general, delaying an object's release should make no difference at
all (except for the fact that the memory is temporarily unavailable for
other purposes).  Objects don't get released until their refcounts are
0, meaning that they are not in use anywhere.  If an object is still in
use (being initialized, or on a list, etc.) then its refcount should 
not be 0.  If it is, that's a bug.

Alan Stern


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-07-24 13:12 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-05-19 15:06 [PATCH 1/1] Avoid usb reset crashes by making tty_io cdevs truly dynamic Richard Watts
2015-07-23 22:08 ` Greg KH
2015-07-23 22:46   ` Richard Watts
2015-07-24 13:12     ` Alan Stern

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).