All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] linux-nvme 3.20 plans
@ 2015-02-12  1:11 Keith Busch
  2015-02-12  1:11 ` [PATCH] NVMe: Async probe Keith Busch
  0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2015-02-12  1:11 UTC (permalink / raw


Hi Willy,

This cover letter is not necessarily for the following patch. I just
want to know what your plans are for 3.20. We'd be remiss to miss another
merge window.

Keith Busch (1):
  NVMe: Async probe

 drivers/block/nvme-core.c |   55 ++++++++++++++++++++++++---------------------
 include/linux/nvme.h      |    2 ++
 2 files changed, 32 insertions(+), 25 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] NVMe: Async probe
  2015-02-12  1:11 [PATCH] linux-nvme 3.20 plans Keith Busch
@ 2015-02-12  1:11 ` Keith Busch
  2015-02-12 14:52   ` Indraneel Mukherjee
  0 siblings, 1 reply; 4+ messages in thread
From: Keith Busch @ 2015-02-12  1:11 UTC (permalink / raw


This performs the longest parts of nvme device probe asynchronously. This
speeds up probe significantly when multiple devices are in use.

Just to drive how important this is for many distros, 'systemd' sends a
fatal signal to the modprobe routine during boot if you have a lot of
NVMe drives; it might take a while to initailize them and exceed some
arbitrary timeout that no one knows how to change. The result is only a
subset of your drives are discovered after boot since nvme probe bails
on devices when fatal_signal_pending is set.

Signed-off-by: Keith Busch <keith.busch at intel.com>
---
 drivers/block/nvme-core.c |   55 ++++++++++++++++++++++++---------------------
 include/linux/nvme.h      |    2 ++
 2 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index b3cb67d..89cb4c5 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -2799,6 +2799,8 @@ static void nvme_reset_workfn(struct work_struct *work)
 	dev->reset_workfn(work);
 }
 
+static void nvme_async_probe(void *data, async_cookie_t cookie);
+
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	int node, result = -ENOMEM;
@@ -2833,48 +2835,50 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto release;
 
+	dev->probe_cookie = async_schedule(nvme_async_probe, dev);
+	return result;
+
+ release:
+	nvme_release_instance(dev);
+ put_pci:
+	pci_dev_put(dev->pci_dev);
+ free:
+	kfree(dev->queues);
+	kfree(dev->entry);
+	kfree(dev);
+	return result;
+}
+
+static void nvme_async_probe(void *data, async_cookie_t cookie)
+{
+	struct nvme_dev *dev = data;
+	int result;
+
 	kref_init(&dev->kref);
 	result = nvme_dev_start(dev);
 	if (result)
-		goto release_pools;
+		goto reset;
 
 	if (dev->online_queues > 1)
 		result = nvme_dev_add(dev);
 	if (result)
-		goto shutdown;
+		goto reset;
 
 	scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
 	dev->miscdev.minor = MISC_DYNAMIC_MINOR;
-	dev->miscdev.parent = &pdev->dev;
+	dev->miscdev.parent = &dev->pci_dev->dev;
 	dev->miscdev.name = dev->name;
 	dev->miscdev.fops = &nvme_dev_fops;
 	result = misc_register(&dev->miscdev);
 	if (result)
-		goto remove;
+		goto reset;
 
 	nvme_set_irq_hints(dev);
-
 	dev->initialized = 1;
-	return 0;
-
- remove:
-	nvme_dev_remove(dev);
-	nvme_dev_remove_admin(dev);
-	nvme_free_namespaces(dev);
- shutdown:
-	nvme_dev_shutdown(dev);
- release_pools:
-	nvme_free_queues(dev, 0);
-	nvme_release_prp_pools(dev);
- release:
-	nvme_release_instance(dev);
- put_pci:
-	pci_dev_put(dev->pci_dev);
- free:
-	kfree(dev->queues);
-	kfree(dev->entry);
-	kfree(dev);
-	return result;
+	return;
+ reset:
+	dev->reset_workfn = nvme_reset_failed_dev;
+	queue_work(nvme_workq, &dev->reset_work);
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -2902,6 +2906,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	spin_unlock(&dev_list_lock);
 
 	pci_set_drvdata(pdev, NULL);
+	async_synchronize_cookie(dev->probe_cookie);
 	flush_work(&dev->reset_work);
 	misc_deregister(&dev->miscdev);
 	nvme_dev_shutdown(dev);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 19a5d4b..ebfbfa8 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -16,6 +16,7 @@
 #define _LINUX_NVME_H
 
 #include <uapi/linux/nvme.h>
+#include <linux/async.h>
 #include <linux/pci.h>
 #include <linux/miscdevice.h>
 #include <linux/kref.h>
@@ -94,6 +95,7 @@ struct nvme_dev {
 	struct miscdevice miscdev;
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
+	async_cookie_t probe_cookie;
 	char name[12];
 	char serial[20];
 	char model[40];
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH] NVMe: Async probe
  2015-02-12  1:11 ` [PATCH] NVMe: Async probe Keith Busch
@ 2015-02-12 14:52   ` Indraneel Mukherjee
  2015-02-12 15:25     ` Keith Busch
  0 siblings, 1 reply; 4+ messages in thread
From: Indraneel Mukherjee @ 2015-02-12 14:52 UTC (permalink / raw




> -----Original Message-----
> From: Linux-nvme [mailto:linux-nvme-bounces at lists.infradead.org] On Behalf
> Of Keith Busch
> Sent: Thursday, February 12, 2015 6:41 AM
> To: linux-nvme at lists.infradead.org; willy at linux.intel.com
> Cc: Keith Busch
> Subject: [PATCH] NVMe: Async probe
> 
> This performs the longest parts of nvme device probe asynchronously. This
> speeds up probe significantly when multiple devices are in use.
> 
> Just to drive how important this is for many distros, 'systemd' sends a
fatal signal
> to the modprobe routine during boot if you have a lot of NVMe drives; it
might
> take a while to initailize them and exceed some arbitrary timeout that no
one
> knows how to change. The result is only a subset of your drives are
discovered
> after boot since nvme probe bails on devices when fatal_signal_pending is
set.
> 
> Signed-off-by: Keith Busch <keith.busch at intel.com>
> ---
>  drivers/block/nvme-core.c |   55
++++++++++++++++++++++++---------------------
>  include/linux/nvme.h      |    2 ++
>  2 files changed, 32 insertions(+), 25 deletions(-)
> 
> diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c index
> b3cb67d..89cb4c5 100644
> --- a/drivers/block/nvme-core.c
> +++ b/drivers/block/nvme-core.c
> @@ -2799,6 +2799,8 @@ static void nvme_reset_workfn(struct work_struct
> *work)
>  	dev->reset_workfn(work);
>  }
> 
> +static void nvme_async_probe(void *data, async_cookie_t cookie);
> +
>  static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id
*id)  {
>  	int node, result = -ENOMEM;
> @@ -2833,48 +2835,50 @@ static int nvme_probe(struct pci_dev *pdev, const
> struct pci_device_id *id)
>  	if (result)
>  		goto release;
> 
> +	dev->probe_cookie = async_schedule(nvme_async_probe, dev);
> +	return result;
> +
> + release:
> +	nvme_release_instance(dev);
> + put_pci:
> +	pci_dev_put(dev->pci_dev);
> + free:
> +	kfree(dev->queues);
> +	kfree(dev->entry);
> +	kfree(dev);
> +	return result;
> +}
> +
> +static void nvme_async_probe(void *data, async_cookie_t cookie) {
> +	struct nvme_dev *dev = data;
> +	int result;
> +
>  	kref_init(&dev->kref);
>  	result = nvme_dev_start(dev);
>  	if (result)
> -		goto release_pools;
> +		goto reset;
> 
>  	if (dev->online_queues > 1)
>  		result = nvme_dev_add(dev);
>  	if (result)
> -		goto shutdown;
> +		goto reset;
> 
>  	scnprintf(dev->name, sizeof(dev->name), "nvme%d", dev->instance);
>  	dev->miscdev.minor = MISC_DYNAMIC_MINOR;
> -	dev->miscdev.parent = &pdev->dev;
> +	dev->miscdev.parent = &dev->pci_dev->dev;
>  	dev->miscdev.name = dev->name;
>  	dev->miscdev.fops = &nvme_dev_fops;
>  	result = misc_register(&dev->miscdev);
>  	if (result)
> -		goto remove;
> +		goto reset;
> 
>  	nvme_set_irq_hints(dev);
> -
>  	dev->initialized = 1;
> -	return 0;
> -
> - remove:
> -	nvme_dev_remove(dev);
> -	nvme_dev_remove_admin(dev);
> -	nvme_free_namespaces(dev);
> - shutdown:
> -	nvme_dev_shutdown(dev);
> - release_pools:
> -	nvme_free_queues(dev, 0);
> -	nvme_release_prp_pools(dev);
> - release:
> -	nvme_release_instance(dev);
> - put_pci:
> -	pci_dev_put(dev->pci_dev);
> - free:
> -	kfree(dev->queues);
> -	kfree(dev->entry);
> -	kfree(dev);
> -	return result;
> +	return;
> + reset:
> +	dev->reset_workfn = nvme_reset_failed_dev;
> +	queue_work(nvme_workq, &dev->reset_work);
>  }
> 
>  static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@
-2902,6
> +2906,7 @@ static void nvme_remove(struct pci_dev *pdev)
>  	spin_unlock(&dev_list_lock);
> 
>  	pci_set_drvdata(pdev, NULL);
> +	async_synchronize_cookie(dev->probe_cookie);

Keith, I think this may not work. All asynchronous functions called prior to
the one identified by cookie
are guaranteed to have completed. The current one is not.

And I just remembered Matthew's feedback on one of the previous attempts at
asynchronous probe.
We should be able to rmmod the driver during probe if suppose there are a
million namespaces.
Is there any plan to address such scenarios?


>  	flush_work(&dev->reset_work);
>  	misc_deregister(&dev->miscdev);
>  	nvme_dev_shutdown(dev);
> diff --git a/include/linux/nvme.h b/include/linux/nvme.h index
19a5d4b..ebfbfa8
> 100644
> --- a/include/linux/nvme.h
> +++ b/include/linux/nvme.h
> @@ -16,6 +16,7 @@
>  #define _LINUX_NVME_H
> 
>  #include <uapi/linux/nvme.h>
> +#include <linux/async.h>
>  #include <linux/pci.h>
>  #include <linux/miscdevice.h>
>  #include <linux/kref.h>
> @@ -94,6 +95,7 @@ struct nvme_dev {
>  	struct miscdevice miscdev;
>  	work_func_t reset_workfn;
>  	struct work_struct reset_work;
> +	async_cookie_t probe_cookie;
>  	char name[12];
>  	char serial[20];
>  	char model[40];
> --
> 1.7.10.4
> 
> 
> _______________________________________________
> Linux-nvme mailing list
> Linux-nvme at lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-nvme

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] NVMe: Async probe
  2015-02-12 14:52   ` Indraneel Mukherjee
@ 2015-02-12 15:25     ` Keith Busch
  0 siblings, 0 replies; 4+ messages in thread
From: Keith Busch @ 2015-02-12 15:25 UTC (permalink / raw


On Thu, 12 Feb 2015, Indraneel Mukherjee wrote:
>>  static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) @@
> -2902,6
>> +2906,7 @@ static void nvme_remove(struct pci_dev *pdev)
>>  	spin_unlock(&dev_list_lock);
>>
>>  	pci_set_drvdata(pdev, NULL);
>> +	async_synchronize_cookie(dev->probe_cookie);
>
> Keith, I think this may not work. All asynchronous functions called prior to
> the one identified by cookie
> are guaranteed to have completed. The current one is not.

Huh, so it is. That rules out asynchronous_schedule as being useful
here. Workqueues FTW, then.

> And I just remembered Matthew's feedback on one of the previous attempts at
> asynchronous probe.
> We should be able to rmmod the driver during probe if suppose there are a
> million namespaces.
> Is there any plan to address such scenarios?

This idea would have allowed that if sync cookie worked like I'd
thought. The rmmod would just synchronize with the namespace discovery
first. I think we can use the global work queue and flush individual
work items in the remove path.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-02-12 15:25 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-02-12  1:11 [PATCH] linux-nvme 3.20 plans Keith Busch
2015-02-12  1:11 ` [PATCH] NVMe: Async probe Keith Busch
2015-02-12 14:52   ` Indraneel Mukherjee
2015-02-12 15:25     ` Keith Busch

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.