All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
To: "Nilawar, Badal" <badal.nilawar@intel.com>,
	dri-devel@lists.freedesktop.org, daniel@ffwll.ch,
	maarten.lankhorst@linux.intel.com, airlied@gmail.com,
	mripard@kernel.org, tzimmermann@suse.de, rodrigo.vivi@intel.com
Cc: intel-xe@lists.freedesktop.org,
	Lucas De Marchi <lucas.demarchi@intel.com>,
	Michal Wajdeczko <michal.wajdeczko@intel.com>
Subject: Re: [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR
Date: Thu, 25 Apr 2024 09:37:01 +0530	[thread overview]
Message-ID: <9905154f-d598-4767-8553-d64f40a14557@linux.intel.com> (raw)
In-Reply-To: <48f22c33-87e5-4dd6-856d-441ebef49341@intel.com>


On 24/04/24 16:42, Nilawar, Badal wrote:
>
>
> On 24-04-2024 08:42, Aravind Iddamsetty wrote:
>>
>> On 23/04/24 20:34, Nilawar, Badal wrote:
>>>
>>>
>>> On 22-04-2024 12:27, Aravind Iddamsetty wrote:
>>>> PCI subsystem provides callbacks to inform the driver about a request to
>>>> do function level reset by user, initiated by writing to sysfs entry
>>>> /sys/bus/pci/devices/.../reset. This will allow the driver to handle FLR
>>>> without the need to do unbind and rebind as the driver needs to
>>>> reinitialize the device afresh post FLR.
>>>>
>>>> v2:
>>>> 1. separate out gt idle and pci save/restore to a separate patch (Lucas)
>>>> 2. Fixed the warnings seen around xe_guc_submit_stop, xe_guc_puc_fini
>>>>
>>>> v3: declare xe_pci_err_handlers as static(Michal)
>>>>
>>>> Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>> Cc: Lucas De Marchi <lucas.demarchi@intel.com>
>>>> Cc: Michal Wajdeczko <michal.wajdeczko@intel.com>
>>>>
>>>> Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
>>>> Signed-off-by: Aravind Iddamsetty <aravind.iddamsetty@linux.intel.com>
>>>> ---
>>>>    drivers/gpu/drm/xe/Makefile          |  1 +
>>>>    drivers/gpu/drm/xe/xe_device_types.h |  3 +
>>>>    drivers/gpu/drm/xe/xe_guc_pc.c       |  4 ++
>>>>    drivers/gpu/drm/xe/xe_pci.c          |  9 ++-
>>>>    drivers/gpu/drm/xe/xe_pci.h          |  2 +
>>>>    drivers/gpu/drm/xe/xe_pci_err.c      | 88 ++++++++++++++++++++++++++++
>>>>    drivers/gpu/drm/xe/xe_pci_err.h      | 13 ++++
>>>>    7 files changed, 119 insertions(+), 1 deletion(-)
>>>>    create mode 100644 drivers/gpu/drm/xe/xe_pci_err.c
>>>>    create mode 100644 drivers/gpu/drm/xe/xe_pci_err.h
>>>>
>>>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>>>> index 8bc62bfbc679..693971a1fac0 100644
>>>> --- a/drivers/gpu/drm/xe/Makefile
>>>> +++ b/drivers/gpu/drm/xe/Makefile
>>>> @@ -117,6 +117,7 @@ xe-y += xe_bb.o \
>>>>        xe_module.o \
>>>>        xe_pat.o \
>>>>        xe_pci.o \
>>>> +    xe_pci_err.o \
>>>>        xe_pcode.o \
>>>>        xe_pm.o \
>>>>        xe_preempt_fence.o \
>>>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>>>> index 0a66555229e9..8c749b378a92 100644
>>>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>>>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>>>> @@ -465,6 +465,9 @@ struct xe_device {
>>>>        /** @pci_state: PCI state of device */
>>>>        struct pci_saved_state *pci_state;
>>>>    +    /** @pci_device_is_reset: device went through PCIe FLR */
>>>> +    bool pci_device_is_reset;
>>>> +
>>>>        /* private: */
>>>>      #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>>>> diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
>>>> index 509649d0e65e..efba0fbe2f5c 100644
>>>> --- a/drivers/gpu/drm/xe/xe_guc_pc.c
>>>> +++ b/drivers/gpu/drm/xe/xe_guc_pc.c
>>>> @@ -902,6 +902,10 @@ static void xe_guc_pc_fini(struct drm_device *drm, void *arg)
>>>>            return;
>>>>        }
>>>>    +    /* We already have done this before going through a reset, so skip here */
>>>> +    if (xe->pci_device_is_reset)
>>>> +        return;
>>>> +
>>>>        XE_WARN_ON(xe_force_wake_get(gt_to_fw(pc_to_gt(pc)), XE_FORCEWAKE_ALL));
>>>>        XE_WARN_ON(xe_guc_pc_gucrc_disable(pc));
>>>>        XE_WARN_ON(xe_guc_pc_stop(pc));
>>>> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>>>> index a62300990e19..b5a582afc9e7 100644
>>>> --- a/drivers/gpu/drm/xe/xe_pci.c
>>>> +++ b/drivers/gpu/drm/xe/xe_pci.c
>>>> @@ -23,6 +23,7 @@
>>>>    #include "xe_macros.h"
>>>>    #include "xe_mmio.h"
>>>>    #include "xe_module.h"
>>>> +#include "xe_pci_err.h"
>>>>    #include "xe_pci_types.h"
>>>>    #include "xe_pm.h"
>>>>    #include "xe_sriov.h"
>>>> @@ -738,7 +739,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
>>>>        pci_set_drvdata(pdev, NULL);
>>>>    }
>>>>    -static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>>> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>>>>    {
>>>>        const struct xe_device_desc *desc = (const void *)ent->driver_data;
>>>>        const struct xe_subplatform_desc *subplatform_desc;
>>>> @@ -986,6 +987,11 @@ static const struct dev_pm_ops xe_pm_ops = {
>>>>    };
>>>>    #endif
>>>>    +static const struct pci_error_handlers xe_pci_err_handlers = {
>>>> +    .reset_prepare = xe_pci_reset_prepare,
>>>> +    .reset_done = xe_pci_reset_done,
>>>> +};
>>>> +
>>>>    static struct pci_driver xe_pci_driver = {
>>>>        .name = DRIVER_NAME,
>>>>        .id_table = pciidlist,
>>>> @@ -995,6 +1001,7 @@ static struct pci_driver xe_pci_driver = {
>>>>    #ifdef CONFIG_PM_SLEEP
>>>>        .driver.pm = &xe_pm_ops,
>>>>    #endif
>>>> +    .err_handler = &xe_pci_err_handlers,
>>>>    };
>>>>      int xe_register_pci_driver(void)
>>>> diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
>>>> index 73b90a430d1f..9faf5380a09e 100644
>>>> --- a/drivers/gpu/drm/xe/xe_pci.h
>>>> +++ b/drivers/gpu/drm/xe/xe_pci.h
>>>> @@ -7,8 +7,10 @@
>>>>    #define _XE_PCI_H_
>>>>      struct pci_dev;
>>>> +struct pci_device_id;
>>>>      int xe_register_pci_driver(void);
>>>>    void xe_unregister_pci_driver(void);
>>>>    void xe_load_pci_state(struct pci_dev *pdev);
>>>> +int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
>>>>    #endif
>>>> diff --git a/drivers/gpu/drm/xe/xe_pci_err.c b/drivers/gpu/drm/xe/xe_pci_err.c
>>>> new file mode 100644
>>>> index 000000000000..5306925ea2fa
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/xe/xe_pci_err.c
>>>> @@ -0,0 +1,88 @@
>>>> +// SPDX-License-Identifier: MIT
>>>> +/*
>>>> + * Copyright © 2024 Intel Corporation
>>>> + */
>>>> +
>>>> +#include <linux/pci.h>
>>>> +#include <drm/drm_drv.h>
>>>> +
>>>> +#include "xe_device.h"
>>>> +#include "xe_gt.h"
>>>> +#include "xe_gt_printk.h"
>>>> +#include "xe_pci.h"
>>>> +#include "xe_pci_err.h"
>>>> +#include "xe_pm.h"
>>>> +#include "xe_uc.h"
>>>> +
>>>> +/**
>>>> + * xe_pci_reset_prepare - Called when user issued a PCIe reset
>>>> + * via /sys/bus/pci/devices/.../reset.
>>>> + * @pdev: PCI device struct
>>>> + */
>>>> +void xe_pci_reset_prepare(struct pci_dev *pdev)
>>>> +{
>>>> +    struct xe_device *xe = pci_get_drvdata(pdev);
>>>> +    struct xe_gt *gt;
>>>> +    int id, err;
>>>> +
>>>> +    pci_warn(pdev, "preparing for PCIe reset\n");
>>>> +
>>>> +    drm_warn(&xe->drm, "removing device access to userspace\n");
>>>> +    drm_dev_unplug(&xe->drm);
>>>> +
>>>> +    xe_pm_runtime_get(xe);
>>>> +    /* idle the GTs */
>>>> +    for_each_gt(gt, xe, id) {
>>>> +        err = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>> +        if (err)
>>>> +            goto reset;
>>>> +        xe_uc_reset_prepare(&gt->uc);
>>>> +        xe_gt_idle(gt);
>>>> +        err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>> +        XE_WARN_ON(err);
>>>> +    }
>>>> +    xe_pm_runtime_put(xe);
>>> Is xe_save_pci_state() needed here?
>>
>> No, as the state has already been saved post driver probe.
>
> Thanks for clarification. One more doubt I have is in FLR flow what will happen to buffer objects especially mmaped BOs. Will all the BOs get destroyed? I couldn't figure out it in the code.

buffer ummaping is handled in drm_dev_unplug called above.

Thanks,
Aravind.
>
> Badal
>>
>> Regards,
>> Aravind.
>>>
>>> Regards,
>>> Badal Nilawar> +
>>>> +reset:
>>>> +    pci_disable_device(pdev);
>>>> +}
>>>> +
>>>> +/**
>>>> + * xe_pci_reset_done - Called when PCIe reset is done.
>>>> + * @pdev: PCI device struct
>>>> + */
>>>> +void xe_pci_reset_done(struct pci_dev *pdev)
>>>> +{
>>>> +    const struct pci_device_id *ent = pci_match_id(pdev->driver->id_table, pdev);
>>>> +    struct xe_device *xe = pci_get_drvdata(pdev);
>>>> +
>>>> +    dev_info(&pdev->dev,
>>>> +         "device went through PCIe reset, reenabling the device\n");
>>>> +
>>>> +    if (pci_enable_device(pdev)) {
>>>> +        dev_err(&pdev->dev,
>>>> +            "Cannot re-enable PCI device after reset\n");
>>>> +        return;
>>>> +    }
>>>> +    pci_set_master(pdev);
>>>> +    xe_load_pci_state(pdev);
>>>> +
>>>> +    xe->pci_device_is_reset = true;
>>>> +    /*
>>>> +     * We want to completely clean the driver and even destroy
>>>> +     * the xe private data and reinitialize afresh similar to
>>>> +     * probe
>>>> +     */
>>>> +    pdev->driver->remove(pdev);
>>>> +    if (pci_dev_msi_enabled(pdev))
>>>> +        pci_free_irq_vectors(pdev);
>>>> +
>>>> +    devm_drm_dev_release_action(&xe->drm);
>>>> +    pci_disable_device(pdev);
>>>> +
>>>> +    /*
>>>> +     * if this fails the driver might be in a stale state, only option is
>>>> +     * to unbind and rebind
>>>> +     */
>>>> +    xe_pci_probe(pdev, ent);
>>>> +}
>>>> diff --git a/drivers/gpu/drm/xe/xe_pci_err.h b/drivers/gpu/drm/xe/xe_pci_err.h
>>>> new file mode 100644
>>>> index 000000000000..95a4c8ce9cf1
>>>> --- /dev/null
>>>> +++ b/drivers/gpu/drm/xe/xe_pci_err.h
>>>> @@ -0,0 +1,13 @@
>>>> +/* SPDX-License-Identifier: MIT */
>>>> +/*
>>>> + * Copyright © 2024 Intel Corporation
>>>> + */
>>>> +
>>>> +#ifndef _XE_PCI_ERR_H_
>>>> +#define _XE_PCI_ERR_H_
>>>> +
>>>> +struct pci_dev;
>>>> +
>>>> +void xe_pci_reset_prepare(struct pci_dev *pdev);
>>>> +void xe_pci_reset_done(struct pci_dev *pdev);
>>>> +#endif

  reply	other threads:[~2024-04-25  4:04 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-22  6:57 [PATCH v4 0/4] drm/xe: Support PCIe FLR Aravind Iddamsetty
2024-04-22  6:57 ` [PATCH v3 1/4] drm: add devm release action Aravind Iddamsetty
2024-04-22 20:54   ` Rodrigo Vivi
2024-04-23  8:55     ` Aravind Iddamsetty
2024-04-23 17:42       ` Rodrigo Vivi
2024-04-24 11:30         ` Aravind Iddamsetty
2024-04-24 11:49         ` Maxime Ripard
2024-04-24 12:20           ` Rodrigo Vivi
2024-04-25 12:52             ` Maxime Ripard
2024-04-25 14:42               ` Aravind Iddamsetty
2024-04-24 11:51   ` Maxime Ripard
2024-04-24 12:36     ` Aravind Iddamsetty
2024-05-02 13:42       ` Maxime Ripard
2024-04-22  6:57 ` [PATCH 2/4] drm/xe: Save and restore PCI state Aravind Iddamsetty
2024-04-23  4:18   ` Ghimiray, Himal Prasad
2024-04-22  6:57 ` [PATCH v2 3/4] drm/xe: Extract xe_gt_idle() helper Aravind Iddamsetty
2024-04-22  6:57 ` [PATCH v3 4/4] drm/xe/FLR: Support PCIe FLR Aravind Iddamsetty
2024-04-23  4:18   ` Ghimiray, Himal Prasad
2024-04-23 15:04   ` Nilawar, Badal
2024-04-24  3:12     ` Aravind Iddamsetty
2024-04-24 11:12       ` Nilawar, Badal
2024-04-25  4:07         ` Aravind Iddamsetty [this message]
2024-04-23 23:49   ` Michał Winiarski
2024-04-24  5:12     ` Aravind Iddamsetty
2024-04-24 23:29       ` Michał Winiarski
2024-04-25  6:17         ` Aravind Iddamsetty
2024-04-26  0:53           ` Michał Winiarski
2024-04-22  8:21 ` ✓ CI.Patch_applied: success for drm/xe: Support PCIe FLR (rev5) Patchwork
2024-04-22  8:21 ` ✗ CI.checkpatch: warning " Patchwork
2024-04-22  8:23 ` ✓ CI.KUnit: success " Patchwork
2024-04-22  8:34 ` ✓ CI.Build: " Patchwork
2024-04-22  8:37 ` ✓ CI.Hooks: " Patchwork
2024-04-22  8:38 ` ✗ CI.checksparse: warning " Patchwork
2024-04-22 15:54 ` ✓ CI.Patch_applied: success " Patchwork
2024-04-22 15:55 ` ✗ CI.checkpatch: warning " Patchwork
2024-04-22 15:56 ` ✓ CI.KUnit: success " Patchwork
2024-04-22 16:13 ` ✓ CI.Build: " Patchwork
2024-04-22 16:15 ` ✓ CI.Hooks: " Patchwork
2024-04-22 16:17 ` ✗ CI.checksparse: warning " Patchwork
2024-04-23 14:09 ` ✓ CI.Patch_applied: success for drm/xe: Support PCIe FLR (rev6) Patchwork
2024-04-23 14:09 ` ✗ CI.checkpatch: warning " Patchwork
2024-04-23 14:10 ` ✓ CI.KUnit: success " Patchwork
2024-04-23 14:22 ` ✓ CI.Build: " Patchwork
2024-04-23 14:24 ` ✓ CI.Hooks: " Patchwork
2024-04-23 14:26 ` ✗ CI.checksparse: warning " Patchwork
2024-04-23 14:48 ` ✓ CI.BAT: success " Patchwork
2024-04-23 21:41 ` ✓ CI.FULL: " Patchwork
  -- strict thread matches above, loose matches on Subject: below --
2024-04-19  8:58 [PATCH v3 0/4] drm/xe: Support PCIe FLR Aravind Iddamsetty
2024-04-19  8:58 ` [PATCH v3 4/4] drm/xe/FLR: " Aravind Iddamsetty

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9905154f-d598-4767-8553-d64f40a14557@linux.intel.com \
    --to=aravind.iddamsetty@linux.intel.com \
    --cc=airlied@gmail.com \
    --cc=badal.nilawar@intel.com \
    --cc=daniel@ffwll.ch \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-xe@lists.freedesktop.org \
    --cc=lucas.demarchi@intel.com \
    --cc=maarten.lankhorst@linux.intel.com \
    --cc=michal.wajdeczko@intel.com \
    --cc=mripard@kernel.org \
    --cc=rodrigo.vivi@intel.com \
    --cc=tzimmermann@suse.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.