All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov
@ 2022-08-11 10:00 Dusica Milinkovic
  2022-08-11 14:16 ` Liu, Shaoyun
  0 siblings, 1 reply; 3+ messages in thread
From: Dusica Milinkovic @ 2022-08-11 10:00 UTC (permalink / raw)
  To: amd-gfx; +Cc: Dusica Milinkovic

[Why]
During multi-vf executing benchmark (Luxmark) observed kiq error timeout.
It happenes because all of VFs do the tlb invalidation at the same time.
Although each VF has the invalidate register set, from hardware side
the invalidate requests are queue to execute.

[How]
In case of 12 VF increase timeout on 12*100ms

Signed-off-by: Dusica Milinkovic <Dusica.Milinkovic@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-
 drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5a639c857bd0..79bb6fd83094 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -320,7 +320,7 @@ enum amdgpu_kiq_irq {
 	AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
 	AMDGPU_CP_KIQ_IRQ_LAST
 };
-
+#define SRIOV_USEC_TIMEOUT  1200000 /* wait 12 * 100ms for SRIOV */
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9ae8cdaa033e..f513e2c2e964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	uint32_t seq;
 	uint16_t queried_pasid;
 	bool ret;
+	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq.ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
 		if (r < 1) {
 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
 			return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ab89d91975ab..4603653916f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 	uint32_t seq;
 	uint16_t queried_pasid;
 	bool ret;
+	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : adev->usec_timeout;
 	struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 
@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
 
 		amdgpu_ring_commit(ring);
 		spin_unlock(&adev->gfx.kiq.ring_lock);
-		r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+		r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
 		if (r < 1) {
 			dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
 			up_read(&adev->reset_domain->sem);
-- 
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* RE: [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov
  2022-08-11 10:00 [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov Dusica Milinkovic
@ 2022-08-11 14:16 ` Liu, Shaoyun
  2022-08-11 14:33   ` Alex Deucher
  0 siblings, 1 reply; 3+ messages in thread
From: Liu, Shaoyun @ 2022-08-11 14:16 UTC (permalink / raw)
  To: Milinkovic, Dusica, amd-gfx@lists.freedesktop.org; +Cc: Milinkovic, Dusica

[AMD Official Use Only - General]

From HW point of view , the  maximum VF number can reach 16  instead  of 12 . Although currently no product will use the 16 VFs  together,  not sure about the future.
You can added Acked-by me.  I will let Alex & Christion decide whether accept this change.

Regards
Shaoyun.liu



-----Original Message-----
From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Dusica Milinkovic
Sent: Thursday, August 11, 2022 6:01 AM
To: amd-gfx@lists.freedesktop.org
Cc: Milinkovic, Dusica <Dusica.Milinkovic@amd.com>
Subject: [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov

[Why]
During multi-vf executing benchmark (Luxmark) observed kiq error timeout.
It happenes because all of VFs do the tlb invalidation at the same time.
Although each VF has the invalidate register set, from hardware side the invalidate requests are queue to execute.

[How]
In case of 12 VF increase timeout on 12*100ms

Signed-off-by: Dusica Milinkovic <Dusica.Milinkovic@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 2 +-
 drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 3 ++-
 3 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5a639c857bd0..79bb6fd83094 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -320,7 +320,7 @@ enum amdgpu_kiq_irq {
        AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
        AMDGPU_CP_KIQ_IRQ_LAST
 };
-
+#define SRIOV_USEC_TIMEOUT  1200000 /* wait 12 * 100ms for SRIOV */
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
 #define MAX_KIQ_REG_TRY 1000
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index 9ae8cdaa033e..f513e2c2e964 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
+adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;

@@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,

                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        return -ETIME;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index ab89d91975ab..4603653916f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
        uint32_t seq;
        uint16_t queried_pasid;
        bool ret;
+       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
+adev->usec_timeout;
        struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
        struct amdgpu_kiq *kiq = &adev->gfx.kiq;

@@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,

                amdgpu_ring_commit(ring);
                spin_unlock(&adev->gfx.kiq.ring_lock);
-               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
+               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
                if (r < 1) {
                        dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
                        up_read(&adev->reset_domain->sem);
--
2.25.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov
  2022-08-11 14:16 ` Liu, Shaoyun
@ 2022-08-11 14:33   ` Alex Deucher
  0 siblings, 0 replies; 3+ messages in thread
From: Alex Deucher @ 2022-08-11 14:33 UTC (permalink / raw)
  To: Liu, Shaoyun; +Cc: amd-gfx@lists.freedesktop.org, Milinkovic, Dusica

On Thu, Aug 11, 2022 at 10:16 AM Liu, Shaoyun <Shaoyun.Liu@amd.com> wrote:
>
> [AMD Official Use Only - General]
>
> From HW point of view , the  maximum VF number can reach 16  instead  of 12 . Although currently no product will use the 16 VFs  together,  not sure about the future.
> You can added Acked-by me.  I will let Alex & Christion decide whether accept this change.
>

I'll let you two sort out the timeout, but the rest of the patch looks
good to me.
Acked-by: Alex Deucher <alexander.deucher@amd.com>

> Regards
> Shaoyun.liu
>
>
>
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces@lists.freedesktop.org> On Behalf Of Dusica Milinkovic
> Sent: Thursday, August 11, 2022 6:01 AM
> To: amd-gfx@lists.freedesktop.org
> Cc: Milinkovic, Dusica <Dusica.Milinkovic@amd.com>
> Subject: [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov
>
> [Why]
> During multi-vf executing benchmark (Luxmark) observed kiq error timeout.
> It happenes because all of VFs do the tlb invalidation at the same time.
> Although each VF has the invalidate register set, from hardware side the invalidate requests are queue to execute.
>
> [How]
> In case of 12 VF increase timeout on 12*100ms
>
> Signed-off-by: Dusica Milinkovic <Dusica.Milinkovic@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 2 +-
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 3 ++-  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 3 ++-
>  3 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 5a639c857bd0..79bb6fd83094 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -320,7 +320,7 @@ enum amdgpu_kiq_irq {
>         AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
>         AMDGPU_CP_KIQ_IRQ_LAST
>  };
> -
> +#define SRIOV_USEC_TIMEOUT  1200000 /* wait 12 * 100ms for SRIOV */
>  #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
>  #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
>  #define MAX_KIQ_REG_TRY 1000
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index 9ae8cdaa033e..f513e2c2e964 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -419,6 +419,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         uint32_t seq;
>         uint16_t queried_pasid;
>         bool ret;
> +       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
> +adev->usec_timeout;
>         struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> @@ -437,7 +438,7 @@ static int gmc_v10_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>
>                 amdgpu_ring_commit(ring);
>                 spin_unlock(&adev->gfx.kiq.ring_lock);
> -               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
>                 if (r < 1) {
>                         dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
>                         return -ETIME;
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index ab89d91975ab..4603653916f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -896,6 +896,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>         uint32_t seq;
>         uint16_t queried_pasid;
>         bool ret;
> +       u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT :
> +adev->usec_timeout;
>         struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>         struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>
> @@ -935,7 +936,7 @@ static int gmc_v9_0_flush_gpu_tlb_pasid(struct amdgpu_device *adev,
>
>                 amdgpu_ring_commit(ring);
>                 spin_unlock(&adev->gfx.kiq.ring_lock);
> -               r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
> +               r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
>                 if (r < 1) {
>                         dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
>                         up_read(&adev->reset_domain->sem);
> --
> 2.25.1
>

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-08-11 14:34 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-08-11 10:00 [Patch V3] drm/amdgpu: Increase tlb flush timeout for sriov Dusica Milinkovic
2022-08-11 14:16 ` Liu, Shaoyun
2022-08-11 14:33   ` Alex Deucher

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.