dri-devel Archive mirror
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: nuke the ih reentrant lock
@ 2021-03-12 13:59 Christian König
  2021-03-12 14:04 ` Daniel Vetter
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2021-03-12 13:59 UTC (permalink / raw
  To: dri-devel

Interrupts on are non-reentrant on linux. This is just an ancient
leftover from radeon where irq processing was kicked of from different
places.

Signed-off-by: Christian König <christian.koenig@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
 3 files changed, 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index a15f1b604733..886625fb464b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 
 	/* mutex initialization are all done here so we
 	 * can recall function without having locking issues */
-	atomic_set(&adev->irq.ih.lock, 0);
 	mutex_init(&adev->firmware.mutex);
 	mutex_init(&adev->pm.mutex);
 	mutex_init(&adev->gfx.gpu_clock_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index 1024065f1f03..faaa6aa2faaf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 	wptr = amdgpu_ih_get_wptr(adev, ih);
 
 restart_ih:
-	/* is somebody else already processing irqs? */
-	if (atomic_xchg(&ih->lock, 1))
-		return IRQ_NONE;
-
 	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
 
 	/* Order reading of wptr vs. reading of IH ring data */
@@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
 
 	amdgpu_ih_set_rptr(adev, ih);
 	wake_up_all(&ih->wait_process);
-	atomic_set(&ih->lock, 0);
 
 	/* make sure wptr hasn't changed while processing */
 	wptr = amdgpu_ih_get_wptr(adev, ih);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 87ec6d20dbe0..0649b59830a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
 
 	bool                    enabled;
 	unsigned		rptr;
-	atomic_t		lock;
 	struct amdgpu_ih_regs	ih_regs;
 
 	/* For waiting on IH processing at checkpoint. */
-- 
2.25.1

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: nuke the ih reentrant lock
  2021-03-12 13:59 [PATCH] drm/amdgpu: nuke the ih reentrant lock Christian König
@ 2021-03-12 14:04 ` Daniel Vetter
  2021-03-12 14:27   ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Vetter @ 2021-03-12 14:04 UTC (permalink / raw
  To: Christian König; +Cc: dri-devel

On Fri, Mar 12, 2021 at 02:59:06PM +0100, Christian König wrote:
> Interrupts on are non-reentrant on linux. This is just an ancient
> leftover from radeon where irq processing was kicked of from different
> places.
> 
> Signed-off-by: Christian König <christian.koenig@amd.com>

Man you tricked me into grepping this on radeon and it looks horrible.
atomic_t is unordered in linux, so whatever was built there for radeon
does not wokr like a lock. It's missing all the barriers afiui. Good
riddance at least for amdgpu.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
>  3 files changed, 7 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index a15f1b604733..886625fb464b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>  
>  	/* mutex initialization are all done here so we
>  	 * can recall function without having locking issues */
> -	atomic_set(&adev->irq.ih.lock, 0);
>  	mutex_init(&adev->firmware.mutex);
>  	mutex_init(&adev->pm.mutex);
>  	mutex_init(&adev->gfx.gpu_clock_mutex);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index 1024065f1f03..faaa6aa2faaf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>  	wptr = amdgpu_ih_get_wptr(adev, ih);
>  
>  restart_ih:
> -	/* is somebody else already processing irqs? */
> -	if (atomic_xchg(&ih->lock, 1))
> -		return IRQ_NONE;
> -
>  	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
>  
>  	/* Order reading of wptr vs. reading of IH ring data */
> @@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>  
>  	amdgpu_ih_set_rptr(adev, ih);
>  	wake_up_all(&ih->wait_process);
> -	atomic_set(&ih->lock, 0);
>  
>  	/* make sure wptr hasn't changed while processing */
>  	wptr = amdgpu_ih_get_wptr(adev, ih);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> index 87ec6d20dbe0..0649b59830a5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> @@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
>  
>  	bool                    enabled;
>  	unsigned		rptr;
> -	atomic_t		lock;
>  	struct amdgpu_ih_regs	ih_regs;
>  
>  	/* For waiting on IH processing at checkpoint. */
> -- 
> 2.25.1
> 
> _______________________________________________
> dri-devel mailing list
> dri-devel@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/dri-devel

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: nuke the ih reentrant lock
  2021-03-12 14:04 ` Daniel Vetter
@ 2021-03-12 14:27   ` Christian König
  2021-03-12 14:35     ` Daniel Vetter
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2021-03-12 14:27 UTC (permalink / raw
  To: Daniel Vetter; +Cc: dri-devel



Am 12.03.21 um 15:04 schrieb Daniel Vetter:
> On Fri, Mar 12, 2021 at 02:59:06PM +0100, Christian König wrote:
>> Interrupts on are non-reentrant on linux. This is just an ancient
>> leftover from radeon where irq processing was kicked of from different
>> places.
>>
>> Signed-off-by: Christian König <christian.koenig@amd.com>
> Man you tricked me into grepping this on radeon and it looks horrible.
> atomic_t is unordered in linux, so whatever was built there for radeon
> does not wokr like a lock. It's missing all the barriers afiui. Good
> riddance at least for amdgpu.

Hui? atomic_xchg() is perfectly ordered as far as I know.

IIRC Alex added this for R600 because we had boards where interrupts 
where sporadically swallowed during driver startup and we had to kick of 
ring buffer processing manually.

Anyway we have a fence processing fallback timer in amdgpu instead and 
that stuff is probably unused on any modern hardware.

> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

Thanks,
Christian.

>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
>>   3 files changed, 7 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index a15f1b604733..886625fb464b 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>   
>>   	/* mutex initialization are all done here so we
>>   	 * can recall function without having locking issues */
>> -	atomic_set(&adev->irq.ih.lock, 0);
>>   	mutex_init(&adev->firmware.mutex);
>>   	mutex_init(&adev->pm.mutex);
>>   	mutex_init(&adev->gfx.gpu_clock_mutex);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>> index 1024065f1f03..faaa6aa2faaf 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>> @@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>>   	wptr = amdgpu_ih_get_wptr(adev, ih);
>>   
>>   restart_ih:
>> -	/* is somebody else already processing irqs? */
>> -	if (atomic_xchg(&ih->lock, 1))
>> -		return IRQ_NONE;
>> -
>>   	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
>>   
>>   	/* Order reading of wptr vs. reading of IH ring data */
>> @@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>>   
>>   	amdgpu_ih_set_rptr(adev, ih);
>>   	wake_up_all(&ih->wait_process);
>> -	atomic_set(&ih->lock, 0);
>>   
>>   	/* make sure wptr hasn't changed while processing */
>>   	wptr = amdgpu_ih_get_wptr(adev, ih);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>> index 87ec6d20dbe0..0649b59830a5 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>> @@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
>>   
>>   	bool                    enabled;
>>   	unsigned		rptr;
>> -	atomic_t		lock;
>>   	struct amdgpu_ih_regs	ih_regs;
>>   
>>   	/* For waiting on IH processing at checkpoint. */
>> -- 
>> 2.25.1
>>
>> _______________________________________________
>> dri-devel mailing list
>> dri-devel@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/dri-devel

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: nuke the ih reentrant lock
  2021-03-12 14:27   ` Christian König
@ 2021-03-12 14:35     ` Daniel Vetter
  2021-03-12 14:36       ` Daniel Vetter
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Vetter @ 2021-03-12 14:35 UTC (permalink / raw
  To: Christian König; +Cc: dri-devel

On Fri, Mar 12, 2021 at 03:27:58PM +0100, Christian König wrote:
> 
> 
> Am 12.03.21 um 15:04 schrieb Daniel Vetter:
> > On Fri, Mar 12, 2021 at 02:59:06PM +0100, Christian König wrote:
> > > Interrupts on are non-reentrant on linux. This is just an ancient
> > > leftover from radeon where irq processing was kicked of from different
> > > places.
> > > 
> > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > Man you tricked me into grepping this on radeon and it looks horrible.
> > atomic_t is unordered in linux, so whatever was built there for radeon
> > does not wokr like a lock. It's missing all the barriers afiui. Good
> > riddance at least for amdgpu.
> 
> Hui? atomic_xchg() is perfectly ordered as far as I know.

Oh right, if atomic_ returns a value it's fully ordered. Nice tour into
memory-barriers.txt. It's the atomic_t operations that don't return
anything which are fully unordered, and I mixed that up.

> IIRC Alex added this for R600 because we had boards where interrupts where
> sporadically swallowed during driver startup and we had to kick of ring
> buffer processing manually.
> 
> Anyway we have a fence processing fallback timer in amdgpu instead and that
> stuff is probably unused on any modern hardware.

Ah yes that stuff. Kinda annoying we have these old dma_fence around where
dma_fence_wait doesn't really work that well, but oh well.
-Daniel

> 
> > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> 
> Thanks,
> Christian.
> 
> > 
> > > ---
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
> > >   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
> > >   3 files changed, 7 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > index a15f1b604733..886625fb464b 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > @@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> > >   	/* mutex initialization are all done here so we
> > >   	 * can recall function without having locking issues */
> > > -	atomic_set(&adev->irq.ih.lock, 0);
> > >   	mutex_init(&adev->firmware.mutex);
> > >   	mutex_init(&adev->pm.mutex);
> > >   	mutex_init(&adev->gfx.gpu_clock_mutex);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > index 1024065f1f03..faaa6aa2faaf 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > @@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> > >   	wptr = amdgpu_ih_get_wptr(adev, ih);
> > >   restart_ih:
> > > -	/* is somebody else already processing irqs? */
> > > -	if (atomic_xchg(&ih->lock, 1))
> > > -		return IRQ_NONE;
> > > -
> > >   	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
> > >   	/* Order reading of wptr vs. reading of IH ring data */
> > > @@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> > >   	amdgpu_ih_set_rptr(adev, ih);
> > >   	wake_up_all(&ih->wait_process);
> > > -	atomic_set(&ih->lock, 0);
> > >   	/* make sure wptr hasn't changed while processing */
> > >   	wptr = amdgpu_ih_get_wptr(adev, ih);
> > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > index 87ec6d20dbe0..0649b59830a5 100644
> > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > @@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
> > >   	bool                    enabled;
> > >   	unsigned		rptr;
> > > -	atomic_t		lock;
> > >   	struct amdgpu_ih_regs	ih_regs;
> > >   	/* For waiting on IH processing at checkpoint. */
> > > -- 
> > > 2.25.1
> > > 
> > > _______________________________________________
> > > dri-devel mailing list
> > > dri-devel@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: nuke the ih reentrant lock
  2021-03-12 14:35     ` Daniel Vetter
@ 2021-03-12 14:36       ` Daniel Vetter
  2021-03-12 14:50         ` Christian König
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Vetter @ 2021-03-12 14:36 UTC (permalink / raw
  To: Christian König; +Cc: dri-devel

On Fri, Mar 12, 2021 at 03:35:50PM +0100, Daniel Vetter wrote:
> On Fri, Mar 12, 2021 at 03:27:58PM +0100, Christian König wrote:
> > 
> > 
> > Am 12.03.21 um 15:04 schrieb Daniel Vetter:
> > > On Fri, Mar 12, 2021 at 02:59:06PM +0100, Christian König wrote:
> > > > Interrupts on are non-reentrant on linux. This is just an ancient
> > > > leftover from radeon where irq processing was kicked of from different
> > > > places.
> > > > 
> > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > Man you tricked me into grepping this on radeon and it looks horrible.
> > > atomic_t is unordered in linux, so whatever was built there for radeon
> > > does not wokr like a lock. It's missing all the barriers afiui. Good
> > > riddance at least for amdgpu.
> > 
> > Hui? atomic_xchg() is perfectly ordered as far as I know.
> 
> Oh right, if atomic_ returns a value it's fully ordered. Nice tour into
> memory-barriers.txt. It's the atomic_t operations that don't return
> anything which are fully unordered, and I mixed that up.
> 
> > IIRC Alex added this for R600 because we had boards where interrupts where
> > sporadically swallowed during driver startup and we had to kick of ring
> > buffer processing manually.
> > 
> > Anyway we have a fence processing fallback timer in amdgpu instead and that
> > stuff is probably unused on any modern hardware.
> 
> Ah yes that stuff. Kinda annoying we have these old dma_fence around where
> dma_fence_wait doesn't really work that well, but oh well.

Argh I'm not awake. dma_fence_wait() works well on these, but the generic
cb stuff (which is used in ever more place) doesn't, because it misses the
fallback timer the ->wait does.
-Daniel

> > > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > 
> > Thanks,
> > Christian.
> > 
> > > 
> > > > ---
> > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
> > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
> > > >   drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
> > > >   3 files changed, 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > index a15f1b604733..886625fb464b 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > @@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> > > >   	/* mutex initialization are all done here so we
> > > >   	 * can recall function without having locking issues */
> > > > -	atomic_set(&adev->irq.ih.lock, 0);
> > > >   	mutex_init(&adev->firmware.mutex);
> > > >   	mutex_init(&adev->pm.mutex);
> > > >   	mutex_init(&adev->gfx.gpu_clock_mutex);
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > > index 1024065f1f03..faaa6aa2faaf 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > > @@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> > > >   	wptr = amdgpu_ih_get_wptr(adev, ih);
> > > >   restart_ih:
> > > > -	/* is somebody else already processing irqs? */
> > > > -	if (atomic_xchg(&ih->lock, 1))
> > > > -		return IRQ_NONE;
> > > > -
> > > >   	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
> > > >   	/* Order reading of wptr vs. reading of IH ring data */
> > > > @@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> > > >   	amdgpu_ih_set_rptr(adev, ih);
> > > >   	wake_up_all(&ih->wait_process);
> > > > -	atomic_set(&ih->lock, 0);
> > > >   	/* make sure wptr hasn't changed while processing */
> > > >   	wptr = amdgpu_ih_get_wptr(adev, ih);
> > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > > index 87ec6d20dbe0..0649b59830a5 100644
> > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > > @@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
> > > >   	bool                    enabled;
> > > >   	unsigned		rptr;
> > > > -	atomic_t		lock;
> > > >   	struct amdgpu_ih_regs	ih_regs;
> > > >   	/* For waiting on IH processing at checkpoint. */
> > > > -- 
> > > > 2.25.1
> > > > 
> > > > _______________________________________________
> > > > dri-devel mailing list
> > > > dri-devel@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > 
> 
> -- 
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: nuke the ih reentrant lock
  2021-03-12 14:36       ` Daniel Vetter
@ 2021-03-12 14:50         ` Christian König
  2021-03-16  9:32           ` Daniel Vetter
  0 siblings, 1 reply; 7+ messages in thread
From: Christian König @ 2021-03-12 14:50 UTC (permalink / raw
  To: Daniel Vetter; +Cc: dri-devel

Am 12.03.21 um 15:36 schrieb Daniel Vetter:
> On Fri, Mar 12, 2021 at 03:35:50PM +0100, Daniel Vetter wrote:
>> On Fri, Mar 12, 2021 at 03:27:58PM +0100, Christian König wrote:
>>>
>>> Am 12.03.21 um 15:04 schrieb Daniel Vetter:
>>>> On Fri, Mar 12, 2021 at 02:59:06PM +0100, Christian König wrote:
>>>>> Interrupts on are non-reentrant on linux. This is just an ancient
>>>>> leftover from radeon where irq processing was kicked of from different
>>>>> places.
>>>>>
>>>>> Signed-off-by: Christian König <christian.koenig@amd.com>
>>>> Man you tricked me into grepping this on radeon and it looks horrible.
>>>> atomic_t is unordered in linux, so whatever was built there for radeon
>>>> does not wokr like a lock. It's missing all the barriers afiui. Good
>>>> riddance at least for amdgpu.
>>> Hui? atomic_xchg() is perfectly ordered as far as I know.
>> Oh right, if atomic_ returns a value it's fully ordered. Nice tour into
>> memory-barriers.txt. It's the atomic_t operations that don't return
>> anything which are fully unordered, and I mixed that up.
>>
>>> IIRC Alex added this for R600 because we had boards where interrupts where
>>> sporadically swallowed during driver startup and we had to kick of ring
>>> buffer processing manually.
>>>
>>> Anyway we have a fence processing fallback timer in amdgpu instead and that
>>> stuff is probably unused on any modern hardware.
>> Ah yes that stuff. Kinda annoying we have these old dma_fence around where
>> dma_fence_wait doesn't really work that well, but oh well.
> Argh I'm not awake. dma_fence_wait() works well on these, but the generic
> cb stuff (which is used in ever more place) doesn't, because it misses the
> fallback timer the ->wait does.

That's not what I meant. See amdgpu_fence_schedule_fallback().

As soon as somebody requested a dma_fence to be signaled we start a 
500ms timeout for fence processing which is cleared as soon as we see 
the first interrupt.

That is necessary because some motherboards have the ugly behavior of 
swallowing approx ~0.1% if all MSI interrupts after some idle time.

No idea where exactly that comes from, but I have the feeling that this 
was the same bug Alex tried to handle on R6xx over 10 years ago with 
kicking of interrupt processing manually after enabling interrupts for 
the first time.

Regards,
Christian.

> -Daniel
>
>>>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>> Thanks,
>>> Christian.
>>>
>>>>> ---
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
>>>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
>>>>>    3 files changed, 7 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> index a15f1b604733..886625fb464b 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>>>>> @@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
>>>>>    	/* mutex initialization are all done here so we
>>>>>    	 * can recall function without having locking issues */
>>>>> -	atomic_set(&adev->irq.ih.lock, 0);
>>>>>    	mutex_init(&adev->firmware.mutex);
>>>>>    	mutex_init(&adev->pm.mutex);
>>>>>    	mutex_init(&adev->gfx.gpu_clock_mutex);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>>>> index 1024065f1f03..faaa6aa2faaf 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
>>>>> @@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>>>>>    	wptr = amdgpu_ih_get_wptr(adev, ih);
>>>>>    restart_ih:
>>>>> -	/* is somebody else already processing irqs? */
>>>>> -	if (atomic_xchg(&ih->lock, 1))
>>>>> -		return IRQ_NONE;
>>>>> -
>>>>>    	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
>>>>>    	/* Order reading of wptr vs. reading of IH ring data */
>>>>> @@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
>>>>>    	amdgpu_ih_set_rptr(adev, ih);
>>>>>    	wake_up_all(&ih->wait_process);
>>>>> -	atomic_set(&ih->lock, 0);
>>>>>    	/* make sure wptr hasn't changed while processing */
>>>>>    	wptr = amdgpu_ih_get_wptr(adev, ih);
>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> index 87ec6d20dbe0..0649b59830a5 100644
>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
>>>>> @@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
>>>>>    	bool                    enabled;
>>>>>    	unsigned		rptr;
>>>>> -	atomic_t		lock;
>>>>>    	struct amdgpu_ih_regs	ih_regs;
>>>>>    	/* For waiting on IH processing at checkpoint. */
>>>>> -- 
>>>>> 2.25.1
>>>>>
>>>>> _______________________________________________
>>>>> dri-devel mailing list
>>>>> dri-devel@lists.freedesktop.org
>>>>> https://lists.freedesktop.org/mailman/listinfo/dri-devel
>> -- 
>> Daniel Vetter
>> Software Engineer, Intel Corporation
>> http://blog.ffwll.ch

_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] drm/amdgpu: nuke the ih reentrant lock
  2021-03-12 14:50         ` Christian König
@ 2021-03-16  9:32           ` Daniel Vetter
  0 siblings, 0 replies; 7+ messages in thread
From: Daniel Vetter @ 2021-03-16  9:32 UTC (permalink / raw
  To: Christian König; +Cc: dri-devel

On Fri, Mar 12, 2021 at 03:50:35PM +0100, Christian König wrote:
> Am 12.03.21 um 15:36 schrieb Daniel Vetter:
> > On Fri, Mar 12, 2021 at 03:35:50PM +0100, Daniel Vetter wrote:
> > > On Fri, Mar 12, 2021 at 03:27:58PM +0100, Christian König wrote:
> > > > 
> > > > Am 12.03.21 um 15:04 schrieb Daniel Vetter:
> > > > > On Fri, Mar 12, 2021 at 02:59:06PM +0100, Christian König wrote:
> > > > > > Interrupts on are non-reentrant on linux. This is just an ancient
> > > > > > leftover from radeon where irq processing was kicked of from different
> > > > > > places.
> > > > > > 
> > > > > > Signed-off-by: Christian König <christian.koenig@amd.com>
> > > > > Man you tricked me into grepping this on radeon and it looks horrible.
> > > > > atomic_t is unordered in linux, so whatever was built there for radeon
> > > > > does not wokr like a lock. It's missing all the barriers afiui. Good
> > > > > riddance at least for amdgpu.
> > > > Hui? atomic_xchg() is perfectly ordered as far as I know.
> > > Oh right, if atomic_ returns a value it's fully ordered. Nice tour into
> > > memory-barriers.txt. It's the atomic_t operations that don't return
> > > anything which are fully unordered, and I mixed that up.
> > > 
> > > > IIRC Alex added this for R600 because we had boards where interrupts where
> > > > sporadically swallowed during driver startup and we had to kick of ring
> > > > buffer processing manually.
> > > > 
> > > > Anyway we have a fence processing fallback timer in amdgpu instead and that
> > > > stuff is probably unused on any modern hardware.
> > > Ah yes that stuff. Kinda annoying we have these old dma_fence around where
> > > dma_fence_wait doesn't really work that well, but oh well.
> > Argh I'm not awake. dma_fence_wait() works well on these, but the generic
> > cb stuff (which is used in ever more place) doesn't, because it misses the
> > fallback timer the ->wait does.
> 
> That's not what I meant. See amdgpu_fence_schedule_fallback().
> 
> As soon as somebody requested a dma_fence to be signaled we start a 500ms
> timeout for fence processing which is cleared as soon as we see the first
> interrupt.
> 
> That is necessary because some motherboards have the ugly behavior of
> swallowing approx ~0.1% if all MSI interrupts after some idle time.
> 
> No idea where exactly that comes from, but I have the feeling that this was
> the same bug Alex tried to handle on R6xx over 10 years ago with kicking of
> interrupt processing manually after enabling interrupts for the first time.

That's the same thing I mean. I think at least on radeon this is handled
by periodically waking up in the ->wait callback, but unfortunately that
doesn't take care of any of the callback based fence waiters. Maybe
there's a different reason on radeon for the lost interrupts, but I
thought consequences are the same. I think i915 also had some hacks like
that, not sure why it still has a ->wait callback tbh. Maybe that's still
needed for handling the gpu recovery flow on gen2/3, which is rather
non-standard and violates dma_fence nesting rules a bit. But oh well,
that's real old stuff.
-Daniel

> 
> Regards,
> Christian.
> 
> > -Daniel
> > 
> > > > > Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
> > > > Thanks,
> > > > Christian.
> > > > 
> > > > > > ---
> > > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 -
> > > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     | 5 -----
> > > > > >    drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h     | 1 -
> > > > > >    3 files changed, 7 deletions(-)
> > > > > > 
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > index a15f1b604733..886625fb464b 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> > > > > > @@ -3284,7 +3284,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> > > > > >    	/* mutex initialization are all done here so we
> > > > > >    	 * can recall function without having locking issues */
> > > > > > -	atomic_set(&adev->irq.ih.lock, 0);
> > > > > >    	mutex_init(&adev->firmware.mutex);
> > > > > >    	mutex_init(&adev->pm.mutex);
> > > > > >    	mutex_init(&adev->gfx.gpu_clock_mutex);
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > > > > index 1024065f1f03..faaa6aa2faaf 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> > > > > > @@ -228,10 +228,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> > > > > >    	wptr = amdgpu_ih_get_wptr(adev, ih);
> > > > > >    restart_ih:
> > > > > > -	/* is somebody else already processing irqs? */
> > > > > > -	if (atomic_xchg(&ih->lock, 1))
> > > > > > -		return IRQ_NONE;
> > > > > > -
> > > > > >    	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
> > > > > >    	/* Order reading of wptr vs. reading of IH ring data */
> > > > > > @@ -244,7 +240,6 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
> > > > > >    	amdgpu_ih_set_rptr(adev, ih);
> > > > > >    	wake_up_all(&ih->wait_process);
> > > > > > -	atomic_set(&ih->lock, 0);
> > > > > >    	/* make sure wptr hasn't changed while processing */
> > > > > >    	wptr = amdgpu_ih_get_wptr(adev, ih);
> > > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > > > > index 87ec6d20dbe0..0649b59830a5 100644
> > > > > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > > > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
> > > > > > @@ -64,7 +64,6 @@ struct amdgpu_ih_ring {
> > > > > >    	bool                    enabled;
> > > > > >    	unsigned		rptr;
> > > > > > -	atomic_t		lock;
> > > > > >    	struct amdgpu_ih_regs	ih_regs;
> > > > > >    	/* For waiting on IH processing at checkpoint. */
> > > > > > -- 
> > > > > > 2.25.1
> > > > > > 
> > > > > > _______________________________________________
> > > > > > dri-devel mailing list
> > > > > > dri-devel@lists.freedesktop.org
> > > > > > https://lists.freedesktop.org/mailman/listinfo/dri-devel
> > > -- 
> > > Daniel Vetter
> > > Software Engineer, Intel Corporation
> > > http://blog.ffwll.ch
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/dri-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-03-16  9:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2021-03-12 13:59 [PATCH] drm/amdgpu: nuke the ih reentrant lock Christian König
2021-03-12 14:04 ` Daniel Vetter
2021-03-12 14:27   ` Christian König
2021-03-12 14:35     ` Daniel Vetter
2021-03-12 14:36       ` Daniel Vetter
2021-03-12 14:50         ` Christian König
2021-03-16  9:32           ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).