All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
@ 2019-07-17  0:06 Marek Olšák
       [not found] ` <20190717000606.7181-1-maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Marek Olšák @ 2019-07-17  0:06 UTC (permalink / raw
  To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

From: Marek Olšák <marek.olsak@amd.com>

Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
separate GDS buffers for each gfx ring.

This is a workaround to ensure stability of transform feedback. Shaders hang
waiting for a GDS instruction (ds_sub, not ds_ordered_count).

The disadvantage is that compute IBs might get a different VMID,
because now gfx always has GDS and compute doesn't.

Signed-off-by: Marek Olšák <marek.olsak@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
 drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
 4 files changed, 37 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b514a44184c..cbd55d061b72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
 	struct drm_file		*filp;
 	struct amdgpu_ctx	*ctx;
 
 	/* chunks */
 	unsigned		nchunks;
 	struct amdgpu_cs_chunk	*chunks;
 
 	/* scheduler job object */
 	struct amdgpu_job	*job;
 	struct drm_sched_entity	*entity;
+	unsigned		hw_ip;
 
 	/* buffer objects */
 	struct ww_acquire_ctx		ticket;
 	struct amdgpu_bo_list		*bo_list;
 	struct amdgpu_mn		*mn;
 	struct amdgpu_bo_list_entry	vm_pd;
 	struct list_head		validated;
 	struct dma_fence		*fence;
 	uint64_t			bytes_moved_threshold;
 	uint64_t			bytes_moved_vis_threshold;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c691df6f7a57..9125cd69a124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
 
 	gds = p->bo_list->gds_obj;
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
+	if (p->hw_ip == AMDGPU_HW_IP_GFX) {
+		/* Only gfx10 allocates these. */
+		if (!gds)
+			gds = p->adev->gds.gds_gfx_bo;
+		if (!oa)
+			oa = p->adev->gds.oa_gfx_bo;
+	}
+
 	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
 		/* Make sure we use the exclusive slot for shared BOs */
 		if (bo->prime_shared_count)
 			e->tv.num_shared = 0;
 		e->bo_va = amdgpu_vm_bo_find(vm, bo);
 	}
 
 	if (gds) {
@@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
 		struct drm_sched_entity *entity;
 
 		chunk = &parser->chunks[i];
 		ib = &parser->job->ibs[j];
 		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
 
 		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
 			continue;
 
+		parser->hw_ip = chunk_ib->ip_type;
+
 		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
 		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
 			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
 					ce_preempt++;
 				else
 					de_preempt++;
 			}
 
 			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index df8a23554831..0943b8e1d97e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -26,20 +26,26 @@
 
 struct amdgpu_ring;
 struct amdgpu_bo;
 
 struct amdgpu_gds {
 	uint32_t gds_size;
 	uint32_t gws_size;
 	uint32_t oa_size;
 	uint32_t gds_compute_max_wave_id;
 	uint32_t vgt_gs_max_wave_id;
+
+	/* Reserved OA for the gfx ring. (gfx10) */
+	uint32_t gds_gfx_reservation_size;
+	uint32_t oa_gfx_reservation_size;
+	struct amdgpu_bo *gds_gfx_bo;
+	struct amdgpu_bo *oa_gfx_bo;
 };
 
 struct amdgpu_gds_reg_offset {
 	uint32_t	mem_base;
 	uint32_t	mem_size;
 	uint32_t	gws;
 	uint32_t	oa;
 };
 
 #endif /* __AMDGPU_GDS_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 618291df659b..3952754c04ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
 
 	kiq = &adev->gfx.kiq;
 	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
 	if (r)
 		return r;
 
 	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
 	if (r)
 		return r;
 
+	/* allocate reserved GDS resources for transform feedback */
+	r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
+				    4, AMDGPU_GEM_DOMAIN_GDS,
+				    &adev->gds.gds_gfx_bo, NULL, NULL);
+	if (r)
+		return r;
+
+	r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
+				    1, AMDGPU_GEM_DOMAIN_OA,
+				    &adev->gds.oa_gfx_bo, NULL, NULL);
+	if (r)
+		return r;
+
 	/* allocate visible FB for rlc auto-loading fw */
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
 		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
 		if (r)
 			return r;
 	}
 
 	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
 
 	gfx_v10_0_gpu_early_init(adev);
@@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
 	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
 			      &adev->gfx.me.me_fw_gpu_addr,
 			      (void **)&adev->gfx.me.me_fw_ptr);
 }
 
 static int gfx_v10_0_sw_fini(void *handle)
 {
 	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
 	amdgpu_gfx_mqd_sw_fini(adev);
 	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
 	amdgpu_gfx_kiq_fini(adev);
 
 	gfx_v10_0_pfp_fini(adev);
@@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
 	case CHIP_NAVI10:
 	default:
 		adev->gds.gds_size = 0x10000;
 		adev->gds.gds_compute_max_wave_id = 0x4ff;
 		adev->gds.vgt_gs_max_wave_id = 0x3ff;
 		break;
 	}
 
 	adev->gds.gws_size = 64;
 	adev->gds.oa_size = 16;
+
+	/* Reserved for transform feedback. */
+	adev->gds.gds_gfx_reservation_size = 256;
+	adev->gds.oa_gfx_reservation_size = 4;
 }
 
 static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
 							  u32 bitmap)
 {
 	u32 data;
 
 	if (!bitmap)
 		return;
 
-- 
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
       [not found] ` <20190717000606.7181-1-maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-07-17  7:15   ` Christian König
       [not found]     ` <26330d7e-3cc0-f460-b7e0-b234597f382d-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2019-07-17  7:15 UTC (permalink / raw
  To: Marek Olšák, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
	Felix Kuehling

Am 17.07.19 um 02:06 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.

So far compute is only using GWS, but I don't think that those 
reservations are a good idea in general.

How severe is the ENOMEM problem you see with using an explicit GWS 
allocation?

Regards,
Christian.

>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>   4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>   	struct drm_file		*filp;
>   	struct amdgpu_ctx	*ctx;
>   
>   	/* chunks */
>   	unsigned		nchunks;
>   	struct amdgpu_cs_chunk	*chunks;
>   
>   	/* scheduler job object */
>   	struct amdgpu_job	*job;
>   	struct drm_sched_entity	*entity;
> +	unsigned		hw_ip;
>   
>   	/* buffer objects */
>   	struct ww_acquire_ctx		ticket;
>   	struct amdgpu_bo_list		*bo_list;
>   	struct amdgpu_mn		*mn;
>   	struct amdgpu_bo_list_entry	vm_pd;
>   	struct list_head		validated;
>   	struct dma_fence		*fence;
>   	uint64_t			bytes_moved_threshold;
>   	uint64_t			bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>   	if (r)
>   		goto error_validate;
>   
>   	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>   				     p->bytes_moved_vis);
>   
>   	gds = p->bo_list->gds_obj;
>   	gws = p->bo_list->gws_obj;
>   	oa = p->bo_list->oa_obj;
>   
> +	if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> +		/* Only gfx10 allocates these. */
> +		if (!gds)
> +			gds = p->adev->gds.gds_gfx_bo;
> +		if (!oa)
> +			oa = p->adev->gds.oa_gfx_bo;
> +	}
> +
>   	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>   		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>   
>   		/* Make sure we use the exclusive slot for shared BOs */
>   		if (bo->prime_shared_count)
>   			e->tv.num_shared = 0;
>   		e->bo_va = amdgpu_vm_bo_find(vm, bo);
>   	}
>   
>   	if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>   		struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>   		struct drm_sched_entity *entity;
>   
>   		chunk = &parser->chunks[i];
>   		ib = &parser->job->ibs[j];
>   		chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>   
>   		if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>   			continue;
>   
> +		parser->hw_ip = chunk_ib->ip_type;
> +
>   		if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>   		    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>   			if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>   				if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>   					ce_preempt++;
>   				else
>   					de_preempt++;
>   			}
>   
>   			/* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index df8a23554831..0943b8e1d97e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -26,20 +26,26 @@
>   
>   struct amdgpu_ring;
>   struct amdgpu_bo;
>   
>   struct amdgpu_gds {
>   	uint32_t gds_size;
>   	uint32_t gws_size;
>   	uint32_t oa_size;
>   	uint32_t gds_compute_max_wave_id;
>   	uint32_t vgt_gs_max_wave_id;
> +
> +	/* Reserved OA for the gfx ring. (gfx10) */
> +	uint32_t gds_gfx_reservation_size;
> +	uint32_t oa_gfx_reservation_size;
> +	struct amdgpu_bo *gds_gfx_bo;
> +	struct amdgpu_bo *oa_gfx_bo;
>   };
>   
>   struct amdgpu_gds_reg_offset {
>   	uint32_t	mem_base;
>   	uint32_t	mem_size;
>   	uint32_t	gws;
>   	uint32_t	oa;
>   };
>   
>   #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..3952754c04ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>   
>   	kiq = &adev->gfx.kiq;
>   	r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>   	if (r)
>   		return r;
>   
>   	r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>   	if (r)
>   		return r;
>   
> +	/* allocate reserved GDS resources for transform feedback */
> +	r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
> +				    4, AMDGPU_GEM_DOMAIN_GDS,
> +				    &adev->gds.gds_gfx_bo, NULL, NULL);
> +	if (r)
> +		return r;
> +
> +	r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
> +				    1, AMDGPU_GEM_DOMAIN_OA,
> +				    &adev->gds.oa_gfx_bo, NULL, NULL);
> +	if (r)
> +		return r;
> +
>   	/* allocate visible FB for rlc auto-loading fw */
>   	if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>   		r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>   		if (r)
>   			return r;
>   	}
>   
>   	adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>   
>   	gfx_v10_0_gpu_early_init(adev);
> @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
>   	amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>   			      &adev->gfx.me.me_fw_gpu_addr,
>   			      (void **)&adev->gfx.me.me_fw_ptr);
>   }
>   
>   static int gfx_v10_0_sw_fini(void *handle)
>   {
>   	int i;
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> +	amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> +
>   	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>   		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>   	for (i = 0; i < adev->gfx.num_compute_rings; i++)
>   		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>   
>   	amdgpu_gfx_mqd_sw_fini(adev);
>   	amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>   	amdgpu_gfx_kiq_fini(adev);
>   
>   	gfx_v10_0_pfp_fini(adev);
> @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>   	case CHIP_NAVI10:
>   	default:
>   		adev->gds.gds_size = 0x10000;
>   		adev->gds.gds_compute_max_wave_id = 0x4ff;
>   		adev->gds.vgt_gs_max_wave_id = 0x3ff;
>   		break;
>   	}
>   
>   	adev->gds.gws_size = 64;
>   	adev->gds.oa_size = 16;
> +
> +	/* Reserved for transform feedback. */
> +	adev->gds.gds_gfx_reservation_size = 256;
> +	adev->gds.oa_gfx_reservation_size = 4;
>   }
>   
>   static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
>   							  u32 bitmap)
>   {
>   	u32 data;
>   
>   	if (!bitmap)
>   		return;
>   

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
       [not found]     ` <26330d7e-3cc0-f460-b7e0-b234597f382d-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-07-17 14:27       ` Marek Olšák
       [not found]         ` <CAAxE2A6s2SOhqhkZydtN7kzzFRzHKwRnSG-JgYGy_GceofHAsQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Marek Olšák @ 2019-07-17 14:27 UTC (permalink / raw
  To: Christian König; +Cc: Felix Kuehling, amd-gfx mailing list


[-- Attachment #1.1: Type: text/plain, Size: 9593 bytes --]

On Wed., Jul. 17, 2019, 03:15 Christian König, <
ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

> Am 17.07.19 um 02:06 schrieb Marek Olšák:
> > From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
> >
> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> > separate GDS buffers for each gfx ring.
> >
> > This is a workaround to ensure stability of transform feedback. Shaders
> hang
> > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
> >
> > The disadvantage is that compute IBs might get a different VMID,
> > because now gfx always has GDS and compute doesn't.
>
> So far compute is only using GWS, but I don't think that those
> reservations are a good idea in general.
>
> How severe is the ENOMEM problem you see with using an explicit GWS
> allocation?
>

I guess you mean GDS or OA.

There is no ENOMEM, it just hangs. I don't know why. The shader is waiting
for ds_sub and can't continue, but GDS is idle.

Marek


> Regards,
> Christian.
>
> >
> > Signed-off-by: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
> >   4 files changed, 37 insertions(+)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > index 4b514a44184c..cbd55d061b72 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
> >       struct drm_file         *filp;
> >       struct amdgpu_ctx       *ctx;
> >
> >       /* chunks */
> >       unsigned                nchunks;
> >       struct amdgpu_cs_chunk  *chunks;
> >
> >       /* scheduler job object */
> >       struct amdgpu_job       *job;
> >       struct drm_sched_entity *entity;
> > +     unsigned                hw_ip;
> >
> >       /* buffer objects */
> >       struct ww_acquire_ctx           ticket;
> >       struct amdgpu_bo_list           *bo_list;
> >       struct amdgpu_mn                *mn;
> >       struct amdgpu_bo_list_entry     vm_pd;
> >       struct list_head                validated;
> >       struct dma_fence                *fence;
> >       uint64_t                        bytes_moved_threshold;
> >       uint64_t                        bytes_moved_vis_threshold;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > index c691df6f7a57..9125cd69a124 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
> amdgpu_cs_parser *p,
> >       if (r)
> >               goto error_validate;
> >
> >       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
> >                                    p->bytes_moved_vis);
> >
> >       gds = p->bo_list->gds_obj;
> >       gws = p->bo_list->gws_obj;
> >       oa = p->bo_list->oa_obj;
> >
> > +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> > +             /* Only gfx10 allocates these. */
> > +             if (!gds)
> > +                     gds = p->adev->gds.gds_gfx_bo;
> > +             if (!oa)
> > +                     oa = p->adev->gds.oa_gfx_bo;
> > +     }
> > +
> >       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
> >               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> >
> >               /* Make sure we use the exclusive slot for shared BOs */
> >               if (bo->prime_shared_count)
> >                       e->tv.num_shared = 0;
> >               e->bo_va = amdgpu_vm_bo_find(vm, bo);
> >       }
> >
> >       if (gds) {
> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device
> *adev,
> >               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
> >               struct drm_sched_entity *entity;
> >
> >               chunk = &parser->chunks[i];
> >               ib = &parser->job->ibs[j];
> >               chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
> >
> >               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
> >                       continue;
> >
> > +             parser->hw_ip = chunk_ib->ip_type;
> > +
> >               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
> >                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
> >                       if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
> >                               if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
> >                                       ce_preempt++;
> >                               else
> >                                       de_preempt++;
> >                       }
> >
> >                       /* each GFX command submit allows 0 or 1 IB
> preemptible for CE & DE */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > index df8a23554831..0943b8e1d97e 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> > @@ -26,20 +26,26 @@
> >
> >   struct amdgpu_ring;
> >   struct amdgpu_bo;
> >
> >   struct amdgpu_gds {
> >       uint32_t gds_size;
> >       uint32_t gws_size;
> >       uint32_t oa_size;
> >       uint32_t gds_compute_max_wave_id;
> >       uint32_t vgt_gs_max_wave_id;
> > +
> > +     /* Reserved OA for the gfx ring. (gfx10) */
> > +     uint32_t gds_gfx_reservation_size;
> > +     uint32_t oa_gfx_reservation_size;
> > +     struct amdgpu_bo *gds_gfx_bo;
> > +     struct amdgpu_bo *oa_gfx_bo;
> >   };
> >
> >   struct amdgpu_gds_reg_offset {
> >       uint32_t        mem_base;
> >       uint32_t        mem_size;
> >       uint32_t        gws;
> >       uint32_t        oa;
> >   };
> >
> >   #endif /* __AMDGPU_GDS_H__ */
> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > index 618291df659b..3952754c04ff 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
> >
> >       kiq = &adev->gfx.kiq;
> >       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
> >       if (r)
> >               return r;
> >
> >       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
> >       if (r)
> >               return r;
> >
> > +     /* allocate reserved GDS resources for transform feedback */
> > +     r = amdgpu_bo_create_kernel(adev,
> adev->gds.gds_gfx_reservation_size,
> > +                                 4, AMDGPU_GEM_DOMAIN_GDS,
> > +                                 &adev->gds.gds_gfx_bo, NULL, NULL);
> > +     if (r)
> > +             return r;
> > +
> > +     r = amdgpu_bo_create_kernel(adev,
> adev->gds.oa_gfx_reservation_size,
> > +                                 1, AMDGPU_GEM_DOMAIN_OA,
> > +                                 &adev->gds.oa_gfx_bo, NULL, NULL);
> > +     if (r)
> > +             return r;
> > +
> >       /* allocate visible FB for rlc auto-loading fw */
> >       if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
> >               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
> >               if (r)
> >                       return r;
> >       }
> >
> >       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
> >
> >       gfx_v10_0_gpu_early_init(adev);
> > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct
> amdgpu_device *adev)
> >       amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
> >                             &adev->gfx.me.me_fw_gpu_addr,
> >                             (void **)&adev->gfx.me.me_fw_ptr);
> >   }
> >
> >   static int gfx_v10_0_sw_fini(void *handle)
> >   {
> >       int i;
> >       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> >
> > +     amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> > +     amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> > +
> >       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
> >               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
> >       for (i = 0; i < adev->gfx.num_compute_rings; i++)
> >               amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
> >
> >       amdgpu_gfx_mqd_sw_fini(adev);
> >       amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
> >       amdgpu_gfx_kiq_fini(adev);
> >
> >       gfx_v10_0_pfp_fini(adev);
> > @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct
> amdgpu_device *adev)
> >       case CHIP_NAVI10:
> >       default:
> >               adev->gds.gds_size = 0x10000;
> >               adev->gds.gds_compute_max_wave_id = 0x4ff;
> >               adev->gds.vgt_gs_max_wave_id = 0x3ff;
> >               break;
> >       }
> >
> >       adev->gds.gws_size = 64;
> >       adev->gds.oa_size = 16;
> > +
> > +     /* Reserved for transform feedback. */
> > +     adev->gds.gds_gfx_reservation_size = 256;
> > +     adev->gds.oa_gfx_reservation_size = 4;
> >   }
> >
> >   static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct
> amdgpu_device *adev,
> >                                                         u32 bitmap)
> >   {
> >       u32 data;
> >
> >       if (!bitmap)
> >               return;
> >
>
>

[-- Attachment #1.2: Type: text/html, Size: 12957 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
       [not found]         ` <CAAxE2A6s2SOhqhkZydtN7kzzFRzHKwRnSG-JgYGy_GceofHAsQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-07-17 14:43           ` Christian König
       [not found]             ` <57de127c-a6ac-c459-fc77-4da8578a41ae-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Christian König @ 2019-07-17 14:43 UTC (permalink / raw
  To: Marek Olšák, Christian König
  Cc: Felix Kuehling, amd-gfx mailing list


[-- Attachment #1.1: Type: text/plain, Size: 11694 bytes --]

Am 17.07.19 um 16:27 schrieb Marek Olšák:
>
>
> On Wed., Jul. 17, 2019, 03:15 Christian König, 
> <ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org 
> <mailto:ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>> wrote:
>
>     Am 17.07.19 um 02:06 schrieb Marek Olšák:
>     > From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org <mailto:marek.olsak-5C7GfCeVMHo@public.gmane.org>>
>     >
>     > Hopefully we'll only use 1 gfx ring, because otherwise we'd have
>     to have
>     > separate GDS buffers for each gfx ring.
>     >
>     > This is a workaround to ensure stability of transform feedback.
>     Shaders hang
>     > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>     >
>     > The disadvantage is that compute IBs might get a different VMID,
>     > because now gfx always has GDS and compute doesn't.
>
>     So far compute is only using GWS, but I don't think that those
>     reservations are a good idea in general.
>
>     How severe is the ENOMEM problem you see with using an explicit GWS
>     allocation?
>
>
> I guess you mean GDS or OA.

Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.

> There is no ENOMEM, it just hangs. I don't know why. The shader is 
> waiting for ds_sub and can't continue, but GDS is idle.

Well could it be because we don't correctly handle non zero offsets or 
stuff like that?

Does it work with this hack when you don't allocate GDS/OA from the 
start? (Just allocate it twice or something like this).

Christian.

>
> Marek
>
>
>     Regards,
>     Christian.
>
>     >
>     > Signed-off-by: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org
>     <mailto:marek.olsak-5C7GfCeVMHo@public.gmane.org>>
>     > ---
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>     >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>     >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>     >   4 files changed, 37 insertions(+)
>     >
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     > index 4b514a44184c..cbd55d061b72 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>     > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>     >       struct drm_file         *filp;
>     >       struct amdgpu_ctx       *ctx;
>     >
>     >       /* chunks */
>     >       unsigned                nchunks;
>     >       struct amdgpu_cs_chunk  *chunks;
>     >
>     >       /* scheduler job object */
>     >       struct amdgpu_job       *job;
>     >       struct drm_sched_entity *entity;
>     > +     unsigned                hw_ip;
>     >
>     >       /* buffer objects */
>     >       struct ww_acquire_ctx           ticket;
>     >       struct amdgpu_bo_list           *bo_list;
>     >       struct amdgpu_mn                *mn;
>     >       struct amdgpu_bo_list_entry     vm_pd;
>     >       struct list_head                validated;
>     >       struct dma_fence                *fence;
>     >       uint64_t bytes_moved_threshold;
>     >       uint64_t bytes_moved_vis_threshold;
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     > index c691df6f7a57..9125cd69a124 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>     > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
>     amdgpu_cs_parser *p,
>     >       if (r)
>     >               goto error_validate;
>     >
>     >       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>     > p->bytes_moved_vis);
>     >
>     >       gds = p->bo_list->gds_obj;
>     >       gws = p->bo_list->gws_obj;
>     >       oa = p->bo_list->oa_obj;
>     >
>     > +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
>     > +             /* Only gfx10 allocates these. */
>     > +             if (!gds)
>     > +                     gds = p->adev->gds.gds_gfx_bo;
>     > +             if (!oa)
>     > +                     oa = p->adev->gds.oa_gfx_bo;
>     > +     }
>     > +
>     >       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>     >               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo
>     <http://tv.bo>);
>     >
>     >               /* Make sure we use the exclusive slot for shared
>     BOs */
>     >               if (bo->prime_shared_count)
>     >                       e->tv.num_shared = 0;
>     >               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>     >       }
>     >
>     >       if (gds) {
>     > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct
>     amdgpu_device *adev,
>     >               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>     >               struct drm_sched_entity *entity;
>     >
>     >               chunk = &parser->chunks[i];
>     >               ib = &parser->job->ibs[j];
>     >               chunk_ib = (struct drm_amdgpu_cs_chunk_ib
>     *)chunk->kdata;
>     >
>     >               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>     >                       continue;
>     >
>     > +             parser->hw_ip = chunk_ib->ip_type;
>     > +
>     >               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>     >                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>     >                       if (chunk_ib->flags &
>     AMDGPU_IB_FLAG_PREEMPT) {
>     >                               if (chunk_ib->flags &
>     AMDGPU_IB_FLAG_CE)
>     >                                       ce_preempt++;
>     >                               else
>     >                                       de_preempt++;
>     >                       }
>     >
>     >                       /* each GFX command submit allows 0 or 1
>     IB preemptible for CE & DE */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     > index df8a23554831..0943b8e1d97e 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>     > @@ -26,20 +26,26 @@
>     >
>     >   struct amdgpu_ring;
>     >   struct amdgpu_bo;
>     >
>     >   struct amdgpu_gds {
>     >       uint32_t gds_size;
>     >       uint32_t gws_size;
>     >       uint32_t oa_size;
>     >       uint32_t gds_compute_max_wave_id;
>     >       uint32_t vgt_gs_max_wave_id;
>     > +
>     > +     /* Reserved OA for the gfx ring. (gfx10) */
>     > +     uint32_t gds_gfx_reservation_size;
>     > +     uint32_t oa_gfx_reservation_size;
>     > +     struct amdgpu_bo *gds_gfx_bo;
>     > +     struct amdgpu_bo *oa_gfx_bo;
>     >   };
>     >
>     >   struct amdgpu_gds_reg_offset {
>     >       uint32_t        mem_base;
>     >       uint32_t        mem_size;
>     >       uint32_t        gws;
>     >       uint32_t        oa;
>     >   };
>     >
>     >   #endif /* __AMDGPU_GDS_H__ */
>     > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > index 618291df659b..3952754c04ff 100644
>     > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>     > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>     >
>     >       kiq = &adev->gfx.kiq;
>     >       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>     >       if (r)
>     >               return r;
>     >
>     >       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct
>     v10_compute_mqd));
>     >       if (r)
>     >               return r;
>     >
>     > +     /* allocate reserved GDS resources for transform feedback */
>     > +     r = amdgpu_bo_create_kernel(adev,
>     adev->gds.gds_gfx_reservation_size,
>     > +                                 4, AMDGPU_GEM_DOMAIN_GDS,
>     > +  &adev->gds.gds_gfx_bo, NULL, NULL);
>     > +     if (r)
>     > +             return r;
>     > +
>     > +     r = amdgpu_bo_create_kernel(adev,
>     adev->gds.oa_gfx_reservation_size,
>     > +                                 1, AMDGPU_GEM_DOMAIN_OA,
>     > +  &adev->gds.oa_gfx_bo, NULL, NULL);
>     > +     if (r)
>     > +             return r;
>     > +
>     >       /* allocate visible FB for rlc auto-loading fw */
>     >       if (adev->firmware.load_type ==
>     AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>     >               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>     >               if (r)
>     >                       return r;
>     >       }
>     >
>     >       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>     >
>     >       gfx_v10_0_gpu_early_init(adev);
>     > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct
>     amdgpu_device *adev)
>     >  amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>     >  &adev->gfx.me.me_fw_gpu_addr,
>     >                             (void **)&adev->gfx.me.me_fw_ptr);
>     >   }
>     >
>     >   static int gfx_v10_0_sw_fini(void *handle)
>     >   {
>     >       int i;
>     >       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>     >
>     > +  amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>     > +  amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>     > +
>     >       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>     >  amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>     >       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>     >  amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>     >
>     >       amdgpu_gfx_mqd_sw_fini(adev);
>     >  amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>     >       amdgpu_gfx_kiq_fini(adev);
>     >
>     >       gfx_v10_0_pfp_fini(adev);
>     > @@ -5181,20 +5197,24 @@ static void
>     gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>     >       case CHIP_NAVI10:
>     >       default:
>     >               adev->gds.gds_size = 0x10000;
>     >               adev->gds.gds_compute_max_wave_id = 0x4ff;
>     >               adev->gds.vgt_gs_max_wave_id = 0x3ff;
>     >               break;
>     >       }
>     >
>     >       adev->gds.gws_size = 64;
>     >       adev->gds.oa_size = 16;
>     > +
>     > +     /* Reserved for transform feedback. */
>     > +     adev->gds.gds_gfx_reservation_size = 256;
>     > +     adev->gds.oa_gfx_reservation_size = 4;
>     >   }
>     >
>     >   static void
>     gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device
>     *adev,
>     >    u32 bitmap)
>     >   {
>     >       u32 data;
>     >
>     >       if (!bitmap)
>     >               return;
>     >
>
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


[-- Attachment #1.2: Type: text/html, Size: 19236 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
       [not found]             ` <57de127c-a6ac-c459-fc77-4da8578a41ae-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
@ 2019-07-17 14:54               ` Marek Olšák
       [not found]                 ` <CAAxE2A67J0gN=ZV2t66zPc+E9kJDyFghd6oGobu4DLT5hvK1gA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 6+ messages in thread
From: Marek Olšák @ 2019-07-17 14:54 UTC (permalink / raw
  To: Christian König; +Cc: Felix Kuehling, amd-gfx mailing list


[-- Attachment #1.1: Type: text/plain, Size: 10686 bytes --]

On Wed., Jul. 17, 2019, 10:43 Christian König, <
ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:

> Am 17.07.19 um 16:27 schrieb Marek Olšák:
>
>
>
> On Wed., Jul. 17, 2019, 03:15 Christian König, <
> ckoenig.leichtzumerken-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>
>> Am 17.07.19 um 02:06 schrieb Marek Olšák:
>> > From: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
>> >
>> > Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
>> > separate GDS buffers for each gfx ring.
>> >
>> > This is a workaround to ensure stability of transform feedback. Shaders
>> hang
>> > waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>> >
>> > The disadvantage is that compute IBs might get a different VMID,
>> > because now gfx always has GDS and compute doesn't.
>>
>> So far compute is only using GWS, but I don't think that those
>> reservations are a good idea in general.
>>
>> How severe is the ENOMEM problem you see with using an explicit GWS
>> allocation?
>>
>
> I guess you mean GDS or OA.
>
>
> Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.
>
> There is no ENOMEM, it just hangs. I don't know why. The shader is waiting
> for ds_sub and can't continue, but GDS is idle.
>
>
> Well could it be because we don't correctly handle non zero offsets or
> stuff like that?
>

I don't know what you mean.


> Does it work with this hack when you don't allocate GDS/OA from the start?
> (Just allocate it twice or something like this).
>

It's only allocated once by the kernel with this hack.

Marek


> Christian.
>
>
> Marek
>
>
>> Regards,
>> Christian.
>>
>> >
>> > Signed-off-by: Marek Olšák <marek.olsak-5C7GfCeVMHo@public.gmane.org>
>> > ---
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>> >   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>> >   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>> >   4 files changed, 37 insertions(+)
>> >
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > index 4b514a44184c..cbd55d061b72 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> > @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>> >       struct drm_file         *filp;
>> >       struct amdgpu_ctx       *ctx;
>> >
>> >       /* chunks */
>> >       unsigned                nchunks;
>> >       struct amdgpu_cs_chunk  *chunks;
>> >
>> >       /* scheduler job object */
>> >       struct amdgpu_job       *job;
>> >       struct drm_sched_entity *entity;
>> > +     unsigned                hw_ip;
>> >
>> >       /* buffer objects */
>> >       struct ww_acquire_ctx           ticket;
>> >       struct amdgpu_bo_list           *bo_list;
>> >       struct amdgpu_mn                *mn;
>> >       struct amdgpu_bo_list_entry     vm_pd;
>> >       struct list_head                validated;
>> >       struct dma_fence                *fence;
>> >       uint64_t                        bytes_moved_threshold;
>> >       uint64_t                        bytes_moved_vis_threshold;
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > index c691df6f7a57..9125cd69a124 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> > @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct
>> amdgpu_cs_parser *p,
>> >       if (r)
>> >               goto error_validate;
>> >
>> >       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>> >                                    p->bytes_moved_vis);
>> >
>> >       gds = p->bo_list->gds_obj;
>> >       gws = p->bo_list->gws_obj;
>> >       oa = p->bo_list->oa_obj;
>> >
>> > +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
>> > +             /* Only gfx10 allocates these. */
>> > +             if (!gds)
>> > +                     gds = p->adev->gds.gds_gfx_bo;
>> > +             if (!oa)
>> > +                     oa = p->adev->gds.oa_gfx_bo;
>> > +     }
>> > +
>> >       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>> >               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
>> >
>> >               /* Make sure we use the exclusive slot for shared BOs */
>> >               if (bo->prime_shared_count)
>> >                       e->tv.num_shared = 0;
>> >               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>> >       }
>> >
>> >       if (gds) {
>> > @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device
>> *adev,
>> >               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>> >               struct drm_sched_entity *entity;
>> >
>> >               chunk = &parser->chunks[i];
>> >               ib = &parser->job->ibs[j];
>> >               chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>> >
>> >               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>> >                       continue;
>> >
>> > +             parser->hw_ip = chunk_ib->ip_type;
>> > +
>> >               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>> >                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>> >                       if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>> >                               if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>> >                                       ce_preempt++;
>> >                               else
>> >                                       de_preempt++;
>> >                       }
>> >
>> >                       /* each GFX command submit allows 0 or 1 IB
>> preemptible for CE & DE */
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> > index df8a23554831..0943b8e1d97e 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
>> > @@ -26,20 +26,26 @@
>> >
>> >   struct amdgpu_ring;
>> >   struct amdgpu_bo;
>> >
>> >   struct amdgpu_gds {
>> >       uint32_t gds_size;
>> >       uint32_t gws_size;
>> >       uint32_t oa_size;
>> >       uint32_t gds_compute_max_wave_id;
>> >       uint32_t vgt_gs_max_wave_id;
>> > +
>> > +     /* Reserved OA for the gfx ring. (gfx10) */
>> > +     uint32_t gds_gfx_reservation_size;
>> > +     uint32_t oa_gfx_reservation_size;
>> > +     struct amdgpu_bo *gds_gfx_bo;
>> > +     struct amdgpu_bo *oa_gfx_bo;
>> >   };
>> >
>> >   struct amdgpu_gds_reg_offset {
>> >       uint32_t        mem_base;
>> >       uint32_t        mem_size;
>> >       uint32_t        gws;
>> >       uint32_t        oa;
>> >   };
>> >
>> >   #endif /* __AMDGPU_GDS_H__ */
>> > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> > index 618291df659b..3952754c04ff 100644
>> > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> > @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>> >
>> >       kiq = &adev->gfx.kiq;
>> >       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>> >       if (r)
>> >               return r;
>> >
>> >       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>> >       if (r)
>> >               return r;
>> >
>> > +     /* allocate reserved GDS resources for transform feedback */
>> > +     r = amdgpu_bo_create_kernel(adev,
>> adev->gds.gds_gfx_reservation_size,
>> > +                                 4, AMDGPU_GEM_DOMAIN_GDS,
>> > +                                 &adev->gds.gds_gfx_bo, NULL, NULL);
>> > +     if (r)
>> > +             return r;
>> > +
>> > +     r = amdgpu_bo_create_kernel(adev,
>> adev->gds.oa_gfx_reservation_size,
>> > +                                 1, AMDGPU_GEM_DOMAIN_OA,
>> > +                                 &adev->gds.oa_gfx_bo, NULL, NULL);
>> > +     if (r)
>> > +             return r;
>> > +
>> >       /* allocate visible FB for rlc auto-loading fw */
>> >       if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO)
>> {
>> >               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>> >               if (r)
>> >                       return r;
>> >       }
>> >
>> >       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>> >
>> >       gfx_v10_0_gpu_early_init(adev);
>> > @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct
>> amdgpu_device *adev)
>> >       amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>> >                             &adev->gfx.me.me_fw_gpu_addr,
>> >                             (void **)&adev->gfx.me.me_fw_ptr);
>> >   }
>> >
>> >   static int gfx_v10_0_sw_fini(void *handle)
>> >   {
>> >       int i;
>> >       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>> >
>> > +     amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
>> > +     amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
>> > +
>> >       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>> >               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>> >       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>> >               amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>> >
>> >       amdgpu_gfx_mqd_sw_fini(adev);
>> >       amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>> >       amdgpu_gfx_kiq_fini(adev);
>> >
>> >       gfx_v10_0_pfp_fini(adev);
>> > @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct
>> amdgpu_device *adev)
>> >       case CHIP_NAVI10:
>> >       default:
>> >               adev->gds.gds_size = 0x10000;
>> >               adev->gds.gds_compute_max_wave_id = 0x4ff;
>> >               adev->gds.vgt_gs_max_wave_id = 0x3ff;
>> >               break;
>> >       }
>> >
>> >       adev->gds.gws_size = 64;
>> >       adev->gds.oa_size = 16;
>> > +
>> > +     /* Reserved for transform feedback. */
>> > +     adev->gds.gds_gfx_reservation_size = 256;
>> > +     adev->gds.oa_gfx_reservation_size = 4;
>> >   }
>> >
>> >   static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct
>> amdgpu_device *adev,
>> >                                                         u32 bitmap)
>> >   {
>> >       u32 data;
>> >
>> >       if (!bitmap)
>> >               return;
>> >
>>
>>
> _______________________________________________
> amd-gfx mailing listamd-gfx-PD4FTy7X32lNgt0PjOBp934avgP/u3fG0wdF1cv0I5s@public.gmane.org://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
>
>

[-- Attachment #1.2: Type: text/html, Size: 20248 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
       [not found]                 ` <CAAxE2A67J0gN=ZV2t66zPc+E9kJDyFghd6oGobu4DLT5hvK1gA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2019-07-17 15:21                   ` Koenig, Christian
  0 siblings, 0 replies; 6+ messages in thread
From: Koenig, Christian @ 2019-07-17 15:21 UTC (permalink / raw
  To: Marek Olšák; +Cc: Kuehling, Felix, amd-gfx mailing list


[-- Attachment #1.1: Type: text/plain, Size: 10375 bytes --]

Am 17.07.19 um 16:54 schrieb Marek Olšák:


On Wed., Jul. 17, 2019, 10:43 Christian König, <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
Am 17.07.19 um 16:27 schrieb Marek Olšák:


On Wed., Jul. 17, 2019, 03:15 Christian König, <ckoenig.leichtzumerken@gmail.com<mailto:ckoenig.leichtzumerken@gmail.com>> wrote:
Am 17.07.19 um 02:06 schrieb Marek Olšák:
> From: Marek Olšák <marek.olsak@amd.com<mailto:marek.olsak@amd.com>>
>
> Hopefully we'll only use 1 gfx ring, because otherwise we'd have to have
> separate GDS buffers for each gfx ring.
>
> This is a workaround to ensure stability of transform feedback. Shaders hang
> waiting for a GDS instruction (ds_sub, not ds_ordered_count).
>
> The disadvantage is that compute IBs might get a different VMID,
> because now gfx always has GDS and compute doesn't.

So far compute is only using GWS, but I don't think that those
reservations are a good idea in general.

How severe is the ENOMEM problem you see with using an explicit GWS
allocation?

I guess you mean GDS or OA.

Yeah, just a typo. Compute is using GWS and we want to use GDS and OA here.

There is no ENOMEM, it just hangs. I don't know why. The shader is waiting for ds_sub and can't continue, but GDS is idle.

Well could it be because we don't correctly handle non zero offsets or stuff like that?

I don't know what you mean.

I wanted to try to leak some GDS/OA during allocation to narrow down what is going wrong here.



Does it work with this hack when you don't allocate GDS/OA from the start? (Just allocate it twice or something like this).

It's only allocated once by the kernel with this hack.

Well my question is why does that actually help?

I mean is it a bug in the memory management or some hardware issue that we can't switch GDS/OS?

I think we need to find the root cause of the problem and if it's related to switching GDS/OS what the actual restrictions are.

Christian.


Marek


Christian.


Marek


Regards,
Christian.

>
> Signed-off-by: Marek Olšák <marek.olsak@amd.com<mailto:marek.olsak@amd.com>>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 10 ++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h |  6 ++++++
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c  | 20 ++++++++++++++++++++
>   4 files changed, 37 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4b514a44184c..cbd55d061b72 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
>       struct drm_file         *filp;
>       struct amdgpu_ctx       *ctx;
>
>       /* chunks */
>       unsigned                nchunks;
>       struct amdgpu_cs_chunk  *chunks;
>
>       /* scheduler job object */
>       struct amdgpu_job       *job;
>       struct drm_sched_entity *entity;
> +     unsigned                hw_ip;
>
>       /* buffer objects */
>       struct ww_acquire_ctx           ticket;
>       struct amdgpu_bo_list           *bo_list;
>       struct amdgpu_mn                *mn;
>       struct amdgpu_bo_list_entry     vm_pd;
>       struct list_head                validated;
>       struct dma_fence                *fence;
>       uint64_t                        bytes_moved_threshold;
>       uint64_t                        bytes_moved_vis_threshold;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index c691df6f7a57..9125cd69a124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>       if (r)
>               goto error_validate;
>
>       amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
>                                    p->bytes_moved_vis);
>
>       gds = p->bo_list->gds_obj;
>       gws = p->bo_list->gws_obj;
>       oa = p->bo_list->oa_obj;
>
> +     if (p->hw_ip == AMDGPU_HW_IP_GFX) {
> +             /* Only gfx10 allocates these. */
> +             if (!gds)
> +                     gds = p->adev->gds.gds_gfx_bo;
> +             if (!oa)
> +                     oa = p->adev->gds.oa_gfx_bo;
> +     }
> +
>       amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>               struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo<http://tv.bo>);
>
>               /* Make sure we use the exclusive slot for shared BOs */
>               if (bo->prime_shared_count)
>                       e->tv.num_shared = 0;
>               e->bo_va = amdgpu_vm_bo_find(vm, bo);
>       }
>
>       if (gds) {
> @@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
>               struct drm_amdgpu_cs_chunk_ib *chunk_ib;
>               struct drm_sched_entity *entity;
>
>               chunk = &parser->chunks[i];
>               ib = &parser->job->ibs[j];
>               chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
>
>               if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
>                       continue;
>
> +             parser->hw_ip = chunk_ib->ip_type;
> +
>               if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>                   (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
>                       if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>                               if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
>                                       ce_preempt++;
>                               else
>                                       de_preempt++;
>                       }
>
>                       /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> index df8a23554831..0943b8e1d97e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
> @@ -26,20 +26,26 @@
>
>   struct amdgpu_ring;
>   struct amdgpu_bo;
>
>   struct amdgpu_gds {
>       uint32_t gds_size;
>       uint32_t gws_size;
>       uint32_t oa_size;
>       uint32_t gds_compute_max_wave_id;
>       uint32_t vgt_gs_max_wave_id;
> +
> +     /* Reserved OA for the gfx ring. (gfx10) */
> +     uint32_t gds_gfx_reservation_size;
> +     uint32_t oa_gfx_reservation_size;
> +     struct amdgpu_bo *gds_gfx_bo;
> +     struct amdgpu_bo *oa_gfx_bo;
>   };
>
>   struct amdgpu_gds_reg_offset {
>       uint32_t        mem_base;
>       uint32_t        mem_size;
>       uint32_t        gws;
>       uint32_t        oa;
>   };
>
>   #endif /* __AMDGPU_GDS_H__ */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 618291df659b..3952754c04ff 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
>
>       kiq = &adev->gfx.kiq;
>       r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
>       if (r)
>               return r;
>
>       r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
>       if (r)
>               return r;
>
> +     /* allocate reserved GDS resources for transform feedback */
> +     r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
> +                                 4, AMDGPU_GEM_DOMAIN_GDS,
> +                                 &adev->gds.gds_gfx_bo, NULL, NULL);
> +     if (r)
> +             return r;
> +
> +     r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
> +                                 1, AMDGPU_GEM_DOMAIN_OA,
> +                                 &adev->gds.oa_gfx_bo, NULL, NULL);
> +     if (r)
> +             return r;
> +
>       /* allocate visible FB for rlc auto-loading fw */
>       if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
>               r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
>               if (r)
>                       return r;
>       }
>
>       adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;
>
>       gfx_v10_0_gpu_early_init(adev);
> @@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
>       amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
>                             &adev->gfx.me.me_fw_gpu_addr,
>                             (void **)&adev->gfx.me.me_fw_ptr);
>   }
>
>   static int gfx_v10_0_sw_fini(void *handle)
>   {
>       int i;
>       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> +     amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
> +     amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
> +
>       for (i = 0; i < adev->gfx.num_gfx_rings; i++)
>               amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
>       for (i = 0; i < adev->gfx.num_compute_rings; i++)
>               amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
>
>       amdgpu_gfx_mqd_sw_fini(adev);
>       amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
>       amdgpu_gfx_kiq_fini(adev);
>
>       gfx_v10_0_pfp_fini(adev);
> @@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
>       case CHIP_NAVI10:
>       default:
>               adev->gds.gds_size = 0x10000;
>               adev->gds.gds_compute_max_wave_id = 0x4ff;
>               adev->gds.vgt_gs_max_wave_id = 0x3ff;
>               break;
>       }
>
>       adev->gds.gws_size = 64;
>       adev->gds.oa_size = 16;
> +
> +     /* Reserved for transform feedback. */
> +     adev->gds.gds_gfx_reservation_size = 256;
> +     adev->gds.oa_gfx_reservation_size = 4;
>   }
>
>   static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
>                                                         u32 bitmap)
>   {
>       u32 data;
>
>       if (!bitmap)
>               return;
>




_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org<mailto:amd-gfx@lists.freedesktop.org>
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[-- Attachment #1.2: Type: text/html, Size: 21402 bytes --]

[-- Attachment #2: Type: text/plain, Size: 153 bytes --]

_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2019-07-17 15:21 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-07-17  0:06 [PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10 Marek Olšák
     [not found] ` <20190717000606.7181-1-maraeo-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-07-17  7:15   ` Christian König
     [not found]     ` <26330d7e-3cc0-f460-b7e0-b234597f382d-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-07-17 14:27       ` Marek Olšák
     [not found]         ` <CAAxE2A6s2SOhqhkZydtN7kzzFRzHKwRnSG-JgYGy_GceofHAsQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-07-17 14:43           ` Christian König
     [not found]             ` <57de127c-a6ac-c459-fc77-4da8578a41ae-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
2019-07-17 14:54               ` Marek Olšák
     [not found]                 ` <CAAxE2A67J0gN=ZV2t66zPc+E9kJDyFghd6oGobu4DLT5hvK1gA-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2019-07-17 15:21                   ` Koenig, Christian

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.