All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
* [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset
@ 2024-04-15 16:44 Nirmoy Das
  2024-04-15 23:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
                   ` (3 more replies)
  0 siblings, 4 replies; 6+ messages in thread
From: Nirmoy Das @ 2024-04-15 16:44 UTC (permalink / raw
  To: intel-gfx; +Cc: Nirmoy Das, John Harrison

Currently intel_gt_reset() happens as follows:

reset_prepare() ---> Sends GDRST to GuC, GuC is in GS_MIA_IN_RESET
do_reset()
	__intel_gt_reset()
		*_engine_reset_prepare() -->RESET_CTL expects running
		GuC
		*_reset_engines()
intel_gt_init_hw() --> GuC FW loading happens, GuC comes out of
GS_MIA_IN_RESET.

Fix the above flow so that GuC reset happens after all the
engines reset is done.

Cc: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
---
 drivers/gpu/drm/i915/gt/intel_reset.c |  9 ++++--
 drivers/gpu/drm/i915/gt/uc/intel_uc.c | 42 +++++++++++++++++++++------
 drivers/gpu/drm/i915/gt/uc/intel_uc.h |  1 +
 3 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
index c8e9aa41fdea..9ebd68ce0c22 100644
--- a/drivers/gpu/drm/i915/gt/intel_reset.c
+++ b/drivers/gpu/drm/i915/gt/intel_reset.c
@@ -879,8 +879,11 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
 	intel_engine_mask_t awake = 0;
 	enum intel_engine_id id;
 
-	/* For GuC mode, ensure submission is disabled before stopping ring */
-	intel_uc_reset_prepare(&gt->uc);
+	/*
+	 * For GuC mode, ensure submission is disabled before stopping ring.
+	 * Don't reset the GuC a engine reset requires GuC to be running.
+	 */
+	intel_uc_reset_prepare_without_guc_reset(&gt->uc);
 
 	for_each_engine(engine, gt, id) {
 		if (intel_engine_pm_get_if_awake(engine))
@@ -1227,6 +1230,8 @@ void intel_gt_reset(struct intel_gt *gt,
 
 	intel_overlay_reset(gt->i915);
 
+	/* Now that all engines are clean, Reset the GuC */
+	intel_uc_reset_prepare(&gt->uc);
 	/*
 	 * Next we need to restore the context, but we don't use those
 	 * yet either...
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
index 7a63abf8f644..5feee4db2ccc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
@@ -345,7 +345,7 @@ static void __uc_fini(struct intel_uc *uc)
 	intel_guc_fini(&uc->guc);
 }
 
-static int __uc_sanitize(struct intel_uc *uc)
+static void __uc_sanitize_without_guc_reset(struct intel_uc *uc)
 {
 	struct intel_guc *guc = &uc->guc;
 	struct intel_huc *huc = &uc->huc;
@@ -354,7 +354,11 @@ static int __uc_sanitize(struct intel_uc *uc)
 
 	intel_huc_sanitize(huc);
 	intel_guc_sanitize(guc);
+}
 
+static int __uc_sanitize(struct intel_uc *uc)
+{
+	__uc_sanitize_without_guc_reset(uc);
 	return __intel_uc_reset_hw(uc);
 }
 
@@ -593,13 +597,7 @@ static void __uc_fini_hw(struct intel_uc *uc)
 	__uc_sanitize(uc);
 }
 
-/**
- * intel_uc_reset_prepare - Prepare for reset
- * @uc: the intel_uc structure
- *
- * Preparing for full gpu reset.
- */
-void intel_uc_reset_prepare(struct intel_uc *uc)
+static void __intel_uc_reset_prepare(struct intel_uc *uc, bool reset_guc)
 {
 	struct intel_guc *guc = &uc->guc;
 
@@ -617,9 +615,35 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
 		intel_guc_submission_reset_prepare(guc);
 
 sanitize:
-	__uc_sanitize(uc);
+	if (reset_guc)
+		__uc_sanitize(uc);
+	else
+		__uc_sanitize_without_guc_reset(uc);
 }
 
+/**
+ * intel_uc_reset_prepare - Prepare for reset
+ * @uc: the intel_uc structure
+ *
+ * Preparing for full gpu reset.
+ */
+void intel_uc_reset_prepare(struct intel_uc *uc)
+{
+	__intel_uc_reset_prepare(uc, true);
+}
+/**
+ * intel_uc_reset_prepare_without_guc_reset - Prepare for reset but don't reset
+ * the GuC
+ * @uc: the intel_uc structure
+ *
+ * Preparing for full gpu reset.
+ */
+void intel_uc_reset_prepare_without_guc_reset(struct intel_uc *uc)
+{
+	__intel_uc_reset_prepare(uc, false);
+}
+
+
 void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
 {
 	struct intel_guc *guc = &uc->guc;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
index 014bb7d83689..9d6191ece498 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
@@ -46,6 +46,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
 void intel_uc_driver_remove(struct intel_uc *uc);
 void intel_uc_init_mmio(struct intel_uc *uc);
 void intel_uc_reset_prepare(struct intel_uc *uc);
+void intel_uc_reset_prepare_without_guc_reset(struct intel_uc *uc);
 void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
 void intel_uc_reset_finish(struct intel_uc *uc);
 void intel_uc_cancel_requests(struct intel_uc *uc);
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* ✗ Fi.CI.CHECKPATCH: warning for drm/i915: Don't reset GuC before engine reset on full GT reset
  2024-04-15 16:44 [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset Nirmoy Das
@ 2024-04-15 23:40 ` Patchwork
  2024-04-15 23:40 ` ✗ Fi.CI.SPARSE: " Patchwork
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2024-04-15 23:40 UTC (permalink / raw
  To: Nirmoy Das; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Don't reset GuC before engine reset on full GT reset
URL   : https://patchwork.freedesktop.org/series/132457/
State : warning

== Summary ==

Error: dim checkpatch failed
de0c87c308af drm/i915: Don't reset GuC before engine reset on full GT reset
-:112: CHECK:LINE_SPACING: Please use a blank line after function/struct/union/enum declarations
#112: FILE: drivers/gpu/drm/i915/gt/uc/intel_uc.c:634:
+}
+/**

-:124: CHECK:LINE_SPACING: Please don't use multiple blank lines
#124: FILE: drivers/gpu/drm/i915/gt/uc/intel_uc.c:646:
+
+

total: 0 errors, 0 warnings, 2 checks, 97 lines checked



^ permalink raw reply	[flat|nested] 6+ messages in thread

* ✗ Fi.CI.SPARSE: warning for drm/i915: Don't reset GuC before engine reset on full GT reset
  2024-04-15 16:44 [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset Nirmoy Das
  2024-04-15 23:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
@ 2024-04-15 23:40 ` Patchwork
  2024-04-15 23:48 ` ✗ Fi.CI.BAT: failure " Patchwork
  2024-04-17  0:37 ` [RFC PATCH] " John Harrison
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2024-04-15 23:40 UTC (permalink / raw
  To: Nirmoy Das; +Cc: intel-gfx

== Series Details ==

Series: drm/i915: Don't reset GuC before engine reset on full GT reset
URL   : https://patchwork.freedesktop.org/series/132457/
State : warning

== Summary ==

Error: dim sparse failed
Sparse version: v0.6.2
Fast mode used, each commit won't be checked separately.



^ permalink raw reply	[flat|nested] 6+ messages in thread

* ✗ Fi.CI.BAT: failure for drm/i915: Don't reset GuC before engine reset on full GT reset
  2024-04-15 16:44 [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset Nirmoy Das
  2024-04-15 23:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
  2024-04-15 23:40 ` ✗ Fi.CI.SPARSE: " Patchwork
@ 2024-04-15 23:48 ` Patchwork
  2024-04-17  0:37 ` [RFC PATCH] " John Harrison
  3 siblings, 0 replies; 6+ messages in thread
From: Patchwork @ 2024-04-15 23:48 UTC (permalink / raw
  To: Nirmoy Das; +Cc: intel-gfx

[-- Attachment #1: Type: text/plain, Size: 12357 bytes --]

== Series Details ==

Series: drm/i915: Don't reset GuC before engine reset on full GT reset
URL   : https://patchwork.freedesktop.org/series/132457/
State : failure

== Summary ==

CI Bug Log - changes from CI_DRM_14582 -> Patchwork_132457v1
====================================================

Summary
-------

  **FAILURE**

  Serious unknown changes coming with Patchwork_132457v1 absolutely need to be
  verified manually.
  
  If you think the reported changes have nothing to do with the changes
  introduced in Patchwork_132457v1, please notify your bug team (I915-ci-infra@lists.freedesktop.org) to allow them
  to document this new failure mode, which will reduce false positives in CI.

  External URL: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/index.html

Participating hosts (38 -> 38)
------------------------------

  Additional (3): fi-kbl-7567u bat-dg2-11 bat-jsl-1 
  Missing    (3): fi-glk-j4005 fi-apl-guc fi-snb-2520m 

Possible new issues
-------------------

  Here are the unknown changes that may have been introduced in Patchwork_132457v1:

### IGT changes ###

#### Possible regressions ####

  * igt@i915_selftest@live@gem:
    - bat-arls-3:         [PASS][1] -> [DMESG-WARN][2]
   [1]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14582/bat-arls-3/igt@i915_selftest@live@gem.html
   [2]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-arls-3/igt@i915_selftest@live@gem.html

  
Known issues
------------

  Here are the changes found in Patchwork_132457v1 that come from known issues:

### IGT changes ###

#### Issues hit ####

  * igt@debugfs_test@basic-hwmon:
    - bat-jsl-1:          NOTRUN -> [SKIP][3] ([i915#9318])
   [3]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@debugfs_test@basic-hwmon.html

  * igt@gem_huc_copy@huc-copy:
    - fi-kbl-7567u:       NOTRUN -> [SKIP][4] ([i915#2190])
   [4]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/fi-kbl-7567u/igt@gem_huc_copy@huc-copy.html
    - bat-jsl-1:          NOTRUN -> [SKIP][5] ([i915#2190])
   [5]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@gem_huc_copy@huc-copy.html

  * igt@gem_lmem_swapping@basic:
    - fi-kbl-7567u:       NOTRUN -> [SKIP][6] ([i915#4613]) +3 other tests skip
   [6]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/fi-kbl-7567u/igt@gem_lmem_swapping@basic.html

  * igt@gem_lmem_swapping@verify-random:
    - bat-jsl-1:          NOTRUN -> [SKIP][7] ([i915#4613]) +3 other tests skip
   [7]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@gem_lmem_swapping@verify-random.html

  * igt@gem_mmap@basic:
    - bat-dg2-11:         NOTRUN -> [SKIP][8] ([i915#4083])
   [8]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@gem_mmap@basic.html

  * igt@gem_tiled_fence_blits@basic:
    - bat-dg2-11:         NOTRUN -> [SKIP][9] ([i915#4077]) +2 other tests skip
   [9]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@gem_tiled_fence_blits@basic.html

  * igt@gem_tiled_pread_basic:
    - bat-dg2-11:         NOTRUN -> [SKIP][10] ([i915#4079]) +1 other test skip
   [10]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@gem_tiled_pread_basic.html

  * igt@i915_pm_rps@basic-api:
    - bat-dg2-11:         NOTRUN -> [SKIP][11] ([i915#6621])
   [11]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@i915_pm_rps@basic-api.html

  * igt@i915_selftest@live@gem:
    - bat-atsm-1:         NOTRUN -> [ABORT][12] ([i915#10182] / [i915#10564])
   [12]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-atsm-1/igt@i915_selftest@live@gem.html

  * igt@i915_selftest@live@guc_hang:
    - bat-arls-3:         [PASS][13] -> [DMESG-FAIL][14] ([i915#10262]) +15 other tests dmesg-fail
   [13]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14582/bat-arls-3/igt@i915_selftest@live@guc_hang.html
   [14]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-arls-3/igt@i915_selftest@live@guc_hang.html

  * igt@kms_addfb_basic@addfb25-x-tiled-mismatch-legacy:
    - bat-dg2-11:         NOTRUN -> [SKIP][15] ([i915#4212]) +7 other tests skip
   [15]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_addfb_basic@addfb25-x-tiled-mismatch-legacy.html

  * igt@kms_addfb_basic@addfb25-y-tiled-small-legacy:
    - bat-dg2-11:         NOTRUN -> [SKIP][16] ([i915#5190])
   [16]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_addfb_basic@addfb25-y-tiled-small-legacy.html

  * igt@kms_addfb_basic@basic-y-tiled-legacy:
    - bat-dg2-11:         NOTRUN -> [SKIP][17] ([i915#4215] / [i915#5190])
   [17]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_addfb_basic@basic-y-tiled-legacy.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic:
    - bat-dg2-11:         NOTRUN -> [SKIP][18] ([i915#4103] / [i915#4213]) +1 other test skip
   [18]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-atomic.html

  * igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy:
    - bat-jsl-1:          NOTRUN -> [SKIP][19] ([i915#4103]) +1 other test skip
   [19]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@kms_cursor_legacy@basic-busy-flip-before-cursor-legacy.html

  * igt@kms_dsc@dsc-basic:
    - bat-dg2-11:         NOTRUN -> [SKIP][20] ([i915#3555] / [i915#3840])
   [20]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_dsc@dsc-basic.html
    - bat-jsl-1:          NOTRUN -> [SKIP][21] ([i915#3555] / [i915#9886])
   [21]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@kms_dsc@dsc-basic.html

  * igt@kms_force_connector_basic@force-load-detect:
    - fi-kbl-7567u:       NOTRUN -> [SKIP][22] +11 other tests skip
   [22]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/fi-kbl-7567u/igt@kms_force_connector_basic@force-load-detect.html
    - bat-jsl-1:          NOTRUN -> [SKIP][23]
   [23]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@kms_force_connector_basic@force-load-detect.html
    - bat-dg2-11:         NOTRUN -> [SKIP][24]
   [24]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_force_connector_basic@force-load-detect.html

  * igt@kms_force_connector_basic@prune-stale-modes:
    - bat-dg2-11:         NOTRUN -> [SKIP][25] ([i915#5274])
   [25]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_force_connector_basic@prune-stale-modes.html

  * igt@kms_pm_backlight@basic-brightness:
    - bat-dg2-11:         NOTRUN -> [SKIP][26] ([i915#5354])
   [26]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_pm_backlight@basic-brightness.html

  * igt@kms_psr@psr-sprite-plane-onoff:
    - bat-dg2-11:         NOTRUN -> [SKIP][27] ([i915#1072] / [i915#9732]) +3 other tests skip
   [27]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_psr@psr-sprite-plane-onoff.html

  * igt@kms_setmode@basic-clone-single-crtc:
    - bat-dg2-11:         NOTRUN -> [SKIP][28] ([i915#3555])
   [28]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@kms_setmode@basic-clone-single-crtc.html
    - bat-jsl-1:          NOTRUN -> [SKIP][29] ([i915#3555])
   [29]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-jsl-1/igt@kms_setmode@basic-clone-single-crtc.html

  * igt@prime_vgem@basic-fence-flip:
    - bat-dg2-11:         NOTRUN -> [SKIP][30] ([i915#3708])
   [30]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@prime_vgem@basic-fence-flip.html

  * igt@prime_vgem@basic-fence-mmap:
    - bat-dg2-11:         NOTRUN -> [SKIP][31] ([i915#3708] / [i915#4077]) +1 other test skip
   [31]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@prime_vgem@basic-fence-mmap.html

  * igt@prime_vgem@basic-read:
    - bat-dg2-11:         NOTRUN -> [SKIP][32] ([i915#3291] / [i915#3708]) +2 other tests skip
   [32]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-dg2-11/igt@prime_vgem@basic-read.html

  
#### Possible fixes ####

  * igt@i915_selftest@live@execlists:
    - bat-adls-6:         [TIMEOUT][33] ([i915#10795]) -> [PASS][34]
   [33]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14582/bat-adls-6/igt@i915_selftest@live@execlists.html
   [34]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-adls-6/igt@i915_selftest@live@execlists.html

  * igt@i915_selftest@live@gt_timelines:
    - bat-atsm-1:         [INCOMPLETE][35] ([i915#10461]) -> [PASS][36]
   [35]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14582/bat-atsm-1/igt@i915_selftest@live@gt_timelines.html
   [36]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-atsm-1/igt@i915_selftest@live@gt_timelines.html

  * igt@i915_selftest@live@hangcheck:
    - bat-adls-6:         [DMESG-WARN][37] ([i915#5591]) -> [PASS][38]
   [37]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14582/bat-adls-6/igt@i915_selftest@live@hangcheck.html
   [38]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-adls-6/igt@i915_selftest@live@hangcheck.html
    - {bat-rpls-4}:       [DMESG-WARN][39] ([i915#5591]) -> [PASS][40]
   [39]: https://intel-gfx-ci.01.org/tree/drm-tip/CI_DRM_14582/bat-rpls-4/igt@i915_selftest@live@hangcheck.html
   [40]: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/bat-rpls-4/igt@i915_selftest@live@hangcheck.html

  
  {name}: This element is suppressed. This means it is ignored when computing
          the status of the difference (SUCCESS, WARNING, or FAILURE).

  [i915#10182]: https://gitlab.freedesktop.org/drm/intel/issues/10182
  [i915#10262]: https://gitlab.freedesktop.org/drm/intel/issues/10262
  [i915#10435]: https://gitlab.freedesktop.org/drm/intel/issues/10435
  [i915#10461]: https://gitlab.freedesktop.org/drm/intel/issues/10461
  [i915#10564]: https://gitlab.freedesktop.org/drm/intel/issues/10564
  [i915#1072]: https://gitlab.freedesktop.org/drm/intel/issues/1072
  [i915#10795]: https://gitlab.freedesktop.org/drm/intel/issues/10795
  [i915#2190]: https://gitlab.freedesktop.org/drm/intel/issues/2190
  [i915#3291]: https://gitlab.freedesktop.org/drm/intel/issues/3291
  [i915#3555]: https://gitlab.freedesktop.org/drm/intel/issues/3555
  [i915#3708]: https://gitlab.freedesktop.org/drm/intel/issues/3708
  [i915#3840]: https://gitlab.freedesktop.org/drm/intel/issues/3840
  [i915#4077]: https://gitlab.freedesktop.org/drm/intel/issues/4077
  [i915#4079]: https://gitlab.freedesktop.org/drm/intel/issues/4079
  [i915#4083]: https://gitlab.freedesktop.org/drm/intel/issues/4083
  [i915#4103]: https://gitlab.freedesktop.org/drm/intel/issues/4103
  [i915#4212]: https://gitlab.freedesktop.org/drm/intel/issues/4212
  [i915#4213]: https://gitlab.freedesktop.org/drm/intel/issues/4213
  [i915#4215]: https://gitlab.freedesktop.org/drm/intel/issues/4215
  [i915#4613]: https://gitlab.freedesktop.org/drm/intel/issues/4613
  [i915#5190]: https://gitlab.freedesktop.org/drm/intel/issues/5190
  [i915#5274]: https://gitlab.freedesktop.org/drm/intel/issues/5274
  [i915#5354]: https://gitlab.freedesktop.org/drm/intel/issues/5354
  [i915#5591]: https://gitlab.freedesktop.org/drm/intel/issues/5591
  [i915#6621]: https://gitlab.freedesktop.org/drm/intel/issues/6621
  [i915#9318]: https://gitlab.freedesktop.org/drm/intel/issues/9318
  [i915#9732]: https://gitlab.freedesktop.org/drm/intel/issues/9732
  [i915#9886]: https://gitlab.freedesktop.org/drm/intel/issues/9886


Build changes
-------------

  * Linux: CI_DRM_14582 -> Patchwork_132457v1

  CI-20190529: 20190529
  CI_DRM_14582: 7bc330055f5c2924b42e389887691fea3f401a45 @ git://anongit.freedesktop.org/gfx-ci/linux
  IGT_7806: 849cd963ce7e8222dcf17cc872d355181fd2c2a2 @ https://gitlab.freedesktop.org/drm/igt-gpu-tools.git
  Patchwork_132457v1: 7bc330055f5c2924b42e389887691fea3f401a45 @ git://anongit.freedesktop.org/gfx-ci/linux


### Linux commits

2af8196a236f drm/i915: Don't reset GuC before engine reset on full GT reset

== Logs ==

For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/Patchwork_132457v1/index.html

[-- Attachment #2: Type: text/html, Size: 14381 bytes --]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset
  2024-04-15 16:44 [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset Nirmoy Das
                   ` (2 preceding siblings ...)
  2024-04-15 23:48 ` ✗ Fi.CI.BAT: failure " Patchwork
@ 2024-04-17  0:37 ` John Harrison
  2024-04-17 15:50   ` Nirmoy Das
  3 siblings, 1 reply; 6+ messages in thread
From: John Harrison @ 2024-04-17  0:37 UTC (permalink / raw
  To: Nirmoy Das, intel-gfx

On 4/15/2024 09:44, Nirmoy Das wrote:
> Currently intel_gt_reset() happens as follows:
>
> reset_prepare() ---> Sends GDRST to GuC, GuC is in GS_MIA_IN_RESET
> do_reset()
> 	__intel_gt_reset()
> 		*_engine_reset_prepare() -->RESET_CTL expects running
> 		GuC
> 		*_reset_engines()
> intel_gt_init_hw() --> GuC FW loading happens, GuC comes out of
> GS_MIA_IN_RESET.
>
> Fix the above flow so that GuC reset happens after all the
> engines reset is done.
>
> Cc: John Harrison <John.C.Harrison@Intel.com>
> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_reset.c |  9 ++++--
>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 42 +++++++++++++++++++++------
>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  1 +
>   3 files changed, 41 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
> index c8e9aa41fdea..9ebd68ce0c22 100644
> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
> @@ -879,8 +879,11 @@ static intel_engine_mask_t reset_prepare(struct intel_gt *gt)
>   	intel_engine_mask_t awake = 0;
>   	enum intel_engine_id id;
>   
> -	/* For GuC mode, ensure submission is disabled before stopping ring */
> -	intel_uc_reset_prepare(&gt->uc);
> +	/*
> +	 * For GuC mode, ensure submission is disabled before stopping ring.
> +	 * Don't reset the GuC a engine reset requires GuC to be running.
These two lines appear to be mutually exclusive unless there is a test 
for GuC submission being enabled, which I am not seeing. Note that 
"ensure submission is disabled" means "reset the GuC".

> +	 */
> +	intel_uc_reset_prepare_without_guc_reset(&gt->uc);
>   
>   	for_each_engine(engine, gt, id) {
>   		if (intel_engine_pm_get_if_awake(engine))
> @@ -1227,6 +1230,8 @@ void intel_gt_reset(struct intel_gt *gt,
>   
>   	intel_overlay_reset(gt->i915);
>   
> +	/* Now that all engines are clean, Reset the GuC */
> +	intel_uc_reset_prepare(&gt->uc);
>   	/*
>   	 * Next we need to restore the context, but we don't use those
>   	 * yet either...
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> index 7a63abf8f644..5feee4db2ccc 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
> @@ -345,7 +345,7 @@ static void __uc_fini(struct intel_uc *uc)
>   	intel_guc_fini(&uc->guc);
>   }
>   
> -static int __uc_sanitize(struct intel_uc *uc)
> +static void __uc_sanitize_without_guc_reset(struct intel_uc *uc)
>   {
>   	struct intel_guc *guc = &uc->guc;
>   	struct intel_huc *huc = &uc->huc;
> @@ -354,7 +354,11 @@ static int __uc_sanitize(struct intel_uc *uc)
>   
>   	intel_huc_sanitize(huc);
>   	intel_guc_sanitize(guc);
> +}
This seems like an extremely bad idea. You are wiping out all the GuC 
communication structures on the host side while the GuC itself is still 
executing and using those same structures.

Is the failure when doing individual engine resets or when doing a full 
GT reset?

If the former, I think a better approach would be to just not reset GuC 
at all (or indeed any UC) if not using GuC submission. Although, looking 
at the code, I'm not seeing an engine only reset path that does nuke the 
UC layers?

If it is the latter, then how/why are individual engine resets happening 
in the middle of a full GT reset? Don't we just splat everything all at 
once? Either way, it would be safer to split at the GT reset code layer 
rather than inside the UC layer. That is, when not using GuC submission, 
do the entire prepare/reset/init sequence of the UC layers as one 
'atomic' operation either before the GT/engine reset or after it (or 
potentially both before and after?).

John.


>   
> +static int __uc_sanitize(struct intel_uc *uc)
> +{
> +	__uc_sanitize_without_guc_reset(uc);
>   	return __intel_uc_reset_hw(uc);
>   }
>   
> @@ -593,13 +597,7 @@ static void __uc_fini_hw(struct intel_uc *uc)
>   	__uc_sanitize(uc);
>   }
>   
> -/**
> - * intel_uc_reset_prepare - Prepare for reset
> - * @uc: the intel_uc structure
> - *
> - * Preparing for full gpu reset.
> - */
> -void intel_uc_reset_prepare(struct intel_uc *uc)
> +static void __intel_uc_reset_prepare(struct intel_uc *uc, bool reset_guc)
>   {
>   	struct intel_guc *guc = &uc->guc;
>   
> @@ -617,9 +615,35 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
>   		intel_guc_submission_reset_prepare(guc);
>   
>   sanitize:
> -	__uc_sanitize(uc);
> +	if (reset_guc)
> +		__uc_sanitize(uc);
> +	else
> +		__uc_sanitize_without_guc_reset(uc);
>   }
>   
> +/**
> + * intel_uc_reset_prepare - Prepare for reset
> + * @uc: the intel_uc structure
> + *
> + * Preparing for full gpu reset.
> + */
> +void intel_uc_reset_prepare(struct intel_uc *uc)
> +{
> +	__intel_uc_reset_prepare(uc, true);
> +}
> +/**
> + * intel_uc_reset_prepare_without_guc_reset - Prepare for reset but don't reset
> + * the GuC
> + * @uc: the intel_uc structure
> + *
> + * Preparing for full gpu reset.
> + */
> +void intel_uc_reset_prepare_without_guc_reset(struct intel_uc *uc)
> +{
> +	__intel_uc_reset_prepare(uc, false);
> +}
> +
> +
>   void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
>   {
>   	struct intel_guc *guc = &uc->guc;
> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> index 014bb7d83689..9d6191ece498 100644
> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
> @@ -46,6 +46,7 @@ void intel_uc_driver_late_release(struct intel_uc *uc);
>   void intel_uc_driver_remove(struct intel_uc *uc);
>   void intel_uc_init_mmio(struct intel_uc *uc);
>   void intel_uc_reset_prepare(struct intel_uc *uc);
> +void intel_uc_reset_prepare_without_guc_reset(struct intel_uc *uc);
>   void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
>   void intel_uc_reset_finish(struct intel_uc *uc);
>   void intel_uc_cancel_requests(struct intel_uc *uc);


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset
  2024-04-17  0:37 ` [RFC PATCH] " John Harrison
@ 2024-04-17 15:50   ` Nirmoy Das
  0 siblings, 0 replies; 6+ messages in thread
From: Nirmoy Das @ 2024-04-17 15:50 UTC (permalink / raw
  To: John Harrison, Nirmoy Das, intel-gfx

Hi John,

On 4/17/2024 2:37 AM, John Harrison wrote:
> On 4/15/2024 09:44, Nirmoy Das wrote:
>> Currently intel_gt_reset() happens as follows:
>>
>> reset_prepare() ---> Sends GDRST to GuC, GuC is in GS_MIA_IN_RESET
>> do_reset()
>>     __intel_gt_reset()
>>         *_engine_reset_prepare() -->RESET_CTL expects running
>>         GuC
>>         *_reset_engines()
>> intel_gt_init_hw() --> GuC FW loading happens, GuC comes out of
>> GS_MIA_IN_RESET.
>>
>> Fix the above flow so that GuC reset happens after all the
>> engines reset is done.
>>
>> Cc: John Harrison <John.C.Harrison@Intel.com>
>> Signed-off-by: Nirmoy Das <nirmoy.das@intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_reset.c |  9 ++++--
>>   drivers/gpu/drm/i915/gt/uc/intel_uc.c | 42 +++++++++++++++++++++------
>>   drivers/gpu/drm/i915/gt/uc/intel_uc.h |  1 +
>>   3 files changed, 41 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c 
>> b/drivers/gpu/drm/i915/gt/intel_reset.c
>> index c8e9aa41fdea..9ebd68ce0c22 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_reset.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_reset.c
>> @@ -879,8 +879,11 @@ static intel_engine_mask_t reset_prepare(struct 
>> intel_gt *gt)
>>       intel_engine_mask_t awake = 0;
>>       enum intel_engine_id id;
>>   -    /* For GuC mode, ensure submission is disabled before stopping 
>> ring */
>> -    intel_uc_reset_prepare(&gt->uc);
>> +    /*
>> +     * For GuC mode, ensure submission is disabled before stopping 
>> ring.
>> +     * Don't reset the GuC a engine reset requires GuC to be running.
> These two lines appear to be mutually exclusive unless there is a test 
> for GuC submission being enabled, which I am not seeing. Note that 
> "ensure submission is disabled" means "reset the GuC".
>
>> +     */
>> +    intel_uc_reset_prepare_without_guc_reset(&gt->uc);
>>         for_each_engine(engine, gt, id) {
>>           if (intel_engine_pm_get_if_awake(engine))
>> @@ -1227,6 +1230,8 @@ void intel_gt_reset(struct intel_gt *gt,
>>         intel_overlay_reset(gt->i915);
>>   +    /* Now that all engines are clean, Reset the GuC */
>> +    intel_uc_reset_prepare(&gt->uc);
>>       /*
>>        * Next we need to restore the context, but we don't use those
>>        * yet either...
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c 
>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>> index 7a63abf8f644..5feee4db2ccc 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c
>> @@ -345,7 +345,7 @@ static void __uc_fini(struct intel_uc *uc)
>>       intel_guc_fini(&uc->guc);
>>   }
>>   -static int __uc_sanitize(struct intel_uc *uc)
>> +static void __uc_sanitize_without_guc_reset(struct intel_uc *uc)
>>   {
>>       struct intel_guc *guc = &uc->guc;
>>       struct intel_huc *huc = &uc->huc;
>> @@ -354,7 +354,11 @@ static int __uc_sanitize(struct intel_uc *uc)
>>         intel_huc_sanitize(huc);
>>       intel_guc_sanitize(guc);
>> +}
> This seems like an extremely bad idea. You are wiping out all the GuC 
> communication structures on the host side while the GuC itself is 
> still executing and using those same structures.
>
> Is the failure when doing individual engine resets or when doing a 
> full GT reset?

The failed test is doing "intel_gt_reset(gt, ALL_ENGINES, NULL)" so a 
full GT reset.


>
> If the former, I think a better approach would be to just not reset 
> GuC at all (or indeed any UC) if not using GuC submission. Although, 
> looking at the code, I'm not seeing an engine only reset path that 
> does nuke the UC layers?


Yes, intel_engine_reset() doesn't touch UC layer.

>
> If it is the latter, 

This is the case here.


> then how/why are individual engine resets happening in the middle of a 
> full GT reset? Don't we just splat everything all at once? 

It seems we use __intel_gt_reset(engine->gt, engine_mask) to reset all 
or some engines.

> Either way, it would be safer to split at the GT reset code layer 
> rather than inside the UC layer. That is, when not using GuC 
> submission, do the entire prepare/reset/init sequence of the UC layers 
> as one 'atomic' operation either before the GT/engine reset or after 
> it (or potentially both before and after?).

I think this should work. Let me try it out


Thanks,

Nirmoy


>
> John.
>
>
>>   +static int __uc_sanitize(struct intel_uc *uc)
>> +{
>> +    __uc_sanitize_without_guc_reset(uc);
>>       return __intel_uc_reset_hw(uc);
>>   }
>>   @@ -593,13 +597,7 @@ static void __uc_fini_hw(struct intel_uc *uc)
>>       __uc_sanitize(uc);
>>   }
>>   -/**
>> - * intel_uc_reset_prepare - Prepare for reset
>> - * @uc: the intel_uc structure
>> - *
>> - * Preparing for full gpu reset.
>> - */
>> -void intel_uc_reset_prepare(struct intel_uc *uc)
>> +static void __intel_uc_reset_prepare(struct intel_uc *uc, bool 
>> reset_guc)
>>   {
>>       struct intel_guc *guc = &uc->guc;
>>   @@ -617,9 +615,35 @@ void intel_uc_reset_prepare(struct intel_uc *uc)
>>           intel_guc_submission_reset_prepare(guc);
>>     sanitize:
>> -    __uc_sanitize(uc);
>> +    if (reset_guc)
>> +        __uc_sanitize(uc);
>> +    else
>> +        __uc_sanitize_without_guc_reset(uc);
>>   }
>>   +/**
>> + * intel_uc_reset_prepare - Prepare for reset
>> + * @uc: the intel_uc structure
>> + *
>> + * Preparing for full gpu reset.
>> + */
>> +void intel_uc_reset_prepare(struct intel_uc *uc)
>> +{
>> +    __intel_uc_reset_prepare(uc, true);
>> +}
>> +/**
>> + * intel_uc_reset_prepare_without_guc_reset - Prepare for reset but 
>> don't reset
>> + * the GuC
>> + * @uc: the intel_uc structure
>> + *
>> + * Preparing for full gpu reset.
>> + */
>> +void intel_uc_reset_prepare_without_guc_reset(struct intel_uc *uc)
>> +{
>> +    __intel_uc_reset_prepare(uc, false);
>> +}
>> +
>> +
>>   void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled)
>>   {
>>       struct intel_guc *guc = &uc->guc;
>> diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.h 
>> b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>> index 014bb7d83689..9d6191ece498 100644
>> --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>> +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.h
>> @@ -46,6 +46,7 @@ void intel_uc_driver_late_release(struct intel_uc 
>> *uc);
>>   void intel_uc_driver_remove(struct intel_uc *uc);
>>   void intel_uc_init_mmio(struct intel_uc *uc);
>>   void intel_uc_reset_prepare(struct intel_uc *uc);
>> +void intel_uc_reset_prepare_without_guc_reset(struct intel_uc *uc);
>>   void intel_uc_reset(struct intel_uc *uc, intel_engine_mask_t stalled);
>>   void intel_uc_reset_finish(struct intel_uc *uc);
>>   void intel_uc_cancel_requests(struct intel_uc *uc);
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-04-17 15:50 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-15 16:44 [RFC PATCH] drm/i915: Don't reset GuC before engine reset on full GT reset Nirmoy Das
2024-04-15 23:40 ` ✗ Fi.CI.CHECKPATCH: warning for " Patchwork
2024-04-15 23:40 ` ✗ Fi.CI.SPARSE: " Patchwork
2024-04-15 23:48 ` ✗ Fi.CI.BAT: failure " Patchwork
2024-04-17  0:37 ` [RFC PATCH] " John Harrison
2024-04-17 15:50   ` Nirmoy Das

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.