All the mail mirrored from lore.kernel.org
 help / color / mirror / Atom feed
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Subject: [Intel-gfx] [PATCH 1/1] drm/i915: Check if engine has heartbeat when closing a context
Date: Wed, 28 Jul 2021 17:34:00 -0700	[thread overview]
Message-ID: <20210729003400.151864-2-matthew.brost@intel.com> (raw)
In-Reply-To: <20210729003400.151864-1-matthew.brost@intel.com>

If an engine associated with a context does not have a heartbeat, ban it
immediately. This is needed for GuC submission as a idle pulse doesn't
kick the context off the hardware where it then can check for a
heartbeat and ban the context.

This patch also updates intel_engine_has_heartbeat to be a vfunc as we
now need to call this function on execlists virtual engines too.  

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  5 +++--
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine.h        | 21 ++-----------------
 .../drm/i915/gt/intel_execlists_submission.c  | 14 +++++++++++++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 +++++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  2 --
 6 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9c3672bac0e2..b8e01c5ba9e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1090,8 +1090,9 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool local_ban = ban || !intel_engine_has_heartbeat(ce->engine);
 
-		if (ban && intel_context_ban(ce, NULL))
+		if (local_ban && intel_context_ban(ce, NULL))
 			continue;
 
 		/*
@@ -1104,7 +1105,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && local_ban)
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2..65f2eb2a78e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,6 +55,8 @@ struct intel_context_ops {
 	void (*reset)(struct intel_context *ce);
 	void (*destroy)(struct kref *kref);
 
+	bool (*has_heartbeat)(const struct intel_engine_cs *engine);
+
 	/* virtual engine/context interface */
 	struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
 						unsigned int count);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c2a5640ae055..1b11a808acc4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -283,28 +283,11 @@ struct intel_context *
 intel_engine_create_virtual(struct intel_engine_cs **siblings,
 			    unsigned int count);
 
-static inline bool
-intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
-{
-	/*
-	 * For non-GuC submission we expect the back-end to look at the
-	 * heartbeat status of the actual physical engine that the work
-	 * has been (or is being) scheduled on, so we should only reach
-	 * here with GuC submission enabled.
-	 */
-	GEM_BUG_ON(!intel_engine_uses_guc(engine));
-
-	return intel_guc_virtual_engine_has_heartbeat(engine);
-}
-
 static inline bool
 intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
 {
-	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
-		return false;
-
-	if (intel_engine_is_virtual(engine))
-		return intel_virtual_engine_has_heartbeat(engine);
+	if (engine->cops->has_heartbeat)
+		return engine->cops->has_heartbeat(engine);
 	else
 		return READ_ONCE(engine->props.heartbeat_interval_ms);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a4..18005b5546b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3619,6 +3619,18 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
 	return ve->siblings[sibling];
 }
 
+static bool virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp, mask = ve->mask;
+
+	for_each_engine_masked(engine, ve->gt, mask, tmp)
+		if (READ_ONCE(engine->props.heartbeat_interval_ms))
+			return true;
+
+	return false;
+}
+
 static const struct intel_context_ops virtual_context_ops = {
 	.flags = COPS_HAS_INFLIGHT,
 
@@ -3634,6 +3646,8 @@ static const struct intel_context_ops virtual_context_ops = {
 	.enter = virtual_context_enter,
 	.exit = virtual_context_exit,
 
+	.has_heartbeat = virtual_engine_has_heartbeat,
+
 	.destroy = virtual_context_destroy,
 
 	.get_sibling = virtual_get_sibling,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 89ff0e4b4bc7..ae70bff3605f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2168,6 +2168,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
 	return lrc_alloc(ce, engine);
 }
 
+static bool guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
+
 static const struct intel_context_ops virtual_guc_context_ops = {
 	.alloc = guc_virtual_context_alloc,
 
@@ -2183,6 +2185,8 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 	.enter = guc_virtual_context_enter,
 	.exit = guc_virtual_context_exit,
 
+	.has_heartbeat = guc_virtual_engine_has_heartbeat,
+
 	.sched_disable = guc_context_sched_disable,
 
 	.destroy = guc_context_destroy,
@@ -3029,7 +3033,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
 	return ERR_PTR(err);
 }
 
-bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
+static bool guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
 {
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t tmp, mask = ve->mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index c7ef44fa0c36..c2afc3b88fd8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -29,8 +29,6 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
 				    struct i915_request *hung_rq,
 				    struct drm_printer *m);
 
-bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
-
 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
 				   atomic_t *wait_var,
 				   bool interruptible,
-- 
2.28.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

WARNING: multiple messages have this Message-ID (diff)
From: Matthew Brost <matthew.brost@intel.com>
To: <intel-gfx@lists.freedesktop.org>, <dri-devel@lists.freedesktop.org>
Subject: [PATCH 1/1] drm/i915: Check if engine has heartbeat when closing a context
Date: Wed, 28 Jul 2021 17:34:00 -0700	[thread overview]
Message-ID: <20210729003400.151864-2-matthew.brost@intel.com> (raw)
In-Reply-To: <20210729003400.151864-1-matthew.brost@intel.com>

If an engine associated with a context does not have a heartbeat, ban it
immediately. This is needed for GuC submission as a idle pulse doesn't
kick the context off the hardware where it then can check for a
heartbeat and ban the context.

This patch also updates intel_engine_has_heartbeat to be a vfunc as we
now need to call this function on execlists virtual engines too.  

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c   |  5 +++--
 drivers/gpu/drm/i915/gt/intel_context_types.h |  2 ++
 drivers/gpu/drm/i915/gt/intel_engine.h        | 21 ++-----------------
 .../drm/i915/gt/intel_execlists_submission.c  | 14 +++++++++++++
 .../gpu/drm/i915/gt/uc/intel_guc_submission.c |  6 +++++-
 .../gpu/drm/i915/gt/uc/intel_guc_submission.h |  2 --
 6 files changed, 26 insertions(+), 24 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 9c3672bac0e2..b8e01c5ba9e5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1090,8 +1090,9 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 	 */
 	for_each_gem_engine(ce, engines, it) {
 		struct intel_engine_cs *engine;
+		bool local_ban = ban || !intel_engine_has_heartbeat(ce->engine);
 
-		if (ban && intel_context_ban(ce, NULL))
+		if (local_ban && intel_context_ban(ce, NULL))
 			continue;
 
 		/*
@@ -1104,7 +1105,7 @@ static void kill_engines(struct i915_gem_engines *engines, bool ban)
 		engine = active_engine(ce);
 
 		/* First attempt to gracefully cancel the context */
-		if (engine && !__cancel_engine(engine) && ban)
+		if (engine && !__cancel_engine(engine) && local_ban)
 			/*
 			 * If we are unable to send a preemptive pulse to bump
 			 * the context from the GPU, we have to resort to a full
diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h
index e54351a170e2..65f2eb2a78e4 100644
--- a/drivers/gpu/drm/i915/gt/intel_context_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_context_types.h
@@ -55,6 +55,8 @@ struct intel_context_ops {
 	void (*reset)(struct intel_context *ce);
 	void (*destroy)(struct kref *kref);
 
+	bool (*has_heartbeat)(const struct intel_engine_cs *engine);
+
 	/* virtual engine/context interface */
 	struct intel_context *(*create_virtual)(struct intel_engine_cs **engine,
 						unsigned int count);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h
index c2a5640ae055..1b11a808acc4 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine.h
@@ -283,28 +283,11 @@ struct intel_context *
 intel_engine_create_virtual(struct intel_engine_cs **siblings,
 			    unsigned int count);
 
-static inline bool
-intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine)
-{
-	/*
-	 * For non-GuC submission we expect the back-end to look at the
-	 * heartbeat status of the actual physical engine that the work
-	 * has been (or is being) scheduled on, so we should only reach
-	 * here with GuC submission enabled.
-	 */
-	GEM_BUG_ON(!intel_engine_uses_guc(engine));
-
-	return intel_guc_virtual_engine_has_heartbeat(engine);
-}
-
 static inline bool
 intel_engine_has_heartbeat(const struct intel_engine_cs *engine)
 {
-	if (!IS_ACTIVE(CONFIG_DRM_I915_HEARTBEAT_INTERVAL))
-		return false;
-
-	if (intel_engine_is_virtual(engine))
-		return intel_virtual_engine_has_heartbeat(engine);
+	if (engine->cops->has_heartbeat)
+		return engine->cops->has_heartbeat(engine);
 	else
 		return READ_ONCE(engine->props.heartbeat_interval_ms);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de5f9c86b9a4..18005b5546b6 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -3619,6 +3619,18 @@ virtual_get_sibling(struct intel_engine_cs *engine, unsigned int sibling)
 	return ve->siblings[sibling];
 }
 
+static bool virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
+{
+	struct intel_engine_cs *engine;
+	intel_engine_mask_t tmp, mask = ve->mask;
+
+	for_each_engine_masked(engine, ve->gt, mask, tmp)
+		if (READ_ONCE(engine->props.heartbeat_interval_ms))
+			return true;
+
+	return false;
+}
+
 static const struct intel_context_ops virtual_context_ops = {
 	.flags = COPS_HAS_INFLIGHT,
 
@@ -3634,6 +3646,8 @@ static const struct intel_context_ops virtual_context_ops = {
 	.enter = virtual_context_enter,
 	.exit = virtual_context_exit,
 
+	.has_heartbeat = virtual_engine_has_heartbeat,
+
 	.destroy = virtual_context_destroy,
 
 	.get_sibling = virtual_get_sibling,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 89ff0e4b4bc7..ae70bff3605f 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -2168,6 +2168,8 @@ static int guc_virtual_context_alloc(struct intel_context *ce)
 	return lrc_alloc(ce, engine);
 }
 
+static bool guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
+
 static const struct intel_context_ops virtual_guc_context_ops = {
 	.alloc = guc_virtual_context_alloc,
 
@@ -2183,6 +2185,8 @@ static const struct intel_context_ops virtual_guc_context_ops = {
 	.enter = guc_virtual_context_enter,
 	.exit = guc_virtual_context_exit,
 
+	.has_heartbeat = guc_virtual_engine_has_heartbeat,
+
 	.sched_disable = guc_context_sched_disable,
 
 	.destroy = guc_context_destroy,
@@ -3029,7 +3033,7 @@ guc_create_virtual(struct intel_engine_cs **siblings, unsigned int count)
 	return ERR_PTR(err);
 }
 
-bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
+static bool guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve)
 {
 	struct intel_engine_cs *engine;
 	intel_engine_mask_t tmp, mask = ve->mask;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
index c7ef44fa0c36..c2afc3b88fd8 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h
@@ -29,8 +29,6 @@ void intel_guc_dump_active_requests(struct intel_engine_cs *engine,
 				    struct i915_request *hung_rq,
 				    struct drm_printer *m);
 
-bool intel_guc_virtual_engine_has_heartbeat(const struct intel_engine_cs *ve);
-
 int intel_guc_wait_for_pending_msg(struct intel_guc *guc,
 				   atomic_t *wait_var,
 				   bool interruptible,
-- 
2.28.0


  reply	other threads:[~2021-07-29  0:16 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-07-29  0:33 [Intel-gfx] [PATCH 0/1] Fix gem_ctx_persistence failures with GuC submission Matthew Brost
2021-07-29  0:33 ` Matthew Brost
2021-07-29  0:34 ` Matthew Brost [this message]
2021-07-29  0:34   ` [PATCH 1/1] drm/i915: Check if engine has heartbeat when closing a context Matthew Brost
2021-07-30  0:13   ` [Intel-gfx] " John Harrison
2021-07-30  0:13     ` John Harrison
2021-07-30  9:49     ` Tvrtko Ursulin
2021-07-30 18:13       ` John Harrison
2021-08-02  9:40         ` Tvrtko Ursulin
2021-08-06 18:00           ` John Harrison
2021-08-06 19:46             ` Daniel Vetter
2021-08-09 23:12               ` John Harrison
2021-08-10  6:36                 ` Daniel Vetter
2021-08-18  0:28                   ` John Harrison
2021-08-18  9:26                     ` Daniel Vetter
2021-07-30 18:13       ` Matthew Brost
2021-07-29  2:08 ` [Intel-gfx] ✓ Fi.CI.BAT: success for Fix gem_ctx_persistence failures with GuC submission Patchwork
2021-07-29  7:30 ` [Intel-gfx] ✗ Fi.CI.IGT: failure " Patchwork
2021-08-10  6:38 ` [Intel-gfx] [PATCH 0/1] " Daniel Vetter
2021-08-10  6:38   ` Daniel Vetter
2021-08-18  0:08   ` [Intel-gfx] " John Harrison
2021-08-18  9:49     ` Daniel Vetter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210729003400.151864-2-matthew.brost@intel.com \
    --to=matthew.brost@intel.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=intel-gfx@lists.freedesktop.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.