gfs2.lists.linux.dev archive mirror
 help / color / mirror / Atom feed
* [PATCH] gfs2: Fix do_xmote locking error
@ 2024-04-11 13:52 Andreas Gruenbacher
  2024-04-16 10:10 ` [PATCH 0/3] gfs2: do_xmote fixes Andreas Gruenbacher
  0 siblings, 1 reply; 5+ messages in thread
From: Andreas Gruenbacher @ 2024-04-11 13:52 UTC (permalink / raw
  To: gfs2; +Cc: Andreas Gruenbacher

Commit 86934198eefa added a 'goto' statement from the beginning of
do_xmote() to further below where the glock spinlock must no longer be
held.  I don't fully understand what's going on here, and apparently
this case isn't covered by our testing.  In any case though, we must
drop the glock spinlock before this 'goto' or else we won't be able to
retake the lock at the end of do_xmote().

Fixes: 86934198eefa ("gfs2: Clear flags when withdraw prevents xmote")
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index ce62937d85b5..2c9fdc3f7d76 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -705,8 +705,10 @@ __acquires(&gl->gl_lockref.lock)
 	int ret;
 
 	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
-	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
+	    gh && !(gh->gh_flags & LM_FLAG_NOEXP)) {
+		spin_unlock(&gl->gl_lockref.lock);
 		goto skip_inval;
+	}
 
 	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP);
 	GLOCK_BUG_ON(gl, gl->gl_state == target);
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 0/3] gfs2: do_xmote fixes
  2024-04-11 13:52 [PATCH] gfs2: Fix do_xmote locking error Andreas Gruenbacher
@ 2024-04-16 10:10 ` Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 1/3] gfs2: finish_xmote cleanup Andreas Gruenbacher
                     ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Andreas Gruenbacher @ 2024-04-16 10:10 UTC (permalink / raw
  To: gfs2; +Cc: Andreas Gruenbacher

A closer review of commit 86934198eefa ("gfs2: Clear flags when withdraw
prevents xmote") has revealed that this commit got things very wrong and
that a more thorough cleanup of function do_xmote() is in order.  Here
is a set of three patches for doing that:

gfs2: finish_xmote cleanup

  Pushes more of the glock spin lock logic into do_xmote() which then
  allows to simplify things there.

gfs2: Fix do_xmote locking error

  The actual do_xmote() fixes and cleanup.

gfs2: Remove and replace gfs2_glock_queue_work

  A follow-up cleanup now that the previous patch has eliminated the last
  user of gfs2_glock_queue_work().

This obsoletes the previous patch "gfs2: Fix do_xmote locking error"
(https://lore.kernel.org/gfs2/20240411135243.260020-1-agruenba@redhat.com/).

Thanks,
Andreas

Andreas Gruenbacher (3):
  gfs2: finish_xmote cleanup
  gfs2: do_xmote fixes
  gfs2: Remove and replace gfs2_glock_queue_work

 fs/gfs2/glock.c | 78 ++++++++++++++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 36 deletions(-)

-- 
2.44.0


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/3] gfs2: finish_xmote cleanup
  2024-04-16 10:10 ` [PATCH 0/3] gfs2: do_xmote fixes Andreas Gruenbacher
@ 2024-04-16 10:10   ` Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 2/3] gfs2: do_xmote fixes Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 3/3] gfs2: Remove and replace gfs2_glock_queue_work Andreas Gruenbacher
  2 siblings, 0 replies; 5+ messages in thread
From: Andreas Gruenbacher @ 2024-04-16 10:10 UTC (permalink / raw
  To: gfs2; +Cc: Andreas Gruenbacher

Currently, function finish_xmote() takes and releases the glock
spinlock.  However, all of its callers immediately take that spinlock
again, so it makes more sense to take the spin lock before calling
finish_xmote() already.

With that, thaw_glock() is the only place that sets the GLF_HAVE_REPLY
flag outside of the glock spinlock, but it also takes that spinlock
immediately thereafter.  Change that to set the bit when the spinlock is
already held.  This allows to switch from test_and_clear_bit() to
test_bit() and clear_bit() in glock_work_func().

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 2e81304fdf90..52d1ba7245d4 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -623,7 +623,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
 	struct gfs2_holder *gh;
 	unsigned state = ret & LM_OUT_ST_MASK;
 
-	spin_lock(&gl->gl_lockref.lock);
 	trace_gfs2_glock_state_change(gl, state);
 	state_change(gl, state);
 	gh = find_first_waiter(gl);
@@ -671,7 +670,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
 			       gl->gl_target, state);
 			GLOCK_BUG_ON(gl, 1);
 		}
-		spin_unlock(&gl->gl_lockref.lock);
 		return;
 	}
 
@@ -694,7 +692,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
 	}
 out:
 	clear_bit(GLF_LOCK, &gl->gl_flags);
-	spin_unlock(&gl->gl_lockref.lock);
 }
 
 static bool is_system_glock(struct gfs2_glock *gl)
@@ -841,15 +838,19 @@ __acquires(&gl->gl_lockref.lock)
 		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
 		    target == LM_ST_UNLOCKED &&
 		    test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
+			spin_lock(&gl->gl_lockref.lock);
 			finish_xmote(gl, target);
-			gfs2_glock_queue_work(gl, 0);
+			__gfs2_glock_queue_work(gl, 0);
+			spin_unlock(&gl->gl_lockref.lock);
 		} else if (ret) {
 			fs_err(sdp, "lm_lock ret %d\n", ret);
 			GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp));
 		}
 	} else { /* lock_nolock */
+		spin_lock(&gl->gl_lockref.lock);
 		finish_xmote(gl, target);
-		gfs2_glock_queue_work(gl, 0);
+		__gfs2_glock_queue_work(gl, 0);
+		spin_unlock(&gl->gl_lockref.lock);
 	}
 out:
 	spin_lock(&gl->gl_lockref.lock);
@@ -1106,11 +1107,12 @@ static void glock_work_func(struct work_struct *work)
 	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
 	unsigned int drop_refs = 1;
 
-	if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
+	spin_lock(&gl->gl_lockref.lock);
+	if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
+		clear_bit(GLF_REPLY_PENDING, &gl->gl_flags);
 		finish_xmote(gl, gl->gl_reply);
 		drop_refs++;
 	}
-	spin_lock(&gl->gl_lockref.lock);
 	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
 	    gl->gl_state != LM_ST_UNLOCKED &&
 	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
@@ -2181,8 +2183,11 @@ static void thaw_glock(struct gfs2_glock *gl)
 		return;
 	if (!lockref_get_not_dead(&gl->gl_lockref))
 		return;
+
+	spin_lock(&gl->gl_lockref.lock);
 	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
-	gfs2_glock_queue_work(gl, 0);
+	__gfs2_glock_queue_work(gl, 0);
+	spin_unlock(&gl->gl_lockref.lock);
 }
 
 /**
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/3] gfs2: do_xmote fixes
  2024-04-16 10:10 ` [PATCH 0/3] gfs2: do_xmote fixes Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 1/3] gfs2: finish_xmote cleanup Andreas Gruenbacher
@ 2024-04-16 10:10   ` Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 3/3] gfs2: Remove and replace gfs2_glock_queue_work Andreas Gruenbacher
  2 siblings, 0 replies; 5+ messages in thread
From: Andreas Gruenbacher @ 2024-04-16 10:10 UTC (permalink / raw
  To: gfs2; +Cc: Andreas Gruenbacher

Function do_xmote() is called with the glock spinlock held.  Commit
86934198eefa added a 'goto skip_inval' statement at the beginning of the
function to further below where the glock spinlock is expected not to be
held anymore.  Then it added code there that requires the glock spinlock
to be held.  This doesn't make sense; fix this up by dropping and
retaking the spinlock where needed.

In addition, when ->lm_lock() returned an error, do_xmote() didn't fail
the locking operation, and simply left the glock hanging; fix that as
well.  (This is a much older error.)

Fixes: 86934198eefa ("gfs2: Clear flags when withdraw prevents xmote")
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 44 +++++++++++++++++++++++++-------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 52d1ba7245d4..db0e960d1b33 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -719,6 +719,7 @@ __acquires(&gl->gl_lockref.lock)
 {
 	const struct gfs2_glock_operations *glops = gl->gl_ops;
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+	struct lm_lockstruct *ls = &sdp->sd_lockstruct;
 	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
 	int ret;
 
@@ -747,6 +748,9 @@ __acquires(&gl->gl_lockref.lock)
 	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
 	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
 		clear_bit(GLF_BLOCKING, &gl->gl_flags);
+	if (!glops->go_inval && !glops->go_sync)
+		goto skip_inval;
+
 	spin_unlock(&gl->gl_lockref.lock);
 	if (glops->go_sync) {
 		ret = glops->go_sync(gl);
@@ -759,6 +763,7 @@ __acquires(&gl->gl_lockref.lock)
 				fs_err(sdp, "Error %d syncing glock \n", ret);
 				gfs2_dump_glock(NULL, gl, true);
 			}
+			spin_lock(&gl->gl_lockref.lock);
 			goto skip_inval;
 		}
 	}
@@ -779,9 +784,10 @@ __acquires(&gl->gl_lockref.lock)
 		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
 		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
 	}
+	spin_lock(&gl->gl_lockref.lock);
 
 skip_inval:
-	gfs2_glock_hold(gl);
+	gl->gl_lockref.count++;
 	/*
 	 * Check for an error encountered since we called go_sync and go_inval.
 	 * If so, we can't withdraw from the glock code because the withdraw
@@ -823,37 +829,37 @@ __acquires(&gl->gl_lockref.lock)
 			 */
 			clear_bit(GLF_LOCK, &gl->gl_flags);
 			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
-			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
-			goto out;
+			__gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
+			return;
 		} else {
 			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
 		}
 	}
 
-	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
-		struct lm_lockstruct *ls = &sdp->sd_lockstruct;
+	if (ls->ls_ops->lm_lock) {
+		spin_unlock(&gl->gl_lockref.lock);
+		ret = ls->ls_ops->lm_lock(gl, target, lck_flags);
+		spin_lock(&gl->gl_lockref.lock);
 
-		/* lock_dlm */
-		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
 		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
 		    target == LM_ST_UNLOCKED &&
 		    test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) {
-			spin_lock(&gl->gl_lockref.lock);
-			finish_xmote(gl, target);
-			__gfs2_glock_queue_work(gl, 0);
-			spin_unlock(&gl->gl_lockref.lock);
+			/*
+			 * The lockspace has been released and the lock has
+			 * been unlocked implicitly.
+			 */
 		} else if (ret) {
 			fs_err(sdp, "lm_lock ret %d\n", ret);
-			GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp));
+			target = gl->gl_state | LM_OUT_ERROR;
+		} else {
+			/* The operation will be completed asynchronously. */
+			return;
 		}
-	} else { /* lock_nolock */
-		spin_lock(&gl->gl_lockref.lock);
-		finish_xmote(gl, target);
-		__gfs2_glock_queue_work(gl, 0);
-		spin_unlock(&gl->gl_lockref.lock);
 	}
-out:
-	spin_lock(&gl->gl_lockref.lock);
+
+	/* Complete the operation now. */
+	finish_xmote(gl, target);
+	__gfs2_glock_queue_work(gl, 0);
 }
 
 /**
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 3/3] gfs2: Remove and replace gfs2_glock_queue_work
  2024-04-16 10:10 ` [PATCH 0/3] gfs2: do_xmote fixes Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 1/3] gfs2: finish_xmote cleanup Andreas Gruenbacher
  2024-04-16 10:10   ` [PATCH 2/3] gfs2: do_xmote fixes Andreas Gruenbacher
@ 2024-04-16 10:10   ` Andreas Gruenbacher
  2 siblings, 0 replies; 5+ messages in thread
From: Andreas Gruenbacher @ 2024-04-16 10:10 UTC (permalink / raw
  To: gfs2; +Cc: Andreas Gruenbacher

There are no more callers of gfs2_glock_queue_work() left, so remove
that helper.  With that, we can now rename __gfs2_glock_queue_work()
back to gfs2_glock_queue_work() to get rid of some unnecessary clutter.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 35 +++++++++++++++--------------------
 1 file changed, 15 insertions(+), 20 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index db0e960d1b33..66b084cd08e9 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -272,7 +272,7 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
  * Enqueue the glock on the work queue.  Passes one glock reference on to the
  * work queue.
  */
-static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
+static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
 	if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
 		/*
 		 * We are holding the lockref spinlock, and the work was still
@@ -285,12 +285,6 @@ static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay)
 	}
 }
 
-static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
-	spin_lock(&gl->gl_lockref.lock);
-	__gfs2_glock_queue_work(gl, delay);
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
 static void __gfs2_glock_put(struct gfs2_glock *gl)
 {
 	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
@@ -335,7 +329,8 @@ void gfs2_glock_put_async(struct gfs2_glock *gl)
 	if (lockref_put_or_lock(&gl->gl_lockref))
 		return;
 
-	__gfs2_glock_queue_work(gl, 0);
+	GLOCK_BUG_ON(gl, gl->gl_lockref.count != 1);
+	gfs2_glock_queue_work(gl, 0);
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
@@ -829,7 +824,7 @@ __acquires(&gl->gl_lockref.lock)
 			 */
 			clear_bit(GLF_LOCK, &gl->gl_flags);
 			clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
-			__gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
+			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
 			return;
 		} else {
 			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
@@ -859,7 +854,7 @@ __acquires(&gl->gl_lockref.lock)
 
 	/* Complete the operation now. */
 	finish_xmote(gl, target);
-	__gfs2_glock_queue_work(gl, 0);
+	gfs2_glock_queue_work(gl, 0);
 }
 
 /**
@@ -907,7 +902,7 @@ __acquires(&gl->gl_lockref.lock)
 	clear_bit(GLF_LOCK, &gl->gl_flags);
 	smp_mb__after_atomic();
 	gl->gl_lockref.count++;
-	__gfs2_glock_queue_work(gl, 0);
+	gfs2_glock_queue_work(gl, 0);
 	return;
 
 out_unlock:
@@ -1139,12 +1134,12 @@ static void glock_work_func(struct work_struct *work)
 		drop_refs--;
 		if (gl->gl_name.ln_type != LM_TYPE_INODE)
 			delay = 0;
-		__gfs2_glock_queue_work(gl, delay);
+		gfs2_glock_queue_work(gl, delay);
 	}
 
 	/*
 	 * Drop the remaining glock references manually here. (Mind that
-	 * __gfs2_glock_queue_work depends on the lockref spinlock begin held
+	 * gfs2_glock_queue_work depends on the lockref spinlock begin held
 	 * here as well.)
 	 */
 	gl->gl_lockref.count -= drop_refs;
@@ -1649,7 +1644,7 @@ int gfs2_glock_nq(struct gfs2_holder *gh)
 		     test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
 		set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
 		gl->gl_lockref.count++;
-		__gfs2_glock_queue_work(gl, 0);
+		gfs2_glock_queue_work(gl, 0);
 	}
 	run_queue(gl, 1);
 	spin_unlock(&gl->gl_lockref.lock);
@@ -1715,7 +1710,7 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
 		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
 		    gl->gl_name.ln_type == LM_TYPE_INODE)
 			delay = gl->gl_hold_time;
-		__gfs2_glock_queue_work(gl, delay);
+		gfs2_glock_queue_work(gl, delay);
 	}
 }
 
@@ -1939,7 +1934,7 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
 			delay = gl->gl_hold_time;
 	}
 	handle_callback(gl, state, delay, true);
-	__gfs2_glock_queue_work(gl, delay);
+	gfs2_glock_queue_work(gl, delay);
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
@@ -1999,7 +1994,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
 
 	gl->gl_lockref.count++;
 	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
-	__gfs2_glock_queue_work(gl, 0);
+	gfs2_glock_queue_work(gl, 0);
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
@@ -2068,7 +2063,7 @@ __acquires(&lru_lock)
 		gl->gl_lockref.count++;
 		if (demote_ok(gl))
 			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
-		__gfs2_glock_queue_work(gl, 0);
+		gfs2_glock_queue_work(gl, 0);
 		spin_unlock(&gl->gl_lockref.lock);
 		cond_resched_lock(&lru_lock);
 	}
@@ -2192,7 +2187,7 @@ static void thaw_glock(struct gfs2_glock *gl)
 
 	spin_lock(&gl->gl_lockref.lock);
 	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
-	__gfs2_glock_queue_work(gl, 0);
+	gfs2_glock_queue_work(gl, 0);
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
@@ -2211,7 +2206,7 @@ static void clear_glock(struct gfs2_glock *gl)
 		gl->gl_lockref.count++;
 		if (gl->gl_state != LM_ST_UNLOCKED)
 			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
-		__gfs2_glock_queue_work(gl, 0);
+		gfs2_glock_queue_work(gl, 0);
 	}
 	spin_unlock(&gl->gl_lockref.lock);
 }
-- 
2.44.0


^ permalink raw reply related	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2024-04-16 10:11 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-04-11 13:52 [PATCH] gfs2: Fix do_xmote locking error Andreas Gruenbacher
2024-04-16 10:10 ` [PATCH 0/3] gfs2: do_xmote fixes Andreas Gruenbacher
2024-04-16 10:10   ` [PATCH 1/3] gfs2: finish_xmote cleanup Andreas Gruenbacher
2024-04-16 10:10   ` [PATCH 2/3] gfs2: do_xmote fixes Andreas Gruenbacher
2024-04-16 10:10   ` [PATCH 3/3] gfs2: Remove and replace gfs2_glock_queue_work Andreas Gruenbacher

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).