grub-devel.gnu.org archive mirror
 help / color / mirror / Atom feed
From: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
To: grub-devel@gnu.org
Cc: meghanaprakash@in.ibm.com, avnish@linux.vnet.ibm.com,
	brking@linux.vnet.ibm.com, mamatha4@linux.vnet.ibm.com,
	mchauras@linux.vnet.ibm.com,
	Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>,
	Diego Domingos <diegodo@linux.vnet.ibm.com>
Subject: [PATCH V5] ieee1275/ofdisk: retry on open and read failure
Date: Wed, 24 Jan 2024 11:42:30 +0530	[thread overview]
Message-ID: <20240124061229.116151-2-mchauras@linux.ibm.com> (raw)

Sometimes, when booting from a very busy SAN, the access to the
disk can fail and then GRUB will eventually drop to the GRUB prompt.
This scenario is more frequent when deploying many machines at
the same time using the same SAN.
This patch makes the ofdisk module retry the open or read function
for network disks (fibre-channel and vfc-client) after it fails. We use
RETRY_DEFAULT_TIMEOUT, which is 15 seconds, to specify how long it will
keep retrying to access the disk before it finally fails. The timeout can be
changed by setting the environment variable ofdisk_retry_timeout.
If the environment variable is unset or cannot be parsed, GRUB will use the
default value of 15 seconds.

Signed-off-by: Diego Domingos <diegodo@linux.vnet.ibm.com>
Signed-off-by: Mukesh Kumar Chaurasiya <mchauras@linux.ibm.com>
---
 docs/grub.texi                   |  8 +++
 grub-core/disk/ieee1275/ofdisk.c | 91 ++++++++++++++++++++++++++++++--
 2 files changed, 96 insertions(+), 3 deletions(-)

diff --git a/docs/grub.texi b/docs/grub.texi
index 975e521d1..db0acb869 100644
--- a/docs/grub.texi
+++ b/docs/grub.texi
@@ -3306,6 +3306,7 @@ These variables have special meaning to GRUB.
 * net_default_ip::
 * net_default_mac::
 * net_default_server::
+* ofdisk_retry_timeout::
 * pager::
 * prefix::
 * pxe_blksize::
@@ -3728,6 +3729,13 @@ The default is the value of @samp{color_normal} (@pxref{color_normal}).
 @xref{Network}.
 
 
+@node ofdisk_retry_timeout
+@subsection ofdisk_retry_timeout
+
+The time in seconds for which GRUB will keep retrying to open or read a disk
+after a failure to do so. This value defaults to 15 seconds.
+
+
 @node pager
 @subsection pager
 
diff --git a/grub-core/disk/ieee1275/ofdisk.c b/grub-core/disk/ieee1275/ofdisk.c
index c6cba0c8a..d90b9b70b 100644
--- a/grub-core/disk/ieee1275/ofdisk.c
+++ b/grub-core/disk/ieee1275/ofdisk.c
@@ -24,6 +24,9 @@
 #include <grub/ieee1275/ofdisk.h>
 #include <grub/i18n.h>
 #include <grub/time.h>
+#include <grub/env.h>
+
+#define RETRY_DEFAULT_TIMEOUT 15
 
 static char *last_devpath;
 static grub_ieee1275_ihandle_t last_ihandle;
@@ -452,7 +455,7 @@ compute_dev_path (const char *name)
 }
 
 static grub_err_t
-grub_ofdisk_open (const char *name, grub_disk_t disk)
+grub_ofdisk_open_real (const char *name, grub_disk_t disk)
 {
   grub_ieee1275_phandle_t dev;
   char *devpath;
@@ -525,6 +528,61 @@ grub_ofdisk_open (const char *name, grub_disk_t disk)
   return 0;
 }
 
+static grub_uint64_t
+grub_ofdisk_disk_timeout (grub_disk_t disk)
+{
+  grub_uint64_t retry = RETRY_DEFAULT_TIMEOUT;
+  const char *timeout = grub_env_get ("ofdisk_retry_timeout");
+  const char *timeout_end;
+
+  if (grub_strstr (disk->name, "fibre-channel") != NULL ||
+      grub_strstr (disk->name, "vfc-client") != NULL)
+    {
+      if (timeout == NULL)
+        {
+          return retry;
+        }
+      retry = grub_strtoul (timeout, &timeout_end, 10);
+      /* Ignore all errors and return default timeout */
+      if (grub_errno != GRUB_ERR_NONE ||
+          *timeout == '\0' ||
+          *timeout_end != '\0')
+        {
+          return RETRY_DEFAULT_TIMEOUT;
+        }
+    }
+  else
+    return 0;
+
+  return retry;
+}
+
+static grub_err_t
+grub_ofdisk_open (const char *name, grub_disk_t disk)
+{
+  grub_err_t err;
+  grub_uint64_t timeout = grub_get_time_ms () + (grub_ofdisk_disk_timeout (disk) * 1000);
+  grub_uint16_t inc = 0;
+
+  do
+    {
+      err = grub_ofdisk_open_real (name, disk);
+      if (err == GRUB_ERR_UNKNOWN_DEVICE)
+        {
+          grub_dprintf ("ofdisk", "Failed to open disk %s.\n", name);
+        }
+      if (grub_get_time_ms () >= timeout)
+        break;
+      grub_dprintf ("ofdisk", "Retry to open disk %s.\n", name);
+      /*
+       * Increase in wait time for subsequent requests
+       * Cur time is used as a random number here
+       */
+      grub_millisleep ((32 << ++inc) * (grub_get_time_ms () % 32));
+    } while (1);
+  return err;
+}
+
 static void
 grub_ofdisk_close (grub_disk_t disk)
 {
@@ -568,8 +626,8 @@ grub_ofdisk_prepare (grub_disk_t disk, grub_disk_addr_t sector)
 }
 
 static grub_err_t
-grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
-		  grub_size_t size, char *buf)
+grub_ofdisk_read_real (grub_disk_t disk, grub_disk_addr_t sector,
+                       grub_size_t size, char *buf)
 {
   grub_err_t err;
   grub_ssize_t actual;
@@ -587,6 +645,33 @@ grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
   return 0;
 }
 
+static grub_err_t
+grub_ofdisk_read (grub_disk_t disk, grub_disk_addr_t sector,
+                  grub_size_t size, char *buf)
+{
+  grub_err_t err;
+  grub_uint64_t timeout = grub_get_time_ms () + (grub_ofdisk_disk_timeout (disk) * 1000);
+  grub_uint16_t inc = 0;
+
+  do
+    {
+      err = grub_ofdisk_read_real (disk, sector, size, buf);
+      if (err == GRUB_ERR_UNKNOWN_DEVICE)
+        {
+          grub_dprintf ("ofdisk", "Failed to read disk %s.\n", (char*)disk->data);
+        }
+      if (grub_get_time_ms () >= timeout)
+        break;
+      grub_dprintf ("ofdisk", "Retry to read disk %s.\n", (char*)disk->data);
+      /*
+       * Increase in wait time for subsequent requests
+       * Cur time is used as a random number here
+       */
+      grub_millisleep ((32 << ++inc) * (grub_get_time_ms () % 32));
+    } while (1);
+  return err;
+}
+
 static grub_err_t
 grub_ofdisk_write (grub_disk_t disk, grub_disk_addr_t sector,
 		   grub_size_t size, const char *buf)
-- 
2.43.0


_______________________________________________
Grub-devel mailing list
Grub-devel@gnu.org
https://lists.gnu.org/mailman/listinfo/grub-devel

             reply	other threads:[~2024-01-24  6:15 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-24  6:12 Mukesh Kumar Chaurasiya [this message]
2024-01-25  5:05 ` [PATCH V5] ieee1275/ofdisk: retry on open and read failure Michael Chang via Grub-devel
2024-01-29  8:32   ` Mukesh Kumar Chaurasiya

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240124061229.116151-2-mchauras@linux.ibm.com \
    --to=mchauras@linux.ibm.com \
    --cc=avnish@linux.vnet.ibm.com \
    --cc=brking@linux.vnet.ibm.com \
    --cc=diegodo@linux.vnet.ibm.com \
    --cc=grub-devel@gnu.org \
    --cc=mamatha4@linux.vnet.ibm.com \
    --cc=mchauras@linux.vnet.ibm.com \
    --cc=meghanaprakash@in.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).