LKML Archive mirror
 help / color / mirror / Atom feed
From: Daniel Ferguson <danielf@os.amperecomputing.com>
To: shiju.jose@huawei.com
Cc: ira.weiny@intel.com, vishal.l.verma@intel.com,
	alison.schofield@intel.com, dave.jiang@intel.com,
	jonathan.cameron@huawei.com, dave@stgolabs.net,
	dan.j.williams@intel.com, linux-mm@kvack.org,
	linux-acpi@vger.kernel.org, linux-cxl@vger.kernel.org,
	linux-edac@vger.kernel.org, linux-kernel@vger.kernel.org,
	david@redhat.com, Vilas.Sridharan@amd.com, leo.duran@amd.com,
	Yazen.Ghannam@amd.com, rientjes@google.com, jiaqiyan@google.com,
	tony.luck@intel.com, Jon.Grimm@amd.com,
	dave.hansen@linux.intel.com, rafael@kernel.org, lenb@kernel.org,
	naoya.horiguchi@nec.com, james.morse@arm.com,
	jthoughton@google.com, somasundaram.a@hpe.com,
	erdemaktas@google.com, pgonda@google.com, duenwen@google.com,
	mike.malvestuto@intel.com, gthelen@google.com,
	wschwartz@amperecomputing.com, dferguson@amperecomputing.com,
	wbs@os.amperecomputing.com, nifan.cxl@gmail.com,
	tanxiaofei@huawei.com, prime.zeng@hisilicon.com,
	kangkang.shen@futurewei.com, wanghuiqiang@huawei.com,
	linuxarm@huawei.com
Subject: Re: [RFC PATCH v8 08/10] ACPI:RAS2: Add ACPI RAS2 driver
Date: Wed, 5 Jun 2024 14:32:55 -0700	[thread overview]
Message-ID: <fec6ba82-aef8-4ffe-a18b-20ac8e0a1a03@os.amperecomputing.com> (raw)
In-Reply-To: <20240419164720.1765-9-shiju.jose@huawei.com>

> +static int ras2_check_pcc_chan(struct ras2_pcc_subspace *pcc_subspace)
> +{
> +	struct acpi_ras2_shared_memory __iomem *generic_comm_base =pcc_subspace->pcc_comm_addr;
> +	ktime_t next_deadline = ktime_add(ktime_get(), pcc_subspace->deadline);
> +	u16 status;
> +
> +	while (!ktime_after(ktime_get(), next_deadline)) {
> +		/*
> +		 * As per ACPI spec, the PCC space will be initialized by
> +		 * platform and should have set the command completion bit when
> +		 * PCC can be used by OSPM
> +		 */
> +		status = readw_relaxed(&generic_comm_base->status);
> +		if (status & RAS2_PCC_CMD_ERROR)
> +			return -EIO;

We need to clear the error bit before reporting an error.
Maybe the error was transient, or specific to the last transaction.
So clearing it here, lets us try again later. Like This:
		if (status & RAS2_PCC_CMD_ERROR) {
			status &= ~RAS2_PCC_CMD_ERROR;
			writew_relaxed(status, &generic_comm_base->status);
			return -EIO;
		}


Also, we are thinking that using the "Set RAS Capability Status" as a
way to communicate RAS2 Scrub specific error conditions is appropriate.
If we agree on that idea, then we can additionally check the
set_capabilities_status and return an appropriate error..
for example, we can add a new error mapping function:
static int report_cap_error(u32 cap_status)
{
	switch (cap_status) {
		case 1:  /* Not Valid */
		case 2:  /* Not Supported */
			return -EPERM;
		case 3:  /* Busy */
			return -EBUSY;
		case 4:  /* FailedF */
		case 5:  /* Aborted */
		case 6:  /* Invalid Data */
			return -EINVAL;
		default: /* 0 or other, Success */
			return 0;
	}
}

and then instead, modify ras2_check_pcc_chan in this way:
		if (status & RAS2_PCC_CMD_ERROR) {
			cap_status = readw_relaxed(&generic_comm_base->set_capabilities_status);
			ret = report_cap_error(cap_status);

			status &= ~RAS2_PCC_CMD_ERROR;
			writew_relaxed(status, &generic_comm_base->status);
			return ret;
		}

> +		if (status & RAS2_PCC_CMD_COMPLETE)
> +			return 0;
> +		/*
> +		 * Reducing the bus traffic in case this loop takes longer than
> +		 * a few retries.
> +		 */
> +		msleep(10);
> +	}
> +
> +	return -EIO;
> +}
> +
> +/**
> + * ras2_send_pcc_cmd() - Send RAS2 command via PCC channel
> + * @ras2_ctx:	pointer to the ras2 context structure
> + * @cmd:	command to send
> + *
> + * Returns: 0 on success, an error otherwise
> + */
> +int ras2_send_pcc_cmd(struct ras2_scrub_ctx *ras2_ctx, u16 cmd)
> +{
> +	struct ras2_pcc_subspace *pcc_subspace = ras2_ctx->pcc_subspace;
> +	struct acpi_ras2_shared_memory *generic_comm_base = pcc_subspace->pcc_comm_addr;
> +	static ktime_t last_cmd_cmpl_time, last_mpar_reset;
> +	struct mbox_chan *pcc_channel;
> +	unsigned int time_delta;
> +	static int mpar_count;
> +	int ret;
> +
> +	guard(mutex)(&ras2_pcc_subspace_lock);
> +	ret = ras2_check_pcc_chan(pcc_subspace);
> +	if (ret)
> +		return ret;
> +	pcc_channel = pcc_subspace->pcc_chan->mchan;
> +
> +	/*
> +	 * Handle the Minimum Request Turnaround Time(MRTT)
> +	 * "The minimum amount of time that OSPM must wait after the completion
> +	 * of a command before issuing the next command, in microseconds"
> +	 */
> +	if (pcc_subspace->pcc_mrtt) {
> +		time_delta = ktime_us_delta(ktime_get(), last_cmd_cmpl_time);
> +		if (pcc_subspace->pcc_mrtt > time_delta)
> +			udelay(pcc_subspace->pcc_mrtt - time_delta);
> +	}
> +
> +	/*
> +	 * Handle the non-zero Maximum Periodic Access Rate(MPAR)
> +	 * "The maximum number of periodic requests that the subspace channel can
> +	 * support, reported in commands per minute. 0 indicates no limitation."
> +	 *
> +	 * This parameter should be ideally zero or large enough so that it can
> +	 * handle maximum number of requests that all the cores in the system can
> +	 * collectively generate. If it is not, we will follow the spec and just
> +	 * not send the request to the platform after hitting the MPAR limit in
> +	 * any 60s window
> +	 */
> +	if (pcc_subspace->pcc_mpar) {
> +		if (mpar_count == 0) {
> +			time_delta = ktime_ms_delta(ktime_get(), last_mpar_reset);
> +			if (time_delta < 60 * MSEC_PER_SEC) {
> +				dev_dbg(ras2_ctx->dev,
> +					"PCC cmd not sent due to MPAR limit");
> +				return -EIO;
> +			}
> +			last_mpar_reset = ktime_get();
> +			mpar_count = pcc_subspace->pcc_mpar;
> +		}
> +		mpar_count--;
> +	}
> +
> +	/* Write to the shared comm region. */
> +	writew_relaxed(cmd, &generic_comm_base->command);
> +
> +	/* Flip CMD COMPLETE bit */
> +	writew_relaxed(0, &generic_comm_base->status);
> +
> +	/* Ring doorbell */
> +	ret = mbox_send_message(pcc_channel, &cmd);
> +	if (ret < 0) {
> +		dev_err(ras2_ctx->dev,
> +			"Err sending PCC mbox message. cmd:%d, ret:%d\n",
> +			cmd, ret);
> +		return ret;
> +	}
> +
> +	/*
> +	 * If Minimum Request Turnaround Time is non-zero, we need
> +	 * to record the completion time of both READ and WRITE
> +	 * command for proper handling of MRTT, so we need to check
> +	 * for pcc_mrtt in addition to CMD_READ
> +	 */
> +	if (cmd == RAS2_PCC_CMD_EXEC || pcc_subspace->pcc_mrtt) {
> +		ret = ras2_check_pcc_chan(pcc_subspace);
> +		if (pcc_subspace->pcc_mrtt)
> +			last_cmd_cmpl_time = ktime_get();
> +	}
> +
> +	if (pcc_channel->mbox->txdone_irq)
> +		mbox_chan_txdone(pcc_channel, ret);
> +	else
> +		mbox_client_txdone(pcc_channel, ret);
> +
> +	return 0;


As of now, this code doesn't consider errors occurring after the
mbox_send_message.
Checking errors is, probably, necessary to take appropriate action - such as
reporting an error to the user.  Also, error or no, it is important to
call the txdone bits.

One simple solution that works is to modify the previous "return 0" to
look like:
	return ret >= 0 ? 0 : ret;

> +}
> +EXPORT_SYMBOL_GPL(ras2_send_pcc_cmd);




  reply	other threads:[~2024-06-05 21:33 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-19 16:47 [RFC PATCH v8 00/10] ras: scrub: introduce subsystem + CXL/ACPI-RAS2 drivers shiju.jose
2024-04-19 16:47 ` [RFC PATCH v8 01/10] ras: scrub: Add scrub subsystem shiju.jose
2024-04-24 20:25   ` fan
2024-04-25 10:38     ` Shiju Jose
2024-04-25 10:15   ` Borislav Petkov
2024-04-25 18:11     ` Shiju Jose
2024-05-06 10:30       ` Borislav Petkov
2024-05-08 16:59         ` Shiju Jose
2024-05-08 17:20           ` Borislav Petkov
2024-05-08 17:44             ` Shiju Jose
2024-05-08 19:25               ` Borislav Petkov
2024-05-09  9:19                 ` Jonathan Cameron
2024-05-09 15:52                   ` Borislav Petkov
2024-05-09 20:03                     ` Borislav Petkov
2024-05-09 21:21                       ` Dan Williams
2024-05-09 21:51                         ` Borislav Petkov
2024-05-09 22:59                           ` Dan Williams
2024-05-10  9:25                             ` Borislav Petkov
2024-05-10 17:13                               ` Dan Williams
2024-05-11 10:17                                 ` Borislav Petkov
2024-05-17 11:15                                   ` Jonathan Cameron
2024-05-17 11:44                                     ` Jonathan Cameron
2024-05-21  8:06                                       ` Borislav Petkov
2024-05-22  9:40                                         ` Jonathan Cameron
2024-05-27  9:09                                           ` Borislav Petkov
2024-05-20 10:54                                   ` Shiju Jose
2024-05-20 11:58                                     ` Jonathan Cameron
2024-05-27  9:21                                       ` Borislav Petkov
2024-05-28  9:06                                         ` Jonathan Cameron
2024-06-06 16:05                                           ` Borislav Petkov
2024-05-10 13:31                     ` Jonathan Cameron
2024-05-09 21:47   ` Dan Williams
2024-05-10  9:03     ` Jonathan Cameron
2024-04-19 16:47 ` [RFC PATCH v8 02/10] cxl/mbox: Add GET_SUPPORTED_FEATURES mailbox command shiju.jose
2024-04-19 16:47 ` [RFC PATCH v8 03/10] cxl/mbox: Add GET_FEATURE " shiju.jose
2024-04-24 23:19   ` fan
2024-04-25 10:38     ` Shiju Jose
2024-04-19 16:47 ` [RFC PATCH v8 04/10] cxl/mbox: Add SET_FEATURE " shiju.jose
2024-04-25 17:26   ` fan
2024-04-19 16:47 ` [RFC PATCH v8 05/10] cxl/memscrub: Add CXL device patrol scrub control feature shiju.jose
2024-04-26 23:56   ` fan
2024-04-29 11:20     ` Shiju Jose
2024-04-29 12:21       ` Jonathan Cameron
2024-05-10  0:26   ` Dan Williams
2024-05-10 11:23     ` Jonathan Cameron
2024-04-19 16:47 ` [RFC PATCH v8 06/10] ACPICA: Add __free() based cleanup function for acpi_put_table shiju.jose
2024-04-19 18:06   ` Jonathan Cameron
2024-04-19 16:47 ` [RFC PATCH v8 07/10] platform: Add __free() based cleanup function for platform_device_put shiju.jose
2024-04-19 16:47 ` [RFC PATCH v8 08/10] ACPI:RAS2: Add ACPI RAS2 driver shiju.jose
2024-06-05 21:32   ` Daniel Ferguson [this message]
2024-04-19 16:47 ` [RFC PATCH v8 09/10] ras: scrub: Add scrub control attributes for ACPI RAS2 shiju.jose
2024-04-19 16:47 ` [RFC PATCH v8 10/10] ras: scrub: ACPI RAS2: Add memory ACPI RAS2 driver shiju.jose
2024-06-05 21:33   ` Daniel Ferguson
2024-06-07 15:46     ` Shiju Jose
2024-06-21 18:06       ` Daniel Ferguson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fec6ba82-aef8-4ffe-a18b-20ac8e0a1a03@os.amperecomputing.com \
    --to=danielf@os.amperecomputing.com \
    --cc=Jon.Grimm@amd.com \
    --cc=Vilas.Sridharan@amd.com \
    --cc=Yazen.Ghannam@amd.com \
    --cc=alison.schofield@intel.com \
    --cc=dan.j.williams@intel.com \
    --cc=dave.hansen@linux.intel.com \
    --cc=dave.jiang@intel.com \
    --cc=dave@stgolabs.net \
    --cc=david@redhat.com \
    --cc=dferguson@amperecomputing.com \
    --cc=duenwen@google.com \
    --cc=erdemaktas@google.com \
    --cc=gthelen@google.com \
    --cc=ira.weiny@intel.com \
    --cc=james.morse@arm.com \
    --cc=jiaqiyan@google.com \
    --cc=jonathan.cameron@huawei.com \
    --cc=jthoughton@google.com \
    --cc=kangkang.shen@futurewei.com \
    --cc=lenb@kernel.org \
    --cc=leo.duran@amd.com \
    --cc=linux-acpi@vger.kernel.org \
    --cc=linux-cxl@vger.kernel.org \
    --cc=linux-edac@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linuxarm@huawei.com \
    --cc=mike.malvestuto@intel.com \
    --cc=naoya.horiguchi@nec.com \
    --cc=nifan.cxl@gmail.com \
    --cc=pgonda@google.com \
    --cc=prime.zeng@hisilicon.com \
    --cc=rafael@kernel.org \
    --cc=rientjes@google.com \
    --cc=shiju.jose@huawei.com \
    --cc=somasundaram.a@hpe.com \
    --cc=tanxiaofei@huawei.com \
    --cc=tony.luck@intel.com \
    --cc=vishal.l.verma@intel.com \
    --cc=wanghuiqiang@huawei.com \
    --cc=wbs@os.amperecomputing.com \
    --cc=wschwartz@amperecomputing.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).