* [PATCH] habanalabs: fix MMU print message
@ 2020-11-04 10:14 Oded Gabbay
From: Oded Gabbay @ 2020-11-04 10:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Moti Haimovski

From: Moti Haimovski <mhaimovski@habana.ai>

This commit fixes an incorrect error message in the hl_mmu_map() failure
path: the message said "can't unmap" although the operation that failed
is a map.

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index b5058798aeb9..451148959431 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -216,7 +216,7 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 		real_page_size = mmu_prop->page_size;
 	} else {
 		dev_err(hdev->dev,
-			"page size of %u is not %uKB aligned, can't unmap\n",
+			"page size of %u is not %uKB aligned, can't map\n",
 			page_size, mmu_prop->page_size >> 10);
 
 		return -EFAULT;
-- 
2.17.1



* [PATCH] habanalabs/gaudi: fetch PLL info from FW
@ 2020-11-04 10:14 Oded Gabbay
From: Oded Gabbay @ 2020-11-04 10:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Ofir Bitton

From: Ofir Bitton <obitton@habana.ai>

Once FW security is enabled there is no access to the PLL registers, so
the driver must read their values from the FW using a dedicated
interface.
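
An illustrative sketch (not part of the patch) of how a caller fetches a
single PLL value through the new interface when direct register access
is blocked:

	u32 div_fctr;
	int rc;

	/* cpucp_pll_cpu selects the CPU PLL, cpucp_pll_div_factor_reg
	 * selects which of its registers to read
	 */
	rc = hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
			cpucp_pll_div_factor_reg, &div_fctr);
	if (rc)
		return rc;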

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 26 +++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h  |  4 ++
 drivers/misc/habanalabs/gaudi/gaudi.c        | 41 +++++++++++++++-----
 3 files changed, 62 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 8de6a8690b1b..d84a70ec0ce1 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -448,6 +448,32 @@ int hl_fw_cpucp_total_energy_get(struct hl_device *hdev, u64 *total_energy)
 	return rc;
 }
 
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
+		enum cpucp_pll_type_attributes pll_type,
+		enum cpucp_pll_reg_attributes pll_reg,
+		u32 *pll_info)
+{
+	struct cpucp_packet pkt;
+	long result;
+	int rc;
+
+	memset(&pkt, 0, sizeof(pkt));
+
+	pkt.ctl = cpu_to_le32(CPUCP_PACKET_PLL_REG_GET <<
+				CPUCP_PKT_CTL_OPCODE_SHIFT);
+	pkt.pll_type = __cpu_to_le16(pll_type);
+	pkt.pll_reg = __cpu_to_le16(pll_reg);
+
+	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+			HL_CPUCP_INFO_TIMEOUT_USEC, &result);
+	if (rc)
+		dev_err(hdev->dev, "Failed to read PLL info, error %d\n", rc);
+
+	*pll_info = result;
+
+	return rc;
+}
+
 static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
 		u32 cpu_security_boot_status_reg)
 {
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index fee68fc121d7..ce516e9e1ebe 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2113,6 +2113,10 @@ int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
 		struct hl_info_pci_counters *counters);
 int hl_fw_cpucp_total_energy_get(struct hl_device *hdev,
 			u64 *total_energy);
+int hl_fw_cpucp_pll_info_get(struct hl_device *hdev,
+		enum cpucp_pll_type_attributes pll_type,
+		enum cpucp_pll_reg_attributes pll_reg,
+		u32 *pll_info);
 int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 			u32 msg_to_cpu_reg, u32 cpu_msg_status_reg,
 			u32 cpu_security_boot_status_reg, u32 boot_err0_reg,
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 5df06c63ceb8..9e38ac6f7264 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -673,16 +673,33 @@ static int gaudi_early_fini(struct hl_device *hdev)
  * @hdev: pointer to hl_device structure
  *
  */
-static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
+static int gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
-	u32 trace_freq = 0;
-	u32 pll_clk = 0;
-	u32 div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
-	u32 div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
-	u32 nr = RREG32(mmPSOC_CPU_PLL_NR);
-	u32 nf = RREG32(mmPSOC_CPU_PLL_NF);
-	u32 od = RREG32(mmPSOC_CPU_PLL_OD);
+	u32 trace_freq = 0, pll_clk = 0;
+	u32 div_fctr, div_sel, nr, nf, od;
+	int rc;
+
+	if (hdev->asic_prop.fw_security_disabled) {
+		div_fctr = RREG32(mmPSOC_CPU_PLL_DIV_FACTOR_2);
+		div_sel = RREG32(mmPSOC_CPU_PLL_DIV_SEL_2);
+		nr = RREG32(mmPSOC_CPU_PLL_NR);
+		nf = RREG32(mmPSOC_CPU_PLL_NF);
+		od = RREG32(mmPSOC_CPU_PLL_OD);
+	} else {
+		rc = hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
+				cpucp_pll_div_factor_reg, &div_fctr);
+		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
+				cpucp_pll_div_sel_reg, &div_sel);
+		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
+				cpucp_pll_nr_reg, &nr);
+		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
+				cpucp_pll_nf_reg, &nf);
+		rc |= hl_fw_cpucp_pll_info_get(hdev, cpucp_pll_cpu,
+				cpucp_pll_od_reg, &od);
+		if (rc)
+			return rc;
+	}
 
 	if (div_sel == DIV_SEL_REF_CLK || div_sel == DIV_SEL_DIVIDED_REF) {
 		if (div_sel == DIV_SEL_REF_CLK)
@@ -706,6 +723,8 @@ static void gaudi_fetch_psoc_frequency(struct hl_device *hdev)
 	prop->psoc_pci_pll_nf = nf;
 	prop->psoc_pci_pll_od = od;
 	prop->psoc_pci_pll_div_factor = div_fctr;
+
+	return 0;
 }
 
 static int _gaudi_init_tpc_mem(struct hl_device *hdev,
@@ -1319,7 +1338,11 @@ static int gaudi_late_init(struct hl_device *hdev)
 
 	WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
 
-	gaudi_fetch_psoc_frequency(hdev);
+	rc = gaudi_fetch_psoc_frequency(hdev);
+	if (rc) {
+		dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
+		goto disable_pci_access;
+	}
 
 	rc = gaudi_mmu_clear_pgt_range(hdev);
 	if (rc) {
-- 
2.17.1



* [PATCH] habanalabs: refactor MMU to support dual residency MMU
@ 2020-11-04 10:14 Oded Gabbay
From: Oded Gabbay @ 2020-11-04 10:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Moti Haimovski

From: Moti Haimovski <mhaimovski@habana.ai>

This commit refactors the MMU code to support PCI MMU page tables
residing in host memory and DCORE MMU page tables residing in the
device DRAM at the same time.

This is needed for future devices; on GAUDI and GOYA we have a single
MMU whose page tables always reside in the device DRAM.
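
As a sketch (illustration only, using the identifiers introduced by this
patch), the map/unmap paths now dispatch on the page-table residency
declared in the MMU properties:

	/* pick the host- or device-resident backend for this MMU */
	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;

	rc = hdev->mmu_func[pgt_residency].map(ctx, real_virt_addr,
					real_phys_addr, real_page_size,
					is_dram_addr);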

Signed-off-by: Moti Haimovski <mhaimovski@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h |  57 ++++++++++-
 drivers/misc/habanalabs/common/mmu.c        | 105 ++++++++++++++------
 drivers/misc/habanalabs/common/mmu_v1.c     |  28 +++---
 3 files changed, 143 insertions(+), 47 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 54600443ebc7..fee68fc121d7 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -61,6 +61,18 @@
 /* MMU */
 #define MMU_HASH_TABLE_BITS		7 /* 1 << 7 buckets */
 
+/**
+ * enum hl_mmu_page_table_location - mmu page table location
+ * @MMU_DR_PGT: page-table is located on device DRAM.
+ * @MMU_HR_PGT: page-table is located on host memory.
+ * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
+ */
+enum hl_mmu_page_table_location {
+	MMU_DR_PGT = 0,		/* device-dram-resident MMU PGT */
+	MMU_HR_PGT,		/* host resident MMU PGT */
+	MMU_NUM_PGT_LOCATIONS	/* num of PGT locations */
+};
+
 /*
  * HL_RSVD_SOBS 'sync stream' reserved sync objects per QMAN stream
  * HL_RSVD_MONS 'sync stream' reserved monitors per QMAN stream
@@ -303,6 +315,8 @@ enum hl_device_hw_state {
  * @hop5_mask: mask to get the PTE address in hop 5.
  * @page_size: default page size used to allocate memory.
  * @num_hops: The amount of hops supported by the translation table.
+ * @host_resident: Should the MMU page table reside in host memory or in the
+ *                 device DRAM.
  */
 struct hl_mmu_properties {
 	u64	start_addr;
@@ -321,6 +335,7 @@ struct hl_mmu_properties {
 	u64	hop5_mask;
 	u32	page_size;
 	u32	num_hops;
+	u8	host_resident;
 };
 
 /**
@@ -1572,17 +1587,51 @@ struct hl_device_idle_busy_ts {
 	ktime_t				busy_to_idle_ts;
 };
 
+/**
+ * struct hr_mmu_hop_addrs - used for holding per-device host-resident mmu hop
+ * information.
+ * @virt_addr: the virtual address of the hop.
+ * @phys_addr: the physical address of the hop (used by the device-mmu).
+ * @shadow_addr: The shadow of the hop used by the driver for walking the hops.
+ */
+struct hr_mmu_hop_addrs {
+	u64 virt_addr;
+	u64 phys_addr;
+	u64 shadow_addr;
+};
 
 /**
- * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * struct hl_mmu_hr_priv - used for holding per-device mmu host-resident
+ * page-table internal information.
  * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
  * @mmu_shadow_hop0: shadow array of hop0 tables.
  */
-struct hl_mmu_priv {
+struct hl_mmu_hr_priv {
+	struct gen_pool *mmu_pgt_pool;
+	struct hr_mmu_hop_addrs *mmu_shadow_hop0;
+};
+
+/**
+ * struct hl_mmu_dr_priv - used for holding per-device mmu device-resident
+ * page-table internal information.
+ * @mmu_pgt_pool: pool of page tables used by MMU for allocating hops.
+ * @mmu_shadow_hop0: shadow array of hop0 tables.
+ */
+struct hl_mmu_dr_priv {
 	struct gen_pool *mmu_pgt_pool;
 	void *mmu_shadow_hop0;
 };
 
+/**
+ * struct hl_mmu_priv - used for holding per-device mmu internal information.
+ * @dr: information on the device-resident MMU, when exists.
+ * @hr: information on the host-resident MMU, when exists.
+ */
+struct hl_mmu_priv {
+	struct hl_mmu_dr_priv dr;
+	struct hl_mmu_hr_priv hr;
+};
+
 /**
  * struct hl_mmu_funcs - Device related MMU functions.
  * @init: initialize the MMU module.
@@ -1779,7 +1828,7 @@ struct hl_device {
 	struct hl_cs_counters_atomic	aggregated_cs_counters;
 
 	struct hl_mmu_priv		mmu_priv;
-	struct hl_mmu_funcs		mmu_func;
+	struct hl_mmu_funcs		mmu_func[MMU_NUM_PGT_LOCATIONS];
 
 	atomic64_t			dram_used_mem;
 	u64				timeout_jiffies;
@@ -2042,7 +2091,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 int hl_mmu_if_set_funcs(struct hl_device *hdev);
-void hl_mmu_v1_set_funcs(struct hl_device *hdev);
+void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
 				void __iomem *dst, u32 src_offset, u32 size);
diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c
index 451148959431..6f535c81478d 100644
--- a/drivers/misc/habanalabs/common/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu.c
@@ -22,18 +22,25 @@ static bool is_dram_va(struct hl_device *hdev, u64 virt_addr)
  * hl_mmu_init() - initialize the MMU module.
  * @hdev: habanalabs device structure.
  *
- * This function does the following:
- * - Create a pool of pages for pgt_infos.
- * - Create a shadow table for pgt
- *
  * Return: 0 for success, non-zero for failure.
  */
 int hl_mmu_init(struct hl_device *hdev)
 {
-	if (hdev->mmu_enable)
-		return hdev->mmu_func.init(hdev);
+	int rc = -EOPNOTSUPP;
 
-	return 0;
+	if (!hdev->mmu_enable)
+		return 0;
+
+	if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
+		rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
+		if (rc)
+			return rc;
+	}
+
+	if (hdev->mmu_func[MMU_HR_PGT].init != NULL)
+		rc = hdev->mmu_func[MMU_HR_PGT].init(hdev);
+
+	return rc;
 }
 
 /**
@@ -48,8 +55,14 @@ int hl_mmu_init(struct hl_device *hdev)
  */
 void hl_mmu_fini(struct hl_device *hdev)
 {
-	if (hdev->mmu_enable)
-		hdev->mmu_func.fini(hdev);
+	if (!hdev->mmu_enable)
+		return;
+
+	if (hdev->mmu_func[MMU_DR_PGT].fini != NULL)
+		hdev->mmu_func[MMU_DR_PGT].fini(hdev);
+
+	if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
+		hdev->mmu_func[MMU_HR_PGT].fini(hdev);
 }
 
 /**
@@ -63,11 +76,21 @@ void hl_mmu_fini(struct hl_device *hdev)
 int hl_mmu_ctx_init(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
+	int rc = -EOPNOTSUPP;
 
-	if (hdev->mmu_enable)
-		return hdev->mmu_func.ctx_init(ctx);
+	if (!hdev->mmu_enable)
+		return 0;
 
-	return 0;
+	if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
+		rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
+		if (rc)
+			return rc;
+	}
+
+	if (hdev->mmu_func[MMU_HR_PGT].ctx_init != NULL)
+		rc = hdev->mmu_func[MMU_HR_PGT].ctx_init(ctx);
+
+	return rc;
 }
 
 /*
@@ -84,8 +107,14 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
 
-	if (hdev->mmu_enable)
-		hdev->mmu_func.ctx_fini(ctx);
+	if (!hdev->mmu_enable)
+		return;
+
+	if (hdev->mmu_func[MMU_DR_PGT].ctx_fini != NULL)
+		hdev->mmu_func[MMU_DR_PGT].ctx_fini(ctx);
+
+	if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
+		hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
 }
 
 /*
@@ -117,7 +146,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr;
 	u32 real_page_size, npages;
-	int i, rc = 0;
+	int i, rc = 0, pgt_residency;
 	bool is_dram_addr;
 
 	if (!hdev->mmu_enable)
@@ -132,6 +161,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	else
 		mmu_prop = &prop->pmmu;
 
+	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and unmap them separately.
@@ -150,7 +181,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	real_virt_addr = virt_addr;
 
 	for (i = 0 ; i < npages ; i++) {
-		rc = hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr);
+		rc = hdev->mmu_func[pgt_residency].unmap(ctx,
+						real_virt_addr, is_dram_addr);
 		if (rc)
 			break;
 
@@ -158,7 +190,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 	}
 
 	if (flush_pte)
-		hdev->mmu_func.flush(ctx);
+		hdev->mmu_func[pgt_residency].flush(ctx);
 
 	return rc;
 }
@@ -193,9 +225,10 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	struct hl_mmu_properties *mmu_prop;
 	u64 real_virt_addr, real_phys_addr;
 	u32 real_page_size, npages;
-	int i, rc, mapped_cnt = 0;
+	int i, rc, pgt_residency, mapped_cnt = 0;
 	bool is_dram_addr;
 
+
 	if (!hdev->mmu_enable)
 		return 0;
 
@@ -208,6 +241,8 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	else
 		mmu_prop = &prop->pmmu;
 
+	pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
+
 	/*
 	 * The H/W handles mapping of specific page sizes. Hence if the page
 	 * size is bigger, we break it to sub-pages and map them separately.
@@ -231,8 +266,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	real_phys_addr = phys_addr;
 
 	for (i = 0 ; i < npages ; i++) {
-		rc = hdev->mmu_func.map(ctx, real_virt_addr, real_phys_addr,
-				real_page_size, is_dram_addr);
+		rc = hdev->mmu_func[pgt_residency].map(ctx,
+						real_virt_addr, real_phys_addr,
+						real_page_size, is_dram_addr);
 		if (rc)
 			goto err;
 
@@ -242,21 +278,22 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
 	}
 
 	if (flush_pte)
-		hdev->mmu_func.flush(ctx);
+		hdev->mmu_func[pgt_residency].flush(ctx);
 
 	return 0;
 
 err:
 	real_virt_addr = virt_addr;
 	for (i = 0 ; i < mapped_cnt ; i++) {
-		if (hdev->mmu_func.unmap(ctx, real_virt_addr, is_dram_addr))
+		if (hdev->mmu_func[pgt_residency].unmap(ctx,
+						real_virt_addr, is_dram_addr))
 			dev_warn_ratelimited(hdev->dev,
 				"failed to unmap va: 0x%llx\n", real_virt_addr);
 
 		real_virt_addr += real_page_size;
 	}
 
-	hdev->mmu_func.flush(ctx);
+	hdev->mmu_func[pgt_residency].flush(ctx);
 
 	return rc;
 }
@@ -271,8 +308,14 @@ void hl_mmu_swap_out(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
 
-	if (hdev->mmu_enable)
-		hdev->mmu_func.swap_out(ctx);
+	if (!hdev->mmu_enable)
+		return;
+
+	if (hdev->mmu_func[MMU_DR_PGT].swap_out != NULL)
+		hdev->mmu_func[MMU_DR_PGT].swap_out(ctx);
+
+	if (hdev->mmu_func[MMU_HR_PGT].swap_out != NULL)
+		hdev->mmu_func[MMU_HR_PGT].swap_out(ctx);
 }
 
 /*
@@ -285,8 +328,14 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
 {
 	struct hl_device *hdev = ctx->hdev;
 
-	if (hdev->mmu_enable)
-		hdev->mmu_func.swap_in(ctx);
+	if (!hdev->mmu_enable)
+		return;
+
+	if (hdev->mmu_func[MMU_DR_PGT].swap_in != NULL)
+		hdev->mmu_func[MMU_DR_PGT].swap_in(ctx);
+
+	if (hdev->mmu_func[MMU_HR_PGT].swap_in != NULL)
+		hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
 }
 
 int hl_mmu_if_set_funcs(struct hl_device *hdev)
@@ -297,7 +346,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 	switch (hdev->asic_type) {
 	case ASIC_GOYA:
 	case ASIC_GAUDI:
-		hl_mmu_v1_set_funcs(hdev);
+		hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c
index 8d1eb5265419..ec7e8a3c37b8 100644
--- a/drivers/misc/habanalabs/common/mmu_v1.c
+++ b/drivers/misc/habanalabs/common/mmu_v1.c
@@ -29,7 +29,7 @@ static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
 {
 	struct hl_device *hdev = ctx->hdev;
 
-	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr,
+	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr,
 			hdev->asic_prop.mmu_hop_table_size);
 	hash_del(&pgt_info->node);
 	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
@@ -54,7 +54,7 @@ static u64 alloc_hop(struct hl_ctx *ctx)
 	if (!pgt_info)
 		return ULLONG_MAX;
 
-	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool,
+	phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool,
 					prop->mmu_hop_table_size);
 	if (!phys_addr) {
 		dev_err(hdev->dev, "failed to allocate page\n");
@@ -75,7 +75,7 @@ static u64 alloc_hop(struct hl_ctx *ctx)
 	return shadow_addr;
 
 shadow_err:
-	gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr,
+	gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr,
 			prop->mmu_hop_table_size);
 pool_add_err:
 	kfree(pgt_info);
@@ -91,7 +91,7 @@ static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
 
 static inline u64 get_hop0_addr(struct hl_ctx *ctx)
 {
-	return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 +
+	return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 +
 			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
 }
 
@@ -419,15 +419,15 @@ static int hl_mmu_v1_init(struct hl_device *hdev)
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	int rc;
 
-	hdev->mmu_priv.mmu_pgt_pool =
+	hdev->mmu_priv.dr.mmu_pgt_pool =
 			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);
 
-	if (!hdev->mmu_priv.mmu_pgt_pool) {
+	if (!hdev->mmu_priv.dr.mmu_pgt_pool) {
 		dev_err(hdev->dev, "Failed to create page gen pool\n");
 		return -ENOMEM;
 	}
 
-	rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr +
+	rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr +
 			prop->mmu_hop0_tables_total_size,
 			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
 			-1);
@@ -436,10 +436,10 @@ static int hl_mmu_v1_init(struct hl_device *hdev)
 		goto err_pool_add;
 	}
 
-	hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
+	hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
 						prop->mmu_hop_table_size,
 						GFP_KERNEL | __GFP_ZERO);
-	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) {
+	if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) {
 		rc = -ENOMEM;
 		goto err_pool_add;
 	}
@@ -449,7 +449,7 @@ static int hl_mmu_v1_init(struct hl_device *hdev)
 	return 0;
 
 err_pool_add:
-	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
+	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
 
 	return rc;
 }
@@ -468,8 +468,8 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
 {
 	/* MMU H/W fini was already done in device hw_fini() */
 
-	kvfree(hdev->mmu_priv.mmu_shadow_hop0);
-	gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool);
+	kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
+	gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
 }
 
 /**
@@ -847,10 +847,8 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
  *
  * @hdev: pointer to the device structure
  */
-void hl_mmu_v1_set_funcs(struct hl_device *hdev)
+void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
 {
-	struct hl_mmu_funcs *mmu = &hdev->mmu_func;
-
 	mmu->init = hl_mmu_v1_init;
 	mmu->fini = hl_mmu_v1_fini;
 	mmu->ctx_init = hl_mmu_v1_ctx_init;
-- 
2.17.1



* [PATCH] habanalabs: Small refactoring of CS IOCTL handling
@ 2020-11-04 10:14 Oded Gabbay
From: Oded Gabbay @ 2020-11-04 10:14 UTC (permalink / raw)
  To: linux-kernel; +Cc: SW_Drivers, Tomer Tayar

From: Tomer Tayar <ttayar@habana.ai>

Refactor the CS IOCTL handling by gathering common code into
sub-functions, in order to ease future additions of new CS types.
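
In outline (a sketch of the resulting hl_cs_ioctl() flow, error paths
trimmed), the IOCTL now reduces to:

	rc = hl_cs_sanity_checks(hpriv, args);
	if (rc)
		goto out;

	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
	if (rc)
		goto out;

	switch (cs_type) {
	case CS_TYPE_SIGNAL:
	case CS_TYPE_WAIT:
	case CS_TYPE_COLLECTIVE_WAIT:
		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks,
						num_chunks, &cs_seq);
		break;
	default:
		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq);
		break;
	}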

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 419 ++++++++++--------
 1 file changed, 224 insertions(+), 195 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index e9529f3efc1b..3e6f4e5ef7ec 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -11,8 +11,6 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
-#define HL_CS_FLAGS_SIG_WAIT	(HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT)
-
 static void job_wq_completion(struct work_struct *work);
 static long _hl_cs_wait_ioctl(struct hl_device *hdev,
 		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
@@ -660,44 +658,114 @@ struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
 	return job;
 }
 
-static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-				u32 num_chunks, u64 *cs_seq)
+static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
+{
+	if (cs_type_flags & HL_CS_FLAGS_SIGNAL)
+		return CS_TYPE_SIGNAL;
+	else if (cs_type_flags & HL_CS_FLAGS_WAIT)
+		return CS_TYPE_WAIT;
+	else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
+		return CS_TYPE_COLLECTIVE_WAIT;
+	else
+		return CS_TYPE_DEFAULT;
+}
+
+static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
-	struct hl_cs_chunk *cs_chunk_array;
-	struct hl_cs_counters_atomic *cntr;
-	struct hl_cs_job *job;
-	struct hl_cs *cs;
-	struct hl_cb *cb;
-	bool int_queues_only = true;
-	u32 size_to_copy;
-	int rc, i;
+	struct hl_ctx *ctx = hpriv->ctx;
+	u32 cs_type_flags, num_chunks;
+	enum hl_cs_type cs_type;
 
-	cntr = &hdev->aggregated_cs_counters;
-	*cs_seq = ULLONG_MAX;
+	if (hl_device_disabled_or_in_reset(hdev)) {
+		dev_warn_ratelimited(hdev->dev,
+			"Device is %s. Can't submit new CS\n",
+			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
+		return -EBUSY;
+	}
+
+	cs_type_flags = args->in.cs_flags & ~HL_CS_FLAGS_FORCE_RESTORE;
+
+	if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
+		dev_err(hdev->dev,
+			"CS type flags are mutually exclusive, context %d\n",
+			ctx->asid);
+		return -EINVAL;
+	}
+
+	cs_type = hl_cs_get_cs_type(cs_type_flags);
+	num_chunks = args->in.num_chunks_execute;
+
+	if (unlikely((cs_type != CS_TYPE_DEFAULT) &&
+					!hdev->supports_sync_stream)) {
+		dev_err(hdev->dev, "Sync stream CS is not supported\n");
+		return -EINVAL;
+	}
+
+	if (cs_type == CS_TYPE_DEFAULT) {
+		if (!num_chunks) {
+			dev_err(hdev->dev,
+				"Got execute CS with 0 chunks, context %d\n",
+				ctx->asid);
+			return -EINVAL;
+		}
+	} else if (num_chunks != 1) {
+		dev_err(hdev->dev,
+			"Sync stream CS mandates one chunk only, context %d\n",
+			ctx->asid);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hl_cs_copy_chunk_array(struct hl_device *hdev,
+					struct hl_cs_chunk **cs_chunk_array,
+					void __user *chunks, u32 num_chunks)
+{
+	u32 size_to_copy;
 
 	if (num_chunks > HL_MAX_JOBS_PER_CS) {
 		dev_err(hdev->dev,
 			"Number of chunks can NOT be larger than %d\n",
 			HL_MAX_JOBS_PER_CS);
-		rc = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 
-	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
+	*cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array),
 					GFP_ATOMIC);
-	if (!cs_chunk_array) {
-		rc = -ENOMEM;
-		goto out;
-	}
+	if (!*cs_chunk_array)
+		return -ENOMEM;
 
 	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
-	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
+	if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) {
 		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
-		rc = -EFAULT;
-		goto free_cs_chunk_array;
+		kfree(*cs_chunk_array);
+		return -EFAULT;
 	}
 
+	return 0;
+}
+
+static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
+				u32 num_chunks, u64 *cs_seq)
+{
+	bool int_queues_only = true;
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_cs_chunk *cs_chunk_array;
+	struct hl_cs_counters_atomic *cntr;
+	struct hl_cs_job *job;
+	struct hl_cs *cs;
+	struct hl_cb *cb;
+	int rc, i;
+
+	cntr = &hdev->aggregated_cs_counters;
+	*cs_seq = ULLONG_MAX;
+
+	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+	if (rc)
+		goto out;
+
 	/* increment refcnt for context */
 	hl_ctx_get(hdev, hpriv->ctx);
 
@@ -828,6 +896,108 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 	return rc;
 }
 
+static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
+				u64 *cs_seq)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	struct hl_ctx *ctx = hpriv->ctx;
+	bool need_soft_reset = false;
+	int rc = 0, do_ctx_switch;
+	void __user *chunks;
+	u32 num_chunks, tmp;
+	long ret;
+
+	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+
+	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
+		mutex_lock(&hpriv->restore_phase_mutex);
+
+		if (do_ctx_switch) {
+			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
+			if (rc) {
+				dev_err_ratelimited(hdev->dev,
+					"Failed to switch to context %d, rejecting CS! %d\n",
+					ctx->asid, rc);
+				/*
+				 * If we timedout, or if the device is not IDLE
+				 * while we want to do context-switch (-EBUSY),
+				 * we need to soft-reset because QMAN is
+				 * probably stuck. However, we can't call to
+				 * reset here directly because of deadlock, so
+				 * need to do it at the very end of this
+				 * function
+				 */
+				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
+					need_soft_reset = true;
+				mutex_unlock(&hpriv->restore_phase_mutex);
+				goto out;
+			}
+		}
+
+		hdev->asic_funcs->restore_phase_topology(hdev);
+
+		chunks = (void __user *) (uintptr_t) args->in.chunks_restore;
+		num_chunks = args->in.num_chunks_restore;
+
+		if (!num_chunks) {
+			dev_dbg(hdev->dev,
+				"Need to run restore phase but restore CS is empty\n");
+			rc = 0;
+		} else {
+			rc = cs_ioctl_default(hpriv, chunks, num_chunks,
+						cs_seq);
+		}
+
+		mutex_unlock(&hpriv->restore_phase_mutex);
+
+		if (rc) {
+			dev_err(hdev->dev,
+				"Failed to submit restore CS for context %d (%d)\n",
+				ctx->asid, rc);
+			goto out;
+		}
+
+		/* Need to wait for restore completion before execution phase */
+		if (num_chunks) {
+wait_again:
+			ret = _hl_cs_wait_ioctl(hdev, ctx,
+					jiffies_to_usecs(hdev->timeout_jiffies),
+					*cs_seq);
+			if (ret <= 0) {
+				if (ret == -ERESTARTSYS) {
+					usleep_range(100, 200);
+					goto wait_again;
+				}
+
+				dev_err(hdev->dev,
+					"Restore CS for context %d failed to complete %ld\n",
+					ctx->asid, ret);
+				rc = -ENOEXEC;
+				goto out;
+			}
+		}
+
+		ctx->thread_ctx_switch_wait_token = 1;
+
+	} else if (!ctx->thread_ctx_switch_wait_token) {
+		rc = hl_poll_timeout_memory(hdev,
+			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
+			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
+
+		if (rc == -ETIMEDOUT) {
+			dev_err(hdev->dev,
+				"context switch phase timeout (%d)\n", tmp);
+			goto out;
+		}
+	}
+
+out:
+	if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset))
+		hl_device_reset(hdev, false, false);
+
+	return rc;
+}
+
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
 		struct hl_cs_chunk *chunk, u64 *signal_seq)
 {
@@ -935,43 +1105,25 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 				void __user *chunks, u32 num_chunks,
 				u64 *cs_seq)
 {
-	struct hl_device *hdev = hpriv->hdev;
-	struct hl_ctx *ctx = hpriv->ctx;
 	struct hl_cs_chunk *cs_chunk_array, *chunk;
 	struct hw_queue_properties *hw_queue_prop;
-	struct hl_fence *sig_fence = NULL;
-	struct hl_cs_counters_atomic *cntr;
+	struct hl_device *hdev = hpriv->hdev;
 	struct hl_cs_compl *sig_waitcs_cmpl;
-	struct hl_cs *cs;
+	u32 q_idx, collective_engine_id = 0;
+	struct hl_cs_counters_atomic *cntr;
+	struct hl_fence *sig_fence = NULL;
+	struct hl_ctx *ctx = hpriv->ctx;
 	enum hl_queue_type q_type;
-	u32 size_to_copy, q_idx, collective_engine_id = 0;
+	struct hl_cs *cs;
 	u64 signal_seq;
 	int rc;
 
 	*cs_seq = ULLONG_MAX;
 	cntr = &hdev->aggregated_cs_counters;
 
-	if (num_chunks > HL_MAX_JOBS_PER_CS) {
-		dev_err(hdev->dev,
-			"Number of chunks can NOT be larger than %d\n",
-			HL_MAX_JOBS_PER_CS);
-		rc = -EINVAL;
-		goto out;
-	}
-
-	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
-					GFP_ATOMIC);
-	if (!cs_chunk_array) {
-		rc = -ENOMEM;
+	rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks);
+	if (rc)
 		goto out;
-	}
-
-	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
-	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
-		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
-		rc = -EFAULT;
-		goto free_cs_chunk_array;
-	}
 
 	/* currently it is guaranteed to have only one chunk */
 	chunk = &cs_chunk_array[0];
@@ -1108,158 +1260,38 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
-	struct hl_device *hdev = hpriv->hdev;
 	union hl_cs_args *args = data;
-	struct hl_ctx *ctx = hpriv->ctx;
-	void __user *chunks_execute, *chunks_restore;
 	enum hl_cs_type cs_type;
-	u32 num_chunks_execute, num_chunks_restore, sig_wait_flags;
 	u64 cs_seq = ULONG_MAX;
-	int rc, do_ctx_switch;
-	bool need_soft_reset = false;
-
-	if (hl_device_disabled_or_in_reset(hdev)) {
-		dev_warn_ratelimited(hdev->dev,
-			"Device is %s. Can't submit new CS\n",
-			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
-		rc = -EBUSY;
-		goto out;
-	}
-
-	sig_wait_flags = args->in.cs_flags & HL_CS_FLAGS_SIG_WAIT;
-
-	if (unlikely(sig_wait_flags == HL_CS_FLAGS_SIG_WAIT)) {
-		dev_err(hdev->dev,
-			"Signal and wait CS flags are mutually exclusive, context %d\n",
-		ctx->asid);
-		rc = -EINVAL;
-		goto out;
-	}
+	void __user *chunks;
+	u32 num_chunks;
+	int rc;
 
-	if (unlikely((sig_wait_flags & HL_CS_FLAGS_SIG_WAIT) &&
-			(!hdev->supports_sync_stream))) {
-		dev_err(hdev->dev, "Sync stream CS is not supported\n");
-		rc = -EINVAL;
+	rc = hl_cs_sanity_checks(hpriv, args);
+	if (rc)
 		goto out;
-	}
-
-	if (args->in.cs_flags & HL_CS_FLAGS_SIGNAL)
-		cs_type = CS_TYPE_SIGNAL;
-	else if (args->in.cs_flags & HL_CS_FLAGS_WAIT)
-		cs_type = CS_TYPE_WAIT;
-	else if (args->in.cs_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
-		cs_type = CS_TYPE_COLLECTIVE_WAIT;
-	else
-		cs_type = CS_TYPE_DEFAULT;
 
-	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
-	num_chunks_execute = args->in.num_chunks_execute;
-
-	if (cs_type == CS_TYPE_DEFAULT) {
-		if (!num_chunks_execute) {
-			dev_err(hdev->dev,
-				"Got execute CS with 0 chunks, context %d\n",
-				ctx->asid);
-			rc = -EINVAL;
-			goto out;
-		}
-	} else if (num_chunks_execute != 1) {
-		dev_err(hdev->dev,
-			"Sync stream CS mandates one chunk only, context %d\n",
-			ctx->asid);
-		rc = -EINVAL;
+	rc = hl_cs_ctx_switch(hpriv, args, &cs_seq);
+	if (rc)
 		goto out;
-	}
-
-	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
-
-	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
-		long ret;
-
-		chunks_restore =
-			(void __user *) (uintptr_t) args->in.chunks_restore;
-		num_chunks_restore = args->in.num_chunks_restore;
-
-		mutex_lock(&hpriv->restore_phase_mutex);
-
-		if (do_ctx_switch) {
-			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
-			if (rc) {
-				dev_err_ratelimited(hdev->dev,
-					"Failed to switch to context %d, rejecting CS! %d\n",
-					ctx->asid, rc);
-				/*
-				 * If we timedout, or if the device is not IDLE
-				 * while we want to do context-switch (-EBUSY),
-				 * we need to soft-reset because QMAN is
-				 * probably stuck. However, we can't call to
-				 * reset here directly because of deadlock, so
-				 * need to do it at the very end of this
-				 * function
-				 */
-				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
-					need_soft_reset = true;
-				mutex_unlock(&hpriv->restore_phase_mutex);
-				goto out;
-			}
-		}
-
-		hdev->asic_funcs->restore_phase_topology(hdev);
-
-		if (!num_chunks_restore) {
-			dev_dbg(hdev->dev,
-			"Need to run restore phase but restore CS is empty\n");
-			rc = 0;
-		} else {
-			rc = cs_ioctl_default(hpriv, chunks_restore,
-						num_chunks_restore, &cs_seq);
-		}
-
-		mutex_unlock(&hpriv->restore_phase_mutex);
-
-		if (rc) {
-			dev_err(hdev->dev,
-				"Failed to submit restore CS for context %d (%d)\n",
-				ctx->asid, rc);
-			goto out;
-		}
-
-		/* Need to wait for restore completion before execution phase */
-		if (num_chunks_restore) {
-			ret = _hl_cs_wait_ioctl(hdev, ctx,
-					jiffies_to_usecs(hdev->timeout_jiffies),
-					cs_seq);
-			if (ret <= 0) {
-				dev_err(hdev->dev,
-					"Restore CS for context %d failed to complete %ld\n",
-					ctx->asid, ret);
-				rc = -ENOEXEC;
-				goto out;
-			}
-		}
-
-		ctx->thread_ctx_switch_wait_token = 1;
-	} else if (!ctx->thread_ctx_switch_wait_token) {
-		u32 tmp;
-
-		rc = hl_poll_timeout_memory(hdev,
-			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
-			100, jiffies_to_usecs(hdev->timeout_jiffies), false);
 
-		if (rc == -ETIMEDOUT) {
-			dev_err(hdev->dev,
-				"context switch phase timeout (%d)\n", tmp);
-			goto out;
-		}
+	cs_type = hl_cs_get_cs_type(args->in.cs_flags &
+					~HL_CS_FLAGS_FORCE_RESTORE);
+	chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
+	num_chunks = args->in.num_chunks_execute;
+
+	switch (cs_type) {
+	case CS_TYPE_SIGNAL:
+	case CS_TYPE_WAIT:
+	case CS_TYPE_COLLECTIVE_WAIT:
+		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
+						&cs_seq);
+		break;
+	default:
+		rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq);
+		break;
 	}
 
-	if (cs_type == CS_TYPE_DEFAULT)
-		rc = cs_ioctl_default(hpriv, chunks_execute, num_chunks_execute,
-					&cs_seq);
-	else
-		rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks_execute,
-						num_chunks_execute, &cs_seq);
-
 out:
 	if (rc != -EAGAIN) {
 		memset(args, 0, sizeof(*args));
@@ -1267,9 +1299,6 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 		args->out.seq = cs_seq;
 	}
 
-	if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
-		hl_device_reset(hdev, false, false);
-
 	return rc;
 }
 
-- 
2.17.1

