[v3,4/9] crypto/ycc: Add device error handling support for ycc hw errors

Message ID 1666691616-69983-5-git-send-email-guanjun@linux.alibaba.com
State New
Headers
Series Drivers for Alibaba YCC (Yitian Cryptography Complex) cryptographic accelerator |

Commit Message

guanjun Oct. 25, 2022, 9:53 a.m. UTC
  From: Zelin Deng <zelin.deng@linux.alibaba.com>

Due to ycc hardware limitations, in REE ycc device cannot be reset to
recover from fatal error (reset register is only valid in TEE and
PCIE FLR only reset queue pointers but not ycc hw), regard all hw errors
except queue error as fatal error.

Signed-off-by: Zelin Deng <zelin.deng@linux.alibaba.com>
---
 drivers/crypto/ycc/ycc_isr.c  | 93 +++++++++++++++++++++++++++++++++++++++++--
 drivers/crypto/ycc/ycc_ring.c | 90 +++++++++++++++++++++++++++++++++++++++++
 drivers/crypto/ycc/ycc_ring.h |  3 ++
 3 files changed, 183 insertions(+), 3 deletions(-)
  

Comments

kernel test robot Oct. 25, 2022, 2:18 p.m. UTC | #1
Hi 'Guanjun',

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on herbert-cryptodev-2.6/master]
[also build test WARNING on herbert-crypto-2.6/master linus/master v6.1-rc2 next-20221025]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Guanjun/Drivers-for-Alibaba-YCC-Yitian-Cryptography-Complex-cryptographic-accelerator/20221025-180005
base:   https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
patch link:    https://lore.kernel.org/r/1666691616-69983-5-git-send-email-guanjun%40linux.alibaba.com
patch subject: [PATCH v3 4/9] crypto/ycc: Add device error handling support for ycc hw errors
config: sparc-allyesconfig (attached as .config)
compiler: sparc64-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/944c3c90dbd0017bbe59ea4ae5cfe2fb01f13a0e
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Guanjun/Drivers-for-Alibaba-YCC-Yitian-Cryptography-Complex-cryptographic-accelerator/20221025-180005
        git checkout 944c3c90dbd0017bbe59ea4ae5cfe2fb01f13a0e
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=sparc SHELL=/bin/bash drivers/crypto/ycc/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/crypto/ycc/ycc_ring.c:536:6: warning: no previous prototype for 'ycc_handle_resp' [-Wmissing-prototypes]
     536 | void ycc_handle_resp(struct ycc_ring *ring, struct ycc_resp_desc *desc)
         |      ^~~~~~~~~~~~~~~
>> drivers/crypto/ycc/ycc_ring.c:588:6: warning: no previous prototype for 'ycc_clear_cmd_ring' [-Wmissing-prototypes]
     588 | void ycc_clear_cmd_ring(struct ycc_ring *ring)
         |      ^~~~~~~~~~~~~~~~~~
>> drivers/crypto/ycc/ycc_ring.c:613:6: warning: no previous prototype for 'ycc_clear_resp_ring' [-Wmissing-prototypes]
     613 | void ycc_clear_resp_ring(struct ycc_ring *ring)
         |      ^~~~~~~~~~~~~~~~~~~


vim +/ycc_clear_cmd_ring +588 drivers/crypto/ycc/ycc_ring.c

   535	
 > 536	void ycc_handle_resp(struct ycc_ring *ring, struct ycc_resp_desc *desc)
   537	{
   538		struct ycc_flags *aflag;
   539	
   540		dma_rmb();
   541	
   542		aflag = (struct ycc_flags *)desc->private_ptr;
   543		if (!aflag || (u64)aflag == CMD_INVALID_CONTENT_U64) {
   544			pr_debug("Invalid command aflag\n");
   545			return;
   546		}
   547	
   548		ycc_check_cmd_state(desc->state);
   549		aflag->ycc_done_callback(aflag->ptr, desc->state);
   550	
   551		memset(desc, CMD_INVALID_CONTENT_U8, sizeof(*desc));
   552		kfree(aflag);
   553	}
   554	
   555	/*
   556	 * dequeue, read response descriptor
   557	 */
   558	void ycc_dequeue(struct ycc_ring *ring)
   559	{
   560		struct ycc_resp_desc *resp;
   561		int cnt = 0;
   562	
   563		if (!test_bit(YDEV_STATUS_READY, &ring->ydev->status) || ycc_ring_stopped(ring))
   564			return;
   565	
   566		ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
   567		while (!ycc_ring_empty(ring)) {
   568			resp = (struct ycc_resp_desc *)ring->resp_base_vaddr +
   569				ring->resp_rd_ptr;
   570			ycc_handle_resp(ring, resp);
   571	
   572			cnt++;
   573			ring->nr_resps++;
   574			if (++ring->resp_rd_ptr == ring->max_desc)
   575				ring->resp_rd_ptr = 0;
   576		}
   577	
   578		if (cnt)
   579			YCC_CSR_WR(ring->csr_vaddr, REG_RING_RSP_RD_PTR, ring->resp_rd_ptr);
   580	}
   581	
   582	/*
   583	 * Clear incompletion cmds in command queue while rollback cmd_wr_ptr.
   584	 *
   585	 * Note: Make sure been invoked when error occurs in YCC internal and
   586	 * YCC status is not ready.
   587	 */
 > 588	void ycc_clear_cmd_ring(struct ycc_ring *ring)
   589	{
   590		struct ycc_cmd_desc *desc = NULL;
   591	
   592		ring->cmd_rd_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_CMD_RD_PTR);
   593		ring->cmd_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_CMD_WR_PTR);
   594	
   595		while (ring->cmd_rd_ptr != ring->cmd_wr_ptr) {
   596			desc = (struct ycc_cmd_desc *)ring->cmd_base_vaddr +
   597				ring->cmd_rd_ptr;
   598			ycc_cancel_cmd(ring, desc);
   599	
   600			if (--ring->cmd_wr_ptr == 0)
   601				ring->cmd_wr_ptr = ring->max_desc;
   602		}
   603	
   604		YCC_CSR_WR(ring->csr_vaddr, REG_RING_CMD_WR_PTR, ring->cmd_wr_ptr);
   605	}
   606	
   607	/*
   608	 * Clear response queue
   609	 *
   610	 * Note: Make sure been invoked when error occurs in YCC internal and
   611	 * YCC status is not ready.
   612	 */
 > 613	void ycc_clear_resp_ring(struct ycc_ring *ring)
  

Patch

diff --git a/drivers/crypto/ycc/ycc_isr.c b/drivers/crypto/ycc/ycc_isr.c
index a86c8d7..28f8918 100644
--- a/drivers/crypto/ycc/ycc_isr.c
+++ b/drivers/crypto/ycc/ycc_isr.c
@@ -15,6 +15,8 @@ 
 #include "ycc_dev.h"
 #include "ycc_ring.h"
 
+extern void ycc_clear_cmd_ring(struct ycc_ring *ring);
+extern void ycc_clear_resp_ring(struct ycc_ring *ring);
 
 static irqreturn_t ycc_resp_isr(int irq, void *data)
 {
@@ -24,11 +26,93 @@  static irqreturn_t ycc_resp_isr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-/*
- * TODO: will implement when ycc ring actually work.
- */
+static void ycc_fatal_error(struct ycc_dev *ydev)
+{
+	struct ycc_ring *ring;
+	int i;
+
+	for (i = 0; i < YCC_RINGPAIR_NUM; i++) {
+		ring = ydev->rings + i;
+
+		if (ring->type != KERN_RING)
+			continue;
+
+		spin_lock_bh(&ring->lock);
+		ycc_clear_cmd_ring(ring);
+		spin_unlock_bh(&ring->lock);
+
+		ycc_clear_resp_ring(ring);
+	}
+}
+
 static void ycc_process_global_err(struct work_struct *work)
 {
+	struct ycc_dev *ydev = container_of(work, struct ycc_dev, work);
+	struct ycc_bar *cfg_bar = &ydev->ycc_bars[YCC_SEC_CFG_BAR];
+	struct ycc_ring *ring;
+	u32 hclk_err, xclk_err;
+	u32 xclk_ecc_uncor_err_0, xclk_ecc_uncor_err_1;
+	u32 hclk_ecc_uncor_err;
+	int i;
+
+	if (pci_wait_for_pending_transaction(ydev->pdev))
+		pr_warn("Failed to pending transaction\n");
+
+	hclk_err = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_HCLK_INT_STATUS);
+	xclk_err = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_XCLK_INT_STATUS);
+	xclk_ecc_uncor_err_0 = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_XCLK_MEM_ECC_UNCOR_0);
+	xclk_ecc_uncor_err_1 = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_XCLK_MEM_ECC_UNCOR_1);
+	hclk_ecc_uncor_err = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_HCLK_MEM_ECC_UNCOR);
+
+	if ((hclk_err & ~(YCC_HCLK_TRNG_ERR)) || xclk_err || hclk_ecc_uncor_err) {
+		pr_err("Got uncorrected error, must be reset\n");
+		/*
+		 * Fatal error, as ycc cannot be reset in REE, clear ring data.
+		 */
+		return ycc_fatal_error(ydev);
+	}
+
+	if (xclk_ecc_uncor_err_0 || xclk_ecc_uncor_err_1) {
+		pr_err("Got algorithm ECC error: %x, %x\n",
+			xclk_ecc_uncor_err_0, xclk_ecc_uncor_err_1);
+		return ycc_fatal_error(ydev);
+	}
+
+	/* This has to be queue error. Handling command rings. */
+	for (i = 0; i < YCC_RINGPAIR_NUM; i++) {
+		ring = ydev->rings + i;
+
+		if (ring->type != KERN_RING)
+			continue;
+
+		ring->status = YCC_CSR_RD(ring->csr_vaddr, REG_RING_STATUS);
+		if (ring->status) {
+			pr_err("YCC: Dev: %d, Ring: %d got ring err: %x\n",
+				ydev->id, ring->ring_id, ring->status);
+			spin_lock_bh(&ring->lock);
+			ycc_clear_cmd_ring(ring);
+			spin_unlock_bh(&ring->lock);
+		}
+	}
+
+	/*
+	 * Give HW a chance to process all pending_cmds
+	 * through recovering transactions.
+	 */
+	pci_set_master(ydev->pdev);
+
+	for (i = 0; i < YCC_RINGPAIR_NUM; i++) {
+		ring = ydev->rings + i;
+
+		if (ring->type != KERN_RING || !ring->status)
+			continue;
+
+		ycc_clear_resp_ring(ring);
+	}
+
+	ycc_g_err_unmask(cfg_bar->vaddr);
+	clear_bit(YDEV_STATUS_ERR, &ydev->status);
+	set_bit(YDEV_STATUS_READY, &ydev->status);
 }
 
 static irqreturn_t ycc_g_err_isr(int irq, void *data)
@@ -45,6 +129,9 @@  static irqreturn_t ycc_g_err_isr(int irq, void *data)
 
 	clear_bit(YDEV_STATUS_READY, &ydev->status);
 
+	/* Disable YCC mastering, no new transactions */
+	pci_clear_master(ydev->pdev);
+
 	schedule_work(&ydev->work);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/crypto/ycc/ycc_ring.c b/drivers/crypto/ycc/ycc_ring.c
index d808054..f6f6e40 100644
--- a/drivers/crypto/ycc/ycc_ring.c
+++ b/drivers/crypto/ycc/ycc_ring.c
@@ -483,6 +483,24 @@  int ycc_enqueue(struct ycc_ring *ring, void *cmd)
 	return ret;
 }
 
+static void ycc_cancel_cmd(struct ycc_ring *ring, struct ycc_cmd_desc *desc)
+{
+	struct ycc_flags *aflag;
+
+	dma_rmb();
+
+	aflag = (struct ycc_flags *)desc->private_ptr;
+	if (!aflag || (u64)aflag == CMD_INVALID_CONTENT_U64) {
+		pr_debug("YCC: Invalid aflag\n");
+		return;
+	}
+
+	aflag->ycc_done_callback(aflag->ptr, CMD_CANCELLED);
+
+	memset(desc, CMD_INVALID_CONTENT_U8, sizeof(*desc));
+	kfree(aflag);
+}
+
 static inline void ycc_check_cmd_state(u16 state)
 {
 	switch (state) {
@@ -560,3 +578,75 @@  void ycc_dequeue(struct ycc_ring *ring)
 	if (cnt)
 		YCC_CSR_WR(ring->csr_vaddr, REG_RING_RSP_RD_PTR, ring->resp_rd_ptr);
 }
+
+/*
+ * Clear incompletion cmds in command queue while rollback cmd_wr_ptr.
+ *
+ * Note: Make sure been invoked when error occurs in YCC internal and
+ * YCC status is not ready.
+ */
+void ycc_clear_cmd_ring(struct ycc_ring *ring)
+{
+	struct ycc_cmd_desc *desc = NULL;
+
+	ring->cmd_rd_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_CMD_RD_PTR);
+	ring->cmd_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_CMD_WR_PTR);
+
+	while (ring->cmd_rd_ptr != ring->cmd_wr_ptr) {
+		desc = (struct ycc_cmd_desc *)ring->cmd_base_vaddr +
+			ring->cmd_rd_ptr;
+		ycc_cancel_cmd(ring, desc);
+
+		if (--ring->cmd_wr_ptr == 0)
+			ring->cmd_wr_ptr = ring->max_desc;
+	}
+
+	YCC_CSR_WR(ring->csr_vaddr, REG_RING_CMD_WR_PTR, ring->cmd_wr_ptr);
+}
+
+/*
+ * Clear response queue
+ *
+ * Note: Make sure been invoked when error occurs in YCC internal and
+ * YCC status is not ready.
+ */
+void ycc_clear_resp_ring(struct ycc_ring *ring)
+{
+	struct ycc_resp_desc *resp;
+	int retry;
+	u32 pending_cmd;
+
+	/*
+	 * Check if the ring has been stopped. *stop* means no
+	 * new transactions, No need to wait for pending_cmds
+	 * been processed under this condition.
+	 */
+	retry = ycc_ring_stopped(ring) ? 0 : MAX_ERROR_RETRY;
+	pending_cmd = YCC_CSR_RD(ring->csr_vaddr, REG_RING_PENDING_CMD);
+
+	ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
+	while (!ycc_ring_empty(ring) || (retry && pending_cmd)) {
+		if (!ycc_ring_empty(ring)) {
+			resp = (struct ycc_resp_desc *)ring->resp_base_vaddr +
+				ring->resp_rd_ptr;
+			resp->state = CMD_CANCELLED;
+			ycc_handle_resp(ring, resp);
+
+			if (++ring->resp_rd_ptr == ring->max_desc)
+				ring->resp_rd_ptr = 0;
+
+			YCC_CSR_WR(ring->csr_vaddr, REG_RING_RSP_RD_PTR, ring->resp_rd_ptr);
+		} else {
+			udelay(MAX_SLEEP_US_PER_CHECK);
+			retry--;
+		}
+
+		pending_cmd = YCC_CSR_RD(ring->csr_vaddr, REG_RING_PENDING_CMD);
+		ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
+	}
+
+	if (!retry && pending_cmd)
+		ring->type = INVAL_RING;
+
+	ring->status = 0;
+}
diff --git a/drivers/crypto/ycc/ycc_ring.h b/drivers/crypto/ycc/ycc_ring.h
index eb3e6f9..c3e7cbf 100644
--- a/drivers/crypto/ycc/ycc_ring.h
+++ b/drivers/crypto/ycc/ycc_ring.h
@@ -20,6 +20,9 @@ 
 #define CMD_INVALID_CONTENT_U8		0x7f
 #define CMD_INVALID_CONTENT_U64		0x7f7f7f7f7f7f7f7fULL
 
+#define MAX_SLEEP_US_PER_CHECK		100   /* every 100us to check register */
+#define MAX_ERROR_RETRY			10000 /* 1s in total */
+
 enum ring_type {
 	FREE_RING,
 	USER_RING,