[v3,RESEND,4/9] crypto/ycc: Add device error handling support for ycc hw errors

Message ID 1667461243-48652-5-git-send-email-guanjun@linux.alibaba.com
State New
Headers
Series Drivers for Alibaba YCC (Yitian Cryptography Complex) cryptographic accelerator |

Commit Message

guanjun Nov. 3, 2022, 7:40 a.m. UTC
  From: Zelin Deng <zelin.deng@linux.alibaba.com>

Due to YCC hardware limitations, the YCC device cannot be reset from the
REE to recover from a fatal error: the reset register is only valid in
the TEE, and a PCIe FLR only resets the queue pointers, not the YCC
hardware. Therefore, regard all hardware errors except queue errors as
fatal errors.

Signed-off-by: Zelin Deng <zelin.deng@linux.alibaba.com>
---
 drivers/crypto/ycc/ycc_isr.c  | 92 +++++++++++++++++++++++++++++++++++++++++--
 drivers/crypto/ycc/ycc_ring.c | 90 ++++++++++++++++++++++++++++++++++++++++++
 drivers/crypto/ycc/ycc_ring.h |  5 +++
 3 files changed, 183 insertions(+), 4 deletions(-)
  

Comments

kernel test robot Nov. 10, 2022, 5:51 a.m. UTC | #1
Hi 'Guanjun',

Thank you for the patch! Yet something to improve:

[auto build test ERROR on herbert-cryptodev-2.6/master]
[also build test ERROR on herbert-crypto-2.6/master linus/master v6.1-rc4 next-20221109]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Guanjun/Drivers-for-Alibaba-YCC-Yitian-Cryptography-Complex-cryptographic-accelerator/20221103-154448
base:   https://git.kernel.org/pub/scm/linux/kernel/git/herbert/cryptodev-2.6.git master
patch link:    https://lore.kernel.org/r/1667461243-48652-5-git-send-email-guanjun%40linux.alibaba.com
patch subject: [PATCH v3 RESEND 4/9] crypto/ycc: Add device error handling support for ycc hw errors
config: xtensa-allyesconfig
compiler: xtensa-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/e5dd94f77cbf5e4ca1f2b9bd365b5d1e12324984
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Guanjun/Drivers-for-Alibaba-YCC-Yitian-Cryptography-Complex-cryptographic-accelerator/20221103-154448
        git checkout e5dd94f77cbf5e4ca1f2b9bd365b5d1e12324984
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=xtensa SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All errors (new ones prefixed by >>):

   In file included from include/linux/pgtable.h:6,
                    from include/linux/kasan.h:33,
                    from include/linux/slab.h:148,
                    from drivers/crypto/ycc/ycc_ring.c:8:
   arch/xtensa/include/asm/pgtable.h:20:33: error: expected identifier before numeric constant
      20 | #define USER_RING               1       /* user ring level */
         |                                 ^
   drivers/crypto/ycc/ycc_ring.h:28:9: note: in expansion of macro 'USER_RING'
      28 |         USER_RING,
         |         ^~~~~~~~~
   drivers/crypto/ycc/ycc_ring.c: In function 'ycc_ring_debugfs_status_show':
   drivers/crypto/ycc/ycc_ring.c:40:41: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'dma_addr_t' {aka 'unsigned int'} [-Wformat=]
      40 |         seq_printf(s, "CMD base addr:%llx, RESP base addr:%llx\n",
         |                                      ~~~^
         |                                         |
         |                                         long long unsigned int
         |                                      %x
      41 |                    ring->cmd_base_paddr, ring->resp_base_paddr);
         |                    ~~~~~~~~~~~~~~~~~~~~  
         |                        |
         |                        dma_addr_t {aka unsigned int}
   drivers/crypto/ycc/ycc_ring.c:40:62: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 4 has type 'dma_addr_t' {aka 'unsigned int'} [-Wformat=]
      40 |         seq_printf(s, "CMD base addr:%llx, RESP base addr:%llx\n",
         |                                                           ~~~^
         |                                                              |
         |                                                              long long unsigned int
         |                                                           %x
      41 |                    ring->cmd_base_paddr, ring->resp_base_paddr);
         |                                          ~~~~~~~~~~~~~~~~~~~~~
         |                                              |
         |                                              dma_addr_t {aka unsigned int}
   drivers/crypto/ycc/ycc_ring.c: In function 'ycc_init_ring':
   drivers/crypto/ycc/ycc_ring.c:193:22: error: 'KERN_RING' undeclared (first use in this function); did you mean 'KERNEL_RING'?
     193 |         ring->type = KERN_RING;
         |                      ^~~~~~~~~
         |                      KERNEL_RING
   drivers/crypto/ycc/ycc_ring.c:193:22: note: each undeclared identifier is reported only once for each function it appears in
   drivers/crypto/ycc/ycc_ring.c: In function 'ycc_select_ring':
   drivers/crypto/ycc/ycc_ring.c:374:44: error: 'KERN_RING' undeclared (first use in this function); did you mean 'KERNEL_RING'?
     374 |                         if (found->type != KERN_RING) {
         |                                            ^~~~~~~~~
         |                                            KERNEL_RING
   drivers/crypto/ycc/ycc_ring.c: In function 'ycc_cancel_cmd':
   drivers/crypto/ycc/ycc_ring.c:489:17: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     489 |         aflag = (struct ycc_flags *)desc->private_ptr;
         |                 ^
   drivers/crypto/ycc/ycc_ring.c:490:23: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     490 |         if (!aflag || (u64)aflag == CMD_INVALID_CONTENT_U64) {
         |                       ^
   drivers/crypto/ycc/ycc_ring.c: In function 'ycc_handle_resp':
   drivers/crypto/ycc/ycc_ring.c:539:17: warning: cast to pointer from integer of different size [-Wint-to-pointer-cast]
     539 |         aflag = (struct ycc_flags *)desc->private_ptr;
         |                 ^
   drivers/crypto/ycc/ycc_ring.c:540:23: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     540 |         if (!aflag || (u64)aflag == CMD_INVALID_CONTENT_U64) {
         |                       ^
   drivers/crypto/ycc/ycc_ring.c: In function 'ycc_clear_resp_ring':
>> drivers/crypto/ycc/ycc_ring.c:646:30: error: 'INVAL_RING' undeclared (first use in this function)
     646 |                 ring->type = INVAL_RING;
         |                              ^~~~~~~~~~


vim +/INVAL_RING +646 drivers/crypto/ycc/ycc_ring.c

   603	
   604	/*
   605	 * Clear response queue
   606	 *
   607	 * Note: Make sure been invoked when error occurs in YCC internal and
   608	 * YCC status is not ready.
   609	 */
   610	void ycc_clear_resp_ring(struct ycc_ring *ring)
   611	{
   612		struct ycc_resp_desc *resp;
   613		int retry;
   614		u32 pending_cmd;
   615	
   616		/*
   617		 * Check if the ring has been stopped. *stop* means no
   618		 * new transactions, No need to wait for pending_cmds
   619		 * been processed under this condition.
   620		 */
   621		retry = ycc_ring_stopped(ring) ? 0 : MAX_ERROR_RETRY;
   622		pending_cmd = YCC_CSR_RD(ring->csr_vaddr, REG_RING_PENDING_CMD);
   623	
   624		ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
   625		while (!ycc_ring_empty(ring) || (retry && pending_cmd)) {
   626			if (!ycc_ring_empty(ring)) {
   627				resp = (struct ycc_resp_desc *)ring->resp_base_vaddr +
   628					ring->resp_rd_ptr;
   629				resp->state = CMD_CANCELLED;
   630				ycc_handle_resp(ring, resp);
   631	
   632				if (++ring->resp_rd_ptr == ring->max_desc)
   633					ring->resp_rd_ptr = 0;
   634	
   635				YCC_CSR_WR(ring->csr_vaddr, REG_RING_RSP_RD_PTR, ring->resp_rd_ptr);
   636			} else {
   637				udelay(MAX_SLEEP_US_PER_CHECK);
   638				retry--;
   639			}
   640	
   641			pending_cmd = YCC_CSR_RD(ring->csr_vaddr, REG_RING_PENDING_CMD);
   642			ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
   643		}
   644	
   645		if (!retry && pending_cmd)
 > 646			ring->type = INVAL_RING;
  

Patch

diff --git a/drivers/crypto/ycc/ycc_isr.c b/drivers/crypto/ycc/ycc_isr.c
index a86c8d7..abbe0c4 100644
--- a/drivers/crypto/ycc/ycc_isr.c
+++ b/drivers/crypto/ycc/ycc_isr.c
@@ -15,7 +15,6 @@ 
 #include "ycc_dev.h"
 #include "ycc_ring.h"
 
-
 static irqreturn_t ycc_resp_isr(int irq, void *data)
 {
 	struct ycc_ring *ring = (struct ycc_ring *)data;
@@ -24,11 +23,93 @@  static irqreturn_t ycc_resp_isr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-/*
- * TODO: will implement when ycc ring actually work.
- */
+static void ycc_fatal_error(struct ycc_dev *ydev)
+{
+	struct ycc_ring *ring;
+	int i;
+
+	for (i = 0; i < YCC_RINGPAIR_NUM; i++) {
+		ring = ydev->rings + i;
+
+		if (ring->type != KERN_RING)
+			continue;
+
+		spin_lock_bh(&ring->lock);
+		ycc_clear_cmd_ring(ring);
+		spin_unlock_bh(&ring->lock);
+
+		ycc_clear_resp_ring(ring);
+	}
+}
+
 static void ycc_process_global_err(struct work_struct *work)
 {
+	struct ycc_dev *ydev = container_of(work, struct ycc_dev, work);
+	struct ycc_bar *cfg_bar = &ydev->ycc_bars[YCC_SEC_CFG_BAR];
+	struct ycc_ring *ring;
+	u32 hclk_err, xclk_err;
+	u32 xclk_ecc_uncor_err_0, xclk_ecc_uncor_err_1;
+	u32 hclk_ecc_uncor_err;
+	int i;
+
+	if (pci_wait_for_pending_transaction(ydev->pdev))
+		pr_warn("Failed to pending transaction\n");
+
+	hclk_err = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_HCLK_INT_STATUS);
+	xclk_err = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_XCLK_INT_STATUS);
+	xclk_ecc_uncor_err_0 = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_XCLK_MEM_ECC_UNCOR_0);
+	xclk_ecc_uncor_err_1 = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_XCLK_MEM_ECC_UNCOR_1);
+	hclk_ecc_uncor_err = YCC_CSR_RD(cfg_bar->vaddr, REG_YCC_HCLK_MEM_ECC_UNCOR);
+
+	if ((hclk_err & ~(YCC_HCLK_TRNG_ERR)) || xclk_err || hclk_ecc_uncor_err) {
+		pr_err("Got uncorrected error, must be reset\n");
+		/*
+		 * Fatal error, as ycc cannot be reset in REE, clear ring data.
+		 */
+		return ycc_fatal_error(ydev);
+	}
+
+	if (xclk_ecc_uncor_err_0 || xclk_ecc_uncor_err_1) {
+		pr_err("Got algorithm ECC error: %x, %x\n",
+			xclk_ecc_uncor_err_0, xclk_ecc_uncor_err_1);
+		return ycc_fatal_error(ydev);
+	}
+
+	/* This has to be queue error. Handling command rings. */
+	for (i = 0; i < YCC_RINGPAIR_NUM; i++) {
+		ring = ydev->rings + i;
+
+		if (ring->type != KERN_RING)
+			continue;
+
+		ring->status = YCC_CSR_RD(ring->csr_vaddr, REG_RING_STATUS);
+		if (ring->status) {
+			pr_err("YCC: Dev: %d, Ring: %d got ring err: %x\n",
+				ydev->id, ring->ring_id, ring->status);
+			spin_lock_bh(&ring->lock);
+			ycc_clear_cmd_ring(ring);
+			spin_unlock_bh(&ring->lock);
+		}
+	}
+
+	/*
+	 * Give HW a chance to process all pending_cmds
+	 * through recovering transactions.
+	 */
+	pci_set_master(ydev->pdev);
+
+	for (i = 0; i < YCC_RINGPAIR_NUM; i++) {
+		ring = ydev->rings + i;
+
+		if (ring->type != KERN_RING || !ring->status)
+			continue;
+
+		ycc_clear_resp_ring(ring);
+	}
+
+	ycc_g_err_unmask(cfg_bar->vaddr);
+	clear_bit(YDEV_STATUS_ERR, &ydev->status);
+	set_bit(YDEV_STATUS_READY, &ydev->status);
 }
 
 static irqreturn_t ycc_g_err_isr(int irq, void *data)
@@ -45,6 +126,9 @@  static irqreturn_t ycc_g_err_isr(int irq, void *data)
 
 	clear_bit(YDEV_STATUS_READY, &ydev->status);
 
+	/* Disable YCC mastering, no new transactions */
+	pci_clear_master(ydev->pdev);
+
 	schedule_work(&ydev->work);
 	return IRQ_HANDLED;
 }
diff --git a/drivers/crypto/ycc/ycc_ring.c b/drivers/crypto/ycc/ycc_ring.c
index ea6877e..5207228 100644
--- a/drivers/crypto/ycc/ycc_ring.c
+++ b/drivers/crypto/ycc/ycc_ring.c
@@ -480,6 +480,24 @@  int ycc_enqueue(struct ycc_ring *ring, void *cmd)
 	return ret;
 }
 
+static void ycc_cancel_cmd(struct ycc_ring *ring, struct ycc_cmd_desc *desc)
+{
+	struct ycc_flags *aflag;
+
+	dma_rmb();
+
+	aflag = (struct ycc_flags *)desc->private_ptr;
+	if (!aflag || (u64)aflag == CMD_INVALID_CONTENT_U64) {
+		pr_debug("YCC: Invalid aflag\n");
+		return;
+	}
+
+	aflag->ycc_done_callback(aflag->ptr, CMD_CANCELLED);
+
+	memset(desc, CMD_INVALID_CONTENT_U8, sizeof(*desc));
+	kfree(aflag);
+}
+
 static inline void ycc_check_cmd_state(u16 state)
 {
 	switch (state) {
@@ -557,3 +575,75 @@  void ycc_dequeue(struct ycc_ring *ring)
 	if (cnt)
 		YCC_CSR_WR(ring->csr_vaddr, REG_RING_RSP_RD_PTR, ring->resp_rd_ptr);
 }
+
+/*
+ * Clear incomplete cmds in the command queue while rolling back cmd_wr_ptr.
+ *
+ * Note: Make sure this is invoked when an error occurs inside YCC and
+ * the YCC status is not ready.
+ */
+void ycc_clear_cmd_ring(struct ycc_ring *ring)
+{
+	struct ycc_cmd_desc *desc = NULL;
+
+	ring->cmd_rd_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_CMD_RD_PTR);
+	ring->cmd_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_CMD_WR_PTR);
+
+	while (ring->cmd_rd_ptr != ring->cmd_wr_ptr) {
+		desc = (struct ycc_cmd_desc *)ring->cmd_base_vaddr +
+			ring->cmd_rd_ptr;
+		ycc_cancel_cmd(ring, desc);
+
+		if (--ring->cmd_wr_ptr == 0)
+			ring->cmd_wr_ptr = ring->max_desc;
+	}
+
+	YCC_CSR_WR(ring->csr_vaddr, REG_RING_CMD_WR_PTR, ring->cmd_wr_ptr);
+}
+
+/*
+ * Clear response queue
+ *
+ * Note: Make sure been invoked when error occurs in YCC internal and
+ * YCC status is not ready.
+ */
+void ycc_clear_resp_ring(struct ycc_ring *ring)
+{
+	struct ycc_resp_desc *resp;
+	int retry;
+	u32 pending_cmd;
+
+	/*
+	 * Check if the ring has been stopped. *stop* means no
+	 * new transactions, No need to wait for pending_cmds
+	 * been processed under this condition.
+	 */
+	retry = ycc_ring_stopped(ring) ? 0 : MAX_ERROR_RETRY;
+	pending_cmd = YCC_CSR_RD(ring->csr_vaddr, REG_RING_PENDING_CMD);
+
+	ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
+	while (!ycc_ring_empty(ring) || (retry && pending_cmd)) {
+		if (!ycc_ring_empty(ring)) {
+			resp = (struct ycc_resp_desc *)ring->resp_base_vaddr +
+				ring->resp_rd_ptr;
+			resp->state = CMD_CANCELLED;
+			ycc_handle_resp(ring, resp);
+
+			if (++ring->resp_rd_ptr == ring->max_desc)
+				ring->resp_rd_ptr = 0;
+
+			YCC_CSR_WR(ring->csr_vaddr, REG_RING_RSP_RD_PTR, ring->resp_rd_ptr);
+		} else {
+			udelay(MAX_SLEEP_US_PER_CHECK);
+			retry--;
+		}
+
+		pending_cmd = YCC_CSR_RD(ring->csr_vaddr, REG_RING_PENDING_CMD);
+		ring->resp_wr_ptr = YCC_CSR_RD(ring->csr_vaddr, REG_RING_RSP_WR_PTR);
+	}
+
+	if (!retry && pending_cmd)
+		ring->type = INVAL_RING;
+
+	ring->status = 0;
+}
diff --git a/drivers/crypto/ycc/ycc_ring.h b/drivers/crypto/ycc/ycc_ring.h
index eb3e6f9..52b0fe8 100644
--- a/drivers/crypto/ycc/ycc_ring.h
+++ b/drivers/crypto/ycc/ycc_ring.h
@@ -20,6 +20,9 @@ 
 #define CMD_INVALID_CONTENT_U8		0x7f
 #define CMD_INVALID_CONTENT_U64		0x7f7f7f7f7f7f7f7fULL
 
+#define MAX_SLEEP_US_PER_CHECK		100   /* every 100us to check register */
+#define MAX_ERROR_RETRY			10000 /* 1s in total */
+
 enum ring_type {
 	FREE_RING,
 	USER_RING,
@@ -104,6 +107,8 @@  static inline bool ycc_ring_stopped(struct ycc_ring *ring)
 
 int ycc_enqueue(struct ycc_ring *ring, void *cmd);
 void ycc_dequeue(struct ycc_ring *ring);
+void ycc_clear_cmd_ring(struct ycc_ring *ring);
+void ycc_clear_resp_ring(struct ycc_ring *ring);
 struct ycc_ring *ycc_crypto_get_ring(void);
 void ycc_crypto_free_ring(struct ycc_ring *ring);
 int ycc_dev_rings_init(struct ycc_dev *ydev, u32 max_desc, int user_rings);