[09/15] habanalabs/gaudi2: use graceful hard reset for F/W events

Message ID 20221027091007.664797-9-ogabbay@kernel.org
State New
Headers
Series [01/15] habanalabs: fix using freed pointer |

Commit Message

Oded Gabbay Oct. 27, 2022, 9:10 a.m. UTC
  From: Tomer Tayar <ttayar@habana.ai>

Use graceful hard reset for F/W events on Gaudi2 device that require a
device reset.

While at it, do a small refactor of the checks and function calls,
to simplify it and to avoid code duplication.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 27 +++++++++----------------
 1 file changed, 10 insertions(+), 17 deletions(-)
  

Patch

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 9208f69dd7f8..22f5445fe71c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8768,9 +8768,9 @@  static void hl_arc_event_handle(struct hl_device *hdev,
 
 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
-	u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
-	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	bool reset_required = false, skip_reset = false, is_critical = false;
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+	u32 ctl, reset_flags = HL_DRV_RESET_HARD;
 	int index, sbte_index;
 	u64 event_mask = 0;
 	u16 event_type;
@@ -9158,7 +9158,9 @@  static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 						event_type);
 	}
 
-	if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset)
+	if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset &&
+			(hdev->hard_reset_on_fw_events ||
+			(hdev->asic_prop.fw_security_enabled && is_critical)))
 		goto reset_device;
 
 	/* Send unmask irq only for interrupts not classified as MSG */
@@ -9172,22 +9174,13 @@  static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 
 reset_device:
 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
-		reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
-
-		/* notify on device unavailable while the reset triggered by fw */
-		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
-					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
-		hl_device_reset(hdev, reset_flags);
-	} else if (hdev->hard_reset_on_fw_events) {
-		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
-		hl_device_reset(hdev, reset_flags);
+		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
+		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 	} else {
-		if (!gaudi2_irq_map_table[event_type].msg)
-			hl_fw_unmask_irq(hdev, event_type);
+		reset_flags |= HL_DRV_RESET_DELAY;
 	}
-
-	if (event_mask)
-		hl_notifier_event_send_all(hdev, event_mask);
+	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+	hl_device_cond_reset(hdev, reset_flags, event_mask);
 }
 
 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)