s390/vfio-ap: handle hardware checkstop state on queue reset operation

Message ID 20240215153144.14747-1-jjherne@linux.ibm.com
State New
Headers
Series s390/vfio-ap: handle hardware checkstop state on queue reset operation

Commit Message

Jason J. Herne Feb. 15, 2024, 3:31 p.m. UTC
  Update vfio_ap_mdev_reset_queue() to handle an unexpected checkstop (hardware error) in the
same way as the deconfigured case. This prevents unexpected and unhelpful warnings in the
event of a hardware error.

We also stop lying about a queue's reset response code. The code was originally overwritten
with 0 so that vfio_ap_mdev_filter_matrix() would pass a deconfigured device through to the
guest for the hotplug scenario. Instead, vfio_ap_mdev_filter_matrix() now allows passthrough
for all queues whose reset response code is normal, deconfigured, or checkstopped. In the
checkstopped case we choose to pass the device through and let the error state be
reflected at the guest level.

Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>
Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 35 ++++++++++++++++---------------
 1 file changed, 18 insertions(+), 17 deletions(-)
  

Comments

Jason J. Herne Feb. 27, 2024, 1:25 p.m. UTC | #1
Polite Ping :)

Patch already has R-b.
Thanks for taking a look.

On 2/15/24 10:31 AM, Jason J. Herne wrote:
> Update vfio_ap_mdev_reset_queue() to handle an unexpected checkstop (hardware error) the
> same as the deconfigured case. This prevents unexpected and unhelpful warnings in the
> event of a hardware error.
> 
> We also stop lying about a queue's reset response code. This was originally done so we
> could force vfio_ap_mdev_filter_matrix to pass a deconfigured device through to the guest
> for the hotplug scenario. vfio_ap_mdev_filter_matrix is instead modified to allow
> passthrough for all queues with reset state normal, deconfigured, or checkstopped. In the
> checkstopped case we choose to pass the device through and let the error state be
> reflected at the guest level.
> 
> Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>
> Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com>
> ---
>   drivers/s390/crypto/vfio_ap_ops.c | 35 ++++++++++++++++---------------
>   1 file changed, 18 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
> index 983b3b16196c..fc169bc61593 100644
> --- a/drivers/s390/crypto/vfio_ap_ops.c
> +++ b/drivers/s390/crypto/vfio_ap_ops.c
> @@ -659,6 +659,21 @@ static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
>   			     AP_DOMAINS);
>   }
>   
> +static bool _queue_passable(struct vfio_ap_queue *q)
> +{
> +	if (!q)
> +		return false;
> +
> +	switch (q->reset_status.response_code) {
> +	case AP_RESPONSE_NORMAL:
> +	case AP_RESPONSE_DECONFIGURED:
> +	case AP_RESPONSE_CHECKSTOPPED:
> +		return true;
> +	default:
> +		return false;
> +	}
> +}
> +
>   /*
>    * vfio_ap_mdev_filter_matrix - filter the APQNs assigned to the matrix mdev
>    *				to ensure no queue devices are passed through to
> @@ -687,7 +702,6 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
>   	unsigned long apid, apqi, apqn;
>   	DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
>   	DECLARE_BITMAP(prev_shadow_aqm, AP_DOMAINS);
> -	struct vfio_ap_queue *q;
>   
>   	bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
>   	bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
> @@ -716,8 +730,7 @@ static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
>   			 * hardware device.
>   			 */
>   			apqn = AP_MKQID(apid, apqi);
> -			q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
> -			if (!q || q->reset_status.response_code) {
> +			if (!_queue_passable(vfio_ap_mdev_get_queue(matrix_mdev, apqn))) {
>   				clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
>   
>   				/*
> @@ -1691,6 +1704,7 @@ static int apq_status_check(int apqn, struct ap_queue_status *status)
>   	switch (status->response_code) {
>   	case AP_RESPONSE_NORMAL:
>   	case AP_RESPONSE_DECONFIGURED:
> +	case AP_RESPONSE_CHECKSTOPPED:
>   		return 0;
>   	case AP_RESPONSE_RESET_IN_PROGRESS:
>   	case AP_RESPONSE_BUSY:
> @@ -1747,14 +1761,6 @@ static void apq_reset_check(struct work_struct *reset_work)
>   				memcpy(&q->reset_status, &status, sizeof(status));
>   				continue;
>   			}
> -			/*
> -			 * When an AP adapter is deconfigured, the
> -			 * associated queues are reset, so let's set the
> -			 * status response code to 0 so the queue may be
> -			 * passed through (i.e., not filtered)
> -			 */
> -			if (status.response_code == AP_RESPONSE_DECONFIGURED)
> -				q->reset_status.response_code = 0;
>   			if (q->saved_isc != VFIO_AP_ISC_INVALID)
>   				vfio_ap_free_aqic_resources(q);
>   			break;
> @@ -1781,12 +1787,7 @@ static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
>   		queue_work(system_long_wq, &q->reset_work);
>   		break;
>   	case AP_RESPONSE_DECONFIGURED:
> -		/*
> -		 * When an AP adapter is deconfigured, the associated
> -		 * queues are reset, so let's set the status response code to 0
> -		 * so the queue may be passed through (i.e., not filtered).
> -		 */
> -		q->reset_status.response_code = 0;
> +	case AP_RESPONSE_CHECKSTOPPED:
>   		vfio_ap_free_aqic_resources(q);
>   		break;
>   	default:
  
Heiko Carstens Feb. 27, 2024, 2:29 p.m. UTC | #2
On Tue, Feb 27, 2024 at 08:25:21AM -0500, Jason J. Herne wrote:
> Polite Ping :)
> 
> Patch already has R-b.
> Thanks for taking a look.
> 
> On 2/15/24 10:31 AM, Jason J. Herne wrote:
> > Update vfio_ap_mdev_reset_queue() to handle an unexpected checkstop (hardware error) the
> > same as the deconfigured case. This prevents unexpected and unhelpful warnings in the
> > event of a hardware error.
> > 
> > We also stop lying about a queue's reset response code. This was originally done so we
> > could force vfio_ap_mdev_filter_matrix to pass a deconfigured device through to the guest
> > for the hotplug scenario. vfio_ap_mdev_filter_matrix is instead modified to allow
> > passthrough for all queues with reset state normal, deconfigured, or checkstopped. In the
> > checkstopped case we choose to pass the device through and let the error state be
> > reflected at the guest level.
> > 
> > Signed-off-by: Jason J. Herne <jjherne@linux.ibm.com>
> > Reviewed-by: Anthony Krowiak <akrowiak@linux.ibm.com>
> > ---
> >   drivers/s390/crypto/vfio_ap_ops.c | 35 ++++++++++++++++---------------
> >   1 file changed, 18 insertions(+), 17 deletions(-)

Applied, thanks!
  

Patch

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 983b3b16196c..fc169bc61593 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -659,6 +659,21 @@  static bool vfio_ap_mdev_filter_cdoms(struct ap_matrix_mdev *matrix_mdev)
 			     AP_DOMAINS);
 }
 
+static bool _queue_passable(struct vfio_ap_queue *q)
+{
+	if (!q)
+		return false;
+
+	switch (q->reset_status.response_code) {
+	case AP_RESPONSE_NORMAL:
+	case AP_RESPONSE_DECONFIGURED:
+	case AP_RESPONSE_CHECKSTOPPED:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /*
  * vfio_ap_mdev_filter_matrix - filter the APQNs assigned to the matrix mdev
  *				to ensure no queue devices are passed through to
@@ -687,7 +702,6 @@  static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
 	unsigned long apid, apqi, apqn;
 	DECLARE_BITMAP(prev_shadow_apm, AP_DEVICES);
 	DECLARE_BITMAP(prev_shadow_aqm, AP_DOMAINS);
-	struct vfio_ap_queue *q;
 
 	bitmap_copy(prev_shadow_apm, matrix_mdev->shadow_apcb.apm, AP_DEVICES);
 	bitmap_copy(prev_shadow_aqm, matrix_mdev->shadow_apcb.aqm, AP_DOMAINS);
@@ -716,8 +730,7 @@  static bool vfio_ap_mdev_filter_matrix(struct ap_matrix_mdev *matrix_mdev,
 			 * hardware device.
 			 */
 			apqn = AP_MKQID(apid, apqi);
-			q = vfio_ap_mdev_get_queue(matrix_mdev, apqn);
-			if (!q || q->reset_status.response_code) {
+			if (!_queue_passable(vfio_ap_mdev_get_queue(matrix_mdev, apqn))) {
 				clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm);
 
 				/*
@@ -1691,6 +1704,7 @@  static int apq_status_check(int apqn, struct ap_queue_status *status)
 	switch (status->response_code) {
 	case AP_RESPONSE_NORMAL:
 	case AP_RESPONSE_DECONFIGURED:
+	case AP_RESPONSE_CHECKSTOPPED:
 		return 0;
 	case AP_RESPONSE_RESET_IN_PROGRESS:
 	case AP_RESPONSE_BUSY:
@@ -1747,14 +1761,6 @@  static void apq_reset_check(struct work_struct *reset_work)
 				memcpy(&q->reset_status, &status, sizeof(status));
 				continue;
 			}
-			/*
-			 * When an AP adapter is deconfigured, the
-			 * associated queues are reset, so let's set the
-			 * status response code to 0 so the queue may be
-			 * passed through (i.e., not filtered)
-			 */
-			if (status.response_code == AP_RESPONSE_DECONFIGURED)
-				q->reset_status.response_code = 0;
 			if (q->saved_isc != VFIO_AP_ISC_INVALID)
 				vfio_ap_free_aqic_resources(q);
 			break;
@@ -1781,12 +1787,7 @@  static void vfio_ap_mdev_reset_queue(struct vfio_ap_queue *q)
 		queue_work(system_long_wq, &q->reset_work);
 		break;
 	case AP_RESPONSE_DECONFIGURED:
-		/*
-		 * When an AP adapter is deconfigured, the associated
-		 * queues are reset, so let's set the status response code to 0
-		 * so the queue may be passed through (i.e., not filtered).
-		 */
-		q->reset_status.response_code = 0;
+	case AP_RESPONSE_CHECKSTOPPED:
 		vfio_ap_free_aqic_resources(q);
 		break;
 	default: