@@ -164,6 +164,35 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
return rc;
}
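+/*
+ * Called on cdev release: flag any event log entries still pending for this
+ * PASID and wq in evl->bmap so they are skipped during event log processing,
+ * then drain the wq's fault workqueue so no deferred fault work for the
+ * PASID is left running.
+ */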
+static void idxd_cdev_evl_drain_pasid(struct idxd_wq *wq, u32 pasid)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct idxd_evl *evl = idxd->evl;
+ union evl_status_reg status;
+ u16 h, t, size;
+ int ent_size = evl_ent_size(idxd);
+ struct __evl_entry *entry_head;
+
+ if (!evl)
+ return;
+
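+ /*
+ * Hold evl->lock so the head/tail snapshot and the bitmap updates do
+ * not race with event log processing in the interrupt path.
+ */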
+ spin_lock(&evl->lock);
+ status.bits = ioread64(idxd->reg_base + IDXD_EVLSTATUS_OFFSET);
+ t = status.tail;
+ h = evl->head;
+ size = evl->size;
+
+ while (h != t) {
+ entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
+ if (entry_head->pasid == pasid && entry_head->wq_idx == wq->id)
+ set_bit(h, evl->bmap);
+ h = (h + 1) % size;
+ }
+ spin_unlock(&evl->lock);
+
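+ /* Wait for any fault work already queued for this wq to finish. */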
+ drain_workqueue(wq->wq);
+}
+
static int idxd_cdev_release(struct inode *node, struct file *filep)
{
struct idxd_user_context *ctx = filep->private_data;
@@ -190,6 +219,7 @@ static int idxd_cdev_release(struct inode *node, struct file *filep)
}
if (ctx->sva) {
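+ /*
+ * Flag any outstanding event log entries for this PASID before
+ * unbinding the SVA handle so stale completion record faults are
+ * not serviced against a released context.
+ */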
+ idxd_cdev_evl_drain_pasid(wq, ctx->pasid);
iommu_sva_unbind_device(ctx->sva);
idxd_xa_pasid_remove(ctx);
}
@@ -762,18 +762,29 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
dma_addr_t dma_addr;
int size;
struct idxd_evl *evl = idxd->evl;
+ unsigned long *bmap;
+ int rc;
if (!evl)
return 0;
size = evl_size(idxd);
+
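+ /*
+ * Bitmap used to flag event log entries that should be skipped,
+ * e.g. entries belonging to a PASID that is being released.
+ */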
+ bmap = bitmap_zalloc(size, GFP_KERNEL);
+ if (!bmap) {
+ rc = -ENOMEM;
+ goto err_bmap;
+ }
+
/*
* Address needs to be page aligned. However, dma_alloc_coherent() provides
* at minimal page size aligned address. No manual alignment required.
*/
addr = dma_alloc_coherent(dev, size, &dma_addr, GFP_KERNEL);
- if (!addr)
- return -ENOMEM;
+ if (!addr) {
+ rc = -ENOMEM;
+ goto err_alloc;
+ }
memset(addr, 0, size);
@@ -781,6 +792,7 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
evl->log = addr;
evl->dma = dma_addr;
evl->log_size = size;
+ evl->bmap = bmap;
memset(&evlcfg, 0, sizeof(evlcfg));
evlcfg.bits[0] = dma_addr & GENMASK(63, 12);
@@ -799,6 +811,11 @@ static int idxd_device_evl_setup(struct idxd_device *idxd)
spin_unlock(&evl->lock);
return 0;
+
+err_alloc:
+ bitmap_free(bmap);
+err_bmap:
+ return rc;
}
static void idxd_device_evl_free(struct idxd_device *idxd)
@@ -824,6 +841,7 @@ static void idxd_device_evl_free(struct idxd_device *idxd)
iowrite64(0, idxd->reg_base + IDXD_EVLCFG_OFFSET + 8);
dma_free_coherent(dev, evl->log_size, evl->log, evl->dma);
+ bitmap_free(evl->bmap);
evl->log = NULL;
evl->size = IDXD_EVL_SIZE_MIN;
spin_unlock(&evl->lock);
@@ -264,6 +264,7 @@ struct idxd_driver_data {
struct device_type *dev_type;
int compl_size;
int align;
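+ /* Offset of the completion record within an event log entry. */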
+ int evl_cr_off;
};
struct idxd_evl {
@@ -276,6 +277,7 @@ struct idxd_evl {
/* The number of entries in the event log. */
u16 size;
u16 head;
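+ /* Bitmap flagging event log entries that should be skipped. */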
+ unsigned long *bmap;
};
struct idxd_evl_fault {
@@ -47,6 +47,7 @@ static struct idxd_driver_data idxd_driver_data[] = {
.compl_size = sizeof(struct dsa_completion_record),
.align = 32,
.dev_type = &dsa_device_type,
+ .evl_cr_off = offsetof(struct dsa_evl_entry, cr),
},
[IDXD_TYPE_IAX] = {
.name_prefix = "iax",
@@ -54,6 +55,7 @@ static struct idxd_driver_data idxd_driver_data[] = {
.compl_size = sizeof(struct iax_completion_record),
.align = 64,
.dev_type = &iax_device_type,
+ .evl_cr_off = offsetof(struct iax_evl_entry, cr),
},
};
@@ -7,6 +7,8 @@
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
#include <uapi/linux/idxd.h>
#include "../dmaengine.h"
#include "idxd.h"
@@ -217,14 +219,89 @@ static void idxd_int_handle_revoke(struct work_struct *work)
kfree(revoke);
}
-static void process_evl_entry(struct idxd_device *idxd, struct __evl_entry *entry_head)
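+/*
+ * Deferred handler for completion record page faults reported through the
+ * event log: copy the completion record captured in the event log entry to
+ * the user space address recorded in fault_addr.
+ */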
+static void idxd_evl_fault_work(struct work_struct *work)
+{
+ struct idxd_evl_fault *fault = container_of(work, struct idxd_evl_fault, work);
+ struct idxd_wq *wq = fault->wq;
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ struct __evl_entry *entry_head = fault->entry;
+ void *cr = (void *)entry_head + idxd->data->evl_cr_off;
+ int cr_size = idxd->data->compl_size, copied;
+
+ switch (fault->status) {
+ case DSA_COMP_CRA_XLAT:
+ case DSA_COMP_DRAIN_EVL:
+ /*
+ * Copy the completion record to fault_addr in the user address
+ * space, which is looked up via the wq and the PASID.
+ */
+ copied = idxd_copy_cr(wq, entry_head->pasid,
+ entry_head->fault_addr,
+ cr, cr_size);
+ /*
+ * The task that triggered the page fault is currently unknown:
+ * multiple threads may share the user address space, or the task
+ * may already have exited by the time this fault is handled. If
+ * the copy fails, SIGSEGV therefore cannot be sent to the task;
+ * just log the failure. The user application waiting for the
+ * completion record will time out on this failure.
+ */
+ if (copied != cr_size) {
+ dev_dbg_ratelimited(dev, "Failed to write to completion record. (%d:%d)\n",
+ cr_size, copied);
+ }
+ break;
+ default:
+ dev_dbg_ratelimited(dev, "Unrecognized error code: %#x\n",
+ DSA_COMP_STATUS(entry_head->error));
+ break;
+ }
+
+ kmem_cache_free(idxd->evl_cache, fault);
+}
+
+static void process_evl_entry(struct idxd_device *idxd,
+ struct __evl_entry *entry_head, unsigned int index)
{
struct device *dev = &idxd->pdev->dev;
+ struct idxd_evl *evl = idxd->evl;
u8 status;
- status = DSA_COMP_STATUS(entry_head->error);
- dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
- status, entry_head->operation, entry_head->fault_addr);
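+ /*
+ * Entries flagged in evl->bmap belong to a PASID that has already
+ * been drained on release; drop them without further processing.
+ */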
+ if (test_bit(index, evl->bmap)) {
+ clear_bit(index, evl->bmap);
+ } else {
+ status = DSA_COMP_STATUS(entry_head->error);
+
+ if (status == DSA_COMP_CRA_XLAT || status == DSA_COMP_DRAIN_EVL) {
+ struct idxd_evl_fault *fault;
+ int ent_size = evl_ent_size(idxd);
+
+ if (entry_head->rci)
+ dev_dbg(dev, "Completion Int Req set, ignoring!\n");
+
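+ /*
+ * A drained entry that did not request a completion record has
+ * nothing to write back to user space.
+ */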
+ if (!entry_head->rcr && status == DSA_COMP_DRAIN_EVL)
+ return;
+
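+ /*
+ * Copying the completion record to user memory may fault and
+ * sleep, which is not allowed in this atomic context, so defer
+ * the copy to the wq's fault workqueue.
+ */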
+ fault = kmem_cache_alloc(idxd->evl_cache, GFP_ATOMIC);
+ if (fault) {
+ struct idxd_wq *wq = idxd->wqs[entry_head->wq_idx];
+
+ fault->wq = wq;
+ fault->status = status;
+ memcpy(&fault->entry, entry_head, ent_size);
+ INIT_WORK(&fault->work, idxd_evl_fault_work);
+ queue_work(wq->wq, &fault->work);
+ } else {
+ dev_warn(dev, "Failed to service fault work.\n");
+ }
+ } else {
+ dev_warn_ratelimited(dev, "Device error %#x operation: %#x fault addr: %#llx\n",
+ status, entry_head->operation,
+ entry_head->fault_addr);
+ }
+ }
}
static void process_evl_entries(struct idxd_device *idxd)
@@ -250,7 +327,7 @@ static void process_evl_entries(struct idxd_device *idxd)
while (h != t) {
entry_head = (struct __evl_entry *)(evl->log + (h * ent_size));
- process_evl_entry(idxd, entry_head);
+ process_evl_entry(idxd, entry_head, h);
h = (h + 1) % size;
}
@@ -133,6 +133,7 @@ enum dsa_completion_status {
DSA_COMP_HW_ERR1,
DSA_COMP_HW_ERR_DRB,
DSA_COMP_TRANSLATION_FAIL,
+ DSA_COMP_DRAIN_EVL = 0x26,
};
enum iax_completion_status {