On Mon, Feb 06, 2023 at 12:08:36PM +0200, Vladimir Oltean wrote:
> Add support for filling an RX ring with buffers coming from an XSK umem.
> Although enetc has up to 8 RX rings, we still use one of the 2 per-CPU
> RX rings for XSK.
>
> To set up an XSK pool on one of the RX queues, we use the
> reconfiguration procedure which temporarily stops the rings.
>
> Since the RX procedure in the NAPI poll function is completely different
> (both the API for creating an xdp_buff and the way the ring is refilled
> with memory from user space), create a separate enetc_clean_rx_ring_xsk()
> function which gets called when we have both an XSK pool and an XDP
> program on this RX queue.
>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> ---
> drivers/net/ethernet/freescale/enetc/enetc.c | 377 +++++++++++++++++-
> drivers/net/ethernet/freescale/enetc/enetc.h | 3 +
> .../net/ethernet/freescale/enetc/enetc_pf.c | 1 +
> 3 files changed, 373 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
> index dee432cacf85..3990c006c011 100644
> --- a/drivers/net/ethernet/freescale/enetc/enetc.c
> +++ b/drivers/net/ethernet/freescale/enetc/enetc.c
> @@ -10,6 +10,7 @@
> #include <net/ip6_checksum.h>
> #include <net/pkt_sched.h>
> #include <net/tso.h>
> +#include <net/xdp_sock_drv.h>
>
> u32 enetc_port_mac_rd(struct enetc_si *si, u32 reg)
> {
> @@ -103,6 +104,9 @@ static void enetc_free_rx_swbd(struct enetc_bdr *rx_ring,
> rx_swbd->dir);
> __free_page(rx_swbd->page);
> rx_swbd->page = NULL;
> + } else if (rx_swbd->xsk_buff) {
> + xsk_buff_free(rx_swbd->xsk_buff);
> + rx_swbd->xsk_buff = NULL;
> }
> }
>
> @@ -979,6 +983,44 @@ static int enetc_refill_rx_ring(struct enetc_bdr *rx_ring, const int buff_cnt)
> return j;
> }
>
> +static int enetc_refill_rx_ring_xsk(struct enetc_bdr *rx_ring, int buff_cnt)
> +{
> + struct xsk_buff_pool *pool = rx_ring->xdp.xsk_pool;
> + struct enetc_rx_swbd *rx_swbd;
> + struct xdp_buff *xsk_buff;
> + union enetc_rx_bd *rxbd;
> + int i, j;
> +
> + i = rx_ring->next_to_use;
> + rxbd = enetc_rxbd(rx_ring, i);
> +
> + for (j = 0; j < buff_cnt; j++) {
> + xsk_buff = xsk_buff_alloc(pool); // TODO use _batch?
yes, please use the batch API :P
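Something along these lines maybe (completely untested sketch; ENETC_RXBD_BUNDLE
as the batch size is just a guess, pick whatever fits the ring):

	struct xdp_buff *xsk_buffs[ENETC_RXBD_BUNDLE];
	u32 alloced, k;

	alloced = xsk_buff_alloc_batch(pool, xsk_buffs,
				       min_t(int, buff_cnt, ENETC_RXBD_BUNDLE));
	for (k = 0; k < alloced; k++) {
		rx_swbd = &rx_ring->rx_swbd[i];
		rx_swbd->xsk_buff = xsk_buffs[k];
		rx_swbd->dma = xsk_buff_xdp_get_dma(xsk_buffs[k]);

		/* update RxBD, same as in the open-coded loop */
		rxbd->w.addr = cpu_to_le64(rx_swbd->dma);
		rxbd->r.lstatus = 0;

		enetc_rxbd_next(rx_ring, &rxbd, &i);
	}

with the j/next_to_use bookkeeping adjusted accordingly. xsk_buff_alloc_batch()
fills the array from the pool and returns how many buffers it actually got.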
> + if (!xsk_buff)
> + break;
> +
> + rx_swbd = &rx_ring->rx_swbd[i];
> + rx_swbd->xsk_buff = xsk_buff;
> + rx_swbd->dma = xsk_buff_xdp_get_dma(xsk_buff);
> +
> + /* update RxBD */
> + rxbd->w.addr = cpu_to_le64(rx_swbd->dma);
> + /* clear 'R" as well */
> + rxbd->r.lstatus = 0;
> +
> + enetc_rxbd_next(rx_ring, &rxbd, &i);
> + }
> +
> + if (likely(j)) {
> + rx_ring->next_to_use = i;
> +
> + /* update ENETC's consumer index */
> + enetc_wr_reg_hot(rx_ring->rcir, rx_ring->next_to_use);
> + }
> +
> + return j;
> +}
> +
> #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
> static void enetc_get_rx_tstamp(struct net_device *ndev,
> union enetc_rx_bd *rxbd,
> @@ -1128,6 +1170,18 @@ static void enetc_add_rx_buff_to_skb(struct enetc_bdr *rx_ring, int i,
> enetc_flip_rx_buff(rx_ring, rx_swbd);
> }
>
> +static void enetc_put_rx_swbd(struct enetc_bdr *rx_ring, int i)
> +{
> + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[i];
> +
> + if (rx_swbd->xsk_buff) {
> + xsk_buff_free(rx_swbd->xsk_buff);
> + rx_swbd->xsk_buff = NULL;
> + } else {
> + enetc_put_rx_buff(rx_ring, rx_swbd);
> + }
> +}
> +
> static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
> u32 bd_status,
> union enetc_rx_bd **rxbd, int *i,
> @@ -1136,7 +1190,7 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
> if (likely(!(bd_status & ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))))
> return false;
>
> - enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
> + enetc_put_rx_swbd(rx_ring, *i);
> (*buffs_missing)++;
> enetc_rxbd_next(rx_ring, rxbd, i);
>
> @@ -1144,7 +1198,7 @@ static bool enetc_check_bd_errors_and_consume(struct enetc_bdr *rx_ring,
> dma_rmb();
> bd_status = le32_to_cpu((*rxbd)->r.lstatus);
>
> - enetc_put_rx_buff(rx_ring, &rx_ring->rx_swbd[*i]);
> + enetc_put_rx_swbd(rx_ring, *i);
> (*buffs_missing)++;
> enetc_rxbd_next(rx_ring, rxbd, i);
> }
> @@ -1484,6 +1538,43 @@ static void enetc_build_xdp_buff(struct enetc_bdr *rx_ring, u32 bd_status,
> }
> }
>
> +static struct xdp_buff *enetc_build_xsk_buff(struct xsk_buff_pool *pool,
> + struct enetc_bdr *rx_ring,
> + u32 bd_status,
> + union enetc_rx_bd **rxbd, int *i,
> + int *buffs_missing, int *rx_byte_cnt)
> +{
> + struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[*i];
> + u16 size = le16_to_cpu((*rxbd)->r.buf_len);
> + struct xdp_buff *xsk_buff;
> +
> + /* Multi-buffer frames are not supported in XSK mode */
Nice! I realized we need to forbid that on ice now.
> + if (unlikely(!(bd_status & ENETC_RXBD_LSTATUS_F))) {
> + while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
> + enetc_put_rx_swbd(rx_ring, *i);
> +
> + (*buffs_missing)++;
> + enetc_rxbd_next(rx_ring, rxbd, i);
> + dma_rmb();
> + bd_status = le32_to_cpu((*rxbd)->r.lstatus);
> + }
> +
> + return NULL;
> + }
> +
> + xsk_buff = rx_swbd->xsk_buff;
> + xsk_buff_set_size(xsk_buff, size);
> + xsk_buff_dma_sync_for_cpu(xsk_buff, pool);
> +
> + rx_swbd->xsk_buff = NULL;
> +
> + (*buffs_missing)++;
> + (*rx_byte_cnt) += size;
> + enetc_rxbd_next(rx_ring, rxbd, i);
> +
> + return xsk_buff;
> +}
> +
> /* Convert RX buffer descriptors to TX buffer descriptors. These will be
> * recycled back into the RX ring in enetc_clean_tx_ring.
> */
> @@ -1659,11 +1750,136 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
> return rx_frm_cnt;
> }
>
> +static void enetc_xsk_buff_to_skb(struct xdp_buff *xsk_buff,
> + struct enetc_bdr *rx_ring,
> + union enetc_rx_bd *rxbd,
> + struct napi_struct *napi)
> +{
> + size_t len = xdp_get_buff_len(xsk_buff);
> + struct sk_buff *skb;
> +
> + skb = napi_alloc_skb(napi, len);
> + if (unlikely(!skb)) {
> + rx_ring->stats.rx_alloc_errs++;
> + goto out;
> + }
> +
> + skb_put_data(skb, xsk_buff->data, len);
> +
> + enetc_get_offloads(rx_ring, rxbd, skb);
> +
> + skb_record_rx_queue(skb, rx_ring->index);
> + skb->protocol = eth_type_trans(skb, rx_ring->ndev);
> +
> + rx_ring->stats.packets += skb->len;
> + rx_ring->stats.bytes++;
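These two look swapped, shouldn't it be:

	rx_ring->stats.packets++;
	rx_ring->stats.bytes += skb->len;

? And enetc_clean_rx_ring_xsk() below already adds rx_frm_cnt/rx_byte_cnt to
the same counters at the end of the loop, so as written the XDP_PASS frames
seem to get counted twice.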
> +
> + napi_gro_receive(napi, skb);
> +out:
> + xsk_buff_free(xsk_buff);
> +}
> +
> +static int enetc_clean_rx_ring_xsk(struct enetc_bdr *rx_ring,
> + struct napi_struct *napi, int work_limit,
> + struct bpf_prog *prog,
> + struct xsk_buff_pool *pool)
> +{
> + struct net_device *ndev = rx_ring->ndev;
> + union enetc_rx_bd *rxbd, *orig_rxbd;
> + int rx_frm_cnt = 0, rx_byte_cnt = 0;
> + int xdp_redirect_frm_cnt = 0;
> + struct xdp_buff *xsk_buff;
> + int buffs_missing, err, i;
> + bool wakeup_xsk = false;
> + u32 bd_status, xdp_act;
> +
> + buffs_missing = enetc_bd_unused(rx_ring);
> + /* next descriptor to process */
> + i = rx_ring->next_to_clean;
> +
> + while (likely(rx_frm_cnt < work_limit)) {
> + if (buffs_missing >= ENETC_RXBD_BUNDLE) {
> + buffs_missing -= enetc_refill_rx_ring_xsk(rx_ring,
> + buffs_missing);
> + wakeup_xsk |= (buffs_missing != 0);
> + }
> +
> + rxbd = enetc_rxbd(rx_ring, i);
> + bd_status = le32_to_cpu(rxbd->r.lstatus);
> + if (!bd_status)
> + break;
> +
> + enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
> + dma_rmb(); /* for reading other rxbd fields */
> +
> + if (enetc_check_bd_errors_and_consume(rx_ring, bd_status,
> + &rxbd, &i,
> + &buffs_missing))
> + continue;
> +
> + orig_rxbd = rxbd;
> +
> + xsk_buff = enetc_build_xsk_buff(pool, rx_ring, bd_status,
> + &rxbd, &i, &buffs_missing,
> + &rx_byte_cnt);
> + if (!xsk_buff)
> + continue;
> +
> + xdp_act = bpf_prog_run_xdp(prog, xsk_buff);
> + switch (xdp_act) {
> + default:
> + bpf_warn_invalid_xdp_action(ndev, prog, xdp_act);
> + fallthrough;
> + case XDP_ABORTED:
> + trace_xdp_exception(ndev, prog, xdp_act);
> + fallthrough;
> + case XDP_DROP:
> + xsk_buff_free(xsk_buff);
> + break;
> + case XDP_PASS:
> + enetc_xsk_buff_to_skb(xsk_buff, rx_ring, orig_rxbd,
> + napi);
> + break;
> + case XDP_REDIRECT:
> + err = xdp_do_redirect(ndev, xsk_buff, prog);
> + if (unlikely(err)) {
> + if (err == -ENOBUFS)
> + wakeup_xsk = true;
> + xsk_buff_free(xsk_buff);
> + rx_ring->stats.xdp_redirect_failures++;
> + } else {
> + xdp_redirect_frm_cnt++;
> + rx_ring->stats.xdp_redirect++;
> + }
No XDP_TX support? I don't see it being added in the next patch either.
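If you do add it at some point, a lazy (and untested) way to get going without
touching your buffer recycling would be to let the core copy the frame out of
the umem, something like this with a struct xdp_frame *xdp_frame local and
reusing your existing xdp_tx/xdp_tx_drops counters:

	case XDP_TX:
		/* xdp_convert_buff_to_frame() sees MEM_TYPE_XSK_BUFF_POOL,
		 * copies the payload out of the umem and frees the xsk_buff,
		 * so the frame can then go through the normal XDP TX path.
		 */
		xdp_frame = xdp_convert_buff_to_frame(xsk_buff);
		if (unlikely(!xdp_frame)) {
			xsk_buff_free(xsk_buff);
			rx_ring->stats.xdp_tx_drops++;
			break;
		}
		if (enetc_xdp_xmit(ndev, 1, &xdp_frame, 0) != 1) {
			xdp_return_frame(xdp_frame);
			rx_ring->stats.xdp_tx_drops++;
		} else {
			rx_ring->stats.xdp_tx++;
		}
		break;

A proper zero-copy XDP_TX would of course rather post the umem address
directly to the TX ring and recycle it on completion, but that's more work.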
> + }
> +
> + rx_frm_cnt++;
> + }
> +
> + rx_ring->next_to_clean = i;
> +
> + rx_ring->stats.packets += rx_frm_cnt;
> + rx_ring->stats.bytes += rx_byte_cnt;
> +
> + if (xdp_redirect_frm_cnt)
> + xdp_do_flush_map();
> +
> + if (xsk_uses_need_wakeup(pool)) {
> + if (wakeup_xsk)
> + xsk_set_rx_need_wakeup(pool);
> + else
> + xsk_clear_rx_need_wakeup(pool);
> + }
> +
> + return rx_frm_cnt;
> +}
> +
> static int enetc_poll(struct napi_struct *napi, int budget)
> {
> struct enetc_int_vector
> *v = container_of(napi, struct enetc_int_vector, napi);
> struct enetc_bdr *rx_ring = &v->rx_ring;
> + struct xsk_buff_pool *pool;
> struct bpf_prog *prog;
> bool complete = true;
> int work_done;
> @@ -1676,10 +1892,15 @@ static int enetc_poll(struct napi_struct *napi, int budget)
> complete = false;
>
> prog = rx_ring->xdp.prog;
> - if (prog)
(...)