[net-next,1/3] net: macb: queue tie-off or disable during WOL suspend
Commit Message
When GEM is used as a wake device, it is not mandatory for the RX DMA
to be active. The RX engine in IP only needs to receive and identify
a wake packet through an interrupt. The wake packet is of no further
significance; hence, it is not required to be copied into memory.
By disabling RX DMA during suspend, we can avoid unnecessary DMA
processing of any incoming traffic.
During suspend, perform either of the below operations:
tie-off/dummy descriptor: Disable unused queues by connecting
them to a looped descriptor chain without free slots.
queue disable: The newer IP version allows disabling individual queues.
Co-developed-by: Harini Katakam <harini.katakam@amd.com>
Signed-off-by: Harini Katakam <harini.katakam@amd.com>
Signed-off-by: Vineeth Karumanchi <vineeth.karumanchi@amd.com>
---
drivers/net/ethernet/cadence/macb.h | 7 +++
drivers/net/ethernet/cadence/macb_main.c | 58 +++++++++++++++++++++++-
2 files changed, 64 insertions(+), 1 deletion(-)
Comments
Hi, Vineeth,
On 30.01.2024 12:48, Vineeth Karumanchi wrote:
> When GEM is used as a wake device, it is not mandatory for the RX DMA
> to be active. The RX engine in IP only needs to receive and identify
> a wake packet through an interrupt. The wake packet is of no further
> significance; hence, it is not required to be copied into memory.
> By disabling RX DMA during suspend, we can avoid unnecessary DMA
> processing of any incoming traffic.
>
> During suspend, perform either of the below operations:
>
> tie-off/dummy descriptor: Disable unused queues by connecting
> them to a looped descriptor chain without free slots.
>
> queue disable: The newer IP version allows disabling individual queues.
I would add a dash line for each of these 2 items for better understanding.
E.g.:
- tie-off/dummy descriptior: ...
- queue disable: ...
>
> Co-developed-by: Harini Katakam <harini.katakam@amd.com>
> Signed-off-by: Harini Katakam <harini.katakam@amd.com>
> Signed-off-by: Vineeth Karumanchi <vineeth.karumanchi@amd.com>
> ---
> drivers/net/ethernet/cadence/macb.h | 7 +++
> drivers/net/ethernet/cadence/macb_main.c | 58 +++++++++++++++++++++++-
> 2 files changed, 64 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
> index aa5700ac9c00..f68ff163aa18 100644
> --- a/drivers/net/ethernet/cadence/macb.h
> +++ b/drivers/net/ethernet/cadence/macb.h
> @@ -645,6 +645,9 @@
> #define GEM_T2OFST_OFFSET 0 /* offset value */
> #define GEM_T2OFST_SIZE 7
>
> +/* Bitfields in queue pointer registers */
> +#define GEM_RBQP_DISABLE BIT(0)
You have spaces after macro name. However the approach in this driver is to
define bit offset and lenght and use {MACB,GEM}_BIT() macros (see the above
bitfield definitions).
> +
> /* Offset for screener type 2 compare values (T2CMPOFST).
> * Note the offset is applied after the specified point,
> * e.g. GEM_T2COMPOFST_ETYPE denotes the EtherType field, so an offset
> @@ -737,6 +740,7 @@
> #define MACB_CAPS_HIGH_SPEED 0x02000000
> #define MACB_CAPS_CLK_HW_CHG 0x04000000
> #define MACB_CAPS_MACB_IS_EMAC 0x08000000
> +#define MACB_CAPS_QUEUE_DISABLE 0x00002000
Can you keep this sorted by value with the rest of the caps?
> #define MACB_CAPS_FIFO_MODE 0x10000000
> #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
> #define MACB_CAPS_SG_DISABLED 0x40000000
> @@ -1254,6 +1258,7 @@ struct macb {
> u32 (*macb_reg_readl)(struct macb *bp, int offset);
> void (*macb_reg_writel)(struct macb *bp, int offset, u32 value);
>
> + struct macb_dma_desc *rx_ring_tieoff;
Keep this ^
> size_t rx_buffer_size;
>
> unsigned int rx_ring_size;
> @@ -1276,6 +1281,8 @@ struct macb {
> struct gem_stats gem;
> } hw_stats;
>
> + dma_addr_t rx_ring_tieoff_dma;
And this ^ toghether.
> +
> struct macb_or_gem_ops macbgem_ops;
>
> struct mii_bus *mii_bus;
> diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c
> index 898debfd4db3..47cbea58e6c3 100644
> --- a/drivers/net/ethernet/cadence/macb_main.c
> +++ b/drivers/net/ethernet/cadence/macb_main.c
> @@ -2479,6 +2479,12 @@ static void macb_free_consistent(struct macb *bp)
>
> bp->macbgem_ops.mog_free_rx_buffers(bp);
>
> + if (bp->rx_ring_tieoff) {
> + dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
> + bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
> + bp->rx_ring_tieoff = NULL;
> + }
> +
Keep the reverse order of operation as oposed to macb_alloc_consistent,
thus move this before bp->macbgem_ops.mog_free_rx_buffers();
> for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
> kfree(queue->tx_skb);
> queue->tx_skb = NULL;
> @@ -2568,6 +2574,16 @@ static int macb_alloc_consistent(struct macb *bp)
> if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
> goto out_err;
>
> + /* Required for tie off descriptor for PM cases */
> + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
> + bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
> + macb_dma_desc_get_size(bp),
> + &bp->rx_ring_tieoff_dma,
> + GFP_KERNEL);
> + if (!bp->rx_ring_tieoff)
> + goto out_err;
You also need to free the previously allocated rx buffers.
> + }
> +
> return 0;
>
> out_err:
> @@ -2575,6 +2591,16 @@ static int macb_alloc_consistent(struct macb *bp)
> return -ENOMEM;
> }
>
> +static void macb_init_tieoff(struct macb *bp)
> +{
> + struct macb_dma_desc *d = bp->rx_ring_tieoff;
s/d/desc to cope with the rest of descriptor usage in this file.
Add this here:
if (bp->caps & MACB_CAPS_QUEUE_DISABLE)
return;
to avoid checking it in different places where this function is called.
> + /* Setup a wrapping descriptor with no free slots
> + * (WRAP and USED) to tie off/disable unused RX queues.
> + */
> + macb_set_addr(bp, d, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
> + d->ctrl = 0;
> +}
> +
> static void gem_init_rings(struct macb *bp)
> {
> struct macb_queue *queue;
> @@ -2598,6 +2624,8 @@ static void gem_init_rings(struct macb *bp)
> gem_rx_refill(queue);
> }
>
> + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
> + macb_init_tieoff(bp);
> }
>
> static void macb_init_rings(struct macb *bp)
> @@ -2615,6 +2643,8 @@ static void macb_init_rings(struct macb *bp)
> bp->queues[0].tx_head = 0;
> bp->queues[0].tx_tail = 0;
> desc->ctrl |= MACB_BIT(TX_WRAP);
> +
> + macb_init_tieoff(bp);
> }
>
> static void macb_reset_hw(struct macb *bp)
> @@ -4917,7 +4947,8 @@ static const struct macb_config sama7g5_emac_config = {
>
> static const struct macb_config versal_config = {
> .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO |
> - MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK,
> + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH |
> + MACB_CAPS_QUEUE_DISABLE | MACB_CAPS_NEED_TSUCLK,
This should go in a different patch.
> .dma_burst_length = 16,
> .clk_init = macb_clk_init,
> .init = init_reset_optional,
> @@ -5214,6 +5245,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
> struct macb_queue *queue;
> unsigned long flags;
> unsigned int q;
> + u32 ctrlmask;
val/tmp should be enough for this as you are, anyway, re-using it in the
next patch for different purpose.
> int err;
>
> if (!device_may_wakeup(&bp->dev->dev))
> @@ -5224,6 +5256,30 @@ static int __maybe_unused macb_suspend(struct device *dev)
>
> if (bp->wol & MACB_WOL_ENABLED) {
> spin_lock_irqsave(&bp->lock, flags);
> +
> + /* Disable Tx and Rx engines before disabling the queues,
> + * this is mandatory as per the IP spec sheet
> + */
> + ctrlmask = macb_readl(bp, NCR);
> + ctrlmask &= ~(MACB_BIT(TE) | MACB_BIT(RE));
You can remove this
> + macb_writel(bp, NCR, ctrlmask);
And add this:
macb_writel(bp, NCR, ctrlmask & ~(MACB_BIT(TE) | MACB_BIT(RE));
> + for (q = 0, queue = bp->queues; q < bp->num_queues;
> + ++q, ++queue) {
> + /* Disable RX queues */
Operation in this for loop could be moved in the the above IRQ disable
loop. Have you tried it? are there any issues with it?
> + if (bp->caps & MACB_CAPS_QUEUE_DISABLE) {
> + queue_writel(queue, RBQP, GEM_RBQP_DISABLE);
> + } else {
> + /* Tie off RX queues */
> + queue_writel(queue, RBQP,
> + lower_32_bits(bp->rx_ring_tieoff_dma));
I think this should be guarded by:
#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
> + queue_writel(queue, RBQPH,
> + upper_32_bits(bp->rx_ring_tieoff_dma));
> + }
> + }
> + /* Enable Receive engine */
> + ctrlmask = macb_readl(bp, NCR);
Is this needed?
> + ctrlmask |= MACB_BIT(RE);
> + macb_writel(bp, NCR, ctrlmask);
These could be merged
> /* Flush all status bits */
> macb_writel(bp, TSR, -1);
> macb_writel(bp, RSR, -1);
Hi Claudiu,
On 2/3/2024 9:08 PM, claudiu beznea wrote:
<...>
>> + for (q = 0, queue = bp->queues; q < bp->num_queues;
>> + ++q, ++queue) {
>> + /* Disable RX queues */
> Operation in this for loop could be moved in the the above IRQ disable
> loop. Have you tried it? are there any issues with it?
OK. I haven't tried it. I will try and update.
>
>> + if (bp->caps & MACB_CAPS_QUEUE_DISABLE) {
>> + queue_writel(queue, RBQP, GEM_RBQP_DISABLE);
>> + } else {
>> + /* Tie off RX queues */
>> + queue_writel(queue, RBQP,
>> + lower_32_bits(bp->rx_ring_tieoff_dma));
> I think this should be guarded by:
> #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
>> + queue_writel(queue, RBQPH,
>> + upper_32_bits(bp->rx_ring_tieoff_dma));
>> + }
>> + }
>> + /* Enable Receive engine */
>> + ctrlmask = macb_readl(bp, NCR);
> Is this needed?
Yes, not needed, we can use earlier value directly.
>
>> + ctrlmask |= MACB_BIT(RE);
<...>
Hi Claudiu,
On 2/3/2024 9:08 PM, claudiu beznea wrote:
<...>
>> queue->tx_skb = NULL;
>> @@ -2568,6 +2574,16 @@ static int macb_alloc_consistent(struct macb *bp)
>> if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
>> goto out_err;
>>
>> + /* Required for tie off descriptor for PM cases */
>> + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
>> + bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
>> + macb_dma_desc_get_size(bp),
>> + &bp->rx_ring_tieoff_dma,
>> + GFP_KERNEL);
>> + if (!bp->rx_ring_tieoff)
>> + goto out_err;
> You also need to free the previously allocated rx buffers.
Are you referring to (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) allocation ?
It was freed in macb_free_consistent():
...
bp->macbgem_ops.mog_free_rx_buffers(bp);
...
Please let me know if you are referring to different buffers.
>
>> + }
>> +
>> return 0;
>>
Hi, Vineeth,
On 15.02.2024 07:43, Karumanchi, Vineeth wrote:
> Hi Claudiu,
>
> On 2/3/2024 9:08 PM, claudiu beznea wrote:
> <...>
>>> queue->tx_skb = NULL;
>>> @@ -2568,6 +2574,16 @@ static int macb_alloc_consistent(struct macb *bp)
>>> if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
>>> goto out_err;
>>> + /* Required for tie off descriptor for PM cases */
>>> + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
>>> + bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
>>> + macb_dma_desc_get_size(bp),
>>> + &bp->rx_ring_tieoff_dma,
>>> + GFP_KERNEL);
>>> + if (!bp->rx_ring_tieoff)
>>> + goto out_err;
>> You also need to free the previously allocated rx buffers.
>
> Are you referring to (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) allocation ?
> It was freed in macb_free_consistent():
> ...
> bp->macbgem_ops.mog_free_rx_buffers(bp);
> ...
You're right, my bad.
>
> Please let me know if you are referring to different buffers.
I was referring to bp->macbgem_ops.mog_alloc_rx_buffers but hat is covered
as you pointed out.
Thank you,
Claudiu Beznea
@@ -645,6 +645,9 @@
#define GEM_T2OFST_OFFSET 0 /* offset value */
#define GEM_T2OFST_SIZE 7
+/* Bitfields in queue pointer registers */
+#define GEM_RBQP_DISABLE BIT(0)
+
/* Offset for screener type 2 compare values (T2CMPOFST).
* Note the offset is applied after the specified point,
* e.g. GEM_T2COMPOFST_ETYPE denotes the EtherType field, so an offset
@@ -737,6 +740,7 @@
#define MACB_CAPS_HIGH_SPEED 0x02000000
#define MACB_CAPS_CLK_HW_CHG 0x04000000
#define MACB_CAPS_MACB_IS_EMAC 0x08000000
+#define MACB_CAPS_QUEUE_DISABLE 0x00002000
#define MACB_CAPS_FIFO_MODE 0x10000000
#define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000
#define MACB_CAPS_SG_DISABLED 0x40000000
@@ -1254,6 +1258,7 @@ struct macb {
u32 (*macb_reg_readl)(struct macb *bp, int offset);
void (*macb_reg_writel)(struct macb *bp, int offset, u32 value);
+ struct macb_dma_desc *rx_ring_tieoff;
size_t rx_buffer_size;
unsigned int rx_ring_size;
@@ -1276,6 +1281,8 @@ struct macb {
struct gem_stats gem;
} hw_stats;
+ dma_addr_t rx_ring_tieoff_dma;
+
struct macb_or_gem_ops macbgem_ops;
struct mii_bus *mii_bus;
@@ -2479,6 +2479,12 @@ static void macb_free_consistent(struct macb *bp)
bp->macbgem_ops.mog_free_rx_buffers(bp);
+ if (bp->rx_ring_tieoff) {
+ dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp),
+ bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma);
+ bp->rx_ring_tieoff = NULL;
+ }
+
for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
kfree(queue->tx_skb);
queue->tx_skb = NULL;
@@ -2568,6 +2574,16 @@ static int macb_alloc_consistent(struct macb *bp)
if (bp->macbgem_ops.mog_alloc_rx_buffers(bp))
goto out_err;
+ /* Required for tie off descriptor for PM cases */
+ if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) {
+ bp->rx_ring_tieoff = dma_alloc_coherent(&bp->pdev->dev,
+ macb_dma_desc_get_size(bp),
+ &bp->rx_ring_tieoff_dma,
+ GFP_KERNEL);
+ if (!bp->rx_ring_tieoff)
+ goto out_err;
+ }
+
return 0;
out_err:
@@ -2575,6 +2591,16 @@ static int macb_alloc_consistent(struct macb *bp)
return -ENOMEM;
}
+static void macb_init_tieoff(struct macb *bp)
+{
+ struct macb_dma_desc *d = bp->rx_ring_tieoff;
+ /* Setup a wrapping descriptor with no free slots
+ * (WRAP and USED) to tie off/disable unused RX queues.
+ */
+ macb_set_addr(bp, d, MACB_BIT(RX_WRAP) | MACB_BIT(RX_USED));
+ d->ctrl = 0;
+}
+
static void gem_init_rings(struct macb *bp)
{
struct macb_queue *queue;
@@ -2598,6 +2624,8 @@ static void gem_init_rings(struct macb *bp)
gem_rx_refill(queue);
}
+ if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE))
+ macb_init_tieoff(bp);
}
static void macb_init_rings(struct macb *bp)
@@ -2615,6 +2643,8 @@ static void macb_init_rings(struct macb *bp)
bp->queues[0].tx_head = 0;
bp->queues[0].tx_tail = 0;
desc->ctrl |= MACB_BIT(TX_WRAP);
+
+ macb_init_tieoff(bp);
}
static void macb_reset_hw(struct macb *bp)
@@ -4917,7 +4947,8 @@ static const struct macb_config sama7g5_emac_config = {
static const struct macb_config versal_config = {
.caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO |
- MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK,
+ MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH |
+ MACB_CAPS_QUEUE_DISABLE | MACB_CAPS_NEED_TSUCLK,
.dma_burst_length = 16,
.clk_init = macb_clk_init,
.init = init_reset_optional,
@@ -5214,6 +5245,7 @@ static int __maybe_unused macb_suspend(struct device *dev)
struct macb_queue *queue;
unsigned long flags;
unsigned int q;
+ u32 ctrlmask;
int err;
if (!device_may_wakeup(&bp->dev->dev))
@@ -5224,6 +5256,30 @@ static int __maybe_unused macb_suspend(struct device *dev)
if (bp->wol & MACB_WOL_ENABLED) {
spin_lock_irqsave(&bp->lock, flags);
+
+ /* Disable Tx and Rx engines before disabling the queues,
+ * this is mandatory as per the IP spec sheet
+ */
+ ctrlmask = macb_readl(bp, NCR);
+ ctrlmask &= ~(MACB_BIT(TE) | MACB_BIT(RE));
+ macb_writel(bp, NCR, ctrlmask);
+ for (q = 0, queue = bp->queues; q < bp->num_queues;
+ ++q, ++queue) {
+ /* Disable RX queues */
+ if (bp->caps & MACB_CAPS_QUEUE_DISABLE) {
+ queue_writel(queue, RBQP, GEM_RBQP_DISABLE);
+ } else {
+ /* Tie off RX queues */
+ queue_writel(queue, RBQP,
+ lower_32_bits(bp->rx_ring_tieoff_dma));
+ queue_writel(queue, RBQPH,
+ upper_32_bits(bp->rx_ring_tieoff_dma));
+ }
+ }
+ /* Enable Receive engine */
+ ctrlmask = macb_readl(bp, NCR);
+ ctrlmask |= MACB_BIT(RE);
+ macb_writel(bp, NCR, ctrlmask);
/* Flush all status bits */
macb_writel(bp, TSR, -1);
macb_writel(bp, RSR, -1);