On Sun, Jun 11, 2023 at 01:35:17AM +0100, Daniel Golle wrote:
> @@ -1106,14 +1105,14 @@ struct mtk_eth {
> spinlock_t tx_irq_lock;
> spinlock_t rx_irq_lock;
> struct net_device dummy_dev;
> - struct net_device *netdev[MTK_MAX_DEVS];
> - struct mtk_mac *mac[MTK_MAX_DEVS];
> + struct net_device **netdev;
> + struct mtk_mac **mac;
> int irq[3];
> u32 msg_enable;
> unsigned long sysclk;
> struct regmap *ethsys;
> struct regmap *infra;
> - struct phylink_pcs *sgmii_pcs[MTK_MAX_DEVS];
> + struct phylink_pcs **sgmii_pcs;
> struct regmap *pctl;
> bool hwlro;
> refcount_t dma_refcnt;
Is it really worth the extra allocations?
There are three pointers here per device. Let's talk about modern (64-bit)
systems, so that's 8 bytes each, and if MTK_MAX_DEVS was two, that's 48 bytes
in all. If we expanded the arrays to allow three, that would be 72 bytes.
If we allocate separately, then we're allocating 16 or 24 bytes three
times depending on whether we want two or three of them.
On arm64, I'm seeing the minimum slab size as 128 bytes, which means
that's the minimum memory allocation. So, allocating three arrays will
be 384 bytes in all, irrespective of whether we want two or three
entries.
That's a waste of about 5x the memory over just expanding the arrays!
If you want to go down the route of dynamically allocating these, it
would make more sense to combine the three pointers into a single
structure and allocate one array of that structure, thus requiring only
one allocation. With a 128-byte minimum slab object, that reduces the
wastage to about 56 bytes for three ports (128 - 72) or 80 bytes for
two (128 - 48).
Thanks.
> On Sun, Jun 11, 2023 at 01:35:17AM +0100, Daniel Golle wrote:
> > @@ -1106,14 +1105,14 @@ struct mtk_eth {
> > spinlock_t tx_irq_lock;
> > spinlock_t rx_irq_lock;
> > struct net_device dummy_dev;
> > - struct net_device *netdev[MTK_MAX_DEVS];
> > - struct mtk_mac *mac[MTK_MAX_DEVS];
> > + struct net_device **netdev;
> > + struct mtk_mac **mac;
> > int irq[3];
> > u32 msg_enable;
> > unsigned long sysclk;
> > struct regmap *ethsys;
> > struct regmap *infra;
> > - struct phylink_pcs *sgmii_pcs[MTK_MAX_DEVS];
> > + struct phylink_pcs **sgmii_pcs;
> > struct regmap *pctl;
> > bool hwlro;
> > refcount_t dma_refcnt;
>
> Is it really worth the extra allocations?
>
> There's three pointers here per device. Let's talk about modern systems,
> so that's 8 bytes each, and if MTK_MAX_DEVS was two, that's 48 bytes in
> all. If we expanded the array to allow three, that would be 72 bytes.
>
> If we allocate separately, then we're allocating 16 or 24 bytes three
> times depending on whether we want two or three of them.
>
> On arm64, I'm seeing the minimum slab size as 128 bytes, which means
> that's the minimum memory allocation. So, allocating three arrays will
> be 384 bytes in all, irrespective of whether we want two or three
> entries.
>
> That's a waste of about 5x the memory over just expanding the arrays!
ack, I agree. I will fix it.
Regards,
Lorenzo
>
> If you want to go down the route of dynamically allocating these, it
> would make better sense to combine them into a single structure that
> itself is an array, and thus requiring only one allocation. That
> reduces the wastage to about 56 bytes for three ports or 80 bytes
> for two.
>
> Thanks.
>
> --
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTP is here! 80Mbps down 10Mbps up. Decent connectivity at last!
>
@@ -4030,7 +4030,10 @@ static void mtk_sgmii_destroy(struct mtk_eth *eth)
{
int i;
- for (i = 0; i < MTK_MAX_DEVS; i++)
+ if (!eth->sgmii_pcs)
+ return;
+
+ for (i = 0; i < eth->soc->num_devs; i++)
mtk_pcs_lynxi_destroy(eth->sgmii_pcs[i]);
}
@@ -4489,7 +4492,12 @@ static int mtk_sgmii_init(struct mtk_eth *eth)
u32 flags;
int i;
- for (i = 0; i < MTK_MAX_DEVS; i++) {
+ eth->sgmii_pcs = devm_kzalloc(eth->dev,
+ sizeof(*eth->sgmii_pcs) *
+ eth->soc->num_devs,
+ GFP_KERNEL);
+
+ for (i = 0; i < eth->soc->num_devs; i++) {
np = of_parse_phandle(eth->dev->of_node, "mediatek,sgmiisys", i);
if (!np)
break;
@@ -4534,6 +4542,18 @@ static int mtk_probe(struct platform_device *pdev)
if (MTK_HAS_CAPS(eth->soc->caps, MTK_SOC_MT7628))
eth->ip_align = NET_IP_ALIGN;
+ eth->netdev = devm_kzalloc(eth->dev,
+ sizeof(*eth->netdev) * eth->soc->num_devs,
+ GFP_KERNEL);
+ if (!eth->netdev)
+ return -ENOMEM;
+
+ eth->mac = devm_kzalloc(eth->dev,
+ sizeof(*eth->mac) * eth->soc->num_devs,
+ GFP_KERNEL);
+ if (!eth->mac)
+ return -ENOMEM;
+
spin_lock_init(&eth->page_lock);
spin_lock_init(&eth->tx_irq_lock);
spin_lock_init(&eth->rx_irq_lock);
@@ -4719,7 +4739,7 @@ static int mtk_probe(struct platform_device *pdev)
goto err_deinit_ppe;
}
- for (i = 0; i < MTK_MAX_DEVS; i++) {
+ for (i = 0; i < eth->soc->num_devs; i++) {
if (!eth->netdev[i])
continue;
@@ -4793,6 +4813,7 @@ static const struct mtk_soc_data mt2701_data = {
.hw_features = MTK_HW_FEATURES,
.required_clks = MT7623_CLKS_BITMAP,
.required_pctl = true,
+ .num_devs = 2,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4811,6 +4832,7 @@ static const struct mtk_soc_data mt7621_data = {
.required_pctl = false,
.offload_version = 1,
.hash_offset = 2,
+ .num_devs = 2,
.foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
@@ -4832,6 +4854,7 @@ static const struct mtk_soc_data mt7622_data = {
.offload_version = 2,
.hash_offset = 2,
.has_accounting = true,
+ .num_devs = 2,
.foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
@@ -4851,6 +4874,7 @@ static const struct mtk_soc_data mt7623_data = {
.required_pctl = true,
.offload_version = 1,
.hash_offset = 2,
+ .num_devs = 2,
.foe_entry_size = sizeof(struct mtk_foe_entry) - 16,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
@@ -4870,6 +4894,7 @@ static const struct mtk_soc_data mt7629_data = {
.required_clks = MT7629_CLKS_BITMAP,
.required_pctl = false,
.has_accounting = true,
+ .num_devs = 2,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -4891,6 +4916,7 @@ static const struct mtk_soc_data mt7981_data = {
.hash_offset = 4,
.foe_entry_size = sizeof(struct mtk_foe_entry),
.has_accounting = true,
+ .num_devs = 2,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma_v2),
.rxd_size = sizeof(struct mtk_rx_dma_v2),
@@ -4910,6 +4936,7 @@ static const struct mtk_soc_data mt7986_data = {
.required_pctl = false,
.offload_version = 2,
.hash_offset = 4,
+ .num_devs = 2,
.foe_entry_size = sizeof(struct mtk_foe_entry),
.has_accounting = true,
.txrx = {
@@ -4928,6 +4955,7 @@ static const struct mtk_soc_data rt5350_data = {
.hw_features = MTK_HW_FEATURES_MT7628,
.required_clks = MT7628_CLKS_BITMAP,
.required_pctl = false,
+ .num_devs = 2,
.txrx = {
.txd_size = sizeof(struct mtk_tx_dma),
.rxd_size = sizeof(struct mtk_rx_dma),
@@ -1016,6 +1016,7 @@ struct mtk_reg_map {
* @required_pctl A bool value to show whether the SoC requires
* the extra setup for those pins used by GMAC.
* @hash_offset Flow table hash offset.
+ * @num_devs Number of MACs on the SoC.
* @foe_entry_size Foe table entry size.
* @has_accounting Bool indicating support for accounting of
* offloaded flows.
@@ -1034,6 +1035,7 @@ struct mtk_soc_data {
bool required_pctl;
u8 offload_version;
u8 hash_offset;
+ u8 num_devs;
u16 foe_entry_size;
netdev_features_t hw_features;
bool has_accounting;
@@ -1049,9 +1051,6 @@ struct mtk_soc_data {
#define MTK_DMA_MONITOR_TIMEOUT msecs_to_jiffies(1000)
-/* currently no SoC has more than 2 macs */
-#define MTK_MAX_DEVS 2
-
/* struct mtk_eth - This is the main datasructure for holding the state
* of the driver
* @dev: The device pointer
@@ -1106,14 +1105,14 @@ struct mtk_eth {
spinlock_t tx_irq_lock;
spinlock_t rx_irq_lock;
struct net_device dummy_dev;
- struct net_device *netdev[MTK_MAX_DEVS];
- struct mtk_mac *mac[MTK_MAX_DEVS];
+ struct net_device **netdev;
+ struct mtk_mac **mac;
int irq[3];
u32 msg_enable;
unsigned long sysclk;
struct regmap *ethsys;
struct regmap *infra;
- struct phylink_pcs *sgmii_pcs[MTK_MAX_DEVS];
+ struct phylink_pcs **sgmii_pcs;
struct regmap *pctl;
bool hwlro;
refcount_t dma_refcnt;