[net] net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses

Message ID 20230626154402.3154454-1-vladimir.oltean@nxp.com
State New
Headers
Series [net] net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses |

Commit Message

Vladimir Oltean June 26, 2023, 3:44 p.m. UTC
  When using the felix driver (the only one which supports UC filtering
and MC filtering) as a DSA master for a random other DSA switch, one can
see the following stack trace when the downstream switch ports join a
VLAN-aware bridge:

=============================
WARNING: suspicious RCU usage
-----------------------------
net/8021q/vlan_core.c:238 suspicious rcu_dereference_protected() usage!

stack backtrace:
Workqueue: dsa_ordered dsa_slave_switchdev_event_work
Call trace:
 lockdep_rcu_suspicious+0x170/0x210
 vlan_for_each+0x8c/0x188
 dsa_slave_sync_uc+0x128/0x178
 __hw_addr_sync_dev+0x138/0x158
 dsa_slave_set_rx_mode+0x58/0x70
 __dev_set_rx_mode+0x88/0xa8
 dev_uc_add+0x74/0xa0
 dsa_port_bridge_host_fdb_add+0xec/0x180
 dsa_slave_switchdev_event_work+0x7c/0x1c8
 process_one_work+0x290/0x568

What it's saying is that vlan_for_each() expects rtnl_lock() context and
it's not getting it, when it's called from the DSA master's ndo_set_rx_mode().

The caller of that - dsa_slave_set_rx_mode() - is the slave DSA
interface's dsa_port_bridge_host_fdb_add() which comes from the deferred
dsa_slave_switchdev_event_work().

We went to great lengths to avoid the rtnl_lock() context in that call
path in commit 0faf890fc519 ("net: dsa: drop rtnl_lock from
dsa_slave_switchdev_event_work"), and calling rtnl_lock() is simply not
an option due to the possibility of deadlocking when calling
dsa_flush_workqueue() from the call paths that do hold rtnl_lock() -
basically all of them.

So, when the DSA master calls vlan_for_each() from its ndo_set_rx_mode(),
the state of the 8021q driver on this device is really not protected
from concurrent access by anything.

Looking at net/8021q/, I don't think that vlan_info->vid_list was
particularly designed with RCU traversal in mind, so introducing an RCU
read-side form of vlan_for_each() - vlan_for_each_rcu() - won't be so
easy, and it also wouldn't be exactly what we need anyway.

In general I believe that the solution isn't in net/8021q/ anyway;
vlan_for_each() is not cut out for this task. DSA doesn't need rtnl_lock()
to be held per se - since it's not a netdev state change that we're
blocking, but rather, just concurrent additions/removals to a VLAN list.
We don't even need sleepable context - the callback of vlan_for_each()
just schedules deferred work.

The proposed escape is to remove the dependency on vlan_for_each() and
to open-code a non-sleepable, rtnl-free alternative to that, based on
copies of the VLAN list modified from .ndo_vlan_rx_add_vid() and
.ndo_vlan_rx_kill_vid().

Fixes: 64fdc5f341db ("net: dsa: sync unicast and multicast addresses for VLAN filters too")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
---
 include/net/dsa.h | 12 +++++--
 net/dsa/dsa.c     |  2 +-
 net/dsa/slave.c   | 84 ++++++++++++++++++++++++++++++++---------------
 net/dsa/switch.c  |  4 +--
 net/dsa/switch.h  |  3 ++
 5 files changed, 74 insertions(+), 31 deletions(-)
  

Comments

patchwork-bot+netdevbpf@kernel.org June 27, 2023, 4:50 p.m. UTC | #1
Hello:

This patch was applied to netdev/net.git (main)
by Jakub Kicinski <kuba@kernel.org>:

On Mon, 26 Jun 2023 18:44:02 +0300 you wrote:
> When using the felix driver (the only one which supports UC filtering
> and MC filtering) as a DSA master for a random other DSA switch, one can
> see the following stack trace when the downstream switch ports join a
> VLAN-aware bridge:
> 
> =============================
> WARNING: suspicious RCU usage
> 
> [...]

Here is the summary with links:
  - [net] net: dsa: avoid suspicious RCU usage for synced VLAN-aware MAC addresses
    https://git.kernel.org/netdev/net/c/d06f925f1397

You are awesome, thank you!
  

Patch

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 90bba1ce5899..d309ee7ed04b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -314,9 +314,17 @@  struct dsa_port {
 	struct list_head	fdbs;
 	struct list_head	mdbs;
 
-	/* List of VLANs that CPU and DSA ports are members of. */
 	struct mutex		vlans_lock;
-	struct list_head	vlans;
+	union {
+		/* List of VLANs that CPU and DSA ports are members of.
+		 * Access to this is serialized by the sleepable @vlans_lock.
+		 */
+		struct list_head	vlans;
+		/* List of VLANs that user ports are members of.
+		 * Access to this is serialized by netif_addr_lock_bh().
+		 */
+		struct list_head	user_vlans;
+	};
 };
 
 /* TODO: ideally DSA ports would have a single dp->link_dp member,
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1afed89e03c0..ccbdb98109f8 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -1106,7 +1106,7 @@  static struct dsa_port *dsa_port_touch(struct dsa_switch *ds, int index)
 	mutex_init(&dp->vlans_lock);
 	INIT_LIST_HEAD(&dp->fdbs);
 	INIT_LIST_HEAD(&dp->mdbs);
-	INIT_LIST_HEAD(&dp->vlans);
+	INIT_LIST_HEAD(&dp->vlans); /* also initializes &dp->user_vlans */
 	INIT_LIST_HEAD(&dp->list);
 	list_add_tail(&dp->list, &dst->ports);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 165bb2cb8431..527b1d576460 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -27,6 +27,7 @@ 
 #include "master.h"
 #include "netlink.h"
 #include "slave.h"
+#include "switch.h"
 #include "tag.h"
 
 struct dsa_switchdev_event_work {
@@ -161,8 +162,7 @@  static int dsa_slave_schedule_standalone_work(struct net_device *dev,
 	return 0;
 }
 
-static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid,
-					    void *arg)
+static int dsa_slave_host_vlan_rx_filtering(void *arg, int vid)
 {
 	struct dsa_host_vlan_rx_filtering_ctx *ctx = arg;
 
@@ -170,6 +170,28 @@  static int dsa_slave_host_vlan_rx_filtering(struct net_device *vdev, int vid,
 						  ctx->addr, vid);
 }
 
+static int dsa_slave_vlan_for_each(struct net_device *dev,
+				   int (*cb)(void *arg, int vid), void *arg)
+{
+	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_vlan *v;
+	int err;
+
+	lockdep_assert_held(&dev->addr_list_lock);
+
+	err = cb(arg, 0);
+	if (err)
+		return err;
+
+	list_for_each_entry(v, &dp->user_vlans, list) {
+		err = cb(arg, v->vid);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int dsa_slave_sync_uc(struct net_device *dev,
 			     const unsigned char *addr)
 {
@@ -180,18 +202,14 @@  static int dsa_slave_sync_uc(struct net_device *dev,
 		.addr = addr,
 		.event = DSA_UC_ADD,
 	};
-	int err;
 
 	dev_uc_add(master, addr);
 
 	if (!dsa_switch_supports_uc_filtering(dp->ds))
 		return 0;
 
-	err = dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
-	if (err)
-		return err;
-
-	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+	return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+				       &ctx);
 }
 
 static int dsa_slave_unsync_uc(struct net_device *dev,
@@ -204,18 +222,14 @@  static int dsa_slave_unsync_uc(struct net_device *dev,
 		.addr = addr,
 		.event = DSA_UC_DEL,
 	};
-	int err;
 
 	dev_uc_del(master, addr);
 
 	if (!dsa_switch_supports_uc_filtering(dp->ds))
 		return 0;
 
-	err = dsa_slave_schedule_standalone_work(dev, DSA_UC_DEL, addr, 0);
-	if (err)
-		return err;
-
-	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+	return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+				       &ctx);
 }
 
 static int dsa_slave_sync_mc(struct net_device *dev,
@@ -228,18 +242,14 @@  static int dsa_slave_sync_mc(struct net_device *dev,
 		.addr = addr,
 		.event = DSA_MC_ADD,
 	};
-	int err;
 
 	dev_mc_add(master, addr);
 
 	if (!dsa_switch_supports_mc_filtering(dp->ds))
 		return 0;
 
-	err = dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD, addr, 0);
-	if (err)
-		return err;
-
-	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+	return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+				       &ctx);
 }
 
 static int dsa_slave_unsync_mc(struct net_device *dev,
@@ -252,18 +262,14 @@  static int dsa_slave_unsync_mc(struct net_device *dev,
 		.addr = addr,
 		.event = DSA_MC_DEL,
 	};
-	int err;
 
 	dev_mc_del(master, addr);
 
 	if (!dsa_switch_supports_mc_filtering(dp->ds))
 		return 0;
 
-	err = dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0);
-	if (err)
-		return err;
-
-	return vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering, &ctx);
+	return dsa_slave_vlan_for_each(dev, dsa_slave_host_vlan_rx_filtering,
+				       &ctx);
 }
 
 void dsa_slave_sync_ha(struct net_device *dev)
@@ -1759,6 +1765,7 @@  static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 	struct netlink_ext_ack extack = {0};
 	struct dsa_switch *ds = dp->ds;
 	struct netdev_hw_addr *ha;
+	struct dsa_vlan *v;
 	int ret;
 
 	/* User port... */
@@ -1782,8 +1789,17 @@  static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 	    !dsa_switch_supports_mc_filtering(ds))
 		return 0;
 
+	v = kzalloc(sizeof(*v), GFP_KERNEL);
+	if (!v) {
+		ret = -ENOMEM;
+		goto rollback;
+	}
+
 	netif_addr_lock_bh(dev);
 
+	v->vid = vid;
+	list_add_tail(&v->list, &dp->user_vlans);
+
 	if (dsa_switch_supports_mc_filtering(ds)) {
 		netdev_for_each_synced_mc_addr(ha, dev) {
 			dsa_slave_schedule_standalone_work(dev, DSA_MC_ADD,
@@ -1803,6 +1819,12 @@  static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 	dsa_flush_workqueue();
 
 	return 0;
+
+rollback:
+	dsa_port_host_vlan_del(dp, &vlan);
+	dsa_port_vlan_del(dp, &vlan);
+
+	return ret;
 }
 
 static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
@@ -1816,6 +1838,7 @@  static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 	};
 	struct dsa_switch *ds = dp->ds;
 	struct netdev_hw_addr *ha;
+	struct dsa_vlan *v;
 	int err;
 
 	err = dsa_port_vlan_del(dp, &vlan);
@@ -1832,6 +1855,15 @@  static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 
 	netif_addr_lock_bh(dev);
 
+	v = dsa_vlan_find(&dp->user_vlans, &vlan);
+	if (!v) {
+		netif_addr_unlock_bh(dev);
+		return -ENOENT;
+	}
+
+	list_del(&v->list);
+	kfree(v);
+
 	if (dsa_switch_supports_mc_filtering(ds)) {
 		netdev_for_each_synced_mc_addr(ha, dev) {
 			dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL,
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 8c9a9f94b756..1a42f9317334 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -673,8 +673,8 @@  static bool dsa_port_host_vlan_match(struct dsa_port *dp,
 	return false;
 }
 
-static struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
-				      const struct switchdev_obj_port_vlan *vlan)
+struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
+			       const struct switchdev_obj_port_vlan *vlan)
 {
 	struct dsa_vlan *v;
 
diff --git a/net/dsa/switch.h b/net/dsa/switch.h
index 15e67b95eb6e..ea034677da15 100644
--- a/net/dsa/switch.h
+++ b/net/dsa/switch.h
@@ -111,6 +111,9 @@  struct dsa_notifier_master_state_info {
 	bool operational;
 };
 
+struct dsa_vlan *dsa_vlan_find(struct list_head *vlan_list,
+			       const struct switchdev_obj_port_vlan *vlan);
+
 int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v);
 int dsa_broadcast(unsigned long e, void *v);