[RFC] bonding: rate-limit bonding driver inspect messages

Message ID 20240214044245.33170-1-praveen.kannoju@oracle.com
State New
Headers
Series [RFC] bonding: rate-limit bonding driver inspect messages |

Commit Message

Praveen Kannoju Feb. 14, 2024, 4:42 a.m. UTC
  Rate-limit the bonding driver's log messages to prevent a log flood in a
runaway situation, e.g. when the rtnl lock cannot be acquired. A message flood
leads to system instability and the loss of other crucial messages.

Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
---
 drivers/net/bonding/bond_main.c | 34 +++++++++++++++++++---------------
 include/net/bonding.h           | 11 +++++++++++
 2 files changed, 30 insertions(+), 15 deletions(-)
  

Comments

Jay Vosburgh Feb. 14, 2024, 6:34 p.m. UTC | #1
Praveen Kumar Kannoju <praveen.kannoju@oracle.com> wrote:

>Rate limit bond driver log messages, to prevent a log flood in a run-away
>situation, e.g couldn't get rtnl lock. Message flood leads to instability
>of system and loss of other crucial messages.
>
>Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
>---
> drivers/net/bonding/bond_main.c | 34 +++++++++++++++++++---------------
> include/net/bonding.h           | 11 +++++++++++
> 2 files changed, 30 insertions(+), 15 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 4e0600c..32098dd 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2610,12 +2610,13 @@ static int bond_miimon_inspect(struct bonding *bond)
> 			commit++;
> 			slave->delay = bond->params.downdelay;
> 			if (slave->delay) {
>-				slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
>-					   (BOND_MODE(bond) ==
>-					    BOND_MODE_ACTIVEBACKUP) ?
>-					    (bond_is_active_slave(slave) ?
>+				bond_info_rl(bond->dev, slave->dev,
>+					     "link status down for %sinterface, disabling it in %d ms\n",
>+					     (BOND_MODE(bond) ==
>+					     BOND_MODE_ACTIVEBACKUP) ?
>+					     (bond_is_active_slave(slave) ?
> 					     "active " : "backup ") : "",
>-					   bond->params.downdelay * bond->params.miimon);
>+					     bond->params.downdelay * bond->params.miimon);

	Why not use net_info_ratelimited() or net_ratelimit()?  The rest
of the bonding messages that are rate limited are almost all gated by
the net rate limiter.

	-J

> 			}
> 			fallthrough;
> 		case BOND_LINK_FAIL:
>@@ -2623,9 +2624,10 @@ static int bond_miimon_inspect(struct bonding *bond)
> 				/* recovered before downdelay expired */
> 				bond_propose_link_state(slave, BOND_LINK_UP);
> 				slave->last_link_up = jiffies;
>-				slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
>-					   (bond->params.downdelay - slave->delay) *
>-					   bond->params.miimon);
>+				bond_info_rl(bond->dev, slave->dev,
>+					     "link status up again after %d ms\n",
>+					     (bond->params.downdelay - slave->delay) *
>+					     bond->params.miimon);
> 				commit++;
> 				continue;
> 			}
>@@ -2648,18 +2650,20 @@ static int bond_miimon_inspect(struct bonding *bond)
> 			slave->delay = bond->params.updelay;
> 
> 			if (slave->delay) {
>-				slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
>-					   ignore_updelay ? 0 :
>-					   bond->params.updelay *
>-					   bond->params.miimon);
>+				bond_info_rl(bond->dev, slave->dev,
>+					     "link status up, enabling it in %d ms\n",
>+					     ignore_updelay ? 0 :
>+					     bond->params.updelay *
>+					     bond->params.miimon);
> 			}
> 			fallthrough;
> 		case BOND_LINK_BACK:
> 			if (!link_state) {
> 				bond_propose_link_state(slave, BOND_LINK_DOWN);
>-				slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
>-					   (bond->params.updelay - slave->delay) *
>-					   bond->params.miimon);
>+				bond_info_rl(bond->dev, slave->dev,
>+					     "link status down again after %d ms\n",
>+					     (bond->params.updelay - slave->delay) *
>+					     bond->params.miimon);
> 				commit++;
> 				continue;
> 			}
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index 5b8b1b6..ebdfaf0 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -39,8 +39,19 @@
> #define __long_aligned __attribute__((aligned((sizeof(long)))))
> #endif
> 
>+DEFINE_RATELIMIT_STATE(bond_rs, DEFAULT_RATELIMIT_INTERVAL,
>+		       DEFAULT_RATELIMIT_BURST);
>+
>+#define bond_ratelimited_function(function, ...)	\
>+do {							\
>+	if (__ratelimit(&bond_rs))		\
>+		function(__VA_ARGS__);			\
>+} while (0)
>+
> #define slave_info(bond_dev, slave_dev, fmt, ...) \
> 	netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
>+#define bond_info_rl(bond_dev, slave_dev, fmt, ...) \
>+	bond_ratelimited_function(slave_info, fmt, ##__VA_ARGS__)
> #define slave_warn(bond_dev, slave_dev, fmt, ...) \
> 	netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
> #define slave_dbg(bond_dev, slave_dev, fmt, ...) \
>-- 
>1.8.3.1
>
>

---
	-Jay Vosburgh, jay.vosburgh@canonical.com
  
Praveen Kannoju Feb. 15, 2024, 6:03 p.m. UTC | #2
> -----Original Message-----
> From: Jay Vosburgh <jay.vosburgh@canonical.com>
> Sent: 15 February 2024 12:05 AM
> To: Praveen Kannoju <praveen.kannoju@oracle.com>
> Cc: andy@greyhouse.net; davem@davemloft.net; edumazet@google.com; kuba@kernel.org; pabeni@redhat.com;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Rajesh Sivaramasubramaniom <rajesh.sivaramasubramaniom@oracle.com>;
> Rama Nichanamatlu <rama.nichanamatlu@oracle.com>; Manjunath Patil <manjunath.b.patil@oracle.com>
> Subject: Re: [PATCH RFC] bonding: rate-limit bonding driver inspect messages
> 
> Praveen Kumar Kannoju <praveen.kannoju@oracle.com> wrote:
> 
> >Rate limit bond driver log messages, to prevent a log flood in a
> >run-away situation, e.g couldn't get rtnl lock. Message flood leads to
> >instability of system and loss of other crucial messages.
> >
> >Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
> >---
> > drivers/net/bonding/bond_main.c | 34 +++++++++++++++++++---------------
> > include/net/bonding.h           | 11 +++++++++++
> > 2 files changed, 30 insertions(+), 15 deletions(-)
> >
> >diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> >index 4e0600c..32098dd 100644
> >--- a/drivers/net/bonding/bond_main.c
> >+++ b/drivers/net/bonding/bond_main.c
> >@@ -2610,12 +2610,13 @@ static int bond_miimon_inspect(struct bonding *bond)
> > 			commit++;
> > 			slave->delay = bond->params.downdelay;
> > 			if (slave->delay) {
> >-				slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
> >-					   (BOND_MODE(bond) ==
> >-					    BOND_MODE_ACTIVEBACKUP) ?
> >-					    (bond_is_active_slave(slave) ?
> >+				bond_info_rl(bond->dev, slave->dev,
> >+					     "link status down for %sinterface, disabling it in %d ms\n",
> >+					     (BOND_MODE(bond) ==
> >+					     BOND_MODE_ACTIVEBACKUP) ?
> >+					     (bond_is_active_slave(slave) ?
> > 					     "active " : "backup ") : "",
> >-					   bond->params.downdelay * bond->params.miimon);
> >+					     bond->params.downdelay * bond->params.miimon);
> 
> 	Why not use net_info_ratelimited() or net_ratelimit()?  The rest of the bonding messages that are rate limited are almost all
> gated by the net rate limiter.
> 
> 	-J

Thank you for the reply, Jay. Yes, I agree. Used net_ratelimit() and resent the v2 patch. Please review and provide your comments.

> 
> > 			}
> > 			fallthrough;
> > 		case BOND_LINK_FAIL:
> >@@ -2623,9 +2624,10 @@ static int bond_miimon_inspect(struct bonding *bond)
> > 				/* recovered before downdelay expired */
> > 				bond_propose_link_state(slave, BOND_LINK_UP);
> > 				slave->last_link_up = jiffies;
> >-				slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
> >-					   (bond->params.downdelay - slave->delay) *
> >-					   bond->params.miimon);
> >+				bond_info_rl(bond->dev, slave->dev,
> >+					     "link status up again after %d ms\n",
> >+					     (bond->params.downdelay - slave->delay) *
> >+					     bond->params.miimon);
> > 				commit++;
> > 				continue;
> > 			}
> >@@ -2648,18 +2650,20 @@ static int bond_miimon_inspect(struct bonding *bond)
> > 			slave->delay = bond->params.updelay;
> >
> > 			if (slave->delay) {
> >-				slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
> >-					   ignore_updelay ? 0 :
> >-					   bond->params.updelay *
> >-					   bond->params.miimon);
> >+				bond_info_rl(bond->dev, slave->dev,
> >+					     "link status up, enabling it in %d ms\n",
> >+					     ignore_updelay ? 0 :
> >+					     bond->params.updelay *
> >+					     bond->params.miimon);
> > 			}
> > 			fallthrough;
> > 		case BOND_LINK_BACK:
> > 			if (!link_state) {
> > 				bond_propose_link_state(slave, BOND_LINK_DOWN);
> >-				slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
> >-					   (bond->params.updelay - slave->delay) *
> >-					   bond->params.miimon);
> >+				bond_info_rl(bond->dev, slave->dev,
> >+					     "link status down again after %d ms\n",
> >+					     (bond->params.updelay - slave->delay) *
> >+					     bond->params.miimon);
> > 				commit++;
> > 				continue;
> > 			}
> >diff --git a/include/net/bonding.h b/include/net/bonding.h
> >index 5b8b1b6..ebdfaf0 100644
> >--- a/include/net/bonding.h
> >+++ b/include/net/bonding.h
> >@@ -39,8 +39,19 @@
> > #define __long_aligned __attribute__((aligned((sizeof(long)))))
> > #endif
> >
> >+DEFINE_RATELIMIT_STATE(bond_rs, DEFAULT_RATELIMIT_INTERVAL,
> >+		       DEFAULT_RATELIMIT_BURST);
> >+
> >+#define bond_ratelimited_function(function, ...)	\
> >+do {							\
> >+	if (__ratelimit(&bond_rs))		\
> >+		function(__VA_ARGS__);			\
> >+} while (0)
> >+
> > #define slave_info(bond_dev, slave_dev, fmt, ...) \
> > 	netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
> >+#define bond_info_rl(bond_dev, slave_dev, fmt, ...) \
> >+	bond_ratelimited_function(slave_info, fmt, ##__VA_ARGS__)
> > #define slave_warn(bond_dev, slave_dev, fmt, ...) \
> > 	netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
> > #define slave_dbg(bond_dev, slave_dev, fmt, ...) \
> >--
> >1.8.3.1
> >
> >
> 
> ---
> 	-Jay Vosburgh, jay.vosburgh@canonical.com
  

Patch

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 4e0600c..32098dd 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2610,12 +2610,13 @@  static int bond_miimon_inspect(struct bonding *bond)
 			commit++;
 			slave->delay = bond->params.downdelay;
 			if (slave->delay) {
-				slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
-					   (BOND_MODE(bond) ==
-					    BOND_MODE_ACTIVEBACKUP) ?
-					    (bond_is_active_slave(slave) ?
+				bond_info_rl(bond->dev, slave->dev,
+					     "link status down for %sinterface, disabling it in %d ms\n",
+					     (BOND_MODE(bond) ==
+					     BOND_MODE_ACTIVEBACKUP) ?
+					     (bond_is_active_slave(slave) ?
 					     "active " : "backup ") : "",
-					   bond->params.downdelay * bond->params.miimon);
+					     bond->params.downdelay * bond->params.miimon);
 			}
 			fallthrough;
 		case BOND_LINK_FAIL:
@@ -2623,9 +2624,10 @@  static int bond_miimon_inspect(struct bonding *bond)
 				/* recovered before downdelay expired */
 				bond_propose_link_state(slave, BOND_LINK_UP);
 				slave->last_link_up = jiffies;
-				slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
-					   (bond->params.downdelay - slave->delay) *
-					   bond->params.miimon);
+				bond_info_rl(bond->dev, slave->dev,
+					     "link status up again after %d ms\n",
+					     (bond->params.downdelay - slave->delay) *
+					     bond->params.miimon);
 				commit++;
 				continue;
 			}
@@ -2648,18 +2650,20 @@  static int bond_miimon_inspect(struct bonding *bond)
 			slave->delay = bond->params.updelay;
 
 			if (slave->delay) {
-				slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
-					   ignore_updelay ? 0 :
-					   bond->params.updelay *
-					   bond->params.miimon);
+				bond_info_rl(bond->dev, slave->dev,
+					     "link status up, enabling it in %d ms\n",
+					     ignore_updelay ? 0 :
+					     bond->params.updelay *
+					     bond->params.miimon);
 			}
 			fallthrough;
 		case BOND_LINK_BACK:
 			if (!link_state) {
 				bond_propose_link_state(slave, BOND_LINK_DOWN);
-				slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
-					   (bond->params.updelay - slave->delay) *
-					   bond->params.miimon);
+				bond_info_rl(bond->dev, slave->dev,
+					     "link status down again after %d ms\n",
+					     (bond->params.updelay - slave->delay) *
+					     bond->params.miimon);
 				commit++;
 				continue;
 			}
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 5b8b1b6..ebdfaf0 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -39,8 +39,19 @@ 
 #define __long_aligned __attribute__((aligned((sizeof(long)))))
 #endif
 
+DEFINE_RATELIMIT_STATE(bond_rs, DEFAULT_RATELIMIT_INTERVAL,
+		       DEFAULT_RATELIMIT_BURST);
+
+#define bond_ratelimited_function(function, ...)	\
+do {							\
+	if (__ratelimit(&bond_rs))		\
+		function(__VA_ARGS__);			\
+} while (0)
+
 #define slave_info(bond_dev, slave_dev, fmt, ...) \
 	netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
+#define bond_info_rl(bond_dev, slave_dev, fmt, ...) /* rate-limited slave_info() */ \
+	bond_ratelimited_function(slave_info, bond_dev, slave_dev, fmt, ##__VA_ARGS__)
 #define slave_warn(bond_dev, slave_dev, fmt, ...) \
 	netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
 #define slave_dbg(bond_dev, slave_dev, fmt, ...) \