[RFC] bonding: rate-limit bonding driver inspect messages
Commit Message
Rate-limit bonding driver log messages to prevent a log flood in a run-away
situation, e.g. when the rtnl lock cannot be acquired. A message flood leads
to system instability and the loss of other crucial messages.
Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
---
drivers/net/bonding/bond_main.c | 34 +++++++++++++++++++---------------
include/net/bonding.h | 11 +++++++++++
2 files changed, 30 insertions(+), 15 deletions(-)
Comments
Praveen Kumar Kannoju <praveen.kannoju@oracle.com> wrote:
>Rate limit bond driver log messages, to prevent a log flood in a run-away
>situation, e.g couldn't get rtnl lock. Message flood leads to instability
>of system and loss of other crucial messages.
>
>Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
>---
> drivers/net/bonding/bond_main.c | 34 +++++++++++++++++++---------------
> include/net/bonding.h | 11 +++++++++++
> 2 files changed, 30 insertions(+), 15 deletions(-)
>
>diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
>index 4e0600c..32098dd 100644
>--- a/drivers/net/bonding/bond_main.c
>+++ b/drivers/net/bonding/bond_main.c
>@@ -2610,12 +2610,13 @@ static int bond_miimon_inspect(struct bonding *bond)
> commit++;
> slave->delay = bond->params.downdelay;
> if (slave->delay) {
>- slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
>- (BOND_MODE(bond) ==
>- BOND_MODE_ACTIVEBACKUP) ?
>- (bond_is_active_slave(slave) ?
>+ bond_info_rl(bond->dev, slave->dev,
>+ "link status down for %sinterface, disabling it in %d ms\n",
>+ (BOND_MODE(bond) ==
>+ BOND_MODE_ACTIVEBACKUP) ?
>+ (bond_is_active_slave(slave) ?
> "active " : "backup ") : "",
>- bond->params.downdelay * bond->params.miimon);
>+ bond->params.downdelay * bond->params.miimon);
Why not use net_info_ratelimited() or net_ratelimit()? The rest
of the bonding messages that are rate limited are almost all gated by
the net rate limiter.
-J
> }
> fallthrough;
> case BOND_LINK_FAIL:
>@@ -2623,9 +2624,10 @@ static int bond_miimon_inspect(struct bonding *bond)
> /* recovered before downdelay expired */
> bond_propose_link_state(slave, BOND_LINK_UP);
> slave->last_link_up = jiffies;
>- slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
>- (bond->params.downdelay - slave->delay) *
>- bond->params.miimon);
>+ bond_info_rl(bond->dev, slave->dev,
>+ "link status up again after %d ms\n",
>+ (bond->params.downdelay - slave->delay) *
>+ bond->params.miimon);
> commit++;
> continue;
> }
>@@ -2648,18 +2650,20 @@ static int bond_miimon_inspect(struct bonding *bond)
> slave->delay = bond->params.updelay;
>
> if (slave->delay) {
>- slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
>- ignore_updelay ? 0 :
>- bond->params.updelay *
>- bond->params.miimon);
>+ bond_info_rl(bond->dev, slave->dev,
>+ "link status up, enabling it in %d ms\n",
>+ ignore_updelay ? 0 :
>+ bond->params.updelay *
>+ bond->params.miimon);
> }
> fallthrough;
> case BOND_LINK_BACK:
> if (!link_state) {
> bond_propose_link_state(slave, BOND_LINK_DOWN);
>- slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
>- (bond->params.updelay - slave->delay) *
>- bond->params.miimon);
>+ bond_info_rl(bond->dev, slave->dev,
>+ "link status down again after %d ms\n",
>+ (bond->params.updelay - slave->delay) *
>+ bond->params.miimon);
> commit++;
> continue;
> }
>diff --git a/include/net/bonding.h b/include/net/bonding.h
>index 5b8b1b6..ebdfaf0 100644
>--- a/include/net/bonding.h
>+++ b/include/net/bonding.h
>@@ -39,8 +39,19 @@
> #define __long_aligned __attribute__((aligned((sizeof(long)))))
> #endif
>
>+DEFINE_RATELIMIT_STATE(bond_rs, DEFAULT_RATELIMIT_INTERVAL,
>+ DEFAULT_RATELIMIT_BURST);
>+
>+#define bond_ratelimited_function(function, ...) \
>+do { \
>+ if (__ratelimit(&bond_rs)) \
>+ function(__VA_ARGS__); \
>+} while (0)
>+
> #define slave_info(bond_dev, slave_dev, fmt, ...) \
> netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
>+#define bond_info_rl(bond_dev, slave_dev, fmt, ...) \
>+ bond_ratelimited_function(slave_info, fmt, ##__VA_ARGS__)
> #define slave_warn(bond_dev, slave_dev, fmt, ...) \
> netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
> #define slave_dbg(bond_dev, slave_dev, fmt, ...) \
>--
>1.8.3.1
>
>
---
-Jay Vosburgh, jay.vosburgh@canonical.com
> -----Original Message-----
> From: Jay Vosburgh <jay.vosburgh@canonical.com>
> Sent: 15 February 2024 12:05 AM
> To: Praveen Kannoju <praveen.kannoju@oracle.com>
> Cc: andy@greyhouse.net; davem@davemloft.net; edumazet@google.com; kuba@kernel.org; pabeni@redhat.com;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org; Rajesh Sivaramasubramaniom <rajesh.sivaramasubramaniom@oracle.com>;
> Rama Nichanamatlu <rama.nichanamatlu@oracle.com>; Manjunath Patil <manjunath.b.patil@oracle.com>
> Subject: Re: [PATCH RFC] bonding: rate-limit bonding driver inspect messages
>
> Praveen Kumar Kannoju <praveen.kannoju@oracle.com> wrote:
>
> >Rate limit bond driver log messages, to prevent a log flood in a
> >run-away situation, e.g couldn't get rtnl lock. Message flood leads to
> >instability of system and loss of other crucial messages.
> >
> >Signed-off-by: Praveen Kumar Kannoju <praveen.kannoju@oracle.com>
> >---
> > drivers/net/bonding/bond_main.c | 34 +++++++++++++++++++---------------
> > include/net/bonding.h | 11 +++++++++++
> > 2 files changed, 30 insertions(+), 15 deletions(-)
> >
> >diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
> >index 4e0600c..32098dd 100644
> >--- a/drivers/net/bonding/bond_main.c
> >+++ b/drivers/net/bonding/bond_main.c
> >@@ -2610,12 +2610,13 @@ static int bond_miimon_inspect(struct bonding *bond)
> > commit++;
> > slave->delay = bond->params.downdelay;
> > if (slave->delay) {
> >- slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
> >- (BOND_MODE(bond) ==
> >- BOND_MODE_ACTIVEBACKUP) ?
> >- (bond_is_active_slave(slave) ?
> >+ bond_info_rl(bond->dev, slave->dev,
> >+ "link status down for %sinterface, disabling it in %d ms\n",
> >+ (BOND_MODE(bond) ==
> >+ BOND_MODE_ACTIVEBACKUP) ?
> >+ (bond_is_active_slave(slave) ?
> > "active " : "backup ") : "",
> >- bond->params.downdelay * bond->params.miimon);
> >+ bond->params.downdelay * bond->params.miimon);
>
> Why not use net_info_ratelimited() or net_ratelimit()? The rest of the bonding messages that are rate limited are almost all
> gated by the net rate limiter.
>
> -J
Thank you for the reply, Jay. Yes, I agree. I have switched to net_ratelimit() and sent a v2 patch. Please review it and share your comments.
>
> > }
> > fallthrough;
> > case BOND_LINK_FAIL:
> >@@ -2623,9 +2624,10 @@ static int bond_miimon_inspect(struct bonding *bond)
> > /* recovered before downdelay expired */
> > bond_propose_link_state(slave, BOND_LINK_UP);
> > slave->last_link_up = jiffies;
> >- slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
> >- (bond->params.downdelay - slave->delay) *
> >- bond->params.miimon);
> >+ bond_info_rl(bond->dev, slave->dev,
> >+ "link status up again after %d ms\n",
> >+ (bond->params.downdelay - slave->delay) *
> >+ bond->params.miimon);
> > commit++;
> > continue;
> > }
> >@@ -2648,18 +2650,20 @@ static int bond_miimon_inspect(struct bonding *bond)
> > slave->delay = bond->params.updelay;
> >
> > if (slave->delay) {
> >- slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
> >- ignore_updelay ? 0 :
> >- bond->params.updelay *
> >- bond->params.miimon);
> >+ bond_info_rl(bond->dev, slave->dev,
> >+ "link status up, enabling it in %d ms\n",
> >+ ignore_updelay ? 0 :
> >+ bond->params.updelay *
> >+ bond->params.miimon);
> > }
> > fallthrough;
> > case BOND_LINK_BACK:
> > if (!link_state) {
> > bond_propose_link_state(slave, BOND_LINK_DOWN);
> >- slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
> >- (bond->params.updelay - slave->delay) *
> >- bond->params.miimon);
> >+ bond_info_rl(bond->dev, slave->dev,
> >+ "link status down again after %d ms\n",
> >+ (bond->params.updelay - slave->delay) *
> >+ bond->params.miimon);
> > commit++;
> > continue;
> > }
> >diff --git a/include/net/bonding.h b/include/net/bonding.h
> >index 5b8b1b6..ebdfaf0 100644
> >--- a/include/net/bonding.h
> >+++ b/include/net/bonding.h
> >@@ -39,8 +39,19 @@
> > #define __long_aligned __attribute__((aligned((sizeof(long)))))
> > #endif
> >
> >+DEFINE_RATELIMIT_STATE(bond_rs, DEFAULT_RATELIMIT_INTERVAL,
> >+ DEFAULT_RATELIMIT_BURST);
> >+
> >+#define bond_ratelimited_function(function, ...) \
> >+do { \
> >+ if (__ratelimit(&bond_rs)) \
> >+ function(__VA_ARGS__); \
> >+} while (0)
> >+
> > #define slave_info(bond_dev, slave_dev, fmt, ...) \
> > netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
> >+#define bond_info_rl(bond_dev, slave_dev, fmt, ...) \
> >+ bond_ratelimited_function(slave_info, fmt, ##__VA_ARGS__)
> > #define slave_warn(bond_dev, slave_dev, fmt, ...) \
> > netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
> > #define slave_dbg(bond_dev, slave_dev, fmt, ...) \
> >--
> >1.8.3.1
> >
> >
>
> ---
> -Jay Vosburgh, jay.vosburgh@canonical.com
@@ -2610,12 +2610,13 @@ static int bond_miimon_inspect(struct bonding *bond)
commit++;
slave->delay = bond->params.downdelay;
if (slave->delay) {
- slave_info(bond->dev, slave->dev, "link status down for %sinterface, disabling it in %d ms\n",
- (BOND_MODE(bond) ==
- BOND_MODE_ACTIVEBACKUP) ?
- (bond_is_active_slave(slave) ?
+ bond_info_rl(bond->dev, slave->dev,
+ "link status down for %sinterface, disabling it in %d ms\n",
+ (BOND_MODE(bond) ==
+ BOND_MODE_ACTIVEBACKUP) ?
+ (bond_is_active_slave(slave) ?
"active " : "backup ") : "",
- bond->params.downdelay * bond->params.miimon);
+ bond->params.downdelay * bond->params.miimon);
}
fallthrough;
case BOND_LINK_FAIL:
@@ -2623,9 +2624,10 @@ static int bond_miimon_inspect(struct bonding *bond)
/* recovered before downdelay expired */
bond_propose_link_state(slave, BOND_LINK_UP);
slave->last_link_up = jiffies;
- slave_info(bond->dev, slave->dev, "link status up again after %d ms\n",
- (bond->params.downdelay - slave->delay) *
- bond->params.miimon);
+ bond_info_rl(bond->dev, slave->dev,
+ "link status up again after %d ms\n",
+ (bond->params.downdelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -2648,18 +2650,20 @@ static int bond_miimon_inspect(struct bonding *bond)
slave->delay = bond->params.updelay;
if (slave->delay) {
- slave_info(bond->dev, slave->dev, "link status up, enabling it in %d ms\n",
- ignore_updelay ? 0 :
- bond->params.updelay *
- bond->params.miimon);
+ bond_info_rl(bond->dev, slave->dev,
+ "link status up, enabling it in %d ms\n",
+ ignore_updelay ? 0 :
+ bond->params.updelay *
+ bond->params.miimon);
}
fallthrough;
case BOND_LINK_BACK:
if (!link_state) {
bond_propose_link_state(slave, BOND_LINK_DOWN);
- slave_info(bond->dev, slave->dev, "link status down again after %d ms\n",
- (bond->params.updelay - slave->delay) *
- bond->params.miimon);
+ bond_info_rl(bond->dev, slave->dev,
+ "link status down again after %d ms\n",
+ (bond->params.updelay - slave->delay) *
+ bond->params.miimon);
commit++;
continue;
}
@@ -39,8 +39,19 @@
#define __long_aligned __attribute__((aligned((sizeof(long)))))
#endif
+DEFINE_RATELIMIT_STATE(bond_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+#define bond_ratelimited_function(function, ...) \
+do { \
+ if (__ratelimit(&bond_rs)) \
+ function(__VA_ARGS__); \
+} while (0)
+
#define slave_info(bond_dev, slave_dev, fmt, ...) \
netdev_info(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
+#define bond_info_rl(bond_dev, slave_dev, fmt, ...) \
+ bond_ratelimited_function(slave_info, fmt, ##__VA_ARGS__)
#define slave_warn(bond_dev, slave_dev, fmt, ...) \
netdev_warn(bond_dev, "(slave %s): " fmt, (slave_dev)->name, ##__VA_ARGS__)
#define slave_dbg(bond_dev, slave_dev, fmt, ...) \