[net] ipmr: support IP_PKTINFO on cache report IGMP msg

Message ID ac24d9b6-bfff-4700-a301-d4bd0dbb9313@gmail.com
State New
Headers
Series [net] ipmr: support IP_PKTINFO on cache report IGMP msg |

Commit Message

Leone Fernando Dec. 13, 2023, 2:33 p.m. UTC
  In order to support IP_PKTINFO on those packets, we need to call
ipv4_pktinfo_prepare(), so this patch makes minor changes to that
function to support this flow.

When sending mrouted/pimd daemons a cache report IGMP msg, it is
unnecessary to set dst on the newly created skb.
It used to be necessary on older versions until
commit d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") which
changed the way the IP_PKTINFO struct is retrieved.

Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
Signed-off-by: Leone Fernando <leone4fernando@gmail.com>
---
 include/net/ip.h       | 10 +++++++++-
 net/ipv4/ip_sockglue.c | 25 ++++++++++++++++---------
 net/ipv4/ipmr.c        | 12 +++++-------
 net/ipv4/raw.c         |  2 +-
 net/ipv4/udp.c         |  2 +-
 5 files changed, 32 insertions(+), 19 deletions(-)
  

Comments

Eric Dumazet Dec. 13, 2023, 2:55 p.m. UTC | #1
On Wed, Dec 13, 2023 at 3:35 PM Leone Fernando <leone4fernando@gmail.com> wrote:
>
> In order to support IP_PKTINFO on those packets, we need to call
> ipv4_pktinfo_prepare, so introduced minor changes to this
> function to support this flow.
>
> When sending mrouted/pimd daemons a cache report IGMP msg, it is
> unnecessary to set dst on the newly created skb.
> It used to be necessary on older versions until
> commit d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference") which
> changed the way IP_PKTINFO struct is been retrieved.
>

Given this is a 12-year-old bug, I would rather target the net-next tree.

> Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
> Signed-off-by: Leone Fernando <leone4fernando@gmail.com>
> ---
>  include/net/ip.h       | 10 +++++++++-
>  net/ipv4/ip_sockglue.c | 25 ++++++++++++++++---------
>  net/ipv4/ipmr.c        | 12 +++++-------
>  net/ipv4/raw.c         |  2 +-
>  net/ipv4/udp.c         |  2 +-
>  5 files changed, 32 insertions(+), 19 deletions(-)
>
> diff --git a/include/net/ip.h b/include/net/ip.h
> index b31be912489a..1b40b7386c56 100644
> --- a/include/net/ip.h
> +++ b/include/net/ip.h
> @@ -767,7 +767,15 @@ int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
>   *     Functions provided by ip_sockglue.c
>   */
>
> -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
> +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
> +                       struct sk_buff *oskb);
> +
> +
> +static inline void ipv4_pktinfo_input_prepare(const struct sock *sk, struct sk_buff *skb)
> +{
> +       ipv4_pktinfo_prepare(sk, skb, NULL);
> +}
> +
>  void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
>                          struct sk_buff *skb, int tlen, int offset);
>  int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
> diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
> index d7d13940774e..fb26963e3869 100644
> --- a/net/ipv4/ip_sockglue.c
> +++ b/net/ipv4/ip_sockglue.c
> @@ -1364,19 +1364,26 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
>  /**
>   * ipv4_pktinfo_prepare - transfer some info from rtable to skb
>   * @sk: socket
> - * @skb: buffer
> + * @iskb: input buffer
> + * @oskb: out buffer
>   *
>   * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
>   * destination in skb->cb[] before dst drop.
>   * This way, receiver doesn't make cache line misses to read rtable.
>   */
> -void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
> +void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
> +                         struct sk_buff *oskb)

This looks more complicated than needed.

I am pretty sure we can fix the bug without touching this function...

>  {
> -       struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
> +       struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(iskb);
>         bool prepare = inet_test_bit(PKTINFO, sk) ||
>                        ipv6_sk_rxinfo(sk);
>
> -       if (prepare && skb_rtable(skb)) {
> +       if (oskb) {
> +               memcpy(oskb->cb, iskb->cb, sizeof(iskb->cb));
> +               pktinfo = PKTINFO_SKB_CB(oskb);
> +       }
> +
> +       if (prepare && skb_rtable(iskb)) {
>                 /* skb->cb is overloaded: prior to this point it is IP{6}CB
>                  * which has interface index (iif) as the first member of the
>                  * underlying inet{6}_skb_parm struct. This code then overlays
> @@ -1386,20 +1393,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
>                  * (e.g., process binds socket to eth0 for Tx which is
>                  * redirected to loopback in the rtable/dst).
>                  */
> -               struct rtable *rt = skb_rtable(skb);
> -               bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
> +               struct rtable *rt = skb_rtable(iskb);
> +               bool l3slave = ipv4_l3mdev_skb(IPCB(iskb)->flags);
>
>                 if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
> -                       pktinfo->ipi_ifindex = inet_iif(skb);
> +                       pktinfo->ipi_ifindex = inet_iif(iskb);
>                 else if (l3slave && rt && rt->rt_iif)
>                         pktinfo->ipi_ifindex = rt->rt_iif;
>
> -               pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
> +               pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(iskb);
>         } else {
>                 pktinfo->ipi_ifindex = 0;
>                 pktinfo->ipi_spec_dst.s_addr = 0;
>         }
> -       skb_dst_drop(skb);
> +       skb_dst_drop(iskb);
>  }
>
>  int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
> diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
> index 9e222a57bc2b..6ed7c88743f9 100644
> --- a/net/ipv4/ipmr.c
> +++ b/net/ipv4/ipmr.c
> @@ -1025,6 +1025,10 @@ static int ipmr_cache_report(const struct mr_table *mrt,
>         struct sk_buff *skb;
>         int ret;
>
> +       mroute_sk = rcu_dereference(mrt->mroute_sk);
> +       if (!mroute_sk)
> +               return -EINVAL;
> +
>         if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
>                 skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
>         else
> @@ -1069,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt,
>                 msg = (struct igmpmsg *)skb_network_header(skb);
>                 msg->im_vif = vifi;
>                 msg->im_vif_hi = vifi >> 8;
> -               skb_dst_set(skb, dst_clone(skb_dst(pkt)));
> +               ipv4_pktinfo_prepare(mroute_sk, pkt, skb);

All we need is to call ipv4_pktinfo_prepare(sk, pkt);
then copy pkt->cb to skb->cb ?

>                 /* Add our header */
>                 igmp = skb_put(skb, sizeof(struct igmphdr));
>                 igmp->type = assert;
> @@ -1079,12 +1083,6 @@ static int ipmr_cache_report(const struct mr_table *mrt,
>                 skb->transport_header = skb->network_header;
>         }
>
> -       mroute_sk = rcu_dereference(mrt->mroute_sk);
> -       if (!mroute_sk) {
> -               kfree_skb(skb);
> -               return -EINVAL;
> -       }
> -
>         igmpmsg_netlink_event(mrt, skb);
>
>         /* Deliver to mrouted */
> diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
> index 27da9d7294c0..cde60c8deed4 100644
> --- a/net/ipv4/raw.c
> +++ b/net/ipv4/raw.c
> @@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
>
>         /* Charge it to the socket. */
>
> -       ipv4_pktinfo_prepare(sk, skb);
> +       ipv4_pktinfo_input_prepare(sk, skb);
>         if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
>                 kfree_skb_reason(skb, reason);
>                 return NET_RX_DROP;
> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> index 89e5a806b82e..3e5a418c96c3 100644
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
>
>         udp_csum_pull_header(skb);
>
> -       ipv4_pktinfo_prepare(sk, skb);
> +       ipv4_pktinfo_input_prepare(sk, skb);
>         return __udp_queue_rcv_skb(sk, skb);
>
>  csum_error:
> --
> 2.34.1
>
  
Leone Fernando Dec. 13, 2023, 4:09 p.m. UTC | #2
Thank you Eric. I will submit a v2.
  

Patch

diff --git a/include/net/ip.h b/include/net/ip.h
index b31be912489a..1b40b7386c56 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,7 +767,15 @@  int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
  *	Functions provided by ip_sockglue.c
  */
 
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
+			struct sk_buff *oskb);
+
+
+static inline void ipv4_pktinfo_input_prepare(const struct sock *sk, struct sk_buff *skb)
+{
+	ipv4_pktinfo_prepare(sk, skb, NULL);
+}
+
 void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
 			 struct sk_buff *skb, int tlen, int offset);
 int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d7d13940774e..fb26963e3869 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1364,19 +1364,26 @@  int do_ip_setsockopt(struct sock *sk, int level, int optname,
 /**
  * ipv4_pktinfo_prepare - transfer some info from rtable to skb
  * @sk: socket
- * @skb: buffer
+ * @iskb: input buffer
+ * @oskb: out buffer
  *
  * To support IP_CMSG_PKTINFO option, we store rt_iif and specific
  * destination in skb->cb[] before dst drop.
  * This way, receiver doesn't make cache line misses to read rtable.
  */
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *iskb,
+			  struct sk_buff *oskb)
 {
-	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(iskb);
 	bool prepare = inet_test_bit(PKTINFO, sk) ||
 		       ipv6_sk_rxinfo(sk);
 
-	if (prepare && skb_rtable(skb)) {
+	if (oskb) {
+		memcpy(oskb->cb, iskb->cb, sizeof(iskb->cb));
+		pktinfo = PKTINFO_SKB_CB(oskb);
+	}
+
+	if (prepare && skb_rtable(iskb)) {
 		/* skb->cb is overloaded: prior to this point it is IP{6}CB
 		 * which has interface index (iif) as the first member of the
 		 * underlying inet{6}_skb_parm struct. This code then overlays
@@ -1386,20 +1393,20 @@  void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		 * (e.g., process binds socket to eth0 for Tx which is
 		 * redirected to loopback in the rtable/dst).
 		 */
-		struct rtable *rt = skb_rtable(skb);
-		bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags);
+		struct rtable *rt = skb_rtable(iskb);
+		bool l3slave = ipv4_l3mdev_skb(IPCB(iskb)->flags);
 
 		if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX)
-			pktinfo->ipi_ifindex = inet_iif(skb);
+			pktinfo->ipi_ifindex = inet_iif(iskb);
 		else if (l3slave && rt && rt->rt_iif)
 			pktinfo->ipi_ifindex = rt->rt_iif;
 
-		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb);
+		pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(iskb);
 	} else {
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
 	}
-	skb_dst_drop(skb);
+	skb_dst_drop(iskb);
 }
 
 int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9e222a57bc2b..6ed7c88743f9 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1025,6 +1025,10 @@  static int ipmr_cache_report(const struct mr_table *mrt,
 	struct sk_buff *skb;
 	int ret;
 
+	mroute_sk = rcu_dereference(mrt->mroute_sk);
+	if (!mroute_sk)
+		return -EINVAL;
+
 	if (assert == IGMPMSG_WHOLEPKT || assert == IGMPMSG_WRVIFWHOLE)
 		skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
 	else
@@ -1069,7 +1073,7 @@  static int ipmr_cache_report(const struct mr_table *mrt,
 		msg = (struct igmpmsg *)skb_network_header(skb);
 		msg->im_vif = vifi;
 		msg->im_vif_hi = vifi >> 8;
-		skb_dst_set(skb, dst_clone(skb_dst(pkt)));
+		ipv4_pktinfo_prepare(mroute_sk, pkt, skb);
 		/* Add our header */
 		igmp = skb_put(skb, sizeof(struct igmphdr));
 		igmp->type = assert;
@@ -1079,12 +1083,6 @@  static int ipmr_cache_report(const struct mr_table *mrt,
 		skb->transport_header = skb->network_header;
 	}
 
-	mroute_sk = rcu_dereference(mrt->mroute_sk);
-	if (!mroute_sk) {
-		kfree_skb(skb);
-		return -EINVAL;
-	}
-
 	igmpmsg_netlink_event(mrt, skb);
 
 	/* Deliver to mrouted */
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 27da9d7294c0..cde60c8deed4 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,7 @@  static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
 
 	/* Charge it to the socket. */
 
-	ipv4_pktinfo_prepare(sk, skb);
+	ipv4_pktinfo_input_prepare(sk, skb);
 	if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
 		kfree_skb_reason(skb, reason);
 		return NET_RX_DROP;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 89e5a806b82e..3e5a418c96c3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2169,7 +2169,7 @@  static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
 
 	udp_csum_pull_header(skb);
 
-	ipv4_pktinfo_prepare(sk, skb);
+	ipv4_pktinfo_input_prepare(sk, skb);
 	return __udp_queue_rcv_skb(sk, skb);
 
 csum_error: