net/ipv4: return the real errno instead of -EINVAL

Message ID 202308041648338823694@zte.com.cn
State New
Headers
Series net/ipv4: return the real errno instead of -EINVAL |

Commit Message

Yang Yang Aug. 4, 2023, 8:48 a.m. UTC
  From: xu xin <xu.xin16@zte.com.cn>

Currently, no matter what error pointer ip_neigh_for_gw() returns,
ip_finish_output2() always returns -EINVAL, which may mislead
upper-layer users.

For example, when an application uses sendto() to send a UDP packet and the
neighbor table overflows, sendto() will get a value of -EINVAL, which will
cause users to waste a lot of time checking parameters for errors.

Return the real errno instead of -EINVAL.

Signed-off-by: xu xin <xu.xin16@zte.com.cn>
Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
Cc: Si Hao <si.hao@zte.com.cn>
Cc: Dai Shixin <dai.shixin@zte.com.cn>
Cc: Jiang Xuexin <jiang.xuexin@zte.com.cn>
---
 net/ipv4/ip_output.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)
  

Comments

Vadim Fedorenko Aug. 4, 2023, 8:49 p.m. UTC | #1
On 04/08/2023 09:48, yang.yang29@zte.com.cn wrote:
> From: xu xin <xu.xin16@zte.com.cn>
> 
> For now, no matter what error pointer ip_neigh_for_gw() returns,
> ip_finish_output2() always return -EINVAL, which may mislead the upper
> users.
> 
> For exemple, an application uses sendto to send an UDP packet, but when the
> neighbor table overflows, sendto() will get a value of -EINVAL, and it will
> cause users to waste a lot of time checking parameters for errors.
> 
> Return the real errno instead of -EINVAL.
> 
> Signed-off-by: xu xin <xu.xin16@zte.com.cn>
> Reviewed-by: Yang Yang <yang.yang29@zte.com.cn>
> Cc: Si Hao <si.hao@zte.com.cn>
> Cc: Dai Shixin <dai.shixin@zte.com.cn>
> Cc: Jiang Xuexin <jiang.xuexin@zte.com.cn>
> ---
>   net/ipv4/ip_output.c | 9 ++++-----
>   1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 6ba1a0fafbaa..2d7cf083dff9 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -201,6 +201,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
>   	unsigned int hh_len = LL_RESERVED_SPACE(dev);
>   	struct neighbour *neigh;
>   	bool is_v6gw = false;
> +	int res;
> 
>   	if (rt->rt_type == RTN_MULTICAST) {
>   		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
> @@ -214,8 +215,7 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
>   	}
> 
>   	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
> -		int res = lwtunnel_xmit(skb);
> -
> +		res = lwtunnel_xmit(skb);
>   		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
>   			return res;
>   	}
> @@ -223,8 +223,6 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
>   	rcu_read_lock();
>   	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
>   	if (!IS_ERR(neigh)) {
> -		int res;
> -
>   		sock_confirm_neigh(skb, neigh);
>   		/* if crossing protocols, can not use the cached header */
>   		res = neigh_output(neigh, skb, is_v6gw);
> @@ -236,7 +234,8 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
>   	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
>   			    __func__);
>   	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
> -	return -EINVAL;
> +	res = PTR_ERR(neigh);
> +	return res;

I believe this part could be written as

-	return -EINVAL;
+	return PTR_ERR(neigh);


and there is no need to change the code in other places. This will be
easier to track/backport.


>   }
> 
>   static int ip_finish_output_gso(struct net *net, struct sock *sk,
  
xu Aug. 7, 2023, 2:05 a.m. UTC | #2
>> @@ -236,7 +234,8 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
>>   	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
>>   			    __func__);
>>   	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
>> -	return -EINVAL;
>> +	res = PTR_ERR(neigh);
>> +	return res;
>
>I believe this part could be written as
>
>-	return -EINVAL;
>+	return PTR_ERR(neigh);
>
>
>and there is no need to change the code is other places. This will be
>easier to track/backport.

Sure. I initially wrote it the same way you suggest, but for the convenience of maintainer review
I deliberately modified some other parts of the code to present a clear context for the core
modification.

From your reply, I can see that you agree with the key idea of this patch, so I have sent a v2 patch [1]
following your suggestion.

[1] https://lore.kernel.org/all/20230807015408.248237-1-xu.xin16@zte.com.cn/
  

Patch

diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 6ba1a0fafbaa..2d7cf083dff9 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -201,6 +201,7 @@  static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	unsigned int hh_len = LL_RESERVED_SPACE(dev);
 	struct neighbour *neigh;
 	bool is_v6gw = false;
+	int res;

 	if (rt->rt_type == RTN_MULTICAST) {
 		IP_UPD_PO_STATS(net, IPSTATS_MIB_OUTMCAST, skb->len);
@@ -214,8 +215,7 @@  static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	}

 	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
-		int res = lwtunnel_xmit(skb);
-
+		res = lwtunnel_xmit(skb);
 		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
 			return res;
 	}
@@ -223,8 +223,6 @@  static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	rcu_read_lock();
 	neigh = ip_neigh_for_gw(rt, skb, &is_v6gw);
 	if (!IS_ERR(neigh)) {
-		int res;
-
 		sock_confirm_neigh(skb, neigh);
 		/* if crossing protocols, can not use the cached header */
 		res = neigh_output(neigh, skb, is_v6gw);
@@ -236,7 +234,8 @@  static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s
 	net_dbg_ratelimited("%s: No header cache and no neighbour!\n",
 			    __func__);
 	kfree_skb_reason(skb, SKB_DROP_REASON_NEIGH_CREATEFAIL);
-	return -EINVAL;
+	res = PTR_ERR(neigh);
+	return res;
 }

 static int ip_finish_output_gso(struct net *net, struct sock *sk,