[v14,net-next,16/23] net/tcp: Ignore specific ICMPs for TCP-AO connections
Commit Message
Similarly to IPsec, RFC5925 prescribes:
">> A TCP-AO implementation MUST default to ignore incoming ICMPv4
messages of Type 3 (destination unreachable), Codes 2-4 (protocol
unreachable, port unreachable, and fragmentation needed -- ’hard
errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
(administratively prohibited) and Code 4 (port unreachable) intended
for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs."
A selftest (later in patch series) verifies that this attack is not
possible in this TCP-AO implementation.
Co-developed-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Co-developed-by: Salam Noureddine <noureddine@arista.com>
Signed-off-by: Salam Noureddine <noureddine@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Acked-by: David Ahern <dsahern@kernel.org>
---
include/net/tcp_ao.h | 10 ++++++-
include/uapi/linux/snmp.h | 1 +
include/uapi/linux/tcp.h | 4 ++-
net/ipv4/proc.c | 1 +
net/ipv4/tcp_ao.c | 58 +++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_ipv4.c | 7 +++++
net/ipv6/tcp_ipv6.c | 7 +++++
7 files changed, 86 insertions(+), 2 deletions(-)
Comments
On Tue, Oct 10, 2023 at 1:07 AM Dmitry Safonov <dima@arista.com> wrote:
>
> Similarly to IPsec, RFC5925 prescribes:
> ">> A TCP-AO implementation MUST default to ignore incoming ICMPv4
> messages of Type 3 (destination unreachable), Codes 2-4 (protocol
> unreachable, port unreachable, and fragmentation needed -- ’hard
> errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
> (administratively prohibited) and Code 4 (port unreachable) intended
> for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
> WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs."
>
> A selftest (later in patch series) verifies that this attack is not
> possible in this TCP-AO implementation.
>
> Co-developed-by: Francesco Ruggeri <fruggeri@arista.com>
> Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
> Co-developed-by: Salam Noureddine <noureddine@arista.com>
> Signed-off-by: Salam Noureddine <noureddine@arista.com>
> Signed-off-by: Dmitry Safonov <dima@arista.com>
> Acked-by: David Ahern <dsahern@kernel.org>
> ---
> include/net/tcp_ao.h | 10 ++++++-
> include/uapi/linux/snmp.h | 1 +
> include/uapi/linux/tcp.h | 4 ++-
> net/ipv4/proc.c | 1 +
> net/ipv4/tcp_ao.c | 58 +++++++++++++++++++++++++++++++++++++++
> net/ipv4/tcp_ipv4.c | 7 +++++
> net/ipv6/tcp_ipv6.c | 7 +++++
> 7 files changed, 86 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
> index 8c315c3f31da..eec38e9e6380 100644
> --- a/include/net/tcp_ao.h
> +++ b/include/net/tcp_ao.h
> @@ -24,6 +24,7 @@ struct tcp_ao_counters {
> atomic64_t pkt_bad;
> atomic64_t key_not_found;
> atomic64_t ao_required;
> + atomic64_t dropped_icmp;
> };
>
> struct tcp_ao_key {
> @@ -92,7 +93,8 @@ struct tcp_ao_info {
> struct tcp_ao_key *rnext_key;
> struct tcp_ao_counters counters;
> u32 ao_required :1,
> - __unused :31;
> + accept_icmps :1,
> + __unused :30;
> __be32 lisn;
> __be32 risn;
> /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
> @@ -191,6 +193,7 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> unsigned int len, struct tcp_sigpool *hp);
> void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
> void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
> +bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code);
> enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
> const struct sk_buff *skb, unsigned short int family,
> const struct request_sock *req,
> @@ -274,6 +277,11 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
> {
> }
>
> +static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code)
> +{
> + return false;
> +}
> +
> static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
> const struct sk_buff *skb, unsigned short int family,
> const struct request_sock *req, const struct tcp_ao_hdr *aoh)
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 06ddf4cd295c..47a6b47da66f 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -300,6 +300,7 @@ enum
> LINUX_MIB_TCPAOBAD, /* TCPAOBad */
> LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */
> LINUX_MIB_TCPAOGOOD, /* TCPAOGood */
> + LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */
> __LINUX_MIB_MAX
> };
>
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index 62543f7c5523..e4ddca6178ca 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -404,7 +404,8 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
> set_rnext :1, /* corresponding ::rnext */
> ao_required :1, /* don't accept non-AO connects */
> set_counters :1, /* set/clear ::pkt_* counters */
> - reserved :28; /* must be 0 */
> + accept_icmps :1, /* accept incoming ICMPs */
> + reserved :27; /* must be 0 */
> __u16 reserved2; /* padding, must be 0 */
> __u8 current_key; /* KeyID to set as Current_key */
> __u8 rnext; /* KeyID to set as Rnext_key */
> @@ -412,6 +413,7 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
> __u64 pkt_bad; /* failed verification */
> __u64 pkt_key_not_found; /* could not find a key to verify */
> __u64 pkt_ao_required; /* segments missing TCP-AO sign */
> + __u64 pkt_dropped_icmp; /* ICMPs that were ignored */
> } __attribute__((aligned(8)));
>
> /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index 3f643cd29cfe..5d3c9c96773e 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -302,6 +302,7 @@ static const struct snmp_mib snmp4_net_list[] = {
> SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD),
> SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
> SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
> + SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
> SNMP_MIB_SENTINEL
> };
>
> diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
> index 801174c17853..168073cd1c89 100644
> --- a/net/ipv4/tcp_ao.c
> +++ b/net/ipv4/tcp_ao.c
> @@ -15,6 +15,7 @@
>
> #include <net/tcp.h>
> #include <net/ipv6.h>
> +#include <net/icmp.h>
>
> int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> unsigned int len, struct tcp_sigpool *hp)
> @@ -44,6 +45,60 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> return 1;
> }
>
> +bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code)
> +{
> + bool ignore_icmp = false;
> + struct tcp_ao_info *ao;
> +
> + /* RFC5925, 7.8:
> + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
> + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
> + * unreachable, port unreachable, and fragmentation needed -- ’hard
> + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
> + * (administratively prohibited) and Code 4 (port unreachable) intended
> + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
> + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
> + */
> + if (READ_ONCE(sk->sk_family) == AF_INET) {
You cannot use sk->sk_family to make this decision.
It could be AF_INET6 and yet the flow could be IPv4 (dual stack).
Let the caller pass this information?
tcp_ao_ignore_icmp(sk, AF_INET, type, code);
tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
> + if (type != ICMP_DEST_UNREACH)
> + return false;
> + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
> + return false;
> + } else {
> + if (type != ICMPV6_DEST_UNREACH)
> + return false;
> + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
> + return false;
> + }
> +
> + rcu_read_lock();
> + switch (sk->sk_state) {
> + case TCP_TIME_WAIT:
> + ao = rcu_dereference(tcp_twsk(sk)->ao_info);
> + break;
> + case TCP_SYN_SENT:
> + case TCP_SYN_RECV:
> + case TCP_LISTEN:
> + case TCP_NEW_SYN_RECV:
> + /* RFC5925 specifies to ignore ICMPs *only* on connections
> + * in synchronized states.
> + */
> + rcu_read_unlock();
> + return false;
> + default:
> + ao = rcu_dereference(tcp_sk(sk)->ao_info);
> + }
> +
> + if (ao && !ao->accept_icmps) {
> + ignore_icmp = true;
> + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS);
> + atomic64_inc(&ao->counters.dropped_icmp);
> + }
> + rcu_read_unlock();
> +
> + return ignore_icmp;
> +}
>
Hi Eric,
thanks once again for taking a look :)
On 10/11/23 18:53, Eric Dumazet wrote:
> On Tue, Oct 10, 2023 at 1:07 AM Dmitry Safonov <dima@arista.com> wrote:
[..]
>> +bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code)
>> +{
>> + bool ignore_icmp = false;
>> + struct tcp_ao_info *ao;
>> +
>> + /* RFC5925, 7.8:
>> + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
>> + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
>> + * unreachable, port unreachable, and fragmentation needed -- ’hard
>> + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
>> + * (administratively prohibited) and Code 4 (port unreachable) intended
>> + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
>> + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
>> + */
>> + if (READ_ONCE(sk->sk_family) == AF_INET) {
>
> You can not use sk->sk_family to make this decision.
>
> It could be AF_INET6 and yet the flow could be IPv4. (dual stack)
>
> Let the caller pass this information ?
>
> tcp_ao_ignore_icmp(sk, AF_INET, type, code);
>
> tcp_ao_ignore_icmp(sk, AF_INET6, type, code);
Yes, I thought about it when I added READ_ONCE(), but then probably got
distracted by possible IPV6_ADDRFORM races, rather than focusing on correctness.
Looking at other places:
tcp_ao_prepare_reset() seems to do the right thing for dual stack, but I
see that it reads sk->sk_family twice, which needs to be addressed as well.
tcp_ao_connect_init() seems to do the right thing as well, but that is
hidden in tcp_ao_key_cmp().
Will fix in the next version.
Thanks,
Dmitry
@@ -24,6 +24,7 @@ struct tcp_ao_counters {
atomic64_t pkt_bad;
atomic64_t key_not_found;
atomic64_t ao_required;
+ atomic64_t dropped_icmp;
};
struct tcp_ao_key {
@@ -92,7 +93,8 @@ struct tcp_ao_info {
struct tcp_ao_key *rnext_key;
struct tcp_ao_counters counters;
u32 ao_required :1,
- __unused :31;
+ accept_icmps :1,
+ __unused :30;
__be32 lisn;
__be32 risn;
/* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
@@ -191,6 +193,7 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
unsigned int len, struct tcp_sigpool *hp);
void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
+bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code);
enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
const struct sk_buff *skb, unsigned short int family,
const struct request_sock *req,
@@ -274,6 +277,11 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
{
}
+static inline bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code)
+{
+ return false;
+}
+
static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
const struct sk_buff *skb, unsigned short int family,
const struct request_sock *req, const struct tcp_ao_hdr *aoh)
@@ -300,6 +300,7 @@ enum
LINUX_MIB_TCPAOBAD, /* TCPAOBad */
LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */
LINUX_MIB_TCPAOGOOD, /* TCPAOGood */
+ LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */
__LINUX_MIB_MAX
};
@@ -404,7 +404,8 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
set_rnext :1, /* corresponding ::rnext */
ao_required :1, /* don't accept non-AO connects */
set_counters :1, /* set/clear ::pkt_* counters */
- reserved :28; /* must be 0 */
+ accept_icmps :1, /* accept incoming ICMPs */
+ reserved :27; /* must be 0 */
__u16 reserved2; /* padding, must be 0 */
__u8 current_key; /* KeyID to set as Current_key */
__u8 rnext; /* KeyID to set as Rnext_key */
@@ -412,6 +413,7 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
__u64 pkt_bad; /* failed verification */
__u64 pkt_key_not_found; /* could not find a key to verify */
__u64 pkt_ao_required; /* segments missing TCP-AO sign */
+ __u64 pkt_dropped_icmp; /* ICMPs that were ignored */
} __attribute__((aligned(8)));
/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
@@ -302,6 +302,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD),
SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
+ SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
SNMP_MIB_SENTINEL
};
@@ -15,6 +15,7 @@
#include <net/tcp.h>
#include <net/ipv6.h>
+#include <net/icmp.h>
int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
unsigned int len, struct tcp_sigpool *hp)
@@ -44,6 +45,60 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
return 1;
}
+bool tcp_ao_ignore_icmp(const struct sock *sk, int type, int code)
+{
+ bool ignore_icmp = false;
+ struct tcp_ao_info *ao;
+
+ /* RFC5925, 7.8:
+ * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
+ * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
+ * unreachable, port unreachable, and fragmentation needed -- ’hard
+ * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
+ * (administratively prohibited) and Code 4 (port unreachable) intended
+ * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
+ * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
+ */
+ if (READ_ONCE(sk->sk_family) == AF_INET) {
+ if (type != ICMP_DEST_UNREACH)
+ return false;
+ if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
+ return false;
+ } else {
+ if (type != ICMPV6_DEST_UNREACH)
+ return false;
+ if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
+ return false;
+ }
+
+ rcu_read_lock();
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ ao = rcu_dereference(tcp_twsk(sk)->ao_info);
+ break;
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV:
+ case TCP_LISTEN:
+ case TCP_NEW_SYN_RECV:
+ /* RFC5925 specifies to ignore ICMPs *only* on connections
+ * in synchronized states.
+ */
+ rcu_read_unlock();
+ return false;
+ default:
+ ao = rcu_dereference(tcp_sk(sk)->ao_info);
+ }
+
+ if (ao && !ao->accept_icmps) {
+ ignore_icmp = true;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS);
+ atomic64_inc(&ao->counters.dropped_icmp);
+ }
+ rcu_read_unlock();
+
+ return ignore_icmp;
+}
+
/* Optimized version of tcp_ao_do_lookup(): only for sockets for which
* it's known that the keys in ao_info are matching peer's
* family/address/VRF/etc.
@@ -1083,6 +1138,7 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
new_ao->lisn = htonl(tcp_rsk(req)->snt_isn);
new_ao->risn = htonl(tcp_rsk(req)->rcv_isn);
new_ao->ao_required = ao->ao_required;
+ new_ao->accept_icmps = ao->accept_icmps;
if (family == AF_INET) {
addr = (union tcp_ao_addr *)&newsk->sk_daddr;
@@ -1789,9 +1845,11 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad);
atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found);
atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required);
+ atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp);
}
ao_info->ao_required = cmd.ao_required;
+ ao_info->accept_icmps = cmd.accept_icmps;
if (new_current)
WRITE_ONCE(ao_info->current_key, new_current);
if (new_rnext)
@@ -493,6 +493,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -506,6 +508,11 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
return 0;
}
+ if (tcp_ao_ignore_icmp(sk, type, code)) {
+ sock_put(sk);
+ return 0;
+ }
+
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
@@ -395,6 +395,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -405,6 +407,11 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
return 0;
}
+ if (tcp_ao_ignore_icmp(sk, type, code)) {
+ sock_put(sk);
+ return 0;
+ }
+
bh_lock_sock(sk);
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);