[v9,net-next,16/23] net/tcp: Ignore specific ICMPs for TCP-AO connections
Commit Message
Similarly to IPsec, RFC5925 prescribes:
">> A TCP-AO implementation MUST default to ignore incoming ICMPv4
messages of Type 3 (destination unreachable), Codes 2-4 (protocol
unreachable, port unreachable, and fragmentation needed -- ’hard
errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
(administratively prohibited) and Code 4 (port unreachable) intended
for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs."
A selftest (later in the patch series) verifies that this attack, i.e.
disrupting a synchronized TCP-AO connection with spoofed ICMP "hard
errors", is not possible in this TCP-AO implementation.
Co-developed-by: Francesco Ruggeri <fruggeri@arista.com>
Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
Co-developed-by: Salam Noureddine <noureddine@arista.com>
Signed-off-by: Salam Noureddine <noureddine@arista.com>
Signed-off-by: Dmitry Safonov <dima@arista.com>
Acked-by: David Ahern <dsahern@kernel.org>
---
include/net/tcp_ao.h | 10 ++++++-
include/uapi/linux/snmp.h | 1 +
include/uapi/linux/tcp.h | 4 ++-
net/ipv4/proc.c | 1 +
net/ipv4/tcp_ao.c | 61 +++++++++++++++++++++++++++++++++++++++
net/ipv4/tcp_ipv4.c | 5 ++++
net/ipv6/tcp_ipv6.c | 4 +++
7 files changed, 84 insertions(+), 2 deletions(-)
Comments
On Wed, Aug 2, 2023 at 7:27 PM Dmitry Safonov <dima@arista.com> wrote:
>
> Similarly to IPsec, RFC5925 prescribes:
> ">> A TCP-AO implementation MUST default to ignore incoming ICMPv4
> messages of Type 3 (destination unreachable), Codes 2-4 (protocol
> unreachable, port unreachable, and fragmentation needed -- ’hard
> errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
> (administratively prohibited) and Code 4 (port unreachable) intended
> for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
> WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs."
>
> A selftest (later in patch series) verifies that this attack is not
> possible in this TCP-AO implementation.
>
> Co-developed-by: Francesco Ruggeri <fruggeri@arista.com>
> Signed-off-by: Francesco Ruggeri <fruggeri@arista.com>
> Co-developed-by: Salam Noureddine <noureddine@arista.com>
> Signed-off-by: Salam Noureddine <noureddine@arista.com>
> Signed-off-by: Dmitry Safonov <dima@arista.com>
> Acked-by: David Ahern <dsahern@kernel.org>
> ---
> include/net/tcp_ao.h | 10 ++++++-
> include/uapi/linux/snmp.h | 1 +
> include/uapi/linux/tcp.h | 4 ++-
> net/ipv4/proc.c | 1 +
> net/ipv4/tcp_ao.c | 61 +++++++++++++++++++++++++++++++++++++++
> net/ipv4/tcp_ipv4.c | 5 ++++
> net/ipv6/tcp_ipv6.c | 4 +++
> 7 files changed, 84 insertions(+), 2 deletions(-)
>
> diff --git a/include/net/tcp_ao.h b/include/net/tcp_ao.h
> index 986e8dcbb150..94fde002b638 100644
> --- a/include/net/tcp_ao.h
> +++ b/include/net/tcp_ao.h
> @@ -24,6 +24,7 @@ struct tcp_ao_counters {
> atomic64_t pkt_bad;
> atomic64_t key_not_found;
> atomic64_t ao_required;
> + atomic64_t dropped_icmp;
> };
>
> struct tcp_ao_key {
> @@ -92,7 +93,8 @@ struct tcp_ao_info {
> struct tcp_ao_key *rnext_key;
> struct tcp_ao_counters counters;
> u32 ao_required :1,
> - __unused :31;
> + accept_icmps :1,
> + __unused :30;
> __be32 lisn;
> __be32 risn;
> /* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
> @@ -189,6 +191,7 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> unsigned int len, struct tcp_sigpool *hp);
> void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
> void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code);
> enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
> const struct sk_buff *skb, unsigned short int family,
> const struct request_sock *req,
> @@ -264,6 +267,11 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
> {
> }
>
> +static inline bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
> +{
> + return false;
> +}
> +
> static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
> const struct sk_buff *skb, unsigned short int family,
> const struct request_sock *req, const struct tcp_ao_hdr *aoh)
> diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
> index 06ddf4cd295c..47a6b47da66f 100644
> --- a/include/uapi/linux/snmp.h
> +++ b/include/uapi/linux/snmp.h
> @@ -300,6 +300,7 @@ enum
> LINUX_MIB_TCPAOBAD, /* TCPAOBad */
> LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */
> LINUX_MIB_TCPAOGOOD, /* TCPAOGood */
> + LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */
> __LINUX_MIB_MAX
> };
>
> diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
> index 3fe0612ec59a..ca7ed18ce67b 100644
> --- a/include/uapi/linux/tcp.h
> +++ b/include/uapi/linux/tcp.h
> @@ -392,7 +392,8 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
> set_rnext :1, /* corresponding ::rnext */
> ao_required :1, /* don't accept non-AO connects */
> set_counters :1, /* set/clear ::pkt_* counters */
> - reserved :28; /* must be 0 */
> + accept_icmps :1, /* accept incoming ICMPs */
> + reserved :27; /* must be 0 */
> __u16 reserved2; /* padding, must be 0 */
> __u8 current_key; /* KeyID to set as Current_key */
> __u8 rnext; /* KeyID to set as Rnext_key */
> @@ -400,6 +401,7 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
> __u64 pkt_bad; /* failed verification */
> __u64 pkt_key_not_found; /* could not find a key to verify */
> __u64 pkt_ao_required; /* segments missing TCP-AO sign */
> + __u64 pkt_dropped_icmp; /* ICMPs that were ignored */
> } __attribute__((aligned(8)));
>
> /* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
> diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
> index 3f643cd29cfe..5d3c9c96773e 100644
> --- a/net/ipv4/proc.c
> +++ b/net/ipv4/proc.c
> @@ -302,6 +302,7 @@ static const struct snmp_mib snmp4_net_list[] = {
> SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD),
> SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
> SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
> + SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
> SNMP_MIB_SENTINEL
> };
>
> diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c
> index 226dcefb426a..236c8cd1a0c7 100644
> --- a/net/ipv4/tcp_ao.c
> +++ b/net/ipv4/tcp_ao.c
> @@ -15,6 +15,7 @@
>
> #include <net/tcp.h>
> #include <net/ipv6.h>
> +#include <net/icmp.h>
>
> int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> unsigned int len, struct tcp_sigpool *hp)
> @@ -44,6 +45,63 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
> return 1;
> }
>
> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
const struct sock *sk ?
> +{
> + bool ignore_icmp = false;
> + struct tcp_ao_info *ao;
> +
> + /* RFC5925, 7.8:
> + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
> + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
> + * unreachable, port unreachable, and fragmentation needed -- ’hard
> + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
> + * (administratively prohibited) and Code 4 (port unreachable) intended
> + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
> + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
> + */
I know this sounds silly, but you should read sk->sk_family once.
Or risk another KCSAN report with IPV6_ADDRFORM
if (sk->sk_family == AF_INET) {
...
} else {
/* AF_INET6 case */
}
> + if (sk->sk_family == AF_INET) {
> + if (type != ICMP_DEST_UNREACH)
> + return false;
> + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
> + return false;
> + } else if (sk->sk_family == AF_INET6) {
> + if (type != ICMPV6_DEST_UNREACH)
> + return false;
> + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
> + return false;
> + } else {
No WARN_ON_ONCE(1) here please.
> + WARN_ON_ONCE(1);
> + return false;
> + }
> +
> + rcu_read_lock();
> + switch (sk->sk_state) {
> + case TCP_TIME_WAIT:
> + ao = rcu_dereference(tcp_twsk(sk)->ao_info);
> + break;
> + case TCP_SYN_SENT:
> + case TCP_SYN_RECV:
> + case TCP_LISTEN:
> + case TCP_NEW_SYN_RECV:
> + /* RFC5925 specifies to ignore ICMPs *only* on connections
> + * in synchronized states.
> + */
> + rcu_read_unlock();
> + return false;
> + default:
> + ao = rcu_dereference(tcp_sk(sk)->ao_info);
> + }
> +
> + if (ao && !ao->accept_icmps) {
> + ignore_icmp = true;
> + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS);
> + atomic64_inc(&ao->counters.dropped_icmp);
> + }
> + rcu_read_unlock();
> +
> + return ignore_icmp;
> +}
> +
> /* Optimized version of tcp_ao_do_lookup(): only for sockets for which
> * it's known that the keys in ao_info are matching peer's
> * family/address/VRF/etc.
> @@ -1036,6 +1094,7 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
> new_ao->lisn = htonl(tcp_rsk(req)->snt_isn);
> new_ao->risn = htonl(tcp_rsk(req)->rcv_isn);
> new_ao->ao_required = ao->ao_required;
> + new_ao->accept_icmps = ao->accept_icmps;
>
> if (family == AF_INET) {
> addr = (union tcp_ao_addr *)&newsk->sk_daddr;
> @@ -1742,9 +1801,11 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
> atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad);
> atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found);
> atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required);
> + atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp);
> }
>
> ao_info->ao_required = cmd.ao_required;
> + ao_info->accept_icmps = cmd.accept_icmps;
> if (new_current)
> WRITE_ONCE(ao_info->current_key, new_current);
> if (new_rnext)
> diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
> index 42a3be6c25a4..ae910181693d 100644
> --- a/net/ipv4/tcp_ipv4.c
> +++ b/net/ipv4/tcp_ipv4.c
> @@ -494,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
> return -ENOENT;
> }
> if (sk->sk_state == TCP_TIME_WAIT) {
> + /* To increase the counter of ignored icmps for TCP-AO */
> + tcp_ao_ignore_icmp(sk, type, code);
> inet_twsk_put(inet_twsk(sk));
> return 0;
> }
> @@ -508,6 +510,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
> }
>
> bh_lock_sock(sk);
Do we need to hold the spinlock before calling tcp_ao_ignore_icmp() ?
> + if (tcp_ao_ignore_icmp(sk, type, code))
> + goto out;
> +
> /* If too many ICMPs get dropped on busy
> * servers this needs to be solved differently.
> * We do take care of PMTU discovery (RFC1191) special case :
> diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
> index f1804ec3bb1d..07126d9eeda9 100644
> --- a/net/ipv6/tcp_ipv6.c
> +++ b/net/ipv6/tcp_ipv6.c
> @@ -395,6 +395,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
> }
>
> if (sk->sk_state == TCP_TIME_WAIT) {
> + /* To increase the counter of ignored icmps for TCP-AO */
> + tcp_ao_ignore_icmp(sk, type, code);
> inet_twsk_put(inet_twsk(sk));
> return 0;
> }
> @@ -406,6 +408,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
> }
>
> bh_lock_sock(sk);
> + if (tcp_ao_ignore_icmp(sk, type, code))
> + goto out;
> if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
> __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
>
> --
> 2.41.0
>
On 8/8/23 14:43, Eric Dumazet wrote:
> On Wed, Aug 2, 2023 at 7:27 PM Dmitry Safonov <dima@arista.com> wrote:
[..]
>>
>> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
>
> const struct sock *sk ?
Well, I can't really: atomic64_inc(&ao->counters.dropped_icmp)
>> +{
>> + bool ignore_icmp = false;
>> + struct tcp_ao_info *ao;
>> +
>> + /* RFC5925, 7.8:
>> + * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
>> + * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
>> + * unreachable, port unreachable, and fragmentation needed -- ’hard
>> + * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
>> + * (administratively prohibited) and Code 4 (port unreachable) intended
>> + * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
>> + * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
>> + */
>
> I know this sounds silly, but you should read sk->sk_family once.
>
> Or risk another KCSAN report with IPV6_ADDRFORM
>
> if (sk->sk_family == AF_INET) {
> ...
> } else {
> /* AF_INET6 case */
> }
Oh, I didn't know about IPV6_ADDRFORM. Sure, will read it once.
>> + if (sk->sk_family == AF_INET) {
>> + if (type != ICMP_DEST_UNREACH)
>> + return false;
>> + if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
>> + return false;
>> + } else if (sk->sk_family == AF_INET6) {
>> + if (type != ICMPV6_DEST_UNREACH)
>> + return false;
>> + if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
>> + return false;
>> + } else {
>
>
> No WARN_ON_ONCE(1) here please.
Ok.
[..]
>> +++ b/net/ipv4/tcp_ipv4.c
>> @@ -494,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
>> return -ENOENT;
>> }
>> if (sk->sk_state == TCP_TIME_WAIT) {
>> + /* To increase the counter of ignored icmps for TCP-AO */
>> + tcp_ao_ignore_icmp(sk, type, code);
>> inet_twsk_put(inet_twsk(sk));
>> return 0;
>> }
>> @@ -508,6 +510,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
>> }
>>
>> bh_lock_sock(sk);
>
> Do we need to hold the spinlock before calling tcp_ao_ignore_icmp() ?
I don't think so. I believe I originally wrote it outside of
bh_lock_sock(), but now I can't remember which paranoid thought resulted
in moving it under the lock. Anyway, I will move it out again.
>> + if (tcp_ao_ignore_icmp(sk, type, code))
>> + goto out;
>> +
>> /* If too many ICMPs get dropped on busy
>> * servers this needs to be solved differently.
>> * We do take care of PMTU discovery (RFC1191) special case :
[..]
Thanks,
Dmitry
On Thu, Aug 10, 2023 at 6:27 PM Dmitry Safonov <dima@arista.com> wrote:
>
> On 8/8/23 14:43, Eric Dumazet wrote:
> > On Wed, Aug 2, 2023 at 7:27 PM Dmitry Safonov <dima@arista.com> wrote:
> [..]
> >>
> >> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
> >
> > const struct sock *sk ?
>
> Well, I can't really: atomic64_inc(&ao->counters.dropped_icmp)
I think we could, because this would still work.
struct tcp_ao_info *ao; // This is rw object
ao = rcu_dereference(tcp_sk(sk)->ao_info);
This helper looks to accept unlocked sockets, so marking them const
would avoid mistakes in the future.
On 8/10/23 17:36, Eric Dumazet wrote:
> On Thu, Aug 10, 2023 at 6:27 PM Dmitry Safonov <dima@arista.com> wrote:
>>
>> On 8/8/23 14:43, Eric Dumazet wrote:
>>> On Wed, Aug 2, 2023 at 7:27 PM Dmitry Safonov <dima@arista.com> wrote:
>> [..]
>>>>
>>>> +bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
>>>
>>> const struct sock *sk ?
>>
>> Well, I can't really: atomic64_inc(&ao->counters.dropped_icmp)
>
> I think we could, because this would still work.
>
> struct tcp_ao_info *ao; // This is rw object
Yeah, right, I hadn't had enough coffee today.
> ao = rcu_dereference(tcp_sk(sk)->ao_info);
>
> This helper looks to accept unlocked sockets, so marking them const
> would avoid mistakes in the future.
I see, will make it `const', thanks!
@@ -24,6 +24,7 @@ struct tcp_ao_counters {
atomic64_t pkt_bad;
atomic64_t key_not_found;
atomic64_t ao_required;
+ atomic64_t dropped_icmp;
};
struct tcp_ao_key {
@@ -92,7 +93,8 @@ struct tcp_ao_info {
struct tcp_ao_key *rnext_key;
struct tcp_ao_counters counters;
u32 ao_required :1,
- __unused :31;
+ accept_icmps :1,
+ __unused :30;
__be32 lisn;
__be32 risn;
/* Sequence Number Extension (SNE) are upper 4 bytes for SEQ,
@@ -189,6 +191,7 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
unsigned int len, struct tcp_sigpool *hp);
void tcp_ao_destroy_sock(struct sock *sk, bool twsk);
void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp);
+bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code);
enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
const struct sk_buff *skb, unsigned short int family,
const struct request_sock *req,
@@ -264,6 +267,11 @@ static inline void tcp_ao_syncookie(struct sock *sk, const struct sk_buff *skb,
{
}
+static inline bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
+{
+ return false;
+}
+
static inline enum skb_drop_reason tcp_inbound_ao_hash(struct sock *sk,
const struct sk_buff *skb, unsigned short int family,
const struct request_sock *req, const struct tcp_ao_hdr *aoh)
@@ -300,6 +300,7 @@ enum
LINUX_MIB_TCPAOBAD, /* TCPAOBad */
LINUX_MIB_TCPAOKEYNOTFOUND, /* TCPAOKeyNotFound */
LINUX_MIB_TCPAOGOOD, /* TCPAOGood */
+ LINUX_MIB_TCPAODROPPEDICMPS, /* TCPAODroppedIcmps */
__LINUX_MIB_MAX
};
@@ -392,7 +392,8 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
set_rnext :1, /* corresponding ::rnext */
ao_required :1, /* don't accept non-AO connects */
set_counters :1, /* set/clear ::pkt_* counters */
- reserved :28; /* must be 0 */
+ accept_icmps :1, /* accept incoming ICMPs */
+ reserved :27; /* must be 0 */
__u16 reserved2; /* padding, must be 0 */
__u8 current_key; /* KeyID to set as Current_key */
__u8 rnext; /* KeyID to set as Rnext_key */
@@ -400,6 +401,7 @@ struct tcp_ao_info_opt { /* setsockopt(TCP_AO_INFO) */
__u64 pkt_bad; /* failed verification */
__u64 pkt_key_not_found; /* could not find a key to verify */
__u64 pkt_ao_required; /* segments missing TCP-AO sign */
+ __u64 pkt_dropped_icmp; /* ICMPs that were ignored */
} __attribute__((aligned(8)));
/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
@@ -302,6 +302,7 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAOBad", LINUX_MIB_TCPAOBAD),
SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
+ SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
SNMP_MIB_SENTINEL
};
@@ -15,6 +15,7 @@
#include <net/tcp.h>
#include <net/ipv6.h>
+#include <net/icmp.h>
int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
unsigned int len, struct tcp_sigpool *hp)
@@ -44,6 +45,63 @@ int tcp_ao_calc_traffic_key(struct tcp_ao_key *mkt, u8 *key, void *ctx,
return 1;
}
+bool tcp_ao_ignore_icmp(struct sock *sk, int type, int code)
+{
+ bool ignore_icmp = false;
+ struct tcp_ao_info *ao;
+
+ /* RFC5925, 7.8:
+ * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
+ * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
+ * unreachable, port unreachable, and fragmentation needed -- ’hard
+ * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
+ * (administratively prohibited) and Code 4 (port unreachable) intended
+ * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
+ * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
+ */
+ if (sk->sk_family == AF_INET) {
+ if (type != ICMP_DEST_UNREACH)
+ return false;
+ if (code < ICMP_PROT_UNREACH || code > ICMP_FRAG_NEEDED)
+ return false;
+ } else if (sk->sk_family == AF_INET6) {
+ if (type != ICMPV6_DEST_UNREACH)
+ return false;
+ if (code != ICMPV6_ADM_PROHIBITED && code != ICMPV6_PORT_UNREACH)
+ return false;
+ } else {
+ WARN_ON_ONCE(1);
+ return false;
+ }
+
+ rcu_read_lock();
+ switch (sk->sk_state) {
+ case TCP_TIME_WAIT:
+ ao = rcu_dereference(tcp_twsk(sk)->ao_info);
+ break;
+ case TCP_SYN_SENT:
+ case TCP_SYN_RECV:
+ case TCP_LISTEN:
+ case TCP_NEW_SYN_RECV:
+ /* RFC5925 specifies to ignore ICMPs *only* on connections
+ * in synchronized states.
+ */
+ rcu_read_unlock();
+ return false;
+ default:
+ ao = rcu_dereference(tcp_sk(sk)->ao_info);
+ }
+
+ if (ao && !ao->accept_icmps) {
+ ignore_icmp = true;
+ __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAODROPPEDICMPS);
+ atomic64_inc(&ao->counters.dropped_icmp);
+ }
+ rcu_read_unlock();
+
+ return ignore_icmp;
+}
+
/* Optimized version of tcp_ao_do_lookup(): only for sockets for which
* it's known that the keys in ao_info are matching peer's
* family/address/VRF/etc.
@@ -1036,6 +1094,7 @@ int tcp_ao_copy_all_matching(const struct sock *sk, struct sock *newsk,
new_ao->lisn = htonl(tcp_rsk(req)->snt_isn);
new_ao->risn = htonl(tcp_rsk(req)->rcv_isn);
new_ao->ao_required = ao->ao_required;
+ new_ao->accept_icmps = ao->accept_icmps;
if (family == AF_INET) {
addr = (union tcp_ao_addr *)&newsk->sk_daddr;
@@ -1742,9 +1801,11 @@ static int tcp_ao_info_cmd(struct sock *sk, unsigned short int family,
atomic64_set(&ao_info->counters.pkt_bad, cmd.pkt_bad);
atomic64_set(&ao_info->counters.key_not_found, cmd.pkt_key_not_found);
atomic64_set(&ao_info->counters.ao_required, cmd.pkt_ao_required);
+ atomic64_set(&ao_info->counters.dropped_icmp, cmd.pkt_dropped_icmp);
}
ao_info->ao_required = cmd.ao_required;
+ ao_info->accept_icmps = cmd.accept_icmps;
if (new_current)
WRITE_ONCE(ao_info->current_key, new_current);
if (new_rnext)
@@ -494,6 +494,8 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
return -ENOENT;
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -508,6 +510,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info)
}
bh_lock_sock(sk);
+ if (tcp_ao_ignore_icmp(sk, type, code))
+ goto out;
+
/* If too many ICMPs get dropped on busy
* servers this needs to be solved differently.
* We do take care of PMTU discovery (RFC1191) special case :
@@ -395,6 +395,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
if (sk->sk_state == TCP_TIME_WAIT) {
+ /* To increase the counter of ignored icmps for TCP-AO */
+ tcp_ao_ignore_icmp(sk, type, code);
inet_twsk_put(inet_twsk(sk));
return 0;
}
@@ -406,6 +408,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
}
bh_lock_sock(sk);
+ if (tcp_ao_ignore_icmp(sk, type, code))
+ goto out;
if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);