[net-next,3/3] selftests/net: fix GRO coalesce test and add ext
Commit Message
Currently there is no test which checks that IPv6 extension header packets
successfully coalesce. This commit adds a test, which verifies two IPv6
packets with HBH extension headers do coalesce.
I changed the receive socket filter to accept a packet with one extension
header. This change exposed a bug in the fragment test -- the old BPF did
not accept the fragment packet. I updated correct_num_packets in the
fragment test accordingly.
Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
---
tools/testing/selftests/net/gro.c | 78 ++++++++++++++++++++++++++++---
1 file changed, 71 insertions(+), 7 deletions(-)
Comments
Hi Richard,
kernel test robot noticed the following build warnings:
[auto build test WARNING on net-next/main]
url: https://github.com/intel-lab-lkp/linux/commits/Richard-Gobert/net-gso-add-HBH-extension-header-offload-support/20231222-172059
base: net-next/main
patch link: https://lore.kernel.org/r/641157c0-f224-4910-874d-7906a48d914b%40gmail.com
patch subject: [PATCH net-next 3/3] selftests/net: fix GRO coalesce test and add ext
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231223/202312231344.sWSs5PIk-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312231344.sWSs5PIk-lkp@intel.com/
All warnings (new ones prefixed by >>):
gro.c: In function 'add_ipv6_exthdr':
>> gro.c:600:13: warning: variable 'opt_len' set but not used [-Wunused-but-set-variable]
600 | int opt_len;
| ^~~~~~~
Richard Gobert wrote:
> Currently there is no test which checks that IPv6 extension header packets
> successfully coalesce. This commit adds a test, which verifies two IPv6
> packets with HBH extension headers do coalesce.
>
> I changed the receive socket filter to accept a packet with one extension
> header. This change exposed a bug in the fragment test -- the old BPF did
> not accept the fragment packet. I updated correct_num_packets in the
> fragment test accordingly.
>
> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
Thanks for adding test coverage along with the feature, and as part
of the existing gro test too.
> ---
> tools/testing/selftests/net/gro.c | 78 ++++++++++++++++++++++++++++---
> 1 file changed, 71 insertions(+), 7 deletions(-)
>
> diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
> index 30024d0ed373..4ee34dca8e5f 100644
> --- a/tools/testing/selftests/net/gro.c
> +++ b/tools/testing/selftests/net/gro.c
> @@ -71,6 +71,10 @@
> #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
> #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
> #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
> +#define MIN_EXTHDR_SIZE 8 /* minimum size of IPv6 extension header */
> +
> +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
> +
>
> static const char *addr6_src = "fdaa::2";
> static const char *addr6_dst = "fdaa::1";
> @@ -104,7 +108,7 @@ static void setup_sock_filter(int fd)
> const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
> const int ethproto_off = offsetof(struct ethhdr, h_proto);
> int optlen = 0;
> - int ipproto_off;
> + int ipproto_off, opt_ipproto_off;
> int next_off;
>
> if (proto == PF_INET)
> @@ -116,14 +120,27 @@ static void setup_sock_filter(int fd)
> if (strcmp(testname, "ip") == 0) {
> if (proto == PF_INET)
> optlen = sizeof(struct ip_timestamp);
> - else
> - optlen = sizeof(struct ip6_frag);
> + else {
> + // same size for HBH and Fragment extension header types
no C++ style comments
Also, instead of comments here and at the MIN_EXTHDR_SIZE definition,
perhaps cleaner as self documenting code:
BUILD_BUG_ON(MIN_EXTHDR_SIZE != sizeof(struct ip6_hbh));
BUILD_BUG_ON(MIN_EXTHDR_SIZE != sizeof(struct ip6_dest));
BUILD_BUG_ON(MIN_EXTHDR_SIZE < sizeof(struct ip6_frag));
> + optlen = MIN_EXTHDR_SIZE;
> + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) +
> + offsetof(struct ip6_ext, ip6e_nxt);
> + }
> }
>
> + /*
> + * this filter validates the following:
> + * - packet is IPv4/IPv6 according to the running test.
> + * - packet is TCP. Also handles the case of one extension header and then TCP.
> + * - checks the packet tcp dport equals to DPORT.
> + * (also handles the case of one extension header and then TCP.)
> + */
nit: for filewide consistency: netdev style:
/* start comment
* more comment
*/
> struct sock_filter filter[] = {
> BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
> - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
> + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
> BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
> + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
> + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
> BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
> BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
> BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
> @@ -576,6 +593,39 @@ static void add_ipv4_ts_option(void *buf, void *optpkt)
> iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
> }
>
> +static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type)
> +{
> + struct ipv6_opt_hdr *hbh_hdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
> + struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
> + int opt_len;
> +
> + hbh_hdr->hdrlen = 0;
> + hbh_hdr->nexthdr = IPPROTO_TCP;
> + opt_len = ipv6_optlen(hbh_hdr);
> +
> + memcpy(optpkt, buf, tcp_offset);
> + memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
> + sizeof(struct tcphdr) + PAYLOAD_LEN);
> +
> + iph->nexthdr = exthdr_type;
> + iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
> +}
> +
> +/* Send IPv6 packet with extension header */
> +static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr)
> +{
> + static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
> + static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
> +
> + create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
> + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS);
> + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
> +
> + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
> + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS);
> + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
> +}
> +
> /* IPv4 options shouldn't coalesce */
> static void send_ip_options(int fd, struct sockaddr_ll *daddr)
> {
> @@ -697,7 +747,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr)
> create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
> write_packet(fd, buf, bufpkt_len, daddr);
> }
> -
> + sleep(1);
Leftover debug, or necessary fix to existing test?
> create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
> memset(extpkt, 0, extpkt_len);
>
> @@ -760,6 +810,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
> vlog("}, Total %d packets\nReceived {", correct_num_pkts);
>
> while (1) {
> + ip_ext_len = 0;
> pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
> if (pkt_size < 0)
> error(1, errno, "could not receive");
> @@ -767,7 +818,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
> if (iph->version == 4)
> ip_ext_len = (iph->ihl - 5) * 4;
> else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
> - ip_ext_len = sizeof(struct ip6_frag);
> + ip_ext_len = MIN_EXTHDR_SIZE;
struct ip6_frag is > MIN_EXTHDR_SIZE. Need both cases?
else if (ip6h->version == 6 && ip6h->nexthdr == NEXTHDR_FRAGMENT
ip_ext_len = sizeof(struct ip6_frag);
else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
ip_ext_len = MIN_EXTHDR_SIZE;
>
> tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
>
> @@ -880,7 +931,14 @@ static void gro_sender(void)
> sleep(1);
> write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
> } else if (proto == PF_INET6) {
> + sleep(1);
> send_fragment6(txfd, &daddr);
> + sleep(1);
> + write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
> +
> + sleep(1);
> + send_ipv6_exthdr(txfd, &daddr);
> + sleep(1);
For the casual reader: these sleeps are not leftover debug statements
unfortunately. The same exists in the PF_INET branch:
/* Modified packets may be received out of order.
* Sleep function added to enforce test boundaries
* so that fin pkts are not received prior to other pkts.
*/
> write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
> }
> } else if (strcmp(testname, "large") == 0) {
> @@ -997,7 +1055,13 @@ static void gro_receiver(void)
> */
> printf("fragmented ip6 doesn't coalesce: ");
> correct_payload[0] = PAYLOAD_LEN * 2;
> - check_recv_pkts(rxfd, correct_payload, 2);
> + correct_payload[1] = PAYLOAD_LEN;
> + correct_payload[2] = PAYLOAD_LEN;
> + check_recv_pkts(rxfd, correct_payload, 3);
> +
> + correct_payload[0] = PAYLOAD_LEN * 2;
> + printf("ipv6 with extension header DOES coalesce: ");
> + check_recv_pkts(rxfd, correct_payload, 1);
It might be worth adding a test with two different extension headers.
To verify that these should not coalesce.
Either different ipprotos, like IPPROTO_DSTOPTS vs IPPROTO_HOPOPTS.
Perhaps more intesting are two of the same ipproto but with different
content.
> }
> } else if (strcmp(testname, "large") == 0) {
> int offset = proto == PF_INET ? 20 : 0;
> --
> 2.36.1
>
Willem de Bruijn wrote:
> Richard Gobert wrote:
>> Currently there is no test which checks that IPv6 extension header packets
>> successfully coalesce. This commit adds a test, which verifies two IPv6
>> packets with HBH extension headers do coalesce.
>>
>> I changed the receive socket filter to accept a packet with one extension
>> header. This change exposed a bug in the fragment test -- the old BPF did
>> not accept the fragment packet. I updated correct_num_packets in the
>> fragment test accordingly.
>>
>> Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
>
> Thanks for adding test coverage along with the feature, and as part
> of the existing gro test too.
>
>> ---
>> tools/testing/selftests/net/gro.c | 78 ++++++++++++++++++++++++++++---
>> 1 file changed, 71 insertions(+), 7 deletions(-)
>>
>> diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
>> index 30024d0ed373..4ee34dca8e5f 100644
>> --- a/tools/testing/selftests/net/gro.c
>> +++ b/tools/testing/selftests/net/gro.c
>> @@ -71,6 +71,10 @@
>> #define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
>> #define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
>> #define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
>> +#define MIN_EXTHDR_SIZE 8 /* minimum size of IPv6 extension header */
>> +
>> +#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
>> +
>>
>> static const char *addr6_src = "fdaa::2";
>> static const char *addr6_dst = "fdaa::1";
>> @@ -104,7 +108,7 @@ static void setup_sock_filter(int fd)
>> const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
>> const int ethproto_off = offsetof(struct ethhdr, h_proto);
>> int optlen = 0;
>> - int ipproto_off;
>> + int ipproto_off, opt_ipproto_off;
>> int next_off;
>>
>> if (proto == PF_INET)
>> @@ -116,14 +120,27 @@ static void setup_sock_filter(int fd)
>> if (strcmp(testname, "ip") == 0) {
>> if (proto == PF_INET)
>> optlen = sizeof(struct ip_timestamp);
>> - else
>> - optlen = sizeof(struct ip6_frag);
>> + else {
>> + // same size for HBH and Fragment extension header types
>
> no C++ style comments
>
> Also, instead of comments here and at the MIN_EXTHDR_SIZE definition,
> perhaps cleaner as self documenting code:
>
> BUILD_BUG_ON(MIN_EXTHDR_SIZE != sizeof(struct ip6_hbh));
> BUILD_BUG_ON(MIN_EXTHDR_SIZE != sizeof(struct ip6_dest));
> BUILD_BUG_ON(MIN_EXTHDR_SIZE < sizeof(struct ip6_frag));
>
Thanks for the suggestion, I will add a check that the exthdr structs
are smaller than MIN_EXTHDR_SIZE bytes:
BUILD_BUG_ON(MIN_EXTHDR_SIZE < sizeof(struct ip6_hbh));
BUILD_BUG_ON(MIN_EXTHDR_SIZE < sizeof(struct ip6_dest));
BUILD_BUG_ON(MIN_EXTHDR_SIZE < sizeof(struct ip6_frag));
(The ip6_hbh and ip6_dest structs are 2 bytes long, and the ip6_frag
struct is 8 bytes long)
>> + optlen = MIN_EXTHDR_SIZE;
>> + opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) +
>> + offsetof(struct ip6_ext, ip6e_nxt);
>> + }
>> }
>>
>> + /*
>> + * this filter validates the following:
>> + * - packet is IPv4/IPv6 according to the running test.
>> + * - packet is TCP. Also handles the case of one extension header and then TCP.
>> + * - checks the packet tcp dport equals to DPORT.
>> + * (also handles the case of one extension header and then TCP.)
>> + */
>
> nit: for filewide consistency: netdev style:
>
> /* start comment
> * more comment
> */
>
>> struct sock_filter filter[] = {
>> BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
>> - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
>> + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
>> BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
>> + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
>> + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
>> BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
>> BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
>> BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
>> @@ -576,6 +593,39 @@ static void add_ipv4_ts_option(void *buf, void *optpkt)
>> iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
>> }
>>
>> +static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type)
>> +{
>> + struct ipv6_opt_hdr *hbh_hdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
>> + struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
>> + int opt_len;
>> +
>> + hbh_hdr->hdrlen = 0;
>> + hbh_hdr->nexthdr = IPPROTO_TCP;
>> + opt_len = ipv6_optlen(hbh_hdr);
>> +
>> + memcpy(optpkt, buf, tcp_offset);
>> + memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
>> + sizeof(struct tcphdr) + PAYLOAD_LEN);
>> +
>> + iph->nexthdr = exthdr_type;
>> + iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
>> +}
>> +
>> +/* Send IPv6 packet with extension header */
>> +static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr)
>> +{
>> + static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
>> + static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
>> +
>> + create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
>> + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS);
>> + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
>> +
>> + create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
>> + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS);
>> + write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
>> +}
>> +
>> /* IPv4 options shouldn't coalesce */
>> static void send_ip_options(int fd, struct sockaddr_ll *daddr)
>> {
>> @@ -697,7 +747,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr)
>> create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
>> write_packet(fd, buf, bufpkt_len, daddr);
>> }
>> -
>> + sleep(1);
>
> Leftover debug, or necessary fix to existing test?
This sleep was necessary for the fragment test to consistently pass
as packets may arrive out-of-order on the receiving side.
>> create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
>> memset(extpkt, 0, extpkt_len);
>>
>> @@ -760,6 +810,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
>> vlog("}, Total %d packets\nReceived {", correct_num_pkts);
>>
>> while (1) {
>> + ip_ext_len = 0;
>> pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
>> if (pkt_size < 0)
>> error(1, errno, "could not receive");
>> @@ -767,7 +818,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
>> if (iph->version == 4)
>> ip_ext_len = (iph->ihl - 5) * 4;
>> else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
>> - ip_ext_len = sizeof(struct ip6_frag);
>> + ip_ext_len = MIN_EXTHDR_SIZE;
>
> struct ip6_frag is > MIN_EXTHDR_SIZE. Need both cases?
>
> else if (ip6h->version == 6 && ip6h->nexthdr == NEXTHDR_FRAGMENT
> ip_ext_len = sizeof(struct ip6_frag);
> else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
> ip_ext_len = MIN_EXTHDR_SIZE;
>
sizeof(struct ip6_frag) == MIN_EXTHDR_SIZE, and any other IPv6 exthdr
test uses an 8(=MIN_EXTHDR_SIZE) byte exthdr, as this is the minimum
size of a single IPv6 exthdr.
We can also see it by looking at the 'ipv6_optlen' macro copied from the
IPv6 exthdr parsing in GRO:
#define ipv6_optlen(p) (((p)->hdrlen+1) << 3)
When hdrlen == 0, the exthdr len will be (0 + 1) << 3 = 8.
>>
>> tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
>>
>> @@ -880,7 +931,14 @@ static void gro_sender(void)
>> sleep(1);
>> write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
>> } else if (proto == PF_INET6) {
>> + sleep(1);
>> send_fragment6(txfd, &daddr);
>> + sleep(1);
>> + write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
>> +
>> + sleep(1);
>> + send_ipv6_exthdr(txfd, &daddr);
>> + sleep(1);
>
> For the casual reader: these sleeps are not leftover debug statements
> unfortunately. The same exists in the PF_INET branch:
>
> /* Modified packets may be received out of order.
> * Sleep function added to enforce test boundaries
> * so that fin pkts are not received prior to other pkts.
> */
>
>> write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
>> }
>> } else if (strcmp(testname, "large") == 0) {
>> @@ -997,7 +1055,13 @@ static void gro_receiver(void)
>> */
>> printf("fragmented ip6 doesn't coalesce: ");
>> correct_payload[0] = PAYLOAD_LEN * 2;
>> - check_recv_pkts(rxfd, correct_payload, 2);
>> + correct_payload[1] = PAYLOAD_LEN;
>> + correct_payload[2] = PAYLOAD_LEN;
>> + check_recv_pkts(rxfd, correct_payload, 3);
>> +
>> + correct_payload[0] = PAYLOAD_LEN * 2;
>> + printf("ipv6 with extension header DOES coalesce: ");
>> + check_recv_pkts(rxfd, correct_payload, 1);
>
> It might be worth adding a test with two different extension headers.
> To verify that these should not coalesce.
>
> Either different ipprotos, like IPPROTO_DSTOPTS vs IPPROTO_HOPOPTS.
> Perhaps more intesting are two of the same ipproto but with different
> content.
>
Good idea, I will add this test in v2.
>> }
>> } else if (strcmp(testname, "large") == 0) {
>> int offset = proto == PF_INET ? 20 : 0;
>> --
>> 2.36.1
>>
>
>
@@ -71,6 +71,10 @@
#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MIN_EXTHDR_SIZE 8 /* minimum size of IPv6 extension header */
+
+#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
+
static const char *addr6_src = "fdaa::2";
static const char *addr6_dst = "fdaa::1";
@@ -104,7 +108,7 @@ static void setup_sock_filter(int fd)
const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
const int ethproto_off = offsetof(struct ethhdr, h_proto);
int optlen = 0;
- int ipproto_off;
+ int ipproto_off, opt_ipproto_off;
int next_off;
if (proto == PF_INET)
@@ -116,14 +120,27 @@ static void setup_sock_filter(int fd)
if (strcmp(testname, "ip") == 0) {
if (proto == PF_INET)
optlen = sizeof(struct ip_timestamp);
- else
- optlen = sizeof(struct ip6_frag);
+ else {
+ // same size for HBH and Fragment extension header types
+ optlen = MIN_EXTHDR_SIZE;
+ opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) +
+ offsetof(struct ip6_ext, ip6e_nxt);
+ }
}
+ /*
+ * this filter validates the following:
+ * - packet is IPv4/IPv6 according to the running test.
+ * - packet is TCP. Also handles the case of one extension header and then TCP.
+ * - checks the packet tcp dport equals to DPORT.
+ * (also handles the case of one extension header and then TCP.)
+ */
struct sock_filter filter[] = {
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
@@ -576,6 +593,39 @@ static void add_ipv4_ts_option(void *buf, void *optpkt)
iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
}
+static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type)
+{
+ struct ipv6_opt_hdr *hbh_hdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
+ struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
+ int opt_len;
+
+ hbh_hdr->hdrlen = 0;
+ hbh_hdr->nexthdr = IPPROTO_TCP;
+ opt_len = ipv6_optlen(hbh_hdr);
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph->nexthdr = exthdr_type;
+ iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
+}
+
+/* Send IPv6 packet with extension header */
+static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+}
+
/* IPv4 options shouldn't coalesce */
static void send_ip_options(int fd, struct sockaddr_ll *daddr)
{
@@ -697,7 +747,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr)
create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
write_packet(fd, buf, bufpkt_len, daddr);
}
-
+ sleep(1);
create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
memset(extpkt, 0, extpkt_len);
@@ -760,6 +810,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
vlog("}, Total %d packets\nReceived {", correct_num_pkts);
while (1) {
+ ip_ext_len = 0;
pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
if (pkt_size < 0)
error(1, errno, "could not receive");
@@ -767,7 +818,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
if (iph->version == 4)
ip_ext_len = (iph->ihl - 5) * 4;
else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
- ip_ext_len = sizeof(struct ip6_frag);
+ ip_ext_len = MIN_EXTHDR_SIZE;
tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
@@ -880,7 +931,14 @@ static void gro_sender(void)
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
} else if (proto == PF_INET6) {
+ sleep(1);
send_fragment6(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ipv6_exthdr(txfd, &daddr);
+ sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
}
} else if (strcmp(testname, "large") == 0) {
@@ -997,7 +1055,13 @@ static void gro_receiver(void)
*/
printf("fragmented ip6 doesn't coalesce: ");
correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 2);
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ printf("ipv6 with extension header DOES coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 1);
}
} else if (strcmp(testname, "large") == 0) {
int offset = proto == PF_INET ? 20 : 0;