[v2,tracing,3/6] tracing: support IPv6 filter predicates

Message ID 1682696089-27937-4-git-send-email-alan.maguire@oracle.com
State New
Headers
Series tracing: additional filter predicates |

Commit Message

Alan Maguire April 28, 2023, 3:34 p.m. UTC
  Support '==' and '!=' predicates for IPv6 addresses;
for example

 cd /sys/kernel/debug/tracing/events/tcp/tcp_receive_reset
 echo "saddr_v6 == ::1" > filter

 or equivalently

 echo "saddr_v6 == 0:0:0:0:0:0:0:1" > filter

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
---
 kernel/trace/trace_events_filter.c | 73 ++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)
  

Comments

Steven Rostedt June 9, 2023, 9:12 p.m. UTC | #1
BTW, the subjects for the tracing subsystem should always start with a
capital letter.

  "tracing: Support IPv6 filter predicates"

But that's not why I'm replying here.

On Fri, 28 Apr 2023 16:34:46 +0100
Alan Maguire <alan.maguire@oracle.com> wrote:

> Support '==' and '!=' predicates for IPv6 addresses;
> for example
> 
>  cd /sys/kernel/debug/tracing/events/tcp/tcp_receive_reset
>  echo "saddr_v6 == ::1" > filter
> 
>  or equivalently
> 
>  echo "saddr_v6 == 0:0:0:0:0:0:0:1" > filter
> 
> Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
> ---
>  kernel/trace/trace_events_filter.c | 73 ++++++++++++++++++++++++++++++
>  1 file changed, 73 insertions(+)
> 
> diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
> index d8e08d3c3594..e2521574f3c4 100644
> --- a/kernel/trace/trace_events_filter.c
> +++ b/kernel/trace/trace_events_filter.c
> @@ -1665,6 +1665,79 @@ static int parse_pred(const char *str, void *data,
>  		if (pred->op == OP_NE)
>  			pred->not = 1;
>  
> +	} else if (field->size == 16 &&
> +		   (str[i] == ':' ||
> +		    (isalnum(str[i]) && tolower(str[i + 1]) != 'x'))) {
> +		u8 j, gap_size, gap = 0, gap_count = 0, index = 0;
> +		u16 tmp_v6addr[8] = {};
> +		u16 v6addr[8] = {};
> +
> +		/* For IPv6 addresses, only '==' or '!=' are supported. */
> +		if (pred->op != OP_EQ && pred->op != OP_NE) {
> +			parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
> +			goto err_free;
> +		}
> +		/* Store the u16s in the address string consecutively in
> +		 * tmp_v6addr while tracking the presence of a "::" (if any)
> +		 * in the IPv6 address string; we will use its location
> +		 * to determine how many u16s it represents (the gap_size
> +		 * below).  Only one "::" is allowed in an IPv6 address
> +		 * string.
> +		 */
> +		while (isalnum(str[i]) || str[i] == ':') {
> +			switch (str[i]) {
> +			case ':':
> +				i++;
> +				/* mark "::" index by setting gap */
> +				if (str[i] == ':') {
> +					gap = index;
> +					gap_count++;
> +					i++;
> +				}
> +				if (gap_count > 1) {
> +					parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP,
> +						    pos + s);
> +					goto err_free;
> +				}
> +				break;
> +			default:
> +				if (sscanf(&str[i], "%hx", &tmp_v6addr[index]) != 1) {
> +					parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP,
> +						    pos + s);
> +					goto err_free;
> +				}
> +				index++;
> +				while (isalnum(str[i]))
> +					i++;
> +				break;
> +			}
> +		}

There appears to be no limit to the above loop. I panicked my machine with:

 # echo 'saddr_v6 == 0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef' > /sys/kernel/tracing/events/sock/inet_sk_error_report/filter

-- Steve

> +		/* The gap_size here represents the number of u16s the "::"
> +		 * represents; for ::1 the gap size is 7, for feed::face
> +		 * it is 6, etc.
> +		 */
> +		gap_size = 8 - index;
> +		index = 0;
> +		for (j = 0; j < 8; ) {
> +			if (gap_size > 0 && j == gap) {
> +				j += gap_size;
> +			} else {
> +#ifdef __BIG_ENDIAN
> +				v6addr[j++] = tmp_v6addr[index];
> +#else
> +				v6addr[j++] = ((tmp_v6addr[index] & 0xff) << 8) +
> +					      ((tmp_v6addr[index] & 0xff00) >> 8);
> +#endif
> +				index++;
> +			}
> +		}
> +		pred_val = kzalloc(field->size, GFP_KERNEL);
> +		memcpy(pred_val, v6addr, field->size);
> +		pred->val = (u64)pred_val;
> +		pred->fn_num = FILTER_PRED_FN_MEMCMP;
> +		if (pred->op == OP_NE)
> +			pred->not = 1;
> +
>  	} else if (str[i] == '0' && tolower(str[i + 1]) == 'x' &&
>  		   field->size > 8) {
>  		/* For sizes > 8 bytes, we store hex bytes for comparison;
  
Steven Rostedt June 9, 2023, 9:15 p.m. UTC | #2
On Fri, 9 Jun 2023 17:12:27 -0400
Steven Rostedt <rostedt@goodmis.org> wrote:

> > +		while (isalnum(str[i]) || str[i] == ':') {
> > +			switch (str[i]) {
> > +			case ':':
> > +				i++;
> > +				/* mark "::" index by setting gap */
> > +				if (str[i] == ':') {
> > +					gap = index;
> > +					gap_count++;
> > +					i++;
> > +				}
> > +				if (gap_count > 1) {
> > +					parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP,
> > +						    pos + s);
> > +					goto err_free;
> > +				}
> > +				break;
> > +			default:
> > +				if (sscanf(&str[i], "%hx", &tmp_v6addr[index]) != 1) {
> > +					parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP,
> > +						    pos + s);
> > +					goto err_free;
> > +				}
> > +				index++;
> > +				while (isalnum(str[i]))
> > +					i++;
> > +				break;

There should also be a lot more checks here to verify that the incoming input
is valid. It also accepted:

 "123456789abcdef0" as "def0", where I expected it to fail.

-- Steve


> > +			}
> > +		}  
> 
> There appears to be no limit to the above loop. I panic'd my machine with:
> 
>  # echo 'saddr_v6 == 0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef:0123:4567:89ab:cdef' > /sys/kernel/tracing/events/sock/inet_sk_error_report/filter
> 
> -- Steve
> 
> > +		/* The gap_size here represents the number of u16s the "::"
> > +		 * represents; for ::1 the gap size is 7, for feed::face
> > +		 * it is 6, etc.
> > +		 */
> > +		gap_size = 8 - index;
> > +		index = 0;
> > +		for (j = 0; j < 8; ) {
> > +			if (gap_size > 0 && j == gap) {
> > +				j += gap_size;
> > +			} else {
> > +#ifdef __BIG_ENDIAN
> > +				v6addr[j++] = tmp_v6addr[index];
> > +#else
> > +				v6addr[j++] = ((tmp_v6addr[index] & 0xff) << 8) +
> > +					      ((tmp_v6addr[index] & 0xff00) >> 8);
> > +#endif
> > +				index++;
> > +			}
> > +		}
> > +		pred_val = kzalloc(field->size, GFP_KERNEL);
> > +		memcpy(pred_val, v6addr, field->size);
> > +		pred->val = (u64)pred_val;
> > +		pred->fn_num = FILTER_PRED_FN_MEMCMP;
> > +		if (pred->op == OP_NE)
> > +			pred->not = 1;
> > +
> >  	} else if (str[i] == '0' && tolower(str[i + 1]) == 'x' &&
> >  		   field->size > 8) {
> >  		/* For sizes > 8 bytes, we store hex bytes for comparison;
  

Patch

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index d8e08d3c3594..e2521574f3c4 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -1665,6 +1665,79 @@  static int parse_pred(const char *str, void *data,
 		if (pred->op == OP_NE)
 			pred->not = 1;
 
+	} else if (field->size == 16 &&
+		   (str[i] == ':' ||
+		    (isalnum(str[i]) && tolower(str[i + 1]) != 'x'))) {
+		u8 j, gap_size, gap = 0, gap_count = 0, index = 0;
+		u16 tmp_v6addr[8] = {};
+		u16 v6addr[8] = {};
+
+		/* For IPv6 addresses, only '==' or '!=' are supported. */
+		if (pred->op != OP_EQ && pred->op != OP_NE) {
+			parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i);
+			goto err_free;
+		}
+		/* Store the u16s in the address string consecutively in
+		 * tmp_v6addr while tracking the presence of a "::" (if any)
+		 * in the IPv6 address string; we will use its location
+		 * to determine how many u16s it represents (the gap_size
+		 * below).  Only one "::" is allowed in an IPv6 address
+		 * string.
+		 */
+		while (isalnum(str[i]) || str[i] == ':') {
+			switch (str[i]) {
+			case ':':
+				i++;
+				/* mark "::" index by setting gap */
+				if (str[i] == ':') {
+					gap = index;
+					gap_count++;
+					i++;
+				}
+				if (gap_count > 1) {
+					parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP,
+						    pos + s);
+					goto err_free;
+				}
+				break;
+			default:
+				if (sscanf(&str[i], "%hx", &tmp_v6addr[index]) != 1) {
+					parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP,
+						    pos + s);
+					goto err_free;
+				}
+				index++;
+				while (isalnum(str[i]))
+					i++;
+				break;
+			}
+		}
+		/* The gap_size here represents the number of u16s the "::"
+		 * represents; for ::1 the gap size is 7, for feed::face
+		 * it is 6, etc.
+		 */
+		gap_size = 8 - index;
+		index = 0;
+		for (j = 0; j < 8; ) {
+			if (gap_size > 0 && j == gap) {
+				j += gap_size;
+			} else {
+#ifdef __BIG_ENDIAN
+				v6addr[j++] = tmp_v6addr[index];
+#else
+				v6addr[j++] = ((tmp_v6addr[index] & 0xff) << 8) +
+					      ((tmp_v6addr[index] & 0xff00) >> 8);
+#endif
+				index++;
+			}
+		}
+		pred_val = kzalloc(field->size, GFP_KERNEL);
+		memcpy(pred_val, v6addr, field->size);
+		pred->val = (u64)pred_val;
+		pred->fn_num = FILTER_PRED_FN_MEMCMP;
+		if (pred->op == OP_NE)
+			pred->not = 1;
+
 	} else if (str[i] == '0' && tolower(str[i + 1]) == 'x' &&
 		   field->size > 8) {
 		/* For sizes > 8 bytes, we store hex bytes for comparison;