[v4,net-next,7/9] net/sched: taprio: allow per-TC user input of FP adminStatus
Commit Message
This is a duplication of the FP adminStatus logic introduced for
tc-mqprio. Offloading is done through the tc_mqprio_qopt_offload
structure embedded within tc_taprio_qopt_offload. So practically, if a
device driver is written to treat the mqprio portion of taprio just like
standalone mqprio, it gets unified handling of frame preemption.
I would have reused more code with taprio, but this is mostly netlink
attribute parsing, which is hard to transform into generic code without
having something that stinks as a result. We have the same variables
with the same semantics, just different nlattr type values
(TCA_MQPRIO_TC_ENTRY=5 vs TCA_TAPRIO_ATTR_TC_ENTRY=12;
TCA_MQPRIO_TC_ENTRY_FP=2 vs TCA_TAPRIO_TC_ENTRY_FP=3, etc) and
consequently, different policies for the nest.
Every time nla_parse_nested() is called, an on-stack table "tb" of
nlattr pointers is allocated statically, up to the maximum understood
nlattr type. That array size is hardcoded as a constant, but when
transforming this into a common parsing function, it would become either
a VLA (which the Linux kernel rightfully doesn't like) or a call to the
allocator.
Having FP adminStatus in tc-taprio can be seen as addressing the 802.1Q
Annex S.3 "Scheduling and preemption used in combination, no HOLD/RELEASE"
and S.4 "Scheduling and preemption used in combination with HOLD/RELEASE"
use cases. HOLD and RELEASE events are emitted towards the underlying
MAC Merge layer when the schedule hits a Set-And-Hold-MAC or a
Set-And-Release-MAC gate operation. So within the tc-taprio UAPI space,
one can distinguish between the 2 use cases by choosing whether to use
the TC_TAPRIO_CMD_SET_AND_HOLD and TC_TAPRIO_CMD_SET_AND_RELEASE gate
operations within the schedule, or just TC_TAPRIO_CMD_SET_GATES.
A small part of the change is dedicated to refactoring the max_sdu
nlattr parsing to put all logic under the "if" that tests for presence
of that nlattr.
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Ferenc Fejes <fejes@inf.elte.hu>
Reviewed-by: Simon Horman <simon.horman@corigine.com>
---
v3->v4: none
v2->v3: none
v1->v2: slightly reword commit message
include/uapi/linux/pkt_sched.h | 1 +
net/sched/sch_taprio.c | 65 +++++++++++++++++++++++++++-------
2 files changed, 53 insertions(+), 13 deletions(-)
Comments
On Mon, Apr 3, 2023 at 6:35 AM Vladimir Oltean <vladimir.oltean@nxp.com> wrote:
>
> This is a duplication of the FP adminStatus logic introduced for
> tc-mqprio. Offloading is done through the tc_mqprio_qopt_offload
> structure embedded within tc_taprio_qopt_offload. So practically, if a
> device driver is written to treat the mqprio portion of taprio just like
> standalone mqprio, it gets unified handling of frame preemption.
>
> I would have reused more code with taprio, but this is mostly netlink
> attribute parsing, which is hard to transform into generic code without
> having something that stinks as a result. We have the same variables
> with the same semantics, just different nlattr type values
> (TCA_MQPRIO_TC_ENTRY=5 vs TCA_TAPRIO_ATTR_TC_ENTRY=12;
> TCA_MQPRIO_TC_ENTRY_FP=2 vs TCA_TAPRIO_TC_ENTRY_FP=3, etc) and
> consequently, different policies for the nest.
>
> Every time nla_parse_nested() is called, an on-stack table "tb" of
> nlattr pointers is allocated statically, up to the maximum understood
> nlattr type. That array size is hardcoded as a constant, but when
> transforming this into a common parsing function, it would become either
> a VLA (which the Linux kernel rightfully doesn't like) or a call to the
> allocator.
>
> Having FP adminStatus in tc-taprio can be seen as addressing the 802.1Q
> Annex S.3 "Scheduling and preemption used in combination, no HOLD/RELEASE"
> and S.4 "Scheduling and preemption used in combination with HOLD/RELEASE"
> use cases. HOLD and RELEASE events are emitted towards the underlying
> MAC Merge layer when the schedule hits a Set-And-Hold-MAC or a
> Set-And-Release-MAC gate operation. So within the tc-taprio UAPI space,
> one can distinguish between the 2 use cases by choosing whether to use
> the TC_TAPRIO_CMD_SET_AND_HOLD and TC_TAPRIO_CMD_SET_AND_RELEASE gate
> operations within the schedule, or just TC_TAPRIO_CMD_SET_GATES.
>
> A small part of the change is dedicated to refactoring the max_sdu
> nlattr parsing to put all logic under the "if" that tests for presence
> of that nlattr.
>
> Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
> Reviewed-by: Ferenc Fejes <fejes@inf.elte.hu>
> Reviewed-by: Simon Horman <simon.horman@corigine.com>
> ---
> v3->v4: none
> v2->v3: none
> v1->v2: slightly reword commit message
>
> include/uapi/linux/pkt_sched.h | 1 +
> net/sched/sch_taprio.c | 65 +++++++++++++++++++++++++++-------
> 2 files changed, 53 insertions(+), 13 deletions(-)
>
> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index b8d29be91b62..51a7addc56c6 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -1252,6 +1252,7 @@ enum {
> TCA_TAPRIO_TC_ENTRY_UNSPEC,
> TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */
> TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */
> + TCA_TAPRIO_TC_ENTRY_FP, /* u32 */
>
> /* add new constants above here */
> __TCA_TAPRIO_TC_ENTRY_CNT,
> diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
> index cbad43019172..76db9a10ef50 100644
> --- a/net/sched/sch_taprio.c
> +++ b/net/sched/sch_taprio.c
> @@ -7,6 +7,7 @@
> */
>
> #include <linux/ethtool.h>
> +#include <linux/ethtool_netlink.h>
> #include <linux/types.h>
> #include <linux/slab.h>
> #include <linux/kernel.h>
> @@ -96,6 +97,7 @@ struct taprio_sched {
> struct list_head taprio_list;
> int cur_txq[TC_MAX_QUEUE];
> u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
> + u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
> u32 txtime_delay;
> };
>
> @@ -1002,6 +1004,9 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
> static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
> [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 },
> [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
> + [TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
> + TC_FP_EXPRESS,
> + TC_FP_PREEMPTIBLE),
Same comment applies as in patch 6 here..
cheers,
jamal
>
> static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
> @@ -1524,6 +1529,7 @@ static int taprio_enable_offload(struct net_device *dev,
> mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
> offload->mqprio.extack = extack;
> taprio_sched_to_offload(dev, sched, offload, &caps);
> + mqprio_fp_to_offload(q->fp, &offload->mqprio);
>
> for (tc = 0; tc < TC_MAX_QUEUE; tc++)
> offload->max_sdu[tc] = q->max_sdu[tc];
> @@ -1671,13 +1677,14 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
> static int taprio_parse_tc_entry(struct Qdisc *sch,
> struct nlattr *opt,
> u32 max_sdu[TC_QOPT_MAX_QUEUE],
> + u32 fp[TC_QOPT_MAX_QUEUE],
> unsigned long *seen_tcs,
> struct netlink_ext_ack *extack)
> {
> struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
> struct net_device *dev = qdisc_dev(sch);
> - u32 val = 0;
> int err, tc;
> + u32 val;
>
> err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
> taprio_tc_policy, extack);
> @@ -1702,15 +1709,18 @@ static int taprio_parse_tc_entry(struct Qdisc *sch,
>
> *seen_tcs |= BIT(tc);
>
> - if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
> + if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
> val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
> + if (val > dev->max_mtu) {
> + NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
> + return -ERANGE;
> + }
>
> - if (val > dev->max_mtu) {
> - NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
> - return -ERANGE;
> + max_sdu[tc] = val;
> }
>
> - max_sdu[tc] = val;
> + if (tb[TCA_TAPRIO_TC_ENTRY_FP])
> + fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
>
> return 0;
> }
> @@ -1720,29 +1730,51 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
> struct netlink_ext_ack *extack)
> {
> struct taprio_sched *q = qdisc_priv(sch);
> + struct net_device *dev = qdisc_dev(sch);
> u32 max_sdu[TC_QOPT_MAX_QUEUE];
> + bool have_preemption = false;
> unsigned long seen_tcs = 0;
> + u32 fp[TC_QOPT_MAX_QUEUE];
> struct nlattr *n;
> int tc, rem;
> int err = 0;
>
> - for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
> + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
> max_sdu[tc] = q->max_sdu[tc];
> + fp[tc] = q->fp[tc];
> + }
>
> nla_for_each_nested(n, opt, rem) {
> if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
> continue;
>
> - err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
> + err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
> extack);
> if (err)
> - goto out;
> + return err;
> }
>
> - for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
> + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
> q->max_sdu[tc] = max_sdu[tc];
> + q->fp[tc] = fp[tc];
> + if (fp[tc] != TC_FP_EXPRESS)
> + have_preemption = true;
> + }
> +
> + if (have_preemption) {
> + if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
> + NL_SET_ERR_MSG(extack,
> + "Preemption only supported with full offload");
> + return -EOPNOTSUPP;
> + }
> +
> + if (!ethtool_dev_mm_supported(dev)) {
> + NL_SET_ERR_MSG(extack,
> + "Device does not support preemption");
> + return -EOPNOTSUPP;
> + }
> + }
>
> -out:
> return err;
> }
>
> @@ -2023,7 +2055,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
> {
> struct taprio_sched *q = qdisc_priv(sch);
> struct net_device *dev = qdisc_dev(sch);
> - int i;
> + int i, tc;
>
> spin_lock_init(&q->current_entry_lock);
>
> @@ -2080,6 +2112,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
> q->qdiscs[i] = qdisc;
> }
>
> + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
> + q->fp[tc] = TC_FP_EXPRESS;
> +
> taprio_detect_broken_mqprio(q);
>
> return taprio_change(sch, opt, extack);
> @@ -2223,6 +2258,7 @@ static int dump_schedule(struct sk_buff *msg,
> }
>
> static int taprio_dump_tc_entries(struct sk_buff *skb,
> + struct taprio_sched *q,
> struct sched_gate_list *sched)
> {
> struct nlattr *n;
> @@ -2240,6 +2276,9 @@ static int taprio_dump_tc_entries(struct sk_buff *skb,
> sched->max_sdu[tc]))
> goto nla_put_failure;
>
> + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
> + goto nla_put_failure;
> +
> nla_nest_end(skb, n);
> }
>
> @@ -2281,7 +2320,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
> nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
> goto options_error;
>
> - if (oper && taprio_dump_tc_entries(skb, oper))
> + if (oper && taprio_dump_tc_entries(skb, q, oper))
> goto options_error;
>
> if (oper && dump_schedule(skb, oper))
> --
> 2.34.1
>
@@ -1252,6 +1252,7 @@ enum {
TCA_TAPRIO_TC_ENTRY_UNSPEC,
TCA_TAPRIO_TC_ENTRY_INDEX, /* u32 */
TCA_TAPRIO_TC_ENTRY_MAX_SDU, /* u32 */
+ TCA_TAPRIO_TC_ENTRY_FP, /* u32 */
/* add new constants above here */
__TCA_TAPRIO_TC_ENTRY_CNT,
@@ -7,6 +7,7 @@
*/
#include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/kernel.h>
@@ -96,6 +97,7 @@ struct taprio_sched {
struct list_head taprio_list;
int cur_txq[TC_MAX_QUEUE];
u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
+ u32 fp[TC_QOPT_MAX_QUEUE]; /* only for dump and offloading */
u32 txtime_delay;
};
@@ -1002,6 +1004,9 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = {
static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
[TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 },
[TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 },
+ [TCA_TAPRIO_TC_ENTRY_FP] = NLA_POLICY_RANGE(NLA_U32,
+ TC_FP_EXPRESS,
+ TC_FP_PREEMPTIBLE),
};
static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
@@ -1524,6 +1529,7 @@ static int taprio_enable_offload(struct net_device *dev,
mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
offload->mqprio.extack = extack;
taprio_sched_to_offload(dev, sched, offload, &caps);
+ mqprio_fp_to_offload(q->fp, &offload->mqprio);
for (tc = 0; tc < TC_MAX_QUEUE; tc++)
offload->max_sdu[tc] = q->max_sdu[tc];
@@ -1671,13 +1677,14 @@ static int taprio_parse_clockid(struct Qdisc *sch, struct nlattr **tb,
static int taprio_parse_tc_entry(struct Qdisc *sch,
struct nlattr *opt,
u32 max_sdu[TC_QOPT_MAX_QUEUE],
+ u32 fp[TC_QOPT_MAX_QUEUE],
unsigned long *seen_tcs,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { };
struct net_device *dev = qdisc_dev(sch);
- u32 val = 0;
int err, tc;
+ u32 val;
err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt,
taprio_tc_policy, extack);
@@ -1702,15 +1709,18 @@ static int taprio_parse_tc_entry(struct Qdisc *sch,
*seen_tcs |= BIT(tc);
- if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU])
+ if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) {
val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]);
+ if (val > dev->max_mtu) {
+ NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
+ return -ERANGE;
+ }
- if (val > dev->max_mtu) {
- NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU");
- return -ERANGE;
+ max_sdu[tc] = val;
}
- max_sdu[tc] = val;
+ if (tb[TCA_TAPRIO_TC_ENTRY_FP])
+ fp[tc] = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_FP]);
return 0;
}
@@ -1720,29 +1730,51 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
struct netlink_ext_ack *extack)
{
struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
u32 max_sdu[TC_QOPT_MAX_QUEUE];
+ bool have_preemption = false;
unsigned long seen_tcs = 0;
+ u32 fp[TC_QOPT_MAX_QUEUE];
struct nlattr *n;
int tc, rem;
int err = 0;
- for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
max_sdu[tc] = q->max_sdu[tc];
+ fp[tc] = q->fp[tc];
+ }
nla_for_each_nested(n, opt, rem) {
if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
continue;
- err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
+ err = taprio_parse_tc_entry(sch, n, max_sdu, fp, &seen_tcs,
extack);
if (err)
- goto out;
+ return err;
}
- for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
q->max_sdu[tc] = max_sdu[tc];
+ q->fp[tc] = fp[tc];
+ if (fp[tc] != TC_FP_EXPRESS)
+ have_preemption = true;
+ }
+
+ if (have_preemption) {
+ if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) {
+ NL_SET_ERR_MSG(extack,
+ "Preemption only supported with full offload");
+ return -EOPNOTSUPP;
+ }
+
+ if (!ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG(extack,
+ "Device does not support preemption");
+ return -EOPNOTSUPP;
+ }
+ }
-out:
return err;
}
@@ -2023,7 +2055,7 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- int i;
+ int i, tc;
spin_lock_init(&q->current_entry_lock);
@@ -2080,6 +2112,9 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
q->qdiscs[i] = qdisc;
}
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
+ q->fp[tc] = TC_FP_EXPRESS;
+
taprio_detect_broken_mqprio(q);
return taprio_change(sch, opt, extack);
@@ -2223,6 +2258,7 @@ static int dump_schedule(struct sk_buff *msg,
}
static int taprio_dump_tc_entries(struct sk_buff *skb,
+ struct taprio_sched *q,
struct sched_gate_list *sched)
{
struct nlattr *n;
@@ -2240,6 +2276,9 @@ static int taprio_dump_tc_entries(struct sk_buff *skb,
sched->max_sdu[tc]))
goto nla_put_failure;
+ if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_FP, q->fp[tc]))
+ goto nla_put_failure;
+
nla_nest_end(skb, n);
}
@@ -2281,7 +2320,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
- if (oper && taprio_dump_tc_entries(skb, oper))
+ if (oper && taprio_dump_tc_entries(skb, q, oper))
goto options_error;
if (oper && dump_schedule(skb, oper))