[net-next,v2,2/2] net: dsa: microchip: add ETS Qdisc support for KSZ9477 series

Message ID 20230308091237.3483895-3-o.rempel@pengutronix.de
State New
Headers
Series net: dsa: microchip: tc-ets support |

Commit Message

Oleksij Rempel March 8, 2023, 9:12 a.m. UTC
  Add ETS Qdisc support for KSZ9477 of switches. Current implementation is
limited to strict priority mode.

Tested on KSZ8563R with following configuration:
tc qdisc replace dev lan2 root handle 1: ets strict 4 \
  priomap 3 3 2 2 1 1 0 0
ip link add link lan2 name v1 type vlan id 1 \
  egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7

and patched iperf3 version:
https://github.com/esnet/iperf/pull/1476
iperf3 -c 172.17.0.1 -b100M  -l1472 -t100 -u -R --sock-prio 2

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
---
 drivers/net/dsa/microchip/ksz_common.c | 185 +++++++++++++++++++++++++
 drivers/net/dsa/microchip/ksz_common.h |  12 ++
 2 files changed, 197 insertions(+)
  

Comments

Simon Horman March 8, 2023, 4:37 p.m. UTC | #1
On Wed, Mar 08, 2023 at 10:12:37AM +0100, Oleksij Rempel wrote:
> Add ETS Qdisc support for KSZ9477 of switches. Current implementation is
> limited to strict priority mode.
> 
> Tested on KSZ8563R with following configuration:
> tc qdisc replace dev lan2 root handle 1: ets strict 4 \
>   priomap 3 3 2 2 1 1 0 0
> ip link add link lan2 name v1 type vlan id 1 \
>   egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
> 
> and patched iperf3 version:
> https://github.com/esnet/iperf/pull/1476
> iperf3 -c 172.17.0.1 -b100M  -l1472 -t100 -u -R --sock-prio 2
> 
> Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>

Reviewed-by: Simon Horman <simon.horman@corigine.com>
  
Arun Ramadoss March 10, 2023, 4:17 a.m. UTC | #2
Hi Oleksij,

> +
> +#define KSZ9477_PORT_MRI_TC_MAP__4     0x0808
> +#define KSZ9477_DEFAULT_TC_MAP         0x33221100

KSZ9477 and KSZ8 switches has 4 queues per port whereas LAN937x has 8
queues. So Default TC MAP for LAN937x will be 0x76543210. Instead of
using this values for all the switch, this value must be derived based
on chip info. Can you consider to add this value in chip_info and use
it in funciton ksz_tc_ets_del based on the switch type, So that the
function will be generic.

> +
> +#define KSZ9477_PORT_TC_MAP_S          4
> +#define KSZ9477_MAX_TC_PRIO            7
> +
>  /* CBS related registers */
>  #define REG_PORT_MTI_QUEUE_INDEX__4    0x0900
> 
> @@ -670,6 +679,9 @@ static inline int is_lan937x(struct ksz_device
> *dev)
>  #define MTI_SHAPING_SRP                        1
>  #define MTI_SHAPING_TIME_AWARE         2
> 
> +#define KSZ9477_PORT_MTI_QUEUE_CTRL_1  0x0915
> +#define KSZ9477_DEFAULT_WRR_WEIGHT     1
> +
>  #define REG_PORT_MTI_HI_WATER_MARK     0x0916
>  #define REG_PORT_MTI_LO_WATER_MARK     0x0918
> 
> --
> 2.30.2
>
  

Patch

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index ae05fe0b0a81..2f9230d3c357 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -3172,12 +3172,197 @@  static int ksz_setup_tc_cbs(struct dsa_switch *ds, int port,
 				 MTI_SHAPING_SRP);
 }
 
+static int ksz_disable_egress_rate_limit(struct ksz_device *dev, int port)
+{
+	int queue, ret;
+
+	/* Configuration will not take effect until the last Port Queue X
+	 * Egress Limit Control Register is written.
+	 */
+	for (queue = 0; queue < dev->info->num_tx_queues; queue++) {
+		ret = ksz_pwrite8(dev, port, KSZ9477_REG_PORT_OUT_RATE_0 + queue,
+				  KSZ9477_OUT_RATE_NO_LIMIT);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ksz_ets_band_to_queue(struct tc_ets_qopt_offload_replace_params *p,
+				 int band)
+{
+	/* Compared to queues, bands prioritize packets differently. In strict
+	 * priority mode, the lowest priority is assigned to Queue 0 while the
+	 * highest priority is given to Band 0.
+	 */
+	return p->bands - 1 - band;
+}
+
+static int ksz_queue_set_strict(struct ksz_device *dev, int port, int queue)
+{
+	int ret;
+
+	ret = ksz_pwrite32(dev, port, REG_PORT_MTI_QUEUE_INDEX__4, queue);
+	if (ret)
+		return ret;
+
+	return ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_STRICT_PRIO,
+				 MTI_SHAPING_OFF);
+}
+
+static int ksz_queue_set_wrr(struct ksz_device *dev, int port, int queue,
+			     int weight)
+{
+	int ret;
+
+	ret = ksz_pwrite32(dev, port, REG_PORT_MTI_QUEUE_INDEX__4, queue);
+	if (ret)
+		return ret;
+
+	ret = ksz_setup_tc_mode(dev, port, MTI_SCHEDULE_WRR,
+				MTI_SHAPING_OFF);
+	if (ret)
+		return ret;
+
+	return ksz_pwrite8(dev, port, KSZ9477_PORT_MTI_QUEUE_CTRL_1, weight);
+}
+
+static int ksz_tc_ets_add(struct ksz_device *dev, int port,
+			  struct tc_ets_qopt_offload_replace_params *p)
+{
+	int ret, band, tc_prio;
+	u32 queue_map = 0;
+
+	/* In order to ensure proper prioritization, it is necessary to set the
+	 * rate limit for the related queue to zero. Otherwise strict priority
+	 * or WRR mode will not work. This is a hardware limitation.
+	 */
+	ret = ksz_disable_egress_rate_limit(dev, port);
+	if (ret)
+		return ret;
+
+	/* Configure queue scheduling mode for all bands. Currently only strict
+	 * prio mode is supported.
+	 */
+	for (band = 0; band < p->bands; band++) {
+		int queue = ksz_ets_band_to_queue(p, band);
+
+		ret = ksz_queue_set_strict(dev, port, queue);
+		if (ret)
+			return ret;
+	}
+
+	/* Configure the mapping between traffic classes and queues. Note:
+	 * priomap variable support 16 traffic classes, but the chip can handle
+	 * only 8 classes.
+	 */
+	for (tc_prio = 0; tc_prio < ARRAY_SIZE(p->priomap); tc_prio++) {
+		int queue;
+
+		if (tc_prio > KSZ9477_MAX_TC_PRIO)
+			break;
+
+		queue = ksz_ets_band_to_queue(p, p->priomap[tc_prio]);
+		queue_map |= queue << (tc_prio * KSZ9477_PORT_TC_MAP_S);
+	}
+
+	return ksz_pwrite32(dev, port, KSZ9477_PORT_MRI_TC_MAP__4, queue_map);
+}
+
+static int ksz_tc_ets_del(struct ksz_device *dev, int port)
+{
+	int ret, queue;
+
+	/* To restore the default chip configuration, set all queues to use the
+	 * WRR scheduler with a weight of 1.
+	 */
+	for (queue = 0; queue < dev->info->num_tx_queues; queue++) {
+		ret = ksz_queue_set_wrr(dev, port, queue,
+					KSZ9477_DEFAULT_WRR_WEIGHT);
+		if (ret)
+			return ret;
+	}
+
+	/* Revert the queue mapping for TC-priority to its default setting on
+	 * the chip.
+	 */
+	return ksz_pwrite32(dev, port, KSZ9477_PORT_MRI_TC_MAP__4,
+			    KSZ9477_DEFAULT_TC_MAP);
+}
+
+static int ksz_tc_ets_validate(struct ksz_device *dev, int port,
+			       struct tc_ets_qopt_offload_replace_params *p)
+{
+	int band;
+
+	/* Since it is not feasible to share one port among multiple qdisc,
+	 * the user must configure all available queues appropriately.
+	 */
+	if (p->bands != dev->info->num_tx_queues) {
+		dev_err(dev->dev, "Not supported amount of bands. It should be %d\n",
+			dev->info->num_tx_queues);
+		return -EOPNOTSUPP;
+	}
+
+	for (band = 0; band < p->bands; ++band) {
+		/* The KSZ switches utilize a weighted round robin configuration
+		 * where a certain number of packets can be transmitted from a
+		 * queue before the next queue is serviced. For more information
+		 * on this, refer to section 5.2.8.4 of the KSZ8565R
+		 * documentation on the Port Transmit Queue Control 1 Register.
+		 * However, the current ETS Qdisc implementation (as of February
+		 * 2023) assigns a weight to each queue based on the number of
+		 * bytes or extrapolated bandwidth in percentages. Since this
+		 * differs from the KSZ switches' method and we don't want to
+		 * fake support by converting bytes to packets, it is better to
+		 * return an error instead.
+		 */
+		if (p->quanta[band]) {
+			dev_err(dev->dev, "Quanta/weights configuration is not supported.\n");
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return 0;
+}
+
+static int ksz_tc_setup_qdisc_ets(struct dsa_switch *ds, int port,
+				  struct tc_ets_qopt_offload *qopt)
+{
+	struct ksz_device *dev = ds->priv;
+	int ret;
+
+	if (qopt->parent != TC_H_ROOT) {
+		dev_err(dev->dev, "Parent should be \"root\"\n");
+		return -EOPNOTSUPP;
+	}
+
+	switch (qopt->command) {
+	case TC_ETS_REPLACE:
+		ret = ksz_tc_ets_validate(dev, port, &qopt->replace_params);
+		if (ret)
+			return ret;
+
+		return ksz_tc_ets_add(dev, port, &qopt->replace_params);
+	case TC_ETS_DESTROY:
+		return ksz_tc_ets_del(dev, port);
+	case TC_ETS_STATS:
+	case TC_ETS_GRAFT:
+		return -EOPNOTSUPP;
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static int ksz_setup_tc(struct dsa_switch *ds, int port,
 			enum tc_setup_type type, void *type_data)
 {
 	switch (type) {
 	case TC_SETUP_QDISC_CBS:
 		return ksz_setup_tc_cbs(ds, port, type_data);
+	case TC_SETUP_QDISC_ETS:
+		return ksz_tc_setup_qdisc_ets(ds, port, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
index f53834bbe896..7618a4714e06 100644
--- a/drivers/net/dsa/microchip/ksz_common.h
+++ b/drivers/net/dsa/microchip/ksz_common.h
@@ -657,6 +657,15 @@  static inline int is_lan937x(struct ksz_device *dev)
 #define KSZ8_LEGAL_PACKET_SIZE		1518
 #define KSZ9477_MAX_FRAME_SIZE		9000
 
+#define KSZ9477_REG_PORT_OUT_RATE_0	0x0420
+#define KSZ9477_OUT_RATE_NO_LIMIT	0
+
+#define KSZ9477_PORT_MRI_TC_MAP__4	0x0808
+#define KSZ9477_DEFAULT_TC_MAP		0x33221100
+
+#define KSZ9477_PORT_TC_MAP_S		4
+#define KSZ9477_MAX_TC_PRIO		7
+
 /* CBS related registers */
 #define REG_PORT_MTI_QUEUE_INDEX__4	0x0900
 
@@ -670,6 +679,9 @@  static inline int is_lan937x(struct ksz_device *dev)
 #define MTI_SHAPING_SRP			1
 #define MTI_SHAPING_TIME_AWARE		2
 
+#define KSZ9477_PORT_MTI_QUEUE_CTRL_1	0x0915
+#define KSZ9477_DEFAULT_WRR_WEIGHT	1
+
 #define REG_PORT_MTI_HI_WATER_MARK	0x0916
 #define REG_PORT_MTI_LO_WATER_MARK	0x0918