[v2,22/22] interconnect: qcom: icc-rpm: Fix bandwidth calculations
Commit Message
Up until now, we've been aggregating the bandwidth values and only
dividing them by the bus width of the source node. This was completely
wrong, as different nodes on a given path may (and usually do) have
varying bus widths. That in turn, resulted in the calculated clock rates
being completely bogus - usually they ended up being much higher, as
NoC_A<->NoC_B links are very wide.
Since we're not using the aggregate bandwidth value for anything other
than clock rate calculations, remodel qcom_icc_bus_aggregate() to
calculate the per-context clock rate for a given provider, taking into
account the bus width of every individual node.
Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
Reported-by: Stephan Gerhold <stephan@gerhold.net>
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
---
drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
1 file changed, 19 insertions(+), 40 deletions(-)
Comments
On Fri, Jun 09, 2023 at 10:19:27PM +0200, Konrad Dybcio wrote:
> Up until now, we've been aggregating the bandwidth values and only
> dividing them by the bus width of the source node. This was completely
> wrong, as different nodes on a given path may (and usually do) have
> varying bus widths. That in turn, resulted in the calculated clock rates
> being completely bogus - usually they ended up being much higher, as
> NoC_A<->NoC_B links are very wide.
>
> Since we're not using the aggregate bandwidth value for anything other
> than clock rate calculations, remodel qcom_icc_bus_aggregate() to
> calculate the per-context clock rate for a given provider, taking into
> account the bus width of every individual node.
>
> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
> Reported-by: Stephan Gerhold <stephan@gerhold.net>
> Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
> ---
> drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
> 1 file changed, 19 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
> index 1508233632f6..d177a76abe2a 100644
> --- a/drivers/interconnect/qcom/icc-rpm.c
> +++ b/drivers/interconnect/qcom/icc-rpm.c
> @@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
> }
>
> /**
> - * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
> + * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
> * @provider: generic interconnect provider
> - * @agg_avg: an array for aggregated average bandwidth of buckets
> - * @agg_peak: an array for aggregated peak bandwidth of buckets
> - * @max_agg_avg: pointer to max value of aggregated average bandwidth
> + * @agg_clk_rate: array containing the aggregated clock rates in kHz
> */
> -static void qcom_icc_bus_aggregate(struct icc_provider *provider,
> - u64 *agg_avg, u64 *agg_peak,
> - u64 *max_agg_avg)
> +static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
> {
> - struct icc_node *node;
> + u64 agg_avg_rate, agg_rate;
> struct qcom_icc_node *qn;
> - u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
> + struct icc_node *node;
> int i;
>
> - /* Initialise aggregate values */
> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
> - agg_avg[i] = 0;
> - agg_peak[i] = 0;
> - }
> -
> - *max_agg_avg = 0;
> -
> /*
> - * Iterate nodes on the interconnect and aggregate bandwidth
> - * requests for every bucket.
> + * Iterate nodes on the provider, aggregate bandwidth requests for
> + * every bucket and convert them into bus clock rates.
> */
> list_for_each_entry(node, &provider->nodes, node_list) {
> qn = node->data;
> for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
> if (qn->channels)
> - sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
> + agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
> else
> - sum_avg[i] = qn->sum_avg[i];
> - agg_avg[i] += sum_avg[i];
> - agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
> + agg_avg_rate = qn->sum_avg[i];
> +
> + agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
> + do_div(agg_rate, qn->buswidth);
> +
> + agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
> }
> }
> -
> - /* Find maximum values across all buckets */
> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
> - *max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
> }
>
> static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> {
> - struct qcom_icc_provider *qp;
> struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
> + u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
> struct icc_provider *provider;
> + struct qcom_icc_provider *qp;
> u64 active_rate, sleep_rate;
> - u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
> - u64 max_agg_avg;
> int ret;
>
> src_qn = src->data;
> @@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> provider = src->provider;
> qp = to_qcom_provider(provider);
>
> - qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
> + qcom_icc_bus_aggregate(provider, agg_clk_rate);
> + active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
> + sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
>
> ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
> if (ret)
> @@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
> if (!qp->bus_clk_desc && !qp->bus_clk)
> return 0;
>
> - /* Intentionally keep the rates in kHz as that's what RPM accepts */
I kind of liked this comment because otherwise it's not obvious why
you're not converting from "ICC units" anywhere.
Anyway:
Reviewed-by: Stephan Gerhold <stephan@gerhold.net>
Thanks for going through the giant maze to fix this!
Stephan
On 10.06.2023 21:06, Stephan Gerhold wrote:
> On Fri, Jun 09, 2023 at 10:19:27PM +0200, Konrad Dybcio wrote:
>> Up until now, we've been aggregating the bandwidth values and only
>> dividing them by the bus width of the source node. This was completely
>> wrong, as different nodes on a given path may (and usually do) have
>> varying bus widths. That in turn, resulted in the calculated clock rates
>> being completely bogus - usually they ended up being much higher, as
>> NoC_A<->NoC_B links are very wide.
>>
>> Since we're not using the aggregate bandwidth value for anything other
>> than clock rate calculations, remodel qcom_icc_bus_aggregate() to
>> calculate the per-context clock rate for a given provider, taking into
>> account the bus width of every individual node.
>>
>> Fixes: 30c8fa3ec61a ("interconnect: qcom: Add MSM8916 interconnect provider driver")
>> Reported-by: Stephan Gerhold <stephan@gerhold.net>
>> Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
>> ---
>> drivers/interconnect/qcom/icc-rpm.c | 59 ++++++++++++-------------------------
>> 1 file changed, 19 insertions(+), 40 deletions(-)
>>
>> diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
>> index 1508233632f6..d177a76abe2a 100644
>> --- a/drivers/interconnect/qcom/icc-rpm.c
>> +++ b/drivers/interconnect/qcom/icc-rpm.c
>> @@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
>> }
>>
>> /**
>> - * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
>> + * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
>> * @provider: generic interconnect provider
>> - * @agg_avg: an array for aggregated average bandwidth of buckets
>> - * @agg_peak: an array for aggregated peak bandwidth of buckets
>> - * @max_agg_avg: pointer to max value of aggregated average bandwidth
>> + * @agg_clk_rate: array containing the aggregated clock rates in kHz
>> */
>> -static void qcom_icc_bus_aggregate(struct icc_provider *provider,
>> - u64 *agg_avg, u64 *agg_peak,
>> - u64 *max_agg_avg)
>> +static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
>> {
>> - struct icc_node *node;
>> + u64 agg_avg_rate, agg_rate;
>> struct qcom_icc_node *qn;
>> - u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
>> + struct icc_node *node;
>> int i;
>>
>> - /* Initialise aggregate values */
>> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>> - agg_avg[i] = 0;
>> - agg_peak[i] = 0;
>> - }
>> -
>> - *max_agg_avg = 0;
>> -
>> /*
>> - * Iterate nodes on the interconnect and aggregate bandwidth
>> - * requests for every bucket.
>> + * Iterate nodes on the provider, aggregate bandwidth requests for
>> + * every bucket and convert them into bus clock rates.
>> */
>> list_for_each_entry(node, &provider->nodes, node_list) {
>> qn = node->data;
>> for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
>> if (qn->channels)
>> - sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
>> + agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
>> else
>> - sum_avg[i] = qn->sum_avg[i];
>> - agg_avg[i] += sum_avg[i];
>> - agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
>> + agg_avg_rate = qn->sum_avg[i];
>> +
>> + agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
>> + do_div(agg_rate, qn->buswidth);
>> +
>> + agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
>> }
>> }
>> -
>> - /* Find maximum values across all buckets */
>> - for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
>> - *max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
>> }
>>
>> static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>> {
>> - struct qcom_icc_provider *qp;
>> struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
>> + u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
>> struct icc_provider *provider;
>> + struct qcom_icc_provider *qp;
>> u64 active_rate, sleep_rate;
>> - u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
>> - u64 max_agg_avg;
>> int ret;
>>
>> src_qn = src->data;
>> @@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>> provider = src->provider;
>> qp = to_qcom_provider(provider);
>>
>> - qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
>> + qcom_icc_bus_aggregate(provider, agg_clk_rate);
>> + active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
>> + sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
>>
>> ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
>> if (ret)
>> @@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
>> if (!qp->bus_clk_desc && !qp->bus_clk)
>> return 0;
>>
>> - /* Intentionally keep the rates in kHz as that's what RPM accepts */
>
> I kind of liked this comment because otherwise it's not obvious why
> you're not converting from "ICC units" anywhere.
I figured the kerneldoc change would be enough:
@agg_clk_rate: array containing the aggregated clock rates in kHz
>
> Anyway:
>
> Reviewed-by: Stephan Gerhold <stephan@gerhold.net>
>
> Thanks for going through the giant maze to fix this!
Thanks for being there along the way.. This spaghetti is far too much
for a single human..
Konrad
>
> Stephan
@@ -293,58 +293,44 @@ static int qcom_icc_bw_aggregate(struct icc_node *node, u32 tag, u32 avg_bw,
}
/**
- * qcom_icc_bus_aggregate - aggregate bandwidth by traversing all nodes
+ * qcom_icc_bus_aggregate - calculate bus clock rates by traversing all nodes
* @provider: generic interconnect provider
- * @agg_avg: an array for aggregated average bandwidth of buckets
- * @agg_peak: an array for aggregated peak bandwidth of buckets
- * @max_agg_avg: pointer to max value of aggregated average bandwidth
+ * @agg_clk_rate: array containing the aggregated clock rates in kHz
*/
-static void qcom_icc_bus_aggregate(struct icc_provider *provider,
- u64 *agg_avg, u64 *agg_peak,
- u64 *max_agg_avg)
+static void qcom_icc_bus_aggregate(struct icc_provider *provider, u64 *agg_clk_rate)
{
- struct icc_node *node;
+ u64 agg_avg_rate, agg_rate;
struct qcom_icc_node *qn;
- u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
+ struct icc_node *node;
int i;
- /* Initialise aggregate values */
- for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
- agg_avg[i] = 0;
- agg_peak[i] = 0;
- }
-
- *max_agg_avg = 0;
-
/*
- * Iterate nodes on the interconnect and aggregate bandwidth
- * requests for every bucket.
+ * Iterate nodes on the provider, aggregate bandwidth requests for
+ * every bucket and convert them into bus clock rates.
*/
list_for_each_entry(node, &provider->nodes, node_list) {
qn = node->data;
for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++) {
if (qn->channels)
- sum_avg[i] = div_u64(qn->sum_avg[i], qn->channels);
+ agg_avg_rate = div_u64(qn->sum_avg[i], qn->channels);
else
- sum_avg[i] = qn->sum_avg[i];
- agg_avg[i] += sum_avg[i];
- agg_peak[i] = max_t(u64, agg_peak[i], qn->max_peak[i]);
+ agg_avg_rate = qn->sum_avg[i];
+
+ agg_rate = max_t(u64, agg_avg_rate, qn->max_peak[i]);
+ do_div(agg_rate, qn->buswidth);
+
+ agg_clk_rate[i] = max_t(u64, agg_clk_rate[i], agg_rate);
}
}
-
- /* Find maximum values across all buckets */
- for (i = 0; i < QCOM_SMD_RPM_STATE_NUM; i++)
- *max_agg_avg = max_t(u64, *max_agg_avg, agg_avg[i]);
}
static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
{
- struct qcom_icc_provider *qp;
struct qcom_icc_node *src_qn = NULL, *dst_qn = NULL;
+ u64 agg_clk_rate[QCOM_SMD_RPM_STATE_NUM] = { 0 };
struct icc_provider *provider;
+ struct qcom_icc_provider *qp;
u64 active_rate, sleep_rate;
- u64 agg_avg[QCOM_SMD_RPM_STATE_NUM], agg_peak[QCOM_SMD_RPM_STATE_NUM];
- u64 max_agg_avg;
int ret;
src_qn = src->data;
@@ -353,7 +339,9 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
provider = src->provider;
qp = to_qcom_provider(provider);
- qcom_icc_bus_aggregate(provider, agg_avg, agg_peak, &max_agg_avg);
+ qcom_icc_bus_aggregate(provider, agg_clk_rate);
+ active_rate = agg_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE];
+ sleep_rate = agg_clk_rate[QCOM_SMD_RPM_SLEEP_STATE];
ret = qcom_icc_rpm_set(src_qn, src_qn->sum_avg);
if (ret)
@@ -369,15 +357,6 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
if (!qp->bus_clk_desc && !qp->bus_clk)
return 0;
- /* Intentionally keep the rates in kHz as that's what RPM accepts */
- active_rate = max(agg_avg[QCOM_SMD_RPM_ACTIVE_STATE],
- agg_peak[QCOM_SMD_RPM_ACTIVE_STATE]);
- do_div(active_rate, src_qn->buswidth);
-
- sleep_rate = max(agg_avg[QCOM_SMD_RPM_SLEEP_STATE],
- agg_peak[QCOM_SMD_RPM_SLEEP_STATE]);
- do_div(sleep_rate, src_qn->buswidth);
-
/*
* Downstream checks whether the requested rate is zero, but it makes little sense
* to vote for a value that's below the lower threshold, so let's not do so.