[v2,03/10] interconnect: qcom: icc-rpm: Let nodes drive their own bus clock
Commit Message
As if this hardware couldn't get any messier, some nodes are supposed to
drive their own bus clock, presumably to connect to some intermediate
interface between the node itself and the bus it's (supposed to be)
connected to.
Expand the node struct with the necessary data and hook up the
allocations & calculations.
To save on memory (not very many nodes have their own clocks), allocate
a pointer to an array instead of allocating an array within
qcom_icc_node.
Note that the node-specific AB/IB coefficients contribute (by design)
to both the node-level and the bus-level aggregation.
Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
---
drivers/interconnect/qcom/icc-rpm.c | 48 +++++++++++++++++++++++++++++++------
drivers/interconnect/qcom/icc-rpm.h | 2 ++
2 files changed, 43 insertions(+), 7 deletions(-)
Comments
On Mon, Jul 31, 2023 at 12:52:19PM +0200, Konrad Dybcio wrote:
> If this hardware couldn't get messier, some nodes are supposed to drive
> their own bus clock.. Presumably to connect to some intermediate
> interface between the node itself and the bus it's (supposed to be)
> connected to.
>
> Expand the node struct with the necessary data and hook up the
> allocations & calculations.
>
> To save on memory (not very many nodes have their own clocks), allocate
> a pointer to an array instead of allocating an array within
> qcom_icc_node.
>
Only on ARM32 though. On ARM64 you waste extra memory:

    u32 bus_clk_rate[QCOM_SMD_RPM_STATE_NUM];
    sizeof(bus_clk_rate) = QCOM_SMD_RPM_STATE_NUM * sizeof(bus_clk_rate[0])
                         = 2 * 4
                         = 8

    u32 *bus_clk_rate;
    sizeof(bus_clk_rate) = sizeof(ptr)
                         = 8 (for ARM64)
                         + 2 * 4 + malloc overhead
                           for each node with bus_clk_desc

which is > 8 from above.

I'm not quite convinced this optimization is worth it.
Thanks,
Stephan
On 1.08.2023 12:58, Stephan Gerhold wrote:
> On Mon, Jul 31, 2023 at 12:52:19PM +0200, Konrad Dybcio wrote:
>> If this hardware couldn't get messier, some nodes are supposed to drive
>> their own bus clock.. Presumably to connect to some intermediate
>> interface between the node itself and the bus it's (supposed to be)
>> connected to.
>>
>> Expand the node struct with the necessary data and hook up the
>> allocations & calculations.
>>
>> To save on memory (not very many nodes have their own clocks), allocate
>> a pointer to an array instead of allocating an array within
>> qcom_icc_node.
>>
>
> Only on ARM32 though. On ARM64 you waste extra memory:
>
> u32 bus_clk_rate[QCOM_SMD_RPM_STATE_NUM];
> sizeof(bus_clk_rate) = QCOM_SMD_RPM_STATE_NUM * sizeof(bus_clk_rate[0])
> = 2 * 4
> = 8
>
> u32 *bus_clk_rate;
> sizeof(bus_clk_rate) = sizeof(ptr)
> = 8 (for ARM64)
> + 2 * 4 + malloc overhead
> for each node with bus_clk_desc
>
> which is > 8 from above.
>
> I'm not quite convinced this optimization is worth it.
Right, u32 is not the same size as u32 *, brain fart :D
Konrad
@@ -411,6 +411,33 @@ static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
qp->bus_clk_rate[QCOM_SMD_RPM_SLEEP_STATE] = sleep_rate;
}
+ /* Handle the node-specific clock */
+ if (!src_qn->bus_clk_desc)
+ return 0;
+
+ active_rate = qcom_icc_calc_rate(qp, src_qn, QCOM_SMD_RPM_ACTIVE_STATE);
+ sleep_rate = qcom_icc_calc_rate(qp, src_qn, QCOM_SMD_RPM_SLEEP_STATE);
+
+ if (active_rate != src_qn->bus_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE]) {
+ ret = qcom_icc_rpm_set_bus_rate(src_qn->bus_clk_desc, QCOM_SMD_RPM_ACTIVE_STATE,
+ active_rate);
+ if (ret)
+ return ret;
+
+ /* Cache the rate after we've successfully committed it to RPM */
+ src_qn->bus_clk_rate[QCOM_SMD_RPM_ACTIVE_STATE] = active_rate;
+ }
+
+ if (sleep_rate != src_qn->bus_clk_rate[QCOM_SMD_RPM_SLEEP_STATE]) {
+ ret = qcom_icc_rpm_set_bus_rate(src_qn->bus_clk_desc, QCOM_SMD_RPM_SLEEP_STATE,
+ sleep_rate);
+ if (ret)
+ return ret;
+
+ /* Cache the rate after we've successfully committed it to RPM */
+ src_qn->bus_clk_rate[QCOM_SMD_RPM_SLEEP_STATE] = sleep_rate;
+ }
+
return 0;
}
@@ -531,24 +558,31 @@ int qnoc_probe(struct platform_device *pdev)
return ret;
for (i = 0; i < num_nodes; i++) {
+ struct qcom_icc_node *qn = qnodes[i];
size_t j;
- node = icc_node_create(qnodes[i]->id);
+ if (qn->bus_clk_desc) {
+ qn->bus_clk_rate = devm_kcalloc(dev, QCOM_SMD_RPM_STATE_NUM,
+ sizeof(*qn->bus_clk_rate),
+ GFP_KERNEL);
+ }
+
+ node = icc_node_create(qn->id);
if (IS_ERR(node)) {
ret = PTR_ERR(node);
goto err_remove_nodes;
}
- node->name = qnodes[i]->name;
- node->data = qnodes[i];
+ node->name = qn->name;
+ node->data = qn;
icc_node_add(node, provider);
- for (j = 0; j < qnodes[i]->num_links; j++)
- icc_link_create(node, qnodes[i]->links[j]);
+ for (j = 0; j < qn->num_links; j++)
+ icc_link_create(node, qn->links[j]);
/* Set QoS registers (we only need to do it once, generally) */
- if (qnodes[i]->qos.ap_owned &&
- qnodes[i]->qos.qos_mode != NOC_QOS_MODE_INVALID) {
+ if (qn->qos.ap_owned &&
+ qn->qos.qos_mode != NOC_QOS_MODE_INVALID) {
ret = qcom_icc_qos_set(node);
if (ret)
return ret;
@@ -97,6 +97,7 @@ struct qcom_icc_qos {
* @num_links: the total number of @links
* @channels: number of channels at this node (e.g. DDR channels)
* @buswidth: width of the interconnect between a node and the bus (bytes)
+ * @bus_clk_desc: a pointer to a rpm_clk_resource description of bus clocks
* @sum_avg: current sum aggregate value of all avg bw requests
* @max_peak: current max aggregate value of all peak bw requests
* @mas_rpm_id: RPM id for devices that are bus masters
@@ -110,6 +111,7 @@ struct qcom_icc_node {
u16 num_links;
u16 channels;
u16 buswidth;
+ const struct rpm_clk_resource *bus_clk_desc;
u64 sum_avg[QCOM_SMD_RPM_STATE_NUM];
u64 max_peak[QCOM_SMD_RPM_STATE_NUM];
int mas_rpm_id;