diff mbox series

[1/4] drm/panfrost: Implement ability to turn on/off GPU clocks in suspend

Message ID	20231030132257.85379-2-angelogioacchino.delregno@collabora.com
State	New
Headers	Received-SPF: pass (google.com: domain of linux-kernel-owner@vger.kernel.org designates 23.128.96.37 as permitted sender) client-ip=23.128.96.37; sender: kholk11) by madras.collabora.co.uk (Postfix) with ESMTPSA id 2A36666071A3; Mon, 30 Oct 2023 13:23:05 +0000 (GMT) From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com> To: boris.brezillon@collabora.com Cc: robh@kernel.org, steven.price@arm.com, maarten.lankhorst@linux.intel.com, mripard@kernel.org, tzimmermann@suse.de, airlied@gmail.com, daniel@ffwll.ch, dri-devel@lists.freedesktop.org, linux-kernel@vger.kernel.org, wenst@chromium.org, AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>, kernel@collabora.com Subject: [PATCH 1/4] drm/panfrost: Implement ability to turn on/off GPU clocks in suspend Date: Mon, 30 Oct 2023 14:22:54 +0100 Message-ID: <20231030132257.85379-2-angelogioacchino.delregno@collabora.com> In-Reply-To: <20231030132257.85379-1-angelogioacchino.delregno@collabora.com> References: <20231030132257.85379-1-angelogioacchino.delregno@collabora.com> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit Precedence: bulk
Series	drm/panfrost: Turn off clocks and regulators in PM \| [0/4] drm/panfrost: Turn off clocks and regulators in PM [1/4] drm/panfrost: Implement ability to turn on/off GPU clocks in suspend [2/4] drm/panfrost: Set clocks on/off during system sleep on MediaTek SoCs [3/4] drm/panfrost: Implement ability to turn on/off regulators in suspend [4/4] drm/panfrost: Set regulators on/off during system sleep on MediaTek SoCs

Commit Message

AngeloGioacchino Del Regno Oct. 30, 2023, 1:22 p.m. UTC

  Currently, the GPU is being internally powered off for runtime suspend
and turned back on for runtime resume through commands sent to it, but
note that the GPU doesn't need to be clocked during the poweroff state,
hence it is possible to save some power on selected platforms.

Add suspend and resume handlers for full system sleep and then add
a new panfrost_gpu_pm enumeration and a pm_features variable in the
panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
enable this power saving technique only on SoCs that are able to
safely use it.

Note that this was implemented only for the system sleep case and not
for runtime PM because testing on one of my MediaTek platforms showed
issues when turning on and off clocks aggressively (in PM runtime),
with the GPU locking up and unable to soft reset, eventually resulting
in a full system lockup.

Doing this only for full system sleep never showed issues in 3 days
of testing by suspending and resuming the system continuously.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
---
 drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
 drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
 2 files changed, 68 insertions(+), 4 deletions(-)

Comments

Steven Price Oct. 30, 2023, 2:57 p.m. UTC | #1

On 30/10/2023 13:22, AngeloGioacchino Del Regno wrote:
> Currently, the GPU is being internally powered off for runtime suspend
> and turned back on for runtime resume through commands sent to it, but
> note that the GPU doesn't need to be clocked during the poweroff state,
> hence it is possible to save some power on selected platforms.

Looks like a good addition - I suspect some implementations are quite
leaky so this could have a meaningful power saving in some cases.

> Add suspend and resume handlers for full system sleep and then add
> a new panfrost_gpu_pm enumeration and a pm_features variable in the
> panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
> enable this power saving technique only on SoCs that are able to
> safely use it.
> 
> Note that this was implemented only for the system sleep case and not
> for runtime PM because testing on one of my MediaTek platforms showed
> issues when turning on and off clocks aggressively (in PM runtime),
> with the GPU locking up and unable to soft reset, eventually resulting
> in a full system lockup.

I think I know why you saw this - see below.

> Doing this only for full system sleep never showed issues in 3 days
> of testing by suspending and resuming the system continuously.
> 
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
>  drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
>  2 files changed, 68 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> index 28f7046e1b1a..2022ed76a620 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> @@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>  	panfrost_job_enable_interrupts(pfdev);
>  }
>  
> -static int panfrost_device_resume(struct device *dev)
> +static int panfrost_device_runtime_resume(struct device *dev)
>  {
>  	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>  
> @@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev)
>  	return 0;
>  }
>  
> -static int panfrost_device_suspend(struct device *dev)
> +static int panfrost_device_runtime_suspend(struct device *dev)
>  {
>  	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>  

So this function calls panfrost_gpu_power_off() which is simply:

void panfrost_gpu_power_off(struct panfrost_device *pfdev)
{
	gpu_write(pfdev, TILER_PWROFF_LO, 0);
	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
	gpu_write(pfdev, L2_PWROFF_LO, 0);
}

So we instruct the GPU to turn off, but don't wait for it to complete.

> @@ -426,5 +426,58 @@ static int panfrost_device_suspend(struct device *dev)
>  	return 0;
>  }
>  
> -EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
> -			      panfrost_device_resume, NULL);
> +static int panfrost_device_resume(struct device *dev)
> +{
> +	struct panfrost_device *pfdev = dev_get_drvdata(dev);
> +	int ret;
> +
> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
> +		ret = clk_enable(pfdev->clock);
> +		if (ret)
> +			return ret;
> +
> +		if (pfdev->bus_clock) {
> +			ret = clk_enable(pfdev->bus_clock);
> +			if (ret)
> +				goto err_bus_clk;
> +		}
> +	}
> +
> +	ret = pm_runtime_force_resume(dev);
> +	if (ret)
> +		goto err_resume;
> +
> +	return 0;
> +
> +err_resume:
> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
> +		clk_disable(pfdev->bus_clock);
> +err_bus_clk:
> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
> +		clk_disable(pfdev->clock);
> +	return ret;
> +}
> +
> +static int panfrost_device_suspend(struct device *dev)
> +{
> +	struct panfrost_device *pfdev = dev_get_drvdata(dev);
> +	int ret;
> +
> +	ret = pm_runtime_force_suspend(dev);
> +	if (ret)
> +		return ret;

So here we've started shutting down the GPU (pm_runtime_force_suspend
eventually calls panfrost_gpu_power_off). But nothing here waits for the
GPU to actually finish shutting down. If we're unlucky there's dirty
data in the caches (or coherency which can snoop into the caches) so the
GPU could be actively making bus cycles...

> +
> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
> +		clk_disable(pfdev->clock);

... until its clock goes and everything locks up.

Something should be waiting for the power down to complete. Either poll
the L2_PWRTRANS_LO register to detect that the L2 is no longer
transitioning, or wait for the GPU_IRQ_POWER_CHANGED_ALL interrupt to fire.

It would be good to test this with the system suspend doing the full
power off, it should be safe so it would be a good stress test. Although
whether we want the overhead in normal operation is another matter - so
I suspect it should just be for testing purposes.

I would hope that we don't actually need the GPU_PM_CLK_DIS feature -
this should work as long as the GPU is given the time to shutdown.
Although note that actually cutting the power (patches 3 & 4) may expose
us to implementation errata - there have been issues with designs not
resetting correctly. I'm not sure if those made it into real products or
if such bugs are confined to test chips. So for the sake of not causing
regressions it's probably not a bad thing to have ;)

Steve

> +
> +		if (pfdev->bus_clock)
> +			clk_disable(pfdev->bus_clock);
> +	}
> +
> +	return 0;
> +}
> +
> +EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
> +	RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL)
> +	SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
> +};
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 1ef38f60d5dc..d7f179eb8ea3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -25,6 +25,14 @@ struct panfrost_perfcnt;
>  #define NUM_JOB_SLOTS 3
>  #define MAX_PM_DOMAINS 5
>  
> +/**
> + * enum panfrost_gpu_pm - Supported kernel power management features
> + * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
> + */
> +enum panfrost_gpu_pm {
> +	GPU_PM_CLK_DIS,
> +};
> +
>  struct panfrost_features {
>  	u16 id;
>  	u16 revision;
> @@ -75,6 +83,9 @@ struct panfrost_compatible {
>  
>  	/* Vendor implementation quirks callback */
>  	void (*vendor_quirk)(struct panfrost_device *pfdev);
> +
> +	/* Allowed PM features */
> +	u8 pm_features;
>  };
>  
>  struct panfrost_device {

Chen-Yu Tsai Oct. 31, 2023, 3:18 a.m. UTC | #2

On Mon, Oct 30, 2023 at 9:23 PM AngeloGioacchino Del Regno
<angelogioacchino.delregno@collabora.com> wrote:
>
> Currently, the GPU is being internally powered off for runtime suspend
> and turned back on for runtime resume through commands sent to it, but
> note that the GPU doesn't need to be clocked during the poweroff state,
> hence it is possible to save some power on selected platforms.
>
> Add suspend and resume handlers for full system sleep and then add
> a new panfrost_gpu_pm enumeration and a pm_features variable in the
> panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
> enable this power saving technique only on SoCs that are able to
> safely use it.
>
> Note that this was implemented only for the system sleep case and not
> for runtime PM because testing on one of my MediaTek platforms showed
> issues when turning on and off clocks aggressively (in PM runtime),
> with the GPU locking up and unable to soft reset, eventually resulting
> in a full system lockup.
>
> Doing this only for full system sleep never showed issues in 3 days
> of testing by suspending and resuming the system continuously.
>
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> ---
>  drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
>  drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
>  2 files changed, 68 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
> index 28f7046e1b1a..2022ed76a620 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
> @@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>         panfrost_job_enable_interrupts(pfdev);
>  }
>
> -static int panfrost_device_resume(struct device *dev)
> +static int panfrost_device_runtime_resume(struct device *dev)
>  {
>         struct panfrost_device *pfdev = dev_get_drvdata(dev);
>
> @@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev)
>         return 0;
>  }
>
> -static int panfrost_device_suspend(struct device *dev)
> +static int panfrost_device_runtime_suspend(struct device *dev)
>  {
>         struct panfrost_device *pfdev = dev_get_drvdata(dev);
>
> @@ -426,5 +426,58 @@ static int panfrost_device_suspend(struct device *dev)
>         return 0;
>  }
>
> -EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
> -                             panfrost_device_resume, NULL);
> +static int panfrost_device_resume(struct device *dev)
> +{
> +       struct panfrost_device *pfdev = dev_get_drvdata(dev);
> +       int ret;
> +
> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
> +               ret = clk_enable(pfdev->clock);
> +               if (ret)
> +                       return ret;
> +
> +               if (pfdev->bus_clock) {
> +                       ret = clk_enable(pfdev->bus_clock);
> +                       if (ret)
> +                               goto err_bus_clk;
> +               }
> +       }
> +
> +       ret = pm_runtime_force_resume(dev);
> +       if (ret)
> +               goto err_resume;
> +
> +       return 0;
> +
> +err_resume:
> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
> +               clk_disable(pfdev->bus_clock);
> +err_bus_clk:
> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
> +               clk_disable(pfdev->clock);
> +       return ret;
> +}
> +
> +static int panfrost_device_suspend(struct device *dev)
> +{
> +       struct panfrost_device *pfdev = dev_get_drvdata(dev);
> +       int ret;
> +
> +       ret = pm_runtime_force_suspend(dev);
> +       if (ret)
> +               return ret;
> +
> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
> +               clk_disable(pfdev->clock);
> +
> +               if (pfdev->bus_clock)
> +                       clk_disable(pfdev->bus_clock);
> +       }
> +
> +       return 0;
> +}
> +
> +EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
> +       RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL)
> +       SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
> +};
> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
> index 1ef38f60d5dc..d7f179eb8ea3 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
> @@ -25,6 +25,14 @@ struct panfrost_perfcnt;
>  #define NUM_JOB_SLOTS 3
>  #define MAX_PM_DOMAINS 5
>
> +/**
> + * enum panfrost_gpu_pm - Supported kernel power management features
> + * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
> + */
> +enum panfrost_gpu_pm {
> +       GPU_PM_CLK_DIS,
> +};
> +
>  struct panfrost_features {
>         u16 id;
>         u16 revision;
> @@ -75,6 +83,9 @@ struct panfrost_compatible {
>
>         /* Vendor implementation quirks callback */
>         void (*vendor_quirk)(struct panfrost_device *pfdev);
> +
> +       /* Allowed PM features */
> +       u8 pm_features;

Nit: I'd just use bitfields. They are easier to set and get without
extra macros, and the naming would be self-explanatory. Unless you
expect a need to do mask checking (though the compiler might be able
to optimize this).

ChenYu

>  };
>
>  struct panfrost_device {
> --
> 2.42.0
>

AngeloGioacchino Del Regno Oct. 31, 2023, 8:59 a.m. UTC | #3

Il 30/10/23 15:57, Steven Price ha scritto:
> On 30/10/2023 13:22, AngeloGioacchino Del Regno wrote:
>> Currently, the GPU is being internally powered off for runtime suspend
>> and turned back on for runtime resume through commands sent to it, but
>> note that the GPU doesn't need to be clocked during the poweroff state,
>> hence it is possible to save some power on selected platforms.
> 
> Looks like a good addition - I suspect some implementations are quite
> leaky so this could have a meaningful power saving in some cases.
> 
>> Add suspend and resume handlers for full system sleep and then add
>> a new panfrost_gpu_pm enumeration and a pm_features variable in the
>> panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
>> enable this power saving technique only on SoCs that are able to
>> safely use it.
>>
>> Note that this was implemented only for the system sleep case and not
>> for runtime PM because testing on one of my MediaTek platforms showed
>> issues when turning on and off clocks aggressively (in PM runtime),
>> with the GPU locking up and unable to soft reset, eventually resulting
>> in a full system lockup.
> 
> I think I know why you saw this - see below.
> 
>> Doing this only for full system sleep never showed issues in 3 days
>> of testing by suspending and resuming the system continuously.
>>
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>> ---
>>   drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
>>   drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
>>   2 files changed, 68 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
>> index 28f7046e1b1a..2022ed76a620 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
>> @@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>>   	panfrost_job_enable_interrupts(pfdev);
>>   }
>>   
>> -static int panfrost_device_resume(struct device *dev)
>> +static int panfrost_device_runtime_resume(struct device *dev)
>>   {
>>   	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>   
>> @@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev)
>>   	return 0;
>>   }
>>   
>> -static int panfrost_device_suspend(struct device *dev)
>> +static int panfrost_device_runtime_suspend(struct device *dev)
>>   {
>>   	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>   
> 
> So this function calls panfrost_gpu_power_off() which is simply:
> 
> void panfrost_gpu_power_off(struct panfrost_device *pfdev)
> {
> 	gpu_write(pfdev, TILER_PWROFF_LO, 0);
> 	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
> 	gpu_write(pfdev, L2_PWROFF_LO, 0);
> }
> 
> So we instruct the GPU to turn off, but don't wait for it to complete.
> 
>> @@ -426,5 +426,58 @@ static int panfrost_device_suspend(struct device *dev)
>>   	return 0;
>>   }
>>   
>> -EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
>> -			      panfrost_device_resume, NULL);
>> +static int panfrost_device_resume(struct device *dev)
>> +{
>> +	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>> +	int ret;
>> +
>> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
>> +		ret = clk_enable(pfdev->clock);
>> +		if (ret)
>> +			return ret;
>> +
>> +		if (pfdev->bus_clock) {
>> +			ret = clk_enable(pfdev->bus_clock);
>> +			if (ret)
>> +				goto err_bus_clk;
>> +		}
>> +	}
>> +
>> +	ret = pm_runtime_force_resume(dev);
>> +	if (ret)
>> +		goto err_resume;
>> +
>> +	return 0;
>> +
>> +err_resume:
>> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
>> +		clk_disable(pfdev->bus_clock);
>> +err_bus_clk:
>> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
>> +		clk_disable(pfdev->clock);
>> +	return ret;
>> +}
>> +
>> +static int panfrost_device_suspend(struct device *dev)
>> +{
>> +	struct panfrost_device *pfdev = dev_get_drvdata(dev);
>> +	int ret;
>> +
>> +	ret = pm_runtime_force_suspend(dev);
>> +	if (ret)
>> +		return ret;
> 
> So here we've started shutting down the GPU (pm_runtime_force_suspend
> eventually calls panfrost_gpu_power_off). But nothing here waits for the
> GPU to actually finish shutting down. If we're unlucky there's dirty
> data in the caches (or coherency which can snoop into the caches) so the
> GPU could be actively making bus cycles...
> 
>> +
>> +	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
>> +		clk_disable(pfdev->clock);
> 
> ... until its clock goes and everything locks up.
> 
> Something should be waiting for the power down to complete. Either poll
> the L2_PWRTRANS_LO register to detect that the L2 is no longer
> transitioning, or wait for the GPU_IRQ_POWER_CHANGED_ALL interrupt to fire.
> 
> It would be good to test this with the system suspend doing the full
> power off, it should be safe so it would be a good stress test. Although
> whether we want the overhead in normal operation is another matter - so
> I suspect it should just be for testing purposes.
> 
> I would hope that we don't actually need the GPU_PM_CLK_DIS feature -
> this should work as long as the GPU is given the time to shutdown.
> Although note that actually cutting the power (patches 3 & 4) may expose
> us to implementation errata - there have been issues with designs not
> resetting correctly. I'm not sure if those made it into real products or
> if such bugs are confined to test chips. So for the sake of not causing
> regressions it's probably not a bad thing to have ;)
> 

Huge thanks for this analysis of that lockup issue. That was highly appreciated.

I've seen that anyway disabling the clocks during *runtime* suspend will make us
lose only a few nanoseconds (without polling for that register, nor waiting for
the interrupt you mentioned).... so I'd say that if L2_PWRTRANS_LO takes as well
just nanoseconds, I could put those clk_disable()/clk_enable() calls back to the
Runtime PM handlers as per my original idea.

I'll go on with checking if it is feasible to poll-wait to do this in runtime pm,
otherwise the v2 will still have this in system sleep handlers...

Anyway, as for the GPU_PM_CLK_DIS feature - I feel like being extremely careful
with this is still a good idea... thing is, even if we're sure that the GPU itself
is fine with us turning off/on clocks (even aggressively), I'm not sure that *all*
of the SoCs using Mali GPUs don't have any kind of quirk and for safety I don't
want to place any bets.

My idea is to add this with feature opt-in - then, if after some time we discover
that all SoCs want it and can safely use it, we can simplify the flow by removing
the feature bit.

Cheers,
Angelo

> Steve
> 
>> +
>> +		if (pfdev->bus_clock)
>> +			clk_disable(pfdev->bus_clock);
>> +	}
>> +
>> +	return 0;
>> +}
>> +
>> +EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
>> +	RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL)
>> +	SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
>> +};
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>> index 1ef38f60d5dc..d7f179eb8ea3 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>> @@ -25,6 +25,14 @@ struct panfrost_perfcnt;
>>   #define NUM_JOB_SLOTS 3
>>   #define MAX_PM_DOMAINS 5
>>   
>> +/**
>> + * enum panfrost_gpu_pm - Supported kernel power management features
>> + * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
>> + */
>> +enum panfrost_gpu_pm {
>> +	GPU_PM_CLK_DIS,
>> +};
>> +
>>   struct panfrost_features {
>>   	u16 id;
>>   	u16 revision;
>> @@ -75,6 +83,9 @@ struct panfrost_compatible {
>>   
>>   	/* Vendor implementation quirks callback */
>>   	void (*vendor_quirk)(struct panfrost_device *pfdev);
>> +
>> +	/* Allowed PM features */
>> +	u8 pm_features;
>>   };
>>   
>>   struct panfrost_device {
> 
> _______________________________________________
> Kernel mailing list -- kernel@mailman.collabora.com
> To unsubscribe send an email to kernel-leave@mailman.collabora.com

AngeloGioacchino Del Regno Oct. 31, 2023, 10:33 a.m. UTC | #4

Il 31/10/23 09:59, AngeloGioacchino Del Regno ha scritto:
> Il 30/10/23 15:57, Steven Price ha scritto:
>> On 30/10/2023 13:22, AngeloGioacchino Del Regno wrote:
>>> Currently, the GPU is being internally powered off for runtime suspend
>>> and turned back on for runtime resume through commands sent to it, but
>>> note that the GPU doesn't need to be clocked during the poweroff state,
>>> hence it is possible to save some power on selected platforms.
>>
>> Looks like a good addition - I suspect some implementations are quite
>> leaky so this could have a meaningful power saving in some cases.
>>
>>> Add suspend and resume handlers for full system sleep and then add
>>> a new panfrost_gpu_pm enumeration and a pm_features variable in the
>>> panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
>>> enable this power saving technique only on SoCs that are able to
>>> safely use it.
>>>
>>> Note that this was implemented only for the system sleep case and not
>>> for runtime PM because testing on one of my MediaTek platforms showed
>>> issues when turning on and off clocks aggressively (in PM runtime),
>>> with the GPU locking up and unable to soft reset, eventually resulting
>>> in a full system lockup.
>>
>> I think I know why you saw this - see below.
>>
>>> Doing this only for full system sleep never showed issues in 3 days
>>> of testing by suspending and resuming the system continuously.
>>>
>>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>>> ---
>>>   drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
>>>   drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
>>>   2 files changed, 68 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c 
>>> b/drivers/gpu/drm/panfrost/panfrost_device.c
>>> index 28f7046e1b1a..2022ed76a620 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
>>> @@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>>>       panfrost_job_enable_interrupts(pfdev);
>>>   }
>>> -static int panfrost_device_resume(struct device *dev)
>>> +static int panfrost_device_runtime_resume(struct device *dev)
>>>   {
>>>       struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>> @@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev)
>>>       return 0;
>>>   }
>>> -static int panfrost_device_suspend(struct device *dev)
>>> +static int panfrost_device_runtime_suspend(struct device *dev)
>>>   {
>>>       struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>
>> So this function calls panfrost_gpu_power_off() which is simply:
>>
>> void panfrost_gpu_power_off(struct panfrost_device *pfdev)
>> {
>>     gpu_write(pfdev, TILER_PWROFF_LO, 0);
>>     gpu_write(pfdev, SHADER_PWROFF_LO, 0);
>>     gpu_write(pfdev, L2_PWROFF_LO, 0);
>> }
>>
>> So we instruct the GPU to turn off, but don't wait for it to complete.
>>
>>> @@ -426,5 +426,58 @@ static int panfrost_device_suspend(struct device *dev)
>>>       return 0;
>>>   }
>>> -EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
>>> -                  panfrost_device_resume, NULL);
>>> +static int panfrost_device_resume(struct device *dev)
>>> +{
>>> +    struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>> +    int ret;
>>> +
>>> +    if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
>>> +        ret = clk_enable(pfdev->clock);
>>> +        if (ret)
>>> +            return ret;
>>> +
>>> +        if (pfdev->bus_clock) {
>>> +            ret = clk_enable(pfdev->bus_clock);
>>> +            if (ret)
>>> +                goto err_bus_clk;
>>> +        }
>>> +    }
>>> +
>>> +    ret = pm_runtime_force_resume(dev);
>>> +    if (ret)
>>> +        goto err_resume;
>>> +
>>> +    return 0;
>>> +
>>> +err_resume:
>>> +    if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
>>> +        clk_disable(pfdev->bus_clock);
>>> +err_bus_clk:
>>> +    if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
>>> +        clk_disable(pfdev->clock);
>>> +    return ret;
>>> +}
>>> +
>>> +static int panfrost_device_suspend(struct device *dev)
>>> +{
>>> +    struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>> +    int ret;
>>> +
>>> +    ret = pm_runtime_force_suspend(dev);
>>> +    if (ret)
>>> +        return ret;
>>
>> So here we've started shutting down the GPU (pm_runtime_force_suspend
>> eventually calls panfrost_gpu_power_off). But nothing here waits for the
>> GPU to actually finish shutting down. If we're unlucky there's dirty
>> data in the caches (or coherency which can snoop into the caches) so the
>> GPU could be actively making bus cycles...
>>
>>> +
>>> +    if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
>>> +        clk_disable(pfdev->clock);
>>
>> ... until its clock goes and everything locks up.
>>
>> Something should be waiting for the power down to complete. Either poll
>> the L2_PWRTRANS_LO register to detect that the L2 is no longer
>> transitioning, or wait for the GPU_IRQ_POWER_CHANGED_ALL interrupt to fire.
>>
>> It would be good to test this with the system suspend doing the full
>> power off, it should be safe so it would be a good stress test. Although
>> whether we want the overhead in normal operation is another matter - so
>> I suspect it should just be for testing purposes.
>>
>> I would hope that we don't actually need the GPU_PM_CLK_DIS feature -
>> this should work as long as the GPU is given the time to shutdown.
>> Although note that actually cutting the power (patches 3 & 4) may expose
>> us to implementation errata - there have been issues with designs not
>> resetting correctly. I'm not sure if those made it into real products or
>> if such bugs are confined to test chips. So for the sake of not causing
>> regressions it's probably not a bad thing to have ;)
>>
> 
> Huge thanks for this analysis of that lockup issue. That was highly appreciated.
> 
> I've seen that anyway disabling the clocks during *runtime* suspend will make us
> lose only a few nanoseconds (without polling for that register, nor waiting for
> the interrupt you mentioned).... so I'd say that if L2_PWRTRANS_LO takes as well
> just nanoseconds, I could put those clk_disable()/clk_enable() calls back to the
> Runtime PM handlers as per my original idea.
> 
> I'll go on with checking if it is feasible to poll-wait to do this in runtime pm,
> otherwise the v2 will still have this in system sleep handlers...
> 
> Anyway, as for the GPU_PM_CLK_DIS feature - I feel like being extremely careful
> with this is still a good idea... thing is, even if we're sure that the GPU itself
> is fine with us turning off/on clocks (even aggressively), I'm not sure that *all*
> of the SoCs using Mali GPUs don't have any kind of quirk and for safety I don't
> want to place any bets.
> 
> My idea is to add this with feature opt-in - then, if after some time we discover
> that all SoCs want it and can safely use it, we can simplify the flow by removing
> the feature bit.
> 

Sorry for the double email - after some analysis and some trials of your wait
solution, I've just seen that... well, panfrost_gpu_power_off() is, and has always
been entirely broken, as in it has never done any poweroff!

What it does is:

	gpu_write(pfdev, TILER_PWROFF_LO, 0);
	gpu_write(pfdev, SHADER_PWROFF_LO, 0);
	gpu_write(pfdev, L2_PWROFF_LO, 0);

...but the {TILER,SHADER,L2}_PWROFF_LO register is a bitmap and in order to request
poweroff of tiler/shader cores and cache we shall flip bits to 1, but this is doing
the *exact opposite* of what it's supposed to do.

It's doing nothing, at all.

I've just fixed that locally (running some tests on MT8195 as we speak) like so:

gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present & core_mask);
gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present & core_mask);

...and now it appears that I can actually manage clocks aggressively during runtime
power management without any side issues.

Apparently, v2 of this series will have "more juice" than initially intended...

Angelo

> Cheers,
> Angelo
> 
>> Steve
>>
>>> +
>>> +        if (pfdev->bus_clock)
>>> +            clk_disable(pfdev->bus_clock);
>>> +    }
>>> +
>>> +    return 0;
>>> +}
>>> +
>>> +EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
>>> +    RUNTIME_PM_OPS(panfrost_device_runtime_suspend, 
>>> panfrost_device_runtime_resume, NULL)
>>> +    SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
>>> +};
>>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h 
>>> b/drivers/gpu/drm/panfrost/panfrost_device.h
>>> index 1ef38f60d5dc..d7f179eb8ea3 100644
>>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>>> @@ -25,6 +25,14 @@ struct panfrost_perfcnt;
>>>   #define NUM_JOB_SLOTS 3
>>>   #define MAX_PM_DOMAINS 5
>>> +/**
>>> + * enum panfrost_gpu_pm - Supported kernel power management features
>>> + * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
>>> + */
>>> +enum panfrost_gpu_pm {
>>> +    GPU_PM_CLK_DIS,
>>> +};
>>> +
>>>   struct panfrost_features {
>>>       u16 id;
>>>       u16 revision;
>>> @@ -75,6 +83,9 @@ struct panfrost_compatible {
>>>       /* Vendor implementation quirks callback */
>>>       void (*vendor_quirk)(struct panfrost_device *pfdev);
>>> +
>>> +    /* Allowed PM features */
>>> +    u8 pm_features;
>>>   };
>>>   struct panfrost_device {
>>

AngeloGioacchino Del Regno Oct. 31, 2023, 1:20 p.m. UTC | #5

Il 31/10/23 04:18, Chen-Yu Tsai ha scritto:
> On Mon, Oct 30, 2023 at 9:23 PM AngeloGioacchino Del Regno
> <angelogioacchino.delregno@collabora.com> wrote:
>>
>> Currently, the GPU is being internally powered off for runtime suspend
>> and turned back on for runtime resume through commands sent to it, but
>> note that the GPU doesn't need to be clocked during the poweroff state,
>> hence it is possible to save some power on selected platforms.
>>
>> Add suspend and resume handlers for full system sleep and then add
>> a new panfrost_gpu_pm enumeration and a pm_features variable in the
>> panfrost_compatible structure: BIT(GPU_PM_CLK_DIS) will be used to
>> enable this power saving technique only on SoCs that are able to
>> safely use it.
>>
>> Note that this was implemented only for the system sleep case and not
>> for runtime PM because testing on one of my MediaTek platforms showed
>> issues when turning on and off clocks aggressively (in PM runtime),
>> with the GPU locking up and unable to soft reset, eventually resulting
>> in a full system lockup.
>>
>> Doing this only for full system sleep never showed issues in 3 days
>> of testing by suspending and resuming the system continuously.
>>
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>> ---
>>   drivers/gpu/drm/panfrost/panfrost_device.c | 61 ++++++++++++++++++++--
>>   drivers/gpu/drm/panfrost/panfrost_device.h | 11 ++++
>>   2 files changed, 68 insertions(+), 4 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
>> index 28f7046e1b1a..2022ed76a620 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.c
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.c
>> @@ -403,7 +403,7 @@ void panfrost_device_reset(struct panfrost_device *pfdev)
>>          panfrost_job_enable_interrupts(pfdev);
>>   }
>>
>> -static int panfrost_device_resume(struct device *dev)
>> +static int panfrost_device_runtime_resume(struct device *dev)
>>   {
>>          struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>
>> @@ -413,7 +413,7 @@ static int panfrost_device_resume(struct device *dev)
>>          return 0;
>>   }
>>
>> -static int panfrost_device_suspend(struct device *dev)
>> +static int panfrost_device_runtime_suspend(struct device *dev)
>>   {
>>          struct panfrost_device *pfdev = dev_get_drvdata(dev);
>>
>> @@ -426,5 +426,58 @@ static int panfrost_device_suspend(struct device *dev)
>>          return 0;
>>   }
>>
>> -EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
>> -                             panfrost_device_resume, NULL);
>> +static int panfrost_device_resume(struct device *dev)
>> +{
>> +       struct panfrost_device *pfdev = dev_get_drvdata(dev);
>> +       int ret;
>> +
>> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
>> +               ret = clk_enable(pfdev->clock);
>> +               if (ret)
>> +                       return ret;
>> +
>> +               if (pfdev->bus_clock) {
>> +                       ret = clk_enable(pfdev->bus_clock);
>> +                       if (ret)
>> +                               goto err_bus_clk;
>> +               }
>> +       }
>> +
>> +       ret = pm_runtime_force_resume(dev);
>> +       if (ret)
>> +               goto err_resume;
>> +
>> +       return 0;
>> +
>> +err_resume:
>> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
>> +               clk_disable(pfdev->bus_clock);
>> +err_bus_clk:
>> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
>> +               clk_disable(pfdev->clock);
>> +       return ret;
>> +}
>> +
>> +static int panfrost_device_suspend(struct device *dev)
>> +{
>> +       struct panfrost_device *pfdev = dev_get_drvdata(dev);
>> +       int ret;
>> +
>> +       ret = pm_runtime_force_suspend(dev);
>> +       if (ret)
>> +               return ret;
>> +
>> +       if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
>> +               clk_disable(pfdev->clock);
>> +
>> +               if (pfdev->bus_clock)
>> +                       clk_disable(pfdev->bus_clock);
>> +       }
>> +
>> +       return 0;
>> +}
>> +
>> +EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
>> +       RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL)
>> +       SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
>> +};
>> diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
>> index 1ef38f60d5dc..d7f179eb8ea3 100644
>> --- a/drivers/gpu/drm/panfrost/panfrost_device.h
>> +++ b/drivers/gpu/drm/panfrost/panfrost_device.h
>> @@ -25,6 +25,14 @@ struct panfrost_perfcnt;
>>   #define NUM_JOB_SLOTS 3
>>   #define MAX_PM_DOMAINS 5
>>
>> +/**
>> + * enum panfrost_gpu_pm - Supported kernel power management features
>> + * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
>> + */
>> +enum panfrost_gpu_pm {
>> +       GPU_PM_CLK_DIS,
>> +};
>> +
>>   struct panfrost_features {
>>          u16 id;
>>          u16 revision;
>> @@ -75,6 +83,9 @@ struct panfrost_compatible {
>>
>>          /* Vendor implementation quirks callback */
>>          void (*vendor_quirk)(struct panfrost_device *pfdev);
>> +
>> +       /* Allowed PM features */
>> +       u8 pm_features;
> 
> Nit: I'd just use bitfields. They are easier to set and get without
> extra macros, and the naming would be self-explanatory. Unless you
> expect a need to do mask checking (though the compiler might be able
> to optimize this).
> 

I don't expect a need to do mask checking, but I don't expect the opposite either..
...this could happen in the future, or maybe not, and this becomes a bool, even.

That's why I went with a u8 :-)

Let's keep it flexible.

Thanks,
Angelo

> ChenYu
> 
>>   };
>>
>>   struct panfrost_device {
>> --
>> 2.42.0
>>

Steven Price Nov. 1, 2023, 11:28 a.m. UTC | #6

On 31/10/2023 10:33, AngeloGioacchino Del Regno wrote:
<snip>
>> Anyway, as for the GPU_PM_CLK_DIS feature - I feel like being
>> extremely careful
>> with this is still a good idea... thing is, even if we're sure that
>> the GPU itself
>> is fine with us turning off/on clocks (even aggressively), I'm not
>> sure that *all*
>> of the SoCs using Mali GPUs don't have any kind of quirk and for
>> safety I don't
>> want to place any bets.
>>
>> My idea is to add this with feature opt-in - then, if after some time
>> we discover
>> that all SoCs want it and can safely use it, we can simplify the flow
>> by removing
>> the feature bit.

Yeah I agree it's best to start with opt-in that way we can avoid
regressions and focus the changes on platforms where this matters.

> 
> Sorry for the double email - after some analysis and some trials of your
> wait
> solution, I've just seen that... well, panfrost_gpu_power_off() is, and
> has always
> been entirely broken, as in it has never done any poweroff!
> 
> What it does is:
> 
>     gpu_write(pfdev, TILER_PWROFF_LO, 0);
>     gpu_write(pfdev, SHADER_PWROFF_LO, 0);
>     gpu_write(pfdev, L2_PWROFF_LO, 0);
> 
> ...but the {TILER,SHADER,L2}_PWROFF_LO register is a bitmap and in order
> to request
> poweroff of tiler/shader cores and cache we shall flip bits to 1, but
> this is doing
> the *exact opposite* of what it's supposed to do.
> 
> It's doing nothing, at all.

Doh! I'd looked at that function when replying to your email and still
not spotted that it is broken as you point out!

I guess I always get a little distracted by the fact that it's
technically "broken" in two other ways: first only the _LO registers are
used (but equally there are no implementations with > 32 cores so this
doesn't matter) and secondly we shouldn't really trigger the L2 power
off while the tiler/shader are powering down. Although it doesn't matter
here because the L2 power down will coordinate with the tiler and shader
and do the right thing. In reality a single write is sufficient as the
L2 power down will trigger the dependent cores to power down:

	gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present);

> I've just fixed that locally (running some tests on MT8195 as we speak)
> like so:
> 
> gpu_write(pfdev, TILER_PWROFF_LO, pfdev->features.tiler_present);
> gpu_write(pfdev, SHADER_PWROFF_LO, pfdev->features.shader_present &
> core_mask);
> gpu_write(pfdev, L2_PWROFF_LO, pfdev->features.l2_present & core_mask);

But this should be fine too - as above the L2 transition will just wait.

Please can you include a fix (as a separate patch) for that in your next
posting? I think that should be worthy of a backport.

> ...and now it appears that I can actually manage clocks aggressively
> during runtime
> power management without any side issues.
> 
> Apparently, v2 of this series will have "more juice" than initially
> intended...

Thanks for looking in to this!

Thanks,

Steve

diff mbox series

Patch

diff --git a/drivers/gpu/drm/panfrost/panfrost_device.c b/drivers/gpu/drm/panfrost/panfrost_device.c
index 28f7046e1b1a..2022ed76a620 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.c
+++ b/drivers/gpu/drm/panfrost/panfrost_device.c
@@ -403,7 +403,7 @@  void panfrost_device_reset(struct panfrost_device *pfdev)
 	panfrost_job_enable_interrupts(pfdev);
 }
 
-static int panfrost_device_resume(struct device *dev)
+static int panfrost_device_runtime_resume(struct device *dev)
 {
 	struct panfrost_device *pfdev = dev_get_drvdata(dev);
 
@@ -413,7 +413,7 @@  static int panfrost_device_resume(struct device *dev)
 	return 0;
 }
 
-static int panfrost_device_suspend(struct device *dev)
+static int panfrost_device_runtime_suspend(struct device *dev)
 {
 	struct panfrost_device *pfdev = dev_get_drvdata(dev);
 
@@ -426,5 +426,58 @@  static int panfrost_device_suspend(struct device *dev)
 	return 0;
 }
 
-EXPORT_GPL_RUNTIME_DEV_PM_OPS(panfrost_pm_ops, panfrost_device_suspend,
-			      panfrost_device_resume, NULL);
+static int panfrost_device_resume(struct device *dev)
+{
+	struct panfrost_device *pfdev = dev_get_drvdata(dev);
+	int ret;
+
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
+		ret = clk_enable(pfdev->clock);
+		if (ret)
+			return ret;
+
+		if (pfdev->bus_clock) {
+			ret = clk_enable(pfdev->bus_clock);
+			if (ret)
+				goto err_bus_clk;
+		}
+	}
+
+	ret = pm_runtime_force_resume(dev);
+	if (ret)
+		goto err_resume;
+
+	return 0;
+
+err_resume:
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS) && pfdev->bus_clock)
+		clk_disable(pfdev->bus_clock);
+err_bus_clk:
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS))
+		clk_disable(pfdev->clock);
+	return ret;
+}
+
+static int panfrost_device_suspend(struct device *dev)
+{
+	struct panfrost_device *pfdev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = pm_runtime_force_suspend(dev);
+	if (ret)
+		return ret;
+
+	if (pfdev->comp->pm_features & BIT(GPU_PM_CLK_DIS)) {
+		clk_disable(pfdev->clock);
+
+		if (pfdev->bus_clock)
+			clk_disable(pfdev->bus_clock);
+	}
+
+	return 0;
+}
+
+EXPORT_GPL_DEV_PM_OPS(panfrost_pm_ops) = {
+	RUNTIME_PM_OPS(panfrost_device_runtime_suspend, panfrost_device_runtime_resume, NULL)
+	SYSTEM_SLEEP_PM_OPS(panfrost_device_suspend, panfrost_device_resume)
+};
diff --git a/drivers/gpu/drm/panfrost/panfrost_device.h b/drivers/gpu/drm/panfrost/panfrost_device.h
index 1ef38f60d5dc..d7f179eb8ea3 100644
--- a/drivers/gpu/drm/panfrost/panfrost_device.h
+++ b/drivers/gpu/drm/panfrost/panfrost_device.h
@@ -25,6 +25,14 @@  struct panfrost_perfcnt;
 #define NUM_JOB_SLOTS 3
 #define MAX_PM_DOMAINS 5
 
+/**
+ * enum panfrost_gpu_pm - Supported kernel power management features
+ * @GPU_PM_CLK_DIS:  Allow disabling clocks during system suspend
+ */
+enum panfrost_gpu_pm {
+	GPU_PM_CLK_DIS,
+};
+
 struct panfrost_features {
 	u16 id;
 	u16 revision;
@@ -75,6 +83,9 @@  struct panfrost_compatible {
 
 	/* Vendor implementation quirks callback */
 	void (*vendor_quirk)(struct panfrost_device *pfdev);
+
+	/* Allowed PM features */
+	u8 pm_features;
 };
 
 struct panfrost_device {