[RESEND,v6,09/11] drm/mediatek: gamma: Add support for 12-bit LUT and MT8195

Message ID 20230727094633.22505-10-angelogioacchino.delregno@collabora.com
State New
Headers
Series MediaTek DDP GAMMA - 12-bit LUT support |

Commit Message

AngeloGioacchino Del Regno July 27, 2023, 9:46 a.m. UTC
  Add support for 12-bit gamma lookup tables and introduce the first
user for it: MT8195.
While at it, also reorder the variables in mtk_gamma_set_common()
and rename `lut_base` to `lut0_base` to improve readability.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Jason-JH.Lin <jason-jh.lin@mediatek.com>
---
 drivers/gpu/drm/mediatek/mtk_disp_gamma.c | 61 ++++++++++++++++++-----
 1 file changed, 48 insertions(+), 13 deletions(-)
  

Comments

Alexandre Mergnat July 27, 2023, 11:03 a.m. UTC | #1
Hi Angelo !

On 27/07/2023 11:46, AngeloGioacchino Del Regno wrote:
> Add support for 12-bit gamma lookup tables and introduce the first
> user for it: MT8195.
> While at it, also reorder the variables in mtk_gamma_set_common()
> and rename `lut_base` to `lut0_base` to improve readability.
> 
> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
> Reviewed-by: Jason-JH.Lin <jason-jh.lin@mediatek.com>
> ---
>   drivers/gpu/drm/mediatek/mtk_disp_gamma.c | 61 ++++++++++++++++++-----
>   1 file changed, 48 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
> index f1a0b18b6c1a..e0e2d2bdbf59 100644
> --- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
> +++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
> @@ -27,12 +27,20 @@
>   #define DISP_GAMMA_SIZE_VSIZE				GENMASK(12, 0)
>   #define DISP_GAMMA_BANK				0x0100
>   #define DISP_GAMMA_BANK_BANK				GENMASK(1, 0)
> +#define DISP_GAMMA_BANK_DATA_MODE			BIT(2)
>   #define DISP_GAMMA_LUT				0x0700
> +#define DISP_GAMMA_LUT1				0x0b00

Is this offset generic to all MTK SoC which support this driver ?

>   
> +/* For 10 bit LUT layout, R/G/B are in the same register */
>   #define DISP_GAMMA_LUT_10BIT_R			GENMASK(29, 20)
>   #define DISP_GAMMA_LUT_10BIT_G			GENMASK(19, 10)
>   #define DISP_GAMMA_LUT_10BIT_B			GENMASK(9, 0)
>   
> +/* For 12 bit LUT layout, R/G are in LUT, B is in LUT1 */

As I understood from the application processor registers (v0.4), R/G are 
in LUT, B is in LUT1 for 10bit and 12bit for MT8195. Can you check 
please to be sure ?

> +#define DISP_GAMMA_LUT_12BIT_R			GENMASK(11, 0)
> +#define DISP_GAMMA_LUT_12BIT_G			GENMASK(23, 12)
> +#define DISP_GAMMA_LUT_12BIT_B			GENMASK(11, 0)
> +
>   #define LUT_10BIT_MASK				0x03ff
>   #define LUT_BITS_DEFAULT			10
>   #define LUT_SIZE_DEFAULT			512
> @@ -83,14 +91,15 @@ unsigned int mtk_gamma_get_lut_size(struct device *dev)
>   void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crtc_state *state)
>   {
>   	struct mtk_disp_gamma *gamma = dev_get_drvdata(dev);
> -	unsigned int i;
> +	void __iomem *lut0_base = regs + DISP_GAMMA_LUT;
> +	void __iomem *lut1_base = regs + DISP_GAMMA_LUT1;
> +	u32 cfg_val, data_mode, lbank_val, word[2];
> +	int cur_bank, num_lut_banks;
> +	u16 lut_bank_size, lut_size;
>   	struct drm_color_lut *lut;
> -	void __iomem *lut_base;
> +	unsigned int i;
>   	bool lut_diff;
> -	u16 lut_bank_size, lut_size;
>   	u8 lut_bits;
> -	u32 cfg_val, lbank_val, word;
> -	int cur_bank, num_lut_banks;
>   
>   	/* If there's no gamma lut there's nothing to do here. */
>   	if (!state->gamma_lut)
> @@ -110,14 +119,17 @@ void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crt
>   	num_lut_banks = lut_size / lut_bank_size;
>   
>   	cfg_val = readl(regs + DISP_GAMMA_CFG);
> -	lut_base = regs + DISP_GAMMA_LUT;
>   	lut = (struct drm_color_lut *)state->gamma_lut->data;
>   
> +	/* Switch to 12 bits data mode if supported */
> +	data_mode = FIELD_PREP(DISP_GAMMA_BANK_DATA_MODE, !!(lut_bits == 12));
> +
>   	for (cur_bank = 0; cur_bank < num_lut_banks; cur_bank++) {
>   
>   		/* Switch gamma bank and set data mode before writing LUT */
>   		if (num_lut_banks > 1) {
>   			lbank_val = FIELD_PREP(DISP_GAMMA_BANK_BANK, cur_bank);
> +			lbank_val |= data_mode;
>   			writel(lbank_val, regs + DISP_GAMMA_BANK);
>   		}
>   
> @@ -130,9 +142,15 @@ void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crt
>   			hwlut.blue = drm_color_lut_extract(lut[n].blue, lut_bits);
>   
>   			if (!lut_diff || (i % 2 == 0)) {
> -				word = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, hwlut.red);
> -				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, hwlut.green);
> -				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, hwlut.blue);
> +				if (lut_bits == 12) {
> +					word[0] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_R, hwlut.red);
> +					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_12BIT_G, hwlut.green);
> +					word[1] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_B, hwlut.blue);
> +				} else {
> +					word[0] = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, hwlut.red);
> +					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, hwlut.green);
> +					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, hwlut.blue);
> +				}
>   			} else {
>   				diff.red = lut[n].red - lut[n - 1].red;
>   				diff.red = drm_color_lut_extract(diff.red, lut_bits);
> @@ -143,11 +161,19 @@ void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crt
>   				diff.blue = lut[n].blue - lut[n - 1].blue;
>   				diff.blue = drm_color_lut_extract(diff.blue, lut_bits);
>   
> -				word = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, diff.red);
> -				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, diff.green);
> -				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, diff.blue);
> +				if (lut_bits == 12) {
> +					word[0] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_R, diff.red);
> +					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_12BIT_G, diff.green);
> +					word[1] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_B, diff.blue);
> +				} else {
> +					word[0] = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, diff.red);
> +					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, diff.green);
> +					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, diff.blue);
> +				}
>   			}
> -			writel(word, (lut_base + i * 4));
> +			writel(word[0], (lut0_base + i * 4));
> +			if (lut_bits == 12)
> +				writel(word[1], (lut1_base + i * 4));

ditto

>   		}
>   	}
>   
> @@ -271,11 +297,20 @@ static const struct mtk_disp_gamma_data mt8183_gamma_driver_data = {
>   	.lut_size = 512,
>   };
>   
> +static const struct mtk_disp_gamma_data mt8195_gamma_driver_data = {
> +	.lut_bank_size = 256,
> +	.lut_bits = 12,

If I'm right, ".lut_bits = 10" will not work properly.

> +	.lut_diff = true,
> +	.lut_size = 1024,
> +};
> +
>   static const struct of_device_id mtk_disp_gamma_driver_dt_match[] = {
>   	{ .compatible = "mediatek,mt8173-disp-gamma",
>   	  .data = &mt8173_gamma_driver_data},
>   	{ .compatible = "mediatek,mt8183-disp-gamma",
>   	  .data = &mt8183_gamma_driver_data},
> +	{ .compatible = "mediatek,mt8195-disp-gamma",
> +	  .data = &mt8195_gamma_driver_data},
>   	{},
>   };
>   MODULE_DEVICE_TABLE(of, mtk_disp_gamma_driver_dt_match);
  
AngeloGioacchino Del Regno July 27, 2023, 1:06 p.m. UTC | #2
Il 27/07/23 13:03, Alexandre Mergnat ha scritto:
> Hi Angelo !
> 
> On 27/07/2023 11:46, AngeloGioacchino Del Regno wrote:
>> Add support for 12-bit gamma lookup tables and introduce the first
>> user for it: MT8195.
>> While at it, also reorder the variables in mtk_gamma_set_common()
>> and rename `lut_base` to `lut0_base` to improve readability.
>>
>> Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
>> Reviewed-by: Jason-JH.Lin <jason-jh.lin@mediatek.com>
>> ---
>>   drivers/gpu/drm/mediatek/mtk_disp_gamma.c | 61 ++++++++++++++++++-----
>>   1 file changed, 48 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c 
>> b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
>> index f1a0b18b6c1a..e0e2d2bdbf59 100644
>> --- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
>> +++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
>> @@ -27,12 +27,20 @@
>>   #define DISP_GAMMA_SIZE_VSIZE                GENMASK(12, 0)
>>   #define DISP_GAMMA_BANK                0x0100
>>   #define DISP_GAMMA_BANK_BANK                GENMASK(1, 0)
>> +#define DISP_GAMMA_BANK_DATA_MODE            BIT(2)
>>   #define DISP_GAMMA_LUT                0x0700
>> +#define DISP_GAMMA_LUT1                0x0b00
> 
> Is this offset generic to all MTK SoC which support this driver ?
> 
>> +/* For 10 bit LUT layout, R/G/B are in the same register */
>>   #define DISP_GAMMA_LUT_10BIT_R            GENMASK(29, 20)
>>   #define DISP_GAMMA_LUT_10BIT_G            GENMASK(19, 10)
>>   #define DISP_GAMMA_LUT_10BIT_B            GENMASK(9, 0)
>> +/* For 12 bit LUT layout, R/G are in LUT, B is in LUT1 */
> 
> As I understood from the application processor registers (v0.4), R/G are in LUT, B 
> is in LUT1 for 10bit and 12bit for MT8195. Can you check please to be sure ?
> 

That's right, but here I'm implying that 10-bit LUT is only for older SoCs, and
all of them have got the same register layout with one LUT register for R, G, B,
while all the new SoCs, which have got 12-bits LUT support, have got the new
register layout with two LUT registers (and multiple banks).
Infact, the MT8195 SoC was added here with 12-bits LUT support only (as the LUT
parameters extraction is easily handled by the drm_color_lut_extract() function).

The alternative would've been to add two compatibles, like
"mediatek,mt8195-disp-gamma-10bits" and "mediatek,mt8195-disp-gamma-12bits",
or a boolean property like "mediatek,lut-12bits" which would appear literally
everywhere starting from a certain point in time (since there's no reason to
use 10-bits LUT on MT8195, that starts now!).

Even then, consider the complication in code, where mtk_gamma_set_common()
would have to handle:
- 10-bits, layout A
- 10-bits, layout B -> but fallback to layout A if this is AAL
- 12-bits layout

is_aal = !(gamma && gamma->data);

for_each_bank()
{
	if (num_lut_banks > 1) write_num_bank();

	for (i = 0; i < lut_bank_size; i++) {
		.......

		if (!lut_diff || (i % 2 == 0)) {
			if (lut_bits == 12 || (lut_bits == 10 && layout_b)) {
				... setup word[0],[1] ...
			} else if (layout_b && !is_aal) {
				...setup word[0],[1]...
			} else {
				...setup word[0]
			}
		} else {
			 ^^^ almost repeat the same ^^^
		}
		writel(word[0], (...));
		if (lut_bits == 12 || (lut_bits == 10 && layout_b) && !is_aal)
			writel(word[i] (....));
	}
}

probe() {
	if (of_property_read_bool(dev->of_node, "mediatek,lut-12bits") ||
	    data->supports_only_12bits)
		priv->lut_bits = 12;
	else
		priv->lut_bits = 10;
}

...at least, that's the implementation that I would do to solve your concern,
which isn't *too bad*, but still, a big question arises here...


Why should we care about supporting *both* 10-bit and 12-bit Gamma LUTs on
the *same* SoC?


A 12-bit LUT gives us more precision and there's no penalty if we want to
convert a 10-bit LUT to a 12-bits one, as we're simply "ignoring" the value
of two bits per component (no expensive calculation involved)...

Is there anything that I'm underestimating here?

Cheers,
Angelo

>> +#define DISP_GAMMA_LUT_12BIT_R            GENMASK(11, 0)
>> +#define DISP_GAMMA_LUT_12BIT_G            GENMASK(23, 12)
>> +#define DISP_GAMMA_LUT_12BIT_B            GENMASK(11, 0)
>> +
>>   #define LUT_10BIT_MASK                0x03ff
>>   #define LUT_BITS_DEFAULT            10
>>   #define LUT_SIZE_DEFAULT            512
>> @@ -83,14 +91,15 @@ unsigned int mtk_gamma_get_lut_size(struct device *dev)
>>   void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct 
>> drm_crtc_state *state)
>>   {
>>       struct mtk_disp_gamma *gamma = dev_get_drvdata(dev);
>> -    unsigned int i;
>> +    void __iomem *lut0_base = regs + DISP_GAMMA_LUT;
>> +    void __iomem *lut1_base = regs + DISP_GAMMA_LUT1;
>> +    u32 cfg_val, data_mode, lbank_val, word[2];
>> +    int cur_bank, num_lut_banks;
>> +    u16 lut_bank_size, lut_size;
>>       struct drm_color_lut *lut;
>> -    void __iomem *lut_base;
>> +    unsigned int i;
>>       bool lut_diff;
>> -    u16 lut_bank_size, lut_size;
>>       u8 lut_bits;
>> -    u32 cfg_val, lbank_val, word;
>> -    int cur_bank, num_lut_banks;
>>       /* If there's no gamma lut there's nothing to do here. */
>>       if (!state->gamma_lut)
>> @@ -110,14 +119,17 @@ void mtk_gamma_set_common(struct device *dev, void __iomem 
>> *regs, struct drm_crt
>>       num_lut_banks = lut_size / lut_bank_size;
>>       cfg_val = readl(regs + DISP_GAMMA_CFG);
>> -    lut_base = regs + DISP_GAMMA_LUT;
>>       lut = (struct drm_color_lut *)state->gamma_lut->data;
>> +    /* Switch to 12 bits data mode if supported */
>> +    data_mode = FIELD_PREP(DISP_GAMMA_BANK_DATA_MODE, !!(lut_bits == 12));
>> +
>>       for (cur_bank = 0; cur_bank < num_lut_banks; cur_bank++) {
>>           /* Switch gamma bank and set data mode before writing LUT */
>>           if (num_lut_banks > 1) {
>>               lbank_val = FIELD_PREP(DISP_GAMMA_BANK_BANK, cur_bank);
>> +            lbank_val |= data_mode;
>>               writel(lbank_val, regs + DISP_GAMMA_BANK);
>>           }
>> @@ -130,9 +142,15 @@ void mtk_gamma_set_common(struct device *dev, void __iomem 
>> *regs, struct drm_crt
>>               hwlut.blue = drm_color_lut_extract(lut[n].blue, lut_bits);
>>               if (!lut_diff || (i % 2 == 0)) {
>> -                word = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, hwlut.red);
>> -                word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, hwlut.green);
>> -                word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, hwlut.blue);
>> +                if (lut_bits == 12) {
>> +                    word[0] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_R, hwlut.red);
>> +                    word[0] |= FIELD_PREP(DISP_GAMMA_LUT_12BIT_G, hwlut.green);
>> +                    word[1] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_B, hwlut.blue);
>> +                } else {
>> +                    word[0] = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, hwlut.red);
>> +                    word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, hwlut.green);
>> +                    word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, hwlut.blue);
>> +                }
>>               } else {
>>                   diff.red = lut[n].red - lut[n - 1].red;
>>                   diff.red = drm_color_lut_extract(diff.red, lut_bits);
>> @@ -143,11 +161,19 @@ void mtk_gamma_set_common(struct device *dev, void __iomem 
>> *regs, struct drm_crt
>>                   diff.blue = lut[n].blue - lut[n - 1].blue;
>>                   diff.blue = drm_color_lut_extract(diff.blue, lut_bits);
>> -                word = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, diff.red);
>> -                word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, diff.green);
>> -                word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, diff.blue);
>> +                if (lut_bits == 12) {
>> +                    word[0] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_R, diff.red);
>> +                    word[0] |= FIELD_PREP(DISP_GAMMA_LUT_12BIT_G, diff.green);
>> +                    word[1] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_B, diff.blue);
>> +                } else {
>> +                    word[0] = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, diff.red);
>> +                    word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, diff.green);
>> +                    word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, diff.blue);
>> +                }
>>               }
>> -            writel(word, (lut_base + i * 4));
>> +            writel(word[0], (lut0_base + i * 4));
>> +            if (lut_bits == 12)
>> +                writel(word[1], (lut1_base + i * 4));
> 
> ditto
> 
>>           }
>>       }
>> @@ -271,11 +297,20 @@ static const struct mtk_disp_gamma_data 
>> mt8183_gamma_driver_data = {
>>       .lut_size = 512,
>>   };
>> +static const struct mtk_disp_gamma_data mt8195_gamma_driver_data = {
>> +    .lut_bank_size = 256,
>> +    .lut_bits = 12,
> 
> If I'm right, ".lut_bits = 10" will not work properly.
> 
>> +    .lut_diff = true,
>> +    .lut_size = 1024,
>> +};
>> +
>>   static const struct of_device_id mtk_disp_gamma_driver_dt_match[] = {
>>       { .compatible = "mediatek,mt8173-disp-gamma",
>>         .data = &mt8173_gamma_driver_data},
>>       { .compatible = "mediatek,mt8183-disp-gamma",
>>         .data = &mt8183_gamma_driver_data},
>> +    { .compatible = "mediatek,mt8195-disp-gamma",
>> +      .data = &mt8195_gamma_driver_data},
>>       {},
>>   };
>>   MODULE_DEVICE_TABLE(of, mtk_disp_gamma_driver_dt_match);
>
  
Alexandre Mergnat July 28, 2023, 12:58 p.m. UTC | #3
Hi Angelo

On 27/07/2023 15:06, AngeloGioacchino Del Regno wrote:
>>> +/* For 10 bit LUT layout, R/G/B are in the same register */
>>>   #define DISP_GAMMA_LUT_10BIT_R            GENMASK(29, 20)
>>>   #define DISP_GAMMA_LUT_10BIT_G            GENMASK(19, 10)
>>>   #define DISP_GAMMA_LUT_10BIT_B            GENMASK(9, 0)
>>> +/* For 12 bit LUT layout, R/G are in LUT, B is in LUT1 */
>>
>> As I understood from the application processor registers (v0.4), R/G 
>> are in LUT, B is in LUT1 for 10bit and 12bit for MT8195. Can you check 
>> please to be sure ?
>>
> 
> That's right, but here I'm implying that 10-bit LUT is only for older 
> SoCs, and
> all of them have got the same register layout with one LUT register for 
> R, G, B,
> while all the new SoCs, which have got 12-bits LUT support, have got the 
> new
> register layout with two LUT registers (and multiple banks).
> Infact, the MT8195 SoC was added here with 12-bits LUT support only (as 
> the LUT
> parameters extraction is easily handled by the drm_color_lut_extract() 
> function).
> 
> The alternative would've been to add two compatibles, like
> "mediatek,mt8195-disp-gamma-10bits" and 
> "mediatek,mt8195-disp-gamma-12bits",
> or a boolean property like "mediatek,lut-12bits" which would appear 
> literally
> everywhere starting from a certain point in time (since there's no 
> reason to
> use 10-bits LUT on MT8195, that starts now!).
> 
> Even then, consider the complication in code, where mtk_gamma_set_common()
> would have to handle:
> - 10-bits, layout A
> - 10-bits, layout B -> but fallback to layout A if this is AAL
> - 12-bits layout
> 
> is_aal = !(gamma && gamma->data);
> 
> for_each_bank()
> {
>      if (num_lut_banks > 1) write_num_bank();
> 
>      for (i = 0; i < lut_bank_size; i++) {
>          .......
> 
>          if (!lut_diff || (i % 2 == 0)) {
>              if (lut_bits == 12 || (lut_bits == 10 && layout_b)) {
>                  ... setup word[0],[1] ...
>              } else if (layout_b && !is_aal) {
>                  ...setup word[0],[1]...
>              } else {
>                  ...setup word[0]
>              }
>          } else {
>               ^^^ almost repeat the same ^^^
>          }
>          writel(word[0], (...));
>          if (lut_bits == 12 || (lut_bits == 10 && layout_b) && !is_aal)
>              writel(word[i] (....));
>      }
> }
> 
> probe() {
>      if (of_property_read_bool(dev->of_node, "mediatek,lut-12bits") ||
>          data->supports_only_12bits)
>          priv->lut_bits = 12;
>      else
>          priv->lut_bits = 10;
> }
> 
> ...at least, that's the implementation that I would do to solve your 
> concern,
> which isn't *too bad*, but still, a big question arises here...
> 
> 
> Why should we care about supporting *both* 10-bit and 12-bit Gamma LUTs on
> the *same* SoC?
> 
> 
> A 12-bit LUT gives us more precision and there's no penalty if we want to
> convert a 10-bit LUT to a 12-bits one, as we're simply "ignoring" the value
> of two bits per component (no expensive calculation involved)...
> 
> Is there anything that I'm underestimating here?

Thanks for you explanation !
I think your choice is not bad, but it's not clear that MT8195 10 bit 
LUT isn't supported at all.
So, IMHO, the first solution is to support it like you explained it 
above, and the second solution is to add comment somewhere to clarify 
that driver doesn't support 10 bit LUT if the SoC is able to use 12 bit 
LUT, like MT8195 10 bit.

Is that relevant ? :D
  
AngeloGioacchino Del Regno July 31, 2023, 10:27 a.m. UTC | #4
Il 28/07/23 14:58, Alexandre Mergnat ha scritto:
> Hi Angelo
> 
> On 27/07/2023 15:06, AngeloGioacchino Del Regno wrote:
>>>> +/* For 10 bit LUT layout, R/G/B are in the same register */
>>>>   #define DISP_GAMMA_LUT_10BIT_R            GENMASK(29, 20)
>>>>   #define DISP_GAMMA_LUT_10BIT_G            GENMASK(19, 10)
>>>>   #define DISP_GAMMA_LUT_10BIT_B            GENMASK(9, 0)
>>>> +/* For 12 bit LUT layout, R/G are in LUT, B is in LUT1 */
>>>
>>> As I understood from the application processor registers (v0.4), R/G are in LUT, 
>>> B is in LUT1 for 10bit and 12bit for MT8195. Can you check please to be sure ?
>>>
>>
>> That's right, but here I'm implying that 10-bit LUT is only for older SoCs, and
>> all of them have got the same register layout with one LUT register for R, G, B,
>> while all the new SoCs, which have got 12-bits LUT support, have got the new
>> register layout with two LUT registers (and multiple banks).
>> Infact, the MT8195 SoC was added here with 12-bits LUT support only (as the LUT
>> parameters extraction is easily handled by the drm_color_lut_extract() function).
>>
>> The alternative would've been to add two compatibles, like
>> "mediatek,mt8195-disp-gamma-10bits" and "mediatek,mt8195-disp-gamma-12bits",
>> or a boolean property like "mediatek,lut-12bits" which would appear literally
>> everywhere starting from a certain point in time (since there's no reason to
>> use 10-bits LUT on MT8195, that starts now!).
>>
>> Even then, consider the complication in code, where mtk_gamma_set_common()
>> would have to handle:
>> - 10-bits, layout A
>> - 10-bits, layout B -> but fallback to layout A if this is AAL
>> - 12-bits layout
>>
>> is_aal = !(gamma && gamma->data);
>>
>> for_each_bank()
>> {
>>      if (num_lut_banks > 1) write_num_bank();
>>
>>      for (i = 0; i < lut_bank_size; i++) {
>>          .......
>>
>>          if (!lut_diff || (i % 2 == 0)) {
>>              if (lut_bits == 12 || (lut_bits == 10 && layout_b)) {
>>                  ... setup word[0],[1] ...
>>              } else if (layout_b && !is_aal) {
>>                  ...setup word[0],[1]...
>>              } else {
>>                  ...setup word[0]
>>              }
>>          } else {
>>               ^^^ almost repeat the same ^^^
>>          }
>>          writel(word[0], (...));
>>          if (lut_bits == 12 || (lut_bits == 10 && layout_b) && !is_aal)
>>              writel(word[i] (....));
>>      }
>> }
>>
>> probe() {
>>      if (of_property_read_bool(dev->of_node, "mediatek,lut-12bits") ||
>>          data->supports_only_12bits)
>>          priv->lut_bits = 12;
>>      else
>>          priv->lut_bits = 10;
>> }
>>
>> ...at least, that's the implementation that I would do to solve your concern,
>> which isn't *too bad*, but still, a big question arises here...
>>
>>
>> Why should we care about supporting *both* 10-bit and 12-bit Gamma LUTs on
>> the *same* SoC?
>>
>>
>> A 12-bit LUT gives us more precision and there's no penalty if we want to
>> convert a 10-bit LUT to a 12-bits one, as we're simply "ignoring" the value
>> of two bits per component (no expensive calculation involved)...
>>
>> Is there anything that I'm underestimating here?
> 
> Thanks for you explanation !
> I think your choice is not bad, but it's not clear that MT8195 10 bit LUT isn't 
> supported at all.
> So, IMHO, the first solution is to support it like you explained it above, and the 
> second solution is to add comment somewhere to clarify that driver doesn't support 
> 10 bit LUT if the SoC is able to use 12 bit LUT, like MT8195 10 bit.
> 
> Is that relevant ? :D
> 

Even though the same as whhat I'm doing here was already done before, as the
current 10-bits LUT support ignores 9-bits LUT support, I can add a comment to
the code:

/*
  * SoCs supporting 12-bits LUTs are using a new register layout that does
  * always support (by HW) both 12-bits and 10-bits LUT but, on those, we
  * ignore the support for 10-bits in this driver and always use 12-bits.
  *
  * Summarizing:
  * - SoC HW support 9/10-bits LUT only
  *   - Old register layout
  *     - 10-bits LUT supported
  *     - 9-bits LUT not supported
  * - SoC HW support both 10/12bits LUT
  *   - New register layout
  *    - 12-bits LUT supported
  *    - 10-its LUT not supported
  */

Where the SoCs supporting 9-bits and 10-bits: mt6795, 8173, 8192,others and
12-bits are 8195, 8186, others.. of course.

Would that work for you?

Regards,
Angelo
  
Alexandre Mergnat July 31, 2023, 11:57 a.m. UTC | #5
On 31/07/2023 12:27, AngeloGioacchino Del Regno wrote:
> Il 28/07/23 14:58, Alexandre Mergnat ha scritto:
>> Hi Angelo
>>
>> On 27/07/2023 15:06, AngeloGioacchino Del Regno wrote:
>>>>> +/* For 10 bit LUT layout, R/G/B are in the same register */
>>>>>   #define DISP_GAMMA_LUT_10BIT_R            GENMASK(29, 20)
>>>>>   #define DISP_GAMMA_LUT_10BIT_G            GENMASK(19, 10)
>>>>>   #define DISP_GAMMA_LUT_10BIT_B            GENMASK(9, 0)
>>>>> +/* For 12 bit LUT layout, R/G are in LUT, B is in LUT1 */
>>>>
>>>> As I understood from the application processor registers (v0.4), R/G 
>>>> are in LUT, B is in LUT1 for 10bit and 12bit for MT8195. Can you 
>>>> check please to be sure ?
>>>>
>>>
>>> That's right, but here I'm implying that 10-bit LUT is only for older 
>>> SoCs, and
>>> all of them have got the same register layout with one LUT register 
>>> for R, G, B,
>>> while all the new SoCs, which have got 12-bits LUT support, have got 
>>> the new
>>> register layout with two LUT registers (and multiple banks).
>>> Infact, the MT8195 SoC was added here with 12-bits LUT support only 
>>> (as the LUT
>>> parameters extraction is easily handled by the 
>>> drm_color_lut_extract() function).
>>>
>>> The alternative would've been to add two compatibles, like
>>> "mediatek,mt8195-disp-gamma-10bits" and 
>>> "mediatek,mt8195-disp-gamma-12bits",
>>> or a boolean property like "mediatek,lut-12bits" which would appear 
>>> literally
>>> everywhere starting from a certain point in time (since there's no 
>>> reason to
>>> use 10-bits LUT on MT8195, that starts now!).
>>>
>>> Even then, consider the complication in code, where 
>>> mtk_gamma_set_common()
>>> would have to handle:
>>> - 10-bits, layout A
>>> - 10-bits, layout B -> but fallback to layout A if this is AAL
>>> - 12-bits layout
>>>
>>> is_aal = !(gamma && gamma->data);
>>>
>>> for_each_bank()
>>> {
>>>      if (num_lut_banks > 1) write_num_bank();
>>>
>>>      for (i = 0; i < lut_bank_size; i++) {
>>>          .......
>>>
>>>          if (!lut_diff || (i % 2 == 0)) {
>>>              if (lut_bits == 12 || (lut_bits == 10 && layout_b)) {
>>>                  ... setup word[0],[1] ...
>>>              } else if (layout_b && !is_aal) {
>>>                  ...setup word[0],[1]...
>>>              } else {
>>>                  ...setup word[0]
>>>              }
>>>          } else {
>>>               ^^^ almost repeat the same ^^^
>>>          }
>>>          writel(word[0], (...));
>>>          if (lut_bits == 12 || (lut_bits == 10 && layout_b) && !is_aal)
>>>              writel(word[i] (....));
>>>      }
>>> }
>>>
>>> probe() {
>>>      if (of_property_read_bool(dev->of_node, "mediatek,lut-12bits") ||
>>>          data->supports_only_12bits)
>>>          priv->lut_bits = 12;
>>>      else
>>>          priv->lut_bits = 10;
>>> }
>>>
>>> ...at least, that's the implementation that I would do to solve your 
>>> concern,
>>> which isn't *too bad*, but still, a big question arises here...
>>>
>>>
>>> Why should we care about supporting *both* 10-bit and 12-bit Gamma 
>>> LUTs on
>>> the *same* SoC?
>>>
>>>
>>> A 12-bit LUT gives us more precision and there's no penalty if we 
>>> want to
>>> convert a 10-bit LUT to a 12-bits one, as we're simply "ignoring" the 
>>> value
>>> of two bits per component (no expensive calculation involved)...
>>>
>>> Is there anything that I'm underestimating here?
>>
>> Thanks for you explanation !
>> I think your choice is not bad, but it's not clear that MT8195 10 bit 
>> LUT isn't supported at all.
>> So, IMHO, the first solution is to support it like you explained it 
>> above, and the second solution is to add comment somewhere to clarify 
>> that driver doesn't support 10 bit LUT if the SoC is able to use 12 
>> bit LUT, like MT8195 10 bit.
>>
>> Is that relevant ? :D
>>
> 
> Even though the same as whhat I'm doing here was already done before, as 
> the
> current 10-bits LUT support ignores 9-bits LUT support, I can add a 
> comment to
> the code:
> 
> /*
>   * SoCs supporting 12-bits LUTs are using a new register layout that does
>   * always support (by HW) both 12-bits and 10-bits LUT but, on those, we
>   * ignore the support for 10-bits in this driver and always use 12-bits.
>   *
>   * Summarizing:
>   * - SoC HW support 9/10-bits LUT only
>   *   - Old register layout
>   *     - 10-bits LUT supported
>   *     - 9-bits LUT not supported
>   * - SoC HW support both 10/12bits LUT
>   *   - New register layout
>   *    - 12-bits LUT supported
>   *    - 10-its LUT not supported
>   */
> 
> Where the SoCs supporting 9-bits and 10-bits: mt6795, 8173, 8192,others and
> 12-bits are 8195, 8186, others.. of course.
> 
> Would that work for you?

Sound good for me. After that:

Reviewed-by: Alexandre Mergnat <amergnat@baylibre.com>
  

Patch

diff --git a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
index f1a0b18b6c1a..e0e2d2bdbf59 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_gamma.c
@@ -27,12 +27,20 @@ 
 #define DISP_GAMMA_SIZE_VSIZE				GENMASK(12, 0)
 #define DISP_GAMMA_BANK				0x0100
 #define DISP_GAMMA_BANK_BANK				GENMASK(1, 0)
+#define DISP_GAMMA_BANK_DATA_MODE			BIT(2)
 #define DISP_GAMMA_LUT				0x0700
+#define DISP_GAMMA_LUT1				0x0b00
 
+/* For 10 bit LUT layout, R/G/B are in the same register */
 #define DISP_GAMMA_LUT_10BIT_R			GENMASK(29, 20)
 #define DISP_GAMMA_LUT_10BIT_G			GENMASK(19, 10)
 #define DISP_GAMMA_LUT_10BIT_B			GENMASK(9, 0)
 
+/* For 12 bit LUT layout, R/G are in LUT, B is in LUT1 */
+#define DISP_GAMMA_LUT_12BIT_R			GENMASK(11, 0)
+#define DISP_GAMMA_LUT_12BIT_G			GENMASK(23, 12)
+#define DISP_GAMMA_LUT_12BIT_B			GENMASK(11, 0)
+
 #define LUT_10BIT_MASK				0x03ff
 #define LUT_BITS_DEFAULT			10
 #define LUT_SIZE_DEFAULT			512
@@ -83,14 +91,15 @@  unsigned int mtk_gamma_get_lut_size(struct device *dev)
 void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crtc_state *state)
 {
 	struct mtk_disp_gamma *gamma = dev_get_drvdata(dev);
-	unsigned int i;
+	void __iomem *lut0_base = regs + DISP_GAMMA_LUT;
+	void __iomem *lut1_base = regs + DISP_GAMMA_LUT1;
+	u32 cfg_val, data_mode, lbank_val, word[2];
+	int cur_bank, num_lut_banks;
+	u16 lut_bank_size, lut_size;
 	struct drm_color_lut *lut;
-	void __iomem *lut_base;
+	unsigned int i;
 	bool lut_diff;
-	u16 lut_bank_size, lut_size;
 	u8 lut_bits;
-	u32 cfg_val, lbank_val, word;
-	int cur_bank, num_lut_banks;
 
 	/* If there's no gamma lut there's nothing to do here. */
 	if (!state->gamma_lut)
@@ -110,14 +119,17 @@  void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crt
 	num_lut_banks = lut_size / lut_bank_size;
 
 	cfg_val = readl(regs + DISP_GAMMA_CFG);
-	lut_base = regs + DISP_GAMMA_LUT;
 	lut = (struct drm_color_lut *)state->gamma_lut->data;
 
+	/* Switch to 12 bits data mode if supported */
+	data_mode = FIELD_PREP(DISP_GAMMA_BANK_DATA_MODE, !!(lut_bits == 12));
+
 	for (cur_bank = 0; cur_bank < num_lut_banks; cur_bank++) {
 
 		/* Switch gamma bank and set data mode before writing LUT */
 		if (num_lut_banks > 1) {
 			lbank_val = FIELD_PREP(DISP_GAMMA_BANK_BANK, cur_bank);
+			lbank_val |= data_mode;
 			writel(lbank_val, regs + DISP_GAMMA_BANK);
 		}
 
@@ -130,9 +142,15 @@  void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crt
 			hwlut.blue = drm_color_lut_extract(lut[n].blue, lut_bits);
 
 			if (!lut_diff || (i % 2 == 0)) {
-				word = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, hwlut.red);
-				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, hwlut.green);
-				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, hwlut.blue);
+				if (lut_bits == 12) {
+					word[0] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_R, hwlut.red);
+					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_12BIT_G, hwlut.green);
+					word[1] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_B, hwlut.blue);
+				} else {
+					word[0] = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, hwlut.red);
+					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, hwlut.green);
+					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, hwlut.blue);
+				}
 			} else {
 				diff.red = lut[n].red - lut[n - 1].red;
 				diff.red = drm_color_lut_extract(diff.red, lut_bits);
@@ -143,11 +161,19 @@  void mtk_gamma_set_common(struct device *dev, void __iomem *regs, struct drm_crt
 				diff.blue = lut[n].blue - lut[n - 1].blue;
 				diff.blue = drm_color_lut_extract(diff.blue, lut_bits);
 
-				word = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, diff.red);
-				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, diff.green);
-				word |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, diff.blue);
+				if (lut_bits == 12) {
+					word[0] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_R, diff.red);
+					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_12BIT_G, diff.green);
+					word[1] = FIELD_PREP(DISP_GAMMA_LUT_12BIT_B, diff.blue);
+				} else {
+					word[0] = FIELD_PREP(DISP_GAMMA_LUT_10BIT_R, diff.red);
+					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_G, diff.green);
+					word[0] |= FIELD_PREP(DISP_GAMMA_LUT_10BIT_B, diff.blue);
+				}
 			}
-			writel(word, (lut_base + i * 4));
+			writel(word[0], (lut0_base + i * 4));
+			if (lut_bits == 12)
+				writel(word[1], (lut1_base + i * 4));
 		}
 	}
 
@@ -271,11 +297,20 @@  static const struct mtk_disp_gamma_data mt8183_gamma_driver_data = {
 	.lut_size = 512,
 };
 
+static const struct mtk_disp_gamma_data mt8195_gamma_driver_data = {
+	.lut_bank_size = 256,
+	.lut_bits = 12,
+	.lut_diff = true,
+	.lut_size = 1024,
+};
+
 static const struct of_device_id mtk_disp_gamma_driver_dt_match[] = {
 	{ .compatible = "mediatek,mt8173-disp-gamma",
 	  .data = &mt8173_gamma_driver_data},
 	{ .compatible = "mediatek,mt8183-disp-gamma",
 	  .data = &mt8183_gamma_driver_data},
+	{ .compatible = "mediatek,mt8195-disp-gamma",
+	  .data = &mt8195_gamma_driver_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, mtk_disp_gamma_driver_dt_match);