[v2,12/16] drm/msm/dpu: add an API to setup the CDM block for writeback

Message ID 20231208050641.32582-13-quic_abhinavk@quicinc.com
State New
Headers
Series [v2,01/16] drm/msm/dpu: add formats check for writeback encoder |

Commit Message

Abhinav Kumar Dec. 8, 2023, 5:06 a.m. UTC
  Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
the writeback encoder to setup the CDM block.

Currently, this is defined and used within the writeback's physical
encoder layer however, the function can be modified to be used to setup
the CDM block even for non-writeback interfaces.

Until those modifications are planned and made, keep it local to
writeback.

changes in v2:
	- add the RGB2YUV CSC matrix to dpu util as needed by CDM
	- use dpu_hw_get_csc_cfg() to get and program CSC
	- drop usage of setup_csc_data() and setup_cdwn() cdm ops
	  as they both have been merged into enable()
	- drop reduntant hw_cdm and hw_pp checks

Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
---
 .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  3 +
 .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 96 ++++++++++++++++++-
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c   | 17 ++++
 drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   |  1 +
 4 files changed, 116 insertions(+), 1 deletion(-)
  

Comments

Dmitry Baryshkov Dec. 8, 2023, 11:52 a.m. UTC | #1
On Fri, 8 Dec 2023 at 07:07, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>
> Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
> the writeback encoder to setup the CDM block.
>
> Currently, this is defined and used within the writeback's physical
> encoder layer however, the function can be modified to be used to setup
> the CDM block even for non-writeback interfaces.
>
> Until those modifications are planned and made, keep it local to
> writeback.
>
> changes in v2:
>         - add the RGB2YUV CSC matrix to dpu util as needed by CDM
>         - use dpu_hw_get_csc_cfg() to get and program CSC
>         - drop usage of setup_csc_data() and setup_cdwn() cdm ops
>           as they both have been merged into enable()
>         - drop reduntant hw_cdm and hw_pp checks
>
> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
> ---
>  .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  3 +
>  .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 96 ++++++++++++++++++-
>  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c   | 17 ++++
>  drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   |  1 +
>  4 files changed, 116 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
> index 410f6225789c..1d6d1eb642b9 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
> @@ -16,6 +16,7 @@
>  #include "dpu_hw_pingpong.h"
>  #include "dpu_hw_ctl.h"
>  #include "dpu_hw_top.h"
> +#include "dpu_hw_cdm.h"
>  #include "dpu_encoder.h"
>  #include "dpu_crtc.h"
>
> @@ -210,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
>   * @wbirq_refcount:     Reference count of writeback interrupt
>   * @wb_done_timeout_cnt: number of wb done irq timeout errors
>   * @wb_cfg:  writeback block config to store fb related details
> + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
>   * @wb_conn: backpointer to writeback connector
>   * @wb_job: backpointer to current writeback job
>   * @dest:   dpu buffer layout for current writeback output buffer
> @@ -219,6 +221,7 @@ struct dpu_encoder_phys_wb {
>         atomic_t wbirq_refcount;
>         int wb_done_timeout_cnt;
>         struct dpu_hw_wb_cfg wb_cfg;
> +       struct dpu_hw_cdm_cfg cdm_cfg;
>         struct drm_writeback_connector *wb_conn;
>         struct drm_writeback_job *wb_job;
>         struct dpu_hw_fmt_layout dest;
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
> index 4665367cf14f..85429c62d727 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
> @@ -259,6 +259,99 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
>         }
>  }
>
> +/**
> + * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
> + * @phys_enc:Pointer to physical encoder
> + */
> +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
> +{
> +       struct dpu_hw_cdm *hw_cdm;
> +       struct dpu_hw_cdm_cfg *cdm_cfg;
> +       struct dpu_hw_pingpong *hw_pp;
> +       struct dpu_encoder_phys_wb *wb_enc;
> +       const struct msm_format *format;
> +       const struct dpu_format *dpu_fmt;
> +       struct drm_writeback_job *wb_job;
> +       int ret;
> +
> +       if (!phys_enc)
> +               return;
> +
> +       wb_enc = to_dpu_encoder_phys_wb(phys_enc);
> +       cdm_cfg = &wb_enc->cdm_cfg;
> +       hw_pp = phys_enc->hw_pp;
> +       hw_cdm = phys_enc->hw_cdm;
> +       wb_job = wb_enc->wb_job;
> +
> +       format = msm_framebuffer_format(wb_enc->wb_job->fb);
> +       dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
> +
> +       if (!hw_cdm)
> +               return;
> +
> +       if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
> +               DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
> +                         dpu_fmt->base.pixel_format);
> +               if (hw_cdm->ops.disable)
> +                       hw_cdm->ops.disable(hw_cdm);
> +
> +               return;
> +       }
> +
> +       memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
> +
> +       cdm_cfg->output_width = wb_job->fb->width;
> +       cdm_cfg->output_height = wb_job->fb->height;
> +       cdm_cfg->output_fmt = dpu_fmt;
> +       cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
> +       cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
> +                       CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
> +       cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
> +       if (!cdm_cfg->csc_cfg) {
> +               DPU_ERROR("valid csc not found\n");
> +               return;
> +       }
> +
> +       /* enable 10 bit logic */
> +       switch (cdm_cfg->output_fmt->chroma_sample) {
> +       case DPU_CHROMA_RGB:
> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
> +               break;
> +       case DPU_CHROMA_H2V1:
> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
> +               break;
> +       case DPU_CHROMA_420:
> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
> +               cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
> +               break;
> +       case DPU_CHROMA_H1V2:
> +       default:
> +               DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
> +                         DRMID(phys_enc->parent));
> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;

If it is unsupported, we should return an error here.

> +               break;
> +       }
> +
> +       DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
> +                 DRMID(phys_enc->parent), cdm_cfg->output_width,
> +                 cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
> +                 cdm_cfg->output_type, cdm_cfg->output_bit_depth,
> +                 cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
> +
> +       if (hw_cdm->ops.enable) {
> +               cdm_cfg->pp_id = hw_pp->idx;
> +               ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
> +               if (ret < 0) {
> +                       DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
> +                                 DRMID(phys_enc->parent), ret);
> +                       return;
> +               }
> +       }
> +}
> +
>  /**
>   * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
>   * @phys_enc:  Pointer to physical encoder
> @@ -382,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
>
>         dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
>
> -       dpu_encoder_phys_wb_setup_ctl(phys_enc);
> +       dpu_encoder_helper_phys_setup_cdm(phys_enc);
>
> +       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>  }
>
>  /**
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
> index 59a153331194..34143491aba2 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
> @@ -87,6 +87,8 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
>  #define QOS_QOS_CTRL_VBLANK_EN            BIT(16)
>  #define QOS_QOS_CTRL_CREQ_VBLANK_MASK     GENMASK(21, 20)
>
> +#define TO_S15D16(_x_)((_x_) << 7)

Huh? I don't understand why it is shifted by 7. If you have data in
S8.9 format, I'd say that it makes things less obvious compared to
S15.16 (where you can perform division on the fly).

> +
>  static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
>         {
>                 /* S15.16 format */
> @@ -117,6 +119,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
>         { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
>  };
>
> +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
> +       {
> +               TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
> +               TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
> +               TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
> +       },
> +       { 0x00, 0x00, 0x00 },
> +       { 0x0040, 0x0200, 0x0200 },
> +       { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
> +       { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
> +};
> +
>  /**
>   * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
>   * @type:              type of the requested CSC matrix from caller
> @@ -133,6 +147,9 @@ const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
>         case DPU_HW_YUV2RGB_601L_10BIT:
>                 csc_cfg = &dpu_csc10_YUV2RGB_601L;
>                 break;
> +       case DPU_HW_RGB2YUV_601L_10BIT:
> +               csc_cfg = &dpu_csc10_rgb2yuv_601l;
> +               break;
>         default:
>                 DPU_ERROR("unknown csc_cfg type\n");
>                 break;
> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
> index 49f2bcf6de15..ed153d66f660 100644
> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
> @@ -22,6 +22,7 @@
>  enum dpu_hw_csc_cfg_type {
>         DPU_HW_YUV2RGB_601L,
>         DPU_HW_YUV2RGB_601L_10BIT,
> +       DPU_HW_RGB2YUV_601L_10BIT,
>  };
>
>  /*
> --
> 2.40.1
>
  
Abhinav Kumar Dec. 8, 2023, 5:27 p.m. UTC | #2
On 12/8/2023 3:52 AM, Dmitry Baryshkov wrote:
> On Fri, 8 Dec 2023 at 07:07, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>
>> Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
>> the writeback encoder to setup the CDM block.
>>
>> Currently, this is defined and used within the writeback's physical
>> encoder layer however, the function can be modified to be used to setup
>> the CDM block even for non-writeback interfaces.
>>
>> Until those modifications are planned and made, keep it local to
>> writeback.
>>
>> changes in v2:
>>          - add the RGB2YUV CSC matrix to dpu util as needed by CDM
>>          - use dpu_hw_get_csc_cfg() to get and program CSC
>>          - drop usage of setup_csc_data() and setup_cdwn() cdm ops
>>            as they both have been merged into enable()
>>          - drop reduntant hw_cdm and hw_pp checks
>>
>> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
>> ---
>>   .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  3 +
>>   .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 96 ++++++++++++++++++-
>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c   | 17 ++++
>>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   |  1 +
>>   4 files changed, 116 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>> index 410f6225789c..1d6d1eb642b9 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>> @@ -16,6 +16,7 @@
>>   #include "dpu_hw_pingpong.h"
>>   #include "dpu_hw_ctl.h"
>>   #include "dpu_hw_top.h"
>> +#include "dpu_hw_cdm.h"
>>   #include "dpu_encoder.h"
>>   #include "dpu_crtc.h"
>>
>> @@ -210,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
>>    * @wbirq_refcount:     Reference count of writeback interrupt
>>    * @wb_done_timeout_cnt: number of wb done irq timeout errors
>>    * @wb_cfg:  writeback block config to store fb related details
>> + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
>>    * @wb_conn: backpointer to writeback connector
>>    * @wb_job: backpointer to current writeback job
>>    * @dest:   dpu buffer layout for current writeback output buffer
>> @@ -219,6 +221,7 @@ struct dpu_encoder_phys_wb {
>>          atomic_t wbirq_refcount;
>>          int wb_done_timeout_cnt;
>>          struct dpu_hw_wb_cfg wb_cfg;
>> +       struct dpu_hw_cdm_cfg cdm_cfg;
>>          struct drm_writeback_connector *wb_conn;
>>          struct drm_writeback_job *wb_job;
>>          struct dpu_hw_fmt_layout dest;
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>> index 4665367cf14f..85429c62d727 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>> @@ -259,6 +259,99 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
>>          }
>>   }
>>
>> +/**
>> + * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
>> + * @phys_enc:Pointer to physical encoder
>> + */
>> +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
>> +{
>> +       struct dpu_hw_cdm *hw_cdm;
>> +       struct dpu_hw_cdm_cfg *cdm_cfg;
>> +       struct dpu_hw_pingpong *hw_pp;
>> +       struct dpu_encoder_phys_wb *wb_enc;
>> +       const struct msm_format *format;
>> +       const struct dpu_format *dpu_fmt;
>> +       struct drm_writeback_job *wb_job;
>> +       int ret;
>> +
>> +       if (!phys_enc)
>> +               return;
>> +
>> +       wb_enc = to_dpu_encoder_phys_wb(phys_enc);
>> +       cdm_cfg = &wb_enc->cdm_cfg;
>> +       hw_pp = phys_enc->hw_pp;
>> +       hw_cdm = phys_enc->hw_cdm;
>> +       wb_job = wb_enc->wb_job;
>> +
>> +       format = msm_framebuffer_format(wb_enc->wb_job->fb);
>> +       dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
>> +
>> +       if (!hw_cdm)
>> +               return;
>> +
>> +       if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
>> +               DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
>> +                         dpu_fmt->base.pixel_format);
>> +               if (hw_cdm->ops.disable)
>> +                       hw_cdm->ops.disable(hw_cdm);
>> +
>> +               return;
>> +       }
>> +
>> +       memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
>> +
>> +       cdm_cfg->output_width = wb_job->fb->width;
>> +       cdm_cfg->output_height = wb_job->fb->height;
>> +       cdm_cfg->output_fmt = dpu_fmt;
>> +       cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
>> +       cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
>> +                       CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
>> +       cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
>> +       if (!cdm_cfg->csc_cfg) {
>> +               DPU_ERROR("valid csc not found\n");
>> +               return;
>> +       }
>> +
>> +       /* enable 10 bit logic */
>> +       switch (cdm_cfg->output_fmt->chroma_sample) {
>> +       case DPU_CHROMA_RGB:
>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>> +               break;
>> +       case DPU_CHROMA_H2V1:
>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>> +               break;
>> +       case DPU_CHROMA_420:
>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
>> +               break;
>> +       case DPU_CHROMA_H1V2:
>> +       default:
>> +               DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
>> +                         DRMID(phys_enc->parent));
>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
> 
> If it is unsupported, we should return an error here.
> 

The caller of this API and the caller of the API even before that do not 
have error checking as they are all void. Disabling CDWN is the 
appropriate corrective action for this case and should be sufficient.

>> +               break;
>> +       }
>> +
>> +       DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
>> +                 DRMID(phys_enc->parent), cdm_cfg->output_width,
>> +                 cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
>> +                 cdm_cfg->output_type, cdm_cfg->output_bit_depth,
>> +                 cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
>> +
>> +       if (hw_cdm->ops.enable) {
>> +               cdm_cfg->pp_id = hw_pp->idx;
>> +               ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
>> +               if (ret < 0) {
>> +                       DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
>> +                                 DRMID(phys_enc->parent), ret);
>> +                       return;
>> +               }
>> +       }
>> +}
>> +
>>   /**
>>    * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
>>    * @phys_enc:  Pointer to physical encoder
>> @@ -382,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
>>
>>          dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
>>
>> -       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>> +       dpu_encoder_helper_phys_setup_cdm(phys_enc);
>>
>> +       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>   }
>>
>>   /**
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>> index 59a153331194..34143491aba2 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>> @@ -87,6 +87,8 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
>>   #define QOS_QOS_CTRL_VBLANK_EN            BIT(16)
>>   #define QOS_QOS_CTRL_CREQ_VBLANK_MASK     GENMASK(21, 20)
>>
>> +#define TO_S15D16(_x_)((_x_) << 7)
> 
> Huh? I don't understand why it is shifted by 7. If you have data in
> S8.9 format, I'd say that it makes things less obvious compared to
> S15.16 (where you can perform division on the fly).
> 

I was referring to below comment and also because the values are in 
S15.16 in

https://git.codelinaro.org/clo/la/platform/vendor/opensource/display-drivers/-/blob/clo/main/msm/sde/sde_encoder_phys_wb.c?ref_type=heads#L35

428 struct dpu_csc_cfg {
429 	/* matrix coefficients in S15.16 format */
430 	uint32_t csc_mv[DPU_CSC_MATRIX_COEFF_SIZE];
431 	uint32_t csc_pre_bv[DPU_CSC_BIAS_SIZE];
432 	uint32_t csc_post_bv[DPU_CSC_BIAS_SIZE];
433 	uint32_t csc_pre_lv[DPU_CSC_CLAMP_SIZE];
434 	uint32_t csc_post_lv[DPU_CSC_CLAMP_SIZE];
435 };
436


>> +
>>   static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
>>          {
>>                  /* S15.16 format */
>> @@ -117,6 +119,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
>>          { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
>>   };
>>
>> +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
>> +       {
>> +               TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
>> +               TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
>> +               TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
>> +       },
>> +       { 0x00, 0x00, 0x00 },
>> +       { 0x0040, 0x0200, 0x0200 },
>> +       { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
>> +       { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
>> +};
>> +
>>   /**
>>    * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
>>    * @type:              type of the requested CSC matrix from caller
>> @@ -133,6 +147,9 @@ const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
>>          case DPU_HW_YUV2RGB_601L_10BIT:
>>                  csc_cfg = &dpu_csc10_YUV2RGB_601L;
>>                  break;
>> +       case DPU_HW_RGB2YUV_601L_10BIT:
>> +               csc_cfg = &dpu_csc10_rgb2yuv_601l;
>> +               break;
>>          default:
>>                  DPU_ERROR("unknown csc_cfg type\n");
>>                  break;
>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>> index 49f2bcf6de15..ed153d66f660 100644
>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>> @@ -22,6 +22,7 @@
>>   enum dpu_hw_csc_cfg_type {
>>          DPU_HW_YUV2RGB_601L,
>>          DPU_HW_YUV2RGB_601L_10BIT,
>> +       DPU_HW_RGB2YUV_601L_10BIT,
>>   };
>>
>>   /*
>> --
>> 2.40.1
>>
> 
>
  
Dmitry Baryshkov Dec. 8, 2023, 8:55 p.m. UTC | #3
On Fri, 8 Dec 2023 at 19:28, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
> On 12/8/2023 3:52 AM, Dmitry Baryshkov wrote:
> > On Fri, 8 Dec 2023 at 07:07, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
> >>
> >> Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
> >> the writeback encoder to setup the CDM block.
> >>
> >> Currently, this is defined and used within the writeback's physical
> >> encoder layer however, the function can be modified to be used to setup
> >> the CDM block even for non-writeback interfaces.
> >>
> >> Until those modifications are planned and made, keep it local to
> >> writeback.
> >>
> >> changes in v2:
> >>          - add the RGB2YUV CSC matrix to dpu util as needed by CDM
> >>          - use dpu_hw_get_csc_cfg() to get and program CSC
> >>          - drop usage of setup_csc_data() and setup_cdwn() cdm ops
> >>            as they both have been merged into enable()
> >>          - drop reduntant hw_cdm and hw_pp checks
> >>
> >> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
> >> ---
> >>   .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  3 +
> >>   .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 96 ++++++++++++++++++-
> >>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c   | 17 ++++
> >>   drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   |  1 +
> >>   4 files changed, 116 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
> >> index 410f6225789c..1d6d1eb642b9 100644
> >> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
> >> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
> >> @@ -16,6 +16,7 @@
> >>   #include "dpu_hw_pingpong.h"
> >>   #include "dpu_hw_ctl.h"
> >>   #include "dpu_hw_top.h"
> >> +#include "dpu_hw_cdm.h"
> >>   #include "dpu_encoder.h"
> >>   #include "dpu_crtc.h"
> >>
> >> @@ -210,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
> >>    * @wbirq_refcount:     Reference count of writeback interrupt
> >>    * @wb_done_timeout_cnt: number of wb done irq timeout errors
> >>    * @wb_cfg:  writeback block config to store fb related details
> >> + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
> >>    * @wb_conn: backpointer to writeback connector
> >>    * @wb_job: backpointer to current writeback job
> >>    * @dest:   dpu buffer layout for current writeback output buffer
> >> @@ -219,6 +221,7 @@ struct dpu_encoder_phys_wb {
> >>          atomic_t wbirq_refcount;
> >>          int wb_done_timeout_cnt;
> >>          struct dpu_hw_wb_cfg wb_cfg;
> >> +       struct dpu_hw_cdm_cfg cdm_cfg;
> >>          struct drm_writeback_connector *wb_conn;
> >>          struct drm_writeback_job *wb_job;
> >>          struct dpu_hw_fmt_layout dest;
> >> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
> >> index 4665367cf14f..85429c62d727 100644
> >> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
> >> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
> >> @@ -259,6 +259,99 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
> >>          }
> >>   }
> >>
> >> +/**
> >> + * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
> >> + * @phys_enc:Pointer to physical encoder
> >> + */
> >> +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
> >> +{
> >> +       struct dpu_hw_cdm *hw_cdm;
> >> +       struct dpu_hw_cdm_cfg *cdm_cfg;
> >> +       struct dpu_hw_pingpong *hw_pp;
> >> +       struct dpu_encoder_phys_wb *wb_enc;
> >> +       const struct msm_format *format;
> >> +       const struct dpu_format *dpu_fmt;
> >> +       struct drm_writeback_job *wb_job;
> >> +       int ret;
> >> +
> >> +       if (!phys_enc)
> >> +               return;
> >> +
> >> +       wb_enc = to_dpu_encoder_phys_wb(phys_enc);
> >> +       cdm_cfg = &wb_enc->cdm_cfg;
> >> +       hw_pp = phys_enc->hw_pp;
> >> +       hw_cdm = phys_enc->hw_cdm;
> >> +       wb_job = wb_enc->wb_job;
> >> +
> >> +       format = msm_framebuffer_format(wb_enc->wb_job->fb);
> >> +       dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
> >> +
> >> +       if (!hw_cdm)
> >> +               return;
> >> +
> >> +       if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
> >> +               DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
> >> +                         dpu_fmt->base.pixel_format);
> >> +               if (hw_cdm->ops.disable)
> >> +                       hw_cdm->ops.disable(hw_cdm);
> >> +
> >> +               return;
> >> +       }
> >> +
> >> +       memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
> >> +
> >> +       cdm_cfg->output_width = wb_job->fb->width;
> >> +       cdm_cfg->output_height = wb_job->fb->height;
> >> +       cdm_cfg->output_fmt = dpu_fmt;
> >> +       cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
> >> +       cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
> >> +                       CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
> >> +       cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
> >> +       if (!cdm_cfg->csc_cfg) {
> >> +               DPU_ERROR("valid csc not found\n");
> >> +               return;
> >> +       }
> >> +
> >> +       /* enable 10 bit logic */
> >> +       switch (cdm_cfg->output_fmt->chroma_sample) {
> >> +       case DPU_CHROMA_RGB:
> >> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
> >> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
> >> +               break;
> >> +       case DPU_CHROMA_H2V1:
> >> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
> >> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
> >> +               break;
> >> +       case DPU_CHROMA_420:
> >> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
> >> +               cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
> >> +               break;
> >> +       case DPU_CHROMA_H1V2:
> >> +       default:
> >> +               DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
> >> +                         DRMID(phys_enc->parent));
> >> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
> >> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
> >
> > If it is unsupported, we should return an error here.
> >
>
> The caller of this API and the caller of the API even before that do not
> have error checking as they are all void. Disabling CDWN is the
> appropriate corrective action for this case and should be sufficient.

Ack. Could you please document that DPU_CHROMA_H1V2 is invalid for this API?

>
> >> +               break;
> >> +       }
> >> +
> >> +       DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
> >> +                 DRMID(phys_enc->parent), cdm_cfg->output_width,
> >> +                 cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
> >> +                 cdm_cfg->output_type, cdm_cfg->output_bit_depth,
> >> +                 cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
> >> +
> >> +       if (hw_cdm->ops.enable) {
> >> +               cdm_cfg->pp_id = hw_pp->idx;
> >> +               ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
> >> +               if (ret < 0) {
> >> +                       DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
> >> +                                 DRMID(phys_enc->parent), ret);
> >> +                       return;
> >> +               }
> >> +       }
> >> +}
> >> +
> >>   /**
> >>    * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
> >>    * @phys_enc:  Pointer to physical encoder
> >> @@ -382,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
> >>
> >>          dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
> >>
> >> -       dpu_encoder_phys_wb_setup_ctl(phys_enc);
> >> +       dpu_encoder_helper_phys_setup_cdm(phys_enc);
> >>
> >> +       dpu_encoder_phys_wb_setup_ctl(phys_enc);
> >>   }
> >>
> >>   /**
> >> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
> >> index 59a153331194..34143491aba2 100644
> >> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
> >> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
> >> @@ -87,6 +87,8 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
> >>   #define QOS_QOS_CTRL_VBLANK_EN            BIT(16)
> >>   #define QOS_QOS_CTRL_CREQ_VBLANK_MASK     GENMASK(21, 20)
> >>
> >> +#define TO_S15D16(_x_)((_x_) << 7)
> >
> > Huh? I don't understand why it is shifted by 7. If you have data in
> > S8.9 format, I'd say that it makes things less obvious compared to
> > S15.16 (where you can perform division on the fly).
> >
>
> I was referring to below comment and also because the values are in
> S15.16 in
>
> https://git.codelinaro.org/clo/la/platform/vendor/opensource/display-drivers/-/blob/clo/main/msm/sde/sde_encoder_phys_wb.c?ref_type=heads#L35

Yes, I saw that. My first thought was that <<7 looked
counterintuitive. You can not perform maths easily with S4.9 values.
At least I can not guess the actual value of 0x083 or 0xff37.

I thought about writing data in S15.16 directly (e.g. 0x4180 instead
of 0x083, 0x8100 instead of 0x102). This way it would be easier to
understand that the first row is 65.5 / 256, 128 / 256, 25 / 256. And
this is what we had for existing DPU CSC tables.

But after looking at the dpu_hw_csc_setup() I actually fail to
understand why we need the S15.16 values. The hardware works with S4.9
in the end. So it looks pretty strange to convert S4.9 to S15.16 via
the macro only to convert it back to S4.9 in the CSC setup code.

Maybe we can go the following route:
- Merge this series using CSC tables as is (with your TO_S15D16 macro)
- Drop the <<7 from dpu_hw_csc_setup() (and drop the macro too)
- Define S4P9(floor, nom, den) and S4P9_NEG(floor, nom, den) macros to
be used for the matrix values
- Merge the DPU tables with MDP tables (which use S4.9 already)

I'd also like to check if we can drop two versions of clamp values
(for 8bit and for 10bit) and convert those values on the fly.

>
> 428 struct dpu_csc_cfg {
> 429     /* matrix coefficients in S15.16 format */
> 430     uint32_t csc_mv[DPU_CSC_MATRIX_COEFF_SIZE];
> 431     uint32_t csc_pre_bv[DPU_CSC_BIAS_SIZE];
> 432     uint32_t csc_post_bv[DPU_CSC_BIAS_SIZE];
> 433     uint32_t csc_pre_lv[DPU_CSC_CLAMP_SIZE];
> 434     uint32_t csc_post_lv[DPU_CSC_CLAMP_SIZE];
> 435 };
> 436
>
>
> >> +
> >>   static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
> >>          {
> >>                  /* S15.16 format */
> >> @@ -117,6 +119,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
> >>          { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
> >>   };
> >>
> >> +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
> >> +       {
> >> +               TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
> >> +               TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
> >> +               TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
> >> +       },
> >> +       { 0x00, 0x00, 0x00 },
> >> +       { 0x0040, 0x0200, 0x0200 },
> >> +       { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
> >> +       { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
> >> +};
> >> +
> >>   /**
> >>    * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
> >>    * @type:              type of the requested CSC matrix from caller
> >> @@ -133,6 +147,9 @@ const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
> >>          case DPU_HW_YUV2RGB_601L_10BIT:
> >>                  csc_cfg = &dpu_csc10_YUV2RGB_601L;
> >>                  break;
> >> +       case DPU_HW_RGB2YUV_601L_10BIT:
> >> +               csc_cfg = &dpu_csc10_rgb2yuv_601l;
> >> +               break;
> >>          default:
> >>                  DPU_ERROR("unknown csc_cfg type\n");
> >>                  break;
> >> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
> >> index 49f2bcf6de15..ed153d66f660 100644
> >> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
> >> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
> >> @@ -22,6 +22,7 @@
> >>   enum dpu_hw_csc_cfg_type {
> >>          DPU_HW_YUV2RGB_601L,
> >>          DPU_HW_YUV2RGB_601L_10BIT,
> >> +       DPU_HW_RGB2YUV_601L_10BIT,
> >>   };
> >>
> >>   /*
> >> --
> >> 2.40.1
> >>
> >
> >



--
With best wishes
Dmitry
  
Abhinav Kumar Dec. 8, 2023, 10:48 p.m. UTC | #4
On 12/8/2023 12:55 PM, Dmitry Baryshkov wrote:
> On Fri, 8 Dec 2023 at 19:28, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>> On 12/8/2023 3:52 AM, Dmitry Baryshkov wrote:
>>> On Fri, 8 Dec 2023 at 07:07, Abhinav Kumar <quic_abhinavk@quicinc.com> wrote:
>>>>
>>>> Add an API dpu_encoder_helper_phys_setup_cdm() which can be used by
>>>> the writeback encoder to setup the CDM block.
>>>>
>>>> Currently, this is defined and used within the writeback's physical
>>>> encoder layer however, the function can be modified to be used to setup
>>>> the CDM block even for non-writeback interfaces.
>>>>
>>>> Until those modifications are planned and made, keep it local to
>>>> writeback.
>>>>
>>>> changes in v2:
>>>>           - add the RGB2YUV CSC matrix to dpu util as needed by CDM
>>>>           - use dpu_hw_get_csc_cfg() to get and program CSC
>>>>           - drop usage of setup_csc_data() and setup_cdwn() cdm ops
>>>>             as they both have been merged into enable()
>>>>           - drop reduntant hw_cdm and hw_pp checks
>>>>
>>>> Signed-off-by: Abhinav Kumar <quic_abhinavk@quicinc.com>
>>>> ---
>>>>    .../gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h  |  3 +
>>>>    .../drm/msm/disp/dpu1/dpu_encoder_phys_wb.c   | 96 ++++++++++++++++++-
>>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c   | 17 ++++
>>>>    drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h   |  1 +
>>>>    4 files changed, 116 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> index 410f6225789c..1d6d1eb642b9 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
>>>> @@ -16,6 +16,7 @@
>>>>    #include "dpu_hw_pingpong.h"
>>>>    #include "dpu_hw_ctl.h"
>>>>    #include "dpu_hw_top.h"
>>>> +#include "dpu_hw_cdm.h"
>>>>    #include "dpu_encoder.h"
>>>>    #include "dpu_crtc.h"
>>>>
>>>> @@ -210,6 +211,7 @@ static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
>>>>     * @wbirq_refcount:     Reference count of writeback interrupt
>>>>     * @wb_done_timeout_cnt: number of wb done irq timeout errors
>>>>     * @wb_cfg:  writeback block config to store fb related details
>>>> + * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
>>>>     * @wb_conn: backpointer to writeback connector
>>>>     * @wb_job: backpointer to current writeback job
>>>>     * @dest:   dpu buffer layout for current writeback output buffer
>>>> @@ -219,6 +221,7 @@ struct dpu_encoder_phys_wb {
>>>>           atomic_t wbirq_refcount;
>>>>           int wb_done_timeout_cnt;
>>>>           struct dpu_hw_wb_cfg wb_cfg;
>>>> +       struct dpu_hw_cdm_cfg cdm_cfg;
>>>>           struct drm_writeback_connector *wb_conn;
>>>>           struct drm_writeback_job *wb_job;
>>>>           struct dpu_hw_fmt_layout dest;
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> index 4665367cf14f..85429c62d727 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
>>>> @@ -259,6 +259,99 @@ static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
>>>>           }
>>>>    }
>>>>
>>>> +/**
>>>> + * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
>>>> + * @phys_enc:Pointer to physical encoder
>>>> + */
>>>> +static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
>>>> +{
>>>> +       struct dpu_hw_cdm *hw_cdm;
>>>> +       struct dpu_hw_cdm_cfg *cdm_cfg;
>>>> +       struct dpu_hw_pingpong *hw_pp;
>>>> +       struct dpu_encoder_phys_wb *wb_enc;
>>>> +       const struct msm_format *format;
>>>> +       const struct dpu_format *dpu_fmt;
>>>> +       struct drm_writeback_job *wb_job;
>>>> +       int ret;
>>>> +
>>>> +       if (!phys_enc)
>>>> +               return;
>>>> +
>>>> +       wb_enc = to_dpu_encoder_phys_wb(phys_enc);
>>>> +       cdm_cfg = &wb_enc->cdm_cfg;
>>>> +       hw_pp = phys_enc->hw_pp;
>>>> +       hw_cdm = phys_enc->hw_cdm;
>>>> +       wb_job = wb_enc->wb_job;
>>>> +
>>>> +       format = msm_framebuffer_format(wb_enc->wb_job->fb);
>>>> +       dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
>>>> +
>>>> +       if (!hw_cdm)
>>>> +               return;
>>>> +
>>>> +       if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
>>>> +               DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
>>>> +                         dpu_fmt->base.pixel_format);
>>>> +               if (hw_cdm->ops.disable)
>>>> +                       hw_cdm->ops.disable(hw_cdm);
>>>> +
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
>>>> +
>>>> +       cdm_cfg->output_width = wb_job->fb->width;
>>>> +       cdm_cfg->output_height = wb_job->fb->height;
>>>> +       cdm_cfg->output_fmt = dpu_fmt;
>>>> +       cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
>>>> +       cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
>>>> +                       CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
>>>> +       cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
>>>> +       if (!cdm_cfg->csc_cfg) {
>>>> +               DPU_ERROR("valid csc not found\n");
>>>> +               return;
>>>> +       }
>>>> +
>>>> +       /* enable 10 bit logic */
>>>> +       switch (cdm_cfg->output_fmt->chroma_sample) {
>>>> +       case DPU_CHROMA_RGB:
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               break;
>>>> +       case DPU_CHROMA_H2V1:
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               break;
>>>> +       case DPU_CHROMA_420:
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
>>>> +               break;
>>>> +       case DPU_CHROMA_H1V2:
>>>> +       default:
>>>> +               DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
>>>> +                         DRMID(phys_enc->parent));
>>>> +               cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
>>>> +               cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
>>>
>>> If it is unsupported, we should return an error here.
>>>
>>
>> The caller of this API and the caller of the API even before that do not
>> have error checking as they are all void. Disabling CDWN is the
>> appropriate corrective action for this case and should be sufficient.
> 
> Ack. Could you please document that DPU_CHROMA_H1V2 is invalid for this API?
> 
>>
>>>> +               break;
>>>> +       }
>>>> +
>>>> +       DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
>>>> +                 DRMID(phys_enc->parent), cdm_cfg->output_width,
>>>> +                 cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
>>>> +                 cdm_cfg->output_type, cdm_cfg->output_bit_depth,
>>>> +                 cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
>>>> +
>>>> +       if (hw_cdm->ops.enable) {
>>>> +               cdm_cfg->pp_id = hw_pp->idx;
>>>> +               ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
>>>> +               if (ret < 0) {
>>>> +                       DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
>>>> +                                 DRMID(phys_enc->parent), ret);
>>>> +                       return;
>>>> +               }
>>>> +       }
>>>> +}
>>>> +
>>>>    /**
>>>>     * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
>>>>     * @phys_enc:  Pointer to physical encoder
>>>> @@ -382,8 +475,9 @@ static void dpu_encoder_phys_wb_setup(
>>>>
>>>>           dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
>>>>
>>>> -       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>>> +       dpu_encoder_helper_phys_setup_cdm(phys_enc);
>>>>
>>>> +       dpu_encoder_phys_wb_setup_ctl(phys_enc);
>>>>    }
>>>>
>>>>    /**
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> index 59a153331194..34143491aba2 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
>>>> @@ -87,6 +87,8 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
>>>>    #define QOS_QOS_CTRL_VBLANK_EN            BIT(16)
>>>>    #define QOS_QOS_CTRL_CREQ_VBLANK_MASK     GENMASK(21, 20)
>>>>
>>>> +#define TO_S15D16(_x_)((_x_) << 7)
>>>
>>> Huh? I don't understand why it is shifted by 7. If you have data in
>>> S8.9 format, I'd say that it makes things less obvious compared to
>>> S15.16 (where you can perform division on the fly).
>>>
>>
>> I was referring to below comment and also because the values are in
>> S15.16 in
>>
>> https://git.codelinaro.org/clo/la/platform/vendor/opensource/display-drivers/-/blob/clo/main/msm/sde/sde_encoder_phys_wb.c?ref_type=heads#L35
> 
> Yes, I saw that. My first thought was that <<7 looked
> counterintuitive. You can not perform maths easily with S4.9 values.
> At least I can not guess the actual value of 0x083 or 0xff37.
> 
> I thought about writing data in S15.16 directly (e.g. 0x4180 instead
> of 0x083, 0x8100 instead of 0x102). This way it would be easier to
> understand that the first row is 65.5 / 256, 128 / 256, 25 / 256. And
> this is what we had for existing DPU CSC tables.
> 
> But after looking at the dpu_hw_csc_setup() I actually fail to
> understand why we need the S15.16 values. The hardware works with S4.9
> in the end. So it looks pretty strange to convert S4.9 to S15.16 via
> the macro only to convert it back to S4.9 in the CSC setup code.
> 
> Maybe we can go the following route:
> - Merge this series using CSC tables as is (with your TO_S15D16 macro)
> - Drop the <<7 from dpu_hw_csc_setup() (and drop the macro too)
> - Define S4P9(floor, nom, den) and S4P9_NEG(floor, nom, den) macros to
> be used for the matrix values
> - Merge the DPU tables with MDP tables (which use S4.9 already)
> 
> I'd also like to check if we can drop two versions of clamp values
> (for 8bit and for 10bit) and convert those values on the fly.
> 

Yes, I am fine with this approach. That will also allow me to 
investigate the tables not just from WB context but from others as well.

>>
>> 428 struct dpu_csc_cfg {
>> 429     /* matrix coefficients in S15.16 format */
>> 430     uint32_t csc_mv[DPU_CSC_MATRIX_COEFF_SIZE];
>> 431     uint32_t csc_pre_bv[DPU_CSC_BIAS_SIZE];
>> 432     uint32_t csc_post_bv[DPU_CSC_BIAS_SIZE];
>> 433     uint32_t csc_pre_lv[DPU_CSC_CLAMP_SIZE];
>> 434     uint32_t csc_post_lv[DPU_CSC_CLAMP_SIZE];
>> 435 };
>> 436
>>
>>
>>>> +
>>>>    static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
>>>>           {
>>>>                   /* S15.16 format */
>>>> @@ -117,6 +119,18 @@ static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
>>>>           { 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
>>>>    };
>>>>
>>>> +static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
>>>> +       {
>>>> +               TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
>>>> +               TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
>>>> +               TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
>>>> +       },
>>>> +       { 0x00, 0x00, 0x00 },
>>>> +       { 0x0040, 0x0200, 0x0200 },
>>>> +       { 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
>>>> +       { 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
>>>> +};
>>>> +
>>>>    /**
>>>>     * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
>>>>     * @type:              type of the requested CSC matrix from caller
>>>> @@ -133,6 +147,9 @@ const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
>>>>           case DPU_HW_YUV2RGB_601L_10BIT:
>>>>                   csc_cfg = &dpu_csc10_YUV2RGB_601L;
>>>>                   break;
>>>> +       case DPU_HW_RGB2YUV_601L_10BIT:
>>>> +               csc_cfg = &dpu_csc10_rgb2yuv_601l;
>>>> +               break;
>>>>           default:
>>>>                   DPU_ERROR("unknown csc_cfg type\n");
>>>>                   break;
>>>> diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> index 49f2bcf6de15..ed153d66f660 100644
>>>> --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
>>>> @@ -22,6 +22,7 @@
>>>>    enum dpu_hw_csc_cfg_type {
>>>>           DPU_HW_YUV2RGB_601L,
>>>>           DPU_HW_YUV2RGB_601L_10BIT,
>>>> +       DPU_HW_RGB2YUV_601L_10BIT,
>>>>    };
>>>>
>>>>    /*
>>>> --
>>>> 2.40.1
>>>>
>>>
>>>
> 
> 
> 
> --
> With best wishes
> Dmitry
  
kernel test robot Dec. 10, 2023, 2:06 p.m. UTC | #5
Hi Abhinav,

kernel test robot noticed the following build warnings:

[auto build test WARNING on next-20231207]
[also build test WARNING on v6.7-rc4]
[cannot apply to drm-misc/drm-misc-next linus/master v6.7-rc4 v6.7-rc3 v6.7-rc2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Abhinav-Kumar/drm-msm-dpu-add-formats-check-for-writeback-encoder/20231208-130820
base:   next-20231207
patch link:    https://lore.kernel.org/r/20231208050641.32582-13-quic_abhinavk%40quicinc.com
patch subject: [PATCH v2 12/16] drm/msm/dpu: add an API to setup the CDM block for writeback
config: x86_64-allyesconfig (https://download.01.org/0day-ci/archive/20231210/202312102149.qmbCdsg2-lkp@intel.com/config)
compiler: clang version 16.0.4 (https://github.com/llvm/llvm-project.git ae42196bc493ffe877a7e3dff8be32035dea4d07)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231210/202312102149.qmbCdsg2-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202312102149.qmbCdsg2-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c:267: warning: expecting prototype for dpu_encoder_phys_wb_setup_cdp(). Prototype was for dpu_encoder_helper_phys_setup_cdm() instead


vim +267 drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c

   261	
   262	/**
   263	 * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
   264	 * @phys_enc:Pointer to physical encoder
   265	 */
   266	static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
 > 267	{
   268		struct dpu_hw_cdm *hw_cdm;
   269		struct dpu_hw_cdm_cfg *cdm_cfg;
   270		struct dpu_hw_pingpong *hw_pp;
   271		struct dpu_encoder_phys_wb *wb_enc;
   272		const struct msm_format *format;
   273		const struct dpu_format *dpu_fmt;
   274		struct drm_writeback_job *wb_job;
   275		int ret;
   276	
   277		if (!phys_enc)
   278			return;
   279	
   280		wb_enc = to_dpu_encoder_phys_wb(phys_enc);
   281		cdm_cfg = &wb_enc->cdm_cfg;
   282		hw_pp = phys_enc->hw_pp;
   283		hw_cdm = phys_enc->hw_cdm;
   284		wb_job = wb_enc->wb_job;
   285	
   286		format = msm_framebuffer_format(wb_enc->wb_job->fb);
   287		dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
   288	
   289		if (!hw_cdm)
   290			return;
   291	
   292		if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
   293			DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
   294				  dpu_fmt->base.pixel_format);
   295			if (hw_cdm->ops.disable)
   296				hw_cdm->ops.disable(hw_cdm);
   297	
   298			return;
   299		}
   300	
   301		memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
   302	
   303		cdm_cfg->output_width = wb_job->fb->width;
   304		cdm_cfg->output_height = wb_job->fb->height;
   305		cdm_cfg->output_fmt = dpu_fmt;
   306		cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
   307		cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
   308				CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
   309		cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
   310		if (!cdm_cfg->csc_cfg) {
   311			DPU_ERROR("valid csc not found\n");
   312			return;
   313		}
   314	
   315		/* enable 10 bit logic */
   316		switch (cdm_cfg->output_fmt->chroma_sample) {
   317		case DPU_CHROMA_RGB:
   318			cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
   319			cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
   320			break;
   321		case DPU_CHROMA_H2V1:
   322			cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
   323			cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
   324			break;
   325		case DPU_CHROMA_420:
   326			cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
   327			cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
   328			break;
   329		case DPU_CHROMA_H1V2:
   330		default:
   331			DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
   332				  DRMID(phys_enc->parent));
   333			cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
   334			cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
   335			break;
   336		}
   337	
   338		DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
   339			  DRMID(phys_enc->parent), cdm_cfg->output_width,
   340			  cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
   341			  cdm_cfg->output_type, cdm_cfg->output_bit_depth,
   342			  cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
   343	
   344		if (hw_cdm->ops.enable) {
   345			cdm_cfg->pp_id = hw_pp->idx;
   346			ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
   347			if (ret < 0) {
   348				DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
   349					  DRMID(phys_enc->parent), ret);
   350				return;
   351			}
   352		}
   353	}
   354
  

Patch

diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
index 410f6225789c..1d6d1eb642b9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys.h
@@ -16,6 +16,7 @@ 
 #include "dpu_hw_pingpong.h"
 #include "dpu_hw_ctl.h"
 #include "dpu_hw_top.h"
+#include "dpu_hw_cdm.h"
 #include "dpu_encoder.h"
 #include "dpu_crtc.h"
 
@@ -210,6 +211,7 @@  static inline int dpu_encoder_phys_inc_pending(struct dpu_encoder_phys *phys)
  * @wbirq_refcount:     Reference count of writeback interrupt
  * @wb_done_timeout_cnt: number of wb done irq timeout errors
  * @wb_cfg:  writeback block config to store fb related details
+ * @cdm_cfg: cdm block config needed to store writeback block's CDM configuration
  * @wb_conn: backpointer to writeback connector
  * @wb_job: backpointer to current writeback job
  * @dest:   dpu buffer layout for current writeback output buffer
@@ -219,6 +221,7 @@  struct dpu_encoder_phys_wb {
 	atomic_t wbirq_refcount;
 	int wb_done_timeout_cnt;
 	struct dpu_hw_wb_cfg wb_cfg;
+	struct dpu_hw_cdm_cfg cdm_cfg;
 	struct drm_writeback_connector *wb_conn;
 	struct drm_writeback_job *wb_job;
 	struct dpu_hw_fmt_layout dest;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
index 4665367cf14f..85429c62d727 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_wb.c
@@ -259,6 +259,99 @@  static void dpu_encoder_phys_wb_setup_ctl(struct dpu_encoder_phys *phys_enc)
 	}
 }
 
+/**
+ * dpu_encoder_phys_wb_setup_cdp - setup chroma down sampling block
+ * @phys_enc:Pointer to physical encoder
+ */
+static void dpu_encoder_helper_phys_setup_cdm(struct dpu_encoder_phys *phys_enc)
+{
+	struct dpu_hw_cdm *hw_cdm;
+	struct dpu_hw_cdm_cfg *cdm_cfg;
+	struct dpu_hw_pingpong *hw_pp;
+	struct dpu_encoder_phys_wb *wb_enc;
+	const struct msm_format *format;
+	const struct dpu_format *dpu_fmt;
+	struct drm_writeback_job *wb_job;
+	int ret;
+
+	if (!phys_enc)
+		return;
+
+	wb_enc = to_dpu_encoder_phys_wb(phys_enc);
+	cdm_cfg = &wb_enc->cdm_cfg;
+	hw_pp = phys_enc->hw_pp;
+	hw_cdm = phys_enc->hw_cdm;
+	wb_job = wb_enc->wb_job;
+
+	format = msm_framebuffer_format(wb_enc->wb_job->fb);
+	dpu_fmt = dpu_get_dpu_format_ext(format->pixel_format, wb_job->fb->modifier);
+
+	if (!hw_cdm)
+		return;
+
+	if (!DPU_FORMAT_IS_YUV(dpu_fmt)) {
+		DPU_DEBUG("[enc:%d] cdm_disable fmt:%x\n", DRMID(phys_enc->parent),
+			  dpu_fmt->base.pixel_format);
+		if (hw_cdm->ops.disable)
+			hw_cdm->ops.disable(hw_cdm);
+
+		return;
+	}
+
+	memset(cdm_cfg, 0, sizeof(struct dpu_hw_cdm_cfg));
+
+	cdm_cfg->output_width = wb_job->fb->width;
+	cdm_cfg->output_height = wb_job->fb->height;
+	cdm_cfg->output_fmt = dpu_fmt;
+	cdm_cfg->output_type = CDM_CDWN_OUTPUT_WB;
+	cdm_cfg->output_bit_depth = DPU_FORMAT_IS_DX(dpu_fmt) ?
+			CDM_CDWN_OUTPUT_10BIT : CDM_CDWN_OUTPUT_8BIT;
+	cdm_cfg->csc_cfg = dpu_hw_get_csc_cfg(DPU_HW_RGB2YUV_601L_10BIT);
+	if (!cdm_cfg->csc_cfg) {
+		DPU_ERROR("valid csc not found\n");
+		return;
+	}
+
+	/* enable 10 bit logic */
+	switch (cdm_cfg->output_fmt->chroma_sample) {
+	case DPU_CHROMA_RGB:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	case DPU_CHROMA_H2V1:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	case DPU_CHROMA_420:
+		cdm_cfg->h_cdwn_type = CDM_CDWN_COSITE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_OFFSITE;
+		break;
+	case DPU_CHROMA_H1V2:
+	default:
+		DPU_ERROR("[enc:%d] unsupported chroma sampling type\n",
+			  DRMID(phys_enc->parent));
+		cdm_cfg->h_cdwn_type = CDM_CDWN_DISABLE;
+		cdm_cfg->v_cdwn_type = CDM_CDWN_DISABLE;
+		break;
+	}
+
+	DPU_DEBUG("[enc:%d] cdm_enable:%d,%d,%X,%d,%d,%d,%d]\n",
+		  DRMID(phys_enc->parent), cdm_cfg->output_width,
+		  cdm_cfg->output_height, cdm_cfg->output_fmt->base.pixel_format,
+		  cdm_cfg->output_type, cdm_cfg->output_bit_depth,
+		  cdm_cfg->h_cdwn_type, cdm_cfg->v_cdwn_type);
+
+	if (hw_cdm->ops.enable) {
+		cdm_cfg->pp_id = hw_pp->idx;
+		ret = hw_cdm->ops.enable(hw_cdm, cdm_cfg);
+		if (ret < 0) {
+			DPU_ERROR("[enc:%d] failed to enable CDM; ret:%d\n",
+				  DRMID(phys_enc->parent), ret);
+			return;
+		}
+	}
+}
+
 /**
  * dpu_encoder_phys_wb_atomic_check - verify and fixup given atomic states
  * @phys_enc:	Pointer to physical encoder
@@ -382,8 +475,9 @@  static void dpu_encoder_phys_wb_setup(
 
 	dpu_encoder_phys_wb_setup_fb(phys_enc, fb);
 
-	dpu_encoder_phys_wb_setup_ctl(phys_enc);
+	dpu_encoder_helper_phys_setup_cdm(phys_enc);
 
+	dpu_encoder_phys_wb_setup_ctl(phys_enc);
 }
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
index 59a153331194..34143491aba2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
@@ -87,6 +87,8 @@  static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
 #define QOS_QOS_CTRL_VBLANK_EN            BIT(16)
 #define QOS_QOS_CTRL_CREQ_VBLANK_MASK     GENMASK(21, 20)
 
+#define TO_S15D16(_x_)((_x_) << 7)
+
 static const struct dpu_csc_cfg dpu_csc_YUV2RGB_601L = {
 	{
 		/* S15.16 format */
@@ -117,6 +119,18 @@  static const struct dpu_csc_cfg dpu_csc10_YUV2RGB_601L = {
 	{ 0x00, 0x3ff, 0x00, 0x3ff, 0x00, 0x3ff,},
 };
 
+static const struct dpu_csc_cfg dpu_csc10_rgb2yuv_601l = {
+	{
+		TO_S15D16(0x0083), TO_S15D16(0x0102), TO_S15D16(0x0032),
+		TO_S15D16(0x1fb5), TO_S15D16(0x1f6c), TO_S15D16(0x00e1),
+		TO_S15D16(0x00e1), TO_S15D16(0x1f45), TO_S15D16(0x1fdc)
+	},
+	{ 0x00, 0x00, 0x00 },
+	{ 0x0040, 0x0200, 0x0200 },
+	{ 0x000, 0x3ff, 0x000, 0x3ff, 0x000, 0x3ff },
+	{ 0x040, 0x3ac, 0x040, 0x3c0, 0x040, 0x3c0 },
+};
+
 /**
  * dpu_hw_get_csc_cfg - get the CSC matrix based on the request type
  * @type:		type of the requested CSC matrix from caller
@@ -133,6 +147,9 @@  const struct dpu_csc_cfg *dpu_hw_get_csc_cfg(enum dpu_hw_csc_cfg_type type)
 	case DPU_HW_YUV2RGB_601L_10BIT:
 		csc_cfg = &dpu_csc10_YUV2RGB_601L;
 		break;
+	case DPU_HW_RGB2YUV_601L_10BIT:
+		csc_cfg = &dpu_csc10_rgb2yuv_601l;
+		break;
 	default:
 		DPU_ERROR("unknown csc_cfg type\n");
 		break;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
index 49f2bcf6de15..ed153d66f660 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
@@ -22,6 +22,7 @@ 
 enum dpu_hw_csc_cfg_type {
 	DPU_HW_YUV2RGB_601L,
 	DPU_HW_YUV2RGB_601L_10BIT,
+	DPU_HW_RGB2YUV_601L_10BIT,
 };
 
 /*