[v2,2/2] dmaengine: xilinx: xdma: Support cyclic transfers

Message ID 20230922162056.594933-3-miquel.raynal@bootlin.com
State New
Headers
Series dmaengine: xdma: Cyclic transfers support |

Commit Message

Miquel Raynal Sept. 22, 2023, 4:20 p.m. UTC
  In order to use this dmaengine with sound devices, let's add cyclic
transfers support. Most of the code is reused from the existing
scatter-gather implementation, only the final linking between
descriptors, the control fields (to trigger interrupts more often) and
the interrupt handling are really different.

This controller supports up to 32 adjacent descriptors. We assume this
is way more than enough for the purpose of cyclic transfers and limit
the number of cycled descriptors to 32. This greatly simplifies the
overall handling of the descriptors.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
---
 drivers/dma/xilinx/xdma-regs.h |   2 +
 drivers/dma/xilinx/xdma.c      | 165 +++++++++++++++++++++++++++++++--
 2 files changed, 161 insertions(+), 6 deletions(-)
  

Comments

kernel test robot Sept. 22, 2023, 5:43 p.m. UTC | #1
Hi Miquel,

kernel test robot noticed the following build warnings:

[auto build test WARNING on v6.6-rc2]
[also build test WARNING on linus/master next-20230921]
[cannot apply to xilinx-xlnx/master]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Miquel-Raynal/dmaengine-xilinx-xdma-Prepare-the-introduction-of-cyclic-transfers/20230923-002252
base:   v6.6-rc2
patch link:    https://lore.kernel.org/r/20230922162056.594933-3-miquel.raynal%40bootlin.com
patch subject: [PATCH v2 2/2] dmaengine: xilinx: xdma: Support cyclic transfers
config: m68k-allyesconfig (https://download.01.org/0day-ci/archive/20230923/202309230103.YgvYkSCn-lkp@intel.com/config)
compiler: m68k-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20230923/202309230103.YgvYkSCn-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202309230103.YgvYkSCn-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> drivers/dma/xilinx/xdma.c:262: warning: Function parameter or member 'cyclic' not described in 'xdma_alloc_desc'


vim +262 drivers/dma/xilinx/xdma.c

17ce252266c7f0 Lizhi Hou     2023-01-19  254  
17ce252266c7f0 Lizhi Hou     2023-01-19  255  /**
17ce252266c7f0 Lizhi Hou     2023-01-19  256   * xdma_alloc_desc - Allocate descriptor
17ce252266c7f0 Lizhi Hou     2023-01-19  257   * @chan: DMA channel pointer
17ce252266c7f0 Lizhi Hou     2023-01-19  258   * @desc_num: Number of hardware descriptors
17ce252266c7f0 Lizhi Hou     2023-01-19  259   */
17ce252266c7f0 Lizhi Hou     2023-01-19  260  static struct xdma_desc *
9dfa9406316d5c Miquel Raynal 2023-09-22  261  xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num, bool cyclic)
17ce252266c7f0 Lizhi Hou     2023-01-19 @262  {
17ce252266c7f0 Lizhi Hou     2023-01-19  263  	struct xdma_desc *sw_desc;
17ce252266c7f0 Lizhi Hou     2023-01-19  264  	struct xdma_hw_desc *desc;
17ce252266c7f0 Lizhi Hou     2023-01-19  265  	dma_addr_t dma_addr;
17ce252266c7f0 Lizhi Hou     2023-01-19  266  	u32 dblk_num;
34df67fe3afc84 Miquel Raynal 2023-09-22  267  	u32 control;
17ce252266c7f0 Lizhi Hou     2023-01-19  268  	void *addr;
17ce252266c7f0 Lizhi Hou     2023-01-19  269  	int i, j;
17ce252266c7f0 Lizhi Hou     2023-01-19  270  
17ce252266c7f0 Lizhi Hou     2023-01-19  271  	sw_desc = kzalloc(sizeof(*sw_desc), GFP_NOWAIT);
17ce252266c7f0 Lizhi Hou     2023-01-19  272  	if (!sw_desc)
17ce252266c7f0 Lizhi Hou     2023-01-19  273  		return NULL;
17ce252266c7f0 Lizhi Hou     2023-01-19  274  
17ce252266c7f0 Lizhi Hou     2023-01-19  275  	sw_desc->chan = chan;
17ce252266c7f0 Lizhi Hou     2023-01-19  276  	sw_desc->desc_num = desc_num;
9dfa9406316d5c Miquel Raynal 2023-09-22  277  	sw_desc->cyclic = cyclic;
17ce252266c7f0 Lizhi Hou     2023-01-19  278  	dblk_num = DIV_ROUND_UP(desc_num, XDMA_DESC_ADJACENT);
17ce252266c7f0 Lizhi Hou     2023-01-19  279  	sw_desc->desc_blocks = kcalloc(dblk_num, sizeof(*sw_desc->desc_blocks),
17ce252266c7f0 Lizhi Hou     2023-01-19  280  				       GFP_NOWAIT);
17ce252266c7f0 Lizhi Hou     2023-01-19  281  	if (!sw_desc->desc_blocks)
17ce252266c7f0 Lizhi Hou     2023-01-19  282  		goto failed;
17ce252266c7f0 Lizhi Hou     2023-01-19  283  
9dfa9406316d5c Miquel Raynal 2023-09-22  284  	if (cyclic)
9dfa9406316d5c Miquel Raynal 2023-09-22  285  		control = XDMA_DESC_CONTROL_CYCLIC;
9dfa9406316d5c Miquel Raynal 2023-09-22  286  	else
34df67fe3afc84 Miquel Raynal 2023-09-22  287  		control = XDMA_DESC_CONTROL(1, 0);
34df67fe3afc84 Miquel Raynal 2023-09-22  288  
17ce252266c7f0 Lizhi Hou     2023-01-19  289  	sw_desc->dblk_num = dblk_num;
17ce252266c7f0 Lizhi Hou     2023-01-19  290  	for (i = 0; i < sw_desc->dblk_num; i++) {
17ce252266c7f0 Lizhi Hou     2023-01-19  291  		addr = dma_pool_alloc(chan->desc_pool, GFP_NOWAIT, &dma_addr);
17ce252266c7f0 Lizhi Hou     2023-01-19  292  		if (!addr)
17ce252266c7f0 Lizhi Hou     2023-01-19  293  			goto failed;
17ce252266c7f0 Lizhi Hou     2023-01-19  294  
17ce252266c7f0 Lizhi Hou     2023-01-19  295  		sw_desc->desc_blocks[i].virt_addr = addr;
17ce252266c7f0 Lizhi Hou     2023-01-19  296  		sw_desc->desc_blocks[i].dma_addr = dma_addr;
17ce252266c7f0 Lizhi Hou     2023-01-19  297  		for (j = 0, desc = addr; j < XDMA_DESC_ADJACENT; j++)
34df67fe3afc84 Miquel Raynal 2023-09-22  298  			desc[j].control = cpu_to_le32(control);
17ce252266c7f0 Lizhi Hou     2023-01-19  299  	}
17ce252266c7f0 Lizhi Hou     2023-01-19  300  
9dfa9406316d5c Miquel Raynal 2023-09-22  301  	if (cyclic)
9dfa9406316d5c Miquel Raynal 2023-09-22  302  		xdma_link_cyclic_desc_blocks(sw_desc);
9dfa9406316d5c Miquel Raynal 2023-09-22  303  	else
34df67fe3afc84 Miquel Raynal 2023-09-22  304  		xdma_link_sg_desc_blocks(sw_desc);
17ce252266c7f0 Lizhi Hou     2023-01-19  305  
17ce252266c7f0 Lizhi Hou     2023-01-19  306  	return sw_desc;
17ce252266c7f0 Lizhi Hou     2023-01-19  307  
17ce252266c7f0 Lizhi Hou     2023-01-19  308  failed:
17ce252266c7f0 Lizhi Hou     2023-01-19  309  	xdma_free_desc(&sw_desc->vdesc);
17ce252266c7f0 Lizhi Hou     2023-01-19  310  	return NULL;
17ce252266c7f0 Lizhi Hou     2023-01-19  311  }
17ce252266c7f0 Lizhi Hou     2023-01-19  312
  
Vinod Koul Sept. 28, 2023, 10:54 a.m. UTC | #2
On 22-09-23, 18:20, Miquel Raynal wrote:

> @@ -583,7 +690,36 @@ static int xdma_alloc_chan_resources(struct dma_chan *chan)
>  static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
>  				      struct dma_tx_state *state)
>  {
> -	return dma_cookie_status(chan, cookie, state);
> +	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
> +	struct xdma_desc *desc = NULL;
> +	struct virt_dma_desc *vd;
> +	enum dma_status ret;
> +	unsigned long flags;
> +	unsigned int period_idx;
> +	u32 residue = 0;
> +
> +	ret = dma_cookie_status(chan, cookie, state);
> +	if (ret == DMA_COMPLETE)
> +		return ret;
> +
> +	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
> +
> +	vd = vchan_find_desc(&xdma_chan->vchan, cookie);
> +	if (vd)
> +		desc = to_xdma_desc(vd);

vd is not used in below check, so should be done after below checks, why
do this for cyclic case?

Otherwise series lgtm, just fix the error reported by test bot

> +	if (!desc || !desc->cyclic) {
> +		spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
> +		return ret;
> +	}
  
Miquel Raynal Oct. 3, 2023, 9:02 a.m. UTC | #3
Hi Vinod,

Thanks for the feedback.

vkoul@kernel.org wrote on Thu, 28 Sep 2023 16:24:31 +0530:

> On 22-09-23, 18:20, Miquel Raynal wrote:
> 
> > @@ -583,7 +690,36 @@ static int xdma_alloc_chan_resources(struct dma_chan *chan)
> >  static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> >  				      struct dma_tx_state *state)
> >  {
> > -	return dma_cookie_status(chan, cookie, state);
> > +	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
> > +	struct xdma_desc *desc = NULL;
> > +	struct virt_dma_desc *vd;
> > +	enum dma_status ret;
> > +	unsigned long flags;
> > +	unsigned int period_idx;
> > +	u32 residue = 0;
> > +
> > +	ret = dma_cookie_status(chan, cookie, state);
> > +	if (ret == DMA_COMPLETE)
> > +		return ret;
> > +
> > +	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
> > +
> > +	vd = vchan_find_desc(&xdma_chan->vchan, cookie);
> > +	if (vd)
> > +		desc = to_xdma_desc(vd);  
> 
> vd is not used in below check, so should be done after below checks, why
> do this for cyclic case?

I'm not sure I get this comment. vd is my way to get the descriptor,
and I need the descriptor to know whether we are in a cyclic transfer
or not. If the transfer is not cyclic, I just return the value from
dma_cookie_status() like before, otherwise I update the residue based
on the content of desc.

Maybe I don't understand what you mean, would you mind explaining it
again?

> Otherwise series lgtm, just fix the error reported by test bot

I will.

> 
> > +	if (!desc || !desc->cyclic) {
> > +		spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
> > +		return ret;
> > +	}  


Thanks,
Miquèl
  
Vinod Koul Oct. 4, 2023, 7:29 a.m. UTC | #4
On 03-10-23, 11:02, Miquel Raynal wrote:
> Hi Vinod,
> 
> Thanks for the feedback.
> 
> vkoul@kernel.org wrote on Thu, 28 Sep 2023 16:24:31 +0530:
> 
> > On 22-09-23, 18:20, Miquel Raynal wrote:
> > 
> > > @@ -583,7 +690,36 @@ static int xdma_alloc_chan_resources(struct dma_chan *chan)
> > >  static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > >  				      struct dma_tx_state *state)
> > >  {
> > > -	return dma_cookie_status(chan, cookie, state);
> > > +	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
> > > +	struct xdma_desc *desc = NULL;
> > > +	struct virt_dma_desc *vd;
> > > +	enum dma_status ret;
> > > +	unsigned long flags;
> > > +	unsigned int period_idx;
> > > +	u32 residue = 0;
> > > +
> > > +	ret = dma_cookie_status(chan, cookie, state);
> > > +	if (ret == DMA_COMPLETE)
> > > +		return ret;
> > > +
> > > +	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
> > > +
> > > +	vd = vchan_find_desc(&xdma_chan->vchan, cookie);
> > > +	if (vd)
> > > +		desc = to_xdma_desc(vd);  
> > 
> > vd is not used in below check, so should be done after below checks, why
> > do this for cyclic case?
> 
> I'm not sure I get this comment. vd is my way to get the descriptor,
> and I need the descriptor to know whether we are in a cyclic transfer
> or not. If the transfer is not cyclic, I just return the value from
> dma_cookie_status() like before, otherwise I update the residue based
> on the content of desc.
> 
> Maybe I don't understand what you mean, would you mind explaining it
> again?

Sorry I am not sure what I was thinking, this looks fine, we need the
lock to get the desc and use it
  
Miquel Raynal Oct. 4, 2023, 7:46 a.m. UTC | #5
Hi Vinod,

vkoul@kernel.org wrote on Wed, 4 Oct 2023 12:59:25 +0530:

> On 03-10-23, 11:02, Miquel Raynal wrote:
> > Hi Vinod,
> > 
> > Thanks for the feedback.
> > 
> > vkoul@kernel.org wrote on Thu, 28 Sep 2023 16:24:31 +0530:
> >   
> > > On 22-09-23, 18:20, Miquel Raynal wrote:
> > >   
> > > > @@ -583,7 +690,36 @@ static int xdma_alloc_chan_resources(struct dma_chan *chan)
> > > >  static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
> > > >  				      struct dma_tx_state *state)
> > > >  {
> > > > -	return dma_cookie_status(chan, cookie, state);
> > > > +	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
> > > > +	struct xdma_desc *desc = NULL;
> > > > +	struct virt_dma_desc *vd;
> > > > +	enum dma_status ret;
> > > > +	unsigned long flags;
> > > > +	unsigned int period_idx;
> > > > +	u32 residue = 0;
> > > > +
> > > > +	ret = dma_cookie_status(chan, cookie, state);
> > > > +	if (ret == DMA_COMPLETE)
> > > > +		return ret;
> > > > +
> > > > +	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
> > > > +
> > > > +	vd = vchan_find_desc(&xdma_chan->vchan, cookie);
> > > > +	if (vd)
> > > > +		desc = to_xdma_desc(vd);    
> > > 
> > > vd is not used in below check, so should be done after below checks, why
> > > do this for cyclic case?  
> > 
> > I'm not sure I get this comment. vd is my way to get the descriptor,
> > and I need the descriptor to know whether we are in a cyclic transfer
> > or not. If the transfer is not cyclic, I just return the value from
> > dma_cookie_status() like before, otherwise I update the residue based
> > on the content of desc.
> > 
> > Maybe I don't understand what you mean, would you mind explaining it
> > again?  
> 
> Sorry I am not sure what I was thinking, this looks fine, we need the
> lock to get the desc and use it

Ah ok, no problem :) I'll send the v3 with the missing kernel doc line
(kernel test robot report).

Thanks,
Miquèl
  

Patch

diff --git a/drivers/dma/xilinx/xdma-regs.h b/drivers/dma/xilinx/xdma-regs.h
index dd98b4526b90..e641a5083e14 100644
--- a/drivers/dma/xilinx/xdma-regs.h
+++ b/drivers/dma/xilinx/xdma-regs.h
@@ -44,6 +44,8 @@ 
 	 FIELD_PREP(XDMA_DESC_FLAGS_BITS, (flag)))
 #define XDMA_DESC_CONTROL_LAST						\
 	XDMA_DESC_CONTROL(1, XDMA_DESC_STOPPED | XDMA_DESC_COMPLETED)
+#define XDMA_DESC_CONTROL_CYCLIC					\
+	XDMA_DESC_CONTROL(1, XDMA_DESC_COMPLETED)
 
 /*
  * Descriptor for a single contiguous memory block transfer.
diff --git a/drivers/dma/xilinx/xdma.c b/drivers/dma/xilinx/xdma.c
index 09ed13d6666d..bc9a6ca3353e 100644
--- a/drivers/dma/xilinx/xdma.c
+++ b/drivers/dma/xilinx/xdma.c
@@ -83,6 +83,9 @@  struct xdma_chan {
  * @dblk_num: Number of hardware descriptor blocks
  * @desc_num: Number of hardware descriptors
  * @completed_desc_num: Completed hardware descriptors
+ * @cyclic: Cyclic transfer vs. scatter-gather
+ * @periods: Number of periods in the cyclic transfer
+ * @period_size: Size of a period in bytes in cyclic transfers
  */
 struct xdma_desc {
 	struct virt_dma_desc		vdesc;
@@ -93,6 +96,9 @@  struct xdma_desc {
 	u32				dblk_num;
 	u32				desc_num;
 	u32				completed_desc_num;
+	bool				cyclic;
+	u32				periods;
+	u32				period_size;
 };
 
 #define XDMA_DEV_STATUS_REG_DMA		BIT(0)
@@ -174,6 +180,25 @@  static void xdma_link_sg_desc_blocks(struct xdma_desc *sw_desc)
 	desc->control = cpu_to_le32(XDMA_DESC_CONTROL_LAST);
 }
 
+/**
+ * xdma_link_cyclic_desc_blocks - Link cyclic descriptor blocks for DMA transfer
+ * @sw_desc: Tx descriptor pointer
+ */
+static void xdma_link_cyclic_desc_blocks(struct xdma_desc *sw_desc)
+{
+	struct xdma_desc_block *block;
+	struct xdma_hw_desc *desc;
+	int i;
+
+	block = sw_desc->desc_blocks;
+	for (i = 0; i < sw_desc->desc_num - 1; i++) {
+		desc = block->virt_addr + i * XDMA_DESC_SIZE;
+		desc->next_desc = cpu_to_le64(block->dma_addr + ((i + 1) * XDMA_DESC_SIZE));
+	}
+	desc = block->virt_addr + i * XDMA_DESC_SIZE;
+	desc->next_desc = cpu_to_le64(block->dma_addr);
+}
+
 static inline struct xdma_chan *to_xdma_chan(struct dma_chan *chan)
 {
 	return container_of(chan, struct xdma_chan, vchan.chan);
@@ -233,7 +258,7 @@  static void xdma_free_desc(struct virt_dma_desc *vdesc)
  * @desc_num: Number of hardware descriptors
  */
 static struct xdma_desc *
-xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num)
+xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num, bool cyclic)
 {
 	struct xdma_desc *sw_desc;
 	struct xdma_hw_desc *desc;
@@ -249,13 +274,17 @@  xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num)
 
 	sw_desc->chan = chan;
 	sw_desc->desc_num = desc_num;
+	sw_desc->cyclic = cyclic;
 	dblk_num = DIV_ROUND_UP(desc_num, XDMA_DESC_ADJACENT);
 	sw_desc->desc_blocks = kcalloc(dblk_num, sizeof(*sw_desc->desc_blocks),
 				       GFP_NOWAIT);
 	if (!sw_desc->desc_blocks)
 		goto failed;
 
-	control = XDMA_DESC_CONTROL(1, 0);
+	if (cyclic)
+		control = XDMA_DESC_CONTROL_CYCLIC;
+	else
+		control = XDMA_DESC_CONTROL(1, 0);
 
 	sw_desc->dblk_num = dblk_num;
 	for (i = 0; i < sw_desc->dblk_num; i++) {
@@ -269,7 +298,10 @@  xdma_alloc_desc(struct xdma_chan *chan, u32 desc_num)
 			desc[j].control = cpu_to_le32(control);
 	}
 
-	xdma_link_sg_desc_blocks(sw_desc);
+	if (cyclic)
+		xdma_link_cyclic_desc_blocks(sw_desc);
+	else
+		xdma_link_sg_desc_blocks(sw_desc);
 
 	return sw_desc;
 
@@ -469,7 +501,7 @@  xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	for_each_sg(sgl, sg, sg_len, i)
 		desc_num += DIV_ROUND_UP(sg_dma_len(sg), XDMA_DESC_BLEN_MAX);
 
-	sw_desc = xdma_alloc_desc(xdma_chan, desc_num);
+	sw_desc = xdma_alloc_desc(xdma_chan, desc_num, false);
 	if (!sw_desc)
 		return NULL;
 	sw_desc->dir = dir;
@@ -524,6 +556,81 @@  xdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	return NULL;
 }
 
+/**
+ * xdma_prep_dma_cyclic - prepare for cyclic DMA transactions
+ * @chan: DMA channel pointer
+ * @address: Device DMA address to access
+ * @size: Total length to transfer
+ * @period_size: Period size to use for each transfer
+ * @dir: Transfer direction
+ * @flags: Transfer ack flags
+ */
+static struct dma_async_tx_descriptor *
+xdma_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t address,
+		     size_t size, size_t period_size,
+		     enum dma_transfer_direction dir,
+		     unsigned long flags)
+{
+	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
+	struct xdma_device *xdev = xdma_chan->xdev_hdl;
+	unsigned int periods = size / period_size;
+	struct dma_async_tx_descriptor *tx_desc;
+	struct xdma_desc_block *dblk;
+	struct xdma_hw_desc *desc;
+	struct xdma_desc *sw_desc;
+	unsigned int i;
+
+	/*
+	 * Simplify the whole logic by preventing an abnormally high number of
+	 * periods and periods size.
+	 */
+	if (period_size > XDMA_DESC_BLEN_MAX) {
+		xdma_err(xdev, "period size limited to %lu bytes\n", XDMA_DESC_BLEN_MAX);
+		return NULL;
+	}
+
+	if (periods > XDMA_DESC_ADJACENT) {
+		xdma_err(xdev, "number of periods limited to %u\n", XDMA_DESC_ADJACENT);
+		return NULL;
+	}
+
+	sw_desc = xdma_alloc_desc(xdma_chan, periods, true);
+	if (!sw_desc)
+		return NULL;
+
+	sw_desc->periods = periods;
+	sw_desc->period_size = period_size;
+	sw_desc->dir = dir;
+
+	dblk = sw_desc->desc_blocks;
+	desc = dblk->virt_addr;
+
+	/* fill hardware descriptor */
+	for (i = 0; i < periods; i++) {
+		desc->bytes = cpu_to_le32(period_size);
+		if (dir == DMA_MEM_TO_DEV) {
+			desc->src_addr = cpu_to_le64(address + i * period_size);
+			desc->dst_addr = cpu_to_le64(xdma_chan->cfg.dst_addr);
+		} else {
+			desc->src_addr = cpu_to_le64(xdma_chan->cfg.src_addr);
+			desc->dst_addr = cpu_to_le64(address + i * period_size);
+		}
+
+		desc++;
+	}
+
+	tx_desc = vchan_tx_prep(&xdma_chan->vchan, &sw_desc->vdesc, flags);
+	if (!tx_desc)
+		goto failed;
+
+	return tx_desc;
+
+failed:
+	xdma_free_desc(&sw_desc->vdesc);
+
+	return NULL;
+}
+
 /**
  * xdma_device_config - Configure the DMA channel
  * @chan: DMA channel
@@ -583,7 +690,36 @@  static int xdma_alloc_chan_resources(struct dma_chan *chan)
 static enum dma_status xdma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
 				      struct dma_tx_state *state)
 {
-	return dma_cookie_status(chan, cookie, state);
+	struct xdma_chan *xdma_chan = to_xdma_chan(chan);
+	struct xdma_desc *desc = NULL;
+	struct virt_dma_desc *vd;
+	enum dma_status ret;
+	unsigned long flags;
+	unsigned int period_idx;
+	u32 residue = 0;
+
+	ret = dma_cookie_status(chan, cookie, state);
+	if (ret == DMA_COMPLETE)
+		return ret;
+
+	spin_lock_irqsave(&xdma_chan->vchan.lock, flags);
+
+	vd = vchan_find_desc(&xdma_chan->vchan, cookie);
+	if (vd)
+		desc = to_xdma_desc(vd);
+	if (!desc || !desc->cyclic) {
+		spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
+		return ret;
+	}
+
+	period_idx = desc->completed_desc_num % desc->periods;
+	residue = (desc->periods - period_idx) * desc->period_size;
+
+	spin_unlock_irqrestore(&xdma_chan->vchan.lock, flags);
+
+	dma_set_residue(state, residue);
+
+	return ret;
 }
 
 /**
@@ -599,6 +735,7 @@  static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
 	struct virt_dma_desc *vd;
 	struct xdma_desc *desc;
 	int ret;
+	u32 st;
 
 	spin_lock(&xchan->vchan.lock);
 
@@ -617,6 +754,19 @@  static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
 		goto out;
 
 	desc->completed_desc_num += complete_desc_num;
+
+	if (desc->cyclic) {
+		ret = regmap_read(xdev->rmap, xchan->base + XDMA_CHAN_STATUS,
+				  &st);
+		if (ret)
+			goto out;
+
+		regmap_write(xdev->rmap, xchan->base + XDMA_CHAN_STATUS, st);
+
+		vchan_cyclic_callback(vd);
+		goto out;
+	}
+
 	/*
 	 * if all data blocks are transferred, remove and complete the request
 	 */
@@ -630,7 +780,7 @@  static irqreturn_t xdma_channel_isr(int irq, void *dev_id)
 	    complete_desc_num != XDMA_DESC_BLOCK_NUM * XDMA_DESC_ADJACENT)
 		goto out;
 
-	/* transfer the rest of data */
+	/* transfer the rest of data (SG only) */
 	xdma_xfer_start(xchan);
 
 out:
@@ -930,8 +1080,10 @@  static int xdma_probe(struct platform_device *pdev)
 
 	dma_cap_set(DMA_SLAVE, xdev->dma_dev.cap_mask);
 	dma_cap_set(DMA_PRIVATE, xdev->dma_dev.cap_mask);
+	dma_cap_set(DMA_CYCLIC, xdev->dma_dev.cap_mask);
 
 	xdev->dma_dev.dev = &pdev->dev;
+	xdev->dma_dev.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
 	xdev->dma_dev.device_free_chan_resources = xdma_free_chan_resources;
 	xdev->dma_dev.device_alloc_chan_resources = xdma_alloc_chan_resources;
 	xdev->dma_dev.device_tx_status = xdma_tx_status;
@@ -941,6 +1093,7 @@  static int xdma_probe(struct platform_device *pdev)
 	xdev->dma_dev.filter.map = pdata->device_map;
 	xdev->dma_dev.filter.mapcnt = pdata->device_map_cnt;
 	xdev->dma_dev.filter.fn = xdma_filter_fn;
+	xdev->dma_dev.device_prep_dma_cyclic = xdma_prep_dma_cyclic;
 
 	ret = dma_async_device_register(&xdev->dma_dev);
 	if (ret) {