[v3,13/13] tty: serial: qcom-geni-serial: add support for serial engine DMA

Message ID 20221123110759.1836666-14-brgl@bgdev.pl
State New
Headers
Series serial: qcom-geni-serial: implement support for SE DMA |

Commit Message

Bartosz Golaszewski Nov. 23, 2022, 11:07 a.m. UTC
  From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>

The qcom-geni-serial driver currently only works in SE FIFO mode. This
limits the UART speed to around 180 kB/s. In order to achieve higher
speeds we need to use SE DMA mode.

Keep the console port working in FIFO mode but extend the code to use DMA
for the high-speed port.

Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 289 ++++++++++++++++++++++----
 1 file changed, 247 insertions(+), 42 deletions(-)
  

Comments

Srinivas Kandagatla Nov. 25, 2022, 2:37 p.m. UTC | #1
Thanks Bartosz for the patch,

On 23/11/2022 11:07, Bartosz Golaszewski wrote:
> From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
> 
> The qcom-geni-serial driver currently only works in SE FIFO mode. This
> limits the UART speed to around 180 kB/s. In order to achieve higher
> speeds we need to use SE DMA mode.
> 
> Keep the console port working in FIFO mode but extend the code to use DMA
> for the high-speed port.
> 
> Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
> ---
>   drivers/tty/serial/qcom_geni_serial.c | 289 ++++++++++++++++++++++----
>   1 file changed, 247 insertions(+), 42 deletions(-)
> 
> diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
> index 82242a40a95a..0f4ba909c792 100644
> --- a/drivers/tty/serial/qcom_geni_serial.c
> +++ b/drivers/tty/serial/qcom_geni_serial.c
> @@ -70,6 +70,8 @@
>   #define UART_START_TX			0x1
>   /* UART S_CMD OP codes */
>   #define UART_START_READ			0x1
> +#define UART_PARAM			0x1
> +#define UART_PARAM_RFR_OPEN		BIT(7)
>   
>   #define UART_OVERSAMPLING		32
>   #define STALE_TIMEOUT			16
> @@ -95,9 +97,11 @@
>   /* We always configure 4 bytes per FIFO word */
>   #define BYTES_PER_FIFO_WORD		4
>   
> +#define DMA_RX_BUF_SIZE		2048
> +
>   struct qcom_geni_device_data {
>   	bool console;
> -	void (*handle_rx)(struct uart_port *uport, u32 bytes, bool drop);
> +	enum geni_se_xfer_mode mode;
>   };
>   
>   struct qcom_geni_private_data {
> @@ -118,9 +122,11 @@ struct qcom_geni_serial_port {
>   	u32 tx_fifo_depth;
>   	u32 tx_fifo_width;
>   	u32 rx_fifo_depth;
> +	dma_addr_t tx_dma_addr;
> +	dma_addr_t rx_dma_addr;
>   	bool setup;
>   	unsigned int baud;
> -	void *rx_fifo;
> +	void *rx_buf;
>   	u32 loopback;
>   	bool brk;
>   
> @@ -552,18 +558,11 @@ static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
>   
>   static void handle_rx_uart(struct uart_port *uport, u32 bytes, bool drop)
>   {
> -	struct tty_port *tport;
>   	struct qcom_geni_serial_port *port = to_dev_port(uport);
> -	u32 num_bytes_pw = port->tx_fifo_width / BITS_PER_BYTE;
> -	u32 words = ALIGN(bytes, num_bytes_pw) / num_bytes_pw;
> +	struct tty_port *tport = &uport->state->port;
>   	int ret;
>   
> -	tport = &uport->state->port;
> -	ioread32_rep(uport->membase + SE_GENI_RX_FIFOn, port->rx_fifo, words);
> -	if (drop)
> -		return;
> -

Are we removing FIFO support for uart?

It it not a overhead to use dma for small transfers that are less than 
fifo size?


> -	ret = tty_insert_flip_string(tport, port->rx_fifo, bytes);
> +	ret = tty_insert_flip_string(tport, port->rx_buf, bytes);
>   	if (ret != bytes) {
>   		dev_err(uport->dev, "%s:Unable to push data ret %d_bytes %d\n",
>   				__func__, ret, bytes);
> @@ -578,7 +577,70 @@ static unsigned int qcom_geni_serial_tx_empty(struct uart_port *uport)
>   	return !readl(uport->membase + SE_GENI_TX_FIFO_STATUS);
>   }
>   
> -static void qcom_geni_serial_start_tx(struct uart_port *uport)
> +static void qcom_geni_serial_stop_tx_dma(struct uart_port *uport)
> +{
> +	struct qcom_geni_serial_port *port = to_dev_port(uport);
> +	bool done;

-->
> +	u32 status;
...
> +
> +	status = readl(uport->membase + SE_GENI_STATUS);
> +	if (!(status & M_GENI_CMD_ACTIVE))
> +		return;
<---

this code snippet is repeating more than few times in the patches, looks 
like it could be made to a inline helper.


> +
> +	if (port->rx_dma_addr) {
> +		geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr,
> +				      port->tx_remaining);
> +		port->tx_dma_addr = (dma_addr_t)NULL;
> +		port->tx_remaining = 0;
> +	}
> +
> +	m_irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
> +	writel(m_irq_en, uport->membase + SE_GENI_M_IRQ_EN);
> +	geni_se_cancel_m_cmd(&port->se);
> +
> +	done = qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
> +					 S_CMD_CANCEL_EN, true);
> +	if (!done) {
> +		geni_se_abort_m_cmd(&port->se);
> +		qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
> +					  M_CMD_ABORT_EN, true);

return is not checked, there are few more such instances in the patch.

> +		writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
> +	}
> +
> +	writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
> +}
> +
> +static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
> +{
> +	struct qcom_geni_serial_port *port = to_dev_port(uport);
> +	struct circ_buf *xmit = &uport->state->xmit;
> +	unsigned int xmit_size;
> +	int ret;
> +
> +	if (port->tx_dma_addr)
> +		return;
Is this condition actually possible?


> +
> +	xmit_size = uart_circ_chars_pending(xmit);
> +	if (xmit_size < WAKEUP_CHARS)
> +		uart_write_wakeup(uport);
> +
> +	xmit_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
> +
> +	qcom_geni_serial_setup_tx(uport, xmit_size);
> +
> +	ret = geni_se_tx_dma_prep(&port->se, &xmit->buf[xmit->tail],
> +				  xmit_size, &port->tx_dma_addr);
> +	if (ret) {
> +		dev_err(uport->dev, "unable to start TX SE DMA: %d\n", ret);
> +		qcom_geni_serial_stop_tx_dma(uport);
> +		return;
> +	}
> +
> +	port->tx_remaining = xmit_size;
> +}
> +

...
  
kernel test robot Nov. 25, 2022, 5:36 p.m. UTC | #2
Hi Bartosz,

I love your patch! Perhaps something to improve:

[auto build test WARNING on tty/tty-linus]
[also build test WARNING on linus/master v6.1-rc6]
[cannot apply to tty/tty-testing tty/tty-next next-20221125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Bartosz-Golaszewski/serial-qcom-geni-serial-implement-support-for-SE-DMA/20221123-191249
base:   https://git.kernel.org/pub/scm/linux/kernel/git/gregkh/tty.git tty-linus
patch link:    https://lore.kernel.org/r/20221123110759.1836666-14-brgl%40bgdev.pl
patch subject: [PATCH v3 13/13] tty: serial: qcom-geni-serial: add support for serial engine DMA
config: arc-allyesconfig
compiler: arceb-elf-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/5f94e67883d6c82dfe857dcac6dbde75773b8942
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Bartosz-Golaszewski/serial-qcom-geni-serial-implement-support-for-SE-DMA/20221123-191249
        git checkout 5f94e67883d6c82dfe857dcac6dbde75773b8942
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=arc SHELL=/bin/bash drivers/tty/serial/

If you fix the issue, kindly add following tag where applicable
| Reported-by: kernel test robot <lkp@intel.com>

All warnings (new ones prefixed by >>):

   drivers/tty/serial/qcom_geni_serial.c: In function 'qcom_geni_serial_stop_tx_dma':
>> drivers/tty/serial/qcom_geni_serial.c:594:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     594 |                 port->tx_dma_addr = (dma_addr_t)NULL;
         |                                     ^
   drivers/tty/serial/qcom_geni_serial.c: In function 'qcom_geni_serial_stop_rx_dma':
   drivers/tty/serial/qcom_geni_serial.c:791:37: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     791 |                 port->rx_dma_addr = (dma_addr_t)NULL;
         |                                     ^
   drivers/tty/serial/qcom_geni_serial.c: In function 'qcom_geni_serial_handle_rx_dma':
   drivers/tty/serial/qcom_geni_serial.c:831:29: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     831 |         port->rx_dma_addr = (dma_addr_t)NULL;
         |                             ^
   drivers/tty/serial/qcom_geni_serial.c: In function 'qcom_geni_serial_handle_tx_dma':
   drivers/tty/serial/qcom_geni_serial.c:963:29: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast]
     963 |         port->tx_dma_addr = (dma_addr_t)NULL;
         |                             ^


vim +594 drivers/tty/serial/qcom_geni_serial.c

   579	
   580	static void qcom_geni_serial_stop_tx_dma(struct uart_port *uport)
   581	{
   582		struct qcom_geni_serial_port *port = to_dev_port(uport);
   583		bool done;
   584		u32 status;
   585		u32 m_irq_en;
   586	
   587		status = readl(uport->membase + SE_GENI_STATUS);
   588		if (!(status & M_GENI_CMD_ACTIVE))
   589			return;
   590	
   591		if (port->rx_dma_addr) {
   592			geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr,
   593					      port->tx_remaining);
 > 594			port->tx_dma_addr = (dma_addr_t)NULL;
   595			port->tx_remaining = 0;
   596		}
   597	
   598		m_irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
   599		writel(m_irq_en, uport->membase + SE_GENI_M_IRQ_EN);
   600		geni_se_cancel_m_cmd(&port->se);
   601	
   602		done = qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
   603						 S_CMD_CANCEL_EN, true);
   604		if (!done) {
   605			geni_se_abort_m_cmd(&port->se);
   606			qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
   607						  M_CMD_ABORT_EN, true);
   608			writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
   609		}
   610	
   611		writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
   612	}
   613
  
Bartosz Golaszewski Nov. 28, 2022, 11:03 a.m. UTC | #3
On Fri, 25 Nov 2022 at 15:37, Srinivas Kandagatla
<srinivas.kandagatla@linaro.org> wrote:
>

<snip>

> >
> > @@ -552,18 +558,11 @@ static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
> >
> >   static void handle_rx_uart(struct uart_port *uport, u32 bytes, bool drop)
> >   {
> > -     struct tty_port *tport;
> >       struct qcom_geni_serial_port *port = to_dev_port(uport);
> > -     u32 num_bytes_pw = port->tx_fifo_width / BITS_PER_BYTE;
> > -     u32 words = ALIGN(bytes, num_bytes_pw) / num_bytes_pw;
> > +     struct tty_port *tport = &uport->state->port;
> >       int ret;
> >
> > -     tport = &uport->state->port;
> > -     ioread32_rep(uport->membase + SE_GENI_RX_FIFOn, port->rx_fifo, words);
> > -     if (drop)
> > -             return;
> > -
>
> Are we removing FIFO support for uart?
>
> It it not a overhead to use dma for small transfers that are less than
> fifo size?
>

You made me think about it but no - while I haven't measured it yet, I
don't think it's worth the code complexity behind it. The i2c driver
doesn't do it this way for short transfers either. If you insist I can
test it and come forward with numbers but unless we encounter a
real-life example of the need for lots of very short reads/writes in
short succession, I don't think we should overcomplicate this.

>
> > -     ret = tty_insert_flip_string(tport, port->rx_fifo, bytes);
> > +     ret = tty_insert_flip_string(tport, port->rx_buf, bytes);
> >       if (ret != bytes) {
> >               dev_err(uport->dev, "%s:Unable to push data ret %d_bytes %d\n",
> >                               __func__, ret, bytes);
> > @@ -578,7 +577,70 @@ static unsigned int qcom_geni_serial_tx_empty(struct uart_port *uport)
> >       return !readl(uport->membase + SE_GENI_TX_FIFO_STATUS);
> >   }
> >
> > -static void qcom_geni_serial_start_tx(struct uart_port *uport)
> > +static void qcom_geni_serial_stop_tx_dma(struct uart_port *uport)
> > +{
> > +     struct qcom_geni_serial_port *port = to_dev_port(uport);
> > +     bool done;
>
> -->
> > +     u32 status;
> ...
> > +
> > +     status = readl(uport->membase + SE_GENI_STATUS);
> > +     if (!(status & M_GENI_CMD_ACTIVE))
> > +             return;
> <---
>
> this code snippet is repeating more than few times in the patches, looks
> like it could be made to a inline helper.
>

Makes sense.

>
> > +
> > +     if (port->rx_dma_addr) {
> > +             geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr,
> > +                                   port->tx_remaining);
> > +             port->tx_dma_addr = (dma_addr_t)NULL;
> > +             port->tx_remaining = 0;
> > +     }
> > +
> > +     m_irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
> > +     writel(m_irq_en, uport->membase + SE_GENI_M_IRQ_EN);
> > +     geni_se_cancel_m_cmd(&port->se);
> > +
> > +     done = qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
> > +                                      S_CMD_CANCEL_EN, true);
> > +     if (!done) {
> > +             geni_se_abort_m_cmd(&port->se);
> > +             qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
> > +                                       M_CMD_ABORT_EN, true);
>
> return is not checked, there are few more such instances in the patch.
>

Yes, but this is an error path. What would I do if the cancel failed
to go through and then the abort failed as well? I can at best emit an
error message.

> > +             writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > +     }
> > +
> > +     writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
> > +}
> > +
> > +static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
> > +{
> > +     struct qcom_geni_serial_port *port = to_dev_port(uport);
> > +     struct circ_buf *xmit = &uport->state->xmit;
> > +     unsigned int xmit_size;
> > +     int ret;
> > +
> > +     if (port->tx_dma_addr)
> > +             return;
> Is this condition actually possible?
>

It should never happen but I wanted to be sure. Maybe a BUG_ON() or
WARN_ON() would be better?

>
> > +
> > +     xmit_size = uart_circ_chars_pending(xmit);
> > +     if (xmit_size < WAKEUP_CHARS)
> > +             uart_write_wakeup(uport);
> > +
> > +     xmit_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
> > +
> > +     qcom_geni_serial_setup_tx(uport, xmit_size);
> > +
> > +     ret = geni_se_tx_dma_prep(&port->se, &xmit->buf[xmit->tail],
> > +                               xmit_size, &port->tx_dma_addr);
> > +     if (ret) {
> > +             dev_err(uport->dev, "unable to start TX SE DMA: %d\n", ret);
> > +             qcom_geni_serial_stop_tx_dma(uport);
> > +             return;
> > +     }
> > +
> > +     port->tx_remaining = xmit_size;
> > +}
> > +
>
> ...

Bart
  
Bartosz Golaszewski Nov. 29, 2022, 10:13 a.m. UTC | #4
On Fri, Nov 25, 2022 at 3:37 PM Srinivas Kandagatla
<srinivas.kandagatla@linaro.org> wrote:
>

[snip]

> > +
> > +static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
> > +{
> > +     struct qcom_geni_serial_port *port = to_dev_port(uport);
> > +     struct circ_buf *xmit = &uport->state->xmit;
> > +     unsigned int xmit_size;
> > +     int ret;
> > +
> > +     if (port->tx_dma_addr)
> > +             return;
> Is this condition actually possible?
>

So it turns out, it's possible that the subsystem calls start_tx when
this is already set but the main engine is not yet in active state (so
we can't simply test that bit). This needs to stay then.

Bart
  

Patch

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 82242a40a95a..0f4ba909c792 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -70,6 +70,8 @@ 
 #define UART_START_TX			0x1
 /* UART S_CMD OP codes */
 #define UART_START_READ			0x1
+#define UART_PARAM			0x1
+#define UART_PARAM_RFR_OPEN		BIT(7)
 
 #define UART_OVERSAMPLING		32
 #define STALE_TIMEOUT			16
@@ -95,9 +97,11 @@ 
 /* We always configure 4 bytes per FIFO word */
 #define BYTES_PER_FIFO_WORD		4
 
+#define DMA_RX_BUF_SIZE		2048
+
 struct qcom_geni_device_data {
 	bool console;
-	void (*handle_rx)(struct uart_port *uport, u32 bytes, bool drop);
+	enum geni_se_xfer_mode mode;
 };
 
 struct qcom_geni_private_data {
@@ -118,9 +122,11 @@  struct qcom_geni_serial_port {
 	u32 tx_fifo_depth;
 	u32 tx_fifo_width;
 	u32 rx_fifo_depth;
+	dma_addr_t tx_dma_addr;
+	dma_addr_t rx_dma_addr;
 	bool setup;
 	unsigned int baud;
-	void *rx_fifo;
+	void *rx_buf;
 	u32 loopback;
 	bool brk;
 
@@ -552,18 +558,11 @@  static void handle_rx_console(struct uart_port *uport, u32 bytes, bool drop)
 
 static void handle_rx_uart(struct uart_port *uport, u32 bytes, bool drop)
 {
-	struct tty_port *tport;
 	struct qcom_geni_serial_port *port = to_dev_port(uport);
-	u32 num_bytes_pw = port->tx_fifo_width / BITS_PER_BYTE;
-	u32 words = ALIGN(bytes, num_bytes_pw) / num_bytes_pw;
+	struct tty_port *tport = &uport->state->port;
 	int ret;
 
-	tport = &uport->state->port;
-	ioread32_rep(uport->membase + SE_GENI_RX_FIFOn, port->rx_fifo, words);
-	if (drop)
-		return;
-
-	ret = tty_insert_flip_string(tport, port->rx_fifo, bytes);
+	ret = tty_insert_flip_string(tport, port->rx_buf, bytes);
 	if (ret != bytes) {
 		dev_err(uport->dev, "%s:Unable to push data ret %d_bytes %d\n",
 				__func__, ret, bytes);
@@ -578,7 +577,70 @@  static unsigned int qcom_geni_serial_tx_empty(struct uart_port *uport)
 	return !readl(uport->membase + SE_GENI_TX_FIFO_STATUS);
 }
 
-static void qcom_geni_serial_start_tx(struct uart_port *uport)
+static void qcom_geni_serial_stop_tx_dma(struct uart_port *uport)
+{
+	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	bool done;
+	u32 status;
+	u32 m_irq_en;
+
+	status = readl(uport->membase + SE_GENI_STATUS);
+	if (!(status & M_GENI_CMD_ACTIVE))
+		return;
+
+	if (port->rx_dma_addr) {
+		geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr,
+				      port->tx_remaining);
+		port->tx_dma_addr = (dma_addr_t)NULL;
+		port->tx_remaining = 0;
+	}
+
+	m_irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
+	writel(m_irq_en, uport->membase + SE_GENI_M_IRQ_EN);
+	geni_se_cancel_m_cmd(&port->se);
+
+	done = qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
+					 S_CMD_CANCEL_EN, true);
+	if (!done) {
+		geni_se_abort_m_cmd(&port->se);
+		qcom_geni_serial_poll_bit(uport, SE_GENI_M_IRQ_STATUS,
+					  M_CMD_ABORT_EN, true);
+		writel(M_CMD_ABORT_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+	}
+
+	writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
+}
+
+static void qcom_geni_serial_start_tx_dma(struct uart_port *uport)
+{
+	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	struct circ_buf *xmit = &uport->state->xmit;
+	unsigned int xmit_size;
+	int ret;
+
+	if (port->tx_dma_addr)
+		return;
+
+	xmit_size = uart_circ_chars_pending(xmit);
+	if (xmit_size < WAKEUP_CHARS)
+		uart_write_wakeup(uport);
+
+	xmit_size = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
+
+	qcom_geni_serial_setup_tx(uport, xmit_size);
+
+	ret = geni_se_tx_dma_prep(&port->se, &xmit->buf[xmit->tail],
+				  xmit_size, &port->tx_dma_addr);
+	if (ret) {
+		dev_err(uport->dev, "unable to start TX SE DMA: %d\n", ret);
+		qcom_geni_serial_stop_tx_dma(uport);
+		return;
+	}
+
+	port->tx_remaining = xmit_size;
+}
+
+static void qcom_geni_serial_start_tx_fifo(struct uart_port *uport)
 {
 	u32 irq_en;
 	u32 status;
@@ -597,7 +659,7 @@  static void qcom_geni_serial_start_tx(struct uart_port *uport)
 	writel(irq_en, uport->membase +	SE_GENI_M_IRQ_EN);
 }
 
-static void qcom_geni_serial_stop_tx(struct uart_port *uport)
+static void qcom_geni_serial_stop_tx_fifo(struct uart_port *uport)
 {
 	u32 irq_en;
 	u32 status;
@@ -623,14 +685,13 @@  static void qcom_geni_serial_stop_tx(struct uart_port *uport)
 	writel(M_CMD_CANCEL_EN, uport->membase + SE_GENI_M_IRQ_CLEAR);
 }
 
-static void qcom_geni_serial_handle_rx(struct uart_port *uport, bool drop)
+static void qcom_geni_serial_handle_rx_fifo(struct uart_port *uport, bool drop)
 {
 	u32 status;
 	u32 word_cnt;
 	u32 last_word_byte_cnt;
 	u32 last_word_partial;
 	u32 total_bytes;
-	struct qcom_geni_serial_port *port = to_dev_port(uport);
 
 	status = readl(uport->membase +	SE_GENI_RX_FIFO_STATUS);
 	word_cnt = status & RX_FIFO_WC_MSK;
@@ -645,10 +706,10 @@  static void qcom_geni_serial_handle_rx(struct uart_port *uport, bool drop)
 		total_bytes += last_word_byte_cnt;
 	else
 		total_bytes += BYTES_PER_FIFO_WORD;
-	port->dev_data->handle_rx(uport, total_bytes, drop);
+	handle_rx_console(uport, total_bytes, drop);
 }
 
-static void qcom_geni_serial_stop_rx(struct uart_port *uport)
+static void qcom_geni_serial_stop_rx_fifo(struct uart_port *uport)
 {
 	u32 irq_en;
 	u32 status;
@@ -678,7 +739,7 @@  static void qcom_geni_serial_stop_rx(struct uart_port *uport)
 	s_irq_status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
 	/* Flush the Rx buffer */
 	if (s_irq_status & S_RX_FIFO_LAST_EN)
-		qcom_geni_serial_handle_rx(uport, true);
+		qcom_geni_serial_handle_rx_fifo(uport, true);
 	writel(s_irq_status, uport->membase + SE_GENI_S_IRQ_CLEAR);
 
 	status = readl(uport->membase + SE_GENI_STATUS);
@@ -686,7 +747,7 @@  static void qcom_geni_serial_stop_rx(struct uart_port *uport)
 		qcom_geni_serial_abort_rx(uport);
 }
 
-static void qcom_geni_serial_start_rx(struct uart_port *uport)
+static void qcom_geni_serial_start_rx_fifo(struct uart_port *uport)
 {
 	u32 irq_en;
 	u32 status;
@@ -694,7 +755,7 @@  static void qcom_geni_serial_start_rx(struct uart_port *uport)
 
 	status = readl(uport->membase + SE_GENI_STATUS);
 	if (status & S_GENI_CMD_ACTIVE)
-		qcom_geni_serial_stop_rx(uport);
+		qcom_geni_serial_stop_rx_fifo(uport);
 
 	geni_se_setup_s_cmd(&port->se, UART_START_READ, 0);
 
@@ -707,6 +768,101 @@  static void qcom_geni_serial_start_rx(struct uart_port *uport)
 	writel(irq_en, uport->membase + SE_GENI_M_IRQ_EN);
 }
 
+static void qcom_geni_serial_stop_rx_dma(struct uart_port *uport)
+{
+	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	u32 status;
+
+	status = readl(uport->membase + SE_GENI_STATUS);
+	if (!(status & S_GENI_CMD_ACTIVE))
+		return;
+
+	geni_se_cancel_s_cmd(&port->se);
+	qcom_geni_serial_poll_bit(uport, SE_GENI_S_IRQ_STATUS,
+				  S_CMD_CANCEL_EN, true);
+
+	status = readl(uport->membase + SE_GENI_STATUS);
+	if (status & S_GENI_CMD_ACTIVE)
+		qcom_geni_serial_abort_rx(uport);
+
+	if (port->rx_dma_addr) {
+		geni_se_rx_dma_unprep(&port->se, port->rx_dma_addr,
+				      DMA_RX_BUF_SIZE);
+		port->rx_dma_addr = (dma_addr_t)NULL;
+	}
+}
+
+static void qcom_geni_serial_start_rx_dma(struct uart_port *uport)
+{
+	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	u32 status;
+	int ret;
+
+	status = readl(uport->membase + SE_GENI_STATUS);
+	if (status & S_GENI_CMD_ACTIVE)
+		qcom_geni_serial_stop_rx_dma(uport);
+
+	geni_se_setup_s_cmd(&port->se, UART_START_READ, UART_PARAM_RFR_OPEN);
+
+	ret = geni_se_rx_dma_prep(&port->se, port->rx_buf,
+				  DMA_RX_BUF_SIZE,
+				  &port->rx_dma_addr);
+	if (ret) {
+		dev_err(uport->dev, "unable to start RX SE DMA: %d\n", ret);
+		qcom_geni_serial_stop_rx_dma(uport);
+	}
+}
+
+static void qcom_geni_serial_handle_rx_dma(struct uart_port *uport, bool drop)
+{
+	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	u32 status;
+	u32 rx_in;
+	int ret;
+
+	status = readl(uport->membase + SE_GENI_STATUS);
+	if (!(status & S_GENI_CMD_ACTIVE))
+		return;
+
+	if (!port->rx_dma_addr)
+		return;
+
+	geni_se_rx_dma_unprep(&port->se, port->rx_dma_addr, DMA_RX_BUF_SIZE);
+	port->rx_dma_addr = (dma_addr_t)NULL;
+
+	rx_in = readl(uport->membase + SE_DMA_RX_LEN_IN);
+	if (!rx_in) {
+		dev_warn(uport->dev, "serial engine reports 0 RX bytes in!\n");
+		return;
+	}
+
+	if (!drop)
+		handle_rx_uart(uport, rx_in, drop);
+
+	ret = geni_se_rx_dma_prep(&port->se, port->rx_buf,
+				  DMA_RX_BUF_SIZE,
+				  &port->rx_dma_addr);
+	if (ret) {
+		dev_err(uport->dev, "unable to start RX SE DMA: %d\n", ret);
+		qcom_geni_serial_stop_rx_dma(uport);
+	}
+}
+
+static void qcom_geni_serial_start_rx(struct uart_port *uport)
+{
+	uport->ops->start_rx(uport);
+}
+
+static void qcom_geni_serial_stop_rx(struct uart_port *uport)
+{
+	uport->ops->stop_rx(uport);
+}
+
+static void qcom_geni_serial_stop_tx(struct uart_port *uport)
+{
+	uport->ops->stop_tx(uport);
+}
+
 static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport,
 					     unsigned int chunk)
 {
@@ -733,8 +889,8 @@  static void qcom_geni_serial_send_chunk_fifo(struct uart_port *uport,
 	}
 }
 
-static void qcom_geni_serial_handle_tx(struct uart_port *uport, bool done,
-		bool active)
+static void qcom_geni_serial_handle_tx_fifo(struct uart_port *uport,
+					    bool done, bool active)
 {
 	struct qcom_geni_serial_port *port = to_dev_port(uport);
 	struct circ_buf *xmit = &uport->state->xmit;
@@ -754,7 +910,7 @@  static void qcom_geni_serial_handle_tx(struct uart_port *uport, bool done,
 
 	/* All data has been transmitted and acknowledged as received */
 	if (!pending && !status && done) {
-		qcom_geni_serial_stop_tx(uport);
+		qcom_geni_serial_stop_tx_fifo(uport);
 		goto out_write_wakeup;
 	}
 
@@ -797,12 +953,32 @@  static void qcom_geni_serial_handle_tx(struct uart_port *uport, bool done,
 		uart_write_wakeup(uport);
 }
 
+static void qcom_geni_serial_handle_tx_dma(struct uart_port *uport)
+{
+	struct qcom_geni_serial_port *port = to_dev_port(uport);
+	struct circ_buf *xmit = &uport->state->xmit;
+
+	uart_xmit_advance(uport, port->tx_remaining);
+	geni_se_tx_dma_unprep(&port->se, port->tx_dma_addr, port->tx_remaining);
+	port->tx_dma_addr = (dma_addr_t)NULL;
+	port->tx_remaining = 0;
+
+	if (!uart_circ_empty(xmit))
+		qcom_geni_serial_start_tx_dma(uport);
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(uport);
+}
+
 static irqreturn_t qcom_geni_serial_isr(int isr, void *dev)
 {
 	u32 m_irq_en;
 	u32 m_irq_status;
 	u32 s_irq_status;
 	u32 geni_status;
+	u32 dma;
+	u32 dma_tx_status;
+	u32 dma_rx_status;
 	struct uart_port *uport = dev;
 	bool drop_rx = false;
 	struct tty_port *tport = &uport->state->port;
@@ -815,10 +991,15 @@  static irqreturn_t qcom_geni_serial_isr(int isr, void *dev)
 
 	m_irq_status = readl(uport->membase + SE_GENI_M_IRQ_STATUS);
 	s_irq_status = readl(uport->membase + SE_GENI_S_IRQ_STATUS);
+	dma_tx_status = readl(uport->membase + SE_DMA_TX_IRQ_STAT);
+	dma_rx_status = readl(uport->membase + SE_DMA_RX_IRQ_STAT);
 	geni_status = readl(uport->membase + SE_GENI_STATUS);
+	dma = readl(uport->membase + SE_GENI_DMA_MODE_EN);
 	m_irq_en = readl(uport->membase + SE_GENI_M_IRQ_EN);
 	writel(m_irq_status, uport->membase + SE_GENI_M_IRQ_CLEAR);
 	writel(s_irq_status, uport->membase + SE_GENI_S_IRQ_CLEAR);
+	writel(dma_tx_status, uport->membase + SE_DMA_TX_IRQ_CLR);
+	writel(dma_rx_status, uport->membase + SE_DMA_RX_IRQ_CLR);
 
 	if (WARN_ON(m_irq_status & M_ILLEGAL_CMD_EN))
 		goto out_unlock;
@@ -828,10 +1009,6 @@  static irqreturn_t qcom_geni_serial_isr(int isr, void *dev)
 		tty_insert_flip_char(tport, 0, TTY_OVERRUN);
 	}
 
-	if (m_irq_status & m_irq_en & (M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN))
-		qcom_geni_serial_handle_tx(uport, m_irq_status & M_CMD_DONE_EN,
-					   geni_status & M_GENI_CMD_ACTIVE);
-
 	if (s_irq_status & (S_GP_IRQ_0_EN | S_GP_IRQ_1_EN)) {
 		if (s_irq_status & S_GP_IRQ_0_EN)
 			uport->icount.parity++;
@@ -841,8 +1018,35 @@  static irqreturn_t qcom_geni_serial_isr(int isr, void *dev)
 		port->brk = true;
 	}
 
-	if (s_irq_status & (S_RX_FIFO_WATERMARK_EN | S_RX_FIFO_LAST_EN))
-		qcom_geni_serial_handle_rx(uport, drop_rx);
+	if (dma) {
+		if (dma_tx_status & TX_DMA_DONE)
+			qcom_geni_serial_handle_tx_dma(uport);
+
+		if (dma_rx_status) {
+			if (dma_rx_status & RX_RESET_DONE)
+				goto out_unlock;
+
+			if (dma_rx_status & RX_DMA_PARITY_ERR) {
+				uport->icount.parity++;
+				drop_rx = true;
+			}
+
+			if (dma_rx_status & RX_DMA_BREAK)
+				uport->icount.brk++;
+
+			if (dma_rx_status & (RX_DMA_DONE | RX_EOT))
+				qcom_geni_serial_handle_rx_dma(uport, drop_rx);
+		}
+	} else {
+		if (m_irq_status & m_irq_en &
+		    (M_TX_FIFO_WATERMARK_EN | M_CMD_DONE_EN))
+			qcom_geni_serial_handle_tx_fifo(uport,
+					m_irq_status & M_CMD_DONE_EN,
+					geni_status & M_GENI_CMD_ACTIVE);
+
+		if (s_irq_status & (S_RX_FIFO_WATERMARK_EN | S_RX_FIFO_LAST_EN))
+			qcom_geni_serial_handle_rx_fifo(uport, drop_rx);
+	}
 
 out_unlock:
 	uart_unlock_and_check_sysrq(uport);
@@ -912,7 +1116,7 @@  static int qcom_geni_serial_port_setup(struct uart_port *uport)
 	geni_se_config_packing(&port->se, BITS_PER_BYTE, BYTES_PER_FIFO_WORD,
 			       false, true, true);
 	geni_se_init(&port->se, UART_RX_WM, port->rx_fifo_depth - 2);
-	geni_se_select_mode(&port->se, GENI_SE_FIFO);
+	geni_se_select_mode(&port->se, port->dev_data->mode);
 	port->setup = true;
 
 	return 0;
@@ -1310,10 +1514,10 @@  static void qcom_geni_serial_pm(struct uart_port *uport,
 
 static const struct uart_ops qcom_geni_console_pops = {
 	.tx_empty = qcom_geni_serial_tx_empty,
-	.stop_tx = qcom_geni_serial_stop_tx,
-	.start_tx = qcom_geni_serial_start_tx,
-	.stop_rx = qcom_geni_serial_stop_rx,
-	.start_rx = qcom_geni_serial_start_rx,
+	.stop_tx = qcom_geni_serial_stop_tx_fifo,
+	.start_tx = qcom_geni_serial_start_tx_fifo,
+	.stop_rx = qcom_geni_serial_stop_rx_fifo,
+	.start_rx = qcom_geni_serial_start_rx_fifo,
 	.set_termios = qcom_geni_serial_set_termios,
 	.startup = qcom_geni_serial_startup,
 	.request_port = qcom_geni_serial_request_port,
@@ -1331,9 +1535,10 @@  static const struct uart_ops qcom_geni_console_pops = {
 
 static const struct uart_ops qcom_geni_uart_pops = {
 	.tx_empty = qcom_geni_serial_tx_empty,
-	.stop_tx = qcom_geni_serial_stop_tx,
-	.start_tx = qcom_geni_serial_start_tx,
-	.stop_rx = qcom_geni_serial_stop_rx,
+	.stop_tx = qcom_geni_serial_stop_tx_dma,
+	.start_tx = qcom_geni_serial_start_tx_dma,
+	.start_rx = qcom_geni_serial_start_rx_dma,
+	.stop_rx = qcom_geni_serial_stop_rx_dma,
 	.set_termios = qcom_geni_serial_set_termios,
 	.startup = qcom_geni_serial_startup,
 	.request_port = qcom_geni_serial_request_port,
@@ -1402,9 +1607,9 @@  static int qcom_geni_serial_probe(struct platform_device *pdev)
 	port->tx_fifo_width = DEF_FIFO_WIDTH_BITS;
 
 	if (!data->console) {
-		port->rx_fifo = devm_kcalloc(uport->dev,
-			port->rx_fifo_depth, sizeof(u32), GFP_KERNEL);
-		if (!port->rx_fifo)
+		port->rx_buf = devm_kzalloc(uport->dev,
+					    DMA_RX_BUF_SIZE, GFP_KERNEL);
+		if (!port->rx_buf)
 			return -ENOMEM;
 	}
 
@@ -1534,12 +1739,12 @@  static int __maybe_unused qcom_geni_serial_sys_resume(struct device *dev)
 
 static const struct qcom_geni_device_data qcom_geni_console_data = {
 	.console = true,
-	.handle_rx = handle_rx_console,
+	.mode = GENI_SE_FIFO,
 };
 
 static const struct qcom_geni_device_data qcom_geni_uart_data = {
 	.console = false,
-	.handle_rx = handle_rx_uart,
+	.mode = GENI_SE_DMA,
 };
 
 static const struct dev_pm_ops qcom_geni_serial_pm_ops = {