Merge tag 'dmaengine-5.1-rc1' of git://git.infradead.org/users/vkoul/slave-dma

Pull dmaengine updates from Vinod Koul:

 - dmatest updates for modularizing common struct and code

 - remove SG support for VDMA xilinx IP and updates to driver

 - Update to dw driver to support Intel iDMA controllers multi-block
   support

 - tegra updates for proper reporting of residue

 - Add Snow Ridge ioatdma device id and support for IOATDMA v3.4

 - struct_size() usage and useless LIST_HEAD cleanups in subsystem.

 - qDMA controller driver for Layerscape SoCs

 - stm32-dma PM Runtime support

 - And usual updates to imx-sdma, sprd, Documentation, fsl-edma,
   bcm2835, qcom_hidma etc

* tag 'dmaengine-5.1-rc1' of git://git.infradead.org/users/vkoul/slave-dma: (81 commits)
  dmaengine: imx-sdma: fix consistent dma test failures
  dmaengine: imx-sdma: add a test for imx8mq multi sdma devices
  dmaengine: imx-sdma: add clock ratio 1:1 check
  dmaengine: dmatest: move test data alloc & free into functions
  dmaengine: dmatest: add short-hand `buf_size` var in dmatest_func()
  dmaengine: dmatest: wrap src & dst data into a struct
  dmaengine: ioatdma: support latency tolerance report (LTR) for v3.4
  dmaengine: ioatdma: add descriptor pre-fetch support for v3.4
  dmaengine: ioatdma: disable DCA enabling on IOATDMA v3.4
  dmaengine: ioatdma: Add Snow Ridge ioatdma device id
  dmaengine: sprd: Change channel id to slave id for DMA cell specifier
  dt-bindings: dmaengine: sprd: Change channel id to slave id for DMA cell specifier
  dmaengine: mv_xor: Use correct device for DMA API
  Documentation :dmaengine: clarify DMA desc. pointer after submission
  Documentation: dmaengine: fix dmatest.rst warning
  dmaengine: k3dma: Add support for dma-channel-mask
  dmaengine: k3dma: Delete axi_config
  dmaengine: k3dma: Upgrade k3dma driver to support hisi_asp_dma hardware
  Documentation: bindings: dma: Add binding for dma-channel-mask
  Documentation: bindings: k3dma: Extend the k3dma driver binding to support hisi-asp
  ...
diff --git a/Documentation/devicetree/bindings/dma/dma.txt b/Documentation/devicetree/bindings/dma/dma.txt
index 6312fb0..eeb4e4d 100644
--- a/Documentation/devicetree/bindings/dma/dma.txt
+++ b/Documentation/devicetree/bindings/dma/dma.txt
@@ -16,6 +16,9 @@
 - dma-channels: 	Number of DMA channels supported by the controller.
 - dma-requests: 	Number of DMA request signals supported by the
 			controller.
+- dma-channel-mask:	Bitmask of available DMA channels in ascending order
+			that are not reserved by firmware and are available to
+			the kernel. i.e. first channel corresponds to LSB.
 
 Example:
 
@@ -29,6 +32,7 @@
 		#dma-cells = <1>;
 		dma-channels = <32>;
 		dma-requests = <127>;
+		dma-channel-mask = <0xfffe>
 	};
 
 * DMA router
diff --git a/Documentation/devicetree/bindings/dma/fsl-qdma.txt b/Documentation/devicetree/bindings/dma/fsl-qdma.txt
new file mode 100644
index 0000000..6a0ff90
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/fsl-qdma.txt
@@ -0,0 +1,57 @@
+NXP Layerscape SoC qDMA Controller
+==================================
+
+This device follows the generic DMA bindings defined in dma/dma.txt.
+
+Required properties:
+
+- compatible:		Must be one of
+			 "fsl,ls1021a-qdma": for LS1021A Board
+			 "fsl,ls1043a-qdma": for ls1043A Board
+			 "fsl,ls1046a-qdma": for ls1046A Board
+- reg:			Should contain the register's base address and length.
+- interrupts:		Should contain a reference to the interrupt used by this
+			device.
+- interrupt-names:	Should contain interrupt names:
+			 "qdma-queue0": the block0 interrupt
+			 "qdma-queue1": the block1 interrupt
+			 "qdma-queue2": the block2 interrupt
+			 "qdma-queue3": the block3 interrupt
+			 "qdma-error":  the error interrupt
+- fsl,dma-queues:	Should contain number of queues supported.
+- dma-channels:	Number of DMA channels supported
+- block-number:	the virtual block number
+- block-offset:	the offset of different virtual block
+- status-sizes:	status queue size of per virtual block
+- queue-sizes:		command queue size of per virtual block, the size number
+			based on queues
+
+Optional properties:
+
+- dma-channels:		Number of DMA channels supported by the controller.
+- big-endian:		If present registers and hardware scatter/gather descriptors
+			of the qDMA are implemented in big endian mode, otherwise in little
+			mode.
+
+Examples:
+
+	qdma: dma-controller@8390000 {
+			compatible = "fsl,ls1021a-qdma";
+			reg = <0x0 0x8388000 0x0 0x1000>, /* Controller regs */
+			      <0x0 0x8389000 0x0 0x1000>, /* Status regs */
+			      <0x0 0x838a000 0x0 0x2000>; /* Block regs */
+			interrupts = <GIC_SPI 185 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 76 IRQ_TYPE_LEVEL_HIGH>,
+				     <GIC_SPI 77 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "qdma-error",
+				"qdma-queue0", "qdma-queue1";
+			dma-channels = <8>;
+			block-number = <2>;
+			block-offset = <0x1000>;
+			fsl,dma-queues = <2>;
+			status-sizes = <64>;
+			queue-sizes = <64 64>;
+			big-endian;
+		};
+
+DMA clients must use the format described in dma/dma.txt file.
diff --git a/Documentation/devicetree/bindings/dma/k3dma.txt b/Documentation/devicetree/bindings/dma/k3dma.txt
index 4945aea..10a2f15 100644
--- a/Documentation/devicetree/bindings/dma/k3dma.txt
+++ b/Documentation/devicetree/bindings/dma/k3dma.txt
@@ -3,7 +3,9 @@
 See dma.txt first
 
 Required properties:
-- compatible: Should be "hisilicon,k3-dma-1.0"
+- compatible: Must be one of
+-              "hisilicon,k3-dma-1.0"
+-              "hisilicon,hisi-pcm-asp-dma-1.0"
 - reg: Should contain DMA registers location and length.
 - interrupts: Should contain one interrupt shared by all channel
 - #dma-cells: see dma.txt, should be 1, para number
diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
index db757df..0bedcee 100644
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ b/Documentation/devicetree/bindings/dma/snps-dma.txt
@@ -23,8 +23,6 @@
 
 
 Optional properties:
-- is_private: The device channels should be marked as private and not for by the
-  general purpose DMA channel allocator. False if not passed.
 - multi-block: Multi block transfers supported by hardware. Array property with
   one cell per channel. 0: not supported, 1 (default): supported.
 - snps,dma-protection-control: AHB HPROT[3:1] protection setting.
diff --git a/Documentation/devicetree/bindings/dma/sprd-dma.txt b/Documentation/devicetree/bindings/dma/sprd-dma.txt
index 7a10fea..adccea994 100644
--- a/Documentation/devicetree/bindings/dma/sprd-dma.txt
+++ b/Documentation/devicetree/bindings/dma/sprd-dma.txt
@@ -31,7 +31,7 @@
 described in the dma.txt file, using a two-cell specifier for each channel.
 The two cells in order are:
 1. A phandle pointing to the DMA controller.
-2. The channel id.
+2. The slave id.
 
 spi0: spi@70a00000{
 	...
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
index 174af2c..93b6d96 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
+++ b/Documentation/devicetree/bindings/dma/xilinx/xilinx_dma.txt
@@ -37,10 +37,11 @@
 Required properties for VDMA:
 - xlnx,num-fstores: Should be the number of framebuffers as configured in h/w.
 
-Optional properties:
-- xlnx,include-sg: Tells configured for Scatter-mode in
-	the hardware.
 Optional properties for AXI DMA:
+- xlnx,sg-length-width: Should be set to the width in bits of the length
+	register as configured in h/w. Takes values {8...26}. If the property
+	is missing or invalid then the default value 23 is used. This is the
+	maximum value that is supported by all IP versions.
 - xlnx,mcdma: Tells whether configured for multi-channel mode in the hardware.
 Optional properties for VDMA:
 - xlnx,flush-fsync: Tells which channel to Flush on Frame sync.
diff --git a/Documentation/driver-api/dmaengine/client.rst b/Documentation/driver-api/dmaengine/client.rst
index d728e50..45953f1 100644
--- a/Documentation/driver-api/dmaengine/client.rst
+++ b/Documentation/driver-api/dmaengine/client.rst
@@ -172,7 +172,7 @@
 
       After calling ``dmaengine_submit()`` the submitted transfer descriptor
       (``struct dma_async_tx_descriptor``) belongs to the DMA engine.
-      Consequentially, the client must consider invalid the pointer to that
+      Consequently, the client must consider invalid the pointer to that
       descriptor.
 
 5. Issue pending DMA requests and wait for callback notification
diff --git a/Documentation/driver-api/dmaengine/dmatest.rst b/Documentation/driver-api/dmaengine/dmatest.rst
index 8d81f1a..e78d070 100644
--- a/Documentation/driver-api/dmaengine/dmatest.rst
+++ b/Documentation/driver-api/dmaengine/dmatest.rst
@@ -59,6 +59,7 @@
 is created with the existing parameters. This thread is set as pending
 and will be executed once run is set to 1. Any parameters set after the thread
 is created are not applied.
+
 .. hint::
   available channel list could be extracted by running the following command::
 
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index d2286c7..0b1dfb5 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -218,6 +218,20 @@
 	  multiplexing capability for DMA request sources(slot).
 	  This module can be found on Freescale Vybrid and LS-1 SoCs.
 
+config FSL_QDMA
+       tristate "NXP Layerscape qDMA engine support"
+       depends on ARM || ARM64
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       select DMA_ENGINE_RAID
+       select ASYNC_TX_ENABLE_CHANNEL_SWITCH
+       help
+         Support the NXP Layerscape qDMA engine with command queue and legacy mode.
+         Channel virtualization is supported through enqueuing of DMA jobs to,
+         or dequeuing DMA jobs from, different work queues.
+         This module can be found on NXP Layerscape SoCs.
+	  The qdma driver only work on  SoCs with a DPAA hardware block.
+
 config FSL_RAID
         tristate "Freescale RAID engine Support"
         depends on FSL_SOC && !ASYNC_TX_ENABLE_CHANNEL_SWITCH
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 09571a8..6126e1c 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -33,6 +33,7 @@
 obj-$(CONFIG_FSL_DMA) += fsldma.o
 obj-$(CONFIG_FSL_EDMA) += fsl-edma.o fsl-edma-common.o
 obj-$(CONFIG_MCF_EDMA) += mcf-edma.o fsl-edma-common.o
+obj-$(CONFIG_FSL_QDMA) += fsl-qdma.o
 obj-$(CONFIG_FSL_RAID) += fsl_raid.o
 obj-$(CONFIG_HSU_DMA) += hsu/
 obj-$(CONFIG_IMG_MDC_DMA) += img-mdc-dma.o
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 01d936c..a0a9cd7 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -134,7 +134,6 @@ static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
 	struct at_desc *ret = NULL;
 	unsigned long flags;
 	unsigned int i = 0;
-	LIST_HEAD(tmp_list);
 
 	spin_lock_irqsave(&atchan->lock, flags);
 	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
@@ -1387,8 +1386,6 @@ static int atc_pause(struct dma_chan *chan)
 	int			chan_id = atchan->chan_common.chan_id;
 	unsigned long		flags;
 
-	LIST_HEAD(list);
-
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
 	spin_lock_irqsave(&atchan->lock, flags);
@@ -1408,8 +1405,6 @@ static int atc_resume(struct dma_chan *chan)
 	int			chan_id = atchan->chan_common.chan_id;
 	unsigned long		flags;
 
-	LIST_HEAD(list);
-
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
 	if (!atc_chan_is_paused(atchan))
diff --git a/drivers/dma/bcm2835-dma.c b/drivers/dma/bcm2835-dma.c
index ae10f56..ec8a291 100644
--- a/drivers/dma/bcm2835-dma.c
+++ b/drivers/dma/bcm2835-dma.c
@@ -2,9 +2,6 @@
 /*
  * BCM2835 DMA engine support
  *
- * This driver only supports cyclic DMA transfers
- * as needed for the I2S module.
- *
  * Author:      Florian Meier <florian.meier@koalo.de>
  *              Copyright 2013
  *
@@ -42,7 +39,6 @@
 
 struct bcm2835_dmadev {
 	struct dma_device ddev;
-	spinlock_t lock;
 	void __iomem *base;
 	struct device_dma_parameters dma_parms;
 };
@@ -64,7 +60,6 @@ struct bcm2835_cb_entry {
 
 struct bcm2835_chan {
 	struct virt_dma_chan vc;
-	struct list_head node;
 
 	struct dma_slave_config	cfg;
 	unsigned int dreq;
@@ -312,8 +307,7 @@ static struct bcm2835_desc *bcm2835_dma_create_cb_chain(
 		return NULL;
 
 	/* allocate and setup the descriptor. */
-	d = kzalloc(sizeof(*d) + frames * sizeof(struct bcm2835_cb_entry),
-		    gfp);
+	d = kzalloc(struct_size(d, cb_list, frames), gfp);
 	if (!d)
 		return NULL;
 
@@ -406,7 +400,7 @@ static void bcm2835_dma_fill_cb_chain_with_sg(
 	}
 }
 
-static int bcm2835_dma_abort(struct bcm2835_chan *c)
+static void bcm2835_dma_abort(struct bcm2835_chan *c)
 {
 	void __iomem *chan_base = c->chan_base;
 	long int timeout = 10000;
@@ -416,7 +410,7 @@ static int bcm2835_dma_abort(struct bcm2835_chan *c)
 	 * (The ACTIVE flag in the CS register is not a reliable indicator.)
 	 */
 	if (!readl(chan_base + BCM2835_DMA_ADDR))
-		return 0;
+		return;
 
 	/* Write 0 to the active bit - Pause the DMA */
 	writel(0, chan_base + BCM2835_DMA_CS);
@@ -432,7 +426,6 @@ static int bcm2835_dma_abort(struct bcm2835_chan *c)
 			"failed to complete outstanding writes\n");
 
 	writel(BCM2835_DMA_RESET, chan_base + BCM2835_DMA_CS);
-	return 0;
 }
 
 static void bcm2835_dma_start_desc(struct bcm2835_chan *c)
@@ -504,8 +497,12 @@ static int bcm2835_dma_alloc_chan_resources(struct dma_chan *chan)
 
 	dev_dbg(dev, "Allocating DMA channel %d\n", c->ch);
 
+	/*
+	 * Control blocks are 256 bit in length and must start at a 256 bit
+	 * (32 byte) aligned address (BCM2835 ARM Peripherals, sec. 4.2.1.1).
+	 */
 	c->cb_pool = dma_pool_create(dev_name(dev), dev,
-				     sizeof(struct bcm2835_dma_cb), 0, 0);
+				     sizeof(struct bcm2835_dma_cb), 32, 0);
 	if (!c->cb_pool) {
 		dev_err(dev, "unable to allocate descriptor pool\n");
 		return -ENOMEM;
@@ -774,17 +771,11 @@ static int bcm2835_dma_slave_config(struct dma_chan *chan,
 static int bcm2835_dma_terminate_all(struct dma_chan *chan)
 {
 	struct bcm2835_chan *c = to_bcm2835_dma_chan(chan);
-	struct bcm2835_dmadev *d = to_bcm2835_dma_dev(c->vc.chan.device);
 	unsigned long flags;
 	LIST_HEAD(head);
 
 	spin_lock_irqsave(&c->vc.lock, flags);
 
-	/* Prevent this channel being scheduled */
-	spin_lock(&d->lock);
-	list_del_init(&c->node);
-	spin_unlock(&d->lock);
-
 	/* stop DMA activity */
 	if (c->desc) {
 		vchan_terminate_vdesc(&c->desc->vd);
@@ -817,7 +808,6 @@ static int bcm2835_dma_chan_init(struct bcm2835_dmadev *d, int chan_id,
 
 	c->vc.desc_free = bcm2835_dma_desc_free;
 	vchan_init(&c->vc, &d->ddev);
-	INIT_LIST_HEAD(&c->node);
 
 	c->chan_base = BCM2835_DMA_CHANIO(d->base, chan_id);
 	c->ch = chan_id;
@@ -920,7 +910,6 @@ static int bcm2835_dma_probe(struct platform_device *pdev)
 	od->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	od->ddev.dev = &pdev->dev;
 	INIT_LIST_HEAD(&od->ddev.channels);
-	spin_lock_init(&od->lock);
 
 	platform_set_drvdata(pdev, od);
 
diff --git a/drivers/dma/dma-axi-dmac.c b/drivers/dma/dma-axi-dmac.c
index 15b2453..ffc0adc 100644
--- a/drivers/dma/dma-axi-dmac.c
+++ b/drivers/dma/dma-axi-dmac.c
@@ -367,8 +367,7 @@ static struct axi_dmac_desc *axi_dmac_alloc_desc(unsigned int num_sgs)
 	struct axi_dmac_desc *desc;
 	unsigned int i;
 
-	desc = kzalloc(sizeof(struct axi_dmac_desc) +
-		sizeof(struct axi_dmac_sg) * num_sgs, GFP_NOWAIT);
+	desc = kzalloc(struct_size(desc, sg, num_sgs), GFP_NOWAIT);
 	if (!desc)
 		return NULL;
 
diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index a8b6225..9ce0a38 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -838,9 +838,8 @@ static int jz4780_dma_probe(struct platform_device *pdev)
 	if (!soc_data)
 		return -EINVAL;
 
-	jzdma = devm_kzalloc(dev, sizeof(*jzdma)
-				+ sizeof(*jzdma->chan) * soc_data->nb_channels,
-				GFP_KERNEL);
+	jzdma = devm_kzalloc(dev, struct_size(jzdma, chan,
+			     soc_data->nb_channels), GFP_KERNEL);
 	if (!jzdma)
 		return -ENOMEM;
 
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 6511928..b96814a 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -200,15 +200,20 @@ struct dmatest_done {
 	wait_queue_head_t	*wait;
 };
 
+struct dmatest_data {
+	u8		**raw;
+	u8		**aligned;
+	unsigned int	cnt;
+	unsigned int	off;
+};
+
 struct dmatest_thread {
 	struct list_head	node;
 	struct dmatest_info	*info;
 	struct task_struct	*task;
 	struct dma_chan		*chan;
-	u8			**srcs;
-	u8			**usrcs;
-	u8			**dsts;
-	u8			**udsts;
+	struct dmatest_data	src;
+	struct dmatest_data	dst;
 	enum dma_transaction_type type;
 	wait_queue_head_t done_wait;
 	struct dmatest_done test_done;
@@ -481,6 +486,53 @@ static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len)
 	return FIXPT_TO_INT(dmatest_persec(runtime, len >> 10));
 }
 
+static void __dmatest_free_test_data(struct dmatest_data *d, unsigned int cnt)
+{
+	unsigned int i;
+
+	for (i = 0; i < cnt; i++)
+		kfree(d->raw[i]);
+
+	kfree(d->aligned);
+	kfree(d->raw);
+}
+
+static void dmatest_free_test_data(struct dmatest_data *d)
+{
+	__dmatest_free_test_data(d, d->cnt);
+}
+
+static int dmatest_alloc_test_data(struct dmatest_data *d,
+		unsigned int buf_size, u8 align)
+{
+	unsigned int i = 0;
+
+	d->raw = kcalloc(d->cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!d->raw)
+		return -ENOMEM;
+
+	d->aligned = kcalloc(d->cnt + 1, sizeof(u8 *), GFP_KERNEL);
+	if (!d->aligned)
+		goto err;
+
+	for (i = 0; i < d->cnt; i++) {
+		d->raw[i] = kmalloc(buf_size + align, GFP_KERNEL);
+		if (!d->raw[i])
+			goto err;
+
+		/* align to alignment restriction */
+		if (align)
+			d->aligned[i] = PTR_ALIGN(d->raw[i], align);
+		else
+			d->aligned[i] = d->raw[i];
+	}
+
+	return 0;
+err:
+	__dmatest_free_test_data(d, i);
+	return -ENOMEM;
+}
+
 /*
  * This function repeatedly tests DMA transfers of various lengths and
  * offsets for a given operation type until it is told to exit by
@@ -511,8 +563,9 @@ static int dmatest_func(void *data)
 	enum dma_ctrl_flags 	flags;
 	u8			*pq_coefs = NULL;
 	int			ret;
-	int			src_cnt;
-	int			dst_cnt;
+	unsigned int 		buf_size;
+	struct dmatest_data	*src;
+	struct dmatest_data	*dst;
 	int			i;
 	ktime_t			ktime, start, diff;
 	ktime_t			filltime = 0;
@@ -535,25 +588,27 @@ static int dmatest_func(void *data)
 	params = &info->params;
 	chan = thread->chan;
 	dev = chan->device;
+	src = &thread->src;
+	dst = &thread->dst;
 	if (thread->type == DMA_MEMCPY) {
 		align = params->alignment < 0 ? dev->copy_align :
 						params->alignment;
-		src_cnt = dst_cnt = 1;
+		src->cnt = dst->cnt = 1;
 	} else if (thread->type == DMA_MEMSET) {
 		align = params->alignment < 0 ? dev->fill_align :
 						params->alignment;
-		src_cnt = dst_cnt = 1;
+		src->cnt = dst->cnt = 1;
 		is_memset = true;
 	} else if (thread->type == DMA_XOR) {
 		/* force odd to ensure dst = src */
-		src_cnt = min_odd(params->xor_sources | 1, dev->max_xor);
-		dst_cnt = 1;
+		src->cnt = min_odd(params->xor_sources | 1, dev->max_xor);
+		dst->cnt = 1;
 		align = params->alignment < 0 ? dev->xor_align :
 						params->alignment;
 	} else if (thread->type == DMA_PQ) {
 		/* force odd to ensure dst = src */
-		src_cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0));
-		dst_cnt = 2;
+		src->cnt = min_odd(params->pq_sources | 1, dma_maxpq(dev, 0));
+		dst->cnt = 2;
 		align = params->alignment < 0 ? dev->pq_align :
 						params->alignment;
 
@@ -561,75 +616,38 @@ static int dmatest_func(void *data)
 		if (!pq_coefs)
 			goto err_thread_type;
 
-		for (i = 0; i < src_cnt; i++)
+		for (i = 0; i < src->cnt; i++)
 			pq_coefs[i] = 1;
 	} else
 		goto err_thread_type;
 
 	/* Check if buffer count fits into map count variable (u8) */
-	if ((src_cnt + dst_cnt) >= 255) {
+	if ((src->cnt + dst->cnt) >= 255) {
 		pr_err("too many buffers (%d of 255 supported)\n",
-		       src_cnt + dst_cnt);
+		       src->cnt + dst->cnt);
 		goto err_free_coefs;
 	}
 
-	if (1 << align > params->buf_size) {
+	buf_size = params->buf_size;
+	if (1 << align > buf_size) {
 		pr_err("%u-byte buffer too small for %d-byte alignment\n",
-		       params->buf_size, 1 << align);
+		       buf_size, 1 << align);
 		goto err_free_coefs;
 	}
 
-	thread->srcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
-	if (!thread->srcs)
+	if (dmatest_alloc_test_data(src, buf_size, align) < 0)
 		goto err_free_coefs;
 
-	thread->usrcs = kcalloc(src_cnt + 1, sizeof(u8 *), GFP_KERNEL);
-	if (!thread->usrcs)
-		goto err_usrcs;
-
-	for (i = 0; i < src_cnt; i++) {
-		thread->usrcs[i] = kmalloc(params->buf_size + align,
-					   GFP_KERNEL);
-		if (!thread->usrcs[i])
-			goto err_srcbuf;
-
-		/* align srcs to alignment restriction */
-		if (align)
-			thread->srcs[i] = PTR_ALIGN(thread->usrcs[i], align);
-		else
-			thread->srcs[i] = thread->usrcs[i];
-	}
-	thread->srcs[i] = NULL;
-
-	thread->dsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
-	if (!thread->dsts)
-		goto err_dsts;
-
-	thread->udsts = kcalloc(dst_cnt + 1, sizeof(u8 *), GFP_KERNEL);
-	if (!thread->udsts)
-		goto err_udsts;
-
-	for (i = 0; i < dst_cnt; i++) {
-		thread->udsts[i] = kmalloc(params->buf_size + align,
-					   GFP_KERNEL);
-		if (!thread->udsts[i])
-			goto err_dstbuf;
-
-		/* align dsts to alignment restriction */
-		if (align)
-			thread->dsts[i] = PTR_ALIGN(thread->udsts[i], align);
-		else
-			thread->dsts[i] = thread->udsts[i];
-	}
-	thread->dsts[i] = NULL;
+	if (dmatest_alloc_test_data(dst, buf_size, align) < 0)
+		goto err_src;
 
 	set_user_nice(current, 10);
 
-	srcs = kcalloc(src_cnt, sizeof(dma_addr_t), GFP_KERNEL);
+	srcs = kcalloc(src->cnt, sizeof(dma_addr_t), GFP_KERNEL);
 	if (!srcs)
-		goto err_dstbuf;
+		goto err_dst;
 
-	dma_pq = kcalloc(dst_cnt, sizeof(dma_addr_t), GFP_KERNEL);
+	dma_pq = kcalloc(dst->cnt, sizeof(dma_addr_t), GFP_KERNEL);
 	if (!dma_pq)
 		goto err_srcs_array;
 
@@ -644,21 +662,21 @@ static int dmatest_func(void *data)
 		struct dma_async_tx_descriptor *tx = NULL;
 		struct dmaengine_unmap_data *um;
 		dma_addr_t *dsts;
-		unsigned int src_off, dst_off, len;
+		unsigned int len;
 
 		total_tests++;
 
 		if (params->transfer_size) {
-			if (params->transfer_size >= params->buf_size) {
+			if (params->transfer_size >= buf_size) {
 				pr_err("%u-byte transfer size must be lower than %u-buffer size\n",
-				       params->transfer_size, params->buf_size);
+				       params->transfer_size, buf_size);
 				break;
 			}
 			len = params->transfer_size;
 		} else if (params->norandom) {
-			len = params->buf_size;
+			len = buf_size;
 		} else {
-			len = dmatest_random() % params->buf_size + 1;
+			len = dmatest_random() % buf_size + 1;
 		}
 
 		/* Do not alter transfer size explicitly defined by user */
@@ -670,57 +688,57 @@ static int dmatest_func(void *data)
 		total_len += len;
 
 		if (params->norandom) {
-			src_off = 0;
-			dst_off = 0;
+			src->off = 0;
+			dst->off = 0;
 		} else {
-			src_off = dmatest_random() % (params->buf_size - len + 1);
-			dst_off = dmatest_random() % (params->buf_size - len + 1);
+			src->off = dmatest_random() % (buf_size - len + 1);
+			dst->off = dmatest_random() % (buf_size - len + 1);
 
-			src_off = (src_off >> align) << align;
-			dst_off = (dst_off >> align) << align;
+			src->off = (src->off >> align) << align;
+			dst->off = (dst->off >> align) << align;
 		}
 
 		if (!params->noverify) {
 			start = ktime_get();
-			dmatest_init_srcs(thread->srcs, src_off, len,
-					  params->buf_size, is_memset);
-			dmatest_init_dsts(thread->dsts, dst_off, len,
-					  params->buf_size, is_memset);
+			dmatest_init_srcs(src->aligned, src->off, len,
+					  buf_size, is_memset);
+			dmatest_init_dsts(dst->aligned, dst->off, len,
+					  buf_size, is_memset);
 
 			diff = ktime_sub(ktime_get(), start);
 			filltime = ktime_add(filltime, diff);
 		}
 
-		um = dmaengine_get_unmap_data(dev->dev, src_cnt + dst_cnt,
+		um = dmaengine_get_unmap_data(dev->dev, src->cnt + dst->cnt,
 					      GFP_KERNEL);
 		if (!um) {
 			failed_tests++;
 			result("unmap data NULL", total_tests,
-			       src_off, dst_off, len, ret);
+			       src->off, dst->off, len, ret);
 			continue;
 		}
 
-		um->len = params->buf_size;
-		for (i = 0; i < src_cnt; i++) {
-			void *buf = thread->srcs[i];
+		um->len = buf_size;
+		for (i = 0; i < src->cnt; i++) {
+			void *buf = src->aligned[i];
 			struct page *pg = virt_to_page(buf);
 			unsigned long pg_off = offset_in_page(buf);
 
 			um->addr[i] = dma_map_page(dev->dev, pg, pg_off,
 						   um->len, DMA_TO_DEVICE);
-			srcs[i] = um->addr[i] + src_off;
+			srcs[i] = um->addr[i] + src->off;
 			ret = dma_mapping_error(dev->dev, um->addr[i]);
 			if (ret) {
 				result("src mapping error", total_tests,
-				       src_off, dst_off, len, ret);
+				       src->off, dst->off, len, ret);
 				goto error_unmap_continue;
 			}
 			um->to_cnt++;
 		}
 		/* map with DMA_BIDIRECTIONAL to force writeback/invalidate */
-		dsts = &um->addr[src_cnt];
-		for (i = 0; i < dst_cnt; i++) {
-			void *buf = thread->dsts[i];
+		dsts = &um->addr[src->cnt];
+		for (i = 0; i < dst->cnt; i++) {
+			void *buf = dst->aligned[i];
 			struct page *pg = virt_to_page(buf);
 			unsigned long pg_off = offset_in_page(buf);
 
@@ -729,7 +747,7 @@ static int dmatest_func(void *data)
 			ret = dma_mapping_error(dev->dev, dsts[i]);
 			if (ret) {
 				result("dst mapping error", total_tests,
-				       src_off, dst_off, len, ret);
+				       src->off, dst->off, len, ret);
 				goto error_unmap_continue;
 			}
 			um->bidi_cnt++;
@@ -737,29 +755,29 @@ static int dmatest_func(void *data)
 
 		if (thread->type == DMA_MEMCPY)
 			tx = dev->device_prep_dma_memcpy(chan,
-							 dsts[0] + dst_off,
+							 dsts[0] + dst->off,
 							 srcs[0], len, flags);
 		else if (thread->type == DMA_MEMSET)
 			tx = dev->device_prep_dma_memset(chan,
-						dsts[0] + dst_off,
-						*(thread->srcs[0] + src_off),
+						dsts[0] + dst->off,
+						*(src->aligned[0] + src->off),
 						len, flags);
 		else if (thread->type == DMA_XOR)
 			tx = dev->device_prep_dma_xor(chan,
-						      dsts[0] + dst_off,
-						      srcs, src_cnt,
+						      dsts[0] + dst->off,
+						      srcs, src->cnt,
 						      len, flags);
 		else if (thread->type == DMA_PQ) {
-			for (i = 0; i < dst_cnt; i++)
-				dma_pq[i] = dsts[i] + dst_off;
+			for (i = 0; i < dst->cnt; i++)
+				dma_pq[i] = dsts[i] + dst->off;
 			tx = dev->device_prep_dma_pq(chan, dma_pq, srcs,
-						     src_cnt, pq_coefs,
+						     src->cnt, pq_coefs,
 						     len, flags);
 		}
 
 		if (!tx) {
-			result("prep error", total_tests, src_off,
-			       dst_off, len, ret);
+			result("prep error", total_tests, src->off,
+			       dst->off, len, ret);
 			msleep(100);
 			goto error_unmap_continue;
 		}
@@ -770,8 +788,8 @@ static int dmatest_func(void *data)
 		cookie = tx->tx_submit(tx);
 
 		if (dma_submit_error(cookie)) {
-			result("submit error", total_tests, src_off,
-			       dst_off, len, ret);
+			result("submit error", total_tests, src->off,
+			       dst->off, len, ret);
 			msleep(100);
 			goto error_unmap_continue;
 		}
@@ -783,58 +801,58 @@ static int dmatest_func(void *data)
 		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
 
 		if (!done->done) {
-			result("test timed out", total_tests, src_off, dst_off,
+			result("test timed out", total_tests, src->off, dst->off,
 			       len, 0);
 			goto error_unmap_continue;
 		} else if (status != DMA_COMPLETE) {
 			result(status == DMA_ERROR ?
 			       "completion error status" :
-			       "completion busy status", total_tests, src_off,
-			       dst_off, len, ret);
+			       "completion busy status", total_tests, src->off,
+			       dst->off, len, ret);
 			goto error_unmap_continue;
 		}
 
 		dmaengine_unmap_put(um);
 
 		if (params->noverify) {
-			verbose_result("test passed", total_tests, src_off,
-				       dst_off, len, 0);
+			verbose_result("test passed", total_tests, src->off,
+				       dst->off, len, 0);
 			continue;
 		}
 
 		start = ktime_get();
 		pr_debug("%s: verifying source buffer...\n", current->comm);
-		error_count = dmatest_verify(thread->srcs, 0, src_off,
+		error_count = dmatest_verify(src->aligned, 0, src->off,
 				0, PATTERN_SRC, true, is_memset);
-		error_count += dmatest_verify(thread->srcs, src_off,
-				src_off + len, src_off,
+		error_count += dmatest_verify(src->aligned, src->off,
+				src->off + len, src->off,
 				PATTERN_SRC | PATTERN_COPY, true, is_memset);
-		error_count += dmatest_verify(thread->srcs, src_off + len,
-				params->buf_size, src_off + len,
+		error_count += dmatest_verify(src->aligned, src->off + len,
+				buf_size, src->off + len,
 				PATTERN_SRC, true, is_memset);
 
 		pr_debug("%s: verifying dest buffer...\n", current->comm);
-		error_count += dmatest_verify(thread->dsts, 0, dst_off,
+		error_count += dmatest_verify(dst->aligned, 0, dst->off,
 				0, PATTERN_DST, false, is_memset);
 
-		error_count += dmatest_verify(thread->dsts, dst_off,
-				dst_off + len, src_off,
+		error_count += dmatest_verify(dst->aligned, dst->off,
+				dst->off + len, src->off,
 				PATTERN_SRC | PATTERN_COPY, false, is_memset);
 
-		error_count += dmatest_verify(thread->dsts, dst_off + len,
-				params->buf_size, dst_off + len,
+		error_count += dmatest_verify(dst->aligned, dst->off + len,
+				buf_size, dst->off + len,
 				PATTERN_DST, false, is_memset);
 
 		diff = ktime_sub(ktime_get(), start);
 		comparetime = ktime_add(comparetime, diff);
 
 		if (error_count) {
-			result("data error", total_tests, src_off, dst_off,
+			result("data error", total_tests, src->off, dst->off,
 			       len, error_count);
 			failed_tests++;
 		} else {
-			verbose_result("test passed", total_tests, src_off,
-				       dst_off, len, 0);
+			verbose_result("test passed", total_tests, src->off,
+				       dst->off, len, 0);
 		}
 
 		continue;
@@ -852,19 +870,10 @@ static int dmatest_func(void *data)
 	kfree(dma_pq);
 err_srcs_array:
 	kfree(srcs);
-err_dstbuf:
-	for (i = 0; thread->udsts[i]; i++)
-		kfree(thread->udsts[i]);
-	kfree(thread->udsts);
-err_udsts:
-	kfree(thread->dsts);
-err_dsts:
-err_srcbuf:
-	for (i = 0; thread->usrcs[i]; i++)
-		kfree(thread->usrcs[i]);
-	kfree(thread->usrcs);
-err_usrcs:
-	kfree(thread->srcs);
+err_dst:
+	dmatest_free_test_data(dst);
+err_src:
+	dmatest_free_test_data(src);
 err_free_coefs:
 	kfree(pq_coefs);
 err_thread_type:
diff --git a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
index f8888dc..18b6014 100644
--- a/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
+++ b/drivers/dma/dw-axi-dmac/dw-axi-dmac.h
@@ -75,7 +75,7 @@ struct __packed axi_dma_lli {
 	__le32		sstat;
 	__le32		dstat;
 	__le32		status_lo;
-	__le32		ststus_hi;
+	__le32		status_hi;
 	__le32		reserved_lo;
 	__le32		reserved_hi;
 };
diff --git a/drivers/dma/dw/Kconfig b/drivers/dma/dw/Kconfig
index 04b9728..e516269 100644
--- a/drivers/dma/dw/Kconfig
+++ b/drivers/dma/dw/Kconfig
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
 #
 # DMA engine configuration for dw
 #
diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile
index 2b949c2..63ed895 100644
--- a/drivers/dma/dw/Makefile
+++ b/drivers/dma/dw/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DW_DMAC_CORE)	+= dw_dmac_core.o
-dw_dmac_core-objs	:= core.o
+dw_dmac_core-objs	:= core.o dw.o idma32.o
 
 obj-$(CONFIG_DW_DMAC)		+= dw_dmac.o
 dw_dmac-objs		:= platform.o
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index dc053e6..21cb2a5 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -1,13 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Core driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2007-2008 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
  * Copyright (C) 2013 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/bitops.h>
@@ -37,27 +34,6 @@
  * support descriptor writeback.
  */
 
-#define DWC_DEFAULT_CTLLO(_chan) ({				\
-		struct dw_dma_chan *_dwc = to_dw_dma_chan(_chan);	\
-		struct dma_slave_config	*_sconfig = &_dwc->dma_sconfig;	\
-		bool _is_slave = is_slave_direction(_dwc->direction);	\
-		u8 _smsize = _is_slave ? _sconfig->src_maxburst :	\
-			DW_DMA_MSIZE_16;			\
-		u8 _dmsize = _is_slave ? _sconfig->dst_maxburst :	\
-			DW_DMA_MSIZE_16;			\
-		u8 _dms = (_dwc->direction == DMA_MEM_TO_DEV) ?		\
-			_dwc->dws.p_master : _dwc->dws.m_master;	\
-		u8 _sms = (_dwc->direction == DMA_DEV_TO_MEM) ?		\
-			_dwc->dws.p_master : _dwc->dws.m_master;	\
-								\
-		(DWC_CTLL_DST_MSIZE(_dmsize)			\
-		 | DWC_CTLL_SRC_MSIZE(_smsize)			\
-		 | DWC_CTLL_LLP_D_EN				\
-		 | DWC_CTLL_LLP_S_EN				\
-		 | DWC_CTLL_DMS(_dms)				\
-		 | DWC_CTLL_SMS(_sms));				\
-	})
-
 /* The set of bus widths supported by the DMA controller */
 #define DW_DMA_BUSWIDTHS			  \
 	BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED)	| \
@@ -138,44 +114,6 @@ static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
 	dwc->descs_allocated--;
 }
 
-static void dwc_initialize_chan_idma32(struct dw_dma_chan *dwc)
-{
-	u32 cfghi = 0;
-	u32 cfglo = 0;
-
-	/* Set default burst alignment */
-	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
-
-	/* Low 4 bits of the request lines */
-	cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
-	cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
-
-	/* Request line extension (2 bits) */
-	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
-	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
-
-	channel_writel(dwc, CFG_LO, cfglo);
-	channel_writel(dwc, CFG_HI, cfghi);
-}
-
-static void dwc_initialize_chan_dw(struct dw_dma_chan *dwc)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-	u32 cfghi = DWC_CFGH_FIFO_MODE;
-	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
-	bool hs_polarity = dwc->dws.hs_polarity;
-
-	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
-	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
-	cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl);
-
-	/* Set polarity of handshake interface */
-	cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
-
-	channel_writel(dwc, CFG_LO, cfglo);
-	channel_writel(dwc, CFG_HI, cfghi);
-}
-
 static void dwc_initialize(struct dw_dma_chan *dwc)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
@@ -183,10 +121,7 @@ static void dwc_initialize(struct dw_dma_chan *dwc)
 	if (test_bit(DW_DMA_IS_INITIALIZED, &dwc->flags))
 		return;
 
-	if (dw->pdata->is_idma32)
-		dwc_initialize_chan_idma32(dwc);
-	else
-		dwc_initialize_chan_dw(dwc);
+	dw->initialize_chan(dwc);
 
 	/* Enable interrupts */
 	channel_set_bit(dw, MASK.XFER, dwc->mask);
@@ -215,37 +150,6 @@ static inline void dwc_chan_disable(struct dw_dma *dw, struct dw_dma_chan *dwc)
 		cpu_relax();
 }
 
-static u32 bytes2block(struct dw_dma_chan *dwc, size_t bytes,
-			  unsigned int width, size_t *len)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-	u32 block;
-
-	/* Always in bytes for iDMA 32-bit */
-	if (dw->pdata->is_idma32)
-		width = 0;
-
-	if ((bytes >> width) > dwc->block_size) {
-		block = dwc->block_size;
-		*len = block << width;
-	} else {
-		block = bytes >> width;
-		*len = bytes;
-	}
-
-	return block;
-}
-
-static size_t block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
-{
-	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
-
-	if (dw->pdata->is_idma32)
-		return IDMA32C_CTLH_BLOCK_TS(block);
-
-	return DWC_CTLH_BLOCK_TS(block) << width;
-}
-
 /*----------------------------------------------------------------------*/
 
 /* Perform single block transfer */
@@ -391,10 +295,11 @@ static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
 /* Returns how many bytes were already received from source */
 static inline u32 dwc_get_sent(struct dw_dma_chan *dwc)
 {
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	u32 ctlhi = channel_readl(dwc, CTL_HI);
 	u32 ctllo = channel_readl(dwc, CTL_LO);
 
-	return block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
+	return dw->block2bytes(dwc, ctlhi, ctllo >> 4 & 7);
 }
 
 static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
@@ -651,7 +556,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	unsigned int		src_width;
 	unsigned int		dst_width;
 	unsigned int		data_width = dw->pdata->data_width[m_master];
-	u32			ctllo;
+	u32			ctllo, ctlhi;
 	u8			lms = DWC_LLP_LMS(m_master);
 
 	dev_vdbg(chan2dev(chan),
@@ -667,7 +572,7 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 
 	src_width = dst_width = __ffs(data_width | src | dest | len);
 
-	ctllo = DWC_DEFAULT_CTLLO(chan)
+	ctllo = dw->prepare_ctllo(dwc)
 			| DWC_CTLL_DST_WIDTH(dst_width)
 			| DWC_CTLL_SRC_WIDTH(src_width)
 			| DWC_CTLL_DST_INC
@@ -680,10 +585,12 @@ dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		if (!desc)
 			goto err_desc_get;
 
+		ctlhi = dw->bytes2block(dwc, len - offset, src_width, &xfer_count);
+
 		lli_write(desc, sar, src + offset);
 		lli_write(desc, dar, dest + offset);
 		lli_write(desc, ctllo, ctllo);
-		lli_write(desc, ctlhi, bytes2block(dwc, len - offset, src_width, &xfer_count));
+		lli_write(desc, ctlhi, ctlhi);
 		desc->len = xfer_count;
 
 		if (!first) {
@@ -721,7 +628,7 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
 	struct dw_desc		*prev;
 	struct dw_desc		*first;
-	u32			ctllo;
+	u32			ctllo, ctlhi;
 	u8			m_master = dwc->dws.m_master;
 	u8			lms = DWC_LLP_LMS(m_master);
 	dma_addr_t		reg;
@@ -745,10 +652,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	case DMA_MEM_TO_DEV:
 		reg_width = __ffs(sconfig->dst_addr_width);
 		reg = sconfig->dst_addr;
-		ctllo = (DWC_DEFAULT_CTLLO(chan)
+		ctllo = dw->prepare_ctllo(dwc)
 				| DWC_CTLL_DST_WIDTH(reg_width)
 				| DWC_CTLL_DST_FIX
-				| DWC_CTLL_SRC_INC);
+				| DWC_CTLL_SRC_INC;
 
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_M2P) :
 			DWC_CTLL_FC(DW_DMA_FC_D_M2P);
@@ -768,9 +675,11 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			if (!desc)
 				goto err_desc_get;
 
+			ctlhi = dw->bytes2block(dwc, len, mem_width, &dlen);
+
 			lli_write(desc, sar, mem);
 			lli_write(desc, dar, reg);
-			lli_write(desc, ctlhi, bytes2block(dwc, len, mem_width, &dlen));
+			lli_write(desc, ctlhi, ctlhi);
 			lli_write(desc, ctllo, ctllo | DWC_CTLL_SRC_WIDTH(mem_width));
 			desc->len = dlen;
 
@@ -793,10 +702,10 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	case DMA_DEV_TO_MEM:
 		reg_width = __ffs(sconfig->src_addr_width);
 		reg = sconfig->src_addr;
-		ctllo = (DWC_DEFAULT_CTLLO(chan)
+		ctllo = dw->prepare_ctllo(dwc)
 				| DWC_CTLL_SRC_WIDTH(reg_width)
 				| DWC_CTLL_DST_INC
-				| DWC_CTLL_SRC_FIX);
+				| DWC_CTLL_SRC_FIX;
 
 		ctllo |= sconfig->device_fc ? DWC_CTLL_FC(DW_DMA_FC_P_P2M) :
 			DWC_CTLL_FC(DW_DMA_FC_D_P2M);
@@ -814,9 +723,11 @@ dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			if (!desc)
 				goto err_desc_get;
 
+			ctlhi = dw->bytes2block(dwc, len, reg_width, &dlen);
+
 			lli_write(desc, sar, reg);
 			lli_write(desc, dar, mem);
-			lli_write(desc, ctlhi, bytes2block(dwc, len, reg_width, &dlen));
+			lli_write(desc, ctlhi, ctlhi);
 			mem_width = __ffs(data_width | mem | dlen);
 			lli_write(desc, ctllo, ctllo | DWC_CTLL_DST_WIDTH(mem_width));
 			desc->len = dlen;
@@ -876,22 +787,12 @@ EXPORT_SYMBOL_GPL(dw_dma_filter);
 static int dwc_config(struct dma_chan *chan, struct dma_slave_config *sconfig)
 {
 	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
-	struct dma_slave_config *sc = &dwc->dma_sconfig;
 	struct dw_dma *dw = to_dw_dma(chan->device);
-	/*
-	 * Fix sconfig's burst size according to dw_dmac. We need to convert
-	 * them as:
-	 * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
-	 *
-	 * NOTE: burst size 2 is not supported by DesignWare controller.
-	 *       iDMA 32-bit supports it.
-	 */
-	u32 s = dw->pdata->is_idma32 ? 1 : 2;
 
 	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
 
-	sc->src_maxburst = sc->src_maxburst > 1 ? fls(sc->src_maxburst) - s : 0;
-	sc->dst_maxburst = sc->dst_maxburst > 1 ? fls(sc->dst_maxburst) - s : 0;
+	dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst);
+	dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst);
 
 	return 0;
 }
@@ -900,16 +801,9 @@ static void dwc_chan_pause(struct dw_dma_chan *dwc, bool drain)
 {
 	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 	unsigned int		count = 20;	/* timeout iterations */
-	u32			cfglo;
 
-	cfglo = channel_readl(dwc, CFG_LO);
-	if (dw->pdata->is_idma32) {
-		if (drain)
-			cfglo |= IDMA32C_CFGL_CH_DRAIN;
-		else
-			cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
-	}
-	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+	dw->suspend_chan(dwc, drain);
+
 	while (!(channel_readl(dwc, CFG_LO) & DWC_CFGL_FIFO_EMPTY) && count--)
 		udelay(2);
 
@@ -928,11 +822,11 @@ static int dwc_pause(struct dma_chan *chan)
 	return 0;
 }
 
-static inline void dwc_chan_resume(struct dw_dma_chan *dwc)
+static inline void dwc_chan_resume(struct dw_dma_chan *dwc, bool drain)
 {
-	u32 cfglo = channel_readl(dwc, CFG_LO);
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
 
-	channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+	dw->resume_chan(dwc, drain);
 
 	clear_bit(DW_DMA_IS_PAUSED, &dwc->flags);
 }
@@ -945,7 +839,7 @@ static int dwc_resume(struct dma_chan *chan)
 	spin_lock_irqsave(&dwc->lock, flags);
 
 	if (test_bit(DW_DMA_IS_PAUSED, &dwc->flags))
-		dwc_chan_resume(dwc);
+		dwc_chan_resume(dwc, false);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
@@ -968,7 +862,7 @@ static int dwc_terminate_all(struct dma_chan *chan)
 
 	dwc_chan_disable(dw, dwc);
 
-	dwc_chan_resume(dwc);
+	dwc_chan_resume(dwc, true);
 
 	/* active_list entries will end up before queued entries */
 	list_splice_init(&dwc->queue, &list);
@@ -1058,33 +952,7 @@ static void dwc_issue_pending(struct dma_chan *chan)
 
 /*----------------------------------------------------------------------*/
 
-/*
- * Program FIFO size of channels.
- *
- * By default full FIFO (512 bytes) is assigned to channel 0. Here we
- * slice FIFO on equal parts between channels.
- */
-static void idma32_fifo_partition(struct dw_dma *dw)
-{
-	u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
-		    IDMA32C_FP_UPDATE;
-	u64 fifo_partition = 0;
-
-	if (!dw->pdata->is_idma32)
-		return;
-
-	/* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
-	fifo_partition |= value << 0;
-
-	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
-	fifo_partition |= value << 32;
-
-	/* Program FIFO Partition registers - 64 bytes per channel */
-	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
-	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
-}
-
-static void dw_dma_off(struct dw_dma *dw)
+void do_dw_dma_off(struct dw_dma *dw)
 {
 	unsigned int i;
 
@@ -1103,7 +971,7 @@ static void dw_dma_off(struct dw_dma *dw)
 		clear_bit(DW_DMA_IS_INITIALIZED, &dw->chan[i].flags);
 }
 
-static void dw_dma_on(struct dw_dma *dw)
+void do_dw_dma_on(struct dw_dma *dw)
 {
 	dma_writel(dw, CFG, DW_CFG_DMA_EN);
 }
@@ -1139,7 +1007,7 @@ static int dwc_alloc_chan_resources(struct dma_chan *chan)
 
 	/* Enable controller here if needed */
 	if (!dw->in_use)
-		dw_dma_on(dw);
+		do_dw_dma_on(dw);
 	dw->in_use |= dwc->mask;
 
 	return 0;
@@ -1150,7 +1018,6 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
 	struct dw_dma		*dw = to_dw_dma(chan->device);
 	unsigned long		flags;
-	LIST_HEAD(list);
 
 	dev_dbg(chan2dev(chan), "%s: descs allocated=%u\n", __func__,
 			dwc->descs_allocated);
@@ -1177,30 +1044,25 @@ static void dwc_free_chan_resources(struct dma_chan *chan)
 	/* Disable controller in case it was a last user */
 	dw->in_use &= ~dwc->mask;
 	if (!dw->in_use)
-		dw_dma_off(dw);
+		do_dw_dma_off(dw);
 
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
-int dw_dma_probe(struct dw_dma_chip *chip)
+int do_dma_probe(struct dw_dma_chip *chip)
 {
+	struct dw_dma *dw = chip->dw;
 	struct dw_dma_platform_data *pdata;
-	struct dw_dma		*dw;
 	bool			autocfg = false;
 	unsigned int		dw_params;
 	unsigned int		i;
 	int			err;
 
-	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
-	if (!dw)
-		return -ENOMEM;
-
 	dw->pdata = devm_kzalloc(chip->dev, sizeof(*dw->pdata), GFP_KERNEL);
 	if (!dw->pdata)
 		return -ENOMEM;
 
 	dw->regs = chip->regs;
-	chip->dw = dw;
 
 	pm_runtime_get_sync(chip->dev);
 
@@ -1227,8 +1089,6 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 		pdata->block_size = dma_readl(dw, MAX_BLK_SIZE);
 
 		/* Fill platform data with the default values */
-		pdata->is_private = true;
-		pdata->is_memcpy = true;
 		pdata->chan_allocation_order = CHAN_ALLOCATION_ASCENDING;
 		pdata->chan_priority = CHAN_PRIORITY_ASCENDING;
 	} else if (chip->pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) {
@@ -1252,15 +1112,10 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
 
 	/* Force dma off, just in case */
-	dw_dma_off(dw);
-
-	idma32_fifo_partition(dw);
+	dw->disable(dw);
 
 	/* Device and instance ID for IRQ and DMA pool */
-	if (pdata->is_idma32)
-		snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", chip->id);
-	else
-		snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", chip->id);
+	dw->set_device_name(dw, chip->id);
 
 	/* Create a pool of consistent memory blocks for hardware descriptors */
 	dw->desc_pool = dmam_pool_create(dw->name, chip->dev,
@@ -1340,10 +1195,8 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 
 	/* Set capabilities */
 	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
-	if (pdata->is_private)
-		dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
-	if (pdata->is_memcpy)
-		dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+	dma_cap_set(DMA_PRIVATE, dw->dma.cap_mask);
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
 
 	dw->dma.dev = chip->dev;
 	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
@@ -1384,16 +1237,15 @@ int dw_dma_probe(struct dw_dma_chip *chip)
 	pm_runtime_put_sync_suspend(chip->dev);
 	return err;
 }
-EXPORT_SYMBOL_GPL(dw_dma_probe);
 
-int dw_dma_remove(struct dw_dma_chip *chip)
+int do_dma_remove(struct dw_dma_chip *chip)
 {
 	struct dw_dma		*dw = chip->dw;
 	struct dw_dma_chan	*dwc, *_dwc;
 
 	pm_runtime_get_sync(chip->dev);
 
-	dw_dma_off(dw);
+	do_dw_dma_off(dw);
 	dma_async_device_unregister(&dw->dma);
 
 	free_irq(chip->irq, dw);
@@ -1408,27 +1260,24 @@ int dw_dma_remove(struct dw_dma_chip *chip)
 	pm_runtime_put_sync_suspend(chip->dev);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dw_dma_remove);
 
-int dw_dma_disable(struct dw_dma_chip *chip)
+int do_dw_dma_disable(struct dw_dma_chip *chip)
 {
 	struct dw_dma *dw = chip->dw;
 
-	dw_dma_off(dw);
+	dw->disable(dw);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dw_dma_disable);
+EXPORT_SYMBOL_GPL(do_dw_dma_disable);
 
-int dw_dma_enable(struct dw_dma_chip *chip)
+int do_dw_dma_enable(struct dw_dma_chip *chip)
 {
 	struct dw_dma *dw = chip->dw;
 
-	idma32_fifo_partition(dw);
-
-	dw_dma_on(dw);
+	dw->enable(dw);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(dw_dma_enable);
+EXPORT_SYMBOL_GPL(do_dw_dma_enable);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller core driver");
diff --git a/drivers/dma/dw/dw.c b/drivers/dma/dw/dw.c
new file mode 100644
index 0000000..7a085b3
--- /dev/null
+++ b/drivers/dma/dw/dw.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2007-2008 Atmel Corporation
+// Copyright (C) 2010-2011 ST Microelectronics
+// Copyright (C) 2013,2018 Intel Corporation
+
+#include <linux/bitops.h>
+#include <linux/dmaengine.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "internal.h"
+
+static void dw_dma_initialize_chan(struct dw_dma_chan *dwc)
+{
+	struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+	u32 cfghi = DWC_CFGH_FIFO_MODE;
+	u32 cfglo = DWC_CFGL_CH_PRIOR(dwc->priority);
+	bool hs_polarity = dwc->dws.hs_polarity;
+
+	cfghi |= DWC_CFGH_DST_PER(dwc->dws.dst_id);
+	cfghi |= DWC_CFGH_SRC_PER(dwc->dws.src_id);
+	cfghi |= DWC_CFGH_PROTCTL(dw->pdata->protctl);
+
+	/* Set polarity of handshake interface */
+	cfglo |= hs_polarity ? DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL : 0;
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void dw_dma_suspend_chan(struct dw_dma_chan *dwc, bool drain)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+}
+
+static void dw_dma_resume_chan(struct dw_dma_chan *dwc, bool drain)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+}
+
+static u32 dw_dma_bytes2block(struct dw_dma_chan *dwc,
+			      size_t bytes, unsigned int width, size_t *len)
+{
+	u32 block;
+
+	if ((bytes >> width) > dwc->block_size) {
+		block = dwc->block_size;
+		*len = dwc->block_size << width;
+	} else {
+		block = bytes >> width;
+		*len = bytes;
+	}
+
+	return block;
+}
+
+static size_t dw_dma_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+	return DWC_CTLH_BLOCK_TS(block) << width;
+}
+
+static u32 dw_dma_prepare_ctllo(struct dw_dma_chan *dwc)
+{
+	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
+	bool is_slave = is_slave_direction(dwc->direction);
+	u8 smsize = is_slave ? sconfig->src_maxburst : DW_DMA_MSIZE_16;
+	u8 dmsize = is_slave ? sconfig->dst_maxburst : DW_DMA_MSIZE_16;
+	u8 p_master = dwc->dws.p_master;
+	u8 m_master = dwc->dws.m_master;
+	u8 dms = (dwc->direction == DMA_MEM_TO_DEV) ? p_master : m_master;
+	u8 sms = (dwc->direction == DMA_DEV_TO_MEM) ? p_master : m_master;
+
+	return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN |
+	       DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize) |
+	       DWC_CTLL_DMS(dms) | DWC_CTLL_SMS(sms);
+}
+
+static void dw_dma_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst)
+{
+	/*
+	 * Fix burst size according to dw_dmac. We need to convert them as:
+	 * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3.
+	 */
+	*maxburst = *maxburst > 1 ? fls(*maxburst) - 2 : 0;
+}
+
+static void dw_dma_set_device_name(struct dw_dma *dw, int id)
+{
+	snprintf(dw->name, sizeof(dw->name), "dw:dmac%d", id);
+}
+
+static void dw_dma_disable(struct dw_dma *dw)
+{
+	do_dw_dma_off(dw);
+}
+
+static void dw_dma_enable(struct dw_dma *dw)
+{
+	do_dw_dma_on(dw);
+}
+
+int dw_dma_probe(struct dw_dma_chip *chip)
+{
+	struct dw_dma *dw;
+
+	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	/* Channel operations */
+	dw->initialize_chan = dw_dma_initialize_chan;
+	dw->suspend_chan = dw_dma_suspend_chan;
+	dw->resume_chan = dw_dma_resume_chan;
+	dw->prepare_ctllo = dw_dma_prepare_ctllo;
+	dw->encode_maxburst = dw_dma_encode_maxburst;
+	dw->bytes2block = dw_dma_bytes2block;
+	dw->block2bytes = dw_dma_block2bytes;
+
+	/* Device operations */
+	dw->set_device_name = dw_dma_set_device_name;
+	dw->disable = dw_dma_disable;
+	dw->enable = dw_dma_enable;
+
+	chip->dw = dw;
+	return do_dma_probe(chip);
+}
+EXPORT_SYMBOL_GPL(dw_dma_probe);
+
+int dw_dma_remove(struct dw_dma_chip *chip)
+{
+	return do_dma_remove(chip);
+}
+EXPORT_SYMBOL_GPL(dw_dma_remove);
diff --git a/drivers/dma/dw/idma32.c b/drivers/dma/dw/idma32.c
new file mode 100644
index 0000000..f006573
--- /dev/null
+++ b/drivers/dma/dw/idma32.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2013,2018 Intel Corporation
+
+#include <linux/bitops.h>
+#include <linux/dmaengine.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#include "internal.h"
+
+static void idma32_initialize_chan(struct dw_dma_chan *dwc)
+{
+	u32 cfghi = 0;
+	u32 cfglo = 0;
+
+	/* Set default burst alignment */
+	cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+	/* Low 4 bits of the request lines */
+	cfghi |= IDMA32C_CFGH_DST_PER(dwc->dws.dst_id & 0xf);
+	cfghi |= IDMA32C_CFGH_SRC_PER(dwc->dws.src_id & 0xf);
+
+	/* Request line extension (2 bits) */
+	cfghi |= IDMA32C_CFGH_DST_PER_EXT(dwc->dws.dst_id >> 4 & 0x3);
+	cfghi |= IDMA32C_CFGH_SRC_PER_EXT(dwc->dws.src_id >> 4 & 0x3);
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void idma32_suspend_chan(struct dw_dma_chan *dwc, bool drain)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	if (drain)
+		cfglo |= IDMA32C_CFGL_CH_DRAIN;
+
+	channel_writel(dwc, CFG_LO, cfglo | DWC_CFGL_CH_SUSP);
+}
+
+static void idma32_resume_chan(struct dw_dma_chan *dwc, bool drain)
+{
+	u32 cfglo = channel_readl(dwc, CFG_LO);
+
+	if (drain)
+		cfglo &= ~IDMA32C_CFGL_CH_DRAIN;
+
+	channel_writel(dwc, CFG_LO, cfglo & ~DWC_CFGL_CH_SUSP);
+}
+
+static u32 idma32_bytes2block(struct dw_dma_chan *dwc,
+			      size_t bytes, unsigned int width, size_t *len)
+{
+	u32 block;
+
+	if (bytes > dwc->block_size) {
+		block = dwc->block_size;
+		*len = dwc->block_size;
+	} else {
+		block = bytes;
+		*len = bytes;
+	}
+
+	return block;
+}
+
+static size_t idma32_block2bytes(struct dw_dma_chan *dwc, u32 block, u32 width)
+{
+	return IDMA32C_CTLH_BLOCK_TS(block);
+}
+
+static u32 idma32_prepare_ctllo(struct dw_dma_chan *dwc)
+{
+	struct dma_slave_config	*sconfig = &dwc->dma_sconfig;
+	bool is_slave = is_slave_direction(dwc->direction);
+	u8 smsize = is_slave ? sconfig->src_maxburst : IDMA32_MSIZE_8;
+	u8 dmsize = is_slave ? sconfig->dst_maxburst : IDMA32_MSIZE_8;
+
+	return DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN |
+	       DWC_CTLL_DST_MSIZE(dmsize) | DWC_CTLL_SRC_MSIZE(smsize);
+}
+
+static void idma32_encode_maxburst(struct dw_dma_chan *dwc, u32 *maxburst)
+{
+	*maxburst = *maxburst > 1 ? fls(*maxburst) - 1 : 0;
+}
+
+static void idma32_set_device_name(struct dw_dma *dw, int id)
+{
+	snprintf(dw->name, sizeof(dw->name), "idma32:dmac%d", id);
+}
+
+/*
+ * Program FIFO size of channels.
+ *
+ * By default full FIFO (512 bytes) is assigned to channel 0. Here we
+ * slice FIFO on equal parts between channels.
+ */
+static void idma32_fifo_partition(struct dw_dma *dw)
+{
+	u64 value = IDMA32C_FP_PSIZE_CH0(64) | IDMA32C_FP_PSIZE_CH1(64) |
+		    IDMA32C_FP_UPDATE;
+	u64 fifo_partition = 0;
+
+	/* Fill FIFO_PARTITION low bits (Channels 0..1, 4..5) */
+	fifo_partition |= value << 0;
+
+	/* Fill FIFO_PARTITION high bits (Channels 2..3, 6..7) */
+	fifo_partition |= value << 32;
+
+	/* Program FIFO Partition registers - 64 bytes per channel */
+	idma32_writeq(dw, FIFO_PARTITION1, fifo_partition);
+	idma32_writeq(dw, FIFO_PARTITION0, fifo_partition);
+}
+
+static void idma32_disable(struct dw_dma *dw)
+{
+	do_dw_dma_off(dw);
+	idma32_fifo_partition(dw);
+}
+
+static void idma32_enable(struct dw_dma *dw)
+{
+	idma32_fifo_partition(dw);
+	do_dw_dma_on(dw);
+}
+
+int idma32_dma_probe(struct dw_dma_chip *chip)
+{
+	struct dw_dma *dw;
+
+	dw = devm_kzalloc(chip->dev, sizeof(*dw), GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	/* Channel operations */
+	dw->initialize_chan = idma32_initialize_chan;
+	dw->suspend_chan = idma32_suspend_chan;
+	dw->resume_chan = idma32_resume_chan;
+	dw->prepare_ctllo = idma32_prepare_ctllo;
+	dw->encode_maxburst = idma32_encode_maxburst;
+	dw->bytes2block = idma32_bytes2block;
+	dw->block2bytes = idma32_block2bytes;
+
+	/* Device operations */
+	dw->set_device_name = idma32_set_device_name;
+	dw->disable = idma32_disable;
+	dw->enable = idma32_enable;
+
+	chip->dw = dw;
+	return do_dma_probe(chip);
+}
+EXPORT_SYMBOL_GPL(idma32_dma_probe);
+
+int idma32_dma_remove(struct dw_dma_chip *chip)
+{
+	return do_dma_remove(chip);
+}
+EXPORT_SYMBOL_GPL(idma32_dma_remove);
diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h
index 4143973..1dd7a4e 100644
--- a/drivers/dma/dw/internal.h
+++ b/drivers/dma/dw/internal.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2013 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _DMA_DW_INTERNAL_H
@@ -15,8 +12,14 @@
 
 #include "regs.h"
 
-int dw_dma_disable(struct dw_dma_chip *chip);
-int dw_dma_enable(struct dw_dma_chip *chip);
+int do_dma_probe(struct dw_dma_chip *chip);
+int do_dma_remove(struct dw_dma_chip *chip);
+
+void do_dw_dma_on(struct dw_dma *dw);
+void do_dw_dma_off(struct dw_dma *dw);
+
+int do_dw_dma_disable(struct dw_dma_chip *chip);
+int do_dw_dma_enable(struct dw_dma_chip *chip);
 
 extern bool dw_dma_filter(struct dma_chan *chan, void *param);
 
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index 7778ed7..e79a75d 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * PCI driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2013 Intel Corporation
  * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -15,21 +12,33 @@
 
 #include "internal.h"
 
-static struct dw_dma_platform_data mrfld_pdata = {
+struct dw_dma_pci_data {
+	const struct dw_dma_platform_data *pdata;
+	int (*probe)(struct dw_dma_chip *chip);
+};
+
+static const struct dw_dma_pci_data dw_pci_data = {
+	.probe = dw_dma_probe,
+};
+
+static const struct dw_dma_platform_data idma32_pdata = {
 	.nr_channels = 8,
-	.is_private = true,
-	.is_memcpy = true,
-	.is_idma32 = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
 	.chan_priority = CHAN_PRIORITY_ASCENDING,
 	.block_size = 131071,
 	.nr_masters = 1,
 	.data_width = {4},
+	.multi_block = {1, 1, 1, 1, 1, 1, 1, 1},
+};
+
+static const struct dw_dma_pci_data idma32_pci_data = {
+	.pdata = &idma32_pdata,
+	.probe = idma32_dma_probe,
 };
 
 static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 {
-	const struct dw_dma_platform_data *pdata = (void *)pid->driver_data;
+	const struct dw_dma_pci_data *data = (void *)pid->driver_data;
 	struct dw_dma_chip *chip;
 	int ret;
 
@@ -62,9 +71,9 @@ static int dw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *pid)
 	chip->id = pdev->devfn;
 	chip->regs = pcim_iomap_table(pdev)[0];
 	chip->irq = pdev->irq;
-	chip->pdata = pdata;
+	chip->pdata = data->pdata;
 
-	ret = dw_dma_probe(chip);
+	ret = data->probe(chip);
 	if (ret)
 		return ret;
 
@@ -90,7 +99,7 @@ static int dw_pci_suspend_late(struct device *dev)
 	struct pci_dev *pci = to_pci_dev(dev);
 	struct dw_dma_chip *chip = pci_get_drvdata(pci);
 
-	return dw_dma_disable(chip);
+	return do_dw_dma_disable(chip);
 };
 
 static int dw_pci_resume_early(struct device *dev)
@@ -98,7 +107,7 @@ static int dw_pci_resume_early(struct device *dev)
 	struct pci_dev *pci = to_pci_dev(dev);
 	struct dw_dma_chip *chip = pci_get_drvdata(pci);
 
-	return dw_dma_enable(chip);
+	return do_dw_dma_enable(chip);
 };
 
 #endif /* CONFIG_PM_SLEEP */
@@ -109,24 +118,24 @@ static const struct dev_pm_ops dw_pci_dev_pm_ops = {
 
 static const struct pci_device_id dw_pci_id_table[] = {
 	/* Medfield (GPDMA) */
-	{ PCI_VDEVICE(INTEL, 0x0827) },
+	{ PCI_VDEVICE(INTEL, 0x0827), (kernel_ulong_t)&dw_pci_data },
 
 	/* BayTrail */
-	{ PCI_VDEVICE(INTEL, 0x0f06) },
-	{ PCI_VDEVICE(INTEL, 0x0f40) },
+	{ PCI_VDEVICE(INTEL, 0x0f06), (kernel_ulong_t)&dw_pci_data },
+	{ PCI_VDEVICE(INTEL, 0x0f40), (kernel_ulong_t)&dw_pci_data },
 
-	/* Merrifield iDMA 32-bit (GPDMA) */
-	{ PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&mrfld_pdata },
+	/* Merrifield */
+	{ PCI_VDEVICE(INTEL, 0x11a2), (kernel_ulong_t)&idma32_pci_data },
 
 	/* Braswell */
-	{ PCI_VDEVICE(INTEL, 0x2286) },
-	{ PCI_VDEVICE(INTEL, 0x22c0) },
+	{ PCI_VDEVICE(INTEL, 0x2286), (kernel_ulong_t)&dw_pci_data },
+	{ PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_pci_data },
 
 	/* Haswell */
-	{ PCI_VDEVICE(INTEL, 0x9c60) },
+	{ PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_pci_data },
 
 	/* Broadwell */
-	{ PCI_VDEVICE(INTEL, 0x9ce0) },
+	{ PCI_VDEVICE(INTEL, 0x9ce0), (kernel_ulong_t)&dw_pci_data },
 
 	{ }
 };
diff --git a/drivers/dma/dw/platform.c b/drivers/dma/dw/platform.c
index 31ff811..382dfd9 100644
--- a/drivers/dma/dw/platform.c
+++ b/drivers/dma/dw/platform.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Platform driver for the Synopsys DesignWare DMA Controller
  *
@@ -6,10 +7,6 @@
  * Copyright (C) 2013 Intel Corporation
  *
  * Some parts of this driver are derived from the original dw_dmac.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/module.h>
@@ -128,15 +125,6 @@ dw_dma_parse_dt(struct platform_device *pdev)
 	pdata->nr_masters = nr_masters;
 	pdata->nr_channels = nr_channels;
 
-	if (of_property_read_bool(np, "is_private"))
-		pdata->is_private = true;
-
-	/*
-	 * All known devices, which use DT for configuration, support
-	 * memory-to-memory transfers. So enable it by default.
-	 */
-	pdata->is_memcpy = true;
-
 	if (!of_property_read_u32(np, "chan_allocation_order", &tmp))
 		pdata->chan_allocation_order = (unsigned char)tmp;
 
@@ -264,7 +252,7 @@ static void dw_shutdown(struct platform_device *pdev)
 	struct dw_dma_chip *chip = platform_get_drvdata(pdev);
 
 	/*
-	 * We have to call dw_dma_disable() to stop any ongoing transfer. On
+	 * We have to call do_dw_dma_disable() to stop any ongoing transfer. On
 	 * some platforms we can't do that since DMA device is powered off.
 	 * Moreover we have no possibility to check if the platform is affected
 	 * or not. That's why we call pm_runtime_get_sync() / pm_runtime_put()
@@ -273,7 +261,7 @@ static void dw_shutdown(struct platform_device *pdev)
 	 * used by the driver.
 	 */
 	pm_runtime_get_sync(chip->dev);
-	dw_dma_disable(chip);
+	do_dw_dma_disable(chip);
 	pm_runtime_put_sync_suspend(chip->dev);
 
 	clk_disable_unprepare(chip->clk);
@@ -303,7 +291,7 @@ static int dw_suspend_late(struct device *dev)
 {
 	struct dw_dma_chip *chip = dev_get_drvdata(dev);
 
-	dw_dma_disable(chip);
+	do_dw_dma_disable(chip);
 	clk_disable_unprepare(chip->clk);
 
 	return 0;
@@ -318,7 +306,7 @@ static int dw_resume_early(struct device *dev)
 	if (ret)
 		return ret;
 
-	return dw_dma_enable(chip);
+	return do_dw_dma_enable(chip);
 }
 
 #endif /* CONFIG_PM_SLEEP */
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 646c9c9..3fce66e 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare AHB DMA Controller
  *
  * Copyright (C) 2005-2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
  * Copyright (C) 2016 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/bitops.h>
@@ -222,6 +219,16 @@ enum dw_dma_msize {
 
 /* iDMA 32-bit support */
 
+/* bursts size */
+enum idma32_msize {
+	IDMA32_MSIZE_1,
+	IDMA32_MSIZE_2,
+	IDMA32_MSIZE_4,
+	IDMA32_MSIZE_8,
+	IDMA32_MSIZE_16,
+	IDMA32_MSIZE_32,
+};
+
 /* Bitfields in CTL_HI */
 #define IDMA32C_CTLH_BLOCK_TS_MASK	GENMASK(16, 0)
 #define IDMA32C_CTLH_BLOCK_TS(x)	((x) & IDMA32C_CTLH_BLOCK_TS_MASK)
@@ -312,6 +319,21 @@ struct dw_dma {
 	u8			all_chan_mask;
 	u8			in_use;
 
+	/* Channel operations */
+	void	(*initialize_chan)(struct dw_dma_chan *dwc);
+	void	(*suspend_chan)(struct dw_dma_chan *dwc, bool drain);
+	void	(*resume_chan)(struct dw_dma_chan *dwc, bool drain);
+	u32	(*prepare_ctllo)(struct dw_dma_chan *dwc);
+	void	(*encode_maxburst)(struct dw_dma_chan *dwc, u32 *maxburst);
+	u32	(*bytes2block)(struct dw_dma_chan *dwc, size_t bytes,
+			       unsigned int width, size_t *len);
+	size_t	(*block2bytes)(struct dw_dma_chan *dwc, u32 block, u32 width);
+
+	/* Device operations */
+	void (*set_device_name)(struct dw_dma *dw, int id);
+	void (*disable)(struct dw_dma *dw);
+	void (*enable)(struct dw_dma *dw);
+
 	/* platform data */
 	struct dw_dma_platform_data	*pdata;
 };
diff --git a/drivers/dma/fsl-edma-common.c b/drivers/dma/fsl-edma-common.c
index 8876c4c..680b2a0 100644
--- a/drivers/dma/fsl-edma-common.c
+++ b/drivers/dma/fsl-edma-common.c
@@ -6,6 +6,7 @@
 #include <linux/dmapool.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>
 
 #include "fsl-edma-common.h"
 
@@ -173,12 +174,62 @@ int fsl_edma_resume(struct dma_chan *chan)
 }
 EXPORT_SYMBOL_GPL(fsl_edma_resume);
 
+static void fsl_edma_unprep_slave_dma(struct fsl_edma_chan *fsl_chan)
+{
+	if (fsl_chan->dma_dir != DMA_NONE)
+		dma_unmap_resource(fsl_chan->vchan.chan.device->dev,
+				   fsl_chan->dma_dev_addr,
+				   fsl_chan->dma_dev_size,
+				   fsl_chan->dma_dir, 0);
+	fsl_chan->dma_dir = DMA_NONE;
+}
+
+static bool fsl_edma_prep_slave_dma(struct fsl_edma_chan *fsl_chan,
+				    enum dma_transfer_direction dir)
+{
+	struct device *dev = fsl_chan->vchan.chan.device->dev;
+	enum dma_data_direction dma_dir;
+	phys_addr_t addr = 0;
+	u32 size = 0;
+
+	switch (dir) {
+	case DMA_MEM_TO_DEV:
+		dma_dir = DMA_FROM_DEVICE;
+		addr = fsl_chan->cfg.dst_addr;
+		size = fsl_chan->cfg.dst_maxburst;
+		break;
+	case DMA_DEV_TO_MEM:
+		dma_dir = DMA_TO_DEVICE;
+		addr = fsl_chan->cfg.src_addr;
+		size = fsl_chan->cfg.src_maxburst;
+		break;
+	default:
+		dma_dir = DMA_NONE;
+		break;
+	}
+
+	/* Already mapped for this config? */
+	if (fsl_chan->dma_dir == dma_dir)
+		return true;
+
+	fsl_edma_unprep_slave_dma(fsl_chan);
+
+	fsl_chan->dma_dev_addr = dma_map_resource(dev, addr, size, dma_dir, 0);
+	if (dma_mapping_error(dev, fsl_chan->dma_dev_addr))
+		return false;
+	fsl_chan->dma_dev_size = size;
+	fsl_chan->dma_dir = dma_dir;
+
+	return true;
+}
+
 int fsl_edma_slave_config(struct dma_chan *chan,
 				 struct dma_slave_config *cfg)
 {
 	struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
 
 	memcpy(&fsl_chan->cfg, cfg, sizeof(*cfg));
+	fsl_edma_unprep_slave_dma(fsl_chan);
 
 	return 0;
 }
@@ -339,9 +390,7 @@ static struct fsl_edma_desc *fsl_edma_alloc_desc(struct fsl_edma_chan *fsl_chan,
 	struct fsl_edma_desc *fsl_desc;
 	int i;
 
-	fsl_desc = kzalloc(sizeof(*fsl_desc) +
-			   sizeof(struct fsl_edma_sw_tcd) *
-			   sg_len, GFP_NOWAIT);
+	fsl_desc = kzalloc(struct_size(fsl_desc, tcd, sg_len), GFP_NOWAIT);
 	if (!fsl_desc)
 		return NULL;
 
@@ -378,6 +427,9 @@ struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
 	if (!is_slave_direction(direction))
 		return NULL;
 
+	if (!fsl_edma_prep_slave_dma(fsl_chan, direction))
+		return NULL;
+
 	sg_len = buf_len / period_len;
 	fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len);
 	if (!fsl_desc)
@@ -409,11 +461,11 @@ struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
 
 		if (direction == DMA_MEM_TO_DEV) {
 			src_addr = dma_buf_next;
-			dst_addr = fsl_chan->cfg.dst_addr;
+			dst_addr = fsl_chan->dma_dev_addr;
 			soff = fsl_chan->cfg.dst_addr_width;
 			doff = 0;
 		} else {
-			src_addr = fsl_chan->cfg.src_addr;
+			src_addr = fsl_chan->dma_dev_addr;
 			dst_addr = dma_buf_next;
 			soff = 0;
 			doff = fsl_chan->cfg.src_addr_width;
@@ -444,6 +496,9 @@ struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
 	if (!is_slave_direction(direction))
 		return NULL;
 
+	if (!fsl_edma_prep_slave_dma(fsl_chan, direction))
+		return NULL;
+
 	fsl_desc = fsl_edma_alloc_desc(fsl_chan, sg_len);
 	if (!fsl_desc)
 		return NULL;
@@ -468,11 +523,11 @@ struct dma_async_tx_descriptor *fsl_edma_prep_slave_sg(
 
 		if (direction == DMA_MEM_TO_DEV) {
 			src_addr = sg_dma_address(sg);
-			dst_addr = fsl_chan->cfg.dst_addr;
+			dst_addr = fsl_chan->dma_dev_addr;
 			soff = fsl_chan->cfg.dst_addr_width;
 			doff = 0;
 		} else {
-			src_addr = fsl_chan->cfg.src_addr;
+			src_addr = fsl_chan->dma_dev_addr;
 			dst_addr = sg_dma_address(sg);
 			soff = 0;
 			doff = fsl_chan->cfg.src_addr_width;
@@ -555,6 +610,7 @@ void fsl_edma_free_chan_resources(struct dma_chan *chan)
 	fsl_edma_chan_mux(fsl_chan, 0, false);
 	fsl_chan->edesc = NULL;
 	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+	fsl_edma_unprep_slave_dma(fsl_chan);
 	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
 
 	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
diff --git a/drivers/dma/fsl-edma-common.h b/drivers/dma/fsl-edma-common.h
index 8917e88..b435d8e 100644
--- a/drivers/dma/fsl-edma-common.h
+++ b/drivers/dma/fsl-edma-common.h
@@ -6,6 +6,7 @@
 #ifndef _FSL_EDMA_COMMON_H_
 #define _FSL_EDMA_COMMON_H_
 
+#include <linux/dma-direction.h>
 #include "virt-dma.h"
 
 #define EDMA_CR_EDBG		BIT(1)
@@ -120,6 +121,9 @@ struct fsl_edma_chan {
 	struct dma_slave_config		cfg;
 	u32				attr;
 	struct dma_pool			*tcd_pool;
+	dma_addr_t			dma_dev_addr;
+	u32				dma_dev_size;
+	enum dma_data_direction		dma_dir;
 };
 
 struct fsl_edma_desc {
diff --git a/drivers/dma/fsl-edma.c b/drivers/dma/fsl-edma.c
index 34d7011..75e8a7b 100644
--- a/drivers/dma/fsl-edma.c
+++ b/drivers/dma/fsl-edma.c
@@ -254,6 +254,7 @@ static int fsl_edma_probe(struct platform_device *pdev)
 		fsl_chan->pm_state = RUNNING;
 		fsl_chan->slave_id = 0;
 		fsl_chan->idle = true;
+		fsl_chan->dma_dir = DMA_NONE;
 		fsl_chan->vchan.desc_free = fsl_edma_free_desc;
 		vchan_init(&fsl_chan->vchan, &fsl_edma->dma_dev);
 
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
new file mode 100644
index 0000000..aa1d0ae
--- /dev/null
+++ b/drivers/dma/fsl-qdma.c
@@ -0,0 +1,1259 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright 2014-2015 Freescale
+// Copyright 2018 NXP
+
+/*
+ * Driver for NXP Layerscape Queue Direct Memory Access Controller
+ *
+ * Author:
+ *  Wen He <wen.he_1@nxp.com>
+ *  Jiaheng Fan <jiaheng.fan@nxp.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/of_dma.h>
+#include <linux/dma-mapping.h>
+
+#include "virt-dma.h"
+#include "fsldma.h"
+
+/* Register related definition */
+#define FSL_QDMA_DMR			0x0
+#define FSL_QDMA_DSR			0x4
+#define FSL_QDMA_DEIER			0xe00
+#define FSL_QDMA_DEDR			0xe04
+#define FSL_QDMA_DECFDW0R		0xe10
+#define FSL_QDMA_DECFDW1R		0xe14
+#define FSL_QDMA_DECFDW2R		0xe18
+#define FSL_QDMA_DECFDW3R		0xe1c
+#define FSL_QDMA_DECFQIDR		0xe30
+#define FSL_QDMA_DECBR			0xe34
+
+#define FSL_QDMA_BCQMR(x)		(0xc0 + 0x100 * (x))
+#define FSL_QDMA_BCQSR(x)		(0xc4 + 0x100 * (x))
+#define FSL_QDMA_BCQEDPA_SADDR(x)	(0xc8 + 0x100 * (x))
+#define FSL_QDMA_BCQDPA_SADDR(x)	(0xcc + 0x100 * (x))
+#define FSL_QDMA_BCQEEPA_SADDR(x)	(0xd0 + 0x100 * (x))
+#define FSL_QDMA_BCQEPA_SADDR(x)	(0xd4 + 0x100 * (x))
+#define FSL_QDMA_BCQIER(x)		(0xe0 + 0x100 * (x))
+#define FSL_QDMA_BCQIDR(x)		(0xe4 + 0x100 * (x))
+
+#define FSL_QDMA_SQDPAR			0x80c
+#define FSL_QDMA_SQEPAR			0x814
+#define FSL_QDMA_BSQMR			0x800
+#define FSL_QDMA_BSQSR			0x804
+#define FSL_QDMA_BSQICR			0x828
+#define FSL_QDMA_CQMR			0xa00
+#define FSL_QDMA_CQDSCR1		0xa08
+#define FSL_QDMA_CQDSCR2                0xa0c
+#define FSL_QDMA_CQIER			0xa10
+#define FSL_QDMA_CQEDR			0xa14
+#define FSL_QDMA_SQCCMR			0xa20
+
+/* Registers for bit and genmask */
+#define FSL_QDMA_CQIDR_SQT		BIT(15)
+#define QDMA_CCDF_FOTMAT		BIT(29)
+#define QDMA_CCDF_SER			BIT(30)
+#define QDMA_SG_FIN			BIT(30)
+#define QDMA_SG_LEN_MASK		GENMASK(29, 0)
+#define QDMA_CCDF_MASK			GENMASK(28, 20)
+
+#define FSL_QDMA_DEDR_CLEAR		GENMASK(31, 0)
+#define FSL_QDMA_BCQIDR_CLEAR		GENMASK(31, 0)
+#define FSL_QDMA_DEIER_CLEAR		GENMASK(31, 0)
+
+#define FSL_QDMA_BCQIER_CQTIE		BIT(15)
+#define FSL_QDMA_BCQIER_CQPEIE		BIT(23)
+#define FSL_QDMA_BSQICR_ICEN		BIT(31)
+
+#define FSL_QDMA_BSQICR_ICST(x)		((x) << 16)
+#define FSL_QDMA_CQIER_MEIE		BIT(31)
+#define FSL_QDMA_CQIER_TEIE		BIT(0)
+#define FSL_QDMA_SQCCMR_ENTER_WM	BIT(21)
+
+#define FSL_QDMA_BCQMR_EN		BIT(31)
+#define FSL_QDMA_BCQMR_EI		BIT(30)
+#define FSL_QDMA_BCQMR_CD_THLD(x)	((x) << 20)
+#define FSL_QDMA_BCQMR_CQ_SIZE(x)	((x) << 16)
+
+#define FSL_QDMA_BCQSR_QF		BIT(16)
+#define FSL_QDMA_BCQSR_XOFF		BIT(0)
+
+#define FSL_QDMA_BSQMR_EN		BIT(31)
+#define FSL_QDMA_BSQMR_DI		BIT(30)
+#define FSL_QDMA_BSQMR_CQ_SIZE(x)	((x) << 16)
+
+#define FSL_QDMA_BSQSR_QE		BIT(17)
+
+#define FSL_QDMA_DMR_DQD		BIT(30)
+#define FSL_QDMA_DSR_DB		BIT(31)
+
+/* Size related definition */
+#define FSL_QDMA_QUEUE_MAX		8
+#define FSL_QDMA_COMMAND_BUFFER_SIZE	64
+#define FSL_QDMA_DESCRIPTOR_BUFFER_SIZE 32
+#define FSL_QDMA_CIRCULAR_DESC_SIZE_MIN	64
+#define FSL_QDMA_CIRCULAR_DESC_SIZE_MAX	16384
+#define FSL_QDMA_QUEUE_NUM_MAX		8
+
+/* Field definition for CMD */
+#define FSL_QDMA_CMD_RWTTYPE		0x4
+#define FSL_QDMA_CMD_LWC                0x2
+#define FSL_QDMA_CMD_RWTTYPE_OFFSET	28
+#define FSL_QDMA_CMD_NS_OFFSET		27
+#define FSL_QDMA_CMD_DQOS_OFFSET	24
+#define FSL_QDMA_CMD_WTHROTL_OFFSET	20
+#define FSL_QDMA_CMD_DSEN_OFFSET	19
+#define FSL_QDMA_CMD_LWC_OFFSET		16
+
+/* Field definition for Descriptor offset */
+#define QDMA_CCDF_STATUS		20
+#define QDMA_CCDF_OFFSET		20
+
+/* Field definition for safe loop count*/
+#define FSL_QDMA_HALT_COUNT		1500
+#define FSL_QDMA_MAX_SIZE		16385
+#define	FSL_QDMA_COMP_TIMEOUT		1000
+#define FSL_COMMAND_QUEUE_OVERFLLOW	10
+
+#define FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma_engine, x)			\
+	(((fsl_qdma_engine)->block_offset) * (x))
+
+/**
+ * struct fsl_qdma_format - This is the struct holding describing compound
+ *			    descriptor format with qDMA.
+ * @status:		    Command status and enqueue status notification.
+ * @cfg:		    Frame offset and frame format.
+ * @addr_lo:		    Holding the compound descriptor of the lower
+ *			    32-bits address in memory 40-bit address.
+ * @addr_hi:		    Same as above member, but point high 8-bits in
+ *			    memory 40-bit address.
+ * @__reserved1:	    Reserved field.
+ * @cfg8b_w1:		    Compound descriptor command queue origin produced
+ *			    by qDMA and dynamic debug field.
+ * @data		    Pointer to the memory 40-bit address, describes DMA
+ *			    source information and DMA destination information.
+ */
+struct fsl_qdma_format {
+	__le32 status;
+	__le32 cfg;
+	union {
+		struct {
+			__le32 addr_lo;
+			u8 addr_hi;
+			u8 __reserved1[2];
+			u8 cfg8b_w1;
+		} __packed;
+		__le64 data;
+	};
+} __packed;
+
+/* qDMA status notification pre information */
+struct fsl_pre_status {
+	u64 addr;
+	u8 queue;
+};
+
+static DEFINE_PER_CPU(struct fsl_pre_status, pre);
+
+struct fsl_qdma_chan {
+	struct virt_dma_chan		vchan;
+	struct virt_dma_desc		vdesc;
+	enum dma_status			status;
+	struct fsl_qdma_engine		*qdma;
+	struct fsl_qdma_queue		*queue;
+};
+
+struct fsl_qdma_queue {
+	struct fsl_qdma_format	*virt_head;
+	struct fsl_qdma_format	*virt_tail;
+	struct list_head	comp_used;
+	struct list_head	comp_free;
+	struct dma_pool		*comp_pool;
+	struct dma_pool		*desc_pool;
+	spinlock_t		queue_lock;
+	dma_addr_t		bus_addr;
+	u32                     n_cq;
+	u32			id;
+	struct fsl_qdma_format	*cq;
+	void __iomem		*block_base;
+};
+
+struct fsl_qdma_comp {
+	dma_addr_t              bus_addr;
+	dma_addr_t              desc_bus_addr;
+	struct fsl_qdma_format	*virt_addr;
+	struct fsl_qdma_format	*desc_virt_addr;
+	struct fsl_qdma_chan	*qchan;
+	struct virt_dma_desc    vdesc;
+	struct list_head	list;
+};
+
+struct fsl_qdma_engine {
+	struct dma_device	dma_dev;
+	void __iomem		*ctrl_base;
+	void __iomem            *status_base;
+	void __iomem		*block_base;
+	u32			n_chans;
+	u32			n_queues;
+	struct mutex            fsl_qdma_mutex;
+	int			error_irq;
+	int			*queue_irq;
+	u32			feature;
+	struct fsl_qdma_queue	*queue;
+	struct fsl_qdma_queue	**status;
+	struct fsl_qdma_chan	*chans;
+	int			block_number;
+	int			block_offset;
+	int			irq_base;
+	int			desc_allocated;
+
+};
+
+static inline u64
+qdma_ccdf_addr_get64(const struct fsl_qdma_format *ccdf)
+{
+	return le64_to_cpu(ccdf->data) & (U64_MAX >> 24);
+}
+
+static inline void
+qdma_desc_addr_set64(struct fsl_qdma_format *ccdf, u64 addr)
+{
+	ccdf->addr_hi = upper_32_bits(addr);
+	ccdf->addr_lo = cpu_to_le32(lower_32_bits(addr));
+}
+
+static inline u8
+qdma_ccdf_get_queue(const struct fsl_qdma_format *ccdf)
+{
+	return ccdf->cfg8b_w1 & U8_MAX;
+}
+
+static inline int
+qdma_ccdf_get_offset(const struct fsl_qdma_format *ccdf)
+{
+	return (le32_to_cpu(ccdf->cfg) & QDMA_CCDF_MASK) >> QDMA_CCDF_OFFSET;
+}
+
+static inline void
+qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset)
+{
+	ccdf->cfg = cpu_to_le32(QDMA_CCDF_FOTMAT | offset);
+}
+
+static inline int
+qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf)
+{
+	return (le32_to_cpu(ccdf->status) & QDMA_CCDF_MASK) >> QDMA_CCDF_STATUS;
+}
+
+static inline void
+qdma_ccdf_set_ser(struct fsl_qdma_format *ccdf, int status)
+{
+	ccdf->status = cpu_to_le32(QDMA_CCDF_SER | status);
+}
+
+static inline void qdma_csgf_set_len(struct fsl_qdma_format *csgf, int len)
+{
+	csgf->cfg = cpu_to_le32(len & QDMA_SG_LEN_MASK);
+}
+
+static inline void qdma_csgf_set_f(struct fsl_qdma_format *csgf, int len)
+{
+	csgf->cfg = cpu_to_le32(QDMA_SG_FIN | (len & QDMA_SG_LEN_MASK));
+}
+
+static u32 qdma_readl(struct fsl_qdma_engine *qdma, void __iomem *addr)
+{
+	return FSL_DMA_IN(qdma, addr, 32);
+}
+
+static void qdma_writel(struct fsl_qdma_engine *qdma, u32 val,
+			void __iomem *addr)
+{
+	FSL_DMA_OUT(qdma, addr, val, 32);
+}
+
+static struct fsl_qdma_chan *to_fsl_qdma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct fsl_qdma_chan, vchan.chan);
+}
+
+static struct fsl_qdma_comp *to_fsl_qdma_comp(struct virt_dma_desc *vd)
+{
+	return container_of(vd, struct fsl_qdma_comp, vdesc);
+}
+
+static void fsl_qdma_free_chan_resources(struct dma_chan *chan)
+{
+	struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+	struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+	struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma;
+	struct fsl_qdma_comp *comp_temp, *_comp_temp;
+	unsigned long flags;
+	LIST_HEAD(head);
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+
+	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+
+	if (!fsl_queue->comp_pool && !fsl_queue->comp_pool)
+		return;
+
+	list_for_each_entry_safe(comp_temp, _comp_temp,
+				 &fsl_queue->comp_used,	list) {
+		dma_pool_free(fsl_queue->comp_pool,
+			      comp_temp->virt_addr,
+			      comp_temp->bus_addr);
+		dma_pool_free(fsl_queue->desc_pool,
+			      comp_temp->desc_virt_addr,
+			      comp_temp->desc_bus_addr);
+		list_del(&comp_temp->list);
+		kfree(comp_temp);
+	}
+
+	list_for_each_entry_safe(comp_temp, _comp_temp,
+				 &fsl_queue->comp_free, list) {
+		dma_pool_free(fsl_queue->comp_pool,
+			      comp_temp->virt_addr,
+			      comp_temp->bus_addr);
+		dma_pool_free(fsl_queue->desc_pool,
+			      comp_temp->desc_virt_addr,
+			      comp_temp->desc_bus_addr);
+		list_del(&comp_temp->list);
+		kfree(comp_temp);
+	}
+
+	dma_pool_destroy(fsl_queue->comp_pool);
+	dma_pool_destroy(fsl_queue->desc_pool);
+
+	fsl_qdma->desc_allocated--;
+	fsl_queue->comp_pool = NULL;
+	fsl_queue->desc_pool = NULL;
+}
+
+static void fsl_qdma_comp_fill_memcpy(struct fsl_qdma_comp *fsl_comp,
+				      dma_addr_t dst, dma_addr_t src, u32 len)
+{
+	struct fsl_qdma_format *sdf, *ddf;
+	struct fsl_qdma_format *ccdf, *csgf_desc, *csgf_src, *csgf_dest;
+
+	ccdf = fsl_comp->virt_addr;
+	csgf_desc = fsl_comp->virt_addr + 1;
+	csgf_src = fsl_comp->virt_addr + 2;
+	csgf_dest = fsl_comp->virt_addr + 3;
+	sdf = fsl_comp->desc_virt_addr;
+	ddf = fsl_comp->desc_virt_addr + 1;
+
+	memset(fsl_comp->virt_addr, 0, FSL_QDMA_COMMAND_BUFFER_SIZE);
+	memset(fsl_comp->desc_virt_addr, 0, FSL_QDMA_DESCRIPTOR_BUFFER_SIZE);
+	/* Head Command Descriptor(Frame Descriptor) */
+	qdma_desc_addr_set64(ccdf, fsl_comp->bus_addr + 16);
+	qdma_ccdf_set_format(ccdf, qdma_ccdf_get_offset(ccdf));
+	qdma_ccdf_set_ser(ccdf, qdma_ccdf_get_status(ccdf));
+	/* Status notification is enqueued to status queue. */
+	/* Compound Command Descriptor(Frame List Table) */
+	qdma_desc_addr_set64(csgf_desc, fsl_comp->desc_bus_addr);
+	/* It must be 32 as Compound S/G Descriptor */
+	qdma_csgf_set_len(csgf_desc, 32);
+	qdma_desc_addr_set64(csgf_src, src);
+	qdma_csgf_set_len(csgf_src, len);
+	qdma_desc_addr_set64(csgf_dest, dst);
+	qdma_csgf_set_len(csgf_dest, len);
+	/* This entry is the last entry. */
+	qdma_csgf_set_f(csgf_dest, len);
+	/* Descriptor Buffer */
+	sdf->data =
+		cpu_to_le64(FSL_QDMA_CMD_RWTTYPE <<
+			    FSL_QDMA_CMD_RWTTYPE_OFFSET);
+	ddf->data =
+		cpu_to_le64(FSL_QDMA_CMD_RWTTYPE <<
+			    FSL_QDMA_CMD_RWTTYPE_OFFSET);
+	ddf->data |=
+		cpu_to_le64(FSL_QDMA_CMD_LWC << FSL_QDMA_CMD_LWC_OFFSET);
+}
+
+/*
+ * Pre-request full command descriptor for enqueue.
+ */
+static int fsl_qdma_pre_request_enqueue_desc(struct fsl_qdma_queue *queue)
+{
+	int i;
+	struct fsl_qdma_comp *comp_temp, *_comp_temp;
+
+	for (i = 0; i < queue->n_cq + FSL_COMMAND_QUEUE_OVERFLLOW; i++) {
+		comp_temp = kzalloc(sizeof(*comp_temp), GFP_KERNEL);
+		if (!comp_temp)
+			goto err_alloc;
+		comp_temp->virt_addr =
+			dma_pool_alloc(queue->comp_pool, GFP_KERNEL,
+				       &comp_temp->bus_addr);
+		if (!comp_temp->virt_addr)
+			goto err_dma_alloc;
+
+		comp_temp->desc_virt_addr =
+			dma_pool_alloc(queue->desc_pool, GFP_KERNEL,
+				       &comp_temp->desc_bus_addr);
+		if (!comp_temp->desc_virt_addr)
+			goto err_desc_dma_alloc;
+
+		list_add_tail(&comp_temp->list, &queue->comp_free);
+	}
+
+	return 0;
+
+err_desc_dma_alloc:
+	dma_pool_free(queue->comp_pool, comp_temp->virt_addr,
+		      comp_temp->bus_addr);
+
+err_dma_alloc:
+	kfree(comp_temp);
+
+err_alloc:
+	list_for_each_entry_safe(comp_temp, _comp_temp,
+				 &queue->comp_free, list) {
+		if (comp_temp->virt_addr)
+			dma_pool_free(queue->comp_pool,
+				      comp_temp->virt_addr,
+				      comp_temp->bus_addr);
+		if (comp_temp->desc_virt_addr)
+			dma_pool_free(queue->desc_pool,
+				      comp_temp->desc_virt_addr,
+				      comp_temp->desc_bus_addr);
+
+		list_del(&comp_temp->list);
+		kfree(comp_temp);
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Request a command descriptor for enqueue.
+ */
+static struct fsl_qdma_comp
+*fsl_qdma_request_enqueue_desc(struct fsl_qdma_chan *fsl_chan)
+{
+	unsigned long flags;
+	struct fsl_qdma_comp *comp_temp;
+	int timeout = FSL_QDMA_COMP_TIMEOUT;
+	struct fsl_qdma_queue *queue = fsl_chan->queue;
+
+	while (timeout--) {
+		spin_lock_irqsave(&queue->queue_lock, flags);
+		if (!list_empty(&queue->comp_free)) {
+			comp_temp = list_first_entry(&queue->comp_free,
+						     struct fsl_qdma_comp,
+						     list);
+			list_del(&comp_temp->list);
+
+			spin_unlock_irqrestore(&queue->queue_lock, flags);
+			comp_temp->qchan = fsl_chan;
+			return comp_temp;
+		}
+		spin_unlock_irqrestore(&queue->queue_lock, flags);
+		udelay(1);
+	}
+
+	return NULL;
+}
+
+static struct fsl_qdma_queue
+*fsl_qdma_alloc_queue_resources(struct platform_device *pdev,
+				struct fsl_qdma_engine *fsl_qdma)
+{
+	int ret, len, i, j;
+	int queue_num, block_number;
+	unsigned int queue_size[FSL_QDMA_QUEUE_MAX];
+	struct fsl_qdma_queue *queue_head, *queue_temp;
+
+	queue_num = fsl_qdma->n_queues;
+	block_number = fsl_qdma->block_number;
+
+	if (queue_num > FSL_QDMA_QUEUE_MAX)
+		queue_num = FSL_QDMA_QUEUE_MAX;
+	len = sizeof(*queue_head) * queue_num * block_number;
+	queue_head = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!queue_head)
+		return NULL;
+
+	ret = device_property_read_u32_array(&pdev->dev, "queue-sizes",
+					     queue_size, queue_num);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get queue-sizes.\n");
+		return NULL;
+	}
+	for (j = 0; j < block_number; j++) {
+		for (i = 0; i < queue_num; i++) {
+			if (queue_size[i] > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX ||
+			    queue_size[i] < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) {
+				dev_err(&pdev->dev,
+					"Get wrong queue-sizes.\n");
+				return NULL;
+			}
+			queue_temp = queue_head + i + (j * queue_num);
+
+			queue_temp->cq =
+			dma_alloc_coherent(&pdev->dev,
+					   sizeof(struct fsl_qdma_format) *
+					   queue_size[i],
+					   &queue_temp->bus_addr,
+					   GFP_KERNEL);
+			if (!queue_temp->cq)
+				return NULL;
+			queue_temp->block_base = fsl_qdma->block_base +
+				FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+			queue_temp->n_cq = queue_size[i];
+			queue_temp->id = i;
+			queue_temp->virt_head = queue_temp->cq;
+			queue_temp->virt_tail = queue_temp->cq;
+			/*
+			 * List for queue command buffer
+			 */
+			INIT_LIST_HEAD(&queue_temp->comp_used);
+			spin_lock_init(&queue_temp->queue_lock);
+		}
+	}
+	return queue_head;
+}
+
+static struct fsl_qdma_queue
+*fsl_qdma_prep_status_queue(struct platform_device *pdev)
+{
+	int ret;
+	unsigned int status_size;
+	struct fsl_qdma_queue *status_head;
+	struct device_node *np = pdev->dev.of_node;
+
+	ret = of_property_read_u32(np, "status-sizes", &status_size);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get status-sizes.\n");
+		return NULL;
+	}
+	if (status_size > FSL_QDMA_CIRCULAR_DESC_SIZE_MAX ||
+	    status_size < FSL_QDMA_CIRCULAR_DESC_SIZE_MIN) {
+		dev_err(&pdev->dev, "Get wrong status_size.\n");
+		return NULL;
+	}
+	status_head = devm_kzalloc(&pdev->dev,
+				   sizeof(*status_head), GFP_KERNEL);
+	if (!status_head)
+		return NULL;
+
+	/*
+	 * Buffer for queue command
+	 */
+	status_head->cq = dma_alloc_coherent(&pdev->dev,
+					     sizeof(struct fsl_qdma_format) *
+					     status_size,
+					     &status_head->bus_addr,
+					     GFP_KERNEL);
+	if (!status_head->cq) {
+		devm_kfree(&pdev->dev, status_head);
+		return NULL;
+	}
+	status_head->n_cq = status_size;
+	status_head->virt_head = status_head->cq;
+	status_head->virt_tail = status_head->cq;
+	status_head->comp_pool = NULL;
+
+	return status_head;
+}
+
+static int fsl_qdma_halt(struct fsl_qdma_engine *fsl_qdma)
+{
+	u32 reg;
+	int i, j, count = FSL_QDMA_HALT_COUNT;
+	void __iomem *block, *ctrl = fsl_qdma->ctrl_base;
+
+	/* Disable the command queue and wait for idle state. */
+	reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR);
+	reg |= FSL_QDMA_DMR_DQD;
+	qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR);
+	for (j = 0; j < fsl_qdma->block_number; j++) {
+		block = fsl_qdma->block_base +
+			FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+		for (i = 0; i < FSL_QDMA_QUEUE_NUM_MAX; i++)
+			qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BCQMR(i));
+	}
+	while (1) {
+		reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DSR);
+		if (!(reg & FSL_QDMA_DSR_DB))
+			break;
+		if (count-- < 0)
+			return -EBUSY;
+		udelay(100);
+	}
+
+	for (j = 0; j < fsl_qdma->block_number; j++) {
+		block = fsl_qdma->block_base +
+			FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+
+		/* Disable status queue. */
+		qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BSQMR);
+
+		/*
+		 * clear the command queue interrupt detect register for
+		 * all queues.
+		 */
+		qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR,
+			    block + FSL_QDMA_BCQIDR(0));
+	}
+
+	return 0;
+}
+
+static int
+fsl_qdma_queue_transfer_complete(struct fsl_qdma_engine *fsl_qdma,
+				 void *block,
+				 int id)
+{
+	bool duplicate;
+	u32 reg, i, count;
+	struct fsl_qdma_queue *temp_queue;
+	struct fsl_qdma_format *status_addr;
+	struct fsl_qdma_comp *fsl_comp = NULL;
+	struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue;
+	struct fsl_qdma_queue *fsl_status = fsl_qdma->status[id];
+
+	count = FSL_QDMA_MAX_SIZE;
+
+	while (count--) {
+		duplicate = 0;
+		reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQSR);
+		if (reg & FSL_QDMA_BSQSR_QE)
+			return 0;
+
+		status_addr = fsl_status->virt_head;
+
+		if (qdma_ccdf_get_queue(status_addr) ==
+		   __this_cpu_read(pre.queue) &&
+			qdma_ccdf_addr_get64(status_addr) ==
+			__this_cpu_read(pre.addr))
+			duplicate = 1;
+		i = qdma_ccdf_get_queue(status_addr) +
+			id * fsl_qdma->n_queues;
+		__this_cpu_write(pre.addr, qdma_ccdf_addr_get64(status_addr));
+		__this_cpu_write(pre.queue, qdma_ccdf_get_queue(status_addr));
+		temp_queue = fsl_queue + i;
+
+		spin_lock(&temp_queue->queue_lock);
+		if (list_empty(&temp_queue->comp_used)) {
+			if (!duplicate) {
+				spin_unlock(&temp_queue->queue_lock);
+				return -EAGAIN;
+			}
+		} else {
+			fsl_comp = list_first_entry(&temp_queue->comp_used,
+						    struct fsl_qdma_comp, list);
+			if (fsl_comp->bus_addr + 16 !=
+				__this_cpu_read(pre.addr)) {
+				if (!duplicate) {
+					spin_unlock(&temp_queue->queue_lock);
+					return -EAGAIN;
+				}
+			}
+		}
+
+		if (duplicate) {
+			reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
+			reg |= FSL_QDMA_BSQMR_DI;
+			qdma_desc_addr_set64(status_addr, 0x0);
+			fsl_status->virt_head++;
+			if (fsl_status->virt_head == fsl_status->cq
+						   + fsl_status->n_cq)
+				fsl_status->virt_head = fsl_status->cq;
+			qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
+			spin_unlock(&temp_queue->queue_lock);
+			continue;
+		}
+		list_del(&fsl_comp->list);
+
+		reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
+		reg |= FSL_QDMA_BSQMR_DI;
+		qdma_desc_addr_set64(status_addr, 0x0);
+		fsl_status->virt_head++;
+		if (fsl_status->virt_head == fsl_status->cq + fsl_status->n_cq)
+			fsl_status->virt_head = fsl_status->cq;
+		qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
+		spin_unlock(&temp_queue->queue_lock);
+
+		spin_lock(&fsl_comp->qchan->vchan.lock);
+		vchan_cookie_complete(&fsl_comp->vdesc);
+		fsl_comp->qchan->status = DMA_COMPLETE;
+		spin_unlock(&fsl_comp->qchan->vchan.lock);
+	}
+
+	return 0;
+}
+
+static irqreturn_t fsl_qdma_error_handler(int irq, void *dev_id)
+{
+	unsigned int intr;
+	struct fsl_qdma_engine *fsl_qdma = dev_id;
+	void __iomem *status = fsl_qdma->status_base;
+
+	intr = qdma_readl(fsl_qdma, status + FSL_QDMA_DEDR);
+
+	if (intr) {
+		dev_err(fsl_qdma->dma_dev.dev, "DMA transaction error!\n");
+		return IRQ_NONE;
+	}
+
+	qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_qdma_queue_handler(int irq, void *dev_id)
+{
+	int id;
+	unsigned int intr, reg;
+	struct fsl_qdma_engine *fsl_qdma = dev_id;
+	void __iomem *block, *ctrl = fsl_qdma->ctrl_base;
+
+	id = irq - fsl_qdma->irq_base;
+	if (id < 0 && id > fsl_qdma->block_number) {
+		dev_err(fsl_qdma->dma_dev.dev,
+			"irq %d is wrong irq_base is %d\n",
+			irq, fsl_qdma->irq_base);
+	}
+
+	block = fsl_qdma->block_base +
+		FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, id);
+
+	intr = qdma_readl(fsl_qdma, block + FSL_QDMA_BCQIDR(0));
+
+	if ((intr & FSL_QDMA_CQIDR_SQT) != 0)
+		intr = fsl_qdma_queue_transfer_complete(fsl_qdma, block, id);
+
+	if (intr != 0) {
+		reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR);
+		reg |= FSL_QDMA_DMR_DQD;
+		qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR);
+		qdma_writel(fsl_qdma, 0, block + FSL_QDMA_BCQIER(0));
+		dev_err(fsl_qdma->dma_dev.dev, "QDMA: status err!\n");
+	}
+
+	/* Clear all detected events and interrupts. */
+	qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR,
+		    block + FSL_QDMA_BCQIDR(0));
+
+	return IRQ_HANDLED;
+}
+
+static int
+fsl_qdma_irq_init(struct platform_device *pdev,
+		  struct fsl_qdma_engine *fsl_qdma)
+{
+	int i;
+	int cpu;
+	int ret;
+	char irq_name[20];
+
+	fsl_qdma->error_irq =
+		platform_get_irq_byname(pdev, "qdma-error");
+	if (fsl_qdma->error_irq < 0) {
+		dev_err(&pdev->dev, "Can't get qdma controller irq.\n");
+		return fsl_qdma->error_irq;
+	}
+
+	ret = devm_request_irq(&pdev->dev, fsl_qdma->error_irq,
+			       fsl_qdma_error_handler, 0,
+			       "qDMA error", fsl_qdma);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't register qDMA controller IRQ.\n");
+		return  ret;
+	}
+
+	for (i = 0; i < fsl_qdma->block_number; i++) {
+		sprintf(irq_name, "qdma-queue%d", i);
+		fsl_qdma->queue_irq[i] =
+				platform_get_irq_byname(pdev, irq_name);
+
+		if (fsl_qdma->queue_irq[i] < 0) {
+			dev_err(&pdev->dev,
+				"Can't get qdma queue %d irq.\n", i);
+			return fsl_qdma->queue_irq[i];
+		}
+
+		ret = devm_request_irq(&pdev->dev,
+				       fsl_qdma->queue_irq[i],
+				       fsl_qdma_queue_handler,
+				       0,
+				       "qDMA queue",
+				       fsl_qdma);
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Can't register qDMA queue IRQ.\n");
+			return  ret;
+		}
+
+		cpu = i % num_online_cpus();
+		ret = irq_set_affinity_hint(fsl_qdma->queue_irq[i],
+					    get_cpu_mask(cpu));
+		if (ret) {
+			dev_err(&pdev->dev,
+				"Can't set cpu %d affinity to IRQ %d.\n",
+				cpu,
+				fsl_qdma->queue_irq[i]);
+			return  ret;
+		}
+	}
+
+	return 0;
+}
+
+static void fsl_qdma_irq_exit(struct platform_device *pdev,
+			      struct fsl_qdma_engine *fsl_qdma)
+{
+	int i;
+
+	devm_free_irq(&pdev->dev, fsl_qdma->error_irq, fsl_qdma);
+	for (i = 0; i < fsl_qdma->block_number; i++)
+		devm_free_irq(&pdev->dev, fsl_qdma->queue_irq[i], fsl_qdma);
+}
+
+static int fsl_qdma_reg_init(struct fsl_qdma_engine *fsl_qdma)
+{
+	u32 reg;
+	int i, j, ret;
+	struct fsl_qdma_queue *temp;
+	void __iomem *status = fsl_qdma->status_base;
+	void __iomem *block, *ctrl = fsl_qdma->ctrl_base;
+	struct fsl_qdma_queue *fsl_queue = fsl_qdma->queue;
+
+	/* Try to halt the qDMA engine first. */
+	ret = fsl_qdma_halt(fsl_qdma);
+	if (ret) {
+		dev_err(fsl_qdma->dma_dev.dev, "DMA halt failed!");
+		return ret;
+	}
+
+	for (i = 0; i < fsl_qdma->block_number; i++) {
+		/*
+		 * Clear the command queue interrupt detect register for
+		 * all queues.
+		 */
+
+		block = fsl_qdma->block_base +
+			FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, i);
+		qdma_writel(fsl_qdma, FSL_QDMA_BCQIDR_CLEAR,
+			    block + FSL_QDMA_BCQIDR(0));
+	}
+
+	for (j = 0; j < fsl_qdma->block_number; j++) {
+		block = fsl_qdma->block_base +
+			FSL_QDMA_BLOCK_BASE_OFFSET(fsl_qdma, j);
+		for (i = 0; i < fsl_qdma->n_queues; i++) {
+			temp = fsl_queue + i + (j * fsl_qdma->n_queues);
+			/*
+			 * Initialize Command Queue registers to
+			 * point to the first
+			 * command descriptor in memory.
+			 * Dequeue Pointer Address Registers
+			 * Enqueue Pointer Address Registers
+			 */
+
+			qdma_writel(fsl_qdma, temp->bus_addr,
+				    block + FSL_QDMA_BCQDPA_SADDR(i));
+			qdma_writel(fsl_qdma, temp->bus_addr,
+				    block + FSL_QDMA_BCQEPA_SADDR(i));
+
+			/* Initialize the queue mode. */
+			reg = FSL_QDMA_BCQMR_EN;
+			reg |= FSL_QDMA_BCQMR_CD_THLD(ilog2(temp->n_cq) - 4);
+			reg |= FSL_QDMA_BCQMR_CQ_SIZE(ilog2(temp->n_cq) - 6);
+			qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BCQMR(i));
+		}
+
+		/*
+		 * Workaround for erratum: ERR010812.
+		 * We must enable XOFF to avoid the enqueue rejection occurs.
+		 * Setting SQCCMR ENTER_WM to 0x20.
+		 */
+
+		qdma_writel(fsl_qdma, FSL_QDMA_SQCCMR_ENTER_WM,
+			    block + FSL_QDMA_SQCCMR);
+
+		/*
+		 * Initialize status queue registers to point to the first
+		 * command descriptor in memory.
+		 * Dequeue Pointer Address Registers
+		 * Enqueue Pointer Address Registers
+		 */
+
+		qdma_writel(fsl_qdma, fsl_qdma->status[j]->bus_addr,
+			    block + FSL_QDMA_SQEPAR);
+		qdma_writel(fsl_qdma, fsl_qdma->status[j]->bus_addr,
+			    block + FSL_QDMA_SQDPAR);
+		/* Initialize status queue interrupt. */
+		qdma_writel(fsl_qdma, FSL_QDMA_BCQIER_CQTIE,
+			    block + FSL_QDMA_BCQIER(0));
+		qdma_writel(fsl_qdma, FSL_QDMA_BSQICR_ICEN |
+				   FSL_QDMA_BSQICR_ICST(5) | 0x8000,
+				   block + FSL_QDMA_BSQICR);
+		qdma_writel(fsl_qdma, FSL_QDMA_CQIER_MEIE |
+				   FSL_QDMA_CQIER_TEIE,
+				   block + FSL_QDMA_CQIER);
+
+		/* Initialize the status queue mode. */
+		reg = FSL_QDMA_BSQMR_EN;
+		reg |= FSL_QDMA_BSQMR_CQ_SIZE(ilog2
+			(fsl_qdma->status[j]->n_cq) - 6);
+
+		qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
+		reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
+	}
+
+	/* Initialize controller interrupt register. */
+	qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR);
+	qdma_writel(fsl_qdma, FSL_QDMA_DEIER_CLEAR, status + FSL_QDMA_DEIER);
+
+	reg = qdma_readl(fsl_qdma, ctrl + FSL_QDMA_DMR);
+	reg &= ~FSL_QDMA_DMR_DQD;
+	qdma_writel(fsl_qdma, reg, ctrl + FSL_QDMA_DMR);
+
+	return 0;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_qdma_prep_memcpy(struct dma_chan *chan, dma_addr_t dst,
+		     dma_addr_t src, size_t len, unsigned long flags)
+{
+	struct fsl_qdma_comp *fsl_comp;
+	struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+
+	fsl_comp = fsl_qdma_request_enqueue_desc(fsl_chan);
+
+	if (!fsl_comp)
+		return NULL;
+
+	fsl_qdma_comp_fill_memcpy(fsl_comp, dst, src, len);
+
+	return vchan_tx_prep(&fsl_chan->vchan, &fsl_comp->vdesc, flags);
+}
+
+static void fsl_qdma_enqueue_desc(struct fsl_qdma_chan *fsl_chan)
+{
+	u32 reg;
+	struct virt_dma_desc *vdesc;
+	struct fsl_qdma_comp *fsl_comp;
+	struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+	void __iomem *block = fsl_queue->block_base;
+
+	reg = qdma_readl(fsl_chan->qdma, block + FSL_QDMA_BCQSR(fsl_queue->id));
+	if (reg & (FSL_QDMA_BCQSR_QF | FSL_QDMA_BCQSR_XOFF))
+		return;
+	vdesc = vchan_next_desc(&fsl_chan->vchan);
+	if (!vdesc)
+		return;
+	list_del(&vdesc->node);
+	fsl_comp = to_fsl_qdma_comp(vdesc);
+
+	memcpy(fsl_queue->virt_head++,
+	       fsl_comp->virt_addr, sizeof(struct fsl_qdma_format));
+	if (fsl_queue->virt_head == fsl_queue->cq + fsl_queue->n_cq)
+		fsl_queue->virt_head = fsl_queue->cq;
+
+	list_add_tail(&fsl_comp->list, &fsl_queue->comp_used);
+	barrier();
+	reg = qdma_readl(fsl_chan->qdma, block + FSL_QDMA_BCQMR(fsl_queue->id));
+	reg |= FSL_QDMA_BCQMR_EI;
+	qdma_writel(fsl_chan->qdma, reg, block + FSL_QDMA_BCQMR(fsl_queue->id));
+	fsl_chan->status = DMA_IN_PROGRESS;
+}
+
+static void fsl_qdma_free_desc(struct virt_dma_desc *vdesc)
+{
+	unsigned long flags;
+	struct fsl_qdma_comp *fsl_comp;
+	struct fsl_qdma_queue *fsl_queue;
+
+	fsl_comp = to_fsl_qdma_comp(vdesc);
+	fsl_queue = fsl_comp->qchan->queue;
+
+	spin_lock_irqsave(&fsl_queue->queue_lock, flags);
+	list_add_tail(&fsl_comp->list, &fsl_queue->comp_free);
+	spin_unlock_irqrestore(&fsl_queue->queue_lock, flags);
+}
+
+static void fsl_qdma_issue_pending(struct dma_chan *chan)
+{
+	unsigned long flags;
+	struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+	struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+
+	spin_lock_irqsave(&fsl_queue->queue_lock, flags);
+	spin_lock(&fsl_chan->vchan.lock);
+	if (vchan_issue_pending(&fsl_chan->vchan))
+		fsl_qdma_enqueue_desc(fsl_chan);
+	spin_unlock(&fsl_chan->vchan.lock);
+	spin_unlock_irqrestore(&fsl_queue->queue_lock, flags);
+}
+
+static void fsl_qdma_synchronize(struct dma_chan *chan)
+{
+	struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+
+	vchan_synchronize(&fsl_chan->vchan);
+}
+
+static int fsl_qdma_terminate_all(struct dma_chan *chan)
+{
+	LIST_HEAD(head);
+	unsigned long flags;
+	struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+
+	spin_lock_irqsave(&fsl_chan->vchan.lock, flags);
+	vchan_get_all_descriptors(&fsl_chan->vchan, &head);
+	spin_unlock_irqrestore(&fsl_chan->vchan.lock, flags);
+	vchan_dma_desc_free_list(&fsl_chan->vchan, &head);
+	return 0;
+}
+
+static int fsl_qdma_alloc_chan_resources(struct dma_chan *chan)
+{
+	int ret;
+	struct fsl_qdma_chan *fsl_chan = to_fsl_qdma_chan(chan);
+	struct fsl_qdma_engine *fsl_qdma = fsl_chan->qdma;
+	struct fsl_qdma_queue *fsl_queue = fsl_chan->queue;
+
+	if (fsl_queue->comp_pool && fsl_queue->desc_pool)
+		return fsl_qdma->desc_allocated;
+
+	INIT_LIST_HEAD(&fsl_queue->comp_free);
+
+	/*
+	 * The dma pool for queue command buffer
+	 */
+	fsl_queue->comp_pool =
+	dma_pool_create("comp_pool",
+			chan->device->dev,
+			FSL_QDMA_COMMAND_BUFFER_SIZE,
+			64, 0);
+	if (!fsl_queue->comp_pool)
+		return -ENOMEM;
+
+	/*
+	 * The dma pool for Descriptor(SD/DD) buffer
+	 */
+	fsl_queue->desc_pool =
+	dma_pool_create("desc_pool",
+			chan->device->dev,
+			FSL_QDMA_DESCRIPTOR_BUFFER_SIZE,
+			32, 0);
+	if (!fsl_queue->desc_pool)
+		goto err_desc_pool;
+
+	ret = fsl_qdma_pre_request_enqueue_desc(fsl_queue);
+	if (ret) {
+		dev_err(chan->device->dev,
+			"failed to alloc dma buffer for S/G descriptor\n");
+		goto err_mem;
+	}
+
+	fsl_qdma->desc_allocated++;
+	return fsl_qdma->desc_allocated;
+
+err_mem:
+	dma_pool_destroy(fsl_queue->desc_pool);
+err_desc_pool:
+	dma_pool_destroy(fsl_queue->comp_pool);
+	return -ENOMEM;
+}
+
+static int fsl_qdma_probe(struct platform_device *pdev)
+{
+	int ret, i;
+	int blk_num, blk_off;
+	u32 len, chans, queues;
+	struct resource *res;
+	struct fsl_qdma_chan *fsl_chan;
+	struct fsl_qdma_engine *fsl_qdma;
+	struct device_node *np = pdev->dev.of_node;
+
+	ret = of_property_read_u32(np, "dma-channels", &chans);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get dma-channels.\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "block-offset", &blk_off);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get block-offset.\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "block-number", &blk_num);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get block-number.\n");
+		return ret;
+	}
+
+	blk_num = min_t(int, blk_num, num_online_cpus());
+
+	len = sizeof(*fsl_qdma);
+	fsl_qdma = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!fsl_qdma)
+		return -ENOMEM;
+
+	len = sizeof(*fsl_chan) * chans;
+	fsl_qdma->chans = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!fsl_qdma->chans)
+		return -ENOMEM;
+
+	len = sizeof(struct fsl_qdma_queue *) * blk_num;
+	fsl_qdma->status = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!fsl_qdma->status)
+		return -ENOMEM;
+
+	len = sizeof(int) * blk_num;
+	fsl_qdma->queue_irq = devm_kzalloc(&pdev->dev, len, GFP_KERNEL);
+	if (!fsl_qdma->queue_irq)
+		return -ENOMEM;
+
+	ret = of_property_read_u32(np, "fsl,dma-queues", &queues);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't get queues.\n");
+		return ret;
+	}
+
+	fsl_qdma->desc_allocated = 0;
+	fsl_qdma->n_chans = chans;
+	fsl_qdma->n_queues = queues;
+	fsl_qdma->block_number = blk_num;
+	fsl_qdma->block_offset = blk_off;
+
+	mutex_init(&fsl_qdma->fsl_qdma_mutex);
+
+	for (i = 0; i < fsl_qdma->block_number; i++) {
+		fsl_qdma->status[i] = fsl_qdma_prep_status_queue(pdev);
+		if (!fsl_qdma->status[i])
+			return -ENOMEM;
+	}
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	fsl_qdma->ctrl_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fsl_qdma->ctrl_base))
+		return PTR_ERR(fsl_qdma->ctrl_base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	fsl_qdma->status_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fsl_qdma->status_base))
+		return PTR_ERR(fsl_qdma->status_base);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+	fsl_qdma->block_base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(fsl_qdma->block_base))
+		return PTR_ERR(fsl_qdma->block_base);
+	fsl_qdma->queue = fsl_qdma_alloc_queue_resources(pdev, fsl_qdma);
+	if (!fsl_qdma->queue)
+		return -ENOMEM;
+
+	ret = fsl_qdma_irq_init(pdev, fsl_qdma);
+	if (ret)
+		return ret;
+
+	fsl_qdma->irq_base = platform_get_irq_byname(pdev, "qdma-queue0");
+	fsl_qdma->feature = of_property_read_bool(np, "big-endian");
+	INIT_LIST_HEAD(&fsl_qdma->dma_dev.channels);
+
+	for (i = 0; i < fsl_qdma->n_chans; i++) {
+		struct fsl_qdma_chan *fsl_chan = &fsl_qdma->chans[i];
+
+		fsl_chan->qdma = fsl_qdma;
+		fsl_chan->queue = fsl_qdma->queue + i % (fsl_qdma->n_queues *
+							fsl_qdma->block_number);
+		fsl_chan->vchan.desc_free = fsl_qdma_free_desc;
+		vchan_init(&fsl_chan->vchan, &fsl_qdma->dma_dev);
+	}
+
+	dma_cap_set(DMA_MEMCPY, fsl_qdma->dma_dev.cap_mask);
+
+	fsl_qdma->dma_dev.dev = &pdev->dev;
+	fsl_qdma->dma_dev.device_free_chan_resources =
+		fsl_qdma_free_chan_resources;
+	fsl_qdma->dma_dev.device_alloc_chan_resources =
+		fsl_qdma_alloc_chan_resources;
+	fsl_qdma->dma_dev.device_tx_status = dma_cookie_status;
+	fsl_qdma->dma_dev.device_prep_dma_memcpy = fsl_qdma_prep_memcpy;
+	fsl_qdma->dma_dev.device_issue_pending = fsl_qdma_issue_pending;
+	fsl_qdma->dma_dev.device_synchronize = fsl_qdma_synchronize;
+	fsl_qdma->dma_dev.device_terminate_all = fsl_qdma_terminate_all;
+
+	dma_set_mask(&pdev->dev, DMA_BIT_MASK(40));
+
+	platform_set_drvdata(pdev, fsl_qdma);
+
+	ret = dma_async_device_register(&fsl_qdma->dma_dev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"Can't register NXP Layerscape qDMA engine.\n");
+		return ret;
+	}
+
+	ret = fsl_qdma_reg_init(fsl_qdma);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't Initialize the qDMA engine.\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static void fsl_qdma_cleanup_vchan(struct dma_device *dmadev)
+{
+	struct fsl_qdma_chan *chan, *_chan;
+
+	list_for_each_entry_safe(chan, _chan,
+				 &dmadev->channels, vchan.chan.device_node) {
+		list_del(&chan->vchan.chan.device_node);
+		tasklet_kill(&chan->vchan.task);
+	}
+}
+
+static int fsl_qdma_remove(struct platform_device *pdev)
+{
+	int i;
+	struct fsl_qdma_queue *status;
+	struct device_node *np = pdev->dev.of_node;
+	struct fsl_qdma_engine *fsl_qdma = platform_get_drvdata(pdev);
+
+	fsl_qdma_irq_exit(pdev, fsl_qdma);
+	fsl_qdma_cleanup_vchan(&fsl_qdma->dma_dev);
+	of_dma_controller_free(np);
+	dma_async_device_unregister(&fsl_qdma->dma_dev);
+
+	for (i = 0; i < fsl_qdma->block_number; i++) {
+		status = fsl_qdma->status[i];
+		dma_free_coherent(&pdev->dev, sizeof(struct fsl_qdma_format) *
+				status->n_cq, status->cq, status->bus_addr);
+	}
+	return 0;
+}
+
+static const struct of_device_id fsl_qdma_dt_ids[] = {
+	{ .compatible = "fsl,ls1021a-qdma", },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, fsl_qdma_dt_ids);
+
+static struct platform_driver fsl_qdma_driver = {
+	.driver		= {
+		.name	= "fsl-qdma",
+		.of_match_table = fsl_qdma_dt_ids,
+	},
+	.probe          = fsl_qdma_probe,
+	.remove		= fsl_qdma_remove,
+};
+
+module_platform_driver(fsl_qdma_driver);
+
+MODULE_ALIAS("platform:fsl-qdma");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("NXP Layerscape qDMA engine driver");
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
index 9d360a3..1e38e6b 100644
--- a/drivers/dma/fsldma.c
+++ b/drivers/dma/fsldma.c
@@ -53,42 +53,42 @@ static const char msg_ld_oom[] = "No free memory for link descriptor";
 
 static void set_sr(struct fsldma_chan *chan, u32 val)
 {
-	DMA_OUT(chan, &chan->regs->sr, val, 32);
+	FSL_DMA_OUT(chan, &chan->regs->sr, val, 32);
 }
 
 static u32 get_sr(struct fsldma_chan *chan)
 {
-	return DMA_IN(chan, &chan->regs->sr, 32);
+	return FSL_DMA_IN(chan, &chan->regs->sr, 32);
 }
 
 static void set_mr(struct fsldma_chan *chan, u32 val)
 {
-	DMA_OUT(chan, &chan->regs->mr, val, 32);
+	FSL_DMA_OUT(chan, &chan->regs->mr, val, 32);
 }
 
 static u32 get_mr(struct fsldma_chan *chan)
 {
-	return DMA_IN(chan, &chan->regs->mr, 32);
+	return FSL_DMA_IN(chan, &chan->regs->mr, 32);
 }
 
 static void set_cdar(struct fsldma_chan *chan, dma_addr_t addr)
 {
-	DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
+	FSL_DMA_OUT(chan, &chan->regs->cdar, addr | FSL_DMA_SNEN, 64);
 }
 
 static dma_addr_t get_cdar(struct fsldma_chan *chan)
 {
-	return DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
+	return FSL_DMA_IN(chan, &chan->regs->cdar, 64) & ~FSL_DMA_SNEN;
 }
 
 static void set_bcr(struct fsldma_chan *chan, u32 val)
 {
-	DMA_OUT(chan, &chan->regs->bcr, val, 32);
+	FSL_DMA_OUT(chan, &chan->regs->bcr, val, 32);
 }
 
 static u32 get_bcr(struct fsldma_chan *chan)
 {
-	return DMA_IN(chan, &chan->regs->bcr, 32);
+	return FSL_DMA_IN(chan, &chan->regs->bcr, 32);
 }
 
 /*
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
index 4787d48..a9b12f82 100644
--- a/drivers/dma/fsldma.h
+++ b/drivers/dma/fsldma.h
@@ -196,39 +196,67 @@ struct fsldma_chan {
 #define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
 #define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
 
-#ifndef __powerpc64__
-static u64 in_be64(const u64 __iomem *addr)
+#ifdef	CONFIG_PPC
+#define fsl_ioread32(p)		in_le32(p)
+#define fsl_ioread32be(p)	in_be32(p)
+#define fsl_iowrite32(v, p)	out_le32(p, v)
+#define fsl_iowrite32be(v, p)	out_be32(p, v)
+
+#ifdef __powerpc64__
+#define fsl_ioread64(p)		in_le64(p)
+#define fsl_ioread64be(p)	in_be64(p)
+#define fsl_iowrite64(v, p)	out_le64(p, v)
+#define fsl_iowrite64be(v, p)	out_be64(p, v)
+#else
+static u64 fsl_ioread64(const u64 __iomem *addr)
 {
-	return ((u64)in_be32((u32 __iomem *)addr) << 32) |
-		(in_be32((u32 __iomem *)addr + 1));
+	u32 fsl_addr = lower_32_bits(addr);
+	u64 fsl_addr_hi = (u64)in_le32((u32 *)(fsl_addr + 1)) << 32;
+
+	return fsl_addr_hi | in_le32((u32 *)fsl_addr);
 }
 
-static void out_be64(u64 __iomem *addr, u64 val)
-{
-	out_be32((u32 __iomem *)addr, val >> 32);
-	out_be32((u32 __iomem *)addr + 1, (u32)val);
-}
-
-/* There is no asm instructions for 64 bits reverse loads and stores */
-static u64 in_le64(const u64 __iomem *addr)
-{
-	return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) |
-		(in_le32((u32 __iomem *)addr));
-}
-
-static void out_le64(u64 __iomem *addr, u64 val)
+static void fsl_iowrite64(u64 val, u64 __iomem *addr)
 {
 	out_le32((u32 __iomem *)addr + 1, val >> 32);
 	out_le32((u32 __iomem *)addr, (u32)val);
 }
+
+static u64 fsl_ioread64be(const u64 __iomem *addr)
+{
+	u32 fsl_addr = lower_32_bits(addr);
+	u64 fsl_addr_hi = (u64)in_be32((u32 *)fsl_addr) << 32;
+
+	return fsl_addr_hi | in_be32((u32 *)(fsl_addr + 1));
+}
+
+static void fsl_iowrite64be(u64 val, u64 __iomem *addr)
+{
+	out_be32((u32 __iomem *)addr, val >> 32);
+	out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+#endif
 #endif
 
-#define DMA_IN(fsl_chan, addr, width)					\
-		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
-			in_be##width(addr) : in_le##width(addr))
-#define DMA_OUT(fsl_chan, addr, val, width)				\
-		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
-			out_be##width(addr, val) : out_le##width(addr, val))
+#if defined(CONFIG_ARM64) || defined(CONFIG_ARM)
+#define fsl_ioread32(p)		ioread32(p)
+#define fsl_ioread32be(p)	ioread32be(p)
+#define fsl_iowrite32(v, p)	iowrite32(v, p)
+#define fsl_iowrite32be(v, p)	iowrite32be(v, p)
+#define fsl_ioread64(p)		ioread64(p)
+#define fsl_ioread64be(p)	ioread64be(p)
+#define fsl_iowrite64(v, p)	iowrite64(v, p)
+#define fsl_iowrite64be(v, p)	iowrite64be(v, p)
+#endif
+
+#define FSL_DMA_IN(fsl_dma, addr, width)			\
+		(((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ?	\
+			fsl_ioread##width##be(addr) : fsl_ioread##width(addr))
+
+#define FSL_DMA_OUT(fsl_dma, addr, val, width)			\
+		(((fsl_dma)->feature & FSL_DMA_BIG_ENDIAN) ?	\
+			fsl_iowrite##width##be(val, addr) : fsl_iowrite	\
+		##width(val, addr))
 
 #define DMA_TO_CPU(fsl_chan, d, width)					\
 		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
diff --git a/drivers/dma/imx-dma.c b/drivers/dma/imx-dma.c
index 4a09af3..00a089e 100644
--- a/drivers/dma/imx-dma.c
+++ b/drivers/dma/imx-dma.c
@@ -278,14 +278,14 @@ static int imxdma_hw_chain(struct imxdma_channel *imxdmac)
 /*
  * imxdma_sg_next - prepare next chunk for scatter-gather DMA emulation
  */
-static inline int imxdma_sg_next(struct imxdma_desc *d)
+static inline void imxdma_sg_next(struct imxdma_desc *d)
 {
 	struct imxdma_channel *imxdmac = to_imxdma_chan(d->desc.chan);
 	struct imxdma_engine *imxdma = imxdmac->imxdma;
 	struct scatterlist *sg = d->sg;
-	unsigned long now;
+	size_t now;
 
-	now = min(d->len, sg_dma_len(sg));
+	now = min_t(size_t, d->len, sg_dma_len(sg));
 	if (d->len != IMX_DMA_LENGTH_LOOP)
 		d->len -= now;
 
@@ -303,8 +303,6 @@ static inline int imxdma_sg_next(struct imxdma_desc *d)
 		 imx_dmav1_readl(imxdma, DMA_DAR(imxdmac->channel)),
 		 imx_dmav1_readl(imxdma, DMA_SAR(imxdmac->channel)),
 		 imx_dmav1_readl(imxdma, DMA_CNTR(imxdmac->channel)));
-
-	return now;
 }
 
 static void imxdma_enable_hw(struct imxdma_desc *d)
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 86708fb..5f3c137 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -377,6 +377,7 @@ struct sdma_channel {
 	unsigned long			watermark_level;
 	u32				shp_addr, per_addr;
 	enum dma_status			status;
+	bool				context_loaded;
 	struct imx_dma_data		data;
 	struct work_struct		terminate_worker;
 };
@@ -440,6 +441,8 @@ struct sdma_engine {
 	unsigned int			irq;
 	dma_addr_t			bd0_phys;
 	struct sdma_buffer_descriptor	*bd0;
+	/* clock ratio for AHB:SDMA core. 1:1 is 1, 2:1 is 0*/
+	bool				clk_ratio;
 };
 
 static int sdma_config_write(struct dma_chan *chan,
@@ -662,8 +665,11 @@ static int sdma_run_channel0(struct sdma_engine *sdma)
 		dev_err(sdma->dev, "Timeout waiting for CH0 ready\n");
 
 	/* Set bits of CONFIG register with dynamic context switching */
-	if (readl(sdma->regs + SDMA_H_CONFIG) == 0)
-		writel_relaxed(SDMA_H_CONFIG_CSM, sdma->regs + SDMA_H_CONFIG);
+	reg = readl(sdma->regs + SDMA_H_CONFIG);
+	if ((reg & SDMA_H_CONFIG_CSM) == 0) {
+		reg |= SDMA_H_CONFIG_CSM;
+		writel_relaxed(reg, sdma->regs + SDMA_H_CONFIG);
+	}
 
 	return ret;
 }
@@ -677,7 +683,7 @@ static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
 	int ret;
 	unsigned long flags;
 
-	buf_virt = dma_alloc_coherent(NULL, size, &buf_phys, GFP_KERNEL);
+	buf_virt = dma_alloc_coherent(sdma->dev, size, &buf_phys, GFP_KERNEL);
 	if (!buf_virt) {
 		return -ENOMEM;
 	}
@@ -696,7 +702,7 @@ static int sdma_load_script(struct sdma_engine *sdma, void *buf, int size,
 
 	spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
 
-	dma_free_coherent(NULL, size, buf_virt, buf_phys);
+	dma_free_coherent(sdma->dev, size, buf_virt, buf_phys);
 
 	return ret;
 }
@@ -970,6 +976,9 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 	int ret;
 	unsigned long flags;
 
+	if (sdmac->context_loaded)
+		return 0;
+
 	if (sdmac->direction == DMA_DEV_TO_MEM)
 		load_address = sdmac->pc_from_device;
 	else if (sdmac->direction == DMA_DEV_TO_DEV)
@@ -1012,6 +1021,8 @@ static int sdma_load_context(struct sdma_channel *sdmac)
 
 	spin_unlock_irqrestore(&sdma->channel_0_lock, flags);
 
+	sdmac->context_loaded = true;
+
 	return ret;
 }
 
@@ -1051,6 +1062,7 @@ static void sdma_channel_terminate_work(struct work_struct *work)
 	sdmac->desc = NULL;
 	spin_unlock_irqrestore(&sdmac->vc.lock, flags);
 	vchan_dma_desc_free_list(&sdmac->vc, &head);
+	sdmac->context_loaded = false;
 }
 
 static int sdma_disable_channel_async(struct dma_chan *chan)
@@ -1182,8 +1194,8 @@ static int sdma_request_channel0(struct sdma_engine *sdma)
 {
 	int ret = -EBUSY;
 
-	sdma->bd0 = dma_alloc_coherent(NULL, PAGE_SIZE, &sdma->bd0_phys,
-				       GFP_NOWAIT);
+	sdma->bd0 = dma_alloc_coherent(sdma->dev, PAGE_SIZE, &sdma->bd0_phys,
+					GFP_NOWAIT);
 	if (!sdma->bd0) {
 		ret = -ENOMEM;
 		goto out;
@@ -1205,8 +1217,8 @@ static int sdma_alloc_bd(struct sdma_desc *desc)
 	u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
 	int ret = 0;
 
-	desc->bd = dma_alloc_coherent(NULL, bd_size, &desc->bd_phys,
-				      GFP_NOWAIT);
+	desc->bd = dma_alloc_coherent(desc->sdmac->sdma->dev, bd_size,
+				       &desc->bd_phys, GFP_NOWAIT);
 	if (!desc->bd) {
 		ret = -ENOMEM;
 		goto out;
@@ -1219,7 +1231,8 @@ static void sdma_free_bd(struct sdma_desc *desc)
 {
 	u32 bd_size = desc->num_bd * sizeof(struct sdma_buffer_descriptor);
 
-	dma_free_coherent(NULL, bd_size, desc->bd, desc->bd_phys);
+	dma_free_coherent(desc->sdmac->sdma->dev, bd_size, desc->bd,
+			  desc->bd_phys);
 }
 
 static void sdma_desc_free(struct virt_dma_desc *vd)
@@ -1839,10 +1852,13 @@ static int sdma_init(struct sdma_engine *sdma)
 	if (ret)
 		goto disable_clk_ipg;
 
+	if (clk_get_rate(sdma->clk_ahb) == clk_get_rate(sdma->clk_ipg))
+		sdma->clk_ratio = 1;
+
 	/* Be sure SDMA has not started yet */
 	writel_relaxed(0, sdma->regs + SDMA_H_C0PTR);
 
-	sdma->channel_control = dma_alloc_coherent(NULL,
+	sdma->channel_control = dma_alloc_coherent(sdma->dev,
 			MAX_DMA_CHANNELS * sizeof (struct sdma_channel_control) +
 			sizeof(struct sdma_context_data),
 			&ccb_phys, GFP_KERNEL);
@@ -1879,8 +1895,10 @@ static int sdma_init(struct sdma_engine *sdma)
 	writel_relaxed(0x4050, sdma->regs + SDMA_CHN0ADDR);
 
 	/* Set bits of CONFIG register but with static context switching */
-	/* FIXME: Check whether to set ACR bit depending on clock ratios */
-	writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
+	if (sdma->clk_ratio)
+		writel_relaxed(SDMA_H_CONFIG_ACR, sdma->regs + SDMA_H_CONFIG);
+	else
+		writel_relaxed(0, sdma->regs + SDMA_H_CONFIG);
 
 	writel_relaxed(ccb_phys, sdma->regs + SDMA_H_C0PTR);
 
@@ -1903,11 +1921,16 @@ static int sdma_init(struct sdma_engine *sdma)
 static bool sdma_filter_fn(struct dma_chan *chan, void *fn_param)
 {
 	struct sdma_channel *sdmac = to_sdma_chan(chan);
+	struct sdma_engine *sdma = sdmac->sdma;
 	struct imx_dma_data *data = fn_param;
 
 	if (!imx_dma_is_general_purpose(chan))
 		return false;
 
+	/* return false if it's not the right device */
+	if (sdma->dev->of_node != data->of_node)
+		return false;
+
 	sdmac->data = *data;
 	chan->private = &sdmac->data;
 
@@ -1935,6 +1958,7 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
 	 * be set to sdmac->event_id1.
 	 */
 	data.dma_request2 = 0;
+	data.of_node = ofdma->of_node;
 
 	return dma_request_channel(mask, sdma_filter_fn, &data);
 }
@@ -2097,6 +2121,7 @@ static int sdma_probe(struct platform_device *pdev)
 	sdma->dma_device.device_prep_dma_memcpy = sdma_prep_memcpy;
 	sdma->dma_device.device_issue_pending = sdma_issue_pending;
 	sdma->dma_device.dev->dma_parms = &sdma->dma_parms;
+	sdma->dma_device.copy_align = 2;
 	dma_set_max_seg_size(sdma->dma_device.dev, SDMA_BD_MAX_CNT);
 
 	platform_set_drvdata(pdev, sdma);
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index 23fb2fa..f373a139 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -372,6 +372,7 @@ struct ioat_ring_ent **
 ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 {
 	struct ioatdma_chan *ioat_chan = to_ioat_chan(c);
+	struct ioatdma_device *ioat_dma = ioat_chan->ioat_dma;
 	struct ioat_ring_ent **ring;
 	int total_descs = 1 << order;
 	int i, chunks;
@@ -437,6 +438,17 @@ ioat_alloc_ring(struct dma_chan *c, int order, gfp_t flags)
 	}
 	ring[i]->hw->next = ring[0]->txd.phys;
 
+	/* setup descriptor pre-fetching for v3.4 */
+	if (ioat_dma->cap & IOAT_CAP_DPS) {
+		u16 drsctl = IOAT_CHAN_DRSZ_2MB | IOAT_CHAN_DRS_EN;
+
+		if (chunks == 1)
+			drsctl |= IOAT_CHAN_DRS_AUTOWRAP;
+
+		writew(drsctl, ioat_chan->reg_base + IOAT_CHAN_DRSCTL_OFFSET);
+
+	}
+
 	return ring;
 }
 
diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h
index 1ab42ec..aaafd0e 100644
--- a/drivers/dma/ioat/dma.h
+++ b/drivers/dma/ioat/dma.h
@@ -27,7 +27,7 @@
 #include "registers.h"
 #include "hw.h"
 
-#define IOAT_DMA_VERSION  "4.00"
+#define IOAT_DMA_VERSION  "5.00"
 
 #define IOAT_DMA_DCA_ANY_CPU		~0
 
diff --git a/drivers/dma/ioat/hw.h b/drivers/dma/ioat/hw.h
index abcc51b..781c94d 100644
--- a/drivers/dma/ioat/hw.h
+++ b/drivers/dma/ioat/hw.h
@@ -66,11 +66,14 @@
 
 #define PCI_DEVICE_ID_INTEL_IOAT_SKX	0x2021
 
+#define PCI_DEVICE_ID_INTEL_IOAT_ICX	0x0b00
+
 #define IOAT_VER_1_2            0x12    /* Version 1.2 */
 #define IOAT_VER_2_0            0x20    /* Version 2.0 */
 #define IOAT_VER_3_0            0x30    /* Version 3.0 */
 #define IOAT_VER_3_2            0x32    /* Version 3.2 */
 #define IOAT_VER_3_3            0x33    /* Version 3.3 */
+#define IOAT_VER_3_4		0x34	/* Version 3.4 */
 
 
 int system_has_dca_enabled(struct pci_dev *pdev);
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 2d810df..d41dc9a 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -119,6 +119,9 @@ static const struct pci_device_id ioat_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE2) },
 	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_BDXDE3) },
 
+	/* I/OAT v3.4 platforms */
+	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IOAT_ICX) },
+
 	{ 0, }
 };
 MODULE_DEVICE_TABLE(pci, ioat_pci_tbl);
@@ -135,10 +138,10 @@ static int ioat3_dma_self_test(struct ioatdma_device *ioat_dma);
 static int ioat_dca_enabled = 1;
 module_param(ioat_dca_enabled, int, 0644);
 MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
-int ioat_pending_level = 4;
+int ioat_pending_level = 7;
 module_param(ioat_pending_level, int, 0644);
 MODULE_PARM_DESC(ioat_pending_level,
-		 "high-water mark for pushing ioat descriptors (default: 4)");
+		 "high-water mark for pushing ioat descriptors (default: 7)");
 static char ioat_interrupt_style[32] = "msix";
 module_param_string(ioat_interrupt_style, ioat_interrupt_style,
 		    sizeof(ioat_interrupt_style), 0644);
@@ -635,6 +638,11 @@ static void ioat_free_chan_resources(struct dma_chan *c)
 	ioat_stop(ioat_chan);
 	ioat_reset_hw(ioat_chan);
 
+	/* Put LTR to idle */
+	if (ioat_dma->version >= IOAT_VER_3_4)
+		writeb(IOAT_CHAN_LTR_SWSEL_IDLE,
+			ioat_chan->reg_base + IOAT_CHAN_LTR_SWSEL_OFFSET);
+
 	spin_lock_bh(&ioat_chan->cleanup_lock);
 	spin_lock_bh(&ioat_chan->prep_lock);
 	descs = ioat_ring_space(ioat_chan);
@@ -724,6 +732,28 @@ static int ioat_alloc_chan_resources(struct dma_chan *c)
 	spin_unlock_bh(&ioat_chan->prep_lock);
 	spin_unlock_bh(&ioat_chan->cleanup_lock);
 
+	/* Setting up LTR values for 3.4 or later */
+	if (ioat_chan->ioat_dma->version >= IOAT_VER_3_4) {
+		u32 lat_val;
+
+		lat_val = IOAT_CHAN_LTR_ACTIVE_SNVAL |
+			IOAT_CHAN_LTR_ACTIVE_SNLATSCALE |
+			IOAT_CHAN_LTR_ACTIVE_SNREQMNT;
+		writel(lat_val, ioat_chan->reg_base +
+				IOAT_CHAN_LTR_ACTIVE_OFFSET);
+
+		lat_val = IOAT_CHAN_LTR_IDLE_SNVAL |
+			  IOAT_CHAN_LTR_IDLE_SNLATSCALE |
+			  IOAT_CHAN_LTR_IDLE_SNREQMNT;
+		writel(lat_val, ioat_chan->reg_base +
+				IOAT_CHAN_LTR_IDLE_OFFSET);
+
+		/* Select to active */
+		writeb(IOAT_CHAN_LTR_SWSEL_ACTIVE,
+		       ioat_chan->reg_base +
+		       IOAT_CHAN_LTR_SWSEL_OFFSET);
+	}
+
 	ioat_start_null_desc(ioat_chan);
 
 	/* check that we got off the ground */
@@ -1185,6 +1215,10 @@ static int ioat3_dma_probe(struct ioatdma_device *ioat_dma, int dca)
 	if (err)
 		return err;
 
+	if (ioat_dma->cap & IOAT_CAP_DPS)
+		writeb(ioat_pending_level + 1,
+		       ioat_dma->reg_base + IOAT_PREFETCH_LIMIT_OFFSET);
+
 	return 0;
 }
 
@@ -1350,6 +1384,8 @@ static int ioat_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	pci_set_drvdata(pdev, device);
 
 	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+	if (device->version >= IOAT_VER_3_4)
+		ioat_dca_enabled = 0;
 	if (device->version >= IOAT_VER_3_0) {
 		if (is_skx_ioat(pdev))
 			device->version = IOAT_VER_3_2;
diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h
index 2f3bbc8..99c1c24 100644
--- a/drivers/dma/ioat/registers.h
+++ b/drivers/dma/ioat/registers.h
@@ -84,6 +84,9 @@
 #define IOAT_CAP_PQ				0x00000200
 #define IOAT_CAP_DWBES				0x00002000
 #define IOAT_CAP_RAID16SS			0x00020000
+#define IOAT_CAP_DPS				0x00800000
+
+#define IOAT_PREFETCH_LIMIT_OFFSET		0x4C	/* CHWPREFLMT */
 
 #define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
 
@@ -243,4 +246,25 @@
 
 #define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
 
+#define IOAT_CHAN_DRSCTL_OFFSET			0xB6
+#define IOAT_CHAN_DRSZ_4KB			0x0000
+#define IOAT_CHAN_DRSZ_8KB			0x0001
+#define IOAT_CHAN_DRSZ_2MB			0x0009
+#define IOAT_CHAN_DRS_EN			0x0100
+#define IOAT_CHAN_DRS_AUTOWRAP			0x0200
+
+#define IOAT_CHAN_LTR_SWSEL_OFFSET		0xBC
+#define IOAT_CHAN_LTR_SWSEL_ACTIVE		0x0
+#define IOAT_CHAN_LTR_SWSEL_IDLE		0x1
+
+#define IOAT_CHAN_LTR_ACTIVE_OFFSET		0xC0
+#define IOAT_CHAN_LTR_ACTIVE_SNVAL		0x0000	/* 0 us */
+#define IOAT_CHAN_LTR_ACTIVE_SNLATSCALE		0x0800	/* 1us scale */
+#define IOAT_CHAN_LTR_ACTIVE_SNREQMNT		0x8000	/* snoop req enable */
+
+#define IOAT_CHAN_LTR_IDLE_OFFSET		0xC4
+#define IOAT_CHAN_LTR_IDLE_SNVAL		0x0258	/* 600 us */
+#define IOAT_CHAN_LTR_IDLE_SNLATSCALE		0x0800	/* 1us scale */
+#define IOAT_CHAN_LTR_IDLE_SNREQMNT		0x8000	/* snoop req enable */
+
 #endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index fdec2b6..5737d92 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -52,8 +52,6 @@
 #define CX_SRC			0x814
 #define CX_DST			0x818
 #define CX_CFG			0x81c
-#define AXI_CFG			0x820
-#define AXI_CFG_DEFAULT		0x201201
 
 #define CX_LLI_CHAIN_EN		0x2
 #define CX_CFG_EN		0x1
@@ -113,9 +111,18 @@ struct k3_dma_dev {
 	struct dma_pool		*pool;
 	u32			dma_channels;
 	u32			dma_requests;
+	u32			dma_channel_mask;
 	unsigned int		irq;
 };
 
+
+#define K3_FLAG_NOCLK	BIT(1)
+
+struct k3dma_soc_data {
+	unsigned long flags;
+};
+
+
 #define to_k3_dma(dmadev) container_of(dmadev, struct k3_dma_dev, slave)
 
 static int k3_dma_config_write(struct dma_chan *chan,
@@ -161,7 +168,6 @@ static void k3_dma_set_desc(struct k3_dma_phy *phy, struct k3_desc_hw *hw)
 	writel_relaxed(hw->count, phy->base + CX_CNT0);
 	writel_relaxed(hw->saddr, phy->base + CX_SRC);
 	writel_relaxed(hw->daddr, phy->base + CX_DST);
-	writel_relaxed(AXI_CFG_DEFAULT, phy->base + AXI_CFG);
 	writel_relaxed(hw->config, phy->base + CX_CFG);
 }
 
@@ -314,6 +320,9 @@ static void k3_dma_tasklet(unsigned long arg)
 	/* check new channel request in d->chan_pending */
 	spin_lock_irq(&d->lock);
 	for (pch = 0; pch < d->dma_channels; pch++) {
+		if (!(d->dma_channel_mask & (1 << pch)))
+			continue;
+
 		p = &d->phy[pch];
 
 		if (p->vchan == NULL && !list_empty(&d->chan_pending)) {
@@ -331,6 +340,9 @@ static void k3_dma_tasklet(unsigned long arg)
 	spin_unlock_irq(&d->lock);
 
 	for (pch = 0; pch < d->dma_channels; pch++) {
+		if (!(d->dma_channel_mask & (1 << pch)))
+			continue;
+
 		if (pch_alloc & (1 << pch)) {
 			p = &d->phy[pch];
 			c = p->vchan;
@@ -790,8 +802,21 @@ static int k3_dma_transfer_resume(struct dma_chan *chan)
 	return 0;
 }
 
+static const struct k3dma_soc_data k3_v1_dma_data = {
+	.flags = 0,
+};
+
+static const struct k3dma_soc_data asp_v1_dma_data = {
+	.flags = K3_FLAG_NOCLK,
+};
+
 static const struct of_device_id k3_pdma_dt_ids[] = {
-	{ .compatible = "hisilicon,k3-dma-1.0", },
+	{ .compatible = "hisilicon,k3-dma-1.0",
+	  .data = &k3_v1_dma_data
+	},
+	{ .compatible = "hisilicon,hisi-pcm-asp-dma-1.0",
+	  .data = &asp_v1_dma_data
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, k3_pdma_dt_ids);
@@ -810,6 +835,7 @@ static struct dma_chan *k3_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
 
 static int k3_dma_probe(struct platform_device *op)
 {
+	const struct k3dma_soc_data *soc_data;
 	struct k3_dma_dev *d;
 	const struct of_device_id *of_id;
 	struct resource *iores;
@@ -823,6 +849,10 @@ static int k3_dma_probe(struct platform_device *op)
 	if (!d)
 		return -ENOMEM;
 
+	soc_data = device_get_match_data(&op->dev);
+	if (!soc_data)
+		return -EINVAL;
+
 	d->base = devm_ioremap_resource(&op->dev, iores);
 	if (IS_ERR(d->base))
 		return PTR_ERR(d->base);
@@ -833,12 +863,21 @@ static int k3_dma_probe(struct platform_device *op)
 				"dma-channels", &d->dma_channels);
 		of_property_read_u32((&op->dev)->of_node,
 				"dma-requests", &d->dma_requests);
+		ret = of_property_read_u32((&op->dev)->of_node,
+				"dma-channel-mask", &d->dma_channel_mask);
+		if (ret) {
+			dev_warn(&op->dev,
+				 "dma-channel-mask doesn't exist, considering all as available.\n");
+			d->dma_channel_mask = (u32)~0UL;
+		}
 	}
 
-	d->clk = devm_clk_get(&op->dev, NULL);
-	if (IS_ERR(d->clk)) {
-		dev_err(&op->dev, "no dma clk\n");
-		return PTR_ERR(d->clk);
+	if (!(soc_data->flags & K3_FLAG_NOCLK)) {
+		d->clk = devm_clk_get(&op->dev, NULL);
+		if (IS_ERR(d->clk)) {
+			dev_err(&op->dev, "no dma clk\n");
+			return PTR_ERR(d->clk);
+		}
 	}
 
 	irq = platform_get_irq(op, 0);
@@ -862,8 +901,12 @@ static int k3_dma_probe(struct platform_device *op)
 		return -ENOMEM;
 
 	for (i = 0; i < d->dma_channels; i++) {
-		struct k3_dma_phy *p = &d->phy[i];
+		struct k3_dma_phy *p;
 
+		if (!(d->dma_channel_mask & BIT(i)))
+			continue;
+
+		p = &d->phy[i];
 		p->idx = i;
 		p->base = d->base + i * 0x40;
 	}
diff --git a/drivers/dma/mcf-edma.c b/drivers/dma/mcf-edma.c
index 5de1b07..7de54b2f 100644
--- a/drivers/dma/mcf-edma.c
+++ b/drivers/dma/mcf-edma.c
@@ -214,6 +214,7 @@ static int mcf_edma_probe(struct platform_device *pdev)
 		mcf_chan->edma = mcf_edma;
 		mcf_chan->slave_id = i;
 		mcf_chan->idle = true;
+		mcf_chan->dma_dir = DMA_NONE;
 		mcf_chan->vchan.desc_free = fsl_edma_free_desc;
 		vchan_init(&mcf_chan->vchan, &mcf_edma->dma_dev);
 		iowrite32(0x0, &regs->tcd[i].csr);
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
index 7f59535..65af2e7 100644
--- a/drivers/dma/mv_xor.c
+++ b/drivers/dma/mv_xor.c
@@ -1059,6 +1059,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 		mv_chan->op_in_desc = XOR_MODE_IN_DESC;
 
 	dma_dev = &mv_chan->dmadev;
+	dma_dev->dev = &pdev->dev;
 	mv_chan->xordev = xordev;
 
 	/*
@@ -1091,7 +1092,6 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
 	dma_dev->device_tx_status = mv_xor_status;
 	dma_dev->device_issue_pending = mv_xor_issue_pending;
-	dma_dev->dev = &pdev->dev;
 
 	/* set prep routines based on capability */
 	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
@@ -1153,7 +1153,10 @@ mv_xor_channel_add(struct mv_xor_device *xordev,
 		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
 		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
 
-	dma_async_device_register(dma_dev);
+	ret = dma_async_device_register(dma_dev);
+	if (ret)
+		goto err_free_irq;
+
 	return mv_chan;
 
 err_free_irq:
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index cff1b14..eec79fd 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2267,7 +2267,6 @@ static int pl330_terminate_all(struct dma_chan *chan)
 	struct dma_pl330_desc *desc;
 	unsigned long flags;
 	struct pl330_dmac *pl330 = pch->dmac;
-	LIST_HEAD(list);
 	bool power_down = false;
 
 	pm_runtime_get_sync(pl330->ddma.dev);
diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c
index 1617715..cb860cb 100644
--- a/drivers/dma/qcom/bam_dma.c
+++ b/drivers/dma/qcom/bam_dma.c
@@ -636,8 +636,8 @@ static struct dma_async_tx_descriptor *bam_prep_slave_sg(struct dma_chan *chan,
 		num_alloc += DIV_ROUND_UP(sg_dma_len(sg), BAM_FIFO_SIZE);
 
 	/* allocate enough room to accomodate the number of entries */
-	async_desc = kzalloc(sizeof(*async_desc) +
-			(num_alloc * sizeof(struct bam_desc_hw)), GFP_NOWAIT);
+	async_desc = kzalloc(struct_size(async_desc, desc, num_alloc),
+			     GFP_NOWAIT);
 
 	if (!async_desc)
 		goto err_out;
diff --git a/drivers/dma/qcom/hidma.c b/drivers/dma/qcom/hidma.c
index 43d4b00..411f91f 100644
--- a/drivers/dma/qcom/hidma.c
+++ b/drivers/dma/qcom/hidma.c
@@ -138,24 +138,25 @@ static void hidma_process_completed(struct hidma_chan *mchan)
 		desc = &mdesc->desc;
 		last_cookie = desc->cookie;
 
+		llstat = hidma_ll_status(mdma->lldev, mdesc->tre_ch);
+
 		spin_lock_irqsave(&mchan->lock, irqflags);
+		if (llstat == DMA_COMPLETE) {
+			mchan->last_success = last_cookie;
+			result.result = DMA_TRANS_NOERROR;
+		} else {
+			result.result = DMA_TRANS_ABORTED;
+		}
+
 		dma_cookie_complete(desc);
 		spin_unlock_irqrestore(&mchan->lock, irqflags);
 
-		llstat = hidma_ll_status(mdma->lldev, mdesc->tre_ch);
 		dmaengine_desc_get_callback(desc, &cb);
 
 		dma_run_dependencies(desc);
 
 		spin_lock_irqsave(&mchan->lock, irqflags);
 		list_move(&mdesc->node, &mchan->free);
-
-		if (llstat == DMA_COMPLETE) {
-			mchan->last_success = last_cookie;
-			result.result = DMA_TRANS_NOERROR;
-		} else
-			result.result = DMA_TRANS_ABORTED;
-
 		spin_unlock_irqrestore(&mchan->lock, irqflags);
 
 		dmaengine_desc_callback_invoke(&cb, &result);
@@ -415,6 +416,7 @@ hidma_prep_dma_memcpy(struct dma_chan *dmach, dma_addr_t dest, dma_addr_t src,
 	if (!mdesc)
 		return NULL;
 
+	mdesc->desc.flags = flags;
 	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
 				     src, dest, len, flags,
 				     HIDMA_TRE_MEMCPY);
@@ -447,6 +449,7 @@ hidma_prep_dma_memset(struct dma_chan *dmach, dma_addr_t dest, int value,
 	if (!mdesc)
 		return NULL;
 
+	mdesc->desc.flags = flags;
 	hidma_ll_set_transfer_params(mdma->lldev, mdesc->tre_ch,
 				     value, dest, len, flags,
 				     HIDMA_TRE_MEMSET);
diff --git a/drivers/dma/qcom/hidma_mgmt.c b/drivers/dma/qcom/hidma_mgmt.c
index d64edeb..681de12 100644
--- a/drivers/dma/qcom/hidma_mgmt.c
+++ b/drivers/dma/qcom/hidma_mgmt.c
@@ -423,9 +423,8 @@ static int __init hidma_mgmt_init(void)
 		hidma_mgmt_of_populate_channels(child);
 	}
 #endif
-	platform_driver_register(&hidma_mgmt_driver);
+	return platform_driver_register(&hidma_mgmt_driver);
 
-	return 0;
 }
 module_init(hidma_mgmt_init);
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/sa11x0-dma.c b/drivers/dma/sa11x0-dma.c
index 784d5f1..3fae237 100644
--- a/drivers/dma/sa11x0-dma.c
+++ b/drivers/dma/sa11x0-dma.c
@@ -705,7 +705,6 @@ static int sa11x0_dma_device_pause(struct dma_chan *chan)
 	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
 	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
 	struct sa11x0_dma_phy *p;
-	LIST_HEAD(head);
 	unsigned long flags;
 
 	dev_dbg(d->slave.dev, "vchan %p: pause\n", &c->vc);
@@ -732,7 +731,6 @@ static int sa11x0_dma_device_resume(struct dma_chan *chan)
 	struct sa11x0_dma_chan *c = to_sa11x0_dma_chan(chan);
 	struct sa11x0_dma_dev *d = to_sa11x0_dma(chan->device);
 	struct sa11x0_dma_phy *p;
-	LIST_HEAD(head);
 	unsigned long flags;
 
 	dev_dbg(d->slave.dev, "vchan %p: resume\n", &c->vc);
diff --git a/drivers/dma/sh/usb-dmac.c b/drivers/dma/sh/usb-dmac.c
index 7f7184c..59403f6 100644
--- a/drivers/dma/sh/usb-dmac.c
+++ b/drivers/dma/sh/usb-dmac.c
@@ -694,6 +694,8 @@ static int usb_dmac_runtime_resume(struct device *dev)
 #endif /* CONFIG_PM */
 
 static const struct dev_pm_ops usb_dmac_pm = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				      pm_runtime_force_resume)
 	SET_RUNTIME_PM_OPS(usb_dmac_runtime_suspend, usb_dmac_runtime_resume,
 			   NULL)
 };
diff --git a/drivers/dma/sprd-dma.c b/drivers/dma/sprd-dma.c
index e2f0167..48431e2 100644
--- a/drivers/dma/sprd-dma.c
+++ b/drivers/dma/sprd-dma.c
@@ -580,15 +580,7 @@ static irqreturn_t dma_irq_handle(int irq, void *dev_id)
 
 static int sprd_dma_alloc_chan_resources(struct dma_chan *chan)
 {
-	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
-	int ret;
-
-	ret = pm_runtime_get_sync(chan->device->dev);
-	if (ret < 0)
-		return ret;
-
-	schan->dev_id = SPRD_DMA_SOFTWARE_UID;
-	return 0;
+	return pm_runtime_get_sync(chan->device->dev);
 }
 
 static void sprd_dma_free_chan_resources(struct dma_chan *chan)
@@ -1021,13 +1013,10 @@ static void sprd_dma_free_desc(struct virt_dma_desc *vd)
 static bool sprd_dma_filter_fn(struct dma_chan *chan, void *param)
 {
 	struct sprd_dma_chn *schan = to_sprd_dma_chan(chan);
-	struct sprd_dma_dev *sdev = to_sprd_dma_dev(&schan->vc.chan);
-	u32 req = *(u32 *)param;
+	u32 slave_id = *(u32 *)param;
 
-	if (req < sdev->total_chns)
-		return req == schan->chn_num + 1;
-	else
-		return false;
+	schan->dev_id = slave_id;
+	return true;
 }
 
 static int sprd_dma_probe(struct platform_device *pdev)
diff --git a/drivers/dma/st_fdma.c b/drivers/dma/st_fdma.c
index 07c20aa..bc7a1de 100644
--- a/drivers/dma/st_fdma.c
+++ b/drivers/dma/st_fdma.c
@@ -243,8 +243,7 @@ static struct st_fdma_desc *st_fdma_alloc_desc(struct st_fdma_chan *fchan,
 	struct st_fdma_desc *fdesc;
 	int i;
 
-	fdesc = kzalloc(sizeof(*fdesc) +
-			sizeof(struct st_fdma_sw_node) * sg_len, GFP_NOWAIT);
+	fdesc = kzalloc(struct_size(fdesc, node, sg_len), GFP_NOWAIT);
 	if (!fdesc)
 		return NULL;
 
@@ -294,8 +293,6 @@ static void st_fdma_free_chan_res(struct dma_chan *chan)
 	struct rproc *rproc = fchan->fdev->slim_rproc->rproc;
 	unsigned long flags;
 
-	LIST_HEAD(head);
-
 	dev_dbg(fchan->fdev->dev, "%s: freeing chan:%d\n",
 		__func__, fchan->vchan.chan.chan_id);
 
@@ -626,7 +623,6 @@ static void st_fdma_issue_pending(struct dma_chan *chan)
 static int st_fdma_pause(struct dma_chan *chan)
 {
 	unsigned long flags;
-	LIST_HEAD(head);
 	struct st_fdma_chan *fchan = to_st_fdma_chan(chan);
 	int ch_id = fchan->vchan.chan.chan_id;
 	unsigned long cmd = FDMA_CMD_PAUSE(ch_id);
diff --git a/drivers/dma/stm32-dma.c b/drivers/dma/stm32-dma.c
index 4903a40..ba239b5 100644
--- a/drivers/dma/stm32-dma.c
+++ b/drivers/dma/stm32-dma.c
@@ -23,6 +23,7 @@
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -641,12 +642,13 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
 {
 	struct stm32_dma_chan *chan = devid;
 	struct stm32_dma_device *dmadev = stm32_dma_get_dev(chan);
-	u32 status, scr;
+	u32 status, scr, sfcr;
 
 	spin_lock(&chan->vchan.lock);
 
 	status = stm32_dma_irq_status(chan);
 	scr = stm32_dma_read(dmadev, STM32_DMA_SCR(chan->id));
+	sfcr = stm32_dma_read(dmadev, STM32_DMA_SFCR(chan->id));
 
 	if (status & STM32_DMA_TCI) {
 		stm32_dma_irq_clear(chan, STM32_DMA_TCI);
@@ -661,10 +663,12 @@ static irqreturn_t stm32_dma_chan_irq(int irq, void *devid)
 	if (status & STM32_DMA_FEI) {
 		stm32_dma_irq_clear(chan, STM32_DMA_FEI);
 		status &= ~STM32_DMA_FEI;
-		if (!(scr & STM32_DMA_SCR_EN))
-			dev_err(chan2dev(chan), "FIFO Error\n");
-		else
-			dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+		if (sfcr & STM32_DMA_SFCR_FEIE) {
+			if (!(scr & STM32_DMA_SCR_EN))
+				dev_err(chan2dev(chan), "FIFO Error\n");
+			else
+				dev_dbg(chan2dev(chan), "FIFO over/underrun\n");
+		}
 	}
 	if (status) {
 		stm32_dma_irq_clear(chan, status);
@@ -1112,15 +1116,14 @@ static int stm32_dma_alloc_chan_resources(struct dma_chan *c)
 	int ret;
 
 	chan->config_init = false;
-	ret = clk_prepare_enable(dmadev->clk);
-	if (ret < 0) {
-		dev_err(chan2dev(chan), "clk_prepare_enable failed: %d\n", ret);
+
+	ret = pm_runtime_get_sync(dmadev->ddev.dev);
+	if (ret < 0)
 		return ret;
-	}
 
 	ret = stm32_dma_disable_chan(chan);
 	if (ret < 0)
-		clk_disable_unprepare(dmadev->clk);
+		pm_runtime_put(dmadev->ddev.dev);
 
 	return ret;
 }
@@ -1140,7 +1143,7 @@ static void stm32_dma_free_chan_resources(struct dma_chan *c)
 		spin_unlock_irqrestore(&chan->vchan.lock, flags);
 	}
 
-	clk_disable_unprepare(dmadev->clk);
+	pm_runtime_put(dmadev->ddev.dev);
 
 	vchan_free_chan_resources(to_virt_chan(c));
 }
@@ -1240,6 +1243,12 @@ static int stm32_dma_probe(struct platform_device *pdev)
 		return PTR_ERR(dmadev->clk);
 	}
 
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret);
+		return ret;
+	}
+
 	dmadev->mem2mem = of_property_read_bool(pdev->dev.of_node,
 						"st,mem2mem");
 
@@ -1289,7 +1298,7 @@ static int stm32_dma_probe(struct platform_device *pdev)
 
 	ret = dma_async_device_register(dd);
 	if (ret)
-		return ret;
+		goto clk_free;
 
 	for (i = 0; i < STM32_DMA_MAX_CHANNELS; i++) {
 		chan = &dmadev->chan[i];
@@ -1321,20 +1330,58 @@ static int stm32_dma_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, dmadev);
 
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_put(&pdev->dev);
+
 	dev_info(&pdev->dev, "STM32 DMA driver registered\n");
 
 	return 0;
 
 err_unregister:
 	dma_async_device_unregister(dd);
+clk_free:
+	clk_disable_unprepare(dmadev->clk);
 
 	return ret;
 }
 
+#ifdef CONFIG_PM
+static int stm32_dma_runtime_suspend(struct device *dev)
+{
+	struct stm32_dma_device *dmadev = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(dmadev->clk);
+
+	return 0;
+}
+
+static int stm32_dma_runtime_resume(struct device *dev)
+{
+	struct stm32_dma_device *dmadev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret) {
+		dev_err(dev, "failed to prepare_enable clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops stm32_dma_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm32_dma_runtime_suspend,
+			   stm32_dma_runtime_resume, NULL)
+};
+
 static struct platform_driver stm32_dma_driver = {
 	.driver = {
 		.name = "stm32-dma",
 		.of_match_table = stm32_dma_of_match,
+		.pm = &stm32_dma_pm_ops,
 	},
 };
 
diff --git a/drivers/dma/stm32-dmamux.c b/drivers/dma/stm32-dmamux.c
index b922db9..a671191 100644
--- a/drivers/dma/stm32-dmamux.c
+++ b/drivers/dma/stm32-dmamux.c
@@ -28,6 +28,7 @@
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -79,8 +80,7 @@ static void stm32_dmamux_free(struct device *dev, void *route_data)
 	stm32_dmamux_write(dmamux->iomem, STM32_DMAMUX_CCR(mux->chan_id), 0);
 	clear_bit(mux->chan_id, dmamux->dma_inuse);
 
-	if (!IS_ERR(dmamux->clk))
-		clk_disable(dmamux->clk);
+	pm_runtime_put_sync(dev);
 
 	spin_unlock_irqrestore(&dmamux->lock, flags);
 
@@ -146,13 +146,10 @@ static void *stm32_dmamux_route_allocate(struct of_phandle_args *dma_spec,
 
 	/* Set dma request */
 	spin_lock_irqsave(&dmamux->lock, flags);
-	if (!IS_ERR(dmamux->clk)) {
-		ret = clk_enable(dmamux->clk);
-		if (ret < 0) {
-			spin_unlock_irqrestore(&dmamux->lock, flags);
-			dev_err(&pdev->dev, "clk_prep_enable issue: %d\n", ret);
-			goto error;
-		}
+	ret = pm_runtime_get_sync(&pdev->dev);
+	if (ret < 0) {
+		spin_unlock_irqrestore(&dmamux->lock, flags);
+		goto error;
 	}
 	spin_unlock_irqrestore(&dmamux->lock, flags);
 
@@ -254,6 +251,7 @@ static int stm32_dmamux_probe(struct platform_device *pdev)
 		dev_warn(&pdev->dev, "DMAMUX defaulting on %u requests\n",
 			 stm32_dmamux->dmamux_requests);
 	}
+	pm_runtime_get_noresume(&pdev->dev);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	iomem = devm_ioremap_resource(&pdev->dev, res);
@@ -282,6 +280,8 @@ static int stm32_dmamux_probe(struct platform_device *pdev)
 	stm32_dmamux->dmarouter.route_free = stm32_dmamux_free;
 
 	platform_set_drvdata(pdev, stm32_dmamux);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
 
 	if (!IS_ERR(stm32_dmamux->clk)) {
 		ret = clk_prepare_enable(stm32_dmamux->clk);
@@ -291,17 +291,52 @@ static int stm32_dmamux_probe(struct platform_device *pdev)
 		}
 	}
 
+	pm_runtime_get_noresume(&pdev->dev);
+
 	/* Reset the dmamux */
 	for (i = 0; i < stm32_dmamux->dma_requests; i++)
 		stm32_dmamux_write(stm32_dmamux->iomem, STM32_DMAMUX_CCR(i), 0);
 
-	if (!IS_ERR(stm32_dmamux->clk))
-		clk_disable(stm32_dmamux->clk);
+	pm_runtime_put(&pdev->dev);
 
 	return of_dma_router_register(node, stm32_dmamux_route_allocate,
 				     &stm32_dmamux->dmarouter);
 }
 
+#ifdef CONFIG_PM
+static int stm32_dmamux_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev =
+		container_of(dev, struct platform_device, dev);
+	struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
+
+	clk_disable_unprepare(stm32_dmamux->clk);
+
+	return 0;
+}
+
+static int stm32_dmamux_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev =
+		container_of(dev, struct platform_device, dev);
+	struct stm32_dmamux_data *stm32_dmamux = platform_get_drvdata(pdev);
+	int ret;
+
+	ret = clk_prepare_enable(stm32_dmamux->clk);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to prepare_enable clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops stm32_dmamux_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm32_dmamux_runtime_suspend,
+			   stm32_dmamux_runtime_resume, NULL)
+};
+
 static const struct of_device_id stm32_dmamux_match[] = {
 	{ .compatible = "st,stm32h7-dmamux" },
 	{},
@@ -312,6 +347,7 @@ static struct platform_driver stm32_dmamux_driver = {
 	.driver = {
 		.name = "stm32-dmamux",
 		.of_match_table = stm32_dmamux_match,
+		.pm = &stm32_dmamux_pm_ops,
 	},
 };
 
diff --git a/drivers/dma/stm32-mdma.c b/drivers/dma/stm32-mdma.c
index 390e4ca..4e0eede 100644
--- a/drivers/dma/stm32-mdma.c
+++ b/drivers/dma/stm32-mdma.c
@@ -37,6 +37,7 @@
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
 
@@ -1456,15 +1457,13 @@ static int stm32_mdma_alloc_chan_resources(struct dma_chan *c)
 		return -ENOMEM;
 	}
 
-	ret = clk_prepare_enable(dmadev->clk);
-	if (ret < 0) {
-		dev_err(chan2dev(chan), "clk_prepare_enable failed: %d\n", ret);
+	ret = pm_runtime_get_sync(dmadev->ddev.dev);
+	if (ret < 0)
 		return ret;
-	}
 
 	ret = stm32_mdma_disable_chan(chan);
 	if (ret < 0)
-		clk_disable_unprepare(dmadev->clk);
+		pm_runtime_put(dmadev->ddev.dev);
 
 	return ret;
 }
@@ -1484,7 +1483,7 @@ static void stm32_mdma_free_chan_resources(struct dma_chan *c)
 		spin_unlock_irqrestore(&chan->vchan.lock, flags);
 	}
 
-	clk_disable_unprepare(dmadev->clk);
+	pm_runtime_put(dmadev->ddev.dev);
 	vchan_free_chan_resources(to_virt_chan(c));
 	dmam_pool_destroy(chan->desc_pool);
 	chan->desc_pool = NULL;
@@ -1579,9 +1578,11 @@ static int stm32_mdma_probe(struct platform_device *pdev)
 
 	dmadev->nr_channels = nr_channels;
 	dmadev->nr_requests = nr_requests;
-	device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
+	ret = device_property_read_u32_array(&pdev->dev, "st,ahb-addr-masks",
 				       dmadev->ahb_addr_masks,
 				       count);
+	if (ret)
+		return ret;
 	dmadev->nr_ahb_addr_masks = count;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -1597,6 +1598,12 @@ static int stm32_mdma_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "clk_prep_enable error: %d\n", ret);
+		return ret;
+	}
+
 	dmadev->rst = devm_reset_control_get(&pdev->dev, NULL);
 	if (!IS_ERR(dmadev->rst)) {
 		reset_control_assert(dmadev->rst);
@@ -1668,6 +1675,10 @@ static int stm32_mdma_probe(struct platform_device *pdev)
 	}
 
 	platform_set_drvdata(pdev, dmadev);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_noresume(&pdev->dev);
+	pm_runtime_put(&pdev->dev);
 
 	dev_info(&pdev->dev, "STM32 MDMA driver registered\n");
 
@@ -1677,11 +1688,42 @@ static int stm32_mdma_probe(struct platform_device *pdev)
 	return ret;
 }
 
+#ifdef CONFIG_PM
+static int stm32_mdma_runtime_suspend(struct device *dev)
+{
+	struct stm32_mdma_device *dmadev = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(dmadev->clk);
+
+	return 0;
+}
+
+static int stm32_mdma_runtime_resume(struct device *dev)
+{
+	struct stm32_mdma_device *dmadev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_prepare_enable(dmadev->clk);
+	if (ret) {
+		dev_err(dev, "failed to prepare_enable clock\n");
+		return ret;
+	}
+
+	return 0;
+}
+#endif
+
+static const struct dev_pm_ops stm32_mdma_pm_ops = {
+	SET_RUNTIME_PM_OPS(stm32_mdma_runtime_suspend,
+			   stm32_mdma_runtime_resume, NULL)
+};
+
 static struct platform_driver stm32_mdma_driver = {
 	.probe = stm32_mdma_probe,
 	.driver = {
 		.name = "stm32-mdma",
 		.of_match_table = stm32_mdma_of_match,
+		.pm = &stm32_mdma_pm_ops,
 	},
 };
 
diff --git a/drivers/dma/tegra20-apb-dma.c b/drivers/dma/tegra20-apb-dma.c
index 9a558e3..cf462b1 100644
--- a/drivers/dma/tegra20-apb-dma.c
+++ b/drivers/dma/tegra20-apb-dma.c
@@ -38,6 +38,9 @@
 
 #include "dmaengine.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/tegra_apb_dma.h>
+
 #define TEGRA_APBDMA_GENERAL			0x0
 #define TEGRA_APBDMA_GENERAL_ENABLE		BIT(31)
 
@@ -146,7 +149,7 @@ struct tegra_dma_channel_regs {
 };
 
 /*
- * tegra_dma_sg_req: Dma request details to configure hardware. This
+ * tegra_dma_sg_req: DMA request details to configure hardware. This
  * contains the details for one transfer to configure DMA hw.
  * The client's request for data transfer can be broken into multiple
  * sub-transfer as per requester details and hw support.
@@ -155,7 +158,7 @@ struct tegra_dma_channel_regs {
  */
 struct tegra_dma_sg_req {
 	struct tegra_dma_channel_regs	ch_regs;
-	int				req_len;
+	unsigned int			req_len;
 	bool				configured;
 	bool				last_sg;
 	struct list_head		node;
@@ -169,8 +172,8 @@ struct tegra_dma_sg_req {
  */
 struct tegra_dma_desc {
 	struct dma_async_tx_descriptor	txd;
-	int				bytes_requested;
-	int				bytes_transferred;
+	unsigned int			bytes_requested;
+	unsigned int			bytes_transferred;
 	enum dma_status			dma_status;
 	struct list_head		node;
 	struct list_head		tx_list;
@@ -186,7 +189,7 @@ typedef void (*dma_isr_handler)(struct tegra_dma_channel *tdc,
 /* tegra_dma_channel: Channel specific information */
 struct tegra_dma_channel {
 	struct dma_chan		dma_chan;
-	char			name[30];
+	char			name[12];
 	bool			config_init;
 	int			id;
 	int			irq;
@@ -574,7 +577,7 @@ static bool handle_continuous_head_request(struct tegra_dma_channel *tdc,
 	struct tegra_dma_sg_req *hsgreq = NULL;
 
 	if (list_empty(&tdc->pending_sg_req)) {
-		dev_err(tdc2dev(tdc), "Dma is running without req\n");
+		dev_err(tdc2dev(tdc), "DMA is running without req\n");
 		tegra_dma_stop(tdc);
 		return false;
 	}
@@ -587,7 +590,7 @@ static bool handle_continuous_head_request(struct tegra_dma_channel *tdc,
 	hsgreq = list_first_entry(&tdc->pending_sg_req, typeof(*hsgreq), node);
 	if (!hsgreq->configured) {
 		tegra_dma_stop(tdc);
-		dev_err(tdc2dev(tdc), "Error in dma transfer, aborting dma\n");
+		dev_err(tdc2dev(tdc), "Error in DMA transfer, aborting DMA\n");
 		tegra_dma_abort_all(tdc);
 		return false;
 	}
@@ -636,7 +639,10 @@ static void handle_cont_sngl_cycle_dma_done(struct tegra_dma_channel *tdc,
 
 	sgreq = list_first_entry(&tdc->pending_sg_req, typeof(*sgreq), node);
 	dma_desc = sgreq->dma_desc;
-	dma_desc->bytes_transferred += sgreq->req_len;
+	/* if we dma for long enough the transfer count will wrap */
+	dma_desc->bytes_transferred =
+		(dma_desc->bytes_transferred + sgreq->req_len) %
+		dma_desc->bytes_requested;
 
 	/* Callback need to be call */
 	if (!dma_desc->cb_count)
@@ -669,6 +675,8 @@ static void tegra_dma_tasklet(unsigned long data)
 		dmaengine_desc_get_callback(&dma_desc->txd, &cb);
 		cb_count = dma_desc->cb_count;
 		dma_desc->cb_count = 0;
+		trace_tegra_dma_complete_cb(&tdc->dma_chan, cb_count,
+					    cb.callback);
 		spin_unlock_irqrestore(&tdc->lock, flags);
 		while (cb_count--)
 			dmaengine_desc_callback_invoke(&cb, NULL);
@@ -685,6 +693,7 @@ static irqreturn_t tegra_dma_isr(int irq, void *dev_id)
 
 	spin_lock_irqsave(&tdc->lock, flags);
 
+	trace_tegra_dma_isr(&tdc->dma_chan, irq);
 	status = tdc_read(tdc, TEGRA_APBDMA_CHAN_STATUS);
 	if (status & TEGRA_APBDMA_STATUS_ISE_EOC) {
 		tdc_write(tdc, TEGRA_APBDMA_CHAN_STATUS, status);
@@ -843,6 +852,7 @@ static enum dma_status tegra_dma_tx_status(struct dma_chan *dc,
 		dma_set_residue(txstate, residual);
 	}
 
+	trace_tegra_dma_tx_status(&tdc->dma_chan, cookie, txstate);
 	spin_unlock_irqrestore(&tdc->lock, flags);
 	return ret;
 }
@@ -919,7 +929,7 @@ static int get_transfer_param(struct tegra_dma_channel *tdc,
 		return 0;
 
 	default:
-		dev_err(tdc2dev(tdc), "Dma direction is not supported\n");
+		dev_err(tdc2dev(tdc), "DMA direction is not supported\n");
 		return -EINVAL;
 	}
 	return -EINVAL;
@@ -952,7 +962,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
 	enum dma_slave_buswidth slave_bw;
 
 	if (!tdc->config_init) {
-		dev_err(tdc2dev(tdc), "dma channel is not configured\n");
+		dev_err(tdc2dev(tdc), "DMA channel is not configured\n");
 		return NULL;
 	}
 	if (sg_len < 1) {
@@ -985,7 +995,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
 
 	dma_desc = tegra_dma_desc_get(tdc);
 	if (!dma_desc) {
-		dev_err(tdc2dev(tdc), "Dma descriptors not available\n");
+		dev_err(tdc2dev(tdc), "DMA descriptors not available\n");
 		return NULL;
 	}
 	INIT_LIST_HEAD(&dma_desc->tx_list);
@@ -1005,14 +1015,14 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_slave_sg(
 		if ((len & 3) || (mem & 3) ||
 				(len > tdc->tdma->chip_data->max_dma_count)) {
 			dev_err(tdc2dev(tdc),
-				"Dma length/memory address is not supported\n");
+				"DMA length/memory address is not supported\n");
 			tegra_dma_desc_put(tdc, dma_desc);
 			return NULL;
 		}
 
 		sg_req = tegra_dma_sg_req_get(tdc);
 		if (!sg_req) {
-			dev_err(tdc2dev(tdc), "Dma sg-req not available\n");
+			dev_err(tdc2dev(tdc), "DMA sg-req not available\n");
 			tegra_dma_desc_put(tdc, dma_desc);
 			return NULL;
 		}
@@ -1087,7 +1097,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
 	 * terminating the DMA.
 	 */
 	if (tdc->busy) {
-		dev_err(tdc2dev(tdc), "Request not allowed when dma running\n");
+		dev_err(tdc2dev(tdc), "Request not allowed when DMA running\n");
 		return NULL;
 	}
 
@@ -1144,7 +1154,7 @@ static struct dma_async_tx_descriptor *tegra_dma_prep_dma_cyclic(
 	while (remain_len) {
 		sg_req = tegra_dma_sg_req_get(tdc);
 		if (!sg_req) {
-			dev_err(tdc2dev(tdc), "Dma sg-req not available\n");
+			dev_err(tdc2dev(tdc), "DMA sg-req not available\n");
 			tegra_dma_desc_put(tdc, dma_desc);
 			return NULL;
 		}
@@ -1319,8 +1329,9 @@ static int tegra_dma_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
-			sizeof(struct tegra_dma_channel), GFP_KERNEL);
+	tdma = devm_kzalloc(&pdev->dev,
+			    struct_size(tdma, channels, cdata->nr_channels),
+			    GFP_KERNEL);
 	if (!tdma)
 		return -ENOMEM;
 
diff --git a/drivers/dma/tegra210-adma.c b/drivers/dma/tegra210-adma.c
index b26256f..5ec0dd9 100644
--- a/drivers/dma/tegra210-adma.c
+++ b/drivers/dma/tegra210-adma.c
@@ -678,8 +678,9 @@ static int tegra_adma_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	tdma = devm_kzalloc(&pdev->dev, sizeof(*tdma) + cdata->nr_channels *
-			    sizeof(struct tegra_adma_chan), GFP_KERNEL);
+	tdma = devm_kzalloc(&pdev->dev,
+			    struct_size(tdma, channels, cdata->nr_channels),
+			    GFP_KERNEL);
 	if (!tdma)
 		return -ENOMEM;
 
diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c
index fc0f9c8..afbb1c9 100644
--- a/drivers/dma/timb_dma.c
+++ b/drivers/dma/timb_dma.c
@@ -643,8 +643,8 @@ static int td_probe(struct platform_device *pdev)
 		DRIVER_NAME))
 		return -EBUSY;
 
-	td  = kzalloc(sizeof(struct timb_dma) +
-		sizeof(struct timb_dma_chan) * pdata->nr_channels, GFP_KERNEL);
+	td  = kzalloc(struct_size(td, channels, pdata->nr_channels),
+		      GFP_KERNEL);
 	if (!td) {
 		err = -ENOMEM;
 		goto err_release_region;
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index cb20b41..c43c1a1 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -86,6 +86,7 @@
 #define XILINX_DMA_DMASR_DMA_DEC_ERR		BIT(6)
 #define XILINX_DMA_DMASR_DMA_SLAVE_ERR		BIT(5)
 #define XILINX_DMA_DMASR_DMA_INT_ERR		BIT(4)
+#define XILINX_DMA_DMASR_SG_MASK		BIT(3)
 #define XILINX_DMA_DMASR_IDLE			BIT(1)
 #define XILINX_DMA_DMASR_HALTED		BIT(0)
 #define XILINX_DMA_DMASR_DELAY_MASK		GENMASK(31, 24)
@@ -161,7 +162,9 @@
 #define XILINX_DMA_REG_BTT		0x28
 
 /* AXI DMA Specific Masks/Bit fields */
-#define XILINX_DMA_MAX_TRANS_LEN	GENMASK(22, 0)
+#define XILINX_DMA_MAX_TRANS_LEN_MIN	8
+#define XILINX_DMA_MAX_TRANS_LEN_MAX	23
+#define XILINX_DMA_V2_MAX_TRANS_LEN_MAX	26
 #define XILINX_DMA_CR_COALESCE_MAX	GENMASK(23, 16)
 #define XILINX_DMA_CR_CYCLIC_BD_EN_MASK	BIT(4)
 #define XILINX_DMA_CR_COALESCE_SHIFT	16
@@ -412,7 +415,6 @@ struct xilinx_dma_config {
  * @dev: Device Structure
  * @common: DMA device structure
  * @chan: Driver specific DMA channel
- * @has_sg: Specifies whether Scatter-Gather is present or not
  * @mcdma: Specifies whether Multi-Channel is present or not
  * @flush_on_fsync: Flush on frame sync
  * @ext_addr: Indicates 64 bit addressing is supported by dma device
@@ -425,13 +427,13 @@ struct xilinx_dma_config {
  * @rxs_clk: DMA s2mm stream clock
  * @nr_channels: Number of channels DMA device supports
  * @chan_id: DMA channel identifier
+ * @max_buffer_len: Max buffer length
  */
 struct xilinx_dma_device {
 	void __iomem *regs;
 	struct device *dev;
 	struct dma_device common;
 	struct xilinx_dma_chan *chan[XILINX_DMA_MAX_CHANS_PER_DEVICE];
-	bool has_sg;
 	bool mcdma;
 	u32 flush_on_fsync;
 	bool ext_addr;
@@ -444,6 +446,7 @@ struct xilinx_dma_device {
 	struct clk *rxs_clk;
 	u32 nr_channels;
 	u32 chan_id;
+	u32 max_buffer_len;
 };
 
 /* Macros */
@@ -960,6 +963,34 @@ static int xilinx_dma_alloc_chan_resources(struct dma_chan *dchan)
 }
 
 /**
+ * xilinx_dma_calc_copysize - Calculate the amount of data to copy
+ * @chan: Driver specific DMA channel
+ * @size: Total data that needs to be copied
+ * @done: Amount of data that has been already copied
+ *
+ * Return: Amount of data that has to be copied
+ */
+static int xilinx_dma_calc_copysize(struct xilinx_dma_chan *chan,
+				    int size, int done)
+{
+	size_t copy;
+
+	copy = min_t(size_t, size - done,
+		     chan->xdev->max_buffer_len);
+
+	if ((copy + done < size) &&
+	    chan->xdev->common.copy_align) {
+		/*
+		 * If this is not the last descriptor, make sure
+		 * the next one will be properly aligned
+		 */
+		copy = rounddown(copy,
+				 (1 << chan->xdev->common.copy_align));
+	}
+	return copy;
+}
+
+/**
  * xilinx_dma_tx_status - Get DMA transaction status
  * @dchan: DMA channel
  * @cookie: Transaction identifier
@@ -992,7 +1023,7 @@ static enum dma_status xilinx_dma_tx_status(struct dma_chan *dchan,
 			list_for_each_entry(segment, &desc->segments, node) {
 				hw = &segment->hw;
 				residue += (hw->control - hw->status) &
-					   XILINX_DMA_MAX_TRANS_LEN;
+					   chan->xdev->max_buffer_len;
 			}
 		}
 		spin_unlock_irqrestore(&chan->lock, flags);
@@ -1070,7 +1101,8 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 	struct xilinx_vdma_config *config = &chan->config;
 	struct xilinx_dma_tx_descriptor *desc, *tail_desc;
 	u32 reg, j;
-	struct xilinx_vdma_tx_segment *tail_segment;
+	struct xilinx_vdma_tx_segment *segment, *last = NULL;
+	int i = 0;
 
 	/* This function was invoked with lock held */
 	if (chan->err)
@@ -1087,17 +1119,6 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 	tail_desc = list_last_entry(&chan->pending_list,
 				    struct xilinx_dma_tx_descriptor, node);
 
-	tail_segment = list_last_entry(&tail_desc->segments,
-				       struct xilinx_vdma_tx_segment, node);
-
-	/*
-	 * If hardware is idle, then all descriptors on the running lists are
-	 * done, start new transfers
-	 */
-	if (chan->has_sg)
-		dma_ctrl_write(chan, XILINX_DMA_REG_CURDESC,
-				desc->async_tx.phys);
-
 	/* Configure the hardware using info in the config structure */
 	if (chan->has_vflip) {
 		reg = dma_read(chan, XILINX_VDMA_REG_ENABLE_VERTICAL_FLIP);
@@ -1114,15 +1135,11 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 	else
 		reg &= ~XILINX_DMA_DMACR_FRAMECNT_EN;
 
-	/*
-	 * With SG, start with circular mode, so that BDs can be fetched.
-	 * In direct register mode, if not parking, enable circular mode
-	 */
-	if (chan->has_sg || !config->park)
-		reg |= XILINX_DMA_DMACR_CIRC_EN;
-
+	/* If not parking, enable circular mode */
 	if (config->park)
 		reg &= ~XILINX_DMA_DMACR_CIRC_EN;
+	else
+		reg |= XILINX_DMA_DMACR_CIRC_EN;
 
 	dma_ctrl_write(chan, XILINX_DMA_REG_DMACR, reg);
 
@@ -1144,49 +1161,39 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 		return;
 
 	/* Start the transfer */
-	if (chan->has_sg) {
-		dma_ctrl_write(chan, XILINX_DMA_REG_TAILDESC,
-				tail_segment->phys);
-		list_splice_tail_init(&chan->pending_list, &chan->active_list);
-		chan->desc_pendingcount = 0;
-	} else {
-		struct xilinx_vdma_tx_segment *segment, *last = NULL;
-		int i = 0;
+	if (chan->desc_submitcount < chan->num_frms)
+		i = chan->desc_submitcount;
 
-		if (chan->desc_submitcount < chan->num_frms)
-			i = chan->desc_submitcount;
-
-		list_for_each_entry(segment, &desc->segments, node) {
-			if (chan->ext_addr)
-				vdma_desc_write_64(chan,
-					XILINX_VDMA_REG_START_ADDRESS_64(i++),
-					segment->hw.buf_addr,
-					segment->hw.buf_addr_msb);
-			else
-				vdma_desc_write(chan,
+	list_for_each_entry(segment, &desc->segments, node) {
+		if (chan->ext_addr)
+			vdma_desc_write_64(chan,
+				   XILINX_VDMA_REG_START_ADDRESS_64(i++),
+				   segment->hw.buf_addr,
+				   segment->hw.buf_addr_msb);
+		else
+			vdma_desc_write(chan,
 					XILINX_VDMA_REG_START_ADDRESS(i++),
 					segment->hw.buf_addr);
 
-			last = segment;
-		}
-
-		if (!last)
-			return;
-
-		/* HW expects these parameters to be same for one transaction */
-		vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize);
-		vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
-				last->hw.stride);
-		vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
-
-		chan->desc_submitcount++;
-		chan->desc_pendingcount--;
-		list_del(&desc->node);
-		list_add_tail(&desc->node, &chan->active_list);
-		if (chan->desc_submitcount == chan->num_frms)
-			chan->desc_submitcount = 0;
+		last = segment;
 	}
 
+	if (!last)
+		return;
+
+	/* HW expects these parameters to be same for one transaction */
+	vdma_desc_write(chan, XILINX_DMA_REG_HSIZE, last->hw.hsize);
+	vdma_desc_write(chan, XILINX_DMA_REG_FRMDLY_STRIDE,
+			last->hw.stride);
+	vdma_desc_write(chan, XILINX_DMA_REG_VSIZE, last->hw.vsize);
+
+	chan->desc_submitcount++;
+	chan->desc_pendingcount--;
+	list_del(&desc->node);
+	list_add_tail(&desc->node, &chan->active_list);
+	if (chan->desc_submitcount == chan->num_frms)
+		chan->desc_submitcount = 0;
+
 	chan->idle = false;
 }
 
@@ -1254,7 +1261,7 @@ static void xilinx_cdma_start_transfer(struct xilinx_dma_chan *chan)
 
 		/* Start the transfer */
 		dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
-				hw->control & XILINX_DMA_MAX_TRANS_LEN);
+				hw->control & chan->xdev->max_buffer_len);
 	}
 
 	list_splice_tail_init(&chan->pending_list, &chan->active_list);
@@ -1357,7 +1364,7 @@ static void xilinx_dma_start_transfer(struct xilinx_dma_chan *chan)
 
 		/* Start the transfer */
 		dma_ctrl_write(chan, XILINX_DMA_REG_BTT,
-			       hw->control & XILINX_DMA_MAX_TRANS_LEN);
+			       hw->control & chan->xdev->max_buffer_len);
 	}
 
 	list_splice_tail_init(&chan->pending_list, &chan->active_list);
@@ -1718,7 +1725,7 @@ xilinx_cdma_prep_memcpy(struct dma_chan *dchan, dma_addr_t dma_dst,
 	struct xilinx_cdma_tx_segment *segment;
 	struct xilinx_cdma_desc_hw *hw;
 
-	if (!len || len > XILINX_DMA_MAX_TRANS_LEN)
+	if (!len || len > chan->xdev->max_buffer_len)
 		return NULL;
 
 	desc = xilinx_dma_alloc_tx_descriptor(chan);
@@ -1808,8 +1815,8 @@ static struct dma_async_tx_descriptor *xilinx_dma_prep_slave_sg(
 			 * Calculate the maximum number of bytes to transfer,
 			 * making sure it is less than the hw limit
 			 */
-			copy = min_t(size_t, sg_dma_len(sg) - sg_used,
-				     XILINX_DMA_MAX_TRANS_LEN);
+			copy = xilinx_dma_calc_copysize(chan, sg_dma_len(sg),
+							sg_used);
 			hw = &segment->hw;
 
 			/* Fill in the descriptor */
@@ -1913,8 +1920,8 @@ static struct dma_async_tx_descriptor *xilinx_dma_prep_dma_cyclic(
 			 * Calculate the maximum number of bytes to transfer,
 			 * making sure it is less than the hw limit
 			 */
-			copy = min_t(size_t, period_len - sg_used,
-				     XILINX_DMA_MAX_TRANS_LEN);
+			copy = xilinx_dma_calc_copysize(chan, period_len,
+							sg_used);
 			hw = &segment->hw;
 			xilinx_axidma_buf(chan, hw, buf_addr, sg_used,
 					  period_len * i);
@@ -2389,7 +2396,6 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 
 	chan->dev = xdev->dev;
 	chan->xdev = xdev;
-	chan->has_sg = xdev->has_sg;
 	chan->desc_pendingcount = 0x0;
 	chan->ext_addr = xdev->ext_addr;
 	/* This variable ensures that descriptors are not
@@ -2489,6 +2495,15 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
 		chan->stop_transfer = xilinx_dma_stop_transfer;
 	}
 
+	/* check if SG is enabled (only for AXIDMA and CDMA) */
+	if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) {
+		if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+		    XILINX_DMA_DMASR_SG_MASK)
+			chan->has_sg = true;
+		dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id,
+			chan->has_sg ? "enabled" : "disabled");
+	}
+
 	/* Initialize the tasklet */
 	tasklet_init(&chan->tasklet, xilinx_dma_do_tasklet,
 			(unsigned long)chan);
@@ -2596,7 +2611,7 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 	struct xilinx_dma_device *xdev;
 	struct device_node *child, *np = pdev->dev.of_node;
 	struct resource *io;
-	u32 num_frames, addr_width;
+	u32 num_frames, addr_width, len_width;
 	int i, err;
 
 	/* Allocate and initialize the DMA engine structure */
@@ -2627,9 +2642,24 @@ static int xilinx_dma_probe(struct platform_device *pdev)
 		return PTR_ERR(xdev->regs);
 
 	/* Retrieve the DMA engine properties from the device tree */
-	xdev->has_sg = of_property_read_bool(node, "xlnx,include-sg");
-	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA)
+	xdev->max_buffer_len = GENMASK(XILINX_DMA_MAX_TRANS_LEN_MAX - 1, 0);
+
+	if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		xdev->mcdma = of_property_read_bool(node, "xlnx,mcdma");
+		if (!of_property_read_u32(node, "xlnx,sg-length-width",
+					  &len_width)) {
+			if (len_width < XILINX_DMA_MAX_TRANS_LEN_MIN ||
+			    len_width > XILINX_DMA_V2_MAX_TRANS_LEN_MAX) {
+				dev_warn(xdev->dev,
+					 "invalid xlnx,sg-length-width property value. Using default width\n");
+			} else {
+				if (len_width > XILINX_DMA_MAX_TRANS_LEN_MAX)
+					dev_warn(xdev->dev, "Please ensure that IP supports buffer length > 23 bits\n");
+				xdev->max_buffer_len =
+					GENMASK(len_width - 1, 0);
+			}
+		}
+	}
 
 	if (xdev->dma_config->dmatype == XDMA_TYPE_VDMA) {
 		err = of_property_read_u32(node, "xlnx,num-fstores",
diff --git a/drivers/tty/serial/8250/8250_lpss.c b/drivers/tty/serial/8250/8250_lpss.c
index 98dbc79..53ca9ba 100644
--- a/drivers/tty/serial/8250/8250_lpss.c
+++ b/drivers/tty/serial/8250/8250_lpss.c
@@ -153,7 +153,6 @@ static int byt_serial_setup(struct lpss8250 *lpss, struct uart_port *port)
 #ifdef CONFIG_SERIAL_8250_DMA
 static const struct dw_dma_platform_data qrk_serial_dma_pdata = {
 	.nr_channels = 2,
-	.is_private = true,
 	.chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
 	.chan_priority = CHAN_PRIORITY_ASCENDING,
 	.block_size = 4095,
diff --git a/include/linux/dma/dw.h b/include/linux/dma/dw.h
index e166cac..9752f374 100644
--- a/include/linux/dma/dw.h
+++ b/include/linux/dma/dw.h
@@ -1,13 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
  * Copyright (C) 2014 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _DMA_DW_H
 #define _DMA_DW_H
@@ -45,9 +42,13 @@ struct dw_dma_chip {
 #if IS_ENABLED(CONFIG_DW_DMAC_CORE)
 int dw_dma_probe(struct dw_dma_chip *chip);
 int dw_dma_remove(struct dw_dma_chip *chip);
+int idma32_dma_probe(struct dw_dma_chip *chip);
+int idma32_dma_remove(struct dw_dma_chip *chip);
 #else
 static inline int dw_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
 static inline int dw_dma_remove(struct dw_dma_chip *chip) { return 0; }
+static inline int idma32_dma_probe(struct dw_dma_chip *chip) { return -ENODEV; }
+static inline int idma32_dma_remove(struct dw_dma_chip *chip) { return 0; }
 #endif /* CONFIG_DW_DMAC_CORE */
 
 #endif /* _DMA_DW_H */
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index 1a1d58e..f3eaf9e 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -1,12 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
 /*
  * Driver for the Synopsys DesignWare DMA Controller
  *
  * Copyright (C) 2007 Atmel Corporation
  * Copyright (C) 2010-2011 ST Microelectronics
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 #ifndef _PLATFORM_DATA_DMA_DW_H
 #define _PLATFORM_DATA_DMA_DW_H
@@ -38,10 +35,6 @@ struct dw_dma_slave {
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
  * @nr_channels: Number of channels supported by hardware (max 8)
- * @is_private: The device channels should be marked as private and not for
- *	by the general purpose DMA channel allocator.
- * @is_memcpy: The device channels do support memory-to-memory transfers.
- * @is_idma32: The type of the DMA controller is iDMA32
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
@@ -53,9 +46,6 @@ struct dw_dma_slave {
  */
 struct dw_dma_platform_data {
 	unsigned int	nr_channels;
-	bool		is_private;
-	bool		is_memcpy;
-	bool		is_idma32;
 #define CHAN_ALLOCATION_ASCENDING	0	/* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING	1	/* seven to zero */
 	unsigned char	chan_allocation_order;
diff --git a/include/linux/platform_data/dma-imx.h b/include/linux/platform_data/dma-imx.h
index 7d964e7..9daea8d 100644
--- a/include/linux/platform_data/dma-imx.h
+++ b/include/linux/platform_data/dma-imx.h
@@ -55,6 +55,7 @@ struct imx_dma_data {
 	int dma_request2; /* secondary DMA request line */
 	enum sdma_peripheral_type peripheral_type;
 	int priority;
+	struct device_node *of_node;
 };
 
 static inline int imx_dma_is_ipu(struct dma_chan *chan)
diff --git a/include/trace/events/tegra_apb_dma.h b/include/trace/events/tegra_apb_dma.h
new file mode 100644
index 0000000..0818f62
--- /dev/null
+++ b/include/trace/events/tegra_apb_dma.h
@@ -0,0 +1,61 @@
+#if !defined(_TRACE_TEGRA_APB_DMA_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_TEGRA_APM_DMA_H
+
+#include <linux/tracepoint.h>
+#include <linux/dmaengine.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM tegra_apb_dma
+
+TRACE_EVENT(tegra_dma_tx_status,
+	TP_PROTO(struct dma_chan *dc, dma_cookie_t cookie, struct dma_tx_state *state),
+	TP_ARGS(dc, cookie, state),
+	TP_STRUCT__entry(
+		__string(chan,	dev_name(&dc->dev->device))
+		__field(dma_cookie_t, cookie)
+		__field(__u32,	residue)
+	),
+	TP_fast_assign(
+		__assign_str(chan, dev_name(&dc->dev->device));
+		__entry->cookie = cookie;
+		__entry->residue = state ? state->residue : (u32)-1;
+	),
+	TP_printk("channel %s: dma cookie %d, residue %u",
+		  __get_str(chan), __entry->cookie, __entry->residue)
+);
+
+TRACE_EVENT(tegra_dma_complete_cb,
+	TP_PROTO(struct dma_chan *dc, int count, void *ptr),
+	TP_ARGS(dc, count, ptr),
+	TP_STRUCT__entry(
+		__string(chan,	dev_name(&dc->dev->device))
+		__field(int,	count)
+		__field(void *,	ptr)
+		),
+	TP_fast_assign(
+		__assign_str(chan, dev_name(&dc->dev->device));
+		__entry->count = count;
+		__entry->ptr = ptr;
+		),
+	TP_printk("channel %s: done %d, ptr %p",
+		  __get_str(chan), __entry->count, __entry->ptr)
+);
+
+TRACE_EVENT(tegra_dma_isr,
+	TP_PROTO(struct dma_chan *dc, int irq),
+	TP_ARGS(dc, irq),
+	TP_STRUCT__entry(
+		__string(chan,	dev_name(&dc->dev->device))
+		__field(int,	irq)
+	),
+	TP_fast_assign(
+		__assign_str(chan, dev_name(&dc->dev->device));
+		__entry->irq = irq;
+	),
+	TP_printk("%s: irq %d\n",  __get_str(chan), __entry->irq)
+);
+
+#endif /*  _TRACE_TEGRADMA_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>