k3dma: Fix occasional DMA ERR issue by using proper dma api

After lots of debugging on an occasional DMA ERR issue, I realized
that the desc structures which we point the dma hardware at are being
allocated out of regular memory. This means that when we fill the desc
structures, that data doesn't always get flushed out to memory by
the time we start the dma transfer, so the dma engine reads
some null values, resulting in a DMA ERR on the first irq.

Thus, this patch adopts a mechanism similar to that of zx296702_dma,
allocating the desc structures from a dma pool, so the memory caching
rules are properly set to avoid this issue.

Cc: Zhangfei Gao <zhangfei.gao@linaro.org>
Cc: Krzysztof Kozlowski <k.kozlowski@samsung.com>
Cc: Maxime Ripard <maxime.ripard@free-electrons.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Mark Brown <broonie@kernel.org>
Cc: Andy Green <andy@warmcat.com>
Acked-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Signed-off-by: John Stultz <john.stultz@linaro.org>
Signed-off-by: Vinod Koul <vinod.koul@intel.com>
diff --git a/drivers/dma/k3dma.c b/drivers/dma/k3dma.c
index f46b9b86..9d96c95 100644
--- a/drivers/dma/k3dma.c
+++ b/drivers/dma/k3dma.c
@@ -8,6 +8,8 @@
  */
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
 #include <linux/dmaengine.h>
 #include <linux/init.h>
 #include <linux/interrupt.h>
@@ -25,6 +27,7 @@
 
 #define DRIVER_NAME		"k3-dma"
 #define DMA_MAX_SIZE		0x1ffc
+#define LLI_BLOCK_SIZE		(4 * PAGE_SIZE)
 
 #define INT_STAT		0x00
 #define INT_TC1			0x04
@@ -68,7 +71,7 @@
 	dma_addr_t		desc_hw_lli;
 	size_t			desc_num;
 	size_t			size;
-	struct k3_desc_hw	desc_hw[0];
+	struct k3_desc_hw	*desc_hw;
 };
 
 struct k3_dma_phy;
@@ -100,6 +103,7 @@
 	struct k3_dma_phy	*phy;
 	struct k3_dma_chan	*chans;
 	struct clk		*clk;
+	struct dma_pool		*pool;
 	u32			dma_channels;
 	u32			dma_requests;
 	unsigned int		irq;
@@ -414,6 +418,35 @@
 	ds->desc_hw[num].config = ccfg;
 }
 
+static struct k3_dma_desc_sw *k3_dma_alloc_desc_resource(int num,
+							struct dma_chan *chan)
+{	/* Returns a sw desc whose desc_hw comes from d->pool, or NULL on failure. */
+	struct k3_dma_chan *c = to_k3_chan(chan);
+	struct k3_dma_desc_sw *ds;
+	struct k3_dma_dev *d = to_k3_dma(chan->device);
+	int lli_limit = LLI_BLOCK_SIZE / sizeof(struct k3_desc_hw);
+
+	if (num > lli_limit) {	/* more hw descs than one pool block holds */
+		dev_dbg(chan->device->dev, "vch %p: sg num %d exceed max %d\n",
+			&c->vc, num, lli_limit);
+		return NULL;
+	}
+
+	ds = kzalloc(sizeof(*ds), GFP_NOWAIT);	/* sw bookkeeping only */
+	if (!ds)
+		return NULL;
+
+	ds->desc_hw = dma_pool_alloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
+	if (!ds->desc_hw) {	/* pool alloc failed: drop ds too */
+		dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc);
+		kfree(ds);
+		return NULL;
+	}
+	memset(ds->desc_hw, 0, sizeof(struct k3_desc_hw) * num); /* used part only */
+	ds->desc_num = num;	/* callers no longer set this themselves */
+	return ds;
+}
+
 static struct dma_async_tx_descriptor *k3_dma_prep_memcpy(
 	struct dma_chan *chan,	dma_addr_t dst, dma_addr_t src,
 	size_t len, unsigned long flags)
@@ -427,13 +460,12 @@
 		return NULL;
 
 	num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
-	ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+
+	ds = k3_dma_alloc_desc_resource(num, chan);
 	if (!ds)
 		return NULL;
 
-	ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
 	ds->size = len;
-	ds->desc_num = num;
 	num = 0;
 
 	if (!c->ccfg) {
@@ -482,12 +514,9 @@
 			num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
 	}
 
-	ds = kzalloc(sizeof(*ds) + num * sizeof(ds->desc_hw[0]), GFP_ATOMIC);
+	ds = k3_dma_alloc_desc_resource(num, chan);
 	if (!ds)
 		return NULL;
-
-	ds->desc_hw_lli = __virt_to_phys((unsigned long)&ds->desc_hw[0]);
-	ds->desc_num = num;
 	num = 0;
 
 	for_each_sg(sgl, sg, sglen, i) {
@@ -645,7 +674,9 @@
 {
 	struct k3_dma_desc_sw *ds =
 		container_of(vd, struct k3_dma_desc_sw, vd);
+	struct k3_dma_dev *d = to_k3_dma(vd->tx.chan->device);
 
+	dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli);
 	kfree(ds);
 }
 
@@ -708,6 +739,12 @@
 
 	d->irq = irq;
 
+	/* A DMA memory pool for LLIs, align on 32-byte boundary */
+	d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
+					LLI_BLOCK_SIZE, 32, 0);
+	if (!d->pool)
+		return -ENOMEM;
+
 	/* init phy channel */
 	d->phy = devm_kzalloc(&op->dev,
 		d->dma_channels * sizeof(struct k3_dma_phy), GFP_KERNEL);