Merge tag 'dmaengine-fix-5.10-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine

Pull dmaengine fixes from Vinod Koul:
 "A solitary core fix and a few driver fixes:

  Core:

   - channel_register error handling

  Driver fixes:

   - idxd: wq config registers programming and mapping of portal size

   - ioatdma: unused function removal

   - pl330: fix burst size

   - ti: PM fix when SDMA is busy, and -Wenum-conversion warning fix

   - xilinx: SG capability check, usage of xilinx_aximcdma_tx_segment,
     readl_poll_timeout_atomic variant"

* tag 'dmaengine-fix-5.10-rc5' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine:
  dmaengine: fix error codes in channel_register()
  dmaengine: pl330: _prep_dma_memcpy: Fix wrong burst size
  dmaengine: ioatdma: remove unused function missed during dma_v2 removal
  dmaengine: idxd: fix mapping of portal size
  dmaengine: ti: omap-dma: Block PM if SDMA is busy to fix audio
  dmaengine: xilinx_dma: Fix SG capability check for MCDMA
  dmaengine: xilinx_dma: Fix usage of xilinx_aximcdma_tx_segment
  dmaengine: xilinx_dma: use readl_poll_timeout_atomic variant
  dmaengine: ti: k3-udma: fix -Wenum-conversion warning
  dmaengine: idxd: fix wq config registers offset programming
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 7974fa0..962cbb5 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -1039,16 +1039,15 @@
 static int __dma_async_device_channel_register(struct dma_device *device,
 					       struct dma_chan *chan)
 {
-	int rc = 0;
+	int rc;
 
 	chan->local = alloc_percpu(typeof(*chan->local));
 	if (!chan->local)
-		goto err_out;
+		return -ENOMEM;
 	chan->dev = kzalloc(sizeof(*chan->dev), GFP_KERNEL);
 	if (!chan->dev) {
-		free_percpu(chan->local);
-		chan->local = NULL;
-		goto err_out;
+		rc = -ENOMEM;
+		goto err_free_local;
 	}
 
 	/*
@@ -1061,7 +1060,8 @@
 	if (chan->chan_id < 0) {
 		pr_err("%s: unable to alloc ida for chan: %d\n",
 		       __func__, chan->chan_id);
-		goto err_out;
+		rc = chan->chan_id;
+		goto err_free_dev;
 	}
 
 	chan->dev->device.class = &dma_devclass;
@@ -1082,9 +1082,10 @@
 	mutex_lock(&device->chan_mutex);
 	ida_free(&device->chan_ida, chan->chan_id);
 	mutex_unlock(&device->chan_mutex);
- err_out:
-	free_percpu(chan->local);
+ err_free_dev:
 	kfree(chan->dev);
+ err_free_local:
+	free_percpu(chan->local);
 	return rc;
 }
 
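For reference, the channel_register() fix above follows the usual kernel unwind idiom: carry a real error code in rc and release resources in reverse order of acquisition, one label per allocation. A minimal standalone sketch of that pattern, using a hypothetical foo_init() and plain malloc() rather than the dmaengine code itself:

	#include <stdlib.h>
	#include <errno.h>

	struct foo {
		int *a;
		int *b;
	};

	static int foo_init(struct foo *f)
	{
		int rc;

		f->a = malloc(sizeof(*f->a));
		if (!f->a)
			return -ENOMEM;		/* nothing to unwind yet */

		f->b = malloc(sizeof(*f->b));
		if (!f->b) {
			rc = -ENOMEM;
			goto err_free_a;	/* unwind only what succeeded */
		}

		return 0;

	err_free_a:
		free(f->a);
		return rc;
	}

	int main(void)
	{
		struct foo f;

		return foo_init(&f) ? 1 : 0;
	}

Each error label undoes exactly what was set up before the failure, which is the ordering the patch restores for chan->local and chan->dev.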
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 200b910..6633449 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -271,7 +271,7 @@
 	resource_size_t start;
 
 	start = pci_resource_start(pdev, IDXD_WQ_BAR);
-	start = start + wq->id * IDXD_PORTAL_SIZE;
+	start += idxd_get_wq_portal_full_offset(wq->id, IDXD_PORTAL_LIMITED);
 
 	wq->dportal = devm_ioremap(dev, start, IDXD_PORTAL_SIZE);
 	if (!wq->dportal)
@@ -295,7 +295,7 @@
 	int i, wq_offset;
 
 	lockdep_assert_held(&idxd->dev_lock);
-	memset(&wq->wqcfg, 0, sizeof(wq->wqcfg));
+	memset(wq->wqcfg, 0, idxd->wqcfg_size);
 	wq->type = IDXD_WQT_NONE;
 	wq->size = 0;
 	wq->group = NULL;
@@ -304,8 +304,8 @@
 	clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
 	memset(wq->name, 0, WQ_NAME_SIZE);
 
-	for (i = 0; i < 8; i++) {
-		wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32);
+	for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+		wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
 		iowrite32(0, idxd->reg_base + wq_offset);
 		dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
 			wq->id, i, wq_offset,
@@ -539,10 +539,10 @@
 	if (!wq->group)
 		return 0;
 
-	memset(&wq->wqcfg, 0, sizeof(union wqcfg));
+	memset(wq->wqcfg, 0, idxd->wqcfg_size);
 
 	/* byte 0-3 */
-	wq->wqcfg.wq_size = wq->size;
+	wq->wqcfg->wq_size = wq->size;
 
 	if (wq->size == 0) {
 		dev_warn(dev, "Incorrect work queue size: 0\n");
@@ -550,22 +550,21 @@
 	}
 
 	/* bytes 4-7 */
-	wq->wqcfg.wq_thresh = wq->threshold;
+	wq->wqcfg->wq_thresh = wq->threshold;
 
 	/* byte 8-11 */
-	wq->wqcfg.priv = !!(wq->type == IDXD_WQT_KERNEL);
-	wq->wqcfg.mode = 1;
-
-	wq->wqcfg.priority = wq->priority;
+	wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
+	wq->wqcfg->mode = 1;
+	wq->wqcfg->priority = wq->priority;
 
 	/* bytes 12-15 */
-	wq->wqcfg.max_xfer_shift = ilog2(wq->max_xfer_bytes);
-	wq->wqcfg.max_batch_shift = ilog2(wq->max_batch_size);
+	wq->wqcfg->max_xfer_shift = ilog2(wq->max_xfer_bytes);
+	wq->wqcfg->max_batch_shift = ilog2(wq->max_batch_size);
 
 	dev_dbg(dev, "WQ %d CFGs\n", wq->id);
-	for (i = 0; i < 8; i++) {
-		wq_offset = idxd->wqcfg_offset + wq->id * 32 + i * sizeof(u32);
-		iowrite32(wq->wqcfg.bits[i], idxd->reg_base + wq_offset);
+	for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+		wq_offset = WQCFG_OFFSET(idxd, wq->id, i);
+		iowrite32(wq->wqcfg->bits[i], idxd->reg_base + wq_offset);
 		dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n",
 			wq->id, i, wq_offset,
 			ioread32(idxd->reg_base + wq_offset));
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index c64df19..d48f193 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -103,7 +103,7 @@
 	u32 priority;
 	enum idxd_wq_state state;
 	unsigned long flags;
-	union wqcfg wqcfg;
+	union wqcfg *wqcfg;
 	u32 vec_ptr;		/* interrupt steering */
 	struct dsa_hw_desc **hw_descs;
 	int num_descs;
@@ -183,6 +183,7 @@
 	int max_wq_size;
 	int token_limit;
 	int nr_tokens;		/* non-reserved tokens */
+	unsigned int wqcfg_size;
 
 	union sw_err_reg sw_err;
 	wait_queue_head_t cmd_waitq;
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 11e5ce1..0a4432b 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -178,6 +178,9 @@
 		wq->idxd_cdev.minor = -1;
 		wq->max_xfer_bytes = idxd->max_xfer_bytes;
 		wq->max_batch_size = idxd->max_batch_size;
+		wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL);
+		if (!wq->wqcfg)
+			return -ENOMEM;
 	}
 
 	for (i = 0; i < idxd->max_engines; i++) {
@@ -251,6 +254,8 @@
 	dev_dbg(dev, "total workqueue size: %u\n", idxd->max_wq_size);
 	idxd->max_wqs = idxd->hw.wq_cap.num_wqs;
 	dev_dbg(dev, "max workqueues: %u\n", idxd->max_wqs);
+	idxd->wqcfg_size = 1 << (idxd->hw.wq_cap.wqcfg_size + IDXD_WQCFG_MIN);
+	dev_dbg(dev, "wqcfg size: %u\n", idxd->wqcfg_size);
 
 	/* reading operation capabilities */
 	for (i = 0; i < 4; i++) {
diff --git a/drivers/dma/idxd/registers.h b/drivers/dma/idxd/registers.h
index a39e7ae..5439033 100644
--- a/drivers/dma/idxd/registers.h
+++ b/drivers/dma/idxd/registers.h
@@ -8,7 +8,7 @@
 
 #define IDXD_MMIO_BAR		0
 #define IDXD_WQ_BAR		2
-#define IDXD_PORTAL_SIZE	0x4000
+#define IDXD_PORTAL_SIZE	PAGE_SIZE
 
 /* MMIO Device BAR0 Registers */
 #define IDXD_VER_OFFSET			0x00
@@ -43,7 +43,8 @@
 	struct {
 		u64 total_wq_size:16;
 		u64 num_wqs:8;
-		u64 rsvd:24;
+		u64 wqcfg_size:4;
+		u64 rsvd:20;
 		u64 shared_mode:1;
 		u64 dedicated_mode:1;
 		u64 rsvd2:1;
@@ -55,6 +56,7 @@
 	u64 bits;
 } __packed;
 #define IDXD_WQCAP_OFFSET		0x20
+#define IDXD_WQCFG_MIN			5
 
 union group_cap_reg {
 	struct {
@@ -333,4 +335,23 @@
 	};
 	u32 bits[8];
 } __packed;
+
+/*
+ * This macro calculates the offset into the WQCFG register
+ * idxd - struct idxd *
+ * n - wq id
+ * ofs - the index of the 32b dword for the config register
+ *
+ * The WQCFG register block is divided into groups per each wq. The n index
+ * allows us to move to the register group that's for that particular wq.
+ * Each register is 32bits. The ofs gives us the number of register to access.
+ */
+#define WQCFG_OFFSET(_idxd_dev, n, ofs) \
+({\
+	typeof(_idxd_dev) __idxd_dev = (_idxd_dev);	\
+	(__idxd_dev)->wqcfg_offset + (n) * (__idxd_dev)->wqcfg_size + sizeof(u32) * (ofs);	\
+})
+
+#define WQCFG_STRIDES(_idxd_dev) ((_idxd_dev)->wqcfg_size / sizeof(u32))
+
 #endif
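The WQCFG_OFFSET() and WQCFG_STRIDES() helpers added above replace the old hard-coded assumption of a 32-byte, 8-dword WQ config block with a per-device stride derived from the WQCAP wqcfg_size field. A standalone arithmetic sketch of the same calculation, using hypothetical register values rather than anything read from real hardware:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		/* hypothetical example values, not taken from a device */
		unsigned int wqcfg_offset = 0x500;	/* base of the WQCFG table */
		unsigned int wqcfg_size = 1u << (0 + 5);	/* wq_cap.wqcfg_size = 0, IDXD_WQCFG_MIN = 5 -> 32 bytes */
		unsigned int wq_id = 2, ofs = 3;

		/* mirrors WQCFG_OFFSET(): table base + wq id * per-wq stride + 32-bit word index */
		unsigned int off = wqcfg_offset + wq_id * wqcfg_size +
				   (unsigned int)sizeof(uint32_t) * ofs;

		printf("WQ %u, dword %u -> offset %#x\n", wq_id, ofs, off);	/* 0x500 + 64 + 12 = 0x54c */
		return 0;
	}

A device reporting a larger wqcfg_size simply stretches the stride, and the driver loops now iterate WQCFG_STRIDES() dwords instead of a fixed 8.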
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 156a1ee..417048e3 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -74,7 +74,7 @@
 	if (idxd->state != IDXD_DEV_ENABLED)
 		return -EIO;
 
-	portal = wq->dportal + idxd_get_wq_portal_offset(IDXD_PORTAL_UNLIMITED);
+	portal = wq->dportal;
 	/*
 	 * The wmb() flushes writes to coherent DMA data before possibly
 	 * triggering a DMA read. The wmb() is necessary even on UP because
diff --git a/drivers/dma/ioat/dca.c b/drivers/dma/ioat/dca.c
index 0be3855..289c59ed7 100644
--- a/drivers/dma/ioat/dca.c
+++ b/drivers/dma/ioat/dca.c
@@ -40,16 +40,6 @@
 #define DCA2_TAG_MAP_BYTE3 0x82
 #define DCA2_TAG_MAP_BYTE4 0x82
 
-/* verify if tag map matches expected values */
-static inline int dca2_tag_map_valid(u8 *tag_map)
-{
-	return ((tag_map[0] == DCA2_TAG_MAP_BYTE0) &&
-		(tag_map[1] == DCA2_TAG_MAP_BYTE1) &&
-		(tag_map[2] == DCA2_TAG_MAP_BYTE2) &&
-		(tag_map[3] == DCA2_TAG_MAP_BYTE3) &&
-		(tag_map[4] == DCA2_TAG_MAP_BYTE4));
-}
-
 /*
  * "Legacy" DCA systems do not implement the DCA register set in the
  * I/OAT device.  Software needs direct support for their tag mappings.
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index e9f0101..0f5c193 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -2799,7 +2799,7 @@
 	 * If burst size is smaller than bus width then make sure we only
 	 * transfer one at a time to avoid a burst stradling an MFIFO entry.
 	 */
-	if (desc->rqcfg.brst_size * 8 < pl330->pcfg.data_bus_width)
+	if (burst * 8 < pl330->pcfg.data_bus_width)
 		desc->rqcfg.brst_len = 1;
 
 	desc->bytes_requested = len;
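The pl330 change compares the requested burst size in bytes (burst) against the data bus width instead of rqcfg.brst_size; brst_size is assumed here to hold the log2-encoded value programmed into CCR, so the two are not interchangeable. A hedged standalone illustration with made-up numbers, not taken from the patch:

	#include <stdio.h>

	int main(void)
	{
		unsigned int burst = 8;			/* burst size in bytes (assumed example) */
		unsigned int brst_size = 3;		/* log2 of the same burst, as CCR encodes it (assumption) */
		unsigned int data_bus_width = 64;	/* bus width in bits (assumed example) */

		/* old check: treats the log2 exponent as if it were a byte count */
		printf("old check: %s\n",
		       brst_size * 8 < data_bus_width ? "force 1-beat bursts" : "keep burst length");
		/* new check: compares the real burst width in bits against the bus width */
		printf("new check: %s\n",
		       burst * 8 < data_bus_width ? "force 1-beat bursts" : "keep burst length");
		return 0;
	}

With an 8-byte burst on a 64-bit bus the old test still forced single-beat transfers (24 < 64), while the new one leaves the burst length alone (64 < 64 is false).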
diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c
index aa24e55..8563a39 100644
--- a/drivers/dma/ti/k3-udma-private.c
+++ b/drivers/dma/ti/k3-udma-private.c
@@ -83,7 +83,7 @@
 #define XUDMA_GET_PUT_RESOURCE(res)					\
 struct udma_##res *xudma_##res##_get(struct udma_dev *ud, int id)	\
 {									\
-	return __udma_reserve_##res(ud, false, id);			\
+	return __udma_reserve_##res(ud, UDMA_TP_NORMAL, id);		\
 }									\
 EXPORT_SYMBOL(xudma_##res##_get);					\
 									\
diff --git a/drivers/dma/ti/omap-dma.c b/drivers/dma/ti/omap-dma.c
index c9fe5e3..268a080 100644
--- a/drivers/dma/ti/omap-dma.c
+++ b/drivers/dma/ti/omap-dma.c
@@ -1522,29 +1522,38 @@
 	}
 }
 
+/* Currently used by omap2 & 3 to block deeper SoC idle states */
+static bool omap_dma_busy(struct omap_dmadev *od)
+{
+	struct omap_chan *c;
+	int lch = -1;
+
+	while (1) {
+		lch = find_next_bit(od->lch_bitmap, od->lch_count, lch + 1);
+		if (lch >= od->lch_count)
+			break;
+		c = od->lch_map[lch];
+		if (!c)
+			continue;
+		if (omap_dma_chan_read(c, CCR) & CCR_ENABLE)
+			return true;
+	}
+
+	return false;
+}
+
 /* Currently only used for omap2. For omap1, also a check for lcd_dma is needed */
 static int omap_dma_busy_notifier(struct notifier_block *nb,
 				  unsigned long cmd, void *v)
 {
 	struct omap_dmadev *od;
-	struct omap_chan *c;
-	int lch = -1;
 
 	od = container_of(nb, struct omap_dmadev, nb);
 
 	switch (cmd) {
 	case CPU_CLUSTER_PM_ENTER:
-		while (1) {
-			lch = find_next_bit(od->lch_bitmap, od->lch_count,
-					    lch + 1);
-			if (lch >= od->lch_count)
-				break;
-			c = od->lch_map[lch];
-			if (!c)
-				continue;
-			if (omap_dma_chan_read(c, CCR) & CCR_ENABLE)
-				return NOTIFY_BAD;
-		}
+		if (omap_dma_busy(od))
+			return NOTIFY_BAD;
 		break;
 	case CPU_CLUSTER_PM_ENTER_FAILED:
 	case CPU_CLUSTER_PM_EXIT:
@@ -1595,6 +1604,8 @@
 
 	switch (cmd) {
 	case CPU_CLUSTER_PM_ENTER:
+		if (omap_dma_busy(od))
+			return NOTIFY_BAD;
 		omap_dma_context_save(od);
 		break;
 	case CPU_CLUSTER_PM_ENTER_FAILED:
diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index ecff354..22faea6 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -517,8 +517,8 @@
 #define to_dma_tx_descriptor(tx) \
 	container_of(tx, struct xilinx_dma_tx_descriptor, async_tx)
 #define xilinx_dma_poll_timeout(chan, reg, val, cond, delay_us, timeout_us) \
-	readl_poll_timeout(chan->xdev->regs + chan->ctrl_offset + reg, val, \
-			   cond, delay_us, timeout_us)
+	readl_poll_timeout_atomic(chan->xdev->regs + chan->ctrl_offset + reg, \
+				  val, cond, delay_us, timeout_us)
 
 /* IO accessors */
 static inline u32 dma_read(struct xilinx_dma_chan *chan, u32 reg)
@@ -948,8 +948,10 @@
 {
 	struct xilinx_cdma_tx_segment *cdma_seg;
 	struct xilinx_axidma_tx_segment *axidma_seg;
+	struct xilinx_aximcdma_tx_segment *aximcdma_seg;
 	struct xilinx_cdma_desc_hw *cdma_hw;
 	struct xilinx_axidma_desc_hw *axidma_hw;
+	struct xilinx_aximcdma_desc_hw *aximcdma_hw;
 	struct list_head *entry;
 	u32 residue = 0;
 
@@ -961,13 +963,23 @@
 			cdma_hw = &cdma_seg->hw;
 			residue += (cdma_hw->control - cdma_hw->status) &
 				   chan->xdev->max_buffer_len;
-		} else {
+		} else if (chan->xdev->dma_config->dmatype ==
+			   XDMA_TYPE_AXIDMA) {
 			axidma_seg = list_entry(entry,
 						struct xilinx_axidma_tx_segment,
 						node);
 			axidma_hw = &axidma_seg->hw;
 			residue += (axidma_hw->control - axidma_hw->status) &
 				   chan->xdev->max_buffer_len;
+		} else {
+			aximcdma_seg =
+				list_entry(entry,
+					   struct xilinx_aximcdma_tx_segment,
+					   node);
+			aximcdma_hw = &aximcdma_seg->hw;
+			residue +=
+				(aximcdma_hw->control - aximcdma_hw->status) &
+				chan->xdev->max_buffer_len;
 		}
 	}
 
@@ -1135,7 +1147,7 @@
 			upper_32_bits(chan->seg_p + sizeof(*chan->seg_mv) *
 				((i + 1) % XILINX_DMA_NUM_DESCS));
 			chan->seg_mv[i].phys = chan->seg_p +
-				sizeof(*chan->seg_v) * i;
+				sizeof(*chan->seg_mv) * i;
 			list_add_tail(&chan->seg_mv[i].node,
 				      &chan->free_seg_list);
 		}
@@ -1560,7 +1572,7 @@
 static void xilinx_mcdma_start_transfer(struct xilinx_dma_chan *chan)
 {
 	struct xilinx_dma_tx_descriptor *head_desc, *tail_desc;
-	struct xilinx_axidma_tx_segment *tail_segment;
+	struct xilinx_aximcdma_tx_segment *tail_segment;
 	u32 reg;
 
 	/*
@@ -1582,7 +1594,7 @@
 	tail_desc = list_last_entry(&chan->pending_list,
 				    struct xilinx_dma_tx_descriptor, node);
 	tail_segment = list_last_entry(&tail_desc->segments,
-				       struct xilinx_axidma_tx_segment, node);
+				       struct xilinx_aximcdma_tx_segment, node);
 
 	reg = dma_ctrl_read(chan, XILINX_MCDMA_CHAN_CR_OFFSET(chan->tdest));
 
@@ -1864,6 +1876,7 @@
 	struct xilinx_vdma_tx_segment *tail_segment;
 	struct xilinx_dma_tx_descriptor *tail_desc;
 	struct xilinx_axidma_tx_segment *axidma_tail_segment;
+	struct xilinx_aximcdma_tx_segment *aximcdma_tail_segment;
 	struct xilinx_cdma_tx_segment *cdma_tail_segment;
 
 	if (list_empty(&chan->pending_list))
@@ -1885,11 +1898,17 @@
 						struct xilinx_cdma_tx_segment,
 						node);
 		cdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
-	} else {
+	} else if (chan->xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
 		axidma_tail_segment = list_last_entry(&tail_desc->segments,
 					       struct xilinx_axidma_tx_segment,
 					       node);
 		axidma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
+	} else {
+		aximcdma_tail_segment =
+			list_last_entry(&tail_desc->segments,
+					struct xilinx_aximcdma_tx_segment,
+					node);
+		aximcdma_tail_segment->hw.next_desc = (u32)desc->async_tx.phys;
 	}
 
 	/*
@@ -2836,10 +2855,11 @@
 		chan->stop_transfer = xilinx_dma_stop_transfer;
 	}
 
-	/* check if SG is enabled (only for AXIDMA and CDMA) */
+	/* check if SG is enabled (only for AXIDMA, AXIMCDMA, and CDMA) */
 	if (xdev->dma_config->dmatype != XDMA_TYPE_VDMA) {
-		if (dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
-		    XILINX_DMA_DMASR_SG_MASK)
+		if (xdev->dma_config->dmatype == XDMA_TYPE_AXIMCDMA ||
+		    dma_ctrl_read(chan, XILINX_DMA_REG_DMASR) &
+			    XILINX_DMA_DMASR_SG_MASK)
 			chan->has_sg = true;
 		dev_dbg(chan->dev, "ch %d: SG %s\n", chan->id,
 			chan->has_sg ? "enabled" : "disabled");