USB: EHCI support for big-endian descriptors

This patch implements supports for EHCI controllers whose in-memory
data structures are represented in big-endian format. This is needed
(unfortunately) for the AMCC PPC440EPx SoC EHCI controller; the EHCI
spec doesn't specify little-endian format, although that's what most
other implementations use.

The guts of the patch are to introduce the hc32 type and change all
references from le32 to hc32.  All access routines are converted from
cpu_to_le32(...) to cpu_to_hc32(ehci, ...) and similar for the other
"direction".  (This is the same approach used with OHCI.)

David fixed:
	Whitespace fixes; refresh against ehci cpufreq patch; move glue
	for that PPC driver to the patch adding it; fix free symbol
	capture bugs in modified "constant" macros; and make "hc32" etc
	be "le32" unless we really need the BE options, so "sparse" can
	do some real good.

Signed-off-by: Stefan Roese <sr@denx.de>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>


diff --git a/drivers/usb/host/ehci-sched.c b/drivers/usb/host/ehci-sched.c
index 500aebb..d4a8ace 100644
--- a/drivers/usb/host/ehci-sched.c
+++ b/drivers/usb/host/ehci-sched.c
@@ -44,9 +44,10 @@
  * @tag: hardware tag for type of this record
  */
 static union ehci_shadow *
-periodic_next_shadow (union ehci_shadow *periodic, __le32 tag)
+periodic_next_shadow(struct ehci_hcd *ehci, union ehci_shadow *periodic,
+		__hc32 tag)
 {
-	switch (tag) {
+	switch (hc32_to_cpu(ehci, tag)) {
 	case Q_TYPE_QH:
 		return &periodic->qh->qh_next;
 	case Q_TYPE_FSTN:
@@ -62,13 +63,14 @@
 /* caller must hold ehci->lock */
 static void periodic_unlink (struct ehci_hcd *ehci, unsigned frame, void *ptr)
 {
-	union ehci_shadow	*prev_p = &ehci->pshadow [frame];
-	__le32			*hw_p = &ehci->periodic [frame];
+	union ehci_shadow	*prev_p = &ehci->pshadow[frame];
+	__hc32			*hw_p = &ehci->periodic[frame];
 	union ehci_shadow	here = *prev_p;
 
 	/* find predecessor of "ptr"; hw and shadow lists are in sync */
 	while (here.ptr && here.ptr != ptr) {
-		prev_p = periodic_next_shadow (prev_p, Q_NEXT_TYPE (*hw_p));
+		prev_p = periodic_next_shadow(ehci, prev_p,
+				Q_NEXT_TYPE(ehci, *hw_p));
 		hw_p = here.hw_next;
 		here = *prev_p;
 	}
@@ -79,7 +81,8 @@
 	/* update shadow and hardware lists ... the old "next" pointers
 	 * from ptr may still be in use, the caller updates them.
 	 */
-	*prev_p = *periodic_next_shadow (&here, Q_NEXT_TYPE (*hw_p));
+	*prev_p = *periodic_next_shadow(ehci, &here,
+			Q_NEXT_TYPE(ehci, *hw_p));
 	*hw_p = *here.hw_next;
 }
 
@@ -87,18 +90,19 @@
 static unsigned short
 periodic_usecs (struct ehci_hcd *ehci, unsigned frame, unsigned uframe)
 {
-	__le32			*hw_p = &ehci->periodic [frame];
+	__hc32			*hw_p = &ehci->periodic [frame];
 	union ehci_shadow	*q = &ehci->pshadow [frame];
 	unsigned		usecs = 0;
 
 	while (q->ptr) {
-		switch (Q_NEXT_TYPE (*hw_p)) {
+		switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
 		case Q_TYPE_QH:
 			/* is it in the S-mask? */
-			if (q->qh->hw_info2 & cpu_to_le32 (1 << uframe))
+			if (q->qh->hw_info2 & cpu_to_hc32(ehci, 1 << uframe))
 				usecs += q->qh->usecs;
 			/* ... or C-mask? */
-			if (q->qh->hw_info2 & cpu_to_le32 (1 << (8 + uframe)))
+			if (q->qh->hw_info2 & cpu_to_hc32(ehci,
+					1 << (8 + uframe)))
 				usecs += q->qh->c_usecs;
 			hw_p = &q->qh->hw_next;
 			q = &q->qh->qh_next;
@@ -108,7 +112,7 @@
 			/* for "save place" FSTNs, count the relevant INTR
 			 * bandwidth from the previous frame
 			 */
-			if (q->fstn->hw_prev != EHCI_LIST_END) {
+			if (q->fstn->hw_prev != EHCI_LIST_END(ehci)) {
 				ehci_dbg (ehci, "ignoring FSTN cost ...\n");
 			}
 			hw_p = &q->fstn->hw_next;
@@ -121,9 +125,10 @@
 			break;
 		case Q_TYPE_SITD:
 			/* is it in the S-mask?  (count SPLIT, DATA) */
-			if (q->sitd->hw_uframe & cpu_to_le32 (1 << uframe)) {
+			if (q->sitd->hw_uframe & cpu_to_hc32(ehci,
+					1 << uframe)) {
 				if (q->sitd->hw_fullspeed_ep &
-						__constant_cpu_to_le32 (1<<31))
+						cpu_to_hc32(ehci, 1<<31))
 					usecs += q->sitd->stream->usecs;
 				else	/* worst case for OUT start-split */
 					usecs += HS_USECS_ISO (188);
@@ -131,7 +136,7 @@
 
 			/* ... C-mask?  (count CSPLIT, DATA) */
 			if (q->sitd->hw_uframe &
-					cpu_to_le32 (1 << (8 + uframe))) {
+					cpu_to_hc32(ehci, 1 << (8 + uframe))) {
 				/* worst case for IN complete-split */
 				usecs += q->sitd->stream->c_usecs;
 			}
@@ -173,9 +178,9 @@
  * will cause a transfer in "B-frame" uframe 0.  "B-frames" lag
  * "H-frames" by 1 uframe.  See the EHCI spec sec 4.5 and figure 4.7.
  */
-static inline unsigned char tt_start_uframe(struct ehci_hcd *ehci, __le32 mask)
+static inline unsigned char tt_start_uframe(struct ehci_hcd *ehci, __hc32 mask)
 {
-	unsigned char smask = QH_SMASK & le32_to_cpu(mask);
+	unsigned char smask = QH_SMASK & hc32_to_cpu(ehci, mask);
 	if (!smask) {
 		ehci_err(ehci, "invalid empty smask!\n");
 		/* uframe 7 can't have bw so this will indicate failure */
@@ -217,14 +222,14 @@
 	unsigned short tt_usecs[8]
 )
 {
-	__le32			*hw_p = &ehci->periodic [frame];
+	__hc32			*hw_p = &ehci->periodic [frame];
 	union ehci_shadow	*q = &ehci->pshadow [frame];
 	unsigned char		uf;
 
 	memset(tt_usecs, 0, 16);
 
 	while (q->ptr) {
-		switch (Q_NEXT_TYPE(*hw_p)) {
+		switch (hc32_to_cpu(ehci, Q_NEXT_TYPE(ehci, *hw_p))) {
 		case Q_TYPE_ITD:
 			hw_p = &q->itd->hw_next;
 			q = &q->itd->itd_next;
@@ -247,8 +252,8 @@
 			continue;
 		// case Q_TYPE_FSTN:
 		default:
-			ehci_dbg(ehci,
-				  "ignoring periodic frame %d FSTN\n", frame);
+			ehci_dbg(ehci, "ignoring periodic frame %d FSTN\n",
+					frame);
 			hw_p = &q->fstn->hw_next;
 			q = &q->fstn->fstn_next;
 		}
@@ -368,41 +373,42 @@
 	 */
 	for (; frame < ehci->periodic_size; frame += period) {
 		union ehci_shadow	here;
-		__le32			type;
+		__hc32			type;
 
 		here = ehci->pshadow [frame];
-		type = Q_NEXT_TYPE (ehci->periodic [frame]);
+		type = Q_NEXT_TYPE(ehci, ehci->periodic [frame]);
 		while (here.ptr) {
-			switch (type) {
+			switch (hc32_to_cpu(ehci, type)) {
 			case Q_TYPE_ITD:
-				type = Q_NEXT_TYPE (here.itd->hw_next);
+				type = Q_NEXT_TYPE(ehci, here.itd->hw_next);
 				here = here.itd->itd_next;
 				continue;
 			case Q_TYPE_QH:
 				if (same_tt (dev, here.qh->dev)) {
 					u32		mask;
 
-					mask = le32_to_cpu (here.qh->hw_info2);
+					mask = hc32_to_cpu(ehci,
+							here.qh->hw_info2);
 					/* "knows" no gap is needed */
 					mask |= mask >> 8;
 					if (mask & uf_mask)
 						break;
 				}
-				type = Q_NEXT_TYPE (here.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, here.qh->hw_next);
 				here = here.qh->qh_next;
 				continue;
 			case Q_TYPE_SITD:
 				if (same_tt (dev, here.sitd->urb->dev)) {
 					u16		mask;
 
-					mask = le32_to_cpu (here.sitd
+					mask = hc32_to_cpu(ehci, here.sitd
 								->hw_uframe);
 					/* FIXME assumes no gap for IN! */
 					mask |= mask >> 8;
 					if (mask & uf_mask)
 						break;
 				}
-				type = Q_NEXT_TYPE (here.sitd->hw_next);
+				type = Q_NEXT_TYPE(ehci, here.sitd->hw_next);
 				here = here.sitd->sitd_next;
 				continue;
 			// case Q_TYPE_FSTN:
@@ -475,13 +481,6 @@
 /*-------------------------------------------------------------------------*/
 #ifdef CONFIG_CPU_FREQ
 
-/* ignore/inactivate bit in QH hw_info1 */
-#define INACTIVATE_BIT __constant_cpu_to_le32(QH_INACTIVATE)
-
-#define HALT_BIT __constant_cpu_to_le32(QTD_STS_HALT)
-#define ACTIVE_BIT __constant_cpu_to_le32(QTD_STS_ACTIVE)
-#define STATUS_BIT __constant_cpu_to_le32(QTD_STS_STS)
-
 static int safe_to_modify_i (struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
 	int now; /* current (frame * 8) + uframe */
@@ -492,8 +491,8 @@
 
 	now = readl(&ehci->regs->frame_index) % (ehci->periodic_size << 3);
 
-	next_start = ((1024 << 3) + (qh->start << 3) + start_uframe - now) %
-		     (qh->period << 3);
+	next_start = ((1024 << 3) + (qh->start << 3) + start_uframe - now)
+			% (qh->period << 3);
 	prev_start = (qh->period << 3) - next_start;
 
 	/*
@@ -510,7 +509,7 @@
 	 */
 	if ((next_start > ehci->i_thresh) && (prev_start > 1))
 		/* safe to set "i" bit if split isn't in progress */
-		return (qh->hw_token & STATUS_BIT) ? 0 : 1;
+		return (qh->hw_token & STATUS_BIT(ehci)) ? 0 : 1;
 	else
 		return 0;
 }
@@ -520,12 +519,14 @@
 {
 	struct ehci_qh	*qh;
 	int		not_done, safe;
+	u32		inactivate = INACTIVATE_BIT(ehci);
+	u32		active = ACTIVE_BIT(ehci);
 
 	do {
 		not_done = 0;
 		list_for_each_entry(qh, &ehci->split_intr_qhs,
-				     split_intr_qhs) {
-			if (qh->hw_info1 & INACTIVATE_BIT)
+				split_intr_qhs) {
+			if (qh->hw_info1 & inactivate)
 				/* already off */
 				continue;
 			/*
@@ -539,8 +540,8 @@
 			if (safe == 0) {
 				not_done = 1;
 			} else if (safe > 0) {
-				qh->was_active = qh->hw_token & ACTIVE_BIT;
-				qh->hw_info1 |= INACTIVATE_BIT;
+				qh->was_active = qh->hw_token & active;
+				qh->hw_info1 |= inactivate;
 			}
 		}
 	} while (not_done);
@@ -552,11 +553,14 @@
 	struct ehci_qh	*qh;
 	u32		token;
 	int		not_done, safe;
+	u32		inactivate = INACTIVATE_BIT(ehci);
+	u32		active = ACTIVE_BIT(ehci);
+	u32		halt = HALT_BIT(ehci);
 
 	do {
 		not_done = 0;
 		list_for_each_entry(qh, &ehci->split_intr_qhs, split_intr_qhs) {
-			if (!(qh->hw_info1 & INACTIVATE_BIT)) /* already on */
+			if (!(qh->hw_info1 & inactivate)) /* already on */
 				continue;
 			/*
 			 * Don't reactivate if cached, or controller might
@@ -568,11 +572,11 @@
 			} else if (safe > 0) {
 				/* See EHCI 1.0 section 4.15.2.4. */
 				token = qh->hw_token;
-				qh->hw_token = (token | HALT_BIT) & ~ACTIVE_BIT;
+				qh->hw_token = (token | halt) & ~active;
 				wmb();
-				qh->hw_info1 &= ~INACTIVATE_BIT;
+				qh->hw_info1 &= ~inactivate;
 				wmb();
-				qh->hw_token = (token & ~HALT_BIT) | qh->was_active;
+				qh->hw_token = (token & ~halt) | qh->was_active;
 			}
 		}
 	} while (not_done);
@@ -592,7 +596,7 @@
 
 	dev_dbg (&qh->dev->dev,
 		"link qh%d-%04x/%p start %d [%d/%d us]\n",
-		period, le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		period, hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 #ifdef CONFIG_CPU_FREQ
@@ -603,7 +607,7 @@
 	 */
 	if (ehci->cpufreq_changing)
 		if (!(qh->hw_info1 & (cpu_to_le32(1 << 13))))
-			qh->hw_info1 |= INACTIVATE_BIT;
+			qh->hw_info1 |= INACTIVATE_BIT(ehci);
 #endif
 
 	/* high bandwidth, or otherwise every microframe */
@@ -611,17 +615,17 @@
 		period = 1;
 
 	for (i = qh->start; i < ehci->periodic_size; i += period) {
-		union ehci_shadow	*prev = &ehci->pshadow [i];
-		__le32			*hw_p = &ehci->periodic [i];
+		union ehci_shadow	*prev = &ehci->pshadow[i];
+		__hc32			*hw_p = &ehci->periodic[i];
 		union ehci_shadow	here = *prev;
-		__le32			type = 0;
+		__hc32			type = 0;
 
 		/* skip the iso nodes at list head */
 		while (here.ptr) {
-			type = Q_NEXT_TYPE (*hw_p);
-			if (type == Q_TYPE_QH)
+			type = Q_NEXT_TYPE(ehci, *hw_p);
+			if (type == cpu_to_hc32(ehci, Q_TYPE_QH))
 				break;
-			prev = periodic_next_shadow (prev, type);
+			prev = periodic_next_shadow(ehci, prev, type);
 			hw_p = &here.qh->hw_next;
 			here = *prev;
 		}
@@ -643,7 +647,7 @@
 				qh->hw_next = *hw_p;
 			wmb ();
 			prev->qh = qh;
-			*hw_p = QH_NEXT (qh->qh_dma);
+			*hw_p = QH_NEXT (ehci, qh->qh_dma);
 		}
 	}
 	qh->qh_state = QH_STATE_LINKED;
@@ -677,7 +681,7 @@
 	//   and this qh is active in the current uframe
 	//   (and overlay token SplitXstate is false?)
 	// THEN
-	//   qh->hw_info1 |= __constant_cpu_to_le32 (1 << 7 /* "ignore" */);
+	//   qh->hw_info1 |= __constant_cpu_to_hc32(1 << 7 /* "ignore" */);
 
 #ifdef CONFIG_CPU_FREQ
 	/* remove qh from list of low/full speed interrupt QHs */
@@ -701,7 +705,7 @@
 	dev_dbg (&qh->dev->dev,
 		"unlink qh%d-%04x/%p start %d [%d/%d us]\n",
 		qh->period,
-		le32_to_cpup (&qh->hw_info2) & (QH_CMASK | QH_SMASK),
+		hc32_to_cpup(ehci, &qh->hw_info2) & (QH_CMASK | QH_SMASK),
 		qh, qh->start, qh->usecs, qh->c_usecs);
 
 	/* qh->qh_next still "live" to HC */
@@ -727,7 +731,7 @@
 	 * active high speed queues may need bigger delays...
 	 */
 	if (list_empty (&qh->qtd_list)
-			|| (__constant_cpu_to_le32 (QH_CMASK)
+			|| (cpu_to_hc32(ehci, QH_CMASK)
 					& qh->hw_info2) != 0)
 		wait = 2;
 	else
@@ -735,7 +739,7 @@
 
 	udelay (wait);
 	qh->qh_state = QH_STATE_IDLE;
-	qh->hw_next = EHCI_LIST_END;
+	qh->hw_next = EHCI_LIST_END(ehci);
 	wmb ();
 }
 
@@ -792,7 +796,7 @@
 	unsigned		frame,
 	unsigned		uframe,
 	const struct ehci_qh	*qh,
-	__le32			*c_maskp
+	__hc32			*c_maskp
 )
 {
 	int		retval = -ENOSPC;
@@ -824,7 +828,7 @@
 
 		retval = 0;
 
-		*c_maskp = cpu_to_le32 (mask << 8);
+		*c_maskp = cpu_to_hc32(ehci, mask << 8);
 	}
 #else
 	/* Make sure this tt's buffer is also available for CSPLITs.
@@ -835,7 +839,7 @@
 	 * one smart pass...
 	 */
 	mask = 0x03 << (uframe + qh->gap_uf);
-	*c_maskp = cpu_to_le32 (mask << 8);
+	*c_maskp = cpu_to_hc32(ehci, mask << 8);
 
 	mask |= 1 << uframe;
 	if (tt_no_collision (ehci, qh->period, qh->dev, frame, mask)) {
@@ -855,20 +859,20 @@
 /* "first fit" scheduling policy used the first time through,
  * or when the previous schedule slot can't be re-used.
  */
-static int qh_schedule (struct ehci_hcd *ehci, struct ehci_qh *qh)
+static int qh_schedule(struct ehci_hcd *ehci, struct ehci_qh *qh)
 {
 	int		status;
 	unsigned	uframe;
-	__le32		c_mask;
+	__hc32		c_mask;
 	unsigned	frame;		/* 0..(qh->period - 1), or NO_FRAME */
 
 	qh_refresh(ehci, qh);
-	qh->hw_next = EHCI_LIST_END;
+	qh->hw_next = EHCI_LIST_END(ehci);
 	frame = qh->start;
 
 	/* reuse the previous schedule slots, if we can */
 	if (frame < qh->period) {
-		uframe = ffs (le32_to_cpup (&qh->hw_info2) & QH_SMASK);
+		uframe = ffs(hc32_to_cpup(ehci, &qh->hw_info2) & QH_SMASK);
 		status = check_intr_schedule (ehci, frame, --uframe,
 				qh, &c_mask);
 	} else {
@@ -904,10 +908,10 @@
 		qh->start = frame;
 
 		/* reset S-frame and (maybe) C-frame masks */
-		qh->hw_info2 &= __constant_cpu_to_le32(~(QH_CMASK | QH_SMASK));
+		qh->hw_info2 &= cpu_to_hc32(ehci, ~(QH_CMASK | QH_SMASK));
 		qh->hw_info2 |= qh->period
-			? cpu_to_le32 (1 << uframe)
-			: __constant_cpu_to_le32 (QH_SMASK);
+			? cpu_to_hc32(ehci, 1 << uframe)
+			: cpu_to_hc32(ehci, QH_SMASK);
 		qh->hw_info2 |= c_mask;
 	} else
 		ehci_dbg (ehci, "reused qh %p schedule\n", qh);
@@ -937,7 +941,7 @@
 	spin_lock_irqsave (&ehci->lock, flags);
 
 	if (unlikely(!test_bit(HCD_FLAG_HW_ACCESSIBLE,
-			       &ehci_to_hcd(ehci)->flags))) {
+			&ehci_to_hcd(ehci)->flags))) {
 		status = -ESHUTDOWN;
 		goto done;
 	}
@@ -1027,9 +1031,9 @@
 		buf1 |= maxp;
 		maxp *= multi;
 
-		stream->buf0 = cpu_to_le32 ((epnum << 8) | dev->devnum);
-		stream->buf1 = cpu_to_le32 (buf1);
-		stream->buf2 = cpu_to_le32 (multi);
+		stream->buf0 = cpu_to_hc32(ehci, (epnum << 8) | dev->devnum);
+		stream->buf1 = cpu_to_hc32(ehci, buf1);
+		stream->buf2 = cpu_to_hc32(ehci, multi);
 
 		/* usbfs wants to report the average usecs per frame tied up
 		 * when transfers on this endpoint are scheduled ...
@@ -1072,7 +1076,7 @@
 		bandwidth /= 1 << (interval + 2);
 
 		/* stream->splits gets created from raw_mask later */
-		stream->address = cpu_to_le32 (addr);
+		stream->address = cpu_to_hc32(ehci, addr);
 	}
 	stream->bandwidth = bandwidth;
 
@@ -1206,7 +1210,8 @@
 }
 
 static inline void
-itd_sched_init (
+itd_sched_init(
+	struct ehci_hcd		*ehci,
 	struct ehci_iso_sched	*iso_sched,
 	struct ehci_iso_stream	*stream,
 	struct urb		*urb
@@ -1236,7 +1241,7 @@
 				&& !(urb->transfer_flags & URB_NO_INTERRUPT))
 			trans |= EHCI_ITD_IOC;
 		trans |= length << 16;
-		uframe->transaction = cpu_to_le32 (trans);
+		uframe->transaction = cpu_to_hc32(ehci, trans);
 
 		/* might need to cross a buffer page within a uframe */
 		uframe->bufp = (buf & ~(u64)0x0fff);
@@ -1278,7 +1283,7 @@
 	if (unlikely (sched == NULL))
 		return -ENOMEM;
 
-	itd_sched_init (sched, stream, urb);
+	itd_sched_init(ehci, sched, stream, urb);
 
 	if (urb->interval < 8)
 		num_itds = 1 + (sched->span + 7) / 8;
@@ -1296,7 +1301,7 @@
 		/* prefer previously-allocated itds */
 		if (likely (!list_empty(&stream->free_list))) {
 			itd = list_entry (stream->free_list.prev,
-					 struct ehci_itd, itd_list);
+					struct ehci_itd, itd_list);
 			list_del (&itd->itd_list);
 			itd_dma = itd->itd_dma;
 		} else
@@ -1423,7 +1428,7 @@
 		uframe += period_uframes;
 	} while (uframe < mod);
 
-	stream->splits = cpu_to_le32(stream->raw_mask << (uframe & 7));
+	stream->splits = cpu_to_hc32(ehci, stream->raw_mask << (uframe & 7));
 	return 1;
 }
 
@@ -1544,12 +1549,13 @@
 /*-------------------------------------------------------------------------*/
 
 static inline void
-itd_init (struct ehci_iso_stream *stream, struct ehci_itd *itd)
+itd_init(struct ehci_hcd *ehci, struct ehci_iso_stream *stream,
+		struct ehci_itd *itd)
 {
 	int i;
 
 	/* it's been recently zeroed */
-	itd->hw_next = EHCI_LIST_END;
+	itd->hw_next = EHCI_LIST_END(ehci);
 	itd->hw_bufp [0] = stream->buf0;
 	itd->hw_bufp [1] = stream->buf1;
 	itd->hw_bufp [2] = stream->buf2;
@@ -1561,7 +1567,8 @@
 }
 
 static inline void
-itd_patch (
+itd_patch(
+	struct ehci_hcd		*ehci,
 	struct ehci_itd		*itd,
 	struct ehci_iso_sched	*iso_sched,
 	unsigned		index,
@@ -1576,17 +1583,18 @@
 	uframe &= 0x07;
 	itd->index [uframe] = index;
 
-	itd->hw_transaction [uframe] = uf->transaction;
-	itd->hw_transaction [uframe] |= cpu_to_le32 (pg << 12);
-	itd->hw_bufp [pg] |= cpu_to_le32 (uf->bufp & ~(u32)0);
-	itd->hw_bufp_hi [pg] |= cpu_to_le32 ((u32)(uf->bufp >> 32));
+	itd->hw_transaction[uframe] = uf->transaction;
+	itd->hw_transaction[uframe] |= cpu_to_hc32(ehci, pg << 12);
+	itd->hw_bufp[pg] |= cpu_to_hc32(ehci, uf->bufp & ~(u32)0);
+	itd->hw_bufp_hi[pg] |= cpu_to_hc32(ehci, (u32)(uf->bufp >> 32));
 
 	/* iso_frame_desc[].offset must be strictly increasing */
 	if (unlikely (uf->cross)) {
 		u64	bufp = uf->bufp + 4096;
+
 		itd->pg = ++pg;
-		itd->hw_bufp [pg] |= cpu_to_le32 (bufp & ~(u32)0);
-		itd->hw_bufp_hi [pg] |= cpu_to_le32 ((u32)(bufp >> 32));
+		itd->hw_bufp[pg] |= cpu_to_hc32(ehci, bufp & ~(u32)0);
+		itd->hw_bufp_hi[pg] |= cpu_to_hc32(ehci, (u32)(bufp >> 32));
 	}
 }
 
@@ -1599,7 +1607,7 @@
 	ehci->pshadow [frame].itd = itd;
 	itd->frame = frame;
 	wmb ();
-	ehci->periodic [frame] = cpu_to_le32 (itd->itd_dma) | Q_TYPE_ITD;
+	ehci->periodic[frame] = cpu_to_hc32(ehci, itd->itd_dma | Q_TYPE_ITD);
 }
 
 /* fit urb's itds into the selected schedule slot; activate as needed */
@@ -1644,14 +1652,14 @@
 			list_move_tail (&itd->itd_list, &stream->td_list);
 			itd->stream = iso_stream_get (stream);
 			itd->urb = usb_get_urb (urb);
-			itd_init (stream, itd);
+			itd_init (ehci, stream, itd);
 		}
 
 		uframe = next_uframe & 0x07;
 		frame = next_uframe >> 3;
 
 		itd->usecs [uframe] = stream->usecs;
-		itd_patch (itd, iso_sched, packet, uframe);
+		itd_patch(ehci, itd, iso_sched, packet, uframe);
 
 		next_uframe += stream->interval;
 		stream->depth += stream->interval;
@@ -1699,7 +1707,7 @@
 		urb_index = itd->index[uframe];
 		desc = &urb->iso_frame_desc [urb_index];
 
-		t = le32_to_cpup (&itd->hw_transaction [uframe]);
+		t = hc32_to_cpup(ehci, &itd->hw_transaction [uframe]);
 		itd->hw_transaction [uframe] = 0;
 		stream->depth -= stream->interval;
 
@@ -1829,7 +1837,8 @@
  */
 
 static inline void
-sitd_sched_init (
+sitd_sched_init(
+	struct ehci_hcd		*ehci,
 	struct ehci_iso_sched	*iso_sched,
 	struct ehci_iso_stream	*stream,
 	struct urb		*urb
@@ -1858,7 +1867,7 @@
 				&& !(urb->transfer_flags & URB_NO_INTERRUPT))
 			trans |= SITD_IOC;
 		trans |= length << 16;
-		packet->transaction = cpu_to_le32 (trans);
+		packet->transaction = cpu_to_hc32(ehci, trans);
 
 		/* might need to cross a buffer page within a td */
 		packet->bufp = buf;
@@ -1894,7 +1903,7 @@
 	if (iso_sched == NULL)
 		return -ENOMEM;
 
-	sitd_sched_init (iso_sched, stream, urb);
+	sitd_sched_init(ehci, iso_sched, stream, urb);
 
 	/* allocate/init sITDs */
 	spin_lock_irqsave (&ehci->lock, flags);
@@ -1946,7 +1955,8 @@
 /*-------------------------------------------------------------------------*/
 
 static inline void
-sitd_patch (
+sitd_patch(
+	struct ehci_hcd		*ehci,
 	struct ehci_iso_stream	*stream,
 	struct ehci_sitd	*sitd,
 	struct ehci_iso_sched	*iso_sched,
@@ -1956,20 +1966,20 @@
 	struct ehci_iso_packet	*uf = &iso_sched->packet [index];
 	u64			bufp = uf->bufp;
 
-	sitd->hw_next = EHCI_LIST_END;
+	sitd->hw_next = EHCI_LIST_END(ehci);
 	sitd->hw_fullspeed_ep = stream->address;
 	sitd->hw_uframe = stream->splits;
 	sitd->hw_results = uf->transaction;
-	sitd->hw_backpointer = EHCI_LIST_END;
+	sitd->hw_backpointer = EHCI_LIST_END(ehci);
 
 	bufp = uf->bufp;
-	sitd->hw_buf [0] = cpu_to_le32 (bufp);
-	sitd->hw_buf_hi [0] = cpu_to_le32 (bufp >> 32);
+	sitd->hw_buf[0] = cpu_to_hc32(ehci, bufp);
+	sitd->hw_buf_hi[0] = cpu_to_hc32(ehci, bufp >> 32);
 
-	sitd->hw_buf [1] = cpu_to_le32 (uf->buf1);
+	sitd->hw_buf[1] = cpu_to_hc32(ehci, uf->buf1);
 	if (uf->cross)
 		bufp += 4096;
-	sitd->hw_buf_hi [1] = cpu_to_le32 (bufp >> 32);
+	sitd->hw_buf_hi[1] = cpu_to_hc32(ehci, bufp >> 32);
 	sitd->index = index;
 }
 
@@ -1982,7 +1992,7 @@
 	ehci->pshadow [frame].sitd = sitd;
 	sitd->frame = frame;
 	wmb ();
-	ehci->periodic [frame] = cpu_to_le32 (sitd->sitd_dma) | Q_TYPE_SITD;
+	ehci->periodic[frame] = cpu_to_hc32(ehci, sitd->sitd_dma | Q_TYPE_SITD);
 }
 
 /* fit urb's sitds into the selected schedule slot; activate as needed */
@@ -2010,7 +2020,7 @@
 			urb->dev->devpath, stream->bEndpointAddress & 0x0f,
 			(stream->bEndpointAddress & USB_DIR_IN) ? "in" : "out",
 			(next_uframe >> 3) % ehci->periodic_size,
-			stream->interval, le32_to_cpu (stream->splits));
+			stream->interval, hc32_to_cpu(ehci, stream->splits));
 		stream->start = jiffies;
 	}
 	ehci_to_hcd(ehci)->self.bandwidth_isoc_reqs++;
@@ -2031,7 +2041,7 @@
 		sitd->stream = iso_stream_get (stream);
 		sitd->urb = usb_get_urb (urb);
 
-		sitd_patch (stream, sitd, sched, packet);
+		sitd_patch(ehci, stream, sitd, sched, packet);
 		sitd_link (ehci, (next_uframe >> 3) % ehci->periodic_size,
 				sitd);
 
@@ -2069,7 +2079,7 @@
 
 	urb_index = sitd->index;
 	desc = &urb->iso_frame_desc [urb_index];
-	t = le32_to_cpup (&sitd->hw_results);
+	t = hc32_to_cpup(ehci, &sitd->hw_results);
 
 	/* report transfer status */
 	if (t & SITD_ERRS) {
@@ -2224,7 +2234,7 @@
 
 	for (;;) {
 		union ehci_shadow	q, *q_p;
-		__le32			type, *hw_p;
+		__hc32			type, *hw_p;
 		unsigned		uframes;
 
 		/* don't scan past the live uframe */
@@ -2242,7 +2252,7 @@
 		q_p = &ehci->pshadow [frame];
 		hw_p = &ehci->periodic [frame];
 		q.ptr = q_p->ptr;
-		type = Q_NEXT_TYPE (*hw_p);
+		type = Q_NEXT_TYPE(ehci, *hw_p);
 		modified = 0;
 
 		while (q.ptr != NULL) {
@@ -2251,11 +2261,11 @@
 			int			live;
 
 			live = HC_IS_RUNNING (ehci_to_hcd(ehci)->state);
-			switch (type) {
+			switch (hc32_to_cpu(ehci, type)) {
 			case Q_TYPE_QH:
 				/* handle any completions */
 				temp.qh = qh_get (q.qh);
-				type = Q_NEXT_TYPE (q.qh->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.qh->hw_next);
 				q = q.qh->qh_next;
 				modified = qh_completions (ehci, temp.qh);
 				if (unlikely (list_empty (&temp.qh->qtd_list)))
@@ -2266,10 +2276,10 @@
 				/* for "save place" FSTNs, look at QH entries
 				 * in the previous frame for completions.
 				 */
-				if (q.fstn->hw_prev != EHCI_LIST_END) {
+				if (q.fstn->hw_prev != EHCI_LIST_END(ehci)) {
 					dbg ("ignoring completions from FSTNs");
 				}
-				type = Q_NEXT_TYPE (q.fstn->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.fstn->hw_next);
 				q = q.fstn->fstn_next;
 				break;
 			case Q_TYPE_ITD:
@@ -2277,11 +2287,12 @@
 				rmb ();
 				for (uf = live ? uframes : 8; uf < 8; uf++) {
 					if (0 == (q.itd->hw_transaction [uf]
-							& ITD_ACTIVE))
+							& ITD_ACTIVE(ehci)))
 						continue;
 					q_p = &q.itd->itd_next;
 					hw_p = &q.itd->hw_next;
-					type = Q_NEXT_TYPE (q.itd->hw_next);
+					type = Q_NEXT_TYPE(ehci,
+							q.itd->hw_next);
 					q = *q_p;
 					break;
 				}
@@ -2293,23 +2304,24 @@
 				 */
 				*q_p = q.itd->itd_next;
 				*hw_p = q.itd->hw_next;
-				type = Q_NEXT_TYPE (q.itd->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.itd->hw_next);
 				wmb();
 				modified = itd_complete (ehci, q.itd);
 				q = *q_p;
 				break;
 			case Q_TYPE_SITD:
-				if ((q.sitd->hw_results & SITD_ACTIVE)
+				if ((q.sitd->hw_results & SITD_ACTIVE(ehci))
 						&& live) {
 					q_p = &q.sitd->sitd_next;
 					hw_p = &q.sitd->hw_next;
-					type = Q_NEXT_TYPE (q.sitd->hw_next);
+					type = Q_NEXT_TYPE(ehci,
+							q.sitd->hw_next);
 					q = *q_p;
 					break;
 				}
 				*q_p = q.sitd->sitd_next;
 				*hw_p = q.sitd->hw_next;
-				type = Q_NEXT_TYPE (q.sitd->hw_next);
+				type = Q_NEXT_TYPE(ehci, q.sitd->hw_next);
 				wmb();
 				modified = sitd_complete (ehci, q.sitd);
 				q = *q_p;