net: pxa168_eth: Use dma_wmb/rmb where appropriate

Update the pxa168_eth driver to use the dma_rmb/wmb calls instead of the
full barriers in order to improve performance: reduced 97ns/39ns on
average in tx/rx path on Marvell BG4CT platform.

Signed-off-by: Jisheng Zhang <jszhang@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index ab9d0e8..54d5154 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -342,9 +342,9 @@
 		pep->rx_skb[used_rx_desc] = skb;
 
 		/* Return the descriptor to DMA ownership */
-		wmb();
+		dma_wmb();
 		p_used_rx_desc->cmd_sts = BUF_OWNED_BY_DMA | RX_EN_INT;
-		wmb();
+		dma_wmb();
 
 		/* Move the used descriptor pointer to the next descriptor */
 		pep->rx_used_desc_q = (used_rx_desc + 1) % pep->rx_ring_size;
@@ -794,7 +794,7 @@
 		rx_used_desc = pep->rx_used_desc_q;
 		rx_desc = &pep->p_rx_desc_area[rx_curr_desc];
 		cmd_sts = rx_desc->cmd_sts;
-		rmb();
+		dma_rmb();
 		if (cmd_sts & (BUF_OWNED_BY_DMA))
 			break;
 		skb = pep->rx_skb[rx_curr_desc];
@@ -1287,7 +1287,7 @@
 
 	skb_tx_timestamp(skb);
 
-	wmb();
+	dma_wmb();
 	desc->cmd_sts = BUF_OWNED_BY_DMA | TX_GEN_CRC | TX_FIRST_DESC |
 			TX_ZERO_PADDING | TX_LAST_DESC | TX_EN_INT;
 	wmb();