[PATCH] USB: Converting cdc acm to a ring queue

This patch by David converts the receive path of the CDC ACM driver (the
queue of data sent by the device) from a single read URB to a queue of
URBs. This is needed for quicker devices. Please apply.

Signed-off-by: Oliver Neukum <oliver@neukum.name>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

 drivers/usb/class/cdc-acm.c |  229 ++++++++++++++++++++++++++++++--------------
 drivers/usb/class/cdc-acm.h |   33 +++++-
 2 files changed, 185 insertions(+), 77 deletions(-)
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 1b47514..72936dc 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -6,6 +6,7 @@
  * Copyright (c) 1999 Johannes Erdfelt	<johannes@erdfelt.com>
  * Copyright (c) 2000 Vojtech Pavlik	<vojtech@suse.cz>
  * Copyright (c) 2004 Oliver Neukum	<oliver@neukum.name>
+ * Copyright (c) 2005 David Kubicek	<dave@awk.cz>
  *
  * USB Abstract Control Model driver for USB modems and ISDN adapters
  *
@@ -29,6 +30,7 @@
  *		config we want, sysadmin changes bConfigurationValue in sysfs.
  *	v0.23 - use softirq for rx processing, as needed by tty layer
  *	v0.24 - change probe method to evaluate CDC union descriptor
+ *	v0.25 - downstream tasks parallelized to maximize throughput
  */
 
 /*
@@ -63,14 +65,15 @@
 #include <linux/usb_cdc.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
+#include <linux/list.h>
 
 #include "cdc-acm.h"
 
 /*
  * Version Information
  */
-#define DRIVER_VERSION "v0.23"
-#define DRIVER_AUTHOR "Armin Fuerst, Pavel Machek, Johannes Erdfelt, Vojtech Pavlik"
+#define DRIVER_VERSION "v0.25"
+#define DRIVER_AUTHOR "Armin Fuerst, Pavel Machek, Johannes Erdfelt, Vojtech Pavlik, David Kubicek"
 #define DRIVER_DESC "USB Abstract Control Model driver for USB modems and ISDN adapters"
 
 static struct usb_driver acm_driver;
@@ -284,7 +287,9 @@
 /* data interface returns incoming bytes, or we got unthrottled */
 static void acm_read_bulk(struct urb *urb, struct pt_regs *regs)
 {
-	struct acm *acm = urb->context;
+	struct acm_rb *buf;
+	struct acm_ru *rcv = urb->context;
+	struct acm *acm = rcv->instance;
 	dbg("Entering acm_read_bulk with status %d\n", urb->status);
 
 	if (!ACM_READY(acm))
@@ -293,49 +298,109 @@
 	if (urb->status)
 		dev_dbg(&acm->data->dev, "bulk rx status %d\n", urb->status);
 
-	/* calling tty_flip_buffer_push() in_irq() isn't allowed */
-	tasklet_schedule(&acm->bh);
+	buf = rcv->buffer;
+	buf->size = urb->actual_length;
+
+	spin_lock(&acm->read_lock);
+	list_add_tail(&rcv->list, &acm->spare_read_urbs);
+	list_add_tail(&buf->list, &acm->filled_read_bufs);
+	spin_unlock(&acm->read_lock);
+
+	tasklet_schedule(&acm->urb_task);
 }
 
 static void acm_rx_tasklet(unsigned long _acm)
 {
 	struct acm *acm = (void *)_acm;
-	struct urb *urb = acm->readurb;
+	struct acm_rb *buf;
 	struct tty_struct *tty = acm->tty;
-	unsigned char *data = urb->transfer_buffer;
+	struct acm_ru *rcv;
+	//unsigned long flags;
 	int i = 0;
 	dbg("Entering acm_rx_tasklet");
 
-	if (urb->actual_length > 0 && !acm->throttle)  {
-		for (i = 0; i < urb->actual_length && !acm->throttle; i++) {
-			/* if we insert more than TTY_FLIPBUF_SIZE characters,
-			 * we drop them. */
-			if (tty->flip.count >= TTY_FLIPBUF_SIZE) {
-				tty_flip_buffer_push(tty);
-			}
-			tty_insert_flip_char(tty, data[i], 0);
-		}
-		dbg("Handed %d bytes to tty layer", i+1);
-		tty_flip_buffer_push(tty);
+	if (!ACM_READY(acm) || acm->throttle)
+		return;
+
+next_buffer:
+	spin_lock(&acm->read_lock);
+	if (list_empty(&acm->filled_read_bufs)) {
+		spin_unlock(&acm->read_lock);
+		goto urbs;
 	}
+	buf = list_entry(acm->filled_read_bufs.next,
+			 struct acm_rb, list);
+	list_del(&buf->list);
+	spin_unlock(&acm->read_lock);
+
+	dbg("acm_rx_tasklet: procesing buf 0x%p, size = %d\n", buf, buf->size);
+
+	for (i = 0; i < buf->size && !acm->throttle; i++) {
+		/* if we insert more than TTY_FLIPBUF_SIZE characters,
+		   we drop them. */
+		if (tty->flip.count >= TTY_FLIPBUF_SIZE) {
+			tty_flip_buffer_push(tty);
+ 		}
+		tty_insert_flip_char(tty, buf->base[i], 0);
+ 	}
+	tty_flip_buffer_push(tty);
 
 	spin_lock(&acm->throttle_lock);
 	if (acm->throttle) {
 		dbg("Throtteling noticed");
-		memmove(data, data + i, urb->actual_length - i);
-		urb->actual_length -= i;
-		acm->resubmit_to_unthrottle = 1;
+		memmove(buf->base, buf->base + i, buf->size - i);
+		buf->size -= i;
 		spin_unlock(&acm->throttle_lock);
+		spin_lock(&acm->read_lock);
+		list_add(&buf->list, &acm->filled_read_bufs);
+		spin_unlock(&acm->read_lock);
 		return;
 	}
 	spin_unlock(&acm->throttle_lock);
 
-	urb->actual_length = 0;
-	urb->dev = acm->dev;
+	spin_lock(&acm->read_lock);
+	list_add(&buf->list, &acm->spare_read_bufs);
+	spin_unlock(&acm->read_lock);
+	goto next_buffer;
 
-	i = usb_submit_urb(urb, GFP_ATOMIC);
-	if (i)
-		dev_dbg(&acm->data->dev, "bulk rx resubmit %d\n", i);
+urbs:
+	while (!list_empty(&acm->spare_read_bufs)) {
+		spin_lock(&acm->read_lock);
+		if (list_empty(&acm->spare_read_urbs)) {
+			spin_unlock(&acm->read_lock);
+			return;
+		}
+		rcv = list_entry(acm->spare_read_urbs.next,
+				 struct acm_ru, list);
+		list_del(&rcv->list);
+		spin_unlock(&acm->read_lock);
+
+		buf = list_entry(acm->spare_read_bufs.next,
+				 struct acm_rb, list);
+		list_del(&buf->list);
+
+		rcv->buffer = buf;
+
+		usb_fill_bulk_urb(rcv->urb, acm->dev,
+				  acm->rx_endpoint,
+				  buf->base,
+				  acm->readsize,
+				  acm_read_bulk, rcv);
+		rcv->urb->transfer_dma = buf->dma;
+		rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
+		dbg("acm_rx_tasklet: sending urb 0x%p, rcv 0x%p, buf 0x%p\n", rcv->urb, rcv, buf);
+
+		/* This shouldn't kill the driver as unsuccessful URBs are returned to the
+		   free-urbs-pool and resubmitted ASAP */
+		if (usb_submit_urb(rcv->urb, GFP_ATOMIC) < 0) {
+			list_add(&buf->list, &acm->spare_read_bufs);
+			spin_lock(&acm->read_lock);
+			list_add(&rcv->list, &acm->spare_read_urbs);
+			spin_unlock(&acm->read_lock);
+			return;
+		}
+	}
 }
 
 /* data interface wrote those outgoing bytes */
@@ -369,6 +434,7 @@
 {
 	struct acm *acm;
 	int rv = -EINVAL;
+	int i;
 	dbg("Entering acm_tty_open.\n");
 	
 	down(&open_sem);
@@ -382,7 +448,9 @@
 	tty->driver_data = acm;
 	acm->tty = tty;
 
-
+	/* force low_latency on so that our tty_push actually forces the data through,
+	   otherwise it is scheduled, and with high data rates data can get lost. */
+	tty->low_latency = 1;
 
 	if (acm->used++) {
 		goto done;
@@ -394,18 +462,20 @@
 		goto bail_out;
 	}
 
-	acm->readurb->dev = acm->dev;
-	if (usb_submit_urb(acm->readurb, GFP_KERNEL)) {
-		dbg("usb_submit_urb(read bulk) failed");
-		goto bail_out_and_unlink;
-	}
-
 	if (0 > acm_set_control(acm, acm->ctrlout = ACM_CTRL_DTR | ACM_CTRL_RTS))
 		goto full_bailout;
 
-	/* force low_latency on so that our tty_push actually forces the data through, 
-	   otherwise it is scheduled, and with high data rates data can get lost. */
-	tty->low_latency = 1;
+	INIT_LIST_HEAD(&acm->spare_read_urbs);
+	INIT_LIST_HEAD(&acm->spare_read_bufs);
+	INIT_LIST_HEAD(&acm->filled_read_bufs);
+	for (i = 0; i < ACM_NRU; i++) {
+		list_add(&(acm->ru[i].list), &acm->spare_read_urbs);
+	}
+	for (i = 0; i < ACM_NRB; i++) {
+		list_add(&(acm->rb[i].list), &acm->spare_read_bufs);
+	}
+
+	tasklet_schedule(&acm->urb_task);
 
 done:
 err_out:
@@ -413,8 +483,6 @@
 	return rv;
 
 full_bailout:
-	usb_kill_urb(acm->readurb);
-bail_out_and_unlink:
 	usb_kill_urb(acm->ctrlurb);
 bail_out:
 	acm->used--;
@@ -424,18 +492,22 @@
 
 static void acm_tty_unregister(struct acm *acm)
 {
+	int i;
+
 	tty_unregister_device(acm_tty_driver, acm->minor);
 	usb_put_intf(acm->control);
 	acm_table[acm->minor] = NULL;
 	usb_free_urb(acm->ctrlurb);
-	usb_free_urb(acm->readurb);
 	usb_free_urb(acm->writeurb);
+	for (i = 0; i < ACM_NRU; i++)
+		usb_free_urb(acm->ru[i].urb);
 	kfree(acm);
 }
 
 static void acm_tty_close(struct tty_struct *tty, struct file *filp)
 {
 	struct acm *acm = tty->driver_data;
+	int i;
 
 	if (!acm || !acm->used)
 		return;
@@ -446,7 +518,8 @@
 			acm_set_control(acm, acm->ctrlout = 0);
 			usb_kill_urb(acm->ctrlurb);
 			usb_kill_urb(acm->writeurb);
-			usb_kill_urb(acm->readurb);
+			for (i = 0; i < ACM_NRU; i++)
+				usb_kill_urb(acm->ru[i].urb);
 		} else
 			acm_tty_unregister(acm);
 	}
@@ -528,10 +601,7 @@
 	spin_lock_bh(&acm->throttle_lock);
 	acm->throttle = 0;
 	spin_unlock_bh(&acm->throttle_lock);
-	if (acm->resubmit_to_unthrottle) {
-		acm->resubmit_to_unthrottle = 0;
-		acm_read_bulk(acm->readurb, NULL);
-	}
+	tasklet_schedule(&acm->urb_task);
 }
 
 static void acm_tty_break_ctl(struct tty_struct *tty, int state)
@@ -694,6 +764,7 @@
 	int call_interface_num = -1;
 	int data_interface_num;
 	unsigned long quirks;
+	int i;
 
 	/* handle quirks deadly to normal probing*/
 	quirks = (unsigned long)id->driver_info;
@@ -833,7 +904,7 @@
 	}
 
 	ctrlsize = le16_to_cpu(epctrl->wMaxPacketSize);
-	readsize = le16_to_cpu(epread->wMaxPacketSize);
+	readsize = le16_to_cpu(epread->wMaxPacketSize)*2;
 	acm->writesize = le16_to_cpu(epwrite->wMaxPacketSize);
 	acm->control = control_interface;
 	acm->data = data_interface;
@@ -842,12 +913,14 @@
 	acm->ctrl_caps = ac_management_function;
 	acm->ctrlsize = ctrlsize;
 	acm->readsize = readsize;
-	acm->bh.func = acm_rx_tasklet;
-	acm->bh.data = (unsigned long) acm;
+	acm->urb_task.func = acm_rx_tasklet;
+	acm->urb_task.data = (unsigned long) acm;
 	INIT_WORK(&acm->work, acm_softint, acm);
 	spin_lock_init(&acm->throttle_lock);
 	spin_lock_init(&acm->write_lock);
+	spin_lock_init(&acm->read_lock);
 	acm->write_ready = 1;
+	acm->rx_endpoint = usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress);
 
 	buf = usb_buffer_alloc(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
 	if (!buf) {
@@ -856,13 +929,6 @@
 	}
 	acm->ctrl_buffer = buf;
 
-	buf = usb_buffer_alloc(usb_dev, readsize, GFP_KERNEL, &acm->read_dma);
-	if (!buf) {
-		dev_dbg(&intf->dev, "out of memory (read buffer alloc)\n");
-		goto alloc_fail3;
-	}
-	acm->read_buffer = buf;
-
 	if (acm_write_buffers_alloc(acm) < 0) {
 		dev_dbg(&intf->dev, "out of memory (write buffer alloc)\n");
 		goto alloc_fail4;
@@ -873,10 +939,25 @@
 		dev_dbg(&intf->dev, "out of memory (ctrlurb kmalloc)\n");
 		goto alloc_fail5;
 	}
-	acm->readurb = usb_alloc_urb(0, GFP_KERNEL);
-	if (!acm->readurb) {
-		dev_dbg(&intf->dev, "out of memory (readurb kmalloc)\n");
-		goto alloc_fail6;
+	for (i = 0; i < ACM_NRU; i++) {
+		struct acm_ru *rcv = &(acm->ru[i]);
+
+		if (!(rcv->urb = usb_alloc_urb(0, GFP_KERNEL))) {
+			dev_dbg(&intf->dev, "out of memory (read urbs usb_alloc_urb)\n");
+			goto alloc_fail7;
+		}
+
+		rcv->urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+		rcv->instance = acm;
+	}
+	for (i = 0; i < ACM_NRB; i++) {
+		struct acm_rb *buf = &(acm->rb[i]);
+
+		// Using usb_buffer_alloc instead of kmalloc as Oliver suggested
+		if (!(buf->base = usb_buffer_alloc(acm->dev, readsize, GFP_KERNEL, &buf->dma))) {
+			dev_dbg(&intf->dev, "out of memory (read bufs usb_buffer_alloc)\n");
+			goto alloc_fail7;
+		}
 	}
 	acm->writeurb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!acm->writeurb) {
@@ -889,15 +970,9 @@
 	acm->ctrlurb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
 	acm->ctrlurb->transfer_dma = acm->ctrl_dma;
 
-	usb_fill_bulk_urb(acm->readurb, usb_dev, usb_rcvbulkpipe(usb_dev, epread->bEndpointAddress),
-			  acm->read_buffer, readsize, acm_read_bulk, acm);
-	acm->readurb->transfer_flags |= URB_NO_FSBR | URB_NO_TRANSFER_DMA_MAP;
-	acm->readurb->transfer_dma = acm->read_dma;
-
 	usb_fill_bulk_urb(acm->writeurb, usb_dev, usb_sndbulkpipe(usb_dev, epwrite->bEndpointAddress),
 			  NULL, acm->writesize, acm_write_bulk, acm);
 	acm->writeurb->transfer_flags |= URB_NO_FSBR | URB_NO_TRANSFER_DMA_MAP;
-	/* acm->writeurb->transfer_dma = 0; */
 
 	dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor);
 
@@ -917,14 +992,14 @@
 	return 0;
 
 alloc_fail7:
-	usb_free_urb(acm->readurb);
-alloc_fail6:
+	for (i = 0; i < ACM_NRB; i++)
+		usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma);
+	for (i = 0; i < ACM_NRU; i++)
+		usb_free_urb(acm->ru[i].urb);
 	usb_free_urb(acm->ctrlurb);
 alloc_fail5:
 	acm_write_buffers_free(acm);
 alloc_fail4:
-	usb_buffer_free(usb_dev, readsize, acm->read_buffer, acm->read_dma);
-alloc_fail3:
 	usb_buffer_free(usb_dev, ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
 alloc_fail2:
 	kfree(acm);
@@ -936,6 +1011,7 @@
 {
 	struct acm *acm = usb_get_intfdata (intf);
 	struct usb_device *usb_dev = interface_to_usbdev(intf);
+	int i;
 
 	if (!acm || !acm->dev) {
 		dbg("disconnect on nonexisting interface");
@@ -946,15 +1022,24 @@
 	acm->dev = NULL;
 	usb_set_intfdata (intf, NULL);
 
+	tasklet_disable(&acm->urb_task);
+
 	usb_kill_urb(acm->ctrlurb);
-	usb_kill_urb(acm->readurb);
 	usb_kill_urb(acm->writeurb);
+	for (i = 0; i < ACM_NRU; i++)
+		usb_kill_urb(acm->ru[i].urb);
+
+	INIT_LIST_HEAD(&acm->filled_read_bufs);
+	INIT_LIST_HEAD(&acm->spare_read_bufs);
+
+	tasklet_enable(&acm->urb_task);
 
 	flush_scheduled_work(); /* wait for acm_softint */
 
 	acm_write_buffers_free(acm);
-	usb_buffer_free(usb_dev, acm->readsize, acm->read_buffer, acm->read_dma);
 	usb_buffer_free(usb_dev, acm->ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
+	for (i = 0; i < ACM_NRB; i++)
+		usb_buffer_free(usb_dev, acm->readsize, acm->rb[i].base, acm->rb[i].dma);
 
 	usb_driver_release_interface(&acm_driver, acm->data);
 
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index 963a5df..fd2aaccd 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -59,6 +59,9 @@
  * when processing onlcr, so we only need 2 buffers.
  */
 #define ACM_NWB  2
+#define ACM_NRU  16
+#define ACM_NRB  16
+
 struct acm_wb {
 	unsigned char *buf;
 	dma_addr_t dmah;
@@ -66,22 +69,43 @@
 	int use;
 };
 
+struct acm_rb {	/* one receive data buffer */
+	struct list_head	list;	/* links the buffer into the spare or filled read-buffer list */
+	int			size;	/* bytes of received data not yet handed to the tty */
+	unsigned char		*base;	/* start of the buffer obtained from usb_buffer_alloc() */
+	dma_addr_t		dma;	/* DMA handle returned together with base */
+};
+
+struct acm_ru {	/* one receive URB and its bookkeeping */
+	struct list_head	list;	/* links the entry into the spare read-URB list */
+	struct acm_rb		*buffer;	/* buffer attached when the URB is filled for submission */
+	struct urb		*urb;	/* the read URB itself */
+	struct acm		*instance;	/* owning device, read back in the completion handler */
+};
+
 struct acm {
 	struct usb_device *dev;				/* the corresponding usb device */
 	struct usb_interface *control;			/* control interface */
 	struct usb_interface *data;			/* data interface */
 	struct tty_struct *tty;				/* the corresponding tty */
-	struct urb *ctrlurb, *readurb, *writeurb;	/* urbs */
-	u8 *ctrl_buffer, *read_buffer;			/* buffers of urbs */
-	dma_addr_t ctrl_dma, read_dma;			/* dma handles of buffers */
+	struct urb *ctrlurb, *writeurb;			/* urbs */
+	u8 *ctrl_buffer;				/* buffers of urbs */
+	dma_addr_t ctrl_dma;				/* dma handles of buffers */
 	struct acm_wb wb[ACM_NWB];
+	struct acm_ru ru[ACM_NRU];
+	struct acm_rb rb[ACM_NRB];
+	int rx_endpoint;
+	spinlock_t read_lock;
+	struct list_head spare_read_urbs;
+	struct list_head spare_read_bufs;
+	struct list_head filled_read_bufs;
 	int write_current;				/* current write buffer */
 	int write_used;					/* number of non-empty write buffers */
 	int write_ready;				/* write urb is not running */
 	spinlock_t write_lock;
 	struct usb_cdc_line_coding line;		/* bits, stop, parity */
 	struct work_struct work;			/* work queue entry for line discipline waking up */
-	struct tasklet_struct bh;			/* rx processing */
+	struct tasklet_struct urb_task;                 /* rx processing */
 	spinlock_t throttle_lock;			/* synchronize throtteling and read callback */
 	unsigned int ctrlin;				/* input control lines (DCD, DSR, RI, break, overruns) */
 	unsigned int ctrlout;				/* output control lines (DTR, RTS) */
@@ -91,7 +115,6 @@
 	unsigned int minor;				/* acm minor number */
 	unsigned char throttle;				/* throttled by tty layer */
 	unsigned char clocal;				/* termios CLOCAL */
-	unsigned char resubmit_to_unthrottle;		/* throtteling has disabled the read urb */
 	unsigned int ctrl_caps;				/* control capabilities from the class specific header */
 };