lguest: improve interrupt handling, speed up stream networking

lguest never checked for pending interrupts when enabling interrupts, and
things still worked.  However, it makes a significant difference to TCP
performance, so it's time we fixed it by introducing a pending_irq flag
and checking it on irq_restore and irq_enable.

These two routines are now too big to patch into the 8/10 bytes
patch space, so we drop that code.

Note: The high latency on interrupt delivery had a very curious
effect: once everything else was optimized, networking without GSO was
faster than networking with GSO, since more interrupts were sent and
hence a greater chance of one getting through to the Guest!

Note2: (Almost) Closing the same loophole for iret doesn't have any
measurable effect, so I'm leaving that patch for the moment.

Before:
	1GB tcpblast Guest->Host:		30.7 seconds
	1GB tcpblast Guest->Host (no GSO):	76.0 seconds

After:
	1GB tcpblast Guest->Host:		6.8 seconds
	1GB tcpblast Guest->Host (no GSO):	27.8 seconds

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c
index 8ca1def..03fbc88 100644
--- a/drivers/lguest/core.c
+++ b/drivers/lguest/core.c
@@ -189,6 +189,7 @@
 	/* We stop running once the Guest is dead. */
 	while (!cpu->lg->dead) {
 		unsigned int irq;
+		bool more;
 
 		/* First we run any hypercalls the Guest wants done. */
 		if (cpu->hcall)
@@ -213,9 +214,9 @@
 		/* Check if there are any interrupts which can be delivered now:
 		 * if so, this sets up the hander to be executed when we next
 		 * run the Guest. */
-		irq = interrupt_pending(cpu);
+		irq = interrupt_pending(cpu, &more);
 		if (irq < LGUEST_IRQS)
-			try_deliver_interrupt(cpu, irq);
+			try_deliver_interrupt(cpu, irq, more);
 
 		/* All long-lived kernel loops need to check with this horrible
 		 * thing called the freezer.  If the Host is trying to suspend,
@@ -233,7 +234,7 @@
 			set_current_state(TASK_INTERRUPTIBLE);
 			/* Just before we sleep, make sure nothing snuck in
 			 * which we should be doing. */
-			if (interrupt_pending(cpu) < LGUEST_IRQS
+			if (interrupt_pending(cpu, &more) < LGUEST_IRQS
 			    || cpu->break_out)
 				set_current_state(TASK_RUNNING);
 			else
diff --git a/drivers/lguest/hypercalls.c b/drivers/lguest/hypercalls.c
index 54d66f0..f252b71 100644
--- a/drivers/lguest/hypercalls.c
+++ b/drivers/lguest/hypercalls.c
@@ -37,6 +37,10 @@
 		/* This call does nothing, except by breaking out of the Guest
 		 * it makes us process all the asynchronous hypercalls. */
 		break;
+	case LHCALL_SEND_INTERRUPTS:
+		/* This call does nothing too, but by breaking out of the Guest
+		 * it makes us process any pending interrupts. */
+		break;
 	case LHCALL_LGUEST_INIT:
 		/* You can't get here unless you're already initialized.  Don't
 		 * do that. */
diff --git a/drivers/lguest/interrupts_and_traps.c b/drivers/lguest/interrupts_and_traps.c
index a8c966f..5a10754 100644
--- a/drivers/lguest/interrupts_and_traps.c
+++ b/drivers/lguest/interrupts_and_traps.c
@@ -131,7 +131,7 @@
  * interrupt_pending() returns the first pending interrupt which isn't blocked
  * by the Guest.  It is called before every entry to the Guest, and just before
  * we go to sleep when the Guest has halted itself. */
-unsigned int interrupt_pending(struct lg_cpu *cpu)
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more)
 {
 	unsigned int irq;
 	DECLARE_BITMAP(blk, LGUEST_IRQS);
@@ -149,13 +149,14 @@
 
 	/* Find the first interrupt. */
 	irq = find_first_bit(blk, LGUEST_IRQS);
+	*more = find_next_bit(blk, LGUEST_IRQS, irq+1);
 
 	return irq;
 }
 
 /* This actually diverts the Guest to running an interrupt handler, once an
  * interrupt has been identified by interrupt_pending(). */
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq)
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more)
 {
 	struct desc_struct *idt;
 
@@ -178,8 +179,12 @@
 		u32 irq_enabled;
 		if (get_user(irq_enabled, &cpu->lg->lguest_data->irq_enabled))
 			irq_enabled = 0;
-		if (!irq_enabled)
+		if (!irq_enabled) {
+			/* Make sure they know an IRQ is pending. */
+			put_user(X86_EFLAGS_IF,
+				 &cpu->lg->lguest_data->irq_pending);
 			return;
+		}
 	}
 
 	/* Look at the IDT entry the Guest gave us for this interrupt.  The
@@ -202,6 +207,11 @@
 	 * here is a compromise which means at least it gets updated every
 	 * timer interrupt. */
 	write_timestamp(cpu);
+
+	/* If there are no other interrupts we want to deliver, clear
+	 * the pending flag. */
+	if (!more)
+		put_user(0, &cpu->lg->lguest_data->irq_pending);
 }
 /*:*/
 
diff --git a/drivers/lguest/lg.h b/drivers/lguest/lg.h
index 6743cf14..573896533 100644
--- a/drivers/lguest/lg.h
+++ b/drivers/lguest/lg.h
@@ -139,8 +139,8 @@
 #define pgd_pfn(x)	(pgd_val(x) >> PAGE_SHIFT)
 
 /* interrupts_and_traps.c: */
-unsigned int interrupt_pending(struct lg_cpu *cpu);
-void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq);
+unsigned int interrupt_pending(struct lg_cpu *cpu, bool *more);
+void try_deliver_interrupt(struct lg_cpu *cpu, unsigned int irq, bool more);
 bool deliver_trap(struct lg_cpu *cpu, unsigned int num);
 void load_guest_idt_entry(struct lg_cpu *cpu, unsigned int i,
 			  u32 low, u32 hi);