Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/jk/spufs into merge
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c
index 87eb07f..712001f 100644
--- a/arch/powerpc/platforms/cell/spu_base.c
+++ b/arch/powerpc/platforms/cell/spu_base.c
@@ -81,9 +81,12 @@
 void spu_invalidate_slbs(struct spu *spu)
 {
 	struct spu_priv2 __iomem *priv2 = spu->priv2;
+	unsigned long flags;
 
+	spin_lock_irqsave(&spu->register_lock, flags);
 	if (spu_mfc_sr1_get(spu) & MFC_STATE1_RELOCATE_MASK)
 		out_be64(&priv2->slb_invalidate_all_W, 0UL);
+	spin_unlock_irqrestore(&spu->register_lock, flags);
 }
 EXPORT_SYMBOL_GPL(spu_invalidate_slbs);
 
@@ -148,7 +151,11 @@
 			__func__, slbe, slb->vsid, slb->esid);
 
 	out_be64(&priv2->slb_index_W, slbe);
+	/* set invalid before writing vsid */
+	out_be64(&priv2->slb_esid_RW, 0);
+	/* now it's safe to write the vsid */
 	out_be64(&priv2->slb_vsid_RW, slb->vsid);
+	/* setting the new esid makes the entry valid again */
 	out_be64(&priv2->slb_esid_RW, slb->esid);
 }
 
@@ -290,9 +297,11 @@
 		nr_slbs++;
 	}
 
+	spin_lock_irq(&spu->register_lock);
 	/* Add the set of SLBs */
 	for (i = 0; i < nr_slbs; i++)
 		spu_load_slb(spu, i, &slbs[i]);
+	spin_unlock_irq(&spu->register_lock);
 }
 EXPORT_SYMBOL_GPL(spu_setup_kernel_slbs);
 
@@ -337,13 +346,14 @@
 	if (stat & CLASS1_STORAGE_FAULT_INTR)
 		spu_mfc_dsisr_set(spu, 0ul);
 	spu_int_stat_clear(spu, 1, stat);
-	spin_unlock(&spu->register_lock);
-	pr_debug("%s: %lx %lx %lx %lx\n", __FUNCTION__, mask, stat,
-			dar, dsisr);
 
 	if (stat & CLASS1_SEGMENT_FAULT_INTR)
 		__spu_trap_data_seg(spu, dar);
 
+	spin_unlock(&spu->register_lock);
+	pr_debug("%s: %lx %lx %lx %lx\n", __FUNCTION__, mask, stat,
+			dar, dsisr);
+
 	if (stat & CLASS1_STORAGE_FAULT_INTR)
 		__spu_trap_data_map(spu, dar, dsisr);
 
diff --git a/arch/powerpc/platforms/cell/spufs/context.c b/arch/powerpc/platforms/cell/spufs/context.c
index 133995e..cf6c2c8 100644
--- a/arch/powerpc/platforms/cell/spufs/context.c
+++ b/arch/powerpc/platforms/cell/spufs/context.c
@@ -109,13 +109,12 @@
 
 	/*
 	 * This is basically an open-coded spu_acquire_saved, except that
-	 * we don't acquire the state mutex interruptible.
+	 * we don't acquire the state mutex interruptible, and we don't
+	 * want this context to be rescheduled on release.
 	 */
 	mutex_lock(&ctx->state_mutex);
-	if (ctx->state != SPU_STATE_SAVED) {
-		set_bit(SPU_SCHED_WAS_ACTIVE, &ctx->sched_flags);
+	if (ctx->state != SPU_STATE_SAVED)
 		spu_deactivate(ctx);
-	}
 
 	mm = ctx->owner;
 	ctx->owner = NULL;
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index c66c375..f7a7e86 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -367,6 +367,13 @@
 		return NOPFN_SIGBUS;
 
 	/*
+	 * Because we release the mmap_sem, the context may be destroyed while
+	 * we're in spu_wait. Grab an extra reference so it isn't destroyed
+	 * in the meantime.
+	 */
+	get_spu_context(ctx);
+
+	/*
 	 * We have to wait for context to be loaded before we have
 	 * pages to hand out to the user, but we don't want to wait
 	 * with the mmap_sem held.
@@ -375,7 +382,7 @@
 	 * hanged.
 	 */
 	if (spu_acquire(ctx))
-		return NOPFN_REFAULT;
+		goto refault;
 
 	if (ctx->state == SPU_STATE_SAVED) {
 		up_read(&current->mm->mmap_sem);
@@ -391,6 +398,9 @@
 
 	if (!ret)
 		spu_release(ctx);
+
+refault:
+	put_spu_context(ctx);
 	return NOPFN_REFAULT;
 }
 
diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c
index 3a59721..5d5f680 100644
--- a/arch/powerpc/platforms/cell/spufs/sched.c
+++ b/arch/powerpc/platforms/cell/spufs/sched.c
@@ -246,7 +246,7 @@
 	spu_switch_notify(spu, ctx);
 	ctx->state = SPU_STATE_RUNNABLE;
 
-	spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED);
+	spuctx_switch_state(ctx, SPU_UTIL_USER);
 }
 
 /*
diff --git a/arch/powerpc/platforms/cell/spufs/sputrace.c b/arch/powerpc/platforms/cell/spufs/sputrace.c
index 01974f7..79aa773 100644
--- a/arch/powerpc/platforms/cell/spufs/sputrace.c
+++ b/arch/powerpc/platforms/cell/spufs/sputrace.c
@@ -58,12 +58,12 @@
 		ktime_to_timespec(ktime_sub(t->tstamp, sputrace_start));
 
 	return snprintf(tbuf, n,
-		"[%lu.%09lu] %d: %s (thread = %d, spu = %d)\n",
+		"[%lu.%09lu] %d: %s (ctxthread = %d, spu = %d)\n",
 		(unsigned long) tv.tv_sec,
 		(unsigned long) tv.tv_nsec,
-		t->owner_tid,
-		t->name,
 		t->curr_tid,
+		t->name,
+		t->owner_tid,
 		t->number);
 }
 
@@ -188,6 +188,7 @@
 	{ "spufs_ps_nopfn__insert", "%p %p", spu_context_event },
 	{ "spu_acquire_saved__enter", "%p", spu_context_nospu_event },
 	{ "destroy_spu_context__enter", "%p", spu_context_nospu_event },
+	{ "spufs_stop_callback__enter", "%p %p", spu_context_event },
 };
 
 static int __init sputrace_init(void)
diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c
index 6f5886c..e9dc7a5 100644
--- a/arch/powerpc/platforms/cell/spufs/switch.c
+++ b/arch/powerpc/platforms/cell/spufs/switch.c
@@ -34,6 +34,7 @@
 
 #include <linux/module.h>
 #include <linux/errno.h>
+#include <linux/hardirq.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -117,6 +118,8 @@
 	 *     Write INT_MASK_class1 with value of 0.
 	 *     Save INT_Mask_class2 in CSA.
 	 *     Write INT_MASK_class2 with value of 0.
+	 *     Synchronize all three interrupts to be sure
+	 *     we no longer execute a handler on another CPU.
 	 */
 	spin_lock_irq(&spu->register_lock);
 	if (csa) {
@@ -129,6 +132,9 @@
 	spu_int_mask_set(spu, 2, 0ul);
 	eieio();
 	spin_unlock_irq(&spu->register_lock);
+	synchronize_irq(spu->irqs[0]);
+	synchronize_irq(spu->irqs[1]);
+	synchronize_irq(spu->irqs[2]);
 }
 
 static inline void set_watchdog_timer(struct spu_state *csa, struct spu *spu)