/* arch/metag/kernel/smp.c - linux - Git at Google */

 /*
  *  Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
  *
  *  Copyright (C) 2002 ARM Limited, All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
 #include <linux/atomic.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/init.h>
 #include <linux/spinlock.h>
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/cache.h>
 #include <linux/profile.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 #include <linux/seq_file.h>
 #include <linux/irq.h>
 #include <linux/bootmem.h>

 #include <asm/cacheflush.h>
 #include <asm/cachepart.h>
 #include <asm/core_reg.h>
 #include <asm/cpu.h>
 #include <asm/global_lock.h>
 #include <asm/metag_mem.h>
 #include <asm/mmu_context.h>
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 #include <asm/hwthread.h>
 #include <asm/traps.h>

 /*
  * Address of the data / instruction cache partition register of hardware
  * thread n: the thread 0 register plus n times the per-thread stride.
  */
 #define SYSC_DCPART(n)	(SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
 #define SYSC_ICPART(n)	(SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))

 /* Per-CPU TBI pointer; defined elsewhere, filled in by this file at CPU bring-up. */
 DECLARE_PER_CPU(PTBI, pTBI);

 /*
  * Stack page of the idle task of the CPU currently being brought up.
  * __cpu_up() sets it before starting the secondary and clears it afterwards.
  */
 void *secondary_data_stack;

 /*
  * structures for inter-processor calls
  * - A collection of single bit ipi messages.
  */
 struct ipi_data {
 	/* taken around every read-modify-write of "bits" */
 	spinlock_t lock;
 	/* incremented on every entry to do_IPI(); reported by show_ipi_list() */
 	unsigned long ipi_count;
 	/* pending messages, one bit per enum ipi_msg_type value */
 	unsigned long bits;
 };

 /* One ipi_data instance per CPU, with its lock statically initialised. */
 static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
 	.lock	= __SPIN_LOCK_UNLOCKED(ipi_data.lock),
 };

 /* Held by boot_secondary() while it programs the new thread's registers. */
 static DEFINE_SPINLOCK(boot_lock);

 /* Completed by secondary_start_kernel(); __cpu_up() waits on it. */
 static DECLARE_COMPLETION(cpu_running);

 /*
  * Program the core registers of hardware thread "thread" so that it starts
  * executing at secondary_startup, then enable the thread.
  *
  * "thread" is assumed to be a valid Meta hardware thread ID.
  * "idle" is not used by this function.
  *
  * Always returns 0.
  */
 int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
 {
 	u32 val;

 	/*
 	 * set synchronisation state between this boot processor
 	 * and the secondary one
 	 */
 	spin_lock(&boot_lock);

 	/*
 	 * TXUPC register 0 receives the address of secondary_startup,
 	 * TXUPC register 1 is zeroed.
 	 */
 	core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
 	core_reg_write(TXUPC_ID, 1, thread, 0);

 	/*
 	 * Give the thread privilege (PSTAT) and clear potentially problematic
 	 * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
 	 */
 	core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);

 	/* Clear the minim enable bit (0x80 in TXPRIVEXT). */
 	val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
 	core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);

 	/*
 	 * set the ThreadEnable bit (0x1) in the TXENABLE register
 	 * for the specified thread - off it goes!
 	 */
 	val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
 	core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);

 	/*
 	 * The secondary thread has been enabled.  This function does not
 	 * wait for it; the caller (__cpu_up()) waits for it to come online.
 	 */
 	spin_unlock(&boot_lock);

 	return 0;
 }

 /**
  * describe_cachepart_change: describe a change to cache partitions.
  * @thread:	Hardware thread number.
  * @label:	Label of cache type, e.g. "dcache" or "icache".
  * @sz:		Total size of the cache.
  * @old:	Old cache partition configuration (*CPART* register).
  * @new:	New cache partition configuration (*CPART* register).
  *
  * If the cache partition has changed, prints a message to the log describing
  * those changes.  Nothing is printed when @old and @new are equal.
  */
 static __cpuinit void describe_cachepart_change(unsigned int thread,
 						const char *label,
 						unsigned int sz,
 						unsigned int old,
 						unsigned int new)
 {
 	/* bits that differ between the two register values */
 	unsigned int diff = old ^ new;

 	if (!diff)
 		return;

 	/* thread is unsigned, so it is printed with %u */
 	pr_info("Thread %u: %s partition changed:", thread, label);

 	/* local partition: OR and AND fields, scaled by sz / 16 */
 	if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
 		unsigned int lor1  = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
 		unsigned int lor2  = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
 		unsigned int land1 = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
 		unsigned int land2 = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;

 		pr_cont(" L:%#x+%#x->%#x+%#x",
 			(lor1 * sz) >> 4,
 			((land1 + 1) * sz) >> 4,
 			(lor2 * sz) >> 4,
 			((land2 + 1) * sz) >> 4);
 	}

 	/* global partition: same layout as the local one */
 	if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
 		unsigned int gor1  = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
 		unsigned int gor2  = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
 		unsigned int gand1 = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
 		unsigned int gand2 = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;

 		pr_cont(" G:%#x+%#x->%#x+%#x",
 			(gor1 * sz) >> 4,
 			((gand1 + 1) * sz) >> 4,
 			(gor2 * sz) >> 4,
 			((gand2 + 1) * sz) >> 4);
 	}

 	/* single-bit flags: "+" when the bit became set, "-" when cleared */
 	if (diff & SYSC_CWRMODE_BIT)
 		pr_cont(" %sWR",
 			(new & SYSC_CWRMODE_BIT) ? "+" : "-");
 	if (diff & SYSC_DCPART_GCON_BIT)
 		pr_cont(" %sGCOn",
 			(new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
 	pr_cont("\n");
 }

 /**
  * setup_smp_cache: ensure cache coherency for new SMP thread.
  * @thread:	New hardware thread number.
  *
  * Ensures that coherency is enabled and that the threads share the same cache
  * partitions.  The current thread's dcache and icache partition registers are
  * copied to @thread, and any resulting change is logged.
  */
 static __cpuinit void setup_smp_cache(unsigned int thread)
 {
 	unsigned int this_thread, lflags;
 	unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
 	unsigned int icsz, icpart_old, icpart_new;

 	/*
 	 * Copy over the current thread's cache partition configuration to the
 	 * new thread so that they share cache partitions.
 	 */
 	__global_lock2(lflags);
 	this_thread = hard_processor_id();
 	/* Share dcache partition */
 	dcpart_this = metag_in32(SYSC_DCPART(this_thread));
 	dcpart_old = metag_in32(SYSC_DCPART(thread));
 	dcpart_new = dcpart_this;
 #if PAGE_OFFSET < LINGLOBAL_BASE
 	/*
 	 * For the local data cache to be coherent the threads must also have
 	 * GCOn enabled.
 	 */
 	dcpart_new |= SYSC_DCPART_GCON_BIT;
 	metag_out32(dcpart_new, SYSC_DCPART(this_thread));
 #endif
 	metag_out32(dcpart_new, SYSC_DCPART(thread));
 	/* Share icache partition too */
 	icpart_new = metag_in32(SYSC_ICPART(this_thread));
 	icpart_old = metag_in32(SYSC_ICPART(thread));
 	metag_out32(icpart_new, SYSC_ICPART(thread));
 	__global_unlock2(lflags);

 	/*
 	 * Log if the cache partitions were altered so the user is aware of any
 	 * potential unintentional cache wastage.  The icache report must be
 	 * scaled by the icache size, not the dcache size.
 	 */
 	dcsz = get_dcache_size();
 	icsz = get_icache_size();
 	describe_cachepart_change(this_thread, "dcache", dcsz,
 				  dcpart_this, dcpart_new);
 	describe_cachepart_change(thread, "dcache", dcsz,
 				  dcpart_old, dcpart_new);
 	describe_cachepart_change(thread, "icache", icsz,
 				  icpart_old, icpart_new);
 }

 /*
  * Bring logical CPU "cpu" online, using "idle" as its idle task.
  *
  * Returns 0 once the CPU is online, the error from boot_secondary() if
  * starting it failed, or -EIO if it is still not online after the wait.
  */
 int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
 {
 	unsigned int hwthread = cpu_2_hwthread_id[cpu];
 	int err;

 	/* Prepare page tables, TLB and cache partitions for the new thread. */
 	load_pgd(swapper_pg_dir, hwthread);
 	flush_tlb_all();
 	setup_smp_cache(hwthread);

 	/*
 	 * Publish the idle task's stack for the secondary CPU and make
 	 * sure the store is ordered before the thread is started.
 	 */
 	secondary_data_stack = task_stack_page(idle);
 	wmb();

 	/* Start the hardware thread. */
 	err = boot_secondary(hwthread, idle);
 	if (!err) {
 		/*
 		 * The thread was started: give it up to a second to
 		 * announce itself, then check that it really is online.
 		 */
 		wait_for_completion_timeout(&cpu_running,
 					    msecs_to_jiffies(1000));
 		if (!cpu_online(cpu))
 			err = -EIO;
 	}

 	secondary_data_stack = NULL;

 	/*
 	 * FIXME: We need to clean up the new idle thread. --rmk
 	 */
 	if (err)
 		pr_crit("CPU%u: processor failed to boot\n", cpu);

 	return err;
 }

 #ifdef CONFIG_HOTPLUG_CPU
 static DECLARE_COMPLETION(cpu_killed);

 /*
  * Executed by the processor that is being shut down: detaches it from the
  * rest of the system.  Always returns 0.
  */
 int __cpuexit __cpu_disable(void)
 {
 	unsigned int this_cpu = smp_processor_id();

 	/*
 	 * Mark ourselves offline.  From here on there is no way back, and
 	 * we must not schedule until we are ready to give up the cpu.
 	 */
 	set_cpu_online(this_cpu, false);

 	/* Move interrupts away from this CPU. */
 	migrate_irqs();

 	/* Flush the cache and this CPU's TLB entries ... */
 	flush_cache_all();
 	local_flush_tlb_all();

 	/* ... and remove this CPU from the vm mask set of all processes. */
 	clear_tasks_mm_cpumask(this_cpu);

 	return 0;
 }

 /*
  * Runs on the thread that requested the shutdown of "cpu".  Waits for the
  * dying CPU to signal cpu_killed and logs an error if the wait times out.
  */
 void __cpuexit __cpu_die(unsigned int cpu)
 {
 	unsigned long remaining;

 	remaining = wait_for_completion_timeout(&cpu_killed,
 						msecs_to_jiffies(1));
 	if (remaining == 0)
 		pr_err("CPU%u: unable to kill\n", cpu);
 }

 /*
  * Called from the idle thread for the CPU which has been shutdown.
  *
  * Note that we do not return from this function. If this cpu is
  * brought online again it will need to run secondary_startup().
  */
 void __cpuexit cpu_die(void)
 {
 	local_irq_disable();
 	idle_task_exit();

 	/* Let __cpu_die(), which waits on cpu_killed, proceed. */
 	complete(&cpu_killed);

 	/*
 	 * Write D0Re0 ^ D0Re0 (i.e. zero) to TXENABLE.
 	 * NOTE(review): presumably this clears the thread enable bit and
 	 * halts this hardware thread - confirm against the Meta ISA manual.
 	 */
 	asm ("XOR	TXENABLE, D0Re0,D0Re0\n");
 }
 #endif /* CONFIG_HOTPLUG_CPU */

 /*
  * Copy the global loops_per_jiffy value into the per-CPU cpu_data entry of
  * "cpuid".  Used for the boot CPU as well as for secondaries.
  */
 void __cpuinit smp_store_cpu_info(unsigned int cpuid)
 {
 	per_cpu(cpu_data, cpuid).loops_per_jiffy = loops_per_jiffy;
 }

 /*
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack and the global page tables.
  *
  * After per-CPU setup the CPU marks itself online, completes cpu_running
  * (which __cpu_up() waits on) and enters cpu_startup_entry().
  */
 asmlinkage void secondary_start_kernel(void)
 {
 	struct mm_struct *mm = &init_mm;
 	unsigned int cpu = smp_processor_id();

 	/*
 	 * All kernel threads share the same mm context; grab a
 	 * reference and switch to it.
 	 */
 	atomic_inc(&mm->mm_users);
 	atomic_inc(&mm->mm_count);
 	current->active_mm = mm;
 	cpumask_set_cpu(cpu, mm_cpumask(mm));
 	enter_lazy_tlb(mm, current);
 	local_flush_tlb_all();

 	/*
 	 * TODO: Some day it might be useful for each Linux CPU to
 	 * have its own TBI structure. That would allow each Linux CPU
 	 * to run different interrupt handlers for the same IRQ
 	 * number.
 	 *
 	 * For now, simply copying the pointer to the boot CPU's TBI
 	 * structure is sufficient because we always want to run the
 	 * same interrupt handler whatever CPU takes the interrupt.
 	 */
 	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);

 	if (!per_cpu(pTBI, cpu))
 		panic("No TBI found!");

 	per_cpu_trap_init(cpu);

 	preempt_disable();

 	setup_priv();

 	notify_cpu_starting(cpu);

 	pr_info("CPU%u (thread %u): Booted secondary processor\n",
 		cpu, cpu_2_hwthread_id[cpu]);

 	/* Calibrate the delay loop, then record the result for this CPU. */
 	calibrate_delay();
 	smp_store_cpu_info(cpu);

 	/*
 	 * OK, now it's safe to let the boot CPU continue
 	 */
 	set_cpu_online(cpu, true);
 	complete(&cpu_running);

 	/*
 	 * Enable local interrupts.
 	 */
 	tbi_startup_interrupt(TBID_SIGNUM_TRT);
 	local_irq_enable();

 	/*
 	 * OK, it's off to the idle thread for us
 	 */
 	cpu_startup_entry(CPUHP_ONLINE);
 }

 /*
  * Log how many processors are online together with the sum of their
  * loops_per_jiffy values expressed as BogoMIPS.  "max_cpus" is unused.
  */
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	unsigned long total_lpj = 0;
 	unsigned long mips_whole, mips_frac;
 	int cpu;

 	for_each_online_cpu(cpu) {
 		total_lpj += per_cpu(cpu_data, cpu).loops_per_jiffy;
 	}

 	/* integer part and two fractional digits of the BogoMIPS figure */
 	mips_whole = total_lpj / (500000/HZ);
 	mips_frac = (total_lpj / (5000/HZ)) % 100;

 	pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
 		num_online_cpus(), mips_whole, mips_frac);
 }

 /*
  * Set up the calling CPU's context and per-CPU info, and mark every
  * possible CPU as present.  "max_cpus" is unused.
  */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
 	unsigned int this_cpu = smp_processor_id();

 	init_new_context(current, &init_mm);
 	current_thread_info()->cpu = this_cpu;

 	smp_store_cpu_info(this_cpu);

 	/* present mask := possible mask */
 	init_cpu_present(cpu_possible_mask);
 }

 /*
  * Record the TBI pointer for the calling CPU; panics if none is found.
  */
 void __init smp_prepare_boot_cpu(void)
 {
 	unsigned int this_cpu = smp_processor_id();

 	per_cpu(pTBI, this_cpu) = __TBI(TBID_ISTAT_BIT);

 	if (per_cpu(pTBI, this_cpu))
 		return;

 	panic("No TBI found!");
 }

 static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);

 /*
  * Mark message "msg" as pending on every CPU in "mask" and kick those CPUs
  * that did not already have it pending.  Runs with local interrupts off.
  */
 static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
 {
 	const unsigned long msg_bit = 1 << msg;
 	cpumask_t kick_map;
 	unsigned long irqflags;
 	unsigned int target;

 	cpumask_clear(&kick_map);
 	local_irq_save(irqflags);

 	for_each_cpu(target, mask) {
 		struct ipi_data *ipi = &per_cpu(ipi_data, target);

 		spin_lock(&ipi->lock);

 		/*
 		 * KICK interrupts are queued by the hardware, so calling
 		 * smp_cross_call() several times for one msg produces
 		 * several interrupts, while software only has a single
 		 * bit per message and cannot queue them.
 		 *
 		 * The first pass through ipi_handler() does the work and
 		 * clears the msg bit; every further queued KICK would
 		 * then run ipi_handler() with nothing to do.  That is bad
 		 * because kick handler chaining uses it to detect who the
 		 * KICK was intended for.
 		 * See arch/metag/kernel/kick.c for more details.
 		 *
 		 * Therefore a CPU is only added to the kick map when no
 		 * KICK is already outstanding for 'msg'.
 		 */
 		if ((ipi->bits & msg_bit) == 0) {
 			ipi->bits |= msg_bit;
 			cpumask_set_cpu(target, &kick_map);
 		}

 		spin_unlock(&ipi->lock);
 	}

 	/* Hand over to the platform specific cross-CPU call function. */
 	smp_cross_call(kick_map, msg);

 	local_irq_restore(irqflags);
 }

 /* Send an IPI_CALL_FUNC message to every CPU in "mask". */
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 {
 	send_ipi_message(mask, IPI_CALL_FUNC);
 }

 /* Send an IPI_CALL_FUNC_SINGLE message to the single CPU "cpu". */
 void arch_send_call_function_single_ipi(int cpu)
 {
 	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
 }

 void show_ipi_list(struct seq_file *p)
 {
 	unsigned int cpu;

 	seq_puts(p, "IPI:");

 	for_each_present_cpu(cpu)
 		seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);

 	seq_putc(p, '\n');
 }

 /* Serialises the "stopping" report printed by stop_this_cpu(). */
 static DEFINE_SPINLOCK(stop_lock);

 /*
  * Main handler for inter-processor interrupts
  *
  * Handles at most one pending message per invocation: the lowest set bit
  * in this CPU's ipi_data.bits is cleared and dispatched as
  * IPI_RESCHEDULE, IPI_CALL_FUNC or IPI_CALL_FUNC_SINGLE.
  *
  * Returns 1 if a message was pending (even an unknown one), 0 otherwise.
  */
 static int do_IPI(struct pt_regs *regs)
 {
 	unsigned int cpu = smp_processor_id();
 	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned long msgs, nextmsg;
 	int handled = 0;

 	ipi->ipi_count++;

 	/* Claim the lowest pending message bit under the lock. */
 	spin_lock(&ipi->lock);
 	msgs = ipi->bits;
 	nextmsg = msgs & -msgs;
 	ipi->bits &= ~nextmsg;
 	spin_unlock(&ipi->lock);

 	if (nextmsg) {
 		handled = 1;

 		/* Convert the single-bit mask into its bit number. */
 		nextmsg = ffz(~nextmsg);
 		switch (nextmsg) {
 		case IPI_RESCHEDULE:
 			scheduler_ipi();
 			break;

 		case IPI_CALL_FUNC:
 			generic_smp_call_function_interrupt();
 			break;

 		case IPI_CALL_FUNC_SINGLE:
 			generic_smp_call_function_single_interrupt();
 			break;

 		default:
 			pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
 				cpu, nextmsg);
 			break;
 		}
 	}

 	set_irq_regs(old_regs);

 	return handled;
 }

 /* Send an IPI_RESCHEDULE message to the single CPU "cpu". */
 void smp_send_reschedule(int cpu)
 {
 	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
 }

 /*
  * Take the calling CPU offline and halt it.  While the system is booting
  * or running, a "stopping" message and a stack dump are logged first.
  * "data" is unused.
  */
 static void stop_this_cpu(void *data)
 {
 	unsigned int this_cpu = smp_processor_id();

 	switch (system_state) {
 	case SYSTEM_BOOTING:
 	case SYSTEM_RUNNING:
 		/* stop_lock keeps reports from different CPUs apart */
 		spin_lock(&stop_lock);
 		pr_crit("CPU%u: stopping\n", this_cpu);
 		dump_stack();
 		spin_unlock(&stop_lock);
 		break;
 	default:
 		break;
 	}

 	set_cpu_online(this_cpu, false);

 	local_irq_disable();

 	hard_processor_halt(HALT_OK);
 }

 /*
  * Ask the other CPUs to run stop_this_cpu().  The final argument (wait)
  * is 0, so this does not wait for them to finish.
  */
 void smp_send_stop(void)
 {
 	smp_call_function(stop_this_cpu, NULL, 0);
 }

 /*
  * not supported here: always returns -EINVAL, "multiplier" is ignored.
  */
 int setup_profiling_timer(unsigned int multiplier)
 {
 	return -EINVAL;
 }

 /*
  * KICKs are our inter-processor interrupt mechanism.
  *
  * The IPI data for each CPU in "callmap" must have been stored in that
  * CPU's "ipi_data" before this function is called.  "irq" is unused.
  */
 static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
 {
 	int target;

 	for_each_cpu(target, &callmap) {
 		unsigned int hwthread = cpu_2_hwthread_id[target];

 		BUG_ON(hwthread == BAD_HWTHREAD_ID);

 		/* write 1 to the kick register of that hardware thread */
 		metag_out32(1, T0KICKI + (hwthread * TnXKICK_STRIDE));
 	}
 }

 /*
  * Kick handler for IPIs: runs do_IPI() on the interrupted context and
  * stores its result in *handled.  State is returned unchanged; SigNum,
  * Triggers, Inst and pTBI are not used.
  */
 static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
 		   int Inst, PTBI pTBI, int *handled)
 {
 	*handled = do_IPI((struct pt_regs *)State.Sig.pCtx);

 	return State;
 }

 /* Registered with kick_register_func() by smp_init_cpus(). */
 static struct kick_irq_handler ipi_irq = {
 	.func = ipi_handler,
 };

 /*
  * Kick every CPU in "callmap".  "msg" is not used here; the pending
  * message bits are set by send_ipi_message() before this is called.
  */
 static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
 {
 	kick_raise_softirq(callmap, 1);
 }

 /*
  * Count the hardware threads, among the first CONFIG_NR_CPUS, whose
  * TXENABLE register reads back non-zero.
  */
 static inline unsigned int get_core_count(void)
 {
 	unsigned int enabled = 0;
 	int thread;

 	for (thread = 0; thread < CONFIG_NR_CPUS; thread++)
 		if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread) != 0)
 			enabled++;

 	return enabled;
 }

 /*
  * Set up the CPU possible map early - it describes the CPUs which are or
  * may become present - and register the IPI kick handler.
  */
 void __init smp_init_cpus(void)
 {
 	unsigned int ncores = get_core_count();
 	unsigned int cpu;

 	/* Entries not set by a hwthread_map early param get the 1:1 mapping */
 	for (cpu = 0; cpu < NR_CPUS; cpu++) {
 		if (cpu_2_hwthread_id[cpu] != BAD_HWTHREAD_ID)
 			continue;

 		cpu_2_hwthread_id[cpu] = cpu;
 		hwthread_id_2_cpu[cpu] = cpu;
 	}

 	for (cpu = 0; cpu < ncores; cpu++)
 		set_cpu_possible(cpu, true);

 	kick_register_func(&ipi_irq);
 }
	/*
	* Copyright (C) 2009,2010,2011 Imagination Technologies Ltd.
	*
	* Copyright (C) 2002 ARM Limited, All Rights Reserved.
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License version 2 as
	* published by the Free Software Foundation.
	*/
	#include <linux/atomic.h>
	#include <linux/completion.h>
	#include <linux/delay.h>
	#include <linux/init.h>
	#include <linux/spinlock.h>
	#include <linux/sched.h>
	#include <linux/interrupt.h>
	#include <linux/cache.h>
	#include <linux/profile.h>
	#include <linux/errno.h>
	#include <linux/mm.h>
	#include <linux/err.h>
	#include <linux/cpu.h>
	#include <linux/smp.h>
	#include <linux/seq_file.h>
	#include <linux/irq.h>
	#include <linux/bootmem.h>

	#include <asm/cacheflush.h>
	#include <asm/cachepart.h>
	#include <asm/core_reg.h>
	#include <asm/cpu.h>
	#include <asm/global_lock.h>
	#include <asm/metag_mem.h>
	#include <asm/mmu_context.h>
	#include <asm/pgtable.h>
	#include <asm/pgalloc.h>
	#include <asm/processor.h>
	#include <asm/setup.h>
	#include <asm/tlbflush.h>
	#include <asm/hwthread.h>
	#include <asm/traps.h>

	/*
	 * Address of the data / instruction cache partition register of hardware
	 * thread n: the thread 0 register plus n times the per-thread stride.
	 */
	#define SYSC_DCPART(n) (SYSC_DCPART0 + SYSC_xCPARTn_STRIDE * (n))
	#define SYSC_ICPART(n) (SYSC_ICPART0 + SYSC_xCPARTn_STRIDE * (n))

	/* Per-CPU TBI pointer; defined elsewhere, filled in by this file at CPU bring-up. */
	DECLARE_PER_CPU(PTBI, pTBI);

	/*
	 * Stack page of the idle task of the CPU currently being brought up.
	 * __cpu_up() sets it before starting the secondary and clears it afterwards.
	 */
	void *secondary_data_stack;

	/*
	 * structures for inter-processor calls
	 * - A collection of single bit ipi messages.
	 */
	struct ipi_data {
	/* taken around every read-modify-write of "bits" */
	spinlock_t lock;
	/* incremented on every entry to do_IPI(); reported by show_ipi_list() */
	unsigned long ipi_count;
	/* pending messages, one bit per enum ipi_msg_type value */
	unsigned long bits;
	};

	/* One ipi_data instance per CPU, with its lock statically initialised. */
	static DEFINE_PER_CPU(struct ipi_data, ipi_data) = {
	.lock = __SPIN_LOCK_UNLOCKED(ipi_data.lock),
	};

	/* Held by boot_secondary() while it programs the new thread's registers. */
	static DEFINE_SPINLOCK(boot_lock);

	/* Completed by secondary_start_kernel(); __cpu_up() waits on it. */
	static DECLARE_COMPLETION(cpu_running);

	/*
	 * Program the core registers of hardware thread "thread" so that it starts
	 * executing at secondary_startup, then enable the thread.
	 *
	 * "thread" is assumed to be a valid Meta hardware thread ID.
	 * "idle" is not used by this function.
	 *
	 * Always returns 0.
	 */
	int __cpuinit boot_secondary(unsigned int thread, struct task_struct *idle)
	{
		u32 val;

		/*
		 * set synchronisation state between this boot processor
		 * and the secondary one
		 */
		spin_lock(&boot_lock);

		/*
		 * TXUPC register 0 receives the address of secondary_startup,
		 * TXUPC register 1 is zeroed.
		 */
		core_reg_write(TXUPC_ID, 0, thread, (unsigned int)secondary_startup);
		core_reg_write(TXUPC_ID, 1, thread, 0);

		/*
		 * Give the thread privilege (PSTAT) and clear potentially problematic
		 * bits in the process (namely ISTAT, CBMarker, CBMarkerI, LSM_STEP).
		 */
		core_reg_write(TXUCT_ID, TXSTATUS_REGNUM, thread, TXSTATUS_PSTAT_BIT);

		/* Clear the minim enable bit (0x80 in TXPRIVEXT). */
		val = core_reg_read(TXUCT_ID, TXPRIVEXT_REGNUM, thread);
		core_reg_write(TXUCT_ID, TXPRIVEXT_REGNUM, thread, val & ~0x80);

		/*
		 * set the ThreadEnable bit (0x1) in the TXENABLE register
		 * for the specified thread - off it goes!
		 */
		val = core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread);
		core_reg_write(TXUCT_ID, TXENABLE_REGNUM, thread, val | 0x1);

		/*
		 * The secondary thread has been enabled.  This function does not
		 * wait for it; the caller (__cpu_up()) waits for it to come online.
		 */
		spin_unlock(&boot_lock);

		return 0;
	}

	/**
	 * describe_cachepart_change: describe a change to cache partitions.
	 * @thread:	Hardware thread number.
	 * @label:	Label of cache type, e.g. "dcache" or "icache".
	 * @sz:		Total size of the cache.
	 * @old:	Old cache partition configuration (*CPART* register).
	 * @new:	New cache partition configuration (*CPART* register).
	 *
	 * If the cache partition has changed, prints a message to the log describing
	 * those changes.  Nothing is printed when @old and @new are equal.
	 */
	static __cpuinit void describe_cachepart_change(unsigned int thread,
							const char *label,
							unsigned int sz,
							unsigned int old,
							unsigned int new)
	{
		/* bits that differ between the two register values */
		unsigned int diff = old ^ new;

		if (!diff)
			return;

		/* thread is unsigned, so it is printed with %u */
		pr_info("Thread %u: %s partition changed:", thread, label);

		/* local partition: OR and AND fields, scaled by sz / 16 */
		if (diff & (SYSC_xCPARTL_OR_BITS | SYSC_xCPARTL_AND_BITS)) {
			unsigned int lor1  = (old & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
			unsigned int lor2  = (new & SYSC_xCPARTL_OR_BITS)  >> SYSC_xCPARTL_OR_S;
			unsigned int land1 = (old & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;
			unsigned int land2 = (new & SYSC_xCPARTL_AND_BITS) >> SYSC_xCPARTL_AND_S;

			pr_cont(" L:%#x+%#x->%#x+%#x",
				(lor1 * sz) >> 4,
				((land1 + 1) * sz) >> 4,
				(lor2 * sz) >> 4,
				((land2 + 1) * sz) >> 4);
		}

		/* global partition: same layout as the local one */
		if (diff & (SYSC_xCPARTG_OR_BITS | SYSC_xCPARTG_AND_BITS)) {
			unsigned int gor1  = (old & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
			unsigned int gor2  = (new & SYSC_xCPARTG_OR_BITS)  >> SYSC_xCPARTG_OR_S;
			unsigned int gand1 = (old & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;
			unsigned int gand2 = (new & SYSC_xCPARTG_AND_BITS) >> SYSC_xCPARTG_AND_S;

			pr_cont(" G:%#x+%#x->%#x+%#x",
				(gor1 * sz) >> 4,
				((gand1 + 1) * sz) >> 4,
				(gor2 * sz) >> 4,
				((gand2 + 1) * sz) >> 4);
		}

		/* single-bit flags: "+" when the bit became set, "-" when cleared */
		if (diff & SYSC_CWRMODE_BIT)
			pr_cont(" %sWR",
				(new & SYSC_CWRMODE_BIT) ? "+" : "-");
		if (diff & SYSC_DCPART_GCON_BIT)
			pr_cont(" %sGCOn",
				(new & SYSC_DCPART_GCON_BIT) ? "+" : "-");
		pr_cont("\n");
	}

	/**
	 * setup_smp_cache: ensure cache coherency for new SMP thread.
	 * @thread:	New hardware thread number.
	 *
	 * Ensures that coherency is enabled and that the threads share the same cache
	 * partitions.  The current thread's dcache and icache partition registers are
	 * copied to @thread, and any resulting change is logged.
	 */
	static __cpuinit void setup_smp_cache(unsigned int thread)
	{
		unsigned int this_thread, lflags;
		unsigned int dcsz, dcpart_this, dcpart_old, dcpart_new;
		unsigned int icsz, icpart_old, icpart_new;

		/*
		 * Copy over the current thread's cache partition configuration to the
		 * new thread so that they share cache partitions.
		 */
		__global_lock2(lflags);
		this_thread = hard_processor_id();
		/* Share dcache partition */
		dcpart_this = metag_in32(SYSC_DCPART(this_thread));
		dcpart_old = metag_in32(SYSC_DCPART(thread));
		dcpart_new = dcpart_this;
	#if PAGE_OFFSET < LINGLOBAL_BASE
		/*
		 * For the local data cache to be coherent the threads must also have
		 * GCOn enabled.
		 */
		dcpart_new |= SYSC_DCPART_GCON_BIT;
		metag_out32(dcpart_new, SYSC_DCPART(this_thread));
	#endif
		metag_out32(dcpart_new, SYSC_DCPART(thread));
		/* Share icache partition too */
		icpart_new = metag_in32(SYSC_ICPART(this_thread));
		icpart_old = metag_in32(SYSC_ICPART(thread));
		metag_out32(icpart_new, SYSC_ICPART(thread));
		__global_unlock2(lflags);

		/*
		 * Log if the cache partitions were altered so the user is aware of any
		 * potential unintentional cache wastage.  The icache report must be
		 * scaled by the icache size, not the dcache size.
		 */
		dcsz = get_dcache_size();
		icsz = get_icache_size();
		describe_cachepart_change(this_thread, "dcache", dcsz,
					  dcpart_this, dcpart_new);
		describe_cachepart_change(thread, "dcache", dcsz,
					  dcpart_old, dcpart_new);
		describe_cachepart_change(thread, "icache", icsz,
					  icpart_old, icpart_new);
	}

	/*
	 * Bring logical CPU "cpu" online, using "idle" as its idle task.
	 *
	 * Returns 0 once the CPU is online, the error from boot_secondary() if
	 * starting it failed, or -EIO if it is still not online after the wait.
	 */
	int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle)
	{
		unsigned int hwthread = cpu_2_hwthread_id[cpu];
		int err;

		/* Prepare page tables, TLB and cache partitions for the new thread. */
		load_pgd(swapper_pg_dir, hwthread);
		flush_tlb_all();
		setup_smp_cache(hwthread);

		/*
		 * Publish the idle task's stack for the secondary CPU and make
		 * sure the store is ordered before the thread is started.
		 */
		secondary_data_stack = task_stack_page(idle);
		wmb();

		/* Start the hardware thread. */
		err = boot_secondary(hwthread, idle);
		if (!err) {
			/*
			 * The thread was started: give it up to a second to
			 * announce itself, then check that it really is online.
			 */
			wait_for_completion_timeout(&cpu_running,
						    msecs_to_jiffies(1000));
			if (!cpu_online(cpu))
				err = -EIO;
		}

		secondary_data_stack = NULL;

		/*
		 * FIXME: We need to clean up the new idle thread. --rmk
		 */
		if (err)
			pr_crit("CPU%u: processor failed to boot\n", cpu);

		return err;
	}

	#ifdef CONFIG_HOTPLUG_CPU
	static DECLARE_COMPLETION(cpu_killed);

	/*
	 * Executed by the processor that is being shut down: detaches it from the
	 * rest of the system.  Always returns 0.
	 */
	int __cpuexit __cpu_disable(void)
	{
		unsigned int this_cpu = smp_processor_id();

		/*
		 * Mark ourselves offline.  From here on there is no way back, and
		 * we must not schedule until we are ready to give up the cpu.
		 */
		set_cpu_online(this_cpu, false);

		/* Move interrupts away from this CPU. */
		migrate_irqs();

		/* Flush the cache and this CPU's TLB entries ... */
		flush_cache_all();
		local_flush_tlb_all();

		/* ... and remove this CPU from the vm mask set of all processes. */
		clear_tasks_mm_cpumask(this_cpu);

		return 0;
	}

	/*
	 * Runs on the thread that requested the shutdown of "cpu".  Waits for the
	 * dying CPU to signal cpu_killed and logs an error if the wait times out.
	 */
	void __cpuexit __cpu_die(unsigned int cpu)
	{
		unsigned long remaining;

		remaining = wait_for_completion_timeout(&cpu_killed,
							msecs_to_jiffies(1));
		if (remaining == 0)
			pr_err("CPU%u: unable to kill\n", cpu);
	}

	/*
	 * Called from the idle thread for the CPU which has been shutdown.
	 *
	 * Note that we do not return from this function. If this cpu is
	 * brought online again it will need to run secondary_startup().
	 */
	void __cpuexit cpu_die(void)
	{
	local_irq_disable();
	idle_task_exit();

	/* Let __cpu_die(), which waits on cpu_killed, proceed. */
	complete(&cpu_killed);

	/*
	 * Write D0Re0 ^ D0Re0 (i.e. zero) to TXENABLE.
	 * NOTE(review): presumably this clears the thread enable bit and
	 * halts this hardware thread - confirm against the Meta ISA manual.
	 */
	asm ("XOR TXENABLE, D0Re0,D0Re0\n");
	}
	#endif /* CONFIG_HOTPLUG_CPU */

	/*
	 * Copy the global loops_per_jiffy value into the per-CPU cpu_data entry of
	 * "cpuid".  Used for the boot CPU as well as for secondaries.
	 */
	void __cpuinit smp_store_cpu_info(unsigned int cpuid)
	{
		per_cpu(cpu_data, cpuid).loops_per_jiffy = loops_per_jiffy;
	}

	/*
	 * This is the secondary CPU boot entry. We're using this CPUs
	 * idle thread stack and the global page tables.
	 *
	 * After per-CPU setup the CPU marks itself online, completes cpu_running
	 * (which __cpu_up() waits on) and enters cpu_startup_entry().
	 */
	asmlinkage void secondary_start_kernel(void)
	{
	struct mm_struct *mm = &init_mm;
	unsigned int cpu = smp_processor_id();

	/*
	 * All kernel threads share the same mm context; grab a
	 * reference and switch to it.
	 */
	atomic_inc(&mm->mm_users);
	atomic_inc(&mm->mm_count);
	current->active_mm = mm;
	cpumask_set_cpu(cpu, mm_cpumask(mm));
	enter_lazy_tlb(mm, current);
	local_flush_tlb_all();

	/*
	 * TODO: Some day it might be useful for each Linux CPU to
	 * have its own TBI structure. That would allow each Linux CPU
	 * to run different interrupt handlers for the same IRQ
	 * number.
	 *
	 * For now, simply copying the pointer to the boot CPU's TBI
	 * structure is sufficient because we always want to run the
	 * same interrupt handler whatever CPU takes the interrupt.
	 */
	per_cpu(pTBI, cpu) = __TBI(TBID_ISTAT_BIT);

	if (!per_cpu(pTBI, cpu))
	panic("No TBI found!");

	per_cpu_trap_init(cpu);

	preempt_disable();

	setup_priv();

	notify_cpu_starting(cpu);

	pr_info("CPU%u (thread %u): Booted secondary processor\n",
	cpu, cpu_2_hwthread_id[cpu]);

	/* Calibrate the delay loop, then record the result for this CPU. */
	calibrate_delay();
	smp_store_cpu_info(cpu);

	/*
	 * OK, now it's safe to let the boot CPU continue
	 */
	set_cpu_online(cpu, true);
	complete(&cpu_running);

	/*
	 * Enable local interrupts.
	 */
	tbi_startup_interrupt(TBID_SIGNUM_TRT);
	local_irq_enable();

	/*
	 * OK, it's off to the idle thread for us
	 */
	cpu_startup_entry(CPUHP_ONLINE);
	}

	/*
	 * Log how many processors are online together with the sum of their
	 * loops_per_jiffy values expressed as BogoMIPS.  "max_cpus" is unused.
	 */
	void __init smp_cpus_done(unsigned int max_cpus)
	{
		unsigned long total_lpj = 0;
		unsigned long mips_whole, mips_frac;
		int cpu;

		for_each_online_cpu(cpu) {
			total_lpj += per_cpu(cpu_data, cpu).loops_per_jiffy;
		}

		/* integer part and two fractional digits of the BogoMIPS figure */
		mips_whole = total_lpj / (500000/HZ);
		mips_frac = (total_lpj / (5000/HZ)) % 100;

		pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n",
			num_online_cpus(), mips_whole, mips_frac);
	}

	/*
	 * Set up the calling CPU's context and per-CPU info, and mark every
	 * possible CPU as present.  "max_cpus" is unused.
	 */
	void __init smp_prepare_cpus(unsigned int max_cpus)
	{
		unsigned int this_cpu = smp_processor_id();

		init_new_context(current, &init_mm);
		current_thread_info()->cpu = this_cpu;

		smp_store_cpu_info(this_cpu);

		/* present mask := possible mask */
		init_cpu_present(cpu_possible_mask);
	}

	/*
	 * Record the TBI pointer for the calling CPU; panics if none is found.
	 */
	void __init smp_prepare_boot_cpu(void)
	{
		unsigned int this_cpu = smp_processor_id();

		per_cpu(pTBI, this_cpu) = __TBI(TBID_ISTAT_BIT);

		if (per_cpu(pTBI, this_cpu))
			return;

		panic("No TBI found!");
	}

	static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg);

	/*
	 * Mark message "msg" as pending on every CPU in "mask" and kick those CPUs
	 * that did not already have it pending.  Runs with local interrupts off.
	 */
	static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg)
	{
		unsigned long flags;
		unsigned int cpu;
		cpumask_t map;

		cpumask_clear(&map);
		local_irq_save(flags);

		for_each_cpu(cpu, mask) {
			struct ipi_data *ipi = &per_cpu(ipi_data, cpu);

			spin_lock(&ipi->lock);

			/*
			 * KICK interrupts are queued in hardware so we'll get
			 * multiple interrupts if we call smp_cross_call()
			 * multiple times for one msg. The problem is that we
			 * only have one bit for each message - we can't queue
			 * them in software.
			 *
			 * The first time through ipi_handler() we'll clear
			 * the msg bit, having done all the work. But when we
			 * return we'll get _another_ interrupt (and another,
			 * and another until we've handled all the queued
			 * KICKs). Running ipi_handler() when there's no work
			 * to do is bad because that's how kick handler
			 * chaining detects who the KICK was intended for.
			 * See arch/metag/kernel/kick.c for more details.
			 *
			 * So only add 'cpu' to 'map' if we haven't already
			 * queued a KICK interrupt for 'msg'.
			 */
			if (!(ipi->bits & (1 << msg))) {
				ipi->bits |= 1 << msg;
				cpumask_set_cpu(cpu, &map);
			}

			spin_unlock(&ipi->lock);
		}

		/*
		 * Call the platform specific cross-CPU call function.
		 */
		smp_cross_call(map, msg);

		local_irq_restore(flags);
	}

	/* Send an IPI_CALL_FUNC message to every CPU in "mask". */
	void arch_send_call_function_ipi_mask(const struct cpumask *mask)
	{
	send_ipi_message(mask, IPI_CALL_FUNC);
	}

	/* Send an IPI_CALL_FUNC_SINGLE message to the single CPU "cpu". */
	void arch_send_call_function_single_ipi(int cpu)
	{
	send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
	}

	void show_ipi_list(struct seq_file *p)
	{
	unsigned int cpu;

	seq_puts(p, "IPI:");

	for_each_present_cpu(cpu)
	seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count);

	seq_putc(p, '\n');
	}

	/* Serialises the "stopping" report printed by stop_this_cpu(). */
	static DEFINE_SPINLOCK(stop_lock);

	/*
	 * Main handler for inter-processor interrupts
	 *
	 * Handles at most one pending message per invocation: the lowest set bit
	 * in this CPU's ipi_data.bits is cleared and dispatched as
	 * IPI_RESCHEDULE, IPI_CALL_FUNC or IPI_CALL_FUNC_SINGLE.
	 *
	 * Returns 1 if a message was pending (even an unknown one), 0 otherwise.
	 */
	static int do_IPI(struct pt_regs *regs)
	{
	unsigned int cpu = smp_processor_id();
	struct ipi_data *ipi = &per_cpu(ipi_data, cpu);
	struct pt_regs *old_regs = set_irq_regs(regs);
	unsigned long msgs, nextmsg;
	int handled = 0;

	ipi->ipi_count++;

	/* Claim the lowest pending message bit under the lock. */
	spin_lock(&ipi->lock);
	msgs = ipi->bits;
	nextmsg = msgs & -msgs;
	ipi->bits &= ~nextmsg;
	spin_unlock(&ipi->lock);

	if (nextmsg) {
	handled = 1;

	/* Convert the single-bit mask into its bit number. */
	nextmsg = ffz(~nextmsg);
	switch (nextmsg) {
	case IPI_RESCHEDULE:
	scheduler_ipi();
	break;

	case IPI_CALL_FUNC:
	generic_smp_call_function_interrupt();
	break;

	case IPI_CALL_FUNC_SINGLE:
	generic_smp_call_function_single_interrupt();
	break;

	default:
	pr_crit("CPU%u: Unknown IPI message 0x%lx\n",
	cpu, nextmsg);
	break;
	}
	}

	set_irq_regs(old_regs);

	return handled;
	}

	/* Send an IPI_RESCHEDULE message to the single CPU "cpu". */
	void smp_send_reschedule(int cpu)
	{
	send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
	}

	static void stop_this_cpu(void *data)
	{
	unsigned int cpu = smp_processor_id();

	if (system_state == SYSTEM_BOOTING \|\|
	system_state == SYSTEM_RUNNING) {
	spin_lock(&stop_lock);
	pr_crit("CPU%u: stopping\n", cpu);
	dump_stack();
	spin_unlock(&stop_lock);
	}

	set_cpu_online(cpu, false);

	local_irq_disable();

	hard_processor_halt(HALT_OK);
	}

	/*
	 * Ask the other CPUs to run stop_this_cpu().  The final argument (wait)
	 * is 0, so this does not wait for them to finish.
	 */
	void smp_send_stop(void)
	{
	smp_call_function(stop_this_cpu, NULL, 0);
	}

	/*
	 * not supported here: always returns -EINVAL, "multiplier" is ignored.
	 */
	int setup_profiling_timer(unsigned int multiplier)
	{
	return -EINVAL;
	}

	/*
	 * KICKs are our inter-processor interrupt mechanism.
	 *
	 * The IPI data for each CPU in "callmap" must have been stored in that
	 * CPU's "ipi_data" before this function is called.  "irq" is unused.
	 */
	static void kick_raise_softirq(cpumask_t callmap, unsigned int irq)
	{
		int target;

		for_each_cpu(target, &callmap) {
			unsigned int hwthread = cpu_2_hwthread_id[target];

			BUG_ON(hwthread == BAD_HWTHREAD_ID);

			/* write 1 to the kick register of that hardware thread */
			metag_out32(1, T0KICKI + (hwthread * TnXKICK_STRIDE));
		}
	}

	/*
	 * Kick handler for IPIs: runs do_IPI() on the interrupted context and
	 * stores its result in *handled.  State is returned unchanged; SigNum,
	 * Triggers, Inst and pTBI are not used.
	 */
	static TBIRES ipi_handler(TBIRES State, int SigNum, int Triggers,
				  int Inst, PTBI pTBI, int *handled)
	{
		/*
		 * Store through the out-parameter and pass the saved context as a
		 * pointer; both dereference/pointer stars are required.
		 */
		*handled = do_IPI((struct pt_regs *)State.Sig.pCtx);

		return State;
	}

	/* Registered with kick_register_func() by smp_init_cpus(). */
	static struct kick_irq_handler ipi_irq = {
		.func = ipi_handler,
	};

	/*
	 * Kick every CPU in "callmap".  "msg" is not used here; the pending
	 * message bits are set by send_ipi_message() before this is called.
	 */
	static void smp_cross_call(cpumask_t callmap, enum ipi_msg_type msg)
	{
	kick_raise_softirq(callmap, 1);
	}

	/*
	 * Count the hardware threads, among the first CONFIG_NR_CPUS, whose
	 * TXENABLE register reads back non-zero.
	 */
	static inline unsigned int get_core_count(void)
	{
		unsigned int enabled = 0;
		int thread;

		for (thread = 0; thread < CONFIG_NR_CPUS; thread++)
			if (core_reg_read(TXUCT_ID, TXENABLE_REGNUM, thread) != 0)
				enabled++;

		return enabled;
	}

	/*
	 * Set up the CPU possible map early - it describes the CPUs which are or
	 * may become present - and register the IPI kick handler.
	 */
	void __init smp_init_cpus(void)
	{
		unsigned int ncores = get_core_count();
		unsigned int cpu;

		/* Entries not set by a hwthread_map early param get the 1:1 mapping */
		for (cpu = 0; cpu < NR_CPUS; cpu++) {
			if (cpu_2_hwthread_id[cpu] != BAD_HWTHREAD_ID)
				continue;

			cpu_2_hwthread_id[cpu] = cpu;
			hwthread_id_2_cpu[cpu] = cpu;
		}

		for (cpu = 0; cpu < ncores; cpu++)
			set_cpu_possible(cpu, true);

		kick_register_func(&ipi_irq);
	}