arch/um/kernel/irq.c - linux - Git at Google

 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2017 - Cambridge Greys Ltd
  * Copyright (C) 2011 - 2014 Cisco Systems Inc
  * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
  * Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  */

 #include <linux/cpumask.h>
 #include <linux/hardirq.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <as-layout.h>
 #include <kern_util.h>
 #include <os.h>
 #include <irq_user.h>
 #include <irq_kern.h>
 #include <as-layout.h>


 /* Defined outside this file; sigio_handler() calls it after its epoll loop exits. */
 extern void free_irqs(void);

 /*
  * When epoll triggers we do not know why it did so, and the same fd can
  * have different IRQs for read and write.  This is why each fd keeps a
  * small irq_reg array - one slot per IRQ type.
  */
 struct irq_reg {
 	/* dev_id cookie given to activate_fd(); matched by free_irq_by_irq_and_dev() */
 	void *id;
 	/* IRQ number handed to do_IRQ() when this slot fires */
 	int irq;
 	/* it's cheaper to store this than to query it; 0 marks the slot as unused */
 	int events;
 	/* cleared by irq_io_loop() while the handler runs, to detect re-entry */
 	bool active;
 	/* set by irq_io_loop() on re-entry so the running loop calls do_IRQ() again */
 	bool pending;
 	/* set via normal_irq_set_wake(); um_irqs_suspend() leaves such fds untouched */
 	bool wakeup;
 };

 /* One node of active_fds: every registration that exists for a single fd. */
 struct irq_entry {
 	/* linkage into active_fds */
 	struct list_head list;
 	/* host file descriptor registered with epoll */
 	int fd;
 	/* per-IRQ-type registrations, indexed by enum um_irq_type */
 	struct irq_reg reg[NUM_IRQ_TYPES];
 	/* set by um_irqs_suspend(), cleared by um_irqs_resume() */
 	bool suspended;
 	/* records !__ignore_sigio_fd() at suspend; resume calls __add_sigio_fd() if set */
 	bool sigio_workaround;
 };

 /* Taken by the functions below that look up or modify active_fds. */
 static DEFINE_SPINLOCK(irq_lock);
 /* List of struct irq_entry; activate_fd() adds a node only when the fd has none yet. */
 static LIST_HEAD(active_fds);
 /* IRQ numbers handed out by um_request_irq() for UM_IRQ_ALLOC requests. */
 static DECLARE_BITMAP(irqs_allocated, NR_IRQS);

 /*
  * Run the handler for one registration, coalescing re-entrant triggers.
  *
  * irq->active is dropped for the duration of the handler loop and restored
  * afterwards.  A call that arrives while it is dropped only records
  * irq->pending, which makes the running loop invoke do_IRQ() once more.
  */
 static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
 {
 	if (!irq->active) {
 		/* re-entry: leave a note for the loop that is already running */
 		irq->pending = true;
 		return;
 	}

 	irq->active = false;
 	for (;;) {
 		irq->pending = false;
 		do_IRQ(irq->irq, regs);
 		if (!irq->pending)
 			break;
 	}
 	irq->active = true;
 }

 /*
  * SIGIO handler that um_irqs_suspend() installs in sig_info[SIGIO]:
  * it deliberately ignores the signal.
  */
 void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	/* intentionally empty */
 }

 void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
 	struct irq_entry *irq_entry;
 	int n, i;

 	while (1) {
 		/* This is now lockless - epoll keeps back-referencesto the irqs
 		 * which have trigger it so there is no need to walk the irq
 		 * list and lock it every time. We avoid locking by turning off
 		 * IO for a specific fd by executing os_del_epoll_fd(fd) before
 		 * we do any changes to the actual data structures
 		 */
 		n = os_waiting_for_events_epoll();

 		if (n <= 0) {
 			if (n == -EINTR)
 				continue;
 			else
 				break;
 		}

 		for (i = 0; i < n ; i++) {
 			enum um_irq_type t;

 			irq_entry = os_epoll_get_data_pointer(i);

 			for (t = 0; t < NUM_IRQ_TYPES; t++) {
 				int events = irq_entry->reg[t].events;

 				if (!events)
 					continue;

 				if (os_epoll_triggered(i, events) > 0)
 					irq_io_loop(&irq_entry->reg[t], regs);
 			}
 		}
 	}

 	free_irqs();
 }

 static struct irq_entry *get_irq_entry_by_fd(int fd)
 {
 	struct irq_entry *walk;

 	lockdep_assert_held(&irq_lock);

 	list_for_each_entry(walk, &active_fds, list) {
 		if (walk->fd == fd)
 			return walk;
 	}

 	return NULL;
 }

 /*
  * Unlink @to_free from its list and kfree() it; a NULL pointer is ignored.
  * When @remove is true the fd is dropped from epoll first.
  */
 static void free_irq_entry(struct irq_entry *to_free, bool remove)
 {
 	if (to_free) {
 		if (remove)
 			os_del_epoll_fd(to_free->fd);

 		list_del(&to_free->list);
 		kfree(to_free);
 	}
 }

 static bool update_irq_entry(struct irq_entry *entry)
 {
 	enum um_irq_type i;
 	int events = 0;

 	for (i = 0; i < NUM_IRQ_TYPES; i++)
 		events |= entry->reg[i].events;

 	if (events) {
 		/* will modify (instead of add) if needed */
 		os_add_epoll_fd(events, entry->fd, entry);
 		return true;
 	}

 	os_del_epoll_fd(entry->fd);
 	return false;
 }

 /* Refresh @entry's epoll registration; free the entry once no events remain. */
 static void update_or_free_irq_entry(struct irq_entry *entry)
 {
 	if (update_irq_entry(entry))
 		return;

 	/* update_irq_entry() already removed the fd from epoll */
 	free_irq_entry(entry, false);
 }

 static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
 {
 	struct irq_entry *irq_entry;
 	int err, events = os_event_mask(type);
 	unsigned long flags;

 	err = os_set_fd_async(fd);
 	if (err < 0)
 		goto out;

 	spin_lock_irqsave(&irq_lock, flags);
 	irq_entry = get_irq_entry_by_fd(fd);
 	if (irq_entry) {
 		/* cannot register the same FD twice with the same type */
 		if (WARN_ON(irq_entry->reg[type].events)) {
 			err = -EALREADY;
 			goto out_unlock;
 		}

 		/* temporarily disable to avoid IRQ-side locking */
 		os_del_epoll_fd(fd);
 	} else {
 		irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
 		if (!irq_entry) {
 			err = -ENOMEM;
 			goto out_unlock;
 		}
 		irq_entry->fd = fd;
 		list_add_tail(&irq_entry->list, &active_fds);
 		maybe_sigio_broken(fd);
 	}

 	irq_entry->reg[type].id = dev_id;
 	irq_entry->reg[type].irq = irq;
 	irq_entry->reg[type].active = true;
 	irq_entry->reg[type].events = events;

 	WARN_ON(!update_irq_entry(irq_entry));
 	spin_unlock_irqrestore(&irq_lock, flags);

 	return 0;
 out_unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
 out:
 	return err;
 }

 /*
  * Drop every registration that exists for @fd in one go.  To remove only
  * some of them, use um_free_irq() or deactivate_fd() instead.
  */
 void free_irq_by_fd(int fd)
 {
 	unsigned long flags;

 	spin_lock_irqsave(&irq_lock, flags);
 	/* free_irq_entry() ignores a NULL (not found) lookup result */
 	free_irq_entry(get_irq_entry_by_fd(fd), true);
 	spin_unlock_irqrestore(&irq_lock, flags);
 }
 EXPORT_SYMBOL(free_irq_by_fd);

 /*
  * Remove the first in-use slot whose IRQ number is @irq and whose id is
  * @dev, then update or free the entry that owned it.
  */
 static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
 {
 	struct irq_entry *entry;
 	unsigned long flags;

 	spin_lock_irqsave(&irq_lock, flags);
 	list_for_each_entry(entry, &active_fds, list) {
 		enum um_irq_type t;

 		for (t = 0; t < NUM_IRQ_TYPES; t++) {
 			struct irq_reg *reg = &entry->reg[t];

 			if (!reg->events || reg->irq != irq || reg->id != dev)
 				continue;

 			/* stop epoll delivery before changing the slot */
 			os_del_epoll_fd(entry->fd);
 			reg->events = 0;
 			update_or_free_irq_entry(entry);
 			/* entry may be freed now: do not keep iterating */
 			goto unlock;
 		}
 	}
 unlock:
 	spin_unlock_irqrestore(&irq_lock, flags);
 }

 void deactivate_fd(int fd, int irqnum)
 {
 	struct irq_entry *entry;
 	unsigned long flags;
 	enum um_irq_type i;

 	os_del_epoll_fd(fd);

 	spin_lock_irqsave(&irq_lock, flags);
 	entry = get_irq_entry_by_fd(fd);
 	if (!entry)
 		goto out;

 	for (i = 0; i < NUM_IRQ_TYPES; i++) {
 		if (!entry->reg[i].events)
 			continue;
 		if (entry->reg[i].irq == irqnum)
 			entry->reg[i].events = 0;
 	}

 	update_or_free_irq_entry(entry);
 out:
 	spin_unlock_irqrestore(&irq_lock, flags);

 	ignore_sigio_fd(fd);
 }
 EXPORT_SYMBOL(deactivate_fd);

 /*
  * Called just before shutdown in order to provide a clean exec
  * environment in case the system is rebooting.  No locking because
  * that would cause a pointless shutdown hang if something hadn't
  * released the lock.
  */
 int deactivate_all_fds(void)
 {
 	struct irq_entry *entry;

 	/* Stop IO. The IRQ loop has no lock so this is our
 	 * only way of making sure we are safe to dispose
 	 * of all IRQ handlers
 	 */
 	os_set_ioignore();

 	/* we can no longer call kfree() here so just deactivate */
 	list_for_each_entry(entry, &active_fds, list)
 		os_del_epoll_fd(entry->fd);
 	os_close_epoll_fd();
 	return 0;
 }

 /*
  * do_IRQ handles all normal device IRQs (the special SMP cross-CPU
  * interrupts have their own specific handlers).  It installs @regs as the
  * current IRQ registers around generic_handle_irq() and always returns 1.
  */
 unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
 {
 	struct pt_regs *saved_regs;

 	saved_regs = set_irq_regs((struct pt_regs *)regs);

 	irq_enter();
 	generic_handle_irq(irq);
 	irq_exit();

 	/* put back whatever register set was current before this IRQ */
 	set_irq_regs(saved_regs);
 	return 1;
 }

 /*
  * Release IRQ @irq for device cookie @dev: drop the matching fd
  * registration, free the IRQ handler and return the number to
  * irqs_allocated.  Warns and does nothing for an out-of-range @irq.
  */
 void um_free_irq(int irq, void *dev)
 {
 	/*
 	 * Valid numbers are 0 .. NR_IRQS - 1.  NR_IRQS itself must be rejected
 	 * too, otherwise clear_bit() below would touch a bit past the end of
 	 * the NR_IRQS-bit irqs_allocated bitmap.
 	 */
 	if (WARN(irq < 0 || irq >= NR_IRQS, "freeing invalid irq %d", irq))
 		return;

 	free_irq_by_irq_and_dev(irq, dev);
 	free_irq(irq, dev);
 	clear_bit(irq, irqs_allocated);
 }
 EXPORT_SYMBOL(um_free_irq);

 int um_request_irq(int irq, int fd, enum um_irq_type type,
 		   irq_handler_t handler, unsigned long irqflags,
 		   const char *devname, void *dev_id)
 {
 	int err;

 	if (irq == UM_IRQ_ALLOC) {
 		int i;

 		for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
 			if (!test_and_set_bit(i, irqs_allocated)) {
 				irq = i;
 				break;
 			}
 		}
 	}

 	if (irq < 0)
 		return -ENOSPC;

 	if (fd != -1) {
 		err = activate_fd(irq, fd, type, dev_id);
 		if (err)
 			goto error;
 	}

 	err = request_irq(irq, handler, irqflags, devname, dev_id);
 	if (err < 0)
 		goto error;

 	return irq;
 error:
 	clear_bit(irq, irqs_allocated);
 	return err;
 }
 EXPORT_SYMBOL(um_request_irq);

 #ifdef CONFIG_PM_SLEEP
 /*
  * Prepare fd-backed IRQs for suspend: route SIGIO to the no-op handler and
  * turn off async notification for every fd that has no wakeup-capable
  * registration.
  */
 void um_irqs_suspend(void)
 {
 	struct irq_entry *entry;
 	unsigned long flags;

 	sig_info[SIGIO] = sigio_handler_suspend;

 	spin_lock_irqsave(&irq_lock, flags);
 	list_for_each_entry(entry, &active_fds, list) {
 		bool keep_awake = false;
 		enum um_irq_type t;

 		for (t = 0; t < NUM_IRQ_TYPES && !keep_awake; t++) {
 			const struct irq_reg *reg = &entry->reg[t];

 			if (!reg->events)
 				continue;

 			/*
 			 * For the SIGIO_WRITE_IRQ, which is used to handle the
 			 * SIGIO workaround thread, we need special handling:
 			 * enable wake for it itself, but below we tell it about
 			 * any FDs that should be suspended.
 			 */
 			keep_awake = reg->wakeup || reg->irq == SIGIO_WRITE_IRQ;
 		}

 		if (keep_awake)
 			continue;

 		entry->suspended = true;
 		os_clear_fd_async(entry->fd);
 		entry->sigio_workaround = !__ignore_sigio_fd(entry->fd);
 	}
 	spin_unlock_irqrestore(&irq_lock, flags);
 }

 /*
  * Reverse um_irqs_suspend(): re-enable async notification on every
  * suspended fd, re-add it to the SIGIO workaround where it was removed,
  * reinstall the real SIGIO handler and send ourselves a SIGIO.
  */
 void um_irqs_resume(void)
 {
 	struct irq_entry *entry;
 	unsigned long flags;

 	spin_lock_irqsave(&irq_lock, flags);
 	list_for_each_entry(entry, &active_fds, list) {
 		int err;

 		if (!entry->suspended)
 			continue;

 		err = os_set_fd_async(entry->fd);
 		WARN(err < 0, "os_set_fd_async returned %d\n", err);
 		entry->suspended = false;

 		if (!entry->sigio_workaround)
 			continue;

 		err = __add_sigio_fd(entry->fd);
 		WARN(err < 0, "add_sigio_returned %d\n", err);
 	}
 	spin_unlock_irqrestore(&irq_lock, flags);

 	sig_info[SIGIO] = sigio_handler;
 	send_sigio_to_self();
 }

 /*
  * irq_set_wake callback: store @on in the wakeup flag of the first in-use
  * slot registered for d->irq.  Always returns 0, even if nothing matched.
  */
 static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
 {
 	struct irq_entry *entry;
 	unsigned long flags;

 	spin_lock_irqsave(&irq_lock, flags);
 	list_for_each_entry(entry, &active_fds, list) {
 		enum um_irq_type t;

 		for (t = 0; t < NUM_IRQ_TYPES; t++) {
 			struct irq_reg *reg = &entry->reg[t];

 			if (reg->events && reg->irq == d->irq) {
 				reg->wakeup = on;
 				goto done;
 			}
 		}
 	}
 done:
 	spin_unlock_irqrestore(&irq_lock, flags);
 	return 0;
 }
 #else
 #define normal_irq_set_wake NULL
 #endif

 /*
  * No-op irq_chip callback.  irq_chip must define at least enable/disable
  * and ack when the edge handler is used, so the chips in this file point
  * those hooks here.
  */
 static void dummy(struct irq_data *d)
 {
 	/* intentionally empty */
 }

 /*
  * This is used for everything other than the timer: init_IRQ() installs it
  * on IRQs 1 .. NR_IRQS - 1.  Every hook is the no-op dummy() except
  * irq_set_wake, which is normal_irq_set_wake() (NULL without
  * CONFIG_PM_SLEEP).
  */
 static struct irq_chip normal_irq_type = {
 	.name = "SIGIO",
 	.irq_disable = dummy,
 	.irq_enable = dummy,
 	.irq_ack = dummy,
 	.irq_mask = dummy,
 	.irq_unmask = dummy,
 	.irq_set_wake = normal_irq_set_wake,
 };

 /*
  * Chip installed on TIMER_IRQ by init_IRQ().  Every hook is the no-op
  * dummy(); there is no irq_set_wake hook.
  */
 static struct irq_chip alarm_irq_type = {
 	.name = "SIGALRM",
 	.irq_disable = dummy,
 	.irq_enable = dummy,
 	.irq_ack = dummy,
 	.irq_mask = dummy,
 	.irq_unmask = dummy,
 };

 /*
  * Boot-time IRQ setup: attach the SIGALRM chip to TIMER_IRQ, the SIGIO chip
  * to IRQs 1 .. NR_IRQS - 1 (all with the edge handler), then set up epoll.
  */
 void __init init_IRQ(void)
 {
 	int irq;

 	irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

 	for (irq = 1; irq < NR_IRQS; irq++) {
 		irq_set_chip_and_handler(irq, &normal_irq_type,
 					 handle_edge_irq);
 	}

 	/* Initialize EPOLL Loop */
 	os_setup_epoll();
 }

 /*
  * IRQ stack entry and exit:
  *
  * Unlike i386, UML doesn't receive IRQs on the normal kernel stack
  * and switch over to the IRQ stack after some preparation.  We use
  * sigaltstack to receive signals on a separate stack from the start.
  * These two functions make sure the rest of the kernel won't be too
  * upset by being on a different stack.  The IRQ stack has a
  * thread_info structure at the bottom so that current et al continue
  * to work.
  *
  * to_irq_stack copies the current task's thread_info to the IRQ stack
  * thread_info and sets the task's stack to point to the IRQ stack.
  *
  * from_irq_stack copies the thread_info struct back (flags may have
  * been modified) and resets the task's stack pointer.
  *
  * Tricky bits -
  *
  * What happens when two signals race each other?  UML doesn't block
  * signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
  * could arrive while a previous one is still setting up the
  * thread_info.
  *
  * There are three cases -
  *     The first interrupt on the stack - sets up the thread_info and
  * handles the interrupt
  *     A nested interrupt interrupting the copying of the thread_info -
  * can't handle the interrupt, as the stack is in an unknown state
  *     A nested interrupt not interrupting the copying of the
  * thread_info - doesn't do any setup, just handles the interrupt
  *
  * The first job is to figure out whether we interrupted stack setup.
  * This is done by xchg()ing the signal mask with pending_mask (below).
  * If the value that comes back is zero, then there is no setup in
  * progress, and the interrupt can be handled.  If the value is
  * non-zero, then there is stack setup in progress.  In order to have
  * the interrupt handled, we leave our signal in the mask, and it will
  * be handled by the upper handler after it has set up the stack.
  *
  * Next is to figure out whether we are the outer handler or a nested
  * one.  As part of setting up the stack, thread_info->real_thread is
  * set to non-NULL (and is reset to NULL on exit).  This is the
  * nesting indicator.  If it is non-NULL, then the stack is already
  * set up and the handler can run.
  */

 /*
  * Signal bits that arrived while IRQ stack setup/teardown was in progress.
  * Zero means no setup is in progress; to_irq_stack() and from_irq_stack()
  * exchange it with xchg().
  */
 static unsigned long pending_mask;

 /*
  * Enter the IRQ stack for the signal bits in *mask_out.
  *
  * Returns 1 if pending_mask was already non-zero (stack setup in progress):
  * the bits of *mask_out are merged into pending_mask and nothing else is
  * done.  Returns 0 otherwise; *mask_out then additionally carries any bits
  * collected in pending_mask in the meantime, and bit 0 is set when this is
  * a nested entry (ti->real_thread was already non-NULL).
  */
 unsigned long to_irq_stack(unsigned long *mask_out)
 {
 	struct thread_info *ti;
 	unsigned long mask, old;
 	int nested;

 	/* publish our bits; a non-zero old value means somebody is mid-setup */
 	mask = xchg(&pending_mask, *mask_out);
 	if (mask != 0) {
 		/*
 		 * If any interrupts come in at this point, we want to
 		 * make sure that their bits aren't lost by our
 		 * putting our bit in.  So, this loop accumulates bits
 		 * until xchg returns the same value that we put in.
 		 * When that happens, there were no new interrupts,
 		 * and pending_mask contains a bit for each interrupt
 		 * that came in.
 		 */
 		old = *mask_out;
 		do {
 			old |= mask;
 			mask = xchg(&pending_mask, old);
 		} while (mask != old);
 		return 1;
 	}

 	ti = current_thread_info();
 	/* real_thread is the nesting indicator: non-NULL once the stack is set up */
 	nested = (ti->real_thread != NULL);
 	if (!nested) {
 		struct task_struct *task;
 		struct thread_info *tti;

 		task = cpu_tasks[ti->cpu].task;
 		tti = task_thread_info(task);

 		/* copy the task's thread_info here and point the task's stack at it */
 		*ti = *tti;
 		ti->real_thread = tti;
 		task->stack = ti;
 	}

 	/* setup finished: collect whatever arrived meanwhile and clear the marker */
 	mask = xchg(&pending_mask, 0);
 	*mask_out |= mask | nested;
 	return 0;
 }

 /*
  * Leave the IRQ stack: copy the thread_info back to the real thread and
  * reset the task's stack pointer.  Returns the signal bits that were
  * collected in pending_mask during the copy, with bit 0 (our own marker)
  * removed.  The @nested argument is not used in the body.
  */
 unsigned long from_irq_stack(int nested)
 {
 	struct thread_info *ti, *to;
 	unsigned long mask;

 	ti = current_thread_info();

 	/* non-zero pending_mask makes a racing to_irq_stack() defer its bits */
 	pending_mask = 1;

 	to = ti->real_thread;
 	current->stack = to;
 	ti->real_thread = NULL;
 	/* copy back: flags may have been modified while on the IRQ stack */
 	*to = *ti;

 	mask = xchg(&pending_mask, 0);
 	return mask & ~1;
 }
	// SPDX-License-Identifier: GPL-2.0
	/*
	* Copyright (C) 2017 - Cambridge Greys Ltd
	* Copyright (C) 2011 - 2014 Cisco Systems Inc
	* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
	* Derived (i.e. mostly copied) from arch/i386/kernel/irq.c:
	* Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
	*/

	#include <linux/cpumask.h>
	#include <linux/hardirq.h>
	#include <linux/interrupt.h>
	#include <linux/kernel_stat.h>
	#include <linux/module.h>
	#include <linux/sched.h>
	#include <linux/seq_file.h>
	#include <linux/slab.h>
	#include <as-layout.h>
	#include <kern_util.h>
	#include <os.h>
	#include <irq_user.h>
	#include <irq_kern.h>
	#include <as-layout.h>


	/* Defined outside this file; sigio_handler() calls it after its epoll loop exits. */
	extern void free_irqs(void);

	/*
	 * When epoll triggers we do not know why it did so, and the same fd can
	 * have different IRQs for read and write.  This is why each fd keeps a
	 * small irq_reg array - one slot per IRQ type.
	 */
	struct irq_reg {
	/* dev_id cookie given to activate_fd(); matched by free_irq_by_irq_and_dev() */
	void *id;
	/* IRQ number handed to do_IRQ() when this slot fires */
	int irq;
	/* it's cheaper to store this than to query it; 0 marks the slot as unused */
	int events;
	/* cleared by irq_io_loop() while the handler runs, to detect re-entry */
	bool active;
	/* set by irq_io_loop() on re-entry so the running loop calls do_IRQ() again */
	bool pending;
	/* set via normal_irq_set_wake(); um_irqs_suspend() leaves such fds untouched */
	bool wakeup;
	};

	/* One node of active_fds: every registration that exists for a single fd. */
	struct irq_entry {
	/* linkage into active_fds */
	struct list_head list;
	/* host file descriptor registered with epoll */
	int fd;
	/* per-IRQ-type registrations, indexed by enum um_irq_type */
	struct irq_reg reg[NUM_IRQ_TYPES];
	/* set by um_irqs_suspend(), cleared by um_irqs_resume() */
	bool suspended;
	/* records !__ignore_sigio_fd() at suspend; resume calls __add_sigio_fd() if set */
	bool sigio_workaround;
	};

	/* Taken by the functions below that look up or modify active_fds. */
	static DEFINE_SPINLOCK(irq_lock);
	/* List of struct irq_entry; activate_fd() adds a node only when the fd has none yet. */
	static LIST_HEAD(active_fds);
	/* IRQ numbers handed out by um_request_irq() for UM_IRQ_ALLOC requests. */
	static DECLARE_BITMAP(irqs_allocated, NR_IRQS);

	static void irq_io_loop(struct irq_reg irq, struct uml_pt_regs regs)
	{
	/*
	* irq->active guards against reentry
	* irq->pending accumulates pending requests
	* if pending is raised the irq_handler is re-run
	* until pending is cleared
	*/
	if (irq->active) {
	irq->active = false;

	do {
	irq->pending = false;
	do_IRQ(irq->irq, regs);
	} while (irq->pending);

	irq->active = true;
	} else {
	irq->pending = true;
	}
	}

	/*
	 * SIGIO handler that um_irqs_suspend() installs in sig_info[SIGIO]:
	 * it deliberately ignores the signal.  The siginfo and register
	 * arguments are pointers, matching sigio_handler().
	 */
	void sigio_handler_suspend(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
	{
		/* nothing */
	}

	/*
	 * SIGIO entry point: drain epoll and dispatch every triggered registration
	 * through irq_io_loop(), then call free_irqs().  @unused_si and @regs are
	 * pointers; @regs is forwarded unchanged to irq_io_loop().
	 */
	void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
	{
		struct irq_entry *irq_entry;
		int n, i;

		while (1) {
			/*
			 * This is lockless - epoll keeps back-references to the irq
			 * entries which triggered, so there is no need to walk the
			 * irq list and lock it every time.  We avoid locking by
			 * turning off IO for a specific fd with os_del_epoll_fd(fd)
			 * before making any changes to the actual data structures.
			 */
			n = os_waiting_for_events_epoll();

			if (n <= 0) {
				/* interrupted wait: retry; anything else ends the loop */
				if (n == -EINTR)
					continue;
				else
					break;
			}

			for (i = 0; i < n ; i++) {
				enum um_irq_type t;

				irq_entry = os_epoll_get_data_pointer(i);

				for (t = 0; t < NUM_IRQ_TYPES; t++) {
					int events = irq_entry->reg[t].events;

					/* unused slots have an empty event mask */
					if (!events)
						continue;

					if (os_epoll_triggered(i, events) > 0)
						irq_io_loop(&irq_entry->reg[t], regs);
				}
			}
		}

		free_irqs();
	}

	static struct irq_entry *get_irq_entry_by_fd(int fd)
	{
	struct irq_entry *walk;

	lockdep_assert_held(&irq_lock);

	list_for_each_entry(walk, &active_fds, list) {
	if (walk->fd == fd)
	return walk;
	}

	return NULL;
	}

	/*
	 * Unlink @to_free from its list and kfree() it; a NULL pointer is ignored.
	 * When @remove is true the fd is dropped from epoll first.
	 */
	static void free_irq_entry(struct irq_entry *to_free, bool remove)
	{
		if (to_free) {
			if (remove)
				os_del_epoll_fd(to_free->fd);

			list_del(&to_free->list);
			kfree(to_free);
		}
	}

	/*
	 * Re-register @entry with epoll using the union of all its slots' events.
	 * Returns true if any events remain; otherwise removes the fd from epoll
	 * and returns false.
	 */
	static bool update_irq_entry(struct irq_entry *entry)
	{
		enum um_irq_type i;
		int events = 0;

		/* plain '|=': the backslash-escaped form is not valid C */
		for (i = 0; i < NUM_IRQ_TYPES; i++)
			events |= entry->reg[i].events;

		if (events) {
			/* will modify (instead of add) if needed */
			os_add_epoll_fd(events, entry->fd, entry);
			return true;
		}

		os_del_epoll_fd(entry->fd);
		return false;
	}

	/* Refresh @entry's epoll registration; free the entry once no events remain. */
	static void update_or_free_irq_entry(struct irq_entry *entry)
	{
		if (update_irq_entry(entry))
			return;

		/* update_irq_entry() already removed the fd from epoll */
		free_irq_entry(entry, false);
	}

	static int activate_fd(int irq, int fd, enum um_irq_type type, void *dev_id)
	{
	struct irq_entry *irq_entry;
	int err, events = os_event_mask(type);
	unsigned long flags;

	err = os_set_fd_async(fd);
	if (err < 0)
	goto out;

	spin_lock_irqsave(&irq_lock, flags);
	irq_entry = get_irq_entry_by_fd(fd);
	if (irq_entry) {
	/* cannot register the same FD twice with the same type */
	if (WARN_ON(irq_entry->reg[type].events)) {
	err = -EALREADY;
	goto out_unlock;
	}

	/* temporarily disable to avoid IRQ-side locking */
	os_del_epoll_fd(fd);
	} else {
	irq_entry = kzalloc(sizeof(*irq_entry), GFP_ATOMIC);
	if (!irq_entry) {
	err = -ENOMEM;
	goto out_unlock;
	}
	irq_entry->fd = fd;
	list_add_tail(&irq_entry->list, &active_fds);
	maybe_sigio_broken(fd);
	}

	irq_entry->reg[type].id = dev_id;
	irq_entry->reg[type].irq = irq;
	irq_entry->reg[type].active = true;
	irq_entry->reg[type].events = events;

	WARN_ON(!update_irq_entry(irq_entry));
	spin_unlock_irqrestore(&irq_lock, flags);

	return 0;
	out_unlock:
	spin_unlock_irqrestore(&irq_lock, flags);
	out:
	return err;
	}

	/*
	 * Drop every registration that exists for @fd in one go.  To remove only
	 * some of them, use um_free_irq() or deactivate_fd() instead.
	 */
	void free_irq_by_fd(int fd)
	{
		unsigned long flags;

		spin_lock_irqsave(&irq_lock, flags);
		/* free_irq_entry() ignores a NULL (not found) lookup result */
		free_irq_entry(get_irq_entry_by_fd(fd), true);
		spin_unlock_irqrestore(&irq_lock, flags);
	}
	EXPORT_SYMBOL(free_irq_by_fd);

	/*
	 * Remove the first in-use slot whose IRQ number is @irq and whose id is
	 * @dev, then update or free the entry that owned it.
	 */
	static void free_irq_by_irq_and_dev(unsigned int irq, void *dev)
	{
		struct irq_entry *entry;
		unsigned long flags;

		spin_lock_irqsave(&irq_lock, flags);
		list_for_each_entry(entry, &active_fds, list) {
			enum um_irq_type t;

			for (t = 0; t < NUM_IRQ_TYPES; t++) {
				struct irq_reg *reg = &entry->reg[t];

				if (!reg->events || reg->irq != irq || reg->id != dev)
					continue;

				/* stop epoll delivery before changing the slot */
				os_del_epoll_fd(entry->fd);
				reg->events = 0;
				update_or_free_irq_entry(entry);
				/* entry may be freed now: do not keep iterating */
				goto unlock;
			}
		}
	unlock:
		spin_unlock_irqrestore(&irq_lock, flags);
	}

	void deactivate_fd(int fd, int irqnum)
	{
	struct irq_entry *entry;
	unsigned long flags;
	enum um_irq_type i;

	os_del_epoll_fd(fd);

	spin_lock_irqsave(&irq_lock, flags);
	entry = get_irq_entry_by_fd(fd);
	if (!entry)
	goto out;

	for (i = 0; i < NUM_IRQ_TYPES; i++) {
	if (!entry->reg[i].events)
	continue;
	if (entry->reg[i].irq == irqnum)
	entry->reg[i].events = 0;
	}

	update_or_free_irq_entry(entry);
	out:
	spin_unlock_irqrestore(&irq_lock, flags);

	ignore_sigio_fd(fd);
	}
	EXPORT_SYMBOL(deactivate_fd);

	/*
	* Called just before shutdown in order to provide a clean exec
	* environment in case the system is rebooting. No locking because
	* that would cause a pointless shutdown hang if something hadn't
	* released the lock.
	*/
	int deactivate_all_fds(void)
	{
	struct irq_entry *entry;

	/* Stop IO. The IRQ loop has no lock so this is our
	* only way of making sure we are safe to dispose
	* of all IRQ handlers
	*/
	os_set_ioignore();

	/* we can no longer call kfree() here so just deactivate */
	list_for_each_entry(entry, &active_fds, list)
	os_del_epoll_fd(entry->fd);
	os_close_epoll_fd();
	return 0;
	}

	/*
	 * do_IRQ handles all normal device IRQs (the special SMP cross-CPU
	 * interrupts have their own specific handlers).  It installs @regs as the
	 * current IRQ registers around generic_handle_irq() and always returns 1.
	 */
	unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
	{
		/*
		 * old_regs is a pointer and @regs is cast to a pointer type; the
		 * by-value forms cannot hold or pass the saved register set.
		 */
		struct pt_regs *old_regs = set_irq_regs((struct pt_regs *)regs);

		irq_enter();
		generic_handle_irq(irq);
		irq_exit();
		/* put back whatever register set was current before this IRQ */
		set_irq_regs(old_regs);
		return 1;
	}

	void um_free_irq(int irq, void *dev)
	{
	if (WARN(irq < 0 \|\| irq > NR_IRQS, "freeing invalid irq %d", irq))
	return;

	free_irq_by_irq_and_dev(irq, dev);
	free_irq(irq, dev);
	clear_bit(irq, irqs_allocated);
	}
	EXPORT_SYMBOL(um_free_irq);

	int um_request_irq(int irq, int fd, enum um_irq_type type,
	irq_handler_t handler, unsigned long irqflags,
	const char devname, void dev_id)
	{
	int err;

	if (irq == UM_IRQ_ALLOC) {
	int i;

	for (i = UM_FIRST_DYN_IRQ; i < NR_IRQS; i++) {
	if (!test_and_set_bit(i, irqs_allocated)) {
	irq = i;
	break;
	}
	}
	}

	if (irq < 0)
	return -ENOSPC;

	if (fd != -1) {
	err = activate_fd(irq, fd, type, dev_id);
	if (err)
	goto error;
	}

	err = request_irq(irq, handler, irqflags, devname, dev_id);
	if (err < 0)
	goto error;

	return irq;
	error:
	clear_bit(irq, irqs_allocated);
	return err;
	}
	EXPORT_SYMBOL(um_request_irq);

	#ifdef CONFIG_PM_SLEEP
	/*
	 * Prepare fd-backed IRQs for suspend: route SIGIO to the no-op handler and
	 * turn off async notification for every fd that has no wakeup-capable
	 * registration.
	 */
	void um_irqs_suspend(void)
	{
		struct irq_entry *entry;
		unsigned long flags;

		sig_info[SIGIO] = sigio_handler_suspend;

		spin_lock_irqsave(&irq_lock, flags);
		list_for_each_entry(entry, &active_fds, list) {
			enum um_irq_type t;
			bool wake = false;

			for (t = 0; t < NUM_IRQ_TYPES; t++) {
				if (!entry->reg[t].events)
					continue;

				/*
				 * For the SIGIO_WRITE_IRQ, which is used to handle the
				 * SIGIO workaround thread, we need special handling:
				 * enable wake for it itself, but below we tell it about
				 * any FDs that should be suspended.
				 */
				if (entry->reg[t].wakeup ||
				    entry->reg[t].irq == SIGIO_WRITE_IRQ) {
					wake = true;
					break;
				}
			}

			if (!wake) {
				entry->suspended = true;
				os_clear_fd_async(entry->fd);
				entry->sigio_workaround =
					!__ignore_sigio_fd(entry->fd);
			}
		}
		spin_unlock_irqrestore(&irq_lock, flags);
	}

	/*
	 * Reverse um_irqs_suspend(): re-enable async notification on every
	 * suspended fd, re-add it to the SIGIO workaround where it was removed,
	 * reinstall the real SIGIO handler and send ourselves a SIGIO.
	 */
	void um_irqs_resume(void)
	{
		struct irq_entry *entry;
		unsigned long flags;

		spin_lock_irqsave(&irq_lock, flags);
		list_for_each_entry(entry, &active_fds, list) {
			int err;

			if (!entry->suspended)
				continue;

			err = os_set_fd_async(entry->fd);
			WARN(err < 0, "os_set_fd_async returned %d\n", err);
			entry->suspended = false;

			if (!entry->sigio_workaround)
				continue;

			err = __add_sigio_fd(entry->fd);
			WARN(err < 0, "add_sigio_returned %d\n", err);
		}
		spin_unlock_irqrestore(&irq_lock, flags);

		sig_info[SIGIO] = sigio_handler;
		send_sigio_to_self();
	}

	/*
	 * irq_set_wake callback: store @on in the wakeup flag of the first in-use
	 * slot registered for d->irq.  Always returns 0, even if nothing matched.
	 */
	static int normal_irq_set_wake(struct irq_data *d, unsigned int on)
	{
		struct irq_entry *entry;
		unsigned long flags;

		spin_lock_irqsave(&irq_lock, flags);
		list_for_each_entry(entry, &active_fds, list) {
			enum um_irq_type t;

			for (t = 0; t < NUM_IRQ_TYPES; t++) {
				struct irq_reg *reg = &entry->reg[t];

				if (reg->events && reg->irq == d->irq) {
					reg->wakeup = on;
					goto done;
				}
			}
		}
	done:
		spin_unlock_irqrestore(&irq_lock, flags);
		return 0;
	}
	#else
	#define normal_irq_set_wake NULL
	#endif

	/*
	 * No-op irq_chip callback.  irq_chip must define at least enable/disable
	 * and ack when the edge handler is used, so the chips in this file point
	 * those hooks here.
	 */
	static void dummy(struct irq_data *d)
	{
		/* intentionally empty */
	}

	/*
	 * This is used for everything other than the timer: init_IRQ() installs it
	 * on IRQs 1 .. NR_IRQS - 1.  Every hook is the no-op dummy() except
	 * irq_set_wake, which is normal_irq_set_wake() (NULL without
	 * CONFIG_PM_SLEEP).
	 */
	static struct irq_chip normal_irq_type = {
	.name = "SIGIO",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
	.irq_set_wake = normal_irq_set_wake,
	};

	/*
	 * Chip installed on TIMER_IRQ by init_IRQ().  Every hook is the no-op
	 * dummy(); there is no irq_set_wake hook.
	 */
	static struct irq_chip alarm_irq_type = {
	.name = "SIGALRM",
	.irq_disable = dummy,
	.irq_enable = dummy,
	.irq_ack = dummy,
	.irq_mask = dummy,
	.irq_unmask = dummy,
	};

	/*
	 * Boot-time IRQ setup: attach the SIGALRM chip to TIMER_IRQ, the SIGIO chip
	 * to IRQs 1 .. NR_IRQS - 1 (all with the edge handler), then set up epoll.
	 */
	void __init init_IRQ(void)
	{
		int irq;

		irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);

		for (irq = 1; irq < NR_IRQS; irq++) {
			irq_set_chip_and_handler(irq, &normal_irq_type,
						 handle_edge_irq);
		}

		/* Initialize EPOLL Loop */
		os_setup_epoll();
	}

	/*
	* IRQ stack entry and exit:
	*
	* Unlike i386, UML doesn't receive IRQs on the normal kernel stack
	* and switch over to the IRQ stack after some preparation. We use
	* sigaltstack to receive signals on a separate stack from the start.
	* These two functions make sure the rest of the kernel won't be too
	* upset by being on a different stack. The IRQ stack has a
	* thread_info structure at the bottom so that current et al continue
	* to work.
	*
	* to_irq_stack copies the current task's thread_info to the IRQ stack
	* thread_info and sets the task's stack to point to the IRQ stack.
	*
	* from_irq_stack copies the thread_info struct back (flags may have
	* been modified) and resets the task's stack pointer.
	*
	* Tricky bits -
	*
	* What happens when two signals race each other? UML doesn't block
	* signals with sigprocmask, SA_DEFER, or sa_mask, so a second signal
	* could arrive while a previous one is still setting up the
	* thread_info.
	*
	* There are three cases -
	* The first interrupt on the stack - sets up the thread_info and
	* handles the interrupt
	* A nested interrupt interrupting the copying of the thread_info -
	* can't handle the interrupt, as the stack is in an unknown state
	* A nested interrupt not interrupting the copying of the
	* thread_info - doesn't do any setup, just handles the interrupt
	*
	* The first job is to figure out whether we interrupted stack setup.
	* This is done by xchg()ing the signal mask with pending_mask (below).
	* If the value that comes back is zero, then there is no setup in
	* progress, and the interrupt can be handled. If the value is
	* non-zero, then there is stack setup in progress. In order to have
	* the interrupt handled, we leave our signal in the mask, and it will
	* be handled by the upper handler after it has set up the stack.
	*
	* Next is to figure out whether we are the outer handler or a nested
	* one. As part of setting up the stack, thread_info->real_thread is
	* set to non-NULL (and is reset to NULL on exit). This is the
	* nesting indicator. If it is non-NULL, then the stack is already
	* set up and the handler can run.
	*/

	/*
	 * Signal bits that arrived while IRQ stack setup/teardown was in progress.
	 * Zero means no setup is in progress; to_irq_stack() and from_irq_stack()
	 * exchange it with xchg().
	 */
	static unsigned long pending_mask;

	/*
	 * Enter the IRQ stack for the signal bits in *mask_out.
	 *
	 * Returns 1 if pending_mask was already non-zero (stack setup in progress):
	 * the bits of *mask_out are merged into pending_mask and nothing else is
	 * done.  Returns 0 otherwise; *mask_out then additionally carries any bits
	 * collected in pending_mask in the meantime, and bit 0 is set when this is
	 * a nested entry (ti->real_thread was already non-NULL).
	 */
	unsigned long to_irq_stack(unsigned long *mask_out)
	{
		struct thread_info *ti;
		unsigned long mask, old;
		int nested;

		/* publish our bits; a non-zero old value means somebody is mid-setup */
		mask = xchg(&pending_mask, *mask_out);
		if (mask != 0) {
			/*
			 * If any interrupts come in at this point, we want to
			 * make sure that their bits aren't lost by our
			 * putting our bit in.  So, this loop accumulates bits
			 * until xchg returns the same value that we put in.
			 * When that happens, there were no new interrupts,
			 * and pending_mask contains a bit for each interrupt
			 * that came in.
			 */
			old = *mask_out;
			do {
				old |= mask;
				mask = xchg(&pending_mask, old);
			} while (mask != old);
			return 1;
		}

		ti = current_thread_info();
		/* real_thread is the nesting indicator: non-NULL once the stack is set up */
		nested = (ti->real_thread != NULL);
		if (!nested) {
			struct task_struct *task;
			struct thread_info *tti;

			task = cpu_tasks[ti->cpu].task;
			tti = task_thread_info(task);

			/*
			 * Copy the structure contents, not the pointer: assigning
			 * the pointer would leave the IRQ stack's thread_info
			 * untouched and make the two lines below write through
			 * the task's own thread_info.
			 */
			*ti = *tti;
			ti->real_thread = tti;
			task->stack = ti;
		}

		/* setup finished: collect whatever arrived meanwhile and clear the marker */
		mask = xchg(&pending_mask, 0);
		*mask_out |= mask | nested;
		return 0;
	}

	unsigned long from_irq_stack(int nested)
	{
	struct thread_info ti, to;
	unsigned long mask;

	ti = current_thread_info();

	pending_mask = 1;

	to = ti->real_thread;
	current->stack = to;
	ti->real_thread = NULL;
	to = ti;

	mask = xchg(&pending_mask, 0);
	return mask & ~1;
	}