Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/geert/linux-m68k:
  m68k: Finally remove leftover markers sections
  m68k/mac: Fix mac_irq_pending() for PSC MACE and SCC
  m68k/mac: Fix compiler warning in via_read_time()
  zorro: Fix four checkpatch warnings
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 7ff4669..c34f96c 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -12,6 +12,7 @@
 #include <asm/pgtable.h>
 
 #include <xen/interface/xen.h>
+#include <xen/grant_table.h>
 #include <xen/features.h>
 
 /* Xen machine address */
@@ -48,14 +49,11 @@
 					     unsigned long pfn_e);
 
 extern int m2p_add_override(unsigned long mfn, struct page *page,
-			    bool clear_pte);
+			    struct gnttab_map_grant_ref *kmap_op);
 extern int m2p_remove_override(struct page *page, bool clear_pte);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
-#ifdef CONFIG_XEN_DEBUG_FS
-extern int p2m_dump_show(struct seq_file *m, void *v);
-#endif
 static inline unsigned long pfn_to_mfn(unsigned long pfn)
 {
 	unsigned long mfn;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f1a6244d..794bc95 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -75,8 +75,10 @@
 	/*
 	 * Undefined/reserved opcodes, conditional jump, Opcode Extension
 	 * Groups, and some special opcodes can not boost.
+	 * This is non-const to keep gcc from statically optimizing it out, as
+	 * variable_test_bit makes gcc think only *(unsigned long*) is used.
 	 */
-static const u32 twobyte_is_boostable[256 / 32] = {
+static u32 twobyte_is_boostable[256 / 32] = {
 	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f          */
 	/*      ----------------------------------------------          */
 	W(0x00, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0) | /* 00 */
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 1017c7b..492ade8 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -175,8 +175,10 @@
 					       "pcifront-msi-x" :
 					       "pcifront-msi",
 						DOMID_SELF);
-		if (irq < 0)
+		if (irq < 0) {
+			ret = irq;
 			goto free;
+		}
 		i++;
 	}
 	kfree(v);
@@ -221,8 +223,10 @@
 		if (msg.data != XEN_PIRQ_MSI_DATA ||
 		    xen_irq_from_pirq(pirq) < 0) {
 			pirq = xen_allocate_pirq_msi(dev, msidesc);
-			if (pirq < 0)
+			if (pirq < 0) {
+				irq = -ENODEV;
 				goto error;
+			}
 			xen_msi_compose_msg(dev, pirq, &msg);
 			__write_msi_msg(msidesc, &msg);
 			dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq);
@@ -244,10 +248,12 @@
 error:
 	dev_err(&dev->dev,
 		"Xen PCI frontend has not registered MSI/MSI-X support!\n");
-	return -ENODEV;
+	return irq;
 }
 
 #ifdef CONFIG_XEN_DOM0
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
 	int ret = 0;
@@ -265,10 +271,11 @@
 
 		memset(&map_irq, 0, sizeof(map_irq));
 		map_irq.domid = domid;
-		map_irq.type = MAP_PIRQ_TYPE_MSI;
+		map_irq.type = MAP_PIRQ_TYPE_MSI_SEG;
 		map_irq.index = -1;
 		map_irq.pirq = -1;
-		map_irq.bus = dev->bus->number;
+		map_irq.bus = dev->bus->number |
+			      (pci_domain_nr(dev->bus) << 16);
 		map_irq.devfn = dev->devfn;
 
 		if (type == PCI_CAP_ID_MSIX) {
@@ -285,7 +292,20 @@
 			map_irq.entry_nr = msidesc->msi_attrib.entry_nr;
 		}
 
-		ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
+		ret = -EINVAL;
+		if (pci_seg_supported)
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+		if (ret == -EINVAL && !pci_domain_nr(dev->bus)) {
+			map_irq.type = MAP_PIRQ_TYPE_MSI;
+			map_irq.index = -1;
+			map_irq.pirq = -1;
+			map_irq.bus = dev->bus->number;
+			ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq,
+						    &map_irq);
+			if (ret != -EINVAL)
+				pci_seg_supported = false;
+		}
 		if (ret) {
 			dev_warn(&dev->dev, "xen map irq failed %d for %d domain\n",
 				 ret, domid);
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig
index 5cc821c..26c731a 100644
--- a/arch/x86/xen/Kconfig
+++ b/arch/x86/xen/Kconfig
@@ -25,8 +25,7 @@
 
 config XEN_PVHVM
 	def_bool y
-	depends on XEN
-	depends on X86_LOCAL_APIC
+	depends on XEN && PCI && X86_LOCAL_APIC
 
 config XEN_MAX_DOMAIN_MEMORY
        int
@@ -49,11 +48,3 @@
 	help
 	  Enable statistics output and various tuning options in debugfs.
 	  Enabling this option may incur a significant performance overhead.
-
-config XEN_DEBUG
-	bool "Enable Xen debug checks"
-	depends on XEN
-	default n
-	help
-	  Enable various WARN_ON checks in the Xen MMU code.
-	  Enabling this option WILL incur a significant performance overhead.
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2d69617..da8afd5 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -251,6 +251,7 @@
 			~((1 << X86_FEATURE_APIC) |  /* disable local APIC */
 			  (1 << X86_FEATURE_ACPI));  /* disable ACPI */
 	ax = 1;
+	cx = 0;
 	xen_cpuid(&ax, &bx, &cx, &dx);
 
 	xsave_mask =
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3dd53f9..87f6673 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -495,41 +495,6 @@
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);
 
-#ifdef CONFIG_XEN_DEBUG
-pte_t xen_make_pte_debug(pteval_t pte)
-{
-	phys_addr_t addr = (pte & PTE_PFN_MASK);
-	phys_addr_t other_addr;
-	bool io_page = false;
-	pte_t _pte;
-
-	if (pte & _PAGE_IOMAP)
-		io_page = true;
-
-	_pte = xen_make_pte(pte);
-
-	if (!addr)
-		return _pte;
-
-	if (io_page &&
-	    (xen_initial_domain() || addr >= ISA_END_ADDRESS)) {
-		other_addr = pfn_to_mfn(addr >> PAGE_SHIFT) << PAGE_SHIFT;
-		WARN_ONCE(addr != other_addr,
-			"0x%lx is using VM_IO, but it is 0x%lx!\n",
-			(unsigned long)addr, (unsigned long)other_addr);
-	} else {
-		pteval_t iomap_set = (_pte.pte & PTE_FLAGS_MASK) & _PAGE_IOMAP;
-		other_addr = (_pte.pte & PTE_PFN_MASK);
-		WARN_ONCE((addr == other_addr) && (!io_page) && (!iomap_set),
-			"0x%lx is missing VM_IO (and wasn't fixed)!\n",
-			(unsigned long)addr);
-	}
-
-	return _pte;
-}
-PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_debug);
-#endif
-
 static pgd_t xen_make_pgd(pgdval_t pgd)
 {
 	pgd = pte_pfn_to_mfn(pgd);
@@ -1992,9 +1957,6 @@
 
 static void __init xen_post_allocator_init(void)
 {
-#ifdef CONFIG_XEN_DEBUG
-	pv_mmu_ops.make_pte = PV_CALLEE_SAVE(xen_make_pte_debug);
-#endif
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
@@ -2404,17 +2366,3 @@
 	return err;
 }
 EXPORT_SYMBOL_GPL(xen_remap_domain_mfn_range);
-
-#ifdef CONFIG_XEN_DEBUG_FS
-static int p2m_dump_open(struct inode *inode, struct file *filp)
-{
-	return single_open(filp, p2m_dump_show, NULL);
-}
-
-static const struct file_operations p2m_dump_fops = {
-	.open		= p2m_dump_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 58efeb9..1b267e7 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -161,7 +161,9 @@
 #include <asm/xen/page.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/grant_table.h>
 
+#include "multicalls.h"
 #include "xen-ops.h"
 
 static void __init m2p_override_init(void);
@@ -676,7 +678,8 @@
 }
 
 /* Add an MFN override for a particular page */
-int m2p_add_override(unsigned long mfn, struct page *page, bool clear_pte)
+int m2p_add_override(unsigned long mfn, struct page *page,
+		struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
 	unsigned long pfn;
@@ -692,16 +695,28 @@
 					"m2p_add_override: pfn %lx not mapped", pfn))
 			return -EINVAL;
 	}
-
-	page->private = mfn;
+	WARN_ON(PagePrivate(page));
+	SetPagePrivate(page);
+	set_page_private(page, mfn);
 	page->index = pfn_to_mfn(pfn);
 
 	if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
 		return -ENOMEM;
 
-	if (clear_pte && !PageHighMem(page))
-		/* Just zap old mapping for now */
-		pte_clear(&init_mm, address, ptep);
+	if (kmap_op != NULL) {
+		if (!PageHighMem(page)) {
+			struct multicall_space mcs =
+				xen_mc_entry(sizeof(*kmap_op));
+
+			MULTI_grant_table_op(mcs.mc,
+					GNTTABOP_map_grant_ref, kmap_op, 1);
+
+			xen_mc_issue(PARAVIRT_LAZY_MMU);
+		}
+		/* let's use dev_bus_addr to record the old mfn instead */
+		kmap_op->dev_bus_addr = page->index;
+		page->index = (unsigned long) kmap_op;
+	}
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
@@ -735,13 +750,56 @@
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_del(&page->lru);
 	spin_unlock_irqrestore(&m2p_override_lock, flags);
-	set_phys_to_machine(pfn, page->index);
+	WARN_ON(!PagePrivate(page));
+	ClearPagePrivate(page);
 
-	if (clear_pte && !PageHighMem(page))
-		set_pte_at(&init_mm, address, ptep,
-				pfn_pte(pfn, PAGE_KERNEL));
-		/* No tlb flush necessary because the caller already
-		 * left the pte unmapped. */
+	if (clear_pte) {
+		struct gnttab_map_grant_ref *map_op =
+			(struct gnttab_map_grant_ref *) page->index;
+		set_phys_to_machine(pfn, map_op->dev_bus_addr);
+		if (!PageHighMem(page)) {
+			struct multicall_space mcs;
+			struct gnttab_unmap_grant_ref *unmap_op;
+
+			/*
+			 * It might be that we queued all the m2p grant table
+			 * hypercalls in a multicall, then m2p_remove_override
+			 * get called before the multicall has actually been
+			 * issued. In this case handle is going to -1 because
+			 * it hasn't been modified yet.
+			 */
+			if (map_op->handle == -1)
+				xen_mc_flush();
+			/*
+			 * Now if map_op->handle is negative it means that the
+			 * hypercall actually returned an error.
+			 */
+			if (map_op->handle == GNTST_general_error) {
+				printk(KERN_WARNING "m2p_remove_override: "
+						"pfn %lx mfn %lx, failed to modify kernel mappings",
+						pfn, mfn);
+				return -1;
+			}
+
+			mcs = xen_mc_entry(
+					sizeof(struct gnttab_unmap_grant_ref));
+			unmap_op = mcs.args;
+			unmap_op->host_addr = map_op->host_addr;
+			unmap_op->handle = map_op->handle;
+			unmap_op->dev_bus_addr = 0;
+
+			MULTI_grant_table_op(mcs.mc,
+					GNTTABOP_unmap_grant_ref, unmap_op, 1);
+
+			xen_mc_issue(PARAVIRT_LAZY_MMU);
+
+			set_pte_at(&init_mm, address, ptep,
+					pfn_pte(pfn, PAGE_KERNEL));
+			__flush_tlb_single(address);
+			map_op->host_addr = 0;
+		}
+	} else
+		set_phys_to_machine(pfn, page->index);
 
 	return 0;
 }
@@ -758,7 +816,7 @@
 	spin_lock_irqsave(&m2p_override_lock, flags);
 
 	list_for_each_entry(p, bucket, lru) {
-		if (p->private == mfn) {
+		if (page_private(p) == mfn) {
 			ret = p;
 			break;
 		}
@@ -782,17 +840,21 @@
 EXPORT_SYMBOL_GPL(m2p_find_override_pfn);
 
 #ifdef CONFIG_XEN_DEBUG_FS
-
-int p2m_dump_show(struct seq_file *m, void *v)
+#include <linux/debugfs.h>
+#include "debugfs.h"
+static int p2m_dump_show(struct seq_file *m, void *v)
 {
 	static const char * const level_name[] = { "top", "middle",
-						"entry", "abnormal" };
-	static const char * const type_name[] = { "identity", "missing",
-						"pfn", "abnormal"};
+						"entry", "abnormal", "error"};
 #define TYPE_IDENTITY 0
 #define TYPE_MISSING 1
 #define TYPE_PFN 2
 #define TYPE_UNKNOWN 3
+	static const char * const type_name[] = {
+				[TYPE_IDENTITY] = "identity",
+				[TYPE_MISSING] = "missing",
+				[TYPE_PFN] = "pfn",
+				[TYPE_UNKNOWN] = "abnormal"};
 	unsigned long pfn, prev_pfn_type = 0, prev_pfn_level = 0;
 	unsigned int uninitialized_var(prev_level);
 	unsigned int uninitialized_var(prev_type);
@@ -856,4 +918,32 @@
 #undef TYPE_PFN
 #undef TYPE_UNKNOWN
 }
-#endif
+
+static int p2m_dump_open(struct inode *inode, struct file *filp)
+{
+	return single_open(filp, p2m_dump_show, NULL);
+}
+
+static const struct file_operations p2m_dump_fops = {
+	.open		= p2m_dump_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static struct dentry *d_mmu_debug;
+
+static int __init xen_p2m_debugfs(void)
+{
+	struct dentry *d_xen = xen_init_debugfs();
+
+	if (d_xen == NULL)
+		return -ENOMEM;
+
+	d_mmu_debug = debugfs_create_dir("mmu", d_xen);
+
+	debugfs_create_file("p2m", 0600, d_mmu_debug, NULL, &p2m_dump_fops);
+	return 0;
+}
+fs_initcall(xen_p2m_debugfs);
+#endif /* CONFIG_XEN_DEBUG_FS */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 46d6d21..38d0af4 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -37,7 +37,10 @@
 extern void xen_syscall32_target(void);
 
 /* Amount of extra memory space we add to the e820 ranges */
-phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
+
+/* Number of pages released from the initial allocation. */
+unsigned long xen_released_pages;
 
 /* 
  * The maximum amount of extra memory compared to the base size.  The
@@ -51,48 +54,47 @@
  */
 #define EXTRA_MEM_RATIO		(10)
 
-static void __init xen_add_extra_mem(unsigned long pages)
+static void __init xen_add_extra_mem(u64 start, u64 size)
 {
 	unsigned long pfn;
+	int i;
 
-	u64 size = (u64)pages * PAGE_SIZE;
-	u64 extra_start = xen_extra_mem_start + xen_extra_mem_size;
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++) {
+		/* Add new region. */
+		if (xen_extra_mem[i].size == 0) {
+			xen_extra_mem[i].start = start;
+			xen_extra_mem[i].size  = size;
+			break;
+		}
+		/* Append to existing region. */
+		if (xen_extra_mem[i].start + xen_extra_mem[i].size == start) {
+			xen_extra_mem[i].size += size;
+			break;
+		}
+	}
+	if (i == XEN_EXTRA_MEM_MAX_REGIONS)
+		printk(KERN_WARNING "Warning: not enough extra memory regions\n");
 
-	if (!pages)
-		return;
+	memblock_x86_reserve_range(start, start + size, "XEN EXTRA");
 
-	e820_add_region(extra_start, size, E820_RAM);
-	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+	xen_max_p2m_pfn = PFN_DOWN(start + size);
 
-	memblock_x86_reserve_range(extra_start, extra_start + size, "XEN EXTRA");
-
-	xen_extra_mem_size += size;
-
-	xen_max_p2m_pfn = PFN_DOWN(extra_start + size);
-
-	for (pfn = PFN_DOWN(extra_start); pfn <= xen_max_p2m_pfn; pfn++)
+	for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++)
 		__set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
 }
 
-static unsigned long __init xen_release_chunk(phys_addr_t start_addr,
-					      phys_addr_t end_addr)
+static unsigned long __init xen_release_chunk(unsigned long start,
+					      unsigned long end)
 {
 	struct xen_memory_reservation reservation = {
 		.address_bits = 0,
 		.extent_order = 0,
 		.domid        = DOMID_SELF
 	};
-	unsigned long start, end;
 	unsigned long len = 0;
 	unsigned long pfn;
 	int ret;
 
-	start = PFN_UP(start_addr);
-	end = PFN_DOWN(end_addr);
-
-	if (end <= start)
-		return 0;
-
 	for(pfn = start; pfn < end; pfn++) {
 		unsigned long mfn = pfn_to_mfn(pfn);
 
@@ -117,72 +119,52 @@
 	return len;
 }
 
-static unsigned long __init xen_return_unused_memory(unsigned long max_pfn,
-						     const struct e820map *e820)
+static unsigned long __init xen_set_identity_and_release(
+	const struct e820entry *list, size_t map_size, unsigned long nr_pages)
 {
-	phys_addr_t max_addr = PFN_PHYS(max_pfn);
-	phys_addr_t last_end = ISA_END_ADDRESS;
+	phys_addr_t start = 0;
 	unsigned long released = 0;
-	int i;
-
-	/* Free any unused memory above the low 1Mbyte. */
-	for (i = 0; i < e820->nr_map && last_end < max_addr; i++) {
-		phys_addr_t end = e820->map[i].addr;
-		end = min(max_addr, end);
-
-		if (last_end < end)
-			released += xen_release_chunk(last_end, end);
-		last_end = max(last_end, e820->map[i].addr + e820->map[i].size);
-	}
-
-	if (last_end < max_addr)
-		released += xen_release_chunk(last_end, max_addr);
-
-	printk(KERN_INFO "released %lu pages of unused memory\n", released);
-	return released;
-}
-
-static unsigned long __init xen_set_identity(const struct e820entry *list,
-					     ssize_t map_size)
-{
-	phys_addr_t last = xen_initial_domain() ? 0 : ISA_END_ADDRESS;
-	phys_addr_t start_pci = last;
-	const struct e820entry *entry;
 	unsigned long identity = 0;
+	const struct e820entry *entry;
 	int i;
 
+	/*
+	 * Combine non-RAM regions and gaps until a RAM region (or the
+	 * end of the map) is reached, then set the 1:1 map and
+	 * release the pages (if available) in those non-RAM regions.
+	 *
+	 * The combined non-RAM regions are rounded to a whole number
+	 * of pages so any partial pages are accessible via the 1:1
+	 * mapping.  This is needed for some BIOSes that put (for
+	 * example) the DMI tables in a reserved region that begins on
+	 * a non-page boundary.
+	 */
 	for (i = 0, entry = list; i < map_size; i++, entry++) {
-		phys_addr_t start = entry->addr;
-		phys_addr_t end = start + entry->size;
+		phys_addr_t end = entry->addr + entry->size;
 
-		if (start < last)
-			start = last;
+		if (entry->type == E820_RAM || i == map_size - 1) {
+			unsigned long start_pfn = PFN_DOWN(start);
+			unsigned long end_pfn = PFN_UP(end);
 
-		if (end <= start)
-			continue;
+			if (entry->type == E820_RAM)
+				end_pfn = PFN_UP(entry->addr);
 
-		/* Skip over the 1MB region. */
-		if (last > end)
-			continue;
+			if (start_pfn < end_pfn) {
+				if (start_pfn < nr_pages)
+					released += xen_release_chunk(
+						start_pfn, min(end_pfn, nr_pages));
 
-		if ((entry->type == E820_RAM) || (entry->type == E820_UNUSABLE)) {
-			if (start > start_pci)
 				identity += set_phys_range_identity(
-						PFN_UP(start_pci), PFN_DOWN(start));
-
-			/* Without saving 'last' we would gooble RAM too
-			 * at the end of the loop. */
-			last = end;
-			start_pci = end;
-			continue;
+					start_pfn, end_pfn);
+			}
+			start = end;
 		}
-		start_pci = min(start, start_pci);
-		last = end;
 	}
-	if (last > start_pci)
-		identity += set_phys_range_identity(
-					PFN_UP(start_pci), PFN_DOWN(last));
-	return identity;
+
+	printk(KERN_INFO "Released %lu pages of unused memory\n", released);
+	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping\n", identity);
+
+	return released;
 }
 
 static unsigned long __init xen_get_max_pages(void)
@@ -197,21 +179,32 @@
 	return min(max_pages, MAX_DOMAIN_PAGES);
 }
 
+static void xen_align_and_add_e820_region(u64 start, u64 size, int type)
+{
+	u64 end = start + size;
+
+	/* Align RAM regions to page boundaries. */
+	if (type == E820_RAM) {
+		start = PAGE_ALIGN(start);
+		end &= ~((u64)PAGE_SIZE - 1);
+	}
+
+	e820_add_region(start, end - start, type);
+}
+
 /**
  * machine_specific_memory_setup - Hook for machine specific memory setup.
  **/
 char * __init xen_memory_setup(void)
 {
 	static struct e820entry map[E820MAX] __initdata;
-	static struct e820entry map_raw[E820MAX] __initdata;
 
 	unsigned long max_pfn = xen_start_info->nr_pages;
 	unsigned long long mem_end;
 	int rc;
 	struct xen_memory_map memmap;
+	unsigned long max_pages;
 	unsigned long extra_pages = 0;
-	unsigned long extra_limit;
-	unsigned long identity_pages = 0;
 	int i;
 	int op;
 
@@ -237,58 +230,65 @@
 	}
 	BUG_ON(rc);
 
-	memcpy(map_raw, map, sizeof(map));
-	e820.nr_map = 0;
-	xen_extra_mem_start = mem_end;
-	for (i = 0; i < memmap.nr_entries; i++) {
-		unsigned long long end;
+	/* Make sure the Xen-supplied memory map is well-ordered. */
+	sanitize_e820_map(map, memmap.nr_entries, &memmap.nr_entries);
 
-		/* Guard against non-page aligned E820 entries. */
-		if (map[i].type == E820_RAM)
-			map[i].size -= (map[i].size + map[i].addr) % PAGE_SIZE;
+	max_pages = xen_get_max_pages();
+	if (max_pages > max_pfn)
+		extra_pages += max_pages - max_pfn;
 
-		end = map[i].addr + map[i].size;
-		if (map[i].type == E820_RAM && end > mem_end) {
-			/* RAM off the end - may be partially included */
-			u64 delta = min(map[i].size, end - mem_end);
+	/*
+	 * Set P2M for all non-RAM pages and E820 gaps to be identity
+	 * type PFNs.  Any RAM pages that would be made inaccesible by
+	 * this are first released.
+	 */
+	xen_released_pages = xen_set_identity_and_release(
+		map, memmap.nr_entries, max_pfn);
+	extra_pages += xen_released_pages;
 
-			map[i].size -= delta;
-			end -= delta;
+	/*
+	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
+	 * factor the base size.  On non-highmem systems, the base
+	 * size is the full initial memory allocation; on highmem it
+	 * is limited to the max size of lowmem, so that it doesn't
+	 * get completely filled.
+	 *
+	 * In principle there could be a problem in lowmem systems if
+	 * the initial memory is also very large with respect to
+	 * lowmem, but we won't try to deal with that here.
+	 */
+	extra_pages = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+			  extra_pages);
 
-			extra_pages += PFN_DOWN(delta);
-			/*
-			 * Set RAM below 4GB that is not for us to be unusable.
-			 * This prevents "System RAM" address space from being
-			 * used as potential resource for I/O address (happens
-			 * when 'allocate_resource' is called).
-			 */
-			if (delta &&
-				(xen_initial_domain() && end < 0x100000000ULL))
-				e820_add_region(end, delta, E820_UNUSABLE);
+	i = 0;
+	while (i < memmap.nr_entries) {
+		u64 addr = map[i].addr;
+		u64 size = map[i].size;
+		u32 type = map[i].type;
+
+		if (type == E820_RAM) {
+			if (addr < mem_end) {
+				size = min(size, mem_end - addr);
+			} else if (extra_pages) {
+				size = min(size, (u64)extra_pages * PAGE_SIZE);
+				extra_pages -= size / PAGE_SIZE;
+				xen_add_extra_mem(addr, size);
+			} else
+				type = E820_UNUSABLE;
 		}
 
-		if (map[i].size > 0 && end > xen_extra_mem_start)
-			xen_extra_mem_start = end;
+		xen_align_and_add_e820_region(addr, size, type);
 
-		/* Add region if any remains */
-		if (map[i].size > 0)
-			e820_add_region(map[i].addr, map[i].size, map[i].type);
+		map[i].addr += size;
+		map[i].size -= size;
+		if (map[i].size == 0)
+			i++;
 	}
-	/* Align the balloon area so that max_low_pfn does not get set
-	 * to be at the _end_ of the PCI gap at the far end (fee01000).
-	 * Note that xen_extra_mem_start gets set in the loop above to be
-	 * past the last E820 region. */
-	if (xen_initial_domain() && (xen_extra_mem_start < (1ULL<<32)))
-		xen_extra_mem_start = (1ULL<<32);
 
 	/*
 	 * In domU, the ISA region is normal, usable memory, but we
 	 * reserve ISA memory anyway because too many things poke
 	 * about in there.
-	 *
-	 * In Dom0, the host E820 information can leave gaps in the
-	 * ISA range, which would cause us to release those pages.  To
-	 * avoid this, we unconditionally reserve them here.
 	 */
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
@@ -305,44 +305,6 @@
 
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
-	extra_limit = xen_get_max_pages();
-	if (max_pfn + extra_pages > extra_limit) {
-		if (extra_limit > max_pfn)
-			extra_pages = extra_limit - max_pfn;
-		else
-			extra_pages = 0;
-	}
-
-	extra_pages += xen_return_unused_memory(xen_start_info->nr_pages, &e820);
-
-	/*
-	 * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-	 * factor the base size.  On non-highmem systems, the base
-	 * size is the full initial memory allocation; on highmem it
-	 * is limited to the max size of lowmem, so that it doesn't
-	 * get completely filled.
-	 *
-	 * In principle there could be a problem in lowmem systems if
-	 * the initial memory is also very large with respect to
-	 * lowmem, but we won't try to deal with that here.
-	 */
-	extra_limit = min(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
-			  max_pfn + extra_pages);
-
-	if (extra_limit >= max_pfn)
-		extra_pages = extra_limit - max_pfn;
-	else
-		extra_pages = 0;
-
-	xen_add_extra_mem(extra_pages);
-
-	/*
-	 * Set P2M for all non-RAM pages and E820 gaps to be identity
-	 * type PFNs. We supply it with the non-sanitized version
-	 * of the E820.
-	 */
-	identity_pages = xen_set_identity(map_raw, memmap.nr_entries);
-	printk(KERN_INFO "Set %ld page(s) to 1-1 mapping.\n", identity_pages);
 	return "Xen";
 }
 
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 2330a9a..1540792 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -396,7 +396,7 @@
 			continue;
 
 		ret = m2p_add_override(PFN_DOWN(map[i].dev_bus_addr),
-			blkbk->pending_page(pending_req, i), false);
+			blkbk->pending_page(pending_req, i), NULL);
 		if (ret) {
 			pr_alert(DRV_PFX "Failed to install M2P override for %lx (ret: %d)\n",
 				 (unsigned long)map[i].dev_bus_addr, ret);
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 6fa215a..90832a9 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -400,9 +400,8 @@
 			dev_info(&pdev->xdev->dev, "claiming resource %s/%d\n",
 				pci_name(dev), i);
 			if (pci_claim_resource(dev, i)) {
-				dev_err(&pdev->xdev->dev, "Could not claim "
-					"resource %s/%d! Device offline. Try "
-					"giving less than 4GB to domain.\n",
+				dev_err(&pdev->xdev->dev, "Could not claim resource %s/%d! "
+					"Device offline. Try using e820_host=1 in the guest config.\n",
 					pci_name(dev), i);
 			}
 		}
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5f7ff8e..8795480 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -137,16 +137,6 @@
 	  to other domains. This can be used to implement frontend drivers
 	  or as part of an inter-domain shared memory channel.
 
-config XEN_PLATFORM_PCI
-	tristate "xen platform pci device driver"
-	depends on XEN_PVHVM && PCI
-	default m
-	help
-	  Driver for the Xen PCI Platform device: it is responsible for
-	  initializing xenbus and grant_table when running in a Xen HVM
-	  domain. As a consequence this driver is required to run any Xen PV
-	  frontend on Xen HVM.
-
 config SWIOTLB_XEN
 	def_bool y
 	depends on PCI
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 72bbb27..974fffd 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -14,7 +14,7 @@
 obj-$(CONFIG_XEN_GRANT_DEV_ALLOC)	+= xen-gntalloc.o
 obj-$(CONFIG_XENFS)			+= xenfs/
 obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI)		+= xen-platform-pci.o
+obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
 obj-$(CONFIG_XEN_TMEM)			+= tmem.o
 obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
 obj-$(CONFIG_XEN_DOM0)			+= pci.o
@@ -23,5 +23,3 @@
 xen-evtchn-y				:= evtchn.o
 xen-gntdev-y				:= gntdev.o
 xen-gntalloc-y				:= gntalloc.o
-
-xen-platform-pci-y			:= platform-pci.o
diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c
index 5dfd8f8..5876e1a 100644
--- a/drivers/xen/balloon.c
+++ b/drivers/xen/balloon.c
@@ -501,20 +501,24 @@
  * alloc_xenballooned_pages - get pages that have been ballooned out
  * @nr_pages: Number of pages to get
  * @pages: pages returned
+ * @highmem: highmem or lowmem pages
  * @return 0 on success, error otherwise
  */
-int alloc_xenballooned_pages(int nr_pages, struct page** pages)
+int alloc_xenballooned_pages(int nr_pages, struct page **pages, bool highmem)
 {
 	int pgno = 0;
 	struct page* page;
 	mutex_lock(&balloon_mutex);
 	while (pgno < nr_pages) {
-		page = balloon_retrieve(true);
-		if (page) {
+		page = balloon_retrieve(highmem);
+		if (page && PageHighMem(page) == highmem) {
 			pages[pgno++] = page;
 		} else {
 			enum bp_state st;
-			st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+			if (page)
+				balloon_append(page);
+			st = decrease_reservation(nr_pages - pgno,
+					highmem ? GFP_HIGHUSER : GFP_USER);
 			if (st != BP_DONE)
 				goto out_undo;
 		}
@@ -555,17 +559,40 @@
 }
 EXPORT_SYMBOL(free_xenballooned_pages);
 
-static int __init balloon_init(void)
+static void __init balloon_add_region(unsigned long start_pfn,
+				      unsigned long pages)
 {
 	unsigned long pfn, extra_pfn_end;
 	struct page *page;
 
+	/*
+	 * If the amount of usable memory has been limited (e.g., with
+	 * the 'mem' command line parameter), don't add pages beyond
+	 * this limit.
+	 */
+	extra_pfn_end = min(max_pfn, start_pfn + pages);
+
+	for (pfn = start_pfn; pfn < extra_pfn_end; pfn++) {
+		page = pfn_to_page(pfn);
+		/* totalram_pages and totalhigh_pages do not
+		   include the boot-time balloon extension, so
+		   don't subtract from it. */
+		__balloon_append(page);
+	}
+}
+
+static int __init balloon_init(void)
+{
+	int i;
+
 	if (!xen_domain())
 		return -ENODEV;
 
 	pr_info("xen/balloon: Initialising balloon driver.\n");
 
-	balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
+	balloon_stats.current_pages = xen_pv_domain()
+		? min(xen_start_info->nr_pages - xen_released_pages, max_pfn)
+		: max_pfn;
 	balloon_stats.target_pages  = balloon_stats.current_pages;
 	balloon_stats.balloon_low   = 0;
 	balloon_stats.balloon_high  = 0;
@@ -584,24 +611,13 @@
 #endif
 
 	/*
-	 * Initialise the balloon with excess memory space.  We need
-	 * to make sure we don't add memory which doesn't exist or
-	 * logically exist.  The E820 map can be trimmed to be smaller
-	 * than the amount of physical memory due to the mem= command
-	 * line parameter.  And if this is a 32-bit non-HIGHMEM kernel
-	 * on a system with memory which requires highmem to access,
-	 * don't try to use it.
+	 * Initialize the balloon with pages from the extra memory
+	 * regions (see arch/x86/xen/setup.c).
 	 */
-	extra_pfn_end = min(min(max_pfn, e820_end_of_ram_pfn()),
-			    (unsigned long)PFN_DOWN(xen_extra_mem_start + xen_extra_mem_size));
-	for (pfn = PFN_UP(xen_extra_mem_start);
-	     pfn < extra_pfn_end;
-	     pfn++) {
-		page = pfn_to_page(pfn);
-		/* totalram_pages and totalhigh_pages do not include the boot-time
-		   balloon extension, so don't subtract from it. */
-		__balloon_append(page);
-	}
+	for (i = 0; i < XEN_EXTRA_MEM_MAX_REGIONS; i++)
+		if (xen_extra_mem[i].size)
+			balloon_add_region(PFN_UP(xen_extra_mem[i].start),
+					   PFN_DOWN(xen_extra_mem[i].size));
 
 	return 0;
 }
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 7523719..7a55b29 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -432,7 +432,8 @@
 
 	irq = irq_alloc_desc_from(first, -1);
 
-	xen_irq_init(irq);
+	if (irq >= 0)
+		xen_irq_init(irq);
 
 	return irq;
 }
@@ -713,7 +714,7 @@
 	mutex_lock(&irq_mapping_update_lock);
 
 	irq = xen_allocate_irq_dynamic();
-	if (irq == -1)
+	if (irq < 0)
 		goto out;
 
 	irq_set_chip_and_handler_name(irq, &xen_pirq_chip, handle_edge_irq,
@@ -729,7 +730,7 @@
 error_irq:
 	mutex_unlock(&irq_mapping_update_lock);
 	xen_free_irq(irq);
-	return -1;
+	return ret;
 }
 #endif
 
@@ -779,7 +780,7 @@
 	mutex_lock(&irq_mapping_update_lock);
 
 	list_for_each_entry(info, &xen_irq_list_head, list) {
-		if (info == NULL || info->type != IRQT_PIRQ)
+		if (info->type != IRQT_PIRQ)
 			continue;
 		irq = info->irq;
 		if (info->u.pirq.pirq == pirq)
@@ -872,11 +873,32 @@
 	return err ? : bind_evtchn_to_irq(bind_interdomain.local_port);
 }
 
+static int find_virq(unsigned int virq, unsigned int cpu)
+{
+	struct evtchn_status status;
+	int port, rc = -ENOENT;
+
+	memset(&status, 0, sizeof(status));
+	for (port = 0; port <= NR_EVENT_CHANNELS; port++) {
+		status.dom = DOMID_SELF;
+		status.port = port;
+		rc = HYPERVISOR_event_channel_op(EVTCHNOP_status, &status);
+		if (rc < 0)
+			continue;
+		if (status.status != EVTCHNSTAT_virq)
+			continue;
+		if (status.u.virq == virq && status.vcpu == cpu) {
+			rc = port;
+			break;
+		}
+	}
+	return rc;
+}
 
 int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 {
 	struct evtchn_bind_virq bind_virq;
-	int evtchn, irq;
+	int evtchn, irq, ret;
 
 	mutex_lock(&irq_mapping_update_lock);
 
@@ -892,10 +914,16 @@
 
 		bind_virq.virq = virq;
 		bind_virq.vcpu = cpu;
-		if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
-						&bind_virq) != 0)
-			BUG();
-		evtchn = bind_virq.port;
+		ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
+						&bind_virq);
+		if (ret == 0)
+			evtchn = bind_virq.port;
+		else {
+			if (ret == -EEXIST)
+				ret = find_virq(virq, cpu);
+			BUG_ON(ret < 0);
+			evtchn = ret;
+		}
 
 		xen_irq_info_virq_init(cpu, irq, evtchn, virq);
 
@@ -1670,6 +1698,7 @@
 
 	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
+	BUG_ON(!evtchn_to_irq);
 	for (i = 0; i < NR_EVENT_CHANNELS; i++)
 		evtchn_to_irq[i] = -1;
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index f914b26..880798a 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -83,6 +83,7 @@
 	struct ioctl_gntdev_grant_ref *grants;
 	struct gnttab_map_grant_ref   *map_ops;
 	struct gnttab_unmap_grant_ref *unmap_ops;
+	struct gnttab_map_grant_ref   *kmap_ops;
 	struct page **pages;
 };
 
@@ -116,19 +117,22 @@
 	add->grants    = kzalloc(sizeof(add->grants[0])    * count, GFP_KERNEL);
 	add->map_ops   = kzalloc(sizeof(add->map_ops[0])   * count, GFP_KERNEL);
 	add->unmap_ops = kzalloc(sizeof(add->unmap_ops[0]) * count, GFP_KERNEL);
+	add->kmap_ops  = kzalloc(sizeof(add->kmap_ops[0])  * count, GFP_KERNEL);
 	add->pages     = kzalloc(sizeof(add->pages[0])     * count, GFP_KERNEL);
 	if (NULL == add->grants    ||
 	    NULL == add->map_ops   ||
 	    NULL == add->unmap_ops ||
+	    NULL == add->kmap_ops  ||
 	    NULL == add->pages)
 		goto err;
 
-	if (alloc_xenballooned_pages(count, add->pages))
+	if (alloc_xenballooned_pages(count, add->pages, false /* lowmem */))
 		goto err;
 
 	for (i = 0; i < count; i++) {
 		add->map_ops[i].handle = -1;
 		add->unmap_ops[i].handle = -1;
+		add->kmap_ops[i].handle = -1;
 	}
 
 	add->index = 0;
@@ -142,6 +146,7 @@
 	kfree(add->grants);
 	kfree(add->map_ops);
 	kfree(add->unmap_ops);
+	kfree(add->kmap_ops);
 	kfree(add);
 	return NULL;
 }
@@ -243,10 +248,35 @@
 			gnttab_set_unmap_op(&map->unmap_ops[i], addr,
 				map->flags, -1 /* handle */);
 		}
+	} else {
+		/*
+		 * Setup the map_ops corresponding to the pte entries pointing
+		 * to the kernel linear addresses of the struct pages.
+		 * These ptes are completely different from the user ptes dealt
+		 * with find_grant_ptes.
+		 */
+		for (i = 0; i < map->count; i++) {
+			unsigned level;
+			unsigned long address = (unsigned long)
+				pfn_to_kaddr(page_to_pfn(map->pages[i]));
+			pte_t *ptep;
+			u64 pte_maddr = 0;
+			BUG_ON(PageHighMem(map->pages[i]));
+
+			ptep = lookup_address(address, &level);
+			pte_maddr = arbitrary_virt_to_machine(ptep).maddr;
+			gnttab_set_map_op(&map->kmap_ops[i], pte_maddr,
+				map->flags |
+				GNTMAP_host_map |
+				GNTMAP_contains_pte,
+				map->grants[i].ref,
+				map->grants[i].domid);
+		}
 	}
 
 	pr_debug("map %d+%d\n", map->index, map->count);
-	err = gnttab_map_refs(map->map_ops, map->pages, map->count);
+	err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
+			map->pages, map->count);
 	if (err)
 		return err;
 
@@ -462,13 +492,11 @@
 
 	pr_debug("priv %p\n", priv);
 
-	spin_lock(&priv->lock);
 	while (!list_empty(&priv->maps)) {
 		map = list_entry(priv->maps.next, struct grant_map, next);
 		list_del(&map->next);
 		gntdev_put_map(map);
 	}
-	spin_unlock(&priv->lock);
 
 	if (use_ptemod)
 		mmu_notifier_unregister(&priv->mn, priv->mm);
@@ -532,10 +560,11 @@
 	map = gntdev_find_map_index(priv, op.index >> PAGE_SHIFT, op.count);
 	if (map) {
 		list_del(&map->next);
-		gntdev_put_map(map);
 		err = 0;
 	}
 	spin_unlock(&priv->lock);
+	if (map)
+		gntdev_put_map(map);
 	return err;
 }
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 4f44b34..8c71ab80 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -448,7 +448,8 @@
 EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
-		    struct page **pages, unsigned int count)
+			struct gnttab_map_grant_ref *kmap_ops,
+			struct page **pages, unsigned int count)
 {
 	int i, ret;
 	pte_t *pte;
@@ -488,8 +489,7 @@
 			 */
 			return -EOPNOTSUPP;
 		}
-		ret = m2p_add_override(mfn, pages[i],
-				       map_ops[i].flags & GNTMAP_contains_pte);
+		ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/xen/pci.c b/drivers/xen/pci.c
index cef4bafc0..6605707 100644
--- a/drivers/xen/pci.c
+++ b/drivers/xen/pci.c
@@ -18,6 +18,7 @@
  */
 
 #include <linux/pci.h>
+#include <linux/acpi.h>
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
 #include <xen/interface/xen.h>
@@ -26,26 +27,85 @@
 #include <asm/xen/hypercall.h>
 #include "../pci/pci.h"
 
+static bool __read_mostly pci_seg_supported = true;
+
 static int xen_add_device(struct device *dev)
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
+#ifdef CONFIG_PCI_IOV
+	struct pci_dev *physfn = pci_dev->physfn;
+#endif
+
+	if (pci_seg_supported) {
+		struct physdev_pci_device_add add = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+#ifdef CONFIG_ACPI
+		acpi_handle handle;
+#endif
 
 #ifdef CONFIG_PCI_IOV
-	if (pci_dev->is_virtfn) {
+		if (pci_dev->is_virtfn) {
+			add.flags = XEN_PCI_DEV_VIRTFN;
+			add.physfn.bus = physfn->bus->number;
+			add.physfn.devfn = physfn->devfn;
+		} else
+#endif
+		if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn))
+			add.flags = XEN_PCI_DEV_EXTFN;
+
+#ifdef CONFIG_ACPI
+		handle = DEVICE_ACPI_HANDLE(&pci_dev->dev);
+		if (!handle)
+			handle = DEVICE_ACPI_HANDLE(pci_dev->bus->bridge);
+#ifdef CONFIG_PCI_IOV
+		if (!handle && pci_dev->is_virtfn)
+			handle = DEVICE_ACPI_HANDLE(physfn->bus->bridge);
+#endif
+		if (handle) {
+			acpi_status status;
+
+			do {
+				unsigned long long pxm;
+
+				status = acpi_evaluate_integer(handle, "_PXM",
+							       NULL, &pxm);
+				if (ACPI_SUCCESS(status)) {
+					add.optarr[0] = pxm;
+					add.flags |= XEN_PCI_DEV_PXM;
+					break;
+				}
+				status = acpi_get_parent(handle, &handle);
+			} while (ACPI_SUCCESS(status));
+		}
+#endif /* CONFIG_ACPI */
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_add, &add);
+		if (r != -ENOSYS)
+			return r;
+		pci_seg_supported = false;
+	}
+
+	if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+#ifdef CONFIG_PCI_IOV
+	else if (pci_dev->is_virtfn) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
 			.is_virtfn 	= 1,
-			.physfn.bus	= pci_dev->physfn->bus->number,
-			.physfn.devfn	= pci_dev->physfn->devfn,
+			.physfn.bus	= physfn->bus->number,
+			.physfn.devfn	= physfn->devfn,
 		};
 
 		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_add_ext,
 			&manage_pci_ext);
-	} else
+	}
 #endif
-	if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
+	else if (pci_ari_enabled(pci_dev->bus) && PCI_SLOT(pci_dev->devfn)) {
 		struct physdev_manage_pci_ext manage_pci_ext = {
 			.bus		= pci_dev->bus->number,
 			.devfn		= pci_dev->devfn,
@@ -71,13 +131,27 @@
 {
 	int r;
 	struct pci_dev *pci_dev = to_pci_dev(dev);
-	struct physdev_manage_pci manage_pci;
 
-	manage_pci.bus = pci_dev->bus->number;
-	manage_pci.devfn = pci_dev->devfn;
+	if (pci_seg_supported) {
+		struct physdev_pci_device device = {
+			.seg = pci_domain_nr(pci_dev->bus),
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
 
-	r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
-		&manage_pci);
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_pci_device_remove,
+					  &device);
+	} else if (pci_domain_nr(pci_dev->bus))
+		r = -ENOSYS;
+	else {
+		struct physdev_manage_pci manage_pci = {
+			.bus = pci_dev->bus->number,
+			.devfn = pci_dev->devfn
+		};
+
+		r = HYPERVISOR_physdev_op(PHYSDEVOP_manage_pci_remove,
+					  &manage_pci);
+	}
 
 	return r;
 }
@@ -96,13 +170,16 @@
 		r = xen_remove_device(dev);
 		break;
 	default:
-		break;
+		return NOTIFY_DONE;
 	}
-
-	return r;
+	if (r)
+		dev_err(dev, "Failed to %s - passthrough or MSI/MSI-X might fail!\n",
+			action == BUS_NOTIFY_ADD_DEVICE ? "add" :
+			(action == BUS_NOTIFY_DEL_DEVICE ? "delete" : "?"));
+	return NOTIFY_OK;
 }
 
-struct notifier_block device_nb = {
+static struct notifier_block device_nb = {
 	.notifier_call = xen_pci_notifier,
 };
 
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 6e8c15a..c984768 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -38,6 +38,7 @@
 #include <xen/swiotlb-xen.h>
 #include <xen/page.h>
 #include <xen/xen-ops.h>
+#include <xen/hvc-console.h>
 /*
  * Used to do a quick range check in swiotlb_tbl_unmap_single and
  * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
@@ -146,8 +147,10 @@
 void __init xen_swiotlb_init(int verbose)
 {
 	unsigned long bytes;
-	int rc;
+	int rc = -ENOMEM;
 	unsigned long nr_tbl;
+	char *m = NULL;
+	unsigned int repeat = 3;
 
 	nr_tbl = swioltb_nr_tbl();
 	if (nr_tbl)
@@ -156,16 +159,17 @@
 		xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
 		xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
 	}
-
+retry:
 	bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
 
 	/*
 	 * Get IO TLB memory from any location.
 	 */
 	xen_io_tlb_start = alloc_bootmem(bytes);
-	if (!xen_io_tlb_start)
-		panic("Cannot allocate SWIOTLB buffer");
-
+	if (!xen_io_tlb_start) {
+		m = "Cannot allocate Xen-SWIOTLB buffer!\n";
+		goto error;
+	}
 	xen_io_tlb_end = xen_io_tlb_start + bytes;
 	/*
 	 * And replace that memory with pages under 4GB.
@@ -173,17 +177,28 @@
 	rc = xen_swiotlb_fixup(xen_io_tlb_start,
 			       bytes,
 			       xen_io_tlb_nslabs);
-	if (rc)
+	if (rc) {
+		free_bootmem(__pa(xen_io_tlb_start), bytes);
+		m = "Failed to get contiguous memory for DMA from Xen!\n"\
+		    "You either: don't have the permissions, do not have"\
+		    " enough free memory under 4GB, or the hypervisor memory"\
+		    "is too fragmented!";
 		goto error;
-
+	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
 	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
 
 	return;
 error:
-	panic("DMA(%d): Failed to exchange pages allocated for DMA with Xen! "\
-	      "We either don't have the permission or you do not have enough"\
-	      "free memory under 4GB!\n", rc);
+	if (repeat--) {
+		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
+					(xen_io_tlb_nslabs >> 1));
+		printk(KERN_INFO "Xen-SWIOTLB: Lowering to %luMB\n",
+		      (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+		goto retry;
+	}
+	xen_raw_printk("%s (rc:%d)", m, rc);
+	panic("%s (rc:%d)", m, rc);
 }
 
 void *
@@ -194,6 +209,8 @@
 	int order = get_order(size);
 	u64 dma_mask = DMA_BIT_MASK(32);
 	unsigned long vstart;
+	phys_addr_t phys;
+	dma_addr_t dev_addr;
 
 	/*
 	* Ignore region specifiers - the kernel's ideas of
@@ -209,18 +226,26 @@
 	vstart = __get_free_pages(flags, order);
 	ret = (void *)vstart;
 
-	if (hwdev && hwdev->coherent_dma_mask)
-		dma_mask = dma_alloc_coherent_mask(hwdev, flags);
+	if (!ret)
+		return ret;
 
-	if (ret) {
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = hwdev->coherent_dma_mask;
+
+	phys = virt_to_phys(ret);
+	dev_addr = xen_phys_to_bus(phys);
+	if (((dev_addr + size - 1 <= dma_mask)) &&
+	    !range_straddles_page_boundary(phys, size))
+		*dma_handle = dev_addr;
+	else {
 		if (xen_create_contiguous_region(vstart, order,
 						 fls64(dma_mask)) != 0) {
 			free_pages(vstart, order);
 			return NULL;
 		}
-		memset(ret, 0, size);
 		*dma_handle = virt_to_machine(ret).maddr;
 	}
+	memset(ret, 0, size);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_alloc_coherent);
@@ -230,11 +255,21 @@
 			  dma_addr_t dev_addr)
 {
 	int order = get_order(size);
+	phys_addr_t phys;
+	u64 dma_mask = DMA_BIT_MASK(32);
 
 	if (dma_release_from_coherent(hwdev, order, vaddr))
 		return;
 
-	xen_destroy_contiguous_region((unsigned long)vaddr, order);
+	if (hwdev && hwdev->coherent_dma_mask)
+		dma_mask = hwdev->coherent_dma_mask;
+
+	phys = virt_to_phys(vaddr);
+
+	if (((dev_addr + size - 1 > dma_mask)) ||
+	    range_straddles_page_boundary(phys, size))
+		xen_destroy_contiguous_region((unsigned long)vaddr, order);
+
 	free_pages((unsigned long)vaddr, order);
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_free_coherent);
@@ -278,9 +313,10 @@
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (!dma_capable(dev, dev_addr, size))
-		panic("map_single: bounce buffer is not DMA'ble");
-
+	if (!dma_capable(dev, dev_addr, size)) {
+		swiotlb_tbl_unmap_single(dev, map, size, dir);
+		dev_addr = 0;
+	}
 	return dev_addr;
 }
 EXPORT_SYMBOL_GPL(xen_swiotlb_map_page);
diff --git a/drivers/xen/xen-pciback/conf_space.c b/drivers/xen/xen-pciback/conf_space.c
index a803144..444345a 100644
--- a/drivers/xen/xen-pciback/conf_space.c
+++ b/drivers/xen/xen-pciback/conf_space.c
@@ -15,7 +15,6 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-#define DRV_NAME	"xen-pciback"
 static int permissive;
 module_param(permissive, bool, 0644);
 
diff --git a/drivers/xen/xen-pciback/conf_space_header.c b/drivers/xen/xen-pciback/conf_space_header.c
index da3cbdf..3daf862 100644
--- a/drivers/xen/xen-pciback/conf_space_header.c
+++ b/drivers/xen/xen-pciback/conf_space_header.c
@@ -15,7 +15,6 @@
 	int which;
 };
 
-#define DRV_NAME	"xen-pciback"
 #define is_enable_cmd(value) ((value)&(PCI_COMMAND_MEMORY|PCI_COMMAND_IO))
 #define is_master_cmd(value) ((value)&PCI_COMMAND_MASTER)
 
@@ -25,7 +24,7 @@
 	int ret;
 
 	ret = xen_pcibk_read_config_word(dev, offset, value, data);
-	if (!atomic_read(&dev->enable_cnt))
+	if (!pci_is_enabled(dev))
 		return ret;
 
 	for (i = 0; i < PCI_ROM_RESOURCE; i++) {
@@ -187,7 +186,7 @@
 
 	bar_info->val = res[pos].start |
 			(res[pos].flags & PCI_REGION_FLAG_MASK);
-	bar_info->len_val = res[pos].end - res[pos].start + 1;
+	bar_info->len_val = resource_size(&res[pos]);
 }
 
 static void *bar_init(struct pci_dev *dev, int offset)
diff --git a/drivers/xen/xen-pciback/conf_space_quirks.c b/drivers/xen/xen-pciback/conf_space_quirks.c
index 921a889..7476791 100644
--- a/drivers/xen/xen-pciback/conf_space_quirks.c
+++ b/drivers/xen/xen-pciback/conf_space_quirks.c
@@ -12,7 +12,6 @@
 #include "conf_space_quirks.h"
 
 LIST_HEAD(xen_pcibk_quirks);
-#define	DRV_NAME	"xen-pciback"
 static inline const struct pci_device_id *
 match_one_device(const struct pci_device_id *id, const struct pci_dev *dev)
 {
@@ -36,7 +35,7 @@
 			goto out;
 	tmp_quirk = NULL;
 	printk(KERN_DEBUG DRV_NAME
-	       ":quirk didn't match any device xen_pciback knows about\n");
+	       ": quirk didn't match any device known\n");
 out:
 	return tmp_quirk;
 }
diff --git a/drivers/xen/xen-pciback/passthrough.c b/drivers/xen/xen-pciback/passthrough.c
index 1d32a9a..828dddc 100644
--- a/drivers/xen/xen-pciback/passthrough.c
+++ b/drivers/xen/xen-pciback/passthrough.c
@@ -7,13 +7,13 @@
 
 #include <linux/list.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "pciback.h"
 
 struct passthrough_dev_data {
 	/* Access to dev_list must be protected by lock */
 	struct list_head dev_list;
-	spinlock_t lock;
+	struct mutex lock;
 };
 
 static struct pci_dev *__xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev,
@@ -24,9 +24,8 @@
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry;
 	struct pci_dev *dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 
 	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
 		if (domain == (unsigned int)pci_domain_nr(dev_entry->dev->bus)
@@ -37,7 +36,7 @@
 		}
 	}
 
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	return dev;
 }
@@ -48,7 +47,6 @@
 {
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry;
-	unsigned long flags;
 	unsigned int domain, bus, devfn;
 	int err;
 
@@ -57,9 +55,9 @@
 		return -ENOMEM;
 	dev_entry->dev = dev;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 	list_add_tail(&dev_entry->list, &dev_data->dev_list);
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	/* Publish this device. */
 	domain = (unsigned int)pci_domain_nr(dev->bus);
@@ -76,9 +74,8 @@
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
 	struct pci_dev_entry *dev_entry, *t;
 	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&dev_data->lock, flags);
+	mutex_lock(&dev_data->lock);
 
 	list_for_each_entry_safe(dev_entry, t, &dev_data->dev_list, list) {
 		if (dev_entry->dev == dev) {
@@ -88,7 +85,7 @@
 		}
 	}
 
-	spin_unlock_irqrestore(&dev_data->lock, flags);
+	mutex_unlock(&dev_data->lock);
 
 	if (found_dev)
 		pcistub_put_pci_dev(found_dev);
@@ -102,7 +99,7 @@
 	if (!dev_data)
 		return -ENOMEM;
 
-	spin_lock_init(&dev_data->lock);
+	mutex_init(&dev_data->lock);
 
 	INIT_LIST_HEAD(&dev_data->dev_list);
 
@@ -116,14 +113,14 @@
 {
 	int err = 0;
 	struct passthrough_dev_data *dev_data = pdev->pci_dev_data;
-	struct pci_dev_entry *dev_entry, *e, *tmp;
+	struct pci_dev_entry *dev_entry, *e;
 	struct pci_dev *dev;
 	int found;
 	unsigned int domain, bus;
 
-	spin_lock(&dev_data->lock);
+	mutex_lock(&dev_data->lock);
 
-	list_for_each_entry_safe(dev_entry, tmp, &dev_data->dev_list, list) {
+	list_for_each_entry(dev_entry, &dev_data->dev_list, list) {
 		/* Only publish this device as a root if none of its
 		 * parent bridges are exported
 		 */
@@ -142,16 +139,13 @@
 		bus = (unsigned int)dev_entry->dev->bus->number;
 
 		if (!found) {
-			spin_unlock(&dev_data->lock);
 			err = publish_root_cb(pdev, domain, bus);
 			if (err)
 				break;
-			spin_lock(&dev_data->lock);
 		}
 	}
 
-	if (!err)
-		spin_unlock(&dev_data->lock);
+	mutex_unlock(&dev_data->lock);
 
 	return err;
 }
@@ -182,7 +176,7 @@
 	return 1;
 }
 
-struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
+const struct xen_pcibk_backend xen_pcibk_passthrough_backend = {
 	.name           = "passthrough",
 	.init           = __xen_pcibk_init_devices,
 	.free		= __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/pci_stub.c b/drivers/xen/xen-pciback/pci_stub.c
index aec214a..8f06e1e 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -21,8 +21,6 @@
 #include "conf_space.h"
 #include "conf_space_quirks.h"
 
-#define DRV_NAME	"xen-pciback"
-
 static char *pci_devs_to_hide;
 wait_queue_head_t xen_pcibk_aer_wait_queue;
 /*Add sem for sync AER handling and xen_pcibk remove/reconfigue ops,
@@ -222,6 +220,8 @@
 	}
 
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
+	if (WARN_ON(!found_psdev))
+		return;
 
 	/*hold this lock for avoiding breaking link between
 	* pcistub and xen_pcibk when AER is in processing
@@ -514,12 +514,9 @@
 	int err;
 	char nodename[PCI_NODENAME_MAX];
 
-	if (!psdev)
-		dev_err(&psdev->dev->dev,
-			"device is NULL when do AER recovery/kill_domain\n");
+	BUG_ON(!psdev);
 	snprintf(nodename, PCI_NODENAME_MAX, "/local/domain/0/backend/pci/%d/0",
 		psdev->pdev->xdev->otherend_id);
-	nodename[strlen(nodename)] = '\0';
 
 again:
 	err = xenbus_transaction_start(&xbt);
@@ -605,7 +602,7 @@
 	if (test_bit(_XEN_PCIF_active,
 		(unsigned long *)&psdev->pdev->sh_info->flags)) {
 		dev_dbg(&psdev->dev->dev,
-			"schedule pci_conf service in xen_pcibk\n");
+			"schedule pci_conf service in " DRV_NAME "\n");
 		xen_pcibk_test_and_schedule_op(psdev->pdev);
 	}
 
@@ -995,8 +992,7 @@
 		err = count;
 	return err;
 }
-
-DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
+static DRIVER_ATTR(new_slot, S_IWUSR, NULL, pcistub_slot_add);
 
 static ssize_t pcistub_slot_remove(struct device_driver *drv, const char *buf,
 				   size_t count)
@@ -1015,8 +1011,7 @@
 		err = count;
 	return err;
 }
-
-DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
+static DRIVER_ATTR(remove_slot, S_IWUSR, NULL, pcistub_slot_remove);
 
 static ssize_t pcistub_slot_show(struct device_driver *drv, char *buf)
 {
@@ -1039,8 +1034,7 @@
 
 	return count;
 }
-
-DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
+static DRIVER_ATTR(slots, S_IRUSR, pcistub_slot_show, NULL);
 
 static ssize_t pcistub_irq_handler_show(struct device_driver *drv, char *buf)
 {
@@ -1069,8 +1063,7 @@
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 	return count;
 }
-
-DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
+static DRIVER_ATTR(irq_handlers, S_IRUSR, pcistub_irq_handler_show, NULL);
 
 static ssize_t pcistub_irq_handler_switch(struct device_driver *drv,
 					  const char *buf,
@@ -1106,7 +1099,8 @@
 		err = count;
 	return err;
 }
-DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL, pcistub_irq_handler_switch);
+static DRIVER_ATTR(irq_handler_state, S_IWUSR, NULL,
+		   pcistub_irq_handler_switch);
 
 static ssize_t pcistub_quirk_add(struct device_driver *drv, const char *buf,
 				 size_t count)
@@ -1170,8 +1164,8 @@
 
 	return count;
 }
-
-DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show, pcistub_quirk_add);
+static DRIVER_ATTR(quirks, S_IRUSR | S_IWUSR, pcistub_quirk_show,
+		   pcistub_quirk_add);
 
 static ssize_t permissive_add(struct device_driver *drv, const char *buf,
 			      size_t count)
@@ -1236,8 +1230,8 @@
 	spin_unlock_irqrestore(&pcistub_devices_lock, flags);
 	return count;
 }
-
-DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show, permissive_add);
+static DRIVER_ATTR(permissive, S_IRUSR | S_IWUSR, permissive_show,
+		   permissive_add);
 
 static void pcistub_exit(void)
 {
@@ -1374,3 +1368,4 @@
 module_exit(xen_pcibk_cleanup);
 
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_ALIAS("xen-backend:pci");
diff --git a/drivers/xen/xen-pciback/pciback.h b/drivers/xen/xen-pciback/pciback.h
index a0e131a..e9b4011 100644
--- a/drivers/xen/xen-pciback/pciback.h
+++ b/drivers/xen/xen-pciback/pciback.h
@@ -15,6 +15,8 @@
 #include <linux/atomic.h>
 #include <xen/interface/io/pciif.h>
 
+#define DRV_NAME	"xen-pciback"
+
 struct pci_dev_entry {
 	struct list_head list;
 	struct pci_dev *dev;
@@ -27,7 +29,7 @@
 
 struct xen_pcibk_device {
 	void *pci_dev_data;
-	spinlock_t dev_lock;
+	struct mutex dev_lock;
 	struct xenbus_device *xdev;
 	struct xenbus_watch be_watch;
 	u8 be_watching;
@@ -89,7 +91,7 @@
  *  passthrough - BDFs are exactly like in the host.
  */
 struct xen_pcibk_backend {
-	char *name;
+	const char *name;
 	int (*init)(struct xen_pcibk_device *pdev);
 	void (*free)(struct xen_pcibk_device *pdev);
 	int (*find)(struct pci_dev *pcidev, struct xen_pcibk_device *pdev,
@@ -104,9 +106,9 @@
 			       unsigned int devfn);
 };
 
-extern struct xen_pcibk_backend xen_pcibk_vpci_backend;
-extern struct xen_pcibk_backend xen_pcibk_passthrough_backend;
-extern struct xen_pcibk_backend *xen_pcibk_backend;
+extern const struct xen_pcibk_backend xen_pcibk_vpci_backend;
+extern const struct xen_pcibk_backend xen_pcibk_passthrough_backend;
+extern const struct xen_pcibk_backend *xen_pcibk_backend;
 
 static inline int xen_pcibk_add_pci_dev(struct xen_pcibk_device *pdev,
 					struct pci_dev *dev,
@@ -116,13 +118,14 @@
 	if (xen_pcibk_backend && xen_pcibk_backend->add)
 		return xen_pcibk_backend->add(pdev, dev, devid, publish_cb);
 	return -1;
-};
+}
+
 static inline void xen_pcibk_release_pci_dev(struct xen_pcibk_device *pdev,
 					     struct pci_dev *dev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->free)
 		return xen_pcibk_backend->release(pdev, dev);
-};
+}
 
 static inline struct pci_dev *
 xen_pcibk_get_pci_dev(struct xen_pcibk_device *pdev, unsigned int domain,
@@ -131,7 +134,8 @@
 	if (xen_pcibk_backend && xen_pcibk_backend->get)
 		return xen_pcibk_backend->get(pdev, domain, bus, devfn);
 	return NULL;
-};
+}
+
 /**
 * Add for domain0 PCIE-AER handling. Get guest domain/bus/devfn in xen_pcibk
 * before sending aer request to pcifront, so that guest could identify
@@ -148,25 +152,29 @@
 		return xen_pcibk_backend->find(pcidev, pdev, domain, bus,
 					       devfn);
 	return -1;
-};
+}
+
 static inline int xen_pcibk_init_devices(struct xen_pcibk_device *pdev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->init)
 		return xen_pcibk_backend->init(pdev);
 	return -1;
-};
+}
+
 static inline int xen_pcibk_publish_pci_roots(struct xen_pcibk_device *pdev,
 					      publish_pci_root_cb cb)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->publish)
 		return xen_pcibk_backend->publish(pdev, cb);
 	return -1;
-};
+}
+
 static inline void xen_pcibk_release_devices(struct xen_pcibk_device *pdev)
 {
 	if (xen_pcibk_backend && xen_pcibk_backend->free)
 		return xen_pcibk_backend->free(pdev);
-};
+}
+
 /* Handles events from front-end */
 irqreturn_t xen_pcibk_handle_event(int irq, void *dev_id);
 void xen_pcibk_do_op(struct work_struct *data);
diff --git a/drivers/xen/xen-pciback/pciback_ops.c b/drivers/xen/xen-pciback/pciback_ops.c
index 8c95c34..63616d7 100644
--- a/drivers/xen/xen-pciback/pciback_ops.c
+++ b/drivers/xen/xen-pciback/pciback_ops.c
@@ -10,7 +10,6 @@
 #include <linux/sched.h>
 #include "pciback.h"
 
-#define DRV_NAME	"xen-pciback"
 int verbose_request;
 module_param(verbose_request, int, 0644);
 
diff --git a/drivers/xen/xen-pciback/vpci.c b/drivers/xen/xen-pciback/vpci.c
index 4a42cfb..46d140b 100644
--- a/drivers/xen/xen-pciback/vpci.c
+++ b/drivers/xen/xen-pciback/vpci.c
@@ -8,16 +8,15 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "pciback.h"
 
 #define PCI_SLOT_MAX 32
-#define DRV_NAME	"xen-pciback"
 
 struct vpci_dev_data {
 	/* Access to dev_list must be protected by lock */
 	struct list_head dev_list[PCI_SLOT_MAX];
-	spinlock_t lock;
+	struct mutex lock;
 };
 
 static inline struct list_head *list_first(struct list_head *head)
@@ -33,13 +32,12 @@
 	struct pci_dev_entry *entry;
 	struct pci_dev *dev = NULL;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 
 	if (domain != 0 || bus != 0)
 		return NULL;
 
 	if (PCI_SLOT(devfn) < PCI_SLOT_MAX) {
-		spin_lock_irqsave(&vpci_dev->lock, flags);
+		mutex_lock(&vpci_dev->lock);
 
 		list_for_each_entry(entry,
 				    &vpci_dev->dev_list[PCI_SLOT(devfn)],
@@ -50,7 +48,7 @@
 			}
 		}
 
-		spin_unlock_irqrestore(&vpci_dev->lock, flags);
+		mutex_unlock(&vpci_dev->lock);
 	}
 	return dev;
 }
@@ -71,7 +69,6 @@
 	int err = 0, slot, func = -1;
 	struct pci_dev_entry *t, *dev_entry;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 
 	if ((dev->class >> 24) == PCI_BASE_CLASS_BRIDGE) {
 		err = -EFAULT;
@@ -90,7 +87,7 @@
 
 	dev_entry->dev = dev;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	/* Keep multi-function devices together on the virtual PCI bus */
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
@@ -129,7 +126,7 @@
 			 "No more space on root virtual PCI bus");
 
 unlock:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 
 	/* Publish this device. */
 	if (!err)
@@ -145,14 +142,13 @@
 	int slot;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
 	struct pci_dev *found_dev = NULL;
-	unsigned long flags;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
-		struct pci_dev_entry *e, *tmp;
-		list_for_each_entry_safe(e, tmp, &vpci_dev->dev_list[slot],
-					 list) {
+		struct pci_dev_entry *e;
+
+		list_for_each_entry(e, &vpci_dev->dev_list[slot], list) {
 			if (e->dev == dev) {
 				list_del(&e->list);
 				found_dev = e->dev;
@@ -163,7 +159,7 @@
 	}
 
 out:
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 
 	if (found_dev)
 		pcistub_put_pci_dev(found_dev);
@@ -178,7 +174,7 @@
 	if (!vpci_dev)
 		return -ENOMEM;
 
-	spin_lock_init(&vpci_dev->lock);
+	mutex_init(&vpci_dev->lock);
 
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++)
 		INIT_LIST_HEAD(&vpci_dev->dev_list[slot]);
@@ -222,10 +218,9 @@
 	struct pci_dev_entry *entry;
 	struct pci_dev *dev = NULL;
 	struct vpci_dev_data *vpci_dev = pdev->pci_dev_data;
-	unsigned long flags;
 	int found = 0, slot;
 
-	spin_lock_irqsave(&vpci_dev->lock, flags);
+	mutex_lock(&vpci_dev->lock);
 	for (slot = 0; slot < PCI_SLOT_MAX; slot++) {
 		list_for_each_entry(entry,
 			    &vpci_dev->dev_list[slot],
@@ -243,11 +238,11 @@
 			}
 		}
 	}
-	spin_unlock_irqrestore(&vpci_dev->lock, flags);
+	mutex_unlock(&vpci_dev->lock);
 	return found;
 }
 
-struct xen_pcibk_backend xen_pcibk_vpci_backend = {
+const struct xen_pcibk_backend xen_pcibk_vpci_backend = {
 	.name		= "vpci",
 	.init		= __xen_pcibk_init_devices,
 	.free		= __xen_pcibk_release_devices,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index 978d2c6..474d52e 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -13,7 +13,6 @@
 #include <asm/xen/pci.h>
 #include "pciback.h"
 
-#define	DRV_NAME	"xen-pciback"
 #define INVALID_EVTCHN_IRQ  (-1)
 struct workqueue_struct *xen_pcibk_wq;
 
@@ -44,7 +43,7 @@
 	pdev->xdev = xdev;
 	dev_set_drvdata(&xdev->dev, pdev);
 
-	spin_lock_init(&pdev->dev_lock);
+	mutex_init(&pdev->dev_lock);
 
 	pdev->sh_info = NULL;
 	pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
@@ -62,14 +61,12 @@
 
 static void xen_pcibk_disconnect(struct xen_pcibk_device *pdev)
 {
-	spin_lock(&pdev->dev_lock);
-
+	mutex_lock(&pdev->dev_lock);
 	/* Ensure the guest can't trigger our handler before removing devices */
 	if (pdev->evtchn_irq != INVALID_EVTCHN_IRQ) {
 		unbind_from_irqhandler(pdev->evtchn_irq, pdev);
 		pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
 	}
-	spin_unlock(&pdev->dev_lock);
 
 	/* If the driver domain started an op, make sure we complete it
 	 * before releasing the shared memory */
@@ -77,13 +74,11 @@
 	/* Note, the workqueue does not use spinlocks at all.*/
 	flush_workqueue(xen_pcibk_wq);
 
-	spin_lock(&pdev->dev_lock);
 	if (pdev->sh_info != NULL) {
 		xenbus_unmap_ring_vfree(pdev->xdev, pdev->sh_info);
 		pdev->sh_info = NULL;
 	}
-	spin_unlock(&pdev->dev_lock);
-
+	mutex_unlock(&pdev->dev_lock);
 }
 
 static void free_pdev(struct xen_pcibk_device *pdev)
@@ -120,9 +115,7 @@
 		goto out;
 	}
 
-	spin_lock(&pdev->dev_lock);
 	pdev->sh_info = vaddr;
-	spin_unlock(&pdev->dev_lock);
 
 	err = bind_interdomain_evtchn_to_irqhandler(
 		pdev->xdev->otherend_id, remote_evtchn, xen_pcibk_handle_event,
@@ -132,10 +125,7 @@
 				 "Error binding event channel to IRQ");
 		goto out;
 	}
-
-	spin_lock(&pdev->dev_lock);
 	pdev->evtchn_irq = err;
-	spin_unlock(&pdev->dev_lock);
 	err = 0;
 
 	dev_dbg(&pdev->xdev->dev, "Attached!\n");
@@ -150,6 +140,7 @@
 	char *magic = NULL;
 
 
+	mutex_lock(&pdev->dev_lock);
 	/* Make sure we only do this setup once */
 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 	    XenbusStateInitialised)
@@ -176,7 +167,7 @@
 	if (magic == NULL || strcmp(magic, XEN_PCI_MAGIC) != 0) {
 		xenbus_dev_fatal(pdev->xdev, -EFAULT,
 				 "version mismatch (%s/%s) with pcifront - "
-				 "halting xen_pcibk",
+				 "halting " DRV_NAME,
 				 magic, XEN_PCI_MAGIC);
 		goto out;
 	}
@@ -194,6 +185,7 @@
 
 	dev_dbg(&pdev->xdev->dev, "Connected? %d\n", err);
 out:
+	mutex_unlock(&pdev->dev_lock);
 
 	kfree(magic);
 
@@ -369,6 +361,7 @@
 
 	dev_dbg(&pdev->xdev->dev, "Reconfiguring device ...\n");
 
+	mutex_lock(&pdev->dev_lock);
 	/* Make sure we only reconfigure once */
 	if (xenbus_read_driver_state(pdev->xdev->nodename) !=
 	    XenbusStateReconfiguring)
@@ -506,6 +499,7 @@
 	}
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 	return 0;
 }
 
@@ -562,6 +556,7 @@
 	char dev_str[64];
 	char state_str[64];
 
+	mutex_lock(&pdev->dev_lock);
 	/* It's possible we could get the call to setup twice, so make sure
 	 * we're not already connected.
 	 */
@@ -642,10 +637,10 @@
 				 "Error switching to initialised state!");
 
 out:
+	mutex_unlock(&pdev->dev_lock);
 	if (!err)
 		/* see if pcifront is already configured (if not, we'll wait) */
 		xen_pcibk_attach(pdev);
-
 	return err;
 }
 
@@ -724,7 +719,7 @@
 	.otherend_changed	= xen_pcibk_frontend_changed,
 };
 
-struct xen_pcibk_backend *xen_pcibk_backend;
+const struct xen_pcibk_backend *__read_mostly xen_pcibk_backend;
 
 int __init xen_pcibk_xenbus_register(void)
 {
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 6ea852e..d93c708 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -68,6 +68,8 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/module.h>
@@ -93,6 +95,15 @@
 /* In HZ, controls frequency of worker invocation. */
 static unsigned int selfballoon_interval __read_mostly = 5;
 
+/*
+ * Minimum usable RAM in MB for selfballooning target for balloon.
+ * If non-zero, it is added to totalreserve_pages and self-ballooning
+ * will not balloon below the sum.  If zero, a piecewise linear function
+ * is calculated as a minimum and added to totalreserve_pages.  Note that
+ * setting this value indiscriminately may cause OOMs and crashes.
+ */
+static unsigned int selfballoon_min_usable_mb;
+
 static void selfballoon_process(struct work_struct *work);
 static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
 
@@ -189,20 +200,23 @@
 __setup("selfballooning", xen_selfballooning_setup);
 #endif /* CONFIG_FRONTSWAP */
 
+#define MB2PAGES(mb)	((mb) << (20 - PAGE_SHIFT))
+
 /*
  * Use current balloon size, the goal (vm_committed_as), and hysteresis
  * parameters to set a new target balloon size
  */
 static void selfballoon_process(struct work_struct *work)
 {
-	unsigned long cur_pages, goal_pages, tgt_pages;
+	unsigned long cur_pages, goal_pages, tgt_pages, floor_pages;
+	unsigned long useful_pages;
 	bool reset_timer = false;
 
 	if (xen_selfballooning_enabled) {
-		cur_pages = balloon_stats.current_pages;
+		cur_pages = totalram_pages;
 		tgt_pages = cur_pages; /* default is no change */
 		goal_pages = percpu_counter_read_positive(&vm_committed_as) +
-			balloon_stats.current_pages - totalram_pages;
+				totalreserve_pages;
 #ifdef CONFIG_FRONTSWAP
 		/* allow space for frontswap pages to be repatriated */
 		if (frontswap_selfshrinking && frontswap_enabled)
@@ -217,7 +231,26 @@
 				((goal_pages - cur_pages) /
 				  selfballoon_uphysteresis);
 		/* else if cur_pages == goal_pages, no change */
-		balloon_set_new_target(tgt_pages);
+		useful_pages = max_pfn - totalreserve_pages;
+		if (selfballoon_min_usable_mb != 0)
+			floor_pages = totalreserve_pages +
+					MB2PAGES(selfballoon_min_usable_mb);
+		/* piecewise linear function ending in ~3% slope */
+		else if (useful_pages < MB2PAGES(16))
+			floor_pages = max_pfn; /* not worth ballooning */
+		else if (useful_pages < MB2PAGES(64))
+			floor_pages = totalreserve_pages + MB2PAGES(16) +
+					((useful_pages - MB2PAGES(16)) >> 1);
+		else if (useful_pages < MB2PAGES(512))
+			floor_pages = totalreserve_pages + MB2PAGES(40) +
+					((useful_pages - MB2PAGES(40)) >> 3);
+		else /* useful_pages >= MB2PAGES(512) */
+			floor_pages = totalreserve_pages + MB2PAGES(99) +
+					((useful_pages - MB2PAGES(99)) >> 5);
+		if (tgt_pages < floor_pages)
+			tgt_pages = floor_pages;
+		balloon_set_new_target(tgt_pages +
+			balloon_stats.current_pages - totalram_pages);
 		reset_timer = true;
 	}
 #ifdef CONFIG_FRONTSWAP
@@ -340,6 +373,31 @@
 static SYSDEV_ATTR(selfballoon_uphysteresis, S_IRUGO | S_IWUSR,
 		   show_selfballoon_uphys, store_selfballoon_uphys);
 
+SELFBALLOON_SHOW(selfballoon_min_usable_mb, "%d\n",
+				selfballoon_min_usable_mb);
+
+static ssize_t store_selfballoon_min_usable_mb(struct sys_device *dev,
+					       struct sysdev_attribute *attr,
+					       const char *buf,
+					       size_t count)
+{
+	unsigned long val;
+	int err;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	err = strict_strtoul(buf, 10, &val);
+	if (err || val == 0)
+		return -EINVAL;
+	selfballoon_min_usable_mb = val;
+	return count;
+}
+
+static SYSDEV_ATTR(selfballoon_min_usable_mb, S_IRUGO | S_IWUSR,
+		   show_selfballoon_min_usable_mb,
+		   store_selfballoon_min_usable_mb);
+
+
 #ifdef CONFIG_FRONTSWAP
 SELFBALLOON_SHOW(frontswap_selfshrinking, "%d\n", frontswap_selfshrinking);
 
@@ -421,6 +479,7 @@
 	&attr_selfballoon_interval.attr,
 	&attr_selfballoon_downhysteresis.attr,
 	&attr_selfballoon_uphysteresis.attr,
+	&attr_selfballoon_min_usable_mb.attr,
 #ifdef CONFIG_FRONTSWAP
 	&attr_frontswap_selfshrinking.attr,
 	&attr_frontswap_hysteresis.attr,
diff --git a/drivers/xen/xenbus/xenbus_comms.c b/drivers/xen/xenbus/xenbus_comms.c
index 090c61e..2eff7a6 100644
--- a/drivers/xen/xenbus/xenbus_comms.c
+++ b/drivers/xen/xenbus/xenbus_comms.c
@@ -212,7 +212,9 @@
 		printk(KERN_WARNING "XENBUS response ring is not quiescent "
 		       "(%08x:%08x): fixing up\n",
 		       intf->rsp_cons, intf->rsp_prod);
-		intf->rsp_cons = intf->rsp_prod;
+		/* breaks kdump */
+		if (!reset_devices)
+			intf->rsp_cons = intf->rsp_prod;
 	}
 
 	if (xenbus_irq) {
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index bd2f90c..cef9b0b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -684,64 +684,74 @@
 
 device_initcall(xenbus_probe_initcall);
 
-static int __init xenbus_init(void)
+/* Set up event channel for xenstored which is run as a local process
+ * (this is normally used only in dom0)
+ */
+static int __init xenstored_local_init(void)
 {
 	int err = 0;
 	unsigned long page = 0;
+	struct evtchn_alloc_unbound alloc_unbound;
 
-	DPRINTK("");
+	/* Allocate Xenstore page */
+	page = get_zeroed_page(GFP_KERNEL);
+	if (!page)
+		goto out_err;
 
-	err = -ENODEV;
+	xen_store_mfn = xen_start_info->store_mfn =
+		pfn_to_mfn(virt_to_phys((void *)page) >>
+			   PAGE_SHIFT);
+
+	/* Next allocate a local port which xenstored can bind to */
+	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.remote_dom = DOMID_SELF;
+
+	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					  &alloc_unbound);
+	if (err == -ENOSYS)
+		goto out_err;
+
+	BUG_ON(err);
+	xen_store_evtchn = xen_start_info->store_evtchn =
+		alloc_unbound.port;
+
+	return 0;
+
+ out_err:
+	if (page != 0)
+		free_page(page);
+	return err;
+}
+
+static int __init xenbus_init(void)
+{
+	int err = 0;
+
 	if (!xen_domain())
-		return err;
+		return -ENODEV;
 
-	/*
-	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
-	 */
-	if (xen_initial_domain()) {
-		struct evtchn_alloc_unbound alloc_unbound;
-
-		/* Allocate Xenstore page */
-		page = get_zeroed_page(GFP_KERNEL);
-		if (!page)
+	if (xen_hvm_domain()) {
+		uint64_t v = 0;
+		err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+		if (err)
 			goto out_error;
-
-		xen_store_mfn = xen_start_info->store_mfn =
-			pfn_to_mfn(virt_to_phys((void *)page) >>
-				   PAGE_SHIFT);
-
-		/* Next allocate a local port which xenstored can bind to */
-		alloc_unbound.dom        = DOMID_SELF;
-		alloc_unbound.remote_dom = 0;
-
-		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
-						  &alloc_unbound);
-		if (err == -ENOSYS)
+		xen_store_evtchn = (int)v;
+		err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+		if (err)
 			goto out_error;
-
-		BUG_ON(err);
-		xen_store_evtchn = xen_start_info->store_evtchn =
-			alloc_unbound.port;
-
-		xen_store_interface = mfn_to_virt(xen_store_mfn);
+		xen_store_mfn = (unsigned long)v;
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
 	} else {
-		if (xen_hvm_domain()) {
-			uint64_t v = 0;
-			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-			if (err)
-				goto out_error;
-			xen_store_evtchn = (int)v;
-			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
-			if (err)
-				goto out_error;
-			xen_store_mfn = (unsigned long)v;
-			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-		} else {
-			xen_store_evtchn = xen_start_info->store_evtchn;
-			xen_store_mfn = xen_start_info->store_mfn;
-			xen_store_interface = mfn_to_virt(xen_store_mfn);
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		if (xen_store_evtchn)
 			xenstored_ready = 1;
+		else {
+			err = xenstored_local_init();
+			if (err)
+				goto out_error;
 		}
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	}
 
 	/* Initialize the interface to xenstore. */
@@ -760,12 +770,7 @@
 	proc_mkdir("xen", NULL);
 #endif
 
-	return 0;
-
-  out_error:
-	if (page != 0)
-		free_page(page);
-
+ out_error:
 	return err;
 }
 
diff --git a/drivers/xen/xenbus/xenbus_probe_backend.c b/drivers/xen/xenbus/xenbus_probe_backend.c
index 60adf91..32417b5 100644
--- a/drivers/xen/xenbus/xenbus_probe_backend.c
+++ b/drivers/xen/xenbus/xenbus_probe_backend.c
@@ -104,8 +104,6 @@
 
 	xdev = to_xenbus_device(dev);
 	bus = container_of(xdev->dev.bus, struct xen_bus_type, bus);
-	if (xdev == NULL)
-		return -ENODEV;
 
 	if (add_uevent_var(env, "MODALIAS=xen-backend:%s", xdev->devicetype))
 		return -ENOMEM;
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index ed2ba47..540587e 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -248,10 +248,131 @@
 }
 EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
 
+static DECLARE_WAIT_QUEUE_HEAD(backend_state_wq);
+static int backend_state;
+
+static void xenbus_reset_backend_state_changed(struct xenbus_watch *w,
+					const char **v, unsigned int l)
+{
+	xenbus_scanf(XBT_NIL, v[XS_WATCH_PATH], "", "%i", &backend_state);
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			v[XS_WATCH_PATH], xenbus_strstate(backend_state));
+	wake_up(&backend_state_wq);
+}
+
+static void xenbus_reset_wait_for_backend(char *be, int expected)
+{
+	long timeout;
+	timeout = wait_event_interruptible_timeout(backend_state_wq,
+			backend_state == expected, 5 * HZ);
+	if (timeout <= 0)
+		printk(KERN_INFO "XENBUS: backend %s timed out.\n", be);
+}
+
+/*
+ * Reset frontend if it is in Connected or Closed state.
+ * Wait for backend to catch up.
+ * State Connected happens during kdump, Closed after kexec.
+ */
+static void xenbus_reset_frontend(char *fe, char *be, int be_state)
+{
+	struct xenbus_watch be_watch;
+
+	printk(KERN_DEBUG "XENBUS: backend %s %s\n",
+			be, xenbus_strstate(be_state));
+
+	memset(&be_watch, 0, sizeof(be_watch));
+	be_watch.node = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/state", be);
+	if (!be_watch.node)
+		return;
+
+	be_watch.callback = xenbus_reset_backend_state_changed;
+	backend_state = XenbusStateUnknown;
+
+	printk(KERN_INFO "XENBUS: triggering reconnect on %s\n", be);
+	register_xenbus_watch(&be_watch);
+
+	/* fall through to forward backend to state XenbusStateInitialising */
+	switch (be_state) {
+	case XenbusStateConnected:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosing);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosing);
+
+	case XenbusStateClosing:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateClosed);
+		xenbus_reset_wait_for_backend(be, XenbusStateClosed);
+
+	case XenbusStateClosed:
+		xenbus_printf(XBT_NIL, fe, "state", "%d", XenbusStateInitialising);
+		xenbus_reset_wait_for_backend(be, XenbusStateInitWait);
+	}
+
+	unregister_xenbus_watch(&be_watch);
+	printk(KERN_INFO "XENBUS: reconnect done on %s\n", be);
+	kfree(be_watch.node);
+}
+
+static void xenbus_check_frontend(char *class, char *dev)
+{
+	int be_state, fe_state, err;
+	char *backend, *frontend;
+
+	frontend = kasprintf(GFP_NOIO | __GFP_HIGH, "device/%s/%s", class, dev);
+	if (!frontend)
+		return;
+
+	err = xenbus_scanf(XBT_NIL, frontend, "state", "%i", &fe_state);
+	if (err != 1)
+		goto out;
+
+	switch (fe_state) {
+	case XenbusStateConnected:
+	case XenbusStateClosed:
+		printk(KERN_DEBUG "XENBUS: frontend %s %s\n",
+				frontend, xenbus_strstate(fe_state));
+		backend = xenbus_read(XBT_NIL, frontend, "backend", NULL);
+		if (!backend || IS_ERR(backend))
+			goto out;
+		err = xenbus_scanf(XBT_NIL, backend, "state", "%i", &be_state);
+		if (err == 1)
+			xenbus_reset_frontend(frontend, backend, be_state);
+		kfree(backend);
+		break;
+	default:
+		break;
+	}
+out:
+	kfree(frontend);
+}
+
+static void xenbus_reset_state(void)
+{
+	char **devclass, **dev;
+	int devclass_n, dev_n;
+	int i, j;
+
+	devclass = xenbus_directory(XBT_NIL, "device", "", &devclass_n);
+	if (IS_ERR(devclass))
+		return;
+
+	for (i = 0; i < devclass_n; i++) {
+		dev = xenbus_directory(XBT_NIL, "device", devclass[i], &dev_n);
+		if (IS_ERR(dev))
+			continue;
+		for (j = 0; j < dev_n; j++)
+			xenbus_check_frontend(devclass[i], dev[j]);
+		kfree(dev);
+	}
+	kfree(devclass);
+}
+
 static int frontend_probe_and_watch(struct notifier_block *notifier,
 				   unsigned long event,
 				   void *data)
 {
+	/* reset devices in Connected or Closed state */
+	if (xen_hvm_domain())
+		xenbus_reset_state();
 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
 	register_xenbus_watch(&fe_watch);
diff --git a/drivers/xen/xenbus/xenbus_xs.c b/drivers/xen/xenbus/xenbus_xs.c
index 5534690..b3b8f2f 100644
--- a/drivers/xen/xenbus/xenbus_xs.c
+++ b/drivers/xen/xenbus/xenbus_xs.c
@@ -45,6 +45,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <xen/xenbus.h>
+#include <xen/xen.h>
 #include "xenbus_comms.h"
 
 struct xs_stored_msg {
@@ -620,6 +621,15 @@
 	return NULL;
 }
 
+static void xs_reset_watches(void)
+{
+	int err;
+
+	err = xs_error(xs_single(XBT_NIL, XS_RESET_WATCHES, "", NULL));
+	if (err && err != -EEXIST)
+		printk(KERN_WARNING "xs_reset_watches failed: %d\n", err);
+}
+
 /* Register callback to watch this node. */
 int register_xenbus_watch(struct xenbus_watch *watch)
 {
@@ -638,8 +648,7 @@
 
 	err = xs_watch(watch->node, token);
 
-	/* Ignore errors due to multiple registration. */
-	if ((err != 0) && (err != -EEXIST)) {
+	if (err) {
 		spin_lock(&watches_lock);
 		list_del(&watch->list);
 		spin_unlock(&watches_lock);
@@ -897,5 +906,9 @@
 	if (IS_ERR(task))
 		return PTR_ERR(task);
 
+	/* shutdown watches for kexec boot */
+	if (xen_hvm_domain())
+		xs_reset_watches();
+
 	return 0;
 }
diff --git a/include/xen/balloon.h b/include/xen/balloon.h
index 76f7538..d29c153 100644
--- a/include/xen/balloon.h
+++ b/include/xen/balloon.h
@@ -25,8 +25,9 @@
 
 void balloon_set_new_target(unsigned long target);
 
-int alloc_xenballooned_pages(int nr_pages, struct page** pages);
-void free_xenballooned_pages(int nr_pages, struct page** pages);
+int alloc_xenballooned_pages(int nr_pages, struct page **pages,
+		bool highmem);
+void free_xenballooned_pages(int nr_pages, struct page **pages);
 
 struct sys_device;
 #ifdef CONFIG_XEN_SELFBALLOONING
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index b1fab6b..6b99bfb 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -156,6 +156,7 @@
 #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr))
 
 int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
+			struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 		      struct page **pages, unsigned int count);
diff --git a/include/xen/interface/io/xs_wire.h b/include/xen/interface/io/xs_wire.h
index 99fcffb..f0b6890 100644
--- a/include/xen/interface/io/xs_wire.h
+++ b/include/xen/interface/io/xs_wire.h
@@ -26,7 +26,11 @@
     XS_SET_PERMS,
     XS_WATCH_EVENT,
     XS_ERROR,
-    XS_IS_DOMAIN_INTRODUCED
+    XS_IS_DOMAIN_INTRODUCED,
+    XS_RESUME,
+    XS_SET_TARGET,
+    XS_RESTRICT,
+    XS_RESET_WATCHES
 };
 
 #define XS_WRITE_NONE "NONE"
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 534cac8..c1080d9 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -109,6 +109,7 @@
 #define MAP_PIRQ_TYPE_MSI		0x0
 #define MAP_PIRQ_TYPE_GSI		0x1
 #define MAP_PIRQ_TYPE_UNKNOWN		0x2
+#define MAP_PIRQ_TYPE_MSI_SEG		0x3
 
 #define PHYSDEVOP_map_pirq		13
 struct physdev_map_pirq {
@@ -119,7 +120,7 @@
     int index;
     /* IN or OUT */
     int pirq;
-    /* IN */
+    /* IN - high 16 bits hold segment for MAP_PIRQ_TYPE_MSI_SEG */
     int bus;
     /* IN */
     int devfn;
@@ -198,6 +199,37 @@
     uint32_t pirq;
 };
 
+#define XEN_PCI_DEV_EXTFN              0x1
+#define XEN_PCI_DEV_VIRTFN             0x2
+#define XEN_PCI_DEV_PXM                0x4
+
+#define PHYSDEVOP_pci_device_add        25
+struct physdev_pci_device_add {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+    uint32_t flags;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    uint32_t optarr[];
+#elif defined(__GNUC__)
+    uint32_t optarr[0];
+#endif
+};
+
+#define PHYSDEVOP_pci_device_remove     26
+#define PHYSDEVOP_restore_msi_ext       27
+struct physdev_pci_device {
+    /* IN */
+    uint16_t seg;
+    uint8_t bus;
+    uint8_t devfn;
+};
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **
diff --git a/include/xen/page.h b/include/xen/page.h
index 0be36b9..12765b6 100644
--- a/include/xen/page.h
+++ b/include/xen/page.h
@@ -3,6 +3,16 @@
 
 #include <asm/xen/page.h>
 
-extern phys_addr_t xen_extra_mem_start, xen_extra_mem_size;
+struct xen_memory_region {
+	phys_addr_t start;
+	phys_addr_t size;
+};
+
+#define XEN_EXTRA_MEM_MAX_REGIONS 128 /* == E820MAX */
+
+extern __initdata
+struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS];
+
+extern unsigned long xen_released_pages;
 
 #endif	/* _XEN_PAGE_H */