Merge branch 'devel-3.7'

Conflicts: config-pvops patches.xen/pvops-0005-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch patches.xen/pvops-0009-xen-enlighten-Expose-MWAIT-and-MWAIT_LEAF-if-hypervi.patch patches.xen/pvops-blkfront-eject-support.patch patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch rel-pvops series-pvops.conf version-pvops
2013-02-23 16:43:45 +01:00 · 2013-02-23 16:43:45 +01:00 · a2acb741b6
commit a2acb741b6
parent 5be073ac26 d0d5261907
22 changed files with 5525 additions and 3041 deletions
--- a/1539
+++ b/1539
--- a/kernel.spec
+++ b/kernel.spec
@@ -133,6 +133,7 @@ fi

 make prepare $MAKE_ARGS
 make scripts $MAKE_ARGS
+make scripts_basic $MAKE_ARGS
 krel=$(make -s kernelrelease $MAKE_ARGS)

 if [ "$krel" != "%kernelrelease" ]; then
@@ -323,6 +324,7 @@ mkdir -p %buildroot/%vm_install_dir
 /sbin/dracut --nomdadmconf --nolvmconf \
    --kmoddir %buildroot/lib/modules/%kernelrelease \
    --include %_sourcedir/vm-initramfs / \
+    --add "dm" --omit "plymouth" \
    -d "xenblk xen-blkfront cdrom ext4 jbd2 crc16 dm_snapshot" \
    %buildroot/%vm_install_dir/initramfs %kernelrelease

--- a/patches.xen/pvops-0001-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
+++ b/patches.xen/pvops-0001-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
@@ -38,8 +38,8 @@ index 610001d..68cf060 100644
 -extern int acpi_suspend_lowlevel(void);
 +extern int (*acpi_suspend_lowlevel)(void);
 
- extern const unsigned char acpi_wakeup_code[];
- #define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
+ /* Physical address to resume after wakeup */
+ #define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
 diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
 index ce664f3..c3a5b95 100644
 --- a/arch/x86/kernel/acpi/boot.c
@@ -82,8 +82,8 @@ index 103b6ab..4d2d0b1 100644
 -int acpi_suspend_lowlevel(void)
 +int x86_acpi_suspend_lowlevel(void)
 {
- 	struct wakeup_header *header;
- 	/* address in low memory of the wakeup routine. */
+ 	struct wakeup_header *header =
+ 		(struct wakeup_header *) __va(real_mode_header->wakeup_header);
 diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
 index 416d4be..4d3feb5 100644
 --- a/arch/x86/kernel/acpi/sleep.h
--- a/patches.xen/pvops-0001-xen-Add-privcmd-device-driver.patch
+++ b/patches.xen/pvops-0001-xen-Add-privcmd-device-driver.patch
@@ -1,967 +0,0 @@
-From d8414d3c157dc1f83e73c17447ba41fe5afa9d3d Mon Sep 17 00:00:00 2001
-From: Bastian Blank <waldi@debian.org>
-Date: Fri, 16 Dec 2011 11:34:33 -0500
-Subject: xen: Add privcmd device driver
-
-Access to arbitrary hypercalls is currently provided via xenfs. This
-adds a standard character device to handle this. The support in xenfs
-remains for backward compatibility and uses the device driver code.
-
-Signed-off-by: Bastian Blank <waldi@debian.org>
-Acked-by: Ian Campbell <ian.campbell@citrix.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- drivers/xen/Kconfig         |    7 +
- drivers/xen/Makefile        |    2 +
- drivers/xen/privcmd.c       |  437 +++++++++++++++++++++++++++++++++++++++++++
- drivers/xen/privcmd.h       |    3 +
- drivers/xen/xenfs/Makefile  |    2 +-
- drivers/xen/xenfs/privcmd.c |  400 ---------------------------------------
- drivers/xen/xenfs/super.c   |    3 +-
- drivers/xen/xenfs/xenfs.h   |    1 -
- 8 files changed, 452 insertions(+), 403 deletions(-)
- create mode 100644 drivers/xen/privcmd.c
- create mode 100644 drivers/xen/privcmd.h
- delete mode 100644 drivers/xen/xenfs/privcmd.c
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index 8795480..a1ced52 100644
--- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -86,6 +86,7 @@ config XEN_BACKEND
- 
- config XENFS
- 	tristate "Xen filesystem"
-+	select XEN_PRIVCMD
- 	default y
- 	help
- 	  The xen filesystem provides a way for domains to share
-@@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND
- 	  xen-pciback.hide=(03:00.0)(04:00.0)
- 
- 	  If in doubt, say m.
-+
-+config XEN_PRIVCMD
-+	tristate
-+	depends on XEN
-+	default m
-+
- endmenu
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index 974fffd..aa31337 100644
--- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM)			+= tmem.o
- obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
- obj-$(CONFIG_XEN_DOM0)			+= pci.o
- obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
-+obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
- 
- xen-evtchn-y				:= evtchn.o
- xen-gntdev-y				:= gntdev.o
- xen-gntalloc-y				:= gntalloc.o
-+xen-privcmd-y				:= privcmd.o
-diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
-new file mode 100644
-index 0000000..4e8d3da
--- /dev/null
-+++ b/drivers/xen/privcmd.c
-@@ -0,0 +1,437 @@
-+/******************************************************************************
-+ * privcmd.c
-+ *
-+ * Interface to privileged domain-0 commands.
-+ *
-+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
-+ */
-+
-+#include <linux/kernel.h>
-+#include <linux/module.h>
-+#include <linux/sched.h>
-+#include <linux/slab.h>
-+#include <linux/string.h>
-+#include <linux/errno.h>
-+#include <linux/mm.h>
-+#include <linux/mman.h>
-+#include <linux/uaccess.h>
-+#include <linux/swap.h>
-+#include <linux/highmem.h>
-+#include <linux/pagemap.h>
-+#include <linux/seq_file.h>
-+#include <linux/miscdevice.h>
-+
-+#include <asm/pgalloc.h>
-+#include <asm/pgtable.h>
-+#include <asm/tlb.h>
-+#include <asm/xen/hypervisor.h>
-+#include <asm/xen/hypercall.h>
-+
-+#include <xen/xen.h>
-+#include <xen/privcmd.h>
-+#include <xen/interface/xen.h>
-+#include <xen/features.h>
-+#include <xen/page.h>
-+#include <xen/xen-ops.h>
-+
-+#include "privcmd.h"
-+
-+MODULE_LICENSE("GPL");
-+
-+#ifndef HAVE_ARCH_PRIVCMD_MMAP
-+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-+#endif
-+
-+static long privcmd_ioctl_hypercall(void __user *udata)
-+{
-+	struct privcmd_hypercall hypercall;
-+	long ret;
-+
-+	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
-+		return -EFAULT;
-+
-+	ret = privcmd_call(hypercall.op,
-+			   hypercall.arg[0], hypercall.arg[1],
-+			   hypercall.arg[2], hypercall.arg[3],
-+			   hypercall.arg[4]);
-+
-+	return ret;
-+}
-+
-+static void free_page_list(struct list_head *pages)
-+{
-+	struct page *p, *n;
-+
-+	list_for_each_entry_safe(p, n, pages, lru)
-+		__free_page(p);
-+
-+	INIT_LIST_HEAD(pages);
-+}
-+
-+/*
-+ * Given an array of items in userspace, return a list of pages
-+ * containing the data.  If copying fails, either because of memory
-+ * allocation failure or a problem reading user memory, return an
-+ * error code; its up to the caller to dispose of any partial list.
-+ */
-+static int gather_array(struct list_head *pagelist,
-+			unsigned nelem, size_t size,
-+			void __user *data)
-+{
-+	unsigned pageidx;
-+	void *pagedata;
-+	int ret;
-+
-+	if (size > PAGE_SIZE)
-+		return 0;
-+
-+	pageidx = PAGE_SIZE;
-+	pagedata = NULL;	/* quiet, gcc */
-+	while (nelem--) {
-+		if (pageidx > PAGE_SIZE-size) {
-+			struct page *page = alloc_page(GFP_KERNEL);
-+
-+			ret = -ENOMEM;
-+			if (page == NULL)
-+				goto fail;
-+
-+			pagedata = page_address(page);
-+
-+			list_add_tail(&page->lru, pagelist);
-+			pageidx = 0;
-+		}
-+
-+		ret = -EFAULT;
-+		if (copy_from_user(pagedata + pageidx, data, size))
-+			goto fail;
-+
-+		data += size;
-+		pageidx += size;
-+	}
-+
-+	ret = 0;
-+
-+fail:
-+	return ret;
-+}
-+
-+/*
-+ * Call function "fn" on each element of the array fragmented
-+ * over a list of pages.
-+ */
-+static int traverse_pages(unsigned nelem, size_t size,
-+			  struct list_head *pos,
-+			  int (*fn)(void *data, void *state),
-+			  void *state)
-+{
-+	void *pagedata;
-+	unsigned pageidx;
-+	int ret = 0;
-+
-+	BUG_ON(size > PAGE_SIZE);
-+
-+	pageidx = PAGE_SIZE;
-+	pagedata = NULL;	/* hush, gcc */
-+
-+	while (nelem--) {
-+		if (pageidx > PAGE_SIZE-size) {
-+			struct page *page;
-+			pos = pos->next;
-+			page = list_entry(pos, struct page, lru);
-+			pagedata = page_address(page);
-+			pageidx = 0;
-+		}
-+
-+		ret = (*fn)(pagedata + pageidx, state);
-+		if (ret)
-+			break;
-+		pageidx += size;
-+	}
-+
-+	return ret;
-+}
-+
-+struct mmap_mfn_state {
-+	unsigned long va;
-+	struct vm_area_struct *vma;
-+	domid_t domain;
-+};
-+
-+static int mmap_mfn_range(void *data, void *state)
-+{
-+	struct privcmd_mmap_entry *msg = data;
-+	struct mmap_mfn_state *st = state;
-+	struct vm_area_struct *vma = st->vma;
-+	int rc;
-+
-+	/* Do not allow range to wrap the address space. */
-+	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
-+	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
-+		return -EINVAL;
-+
-+	/* Range chunks must be contiguous in va space. */
-+	if ((msg->va != st->va) ||
-+	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
-+		return -EINVAL;
-+
-+	rc = xen_remap_domain_mfn_range(vma,
-+					msg->va & PAGE_MASK,
-+					msg->mfn, msg->npages,
-+					vma->vm_page_prot,
-+					st->domain);
-+	if (rc < 0)
-+		return rc;
-+
-+	st->va += msg->npages << PAGE_SHIFT;
-+
-+	return 0;
-+}
-+
-+static long privcmd_ioctl_mmap(void __user *udata)
-+{
-+	struct privcmd_mmap mmapcmd;
-+	struct mm_struct *mm = current->mm;
-+	struct vm_area_struct *vma;
-+	int rc;
-+	LIST_HEAD(pagelist);
-+	struct mmap_mfn_state state;
-+
-+	if (!xen_initial_domain())
-+		return -EPERM;
-+
-+	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
-+		return -EFAULT;
-+
-+	rc = gather_array(&pagelist,
-+			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
-+			  mmapcmd.entry);
-+
-+	if (rc || list_empty(&pagelist))
-+		goto out;
-+
-+	down_write(&mm->mmap_sem);
-+
-+	{
-+		struct page *page = list_first_entry(&pagelist,
-+						     struct page, lru);
-+		struct privcmd_mmap_entry *msg = page_address(page);
-+
-+		vma = find_vma(mm, msg->va);
-+		rc = -EINVAL;
-+
-+		if (!vma || (msg->va != vma->vm_start) ||
-+		    !privcmd_enforce_singleshot_mapping(vma))
-+			goto out_up;
-+	}
-+
-+	state.va = vma->vm_start;
-+	state.vma = vma;
-+	state.domain = mmapcmd.dom;
-+
-+	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
-+			    &pagelist,
-+			    mmap_mfn_range, &state);
-+
-+
-+out_up:
-+	up_write(&mm->mmap_sem);
-+
-+out:
-+	free_page_list(&pagelist);
-+
-+	return rc;
-+}
-+
-+struct mmap_batch_state {
-+	domid_t domain;
-+	unsigned long va;
-+	struct vm_area_struct *vma;
-+	int err;
-+
-+	xen_pfn_t __user *user;
-+};
-+
-+static int mmap_batch_fn(void *data, void *state)
-+{
-+	xen_pfn_t *mfnp = data;
-+	struct mmap_batch_state *st = state;
-+
-+	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-+				       st->vma->vm_page_prot, st->domain) < 0) {
-+		*mfnp |= 0xf0000000U;
-+		st->err++;
-+	}
-+	st->va += PAGE_SIZE;
-+
-+	return 0;
-+}
-+
-+static int mmap_return_errors(void *data, void *state)
-+{
-+	xen_pfn_t *mfnp = data;
-+	struct mmap_batch_state *st = state;
-+
-+	return put_user(*mfnp, st->user++);
-+}
-+
-+static struct vm_operations_struct privcmd_vm_ops;
-+
-+static long privcmd_ioctl_mmap_batch(void __user *udata)
-+{
-+	int ret;
-+	struct privcmd_mmapbatch m;
-+	struct mm_struct *mm = current->mm;
-+	struct vm_area_struct *vma;
-+	unsigned long nr_pages;
-+	LIST_HEAD(pagelist);
-+	struct mmap_batch_state state;
-+
-+	if (!xen_initial_domain())
-+		return -EPERM;
-+
-+	if (copy_from_user(&m, udata, sizeof(m)))
-+		return -EFAULT;
-+
-+	nr_pages = m.num;
-+	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
-+		return -EINVAL;
-+
-+	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-+			   m.arr);
-+
-+	if (ret || list_empty(&pagelist))
-+		goto out;
-+
-+	down_write(&mm->mmap_sem);
-+
-+	vma = find_vma(mm, m.addr);
-+	ret = -EINVAL;
-+	if (!vma ||
-+	    vma->vm_ops != &privcmd_vm_ops ||
-+	    (m.addr != vma->vm_start) ||
-+	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-+	    !privcmd_enforce_singleshot_mapping(vma)) {
-+		up_write(&mm->mmap_sem);
-+		goto out;
-+	}
-+
-+	state.domain = m.dom;
-+	state.vma = vma;
-+	state.va = m.addr;
-+	state.err = 0;
-+
-+	ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-+			     &pagelist, mmap_batch_fn, &state);
-+
-+	up_write(&mm->mmap_sem);
-+
-+	if (state.err > 0) {
-+		state.user = m.arr;
-+		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-+			       &pagelist,
-+			       mmap_return_errors, &state);
-+	}
-+
-+out:
-+	free_page_list(&pagelist);
-+
-+	return ret;
-+}
-+
-+static long privcmd_ioctl(struct file *file,
-+			  unsigned int cmd, unsigned long data)
-+{
-+	int ret = -ENOSYS;
-+	void __user *udata = (void __user *) data;
-+
-+	switch (cmd) {
-+	case IOCTL_PRIVCMD_HYPERCALL:
-+		ret = privcmd_ioctl_hypercall(udata);
-+		break;
-+
-+	case IOCTL_PRIVCMD_MMAP:
-+		ret = privcmd_ioctl_mmap(udata);
-+		break;
-+
-+	case IOCTL_PRIVCMD_MMAPBATCH:
-+		ret = privcmd_ioctl_mmap_batch(udata);
-+		break;
-+
-+	default:
-+		ret = -EINVAL;
-+		break;
-+	}
-+
-+	return ret;
-+}
-+
-+#ifndef HAVE_ARCH_PRIVCMD_MMAP
-+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-+{
-+	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
-+	       vma, vma->vm_start, vma->vm_end,
-+	       vmf->pgoff, vmf->virtual_address);
-+
-+	return VM_FAULT_SIGBUS;
-+}
-+
-+static struct vm_operations_struct privcmd_vm_ops = {
-+	.fault = privcmd_fault
-+};
-+
-+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
-+{
-+	/* Unsupported for auto-translate guests. */
-+	if (xen_feature(XENFEAT_auto_translated_physmap))
-+		return -ENOSYS;
-+
-+	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
-+	 * how to recreate these mappings */
-+	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
-+	vma->vm_ops = &privcmd_vm_ops;
-+	vma->vm_private_data = NULL;
-+
-+	return 0;
-+}
-+
-+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
-+{
-+	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
-+}
-+#endif
-+
-+const struct file_operations xen_privcmd_fops = {
-+	.owner = THIS_MODULE,
-+	.unlocked_ioctl = privcmd_ioctl,
-+	.mmap = privcmd_mmap,
-+};
-+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
-+
-+static struct miscdevice privcmd_dev = {
-+	.minor = MISC_DYNAMIC_MINOR,
-+	.name = "xen/privcmd",
-+	.fops = &xen_privcmd_fops,
-+};
-+
-+static int __init privcmd_init(void)
-+{
-+	int err;
-+
-+	if (!xen_domain())
-+		return -ENODEV;
-+
-+	err = misc_register(&privcmd_dev);
-+	if (err != 0) {
-+		printk(KERN_ERR "Could not register Xen privcmd device\n");
-+		return err;
-+	}
-+	return 0;
-+}
-+
-+static void __exit privcmd_exit(void)
-+{
-+	misc_deregister(&privcmd_dev);
-+}
-+
-+module_init(privcmd_init);
-+module_exit(privcmd_exit);
-diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
-new file mode 100644
-index 0000000..14facae
--- /dev/null
-+++ b/drivers/xen/privcmd.h
-@@ -0,0 +1,3 @@
-+#include <linux/fs.h>
-+
-+extern const struct file_operations xen_privcmd_fops;
-diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
-index 4fde944..5d45ff1 100644
--- a/drivers/xen/xenfs/Makefile
-+++ b/drivers/xen/xenfs/Makefile
-@@ -1,4 +1,4 @@
- obj-$(CONFIG_XENFS) += xenfs.o
- 
-xenfs-y			  = super.o xenbus.o privcmd.o
-+xenfs-y			  = super.o xenbus.o
- xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
-diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
-deleted file mode 100644
-index dbd3b16..0000000
--- a/drivers/xen/xenfs/privcmd.c
-+++ /dev/null
-@@ -1,400 +0,0 @@
-/******************************************************************************
- * privcmd.c
- *
- * Interface to privileged domain-0 commands.
- *
- * Copyright (c) 2002-2004, K A Fraser, B Dragovic
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/uaccess.h>
-#include <linux/swap.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/xen.h>
-#include <xen/privcmd.h>
-#include <xen/interface/xen.h>
-#include <xen/features.h>
-#include <xen/page.h>
-#include <xen/xen-ops.h>
-
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
-
-static long privcmd_ioctl_hypercall(void __user *udata)
-{
-	struct privcmd_hypercall hypercall;
-	long ret;
-
-	if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
-		return -EFAULT;
-
-	ret = privcmd_call(hypercall.op,
-			   hypercall.arg[0], hypercall.arg[1],
-			   hypercall.arg[2], hypercall.arg[3],
-			   hypercall.arg[4]);
-
-	return ret;
-}
-
-static void free_page_list(struct list_head *pages)
-{
-	struct page *p, *n;
-
-	list_for_each_entry_safe(p, n, pages, lru)
-		__free_page(p);
-
-	INIT_LIST_HEAD(pages);
-}
-
-/*
- * Given an array of items in userspace, return a list of pages
- * containing the data.  If copying fails, either because of memory
- * allocation failure or a problem reading user memory, return an
- * error code; its up to the caller to dispose of any partial list.
- */
-static int gather_array(struct list_head *pagelist,
-			unsigned nelem, size_t size,
-			void __user *data)
-{
-	unsigned pageidx;
-	void *pagedata;
-	int ret;
-
-	if (size > PAGE_SIZE)
-		return 0;
-
-	pageidx = PAGE_SIZE;
-	pagedata = NULL;	/* quiet, gcc */
-	while (nelem--) {
-		if (pageidx > PAGE_SIZE-size) {
-			struct page *page = alloc_page(GFP_KERNEL);
-
-			ret = -ENOMEM;
-			if (page == NULL)
-				goto fail;
-
-			pagedata = page_address(page);
-
-			list_add_tail(&page->lru, pagelist);
-			pageidx = 0;
-		}
-
-		ret = -EFAULT;
-		if (copy_from_user(pagedata + pageidx, data, size))
-			goto fail;
-
-		data += size;
-		pageidx += size;
-	}
-
-	ret = 0;
-
-fail:
-	return ret;
-}
-
-/*
- * Call function "fn" on each element of the array fragmented
- * over a list of pages.
- */
-static int traverse_pages(unsigned nelem, size_t size,
-			  struct list_head *pos,
-			  int (*fn)(void *data, void *state),
-			  void *state)
-{
-	void *pagedata;
-	unsigned pageidx;
-	int ret = 0;
-
-	BUG_ON(size > PAGE_SIZE);
-
-	pageidx = PAGE_SIZE;
-	pagedata = NULL;	/* hush, gcc */
-
-	while (nelem--) {
-		if (pageidx > PAGE_SIZE-size) {
-			struct page *page;
-			pos = pos->next;
-			page = list_entry(pos, struct page, lru);
-			pagedata = page_address(page);
-			pageidx = 0;
-		}
-
-		ret = (*fn)(pagedata + pageidx, state);
-		if (ret)
-			break;
-		pageidx += size;
-	}
-
-	return ret;
-}
-
-struct mmap_mfn_state {
-	unsigned long va;
-	struct vm_area_struct *vma;
-	domid_t domain;
-};
-
-static int mmap_mfn_range(void *data, void *state)
-{
-	struct privcmd_mmap_entry *msg = data;
-	struct mmap_mfn_state *st = state;
-	struct vm_area_struct *vma = st->vma;
-	int rc;
-
-	/* Do not allow range to wrap the address space. */
-	if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
-	    ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
-		return -EINVAL;
-
-	/* Range chunks must be contiguous in va space. */
-	if ((msg->va != st->va) ||
-	    ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
-		return -EINVAL;
-
-	rc = xen_remap_domain_mfn_range(vma,
-					msg->va & PAGE_MASK,
-					msg->mfn, msg->npages,
-					vma->vm_page_prot,
-					st->domain);
-	if (rc < 0)
-		return rc;
-
-	st->va += msg->npages << PAGE_SHIFT;
-
-	return 0;
-}
-
-static long privcmd_ioctl_mmap(void __user *udata)
-{
-	struct privcmd_mmap mmapcmd;
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	int rc;
-	LIST_HEAD(pagelist);
-	struct mmap_mfn_state state;
-
-	if (!xen_initial_domain())
-		return -EPERM;
-
-	if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
-		return -EFAULT;
-
-	rc = gather_array(&pagelist,
-			  mmapcmd.num, sizeof(struct privcmd_mmap_entry),
-			  mmapcmd.entry);
-
-	if (rc || list_empty(&pagelist))
-		goto out;
-
-	down_write(&mm->mmap_sem);
-
-	{
-		struct page *page = list_first_entry(&pagelist,
-						     struct page, lru);
-		struct privcmd_mmap_entry *msg = page_address(page);
-
-		vma = find_vma(mm, msg->va);
-		rc = -EINVAL;
-
-		if (!vma || (msg->va != vma->vm_start) ||
-		    !privcmd_enforce_singleshot_mapping(vma))
-			goto out_up;
-	}
-
-	state.va = vma->vm_start;
-	state.vma = vma;
-	state.domain = mmapcmd.dom;
-
-	rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
-			    &pagelist,
-			    mmap_mfn_range, &state);
-
-
-out_up:
-	up_write(&mm->mmap_sem);
-
-out:
-	free_page_list(&pagelist);
-
-	return rc;
-}
-
-struct mmap_batch_state {
-	domid_t domain;
-	unsigned long va;
-	struct vm_area_struct *vma;
-	int err;
-
-	xen_pfn_t __user *user;
-};
-
-static int mmap_batch_fn(void *data, void *state)
-{
-	xen_pfn_t *mfnp = data;
-	struct mmap_batch_state *st = state;
-
-	if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
-				       st->vma->vm_page_prot, st->domain) < 0) {
-		*mfnp |= 0xf0000000U;
-		st->err++;
-	}
-	st->va += PAGE_SIZE;
-
-	return 0;
-}
-
-static int mmap_return_errors(void *data, void *state)
-{
-	xen_pfn_t *mfnp = data;
-	struct mmap_batch_state *st = state;
-
-	return put_user(*mfnp, st->user++);
-}
-
-static struct vm_operations_struct privcmd_vm_ops;
-
-static long privcmd_ioctl_mmap_batch(void __user *udata)
-{
-	int ret;
-	struct privcmd_mmapbatch m;
-	struct mm_struct *mm = current->mm;
-	struct vm_area_struct *vma;
-	unsigned long nr_pages;
-	LIST_HEAD(pagelist);
-	struct mmap_batch_state state;
-
-	if (!xen_initial_domain())
-		return -EPERM;
-
-	if (copy_from_user(&m, udata, sizeof(m)))
-		return -EFAULT;
-
-	nr_pages = m.num;
-	if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
-		return -EINVAL;
-
-	ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
-			   m.arr);
-
-	if (ret || list_empty(&pagelist))
-		goto out;
-
-	down_write(&mm->mmap_sem);
-
-	vma = find_vma(mm, m.addr);
-	ret = -EINVAL;
-	if (!vma ||
-	    vma->vm_ops != &privcmd_vm_ops ||
-	    (m.addr != vma->vm_start) ||
-	    ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
-	    !privcmd_enforce_singleshot_mapping(vma)) {
-		up_write(&mm->mmap_sem);
-		goto out;
-	}
-
-	state.domain = m.dom;
-	state.vma = vma;
-	state.va = m.addr;
-	state.err = 0;
-
-	ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			     &pagelist, mmap_batch_fn, &state);
-
-	up_write(&mm->mmap_sem);
-
-	if (state.err > 0) {
-		state.user = m.arr;
-		ret = traverse_pages(m.num, sizeof(xen_pfn_t),
-			       &pagelist,
-			       mmap_return_errors, &state);
-	}
-
-out:
-	free_page_list(&pagelist);
-
-	return ret;
-}
-
-static long privcmd_ioctl(struct file *file,
-			  unsigned int cmd, unsigned long data)
-{
-	int ret = -ENOSYS;
-	void __user *udata = (void __user *) data;
-
-	switch (cmd) {
-	case IOCTL_PRIVCMD_HYPERCALL:
-		ret = privcmd_ioctl_hypercall(udata);
-		break;
-
-	case IOCTL_PRIVCMD_MMAP:
-		ret = privcmd_ioctl_mmap(udata);
-		break;
-
-	case IOCTL_PRIVCMD_MMAPBATCH:
-		ret = privcmd_ioctl_mmap_batch(udata);
-		break;
-
-	default:
-		ret = -EINVAL;
-		break;
-	}
-
-	return ret;
-}
-
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
-	       vma, vma->vm_start, vma->vm_end,
-	       vmf->pgoff, vmf->virtual_address);
-
-	return VM_FAULT_SIGBUS;
-}
-
-static struct vm_operations_struct privcmd_vm_ops = {
-	.fault = privcmd_fault
-};
-
-static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	/* Unsupported for auto-translate guests. */
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return -ENOSYS;
-
-	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
-	 * how to recreate these mappings */
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
-	vma->vm_ops = &privcmd_vm_ops;
-	vma->vm_private_data = NULL;
-
-	return 0;
-}
-
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
-{
-	return (xchg(&vma->vm_private_data, (void *)1) == NULL);
-}
-#endif
-
-const struct file_operations privcmd_file_ops = {
-	.unlocked_ioctl = privcmd_ioctl,
-	.mmap = privcmd_mmap,
-};
-diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
-index 1aa3897..a55fbf9 100644
--- a/drivers/xen/xenfs/super.c
-+++ b/drivers/xen/xenfs/super.c
-@@ -16,6 +16,7 @@
- #include <xen/xen.h>
- 
- #include "xenfs.h"
-+#include "../privcmd.h"
- 
- #include <asm/xen/hypervisor.h>
- 
-@@ -84,7 +85,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
- 		[1] = {},
- 		{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
- 		{ "capabilities", &capabilities_file_ops, S_IRUGO },
-		{ "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
-+		{ "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
- 		{""},
- 	};
- 	int rc;
-diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
-index b68aa62..5056306 100644
--- a/drivers/xen/xenfs/xenfs.h
-+++ b/drivers/xen/xenfs/xenfs.h
-@@ -2,7 +2,6 @@
- #define _XENFS_XENBUS_H
- 
- extern const struct file_operations xenbus_file_ops;
-extern const struct file_operations privcmd_file_ops;
- extern const struct file_operations xsd_kva_file_ops;
- extern const struct file_operations xsd_port_file_ops;
- 
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0002-x86-acpi-tboot-Have-a-ACPI-os-prepare-sleep-instead-.patch
+++ b/patches.xen/pvops-0002-x86-acpi-tboot-Have-a-ACPI-os-prepare-sleep-instead-.patch
@@ -1,177 +0,0 @@
-From 8fd04efb7e41da12d85ad382b7c7092fe832bebb Mon Sep 17 00:00:00 2001
-From: Tang Liang <liang.tang@oracle.com>
-Date: Fri, 9 Dec 2011 10:05:54 +0800
-Subject: x86, acpi, tboot: Have a ACPI os prepare sleep instead of calling
- tboot_sleep.
-
-The ACPI suspend path makes a call to tboot_sleep right before
-it writes the PM1A, PM1B values. We replace the direct call to
-tboot via an registration callback similar to __acpi_register_gsi.
-
-CC: Thomas Gleixner <tglx@linutronix.de>
-CC: "H. Peter Anvin" <hpa@zytor.com>
-CC: x86@kernel.org
-CC: Len Brown <len.brown@intel.com>
-Acked-by: Joseph Cihula <joseph.cihula@intel.com>
-CC: Shane Wang <shane.wang@intel.com>
-CC: xen-devel@lists.xensource.com
-CC: linux-pm@lists.linux-foundation.org
-CC: tboot-devel@lists.sourceforge.net
-CC: linux-acpi@vger.kernel.org
-[v1: Added __attribute__ ((unused))]
-[v2: Introduced a wrapper instead of changing tboot_sleep return values]
-[v3: Added return value AE_CTRL_SKIP for acpi_os_sleep_prepare]
-Signed-off-by: Tang Liang <liang.tang@oracle.com>
-[v1: Fix compile issues on IA64 and PPC64]
-[v2: Fix where __acpi_os_prepare_sleep==NULL and did not go in sleep properly]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- arch/x86/kernel/tboot.c       |    8 ++++++++
- drivers/acpi/acpica/hwsleep.c |   10 +++++++---
- drivers/acpi/osl.c            |   24 ++++++++++++++++++++++++
- include/acpi/acexcep.h        |    1 +
- include/linux/acpi.h          |   10 ++++++++++
- include/linux/tboot.h         |    1 -
- 6 files changed, 50 insertions(+), 4 deletions(-)
-
-diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
-index e2410e2..1a4ab7d 100644
--- a/arch/x86/kernel/tboot.c
-+++ b/arch/x86/kernel/tboot.c
-@@ -297,6 +297,12 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
- 
- 	tboot_shutdown(acpi_shutdown_map[sleep_state]);
- }
-+static int tboot_sleep_wrapper(u8 sleep_state, u32 pm1a_control,
-+			       u32 pm1b_control)
-+{
-+	tboot_sleep(sleep_state, pm1a_control, pm1b_control);
-+	return 0;
-+}
- 
- static atomic_t ap_wfs_count;
- 
-@@ -345,6 +351,8 @@ static __init int tboot_late_init(void)
- 
- 	atomic_set(&ap_wfs_count, 0);
- 	register_hotcpu_notifier(&tboot_cpu_notifier);
-+
-+	acpi_os_set_prepare_sleep(&tboot_sleep_wrapper);
- 	return 0;
- }
- 
-diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
-index d52da30..992359a 100644
--- a/drivers/acpi/acpica/hwsleep.c
-+++ b/drivers/acpi/acpica/hwsleep.c
-@@ -43,9 +43,9 @@
-  */
- 
- #include <acpi/acpi.h>
-+#include <linux/acpi.h>
- #include "accommon.h"
- #include "actables.h"
-#include <linux/tboot.h>
- #include <linux/module.h>
- 
- #define _COMPONENT          ACPI_HARDWARE
-@@ -344,8 +344,12 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
- 
- 	ACPI_FLUSH_CPU_CACHE();
- 
-	tboot_sleep(sleep_state, pm1a_control, pm1b_control);
-
-+	status = acpi_os_prepare_sleep(sleep_state, pm1a_control,
-+				       pm1b_control);
-+	if (ACPI_SKIP(status))
-+		return_ACPI_STATUS(AE_OK);
-+	if (ACPI_FAILURE(status))
-+		return_ACPI_STATUS(status);
- 	/* Write #2: Write both SLP_TYP + SLP_EN */
- 
- 	status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
-diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
-index f31c5c5..f3aae4b 100644
--- a/drivers/acpi/osl.c
-+++ b/drivers/acpi/osl.c
-@@ -76,6 +76,9 @@ EXPORT_SYMBOL(acpi_in_debugger);
- extern char line_buf[80];
- #endif				/*ENABLE_DEBUGGER */
- 
-+static int (*__acpi_os_prepare_sleep)(u8 sleep_state, u32 pm1a_ctrl,
-+				      u32 pm1b_ctrl);
-+
- static acpi_osd_handler acpi_irq_handler;
- static void *acpi_irq_context;
- static struct workqueue_struct *kacpid_wq;
-@@ -1659,3 +1662,24 @@ acpi_status acpi_os_terminate(void)
- 
- 	return AE_OK;
- }
-+
-+acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control,
-+				  u32 pm1b_control)
-+{
-+	int rc = 0;
-+	if (__acpi_os_prepare_sleep)
-+		rc = __acpi_os_prepare_sleep(sleep_state,
-+					     pm1a_control, pm1b_control);
-+	if (rc < 0)
-+		return AE_ERROR;
-+	else if (rc > 0)
-+		return AE_CTRL_SKIP;
-+
-+	return AE_OK;
-+}
-+
-+void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
-+			       u32 pm1a_ctrl, u32 pm1b_ctrl))
-+{
-+	__acpi_os_prepare_sleep = func;
-+}
-diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
-index 5b6c391..fa0d22c 100644
--- a/include/acpi/acexcep.h
-+++ b/include/acpi/acexcep.h
-@@ -57,6 +57,7 @@
- #define ACPI_SUCCESS(a)                 (!(a))
- #define ACPI_FAILURE(a)                 (a)
- 
-+#define ACPI_SKIP(a)                    (a == AE_CTRL_SKIP)
- #define AE_OK                           (acpi_status) 0x0000
- 
- /*
-diff --git a/include/linux/acpi.h b/include/linux/acpi.h
-index 627a3a4..9393f73 100644
--- a/include/linux/acpi.h
-+++ b/include/linux/acpi.h
-@@ -363,4 +363,14 @@ static inline int suspend_nvs_register(unsigned long a, unsigned long b)
- }
- #endif
- 
-+#ifdef CONFIG_ACPI
-+void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
-+			       u32 pm1a_ctrl,  u32 pm1b_ctrl));
-+
-+acpi_status acpi_os_prepare_sleep(u8 sleep_state,
-+				  u32 pm1a_control, u32 pm1b_control);
-+#else
-+#define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
-+#endif
-+
- #endif	/*_LINUX_ACPI_H*/
-diff --git a/include/linux/tboot.h b/include/linux/tboot.h
-index 1dba6ee..c75128b 100644
--- a/include/linux/tboot.h
-+++ b/include/linux/tboot.h
-@@ -143,7 +143,6 @@ static inline int tboot_enabled(void)
- 
- extern void tboot_probe(void);
- extern void tboot_shutdown(u32 shutdown_type);
-extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control);
- extern struct acpi_table_header *tboot_get_dmar_table(
- 				      struct acpi_table_header *dmar_tbl);
- extern int tboot_force_iommu(void);
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0003-tboot-Add-return-values-for-tboot_sleep.patch
+++ b/patches.xen/pvops-0003-tboot-Add-return-values-for-tboot_sleep.patch
@ -1,67 +0,0 @@
-From 6f327383cd7ebef1fcc092e2d759ceb9d90dfb36 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Date: Thu, 8 Dec 2011 17:14:08 +0800
-Subject: tboot: Add return values for tboot_sleep
-
-.. as appropiately. As tboot_sleep now returns values.
-remove tboot_sleep_wrapper.
-
-Suggested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
-Acked-by: Joseph Cihula <joseph.cihula@intel.com>
-[v1: Return -1/0/+1 instead of ACPI_xx values]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- arch/x86/kernel/tboot.c |   13 ++++---------
- 1 files changed, 4 insertions(+), 9 deletions(-)
-
-diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
-index 1a4ab7d..6410744 100644
--- a/arch/x86/kernel/tboot.c
-+++ b/arch/x86/kernel/tboot.c
-@@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
- 		offsetof(struct acpi_table_facs, firmware_waking_vector);
- }
- 
-void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
-+static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
- {
- 	static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
- 		/* S0,1,2: */ -1, -1, -1,
-@@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
- 		/* S5: */ TB_SHUTDOWN_S5 };
- 
- 	if (!tboot_enabled())
-		return;
-+		return 0;
- 
- 	tboot_copy_fadt(&acpi_gbl_FADT);
- 	tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
-@@ -292,15 +292,10 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
- 	if (sleep_state >= ACPI_S_STATE_COUNT ||
- 	    acpi_shutdown_map[sleep_state] == -1) {
- 		pr_warning("unsupported sleep state 0x%x\n", sleep_state);
-		return;
-+		return -1;
- 	}
- 
- 	tboot_shutdown(acpi_shutdown_map[sleep_state]);
-}
-static int tboot_sleep_wrapper(u8 sleep_state, u32 pm1a_control,
-			       u32 pm1b_control)
-{
-	tboot_sleep(sleep_state, pm1a_control, pm1b_control);
- 	return 0;
- }
- 
-@@ -352,7 +347,7 @@ static __init int tboot_late_init(void)
- 	atomic_set(&ap_wfs_count, 0);
- 	register_hotcpu_notifier(&tboot_cpu_notifier);
- 
-	acpi_os_set_prepare_sleep(&tboot_sleep_wrapper);
-+	acpi_os_set_prepare_sleep(&tboot_sleep);
- 	return 0;
- }
- 
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0003-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
+++ b/patches.xen/pvops-0003-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
--- a/patches.xen/pvops-0005-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch
+++ b/patches.xen/pvops-0005-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch
@ -1,197 +0,0 @@
-From 9b10575276a220543b8791f2cb8268fbd4a0bc2e Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Date: Thu, 8 Dec 2011 17:32:23 +0800
-Subject: xen/acpi/sleep: Enable ACPI sleep via the __acpi_os_prepare_sleep
-
-Provide the registration callback to call in the Xen's
-ACPI sleep functionality. This means that during S3/S5
-we make a hypercall XENPF_enter_acpi_sleep with the
-proper PM1A/PM1B registers.
-
-Based of Ke Yu's <ke.yu@intel.com> initial idea.
-[ From http://xenbits.xensource.com/linux-2.6.18-xen.hg
-change c68699484a65 ]
-
-[v1: Added Copyright and license]
-[v2: Added check if PM1A/B the 16-bits MSB contain something. The spec
-     only uses 16-bits but might have more in future]
-Signed-off-by: Liang Tang <liang.tang@oracle.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- arch/x86/xen/enlighten.c |    3 ++
- drivers/xen/Makefile     |    2 +-
- drivers/xen/acpi.c       |   62 ++++++++++++++++++++++++++++++++++++++++++++++
- include/xen/acpi.h       |   58 +++++++++++++++++++++++++++++++++++++++++++
- 4 files changed, 124 insertions(+), 1 deletions(-)
- create mode 100644 drivers/xen/acpi.c
- create mode 100644 include/xen/acpi.h
-
-diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 12eb07b..a5277c2 100644
--- a/arch/x86/xen/enlighten.c
-+++ b/arch/x86/xen/enlighten.c
-@@ -42,6 +42,7 @@
- #include <xen/page.h>
- #include <xen/hvm.h>
- #include <xen/hvc-console.h>
-+#include <xen/acpi.h>
- 
- #include <asm/paravirt.h>
- #include <asm/apic.h>
-@@ -1275,6 +1276,8 @@ asmlinkage void __init xen_start_kernel(void)
- 
- 		/* Make sure ACS will be enabled */
- 		pci_request_acs();
-+
-+		xen_acpi_sleep_register();
- 	}
- #ifdef CONFIG_PCI
- 	/* PCI BIOS service won't work from a PV guest. */
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index aa31337..77a845f 100644
--- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -17,7 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
- obj-$(CONFIG_XEN_PVHVM)			+= platform-pci.o
- obj-$(CONFIG_XEN_TMEM)			+= tmem.o
- obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
-obj-$(CONFIG_XEN_DOM0)			+= pci.o
-+obj-$(CONFIG_XEN_DOM0)			+= pci.o acpi.o
- obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
- obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
- 
-diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
-new file mode 100644
-index 0000000..119d42a
--- /dev/null
-+++ b/drivers/xen/acpi.c
-@@ -0,0 +1,62 @@
-+/******************************************************************************
-+ * acpi.c
-+ * acpi file for domain 0 kernel
-+ *
-+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-+ * Copyright (c) 2011 Yu Ke ke.yu@intel.com
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#include <xen/acpi.h>
-+#include <xen/interface/platform.h>
-+#include <asm/xen/hypercall.h>
-+#include <asm/xen/hypervisor.h>
-+
-+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
-+				     u32 pm1a_cnt, u32 pm1b_cnt)
-+{
-+	struct xen_platform_op op = {
-+		.cmd = XENPF_enter_acpi_sleep,
-+		.interface_version = XENPF_INTERFACE_VERSION,
-+		.u = {
-+			.enter_acpi_sleep = {
-+				.pm1a_cnt_val = (u16)pm1a_cnt,
-+				.pm1b_cnt_val = (u16)pm1b_cnt,
-+				.sleep_state = sleep_state,
-+			},
-+		},
-+	};
-+
-+	if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
-+		WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
-+		     "Email xen-devel@lists.xensource.com  Thank you.\n", \
-+		     pm1a_cnt, pm1b_cnt);
-+		return -1;
-+	}
-+
-+	HYPERVISOR_dom0_op(&op);
-+	return 1;
-+}
-diff --git a/include/xen/acpi.h b/include/xen/acpi.h
-new file mode 100644
-index 0000000..48a9c01
--- /dev/null
-+++ b/include/xen/acpi.h
-@@ -0,0 +1,58 @@
-+/******************************************************************************
-+ * acpi.h
-+ * acpi file for domain 0 kernel
-+ *
-+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-+ * Copyright (c) 2011 Yu Ke <ke.yu@intel.com>
-+ *
-+ * This program is free software; you can redistribute it and/or
-+ * modify it under the terms of the GNU General Public License version 2
-+ * as published by the Free Software Foundation; or, when distributed
-+ * separately from the Linux kernel or incorporated into other
-+ * software packages, subject to the following license:
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a copy
-+ * of this source file (the "Software"), to deal in the Software without
-+ * restriction, including without limitation the rights to use, copy, modify,
-+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
-+ * and to permit persons to whom the Software is furnished to do so, subject to
-+ * the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-+ * IN THE SOFTWARE.
-+ */
-+
-+#ifndef _XEN_ACPI_H
-+#define _XEN_ACPI_H
-+
-+#include <linux/types.h>
-+
-+#ifdef CONFIG_XEN_DOM0
-+#include <asm/xen/hypervisor.h>
-+#include <xen/xen.h>
-+#include <linux/acpi.h>
-+
-+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
-+				     u32 pm1a_cnt, u32 pm1b_cnd);
-+
-+static inline void xen_acpi_sleep_register(void)
-+{
-+	if (xen_initial_domain())
-+		acpi_os_set_prepare_sleep(
-+			&xen_acpi_notify_hypervisor_state);
-+}
-+#else
-+static inline void xen_acpi_sleep_register(void)
-+{
-+}
-+#endif
-+
-+#endif	/* _XEN_ACPI_H */
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0007-xen-Utilize-the-restore_msi_irqs-hook.patch
+++ b/patches.xen/pvops-0007-xen-Utilize-the-restore_msi_irqs-hook.patch
@ -1,81 +0,0 @@
-From 86ceafdf50d67bcb2a5196122797a6972bedd279 Mon Sep 17 00:00:00 2001
-From: Tang Liang <liang.tang@oracle.com>
-Date: Thu, 8 Dec 2011 17:36:39 +0800
-Subject: xen: Utilize the restore_msi_irqs hook.
-
-to make a hypercall to restore the vectors in the MSI/MSI-X
-configuration space.
-
-Signed-off-by: Tang Liang <liang.tang@oracle.com>
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- arch/x86/pci/xen.c              |   27 +++++++++++++++++++++++++++
- include/xen/interface/physdev.h |    7 +++++++
- 2 files changed, 34 insertions(+), 0 deletions(-)
-
-diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
-index 492ade8..249a5ae 100644
--- a/arch/x86/pci/xen.c
-+++ b/arch/x86/pci/xen.c
-@@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
- out:
- 	return ret;
- }
-+
-+static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq)
-+{
-+	int ret = 0;
-+
-+	if (pci_seg_supported) {
-+		struct physdev_pci_device restore_ext;
-+
-+		restore_ext.seg = pci_domain_nr(dev->bus);
-+		restore_ext.bus = dev->bus->number;
-+		restore_ext.devfn = dev->devfn;
-+		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
-+					&restore_ext);
-+		if (ret == -ENOSYS)
-+			pci_seg_supported = false;
-+		WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
-+	}
-+	if (!pci_seg_supported) {
-+		struct physdev_restore_msi restore;
-+
-+		restore.bus = dev->bus->number;
-+		restore.devfn = dev->devfn;
-+		ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
-+		WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
-+	}
-+}
- #endif
- 
- static void xen_teardown_msi_irqs(struct pci_dev *dev)
-@@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)
- #ifdef CONFIG_PCI_MSI
- 	x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
- 	x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
-+	x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
- #endif
- 	xen_setup_acpi_sci();
- 	__acpi_register_gsi = acpi_register_gsi_xen;
-diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
-index c1080d9..0c28989 100644
--- a/include/xen/interface/physdev.h
-+++ b/include/xen/interface/physdev.h
-@@ -145,6 +145,13 @@ struct physdev_manage_pci {
- 	uint8_t devfn;
- };
- 
-+#define PHYSDEVOP_restore_msi            19
-+struct physdev_restore_msi {
-+	/* IN */
-+	uint8_t bus;
-+	uint8_t devfn;
-+};
-+
- #define PHYSDEVOP_manage_pci_add_ext	20
- struct physdev_manage_pci_ext {
- 	/* IN */
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0008-xen-setup-pm-acpi-Remove-the-call-to-boot_option_idl.patch
+++ b/patches.xen/pvops-0008-xen-setup-pm-acpi-Remove-the-call-to-boot_option_idl.patch
@ -1,31 +0,0 @@
-From cfb37553f53f993c22aad05c219581dfbc726bcc Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Date: Mon, 23 Jan 2012 10:53:57 -0500
-Subject: xen/setup/pm/acpi: Remove the call to boot_option_idle_override.
-
-We needed that call in the past to force the kernel to use
-default_idle (which called safe_halt, which called xen_safe_halt).
-
-But set_pm_idle_to_default() does now that, so there is no need
-to use this boot option operand.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- arch/x86/xen/setup.c |    1 -
- 1 files changed, 0 insertions(+), 1 deletions(-)
-
-diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
-index e03c636..1236623 100644
--- a/arch/x86/xen/setup.c
-+++ b/arch/x86/xen/setup.c
-@@ -420,7 +420,6 @@ void __init xen_arch_setup(void)
- 	boot_cpu_data.hlt_works_ok = 1;
- #endif
- 	disable_cpuidle();
-	boot_option_idle_override = IDLE_HALT;
- 	WARN_ON(set_pm_idle_to_default());
- 	fiddle_vdso();
- }
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0009-xen-enlighten-Expose-MWAIT-and-MWAIT_LEAF-if-hypervi.patch
+++ b/patches.xen/pvops-0009-xen-enlighten-Expose-MWAIT-and-MWAIT_LEAF-if-hypervi.patch
@ -1,216 +0,0 @@
-From d281ee8c6d58a7f5d1f4241238daa315fb959e31 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Date: Mon, 13 Feb 2012 22:26:32 -0500
-Subject: xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
-
-For the hypervisor to take advantage of the MWAIT support it needs
-to extract from the ACPI _CST the register address. But the
-hypervisor does not have the support to parse DSDT so it relies on
-the initial domain (dom0) to parse the ACPI Power Management information
-and push it up to the hypervisor. The pushing of the data is done
-by the processor_harveset_xen module which parses the information that
-the ACPI parser has graciously exposed in 'struct acpi_processor'.
-
-For the ACPI parser to also expose the Cx states for MWAIT, we need
-to expose the MWAIT capability (leaf 1). Furthermore we also need to
-expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly
-function.
-
-The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX
-operations, but it can't do it since it needs to be backwards compatible.
-Instead we choose to use the native CPUID to figure out if the MWAIT
-capability exists and use the XEN_SET_PDC query hypercall to figure out
-if the hypervisor wants us to expose the MWAIT_LEAF capability or not.
-
-Note: The XEN_SET_PDC query was implemented in c/s 23783:
-"ACPI: add _PDC input override mechanism".
-
-With this in place, instead of
- C3 ACPI IOPORT 415
-we get now
- C3:ACPI FFH INTEL MWAIT 0x20
-
-Note: The cpu_idle which would be calling the mwait variants for idling
-never gets set b/c we set the default pm_idle to be the hypercall variant.
-
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- arch/x86/xen/enlighten.c         |   92 +++++++++++++++++++++++++++++++++++++-
- include/xen/interface/platform.h |    4 +-
- 2 files changed, 94 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
-index 12eb07b..4c82936 100644
--- a/arch/x86/xen/enlighten.c
-+++ b/arch/x86/xen/enlighten.c
-@@ -62,6 +62,14 @@
- #include <asm/stackprotector.h>
- #include <asm/hypervisor.h>
- #include <asm/pci_x86.h>
-+#include <asm/mwait.h>
-+
-+#ifdef CONFIG_ACPI
-+#include <asm/acpi.h>
-+#include <acpi/pdc_intel.h>
-+#include <acpi/processor.h>
-+#include <xen/interface/platform.h>
-+#endif
- 
- #include "xen-ops.h"
- #include "mmu.h"
-@@ -200,13 +208,17 @@ static void __init xen_banner(void)
- static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
- static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
- 
-+static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
-+static __read_mostly unsigned int cpuid_leaf5_ecx_val;
-+static __read_mostly unsigned int cpuid_leaf5_edx_val;
-+
- static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- 		      unsigned int *cx, unsigned int *dx)
- {
- 	unsigned maskebx = ~0;
- 	unsigned maskecx = ~0;
- 	unsigned maskedx = ~0;
-
-+	unsigned setecx = 0;
- 	/*
- 	 * Mask out inconvenient features, to try and disable as many
- 	 * unsupported kernel subsystems as possible.
-@@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- 	switch (*ax) {
- 	case 1:
- 		maskecx = cpuid_leaf1_ecx_mask;
-+		setecx = cpuid_leaf1_ecx_set_mask;
- 		maskedx = cpuid_leaf1_edx_mask;
- 		break;
- 
-+	case CPUID_MWAIT_LEAF:
-+		/* Synthesize the values.. */
-+		*ax = 0;
-+		*bx = 0;
-+		*cx = cpuid_leaf5_ecx_val;
-+		*dx = cpuid_leaf5_edx_val;
-+		return;
-+
- 	case CPUID_THERM_POWER_LEAF:
- 		/* Disabling APERFMPERF for kernel usage */
- 		maskecx = ~(1 << APERFMPERF_PRESENT);
-@@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
- 
- 	*bx &= maskebx;
- 	*cx &= maskecx;
-+	*cx |= setecx;
- 	*dx &= maskedx;
-+
- }
- 
-+static bool __init xen_check_mwait(void)
-+{
-+#if CONFIG_ACPI
-+	struct xen_platform_op op = {
-+		.cmd			= XENPF_set_processor_pminfo,
-+		.u.set_pminfo.id	= -1,
-+		.u.set_pminfo.type	= XEN_PM_PDC,
-+	};
-+	uint32_t buf[3];
-+	unsigned int ax, bx, cx, dx;
-+	unsigned int mwait_mask;
-+
-+	/* We need to determine whether it is OK to expose the MWAIT
-+	 * capability to the kernel to harvest deeper than C3 states from ACPI
-+	 * _CST using the processor_harvest_xen.c module. For this to work, we
-+	 * need to gather the MWAIT_LEAF values (which the cstate.c code
-+	 * checks against). The hypervisor won't expose the MWAIT flag because
-+	 * it would break backwards compatibility; so we will find out directly
-+	 * from the hardware and hypercall.
-+	 */
-+	if (!xen_initial_domain())
-+		return false;
-+
-+	ax = 1;
-+	cx = 0;
-+
-+	native_cpuid(&ax, &bx, &cx, &dx);
-+
-+	mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
-+		     (1 << (X86_FEATURE_MWAIT % 32));
-+
-+	if ((cx & mwait_mask) != mwait_mask)
-+		return false;
-+
-+	/* We need to emulate the MWAIT_LEAF and for that we need both
-+	 * ecx and edx. The hypercall provides only partial information.
-+	 */
-+
-+	ax = CPUID_MWAIT_LEAF;
-+	bx = 0;
-+	cx = 0;
-+	dx = 0;
-+
-+	native_cpuid(&ax, &bx, &cx, &dx);
-+
-+	/* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
-+	 * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
-+	 */
-+	buf[0] = ACPI_PDC_REVISION_ID;
-+	buf[1] = 1;
-+	buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
-+
-+	set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
-+
-+	if ((HYPERVISOR_dom0_op(&op) == 0) &&
-+	    (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
-+		cpuid_leaf5_ecx_val = cx;
-+		cpuid_leaf5_edx_val = dx;
-+	}
-+	return true;
-+#else
-+	return false;
-+#endif
-+}
- static void __init xen_init_cpuid_mask(void)
- {
- 	unsigned int ax, bx, cx, dx;
-@@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void)
- 	/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
- 	if ((cx & xsave_mask) != xsave_mask)
- 		cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
-+
-+	if (xen_check_mwait())
-+		cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
- }
- 
- static void xen_set_debugreg(int reg, unsigned long val)
-diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
-index c168468..6220b98 100644
--- a/include/xen/interface/platform.h
-+++ b/include/xen/interface/platform.h
-@@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
- #define XEN_PM_CX   0
- #define XEN_PM_PX   1
- #define XEN_PM_TX   2
-
-+#define XEN_PM_PDC  3
- /* Px sub info type */
- #define XEN_PX_PCT   1
- #define XEN_PX_PSS   2
-@@ -286,6 +286,7 @@ struct xen_processor_performance {
- };
- DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
- 
-+DEFINE_GUEST_HANDLE(uint32_t);
- struct xenpf_set_processor_pminfo {
- 	/* IN variables */
- 	uint32_t id;    /* ACPI CPU ID */
-@@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo {
- 	union {
- 		struct xen_processor_power          power;/* Cx: _CST/_CSD */
- 		struct xen_processor_performance    perf; /* Px: _PPC/_PCT/_PSS/_PSD */
-+		GUEST_HANDLE(uint32_t)              pdc;
- 	};
- };
- DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0010-CPUFREQ-xen-governor-for-Xen-hypervisor-frequency-sc.patch
+++ b/patches.xen/pvops-0010-CPUFREQ-xen-governor-for-Xen-hypervisor-frequency-sc.patch
@ -1,529 +0,0 @@
-From 20e7a07fa0f8a0dbe30a0f732686d78849d29d96 Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Date: Fri, 3 Feb 2012 16:03:20 -0500
-Subject: [CPUFREQ] xen: governor for Xen hypervisor frequency scaling.
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-This CPU freq governor leaves the frequency decision to the Xen hypervisor.
-
-To do that the driver parses the Power Management data and uploads said
-information to the Xen hypervisor. Then the Xen hypervisor can select the
-proper Cx and Pxx states for the initial domain and all other domains.
-
-To upload the information, this CPU frequency driver reads Power Management (PM)
-(_Pxx and _Cx) which are populated in the 'struct acpi_processor' structure.
-It simply reads the contents of that structure and pass it up the Xen hypervisor.
-For that to work we depend on the appropriate CPU frequency scaling driver
-to do the heavy-lifting - so that the contents is correct.
-
-The CPU frequency governor it has been loaded also sets up a timer
-to check if the ACPI IDs count is different from the APIC ID count - which
-can happen if the user choose to use dom0_max_vcpu argument. In such a case
-a backup of the PM structure is used and uploaded to the hypervisor.
-
-[v1-v2: Initial RFC implementations that were posted]
-[v3: Changed the name to passthru suggested by Pasi Kärkkäinen <pasik@iki.fi>]
-[v4: Added vCPU != pCPU support - aka dom0_max_vcpus support]
-[v5: Cleaned up the driver, fix bug under Athlon XP]
-[v6: Changed the driver to a CPU frequency governor]
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
- drivers/xen/Kconfig       |   15 ++
- drivers/xen/Makefile      |    2 +-
- drivers/xen/cpufreq_xen.c |  445 +++++++++++++++++++++++++++++++++++++++++++++
- 3 files changed, 461 insertions(+), 1 deletions(-)
- create mode 100644 drivers/xen/cpufreq_xen.c
-
-diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
-index a1ced52..28ba371 100644
--- a/drivers/xen/Kconfig
-+++ b/drivers/xen/Kconfig
-@@ -178,4 +178,19 @@ config XEN_PRIVCMD
- 	depends on XEN
- 	default m
- 
-+config CPU_FREQ_GOV_XEN
-+	tristate "'xen' governor for hypervisor scaling"
-+	depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
-+	default m
-+	help
-+          This cpufreq governor leaves the frequency decision to the Xen hypervisor.
-+
-+	  To do that the driver parses the Power Management data and uploads said
-+	  information to the Xen hypervisor. Then the Xen hypervisor can select the
-+          proper Cx and Pxx states.
-+
-+          To compile this driver as a module, choose M here: the
-+          module will be called cpufreq_xen.  If you do not know what to choose,
-+          select M here.
-+
- endmenu
-diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
-index aa31337..5802220 100644
--- a/drivers/xen/Makefile
-+++ b/drivers/xen/Makefile
-@@ -20,7 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN)		+= swiotlb-xen.o
- obj-$(CONFIG_XEN_DOM0)			+= pci.o acpi.o
- obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= xen-pciback/
- obj-$(CONFIG_XEN_PRIVCMD)		+= xen-privcmd.o
-
-+obj-$(CONFIG_CPU_FREQ_GOV_XEN)		+= cpufreq_xen.o
- xen-evtchn-y				:= evtchn.o
- xen-gntdev-y				:= gntdev.o
- xen-gntalloc-y				:= gntalloc.o
-diff --git a/drivers/xen/cpufreq_xen.c b/drivers/xen/cpufreq_xen.c
-new file mode 100644
-index 0000000..1b709bf
--- /dev/null
-+++ b/drivers/xen/cpufreq_xen.c
-@@ -0,0 +1,445 @@
-+/*
-+ * Copyright 2012 by Oracle Inc
-+ * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-+ *
-+ * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
-+ * so many thanks go to Kevin Tian <kevin.tian@intel.com>
-+ * and Yu Ke <ke.yu@intel.com>.
-+ *
-+ * This program is free software; you can redistribute it and/or modify it
-+ * under the terms and conditions of the GNU General Public License,
-+ * version 2, as published by the Free Software Foundation.
-+ *
-+ * This program is distributed in the hope it will be useful, but WITHOUT
-+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
-+ * more details.
-+ *
-+ */
-+
-+#include <linux/cpumask.h>
-+#include <linux/cpufreq.h>
-+#include <linux/freezer.h>
-+#include <linux/kernel.h>
-+#include <linux/kthread.h>
-+#include <linux/init.h>
-+#include <linux/module.h>
-+#include <linux/types.h>
-+#include <acpi/acpi_bus.h>
-+#include <acpi/acpi_drivers.h>
-+#include <acpi/processor.h>
-+
-+#include <xen/interface/platform.h>
-+#include <asm/xen/hypercall.h>
-+
-+#define DRV_NAME "cpufreq-xen"
-+
-+static int no_hypercall;
-+MODULE_PARM_DESC(off, "Inhibit the hypercall.");
-+module_param_named(off, no_hypercall, int, 0400);
-+
-+/*
-+ * Mutex to protect the acpi_ids_done.
-+ */
-+static DEFINE_MUTEX(acpi_ids_mutex);
-+/*
-+ * Don't think convert this to cpumask_var_t or use cpumask_bit - as those
-+ * shrink to nr_cpu_bits (which is dependent on possible_cpu), which can be
-+ * less than what we want to put in.
-+ */
-+#define NR_ACPI_CPUS	NR_CPUS
-+#define MAX_ACPI_BITS	(BITS_TO_LONGS(NR_ACPI_CPUS))
-+static unsigned long *acpi_ids_done;
-+/*
-+ * Again, don't convert to cpumask - as we are reading the raw ACPI CPU ids
-+ * which can go beyond what we presently see.
-+ */
-+static unsigned long *acpi_id_present;
-+
-+/*
-+ * Pertient data for the timer to be launched to check if the # of
-+ * ACPI CPU ids is different from the one we have processed.
-+ */
-+#define DELAY_TIMER	msecs_to_jiffies(5000 /* 5 sec */)
-+static struct acpi_processor *pr_backup;
-+static struct delayed_work work;
-+
-+static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
-+{
-+	struct xen_platform_op op = {
-+		.cmd			= XENPF_set_processor_pminfo,
-+		.interface_version	= XENPF_INTERFACE_VERSION,
-+		.u.set_pminfo.id	= _pr->acpi_id,
-+		.u.set_pminfo.type	= XEN_PM_CX,
-+	};
-+	struct xen_processor_cx *dst_cx, *dst_cx_states = NULL;
-+	struct acpi_processor_cx *cx;
-+	int i, ok, ret = 0;
-+
-+	dst_cx_states = kcalloc(_pr->power.count,
-+				sizeof(struct xen_processor_cx), GFP_KERNEL);
-+	if (!dst_cx_states)
-+		return -ENOMEM;
-+
-+	for (ok = 0, i = 1; i <= _pr->power.count; i++) {
-+		cx = &_pr->power.states[i];
-+		if (!cx->valid)
-+			continue;
-+
-+		dst_cx = &(dst_cx_states[ok++]);
-+
-+		dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
-+		if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
-+			dst_cx->reg.bit_width = 8;
-+			dst_cx->reg.bit_offset = 0;
-+			dst_cx->reg.access_size = 1;
-+		} else {
-+			dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
-+			if (cx->entry_method == ACPI_CSTATE_FFH) {
-+				/* NATIVE_CSTATE_BEYOND_HALT */
-+				dst_cx->reg.bit_offset = 2;
-+				dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */
-+			}
-+			dst_cx->reg.access_size = 0;
-+		}
-+		dst_cx->reg.address = cx->address;
-+
-+		dst_cx->type = cx->type;
-+		dst_cx->latency = cx->latency;
-+		dst_cx->power = cx->power;
-+
-+		dst_cx->dpcnt = 0;
-+		set_xen_guest_handle(dst_cx->dp, NULL);
-+#ifdef DEBUG
-+		pr_debug(DRV_NAME ": CX: ID:%d [C%d:%s] entry:%d\n",
-+			_pr->acpi_id, cx->type, cx->desc, cx->entry_method);
-+#endif
-+	}
-+	if (!ok) {
-+		pr_err(DRV_NAME ": No _Cx for CPU %d\n", _pr->acpi_id);
-+		kfree(dst_cx_states);
-+		return -EINVAL;
-+	}
-+	op.u.set_pminfo.power.count = ok;
-+	op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control;
-+	op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check;
-+	op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst;
-+	op.u.set_pminfo.power.flags.power_setup_done =
-+		_pr->flags.power_setup_done;
-+
-+	set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states);
-+
-+	if (!no_hypercall)
-+		ret = HYPERVISOR_dom0_op(&op);
-+
-+	if (ret)
-+		pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI ID: %d\n",
-+		       ret, _pr->acpi_id);
-+
-+	kfree(dst_cx_states);
-+
-+	return ret;
-+}
-+static struct xen_processor_px *
-+xen_copy_pss_data(struct acpi_processor *_pr,
-+		  struct xen_processor_performance *dst_perf)
-+{
-+	struct xen_processor_px *dst_states = NULL;
-+	int i;
-+
-+	BUILD_BUG_ON(sizeof(struct xen_processor_px) !=
-+		     sizeof(struct acpi_processor_px));
-+
-+	dst_states = kcalloc(_pr->performance->state_count,
-+			     sizeof(struct xen_processor_px), GFP_KERNEL);
-+	if (!dst_states)
-+		return ERR_PTR(-ENOMEM);
-+
-+	dst_perf->state_count = _pr->performance->state_count;
-+	for (i = 0; i < _pr->performance->state_count; i++) {
-+		/* Fortunatly for us, they are both the same size */
-+		memcpy(&(dst_states[i]), &(_pr->performance->states[i]),
-+		       sizeof(struct acpi_processor_px));
-+	}
-+	return dst_states;
-+}
-+static int xen_copy_psd_data(struct acpi_processor *_pr,
-+			     struct xen_processor_performance *dst)
-+{
-+	BUILD_BUG_ON(sizeof(struct xen_psd_package) !=
-+		     sizeof(struct acpi_psd_package));
-+
-+	if (_pr->performance->shared_type != CPUFREQ_SHARED_TYPE_NONE) {
-+		dst->shared_type = _pr->performance->shared_type;
-+
-+		memcpy(&(dst->domain_info), &(_pr->performance->domain_info),
-+		       sizeof(struct acpi_psd_package));
-+	} else {
-+		if ((&cpu_data(0))->x86_vendor != X86_VENDOR_AMD)
-+			return -EINVAL;
-+
-+		/* On AMD, the powernow-k8 is loaded before acpi_cpufreq
-+		 * meaning that acpi_processor_preregister_performance never
-+		 * gets called which would parse the _PSD. The only relevant
-+		 * information from _PSD we need is whether it is HW_ALL or any
-+		 * other type. AMD K8 >= are SW_ALL or SW_ANY, AMD K7<= HW_ANY.
-+		 * This driver checks at the start whether it is K8 so it
-+		 * if we get here it can only be K8.
-+		 */
-+		dst->shared_type = CPUFREQ_SHARED_TYPE_ANY;
-+		dst->domain_info.coord_type = DOMAIN_COORD_TYPE_SW_ANY;
-+		dst->domain_info.num_processors = num_online_cpus();
-+	}
-+	return 0;
-+}
-+static int xen_copy_pct_data(struct acpi_pct_register *pct,
-+			     struct xen_pct_register *dst_pct)
-+{
-+	/* It would be nice if you could just do 'memcpy(pct, dst_pct') but
-+	 * sadly the Xen structure did not have the proper padding so the
-+	 * descriptor field takes two (dst_pct) bytes instead of one (pct).
-+	 */
-+	dst_pct->descriptor = pct->descriptor;
-+	dst_pct->length = pct->length;
-+	dst_pct->space_id = pct->space_id;
-+	dst_pct->bit_width = pct->bit_width;
-+	dst_pct->bit_offset = pct->bit_offset;
-+	dst_pct->reserved = pct->reserved;
-+	dst_pct->address = pct->address;
-+	return 0;
-+}
-+static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
-+{
-+	int ret = 0;
-+	struct xen_platform_op op = {
-+		.cmd			= XENPF_set_processor_pminfo,
-+		.interface_version	= XENPF_INTERFACE_VERSION,
-+		.u.set_pminfo.id	= _pr->acpi_id,
-+		.u.set_pminfo.type	= XEN_PM_PX,
-+	};
-+	struct xen_processor_performance *dst_perf;
-+	struct xen_processor_px *dst_states = NULL;
-+
-+	dst_perf = &op.u.set_pminfo.perf;
-+
-+	dst_perf->platform_limit = _pr->performance_platform_limit;
-+	dst_perf->flags |= XEN_PX_PPC;
-+	xen_copy_pct_data(&(_pr->performance->control_register),
-+			  &dst_perf->control_register);
-+	xen_copy_pct_data(&(_pr->performance->status_register),
-+			  &dst_perf->status_register);
-+	dst_perf->flags |= XEN_PX_PCT;
-+	dst_states = xen_copy_pss_data(_pr, dst_perf);
-+	if (!IS_ERR_OR_NULL(dst_states)) {
-+		set_xen_guest_handle(dst_perf->states, dst_states);
-+		dst_perf->flags |= XEN_PX_PSS;
-+	}
-+	if (!xen_copy_psd_data(_pr, dst_perf))
-+		dst_perf->flags |= XEN_PX_PSD;
-+
-+	if (!no_hypercall)
-+		ret = HYPERVISOR_dom0_op(&op);
-+
-+	if (ret)
-+		pr_err(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI ID %d\n",
-+		       ret, _pr->acpi_id);
-+
-+	if (!IS_ERR_OR_NULL(dst_states))
-+		kfree(dst_states);
-+
-+	return ret;
-+}
-+static int upload_pm_data(struct acpi_processor *_pr)
-+{
-+	int err = 0;
-+
-+	if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done))
-+		return -EBUSY;
-+
-+	if (_pr->flags.power)
-+		err = push_cxx_to_hypervisor(_pr);
-+
-+	if (_pr->performance && _pr->performance->states)
-+		err |= push_pxx_to_hypervisor(_pr);
-+
-+	return err;
-+}
-+static acpi_status
-+read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
-+{
-+	u32 acpi_id;
-+	acpi_status status;
-+	acpi_object_type acpi_type;
-+	unsigned long long tmp;
-+	union acpi_object object = { 0 };
-+	struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
-+
-+	status = acpi_get_type(handle, &acpi_type);
-+	if (ACPI_FAILURE(status))
-+		return AE_OK;
-+
-+	switch (acpi_type) {
-+	case ACPI_TYPE_PROCESSOR:
-+		status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
-+		if (ACPI_FAILURE(status))
-+			return AE_OK;
-+		acpi_id = object.processor.proc_id;
-+		break;
-+	case ACPI_TYPE_DEVICE:
-+		status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
-+		if (ACPI_FAILURE(status))
-+			return AE_OK;
-+		acpi_id = tmp;
-+		break;
-+	default:
-+		return AE_OK;
-+	}
-+	if (acpi_id > NR_ACPI_CPUS) {
-+		WARN_ONCE(1, "There are %d ACPI processors, but kernel can only do %d!\n",
-+		     acpi_id, NR_ACPI_CPUS);
-+		return AE_OK;
-+	}
-+	__set_bit(acpi_id, acpi_id_present);
-+
-+	return AE_OK;
-+}
-+static unsigned int more_acpi_ids(void)
-+{
-+	unsigned int n = 0;
-+
-+	acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
-+			    ACPI_UINT32_MAX,
-+			    read_acpi_id, NULL, NULL, NULL);
-+	acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
-+
-+	mutex_lock(&acpi_ids_mutex);
-+	if (!bitmap_equal(acpi_id_present, acpi_ids_done, MAX_ACPI_BITS))
-+		n = bitmap_weight(acpi_id_present, MAX_ACPI_BITS);
-+	mutex_unlock(&acpi_ids_mutex);
-+
-+	return n;
-+}
-+static void do_check_acpi_id_timer(struct work_struct *_work)
-+{
-+	/* All online CPUs have been processed at this stage. Now verify
-+	 * whether in fact "online CPUs" == physical CPUs.
-+	 */
-+	acpi_id_present = kcalloc(MAX_ACPI_BITS, sizeof(unsigned long), GFP_KERNEL);
-+	if (!acpi_id_present)
-+		return;
-+	memset(acpi_id_present, 0, MAX_ACPI_BITS * sizeof(unsigned long));
-+
-+	if (more_acpi_ids()) {
-+		int cpu;
-+		if (!pr_backup) {
-+			schedule_delayed_work(&work, DELAY_TIMER);
-+			return;
-+		}
-+		for_each_set_bit(cpu, acpi_id_present, MAX_ACPI_BITS) {
-+			pr_backup->acpi_id = cpu;
-+			mutex_lock(&acpi_ids_mutex);
-+			(void)upload_pm_data(pr_backup);
-+			mutex_unlock(&acpi_ids_mutex);
-+		}
-+	}
-+	kfree(acpi_id_present);
-+	acpi_id_present = NULL;
-+}
-+
-+static int cpufreq_governor_xen(struct cpufreq_policy *policy,
-+				unsigned int event)
-+{
-+	struct acpi_processor *_pr;
-+
-+	switch (event) {
-+	case CPUFREQ_GOV_START:
-+	case CPUFREQ_GOV_LIMITS:
-+		/* Set it to max and let the hypervisor take over */
-+		__cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
-+
-+		_pr = per_cpu(processors, policy->cpu /* APIC ID */);
-+		if (!_pr)
-+			break;
-+
-+		mutex_lock(&acpi_ids_mutex);
-+		if (!pr_backup) {
-+			pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
-+			memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
-+
-+			INIT_DELAYED_WORK_DEFERRABLE(&work, do_check_acpi_id_timer);
-+			schedule_delayed_work(&work, DELAY_TIMER);
-+		}
-+		(void)upload_pm_data(_pr);
-+		mutex_unlock(&acpi_ids_mutex);
-+		break;
-+	default:
-+		break;
-+	}
-+	return 0;
-+}
-+static struct cpufreq_governor cpufreq_gov_xen = {
-+	.name		= "xen",
-+	.governor	= cpufreq_governor_xen,
-+	.owner		= THIS_MODULE,
-+};
-+static int __init check_prereq(void)
-+{
-+	struct cpuinfo_x86 *c = &cpu_data(0);
-+
-+	if (!xen_initial_domain())
-+		return -ENODEV;
-+
-+	if (!acpi_gbl_FADT.smi_command)
-+		return -ENODEV;
-+
-+	if (c->x86_vendor == X86_VENDOR_INTEL) {
-+		if (!cpu_has(c, X86_FEATURE_EST))
-+			return -ENODEV;
-+
-+		return 0;
-+	}
-+	if (c->x86_vendor == X86_VENDOR_AMD) {
-+		u32 hi = 0, lo = 0;
-+		/* Copied from powernow-k8.h, can't include ../cpufreq/powernow
-+		 * as we get compile warnings for the static functions.
-+		 */
-+#define MSR_PSTATE_CUR_LIMIT    0xc0010061 /* pstate current limit MSR */
-+		rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
-+
-+		/* If the MSR cannot provide the data, the powernow-k8
-+		 * won't process the data properly either.
-+		 */
-+		if (hi || lo)
-+			return 0;
-+	}
-+	return -ENODEV;
-+}
-+
-+static int __init xen_processor_passthru_init(void)
-+{
-+	int rc = check_prereq();
-+
-+	if (rc)
-+		return rc;
-+
-+	acpi_ids_done = kcalloc(MAX_ACPI_BITS, sizeof(unsigned long), GFP_KERNEL);
-+	if (!acpi_ids_done)
-+		return -ENOMEM;
-+	memset(acpi_ids_done, 0, MAX_ACPI_BITS * sizeof(unsigned long));
-+
-+	return cpufreq_register_governor(&cpufreq_gov_xen);
-+}
-+static void __exit xen_processor_passthru_exit(void)
-+{
-+	cpufreq_unregister_governor(&cpufreq_gov_xen);
-+	cancel_delayed_work_sync(&work);
-+	kfree(acpi_ids_done);
-+	kfree(pr_backup);
-+}
-+
-+MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
-+MODULE_DESCRIPTION("CPUfreq policy governor 'xen' which uploads PM data to Xen hypervisor");
-+MODULE_LICENSE("GPL");
-+
-+late_initcall(xen_processor_passthru_init);
-+module_exit(xen_processor_passthru_exit);
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0011-x86-PCI-Expand-the-x86_msi_ops-to-have-a-restore-MSI.patch
+++ b/patches.xen/pvops-0011-x86-PCI-Expand-the-x86_msi_ops-to-have-a-restore-MSI.patch
@ -1,135 +0,0 @@
-From 76ccc297018d25d55b789bbd508861ef1e2cdb0c Mon Sep 17 00:00:00 2001
-From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Date: Fri, 16 Dec 2011 17:38:18 -0500
-Subject: x86/PCI: Expand the x86_msi_ops to have a restore MSIs.
-
-The MSI restore function will become a function pointer in an
-x86_msi_ops struct. It defaults to the implementation in the
-io_apic.c and msi.c. We piggyback on the indirection mechanism
-introduced by "x86: Introduce x86_msi_ops".
-
-Cc: x86@kernel.org
-Cc: Thomas Gleixner <tglx@linutronix.de>
-Cc: "H. Peter Anvin" <hpa@zytor.com>
-Cc: linux-pci@vger.kernel.org
-Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
- arch/x86/include/asm/pci.h      |    9 +++++++++
- arch/x86/include/asm/x86_init.h |    1 +
- arch/x86/kernel/x86_init.c      |    1 +
- drivers/pci/msi.c               |   29 +++++++++++++++++++++++++++--
- 4 files changed, 38 insertions(+), 2 deletions(-)
-
-diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
-index d498943..df75d07 100644
--- a/arch/x86/include/asm/pci.h
-+++ b/arch/x86/include/asm/pci.h
-@@ -112,19 +112,28 @@ static inline void x86_teardown_msi_irq(unsigned int irq)
- {
- 	x86_msi.teardown_msi_irq(irq);
- }
-+static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
-+{
-+	x86_msi.restore_msi_irqs(dev, irq);
-+}
- #define arch_setup_msi_irqs x86_setup_msi_irqs
- #define arch_teardown_msi_irqs x86_teardown_msi_irqs
- #define arch_teardown_msi_irq x86_teardown_msi_irq
-+#define arch_restore_msi_irqs x86_restore_msi_irqs
- /* implemented in arch/x86/kernel/apic/io_apic. */
- int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
- void native_teardown_msi_irq(unsigned int irq);
-+void native_restore_msi_irqs(struct pci_dev *dev, int irq);
- /* default to the implementation in drivers/lib/msi.c */
- #define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
-+#define HAVE_DEFAULT_MSI_RESTORE_IRQS
- void default_teardown_msi_irqs(struct pci_dev *dev);
-+void default_restore_msi_irqs(struct pci_dev *dev, int irq);
- #else
- #define native_setup_msi_irqs		NULL
- #define native_teardown_msi_irq		NULL
- #define default_teardown_msi_irqs	NULL
-+#define default_restore_msi_irqs	NULL
- #endif
- 
- #define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
-diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
-index 1971e65..cd52084 100644
--- a/arch/x86/include/asm/x86_init.h
-+++ b/arch/x86/include/asm/x86_init.h
-@@ -177,6 +177,7 @@ struct x86_msi_ops {
- 	int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
- 	void (*teardown_msi_irq)(unsigned int irq);
- 	void (*teardown_msi_irqs)(struct pci_dev *dev);
-+	void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
- };
- 
- extern struct x86_init_ops x86_init;
-diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
-index c1d6cd5..83b05ad 100644
--- a/arch/x86/kernel/x86_init.c
-+++ b/arch/x86/kernel/x86_init.c
-@@ -114,4 +114,5 @@ struct x86_msi_ops x86_msi = {
- 	.setup_msi_irqs = native_setup_msi_irqs,
- 	.teardown_msi_irq = native_teardown_msi_irq,
- 	.teardown_msi_irqs = default_teardown_msi_irqs,
-+	.restore_msi_irqs = default_restore_msi_irqs,
- };
-diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
-index 82de95e..a825d78 100644
--- a/drivers/pci/msi.c
-+++ b/drivers/pci/msi.c
-@@ -86,6 +86,31 @@ void default_teardown_msi_irqs(struct pci_dev *dev)
- }
- #endif
- 
-+#ifndef arch_restore_msi_irqs
-+# define arch_restore_msi_irqs default_restore_msi_irqs
-+# define HAVE_DEFAULT_MSI_RESTORE_IRQS
-+#endif
-+
-+#ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
-+void default_restore_msi_irqs(struct pci_dev *dev, int irq)
-+{
-+	struct msi_desc *entry;
-+
-+	entry = NULL;
-+	if (dev->msix_enabled) {
-+		list_for_each_entry(entry, &dev->msi_list, list) {
-+			if (irq == entry->irq)
-+				break;
-+		}
-+	} else if (dev->msi_enabled)  {
-+		entry = irq_get_msi_desc(irq);
-+	}
-+
-+	if (entry)
-+		write_msi_msg(irq, &entry->msg);
-+}
-+#endif
-+
- static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
- {
- 	u16 control;
-@@ -372,7 +397,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
- 
- 	pci_intx_for_msi(dev, 0);
- 	msi_set_enable(dev, pos, 0);
-	write_msi_msg(dev->irq, &entry->msg);
-+	arch_restore_msi_irqs(dev, dev->irq);
- 
- 	pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
- 	msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
-@@ -400,7 +425,7 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
- 	pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
- 
- 	list_for_each_entry(entry, &dev->msi_list, list) {
-		write_msi_msg(entry->irq, &entry->msg);
-+		arch_restore_msi_irqs(dev, entry->irq);
- 		msix_mask_irq(entry, entry->masked);
- 	}
- 
-- 
-1.7.6.4
-
--- a/patches.xen/pvops-0100-usb-xen-pvusb-driver.patch
+++ b/patches.xen/pvops-0100-usb-xen-pvusb-driver.patch
--- a/patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch
+++ b/patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch
@ -0,0 +1,72 @@
+From 433928d3823f561919ead305194e46e5311b573d Mon Sep 17 00:00:00 2001
+From: Marek Marczykowski <marmarek@invisiblethingslab.com>
+Date: Sat, 23 Jun 2012 19:50:44 +0200
+Subject: [PATCH 1/2] Revert "xen/pat: Disable PAT support for now."
+Organization: Invisible Things Lab
+
+This reverts commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1.
+
+We haven't observed the failure that the reverted commit works around, but the
+workaround caused horrible GPU performance. The "nopat" option remains available.
+
+Signed-off-by: Marek Marczykowski <marmarek@invisiblethingslab.com>
+---
+ arch/x86/xen/enlighten.c |    2 --
+ arch/x86/xen/mmu.c       |    8 ++++----
+ 2 files changed, 4 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
+index 6c7f1e8..bf3319c 100644
+--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
+@@ -1269,9 +1269,7 @@ asmlinkage void __init xen_start_kernel(void)
+ 
+ 	/* Prevent unwanted bits from being set in PTEs. */
+ 	__supported_pte_mask &= ~_PAGE_GLOBAL;
+-#if 0
+ 	if (!xen_initial_domain())
+-#endif
+ 		__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
+ 
+ 	__supported_pte_mask |= _PAGE_IOMAP;
+diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
+index 69f5857..a5d252a 100644
+--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
+@@ -420,13 +420,13 @@ static pteval_t iomap_pte(pteval_t val)
+ static pteval_t xen_pte_val(pte_t pte)
+ {
+ 	pteval_t pteval = pte.pte;
+-#if 0
+
+ 	/* If this is a WC pte, convert back from Xen WC to Linux WC */
+ 	if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
+ 		WARN_ON(!pat_enabled);
+ 		pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
+ 	}
+-#endif
+
+ 	if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
+ 		return pteval;
+ 
+@@ -468,7 +468,7 @@ void xen_set_pat(u64 pat)
+ static pte_t xen_make_pte(pteval_t pte)
+ {
+ 	phys_addr_t addr = (pte & PTE_PFN_MASK);
+-#if 0
+
+ 	/* If Linux is trying to set a WC pte, then map to the Xen WC.
+ 	 * If _PAGE_PAT is set, then it probably means it is really
+ 	 * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
+@@ -481,7 +481,7 @@ static pte_t xen_make_pte(pteval_t pte)
+ 		if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
+ 			pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
+ 	}
+-#endif
+
+ 	/*
+ 	 * Unprivileged domains are allowed to do IOMAPpings for
+ 	 * PCI passthrough, but not map ISA space.  The ISA
+-- 
+1.7.4.4
+
--- a/patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch
+++ b/patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch
@ -0,0 +1,196 @@
+From f37a97dead89d07bce4d8fedc4c295c9bc700ab5 Mon Sep 17 00:00:00 2001
+From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Date: Fri, 4 Nov 2011 11:59:34 -0400
+Subject: [PATCH 2/2] x86/cpa: Use pte_attrs instead of pte_flags on
+ CPA/set_p.._wb/wc operations.
+
+When using the paravirt interface, most of the page operations are wrapped
+in the pvops interface. The one that is not is the pte_flags. The reason
+being that for most cases, the "raw" PTE flag values for baremetal and whatever
+pvops platform is running (in this case) - share the same bit meaning.
+
+Except for PAT. Under Linux, the PAT MSR is written to be:
+
+          PAT4                 PAT0
+---+----+----+----+-----+----+----+
+ WC | WC | WB | UC | UC- | WC | WB |  <= Linux
+---+----+----+----+-----+----+----+
+ WC | WT | WB | UC | UC- | WT | WB |  <= BIOS
+---+----+----+----+-----+----+----+
+ WC | WP | WC | UC | UC- | WT | WB |  <= Xen
+---+----+----+----+-----+----+----+
+
+The lookup of this index table translates to looking up
+Bit 7, Bit 4, and Bit 3 of PTE:
+
+ PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
+
+If all bits are off, then we are using PAT0. If bit 3 turned on,
+then we are using PAT1, if bit 3 and bit 4, then PAT2..
+
+Back to the PAT MSR table:
+
+As you can see, the PAT1 translates to PAT4 under Xen. Under Linux
+we only use PAT0, PAT1, and PAT2 for the caching as:
+
+ WB = none (so PAT0)
+ WC = PWT (bit 3 on)
+ UC = PWT | PCD (bit 3 and 4 are on).
+
+But to make it work with Xen, we end up doing for WC a translation:
+
+ PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
+
+And to translate back (when the paravirt pte_val is used) we would:
+
+ PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
+
+This works quite well, except if code uses the pte_flags, as pte_flags
+reads the raw value and does not go through the paravirt. Which means
+that if (when running under Xen):
+
+ 1) we allocate some pages.
+ 2) call set_pages_array_wc, which ends up calling:
+     __page_change_att_set_clr(.., __pgprot(__PAGE_WC),  /* set */
+                                 , __pgprot(__PAGE_MASK), /* clear */
+    which ends up reading the _raw_ PTE flags and _only_ look at the
+    _PTE_FLAG_MASK contents with __PAGE_MASK cleared (0x18) and
+    __PAGE_WC (0x8) set.
+
+     read raw *pte -> 0x67
+     *pte = 0x67 & ^0x18 | 0x8
+     *pte = 0x67 & 0xffffffe7 | 0x8
+     *pte = 0x6f
+
+   [now set_pte_atomic is called, and 0x6f is written in, but under
+    xen_make_pte, the bit 3 is translated to bit 7, so it ends up
+    writing 0xa7, which is correct]
+
+ 3) do something to them.
+ 4) call set_pages_array_wb
+     __page_change_att_set_clr(.., __pgprot(__PAGE_WB),  /* set */
+                                 , __pgprot(__PAGE_MASK), /* clear */
+    which ends up reading the _raw_ PTE and _only_ look at the
+    _PTE_FLAG_MASK contents with _PAGE_MASK cleared (0x18) and
+    __PAGE_WB (0x0) set:
+
+     read raw *pte -> 0xa7
+     *pte = 0xa7 & ^0x18 | 0
+     *pte = 0xa7 & 0xffffffe7 | 0
+     *pte = 0xa7
+
+   [we check whether the old PTE is different from the new one
+
+    if (pte_val(old_pte) != pte_val(new_pte)) {
+        set_pte_atomic(kpte, new_pte);
+        ...
+
+   and find out that 0xA7 == 0xA7 so we do not write the new PTE value in]
+
+   End result is that we failed at removing the WC caching bit!
+
+ 5) free them.
+   [and have pages with PAT4 (bit 7) set, so other subsystems end up using
+    the pages that have the write combined bit set resulting in crashes. Yikes!].
+
+The fix, which this patch proposes, is to wrap the pte_pgprot in the CPA
+code with newly introduced pte_attrs which can go through the pvops interface
+to get the "emulated" value instead of the raw. Naturally if CONFIG_PARAVIRT is
+not set, it would end up calling native_pte_val.
+
+The other way to fix this is by wrapping pte_flags and go through the pvops
+interface and it really is the Right Thing to do.  The problem is, that past
+experience with mprotect stuff demonstrates that it would be really expensive in inner
+loops, and pte_flags() is used in some very perf-critical areas.
+
+Example code to run this and see the various mysterious subsystems/applications
+crashing
+
+MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
+MODULE_DESCRIPTION("wb_to_wc_and_back");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(WB_TO_WC);
+
+static int thread(void *arg)
+{
+	struct page *a[MAX_PAGES];
+	unsigned int i, j;
+	do {
+		for (j = 0, i = 0;i < MAX_PAGES; i++, j++) {
+			a[i] = alloc_page(GFP_KERNEL);
+			if (!a[i])
+				break;
+		}
+		set_pages_array_wc(a, j);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout_interruptible(HZ);
+		for (i = 0; i < j; i++) {
+			unsigned long *addr = page_address(a[i]);
+			if (addr) {
+				memset(addr, 0xc2, PAGE_SIZE);
+			}
+		}
+		set_pages_array_wb(a, j);
+		for (i = 0; i< MAX_PAGES; i++) {
+			if (a[i])
+				__free_page(a[i]);
+			a[i] = NULL;
+		}
+	} while (!kthread_should_stop());
+	return 0;
+}
+static struct task_struct *t;
+static int __init wb_to_wc_init(void)
+{
+	t = kthread_run(thread, NULL, "wb_to_wc_and_back");
+	return 0;
+}
+static void __exit wb_to_wc_exit(void)
+{
+	if (t)
+		kthread_stop(t);
+}
+module_init(wb_to_wc_init);
+module_exit(wb_to_wc_exit);
+
+This fixes RH BZ #742032, #787403, and #745574
+Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+Tested-by: Tom Goetz <tom.goetz@virtualcomputer.com>
+CC: stable@kernel.org
+---
+ arch/x86/include/asm/pgtable.h |    5 +++++
+ arch/x86/mm/pageattr.c         |    2 +-
+ 2 files changed, 6 insertions(+), 1 deletions(-)
+
+diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
+index 49afb3f..fa7bd2c 100644
+--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
+@@ -349,6 +349,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
+ 	return __pgprot(preservebits | addbits);
+ }
+ 
+static inline pgprot_t pte_attrs(pte_t pte)
+{
+	return __pgprot(pte_val(pte) & PTE_FLAGS_MASK);
+}
+
+ #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
+ 
+ #define canon_pgprot(p) __pgprot(massage_pgprot(p))
+diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
+index e1ebde3..1ae1b4b 100644
+--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
+@@ -651,7 +651,7 @@ repeat:
+ 
+ 	if (level == PG_LEVEL_4K) {
+ 		pte_t new_pte;
+-		pgprot_t new_prot = pte_pgprot(old_pte);
+		pgprot_t new_prot = pte_attrs(old_pte);
+ 		unsigned long pfn = pte_pfn(old_pte);
+ 
+ 		pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
+-- 
+1.7.4.4
+
--- a/patches.xen/pvops-blkfront-eject-support.patch
+++ b/patches.xen/pvops-blkfront-eject-support.patch
@ -1,9 +1,9 @@
 --- linux-3.4.1.orig/drivers/block/xen-blkfront.c	2012-06-01 09:18:44.000000000 +0200
 +++ linux-3.4.1/drivers/block/xen-blkfront.c	2012-07-15 15:54:31.350255623 +0200
@@ -44,6 +44,7 @@
- #include <linux/slab.h>
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
+ #include <linux/bitmap.h>
 +#include <linux/fd.h>
 
 #include <xen/xen.h>
--- a/patches.xen/pvops-enable-netfront-in-dom0.patch
+++ b/patches.xen/pvops-enable-netfront-in-dom0.patch
@ -1,24 +0,0 @@
-diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
-index 698b905..e31ebff 100644
--- a/drivers/net/xen-netfront.c
-+++ b/drivers/net/xen-netfront.c
-@@ -1953,9 +1953,6 @@ static int __init netif_init(void)
- 	if (!xen_domain())
- 		return -ENODEV;
- 
-	if (xen_initial_domain())
-		return 0;
-
- 	printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
- 
- 	return xenbus_register_frontend(&netfront_driver);
-@@ -1965,9 +1962,6 @@ module_init(netif_init);
- 
- static void __exit netif_exit(void)
- {
-	if (xen_initial_domain())
-		return;
-
- 	xenbus_unregister_driver(&netfront_driver);
- }
- module_exit(netif_exit);
--- a/patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch
+++ b/patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch
@ -1,42 +0,0 @@
-From: Simon Graham <simon.graham@citrix.com>
-To: Ian Campbell <Ian.Campbell@citrix.com>, "konrad.wilk@oracle.com"
-	<konrad.wilk@oracle.com>, "xen-devel@lists.xensource.com"
-	<xen-devel@lists.xensource.com>, "netdev@vger.kernel.org"
-	<netdev@vger.kernel.org>
-Date: Thu, 24 May 2012 12:26:07 -0400
-Cc: "bhutchings@solarflare.com" <bhutchings@solarflare.com>,
-	Simon Graham <simon.graham@citrix.com>,
-	"davem@davemloft.net" <davem@davemloft.net>,
-	"adnan.misherfi@oracle.com" <adnan.misherfi@oracle.com>
-Subject: [Xen-devel] [PATCH] xen/netback: Calculate the number of SKB slots
- required correctly
-
-When calculating the number of slots required for a packet header, the code
-was reserving too many slots if the header crossed a page boundary. Since
-netbk_gop_skb copies the header to the start of the page, the count of
-slots required for the header should be based solely on the header size.
-
-This problem is easy to reproduce if a VIF is bridged to a USB 3G modem
-device as the skb->data value always starts near the end of the first page.
-
-Signed-off-by: Simon Graham <simon.graham@citrix.com>
---
- drivers/net/xen-netback/netback.c |    3 +--
- 1 files changed, 1 insertions(+), 2 deletions(-)
-
-diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
-index 2596401..f4a6fca 100644
--- a/drivers/net/xen-netback/netback.c
-+++ b/drivers/net/xen-netback/netback.c
-@@ -325,8 +325,7 @@ unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
- 	unsigned int count;
- 	int i, copy_off;
- 
-	count = DIV_ROUND_UP(
-			offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
-+	count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
- 
- 	copy_off = skb_headlen(skb) % PAGE_SIZE;
- 
-- 
-1.7.9.1
--- a/2
+++ b/2
@ -1 +1 @@
-7
+1
--- a/series-pvops.conf
+++ b/series-pvops.conf
@ -1,15 +1,12 @@
-patches.xen/pvops-0001-xen-Add-privcmd-device-driver.patch
-patches.xen/pvops-0002-x86-acpi-tboot-Have-a-ACPI-os-prepare-sleep-instead-.patch
-patches.xen/pvops-0003-tboot-Add-return-values-for-tboot_sleep.patch
-patches.xen/pvops-0004-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
-patches.xen/pvops-0005-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch
-patches.xen/pvops-0006-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
-patches.xen/pvops-0007-xen-Utilize-the-restore_msi_irqs-hook.patch
-patches.xen/pvops-0008-xen-setup-pm-acpi-Remove-the-call-to-boot_option_idl.patch
-patches.xen/pvops-0009-xen-enlighten-Expose-MWAIT-and-MWAIT_LEAF-if-hypervi.patch
-patches.xen/pvops-0010-CPUFREQ-xen-governor-for-Xen-hypervisor-frequency-sc.patch
-patches.xen/pvops-0011-x86-PCI-Expand-the-x86_msi_ops-to-have-a-restore-MSI.patch
-patches.xen/pvops-enable-netfront-in-dom0.patch
-patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch
+# ACPI S3
+patches.xen/pvops-0001-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
+patches.xen/pvops-0003-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
+
+# fix for GPU performance (revert workaround and apply proper fix), should go in 3.5
+patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch
+patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch
+
+# Additional features
+patches.xen/pvops-0100-usb-xen-pvusb-driver.patch
 patches.xen/pvops-blkfront-removable-flag.patch
 patches.xen/pvops-blkfront-eject-support.patch
--- a/2
+++ b/2
@ -1 +1 @@
-3.2.30
+3.7.6
 @ -1 +1 @@
 .2.30
 .7.6