Cleanup obsolete patches.

This commit is contained in:
Marek Marczykowski 2013-02-06 00:28:03 +01:00
parent 98796f66c1
commit c2dda25031
13 changed files with 0 additions and 2643 deletions

View File

@ -1,967 +0,0 @@
From d8414d3c157dc1f83e73c17447ba41fe5afa9d3d Mon Sep 17 00:00:00 2001
From: Bastian Blank <waldi@debian.org>
Date: Fri, 16 Dec 2011 11:34:33 -0500
Subject: xen: Add privcmd device driver
Access to arbitrary hypercalls is currently provided via xenfs. This
adds a standard character device to handle this. The support in xenfs
remains for backward compatibility and uses the device driver code.
Signed-off-by: Bastian Blank <waldi@debian.org>
Acked-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/Kconfig | 7 +
drivers/xen/Makefile | 2 +
drivers/xen/privcmd.c | 437 +++++++++++++++++++++++++++++++++++++++++++
drivers/xen/privcmd.h | 3 +
drivers/xen/xenfs/Makefile | 2 +-
drivers/xen/xenfs/privcmd.c | 400 ---------------------------------------
drivers/xen/xenfs/super.c | 3 +-
drivers/xen/xenfs/xenfs.h | 1 -
8 files changed, 452 insertions(+), 403 deletions(-)
create mode 100644 drivers/xen/privcmd.c
create mode 100644 drivers/xen/privcmd.h
delete mode 100644 drivers/xen/xenfs/privcmd.c
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 8795480..a1ced52 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -86,6 +86,7 @@ config XEN_BACKEND
config XENFS
tristate "Xen filesystem"
+ select XEN_PRIVCMD
default y
help
The xen filesystem provides a way for domains to share
@@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND
xen-pciback.hide=(03:00.0)(04:00.0)
If in doubt, say m.
+
+config XEN_PRIVCMD
+ tristate
+ depends on XEN
+ default m
+
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index 974fffd..aa31337 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
+obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
+xen-privcmd-y := privcmd.o
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
new file mode 100644
index 0000000..4e8d3da
--- /dev/null
+++ b/drivers/xen/privcmd.c
@@ -0,0 +1,437 @@
+/******************************************************************************
+ * privcmd.c
+ *
+ * Interface to privileged domain-0 commands.
+ *
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
+#include <linux/swap.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/privcmd.h>
+#include <xen/interface/xen.h>
+#include <xen/features.h>
+#include <xen/page.h>
+#include <xen/xen-ops.h>
+
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
+#endif
+
+static long privcmd_ioctl_hypercall(void __user *udata)
+{
+ struct privcmd_hypercall hypercall;
+ long ret;
+
+ if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+ return -EFAULT;
+
+ ret = privcmd_call(hypercall.op,
+ hypercall.arg[0], hypercall.arg[1],
+ hypercall.arg[2], hypercall.arg[3],
+ hypercall.arg[4]);
+
+ return ret;
+}
+
+static void free_page_list(struct list_head *pages)
+{
+ struct page *p, *n;
+
+ list_for_each_entry_safe(p, n, pages, lru)
+ __free_page(p);
+
+ INIT_LIST_HEAD(pages);
+}
+
+/*
+ * Given an array of items in userspace, return a list of pages
+ * containing the data. If copying fails, either because of memory
+ * allocation failure or a problem reading user memory, return an
+ * error code; its up to the caller to dispose of any partial list.
+ */
+static int gather_array(struct list_head *pagelist,
+ unsigned nelem, size_t size,
+ void __user *data)
+{
+ unsigned pageidx;
+ void *pagedata;
+ int ret;
+
+ if (size > PAGE_SIZE)
+ return 0;
+
+ pageidx = PAGE_SIZE;
+ pagedata = NULL; /* quiet, gcc */
+ while (nelem--) {
+ if (pageidx > PAGE_SIZE-size) {
+ struct page *page = alloc_page(GFP_KERNEL);
+
+ ret = -ENOMEM;
+ if (page == NULL)
+ goto fail;
+
+ pagedata = page_address(page);
+
+ list_add_tail(&page->lru, pagelist);
+ pageidx = 0;
+ }
+
+ ret = -EFAULT;
+ if (copy_from_user(pagedata + pageidx, data, size))
+ goto fail;
+
+ data += size;
+ pageidx += size;
+ }
+
+ ret = 0;
+
+fail:
+ return ret;
+}
+
+/*
+ * Call function "fn" on each element of the array fragmented
+ * over a list of pages.
+ */
+static int traverse_pages(unsigned nelem, size_t size,
+ struct list_head *pos,
+ int (*fn)(void *data, void *state),
+ void *state)
+{
+ void *pagedata;
+ unsigned pageidx;
+ int ret = 0;
+
+ BUG_ON(size > PAGE_SIZE);
+
+ pageidx = PAGE_SIZE;
+ pagedata = NULL; /* hush, gcc */
+
+ while (nelem--) {
+ if (pageidx > PAGE_SIZE-size) {
+ struct page *page;
+ pos = pos->next;
+ page = list_entry(pos, struct page, lru);
+ pagedata = page_address(page);
+ pageidx = 0;
+ }
+
+ ret = (*fn)(pagedata + pageidx, state);
+ if (ret)
+ break;
+ pageidx += size;
+ }
+
+ return ret;
+}
+
+struct mmap_mfn_state {
+ unsigned long va;
+ struct vm_area_struct *vma;
+ domid_t domain;
+};
+
+static int mmap_mfn_range(void *data, void *state)
+{
+ struct privcmd_mmap_entry *msg = data;
+ struct mmap_mfn_state *st = state;
+ struct vm_area_struct *vma = st->vma;
+ int rc;
+
+ /* Do not allow range to wrap the address space. */
+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
+ return -EINVAL;
+
+ /* Range chunks must be contiguous in va space. */
+ if ((msg->va != st->va) ||
+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
+ return -EINVAL;
+
+ rc = xen_remap_domain_mfn_range(vma,
+ msg->va & PAGE_MASK,
+ msg->mfn, msg->npages,
+ vma->vm_page_prot,
+ st->domain);
+ if (rc < 0)
+ return rc;
+
+ st->va += msg->npages << PAGE_SHIFT;
+
+ return 0;
+}
+
+static long privcmd_ioctl_mmap(void __user *udata)
+{
+ struct privcmd_mmap mmapcmd;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ int rc;
+ LIST_HEAD(pagelist);
+ struct mmap_mfn_state state;
+
+ if (!xen_initial_domain())
+ return -EPERM;
+
+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
+ return -EFAULT;
+
+ rc = gather_array(&pagelist,
+ mmapcmd.num, sizeof(struct privcmd_mmap_entry),
+ mmapcmd.entry);
+
+ if (rc || list_empty(&pagelist))
+ goto out;
+
+ down_write(&mm->mmap_sem);
+
+ {
+ struct page *page = list_first_entry(&pagelist,
+ struct page, lru);
+ struct privcmd_mmap_entry *msg = page_address(page);
+
+ vma = find_vma(mm, msg->va);
+ rc = -EINVAL;
+
+ if (!vma || (msg->va != vma->vm_start) ||
+ !privcmd_enforce_singleshot_mapping(vma))
+ goto out_up;
+ }
+
+ state.va = vma->vm_start;
+ state.vma = vma;
+ state.domain = mmapcmd.dom;
+
+ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
+ &pagelist,
+ mmap_mfn_range, &state);
+
+
+out_up:
+ up_write(&mm->mmap_sem);
+
+out:
+ free_page_list(&pagelist);
+
+ return rc;
+}
+
+struct mmap_batch_state {
+ domid_t domain;
+ unsigned long va;
+ struct vm_area_struct *vma;
+ int err;
+
+ xen_pfn_t __user *user;
+};
+
+static int mmap_batch_fn(void *data, void *state)
+{
+ xen_pfn_t *mfnp = data;
+ struct mmap_batch_state *st = state;
+
+ if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
+ st->vma->vm_page_prot, st->domain) < 0) {
+ *mfnp |= 0xf0000000U;
+ st->err++;
+ }
+ st->va += PAGE_SIZE;
+
+ return 0;
+}
+
+static int mmap_return_errors(void *data, void *state)
+{
+ xen_pfn_t *mfnp = data;
+ struct mmap_batch_state *st = state;
+
+ return put_user(*mfnp, st->user++);
+}
+
+static struct vm_operations_struct privcmd_vm_ops;
+
+static long privcmd_ioctl_mmap_batch(void __user *udata)
+{
+ int ret;
+ struct privcmd_mmapbatch m;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *vma;
+ unsigned long nr_pages;
+ LIST_HEAD(pagelist);
+ struct mmap_batch_state state;
+
+ if (!xen_initial_domain())
+ return -EPERM;
+
+ if (copy_from_user(&m, udata, sizeof(m)))
+ return -EFAULT;
+
+ nr_pages = m.num;
+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
+ return -EINVAL;
+
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
+ m.arr);
+
+ if (ret || list_empty(&pagelist))
+ goto out;
+
+ down_write(&mm->mmap_sem);
+
+ vma = find_vma(mm, m.addr);
+ ret = -EINVAL;
+ if (!vma ||
+ vma->vm_ops != &privcmd_vm_ops ||
+ (m.addr != vma->vm_start) ||
+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
+ !privcmd_enforce_singleshot_mapping(vma)) {
+ up_write(&mm->mmap_sem);
+ goto out;
+ }
+
+ state.domain = m.dom;
+ state.vma = vma;
+ state.va = m.addr;
+ state.err = 0;
+
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist, mmap_batch_fn, &state);
+
+ up_write(&mm->mmap_sem);
+
+ if (state.err > 0) {
+ state.user = m.arr;
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
+ &pagelist,
+ mmap_return_errors, &state);
+ }
+
+out:
+ free_page_list(&pagelist);
+
+ return ret;
+}
+
+static long privcmd_ioctl(struct file *file,
+ unsigned int cmd, unsigned long data)
+{
+ int ret = -ENOSYS;
+ void __user *udata = (void __user *) data;
+
+ switch (cmd) {
+ case IOCTL_PRIVCMD_HYPERCALL:
+ ret = privcmd_ioctl_hypercall(udata);
+ break;
+
+ case IOCTL_PRIVCMD_MMAP:
+ ret = privcmd_ioctl_mmap(udata);
+ break;
+
+ case IOCTL_PRIVCMD_MMAPBATCH:
+ ret = privcmd_ioctl_mmap_batch(udata);
+ break;
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
+ vma, vma->vm_start, vma->vm_end,
+ vmf->pgoff, vmf->virtual_address);
+
+ return VM_FAULT_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+ .fault = privcmd_fault
+};
+
+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ /* Unsupported for auto-translate guests. */
+ if (xen_feature(XENFEAT_auto_translated_physmap))
+ return -ENOSYS;
+
+ /* DONTCOPY is essential for Xen because copy_page_range doesn't know
+ * how to recreate these mappings */
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+ vma->vm_ops = &privcmd_vm_ops;
+ vma->vm_private_data = NULL;
+
+ return 0;
+}
+
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+{
+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+}
+#endif
+
+const struct file_operations xen_privcmd_fops = {
+ .owner = THIS_MODULE,
+ .unlocked_ioctl = privcmd_ioctl,
+ .mmap = privcmd_mmap,
+};
+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
+
+static struct miscdevice privcmd_dev = {
+ .minor = MISC_DYNAMIC_MINOR,
+ .name = "xen/privcmd",
+ .fops = &xen_privcmd_fops,
+};
+
+static int __init privcmd_init(void)
+{
+ int err;
+
+ if (!xen_domain())
+ return -ENODEV;
+
+ err = misc_register(&privcmd_dev);
+ if (err != 0) {
+ printk(KERN_ERR "Could not register Xen privcmd device\n");
+ return err;
+ }
+ return 0;
+}
+
+static void __exit privcmd_exit(void)
+{
+ misc_deregister(&privcmd_dev);
+}
+
+module_init(privcmd_init);
+module_exit(privcmd_exit);
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
new file mode 100644
index 0000000..14facae
--- /dev/null
+++ b/drivers/xen/privcmd.h
@@ -0,0 +1,3 @@
+#include <linux/fs.h>
+
+extern const struct file_operations xen_privcmd_fops;
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
index 4fde944..5d45ff1 100644
--- a/drivers/xen/xenfs/Makefile
+++ b/drivers/xen/xenfs/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_XENFS) += xenfs.o
-xenfs-y = super.o xenbus.o privcmd.o
+xenfs-y = super.o xenbus.o
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
deleted file mode 100644
index dbd3b16..0000000
--- a/drivers/xen/xenfs/privcmd.c
+++ /dev/null
@@ -1,400 +0,0 @@
-/******************************************************************************
- * privcmd.c
- *
- * Interface to privileged domain-0 commands.
- *
- * Copyright (c) 2002-2004, K A Fraser, B Dragovic
- */
-
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/mm.h>
-#include <linux/mman.h>
-#include <linux/uaccess.h>
-#include <linux/swap.h>
-#include <linux/highmem.h>
-#include <linux/pagemap.h>
-#include <linux/seq_file.h>
-
-#include <asm/pgalloc.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-#include <asm/xen/hypervisor.h>
-#include <asm/xen/hypercall.h>
-
-#include <xen/xen.h>
-#include <xen/privcmd.h>
-#include <xen/interface/xen.h>
-#include <xen/features.h>
-#include <xen/page.h>
-#include <xen/xen-ops.h>
-
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
-#endif
-
-static long privcmd_ioctl_hypercall(void __user *udata)
-{
- struct privcmd_hypercall hypercall;
- long ret;
-
- if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
- return -EFAULT;
-
- ret = privcmd_call(hypercall.op,
- hypercall.arg[0], hypercall.arg[1],
- hypercall.arg[2], hypercall.arg[3],
- hypercall.arg[4]);
-
- return ret;
-}
-
-static void free_page_list(struct list_head *pages)
-{
- struct page *p, *n;
-
- list_for_each_entry_safe(p, n, pages, lru)
- __free_page(p);
-
- INIT_LIST_HEAD(pages);
-}
-
-/*
- * Given an array of items in userspace, return a list of pages
- * containing the data. If copying fails, either because of memory
- * allocation failure or a problem reading user memory, return an
- * error code; its up to the caller to dispose of any partial list.
- */
-static int gather_array(struct list_head *pagelist,
- unsigned nelem, size_t size,
- void __user *data)
-{
- unsigned pageidx;
- void *pagedata;
- int ret;
-
- if (size > PAGE_SIZE)
- return 0;
-
- pageidx = PAGE_SIZE;
- pagedata = NULL; /* quiet, gcc */
- while (nelem--) {
- if (pageidx > PAGE_SIZE-size) {
- struct page *page = alloc_page(GFP_KERNEL);
-
- ret = -ENOMEM;
- if (page == NULL)
- goto fail;
-
- pagedata = page_address(page);
-
- list_add_tail(&page->lru, pagelist);
- pageidx = 0;
- }
-
- ret = -EFAULT;
- if (copy_from_user(pagedata + pageidx, data, size))
- goto fail;
-
- data += size;
- pageidx += size;
- }
-
- ret = 0;
-
-fail:
- return ret;
-}
-
-/*
- * Call function "fn" on each element of the array fragmented
- * over a list of pages.
- */
-static int traverse_pages(unsigned nelem, size_t size,
- struct list_head *pos,
- int (*fn)(void *data, void *state),
- void *state)
-{
- void *pagedata;
- unsigned pageidx;
- int ret = 0;
-
- BUG_ON(size > PAGE_SIZE);
-
- pageidx = PAGE_SIZE;
- pagedata = NULL; /* hush, gcc */
-
- while (nelem--) {
- if (pageidx > PAGE_SIZE-size) {
- struct page *page;
- pos = pos->next;
- page = list_entry(pos, struct page, lru);
- pagedata = page_address(page);
- pageidx = 0;
- }
-
- ret = (*fn)(pagedata + pageidx, state);
- if (ret)
- break;
- pageidx += size;
- }
-
- return ret;
-}
-
-struct mmap_mfn_state {
- unsigned long va;
- struct vm_area_struct *vma;
- domid_t domain;
-};
-
-static int mmap_mfn_range(void *data, void *state)
-{
- struct privcmd_mmap_entry *msg = data;
- struct mmap_mfn_state *st = state;
- struct vm_area_struct *vma = st->vma;
- int rc;
-
- /* Do not allow range to wrap the address space. */
- if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
- ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
- return -EINVAL;
-
- /* Range chunks must be contiguous in va space. */
- if ((msg->va != st->va) ||
- ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
- return -EINVAL;
-
- rc = xen_remap_domain_mfn_range(vma,
- msg->va & PAGE_MASK,
- msg->mfn, msg->npages,
- vma->vm_page_prot,
- st->domain);
- if (rc < 0)
- return rc;
-
- st->va += msg->npages << PAGE_SHIFT;
-
- return 0;
-}
-
-static long privcmd_ioctl_mmap(void __user *udata)
-{
- struct privcmd_mmap mmapcmd;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- int rc;
- LIST_HEAD(pagelist);
- struct mmap_mfn_state state;
-
- if (!xen_initial_domain())
- return -EPERM;
-
- if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
- return -EFAULT;
-
- rc = gather_array(&pagelist,
- mmapcmd.num, sizeof(struct privcmd_mmap_entry),
- mmapcmd.entry);
-
- if (rc || list_empty(&pagelist))
- goto out;
-
- down_write(&mm->mmap_sem);
-
- {
- struct page *page = list_first_entry(&pagelist,
- struct page, lru);
- struct privcmd_mmap_entry *msg = page_address(page);
-
- vma = find_vma(mm, msg->va);
- rc = -EINVAL;
-
- if (!vma || (msg->va != vma->vm_start) ||
- !privcmd_enforce_singleshot_mapping(vma))
- goto out_up;
- }
-
- state.va = vma->vm_start;
- state.vma = vma;
- state.domain = mmapcmd.dom;
-
- rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
- &pagelist,
- mmap_mfn_range, &state);
-
-
-out_up:
- up_write(&mm->mmap_sem);
-
-out:
- free_page_list(&pagelist);
-
- return rc;
-}
-
-struct mmap_batch_state {
- domid_t domain;
- unsigned long va;
- struct vm_area_struct *vma;
- int err;
-
- xen_pfn_t __user *user;
-};
-
-static int mmap_batch_fn(void *data, void *state)
-{
- xen_pfn_t *mfnp = data;
- struct mmap_batch_state *st = state;
-
- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
- st->vma->vm_page_prot, st->domain) < 0) {
- *mfnp |= 0xf0000000U;
- st->err++;
- }
- st->va += PAGE_SIZE;
-
- return 0;
-}
-
-static int mmap_return_errors(void *data, void *state)
-{
- xen_pfn_t *mfnp = data;
- struct mmap_batch_state *st = state;
-
- return put_user(*mfnp, st->user++);
-}
-
-static struct vm_operations_struct privcmd_vm_ops;
-
-static long privcmd_ioctl_mmap_batch(void __user *udata)
-{
- int ret;
- struct privcmd_mmapbatch m;
- struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
- unsigned long nr_pages;
- LIST_HEAD(pagelist);
- struct mmap_batch_state state;
-
- if (!xen_initial_domain())
- return -EPERM;
-
- if (copy_from_user(&m, udata, sizeof(m)))
- return -EFAULT;
-
- nr_pages = m.num;
- if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
- return -EINVAL;
-
- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
- m.arr);
-
- if (ret || list_empty(&pagelist))
- goto out;
-
- down_write(&mm->mmap_sem);
-
- vma = find_vma(mm, m.addr);
- ret = -EINVAL;
- if (!vma ||
- vma->vm_ops != &privcmd_vm_ops ||
- (m.addr != vma->vm_start) ||
- ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
- !privcmd_enforce_singleshot_mapping(vma)) {
- up_write(&mm->mmap_sem);
- goto out;
- }
-
- state.domain = m.dom;
- state.vma = vma;
- state.va = m.addr;
- state.err = 0;
-
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist, mmap_batch_fn, &state);
-
- up_write(&mm->mmap_sem);
-
- if (state.err > 0) {
- state.user = m.arr;
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
- &pagelist,
- mmap_return_errors, &state);
- }
-
-out:
- free_page_list(&pagelist);
-
- return ret;
-}
-
-static long privcmd_ioctl(struct file *file,
- unsigned int cmd, unsigned long data)
-{
- int ret = -ENOSYS;
- void __user *udata = (void __user *) data;
-
- switch (cmd) {
- case IOCTL_PRIVCMD_HYPERCALL:
- ret = privcmd_ioctl_hypercall(udata);
- break;
-
- case IOCTL_PRIVCMD_MMAP:
- ret = privcmd_ioctl_mmap(udata);
- break;
-
- case IOCTL_PRIVCMD_MMAPBATCH:
- ret = privcmd_ioctl_mmap_batch(udata);
- break;
-
- default:
- ret = -EINVAL;
- break;
- }
-
- return ret;
-}
-
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
-static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
- vma, vma->vm_start, vma->vm_end,
- vmf->pgoff, vmf->virtual_address);
-
- return VM_FAULT_SIGBUS;
-}
-
-static struct vm_operations_struct privcmd_vm_ops = {
- .fault = privcmd_fault
-};
-
-static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
-{
- /* Unsupported for auto-translate guests. */
- if (xen_feature(XENFEAT_auto_translated_physmap))
- return -ENOSYS;
-
- /* DONTCOPY is essential for Xen because copy_page_range doesn't know
- * how to recreate these mappings */
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
- vma->vm_ops = &privcmd_vm_ops;
- vma->vm_private_data = NULL;
-
- return 0;
-}
-
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
-{
- return (xchg(&vma->vm_private_data, (void *)1) == NULL);
-}
-#endif
-
-const struct file_operations privcmd_file_ops = {
- .unlocked_ioctl = privcmd_ioctl,
- .mmap = privcmd_mmap,
-};
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
index 1aa3897..a55fbf9 100644
--- a/drivers/xen/xenfs/super.c
+++ b/drivers/xen/xenfs/super.c
@@ -16,6 +16,7 @@
#include <xen/xen.h>
#include "xenfs.h"
+#include "../privcmd.h"
#include <asm/xen/hypervisor.h>
@@ -84,7 +85,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
[1] = {},
{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
{ "capabilities", &capabilities_file_ops, S_IRUGO },
- { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
+ { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
{""},
};
int rc;
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
index b68aa62..5056306 100644
--- a/drivers/xen/xenfs/xenfs.h
+++ b/drivers/xen/xenfs/xenfs.h
@@ -2,7 +2,6 @@
#define _XENFS_XENBUS_H
extern const struct file_operations xenbus_file_ops;
-extern const struct file_operations privcmd_file_ops;
extern const struct file_operations xsd_kva_file_ops;
extern const struct file_operations xsd_port_file_ops;
--
1.7.6.4

View File

@ -1,177 +0,0 @@
From 8fd04efb7e41da12d85ad382b7c7092fe832bebb Mon Sep 17 00:00:00 2001
From: Tang Liang <liang.tang@oracle.com>
Date: Fri, 9 Dec 2011 10:05:54 +0800
Subject: x86, acpi, tboot: Have a ACPI os prepare sleep instead of calling
tboot_sleep.
The ACPI suspend path makes a call to tboot_sleep right before
it writes the PM1A, PM1B values. We replace the direct call to
tboot via an registration callback similar to __acpi_register_gsi.
CC: Thomas Gleixner <tglx@linutronix.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: x86@kernel.org
CC: Len Brown <len.brown@intel.com>
Acked-by: Joseph Cihula <joseph.cihula@intel.com>
CC: Shane Wang <shane.wang@intel.com>
CC: xen-devel@lists.xensource.com
CC: linux-pm@lists.linux-foundation.org
CC: tboot-devel@lists.sourceforge.net
CC: linux-acpi@vger.kernel.org
[v1: Added __attribute__ ((unused))]
[v2: Introduced a wrapper instead of changing tboot_sleep return values]
[v3: Added return value AE_CTRL_SKIP for acpi_os_sleep_prepare]
Signed-off-by: Tang Liang <liang.tang@oracle.com>
[v1: Fix compile issues on IA64 and PPC64]
[v2: Fix where __acpi_os_prepare_sleep==NULL and did not go in sleep properly]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/kernel/tboot.c | 8 ++++++++
drivers/acpi/acpica/hwsleep.c | 10 +++++++---
drivers/acpi/osl.c | 24 ++++++++++++++++++++++++
include/acpi/acexcep.h | 1 +
include/linux/acpi.h | 10 ++++++++++
include/linux/tboot.h | 1 -
6 files changed, 50 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index e2410e2..1a4ab7d 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -297,6 +297,12 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
tboot_shutdown(acpi_shutdown_map[sleep_state]);
}
+static int tboot_sleep_wrapper(u8 sleep_state, u32 pm1a_control,
+ u32 pm1b_control)
+{
+ tboot_sleep(sleep_state, pm1a_control, pm1b_control);
+ return 0;
+}
static atomic_t ap_wfs_count;
@@ -345,6 +351,8 @@ static __init int tboot_late_init(void)
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
+
+ acpi_os_set_prepare_sleep(&tboot_sleep_wrapper);
return 0;
}
diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
index d52da30..992359a 100644
--- a/drivers/acpi/acpica/hwsleep.c
+++ b/drivers/acpi/acpica/hwsleep.c
@@ -43,9 +43,9 @@
*/
#include <acpi/acpi.h>
+#include <linux/acpi.h>
#include "accommon.h"
#include "actables.h"
-#include <linux/tboot.h>
#include <linux/module.h>
#define _COMPONENT ACPI_HARDWARE
@@ -344,8 +344,12 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
ACPI_FLUSH_CPU_CACHE();
- tboot_sleep(sleep_state, pm1a_control, pm1b_control);
-
+ status = acpi_os_prepare_sleep(sleep_state, pm1a_control,
+ pm1b_control);
+ if (ACPI_SKIP(status))
+ return_ACPI_STATUS(AE_OK);
+ if (ACPI_FAILURE(status))
+ return_ACPI_STATUS(status);
/* Write #2: Write both SLP_TYP + SLP_EN */
status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
index f31c5c5..f3aae4b 100644
--- a/drivers/acpi/osl.c
+++ b/drivers/acpi/osl.c
@@ -76,6 +76,9 @@ EXPORT_SYMBOL(acpi_in_debugger);
extern char line_buf[80];
#endif /*ENABLE_DEBUGGER */
+static int (*__acpi_os_prepare_sleep)(u8 sleep_state, u32 pm1a_ctrl,
+ u32 pm1b_ctrl);
+
static acpi_osd_handler acpi_irq_handler;
static void *acpi_irq_context;
static struct workqueue_struct *kacpid_wq;
@@ -1659,3 +1662,24 @@ acpi_status acpi_os_terminate(void)
return AE_OK;
}
+
+acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control,
+ u32 pm1b_control)
+{
+ int rc = 0;
+ if (__acpi_os_prepare_sleep)
+ rc = __acpi_os_prepare_sleep(sleep_state,
+ pm1a_control, pm1b_control);
+ if (rc < 0)
+ return AE_ERROR;
+ else if (rc > 0)
+ return AE_CTRL_SKIP;
+
+ return AE_OK;
+}
+
+void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
+ u32 pm1a_ctrl, u32 pm1b_ctrl))
+{
+ __acpi_os_prepare_sleep = func;
+}
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
index 5b6c391..fa0d22c 100644
--- a/include/acpi/acexcep.h
+++ b/include/acpi/acexcep.h
@@ -57,6 +57,7 @@
#define ACPI_SUCCESS(a) (!(a))
#define ACPI_FAILURE(a) (a)
+#define ACPI_SKIP(a) (a == AE_CTRL_SKIP)
#define AE_OK (acpi_status) 0x0000
/*
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 627a3a4..9393f73 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -363,4 +363,14 @@ static inline int suspend_nvs_register(unsigned long a, unsigned long b)
}
#endif
+#ifdef CONFIG_ACPI
+void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
+ u32 pm1a_ctrl, u32 pm1b_ctrl));
+
+acpi_status acpi_os_prepare_sleep(u8 sleep_state,
+ u32 pm1a_control, u32 pm1b_control);
+#else
+#define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
+#endif
+
#endif /*_LINUX_ACPI_H*/
diff --git a/include/linux/tboot.h b/include/linux/tboot.h
index 1dba6ee..c75128b 100644
--- a/include/linux/tboot.h
+++ b/include/linux/tboot.h
@@ -143,7 +143,6 @@ static inline int tboot_enabled(void)
extern void tboot_probe(void);
extern void tboot_shutdown(u32 shutdown_type);
-extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control);
extern struct acpi_table_header *tboot_get_dmar_table(
struct acpi_table_header *dmar_tbl);
extern int tboot_force_iommu(void);
--
1.7.6.4

View File

@ -1,67 +0,0 @@
From 6f327383cd7ebef1fcc092e2d759ceb9d90dfb36 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 8 Dec 2011 17:14:08 +0800
Subject: tboot: Add return values for tboot_sleep
.. as appropiately. As tboot_sleep now returns values.
remove tboot_sleep_wrapper.
Suggested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
Acked-by: Joseph Cihula <joseph.cihula@intel.com>
[v1: Return -1/0/+1 instead of ACPI_xx values]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/kernel/tboot.c | 13 ++++---------
1 files changed, 4 insertions(+), 9 deletions(-)
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index 1a4ab7d..6410744 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
offsetof(struct acpi_table_facs, firmware_waking_vector);
}
-void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
+static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
{
static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
/* S0,1,2: */ -1, -1, -1,
@@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
/* S5: */ TB_SHUTDOWN_S5 };
if (!tboot_enabled())
- return;
+ return 0;
tboot_copy_fadt(&acpi_gbl_FADT);
tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
@@ -292,15 +292,10 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
if (sleep_state >= ACPI_S_STATE_COUNT ||
acpi_shutdown_map[sleep_state] == -1) {
pr_warning("unsupported sleep state 0x%x\n", sleep_state);
- return;
+ return -1;
}
tboot_shutdown(acpi_shutdown_map[sleep_state]);
-}
-static int tboot_sleep_wrapper(u8 sleep_state, u32 pm1a_control,
- u32 pm1b_control)
-{
- tboot_sleep(sleep_state, pm1a_control, pm1b_control);
return 0;
}
@@ -352,7 +347,7 @@ static __init int tboot_late_init(void)
atomic_set(&ap_wfs_count, 0);
register_hotcpu_notifier(&tboot_cpu_notifier);
- acpi_os_set_prepare_sleep(&tboot_sleep_wrapper);
+ acpi_os_set_prepare_sleep(&tboot_sleep);
return 0;
}
--
1.7.6.4

View File

@ -1,112 +0,0 @@
From b2ed886e43ec90bae86d6cae6582b457e76d1fd8 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 8 Dec 2011 17:16:43 +0800
Subject: x86/acpi/sleep: Provide registration for acpi_suspend_lowlevel.
Which by default will be x86_acpi_suspend_lowlevel.
This registration allows us to register another callback
if there is a need to use another platform specific callback.
CC: Thomas Gleixner <tglx@linutronix.de>
CC: "H. Peter Anvin" <hpa@zytor.com>
CC: x86@kernel.org
CC: Len Brown <len.brown@intel.com>
CC: Joseph Cihula <joseph.cihula@intel.com>
CC: Shane Wang <shane.wang@intel.com>
CC: linux-pm@lists.linux-foundation.org
CC: linux-acpi@vger.kernel.org
CC: Len Brown <len.brown@intel.com>
Signed-off-by: Liang Tang <liang.tang@oracle.com>
[v1: Fix when CONFIG_ACPI_SLEEP is not set]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/include/asm/acpi.h | 2 +-
arch/x86/kernel/acpi/boot.c | 7 +++++++
arch/x86/kernel/acpi/sleep.c | 4 ++--
arch/x86/kernel/acpi/sleep.h | 2 ++
drivers/acpi/sleep.c | 2 ++
5 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 610001d..68cf060 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -115,7 +115,7 @@ static inline void acpi_disable_pci(void)
}
/* Low-level suspend routine. */
-extern int acpi_suspend_lowlevel(void);
+extern int (*acpi_suspend_lowlevel)(void);
extern const unsigned char acpi_wakeup_code[];
#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index ce664f3..c3a5b95 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -44,6 +44,7 @@
#include <asm/mpspec.h>
#include <asm/smp.h>
+#include "sleep.h" /* To include x86_acpi_suspend_lowlevel */
static int __initdata acpi_force = 0;
u32 acpi_rsdt_forced;
int acpi_disabled;
@@ -558,6 +559,12 @@ static int acpi_register_gsi_ioapic(struct device *dev, u32 gsi,
int (*__acpi_register_gsi)(struct device *dev, u32 gsi,
int trigger, int polarity) = acpi_register_gsi_pic;
+#ifdef CONFIG_ACPI_SLEEP
+int (*acpi_suspend_lowlevel)(void) = x86_acpi_suspend_lowlevel;
+#else
+int (*acpi_suspend_lowlevel)(void);
+#endif
+
/*
* success: return IRQ number (>=0)
* failure: return < 0
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 103b6ab..4d2d0b1 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -25,12 +25,12 @@ static char temp_stack[4096];
#endif
/**
- * acpi_suspend_lowlevel - save kernel state
+ * x86_acpi_suspend_lowlevel - save kernel state
*
* Create an identity mapped page table and copy the wakeup routine to
* low memory.
*/
-int acpi_suspend_lowlevel(void)
+int x86_acpi_suspend_lowlevel(void)
{
struct wakeup_header *header;
/* address in low memory of the wakeup routine. */
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
index 416d4be..4d3feb5 100644
--- a/arch/x86/kernel/acpi/sleep.h
+++ b/arch/x86/kernel/acpi/sleep.h
@@ -13,3 +13,5 @@ extern unsigned long acpi_copy_wakeup_routine(unsigned long);
extern void wakeup_long64(void);
extern void do_suspend_lowlevel(void);
+
+extern int x86_acpi_suspend_lowlevel(void);
diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c
index 0a7ed69..44dbdde 100644
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@@ -254,6 +254,8 @@ static int acpi_suspend_enter(suspend_state_t pm_state)
break;
case ACPI_STATE_S3:
+ if (!acpi_suspend_lowlevel)
+ return -ENOSYS;
error = acpi_suspend_lowlevel();
if (error)
return error;
--
1.7.6.4

View File

@ -1,197 +0,0 @@
From 9b10575276a220543b8791f2cb8268fbd4a0bc2e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 8 Dec 2011 17:32:23 +0800
Subject: xen/acpi/sleep: Enable ACPI sleep via the __acpi_os_prepare_sleep
Provide the registration callback to call in the Xen's
ACPI sleep functionality. This means that during S3/S5
we make a hypercall XENPF_enter_acpi_sleep with the
proper PM1A/PM1B registers.
Based of Ke Yu's <ke.yu@intel.com> initial idea.
[ From http://xenbits.xensource.com/linux-2.6.18-xen.hg
change c68699484a65 ]
[v1: Added Copyright and license]
[v2: Added check if PM1A/B the 16-bits MSB contain something. The spec
only uses 16-bits but might have more in future]
Signed-off-by: Liang Tang <liang.tang@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/xen/enlighten.c | 3 ++
drivers/xen/Makefile | 2 +-
drivers/xen/acpi.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++
include/xen/acpi.h | 58 +++++++++++++++++++++++++++++++++++++++++++
4 files changed, 124 insertions(+), 1 deletions(-)
create mode 100644 drivers/xen/acpi.c
create mode 100644 include/xen/acpi.h
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 12eb07b..a5277c2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -42,6 +42,7 @@
#include <xen/page.h>
#include <xen/hvm.h>
#include <xen/hvc-console.h>
+#include <xen/acpi.h>
#include <asm/paravirt.h>
#include <asm/apic.h>
@@ -1275,6 +1276,8 @@ asmlinkage void __init xen_start_kernel(void)
/* Make sure ACS will be enabled */
pci_request_acs();
+
+ xen_acpi_sleep_register();
}
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index aa31337..77a845f 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -17,7 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
obj-$(CONFIG_XEN_TMEM) += tmem.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
-obj-$(CONFIG_XEN_DOM0) += pci.o
+obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
new file mode 100644
index 0000000..119d42a
--- /dev/null
+++ b/drivers/xen/acpi.c
@@ -0,0 +1,62 @@
+/******************************************************************************
+ * acpi.c
+ * acpi file for domain 0 kernel
+ *
+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ * Copyright (c) 2011 Yu Ke ke.yu@intel.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <xen/acpi.h>
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/hypervisor.h>
+
+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
+ u32 pm1a_cnt, u32 pm1b_cnt)
+{
+ struct xen_platform_op op = {
+ .cmd = XENPF_enter_acpi_sleep,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ .u = {
+ .enter_acpi_sleep = {
+ .pm1a_cnt_val = (u16)pm1a_cnt,
+ .pm1b_cnt_val = (u16)pm1b_cnt,
+ .sleep_state = sleep_state,
+ },
+ },
+ };
+
+ if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
+ WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
+ "Email xen-devel@lists.xensource.com Thank you.\n", \
+ pm1a_cnt, pm1b_cnt);
+ return -1;
+ }
+
+ HYPERVISOR_dom0_op(&op);
+ return 1;
+}
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
new file mode 100644
index 0000000..48a9c01
--- /dev/null
+++ b/include/xen/acpi.h
@@ -0,0 +1,58 @@
+/******************************************************************************
+ * acpi.h
+ * acpi file for domain 0 kernel
+ *
+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ * Copyright (c) 2011 Yu Ke <ke.yu@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef _XEN_ACPI_H
+#define _XEN_ACPI_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_XEN_DOM0
+#include <asm/xen/hypervisor.h>
+#include <xen/xen.h>
+#include <linux/acpi.h>
+
+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
+ u32 pm1a_cnt, u32 pm1b_cnd);
+
+static inline void xen_acpi_sleep_register(void)
+{
+ if (xen_initial_domain())
+ acpi_os_set_prepare_sleep(
+ &xen_acpi_notify_hypervisor_state);
+}
+#else
+static inline void xen_acpi_sleep_register(void)
+{
+}
+#endif
+
+#endif /* _XEN_ACPI_H */
--
1.7.6.4

View File

@ -1,53 +0,0 @@
From 5000cd48f33e3e4d31cdeda0751188794f8bebf4 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 8 Dec 2011 17:34:58 +0800
Subject: xen/acpi/sleep: Register to the acpi_suspend_lowlevel a callback.
We piggyback on "x86/acpi: Provide registration for acpi_suspend_lowlevel."
to register a Xen version of the callback. The callback does not
do anything special - except it omits the x86_acpi_suspend_lowlevel.
It does that b/c during suspend it tries to save cr8 values (which
the hypervisor does not support), and then on resume path the
cr3, cr8, idt, and gdt are all resumed which clashes with what
the hypervisor has set up for the guest.
Signed-off-by: Liang Tang <liang.tang@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
include/xen/acpi.h | 16 +++++++++++++++-
1 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
index 48a9c01..ebaabbb 100644
--- a/include/xen/acpi.h
+++ b/include/xen/acpi.h
@@ -43,11 +43,25 @@
int xen_acpi_notify_hypervisor_state(u8 sleep_state,
u32 pm1a_cnt, u32 pm1b_cnd);
+static inline int xen_acpi_suspend_lowlevel(void)
+{
+ /*
+ * Xen will save and restore CPU context, so
+ * we can skip that and just go straight to
+ * the suspend.
+ */
+ acpi_enter_sleep_state(ACPI_STATE_S3);
+ return 0;
+}
+
static inline void xen_acpi_sleep_register(void)
{
- if (xen_initial_domain())
+ if (xen_initial_domain()) {
acpi_os_set_prepare_sleep(
&xen_acpi_notify_hypervisor_state);
+
+ acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
+ }
}
#else
static inline void xen_acpi_sleep_register(void)
--
1.7.6.4

View File

@ -1,81 +0,0 @@
From 86ceafdf50d67bcb2a5196122797a6972bedd279 Mon Sep 17 00:00:00 2001
From: Tang Liang <liang.tang@oracle.com>
Date: Thu, 8 Dec 2011 17:36:39 +0800
Subject: xen: Utilize the restore_msi_irqs hook.
to make a hypercall to restore the vectors in the MSI/MSI-X
configuration space.
Signed-off-by: Tang Liang <liang.tang@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/pci/xen.c | 27 +++++++++++++++++++++++++++
include/xen/interface/physdev.h | 7 +++++++
2 files changed, 34 insertions(+), 0 deletions(-)
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
index 492ade8..249a5ae 100644
--- a/arch/x86/pci/xen.c
+++ b/arch/x86/pci/xen.c
@@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
out:
return ret;
}
+
+static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ int ret = 0;
+
+ if (pci_seg_supported) {
+ struct physdev_pci_device restore_ext;
+
+ restore_ext.seg = pci_domain_nr(dev->bus);
+ restore_ext.bus = dev->bus->number;
+ restore_ext.devfn = dev->devfn;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
+ &restore_ext);
+ if (ret == -ENOSYS)
+ pci_seg_supported = false;
+ WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
+ }
+ if (!pci_seg_supported) {
+ struct physdev_restore_msi restore;
+
+ restore.bus = dev->bus->number;
+ restore.devfn = dev->devfn;
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
+ WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
+ }
+}
#endif
static void xen_teardown_msi_irqs(struct pci_dev *dev)
@@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)
#ifdef CONFIG_PCI_MSI
x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
+ x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
#endif
xen_setup_acpi_sci();
__acpi_register_gsi = acpi_register_gsi_xen;
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index c1080d9..0c28989 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -145,6 +145,13 @@ struct physdev_manage_pci {
uint8_t devfn;
};
+#define PHYSDEVOP_restore_msi 19
+struct physdev_restore_msi {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+};
+
#define PHYSDEVOP_manage_pci_add_ext 20
struct physdev_manage_pci_ext {
/* IN */
--
1.7.6.4

View File

@ -1,31 +0,0 @@
From cfb37553f53f993c22aad05c219581dfbc726bcc Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 23 Jan 2012 10:53:57 -0500
Subject: xen/setup/pm/acpi: Remove the call to boot_option_idle_override.
We needed that call in the past to force the kernel to use
default_idle (which called safe_halt, which called xen_safe_halt).
But set_pm_idle_to_default() does now that, so there is no need
to use this boot option operand.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/xen/setup.c | 1 -
1 files changed, 0 insertions(+), 1 deletions(-)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index e03c636..1236623 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -420,7 +420,6 @@ void __init xen_arch_setup(void)
boot_cpu_data.hlt_works_ok = 1;
#endif
disable_cpuidle();
- boot_option_idle_override = IDLE_HALT;
WARN_ON(set_pm_idle_to_default());
fiddle_vdso();
}
--
1.7.6.4

View File

@ -1,216 +0,0 @@
From d281ee8c6d58a7f5d1f4241238daa315fb959e31 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 13 Feb 2012 22:26:32 -0500
Subject: xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
For the hypervisor to take advantage of the MWAIT support it needs
to extract from the ACPI _CST the register address. But the
hypervisor does not have the support to parse DSDT so it relies on
the initial domain (dom0) to parse the ACPI Power Management information
and push it up to the hypervisor. The pushing of the data is done
by the processor_harveset_xen module which parses the information that
the ACPI parser has graciously exposed in 'struct acpi_processor'.
For the ACPI parser to also expose the Cx states for MWAIT, we need
to expose the MWAIT capability (leaf 1). Furthermore we also need to
expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly
function.
The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX
operations, but it can't do it since it needs to be backwards compatible.
Instead we choose to use the native CPUID to figure out if the MWAIT
capability exists and use the XEN_SET_PDC query hypercall to figure out
if the hypervisor wants us to expose the MWAIT_LEAF capability or not.
Note: The XEN_SET_PDC query was implemented in c/s 23783:
"ACPI: add _PDC input override mechanism".
With this in place, instead of
C3 ACPI IOPORT 415
we get now
C3:ACPI FFH INTEL MWAIT 0x20
Note: The cpu_idle which would be calling the mwait variants for idling
never gets set b/c we set the default pm_idle to be the hypercall variant.
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/xen/enlighten.c | 92 +++++++++++++++++++++++++++++++++++++-
include/xen/interface/platform.h | 4 +-
2 files changed, 94 insertions(+), 2 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 12eb07b..4c82936 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -62,6 +62,14 @@
#include <asm/reboot.h>
#include <asm/stackprotector.h>
#include <asm/hypervisor.h>
+#include <asm/mwait.h>
+
+#ifdef CONFIG_ACPI
+#include <asm/acpi.h>
+#include <acpi/pdc_intel.h>
+#include <acpi/processor.h>
+#include <xen/interface/platform.h>
+#endif
#include "xen-ops.h"
#include "mmu.h"
@@ -200,13 +208,17 @@ static void __init xen_banner(void)
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
+static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
+static __read_mostly unsigned int cpuid_leaf5_ecx_val;
+static __read_mostly unsigned int cpuid_leaf5_edx_val;
+
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
unsigned int *cx, unsigned int *dx)
{
unsigned maskebx = ~0;
unsigned maskecx = ~0;
unsigned maskedx = ~0;
-
+ unsigned setecx = 0;
/*
* Mask out inconvenient features, to try and disable as many
* unsupported kernel subsystems as possible.
@@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
switch (*ax) {
case 1:
maskecx = cpuid_leaf1_ecx_mask;
+ setecx = cpuid_leaf1_ecx_set_mask;
maskedx = cpuid_leaf1_edx_mask;
break;
+ case CPUID_MWAIT_LEAF:
+ /* Synthesize the values.. */
+ *ax = 0;
+ *bx = 0;
+ *cx = cpuid_leaf5_ecx_val;
+ *dx = cpuid_leaf5_edx_val;
+ return;
+
case 0xb:
/* Suppress extended topology stuff */
maskebx = 0;
@@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*bx &= maskebx;
*cx &= maskecx;
+ *cx |= setecx;
*dx &= maskedx;
+
}
+static bool __init xen_check_mwait(void)
+{
+#if CONFIG_ACPI
+ struct xen_platform_op op = {
+ .cmd = XENPF_set_processor_pminfo,
+ .u.set_pminfo.id = -1,
+ .u.set_pminfo.type = XEN_PM_PDC,
+ };
+ uint32_t buf[3];
+ unsigned int ax, bx, cx, dx;
+ unsigned int mwait_mask;
+
+ /* We need to determine whether it is OK to expose the MWAIT
+ * capability to the kernel to harvest deeper than C3 states from ACPI
+ * _CST using the processor_harvest_xen.c module. For this to work, we
+ * need to gather the MWAIT_LEAF values (which the cstate.c code
+ * checks against). The hypervisor won't expose the MWAIT flag because
+ * it would break backwards compatibility; so we will find out directly
+ * from the hardware and hypercall.
+ */
+ if (!xen_initial_domain())
+ return false;
+
+ ax = 1;
+ cx = 0;
+
+ native_cpuid(&ax, &bx, &cx, &dx);
+
+ mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
+ (1 << (X86_FEATURE_MWAIT % 32));
+
+ if ((cx & mwait_mask) != mwait_mask)
+ return false;
+
+ /* We need to emulate the MWAIT_LEAF and for that we need both
+ * ecx and edx. The hypercall provides only partial information.
+ */
+
+ ax = CPUID_MWAIT_LEAF;
+ bx = 0;
+ cx = 0;
+ dx = 0;
+
+ native_cpuid(&ax, &bx, &cx, &dx);
+
+ /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
+ * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
+ */
+ buf[0] = ACPI_PDC_REVISION_ID;
+ buf[1] = 1;
+ buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
+
+ set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
+
+ if ((HYPERVISOR_dom0_op(&op) == 0) &&
+ (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
+ cpuid_leaf5_ecx_val = cx;
+ cpuid_leaf5_edx_val = dx;
+ }
+ return true;
+#else
+ return false;
+#endif
+}
static void __init xen_init_cpuid_mask(void)
{
unsigned int ax, bx, cx, dx;
@@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void)
/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
if ((cx & xsave_mask) != xsave_mask)
cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
+
+ if (xen_check_mwait())
+ cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
}
static void xen_set_debugreg(int reg, unsigned long val)
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index c168468..6220b98 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
#define XEN_PM_CX 0
#define XEN_PM_PX 1
#define XEN_PM_TX 2
-
+#define XEN_PM_PDC 3
/* Px sub info type */
#define XEN_PX_PCT 1
#define XEN_PX_PSS 2
@@ -286,6 +286,7 @@ struct xen_processor_performance {
};
DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
+DEFINE_GUEST_HANDLE(uint32_t);
struct xenpf_set_processor_pminfo {
/* IN variables */
uint32_t id; /* ACPI CPU ID */
@@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo {
union {
struct xen_processor_power power;/* Cx: _CST/_CSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
+ GUEST_HANDLE(uint32_t) pdc;
};
};
DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
--
1.7.6.4

View File

@ -1,529 +0,0 @@
From 20e7a07fa0f8a0dbe30a0f732686d78849d29d96 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 3 Feb 2012 16:03:20 -0500
Subject: [CPUFREQ] xen: governor for Xen hypervisor frequency scaling.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This CPU freq governor leaves the frequency decision to the Xen hypervisor.
To do that the driver parses the Power Management data and uploads said
information to the Xen hypervisor. Then the Xen hypervisor can select the
proper Cx and Pxx states for the initial domain and all other domains.
To upload the information, this CPU frequency driver reads Power Management (PM)
(_Pxx and _Cx) which are populated in the 'struct acpi_processor' structure.
It simply reads the contents of that structure and pass it up the Xen hypervisor.
For that to work we depend on the appropriate CPU frequency scaling driver
to do the heavy-lifting - so that the contents is correct.
The CPU frequency governor it has been loaded also sets up a timer
to check if the ACPI IDs count is different from the APIC ID count - which
can happen if the user choose to use dom0_max_vcpu argument. In such a case
a backup of the PM structure is used and uploaded to the hypervisor.
[v1-v2: Initial RFC implementations that were posted]
[v3: Changed the name to passthru suggested by Pasi Kärkkäinen <pasik@iki.fi>]
[v4: Added vCPU != pCPU support - aka dom0_max_vcpus support]
[v5: Cleaned up the driver, fix bug under Athlon XP]
[v6: Changed the driver to a CPU frequency governor]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/xen/Kconfig | 15 ++
drivers/xen/Makefile | 2 +-
drivers/xen/cpufreq_xen.c | 445 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 461 insertions(+), 1 deletions(-)
create mode 100644 drivers/xen/cpufreq_xen.c
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a1ced52..28ba371 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -178,4 +178,19 @@ config XEN_PRIVCMD
depends on XEN
default m
+config CPU_FREQ_GOV_XEN
+ tristate "'xen' governor for hypervisor scaling"
+ depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
+ default m
+ help
+ This cpufreq governor leaves the frequency decision to the Xen hypervisor.
+
+ To do that the driver parses the Power Management data and uploads said
+ information to the Xen hypervisor. Then the Xen hypervisor can select the
+ proper Cx and Pxx states.
+
+ To compile this driver as a module, choose M here: the
+ module will be called cpufreq_xen. If you do not know what to choose,
+ select M here.
+
endmenu
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
index aa31337..5802220 100644
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -20,7 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
-
+obj-$(CONFIG_CPU_FREQ_GOV_XEN) += cpufreq_xen.o
xen-evtchn-y := evtchn.o
xen-gntdev-y := gntdev.o
xen-gntalloc-y := gntalloc.o
diff --git a/drivers/xen/cpufreq_xen.c b/drivers/xen/cpufreq_xen.c
new file mode 100644
index 0000000..1b709bf
--- /dev/null
+++ b/drivers/xen/cpufreq_xen.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright 2012 by Oracle Inc
+ * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
+ *
+ * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
+ * so many thanks go to Kevin Tian <kevin.tian@intel.com>
+ * and Yu Ke <ke.yu@intel.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/freezer.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <acpi/acpi_bus.h>
+#include <acpi/acpi_drivers.h>
+#include <acpi/processor.h>
+
+#include <xen/interface/platform.h>
+#include <asm/xen/hypercall.h>
+
+#define DRV_NAME "cpufreq-xen"
+
+static int no_hypercall;
+MODULE_PARM_DESC(off, "Inhibit the hypercall.");
+module_param_named(off, no_hypercall, int, 0400);
+
+/*
+ * Mutex to protect the acpi_ids_done.
+ */
+static DEFINE_MUTEX(acpi_ids_mutex);
+/*
+ * Don't think convert this to cpumask_var_t or use cpumask_bit - as those
+ * shrink to nr_cpu_bits (which is dependent on possible_cpu), which can be
+ * less than what we want to put in.
+ */
+#define NR_ACPI_CPUS NR_CPUS
+#define MAX_ACPI_BITS (BITS_TO_LONGS(NR_ACPI_CPUS))
+static unsigned long *acpi_ids_done;
+/*
+ * Again, don't convert to cpumask - as we are reading the raw ACPI CPU ids
+ * which can go beyond what we presently see.
+ */
+static unsigned long *acpi_id_present;
+
+/*
+ * Pertient data for the timer to be launched to check if the # of
+ * ACPI CPU ids is different from the one we have processed.
+ */
+#define DELAY_TIMER msecs_to_jiffies(5000 /* 5 sec */)
+static struct acpi_processor *pr_backup;
+static struct delayed_work work;
+
+static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
+{
+ struct xen_platform_op op = {
+ .cmd = XENPF_set_processor_pminfo,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ .u.set_pminfo.id = _pr->acpi_id,
+ .u.set_pminfo.type = XEN_PM_CX,
+ };
+ struct xen_processor_cx *dst_cx, *dst_cx_states = NULL;
+ struct acpi_processor_cx *cx;
+ int i, ok, ret = 0;
+
+ dst_cx_states = kcalloc(_pr->power.count,
+ sizeof(struct xen_processor_cx), GFP_KERNEL);
+ if (!dst_cx_states)
+ return -ENOMEM;
+
+ for (ok = 0, i = 1; i <= _pr->power.count; i++) {
+ cx = &_pr->power.states[i];
+ if (!cx->valid)
+ continue;
+
+ dst_cx = &(dst_cx_states[ok++]);
+
+ dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
+ if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
+ dst_cx->reg.bit_width = 8;
+ dst_cx->reg.bit_offset = 0;
+ dst_cx->reg.access_size = 1;
+ } else {
+ dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
+ if (cx->entry_method == ACPI_CSTATE_FFH) {
+ /* NATIVE_CSTATE_BEYOND_HALT */
+ dst_cx->reg.bit_offset = 2;
+ dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */
+ }
+ dst_cx->reg.access_size = 0;
+ }
+ dst_cx->reg.address = cx->address;
+
+ dst_cx->type = cx->type;
+ dst_cx->latency = cx->latency;
+ dst_cx->power = cx->power;
+
+ dst_cx->dpcnt = 0;
+ set_xen_guest_handle(dst_cx->dp, NULL);
+#ifdef DEBUG
+ pr_debug(DRV_NAME ": CX: ID:%d [C%d:%s] entry:%d\n",
+ _pr->acpi_id, cx->type, cx->desc, cx->entry_method);
+#endif
+ }
+ if (!ok) {
+ pr_err(DRV_NAME ": No _Cx for CPU %d\n", _pr->acpi_id);
+ kfree(dst_cx_states);
+ return -EINVAL;
+ }
+ op.u.set_pminfo.power.count = ok;
+ op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control;
+ op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check;
+ op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst;
+ op.u.set_pminfo.power.flags.power_setup_done =
+ _pr->flags.power_setup_done;
+
+ set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states);
+
+ if (!no_hypercall)
+ ret = HYPERVISOR_dom0_op(&op);
+
+ if (ret)
+ pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI ID: %d\n",
+ ret, _pr->acpi_id);
+
+ kfree(dst_cx_states);
+
+ return ret;
+}
+static struct xen_processor_px *
+xen_copy_pss_data(struct acpi_processor *_pr,
+ struct xen_processor_performance *dst_perf)
+{
+ struct xen_processor_px *dst_states = NULL;
+ int i;
+
+ BUILD_BUG_ON(sizeof(struct xen_processor_px) !=
+ sizeof(struct acpi_processor_px));
+
+ dst_states = kcalloc(_pr->performance->state_count,
+ sizeof(struct xen_processor_px), GFP_KERNEL);
+ if (!dst_states)
+ return ERR_PTR(-ENOMEM);
+
+ dst_perf->state_count = _pr->performance->state_count;
+ for (i = 0; i < _pr->performance->state_count; i++) {
+ /* Fortunatly for us, they are both the same size */
+ memcpy(&(dst_states[i]), &(_pr->performance->states[i]),
+ sizeof(struct acpi_processor_px));
+ }
+ return dst_states;
+}
+static int xen_copy_psd_data(struct acpi_processor *_pr,
+ struct xen_processor_performance *dst)
+{
+ BUILD_BUG_ON(sizeof(struct xen_psd_package) !=
+ sizeof(struct acpi_psd_package));
+
+ if (_pr->performance->shared_type != CPUFREQ_SHARED_TYPE_NONE) {
+ dst->shared_type = _pr->performance->shared_type;
+
+ memcpy(&(dst->domain_info), &(_pr->performance->domain_info),
+ sizeof(struct acpi_psd_package));
+ } else {
+ if ((&cpu_data(0))->x86_vendor != X86_VENDOR_AMD)
+ return -EINVAL;
+
+ /* On AMD, the powernow-k8 is loaded before acpi_cpufreq
+ * meaning that acpi_processor_preregister_performance never
+ * gets called which would parse the _PSD. The only relevant
+ * information from _PSD we need is whether it is HW_ALL or any
+ * other type. AMD K8 >= are SW_ALL or SW_ANY, AMD K7<= HW_ANY.
+ * This driver checks at the start whether it is K8 so it
+ * if we get here it can only be K8.
+ */
+ dst->shared_type = CPUFREQ_SHARED_TYPE_ANY;
+ dst->domain_info.coord_type = DOMAIN_COORD_TYPE_SW_ANY;
+ dst->domain_info.num_processors = num_online_cpus();
+ }
+ return 0;
+}
+static int xen_copy_pct_data(struct acpi_pct_register *pct,
+ struct xen_pct_register *dst_pct)
+{
+ /* It would be nice if you could just do 'memcpy(pct, dst_pct') but
+ * sadly the Xen structure did not have the proper padding so the
+ * descriptor field takes two (dst_pct) bytes instead of one (pct).
+ */
+ dst_pct->descriptor = pct->descriptor;
+ dst_pct->length = pct->length;
+ dst_pct->space_id = pct->space_id;
+ dst_pct->bit_width = pct->bit_width;
+ dst_pct->bit_offset = pct->bit_offset;
+ dst_pct->reserved = pct->reserved;
+ dst_pct->address = pct->address;
+ return 0;
+}
+static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
+{
+ int ret = 0;
+ struct xen_platform_op op = {
+ .cmd = XENPF_set_processor_pminfo,
+ .interface_version = XENPF_INTERFACE_VERSION,
+ .u.set_pminfo.id = _pr->acpi_id,
+ .u.set_pminfo.type = XEN_PM_PX,
+ };
+ struct xen_processor_performance *dst_perf;
+ struct xen_processor_px *dst_states = NULL;
+
+ dst_perf = &op.u.set_pminfo.perf;
+
+ dst_perf->platform_limit = _pr->performance_platform_limit;
+ dst_perf->flags |= XEN_PX_PPC;
+ xen_copy_pct_data(&(_pr->performance->control_register),
+ &dst_perf->control_register);
+ xen_copy_pct_data(&(_pr->performance->status_register),
+ &dst_perf->status_register);
+ dst_perf->flags |= XEN_PX_PCT;
+ dst_states = xen_copy_pss_data(_pr, dst_perf);
+ if (!IS_ERR_OR_NULL(dst_states)) {
+ set_xen_guest_handle(dst_perf->states, dst_states);
+ dst_perf->flags |= XEN_PX_PSS;
+ }
+ if (!xen_copy_psd_data(_pr, dst_perf))
+ dst_perf->flags |= XEN_PX_PSD;
+
+ if (!no_hypercall)
+ ret = HYPERVISOR_dom0_op(&op);
+
+ if (ret)
+ pr_err(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI ID %d\n",
+ ret, _pr->acpi_id);
+
+ if (!IS_ERR_OR_NULL(dst_states))
+ kfree(dst_states);
+
+ return ret;
+}
+static int upload_pm_data(struct acpi_processor *_pr)
+{
+ int err = 0;
+
+ if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done))
+ return -EBUSY;
+
+ if (_pr->flags.power)
+ err = push_cxx_to_hypervisor(_pr);
+
+ if (_pr->performance && _pr->performance->states)
+ err |= push_pxx_to_hypervisor(_pr);
+
+ return err;
+}
+static acpi_status
+read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
+{
+ u32 acpi_id;
+ acpi_status status;
+ acpi_object_type acpi_type;
+ unsigned long long tmp;
+ union acpi_object object = { 0 };
+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
+
+ status = acpi_get_type(handle, &acpi_type);
+ if (ACPI_FAILURE(status))
+ return AE_OK;
+
+ switch (acpi_type) {
+ case ACPI_TYPE_PROCESSOR:
+ status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
+ if (ACPI_FAILURE(status))
+ return AE_OK;
+ acpi_id = object.processor.proc_id;
+ break;
+ case ACPI_TYPE_DEVICE:
+ status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
+ if (ACPI_FAILURE(status))
+ return AE_OK;
+ acpi_id = tmp;
+ break;
+ default:
+ return AE_OK;
+ }
+ if (acpi_id > NR_ACPI_CPUS) {
+ WARN_ONCE(1, "There are %d ACPI processors, but kernel can only do %d!\n",
+ acpi_id, NR_ACPI_CPUS);
+ return AE_OK;
+ }
+ __set_bit(acpi_id, acpi_id_present);
+
+ return AE_OK;
+}
+static unsigned int more_acpi_ids(void)
+{
+ unsigned int n = 0;
+
+ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
+ ACPI_UINT32_MAX,
+ read_acpi_id, NULL, NULL, NULL);
+ acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
+
+ mutex_lock(&acpi_ids_mutex);
+ if (!bitmap_equal(acpi_id_present, acpi_ids_done, MAX_ACPI_BITS))
+ n = bitmap_weight(acpi_id_present, MAX_ACPI_BITS);
+ mutex_unlock(&acpi_ids_mutex);
+
+ return n;
+}
+static void do_check_acpi_id_timer(struct work_struct *_work)
+{
+ /* All online CPUs have been processed at this stage. Now verify
+ * whether in fact "online CPUs" == physical CPUs.
+ */
+ acpi_id_present = kcalloc(MAX_ACPI_BITS, sizeof(unsigned long), GFP_KERNEL);
+ if (!acpi_id_present)
+ return;
+ memset(acpi_id_present, 0, MAX_ACPI_BITS * sizeof(unsigned long));
+
+ if (more_acpi_ids()) {
+ int cpu;
+ if (!pr_backup) {
+ schedule_delayed_work(&work, DELAY_TIMER);
+ return;
+ }
+ for_each_set_bit(cpu, acpi_id_present, MAX_ACPI_BITS) {
+ pr_backup->acpi_id = cpu;
+ mutex_lock(&acpi_ids_mutex);
+ (void)upload_pm_data(pr_backup);
+ mutex_unlock(&acpi_ids_mutex);
+ }
+ }
+ kfree(acpi_id_present);
+ acpi_id_present = NULL;
+}
+
+static int cpufreq_governor_xen(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ struct acpi_processor *_pr;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ case CPUFREQ_GOV_LIMITS:
+ /* Set it to max and let the hypervisor take over */
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
+
+ _pr = per_cpu(processors, policy->cpu /* APIC ID */);
+ if (!_pr)
+ break;
+
+ mutex_lock(&acpi_ids_mutex);
+ if (!pr_backup) {
+ pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
+ memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
+
+ INIT_DELAYED_WORK_DEFERRABLE(&work, do_check_acpi_id_timer);
+ schedule_delayed_work(&work, DELAY_TIMER);
+ }
+ (void)upload_pm_data(_pr);
+ mutex_unlock(&acpi_ids_mutex);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+static struct cpufreq_governor cpufreq_gov_xen = {
+ .name = "xen",
+ .governor = cpufreq_governor_xen,
+ .owner = THIS_MODULE,
+};
+static int __init check_prereq(void)
+{
+ struct cpuinfo_x86 *c = &cpu_data(0);
+
+ if (!xen_initial_domain())
+ return -ENODEV;
+
+ if (!acpi_gbl_FADT.smi_command)
+ return -ENODEV;
+
+ if (c->x86_vendor == X86_VENDOR_INTEL) {
+ if (!cpu_has(c, X86_FEATURE_EST))
+ return -ENODEV;
+
+ return 0;
+ }
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ u32 hi = 0, lo = 0;
+ /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
+ * as we get compile warnings for the static functions.
+ */
+#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */
+ rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
+
+ /* If the MSR cannot provide the data, the powernow-k8
+ * won't process the data properly either.
+ */
+ if (hi || lo)
+ return 0;
+ }
+ return -ENODEV;
+}
+
+static int __init xen_processor_passthru_init(void)
+{
+ int rc = check_prereq();
+
+ if (rc)
+ return rc;
+
+ acpi_ids_done = kcalloc(MAX_ACPI_BITS, sizeof(unsigned long), GFP_KERNEL);
+ if (!acpi_ids_done)
+ return -ENOMEM;
+ memset(acpi_ids_done, 0, MAX_ACPI_BITS * sizeof(unsigned long));
+
+ return cpufreq_register_governor(&cpufreq_gov_xen);
+}
+static void __exit xen_processor_passthru_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_xen);
+ cancel_delayed_work_sync(&work);
+ kfree(acpi_ids_done);
+ kfree(pr_backup);
+}
+
+MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
+MODULE_DESCRIPTION("CPUfreq policy governor 'xen' which uploads PM data to Xen hypervisor");
+MODULE_LICENSE("GPL");
+
+late_initcall(xen_processor_passthru_init);
+module_exit(xen_processor_passthru_exit);
--
1.7.6.4

View File

@ -1,135 +0,0 @@
From 76ccc297018d25d55b789bbd508861ef1e2cdb0c Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 16 Dec 2011 17:38:18 -0500
Subject: x86/PCI: Expand the x86_msi_ops to have a restore MSIs.
The MSI restore function will become a function pointer in an
x86_msi_ops struct. It defaults to the implementation in the
io_apic.c and msi.c. We piggyback on the indirection mechanism
introduced by "x86: Introduce x86_msi_ops".
Cc: x86@kernel.org
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: linux-pci@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
---
arch/x86/include/asm/pci.h | 9 +++++++++
arch/x86/include/asm/x86_init.h | 1 +
arch/x86/kernel/x86_init.c | 1 +
drivers/pci/msi.c | 29 +++++++++++++++++++++++++++--
4 files changed, 38 insertions(+), 2 deletions(-)
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index d498943..df75d07 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -112,19 +112,28 @@ static inline void x86_teardown_msi_irq(unsigned int irq)
{
x86_msi.teardown_msi_irq(irq);
}
+static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ x86_msi.restore_msi_irqs(dev, irq);
+}
#define arch_setup_msi_irqs x86_setup_msi_irqs
#define arch_teardown_msi_irqs x86_teardown_msi_irqs
#define arch_teardown_msi_irq x86_teardown_msi_irq
+#define arch_restore_msi_irqs x86_restore_msi_irqs
/* implemented in arch/x86/kernel/apic/io_apic. */
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void native_teardown_msi_irq(unsigned int irq);
+void native_restore_msi_irqs(struct pci_dev *dev, int irq);
/* default to the implementation in drivers/lib/msi.c */
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+#define HAVE_DEFAULT_MSI_RESTORE_IRQS
void default_teardown_msi_irqs(struct pci_dev *dev);
+void default_restore_msi_irqs(struct pci_dev *dev, int irq);
#else
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#define default_teardown_msi_irqs NULL
+#define default_restore_msi_irqs NULL
#endif
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 1971e65..cd52084 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -177,6 +177,7 @@ struct x86_msi_ops {
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
void (*teardown_msi_irq)(unsigned int irq);
void (*teardown_msi_irqs)(struct pci_dev *dev);
+ void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
};
extern struct x86_init_ops x86_init;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index c1d6cd5..83b05ad 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -114,4 +114,5 @@ struct x86_msi_ops x86_msi = {
.setup_msi_irqs = native_setup_msi_irqs,
.teardown_msi_irq = native_teardown_msi_irq,
.teardown_msi_irqs = default_teardown_msi_irqs,
+ .restore_msi_irqs = default_restore_msi_irqs,
};
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
index 82de95e..a825d78 100644
--- a/drivers/pci/msi.c
+++ b/drivers/pci/msi.c
@@ -86,6 +86,31 @@ void default_teardown_msi_irqs(struct pci_dev *dev)
}
#endif
+#ifndef arch_restore_msi_irqs
+# define arch_restore_msi_irqs default_restore_msi_irqs
+# define HAVE_DEFAULT_MSI_RESTORE_IRQS
+#endif
+
+#ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
+void default_restore_msi_irqs(struct pci_dev *dev, int irq)
+{
+ struct msi_desc *entry;
+
+ entry = NULL;
+ if (dev->msix_enabled) {
+ list_for_each_entry(entry, &dev->msi_list, list) {
+ if (irq == entry->irq)
+ break;
+ }
+ } else if (dev->msi_enabled) {
+ entry = irq_get_msi_desc(irq);
+ }
+
+ if (entry)
+ write_msi_msg(irq, &entry->msg);
+}
+#endif
+
static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
u16 control;
@@ -372,7 +397,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
pci_intx_for_msi(dev, 0);
msi_set_enable(dev, pos, 0);
- write_msi_msg(dev->irq, &entry->msg);
+ arch_restore_msi_irqs(dev, dev->irq);
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
@@ -400,7 +425,7 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
list_for_each_entry(entry, &dev->msi_list, list) {
- write_msi_msg(entry->irq, &entry->msg);
+ arch_restore_msi_irqs(dev, entry->irq);
msix_mask_irq(entry, entry->masked);
}
--
1.7.6.4

View File

@ -1,24 +0,0 @@
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 698b905..e31ebff 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -1953,9 +1953,6 @@ static int __init netif_init(void)
if (!xen_domain())
return -ENODEV;
- if (xen_initial_domain())
- return 0;
-
printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
return xenbus_register_frontend(&netfront_driver);
@@ -1965,9 +1962,6 @@ module_init(netif_init);
static void __exit netif_exit(void)
{
- if (xen_initial_domain())
- return;
-
xenbus_unregister_driver(&netfront_driver);
}
module_exit(netif_exit);

View File

@ -1,54 +0,0 @@
From 8c9ce606a60e4a0cb447bdc082ce383b96b227b4 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 25 May 2012 16:11:09 -0400
Subject: [PATCH 2/3] xen/blkback: Copy id field when doing BLKIF_DISCARD.
We weren't copying the id field so when we sent the response
back to the frontend (especially with a 64-bit host and 32-bit
guest), we ended up using a random value. This lead to the
frontend crashing as it would try to pass to __blk_end_request_all
a NULL 'struct request' (b/c it would use the 'id' to find the
proper 'struct request' in its shadow array) and end up crashing:
BUG: unable to handle kernel NULL pointer dereference at 000000e4
IP: [<c0646d4c>] __blk_end_request_all+0xc/0x40
.. snip..
EIP is at __blk_end_request_all+0xc/0x40
.. snip..
[<ed95db72>] blkif_interrupt+0x172/0x330 [xen_blkfront]
This fixes the bug by passing in the proper id for the response.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=824641
CC: stable@kernel.org
Tested-by: William Dauchy <wdauchy@gmail.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/block/xen-blkback/common.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 773cf27..9ad3b5e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
@@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
--
1.7.4.4