Merge branch 'devel-3.7'
Conflicts: config-pvops patches.xen/pvops-0005-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch patches.xen/pvops-0009-xen-enlighten-Expose-MWAIT-and-MWAIT_LEAF-if-hypervi.patch patches.xen/pvops-blkfront-eject-support.patch patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch rel-pvops series-pvops.conf version-pvops
This commit is contained in:
commit
a2acb741b6
1539
config-pvops
1539
config-pvops
File diff suppressed because it is too large
Load Diff
@ -133,6 +133,7 @@ fi
|
|||||||
|
|
||||||
make prepare $MAKE_ARGS
|
make prepare $MAKE_ARGS
|
||||||
make scripts $MAKE_ARGS
|
make scripts $MAKE_ARGS
|
||||||
|
make scripts_basic $MAKE_ARGS
|
||||||
krel=$(make -s kernelrelease $MAKE_ARGS)
|
krel=$(make -s kernelrelease $MAKE_ARGS)
|
||||||
|
|
||||||
if [ "$krel" != "%kernelrelease" ]; then
|
if [ "$krel" != "%kernelrelease" ]; then
|
||||||
@ -323,6 +324,7 @@ mkdir -p %buildroot/%vm_install_dir
|
|||||||
/sbin/dracut --nomdadmconf --nolvmconf \
|
/sbin/dracut --nomdadmconf --nolvmconf \
|
||||||
--kmoddir %buildroot/lib/modules/%kernelrelease \
|
--kmoddir %buildroot/lib/modules/%kernelrelease \
|
||||||
--include %_sourcedir/vm-initramfs / \
|
--include %_sourcedir/vm-initramfs / \
|
||||||
|
--add "dm" --omit "plymouth" \
|
||||||
-d "xenblk xen-blkfront cdrom ext4 jbd2 crc16 dm_snapshot" \
|
-d "xenblk xen-blkfront cdrom ext4 jbd2 crc16 dm_snapshot" \
|
||||||
%buildroot/%vm_install_dir/initramfs %kernelrelease
|
%buildroot/%vm_install_dir/initramfs %kernelrelease
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ index 610001d..68cf060 100644
|
|||||||
-extern int acpi_suspend_lowlevel(void);
|
-extern int acpi_suspend_lowlevel(void);
|
||||||
+extern int (*acpi_suspend_lowlevel)(void);
|
+extern int (*acpi_suspend_lowlevel)(void);
|
||||||
|
|
||||||
extern const unsigned char acpi_wakeup_code[];
|
/* Physical address to resume after wakeup */
|
||||||
#define acpi_wakeup_address (__pa(TRAMPOLINE_SYM(acpi_wakeup_code)))
|
#define acpi_wakeup_address ((unsigned long)(real_mode_header->wakeup_start))
|
||||||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
|
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
|
||||||
index ce664f3..c3a5b95 100644
|
index ce664f3..c3a5b95 100644
|
||||||
--- a/arch/x86/kernel/acpi/boot.c
|
--- a/arch/x86/kernel/acpi/boot.c
|
||||||
@ -82,8 +82,8 @@ index 103b6ab..4d2d0b1 100644
|
|||||||
-int acpi_suspend_lowlevel(void)
|
-int acpi_suspend_lowlevel(void)
|
||||||
+int x86_acpi_suspend_lowlevel(void)
|
+int x86_acpi_suspend_lowlevel(void)
|
||||||
{
|
{
|
||||||
struct wakeup_header *header;
|
struct wakeup_header *header =
|
||||||
/* address in low memory of the wakeup routine. */
|
(struct wakeup_header *) __va(real_mode_header->wakeup_header);
|
||||||
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
|
diff --git a/arch/x86/kernel/acpi/sleep.h b/arch/x86/kernel/acpi/sleep.h
|
||||||
index 416d4be..4d3feb5 100644
|
index 416d4be..4d3feb5 100644
|
||||||
--- a/arch/x86/kernel/acpi/sleep.h
|
--- a/arch/x86/kernel/acpi/sleep.h
|
@ -1,967 +0,0 @@
|
|||||||
From d8414d3c157dc1f83e73c17447ba41fe5afa9d3d Mon Sep 17 00:00:00 2001
|
|
||||||
From: Bastian Blank <waldi@debian.org>
|
|
||||||
Date: Fri, 16 Dec 2011 11:34:33 -0500
|
|
||||||
Subject: xen: Add privcmd device driver
|
|
||||||
|
|
||||||
Access to arbitrary hypercalls is currently provided via xenfs. This
|
|
||||||
adds a standard character device to handle this. The support in xenfs
|
|
||||||
remains for backward compatibility and uses the device driver code.
|
|
||||||
|
|
||||||
Signed-off-by: Bastian Blank <waldi@debian.org>
|
|
||||||
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
drivers/xen/Kconfig | 7 +
|
|
||||||
drivers/xen/Makefile | 2 +
|
|
||||||
drivers/xen/privcmd.c | 437 +++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
drivers/xen/privcmd.h | 3 +
|
|
||||||
drivers/xen/xenfs/Makefile | 2 +-
|
|
||||||
drivers/xen/xenfs/privcmd.c | 400 ---------------------------------------
|
|
||||||
drivers/xen/xenfs/super.c | 3 +-
|
|
||||||
drivers/xen/xenfs/xenfs.h | 1 -
|
|
||||||
8 files changed, 452 insertions(+), 403 deletions(-)
|
|
||||||
create mode 100644 drivers/xen/privcmd.c
|
|
||||||
create mode 100644 drivers/xen/privcmd.h
|
|
||||||
delete mode 100644 drivers/xen/xenfs/privcmd.c
|
|
||||||
|
|
||||||
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
|
|
||||||
index 8795480..a1ced52 100644
|
|
||||||
--- a/drivers/xen/Kconfig
|
|
||||||
+++ b/drivers/xen/Kconfig
|
|
||||||
@@ -86,6 +86,7 @@ config XEN_BACKEND
|
|
||||||
|
|
||||||
config XENFS
|
|
||||||
tristate "Xen filesystem"
|
|
||||||
+ select XEN_PRIVCMD
|
|
||||||
default y
|
|
||||||
help
|
|
||||||
The xen filesystem provides a way for domains to share
|
|
||||||
@@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND
|
|
||||||
xen-pciback.hide=(03:00.0)(04:00.0)
|
|
||||||
|
|
||||||
If in doubt, say m.
|
|
||||||
+
|
|
||||||
+config XEN_PRIVCMD
|
|
||||||
+ tristate
|
|
||||||
+ depends on XEN
|
|
||||||
+ default m
|
|
||||||
+
|
|
||||||
endmenu
|
|
||||||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
|
|
||||||
index 974fffd..aa31337 100644
|
|
||||||
--- a/drivers/xen/Makefile
|
|
||||||
+++ b/drivers/xen/Makefile
|
|
||||||
@@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM) += tmem.o
|
|
||||||
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
|
|
||||||
obj-$(CONFIG_XEN_DOM0) += pci.o
|
|
||||||
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
|
|
||||||
+obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
|
|
||||||
|
|
||||||
xen-evtchn-y := evtchn.o
|
|
||||||
xen-gntdev-y := gntdev.o
|
|
||||||
xen-gntalloc-y := gntalloc.o
|
|
||||||
+xen-privcmd-y := privcmd.o
|
|
||||||
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..4e8d3da
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/drivers/xen/privcmd.c
|
|
||||||
@@ -0,0 +1,437 @@
|
|
||||||
+/******************************************************************************
|
|
||||||
+ * privcmd.c
|
|
||||||
+ *
|
|
||||||
+ * Interface to privileged domain-0 commands.
|
|
||||||
+ *
|
|
||||||
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#include <linux/kernel.h>
|
|
||||||
+#include <linux/module.h>
|
|
||||||
+#include <linux/sched.h>
|
|
||||||
+#include <linux/slab.h>
|
|
||||||
+#include <linux/string.h>
|
|
||||||
+#include <linux/errno.h>
|
|
||||||
+#include <linux/mm.h>
|
|
||||||
+#include <linux/mman.h>
|
|
||||||
+#include <linux/uaccess.h>
|
|
||||||
+#include <linux/swap.h>
|
|
||||||
+#include <linux/highmem.h>
|
|
||||||
+#include <linux/pagemap.h>
|
|
||||||
+#include <linux/seq_file.h>
|
|
||||||
+#include <linux/miscdevice.h>
|
|
||||||
+
|
|
||||||
+#include <asm/pgalloc.h>
|
|
||||||
+#include <asm/pgtable.h>
|
|
||||||
+#include <asm/tlb.h>
|
|
||||||
+#include <asm/xen/hypervisor.h>
|
|
||||||
+#include <asm/xen/hypercall.h>
|
|
||||||
+
|
|
||||||
+#include <xen/xen.h>
|
|
||||||
+#include <xen/privcmd.h>
|
|
||||||
+#include <xen/interface/xen.h>
|
|
||||||
+#include <xen/features.h>
|
|
||||||
+#include <xen/page.h>
|
|
||||||
+#include <xen/xen-ops.h>
|
|
||||||
+
|
|
||||||
+#include "privcmd.h"
|
|
||||||
+
|
|
||||||
+MODULE_LICENSE("GPL");
|
|
||||||
+
|
|
||||||
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
|
||||||
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+static long privcmd_ioctl_hypercall(void __user *udata)
|
|
||||||
+{
|
|
||||||
+ struct privcmd_hypercall hypercall;
|
|
||||||
+ long ret;
|
|
||||||
+
|
|
||||||
+ if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
|
|
||||||
+ return -EFAULT;
|
|
||||||
+
|
|
||||||
+ ret = privcmd_call(hypercall.op,
|
|
||||||
+ hypercall.arg[0], hypercall.arg[1],
|
|
||||||
+ hypercall.arg[2], hypercall.arg[3],
|
|
||||||
+ hypercall.arg[4]);
|
|
||||||
+
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void free_page_list(struct list_head *pages)
|
|
||||||
+{
|
|
||||||
+ struct page *p, *n;
|
|
||||||
+
|
|
||||||
+ list_for_each_entry_safe(p, n, pages, lru)
|
|
||||||
+ __free_page(p);
|
|
||||||
+
|
|
||||||
+ INIT_LIST_HEAD(pages);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Given an array of items in userspace, return a list of pages
|
|
||||||
+ * containing the data. If copying fails, either because of memory
|
|
||||||
+ * allocation failure or a problem reading user memory, return an
|
|
||||||
+ * error code; its up to the caller to dispose of any partial list.
|
|
||||||
+ */
|
|
||||||
+static int gather_array(struct list_head *pagelist,
|
|
||||||
+ unsigned nelem, size_t size,
|
|
||||||
+ void __user *data)
|
|
||||||
+{
|
|
||||||
+ unsigned pageidx;
|
|
||||||
+ void *pagedata;
|
|
||||||
+ int ret;
|
|
||||||
+
|
|
||||||
+ if (size > PAGE_SIZE)
|
|
||||||
+ return 0;
|
|
||||||
+
|
|
||||||
+ pageidx = PAGE_SIZE;
|
|
||||||
+ pagedata = NULL; /* quiet, gcc */
|
|
||||||
+ while (nelem--) {
|
|
||||||
+ if (pageidx > PAGE_SIZE-size) {
|
|
||||||
+ struct page *page = alloc_page(GFP_KERNEL);
|
|
||||||
+
|
|
||||||
+ ret = -ENOMEM;
|
|
||||||
+ if (page == NULL)
|
|
||||||
+ goto fail;
|
|
||||||
+
|
|
||||||
+ pagedata = page_address(page);
|
|
||||||
+
|
|
||||||
+ list_add_tail(&page->lru, pagelist);
|
|
||||||
+ pageidx = 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ ret = -EFAULT;
|
|
||||||
+ if (copy_from_user(pagedata + pageidx, data, size))
|
|
||||||
+ goto fail;
|
|
||||||
+
|
|
||||||
+ data += size;
|
|
||||||
+ pageidx += size;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ ret = 0;
|
|
||||||
+
|
|
||||||
+fail:
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Call function "fn" on each element of the array fragmented
|
|
||||||
+ * over a list of pages.
|
|
||||||
+ */
|
|
||||||
+static int traverse_pages(unsigned nelem, size_t size,
|
|
||||||
+ struct list_head *pos,
|
|
||||||
+ int (*fn)(void *data, void *state),
|
|
||||||
+ void *state)
|
|
||||||
+{
|
|
||||||
+ void *pagedata;
|
|
||||||
+ unsigned pageidx;
|
|
||||||
+ int ret = 0;
|
|
||||||
+
|
|
||||||
+ BUG_ON(size > PAGE_SIZE);
|
|
||||||
+
|
|
||||||
+ pageidx = PAGE_SIZE;
|
|
||||||
+ pagedata = NULL; /* hush, gcc */
|
|
||||||
+
|
|
||||||
+ while (nelem--) {
|
|
||||||
+ if (pageidx > PAGE_SIZE-size) {
|
|
||||||
+ struct page *page;
|
|
||||||
+ pos = pos->next;
|
|
||||||
+ page = list_entry(pos, struct page, lru);
|
|
||||||
+ pagedata = page_address(page);
|
|
||||||
+ pageidx = 0;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ ret = (*fn)(pagedata + pageidx, state);
|
|
||||||
+ if (ret)
|
|
||||||
+ break;
|
|
||||||
+ pageidx += size;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+struct mmap_mfn_state {
|
|
||||||
+ unsigned long va;
|
|
||||||
+ struct vm_area_struct *vma;
|
|
||||||
+ domid_t domain;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int mmap_mfn_range(void *data, void *state)
|
|
||||||
+{
|
|
||||||
+ struct privcmd_mmap_entry *msg = data;
|
|
||||||
+ struct mmap_mfn_state *st = state;
|
|
||||||
+ struct vm_area_struct *vma = st->vma;
|
|
||||||
+ int rc;
|
|
||||||
+
|
|
||||||
+ /* Do not allow range to wrap the address space. */
|
|
||||||
+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
|
|
||||||
+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
|
|
||||||
+ return -EINVAL;
|
|
||||||
+
|
|
||||||
+ /* Range chunks must be contiguous in va space. */
|
|
||||||
+ if ((msg->va != st->va) ||
|
|
||||||
+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
|
|
||||||
+ return -EINVAL;
|
|
||||||
+
|
|
||||||
+ rc = xen_remap_domain_mfn_range(vma,
|
|
||||||
+ msg->va & PAGE_MASK,
|
|
||||||
+ msg->mfn, msg->npages,
|
|
||||||
+ vma->vm_page_prot,
|
|
||||||
+ st->domain);
|
|
||||||
+ if (rc < 0)
|
|
||||||
+ return rc;
|
|
||||||
+
|
|
||||||
+ st->va += msg->npages << PAGE_SHIFT;
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static long privcmd_ioctl_mmap(void __user *udata)
|
|
||||||
+{
|
|
||||||
+ struct privcmd_mmap mmapcmd;
|
|
||||||
+ struct mm_struct *mm = current->mm;
|
|
||||||
+ struct vm_area_struct *vma;
|
|
||||||
+ int rc;
|
|
||||||
+ LIST_HEAD(pagelist);
|
|
||||||
+ struct mmap_mfn_state state;
|
|
||||||
+
|
|
||||||
+ if (!xen_initial_domain())
|
|
||||||
+ return -EPERM;
|
|
||||||
+
|
|
||||||
+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
|
|
||||||
+ return -EFAULT;
|
|
||||||
+
|
|
||||||
+ rc = gather_array(&pagelist,
|
|
||||||
+ mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
|
||||||
+ mmapcmd.entry);
|
|
||||||
+
|
|
||||||
+ if (rc || list_empty(&pagelist))
|
|
||||||
+ goto out;
|
|
||||||
+
|
|
||||||
+ down_write(&mm->mmap_sem);
|
|
||||||
+
|
|
||||||
+ {
|
|
||||||
+ struct page *page = list_first_entry(&pagelist,
|
|
||||||
+ struct page, lru);
|
|
||||||
+ struct privcmd_mmap_entry *msg = page_address(page);
|
|
||||||
+
|
|
||||||
+ vma = find_vma(mm, msg->va);
|
|
||||||
+ rc = -EINVAL;
|
|
||||||
+
|
|
||||||
+ if (!vma || (msg->va != vma->vm_start) ||
|
|
||||||
+ !privcmd_enforce_singleshot_mapping(vma))
|
|
||||||
+ goto out_up;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ state.va = vma->vm_start;
|
|
||||||
+ state.vma = vma;
|
|
||||||
+ state.domain = mmapcmd.dom;
|
|
||||||
+
|
|
||||||
+ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
|
||||||
+ &pagelist,
|
|
||||||
+ mmap_mfn_range, &state);
|
|
||||||
+
|
|
||||||
+
|
|
||||||
+out_up:
|
|
||||||
+ up_write(&mm->mmap_sem);
|
|
||||||
+
|
|
||||||
+out:
|
|
||||||
+ free_page_list(&pagelist);
|
|
||||||
+
|
|
||||||
+ return rc;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+struct mmap_batch_state {
|
|
||||||
+ domid_t domain;
|
|
||||||
+ unsigned long va;
|
|
||||||
+ struct vm_area_struct *vma;
|
|
||||||
+ int err;
|
|
||||||
+
|
|
||||||
+ xen_pfn_t __user *user;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int mmap_batch_fn(void *data, void *state)
|
|
||||||
+{
|
|
||||||
+ xen_pfn_t *mfnp = data;
|
|
||||||
+ struct mmap_batch_state *st = state;
|
|
||||||
+
|
|
||||||
+ if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
|
|
||||||
+ st->vma->vm_page_prot, st->domain) < 0) {
|
|
||||||
+ *mfnp |= 0xf0000000U;
|
|
||||||
+ st->err++;
|
|
||||||
+ }
|
|
||||||
+ st->va += PAGE_SIZE;
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int mmap_return_errors(void *data, void *state)
|
|
||||||
+{
|
|
||||||
+ xen_pfn_t *mfnp = data;
|
|
||||||
+ struct mmap_batch_state *st = state;
|
|
||||||
+
|
|
||||||
+ return put_user(*mfnp, st->user++);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static struct vm_operations_struct privcmd_vm_ops;
|
|
||||||
+
|
|
||||||
+static long privcmd_ioctl_mmap_batch(void __user *udata)
|
|
||||||
+{
|
|
||||||
+ int ret;
|
|
||||||
+ struct privcmd_mmapbatch m;
|
|
||||||
+ struct mm_struct *mm = current->mm;
|
|
||||||
+ struct vm_area_struct *vma;
|
|
||||||
+ unsigned long nr_pages;
|
|
||||||
+ LIST_HEAD(pagelist);
|
|
||||||
+ struct mmap_batch_state state;
|
|
||||||
+
|
|
||||||
+ if (!xen_initial_domain())
|
|
||||||
+ return -EPERM;
|
|
||||||
+
|
|
||||||
+ if (copy_from_user(&m, udata, sizeof(m)))
|
|
||||||
+ return -EFAULT;
|
|
||||||
+
|
|
||||||
+ nr_pages = m.num;
|
|
||||||
+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
|
|
||||||
+ return -EINVAL;
|
|
||||||
+
|
|
||||||
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
|
|
||||||
+ m.arr);
|
|
||||||
+
|
|
||||||
+ if (ret || list_empty(&pagelist))
|
|
||||||
+ goto out;
|
|
||||||
+
|
|
||||||
+ down_write(&mm->mmap_sem);
|
|
||||||
+
|
|
||||||
+ vma = find_vma(mm, m.addr);
|
|
||||||
+ ret = -EINVAL;
|
|
||||||
+ if (!vma ||
|
|
||||||
+ vma->vm_ops != &privcmd_vm_ops ||
|
|
||||||
+ (m.addr != vma->vm_start) ||
|
|
||||||
+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
|
|
||||||
+ !privcmd_enforce_singleshot_mapping(vma)) {
|
|
||||||
+ up_write(&mm->mmap_sem);
|
|
||||||
+ goto out;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ state.domain = m.dom;
|
|
||||||
+ state.vma = vma;
|
|
||||||
+ state.va = m.addr;
|
|
||||||
+ state.err = 0;
|
|
||||||
+
|
|
||||||
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
|
||||||
+ &pagelist, mmap_batch_fn, &state);
|
|
||||||
+
|
|
||||||
+ up_write(&mm->mmap_sem);
|
|
||||||
+
|
|
||||||
+ if (state.err > 0) {
|
|
||||||
+ state.user = m.arr;
|
|
||||||
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
|
||||||
+ &pagelist,
|
|
||||||
+ mmap_return_errors, &state);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+out:
|
|
||||||
+ free_page_list(&pagelist);
|
|
||||||
+
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static long privcmd_ioctl(struct file *file,
|
|
||||||
+ unsigned int cmd, unsigned long data)
|
|
||||||
+{
|
|
||||||
+ int ret = -ENOSYS;
|
|
||||||
+ void __user *udata = (void __user *) data;
|
|
||||||
+
|
|
||||||
+ switch (cmd) {
|
|
||||||
+ case IOCTL_PRIVCMD_HYPERCALL:
|
|
||||||
+ ret = privcmd_ioctl_hypercall(udata);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case IOCTL_PRIVCMD_MMAP:
|
|
||||||
+ ret = privcmd_ioctl_mmap(udata);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ case IOCTL_PRIVCMD_MMAPBATCH:
|
|
||||||
+ ret = privcmd_ioctl_mmap_batch(udata);
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ default:
|
|
||||||
+ ret = -EINVAL;
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
|
||||||
+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|
||||||
+{
|
|
||||||
+ printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
|
|
||||||
+ vma, vma->vm_start, vma->vm_end,
|
|
||||||
+ vmf->pgoff, vmf->virtual_address);
|
|
||||||
+
|
|
||||||
+ return VM_FAULT_SIGBUS;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static struct vm_operations_struct privcmd_vm_ops = {
|
|
||||||
+ .fault = privcmd_fault
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
|
|
||||||
+{
|
|
||||||
+ /* Unsupported for auto-translate guests. */
|
|
||||||
+ if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
||||||
+ return -ENOSYS;
|
|
||||||
+
|
|
||||||
+ /* DONTCOPY is essential for Xen because copy_page_range doesn't know
|
|
||||||
+ * how to recreate these mappings */
|
|
||||||
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
|
|
||||||
+ vma->vm_ops = &privcmd_vm_ops;
|
|
||||||
+ vma->vm_private_data = NULL;
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
|
|
||||||
+{
|
|
||||||
+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+const struct file_operations xen_privcmd_fops = {
|
|
||||||
+ .owner = THIS_MODULE,
|
|
||||||
+ .unlocked_ioctl = privcmd_ioctl,
|
|
||||||
+ .mmap = privcmd_mmap,
|
|
||||||
+};
|
|
||||||
+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
|
|
||||||
+
|
|
||||||
+static struct miscdevice privcmd_dev = {
|
|
||||||
+ .minor = MISC_DYNAMIC_MINOR,
|
|
||||||
+ .name = "xen/privcmd",
|
|
||||||
+ .fops = &xen_privcmd_fops,
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
+static int __init privcmd_init(void)
|
|
||||||
+{
|
|
||||||
+ int err;
|
|
||||||
+
|
|
||||||
+ if (!xen_domain())
|
|
||||||
+ return -ENODEV;
|
|
||||||
+
|
|
||||||
+ err = misc_register(&privcmd_dev);
|
|
||||||
+ if (err != 0) {
|
|
||||||
+ printk(KERN_ERR "Could not register Xen privcmd device\n");
|
|
||||||
+ return err;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static void __exit privcmd_exit(void)
|
|
||||||
+{
|
|
||||||
+ misc_deregister(&privcmd_dev);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+module_init(privcmd_init);
|
|
||||||
+module_exit(privcmd_exit);
|
|
||||||
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..14facae
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/drivers/xen/privcmd.h
|
|
||||||
@@ -0,0 +1,3 @@
|
|
||||||
+#include <linux/fs.h>
|
|
||||||
+
|
|
||||||
+extern const struct file_operations xen_privcmd_fops;
|
|
||||||
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
|
|
||||||
index 4fde944..5d45ff1 100644
|
|
||||||
--- a/drivers/xen/xenfs/Makefile
|
|
||||||
+++ b/drivers/xen/xenfs/Makefile
|
|
||||||
@@ -1,4 +1,4 @@
|
|
||||||
obj-$(CONFIG_XENFS) += xenfs.o
|
|
||||||
|
|
||||||
-xenfs-y = super.o xenbus.o privcmd.o
|
|
||||||
+xenfs-y = super.o xenbus.o
|
|
||||||
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
|
|
||||||
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
|
|
||||||
deleted file mode 100644
|
|
||||||
index dbd3b16..0000000
|
|
||||||
--- a/drivers/xen/xenfs/privcmd.c
|
|
||||||
+++ /dev/null
|
|
||||||
@@ -1,400 +0,0 @@
|
|
||||||
-/******************************************************************************
|
|
||||||
- * privcmd.c
|
|
||||||
- *
|
|
||||||
- * Interface to privileged domain-0 commands.
|
|
||||||
- *
|
|
||||||
- * Copyright (c) 2002-2004, K A Fraser, B Dragovic
|
|
||||||
- */
|
|
||||||
-
|
|
||||||
-#include <linux/kernel.h>
|
|
||||||
-#include <linux/sched.h>
|
|
||||||
-#include <linux/slab.h>
|
|
||||||
-#include <linux/string.h>
|
|
||||||
-#include <linux/errno.h>
|
|
||||||
-#include <linux/mm.h>
|
|
||||||
-#include <linux/mman.h>
|
|
||||||
-#include <linux/uaccess.h>
|
|
||||||
-#include <linux/swap.h>
|
|
||||||
-#include <linux/highmem.h>
|
|
||||||
-#include <linux/pagemap.h>
|
|
||||||
-#include <linux/seq_file.h>
|
|
||||||
-
|
|
||||||
-#include <asm/pgalloc.h>
|
|
||||||
-#include <asm/pgtable.h>
|
|
||||||
-#include <asm/tlb.h>
|
|
||||||
-#include <asm/xen/hypervisor.h>
|
|
||||||
-#include <asm/xen/hypercall.h>
|
|
||||||
-
|
|
||||||
-#include <xen/xen.h>
|
|
||||||
-#include <xen/privcmd.h>
|
|
||||||
-#include <xen/interface/xen.h>
|
|
||||||
-#include <xen/features.h>
|
|
||||||
-#include <xen/page.h>
|
|
||||||
-#include <xen/xen-ops.h>
|
|
||||||
-
|
|
||||||
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
|
||||||
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
|
|
||||||
-#endif
|
|
||||||
-
|
|
||||||
-static long privcmd_ioctl_hypercall(void __user *udata)
|
|
||||||
-{
|
|
||||||
- struct privcmd_hypercall hypercall;
|
|
||||||
- long ret;
|
|
||||||
-
|
|
||||||
- if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
|
|
||||||
- return -EFAULT;
|
|
||||||
-
|
|
||||||
- ret = privcmd_call(hypercall.op,
|
|
||||||
- hypercall.arg[0], hypercall.arg[1],
|
|
||||||
- hypercall.arg[2], hypercall.arg[3],
|
|
||||||
- hypercall.arg[4]);
|
|
||||||
-
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static void free_page_list(struct list_head *pages)
|
|
||||||
-{
|
|
||||||
- struct page *p, *n;
|
|
||||||
-
|
|
||||||
- list_for_each_entry_safe(p, n, pages, lru)
|
|
||||||
- __free_page(p);
|
|
||||||
-
|
|
||||||
- INIT_LIST_HEAD(pages);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-/*
|
|
||||||
- * Given an array of items in userspace, return a list of pages
|
|
||||||
- * containing the data. If copying fails, either because of memory
|
|
||||||
- * allocation failure or a problem reading user memory, return an
|
|
||||||
- * error code; its up to the caller to dispose of any partial list.
|
|
||||||
- */
|
|
||||||
-static int gather_array(struct list_head *pagelist,
|
|
||||||
- unsigned nelem, size_t size,
|
|
||||||
- void __user *data)
|
|
||||||
-{
|
|
||||||
- unsigned pageidx;
|
|
||||||
- void *pagedata;
|
|
||||||
- int ret;
|
|
||||||
-
|
|
||||||
- if (size > PAGE_SIZE)
|
|
||||||
- return 0;
|
|
||||||
-
|
|
||||||
- pageidx = PAGE_SIZE;
|
|
||||||
- pagedata = NULL; /* quiet, gcc */
|
|
||||||
- while (nelem--) {
|
|
||||||
- if (pageidx > PAGE_SIZE-size) {
|
|
||||||
- struct page *page = alloc_page(GFP_KERNEL);
|
|
||||||
-
|
|
||||||
- ret = -ENOMEM;
|
|
||||||
- if (page == NULL)
|
|
||||||
- goto fail;
|
|
||||||
-
|
|
||||||
- pagedata = page_address(page);
|
|
||||||
-
|
|
||||||
- list_add_tail(&page->lru, pagelist);
|
|
||||||
- pageidx = 0;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- ret = -EFAULT;
|
|
||||||
- if (copy_from_user(pagedata + pageidx, data, size))
|
|
||||||
- goto fail;
|
|
||||||
-
|
|
||||||
- data += size;
|
|
||||||
- pageidx += size;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- ret = 0;
|
|
||||||
-
|
|
||||||
-fail:
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-/*
|
|
||||||
- * Call function "fn" on each element of the array fragmented
|
|
||||||
- * over a list of pages.
|
|
||||||
- */
|
|
||||||
-static int traverse_pages(unsigned nelem, size_t size,
|
|
||||||
- struct list_head *pos,
|
|
||||||
- int (*fn)(void *data, void *state),
|
|
||||||
- void *state)
|
|
||||||
-{
|
|
||||||
- void *pagedata;
|
|
||||||
- unsigned pageidx;
|
|
||||||
- int ret = 0;
|
|
||||||
-
|
|
||||||
- BUG_ON(size > PAGE_SIZE);
|
|
||||||
-
|
|
||||||
- pageidx = PAGE_SIZE;
|
|
||||||
- pagedata = NULL; /* hush, gcc */
|
|
||||||
-
|
|
||||||
- while (nelem--) {
|
|
||||||
- if (pageidx > PAGE_SIZE-size) {
|
|
||||||
- struct page *page;
|
|
||||||
- pos = pos->next;
|
|
||||||
- page = list_entry(pos, struct page, lru);
|
|
||||||
- pagedata = page_address(page);
|
|
||||||
- pageidx = 0;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- ret = (*fn)(pagedata + pageidx, state);
|
|
||||||
- if (ret)
|
|
||||||
- break;
|
|
||||||
- pageidx += size;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-struct mmap_mfn_state {
|
|
||||||
- unsigned long va;
|
|
||||||
- struct vm_area_struct *vma;
|
|
||||||
- domid_t domain;
|
|
||||||
-};
|
|
||||||
-
|
|
||||||
-static int mmap_mfn_range(void *data, void *state)
|
|
||||||
-{
|
|
||||||
- struct privcmd_mmap_entry *msg = data;
|
|
||||||
- struct mmap_mfn_state *st = state;
|
|
||||||
- struct vm_area_struct *vma = st->vma;
|
|
||||||
- int rc;
|
|
||||||
-
|
|
||||||
- /* Do not allow range to wrap the address space. */
|
|
||||||
- if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
|
|
||||||
- ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
|
|
||||||
- return -EINVAL;
|
|
||||||
-
|
|
||||||
- /* Range chunks must be contiguous in va space. */
|
|
||||||
- if ((msg->va != st->va) ||
|
|
||||||
- ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
|
|
||||||
- return -EINVAL;
|
|
||||||
-
|
|
||||||
- rc = xen_remap_domain_mfn_range(vma,
|
|
||||||
- msg->va & PAGE_MASK,
|
|
||||||
- msg->mfn, msg->npages,
|
|
||||||
- vma->vm_page_prot,
|
|
||||||
- st->domain);
|
|
||||||
- if (rc < 0)
|
|
||||||
- return rc;
|
|
||||||
-
|
|
||||||
- st->va += msg->npages << PAGE_SHIFT;
|
|
||||||
-
|
|
||||||
- return 0;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static long privcmd_ioctl_mmap(void __user *udata)
|
|
||||||
-{
|
|
||||||
- struct privcmd_mmap mmapcmd;
|
|
||||||
- struct mm_struct *mm = current->mm;
|
|
||||||
- struct vm_area_struct *vma;
|
|
||||||
- int rc;
|
|
||||||
- LIST_HEAD(pagelist);
|
|
||||||
- struct mmap_mfn_state state;
|
|
||||||
-
|
|
||||||
- if (!xen_initial_domain())
|
|
||||||
- return -EPERM;
|
|
||||||
-
|
|
||||||
- if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
|
|
||||||
- return -EFAULT;
|
|
||||||
-
|
|
||||||
- rc = gather_array(&pagelist,
|
|
||||||
- mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
|
||||||
- mmapcmd.entry);
|
|
||||||
-
|
|
||||||
- if (rc || list_empty(&pagelist))
|
|
||||||
- goto out;
|
|
||||||
-
|
|
||||||
- down_write(&mm->mmap_sem);
|
|
||||||
-
|
|
||||||
- {
|
|
||||||
- struct page *page = list_first_entry(&pagelist,
|
|
||||||
- struct page, lru);
|
|
||||||
- struct privcmd_mmap_entry *msg = page_address(page);
|
|
||||||
-
|
|
||||||
- vma = find_vma(mm, msg->va);
|
|
||||||
- rc = -EINVAL;
|
|
||||||
-
|
|
||||||
- if (!vma || (msg->va != vma->vm_start) ||
|
|
||||||
- !privcmd_enforce_singleshot_mapping(vma))
|
|
||||||
- goto out_up;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- state.va = vma->vm_start;
|
|
||||||
- state.vma = vma;
|
|
||||||
- state.domain = mmapcmd.dom;
|
|
||||||
-
|
|
||||||
- rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
|
||||||
- &pagelist,
|
|
||||||
- mmap_mfn_range, &state);
|
|
||||||
-
|
|
||||||
-
|
|
||||||
-out_up:
|
|
||||||
- up_write(&mm->mmap_sem);
|
|
||||||
-
|
|
||||||
-out:
|
|
||||||
- free_page_list(&pagelist);
|
|
||||||
-
|
|
||||||
- return rc;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-struct mmap_batch_state {
|
|
||||||
- domid_t domain;
|
|
||||||
- unsigned long va;
|
|
||||||
- struct vm_area_struct *vma;
|
|
||||||
- int err;
|
|
||||||
-
|
|
||||||
- xen_pfn_t __user *user;
|
|
||||||
-};
|
|
||||||
-
|
|
||||||
-static int mmap_batch_fn(void *data, void *state)
|
|
||||||
-{
|
|
||||||
- xen_pfn_t *mfnp = data;
|
|
||||||
- struct mmap_batch_state *st = state;
|
|
||||||
-
|
|
||||||
- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
|
|
||||||
- st->vma->vm_page_prot, st->domain) < 0) {
|
|
||||||
- *mfnp |= 0xf0000000U;
|
|
||||||
- st->err++;
|
|
||||||
- }
|
|
||||||
- st->va += PAGE_SIZE;
|
|
||||||
-
|
|
||||||
- return 0;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static int mmap_return_errors(void *data, void *state)
|
|
||||||
-{
|
|
||||||
- xen_pfn_t *mfnp = data;
|
|
||||||
- struct mmap_batch_state *st = state;
|
|
||||||
-
|
|
||||||
- return put_user(*mfnp, st->user++);
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static struct vm_operations_struct privcmd_vm_ops;
|
|
||||||
-
|
|
||||||
-static long privcmd_ioctl_mmap_batch(void __user *udata)
|
|
||||||
-{
|
|
||||||
- int ret;
|
|
||||||
- struct privcmd_mmapbatch m;
|
|
||||||
- struct mm_struct *mm = current->mm;
|
|
||||||
- struct vm_area_struct *vma;
|
|
||||||
- unsigned long nr_pages;
|
|
||||||
- LIST_HEAD(pagelist);
|
|
||||||
- struct mmap_batch_state state;
|
|
||||||
-
|
|
||||||
- if (!xen_initial_domain())
|
|
||||||
- return -EPERM;
|
|
||||||
-
|
|
||||||
- if (copy_from_user(&m, udata, sizeof(m)))
|
|
||||||
- return -EFAULT;
|
|
||||||
-
|
|
||||||
- nr_pages = m.num;
|
|
||||||
- if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
|
|
||||||
- return -EINVAL;
|
|
||||||
-
|
|
||||||
- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
|
|
||||||
- m.arr);
|
|
||||||
-
|
|
||||||
- if (ret || list_empty(&pagelist))
|
|
||||||
- goto out;
|
|
||||||
-
|
|
||||||
- down_write(&mm->mmap_sem);
|
|
||||||
-
|
|
||||||
- vma = find_vma(mm, m.addr);
|
|
||||||
- ret = -EINVAL;
|
|
||||||
- if (!vma ||
|
|
||||||
- vma->vm_ops != &privcmd_vm_ops ||
|
|
||||||
- (m.addr != vma->vm_start) ||
|
|
||||||
- ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
|
|
||||||
- !privcmd_enforce_singleshot_mapping(vma)) {
|
|
||||||
- up_write(&mm->mmap_sem);
|
|
||||||
- goto out;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- state.domain = m.dom;
|
|
||||||
- state.vma = vma;
|
|
||||||
- state.va = m.addr;
|
|
||||||
- state.err = 0;
|
|
||||||
-
|
|
||||||
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
|
||||||
- &pagelist, mmap_batch_fn, &state);
|
|
||||||
-
|
|
||||||
- up_write(&mm->mmap_sem);
|
|
||||||
-
|
|
||||||
- if (state.err > 0) {
|
|
||||||
- state.user = m.arr;
|
|
||||||
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
|
||||||
- &pagelist,
|
|
||||||
- mmap_return_errors, &state);
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
-out:
|
|
||||||
- free_page_list(&pagelist);
|
|
||||||
-
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static long privcmd_ioctl(struct file *file,
|
|
||||||
- unsigned int cmd, unsigned long data)
|
|
||||||
-{
|
|
||||||
- int ret = -ENOSYS;
|
|
||||||
- void __user *udata = (void __user *) data;
|
|
||||||
-
|
|
||||||
- switch (cmd) {
|
|
||||||
- case IOCTL_PRIVCMD_HYPERCALL:
|
|
||||||
- ret = privcmd_ioctl_hypercall(udata);
|
|
||||||
- break;
|
|
||||||
-
|
|
||||||
- case IOCTL_PRIVCMD_MMAP:
|
|
||||||
- ret = privcmd_ioctl_mmap(udata);
|
|
||||||
- break;
|
|
||||||
-
|
|
||||||
- case IOCTL_PRIVCMD_MMAPBATCH:
|
|
||||||
- ret = privcmd_ioctl_mmap_batch(udata);
|
|
||||||
- break;
|
|
||||||
-
|
|
||||||
- default:
|
|
||||||
- ret = -EINVAL;
|
|
||||||
- break;
|
|
||||||
- }
|
|
||||||
-
|
|
||||||
- return ret;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
|
||||||
-static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
|
||||||
-{
|
|
||||||
- printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
|
|
||||||
- vma, vma->vm_start, vma->vm_end,
|
|
||||||
- vmf->pgoff, vmf->virtual_address);
|
|
||||||
-
|
|
||||||
- return VM_FAULT_SIGBUS;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static struct vm_operations_struct privcmd_vm_ops = {
|
|
||||||
- .fault = privcmd_fault
|
|
||||||
-};
|
|
||||||
-
|
|
||||||
-static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
|
|
||||||
-{
|
|
||||||
- /* Unsupported for auto-translate guests. */
|
|
||||||
- if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
||||||
- return -ENOSYS;
|
|
||||||
-
|
|
||||||
- /* DONTCOPY is essential for Xen because copy_page_range doesn't know
|
|
||||||
- * how to recreate these mappings */
|
|
||||||
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
|
|
||||||
- vma->vm_ops = &privcmd_vm_ops;
|
|
||||||
- vma->vm_private_data = NULL;
|
|
||||||
-
|
|
||||||
- return 0;
|
|
||||||
-}
|
|
||||||
-
|
|
||||||
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
|
|
||||||
-{
|
|
||||||
- return (xchg(&vma->vm_private_data, (void *)1) == NULL);
|
|
||||||
-}
|
|
||||||
-#endif
|
|
||||||
-
|
|
||||||
-const struct file_operations privcmd_file_ops = {
|
|
||||||
- .unlocked_ioctl = privcmd_ioctl,
|
|
||||||
- .mmap = privcmd_mmap,
|
|
||||||
-};
|
|
||||||
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
|
|
||||||
index 1aa3897..a55fbf9 100644
|
|
||||||
--- a/drivers/xen/xenfs/super.c
|
|
||||||
+++ b/drivers/xen/xenfs/super.c
|
|
||||||
@@ -16,6 +16,7 @@
|
|
||||||
#include <xen/xen.h>
|
|
||||||
|
|
||||||
#include "xenfs.h"
|
|
||||||
+#include "../privcmd.h"
|
|
||||||
|
|
||||||
#include <asm/xen/hypervisor.h>
|
|
||||||
|
|
||||||
@@ -84,7 +85,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
|
|
||||||
[1] = {},
|
|
||||||
{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
|
|
||||||
{ "capabilities", &capabilities_file_ops, S_IRUGO },
|
|
||||||
- { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
|
|
||||||
+ { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
|
|
||||||
{""},
|
|
||||||
};
|
|
||||||
int rc;
|
|
||||||
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
|
|
||||||
index b68aa62..5056306 100644
|
|
||||||
--- a/drivers/xen/xenfs/xenfs.h
|
|
||||||
+++ b/drivers/xen/xenfs/xenfs.h
|
|
||||||
@@ -2,7 +2,6 @@
|
|
||||||
#define _XENFS_XENBUS_H
|
|
||||||
|
|
||||||
extern const struct file_operations xenbus_file_ops;
|
|
||||||
-extern const struct file_operations privcmd_file_ops;
|
|
||||||
extern const struct file_operations xsd_kva_file_ops;
|
|
||||||
extern const struct file_operations xsd_port_file_ops;
|
|
||||||
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,177 +0,0 @@
|
|||||||
From 8fd04efb7e41da12d85ad382b7c7092fe832bebb Mon Sep 17 00:00:00 2001
|
|
||||||
From: Tang Liang <liang.tang@oracle.com>
|
|
||||||
Date: Fri, 9 Dec 2011 10:05:54 +0800
|
|
||||||
Subject: x86, acpi, tboot: Have a ACPI os prepare sleep instead of calling
|
|
||||||
tboot_sleep.
|
|
||||||
|
|
||||||
The ACPI suspend path makes a call to tboot_sleep right before
|
|
||||||
it writes the PM1A, PM1B values. We replace the direct call to
|
|
||||||
- tboot via a registration callback similar to __acpi_register_gsi.
|
|
||||||
|
|
||||||
CC: Thomas Gleixner <tglx@linutronix.de>
|
|
||||||
CC: "H. Peter Anvin" <hpa@zytor.com>
|
|
||||||
CC: x86@kernel.org
|
|
||||||
CC: Len Brown <len.brown@intel.com>
|
|
||||||
Acked-by: Joseph Cihula <joseph.cihula@intel.com>
|
|
||||||
CC: Shane Wang <shane.wang@intel.com>
|
|
||||||
CC: xen-devel@lists.xensource.com
|
|
||||||
CC: linux-pm@lists.linux-foundation.org
|
|
||||||
CC: tboot-devel@lists.sourceforge.net
|
|
||||||
CC: linux-acpi@vger.kernel.org
|
|
||||||
[v1: Added __attribute__ ((unused))]
|
|
||||||
[v2: Introduced a wrapper instead of changing tboot_sleep return values]
|
|
||||||
[v3: Added return value AE_CTRL_SKIP for acpi_os_sleep_prepare]
|
|
||||||
Signed-off-by: Tang Liang <liang.tang@oracle.com>
|
|
||||||
[v1: Fix compile issues on IA64 and PPC64]
|
|
||||||
[v2: Fix where __acpi_os_prepare_sleep==NULL and did not go in sleep properly]
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
arch/x86/kernel/tboot.c | 8 ++++++++
|
|
||||||
drivers/acpi/acpica/hwsleep.c | 10 +++++++---
|
|
||||||
drivers/acpi/osl.c | 24 ++++++++++++++++++++++++
|
|
||||||
include/acpi/acexcep.h | 1 +
|
|
||||||
include/linux/acpi.h | 10 ++++++++++
|
|
||||||
include/linux/tboot.h | 1 -
|
|
||||||
6 files changed, 50 insertions(+), 4 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
|
|
||||||
index e2410e2..1a4ab7d 100644
|
|
||||||
--- a/arch/x86/kernel/tboot.c
|
|
||||||
+++ b/arch/x86/kernel/tboot.c
|
|
||||||
@@ -297,6 +297,12 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
|
|
||||||
|
|
||||||
tboot_shutdown(acpi_shutdown_map[sleep_state]);
|
|
||||||
}
|
|
||||||
+static int tboot_sleep_wrapper(u8 sleep_state, u32 pm1a_control,
|
|
||||||
+ u32 pm1b_control)
|
|
||||||
+{
|
|
||||||
+ tboot_sleep(sleep_state, pm1a_control, pm1b_control);
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
|
|
||||||
static atomic_t ap_wfs_count;
|
|
||||||
|
|
||||||
@@ -345,6 +351,8 @@ static __init int tboot_late_init(void)
|
|
||||||
|
|
||||||
atomic_set(&ap_wfs_count, 0);
|
|
||||||
register_hotcpu_notifier(&tboot_cpu_notifier);
|
|
||||||
+
|
|
||||||
+ acpi_os_set_prepare_sleep(&tboot_sleep_wrapper);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
diff --git a/drivers/acpi/acpica/hwsleep.c b/drivers/acpi/acpica/hwsleep.c
|
|
||||||
index d52da30..992359a 100644
|
|
||||||
--- a/drivers/acpi/acpica/hwsleep.c
|
|
||||||
+++ b/drivers/acpi/acpica/hwsleep.c
|
|
||||||
@@ -43,9 +43,9 @@
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <acpi/acpi.h>
|
|
||||||
+#include <linux/acpi.h>
|
|
||||||
#include "accommon.h"
|
|
||||||
#include "actables.h"
|
|
||||||
-#include <linux/tboot.h>
|
|
||||||
#include <linux/module.h>
|
|
||||||
|
|
||||||
#define _COMPONENT ACPI_HARDWARE
|
|
||||||
@@ -344,8 +344,12 @@ acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
|
|
||||||
|
|
||||||
ACPI_FLUSH_CPU_CACHE();
|
|
||||||
|
|
||||||
- tboot_sleep(sleep_state, pm1a_control, pm1b_control);
|
|
||||||
-
|
|
||||||
+ status = acpi_os_prepare_sleep(sleep_state, pm1a_control,
|
|
||||||
+ pm1b_control);
|
|
||||||
+ if (ACPI_SKIP(status))
|
|
||||||
+ return_ACPI_STATUS(AE_OK);
|
|
||||||
+ if (ACPI_FAILURE(status))
|
|
||||||
+ return_ACPI_STATUS(status);
|
|
||||||
/* Write #2: Write both SLP_TYP + SLP_EN */
|
|
||||||
|
|
||||||
status = acpi_hw_write_pm1_control(pm1a_control, pm1b_control);
|
|
||||||
diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c
|
|
||||||
index f31c5c5..f3aae4b 100644
|
|
||||||
--- a/drivers/acpi/osl.c
|
|
||||||
+++ b/drivers/acpi/osl.c
|
|
||||||
@@ -76,6 +76,9 @@ EXPORT_SYMBOL(acpi_in_debugger);
|
|
||||||
extern char line_buf[80];
|
|
||||||
#endif /*ENABLE_DEBUGGER */
|
|
||||||
|
|
||||||
+static int (*__acpi_os_prepare_sleep)(u8 sleep_state, u32 pm1a_ctrl,
|
|
||||||
+ u32 pm1b_ctrl);
|
|
||||||
+
|
|
||||||
static acpi_osd_handler acpi_irq_handler;
|
|
||||||
static void *acpi_irq_context;
|
|
||||||
static struct workqueue_struct *kacpid_wq;
|
|
||||||
@@ -1659,3 +1662,24 @@ acpi_status acpi_os_terminate(void)
|
|
||||||
|
|
||||||
return AE_OK;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+acpi_status acpi_os_prepare_sleep(u8 sleep_state, u32 pm1a_control,
|
|
||||||
+ u32 pm1b_control)
|
|
||||||
+{
|
|
||||||
+ int rc = 0;
|
|
||||||
+ if (__acpi_os_prepare_sleep)
|
|
||||||
+ rc = __acpi_os_prepare_sleep(sleep_state,
|
|
||||||
+ pm1a_control, pm1b_control);
|
|
||||||
+ if (rc < 0)
|
|
||||||
+ return AE_ERROR;
|
|
||||||
+ else if (rc > 0)
|
|
||||||
+ return AE_CTRL_SKIP;
|
|
||||||
+
|
|
||||||
+ return AE_OK;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
|
|
||||||
+ u32 pm1a_ctrl, u32 pm1b_ctrl))
|
|
||||||
+{
|
|
||||||
+ __acpi_os_prepare_sleep = func;
|
|
||||||
+}
|
|
||||||
diff --git a/include/acpi/acexcep.h b/include/acpi/acexcep.h
|
|
||||||
index 5b6c391..fa0d22c 100644
|
|
||||||
--- a/include/acpi/acexcep.h
|
|
||||||
+++ b/include/acpi/acexcep.h
|
|
||||||
@@ -57,6 +57,7 @@
|
|
||||||
#define ACPI_SUCCESS(a) (!(a))
|
|
||||||
#define ACPI_FAILURE(a) (a)
|
|
||||||
|
|
||||||
+#define ACPI_SKIP(a) (a == AE_CTRL_SKIP)
|
|
||||||
#define AE_OK (acpi_status) 0x0000
|
|
||||||
|
|
||||||
/*
|
|
||||||
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
|
|
||||||
index 627a3a4..9393f73 100644
|
|
||||||
--- a/include/linux/acpi.h
|
|
||||||
+++ b/include/linux/acpi.h
|
|
||||||
@@ -363,4 +363,14 @@ static inline int suspend_nvs_register(unsigned long a, unsigned long b)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#ifdef CONFIG_ACPI
|
|
||||||
+void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
|
|
||||||
+ u32 pm1a_ctrl, u32 pm1b_ctrl));
|
|
||||||
+
|
|
||||||
+acpi_status acpi_os_prepare_sleep(u8 sleep_state,
|
|
||||||
+ u32 pm1a_control, u32 pm1b_control);
|
|
||||||
+#else
|
|
||||||
+#define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0)
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
#endif /*_LINUX_ACPI_H*/
|
|
||||||
diff --git a/include/linux/tboot.h b/include/linux/tboot.h
|
|
||||||
index 1dba6ee..c75128b 100644
|
|
||||||
--- a/include/linux/tboot.h
|
|
||||||
+++ b/include/linux/tboot.h
|
|
||||||
@@ -143,7 +143,6 @@ static inline int tboot_enabled(void)
|
|
||||||
|
|
||||||
extern void tboot_probe(void);
|
|
||||||
extern void tboot_shutdown(u32 shutdown_type);
|
|
||||||
-extern void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control);
|
|
||||||
extern struct acpi_table_header *tboot_get_dmar_table(
|
|
||||||
struct acpi_table_header *dmar_tbl);
|
|
||||||
extern int tboot_force_iommu(void);
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,67 +0,0 @@
|
|||||||
From 6f327383cd7ebef1fcc092e2d759ceb9d90dfb36 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Date: Thu, 8 Dec 2011 17:14:08 +0800
|
|
||||||
Subject: tboot: Add return values for tboot_sleep
|
|
||||||
|
|
||||||
- .. as appropriately. As tboot_sleep now returns values.
|
|
||||||
remove tboot_sleep_wrapper.
|
|
||||||
|
|
||||||
Suggested-by: "Rafael J. Wysocki" <rjw@sisk.pl>
|
|
||||||
Acked-by: Joseph Cihula <joseph.cihula@intel.com>
|
|
||||||
[v1: Return -1/0/+1 instead of ACPI_xx values]
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
arch/x86/kernel/tboot.c | 13 ++++---------
|
|
||||||
1 files changed, 4 insertions(+), 9 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
|
|
||||||
index 1a4ab7d..6410744 100644
|
|
||||||
--- a/arch/x86/kernel/tboot.c
|
|
||||||
+++ b/arch/x86/kernel/tboot.c
|
|
||||||
@@ -272,7 +272,7 @@ static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
|
|
||||||
offsetof(struct acpi_table_facs, firmware_waking_vector);
|
|
||||||
}
|
|
||||||
|
|
||||||
-void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
|
|
||||||
+static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
|
|
||||||
{
|
|
||||||
static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
|
|
||||||
/* S0,1,2: */ -1, -1, -1,
|
|
||||||
@@ -281,7 +281,7 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
|
|
||||||
/* S5: */ TB_SHUTDOWN_S5 };
|
|
||||||
|
|
||||||
if (!tboot_enabled())
|
|
||||||
- return;
|
|
||||||
+ return 0;
|
|
||||||
|
|
||||||
tboot_copy_fadt(&acpi_gbl_FADT);
|
|
||||||
tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
|
|
||||||
@@ -292,15 +292,10 @@ void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
|
|
||||||
if (sleep_state >= ACPI_S_STATE_COUNT ||
|
|
||||||
acpi_shutdown_map[sleep_state] == -1) {
|
|
||||||
pr_warning("unsupported sleep state 0x%x\n", sleep_state);
|
|
||||||
- return;
|
|
||||||
+ return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
tboot_shutdown(acpi_shutdown_map[sleep_state]);
|
|
||||||
-}
|
|
||||||
-static int tboot_sleep_wrapper(u8 sleep_state, u32 pm1a_control,
|
|
||||||
- u32 pm1b_control)
|
|
||||||
-{
|
|
||||||
- tboot_sleep(sleep_state, pm1a_control, pm1b_control);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
@@ -352,7 +347,7 @@ static __init int tboot_late_init(void)
|
|
||||||
atomic_set(&ap_wfs_count, 0);
|
|
||||||
register_hotcpu_notifier(&tboot_cpu_notifier);
|
|
||||||
|
|
||||||
- acpi_os_set_prepare_sleep(&tboot_sleep_wrapper);
|
|
||||||
+ acpi_os_set_prepare_sleep(&tboot_sleep);
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,197 +0,0 @@
|
|||||||
From 9b10575276a220543b8791f2cb8268fbd4a0bc2e Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Date: Thu, 8 Dec 2011 17:32:23 +0800
|
|
||||||
Subject: xen/acpi/sleep: Enable ACPI sleep via the __acpi_os_prepare_sleep
|
|
||||||
|
|
||||||
Provide the registration callback to call in the Xen's
|
|
||||||
ACPI sleep functionality. This means that during S3/S5
|
|
||||||
we make a hypercall XENPF_enter_acpi_sleep with the
|
|
||||||
proper PM1A/PM1B registers.
|
|
||||||
|
|
||||||
Based of Ke Yu's <ke.yu@intel.com> initial idea.
|
|
||||||
[ From http://xenbits.xensource.com/linux-2.6.18-xen.hg
|
|
||||||
change c68699484a65 ]
|
|
||||||
|
|
||||||
[v1: Added Copyright and license]
|
|
||||||
[v2: Added check if PM1A/B the 16-bits MSB contain something. The spec
|
|
||||||
only uses 16-bits but might have more in future]
|
|
||||||
Signed-off-by: Liang Tang <liang.tang@oracle.com>
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
arch/x86/xen/enlighten.c | 3 ++
|
|
||||||
drivers/xen/Makefile | 2 +-
|
|
||||||
drivers/xen/acpi.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
include/xen/acpi.h | 58 +++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
4 files changed, 124 insertions(+), 1 deletions(-)
|
|
||||||
create mode 100644 drivers/xen/acpi.c
|
|
||||||
create mode 100644 include/xen/acpi.h
|
|
||||||
|
|
||||||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
|
|
||||||
index 12eb07b..a5277c2 100644
|
|
||||||
--- a/arch/x86/xen/enlighten.c
|
|
||||||
+++ b/arch/x86/xen/enlighten.c
|
|
||||||
@@ -42,6 +42,7 @@
|
|
||||||
#include <xen/page.h>
|
|
||||||
#include <xen/hvm.h>
|
|
||||||
#include <xen/hvc-console.h>
|
|
||||||
+#include <xen/acpi.h>
|
|
||||||
|
|
||||||
#include <asm/paravirt.h>
|
|
||||||
#include <asm/apic.h>
|
|
||||||
@@ -1275,6 +1276,8 @@ asmlinkage void __init xen_start_kernel(void)
|
|
||||||
|
|
||||||
/* Make sure ACS will be enabled */
|
|
||||||
pci_request_acs();
|
|
||||||
+
|
|
||||||
+ xen_acpi_sleep_register();
|
|
||||||
}
|
|
||||||
#ifdef CONFIG_PCI
|
|
||||||
/* PCI BIOS service won't work from a PV guest. */
|
|
||||||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
|
|
||||||
index aa31337..77a845f 100644
|
|
||||||
--- a/drivers/xen/Makefile
|
|
||||||
+++ b/drivers/xen/Makefile
|
|
||||||
@@ -17,7 +17,7 @@ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
|
|
||||||
obj-$(CONFIG_XEN_PVHVM) += platform-pci.o
|
|
||||||
obj-$(CONFIG_XEN_TMEM) += tmem.o
|
|
||||||
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
|
|
||||||
-obj-$(CONFIG_XEN_DOM0) += pci.o
|
|
||||||
+obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
|
|
||||||
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
|
|
||||||
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
|
|
||||||
|
|
||||||
diff --git a/drivers/xen/acpi.c b/drivers/xen/acpi.c
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..119d42a
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/drivers/xen/acpi.c
|
|
||||||
@@ -0,0 +1,62 @@
|
|
||||||
+/******************************************************************************
|
|
||||||
+ * acpi.c
|
|
||||||
+ * acpi file for domain 0 kernel
|
|
||||||
+ *
|
|
||||||
+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
+ * Copyright (c) 2011 Yu Ke ke.yu@intel.com
|
|
||||||
+ *
|
|
||||||
+ * This program is free software; you can redistribute it and/or
|
|
||||||
+ * modify it under the terms of the GNU General Public License version 2
|
|
||||||
+ * as published by the Free Software Foundation; or, when distributed
|
|
||||||
+ * separately from the Linux kernel or incorporated into other
|
|
||||||
+ * software packages, subject to the following license:
|
|
||||||
+ *
|
|
||||||
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
+ * of this source file (the "Software"), to deal in the Software without
|
|
||||||
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
||||||
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
||||||
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
+ * the following conditions:
|
|
||||||
+ *
|
|
||||||
+ * The above copyright notice and this permission notice shall be included in
|
|
||||||
+ * all copies or substantial portions of the Software.
|
|
||||||
+ *
|
|
||||||
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
||||||
+ * IN THE SOFTWARE.
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#include <xen/acpi.h>
|
|
||||||
+#include <xen/interface/platform.h>
|
|
||||||
+#include <asm/xen/hypercall.h>
|
|
||||||
+#include <asm/xen/hypervisor.h>
|
|
||||||
+
|
|
||||||
+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
|
|
||||||
+ u32 pm1a_cnt, u32 pm1b_cnt)
|
|
||||||
+{
|
|
||||||
+ struct xen_platform_op op = {
|
|
||||||
+ .cmd = XENPF_enter_acpi_sleep,
|
|
||||||
+ .interface_version = XENPF_INTERFACE_VERSION,
|
|
||||||
+ .u = {
|
|
||||||
+ .enter_acpi_sleep = {
|
|
||||||
+ .pm1a_cnt_val = (u16)pm1a_cnt,
|
|
||||||
+ .pm1b_cnt_val = (u16)pm1b_cnt,
|
|
||||||
+ .sleep_state = sleep_state,
|
|
||||||
+ },
|
|
||||||
+ },
|
|
||||||
+ };
|
|
||||||
+
|
|
||||||
+ if ((pm1a_cnt & 0xffff0000) || (pm1b_cnt & 0xffff0000)) {
|
|
||||||
+ WARN(1, "Using more than 16bits of PM1A/B 0x%x/0x%x!"
|
|
||||||
+ "Email xen-devel@lists.xensource.com Thank you.\n", \
|
|
||||||
+ pm1a_cnt, pm1b_cnt);
|
|
||||||
+ return -1;
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ HYPERVISOR_dom0_op(&op);
|
|
||||||
+ return 1;
|
|
||||||
+}
|
|
||||||
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..48a9c01
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/include/xen/acpi.h
|
|
||||||
@@ -0,0 +1,58 @@
|
|
||||||
+/******************************************************************************
|
|
||||||
+ * acpi.h
|
|
||||||
+ * acpi file for domain 0 kernel
|
|
||||||
+ *
|
|
||||||
+ * Copyright (c) 2011 Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
+ * Copyright (c) 2011 Yu Ke <ke.yu@intel.com>
|
|
||||||
+ *
|
|
||||||
+ * This program is free software; you can redistribute it and/or
|
|
||||||
+ * modify it under the terms of the GNU General Public License version 2
|
|
||||||
+ * as published by the Free Software Foundation; or, when distributed
|
|
||||||
+ * separately from the Linux kernel or incorporated into other
|
|
||||||
+ * software packages, subject to the following license:
|
|
||||||
+ *
|
|
||||||
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
+ * of this source file (the "Software"), to deal in the Software without
|
|
||||||
+ * restriction, including without limitation the rights to use, copy, modify,
|
|
||||||
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
|
|
||||||
+ * and to permit persons to whom the Software is furnished to do so, subject to
|
|
||||||
+ * the following conditions:
|
|
||||||
+ *
|
|
||||||
+ * The above copyright notice and this permission notice shall be included in
|
|
||||||
+ * all copies or substantial portions of the Software.
|
|
||||||
+ *
|
|
||||||
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
||||||
+ * IN THE SOFTWARE.
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#ifndef _XEN_ACPI_H
|
|
||||||
+#define _XEN_ACPI_H
|
|
||||||
+
|
|
||||||
+#include <linux/types.h>
|
|
||||||
+
|
|
||||||
+#ifdef CONFIG_XEN_DOM0
|
|
||||||
+#include <asm/xen/hypervisor.h>
|
|
||||||
+#include <xen/xen.h>
|
|
||||||
+#include <linux/acpi.h>
|
|
||||||
+
|
|
||||||
+int xen_acpi_notify_hypervisor_state(u8 sleep_state,
|
|
||||||
+ u32 pm1a_cnt, u32 pm1b_cnd);
|
|
||||||
+
|
|
||||||
+static inline void xen_acpi_sleep_register(void)
|
|
||||||
+{
|
|
||||||
+ if (xen_initial_domain())
|
|
||||||
+ acpi_os_set_prepare_sleep(
|
|
||||||
+ &xen_acpi_notify_hypervisor_state);
|
|
||||||
+}
|
|
||||||
+#else
|
|
||||||
+static inline void xen_acpi_sleep_register(void)
|
|
||||||
+{
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#endif /* _XEN_ACPI_H */
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,81 +0,0 @@
|
|||||||
From 86ceafdf50d67bcb2a5196122797a6972bedd279 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Tang Liang <liang.tang@oracle.com>
|
|
||||||
Date: Thu, 8 Dec 2011 17:36:39 +0800
|
|
||||||
Subject: xen: Utilize the restore_msi_irqs hook.
|
|
||||||
|
|
||||||
to make a hypercall to restore the vectors in the MSI/MSI-X
|
|
||||||
configuration space.
|
|
||||||
|
|
||||||
Signed-off-by: Tang Liang <liang.tang@oracle.com>
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
arch/x86/pci/xen.c | 27 +++++++++++++++++++++++++++
|
|
||||||
include/xen/interface/physdev.h | 7 +++++++
|
|
||||||
2 files changed, 34 insertions(+), 0 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c
|
|
||||||
index 492ade8..249a5ae 100644
|
|
||||||
--- a/arch/x86/pci/xen.c
|
|
||||||
+++ b/arch/x86/pci/xen.c
|
|
||||||
@@ -324,6 +324,32 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
||||||
out:
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
+
|
|
||||||
+static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq)
|
|
||||||
+{
|
|
||||||
+ int ret = 0;
|
|
||||||
+
|
|
||||||
+ if (pci_seg_supported) {
|
|
||||||
+ struct physdev_pci_device restore_ext;
|
|
||||||
+
|
|
||||||
+ restore_ext.seg = pci_domain_nr(dev->bus);
|
|
||||||
+ restore_ext.bus = dev->bus->number;
|
|
||||||
+ restore_ext.devfn = dev->devfn;
|
|
||||||
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi_ext,
|
|
||||||
+ &restore_ext);
|
|
||||||
+ if (ret == -ENOSYS)
|
|
||||||
+ pci_seg_supported = false;
|
|
||||||
+ WARN(ret && ret != -ENOSYS, "restore_msi_ext -> %d\n", ret);
|
|
||||||
+ }
|
|
||||||
+ if (!pci_seg_supported) {
|
|
||||||
+ struct physdev_restore_msi restore;
|
|
||||||
+
|
|
||||||
+ restore.bus = dev->bus->number;
|
|
||||||
+ restore.devfn = dev->devfn;
|
|
||||||
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_restore_msi, &restore);
|
|
||||||
+ WARN(ret && ret != -ENOSYS, "restore_msi -> %d\n", ret);
|
|
||||||
+ }
|
|
||||||
+}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
static void xen_teardown_msi_irqs(struct pci_dev *dev)
|
|
||||||
@@ -446,6 +472,7 @@ int __init pci_xen_initial_domain(void)
|
|
||||||
#ifdef CONFIG_PCI_MSI
|
|
||||||
x86_msi.setup_msi_irqs = xen_initdom_setup_msi_irqs;
|
|
||||||
x86_msi.teardown_msi_irq = xen_teardown_msi_irq;
|
|
||||||
+ x86_msi.restore_msi_irqs = xen_initdom_restore_msi_irqs;
|
|
||||||
#endif
|
|
||||||
xen_setup_acpi_sci();
|
|
||||||
__acpi_register_gsi = acpi_register_gsi_xen;
|
|
||||||
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
|
|
||||||
index c1080d9..0c28989 100644
|
|
||||||
--- a/include/xen/interface/physdev.h
|
|
||||||
+++ b/include/xen/interface/physdev.h
|
|
||||||
@@ -145,6 +145,13 @@ struct physdev_manage_pci {
|
|
||||||
uint8_t devfn;
|
|
||||||
};
|
|
||||||
|
|
||||||
+#define PHYSDEVOP_restore_msi 19
|
|
||||||
+struct physdev_restore_msi {
|
|
||||||
+ /* IN */
|
|
||||||
+ uint8_t bus;
|
|
||||||
+ uint8_t devfn;
|
|
||||||
+};
|
|
||||||
+
|
|
||||||
#define PHYSDEVOP_manage_pci_add_ext 20
|
|
||||||
struct physdev_manage_pci_ext {
|
|
||||||
/* IN */
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,31 +0,0 @@
|
|||||||
From cfb37553f53f993c22aad05c219581dfbc726bcc Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Date: Mon, 23 Jan 2012 10:53:57 -0500
|
|
||||||
Subject: xen/setup/pm/acpi: Remove the call to boot_option_idle_override.
|
|
||||||
|
|
||||||
We needed that call in the past to force the kernel to use
|
|
||||||
default_idle (which called safe_halt, which called xen_safe_halt).
|
|
||||||
|
|
||||||
- But set_pm_idle_to_default() now does that, so there is no need
|
|
||||||
to use this boot option operand.
|
|
||||||
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
arch/x86/xen/setup.c | 1 -
|
|
||||||
1 files changed, 0 insertions(+), 1 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
|
|
||||||
index e03c636..1236623 100644
|
|
||||||
--- a/arch/x86/xen/setup.c
|
|
||||||
+++ b/arch/x86/xen/setup.c
|
|
||||||
@@ -420,7 +420,6 @@ void __init xen_arch_setup(void)
|
|
||||||
boot_cpu_data.hlt_works_ok = 1;
|
|
||||||
#endif
|
|
||||||
disable_cpuidle();
|
|
||||||
- boot_option_idle_override = IDLE_HALT;
|
|
||||||
WARN_ON(set_pm_idle_to_default());
|
|
||||||
fiddle_vdso();
|
|
||||||
}
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,216 +0,0 @@
|
|||||||
From d281ee8c6d58a7f5d1f4241238daa315fb959e31 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Date: Mon, 13 Feb 2012 22:26:32 -0500
|
|
||||||
Subject: xen/enlighten: Expose MWAIT and MWAIT_LEAF if hypervisor OKs it.
|
|
||||||
|
|
||||||
For the hypervisor to take advantage of the MWAIT support it needs
|
|
||||||
to extract from the ACPI _CST the register address. But the
|
|
||||||
hypervisor does not have the support to parse DSDT so it relies on
|
|
||||||
the initial domain (dom0) to parse the ACPI Power Management information
|
|
||||||
and push it up to the hypervisor. The pushing of the data is done
|
|
||||||
- by the processor_harvest_xen module which parses the information that
|
|
||||||
the ACPI parser has graciously exposed in 'struct acpi_processor'.
|
|
||||||
|
|
||||||
For the ACPI parser to also expose the Cx states for MWAIT, we need
|
|
||||||
to expose the MWAIT capability (leaf 1). Furthermore we also need to
|
|
||||||
expose the MWAIT_LEAF capability (leaf 5) for cstate.c to properly
|
|
||||||
function.
|
|
||||||
|
|
||||||
The hypervisor could expose these flags when it traps the XEN_EMULATE_PREFIX
|
|
||||||
operations, but it can't do it since it needs to be backwards compatible.
|
|
||||||
Instead we choose to use the native CPUID to figure out if the MWAIT
|
|
||||||
capability exists and use the XEN_SET_PDC query hypercall to figure out
|
|
||||||
if the hypervisor wants us to expose the MWAIT_LEAF capability or not.
|
|
||||||
|
|
||||||
Note: The XEN_SET_PDC query was implemented in c/s 23783:
|
|
||||||
"ACPI: add _PDC input override mechanism".
|
|
||||||
|
|
||||||
With this in place, instead of
|
|
||||||
C3 ACPI IOPORT 415
|
|
||||||
we get now
|
|
||||||
C3:ACPI FFH INTEL MWAIT 0x20
|
|
||||||
|
|
||||||
Note: The cpu_idle which would be calling the mwait variants for idling
|
|
||||||
never gets set b/c we set the default pm_idle to be the hypercall variant.
|
|
||||||
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
arch/x86/xen/enlighten.c | 92 +++++++++++++++++++++++++++++++++++++-
|
|
||||||
include/xen/interface/platform.h | 4 +-
|
|
||||||
2 files changed, 94 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
|
|
||||||
index 12eb07b..4c82936 100644
|
|
||||||
--- a/arch/x86/xen/enlighten.c
|
|
||||||
+++ b/arch/x86/xen/enlighten.c
|
|
||||||
@@ -62,6 +62,14 @@
|
|
||||||
#include <asm/stackprotector.h>
|
|
||||||
#include <asm/hypervisor.h>
|
|
||||||
#include <asm/pci_x86.h>
|
|
||||||
+#include <asm/mwait.h>
|
|
||||||
+
|
|
||||||
+#ifdef CONFIG_ACPI
|
|
||||||
+#include <asm/acpi.h>
|
|
||||||
+#include <acpi/pdc_intel.h>
|
|
||||||
+#include <acpi/processor.h>
|
|
||||||
+#include <xen/interface/platform.h>
|
|
||||||
+#endif
|
|
||||||
|
|
||||||
#include "xen-ops.h"
|
|
||||||
#include "mmu.h"
|
|
||||||
@@ -200,13 +208,17 @@ static void __init xen_banner(void)
|
|
||||||
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
|
|
||||||
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
|
|
||||||
|
|
||||||
+static __read_mostly unsigned int cpuid_leaf1_ecx_set_mask;
|
|
||||||
+static __read_mostly unsigned int cpuid_leaf5_ecx_val;
|
|
||||||
+static __read_mostly unsigned int cpuid_leaf5_edx_val;
|
|
||||||
+
|
|
||||||
static void xen_cpuid(unsigned int *ax, unsigned int *bx,
|
|
||||||
unsigned int *cx, unsigned int *dx)
|
|
||||||
{
|
|
||||||
unsigned maskebx = ~0;
|
|
||||||
unsigned maskecx = ~0;
|
|
||||||
unsigned maskedx = ~0;
|
|
||||||
-
|
|
||||||
+ unsigned setecx = 0;
|
|
||||||
/*
|
|
||||||
* Mask out inconvenient features, to try and disable as many
|
|
||||||
* unsupported kernel subsystems as possible.
|
|
||||||
@@ -214,9 +226,18 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
|
|
||||||
switch (*ax) {
|
|
||||||
case 1:
|
|
||||||
maskecx = cpuid_leaf1_ecx_mask;
|
|
||||||
+ setecx = cpuid_leaf1_ecx_set_mask;
|
|
||||||
maskedx = cpuid_leaf1_edx_mask;
|
|
||||||
break;
|
|
||||||
|
|
||||||
+ case CPUID_MWAIT_LEAF:
|
|
||||||
+ /* Synthesize the values.. */
|
|
||||||
+ *ax = 0;
|
|
||||||
+ *bx = 0;
|
|
||||||
+ *cx = cpuid_leaf5_ecx_val;
|
|
||||||
+ *dx = cpuid_leaf5_edx_val;
|
|
||||||
+ return;
|
|
||||||
+
|
|
||||||
case CPUID_THERM_POWER_LEAF:
|
|
||||||
/* Disabling APERFMPERF for kernel usage */
|
|
||||||
maskecx = ~(1 << APERFMPERF_PRESENT);
|
|
||||||
@@ -232,9 +253,75 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
|
|
||||||
|
|
||||||
*bx &= maskebx;
|
|
||||||
*cx &= maskecx;
|
|
||||||
+ *cx |= setecx;
|
|
||||||
*dx &= maskedx;
|
|
||||||
+
|
|
||||||
}
|
|
||||||
|
|
||||||
+static bool __init xen_check_mwait(void)
|
|
||||||
+{
|
|
||||||
+#if CONFIG_ACPI
|
|
||||||
+ struct xen_platform_op op = {
|
|
||||||
+ .cmd = XENPF_set_processor_pminfo,
|
|
||||||
+ .u.set_pminfo.id = -1,
|
|
||||||
+ .u.set_pminfo.type = XEN_PM_PDC,
|
|
||||||
+ };
|
|
||||||
+ uint32_t buf[3];
|
|
||||||
+ unsigned int ax, bx, cx, dx;
|
|
||||||
+ unsigned int mwait_mask;
|
|
||||||
+
|
|
||||||
+ /* We need to determine whether it is OK to expose the MWAIT
|
|
||||||
+ * capability to the kernel to harvest deeper than C3 states from ACPI
|
|
||||||
+ * _CST using the processor_harvest_xen.c module. For this to work, we
|
|
||||||
+ * need to gather the MWAIT_LEAF values (which the cstate.c code
|
|
||||||
+ * checks against). The hypervisor won't expose the MWAIT flag because
|
|
||||||
+ * it would break backwards compatibility; so we will find out directly
|
|
||||||
+ * from the hardware and hypercall.
|
|
||||||
+ */
|
|
||||||
+ if (!xen_initial_domain())
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ ax = 1;
|
|
||||||
+ cx = 0;
|
|
||||||
+
|
|
||||||
+ native_cpuid(&ax, &bx, &cx, &dx);
|
|
||||||
+
|
|
||||||
+ mwait_mask = (1 << (X86_FEATURE_EST % 32)) |
|
|
||||||
+ (1 << (X86_FEATURE_MWAIT % 32));
|
|
||||||
+
|
|
||||||
+ if ((cx & mwait_mask) != mwait_mask)
|
|
||||||
+ return false;
|
|
||||||
+
|
|
||||||
+ /* We need to emulate the MWAIT_LEAF and for that we need both
|
|
||||||
+ * ecx and edx. The hypercall provides only partial information.
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+ ax = CPUID_MWAIT_LEAF;
|
|
||||||
+ bx = 0;
|
|
||||||
+ cx = 0;
|
|
||||||
+ dx = 0;
|
|
||||||
+
|
|
||||||
+ native_cpuid(&ax, &bx, &cx, &dx);
|
|
||||||
+
|
|
||||||
+ /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so,
|
|
||||||
+ * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3.
|
|
||||||
+ */
|
|
||||||
+ buf[0] = ACPI_PDC_REVISION_ID;
|
|
||||||
+ buf[1] = 1;
|
|
||||||
+ buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP);
|
|
||||||
+
|
|
||||||
+ set_xen_guest_handle(op.u.set_pminfo.pdc, buf);
|
|
||||||
+
|
|
||||||
+ if ((HYPERVISOR_dom0_op(&op) == 0) &&
|
|
||||||
+ (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) {
|
|
||||||
+ cpuid_leaf5_ecx_val = cx;
|
|
||||||
+ cpuid_leaf5_edx_val = dx;
|
|
||||||
+ }
|
|
||||||
+ return true;
|
|
||||||
+#else
|
|
||||||
+ return false;
|
|
||||||
+#endif
|
|
||||||
+}
|
|
||||||
static void __init xen_init_cpuid_mask(void)
|
|
||||||
{
|
|
||||||
unsigned int ax, bx, cx, dx;
|
|
||||||
@@ -261,6 +348,9 @@ static void __init xen_init_cpuid_mask(void)
|
|
||||||
/* Xen will set CR4.OSXSAVE if supported and not disabled by force */
|
|
||||||
if ((cx & xsave_mask) != xsave_mask)
|
|
||||||
cpuid_leaf1_ecx_mask &= ~xsave_mask; /* disable XSAVE & OSXSAVE */
|
|
||||||
+
|
|
||||||
+ if (xen_check_mwait())
|
|
||||||
+ cpuid_leaf1_ecx_set_mask = (1 << (X86_FEATURE_MWAIT % 32));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void xen_set_debugreg(int reg, unsigned long val)
|
|
||||||
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
|
|
||||||
index c168468..6220b98 100644
|
|
||||||
--- a/include/xen/interface/platform.h
|
|
||||||
+++ b/include/xen/interface/platform.h
|
|
||||||
@@ -200,7 +200,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_getidletime_t);
|
|
||||||
#define XEN_PM_CX 0
|
|
||||||
#define XEN_PM_PX 1
|
|
||||||
#define XEN_PM_TX 2
|
|
||||||
-
|
|
||||||
+#define XEN_PM_PDC 3
|
|
||||||
/* Px sub info type */
|
|
||||||
#define XEN_PX_PCT 1
|
|
||||||
#define XEN_PX_PSS 2
|
|
||||||
@@ -286,6 +286,7 @@ struct xen_processor_performance {
|
|
||||||
};
|
|
||||||
DEFINE_GUEST_HANDLE_STRUCT(xen_processor_performance);
|
|
||||||
|
|
||||||
+DEFINE_GUEST_HANDLE(uint32_t);
|
|
||||||
struct xenpf_set_processor_pminfo {
|
|
||||||
/* IN variables */
|
|
||||||
uint32_t id; /* ACPI CPU ID */
|
|
||||||
@@ -293,6 +294,7 @@ struct xenpf_set_processor_pminfo {
|
|
||||||
union {
|
|
||||||
struct xen_processor_power power;/* Cx: _CST/_CSD */
|
|
||||||
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/_PSD */
|
|
||||||
+ GUEST_HANDLE(uint32_t) pdc;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
DEFINE_GUEST_HANDLE_STRUCT(xenpf_set_processor_pminfo);
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,529 +0,0 @@
|
|||||||
From 20e7a07fa0f8a0dbe30a0f732686d78849d29d96 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Date: Fri, 3 Feb 2012 16:03:20 -0500
|
|
||||||
Subject: [CPUFREQ] xen: governor for Xen hypervisor frequency scaling.
|
|
||||||
MIME-Version: 1.0
|
|
||||||
Content-Type: text/plain; charset=UTF-8
|
|
||||||
Content-Transfer-Encoding: 8bit
|
|
||||||
|
|
||||||
This CPU freq governor leaves the frequency decision to the Xen hypervisor.
|
|
||||||
|
|
||||||
To do that the driver parses the Power Management data and uploads said
|
|
||||||
information to the Xen hypervisor. Then the Xen hypervisor can select the
|
|
||||||
proper Cx and Pxx states for the initial domain and all other domains.
|
|
||||||
|
|
||||||
To upload the information, this CPU frequency driver reads the Power Management (PM) data
|
|
||||||
(_Pxx and _Cx) which are populated in the 'struct acpi_processor' structure.
|
|
||||||
It simply reads the contents of that structure and passes them up to the Xen hypervisor.
|
|
||||||
For that to work we depend on the appropriate CPU frequency scaling driver
|
|
||||||
to do the heavy-lifting - so that the contents are correct.
|
|
||||||
|
|
||||||
Once the CPU frequency governor has been loaded, it also sets up a timer
|
|
||||||
to check if the ACPI IDs count is different from the APIC ID count - which
|
|
||||||
can happen if the user chooses to use the dom0_max_vcpus argument. In such a case
|
|
||||||
a backup of the PM structure is used and uploaded to the hypervisor.
|
|
||||||
|
|
||||||
[v1-v2: Initial RFC implementations that were posted]
|
|
||||||
[v3: Changed the name to passthru suggested by Pasi Kärkkäinen <pasik@iki.fi>]
|
|
||||||
[v4: Added vCPU != pCPU support - aka dom0_max_vcpus support]
|
|
||||||
[v5: Cleaned up the driver, fix bug under Athlon XP]
|
|
||||||
[v6: Changed the driver to a CPU frequency governor]
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
---
|
|
||||||
drivers/xen/Kconfig | 15 ++
|
|
||||||
drivers/xen/Makefile | 2 +-
|
|
||||||
drivers/xen/cpufreq_xen.c | 445 +++++++++++++++++++++++++++++++++++++++++++++
|
|
||||||
3 files changed, 461 insertions(+), 1 deletions(-)
|
|
||||||
create mode 100644 drivers/xen/cpufreq_xen.c
|
|
||||||
|
|
||||||
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
|
|
||||||
index a1ced52..28ba371 100644
|
|
||||||
--- a/drivers/xen/Kconfig
|
|
||||||
+++ b/drivers/xen/Kconfig
|
|
||||||
@@ -178,4 +178,19 @@ config XEN_PRIVCMD
|
|
||||||
depends on XEN
|
|
||||||
default m
|
|
||||||
|
|
||||||
+config CPU_FREQ_GOV_XEN
|
|
||||||
+ tristate "'xen' governor for hypervisor scaling"
|
|
||||||
+ depends on XEN && X86 && ACPI_PROCESSOR && CPU_FREQ
|
|
||||||
+ default m
|
|
||||||
+ help
|
|
||||||
+ This cpufreq governor leaves the frequency decision to the Xen hypervisor.
|
|
||||||
+
|
|
||||||
+ To do that the driver parses the Power Management data and uploads said
|
|
||||||
+ information to the Xen hypervisor. Then the Xen hypervisor can select the
|
|
||||||
+ proper Cx and Pxx states.
|
|
||||||
+
|
|
||||||
+ To compile this driver as a module, choose M here: the
|
|
||||||
+ module will be called cpufreq_xen. If you do not know what to choose,
|
|
||||||
+ select M here.
|
|
||||||
+
|
|
||||||
endmenu
|
|
||||||
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
|
|
||||||
index aa31337..5802220 100644
|
|
||||||
--- a/drivers/xen/Makefile
|
|
||||||
+++ b/drivers/xen/Makefile
|
|
||||||
@@ -20,7 +20,7 @@ obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
|
|
||||||
obj-$(CONFIG_XEN_DOM0) += pci.o acpi.o
|
|
||||||
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
|
|
||||||
obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
|
|
||||||
-
|
|
||||||
+obj-$(CONFIG_CPU_FREQ_GOV_XEN) += cpufreq_xen.o
|
|
||||||
xen-evtchn-y := evtchn.o
|
|
||||||
xen-gntdev-y := gntdev.o
|
|
||||||
xen-gntalloc-y := gntalloc.o
|
|
||||||
diff --git a/drivers/xen/cpufreq_xen.c b/drivers/xen/cpufreq_xen.c
|
|
||||||
new file mode 100644
|
|
||||||
index 0000000..1b709bf
|
|
||||||
--- /dev/null
|
|
||||||
+++ b/drivers/xen/cpufreq_xen.c
|
|
||||||
@@ -0,0 +1,445 @@
|
|
||||||
+/*
|
|
||||||
+ * Copyright 2012 by Oracle Inc
|
|
||||||
+ * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
+ *
|
|
||||||
+ * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
|
|
||||||
+ * so many thanks go to Kevin Tian <kevin.tian@intel.com>
|
|
||||||
+ * and Yu Ke <ke.yu@intel.com>.
|
|
||||||
+ *
|
|
||||||
+ * This program is free software; you can redistribute it and/or modify it
|
|
||||||
+ * under the terms and conditions of the GNU General Public License,
|
|
||||||
+ * version 2, as published by the Free Software Foundation.
|
|
||||||
+ *
|
|
||||||
+ * This program is distributed in the hope it will be useful, but WITHOUT
|
|
||||||
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
|
|
||||||
+ * more details.
|
|
||||||
+ *
|
|
||||||
+ */
|
|
||||||
+
|
|
||||||
+#include <linux/cpumask.h>
|
|
||||||
+#include <linux/cpufreq.h>
|
|
||||||
+#include <linux/freezer.h>
|
|
||||||
+#include <linux/kernel.h>
|
|
||||||
+#include <linux/kthread.h>
|
|
||||||
+#include <linux/init.h>
|
|
||||||
+#include <linux/module.h>
|
|
||||||
+#include <linux/types.h>
|
|
||||||
+#include <acpi/acpi_bus.h>
|
|
||||||
+#include <acpi/acpi_drivers.h>
|
|
||||||
+#include <acpi/processor.h>
|
|
||||||
+
|
|
||||||
+#include <xen/interface/platform.h>
|
|
||||||
+#include <asm/xen/hypercall.h>
|
|
||||||
+
|
|
||||||
+#define DRV_NAME "cpufreq-xen"
|
|
||||||
+
|
|
||||||
+static int no_hypercall;
|
|
||||||
+MODULE_PARM_DESC(off, "Inhibit the hypercall.");
|
|
||||||
+module_param_named(off, no_hypercall, int, 0400);
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Mutex to protect the acpi_ids_done.
|
|
||||||
+ */
|
|
||||||
+static DEFINE_MUTEX(acpi_ids_mutex);
|
|
||||||
+/*
|
|
||||||
+ * Do not convert this to cpumask_var_t or use cpumask_bit - as those
|
|
||||||
+ * shrink to nr_cpu_bits (which is dependent on possible_cpu), which can be
|
|
||||||
+ * less than what we want to put in.
|
|
||||||
+ */
|
|
||||||
+#define NR_ACPI_CPUS NR_CPUS
|
|
||||||
+#define MAX_ACPI_BITS (BITS_TO_LONGS(NR_ACPI_CPUS))
|
|
||||||
+static unsigned long *acpi_ids_done;
|
|
||||||
+/*
|
|
||||||
+ * Again, don't convert to cpumask - as we are reading the raw ACPI CPU ids
|
|
||||||
+ * which can go beyond what we presently see.
|
|
||||||
+ */
|
|
||||||
+static unsigned long *acpi_id_present;
|
|
||||||
+
|
|
||||||
+/*
|
|
||||||
+ * Pertinent data for the timer to be launched to check if the # of
|
|
||||||
+ * ACPI CPU ids is different from the one we have processed.
|
|
||||||
+ */
|
|
||||||
+#define DELAY_TIMER msecs_to_jiffies(5000 /* 5 sec */)
|
|
||||||
+static struct acpi_processor *pr_backup;
|
|
||||||
+static struct delayed_work work;
|
|
||||||
+
|
|
||||||
+static int push_cxx_to_hypervisor(struct acpi_processor *_pr)
|
|
||||||
+{
|
|
||||||
+ struct xen_platform_op op = {
|
|
||||||
+ .cmd = XENPF_set_processor_pminfo,
|
|
||||||
+ .interface_version = XENPF_INTERFACE_VERSION,
|
|
||||||
+ .u.set_pminfo.id = _pr->acpi_id,
|
|
||||||
+ .u.set_pminfo.type = XEN_PM_CX,
|
|
||||||
+ };
|
|
||||||
+ struct xen_processor_cx *dst_cx, *dst_cx_states = NULL;
|
|
||||||
+ struct acpi_processor_cx *cx;
|
|
||||||
+ int i, ok, ret = 0;
|
|
||||||
+
|
|
||||||
+ dst_cx_states = kcalloc(_pr->power.count,
|
|
||||||
+ sizeof(struct xen_processor_cx), GFP_KERNEL);
|
|
||||||
+ if (!dst_cx_states)
|
|
||||||
+ return -ENOMEM;
|
|
||||||
+
|
|
||||||
+ for (ok = 0, i = 1; i <= _pr->power.count; i++) {
|
|
||||||
+ cx = &_pr->power.states[i];
|
|
||||||
+ if (!cx->valid)
|
|
||||||
+ continue;
|
|
||||||
+
|
|
||||||
+ dst_cx = &(dst_cx_states[ok++]);
|
|
||||||
+
|
|
||||||
+ dst_cx->reg.space_id = ACPI_ADR_SPACE_SYSTEM_IO;
|
|
||||||
+ if (cx->entry_method == ACPI_CSTATE_SYSTEMIO) {
|
|
||||||
+ dst_cx->reg.bit_width = 8;
|
|
||||||
+ dst_cx->reg.bit_offset = 0;
|
|
||||||
+ dst_cx->reg.access_size = 1;
|
|
||||||
+ } else {
|
|
||||||
+ dst_cx->reg.space_id = ACPI_ADR_SPACE_FIXED_HARDWARE;
|
|
||||||
+ if (cx->entry_method == ACPI_CSTATE_FFH) {
|
|
||||||
+ /* NATIVE_CSTATE_BEYOND_HALT */
|
|
||||||
+ dst_cx->reg.bit_offset = 2;
|
|
||||||
+ dst_cx->reg.bit_width = 1; /* VENDOR_INTEL */
|
|
||||||
+ }
|
|
||||||
+ dst_cx->reg.access_size = 0;
|
|
||||||
+ }
|
|
||||||
+ dst_cx->reg.address = cx->address;
|
|
||||||
+
|
|
||||||
+ dst_cx->type = cx->type;
|
|
||||||
+ dst_cx->latency = cx->latency;
|
|
||||||
+ dst_cx->power = cx->power;
|
|
||||||
+
|
|
||||||
+ dst_cx->dpcnt = 0;
|
|
||||||
+ set_xen_guest_handle(dst_cx->dp, NULL);
|
|
||||||
+#ifdef DEBUG
|
|
||||||
+ pr_debug(DRV_NAME ": CX: ID:%d [C%d:%s] entry:%d\n",
|
|
||||||
+ _pr->acpi_id, cx->type, cx->desc, cx->entry_method);
|
|
||||||
+#endif
|
|
||||||
+ }
|
|
||||||
+ if (!ok) {
|
|
||||||
+ pr_err(DRV_NAME ": No _Cx for CPU %d\n", _pr->acpi_id);
|
|
||||||
+ kfree(dst_cx_states);
|
|
||||||
+ return -EINVAL;
|
|
||||||
+ }
|
|
||||||
+ op.u.set_pminfo.power.count = ok;
|
|
||||||
+ op.u.set_pminfo.power.flags.bm_control = _pr->flags.bm_control;
|
|
||||||
+ op.u.set_pminfo.power.flags.bm_check = _pr->flags.bm_check;
|
|
||||||
+ op.u.set_pminfo.power.flags.has_cst = _pr->flags.has_cst;
|
|
||||||
+ op.u.set_pminfo.power.flags.power_setup_done =
|
|
||||||
+ _pr->flags.power_setup_done;
|
|
||||||
+
|
|
||||||
+ set_xen_guest_handle(op.u.set_pminfo.power.states, dst_cx_states);
|
|
||||||
+
|
|
||||||
+ if (!no_hypercall)
|
|
||||||
+ ret = HYPERVISOR_dom0_op(&op);
|
|
||||||
+
|
|
||||||
+ if (ret)
|
|
||||||
+ pr_err(DRV_NAME "(CX): Hypervisor error (%d) for ACPI ID: %d\n",
|
|
||||||
+ ret, _pr->acpi_id);
|
|
||||||
+
|
|
||||||
+ kfree(dst_cx_states);
|
|
||||||
+
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+static struct xen_processor_px *
|
|
||||||
+xen_copy_pss_data(struct acpi_processor *_pr,
|
|
||||||
+ struct xen_processor_performance *dst_perf)
|
|
||||||
+{
|
|
||||||
+ struct xen_processor_px *dst_states = NULL;
|
|
||||||
+ int i;
|
|
||||||
+
|
|
||||||
+ BUILD_BUG_ON(sizeof(struct xen_processor_px) !=
|
|
||||||
+ sizeof(struct acpi_processor_px));
|
|
||||||
+
|
|
||||||
+ dst_states = kcalloc(_pr->performance->state_count,
|
|
||||||
+ sizeof(struct xen_processor_px), GFP_KERNEL);
|
|
||||||
+ if (!dst_states)
|
|
||||||
+ return ERR_PTR(-ENOMEM);
|
|
||||||
+
|
|
||||||
+ dst_perf->state_count = _pr->performance->state_count;
|
|
||||||
+ for (i = 0; i < _pr->performance->state_count; i++) {
|
|
||||||
+ /* Fortunately for us, they are both the same size */
|
|
||||||
+ memcpy(&(dst_states[i]), &(_pr->performance->states[i]),
|
|
||||||
+ sizeof(struct acpi_processor_px));
|
|
||||||
+ }
|
|
||||||
+ return dst_states;
|
|
||||||
+}
|
|
||||||
+static int xen_copy_psd_data(struct acpi_processor *_pr,
|
|
||||||
+ struct xen_processor_performance *dst)
|
|
||||||
+{
|
|
||||||
+ BUILD_BUG_ON(sizeof(struct xen_psd_package) !=
|
|
||||||
+ sizeof(struct acpi_psd_package));
|
|
||||||
+
|
|
||||||
+ if (_pr->performance->shared_type != CPUFREQ_SHARED_TYPE_NONE) {
|
|
||||||
+ dst->shared_type = _pr->performance->shared_type;
|
|
||||||
+
|
|
||||||
+ memcpy(&(dst->domain_info), &(_pr->performance->domain_info),
|
|
||||||
+ sizeof(struct acpi_psd_package));
|
|
||||||
+ } else {
|
|
||||||
+ if ((&cpu_data(0))->x86_vendor != X86_VENDOR_AMD)
|
|
||||||
+ return -EINVAL;
|
|
||||||
+
|
|
||||||
+ /* On AMD, the powernow-k8 is loaded before acpi_cpufreq
|
|
||||||
+ * meaning that acpi_processor_preregister_performance never
|
|
||||||
+ * gets called which would parse the _PSD. The only relevant
|
|
||||||
+ * information from _PSD we need is whether it is HW_ALL or any
|
|
||||||
+ * other type. AMD K8 >= are SW_ALL or SW_ANY, AMD K7<= HW_ANY.
|
|
||||||
+ * This driver checks at the start whether it is K8, so
|
|
||||||
+ * if we get here it can only be K8.
|
|
||||||
+ */
|
|
||||||
+ dst->shared_type = CPUFREQ_SHARED_TYPE_ANY;
|
|
||||||
+ dst->domain_info.coord_type = DOMAIN_COORD_TYPE_SW_ANY;
|
|
||||||
+ dst->domain_info.num_processors = num_online_cpus();
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+static int xen_copy_pct_data(struct acpi_pct_register *pct,
|
|
||||||
+ struct xen_pct_register *dst_pct)
|
|
||||||
+{
|
|
||||||
+ /* It would be nice if you could just do 'memcpy(pct, dst_pct)' but
|
|
||||||
+ * sadly the Xen structure did not have the proper padding so the
|
|
||||||
+ * descriptor field takes two (dst_pct) bytes instead of one (pct).
|
|
||||||
+ */
|
|
||||||
+ dst_pct->descriptor = pct->descriptor;
|
|
||||||
+ dst_pct->length = pct->length;
|
|
||||||
+ dst_pct->space_id = pct->space_id;
|
|
||||||
+ dst_pct->bit_width = pct->bit_width;
|
|
||||||
+ dst_pct->bit_offset = pct->bit_offset;
|
|
||||||
+ dst_pct->reserved = pct->reserved;
|
|
||||||
+ dst_pct->address = pct->address;
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+static int push_pxx_to_hypervisor(struct acpi_processor *_pr)
|
|
||||||
+{
|
|
||||||
+ int ret = 0;
|
|
||||||
+ struct xen_platform_op op = {
|
|
||||||
+ .cmd = XENPF_set_processor_pminfo,
|
|
||||||
+ .interface_version = XENPF_INTERFACE_VERSION,
|
|
||||||
+ .u.set_pminfo.id = _pr->acpi_id,
|
|
||||||
+ .u.set_pminfo.type = XEN_PM_PX,
|
|
||||||
+ };
|
|
||||||
+ struct xen_processor_performance *dst_perf;
|
|
||||||
+ struct xen_processor_px *dst_states = NULL;
|
|
||||||
+
|
|
||||||
+ dst_perf = &op.u.set_pminfo.perf;
|
|
||||||
+
|
|
||||||
+ dst_perf->platform_limit = _pr->performance_platform_limit;
|
|
||||||
+ dst_perf->flags |= XEN_PX_PPC;
|
|
||||||
+ xen_copy_pct_data(&(_pr->performance->control_register),
|
|
||||||
+ &dst_perf->control_register);
|
|
||||||
+ xen_copy_pct_data(&(_pr->performance->status_register),
|
|
||||||
+ &dst_perf->status_register);
|
|
||||||
+ dst_perf->flags |= XEN_PX_PCT;
|
|
||||||
+ dst_states = xen_copy_pss_data(_pr, dst_perf);
|
|
||||||
+ if (!IS_ERR_OR_NULL(dst_states)) {
|
|
||||||
+ set_xen_guest_handle(dst_perf->states, dst_states);
|
|
||||||
+ dst_perf->flags |= XEN_PX_PSS;
|
|
||||||
+ }
|
|
||||||
+ if (!xen_copy_psd_data(_pr, dst_perf))
|
|
||||||
+ dst_perf->flags |= XEN_PX_PSD;
|
|
||||||
+
|
|
||||||
+ if (!no_hypercall)
|
|
||||||
+ ret = HYPERVISOR_dom0_op(&op);
|
|
||||||
+
|
|
||||||
+ if (ret)
|
|
||||||
+ pr_err(DRV_NAME "(_PXX): Hypervisor error (%d) for ACPI ID %d\n",
|
|
||||||
+ ret, _pr->acpi_id);
|
|
||||||
+
|
|
||||||
+ if (!IS_ERR_OR_NULL(dst_states))
|
|
||||||
+ kfree(dst_states);
|
|
||||||
+
|
|
||||||
+ return ret;
|
|
||||||
+}
|
|
||||||
+static int upload_pm_data(struct acpi_processor *_pr)
|
|
||||||
+{
|
|
||||||
+ int err = 0;
|
|
||||||
+
|
|
||||||
+ if (__test_and_set_bit(_pr->acpi_id, acpi_ids_done))
|
|
||||||
+ return -EBUSY;
|
|
||||||
+
|
|
||||||
+ if (_pr->flags.power)
|
|
||||||
+ err = push_cxx_to_hypervisor(_pr);
|
|
||||||
+
|
|
||||||
+ if (_pr->performance && _pr->performance->states)
|
|
||||||
+ err |= push_pxx_to_hypervisor(_pr);
|
|
||||||
+
|
|
||||||
+ return err;
|
|
||||||
+}
|
|
||||||
+static acpi_status
|
|
||||||
+read_acpi_id(acpi_handle handle, u32 lvl, void *context, void **rv)
|
|
||||||
+{
|
|
||||||
+ u32 acpi_id;
|
|
||||||
+ acpi_status status;
|
|
||||||
+ acpi_object_type acpi_type;
|
|
||||||
+ unsigned long long tmp;
|
|
||||||
+ union acpi_object object = { 0 };
|
|
||||||
+ struct acpi_buffer buffer = { sizeof(union acpi_object), &object };
|
|
||||||
+
|
|
||||||
+ status = acpi_get_type(handle, &acpi_type);
|
|
||||||
+ if (ACPI_FAILURE(status))
|
|
||||||
+ return AE_OK;
|
|
||||||
+
|
|
||||||
+ switch (acpi_type) {
|
|
||||||
+ case ACPI_TYPE_PROCESSOR:
|
|
||||||
+ status = acpi_evaluate_object(handle, NULL, NULL, &buffer);
|
|
||||||
+ if (ACPI_FAILURE(status))
|
|
||||||
+ return AE_OK;
|
|
||||||
+ acpi_id = object.processor.proc_id;
|
|
||||||
+ break;
|
|
||||||
+ case ACPI_TYPE_DEVICE:
|
|
||||||
+ status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp);
|
|
||||||
+ if (ACPI_FAILURE(status))
|
|
||||||
+ return AE_OK;
|
|
||||||
+ acpi_id = tmp;
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ return AE_OK;
|
|
||||||
+ }
|
|
||||||
+ if (acpi_id > NR_ACPI_CPUS) {
|
|
||||||
+ WARN_ONCE(1, "There are %d ACPI processors, but kernel can only do %d!\n",
|
|
||||||
+ acpi_id, NR_ACPI_CPUS);
|
|
||||||
+ return AE_OK;
|
|
||||||
+ }
|
|
||||||
+ __set_bit(acpi_id, acpi_id_present);
|
|
||||||
+
|
|
||||||
+ return AE_OK;
|
|
||||||
+}
|
|
||||||
+static unsigned int more_acpi_ids(void)
|
|
||||||
+{
|
|
||||||
+ unsigned int n = 0;
|
|
||||||
+
|
|
||||||
+ acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT,
|
|
||||||
+ ACPI_UINT32_MAX,
|
|
||||||
+ read_acpi_id, NULL, NULL, NULL);
|
|
||||||
+ acpi_get_devices("ACPI0007", read_acpi_id, NULL, NULL);
|
|
||||||
+
|
|
||||||
+ mutex_lock(&acpi_ids_mutex);
|
|
||||||
+ if (!bitmap_equal(acpi_id_present, acpi_ids_done, MAX_ACPI_BITS))
|
|
||||||
+ n = bitmap_weight(acpi_id_present, MAX_ACPI_BITS);
|
|
||||||
+ mutex_unlock(&acpi_ids_mutex);
|
|
||||||
+
|
|
||||||
+ return n;
|
|
||||||
+}
|
|
||||||
+static void do_check_acpi_id_timer(struct work_struct *_work)
|
|
||||||
+{
|
|
||||||
+ /* All online CPUs have been processed at this stage. Now verify
|
|
||||||
+ * whether in fact "online CPUs" == physical CPUs.
|
|
||||||
+ */
|
|
||||||
+ acpi_id_present = kcalloc(MAX_ACPI_BITS, sizeof(unsigned long), GFP_KERNEL);
|
|
||||||
+ if (!acpi_id_present)
|
|
||||||
+ return;
|
|
||||||
+ memset(acpi_id_present, 0, MAX_ACPI_BITS * sizeof(unsigned long));
|
|
||||||
+
|
|
||||||
+ if (more_acpi_ids()) {
|
|
||||||
+ int cpu;
|
|
||||||
+ if (!pr_backup) {
|
|
||||||
+ schedule_delayed_work(&work, DELAY_TIMER);
|
|
||||||
+ return;
|
|
||||||
+ }
|
|
||||||
+ for_each_set_bit(cpu, acpi_id_present, MAX_ACPI_BITS) {
|
|
||||||
+ pr_backup->acpi_id = cpu;
|
|
||||||
+ mutex_lock(&acpi_ids_mutex);
|
|
||||||
+ (void)upload_pm_data(pr_backup);
|
|
||||||
+ mutex_unlock(&acpi_ids_mutex);
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
+ kfree(acpi_id_present);
|
|
||||||
+ acpi_id_present = NULL;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int cpufreq_governor_xen(struct cpufreq_policy *policy,
|
|
||||||
+ unsigned int event)
|
|
||||||
+{
|
|
||||||
+ struct acpi_processor *_pr;
|
|
||||||
+
|
|
||||||
+ switch (event) {
|
|
||||||
+ case CPUFREQ_GOV_START:
|
|
||||||
+ case CPUFREQ_GOV_LIMITS:
|
|
||||||
+ /* Set it to max and let the hypervisor take over */
|
|
||||||
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
|
|
||||||
+
|
|
||||||
+ _pr = per_cpu(processors, policy->cpu /* APIC ID */);
|
|
||||||
+ if (!_pr)
|
|
||||||
+ break;
|
|
||||||
+
|
|
||||||
+ mutex_lock(&acpi_ids_mutex);
|
|
||||||
+ if (!pr_backup) {
|
|
||||||
+ pr_backup = kzalloc(sizeof(struct acpi_processor), GFP_KERNEL);
|
|
||||||
+ memcpy(pr_backup, _pr, sizeof(struct acpi_processor));
|
|
||||||
+
|
|
||||||
+ INIT_DELAYED_WORK_DEFERRABLE(&work, do_check_acpi_id_timer);
|
|
||||||
+ schedule_delayed_work(&work, DELAY_TIMER);
|
|
||||||
+ }
|
|
||||||
+ (void)upload_pm_data(_pr);
|
|
||||||
+ mutex_unlock(&acpi_ids_mutex);
|
|
||||||
+ break;
|
|
||||||
+ default:
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ return 0;
|
|
||||||
+}
|
|
||||||
+static struct cpufreq_governor cpufreq_gov_xen = {
|
|
||||||
+ .name = "xen",
|
|
||||||
+ .governor = cpufreq_governor_xen,
|
|
||||||
+ .owner = THIS_MODULE,
|
|
||||||
+};
|
|
||||||
+static int __init check_prereq(void)
|
|
||||||
+{
|
|
||||||
+ struct cpuinfo_x86 *c = &cpu_data(0);
|
|
||||||
+
|
|
||||||
+ if (!xen_initial_domain())
|
|
||||||
+ return -ENODEV;
|
|
||||||
+
|
|
||||||
+ if (!acpi_gbl_FADT.smi_command)
|
|
||||||
+ return -ENODEV;
|
|
||||||
+
|
|
||||||
+ if (c->x86_vendor == X86_VENDOR_INTEL) {
|
|
||||||
+ if (!cpu_has(c, X86_FEATURE_EST))
|
|
||||||
+ return -ENODEV;
|
|
||||||
+
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+ if (c->x86_vendor == X86_VENDOR_AMD) {
|
|
||||||
+ u32 hi = 0, lo = 0;
|
|
||||||
+ /* Copied from powernow-k8.h, can't include ../cpufreq/powernow
|
|
||||||
+ * as we get compile warnings for the static functions.
|
|
||||||
+ */
|
|
||||||
+#define MSR_PSTATE_CUR_LIMIT 0xc0010061 /* pstate current limit MSR */
|
|
||||||
+ rdmsr(MSR_PSTATE_CUR_LIMIT, lo, hi);
|
|
||||||
+
|
|
||||||
+ /* If the MSR cannot provide the data, the powernow-k8
|
|
||||||
+ * won't process the data properly either.
|
|
||||||
+ */
|
|
||||||
+ if (hi || lo)
|
|
||||||
+ return 0;
|
|
||||||
+ }
|
|
||||||
+ return -ENODEV;
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+static int __init xen_processor_passthru_init(void)
|
|
||||||
+{
|
|
||||||
+ int rc = check_prereq();
|
|
||||||
+
|
|
||||||
+ if (rc)
|
|
||||||
+ return rc;
|
|
||||||
+
|
|
||||||
+ acpi_ids_done = kcalloc(MAX_ACPI_BITS, sizeof(unsigned long), GFP_KERNEL);
|
|
||||||
+ if (!acpi_ids_done)
|
|
||||||
+ return -ENOMEM;
|
|
||||||
+ memset(acpi_ids_done, 0, MAX_ACPI_BITS * sizeof(unsigned long));
|
|
||||||
+
|
|
||||||
+ return cpufreq_register_governor(&cpufreq_gov_xen);
|
|
||||||
+}
|
|
||||||
+static void __exit xen_processor_passthru_exit(void)
|
|
||||||
+{
|
|
||||||
+ cpufreq_unregister_governor(&cpufreq_gov_xen);
|
|
||||||
+ cancel_delayed_work_sync(&work);
|
|
||||||
+ kfree(acpi_ids_done);
|
|
||||||
+ kfree(pr_backup);
|
|
||||||
+}
|
|
||||||
+
|
|
||||||
+MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
|
|
||||||
+MODULE_DESCRIPTION("CPUfreq policy governor 'xen' which uploads PM data to Xen hypervisor");
|
|
||||||
+MODULE_LICENSE("GPL");
|
|
||||||
+
|
|
||||||
+late_initcall(xen_processor_passthru_init);
|
|
||||||
+module_exit(xen_processor_passthru_exit);
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
@ -1,135 +0,0 @@
|
|||||||
From 76ccc297018d25d55b789bbd508861ef1e2cdb0c Mon Sep 17 00:00:00 2001
|
|
||||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Date: Fri, 16 Dec 2011 17:38:18 -0500
|
|
||||||
Subject: x86/PCI: Expand the x86_msi_ops to have a restore MSIs.
|
|
||||||
|
|
||||||
The MSI restore function will become a function pointer in an
|
|
||||||
x86_msi_ops struct. It defaults to the implementation in the
|
|
||||||
io_apic.c and msi.c. We piggyback on the indirection mechanism
|
|
||||||
introduced by "x86: Introduce x86_msi_ops".
|
|
||||||
|
|
||||||
Cc: x86@kernel.org
|
|
||||||
Cc: Thomas Gleixner <tglx@linutronix.de>
|
|
||||||
Cc: "H. Peter Anvin" <hpa@zytor.com>
|
|
||||||
Cc: linux-pci@vger.kernel.org
|
|
||||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
|
||||||
Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org>
|
|
||||||
---
|
|
||||||
arch/x86/include/asm/pci.h | 9 +++++++++
|
|
||||||
arch/x86/include/asm/x86_init.h | 1 +
|
|
||||||
arch/x86/kernel/x86_init.c | 1 +
|
|
||||||
drivers/pci/msi.c | 29 +++++++++++++++++++++++++++--
|
|
||||||
4 files changed, 38 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
|
|
||||||
index d498943..df75d07 100644
|
|
||||||
--- a/arch/x86/include/asm/pci.h
|
|
||||||
+++ b/arch/x86/include/asm/pci.h
|
|
||||||
@@ -112,19 +112,28 @@ static inline void x86_teardown_msi_irq(unsigned int irq)
|
|
||||||
{
|
|
||||||
x86_msi.teardown_msi_irq(irq);
|
|
||||||
}
|
|
||||||
+static inline void x86_restore_msi_irqs(struct pci_dev *dev, int irq)
|
|
||||||
+{
|
|
||||||
+ x86_msi.restore_msi_irqs(dev, irq);
|
|
||||||
+}
|
|
||||||
#define arch_setup_msi_irqs x86_setup_msi_irqs
|
|
||||||
#define arch_teardown_msi_irqs x86_teardown_msi_irqs
|
|
||||||
#define arch_teardown_msi_irq x86_teardown_msi_irq
|
|
||||||
+#define arch_restore_msi_irqs x86_restore_msi_irqs
|
|
||||||
/* implemented in arch/x86/kernel/apic/io_apic. */
|
|
||||||
int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
|
|
||||||
void native_teardown_msi_irq(unsigned int irq);
|
|
||||||
+void native_restore_msi_irqs(struct pci_dev *dev, int irq);
|
|
||||||
/* default to the implementation in drivers/lib/msi.c */
|
|
||||||
#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
|
|
||||||
+#define HAVE_DEFAULT_MSI_RESTORE_IRQS
|
|
||||||
void default_teardown_msi_irqs(struct pci_dev *dev);
|
|
||||||
+void default_restore_msi_irqs(struct pci_dev *dev, int irq);
|
|
||||||
#else
|
|
||||||
#define native_setup_msi_irqs NULL
|
|
||||||
#define native_teardown_msi_irq NULL
|
|
||||||
#define default_teardown_msi_irqs NULL
|
|
||||||
+#define default_restore_msi_irqs NULL
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
|
|
||||||
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
|
|
||||||
index 1971e65..cd52084 100644
|
|
||||||
--- a/arch/x86/include/asm/x86_init.h
|
|
||||||
+++ b/arch/x86/include/asm/x86_init.h
|
|
||||||
@@ -177,6 +177,7 @@ struct x86_msi_ops {
|
|
||||||
int (*setup_msi_irqs)(struct pci_dev *dev, int nvec, int type);
|
|
||||||
void (*teardown_msi_irq)(unsigned int irq);
|
|
||||||
void (*teardown_msi_irqs)(struct pci_dev *dev);
|
|
||||||
+ void (*restore_msi_irqs)(struct pci_dev *dev, int irq);
|
|
||||||
};
|
|
||||||
|
|
||||||
extern struct x86_init_ops x86_init;
|
|
||||||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
|
|
||||||
index c1d6cd5..83b05ad 100644
|
|
||||||
--- a/arch/x86/kernel/x86_init.c
|
|
||||||
+++ b/arch/x86/kernel/x86_init.c
|
|
||||||
@@ -114,4 +114,5 @@ struct x86_msi_ops x86_msi = {
|
|
||||||
.setup_msi_irqs = native_setup_msi_irqs,
|
|
||||||
.teardown_msi_irq = native_teardown_msi_irq,
|
|
||||||
.teardown_msi_irqs = default_teardown_msi_irqs,
|
|
||||||
+ .restore_msi_irqs = default_restore_msi_irqs,
|
|
||||||
};
|
|
||||||
diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
|
|
||||||
index 82de95e..a825d78 100644
|
|
||||||
--- a/drivers/pci/msi.c
|
|
||||||
+++ b/drivers/pci/msi.c
|
|
||||||
@@ -86,6 +86,31 @@ void default_teardown_msi_irqs(struct pci_dev *dev)
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
+#ifndef arch_restore_msi_irqs
|
|
||||||
+# define arch_restore_msi_irqs default_restore_msi_irqs
|
|
||||||
+# define HAVE_DEFAULT_MSI_RESTORE_IRQS
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
+#ifdef HAVE_DEFAULT_MSI_RESTORE_IRQS
|
|
||||||
+void default_restore_msi_irqs(struct pci_dev *dev, int irq)
|
|
||||||
+{
|
|
||||||
+ struct msi_desc *entry;
|
|
||||||
+
|
|
||||||
+ entry = NULL;
|
|
||||||
+ if (dev->msix_enabled) {
|
|
||||||
+ list_for_each_entry(entry, &dev->msi_list, list) {
|
|
||||||
+ if (irq == entry->irq)
|
|
||||||
+ break;
|
|
||||||
+ }
|
|
||||||
+ } else if (dev->msi_enabled) {
|
|
||||||
+ entry = irq_get_msi_desc(irq);
|
|
||||||
+ }
|
|
||||||
+
|
|
||||||
+ if (entry)
|
|
||||||
+ write_msi_msg(irq, &entry->msg);
|
|
||||||
+}
|
|
||||||
+#endif
|
|
||||||
+
|
|
||||||
static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
|
|
||||||
{
|
|
||||||
u16 control;
|
|
||||||
@@ -372,7 +397,7 @@ static void __pci_restore_msi_state(struct pci_dev *dev)
|
|
||||||
|
|
||||||
pci_intx_for_msi(dev, 0);
|
|
||||||
msi_set_enable(dev, pos, 0);
|
|
||||||
- write_msi_msg(dev->irq, &entry->msg);
|
|
||||||
+ arch_restore_msi_irqs(dev, dev->irq);
|
|
||||||
|
|
||||||
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
|
|
||||||
msi_mask_irq(entry, msi_capable_mask(control), entry->masked);
|
|
||||||
@@ -400,7 +425,7 @@ static void __pci_restore_msix_state(struct pci_dev *dev)
|
|
||||||
pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
|
|
||||||
|
|
||||||
list_for_each_entry(entry, &dev->msi_list, list) {
|
|
||||||
- write_msi_msg(entry->irq, &entry->msg);
|
|
||||||
+ arch_restore_msi_irqs(dev, entry->irq);
|
|
||||||
msix_mask_irq(entry, entry->masked);
|
|
||||||
}
|
|
||||||
|
|
||||||
--
|
|
||||||
1.7.6.4
|
|
||||||
|
|
4254
patches.xen/pvops-0100-usb-xen-pvusb-driver.patch
Normal file
4254
patches.xen/pvops-0100-usb-xen-pvusb-driver.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,72 @@
|
|||||||
|
From 433928d3823f561919ead305194e46e5311b573d Mon Sep 17 00:00:00 2001
|
||||||
|
From: Marek Marczykowski <marmarek@invisiblethingslab.com>
|
||||||
|
Date: Sat, 23 Jun 2012 19:50:44 +0200
|
||||||
|
Subject: [PATCH 1/2] Revert "xen/pat: Disable PAT support for now."
|
||||||
|
Organization: Invisible Things Lab
|
||||||
|
|
||||||
|
This reverts commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1.
|
||||||
|
|
||||||
|
We haven't observed the failure that the reverted commit works around, but it caused
|
||||||
|
horrible GPU performance. Anyway there is "nopat" option.
|
||||||
|
|
||||||
|
Signed-off-by: Marek Marczykowski <marmarek@invisiblethingslab.com>
|
||||||
|
---
|
||||||
|
arch/x86/xen/enlighten.c | 2 --
|
||||||
|
arch/x86/xen/mmu.c | 8 ++++----
|
||||||
|
2 files changed, 4 insertions(+), 6 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
|
||||||
|
index 6c7f1e8..bf3319c 100644
|
||||||
|
--- a/arch/x86/xen/enlighten.c
|
||||||
|
+++ b/arch/x86/xen/enlighten.c
|
||||||
|
@@ -1269,9 +1269,7 @@ asmlinkage void __init xen_start_kernel(void)
|
||||||
|
|
||||||
|
/* Prevent unwanted bits from being set in PTEs. */
|
||||||
|
__supported_pte_mask &= ~_PAGE_GLOBAL;
|
||||||
|
-#if 0
|
||||||
|
if (!xen_initial_domain())
|
||||||
|
-#endif
|
||||||
|
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
|
||||||
|
|
||||||
|
__supported_pte_mask |= _PAGE_IOMAP;
|
||||||
|
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
|
||||||
|
index 69f5857..a5d252a 100644
|
||||||
|
--- a/arch/x86/xen/mmu.c
|
||||||
|
+++ b/arch/x86/xen/mmu.c
|
||||||
|
@@ -420,13 +420,13 @@ static pteval_t iomap_pte(pteval_t val)
|
||||||
|
static pteval_t xen_pte_val(pte_t pte)
|
||||||
|
{
|
||||||
|
pteval_t pteval = pte.pte;
|
||||||
|
-#if 0
|
||||||
|
+
|
||||||
|
/* If this is a WC pte, convert back from Xen WC to Linux WC */
|
||||||
|
if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
|
||||||
|
WARN_ON(!pat_enabled);
|
||||||
|
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
|
||||||
|
}
|
||||||
|
-#endif
|
||||||
|
+
|
||||||
|
if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
|
||||||
|
return pteval;
|
||||||
|
|
||||||
|
@@ -468,7 +468,7 @@ void xen_set_pat(u64 pat)
|
||||||
|
static pte_t xen_make_pte(pteval_t pte)
|
||||||
|
{
|
||||||
|
phys_addr_t addr = (pte & PTE_PFN_MASK);
|
||||||
|
-#if 0
|
||||||
|
+
|
||||||
|
/* If Linux is trying to set a WC pte, then map to the Xen WC.
|
||||||
|
* If _PAGE_PAT is set, then it probably means it is really
|
||||||
|
* _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
|
||||||
|
@@ -481,7 +481,7 @@ static pte_t xen_make_pte(pteval_t pte)
|
||||||
|
if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
|
||||||
|
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
|
||||||
|
}
|
||||||
|
-#endif
|
||||||
|
+
|
||||||
|
/*
|
||||||
|
* Unprivileged domains are allowed to do IOMAPpings for
|
||||||
|
* PCI passthrough, but not map ISA space. The ISA
|
||||||
|
--
|
||||||
|
1.7.4.4
|
||||||
|
|
@ -0,0 +1,196 @@
|
|||||||
|
From f37a97dead89d07bce4d8fedc4c295c9bc700ab5 Mon Sep 17 00:00:00 2001
|
||||||
|
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||||
|
Date: Fri, 4 Nov 2011 11:59:34 -0400
|
||||||
|
Subject: [PATCH 2/2] x86/cpa: Use pte_attrs instead of pte_flags on
|
||||||
|
CPA/set_p.._wb/wc operations.
|
||||||
|
|
||||||
|
When using the paravirt interface, most of the page operations are wrapped
|
||||||
|
in the pvops interface. The one that is not is the pte_flags. The reason
|
||||||
|
being that for most cases, the "raw" PTE flag values for baremetal and whatever
|
||||||
|
pvops platform is running (in this case) - share the same bit meaning.
|
||||||
|
|
||||||
|
Except for PAT. Under Linux, the PAT MSR is written to be:
|
||||||
|
|
||||||
|
PAT4 PAT0
|
||||||
|
+---+----+----+----+-----+----+----+
|
||||||
|
WC | WC | WB | UC | UC- | WC | WB | <= Linux
|
||||||
|
+---+----+----+----+-----+----+----+
|
||||||
|
WC | WT | WB | UC | UC- | WT | WB | <= BIOS
|
||||||
|
+---+----+----+----+-----+----+----+
|
||||||
|
WC | WP | WC | UC | UC- | WT | WB | <= Xen
|
||||||
|
+---+----+----+----+-----+----+----+
|
||||||
|
|
||||||
|
The lookup of this index table translates to looking up
|
||||||
|
Bit 7, Bit 4, and Bit 3 of PTE:
|
||||||
|
|
||||||
|
PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
|
||||||
|
|
||||||
|
If all bits are off, then we are using PAT0. If bit 3 turned on,
|
||||||
|
then we are using PAT1, if bit 3 and bit 4, then PAT2..
|
||||||
|
|
||||||
|
Back to the PAT MSR table:
|
||||||
|
|
||||||
|
As you can see, the PAT1 translates to PAT4 under Xen. Under Linux
|
||||||
|
we only use PAT0, PAT1, and PAT2 for the caching as:
|
||||||
|
|
||||||
|
WB = none (so PAT0)
|
||||||
|
WC = PWT (bit 3 on)
|
||||||
|
UC = PWT | PCD (bit 3 and 4 are on).
|
||||||
|
|
||||||
|
But to make it work with Xen, we end up doing for WC a translation:
|
||||||
|
|
||||||
|
PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
|
||||||
|
|
||||||
|
And to translate back (when the paravirt pte_val is used) we would:
|
||||||
|
|
||||||
|
PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
|
||||||
|
|
||||||
|
This works quite well, except if code uses the pte_flags, as pte_flags
|
||||||
|
reads the raw value and does not go through the paravirt. Which means
|
||||||
|
that if (when running under Xen):
|
||||||
|
|
||||||
|
1) we allocate some pages.
|
||||||
|
2) call set_pages_array_wc, which ends up calling:
|
||||||
|
__page_change_att_set_clr(.., __pgprot(__PAGE_WC), /* set */
|
||||||
|
, __pgprot(__PAGE_MASK), /* clear */
|
||||||
|
which ends up reading the _raw_ PTE flags and _only_ look at the
|
||||||
|
_PTE_FLAG_MASK contents with __PAGE_MASK cleared (0x18) and
|
||||||
|
__PAGE_WC (0x8) set.
|
||||||
|
|
||||||
|
read raw *pte -> 0x67
|
||||||
|
*pte = 0x67 & ~0x18 | 0x8
|
||||||
|
*pte = 0x67 & 0xfffffe7 | 0x8
|
||||||
|
*pte = 0x6f
|
||||||
|
|
||||||
|
[now set_pte_atomic is called, and 0x6f is written in, but under
|
||||||
|
xen_make_pte, the bit 3 is translated to bit 7, so it ends up
|
||||||
|
writing 0xa7, which is correct]
|
||||||
|
|
||||||
|
3) do something to them.
|
||||||
|
4) call set_pages_array_wb
|
||||||
|
__page_change_att_set_clr(.., __pgprot(__PAGE_WB), /* set */
|
||||||
|
, __pgprot(__PAGE_MASK), /* clear */
|
||||||
|
which ends up reading the _raw_ PTE and _only_ look at the
|
||||||
|
_PTE_FLAG_MASK contents with _PAGE_MASK cleared (0x18) and
|
||||||
|
__PAGE_WB (0x0) set:
|
||||||
|
|
||||||
|
read raw *pte -> 0xa7
|
||||||
|
*pte = 0xa7 & ~0x18 | 0
|
||||||
|
*pte = 0xa7 & 0xfffffe7 | 0
|
||||||
|
*pte = 0xa7
|
||||||
|
|
||||||
|
[we check whether the old PTE is different from the new one
|
||||||
|
|
||||||
|
if (pte_val(old_pte) != pte_val(new_pte)) {
|
||||||
|
set_pte_atomic(kpte, new_pte);
|
||||||
|
...
|
||||||
|
|
||||||
|
and find out that 0xA7 == 0xA7 so we do not write the new PTE value in]
|
||||||
|
|
||||||
|
End result is that we failed at removing the WC caching bit!
|
||||||
|
|
||||||
|
5) free them.
|
||||||
|
[and have pages with PAT4 (bit 7) set, so other subsystems end up using
|
||||||
|
the pages that have the write combined bit set resulting in crashes. Yikes!].
|
||||||
|
|
||||||
|
The fix, which this patch proposes, is to wrap the pte_pgprot in the CPA
|
||||||
|
code with newly introduced pte_attrs which can go through the pvops interface
|
||||||
|
to get the "emulated" value instead of the raw. Naturally if CONFIG_PARAVIRT is
|
||||||
|
not set, it would end calling native_pte_val.
|
||||||
|
|
||||||
|
The other way to fix this is by wrapping pte_flags and go through the pvops
|
||||||
|
interface and it really is the Right Thing to do. The problem is, that past
|
||||||
|
experience with mprotect stuff demonstrates that it can be really expensive in inner
|
||||||
|
loops, and pte_flags() is used in some very perf-critical areas.
|
||||||
|
|
||||||
|
Example code to run this and see the various mysterious subsystems/applications
|
||||||
|
crashing
|
||||||
|
|
||||||
|
MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
|
||||||
|
MODULE_DESCRIPTION("wb_to_wc_and_back");
|
||||||
|
MODULE_LICENSE("GPL");
|
||||||
|
MODULE_VERSION(WB_TO_WC);
|
||||||
|
|
||||||
|
static int thread(void *arg)
|
||||||
|
{
|
||||||
|
struct page *a[MAX_PAGES];
|
||||||
|
unsigned int i, j;
|
||||||
|
do {
|
||||||
|
for (j = 0, i = 0;i < MAX_PAGES; i++, j++) {
|
||||||
|
a[i] = alloc_page(GFP_KERNEL);
|
||||||
|
if (!a[i])
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
set_pages_array_wc(a, j);
|
||||||
|
set_current_state(TASK_INTERRUPTIBLE);
|
||||||
|
schedule_timeout_interruptible(HZ);
|
||||||
|
for (i = 0; i < j; i++) {
|
||||||
|
unsigned long *addr = page_address(a[i]);
|
||||||
|
if (addr) {
|
||||||
|
memset(addr, 0xc2, PAGE_SIZE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
set_pages_array_wb(a, j);
|
||||||
|
for (i = 0; i< MAX_PAGES; i++) {
|
||||||
|
if (a[i])
|
||||||
|
__free_page(a[i]);
|
||||||
|
a[i] = NULL;
|
||||||
|
}
|
||||||
|
} while (!kthread_should_stop());
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
static struct task_struct *t;
|
||||||
|
static int __init wb_to_wc_init(void)
|
||||||
|
{
|
||||||
|
t = kthread_run(thread, NULL, "wb_to_wc_and_back");
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
static void __exit wb_to_wc_exit(void)
|
||||||
|
{
|
||||||
|
if (t)
|
||||||
|
kthread_stop(t);
|
||||||
|
}
|
||||||
|
module_init(wb_to_wc_init);
|
||||||
|
module_exit(wb_to_wc_exit);
|
||||||
|
|
||||||
|
This fixes RH BZ #742032, #787403, and #745574
|
||||||
|
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||||
|
Tested-by: Tom Goetz <tom.goetz@virtualcomputer.com>
|
||||||
|
CC: stable@kernel.org
|
||||||
|
---
|
||||||
|
arch/x86/include/asm/pgtable.h | 5 +++++
|
||||||
|
arch/x86/mm/pageattr.c | 2 +-
|
||||||
|
2 files changed, 6 insertions(+), 1 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
|
||||||
|
index 49afb3f..fa7bd2c 100644
|
||||||
|
--- a/arch/x86/include/asm/pgtable.h
|
||||||
|
+++ b/arch/x86/include/asm/pgtable.h
|
||||||
|
@@ -349,6 +349,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
|
||||||
|
return __pgprot(preservebits | addbits);
|
||||||
|
}
|
||||||
|
|
||||||
|
+static inline pgprot_t pte_attrs(pte_t pte)
|
||||||
|
+{
|
||||||
|
+ return __pgprot(pte_val(pte) & PTE_FLAGS_MASK);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
|
||||||
|
|
||||||
|
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
|
||||||
|
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
|
||||||
|
index e1ebde3..1ae1b4b 100644
|
||||||
|
--- a/arch/x86/mm/pageattr.c
|
||||||
|
+++ b/arch/x86/mm/pageattr.c
|
||||||
|
@@ -651,7 +651,7 @@ repeat:
|
||||||
|
|
||||||
|
if (level == PG_LEVEL_4K) {
|
||||||
|
pte_t new_pte;
|
||||||
|
- pgprot_t new_prot = pte_pgprot(old_pte);
|
||||||
|
+ pgprot_t new_prot = pte_attrs(old_pte);
|
||||||
|
unsigned long pfn = pte_pfn(old_pte);
|
||||||
|
|
||||||
|
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
|
||||||
|
--
|
||||||
|
1.7.4.4
|
||||||
|
|
@ -1,9 +1,9 @@
|
|||||||
--- linux-3.4.1.orig/drivers/block/xen-blkfront.c 2012-06-01 09:18:44.000000000 +0200
|
--- linux-3.4.1.orig/drivers/block/xen-blkfront.c 2012-06-01 09:18:44.000000000 +0200
|
||||||
+++ linux-3.4.1/drivers/block/xen-blkfront.c 2012-07-15 15:54:31.350255623 +0200
|
+++ linux-3.4.1/drivers/block/xen-blkfront.c 2012-07-15 15:54:31.350255623 +0200
|
||||||
@@ -44,6 +44,7 @@
|
@@ -44,6 +44,7 @@
|
||||||
#include <linux/slab.h>
|
|
||||||
#include <linux/mutex.h>
|
#include <linux/mutex.h>
|
||||||
#include <linux/scatterlist.h>
|
#include <linux/scatterlist.h>
|
||||||
|
#include <linux/bitmap.h>
|
||||||
+#include <linux/fd.h>
|
+#include <linux/fd.h>
|
||||||
|
|
||||||
#include <xen/xen.h>
|
#include <xen/xen.h>
|
||||||
|
@ -1,24 +0,0 @@
|
|||||||
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
|
|
||||||
index 698b905..e31ebff 100644
|
|
||||||
--- a/drivers/net/xen-netfront.c
|
|
||||||
+++ b/drivers/net/xen-netfront.c
|
|
||||||
@@ -1953,9 +1953,6 @@ static int __init netif_init(void)
|
|
||||||
if (!xen_domain())
|
|
||||||
return -ENODEV;
|
|
||||||
|
|
||||||
- if (xen_initial_domain())
|
|
||||||
- return 0;
|
|
||||||
-
|
|
||||||
printk(KERN_INFO "Initialising Xen virtual ethernet driver.\n");
|
|
||||||
|
|
||||||
return xenbus_register_frontend(&netfront_driver);
|
|
||||||
@@ -1965,9 +1962,6 @@ module_init(netif_init);
|
|
||||||
|
|
||||||
static void __exit netif_exit(void)
|
|
||||||
{
|
|
||||||
- if (xen_initial_domain())
|
|
||||||
- return;
|
|
||||||
-
|
|
||||||
xenbus_unregister_driver(&netfront_driver);
|
|
||||||
}
|
|
||||||
module_exit(netif_exit);
|
|
@ -1,42 +0,0 @@
|
|||||||
From: Simon Graham <simon.graham@citrix.com>
|
|
||||||
To: Ian Campbell <Ian.Campbell@citrix.com>, "konrad.wilk@oracle.com"
|
|
||||||
<konrad.wilk@oracle.com>, "xen-devel@lists.xensource.com"
|
|
||||||
<xen-devel@lists.xensource.com>, "netdev@vger.kernel.org"
|
|
||||||
<netdev@vger.kernel.org>
|
|
||||||
Date: Thu, 24 May 2012 12:26:07 -0400
|
|
||||||
Cc: "bhutchings@solarflare.com" <bhutchings@solarflare.com>,
|
|
||||||
Simon Graham <simon.graham@citrix.com>,
|
|
||||||
"davem@davemloft.net" <davem@davemloft.net>,
|
|
||||||
"adnan.misherfi@oracle.com" <adnan.misherfi@oracle.com>
|
|
||||||
Subject: [Xen-devel] [PATCH] xen/netback: Calculate the number of SKB slots
|
|
||||||
required correctly
|
|
||||||
|
|
||||||
When calculating the number of slots required for a packet header, the code
|
|
||||||
was reserving too many slots if the header crossed a page boundary. Since
|
|
||||||
netbk_gop_skb copies the header to the start of the page, the count of
|
|
||||||
slots required for the header should be based solely on the header size.
|
|
||||||
|
|
||||||
This problem is easy to reproduce if a VIF is bridged to a USB 3G modem
|
|
||||||
device as the skb->data value always starts near the end of the first page.
|
|
||||||
|
|
||||||
Signed-off-by: Simon Graham <simon.graham@citrix.com>
|
|
||||||
---
|
|
||||||
drivers/net/xen-netback/netback.c | 3 +--
|
|
||||||
1 files changed, 1 insertions(+), 2 deletions(-)
|
|
||||||
|
|
||||||
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
|
|
||||||
index 2596401..f4a6fca 100644
|
|
||||||
--- a/drivers/net/xen-netback/netback.c
|
|
||||||
+++ b/drivers/net/xen-netback/netback.c
|
|
||||||
@@ -325,8 +325,7 @@ unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb)
|
|
||||||
unsigned int count;
|
|
||||||
int i, copy_off;
|
|
||||||
|
|
||||||
- count = DIV_ROUND_UP(
|
|
||||||
- offset_in_page(skb->data)+skb_headlen(skb), PAGE_SIZE);
|
|
||||||
+ count = DIV_ROUND_UP(skb_headlen(skb), PAGE_SIZE);
|
|
||||||
|
|
||||||
copy_off = skb_headlen(skb) % PAGE_SIZE;
|
|
||||||
|
|
||||||
--
|
|
||||||
1.7.9.1
|
|
@ -1,15 +1,12 @@
|
|||||||
patches.xen/pvops-0001-xen-Add-privcmd-device-driver.patch
|
# ACPI S3
|
||||||
patches.xen/pvops-0002-x86-acpi-tboot-Have-a-ACPI-os-prepare-sleep-instead-.patch
|
patches.xen/pvops-0001-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
|
||||||
patches.xen/pvops-0003-tboot-Add-return-values-for-tboot_sleep.patch
|
patches.xen/pvops-0003-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
|
||||||
patches.xen/pvops-0004-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
|
|
||||||
patches.xen/pvops-0005-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch
|
# fix for GPU performance (revert workaround and apply proper fix), should go in 3.5
|
||||||
patches.xen/pvops-0006-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
|
patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch
|
||||||
patches.xen/pvops-0007-xen-Utilize-the-restore_msi_irqs-hook.patch
|
patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch
|
||||||
patches.xen/pvops-0008-xen-setup-pm-acpi-Remove-the-call-to-boot_option_idl.patch
|
|
||||||
patches.xen/pvops-0009-xen-enlighten-Expose-MWAIT-and-MWAIT_LEAF-if-hypervi.patch
|
# Additional features
|
||||||
patches.xen/pvops-0010-CPUFREQ-xen-governor-for-Xen-hypervisor-frequency-sc.patch
|
patches.xen/pvops-0100-usb-xen-pvusb-driver.patch
|
||||||
patches.xen/pvops-0011-x86-PCI-Expand-the-x86_msi_ops-to-have-a-restore-MSI.patch
|
|
||||||
patches.xen/pvops-enable-netfront-in-dom0.patch
|
|
||||||
patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch
|
|
||||||
patches.xen/pvops-blkfront-removable-flag.patch
|
patches.xen/pvops-blkfront-removable-flag.patch
|
||||||
patches.xen/pvops-blkfront-eject-support.patch
|
patches.xen/pvops-blkfront-eject-support.patch
|
||||||
|
@ -1 +1 @@
|
|||||||
3.2.30
|
3.7.6
|
||||||
|
Loading…
Reference in New Issue
Block a user