968 lines
22 KiB
Diff
968 lines
22 KiB
Diff
|
From d8414d3c157dc1f83e73c17447ba41fe5afa9d3d Mon Sep 17 00:00:00 2001
|
||
|
From: Bastian Blank <waldi@debian.org>
|
||
|
Date: Fri, 16 Dec 2011 11:34:33 -0500
|
||
|
Subject: xen: Add privcmd device driver
|
||
|
|
||
|
Access to arbitrary hypercalls is currently provided via xenfs. This
|
||
|
adds a standard character device that provides the same access. The support in xenfs
|
||
|
remains for backward compatibility and uses the device driver code.
|
||
|
|
||
|
Signed-off-by: Bastian Blank <waldi@debian.org>
|
||
|
Acked-by: Ian Campbell <ian.campbell@citrix.com>
|
||
|
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||
|
---
|
||
|
drivers/xen/Kconfig | 7 +
|
||
|
drivers/xen/Makefile | 2 +
|
||
|
drivers/xen/privcmd.c | 437 +++++++++++++++++++++++++++++++++++++++++++
|
||
|
drivers/xen/privcmd.h | 3 +
|
||
|
drivers/xen/xenfs/Makefile | 2 +-
|
||
|
drivers/xen/xenfs/privcmd.c | 400 ---------------------------------------
|
||
|
drivers/xen/xenfs/super.c | 3 +-
|
||
|
drivers/xen/xenfs/xenfs.h | 1 -
|
||
|
8 files changed, 452 insertions(+), 403 deletions(-)
|
||
|
create mode 100644 drivers/xen/privcmd.c
|
||
|
create mode 100644 drivers/xen/privcmd.h
|
||
|
delete mode 100644 drivers/xen/xenfs/privcmd.c
|
||
|
|
||
|
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
|
||
|
index 8795480..a1ced52 100644
|
||
|
--- a/drivers/xen/Kconfig
|
||
|
+++ b/drivers/xen/Kconfig
|
||
|
@@ -86,6 +86,7 @@ config XEN_BACKEND
|
||
|
|
||
|
config XENFS
|
||
|
tristate "Xen filesystem"
|
||
|
+ select XEN_PRIVCMD
|
||
|
default y
|
||
|
help
|
||
|
The xen filesystem provides a way for domains to share
|
||
|
@@ -171,4 +172,10 @@ config XEN_PCIDEV_BACKEND
|
||
|
xen-pciback.hide=(03:00.0)(04:00.0)
|
||
|
|
||
|
If in doubt, say m.
|
||
|
+
|
||
|
+config XEN_PRIVCMD
|
||
|
+ tristate
|
||
|
+ depends on XEN
|
||
|
+ default m
|
||
|
+
|
||
|
endmenu
|
||
|
diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
|
||
|
index 974fffd..aa31337 100644
|
||
|
--- a/drivers/xen/Makefile
|
||
|
+++ b/drivers/xen/Makefile
|
||
|
@@ -19,7 +19,9 @@ obj-$(CONFIG_XEN_TMEM) += tmem.o
|
||
|
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
|
||
|
obj-$(CONFIG_XEN_DOM0) += pci.o
|
||
|
obj-$(CONFIG_XEN_PCIDEV_BACKEND) += xen-pciback/
|
||
|
+obj-$(CONFIG_XEN_PRIVCMD) += xen-privcmd.o
|
||
|
|
||
|
xen-evtchn-y := evtchn.o
|
||
|
xen-gntdev-y := gntdev.o
|
||
|
xen-gntalloc-y := gntalloc.o
|
||
|
+xen-privcmd-y := privcmd.o
|
||
|
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
|
||
|
new file mode 100644
|
||
|
index 0000000..4e8d3da
|
||
|
--- /dev/null
|
||
|
+++ b/drivers/xen/privcmd.c
|
||
|
@@ -0,0 +1,437 @@
|
||
|
+/******************************************************************************
|
||
|
+ * privcmd.c
|
||
|
+ *
|
||
|
+ * Interface to privileged domain-0 commands.
|
||
|
+ *
|
||
|
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
|
||
|
+ */
|
||
|
+
|
||
|
+#include <linux/kernel.h>
|
||
|
+#include <linux/module.h>
|
||
|
+#include <linux/sched.h>
|
||
|
+#include <linux/slab.h>
|
||
|
+#include <linux/string.h>
|
||
|
+#include <linux/errno.h>
|
||
|
+#include <linux/mm.h>
|
||
|
+#include <linux/mman.h>
|
||
|
+#include <linux/uaccess.h>
|
||
|
+#include <linux/swap.h>
|
||
|
+#include <linux/highmem.h>
|
||
|
+#include <linux/pagemap.h>
|
||
|
+#include <linux/seq_file.h>
|
||
|
+#include <linux/miscdevice.h>
|
||
|
+
|
||
|
+#include <asm/pgalloc.h>
|
||
|
+#include <asm/pgtable.h>
|
||
|
+#include <asm/tlb.h>
|
||
|
+#include <asm/xen/hypervisor.h>
|
||
|
+#include <asm/xen/hypercall.h>
|
||
|
+
|
||
|
+#include <xen/xen.h>
|
||
|
+#include <xen/privcmd.h>
|
||
|
+#include <xen/interface/xen.h>
|
||
|
+#include <xen/features.h>
|
||
|
+#include <xen/page.h>
|
||
|
+#include <xen/xen-ops.h>
|
||
|
+
|
||
|
+#include "privcmd.h"
|
||
|
+
|
||
|
+MODULE_LICENSE("GPL");
|
||
|
+
|
||
|
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
||
|
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
|
||
|
+#endif
|
||
|
+
|
||
|
+static long privcmd_ioctl_hypercall(void __user *udata)
|
||
|
+{
|
||
|
+ struct privcmd_hypercall hypercall;
|
||
|
+ long ret;
|
||
|
+
|
||
|
+ if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
|
||
|
+ return -EFAULT;
|
||
|
+
|
||
|
+ ret = privcmd_call(hypercall.op,
|
||
|
+ hypercall.arg[0], hypercall.arg[1],
|
||
|
+ hypercall.arg[2], hypercall.arg[3],
|
||
|
+ hypercall.arg[4]);
|
||
|
+
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+static void free_page_list(struct list_head *pages)
|
||
|
+{
|
||
|
+ struct page *p, *n;
|
||
|
+
|
||
|
+ list_for_each_entry_safe(p, n, pages, lru)
|
||
|
+ __free_page(p);
|
||
|
+
|
||
|
+ INIT_LIST_HEAD(pages);
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * Given an array of items in userspace, return a list of pages
|
||
|
+ * containing the data. If copying fails, either because of memory
|
||
|
+ * allocation failure or a problem reading user memory, return an
|
||
|
+ * error code; it's up to the caller to dispose of any partial list.
|
||
|
+ */
|
||
|
+static int gather_array(struct list_head *pagelist,
|
||
|
+ unsigned nelem, size_t size,
|
||
|
+ void __user *data)
|
||
|
+{
|
||
|
+ unsigned pageidx;
|
||
|
+ void *pagedata;
|
||
|
+ int ret;
|
||
|
+
|
||
|
+ if (size > PAGE_SIZE)
|
||
|
+ return 0;
|
||
|
+
|
||
|
+ pageidx = PAGE_SIZE;
|
||
|
+ pagedata = NULL; /* quiet, gcc */
|
||
|
+ while (nelem--) {
|
||
|
+ if (pageidx > PAGE_SIZE-size) {
|
||
|
+ struct page *page = alloc_page(GFP_KERNEL);
|
||
|
+
|
||
|
+ ret = -ENOMEM;
|
||
|
+ if (page == NULL)
|
||
|
+ goto fail;
|
||
|
+
|
||
|
+ pagedata = page_address(page);
|
||
|
+
|
||
|
+ list_add_tail(&page->lru, pagelist);
|
||
|
+ pageidx = 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ ret = -EFAULT;
|
||
|
+ if (copy_from_user(pagedata + pageidx, data, size))
|
||
|
+ goto fail;
|
||
|
+
|
||
|
+ data += size;
|
||
|
+ pageidx += size;
|
||
|
+ }
|
||
|
+
|
||
|
+ ret = 0;
|
||
|
+
|
||
|
+fail:
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+/*
|
||
|
+ * Call function "fn" on each element of the array fragmented
|
||
|
+ * over a list of pages.
|
||
|
+ */
|
||
|
+static int traverse_pages(unsigned nelem, size_t size,
|
||
|
+ struct list_head *pos,
|
||
|
+ int (*fn)(void *data, void *state),
|
||
|
+ void *state)
|
||
|
+{
|
||
|
+ void *pagedata;
|
||
|
+ unsigned pageidx;
|
||
|
+ int ret = 0;
|
||
|
+
|
||
|
+ BUG_ON(size > PAGE_SIZE);
|
||
|
+
|
||
|
+ pageidx = PAGE_SIZE;
|
||
|
+ pagedata = NULL; /* hush, gcc */
|
||
|
+
|
||
|
+ while (nelem--) {
|
||
|
+ if (pageidx > PAGE_SIZE-size) {
|
||
|
+ struct page *page;
|
||
|
+ pos = pos->next;
|
||
|
+ page = list_entry(pos, struct page, lru);
|
||
|
+ pagedata = page_address(page);
|
||
|
+ pageidx = 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ ret = (*fn)(pagedata + pageidx, state);
|
||
|
+ if (ret)
|
||
|
+ break;
|
||
|
+ pageidx += size;
|
||
|
+ }
|
||
|
+
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+struct mmap_mfn_state {
|
||
|
+ unsigned long va;
|
||
|
+ struct vm_area_struct *vma;
|
||
|
+ domid_t domain;
|
||
|
+};
|
||
|
+
|
||
|
+static int mmap_mfn_range(void *data, void *state)
|
||
|
+{
|
||
|
+ struct privcmd_mmap_entry *msg = data;
|
||
|
+ struct mmap_mfn_state *st = state;
|
||
|
+ struct vm_area_struct *vma = st->vma;
|
||
|
+ int rc;
|
||
|
+
|
||
|
+ /* Do not allow range to wrap the address space. */
|
||
|
+ if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
|
||
|
+ ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
|
||
|
+ return -EINVAL;
|
||
|
+
|
||
|
+ /* Range chunks must be contiguous in va space. */
|
||
|
+ if ((msg->va != st->va) ||
|
||
|
+ ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
|
||
|
+ return -EINVAL;
|
||
|
+
|
||
|
+ rc = xen_remap_domain_mfn_range(vma,
|
||
|
+ msg->va & PAGE_MASK,
|
||
|
+ msg->mfn, msg->npages,
|
||
|
+ vma->vm_page_prot,
|
||
|
+ st->domain);
|
||
|
+ if (rc < 0)
|
||
|
+ return rc;
|
||
|
+
|
||
|
+ st->va += msg->npages << PAGE_SHIFT;
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static long privcmd_ioctl_mmap(void __user *udata)
|
||
|
+{
|
||
|
+ struct privcmd_mmap mmapcmd;
|
||
|
+ struct mm_struct *mm = current->mm;
|
||
|
+ struct vm_area_struct *vma;
|
||
|
+ int rc;
|
||
|
+ LIST_HEAD(pagelist);
|
||
|
+ struct mmap_mfn_state state;
|
||
|
+
|
||
|
+ if (!xen_initial_domain())
|
||
|
+ return -EPERM;
|
||
|
+
|
||
|
+ if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
|
||
|
+ return -EFAULT;
|
||
|
+
|
||
|
+ rc = gather_array(&pagelist,
|
||
|
+ mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
||
|
+ mmapcmd.entry);
|
||
|
+
|
||
|
+ if (rc || list_empty(&pagelist))
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ down_write(&mm->mmap_sem);
|
||
|
+
|
||
|
+ {
|
||
|
+ struct page *page = list_first_entry(&pagelist,
|
||
|
+ struct page, lru);
|
||
|
+ struct privcmd_mmap_entry *msg = page_address(page);
|
||
|
+
|
||
|
+ vma = find_vma(mm, msg->va);
|
||
|
+ rc = -EINVAL;
|
||
|
+
|
||
|
+ if (!vma || (msg->va != vma->vm_start) ||
|
||
|
+ !privcmd_enforce_singleshot_mapping(vma))
|
||
|
+ goto out_up;
|
||
|
+ }
|
||
|
+
|
||
|
+ state.va = vma->vm_start;
|
||
|
+ state.vma = vma;
|
||
|
+ state.domain = mmapcmd.dom;
|
||
|
+
|
||
|
+ rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
||
|
+ &pagelist,
|
||
|
+ mmap_mfn_range, &state);
|
||
|
+
|
||
|
+
|
||
|
+out_up:
|
||
|
+ up_write(&mm->mmap_sem);
|
||
|
+
|
||
|
+out:
|
||
|
+ free_page_list(&pagelist);
|
||
|
+
|
||
|
+ return rc;
|
||
|
+}
|
||
|
+
|
||
|
+struct mmap_batch_state {
|
||
|
+ domid_t domain;
|
||
|
+ unsigned long va;
|
||
|
+ struct vm_area_struct *vma;
|
||
|
+ int err;
|
||
|
+
|
||
|
+ xen_pfn_t __user *user;
|
||
|
+};
|
||
|
+
|
||
|
+static int mmap_batch_fn(void *data, void *state)
|
||
|
+{
|
||
|
+ xen_pfn_t *mfnp = data;
|
||
|
+ struct mmap_batch_state *st = state;
|
||
|
+
|
||
|
+ if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
|
||
|
+ st->vma->vm_page_prot, st->domain) < 0) {
|
||
|
+ *mfnp |= 0xf0000000U;
|
||
|
+ st->err++;
|
||
|
+ }
|
||
|
+ st->va += PAGE_SIZE;
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int mmap_return_errors(void *data, void *state)
|
||
|
+{
|
||
|
+ xen_pfn_t *mfnp = data;
|
||
|
+ struct mmap_batch_state *st = state;
|
||
|
+
|
||
|
+ return put_user(*mfnp, st->user++);
|
||
|
+}
|
||
|
+
|
||
|
+static struct vm_operations_struct privcmd_vm_ops;
|
||
|
+
|
||
|
+static long privcmd_ioctl_mmap_batch(void __user *udata)
|
||
|
+{
|
||
|
+ int ret;
|
||
|
+ struct privcmd_mmapbatch m;
|
||
|
+ struct mm_struct *mm = current->mm;
|
||
|
+ struct vm_area_struct *vma;
|
||
|
+ unsigned long nr_pages;
|
||
|
+ LIST_HEAD(pagelist);
|
||
|
+ struct mmap_batch_state state;
|
||
|
+
|
||
|
+ if (!xen_initial_domain())
|
||
|
+ return -EPERM;
|
||
|
+
|
||
|
+ if (copy_from_user(&m, udata, sizeof(m)))
|
||
|
+ return -EFAULT;
|
||
|
+
|
||
|
+ nr_pages = m.num;
|
||
|
+ if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
|
||
|
+ return -EINVAL;
|
||
|
+
|
||
|
+ ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
|
||
|
+ m.arr);
|
||
|
+
|
||
|
+ if (ret || list_empty(&pagelist))
|
||
|
+ goto out;
|
||
|
+
|
||
|
+ down_write(&mm->mmap_sem);
|
||
|
+
|
||
|
+ vma = find_vma(mm, m.addr);
|
||
|
+ ret = -EINVAL;
|
||
|
+ if (!vma ||
|
||
|
+ vma->vm_ops != &privcmd_vm_ops ||
|
||
|
+ (m.addr != vma->vm_start) ||
|
||
|
+ ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
|
||
|
+ !privcmd_enforce_singleshot_mapping(vma)) {
|
||
|
+ up_write(&mm->mmap_sem);
|
||
|
+ goto out;
|
||
|
+ }
|
||
|
+
|
||
|
+ state.domain = m.dom;
|
||
|
+ state.vma = vma;
|
||
|
+ state.va = m.addr;
|
||
|
+ state.err = 0;
|
||
|
+
|
||
|
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
||
|
+ &pagelist, mmap_batch_fn, &state);
|
||
|
+
|
||
|
+ up_write(&mm->mmap_sem);
|
||
|
+
|
||
|
+ if (state.err > 0) {
|
||
|
+ state.user = m.arr;
|
||
|
+ ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
||
|
+ &pagelist,
|
||
|
+ mmap_return_errors, &state);
|
||
|
+ }
|
||
|
+
|
||
|
+out:
|
||
|
+ free_page_list(&pagelist);
|
||
|
+
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+static long privcmd_ioctl(struct file *file,
|
||
|
+ unsigned int cmd, unsigned long data)
|
||
|
+{
|
||
|
+ int ret = -ENOSYS;
|
||
|
+ void __user *udata = (void __user *) data;
|
||
|
+
|
||
|
+ switch (cmd) {
|
||
|
+ case IOCTL_PRIVCMD_HYPERCALL:
|
||
|
+ ret = privcmd_ioctl_hypercall(udata);
|
||
|
+ break;
|
||
|
+
|
||
|
+ case IOCTL_PRIVCMD_MMAP:
|
||
|
+ ret = privcmd_ioctl_mmap(udata);
|
||
|
+ break;
|
||
|
+
|
||
|
+ case IOCTL_PRIVCMD_MMAPBATCH:
|
||
|
+ ret = privcmd_ioctl_mmap_batch(udata);
|
||
|
+ break;
|
||
|
+
|
||
|
+ default:
|
||
|
+ ret = -EINVAL;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+
|
||
|
+ return ret;
|
||
|
+}
|
||
|
+
|
||
|
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
||
|
+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||
|
+{
|
||
|
+ printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
|
||
|
+ vma, vma->vm_start, vma->vm_end,
|
||
|
+ vmf->pgoff, vmf->virtual_address);
|
||
|
+
|
||
|
+ return VM_FAULT_SIGBUS;
|
||
|
+}
|
||
|
+
|
||
|
+static struct vm_operations_struct privcmd_vm_ops = {
|
||
|
+ .fault = privcmd_fault
|
||
|
+};
|
||
|
+
|
||
|
+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
|
||
|
+{
|
||
|
+ /* Unsupported for auto-translate guests. */
|
||
|
+ if (xen_feature(XENFEAT_auto_translated_physmap))
|
||
|
+ return -ENOSYS;
|
||
|
+
|
||
|
+ /* DONTCOPY is essential for Xen because copy_page_range doesn't know
|
||
|
+ * how to recreate these mappings */
|
||
|
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
|
||
|
+ vma->vm_ops = &privcmd_vm_ops;
|
||
|
+ vma->vm_private_data = NULL;
|
||
|
+
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
|
||
|
+{
|
||
|
+ return (xchg(&vma->vm_private_data, (void *)1) == NULL);
|
||
|
+}
|
||
|
+#endif
|
||
|
+
|
||
|
+const struct file_operations xen_privcmd_fops = {
|
||
|
+ .owner = THIS_MODULE,
|
||
|
+ .unlocked_ioctl = privcmd_ioctl,
|
||
|
+ .mmap = privcmd_mmap,
|
||
|
+};
|
||
|
+EXPORT_SYMBOL_GPL(xen_privcmd_fops);
|
||
|
+
|
||
|
+static struct miscdevice privcmd_dev = {
|
||
|
+ .minor = MISC_DYNAMIC_MINOR,
|
||
|
+ .name = "xen/privcmd",
|
||
|
+ .fops = &xen_privcmd_fops,
|
||
|
+};
|
||
|
+
|
||
|
+static int __init privcmd_init(void)
|
||
|
+{
|
||
|
+ int err;
|
||
|
+
|
||
|
+ if (!xen_domain())
|
||
|
+ return -ENODEV;
|
||
|
+
|
||
|
+ err = misc_register(&privcmd_dev);
|
||
|
+ if (err != 0) {
|
||
|
+ printk(KERN_ERR "Could not register Xen privcmd device\n");
|
||
|
+ return err;
|
||
|
+ }
|
||
|
+ return 0;
|
||
|
+}
|
||
|
+
|
||
|
+static void __exit privcmd_exit(void)
|
||
|
+{
|
||
|
+ misc_deregister(&privcmd_dev);
|
||
|
+}
|
||
|
+
|
||
|
+module_init(privcmd_init);
|
||
|
+module_exit(privcmd_exit);
|
||
|
diff --git a/drivers/xen/privcmd.h b/drivers/xen/privcmd.h
|
||
|
new file mode 100644
|
||
|
index 0000000..14facae
|
||
|
--- /dev/null
|
||
|
+++ b/drivers/xen/privcmd.h
|
||
|
@@ -0,0 +1,3 @@
|
||
|
+#include <linux/fs.h>
|
||
|
+
|
||
|
+extern const struct file_operations xen_privcmd_fops;
|
||
|
diff --git a/drivers/xen/xenfs/Makefile b/drivers/xen/xenfs/Makefile
|
||
|
index 4fde944..5d45ff1 100644
|
||
|
--- a/drivers/xen/xenfs/Makefile
|
||
|
+++ b/drivers/xen/xenfs/Makefile
|
||
|
@@ -1,4 +1,4 @@
|
||
|
obj-$(CONFIG_XENFS) += xenfs.o
|
||
|
|
||
|
-xenfs-y = super.o xenbus.o privcmd.o
|
||
|
+xenfs-y = super.o xenbus.o
|
||
|
xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
|
||
|
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
|
||
|
deleted file mode 100644
|
||
|
index dbd3b16..0000000
|
||
|
--- a/drivers/xen/xenfs/privcmd.c
|
||
|
+++ /dev/null
|
||
|
@@ -1,400 +0,0 @@
|
||
|
-/******************************************************************************
|
||
|
- * privcmd.c
|
||
|
- *
|
||
|
- * Interface to privileged domain-0 commands.
|
||
|
- *
|
||
|
- * Copyright (c) 2002-2004, K A Fraser, B Dragovic
|
||
|
- */
|
||
|
-
|
||
|
-#include <linux/kernel.h>
|
||
|
-#include <linux/sched.h>
|
||
|
-#include <linux/slab.h>
|
||
|
-#include <linux/string.h>
|
||
|
-#include <linux/errno.h>
|
||
|
-#include <linux/mm.h>
|
||
|
-#include <linux/mman.h>
|
||
|
-#include <linux/uaccess.h>
|
||
|
-#include <linux/swap.h>
|
||
|
-#include <linux/highmem.h>
|
||
|
-#include <linux/pagemap.h>
|
||
|
-#include <linux/seq_file.h>
|
||
|
-
|
||
|
-#include <asm/pgalloc.h>
|
||
|
-#include <asm/pgtable.h>
|
||
|
-#include <asm/tlb.h>
|
||
|
-#include <asm/xen/hypervisor.h>
|
||
|
-#include <asm/xen/hypercall.h>
|
||
|
-
|
||
|
-#include <xen/xen.h>
|
||
|
-#include <xen/privcmd.h>
|
||
|
-#include <xen/interface/xen.h>
|
||
|
-#include <xen/features.h>
|
||
|
-#include <xen/page.h>
|
||
|
-#include <xen/xen-ops.h>
|
||
|
-
|
||
|
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
||
|
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
|
||
|
-#endif
|
||
|
-
|
||
|
-static long privcmd_ioctl_hypercall(void __user *udata)
|
||
|
-{
|
||
|
- struct privcmd_hypercall hypercall;
|
||
|
- long ret;
|
||
|
-
|
||
|
- if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
|
||
|
- return -EFAULT;
|
||
|
-
|
||
|
- ret = privcmd_call(hypercall.op,
|
||
|
- hypercall.arg[0], hypercall.arg[1],
|
||
|
- hypercall.arg[2], hypercall.arg[3],
|
||
|
- hypercall.arg[4]);
|
||
|
-
|
||
|
- return ret;
|
||
|
-}
|
||
|
-
|
||
|
-static void free_page_list(struct list_head *pages)
|
||
|
-{
|
||
|
- struct page *p, *n;
|
||
|
-
|
||
|
- list_for_each_entry_safe(p, n, pages, lru)
|
||
|
- __free_page(p);
|
||
|
-
|
||
|
- INIT_LIST_HEAD(pages);
|
||
|
-}
|
||
|
-
|
||
|
-/*
|
||
|
- * Given an array of items in userspace, return a list of pages
|
||
|
- * containing the data. If copying fails, either because of memory
|
||
|
- * allocation failure or a problem reading user memory, return an
|
||
|
- * error code; its up to the caller to dispose of any partial list.
|
||
|
- */
|
||
|
-static int gather_array(struct list_head *pagelist,
|
||
|
- unsigned nelem, size_t size,
|
||
|
- void __user *data)
|
||
|
-{
|
||
|
- unsigned pageidx;
|
||
|
- void *pagedata;
|
||
|
- int ret;
|
||
|
-
|
||
|
- if (size > PAGE_SIZE)
|
||
|
- return 0;
|
||
|
-
|
||
|
- pageidx = PAGE_SIZE;
|
||
|
- pagedata = NULL; /* quiet, gcc */
|
||
|
- while (nelem--) {
|
||
|
- if (pageidx > PAGE_SIZE-size) {
|
||
|
- struct page *page = alloc_page(GFP_KERNEL);
|
||
|
-
|
||
|
- ret = -ENOMEM;
|
||
|
- if (page == NULL)
|
||
|
- goto fail;
|
||
|
-
|
||
|
- pagedata = page_address(page);
|
||
|
-
|
||
|
- list_add_tail(&page->lru, pagelist);
|
||
|
- pageidx = 0;
|
||
|
- }
|
||
|
-
|
||
|
- ret = -EFAULT;
|
||
|
- if (copy_from_user(pagedata + pageidx, data, size))
|
||
|
- goto fail;
|
||
|
-
|
||
|
- data += size;
|
||
|
- pageidx += size;
|
||
|
- }
|
||
|
-
|
||
|
- ret = 0;
|
||
|
-
|
||
|
-fail:
|
||
|
- return ret;
|
||
|
-}
|
||
|
-
|
||
|
-/*
|
||
|
- * Call function "fn" on each element of the array fragmented
|
||
|
- * over a list of pages.
|
||
|
- */
|
||
|
-static int traverse_pages(unsigned nelem, size_t size,
|
||
|
- struct list_head *pos,
|
||
|
- int (*fn)(void *data, void *state),
|
||
|
- void *state)
|
||
|
-{
|
||
|
- void *pagedata;
|
||
|
- unsigned pageidx;
|
||
|
- int ret = 0;
|
||
|
-
|
||
|
- BUG_ON(size > PAGE_SIZE);
|
||
|
-
|
||
|
- pageidx = PAGE_SIZE;
|
||
|
- pagedata = NULL; /* hush, gcc */
|
||
|
-
|
||
|
- while (nelem--) {
|
||
|
- if (pageidx > PAGE_SIZE-size) {
|
||
|
- struct page *page;
|
||
|
- pos = pos->next;
|
||
|
- page = list_entry(pos, struct page, lru);
|
||
|
- pagedata = page_address(page);
|
||
|
- pageidx = 0;
|
||
|
- }
|
||
|
-
|
||
|
- ret = (*fn)(pagedata + pageidx, state);
|
||
|
- if (ret)
|
||
|
- break;
|
||
|
- pageidx += size;
|
||
|
- }
|
||
|
-
|
||
|
- return ret;
|
||
|
-}
|
||
|
-
|
||
|
-struct mmap_mfn_state {
|
||
|
- unsigned long va;
|
||
|
- struct vm_area_struct *vma;
|
||
|
- domid_t domain;
|
||
|
-};
|
||
|
-
|
||
|
-static int mmap_mfn_range(void *data, void *state)
|
||
|
-{
|
||
|
- struct privcmd_mmap_entry *msg = data;
|
||
|
- struct mmap_mfn_state *st = state;
|
||
|
- struct vm_area_struct *vma = st->vma;
|
||
|
- int rc;
|
||
|
-
|
||
|
- /* Do not allow range to wrap the address space. */
|
||
|
- if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
|
||
|
- ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
|
||
|
- return -EINVAL;
|
||
|
-
|
||
|
- /* Range chunks must be contiguous in va space. */
|
||
|
- if ((msg->va != st->va) ||
|
||
|
- ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
|
||
|
- return -EINVAL;
|
||
|
-
|
||
|
- rc = xen_remap_domain_mfn_range(vma,
|
||
|
- msg->va & PAGE_MASK,
|
||
|
- msg->mfn, msg->npages,
|
||
|
- vma->vm_page_prot,
|
||
|
- st->domain);
|
||
|
- if (rc < 0)
|
||
|
- return rc;
|
||
|
-
|
||
|
- st->va += msg->npages << PAGE_SHIFT;
|
||
|
-
|
||
|
- return 0;
|
||
|
-}
|
||
|
-
|
||
|
-static long privcmd_ioctl_mmap(void __user *udata)
|
||
|
-{
|
||
|
- struct privcmd_mmap mmapcmd;
|
||
|
- struct mm_struct *mm = current->mm;
|
||
|
- struct vm_area_struct *vma;
|
||
|
- int rc;
|
||
|
- LIST_HEAD(pagelist);
|
||
|
- struct mmap_mfn_state state;
|
||
|
-
|
||
|
- if (!xen_initial_domain())
|
||
|
- return -EPERM;
|
||
|
-
|
||
|
- if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
|
||
|
- return -EFAULT;
|
||
|
-
|
||
|
- rc = gather_array(&pagelist,
|
||
|
- mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
||
|
- mmapcmd.entry);
|
||
|
-
|
||
|
- if (rc || list_empty(&pagelist))
|
||
|
- goto out;
|
||
|
-
|
||
|
- down_write(&mm->mmap_sem);
|
||
|
-
|
||
|
- {
|
||
|
- struct page *page = list_first_entry(&pagelist,
|
||
|
- struct page, lru);
|
||
|
- struct privcmd_mmap_entry *msg = page_address(page);
|
||
|
-
|
||
|
- vma = find_vma(mm, msg->va);
|
||
|
- rc = -EINVAL;
|
||
|
-
|
||
|
- if (!vma || (msg->va != vma->vm_start) ||
|
||
|
- !privcmd_enforce_singleshot_mapping(vma))
|
||
|
- goto out_up;
|
||
|
- }
|
||
|
-
|
||
|
- state.va = vma->vm_start;
|
||
|
- state.vma = vma;
|
||
|
- state.domain = mmapcmd.dom;
|
||
|
-
|
||
|
- rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
|
||
|
- &pagelist,
|
||
|
- mmap_mfn_range, &state);
|
||
|
-
|
||
|
-
|
||
|
-out_up:
|
||
|
- up_write(&mm->mmap_sem);
|
||
|
-
|
||
|
-out:
|
||
|
- free_page_list(&pagelist);
|
||
|
-
|
||
|
- return rc;
|
||
|
-}
|
||
|
-
|
||
|
-struct mmap_batch_state {
|
||
|
- domid_t domain;
|
||
|
- unsigned long va;
|
||
|
- struct vm_area_struct *vma;
|
||
|
- int err;
|
||
|
-
|
||
|
- xen_pfn_t __user *user;
|
||
|
-};
|
||
|
-
|
||
|
-static int mmap_batch_fn(void *data, void *state)
|
||
|
-{
|
||
|
- xen_pfn_t *mfnp = data;
|
||
|
- struct mmap_batch_state *st = state;
|
||
|
-
|
||
|
- if (xen_remap_domain_mfn_range(st->vma, st->va & PAGE_MASK, *mfnp, 1,
|
||
|
- st->vma->vm_page_prot, st->domain) < 0) {
|
||
|
- *mfnp |= 0xf0000000U;
|
||
|
- st->err++;
|
||
|
- }
|
||
|
- st->va += PAGE_SIZE;
|
||
|
-
|
||
|
- return 0;
|
||
|
-}
|
||
|
-
|
||
|
-static int mmap_return_errors(void *data, void *state)
|
||
|
-{
|
||
|
- xen_pfn_t *mfnp = data;
|
||
|
- struct mmap_batch_state *st = state;
|
||
|
-
|
||
|
- return put_user(*mfnp, st->user++);
|
||
|
-}
|
||
|
-
|
||
|
-static struct vm_operations_struct privcmd_vm_ops;
|
||
|
-
|
||
|
-static long privcmd_ioctl_mmap_batch(void __user *udata)
|
||
|
-{
|
||
|
- int ret;
|
||
|
- struct privcmd_mmapbatch m;
|
||
|
- struct mm_struct *mm = current->mm;
|
||
|
- struct vm_area_struct *vma;
|
||
|
- unsigned long nr_pages;
|
||
|
- LIST_HEAD(pagelist);
|
||
|
- struct mmap_batch_state state;
|
||
|
-
|
||
|
- if (!xen_initial_domain())
|
||
|
- return -EPERM;
|
||
|
-
|
||
|
- if (copy_from_user(&m, udata, sizeof(m)))
|
||
|
- return -EFAULT;
|
||
|
-
|
||
|
- nr_pages = m.num;
|
||
|
- if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
|
||
|
- return -EINVAL;
|
||
|
-
|
||
|
- ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
|
||
|
- m.arr);
|
||
|
-
|
||
|
- if (ret || list_empty(&pagelist))
|
||
|
- goto out;
|
||
|
-
|
||
|
- down_write(&mm->mmap_sem);
|
||
|
-
|
||
|
- vma = find_vma(mm, m.addr);
|
||
|
- ret = -EINVAL;
|
||
|
- if (!vma ||
|
||
|
- vma->vm_ops != &privcmd_vm_ops ||
|
||
|
- (m.addr != vma->vm_start) ||
|
||
|
- ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
|
||
|
- !privcmd_enforce_singleshot_mapping(vma)) {
|
||
|
- up_write(&mm->mmap_sem);
|
||
|
- goto out;
|
||
|
- }
|
||
|
-
|
||
|
- state.domain = m.dom;
|
||
|
- state.vma = vma;
|
||
|
- state.va = m.addr;
|
||
|
- state.err = 0;
|
||
|
-
|
||
|
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
||
|
- &pagelist, mmap_batch_fn, &state);
|
||
|
-
|
||
|
- up_write(&mm->mmap_sem);
|
||
|
-
|
||
|
- if (state.err > 0) {
|
||
|
- state.user = m.arr;
|
||
|
- ret = traverse_pages(m.num, sizeof(xen_pfn_t),
|
||
|
- &pagelist,
|
||
|
- mmap_return_errors, &state);
|
||
|
- }
|
||
|
-
|
||
|
-out:
|
||
|
- free_page_list(&pagelist);
|
||
|
-
|
||
|
- return ret;
|
||
|
-}
|
||
|
-
|
||
|
-static long privcmd_ioctl(struct file *file,
|
||
|
- unsigned int cmd, unsigned long data)
|
||
|
-{
|
||
|
- int ret = -ENOSYS;
|
||
|
- void __user *udata = (void __user *) data;
|
||
|
-
|
||
|
- switch (cmd) {
|
||
|
- case IOCTL_PRIVCMD_HYPERCALL:
|
||
|
- ret = privcmd_ioctl_hypercall(udata);
|
||
|
- break;
|
||
|
-
|
||
|
- case IOCTL_PRIVCMD_MMAP:
|
||
|
- ret = privcmd_ioctl_mmap(udata);
|
||
|
- break;
|
||
|
-
|
||
|
- case IOCTL_PRIVCMD_MMAPBATCH:
|
||
|
- ret = privcmd_ioctl_mmap_batch(udata);
|
||
|
- break;
|
||
|
-
|
||
|
- default:
|
||
|
- ret = -EINVAL;
|
||
|
- break;
|
||
|
- }
|
||
|
-
|
||
|
- return ret;
|
||
|
-}
|
||
|
-
|
||
|
-#ifndef HAVE_ARCH_PRIVCMD_MMAP
|
||
|
-static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||
|
-{
|
||
|
- printk(KERN_DEBUG "privcmd_fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
|
||
|
- vma, vma->vm_start, vma->vm_end,
|
||
|
- vmf->pgoff, vmf->virtual_address);
|
||
|
-
|
||
|
- return VM_FAULT_SIGBUS;
|
||
|
-}
|
||
|
-
|
||
|
-static struct vm_operations_struct privcmd_vm_ops = {
|
||
|
- .fault = privcmd_fault
|
||
|
-};
|
||
|
-
|
||
|
-static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
|
||
|
-{
|
||
|
- /* Unsupported for auto-translate guests. */
|
||
|
- if (xen_feature(XENFEAT_auto_translated_physmap))
|
||
|
- return -ENOSYS;
|
||
|
-
|
||
|
- /* DONTCOPY is essential for Xen because copy_page_range doesn't know
|
||
|
- * how to recreate these mappings */
|
||
|
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
|
||
|
- vma->vm_ops = &privcmd_vm_ops;
|
||
|
- vma->vm_private_data = NULL;
|
||
|
-
|
||
|
- return 0;
|
||
|
-}
|
||
|
-
|
||
|
-static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
|
||
|
-{
|
||
|
- return (xchg(&vma->vm_private_data, (void *)1) == NULL);
|
||
|
-}
|
||
|
-#endif
|
||
|
-
|
||
|
-const struct file_operations privcmd_file_ops = {
|
||
|
- .unlocked_ioctl = privcmd_ioctl,
|
||
|
- .mmap = privcmd_mmap,
|
||
|
-};
|
||
|
diff --git a/drivers/xen/xenfs/super.c b/drivers/xen/xenfs/super.c
|
||
|
index 1aa3897..a55fbf9 100644
|
||
|
--- a/drivers/xen/xenfs/super.c
|
||
|
+++ b/drivers/xen/xenfs/super.c
|
||
|
@@ -16,6 +16,7 @@
|
||
|
#include <xen/xen.h>
|
||
|
|
||
|
#include "xenfs.h"
|
||
|
+#include "../privcmd.h"
|
||
|
|
||
|
#include <asm/xen/hypervisor.h>
|
||
|
|
||
|
@@ -84,7 +85,7 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
|
||
|
[1] = {},
|
||
|
{ "xenbus", &xenbus_file_ops, S_IRUSR|S_IWUSR },
|
||
|
{ "capabilities", &capabilities_file_ops, S_IRUGO },
|
||
|
- { "privcmd", &privcmd_file_ops, S_IRUSR|S_IWUSR },
|
||
|
+ { "privcmd", &xen_privcmd_fops, S_IRUSR|S_IWUSR },
|
||
|
{""},
|
||
|
};
|
||
|
int rc;
|
||
|
diff --git a/drivers/xen/xenfs/xenfs.h b/drivers/xen/xenfs/xenfs.h
|
||
|
index b68aa62..5056306 100644
|
||
|
--- a/drivers/xen/xenfs/xenfs.h
|
||
|
+++ b/drivers/xen/xenfs/xenfs.h
|
||
|
@@ -2,7 +2,6 @@
|
||
|
#define _XENFS_XENBUS_H
|
||
|
|
||
|
extern const struct file_operations xenbus_file_ops;
|
||
|
-extern const struct file_operations privcmd_file_ops;
|
||
|
extern const struct file_operations xsd_kva_file_ops;
|
||
|
extern const struct file_operations xsd_port_file_ops;
|
||
|
|
||
|
--
|
||
|
1.7.6.4
|
||
|
|