Subject: Fix Xen build wrt. Xen files coming from mainline. From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1017:948c933f8839) Patch-mainline: n/a Acked-by: jbeulich@novell.com --- head-2010-05-12.orig/drivers/xen/Makefile 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/Makefile 2010-01-19 16:01:03.000000000 +0100 @@ -1,12 +1,28 @@ -obj-y += grant-table.o features.o events.o manage.o +obj-y += core/ +obj-y += console/ +obj-y += evtchn/ obj-y += xenbus/ +obj-y += char/ -nostackp := $(call cc-option, -fno-stack-protector) -CFLAGS_features.o := $(nostackp) - -obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o -obj-$(CONFIG_XEN_XENCOMM) += xencomm.o -obj-$(CONFIG_XEN_BALLOON) += balloon.o -obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o -obj-$(CONFIG_XENFS) += xenfs/ -obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o \ No newline at end of file +obj-y += util.o +obj-$(CONFIG_XEN_BALLOON) += balloon/ +obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ +obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ +obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ +obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/ +obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/ +obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/ +obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/ +obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/ +obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/ +obj-$(CONFIG_XEN_FRAMEBUFFER) += fbfront/ +obj-$(CONFIG_XEN_KEYBOARD) += fbfront/ +obj-$(CONFIG_XEN_SCSI_BACKEND) += scsiback/ +obj-$(CONFIG_XEN_SCSI_FRONTEND) += scsifront/ +obj-$(CONFIG_XEN_USB_BACKEND) += usbback/ +obj-$(CONFIG_XEN_USB_FRONTEND) += usbfront/ +obj-$(CONFIG_XEN_PRIVCMD) += privcmd/ +obj-$(CONFIG_XEN_GRANT_DEV) += gntdev/ +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) += sfc_netutil/ +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) += sfc_netfront/ +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) += sfc_netback/ --- head-2010-05-12.orig/drivers/xen/xenbus/Makefile 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/Makefile 2010-01-19 16:01:03.000000000 +0100 @@ -1,7 +1,9 @@ -obj-y += xenbus.o +obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o +obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o -xenbus-objs = -xenbus-objs += xenbus_client.o -xenbus-objs += xenbus_comms.o -xenbus-objs += xenbus_xs.o -xenbus-objs += xenbus_probe.o +xenbus_be-objs = +xenbus_be-objs += xenbus_backend_client.o + +xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o +obj-y += $(xenbus-y) $(xenbus-m) +obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o --- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_client.c 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/xenbus_client.c 2010-01-19 16:01:03.000000000 +0100 @@ -31,14 +31,17 @@ */ #include -#include -#include -#include -#include -#include -#include -#include +#include +#include #include +#include + +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#define DPRINTK(fmt, args...) \ + pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args) const char *xenbus_strstate(enum xenbus_state state) { @@ -50,25 +53,13 @@ const char *xenbus_strstate(enum xenbus_ [ XenbusStateConnected ] = "Connected", [ XenbusStateClosing ] = "Closing", [ XenbusStateClosed ] = "Closed", + [ XenbusStateReconfiguring ] = "Reconfiguring", + [ XenbusStateReconfigured ] = "Reconfigured", }; return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID"; } EXPORT_SYMBOL_GPL(xenbus_strstate); -/** - * xenbus_watch_path - register a watch - * @dev: xenbus device - * @path: path to watch - * @watch: watch to register - * @callback: callback to register - * - * Register a @watch on the given path, using the given xenbus_watch structure - * for storage, and the given @callback function as the callback. Return 0 on - * success, or -errno on error. On success, the given @path will be saved as - * @watch->node, and remains the caller's to free. On error, @watch->node will - * be NULL, the device will switch to %XenbusStateClosing, and the error will - * be saved in the store. - */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, @@ -92,57 +83,26 @@ int xenbus_watch_path(struct xenbus_devi EXPORT_SYMBOL_GPL(xenbus_watch_path); -/** - * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path - * @dev: xenbus device - * @watch: watch to register - * @callback: callback to register - * @pathfmt: format of path to watch - * - * Register a watch on the given @path, using the given xenbus_watch - * structure for storage, and the given @callback function as the callback. - * Return 0 on success, or -errno on error. On success, the watched path - * (@path/@path2) will be saved as @watch->node, and becomes the caller's to - * kfree(). On error, watch->node will be NULL, so the caller has nothing to - * free, the device will switch to %XenbusStateClosing, and the error will be - * saved in the store. - */ -int xenbus_watch_pathfmt(struct xenbus_device *dev, - struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int), - const char *pathfmt, ...) +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)) { int err; - va_list ap; - char *path; - - va_start(ap, pathfmt); - path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap); - va_end(ap); - - if (!path) { + char *state = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", path, path2); + if (!state) { xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch"); return -ENOMEM; } - err = xenbus_watch_path(dev, path, watch, callback); + err = xenbus_watch_path(dev, state, watch, callback); if (err) - kfree(path); + kfree(state); return err; } -EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); +EXPORT_SYMBOL_GPL(xenbus_watch_path2); -/** - * xenbus_switch_state - * @dev: xenbus device - * @state: new state - * - * Advertise in the store a change of the given driver to the given new_state. - * Return 0 on success, or -errno on error. On error, the device will switch - * to XenbusStateClosing, and the error will be saved in the store. - */ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) { /* We check whether the state is currently set to the given value, and @@ -201,13 +161,12 @@ static char *error_path(struct xenbus_de } -static void xenbus_va_dev_error(struct xenbus_device *dev, int err, - const char *fmt, va_list ap) +void _dev_error(struct xenbus_device *dev, int err, const char *fmt, + va_list ap) { int ret; unsigned int len; - char *printf_buffer = NULL; - char *path_buffer = NULL; + char *printf_buffer = NULL, *path_buffer = NULL; #define PRINTF_BUFFER_SIZE 4096 printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); @@ -224,13 +183,13 @@ static void xenbus_va_dev_error(struct x path_buffer = error_path(dev); if (path_buffer == NULL) { - dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + printk("xenbus: failed to write error node for %s (%s)\n", dev->nodename, printf_buffer); goto fail; } if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { - dev_err(&dev->dev, "failed to write error node for %s (%s)\n", + printk("xenbus: failed to write error node for %s (%s)\n", dev->nodename, printf_buffer); goto fail; } @@ -241,57 +200,32 @@ fail: } -/** - * xenbus_dev_error - * @dev: xenbus device - * @err: error to report - * @fmt: error message format - * - * Report the given negative errno into the store, along with the given - * formatted message. - */ -void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...) +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...) { va_list ap; va_start(ap, fmt); - xenbus_va_dev_error(dev, err, fmt, ap); + _dev_error(dev, err, fmt, ap); va_end(ap); } EXPORT_SYMBOL_GPL(xenbus_dev_error); -/** - * xenbus_dev_fatal - * @dev: xenbus device - * @err: error to report - * @fmt: error message format - * - * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by - * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly - * closedown of this driver and its peer. - */ -void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...) +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...) { va_list ap; va_start(ap, fmt); - xenbus_va_dev_error(dev, err, fmt, ap); + _dev_error(dev, err, fmt, ap); va_end(ap); xenbus_switch_state(dev, XenbusStateClosing); } EXPORT_SYMBOL_GPL(xenbus_dev_fatal); -/** - * xenbus_grant_ring - * @dev: xenbus device - * @ring_mfn: mfn of ring to grant - - * Grant access to the given @ring_mfn to the peer of the given device. Return - * 0 on success, or -errno on error. On error, the device will switch to - * XenbusStateClosing, and the error will be saved in the store. - */ + int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) { int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); @@ -302,18 +236,12 @@ int xenbus_grant_ring(struct xenbus_devi EXPORT_SYMBOL_GPL(xenbus_grant_ring); -/** - * Allocate an event channel for the given xenbus_device, assigning the newly - * created local port to *port. Return 0 on success, or -errno on error. On - * error, the device will switch to XenbusStateClosing, and the error will be - * saved in the store. - */ int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port) { struct evtchn_alloc_unbound alloc_unbound; int err; - alloc_unbound.dom = DOMID_SELF; + alloc_unbound.dom = DOMID_SELF; alloc_unbound.remote_dom = dev->otherend_id; err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, @@ -328,36 +256,6 @@ int xenbus_alloc_evtchn(struct xenbus_de EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn); -/** - * Bind to an existing interdomain event channel in another domain. Returns 0 - * on success and stores the local port in *port. On error, returns -errno, - * switches the device to XenbusStateClosing, and saves the error in XenStore. - */ -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port) -{ - struct evtchn_bind_interdomain bind_interdomain; - int err; - - bind_interdomain.remote_dom = dev->otherend_id; - bind_interdomain.remote_port = remote_port; - - err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, - &bind_interdomain); - if (err) - xenbus_dev_fatal(dev, err, - "binding to event channel %d from domain %d", - remote_port, dev->otherend_id); - else - *port = bind_interdomain.local_port; - - return err; -} -EXPORT_SYMBOL_GPL(xenbus_bind_evtchn); - - -/** - * Free an existing event channel. Returns 0 on success or -errno on error. - */ int xenbus_free_evtchn(struct xenbus_device *dev, int port) { struct evtchn_close close; @@ -374,189 +272,6 @@ int xenbus_free_evtchn(struct xenbus_dev EXPORT_SYMBOL_GPL(xenbus_free_evtchn); -/** - * xenbus_map_ring_valloc - * @dev: xenbus device - * @gnt_ref: grant reference - * @vaddr: pointer to address to be filled out by mapping - * - * Based on Rusty Russell's skeleton driver's map_page. - * Map a page of memory into this domain from another domain's grant table. - * xenbus_map_ring_valloc allocates a page of virtual address space, maps the - * page to that address, and sets *vaddr to that address. - * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. - */ -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) -{ - struct gnttab_map_grant_ref op = { - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; - struct vm_struct *area; - - *vaddr = NULL; - - area = xen_alloc_vm_area(PAGE_SIZE); - if (!area) - return -ENOMEM; - - op.host_addr = (unsigned long)area->addr; - - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); - - if (op.status != GNTST_okay) { - xen_free_vm_area(area); - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - return op.status; - } - - /* Stuff the handle in an unused field */ - area->phys_addr = (unsigned long)op.handle; - - *vaddr = area->addr; - return 0; -} -EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); - - -/** - * xenbus_map_ring - * @dev: xenbus device - * @gnt_ref: grant reference - * @handle: pointer to grant handle to be filled - * @vaddr: address to be mapped to - * - * Map a page of memory into this domain from another domain's grant table. - * xenbus_map_ring does not allocate the virtual address space (you must do - * this yourself!). It only maps in the page to the specified address. - * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) - * or -ENOMEM on error. If an error is returned, device will switch to - * XenbusStateClosing and the error message will be saved in XenStore. - */ -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, - grant_handle_t *handle, void *vaddr) -{ - struct gnttab_map_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .flags = GNTMAP_host_map, - .ref = gnt_ref, - .dom = dev->otherend_id, - }; - - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) - BUG(); - - if (op.status != GNTST_okay) { - xenbus_dev_fatal(dev, op.status, - "mapping in shared page %d from domain %d", - gnt_ref, dev->otherend_id); - } else - *handle = op.handle; - - return op.status; -} -EXPORT_SYMBOL_GPL(xenbus_map_ring); - - -/** - * xenbus_unmap_ring_vfree - * @dev: xenbus device - * @vaddr: addr to unmap - * - * Based on Rusty Russell's skeleton driver's unmap_page. - * Unmap a page of memory in this domain that was imported from another domain. - * Use xenbus_unmap_ring_vfree if you mapped in your memory with - * xenbus_map_ring_valloc (it will free the virtual address space). - * Returns 0 on success and returns GNTST_* on error - * (see xen/include/interface/grant_table.h). - */ -int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) -{ - struct vm_struct *area; - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - }; - - /* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr) - * method so that we don't have to muck with vmalloc internals here. - * We could force the user to hang on to their struct vm_struct from - * xenbus_map_ring_valloc, but these 6 lines considerably simplify - * this API. - */ - read_lock(&vmlist_lock); - for (area = vmlist; area != NULL; area = area->next) { - if (area->addr == vaddr) - break; - } - read_unlock(&vmlist_lock); - - if (!area) { - xenbus_dev_error(dev, -ENOENT, - "can't find mapped virtual address %p", vaddr); - return GNTST_bad_virt_addr; - } - - op.handle = (grant_handle_t)area->phys_addr; - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); - - if (op.status == GNTST_okay) - xen_free_vm_area(area); - else - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - (int16_t)area->phys_addr, op.status); - - return op.status; -} -EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); - - -/** - * xenbus_unmap_ring - * @dev: xenbus device - * @handle: grant handle - * @vaddr: addr to unmap - * - * Unmap a page of memory in this domain that was imported from another domain. - * Returns 0 on success and returns GNTST_* on error - * (see xen/include/interface/grant_table.h). - */ -int xenbus_unmap_ring(struct xenbus_device *dev, - grant_handle_t handle, void *vaddr) -{ - struct gnttab_unmap_grant_ref op = { - .host_addr = (unsigned long)vaddr, - .handle = handle, - }; - - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) - BUG(); - - if (op.status != GNTST_okay) - xenbus_dev_error(dev, op.status, - "unmapping page at handle %d error %d", - handle, op.status); - - return op.status; -} -EXPORT_SYMBOL_GPL(xenbus_unmap_ring); - - -/** - * xenbus_read_driver_state - * @path: path for driver - * - * Return the state of the driver rooted at the given store path, or - * XenbusStateUnknown if no state can be read. - */ enum xenbus_state xenbus_read_driver_state(const char *path) { enum xenbus_state result; --- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_comms.c 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/xenbus_comms.c 2010-01-19 16:01:03.000000000 +0100 @@ -34,25 +34,55 @@ #include #include #include +#include +#include +#include #include -#include -#include -#include + +#include + #include "xenbus_comms.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + static int xenbus_irq; -static DECLARE_WORK(probe_work, xenbus_probe); +extern void xenbus_probe(void *); +static DECLARE_WORK(probe_work, xenbus_probe, NULL); static DECLARE_WAIT_QUEUE_HEAD(xb_waitq); -static irqreturn_t wake_waiting(int irq, void *unused) +static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs) { - if (unlikely(xenstored_ready == 0)) { - xenstored_ready = 1; - schedule_work(&probe_work); + int old, new; + + old = atomic_read(&xenbus_xsd_state); + switch (old) { + case XENBUS_XSD_UNCOMMITTED: + BUG(); + return IRQ_HANDLED; + + case XENBUS_XSD_FOREIGN_INIT: + new = XENBUS_XSD_FOREIGN_READY; + break; + + case XENBUS_XSD_LOCAL_INIT: + new = XENBUS_XSD_LOCAL_READY; + break; + + case XENBUS_XSD_FOREIGN_READY: + case XENBUS_XSD_LOCAL_READY: + default: + goto wake; } + old = atomic_cmpxchg(&xenbus_xsd_state, old, new); + if (old != new) + schedule_work(&probe_work); + +wake: wake_up(&xb_waitq); return IRQ_HANDLED; } @@ -82,13 +112,6 @@ static const void *get_input_chunk(XENST return buf + MASK_XENSTORE_IDX(cons); } -/** - * xb_write - low level write - * @data: buffer to send - * @len: length of buffer - * - * Returns 0 on success, error otherwise. - */ int xb_write(const void *data, unsigned len) { struct xenstore_domain_interface *intf = xen_store_interface; @@ -197,12 +220,11 @@ int xb_read(void *data, unsigned len) return 0; } -/** - * xb_init_comms - Set up interrupt handler off store event channel. - */ +/* Set up interrupt handler off store event channel. */ int xb_init_comms(void) { struct xenstore_domain_interface *intf = xen_store_interface; + int err; if (intf->req_prod != intf->req_cons) printk(KERN_ERR "XENBUS request ring is not quiescent " @@ -215,20 +237,18 @@ int xb_init_comms(void) intf->rsp_cons = intf->rsp_prod; } - if (xenbus_irq) { - /* Already have an irq; assume we're resuming */ - rebind_evtchn_irq(xen_store_evtchn, xenbus_irq); - } else { - int err; - err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting, - 0, "xenbus", &xb_waitq); - if (err <= 0) { - printk(KERN_ERR "XENBUS request irq failed %i\n", err); - return err; - } + if (xenbus_irq) + unbind_from_irqhandler(xenbus_irq, &xb_waitq); - xenbus_irq = err; + err = bind_caller_port_to_irqhandler( + xen_store_evtchn, wake_waiting, + 0, "xenbus", &xb_waitq); + if (err <= 0) { + printk(KERN_ERR "XENBUS request irq failed %i\n", err); + return err; } + xenbus_irq = err; + return 0; } --- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_comms.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/xenbus_comms.h 2010-01-19 16:01:03.000000000 +0100 @@ -43,4 +43,20 @@ int xs_input_avail(void); extern struct xenstore_domain_interface *xen_store_interface; extern int xen_store_evtchn; +/* For xenbus internal use. */ +enum { + XENBUS_XSD_UNCOMMITTED = 0, + XENBUS_XSD_FOREIGN_INIT, + XENBUS_XSD_FOREIGN_READY, + XENBUS_XSD_LOCAL_INIT, + XENBUS_XSD_LOCAL_READY, +}; +extern atomic_t xenbus_xsd_state; + +static inline int is_xenstored_ready(void) +{ + int s = atomic_read(&xenbus_xsd_state); + return s == XENBUS_XSD_FOREIGN_READY || s == XENBUS_XSD_LOCAL_READY; +} + #endif /* _XENBUS_COMMS_H */ --- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_probe.c 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/xenbus_probe.c 2010-01-26 09:08:16.000000000 +0100 @@ -4,6 +4,7 @@ * Copyright (C) 2005 Rusty Russell, IBM Corporation * Copyright (C) 2005 Mike Wray, Hewlett-Packard * Copyright (C) 2005, 2006 XenSource Ltd + * Copyright (C) 2007 Solarflare Communications, Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -32,7 +33,7 @@ #define DPRINTK(fmt, args...) \ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) + __FUNCTION__, __LINE__, ##args) #include #include @@ -40,32 +41,37 @@ #include #include #include -#include #include -#include #include -#include -#include +#include +#include +#include #include +#include #include -#include - -#include +#include #include -#include -#include +#include +#include +#include +#ifdef MODULE +#include +#endif #include "xenbus_comms.h" #include "xenbus_probe.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif int xen_store_evtchn; -EXPORT_SYMBOL(xen_store_evtchn); - struct xenstore_domain_interface *xen_store_interface; static unsigned long xen_store_mfn; +extern struct mutex xenwatch_mutex; + static BLOCKING_NOTIFIER_HEAD(xenstore_chain); static void wait_for_devices(struct xenbus_driver *xendrv); @@ -74,9 +80,6 @@ static int xenbus_probe_frontend(const c static void xenbus_dev_shutdown(struct device *_dev); -static int xenbus_dev_suspend(struct device *dev, pm_message_t state); -static int xenbus_dev_resume(struct device *dev); - /* If something in array of ids matches this device, return it. */ static const struct xenbus_device_id * match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev) @@ -98,16 +101,6 @@ int xenbus_match(struct device *_dev, st return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; } -static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env) -{ - struct xenbus_device *dev = to_xenbus_device(_dev); - - if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype)) - return -ENOMEM; - - return 0; -} - /* device// => - */ static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) { @@ -176,9 +169,30 @@ static int read_backend_details(struct x return read_otherend_details(xendev, "backend-id", "backend"); } -static struct device_attribute xenbus_dev_attrs[] = { - __ATTR_NULL -}; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +static int xenbus_uevent_frontend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) +{ + struct xenbus_device *xdev; + int length = 0, i = 0; + + if (dev == NULL) + return -ENODEV; + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "MODALIAS=xen:%s", xdev->devicetype); + + return 0; +} +#endif /* Bus type for frontend drivers. */ static struct xen_bus_type xenbus_frontend = { @@ -186,17 +200,19 @@ static struct xen_bus_type xenbus_fronte .levels = 2, /* device/type/ */ .get_bus_id = frontend_bus_id, .probe = xenbus_probe_frontend, + .error = -ENODEV, .bus = { - .name = "xen", - .match = xenbus_match, - .uevent = xenbus_uevent, - .probe = xenbus_dev_probe, - .remove = xenbus_dev_remove, - .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_dev_attrs, - - .suspend = xenbus_dev_suspend, - .resume = xenbus_dev_resume, + .name = "xen", + .match = xenbus_match, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_frontend, +#endif + }, + .dev = { + .bus_id = "xen", }, }; @@ -213,17 +229,16 @@ static void otherend_changed(struct xenb if (!dev->otherend || strncmp(dev->otherend, vec[XS_WATCH_PATH], strlen(dev->otherend))) { - dev_dbg(&dev->dev, "Ignoring watch at %s\n", - vec[XS_WATCH_PATH]); + DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]); return; } state = xenbus_read_driver_state(dev->otherend); - dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n", - state, xenbus_strstate(state), dev->otherend_watch.node, - vec[XS_WATCH_PATH]); + DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state), + dev->otherend_watch.node, vec[XS_WATCH_PATH]); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) /* * Ignore xenbus transitions during shutdown. This prevents us doing * work that can fail e.g., when the rootfs is gone. @@ -237,6 +252,7 @@ static void otherend_changed(struct xenb xenbus_frontend_closed(dev); return; } +#endif if (drv->otherend_changed) drv->otherend_changed(dev, state); @@ -256,8 +272,8 @@ static int talk_to_otherend(struct xenbu static int watch_otherend(struct xenbus_device *dev) { - return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, - "%s/%s", dev->otherend, "state"); + return xenbus_watch_path2(dev, dev->otherend, "state", + &dev->otherend_watch, otherend_changed); } @@ -283,8 +299,9 @@ int xenbus_dev_probe(struct device *_dev err = talk_to_otherend(dev); if (err) { - dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n", - dev->nodename); + printk(KERN_WARNING + "xenbus_probe: talk_to_otherend on %s failed.\n", + dev->nodename); return err; } @@ -294,7 +311,8 @@ int xenbus_dev_probe(struct device *_dev err = watch_otherend(dev); if (err) { - dev_warn(&dev->dev, "watch_otherend on %s failed.\n", + printk(KERN_WARNING + "xenbus_probe: watch_otherend on %s failed.\n", dev->nodename); return err; } @@ -330,43 +348,64 @@ static void xenbus_dev_shutdown(struct d DPRINTK("%s", dev->nodename); +/* Commented out since xenstored stubdom is now minios based not linux based +#define XENSTORE_DOMAIN_SHARES_THIS_KERNEL +*/ +#ifndef XENSTORE_DOMAIN_SHARES_THIS_KERNEL + if (is_initial_xendomain()) +#endif + return; + get_device(&dev->dev); if (dev->state != XenbusStateConnected) { - printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__, + printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__, dev->nodename, xenbus_strstate(dev->state)); goto out; } xenbus_switch_state(dev, XenbusStateClosing); + + if (!strcmp(dev->devicetype, "vfb")) + goto out; + timeout = wait_for_completion_timeout(&dev->down, timeout); if (!timeout) - printk(KERN_INFO "%s: %s timeout closing device\n", - __func__, dev->nodename); + printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename); out: put_device(&dev->dev); } int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name) + struct xen_bus_type *bus) { + int ret; + + if (bus->error) + return bus->error; + drv->driver.name = drv->name; drv->driver.bus = &bus->bus; - drv->driver.owner = owner; - drv->driver.mod_name = mod_name; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10) + drv->driver.owner = drv->owner; +#endif +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) + drv->driver.probe = xenbus_dev_probe; + drv->driver.remove = xenbus_dev_remove; + drv->driver.shutdown = xenbus_dev_shutdown; +#endif - return driver_register(&drv->driver); + mutex_lock(&xenwatch_mutex); + ret = driver_register(&drv->driver); + mutex_unlock(&xenwatch_mutex); + return ret; } -int __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) +int xenbus_register_frontend(struct xenbus_driver *drv) { int ret; drv->read_otherend_details = read_backend_details; - ret = xenbus_register_driver_common(drv, &xenbus_frontend, - owner, mod_name); + ret = xenbus_register_driver_common(drv, &xenbus_frontend); if (ret) return ret; @@ -375,7 +414,7 @@ int __xenbus_register_frontend(struct xe return 0; } -EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +EXPORT_SYMBOL_GPL(xenbus_register_frontend); void xenbus_unregister_driver(struct xenbus_driver *drv) { @@ -453,31 +492,30 @@ static void xenbus_dev_release(struct de } static ssize_t xendev_show_nodename(struct device *dev, - struct device_attribute *attr, char *buf) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) + struct device_attribute *attr, +#endif + char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename); } static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL); static ssize_t xendev_show_devtype(struct device *dev, - struct device_attribute *attr, char *buf) +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13) + struct device_attribute *attr, +#endif + char *buf) { return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype); } static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL); -static ssize_t xendev_show_modalias(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype); -} -static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL); int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename) { - char devname[XEN_BUS_ID_SIZE]; int err; struct xenbus_device *xendev; size_t stringlen; @@ -485,6 +523,9 @@ int xenbus_probe_node(struct xen_bus_typ enum xenbus_state state = xenbus_read_driver_state(nodename); + if (bus->error) + return bus->error; + if (state != XenbusStateInitialising) { /* Device is not new, so ignore it. This can happen if a device is going away after switching to Closed. */ @@ -509,15 +550,14 @@ int xenbus_probe_node(struct xen_bus_typ xendev->devicetype = tmpstring; init_completion(&xendev->down); + xendev->dev.parent = &bus->dev; xendev->dev.bus = &bus->bus; xendev->dev.release = xenbus_dev_release; - err = bus->get_bus_id(devname, xendev->nodename); + err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename); if (err) goto fail; - dev_set_name(&xendev->dev, devname); - /* Register with generic device framework. */ err = device_register(&xendev->dev); if (err) @@ -525,22 +565,15 @@ int xenbus_probe_node(struct xen_bus_typ err = device_create_file(&xendev->dev, &dev_attr_nodename); if (err) - goto fail_unregister; - + goto unregister; err = device_create_file(&xendev->dev, &dev_attr_devtype); if (err) - goto fail_remove_nodename; - - err = device_create_file(&xendev->dev, &dev_attr_modalias); - if (err) - goto fail_remove_devtype; + goto unregister; return 0; -fail_remove_devtype: - device_remove_file(&xendev->dev, &dev_attr_devtype); -fail_remove_nodename: +unregister: device_remove_file(&xendev->dev, &dev_attr_nodename); -fail_unregister: + device_remove_file(&xendev->dev, &dev_attr_devtype); device_unregister(&xendev->dev); fail: kfree(xendev); @@ -553,8 +586,10 @@ static int xenbus_probe_frontend(const c char *nodename; int err; - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", - xenbus_frontend.root, type, name); + if (!strcmp(type, "console")) + return 0; + + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name); if (!nodename) return -ENOMEM; @@ -591,6 +626,9 @@ int xenbus_probe_devices(struct xen_bus_ char **dir; unsigned int i, dir_n; + if (bus->error) + return bus->error; + dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n); if (IS_ERR(dir)) return PTR_ERR(dir); @@ -627,15 +665,15 @@ static int strsep_len(const char *str, c return (len == 0) ? i : -ERANGE; } -void xenbus_dev_changed(const char *node, struct xen_bus_type *bus) +void dev_changed(const char *node, struct xen_bus_type *bus) { int exists, rootlen; struct xenbus_device *dev; char type[XEN_BUS_ID_SIZE]; const char *p, *root; - if (char_count(node, '/') < 2) - return; + if (bus->error || char_count(node, '/') < 2) + return; exists = xenbus_exists(XBT_NIL, node, ""); if (!exists) { @@ -663,14 +701,13 @@ void xenbus_dev_changed(const char *node kfree(root); } -EXPORT_SYMBOL_GPL(xenbus_dev_changed); static void frontend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) { DPRINTK(""); - xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); + dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend); } /* We watch for devices appearing and vanishing. */ @@ -679,7 +716,7 @@ static struct xenbus_watch fe_watch = { .callback = frontend_changed, }; -static int xenbus_dev_suspend(struct device *dev, pm_message_t state) +static int suspend_dev(struct device *dev, void *data) { int err = 0; struct xenbus_driver *drv; @@ -692,14 +729,35 @@ static int xenbus_dev_suspend(struct dev drv = to_xenbus_driver(dev->driver); xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) - err = drv->suspend(xdev, state); + err = drv->suspend(xdev); + if (err) + printk(KERN_WARNING + "xenbus: suspend %s failed: %i\n", dev->bus_id, err); + return 0; +} + +static int suspend_cancel_dev(struct device *dev, void *data) +{ + int err = 0; + struct xenbus_driver *drv; + struct xenbus_device *xdev; + + DPRINTK(""); + + if (dev->driver == NULL) + return 0; + drv = to_xenbus_driver(dev->driver); + xdev = container_of(dev, struct xenbus_device, dev); + if (drv->suspend_cancel) + err = drv->suspend_cancel(xdev); if (err) printk(KERN_WARNING - "xenbus: suspend %s failed: %i\n", dev_name(dev), err); + "xenbus: suspend_cancel %s failed: %i\n", + dev->bus_id, err); return 0; } -static int xenbus_dev_resume(struct device *dev) +static int resume_dev(struct device *dev, void *data) { int err; struct xenbus_driver *drv; @@ -717,7 +775,7 @@ static int xenbus_dev_resume(struct devi if (err) { printk(KERN_WARNING "xenbus: resume (talk_to_otherend) %s failed: %i\n", - dev_name(dev), err); + dev->bus_id, err); return err; } @@ -728,7 +786,7 @@ static int xenbus_dev_resume(struct devi if (err) { printk(KERN_WARNING "xenbus: resume %s failed: %i\n", - dev_name(dev), err); + dev->bus_id, err); return err; } } @@ -737,22 +795,52 @@ static int xenbus_dev_resume(struct devi if (err) { printk(KERN_WARNING "xenbus_probe: resume (watch_otherend) %s failed: " - "%d.\n", dev_name(dev), err); + "%d.\n", dev->bus_id, err); return err; } return 0; } +void xenbus_suspend(void) +{ + DPRINTK(""); + + if (!xenbus_frontend.error) + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev); + xenbus_backend_suspend(suspend_dev); + xs_suspend(); +} +EXPORT_SYMBOL_GPL(xenbus_suspend); + +void xenbus_resume(void) +{ + xb_init_comms(); + xs_resume(); + if (!xenbus_frontend.error) + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev); + xenbus_backend_resume(resume_dev); +} +EXPORT_SYMBOL_GPL(xenbus_resume); + +void xenbus_suspend_cancel(void) +{ + xs_suspend_cancel(); + if (!xenbus_frontend.error) + bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev); + xenbus_backend_resume(suspend_cancel_dev); +} +EXPORT_SYMBOL_GPL(xenbus_suspend_cancel); + /* A flag to determine if xenstored is 'ready' (i.e. has started) */ -int xenstored_ready = 0; +atomic_t xenbus_xsd_state = ATOMIC_INIT(XENBUS_XSD_UNCOMMITTED); int register_xenstore_notifier(struct notifier_block *nb) { int ret = 0; - if (xenstored_ready > 0) + if (is_xenstored_ready()) ret = nb->notifier_call(nb, 0, NULL); else blocking_notifier_chain_register(&xenstore_chain, nb); @@ -767,9 +855,10 @@ void unregister_xenstore_notifier(struct } EXPORT_SYMBOL_GPL(unregister_xenstore_notifier); -void xenbus_probe(struct work_struct *unused) + +void xenbus_probe(void *unused) { - BUG_ON((xenstored_ready <= 0)); + BUG_ON(!is_xenstored_ready()); /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_frontend); @@ -780,71 +869,252 @@ void xenbus_probe(struct work_struct *un blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -static int __init xenbus_probe_init(void) + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) +static struct file_operations xsd_kva_fops; +static struct proc_dir_entry *xsd_kva_intf; +static struct proc_dir_entry *xsd_port_intf; + +static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma) +{ + size_t size = vma->vm_end - vma->vm_start; + int old; + int rc; + + old = atomic_cmpxchg(&xenbus_xsd_state, + XENBUS_XSD_UNCOMMITTED, + XENBUS_XSD_LOCAL_INIT); + switch (old) { + case XENBUS_XSD_UNCOMMITTED: + rc = xb_init_comms(); + if (rc != 0) + return rc; + break; + + case XENBUS_XSD_FOREIGN_INIT: + case XENBUS_XSD_FOREIGN_READY: + return -EBUSY; + + case XENBUS_XSD_LOCAL_INIT: + case XENBUS_XSD_LOCAL_READY: + default: + break; + } + + if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0)) + return -EINVAL; + + if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn), + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +static int xsd_kva_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "0x%p", xen_store_interface); + *eof = 1; + return len; +} + +static int xsd_port_read(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int len; + + len = sprintf(page, "%d", xen_store_evtchn); + *eof = 1; + return len; +} +#endif + +static int xb_free_port(evtchn_port_t port) +{ + struct evtchn_close close; + close.port = port; + return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close); +} + +int xenbus_conn(domid_t remote_dom, unsigned long *grant_ref, evtchn_port_t *local_port) +{ + struct evtchn_alloc_unbound alloc_unbound; + int rc, rc2; + + BUG_ON(atomic_read(&xenbus_xsd_state) != XENBUS_XSD_FOREIGN_INIT); + BUG_ON(!is_initial_xendomain()); + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) + remove_xen_proc_entry("xsd_kva"); + remove_xen_proc_entry("xsd_port"); +#endif + + rc = xb_free_port(xen_store_evtchn); + if (rc != 0) + goto fail0; + + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = remote_dom; + rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (rc != 0) + goto fail0; + *local_port = xen_store_evtchn = alloc_unbound.port; + + /* keep the old page (xen_store_mfn, xen_store_interface) */ + rc = gnttab_grant_foreign_access(remote_dom, xen_store_mfn, + GTF_permit_access); + if (rc < 0) + goto fail1; + *grant_ref = rc; + + rc = xb_init_comms(); + if (rc != 0) + goto fail1; + + return 0; + +fail1: + rc2 = xb_free_port(xen_store_evtchn); + if (rc2 != 0) + printk(KERN_WARNING + "XENBUS: Error freeing xenstore event channel: %d\n", + rc2); +fail0: + xen_store_evtchn = -1; + return rc; +} + +static int xenbus_probe_init(void) { int err = 0; + unsigned long page = 0; DPRINTK(""); - err = -ENODEV; - if (!xen_domain()) - goto out_error; + if (!is_running_on_xen()) + return -ENODEV; /* Register ourselves with the kernel bus subsystem */ - err = bus_register(&xenbus_frontend.bus); - if (err) - goto out_error; - - err = xenbus_backend_bus_register(); - if (err) - goto out_unreg_front; + xenbus_frontend.error = bus_register(&xenbus_frontend.bus); + if (xenbus_frontend.error) + printk(KERN_WARNING + "XENBUS: Error registering frontend bus: %i\n", + xenbus_frontend.error); + xenbus_backend_bus_register(); /* * Domain0 doesn't have a store_evtchn or store_mfn yet. */ - if (xen_initial_domain()) { - /* dom0 not yet supported */ + if (is_initial_xendomain()) { + struct evtchn_alloc_unbound alloc_unbound; + + /* Allocate page. */ + page = get_zeroed_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + + xen_store_mfn = xen_start_info->store_mfn = + pfn_to_mfn(virt_to_phys((void *)page) >> + PAGE_SHIFT); + + /* Next allocate a local port which xenstored can bind to */ + alloc_unbound.dom = DOMID_SELF; + alloc_unbound.remote_dom = DOMID_SELF; + + err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, + &alloc_unbound); + if (err == -ENOSYS) + goto err; + BUG_ON(err); + xen_store_evtchn = xen_start_info->store_evtchn = + alloc_unbound.port; + +#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) + /* And finally publish the above info in /proc/xen */ + xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600); + if (xsd_kva_intf) { + memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops, + sizeof(xsd_kva_fops)); + xsd_kva_fops.mmap = xsd_kva_mmap; + xsd_kva_intf->proc_fops = &xsd_kva_fops; + xsd_kva_intf->read_proc = xsd_kva_read; + } + xsd_port_intf = create_xen_proc_entry("xsd_port", 0400); + if (xsd_port_intf) + xsd_port_intf->read_proc = xsd_port_read; +#endif + xen_store_interface = mfn_to_virt(xen_store_mfn); } else { - xenstored_ready = 1; + atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY); +#ifdef CONFIG_XEN xen_store_evtchn = xen_start_info->store_evtchn; xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); +#else + xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); + xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, + PAGE_SIZE); +#endif + /* Initialize the shared memory rings to talk to xenstored */ + err = xb_init_comms(); + if (err) + goto err; } - xen_store_interface = mfn_to_virt(xen_store_mfn); + + xenbus_dev_init(); /* Initialize the interface to xenstore. */ err = xs_init(); if (err) { printk(KERN_WARNING "XENBUS: Error initializing xenstore comms: %i\n", err); - goto out_unreg_back; + goto err; } - if (!xen_initial_domain()) - xenbus_probe(NULL); + /* Register ourselves with the kernel device subsystem */ + if (!xenbus_frontend.error) { + xenbus_frontend.error = device_register(&xenbus_frontend.dev); + if (xenbus_frontend.error) { + bus_unregister(&xenbus_frontend.bus); + printk(KERN_WARNING + "XENBUS: Error registering frontend device: %i\n", + xenbus_frontend.error); + } + } + xenbus_backend_device_register(); -#ifdef CONFIG_XEN_COMPAT_XENFS - /* - * Create xenfs mountpoint in /proc for compatibility with - * utilities that expect to find "xenbus" under "/proc/xen". - */ - proc_mkdir("xen", NULL); -#endif + if (!is_initial_xendomain()) + xenbus_probe(NULL); return 0; - out_unreg_back: - xenbus_backend_bus_unregister(); + err: + if (page) + free_page(page); - out_unreg_front: - bus_unregister(&xenbus_frontend.bus); + /* + * Do not unregister the xenbus front/backend buses here. The buses + * must exist because front/backend drivers will use them when they are + * registered. + */ - out_error: return err; } +#ifdef CONFIG_XEN postcore_initcall(xenbus_probe_init); - -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); +#else +int xenbus_init(void) +{ + return xenbus_probe_init(); +} +#endif static int is_device_connecting(struct device *dev, void *data) { @@ -871,6 +1141,8 @@ static int is_device_connecting(struct d static int exists_connecting_device(struct device_driver *drv) { + if (xenbus_frontend.error) + return xenbus_frontend.error; return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, is_device_connecting); } @@ -879,6 +1151,7 @@ static int print_device_status(struct de { struct xenbus_device *xendev = to_xenbus_device(dev); struct device_driver *drv = data; + struct xenbus_driver *xendrv; /* Is this operation limited to a particular driver? */ if (drv && (dev->driver != drv)) @@ -888,7 +1161,10 @@ static int print_device_status(struct de /* Information only: is this too noisy? */ printk(KERN_INFO "XENBUS: Device with no driver: %s\n", xendev->nodename); - } else if (xendev->state < XenbusStateConnected) { + return 0; + } + + if (xendev->state < XenbusStateConnected) { enum xenbus_state rstate = XenbusStateUnknown; if (xendev->otherend) rstate = xenbus_read_driver_state(xendev->otherend); @@ -897,6 +1173,11 @@ static int print_device_status(struct de xendev->nodename, xendev->state, rstate); } + xendrv = to_xenbus_driver(dev->driver); + if (xendrv->is_ready && !xendrv->is_ready(xendev)) + printk(KERN_WARNING "XENBUS: Device not ready: %s\n", + xendev->nodename); + return 0; } @@ -923,7 +1204,7 @@ static void wait_for_devices(struct xenb struct device_driver *drv = xendrv ? &xendrv->driver : NULL; unsigned int seconds_waited = 0; - if (!ready_to_wait_for_devices || !xen_domain()) + if (!ready_to_wait_for_devices || !is_running_on_xen()) return; while (exists_connecting_device(drv)) { @@ -950,10 +1231,18 @@ static void wait_for_devices(struct xenb #ifndef MODULE static int __init boot_wait_for_devices(void) { - ready_to_wait_for_devices = 1; - wait_for_devices(NULL); + if (!xenbus_frontend.error) { + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + } return 0; } late_initcall(boot_wait_for_devices); #endif + +int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *)) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, arg, fn); +} +EXPORT_SYMBOL_GPL(xenbus_for_each_frontend); --- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_probe.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/xenbus_probe.h 2010-01-19 16:01:04.000000000 +0100 @@ -34,43 +34,47 @@ #ifndef _XENBUS_PROBE_H #define _XENBUS_PROBE_H +#ifndef BUS_ID_SIZE #define XEN_BUS_ID_SIZE 20 +#else +#define XEN_BUS_ID_SIZE BUS_ID_SIZE +#endif -#ifdef CONFIG_XEN_BACKEND +#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE) extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); extern void xenbus_backend_probe_and_watch(void); -extern int xenbus_backend_bus_register(void); -extern void xenbus_backend_bus_unregister(void); +extern void xenbus_backend_bus_register(void); +extern void xenbus_backend_device_register(void); #else static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} static inline void xenbus_backend_probe_and_watch(void) {} -static inline int xenbus_backend_bus_register(void) { return 0; } -static inline void xenbus_backend_bus_unregister(void) {} +static inline void xenbus_backend_bus_register(void) {} +static inline void xenbus_backend_device_register(void) {} #endif struct xen_bus_type { char *root; + int error; unsigned int levels; int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); int (*probe)(const char *type, const char *dir); struct bus_type bus; + struct device dev; }; extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name); + struct xen_bus_type *bus); extern int xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); extern int xenbus_probe_devices(struct xen_bus_type *bus); -extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); +extern void dev_changed(const char *node, struct xen_bus_type *bus); #endif --- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_xs.c 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/drivers/xen/xenbus/xenbus_xs.c 2010-01-19 16:01:04.000000000 +0100 @@ -47,6 +47,14 @@ #include #include "xenbus_comms.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#ifndef PF_NOFREEZE /* Old kernel (pre-2.6.6). */ +#define PF_NOFREEZE 0 +#endif + struct xs_stored_msg { struct list_head list; @@ -76,6 +84,14 @@ struct xs_handle { /* * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex. * response_mutex is never taken simultaneously with the other three. + * + * transaction_mutex must be held before incrementing + * transaction_count. The mutex is held when a suspend is in + * progress to prevent new transactions starting. + * + * When decrementing transaction_count to zero the wait queue + * should be woken up, the suspend code waits for count to + * reach zero. */ /* One request at a time. */ @@ -85,7 +101,9 @@ struct xs_handle { struct mutex response_mutex; /* Protect transactions against save/restore. */ - struct rw_semaphore transaction_mutex; + struct mutex transaction_mutex; + atomic_t transaction_count; + wait_queue_head_t transaction_wq; /* Protect watch (de)register against save/restore. */ struct rw_semaphore watch_mutex; @@ -108,7 +126,7 @@ static DEFINE_SPINLOCK(watch_events_lock * carrying out work. */ static pid_t xenwatch_pid; -static DEFINE_MUTEX(xenwatch_mutex); +/* static */ DEFINE_MUTEX(xenwatch_mutex); static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); static int get_error(const char *errorstring) @@ -157,6 +175,31 @@ static void *read_reply(enum xsd_sockmsg return body; } +static void transaction_start(void) +{ + mutex_lock(&xs_state.transaction_mutex); + atomic_inc(&xs_state.transaction_count); + mutex_unlock(&xs_state.transaction_mutex); +} + +static void transaction_end(void) +{ + if (atomic_dec_and_test(&xs_state.transaction_count)) + wake_up(&xs_state.transaction_wq); +} + +static void transaction_suspend(void) +{ + mutex_lock(&xs_state.transaction_mutex); + wait_event(xs_state.transaction_wq, + atomic_read(&xs_state.transaction_count) == 0); +} + +static void transaction_resume(void) +{ + mutex_unlock(&xs_state.transaction_mutex); +} + void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg) { void *ret; @@ -164,7 +207,7 @@ void *xenbus_dev_request_and_reply(struc int err; if (req_msg.type == XS_TRANSACTION_START) - down_read(&xs_state.transaction_mutex); + transaction_start(); mutex_lock(&xs_state.request_mutex); @@ -177,14 +220,13 @@ void *xenbus_dev_request_and_reply(struc mutex_unlock(&xs_state.request_mutex); - if ((msg->type == XS_TRANSACTION_END) || + if ((req_msg.type == XS_TRANSACTION_END) || ((req_msg.type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) - up_read(&xs_state.transaction_mutex); + transaction_end(); return ret; } -EXPORT_SYMBOL(xenbus_dev_request_and_reply); /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ static void *xs_talkv(struct xenbus_transaction t, @@ -214,7 +256,7 @@ static void *xs_talkv(struct xenbus_tran } for (i = 0; i < num_vecs; i++) { - err = xb_write(iovec[i].iov_base, iovec[i].iov_len); + err = xb_write(iovec[i].iov_base, iovec[i].iov_len);; if (err) { mutex_unlock(&xs_state.request_mutex); return ERR_PTR(err); @@ -295,7 +337,7 @@ static char **split(char *strings, unsig char *p, **ret; /* Count the strings. */ - *num = count_strings(strings, len); + *num = count_strings(strings, len) + 1; /* Transfer to one big alloc for easy freeing. */ ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH); @@ -309,6 +351,7 @@ static char **split(char *strings, unsig strings = (char *)&ret[*num]; for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) ret[(*num)++] = p; + ret[*num] = strings + len; return ret; } @@ -432,11 +475,11 @@ int xenbus_transaction_start(struct xenb { char *id_str; - down_read(&xs_state.transaction_mutex); + transaction_start(); id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL); if (IS_ERR(id_str)) { - up_read(&xs_state.transaction_mutex); + transaction_end(); return PTR_ERR(id_str); } @@ -461,7 +504,7 @@ int xenbus_transaction_end(struct xenbus err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL)); - up_read(&xs_state.transaction_mutex); + transaction_end(); return err; } @@ -622,6 +665,8 @@ void unregister_xenbus_watch(struct xenb char token[sizeof(watch) * 2 + 1]; int err; + BUG_ON(watch->flags & XBWF_new_thread); + sprintf(token, "%lX", (long)watch); down_read(&xs_state.watch_mutex); @@ -639,11 +684,6 @@ void unregister_xenbus_watch(struct xenb up_read(&xs_state.watch_mutex); - /* Make sure there are no callbacks running currently (unless - its us) */ - if (current->pid != xenwatch_pid) - mutex_lock(&xenwatch_mutex); - /* Cancel pending watch events. */ spin_lock(&watch_events_lock); list_for_each_entry_safe(msg, tmp, &watch_events, list) { @@ -655,14 +695,17 @@ void unregister_xenbus_watch(struct xenb } spin_unlock(&watch_events_lock); - if (current->pid != xenwatch_pid) + /* Flush any currently-executing callback, unless we are it. :-) */ + if (current->pid != xenwatch_pid) { + mutex_lock(&xenwatch_mutex); mutex_unlock(&xenwatch_mutex); + } } EXPORT_SYMBOL_GPL(unregister_xenbus_watch); void xs_suspend(void) { - down_write(&xs_state.transaction_mutex); + transaction_suspend(); down_write(&xs_state.watch_mutex); mutex_lock(&xs_state.request_mutex); mutex_lock(&xs_state.response_mutex); @@ -673,11 +716,9 @@ void xs_resume(void) struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; - xb_init_comms(); - mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); - up_write(&xs_state.transaction_mutex); + transaction_resume(); /* No need for watches_lock: the watch_mutex is sufficient. */ list_for_each_entry(watch, &watches, list) { @@ -693,7 +734,25 @@ void xs_suspend_cancel(void) mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); up_write(&xs_state.watch_mutex); - up_write(&xs_state.transaction_mutex); + mutex_unlock(&xs_state.transaction_mutex); +} + +static int xenwatch_handle_callback(void *data) +{ + struct xs_stored_msg *msg = data; + + msg->u.watch.handle->callback(msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + + kfree(msg->u.watch.vec); + kfree(msg); + + /* Kill this kthread if we were spawned just for this callback. */ + if (current->pid != xenwatch_pid) + do_exit(0); + + return 0; } static int xenwatch_thread(void *unused) @@ -701,6 +760,7 @@ static int xenwatch_thread(void *unused) struct list_head *ent; struct xs_stored_msg *msg; + current->flags |= PF_NOFREEZE; for (;;) { wait_event_interruptible(watch_events_waitq, !list_empty(&watch_events)); @@ -716,17 +776,29 @@ static int xenwatch_thread(void *unused) list_del(ent); spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); - kfree(msg); + if (ent == &watch_events) { + mutex_unlock(&xenwatch_mutex); + continue; } - mutex_unlock(&xenwatch_mutex); + msg = list_entry(ent, struct xs_stored_msg, list); + + /* + * Unlock the mutex before running an XBWF_new_thread + * handler. kthread_run can block which can deadlock + * against unregister_xenbus_watch() if we need to + * unregister other watches in order to make + * progress. This can occur on resume before the swap + * device is attached. + */ + if (msg->u.watch.handle->flags & XBWF_new_thread) { + mutex_unlock(&xenwatch_mutex); + kthread_run(xenwatch_handle_callback, + msg, "xenwatch_cb"); + } else { + xenwatch_handle_callback(msg); + mutex_unlock(&xenwatch_mutex); + } } return 0; @@ -820,6 +892,7 @@ static int xenbus_thread(void *unused) { int err; + current->flags |= PF_NOFREEZE; for (;;) { err = process_msg(); if (err) @@ -834,7 +907,6 @@ static int xenbus_thread(void *unused) int xs_init(void) { - int err; struct task_struct *task; INIT_LIST_HEAD(&xs_state.reply_list); @@ -843,13 +915,10 @@ int xs_init(void) mutex_init(&xs_state.request_mutex); mutex_init(&xs_state.response_mutex); - init_rwsem(&xs_state.transaction_mutex); + mutex_init(&xs_state.transaction_mutex); init_rwsem(&xs_state.watch_mutex); - - /* Initialize the shared memory rings to talk to xenstored */ - err = xb_init_comms(); - if (err) - return err; + atomic_set(&xs_state.transaction_count, 0); + init_waitqueue_head(&xs_state.transaction_wq); task = kthread_run(xenwatch_thread, NULL, "xenwatch"); if (IS_ERR(task)) --- head-2010-05-12.orig/include/xen/evtchn.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/evtchn.h 2010-01-19 16:01:04.000000000 +0100 @@ -1,7 +1,11 @@ +#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__) +#include "public/evtchn.h" +#else /****************************************************************************** * evtchn.h * - * Interface to /dev/xen/evtchn. + * Communication via Xen event channels. + * Also definitions for the device that demuxes notifications to userspace. * * Copyright (c) 2003-2005, K A Fraser * @@ -30,59 +34,117 @@ * IN THE SOFTWARE. */ -#ifndef __LINUX_PUBLIC_EVTCHN_H__ -#define __LINUX_PUBLIC_EVTCHN_H__ +#ifndef __ASM_EVTCHN_H__ +#define __ASM_EVTCHN_H__ -/* - * Bind a fresh port to VIRQ @virq. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_VIRQ \ - _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) -struct ioctl_evtchn_bind_virq { - unsigned int virq; -}; +#include +#include +#include +#include +#include +#include /* - * Bind a fresh port to remote <@remote_domain, @remote_port>. - * Return allocated port. + * LOW-LEVEL DEFINITIONS */ -#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ - _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) -struct ioctl_evtchn_bind_interdomain { - unsigned int remote_domain, remote_port; -}; /* - * Allocate a fresh port for binding to @remote_domain. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ - _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) -struct ioctl_evtchn_bind_unbound_port { - unsigned int remote_domain; -}; + * Dynamically bind an event source to an IRQ-like callback handler. + * On some platforms this may not be implemented via the Linux IRQ subsystem. + * The IRQ argument passed to the callback handler is the same as returned + * from the bind call. It may not correspond to a Linux IRQ number. + * Returns IRQ or negative errno. + */ +int bind_caller_port_to_irqhandler( + unsigned int caller_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_listening_port_to_irqhandler( + unsigned int remote_domain, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_virq_to_irqhandler( + unsigned int virq, + unsigned int cpu, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); /* - * Unbind previously allocated @port. - */ -#define IOCTL_EVTCHN_UNBIND \ - _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) -struct ioctl_evtchn_unbind { - unsigned int port; -}; + * Common unbind function for all event sources. Takes IRQ to unbind from. + * Automatically closes the underlying event channel (except for bindings + * made with bind_caller_port_to_irqhandler()). + */ +void unbind_from_irqhandler(unsigned int irq, void *dev_id); + +void irq_resume(void); + +/* Entry point for notifications into Linux subsystems. */ +asmlinkage void evtchn_do_upcall(struct pt_regs *regs); + +/* Entry point for notifications into the userland character device. */ +void evtchn_device_upcall(int port); + +/* Mark a PIRQ as unavailable for dynamic allocation. */ +void evtchn_register_pirq(int irq); +/* Map a Xen-supplied PIRQ to a dynamically allocated one. */ +int evtchn_map_pirq(int irq, int xen_pirq); +/* Look up a Xen-supplied PIRQ for a dynamically allocated one. */ +int evtchn_get_xen_pirq(int irq); + +void mask_evtchn(int port); +void disable_all_local_evtchn(void); +void unmask_evtchn(int port); + +#ifdef CONFIG_SMP +void rebind_evtchn_to_cpu(int port, unsigned int cpu); +#else +#define rebind_evtchn_to_cpu(port, cpu) ((void)0) +#endif + +static inline int test_and_set_evtchn_mask(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + return synch_test_and_set_bit(port, s->evtchn_mask); +} + +static inline void clear_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, s->evtchn_pending); +} + +static inline void notify_remote_via_evtchn(int port) +{ + struct evtchn_send send = { .port = port }; + VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send)); +} /* - * Unbind previously allocated @port. + * Use these to access the event channel underlying the IRQ handle returned + * by bind_*_to_irqhandler(). */ -#define IOCTL_EVTCHN_NOTIFY \ - _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) -struct ioctl_evtchn_notify { - unsigned int port; -}; - -/* Clear and reinitialise the event buffer. Clear error condition. */ -#define IOCTL_EVTCHN_RESET \ - _IOC(_IOC_NONE, 'E', 5, 0) +void notify_remote_via_irq(int irq); +int irq_to_evtchn_port(int irq); -#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ +#endif /* __ASM_EVTCHN_H__ */ +#endif /* CONFIG_PARAVIRT_XEN */ --- head-2010-05-12.orig/include/xen/interface/callback.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/callback.h 2010-01-19 16:01:04.000000000 +0100 @@ -86,6 +86,8 @@ struct callback_register { uint16_t flags; xen_callback_t address; }; +typedef struct callback_register callback_register_t; +DEFINE_XEN_GUEST_HANDLE(callback_register_t); /* * Unregister a callback. @@ -98,5 +100,12 @@ struct callback_unregister { uint16_t type; uint16_t _unused; }; +typedef struct callback_unregister callback_unregister_t; +DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00030207 +#undef CALLBACKTYPE_sysenter +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#endif #endif /* __XEN_PUBLIC_CALLBACK_H__ */ --- head-2010-05-12.orig/include/xen/interface/elfnote.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/elfnote.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Definitions used for the Xen ELF notes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2006, Ian Campbell, XenSource Ltd. */ @@ -10,7 +28,7 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -22,8 +40,6 @@ /* * NAME=VALUE pair (string). - * - * LEGACY: FEATURES and PAE */ #define XEN_ELFNOTE_INFO 0 @@ -90,7 +106,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -140,6 +161,76 @@ */ #define XEN_ELFNOTE_SUSPEND_CANCEL 14 +/* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. + * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_INIT_P2M + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* --- head-2010-05-12.orig/include/xen/interface/event_channel.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/event_channel.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Event channels between domains. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, K A Fraser. */ @@ -11,8 +29,15 @@ #include +/* + * Prototype for this hypercall is: + * int event_channel_op(int cmd, void *args) + * @cmd == EVTCHNOP_??? (event-channel operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + typedef uint32_t evtchn_port_t; -DEFINE_GUEST_HANDLE(evtchn_port_t); +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as @@ -22,13 +47,14 @@ DEFINE_GUEST_HANDLE(evtchn_port_t); * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. */ -#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_alloc_unbound 6 struct evtchn_alloc_unbound { - /* IN parameters */ - domid_t dom, remote_dom; - /* OUT parameters */ - evtchn_port_t port; + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; }; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between @@ -41,29 +67,35 @@ struct evtchn_alloc_unbound { */ #define EVTCHNOP_bind_interdomain 0 struct evtchn_bind_interdomain { - /* IN parameters. */ - domid_t remote_dom; - evtchn_port_t remote_port; - /* OUT parameters. */ - evtchn_port_t local_port; + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; }; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified * vcpu. * NOTES: - * 1. A virtual IRQ may be bound to at most one event channel per vcpu. - * 2. The allocated event channel is bound to the specified vcpu. The binding - * may not be changed. + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. */ -#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_virq 1 struct evtchn_bind_virq { - /* IN parameters. */ - uint32_t virq; - uint32_t vcpu; - /* OUT parameters. */ - evtchn_port_t port; + /* IN parameters. */ + uint32_t virq; + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ . @@ -71,15 +103,16 @@ struct evtchn_bind_virq { * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ -#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_bind_pirq 2 struct evtchn_bind_pirq { - /* IN parameters. */ - uint32_t pirq; + /* IN parameters. */ + uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 - uint32_t flags; /* BIND_PIRQ__* */ - /* OUT parameters. */ - evtchn_port_t port; + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. @@ -87,33 +120,36 @@ struct evtchn_bind_pirq { * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ -#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_ipi 7 struct evtchn_bind_ipi { - uint32_t vcpu; - /* OUT parameters. */ - evtchn_port_t port; + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; /* * EVTCHNOP_close: Close a local event channel . If the channel is * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ -#define EVTCHNOP_close 3 +#define EVTCHNOP_close 3 struct evtchn_close { - /* IN parameters. */ - evtchn_port_t port; + /* IN parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_close evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ -#define EVTCHNOP_send 4 +#define EVTCHNOP_send 4 struct evtchn_send { - /* IN parameters. */ - evtchn_port_t port; + /* IN parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_send evtchn_send_t; /* * EVTCHNOP_status: Get the current status of the communication channel which @@ -123,75 +159,98 @@ struct evtchn_send { * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. */ -#define EVTCHNOP_status 5 +#define EVTCHNOP_status 5 struct evtchn_status { - /* IN parameters */ - domid_t dom; - evtchn_port_t port; - /* OUT parameters */ -#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ -#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ -#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ -#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ -#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ -#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ - uint32_t status; - uint32_t vcpu; /* VCPU to which this channel is bound. */ - union { - struct { - domid_t dom; - } unbound; /* EVTCHNSTAT_unbound */ - struct { - domid_t dom; - evtchn_port_t port; - } interdomain; /* EVTCHNSTAT_interdomain */ - uint32_t pirq; /* EVTCHNSTAT_pirq */ - uint32_t virq; /* EVTCHNSTAT_virq */ - } u; + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; }; +typedef struct evtchn_status evtchn_status_t; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: - * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised - * the binding. This binding cannot be changed. - * 2. All other channels notify vcpu0 by default. This default is set when + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ -#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_bind_vcpu 8 struct evtchn_bind_vcpu { - /* IN parameters. */ - evtchn_port_t port; - uint32_t vcpu; + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; }; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ -#define EVTCHNOP_unmask 9 +#define EVTCHNOP_unmask 9 struct evtchn_unmask { - /* IN parameters. */ - evtchn_port_t port; + /* IN parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_unmask evtchn_unmask_t; +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * Argument to event_channel_op_compat() hypercall. Superceded by new + * event_channel_op() hypercall since 0x00030202. + */ struct evtchn_op { - uint32_t cmd; /* EVTCHNOP_* */ - union { - struct evtchn_alloc_unbound alloc_unbound; - struct evtchn_bind_interdomain bind_interdomain; - struct evtchn_bind_virq bind_virq; - struct evtchn_bind_pirq bind_pirq; - struct evtchn_bind_ipi bind_ipi; - struct evtchn_close close; - struct evtchn_send send; - struct evtchn_status status; - struct evtchn_bind_vcpu bind_vcpu; - struct evtchn_unmask unmask; - } u; + uint32_t cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; }; -DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ --- head-2010-05-12.orig/include/xen/interface/features.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/features.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Feature flags, reported by XENVER_get_features. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2006, Keir Fraser */ @@ -41,6 +59,15 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + #define XENFEAT_NR_SUBMAPS 1 #endif /* __XEN_PUBLIC_FEATURES_H__ */ --- head-2010-05-12.orig/include/xen/interface/grant_table.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/grant_table.h 2010-01-19 16:01:04.000000000 +0100 @@ -28,6 +28,7 @@ #ifndef __XEN_PUBLIC_GRANT_TABLE_H__ #define __XEN_PUBLIC_GRANT_TABLE_H__ +#include "xen.h" /*********************************** * GRANT TABLE REPRESENTATION @@ -84,12 +85,26 @@ */ /* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. */ -struct grant_entry { + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. [GST] */ @@ -100,6 +115,7 @@ struct grant_entry { */ uint32_t frame; }; +typedef struct grant_entry_v1 grant_entry_v1_t; /* * Type of grant entry. @@ -107,10 +123,13 @@ struct grant_entry { * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* @@ -118,6 +137,10 @@ struct grant_entry { * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) @@ -125,6 +148,14 @@ struct grant_entry { #define GTF_reading (1U<<_GTF_reading) #define _GTF_writing (4) #define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: @@ -141,15 +172,87 @@ struct grant_entry { #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) - -/*********************************** - * GRANT TABLE QUERIES AND USES +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; /* - * Reference to a grant entry in a specified domain's grant table. + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. + */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/*********************************** + * GRANT TABLE QUERIES AND USES */ -typedef uint32_t grant_ref_t; /* * Handle to track a mapping created via a grant reference. @@ -185,7 +288,8 @@ struct gnttab_map_grant_ref { grant_handle_t handle; uint64_t dev_bus_addr; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); /* * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings @@ -207,7 +311,8 @@ struct gnttab_unmap_grant_ref { /* OUT parameters. */ int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); /* * GNTTABOP_setup_table: Set up a grant table for comprising at least @@ -225,9 +330,10 @@ struct gnttab_setup_table { uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* GNTST_* */ - GUEST_HANDLE(ulong) frame_list; + XEN_GUEST_HANDLE(ulong) frame_list; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); /* * GNTTABOP_dump_table: Dump the contents of the grant table to the @@ -240,7 +346,8 @@ struct gnttab_dump_table { /* OUT parameters. */ int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); /* * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The @@ -253,13 +360,15 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_t #define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ - unsigned long mfn; + xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ int16_t status; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); + /* * GNTTABOP_copy: Hypervisor based copy @@ -283,24 +392,26 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_transf #define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) #define _GNTCOPY_dest_gref (1) #define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) +#define _GNTCOPY_can_fail (2) +#define GNTCOPY_can_fail (1<<_GNTCOPY_can_fail) #define GNTTABOP_copy 5 -struct gnttab_copy { - /* IN parameters. */ - struct { - union { - grant_ref_t ref; - unsigned long gmfn; - } u; - domid_t domid; - uint16_t offset; - } source, dest; - uint16_t len; - uint16_t flags; /* GNTCOPY_* */ - /* OUT parameters. */ - int16_t status; -}; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); +typedef struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +} gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); /* * GNTTABOP_query_size: Query the current and maximum sizes of the shared @@ -318,10 +429,92 @@ struct gnttab_query_size { uint32_t max_nr_frames; int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by but atomically replace the page table entry with one + * pointing to the machine address under . will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by . + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_and_replace 7 +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +#define GNTTABOP_set_version 8 +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for . In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * specify the size of vector . + * The frame addresses are returned in the . + * Only addresses are returned, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames 9 +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); /* - * Bitfield values for update_pin_status.flags. + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain . + */ +#define GNTTABOP_get_version 10 +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. */ /* Map the grant entry for access by I/O devices. */ #define _GNTMAP_device_map (0) @@ -348,6 +541,16 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + /* * Values for error status returns. All errors are -ve. */ @@ -361,7 +564,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_ #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ -#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Could not map at the moment. Retry. */ #define GNTTABOP_error_msgs { \ "okay", \ @@ -374,7 +579,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_ "no spare translation slot in the I/O MMU", \ "permission denied", \ "bad page", \ - "copy arguments cross page boundary" \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "could not map at the moment, retry" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ --- head-2010-05-12.orig/include/xen/interface/io/blkif.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/blkif.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Unified block-device I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, Keir Fraser */ @@ -24,8 +42,10 @@ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). */ -typedef uint16_t blkif_vdev_t; -typedef uint64_t blkif_sector_t; +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t /* * REQUEST CODES. @@ -34,7 +54,7 @@ typedef uint64_t blkif_sector_t; #define BLKIF_OP_WRITE 1 /* * Recognised only if "feature-barrier" is present in backend xenbus info. - * The "feature_barrier" node contains a boolean indicating whether barrier + * The "feature-barrier" node contains a boolean indicating whether barrier * requests are likely to succeed or fail. Either way, a barrier request * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by * the underlying block-device hardware. The boolean simply indicates whether @@ -43,33 +63,57 @@ typedef uint64_t blkif_sector_t; * create the "feature-barrier" node! */ #define BLKIF_OP_WRITE_BARRIER 2 +/* + * Recognised if "feature-flush-cache" is present in backend xenbus + * info. A flush will ask the underlying storage hardware to flush its + * non-volatile caches as appropriate. The "feature-flush-cache" node + * contains a boolean indicating whether flush requests are likely to + * succeed or fail. Either way, a flush request may fail at any time + * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying + * block-device hardware. The boolean simply indicates whether or not it + * is worthwhile for the frontend to attempt flushes. If a backend does + * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the + * "feature-flush-cache" node! + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 /* * Maximum scatter/gather segments per request. - * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. * NB. This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +/* + * NB. first_sect and last_sect in blkif_request_segment, as well as + * sector_number in blkif_request, are always expressed in 512-byte units. + * However they must be properly aligned to the real sector size of the + * physical disk, which is reported in the "sector-size" node in the backend + * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + struct blkif_request { - uint8_t operation; /* BLKIF_OP_??? */ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment { - grant_ref_t gref; /* reference to I/O buffer frame */ - /* @first_sect: first sector in frame to transfer (inclusive). */ - /* @last_sect: last sector in frame to transfer (inclusive). */ - uint8_t first_sect, last_sect; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; }; +typedef struct blkif_request blkif_request_t; struct blkif_response { - uint64_t id; /* copied from request */ - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ }; +typedef struct blkif_response blkif_response_t; /* * STATUS RETURN CODES. --- head-2010-05-12.orig/include/xen/interface/io/console.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/console.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Console I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Keir Fraser */ --- head-2010-05-12.orig/include/xen/interface/io/fbif.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/fbif.h 2010-01-19 16:01:04.000000000 +0100 @@ -41,12 +41,13 @@ */ #define XENFB_TYPE_UPDATE 2 -struct xenfb_update { - uint8_t type; /* XENFB_TYPE_UPDATE */ - int32_t x; /* source x */ - int32_t y; /* source y */ - int32_t width; /* rect width */ - int32_t height; /* rect height */ +struct xenfb_update +{ + uint8_t type; /* XENFB_TYPE_UPDATE */ + int32_t x; /* source x */ + int32_t y; /* source y */ + int32_t width; /* rect width */ + int32_t height; /* rect height */ }; /* @@ -55,36 +56,58 @@ struct xenfb_update { */ #define XENFB_TYPE_RESIZE 3 -struct xenfb_resize { - uint8_t type; /* XENFB_TYPE_RESIZE */ - int32_t width; /* width in pixels */ - int32_t height; /* height in pixels */ - int32_t stride; /* stride in bytes */ - int32_t depth; /* depth in bits */ - int32_t offset; /* start offset within framebuffer */ +struct xenfb_resize +{ + uint8_t type; /* XENFB_TYPE_RESIZE */ + int32_t width; /* width in pixels */ + int32_t height; /* height in pixels */ + int32_t stride; /* stride in bytes */ + int32_t depth; /* depth in bits */ + int32_t offset; /* offset of the framebuffer in bytes */ }; #define XENFB_OUT_EVENT_SIZE 40 -union xenfb_out_event { - uint8_t type; - struct xenfb_update update; - struct xenfb_resize resize; - char pad[XENFB_OUT_EVENT_SIZE]; +union xenfb_out_event +{ + uint8_t type; + struct xenfb_update update; + struct xenfb_resize resize; + char pad[XENFB_OUT_EVENT_SIZE]; }; /* In events (backend -> frontend) */ /* * Frontends should ignore unknown in events. - * No in events currently defined. */ +/* + * Framebuffer refresh period advice + * Backend sends it to advise the frontend their preferred period of + * refresh. Frontends that keep the framebuffer constantly up-to-date + * just ignore it. Frontends that use the advice should immediately + * refresh the framebuffer (and send an update notification event if + * those have been requested), then use the update frequency to guide + * their periodical refreshs. + */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + #define XENFB_IN_EVENT_SIZE 40 -union xenfb_in_event { - uint8_t type; - char pad[XENFB_IN_EVENT_SIZE]; +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; }; /* shared page */ @@ -93,41 +116,41 @@ union xenfb_in_event { #define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) #define XENFB_IN_RING_OFFS 1024 #define XENFB_IN_RING(page) \ - ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) #define XENFB_IN_RING_REF(page, idx) \ - (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) #define XENFB_OUT_RING_SIZE 2048 #define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) #define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) #define XENFB_OUT_RING(page) \ - ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) #define XENFB_OUT_RING_REF(page, idx) \ - (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) -struct xenfb_page { - uint32_t in_cons, in_prod; - uint32_t out_cons, out_prod; - - int32_t width; /* width of the framebuffer (in pixels) */ - int32_t height; /* height of the framebuffer (in pixels) */ - uint32_t line_length; /* length of a row of pixels (in bytes) */ - uint32_t mem_length; /* length of the framebuffer (in bytes) */ - uint8_t depth; /* depth of a pixel (in bits) */ - - /* - * Framebuffer page directory - * - * Each directory page holds PAGE_SIZE / sizeof(*pd) - * framebuffer pages, and can thus map up to PAGE_SIZE * - * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 - * Megs 64 bit. 256 directories give enough room for a 512 - * Meg framebuffer with a max resolution of 12,800x10,240. - * Should be enough for a while with room leftover for - * expansion. - */ - unsigned long pd[256]; +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs + * 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240. Should + * be enough for a while with room leftover for expansion. + */ + unsigned long pd[256]; }; /* --- head-2010-05-12.orig/include/xen/interface/io/kbdif.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/kbdif.h 2010-01-19 16:01:04.000000000 +0100 @@ -45,34 +45,38 @@ */ #define XENKBD_TYPE_POS 4 -struct xenkbd_motion { - uint8_t type; /* XENKBD_TYPE_MOTION */ - int32_t rel_x; /* relative X motion */ - int32_t rel_y; /* relative Y motion */ - int32_t rel_z; /* relative Z motion (wheel) */ -}; - -struct xenkbd_key { - uint8_t type; /* XENKBD_TYPE_KEY */ - uint8_t pressed; /* 1 if pressed; 0 otherwise */ - uint32_t keycode; /* KEY_* from linux/input.h */ -}; - -struct xenkbd_position { - uint8_t type; /* XENKBD_TYPE_POS */ - int32_t abs_x; /* absolute X position (in FB pixels) */ - int32_t abs_y; /* absolute Y position (in FB pixels) */ - int32_t rel_z; /* relative Z motion (wheel) */ +struct xenkbd_motion +{ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +struct xenkbd_key +{ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ +}; + +struct xenkbd_position +{ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 -union xenkbd_in_event { - uint8_t type; - struct xenkbd_motion motion; - struct xenkbd_key key; - struct xenkbd_position pos; - char pad[XENKBD_IN_EVENT_SIZE]; +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; }; /* Out events (frontend -> backend) */ @@ -85,9 +89,10 @@ union xenkbd_in_event { #define XENKBD_OUT_EVENT_SIZE 40 -union xenkbd_out_event { - uint8_t type; - char pad[XENKBD_OUT_EVENT_SIZE]; +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; }; /* shared page */ @@ -96,21 +101,22 @@ union xenkbd_out_event { #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) #define XENKBD_IN_RING_OFFS 1024 #define XENKBD_IN_RING(page) \ - ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) #define XENKBD_IN_RING_REF(page, idx) \ - (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) #define XENKBD_OUT_RING_SIZE 1024 #define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) #define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) #define XENKBD_OUT_RING(page) \ - ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) #define XENKBD_OUT_RING_REF(page, idx) \ - (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) -struct xenkbd_page { - uint32_t in_cons, in_prod; - uint32_t out_cons, out_prod; +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; }; #endif --- head-2010-05-12.orig/include/xen/interface/io/netif.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/netif.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Unified network-device I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, Keir Fraser */ @@ -47,18 +65,21 @@ #define _NETTXF_extra_info (3) #define NETTXF_extra_info (1U<<_NETTXF_extra_info) -struct xen_netif_tx_request { +struct netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ uint16_t flags; /* NETTXF_* */ uint16_t id; /* Echoed in response message. */ uint16_t size; /* Packet size in bytes. */ }; +typedef struct netif_tx_request netif_tx_request_t; /* Types of netif_extra_info descriptors. */ -#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ -#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ -#define XEN_NETIF_EXTRA_TYPE_MAX (2) +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MAX (4) /* netif_extra_info flags. */ #define _XEN_NETIF_EXTRA_FLAG_MORE (0) @@ -71,49 +92,68 @@ struct xen_netif_tx_request { * This structure needs to fit within both netif_tx_request and * netif_rx_response for compatibility. */ -struct xen_netif_extra_info { - uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ - uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ - - union { - struct { - /* - * Maximum payload size of each segment. For - * example, for TCP this is just the path MSS. - */ - uint16_t size; - - /* - * GSO type. This determines the protocol of - * the packet and any extra features required - * to segment the packet properly. - */ - uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ - - /* Future expansion. */ - uint8_t pad; - - /* - * GSO features. This specifies any extra GSO - * features required to process this packet, - * such as ECN support for TCPv4. - */ - uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ - } gso; +struct netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + /* + * XEN_NETIF_EXTRA_TYPE_GSO: + */ + struct { + /* + * Maximum payload size of each segment. For example, for TCP this + * is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of the packet and any + * extra features required to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + /* + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * Backend advertises availability via 'feature-multicast-control' + * xenbus node containing value '1'. + * Frontend requests this feature by advertising + * 'request-multicast-control' xenbus node containing value '1'. + * If multicast control is requested then multicast flooding is + * disabled and the frontend must explicitly register its interest + * in multicast groups using dummy transmit requests containing + * MCAST_{ADD,DEL} extra-info fragments. + */ + struct { + uint8_t addr[6]; /* Address to add/remove. */ + } mcast; - uint16_t pad[3]; - } u; + uint16_t pad[3]; + } u; }; +typedef struct netif_extra_info netif_extra_info_t; -struct xen_netif_tx_response { - uint16_t id; - int16_t status; /* NETIF_RSP_* */ +struct netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ }; +typedef struct netif_tx_response netif_tx_response_t; -struct xen_netif_rx_request { - uint16_t id; /* Echoed in response message. */ - grant_ref_t gref; /* Reference to incoming granted frame */ +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + grant_ref_t gref; /* Reference to incoming granted frame */ }; +typedef struct netif_rx_request netif_rx_request_t; /* Packet data has been validated against protocol checksum. */ #define _NETRXF_data_validated (0) @@ -131,23 +171,20 @@ struct xen_netif_rx_request { #define _NETRXF_extra_info (3) #define NETRXF_extra_info (1U<<_NETRXF_extra_info) -struct xen_netif_rx_response { +struct netif_rx_response { uint16_t id; uint16_t offset; /* Offset in page of start of received packet */ uint16_t flags; /* NETRXF_* */ int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ }; +typedef struct netif_rx_response netif_rx_response_t; /* * Generate netif ring structures and types. */ -DEFINE_RING_TYPES(xen_netif_tx, - struct xen_netif_tx_request, - struct xen_netif_tx_response); -DEFINE_RING_TYPES(xen_netif_rx, - struct xen_netif_rx_request, - struct xen_netif_rx_response); +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); #define NETIF_RSP_DROPPED -2 #define NETIF_RSP_ERROR -1 --- head-2010-05-12.orig/include/xen/interface/io/protocols.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/protocols.h 2010-01-19 16:01:04.000000000 +0100 @@ -1,10 +1,31 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + #ifndef __XEN_PROTOCOLS_H__ #define __XEN_PROTOCOLS_H__ #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" #define XEN_IO_PROTO_ABI_IA64 "ia64-abi" -#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" #if defined(__i386__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 @@ -12,8 +33,6 @@ # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 #elif defined(__ia64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 -#elif defined(__powerpc64__) -# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 #else # error arch fixup needed here #endif --- head-2010-05-12.orig/include/xen/interface/io/ring.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/ring.h 2010-02-24 13:13:46.000000000 +0100 @@ -3,16 +3,42 @@ * * Shared producer-consumer ring macros. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Tim Deegan and Andrew Warfield November 2004. */ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ -#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) #define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) #define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) @@ -24,74 +50,84 @@ typedef unsigned int RING_IDX; * A ring contains as many entries as will fit, rounded down to the nearest * power of two (so we can mask with (size-1) to loop around). */ +#define __CONST_RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \ + sizeof(((struct _s##_sring *)0)->ring[0]))) +/* + * The same for passing in an actual pointer instead of a name tag. + */ #define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, - * let's say struct request, and struct response already defined. + * let's say request_t, and response_t already defined. * * In a header where you want the ring datatype declared, you then do: * - * DEFINE_RING_TYPES(mytag, struct request, struct response); + * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * - * struct mytag_sring - The shared ring. - * struct mytag_front_ring - The 'front' half of the ring. - * struct mytag_back_ring - The 'back' half of the ring. + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). To initialise * the front half: * - * struct mytag_front_ring front_ring; - * SHARED_RING_INIT((struct mytag_sring *)shared_page); - * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * - * struct mytag_back_ring back_ring; - * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + uint8_t netfront_smartpoll_active; \ + uint8_t pad[47]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t /* * Macros for manipulating rings. @@ -109,86 +145,94 @@ struct __name##_back_ring { \ */ /* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - memset((_s)->pad, 0, sizeof((_s)->pad)); \ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->pad, 0, sizeof((_s)->pad)); \ } while(0) -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) /* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ } while (0) -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ } while (0) /* How big is this ring? */ -#define RING_SIZE(_r) \ +#define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ +#define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ -#define RING_FULL(_r) \ +#define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ - }) +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? \ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif /* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ +#define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) -#define RING_GET_RESPONSE(_r, _idx) \ +#define RING_GET_RESPONSE(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) /* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) -#define RING_PUSH_REQUESTS(_r) do { \ - wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) -#define RING_PUSH_RESPONSES(_r) do { \ - wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* @@ -221,40 +265,40 @@ struct __name##_back_ring { \ * field appropriately. */ -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - mb(); /* front sees new responses /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) #endif /* __XEN_PUBLIC_IO_RING_H__ */ --- head-2010-05-12.orig/include/xen/interface/io/xenbus.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/xenbus.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,42 +3,68 @@ * * Xenbus protocol details. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2005 XenSource Ltd. */ #ifndef _XEN_PUBLIC_IO_XENBUS_H #define _XEN_PUBLIC_IO_XENBUS_H -/* The state of either end of the Xenbus, i.e. the current communication - status of initialisation across the bus. States here imply nothing about - the state of the connection between the driver and the kernel's device - layers. */ -enum xenbus_state -{ - XenbusStateUnknown = 0, - XenbusStateInitialising = 1, - XenbusStateInitWait = 2, /* Finished early - initialisation, but waiting - for information from the peer - or hotplug scripts. */ - XenbusStateInitialised = 3, /* Initialised and waiting for a - connection from the peer. */ - XenbusStateConnected = 4, - XenbusStateClosing = 5, /* The device is being closed - due to an error or an unplug - event. */ - XenbusStateClosed = 6 +/* + * The state of either end of the Xenbus, i.e. the current communication + * status of initialisation across the bus. States here imply nothing about + * the state of the connection between the driver and the kernel's device + * layers. + */ +enum xenbus_state { + XenbusStateUnknown = 0, + + XenbusStateInitialising = 1, + + /* + * InitWait: Finished early initialisation but waiting for information + * from the peer or hotplug scripts. + */ + XenbusStateInitWait = 2, + + /* + * Initialised: Waiting for a connection from the peer. + */ + XenbusStateInitialised = 3, + XenbusStateConnected = 4, + + /* + * Closing: The device is being closed due to an error or an unplug event. + */ + XenbusStateClosing = 5, + + XenbusStateClosed = 6, + + /* + * Reconfiguring: The device is being reconfigured. + */ + XenbusStateReconfiguring = 7, + + XenbusStateReconfigured = 8 }; +typedef enum xenbus_state XenbusState; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ - -/* - * Local variables: - * c-file-style: "linux" - * indent-tabs-mode: t - * c-indent-level: 8 - * c-basic-offset: 8 - * tab-width: 8 - * End: - */ --- head-2010-05-12.orig/include/xen/interface/io/xs_wire.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/io/xs_wire.h 2010-01-19 16:01:04.000000000 +0100 @@ -1,6 +1,25 @@ /* * Details of the "wire" protocol between Xen Store Daemon and client * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2005 Rusty Russell IBM Corporation */ @@ -26,7 +45,9 @@ enum xsd_sockmsg_type XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, - XS_IS_DOMAIN_INTRODUCED + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET }; #define XS_WRITE_NONE "NONE" @@ -39,8 +60,14 @@ struct xsd_errors int errnum; const char *errstring; }; +#ifdef EINVAL #define XSD_ERROR(x) { x, #x } -static struct xsd_errors xsd_errors[] __attribute__((unused)) = { +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { XSD_ERROR(EINVAL), XSD_ERROR(EACCES), XSD_ERROR(EEXIST), @@ -56,6 +83,7 @@ static struct xsd_errors xsd_errors[] __ XSD_ERROR(EAGAIN), XSD_ERROR(EISCONN) }; +#endif struct xsd_sockmsg { @@ -84,4 +112,11 @@ struct xenstore_domain_interface { XENSTORE_RING_IDX rsp_cons, rsp_prod; }; +/* Violating this is very bad. See docs/misc/xenstore.txt. */ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + #endif /* _XS_WIRE_H */ --- head-2010-05-12.orig/include/xen/interface/memory.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/memory.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,20 +3,57 @@ * * Memory reservation and information. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ +#include "xen.h" + /* - * Increase or decrease the specified domain's memory reservation. Returns a - * -ve errcode on failure, or the # extents successfully allocated or freed. + * Increase or decrease the specified domain's memory reservation. Returns the + * number of extents successfully allocated or freed. * arg == addr of struct xen_memory_reservation. */ #define XENMEM_increase_reservation 0 #define XENMEM_decrease_reservation 1 #define XENMEM_populate_physmap 6 + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 +/* + * Maximum # bits addressable by the user of the allocated region (e.g., I/O + * devices often have a 32-bit limitation even in 64-bit systems). If zero + * then the user has no addressing restriction. This field is not used by + * XENMEM_decrease_reservation. + */ +#define XENMEMF_address_bits(x) (x) +#define XENMEMF_get_address_bits(x) ((x) & 0xffu) +/* NUMA node to allocate from. */ +#define XENMEMF_node(x) (((x) + 1) << 8) +#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) +#endif + struct xen_memory_reservation { /* @@ -29,28 +66,70 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ - unsigned long nr_extents; + xen_ulong_t nr_extents; unsigned int extent_order; - /* - * Maximum # bits addressable by the user of the allocated region (e.g., - * I/O devices often have a 32-bit limitation even in 64-bit systems). If - * zero then the user has no addressing restriction. - * This field is not used by XENMEM_decrease_reservation. - */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 + /* XENMEMF flags. */ + unsigned int mem_flags; +#else unsigned int address_bits; +#endif /* * Domain whose reservation is being changed. * Unprivileged domains can specify only DOMID_SELF. */ domid_t domid; +}; +typedef struct xen_memory_reservation xen_memory_reservation_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); +/* + * An atomic exchange of memory pages. If return code is zero then + * @out.extent_list provides GMFNs of the newly-allocated memory. + * Returns zero on complete success, otherwise a negative error code. + * On complete success then always @nr_exchanged == @in.nr_extents. + * On partial success @nr_exchanged indicates how much work was done. + */ +#define XENMEM_exchange 11 +struct xen_memory_exchange { + /* + * [IN] Details of memory extents to be exchanged (GMFN bases). + * Note that @in.address_bits is ignored and unused. + */ + struct xen_memory_reservation in; + + /* + * [IN/OUT] Details of new memory extents. + * We require that: + * 1. @in.domid == @out.domid + * 2. @in.nr_extents << @in.extent_order == + * @out.nr_extents << @out.extent_order + * 3. @in.extent_start and @out.extent_start lists must not overlap + * 4. @out.extent_start lists GPFN bases to be populated + * 5. @out.extent_start is overwritten with allocated GMFN bases + */ + struct xen_memory_reservation out; + + /* + * [OUT] Number of input extents that were successfully exchanged: + * 1. The first @nr_exchanged input extents were successfully + * deallocated. + * 2. The corresponding first entries in the output extent list correctly + * indicate the GMFNs that were successfully exchanged. + * 3. All other input and output extents are untouched. + * 4. If not all input exents are exchanged then the return code of this + * command will be non-zero. + * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! + */ + xen_ulong_t nr_exchanged; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); +typedef struct xen_memory_exchange xen_memory_exchange_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); /* * Returns the maximum machine frame number of mapped RAM in this system. @@ -68,6 +147,11 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_re #define XENMEM_maximum_reservation 4 /* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys * mapping table. Architectures which do not have a m2p table do not implement * this command. @@ -86,7 +170,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -94,7 +178,22 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); + +/* + * Returns the location in virtual address space of the machine_to_phys + * mapping table. Architectures which do not have a m2p table, or which do not + * map it by default into guest address space, do not implement this command. + * arg == addr of xen_machphys_mapping_t. + */ +#define XENMEM_machphys_mapping 12 +struct xen_machphys_mapping { + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ +}; +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -109,37 +208,84 @@ struct xen_add_to_physmap { /* Source mapping space. */ #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ unsigned int space; +#define XENMAPIDX_grant_table_status 0x80000000 + /* Index into source mapping space. */ - unsigned long idx; + xen_ulong_t idx; /* GPFN where the source mapping page should appear. */ - unsigned long gpfn; + xen_pfn_t gpfn; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); +typedef struct xen_add_to_physmap xen_add_to_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); + +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ /* - * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error - * code on failure. This call only works for auto-translated guests. + * Returns the pseudo-physical memory map as it was when the domain + * was started (specified by XENMEM_set_memory_map). + * arg == addr of xen_memory_map_t. */ -#define XENMEM_translate_gpfn_list 8 -struct xen_translate_gpfn_list { - /* Which domain to translate for? */ - domid_t domid; - - /* Length of list. */ - unsigned long nr_gpfns; - - /* List of GPFNs to translate. */ - GUEST_HANDLE(ulong) gpfn_list; +#define XENMEM_memory_map 9 +struct xen_memory_map { + /* + * On call the number of entries which can be stored in buffer. On + * return the number of entries which have been stored in + * buffer. + */ + unsigned int nr_entries; /* - * Output list to contain MFN translations. May be the same as the input - * list (in which case each input GPFN is overwritten with the output MFN). + * Entries in the buffer are in the same format as returned by the + * BIOS INT 0x15 EAX=0xE820 call. */ - GUEST_HANDLE(ulong) mfn_list; + XEN_GUEST_HANDLE(void) buffer; +}; +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); + +/* + * Returns the real physical memory map. Passes the same structure as + * XENMEM_memory_map. + * arg == addr of xen_memory_map_t. + */ +#define XENMEM_machine_memory_map 10 + +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. + */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; + +/* + * Get the number of MFNs saved through memory sharing. + * The call never fails. + */ +#define XENMEM_get_sharing_freed_pages 18 #endif /* __XEN_PUBLIC_MEMORY_H__ */ --- head-2010-05-12.orig/include/xen/interface/physdev.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/physdev.h 2010-01-19 16:01:04.000000000 +0100 @@ -21,10 +21,12 @@ #ifndef __XEN_PUBLIC_PHYSDEV_H__ #define __XEN_PUBLIC_PHYSDEV_H__ +#include "xen.h" + /* * Prototype for this hypercall is: * int physdev_op(int cmd, void *args) - * @cmd == PHYSDEVOP_??? (physdev operation). + * @cmd == PHYSDEVOP_??? (physdev operation). * @args == Operation-specific extra arguments (NULL if none). */ @@ -32,114 +34,231 @@ * Notify end-of-interrupt (EOI) for the specified IRQ. * @arg == pointer to physdev_eoi structure. */ -#define PHYSDEVOP_eoi 12 +#define PHYSDEVOP_eoi 12 struct physdev_eoi { - /* IN */ - uint32_t irq; + /* IN */ + uint32_t irq; +}; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn 17 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; }; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); /* * Query the status of an IRQ line. * @arg == pointer to physdev_irq_status_query structure. */ -#define PHYSDEVOP_irq_status_query 5 +#define PHYSDEVOP_irq_status_query 5 struct physdev_irq_status_query { - /* IN */ - uint32_t irq; - /* OUT */ - uint32_t flags; /* XENIRQSTAT_* */ + /* IN */ + uint32_t irq; + /* OUT */ + uint32_t flags; /* XENIRQSTAT_* */ }; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */ -#define _XENIRQSTAT_needs_eoi (0) -#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) +#define _XENIRQSTAT_needs_eoi (0) +#define XENIRQSTAT_needs_eoi (1U<<_XENIRQSTAT_needs_eoi) /* IRQ shared by multiple guests? */ -#define _XENIRQSTAT_shared (1) -#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) +#define _XENIRQSTAT_shared (1) +#define XENIRQSTAT_shared (1U<<_XENIRQSTAT_shared) /* * Set the current VCPU's I/O privilege level. * @arg == pointer to physdev_set_iopl structure. */ -#define PHYSDEVOP_set_iopl 6 +#define PHYSDEVOP_set_iopl 6 struct physdev_set_iopl { - /* IN */ - uint32_t iopl; + /* IN */ + uint32_t iopl; }; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); /* * Set the current VCPU's I/O-port permissions bitmap. * @arg == pointer to physdev_set_iobitmap structure. */ -#define PHYSDEVOP_set_iobitmap 7 +#define PHYSDEVOP_set_iobitmap 7 struct physdev_set_iobitmap { - /* IN */ - uint8_t * bitmap; - uint32_t nr_ports; + /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else + uint8_t *bitmap; +#endif + uint32_t nr_ports; }; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); /* * Read or write an IO-APIC register. * @arg == pointer to physdev_apic structure. */ -#define PHYSDEVOP_apic_read 8 -#define PHYSDEVOP_apic_write 9 +#define PHYSDEVOP_apic_read 8 +#define PHYSDEVOP_apic_write 9 struct physdev_apic { - /* IN */ - unsigned long apic_physbase; - uint32_t reg; - /* IN or OUT */ - uint32_t value; + /* IN */ + unsigned long apic_physbase; + uint32_t reg; + /* IN or OUT */ + uint32_t value; }; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); /* * Allocate or free a physical upcall vector for the specified IRQ line. * @arg == pointer to physdev_irq structure. */ -#define PHYSDEVOP_alloc_irq_vector 10 -#define PHYSDEVOP_free_irq_vector 11 +#define PHYSDEVOP_alloc_irq_vector 10 +#define PHYSDEVOP_free_irq_vector 11 struct physdev_irq { - /* IN */ - uint32_t irq; - /* IN or OUT */ - uint32_t vector; + /* IN */ + uint32_t irq; + /* IN or OUT */ + uint32_t vector; +}; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); + +#define MAP_PIRQ_TYPE_MSI 0x0 +#define MAP_PIRQ_TYPE_GSI 0x1 +#define MAP_PIRQ_TYPE_UNKNOWN 0x2 + +#define PHYSDEVOP_map_pirq 13 +struct physdev_map_pirq { + domid_t domid; + /* IN */ + int type; + /* IN */ + int index; + /* IN or OUT */ + int pirq; + /* IN */ + int bus; + /* IN */ + int devfn; + /* IN */ + int entry_nr; + /* IN */ + uint64_t table_base; +}; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); + +#define PHYSDEVOP_unmap_pirq 14 +struct physdev_unmap_pirq { + domid_t domid; + /* IN */ + int pirq; +}; + +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); + +#define PHYSDEVOP_manage_pci_add 15 +#define PHYSDEVOP_manage_pci_remove 16 +struct physdev_manage_pci { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; + +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); + +#define PHYSDEVOP_manage_pci_add_ext 20 +struct physdev_manage_pci_ext { + /* IN */ + uint8_t bus; + uint8_t devfn; + unsigned is_extfn; + unsigned is_virtfn; + struct { + uint8_t bus; + uint8_t devfn; + } physfn; }; +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); + /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() * hypercall since 0x00030202. */ struct physdev_op { - uint32_t cmd; - union { - struct physdev_irq_status_query irq_status_query; - struct physdev_set_iopl set_iopl; - struct physdev_set_iobitmap set_iobitmap; - struct physdev_apic apic_op; - struct physdev_irq irq_op; - } u; + uint32_t cmd; + union { + struct physdev_irq_status_query irq_status_query; + struct physdev_set_iopl set_iopl; + struct physdev_set_iobitmap set_iobitmap; + struct physdev_apic apic_op; + struct physdev_irq irq_op; + } u; +}; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); + +#define PHYSDEVOP_setup_gsi 21 +struct physdev_setup_gsi { + int gsi; + /* IN */ + uint8_t triggering; + /* IN */ + uint8_t polarity; + /* IN */ }; +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. * ** This command is obsolete since interface version 0x00030202 and is ** - * ** unsupported by newer versions of Xen. ** + * ** unsupported by newer versions of Xen. ** */ -#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 +#define PHYSDEVOP_IRQ_UNMASK_NOTIFY 4 /* * These all-capitals physdev operation names are superceded by the new names * (defined above) since interface version 0x00030202. */ -#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query -#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl -#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap -#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read -#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write -#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector -#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector +#define PHYSDEVOP_IRQ_STATUS_QUERY PHYSDEVOP_irq_status_query +#define PHYSDEVOP_SET_IOPL PHYSDEVOP_set_iopl +#define PHYSDEVOP_SET_IOBITMAP PHYSDEVOP_set_iobitmap +#define PHYSDEVOP_APIC_READ PHYSDEVOP_apic_read +#define PHYSDEVOP_APIC_WRITE PHYSDEVOP_apic_write +#define PHYSDEVOP_ASSIGN_VECTOR PHYSDEVOP_alloc_irq_vector +#define PHYSDEVOP_FREE_VECTOR PHYSDEVOP_free_irq_vector #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi -#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared +#define PHYSDEVOP_IRQ_SHARED XENIRQSTAT_shared #endif /* __XEN_PUBLIC_PHYSDEV_H__ */ --- head-2010-05-12.orig/include/xen/interface/sched.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/sched.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Scheduler state interactions * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Keir Fraser */ @@ -13,17 +31,17 @@ /* * The prototype for this hypercall is: - * long sched_op_new(int cmd, void *arg) + * long sched_op(int cmd, void *arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == Operation-specific extra argument(s), as described below. * - * **NOTE**: - * Versions of Xen prior to 3.0.2 provide only the following legacy version + * Versions of Xen prior to 3.0.2 provided only the following legacy version * of this hypercall, supporting only the commands yield, block and shutdown: * long sched_op(int cmd, unsigned long arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) * == SHUTDOWN_* code (SCHEDOP_shutdown) + * This legacy version is available to new guests as sched_op_compat(). */ /* @@ -49,7 +67,8 @@ struct sched_shutdown { unsigned int reason; /* SHUTDOWN_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); /* * Poll a set of event-channel ports. Return when one or more are pending. An @@ -58,11 +77,26 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_shutdow */ #define SCHEDOP_poll 3 struct sched_poll { - GUEST_HANDLE(evtchn_port_t) ports; + XEN_GUEST_HANDLE(evtchn_port_t) ports; unsigned int nr_ports; uint64_t timeout; }; -DEFINE_GUEST_HANDLE_STRUCT(sched_poll); +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_xxx reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); /* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control --- head-2010-05-12.orig/include/xen/interface/vcpu.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/vcpu.h 2010-01-19 16:01:04.000000000 +0100 @@ -27,11 +27,13 @@ #ifndef __XEN_PUBLIC_VCPU_H__ #define __XEN_PUBLIC_VCPU_H__ +#include "xen.h" + /* * Prototype for this hypercall is: - * int vcpu_op(int cmd, int vcpuid, void *extra_args) - * @cmd == VCPUOP_??? (VCPU operation). - * @vcpuid == VCPU to operate on. + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. * @extra_args == Operation-specific extra arguments (NULL if none). */ @@ -40,52 +42,53 @@ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. * * @extra_arg == pointer to vcpu_guest_context structure containing initial - * state for the VCPU. + * state for the VCPU. */ -#define VCPUOP_initialise 0 +#define VCPUOP_initialise 0 /* * Bring up a VCPU. This makes the VCPU runnable. This operation will fail * if the VCPU has not been initialised (VCPUOP_initialise). */ -#define VCPUOP_up 1 +#define VCPUOP_up 1 /* * Bring down a VCPU (i.e., make it non-runnable). * There are a few caveats that callers should observe: - * 1. This operation may return, and VCPU_is_up may return false, before the - * VCPU stops running (i.e., the command is asynchronous). It is a good - * idea to ensure that the VCPU has entered a non-critical loop before - * bringing it down. Alternatively, this operation is guaranteed - * synchronous if invoked by the VCPU itself. - * 2. After a VCPU is initialised, there is currently no way to drop all its - * references to domain memory. Even a VCPU that is down still holds - * memory references via its pagetable base pointer and GDT. It is good - * practise to move a VCPU onto an 'idle' or default page table, LDT and - * GDT before bringing it down. + * 1. This operation may return, and VCPU_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. It is good + * practise to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. */ -#define VCPUOP_down 2 +#define VCPUOP_down 2 /* Returns 1 if the given VCPU is up. */ -#define VCPUOP_is_up 3 +#define VCPUOP_is_up 3 /* * Return information about the state and running time of a VCPU. * @extra_arg == pointer to vcpu_runstate_info structure. */ -#define VCPUOP_get_runstate_info 4 +#define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { - /* VCPU's current state (RUNSTATE_*). */ - int state; - /* When was current state entered (system time, ns)? */ - uint64_t state_entry_time; - /* - * Time spent in each RUNSTATE_* (ns). The sum of these times is - * guaranteed not to drift from system time. - */ - uint64_t time[4]; + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); /* VCPU is currently running on a physical CPU. */ #define RUNSTATE_running 0 @@ -108,47 +111,52 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate * Register a shared memory area from which the guest may obtain its own * runstate information without needing to execute a hypercall. * Notes: - * 1. The registered address may be virtual or physical, depending on the - * platform. The virtual address should be registered on x86 systems. - * 2. Only one shared area may be registered per VCPU. The shared area is - * updated by the hypervisor each time the VCPU is scheduled. Thus - * runstate.state will always be RUNSTATE_running and - * runstate.state_entry_time will indicate the system time at which the - * VCPU was last scheduled to run. + * 1. The registered address may be virtual or physical or guest handle, + * depending on the platform. Virtual address or guest handle should be + * registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. */ #define VCPUOP_register_runstate_memory_area 5 struct vcpu_register_runstate_memory_area { - union { - GUEST_HANDLE(vcpu_runstate_info) h; - struct vcpu_runstate_info *v; - uint64_t p; - } addr; + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; }; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); /* * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer * which can be set via these commands. Periods smaller than one millisecond * may not be supported. */ -#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ -#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ struct vcpu_set_periodic_timer { - uint64_t period_ns; + uint64_t period_ns; }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer); +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); /* * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot * timer which can be set via these commands. */ -#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ #define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ struct vcpu_set_singleshot_timer { - uint64_t timeout_abs_ns; - uint32_t flags; /* VCPU_SSHOTTMR_??? */ + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer); +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); /* Flags to VCPUOP_set_singleshot_timer. */ /* Require the timeout to be in the future (return -ETIME if it's passed). */ @@ -161,13 +169,65 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_sing * structure in a convenient place, such as in a per-cpu data area. * The pointer need not be page aligned, but the structure must not * cross a page boundary. + * + * This may be called only once per vcpu. */ -#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ struct vcpu_register_vcpu_info { uint64_t mfn; /* mfn of page to place vcpu_info */ uint32_t offset; /* offset within page */ uint32_t rsvd; /* unused */ }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID informmation is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and + * greater are reserved. + * This command returns -EINVAL if it is not a valid operation for this VCPU. + */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +typedef struct vcpu_get_physid vcpu_get_physid_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); +#define xen_vcpu_physid_to_x86_apicid(physid) \ + ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid))) +#define xen_vcpu_physid_to_x86_acpiid(physid) \ + ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32))) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit). It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); #endif /* __XEN_PUBLIC_VCPU_H__ */ --- head-2010-05-12.orig/include/xen/interface/version.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/version.h 2010-01-19 16:01:04.000000000 +0100 @@ -3,6 +3,24 @@ * * Xen version, type, and compile information. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Nguyen Anh Quynh * Copyright (c) 2005, Keir Fraser */ @@ -10,17 +28,15 @@ #ifndef __XEN_PUBLIC_VERSION_H__ #define __XEN_PUBLIC_VERSION_H__ -/* NB. All ops return zero on success, except XENVER_version. */ +/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ /* arg == NULL; returns major:minor (16:16). */ #define XENVER_version 0 /* arg == xen_extraversion_t. */ #define XENVER_extraversion 1 -struct xen_extraversion { - char extraversion[16]; -}; -#define XEN_EXTRAVERSION_LEN (sizeof(struct xen_extraversion)) +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) /* arg == xen_compile_info_t. */ #define XENVER_compile_info 2 @@ -30,29 +46,28 @@ struct xen_compile_info { char compile_domain[32]; char compile_date[32]; }; +typedef struct xen_compile_info xen_compile_info_t; #define XENVER_capabilities 3 -struct xen_capabilities_info { - char info[1024]; -}; -#define XEN_CAPABILITIES_INFO_LEN (sizeof(struct xen_capabilities_info)) +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) #define XENVER_changeset 4 -struct xen_changeset_info { - char info[64]; -}; -#define XEN_CHANGESET_INFO_LEN (sizeof(struct xen_changeset_info)) +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) #define XENVER_platform_parameters 5 struct xen_platform_parameters { unsigned long virt_start; }; +typedef struct xen_platform_parameters xen_platform_parameters_t; #define XENVER_get_features 6 struct xen_feature_info { unsigned int submap_idx; /* IN: which 32-bit submap to return */ uint32_t submap; /* OUT: 32-bit submap */ }; +typedef struct xen_feature_info xen_feature_info_t; /* Declares the features reported by XENVER_get_features. */ #include "features.h" @@ -60,4 +75,10 @@ struct xen_feature_info { /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + #endif /* __XEN_PUBLIC_VERSION_H__ */ --- head-2010-05-12.orig/include/xen/interface/xen.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/interface/xen.h 2010-05-07 11:10:48.000000000 +0200 @@ -3,35 +3,69 @@ * * Guest OS interface to Xen. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2004, K A Fraser */ #ifndef __XEN_PUBLIC_XEN_H__ #define __XEN_PUBLIC_XEN_H__ -#include +#include "xen-compat.h" +#ifdef CONFIG_PARAVIRT_XEN #include +#endif -/* - * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). - */ +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__ia64__) +#include "arch-ia64.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. */ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +DEFINE_XEN_GUEST_HANDLE(long); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(uint64_t); +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +#endif /* - * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. - * EAX = return value - * (argument registers may be clobbered on return) - * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6. - * RAX = return value - * (argument registers not clobbered on return; RCX, R11 are) + * HYPERCALLS */ + #define __HYPERVISOR_set_trap_table 0 #define __HYPERVISOR_mmu_update 1 #define __HYPERVISOR_set_gdt 2 #define __HYPERVISOR_stack_switch 3 #define __HYPERVISOR_set_callbacks 4 #define __HYPERVISOR_fpu_taskswitch 5 -#define __HYPERVISOR_sched_op 6 -#define __HYPERVISOR_dom0_op 7 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 #define __HYPERVISOR_update_descriptor 10 @@ -39,10 +73,10 @@ #define __HYPERVISOR_multicall 13 #define __HYPERVISOR_update_va_mapping 14 #define __HYPERVISOR_set_timer_op 15 -#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ #define __HYPERVISOR_xen_version 17 #define __HYPERVISOR_console_io 18 -#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ #define __HYPERVISOR_grant_table_op 20 #define __HYPERVISOR_vm_assist 21 #define __HYPERVISOR_update_va_mapping_otherdomain 22 @@ -50,7 +84,7 @@ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 -#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_xsm_op 27 #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op_new 29 #define __HYPERVISOR_callback_op 30 @@ -58,6 +92,10 @@ #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -70,15 +108,50 @@ #define __HYPERVISOR_arch_7 55 /* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#else +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_new +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* * VIRTUAL INTERRUPTS * * Virtual interrupts that a guest OS may receive from Xen. - */ -#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */ -#define VIRQ_DEBUG 1 /* Request guest to dump debug info. */ -#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ -#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ -#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occured */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -91,19 +164,28 @@ #define VIRQ_ARCH_7 23 #define NR_VIRQS 24 + /* - * MMU-UPDATE REQUESTS + * HYPERVISOR_mmu_update(reqs, count, pdone, foreigndom) * - * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. - * A foreigndom (FD) can be specified (or DOMID_SELF for none). - * Where the FD has some effect, it is described below. - * ptr[1:0] specifies the appropriate MMU_* command. + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating number of completed operations + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table. If updating an L1 table, and the new - * table entry is valid/present, the mapped frame must belong to the FD, if - * an FD has been specified. If attempting to map an I/O page then the - * caller assumes the privilege of the FD. + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen's heap space. * ptr[:2] -- Machine address of the page-table entry to modify. @@ -119,8 +201,8 @@ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * with those in @val. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ #define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ /* @@ -163,9 +245,20 @@ * cmd: MMUEXT_FLUSH_CACHE * No additional arguments. Writes back and flushes cache contents. * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. + * * cmd: MMUEXT_SET_LDT * linear_addr: Linear address of LDT base (NB. must be page-aligned). * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 @@ -182,24 +275,35 @@ #define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_SET_LDT 13 #define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 #ifndef __ASSEMBLY__ struct mmuext_op { - unsigned int cmd; - union { - /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ - unsigned long mfn; - /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ - unsigned long linear_addr; - } arg1; - union { - /* SET_LDT */ - unsigned int nr_ents; - /* TLB_FLUSH_MULTI, INVLPG_MULTI */ - void *vcpumask; - } arg2; + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(const_void) vcpumask; +#else + const void *vcpumask; +#endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; }; -DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #endif /* These are passed as 'flags' to update_va_mapping. They can be ORed. */ @@ -224,11 +328,24 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); */ #define VMASST_CMD_enable 0 #define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ #define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ #define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ #define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ #define VMASST_TYPE_pae_extended_cr3 3 -#define MAX_VMASST_TYPE 3 + +#define MAX_VMASST_TYPE 3 #ifndef __ASSEMBLY__ @@ -260,6 +377,13 @@ typedef uint16_t domid_t; #define DOMID_XEN (0x7FF2U) /* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identity invalid domid */ +#define DOMID_INVALID (0x7FFFU) + +/* * Send an array of these to HYPERVISOR_mmu_update(). * NB. The fields are natural pointer/address size for this architecture. */ @@ -267,18 +391,19 @@ struct mmu_update { uint64_t ptr; /* Machine address of PTE. */ uint64_t val; /* New contents of PTE. */ }; -DEFINE_GUEST_HANDLE_STRUCT(mmu_update); +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); /* * Send an array of these to HYPERVISOR_multicall(). * NB. The fields are natural register size for this architecture. */ struct multicall_entry { - unsigned long op; - long result; + unsigned long op, result; unsigned long args[6]; }; -DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); /* * Event channel endpoints per domain: @@ -287,173 +412,271 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_ent #define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) struct vcpu_time_info { - /* - * Updates to the following values are preceded and followed - * by an increment of 'version'. The guest can therefore - * detect updates by looking for changes to 'version'. If the - * least-significant bit of the version number is set then an - * update is in progress and the guest must wait to read a - * consistent set of values. The correct way to interact with - * the version number is similar to Linux's seqlock: see the - * implementations of read_seqbegin/read_seqretry. - */ - uint32_t version; - uint32_t pad0; - uint64_t tsc_timestamp; /* TSC at last update of time vals. */ - uint64_t system_time; /* Time, in nanosecs, since boot. */ - /* - * Current system time: - * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul - * CPU frequency (Hz): - * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift - */ - uint32_t tsc_to_system_mul; - int8_t tsc_shift; - int8_t pad1[3]; + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; }; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; struct vcpu_info { - /* - * 'evtchn_upcall_pending' is written non-zero by Xen to indicate - * a pending notification for a particular VCPU. It is then cleared - * by the guest OS /before/ checking for pending work, thus avoiding - * a set-and-check race. Note that the mask is only accessed by Xen - * on the CPU that is currently hosting the VCPU. This means that the - * pending and mask flags can be updated by the guest without special - * synchronisation (i.e., no need for the x86 LOCK prefix). - * This may seem suboptimal because if the pending flag is set by - * a different CPU then an IPI may be scheduled even when the mask - * is set. However, note: - * 1. The task of 'interrupt holdoff' is covered by the per-event- - * channel mask bits. A 'noisy' event that is continually being - * triggered can be masked at source at this very precise - * granularity. - * 2. The main purpose of the per-VCPU mask is therefore to restrict - * reentrant execution: whether for concurrency control, or to - * prevent unbounded stack usage. Whatever the purpose, we expect - * that the mask will be asserted only for short periods at a time, - * and so the likelihood of a 'spurious' IPI is suitably small. - * The mask is read before making an event upcall to the guest: a - * non-zero mask therefore guarantees that the VCPU will not receive - * an upcall activation. The mask is cleared when the VCPU requests - * to block: this avoids wakeup-waiting races. - */ - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; - struct arch_vcpu_info arch; - struct pvclock_vcpu_time_info time; + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; +#ifdef CONFIG_PARAVIRT_XEN + struct pvclock_vcpu_time_info time; +#else + struct vcpu_time_info time; +#endif }; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif /* * Xen/kernel shared data -- pointer provided in start_info. - * NB. We expect that this struct is smaller than a page. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. */ struct shared_info { - struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; - /* - * A domain can create "event channels" on which it can send and receive - * asynchronous event notifications. There are three classes of event that - * are delivered by this mechanism: - * 1. Bi-directional inter- and intra-domain connections. Domains must - * arrange out-of-band to set up a connection (usually by allocating - * an unbound 'listener' port and avertising that via a storage service - * such as xenstore). - * 2. Physical interrupts. A domain with suitable hardware-access - * privileges can bind an event-channel port to a physical interrupt - * source. - * 3. Virtual interrupts ('events'). A domain can bind an event-channel - * port to a virtual interrupt source, such as the virtual-timer - * device or the emergency console. - * - * Event channels are addressed by a "port index". Each channel is - * associated with two bits of information: - * 1. PENDING -- notifies the domain that there is a pending notification - * to be processed. This bit is cleared by the guest. - * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING - * will cause an asynchronous upcall to be scheduled. This bit is only - * updated by the guest. It is read-only within Xen. If a channel - * becomes pending while the channel is masked then the 'edge' is lost - * (i.e., when the channel is unmasked, the guest must manually handle - * pending notifications as no upcall will be scheduled by Xen). - * - * To expedite scanning of pending notifications, any 0->1 pending - * transition on an unmasked channel causes a corresponding bit in a - * per-vcpu selector word to be set. Each bit in the selector covers a - * 'C long' in the PENDING bitfield array. - */ - unsigned long evtchn_pending[sizeof(unsigned long) * 8]; - unsigned long evtchn_mask[sizeof(unsigned long) * 8]; - - /* - * Wallclock time: updated only by control software. Guests should base - * their gettimeofday() syscall on this wallclock-base value. - */ - struct pvclock_wall_clock wc; + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and avertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + unsigned long evtchn_pending[sizeof(unsigned long) * 8]; + unsigned long evtchn_mask[sizeof(unsigned long) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value. + */ +#ifdef CONFIG_PARAVIRT_XEN + struct pvclock_wall_clock wc; +#else + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ +#endif - struct arch_shared_info arch; + struct arch_shared_info arch; }; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif /* - * Start-of-day memory layout for the initial domain (DOM0): + * Start-of-day memory layout: * 1. The domain is started within contiguous virtual-memory region. - * 2. The contiguous region begins and ends on an aligned 4MB boundary. - * 3. The region start corresponds to the load address of the OS image. - * If the load address is not 4MB aligned then the address is rounded down. - * 4. This the order of bootstrap elements in the initial virtual region: + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This the order of bootstrap elements in the initial virtual region: * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) * d. start_info_t structure [register ESI (x86)] * e. bootstrap page tables [pt_base, CR3 (x86)] * f. bootstrap stack [register ESP (x86)] - * 5. Bootstrap elements are packed together, but each is 4kB-aligned. - * 6. The initial ram disk may be omitted. - * 7. The list of page frames forms a contiguous 'pseudo-physical' memory + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain. In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 8. All bootstrap elements are mapped read-writable for the guest OS. The + * 7. All bootstrap elements are mapped read-writable for the guest OS. The * only exception is the bootstrap page table, which is mapped read-only. - * 9. There is guaranteed to be at least 512kB padding after the final + * 8. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. */ #define MAX_GUEST_CMDLINE 1024 struct start_info { - /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - char magic[32]; /* "xen--". */ - unsigned long nr_pages; /* Total pages allocated to this domain. */ - unsigned long shared_info; /* MACHINE address of shared info struct. */ - uint32_t flags; /* SIF_xxx flags. */ - unsigned long store_mfn; /* MACHINE page number of shared page. */ - uint32_t store_evtchn; /* Event channel for store communication. */ - union { - struct { - unsigned long mfn; /* MACHINE page number of console page. */ - uint32_t evtchn; /* Event channel for console page. */ - } domU; - struct { - uint32_t info_off; /* Offset of console_info struct. */ - uint32_t info_size; /* Size of console_info struct from start.*/ - } dom0; - } console; - /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - unsigned long pt_base; /* VIRTUAL address of page directory. */ - unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ - unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ - unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ - int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen--". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ }; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ -typedef uint64_t cpumap_t; +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits to both build it statically and reference it in a configuration + * file, and let the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; + +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters). */ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t typedef uint8_t xen_domain_handle_t[16]; @@ -461,6 +684,11 @@ typedef uint8_t xen_domain_handle_t[16]; #define __mk_unsigned_long(x) x ## UL #define mk_unsigned_long(x) __mk_unsigned_long(x) +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + #else /* __ASSEMBLY__ */ /* In assembly code we cannot use C numeric constant suffixes. */ @@ -468,4 +696,14 @@ typedef uint8_t xen_domain_handle_t[16]; #endif /* !__ASSEMBLY__ */ +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif +#endif + #endif /* __XEN_PUBLIC_XEN_H__ */ --- head-2010-05-12.orig/include/xen/xenbus.h 2010-05-12 08:55:24.000000000 +0200 +++ head-2010-05-12/include/xen/xenbus.h 2010-01-19 16:01:04.000000000 +0100 @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,8 +56,17 @@ struct xenbus_watch /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); + + /* See XBWF_ definitions below. */ + unsigned long flags; }; +/* + * Execute callback in its own kthread. Useful if the callback is long + * running or heavily serialised, to avoid taking out the main xenwatch thread + * for a long period of time (or even unwittingly causing a deadlock). + */ +#define XBWF_new_thread 1 /* A xenbus device. */ struct xenbus_device { @@ -92,7 +101,8 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev, pm_message_t state); + int (*suspend)(struct xenbus_device *dev); + int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; @@ -105,27 +115,8 @@ static inline struct xenbus_driver *to_x return container_of(drv, struct xenbus_driver, driver); } -int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, - const char *mod_name); - -static inline int __must_check -xenbus_register_frontend(struct xenbus_driver *drv) -{ - WARN_ON(drv->owner != THIS_MODULE); - return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); -} - -int __must_check __xenbus_register_backend(struct xenbus_driver *drv, - struct module *owner, - const char *mod_name); -static inline int __must_check -xenbus_register_backend(struct xenbus_driver *drv) -{ - WARN_ON(drv->owner != THIS_MODULE); - return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); -} - +int xenbus_register_frontend(struct xenbus_driver *drv); +int xenbus_register_backend(struct xenbus_driver *drv); void xenbus_unregister_driver(struct xenbus_driver *drv); struct xenbus_transaction @@ -165,7 +156,6 @@ int xenbus_printf(struct xenbus_transact int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); /* notifer routines for when the xenstore comes up */ -extern int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb); void unregister_xenstore_notifier(struct notifier_block *nb); @@ -178,12 +168,9 @@ void xs_suspend_cancel(void); /* Used by xenbus_dev to borrow kernel's store connection. */ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); -struct work_struct; - /* Prepare for domain suspend: then resume or cancel the suspend. */ void xenbus_suspend(void); void xenbus_resume(void); -void xenbus_probe(struct work_struct *); void xenbus_suspend_cancel(void); #define XENBUS_IS_ERR_READ(str) ({ \ @@ -196,38 +183,125 @@ void xenbus_suspend_cancel(void); #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) + +/** + * Register a watch on the given path, using the given xenbus_watch structure + * for storage, and the given callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given path will be saved as + * watch->node, and remains the caller's to free. On error, watch->node will + * be NULL, the device will switch to XenbusStateClosing, and the error will + * be saved in the store. + */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); -int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int), - const char *pathfmt, ...) - __attribute__ ((format (printf, 4, 5))); + +/** + * Register a watch on the given path/path2, using the given xenbus_watch + * structure for storage, and the given callback function as the callback. + * Return 0 on success, or -errno on error. On success, the watched path + * (path/path2) will be saved as watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); + + +/** + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); + + +/** + * Grant access to the given ring_mfn to the peer of the given device. Return + * 0 on success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); + + +/** + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring_valloc allocates a page of virtual address space, maps the + * page to that address, and sets *vaddr to that address. + * xenbus_map_ring does not allocate the virtual address space (you must do + * this yourself!). It only maps in the page to the specified address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, + int gnt_ref); int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr); -int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); + +/** + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *); int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr); + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ int xenbus_free_evtchn(struct xenbus_device *dev, int port); + +/** + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. + */ enum xenbus_state xenbus_read_driver_state(const char *path); -void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); -void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); + +/*** + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...); + + +/*** + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...); + +int xenbus_dev_init(void); const char *xenbus_strstate(enum xenbus_state state); int xenbus_dev_is_online(struct xenbus_device *dev); int xenbus_frontend_closed(struct xenbus_device *dev); +int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *)); +int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *)); + #endif /* _XEN_XENBUS_H */