Subject: Fix Xen build wrt. Xen files coming from mainline.
From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1073:8fe973d8fb98)
Patch-mainline: n/a
Acked-by: jbeulich@novell.com

--- head-2011-03-17.orig/drivers/xen/Makefile	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/drivers/xen/Makefile	2011-01-31 15:14:12.000000000 +0100
@@ -1,22 +1,28 @@
-obj-y += grant-table.o features.o events.o manage.o
+obj-y += core/
+obj-y += console/
+obj-y += evtchn/
 obj-y += xenbus/
+obj-y += char/
 
-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_features.o := $(nostackp)
-
-obj-$(CONFIG_BLOCK) += biomerge.o
-obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
-obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
-obj-$(CONFIG_XEN_BALLOON) += balloon.o
-obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
-obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
-obj-$(CONFIG_XENFS) += xenfs/
-obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
-obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o
-obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
-obj-$(CONFIG_XEN_DOM0) += pci.o
-
-xen-evtchn-y := evtchn.o
-xen-gntdev-y := gntdev.o
-
-xen-platform-pci-y := platform-pci.o
+obj-y += util.o
+obj-$(CONFIG_XEN_BALLOON) += balloon/
+obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
+obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
+obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/
+obj-$(CONFIG_XEN_NETDEV_BACKEND) += netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND) += tpmback/
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND) += blkfront/
+obj-$(CONFIG_XEN_NETDEV_FRONTEND) += netfront/
+obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += pcifront/
+obj-$(CONFIG_XEN_FRAMEBUFFER) += fbfront/
+obj-$(CONFIG_XEN_KEYBOARD) += fbfront/
+obj-$(CONFIG_XEN_SCSI_BACKEND) += scsiback/
+obj-$(CONFIG_XEN_SCSI_FRONTEND) += scsifront/
+obj-$(CONFIG_XEN_USB_BACKEND) += usbback/
+obj-$(CONFIG_XEN_USB_FRONTEND) += usbfront/
+obj-$(CONFIG_XEN_PRIVCMD) += privcmd/
+obj-$(CONFIG_XEN_GRANT_DEV) += gntdev/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) += sfc_netutil/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) += sfc_netfront/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND) += sfc_netback/
--- head-2011-03-17.orig/drivers/xen/xenbus/Makefile	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/Makefile	2011-02-02 17:06:11.000000000 +0100
@@ -1,12 +1,9 @@
-obj-y += xenbus.o
+obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o
+obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o
 
-xenbus-objs =
-xenbus-objs += xenbus_client.o
-xenbus-objs += xenbus_comms.o
-xenbus-objs += xenbus_xs.o
-xenbus-objs += xenbus_probe.o
+xenbus_be-objs =
+xenbus_be-objs += xenbus_backend_client.o
 
-xenbus-be-objs-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
-xenbus-objs += $(xenbus-be-objs-y)
-
-obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o
+xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
+obj-y += $(xenbus-y) $(xenbus-m)
+obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_client.c	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_client.c	2011-01-31 15:14:12.000000000 +0100
@@ -31,14 +31,17 @@
  */
 
 #include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
+#include
+#include
 #include
+#include
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include
+#endif
+
+#define DPRINTK(fmt, args...) \
+	pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
 
 const char *xenbus_strstate(enum xenbus_state state)
 {
@@ -49,28 +52,14 @@ const char *xenbus_strstate(enum xenbus_
 		[ XenbusStateInitialised  ] = "Initialised",
 		[ XenbusStateConnected    ] = "Connected",
 		[ XenbusStateClosing      ] = "Closing",
-		[ XenbusStateClosed	  ] = "Closed",
-		[XenbusStateReconfiguring] = "Reconfiguring",
-		[XenbusStateReconfigured] = "Reconfigured",
+		[ XenbusStateClosed       ] = "Closed",
+		[ XenbusStateReconfiguring ] = "Reconfiguring",
+		[ XenbusStateReconfigured ] = "Reconfigured",
 	};
 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
 }
 EXPORT_SYMBOL_GPL(xenbus_strstate);
 
-/**
- * xenbus_watch_path - register a watch
- * @dev: xenbus device
- * @path: path to watch
- * @watch: watch to register
- * @callback: callback to register
- *
- * Register a @watch on the given path, using the given xenbus_watch structure
- * for storage, and the given @callback function as the callback.  Return 0 on
- * success, or -errno on error.  On success, the given @path will be saved as
- * @watch->node, and remains the caller's to free.  On error, @watch->node will
- * be NULL, the device will switch to %XenbusStateClosing, and the error will
- * be saved in the store.
- */
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
 		      struct xenbus_watch *watch,
 		      void (*callback)(struct xenbus_watch *,
@@ -94,53 +83,27 @@ int xenbus_watch_path(struct xenbus_devi
 EXPORT_SYMBOL_GPL(xenbus_watch_path);
 
 
-/**
- * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
- * @dev: xenbus device
- * @watch: watch to register
- * @callback: callback to register
- * @pathfmt: format of path to watch
- *
- * Register a watch on the given @path, using the given xenbus_watch
- * structure for storage, and the given @callback function as the callback.
- * Return 0 on success, or -errno on error.  On success, the watched path
- * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
- * kfree().  On error, watch->node will be NULL, so the caller has nothing to
- * free, the device will switch to %XenbusStateClosing, and the error will be
- * saved in the store.
- */
-int xenbus_watch_pathfmt(struct xenbus_device *dev,
-			 struct xenbus_watch *watch,
-			 void (*callback)(struct xenbus_watch *,
-					  const char **, unsigned int),
-			 const char *pathfmt, ...)
+int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
+		       const char *path2, struct xenbus_watch *watch,
+		       void (*callback)(struct xenbus_watch *,
+					const char **, unsigned int))
 {
 	int err;
-	va_list ap;
-	char *path;
-
-	va_start(ap, pathfmt);
-	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
-	va_end(ap);
-
-	if (!path) {
+	char *state = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", path, path2);
+	if (!state) {
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
 		return -ENOMEM;
 	}
-	err = xenbus_watch_path(dev, path, watch, callback);
+	err = xenbus_watch_path(dev, state, watch, callback);
 
 	if (err)
-		kfree(path);
+		kfree(state);
 	return err;
 }
-EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
+EXPORT_SYMBOL_GPL(xenbus_watch_path2);
 
-static void xenbus_switch_fatal(struct xenbus_device *, int, int,
-				const char *, ...);
 
-static int
-__xenbus_switch_state(struct xenbus_device *dev,
-		      enum xenbus_state state, int depth)
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
 	/* We check whether the state is currently set to the given value, and
 	   if not, then the state is set.  We don't want to unconditionally
@@ -155,59 +118,29 @@ __xenbus_switch_state(struct xenbus_devi
 	   would not get reset if the transaction was aborted.
 	 */
 
-	struct xenbus_transaction xbt;
 	int current_state;
-	int err, abort;
+	int err;
 
 	if (state == dev->state)
 		return 0;
 
-again:
-	abort = 1;
-
-	err = xenbus_transaction_start(&xbt);
-	if (err) {
-		xenbus_switch_fatal(dev, depth, err, "starting transaction");
-		return 0;
-	}
-
-	err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
+	err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
+			   &current_state);
 	if (err != 1)
-		goto abort;
+		return 0;
 
-	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
+	err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
 	if (err) {
-		xenbus_switch_fatal(dev, depth, err, "writing new state");
-		goto abort;
+		if (state != XenbusStateClosing) /* Avoid looping */
+			xenbus_dev_fatal(dev, err, "writing new state");
+		return err;
 	}
 
-	abort = 0;
-abort:
-	err = xenbus_transaction_end(xbt, abort);
-	if (err) {
-		if (err == -EAGAIN && !abort)
-			goto again;
-		xenbus_switch_fatal(dev, depth, err, "ending transaction");
-	} else
-		dev->state = state;
+	dev->state = state;
 
 	return 0;
 }
 
-/**
- * xenbus_switch_state
- * @dev: xenbus device
- * @state: new state
- *
- * Advertise in the store a change of the given driver to the given new_state.
- * Return 0 on success, or -errno on error.  On error, the device will switch
- * to XenbusStateClosing, and the error will be saved in the store.
- */
-int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
-{
-	return __xenbus_switch_state(dev, state, 0);
-}
-
 EXPORT_SYMBOL_GPL(xenbus_switch_state);
 
 int xenbus_frontend_closed(struct xenbus_device *dev)
@@ -228,13 +161,12 @@ static char *error_path(struct xenbus_de
 }
 
 
-static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
-				const char *fmt, va_list ap)
+void _dev_error(struct xenbus_device *dev, int err, const char *fmt,
+		va_list ap)
 {
 	int ret;
 	unsigned int len;
-	char *printf_buffer = NULL;
-	char *path_buffer = NULL;
+	char *printf_buffer = NULL, *path_buffer = NULL;
 
 #define PRINTF_BUFFER_SIZE 4096
 	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
@@ -251,13 +183,13 @@ static void xenbus_va_dev_error(struct x
 	path_buffer = error_path(dev);
 
 	if (path_buffer == NULL) {
-		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+		printk("xenbus: failed to write error node for %s (%s)\n",
 		       dev->nodename, printf_buffer);
 		goto fail;
 	}
 
 	if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
-		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+		printk("xenbus: failed to write error node for %s (%s)\n",
 		       dev->nodename, printf_buffer);
 		goto fail;
 	}
@@ -268,74 +200,30 @@ fail:
 }
 
 
-/**
- * xenbus_dev_error
- * @dev: xenbus device
- * @err: error to report
- * @fmt: error message format
- *
- * Report the given negative errno into the store, along with the given
- * formatted message.
- */
 void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
 		      ...)
 {
 	va_list ap;
 
	va_start(ap, fmt);
-	xenbus_va_dev_error(dev, err, fmt, ap);
+	_dev_error(dev, err, fmt, ap);
 	va_end(ap);
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_error);
 
-/**
- * xenbus_dev_fatal
- * @dev: xenbus device
- * @err: error to report
- * @fmt: error message format
- *
- * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
- * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
- * closedown of this driver and its peer.
- */
 void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
 		      ...)
 {
 	va_list ap;
 
 	va_start(ap, fmt);
-	xenbus_va_dev_error(dev, err, fmt, ap);
+	_dev_error(dev, err, fmt, ap);
 	va_end(ap);
 
 	xenbus_switch_state(dev, XenbusStateClosing);
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
 
-/**
- * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
- * avoiding recursion within xenbus_switch_state.
- */
-static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
-				const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	xenbus_va_dev_error(dev, err, fmt, ap);
-	va_end(ap);
-
-	if (!depth)
-		__xenbus_switch_state(dev, XenbusStateClosing, 1);
-}
-
-/**
- * xenbus_grant_ring
- * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
- */
 int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
 {
 	int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
@@ -346,18 +234,12 @@ int xenbus_grant_ring(struct xenbus_devi
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);
 
 
-/**
- * Allocate an event channel for the given xenbus_device, assigning the newly
- * created local port to *port.  Return 0 on success, or -errno on error.  On
- * error, the device will switch to XenbusStateClosing, and the error will be
- * saved in the store.
- */
 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
 {
 	struct evtchn_alloc_unbound alloc_unbound;
 	int err;
 
-	alloc_unbound.dom        = DOMID_SELF;
+	alloc_unbound.dom = DOMID_SELF;
 	alloc_unbound.remote_dom = dev->otherend_id;
 
 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
@@ -372,36 +254,6 @@ int xenbus_alloc_evtchn(struct xenbus_de
 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);
 
 
-/**
- * Bind to an existing interdomain event channel in another domain.  Returns 0
- * on success and stores the local port in *port.  On error, returns -errno,
- * switches the device to XenbusStateClosing, and saves the error in XenStore.
- */
-int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
-{
-	struct evtchn_bind_interdomain bind_interdomain;
-	int err;
-
-	bind_interdomain.remote_dom = dev->otherend_id;
-	bind_interdomain.remote_port = remote_port;
-
-	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
-					  &bind_interdomain);
-	if (err)
-		xenbus_dev_fatal(dev, err,
-				 "binding to event channel %d from domain %d",
-				 remote_port, dev->otherend_id);
-	else
-		*port = bind_interdomain.local_port;
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
-
-
-/**
- * Free an existing event channel. Returns 0 on success or -errno on error.
- */
 int xenbus_free_evtchn(struct xenbus_device *dev, int port)
 {
 	struct evtchn_close close;
@@ -418,189 +270,6 @@ int xenbus_free_evtchn(struct xenbus_dev
 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
 
 
-/**
- * xenbus_map_ring_valloc
- * @dev: xenbus device
- * @gnt_ref: grant reference
- * @vaddr: pointer to address to be filled out by mapping
- *
- * Based on Rusty Russell's skeleton driver's map_page.
- * Map a page of memory into this domain from another domain's grant table.
- * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
- * page to that address, and sets *vaddr to that address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
- */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
-{
-	struct gnttab_map_grant_ref op = {
-		.flags = GNTMAP_host_map,
-		.ref = gnt_ref,
-		.dom = dev->otherend_id,
-	};
-	struct vm_struct *area;
-
-	*vaddr = NULL;
-
-	area = xen_alloc_vm_area(PAGE_SIZE);
-	if (!area)
-		return -ENOMEM;
-
-	op.host_addr = (unsigned long)area->addr;
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status != GNTST_okay) {
-		xen_free_vm_area(area);
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-		return op.status;
-	}
-
-	/* Stuff the handle in an unused field */
-	area->phys_addr = (unsigned long)op.handle;
-
-	*vaddr = area->addr;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
-
-
-/**
- * xenbus_map_ring
- * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
- * @vaddr: address to be mapped to
- *
- * Map a page of memory into this domain from another domain's grant table.
- * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
- */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-		    grant_handle_t *handle, void *vaddr)
-{
-	struct gnttab_map_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.flags = GNTMAP_host_map,
-		.ref = gnt_ref,
-		.dom = dev->otherend_id,
-	};
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status != GNTST_okay) {
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-	} else
-		*handle = op.handle;
-
-	return op.status;
-}
-EXPORT_SYMBOL_GPL(xenbus_map_ring);
-
-
-/**
- * xenbus_unmap_ring_vfree
- * @dev: xenbus device
- * @vaddr: addr to unmap
- *
- * Based on Rusty Russell's skeleton driver's unmap_page.
- * Unmap a page of memory in this domain that was imported from another domain.
- * Use xenbus_unmap_ring_vfree if you mapped in your memory with
- * xenbus_map_ring_valloc (it will free the virtual address space).
- * Returns 0 on success and returns GNTST_* on error
- * (see xen/include/interface/grant_table.h).
- */
-int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
-{
-	struct vm_struct *area;
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-	};
-
-	/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
-	 * method so that we don't have to muck with vmalloc internals here.
-	 * We could force the user to hang on to their struct vm_struct from
-	 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
-	 * this API.
-	 */
-	read_lock(&vmlist_lock);
-	for (area = vmlist; area != NULL; area = area->next) {
-		if (area->addr == vaddr)
-			break;
-	}
-	read_unlock(&vmlist_lock);
-
-	if (!area) {
-		xenbus_dev_error(dev, -ENOENT,
-				 "can't find mapped virtual address %p", vaddr);
-		return GNTST_bad_virt_addr;
-	}
-
-	op.handle = (grant_handle_t)area->phys_addr;
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status == GNTST_okay)
-		xen_free_vm_area(area);
-	else
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 (int16_t)area->phys_addr, op.status);
-
-	return op.status;
-}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
-
-
-/**
- * xenbus_unmap_ring
- * @dev: xenbus device
- * @handle: grant handle
- * @vaddr: addr to unmap
- *
- * Unmap a page of memory in this domain that was imported from another domain.
- * Returns 0 on success and returns GNTST_* on error
- * (see xen/include/interface/grant_table.h).
- */
-int xenbus_unmap_ring(struct xenbus_device *dev,
-		      grant_handle_t handle, void *vaddr)
-{
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.handle = handle,
-	};
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status != GNTST_okay)
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 handle, op.status);
-
-	return op.status;
-}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
-
-
-/**
- * xenbus_read_driver_state
- * @path: path for driver
- *
- * Return the state of the driver rooted at the given store path, or
- * XenbusStateUnknown if no state can be read.
- */
 enum xenbus_state xenbus_read_driver_state(const char *path)
 {
 	enum xenbus_state result;
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_comms.c	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_comms.c	2011-01-31 15:14:12.000000000 +0100
@@ -34,25 +34,55 @@
 #include
 #include
 #include
+#include
+#include
+#include
 #include
-#include
-#include
-#include
+
+#include
+
 #include "xenbus_comms.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include
+#endif
+
 static int xenbus_irq;
 
-static DECLARE_WORK(probe_work, xenbus_probe);
+extern void xenbus_probe(void *);
+static DECLARE_WORK(probe_work, xenbus_probe, NULL);
+
 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
 
-static irqreturn_t wake_waiting(int irq, void *unused)
+static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
 {
-	if (unlikely(xenstored_ready == 0)) {
-		xenstored_ready = 1;
-		schedule_work(&probe_work);
+	int old, new;
+
+	old = atomic_read(&xenbus_xsd_state);
+	switch (old) {
+	case XENBUS_XSD_UNCOMMITTED:
+		BUG();
+		return IRQ_HANDLED;
+
+	case XENBUS_XSD_FOREIGN_INIT:
+		new = XENBUS_XSD_FOREIGN_READY;
+		break;
+
+	case XENBUS_XSD_LOCAL_INIT:
+		new = XENBUS_XSD_LOCAL_READY;
+		break;
+
+	case XENBUS_XSD_FOREIGN_READY:
+	case XENBUS_XSD_LOCAL_READY:
+	default:
+		goto wake;
 	}
 
+	old = atomic_cmpxchg(&xenbus_xsd_state, old, new);
+	if (old != new)
+		schedule_work(&probe_work);
+
+wake:
 	wake_up(&xb_waitq);
 	return IRQ_HANDLED;
 }
@@ -82,13 +112,6 @@ static const void *get_input_chunk(XENST
 	return buf + MASK_XENSTORE_IDX(cons);
 }
 
-/**
- * xb_write - low level write
- * @data: buffer to send
- * @len: length of buffer
- *
- * Returns 0 on success, error otherwise.
- */
 int xb_write(const void *data, unsigned len)
 {
 	struct xenstore_domain_interface *intf = xen_store_interface;
@@ -197,12 +220,11 @@ int xb_read(void *data, unsigned len)
 	return 0;
 }
 
-/**
- * xb_init_comms - Set up interrupt handler off store event channel.
- */
+/* Set up interrupt handler off store event channel. */
 int xb_init_comms(void)
 {
 	struct xenstore_domain_interface *intf = xen_store_interface;
+	int err;
 
 	if (intf->req_prod != intf->req_cons)
 		printk(KERN_ERR "XENBUS request ring is not quiescent "
@@ -215,20 +237,18 @@ int xb_init_comms(void)
 		intf->rsp_cons = intf->rsp_prod;
 	}
 
-	if (xenbus_irq) {
-		/* Already have an irq; assume we're resuming */
-		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
-	} else {
-		int err;
-		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
-						0, "xenbus", &xb_waitq);
-		if (err <= 0) {
-			printk(KERN_ERR "XENBUS request irq failed %i\n", err);
-			return err;
-		}
+	if (xenbus_irq)
+		unbind_from_irqhandler(xenbus_irq, &xb_waitq);
 
-		xenbus_irq = err;
+	err = bind_caller_port_to_irqhandler(
+		xen_store_evtchn, wake_waiting,
+		0, "xenbus", &xb_waitq);
+	if (err <= 0) {
+		printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+		return err;
 	}
 
+	xenbus_irq = err;
+
 	return 0;
 }
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_comms.h	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_comms.h	2011-01-31 15:14:12.000000000 +0100
@@ -43,4 +43,20 @@ int xs_input_avail(void);
 extern struct xenstore_domain_interface *xen_store_interface;
 extern int xen_store_evtchn;
 
+/* For xenbus internal use. */
+enum {
+	XENBUS_XSD_UNCOMMITTED = 0,
+	XENBUS_XSD_FOREIGN_INIT,
+	XENBUS_XSD_FOREIGN_READY,
+	XENBUS_XSD_LOCAL_INIT,
+	XENBUS_XSD_LOCAL_READY,
+};
+extern atomic_t xenbus_xsd_state;
+
+static inline int is_xenstored_ready(void)
+{
+	int s = atomic_read(&xenbus_xsd_state);
+	return s == XENBUS_XSD_FOREIGN_READY || s == XENBUS_XSD_LOCAL_READY;
+}
+
 #endif /* _XENBUS_COMMS_H */
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c	2011-01-31 17:51:15.000000000 +0100
@@ -4,6 +4,7 @@
  * Copyright (C) 2005 Rusty Russell, IBM Corporation
  * Copyright (C) 2005 Mike Wray, Hewlett-Packard
  * Copyright (C) 2005, 2006 XenSource Ltd
+ * Copyright (C) 2007 Solarflare Communications, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -32,7 +33,7 @@
 
 #define DPRINTK(fmt, args...) \
 	pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \
-		 __func__, __LINE__, ##args)
+		 __FUNCTION__, __LINE__, ##args)
 
 #include
 #include
@@ -40,38 +41,46 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
-#include
-#include
+#include
+#include
+#include
 #include
+#include
 #include
-#include
-
-#include
+#include
 #include
-#include
-#include
-
+#include
+#include
+#include
+#ifdef MODULE
 #include
+#endif
 
 #include "xenbus_comms.h"
 #include "xenbus_probe.h"
 
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include
+#endif
 
 int xen_store_evtchn;
-EXPORT_SYMBOL_GPL(xen_store_evtchn);
-
 struct xenstore_domain_interface *xen_store_interface;
-EXPORT_SYMBOL_GPL(xen_store_interface);
-
 static unsigned long xen_store_mfn;
 
+extern struct mutex xenwatch_mutex;
+
 static BLOCKING_NOTIFIER_HEAD(xenstore_chain);
 
+static void wait_for_devices(struct xenbus_driver *xendrv);
+
+static int xenbus_probe_frontend(const char *type, const char *name);
+
+static void xenbus_dev_shutdown(struct device *_dev);
+
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -92,7 +101,24 @@ int xenbus_match(struct device *_dev, st
 
 	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
 }
-EXPORT_SYMBOL_GPL(xenbus_match);
+
+/* device/<type>/<id> => <type>-<id> */
+static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
+{
+	nodename = strchr(nodename, '/');
+	if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) {
+		printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename);
+		return -EINVAL;
+	}
+
+	strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE);
+	if (!strchr(bus_id, '/')) {
+		printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id);
+		return -EINVAL;
+	}
+	*strchr(bus_id, '/') = '-';
+	return 0;
+}
 
 
 static void free_otherend_details(struct xenbus_device *dev)
@@ -112,30 +138,7 @@ static void free_otherend_watch(struct x
 }
 
 
-static int talk_to_otherend(struct xenbus_device *dev)
-{
-	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
-
-	free_otherend_watch(dev);
-	free_otherend_details(dev);
-
-	return drv->read_otherend_details(dev);
-}
-
-
-
-static int watch_otherend(struct xenbus_device *dev)
-{
-	struct xen_bus_type *bus =
-		container_of(dev->dev.bus, struct xen_bus_type, bus);
-
-	return xenbus_watch_pathfmt(dev, &dev->otherend_watch,
-				    bus->otherend_changed,
-				    "%s/%s", dev->otherend, "state");
-}
-
-
-int xenbus_read_otherend_details(struct xenbus_device *xendev,
+int read_otherend_details(struct xenbus_device *xendev,
 				 char *id_node, char *path_node)
 {
 	int err = xenbus_gather(XBT_NIL, xendev->nodename,
@@ -160,11 +163,62 @@ int xenbus_read_otherend_details(struct
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_read_otherend_details);
 
-void xenbus_otherend_changed(struct xenbus_watch *watch,
-			     const char **vec, unsigned int len,
-			     int ignore_on_shutdown)
+
+static int read_backend_details(struct xenbus_device *xendev)
+{
+	return read_otherend_details(xendev, "backend-id", "backend");
+}
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+static int xenbus_uevent_frontend(struct device *dev, char **envp,
+				  int num_envp, char *buffer, int buffer_size)
+{
+	struct xenbus_device *xdev;
+	int length = 0, i = 0;
+
+	if (dev == NULL)
+		return -ENODEV;
+	xdev = to_xenbus_device(dev);
+	if (xdev == NULL)
+		return -ENODEV;
+
+	/* stuff we want to pass to /sbin/hotplug */
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_TYPE=%s", xdev->devicetype);
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_PATH=%s", xdev->nodename);
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "MODALIAS=xen:%s", xdev->devicetype);
+
+	return 0;
+}
+#endif
+
+/* Bus type for frontend drivers. */
+static struct xen_bus_type xenbus_frontend = {
+	.root = "device",
+	.levels = 2,		/* device/type/<id> */
+	.get_bus_id = frontend_bus_id,
+	.probe = xenbus_probe_frontend,
+	.error = -ENODEV,
+	.bus = {
+		.name = "xen",
+		.match = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+		.probe = xenbus_dev_probe,
+		.remove = xenbus_dev_remove,
+		.shutdown = xenbus_dev_shutdown,
+		.uevent = xenbus_uevent_frontend,
+#endif
+	},
+	.dev = {
+		.bus_id = "xen",
+	},
+};
+
+static void otherend_changed(struct xenbus_watch *watch,
+			     const char **vec, unsigned int len)
 {
 	struct xenbus_device *dev =
 		container_of(watch, struct xenbus_device, otherend_watch);
@@ -176,31 +230,54 @@ void xenbus_otherend_changed(struct xenb
 	if (!dev->otherend ||
 	    strncmp(dev->otherend, vec[XS_WATCH_PATH],
 		    strlen(dev->otherend))) {
-		dev_dbg(&dev->dev, "Ignoring watch at %s\n",
-			vec[XS_WATCH_PATH]);
+		DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]);
 		return;
 	}
 
 	state = xenbus_read_driver_state(dev->otherend);
 
-	dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
-		state, xenbus_strstate(state), dev->otherend_watch.node,
-		vec[XS_WATCH_PATH]);
+	DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
		dev->otherend_watch.node, vec[XS_WATCH_PATH]);
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 	/*
 	 * Ignore xenbus transitions during shutdown. This prevents us doing
 	 * work that can fail e.g., when the rootfs is gone.
 	 */
 	if (system_state > SYSTEM_RUNNING) {
-		if (ignore_on_shutdown && (state == XenbusStateClosing))
+		struct xen_bus_type *bus = bus;
+		bus = container_of(dev->dev.bus, struct xen_bus_type, bus);
+		/* If we're frontend, drive the state machine to Closed. */
+		/* This should cause the backend to release our resources. */
+		if ((bus == &xenbus_frontend) && (state == XenbusStateClosing))
 			xenbus_frontend_closed(dev);
 		return;
 	}
+#endif
 
 	if (drv->otherend_changed)
 		drv->otherend_changed(dev, state);
 }
-EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
+
+
+static int talk_to_otherend(struct xenbus_device *dev)
+{
+	struct xenbus_driver *drv = to_xenbus_driver(dev->dev.driver);
+
+	free_otherend_watch(dev);
+	free_otherend_details(dev);
+
+	return drv->read_otherend_details(dev);
+}
+
+
+
+static int watch_otherend(struct xenbus_device *dev)
+{
+	return xenbus_watch_path2(dev, dev->otherend, "state",
+				  &dev->otherend_watch, otherend_changed);
+}
+
 
 int xenbus_dev_probe(struct device *_dev)
 {
@@ -224,8 +301,9 @@ int xenbus_dev_probe(struct device *_dev
 
 	err = talk_to_otherend(dev);
 	if (err) {
-		dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
-			 dev->nodename);
+		printk(KERN_WARNING
+		       "xenbus_probe: talk_to_otherend on %s failed.\n",
+		       dev->nodename);
 		return err;
 	}
 
@@ -235,7 +313,8 @@ int xenbus_dev_probe(struct device *_dev
 
 	err = watch_otherend(dev);
 	if (err) {
-		dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
+		printk(KERN_WARNING
+		       "xenbus_probe: watch_otherend on %s failed.\n",
 		       dev->nodename);
 		return err;
 	}
@@ -244,9 +323,8 @@ int xenbus_dev_probe(struct device *_dev
 fail:
 	xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename);
 	xenbus_switch_state(dev, XenbusStateClosed);
-	return err;
+	return -ENODEV;
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_probe);
 
 int xenbus_dev_remove(struct device *_dev)
 {
@@ -264,44 +342,81 @@ int xenbus_dev_remove(struct device *_de
 	xenbus_switch_state(dev, XenbusStateClosed);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_remove);
 
-void xenbus_dev_shutdown(struct device *_dev)
+static void xenbus_dev_shutdown(struct device *_dev)
 {
 	struct xenbus_device *dev = to_xenbus_device(_dev);
 	unsigned long timeout = 5*HZ;
 
 	DPRINTK("%s", dev->nodename);
 
+/* Commented out since xenstored stubdom is now minios based not linux based
+#define XENSTORE_DOMAIN_SHARES_THIS_KERNEL
+*/
+#ifndef XENSTORE_DOMAIN_SHARES_THIS_KERNEL
+	if (is_initial_xendomain())
+#endif
+		return;
+
 	get_device(&dev->dev);
 	if (dev->state != XenbusStateConnected) {
-		printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
+		printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
 		       dev->nodename, xenbus_strstate(dev->state));
 		goto out;
 	}
+
 	xenbus_switch_state(dev, XenbusStateClosing);
+
+	if (!strcmp(dev->devicetype, "vfb"))
+		goto out;
+
 	timeout = wait_for_completion_timeout(&dev->down, timeout);
 	if (!timeout)
-		printk(KERN_INFO "%s: %s timeout closing device\n",
-		       __func__, dev->nodename);
+		printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
  out:
 	put_device(&dev->dev);
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_shutdown);
 
 int xenbus_register_driver_common(struct xenbus_driver *drv,
-				  struct xen_bus_type *bus,
-				  struct module *owner,
-				  const char *mod_name)
+				  struct xen_bus_type *bus)
 {
+	int ret;
+
+	if (bus->error)
+		return bus->error;
+
 	drv->driver.name = drv->name;
 	drv->driver.bus = &bus->bus;
-	drv->driver.owner = owner;
-	drv->driver.mod_name = mod_name;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+	drv->driver.owner = drv->owner;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+	drv->driver.probe = xenbus_dev_probe;
+	drv->driver.remove = xenbus_dev_remove;
+	drv->driver.shutdown = xenbus_dev_shutdown;
+#endif
+
+	mutex_lock(&xenwatch_mutex);
+	ret = driver_register(&drv->driver);
+	mutex_unlock(&xenwatch_mutex);
+	return ret;
+}
+
+int xenbus_register_frontend(struct xenbus_driver *drv)
+{
+	int ret;
+
+	drv->read_otherend_details = read_backend_details;
+
+	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
+	if (ret)
+		return ret;
 
-	return driver_register(&drv->driver);
+	/* If this driver is loaded as a module wait for devices to attach. */
+	wait_for_devices(drv);
+
+	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_register_driver_common);
+EXPORT_SYMBOL_GPL(xenbus_register_frontend);
 
 void xenbus_unregister_driver(struct xenbus_driver *drv)
 {
@@ -379,31 +494,30 @@ static void xenbus_dev_release(struct de
 }
 
 static ssize_t xendev_show_nodename(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+				    struct device_attribute *attr,
+#endif
+				    char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
 }
 static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);
 
 static ssize_t xendev_show_devtype(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+				   struct device_attribute *attr,
+#endif
+				   char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
 }
 static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);
 
-static ssize_t xendev_show_modalias(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
-}
-static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);
 
 int xenbus_probe_node(struct xen_bus_type *bus,
 		      const char *type,
 		      const char *nodename)
 {
-	char devname[XEN_BUS_ID_SIZE];
 	int err;
 	struct xenbus_device *xendev;
 	size_t stringlen;
@@ -411,6 +525,9 @@ int xenbus_probe_node(struct xen_bus_typ
 
 	enum xenbus_state state = xenbus_read_driver_state(nodename);
 
+	if (bus->error)
+		return bus->error;
+
 	if (state != XenbusStateInitialising) {
 		/* Device is not new, so ignore it.  This can happen if a
 		   device is going away after switching to Closed.  */
@@ -435,15 +552,14 @@ int xenbus_probe_node(struct xen_bus_typ
 	xendev->devicetype = tmpstring;
 	init_completion(&xendev->down);
 
+	xendev->dev.parent = &bus->dev;
 	xendev->dev.bus = &bus->bus;
 	xendev->dev.release = xenbus_dev_release;
 
-	err = bus->get_bus_id(devname, xendev->nodename);
+	err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
 	if (err)
 		goto fail;
 
-	dev_set_name(&xendev->dev, devname);
-
 	/* Register with generic device framework. */
 	err = device_register(&xendev->dev);
 	if (err)
@@ -451,28 +567,40 @@ int xenbus_probe_node(struct xen_bus_typ
 
 	err = device_create_file(&xendev->dev, &dev_attr_nodename);
 	if (err)
-		goto fail_unregister;
-
+		goto unregister;
 	err = device_create_file(&xendev->dev, &dev_attr_devtype);
 	if (err)
-		goto fail_remove_nodename;
-
-	err = device_create_file(&xendev->dev, &dev_attr_modalias);
-	if (err)
-		goto fail_remove_devtype;
+		goto unregister;
 
 	return 0;
-fail_remove_devtype:
-	device_remove_file(&xendev->dev, &dev_attr_devtype);
-fail_remove_nodename:
+unregister:
 	device_remove_file(&xendev->dev, &dev_attr_nodename);
-fail_unregister:
+	device_remove_file(&xendev->dev, &dev_attr_devtype);
 	device_unregister(&xendev->dev);
 fail:
 	kfree(xendev);
 	return err;
 }
-EXPORT_SYMBOL_GPL(xenbus_probe_node);
+
+/* device/<type>/<name> */
+static int xenbus_probe_frontend(const char *type, const char *name)
+{
+	char *nodename;
+	int err;
+
+	if (!strcmp(type, "console"))
+		return 0;
+
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
+	if (!nodename)
+		return -ENOMEM;
+
+	DPRINTK("%s", nodename);
+
+	err = xenbus_probe_node(&xenbus_frontend, type, nodename);
+	kfree(nodename);
+	return err;
+}
 
 static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type)
 {
@@ -486,7 +614,7 @@ static int xenbus_probe_device_type(stru
 		return PTR_ERR(dir);
 
 	for (i = 0; i < dir_n; i++) {
-		err = bus->probe(bus, type, dir[i]);
+		err = bus->probe(type, dir[i]);
 		if (err)
 			break;
 	}
@@ -501,6 +629,9 @@ int xenbus_probe_devices(struct xen_bus_
 	char **dir;
 	unsigned int i, dir_n;
 
+	if (bus->error)
+		return bus->error;
+
 	dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
 	if (IS_ERR(dir))
 		return PTR_ERR(dir);
@@ -514,7 +645,6 @@ int xenbus_probe_devices(struct xen_bus_
 	kfree(dir);
 	return err;
 }
-EXPORT_SYMBOL_GPL(xenbus_probe_devices);
 
 static unsigned int char_count(const char *str, char c)
 {
@@ -539,15 +669,15 @@ static int strsep_len(const char *str, c
 	return (len == 0) ? i : -ERANGE;
 }
 
-void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+void dev_changed(const char *node, struct xen_bus_type *bus)
 {
 	int exists, rootlen;
 	struct xenbus_device *dev;
 	char type[XEN_BUS_ID_SIZE];
 	const char *p, *root;
 
-	if (char_count(node, '/') < 2)
-		return;
+	if (bus->error || char_count(node, '/') < 2)
+		return;
 
 	exists = xenbus_exists(XBT_NIL, node, "");
 	if (!exists) {
@@ -575,46 +705,81 @@ void xenbus_dev_changed(const char *node
 
 	kfree(root);
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_changed);
 
-int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+static void frontend_changed(struct xenbus_watch *watch,
+			     const char **vec, unsigned int len)
+{
+	DPRINTK("");
+
+	dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
+}
+
+/* We watch for devices appearing and vanishing. */
+static struct xenbus_watch fe_watch = {
+	.node = "device",
+	.callback = frontend_changed,
+};
+
+static int suspend_dev(struct device *dev, void *data)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
-	struct xenbus_device *xdev
-		= container_of(dev, struct xenbus_device, dev);
+	struct xenbus_device *xdev;
 
-	DPRINTK("%s", xdev->nodename);
+	DPRINTK("");
 
 	if (dev->driver == NULL)
 		return 0;
 	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
-		err = drv->suspend(xdev, state);
+		err = drv->suspend(xdev);
 	if (err)
 		printk(KERN_WARNING
-		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
+		       "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_suspend);
 
-int xenbus_dev_resume(struct device *dev)
+static int suspend_cancel_dev(struct device *dev, void *data)
+{
+	int err = 0;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+
+	DPRINTK("");
+
+	if (dev->driver == NULL)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+	if (drv->suspend_cancel)
+		err = drv->suspend_cancel(xdev);
+	if (err)
+		printk(KERN_WARNING
+		       "xenbus: suspend_cancel %s failed: %i\n",
+		       dev->bus_id, err);
+	return 0;
+}
+
+static int resume_dev(struct device *dev, void *data)
 {
 	int err;
 	struct xenbus_driver *drv;
-	struct xenbus_device *xdev
-		= container_of(dev, struct xenbus_device, dev);
+	struct xenbus_device *xdev;
 
-	DPRINTK("%s", xdev->nodename);
+	DPRINTK("");
 
 	if (dev->driver == NULL)
 		return 0;
+
 	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+
 	err = talk_to_otherend(xdev);
 	if (err) {
 		printk(KERN_WARNING
 		       "xenbus: resume (talk_to_otherend) %s failed: %i\n",
-		       dev_name(dev), err);
+		       dev->bus_id, err);
 		return err;
 	}
 
@@ -625,7 +790,7 @@ int xenbus_dev_resume(struct device *dev
 		if (err) {
 			printk(KERN_WARNING
 			       "xenbus: resume %s failed: %i\n",
-			       dev_name(dev), err);
+			       dev->bus_id, err);
 			return err;
 		}
 	}
@@ -634,23 +799,52 @@ int xenbus_dev_resume(struct device *dev
 	if (err) {
 		printk(KERN_WARNING
 		       "xenbus_probe: resume (watch_otherend) %s failed: "
-		       "%d.\n", dev_name(dev), err);
+		       "%d.\n", dev->bus_id, err);
 		return err;
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_resume);
+
+void xenbus_suspend(void)
+{
+	DPRINTK("");
+
+	if (!xenbus_frontend.error)
+		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+	xenbus_backend_suspend(suspend_dev);
+	xs_suspend();
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend);
+
+void xenbus_resume(void)
+{
+	xb_init_comms();
+	xs_resume();
+	if (!xenbus_frontend.error)
+		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+	xenbus_backend_resume(resume_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_resume);
+
+void xenbus_suspend_cancel(void)
+{
+	xs_suspend_cancel();
+	if (!xenbus_frontend.error)
+		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
+	xenbus_backend_resume(suspend_cancel_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
 
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
-int xenstored_ready = 0;
+atomic_t xenbus_xsd_state = ATOMIC_INIT(XENBUS_XSD_UNCOMMITTED);
 
 
 int register_xenstore_notifier(struct notifier_block *nb)
 {
 	int ret = 0;
 
-	if (xenstored_ready > 0)
+	if (is_xenstored_ready())
 		ret = nb->notifier_call(nb, 0, NULL);
 	else
 		blocking_notifier_chain_register(&xenstore_chain, nb);
@@ -665,50 +859,167 @@ void unregister_xenstore_notifier(struct
 }
 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
 
-void xenbus_probe(struct work_struct *unused)
+
+void xenbus_probe(void *unused)
 {
-	xenstored_ready = 1;
+	BUG_ON(!is_xenstored_ready());
+
+	/* Enumerate devices in xenstore and watch for changes. */
+	xenbus_probe_devices(&xenbus_frontend);
+	register_xenbus_watch(&fe_watch);
+	xenbus_backend_probe_and_watch();
 
 	/* Notify others that xenstore is up */
 	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
 }
-EXPORT_SYMBOL_GPL(xenbus_probe);
 
-static int __init xenbus_probe_initcall(void)
-{
-	if (!xen_domain())
-		return -ENODEV;
 
-	if (xen_initial_domain() || xen_hvm_domain())
-		return 0;
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
+static struct file_operations xsd_kva_fops;
+static struct proc_dir_entry *xsd_kva_intf;
+static struct proc_dir_entry *xsd_port_intf;
+
+static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+	int old;
+	int rc;
+
+	old = atomic_cmpxchg(&xenbus_xsd_state,
+			     XENBUS_XSD_UNCOMMITTED,
+			     XENBUS_XSD_LOCAL_INIT);
+	switch (old) {
+	case XENBUS_XSD_UNCOMMITTED:
+		rc = xb_init_comms();
+		if (rc != 0)
+			return rc;
+		break;
+
+	case XENBUS_XSD_FOREIGN_INIT:
+	case XENBUS_XSD_FOREIGN_READY:
+		return -EBUSY;
+
+	case XENBUS_XSD_LOCAL_INIT:
+	case XENBUS_XSD_LOCAL_READY:
+	default:
+		break;
+	}
+
+	if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
+		return -EINVAL;
+
+	if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
+			    size, vma->vm_page_prot))
+		return -EAGAIN;
 
-	xenbus_probe(NULL);
 	return 0;
 }
-device_initcall(xenbus_probe_initcall);
 
+static int xsd_kva_read(char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "0x%p", xen_store_interface);
+	*eof = 1;
+	return len;
+}
+
+static int xsd_port_read(char *page, char **start, off_t off,
+			 int count, int *eof, void *data)
+{
+	int len;
+
+	len = sprintf(page, "%d", xen_store_evtchn);
+	*eof = 1;
+	return len;
+}
+#endif
+
+static int xb_free_port(evtchn_port_t port)
+{
+	struct evtchn_close close;
+	close.port = port;
+	return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+}
+
+int xenbus_conn(domid_t remote_dom, unsigned long *grant_ref, evtchn_port_t *local_port)
+{
+	struct evtchn_alloc_unbound alloc_unbound;
+	int rc, rc2;
+
+	BUG_ON(atomic_read(&xenbus_xsd_state) != XENBUS_XSD_FOREIGN_INIT);
+	BUG_ON(!is_initial_xendomain());
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
+	remove_xen_proc_entry("xsd_kva");
+	remove_xen_proc_entry("xsd_port");
+#endif
 
-static int __init xenbus_init(void)
+	rc = xb_free_port(xen_store_evtchn);
+	if (rc != 0)
+		goto fail0;
+
+	alloc_unbound.dom = DOMID_SELF;
+	alloc_unbound.remote_dom = remote_dom;
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+					 &alloc_unbound);
+	if (rc != 0)
+		goto fail0;
+	*local_port = xen_store_evtchn = alloc_unbound.port;
+
+	/* keep the old page (xen_store_mfn, xen_store_interface) */
+	rc = gnttab_grant_foreign_access(remote_dom, xen_store_mfn,
+					 GTF_permit_access);
+	if (rc < 0)
+		goto fail1;
+	*grant_ref = rc;
+
+	rc = xb_init_comms();
+	if (rc != 0)
+		goto fail1;
+
+	return 0;
+
+fail1:
+	rc2 = xb_free_port(xen_store_evtchn);
+	if (rc2 != 0)
+		printk(KERN_WARNING
+		       "XENBUS: Error freeing xenstore event channel: %d\n",
+		       rc2);
+fail0:
+	xen_store_evtchn = -1;
+	return rc;
+}
+
+static int xenbus_probe_init(void)
 {
 	int err = 0;
 	unsigned long page = 0;
 
 	DPRINTK("");
 
-	err = -ENODEV;
-	if (!xen_domain())
-		return err;
+	if (!is_running_on_xen())
+		return -ENODEV;
+
+	/* Register ourselves with the kernel bus subsystem */
+	xenbus_frontend.error = bus_register(&xenbus_frontend.bus);
+	if (xenbus_frontend.error)
+		printk(KERN_WARNING
+		       "XENBUS: Error registering frontend bus: %i\n",
+		       xenbus_frontend.error);
+	xenbus_backend_bus_register();
 
 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
-	if (xen_initial_domain()) {
+	if (is_initial_xendomain()) {
 		struct evtchn_alloc_unbound alloc_unbound;
 
 		/* Allocate Xenstore page */
 		page = get_zeroed_page(GFP_KERNEL);
 		if (!page)
-			goto out_error;
+			return -ENOMEM;
 
 		xen_store_mfn = xen_start_info->store_mfn =
 			pfn_to_mfn(virt_to_phys((void *)page) >>
@@ -716,63 +1027,226 @@ static int __init xenbus_init(void)
 
 		/* Next allocate a local port which xenstored can bind to */
 		alloc_unbound.dom = DOMID_SELF;
-		alloc_unbound.remote_dom = 0;
+		alloc_unbound.remote_dom = DOMID_SELF;
 
 		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
 						  &alloc_unbound);
 		if (err == -ENOSYS)
-			goto out_error;
+			goto err;
 
 		BUG_ON(err);
 		xen_store_evtchn = xen_start_info->store_evtchn =
 			alloc_unbound.port;
 
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
+		/* And finally publish the above info in /proc/xen */
+		xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
+		if (xsd_kva_intf) {
+			memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops,
+			       sizeof(xsd_kva_fops));
+			xsd_kva_fops.mmap = xsd_kva_mmap;
+			xsd_kva_intf->proc_fops = &xsd_kva_fops;
+			xsd_kva_intf->read_proc = xsd_kva_read;
+		}
+		xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
+		if (xsd_port_intf)
+			xsd_port_intf->read_proc = xsd_port_read;
+#endif
 		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	} else {
-		if (xen_hvm_domain()) {
-			uint64_t v = 0;
-			err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
-			if (err)
-				goto out_error;
-			xen_store_evtchn = (int)v;
-			err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
-			if (err)
-				goto out_error;
-			xen_store_mfn = (unsigned long)v;
-			xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, PAGE_SIZE);
-		} else {
-			xen_store_evtchn = xen_start_info->store_evtchn;
-			xen_store_mfn = xen_start_info->store_mfn;
-			xen_store_interface = mfn_to_virt(xen_store_mfn);
-			xenstored_ready = 1;
-		}
+		atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY);
+#ifdef CONFIG_XEN
+		xen_store_evtchn = xen_start_info->store_evtchn;
+		xen_store_mfn = xen_start_info->store_mfn;
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
+#else
+		xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+		xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+					      PAGE_SIZE);
+#endif
+		/* Initialize the shared memory rings to talk to xenstored */
+		err = xb_init_comms();
+		if (err)
+			goto err;
 	}
 
+	xenbus_dev_init();
+
 	/* Initialize the interface to xenstore. */
 	err = xs_init();
 	if (err) {
 		printk(KERN_WARNING
 		       "XENBUS: Error initializing xenstore comms: %i\n", err);
-		goto out_error;
+		goto err;
+	}
+
+	/* Register ourselves with the kernel device subsystem */
+	if (!xenbus_frontend.error) {
+		xenbus_frontend.error = device_register(&xenbus_frontend.dev);
+		if (xenbus_frontend.error) {
+			bus_unregister(&xenbus_frontend.bus);
+			printk(KERN_WARNING
+			       "XENBUS: Error registering frontend device: %i\n",
+			       xenbus_frontend.error);
+		}
 	}
+	xenbus_backend_device_register();
 
-#ifdef CONFIG_XEN_COMPAT_XENFS
+	if (!is_initial_xendomain())
+		xenbus_probe(NULL);
+
+	return 0;
+
+ err:
 	/*
-	 * Create xenfs mountpoint in /proc for compatibility with
-	 * utilities that expect to find "xenbus" under "/proc/xen".
+	 * Do not unregister the xenbus front/backend buses here. The buses
+	 * must exist because front/backend drivers will use them when they are
+	 * registered.
 	 */
-	proc_mkdir("xen", NULL);
+
+	if (page != 0)
+		free_page(page);
+	return err;
+}
+
+#ifdef CONFIG_XEN
+postcore_initcall(xenbus_probe_init);
+MODULE_LICENSE("Dual BSD/GPL");
+#else
+int xenbus_init(void)
+{
+	return xenbus_probe_init();
+}
 #endif
 
+static int is_device_connecting(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct device_driver *drv = data;
+	struct xenbus_driver *xendrv;
+
+	/*
+	 * A device with no driver will never connect. We care only about
+	 * devices which should currently be in the process of connecting.
+	 */
+	if (!dev->driver)
+		return 0;
+
+	/* Is this search limited to a particular driver? */
+	if (drv && (dev->driver != drv))
+		return 0;
+
+	xendrv = to_xenbus_driver(dev->driver);
+	return (xendev->state < XenbusStateConnected ||
+		(xendev->state == XenbusStateConnected &&
+		 xendrv->is_ready && !xendrv->is_ready(xendev)));
+}
+
+static int exists_connecting_device(struct device_driver *drv)
+{
+	if (xenbus_frontend.error)
+		return xenbus_frontend.error;
+	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
+				is_device_connecting);
+}
+
+static int print_device_status(struct device *dev, void *data)
+{
+	struct xenbus_device *xendev = to_xenbus_device(dev);
+	struct device_driver *drv = data;
+	struct xenbus_driver *xendrv;
+
+	/* Is this operation limited to a particular driver? */
+	if (drv && (dev->driver != drv))
+		return 0;
+
+	if (!dev->driver) {
+		/* Information only: is this too noisy? */
+		printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
+		       xendev->nodename);
+		return 0;
+	}
+
+	if (xendev->state < XenbusStateConnected) {
+		enum xenbus_state rstate = XenbusStateUnknown;
+		if (xendev->otherend)
+			rstate = xenbus_read_driver_state(xendev->otherend);
+		printk(KERN_WARNING "XENBUS: Timeout connecting "
+		       "to device: %s (local state %d, remote state %d)\n",
+		       xendev->nodename, xendev->state, rstate);
+	}
+
+	xendrv = to_xenbus_driver(dev->driver);
+	if (xendrv->is_ready && !xendrv->is_ready(xendev))
+		printk(KERN_WARNING "XENBUS: Device not ready: %s\n",
+		       xendev->nodename);
+	return 0;
+}
 
- out_error:
-	if (page != 0)
-		free_page(page);
+/* We only wait for device setup after most initcalls have run. */
+static int ready_to_wait_for_devices;
 
-	return err;
+/*
+ * On a 5-minute timeout, wait for all devices currently configured.  We need
+ * to do this to guarantee that the filesystems and / or network devices
+ * needed for boot are available, before we can allow the boot to proceed.
+ * + * This needs to be on a late_initcall, to happen after the frontend device + * drivers have been initialised, but before the root fs is mounted. + * + * A possible improvement here would be to have the tools add a per-device + * flag to the store entry, indicating whether it is needed at boot time. + * This would allow people who knew what they were doing to accelerate their + * boot slightly, but of course needs tools or manual intervention to set up + * those flags correctly. + */ +static void wait_for_devices(struct xenbus_driver *xendrv) +{ + unsigned long start = jiffies; + struct device_driver *drv = xendrv ? &xendrv->driver : NULL; + unsigned int seconds_waited = 0; + + if (!ready_to_wait_for_devices || !is_running_on_xen()) + return; + + while (exists_connecting_device(drv)) { + if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { + if (!seconds_waited) + printk(KERN_WARNING "XENBUS: Waiting for " + "devices to initialise: "); + seconds_waited += 5; + printk("%us...", 300 - seconds_waited); + if (seconds_waited == 300) + break; + } + + schedule_timeout_interruptible(HZ/10); + } + + if (seconds_waited) + printk("\n"); + + bus_for_each_dev(&xenbus_frontend.bus, NULL, drv, + print_device_status); } -postcore_initcall(xenbus_init); +#ifndef MODULE +static int __init boot_wait_for_devices(void) +{ + if (!xenbus_frontend.error) { + ready_to_wait_for_devices = 1; + wait_for_devices(NULL); + } + return 0; +} -MODULE_LICENSE("GPL"); +late_initcall(boot_wait_for_devices); +#endif + +int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *)) +{ + return bus_for_each_dev(&xenbus_frontend.bus, NULL, arg, fn); +} +EXPORT_SYMBOL_GPL(xenbus_for_each_frontend); --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.h 2011-01-31 16:11:24.000000000 +0100 @@ -34,44 +34,47 @@ #ifndef _XENBUS_PROBE_H #define _XENBUS_PROBE_H +#ifndef BUS_ID_SIZE #define XEN_BUS_ID_SIZE 20 +#else +#define XEN_BUS_ID_SIZE BUS_ID_SIZE +#endif + +#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE) +extern void xenbus_backend_suspend(int (*fn)(struct device *, void *)); +extern void xenbus_backend_resume(int (*fn)(struct device *, void *)); +extern void xenbus_backend_probe_and_watch(void); +extern void xenbus_backend_bus_register(void); +extern void xenbus_backend_device_register(void); +#else +static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {} +static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {} +static inline void xenbus_backend_probe_and_watch(void) {} +static inline void xenbus_backend_bus_register(void) {} +static inline void xenbus_backend_device_register(void) {} +#endif struct xen_bus_type { char *root; + int error; unsigned int levels; int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename); - int (*probe)(struct xen_bus_type *bus, const char *type, - const char *dir); - void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, - unsigned int len); + int (*probe)(const char *type, const char *dir); struct bus_type bus; + struct device dev; }; extern int xenbus_match(struct device *_dev, struct device_driver *_drv); extern int xenbus_dev_probe(struct device *_dev); extern int xenbus_dev_remove(struct device *_dev); extern int xenbus_register_driver_common(struct xenbus_driver *drv, - struct xen_bus_type *bus, - struct module *owner, - const char *mod_name); + struct xen_bus_type *bus); extern int 
xenbus_probe_node(struct xen_bus_type *bus, const char *type, const char *nodename); extern int xenbus_probe_devices(struct xen_bus_type *bus); -extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); - -extern void xenbus_dev_shutdown(struct device *_dev); - -extern int xenbus_dev_suspend(struct device *dev, pm_message_t state); -extern int xenbus_dev_resume(struct device *dev); - -extern void xenbus_otherend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len, - int ignore_on_shutdown); - -extern int xenbus_read_otherend_details(struct xenbus_device *xendev, - char *id_node, char *path_node); +extern void dev_changed(const char *node, struct xen_bus_type *bus); #endif --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe_backend.c 2011-02-01 14:49:58.000000000 +0100 @@ -33,7 +33,7 @@ #define DPRINTK(fmt, args...) \ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ - __func__, __LINE__, ##args) + __FUNCTION__, __LINE__, ##args) #include #include @@ -43,16 +43,35 @@ #include #include +#include #include +#include #include -#include #include #include +#include +#include #include #include "xenbus_comms.h" #include "xenbus_probe.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size); +static int xenbus_probe_backend(const char *type, const char *domid); + +extern int read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + +static int read_frontend_details(struct xenbus_device *xendev) +{ + return read_otherend_details(xendev, "frontend-id", "frontend"); +} + /* backend/// => -- */ static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) { @@ -90,12 +109,32 @@ static int backend_bus_id(char bus_id[XE return 0; } -static int xenbus_uevent_backend(struct device *dev, - struct kobj_uevent_env *env) +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type// */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, + .error = -ENODEV, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, +// .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_backend, + }, + .dev = { + .bus_id = "xen-backend", + }, +}; + +static int xenbus_uevent_backend(struct device *dev, char **envp, + int num_envp, char *buffer, int buffer_size) { struct xenbus_device *xdev; struct xenbus_driver *drv; - struct xen_bus_type *bus; + int i = 0; + int length = 0; DPRINTK(""); @@ -103,32 +142,46 @@ static int xenbus_uevent_backend(struct return -ENODEV; xdev = to_xenbus_device(dev); - bus = container_of(xdev->dev.bus, struct xen_bus_type, bus); if (xdev == NULL) return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) - return -ENOMEM; + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_TYPE=%s", xdev->devicetype); - if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) - return -ENOMEM; + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_PATH=%s", xdev->nodename); - if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) - return -ENOMEM; + add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, + "XENBUS_BASE_PATH=%s", xenbus_backend.root); + + /* terminate, set to next 
free slot, shrink available space */ + envp[i] = NULL; + envp = &envp[i]; + num_envp -= i; + buffer = &buffer[length]; + buffer_size -= length; if (dev->driver) { drv = to_xenbus_driver(dev->driver); if (drv && drv->uevent) - return drv->uevent(xdev, env); + return drv->uevent(xdev, envp, num_envp, buffer, + buffer_size); } return 0; } +int xenbus_register_backend(struct xenbus_driver *drv) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend); +} +EXPORT_SYMBOL_GPL(xenbus_register_backend); + /* backend/// */ -static int xenbus_probe_backend_unit(struct xen_bus_type *bus, - const char *dir, +static int xenbus_probe_backend_unit(const char *dir, const char *type, const char *name) { @@ -141,14 +194,13 @@ static int xenbus_probe_backend_unit(str DPRINTK("%s\n", nodename); - err = xenbus_probe_node(bus, type, nodename); + err = xenbus_probe_node(&xenbus_backend, type, nodename); kfree(nodename); return err; } /* backend// */ -static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, - const char *domid) +static int xenbus_probe_backend(const char *type, const char *domid) { char *nodename; int err = 0; @@ -157,7 +209,7 @@ static int xenbus_probe_backend(struct x DPRINTK(""); - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid); if (!nodename) return -ENOMEM; @@ -168,7 +220,7 @@ static int xenbus_probe_backend(struct x } for (i = 0; i < dir_n; i++) { - err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); + err = xenbus_probe_backend_unit(nodename, type, dir[i]); if (err) break; } @@ -177,39 +229,12 @@ static int xenbus_probe_backend(struct x return err; } -static void frontend_changed(struct xenbus_watch *watch, - const char **vec, unsigned int len) -{ - xenbus_otherend_changed(watch, vec, len, 0); -} - -static struct device_attribute xenbus_backend_dev_attrs[] = { - __ATTR_NULL -}; - -static struct xen_bus_type xenbus_backend = { - .root = "backend", - .levels = 3, /* backend/type// */ - .get_bus_id = backend_bus_id, - .probe = xenbus_probe_backend, - .otherend_changed = frontend_changed, - .bus = { - .name = "xen-backend", - .match = xenbus_match, - .uevent = xenbus_uevent_backend, - .probe = xenbus_dev_probe, - .remove = xenbus_dev_remove, - .shutdown = xenbus_dev_shutdown, - .dev_attrs = xenbus_backend_dev_attrs, - }, -}; - static void backend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) { DPRINTK(""); - xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); + dev_changed(vec[XS_WATCH_PATH], &xenbus_backend); } static struct xenbus_watch be_watch = { @@ -217,60 +242,51 @@ static struct xenbus_watch be_watch = { .callback = backend_changed, }; -static int read_frontend_details(struct xenbus_device *xendev) -{ - return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); -} - -int xenbus_dev_is_online(struct xenbus_device *dev) +void xenbus_backend_suspend(int (*fn)(struct device *, void *)) { - int rc, val; - - rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); - if (rc != 1) - val = 0; /* no online node present */ - - return val; + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); } -EXPORT_SYMBOL_GPL(xenbus_dev_is_online); -int __xenbus_register_backend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) +void xenbus_backend_resume(int (*fn)(struct device *, void *)) { - 
drv->read_otherend_details = read_frontend_details; - - return xenbus_register_driver_common(drv, &xenbus_backend, - owner, mod_name); + DPRINTK(""); + if (!xenbus_backend.error) + bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); } -EXPORT_SYMBOL_GPL(__xenbus_register_backend); -static int backend_probe_and_watch(struct notifier_block *notifier, - unsigned long event, - void *data) +void xenbus_backend_probe_and_watch(void) { - /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_backend); register_xenbus_watch(&be_watch); - - return NOTIFY_DONE; } -static int __init xenbus_probe_backend_init(void) +void xenbus_backend_bus_register(void) { - static struct notifier_block xenstore_notifier = { - .notifier_call = backend_probe_and_watch - }; - int err; - - DPRINTK(""); - - /* Register ourselves with the kernel bus subsystem */ - err = bus_register(&xenbus_backend.bus); - if (err) - return err; - - register_xenstore_notifier(&xenstore_notifier); + xenbus_backend.error = bus_register(&xenbus_backend.bus); + if (xenbus_backend.error) + printk(KERN_WARNING + "XENBUS: Error registering backend bus: %i\n", + xenbus_backend.error); +} + +void xenbus_backend_device_register(void) +{ + if (xenbus_backend.error) + return; + + xenbus_backend.error = device_register(&xenbus_backend.dev); + if (xenbus_backend.error) { + bus_unregister(&xenbus_backend.bus); + printk(KERN_WARNING + "XENBUS: Error registering backend device: %i\n", + xenbus_backend.error); + } +} - return 0; +int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *)) +{ + return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn); } -subsys_initcall(xenbus_probe_backend_init); +EXPORT_SYMBOL_GPL(xenbus_for_each_backend); --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_xs.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_xs.c 2011-01-31 15:14:12.000000000 +0100 @@ -47,6 +47,14 @@ #include #include "xenbus_comms.h" +#ifdef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif + +#ifndef PF_NOFREEZE /* Old kernel (pre-2.6.6). */ +#define PF_NOFREEZE 0 +#endif + struct xs_stored_msg { struct list_head list; @@ -118,7 +126,7 @@ static DEFINE_SPINLOCK(watch_events_lock * carrying out work. */ static pid_t xenwatch_pid; -static DEFINE_MUTEX(xenwatch_mutex); +/* static */ DEFINE_MUTEX(xenwatch_mutex); static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq); static int get_error(const char *errorstring) @@ -212,14 +220,13 @@ void *xenbus_dev_request_and_reply(struc mutex_unlock(&xs_state.request_mutex); - if ((msg->type == XS_TRANSACTION_END) || + if ((req_msg.type == XS_TRANSACTION_END) || ((req_msg.type == XS_TRANSACTION_START) && (msg->type == XS_ERROR))) transaction_end(); return ret; } -EXPORT_SYMBOL(xenbus_dev_request_and_reply); /* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */ static void *xs_talkv(struct xenbus_transaction t, @@ -330,7 +337,7 @@ static char **split(char *strings, unsig char *p, **ret; /* Count the strings. */ - *num = count_strings(strings, len); + *num = count_strings(strings, len) + 1; /* Transfer to one big alloc for easy freeing. 
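For reference, the single allocation matters to callers: the pointer vector and the string bytes share one kmalloc() block, so a single kfree() of the vector releases both. A minimal caller sketch, assuming the usual xenbus_directory() pattern from this tree (directory names illustrative):

	unsigned int i, num;
	char **dir = xenbus_directory(XBT_NIL, "device", "vbd", &num);

	if (IS_ERR(dir))
		return PTR_ERR(dir);
	for (i = 0; i < num; i++)
		printk(KERN_DEBUG "entry: %s\n", dir[i]);
	kfree(dir);		/* frees the pointers and the strings together */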
*/ ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH); @@ -344,6 +351,7 @@ static char **split(char *strings, unsig strings = (char *)&ret[*num]; for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1) ret[(*num)++] = p; + ret[*num] = strings + len; return ret; } @@ -657,6 +665,8 @@ void unregister_xenbus_watch(struct xenb char token[sizeof(watch) * 2 + 1]; int err; + BUG_ON(watch->flags & XBWF_new_thread); + sprintf(token, "%lX", (long)watch); down_read(&xs_state.watch_mutex); @@ -674,11 +684,6 @@ void unregister_xenbus_watch(struct xenb up_read(&xs_state.watch_mutex); - /* Make sure there are no callbacks running currently (unless - its us) */ - if (current->pid != xenwatch_pid) - mutex_lock(&xenwatch_mutex); - /* Cancel pending watch events. */ spin_lock(&watch_events_lock); list_for_each_entry_safe(msg, tmp, &watch_events, list) { @@ -690,8 +695,11 @@ void unregister_xenbus_watch(struct xenb } spin_unlock(&watch_events_lock); - if (current->pid != xenwatch_pid) + /* Flush any currently-executing callback, unless we are it. :-) */ + if (current->pid != xenwatch_pid) { + mutex_lock(&xenwatch_mutex); mutex_unlock(&xenwatch_mutex); + } } EXPORT_SYMBOL_GPL(unregister_xenbus_watch); @@ -708,8 +716,6 @@ void xs_resume(void) struct xenbus_watch *watch; char token[sizeof(watch) * 2 + 1]; - xb_init_comms(); - mutex_unlock(&xs_state.response_mutex); mutex_unlock(&xs_state.request_mutex); transaction_resume(); @@ -731,11 +737,30 @@ void xs_suspend_cancel(void) mutex_unlock(&xs_state.transaction_mutex); } +static int xenwatch_handle_callback(void *data) +{ + struct xs_stored_msg *msg = data; + + msg->u.watch.handle->callback(msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + + kfree(msg->u.watch.vec); + kfree(msg); + + /* Kill this kthread if we were spawned just for this callback. */ + if (current->pid != xenwatch_pid) + do_exit(0); + + return 0; +} + static int xenwatch_thread(void *unused) { struct list_head *ent; struct xs_stored_msg *msg; + current->flags |= PF_NOFREEZE; for (;;) { wait_event_interruptible(watch_events_waitq, !list_empty(&watch_events)); @@ -751,17 +776,29 @@ static int xenwatch_thread(void *unused) list_del(ent); spin_unlock(&watch_events_lock); - if (ent != &watch_events) { - msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); - kfree(msg); + if (ent == &watch_events) { + mutex_unlock(&xenwatch_mutex); + continue; } - mutex_unlock(&xenwatch_mutex); + msg = list_entry(ent, struct xs_stored_msg, list); + + /* + * Unlock the mutex before running an XBWF_new_thread + * handler. kthread_run can block which can deadlock + * against unregister_xenbus_watch() if we need to + * unregister other watches in order to make + * progress. This can occur on resume before the swap + * device is attached. 
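Watch callbacks that can block that long opt into a dedicated kthread by setting XBWF_new_thread when the watch is declared; note that the BUG_ON() in unregister_xenbus_watch() above means such watches must never be unregistered. A sketch with illustrative names:

	static void slow_changed(struct xenbus_watch *watch,
				 const char **vec, unsigned int len)
	{
		/* may sleep for a long time, e.g. waiting out a resume */
	}

	static struct xenbus_watch slow_watch = {
		.node     = "drivers/example",		/* illustrative path */
		.flags    = XBWF_new_thread,
		.callback = slow_changed,
	};

	/* ... register_xenbus_watch(&slow_watch); */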
+ */ + if (msg->u.watch.handle->flags & XBWF_new_thread) { + mutex_unlock(&xenwatch_mutex); + kthread_run(xenwatch_handle_callback, + msg, "xenwatch_cb"); + } else { + xenwatch_handle_callback(msg); + mutex_unlock(&xenwatch_mutex); + } } return 0; @@ -855,6 +892,7 @@ static int xenbus_thread(void *unused) { int err; + current->flags |= PF_NOFREEZE; for (;;) { err = process_msg(); if (err) @@ -869,7 +907,6 @@ static int xenbus_thread(void *unused) int xs_init(void) { - int err; struct task_struct *task; INIT_LIST_HEAD(&xs_state.reply_list); @@ -883,11 +920,6 @@ int xs_init(void) atomic_set(&xs_state.transaction_count, 0); init_waitqueue_head(&xs_state.transaction_wq); - /* Initialize the shared memory rings to talk to xenstored */ - err = xb_init_comms(); - if (err) - return err; - task = kthread_run(xenwatch_thread, NULL, "xenwatch"); if (IS_ERR(task)) return PTR_ERR(task); --- head-2011-03-17.orig/include/xen/evtchn.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/evtchn.h 2011-01-31 15:14:12.000000000 +0100 @@ -1,7 +1,11 @@ +#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__) +#include "public/evtchn.h" +#else /****************************************************************************** * evtchn.h * - * Interface to /dev/xen/evtchn. + * Communication via Xen event channels. + * Also definitions for the device that demuxes notifications to userspace. * * Copyright (c) 2003-2005, K A Fraser * @@ -30,59 +34,117 @@ * IN THE SOFTWARE. */ -#ifndef __LINUX_PUBLIC_EVTCHN_H__ -#define __LINUX_PUBLIC_EVTCHN_H__ +#ifndef __ASM_EVTCHN_H__ +#define __ASM_EVTCHN_H__ -/* - * Bind a fresh port to VIRQ @virq. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_VIRQ \ - _IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq)) -struct ioctl_evtchn_bind_virq { - unsigned int virq; -}; +#include +#include +#include +#include +#include +#include /* - * Bind a fresh port to remote <@remote_domain, @remote_port>. - * Return allocated port. + * LOW-LEVEL DEFINITIONS */ -#define IOCTL_EVTCHN_BIND_INTERDOMAIN \ - _IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain)) -struct ioctl_evtchn_bind_interdomain { - unsigned int remote_domain, remote_port; -}; /* - * Allocate a fresh port for binding to @remote_domain. - * Return allocated port. - */ -#define IOCTL_EVTCHN_BIND_UNBOUND_PORT \ - _IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port)) -struct ioctl_evtchn_bind_unbound_port { - unsigned int remote_domain; -}; + * Dynamically bind an event source to an IRQ-like callback handler. + * On some platforms this may not be implemented via the Linux IRQ subsystem. + * The IRQ argument passed to the callback handler is the same as returned + * from the bind call. It may not correspond to a Linux IRQ number. + * Returns IRQ or negative errno. 
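Typical use, sketched here for bind_listening_port_to_irqhandler() with illustrative handler and device names (note the three-argument handler signature, including struct pt_regs *, used throughout this tree):

	static irqreturn_t example_interrupt(int irq, void *dev_id,
					     struct pt_regs *regs)
	{
		/* process the event source behind this IRQ handle */
		return IRQ_HANDLED;
	}

	int irq = bind_listening_port_to_irqhandler(remote_domid,
						    example_interrupt, 0,
						    "example", dev);
	if (irq < 0)
		return irq;			/* negative errno */
	/* ... */
	unbind_from_irqhandler(irq, dev);	/* also closes the channel */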
+ */ +int bind_caller_port_to_irqhandler( + unsigned int caller_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_listening_port_to_irqhandler( + unsigned int remote_domain, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_interdomain_evtchn_to_irqhandler( + unsigned int remote_domain, + unsigned int remote_port, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_virq_to_irqhandler( + unsigned int virq, + unsigned int cpu, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); +int bind_ipi_to_irqhandler( + unsigned int ipi, + unsigned int cpu, + irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char *devname, + void *dev_id); /* - * Unbind previously allocated @port. - */ -#define IOCTL_EVTCHN_UNBIND \ - _IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind)) -struct ioctl_evtchn_unbind { - unsigned int port; -}; + * Common unbind function for all event sources. Takes IRQ to unbind from. + * Automatically closes the underlying event channel (except for bindings + * made with bind_caller_port_to_irqhandler()). + */ +void unbind_from_irqhandler(unsigned int irq, void *dev_id); + +void irq_resume(void); + +/* Entry point for notifications into Linux subsystems. */ +asmlinkage void evtchn_do_upcall(struct pt_regs *regs); + +/* Entry point for notifications into the userland character device. */ +void evtchn_device_upcall(int port); + +/* Mark a PIRQ as unavailable for dynamic allocation. */ +void evtchn_register_pirq(int irq); +/* Map a Xen-supplied PIRQ to a dynamically allocated one. */ +int evtchn_map_pirq(int irq, int xen_pirq); +/* Look up a Xen-supplied PIRQ for a dynamically allocated one. */ +int evtchn_get_xen_pirq(int irq); + +void mask_evtchn(int port); +void disable_all_local_evtchn(void); +void unmask_evtchn(int port); + +#ifdef CONFIG_SMP +void rebind_evtchn_to_cpu(int port, unsigned int cpu); +#else +#define rebind_evtchn_to_cpu(port, cpu) ((void)0) +#endif + +static inline int test_and_set_evtchn_mask(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + return synch_test_and_set_bit(port, s->evtchn_mask); +} + +static inline void clear_evtchn(int port) +{ + shared_info_t *s = HYPERVISOR_shared_info; + synch_clear_bit(port, s->evtchn_pending); +} + +static inline void notify_remote_via_evtchn(int port) +{ + struct evtchn_send send = { .port = port }; + VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send)); +} /* - * Unbind previously allocated @port. + * Use these to access the event channel underlying the IRQ handle returned + * by bind_*_to_irqhandler(). */ -#define IOCTL_EVTCHN_NOTIFY \ - _IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify)) -struct ioctl_evtchn_notify { - unsigned int port; -}; - -/* Clear and reinitialise the event buffer. Clear error condition. 
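The in-kernel counterparts of these removed device ioctls are the helpers declared just below; once bound, a driver can recover the raw port to advertise through the store, and kick the peer directly (sketch):

	int port = irq_to_evtchn_port(irq);	/* e.g. written to xenstore for the peer */
	/* ... */
	notify_remote_via_irq(irq);		/* tell the remote end there is work */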
*/ -#define IOCTL_EVTCHN_RESET \ - _IOC(_IOC_NONE, 'E', 5, 0) +void notify_remote_via_irq(int irq); +int irq_to_evtchn_port(int irq); -#endif /* __LINUX_PUBLIC_EVTCHN_H__ */ +#endif /* __ASM_EVTCHN_H__ */ +#endif /* CONFIG_PARAVIRT_XEN */ --- head-2011-03-17.orig/include/xen/hvm.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/hvm.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,9 +3,8 @@ #define XEN_HVM_H__ #include -#include -static inline int hvm_get_parameter(int idx, uint64_t *value) +static inline unsigned long hvm_get_parameter(int idx) { struct xen_hvm_param xhv; int r; @@ -16,15 +15,9 @@ static inline int hvm_get_parameter(int if (r < 0) { printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n", idx, r); - return r; + return 0; } - *value = xhv.value; - return r; + return xhv.value; } -#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2 -#define HVM_CALLBACK_VIA_TYPE_SHIFT 56 -#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ - HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) - #endif /* XEN_HVM_H__ */ --- head-2011-03-17.orig/include/xen/interface/callback.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/callback.h 2011-01-31 15:14:12.000000000 +0100 @@ -86,6 +86,8 @@ struct callback_register { uint16_t flags; xen_callback_t address; }; +typedef struct callback_register callback_register_t; +DEFINE_XEN_GUEST_HANDLE(callback_register_t); /* * Unregister a callback. @@ -98,5 +100,12 @@ struct callback_unregister { uint16_t type; uint16_t _unused; }; +typedef struct callback_unregister callback_unregister_t; +DEFINE_XEN_GUEST_HANDLE(callback_unregister_t); + +#if __XEN_INTERFACE_VERSION__ < 0x00030207 +#undef CALLBACKTYPE_sysenter +#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated +#endif #endif /* __XEN_PUBLIC_CALLBACK_H__ */ --- head-2011-03-17.orig/include/xen/interface/elfnote.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/elfnote.h 2011-03-17 13:50:24.000000000 +0100 @@ -3,6 +3,24 @@ * * Definitions used for the Xen ELF notes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2006, Ian Campbell, XenSource Ltd. */ @@ -10,7 +28,7 @@ #define __XEN_PUBLIC_ELFNOTE_H__ /* - * The notes should live in a SHT_NOTE segment and have "Xen" in the + * The notes should live in a PT_NOTE segment and have "Xen" in the * name field. * * Numeric types are either 4 or 8 bytes depending on the content of @@ -22,8 +40,6 @@ /* * NAME=VALUE pair (string). 
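As a concrete illustration of the note format these definitions describe, a note can be emitted from C via a section attribute; this is only a layout sketch (kernels typically emit their Xen notes from assembly), using XEN_ELFNOTE_LOADER = 8 from below:

	struct xen_elf_note {
		uint32_t namesz, descsz, type;
		char name[4];			/* "Xen" + NUL */
		char desc[8];			/* note payload */
	} __attribute__((packed));

	static const struct xen_elf_note xen_loader_note
	__attribute__((used, section(".note.Xen"), aligned(4))) = {
		.namesz = 4,			/* sizeof("Xen") */
		.descsz = 8,			/* sizeof("generic") */
		.type   = 8,			/* XEN_ELFNOTE_LOADER */
		.name   = "Xen",
		.desc   = "generic",
	};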
- * - * LEGACY: FEATURES and PAE */ #define XEN_ELFNOTE_INFO 0 @@ -90,7 +106,12 @@ #define XEN_ELFNOTE_LOADER 8 /* - * The kernel supports PAE (x86/32 only, string = "yes" or "no"). + * The kernel supports PAE (x86/32 only, string = "yes", "no" or + * "bimodal"). + * + * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting + * may be given as "yes,bimodal" which will cause older Xen to treat + * this kernel as PAE. * * LEGACY: PAE (n.b. The legacy interface included a provision to * indicate 'extended-cr3' support allowing L3 page tables to be @@ -140,6 +161,82 @@ */ #define XEN_ELFNOTE_SUSPEND_CANCEL 14 +/* + * The (non-default) location the initial phys-to-machine map should be + * placed at by the hypervisor (Dom0) or the tools (DomU). + * The kernel must be prepared for this mapping to be established using + * large pages, despite such otherwise not being available to guests. + * The kernel must also be able to handle the page table pages used for + * this mapping not being accessible through the initial mapping. + * (Only x86-64 supports this at present.) + */ +#define XEN_ELFNOTE_INIT_P2M 15 + +/* + * Whether or not the guest can deal with being passed an initrd not + * mapped through its initial page tables. + */ +#define XEN_ELFNOTE_MOD_START_PFN 16 + +/* + * The number of the highest elfnote defined. + */ +#define XEN_ELFNOTE_MAX XEN_ELFNOTE_MOD_START_PFN + +/* + * System information exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO + * note in case of a system crash. This note will contain various + * information about the system, see xen/include/xen/elfcore.h. + */ +#define XEN_ELFNOTE_CRASH_INFO 0x1000001 + +/* + * System registers exported through crash notes. + * + * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS + * note per cpu in case of a system crash. This note is architecture + * specific and will contain registers not saved in the "CORE" note. + * See xen/include/xen/elfcore.h for more information. + */ +#define XEN_ELFNOTE_CRASH_REGS 0x1000002 + + +/* + * xen dump-core none note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE + * in its dump file to indicate that the file is xen dump-core + * file. This note doesn't have any other information. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_NONE 0x2000000 + +/* + * xen dump-core header note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER + * in its dump file. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_HEADER 0x2000001 + +/* + * xen dump-core xen version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION + * in its dump file. It contains the xen version obtained via the + * XENVER hypercall. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION 0x2000002 + +/* + * xen dump-core format version note. + * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION + * in its dump file. It contains a format version identifier. + * See tools/libxc/xc_core.h for more information. + */ +#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION 0x2000003 + #endif /* __XEN_PUBLIC_ELFNOTE_H__ */ /* --- head-2011-03-17.orig/include/xen/interface/event_channel.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/event_channel.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,6 +3,24 @@ * * Event channels between domains. 
* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, K A Fraser. */ @@ -11,8 +29,15 @@ #include +/* + * Prototype for this hypercall is: + * int event_channel_op(int cmd, void *args) + * @cmd == EVTCHNOP_??? (event-channel operation). + * @args == Operation-specific extra arguments (NULL if none). + */ + typedef uint32_t evtchn_port_t; -DEFINE_GUEST_HANDLE(evtchn_port_t); +DEFINE_XEN_GUEST_HANDLE(evtchn_port_t); /* * EVTCHNOP_alloc_unbound: Allocate a port in domain and mark as @@ -22,13 +47,14 @@ DEFINE_GUEST_HANDLE(evtchn_port_t); * 1. If the caller is unprivileged then must be DOMID_SELF. * 2. may be DOMID_SELF, allowing loopback connections. */ -#define EVTCHNOP_alloc_unbound 6 +#define EVTCHNOP_alloc_unbound 6 struct evtchn_alloc_unbound { - /* IN parameters */ - domid_t dom, remote_dom; - /* OUT parameters */ - evtchn_port_t port; + /* IN parameters */ + domid_t dom, remote_dom; + /* OUT parameters */ + evtchn_port_t port; }; +typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t; /* * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between @@ -41,29 +67,35 @@ struct evtchn_alloc_unbound { */ #define EVTCHNOP_bind_interdomain 0 struct evtchn_bind_interdomain { - /* IN parameters. */ - domid_t remote_dom; - evtchn_port_t remote_port; - /* OUT parameters. */ - evtchn_port_t local_port; + /* IN parameters. */ + domid_t remote_dom; + evtchn_port_t remote_port; + /* OUT parameters. */ + evtchn_port_t local_port; }; +typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t; /* * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ on specified * vcpu. * NOTES: - * 1. A virtual IRQ may be bound to at most one event channel per vcpu. - * 2. The allocated event channel is bound to the specified vcpu. The binding - * may not be changed. + * 1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list + * in xen.h for the classification of each VIRQ. + * 2. Global VIRQs must be allocated on VCPU0 but can subsequently be + * re-bound via EVTCHNOP_bind_vcpu. + * 3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu. + * The allocated event channel is bound to the specified vcpu and the + * binding cannot be changed. */ -#define EVTCHNOP_bind_virq 1 +#define EVTCHNOP_bind_virq 1 struct evtchn_bind_virq { - /* IN parameters. */ - uint32_t virq; - uint32_t vcpu; - /* OUT parameters. */ - evtchn_port_t port; + /* IN parameters. */ + uint32_t virq; + uint32_t vcpu; + /* OUT parameters. 
*/ + evtchn_port_t port; }; +typedef struct evtchn_bind_virq evtchn_bind_virq_t; /* * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ . @@ -71,15 +103,16 @@ struct evtchn_bind_virq { * 1. A physical IRQ may be bound to at most one event channel per domain. * 2. Only a sufficiently-privileged domain may bind to a physical IRQ. */ -#define EVTCHNOP_bind_pirq 2 +#define EVTCHNOP_bind_pirq 2 struct evtchn_bind_pirq { - /* IN parameters. */ - uint32_t pirq; + /* IN parameters. */ + uint32_t pirq; #define BIND_PIRQ__WILL_SHARE 1 - uint32_t flags; /* BIND_PIRQ__* */ - /* OUT parameters. */ - evtchn_port_t port; + uint32_t flags; /* BIND_PIRQ__* */ + /* OUT parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_bind_pirq evtchn_bind_pirq_t; /* * EVTCHNOP_bind_ipi: Bind a local event channel to receive events. @@ -87,33 +120,36 @@ struct evtchn_bind_pirq { * 1. The allocated event channel is bound to the specified vcpu. The binding * may not be changed. */ -#define EVTCHNOP_bind_ipi 7 +#define EVTCHNOP_bind_ipi 7 struct evtchn_bind_ipi { - uint32_t vcpu; - /* OUT parameters. */ - evtchn_port_t port; + uint32_t vcpu; + /* OUT parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_bind_ipi evtchn_bind_ipi_t; /* * EVTCHNOP_close: Close a local event channel . If the channel is * interdomain then the remote end is placed in the unbound state * (EVTCHNSTAT_unbound), awaiting a new connection. */ -#define EVTCHNOP_close 3 +#define EVTCHNOP_close 3 struct evtchn_close { - /* IN parameters. */ - evtchn_port_t port; + /* IN parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_close evtchn_close_t; /* * EVTCHNOP_send: Send an event to the remote end of the channel whose local * endpoint is . */ -#define EVTCHNOP_send 4 +#define EVTCHNOP_send 4 struct evtchn_send { - /* IN parameters. */ - evtchn_port_t port; + /* IN parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_send evtchn_send_t; /* * EVTCHNOP_status: Get the current status of the communication channel which @@ -123,75 +159,98 @@ struct evtchn_send { * 2. Only a sufficiently-privileged domain may obtain the status of an event * channel for which is not DOMID_SELF. */ -#define EVTCHNOP_status 5 +#define EVTCHNOP_status 5 struct evtchn_status { - /* IN parameters */ - domid_t dom; - evtchn_port_t port; - /* OUT parameters */ -#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ -#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ -#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ -#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. */ -#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ -#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ - uint32_t status; - uint32_t vcpu; /* VCPU to which this channel is bound. */ - union { - struct { - domid_t dom; - } unbound; /* EVTCHNSTAT_unbound */ - struct { - domid_t dom; - evtchn_port_t port; - } interdomain; /* EVTCHNSTAT_interdomain */ - uint32_t pirq; /* EVTCHNSTAT_pirq */ - uint32_t virq; /* EVTCHNSTAT_virq */ - } u; + /* IN parameters */ + domid_t dom; + evtchn_port_t port; + /* OUT parameters */ +#define EVTCHNSTAT_closed 0 /* Channel is not in use. */ +#define EVTCHNSTAT_unbound 1 /* Channel is waiting interdom connection.*/ +#define EVTCHNSTAT_interdomain 2 /* Channel is connected to remote domain. */ +#define EVTCHNSTAT_pirq 3 /* Channel is bound to a phys IRQ line. 
*/ +#define EVTCHNSTAT_virq 4 /* Channel is bound to a virtual IRQ line */ +#define EVTCHNSTAT_ipi 5 /* Channel is bound to a virtual IPI line */ + uint32_t status; + uint32_t vcpu; /* VCPU to which this channel is bound. */ + union { + struct { + domid_t dom; + } unbound; /* EVTCHNSTAT_unbound */ + struct { + domid_t dom; + evtchn_port_t port; + } interdomain; /* EVTCHNSTAT_interdomain */ + uint32_t pirq; /* EVTCHNSTAT_pirq */ + uint32_t virq; /* EVTCHNSTAT_virq */ + } u; }; +typedef struct evtchn_status evtchn_status_t; /* * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an * event is pending. * NOTES: - * 1. IPI- and VIRQ-bound channels always notify the vcpu that initialised - * the binding. This binding cannot be changed. - * 2. All other channels notify vcpu0 by default. This default is set when + * 1. IPI-bound channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time. + * This binding cannot be changed. + * 3. All other channels notify vcpu0 by default. This default is set when * the channel is allocated (a port that is freed and subsequently reused * has its binding reset to vcpu0). */ -#define EVTCHNOP_bind_vcpu 8 +#define EVTCHNOP_bind_vcpu 8 struct evtchn_bind_vcpu { - /* IN parameters. */ - evtchn_port_t port; - uint32_t vcpu; + /* IN parameters. */ + evtchn_port_t port; + uint32_t vcpu; }; +typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t; /* * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver * a notification to the appropriate VCPU if an event is pending. */ -#define EVTCHNOP_unmask 9 +#define EVTCHNOP_unmask 9 struct evtchn_unmask { - /* IN parameters. */ - evtchn_port_t port; + /* IN parameters. */ + evtchn_port_t port; }; +typedef struct evtchn_unmask evtchn_unmask_t; +/* + * EVTCHNOP_reset: Close all event channels associated with specified domain. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify other than DOMID_SELF. + */ +#define EVTCHNOP_reset 10 +struct evtchn_reset { + /* IN parameters. */ + domid_t dom; +}; +typedef struct evtchn_reset evtchn_reset_t; + +/* + * Argument to event_channel_op_compat() hypercall. Superceded by new + * event_channel_op() hypercall since 0x00030202. 
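Under the newer convention each sub-operation passes its own structure directly; allocating an unbound port for a peer then looks like this sketch (remote_domid illustrative, error handling trimmed):

	struct evtchn_alloc_unbound alloc = {
		.dom        = DOMID_SELF,
		.remote_dom = remote_domid,	/* peer that may bind the port */
	};
	int err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &alloc);
	if (!err)
		evtchn = alloc.port;	/* e.g. advertised to the peer via xenstore */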
+ */ struct evtchn_op { - uint32_t cmd; /* EVTCHNOP_* */ - union { - struct evtchn_alloc_unbound alloc_unbound; - struct evtchn_bind_interdomain bind_interdomain; - struct evtchn_bind_virq bind_virq; - struct evtchn_bind_pirq bind_pirq; - struct evtchn_bind_ipi bind_ipi; - struct evtchn_close close; - struct evtchn_send send; - struct evtchn_status status; - struct evtchn_bind_vcpu bind_vcpu; - struct evtchn_unmask unmask; - } u; + uint32_t cmd; /* EVTCHNOP_* */ + union { + struct evtchn_alloc_unbound alloc_unbound; + struct evtchn_bind_interdomain bind_interdomain; + struct evtchn_bind_virq bind_virq; + struct evtchn_bind_pirq bind_pirq; + struct evtchn_bind_ipi bind_ipi; + struct evtchn_close close; + struct evtchn_send send; + struct evtchn_status status; + struct evtchn_bind_vcpu bind_vcpu; + struct evtchn_unmask unmask; + } u; }; -DEFINE_GUEST_HANDLE_STRUCT(evtchn_op); +typedef struct evtchn_op evtchn_op_t; +DEFINE_XEN_GUEST_HANDLE(evtchn_op_t); #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */ --- head-2011-03-17.orig/include/xen/interface/features.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/features.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,6 +3,24 @@ * * Feature flags, reported by XENVER_get_features. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2006, Keir Fraser */ @@ -41,6 +59,15 @@ /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */ #define XENFEAT_mmu_pt_update_preserve_ad 5 +/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */ +#define XENFEAT_highmem_assist 6 + +/* + * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel + * available pte bits. + */ +#define XENFEAT_gnttab_map_avail_bits 7 + /* x86: Does this Xen host support the HVM callback vector type? */ #define XENFEAT_hvm_callback_vector 8 --- head-2011-03-17.orig/include/xen/interface/grant_table.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/grant_table.h 2011-01-31 15:14:12.000000000 +0100 @@ -85,12 +85,26 @@ */ /* + * Reference to a grant entry in a specified domain's grant table. + */ +typedef uint32_t grant_ref_t; + +/* * A grant table comprises a packed array of grant entries in one or more * page frames shared between Xen and a guest. * [XEN]: This field is written by Xen and read by the sharing guest. * [GST]: This field is written by the guest and read by Xen. 
*/ -struct grant_entry { + +/* + * Version 1 of the grant table entry structure is maintained purely + * for backwards compatibility. New guests should use version 2. + */ +#if __XEN_INTERFACE_VERSION__ < 0x0003020a +#define grant_entry_v1 grant_entry +#define grant_entry_v1_t grant_entry_t +#endif +struct grant_entry_v1 { /* GTF_xxx: various type and flag information. [XEN,GST] */ uint16_t flags; /* The domain being granted foreign privileges. [GST] */ @@ -101,6 +115,7 @@ struct grant_entry { */ uint32_t frame; }; +typedef struct grant_entry_v1 grant_entry_v1_t; /* * Type of grant entry. @@ -108,10 +123,13 @@ struct grant_entry { * GTF_permit_access: Allow @domid to map/access @frame. * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame * to this guest. Xen writes the page number to @frame. + * GTF_transitive: Allow @domid to transitively access a subrange of + * @trans_grant in @trans_domid. No mappings are allowed. */ #define GTF_invalid (0U<<0) #define GTF_permit_access (1U<<0) #define GTF_accept_transfer (2U<<0) +#define GTF_transitive (3U<<0) #define GTF_type_mask (3U<<0) /* @@ -119,6 +137,10 @@ struct grant_entry { * GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST] * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] + * GTF_sub_page: Grant access to only a subrange of the page. @domid + * will only be allowed to copy from the grant, and not + * map it. [GST] */ #define _GTF_readonly (2) #define GTF_readonly (1U<<_GTF_readonly) @@ -126,6 +148,14 @@ struct grant_entry { #define GTF_reading (1U<<_GTF_reading) #define _GTF_writing (4) #define GTF_writing (1U<<_GTF_writing) +#define _GTF_PWT (5) +#define GTF_PWT (1U<<_GTF_PWT) +#define _GTF_PCD (6) +#define GTF_PCD (1U<<_GTF_PCD) +#define _GTF_PAT (7) +#define GTF_PAT (1U<<_GTF_PAT) +#define _GTF_sub_page (8) +#define GTF_sub_page (1U<<_GTF_sub_page) /* * Subflags for GTF_accept_transfer: @@ -142,15 +172,87 @@ struct grant_entry { #define _GTF_transfer_completed (3) #define GTF_transfer_completed (1U<<_GTF_transfer_completed) - -/*********************************** - * GRANT TABLE QUERIES AND USES +/* + * Version 2 grant table entries. These fulfil the same role as + * version 1 entries, but can represent more complicated operations. + * Any given domain will have either a version 1 or a version 2 table, + * and every entry in the table will be the same version. + * + * The interface by which domains use grant references does not depend + * on the grant table version in use by the other domain. */ +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * Version 1 and version 2 grant entries share a common prefix. The + * fields of the prefix are documented as part of struct + * grant_entry_v1. + */ +struct grant_entry_header { + uint16_t flags; + domid_t domid; +}; +typedef struct grant_entry_header grant_entry_header_t; /* - * Reference to a grant entry in a specified domain's grant table. + * Version 2 of the grant entry structure. + */ +union grant_entry_v2 { + grant_entry_header_t hdr; + + /* + * This member is used for V1-style full page grants, where either: + * + * -- hdr.type is GTF_accept_transfer, or + * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. + * + * In that case, the frame field has the same semantics as the + * field of the same name in the V1 entry structure. 
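Filling such an entry follows the usual grant discipline of publishing the flags word last; a sketch for a full-page access grant, with gnttab_v2 standing in for whatever pointer the grant-table driver keeps to the shared V2 array:

	union grant_entry_v2 *entry = &gnttab_v2[ref];

	entry->full_page.frame = gmfn;		/* machine frame to share */
	entry->hdr.domid = remote_domid;	/* peer allowed to map it */
	wmb();					/* frame/domid visible before flags */
	entry->hdr.flags = GTF_permit_access;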
+ */ + struct { + grant_entry_header_t hdr; + uint32_t pad0; + uint64_t frame; + } full_page; + + /* + * If the grant type is GTF_grant_access and GTF_sub_page is set, + * @domid is allowed to access bytes [@page_off,@page_off+@length) + * in frame @frame. + */ + struct { + grant_entry_header_t hdr; + uint16_t page_off; + uint16_t length; + uint64_t frame; + } sub_page; + + /* + * If the grant is GTF_transitive, @domid is allowed to use the + * grant @gref in domain @trans_domid, as if it was the local + * domain. Obviously, the transitive access must be compatible + * with the original grant. + * + * The current version of Xen does not allow transitive grants + * to be mapped. + */ + struct { + grant_entry_header_t hdr; + domid_t trans_domid; + uint16_t pad0; + grant_ref_t gref; + } transitive; + + uint32_t __spacer[4]; /* Pad to a power of two */ +}; +typedef union grant_entry_v2 grant_entry_v2_t; + +typedef uint16_t grant_status_t; + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/*********************************** + * GRANT TABLE QUERIES AND USES */ -typedef uint32_t grant_ref_t; /* * Handle to track a mapping created via a grant reference. @@ -186,7 +288,8 @@ struct gnttab_map_grant_ref { grant_handle_t handle; uint64_t dev_bus_addr; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref); +typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t); /* * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings @@ -208,7 +311,8 @@ struct gnttab_unmap_grant_ref { /* OUT parameters. */ int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref); +typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t); /* * GNTTABOP_setup_table: Set up a grant table for comprising at least @@ -226,9 +330,10 @@ struct gnttab_setup_table { uint32_t nr_frames; /* OUT parameters. */ int16_t status; /* GNTST_* */ - GUEST_HANDLE(ulong) frame_list; + XEN_GUEST_HANDLE(ulong) frame_list; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table); +typedef struct gnttab_setup_table gnttab_setup_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t); /* * GNTTABOP_dump_table: Dump the contents of the grant table to the @@ -241,7 +346,8 @@ struct gnttab_dump_table { /* OUT parameters. */ int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table); +typedef struct gnttab_dump_table gnttab_dump_table_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t); /* * GNTTABOP_transfer_grant_ref: Transfer to a foreign domain. The @@ -254,13 +360,15 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_t #define GNTTABOP_transfer 4 struct gnttab_transfer { /* IN parameters. */ - unsigned long mfn; + xen_pfn_t mfn; domid_t domid; grant_ref_t ref; /* OUT parameters. */ int16_t status; }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer); +typedef struct gnttab_transfer gnttab_transfer_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t); + /* * GNTTABOP_copy: Hypervisor based copy @@ -284,24 +392,26 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_transf #define GNTCOPY_source_gref (1<<_GNTCOPY_source_gref) #define _GNTCOPY_dest_gref (1) #define GNTCOPY_dest_gref (1<<_GNTCOPY_dest_gref) +#define _GNTCOPY_can_fail (2) +#define GNTCOPY_can_fail (1<<_GNTCOPY_can_fail) #define GNTTABOP_copy 5 -struct gnttab_copy { - /* IN parameters. 
*/ - struct { - union { - grant_ref_t ref; - unsigned long gmfn; - } u; - domid_t domid; - uint16_t offset; - } source, dest; - uint16_t len; - uint16_t flags; /* GNTCOPY_* */ - /* OUT parameters. */ - int16_t status; -}; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy); +typedef struct gnttab_copy { + /* IN parameters. */ + struct { + union { + grant_ref_t ref; + xen_pfn_t gmfn; + } u; + domid_t domid; + uint16_t offset; + } source, dest; + uint16_t len; + uint16_t flags; /* GNTCOPY_* */ + /* OUT parameters. */ + int16_t status; +} gnttab_copy_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t); /* * GNTTABOP_query_size: Query the current and maximum sizes of the shared @@ -319,10 +429,92 @@ struct gnttab_query_size { uint32_t max_nr_frames; int16_t status; /* GNTST_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size); +typedef struct gnttab_query_size gnttab_query_size_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t); + +/* + * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings + * tracked by but atomically replace the page table entry with one + * pointing to the machine address under . will be + * redirected to the null entry. + * NOTES: + * 1. The call may fail in an undefined manner if either mapping is not + * tracked by . + * 2. After executing a batch of unmaps, it is guaranteed that no stale + * mappings will remain in the device or host TLBs. + */ +#define GNTTABOP_unmap_and_replace 7 +struct gnttab_unmap_and_replace { + /* IN parameters. */ + uint64_t host_addr; + uint64_t new_addr; + grant_handle_t handle; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ +}; +typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + +#if __XEN_INTERFACE_VERSION__ >= 0x0003020a +/* + * GNTTABOP_set_version: Request a particular version of the grant + * table shared table structure. This operation can only be performed + * once in any given domain. It must be performed before any grants + * are activated; otherwise, the domain will be stuck with version 1. + * The only defined versions are 1 and 2. + */ +#define GNTTABOP_set_version 8 +struct gnttab_set_version { + /* IN/OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_set_version gnttab_set_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); + + +/* + * GNTTABOP_get_status_frames: Get the list of frames used to store grant + * status for . In grant format version 2, the status is separated + * from the other shared grant fields to allow more efficient synchronization + * using barriers instead of atomic cmpexch operations. + * specify the size of vector . + * The frame addresses are returned in the . + * Only addresses are returned, even if the table is larger. + * NOTES: + * 1. may be specified as DOMID_SELF. + * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. + */ +#define GNTTABOP_get_status_frames 9 +struct gnttab_get_status_frames { + /* IN parameters. */ + uint32_t nr_frames; + domid_t dom; + /* OUT parameters. */ + int16_t status; /* GNTST_* */ + XEN_GUEST_HANDLE(uint64_t) frame_list; +}; +typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); /* - * Bitfield values for update_pin_status.flags. + * GNTTABOP_get_version: Get the grant table version which is in + * effect for domain . 
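Negotiating the table format is a one-shot operation at start of day; a sketch of the pair of calls (must run before any grant is activated, per the set_version note above; the version variable is illustrative):

	struct gnttab_set_version sv = { .version = 2 };
	struct gnttab_get_version gv = { .dom = DOMID_SELF };

	if (HYPERVISOR_grant_table_op(GNTTABOP_set_version, &sv, 1))
		;	/* refused: the domain stays on version 1 */
	if (HYPERVISOR_grant_table_op(GNTTABOP_get_version, &gv, 1) == 0)
		version = gv.version;		/* 1 or 2 */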
+ */ +#define GNTTABOP_get_version 10 +struct gnttab_get_version { + /* IN parameters */ + domid_t dom; + uint16_t pad; + /* OUT parameters */ + uint32_t version; +}; +typedef struct gnttab_get_version gnttab_get_version_t; +DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); + +#endif /* __XEN_INTERFACE_VERSION__ */ + +/* + * Bitfield values for gnttab_map_grant_ref.flags. */ /* Map the grant entry for access by I/O devices. */ #define _GNTMAP_device_map (0) @@ -349,6 +541,16 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_ #define _GNTMAP_contains_pte (4) #define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte) +#define _GNTMAP_can_fail (5) +#define GNTMAP_can_fail (1<<_GNTMAP_can_fail) + +/* + * Bits to be placed in guest kernel available PTE bits (architecture + * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set). + */ +#define _GNTMAP_guest_avail0 (16) +#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0) + /* * Values for error status returns. All errors are -ve. */ @@ -362,7 +564,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_ #define GNTST_no_device_space (-7) /* Out of space in I/O MMU. */ #define GNTST_permission_denied (-8) /* Not enough privilege for operation. */ #define GNTST_bad_page (-9) /* Specified page was invalid for op. */ -#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary */ +#define GNTST_bad_copy_arg (-10) /* copy arguments cross page boundary. */ +#define GNTST_address_too_big (-11) /* transfer page address too large. */ +#define GNTST_eagain (-12) /* Could not map at the moment. Retry. */ #define GNTTABOP_error_msgs { \ "okay", \ @@ -375,7 +579,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_ "no spare translation slot in the I/O MMU", \ "permission denied", \ "bad page", \ - "copy arguments cross page boundary" \ + "copy arguments cross page boundary", \ + "page address size too large", \ + "could not map at the moment, retry" \ } #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */ --- head-2011-03-17.orig/include/xen/interface/hvm/hvm_op.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/hvm/hvm_op.h 2011-03-17 13:50:24.000000000 +0100 @@ -21,6 +21,9 @@ #ifndef __XEN_PUBLIC_HVM_HVM_OP_H__ #define __XEN_PUBLIC_HVM_HVM_OP_H__ +#include "../xen.h" +#include "../trace.h" + /* Get/set subcommands: the second argument of the hypercall is a * pointer to a xen_hvm_param struct. */ #define HVMOP_set_param 0 @@ -30,17 +33,197 @@ struct xen_hvm_param { uint32_t index; /* IN */ uint64_t value; /* IN/OUT */ }; -DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); +typedef struct xen_hvm_param xen_hvm_param_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); + +/* Set the logical level of one of a domain's PCI INTx wires. */ +#define HVMOP_set_pci_intx_level 2 +struct xen_hvm_set_pci_intx_level { + /* Domain to be updated. */ + domid_t domid; + /* PCI INTx identification in PCI topology (domain:bus:device:intx). */ + uint8_t domain, bus, device, intx; + /* Assertion level (0 = unasserted, 1 = asserted). */ + uint8_t level; +}; +typedef struct xen_hvm_set_pci_intx_level xen_hvm_set_pci_intx_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_intx_level_t); + +/* Set the logical level of one of a domain's ISA IRQ wires. */ +#define HVMOP_set_isa_irq_level 3 +struct xen_hvm_set_isa_irq_level { + /* Domain to be updated. */ + domid_t domid; + /* ISA device identification, by ISA IRQ (0-15). */ + uint8_t isa_irq; + /* Assertion level (0 = unasserted, 1 = asserted). 
*/ + uint8_t level; +}; +typedef struct xen_hvm_set_isa_irq_level xen_hvm_set_isa_irq_level_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_isa_irq_level_t); + +#define HVMOP_set_pci_link_route 4 +struct xen_hvm_set_pci_link_route { + /* Domain to be updated. */ + domid_t domid; + /* PCI link identifier (0-3). */ + uint8_t link; + /* ISA IRQ (1-15), or 0 (disable link). */ + uint8_t isa_irq; +}; +typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t); + +/* Flushes all VCPU TLBs: @arg must be NULL. */ +#define HVMOP_flush_tlbs 5 + +/* Following tools-only interfaces may change in future. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +/* Track dirty VRAM. */ +#define HVMOP_track_dirty_vram 6 +struct xen_hvm_track_dirty_vram { + /* Domain to be tracked. */ + domid_t domid; + /* First pfn to track. */ + uint64_aligned_t first_pfn; + /* Number of pages to track. */ + uint64_aligned_t nr; + /* OUT variable. */ + /* Dirty bitmap buffer. */ + XEN_GUEST_HANDLE_64(uint8) dirty_bitmap; +}; +typedef struct xen_hvm_track_dirty_vram xen_hvm_track_dirty_vram_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_track_dirty_vram_t); + +/* Notify that some pages got modified by the Device Model. */ +#define HVMOP_modified_memory 7 +struct xen_hvm_modified_memory { + /* Domain to be updated. */ + domid_t domid; + /* First pfn. */ + uint64_aligned_t first_pfn; + /* Number of pages. */ + uint64_aligned_t nr; +}; +typedef struct xen_hvm_modified_memory xen_hvm_modified_memory_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_modified_memory_t); + +#define HVMOP_set_mem_type 8 +typedef enum { + HVMMEM_ram_rw, /* Normal read/write guest RAM */ + HVMMEM_ram_ro, /* Read-only; writes are discarded */ + HVMMEM_mmio_dm, /* Reads and write go to the device model */ +} hvmmem_type_t; +/* Notify that a region of memory is to be treated in a specific way. */ +struct xen_hvm_set_mem_type { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_type; + /* Number of pages. */ + uint32_t nr; + /* First pfn. */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_type xen_hvm_set_mem_type_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_type_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ /* Hint from PV drivers for pagetable destruction. */ #define HVMOP_pagetable_dying 9 struct xen_hvm_pagetable_dying { /* Domain with a pagetable about to be destroyed. */ domid_t domid; + uint16_t pad[3]; /* align next field on 8-byte boundary */ /* guest physical address of the toplevel pagetable dying */ - aligned_u64 gpa; + uint64_t gpa; }; typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; -DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t); - +DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); + +/* Get the current Xen time, in nanoseconds since system boot. */ +#define HVMOP_get_time 10 +struct xen_hvm_get_time { + uint64_t now; /* OUT */ +}; +typedef struct xen_hvm_get_time xen_hvm_get_time_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_time_t); + +#define HVMOP_xentrace 11 +struct xen_hvm_xentrace { + uint16_t event, extra_bytes; + uint8_t extra[TRACE_EXTRA_MAX * sizeof(uint32_t)]; +}; +typedef struct xen_hvm_xentrace xen_hvm_xentrace_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_xentrace_t); + +/* Following tools-only interfaces may change in future. 
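Callers drive these through the hvm_op hypercall; e.g. a device model asserting a guest's virtual ISA IRQ with the structure defined above (guest_domid illustrative, sufficiently privileged caller assumed):

	struct xen_hvm_set_isa_irq_level op = {
		.domid   = guest_domid,
		.isa_irq = 10,		/* ISA IRQ 0-15 */
		.level   = 1,		/* assert the line */
	};
	rc = HYPERVISOR_hvm_op(HVMOP_set_isa_irq_level, &op);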
*/ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#define HVMOP_set_mem_access 12 +typedef enum { + HVMMEM_access_n, + HVMMEM_access_r, + HVMMEM_access_w, + HVMMEM_access_rw, + HVMMEM_access_x, + HVMMEM_access_rx, + HVMMEM_access_wx, + HVMMEM_access_rwx, + HVMMEM_access_rx2rw, /* Page starts off as r-x, but automatically + * change to r-w on a write */ + HVMMEM_access_default /* Take the domain default */ +} hvmmem_access_t; +/* Notify that a region of memory is to have specific access types */ +struct xen_hvm_set_mem_access { + /* Domain to be updated. */ + domid_t domid; + /* Memory type */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* Number of pages, ignored on setting default access */ + uint32_t nr; + /* First pfn, or ~0ull to set the default access for new pages */ + uint64_aligned_t first_pfn; +}; +typedef struct xen_hvm_set_mem_access xen_hvm_set_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_mem_access_t); + +#define HVMOP_get_mem_access 13 +/* Get the specific access type for that region of memory */ +struct xen_hvm_get_mem_access { + /* Domain to be queried. */ + domid_t domid; + /* Memory type: OUT */ + uint16_t hvmmem_access; /* hvm_access_t */ + /* pfn, or ~0ull for default access for new pages. IN */ + uint64_aligned_t pfn; +}; +typedef struct xen_hvm_get_mem_access xen_hvm_get_mem_access_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_get_mem_access_t); + +#define HVMOP_inject_trap 14 +/* Inject a trap into a VCPU, which will get taken up on the next + * scheduling of it. Note that the caller should know enough of the + * state of the CPU before injecting, to know what the effect of + * injecting the trap will be. + */ +struct xen_hvm_inject_trap { + /* Domain to be queried. */ + domid_t domid; + /* VCPU */ + uint32_t vcpuid; + /* Trap number */ + uint32_t trap; + /* Error code, or -1 to skip */ + uint32_t error_code; + /* CR2 for page faults */ + uint64_aligned_t cr2; +}; +typedef struct xen_hvm_inject_trap xen_hvm_inject_trap_t; +DEFINE_XEN_GUEST_HANDLE(xen_hvm_inject_trap_t); + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */ --- head-2011-03-17.orig/include/xen/interface/hvm/params.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/hvm/params.h 2011-03-17 13:50:24.000000000 +0100 @@ -33,11 +33,17 @@ * val[63:56] == 1: val[55:0] is a delivery PCI INTx line, as follows: * Domain = val[47:32], Bus = val[31:16], * DevFn = val[15: 8], IntX = val[ 1: 0] - * val[63:56] == 2: val[7:0] is a vector number. + * val[63:56] == 2: val[7:0] is a vector number, check for + * XENFEAT_hvm_callback_vector to know if this delivery + * method is available. * If val == 0 then CPU0 event-channel notifications are not delivered. */ #define HVM_PARAM_CALLBACK_IRQ 0 +/* + * These are not used by Xen. They are here for convenience of HVM-guest + * xenbus implementations. + */ #define HVM_PARAM_STORE_PFN 1 #define HVM_PARAM_STORE_EVTCHN 2 @@ -47,6 +53,19 @@ #define HVM_PARAM_BUFIOREQ_PFN 6 +#ifdef __ia64__ + +#define HVM_PARAM_NVRAM_FD 7 +#define HVM_PARAM_VHPT_SIZE 8 +#define HVM_PARAM_BUFPIOREQ_PFN 9 + +#elif defined(__i386__) || defined(__x86_64__) + +/* Expose Viridian interfaces to this HVM guest? 
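The HVM_PARAM_STORE_* values above are what the hvm_get_parameter() helper from xen/hvm.h (reworked earlier in this patch) is typically used to fetch; a PV-on-HVM start-of-day sketch (the helper returns 0 on failure):

	unsigned long store_pfn    = hvm_get_parameter(HVM_PARAM_STORE_PFN);
	unsigned long store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);

	/* map store_pfn << PAGE_SHIFT to reach the xenstore ring, then
	 * bind store_evtchn to learn when the daemon posts a message */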
*/ +#define HVM_PARAM_VIRIDIAN 9 + +#endif + /* * Set mode for virtual timers (currently x86 only): * delay_for_missed_ticks (default): @@ -90,6 +109,34 @@ /* Boolean: Enable aligning all periodic vpts to reduce interrupts */ #define HVM_PARAM_VPT_ALIGN 16 -#define HVM_NR_PARAMS 17 +/* Console debug shared memory ring and event channel */ +#define HVM_PARAM_CONSOLE_PFN 17 +#define HVM_PARAM_CONSOLE_EVTCHN 18 + +/* + * Select location of ACPI PM1a and TMR control blocks. Currently two locations + * are supported, specified by version 0 or 1 in this parameter: + * - 0: default, use the old addresses + * PM1A_EVT == 0x1f40; PM1A_CNT == 0x1f44; PM_TMR == 0x1f48 + * - 1: use the new default qemu addresses + * PM1A_EVT == 0xb000; PM1A_CNT == 0xb004; PM_TMR == 0xb008 + * You can find these address definitions in + */ +#define HVM_PARAM_ACPI_IOPORTS_LOCATION 19 + +/* Enable blocking memory events, async or sync (pause vcpu until response) + * onchangeonly indicates messages only on a change of value */ +#define HVM_PARAM_MEMORY_EVENT_CR0 20 +#define HVM_PARAM_MEMORY_EVENT_CR3 21 +#define HVM_PARAM_MEMORY_EVENT_CR4 22 +#define HVM_PARAM_MEMORY_EVENT_INT3 23 + +#define HVMPME_MODE_MASK (3 << 0) +#define HVMPME_mode_disabled 0 +#define HVMPME_mode_async 1 +#define HVMPME_mode_sync 2 +#define HVMPME_onchangeonly (1 << 2) + +#define HVM_NR_PARAMS 24 #endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */ --- head-2011-03-17.orig/include/xen/interface/io/blkif.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/blkif.h 2011-03-17 13:50:24.000000000 +0100 @@ -3,6 +3,24 @@ * * Unified block-device I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, Keir Fraser */ @@ -24,8 +42,10 @@ * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()). */ -typedef uint16_t blkif_vdev_t; -typedef uint64_t blkif_sector_t; +#ifndef blkif_vdev_t +#define blkif_vdev_t uint16_t +#endif +#define blkif_sector_t uint64_t /* * REQUEST CODES. @@ -34,7 +54,7 @@ typedef uint64_t blkif_sector_t; #define BLKIF_OP_WRITE 1 /* * Recognised only if "feature-barrier" is present in backend xenbus info. - * The "feature_barrier" node contains a boolean indicating whether barrier + * The "feature-barrier" node contains a boolean indicating whether barrier * requests are likely to succeed or fail. 
Either way, a barrier request * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by * the underlying block-device hardware. The boolean simply indicates whether @@ -43,33 +63,96 @@ typedef uint64_t blkif_sector_t; * create the "feature-barrier" node! */ #define BLKIF_OP_WRITE_BARRIER 2 +/* + * Recognised if "feature-flush-cache" is present in backend xenbus + * info. A flush will ask the underlying storage hardware to flush its + * non-volatile caches as appropriate. The "feature-flush-cache" node + * contains a boolean indicating whether flush requests are likely to + * succeed or fail. Either way, a flush request may fail at any time + * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying + * block-device hardware. The boolean simply indicates whether or not it + * is worthwhile for the frontend to attempt flushes. If a backend does + * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the + * "feature-flush-cache" node! + */ +#define BLKIF_OP_FLUSH_DISKCACHE 3 +/* + * Used in SLES sources for device specific command packet + * contained within the request. Reserved for that purpose. + */ +#define BLKIF_OP_RESERVED_1 4 +/* + * Recognised only if "feature-trim" is present in backend xenbus info. + * The "feature-trim" node contains a boolean indicating whether trim + * requests are likely to succeed or fail. Either way, a trim request + * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by + * the underlying block-device hardware. The boolean simply indicates whether + * or not it is worthwhile for the frontend to attempt trim requests. + * If a backend does not recognise BLKIF_OP_TRIM, it should *not* + * create the "feature-trim" node! + * + * Trim operation is a request for the underlying block device to mark + * extents to be erased. Trim operations are passed with sector_number as the + * sector index to begin trim operations at and nr_sectors as the number of + * sectors to be trimmed. The specified sectors should be trimmed if the + * underlying block device supports trim operations, or a BLKIF_RSP_EOPNOTSUPP + * should be returned. More information about trim operations at: + * http://t13.org/Documents/UploadedDocuments/docs2008/ + * e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc + */ +#define BLKIF_OP_TRIM 5 /* * Maximum scatter/gather segments per request. - * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE. + * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE. * NB. This could be 12 if the ring indexes weren't stored in the same page. */ #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11 +/* + * NB. first_sect and last_sect in blkif_request_segment, as well as + * sector_number in blkif_request, are always expressed in 512-byte units. + * However they must be properly aligned to the real sector size of the + * physical disk, which is reported in the "sector-size" node in the backend + * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units. + */ +struct blkif_request_segment { + grant_ref_t gref; /* reference to I/O buffer frame */ + /* @first_sect: first sector in frame to transfer (inclusive). */ + /* @last_sect: last sector in frame to transfer (inclusive). */ + uint8_t first_sect, last_sect; +}; + struct blkif_request { - uint8_t operation; /* BLKIF_OP_??? 
*/ - uint8_t nr_segments; /* number of segments */ - blkif_vdev_t handle; /* only for read/write requests */ - uint64_t id; /* private guest value, echoed in resp */ - blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ - struct blkif_request_segment { - grant_ref_t gref; /* reference to I/O buffer frame */ - /* @first_sect: first sector in frame to transfer (inclusive). */ - /* @last_sect: last sector in frame to transfer (inclusive). */ - uint8_t first_sect, last_sect; - } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; + uint8_t operation; /* BLKIF_OP_??? */ + uint8_t nr_segments; /* number of segments */ + blkif_vdev_t handle; /* only for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */ + struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; +}; +typedef struct blkif_request blkif_request_t; + +/* + * Cast to this structure when blkif_request.operation == BLKIF_OP_TRIM + * sizeof(struct blkif_request_trim) <= sizeof(struct blkif_request) + */ +struct blkif_request_trim { + uint8_t operation; /* BLKIF_OP_TRIM */ + uint8_t reserved; /* */ + blkif_vdev_t handle; /* same as for read/write requests */ + uint64_t id; /* private guest value, echoed in resp */ + blkif_sector_t sector_number;/* start sector idx on disk */ + uint64_t nr_sectors; /* number of contiguous sectors to trim */ }; +typedef struct blkif_request_trim blkif_request_trim_t; struct blkif_response { - uint64_t id; /* copied from request */ - uint8_t operation; /* copied from request */ - int16_t status; /* BLKIF_RSP_??? */ + uint64_t id; /* copied from request */ + uint8_t operation; /* copied from request */ + int16_t status; /* BLKIF_RSP_??? */ }; +typedef struct blkif_response blkif_response_t; /* * STATUS RETURN CODES. --- head-2011-03-17.orig/include/xen/interface/io/console.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/console.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,6 +3,24 @@ * * Console I/O interface for Xen guest OSes. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
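
Returning to the blkif.h hunk: a frontend that has seen the backend advertise "feature-trim" might queue a trim request roughly like this. The sketch assumes the header's usual DEFINE_RING_TYPES(blkif, ...) instantiation; ring setup, grant plumbing, and the final push/notify are elided.

/* Sketch: queue one BLKIF_OP_TRIM request on a connected front ring.
 * 'ring' comes from the normal blkfront setup path; the caller still
 * has to push the request and notify the backend. */
static void queue_trim(blkif_front_ring_t *ring, blkif_vdev_t handle,
                       uint64_t req_id, blkif_sector_t first_sector,
                       uint64_t nr_sectors)
{
    struct blkif_request_trim *t = (struct blkif_request_trim *)
        RING_GET_REQUEST(ring, ring->req_prod_pvt);

    t->operation     = BLKIF_OP_TRIM;
    t->reserved      = 0;
    t->handle        = handle;
    t->id            = req_id;       /* echoed back in the response */
    t->sector_number = first_sector; /* 512-byte units, aligned to the
                                      * device's real sector size */
    t->nr_sectors    = nr_sectors;

    ring->req_prod_pvt++;
}
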
+ *
 * Copyright (c) 2005, Keir Fraser
 */
--- head-2011-03-17.orig/include/xen/interface/io/fbif.h	2011-03-17 13:45:28.000000000 +0100
+++ head-2011-03-17/include/xen/interface/io/fbif.h	2011-01-31 15:14:12.000000000 +0100
@@ -41,12 +41,13 @@
  */
 #define XENFB_TYPE_UPDATE 2

-struct xenfb_update {
-	uint8_t type;		/* XENFB_TYPE_UPDATE */
-	int32_t x;		/* source x */
-	int32_t y;		/* source y */
-	int32_t width;		/* rect width */
-	int32_t height;		/* rect height */
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;       /* source x */
+    int32_t y;       /* source y */
+    int32_t width;   /* rect width */
+    int32_t height;  /* rect height */
 };

 /*
@@ -55,36 +56,58 @@ struct xenfb_update {
  */
 #define XENFB_TYPE_RESIZE 3

-struct xenfb_resize {
-	uint8_t type;		/* XENFB_TYPE_RESIZE */
-	int32_t width;		/* width in pixels */
-	int32_t height;		/* height in pixels */
-	int32_t stride;		/* stride in bytes */
-	int32_t depth;		/* depth in bits */
-	int32_t offset;		/* start offset within framebuffer */
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
 };

 #define XENFB_OUT_EVENT_SIZE 40

-union xenfb_out_event {
-	uint8_t type;
-	struct xenfb_update update;
-	struct xenfb_resize resize;
-	char pad[XENFB_OUT_EVENT_SIZE];
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
 };

 /* In events (backend -> frontend) */

 /*
  * Frontends should ignore unknown in events.
- * No in events currently defined.
  */
+/*
+ * Framebuffer refresh period advice
+ * The backend sends this event to advise the frontend of its preferred
+ * refresh period. Frontends that keep the framebuffer constantly
+ * up-to-date just ignore it. Frontends that use the advice should
+ * immediately refresh the framebuffer (and send an update notification
+ * event if those have been requested), then use the advised frequency
+ * to guide their periodic refreshes.
+ */ +#define XENFB_TYPE_REFRESH_PERIOD 1 +#define XENFB_NO_REFRESH 0 + +struct xenfb_refresh_period +{ + uint8_t type; /* XENFB_TYPE_UPDATE_PERIOD */ + uint32_t period; /* period of refresh, in ms, + * XENFB_NO_REFRESH if no refresh is needed */ +}; + #define XENFB_IN_EVENT_SIZE 40 -union xenfb_in_event { - uint8_t type; - char pad[XENFB_IN_EVENT_SIZE]; +union xenfb_in_event +{ + uint8_t type; + struct xenfb_refresh_period refresh_period; + char pad[XENFB_IN_EVENT_SIZE]; }; /* shared page */ @@ -93,41 +116,41 @@ union xenfb_in_event { #define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE) #define XENFB_IN_RING_OFFS 1024 #define XENFB_IN_RING(page) \ - ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) + ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS)) #define XENFB_IN_RING_REF(page, idx) \ - (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) + (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN]) #define XENFB_OUT_RING_SIZE 2048 #define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE) #define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE) #define XENFB_OUT_RING(page) \ - ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) + ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS)) #define XENFB_OUT_RING_REF(page, idx) \ - (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) + (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN]) -struct xenfb_page { - uint32_t in_cons, in_prod; - uint32_t out_cons, out_prod; - - int32_t width; /* width of the framebuffer (in pixels) */ - int32_t height; /* height of the framebuffer (in pixels) */ - uint32_t line_length; /* length of a row of pixels (in bytes) */ - uint32_t mem_length; /* length of the framebuffer (in bytes) */ - uint8_t depth; /* depth of a pixel (in bits) */ - - /* - * Framebuffer page directory - * - * Each directory page holds PAGE_SIZE / sizeof(*pd) - * framebuffer pages, and can thus map up to PAGE_SIZE * - * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and - * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 - * Megs 64 bit. 256 directories give enough room for a 512 - * Meg framebuffer with a max resolution of 12,800x10,240. - * Should be enough for a while with room leftover for - * expansion. - */ - unsigned long pd[256]; +struct xenfb_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; + + int32_t width; /* the width of the framebuffer (in pixels) */ + int32_t height; /* the height of the framebuffer (in pixels) */ + uint32_t line_length; /* the length of a row of pixels (in bytes) */ + uint32_t mem_length; /* the length of the framebuffer (in bytes) */ + uint8_t depth; /* the depth of a pixel (in bits) */ + + /* + * Framebuffer page directory + * + * Each directory page holds PAGE_SIZE / sizeof(*pd) + * framebuffer pages, and can thus map up to PAGE_SIZE * + * PAGE_SIZE / sizeof(*pd) bytes. With PAGE_SIZE == 4096 and + * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs + * 64 bit. 256 directories give enough room for a 512 Meg + * framebuffer with a max resolution of 12,800x10,240. Should + * be enough for a while with room leftover for expansion. 
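
A sketch of how a frontend that uses the refresh advice might handle the new in-event; set_refresh_timer() is a hypothetical frontend helper (period in ms, 0 = stop):

/* Sketch: consume a refresh-period advice event. */
extern void set_refresh_timer(uint32_t period_ms);

static void xenfb_handle_in_event(const union xenfb_in_event *ev)
{
    switch (ev->type) {
    case XENFB_TYPE_REFRESH_PERIOD:
        if (ev->refresh_period.period == XENFB_NO_REFRESH)
            set_refresh_timer(0);   /* nobody is watching; stop refreshing */
        else
            set_refresh_timer(ev->refresh_period.period);
        break;
    default:
        break;                      /* frontends must ignore unknown events */
    }
}
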
+ */ + unsigned long pd[256]; }; /* --- head-2011-03-17.orig/include/xen/interface/io/kbdif.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/kbdif.h 2011-01-31 15:14:12.000000000 +0100 @@ -45,34 +45,38 @@ */ #define XENKBD_TYPE_POS 4 -struct xenkbd_motion { - uint8_t type; /* XENKBD_TYPE_MOTION */ - int32_t rel_x; /* relative X motion */ - int32_t rel_y; /* relative Y motion */ - int32_t rel_z; /* relative Z motion (wheel) */ -}; - -struct xenkbd_key { - uint8_t type; /* XENKBD_TYPE_KEY */ - uint8_t pressed; /* 1 if pressed; 0 otherwise */ - uint32_t keycode; /* KEY_* from linux/input.h */ -}; - -struct xenkbd_position { - uint8_t type; /* XENKBD_TYPE_POS */ - int32_t abs_x; /* absolute X position (in FB pixels) */ - int32_t abs_y; /* absolute Y position (in FB pixels) */ - int32_t rel_z; /* relative Z motion (wheel) */ +struct xenkbd_motion +{ + uint8_t type; /* XENKBD_TYPE_MOTION */ + int32_t rel_x; /* relative X motion */ + int32_t rel_y; /* relative Y motion */ + int32_t rel_z; /* relative Z motion (wheel) */ +}; + +struct xenkbd_key +{ + uint8_t type; /* XENKBD_TYPE_KEY */ + uint8_t pressed; /* 1 if pressed; 0 otherwise */ + uint32_t keycode; /* KEY_* from linux/input.h */ +}; + +struct xenkbd_position +{ + uint8_t type; /* XENKBD_TYPE_POS */ + int32_t abs_x; /* absolute X position (in FB pixels) */ + int32_t abs_y; /* absolute Y position (in FB pixels) */ + int32_t rel_z; /* relative Z motion (wheel) */ }; #define XENKBD_IN_EVENT_SIZE 40 -union xenkbd_in_event { - uint8_t type; - struct xenkbd_motion motion; - struct xenkbd_key key; - struct xenkbd_position pos; - char pad[XENKBD_IN_EVENT_SIZE]; +union xenkbd_in_event +{ + uint8_t type; + struct xenkbd_motion motion; + struct xenkbd_key key; + struct xenkbd_position pos; + char pad[XENKBD_IN_EVENT_SIZE]; }; /* Out events (frontend -> backend) */ @@ -85,9 +89,10 @@ union xenkbd_in_event { #define XENKBD_OUT_EVENT_SIZE 40 -union xenkbd_out_event { - uint8_t type; - char pad[XENKBD_OUT_EVENT_SIZE]; +union xenkbd_out_event +{ + uint8_t type; + char pad[XENKBD_OUT_EVENT_SIZE]; }; /* shared page */ @@ -96,21 +101,22 @@ union xenkbd_out_event { #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE) #define XENKBD_IN_RING_OFFS 1024 #define XENKBD_IN_RING(page) \ - ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) + ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS)) #define XENKBD_IN_RING_REF(page, idx) \ - (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) + (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN]) #define XENKBD_OUT_RING_SIZE 1024 #define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE) #define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE) #define XENKBD_OUT_RING(page) \ - ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) + ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS)) #define XENKBD_OUT_RING_REF(page, idx) \ - (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) + (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN]) -struct xenkbd_page { - uint32_t in_cons, in_prod; - uint32_t out_cons, out_prod; +struct xenkbd_page +{ + uint32_t in_cons, in_prod; + uint32_t out_cons, out_prod; }; #endif --- head-2011-03-17.orig/include/xen/interface/io/netif.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/netif.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,6 +3,24 @@ * * Unified network-device I/O interface for Xen guest OSes. 
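
Before moving on to netif.h: the kbdif rings above are drained with the same fixed-slot indexing, along these lines. handle_key() and handle_motion() are hypothetical, and XENKBD_TYPE_KEY/XENKBD_TYPE_MOTION are the sibling event types this header defines next to XENKBD_TYPE_POS.

/* Sketch: drain pending input events from the shared page. Real code
 * needs memory barriers between reading in_prod and the events, and
 * before publishing the new in_cons. */
extern void handle_key(uint32_t keycode, uint8_t pressed);
extern void handle_motion(int32_t rel_x, int32_t rel_y);

static void xenkbd_drain(struct xenkbd_page *page)
{
    uint32_t cons = page->in_cons;

    while (cons != page->in_prod) {
        union xenkbd_in_event *ev = &XENKBD_IN_RING_REF(page, cons);

        switch (ev->type) {
        case XENKBD_TYPE_KEY:
            handle_key(ev->key.keycode, ev->key.pressed);
            break;
        case XENKBD_TYPE_MOTION:
            handle_motion(ev->motion.rel_x, ev->motion.rel_y);
            break;
        }
        cons++;
    }
    page->in_cons = cons;
}
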
* + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2003-2004, Keir Fraser */ @@ -47,18 +65,21 @@ #define _NETTXF_extra_info (3) #define NETTXF_extra_info (1U<<_NETTXF_extra_info) -struct xen_netif_tx_request { +struct netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ uint16_t flags; /* NETTXF_* */ uint16_t id; /* Echoed in response message. */ uint16_t size; /* Packet size in bytes. */ }; +typedef struct netif_tx_request netif_tx_request_t; /* Types of netif_extra_info descriptors. */ -#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ -#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ -#define XEN_NETIF_EXTRA_TYPE_MAX (2) +#define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ +#define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ +#define XEN_NETIF_EXTRA_TYPE_MAX (4) /* netif_extra_info flags. */ #define _XEN_NETIF_EXTRA_FLAG_MORE (0) @@ -71,49 +92,68 @@ struct xen_netif_tx_request { * This structure needs to fit within both netif_tx_request and * netif_rx_response for compatibility. */ -struct xen_netif_extra_info { - uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ - uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ - - union { - struct { - /* - * Maximum payload size of each segment. For - * example, for TCP this is just the path MSS. - */ - uint16_t size; - - /* - * GSO type. This determines the protocol of - * the packet and any extra features required - * to segment the packet properly. - */ - uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ - - /* Future expansion. */ - uint8_t pad; - - /* - * GSO features. This specifies any extra GSO - * features required to process this packet, - * such as ECN support for TCPv4. - */ - uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ - } gso; +struct netif_extra_info { + uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ + uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ + + union { + /* + * XEN_NETIF_EXTRA_TYPE_GSO: + */ + struct { + /* + * Maximum payload size of each segment. For example, for TCP this + * is just the path MSS. + */ + uint16_t size; + + /* + * GSO type. This determines the protocol of the packet and any + * extra features required to segment the packet properly. + */ + uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ + + /* Future expansion. */ + uint8_t pad; + + /* + * GSO features. 
This specifies any extra GSO features required + * to process this packet, such as ECN support for TCPv4. + */ + uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ + } gso; + + /* + * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: + * Backend advertises availability via 'feature-multicast-control' + * xenbus node containing value '1'. + * Frontend requests this feature by advertising + * 'request-multicast-control' xenbus node containing value '1'. + * If multicast control is requested then multicast flooding is + * disabled and the frontend must explicitly register its interest + * in multicast groups using dummy transmit requests containing + * MCAST_{ADD,DEL} extra-info fragments. + */ + struct { + uint8_t addr[6]; /* Address to add/remove. */ + } mcast; - uint16_t pad[3]; - } u; + uint16_t pad[3]; + } u; }; +typedef struct netif_extra_info netif_extra_info_t; -struct xen_netif_tx_response { - uint16_t id; - int16_t status; /* NETIF_RSP_* */ +struct netif_tx_response { + uint16_t id; + int16_t status; /* NETIF_RSP_* */ }; +typedef struct netif_tx_response netif_tx_response_t; -struct xen_netif_rx_request { - uint16_t id; /* Echoed in response message. */ - grant_ref_t gref; /* Reference to incoming granted frame */ +struct netif_rx_request { + uint16_t id; /* Echoed in response message. */ + grant_ref_t gref; /* Reference to incoming granted frame */ }; +typedef struct netif_rx_request netif_rx_request_t; /* Packet data has been validated against protocol checksum. */ #define _NETRXF_data_validated (0) @@ -131,23 +171,20 @@ struct xen_netif_rx_request { #define _NETRXF_extra_info (3) #define NETRXF_extra_info (1U<<_NETRXF_extra_info) -struct xen_netif_rx_response { +struct netif_rx_response { uint16_t id; uint16_t offset; /* Offset in page of start of received packet */ uint16_t flags; /* NETRXF_* */ int16_t status; /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */ }; +typedef struct netif_rx_response netif_rx_response_t; /* * Generate netif ring structures and types. */ -DEFINE_RING_TYPES(xen_netif_tx, - struct xen_netif_tx_request, - struct xen_netif_tx_response); -DEFINE_RING_TYPES(xen_netif_rx, - struct xen_netif_rx_request, - struct xen_netif_rx_response); +DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); +DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); #define NETIF_RSP_DROPPED -2 #define NETIF_RSP_ERROR -1 --- head-2011-03-17.orig/include/xen/interface/io/protocols.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/protocols.h 2011-01-31 15:14:12.000000000 +0100 @@ -1,10 +1,31 @@ +/****************************************************************************** + * protocols.h + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + #ifndef __XEN_PROTOCOLS_H__ #define __XEN_PROTOCOLS_H__ #define XEN_IO_PROTO_ABI_X86_32 "x86_32-abi" #define XEN_IO_PROTO_ABI_X86_64 "x86_64-abi" #define XEN_IO_PROTO_ABI_IA64 "ia64-abi" -#define XEN_IO_PROTO_ABI_POWERPC64 "powerpc64-abi" #if defined(__i386__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32 @@ -12,8 +33,6 @@ # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64 #elif defined(__ia64__) # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64 -#elif defined(__powerpc64__) -# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64 #else # error arch fixup needed here #endif --- head-2011-03-17.orig/include/xen/interface/io/ring.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/ring.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,16 +3,42 @@ * * Shared producer-consumer ring macros. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Tim Deegan and Andrew Warfield November 2004. */ #ifndef __XEN_PUBLIC_IO_RING_H__ #define __XEN_PUBLIC_IO_RING_H__ +#include "../xen-compat.h" + +#if __XEN_INTERFACE_VERSION__ < 0x00030208 +#define xen_mb() mb() +#define xen_rmb() rmb() +#define xen_wmb() wmb() +#endif + typedef unsigned int RING_IDX; /* Round a 32-bit unsigned constant down to the nearest power of two. */ -#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) +#define __RD2(_x) (((_x) & 0x00000002) ? 0x2 : ((_x) & 0x1)) #define __RD4(_x) (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2 : __RD2(_x)) #define __RD8(_x) (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4 : __RD4(_x)) #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8 : __RD8(_x)) @@ -31,74 +57,86 @@ typedef unsigned int RING_IDX; /* * The same for passing in an actual pointer instead of a name tag. */ -#define __RING_SIZE(_s, _sz) \ - (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) +#define __RING_SIZE(_s, _sz) \ + (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0]))) /* * Macros to make the correct C datatypes for a new kind of ring. * * To make a new ring datatype, you need to have two message structures, - * let's say struct request, and struct response already defined. + * let's say request_t, and response_t already defined. 
* * In a header where you want the ring datatype declared, you then do: * - * DEFINE_RING_TYPES(mytag, struct request, struct response); + * DEFINE_RING_TYPES(mytag, request_t, response_t); * * These expand out to give you a set of types, as you can see below. * The most important of these are: * - * struct mytag_sring - The shared ring. - * struct mytag_front_ring - The 'front' half of the ring. - * struct mytag_back_ring - The 'back' half of the ring. + * mytag_sring_t - The shared ring. + * mytag_front_ring_t - The 'front' half of the ring. + * mytag_back_ring_t - The 'back' half of the ring. * * To initialize a ring in your code you need to know the location and size * of the shared memory area (PAGE_SIZE, for instance). To initialise * the front half: * - * struct mytag_front_ring front_ring; - * SHARED_RING_INIT((struct mytag_sring *)shared_page); - * FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_front_ring_t front_ring; + * SHARED_RING_INIT((mytag_sring_t *)shared_page); + * FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); * * Initializing the back follows similarly (note that only the front * initializes the shared ring): * - * struct mytag_back_ring back_ring; - * BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page, - * PAGE_SIZE); + * mytag_back_ring_t back_ring; + * BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE); */ -#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ - \ -/* Shared ring entry */ \ -union __name##_sring_entry { \ - __req_t req; \ - __rsp_t rsp; \ -}; \ - \ -/* Shared ring page */ \ -struct __name##_sring { \ - RING_IDX req_prod, req_event; \ - RING_IDX rsp_prod, rsp_event; \ - uint8_t pad[48]; \ - union __name##_sring_entry ring[1]; /* variable-length */ \ -}; \ - \ -/* "Front" end's private variables */ \ -struct __name##_front_ring { \ - RING_IDX req_prod_pvt; \ - RING_IDX rsp_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; \ - \ -/* "Back" end's private variables */ \ -struct __name##_back_ring { \ - RING_IDX rsp_prod_pvt; \ - RING_IDX req_cons; \ - unsigned int nr_ents; \ - struct __name##_sring *sring; \ -}; +#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t) \ + \ +/* Shared ring entry */ \ +union __name##_sring_entry { \ + __req_t req; \ + __rsp_t rsp; \ +}; \ + \ +/* Shared ring page */ \ +struct __name##_sring { \ + RING_IDX req_prod, req_event; \ + RING_IDX rsp_prod, rsp_event; \ + union { \ + struct { \ + uint8_t smartpoll_active; \ + } netif; \ + struct { \ + uint8_t msg; \ + } tapif_user; \ + uint8_t pvt_pad[4]; \ + } private; \ + uint8_t __pad[44]; \ + union __name##_sring_entry ring[1]; /* variable-length */ \ +}; \ + \ +/* "Front" end's private variables */ \ +struct __name##_front_ring { \ + RING_IDX req_prod_pvt; \ + RING_IDX rsp_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* "Back" end's private variables */ \ +struct __name##_back_ring { \ + RING_IDX rsp_prod_pvt; \ + RING_IDX req_cons; \ + unsigned int nr_ents; \ + struct __name##_sring *sring; \ +}; \ + \ +/* Syntactic sugar */ \ +typedef struct __name##_sring __name##_sring_t; \ +typedef struct __name##_front_ring __name##_front_ring_t; \ +typedef struct __name##_back_ring __name##_back_ring_t /* * Macros for manipulating rings. 
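
Putting the typedef sugar together with the initialisation flow the comment describes, here is a minimal front-half sketch. request_t/response_t are made-up message types; PAGE_SIZE and the event-channel kick come from the surrounding OS code.

/* Sketch: a complete front-half flow for a made-up "mytag" ring.
 * 'shared_page' is one page of memory granted to the peer. */
typedef struct { uint64_t id; } request_t;
typedef struct { uint64_t id; int16_t status; } response_t;

DEFINE_RING_TYPES(mytag, request_t, response_t);

static mytag_front_ring_t front;

static void mytag_ring_init(void *shared_page)
{
    SHARED_RING_INIT((mytag_sring_t *)shared_page);
    FRONT_RING_INIT(&front, (mytag_sring_t *)shared_page, PAGE_SIZE);
}

static void mytag_submit(const request_t *req)
{
    int notify;

    *RING_GET_REQUEST(&front, front.req_prod_pvt) = *req;
    front.req_prod_pvt++;
    RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&front, notify);
    if (notify) {
        /* kick the backend's event channel here */
    }
}
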
@@ -116,86 +154,95 @@ struct __name##_back_ring { \ */ /* Initialising empty rings */ -#define SHARED_RING_INIT(_s) do { \ - (_s)->req_prod = (_s)->rsp_prod = 0; \ - (_s)->req_event = (_s)->rsp_event = 1; \ - memset((_s)->pad, 0, sizeof((_s)->pad)); \ +#define SHARED_RING_INIT(_s) do { \ + (_s)->req_prod = (_s)->rsp_prod = 0; \ + (_s)->req_event = (_s)->rsp_event = 1; \ + (void)memset((_s)->private.pvt_pad, 0, sizeof((_s)->private.pvt_pad)); \ + (void)memset((_s)->__pad, 0, sizeof((_s)->__pad)); \ } while(0) -#define FRONT_RING_INIT(_r, _s, __size) do { \ - (_r)->req_prod_pvt = 0; \ - (_r)->rsp_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define FRONT_RING_INIT(_r, _s, __size) do { \ + (_r)->req_prod_pvt = 0; \ + (_r)->rsp_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) -#define BACK_RING_INIT(_r, _s, __size) do { \ - (_r)->rsp_prod_pvt = 0; \ - (_r)->req_cons = 0; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ - (_r)->sring = (_s); \ +#define BACK_RING_INIT(_r, _s, __size) do { \ + (_r)->rsp_prod_pvt = 0; \ + (_r)->req_cons = 0; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ + (_r)->sring = (_s); \ } while (0) /* Initialize to existing shared indexes -- for recovery */ -#define FRONT_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->req_prod_pvt = (_s)->req_prod; \ - (_r)->rsp_cons = (_s)->rsp_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ +#define FRONT_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->req_prod_pvt = (_s)->req_prod; \ + (_r)->rsp_cons = (_s)->rsp_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ } while (0) -#define BACK_RING_ATTACH(_r, _s, __size) do { \ - (_r)->sring = (_s); \ - (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ - (_r)->req_cons = (_s)->req_prod; \ - (_r)->nr_ents = __RING_SIZE(_s, __size); \ +#define BACK_RING_ATTACH(_r, _s, __size) do { \ + (_r)->sring = (_s); \ + (_r)->rsp_prod_pvt = (_s)->rsp_prod; \ + (_r)->req_cons = (_s)->req_prod; \ + (_r)->nr_ents = __RING_SIZE(_s, __size); \ } while (0) /* How big is this ring? */ -#define RING_SIZE(_r) \ +#define RING_SIZE(_r) \ ((_r)->nr_ents) /* Number of free requests (for use on front side only). */ -#define RING_FREE_REQUESTS(_r) \ +#define RING_FREE_REQUESTS(_r) \ (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons)) /* Test if there is an empty slot available on the front ring. * (This is only meaningful from the front. ) */ -#define RING_FULL(_r) \ +#define RING_FULL(_r) \ (RING_FREE_REQUESTS(_r) == 0) /* Test if there are outstanding messages to be processed on a ring. */ -#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ +#define RING_HAS_UNCONSUMED_RESPONSES(_r) \ ((_r)->sring->rsp_prod - (_r)->rsp_cons) -#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ - ({ \ - unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ - unsigned int rsp = RING_SIZE(_r) - \ - ((_r)->req_cons - (_r)->rsp_prod_pvt); \ - req < rsp ? req : rsp; \ - }) +#ifdef __GNUC__ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({ \ + unsigned int req = (_r)->sring->req_prod - (_r)->req_cons; \ + unsigned int rsp = RING_SIZE(_r) - \ + ((_r)->req_cons - (_r)->rsp_prod_pvt); \ + req < rsp ? req : rsp; \ +}) +#else +/* Same as above, but without the nice GCC ({ ... }) syntax. */ +#define RING_HAS_UNCONSUMED_REQUESTS(_r) \ + ((((_r)->sring->req_prod - (_r)->req_cons) < \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ? 
\ + ((_r)->sring->req_prod - (_r)->req_cons) : \ + (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) +#endif /* Direct access to individual ring elements, by index. */ -#define RING_GET_REQUEST(_r, _idx) \ +#define RING_GET_REQUEST(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req)) -#define RING_GET_RESPONSE(_r, _idx) \ +#define RING_GET_RESPONSE(_r, _idx) \ (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp)) /* Loop termination condition: Would the specified index overflow the ring? */ -#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ +#define RING_REQUEST_CONS_OVERFLOW(_r, _cons) \ (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r)) -#define RING_PUSH_REQUESTS(_r) do { \ - wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = (_r)->req_prod_pvt; \ +#define RING_PUSH_REQUESTS(_r) do { \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = (_r)->req_prod_pvt; \ } while (0) -#define RING_PUSH_RESPONSES(_r) do { \ - wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ +#define RING_PUSH_RESPONSES(_r) do { \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt; \ } while (0) /* @@ -228,40 +275,40 @@ struct __name##_back_ring { \ * field appropriately. */ -#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->req_prod; \ - RING_IDX __new = (_r)->req_prod_pvt; \ - wmb(); /* back sees requests /before/ updated producer index */ \ - (_r)->sring->req_prod = __new; \ - mb(); /* back sees new requests /before/ we check req_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->req_prod; \ + RING_IDX __new = (_r)->req_prod_pvt; \ + xen_wmb(); /* back sees requests /before/ updated producer index */ \ + (_r)->sring->req_prod = __new; \ + xen_mb(); /* back sees new requests /before/ we check req_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ - RING_IDX __old = (_r)->sring->rsp_prod; \ - RING_IDX __new = (_r)->rsp_prod_pvt; \ - wmb(); /* front sees responses /before/ updated producer index */ \ - (_r)->sring->rsp_prod = __new; \ - mb(); /* front sees new responses /before/ we check rsp_event */ \ - (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ - (RING_IDX)(__new - __old)); \ +#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do { \ + RING_IDX __old = (_r)->sring->rsp_prod; \ + RING_IDX __new = (_r)->rsp_prod_pvt; \ + xen_wmb(); /* front sees resps /before/ updated producer index */ \ + (_r)->sring->rsp_prod = __new; \ + xen_mb(); /* front sees new resps /before/ we check rsp_event */ \ + (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) < \ + (RING_IDX)(__new - __old)); \ } while (0) -#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ - if (_work_to_do) break; \ - (_r)->sring->req_event = (_r)->req_cons + 1; \ - mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ +#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ + if (_work_to_do) break; \ + (_r)->sring->req_event = (_r)->req_cons + 
1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r); \ } while (0) -#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ - if (_work_to_do) break; \ - (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ - mb(); \ - (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ +#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do { \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ + if (_work_to_do) break; \ + (_r)->sring->rsp_event = (_r)->rsp_cons + 1; \ + xen_mb(); \ + (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r); \ } while (0) #endif /* __XEN_PUBLIC_IO_RING_H__ */ --- head-2011-03-17.orig/include/xen/interface/io/xenbus.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/xenbus.h 2011-01-31 15:14:12.000000000 +0100 @@ -36,6 +36,7 @@ enum xenbus_state XenbusStateReconfigured = 8 }; +typedef enum xenbus_state XenbusState; #endif /* _XEN_PUBLIC_IO_XENBUS_H */ --- head-2011-03-17.orig/include/xen/interface/io/xs_wire.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/io/xs_wire.h 2011-01-31 15:14:12.000000000 +0100 @@ -1,6 +1,25 @@ /* * Details of the "wire" protocol between Xen Store Daemon and client * library or guest kernel. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (C) 2005 Rusty Russell IBM Corporation */ @@ -26,7 +45,10 @@ enum xsd_sockmsg_type XS_SET_PERMS, XS_WATCH_EVENT, XS_ERROR, - XS_IS_DOMAIN_INTRODUCED + XS_IS_DOMAIN_INTRODUCED, + XS_RESUME, + XS_SET_TARGET, + XS_RESTRICT }; #define XS_WRITE_NONE "NONE" @@ -39,8 +61,14 @@ struct xsd_errors int errnum; const char *errstring; }; +#ifdef EINVAL #define XSD_ERROR(x) { x, #x } -static struct xsd_errors xsd_errors[] __attribute__((unused)) = { +/* LINTED: static unused */ +static struct xsd_errors xsd_errors[] +#if defined(__GNUC__) +__attribute__((unused)) +#endif + = { XSD_ERROR(EINVAL), XSD_ERROR(EACCES), XSD_ERROR(EEXIST), @@ -56,6 +84,7 @@ static struct xsd_errors xsd_errors[] __ XSD_ERROR(EAGAIN), XSD_ERROR(EISCONN) }; +#endif struct xsd_sockmsg { @@ -84,4 +113,11 @@ struct xenstore_domain_interface { XENSTORE_RING_IDX rsp_cons, rsp_prod; }; +/* Violating this is very bad. See docs/misc/xenstore.txt. 
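
A quick sketch of honouring the path limits #defined just below before handing a request to the store:

#include <string.h>

/* Sketch: check a store path against the limits defined here. */
static int xs_path_ok(const char *path)
{
    size_t limit = (path[0] == '/') ? XENSTORE_ABS_PATH_MAX
                                    : XENSTORE_REL_PATH_MAX;
    return strlen(path) <= limit;
}
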
*/ +#define XENSTORE_PAYLOAD_MAX 4096 + +/* Violating these just gets you an error back */ +#define XENSTORE_ABS_PATH_MAX 3072 +#define XENSTORE_REL_PATH_MAX 2048 + #endif /* _XS_WIRE_H */ --- head-2011-03-17.orig/include/xen/interface/memory.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/memory.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,13 +3,31 @@ * * Memory reservation and information. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Keir Fraser */ #ifndef __XEN_PUBLIC_MEMORY_H__ #define __XEN_PUBLIC_MEMORY_H__ -#include +#include "xen.h" /* * Increase or decrease the specified domain's memory reservation. Returns a @@ -19,6 +37,26 @@ #define XENMEM_increase_reservation 0 #define XENMEM_decrease_reservation 1 #define XENMEM_populate_physmap 6 + +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 +/* + * Maximum # bits addressable by the user of the allocated region (e.g., I/O + * devices often have a 32-bit limitation even in 64-bit systems). If zero + * then the user has no addressing restriction. This field is not used by + * XENMEM_decrease_reservation. + */ +#define XENMEMF_address_bits(x) (x) +#define XENMEMF_get_address_bits(x) ((x) & 0xffu) +/* NUMA node to allocate from. */ +#define XENMEMF_node(x) (((x) + 1) << 8) +#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu) +/* Flag to populate physmap with populate-on-demand entries */ +#define XENMEMF_populate_on_demand (1<<16) +/* Flag to request allocation only from the node specified */ +#define XENMEMF_exact_node_request (1<<17) +#define XENMEMF_exact_node(n) (XENMEMF_node(n) | XENMEMF_exact_node_request) +#endif + struct xen_memory_reservation { /* @@ -31,28 +69,27 @@ struct xen_memory_reservation { * OUT: GMFN bases of extents that were allocated * (NB. This command also updates the mach_to_phys translation table) */ - GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* Number of extents, and size/alignment of each (2^extent_order pages). */ - unsigned long nr_extents; + xen_ulong_t nr_extents; unsigned int extent_order; - /* - * Maximum # bits addressable by the user of the allocated region (e.g., - * I/O devices often have a 32-bit limitation even in 64-bit systems). If - * zero then the user has no addressing restriction. - * This field is not used by XENMEM_decrease_reservation. - */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030209 + /* XENMEMF flags. 
*/ + unsigned int mem_flags; +#else unsigned int address_bits; +#endif /* * Domain whose reservation is being changed. * Unprivileged domains can specify only DOMID_SELF. */ domid_t domid; - }; -DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation); +typedef struct xen_memory_reservation xen_memory_reservation_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t); /* * An atomic exchange of memory pages. If return code is zero then @@ -92,10 +129,11 @@ struct xen_memory_exchange { * command will be non-zero. * 5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER! */ - unsigned long nr_exchanged; + xen_ulong_t nr_exchanged; }; +typedef struct xen_memory_exchange xen_memory_exchange_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); -DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); /* * Returns the maximum machine frame number of mapped RAM in this system. * This command always succeeds (it never returns an error code). @@ -112,6 +150,11 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_ex #define XENMEM_maximum_reservation 4 /* + * Returns the maximum GPFN in use by the guest, or -ve errcode on failure. + */ +#define XENMEM_maximum_gpfn 14 + +/* * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys * mapping table. Architectures which do not have a m2p table do not implement * this command. @@ -130,7 +173,7 @@ struct xen_machphys_mfn_list { * any large discontiguities in the machine address space, 2MB gaps in * the machphys table will be represented by an MFN base of zero. */ - GUEST_HANDLE(ulong) extent_start; + XEN_GUEST_HANDLE(xen_pfn_t) extent_start; /* * Number of extents written to the above array. This will be smaller @@ -138,7 +181,8 @@ struct xen_machphys_mfn_list { */ unsigned int nr_extents; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list); +typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t); /* * Returns the location in virtual address space of the machine_to_phys @@ -148,10 +192,11 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_ */ #define XENMEM_machphys_mapping 12 struct xen_machphys_mapping { - unsigned long v_start, v_end; /* Start and end virtual addresses. */ - unsigned long max_mfn; /* Maximum MFN that can be looked up. */ + xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */ + xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */ }; -DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t); +typedef struct xen_machphys_mapping xen_machphys_mapping_t; +DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t); /* * Sets the GPFN at which a particular page appears in the specified guest's @@ -166,38 +211,22 @@ struct xen_add_to_physmap { /* Source mapping space. */ #define XENMAPSPACE_shared_info 0 /* shared info page */ #define XENMAPSPACE_grant_table 1 /* grant table page */ +#define XENMAPSPACE_gmfn 2 /* GMFN */ unsigned int space; +#define XENMAPIDX_grant_table_status 0x80000000 + /* Index into source mapping space. */ - unsigned long idx; + xen_ulong_t idx; /* GPFN where the source mapping page should appear. */ - unsigned long gpfn; + xen_pfn_t gpfn; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap); +typedef struct xen_add_to_physmap xen_add_to_physmap_t; +DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t); -/* - * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error - * code on failure. This call only works for auto-translated guests. - */ -#define XENMEM_translate_gpfn_list 8 -struct xen_translate_gpfn_list { - /* Which domain to translate for? 
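
To illustrate the new mem_flags encoding (available from interface version 0x00030209): a reservation restricted to NUMA node 1 and to 32-bit-addressable memory might be filled in like this. DOMID_SELF comes from xen.h; extent_start is left for the caller to set up.

/* Sketch: fill a reservation that must come from NUMA node 1 and stay
 * 32-bit addressable. */
static void fill_reservation(struct xen_memory_reservation *r,
                             xen_ulong_t nr_extents, unsigned int order)
{
    r->nr_extents   = nr_extents;
    r->extent_order = order;
    r->mem_flags    = XENMEMF_address_bits(32) | XENMEMF_exact_node(1);
    r->domid        = DOMID_SELF;
}
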
*/ - domid_t domid; - - /* Length of list. */ - unsigned long nr_gpfns; - - /* List of GPFNs to translate. */ - GUEST_HANDLE(ulong) gpfn_list; - - /* - * Output list to contain MFN translations. May be the same as the input - * list (in which case each input GPFN is overwritten with the output MFN). - */ - GUEST_HANDLE(ulong) mfn_list; -}; -DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list); +/*** REMOVED ***/ +/*#define XENMEM_translate_gpfn_list 8*/ /* * Returns the pseudo-physical memory map as it was when the domain @@ -217,9 +246,10 @@ struct xen_memory_map { * Entries in the buffer are in the same format as returned by the * BIOS INT 0x15 EAX=0xE820 call. */ - GUEST_HANDLE(void) buffer; + XEN_GUEST_HANDLE(void) buffer; }; -DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map); +typedef struct xen_memory_map xen_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t); /* * Returns the real physical memory map. Passes the same structure as @@ -228,10 +258,37 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_ma */ #define XENMEM_machine_memory_map 10 +/* + * Set the pseudo-physical memory map of a domain, as returned by + * XENMEM_memory_map. + * arg == addr of xen_foreign_memory_map_t. + */ +#define XENMEM_set_memory_map 13 +struct xen_foreign_memory_map { + domid_t domid; + struct xen_memory_map map; +}; +typedef struct xen_foreign_memory_map xen_foreign_memory_map_t; +DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t); + +#define XENMEM_set_pod_target 16 +#define XENMEM_get_pod_target 17 +struct xen_pod_target { + /* IN */ + uint64_t target_pages; + /* OUT */ + uint64_t tot_pages; + uint64_t pod_cache_pages; + uint64_t pod_entries; + /* IN */ + domid_t domid; +}; +typedef struct xen_pod_target xen_pod_target_t; /* - * Prevent the balloon driver from changing the memory reservation - * during a driver critical region. + * Get the number of MFNs saved through memory sharing. + * The call never fails. */ -extern spinlock_t xen_reservation_lock; +#define XENMEM_get_sharing_freed_pages 18 + #endif /* __XEN_PUBLIC_MEMORY_H__ */ --- head-2011-03-17.orig/include/xen/interface/physdev.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/physdev.h 2011-03-17 13:50:24.000000000 +0100 @@ -21,6 +21,8 @@ #ifndef __XEN_PUBLIC_PHYSDEV_H__ #define __XEN_PUBLIC_PHYSDEV_H__ +#include "xen.h" + /* * Prototype for this hypercall is: * int physdev_op(int cmd, void *args) @@ -37,6 +39,23 @@ struct physdev_eoi { /* IN */ uint32_t irq; }; +typedef struct physdev_eoi physdev_eoi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t); + +/* + * Register a shared page for the hypervisor to indicate whether the guest + * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly + * once the guest used this function in that the associated event channel + * will automatically get unmasked. The page registered is used as a bit + * array indexed by Xen's PIRQ value. + */ +#define PHYSDEVOP_pirq_eoi_gmfn 17 +struct physdev_pirq_eoi_gmfn { + /* IN */ + xen_pfn_t gmfn; +}; +typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t; +DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t); /* * Query the status of an IRQ line. @@ -49,6 +68,8 @@ struct physdev_irq_status_query { /* OUT */ uint32_t flags; /* XENIRQSTAT_* */ }; +typedef struct physdev_irq_status_query physdev_irq_status_query_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t); /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? 
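
A sketch of registering the PIRQ-EOI bit-array page described above. do_physdev_op() and virt_to_gmfn() are hypothetical stand-ins for the OS's hypercall wrapper and address translation:

/* Sketch: hand Xen one page to use as the PIRQ-EOI bit array. */
extern long do_physdev_op(int cmd, void *arg);
extern xen_pfn_t virt_to_gmfn(void *va);

static long register_pirq_eoi_page(void *page)
{
    struct physdev_pirq_eoi_gmfn arg = {
        .gmfn = virt_to_gmfn(page),
    };

    return do_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &arg);
}
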
*/ #define _XENIRQSTAT_needs_eoi (0) @@ -67,6 +88,8 @@ struct physdev_set_iopl { /* IN */ uint32_t iopl; }; +typedef struct physdev_set_iopl physdev_set_iopl_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t); /* * Set the current VCPU's I/O-port permissions bitmap. @@ -75,9 +98,15 @@ struct physdev_set_iopl { #define PHYSDEVOP_set_iobitmap 7 struct physdev_set_iobitmap { /* IN */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(uint8) bitmap; +#else uint8_t * bitmap; +#endif uint32_t nr_ports; }; +typedef struct physdev_set_iobitmap physdev_set_iobitmap_t; +DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t); /* * Read or write an IO-APIC register. @@ -92,6 +121,8 @@ struct physdev_apic { /* IN or OUT */ uint32_t value; }; +typedef struct physdev_apic physdev_apic_t; +DEFINE_XEN_GUEST_HANDLE(physdev_apic_t); /* * Allocate or free a physical upcall vector for the specified IRQ line. @@ -105,6 +136,8 @@ struct physdev_irq { /* IN or OUT */ uint32_t vector; }; +typedef struct physdev_irq physdev_irq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_irq_t); #define MAP_PIRQ_TYPE_MSI 0x0 #define MAP_PIRQ_TYPE_GSI 0x1 @@ -128,6 +161,8 @@ struct physdev_map_pirq { /* IN */ uint64_t table_base; }; +typedef struct physdev_map_pirq physdev_map_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t); #define PHYSDEVOP_unmap_pirq 14 struct physdev_unmap_pirq { @@ -135,6 +170,8 @@ struct physdev_unmap_pirq { /* IN */ int pirq; }; +typedef struct physdev_unmap_pirq physdev_unmap_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t); #define PHYSDEVOP_manage_pci_add 15 #define PHYSDEVOP_manage_pci_remove 16 @@ -143,6 +180,17 @@ struct physdev_manage_pci { uint8_t bus; uint8_t devfn; }; +typedef struct physdev_manage_pci physdev_manage_pci_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t); + +#define PHYSDEVOP_restore_msi 19 +struct physdev_restore_msi { + /* IN */ + uint8_t bus; + uint8_t devfn; +}; +typedef struct physdev_restore_msi physdev_restore_msi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t); #define PHYSDEVOP_manage_pci_add_ext 20 struct physdev_manage_pci_ext { @@ -156,6 +204,8 @@ struct physdev_manage_pci_ext { uint8_t devfn; } physfn; }; +typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t; +DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t); /* * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op() @@ -171,6 +221,8 @@ struct physdev_op { struct physdev_irq irq_op; } u; }; +typedef struct physdev_op physdev_op_t; +DEFINE_XEN_GUEST_HANDLE(physdev_op_t); #define PHYSDEVOP_setup_gsi 21 struct physdev_setup_gsi { @@ -181,12 +233,10 @@ struct physdev_setup_gsi { uint8_t polarity; /* IN */ }; +typedef struct physdev_setup_gsi physdev_setup_gsi_t; +DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t); -#define PHYSDEVOP_get_nr_pirqs 22 -struct physdev_nr_pirqs { - /* OUT */ - uint32_t nr_pirqs; -}; +/* leave PHYSDEVOP 22 free */ /* type is MAP_PIRQ_TYPE_GSI or MAP_PIRQ_TYPE_MSI * the hypercall returns a free pirq */ @@ -198,6 +248,9 @@ struct physdev_get_free_pirq { uint32_t pirq; }; +typedef struct physdev_get_free_pirq physdev_get_free_pirq_t; +DEFINE_XEN_GUEST_HANDLE(physdev_get_free_pirq_t); + /* * Notify that some PIRQ-bound event channels have been unmasked. 
* ** This command is obsolete since interface version 0x00030202 and is ** --- head-2011-03-17.orig/include/xen/interface/sched.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/sched.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,6 +3,24 @@ * * Scheduler state interactions * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Keir Fraser */ @@ -13,17 +31,17 @@ /* * The prototype for this hypercall is: - * long sched_op_new(int cmd, void *arg) + * long sched_op(int cmd, void *arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == Operation-specific extra argument(s), as described below. * - * **NOTE**: - * Versions of Xen prior to 3.0.2 provide only the following legacy version + * Versions of Xen prior to 3.0.2 provided only the following legacy version * of this hypercall, supporting only the commands yield, block and shutdown: * long sched_op(int cmd, unsigned long arg) * @cmd == SCHEDOP_??? (scheduler operation). * @arg == 0 (SCHEDOP_yield and SCHEDOP_block) * == SHUTDOWN_* code (SCHEDOP_shutdown) + * This legacy version is available to new guests as sched_op_compat(). */ /* @@ -49,7 +67,8 @@ struct sched_shutdown { unsigned int reason; /* SHUTDOWN_* */ }; -DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown); +typedef struct sched_shutdown sched_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t); /* * Poll a set of event-channel ports. Return when one or more are pending. An @@ -58,11 +77,49 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_shutdow */ #define SCHEDOP_poll 3 struct sched_poll { - GUEST_HANDLE(evtchn_port_t) ports; + XEN_GUEST_HANDLE(evtchn_port_t) ports; unsigned int nr_ports; uint64_t timeout; }; -DEFINE_GUEST_HANDLE_STRUCT(sched_poll); +typedef struct sched_poll sched_poll_t; +DEFINE_XEN_GUEST_HANDLE(sched_poll_t); + +/* + * Declare a shutdown for another domain. The main use of this function is + * in interpreting shutdown requests and reasons for fully-virtualized + * domains. A para-virtualized domain may use SCHEDOP_shutdown directly. + * @arg == pointer to sched_remote_shutdown structure. + */ +#define SCHEDOP_remote_shutdown 4 +struct sched_remote_shutdown { + domid_t domain_id; /* Remote domain ID */ + unsigned int reason; /* SHUTDOWN_xxx reason */ +}; +typedef struct sched_remote_shutdown sched_remote_shutdown_t; +DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t); + +/* + * Latch a shutdown code, so that when the domain later shuts down it + * reports this code to the control tools. 
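
A sketch of using the new remote-shutdown operation from a control domain; do_sched_op() is a hypothetical hypercall wrapper, and SHUTDOWN_reboot is one of the reason codes this header already defines:

/* Sketch: declare a reboot-shutdown on behalf of an HVM domain. */
extern long do_sched_op(int cmd, void *arg);

static long remote_reboot(domid_t domid)
{
    struct sched_remote_shutdown arg = {
        .domain_id = domid,
        .reason    = SHUTDOWN_reboot,
    };

    return do_sched_op(SCHEDOP_remote_shutdown, &arg);
}
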
+ * @arg == as for SCHEDOP_shutdown. + */ +#define SCHEDOP_shutdown_code 5 + +/* + * Setup, poke and destroy a domain watchdog timer. + * @arg == pointer to sched_watchdog structure. + * With id == 0, setup a domain watchdog timer to cause domain shutdown + * after timeout, returns watchdog id. + * With id != 0 and timeout == 0, destroy domain watchdog timer. + * With id != 0 and timeout != 0, poke watchdog timer and set new timeout. + */ +#define SCHEDOP_watchdog 6 +struct sched_watchdog { + uint32_t id; /* watchdog ID */ + uint32_t timeout; /* timeout */ +}; +typedef struct sched_watchdog sched_watchdog_t; +DEFINE_XEN_GUEST_HANDLE(sched_watchdog_t); /* * Reason codes for SCHEDOP_shutdown. These may be interpreted by control @@ -73,5 +130,6 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_poll); #define SHUTDOWN_reboot 1 /* Clean up, kill, and then restart. */ #define SHUTDOWN_suspend 2 /* Clean up, save suspend info, kill. */ #define SHUTDOWN_crash 3 /* Tell controller we've crashed. */ +#define SHUTDOWN_watchdog 4 /* Restart because watchdog time expired. */ #endif /* __XEN_PUBLIC_SCHED_H__ */ --- head-2011-03-17.orig/include/xen/interface/vcpu.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/vcpu.h 2011-03-17 13:50:24.000000000 +0100 @@ -27,11 +27,13 @@ #ifndef __XEN_PUBLIC_VCPU_H__ #define __XEN_PUBLIC_VCPU_H__ +#include "xen.h" + /* * Prototype for this hypercall is: - * int vcpu_op(int cmd, int vcpuid, void *extra_args) - * @cmd == VCPUOP_??? (VCPU operation). - * @vcpuid == VCPU to operate on. + * int vcpu_op(int cmd, int vcpuid, void *extra_args) + * @cmd == VCPUOP_??? (VCPU operation). + * @vcpuid == VCPU to operate on. * @extra_args == Operation-specific extra arguments (NULL if none). */ @@ -40,52 +42,53 @@ * newly-initialised VCPU will not run until it is brought up by VCPUOP_up. * * @extra_arg == pointer to vcpu_guest_context structure containing initial - * state for the VCPU. + * state for the VCPU. */ -#define VCPUOP_initialise 0 +#define VCPUOP_initialise 0 /* * Bring up a VCPU. This makes the VCPU runnable. This operation will fail * if the VCPU has not been initialised (VCPUOP_initialise). */ -#define VCPUOP_up 1 +#define VCPUOP_up 1 /* * Bring down a VCPU (i.e., make it non-runnable). * There are a few caveats that callers should observe: - * 1. This operation may return, and VCPU_is_up may return false, before the - * VCPU stops running (i.e., the command is asynchronous). It is a good - * idea to ensure that the VCPU has entered a non-critical loop before - * bringing it down. Alternatively, this operation is guaranteed - * synchronous if invoked by the VCPU itself. - * 2. After a VCPU is initialised, there is currently no way to drop all its - * references to domain memory. Even a VCPU that is down still holds - * memory references via its pagetable base pointer and GDT. It is good - * practise to move a VCPU onto an 'idle' or default page table, LDT and - * GDT before bringing it down. + * 1. This operation may return, and VCPU_is_up may return false, before the + * VCPU stops running (i.e., the command is asynchronous). It is a good + * idea to ensure that the VCPU has entered a non-critical loop before + * bringing it down. Alternatively, this operation is guaranteed + * synchronous if invoked by the VCPU itself. + * 2. After a VCPU is initialised, there is currently no way to drop all its + * references to domain memory. Even a VCPU that is down still holds + * memory references via its pagetable base pointer and GDT. 
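Illustrative sketch (not part of the patch) of the SCHEDOP_watchdog protocol described above: id == 0 creates a timer and returns its id, a non-zero id with a non-zero timeout re-arms it, and a zero timeout destroys it. The standard HYPERVISOR_sched_op() wrapper is assumed.

/* Sketch: create a domain watchdog; returns the watchdog id, or 0 on error. */
static uint32_t example_watchdog_create(uint32_t timeout)
{
	sched_watchdog_t wd = { .id = 0, .timeout = timeout };
	int ret = HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);

	return ret > 0 ? (uint32_t)ret : 0;
}

/* Sketch: poke (re-arm) an existing watchdog before it fires. */
static void example_watchdog_poke(uint32_t id, uint32_t timeout)
{
	sched_watchdog_t wd = { .id = id, .timeout = timeout };

	(void)HYPERVISOR_sched_op(SCHEDOP_watchdog, &wd);
}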
It is good + * practice to move a VCPU onto an 'idle' or default page table, LDT and + * GDT before bringing it down. */ -#define VCPUOP_down 2 +#define VCPUOP_down 2 /* Returns 1 if the given VCPU is up. */ -#define VCPUOP_is_up 3 +#define VCPUOP_is_up 3 /* * Return information about the state and running time of a VCPU. * @extra_arg == pointer to vcpu_runstate_info structure. */ -#define VCPUOP_get_runstate_info 4 +#define VCPUOP_get_runstate_info 4 struct vcpu_runstate_info { - /* VCPU's current state (RUNSTATE_*). */ - int state; - /* When was current state entered (system time, ns)? */ - uint64_t state_entry_time; - /* - * Time spent in each RUNSTATE_* (ns). The sum of these times is - * guaranteed not to drift from system time. - */ - uint64_t time[4]; + /* VCPU's current state (RUNSTATE_*). */ + int state; + /* When was current state entered (system time, ns)? */ + uint64_t state_entry_time; + /* + * Time spent in each RUNSTATE_* (ns). The sum of these times is + * guaranteed not to drift from system time. + */ + uint64_t time[4]; }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info); +typedef struct vcpu_runstate_info vcpu_runstate_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t); /* VCPU is currently running on a physical CPU. */ #define RUNSTATE_running 0 @@ -108,47 +111,52 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate * Register a shared memory area from which the guest may obtain its own * runstate information without needing to execute a hypercall. * Notes: - * 1. The registered address may be virtual or physical, depending on the - * platform. The virtual address should be registered on x86 systems. - * 2. Only one shared area may be registered per VCPU. The shared area is - * updated by the hypervisor each time the VCPU is scheduled. Thus - * runstate.state will always be RUNSTATE_running and - * runstate.state_entry_time will indicate the system time at which the - * VCPU was last scheduled to run. + * 1. The registered address may be virtual or physical or a guest handle, + * depending on the platform. A virtual address or guest handle should be + * registered on x86 systems. + * 2. Only one shared area may be registered per VCPU. The shared area is + * updated by the hypervisor each time the VCPU is scheduled. Thus + * runstate.state will always be RUNSTATE_running and + * runstate.state_entry_time will indicate the system time at which the + * VCPU was last scheduled to run. + * @extra_arg == pointer to vcpu_register_runstate_memory_area structure. */ #define VCPUOP_register_runstate_memory_area 5 struct vcpu_register_runstate_memory_area { - union { - GUEST_HANDLE(vcpu_runstate_info) h; - struct vcpu_runstate_info *v; - uint64_t p; - } addr; + union { + XEN_GUEST_HANDLE(vcpu_runstate_info_t) h; + struct vcpu_runstate_info *v; + uint64_t p; + } addr; }; +typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t); /* * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer * which can be set via these commands. Periods smaller than one millisecond * may not be supported.
*/ -#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ -#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ +#define VCPUOP_set_periodic_timer 6 /* arg == vcpu_set_periodic_timer_t */ +#define VCPUOP_stop_periodic_timer 7 /* arg == NULL */ struct vcpu_set_periodic_timer { - uint64_t period_ns; + uint64_t period_ns; }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer); +typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t); /* * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot * timer which can be set via these commands. */ -#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ +#define VCPUOP_set_singleshot_timer 8 /* arg == vcpu_set_singleshot_timer_t */ #define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */ struct vcpu_set_singleshot_timer { - uint64_t timeout_abs_ns; - uint32_t flags; /* VCPU_SSHOTTMR_??? */ + uint64_t timeout_abs_ns; /* Absolute system time value in nanoseconds. */ + uint32_t flags; /* VCPU_SSHOTTMR_??? */ }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer); +typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t); /* Flags to VCPUOP_set_singleshot_timer. */ /* Require the timeout to be in the future (return -ETIME if it's passed). */ @@ -161,13 +169,62 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_sing * structure in a convenient place, such as in a per-cpu data area. * The pointer need not be page aligned, but the structure must not * cross a page boundary. + * + * This may be called only once per vcpu. */ -#define VCPUOP_register_vcpu_info 10 /* arg == struct vcpu_info */ +#define VCPUOP_register_vcpu_info 10 /* arg == vcpu_register_vcpu_info_t */ struct vcpu_register_vcpu_info { uint64_t mfn; /* mfn of page to place vcpu_info */ uint32_t offset; /* offset within page */ uint32_t rsvd; /* unused */ }; -DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info); +typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t); + +/* Send an NMI to the specified VCPU. @extra_arg == NULL. */ +#define VCPUOP_send_nmi 11 + +/* + * Get the physical ID information for a pinned vcpu's underlying physical + * processor. The physical ID information is architecture-specific. + * On x86: id[31:0]=apic_id, id[63:32]=acpi_id. + * This command returns -EINVAL if it is not a valid operation for this VCPU. + */ +#define VCPUOP_get_physid 12 /* arg == vcpu_get_physid_t */ +struct vcpu_get_physid { + uint64_t phys_id; +}; +typedef struct vcpu_get_physid vcpu_get_physid_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t); +#define xen_vcpu_physid_to_x86_apicid(physid) ((uint32_t)(physid)) +#define xen_vcpu_physid_to_x86_acpiid(physid) ((uint32_t)((physid) >> 32)) + +/* + * Register a memory location to get a secondary copy of the vcpu time + * parameters. The master copy still exists as part of the vcpu shared + * memory area, and this secondary copy is updated whenever the master copy + * is updated (and using the same versioning scheme for synchronisation). + * + * The intent is that this copy may be mapped (RO) into userspace so + * that usermode can compute system time using the time info and the + * tsc. Usermode will see an array of vcpu_time_info structures, one + * for each vcpu, and choose the right one by an existing mechanism + * which allows it to get the current vcpu number (such as via a + * segment limit).
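Illustrative sketch (not part of the patch): arming the single-shot timer defined above, assuming the standard HYPERVISOR_vcpu_op() wrapper and the VCPU_SSHOTTMR_future flag (the concrete VCPU_SSHOTTMR_??? bit is elided in the hunk above).

/* Sketch: fire a one-shot timer on 'vcpu' delta_ns from now_ns, failing
 * with -ETIME rather than firing immediately if the deadline has passed. */
static int example_arm_oneshot(int vcpu, uint64_t now_ns, uint64_t delta_ns)
{
	struct vcpu_set_singleshot_timer single = {
		.timeout_abs_ns = now_ns + delta_ns,
		.flags          = VCPU_SSHOTTMR_future,
	};

	return HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, vcpu, &single);
}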
It can then apply the normal algorithm to compute + * system time from the tsc. + * + * @extra_arg == pointer to vcpu_register_time_info_memory_area structure. + */ +#define VCPUOP_register_vcpu_time_memory_area 13 +DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t); +struct vcpu_register_time_memory_area { + union { + XEN_GUEST_HANDLE(vcpu_time_info_t) h; + struct vcpu_time_info *v; + uint64_t p; + } addr; +}; +typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t; +DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t); #endif /* __XEN_PUBLIC_VCPU_H__ */ --- head-2011-03-17.orig/include/xen/interface/version.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/version.h 2011-01-31 15:14:12.000000000 +0100 @@ -3,6 +3,24 @@ * * Xen version, type, and compile information. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2005, Nguyen Anh Quynh * Copyright (c) 2005, Keir Fraser */ @@ -10,17 +28,15 @@ #ifndef __XEN_PUBLIC_VERSION_H__ #define __XEN_PUBLIC_VERSION_H__ -/* NB. All ops return zero on success, except XENVER_version. */ +/* NB. All ops return zero on success, except XENVER_{version,pagesize} */ /* arg == NULL; returns major:minor (16:16). */ #define XENVER_version 0 /* arg == xen_extraversion_t. */ #define XENVER_extraversion 1 -struct xen_extraversion { - char extraversion[16]; -}; -#define XEN_EXTRAVERSION_LEN (sizeof(struct xen_extraversion)) +typedef char xen_extraversion_t[16]; +#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t)) /* arg == xen_compile_info_t. 
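Illustrative sketch (not part of the patch): querying the XENVER_* commands above via the standard HYPERVISOR_xen_version() wrapper; XENVER_version packs major:minor into 16:16 and takes no argument buffer.

/* Sketch: log the running hypervisor's version string. */
static void example_print_xen_version(void)
{
	xen_extraversion_t extra;
	int ver = HYPERVISOR_xen_version(XENVER_version, NULL);

	HYPERVISOR_xen_version(XENVER_extraversion, extra);
	printk(KERN_INFO "Xen %d.%d%.*s\n", ver >> 16, ver & 0xffff,
	       (int)sizeof(extra), extra);
}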
*/ #define XENVER_compile_info 2 @@ -30,29 +46,28 @@ struct xen_compile_info { char compile_domain[32]; char compile_date[32]; }; +typedef struct xen_compile_info xen_compile_info_t; #define XENVER_capabilities 3 -struct xen_capabilities_info { - char info[1024]; -}; -#define XEN_CAPABILITIES_INFO_LEN (sizeof(struct xen_capabilities_info)) +typedef char xen_capabilities_info_t[1024]; +#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t)) #define XENVER_changeset 4 -struct xen_changeset_info { - char info[64]; -}; -#define XEN_CHANGESET_INFO_LEN (sizeof(struct xen_changeset_info)) +typedef char xen_changeset_info_t[64]; +#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t)) #define XENVER_platform_parameters 5 struct xen_platform_parameters { unsigned long virt_start; }; +typedef struct xen_platform_parameters xen_platform_parameters_t; #define XENVER_get_features 6 struct xen_feature_info { unsigned int submap_idx; /* IN: which 32-bit submap to return */ uint32_t submap; /* OUT: 32-bit submap */ }; +typedef struct xen_feature_info xen_feature_info_t; /* Declares the features reported by XENVER_get_features. */ #include "features.h" @@ -60,4 +75,10 @@ struct xen_feature_info { /* arg == NULL; returns host memory page size. */ #define XENVER_pagesize 7 +/* arg == xen_domain_handle_t. */ +#define XENVER_guest_handle 8 + +#define XENVER_commandline 9 +typedef char xen_commandline_t[1024]; + #endif /* __XEN_PUBLIC_VERSION_H__ */ --- head-2011-03-17.orig/include/xen/interface/xen.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/interface/xen.h 2011-03-17 13:50:24.000000000 +0100 @@ -3,35 +3,69 @@ * * Guest OS interface to Xen. * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * * Copyright (c) 2004, K A Fraser */ #ifndef __XEN_PUBLIC_XEN_H__ #define __XEN_PUBLIC_XEN_H__ -#include +#include "xen-compat.h" +#ifdef CONFIG_PARAVIRT_XEN #include +#endif -/* - * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS). - */ +#if defined(__i386__) || defined(__x86_64__) +#include "arch-x86/xen.h" +#elif defined(__ia64__) +#include "arch-ia64.h" +#else +#error "Unsupported architecture" +#endif + +#ifndef __ASSEMBLY__ +/* Guest handles for primitive C types. 
*/ +DEFINE_XEN_GUEST_HANDLE(char); +__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char); +DEFINE_XEN_GUEST_HANDLE(int); +__DEFINE_XEN_GUEST_HANDLE(uint, unsigned int); +DEFINE_XEN_GUEST_HANDLE(long); +__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); +DEFINE_XEN_GUEST_HANDLE(void); + +DEFINE_XEN_GUEST_HANDLE(uint64_t); +DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); +#endif /* - * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5. - * EAX = return value - * (argument registers may be clobbered on return) - * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6. - * RAX = return value - * (argument registers not clobbered on return; RCX, R11 are) + * HYPERCALLS */ + #define __HYPERVISOR_set_trap_table 0 #define __HYPERVISOR_mmu_update 1 #define __HYPERVISOR_set_gdt 2 #define __HYPERVISOR_stack_switch 3 #define __HYPERVISOR_set_callbacks 4 #define __HYPERVISOR_fpu_taskswitch 5 -#define __HYPERVISOR_sched_op 6 -#define __HYPERVISOR_dom0_op 7 +#define __HYPERVISOR_sched_op_compat 6 /* compat since 0x00030101 */ +#define __HYPERVISOR_platform_op 7 #define __HYPERVISOR_set_debugreg 8 #define __HYPERVISOR_get_debugreg 9 #define __HYPERVISOR_update_descriptor 10 @@ -39,10 +73,10 @@ #define __HYPERVISOR_multicall 13 #define __HYPERVISOR_update_va_mapping 14 #define __HYPERVISOR_set_timer_op 15 -#define __HYPERVISOR_event_channel_op_compat 16 +#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */ #define __HYPERVISOR_xen_version 17 #define __HYPERVISOR_console_io 18 -#define __HYPERVISOR_physdev_op_compat 19 +#define __HYPERVISOR_physdev_op_compat 19 /* compat since 0x00030202 */ #define __HYPERVISOR_grant_table_op 20 #define __HYPERVISOR_vm_assist 21 #define __HYPERVISOR_update_va_mapping_otherdomain 22 @@ -50,7 +84,7 @@ #define __HYPERVISOR_vcpu_op 24 #define __HYPERVISOR_set_segment_base 25 /* x86/64 only */ #define __HYPERVISOR_mmuext_op 26 -#define __HYPERVISOR_acm_op 27 +#define __HYPERVISOR_xsm_op 27 #define __HYPERVISOR_nmi_op 28 #define __HYPERVISOR_sched_op_new 29 #define __HYPERVISOR_callback_op 30 @@ -58,6 +92,10 @@ #define __HYPERVISOR_event_channel_op 32 #define __HYPERVISOR_physdev_op 33 #define __HYPERVISOR_hvm_op 34 +#define __HYPERVISOR_sysctl 35 +#define __HYPERVISOR_domctl 36 +#define __HYPERVISOR_kexec_op 37 +#define __HYPERVISOR_tmem_op 38 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 @@ -70,15 +108,50 @@ #define __HYPERVISOR_arch_7 55 /* + * HYPERCALL COMPATIBILITY. + */ + +/* New sched_op hypercall introduced in 0x00030101. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030101 +#undef __HYPERVISOR_sched_op +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat +#else +#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_new +#endif + +/* New event-channel and physdev hypercalls introduced in 0x00030202. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030202 +#undef __HYPERVISOR_event_channel_op +#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat +#undef __HYPERVISOR_physdev_op +#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat +#endif + +/* New platform_op hypercall introduced in 0x00030204. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030204 +#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op +#endif + +/* * VIRTUAL INTERRUPTS * * Virtual interrupts that a guest OS may receive from Xen. - */ -#define VIRQ_TIMER 0 /* Timebase update, and/or requested timeout. */ -#define VIRQ_DEBUG 1 /* Request guest to dump debug info. 
*/ -#define VIRQ_CONSOLE 2 /* (DOM0) Bytes received on emergency console. */ -#define VIRQ_DOM_EXC 3 /* (DOM0) Exceptional event for some domain. */ -#define VIRQ_DEBUGGER 6 /* (DOM0) A domain has paused for debugging. */ + * + * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a + * global VIRQ. The former can be bound once per VCPU and cannot be re-bound. + * The latter can be allocated only once per guest: they must initially be + * allocated to VCPU0 but can subsequently be re-bound. + */ +#define VIRQ_TIMER 0 /* V. Timebase update, and/or requested timeout. */ +#define VIRQ_DEBUG 1 /* V. Request guest to dump debug info. */ +#define VIRQ_CONSOLE 2 /* G. (DOM0) Bytes received on emergency console. */ +#define VIRQ_DOM_EXC 3 /* G. (DOM0) Exceptional event for some domain. */ +#define VIRQ_TBUF 4 /* G. (DOM0) Trace buffer has records available. */ +#define VIRQ_DEBUGGER 6 /* G. (DOM0) A domain has paused for debugging. */ +#define VIRQ_XENOPROF 7 /* V. XenOprofile interrupt: new sample available */ +#define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */ +#define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */ +#define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */ /* Architecture-specific VIRQ definitions. */ #define VIRQ_ARCH_0 16 @@ -91,19 +164,28 @@ #define VIRQ_ARCH_7 23 #define NR_VIRQS 24 + /* - * MMU-UPDATE REQUESTS + * HYPERVISOR_mmu_update(reqs, count, pdone, foreigndom) * - * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs. - * A foreigndom (FD) can be specified (or DOMID_SELF for none). - * Where the FD has some effect, it is described below. - * ptr[1:0] specifies the appropriate MMU_* command. + * @reqs is an array of mmu_update_t structures ((ptr, val) pairs). + * @count is the length of the above array. + * @pdone is an output parameter indicating the number of completed operations. + * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this + * hypercall invocation. Can be DOMID_SELF. + * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced + * in this hypercall invocation. The value of this field + * (x) encodes the PFD as follows: + * x == 0 => PFD == DOMID_SELF + * x != 0 => PFD == x - 1 * + * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command. + * ------------- * ptr[1:0] == MMU_NORMAL_PT_UPDATE: - * Updates an entry in a page table. If updating an L1 table, and the new - * table entry is valid/present, the mapped frame must belong to the FD, if - * an FD has been specified. If attempting to map an I/O page then the - * caller assumes the privilege of the FD. + * Updates an entry in a page table belonging to PFD. If updating an L1 table, + * and the new table entry is valid/present, the mapped frame must belong to + * FD. If attempting to map an I/O page then the caller assumes the privilege + * of the FD. * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller. * FD == DOMID_XEN: Map restricted areas of Xen's heap space. * ptr[:2] -- Machine address of the page-table entry to modify. @@ -119,8 +201,8 @@ * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed * with those in @val. */ -#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA. */ -#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ +#define MMU_NORMAL_PT_UPDATE 0 /* checked '*ptr = val'. ptr is MA.
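Illustrative sketch (not part of the patch) of the four-argument calling convention documented above, assuming the standard HYPERVISOR_mmu_update() wrapper; a single MMU_NORMAL_PT_UPDATE request against the caller's own page tables uses FD == DOMID_SELF.

/* Sketch: issue one checked PTE write; 'pte_ma' is the machine address of
 * the PTE and must have its low two bits clear (they carry the command). */
static int example_update_pte(uint64_t pte_ma, uint64_t new_val)
{
	mmu_update_t req = {
		.ptr = pte_ma | MMU_NORMAL_PT_UPDATE,
		.val = new_val,
	};

	return HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
}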
*/ +#define MMU_MACHPHYS_UPDATE 1 /* ptr = MA of frame to modify entry for */ #define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */ /* @@ -163,9 +245,23 @@ * cmd: MMUEXT_FLUSH_CACHE * No additional arguments. Writes back and flushes cache contents. * + * cmd: MMUEXT_FLUSH_CACHE_GLOBAL + * No additional arguments. Writes back and flushes cache contents + * on all CPUs in the system. + * * cmd: MMUEXT_SET_LDT * linear_addr: Linear address of LDT base (NB. must be page-aligned). * nr_ents: Number of entries in LDT. + * + * cmd: MMUEXT_CLEAR_PAGE + * mfn: Machine frame number to be cleared. + * + * cmd: MMUEXT_COPY_PAGE + * mfn: Machine frame number of the destination page. + * src_mfn: Machine frame number of the source page. + * + * cmd: MMUEXT_[UN]MARK_SUPER + * mfn: Machine frame number of head of superpage to be [un]marked. */ #define MMUEXT_PIN_L1_TABLE 0 #define MMUEXT_PIN_L2_TABLE 1 @@ -182,24 +278,37 @@ #define MMUEXT_FLUSH_CACHE 12 #define MMUEXT_SET_LDT 13 #define MMUEXT_NEW_USER_BASEPTR 15 +#define MMUEXT_CLEAR_PAGE 16 +#define MMUEXT_COPY_PAGE 17 +#define MMUEXT_FLUSH_CACHE_GLOBAL 18 +#define MMUEXT_MARK_SUPER 19 +#define MMUEXT_UNMARK_SUPER 20 #ifndef __ASSEMBLY__ struct mmuext_op { - unsigned int cmd; - union { - /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */ - unsigned long mfn; - /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ - unsigned long linear_addr; - } arg1; - union { - /* SET_LDT */ - unsigned int nr_ents; - /* TLB_FLUSH_MULTI, INVLPG_MULTI */ - void *vcpumask; - } arg2; + unsigned int cmd; + union { + /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR + * CLEAR_PAGE, COPY_PAGE, [UN]MARK_SUPER */ + xen_pfn_t mfn; + /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */ + unsigned long linear_addr; + } arg1; + union { + /* SET_LDT */ + unsigned int nr_ents; + /* TLB_FLUSH_MULTI, INVLPG_MULTI */ +#if __XEN_INTERFACE_VERSION__ >= 0x00030205 + XEN_GUEST_HANDLE(const_void) vcpumask; +#else + const void *vcpumask; +#endif + /* COPY_PAGE */ + xen_pfn_t src_mfn; + } arg2; }; -DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); +typedef struct mmuext_op mmuext_op_t; +DEFINE_XEN_GUEST_HANDLE(mmuext_op_t); #endif /* These are passed as 'flags' to update_va_mapping. They can be ORed. */ @@ -224,11 +333,24 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op); */ #define VMASST_CMD_enable 0 #define VMASST_CMD_disable 1 + +/* x86/32 guests: simulate full 4GB segment limits. */ #define VMASST_TYPE_4gb_segments 0 + +/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */ #define VMASST_TYPE_4gb_segments_notify 1 + +/* + * x86 guests: support writes to bottom-level PTEs. + * NB1. Page-directory entries cannot be written. + * NB2. Guest must continue to remove all writable mappings of PTEs. + */ #define VMASST_TYPE_writable_pagetables 2 + +/* x86/PAE guests: support PDPTs above 4GB. */ #define VMASST_TYPE_pae_extended_cr3 3 -#define MAX_VMASST_TYPE 3 + +#define MAX_VMASST_TYPE 3 #ifndef __ASSEMBLY__ @@ -260,6 +382,16 @@ typedef uint16_t domid_t; #define DOMID_XEN (0x7FF2U) /* + * DOMID_COW is used as the owner of sharable pages */ +#define DOMID_COW (0x7FF3U) + +/* DOMID_INVALID is used to identify pages with unknown owner. */ +#define DOMID_INVALID (0x7FF4U) + +/* Idle domain. */ +#define DOMID_IDLE (0x7FFFU) + +/* * Send an array of these to HYPERVISOR_mmu_update(). * NB. The fields are natural pointer/address size for this architecture. */ @@ -267,18 +399,19 @@ struct mmu_update { uint64_t ptr; /* Machine address of PTE. */ uint64_t val; /* New contents of PTE. 
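Illustrative sketch (not part of the patch): the MMUEXT_COPY_PAGE command introduced above takes the destination frame in arg1.mfn and the source in arg2.src_mfn; the standard HYPERVISOR_mmuext_op() wrapper is assumed.

/* Sketch: ask the hypervisor to copy one machine frame to another. */
static int example_copy_page(xen_pfn_t dst_mfn, xen_pfn_t src_mfn)
{
	struct mmuext_op op = {
		.cmd          = MMUEXT_COPY_PAGE,
		.arg1.mfn     = dst_mfn,
		.arg2.src_mfn = src_mfn,
	};

	return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}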
*/ }; -DEFINE_GUEST_HANDLE_STRUCT(mmu_update); +typedef struct mmu_update mmu_update_t; +DEFINE_XEN_GUEST_HANDLE(mmu_update_t); /* * Send an array of these to HYPERVISOR_multicall(). * NB. The fields are natural register size for this architecture. */ struct multicall_entry { - unsigned long op; - long result; + unsigned long op, result; unsigned long args[6]; }; -DEFINE_GUEST_HANDLE_STRUCT(multicall_entry); +typedef struct multicall_entry multicall_entry_t; +DEFINE_XEN_GUEST_HANDLE(multicall_entry_t); /* * Event channel endpoints per domain: @@ -287,173 +420,274 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_ent #define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64) struct vcpu_time_info { - /* - * Updates to the following values are preceded and followed - * by an increment of 'version'. The guest can therefore - * detect updates by looking for changes to 'version'. If the - * least-significant bit of the version number is set then an - * update is in progress and the guest must wait to read a - * consistent set of values. The correct way to interact with - * the version number is similar to Linux's seqlock: see the - * implementations of read_seqbegin/read_seqretry. - */ - uint32_t version; - uint32_t pad0; - uint64_t tsc_timestamp; /* TSC at last update of time vals. */ - uint64_t system_time; /* Time, in nanosecs, since boot. */ - /* - * Current system time: - * system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul - * CPU frequency (Hz): - * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift - */ - uint32_t tsc_to_system_mul; - int8_t tsc_shift; - int8_t pad1[3]; + /* + * Updates to the following values are preceded and followed by an + * increment of 'version'. The guest can therefore detect updates by + * looking for changes to 'version'. If the least-significant bit of + * the version number is set then an update is in progress and the guest + * must wait to read a consistent set of values. + * The correct way to interact with the version number is similar to + * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry. + */ + uint32_t version; + uint32_t pad0; + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_time; /* Time, in nanosecs, since boot. */ + /* + * Current system time: + * system_time + + * ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32) + * CPU frequency (Hz): + * ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift + */ + uint32_t tsc_to_system_mul; + int8_t tsc_shift; + int8_t pad1[3]; }; /* 32 bytes */ +typedef struct vcpu_time_info vcpu_time_info_t; struct vcpu_info { - /* - * 'evtchn_upcall_pending' is written non-zero by Xen to indicate - * a pending notification for a particular VCPU. It is then cleared - * by the guest OS /before/ checking for pending work, thus avoiding - * a set-and-check race. Note that the mask is only accessed by Xen - * on the CPU that is currently hosting the VCPU. This means that the - * pending and mask flags can be updated by the guest without special - * synchronisation (i.e., no need for the x86 LOCK prefix). - * This may seem suboptimal because if the pending flag is set by - * a different CPU then an IPI may be scheduled even when the mask - * is set. However, note: - * 1. The task of 'interrupt holdoff' is covered by the per-event- - * channel mask bits. A 'noisy' event that is continually being - * triggered can be masked at source at this very precise - * granularity. - * 2. 
The main purpose of the per-VCPU mask is therefore to restrict - * reentrant execution: whether for concurrency control, or to - * prevent unbounded stack usage. Whatever the purpose, we expect - * that the mask will be asserted only for short periods at a time, - * and so the likelihood of a 'spurious' IPI is suitably small. - * The mask is read before making an event upcall to the guest: a - * non-zero mask therefore guarantees that the VCPU will not receive - * an upcall activation. The mask is cleared when the VCPU requests - * to block: this avoids wakeup-waiting races. - */ - uint8_t evtchn_upcall_pending; - uint8_t evtchn_upcall_mask; - unsigned long evtchn_pending_sel; - struct arch_vcpu_info arch; - struct pvclock_vcpu_time_info time; + /* + * 'evtchn_upcall_pending' is written non-zero by Xen to indicate + * a pending notification for a particular VCPU. It is then cleared + * by the guest OS /before/ checking for pending work, thus avoiding + * a set-and-check race. Note that the mask is only accessed by Xen + * on the CPU that is currently hosting the VCPU. This means that the + * pending and mask flags can be updated by the guest without special + * synchronisation (i.e., no need for the x86 LOCK prefix). + * This may seem suboptimal because if the pending flag is set by + * a different CPU then an IPI may be scheduled even when the mask + * is set. However, note: + * 1. The task of 'interrupt holdoff' is covered by the per-event- + * channel mask bits. A 'noisy' event that is continually being + * triggered can be masked at source at this very precise + * granularity. + * 2. The main purpose of the per-VCPU mask is therefore to restrict + * reentrant execution: whether for concurrency control, or to + * prevent unbounded stack usage. Whatever the purpose, we expect + * that the mask will be asserted only for short periods at a time, + * and so the likelihood of a 'spurious' IPI is suitably small. + * The mask is read before making an event upcall to the guest: a + * non-zero mask therefore guarantees that the VCPU will not receive + * an upcall activation. The mask is cleared when the VCPU requests + * to block: this avoids wakeup-waiting races. + */ + uint8_t evtchn_upcall_pending; + uint8_t evtchn_upcall_mask; + unsigned long evtchn_pending_sel; + struct arch_vcpu_info arch; +#ifdef CONFIG_PARAVIRT_XEN + struct pvclock_vcpu_time_info time; +#else + struct vcpu_time_info time; +#endif }; /* 64 bytes (x86) */ +#ifndef __XEN__ +typedef struct vcpu_info vcpu_info_t; +#endif /* * Xen/kernel shared data -- pointer provided in start_info. - * NB. We expect that this struct is smaller than a page. + * + * This structure is defined to be both smaller than a page, and the + * only data on the shared page, but may vary in actual size even within + * compatible Xen versions; guests should not rely on the size + * of this structure remaining constant. */ struct shared_info { - struct vcpu_info vcpu_info[MAX_VIRT_CPUS]; + struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS]; - /* - * A domain can create "event channels" on which it can send and receive - * asynchronous event notifications. There are three classes of event that - * are delivered by this mechanism: - * 1. Bi-directional inter- and intra-domain connections. Domains must - * arrange out-of-band to set up a connection (usually by allocating - * an unbound 'listener' port and avertising that via a storage service - * such as xenstore). - * 2. Physical interrupts. 
A domain with suitable hardware-access - * privileges can bind an event-channel port to a physical interrupt - * source. - * 3. Virtual interrupts ('events'). A domain can bind an event-channel - * port to a virtual interrupt source, such as the virtual-timer - * device or the emergency console. - * - * Event channels are addressed by a "port index". Each channel is - * associated with two bits of information: - * 1. PENDING -- notifies the domain that there is a pending notification - * to be processed. This bit is cleared by the guest. - * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING - * will cause an asynchronous upcall to be scheduled. This bit is only - * updated by the guest. It is read-only within Xen. If a channel - * becomes pending while the channel is masked then the 'edge' is lost - * (i.e., when the channel is unmasked, the guest must manually handle - * pending notifications as no upcall will be scheduled by Xen). - * - * To expedite scanning of pending notifications, any 0->1 pending - * transition on an unmasked channel causes a corresponding bit in a - * per-vcpu selector word to be set. Each bit in the selector covers a - * 'C long' in the PENDING bitfield array. - */ - unsigned long evtchn_pending[sizeof(unsigned long) * 8]; - unsigned long evtchn_mask[sizeof(unsigned long) * 8]; - - /* - * Wallclock time: updated only by control software. Guests should base - * their gettimeofday() syscall on this wallclock-base value. - */ - struct pvclock_wall_clock wc; + /* + * A domain can create "event channels" on which it can send and receive + * asynchronous event notifications. There are three classes of event that + * are delivered by this mechanism: + * 1. Bi-directional inter- and intra-domain connections. Domains must + * arrange out-of-band to set up a connection (usually by allocating + * an unbound 'listener' port and advertising that via a storage service + * such as xenstore). + * 2. Physical interrupts. A domain with suitable hardware-access + * privileges can bind an event-channel port to a physical interrupt + * source. + * 3. Virtual interrupts ('events'). A domain can bind an event-channel + * port to a virtual interrupt source, such as the virtual-timer + * device or the emergency console. + * + * Event channels are addressed by a "port index". Each channel is + * associated with two bits of information: + * 1. PENDING -- notifies the domain that there is a pending notification + * to be processed. This bit is cleared by the guest. + * 2. MASK -- if this bit is clear then a 0->1 transition of PENDING + * will cause an asynchronous upcall to be scheduled. This bit is only + * updated by the guest. It is read-only within Xen. If a channel + * becomes pending while the channel is masked then the 'edge' is lost + * (i.e., when the channel is unmasked, the guest must manually handle + * pending notifications as no upcall will be scheduled by Xen). + * + * To expedite scanning of pending notifications, any 0->1 pending + * transition on an unmasked channel causes a corresponding bit in a + * per-vcpu selector word to be set. Each bit in the selector covers a + * 'C long' in the PENDING bitfield array. + */ + unsigned long evtchn_pending[sizeof(unsigned long) * 8]; + unsigned long evtchn_mask[sizeof(unsigned long) * 8]; + + /* + * Wallclock time: updated only by control software. Guests should base + * their gettimeofday() syscall on this wallclock-base value.
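Illustrative sketch (not part of the patch) of the PENDING/MASK semantics described above, using the synch_* bitops this tree uses elsewhere for shared-page bitfields (assumed available here).

/* Sketch: consume one event if it is pending and unmasked; returns non-zero
 * if the pending bit was set and has now been cleared by us. */
static int example_take_event(shared_info_t *s, unsigned int port)
{
	if (synch_test_bit(port, s->evtchn_mask))
		return 0;	/* masked: leave PENDING set for later unmask */

	return synch_test_and_clear_bit(port, s->evtchn_pending);
}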
+ */ +#ifdef CONFIG_PARAVIRT_XEN + struct pvclock_wall_clock wc; +#else + uint32_t wc_version; /* Version counter: see vcpu_time_info_t. */ + uint32_t wc_sec; /* Secs 00:00:00 UTC, Jan 1, 1970. */ + uint32_t wc_nsec; /* Nsecs 00:00:00 UTC, Jan 1, 1970. */ +#endif - struct arch_shared_info arch; + struct arch_shared_info arch; }; +#ifndef __XEN__ +typedef struct shared_info shared_info_t; +#endif /* - * Start-of-day memory layout for the initial domain (DOM0): + * Start-of-day memory layout: * 1. The domain is started within contiguous virtual-memory region. - * 2. The contiguous region begins and ends on an aligned 4MB boundary. - * 3. The region start corresponds to the load address of the OS image. - * If the load address is not 4MB aligned then the address is rounded down. - * 4. This the order of bootstrap elements in the initial virtual region: + * 2. The contiguous region ends on an aligned 4MB boundary. + * 3. This is the order of bootstrap elements in the initial virtual region: * a. relocated kernel image * b. initial ram disk [mod_start, mod_len] * c. list of allocated page frames [mfn_list, nr_pages] + * (unless relocated due to XEN_ELFNOTE_INIT_P2M) * d. start_info_t structure [register ESI (x86)] * e. bootstrap page tables [pt_base, CR3 (x86)] * f. bootstrap stack [register ESP (x86)] - * 5. Bootstrap elements are packed together, but each is 4kB-aligned. - * 6. The initial ram disk may be omitted. - * 7. The list of page frames forms a contiguous 'pseudo-physical' memory + * 4. Bootstrap elements are packed together, but each is 4kB-aligned. + * 5. The initial ram disk may be omitted. + * 6. The list of page frames forms a contiguous 'pseudo-physical' memory * layout for the domain. In particular, the bootstrap virtual-memory * region is a 1:1 mapping to the first section of the pseudo-physical map. - * 8. All bootstrap elements are mapped read-writable for the guest OS. The + * 7. All bootstrap elements are mapped read-writable for the guest OS. The * only exception is the bootstrap page table, which is mapped read-only. - * 9. There is guaranteed to be at least 512kB padding after the final + * 8. There is guaranteed to be at least 512kB padding after the final * bootstrap element. If necessary, the bootstrap virtual region is * extended by an extra 4MB to ensure this. */ #define MAX_GUEST_CMDLINE 1024 struct start_info { - /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ - char magic[32]; /* "xen--". */ - unsigned long nr_pages; /* Total pages allocated to this domain. */ - unsigned long shared_info; /* MACHINE address of shared info struct. */ - uint32_t flags; /* SIF_xxx flags. */ - unsigned long store_mfn; /* MACHINE page number of shared page. */ - uint32_t store_evtchn; /* Event channel for store communication. */ - union { - struct { - unsigned long mfn; /* MACHINE page number of console page. */ - uint32_t evtchn; /* Event channel for console page. */ - } domU; - struct { - uint32_t info_off; /* Offset of console_info struct. */ - uint32_t info_size; /* Size of console_info struct from start.*/ - } dom0; - } console; - /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ - unsigned long pt_base; /* VIRTUAL address of page directory. */ - unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ - unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ - unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */ - unsigned long mod_len; /* Size (bytes) of pre-loaded module.
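Illustrative sketch (not part of the patch): start-of-day code typically keys off the SIF_* flags carried in start_info (defined just below) to distinguish the control domain from an unprivileged guest.

/* Sketch: 'si' is the start_info_t handed over at boot (register ESI on x86). */
static int example_is_initial_domain(const start_info_t *si)
{
	return (si->flags & SIF_INITDOMAIN) != 0;
}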
*/ - int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME. */ + char magic[32]; /* "xen--". */ + unsigned long nr_pages; /* Total pages allocated to this domain. */ + unsigned long shared_info; /* MACHINE address of shared info struct. */ + uint32_t flags; /* SIF_xxx flags. */ + xen_pfn_t store_mfn; /* MACHINE page number of shared page. */ + uint32_t store_evtchn; /* Event channel for store communication. */ + union { + struct { + xen_pfn_t mfn; /* MACHINE page number of console page. */ + uint32_t evtchn; /* Event channel for console page. */ + } domU; + struct { + uint32_t info_off; /* Offset of console_info struct. */ + uint32_t info_size; /* Size of console_info struct from start.*/ + } dom0; + } console; + /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME). */ + unsigned long pt_base; /* VIRTUAL address of page directory. */ + unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames. */ + unsigned long mfn_list; /* VIRTUAL address of page-frame list. */ + unsigned long mod_start; /* VIRTUAL address of pre-loaded module */ + /* (PFN of pre-loaded module if */ + /* SIF_MOD_START_PFN set in flags). */ + unsigned long mod_len; /* Size (bytes) of pre-loaded module. */ + int8_t cmd_line[MAX_GUEST_CMDLINE]; + /* The pfn range here covers both page table and p->m table frames. */ + unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */ + unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */ }; +typedef struct start_info start_info_t; + +/* New console union for dom0 introduced in 0x00030203. */ +#if __XEN_INTERFACE_VERSION__ < 0x00030203 +#define console_mfn console.domU.mfn +#define console_evtchn console.domU.evtchn +#endif /* These flags are passed in the 'flags' field of start_info_t. */ #define SIF_PRIVILEGED (1<<0) /* Is the domain privileged? */ #define SIF_INITDOMAIN (1<<1) /* Is this the initial control domain? */ +#define SIF_MULTIBOOT_MOD (1<<2) /* Is mod_start a multiboot module? */ +#define SIF_MOD_START_PFN (1<<3) /* Is mod_start a PFN? */ +#define SIF_PM_MASK (0xFF<<8) /* reserve 1 byte for xen-pm options */ + +/* + * A multiboot module is a package containing modules very similar to a + * multiboot module array. The only differences are: + * - the array of module descriptors is by convention simply at the beginning + * of the multiboot module, + * - addresses in the module descriptors are based on the beginning of the + * multiboot module, + * - the number of modules is determined by a termination descriptor that has + * mod_start == 0. + * + * This permits both building it statically and referencing it in a configuration + * file, and lets the PV guest easily rebase the addresses to virtual addresses + * and at the same time count the number of modules. + */ +struct xen_multiboot_mod_list +{ + /* Address of first byte of the module */ + uint32_t mod_start; + /* Address of last byte of the module (inclusive) */ + uint32_t mod_end; + /* Address of zero-terminated command line */ + uint32_t cmdline; + /* Unused, must be zero */ + uint32_t pad; +}; -typedef uint64_t cpumap_t; +typedef struct dom0_vga_console_info { + uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */ +#define XEN_VGATYPE_TEXT_MODE_3 0x03 +#define XEN_VGATYPE_VESA_LFB 0x23 + + union { + struct { + /* Font height, in pixels. */ + uint16_t font_height; + /* Cursor location (column, row). */ + uint16_t cursor_x, cursor_y; + /* Number of rows and columns (dimensions in characters).
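Illustrative sketch (not part of the patch): walking the multiboot module package described above, whose descriptor array is terminated by an entry with mod_start == 0.

/* Sketch: count the modules in a multiboot module package. */
static unsigned int example_count_modules(const struct xen_multiboot_mod_list *list)
{
	unsigned int n = 0;

	while (list[n].mod_start != 0)
		n++;

	return n;
}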
*/ + uint16_t rows, columns; + } text_mode_3; + + struct { + /* Width and height, in pixels. */ + uint16_t width, height; + /* Bytes per scan line. */ + uint16_t bytes_per_line; + /* Bits per pixel. */ + uint16_t bits_per_pixel; + /* LFB physical address, and size (in units of 64kB). */ + uint32_t lfb_base; + uint32_t lfb_size; + /* RGB mask offsets and sizes, as defined by VBE 1.2+ */ + uint8_t red_pos, red_size; + uint8_t green_pos, green_size; + uint8_t blue_pos, blue_size; + uint8_t rsvd_pos, rsvd_size; +#if __XEN_INTERFACE_VERSION__ >= 0x00030206 + /* VESA capabilities (offset 0xa, VESA command 0x4f00). */ + uint32_t gbl_caps; + /* Mode attributes (offset 0x0, VESA command 0x4f01). */ + uint16_t mode_attrs; +#endif + } vesa_lfb; + } u; +} dom0_vga_console_info_t; +#define xen_vga_console_info dom0_vga_console_info +#define xen_vga_console_info_t dom0_vga_console_info_t typedef uint8_t xen_domain_handle_t[16]; @@ -461,6 +695,11 @@ typedef uint8_t xen_domain_handle_t[16]; #define __mk_unsigned_long(x) x ## UL #define mk_unsigned_long(x) __mk_unsigned_long(x) +__DEFINE_XEN_GUEST_HANDLE(uint8, uint8_t); +__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t); +__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t); +__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t); + #else /* __ASSEMBLY__ */ /* In assembly code we cannot use C numeric constant suffixes. */ @@ -468,4 +707,23 @@ typedef uint8_t xen_domain_handle_t[16]; #endif /* !__ASSEMBLY__ */ +/* Default definitions for macros used by domctl/sysctl. */ +#if defined(__XEN__) || defined(__XEN_TOOLS__) + +#ifndef uint64_aligned_t +#define uint64_aligned_t uint64_t +#endif +#ifndef XEN_GUEST_HANDLE_64 +#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name) +#endif + +#ifndef __ASSEMBLY__ +struct xenctl_cpumap { + XEN_GUEST_HANDLE_64(uint8) bitmap; + uint32_t nr_cpus; +}; +#endif + +#endif /* defined(__XEN__) || defined(__XEN_TOOLS__) */ + #endif /* __XEN_PUBLIC_XEN_H__ */ --- head-2011-03-17.orig/include/xen/xenbus.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/xen/xenbus.h 2011-01-31 17:53:45.000000000 +0100 @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include @@ -56,8 +56,17 @@ struct xenbus_watch /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); + + /* See XBWF_ definitions below. */ + unsigned long flags; }; +/* + * Execute callback in its own kthread. Useful if the callback is long + * running or heavily serialised, to avoid taking out the main xenwatch thread + * for a long period of time (or even unwittingly causing a deadlock). + */ +#define XBWF_new_thread 1 /* A xenbus device. 
*/ struct xenbus_device { @@ -92,9 +101,10 @@ struct xenbus_driver { void (*otherend_changed)(struct xenbus_device *dev, enum xenbus_state backend_state); int (*remove)(struct xenbus_device *dev); - int (*suspend)(struct xenbus_device *dev, pm_message_t state); + int (*suspend)(struct xenbus_device *dev); + int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); - int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); + int (*uevent)(struct xenbus_device *, char **, int, char *, int); struct device_driver driver; int (*read_otherend_details)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev); @@ -105,27 +115,8 @@ static inline struct xenbus_driver *to_x return container_of(drv, struct xenbus_driver, driver); } -int __must_check __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, - const char *mod_name); - -static inline int __must_check -xenbus_register_frontend(struct xenbus_driver *drv) -{ - WARN_ON(drv->owner != THIS_MODULE); - return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME); -} - -int __must_check __xenbus_register_backend(struct xenbus_driver *drv, - struct module *owner, - const char *mod_name); -static inline int __must_check -xenbus_register_backend(struct xenbus_driver *drv) -{ - WARN_ON(drv->owner != THIS_MODULE); - return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME); -} - +int xenbus_register_frontend(struct xenbus_driver *drv); +int xenbus_register_backend(struct xenbus_driver *drv); void xenbus_unregister_driver(struct xenbus_driver *drv); struct xenbus_transaction @@ -165,7 +156,6 @@ int xenbus_printf(struct xenbus_transact int xenbus_gather(struct xenbus_transaction t, const char *dir, ...); /* notifer routines for when the xenstore comes up */ -extern int xenstored_ready; int register_xenstore_notifier(struct notifier_block *nb); void unregister_xenstore_notifier(struct notifier_block *nb); @@ -178,12 +168,9 @@ void xs_suspend_cancel(void); /* Used by xenbus_dev to borrow kernel's store connection. */ void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg); -struct work_struct; - /* Prepare for domain suspend: then resume or cancel the suspend. */ void xenbus_suspend(void); void xenbus_resume(void); -void xenbus_probe(struct work_struct *); void xenbus_suspend_cancel(void); #define XENBUS_IS_ERR_READ(str) ({ \ @@ -196,38 +183,125 @@ void xenbus_suspend_cancel(void); #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE) + +/** + * Register a watch on the given path, using the given xenbus_watch structure + * for storage, and the given callback function as the callback. Return 0 on + * success, or -errno on error. On success, the given path will be saved as + * watch->node, and remains the caller's to free. On error, watch->node will + * be NULL, the device will switch to XenbusStateClosing, and the error will + * be saved in the store. + */ int xenbus_watch_path(struct xenbus_device *dev, const char *path, struct xenbus_watch *watch, void (*callback)(struct xenbus_watch *, const char **, unsigned int)); -int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, - void (*callback)(struct xenbus_watch *, - const char **, unsigned int), - const char *pathfmt, ...) - __attribute__ ((format (printf, 4, 5))); + +/** + * Register a watch on the given path/path2, using the given xenbus_watch + * structure for storage, and the given callback function as the callback. + * Return 0 on success, or -errno on error. 
On success, the watched path + * (path/path2) will be saved as watch->node, and becomes the caller's to + * kfree(). On error, watch->node will be NULL, so the caller has nothing to + * free, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ +int xenbus_watch_path2(struct xenbus_device *dev, const char *path, + const char *path2, struct xenbus_watch *watch, + void (*callback)(struct xenbus_watch *, + const char **, unsigned int)); + + +/** + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); + + +/** + * Grant access to the given ring_mfn to the peer of the given device. Return + * 0 on success, or -errno on error. On error, the device will switch to + * XenbusStateClosing, and the error will be saved in the store. + */ int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); -int xenbus_map_ring_valloc(struct xenbus_device *dev, - int gnt_ref, void **vaddr); + + +/** + * Map a page of memory into this domain from another domain's grant table. + * xenbus_map_ring_valloc allocates a page of virtual address space, maps the + * page to that address, and sets *vaddr to that address. + * xenbus_map_ring does not allocate the virtual address space (you must do + * this yourself!). It only maps in the page to the specified address. + * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) + * or -ENOMEM on error. If an error is returned, device will switch to + * XenbusStateClosing and the error message will be saved in XenStore. + */ +struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev, + int gnt_ref); int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, grant_handle_t *handle, void *vaddr); -int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); + +/** + * Unmap a page of memory in this domain that was imported from another domain. + * Use xenbus_unmap_ring_vfree if you mapped in your memory with + * xenbus_map_ring_valloc (it will free the virtual address space). + * Returns 0 on success and returns GNTST_* on error + * (see xen/include/interface/grant_table.h). + */ +int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *); int xenbus_unmap_ring(struct xenbus_device *dev, grant_handle_t handle, void *vaddr); + +/** + * Allocate an event channel for the given xenbus_device, assigning the newly + * created local port to *port. Return 0 on success, or -errno on error. On + * error, the device will switch to XenbusStateClosing, and the error will be + * saved in the store. + */ int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); -int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); + + +/** + * Free an existing event channel. Returns 0 on success or -errno on error. + */ int xenbus_free_evtchn(struct xenbus_device *dev, int port); + +/** + * Return the state of the driver rooted at the given store path, or + * XenbusStateUnknown if no state can be read. 
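Illustrative sketch (not part of the patch): a frontend using the xenbus_watch_path2() helper documented above to watch its backend's "state" node; backend_changed() and be_watch are hypothetical names supplied by the caller.

/* Sketch: callback run (in xenwatch context by default) on any change. */
static void backend_changed(struct xenbus_watch *watch,
			    const char **vec, unsigned int len)
{
	/* re-read the otherend state and react to the transition here */
}

static int example_watch_backend(struct xenbus_device *dev,
				 struct xenbus_watch *be_watch)
{
	return xenbus_watch_path2(dev, dev->otherend, "state",
				  be_watch, backend_changed);
}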
+ */ enum xenbus_state xenbus_read_driver_state(const char *path); -void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...); -void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...); + +/*** + * Report the given negative errno into the store, along with the given + * formatted message. + */ +void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, + ...); + + +/*** + * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by + * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly + * closedown of this driver and its peer. + */ +void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, + ...); + +int xenbus_dev_init(void); const char *xenbus_strstate(enum xenbus_state state); int xenbus_dev_is_online(struct xenbus_device *dev); int xenbus_frontend_closed(struct xenbus_device *dev); +int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *)); +int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *)); + #endif /* _XEN_XENBUS_H */
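Illustrative sketch (not part of the patch): the helpers above compose into the canonical frontend bring-up order -- grant the shared ring, allocate the event channel, then advertise the new state. A real driver would also write the returned grant reference and port into the store (xenbus_printf() inside a transaction) so the backend can find them; that part is omitted here.

/* Sketch: minimal connect path; 'ring_mfn' is the frame backing the ring. */
static int example_frontend_connect(struct xenbus_device *dev,
				    unsigned long ring_mfn, int *evtchn)
{
	int err = xenbus_grant_ring(dev, ring_mfn);

	if (err < 0)
		return err;	/* a non-negative value is the grant reference */

	err = xenbus_alloc_evtchn(dev, evtchn);
	if (err)
		return err;

	return xenbus_switch_state(dev, XenbusStateConnected);
}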