qubes-linux-kernel/patches.xen/xen3-fixup-xen

Subject: Fix Xen build wrt. Xen files coming from mainline.
From: http://xenbits.xensource.com/linux-2.6.18-xen.hg (tip 1017:948c933f8839)
Patch-mainline: n/a

Acked-by: jbeulich@novell.com

--- head-2010-05-12.orig/drivers/xen/Makefile	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/Makefile	2010-01-19 16:01:03.000000000 +0100
@@ -1,12 +1,28 @@
-obj-y	+= grant-table.o features.o events.o manage.o
+obj-y	+= core/
+obj-y	+= console/
+obj-y	+= evtchn/
 obj-y	+= xenbus/
+obj-y	+= char/

-nostackp := $(call cc-option, -fno-stack-protector)
-CFLAGS_features.o			:= $(nostackp)
-
-obj-$(CONFIG_HOTPLUG_CPU)	+= cpu_hotplug.o
-obj-$(CONFIG_XEN_XENCOMM)	+= xencomm.o
-obj-$(CONFIG_XEN_BALLOON)	+= balloon.o
-obj-$(CONFIG_XEN_DEV_EVTCHN)	+= evtchn.o
-obj-$(CONFIG_XENFS)		+= xenfs/
-obj-$(CONFIG_XEN_SYS_HYPERVISOR)	+= sys-hypervisor.o
\ No newline at end of file
+obj-y	+= util.o
+obj-$(CONFIG_XEN_BALLOON)		+= balloon/
+obj-$(CONFIG_XEN_BLKDEV_BACKEND)	+= blkback/
+obj-$(CONFIG_XEN_BLKDEV_TAP)		+= blktap/
+obj-$(CONFIG_XEN_BLKDEV_TAP2)           += blktap2/
+obj-$(CONFIG_XEN_NETDEV_BACKEND)	+= netback/
+obj-$(CONFIG_XEN_TPMDEV_BACKEND)	+= tpmback/
+obj-$(CONFIG_XEN_BLKDEV_FRONTEND)	+= blkfront/
+obj-$(CONFIG_XEN_NETDEV_FRONTEND)	+= netfront/
+obj-$(CONFIG_XEN_PCIDEV_BACKEND)	+= pciback/
+obj-$(CONFIG_XEN_PCIDEV_FRONTEND)	+= pcifront/
+obj-$(CONFIG_XEN_FRAMEBUFFER)		+= fbfront/
+obj-$(CONFIG_XEN_KEYBOARD)		+= fbfront/
+obj-$(CONFIG_XEN_SCSI_BACKEND)		+= scsiback/
+obj-$(CONFIG_XEN_SCSI_FRONTEND)		+= scsifront/
+obj-$(CONFIG_XEN_USB_BACKEND)		+= usbback/
+obj-$(CONFIG_XEN_USB_FRONTEND)		+= usbfront/
+obj-$(CONFIG_XEN_PRIVCMD)	+= privcmd/
+obj-$(CONFIG_XEN_GRANT_DEV)	+= gntdev/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL)		+= sfc_netutil/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND)	+= sfc_netfront/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_BACKEND)	+= sfc_netback/
--- head-2010-05-12.orig/drivers/xen/xenbus/Makefile	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/Makefile	2010-01-19 16:01:03.000000000 +0100
@@ -1,7 +1,9 @@
-obj-y	+= xenbus.o
+obj-y += xenbus_client.o xenbus_comms.o xenbus_xs.o xenbus_probe.o
+obj-$(CONFIG_XEN_BACKEND) += xenbus_be.o

-xenbus-objs =
-xenbus-objs += xenbus_client.o
-xenbus-objs += xenbus_comms.o
-xenbus-objs += xenbus_xs.o
-xenbus-objs += xenbus_probe.o
+xenbus_be-objs =
+xenbus_be-objs += xenbus_backend_client.o
+
+xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o
+obj-y += $(xenbus-y) $(xenbus-m)
+obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o
--- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_client.c	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/xenbus_client.c	2010-01-19 16:01:03.000000000 +0100
@@ -31,14 +31,17 @@
  */

 #include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/vmalloc.h>
-#include <asm/xen/hypervisor.h>
-#include <xen/interface/xen.h>
-#include <xen/interface/event_channel.h>
-#include <xen/events.h>
-#include <xen/grant_table.h>
+#include <xen/evtchn.h>
+#include <xen/gnttab.h>
 #include <xen/xenbus.h>
+#include <xen/driver_util.h>
+
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+#define DPRINTK(fmt, args...) \
+    pr_debug("xenbus_client (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)

 const char *xenbus_strstate(enum xenbus_state state)
 {
@@ -50,25 +53,13 @@ const char *xenbus_strstate(enum xenbus_
 		[ XenbusStateConnected    ] = "Connected",
 		[ XenbusStateClosing      ] = "Closing",
 		[ XenbusStateClosed	  ] = "Closed",
+		[ XenbusStateReconfiguring ] = "Reconfiguring",
+		[ XenbusStateReconfigured ] = "Reconfigured",
 	};
 	return (state < ARRAY_SIZE(name)) ? name[state] : "INVALID";
 }
 EXPORT_SYMBOL_GPL(xenbus_strstate);

-/**
- * xenbus_watch_path - register a watch
- * @dev: xenbus device
- * @path: path to watch
- * @watch: watch to register
- * @callback: callback to register
- *
- * Register a @watch on the given path, using the given xenbus_watch structure
- * for storage, and the given @callback function as the callback.  Return 0 on
- * success, or -errno on error.  On success, the given @path will be saved as
- * @watch->node, and remains the caller's to free.  On error, @watch->node will
- * be NULL, the device will switch to %XenbusStateClosing, and the error will
- * be saved in the store.
- */
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
 		      struct xenbus_watch *watch,
 		      void (*callback)(struct xenbus_watch *,
@@ -92,57 +83,26 @@ int xenbus_watch_path(struct xenbus_devi
 EXPORT_SYMBOL_GPL(xenbus_watch_path);


-/**
- * xenbus_watch_pathfmt - register a watch on a sprintf-formatted path
- * @dev: xenbus device
- * @watch: watch to register
- * @callback: callback to register
- * @pathfmt: format of path to watch
- *
- * Register a watch on the given @path, using the given xenbus_watch
- * structure for storage, and the given @callback function as the callback.
- * Return 0 on success, or -errno on error.  On success, the watched path
- * (@path/@path2) will be saved as @watch->node, and becomes the caller's to
- * kfree().  On error, watch->node will be NULL, so the caller has nothing to
- * free, the device will switch to %XenbusStateClosing, and the error will be
- * saved in the store.
- */
-int xenbus_watch_pathfmt(struct xenbus_device *dev,
-			 struct xenbus_watch *watch,
-			 void (*callback)(struct xenbus_watch *,
-					const char **, unsigned int),
-			 const char *pathfmt, ...)
+int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
+		       const char *path2, struct xenbus_watch *watch,
+		       void (*callback)(struct xenbus_watch *,
+					const char **, unsigned int))
 {
 	int err;
-	va_list ap;
-	char *path;
-
-	va_start(ap, pathfmt);
-	path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
-	va_end(ap);
-
-	if (!path) {
+	char *state = kasprintf(GFP_NOIO | __GFP_HIGH, "%s/%s", path, path2);
+	if (!state) {
 		xenbus_dev_fatal(dev, -ENOMEM, "allocating path for watch");
 		return -ENOMEM;
 	}
-	err = xenbus_watch_path(dev, path, watch, callback);
+	err = xenbus_watch_path(dev, state, watch, callback);

 	if (err)
-		kfree(path);
+		kfree(state);
 	return err;
 }
-EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
+EXPORT_SYMBOL_GPL(xenbus_watch_path2);


-/**
- * xenbus_switch_state
- * @dev: xenbus device
- * @state: new state
- *
- * Advertise in the store a change of the given driver to the given new_state.
- * Return 0 on success, or -errno on error.  On error, the device will switch
- * to XenbusStateClosing, and the error will be saved in the store.
- */
 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
 {
 	/* We check whether the state is currently set to the given value, and
@@ -201,13 +161,12 @@ static char *error_path(struct xenbus_de
 }


-static void xenbus_va_dev_error(struct xenbus_device *dev, int err,
-				const char *fmt, va_list ap)
+void _dev_error(struct xenbus_device *dev, int err, const char *fmt,
+		va_list ap)
 {
 	int ret;
 	unsigned int len;
-	char *printf_buffer = NULL;
-	char *path_buffer = NULL;
+	char *printf_buffer = NULL, *path_buffer = NULL;

 #define PRINTF_BUFFER_SIZE 4096
 	printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
@@ -224,13 +183,13 @@ static void xenbus_va_dev_error(struct x
 	path_buffer = error_path(dev);

 	if (path_buffer == NULL) {
-		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+		printk("xenbus: failed to write error node for %s (%s)\n",
 		       dev->nodename, printf_buffer);
 		goto fail;
 	}

 	if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
-		dev_err(&dev->dev, "failed to write error node for %s (%s)\n",
+		printk("xenbus: failed to write error node for %s (%s)\n",
 		       dev->nodename, printf_buffer);
 		goto fail;
 	}
@@ -241,57 +200,32 @@ fail:
 }


-/**
- * xenbus_dev_error
- * @dev: xenbus device
- * @err: error to report
- * @fmt: error message format
- *
- * Report the given negative errno into the store, along with the given
- * formatted message.
- */
-void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...)
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
+		      ...)
 {
 	va_list ap;

 	va_start(ap, fmt);
-	xenbus_va_dev_error(dev, err, fmt, ap);
+	_dev_error(dev, err, fmt, ap);
 	va_end(ap);
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_error);

-/**
- * xenbus_dev_fatal
- * @dev: xenbus device
- * @err: error to report
- * @fmt: error message format
- *
- * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
- * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
- * closedown of this driver and its peer.
- */

-void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
+		      ...)
 {
 	va_list ap;

 	va_start(ap, fmt);
-	xenbus_va_dev_error(dev, err, fmt, ap);
+	_dev_error(dev, err, fmt, ap);
 	va_end(ap);

 	xenbus_switch_state(dev, XenbusStateClosing);
 }
 EXPORT_SYMBOL_GPL(xenbus_dev_fatal);

-/**
- * xenbus_grant_ring
- * @dev: xenbus device
- * @ring_mfn: mfn of ring to grant
-
- * Grant access to the given @ring_mfn to the peer of the given device.  Return
- * 0 on success, or -errno on error.  On error, the device will switch to
- * XenbusStateClosing, and the error will be saved in the store.
- */
+
 int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
 {
 	int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0);
@@ -302,18 +236,12 @@ int xenbus_grant_ring(struct xenbus_devi
 EXPORT_SYMBOL_GPL(xenbus_grant_ring);


-/**
- * Allocate an event channel for the given xenbus_device, assigning the newly
- * created local port to *port.  Return 0 on success, or -errno on error.  On
- * error, the device will switch to XenbusStateClosing, and the error will be
- * saved in the store.
- */
 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port)
 {
 	struct evtchn_alloc_unbound alloc_unbound;
 	int err;

-	alloc_unbound.dom = DOMID_SELF;
+	alloc_unbound.dom        = DOMID_SELF;
 	alloc_unbound.remote_dom = dev->otherend_id;

 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
@@ -328,36 +256,6 @@ int xenbus_alloc_evtchn(struct xenbus_de
 EXPORT_SYMBOL_GPL(xenbus_alloc_evtchn);


-/**
- * Bind to an existing interdomain event channel in another domain. Returns 0
- * on success and stores the local port in *port. On error, returns -errno,
- * switches the device to XenbusStateClosing, and saves the error in XenStore.
- */
-int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port)
-{
-	struct evtchn_bind_interdomain bind_interdomain;
-	int err;
-
-	bind_interdomain.remote_dom = dev->otherend_id;
-	bind_interdomain.remote_port = remote_port;
-
-	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
-					  &bind_interdomain);
-	if (err)
-		xenbus_dev_fatal(dev, err,
-				 "binding to event channel %d from domain %d",
-				 remote_port, dev->otherend_id);
-	else
-		*port = bind_interdomain.local_port;
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(xenbus_bind_evtchn);
-
-
-/**
- * Free an existing event channel. Returns 0 on success or -errno on error.
- */
 int xenbus_free_evtchn(struct xenbus_device *dev, int port)
 {
 	struct evtchn_close close;
@@ -374,189 +272,6 @@ int xenbus_free_evtchn(struct xenbus_dev
 EXPORT_SYMBOL_GPL(xenbus_free_evtchn);


-/**
- * xenbus_map_ring_valloc
- * @dev: xenbus device
- * @gnt_ref: grant reference
- * @vaddr: pointer to address to be filled out by mapping
- *
- * Based on Rusty Russell's skeleton driver's map_page.
- * Map a page of memory into this domain from another domain's grant table.
- * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
- * page to that address, and sets *vaddr to that address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
- */
-int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr)
-{
-	struct gnttab_map_grant_ref op = {
-		.flags = GNTMAP_host_map,
-		.ref   = gnt_ref,
-		.dom   = dev->otherend_id,
-	};
-	struct vm_struct *area;
-
-	*vaddr = NULL;
-
-	area = xen_alloc_vm_area(PAGE_SIZE);
-	if (!area)
-		return -ENOMEM;
-
-	op.host_addr = (unsigned long)area->addr;
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status != GNTST_okay) {
-		xen_free_vm_area(area);
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-		return op.status;
-	}
-
-	/* Stuff the handle in an unused field */
-	area->phys_addr = (unsigned long)op.handle;
-
-	*vaddr = area->addr;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
-
-
-/**
- * xenbus_map_ring
- * @dev: xenbus device
- * @gnt_ref: grant reference
- * @handle: pointer to grant handle to be filled
- * @vaddr: address to be mapped to
- *
- * Map a page of memory into this domain from another domain's grant table.
- * xenbus_map_ring does not allocate the virtual address space (you must do
- * this yourself!). It only maps in the page to the specified address.
- * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
- * or -ENOMEM on error. If an error is returned, device will switch to
- * XenbusStateClosing and the error message will be saved in XenStore.
- */
-int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
-		    grant_handle_t *handle, void *vaddr)
-{
-	struct gnttab_map_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.flags     = GNTMAP_host_map,
-		.ref       = gnt_ref,
-		.dom       = dev->otherend_id,
-	};
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status != GNTST_okay) {
-		xenbus_dev_fatal(dev, op.status,
-				 "mapping in shared page %d from domain %d",
-				 gnt_ref, dev->otherend_id);
-	} else
-		*handle = op.handle;
-
-	return op.status;
-}
-EXPORT_SYMBOL_GPL(xenbus_map_ring);
-
-
-/**
- * xenbus_unmap_ring_vfree
- * @dev: xenbus device
- * @vaddr: addr to unmap
- *
- * Based on Rusty Russell's skeleton driver's unmap_page.
- * Unmap a page of memory in this domain that was imported from another domain.
- * Use xenbus_unmap_ring_vfree if you mapped in your memory with
- * xenbus_map_ring_valloc (it will free the virtual address space).
- * Returns 0 on success and returns GNTST_* on error
- * (see xen/include/interface/grant_table.h).
- */
-int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr)
-{
-	struct vm_struct *area;
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-	};
-
-	/* It'd be nice if linux/vmalloc.h provided a find_vm_area(void *addr)
-	 * method so that we don't have to muck with vmalloc internals here.
-	 * We could force the user to hang on to their struct vm_struct from
-	 * xenbus_map_ring_valloc, but these 6 lines considerably simplify
-	 * this API.
-	 */
-	read_lock(&vmlist_lock);
-	for (area = vmlist; area != NULL; area = area->next) {
-		if (area->addr == vaddr)
-			break;
-	}
-	read_unlock(&vmlist_lock);
-
-	if (!area) {
-		xenbus_dev_error(dev, -ENOENT,
-				 "can't find mapped virtual address %p", vaddr);
-		return GNTST_bad_virt_addr;
-	}
-
-	op.handle = (grant_handle_t)area->phys_addr;
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status == GNTST_okay)
-		xen_free_vm_area(area);
-	else
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 (int16_t)area->phys_addr, op.status);
-
-	return op.status;
-}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
-
-
-/**
- * xenbus_unmap_ring
- * @dev: xenbus device
- * @handle: grant handle
- * @vaddr: addr to unmap
- *
- * Unmap a page of memory in this domain that was imported from another domain.
- * Returns 0 on success and returns GNTST_* on error
- * (see xen/include/interface/grant_table.h).
- */
-int xenbus_unmap_ring(struct xenbus_device *dev,
-		      grant_handle_t handle, void *vaddr)
-{
-	struct gnttab_unmap_grant_ref op = {
-		.host_addr = (unsigned long)vaddr,
-		.handle    = handle,
-	};
-
-	if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
-		BUG();
-
-	if (op.status != GNTST_okay)
-		xenbus_dev_error(dev, op.status,
-				 "unmapping page at handle %d error %d",
-				 handle, op.status);
-
-	return op.status;
-}
-EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
-
-
-/**
- * xenbus_read_driver_state
- * @path: path for driver
- *
- * Return the state of the driver rooted at the given store path, or
- * XenbusStateUnknown if no state can be read.
- */
 enum xenbus_state xenbus_read_driver_state(const char *path)
 {
 	enum xenbus_state result;
--- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_comms.c	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/xenbus_comms.c	2010-01-19 16:01:03.000000000 +0100
@@ -34,25 +34,55 @@
 #include <linux/interrupt.h>
 #include <linux/sched.h>
 #include <linux/err.h>
+#include <linux/ptrace.h>
+#include <linux/workqueue.h>
+#include <xen/evtchn.h>
 #include <xen/xenbus.h>
-#include <asm/xen/hypervisor.h>
-#include <xen/events.h>
-#include <xen/page.h>
+
+#include <asm/hypervisor.h>
+
 #include "xenbus_comms.h"

+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
 static int xenbus_irq;

-static DECLARE_WORK(probe_work, xenbus_probe);
+extern void xenbus_probe(void *);
+static DECLARE_WORK(probe_work, xenbus_probe, NULL);

 static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);

-static irqreturn_t wake_waiting(int irq, void *unused)
+static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
 {
-	if (unlikely(xenstored_ready == 0)) {
-		xenstored_ready = 1;
-		schedule_work(&probe_work);
+	int old, new;
+
+	old = atomic_read(&xenbus_xsd_state);
+	switch (old) {
+		case XENBUS_XSD_UNCOMMITTED:
+			BUG();
+			return IRQ_HANDLED;
+
+		case XENBUS_XSD_FOREIGN_INIT:
+			new = XENBUS_XSD_FOREIGN_READY;
+			break;
+
+		case XENBUS_XSD_LOCAL_INIT:
+			new = XENBUS_XSD_LOCAL_READY;
+			break;
+
+		case XENBUS_XSD_FOREIGN_READY:
+		case XENBUS_XSD_LOCAL_READY:
+		default:
+			goto wake;
 	}

+	old = atomic_cmpxchg(&xenbus_xsd_state, old, new);
+	if (old != new)
+		schedule_work(&probe_work);
+
+wake:
 	wake_up(&xb_waitq);
 	return IRQ_HANDLED;
 }
@@ -82,13 +112,6 @@ static const void *get_input_chunk(XENST
 	return buf + MASK_XENSTORE_IDX(cons);
 }

-/**
- * xb_write - low level write
- * @data: buffer to send
- * @len: length of buffer
- *
- * Returns 0 on success, error otherwise.
- */
 int xb_write(const void *data, unsigned len)
 {
 	struct xenstore_domain_interface *intf = xen_store_interface;
@@ -197,12 +220,11 @@ int xb_read(void *data, unsigned len)
 	return 0;
 }

-/**
- * xb_init_comms - Set up interrupt handler off store event channel.
- */
+/* Set up interrupt handler off store event channel. */
 int xb_init_comms(void)
 {
 	struct xenstore_domain_interface *intf = xen_store_interface;
+	int err;

 	if (intf->req_prod != intf->req_cons)
 		printk(KERN_ERR "XENBUS request ring is not quiescent "
@@ -215,20 +237,18 @@ int xb_init_comms(void)
 		intf->rsp_cons = intf->rsp_prod;
 	}

-	if (xenbus_irq) {
-		/* Already have an irq; assume we're resuming */
-		rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
-	} else {
-		int err;
-		err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
-						0, "xenbus", &xb_waitq);
-		if (err <= 0) {
-			printk(KERN_ERR "XENBUS request irq failed %i\n", err);
-			return err;
-		}
+	if (xenbus_irq)
+		unbind_from_irqhandler(xenbus_irq, &xb_waitq);

-		xenbus_irq = err;
+	err = bind_caller_port_to_irqhandler(
+		xen_store_evtchn, wake_waiting,
+		0, "xenbus", &xb_waitq);
+	if (err <= 0) {
+		printk(KERN_ERR "XENBUS request irq failed %i\n", err);
+		return err;
 	}

+	xenbus_irq = err;
+
 	return 0;
 }
--- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_comms.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/xenbus_comms.h	2010-01-19 16:01:03.000000000 +0100
@@ -43,4 +43,20 @@ int xs_input_avail(void);
 extern struct xenstore_domain_interface *xen_store_interface;
 extern int xen_store_evtchn;

+/* For xenbus internal use. */
+enum {
+	XENBUS_XSD_UNCOMMITTED = 0,
+	XENBUS_XSD_FOREIGN_INIT,
+	XENBUS_XSD_FOREIGN_READY,
+	XENBUS_XSD_LOCAL_INIT,
+	XENBUS_XSD_LOCAL_READY,
+};
+extern atomic_t xenbus_xsd_state;
+
+static inline int is_xenstored_ready(void)
+{
+	int s = atomic_read(&xenbus_xsd_state);
+	return s == XENBUS_XSD_FOREIGN_READY || s == XENBUS_XSD_LOCAL_READY;
+}
+
 #endif /* _XENBUS_COMMS_H */
--- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_probe.c	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/xenbus_probe.c	2010-01-26 09:08:16.000000000 +0100
@@ -4,6 +4,7 @@
  * Copyright (C) 2005 Rusty Russell, IBM Corporation
  * Copyright (C) 2005 Mike Wray, Hewlett-Packard
  * Copyright (C) 2005, 2006 XenSource Ltd
+ * Copyright (C) 2007 Solarflare Communications, Inc.
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License version 2
@@ -32,7 +33,7 @@

 #define DPRINTK(fmt, args...)				\
 	pr_debug("xenbus_probe (%s:%d) " fmt ".\n",	\
-		 __func__, __LINE__, ##args)
+		 __FUNCTION__, __LINE__, ##args)

 #include <linux/kernel.h>
 #include <linux/err.h>
@@ -40,32 +41,37 @@
 #include <linux/ctype.h>
 #include <linux/fcntl.h>
 #include <linux/mm.h>
-#include <linux/proc_fs.h>
 #include <linux/notifier.h>
-#include <linux/kthread.h>
 #include <linux/mutex.h>
-#include <linux/io.h>
-#include <linux/slab.h>
+#include <linux/module.h>
+#include <xen/gnttab.h>

+#include <asm/io.h>
 #include <asm/page.h>
+#include <asm/maddr.h>
 #include <asm/pgtable.h>
-#include <asm/xen/hypervisor.h>
-
-#include <xen/xen.h>
+#include <asm/hypervisor.h>
 #include <xen/xenbus.h>
-#include <xen/events.h>
-#include <xen/page.h>
+#include <xen/xen_proc.h>
+#include <xen/evtchn.h>
+#include <xen/features.h>
+#ifdef MODULE
+#include <xen/hvm.h>
+#endif

 #include "xenbus_comms.h"
 #include "xenbus_probe.h"

+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif

 int xen_store_evtchn;
-EXPORT_SYMBOL(xen_store_evtchn);
-
 struct xenstore_domain_interface *xen_store_interface;
 static unsigned long xen_store_mfn;

+extern struct mutex xenwatch_mutex;
+
 static BLOCKING_NOTIFIER_HEAD(xenstore_chain);

 static void wait_for_devices(struct xenbus_driver *xendrv);
@@ -74,9 +80,6 @@ static int xenbus_probe_frontend(const c

 static void xenbus_dev_shutdown(struct device *_dev);

-static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
-static int xenbus_dev_resume(struct device *dev);
-
 /* If something in array of ids matches this device, return it. */
 static const struct xenbus_device_id *
 match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -98,16 +101,6 @@ int xenbus_match(struct device *_dev, st
 	return match_device(drv->ids, to_xenbus_device(_dev)) != NULL;
 }

-static int xenbus_uevent(struct device *_dev, struct kobj_uevent_env *env)
-{
-	struct xenbus_device *dev = to_xenbus_device(_dev);
-
-	if (add_uevent_var(env, "MODALIAS=xen:%s", dev->devicetype))
-		return -ENOMEM;
-
-	return 0;
-}
-
 /* device/<type>/<id> => <type>-<id> */
 static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename)
 {
@@ -176,9 +169,30 @@ static int read_backend_details(struct x
 	return read_otherend_details(xendev, "backend-id", "backend");
 }

-static struct device_attribute xenbus_dev_attrs[] = {
-	__ATTR_NULL
-};
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+static int xenbus_uevent_frontend(struct device *dev, char **envp,
+				  int num_envp, char *buffer, int buffer_size)
+{
+	struct xenbus_device *xdev;
+	int length = 0, i = 0;
+
+	if (dev == NULL)
+		return -ENODEV;
+	xdev = to_xenbus_device(dev);
+	if (xdev == NULL)
+		return -ENODEV;
+
+	/* stuff we want to pass to /sbin/hotplug */
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_TYPE=%s", xdev->devicetype);
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "XENBUS_PATH=%s", xdev->nodename);
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "MODALIAS=xen:%s", xdev->devicetype);
+
+	return 0;
+}
+#endif

 /* Bus type for frontend drivers. */
 static struct xen_bus_type xenbus_frontend = {
@@ -186,17 +200,19 @@ static struct xen_bus_type xenbus_fronte
 	.levels = 2, 		/* device/type/<id> */
 	.get_bus_id = frontend_bus_id,
 	.probe = xenbus_probe_frontend,
+	.error = -ENODEV,
 	.bus = {
-		.name      = "xen",
-		.match     = xenbus_match,
-		.uevent    = xenbus_uevent,
-		.probe     = xenbus_dev_probe,
-		.remove    = xenbus_dev_remove,
-		.shutdown  = xenbus_dev_shutdown,
-		.dev_attrs = xenbus_dev_attrs,
-
-		.suspend   = xenbus_dev_suspend,
-		.resume    = xenbus_dev_resume,
+		.name     = "xen",
+		.match    = xenbus_match,
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
+		.probe    = xenbus_dev_probe,
+		.remove   = xenbus_dev_remove,
+		.shutdown = xenbus_dev_shutdown,
+		.uevent   = xenbus_uevent_frontend,
+#endif
+	},
+	.dev = {
+		.bus_id = "xen",
 	},
 };

@@ -213,17 +229,16 @@ static void otherend_changed(struct xenb
 	if (!dev->otherend ||
 	    strncmp(dev->otherend, vec[XS_WATCH_PATH],
 		    strlen(dev->otherend))) {
-		dev_dbg(&dev->dev, "Ignoring watch at %s\n",
-			vec[XS_WATCH_PATH]);
+		DPRINTK("Ignoring watch at %s", vec[XS_WATCH_PATH]);
 		return;
 	}

 	state = xenbus_read_driver_state(dev->otherend);

-	dev_dbg(&dev->dev, "state is %d, (%s), %s, %s\n",
-		state, xenbus_strstate(state), dev->otherend_watch.node,
-		vec[XS_WATCH_PATH]);
+	DPRINTK("state is %d (%s), %s, %s", state, xenbus_strstate(state),
+		dev->otherend_watch.node, vec[XS_WATCH_PATH]);

+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
 	/*
 	 * Ignore xenbus transitions during shutdown. This prevents us doing
 	 * work that can fail e.g., when the rootfs is gone.
@@ -237,6 +252,7 @@ static void otherend_changed(struct xenb
 			xenbus_frontend_closed(dev);
 		return;
 	}
+#endif

 	if (drv->otherend_changed)
 		drv->otherend_changed(dev, state);
@@ -256,8 +272,8 @@ static int talk_to_otherend(struct xenbu

 static int watch_otherend(struct xenbus_device *dev)
 {
-	return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed,
-				    "%s/%s", dev->otherend, "state");
+	return xenbus_watch_path2(dev, dev->otherend, "state",
+				  &dev->otherend_watch, otherend_changed);
 }


@@ -283,8 +299,9 @@ int xenbus_dev_probe(struct device *_dev

 	err = talk_to_otherend(dev);
 	if (err) {
-		dev_warn(&dev->dev, "talk_to_otherend on %s failed.\n",
-			 dev->nodename);
+		printk(KERN_WARNING
+		       "xenbus_probe: talk_to_otherend on %s failed.\n",
+		       dev->nodename);
 		return err;
 	}

@@ -294,7 +311,8 @@ int xenbus_dev_probe(struct device *_dev

 	err = watch_otherend(dev);
 	if (err) {
-		dev_warn(&dev->dev, "watch_otherend on %s failed.\n",
+		printk(KERN_WARNING
+		       "xenbus_probe: watch_otherend on %s failed.\n",
 		       dev->nodename);
 		return err;
 	}
@@ -330,43 +348,64 @@ static void xenbus_dev_shutdown(struct d

 	DPRINTK("%s", dev->nodename);

+/* Commented out since xenstored stubdom is now minios based not linux based
+#define XENSTORE_DOMAIN_SHARES_THIS_KERNEL
+*/
+#ifndef XENSTORE_DOMAIN_SHARES_THIS_KERNEL
+	if (is_initial_xendomain())
+#endif
+		return;
+
 	get_device(&dev->dev);
 	if (dev->state != XenbusStateConnected) {
-		printk(KERN_INFO "%s: %s: %s != Connected, skipping\n", __func__,
+		printk("%s: %s: %s != Connected, skipping\n", __FUNCTION__,
 		       dev->nodename, xenbus_strstate(dev->state));
 		goto out;
 	}
 	xenbus_switch_state(dev, XenbusStateClosing);
+
+	if (!strcmp(dev->devicetype, "vfb"))
+		goto out;
+
 	timeout = wait_for_completion_timeout(&dev->down, timeout);
 	if (!timeout)
-		printk(KERN_INFO "%s: %s timeout closing device\n",
-		       __func__, dev->nodename);
+		printk("%s: %s timeout closing device\n", __FUNCTION__, dev->nodename);
  out:
 	put_device(&dev->dev);
 }

 int xenbus_register_driver_common(struct xenbus_driver *drv,
-				  struct xen_bus_type *bus,
-				  struct module *owner,
-				  const char *mod_name)
+				  struct xen_bus_type *bus)
 {
+	int ret;
+
+	if (bus->error)
+		return bus->error;
+
 	drv->driver.name = drv->name;
 	drv->driver.bus = &bus->bus;
-	drv->driver.owner = owner;
-	drv->driver.mod_name = mod_name;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,10)
+	drv->driver.owner = drv->owner;
+#endif
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
+	drv->driver.probe = xenbus_dev_probe;
+	drv->driver.remove = xenbus_dev_remove;
+	drv->driver.shutdown = xenbus_dev_shutdown;
+#endif

-	return driver_register(&drv->driver);
+	mutex_lock(&xenwatch_mutex);
+	ret = driver_register(&drv->driver);
+	mutex_unlock(&xenwatch_mutex);
+	return ret;
 }

-int __xenbus_register_frontend(struct xenbus_driver *drv,
-			       struct module *owner, const char *mod_name)
+int xenbus_register_frontend(struct xenbus_driver *drv)
 {
 	int ret;

 	drv->read_otherend_details = read_backend_details;

-	ret = xenbus_register_driver_common(drv, &xenbus_frontend,
-					    owner, mod_name);
+	ret = xenbus_register_driver_common(drv, &xenbus_frontend);
 	if (ret)
 		return ret;

@@ -375,7 +414,7 @@ int __xenbus_register_frontend(struct xe

 	return 0;
 }
-EXPORT_SYMBOL_GPL(__xenbus_register_frontend);
+EXPORT_SYMBOL_GPL(xenbus_register_frontend);

 void xenbus_unregister_driver(struct xenbus_driver *drv)
 {
@@ -453,31 +492,30 @@ static void xenbus_dev_release(struct de
 }

 static ssize_t xendev_show_nodename(struct device *dev,
-				    struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+				    struct device_attribute *attr,
+#endif
+				    char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->nodename);
 }
 static DEVICE_ATTR(nodename, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_nodename, NULL);

 static ssize_t xendev_show_devtype(struct device *dev,
-				   struct device_attribute *attr, char *buf)
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,13)
+				   struct device_attribute *attr,
+#endif
+				   char *buf)
 {
 	return sprintf(buf, "%s\n", to_xenbus_device(dev)->devicetype);
 }
 static DEVICE_ATTR(devtype, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_devtype, NULL);

-static ssize_t xendev_show_modalias(struct device *dev,
-				    struct device_attribute *attr, char *buf)
-{
-	return sprintf(buf, "xen:%s\n", to_xenbus_device(dev)->devicetype);
-}
-static DEVICE_ATTR(modalias, S_IRUSR | S_IRGRP | S_IROTH, xendev_show_modalias, NULL);

 int xenbus_probe_node(struct xen_bus_type *bus,
 		      const char *type,
 		      const char *nodename)
 {
-	char devname[XEN_BUS_ID_SIZE];
 	int err;
 	struct xenbus_device *xendev;
 	size_t stringlen;
@@ -485,6 +523,9 @@ int xenbus_probe_node(struct xen_bus_typ

 	enum xenbus_state state = xenbus_read_driver_state(nodename);

+	if (bus->error)
+		return bus->error;
+
 	if (state != XenbusStateInitialising) {
 		/* Device is not new, so ignore it.  This can happen if a
 		   device is going away after switching to Closed.  */
@@ -509,15 +550,14 @@ int xenbus_probe_node(struct xen_bus_typ
 	xendev->devicetype = tmpstring;
 	init_completion(&xendev->down);

+	xendev->dev.parent = &bus->dev;
 	xendev->dev.bus = &bus->bus;
 	xendev->dev.release = xenbus_dev_release;

-	err = bus->get_bus_id(devname, xendev->nodename);
+	err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
 	if (err)
 		goto fail;

-	dev_set_name(&xendev->dev, devname);
-
 	/* Register with generic device framework. */
 	err = device_register(&xendev->dev);
 	if (err)
@@ -525,22 +565,15 @@ int xenbus_probe_node(struct xen_bus_typ

 	err = device_create_file(&xendev->dev, &dev_attr_nodename);
 	if (err)
-		goto fail_unregister;
-
+		goto unregister;
 	err = device_create_file(&xendev->dev, &dev_attr_devtype);
 	if (err)
-		goto fail_remove_nodename;
-
-	err = device_create_file(&xendev->dev, &dev_attr_modalias);
-	if (err)
-		goto fail_remove_devtype;
+		goto unregister;

 	return 0;
-fail_remove_devtype:
-	device_remove_file(&xendev->dev, &dev_attr_devtype);
-fail_remove_nodename:
+unregister:
 	device_remove_file(&xendev->dev, &dev_attr_nodename);
-fail_unregister:
+	device_remove_file(&xendev->dev, &dev_attr_devtype);
 	device_unregister(&xendev->dev);
 fail:
 	kfree(xendev);
@@ -553,8 +586,10 @@ static int xenbus_probe_frontend(const c
 	char *nodename;
 	int err;

-	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s",
-			     xenbus_frontend.root, type, name);
+	if (!strcmp(type, "console"))
+		return 0;
+
+	nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_frontend.root, type, name);
 	if (!nodename)
 		return -ENOMEM;

@@ -591,6 +626,9 @@ int xenbus_probe_devices(struct xen_bus_
 	char **dir;
 	unsigned int i, dir_n;

+	if (bus->error)
+		return bus->error;
+
 	dir = xenbus_directory(XBT_NIL, bus->root, "", &dir_n);
 	if (IS_ERR(dir))
 		return PTR_ERR(dir);
@@ -627,15 +665,15 @@ static int strsep_len(const char *str, c
 	return (len == 0) ? i : -ERANGE;
 }

-void xenbus_dev_changed(const char *node, struct xen_bus_type *bus)
+void dev_changed(const char *node, struct xen_bus_type *bus)
 {
 	int exists, rootlen;
 	struct xenbus_device *dev;
 	char type[XEN_BUS_ID_SIZE];
 	const char *p, *root;

-	if (char_count(node, '/') < 2)
-		return;
+	if (bus->error || char_count(node, '/') < 2)
+ 		return;

 	exists = xenbus_exists(XBT_NIL, node, "");
 	if (!exists) {
@@ -663,14 +701,13 @@ void xenbus_dev_changed(const char *node

 	kfree(root);
 }
-EXPORT_SYMBOL_GPL(xenbus_dev_changed);

 static void frontend_changed(struct xenbus_watch *watch,
 			     const char **vec, unsigned int len)
 {
 	DPRINTK("");

-	xenbus_dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
+	dev_changed(vec[XS_WATCH_PATH], &xenbus_frontend);
 }

 /* We watch for devices appearing and vanishing. */
@@ -679,7 +716,7 @@ static struct xenbus_watch fe_watch = {
 	.callback = frontend_changed,
 };

-static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+static int suspend_dev(struct device *dev, void *data)
 {
 	int err = 0;
 	struct xenbus_driver *drv;
@@ -692,14 +729,35 @@ static int xenbus_dev_suspend(struct dev
 	drv = to_xenbus_driver(dev->driver);
 	xdev = container_of(dev, struct xenbus_device, dev);
 	if (drv->suspend)
-		err = drv->suspend(xdev, state);
+		err = drv->suspend(xdev);
+	if (err)
+		printk(KERN_WARNING
+		       "xenbus: suspend %s failed: %i\n", dev->bus_id, err);
+	return 0;
+}
+
+static int suspend_cancel_dev(struct device *dev, void *data)
+{
+	int err = 0;
+	struct xenbus_driver *drv;
+	struct xenbus_device *xdev;
+
+	DPRINTK("");
+
+	if (dev->driver == NULL)
+		return 0;
+	drv = to_xenbus_driver(dev->driver);
+	xdev = container_of(dev, struct xenbus_device, dev);
+	if (drv->suspend_cancel)
+		err = drv->suspend_cancel(xdev);
 	if (err)
 		printk(KERN_WARNING
-		       "xenbus: suspend %s failed: %i\n", dev_name(dev), err);
+		       "xenbus: suspend_cancel %s failed: %i\n",
+		       dev->bus_id, err);
 	return 0;
 }

-static int xenbus_dev_resume(struct device *dev)
+static int resume_dev(struct device *dev, void *data)
 {
 	int err;
 	struct xenbus_driver *drv;
@@ -717,7 +775,7 @@ static int xenbus_dev_resume(struct devi
 	if (err) {
 		printk(KERN_WARNING
 		       "xenbus: resume (talk_to_otherend) %s failed: %i\n",
-		       dev_name(dev), err);
+		       dev->bus_id, err);
 		return err;
 	}

@@ -728,7 +786,7 @@ static int xenbus_dev_resume(struct devi
 		if (err) {
 			printk(KERN_WARNING
 			       "xenbus: resume %s failed: %i\n",
-			       dev_name(dev), err);
+			       dev->bus_id, err);
 			return err;
 		}
 	}
@@ -737,22 +795,52 @@ static int xenbus_dev_resume(struct devi
 	if (err) {
 		printk(KERN_WARNING
 		       "xenbus_probe: resume (watch_otherend) %s failed: "
-		       "%d.\n", dev_name(dev), err);
+		       "%d.\n", dev->bus_id, err);
 		return err;
 	}

 	return 0;
 }

+void xenbus_suspend(void)
+{
+	DPRINTK("");
+
+	if (!xenbus_frontend.error)
+		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_dev);
+	xenbus_backend_suspend(suspend_dev);
+	xs_suspend();
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend);
+
+void xenbus_resume(void)
+{
+	xb_init_comms();
+	xs_resume();
+	if (!xenbus_frontend.error)
+		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, resume_dev);
+	xenbus_backend_resume(resume_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_resume);
+
+void xenbus_suspend_cancel(void)
+{
+	xs_suspend_cancel();
+	if (!xenbus_frontend.error)
+		bus_for_each_dev(&xenbus_frontend.bus, NULL, NULL, suspend_cancel_dev);
+	xenbus_backend_resume(suspend_cancel_dev);
+}
+EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
+
 /* A flag to determine if xenstored is 'ready' (i.e. has started) */
-int xenstored_ready = 0;
+atomic_t xenbus_xsd_state = ATOMIC_INIT(XENBUS_XSD_UNCOMMITTED);


 int register_xenstore_notifier(struct notifier_block *nb)
 {
 	int ret = 0;

-	if (xenstored_ready > 0)
+	if (is_xenstored_ready())
 		ret = nb->notifier_call(nb, 0, NULL);
 	else
 		blocking_notifier_chain_register(&xenstore_chain, nb);
@@ -767,9 +855,10 @@ void unregister_xenstore_notifier(struct
 }
 EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);

-void xenbus_probe(struct work_struct *unused)
+
+void xenbus_probe(void *unused)
 {
-	BUG_ON((xenstored_ready <= 0));
+	BUG_ON(!is_xenstored_ready());

 	/* Enumerate devices in xenstore and watch for changes. */
 	xenbus_probe_devices(&xenbus_frontend);
@@ -780,71 +869,252 @@ void xenbus_probe(struct work_struct *un
 	blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
 }

-static int __init xenbus_probe_init(void)
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
+static struct file_operations xsd_kva_fops;
+static struct proc_dir_entry *xsd_kva_intf;
+static struct proc_dir_entry *xsd_port_intf;
+
+static int xsd_kva_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	size_t size = vma->vm_end - vma->vm_start;
+	int old;
+	int rc;
+
+	old = atomic_cmpxchg(&xenbus_xsd_state,
+	                   XENBUS_XSD_UNCOMMITTED,
+	                   XENBUS_XSD_LOCAL_INIT);
+	switch (old) {
+		case XENBUS_XSD_UNCOMMITTED:
+			rc = xb_init_comms();
+			if (rc != 0)
+				return rc;
+			break;
+
+		case XENBUS_XSD_FOREIGN_INIT:
+		case XENBUS_XSD_FOREIGN_READY:
+			return -EBUSY;
+
+		case XENBUS_XSD_LOCAL_INIT:
+		case XENBUS_XSD_LOCAL_READY:
+		default:
+			break;
+	}
+
+	if ((size > PAGE_SIZE) || (vma->vm_pgoff != 0))
+		return -EINVAL;
+
+	if (remap_pfn_range(vma, vma->vm_start, mfn_to_pfn(xen_store_mfn),
+			    size, vma->vm_page_prot))
+		return -EAGAIN;
+
+	return 0;
+}
+
+static int xsd_kva_read(char *page, char **start, off_t off,
+			int count, int *eof, void *data)
+{
+	int len;
+
+	len  = sprintf(page, "0x%p", xen_store_interface);
+	*eof = 1;
+	return len;
+}
+
+static int xsd_port_read(char *page, char **start, off_t off,
+			 int count, int *eof, void *data)
+{
+	int len;
+
+	len  = sprintf(page, "%d", xen_store_evtchn);
+	*eof = 1;
+	return len;
+}
+#endif
+
+static int xb_free_port(evtchn_port_t port)
+{
+	struct evtchn_close close;
+	close.port = port;
+	return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+}
+
+int xenbus_conn(domid_t remote_dom, unsigned long *grant_ref, evtchn_port_t *local_port)
+{
+	struct evtchn_alloc_unbound alloc_unbound;
+	int rc, rc2;
+
+	BUG_ON(atomic_read(&xenbus_xsd_state) != XENBUS_XSD_FOREIGN_INIT);
+	BUG_ON(!is_initial_xendomain());
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
+	remove_xen_proc_entry("xsd_kva");
+	remove_xen_proc_entry("xsd_port");
+#endif
+
+	rc = xb_free_port(xen_store_evtchn);
+	if (rc != 0)
+		goto fail0;
+
+	alloc_unbound.dom = DOMID_SELF;
+	alloc_unbound.remote_dom = remote_dom;
+	rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+	                                 &alloc_unbound);
+	if (rc != 0)
+		goto fail0;
+	*local_port = xen_store_evtchn = alloc_unbound.port;
+
+	/* keep the old page (xen_store_mfn, xen_store_interface) */
+	rc = gnttab_grant_foreign_access(remote_dom, xen_store_mfn,
+	                                 GTF_permit_access);
+	if (rc < 0)
+		goto fail1;
+	*grant_ref = rc;
+
+	rc = xb_init_comms();
+	if (rc != 0)
+		goto fail1;
+
+	return 0;
+
+fail1:
+	rc2 = xb_free_port(xen_store_evtchn);
+	if (rc2 != 0)
+		printk(KERN_WARNING
+		       "XENBUS: Error freeing xenstore event channel: %d\n",
+		       rc2);
+fail0:
+	xen_store_evtchn = -1;
+	return rc;
+}
+
+static int xenbus_probe_init(void)
 {
 	int err = 0;
+	unsigned long page = 0;

 	DPRINTK("");

-	err = -ENODEV;
-	if (!xen_domain())
-		goto out_error;
+	if (!is_running_on_xen())
+		return -ENODEV;

 	/* Register ourselves with the kernel bus subsystem */
-	err = bus_register(&xenbus_frontend.bus);
-	if (err)
-		goto out_error;
-
-	err = xenbus_backend_bus_register();
-	if (err)
-		goto out_unreg_front;
+	xenbus_frontend.error = bus_register(&xenbus_frontend.bus);
+	if (xenbus_frontend.error)
+		printk(KERN_WARNING
+		       "XENBUS: Error registering frontend bus: %i\n",
+		       xenbus_frontend.error);
+	xenbus_backend_bus_register();

 	/*
 	 * Domain0 doesn't have a store_evtchn or store_mfn yet.
 	 */
-	if (xen_initial_domain()) {
-		/* dom0 not yet supported */
+	if (is_initial_xendomain()) {
+		struct evtchn_alloc_unbound alloc_unbound;
+
+		/* Allocate page. */
+		page = get_zeroed_page(GFP_KERNEL);
+		if (!page)
+			return -ENOMEM;
+
+		xen_store_mfn = xen_start_info->store_mfn =
+			pfn_to_mfn(virt_to_phys((void *)page) >>
+				   PAGE_SHIFT);
+
+		/* Next allocate a local port which xenstored can bind to */
+		alloc_unbound.dom        = DOMID_SELF;
+		alloc_unbound.remote_dom = DOMID_SELF;
+
+		err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
+						  &alloc_unbound);
+		if (err == -ENOSYS)
+			goto err;
+		BUG_ON(err);
+		xen_store_evtchn = xen_start_info->store_evtchn =
+			alloc_unbound.port;
+
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
+		/* And finally publish the above info in /proc/xen */
+		xsd_kva_intf = create_xen_proc_entry("xsd_kva", 0600);
+		if (xsd_kva_intf) {
+			memcpy(&xsd_kva_fops, xsd_kva_intf->proc_fops,
+			       sizeof(xsd_kva_fops));
+			xsd_kva_fops.mmap = xsd_kva_mmap;
+			xsd_kva_intf->proc_fops = &xsd_kva_fops;
+			xsd_kva_intf->read_proc = xsd_kva_read;
+		}
+		xsd_port_intf = create_xen_proc_entry("xsd_port", 0400);
+		if (xsd_port_intf)
+			xsd_port_intf->read_proc = xsd_port_read;
+#endif
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
 	} else {
-		xenstored_ready = 1;
+		atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY);
+#ifdef CONFIG_XEN
 		xen_store_evtchn = xen_start_info->store_evtchn;
 		xen_store_mfn = xen_start_info->store_mfn;
+		xen_store_interface = mfn_to_virt(xen_store_mfn);
+#else
+		xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
+		xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
+		xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+					      PAGE_SIZE);
+#endif
+		/* Initialize the shared memory rings to talk to xenstored */
+		err = xb_init_comms();
+		if (err)
+			goto err;
 	}
-	xen_store_interface = mfn_to_virt(xen_store_mfn);
+
+	xenbus_dev_init();

 	/* Initialize the interface to xenstore. */
 	err = xs_init();
 	if (err) {
 		printk(KERN_WARNING
 		       "XENBUS: Error initializing xenstore comms: %i\n", err);
-		goto out_unreg_back;
+		goto err;
 	}

-	if (!xen_initial_domain())
-		xenbus_probe(NULL);
+	/* Register ourselves with the kernel device subsystem */
+	if (!xenbus_frontend.error) {
+		xenbus_frontend.error = device_register(&xenbus_frontend.dev);
+		if (xenbus_frontend.error) {
+			bus_unregister(&xenbus_frontend.bus);
+			printk(KERN_WARNING
+			       "XENBUS: Error registering frontend device: %i\n",
+			       xenbus_frontend.error);
+		}
+	}
+	xenbus_backend_device_register();

-#ifdef CONFIG_XEN_COMPAT_XENFS
-	/*
-	 * Create xenfs mountpoint in /proc for compatibility with
-	 * utilities that expect to find "xenbus" under "/proc/xen".
-	 */
-	proc_mkdir("xen", NULL);
-#endif
+	if (!is_initial_xendomain())
+		xenbus_probe(NULL);

 	return 0;

-  out_unreg_back:
-	xenbus_backend_bus_unregister();
+ err:
+	if (page)
+		free_page(page);

-  out_unreg_front:
-	bus_unregister(&xenbus_frontend.bus);
+	/*
+	 * Do not unregister the xenbus front/backend buses here. The buses
+	 * must exist because front/backend drivers will use them when they are
+	 * registered.
+	 */

-  out_error:
 	return err;
 }

+#ifdef CONFIG_XEN
 postcore_initcall(xenbus_probe_init);
-
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
+#else
+int xenbus_init(void)
+{
+	return xenbus_probe_init();
+}
+#endif

 static int is_device_connecting(struct device *dev, void *data)
 {
@@ -871,6 +1141,8 @@ static int is_device_connecting(struct d

 static int exists_connecting_device(struct device_driver *drv)
 {
+	if (xenbus_frontend.error)
+		return xenbus_frontend.error;
 	return bus_for_each_dev(&xenbus_frontend.bus, NULL, drv,
 				is_device_connecting);
 }
@@ -879,6 +1151,7 @@ static int print_device_status(struct de
 {
 	struct xenbus_device *xendev = to_xenbus_device(dev);
 	struct device_driver *drv = data;
+	struct xenbus_driver *xendrv;

 	/* Is this operation limited to a particular driver? */
 	if (drv && (dev->driver != drv))
@@ -888,7 +1161,10 @@ static int print_device_status(struct de
 		/* Information only: is this too noisy? */
 		printk(KERN_INFO "XENBUS: Device with no driver: %s\n",
 		       xendev->nodename);
-	} else if (xendev->state < XenbusStateConnected) {
+		return 0;
+	}
+
+	if (xendev->state < XenbusStateConnected) {
 		enum xenbus_state rstate = XenbusStateUnknown;
 		if (xendev->otherend)
 			rstate = xenbus_read_driver_state(xendev->otherend);
@@ -897,6 +1173,11 @@ static int print_device_status(struct de
 		       xendev->nodename, xendev->state, rstate);
 	}

+	xendrv = to_xenbus_driver(dev->driver);
+	if (xendrv->is_ready && !xendrv->is_ready(xendev))
+		printk(KERN_WARNING "XENBUS: Device not ready: %s\n",
+		       xendev->nodename);
+
 	return 0;
 }

@@ -923,7 +1204,7 @@ static void wait_for_devices(struct xenb
 	struct device_driver *drv = xendrv ? &xendrv->driver : NULL;
 	unsigned int seconds_waited = 0;

-	if (!ready_to_wait_for_devices || !xen_domain())
+	if (!ready_to_wait_for_devices || !is_running_on_xen())
 		return;

 	while (exists_connecting_device(drv)) {
@@ -950,10 +1231,18 @@ static void wait_for_devices(struct xenb
 #ifndef MODULE
 static int __init boot_wait_for_devices(void)
 {
-	ready_to_wait_for_devices = 1;
-	wait_for_devices(NULL);
+	if (!xenbus_frontend.error) {
+		ready_to_wait_for_devices = 1;
+		wait_for_devices(NULL);
+	}
 	return 0;
 }

 late_initcall(boot_wait_for_devices);
 #endif
+
+int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *))
+{
+	return bus_for_each_dev(&xenbus_frontend.bus, NULL, arg, fn);
+}
+EXPORT_SYMBOL_GPL(xenbus_for_each_frontend);
--- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_probe.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/xenbus_probe.h	2010-01-19 16:01:04.000000000 +0100
@@ -34,43 +34,47 @@
 #ifndef _XENBUS_PROBE_H
 #define _XENBUS_PROBE_H

+#ifndef BUS_ID_SIZE
 #define XEN_BUS_ID_SIZE			20
+#else
+#define XEN_BUS_ID_SIZE			BUS_ID_SIZE
+#endif

-#ifdef CONFIG_XEN_BACKEND
+#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE)
 extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
 extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
 extern void xenbus_backend_probe_and_watch(void);
-extern int xenbus_backend_bus_register(void);
-extern void xenbus_backend_bus_unregister(void);
+extern void xenbus_backend_bus_register(void);
+extern void xenbus_backend_device_register(void);
 #else
 static inline void xenbus_backend_suspend(int (*fn)(struct device *, void *)) {}
 static inline void xenbus_backend_resume(int (*fn)(struct device *, void *)) {}
 static inline void xenbus_backend_probe_and_watch(void) {}
-static inline int xenbus_backend_bus_register(void) { return 0; }
-static inline void xenbus_backend_bus_unregister(void) {}
+static inline void xenbus_backend_bus_register(void) {}
+static inline void xenbus_backend_device_register(void) {}
 #endif

 struct xen_bus_type
 {
 	char *root;
+	int error;
 	unsigned int levels;
 	int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char *nodename);
 	int (*probe)(const char *type, const char *dir);
 	struct bus_type bus;
+	struct device dev;
 };

 extern int xenbus_match(struct device *_dev, struct device_driver *_drv);
 extern int xenbus_dev_probe(struct device *_dev);
 extern int xenbus_dev_remove(struct device *_dev);
 extern int xenbus_register_driver_common(struct xenbus_driver *drv,
-					 struct xen_bus_type *bus,
-					 struct module *owner,
-					 const char *mod_name);
+					 struct xen_bus_type *bus);
 extern int xenbus_probe_node(struct xen_bus_type *bus,
 			     const char *type,
 			     const char *nodename);
 extern int xenbus_probe_devices(struct xen_bus_type *bus);

-extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus);
+extern void dev_changed(const char *node, struct xen_bus_type *bus);

 #endif
--- head-2010-05-12.orig/drivers/xen/xenbus/xenbus_xs.c	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/drivers/xen/xenbus/xenbus_xs.c	2010-01-19 16:01:04.000000000 +0100
@@ -47,6 +47,14 @@
 #include <xen/xenbus.h>
 #include "xenbus_comms.h"

+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
+#include <xen/platform-compat.h>
+#endif
+
+#ifndef PF_NOFREEZE /* Old kernel (pre-2.6.6). */
+#define PF_NOFREEZE	0
+#endif
+
 struct xs_stored_msg {
 	struct list_head list;

@@ -76,6 +84,14 @@ struct xs_handle {
 	/*
 	 * Mutex ordering: transaction_mutex -> watch_mutex -> request_mutex.
 	 * response_mutex is never taken simultaneously with the other three.
+	 *
+	 * transaction_mutex must be held before incrementing
+	 * transaction_count. The mutex is held when a suspend is in
+	 * progress to prevent new transactions starting.
+	 *
+	 * When decrementing transaction_count to zero the wait queue
+	 * should be woken up, the suspend code waits for count to
+	 * reach zero.
 	 */

 	/* One request at a time. */
@@ -85,7 +101,9 @@ struct xs_handle {
 	struct mutex response_mutex;

 	/* Protect transactions against save/restore. */
-	struct rw_semaphore transaction_mutex;
+	struct mutex transaction_mutex;
+	atomic_t transaction_count;
+	wait_queue_head_t transaction_wq;

 	/* Protect watch (de)register against save/restore. */
 	struct rw_semaphore watch_mutex;
@@ -108,7 +126,7 @@ static DEFINE_SPINLOCK(watch_events_lock
  * carrying out work.
  */
 static pid_t xenwatch_pid;
-static DEFINE_MUTEX(xenwatch_mutex);
+/* static */ DEFINE_MUTEX(xenwatch_mutex);
 static DECLARE_WAIT_QUEUE_HEAD(watch_events_waitq);

 static int get_error(const char *errorstring)
@@ -157,6 +175,31 @@ static void *read_reply(enum xsd_sockmsg
 	return body;
 }

+static void transaction_start(void)
+{
+	mutex_lock(&xs_state.transaction_mutex);
+	atomic_inc(&xs_state.transaction_count);
+	mutex_unlock(&xs_state.transaction_mutex);
+}
+
+static void transaction_end(void)
+{
+	if (atomic_dec_and_test(&xs_state.transaction_count))
+		wake_up(&xs_state.transaction_wq);
+}
+
+static void transaction_suspend(void)
+{
+	mutex_lock(&xs_state.transaction_mutex);
+	wait_event(xs_state.transaction_wq,
+		   atomic_read(&xs_state.transaction_count) == 0);
+}
+
+static void transaction_resume(void)
+{
+	mutex_unlock(&xs_state.transaction_mutex);
+}
+
 void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg)
 {
 	void *ret;
@@ -164,7 +207,7 @@ void *xenbus_dev_request_and_reply(struc
 	int err;

 	if (req_msg.type == XS_TRANSACTION_START)
-		down_read(&xs_state.transaction_mutex);
+		transaction_start();

 	mutex_lock(&xs_state.request_mutex);

@@ -177,14 +220,13 @@ void *xenbus_dev_request_and_reply(struc

 	mutex_unlock(&xs_state.request_mutex);

-	if ((msg->type == XS_TRANSACTION_END) ||
+	if ((req_msg.type == XS_TRANSACTION_END) ||
 	    ((req_msg.type == XS_TRANSACTION_START) &&
 	     (msg->type == XS_ERROR)))
-		up_read(&xs_state.transaction_mutex);
+		transaction_end();

 	return ret;
 }
-EXPORT_SYMBOL(xenbus_dev_request_and_reply);

 /* Send message to xs, get kmalloc'ed reply.  ERR_PTR() on error. */
 static void *xs_talkv(struct xenbus_transaction t,
@@ -214,7 +256,7 @@ static void *xs_talkv(struct xenbus_tran
 	}

 	for (i = 0; i < num_vecs; i++) {
-		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);
+		err = xb_write(iovec[i].iov_base, iovec[i].iov_len);;
 		if (err) {
 			mutex_unlock(&xs_state.request_mutex);
 			return ERR_PTR(err);
@@ -295,7 +337,7 @@ static char **split(char *strings, unsig
 	char *p, **ret;

 	/* Count the strings. */
-	*num = count_strings(strings, len);
+	*num = count_strings(strings, len) + 1;

 	/* Transfer to one big alloc for easy freeing. */
 	ret = kmalloc(*num * sizeof(char *) + len, GFP_NOIO | __GFP_HIGH);
@@ -309,6 +351,7 @@ static char **split(char *strings, unsig
 	strings = (char *)&ret[*num];
 	for (p = strings, *num = 0; p < strings + len; p += strlen(p) + 1)
 		ret[(*num)++] = p;
+	ret[*num] = strings + len;

 	return ret;
 }
@@ -432,11 +475,11 @@ int xenbus_transaction_start(struct xenb
 {
 	char *id_str;

-	down_read(&xs_state.transaction_mutex);
+	transaction_start();

 	id_str = xs_single(XBT_NIL, XS_TRANSACTION_START, "", NULL);
 	if (IS_ERR(id_str)) {
-		up_read(&xs_state.transaction_mutex);
+		transaction_end();
 		return PTR_ERR(id_str);
 	}

@@ -461,7 +504,7 @@ int xenbus_transaction_end(struct xenbus

 	err = xs_error(xs_single(t, XS_TRANSACTION_END, abortstr, NULL));

-	up_read(&xs_state.transaction_mutex);
+	transaction_end();

 	return err;
 }
@@ -622,6 +665,8 @@ void unregister_xenbus_watch(struct xenb
 	char token[sizeof(watch) * 2 + 1];
 	int err;

+	BUG_ON(watch->flags & XBWF_new_thread);
+
 	sprintf(token, "%lX", (long)watch);

 	down_read(&xs_state.watch_mutex);
@@ -639,11 +684,6 @@ void unregister_xenbus_watch(struct xenb

 	up_read(&xs_state.watch_mutex);

-	/* Make sure there are no callbacks running currently (unless
-	   its us) */
-	if (current->pid != xenwatch_pid)
-		mutex_lock(&xenwatch_mutex);
-
 	/* Cancel pending watch events. */
 	spin_lock(&watch_events_lock);
 	list_for_each_entry_safe(msg, tmp, &watch_events, list) {
@@ -655,14 +695,17 @@ void unregister_xenbus_watch(struct xenb
 	}
 	spin_unlock(&watch_events_lock);

-	if (current->pid != xenwatch_pid)
+	/* Flush any currently-executing callback, unless we are it. :-) */
+	if (current->pid != xenwatch_pid) {
+		mutex_lock(&xenwatch_mutex);
 		mutex_unlock(&xenwatch_mutex);
+	}
 }
 EXPORT_SYMBOL_GPL(unregister_xenbus_watch);

 void xs_suspend(void)
 {
-	down_write(&xs_state.transaction_mutex);
+	transaction_suspend();
 	down_write(&xs_state.watch_mutex);
 	mutex_lock(&xs_state.request_mutex);
 	mutex_lock(&xs_state.response_mutex);
@@ -673,11 +716,9 @@ void xs_resume(void)
 	struct xenbus_watch *watch;
 	char token[sizeof(watch) * 2 + 1];

-	xb_init_comms();
-
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
-	up_write(&xs_state.transaction_mutex);
+	transaction_resume();

 	/* No need for watches_lock: the watch_mutex is sufficient. */
 	list_for_each_entry(watch, &watches, list) {
@@ -693,7 +734,25 @@ void xs_suspend_cancel(void)
 	mutex_unlock(&xs_state.response_mutex);
 	mutex_unlock(&xs_state.request_mutex);
 	up_write(&xs_state.watch_mutex);
-	up_write(&xs_state.transaction_mutex);
+	mutex_unlock(&xs_state.transaction_mutex);
+}
+
+static int xenwatch_handle_callback(void *data)
+{
+	struct xs_stored_msg *msg = data;
+
+	msg->u.watch.handle->callback(msg->u.watch.handle,
+				      (const char **)msg->u.watch.vec,
+				      msg->u.watch.vec_size);
+
+	kfree(msg->u.watch.vec);
+	kfree(msg);
+
+	/* Kill this kthread if we were spawned just for this callback. */
+	if (current->pid != xenwatch_pid)
+		do_exit(0);
+
+	return 0;
 }

 static int xenwatch_thread(void *unused)
@@ -701,6 +760,7 @@ static int xenwatch_thread(void *unused)
 	struct list_head *ent;
 	struct xs_stored_msg *msg;

+	current->flags |= PF_NOFREEZE;
 	for (;;) {
 		wait_event_interruptible(watch_events_waitq,
 					 !list_empty(&watch_events));
@@ -716,17 +776,29 @@ static int xenwatch_thread(void *unused)
 			list_del(ent);
 		spin_unlock(&watch_events_lock);

-		if (ent != &watch_events) {
-			msg = list_entry(ent, struct xs_stored_msg, list);
-			msg->u.watch.handle->callback(
-				msg->u.watch.handle,
-				(const char **)msg->u.watch.vec,
-				msg->u.watch.vec_size);
-			kfree(msg->u.watch.vec);
-			kfree(msg);
+		if (ent == &watch_events) {
+			mutex_unlock(&xenwatch_mutex);
+			continue;
 		}

-		mutex_unlock(&xenwatch_mutex);
+		msg = list_entry(ent, struct xs_stored_msg, list);
+
+		/*
+		 * Unlock the mutex before running an XBWF_new_thread
+		 * handler. kthread_run can block which can deadlock
+		 * against unregister_xenbus_watch() if we need to
+		 * unregister other watches in order to make
+		 * progress. This can occur on resume before the swap
+		 * device is attached.
+		 */
+		if (msg->u.watch.handle->flags & XBWF_new_thread) {
+			mutex_unlock(&xenwatch_mutex);
+			kthread_run(xenwatch_handle_callback,
+				    msg, "xenwatch_cb");
+		} else {
+			xenwatch_handle_callback(msg);
+			mutex_unlock(&xenwatch_mutex);
+		}
 	}

 	return 0;
@@ -820,6 +892,7 @@ static int xenbus_thread(void *unused)
 {
 	int err;

+	current->flags |= PF_NOFREEZE;
 	for (;;) {
 		err = process_msg();
 		if (err)
@@ -834,7 +907,6 @@ static int xenbus_thread(void *unused)

 int xs_init(void)
 {
-	int err;
 	struct task_struct *task;

 	INIT_LIST_HEAD(&xs_state.reply_list);
@@ -843,13 +915,10 @@ int xs_init(void)

 	mutex_init(&xs_state.request_mutex);
 	mutex_init(&xs_state.response_mutex);
-	init_rwsem(&xs_state.transaction_mutex);
+	mutex_init(&xs_state.transaction_mutex);
 	init_rwsem(&xs_state.watch_mutex);
-
-	/* Initialize the shared memory rings to talk to xenstored */
-	err = xb_init_comms();
-	if (err)
-		return err;
+	atomic_set(&xs_state.transaction_count, 0);
+	init_waitqueue_head(&xs_state.transaction_wq);

 	task = kthread_run(xenwatch_thread, NULL, "xenwatch");
 	if (IS_ERR(task))
--- head-2010-05-12.orig/include/xen/evtchn.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/evtchn.h	2010-01-19 16:01:04.000000000 +0100
@@ -1,7 +1,11 @@
+#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__)
+#include "public/evtchn.h"
+#else
 /******************************************************************************
  * evtchn.h
  *
- * Interface to /dev/xen/evtchn.
+ * Communication via Xen event channels.
+ * Also definitions for the device that demuxes notifications to userspace.
  *
  * Copyright (c) 2003-2005, K A Fraser
  *
@@ -30,59 +34,117 @@
  * IN THE SOFTWARE.
  */

-#ifndef __LINUX_PUBLIC_EVTCHN_H__
-#define __LINUX_PUBLIC_EVTCHN_H__
+#ifndef __ASM_EVTCHN_H__
+#define __ASM_EVTCHN_H__

-/*
- * Bind a fresh port to VIRQ @virq.
- * Return allocated port.
- */
-#define IOCTL_EVTCHN_BIND_VIRQ				\
-	_IOC(_IOC_NONE, 'E', 0, sizeof(struct ioctl_evtchn_bind_virq))
-struct ioctl_evtchn_bind_virq {
-	unsigned int virq;
-};
+#include <linux/interrupt.h>
+#include <asm/hypervisor.h>
+#include <asm/ptrace.h>
+#include <asm/synch_bitops.h>
+#include <xen/interface/event_channel.h>
+#include <linux/smp.h>

 /*
- * Bind a fresh port to remote <@remote_domain, @remote_port>.
- * Return allocated port.
+ * LOW-LEVEL DEFINITIONS
  */
-#define IOCTL_EVTCHN_BIND_INTERDOMAIN			\
-	_IOC(_IOC_NONE, 'E', 1, sizeof(struct ioctl_evtchn_bind_interdomain))
-struct ioctl_evtchn_bind_interdomain {
-	unsigned int remote_domain, remote_port;
-};

 /*
- * Allocate a fresh port for binding to @remote_domain.
- * Return allocated port.
- */
-#define IOCTL_EVTCHN_BIND_UNBOUND_PORT			\
-	_IOC(_IOC_NONE, 'E', 2, sizeof(struct ioctl_evtchn_bind_unbound_port))
-struct ioctl_evtchn_bind_unbound_port {
-	unsigned int remote_domain;
-};
+ * Dynamically bind an event source to an IRQ-like callback handler.
+ * On some platforms this may not be implemented via the Linux IRQ subsystem.
+ * The IRQ argument passed to the callback handler is the same as returned
+ * from the bind call. It may not correspond to a Linux IRQ number.
+ * Returns IRQ or negative errno.
+ */
+int bind_caller_port_to_irqhandler(
+	unsigned int caller_port,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id);
+int bind_listening_port_to_irqhandler(
+	unsigned int remote_domain,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id);
+int bind_interdomain_evtchn_to_irqhandler(
+	unsigned int remote_domain,
+	unsigned int remote_port,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id);
+int bind_virq_to_irqhandler(
+	unsigned int virq,
+	unsigned int cpu,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id);
+int bind_ipi_to_irqhandler(
+	unsigned int ipi,
+	unsigned int cpu,
+	irqreturn_t (*handler)(int, void *, struct pt_regs *),
+	unsigned long irqflags,
+	const char *devname,
+	void *dev_id);

 /*
- * Unbind previously allocated @port.
- */
-#define IOCTL_EVTCHN_UNBIND				\
-	_IOC(_IOC_NONE, 'E', 3, sizeof(struct ioctl_evtchn_unbind))
-struct ioctl_evtchn_unbind {
-	unsigned int port;
-};
+ * Common unbind function for all event sources. Takes IRQ to unbind from.
+ * Automatically closes the underlying event channel (except for bindings
+ * made with bind_caller_port_to_irqhandler()).
+ */
+void unbind_from_irqhandler(unsigned int irq, void *dev_id);
+
+void irq_resume(void);
+
+/* Entry point for notifications into Linux subsystems. */
+asmlinkage void evtchn_do_upcall(struct pt_regs *regs);
+
+/* Entry point for notifications into the userland character device. */
+void evtchn_device_upcall(int port);
+
+/* Mark a PIRQ as unavailable for dynamic allocation. */
+void evtchn_register_pirq(int irq);
+/* Map a Xen-supplied PIRQ to a dynamically allocated one. */
+int evtchn_map_pirq(int irq, int xen_pirq);
+/* Look up a Xen-supplied PIRQ for a dynamically allocated one. */
+int evtchn_get_xen_pirq(int irq);
+
+void mask_evtchn(int port);
+void disable_all_local_evtchn(void);
+void unmask_evtchn(int port);
+
+#ifdef CONFIG_SMP
+void rebind_evtchn_to_cpu(int port, unsigned int cpu);
+#else
+#define rebind_evtchn_to_cpu(port, cpu)	((void)0)
+#endif
+
+static inline int test_and_set_evtchn_mask(int port)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	return synch_test_and_set_bit(port, s->evtchn_mask);
+}
+
+static inline void clear_evtchn(int port)
+{
+	shared_info_t *s = HYPERVISOR_shared_info;
+	synch_clear_bit(port, s->evtchn_pending);
+}
+
+static inline void notify_remote_via_evtchn(int port)
+{
+	struct evtchn_send send = { .port = port };
+	VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send));
+}

 /*
- * Unbind previously allocated @port.
+ * Use these to access the event channel underlying the IRQ handle returned
+ * by bind_*_to_irqhandler().
  */
-#define IOCTL_EVTCHN_NOTIFY				\
-	_IOC(_IOC_NONE, 'E', 4, sizeof(struct ioctl_evtchn_notify))
-struct ioctl_evtchn_notify {
-	unsigned int port;
-};
-
-/* Clear and reinitialise the event buffer. Clear error condition. */
-#define IOCTL_EVTCHN_RESET				\
-	_IOC(_IOC_NONE, 'E', 5, 0)
+void notify_remote_via_irq(int irq);
+int irq_to_evtchn_port(int irq);

-#endif /* __LINUX_PUBLIC_EVTCHN_H__ */
+#endif /* __ASM_EVTCHN_H__ */
+#endif /* CONFIG_PARAVIRT_XEN */
--- head-2010-05-12.orig/include/xen/interface/callback.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/callback.h	2010-01-19 16:01:04.000000000 +0100
@@ -86,6 +86,8 @@ struct callback_register {
 	uint16_t flags;
 	xen_callback_t address;
 };
+typedef struct callback_register callback_register_t;
+DEFINE_XEN_GUEST_HANDLE(callback_register_t);

 /*
  * Unregister a callback.
@@ -98,5 +100,12 @@ struct callback_unregister {
     uint16_t type;
     uint16_t _unused;
 };
+typedef struct callback_unregister callback_unregister_t;
+DEFINE_XEN_GUEST_HANDLE(callback_unregister_t);
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030207
+#undef CALLBACKTYPE_sysenter
+#define CALLBACKTYPE_sysenter CALLBACKTYPE_sysenter_deprecated
+#endif

 #endif /* __XEN_PUBLIC_CALLBACK_H__ */
--- head-2010-05-12.orig/include/xen/interface/elfnote.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/elfnote.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Definitions used for the Xen ELF notes.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2006, Ian Campbell, XenSource Ltd.
  */

@@ -10,7 +28,7 @@
 #define __XEN_PUBLIC_ELFNOTE_H__

 /*
- * The notes should live in a SHT_NOTE segment and have "Xen" in the
+ * The notes should live in a PT_NOTE segment and have "Xen" in the
  * name field.
  *
  * Numeric types are either 4 or 8 bytes depending on the content of
@@ -22,8 +40,6 @@

 /*
  * NAME=VALUE pair (string).
- *
- * LEGACY: FEATURES and PAE
  */
 #define XEN_ELFNOTE_INFO           0

@@ -90,7 +106,12 @@
 #define XEN_ELFNOTE_LOADER         8

 /*
- * The kernel supports PAE (x86/32 only, string = "yes" or "no").
+ * The kernel supports PAE (x86/32 only, string = "yes", "no" or
+ * "bimodal").
+ *
+ * For compatibility with Xen 3.0.3 and earlier the "bimodal" setting
+ * may be given as "yes,bimodal" which will cause older Xen to treat
+ * this kernel as PAE.
  *
  * LEGACY: PAE (n.b. The legacy interface included a provision to
  * indicate 'extended-cr3' support allowing L3 page tables to be
@@ -140,6 +161,76 @@
  */
 #define XEN_ELFNOTE_SUSPEND_CANCEL 14

+/*
+ * The (non-default) location the initial phys-to-machine map should be
+ * placed at by the hypervisor (Dom0) or the tools (DomU).
+ * The kernel must be prepared for this mapping to be established using
+ * large pages, despite such otherwise not being available to guests.
+ * The kernel must also be able to handle the page table pages used for
+ * this mapping not being accessible through the initial mapping.
+ * (Only x86-64 supports this at present.)
+ */
+#define XEN_ELFNOTE_INIT_P2M      15
+
+/*
+ * The number of the highest elfnote defined.
+ */
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_INIT_P2M
+
+/*
+ * System information exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_INFO
+ * note in case of a system crash. This note will contain various
+ * information about the system, see xen/include/xen/elfcore.h.
+ */
+#define XEN_ELFNOTE_CRASH_INFO 0x1000001
+
+/*
+ * System registers exported through crash notes.
+ *
+ * The kexec / kdump code will create one XEN_ELFNOTE_CRASH_REGS
+ * note per cpu in case of a system crash. This note is architecture
+ * specific and will contain registers not saved in the "CORE" note.
+ * See xen/include/xen/elfcore.h for more information.
+ */
+#define XEN_ELFNOTE_CRASH_REGS 0x1000002
+
+
+/*
+ * xen dump-core none note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_NONE
+ * in its dump file to indicate that the file is xen dump-core
+ * file. This note doesn't have any other information.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_NONE               0x2000000
+
+/*
+ * xen dump-core header note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_HEADER
+ * in its dump file.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_HEADER             0x2000001
+
+/*
+ * xen dump-core xen version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_XEN_VERSION
+ * in its dump file. It contains the xen version obtained via the
+ * XENVER hypercall.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_XEN_VERSION        0x2000002
+
+/*
+ * xen dump-core format version note.
+ * xm dump-core code will create one XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION
+ * in its dump file. It contains a format version identifier.
+ * See tools/libxc/xc_core.h for more information.
+ */
+#define XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION     0x2000003
+
 #endif /* __XEN_PUBLIC_ELFNOTE_H__ */

 /*
--- head-2010-05-12.orig/include/xen/interface/event_channel.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/event_channel.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Event channels between domains.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2003-2004, K A Fraser.
  */

@@ -11,8 +29,15 @@

 #include <xen/interface/xen.h>

+/*
+ * Prototype for this hypercall is:
+ *  int event_channel_op(int cmd, void *args)
+ * @cmd  == EVTCHNOP_??? (event-channel operation).
+ * @args == Operation-specific extra arguments (NULL if none).
+ */
+
 typedef uint32_t evtchn_port_t;
-DEFINE_GUEST_HANDLE(evtchn_port_t);
+DEFINE_XEN_GUEST_HANDLE(evtchn_port_t);

 /*
  * EVTCHNOP_alloc_unbound: Allocate a port in domain <dom> and mark as
@@ -22,13 +47,14 @@ DEFINE_GUEST_HANDLE(evtchn_port_t);
  *  1. If the caller is unprivileged then <dom> must be DOMID_SELF.
  *  2. <rdom> may be DOMID_SELF, allowing loopback connections.
  */
-#define EVTCHNOP_alloc_unbound	  6
+#define EVTCHNOP_alloc_unbound    6
 struct evtchn_alloc_unbound {
-	/* IN parameters */
-	domid_t dom, remote_dom;
-	/* OUT parameters */
-	evtchn_port_t port;
+    /* IN parameters */
+    domid_t dom, remote_dom;
+    /* OUT parameters */
+    evtchn_port_t port;
 };
+typedef struct evtchn_alloc_unbound evtchn_alloc_unbound_t;

 /*
  * EVTCHNOP_bind_interdomain: Construct an interdomain event channel between
@@ -41,29 +67,35 @@ struct evtchn_alloc_unbound {
  */
 #define EVTCHNOP_bind_interdomain 0
 struct evtchn_bind_interdomain {
-	/* IN parameters. */
-	domid_t remote_dom;
-	evtchn_port_t remote_port;
-	/* OUT parameters. */
-	evtchn_port_t local_port;
+    /* IN parameters. */
+    domid_t remote_dom;
+    evtchn_port_t remote_port;
+    /* OUT parameters. */
+    evtchn_port_t local_port;
 };
+typedef struct evtchn_bind_interdomain evtchn_bind_interdomain_t;

 /*
  * EVTCHNOP_bind_virq: Bind a local event channel to VIRQ <irq> on specified
  * vcpu.
  * NOTES:
- *  1. A virtual IRQ may be bound to at most one event channel per vcpu.
- *  2. The allocated event channel is bound to the specified vcpu. The binding
- *     may not be changed.
+ *  1. Virtual IRQs are classified as per-vcpu or global. See the VIRQ list
+ *     in xen.h for the classification of each VIRQ.
+ *  2. Global VIRQs must be allocated on VCPU0 but can subsequently be
+ *     re-bound via EVTCHNOP_bind_vcpu.
+ *  3. Per-vcpu VIRQs may be bound to at most one event channel per vcpu.
+ *     The allocated event channel is bound to the specified vcpu and the
+ *     binding cannot be changed.
  */
-#define EVTCHNOP_bind_virq	  1
+#define EVTCHNOP_bind_virq        1
 struct evtchn_bind_virq {
-	/* IN parameters. */
-	uint32_t virq;
-	uint32_t vcpu;
-	/* OUT parameters. */
-	evtchn_port_t port;
+    /* IN parameters. */
+    uint32_t virq;
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
 };
+typedef struct evtchn_bind_virq evtchn_bind_virq_t;

 /*
  * EVTCHNOP_bind_pirq: Bind a local event channel to PIRQ <irq>.
@@ -71,15 +103,16 @@ struct evtchn_bind_virq {
  *  1. A physical IRQ may be bound to at most one event channel per domain.
  *  2. Only a sufficiently-privileged domain may bind to a physical IRQ.
  */
-#define EVTCHNOP_bind_pirq	  2
+#define EVTCHNOP_bind_pirq        2
 struct evtchn_bind_pirq {
-	/* IN parameters. */
-	uint32_t pirq;
+    /* IN parameters. */
+    uint32_t pirq;
 #define BIND_PIRQ__WILL_SHARE 1
-	uint32_t flags; /* BIND_PIRQ__* */
-	/* OUT parameters. */
-	evtchn_port_t port;
+    uint32_t flags; /* BIND_PIRQ__* */
+    /* OUT parameters. */
+    evtchn_port_t port;
 };
+typedef struct evtchn_bind_pirq evtchn_bind_pirq_t;

 /*
  * EVTCHNOP_bind_ipi: Bind a local event channel to receive events.
@@ -87,33 +120,36 @@ struct evtchn_bind_pirq {
  *  1. The allocated event channel is bound to the specified vcpu. The binding
  *     may not be changed.
  */
-#define EVTCHNOP_bind_ipi	  7
+#define EVTCHNOP_bind_ipi         7
 struct evtchn_bind_ipi {
-	uint32_t vcpu;
-	/* OUT parameters. */
-	evtchn_port_t port;
+    uint32_t vcpu;
+    /* OUT parameters. */
+    evtchn_port_t port;
 };
+typedef struct evtchn_bind_ipi evtchn_bind_ipi_t;

 /*
  * EVTCHNOP_close: Close a local event channel <port>. If the channel is
  * interdomain then the remote end is placed in the unbound state
  * (EVTCHNSTAT_unbound), awaiting a new connection.
  */
-#define EVTCHNOP_close		  3
+#define EVTCHNOP_close            3
 struct evtchn_close {
-	/* IN parameters. */
-	evtchn_port_t port;
+    /* IN parameters. */
+    evtchn_port_t port;
 };
+typedef struct evtchn_close evtchn_close_t;

 /*
  * EVTCHNOP_send: Send an event to the remote end of the channel whose local
  * endpoint is <port>.
  */
-#define EVTCHNOP_send		  4
+#define EVTCHNOP_send             4
 struct evtchn_send {
-	/* IN parameters. */
-	evtchn_port_t port;
+    /* IN parameters. */
+    evtchn_port_t port;
 };
+typedef struct evtchn_send evtchn_send_t;

 /*
  * EVTCHNOP_status: Get the current status of the communication channel which
@@ -123,75 +159,98 @@ struct evtchn_send {
  *  2. Only a sufficiently-privileged domain may obtain the status of an event
  *     channel for which <dom> is not DOMID_SELF.
  */
-#define EVTCHNOP_status		  5
+#define EVTCHNOP_status           5
 struct evtchn_status {
-	/* IN parameters */
-	domid_t  dom;
-	evtchn_port_t port;
-	/* OUT parameters */
-#define EVTCHNSTAT_closed	0  /* Channel is not in use.		     */
-#define EVTCHNSTAT_unbound	1  /* Channel is waiting interdom connection.*/
-#define EVTCHNSTAT_interdomain	2  /* Channel is connected to remote domain. */
-#define EVTCHNSTAT_pirq		3  /* Channel is bound to a phys IRQ line.   */
-#define EVTCHNSTAT_virq		4  /* Channel is bound to a virtual IRQ line */
-#define EVTCHNSTAT_ipi		5  /* Channel is bound to a virtual IPI line */
-	uint32_t status;
-	uint32_t vcpu;		   /* VCPU to which this channel is bound.   */
-	union {
-		struct {
-			domid_t dom;
-		} unbound; /* EVTCHNSTAT_unbound */
-		struct {
-			domid_t dom;
-			evtchn_port_t port;
-		} interdomain; /* EVTCHNSTAT_interdomain */
-		uint32_t pirq;	    /* EVTCHNSTAT_pirq	      */
-		uint32_t virq;	    /* EVTCHNSTAT_virq	      */
-	} u;
+    /* IN parameters */
+    domid_t  dom;
+    evtchn_port_t port;
+    /* OUT parameters */
+#define EVTCHNSTAT_closed       0  /* Channel is not in use.                 */
+#define EVTCHNSTAT_unbound      1  /* Channel is waiting interdom connection.*/
+#define EVTCHNSTAT_interdomain  2  /* Channel is connected to remote domain. */
+#define EVTCHNSTAT_pirq         3  /* Channel is bound to a phys IRQ line.   */
+#define EVTCHNSTAT_virq         4  /* Channel is bound to a virtual IRQ line */
+#define EVTCHNSTAT_ipi          5  /* Channel is bound to a virtual IPI line */
+    uint32_t status;
+    uint32_t vcpu;                 /* VCPU to which this channel is bound.   */
+    union {
+        struct {
+            domid_t dom;
+        } unbound; /* EVTCHNSTAT_unbound */
+        struct {
+            domid_t dom;
+            evtchn_port_t port;
+        } interdomain; /* EVTCHNSTAT_interdomain */
+        uint32_t pirq;      /* EVTCHNSTAT_pirq        */
+        uint32_t virq;      /* EVTCHNSTAT_virq        */
+    } u;
 };
+typedef struct evtchn_status evtchn_status_t;

 /*
  * EVTCHNOP_bind_vcpu: Specify which vcpu a channel should notify when an
  * event is pending.
  * NOTES:
- *  1. IPI- and VIRQ-bound channels always notify the vcpu that initialised
- *     the binding. This binding cannot be changed.
- *  2. All other channels notify vcpu0 by default. This default is set when
+ *  1. IPI-bound channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  2. Per-VCPU VIRQ channels always notify the vcpu specified at bind time.
+ *     This binding cannot be changed.
+ *  3. All other channels notify vcpu0 by default. This default is set when
  *     the channel is allocated (a port that is freed and subsequently reused
  *     has its binding reset to vcpu0).
  */
-#define EVTCHNOP_bind_vcpu	  8
+#define EVTCHNOP_bind_vcpu        8
 struct evtchn_bind_vcpu {
-	/* IN parameters. */
-	evtchn_port_t port;
-	uint32_t vcpu;
+    /* IN parameters. */
+    evtchn_port_t port;
+    uint32_t vcpu;
 };
+typedef struct evtchn_bind_vcpu evtchn_bind_vcpu_t;

 /*
  * EVTCHNOP_unmask: Unmask the specified local event-channel port and deliver
  * a notification to the appropriate VCPU if an event is pending.
  */
-#define EVTCHNOP_unmask		  9
+#define EVTCHNOP_unmask           9
 struct evtchn_unmask {
-	/* IN parameters. */
-	evtchn_port_t port;
+    /* IN parameters. */
+    evtchn_port_t port;
 };
+typedef struct evtchn_unmask evtchn_unmask_t;

+/*
+ * EVTCHNOP_reset: Close all event channels associated with specified domain.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify other than DOMID_SELF.
+ */
+#define EVTCHNOP_reset           10
+struct evtchn_reset {
+    /* IN parameters. */
+    domid_t dom;
+};
+typedef struct evtchn_reset evtchn_reset_t;
+
+/*
+ * Argument to event_channel_op_compat() hypercall. Superceded by new
+ * event_channel_op() hypercall since 0x00030202.
+ */
 struct evtchn_op {
-	uint32_t cmd; /* EVTCHNOP_* */
-	union {
-		struct evtchn_alloc_unbound    alloc_unbound;
-		struct evtchn_bind_interdomain bind_interdomain;
-		struct evtchn_bind_virq	       bind_virq;
-		struct evtchn_bind_pirq	       bind_pirq;
-		struct evtchn_bind_ipi	       bind_ipi;
-		struct evtchn_close	       close;
-		struct evtchn_send	       send;
-		struct evtchn_status	       status;
-		struct evtchn_bind_vcpu	       bind_vcpu;
-		struct evtchn_unmask	       unmask;
-	} u;
+    uint32_t cmd; /* EVTCHNOP_* */
+    union {
+        struct evtchn_alloc_unbound    alloc_unbound;
+        struct evtchn_bind_interdomain bind_interdomain;
+        struct evtchn_bind_virq        bind_virq;
+        struct evtchn_bind_pirq        bind_pirq;
+        struct evtchn_bind_ipi         bind_ipi;
+        struct evtchn_close            close;
+        struct evtchn_send             send;
+        struct evtchn_status           status;
+        struct evtchn_bind_vcpu        bind_vcpu;
+        struct evtchn_unmask           unmask;
+    } u;
 };
-DEFINE_GUEST_HANDLE_STRUCT(evtchn_op);
+typedef struct evtchn_op evtchn_op_t;
+DEFINE_XEN_GUEST_HANDLE(evtchn_op_t);

 #endif /* __XEN_PUBLIC_EVENT_CHANNEL_H__ */
--- head-2010-05-12.orig/include/xen/interface/features.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/features.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Feature flags, reported by XENVER_get_features.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2006, Keir Fraser <keir@xensource.com>
  */

@@ -41,6 +59,15 @@
 /* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
 #define XENFEAT_mmu_pt_update_preserve_ad  5

+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist             6
+
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits      7
+
 #define XENFEAT_NR_SUBMAPS 1

 #endif /* __XEN_PUBLIC_FEATURES_H__ */
--- head-2010-05-12.orig/include/xen/interface/grant_table.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/grant_table.h	2010-01-19 16:01:04.000000000 +0100
@@ -28,6 +28,7 @@
 #ifndef __XEN_PUBLIC_GRANT_TABLE_H__
 #define __XEN_PUBLIC_GRANT_TABLE_H__

+#include "xen.h"

 /***********************************
  * GRANT TABLE REPRESENTATION
@@ -84,12 +85,26 @@
  */

 /*
+ * Reference to a grant entry in a specified domain's grant table.
+ */
+typedef uint32_t grant_ref_t;
+
+/*
  * A grant table comprises a packed array of grant entries in one or more
  * page frames shared between Xen and a guest.
  * [XEN]: This field is written by Xen and read by the sharing guest.
  * [GST]: This field is written by the guest and read by Xen.
  */
-struct grant_entry {
+
+/*
+ * Version 1 of the grant table entry structure is maintained purely
+ * for backwards compatibility.  New guests should use version 2.
+ */
+#if __XEN_INTERFACE_VERSION__ < 0x0003020a
+#define grant_entry_v1 grant_entry
+#define grant_entry_v1_t grant_entry_t
+#endif
+struct grant_entry_v1 {
     /* GTF_xxx: various type and flag information.  [XEN,GST] */
     uint16_t flags;
     /* The domain being granted foreign privileges. [GST] */
@@ -100,6 +115,7 @@ struct grant_entry {
      */
     uint32_t frame;
 };
+typedef struct grant_entry_v1 grant_entry_v1_t;

 /*
  * Type of grant entry.
@@ -107,10 +123,13 @@ struct grant_entry {
  *  GTF_permit_access: Allow @domid to map/access @frame.
  *  GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
  *                       to this guest. Xen writes the page number to @frame.
+ *  GTF_transitive: Allow @domid to transitively access a subrange of
+ *                  @trans_grant in @trans_domid.  No mappings are allowed.
  */
 #define GTF_invalid         (0U<<0)
 #define GTF_permit_access   (1U<<0)
 #define GTF_accept_transfer (2U<<0)
+#define GTF_transitive      (3U<<0)
 #define GTF_type_mask       (3U<<0)

 /*
@@ -118,6 +137,10 @@ struct grant_entry {
  *  GTF_readonly: Restrict @domid to read-only mappings and accesses. [GST]
  *  GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
  *  GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ *  GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
+ *  GTF_sub_page: Grant access to only a subrange of the page.  @domid
+ *                will only be allowed to copy from the grant, and not
+ *                map it. [GST]
  */
 #define _GTF_readonly       (2)
 #define GTF_readonly        (1U<<_GTF_readonly)
@@ -125,6 +148,14 @@ struct grant_entry {
 #define GTF_reading         (1U<<_GTF_reading)
 #define _GTF_writing        (4)
 #define GTF_writing         (1U<<_GTF_writing)
+#define _GTF_PWT            (5)
+#define GTF_PWT             (1U<<_GTF_PWT)
+#define _GTF_PCD            (6)
+#define GTF_PCD             (1U<<_GTF_PCD)
+#define _GTF_PAT            (7)
+#define GTF_PAT             (1U<<_GTF_PAT)
+#define _GTF_sub_page       (8)
+#define GTF_sub_page        (1U<<_GTF_sub_page)

 /*
  * Subflags for GTF_accept_transfer:
@@ -141,15 +172,87 @@ struct grant_entry {
 #define _GTF_transfer_completed (3)
 #define GTF_transfer_completed  (1U<<_GTF_transfer_completed)

-
-/***********************************
- * GRANT TABLE QUERIES AND USES
+/*
+ * Version 2 grant table entries.  These fulfil the same role as
+ * version 1 entries, but can represent more complicated operations.
+ * Any given domain will have either a version 1 or a version 2 table,
+ * and every entry in the table will be the same version.
+ *
+ * The interface by which domains use grant references does not depend
+ * on the grant table version in use by the other domain.
  */
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * Version 1 and version 2 grant entries share a common prefix.  The
+ * fields of the prefix are documented as part of struct
+ * grant_entry_v1.
+ */
+struct grant_entry_header {
+    uint16_t flags;
+    domid_t  domid;
+};
+typedef struct grant_entry_header grant_entry_header_t;

 /*
- * Reference to a grant entry in a specified domain's grant table.
+ * Version 2 of the grant entry structure.
+ */
+union grant_entry_v2 {
+    grant_entry_header_t hdr;
+
+    /*
+     * This member is used for V1-style full page grants, where either:
+     *
+     * -- hdr.type is GTF_accept_transfer, or
+     * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
+     *
+     * In that case, the frame field has the same semantics as the
+     * field of the same name in the V1 entry structure.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint32_t pad0;
+        uint64_t frame;
+    } full_page;
+
+    /*
+     * If the grant type is GTF_grant_access and GTF_sub_page is set,
+     * @domid is allowed to access bytes [@page_off,@page_off+@length)
+     * in frame @frame.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        uint16_t page_off;
+        uint16_t length;
+        uint64_t frame;
+    } sub_page;
+
+    /*
+     * If the grant is GTF_transitive, @domid is allowed to use the
+     * grant @gref in domain @trans_domid, as if it was the local
+     * domain.  Obviously, the transitive access must be compatible
+     * with the original grant.
+     *
+     * The current version of Xen does not allow transitive grants
+     * to be mapped.
+     */
+    struct {
+        grant_entry_header_t hdr;
+        domid_t trans_domid;
+        uint16_t pad0;
+        grant_ref_t gref;
+    } transitive;
+
+    uint32_t __spacer[4]; /* Pad to a power of two */
+};
+typedef union grant_entry_v2 grant_entry_v2_t;
+
+typedef uint16_t grant_status_t;
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/***********************************
+ * GRANT TABLE QUERIES AND USES
  */
-typedef uint32_t grant_ref_t;

 /*
  * Handle to track a mapping created via a grant reference.
@@ -185,7 +288,8 @@ struct gnttab_map_grant_ref {
     grant_handle_t handle;
     uint64_t dev_bus_addr;
 };
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_map_grant_ref);
+typedef struct gnttab_map_grant_ref gnttab_map_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_map_grant_ref_t);

 /*
  * GNTTABOP_unmap_grant_ref: Destroy one or more grant-reference mappings
@@ -207,7 +311,8 @@ struct gnttab_unmap_grant_ref {
     /* OUT parameters. */
     int16_t  status;              /* GNTST_* */
 };
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_unmap_grant_ref);
+typedef struct gnttab_unmap_grant_ref gnttab_unmap_grant_ref_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_grant_ref_t);

 /*
  * GNTTABOP_setup_table: Set up a grant table for <dom> comprising at least
@@ -225,9 +330,10 @@ struct gnttab_setup_table {
     uint32_t nr_frames;
     /* OUT parameters. */
     int16_t  status;              /* GNTST_* */
-    GUEST_HANDLE(ulong) frame_list;
+    XEN_GUEST_HANDLE(ulong) frame_list;
 };
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_setup_table);
+typedef struct gnttab_setup_table gnttab_setup_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_setup_table_t);

 /*
  * GNTTABOP_dump_table: Dump the contents of the grant table to the
@@ -240,7 +346,8 @@ struct gnttab_dump_table {
     /* OUT parameters. */
     int16_t status;               /* GNTST_* */
 };
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
+typedef struct gnttab_dump_table gnttab_dump_table_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_dump_table_t);

 /*
  * GNTTABOP_transfer_grant_ref: Transfer <frame> to a foreign domain. The
@@ -253,13 +360,15 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_t
 #define GNTTABOP_transfer                4
 struct gnttab_transfer {
     /* IN parameters. */
-    unsigned long mfn;
+    xen_pfn_t     mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
     int16_t       status;
 };
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_transfer);
+typedef struct gnttab_transfer gnttab_transfer_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_t);
+

 /*
  * GNTTABOP_copy: Hypervisor based copy
@@ -283,24 +392,26 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_transf
 #define GNTCOPY_source_gref       (1<<_GNTCOPY_source_gref)
 #define _GNTCOPY_dest_gref        (1)
 #define GNTCOPY_dest_gref         (1<<_GNTCOPY_dest_gref)
+#define _GNTCOPY_can_fail         (2)
+#define GNTCOPY_can_fail          (1<<_GNTCOPY_can_fail)

 #define GNTTABOP_copy                 5
-struct gnttab_copy {
-	/* IN parameters. */
-	struct {
-		union {
-			grant_ref_t ref;
-			unsigned long   gmfn;
-		} u;
-		domid_t  domid;
-		uint16_t offset;
-	} source, dest;
-	uint16_t      len;
-	uint16_t      flags;          /* GNTCOPY_* */
-	/* OUT parameters. */
-	int16_t       status;
-};
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_copy);
+typedef struct gnttab_copy {
+    /* IN parameters. */
+    struct {
+        union {
+            grant_ref_t ref;
+            xen_pfn_t   gmfn;
+        } u;
+        domid_t  domid;
+        uint16_t offset;
+    } source, dest;
+    uint16_t      len;
+    uint16_t      flags;          /* GNTCOPY_* */
+    /* OUT parameters. */
+    int16_t       status;
+} gnttab_copy_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_copy_t);

 /*
  * GNTTABOP_query_size: Query the current and maximum sizes of the shared
@@ -318,10 +429,92 @@ struct gnttab_query_size {
     uint32_t max_nr_frames;
     int16_t  status;              /* GNTST_* */
 };
-DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_size);
+typedef struct gnttab_query_size gnttab_query_size_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_query_size_t);
+
+/*
+ * GNTTABOP_unmap_and_replace: Destroy one or more grant-reference mappings
+ * tracked by <handle> but atomically replace the page table entry with one
+ * pointing to the machine address under <new_addr>.  <new_addr> will be
+ * redirected to the null entry.
+ * NOTES:
+ *  1. The call may fail in an undefined manner if either mapping is not
+ *     tracked by <handle>.
+ *  2. After executing a batch of unmaps, it is guaranteed that no stale
+ *     mappings will remain in the device or host TLBs.
+ */
+#define GNTTABOP_unmap_and_replace    7
+struct gnttab_unmap_and_replace {
+    /* IN parameters. */
+    uint64_t host_addr;
+    uint64_t new_addr;
+    grant_handle_t handle;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+};
+typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
+#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
+/*
+ * GNTTABOP_set_version: Request a particular version of the grant
+ * table shared table structure.  This operation can only be performed
+ * once in any given domain.  It must be performed before any grants
+ * are activated; otherwise, the domain will be stuck with version 1.
+ * The only defined versions are 1 and 2.
+ */
+#define GNTTABOP_set_version          8
+struct gnttab_set_version {
+    /* IN/OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_set_version gnttab_set_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
+
+
+/*
+ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
+ * status for <dom>. In grant format version 2, the status is separated
+ * from the other shared grant fields to allow more efficient synchronization
+ * using barriers instead of atomic cmpexch operations.
+ * <nr_frames> specify the size of vector <frame_list>.
+ * The frame addresses are returned in the <frame_list>.
+ * Only <nr_frames> addresses are returned, even if the table is larger.
+ * NOTES:
+ *  1. <dom> may be specified as DOMID_SELF.
+ *  2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
+ */
+#define GNTTABOP_get_status_frames     9
+struct gnttab_get_status_frames {
+    /* IN parameters. */
+    uint32_t nr_frames;
+    domid_t  dom;
+    /* OUT parameters. */
+    int16_t  status;              /* GNTST_* */
+    XEN_GUEST_HANDLE(uint64_t) frame_list;
+};
+typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);

 /*
- * Bitfield values for update_pin_status.flags.
+ * GNTTABOP_get_version: Get the grant table version which is in
+ * effect for domain <dom>.
+ */
+#define GNTTABOP_get_version          10
+struct gnttab_get_version {
+    /* IN parameters */
+    domid_t dom;
+    uint16_t pad;
+    /* OUT parameters */
+    uint32_t version;
+};
+typedef struct gnttab_get_version gnttab_get_version_t;
+DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
+
+#endif /* __XEN_INTERFACE_VERSION__ */
+
+/*
+ * Bitfield values for gnttab_map_grant_ref.flags.
  */
  /* Map the grant entry for access by I/O devices. */
 #define _GNTMAP_device_map      (0)
@@ -348,6 +541,16 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_
 #define _GNTMAP_contains_pte    (4)
 #define GNTMAP_contains_pte     (1<<_GNTMAP_contains_pte)

+#define _GNTMAP_can_fail        (5)
+#define GNTMAP_can_fail         (1<<_GNTMAP_can_fail)
+
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0    (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
+
 /*
  * Values for error status returns. All errors are -ve.
  */
@@ -361,7 +564,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_
 #define GNTST_no_device_space  (-7) /* Out of space in I/O MMU.              */
 #define GNTST_permission_denied (-8) /* Not enough privilege for operation.  */
 #define GNTST_bad_page         (-9) /* Specified page was invalid for op.    */
-#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary */
+#define GNTST_bad_copy_arg    (-10) /* copy arguments cross page boundary.   */
+#define GNTST_address_too_big (-11) /* transfer page address too large.      */
+#define GNTST_eagain          (-12) /* Could not map at the moment. Retry.   */

 #define GNTTABOP_error_msgs {                   \
     "okay",                                     \
@@ -374,7 +579,9 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_query_
     "no spare translation slot in the I/O MMU", \
     "permission denied",                        \
     "bad page",                                 \
-    "copy arguments cross page boundary"        \
+    "copy arguments cross page boundary",       \
+    "page address size too large",              \
+    "could not map at the moment, retry"        \
 }

 #endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
--- head-2010-05-12.orig/include/xen/interface/io/blkif.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/blkif.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Unified block-device I/O interface for Xen guest OSes.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2003-2004, Keir Fraser
  */

@@ -24,8 +42,10 @@
  * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
  */

-typedef uint16_t blkif_vdev_t;
-typedef uint64_t blkif_sector_t;
+#ifndef blkif_vdev_t
+#define blkif_vdev_t   uint16_t
+#endif
+#define blkif_sector_t uint64_t

 /*
  * REQUEST CODES.
@@ -34,7 +54,7 @@ typedef uint64_t blkif_sector_t;
 #define BLKIF_OP_WRITE             1
 /*
  * Recognised only if "feature-barrier" is present in backend xenbus info.
- * The "feature_barrier" node contains a boolean indicating whether barrier
+ * The "feature-barrier" node contains a boolean indicating whether barrier
  * requests are likely to succeed or fail. Either way, a barrier request
  * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
  * the underlying block-device hardware. The boolean simply indicates whether
@@ -43,33 +63,57 @@ typedef uint64_t blkif_sector_t;
  * create the "feature-barrier" node!
  */
 #define BLKIF_OP_WRITE_BARRIER     2
+/*
+ * Recognised if "feature-flush-cache" is present in backend xenbus
+ * info.  A flush will ask the underlying storage hardware to flush its
+ * non-volatile caches as appropriate.  The "feature-flush-cache" node
+ * contains a boolean indicating whether flush requests are likely to
+ * succeed or fail. Either way, a flush request may fail at any time
+ * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
+ * block-device hardware. The boolean simply indicates whether or not it
+ * is worthwhile for the frontend to attempt flushes.  If a backend does
+ * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
+ * "feature-flush-cache" node!
+ */
+#define BLKIF_OP_FLUSH_DISKCACHE   3

 /*
  * Maximum scatter/gather segments per request.
- * This is carefully chosen so that sizeof(struct blkif_ring) <= PAGE_SIZE.
+ * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
  * NB. This could be 12 if the ring indexes weren't stored in the same page.
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11

+/*
+ * NB. first_sect and last_sect in blkif_request_segment, as well as
+ * sector_number in blkif_request, are always expressed in 512-byte units.
+ * However they must be properly aligned to the real sector size of the
+ * physical disk, which is reported in the "sector-size" node in the backend
+ * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units.
+ */
+struct blkif_request_segment {
+    grant_ref_t gref;        /* reference to I/O buffer frame        */
+    /* @first_sect: first sector in frame to transfer (inclusive).   */
+    /* @last_sect: last sector in frame to transfer (inclusive).     */
+    uint8_t     first_sect, last_sect;
+};
+
 struct blkif_request {
-	uint8_t        operation;    /* BLKIF_OP_???                         */
-	uint8_t        nr_segments;  /* number of segments                   */
-	blkif_vdev_t   handle;       /* only for read/write requests         */
-	uint64_t       id;           /* private guest value, echoed in resp  */
-	blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-	struct blkif_request_segment {
-		grant_ref_t gref;        /* reference to I/O buffer frame        */
-		/* @first_sect: first sector in frame to transfer (inclusive).   */
-		/* @last_sect: last sector in frame to transfer (inclusive).     */
-		uint8_t     first_sect, last_sect;
-	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+    uint8_t        operation;    /* BLKIF_OP_???                         */
+    uint8_t        nr_segments;  /* number of segments                   */
+    blkif_vdev_t   handle;       /* only for read/write requests         */
+    uint64_t       id;           /* private guest value, echoed in resp  */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
+    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
 };
+typedef struct blkif_request blkif_request_t;

 struct blkif_response {
-	uint64_t        id;              /* copied from request */
-	uint8_t         operation;       /* copied from request */
-	int16_t         status;          /* BLKIF_RSP_???       */
+    uint64_t        id;              /* copied from request */
+    uint8_t         operation;       /* copied from request */
+    int16_t         status;          /* BLKIF_RSP_???       */
 };
+typedef struct blkif_response blkif_response_t;

 /*
  * STATUS RETURN CODES.
--- head-2010-05-12.orig/include/xen/interface/io/console.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/console.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Console I/O interface for Xen guest OSes.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Keir Fraser
  */

--- head-2010-05-12.orig/include/xen/interface/io/fbif.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/fbif.h	2010-01-19 16:01:04.000000000 +0100
@@ -41,12 +41,13 @@
  */
 #define XENFB_TYPE_UPDATE 2

-struct xenfb_update {
-	uint8_t type;		/* XENFB_TYPE_UPDATE */
-	int32_t x;		/* source x */
-	int32_t y;		/* source y */
-	int32_t width;		/* rect width */
-	int32_t height;		/* rect height */
+struct xenfb_update
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE */
+    int32_t x;      /* source x */
+    int32_t y;      /* source y */
+    int32_t width;  /* rect width */
+    int32_t height; /* rect height */
 };

 /*
@@ -55,36 +56,58 @@ struct xenfb_update {
  */
 #define XENFB_TYPE_RESIZE 3

-struct xenfb_resize {
-	uint8_t type;		/* XENFB_TYPE_RESIZE */
-	int32_t width;		/* width in pixels */
-	int32_t height;		/* height in pixels */
-	int32_t stride;		/* stride in bytes */
-	int32_t depth;		/* depth in bits */
-	int32_t offset;		/* start offset within framebuffer */
+struct xenfb_resize
+{
+    uint8_t type;    /* XENFB_TYPE_RESIZE */
+    int32_t width;   /* width in pixels */
+    int32_t height;  /* height in pixels */
+    int32_t stride;  /* stride in bytes */
+    int32_t depth;   /* depth in bits */
+    int32_t offset;  /* offset of the framebuffer in bytes */
 };

 #define XENFB_OUT_EVENT_SIZE 40

-union xenfb_out_event {
-	uint8_t type;
-	struct xenfb_update update;
-	struct xenfb_resize resize;
-	char pad[XENFB_OUT_EVENT_SIZE];
+union xenfb_out_event
+{
+    uint8_t type;
+    struct xenfb_update update;
+    struct xenfb_resize resize;
+    char pad[XENFB_OUT_EVENT_SIZE];
 };

 /* In events (backend -> frontend) */

 /*
  * Frontends should ignore unknown in events.
- * No in events currently defined.
  */

+/*
+ * Framebuffer refresh period advice
+ * Backend sends it to advise the frontend their preferred period of
+ * refresh.  Frontends that keep the framebuffer constantly up-to-date
+ * just ignore it.  Frontends that use the advice should immediately
+ * refresh the framebuffer (and send an update notification event if
+ * those have been requested), then use the update frequency to guide
+ * their periodical refreshs.
+ */
+#define XENFB_TYPE_REFRESH_PERIOD 1
+#define XENFB_NO_REFRESH 0
+
+struct xenfb_refresh_period
+{
+    uint8_t type;    /* XENFB_TYPE_UPDATE_PERIOD */
+    uint32_t period; /* period of refresh, in ms,
+                      * XENFB_NO_REFRESH if no refresh is needed */
+};
+
 #define XENFB_IN_EVENT_SIZE 40

-union xenfb_in_event {
-	uint8_t type;
-	char pad[XENFB_IN_EVENT_SIZE];
+union xenfb_in_event
+{
+    uint8_t type;
+    struct xenfb_refresh_period refresh_period;
+    char pad[XENFB_IN_EVENT_SIZE];
 };

 /* shared page */
@@ -93,41 +116,41 @@ union xenfb_in_event {
 #define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
 #define XENFB_IN_RING_OFFS 1024
 #define XENFB_IN_RING(page) \
-	((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
+    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
 #define XENFB_IN_RING_REF(page, idx) \
-	(XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
+    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])

 #define XENFB_OUT_RING_SIZE 2048
 #define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
 #define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
 #define XENFB_OUT_RING(page) \
-	((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
+    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
 #define XENFB_OUT_RING_REF(page, idx) \
-	(XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
+    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])

-struct xenfb_page {
-	uint32_t in_cons, in_prod;
-	uint32_t out_cons, out_prod;
-
-	int32_t width;          /* width of the framebuffer (in pixels) */
-	int32_t height;         /* height of the framebuffer (in pixels) */
-	uint32_t line_length;   /* length of a row of pixels (in bytes) */
-	uint32_t mem_length;    /* length of the framebuffer (in bytes) */
-	uint8_t depth;          /* depth of a pixel (in bits) */
-
-	/*
-	 * Framebuffer page directory
-	 *
-	 * Each directory page holds PAGE_SIZE / sizeof(*pd)
-	 * framebuffer pages, and can thus map up to PAGE_SIZE *
-	 * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
-	 * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2
-	 * Megs 64 bit.  256 directories give enough room for a 512
-	 * Meg framebuffer with a max resolution of 12,800x10,240.
-	 * Should be enough for a while with room leftover for
-	 * expansion.
-	 */
-	unsigned long pd[256];
+struct xenfb_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
+
+    int32_t width;          /* the width of the framebuffer (in pixels) */
+    int32_t height;         /* the height of the framebuffer (in pixels) */
+    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
+    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
+    uint8_t depth;          /* the depth of a pixel (in bits) */
+
+    /*
+     * Framebuffer page directory
+     *
+     * Each directory page holds PAGE_SIZE / sizeof(*pd)
+     * framebuffer pages, and can thus map up to PAGE_SIZE *
+     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
+     * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
+     * 64 bit.  256 directories give enough room for a 512 Meg
+     * framebuffer with a max resolution of 12,800x10,240.  Should
+     * be enough for a while with room leftover for expansion.
+     */
+    unsigned long pd[256];
 };

 /*
--- head-2010-05-12.orig/include/xen/interface/io/kbdif.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/kbdif.h	2010-01-19 16:01:04.000000000 +0100
@@ -45,34 +45,38 @@
  */
 #define XENKBD_TYPE_POS     4

-struct xenkbd_motion {
-	uint8_t type;		/* XENKBD_TYPE_MOTION */
-	int32_t rel_x;		/* relative X motion */
-	int32_t rel_y;		/* relative Y motion */
-	int32_t rel_z;		/* relative Z motion (wheel) */
-};
-
-struct xenkbd_key {
-	uint8_t type;		/* XENKBD_TYPE_KEY */
-	uint8_t pressed;	/* 1 if pressed; 0 otherwise */
-	uint32_t keycode;	/* KEY_* from linux/input.h */
-};
-
-struct xenkbd_position {
-	uint8_t type;		/* XENKBD_TYPE_POS */
-	int32_t abs_x;		/* absolute X position (in FB pixels) */
-	int32_t abs_y;		/* absolute Y position (in FB pixels) */
-	int32_t rel_z;		/* relative Z motion (wheel) */
+struct xenkbd_motion
+{
+    uint8_t type;        /* XENKBD_TYPE_MOTION */
+    int32_t rel_x;       /* relative X motion */
+    int32_t rel_y;       /* relative Y motion */
+    int32_t rel_z;       /* relative Z motion (wheel) */
+};
+
+struct xenkbd_key
+{
+    uint8_t type;         /* XENKBD_TYPE_KEY */
+    uint8_t pressed;      /* 1 if pressed; 0 otherwise */
+    uint32_t keycode;     /* KEY_* from linux/input.h */
+};
+
+struct xenkbd_position
+{
+    uint8_t type;        /* XENKBD_TYPE_POS */
+    int32_t abs_x;       /* absolute X position (in FB pixels) */
+    int32_t abs_y;       /* absolute Y position (in FB pixels) */
+    int32_t rel_z;       /* relative Z motion (wheel) */
 };

 #define XENKBD_IN_EVENT_SIZE 40

-union xenkbd_in_event {
-	uint8_t type;
-	struct xenkbd_motion motion;
-	struct xenkbd_key key;
-	struct xenkbd_position pos;
-	char pad[XENKBD_IN_EVENT_SIZE];
+union xenkbd_in_event
+{
+    uint8_t type;
+    struct xenkbd_motion motion;
+    struct xenkbd_key key;
+    struct xenkbd_position pos;
+    char pad[XENKBD_IN_EVENT_SIZE];
 };

 /* Out events (frontend -> backend) */
@@ -85,9 +89,10 @@ union xenkbd_in_event {

 #define XENKBD_OUT_EVENT_SIZE 40

-union xenkbd_out_event {
-	uint8_t type;
-	char pad[XENKBD_OUT_EVENT_SIZE];
+union xenkbd_out_event
+{
+    uint8_t type;
+    char pad[XENKBD_OUT_EVENT_SIZE];
 };

 /* shared page */
@@ -96,21 +101,22 @@ union xenkbd_out_event {
 #define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
 #define XENKBD_IN_RING_OFFS 1024
 #define XENKBD_IN_RING(page) \
-	((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
+    ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
 #define XENKBD_IN_RING_REF(page, idx) \
-	(XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
+    (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])

 #define XENKBD_OUT_RING_SIZE 1024
 #define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
 #define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
 #define XENKBD_OUT_RING(page) \
-	((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
+    ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
 #define XENKBD_OUT_RING_REF(page, idx) \
-	(XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
+    (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])

-struct xenkbd_page {
-	uint32_t in_cons, in_prod;
-	uint32_t out_cons, out_prod;
+struct xenkbd_page
+{
+    uint32_t in_cons, in_prod;
+    uint32_t out_cons, out_prod;
 };

 #endif
--- head-2010-05-12.orig/include/xen/interface/io/netif.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/netif.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Unified network-device I/O interface for Xen guest OSes.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2003-2004, Keir Fraser
  */

@@ -47,18 +65,21 @@
 #define _NETTXF_extra_info     (3)
 #define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)

-struct xen_netif_tx_request {
+struct netif_tx_request {
     grant_ref_t gref;      /* Reference to buffer page */
     uint16_t offset;       /* Offset within buffer page */
     uint16_t flags;        /* NETTXF_* */
     uint16_t id;           /* Echoed in response message. */
     uint16_t size;         /* Packet size in bytes.       */
 };
+typedef struct netif_tx_request netif_tx_request_t;

 /* Types of netif_extra_info descriptors. */
-#define XEN_NETIF_EXTRA_TYPE_NONE  (0)  /* Never used - invalid */
-#define XEN_NETIF_EXTRA_TYPE_GSO   (1)  /* u.gso */
-#define XEN_NETIF_EXTRA_TYPE_MAX   (2)
+#define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
+#define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
+#define XEN_NETIF_EXTRA_TYPE_MAX       (4)

 /* netif_extra_info flags. */
 #define _XEN_NETIF_EXTRA_FLAG_MORE (0)
@@ -71,49 +92,68 @@ struct xen_netif_tx_request {
  * This structure needs to fit within both netif_tx_request and
  * netif_rx_response for compatibility.
  */
-struct xen_netif_extra_info {
-	uint8_t type;  /* XEN_NETIF_EXTRA_TYPE_* */
-	uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
-
-	union {
-		struct {
-			/*
-			 * Maximum payload size of each segment. For
-			 * example, for TCP this is just the path MSS.
-			 */
-			uint16_t size;
-
-			/*
-			 * GSO type. This determines the protocol of
-			 * the packet and any extra features required
-			 * to segment the packet properly.
-			 */
-			uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
-
-			/* Future expansion. */
-			uint8_t pad;
-
-			/*
-			 * GSO features. This specifies any extra GSO
-			 * features required to process this packet,
-			 * such as ECN support for TCPv4.
-			 */
-			uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
-		} gso;
+struct netif_extra_info {
+    uint8_t type;  /* XEN_NETIF_EXTRA_TYPE_* */
+    uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */
+
+    union {
+        /*
+         * XEN_NETIF_EXTRA_TYPE_GSO:
+         */
+        struct {
+            /*
+             * Maximum payload size of each segment. For example, for TCP this
+             * is just the path MSS.
+             */
+            uint16_t size;
+
+            /*
+             * GSO type. This determines the protocol of the packet and any
+             * extra features required to segment the packet properly.
+             */
+            uint8_t type; /* XEN_NETIF_GSO_TYPE_* */
+
+            /* Future expansion. */
+            uint8_t pad;
+
+            /*
+             * GSO features. This specifies any extra GSO features required
+             * to process this packet, such as ECN support for TCPv4.
+             */
+            uint16_t features; /* XEN_NETIF_GSO_FEAT_* */
+        } gso;
+
+        /*
+         * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
+         * Backend advertises availability via 'feature-multicast-control'
+         * xenbus node containing value '1'.
+         * Frontend requests this feature by advertising
+         * 'request-multicast-control' xenbus node containing value '1'.
+         * If multicast control is requested then multicast flooding is
+         * disabled and the frontend must explicitly register its interest
+         * in multicast groups using dummy transmit requests containing
+         * MCAST_{ADD,DEL} extra-info fragments.
+         */
+        struct {
+            uint8_t addr[6]; /* Address to add/remove. */
+        } mcast;

-		uint16_t pad[3];
-	} u;
+        uint16_t pad[3];
+    } u;
 };
+typedef struct netif_extra_info netif_extra_info_t;

-struct xen_netif_tx_response {
-	uint16_t id;
-	int16_t  status;       /* NETIF_RSP_* */
+struct netif_tx_response {
+    uint16_t id;
+    int16_t  status;       /* NETIF_RSP_* */
 };
+typedef struct netif_tx_response netif_tx_response_t;

-struct xen_netif_rx_request {
-	uint16_t    id;        /* Echoed in response message.        */
-	grant_ref_t gref;      /* Reference to incoming granted frame */
+struct netif_rx_request {
+    uint16_t    id;        /* Echoed in response message.        */
+    grant_ref_t gref;      /* Reference to incoming granted frame */
 };
+typedef struct netif_rx_request netif_rx_request_t;

 /* Packet data has been validated against protocol checksum. */
 #define _NETRXF_data_validated (0)
@@ -131,23 +171,20 @@ struct xen_netif_rx_request {
 #define _NETRXF_extra_info     (3)
 #define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)

-struct xen_netif_rx_response {
+struct netif_rx_response {
     uint16_t id;
     uint16_t offset;       /* Offset in page of start of received packet  */
     uint16_t flags;        /* NETRXF_* */
     int16_t  status;       /* -ve: BLKIF_RSP_* ; +ve: Rx'ed pkt size. */
 };
+typedef struct netif_rx_response netif_rx_response_t;

 /*
  * Generate netif ring structures and types.
  */

-DEFINE_RING_TYPES(xen_netif_tx,
-		  struct xen_netif_tx_request,
-		  struct xen_netif_tx_response);
-DEFINE_RING_TYPES(xen_netif_rx,
-		  struct xen_netif_rx_request,
-		  struct xen_netif_rx_response);
+DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
+DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);

 #define NETIF_RSP_DROPPED         -2
 #define NETIF_RSP_ERROR           -1
--- head-2010-05-12.orig/include/xen/interface/io/protocols.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/protocols.h	2010-01-19 16:01:04.000000000 +0100
@@ -1,10 +1,31 @@
+/******************************************************************************
+ * protocols.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
 #ifndef __XEN_PROTOCOLS_H__
 #define __XEN_PROTOCOLS_H__

 #define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
 #define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
 #define XEN_IO_PROTO_ABI_IA64       "ia64-abi"
-#define XEN_IO_PROTO_ABI_POWERPC64  "powerpc64-abi"

 #if defined(__i386__)
 # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
@@ -12,8 +33,6 @@
 # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
 #elif defined(__ia64__)
 # define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_IA64
-#elif defined(__powerpc64__)
-# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_POWERPC64
 #else
 # error arch fixup needed here
 #endif
--- head-2010-05-12.orig/include/xen/interface/io/ring.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/ring.h	2010-02-24 13:13:46.000000000 +0100
@@ -3,16 +3,42 @@
  *
  * Shared producer-consumer ring macros.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Tim Deegan and Andrew Warfield November 2004.
  */

 #ifndef __XEN_PUBLIC_IO_RING_H__
 #define __XEN_PUBLIC_IO_RING_H__

+#include "../xen-compat.h"
+
+#if __XEN_INTERFACE_VERSION__ < 0x00030208
+#define xen_mb()  mb()
+#define xen_rmb() rmb()
+#define xen_wmb() wmb()
+#endif
+
 typedef unsigned int RING_IDX;

 /* Round a 32-bit unsigned constant down to the nearest power of two. */
-#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2		       : ((_x) & 0x1))
+#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
 #define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
 #define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
 #define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
@@ -24,74 +50,84 @@ typedef unsigned int RING_IDX;
  * A ring contains as many entries as will fit, rounded down to the nearest
  * power of two (so we can mask with (size-1) to loop around).
  */
+#define __CONST_RING_SIZE(_s, _sz) \
+    (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
+	    sizeof(((struct _s##_sring *)0)->ring[0])))
+/*
+ * The same for passing in an actual pointer instead of a name tag.
+ */
 #define __RING_SIZE(_s, _sz) \
-    (__RD32(((_sz) - (long)&(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
+    (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))

 /*
  * Macros to make the correct C datatypes for a new kind of ring.
  *
  * To make a new ring datatype, you need to have two message structures,
- * let's say struct request, and struct response already defined.
+ * let's say request_t, and response_t already defined.
  *
  * In a header where you want the ring datatype declared, you then do:
  *
- *     DEFINE_RING_TYPES(mytag, struct request, struct response);
+ *     DEFINE_RING_TYPES(mytag, request_t, response_t);
  *
  * These expand out to give you a set of types, as you can see below.
  * The most important of these are:
  *
- *     struct mytag_sring      - The shared ring.
- *     struct mytag_front_ring - The 'front' half of the ring.
- *     struct mytag_back_ring  - The 'back' half of the ring.
+ *     mytag_sring_t      - The shared ring.
+ *     mytag_front_ring_t - The 'front' half of the ring.
+ *     mytag_back_ring_t  - The 'back' half of the ring.
  *
  * To initialize a ring in your code you need to know the location and size
  * of the shared memory area (PAGE_SIZE, for instance). To initialise
  * the front half:
  *
- *     struct mytag_front_ring front_ring;
- *     SHARED_RING_INIT((struct mytag_sring *)shared_page);
- *     FRONT_RING_INIT(&front_ring, (struct mytag_sring *)shared_page,
- *		       PAGE_SIZE);
+ *     mytag_front_ring_t front_ring;
+ *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
+ *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
  *
  * Initializing the back follows similarly (note that only the front
  * initializes the shared ring):
  *
- *     struct mytag_back_ring back_ring;
- *     BACK_RING_INIT(&back_ring, (struct mytag_sring *)shared_page,
- *		      PAGE_SIZE);
+ *     mytag_back_ring_t back_ring;
+ *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
  */

-#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)			\
-									\
-/* Shared ring entry */							\
-union __name##_sring_entry {						\
-    __req_t req;							\
-    __rsp_t rsp;							\
-};									\
-									\
-/* Shared ring page */							\
-struct __name##_sring {							\
-    RING_IDX req_prod, req_event;					\
-    RING_IDX rsp_prod, rsp_event;					\
-    uint8_t  pad[48];							\
-    union __name##_sring_entry ring[1]; /* variable-length */		\
-};									\
-									\
-/* "Front" end's private variables */					\
-struct __name##_front_ring {						\
-    RING_IDX req_prod_pvt;						\
-    RING_IDX rsp_cons;							\
-    unsigned int nr_ents;						\
-    struct __name##_sring *sring;					\
-};									\
-									\
-/* "Back" end's private variables */					\
-struct __name##_back_ring {						\
-    RING_IDX rsp_prod_pvt;						\
-    RING_IDX req_cons;							\
-    unsigned int nr_ents;						\
-    struct __name##_sring *sring;					\
-};
+#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
+                                                                        \
+/* Shared ring entry */                                                 \
+union __name##_sring_entry {                                            \
+    __req_t req;                                                        \
+    __rsp_t rsp;                                                        \
+};                                                                      \
+                                                                        \
+/* Shared ring page */                                                  \
+struct __name##_sring {                                                 \
+    RING_IDX req_prod, req_event;                                       \
+    RING_IDX rsp_prod, rsp_event;                                       \
+    uint8_t  netfront_smartpoll_active;                                 \
+    uint8_t  pad[47];                                                   \
+    union __name##_sring_entry ring[1]; /* variable-length */           \
+};                                                                      \
+                                                                        \
+/* "Front" end's private variables */                                   \
+struct __name##_front_ring {                                            \
+    RING_IDX req_prod_pvt;                                              \
+    RING_IDX rsp_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* "Back" end's private variables */                                    \
+struct __name##_back_ring {                                             \
+    RING_IDX rsp_prod_pvt;                                              \
+    RING_IDX req_cons;                                                  \
+    unsigned int nr_ents;                                               \
+    struct __name##_sring *sring;                                       \
+};                                                                      \
+                                                                        \
+/* Syntactic sugar */                                                   \
+typedef struct __name##_sring __name##_sring_t;                         \
+typedef struct __name##_front_ring __name##_front_ring_t;               \
+typedef struct __name##_back_ring __name##_back_ring_t

 /*
  * Macros for manipulating rings.
@@ -109,86 +145,94 @@ struct __name##_back_ring {						\
  */

 /* Initialising empty rings */
-#define SHARED_RING_INIT(_s) do {					\
-    (_s)->req_prod  = (_s)->rsp_prod  = 0;				\
-    (_s)->req_event = (_s)->rsp_event = 1;				\
-    memset((_s)->pad, 0, sizeof((_s)->pad));				\
+#define SHARED_RING_INIT(_s) do {                                       \
+    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
+    (_s)->req_event = (_s)->rsp_event = 1;                              \
+    (void)memset((_s)->pad, 0, sizeof((_s)->pad));                      \
 } while(0)

-#define FRONT_RING_INIT(_r, _s, __size) do {				\
-    (_r)->req_prod_pvt = 0;						\
-    (_r)->rsp_cons = 0;							\
-    (_r)->nr_ents = __RING_SIZE(_s, __size);				\
-    (_r)->sring = (_s);							\
+#define FRONT_RING_INIT(_r, _s, __size) do {                            \
+    (_r)->req_prod_pvt = 0;                                             \
+    (_r)->rsp_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
 } while (0)

-#define BACK_RING_INIT(_r, _s, __size) do {				\
-    (_r)->rsp_prod_pvt = 0;						\
-    (_r)->req_cons = 0;							\
-    (_r)->nr_ents = __RING_SIZE(_s, __size);				\
-    (_r)->sring = (_s);							\
+#define BACK_RING_INIT(_r, _s, __size) do {                             \
+    (_r)->rsp_prod_pvt = 0;                                             \
+    (_r)->req_cons = 0;                                                 \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
+    (_r)->sring = (_s);                                                 \
 } while (0)

 /* Initialize to existing shared indexes -- for recovery */
-#define FRONT_RING_ATTACH(_r, _s, __size) do {				\
-    (_r)->sring = (_s);							\
-    (_r)->req_prod_pvt = (_s)->req_prod;				\
-    (_r)->rsp_cons = (_s)->rsp_prod;					\
-    (_r)->nr_ents = __RING_SIZE(_s, __size);				\
+#define FRONT_RING_ATTACH(_r, _s, __size) do {                          \
+    (_r)->sring = (_s);                                                 \
+    (_r)->req_prod_pvt = (_s)->req_prod;                                \
+    (_r)->rsp_cons = (_s)->rsp_prod;                                    \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
 } while (0)

-#define BACK_RING_ATTACH(_r, _s, __size) do {				\
-    (_r)->sring = (_s);							\
-    (_r)->rsp_prod_pvt = (_s)->rsp_prod;				\
-    (_r)->req_cons = (_s)->req_prod;					\
-    (_r)->nr_ents = __RING_SIZE(_s, __size);				\
+#define BACK_RING_ATTACH(_r, _s, __size) do {                           \
+    (_r)->sring = (_s);                                                 \
+    (_r)->rsp_prod_pvt = (_s)->rsp_prod;                                \
+    (_r)->req_cons = (_s)->req_prod;                                    \
+    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
 } while (0)

 /* How big is this ring? */
-#define RING_SIZE(_r)							\
+#define RING_SIZE(_r)                                                   \
     ((_r)->nr_ents)

 /* Number of free requests (for use on front side only). */
-#define RING_FREE_REQUESTS(_r)						\
+#define RING_FREE_REQUESTS(_r)                                          \
     (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))

 /* Test if there is an empty slot available on the front ring.
  * (This is only meaningful from the front. )
  */
-#define RING_FULL(_r)							\
+#define RING_FULL(_r)                                                   \
     (RING_FREE_REQUESTS(_r) == 0)

 /* Test if there are outstanding messages to be processed on a ring. */
-#define RING_HAS_UNCONSUMED_RESPONSES(_r)				\
+#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
     ((_r)->sring->rsp_prod - (_r)->rsp_cons)

-#define RING_HAS_UNCONSUMED_REQUESTS(_r)				\
-    ({									\
-	unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;	\
-	unsigned int rsp = RING_SIZE(_r) -				\
-			   ((_r)->req_cons - (_r)->rsp_prod_pvt);	\
-	req < rsp ? req : rsp;						\
-    })
+#ifdef __GNUC__
+#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
+    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
+    unsigned int rsp = RING_SIZE(_r) -                                  \
+        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
+    req < rsp ? req : rsp;                                              \
+})
+#else
+/* Same as above, but without the nice GCC ({ ... }) syntax. */
+#define RING_HAS_UNCONSUMED_REQUESTS(_r)                                \
+    ((((_r)->sring->req_prod - (_r)->req_cons) <                        \
+      (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt))) ?        \
+     ((_r)->sring->req_prod - (_r)->req_cons) :                         \
+     (RING_SIZE(_r) - ((_r)->req_cons - (_r)->rsp_prod_pvt)))
+#endif

 /* Direct access to individual ring elements, by index. */
-#define RING_GET_REQUEST(_r, _idx)					\
+#define RING_GET_REQUEST(_r, _idx)                                      \
     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))

-#define RING_GET_RESPONSE(_r, _idx)					\
+#define RING_GET_RESPONSE(_r, _idx)                                     \
     (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))

 /* Loop termination condition: Would the specified index overflow the ring? */
-#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)				\
+#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
     (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))

-#define RING_PUSH_REQUESTS(_r) do {					\
-    wmb(); /* back sees requests /before/ updated producer index */	\
-    (_r)->sring->req_prod = (_r)->req_prod_pvt;				\
+#define RING_PUSH_REQUESTS(_r) do {                                     \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
 } while (0)

-#define RING_PUSH_RESPONSES(_r) do {					\
-    wmb(); /* front sees responses /before/ updated producer index */	\
-    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;				\
+#define RING_PUSH_RESPONSES(_r) do {                                    \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
 } while (0)

 /*
@@ -221,40 +265,40 @@ struct __name##_back_ring {						\
  *  field appropriately.
  */

-#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {		\
-    RING_IDX __old = (_r)->sring->req_prod;				\
-    RING_IDX __new = (_r)->req_prod_pvt;				\
-    wmb(); /* back sees requests /before/ updated producer index */	\
-    (_r)->sring->req_prod = __new;					\
-    mb(); /* back sees new requests /before/ we check req_event */	\
-    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <		\
-		 (RING_IDX)(__new - __old));				\
+#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
+    RING_IDX __old = (_r)->sring->req_prod;                             \
+    RING_IDX __new = (_r)->req_prod_pvt;                                \
+    xen_wmb(); /* back sees requests /before/ updated producer index */ \
+    (_r)->sring->req_prod = __new;                                      \
+    xen_mb(); /* back sees new requests /before/ we check req_event */  \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
+                 (RING_IDX)(__new - __old));                            \
 } while (0)

-#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {		\
-    RING_IDX __old = (_r)->sring->rsp_prod;				\
-    RING_IDX __new = (_r)->rsp_prod_pvt;				\
-    wmb(); /* front sees responses /before/ updated producer index */	\
-    (_r)->sring->rsp_prod = __new;					\
-    mb(); /* front sees new responses /before/ we check rsp_event */	\
-    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <		\
-		 (RING_IDX)(__new - __old));				\
+#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
+    RING_IDX __old = (_r)->sring->rsp_prod;                             \
+    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
+    xen_wmb(); /* front sees resps /before/ updated producer index */   \
+    (_r)->sring->rsp_prod = __new;                                      \
+    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
+    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
+                 (RING_IDX)(__new - __old));                            \
 } while (0)

-#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {		\
-    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);			\
-    if (_work_to_do) break;						\
-    (_r)->sring->req_event = (_r)->req_cons + 1;			\
-    mb();								\
-    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);			\
+#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
 } while (0)

-#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {		\
-    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);			\
-    if (_work_to_do) break;						\
-    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;			\
-    mb();								\
-    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);			\
+#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
+    if (_work_to_do) break;                                             \
+    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
+    xen_mb();                                                           \
+    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
 } while (0)

 #endif /* __XEN_PUBLIC_IO_RING_H__ */
--- head-2010-05-12.orig/include/xen/interface/io/xenbus.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/xenbus.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,42 +3,68 @@
  *
  * Xenbus protocol details.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2005 XenSource Ltd.
  */

 #ifndef _XEN_PUBLIC_IO_XENBUS_H
 #define _XEN_PUBLIC_IO_XENBUS_H

-/* The state of either end of the Xenbus, i.e. the current communication
-   status of initialisation across the bus.  States here imply nothing about
-   the state of the connection between the driver and the kernel's device
-   layers.  */
-enum xenbus_state
-{
-	XenbusStateUnknown      = 0,
-	XenbusStateInitialising = 1,
-	XenbusStateInitWait     = 2,  /* Finished early
-					 initialisation, but waiting
-					 for information from the peer
-					 or hotplug scripts. */
-	XenbusStateInitialised  = 3,  /* Initialised and waiting for a
-					 connection from the peer. */
-	XenbusStateConnected    = 4,
-	XenbusStateClosing      = 5,  /* The device is being closed
-					 due to an error or an unplug
-					 event. */
-	XenbusStateClosed       = 6
+/*
+ * The state of either end of the Xenbus, i.e. the current communication
+ * status of initialisation across the bus.  States here imply nothing about
+ * the state of the connection between the driver and the kernel's device
+ * layers.
+ */
+enum xenbus_state {
+    XenbusStateUnknown       = 0,
+
+    XenbusStateInitialising  = 1,
+
+    /*
+     * InitWait: Finished early initialisation but waiting for information
+     * from the peer or hotplug scripts.
+     */
+    XenbusStateInitWait      = 2,
+
+    /*
+     * Initialised: Waiting for a connection from the peer.
+     */
+    XenbusStateInitialised   = 3,

+    XenbusStateConnected     = 4,
+
+    /*
+     * Closing: The device is being closed due to an error or an unplug event.
+     */
+    XenbusStateClosing       = 5,
+
+    XenbusStateClosed        = 6,
+
+    /*
+     * Reconfiguring: The device is being reconfigured.
+     */
+    XenbusStateReconfiguring = 7,
+
+    XenbusStateReconfigured  = 8
 };
+typedef enum xenbus_state XenbusState;

 #endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- *  c-file-style: "linux"
- *  indent-tabs-mode: t
- *  c-indent-level: 8
- *  c-basic-offset: 8
- *  tab-width: 8
- * End:
- */
--- head-2010-05-12.orig/include/xen/interface/io/xs_wire.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/io/xs_wire.h	2010-01-19 16:01:04.000000000 +0100
@@ -1,6 +1,25 @@
 /*
  * Details of the "wire" protocol between Xen Store Daemon and client
  * library or guest kernel.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (C) 2005 Rusty Russell IBM Corporation
  */

@@ -26,7 +45,9 @@ enum xsd_sockmsg_type
     XS_SET_PERMS,
     XS_WATCH_EVENT,
     XS_ERROR,
-    XS_IS_DOMAIN_INTRODUCED
+    XS_IS_DOMAIN_INTRODUCED,
+    XS_RESUME,
+    XS_SET_TARGET
 };

 #define XS_WRITE_NONE "NONE"
@@ -39,8 +60,14 @@ struct xsd_errors
     int errnum;
     const char *errstring;
 };
+#ifdef EINVAL
 #define XSD_ERROR(x) { x, #x }
-static struct xsd_errors xsd_errors[] __attribute__((unused)) = {
+/* LINTED: static unused */
+static struct xsd_errors xsd_errors[]
+#if defined(__GNUC__)
+__attribute__((unused))
+#endif
+    = {
     XSD_ERROR(EINVAL),
     XSD_ERROR(EACCES),
     XSD_ERROR(EEXIST),
@@ -56,6 +83,7 @@ static struct xsd_errors xsd_errors[] __
     XSD_ERROR(EAGAIN),
     XSD_ERROR(EISCONN)
 };
+#endif

 struct xsd_sockmsg
 {
@@ -84,4 +112,11 @@ struct xenstore_domain_interface {
     XENSTORE_RING_IDX rsp_cons, rsp_prod;
 };

+/* Violating this is very bad.  See docs/misc/xenstore.txt. */
+#define XENSTORE_PAYLOAD_MAX 4096
+
+/* Violating these just gets you an error back */
+#define XENSTORE_ABS_PATH_MAX 3072
+#define XENSTORE_REL_PATH_MAX 2048
+
 #endif /* _XS_WIRE_H */
--- head-2010-05-12.orig/include/xen/interface/memory.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/memory.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,20 +3,57 @@
  *
  * Memory reservation and information.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */

 #ifndef __XEN_PUBLIC_MEMORY_H__
 #define __XEN_PUBLIC_MEMORY_H__

+#include "xen.h"
+
 /*
- * Increase or decrease the specified domain's memory reservation. Returns a
- * -ve errcode on failure, or the # extents successfully allocated or freed.
+ * Increase or decrease the specified domain's memory reservation. Returns the
+ * number of extents successfully allocated or freed.
  * arg == addr of struct xen_memory_reservation.
  */
 #define XENMEM_increase_reservation 0
 #define XENMEM_decrease_reservation 1
 #define XENMEM_populate_physmap     6
+
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+/*
+ * Maximum # bits addressable by the user of the allocated region (e.g., I/O
+ * devices often have a 32-bit limitation even in 64-bit systems). If zero
+ * then the user has no addressing restriction. This field is not used by
+ * XENMEM_decrease_reservation.
+ */
+#define XENMEMF_address_bits(x)     (x)
+#define XENMEMF_get_address_bits(x) ((x) & 0xffu)
+/* NUMA node to allocate from. */
+#define XENMEMF_node(x)     (((x) + 1) << 8)
+#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
+#endif
+
 struct xen_memory_reservation {

     /*
@@ -29,28 +66,70 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    GUEST_HANDLE(ulong) extent_start;
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;

     /* Number of extents, and size/alignment of each (2^extent_order pages). */
-    unsigned long  nr_extents;
+    xen_ulong_t    nr_extents;
     unsigned int   extent_order;

-    /*
-     * Maximum # bits addressable by the user of the allocated region (e.g.,
-     * I/O devices often have a 32-bit limitation even in 64-bit systems). If
-     * zero then the user has no addressing restriction.
-     * This field is not used by XENMEM_decrease_reservation.
-     */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030209
+    /* XENMEMF flags. */
+    unsigned int   mem_flags;
+#else
     unsigned int   address_bits;
+#endif

     /*
      * Domain whose reservation is being changed.
      * Unprivileged domains can specify only DOMID_SELF.
      */
     domid_t        domid;
+};
+typedef struct xen_memory_reservation xen_memory_reservation_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);

+/*
+ * An atomic exchange of memory pages. If return code is zero then
+ * @out.extent_list provides GMFNs of the newly-allocated memory.
+ * Returns zero on complete success, otherwise a negative error code.
+ * On complete success then always @nr_exchanged == @in.nr_extents.
+ * On partial success @nr_exchanged indicates how much work was done.
+ */
+#define XENMEM_exchange             11
+struct xen_memory_exchange {
+    /*
+     * [IN] Details of memory extents to be exchanged (GMFN bases).
+     * Note that @in.address_bits is ignored and unused.
+     */
+    struct xen_memory_reservation in;
+
+    /*
+     * [IN/OUT] Details of new memory extents.
+     * We require that:
+     *  1. @in.domid == @out.domid
+     *  2. @in.nr_extents  << @in.extent_order ==
+     *     @out.nr_extents << @out.extent_order
+     *  3. @in.extent_start and @out.extent_start lists must not overlap
+     *  4. @out.extent_start lists GPFN bases to be populated
+     *  5. @out.extent_start is overwritten with allocated GMFN bases
+     */
+    struct xen_memory_reservation out;
+
+    /*
+     * [OUT] Number of input extents that were successfully exchanged:
+     *  1. The first @nr_exchanged input extents were successfully
+     *     deallocated.
+     *  2. The corresponding first entries in the output extent list correctly
+     *     indicate the GMFNs that were successfully exchanged.
+     *  3. All other input and output extents are untouched.
+     *  4. If not all input exents are exchanged then the return code of this
+     *     command will be non-zero.
+     *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
+     */
+    xen_ulong_t nr_exchanged;
 };
-DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
+typedef struct xen_memory_exchange xen_memory_exchange_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);

 /*
  * Returns the maximum machine frame number of mapped RAM in this system.
@@ -68,6 +147,11 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_memory_re
 #define XENMEM_maximum_reservation  4

 /*
+ * Returns the maximum GPFN in use by the guest, or -ve errcode on failure.
+ */
+#define XENMEM_maximum_gpfn         14
+
+/*
  * Returns a list of MFN bases of 2MB extents comprising the machine_to_phys
  * mapping table. Architectures which do not have a m2p table do not implement
  * this command.
@@ -86,7 +170,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    GUEST_HANDLE(ulong) extent_start;
+    XEN_GUEST_HANDLE(xen_pfn_t) extent_start;

     /*
      * Number of extents written to the above array. This will be smaller
@@ -94,7 +178,22 @@ struct xen_machphys_mfn_list {
      */
     unsigned int nr_extents;
 };
-DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
+typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);
+
+/*
+ * Returns the location in virtual address space of the machine_to_phys
+ * mapping table. Architectures which do not have a m2p table, or which do not
+ * map it by default into guest address space, do not implement this command.
+ * arg == addr of xen_machphys_mapping_t.
+ */
+#define XENMEM_machphys_mapping     12
+struct xen_machphys_mapping {
+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
+};
+typedef struct xen_machphys_mapping xen_machphys_mapping_t;
+DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);

 /*
  * Sets the GPFN at which a particular page appears in the specified guest's
@@ -109,37 +208,84 @@ struct xen_add_to_physmap {
     /* Source mapping space. */
 #define XENMAPSPACE_shared_info 0 /* shared info page */
 #define XENMAPSPACE_grant_table 1 /* grant table page */
+#define XENMAPSPACE_gmfn        2 /* GMFN */
     unsigned int space;

+#define XENMAPIDX_grant_table_status 0x80000000
+
     /* Index into source mapping space. */
-    unsigned long idx;
+    xen_ulong_t idx;

     /* GPFN where the source mapping page should appear. */
-    unsigned long gpfn;
+    xen_pfn_t     gpfn;
 };
-DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
+typedef struct xen_add_to_physmap xen_add_to_physmap_t;
+DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
+
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list  8*/

 /*
- * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
- * code on failure. This call only works for auto-translated guests.
+ * Returns the pseudo-physical memory map as it was when the domain
+ * was started (specified by XENMEM_set_memory_map).
+ * arg == addr of xen_memory_map_t.
  */
-#define XENMEM_translate_gpfn_list  8
-struct xen_translate_gpfn_list {
-    /* Which domain to translate for? */
-    domid_t domid;
-
-    /* Length of list. */
-    unsigned long nr_gpfns;
-
-    /* List of GPFNs to translate. */
-    GUEST_HANDLE(ulong) gpfn_list;
+#define XENMEM_memory_map           9
+struct xen_memory_map {
+    /*
+     * On call the number of entries which can be stored in buffer. On
+     * return the number of entries which have been stored in
+     * buffer.
+     */
+    unsigned int nr_entries;

     /*
-     * Output list to contain MFN translations. May be the same as the input
-     * list (in which case each input GPFN is overwritten with the output MFN).
+     * Entries in the buffer are in the same format as returned by the
+     * BIOS INT 0x15 EAX=0xE820 call.
      */
-    GUEST_HANDLE(ulong) mfn_list;
+    XEN_GUEST_HANDLE(void) buffer;
+};
+typedef struct xen_memory_map xen_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
+
+/*
+ * Returns the real physical memory map. Passes the same structure as
+ * XENMEM_memory_map.
+ * arg == addr of xen_memory_map_t.
+ */
+#define XENMEM_machine_memory_map   10
+
+/*
+ * Set the pseudo-physical memory map of a domain, as returned by
+ * XENMEM_memory_map.
+ * arg == addr of xen_foreign_memory_map_t.
+ */
+#define XENMEM_set_memory_map       13
+struct xen_foreign_memory_map {
+    domid_t domid;
+    struct xen_memory_map map;
 };
-DEFINE_GUEST_HANDLE_STRUCT(xen_translate_gpfn_list);
+typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
+DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+
+#define XENMEM_set_pod_target       16
+#define XENMEM_get_pod_target       17
+struct xen_pod_target {
+    /* IN */
+    uint64_t target_pages;
+    /* OUT */
+    uint64_t tot_pages;
+    uint64_t pod_cache_pages;
+    uint64_t pod_entries;
+    /* IN */
+    domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
+
+/*
+ * Get the number of MFNs saved through memory sharing.
+ * The call never fails.
+ */
+#define XENMEM_get_sharing_freed_pages    18

 #endif /* __XEN_PUBLIC_MEMORY_H__ */
--- head-2010-05-12.orig/include/xen/interface/physdev.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/physdev.h	2010-01-19 16:01:04.000000000 +0100
@@ -21,10 +21,12 @@
 #ifndef __XEN_PUBLIC_PHYSDEV_H__
 #define __XEN_PUBLIC_PHYSDEV_H__

+#include "xen.h"
+
 /*
  * Prototype for this hypercall is:
  *  int physdev_op(int cmd, void *args)
- * @cmd	 == PHYSDEVOP_??? (physdev operation).
+ * @cmd  == PHYSDEVOP_??? (physdev operation).
  * @args == Operation-specific extra arguments (NULL if none).
  */

@@ -32,114 +34,231 @@
  * Notify end-of-interrupt (EOI) for the specified IRQ.
  * @arg == pointer to physdev_eoi structure.
  */
-#define PHYSDEVOP_eoi			12
+#define PHYSDEVOP_eoi                   12
 struct physdev_eoi {
-	/* IN */
-	uint32_t irq;
+    /* IN */
+    uint32_t irq;
+};
+typedef struct physdev_eoi physdev_eoi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
+
+/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest used this function in that the associated event channel
+ * will automatically get unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn         17
+struct physdev_pirq_eoi_gmfn {
+    /* IN */
+    xen_pfn_t gmfn;
 };
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);

 /*
  * Query the status of an IRQ line.
  * @arg == pointer to physdev_irq_status_query structure.
  */
-#define PHYSDEVOP_irq_status_query	 5
+#define PHYSDEVOP_irq_status_query       5
 struct physdev_irq_status_query {
-	/* IN */
-	uint32_t irq;
-	/* OUT */
-	uint32_t flags; /* XENIRQSTAT_* */
+    /* IN */
+    uint32_t irq;
+    /* OUT */
+    uint32_t flags; /* XENIRQSTAT_* */
 };
+typedef struct physdev_irq_status_query physdev_irq_status_query_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_status_query_t);

 /* Need to call PHYSDEVOP_eoi when the IRQ has been serviced? */
-#define _XENIRQSTAT_needs_eoi	(0)
-#define	 XENIRQSTAT_needs_eoi	(1U<<_XENIRQSTAT_needs_eoi)
+#define _XENIRQSTAT_needs_eoi   (0)
+#define  XENIRQSTAT_needs_eoi   (1U<<_XENIRQSTAT_needs_eoi)

 /* IRQ shared by multiple guests? */
-#define _XENIRQSTAT_shared	(1)
-#define	 XENIRQSTAT_shared	(1U<<_XENIRQSTAT_shared)
+#define _XENIRQSTAT_shared      (1)
+#define  XENIRQSTAT_shared      (1U<<_XENIRQSTAT_shared)

 /*
  * Set the current VCPU's I/O privilege level.
  * @arg == pointer to physdev_set_iopl structure.
  */
-#define PHYSDEVOP_set_iopl		 6
+#define PHYSDEVOP_set_iopl               6
 struct physdev_set_iopl {
-	/* IN */
-	uint32_t iopl;
+    /* IN */
+    uint32_t iopl;
 };
+typedef struct physdev_set_iopl physdev_set_iopl_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iopl_t);

 /*
  * Set the current VCPU's I/O-port permissions bitmap.
  * @arg == pointer to physdev_set_iobitmap structure.
  */
-#define PHYSDEVOP_set_iobitmap		 7
+#define PHYSDEVOP_set_iobitmap           7
 struct physdev_set_iobitmap {
-	/* IN */
-	uint8_t * bitmap;
-	uint32_t nr_ports;
+    /* IN */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+    XEN_GUEST_HANDLE(uint8) bitmap;
+#else
+    uint8_t *bitmap;
+#endif
+    uint32_t nr_ports;
 };
+typedef struct physdev_set_iobitmap physdev_set_iobitmap_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_set_iobitmap_t);

 /*
  * Read or write an IO-APIC register.
  * @arg == pointer to physdev_apic structure.
  */
-#define PHYSDEVOP_apic_read		 8
-#define PHYSDEVOP_apic_write		 9
+#define PHYSDEVOP_apic_read              8
+#define PHYSDEVOP_apic_write             9
 struct physdev_apic {
-	/* IN */
-	unsigned long apic_physbase;
-	uint32_t reg;
-	/* IN or OUT */
-	uint32_t value;
+    /* IN */
+    unsigned long apic_physbase;
+    uint32_t reg;
+    /* IN or OUT */
+    uint32_t value;
 };
+typedef struct physdev_apic physdev_apic_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_apic_t);

 /*
  * Allocate or free a physical upcall vector for the specified IRQ line.
  * @arg == pointer to physdev_irq structure.
  */
-#define PHYSDEVOP_alloc_irq_vector	10
-#define PHYSDEVOP_free_irq_vector	11
+#define PHYSDEVOP_alloc_irq_vector      10
+#define PHYSDEVOP_free_irq_vector       11
 struct physdev_irq {
-	/* IN */
-	uint32_t irq;
-	/* IN or OUT */
-	uint32_t vector;
+    /* IN */
+    uint32_t irq;
+    /* IN or OUT */
+    uint32_t vector;
+};
+typedef struct physdev_irq physdev_irq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_irq_t);
+
+#define MAP_PIRQ_TYPE_MSI               0x0
+#define MAP_PIRQ_TYPE_GSI               0x1
+#define MAP_PIRQ_TYPE_UNKNOWN           0x2
+
+#define PHYSDEVOP_map_pirq               13
+struct physdev_map_pirq {
+    domid_t domid;
+    /* IN */
+    int type;
+    /* IN */
+    int index;
+    /* IN or OUT */
+    int pirq;
+    /* IN */
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
+};
+typedef struct physdev_map_pirq physdev_map_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_map_pirq_t);
+
+#define PHYSDEVOP_unmap_pirq             14
+struct physdev_unmap_pirq {
+    domid_t domid;
+    /* IN */
+    int pirq;
+};
+
+typedef struct physdev_unmap_pirq physdev_unmap_pirq_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_unmap_pirq_t);
+
+#define PHYSDEVOP_manage_pci_add         15
+#define PHYSDEVOP_manage_pci_remove      16
+struct physdev_manage_pci {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+};
+
+typedef struct physdev_manage_pci physdev_manage_pci_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+
+#define PHYSDEVOP_restore_msi            19
+struct physdev_restore_msi {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+};
+typedef struct physdev_restore_msi physdev_restore_msi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
+
+#define PHYSDEVOP_manage_pci_add_ext     20
+struct physdev_manage_pci_ext {
+    /* IN */
+    uint8_t bus;
+    uint8_t devfn;
+    unsigned is_extfn;
+    unsigned is_virtfn;
+    struct {
+        uint8_t bus;
+        uint8_t devfn;
+    } physfn;
 };

+typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t);
+
 /*
  * Argument to physdev_op_compat() hypercall. Superceded by new physdev_op()
  * hypercall since 0x00030202.
  */
 struct physdev_op {
-	uint32_t cmd;
-	union {
-		struct physdev_irq_status_query	     irq_status_query;
-		struct physdev_set_iopl		     set_iopl;
-		struct physdev_set_iobitmap	     set_iobitmap;
-		struct physdev_apic		     apic_op;
-		struct physdev_irq		     irq_op;
-	} u;
+    uint32_t cmd;
+    union {
+        struct physdev_irq_status_query      irq_status_query;
+        struct physdev_set_iopl              set_iopl;
+        struct physdev_set_iobitmap          set_iobitmap;
+        struct physdev_apic                  apic_op;
+        struct physdev_irq                   irq_op;
+    } u;
+};
+typedef struct physdev_op physdev_op_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_op_t);
+
+#define PHYSDEVOP_setup_gsi    21
+struct physdev_setup_gsi {
+    int gsi;
+    /* IN */
+    uint8_t triggering;
+    /* IN */
+    uint8_t polarity;
+    /* IN */
 };

+typedef struct physdev_setup_gsi physdev_setup_gsi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_setup_gsi_t);
+
 /*
  * Notify that some PIRQ-bound event channels have been unmasked.
  * ** This command is obsolete since interface version 0x00030202 and is **
- * ** unsupported by newer versions of Xen.				 **
+ * ** unsupported by newer versions of Xen.                              **
  */
-#define PHYSDEVOP_IRQ_UNMASK_NOTIFY	 4
+#define PHYSDEVOP_IRQ_UNMASK_NOTIFY      4

 /*
  * These all-capitals physdev operation names are superceded by the new names
  * (defined above) since interface version 0x00030202.
  */
-#define PHYSDEVOP_IRQ_STATUS_QUERY	 PHYSDEVOP_irq_status_query
-#define PHYSDEVOP_SET_IOPL		 PHYSDEVOP_set_iopl
-#define PHYSDEVOP_SET_IOBITMAP		 PHYSDEVOP_set_iobitmap
-#define PHYSDEVOP_APIC_READ		 PHYSDEVOP_apic_read
-#define PHYSDEVOP_APIC_WRITE		 PHYSDEVOP_apic_write
-#define PHYSDEVOP_ASSIGN_VECTOR		 PHYSDEVOP_alloc_irq_vector
-#define PHYSDEVOP_FREE_VECTOR		 PHYSDEVOP_free_irq_vector
+#define PHYSDEVOP_IRQ_STATUS_QUERY       PHYSDEVOP_irq_status_query
+#define PHYSDEVOP_SET_IOPL               PHYSDEVOP_set_iopl
+#define PHYSDEVOP_SET_IOBITMAP           PHYSDEVOP_set_iobitmap
+#define PHYSDEVOP_APIC_READ              PHYSDEVOP_apic_read
+#define PHYSDEVOP_APIC_WRITE             PHYSDEVOP_apic_write
+#define PHYSDEVOP_ASSIGN_VECTOR          PHYSDEVOP_alloc_irq_vector
+#define PHYSDEVOP_FREE_VECTOR            PHYSDEVOP_free_irq_vector
 #define PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY XENIRQSTAT_needs_eoi
-#define PHYSDEVOP_IRQ_SHARED		 XENIRQSTAT_shared
+#define PHYSDEVOP_IRQ_SHARED             XENIRQSTAT_shared

 #endif /* __XEN_PUBLIC_PHYSDEV_H__ */
--- head-2010-05-12.orig/include/xen/interface/sched.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/sched.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Scheduler state interactions
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */

@@ -13,17 +31,17 @@

 /*
  * The prototype for this hypercall is:
- *  long sched_op_new(int cmd, void *arg)
+ *  long sched_op(int cmd, void *arg)
  * @cmd == SCHEDOP_??? (scheduler operation).
  * @arg == Operation-specific extra argument(s), as described below.
  *
- * **NOTE**:
- * Versions of Xen prior to 3.0.2 provide only the following legacy version
+ * Versions of Xen prior to 3.0.2 provided only the following legacy version
  * of this hypercall, supporting only the commands yield, block and shutdown:
  *  long sched_op(int cmd, unsigned long arg)
  * @cmd == SCHEDOP_??? (scheduler operation).
  * @arg == 0               (SCHEDOP_yield and SCHEDOP_block)
  *      == SHUTDOWN_* code (SCHEDOP_shutdown)
+ * This legacy version is available to new guests as sched_op_compat().
  */

 /*
@@ -49,7 +67,8 @@
 struct sched_shutdown {
     unsigned int reason; /* SHUTDOWN_* */
 };
-DEFINE_GUEST_HANDLE_STRUCT(sched_shutdown);
+typedef struct sched_shutdown sched_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_shutdown_t);

 /*
  * Poll a set of event-channel ports. Return when one or more are pending. An
@@ -58,11 +77,26 @@ DEFINE_GUEST_HANDLE_STRUCT(sched_shutdow
  */
 #define SCHEDOP_poll        3
 struct sched_poll {
-    GUEST_HANDLE(evtchn_port_t) ports;
+    XEN_GUEST_HANDLE(evtchn_port_t) ports;
     unsigned int nr_ports;
     uint64_t timeout;
 };
-DEFINE_GUEST_HANDLE_STRUCT(sched_poll);
+typedef struct sched_poll sched_poll_t;
+DEFINE_XEN_GUEST_HANDLE(sched_poll_t);
+
+/*
+ * Declare a shutdown for another domain. The main use of this function is
+ * in interpreting shutdown requests and reasons for fully-virtualized
+ * domains.  A para-virtualized domain may use SCHEDOP_shutdown directly.
+ * @arg == pointer to sched_remote_shutdown structure.
+ */
+#define SCHEDOP_remote_shutdown        4
+struct sched_remote_shutdown {
+    domid_t domain_id;         /* Remote domain ID */
+    unsigned int reason;       /* SHUTDOWN_xxx reason */
+};
+typedef struct sched_remote_shutdown sched_remote_shutdown_t;
+DEFINE_XEN_GUEST_HANDLE(sched_remote_shutdown_t);

 /*
  * Reason codes for SCHEDOP_shutdown. These may be interpreted by control
--- head-2010-05-12.orig/include/xen/interface/vcpu.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/vcpu.h	2010-01-19 16:01:04.000000000 +0100
@@ -27,11 +27,13 @@
 #ifndef __XEN_PUBLIC_VCPU_H__
 #define __XEN_PUBLIC_VCPU_H__

+#include "xen.h"
+
 /*
  * Prototype for this hypercall is:
- *	int vcpu_op(int cmd, int vcpuid, void *extra_args)
- * @cmd		   == VCPUOP_??? (VCPU operation).
- * @vcpuid	   == VCPU to operate on.
+ *  int vcpu_op(int cmd, int vcpuid, void *extra_args)
+ * @cmd        == VCPUOP_??? (VCPU operation).
+ * @vcpuid     == VCPU to operate on.
  * @extra_args == Operation-specific extra arguments (NULL if none).
  */

@@ -40,52 +42,53 @@
  * newly-initialised VCPU will not run until it is brought up by VCPUOP_up.
  *
  * @extra_arg == pointer to vcpu_guest_context structure containing initial
- *				 state for the VCPU.
+ *               state for the VCPU.
  */
-#define VCPUOP_initialise			 0
+#define VCPUOP_initialise            0

 /*
  * Bring up a VCPU. This makes the VCPU runnable. This operation will fail
  * if the VCPU has not been initialised (VCPUOP_initialise).
  */
-#define VCPUOP_up					 1
+#define VCPUOP_up                    1

 /*
  * Bring down a VCPU (i.e., make it non-runnable).
  * There are a few caveats that callers should observe:
- *	1. This operation may return, and VCPU_is_up may return false, before the
- *	   VCPU stops running (i.e., the command is asynchronous). It is a good
- *	   idea to ensure that the VCPU has entered a non-critical loop before
- *	   bringing it down. Alternatively, this operation is guaranteed
- *	   synchronous if invoked by the VCPU itself.
- *	2. After a VCPU is initialised, there is currently no way to drop all its
- *	   references to domain memory. Even a VCPU that is down still holds
- *	   memory references via its pagetable base pointer and GDT. It is good
- *	   practise to move a VCPU onto an 'idle' or default page table, LDT and
- *	   GDT before bringing it down.
+ *  1. This operation may return, and VCPU_is_up may return false, before the
+ *     VCPU stops running (i.e., the command is asynchronous). It is a good
+ *     idea to ensure that the VCPU has entered a non-critical loop before
+ *     bringing it down. Alternatively, this operation is guaranteed
+ *     synchronous if invoked by the VCPU itself.
+ *  2. After a VCPU is initialised, there is currently no way to drop all its
+ *     references to domain memory. Even a VCPU that is down still holds
+ *     memory references via its pagetable base pointer and GDT. It is good
+ *     practise to move a VCPU onto an 'idle' or default page table, LDT and
+ *     GDT before bringing it down.
  */
-#define VCPUOP_down					 2
+#define VCPUOP_down                  2

 /* Returns 1 if the given VCPU is up. */
-#define VCPUOP_is_up				 3
+#define VCPUOP_is_up                 3

 /*
  * Return information about the state and running time of a VCPU.
  * @extra_arg == pointer to vcpu_runstate_info structure.
  */
-#define VCPUOP_get_runstate_info	 4
+#define VCPUOP_get_runstate_info     4
 struct vcpu_runstate_info {
-		/* VCPU's current state (RUNSTATE_*). */
-		int		 state;
-		/* When was current state entered (system time, ns)? */
-		uint64_t state_entry_time;
-		/*
-		 * Time spent in each RUNSTATE_* (ns). The sum of these times is
-		 * guaranteed not to drift from system time.
-		 */
-		uint64_t time[4];
+    /* VCPU's current state (RUNSTATE_*). */
+    int      state;
+    /* When was current state entered (system time, ns)? */
+    uint64_t state_entry_time;
+    /*
+     * Time spent in each RUNSTATE_* (ns). The sum of these times is
+     * guaranteed not to drift from system time.
+     */
+    uint64_t time[4];
 };
-DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate_info);
+typedef struct vcpu_runstate_info vcpu_runstate_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_runstate_info_t);

 /* VCPU is currently running on a physical CPU. */
 #define RUNSTATE_running  0
@@ -108,47 +111,52 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_runstate
  * Register a shared memory area from which the guest may obtain its own
  * runstate information without needing to execute a hypercall.
  * Notes:
- *	1. The registered address may be virtual or physical, depending on the
- *	   platform. The virtual address should be registered on x86 systems.
- *	2. Only one shared area may be registered per VCPU. The shared area is
- *	   updated by the hypervisor each time the VCPU is scheduled. Thus
- *	   runstate.state will always be RUNSTATE_running and
- *	   runstate.state_entry_time will indicate the system time at which the
- *	   VCPU was last scheduled to run.
+ *  1. The registered address may be virtual or physical or guest handle,
+ *     depending on the platform. Virtual address or guest handle should be
+ *     registered on x86 systems.
+ *  2. Only one shared area may be registered per VCPU. The shared area is
+ *     updated by the hypervisor each time the VCPU is scheduled. Thus
+ *     runstate.state will always be RUNSTATE_running and
+ *     runstate.state_entry_time will indicate the system time at which the
+ *     VCPU was last scheduled to run.
  * @extra_arg == pointer to vcpu_register_runstate_memory_area structure.
  */
 #define VCPUOP_register_runstate_memory_area 5
 struct vcpu_register_runstate_memory_area {
-		union {
-				GUEST_HANDLE(vcpu_runstate_info) h;
-				struct vcpu_runstate_info *v;
-				uint64_t p;
-		} addr;
+    union {
+        XEN_GUEST_HANDLE(vcpu_runstate_info_t) h;
+        struct vcpu_runstate_info *v;
+        uint64_t p;
+    } addr;
 };
+typedef struct vcpu_register_runstate_memory_area vcpu_register_runstate_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_runstate_memory_area_t);

 /*
  * Set or stop a VCPU's periodic timer. Every VCPU has one periodic timer
  * which can be set via these commands. Periods smaller than one millisecond
  * may not be supported.
  */
-#define VCPUOP_set_periodic_timer	 6 /* arg == vcpu_set_periodic_timer_t */
-#define VCPUOP_stop_periodic_timer	 7 /* arg == NULL */
+#define VCPUOP_set_periodic_timer    6 /* arg == vcpu_set_periodic_timer_t */
+#define VCPUOP_stop_periodic_timer   7 /* arg == NULL */
 struct vcpu_set_periodic_timer {
-		uint64_t period_ns;
+    uint64_t period_ns;
 };
-DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_periodic_timer);
+typedef struct vcpu_set_periodic_timer vcpu_set_periodic_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_periodic_timer_t);

 /*
  * Set or stop a VCPU's single-shot timer. Every VCPU has one single-shot
  * timer which can be set via these commands.
  */
-#define VCPUOP_set_singleshot_timer	 8 /* arg == vcpu_set_singleshot_timer_t */
+#define VCPUOP_set_singleshot_timer  8 /* arg == vcpu_set_singleshot_timer_t */
 #define VCPUOP_stop_singleshot_timer 9 /* arg == NULL */
 struct vcpu_set_singleshot_timer {
-		uint64_t timeout_abs_ns;
-		uint32_t flags;			   /* VCPU_SSHOTTMR_??? */
+    uint64_t timeout_abs_ns;   /* Absolute system time value in nanoseconds. */
+    uint32_t flags;            /* VCPU_SSHOTTMR_??? */
 };
-DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_singleshot_timer);
+typedef struct vcpu_set_singleshot_timer vcpu_set_singleshot_timer_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_set_singleshot_timer_t);

 /* Flags to VCPUOP_set_singleshot_timer. */
  /* Require the timeout to be in the future (return -ETIME if it's passed). */
@@ -161,13 +169,65 @@ DEFINE_GUEST_HANDLE_STRUCT(vcpu_set_sing
  * structure in a convenient place, such as in a per-cpu data area.
  * The pointer need not be page aligned, but the structure must not
  * cross a page boundary.
+ *
+ * This may be called only once per vcpu.
  */
-#define VCPUOP_register_vcpu_info   10  /* arg == struct vcpu_info */
+#define VCPUOP_register_vcpu_info   10  /* arg == vcpu_register_vcpu_info_t */
 struct vcpu_register_vcpu_info {
     uint64_t mfn;    /* mfn of page to place vcpu_info */
     uint32_t offset; /* offset within page */
     uint32_t rsvd;   /* unused */
 };
-DEFINE_GUEST_HANDLE_STRUCT(vcpu_register_vcpu_info);
+typedef struct vcpu_register_vcpu_info vcpu_register_vcpu_info_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_vcpu_info_t);
+
+/* Send an NMI to the specified VCPU. @extra_arg == NULL. */
+#define VCPUOP_send_nmi             11
+
+/*
+ * Get the physical ID information for a pinned vcpu's underlying physical
+ * processor.  The physical ID informmation is architecture-specific.
+ * On x86: id[31:0]=apic_id, id[63:32]=acpi_id, and all values 0xff and
+ *         greater are reserved.
+ * This command returns -EINVAL if it is not a valid operation for this VCPU.
+ */
+#define VCPUOP_get_physid           12 /* arg == vcpu_get_physid_t */
+struct vcpu_get_physid {
+    uint64_t phys_id;
+};
+typedef struct vcpu_get_physid vcpu_get_physid_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_get_physid_t);
+#define xen_vcpu_physid_to_x86_apicid(physid) \
+    ((((uint32_t)(physid)) >= 0xff) ? 0xff : ((uint8_t)(physid)))
+#define xen_vcpu_physid_to_x86_acpiid(physid) \
+    ((((uint32_t)((physid)>>32)) >= 0xff) ? 0xff : ((uint8_t)((physid)>>32)))
+
+/*
+ * Register a memory location to get a secondary copy of the vcpu time
+ * parameters.  The master copy still exists as part of the vcpu shared
+ * memory area, and this secondary copy is updated whenever the master copy
+ * is updated (and using the same versioning scheme for synchronisation).
+ *
+ * The intent is that this copy may be mapped (RO) into userspace so
+ * that usermode can compute system time using the time info and the
+ * tsc.  Usermode will see an array of vcpu_time_info structures, one
+ * for each vcpu, and choose the right one by an existing mechanism
+ * which allows it to get the current vcpu number (such as via a
+ * segment limit).  It can then apply the normal algorithm to compute
+ * system time from the tsc.
+ *
+ * @extra_arg == pointer to vcpu_register_time_info_memory_area structure.
+ */
+#define VCPUOP_register_vcpu_time_memory_area   13
+DEFINE_XEN_GUEST_HANDLE(vcpu_time_info_t);
+struct vcpu_register_time_memory_area {
+    union {
+        XEN_GUEST_HANDLE(vcpu_time_info_t) h;
+        struct vcpu_time_info *v;
+        uint64_t p;
+    } addr;
+};
+typedef struct vcpu_register_time_memory_area vcpu_register_time_memory_area_t;
+DEFINE_XEN_GUEST_HANDLE(vcpu_register_time_memory_area_t);

 #endif /* __XEN_PUBLIC_VCPU_H__ */
--- head-2010-05-12.orig/include/xen/interface/version.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/version.h	2010-01-19 16:01:04.000000000 +0100
@@ -3,6 +3,24 @@
  *
  * Xen version, type, and compile information.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2005, Nguyen Anh Quynh <aquynh@gmail.com>
  * Copyright (c) 2005, Keir Fraser <keir@xensource.com>
  */
@@ -10,17 +28,15 @@
 #ifndef __XEN_PUBLIC_VERSION_H__
 #define __XEN_PUBLIC_VERSION_H__

-/* NB. All ops return zero on success, except XENVER_version. */
+/* NB. All ops return zero on success, except XENVER_{version,pagesize} */

 /* arg == NULL; returns major:minor (16:16). */
 #define XENVER_version      0

 /* arg == xen_extraversion_t. */
 #define XENVER_extraversion 1
-struct xen_extraversion {
-    char extraversion[16];
-};
-#define XEN_EXTRAVERSION_LEN (sizeof(struct xen_extraversion))
+typedef char xen_extraversion_t[16];
+#define XEN_EXTRAVERSION_LEN (sizeof(xen_extraversion_t))

 /* arg == xen_compile_info_t. */
 #define XENVER_compile_info 2
@@ -30,29 +46,28 @@ struct xen_compile_info {
     char compile_domain[32];
     char compile_date[32];
 };
+typedef struct xen_compile_info xen_compile_info_t;

 #define XENVER_capabilities 3
-struct xen_capabilities_info {
-    char info[1024];
-};
-#define XEN_CAPABILITIES_INFO_LEN (sizeof(struct xen_capabilities_info))
+typedef char xen_capabilities_info_t[1024];
+#define XEN_CAPABILITIES_INFO_LEN (sizeof(xen_capabilities_info_t))

 #define XENVER_changeset 4
-struct xen_changeset_info {
-    char info[64];
-};
-#define XEN_CHANGESET_INFO_LEN (sizeof(struct xen_changeset_info))
+typedef char xen_changeset_info_t[64];
+#define XEN_CHANGESET_INFO_LEN (sizeof(xen_changeset_info_t))

 #define XENVER_platform_parameters 5
 struct xen_platform_parameters {
     unsigned long virt_start;
 };
+typedef struct xen_platform_parameters xen_platform_parameters_t;

 #define XENVER_get_features 6
 struct xen_feature_info {
     unsigned int submap_idx;    /* IN: which 32-bit submap to return */
     uint32_t     submap;        /* OUT: 32-bit submap */
 };
+typedef struct xen_feature_info xen_feature_info_t;

 /* Declares the features reported by XENVER_get_features. */
 #include "features.h"
@@ -60,4 +75,10 @@ struct xen_feature_info {
 /* arg == NULL; returns host memory page size. */
 #define XENVER_pagesize 7

+/* arg == xen_domain_handle_t. */
+#define XENVER_guest_handle 8
+
+#define XENVER_commandline 9
+typedef char xen_commandline_t[1024];
+
 #endif /* __XEN_PUBLIC_VERSION_H__ */
--- head-2010-05-12.orig/include/xen/interface/xen.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/interface/xen.h	2010-05-07 11:10:48.000000000 +0200
@@ -3,35 +3,69 @@
  *
  * Guest OS interface to Xen.
  *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
  * Copyright (c) 2004, K A Fraser
  */

 #ifndef __XEN_PUBLIC_XEN_H__
 #define __XEN_PUBLIC_XEN_H__

-#include <asm/xen/interface.h>
+#include "xen-compat.h"
+#ifdef CONFIG_PARAVIRT_XEN
 #include <asm/pvclock-abi.h>
+#endif

-/*
- * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
- */
+#if defined(__i386__) || defined(__x86_64__)
+#include "arch-x86/xen.h"
+#elif defined(__ia64__)
+#include "arch-ia64.h"
+#else
+#error "Unsupported architecture"
+#endif
+
+#ifndef __ASSEMBLY__
+/* Guest handles for primitive C types. */
+DEFINE_XEN_GUEST_HANDLE(char);
+__DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
+DEFINE_XEN_GUEST_HANDLE(int);
+__DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
+DEFINE_XEN_GUEST_HANDLE(long);
+__DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+DEFINE_XEN_GUEST_HANDLE(void);
+
+DEFINE_XEN_GUEST_HANDLE(uint64_t);
+DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+#endif

 /*
- * x86_32: EAX = vector; EBX, ECX, EDX, ESI, EDI = args 1, 2, 3, 4, 5.
- *         EAX = return value
- *         (argument registers may be clobbered on return)
- * x86_64: RAX = vector; RDI, RSI, RDX, R10, R8, R9 = args 1, 2, 3, 4, 5, 6.
- *         RAX = return value
- *         (argument registers not clobbered on return; RCX, R11 are)
+ * HYPERCALLS
  */
+
 #define __HYPERVISOR_set_trap_table        0
 #define __HYPERVISOR_mmu_update            1
 #define __HYPERVISOR_set_gdt               2
 #define __HYPERVISOR_stack_switch          3
 #define __HYPERVISOR_set_callbacks         4
 #define __HYPERVISOR_fpu_taskswitch        5
-#define __HYPERVISOR_sched_op              6
-#define __HYPERVISOR_dom0_op               7
+#define __HYPERVISOR_sched_op_compat       6 /* compat since 0x00030101 */
+#define __HYPERVISOR_platform_op           7
 #define __HYPERVISOR_set_debugreg          8
 #define __HYPERVISOR_get_debugreg          9
 #define __HYPERVISOR_update_descriptor    10
@@ -39,10 +73,10 @@
 #define __HYPERVISOR_multicall            13
 #define __HYPERVISOR_update_va_mapping    14
 #define __HYPERVISOR_set_timer_op         15
-#define __HYPERVISOR_event_channel_op_compat 16
+#define __HYPERVISOR_event_channel_op_compat 16 /* compat since 0x00030202 */
 #define __HYPERVISOR_xen_version          17
 #define __HYPERVISOR_console_io           18
-#define __HYPERVISOR_physdev_op_compat    19
+#define __HYPERVISOR_physdev_op_compat    19 /* compat since 0x00030202 */
 #define __HYPERVISOR_grant_table_op       20
 #define __HYPERVISOR_vm_assist            21
 #define __HYPERVISOR_update_va_mapping_otherdomain 22
@@ -50,7 +84,7 @@
 #define __HYPERVISOR_vcpu_op              24
 #define __HYPERVISOR_set_segment_base     25 /* x86/64 only */
 #define __HYPERVISOR_mmuext_op            26
-#define __HYPERVISOR_acm_op               27
+#define __HYPERVISOR_xsm_op               27
 #define __HYPERVISOR_nmi_op               28
 #define __HYPERVISOR_sched_op_new         29
 #define __HYPERVISOR_callback_op          30
@@ -58,6 +92,10 @@
 #define __HYPERVISOR_event_channel_op     32
 #define __HYPERVISOR_physdev_op           33
 #define __HYPERVISOR_hvm_op               34
+#define __HYPERVISOR_sysctl               35
+#define __HYPERVISOR_domctl               36
+#define __HYPERVISOR_kexec_op             37
+#define __HYPERVISOR_tmem_op              38

 /* Architecture-specific hypercall definitions. */
 #define __HYPERVISOR_arch_0               48
@@ -70,15 +108,50 @@
 #define __HYPERVISOR_arch_7               55

 /*
+ * HYPERCALL COMPATIBILITY.
+ */
+
+/* New sched_op hypercall introduced in 0x00030101. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030101
+#undef __HYPERVISOR_sched_op
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_compat
+#else
+#define __HYPERVISOR_sched_op __HYPERVISOR_sched_op_new
+#endif
+
+/* New event-channel and physdev hypercalls introduced in 0x00030202. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030202
+#undef __HYPERVISOR_event_channel_op
+#define __HYPERVISOR_event_channel_op __HYPERVISOR_event_channel_op_compat
+#undef __HYPERVISOR_physdev_op
+#define __HYPERVISOR_physdev_op __HYPERVISOR_physdev_op_compat
+#endif
+
+/* New platform_op hypercall introduced in 0x00030204. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030204
+#define __HYPERVISOR_dom0_op __HYPERVISOR_platform_op
+#endif
+
+/*
  * VIRTUAL INTERRUPTS
  *
  * Virtual interrupts that a guest OS may receive from Xen.
- */
-#define VIRQ_TIMER      0  /* Timebase update, and/or requested timeout.  */
-#define VIRQ_DEBUG      1  /* Request guest to dump debug info.           */
-#define VIRQ_CONSOLE    2  /* (DOM0) Bytes received on emergency console. */
-#define VIRQ_DOM_EXC    3  /* (DOM0) Exceptional event for some domain.   */
-#define VIRQ_DEBUGGER   6  /* (DOM0) A domain has paused for debugging.   */
+ *
+ * In the side comments, 'V.' denotes a per-VCPU VIRQ while 'G.' denotes a
+ * global VIRQ. The former can be bound once per VCPU and cannot be re-bound.
+ * The latter can be allocated only once per guest: they must initially be
+ * allocated to VCPU0 but can subsequently be re-bound.
+ */
+#define VIRQ_TIMER      0  /* V. Timebase update, and/or requested timeout.  */
+#define VIRQ_DEBUG      1  /* V. Request guest to dump debug info.           */
+#define VIRQ_CONSOLE    2  /* G. (DOM0) Bytes received on emergency console. */
+#define VIRQ_DOM_EXC    3  /* G. (DOM0) Exceptional event for some domain.   */
+#define VIRQ_TBUF       4  /* G. (DOM0) Trace buffer has records available.  */
+#define VIRQ_DEBUGGER   6  /* G. (DOM0) A domain has paused for debugging.   */
+#define VIRQ_XENOPROF   7  /* V. XenOprofile interrupt: new sample available */
+#define VIRQ_CON_RING   8  /* G. (DOM0) Bytes received on console            */
+#define VIRQ_PCPU_STATE 9  /* G. (DOM0) PCPU state changed                   */
+#define VIRQ_MEM_EVENT  10 /* G. (DOM0) A memory event has occured           */

 /* Architecture-specific VIRQ definitions. */
 #define VIRQ_ARCH_0    16
@@ -91,19 +164,28 @@
 #define VIRQ_ARCH_7    23

 #define NR_VIRQS       24
+
 /*
- * MMU-UPDATE REQUESTS
+ * HYPERVISOR_mmu_update(reqs, count, pdone, foreigndom)
  *
- * HYPERVISOR_mmu_update() accepts a list of (ptr, val) pairs.
- * A foreigndom (FD) can be specified (or DOMID_SELF for none).
- * Where the FD has some effect, it is described below.
- * ptr[1:0] specifies the appropriate MMU_* command.
+ * @reqs is an array of mmu_update_t structures ((ptr, val) pairs).
+ * @count is the length of the above array.
+ * @pdone is an output parameter indicating number of completed operations
+ * @foreigndom[15:0]: FD, the expected owner of data pages referenced in this
+ *                    hypercall invocation. Can be DOMID_SELF.
+ * @foreigndom[31:16]: PFD, the expected owner of pagetable pages referenced
+ *                     in this hypercall invocation. The value of this field
+ *                     (x) encodes the PFD as follows:
+ *                     x == 0 => PFD == DOMID_SELF
+ *                     x != 0 => PFD == x - 1
  *
+ * Sub-commands: ptr[1:0] specifies the appropriate MMU_* command.
+ * -------------
  * ptr[1:0] == MMU_NORMAL_PT_UPDATE:
- * Updates an entry in a page table. If updating an L1 table, and the new
- * table entry is valid/present, the mapped frame must belong to the FD, if
- * an FD has been specified. If attempting to map an I/O page then the
- * caller assumes the privilege of the FD.
+ * Updates an entry in a page table belonging to PFD. If updating an L1 table,
+ * and the new table entry is valid/present, the mapped frame must belong to
+ * FD. If attempting to map an I/O page then the caller assumes the privilege
+ * of the FD.
  * FD == DOMID_IO: Permit /only/ I/O mappings, at the priv level of the caller.
  * FD == DOMID_XEN: Map restricted areas of Xen's heap space.
  * ptr[:2]  -- Machine address of the page-table entry to modify.
@@ -119,8 +201,8 @@
  * As MMU_NORMAL_PT_UPDATE above, but A/D bits currently in the PTE are ORed
  * with those in @val.
  */
-#define MMU_NORMAL_PT_UPDATE      0 /* checked '*ptr = val'. ptr is MA.       */
-#define MMU_MACHPHYS_UPDATE       1 /* ptr = MA of frame to modify entry for  */
+#define MMU_NORMAL_PT_UPDATE      0 /* checked '*ptr = val'. ptr is MA.      */
+#define MMU_MACHPHYS_UPDATE       1 /* ptr = MA of frame to modify entry for */
 #define MMU_PT_UPDATE_PRESERVE_AD 2 /* atomically: *ptr = val | (*ptr&(A|D)) */

 /*
@@ -163,9 +245,20 @@
  * cmd: MMUEXT_FLUSH_CACHE
  * No additional arguments. Writes back and flushes cache contents.
  *
+ * cmd: MMUEXT_FLUSH_CACHE_GLOBAL
+ * No additional arguments. Writes back and flushes cache contents
+ * on all CPUs in the system.
+ *
  * cmd: MMUEXT_SET_LDT
  * linear_addr: Linear address of LDT base (NB. must be page-aligned).
  * nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
  */
 #define MMUEXT_PIN_L1_TABLE      0
 #define MMUEXT_PIN_L2_TABLE      1
@@ -182,24 +275,35 @@
 #define MMUEXT_FLUSH_CACHE      12
 #define MMUEXT_SET_LDT          13
 #define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE       16
+#define MMUEXT_COPY_PAGE        17
+#define MMUEXT_FLUSH_CACHE_GLOBAL 18

 #ifndef __ASSEMBLY__
 struct mmuext_op {
-	unsigned int cmd;
-	union {
-		/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
-		unsigned long mfn;
-		/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
-		unsigned long linear_addr;
-	} arg1;
-	union {
-		/* SET_LDT */
-		unsigned int nr_ents;
-		/* TLB_FLUSH_MULTI, INVLPG_MULTI */
-		void *vcpumask;
-	} arg2;
+    unsigned int cmd;
+    union {
+        /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+         * CLEAR_PAGE, COPY_PAGE */
+        xen_pfn_t     mfn;
+        /* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
+        unsigned long linear_addr;
+    } arg1;
+    union {
+        /* SET_LDT */
+        unsigned int nr_ents;
+        /* TLB_FLUSH_MULTI, INVLPG_MULTI */
+#if __XEN_INTERFACE_VERSION__ >= 0x00030205
+        XEN_GUEST_HANDLE(const_void) vcpumask;
+#else
+        const void *vcpumask;
+#endif
+        /* COPY_PAGE */
+        xen_pfn_t src_mfn;
+    } arg2;
 };
-DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
+typedef struct mmuext_op mmuext_op_t;
+DEFINE_XEN_GUEST_HANDLE(mmuext_op_t);
 #endif

 /* These are passed as 'flags' to update_va_mapping. They can be ORed. */
@@ -224,11 +328,24 @@ DEFINE_GUEST_HANDLE_STRUCT(mmuext_op);
  */
 #define VMASST_CMD_enable                0
 #define VMASST_CMD_disable               1
+
+/* x86/32 guests: simulate full 4GB segment limits. */
 #define VMASST_TYPE_4gb_segments         0
+
+/* x86/32 guests: trap (vector 15) whenever above vmassist is used. */
 #define VMASST_TYPE_4gb_segments_notify  1
+
+/*
+ * x86 guests: support writes to bottom-level PTEs.
+ * NB1. Page-directory entries cannot be written.
+ * NB2. Guest must continue to remove all writable mappings of PTEs.
+ */
 #define VMASST_TYPE_writable_pagetables  2
+
+/* x86/PAE guests: support PDPTs above 4GB. */
 #define VMASST_TYPE_pae_extended_cr3     3
-#define MAX_VMASST_TYPE 3
+
+#define MAX_VMASST_TYPE                  3

 #ifndef __ASSEMBLY__

@@ -260,6 +377,13 @@ typedef uint16_t domid_t;
 #define DOMID_XEN  (0x7FF2U)

 /*
+ * DOMID_COW is used as the owner of sharable pages */
+#define DOMID_COW  (0x7FF3U)
+
+/* DOMID_INVALID is used to identity invalid domid */
+#define DOMID_INVALID (0x7FFFU)
+
+/*
  * Send an array of these to HYPERVISOR_mmu_update().
  * NB. The fields are natural pointer/address size for this architecture.
  */
@@ -267,18 +391,19 @@ struct mmu_update {
     uint64_t ptr;       /* Machine address of PTE. */
     uint64_t val;       /* New contents of PTE.    */
 };
-DEFINE_GUEST_HANDLE_STRUCT(mmu_update);
+typedef struct mmu_update mmu_update_t;
+DEFINE_XEN_GUEST_HANDLE(mmu_update_t);

 /*
  * Send an array of these to HYPERVISOR_multicall().
  * NB. The fields are natural register size for this architecture.
  */
 struct multicall_entry {
-    unsigned long op;
-    long result;
+    unsigned long op, result;
     unsigned long args[6];
 };
-DEFINE_GUEST_HANDLE_STRUCT(multicall_entry);
+typedef struct multicall_entry multicall_entry_t;
+DEFINE_XEN_GUEST_HANDLE(multicall_entry_t);

 /*
  * Event channel endpoints per domain:
@@ -287,173 +412,271 @@ DEFINE_GUEST_HANDLE_STRUCT(multicall_ent
 #define NR_EVENT_CHANNELS (sizeof(unsigned long) * sizeof(unsigned long) * 64)

 struct vcpu_time_info {
-	/*
-	 * Updates to the following values are preceded and followed
-	 * by an increment of 'version'. The guest can therefore
-	 * detect updates by looking for changes to 'version'. If the
-	 * least-significant bit of the version number is set then an
-	 * update is in progress and the guest must wait to read a
-	 * consistent set of values.  The correct way to interact with
-	 * the version number is similar to Linux's seqlock: see the
-	 * implementations of read_seqbegin/read_seqretry.
-	 */
-	uint32_t version;
-	uint32_t pad0;
-	uint64_t tsc_timestamp;   /* TSC at last update of time vals.  */
-	uint64_t system_time;     /* Time, in nanosecs, since boot.    */
-	/*
-	 * Current system time:
-	 *   system_time + ((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul
-	 * CPU frequency (Hz):
-	 *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
-	 */
-	uint32_t tsc_to_system_mul;
-	int8_t   tsc_shift;
-	int8_t   pad1[3];
+    /*
+     * Updates to the following values are preceded and followed by an
+     * increment of 'version'. The guest can therefore detect updates by
+     * looking for changes to 'version'. If the least-significant bit of
+     * the version number is set then an update is in progress and the guest
+     * must wait to read a consistent set of values.
+     * The correct way to interact with the version number is similar to
+     * Linux's seqlock: see the implementations of read_seqbegin/read_seqretry.
+     */
+    uint32_t version;
+    uint32_t pad0;
+    uint64_t tsc_timestamp;   /* TSC at last update of time vals.  */
+    uint64_t system_time;     /* Time, in nanosecs, since boot.    */
+    /*
+     * Current system time:
+     *   system_time +
+     *   ((((tsc - tsc_timestamp) << tsc_shift) * tsc_to_system_mul) >> 32)
+     * CPU frequency (Hz):
+     *   ((10^9 << 32) / tsc_to_system_mul) >> tsc_shift
+     */
+    uint32_t tsc_to_system_mul;
+    int8_t   tsc_shift;
+    int8_t   pad1[3];
 }; /* 32 bytes */
+typedef struct vcpu_time_info vcpu_time_info_t;

 struct vcpu_info {
-	/*
-	 * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
-	 * a pending notification for a particular VCPU. It is then cleared
-	 * by the guest OS /before/ checking for pending work, thus avoiding
-	 * a set-and-check race. Note that the mask is only accessed by Xen
-	 * on the CPU that is currently hosting the VCPU. This means that the
-	 * pending and mask flags can be updated by the guest without special
-	 * synchronisation (i.e., no need for the x86 LOCK prefix).
-	 * This may seem suboptimal because if the pending flag is set by
-	 * a different CPU then an IPI may be scheduled even when the mask
-	 * is set. However, note:
-	 *  1. The task of 'interrupt holdoff' is covered by the per-event-
-	 *     channel mask bits. A 'noisy' event that is continually being
-	 *     triggered can be masked at source at this very precise
-	 *     granularity.
-	 *  2. The main purpose of the per-VCPU mask is therefore to restrict
-	 *     reentrant execution: whether for concurrency control, or to
-	 *     prevent unbounded stack usage. Whatever the purpose, we expect
-	 *     that the mask will be asserted only for short periods at a time,
-	 *     and so the likelihood of a 'spurious' IPI is suitably small.
-	 * The mask is read before making an event upcall to the guest: a
-	 * non-zero mask therefore guarantees that the VCPU will not receive
-	 * an upcall activation. The mask is cleared when the VCPU requests
-	 * to block: this avoids wakeup-waiting races.
-	 */
-	uint8_t evtchn_upcall_pending;
-	uint8_t evtchn_upcall_mask;
-	unsigned long evtchn_pending_sel;
-	struct arch_vcpu_info arch;
-	struct pvclock_vcpu_time_info time;
+    /*
+     * 'evtchn_upcall_pending' is written non-zero by Xen to indicate
+     * a pending notification for a particular VCPU. It is then cleared
+     * by the guest OS /before/ checking for pending work, thus avoiding
+     * a set-and-check race. Note that the mask is only accessed by Xen
+     * on the CPU that is currently hosting the VCPU. This means that the
+     * pending and mask flags can be updated by the guest without special
+     * synchronisation (i.e., no need for the x86 LOCK prefix).
+     * This may seem suboptimal because if the pending flag is set by
+     * a different CPU then an IPI may be scheduled even when the mask
+     * is set. However, note:
+     *  1. The task of 'interrupt holdoff' is covered by the per-event-
+     *     channel mask bits. A 'noisy' event that is continually being
+     *     triggered can be masked at source at this very precise
+     *     granularity.
+     *  2. The main purpose of the per-VCPU mask is therefore to restrict
+     *     reentrant execution: whether for concurrency control, or to
+     *     prevent unbounded stack usage. Whatever the purpose, we expect
+     *     that the mask will be asserted only for short periods at a time,
+     *     and so the likelihood of a 'spurious' IPI is suitably small.
+     * The mask is read before making an event upcall to the guest: a
+     * non-zero mask therefore guarantees that the VCPU will not receive
+     * an upcall activation. The mask is cleared when the VCPU requests
+     * to block: this avoids wakeup-waiting races.
+     */
+    uint8_t evtchn_upcall_pending;
+    uint8_t evtchn_upcall_mask;
+    unsigned long evtchn_pending_sel;
+    struct arch_vcpu_info arch;
+#ifdef CONFIG_PARAVIRT_XEN
+    struct pvclock_vcpu_time_info time;
+#else
+    struct vcpu_time_info time;
+#endif
 }; /* 64 bytes (x86) */
+#ifndef __XEN__
+typedef struct vcpu_info vcpu_info_t;
+#endif

 /*
  * Xen/kernel shared data -- pointer provided in start_info.
- * NB. We expect that this struct is smaller than a page.
+ *
+ * This structure is defined to be both smaller than a page, and the
+ * only data on the shared page, but may vary in actual size even within
+ * compatible Xen versions; guests should not rely on the size
+ * of this structure remaining constant.
  */
 struct shared_info {
-	struct vcpu_info vcpu_info[MAX_VIRT_CPUS];
+    struct vcpu_info vcpu_info[XEN_LEGACY_MAX_VCPUS];

-	/*
-	 * A domain can create "event channels" on which it can send and receive
-	 * asynchronous event notifications. There are three classes of event that
-	 * are delivered by this mechanism:
-	 *  1. Bi-directional inter- and intra-domain connections. Domains must
-	 *     arrange out-of-band to set up a connection (usually by allocating
-	 *     an unbound 'listener' port and avertising that via a storage service
-	 *     such as xenstore).
-	 *  2. Physical interrupts. A domain with suitable hardware-access
-	 *     privileges can bind an event-channel port to a physical interrupt
-	 *     source.
-	 *  3. Virtual interrupts ('events'). A domain can bind an event-channel
-	 *     port to a virtual interrupt source, such as the virtual-timer
-	 *     device or the emergency console.
-	 *
-	 * Event channels are addressed by a "port index". Each channel is
-	 * associated with two bits of information:
-	 *  1. PENDING -- notifies the domain that there is a pending notification
-	 *     to be processed. This bit is cleared by the guest.
-	 *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
-	 *     will cause an asynchronous upcall to be scheduled. This bit is only
-	 *     updated by the guest. It is read-only within Xen. If a channel
-	 *     becomes pending while the channel is masked then the 'edge' is lost
-	 *     (i.e., when the channel is unmasked, the guest must manually handle
-	 *     pending notifications as no upcall will be scheduled by Xen).
-	 *
-	 * To expedite scanning of pending notifications, any 0->1 pending
-	 * transition on an unmasked channel causes a corresponding bit in a
-	 * per-vcpu selector word to be set. Each bit in the selector covers a
-	 * 'C long' in the PENDING bitfield array.
-	 */
-	unsigned long evtchn_pending[sizeof(unsigned long) * 8];
-	unsigned long evtchn_mask[sizeof(unsigned long) * 8];
-
-	/*
-	 * Wallclock time: updated only by control software. Guests should base
-	 * their gettimeofday() syscall on this wallclock-base value.
-	 */
-	struct pvclock_wall_clock wc;
+    /*
+     * A domain can create "event channels" on which it can send and receive
+     * asynchronous event notifications. There are three classes of event that
+     * are delivered by this mechanism:
+     *  1. Bi-directional inter- and intra-domain connections. Domains must
+     *     arrange out-of-band to set up a connection (usually by allocating
+     *     an unbound 'listener' port and avertising that via a storage service
+     *     such as xenstore).
+     *  2. Physical interrupts. A domain with suitable hardware-access
+     *     privileges can bind an event-channel port to a physical interrupt
+     *     source.
+     *  3. Virtual interrupts ('events'). A domain can bind an event-channel
+     *     port to a virtual interrupt source, such as the virtual-timer
+     *     device or the emergency console.
+     *
+     * Event channels are addressed by a "port index". Each channel is
+     * associated with two bits of information:
+     *  1. PENDING -- notifies the domain that there is a pending notification
+     *     to be processed. This bit is cleared by the guest.
+     *  2. MASK -- if this bit is clear then a 0->1 transition of PENDING
+     *     will cause an asynchronous upcall to be scheduled. This bit is only
+     *     updated by the guest. It is read-only within Xen. If a channel
+     *     becomes pending while the channel is masked then the 'edge' is lost
+     *     (i.e., when the channel is unmasked, the guest must manually handle
+     *     pending notifications as no upcall will be scheduled by Xen).
+     *
+     * To expedite scanning of pending notifications, any 0->1 pending
+     * transition on an unmasked channel causes a corresponding bit in a
+     * per-vcpu selector word to be set. Each bit in the selector covers a
+     * 'C long' in the PENDING bitfield array.
+     */
+    unsigned long evtchn_pending[sizeof(unsigned long) * 8];
+    unsigned long evtchn_mask[sizeof(unsigned long) * 8];
+
+    /*
+     * Wallclock time: updated only by control software. Guests should base
+     * their gettimeofday() syscall on this wallclock-base value.
+     */
+#ifdef CONFIG_PARAVIRT_XEN
+    struct pvclock_wall_clock wc;
+#else
+    uint32_t wc_version;      /* Version counter: see vcpu_time_info_t. */
+    uint32_t wc_sec;          /* Secs  00:00:00 UTC, Jan 1, 1970.  */
+    uint32_t wc_nsec;         /* Nsecs 00:00:00 UTC, Jan 1, 1970.  */
+#endif

-	struct arch_shared_info arch;
+    struct arch_shared_info arch;

 };
+#ifndef __XEN__
+typedef struct shared_info shared_info_t;
+#endif

 /*
- * Start-of-day memory layout for the initial domain (DOM0):
+ * Start-of-day memory layout:
  *  1. The domain is started within contiguous virtual-memory region.
- *  2. The contiguous region begins and ends on an aligned 4MB boundary.
- *  3. The region start corresponds to the load address of the OS image.
- *     If the load address is not 4MB aligned then the address is rounded down.
- *  4. This the order of bootstrap elements in the initial virtual region:
+ *  2. The contiguous region ends on an aligned 4MB boundary.
+ *  3. This the order of bootstrap elements in the initial virtual region:
  *      a. relocated kernel image
  *      b. initial ram disk              [mod_start, mod_len]
  *      c. list of allocated page frames [mfn_list, nr_pages]
+ *         (unless relocated due to XEN_ELFNOTE_INIT_P2M)
  *      d. start_info_t structure        [register ESI (x86)]
  *      e. bootstrap page tables         [pt_base, CR3 (x86)]
  *      f. bootstrap stack               [register ESP (x86)]
- *  5. Bootstrap elements are packed together, but each is 4kB-aligned.
- *  6. The initial ram disk may be omitted.
- *  7. The list of page frames forms a contiguous 'pseudo-physical' memory
+ *  4. Bootstrap elements are packed together, but each is 4kB-aligned.
+ *  5. The initial ram disk may be omitted.
+ *  6. The list of page frames forms a contiguous 'pseudo-physical' memory
  *     layout for the domain. In particular, the bootstrap virtual-memory
  *     region is a 1:1 mapping to the first section of the pseudo-physical map.
- *  8. All bootstrap elements are mapped read-writable for the guest OS. The
+ *  7. All bootstrap elements are mapped read-writable for the guest OS. The
  *     only exception is the bootstrap page table, which is mapped read-only.
- *  9. There is guaranteed to be at least 512kB padding after the final
+ *  8. There is guaranteed to be at least 512kB padding after the final
  *     bootstrap element. If necessary, the bootstrap virtual region is
  *     extended by an extra 4MB to ensure this.
  */

 #define MAX_GUEST_CMDLINE 1024
 struct start_info {
-	/* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
-	char magic[32];             /* "xen-<version>-<platform>".            */
-	unsigned long nr_pages;     /* Total pages allocated to this domain.  */
-	unsigned long shared_info;  /* MACHINE address of shared info struct. */
-	uint32_t flags;             /* SIF_xxx flags.                         */
-	unsigned long store_mfn;    /* MACHINE page number of shared page.    */
-	uint32_t store_evtchn;      /* Event channel for store communication. */
-	union {
-		struct {
-			unsigned long mfn;  /* MACHINE page number of console page.   */
-			uint32_t  evtchn;   /* Event channel for console page.        */
-		} domU;
-		struct {
-			uint32_t info_off;  /* Offset of console_info struct.         */
-			uint32_t info_size; /* Size of console_info struct from start.*/
-		} dom0;
-	} console;
-	/* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
-	unsigned long pt_base;      /* VIRTUAL address of page directory.     */
-	unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
-	unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
-	unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
-	unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
-	int8_t cmd_line[MAX_GUEST_CMDLINE];
+    /* THE FOLLOWING ARE FILLED IN BOTH ON INITIAL BOOT AND ON RESUME.    */
+    char magic[32];             /* "xen-<version>-<platform>".            */
+    unsigned long nr_pages;     /* Total pages allocated to this domain.  */
+    unsigned long shared_info;  /* MACHINE address of shared info struct. */
+    uint32_t flags;             /* SIF_xxx flags.                         */
+    xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
+    uint32_t store_evtchn;      /* Event channel for store communication. */
+    union {
+        struct {
+            xen_pfn_t mfn;      /* MACHINE page number of console page.   */
+            uint32_t  evtchn;   /* Event channel for console page.        */
+        } domU;
+        struct {
+            uint32_t info_off;  /* Offset of console_info struct.         */
+            uint32_t info_size; /* Size of console_info struct from start.*/
+        } dom0;
+    } console;
+    /* THE FOLLOWING ARE ONLY FILLED IN ON INITIAL BOOT (NOT RESUME).     */
+    unsigned long pt_base;      /* VIRTUAL address of page directory.     */
+    unsigned long nr_pt_frames; /* Number of bootstrap p.t. frames.       */
+    unsigned long mfn_list;     /* VIRTUAL address of page-frame list.    */
+    unsigned long mod_start;    /* VIRTUAL address of pre-loaded module.  */
+    unsigned long mod_len;      /* Size (bytes) of pre-loaded module.     */
+    int8_t cmd_line[MAX_GUEST_CMDLINE];
+    /* The pfn range here covers both page table and p->m table frames.   */
+    unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table.    */
+    unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table.  */
 };
+typedef struct start_info start_info_t;
+
+/* New console union for dom0 introduced in 0x00030203. */
+#if __XEN_INTERFACE_VERSION__ < 0x00030203
+#define console_mfn    console.domU.mfn
+#define console_evtchn console.domU.evtchn
+#endif

 /* These flags are passed in the 'flags' field of start_info_t. */
 #define SIF_PRIVILEGED    (1<<0)  /* Is the domain privileged? */
 #define SIF_INITDOMAIN    (1<<1)  /* Is this the initial control domain? */
+#define SIF_MULTIBOOT_MOD (1<<2)  /* Is mod_start a multiboot module? */
+#define SIF_PM_MASK       (0xFF<<8) /* reserve 1 byte for xen-pm options */

-typedef uint64_t cpumap_t;
+/*
+ * A multiboot module is a package containing modules very similar to a
+ * multiboot module array. The only differences are:
+ * - the array of module descriptors is by convention simply at the beginning
+ *   of the multiboot module,
+ * - addresses in the module descriptors are based on the beginning of the
+ *   multiboot module,
+ * - the number of modules is determined by a termination descriptor that has
+ *   mod_start == 0.
+ *
+ * This permits to both build it statically and reference it in a configuration
+ * file, and let the PV guest easily rebase the addresses to virtual addresses
+ * and at the same time count the number of modules.
+ */
+struct xen_multiboot_mod_list
+{
+    /* Address of first byte of the module */
+    uint32_t mod_start;
+    /* Address of last byte of the module (inclusive) */
+    uint32_t mod_end;
+    /* Address of zero-terminated command line */
+    uint32_t cmdline;
+    /* Unused, must be zero */
+    uint32_t pad;
+};
+
+typedef struct dom0_vga_console_info {
+    uint8_t video_type; /* DOM0_VGA_CONSOLE_??? */
+#define XEN_VGATYPE_TEXT_MODE_3 0x03
+#define XEN_VGATYPE_VESA_LFB    0x23
+
+    union {
+        struct {
+            /* Font height, in pixels. */
+            uint16_t font_height;
+            /* Cursor location (column, row). */
+            uint16_t cursor_x, cursor_y;
+            /* Number of rows and columns (dimensions in characters). */
+            uint16_t rows, columns;
+        } text_mode_3;
+
+        struct {
+            /* Width and height, in pixels. */
+            uint16_t width, height;
+            /* Bytes per scan line. */
+            uint16_t bytes_per_line;
+            /* Bits per pixel. */
+            uint16_t bits_per_pixel;
+            /* LFB physical address, and size (in units of 64kB). */
+            uint32_t lfb_base;
+            uint32_t lfb_size;
+            /* RGB mask offsets and sizes, as defined by VBE 1.2+ */
+            uint8_t  red_pos, red_size;
+            uint8_t  green_pos, green_size;
+            uint8_t  blue_pos, blue_size;
+            uint8_t  rsvd_pos, rsvd_size;
+#if __XEN_INTERFACE_VERSION__ >= 0x00030206
+            /* VESA capabilities (offset 0xa, VESA command 0x4f00). */
+            uint32_t gbl_caps;
+            /* Mode attributes (offset 0x0, VESA command 0x4f01). */
+            uint16_t mode_attrs;
+#endif
+        } vesa_lfb;
+    } u;
+} dom0_vga_console_info_t;
+#define xen_vga_console_info dom0_vga_console_info
+#define xen_vga_console_info_t dom0_vga_console_info_t

 typedef uint8_t xen_domain_handle_t[16];

@@ -461,6 +684,11 @@ typedef uint8_t xen_domain_handle_t[16];
 #define __mk_unsigned_long(x) x ## UL
 #define mk_unsigned_long(x) __mk_unsigned_long(x)

+__DEFINE_XEN_GUEST_HANDLE(uint8,  uint8_t);
+__DEFINE_XEN_GUEST_HANDLE(uint16, uint16_t);
+__DEFINE_XEN_GUEST_HANDLE(uint32, uint32_t);
+__DEFINE_XEN_GUEST_HANDLE(uint64, uint64_t);
+
 #else /* __ASSEMBLY__ */

 /* In assembly code we cannot use C numeric constant suffixes. */
@@ -468,4 +696,14 @@ typedef uint8_t xen_domain_handle_t[16];

 #endif /* !__ASSEMBLY__ */

+/* Default definitions for macros used by domctl/sysctl. */
+#if defined(__XEN__) || defined(__XEN_TOOLS__)
+#ifndef uint64_aligned_t
+#define uint64_aligned_t uint64_t
+#endif
+#ifndef XEN_GUEST_HANDLE_64
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
+#endif
+#endif
+
 #endif /* __XEN_PUBLIC_XEN_H__ */
--- head-2010-05-12.orig/include/xen/xenbus.h	2010-05-12 08:55:24.000000000 +0200
+++ head-2010-05-12/include/xen/xenbus.h	2010-01-19 16:01:04.000000000 +0100
@@ -39,7 +39,7 @@
 #include <linux/mutex.h>
 #include <linux/completion.h>
 #include <linux/init.h>
-#include <linux/slab.h>
+#include <linux/err.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/grant_table.h>
 #include <xen/interface/io/xenbus.h>
@@ -56,8 +56,17 @@ struct xenbus_watch
 	/* Callback (executed in a process context with no locks held). */
 	void (*callback)(struct xenbus_watch *,
 			 const char **vec, unsigned int len);
+
+	/* See XBWF_ definitions below. */
+	unsigned long flags;
 };

+/*
+ * Execute callback in its own kthread. Useful if the callback is long
+ * running or heavily serialised, to avoid taking out the main xenwatch thread
+ * for a long period of time (or even unwittingly causing a deadlock).
+ */
+#define XBWF_new_thread	1

 /* A xenbus device. */
 struct xenbus_device {
@@ -92,7 +101,8 @@ struct xenbus_driver {
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
 	int (*remove)(struct xenbus_device *dev);
-	int (*suspend)(struct xenbus_device *dev, pm_message_t state);
+	int (*suspend)(struct xenbus_device *dev);
+	int (*suspend_cancel)(struct xenbus_device *dev);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
 	struct device_driver driver;
@@ -105,27 +115,8 @@ static inline struct xenbus_driver *to_x
 	return container_of(drv, struct xenbus_driver, driver);
 }

-int __must_check __xenbus_register_frontend(struct xenbus_driver *drv,
-					    struct module *owner,
-					    const char *mod_name);
-
-static inline int __must_check
-xenbus_register_frontend(struct xenbus_driver *drv)
-{
-	WARN_ON(drv->owner != THIS_MODULE);
-	return __xenbus_register_frontend(drv, THIS_MODULE, KBUILD_MODNAME);
-}
-
-int __must_check __xenbus_register_backend(struct xenbus_driver *drv,
-					   struct module *owner,
-					   const char *mod_name);
-static inline int __must_check
-xenbus_register_backend(struct xenbus_driver *drv)
-{
-	WARN_ON(drv->owner != THIS_MODULE);
-	return __xenbus_register_backend(drv, THIS_MODULE, KBUILD_MODNAME);
-}
-
+int xenbus_register_frontend(struct xenbus_driver *drv);
+int xenbus_register_backend(struct xenbus_driver *drv);
 void xenbus_unregister_driver(struct xenbus_driver *drv);

 struct xenbus_transaction
@@ -165,7 +156,6 @@ int xenbus_printf(struct xenbus_transact
 int xenbus_gather(struct xenbus_transaction t, const char *dir, ...);

 /* notifer routines for when the xenstore comes up */
-extern int xenstored_ready;
 int register_xenstore_notifier(struct notifier_block *nb);
 void unregister_xenstore_notifier(struct notifier_block *nb);

@@ -178,12 +168,9 @@ void xs_suspend_cancel(void);
 /* Used by xenbus_dev to borrow kernel's store connection. */
 void *xenbus_dev_request_and_reply(struct xsd_sockmsg *msg);

-struct work_struct;
-
 /* Prepare for domain suspend: then resume or cancel the suspend. */
 void xenbus_suspend(void);
 void xenbus_resume(void);
-void xenbus_probe(struct work_struct *);
 void xenbus_suspend_cancel(void);

 #define XENBUS_IS_ERR_READ(str) ({			\
@@ -196,38 +183,125 @@ void xenbus_suspend_cancel(void);

 #define XENBUS_EXIST_ERR(err) ((err) == -ENOENT || (err) == -ERANGE)

+
+/**
+ * Register a watch on the given path, using the given xenbus_watch structure
+ * for storage, and the given callback function as the callback.  Return 0 on
+ * success, or -errno on error.  On success, the given path will be saved as
+ * watch->node, and remains the caller's to free.  On error, watch->node will
+ * be NULL, the device will switch to XenbusStateClosing, and the error will
+ * be saved in the store.
+ */
 int xenbus_watch_path(struct xenbus_device *dev, const char *path,
 		      struct xenbus_watch *watch,
 		      void (*callback)(struct xenbus_watch *,
 				       const char **, unsigned int));
-int xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch,
-			 void (*callback)(struct xenbus_watch *,
-					  const char **, unsigned int),
-			 const char *pathfmt, ...)
-	__attribute__ ((format (printf, 4, 5)));

+
+/**
+ * Register a watch on the given path/path2, using the given xenbus_watch
+ * structure for storage, and the given callback function as the callback.
+ * Return 0 on success, or -errno on error.  On success, the watched path
+ * (path/path2) will be saved as watch->node, and becomes the caller's to
+ * kfree().  On error, watch->node will be NULL, so the caller has nothing to
+ * free, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
+int xenbus_watch_path2(struct xenbus_device *dev, const char *path,
+		       const char *path2, struct xenbus_watch *watch,
+		       void (*callback)(struct xenbus_watch *,
+					const char **, unsigned int));
+
+
+/**
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Return 0 on success, or -errno on error.  On error, the device will switch
+ * to XenbusStateClosing, and the error will be saved in the store.
+ */
 int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state);
+
+
+/**
+ * Grant access to the given ring_mfn to the peer of the given device.  Return
+ * 0 on success, or -errno on error.  On error, the device will switch to
+ * XenbusStateClosing, and the error will be saved in the store.
+ */
 int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn);
-int xenbus_map_ring_valloc(struct xenbus_device *dev,
-			   int gnt_ref, void **vaddr);
+
+
+/**
+ * Map a page of memory into this domain from another domain's grant table.
+ * xenbus_map_ring_valloc allocates a page of virtual address space, maps the
+ * page to that address, and sets *vaddr to that address.
+ * xenbus_map_ring does not allocate the virtual address space (you must do
+ * this yourself!). It only maps in the page to the specified address.
+ * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h)
+ * or -ENOMEM on error. If an error is returned, device will switch to
+ * XenbusStateClosing and the error message will be saved in XenStore.
+ */
+struct vm_struct *xenbus_map_ring_valloc(struct xenbus_device *dev,
+					 int gnt_ref);
 int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
 			   grant_handle_t *handle, void *vaddr);

-int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
+
+/**
+ * Unmap a page of memory in this domain that was imported from another domain.
+ * Use xenbus_unmap_ring_vfree if you mapped in your memory with
+ * xenbus_map_ring_valloc (it will free the virtual address space).
+ * Returns 0 on success and returns GNTST_* on error
+ * (see xen/include/interface/grant_table.h).
+ */
+int xenbus_unmap_ring_vfree(struct xenbus_device *dev, struct vm_struct *);
 int xenbus_unmap_ring(struct xenbus_device *dev,
 		      grant_handle_t handle, void *vaddr);

+
+/**
+ * Allocate an event channel for the given xenbus_device, assigning the newly
+ * created local port to *port.  Return 0 on success, or -errno on error.  On
+ * error, the device will switch to XenbusStateClosing, and the error will be
+ * saved in the store.
+ */
 int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
-int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port);
+
+
+/**
+ * Free an existing event channel. Returns 0 on success or -errno on error.
+ */
 int xenbus_free_evtchn(struct xenbus_device *dev, int port);

+
+/**
+ * Return the state of the driver rooted at the given store path, or
+ * XenbusStateUnknown if no state can be read.
+ */
 enum xenbus_state xenbus_read_driver_state(const char *path);

-void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt, ...);
-void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...);
+
+/***
+ * Report the given negative errno into the store, along with the given
+ * formatted message.
+ */
+void xenbus_dev_error(struct xenbus_device *dev, int err, const char *fmt,
+		      ...);
+
+
+/***
+ * Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
+ * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
+ * closedown of this driver and its peer.
+ */
+void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
+		      ...);
+
+int xenbus_dev_init(void);

 const char *xenbus_strstate(enum xenbus_state state);
 int xenbus_dev_is_online(struct xenbus_device *dev);
 int xenbus_frontend_closed(struct xenbus_device *dev);

+int xenbus_for_each_backend(void *arg, int (*fn)(struct device *, void *));
+int xenbus_for_each_frontend(void *arg, int (*fn)(struct device *, void *));
+
 #endif /* _XEN_XENBUS_H */