You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qubes-linux-kernel/patches.xen/xen-x86-msr-on-pcpu

823 lines
22 KiB

From: jbeulich@novell.com
Subject: introduce {rd,wr}msr_safe_on_pcpu() and add/enable users
Patch-mainline: n/a
--- head-2011-03-11.orig/arch/x86/Kconfig 2011-02-02 15:10:34.000000000 +0100
+++ head-2011-03-11/arch/x86/Kconfig 2011-02-03 14:42:26.000000000 +0100
@@ -1048,6 +1048,7 @@ config MICROCODE_OLD_INTERFACE
config X86_MSR
tristate "/dev/cpu/*/msr - Model-specific register support"
+ select XEN_DOMCTL if XEN_PRIVILEGED_GUEST
---help---
This device gives privileged processes access to the x86
Model-Specific Registers (MSRs). It is a character device with
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-11/arch/x86/kernel/msr-xen.c 2011-02-03 14:42:26.000000000 +0100
@@ -0,0 +1,339 @@
+#ifndef CONFIG_XEN_PRIVILEGED_GUEST
+#include "msr.c"
+#else
+/* ----------------------------------------------------------------------- *
+ *
+ * Copyright 2010 Novell, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 675 Mass Ave, Cambridge MA 02139,
+ * USA; either version 2 of the License, or (at your option) any later
+ * version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * x86 MSR access device
+ *
+ * This device is accessed by lseek() to the appropriate register number
+ * and then read/write in chunks of 8 bytes. A larger size means multiple
+ * reads or writes of the same register.
+ *
+ * This driver uses /dev/xen/cpu/%d/msr, where %d is the device's minor
+ * number less minor_bias, and on an SMP box will direct the access to pCPU %d.
+ */
+
+static int msr_init(void);
+static void msr_exit(void);
+
+#define msr_init(args...) _msr_init(args)
+#define msr_exit(args...) _msr_exit(args)
+#include "msr.c"
+#undef msr_exit
+#undef msr_init
+
+#include <linux/slab.h>
+#include <xen/pcpu.h>
+
+/* Device class under which the per-pCPU "pmsr%d" devices are created. */
+static struct class *pmsr_class;
+/*
+ * Offset added to a pCPU number to form its minor; msr_init() scales it
+ * up from this initial value based on NR_CPUS.
+ */
+static unsigned int minor_bias = 10;
+/* Number of pCPU ids (bits) currently tracked in xen_cpu_online_map. */
+static unsigned int nr_xen_cpu_ids;
+/* Bitmap with one bit set per pCPU that pmsr_device_create() has registered. */
+static unsigned long *xen_cpu_online_map;
+
+/* Device number of the physical-MSR node for pCPU @cpu. */
+#define PMSR_DEV(cpu) MKDEV(MSR_MAJOR, (cpu) + minor_bias)
+
+/* Map a device inode back to the pCPU number encoded in its minor. */
+static unsigned int pmsr_minor(struct inode *inode)
+{
+	unsigned int minor = iminor(inode);
+
+	return minor - minor_bias;
+}
+
+/*
+ * Read the MSR selected by *ppos on this node's pCPU, once per 8-byte chunk
+ * requested, and copy each value to @buf.
+ *
+ * Returns the number of bytes copied, or, if nothing was copied, the status
+ * that ended the loop (-EINVAL for a count that is not a multiple of 8).
+ *
+ * NOTE(review): rdmsr_safe_on_pcpu() can return a positive 1 from its
+ * fallback path; that value also ends the loop here and is returned as-is
+ * when no chunk has been copied yet - confirm this is intended.
+ */
+static ssize_t pmsr_read(struct file *file, char __user *buf,
+			 size_t count, loff_t *ppos)
+{
+	const unsigned int pcpu = pmsr_minor(file->f_path.dentry->d_inode);
+	const u32 msr = *ppos;
+	u32 __user *out = (u32 __user *)buf;
+	ssize_t done = 0;
+	u32 val[2];
+	int rc = 0;
+
+	if (count % 8)
+		return -EINVAL;	/* Invalid chunk size */
+
+	/* The pointer and byte count only advance after a fully copied chunk. */
+	for (; count; count -= 8, out += 2, done += 8) {
+		rc = rdmsr_safe_on_pcpu(pcpu, msr, &val[0], &val[1]);
+		if (rc)
+			break;
+		if (copy_to_user(out, val, sizeof(val))) {
+			rc = -EFAULT;
+			break;
+		}
+	}
+
+	/* Partial progress takes precedence over the terminating status. */
+	return done ? done : rc;
+}
+
+/*
+ * Write the 8-byte chunks from @buf, one after another, to the MSR selected
+ * by *ppos on this node's pCPU.
+ *
+ * Returns the number of bytes consumed, or, if nothing was written, the
+ * status that ended the loop (-EINVAL for a count that is not a multiple
+ * of 8).
+ *
+ * NOTE(review): wrmsr_safe_on_pcpu() can return a positive 1 from its
+ * fallback path; that value also ends the loop here and is returned as-is
+ * when no chunk has been written yet - confirm this is intended.
+ */
+static ssize_t pmsr_write(struct file *file, const char __user *buf,
+			  size_t count, loff_t *ppos)
+{
+	const unsigned int pcpu = pmsr_minor(file->f_path.dentry->d_inode);
+	const u32 msr = *ppos;
+	const u32 __user *in = (const u32 __user *)buf;
+	ssize_t done = 0;
+	u32 val[2];
+	int rc = 0;
+
+	if (count % 8)
+		return -EINVAL;	/* Invalid chunk size */
+
+	/* The pointer and byte count only advance after a successful write. */
+	for (; count; count -= 8, in += 2, done += 8) {
+		if (copy_from_user(val, in, sizeof(val))) {
+			rc = -EFAULT;
+			break;
+		}
+		rc = wrmsr_safe_on_pcpu(pcpu, msr, val[0], val[1]);
+		if (rc)
+			break;
+	}
+
+	/* Partial progress takes precedence over the terminating status. */
+	return done ? done : rc;
+}
+
+/*
+ * X86_IOC_RDMSR_REGS / X86_IOC_WRMSR_REGS: copy eight 32-bit register values
+ * in from user space, run the register-based MSR access on this node's pCPU,
+ * and copy the resulting register values back out.
+ *
+ * Returns -ENOTTY for any other request, -EBADF if the file was not opened
+ * for the matching direction, -EFAULT if a user copy fails, otherwise the
+ * result of the MSR helper (0 on success).
+ */
+static long pmsr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
+{
+	const unsigned int pcpu = pmsr_minor(file->f_path.dentry->d_inode);
+	u32 __user *ubuf = (u32 __user *)arg;
+	u32 gprs[8];
+	bool wr;
+	int rc;
+
+	if (ioc == X86_IOC_RDMSR_REGS)
+		wr = false;
+	else if (ioc == X86_IOC_WRMSR_REGS)
+		wr = true;
+	else
+		return -ENOTTY;
+
+	/* The descriptor must have been opened for the matching direction. */
+	if (!(file->f_mode & (wr ? FMODE_WRITE : FMODE_READ)))
+		return -EBADF;
+
+	if (copy_from_user(&gprs, ubuf, sizeof gprs))
+		return -EFAULT;
+
+	rc = wr ? wrmsr_safe_regs_on_pcpu(pcpu, gprs)
+		: rdmsr_safe_regs_on_pcpu(pcpu, gprs);
+	if (rc)
+		return rc;
+
+	if (copy_to_user(ubuf, &gprs, sizeof gprs))
+		return -EFAULT;
+
+	return 0;
+}
+
+/*
+ * Allow the open only if the node's pCPU is within the tracked range and is
+ * marked online in xen_cpu_online_map; otherwise fail with -ENXIO.
+ */
+static int pmsr_open(struct inode *inode, struct file *file)
+{
+	const unsigned int pcpu = pmsr_minor(file->f_path.dentry->d_inode);
+
+	if (pcpu < nr_xen_cpu_ids && test_bit(pcpu, xen_cpu_online_map))
+		return 0;
+
+	return -ENXIO;	/* No such CPU */
+}
+
+/*
+ * File operations we support for the physical-CPU MSR nodes.
+ *
+ * Seeking reuses msr_seek (presumably provided by the included msr.c); the
+ * same handler serves both the native and the compat ioctl entry points.
+ */
+static const struct file_operations pmsr_fops = {
+ .owner = THIS_MODULE,
+ .llseek = msr_seek,
+ .read = pmsr_read,
+ .write = pmsr_write,
+ .open = pmsr_open,
+ .unlocked_ioctl = pmsr_ioctl,
+ .compat_ioctl = pmsr_ioctl,
+};
+
+/*
+ * Mark pCPU @cpu online and create its "pmsr%d" device.
+ *
+ * When @cpu lies beyond the range currently tracked, a larger zeroed bitmap
+ * is allocated, the old contents are copied over, and the old bitmap is
+ * freed after being swapped out.
+ *
+ * Returns 0 on success, -EDOM if the resulting minor would not fit in
+ * MINORBITS, -ENOMEM if the larger bitmap cannot be allocated, or the error
+ * reported by device_create().
+ */
+static int pmsr_device_create(unsigned int cpu)
+{
+ struct device *dev;
+
+ if (cpu >= nr_xen_cpu_ids) {
+ /* Shared by both warnings below, so at most one is ever printed. */
+ static bool warned;
+ unsigned long *map;
+
+ /* The biased minor would overflow the minor number field. */
+ if ((minor_bias + cpu) >> MINORBITS) {
+ if (!warned) {
+ warned = true;
+ pr_warning("Physical MSRs of CPUs beyond %u"
+ " will not be accessible\n",
+ MINORMASK - minor_bias);
+ }
+ return -EDOM;
+ }
+
+ map = kzalloc(BITS_TO_LONGS(cpu + 1) * sizeof(*map),
+ GFP_KERNEL);
+ if (!map) {
+ if (!warned) {
+ warned = true;
+ pr_warning("Physical MSRs of CPUs beyond %u"
+ " may not be accessible\n",
+ nr_xen_cpu_ids - 1);
+ }
+ return -ENOMEM;
+ }
+
+ /* Carry over the bits tracked so far into the larger bitmap. */
+ memcpy(map, xen_cpu_online_map,
+ BITS_TO_LONGS(nr_xen_cpu_ids)
+ * sizeof(*xen_cpu_online_map));
+ /* New range: whole bitmap words, capped by the remaining minor space. */
+ nr_xen_cpu_ids = min_t(unsigned int,
+ BITS_TO_LONGS(cpu + 1) * BITS_PER_LONG,
+ MINORMASK + 1 - minor_bias);
+ /* Publish the new bitmap and free the one it replaces. */
+ kfree(xchg(&xen_cpu_online_map, map));
+ }
+ /*
+ * NOTE(review): the bit stays set even if device_create() fails below -
+ * confirm this is intended.
+ */
+ set_bit(cpu, xen_cpu_online_map);
+ dev = device_create(pmsr_class, NULL, PMSR_DEV(cpu), NULL,
+ "pmsr%d", cpu);
+ return IS_ERR(dev) ? PTR_ERR(dev) : 0;
+}
+
+/*
+ * Mark pCPU @cpu offline in xen_cpu_online_map (so pmsr_open() rejects it)
+ * and remove its device from pmsr_class.
+ */
+static void pmsr_device_destroy(unsigned int cpu)
+{
+ clear_bit(cpu, xen_cpu_online_map);
+ device_destroy(pmsr_class, PMSR_DEV(cpu));
+}
+
+/*
+ * pCPU notifier callback: create the device when a pCPU comes online and
+ * remove it once the pCPU is dead. Other actions are ignored, and the result
+ * of device creation is not propagated; NOTIFY_OK is always returned.
+ */
+static int pmsr_cpu_callback(struct notifier_block *nfb,
+			     unsigned long action, void *hcpu)
+{
+	/* The pCPU id is passed by value in the pointer argument. */
+	const unsigned int pcpu = (unsigned long)hcpu;
+
+	if (action == CPU_ONLINE)
+		pmsr_device_create(pcpu);
+	else if (action == CPU_DEAD)
+		pmsr_device_destroy(pcpu);
+
+	return NOTIFY_OK;
+}
+
+/* Notifier block that msr_init() hands to register_pcpu_notifier(). */
+static struct notifier_block pmsr_cpu_notifier = {
+ .notifier_call = pmsr_cpu_callback,
+};
+
+/*
+ * Build the device node name "xen/cpu/<pcpu>/msr" for @dev, where <pcpu> is
+ * the minor with the bias removed. @mode is left untouched. Returns the
+ * kasprintf() result (NULL if the allocation fails).
+ */
+static char *pmsr_devnode(struct device *dev, mode_t *mode)
+{
+	unsigned int pcpu = MINOR(dev->devt) - minor_bias;
+
+	return kasprintf(GFP_KERNEL, "xen/cpu/%u/msr", pcpu);
+}
+
+/*
+ * Module init: set up the regular MSR driver via _msr_init() and, in the
+ * initial Xen domain only, additionally the physical-CPU MSR devices.
+ *
+ * Failure to set up the physical devices is reported with a message but is
+ * not fatal: once _msr_init() has succeeded this function returns 0, and
+ * nr_xen_cpu_ids is left at 0 so that msr_exit() skips the pmsr teardown.
+ */
+static int __init msr_init(void)
+{
+	int err;
+	xen_platform_op_t op = {
+		.cmd = XENPF_get_cpuinfo,
+		.interface_version = XENPF_INTERFACE_VERSION,
+		.u.pcpu_info.xen_cpuid = 0
+	};
+
+	err = _msr_init();
+	if (err || !is_initial_xendomain())
+		return err;
+
+	/* Query the highest present pCPU id, retrying while Xen is busy. */
+	do {
+		err = HYPERVISOR_platform_op(&op);
+	} while (err == -EBUSY);
+	if (err)
+		goto out;
+	/* Round the id range up to a whole number of bitmap words. */
+	nr_xen_cpu_ids = BITS_TO_LONGS(op.u.pcpu_info.max_present + 1)
+			 * BITS_PER_LONG;
+
+	/*
+	 * Prefer a power-of-ten bias at or above NR_CPUS; if the pCPU range
+	 * does not fit into the minor space that way, fall back to a bias of
+	 * NR_CPUS, and as a last resort clamp the pCPU range itself.
+	 */
+	while (minor_bias < NR_CPUS)
+		minor_bias *= 10;
+	if ((minor_bias + nr_xen_cpu_ids - 1) >> MINORBITS)
+		minor_bias = NR_CPUS;
+	if ((minor_bias + nr_xen_cpu_ids - 1) >> MINORBITS)
+		nr_xen_cpu_ids = MINORMASK + 1 - NR_CPUS;
+
+	xen_cpu_online_map = kzalloc(BITS_TO_LONGS(nr_xen_cpu_ids)
+				     * sizeof(*xen_cpu_online_map),
+				     GFP_KERNEL);
+	if (!xen_cpu_online_map) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (__register_chrdev(MSR_MAJOR, minor_bias,
+			      MINORMASK + 1 - minor_bias,
+			      "pcpu/msr", &pmsr_fops)) {
+		/* err is still 0 here, so only this message gets logged. */
+		pr_err("msr: unable to get minors for pmsr\n");
+		goto out;
+	}
+	pmsr_class = class_create(THIS_MODULE, "pmsr");
+	if (IS_ERR(pmsr_class)) {
+		err = PTR_ERR(pmsr_class);
+		goto out_chrdev;
+	}
+	pmsr_class->devnode = pmsr_devnode;
+	/* Registration replays CPU_ONLINE for every pCPU already online. */
+	err = register_pcpu_notifier(&pmsr_cpu_notifier);
+
+	if (!err && !nr_xen_cpu_ids) {
+		/*
+		 * The notifier was registered successfully; take it off the
+		 * chain again so it can never run against the class and the
+		 * bitmap that are released below.
+		 */
+		unregister_pcpu_notifier(&pmsr_cpu_notifier);
+		err = -ENODEV;
+	}
+	if (!err)
+		return 0;
+
+	class_destroy(pmsr_class);
+
+out_chrdev:
+	__unregister_chrdev(MSR_MAJOR, minor_bias,
+			    MINORMASK + 1 - minor_bias, "pcpu/msr");
+out:
+	if (err)
+		pr_warning("msr: can't initialize physical MSR access (%d)\n",
+			   err);
+	nr_xen_cpu_ids = 0;
+	kfree(xen_cpu_online_map);
+	/* Do not leave a dangling pointer to the freed bitmap behind. */
+	xen_cpu_online_map = NULL;
+	return 0;
+}
+
+/*
+ * Module exit: tear down the physical-CPU MSR devices (only if msr_init()
+ * left nr_xen_cpu_ids non-zero, i.e. they were set up) and then the regular
+ * MSR driver via _msr_exit().
+ */
+static void __exit msr_exit(void)
+{
+	if (nr_xen_cpu_ids) {
+		unsigned int cpu = 0;
+
+		/* Stop pCPU events first so the bitmap no longer changes. */
+		unregister_pcpu_notifier(&pmsr_cpu_notifier);
+		/*
+		 * Remove the pmsr devices created by pmsr_device_create().
+		 * This must use the pmsr helper: msr_device_destroy() would
+		 * act on the regular msr devices instead, which are left to
+		 * _msr_exit() below.
+		 */
+		for_each_set_bit(cpu, xen_cpu_online_map, nr_xen_cpu_ids)
+			pmsr_device_destroy(cpu);
+		class_destroy(pmsr_class);
+		__unregister_chrdev(MSR_MAJOR, minor_bias,
+				    MINORMASK + 1 - minor_bias, "pcpu/msr");
+		kfree(xen_cpu_online_map);
+	}
+	_msr_exit();
+}
+#endif /* CONFIG_XEN_PRIVILEGED_GUEST */
--- head-2011-03-11.orig/drivers/hwmon/Kconfig 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-11/drivers/hwmon/Kconfig 2011-03-11 11:17:24.000000000 +0100
@@ -392,7 +392,8 @@ config SENSORS_GPIO_FAN
config SENSORS_CORETEMP
tristate "Intel Core/Core2/Atom temperature sensor"
- depends on X86 && PCI && !XEN && EXPERIMENTAL
+ depends on X86 && PCI && !XEN_UNPRIVILEGED_GUEST && EXPERIMENTAL
+ select XEN_DOMCTL if XEN
help
If you say yes here you get support for the temperature
sensor inside your CPU. Most of the family 6 CPUs
@@ -400,7 +401,8 @@ config SENSORS_CORETEMP
config SENSORS_PKGTEMP
tristate "Intel processor package temperature sensor"
- depends on X86 && !XEN && EXPERIMENTAL
+ depends on X86 && !XEN_UNPRIVILEGED_GUEST && EXPERIMENTAL
+ select XEN_DOMCTL if XEN
help
If you say yes here you get support for the package level temperature
sensor inside your CPU. Check documentation/driver for details.
@@ -943,7 +945,8 @@ config SENSORS_TMP421
config SENSORS_VIA_CPUTEMP
tristate "VIA CPU temperature sensor"
- depends on X86 && !XEN
+ depends on X86 && !XEN_UNPRIVILEGED_GUEST
+ select XEN_DOMCTL if XEN
help
If you say yes here you get support for the temperature
sensor inside your CPU. Supported are all known variants of
--- head-2011-03-11.orig/drivers/xen/core/Makefile 2011-02-02 15:10:34.000000000 +0100
+++ head-2011-03-11/drivers/xen/core/Makefile 2011-02-03 14:42:26.000000000 +0100
@@ -5,8 +5,7 @@
obj-y := evtchn.o gnttab.o reboot.o machine_reboot.o
priv-$(CONFIG_PCI) += pci.o
-priv-$(CONFIG_ACPI_HOTPLUG_CPU) += pcpu.o
-obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o $(priv-y)
+obj-$(CONFIG_XEN_PRIVILEGED_GUEST) += firmware.o pcpu.o $(priv-y)
obj-$(CONFIG_PROC_FS) += xen_proc.o
obj-$(CONFIG_SYS_HYPERVISOR) += hypervisor_sysfs.o
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
@@ -17,4 +16,4 @@ obj-$(CONFIG_KEXEC) += machine_kexec.o
obj-$(CONFIG_GENERIC_CLOCKEVENTS) += clockevents.o
obj-$(CONFIG_XEN_DOMCTL) += domctl.o
CFLAGS_domctl.o := -D__XEN_PUBLIC_XEN_H__ -D__XEN_PUBLIC_GRANT_TABLE_H__
-CFLAGS_domctl.o += -D__XEN_TOOLS__ -imacros xen/interface/domctl.h
+CFLAGS_domctl.o += -D__XEN_TOOLS__ -imacros xen/interface/domctl.h -imacros xen/interface/sysctl.h
--- head-2011-03-11.orig/drivers/xen/core/domctl.c 2010-11-23 16:20:20.000000000 +0100
+++ head-2011-03-11/drivers/xen/core/domctl.c 2011-02-03 14:42:26.000000000 +0100
@@ -92,6 +92,110 @@ union xen_domctl {
} v5, v6, v7;
};
+/*
+ * physinfo payload as laid out for sysctl interface version 6 (used through
+ * the v6 member of union xen_sysctl). The cpu_to_node guest handle is
+ * overlaid with a uint64_aligned_t member, presumably so the slot is 64 bits
+ * wide regardless of pointer size.
+ */
+struct xen_sysctl_physinfo_v6 {
+ uint32_t threads_per_core;
+ uint32_t cores_per_socket;
+ uint32_t nr_cpus;
+ uint32_t nr_nodes;
+ uint32_t cpu_khz;
+ uint64_aligned_t total_pages;
+ uint64_aligned_t free_pages;
+ uint64_aligned_t scrub_pages;
+ uint32_t hw_cap[8];
+ uint32_t max_cpu_id;
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_node;
+ uint64_aligned_t _ctn_align;
+ };
+ uint32_t capabilities;
+};
+
+/*
+ * physinfo payload as laid out for sysctl interface version 7. It has the
+ * same fields as the version 6 layout except that the fourth field is
+ * max_node_id rather than nr_nodes.
+ */
+struct xen_sysctl_physinfo_v7 {
+ uint32_t threads_per_core;
+ uint32_t cores_per_socket;
+ uint32_t nr_cpus;
+ uint32_t max_node_id;
+ uint32_t cpu_khz;
+ uint64_aligned_t total_pages;
+ uint64_aligned_t free_pages;
+ uint64_aligned_t scrub_pages;
+ uint32_t hw_cap[8];
+ uint32_t max_cpu_id;
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_node;
+ uint64_aligned_t _ctn_align;
+ };
+ uint32_t capabilities;
+};
+
+/* pm_op sub-command that retrieves the CPU topology (versions 6 and 7). */
+#define XEN_SYSCTL_pm_op_get_cputopo 0x20
+/*
+ * Argument of XEN_SYSCTL_pm_op_get_cputopo: the caller sets max_cpus and
+ * supplies the cpu_to_core / cpu_to_socket arrays via guest handles.
+ */
+struct xen_get_cputopo_v6 {
+ uint32_t max_cpus;
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_core;
+ uint64_aligned_t _ctc_align;
+ };
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_socket;
+ uint64_aligned_t _cts_align;
+ };
+ uint32_t nr_cpus;
+};
+
+/*
+ * pm_op payload for sysctl interface version 6; only the get_topo
+ * sub-command argument is declared here.
+ */
+struct xen_sysctl_pm_op_v6 {
+ uint32_t cmd;
+ uint32_t cpuid;
+ union {
+ struct xen_get_cputopo_v6 get_topo;
+ };
+};
+/* Version 7 reuses the version 6 pm_op layout unchanged. */
+#define xen_sysctl_pm_op_v7 xen_sysctl_pm_op_v6
+
+/*
+ * topologyinfo payload for sysctl interface version 8: a maximum CPU index
+ * plus guest handles for the per-CPU core, socket and node id arrays.
+ */
+struct xen_sysctl_topologyinfo_v8 {
+ uint32_t max_cpu_index;
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_core;
+ uint64_aligned_t _ctc_align;
+ };
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_socket;
+ uint64_aligned_t _cts_align;
+ };
+ union {
+ XEN_GUEST_HANDLE(uint32) cpu_to_node;
+ uint64_aligned_t _ctn_align;
+ };
+};
+
+/*
+ * Version-specific views of the sysctl hypercall argument. Every view starts
+ * with cmd and interface_version, followed by the per-command payloads this
+ * file needs for that interface version.
+ */
+union xen_sysctl {
+ /* v6: Xen 3.4.x */
+ struct {
+ uint32_t cmd;
+ uint32_t interface_version;
+ union {
+ struct xen_sysctl_physinfo_v6 physinfo;
+ struct xen_sysctl_pm_op_v6 pm_op;
+ };
+ } v6;
+ /* v7: Xen 4.0.x */
+ struct {
+ uint32_t cmd;
+ uint32_t interface_version;
+ union {
+ struct xen_sysctl_physinfo_v7 physinfo;
+ struct xen_sysctl_pm_op_v7 pm_op;
+ };
+ } v7;
+ /* v8: Xen 4.1+ */
+ struct {
+ uint32_t cmd;
+ uint32_t interface_version;
+ union {
+ struct xen_sysctl_topologyinfo_v8 topologyinfo;
+ };
+ } v8;
+};
+
/* The actual code comes here */
static inline int hypervisor_domctl(void *domctl)
@@ -99,6 +203,11 @@ static inline int hypervisor_domctl(void
return _hypercall1(int, domctl, domctl);
}
+/* Issue the sysctl hypercall with @sysctl as its single argument. */
+static inline int hypervisor_sysctl(void *sysctl)
+{
+ return _hypercall1(int, sysctl, sysctl);
+}
+
int xen_guest_address_size(int domid)
{
union xen_domctl domctl;
@@ -263,6 +372,172 @@ int xen_set_physical_cpu_affinity(int pc
}
EXPORT_SYMBOL_GPL(xen_set_physical_cpu_affinity);
+/*
+ * Look up the core, socket and node ids of physical CPU @cpu.
+ *
+ * Each of @core, @sock and @node may be NULL if that id is not wanted; a
+ * temporary array of @cpu + 1 entries is allocated for each one requested.
+ *
+ * The version 8 topologyinfo sysctl is tried first. Depending on
+ * CONFIG_XEN_COMPAT, the version 7 and then the version 6 pm_op/physinfo
+ * sysctls are tried as long as the previous attempt failed.
+ *
+ * Returns 0 on success, -ENOMEM if a temporary array cannot be allocated,
+ * -EDOM if @cpu is not below the CPU count reported back by the hypervisor,
+ * -ENOENT if a requested id is INVALID_TOPOLOGY_ID, or the non-zero result
+ * of the failed hypercall.
+ */
+int xen_get_topology_info(unsigned int cpu, u32 *core, u32 *sock, u32 *node)
+{
+ union xen_sysctl sysctl;
+ uint32_t *cores = NULL, *socks = NULL, *nodes = NULL;
+ unsigned int nr;
+ int rc;
+
+ /*
+ * NOTE(review): cpu + 1 wraps to 0 for cpu == UINT_MAX - confirm callers
+ * never pass that.
+ */
+ if (core)
+ cores = kmalloc((cpu + 1) * sizeof(*cores), GFP_KERNEL);
+ if (sock)
+ socks = kmalloc((cpu + 1) * sizeof(*socks), GFP_KERNEL);
+ if (node)
+ nodes = kmalloc((cpu + 1) * sizeof(*nodes), GFP_KERNEL);
+ if ((core && !cores) || (sock && !socks) || (node && !nodes)) {
+ kfree(cores);
+ kfree(socks);
+ kfree(nodes);
+ return -ENOMEM;
+ }
+
+/* One topologyinfo request; sets rc and nr (returned max_cpu_index + 1). */
+#define topologyinfo(ver) do { \
+ memset(&sysctl, 0, sizeof(sysctl)); \
+ sysctl.v##ver.cmd = XEN_SYSCTL_topologyinfo; \
+ sysctl.v##ver.interface_version = ver; \
+ sysctl.v##ver.topologyinfo.max_cpu_index = cpu; \
+ set_xen_guest_handle(sysctl.v##ver.topologyinfo.cpu_to_core, \
+ cores); \
+ set_xen_guest_handle(sysctl.v##ver.topologyinfo.cpu_to_socket, \
+ socks); \
+ set_xen_guest_handle(sysctl.v##ver.topologyinfo.cpu_to_node, \
+ nodes); \
+ rc = hypervisor_sysctl(&sysctl); \
+ nr = sysctl.v##ver.topologyinfo.max_cpu_index + 1; \
+} while (0)
+
+ /* Fail the build if the interface moves past the newest layout known here. */
+ BUILD_BUG_ON(XEN_SYSCTL_INTERFACE_VERSION > 8);
+ topologyinfo(8);
+
+#if CONFIG_XEN_COMPAT < 0x040100
+/*
+ * Older interface: a pm_op get_cputopo request for core/socket ids followed
+ * by a physinfo request for node ids. rc ends up as the physinfo error if
+ * there is one, else the pm_op result; nr is the returned max_cpu_id + 1.
+ */
+#define pm_op_cputopo(ver) do { \
+ memset(&sysctl, 0, sizeof(sysctl)); \
+ sysctl.v##ver.cmd = XEN_SYSCTL_pm_op; \
+ sysctl.v##ver.interface_version = ver; \
+ sysctl.v##ver.pm_op.cmd = XEN_SYSCTL_pm_op_get_cputopo; \
+ sysctl.v##ver.pm_op.cpuid = 0; \
+ sysctl.v##ver.pm_op.get_topo.max_cpus = cpu + 1; \
+ set_xen_guest_handle(sysctl.v##ver.pm_op.get_topo.cpu_to_core, \
+ cores); \
+ set_xen_guest_handle(sysctl.v##ver.pm_op.get_topo.cpu_to_socket,\
+ socks); \
+ rc = hypervisor_sysctl(&sysctl); \
+ memset(&sysctl, 0, sizeof(sysctl)); \
+ sysctl.v##ver.cmd = XEN_SYSCTL_physinfo; \
+ sysctl.v##ver.interface_version = ver; \
+ sysctl.v##ver.physinfo.max_cpu_id = cpu; \
+ set_xen_guest_handle(sysctl.v##ver.physinfo.cpu_to_node, nodes);\
+ rc = hypervisor_sysctl(&sysctl) ?: rc; \
+ nr = sysctl.v##ver.physinfo.max_cpu_id + 1; \
+} while (0)
+
+ if (rc)
+ pm_op_cputopo(7);
+#endif
+#if CONFIG_XEN_COMPAT < 0x040000
+ if (rc)
+ pm_op_cputopo(6);
+#endif
+
+ /* The hypervisor reported fewer CPUs than needed to cover @cpu. */
+ if (!rc && cpu >= nr)
+ rc = -EDOM;
+
+ /* Hand back each requested id; every array is freed on all paths. */
+ if (!rc && core && (*core = cores[cpu]) == INVALID_TOPOLOGY_ID)
+ rc = -ENOENT;
+ kfree(cores);
+
+ if (!rc && sock && (*sock = socks[cpu]) == INVALID_TOPOLOGY_ID)
+ rc = -ENOENT;
+ kfree(socks);
+
+ if (!rc && node && (*node = nodes[cpu]) == INVALID_TOPOLOGY_ID)
+ rc = -ENOENT;
+ kfree(nodes);
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(xen_get_topology_info);
+
+#include <xen/pcpu.h>
+#include <asm/msr.h>
+
+/*
+ * Read MSR @msr_no on physical CPU @pcpu, storing the result through @l
+ * and @h.
+ *
+ * If xen_set_physical_cpu_affinity(@pcpu) succeeds, the read is done with
+ * rdmsr_safe() and the affinity is reset with -1 afterwards. If it fails
+ * with -EINVAL, the read falls back to rdmsr_safe_on_cpu().
+ *
+ * Returns the rdmsr_safe() result on the normal path; on the fallback path
+ * the rdmsr_safe_on_cpu() error, or 1 if that call succeeded; any other
+ * affinity error is returned unchanged.
+ */
+int rdmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no, u32 *l, u32 *h)
+{
+	int rc = xen_set_physical_cpu_affinity(pcpu);
+
+	if (rc == 0) {
+		rc = rdmsr_safe(msr_no, l, h);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+	} else if (rc == -EINVAL) {
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		rc = rdmsr_safe_on_cpu(pcpu, msr_no, l, h);
+		if (!rc)
+			rc = 1;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rdmsr_safe_on_pcpu);
+
+/*
+ * Write the value given by @l and @h to MSR @msr_no on physical CPU @pcpu.
+ *
+ * If xen_set_physical_cpu_affinity(@pcpu) succeeds, the write is done with
+ * wrmsr_safe() and the affinity is reset with -1 afterwards. If it fails
+ * with -EINVAL, the write falls back to wrmsr_safe_on_cpu().
+ *
+ * Returns the wrmsr_safe() result on the normal path; on the fallback path
+ * the wrmsr_safe_on_cpu() error, or 1 if that call succeeded; any other
+ * affinity error is returned unchanged.
+ */
+int wrmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no, u32 l, u32 h)
+{
+	int rc = xen_set_physical_cpu_affinity(pcpu);
+
+	if (rc == 0) {
+		rc = wrmsr_safe(msr_no, l, h);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+	} else if (rc == -EINVAL) {
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		rc = wrmsr_safe_on_cpu(pcpu, msr_no, l, h);
+		if (!rc)
+			rc = 1;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(wrmsr_safe_on_pcpu);
+
+/*
+ * Register-array variant of the MSR read on physical CPU @pcpu; @regs is
+ * passed through to rdmsr_safe_regs().
+ *
+ * If xen_set_physical_cpu_affinity(@pcpu) succeeds, rdmsr_safe_regs() is
+ * used and the affinity is reset with -1 afterwards. If it fails with
+ * -EINVAL, the access falls back to rdmsr_safe_regs_on_cpu().
+ *
+ * Returns the rdmsr_safe_regs() result on the normal path; on the fallback
+ * path the rdmsr_safe_regs_on_cpu() error, or 1 if that call succeeded; any
+ * other affinity error is returned unchanged.
+ */
+int rdmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs)
+{
+	int rc = xen_set_physical_cpu_affinity(pcpu);
+
+	if (rc == 0) {
+		rc = rdmsr_safe_regs(regs);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+	} else if (rc == -EINVAL) {
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		rc = rdmsr_safe_regs_on_cpu(pcpu, regs);
+		if (!rc)
+			rc = 1;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(rdmsr_safe_regs_on_pcpu);
+
+/*
+ * Register-array variant of the MSR write on physical CPU @pcpu; @regs is
+ * passed through to wrmsr_safe_regs().
+ *
+ * If xen_set_physical_cpu_affinity(@pcpu) succeeds, wrmsr_safe_regs() is
+ * used and the affinity is reset with -1 afterwards. If it fails with
+ * -EINVAL, the access falls back to wrmsr_safe_regs_on_cpu().
+ *
+ * Returns the wrmsr_safe_regs() result on the normal path; on the fallback
+ * path the wrmsr_safe_regs_on_cpu() error, or 1 if that call succeeded; any
+ * other affinity error is returned unchanged.
+ */
+int wrmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs)
+{
+	int rc = xen_set_physical_cpu_affinity(pcpu);
+
+	if (rc == 0) {
+		rc = wrmsr_safe_regs(regs);
+		WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+	} else if (rc == -EINVAL) {
+		/* Fall back in case this is due to dom0_vcpus_pinned. */
+		rc = wrmsr_safe_regs_on_cpu(pcpu, regs);
+		if (!rc)
+			rc = 1;
+	}
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(wrmsr_safe_regs_on_pcpu);
+
#endif /* CONFIG_X86 */
MODULE_LICENSE("GPL");
--- head-2011-03-11.orig/drivers/xen/core/domctl.h 2010-11-23 16:20:20.000000000 +0100
+++ head-2011-03-11/drivers/xen/core/domctl.h 2011-02-03 14:42:26.000000000 +0100
@@ -1,3 +1,4 @@
int xen_guest_address_size(int domid);
int xen_guest_blkif_protocol(int domid);
int xen_set_physical_cpu_affinity(int pcpu);
+int xen_get_topology_info(unsigned int cpu, u32 *core, u32 *socket, u32 *node);
--- head-2011-03-11.orig/drivers/xen/core/pcpu.c 2011-02-02 15:09:57.000000000 +0100
+++ head-2011-03-11/drivers/xen/core/pcpu.c 2011-02-03 14:42:26.000000000 +0100
@@ -11,6 +11,7 @@
#include <asm/hypervisor.h>
#include <xen/interface/platform.h>
#include <xen/evtchn.h>
+#include <xen/pcpu.h>
#include <acpi/processor.h>
struct pcpu {
@@ -35,6 +36,44 @@ static DEFINE_MUTEX(xen_pcpu_lock);
static LIST_HEAD(xen_pcpus);
+/* Chain called with CPU_ONLINE / CPU_DEAD when a physical CPU changes state. */
+static BLOCKING_NOTIFIER_HEAD(pcpu_chain);
+
+/* Encode the Xen id of @pcpu by value in the notifier's pointer argument. */
+static inline void *notifier_param(const struct pcpu *pcpu)
+{
+	unsigned long id = pcpu->xen_id;
+
+	return (void *)id;
+}
+
+/*
+ * Add @nb to the pCPU notifier chain and, on success, immediately replay a
+ * CPU_ONLINE event to it for every pCPU currently flagged online. The
+ * callback's return values are ignored during the replay. Registration and
+ * replay both happen with the pcpu lock held.
+ *
+ * Returns the result of blocking_notifier_chain_register().
+ */
+int register_pcpu_notifier(struct notifier_block *nb)
+{
+	struct pcpu *pcpu;
+	int rc;
+
+	get_pcpu_lock();
+
+	rc = blocking_notifier_chain_register(&pcpu_chain, nb);
+	if (rc)
+		goto unlock;
+
+	list_for_each_entry(pcpu, &xen_pcpus, pcpu_list) {
+		if (!xen_pcpu_online(pcpu->flags))
+			continue;
+		nb->notifier_call(nb, CPU_ONLINE, notifier_param(pcpu));
+	}
+
+unlock:
+	put_pcpu_lock();
+
+	return rc;
+}
+EXPORT_SYMBOL_GPL(register_pcpu_notifier);
+
+/* Remove @nb from the pCPU notifier chain, with the pcpu lock held. */
+void unregister_pcpu_notifier(struct notifier_block *nb)
+{
+ get_pcpu_lock();
+ blocking_notifier_chain_unregister(&pcpu_chain, nb);
+ put_pcpu_lock();
+}
+EXPORT_SYMBOL_GPL(unregister_pcpu_notifier);
+
static int xen_pcpu_down(uint32_t xen_id)
{
xen_platform_op_t op = {
@@ -151,12 +190,16 @@ static int xen_pcpu_online_check(struct
if (xen_pcpu_online(info->flags) && !xen_pcpu_online(pcpu->flags)) {
/* the pcpu is onlined */
pcpu->flags |= XEN_PCPU_FLAGS_ONLINE;
+ blocking_notifier_call_chain(&pcpu_chain, CPU_ONLINE,
+ notifier_param(pcpu));
kobject_uevent(&pcpu->sysdev.kobj, KOBJ_ONLINE);
result = 1;
} else if (!xen_pcpu_online(info->flags) &&
xen_pcpu_online(pcpu->flags)) {
/* The pcpu is offlined now */
pcpu->flags &= ~XEN_PCPU_FLAGS_ONLINE;
+ blocking_notifier_call_chain(&pcpu_chain, CPU_DEAD,
+ notifier_param(pcpu));
kobject_uevent(&pcpu->sysdev.kobj, KOBJ_OFFLINE);
result = 1;
}
@@ -350,6 +393,8 @@ static irqreturn_t xen_pcpu_interrupt(in
return IRQ_HANDLED;
}
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+
int xen_pcpu_hotplug(int type)
{
schedule_work(&xen_pcpu_work);
@@ -387,6 +432,8 @@ int xen_pcpu_index(uint32_t id, bool is_
}
EXPORT_SYMBOL_GPL(xen_pcpu_index);
+#endif /* CONFIG_ACPI_HOTPLUG_CPU */
+
static int __init xen_pcpu_init(void)
{
int err;
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-11/include/xen/pcpu.h 2011-02-03 14:42:26.000000000 +0100
@@ -0,0 +1,18 @@
+#ifndef _XEN_PCPU_H
+#define _XEN_PCPU_H
+
+#include <linux/notifier.h>
+#include <linux/types.h>
+
+/*
+ * Physical CPU (pCPU) notifier chain. Callbacks receive CPU_ONLINE or
+ * CPU_DEAD as the action and the pCPU's Xen id, cast to a pointer, as data.
+ * Registering also replays CPU_ONLINE for every pCPU already online.
+ */
+int register_pcpu_notifier(struct notifier_block *);
+void unregister_pcpu_notifier(struct notifier_block *);
+
+#ifdef CONFIG_X86
+/*
+ * MSR accessors that target a physical CPU. Negative return values are
+ * errors and 0 is success; a positive value means the access succeeded
+ * through the fallback to the *_on_cpu() variant.
+ */
+int __must_check rdmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no,
+				    u32 *l, u32 *h);
+int __must_check wrmsr_safe_on_pcpu(unsigned int pcpu, u32 msr_no,
+				    u32 l, u32 h);
+int __must_check rdmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs);
+int __must_check wrmsr_safe_regs_on_pcpu(unsigned int pcpu, u32 *regs);
+#endif
+
+#endif /* _XEN_PCPU_H */