629 lines
21 KiB
Plaintext
629 lines
21 KiB
Plaintext
From: jbeulich@novell.com
|
|
Subject: x86: use per-cpu storage for shared vcpu_info structure
|
|
Patch-mainline: obsolete
|
|
|
|
... reducing access code size and latency, as well as being the
|
|
prerequisite for removing the limit of 32 vCPUs per guest.
|
|
|
|
--- head-2010-04-15.orig/arch/x86/include/asm/percpu.h 2010-04-28 15:44:01.000000000 +0200
|
|
+++ head-2010-04-15/arch/x86/include/asm/percpu.h 2010-03-25 14:42:40.000000000 +0100
|
|
@@ -190,6 +190,38 @@ do { \
|
|
pfo_ret__; \
|
|
})
|
|
|
|
+#define percpu_xchg_op(op, var, val) /* run exchange-type insn 'op' (used for "xchg"/"xadd") on per-CPU 'var' with 'val'; evaluates to the register operand after the insn */ \
|
|
+({ \
|
|
+ typedef typeof(var) pxo_T__; \
|
|
+ pxo_T__ pxo_ret__; \
|
|
+ if (0) /* never executed: only lets the compiler check that 'val' is assignable to the type of 'var' */ \
|
|
+ pxo_ret__ = (val); \
|
|
+ switch (sizeof(var)) { /* pick the insn suffix (b/w/l/q) from the operand size */ \
|
|
+ case 1: \
|
|
+ asm(op "b %0,"__percpu_arg(1) \
|
|
+ : "=q" (pxo_ret__), "+m" (var) /* "q": register that has a byte-sized sub-register */ \
|
|
+ : "0" ((pxo_T__)(val))); /* "0": 'val' goes in through the same register that returns the result */ \
|
|
+ break; \
|
|
+ case 2: \
|
|
+ asm(op "w %0,"__percpu_arg(1) \
|
|
+ : "=r" (pxo_ret__), "+m" (var) \
|
|
+ : "0" ((pxo_T__)(val))); \
|
|
+ break; \
|
|
+ case 4: \
|
|
+ asm(op "l %0,"__percpu_arg(1) \
|
|
+ : "=r" (pxo_ret__), "+m" (var) \
|
|
+ : "0" ((pxo_T__)(val))); \
|
|
+ break; \
|
|
+ case 8: \
|
|
+ asm(op "q %0,"__percpu_arg(1) \
|
|
+ : "=r" (pxo_ret__), "+m" (var) \
|
|
+ : "0" ((pxo_T__)(val))); \
|
|
+ break; \
|
|
+ default: __bad_percpu_size(); /* unsupported operand size - presumably a build-time failure; confirm against percpu.h */ \
|
|
+ } \
|
|
+ pxo_ret__; /* value of the statement expression */ \
|
|
+})
|
|
+
|
|
/*
|
|
* percpu_read() makes gcc load the percpu variable every time it is
|
|
* accessed while percpu_read_stable() allows the value to be cached.
|
|
@@ -207,6 +239,10 @@ do { \
|
|
#define percpu_and(var, val) percpu_to_op("and", var, val)
|
|
#define percpu_or(var, val) percpu_to_op("or", var, val)
|
|
#define percpu_xor(var, val) percpu_to_op("xor", var, val)
|
|
+#define percpu_xchg(var, val) percpu_xchg_op("xchg", var, val)
|
|
+#if defined(CONFIG_X86_XADD) || defined(CONFIG_X86_64)
|
|
+#define percpu_xadd(var, val) percpu_xchg_op("xadd", var, val)
|
|
+#endif
|
|
|
|
#define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
|
#define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp))
|
|
--- head-2010-04-15.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-25 14:41:00.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -50,12 +50,26 @@
|
|
|
|
extern shared_info_t *HYPERVISOR_shared_info;
|
|
|
|
+#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT
|
|
+DECLARE_PER_CPU(struct vcpu_info, vcpu_info);
|
|
+#define vcpu_info(cpu) (&per_cpu(vcpu_info, cpu))
|
|
+#define current_vcpu_info() (&__get_cpu_var(vcpu_info))
|
|
+#define vcpu_info_read(fld) percpu_read(vcpu_info.fld)
|
|
+#define vcpu_info_write(fld, val) percpu_write(vcpu_info.fld, val)
|
|
+#define vcpu_info_xchg(fld, val) percpu_xchg(vcpu_info.fld, val)
|
|
+void setup_vcpu_info(unsigned int cpu);
|
|
+void adjust_boot_vcpu_info(void);
|
|
+#else
|
|
#define vcpu_info(cpu) (HYPERVISOR_shared_info->vcpu_info + (cpu))
|
|
#ifdef CONFIG_SMP
|
|
#define current_vcpu_info() vcpu_info(smp_processor_id())
|
|
#else
|
|
#define current_vcpu_info() vcpu_info(0)
|
|
#endif
|
|
+#define vcpu_info_read(fld) (current_vcpu_info()->fld)
|
|
+#define vcpu_info_write(fld, val) (current_vcpu_info()->fld = (val))
|
|
+static inline void setup_vcpu_info(unsigned int cpu) {}
|
|
+#endif
|
|
|
|
#ifdef CONFIG_X86_32
|
|
extern unsigned long hypervisor_virt_start;
|
|
--- head-2010-04-15.orig/arch/x86/include/mach-xen/asm/irqflags.h 2010-03-24 15:32:27.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/include/mach-xen/asm/irqflags.h 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -12,7 +12,7 @@
|
|
* includes these barriers, for example.
|
|
*/
|
|
|
|
-#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask)
|
|
+#define xen_save_fl(void) vcpu_info_read(evtchn_upcall_mask)
|
|
|
|
#define xen_restore_fl(f) \
|
|
do { \
|
|
@@ -28,7 +28,7 @@ do { \
|
|
|
|
#define xen_irq_disable() \
|
|
do { \
|
|
- current_vcpu_info()->evtchn_upcall_mask = 1; \
|
|
+ vcpu_info_write(evtchn_upcall_mask, 1); \
|
|
barrier(); \
|
|
} while (0)
|
|
|
|
@@ -90,8 +90,6 @@ static inline void halt(void)
|
|
#define evtchn_upcall_pending /* 0 */
|
|
#define evtchn_upcall_mask 1
|
|
|
|
-#define sizeof_vcpu_shift 6
|
|
-
|
|
#ifdef CONFIG_X86_64
|
|
# define __REG_si %rsi
|
|
# define __CPU_num PER_CPU_VAR(cpu_number)
|
|
@@ -100,6 +98,22 @@ static inline void halt(void)
|
|
# define __CPU_num TI_cpu(%ebp)
|
|
#endif
|
|
|
|
+#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT
|
|
+
|
|
+#define GET_VCPU_INFO PER_CPU(vcpu_info, __REG_si) /* presumably leaves this CPU's vcpu_info address in __REG_si (ENABLE_INTERRUPTS_SYSEXIT addresses it via %esi) - confirm against PER_CPU() */
|
|
+#define __DISABLE_INTERRUPTS movb $1,PER_CPU_VAR(vcpu_info+evtchn_upcall_mask) /* mask event upcalls directly in per-CPU storage, no pointer register needed */
|
|
+#define __ENABLE_INTERRUPTS movb $0,PER_CPU_VAR(vcpu_info+evtchn_upcall_mask)
|
|
+#define __TEST_PENDING cmpb $0,PER_CPU_VAR(vcpu_info+evtchn_upcall_pending+0) /* trailing "+0": evtchn_upcall_pending expands to nothing here, so this keeps the expression well-formed */
|
|
+#define DISABLE_INTERRUPTS(clb) __DISABLE_INTERRUPTS /* 'clb' is unused in this variant */
|
|
+#define ENABLE_INTERRUPTS(clb) __ENABLE_INTERRUPTS /* 'clb' is unused in this variant */
|
|
+
|
|
+#define __SIZEOF_DISABLE_INTERRUPTS 8 /* number of -1 bytes emitted via .rept for __DISABLE_INTERRUPTS in critical_fixup_table (entry_32-xen.S); presumably the insn's encoded length - confirm */
|
|
+#define __SIZEOF_TEST_PENDING 8 /* number of -1 bytes emitted via .rept for __TEST_PENDING in critical_fixup_table; presumably the insn's encoded length - confirm */
|
|
+
|
|
+#else /* CONFIG_XEN_VCPU_INFO_PLACEMENT */
|
|
+
|
|
+#define sizeof_vcpu_shift 6
|
|
+
|
|
#ifdef CONFIG_SMP
|
|
#define GET_VCPU_INFO movl __CPU_num,%esi ; \
|
|
shl $sizeof_vcpu_shift,%esi ; \
|
|
@@ -116,15 +130,21 @@ static inline void halt(void)
|
|
#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
|
|
__ENABLE_INTERRUPTS
|
|
|
|
+#define __SIZEOF_DISABLE_INTERRUPTS 4
|
|
+#define __SIZEOF_TEST_PENDING 3
|
|
+
|
|
+#endif /* CONFIG_XEN_VCPU_INFO_PLACEMENT */
|
|
+
|
|
#ifndef CONFIG_X86_64
|
|
#define INTERRUPT_RETURN iret
|
|
-#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
|
|
+#define ENABLE_INTERRUPTS_SYSEXIT \
|
|
+ movb $0,evtchn_upcall_mask(%esi) /* __ENABLE_INTERRUPTS */ ; \
|
|
sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
- __TEST_PENDING ; \
|
|
+ cmpb $0,evtchn_upcall_pending(%esi) /* __TEST_PENDING */ ; \
|
|
jnz 14f /* process more events if necessary... */ ; \
|
|
movl PT_ESI(%esp), %esi ; \
|
|
sysexit ; \
|
|
-14: __DISABLE_INTERRUPTS ; \
|
|
+14: movb $1,evtchn_upcall_mask(%esi) /* __DISABLE_INTERRUPTS */ ; \
|
|
TRACE_IRQS_OFF ; \
|
|
sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
mov $__KERNEL_PERCPU, %ecx ; \
|
|
--- head-2010-04-15.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-25 14:41:00.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -117,6 +117,8 @@ static inline void xen_set_pgd(pgd_t *pg
|
|
|
|
#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
|
|
+extern unsigned long early_arbitrary_virt_to_mfn(void *va);
|
|
+
|
|
/*
|
|
* Conversion functions: convert a page and protection to a page entry,
|
|
* and a page entry and page directory to the page they refer to.
|
|
--- head-2010-04-15.orig/arch/x86/include/mach-xen/asm/system.h 2010-01-25 13:43:44.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/include/mach-xen/asm/system.h 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -245,8 +245,8 @@ static inline void xen_write_cr0(unsigne
|
|
asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order));
|
|
}
|
|
|
|
-#define xen_read_cr2() (current_vcpu_info()->arch.cr2)
|
|
-#define xen_write_cr2(val) ((void)(current_vcpu_info()->arch.cr2 = (val)))
|
|
+#define xen_read_cr2() vcpu_info_read(arch.cr2)
|
|
+#define xen_write_cr2(val) vcpu_info_write(arch.cr2, val)
|
|
|
|
static inline unsigned long xen_read_cr3(void)
|
|
{
|
|
--- head-2010-04-15.orig/arch/x86/kernel/cpu/common-xen.c 2010-03-24 16:00:05.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/kernel/cpu/common-xen.c 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -335,8 +335,16 @@ static const char *__cpuinit table_looku
|
|
__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
|
|
__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
|
|
|
|
-void load_percpu_segment(int cpu)
|
|
+void __ref load_percpu_segment(int cpu)
|
|
{
|
|
+#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT
|
|
+ static bool done;
|
|
+
|
|
+ if (!done) {
|
|
+ done = true;
|
|
+ adjust_boot_vcpu_info();
|
|
+ }
|
|
+#endif
|
|
#ifdef CONFIG_X86_32
|
|
loadsegment(fs, __KERNEL_PERCPU);
|
|
#else
|
|
--- head-2010-04-15.orig/arch/x86/kernel/entry_32-xen.S 2010-01-25 15:45:18.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/kernel/entry_32-xen.S 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -471,6 +471,9 @@ sysenter_exit:
|
|
movl PT_EIP(%esp), %edx
|
|
movl PT_OLDESP(%esp), %ecx
|
|
xorl %ebp,%ebp
|
|
+#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT
|
|
+ GET_VCPU_INFO
|
|
+#endif
|
|
TRACE_IRQS_ON
|
|
1: mov PT_FS(%esp), %fs
|
|
PTGS_TO_GS
|
|
@@ -1036,7 +1039,9 @@ critical_region_fixup:
|
|
|
|
.section .rodata,"a"
|
|
critical_fixup_table:
|
|
- .byte -1,-1,-1 # testb $0xff,(%esi) = __TEST_PENDING
|
|
+ .rept __SIZEOF_TEST_PENDING
|
|
+ .byte -1
|
|
+ .endr
|
|
.byte -1,-1 # jnz 14f
|
|
.byte 0 # pop %ebx
|
|
.byte 1 # pop %ecx
|
|
@@ -1055,7 +1060,9 @@ critical_fixup_table:
|
|
.byte 10,10,10 # add $8,%esp
|
|
#endif
|
|
.byte 12 # iret
|
|
- .byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS
|
|
+ .rept __SIZEOF_DISABLE_INTERRUPTS
|
|
+ .byte -1
|
|
+ .endr
|
|
.previous
|
|
|
|
# Hypervisor uses this for application faults while it executes.
|
|
--- head-2010-04-15.orig/arch/x86/kernel/head-xen.c 2010-04-15 11:46:18.000000000 +0200
|
|
+++ head-2010-04-15/arch/x86/kernel/head-xen.c 2010-04-28 17:22:58.000000000 +0200
|
|
@@ -153,6 +153,8 @@ void __init xen_start_kernel(void)
|
|
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
|
|
clear_page(empty_zero_page);
|
|
|
|
+ setup_vcpu_info(0);
|
|
+
|
|
/* Set up mapping of lowest 1MB of physical memory. */
|
|
for (i = 0; i < NR_FIX_ISAMAPS; i++)
|
|
if (is_initial_xendomain())
|
|
--- head-2010-04-15.orig/arch/x86/kernel/time-xen.c 2010-03-02 10:20:42.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/kernel/time-xen.c 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -282,16 +282,10 @@ static void get_time_values_from_xen(uns
|
|
local_irq_restore(flags);
|
|
}
|
|
|
|
-static inline int time_values_up_to_date(unsigned int cpu)
|
|
+static inline int time_values_up_to_date(void)
|
|
{
|
|
- struct vcpu_time_info *src;
|
|
- struct shadow_time_info *dst;
|
|
-
|
|
- src = &vcpu_info(cpu)->time;
|
|
- dst = &per_cpu(shadow_time, cpu);
|
|
-
|
|
rmb();
|
|
- return (dst->version == src->version);
|
|
+ return percpu_read(shadow_time.version) == vcpu_info_read(time.version);
|
|
}
|
|
|
|
static void sync_xen_wallclock(unsigned long dummy);
|
|
@@ -337,7 +331,7 @@ unsigned long long xen_local_clock(void)
|
|
local_time_version = shadow->version;
|
|
rdtsc_barrier();
|
|
time = shadow->system_timestamp + get_nsec_offset(shadow);
|
|
- if (!time_values_up_to_date(cpu))
|
|
+ if (!time_values_up_to_date())
|
|
get_time_values_from_xen(cpu);
|
|
barrier();
|
|
} while (local_time_version != shadow->version);
|
|
--- head-2010-04-15.orig/arch/x86/mm/hypervisor.c 2010-03-25 14:41:00.000000000 +0100
|
|
+++ head-2010-04-15/arch/x86/mm/hypervisor.c 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -41,6 +41,7 @@
|
|
#include <xen/balloon.h>
|
|
#include <xen/features.h>
|
|
#include <xen/interface/memory.h>
|
|
+#include <xen/interface/vcpu.h>
|
|
#include <linux/module.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/highmem.h>
|
|
@@ -50,7 +51,104 @@
|
|
EXPORT_SYMBOL(hypercall_page);
|
|
|
|
shared_info_t *__read_mostly HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
+#ifndef CONFIG_XEN_VCPU_INFO_PLACEMENT
|
|
EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
+#else
|
|
+DEFINE_PER_CPU(struct vcpu_info, vcpu_info) __aligned(sizeof(struct vcpu_info));
|
|
+EXPORT_PER_CPU_SYMBOL(vcpu_info);
|
|
+
|
|
+void __ref setup_vcpu_info(unsigned int cpu) /* register 'cpu's per-CPU vcpu_info (frame number + offset in page) with the hypervisor; BUG()s on failure */
|
|
+{
|
|
+ struct vcpu_info *v = &per_cpu(vcpu_info, cpu);
|
|
+ struct vcpu_register_vcpu_info info;
|
|
+#ifdef CONFIG_X86_64
|
|
+ static bool first = true; /* only the very first call takes the early lookup path */
|
|
+
|
|
+ if (first) {
|
|
+ first = false;
|
|
+ info.mfn = early_arbitrary_virt_to_mfn(v); /* NOTE(review): presumably the regular lookup is not usable yet this early in boot - confirm */
|
|
+ } else /* deliberately brace-less: binds to the assignment after #endif */
|
|
+#endif
|
|
+ info.mfn = arbitrary_virt_to_mfn(v); /* unconditional without CONFIG_X86_64, else-branch with it */
|
|
+ info.offset = offset_in_page(v);
|
|
+
|
|
+ if (HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_info, cpu, &info))
|
|
+ BUG(); /* non-zero return from the hypercall is treated as fatal */
|
|
+}
|
|
+
|
|
+void __init adjust_boot_vcpu_info(void) /* boot-CPU fixup; called once from load_percpu_segment() */
|
|
+{
|
|
+ unsigned long lpfn, rpfn, lmfn, rmfn;
|
|
+ pte_t *lpte, *rpte;
|
|
+ unsigned int level; /* lookup_address() output; reused near the end as a byte offset */
|
|
+ mmu_update_t mmu[2];
|
|
+
|
|
+ /*
|
|
+ * setup_vcpu_info() cannot be used more than once for a given (v)CPU,
|
|
+ * hence we must swap the underlying MFNs of the two pages holding old
|
|
+ * and new vcpu_info of the boot CPU.
|
|
+ *
|
|
+ * Do *not* use __get_cpu_var() or percpu_{write,...}() here, as the per-
|
|
+ * CPU segment didn't get reloaded yet. Using percpu_read(), as in
|
|
+ * arch_use_lazy_mmu_mode(), though undesirable, is safe except for the
|
|
+ * accesses to variables that were updated in setup_percpu_areas().
|
|
+ */
|
|
+ lpte = lookup_address((unsigned long)&vcpu_info
|
|
+ + (__per_cpu_load - __per_cpu_start),
|
|
+ &level); /* "l": presumably vcpu_info inside the per-CPU load image - confirm */
|
|
+ rpte = lookup_address((unsigned long)&per_cpu(vcpu_info, 0), &level); /* "r": CPU 0's per_cpu() instance */
|
|
+ BUG_ON(!lpte || !(pte_flags(*lpte) & _PAGE_PRESENT));
|
|
+ BUG_ON(!rpte || !(pte_flags(*rpte) & _PAGE_PRESENT));
|
|
+ lmfn = __pte_mfn(*lpte);
|
|
+ rmfn = __pte_mfn(*rpte);
|
|
+
|
|
+ if (lmfn == rmfn) /* both addresses are backed by the same frame: nothing to swap */
|
|
+ return;
|
|
+
|
|
+ lpfn = mfn_to_local_pfn(lmfn);
|
|
+ rpfn = mfn_to_local_pfn(rmfn);
|
|
+
|
|
+ printk(KERN_INFO
|
|
+ "Swapping MFNs for PFN %lx and %lx (MFN %lx and %lx)\n",
|
|
+ lpfn, rpfn, lmfn, rmfn);
|
|
+
|
|
+ xen_l1_entry_update(lpte, pfn_pte_ma(rmfn, pte_pgprot(*lpte))); /* exchange the frames behind the two PTEs, keeping each PTE's protection bits */
|
|
+ xen_l1_entry_update(rpte, pfn_pte_ma(lmfn, pte_pgprot(*rpte)));
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (HYPERVISOR_update_va_mapping((unsigned long)__va(lpfn<<PAGE_SHIFT),
|
|
+ pfn_pte_ma(rmfn, PAGE_KERNEL_RO), 0)) /* NOTE(review): 64-bit only remap of the __va() alias, read-only - reason not visible here, confirm */
|
|
+ BUG();
|
|
+#endif
|
|
+ if (HYPERVISOR_update_va_mapping((unsigned long)__va(rpfn<<PAGE_SHIFT),
|
|
+ pfn_pte_ma(lmfn, PAGE_KERNEL),
|
|
+ UVMF_TLB_FLUSH))
|
|
+ BUG();
|
|
+
|
|
+ set_phys_to_machine(lpfn, rmfn); /* record the swapped PFN -> MFN pairing */
|
|
+ set_phys_to_machine(rpfn, lmfn);
|
|
+
|
|
+ mmu[0].ptr = ((uint64_t)lmfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE; /* presumably updates the hypervisor's MFN -> PFN entry to match - confirm against the Xen mmu_update interface */
|
|
+ mmu[0].val = rpfn;
|
|
+ mmu[1].ptr = ((uint64_t)rmfn << PAGE_SHIFT) | MMU_MACHPHYS_UPDATE;
|
|
+ mmu[1].val = lpfn;
|
|
+ if (HYPERVISOR_mmu_update(mmu, 2, NULL, DOMID_SELF))
|
|
+ BUG();
|
|
+
|
|
+ /*
|
|
+ * Copy over all contents of the page just replaced, except for the
|
|
+ * vcpu_info itself, as it may have got updated after having been
|
|
+ * copied from __per_cpu_load[].
|
|
+ */
|
|
+ memcpy(__va(rpfn << PAGE_SHIFT),
|
|
+ __va(lpfn << PAGE_SHIFT),
|
|
+ (unsigned long)&vcpu_info & (PAGE_SIZE - 1)); /* bytes of the page that precede vcpu_info */
|
|
+ level = (unsigned long)(&vcpu_info + 1) & (PAGE_SIZE - 1); /* in-page offset of the first byte after vcpu_info; 0 when it ends exactly at the page end */
|
|
+ if (level)
|
|
+ memcpy(__va(rpfn << PAGE_SHIFT) + level,
|
|
+ __va(lpfn << PAGE_SHIFT) + level,
|
|
+ PAGE_SIZE - level); /* remainder of the page after vcpu_info */
|
|
+}
|
|
+#endif
|
|
|
|
#define NR_MC BITS_PER_LONG
|
|
#define NR_MMU BITS_PER_LONG
|
|
--- head-2010-04-15.orig/arch/x86/mm/init_64-xen.c 2010-04-15 11:47:48.000000000 +0200
|
|
+++ head-2010-04-15/arch/x86/mm/init_64-xen.c 2010-04-15 11:48:38.000000000 +0200
|
|
@@ -118,6 +118,26 @@ void __meminit early_make_page_readonly(
|
|
BUG();
|
|
}
|
|
|
|
+unsigned long __init early_arbitrary_virt_to_mfn(void *v) /* manual page-table walk: returns the frame number held in the PTE that maps 'v' */
|
|
+{
|
|
+ unsigned long va = (unsigned long)v, addr, *page;
|
|
+
|
|
+ BUG_ON(va < __START_KERNEL_map); /* only addresses at or above __START_KERNEL_map are accepted */
|
|
+
|
|
+ page = (void *)(xen_read_cr3() + __START_KERNEL_map); /* top-level table; assumes the CR3 value plus __START_KERNEL_map is its virtual address - TODO confirm */
|
|
+
|
|
+ addr = page[pgd_index(va)];
|
|
+ addr_to_page(addr, page); /* helper defined outside this hunk; presumably repoints 'page' at the next-level table - confirm */
|
|
+
|
|
+ addr = page[pud_index(va)];
|
|
+ addr_to_page(addr, page);
|
|
+
|
|
+ addr = page[pmd_index(va)];
|
|
+ addr_to_page(addr, page);
|
|
+
|
|
+ return (page[pte_index(va)] & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT; /* keep only the address bits of the PTE, then convert to a frame number */
|
|
+}
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
static int __init parse_direct_gbpages_off(char *arg)
|
|
{
|
|
--- head-2010-04-15.orig/drivers/xen/Kconfig 2010-03-31 14:12:07.000000000 +0200
|
|
+++ head-2010-04-15/drivers/xen/Kconfig 2010-03-31 14:12:47.000000000 +0200
|
|
@@ -367,6 +367,18 @@ config XEN_COMPAT
|
|
default 0x030002 if XEN_COMPAT_030002_AND_LATER
|
|
default 0
|
|
|
|
+config XEN_VCPU_INFO_PLACEMENT
|
|
+ bool "Place shared vCPU info in per-CPU storage"
|
|
+# depends on X86 && (XEN_COMPAT >= 0x00030101)
|
|
+ depends on X86
|
|
+ depends on !XEN_COMPAT_030002_AND_LATER
|
|
+ depends on !XEN_COMPAT_030004_AND_LATER
|
|
+ depends on !XEN_COMPAT_030100_AND_LATER
|
|
+ default SMP
|
|
+ ---help---
|
|
+ This allows faster access to the per-vCPU shared info
|
|
+ structure.
|
|
+
|
|
endmenu
|
|
|
|
config HAVE_IRQ_IGNORE_UNHANDLED
|
|
--- head-2010-04-15.orig/drivers/xen/core/evtchn.c 2010-04-23 15:20:52.000000000 +0200
|
|
+++ head-2010-04-15/drivers/xen/core/evtchn.c 2010-04-23 15:21:14.000000000 +0200
|
|
@@ -323,6 +323,24 @@ static DEFINE_PER_CPU(unsigned int, upca
|
|
static DEFINE_PER_CPU(unsigned int, current_l1i);
|
|
static DEFINE_PER_CPU(unsigned int, current_l2i);
|
|
|
|
+#ifndef vcpu_info_xchg /* hypervisor.h defines it only under CONFIG_XEN_VCPU_INFO_PLACEMENT */
|
|
+#define vcpu_info_xchg(fld, val) xchg(&current_vcpu_info()->fld, val) /* fallback: exchange through the current vCPU's shared-info pointer */
|
|
+#endif
|
|
+
|
|
+#ifndef percpu_xadd /* asm/percpu.h provides it only when CONFIG_X86_XADD or CONFIG_X86_64 is set */
|
|
+#define percpu_xadd(var, val) /* fallback: add 'val' to per-CPU 'var' and evaluate to the value 'var' held before the addition */ \
|
|
+({ \
|
|
+ typeof(var) __tmp_var__; \
|
|
+ unsigned long flags; /* NOTE(review): un-prefixed name inside a macro; would capture a caller's 'flags' if 'val' ever referenced one */ \
|
|
+ local_irq_save(flags); /* read and update happen with local interrupts off */ \
|
|
+ __tmp_var__ = get_cpu_var(var); \
|
|
+ __get_cpu_var(var) += (val); \
|
|
+ put_cpu_var(var); /* pairs with get_cpu_var() above */ \
|
|
+ local_irq_restore(flags); \
|
|
+ __tmp_var__; /* value of the statement expression: the pre-add contents */ \
|
|
+})
|
|
+#endif
|
|
+
|
|
/* NB. Interrupts are disabled on entry. */
|
|
asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs)
|
|
{
|
|
@@ -331,25 +349,25 @@ asmlinkage void __irq_entry evtchn_do_up
|
|
unsigned long masked_l1, masked_l2;
|
|
unsigned int l1i, l2i, start_l1i, start_l2i, port, count, i;
|
|
int irq;
|
|
- vcpu_info_t *vcpu_info = current_vcpu_info();
|
|
|
|
exit_idle();
|
|
irq_enter();
|
|
|
|
do {
|
|
/* Avoid a callback storm when we reenable delivery. */
|
|
- vcpu_info->evtchn_upcall_pending = 0;
|
|
+ vcpu_info_write(evtchn_upcall_pending, 0);
|
|
|
|
/* Nested invocations bail immediately. */
|
|
- percpu_add(upcall_count, 1);
|
|
- if (unlikely(percpu_read(upcall_count) != 1))
|
|
+ if (unlikely(percpu_xadd(upcall_count, 1)))
|
|
break;
|
|
|
|
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
|
|
/* Clear master flag /before/ clearing selector flag. */
|
|
wmb();
|
|
+#else
|
|
+ barrier();
|
|
#endif
|
|
- l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
|
|
+ l1 = vcpu_info_xchg(evtchn_pending_sel, 0);
|
|
|
|
start_l1i = l1i = percpu_read(current_l1i);
|
|
start_l2i = percpu_read(current_l2i);
|
|
@@ -1370,7 +1388,6 @@ void unmask_evtchn(int port)
|
|
{
|
|
shared_info_t *s = HYPERVISOR_shared_info;
|
|
unsigned int cpu = smp_processor_id();
|
|
- vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
|
|
|
|
BUG_ON(!irqs_disabled());
|
|
|
|
@@ -1384,10 +1401,13 @@ void unmask_evtchn(int port)
|
|
synch_clear_bit(port, s->evtchn_mask);
|
|
|
|
/* Did we miss an interrupt 'edge'? Re-fire if so. */
|
|
- if (synch_test_bit(port, s->evtchn_pending) &&
|
|
- !synch_test_and_set_bit(port / BITS_PER_LONG,
|
|
- &vcpu_info->evtchn_pending_sel))
|
|
- vcpu_info->evtchn_upcall_pending = 1;
|
|
+ if (synch_test_bit(port, s->evtchn_pending)) {
|
|
+ vcpu_info_t *v = current_vcpu_info();
|
|
+
|
|
+ if (!synch_test_and_set_bit(port / BITS_PER_LONG,
|
|
+ &v->evtchn_pending_sel))
|
|
+ v->evtchn_upcall_pending = 1;
|
|
+ }
|
|
}
|
|
EXPORT_SYMBOL_GPL(unmask_evtchn);
|
|
|
|
--- head-2010-04-15.orig/drivers/xen/core/machine_reboot.c 2010-03-25 14:41:06.000000000 +0100
|
|
+++ head-2010-04-15/drivers/xen/core/machine_reboot.c 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -74,7 +74,7 @@ static void pre_suspend(void)
|
|
mfn_to_pfn(xen_start_info->console.domU.mfn);
|
|
}
|
|
|
|
-static void post_suspend(int suspend_cancelled)
|
|
+static void post_suspend(int suspend_cancelled, int fast_suspend)
|
|
{
|
|
int i, j, k, fpp;
|
|
unsigned long shinfo_mfn;
|
|
@@ -91,8 +91,21 @@ static void post_suspend(int suspend_can
|
|
#ifdef CONFIG_SMP
|
|
cpumask_copy(vcpu_initialized_mask, cpu_online_mask);
|
|
#endif
|
|
- for_each_possible_cpu(i)
|
|
+ for_each_possible_cpu(i) {
|
|
setup_runstate_area(i);
|
|
+
|
|
+#ifdef CONFIG_XEN_VCPU_INFO_PLACEMENT
|
|
+ if (fast_suspend && i != smp_processor_id()
|
|
+ && HYPERVISOR_vcpu_op(VCPUOP_down, i, NULL))
|
|
+ BUG();
|
|
+
|
|
+ setup_vcpu_info(i);
|
|
+
|
|
+ if (fast_suspend && i != smp_processor_id()
|
|
+ && HYPERVISOR_vcpu_op(VCPUOP_up, i, NULL))
|
|
+ BUG();
|
|
+#endif
|
|
+ }
|
|
}
|
|
|
|
shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
|
|
@@ -134,7 +147,7 @@ static void post_suspend(int suspend_can
|
|
#define switch_idle_mm() ((void)0)
|
|
#define mm_pin_all() ((void)0)
|
|
#define pre_suspend() xen_pre_suspend()
|
|
-#define post_suspend(x) xen_post_suspend(x)
|
|
+#define post_suspend(x, f) xen_post_suspend(x)
|
|
|
|
#endif
|
|
|
|
@@ -165,7 +178,7 @@ static int take_machine_down(void *_susp
|
|
BUG_ON(suspend_cancelled > 0);
|
|
suspend->resume_notifier(suspend_cancelled);
|
|
if (suspend_cancelled >= 0)
|
|
- post_suspend(suspend_cancelled);
|
|
+ post_suspend(suspend_cancelled, suspend->fast_suspend);
|
|
if (!suspend_cancelled)
|
|
xen_clockevents_resume();
|
|
if (suspend_cancelled >= 0)
|
|
--- head-2010-04-15.orig/drivers/xen/core/smpboot.c 2010-04-28 17:21:59.000000000 +0200
|
|
+++ head-2010-04-15/drivers/xen/core/smpboot.c 2010-04-28 17:22:15.000000000 +0200
|
|
@@ -348,8 +348,13 @@ void __init smp_prepare_cpus(unsigned in
|
|
|
|
void __init smp_prepare_boot_cpu(void)
|
|
{
|
|
+ unsigned int cpu;
|
|
+
|
|
switch_to_new_gdt(smp_processor_id());
|
|
prefill_possible_map();
|
|
+ for_each_possible_cpu(cpu)
|
|
+ if (cpu != smp_processor_id())
|
|
+ setup_vcpu_info(cpu);
|
|
}
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
--- head-2010-04-15.orig/drivers/xen/core/spinlock.c 2010-03-19 08:48:35.000000000 +0100
|
|
+++ head-2010-04-15/drivers/xen/core/spinlock.c 2010-03-25 14:41:15.000000000 +0100
|
|
@@ -105,7 +105,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
|
|
spinning.prev = percpu_read(_spinning);
|
|
smp_wmb();
|
|
percpu_write(_spinning, &spinning);
|
|
- upcall_mask = current_vcpu_info()->evtchn_upcall_mask;
|
|
+ upcall_mask = vcpu_info_read(evtchn_upcall_mask);
|
|
|
|
do {
|
|
bool nested = false;
|
|
@@ -171,12 +171,12 @@ bool xen_spin_wait(arch_spinlock_t *lock
|
|
* intended event processing will happen with the poll
|
|
* call.
|
|
*/
|
|
- current_vcpu_info()->evtchn_upcall_mask =
|
|
- nested ? upcall_mask : flags;
|
|
+ vcpu_info_write(evtchn_upcall_mask,
|
|
+ nested ? upcall_mask : flags);
|
|
|
|
xen_poll_irq(irq);
|
|
|
|
- current_vcpu_info()->evtchn_upcall_mask = upcall_mask;
|
|
+ vcpu_info_write(evtchn_upcall_mask, upcall_mask);
|
|
|
|
rc = !xen_test_irq_pending(irq);
|
|
if (!rc)
|