From: Linux Kernel Mailing List Subject: Linux: 2.6.38 Patch-mainline: 2.6.38 This patch contains the differences between 2.6.37 and 2.6.38. Acked-by: Jeff Mahoney Automatically created from "patches.kernel.org/patch-2.6.38" by xen-port-patches.py --- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 16:43:32.000000000 +0100 @@ -51,7 +51,7 @@ config X86 select HAVE_KERNEL_GZIP select HAVE_KERNEL_BZIP2 if !XEN select HAVE_KERNEL_LZMA if !XEN - select HAVE_KERNEL_XZ + select HAVE_KERNEL_XZ if !XEN select HAVE_KERNEL_LZO if !XEN select HAVE_HW_BREAKPOINT select HAVE_MIXED_BREAKPOINTS_REGS @@ -513,7 +513,7 @@ config X86_ES7000 config X86_32_IRIS tristate "Eurobraille/Iris poweroff module" - depends on X86_32 + depends on X86_32 && !XEN ---help--- The Iris machines from EuroBraille do not have APM or ACPI support to shut themselves down properly. A special I/O sequence is --- head-2011-03-17.orig/arch/x86/include/asm/apic.h 2011-01-31 14:53:50.000000000 +0100 +++ head-2011-03-17/arch/x86/include/asm/apic.h 2011-02-17 10:23:17.000000000 +0100 @@ -237,7 +237,11 @@ extern void setup_local_APIC(void); extern void end_local_APIC_setup(void); extern void bsp_end_local_APIC_setup(void); extern void init_apic_mappings(void); +#ifndef CONFIG_XEN void register_lapic_address(unsigned long address); +#else +#define register_lapic_address(address) +#endif extern void setup_boot_APIC_clock(void); extern void setup_secondary_APIC_clock(void); extern int APIC_init_uniprocessor(void); --- head-2011-03-17.orig/arch/x86/include/asm/xen/hypervisor.h 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/arch/x86/include/asm/xen/hypervisor.h 2011-02-03 12:23:23.000000000 +0100 @@ -58,7 +58,7 @@ static inline uint32_t xen_cpuid_base(vo return 0; } -#ifdef CONFIG_XEN +#ifdef CONFIG_PARAVIRT_XEN extern bool xen_hvm_need_lapic(void); static inline bool xen_x2apic_para_available(void) --- 
head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 15:41:35.000000000 +0100 @@ -123,7 +123,11 @@ enum fixed_addresses { #endif FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */ FIX_TEXT_POKE0, /* first page is last, because allocation is backward */ +#ifdef CONFIG_X86_MRST + FIX_LNW_VRTC, +#endif __end_of_permanent_fixed_addresses, + /* * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mach_traps.h 2007-06-12 13:14:02.000000000 +0200 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/mach_traps.h 2011-02-07 12:21:00.000000000 +0100 @@ -9,7 +9,11 @@ #include #include -static inline void clear_mem_error(unsigned char reason) {} +#define NMI_REASON_SERR 0x80 +#define NMI_REASON_IOCHK 0x40 +#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK) + +static inline void clear_serr_error(unsigned char reason) {} static inline void clear_io_check_error(unsigned char reason) {} static inline unsigned char get_nmi_reason(void) @@ -21,9 +25,9 @@ static inline unsigned char get_nmi_reas * port 0x61. 
*/ if (test_bit(_XEN_NMIREASON_io_error, &s->arch.nmi_reason)) - reason |= 0x40; + reason |= NMI_REASON_IOCHK; if (test_bit(_XEN_NMIREASON_parity_error, &s->arch.nmi_reason)) - reason |= 0x80; + reason |= NMI_REASON_SERR; return reason; } --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-08 10:25:49.000000000 +0100 @@ -87,8 +87,6 @@ static inline void switch_mm(struct mm_s BUG_ON(!xen_feature(XENFEAT_writable_page_tables) && !PagePinned(virt_to_page(next->pgd))); - /* stop flush ipis for the previous mm */ - cpumask_clear_cpu(cpu, mm_cpumask(prev)); #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ percpu_write(cpu_tlbstate.state, TLBSTATE_OK); percpu_write(cpu_tlbstate.active_mm, next); @@ -119,6 +117,9 @@ static inline void switch_mm(struct mm_s } BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF)); + + /* stop TLB flushes for the previous mm */ + cpumask_clear_cpu(cpu, mm_cpumask(prev)); } #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */ else { --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:41:35.000000000 +0100 @@ -71,6 +71,7 @@ extern unsigned long pci_mem_start; #define PCIBIOS_MIN_CARDBUS_IO 0x4000 +extern int pcibios_enabled; void pcibios_config_init(void); struct pci_bus *pcibios_scan_root(int bus); --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 15:04:27.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 15:41:55.000000000 +0100 @@ -34,6 +34,4 @@ #endif -static inline void init_hw_perf_events(void) {} - #endif /* _ASM_X86_PERF_EVENT_H */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 15:03:03.000000000 +0100 +++ 
head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 15:41:35.000000000 +0100 @@ -106,7 +106,7 @@ static inline void pmd_free(struct mm_st extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd); static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, - unsigned long adddress) + unsigned long address) { ___pmd_free_tlb(tlb, pmd); } --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 15:55:04.000000000 +0100 @@ -32,6 +32,7 @@ extern struct mm_struct *pgd_page_get_mm #define set_pte(ptep, pte) xen_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte) +#define set_pmd_at(mm, addr, pmdp, pmd) xen_set_pmd_at(mm, addr, pmdp, pmd) #define set_pte_atomic(ptep, pte) \ xen_set_pte_atomic(ptep, pte) @@ -56,6 +57,8 @@ extern struct mm_struct *pgd_page_get_mm #define pte_update(mm, addr, ptep) do { } while (0) #define pte_update_defer(mm, addr, ptep) do { } while (0) +#define pmd_update(mm, addr, ptep) do { } while (0) +#define pmd_update_defer(mm, addr, ptep) do { } while (0) #define pgd_val(x) xen_pgd_val(x) #define __pgd(x) xen_make_pgd(x) @@ -89,6 +92,11 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } +static inline int pmd_young(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_ACCESSED; +} + static inline int pte_write(pte_t pte) { return pte_flags(pte) & _PAGE_RW; @@ -139,6 +147,23 @@ static inline int pmd_large(pmd_t pte) (_PAGE_PSE | _PAGE_PRESENT); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline int pmd_trans_splitting(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_SPLITTING; +} + +static inline int pmd_trans_huge(pmd_t pmd) +{ + return pmd_val(pmd) & _PAGE_PSE; +} + +static inline int has_transparent_hugepage(void) +{ + return cpu_has_pse; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + static inline pte_t pte_set_flags(pte_t pte, pteval_t 
set) { pteval_t v = __pte_val(pte); @@ -213,6 +238,57 @@ static inline pte_t pte_mkspecial(pte_t return pte_set_flags(pte, _PAGE_SPECIAL); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_set_flags(pmd_t pmd, pmdval_t set) +{ + pmdval_t v = native_pmd_val(pmd); + + return __pmd(v | set); +} + +static inline pmd_t pmd_clear_flags(pmd_t pmd, pmdval_t clear) +{ + pmdval_t v = native_pmd_val(pmd); + + return __pmd(v & ~clear); +} + +static inline pmd_t pmd_mkold(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_wrprotect(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_RW); +} + +static inline pmd_t pmd_mkdirty(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_DIRTY); +} + +static inline pmd_t pmd_mkhuge(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_PSE); +} + +static inline pmd_t pmd_mkyoung(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_ACCESSED); +} + +static inline pmd_t pmd_mkwrite(pmd_t pmd) +{ + return pmd_set_flags(pmd, _PAGE_RW); +} + +static inline pmd_t pmd_mknotpresent(pmd_t pmd) +{ + return pmd_clear_flags(pmd, _PAGE_PRESENT); +} +#endif + /* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be. 
@@ -253,6 +329,18 @@ static inline pte_t pte_modify(pte_t pte return __pte(val); } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) +{ + pmdval_t val = pmd_val(pmd); + + val &= _HPAGE_CHG_MASK; + val |= massage_pgprot(newprot) & ~_HPAGE_CHG_MASK; + + return __pmd(val); +} +#endif + /* mprotect needs to preserve PAT bits when updating vm_page_prot */ #define pgprot_modify pgprot_modify static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) @@ -353,7 +441,7 @@ static inline unsigned long pmd_page_vad * Currently stuck as a macro due to indirect forward reference to * linux/mmzone.h's __section_mem_map_addr() definition: */ -#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT) +#define pmd_page(pmd) pfn_to_page((pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT) /* * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD] @@ -529,6 +617,14 @@ static inline pte_t xen_local_ptep_get_a return res; } +static inline pmd_t xen_local_pmdp_get_and_clear(pmd_t *pmdp) +{ + pmd_t res = *pmdp; + + xen_pmd_clear(pmdp); + return res; +} + static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep , pte_t pte) { @@ -537,6 +633,12 @@ static inline void xen_set_pte_at(struct xen_set_pte(ptep, pte); } +static inline void xen_set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp , pmd_t pmd) +{ + xen_set_pmd(pmdp, pmd); +} + static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -641,6 +743,53 @@ static inline void ptep_set_wrprotect(st #define flush_tlb_fix_spurious_fault(vma, address) +#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot)) + +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +extern int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty); + +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +extern int pmdp_test_and_clear_young(struct 
vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +extern int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp); + + +#define __HAVE_ARCH_PMDP_SPLITTING_FLUSH +extern void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp); + +#define __HAVE_ARCH_PMD_WRITE +static inline int pmd_write(pmd_t pmd) +{ + return pmd_flags(pmd) & _PAGE_RW; +} + +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp) +{ + pmd_t pmd = xen_pmdp_get_and_clear(pmdp); + pmd_update(mm, addr, pmdp); + return pmd; +} +#endif + +#define __HAVE_ARCH_PMDP_SET_WRPROTECT +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline void pmdp_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + clear_bit(_PAGE_BIT_RW, (unsigned long *)pmdp); + pmd_update(mm, addr, pmdp); +} +#endif + /* * clone_pgd_range(pgd_t *dst, pgd_t *src, int count); * --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 15:04:27.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 15:47:16.000000000 +0100 @@ -106,6 +106,31 @@ static inline pte_t xen_ptep_get_and_cle #define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \ ((_pte).pte_high << (32-PAGE_SHIFT))) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#ifdef CONFIG_SMP +union split_pmd { + struct { + u32 pmd_low; + u32 pmd_high; + }; + pmd_t pmd; +}; +static inline pmd_t xen_pmdp_get_and_clear(pmd_t *pmdp) +{ + union split_pmd res, *orig = (union split_pmd *)pmdp; + + /* xchg acts as a barrier before setting of the high bits */ + res.pmd_low = xchg(&orig->pmd_low, 0); + res.pmd_high = orig->pmd_high; + orig->pmd_high = 0; + + return res.pmd; +} +#else +#define xen_pmdp_get_and_clear(xp) xen_local_pmdp_get_and_clear(xp) +#endif +#endif + /* * Bits 0, 6 and 7 are 
taken in the low part of the pte, * put the 32 bits of offset into the high part. --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-03 14:39:36.000000000 +0100 @@ -65,6 +65,16 @@ static inline void xen_set_pte_atomic(pt xen_set_pte(ptep, pte); } +static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + xen_l2_entry_update(pmdp, pmd); +} + +static inline void xen_pmd_clear(pmd_t *pmd) +{ + xen_set_pmd(pmd, xen_make_pmd(0)); +} + #ifdef CONFIG_SMP static inline pte_t xen_ptep_get_and_clear(pte_t *xp, pte_t ret) { @@ -74,15 +84,16 @@ static inline pte_t xen_ptep_get_and_cle #define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte) #endif -static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd) -{ - xen_l2_entry_update(pmdp, pmd); -} - -static inline void xen_pmd_clear(pmd_t *pmd) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#ifdef CONFIG_SMP +static inline pmd_t xen_pmdp_get_and_clear(pmd_t *xp) { - xen_set_pmd(pmd, xen_make_pmd(0)); + return xen_make_pmd(xchg(&xp->pmd, 0)); } +#else +#define xen_pmdp_get_and_clear(xp) xen_local_pmdp_get_and_clear(xp) +#endif +#endif static inline void xen_set_pud(pud_t *pudp, pud_t pud) { @@ -175,6 +186,7 @@ extern void cleanup_highmap(void); #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) #define __HAVE_ARCH_PTE_SAME + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PGTABLE_64_H */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_types.h 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_types.h 2011-02-01 15:41:35.000000000 +0100 @@ -22,6 +22,7 @@ #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 +#define _PAGE_BIT_SPLITTING _PAGE_BIT_UNUSED1 /* only valid on a PSE pmd */ #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ 
/* If _PAGE_BIT_PRESENT is clear, we use these: */ @@ -45,6 +46,7 @@ #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) +#define _PAGE_SPLITTING (_AT(pteval_t, 1) << _PAGE_BIT_SPLITTING) #define __HAVE_ARCH_PTE_SPECIAL #ifdef CONFIG_KMEMCHECK @@ -78,6 +80,7 @@ extern unsigned int __kernel_page_user; /* Set of bits not changed in pte_modify */ #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) /* * PAT settings are part of the hypervisor interface, which sets the --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:48.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:59.000000000 +0100 @@ -152,10 +152,9 @@ extern __u32 cpu_caps_set[NCAPINTS]; #ifdef CONFIG_SMP DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); #define cpu_data(cpu) per_cpu(cpu_info, cpu) -#define current_cpu_data __get_cpu_var(cpu_info) #else +#define cpu_info boot_cpu_data #define cpu_data(cpu) boot_cpu_data -#define current_cpu_data boot_cpu_data #endif extern const struct seq_operations cpuinfo_op; @@ -716,10 +715,11 @@ extern void select_idle_routine(const st extern void init_c1e_mask(void); extern unsigned long boot_option_idle_override; -extern unsigned long idle_halt; -extern unsigned long idle_nomwait; extern bool c1e_detected; +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_NOMWAIT, + IDLE_POLL, IDLE_FORCE_MWAIT}; + extern void enable_sep_cpu(void); extern int sysenter_setup(void); @@ -856,7 +856,7 @@ extern unsigned long thread_saved_pc(str /* * The below -8 is to reserve 8 bytes on top of the ring0 stack. 
* This is necessary to guarantee that the entire "struct pt_regs" - * is accessable even if the CPU haven't stored the SS/ESP registers + * is accessible even if the CPU haven't stored the SS/ESP registers * on the stack (interrupt gate does not save these registers * when switching to the same priv ring). * Therefore beware: accessing the ss/esp fields of the --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:12:15.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:12:54.000000000 +0100 @@ -47,10 +47,7 @@ DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_ #ifndef CONFIG_XEN /* Static state in head.S used to set up a CPU */ -extern struct { - void *sp; - unsigned short ss; -} stack_start; +extern unsigned long stack_start; /* Initial stack pointer address */ struct smp_ops { void (*smp_prepare_boot_cpu)(void); --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system_64.h 2011-02-01 14:39:24.000000000 +0100 +++ /dev/null 1970-01-01 00:00:00.000000000 +0000 @@ -1,20 +0,0 @@ -#ifndef _ASM_X86_SYSTEM_64_H -#define _ASM_X86_SYSTEM_64_H - -#include -#include - - -static inline unsigned long read_cr8(void) -{ - return 0; -} - -static inline void write_cr8(unsigned long val) -{ - BUG_ON(val); -} - -#include - -#endif /* _ASM_X86_SYSTEM_64_H */ --- head-2011-03-17.orig/arch/x86/kernel/acpi/boot.c 2011-03-11 10:59:49.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/acpi/boot.c 2011-03-11 11:06:22.000000000 +0100 @@ -73,10 +73,11 @@ int acpi_sci_override_gsi __initdata; #ifndef CONFIG_XEN int acpi_skip_timer_override __initdata; int acpi_use_timer_override __initdata; +int acpi_fix_pin2_polarity __initdata; #else #define acpi_skip_timer_override 0 +#define acpi_fix_pin2_polarity 0 #endif -int acpi_fix_pin2_polarity __initdata; #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; --- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ 
head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-08 10:25:49.000000000 +0100 @@ -12,10 +12,8 @@ #include #include #include - -#ifdef CONFIG_X86_32 #include -#endif +#include #include "realmode/wakeup.h" #include "sleep.h" @@ -103,7 +101,7 @@ int acpi_save_state_mem(void) #else /* CONFIG_64BIT */ header->trampoline_segment = setup_trampoline() >> 4; #ifdef CONFIG_SMP - stack_start.sp = temp_stack + sizeof(temp_stack); + stack_start = (unsigned long)temp_stack + sizeof(temp_stack); early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(smp_processor_id()); initial_gs = per_cpu_offset(smp_processor_id()); @@ -155,8 +153,17 @@ void __init acpi_reserve_wakeup_memory(v #endif } - #ifndef CONFIG_ACPI_PV_SLEEP +int __init acpi_configure_wakeup_memory(void) +{ + if (acpi_realmode) + set_memory_x(acpi_realmode, WAKEUP_SIZE >> PAGE_SHIFT); + + return 0; +} +arch_initcall(acpi_configure_wakeup_memory); + + static int __init acpi_sleep_setup(char *str) { while ((str != NULL) && (*str != '\0')) { --- head-2011-03-17.orig/arch/x86/kernel/apic/Makefile 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/apic/Makefile 2011-02-03 14:05:07.000000000 +0100 @@ -20,8 +20,6 @@ obj-$(CONFIG_X86_NUMAQ) += numaq_32.o obj-$(CONFIG_X86_ES7000) += es7000_32.o obj-$(CONFIG_X86_SUMMIT) += summit_32.o -obj-$(CONFIG_XEN) += nmi.o - probe_64-$(CONFIG_XEN) := probe_32.o disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o apic_noop.o --- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-17 10:30:00.000000000 +0100 @@ -50,7 +50,6 @@ #include #include #include -#include #include #include @@ -138,6 +137,26 @@ static int __init parse_noapic(char *str } early_param("noapic", parse_noapic); +/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */ +void mp_save_irq(struct mpc_intsrc *m) +{ + int i; + + apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus 
%02x," + " IRQ %02x, APIC ID %x, APIC INT %02x\n", + m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, + m->srcbusirq, m->dstapic, m->dstirq); + + for (i = 0; i < mp_irq_entries; i++) { + if (!memcmp(&mp_irqs[i], m, sizeof(*m))) + return; + } + + memcpy(&mp_irqs[mp_irq_entries], m, sizeof(*m)); + if (++mp_irq_entries == MAX_IRQ_SOURCES) + panic("Max # of irq sources exceeded!!\n"); +} + #ifndef CONFIG_XEN struct irq_pin_list { int apic, pin; @@ -149,6 +168,7 @@ static struct irq_pin_list *alloc_irq_pi return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node); } + /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ #ifdef CONFIG_SPARSE_IRQ static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY]; @@ -2014,8 +2034,7 @@ void disable_IO_APIC(void) * * by Matt Domsch Tue Dec 21 12:25:05 CST 1999 */ - -void __init setup_ioapic_ids_from_mpc(void) +void __init setup_ioapic_ids_from_mpc_nocheck(void) { union IO_APIC_reg_00 reg_00; physid_mask_t phys_id_present_map; @@ -2024,15 +2043,6 @@ void __init setup_ioapic_ids_from_mpc(vo unsigned char old_id; unsigned long flags; - if (acpi_ioapic) - return; - /* - * Don't check I/O APIC IDs for xAPIC systems. They have - * no meaning without the serial APIC bus. - */ - if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) - || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) - return; /* * This is broken; anything with a real cpu count has to * circumvent this idiocy regardless. @@ -2086,7 +2096,6 @@ void __init setup_ioapic_ids_from_mpc(vo physids_or(phys_id_present_map, phys_id_present_map, tmp); } - /* * We need to adjust the IRQ routing table * if the ID changed. @@ -2098,9 +2107,12 @@ void __init setup_ioapic_ids_from_mpc(vo = mp_ioapics[apic_id].apicid; /* - * Read the right value from the MPC table and - * write it into the ID register. + * Update the ID register according to the right value + * from the MPC table if they are different. 
*/ + if (mp_ioapics[apic_id].apicid == reg_00.bits.ID) + continue; + apic_printk(APIC_VERBOSE, KERN_INFO "...changing IO-APIC physical APIC ID to %d ...", mp_ioapics[apic_id].apicid); @@ -2122,6 +2134,21 @@ void __init setup_ioapic_ids_from_mpc(vo apic_printk(APIC_VERBOSE, " ok.\n"); } } + +void __init setup_ioapic_ids_from_mpc(void) +{ + + if (acpi_ioapic) + return; + /* + * Don't check I/O APIC IDs for xAPIC systems. They have + * no meaning without the serial APIC bus. + */ + if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) + || APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) + return; + setup_ioapic_ids_from_mpc_nocheck(); +} #endif int no_timer_check __initdata; @@ -2382,7 +2409,7 @@ asmlinkage void smp_irq_move_cleanup_int unsigned int irr; struct irq_desc *desc; struct irq_cfg *cfg; - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); if (irq == -1) continue; @@ -2416,7 +2443,7 @@ asmlinkage void smp_irq_move_cleanup_int apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR); goto unlock; } - __get_cpu_var(vector_irq)[vector] = -1; + __this_cpu_write(vector_irq[vector], -1); unlock: raw_spin_unlock(&desc->lock); } @@ -2728,24 +2755,6 @@ static void lapic_register_intr(int irq) "edge"); } -static void __init setup_nmi(void) -{ - /* - * Dirty trick to enable the NMI watchdog ... - * We put the 8259A master into AEOI mode and - * unmask on all local APICs LVT0 as NMI. - * - * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire') - * is from Maciej W. Rozycki - so we do not have to EOI from - * the NMI handler or the timer interrupt. - */ - apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ..."); - - enable_NMI_through_LVT0(); - - apic_printk(APIC_VERBOSE, " done.\n"); -} - /* * This looks a bit hackish but it's about the only one way of sending * a few INTA cycles to 8259As and any associated glue logic. 
ICR does @@ -2851,15 +2860,6 @@ static inline void __init check_timer(vo */ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); legacy_pic->init(1); -#ifdef CONFIG_X86_32 - { - unsigned int ver; - - ver = apic_read(APIC_LVR); - ver = GET_APIC_VERSION(ver); - timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); - } -#endif pin1 = find_isa_irq_pin(0, mp_INT); apic1 = find_isa_irq_apic(0, mp_INT); @@ -2907,10 +2907,6 @@ static inline void __init check_timer(vo unmask_ioapic(cfg); } if (timer_irq_works()) { - if (nmi_watchdog == NMI_IO_APIC) { - setup_nmi(); - legacy_pic->unmask(0); - } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); goto out; @@ -2936,11 +2932,6 @@ static inline void __init check_timer(vo if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; - if (nmi_watchdog == NMI_IO_APIC) { - legacy_pic->mask(0); - setup_nmi(); - legacy_pic->unmask(0); - } goto out; } /* @@ -2952,15 +2943,6 @@ static inline void __init check_timer(vo apic_printk(APIC_QUIET, KERN_INFO "....... 
failed.\n"); } - if (nmi_watchdog == NMI_IO_APIC) { - apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " - "through the IO-APIC - disabling NMI Watchdog!\n"); - nmi_watchdog = NMI_NONE; - } -#ifdef CONFIG_X86_32 - timer_ack = 0; -#endif - apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...\n"); @@ -3741,7 +3723,7 @@ int __init io_apic_get_redir_entries (in } #ifndef CONFIG_XEN -void __init probe_nr_irqs_gsi(void) +static void __init probe_nr_irqs_gsi(void) { int nr; @@ -4069,7 +4051,7 @@ static struct resource * __init ioapic_s return res; } -void __init ioapic_init_mappings(void) +void __init ioapic_and_gsi_init(void) { unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0; struct resource *ioapic_res; @@ -4107,6 +4089,8 @@ fake_ioapic_page: ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1; ioapic_res++; } + + probe_nr_irqs_gsi(); } void __init ioapic_insert_resources(void) @@ -4132,6 +4116,9 @@ int mp_find_ioapic(u32 gsi) { int i = 0; + if (nr_ioapics == 0) + return -1; + /* Find the IOAPIC that manages this GSI. 
*/ for (i = 0; i < nr_ioapics; i++) { if ((gsi >= mp_gsi_routing[i].gsi_base) @@ -4220,7 +4207,8 @@ void __init pre_init_apic_IRQ0(void) printk(KERN_INFO "Early APIC setup for system timer0\n"); #ifndef CONFIG_SMP - phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); + physid_set_mask_of_physid(boot_cpu_physical_apicid, + &phys_cpu_present_map); #endif /* Make sure the irq descriptor is set up */ cfg = alloc_irq_and_cfg_at(0, 0); --- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:43:08.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:43:14.000000000 +0100 @@ -931,7 +931,6 @@ void __init identify_boot_cpu(void) #else vgetcpu_set_mode(); #endif - init_hw_perf_events(); } #ifdef CONFIG_XEN --- head-2011-03-17.orig/arch/x86/kernel/cpu/intel_cacheinfo.c 2011-02-08 10:06:37.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/intel_cacheinfo.c 2011-02-08 10:08:56.000000000 +0100 @@ -965,7 +965,7 @@ static struct attribute *default_attrs[] NULL }; -#ifdef CONFIG_AMD_NB +#if defined(CONFIG_AMD_NB) && !defined(CONFIG_XEN) static struct attribute ** __cpuinit amd_l3_attrs(void) { static struct attribute **attrs; @@ -1105,7 +1105,7 @@ static int __cpuinit cache_add_dev(struc this_leaf = CPUID4_INFO_IDX(cpu, i); ktype_cache.default_attrs = default_attrs; -#ifdef CONFIG_AMD_NB +#if defined(CONFIG_AMD_NB) && !defined(CONFIG_XEN) if (this_leaf->l3) ktype_cache.default_attrs = amd_l3_attrs(); #endif --- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 15:41:35.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include --- head-2011-03-17.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:41:35.000000000 +0100 @@ -272,7 +272,7 @@ static int __init setup_early_printk(cha if (!strncmp(buf, 
"xen", 3)) early_console_register(&xenboot_console, keep); #endif -#ifdef CONFIG_X86_MRST_EARLY_PRINTK +#ifdef CONFIG_EARLY_PRINTK_MRST if (!strncmp(buf, "mrst", 4)) { mrst_early_console_init(); early_console_register(&early_mrst_console, keep); @@ -282,7 +282,6 @@ static int __init setup_early_printk(cha hsu_early_console_init(); early_console_register(&early_hsu_console, keep); } - #endif buf++; } --- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:41:35.000000000 +0100 @@ -1676,6 +1676,16 @@ ENTRY(general_protection) CFI_ENDPROC END(general_protection) +#ifdef CONFIG_KVM_GUEST +ENTRY(async_page_fault) + RING0_EC_FRAME + pushl $do_async_page_fault + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(async_page_fault) +#endif + /* * End of kprobes section */ --- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:41:35.000000000 +0100 @@ -331,17 +331,21 @@ NMI_MASK = 0x80000000 ENTRY(save_args) XCPT_FRAME cld - movq_cfi rdi, RDI+16-ARGOFFSET - movq_cfi rsi, RSI+16-ARGOFFSET - movq_cfi rdx, RDX+16-ARGOFFSET - movq_cfi rcx, RCX+16-ARGOFFSET - movq_cfi rax, RAX+16-ARGOFFSET - movq_cfi r8, R8+16-ARGOFFSET - movq_cfi r9, R9+16-ARGOFFSET - movq_cfi r10, R10+16-ARGOFFSET - movq_cfi r11, R11+16-ARGOFFSET + /*
 * start from rbp in pt_regs and jump over
 * return address. 
+ */ + movq_cfi rdi, RDI+8-RBP + movq_cfi rsi, RSI+8-RBP + movq_cfi rdx, RDX+8-RBP + movq_cfi rcx, RCX+8-RBP + movq_cfi rax, RAX+8-RBP + movq_cfi r8, R8+8-RBP + movq_cfi r9, R9+8-RBP + movq_cfi r10, R10+8-RBP + movq_cfi r11, R11+8-RBP - leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ + leaq -RBP+8(%rsp),%rdi /* arg1 for handler */ movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) @@ -1142,6 +1146,9 @@ paranoidzeroentry_ist int3 do_int3 DEBUG paranoiderrorentry stack_segment do_stack_segment errorentry general_protection do_general_protection errorentry page_fault do_page_fault +#ifdef CONFIG_KVM_GUEST +errorentry async_page_fault do_async_page_fault +#endif #ifdef CONFIG_X86_MCE paranoidzeroentry machine_check *machine_check_vector(%rip) #endif --- head-2011-03-17.orig/arch/x86/kernel/head32-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/head32-xen.c 2011-02-01 15:41:35.000000000 +0100 @@ -81,6 +81,9 @@ void __init i386_start_kernel(void) case X86_SUBARCH_MRST: x86_mrst_early_setup(); break; + case X86_SUBARCH_CE4100: + x86_ce4100_early_setup(); + break; default: i386_default_early_setup(); break; --- head-2011-03-17.orig/arch/x86/kernel/irq-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/irq-xen.c 2011-02-18 15:17:23.000000000 +0100 @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -254,7 +255,7 @@ unsigned int __irq_entry do_IRQ(struct p exit_idle(); irq_enter(); - irq = __get_cpu_var(vector_irq)[vector]; + irq = __this_cpu_read(vector_irq[vector]); if (!handle_irq(irq, regs)) { ack_APIC_irq(); @@ -294,6 +295,15 @@ void smp_x86_platform_ipi(struct pt_regs } #endif +#ifdef CONFIG_OF +unsigned int irq_create_of_mapping(struct device_node *controller, + const u32 *intspec, unsigned int intsize) +{ + return intspec[0]; +} +EXPORT_SYMBOL_GPL(irq_create_of_mapping); +#endif + #ifdef CONFIG_HOTPLUG_CPU #include /* A cpu has been 
removed from cpu_online_mask. Reset irq affinities. */ @@ -369,7 +379,8 @@ void fixup_irqs(void) continue; if (xen_test_irq_pending(irq)) { - data = irq_get_irq_data(irq); + desc = irq_to_desc(irq); + data = &desc->irq_data; raw_spin_lock(&desc->lock); if (data->chip->irq_retrigger) data->chip->irq_retrigger(data); --- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 16:09:24.000000000 +0100 @@ -130,21 +130,8 @@ static void __init MP_bus_info(struct mp static void __init MP_ioapic_info(struct mpc_ioapic *m) { - if (!(m->flags & MPC_APIC_USABLE)) - return; - - printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", - m->apicid, m->apicver, m->apicaddr); - - mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); -} - -static void print_MP_intsrc_info(struct mpc_intsrc *m) -{ - apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," - " IRQ %02x, APIC ID %x, APIC INT %02x\n", - m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus, - m->srcbusirq, m->dstapic, m->dstirq); + if (m->flags & MPC_APIC_USABLE) + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); } static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq) @@ -156,73 +143,11 @@ static void __init print_mp_irq_info(str mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq); } -static void __init assign_to_mp_irq(struct mpc_intsrc *m, - struct mpc_intsrc *mp_irq) -{ - mp_irq->dstapic = m->dstapic; - mp_irq->type = m->type; - mp_irq->irqtype = m->irqtype; - mp_irq->irqflag = m->irqflag; - mp_irq->srcbus = m->srcbus; - mp_irq->srcbusirq = m->srcbusirq; - mp_irq->dstirq = m->dstirq; -} - -static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq, - struct mpc_intsrc *m) -{ - m->dstapic = mp_irq->dstapic; - m->type = mp_irq->type; - m->irqtype = mp_irq->irqtype; - m->irqflag = mp_irq->irqflag; - m->srcbus = mp_irq->srcbus; - m->srcbusirq = mp_irq->srcbusirq; - m->dstirq = mp_irq->dstirq; 
-} - -static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq, - struct mpc_intsrc *m) -{ - if (mp_irq->dstapic != m->dstapic) - return 1; - if (mp_irq->type != m->type) - return 2; - if (mp_irq->irqtype != m->irqtype) - return 3; - if (mp_irq->irqflag != m->irqflag) - return 4; - if (mp_irq->srcbus != m->srcbus) - return 5; - if (mp_irq->srcbusirq != m->srcbusirq) - return 6; - if (mp_irq->dstirq != m->dstirq) - return 7; - - return 0; -} - -static void __init MP_intsrc_info(struct mpc_intsrc *m) -{ - int i; - - print_MP_intsrc_info(m); - - for (i = 0; i < mp_irq_entries; i++) { - if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m)) - return; - } - - assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]); - if (++mp_irq_entries == MAX_IRQ_SOURCES) - panic("Max # of irq sources exceeded!!\n"); -} #else /* CONFIG_X86_IO_APIC */ static inline void __init MP_bus_info(struct mpc_bus *m) {} static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {} -static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {} #endif /* CONFIG_X86_IO_APIC */ - static void __init MP_lintsrc_info(struct mpc_lintsrc *m) { apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," @@ -234,7 +159,6 @@ static void __init MP_lintsrc_info(struc /* * Read/parse the MPC */ - static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str) { @@ -289,20 +213,6 @@ static void __init smp_dump_mptable(stru void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } -static void __init smp_register_lapic_address(unsigned long address) -{ -#ifndef CONFIG_XEN - mp_lapic_addr = address; - - set_fixmap_nocache(FIX_APIC_BASE, address); - if (boot_cpu_physical_apicid == -1U) { - boot_cpu_physical_apicid = read_apic_id(); - apic_version[boot_cpu_physical_apicid] = - GET_APIC_VERSION(apic_read(APIC_LVR)); - } -#endif -} - static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) { char str[16]; @@ -318,18 +228,14 @@ static int __init smp_read_mpc(struct mp #ifdef 
CONFIG_X86_32 generic_mps_oem_check(mpc, oem, str); #endif - /* save the local APIC address, it might be non-default */ + /* Initialize the lapic mapping */ if (!acpi_lapic) - mp_lapic_addr = mpc->lapic; + register_lapic_address(mpc->lapic); #endif if (early) return 1; - /* Initialize the lapic mapping */ - if (!acpi_lapic) - smp_register_lapic_address(mpc->lapic); - if (mpc->oemptr) x86_init.mpparse.smp_read_mpc_oem(mpc); @@ -355,7 +261,7 @@ static int __init smp_read_mpc(struct mp skip_entry(&mpt, &count, sizeof(struct mpc_ioapic)); break; case MP_INTSRC: - MP_intsrc_info((struct mpc_intsrc *)mpt); + mp_save_irq((struct mpc_intsrc *)mpt); skip_entry(&mpt, &count, sizeof(struct mpc_intsrc)); break; case MP_LINTSRC: @@ -447,13 +353,13 @@ static void __init construct_default_ioi intsrc.srcbusirq = i; intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */ - MP_intsrc_info(&intsrc); + mp_save_irq(&intsrc); } intsrc.irqtype = mp_ExtINT; intsrc.srcbusirq = 0; intsrc.dstirq = 0; /* 8259A to INTIN0 */ - MP_intsrc_info(&intsrc); + mp_save_irq(&intsrc); } @@ -824,11 +730,11 @@ static void __init check_irq_src(struct int i; apic_printk(APIC_VERBOSE, "OLD "); - print_MP_intsrc_info(m); + print_mp_irq_info(m); i = get_MP_intsrc_index(m); if (i > 0) { - assign_to_mpc_intsrc(&mp_irqs[i], m); + memcpy(m, &mp_irqs[i], sizeof(*m)); apic_printk(APIC_VERBOSE, "NEW "); print_mp_irq_info(&mp_irqs[i]); return; @@ -915,14 +821,14 @@ static int __init replace_intsrc_all(st if (nr_m_spare > 0) { apic_printk(APIC_VERBOSE, "*NEW* found\n"); nr_m_spare--; - assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]); + memcpy(m_spare[nr_m_spare], &mp_irqs[i], sizeof(mp_irqs[i])); m_spare[nr_m_spare] = NULL; } else { struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; count += sizeof(struct mpc_intsrc); if (check_slot(mpc_new_phys, mpc_new_length, count) < 0) goto out; - assign_to_mpc_intsrc(&mp_irqs[i], m); + memcpy(m, &mp_irqs[i], sizeof(*m)); mpc->length = count; mpt += sizeof(struct mpc_intsrc); } --- 
head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:11:01.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:13:18.000000000 +0100 @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -23,11 +24,6 @@ #include #include -unsigned long idle_halt; -EXPORT_SYMBOL(idle_halt); -unsigned long idle_nomwait; -EXPORT_SYMBOL(idle_nomwait); - struct kmem_cache *task_xstate_cachep; EXPORT_SYMBOL_GPL(task_xstate_cachep); @@ -93,27 +89,36 @@ void exit_thread(void) void show_regs(struct pt_regs *regs) { show_registers(regs); - show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), - regs->bp); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs)); } void show_regs_common(void) { - const char *board, *product; + const char *vendor, *product, *board; - board = dmi_get_system_info(DMI_BOARD_NAME); - if (!board) - board = ""; + vendor = dmi_get_system_info(DMI_SYS_VENDOR); + if (!vendor) + vendor = ""; product = dmi_get_system_info(DMI_PRODUCT_NAME); if (!product) product = ""; + /* Board Name is optional */ + board = dmi_get_system_info(DMI_BOARD_NAME); + printk(KERN_CONT "\n"); - printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n", + printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s", current->pid, current->comm, print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), - init_utsname()->version, board, product); + init_utsname()->version); + printk(KERN_CONT " "); + printk(KERN_CONT "%s %s", vendor, product); + if (board) { + printk(KERN_CONT "/"); + printk(KERN_CONT "%s", board); + } + printk(KERN_CONT "\n"); } void flush_thread(void) @@ -315,7 +320,7 @@ long sys_execve(const char __user *name, /* * Idle related variables and functions */ -unsigned long boot_option_idle_override = 0; +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); /* @@ -331,6 +336,7 @@ EXPORT_SYMBOL(pm_idle); void 
xen_idle(void) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); + trace_cpu_idle(1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -343,6 +349,8 @@ void xen_idle(void) else local_irq_enable(); current_thread_info()->status |= TS_POLLING; + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } #ifdef CONFIG_APM_MODULE EXPORT_SYMBOL(default_idle); @@ -396,9 +404,8 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); if (!need_resched()) { - if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); __monitor((void *)¤t_thread_info()->flags, 0, 0); @@ -413,7 +420,8 @@ static void mwait_idle(void) { if (!need_resched()) { trace_power_start(POWER_CSTATE, 1, smp_processor_id()); - if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) + trace_cpu_idle(1, smp_processor_id()); + if (cpu_has(__this_cpu_ptr(&cpu_info), X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); __monitor((void *)¤t_thread_info()->flags, 0, 0); @@ -422,6 +430,8 @@ static void mwait_idle(void) __sti_mwait(0, 0); else local_irq_enable(); + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } else local_irq_enable(); } @@ -435,10 +445,12 @@ static void mwait_idle(void) static void poll_idle(void) { trace_power_start(POWER_CSTATE, 0, smp_processor_id()); + trace_cpu_idle(0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); - trace_power_end(0); + trace_power_end(smp_processor_id()); + trace_cpu_idle(PWR_EVENT_EXIT, smp_processor_id()); } #ifndef CONFIG_XEN @@ -454,17 +466,16 @@ static void poll_idle(void) * * idle=mwait overrides this decision and forces the usage of 
mwait. */ -static int __cpuinitdata force_mwait; #define MWAIT_INFO 0x05 #define MWAIT_ECX_EXTENDED_INFO 0x01 #define MWAIT_EDX_C1 0xf0 -static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c) +int mwait_usable(const struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - if (force_mwait) + if (boot_option_idle_override == IDLE_FORCE_MWAIT) return 1; if (c->cpuid_level < MWAIT_INFO) @@ -589,10 +600,11 @@ static int __init idle_setup(char *str) if (!strcmp(str, "poll")) { printk("using polling idle threads.\n"); pm_idle = poll_idle; + boot_option_idle_override = IDLE_POLL; #ifndef CONFIG_XEN - } else if (!strcmp(str, "mwait")) - force_mwait = 1; - else if (!strcmp(str, "halt")) { + } else if (!strcmp(str, "mwait")) { + boot_option_idle_override = IDLE_FORCE_MWAIT; + } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is * forced to be used for CPU idle. In such case CPU C2/C3 @@ -601,8 +613,7 @@ static int __init idle_setup(char *str) * the boot_option_idle_override. */ pm_idle = default_idle; - idle_halt = 1; - return 0; + boot_option_idle_override = IDLE_HALT; } else if (!strcmp(str, "nomwait")) { /* * If the boot option of "idle=nomwait" is added, @@ -610,13 +621,11 @@ static int __init idle_setup(char *str) * states. In such case it won't touch the variable * of boot_option_idle_override. 
*/ - idle_nomwait = 1; - return 0; + boot_option_idle_override = IDLE_NOMWAIT; #endif } else return -1; - boot_option_idle_override = 1; return 0; } early_param("idle", idle_setup); --- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:59.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:48:21.000000000 +0100 @@ -59,8 +59,6 @@ #include #include -#include - asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork"); @@ -116,8 +114,6 @@ void cpu_idle(void) stop_critical_timings(); xen_idle(); start_critical_timings(); - - trace_power_end(smp_processor_id()); } tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); --- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:56.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:48:24.000000000 +0100 @@ -56,8 +56,6 @@ #include #include -#include - asmlinkage extern void ret_from_fork(void); static DEFINE_PER_CPU(unsigned char, is_idle); @@ -145,8 +143,6 @@ void cpu_idle(void) xen_idle(); start_critical_timings(); - trace_power_end(smp_processor_id()); - /* In many cases the interrupt that ended idle has already called exit_idle. But some idle loops can be woken up without interrupt. 
*/ --- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-03 16:25:11.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:25:29.000000000 +0100 @@ -777,7 +777,7 @@ static u64 __init get_max_mapped(void) void __init setup_arch(char **cmdline_p) { int acpi = 0; - int k8 = 0; + int amd = 0; unsigned long flags; #ifdef CONFIG_XEN unsigned int i; @@ -1125,12 +1125,12 @@ void __init setup_arch(char **cmdline_p) acpi = acpi_numa_init(); #endif -#ifdef CONFIG_K8_NUMA +#ifdef CONFIG_AMD_NUMA if (!acpi) - k8 = !k8_numa_init(0, max_pfn); + amd = !amd_numa_init(0, max_pfn); #endif - initmem_init(0, max_pfn, acpi, k8); + initmem_init(0, max_pfn, acpi, amd); memblock_find_dma_reserve(); dma32_reserve_bootmem(); @@ -1259,10 +1259,7 @@ void __init setup_arch(char **cmdline_p) #ifndef CONFIG_XEN init_apic_mappings(); - ioapic_init_mappings(); - - /* need to wait for io_apic is mapped */ - probe_nr_irqs_gsi(); + ioapic_and_gsi_init(); kvm_guest_init(); --- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 16:25:32.000000000 +0100 @@ -87,6 +87,13 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; +int unknown_nmi_panic; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); + static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -304,16 +311,23 @@ gp_in_kernel: die("general protection fault", regs, error_code); } -static notrace __kprobes void -mem_parity_error(unsigned char reason, struct pt_regs *regs) +static int __init setup_unknown_nmi_panic(char *str) { - printk(KERN_EMERG - "Uhhuh. 
NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - printk(KERN_EMERG - "You have some hardware problem, likely on the PCI bus.\n"); +static notrace __kprobes void +pci_serr_error(unsigned char reason, struct pt_regs *regs) +{ + pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); + /* + * On some machines, PCI SERR line is used to report memory + * errors. EDAC makes use of it. + */ #if defined(CONFIG_EDAC) if (edac_handler_set()) { edac_atomic_assert_error(); @@ -324,16 +338,18 @@ mem_parity_error(unsigned char reason, s if (panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); - /* Clear and disable the memory parity error line. */ - clear_mem_error(reason); + /* Clear and disable the PCI SERR error line. */ + clear_serr_error(reason); } static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { - printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n"); + pr_emerg( + "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n", + reason, smp_processor_id()); show_registers(regs); if (panic_on_io_nmi) @@ -359,69 +375,50 @@ unknown_nmi_error(unsigned char reason, return; } #endif - printk(KERN_EMERG - "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", - reason, smp_processor_id()); + pr_emerg("Uhhuh. 
NMI received for unknown reason %02x on CPU %d.\n", + reason, smp_processor_id()); - printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); - if (panic_on_unrecovered_nmi) + pr_emerg("Do you have a strange power saving mode enabled?\n"); + if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); - printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); + pr_emerg("Dazed and confused, but trying to continue\n"); } static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; - int cpu; - - cpu = smp_processor_id(); - - /* Only the BSP gets external NMIs from the system. */ - if (!cpu) - reason = get_nmi_reason(); - - if (!(reason & 0xc0)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; - -#ifdef CONFIG_X86_LOCAL_APIC - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; -#ifndef CONFIG_LOCKUP_DETECTOR + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; + + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); + + if (reason & NMI_REASON_MASK) { + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. 
+ * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) -#endif /* !CONFIG_LOCKUP_DETECTOR */ - unknown_nmi_error(reason, regs); -#else - unknown_nmi_error(reason, regs); + reassert_nmi(); #endif - + raw_spin_unlock(&nmi_reason_lock); return; } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) - return; + raw_spin_unlock(&nmi_reason_lock); - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & 0x80) - mem_parity_error(reason, regs); - if (reason & 0x40) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); -#endif + unknown_nmi_error(reason, regs); } dotraplinkage notrace __kprobes void @@ -439,14 +436,12 @@ do_nmi(struct pt_regs *regs, long error_ void stop_nmi(void) { - acpi_nmi_disable(); ignore_nmis++; } void restart_nmi(void) { ignore_nmis--; - acpi_nmi_enable(); } /* May run on IST stack. */ --- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-03-17 14:22:21.000000000 +0100 @@ -237,15 +237,14 @@ void vmalloc_sync_all(void) for (address = VMALLOC_START & PMD_MASK; address >= TASK_SIZE && address < FIXADDR_TOP; address += PMD_SIZE) { - - unsigned long flags; struct page *page; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { spinlock_t *pgt_lock; pmd_t *ret; + /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -255,7 +254,7 @@ void vmalloc_sync_all(void) if (!ret) break; } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } @@ -836,6 +835,13 @@ mm_fault_error(struct pt_regs *regs, uns unsigned long address, unsigned int fault) { if (fault & VM_FAULT_OOM) { + /* Kernel mode? 
Handle exceptions or die: */ + if (!(error_code & PF_USER)) { + up_read(¤t->mm->mmap_sem); + no_context(regs, error_code, address); + return; + } + out_of_memory(regs, error_code, address); } else { if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON| --- head-2011-03-17.orig/arch/x86/mm/init-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/init-xen.c 2011-02-01 15:41:35.000000000 +0100 @@ -404,8 +404,9 @@ void free_init_pages(char *what, unsigne /* * We just marked the kernel text read only above, now that * we are going to free part of that, we need to make that - * writeable first. + * writeable and non-executable first. */ + set_memory_nx(begin, (end - begin) >> PAGE_SHIFT); set_memory_rw(begin, (end - begin) >> PAGE_SHIFT); printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); --- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 15:41:35.000000000 +0100 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -242,7 +243,7 @@ page_table_range_init(unsigned long star static inline int is_kernel_text(unsigned long addr) { - if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) + if (addr >= (unsigned long)_text && addr <= (unsigned long)__init_end) return 1; return 0; } @@ -775,6 +776,7 @@ void __init paging_init(void) /* * NOTE: at this point the bootmem allocator is fully available. */ + olpc_dt_build_devicetree(); sparse_init(); zone_sizes_init(); } @@ -980,6 +982,23 @@ void set_kernel_text_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); } +static void mark_nxdata_nx(void) +{ + /* + * When this called, init has already been executed and released, + * so everything past _etext sould be NX. + */ + unsigned long start = PFN_ALIGN(_etext); + /* + * This comes from is_kernel_text upper limit. 
Also HPAGE where used: + */ + unsigned long size = (((unsigned long)__init_end + HPAGE_SIZE) & HPAGE_MASK) - start; + + if (__supported_pte_mask & _PAGE_NX) + printk(KERN_INFO "NX-protecting the kernel data: %luk\n", size >> 10); + set_pages_nx(virt_to_page(start), size >> PAGE_SHIFT); +} + void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); @@ -1014,6 +1033,7 @@ void mark_rodata_ro(void) printk(KERN_INFO "Testing CPA: write protecting again\n"); set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); #endif + mark_nxdata_nx(); } #endif --- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-03-17 14:22:21.000000000 +0100 @@ -173,18 +173,18 @@ void sync_global_pgds(unsigned long star for (address = start; address <= end; address += PGDIR_SIZE) { const pgd_t *pgd_ref = pgd_offset_k(address); - unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; spinlock_t *pgt_lock; pgd = (pgd_t *)page_address(page) + pgd_index(address); + /* the pgt_lock only for Xen */ pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); @@ -196,7 +196,7 @@ void sync_global_pgds(unsigned long star spin_unlock(pgt_lock); } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } } --- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:42:09.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:42:18.000000000 +0100 @@ -163,6 +163,16 @@ int create_lookup_pte_addr(struct mm_str EXPORT_SYMBOL(create_lookup_pte_addr); +#ifdef CONFIG_MODULES +/* + * Force the implementation of ioremap_page_range() to be pulled in from + * lib/lib.a even if there is no other reference from the core kernel to it + * (native uses it in __ioremap_caller()), so that it gets exported. 
+ */ +static void *const __section(.discard.ioremap) __used +_ioremap_page_range = ioremap_page_range; +#endif + /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. --- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-03-17 14:22:21.000000000 +0100 @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -56,12 +57,10 @@ static unsigned long direct_pages_count[ void update_page_count(int level, unsigned long pages) { - unsigned long flags; - /* Protect against CPA */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); direct_pages_count[level] += pages; - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } static void split_page_count(int level) @@ -256,12 +255,12 @@ static inline pgprot_t static_protection { pgprot_t forbidden = __pgprot(0); -#ifndef CONFIG_XEN /* * The BIOS area between 640k and 1Mb needs to be executable for * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support. */ - if (within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) +#ifdef CONFIG_PCI_BIOS + if (pcibios_enabled && within(pfn, BIOS_BEGIN >> PAGE_SHIFT, BIOS_END >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_NX; #endif @@ -405,16 +404,16 @@ static int try_preserve_large_page(pte_t *kpte, unsigned long address, struct cpa_data *cpa) { - unsigned long nextpage_addr, numpages, pmask, psize, flags, addr, pfn; + unsigned long nextpage_addr, numpages, pmask, psize, addr, pfn; pte_t new_pte, old_pte, *tmp; - pgprot_t old_prot, new_prot; + pgprot_t old_prot, new_prot, req_prot; int i, do_split = 1; unsigned int level; if (cpa->force_split) return 1; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up already: @@ -452,10 +451,10 @@ try_preserve_large_page(pte_t *kpte, uns * We are safe now. 
Check whether the new pgprot is the same: */ old_pte = *kpte; - old_prot = new_prot = pte_pgprot(old_pte); + old_prot = new_prot = req_prot = pte_pgprot(old_pte); - pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); - pgprot_val(new_prot) |= pgprot_val(cpa->mask_set); + pgprot_val(req_prot) &= ~pgprot_val(cpa->mask_clr); + pgprot_val(req_prot) |= pgprot_val(cpa->mask_set); /* * old_pte points to the large page base address. So we need @@ -464,22 +463,21 @@ try_preserve_large_page(pte_t *kpte, uns pfn = pte_pfn(old_pte) + ((address & (psize - 1)) >> PAGE_SHIFT); cpa->pfn = pfn; - new_prot = static_protections(new_prot, address, pfn); + new_prot = static_protections(req_prot, address, pfn); /* * We need to check the full range, whether * static_protection() requires a different pgprot for one of * the pages in the range we try to preserve: */ - if (pfn < max_mapnr) { - addr = address + PAGE_SIZE; - for (i = 1; i < cpa->numpages && ++pfn < max_mapnr; - i++, addr += PAGE_SIZE) { - pgprot_t chk_prot = static_protections(new_prot, addr, pfn); + addr = address & pmask; + pfn = pte_pfn(old_pte); + for (i = 0; i < (psize >> PAGE_SHIFT) && pfn < max_mapnr; + i++, addr += PAGE_SIZE, pfn++) { + pgprot_t chk_prot = static_protections(req_prot, addr, pfn); - if (pgprot_val(chk_prot) != pgprot_val(new_prot)) - goto out_unlock; - } + if (pgprot_val(chk_prot) != pgprot_val(new_prot)) + goto out_unlock; } /* @@ -499,7 +497,7 @@ try_preserve_large_page(pte_t *kpte, uns * that we limited the number of possible pages already to * the number of pages in the large page. */ - if (address == (nextpage_addr - psize) && cpa->numpages == numpages) { + if (address == (address & pmask) && cpa->numpages == (psize >> PAGE_SHIFT)) { /* * The address is aligned and the number of pages * covers the full page. 
@@ -511,14 +509,14 @@ try_preserve_large_page(pte_t *kpte, uns } out_unlock: - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return do_split; } static int split_large_page(pte_t *kpte, unsigned long address) { - unsigned long flags, mfn, mfninc = 1; + unsigned long mfn, mfninc = 1; unsigned int i, level; pte_t *pbase, *tmp; pgprot_t ref_prot; @@ -532,7 +530,7 @@ static int split_large_page(pte_t *kpte, if (!base) return -ENOMEM; - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); /* * Check for races, another CPU might have split this page * up for us already: @@ -608,7 +606,7 @@ out_unlock: */ if (base) __free_page(base); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return 0; } --- head-2011-03-17.orig/arch/x86/mm/pgtable-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pgtable-xen.c 2011-03-17 14:26:03.000000000 +0100 @@ -358,23 +358,23 @@ void mm_unpin(struct mm_struct *mm) void mm_pin_all(void) { struct page *page; - unsigned long flags; if (xen_feature(XENFEAT_writable_page_tables)) return; /* * Allow uninterrupted access to the pgd_list. Also protects - * __pgd_pin() by disabling preemption. + * __pgd_pin() by ensuring preemption is disabled. * All other CPUs must be at a safe point (e.g., in stop_machine * or offlined entirely). 
*/ - spin_lock_irqsave(&pgd_lock, flags); + BUG_ON(!irqs_disabled()); + spin_lock(&pgd_lock); list_for_each_entry(page, &pgd_list, lru) { if (!PagePinned(page)) __pgd_pin((pgd_t *)page_address(page)); } - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) @@ -470,12 +470,10 @@ static void pgd_ctor(struct mm_struct *m static void pgd_dtor(pgd_t *pgd) { - unsigned long flags; /* can be called from interrupt context */ - if (!SHARED_KERNEL_PMD) { - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); pgd_list_del(pgd); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); } pgd_test_and_unpin(pgd); @@ -641,7 +639,6 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *pgd; pmd_t *pmds[PREALLOCATED_PMDS]; - unsigned long flags; pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER); @@ -661,13 +658,13 @@ pgd_t *pgd_alloc(struct mm_struct *mm) * respect to anything walking the pgd_list, so that they * never see a partially populated pgd. */ - spin_lock_irqsave(&pgd_lock, flags); + spin_lock(&pgd_lock); #ifdef CONFIG_X86_PAE /* Protect against save/restore: move below 4GB under pgd_lock. 
*/ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb) && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) { - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); goto out_free_pmds; } #endif @@ -675,7 +672,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) pgd_ctor(mm, pgd); pgd_prepopulate_pmd(mm, pgd, pmds); - spin_unlock_irqrestore(&pgd_lock, flags); + spin_unlock(&pgd_lock); return pgd; @@ -735,6 +732,25 @@ int ptep_set_access_flags(struct vm_area return changed; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp, + pmd_t entry, int dirty) +{ + int changed = !pmd_same(*pmdp, entry); + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (changed && dirty) { + *pmdp = entry; + pmd_update_defer(vma->vm_mm, address, pmdp); + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + } + + return changed; +} +#endif + int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { @@ -750,6 +766,23 @@ int ptep_test_and_clear_young(struct vm_ return ret; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + int ret = 0; + + if (pmd_young(*pmdp)) + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pmdp); + + if (ret) + pmd_update(vma->vm_mm, addr, pmdp); + + return ret; +} +#endif + int ptep_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { @@ -765,6 +798,36 @@ int ptep_clear_flush_young(struct vm_are return young; } +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long address, pmd_t *pmdp) +{ + int young; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + young = pmdp_test_and_clear_young(vma, address, pmdp); + if (young) + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + + return young; +} + +void pmdp_splitting_flush(struct vm_area_struct *vma, + unsigned long 
address, pmd_t *pmdp) +{ + int set; + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + set = !test_and_set_bit(_PAGE_BIT_SPLITTING, + (unsigned long *)pmdp); + if (set) { + pmd_update(vma->vm_mm, address, pmdp); + /* need tlb flush only to serialize against gup-fast */ + flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE); + } +} +#endif + /** * reserve_top_address - reserves a hole in the top of kernel address space * @reserve - size of hole to reserve --- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 15:41:35.000000000 +0100 @@ -595,7 +595,8 @@ static __init int intel_router_probe(str case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_PATSBURG_LPC: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_0: + case PCI_DEVICE_ID_INTEL_PATSBURG_LPC_1: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; --- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-02-01 15:04:27.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-02-01 16:38:02.000000000 +0100 @@ -20,6 +20,8 @@ * 02110-1301 USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -454,8 +456,8 @@ static int coretemp_device_add(unsigned * without thermal sensors will be filtered out. 
*/ if (!(info.cpuid_6_eax & 0x1)) { - printk(KERN_INFO DRVNAME ": CPU (model=0x%x)" - " has no thermal sensor.\n", info.pdev_entry->x86_model); + pr_info("CPU (model=0x%x) has no thermal sensor\n", + info.pdev_entry->x86_model); goto exit_entry_free; } @@ -478,7 +480,7 @@ static int coretemp_device_add(unsigned pdev = platform_device_alloc(DRVNAME, cpu); if (!pdev) { err = -ENOMEM; - printk(KERN_ERR DRVNAME ": Device allocation failed\n"); + pr_err("Device allocation failed\n"); goto exit; } @@ -488,8 +490,7 @@ static int coretemp_device_add(unsigned err = platform_device_add(pdev); if (err) { - printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n", - err); + pr_err("Device addition failed (%d)\n", err); goto exit_device_put; } --- head-2011-03-17.orig/drivers/hwmon/pkgtemp-xen.c 2011-02-01 15:04:27.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/pkgtemp-xen.c 2011-02-01 16:38:31.000000000 +0100 @@ -20,6 +20,8 @@ * 02110-1301 USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -310,7 +312,7 @@ static int pkgtemp_device_add(unsigned i pdev = platform_device_alloc(DRVNAME, cpu); if (!pdev) { err = -ENOMEM; - printk(KERN_ERR DRVNAME ": Device allocation failed\n"); + pr_err("Device allocation failed\n"); goto exit; } @@ -319,8 +321,7 @@ static int pkgtemp_device_add(unsigned i err = platform_device_add(pdev); if (err) { - printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n", - err); + pr_err("Device addition failed (%d)\n", err); goto exit_device_put; } --- head-2011-03-17.orig/drivers/hwmon/via-cputemp-xen.c 2011-02-01 15:04:27.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/via-cputemp-xen.c 2011-02-01 16:40:53.000000000 +0100 @@ -21,6 +21,8 @@ * 02110-1301 USA. 
*/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -224,15 +226,14 @@ static int via_cputemp_device_add(unsign goto exit_entry_free; if (pdev_entry->x86_model > 0x0f) { - printk(KERN_WARNING DRVNAME ": Unknown CPU " - "model 0x%x\n", pdev_entry->x86_model); + pr_warn("Unknown CPU model 0x%x\n", pdev_entry->x86_model); goto exit_entry_free; } pdev = platform_device_alloc(DRVNAME, cpu); if (!pdev) { err = -ENOMEM; - printk(KERN_ERR DRVNAME ": Device allocation failed\n"); + pr_err("Device allocation failed\n"); goto exit_entry_free; } @@ -241,8 +242,7 @@ static int via_cputemp_device_add(unsign err = platform_device_add(pdev); if (err) { - printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n", - err); + pr_err("Device addition failed (%d)\n", err); goto exit_device_put; } @@ -268,8 +268,9 @@ static void via_cputemp_device_remove(un if (p->pdev->id == cpu) { platform_device_unregister(p->pdev); list_del(&p->list); + mutex_unlock(&pdev_list_mutex); kfree(p); - break; + return; } } mutex_unlock(&pdev_list_mutex); --- head-2011-03-17.orig/drivers/scsi/arcmsr/arcmsr.h 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/drivers/scsi/arcmsr/arcmsr.h 2011-02-17 10:23:02.000000000 +0100 @@ -46,7 +46,7 @@ struct device_attribute; /*The limit of outstanding scsi command that firmware can handle*/ #define ARCMSR_MAX_OUTSTANDING_CMD 256 -#ifdef CONFIG_XEN +#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN) #define ARCMSR_MAX_FREECCB_NUM 160 #else #define ARCMSR_MAX_FREECCB_NUM 320 --- head-2011-03-17.orig/drivers/xen/Kconfig 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/drivers/xen/Kconfig 2011-02-02 17:03:22.000000000 +0100 @@ -380,7 +380,7 @@ config XEN_DEV_EVTCHN firing. If in doubt, say yes. 
-config XEN_BACKEND +config PARAVIRT_XEN_BACKEND bool "Backend driver support" depends on XEN_DOM0 default y @@ -427,7 +427,7 @@ config XEN_XENBUS_FRONTEND config XEN_GNTDEV tristate "userspace grant access device driver" - depends on XEN + depends on PARAVIRT_XEN select MMU_NOTIFIER help Allows userspace processes to use grants. --- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-24 15:17:40.000000000 +0100 +++ head-2011-03-17/drivers/xen/Makefile 2011-02-07 14:39:12.000000000 +0100 @@ -24,13 +24,17 @@ obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotp obj-$(CONFIG_XEN_XENCOMM) += xencomm.o obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y) obj-$(CONFIG_XEN_DEV_EVTCHN) += $(xen-evtchn-name-y).o +obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o -obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o +obj-$(CONFIG_XEN_PLATFORM_PCI) += xen-platform-pci.o obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_DOM0) += pci.o xen-evtchn-y := evtchn.o +xen-gntdev-y := gntdev.o + +xen-platform-pci-y := platform-pci.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ --- head-2011-03-17.orig/drivers/xen/blkback/vbd.c 2011-02-01 14:50:44.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkback/vbd.c 2011-02-07 14:04:20.000000000 +0100 @@ -63,8 +63,10 @@ int vbd_create(blkif_t *blkif, blkif_vde vbd->pdevice = MKDEV(major, minor); - bdev = open_by_devnum(vbd->pdevice, - vbd->readonly ? FMODE_READ : FMODE_WRITE); + bdev = blkdev_get_by_dev(vbd->pdevice, + FMODE_READ | (vbd->readonly ? 0 + : FMODE_WRITE | FMODE_EXCL), + blkif); if (IS_ERR(bdev)) { DPRINTK("vbd_creat: device %08x could not be opened.\n", @@ -96,7 +98,8 @@ void vbd_free(struct vbd *vbd) { if (vbd->bdev) blkdev_put(vbd->bdev, - vbd->readonly ? FMODE_READ : FMODE_WRITE); + FMODE_READ | (vbd->readonly ? 
0 + : FMODE_WRITE | FMODE_EXCL)); vbd->bdev = NULL; } --- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-02-03 12:37:02.000000000 +0100 @@ -437,7 +437,7 @@ static void blkfront_closing(struct blkf spin_unlock_irqrestore(&blkif_io_lock, flags); /* Flush gnttab callback work. Must be done with no locks held. */ - flush_scheduled_work(); + flush_work_sync(&info->work); xlvbd_sysfs_delif(info); @@ -894,7 +894,7 @@ static void blkif_free(struct blkfront_i spin_unlock_irq(&blkif_io_lock); /* Flush gnttab callback work. Must be done with no locks held. */ - flush_scheduled_work(); + flush_work_sync(&info->work); /* Free resources associated with old device channel. */ if (info->ring_ref != GRANT_INVALID_REF) { --- head-2011-03-17.orig/drivers/xen/blktap2/device.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap2/device.c 2011-02-07 14:13:37.000000000 +0100 @@ -754,7 +754,7 @@ blktap_device_close_bdev(struct blktap * dev = &tap->device; if (dev->bdev) - blkdev_put(dev->bdev, FMODE_WRITE); + blkdev_put(dev->bdev, FMODE_WRITE|FMODE_EXCL); dev->bdev = NULL; clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse); @@ -768,7 +768,7 @@ blktap_device_open_bdev(struct blktap *t dev = &tap->device; - bdev = open_by_devnum(pdev, FMODE_WRITE); + bdev = blkdev_get_by_dev(pdev, FMODE_WRITE|FMODE_EXCL, tap); if (IS_ERR(bdev)) { BTERR("opening device %x:%x failed: %ld\n", MAJOR(pdev), MINOR(pdev), PTR_ERR(bdev)); @@ -778,7 +778,7 @@ blktap_device_open_bdev(struct blktap *t if (!bdev->bd_disk) { BTERR("device %x:%x doesn't exist\n", MAJOR(pdev), MINOR(pdev)); - blkdev_put(bdev, FMODE_WRITE); + blkdev_put(bdev, FMODE_WRITE|FMODE_EXCL); return -ENOENT; } --- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-03 11:12:32.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-10 16:24:57.000000000 +0100 @@ -621,12 +621,16 @@ static void 
unbind_from_irq(unsigned int cfg->info = IRQ_UNBOUND; /* Zap stats across IRQ changes of use. */ - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { #ifdef CONFIG_GENERIC_HARDIRQS - irq_to_desc(irq)->kstat_irqs[cpu] = 0; + struct irq_desc *desc = irq_to_desc(irq); + + if (desc->kstat_irqs) + *per_cpu_ptr(desc->kstat_irqs, cpu) = 0; #else kstat_cpu(cpu).irqs[irq] = 0; #endif + } } spin_unlock(&irq_mapping_update_lock); --- head-2011-03-17.orig/drivers/xen/core/smpboot.c 2011-03-03 16:11:42.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/smpboot.c 2011-03-03 16:13:04.000000000 +0100 @@ -188,7 +188,7 @@ static void __cpuinit xen_smp_intr_exit( static void __cpuinit cpu_bringup(void) { cpu_init(); - identify_secondary_cpu(¤t_cpu_data); + identify_secondary_cpu(__this_cpu_ptr(&cpu_info)); touch_softlockup_watchdog(); preempt_disable(); local_irq_enable(); --- head-2011-03-17.orig/drivers/xen/fbfront/xenfb.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/fbfront/xenfb.c 2011-02-08 10:37:50.000000000 +0100 @@ -555,12 +555,12 @@ xenfb_make_preferred_console(void) if (console_set_on_cmdline) return; - acquire_console_sem(); - for (c = console_drivers; c; c = c->next) { + console_lock(); + for_each_console(c) { if (!strcmp(c->name, "tty") && c->index == 0) break; } - release_console_sem(); + console_unlock(); if (c) { unregister_console(c); c->flags |= CON_CONSDEV; --- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-09 16:05:34.000000000 +0100 +++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-03 12:49:56.000000000 +0100 @@ -132,17 +132,18 @@ static inline int skb_gso_ok(struct sk_b return (features & NETIF_F_TSO); } -static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) +#define netif_skb_features(skb) ((skb)->dev->features) +static inline int netif_needs_gso(struct sk_buff *skb, int features) { return skb_is_gso(skb) && - (!skb_gso_ok(skb, dev->features) || + (!skb_gso_ok(skb, features) || 
unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); } #else #define HAVE_GSO 0 #define HAVE_TSO 0 #define HAVE_CSUM_OFFLOAD 0 -#define netif_needs_gso(dev, skb) 0 +#define netif_needs_gso(skb, feat) 0 #define dev_disable_gso_features(dev) ((void)0) #define ethtool_op_set_tso(dev, data) (-ENOSYS) #endif @@ -952,7 +953,7 @@ static int network_start_xmit(struct sk_ if (unlikely(!netfront_carrier_ok(np) || (frags > 1 && !xennet_can_sg(dev)) || - netif_needs_gso(dev, skb))) { + netif_needs_gso(skb, netif_skb_features(skb)))) { spin_unlock_irq(&np->tx_lock); goto drop; } --- head-2011-03-17.orig/drivers/xen/pcifront/xenbus.c 2011-02-01 14:50:44.000000000 +0100 +++ head-2011-03-17/drivers/xen/pcifront/xenbus.c 2011-02-03 12:39:42.000000000 +0100 @@ -61,7 +61,7 @@ static void free_pdev(struct pcifront_de pcifront_free_roots(pdev); /*For PCIE_AER error handling job*/ - flush_scheduled_work(); + flush_work_sync(&pdev->op_work); if (pdev->irq > 0) unbind_from_irqhandler(pdev->irq, pdev); --- head-2011-03-17.orig/drivers/xen/sfc_netback/accel_xenbus.c 2011-02-01 14:50:44.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netback/accel_xenbus.c 2011-02-03 12:38:43.000000000 +0100 @@ -701,7 +701,7 @@ fail_config_watch: * Flush the scheduled work queue before freeing bend to get * rid of any pending netback_accel_msg_rx_handler() */ - flush_scheduled_work(); + flush_work_sync(&bend->handle_msg); mutex_lock(&bend->bend_mutex); net_accel_update_state(dev, XenbusStateUnknown); @@ -781,7 +781,7 @@ int netback_accel_remove(struct xenbus_d * Flush the scheduled work queue before freeing bend to get * rid of any pending netback_accel_msg_rx_handler() */ - flush_scheduled_work(); + flush_work_sync(&bend->handle_msg); mutex_lock(&bend->bend_mutex); --- head-2011-03-17.orig/drivers/xen/xenbus/Makefile 2011-02-02 17:06:11.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/Makefile 2011-02-02 17:08:58.000000000 +0100 @@ -7,3 +7,6 @@ xenbus_be-objs += xenbus_backend_client. 
xenbus-$(CONFIG_XEN_BACKEND) += xenbus_probe_backend.o obj-y += $(xenbus-y) $(xenbus-m) obj-$(CONFIG_XEN_XENBUS_DEV) += xenbus_dev.o + +obj-$(CONFIG_PARAVIRT_XEN_BACKEND) += xenbus_probe_backend.o +obj-$(CONFIG_XEN_XENBUS_FRONTEND) += xenbus_probe_frontend.o --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-07 12:54:16.000000000 +0100 @@ -65,8 +65,6 @@ #include #include #include - -#include #endif #ifndef CONFIG_XEN @@ -96,15 +94,12 @@ extern struct mutex xenwatch_mutex; static BLOCKING_NOTIFIER_HEAD(xenstore_chain); +#if defined(CONFIG_XEN) || defined(MODULE) static void wait_for_devices(struct xenbus_driver *xendrv); -static int xenbus_probe_frontend(const char *type, const char *name); - -static void xenbus_dev_shutdown(struct device *_dev); - -#if !defined(CONFIG_XEN) && !defined(MODULE) -static int xenbus_dev_suspend(struct device *dev, pm_message_t state); -static int xenbus_dev_resume(struct device *dev); +#define PARAVIRT_EXPORT_SYMBOL(sym) __typeof__(sym) sym; +#else +#define PARAVIRT_EXPORT_SYMBOL EXPORT_SYMBOL_GPL #endif /* If something in array of ids matches this device, return it. 
*/ @@ -127,24 +122,7 @@ int xenbus_match(struct device *_dev, st return match_device(drv->ids, to_xenbus_device(_dev)) != NULL; } - -/* device// => - */ -static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) -{ - nodename = strchr(nodename, '/'); - if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { - pr_warning("XENBUS: bad frontend %s\n", nodename); - return -EINVAL; - } - - strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); - if (!strchr(bus_id, '/')) { - pr_warning("XENBUS: bus_id %s no slash\n", bus_id); - return -EINVAL; - } - *strchr(bus_id, '/') = '-'; - return 0; -} +PARAVIRT_EXPORT_SYMBOL(xenbus_match); static void free_otherend_details(struct xenbus_device *dev) @@ -164,7 +142,7 @@ static void free_otherend_watch(struct x } -int read_otherend_details(struct xenbus_device *xendev, +int xenbus_read_otherend_details(struct xenbus_device *xendev, char *id_node, char *path_node) { int err = xenbus_gather(XBT_NIL, xendev->nodename, @@ -189,74 +167,22 @@ int read_otherend_details(struct xenbus_ return 0; } +PARAVIRT_EXPORT_SYMBOL(xenbus_read_otherend_details); +#if defined(CONFIG_XEN) || defined(MODULE) static int read_backend_details(struct xenbus_device *xendev) { - return read_otherend_details(xendev, "backend-id", "backend"); -} - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) -static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env) -{ - struct xenbus_device *xdev; - - if (dev == NULL) - return -ENODEV; - xdev = to_xenbus_device(dev); - if (xdev == NULL) - return -ENODEV; - - /* stuff we want to pass to /sbin/hotplug */ -#if defined(CONFIG_XEN) || defined(MODULE) - add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); -#endif - add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype); - - return 0; + return xenbus_read_otherend_details(xendev, "backend-id", "backend"); } -#endif - -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) -static 
struct device_attribute xenbus_dev_attrs[] = { - __ATTR_NULL -}; -#endif - -/* Bus type for frontend drivers. */ -static struct xen_bus_type xenbus_frontend = { - .root = "device", - .levels = 2, /* device/type/ */ - .get_bus_id = frontend_bus_id, - .probe = xenbus_probe_frontend, - .error = -ENODEV, - .bus = { - .name = "xen", - .match = xenbus_match, -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) - .probe = xenbus_dev_probe, - .remove = xenbus_dev_remove, - .shutdown = xenbus_dev_shutdown, - .uevent = xenbus_uevent_frontend, -#endif -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) - .dev_attrs = xenbus_dev_attrs, -#endif -#if !defined(CONFIG_XEN) && !defined(MODULE) - .suspend = xenbus_dev_suspend, - .resume = xenbus_dev_resume, -#endif - }, -#if defined(CONFIG_XEN) || defined(MODULE) - .dev = { - .init_name = "xen", - }, -#endif -}; static void otherend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) +#else /* !CONFIG_XEN && !MODULE */ +void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown) +#endif /* CONFIG_XEN || MODULE */ { struct xenbus_device *dev = container_of(watch, struct xenbus_device, otherend_watch); @@ -284,11 +210,15 @@ static void otherend_changed(struct xenb * work that can fail e.g., when the rootfs is gone. */ if (system_state > SYSTEM_RUNNING) { - struct xen_bus_type *bus = bus; - bus = container_of(dev->dev.bus, struct xen_bus_type, bus); /* If we're frontend, drive the state machine to Closed. */ /* This should cause the backend to release our resources. 
*/ - if ((bus == &xenbus_frontend) && (state == XenbusStateClosing)) +# if defined(CONFIG_XEN) || defined(MODULE) + const struct xen_bus_type *bus = + container_of(dev->dev.bus, struct xen_bus_type, bus); + int ignore_on_shutdown = (bus->levels == 2); +# endif + + if (ignore_on_shutdown && (state == XenbusStateClosing)) xenbus_frontend_closed(dev); return; } @@ -297,6 +227,7 @@ static void otherend_changed(struct xenb if (drv->otherend_changed) drv->otherend_changed(dev, state); } +PARAVIRT_EXPORT_SYMBOL(xenbus_otherend_changed); static int talk_to_otherend(struct xenbus_device *dev) @@ -317,7 +248,11 @@ static int watch_otherend(struct xenbus_ return xenbus_watch_path2(dev, dev->otherend, "state", &dev->otherend_watch, otherend_changed); #else - return xenbus_watch_pathfmt(dev, &dev->otherend_watch, otherend_changed, + struct xen_bus_type *bus = + container_of(dev->dev.bus, struct xen_bus_type, bus); + + return xenbus_watch_pathfmt(dev, &dev->otherend_watch, + bus->otherend_changed, "%s/%s", dev->otherend, "state"); #endif } @@ -367,8 +302,13 @@ int xenbus_dev_probe(struct device *_dev fail: xenbus_dev_error(dev, err, "xenbus_dev_probe on %s", dev->nodename); xenbus_switch_state(dev, XenbusStateClosed); +#if defined(CONFIG_XEN) || defined(MODULE) return -ENODEV; +#else + return err; +#endif } +PARAVIRT_EXPORT_SYMBOL(xenbus_dev_probe); int xenbus_dev_remove(struct device *_dev) { @@ -386,8 +326,9 @@ int xenbus_dev_remove(struct device *_de xenbus_switch_state(dev, XenbusStateClosed); return 0; } +PARAVIRT_EXPORT_SYMBOL(xenbus_dev_remove); -static void xenbus_dev_shutdown(struct device *_dev) +void xenbus_dev_shutdown(struct device *_dev) { struct xenbus_device *dev = to_xenbus_device(_dev); unsigned long timeout = 5*HZ; @@ -420,6 +361,7 @@ static void xenbus_dev_shutdown(struct d out: put_device(&dev->dev); } +PARAVIRT_EXPORT_SYMBOL(xenbus_dev_shutdown); int xenbus_register_driver_common(struct xenbus_driver *drv, struct xen_bus_type *bus, @@ -450,25 +392,7 @@ int 
xenbus_register_driver_common(struct mutex_unlock(&xenwatch_mutex); return ret; } - -int __xenbus_register_frontend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) -{ - int ret; - - drv->read_otherend_details = read_backend_details; - - ret = xenbus_register_driver_common(drv, &xenbus_frontend, - owner, mod_name); - if (ret) - return ret; - - /* If this driver is loaded as a module wait for devices to attach. */ - wait_for_devices(drv); - - return 0; -} -EXPORT_SYMBOL_GPL(__xenbus_register_frontend); +PARAVIRT_EXPORT_SYMBOL(xenbus_register_driver_common); void xenbus_unregister_driver(struct xenbus_driver *drv) { @@ -661,9 +585,31 @@ fail: kfree(xendev); return err; } +PARAVIRT_EXPORT_SYMBOL(xenbus_probe_node); + +#if defined(CONFIG_XEN) || defined(MODULE) + +/* device// => - */ +static int frontend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) +{ + nodename = strchr(nodename, '/'); + if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { + pr_warning("XENBUS: bad frontend %s\n", nodename); + return -EINVAL; + } + + strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); + if (!strchr(bus_id, '/')) { + pr_warning("XENBUS: bus_id %s no slash\n", bus_id); + return -EINVAL; + } + *strchr(bus_id, '/') = '-'; + return 0; +} /* device// */ -static int xenbus_probe_frontend(const char *type, const char *name) +static int xenbus_probe_frontend(struct xen_bus_type *bus, const char *type, + const char *name) { char *nodename; int err; @@ -671,18 +617,90 @@ static int xenbus_probe_frontend(const c if (!strcmp(type, "console")) return 0; - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", - xenbus_frontend.root, type, name); + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, name); if (!nodename) return -ENOMEM; DPRINTK("%s", nodename); - err = xenbus_probe_node(&xenbus_frontend, type, nodename); + err = xenbus_probe_node(bus, type, nodename); kfree(nodename); return err; } +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) +static int 
xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env) +{ + struct xenbus_device *xdev; + + if (dev == NULL) + return -ENODEV; + xdev = to_xenbus_device(dev); + if (xdev == NULL) + return -ENODEV; + + /* stuff we want to pass to /sbin/hotplug */ + if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype) || + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename) || + add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype)) + return -ENOMEM; + + return 0; +} +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) +static struct device_attribute xenbus_dev_attrs[] = { + __ATTR_NULL +}; +#endif + +/* Bus type for frontend drivers. */ +static struct xen_bus_type xenbus_frontend = { + .root = "device", + .levels = 2, /* device/type/ */ + .get_bus_id = frontend_bus_id, + .probe = xenbus_probe_frontend, + .error = -ENODEV, + .bus = { + .name = "xen", + .match = xenbus_match, +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, + .shutdown = xenbus_dev_shutdown, + .uevent = xenbus_uevent_frontend, +#endif +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29) + .dev_attrs = xenbus_dev_attrs, +#endif + }, + .dev = { + .init_name = "xen", + }, +}; + +int __xenbus_register_frontend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + int ret; + + drv->read_otherend_details = read_backend_details; + + ret = xenbus_register_driver_common(drv, &xenbus_frontend, + owner, mod_name); + if (ret) + return ret; + + /* If this driver is loaded as a module wait for devices to attach. 
*/ + wait_for_devices(drv); + + return 0; +} +EXPORT_SYMBOL_GPL(__xenbus_register_frontend); + +#endif + static int xenbus_probe_device_type(struct xen_bus_type *bus, const char *type) { int err = 0; @@ -695,7 +713,7 @@ static int xenbus_probe_device_type(stru return PTR_ERR(dir); for (i = 0; i < dir_n; i++) { - err = bus->probe(type, dir[i]); + err = bus->probe(bus, type, dir[i]); if (err) break; } @@ -726,6 +744,7 @@ int xenbus_probe_devices(struct xen_bus_ kfree(dir); return err; } +PARAVIRT_EXPORT_SYMBOL(xenbus_probe_devices); static unsigned int char_count(const char *str, char c) { @@ -786,10 +805,9 @@ void xenbus_dev_changed(const char *node kfree(root); } -#if !defined(CONFIG_XEN) && !defined(MODULE) -EXPORT_SYMBOL_GPL(xenbus_dev_changed); -#endif +PARAVIRT_EXPORT_SYMBOL(xenbus_dev_changed); +#if defined(CONFIG_XEN) || defined(MODULE) static void frontend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) { @@ -804,22 +822,21 @@ static struct xenbus_watch fe_watch = { .callback = frontend_changed, }; -#if !defined(CONFIG_XEN) && !defined(MODULE) -static int xenbus_dev_suspend(struct device *dev, pm_message_t state) -#else static int suspend_dev(struct device *dev, void *data) +#else +int xenbus_dev_suspend(struct device *dev, pm_message_t state) #endif { int err = 0; struct xenbus_driver *drv; - struct xenbus_device *xdev; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); - DPRINTK(""); + DPRINTK("%s", xdev->nodename); if (dev->driver == NULL) return 0; drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); if (drv->suspend) #if !defined(CONFIG_XEN) && !defined(MODULE) err = drv->suspend(xdev, state); @@ -831,6 +848,7 @@ static int suspend_dev(struct device *de dev_name(dev), err); return 0; } +PARAVIRT_EXPORT_SYMBOL(xenbus_dev_suspend); #if defined(CONFIG_XEN) || defined(MODULE) static int suspend_cancel_dev(struct device *dev, void *data) @@ -852,26 +870,22 @@ static 
int suspend_cancel_dev(struct dev dev_name(dev), err); return 0; } -#endif -#if !defined(CONFIG_XEN) && !defined(MODULE) -static int xenbus_dev_resume(struct device *dev) -#else static int resume_dev(struct device *dev, void *data) +#else +int xenbus_dev_resume(struct device *dev) #endif { int err; struct xenbus_driver *drv; - struct xenbus_device *xdev; + struct xenbus_device *xdev + = container_of(dev, struct xenbus_device, dev); - DPRINTK(""); + DPRINTK("%s", xdev->nodename); if (dev->driver == NULL) return 0; - drv = to_xenbus_driver(dev->driver); - xdev = container_of(dev, struct xenbus_device, dev); - err = talk_to_otherend(xdev); if (err) { pr_warning("xenbus: resume (talk_to_otherend) %s failed: %i\n", @@ -899,6 +913,7 @@ static int resume_dev(struct device *dev return 0; } +PARAVIRT_EXPORT_SYMBOL(xenbus_dev_resume); #if defined(CONFIG_XEN) || defined(MODULE) void xenbus_suspend(void) @@ -960,17 +975,19 @@ void xenbus_probe(struct work_struct *un { BUG_ON(!is_xenstored_ready()); +#if defined(CONFIG_XEN) || defined(MODULE) /* Enumerate devices in xenstore and watch for changes. 
*/ xenbus_probe_devices(&xenbus_frontend); register_xenbus_watch(&fe_watch); xenbus_backend_probe_and_watch(); +#endif /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } -#if !defined(CONFIG_XEN) && !defined(MODULE) -EXPORT_SYMBOL_GPL(xenbus_probe); +PARAVIRT_EXPORT_SYMBOL(xenbus_probe); +#if !defined(CONFIG_XEN) && !defined(MODULE) static int __init xenbus_probe_initcall(void) { if (!xen_domain()) @@ -1112,12 +1129,14 @@ int __devinit xenbus_init(void) if (!is_running_on_xen()) return -ENODEV; +#if defined(CONFIG_XEN) || defined(MODULE) /* Register ourselves with the kernel bus subsystem */ xenbus_frontend.error = bus_register(&xenbus_frontend.bus); if (xenbus_frontend.error) pr_warning("XENBUS: Error registering frontend bus: %i\n", xenbus_frontend.error); xenbus_backend_bus_register(); +#endif /* * Domain0 doesn't have a store_evtchn or store_mfn yet. @@ -1221,10 +1240,8 @@ int __devinit xenbus_init(void) " %d\n", xenbus_frontend.error); } } -#endif xenbus_backend_device_register(); -#if defined(CONFIG_XEN) || defined(MODULE) if (!is_initial_xendomain()) xenbus_probe(NULL); #endif @@ -1248,6 +1265,7 @@ int __devinit xenbus_init(void) if (page != 0) free_page(page); + return err; } @@ -1260,6 +1278,8 @@ MODULE_LICENSE("GPL"); #endif #endif +#if defined(CONFIG_XEN) || defined(MODULE) + static int is_device_connecting(struct device *dev, void *data) { struct xenbus_device *xendev = to_xenbus_device(dev); @@ -1395,3 +1415,5 @@ int xenbus_for_each_frontend(void *arg, return bus_for_each_dev(&xenbus_frontend.bus, NULL, arg, fn); } EXPORT_SYMBOL_GPL(xenbus_for_each_frontend); + +#endif /* CONFIG_XEN || MODULE */ --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.h 2011-02-07 14:42:39.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.h 2011-02-07 14:43:11.000000000 +0100 @@ -67,11 +67,15 @@ struct xen_bus_type int error; unsigned int levels; int (*get_bus_id)(char bus_id[XEN_BUS_ID_SIZE], const char 
*nodename); - int (*probe)(const char *type, const char *dir); - struct bus_type bus; -#if defined(CONFIG_XEN) || defined(MODULE) + int (*probe)(struct xen_bus_type *bus, const char *type, + const char *dir); +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) + void (*otherend_changed)(struct xenbus_watch *watch, const char **vec, + unsigned int len); +#else struct device dev; #endif + struct bus_type bus; }; extern int xenbus_match(struct device *_dev, struct device_driver *_drv); @@ -88,4 +92,16 @@ extern int xenbus_probe_devices(struct x extern void xenbus_dev_changed(const char *node, struct xen_bus_type *bus); +extern void xenbus_dev_shutdown(struct device *_dev); + +extern int xenbus_dev_suspend(struct device *dev, pm_message_t state); +extern int xenbus_dev_resume(struct device *dev); + +extern void xenbus_otherend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len, + int ignore_on_shutdown); + +extern int xenbus_read_otherend_details(struct xenbus_device *xendev, + char *id_node, char *path_node); + #endif --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe_backend.c 2011-02-03 08:30:05.000000000 +0100 @@ -33,7 +33,7 @@ #define DPRINTK(fmt, args...) 
\ pr_debug("xenbus_probe (%s:%d) " fmt ".\n", \ - __FUNCTION__, __LINE__, ##args) + __func__, __LINE__, ##args) #include #include @@ -45,14 +45,17 @@ #include #include -#include #include -#include #include +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) +#include +#endif #include #include +#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H) #include #include +#endif #include #include "xenbus_comms.h" @@ -62,17 +65,6 @@ #include #endif -static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env); -static int xenbus_probe_backend(const char *type, const char *domid); - -extern int read_otherend_details(struct xenbus_device *xendev, - char *id_node, char *path_node); - -static int read_frontend_details(struct xenbus_device *xendev) -{ - return read_otherend_details(xendev, "frontend-id", "frontend"); -} - /* backend/// => -- */ static int backend_bus_id(char bus_id[XEN_BUS_ID_SIZE], const char *nodename) { @@ -110,34 +102,12 @@ static int backend_bus_id(char bus_id[XE return 0; } -static struct device_attribute xenbus_backend_attrs[] = { - __ATTR_NULL -}; - -static struct xen_bus_type xenbus_backend = { - .root = "backend", - .levels = 3, /* backend/type// */ - .get_bus_id = backend_bus_id, - .probe = xenbus_probe_backend, - .error = -ENODEV, - .bus = { - .name = "xen-backend", - .match = xenbus_match, - .probe = xenbus_dev_probe, - .remove = xenbus_dev_remove, -// .shutdown = xenbus_dev_shutdown, - .uevent = xenbus_uevent_backend, - .dev_attrs = xenbus_backend_attrs, - }, - .dev = { - .init_name = "xen-backend", - }, -}; - -static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env) +static int xenbus_uevent_backend(struct device *dev, + struct kobj_uevent_env *env) { struct xenbus_device *xdev; struct xenbus_driver *drv; + struct xen_bus_type *bus; DPRINTK(""); @@ -145,15 +115,19 @@ static int xenbus_uevent_backend(struct return -ENODEV; xdev = to_xenbus_device(dev); + bus = 
container_of(xdev->dev.bus, struct xen_bus_type, bus); if (xdev == NULL) return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); + if (add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype)) + return -ENOMEM; - add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); + if (add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename)) + return -ENOMEM; - add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root); + if (add_uevent_var(env, "XENBUS_BASE_PATH=%s", bus->root)) + return -ENOMEM; if (dev->driver) { drv = to_xenbus_driver(dev->driver); @@ -164,18 +138,9 @@ static int xenbus_uevent_backend(struct return 0; } -int __xenbus_register_backend(struct xenbus_driver *drv, - struct module *owner, const char *mod_name) -{ - drv->read_otherend_details = read_frontend_details; - - return xenbus_register_driver_common(drv, &xenbus_backend, - owner, mod_name); -} -EXPORT_SYMBOL_GPL(__xenbus_register_backend); - /* backend/// */ -static int xenbus_probe_backend_unit(const char *dir, +static int xenbus_probe_backend_unit(struct xen_bus_type *bus, + const char *dir, const char *type, const char *name) { @@ -188,13 +153,14 @@ static int xenbus_probe_backend_unit(con DPRINTK("%s\n", nodename); - err = xenbus_probe_node(&xenbus_backend, type, nodename); + err = xenbus_probe_node(bus, type, nodename); kfree(nodename); return err; } /* backend// */ -static int xenbus_probe_backend(const char *type, const char *domid) +static int xenbus_probe_backend(struct xen_bus_type *bus, const char *type, + const char *domid) { char *nodename; int err = 0; @@ -203,7 +169,7 @@ static int xenbus_probe_backend(const ch DPRINTK(""); - nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", xenbus_backend.root, type, domid); + nodename = kasprintf(GFP_KERNEL, "%s/%s/%s", bus->root, type, domid); if (!nodename) return -ENOMEM; @@ -214,7 +180,7 @@ static int xenbus_probe_backend(const ch } for (i = 0; i < dir_n; i++) { - err = 
xenbus_probe_backend_unit(nodename, type, dir[i]); + err = xenbus_probe_backend_unit(bus, nodename, type, dir[i]); if (err) break; } @@ -223,6 +189,44 @@ static int xenbus_probe_backend(const ch return err; } +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) +static void frontend_changed(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + xenbus_otherend_changed(watch, vec, len, 0); +} +#endif + +static struct device_attribute xenbus_backend_dev_attrs[] = { + __ATTR_NULL +}; + +static struct xen_bus_type xenbus_backend = { + .root = "backend", + .levels = 3, /* backend/type// */ + .get_bus_id = backend_bus_id, + .probe = xenbus_probe_backend, +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) + .otherend_changed = frontend_changed, +#else + .dev = { + .init_name = "xen-backend", + }, +#endif + .error = -ENODEV, + .bus = { + .name = "xen-backend", + .match = xenbus_match, + .uevent = xenbus_uevent_backend, + .probe = xenbus_dev_probe, + .remove = xenbus_dev_remove, +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) + .shutdown = xenbus_dev_shutdown, +#endif + .dev_attrs = xenbus_backend_dev_attrs, + }, +}; + static void backend_changed(struct xenbus_watch *watch, const char **vec, unsigned int len) { @@ -236,6 +240,47 @@ static struct xenbus_watch be_watch = { .callback = backend_changed, }; +static int read_frontend_details(struct xenbus_device *xendev) +{ + return xenbus_read_otherend_details(xendev, "frontend-id", "frontend"); +} + +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) + +int xenbus_dev_is_online(struct xenbus_device *dev) +{ + int rc, val; + + rc = xenbus_scanf(XBT_NIL, dev->nodename, "online", "%d", &val); + if (rc != 1) + val = 0; /* no online node present */ + + return val; +} +EXPORT_SYMBOL_GPL(xenbus_dev_is_online); + +int __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + drv->read_otherend_details = 
read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); +} +EXPORT_SYMBOL_GPL(__xenbus_register_backend); + +#else + +int __xenbus_register_backend(struct xenbus_driver *drv, + struct module *owner, const char *mod_name) +{ + drv->read_otherend_details = read_frontend_details; + + return xenbus_register_driver_common(drv, &xenbus_backend, + owner, mod_name); +} +EXPORT_SYMBOL_GPL(__xenbus_register_backend); + void xenbus_backend_suspend(int (*fn)(struct device *, void *)) { DPRINTK(""); @@ -250,12 +295,49 @@ void xenbus_backend_resume(int (*fn)(str bus_for_each_dev(&xenbus_backend.bus, NULL, NULL, fn); } +#endif + +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) +static int backend_probe_and_watch(struct notifier_block *notifier, + unsigned long event, + void *data) +#else void xenbus_backend_probe_and_watch(void) +#endif { + /* Enumerate devices in xenstore and watch for changes. */ xenbus_probe_devices(&xenbus_backend); register_xenbus_watch(&be_watch); + +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) + return NOTIFY_DONE; +#endif } +#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H) + +static int __init xenbus_probe_backend_init(void) +{ + static struct notifier_block xenstore_notifier = { + .notifier_call = backend_probe_and_watch + }; + int err; + + DPRINTK(""); + + /* Register ourselves with the kernel bus subsystem */ + err = bus_register(&xenbus_backend.bus); + if (err) + return err; + + register_xenstore_notifier(&xenstore_notifier); + + return 0; +} +subsys_initcall(xenbus_probe_backend_init); + +#else + void xenbus_backend_bus_register(void) { xenbus_backend.error = bus_register(&xenbus_backend.bus); @@ -282,3 +364,5 @@ int xenbus_for_each_backend(void *arg, i return bus_for_each_dev(&xenbus_backend.bus, NULL, arg, fn); } EXPORT_SYMBOL_GPL(xenbus_for_each_backend); + +#endif --- head-2011-03-17.orig/include/xen/gntdev.h 2011-03-17 14:35:43.000000000 +0100 
+++ head-2011-03-17/include/xen/gntdev.h 2011-02-03 13:52:59.000000000 +0100 @@ -1,119 +1,3 @@ -/****************************************************************************** - * gntdev.h - * - * Interface to /dev/xen/gntdev. - * - * Copyright (c) 2007, D G Murray - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef __LINUX_PUBLIC_GNTDEV_H__ -#define __LINUX_PUBLIC_GNTDEV_H__ - -struct ioctl_gntdev_grant_ref { - /* The domain ID of the grant to be mapped. */ - uint32_t domid; - /* The grant reference of the grant to be mapped. 
*/ - uint32_t ref; -}; - -/* - * Inserts the grant references into the mapping table of an instance - * of gntdev. N.B. This does not perform the mapping, which is deferred - * until mmap() is called with @index as the offset. - */ -#define IOCTL_GNTDEV_MAP_GRANT_REF \ -_IOC(_IOC_NONE, 'G', 0, sizeof(struct ioctl_gntdev_map_grant_ref)) -struct ioctl_gntdev_map_grant_ref { - /* IN parameters */ - /* The number of grants to be mapped. */ - uint32_t count; - uint32_t pad; - /* OUT parameters */ - /* The offset to be used on a subsequent call to mmap(). */ - uint64_t index; - /* Variable IN parameter. */ - /* Array of grant references, of size @count. */ - struct ioctl_gntdev_grant_ref refs[1]; -}; - -/* - * Removes the grant references from the mapping table of an instance of - * of gntdev. N.B. munmap() must be called on the relevant virtual address(es) - * before this ioctl is called, or an error will result. - */ -#define IOCTL_GNTDEV_UNMAP_GRANT_REF \ -_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref)) -struct ioctl_gntdev_unmap_grant_ref { - /* IN parameters */ - /* The offset was returned by the corresponding map operation. */ - uint64_t index; - /* The number of pages to be unmapped. */ - uint32_t count; - uint32_t pad; -}; - -/* - * Returns the offset in the driver's address space that corresponds - * to @vaddr. This can be used to perform a munmap(), followed by an - * UNMAP_GRANT_REF ioctl, where no state about the offset is retained by - * the caller. The number of pages that were allocated at the same time as - * @vaddr is returned in @count. - * - * N.B. Where more than one page has been mapped into a contiguous range, the - * supplied @vaddr must correspond to the start of the range; otherwise - * an error will result. It is only possible to munmap() the entire - * contiguously-allocated range at once, and not any subrange thereof. 
- */ -#define IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR \ -_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntdev_get_offset_for_vaddr)) -struct ioctl_gntdev_get_offset_for_vaddr { - /* IN parameters */ - /* The virtual address of the first mapped page in a range. */ - uint64_t vaddr; - /* OUT parameters */ - /* The offset that was used in the initial mmap() operation. */ - uint64_t offset; - /* The number of pages mapped in the VM area that begins at @vaddr. */ - uint32_t count; - uint32_t pad; -}; - -/* - * Sets the maximum number of grants that may mapped at once by this gntdev - * instance. - * - * N.B. This must be called before any other ioctl is performed on the device. - */ -#define IOCTL_GNTDEV_SET_MAX_GRANTS \ -_IOC(_IOC_NONE, 'G', 3, sizeof(struct ioctl_gntdev_set_max_grants)) -struct ioctl_gntdev_set_max_grants { - /* IN parameter */ - /* The maximum number of grants that may be mapped at once. */ - uint32_t count; -}; - -#endif /* __LINUX_PUBLIC_GNTDEV_H__ */ +#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__) +#include "public/gntdev.h" +#endif --- head-2011-03-17.orig/include/xen/public/gntdev.h 2008-04-02 12:34:02.000000000 +0200 +++ head-2011-03-17/include/xen/public/gntdev.h 2011-02-03 13:52:28.000000000 +0100 @@ -66,7 +66,7 @@ struct ioctl_gntdev_map_grant_ref { * before this ioctl is called, or an error will result. */ #define IOCTL_GNTDEV_UNMAP_GRANT_REF \ -_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref)) +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntdev_unmap_grant_ref)) struct ioctl_gntdev_unmap_grant_ref { /* IN parameters */ /* The offset was returned by the corresponding map operation. 
*/ --- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 15:09:47.000000000 +0100 +++ head-2011-03-17/lib/swiotlb-xen.c 2011-03-11 11:06:22.000000000 +0100 @@ -48,7 +48,7 @@ int swiotlb_force; static char *io_tlb_start, *io_tlb_end; /* - * The number of IO TLB blocks (in groups of 64) betweeen io_tlb_start and + * The number of IO TLB blocks (in groups of 64) between io_tlb_start and * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. */ static unsigned long io_tlb_nslabs; @@ -567,6 +567,15 @@ dma_addr_t swiotlb_map_page(struct devic } dev_addr = swiotlb_virt_to_bus(dev, map); + + /* + * Ensure that the address returned is DMA'ble + */ + if (!dma_capable(dev, dev_addr, size)) { + swiotlb_tbl_unmap_single(dev, map, size, dir); + dev_addr = swiotlb_virt_to_bus(dev, io_tlb_overflow_buffer); + } + return dev_addr; } EXPORT_SYMBOL_GPL(swiotlb_map_page); --- head-2011-03-17.orig/mm/Kconfig 2011-01-31 14:34:25.000000000 +0100 +++ head-2011-03-17/mm/Kconfig 2011-02-01 16:44:46.000000000 +0100 @@ -304,7 +304,7 @@ config NOMMU_INITIAL_TRIM_EXCESS config TRANSPARENT_HUGEPAGE bool "Transparent Hugepage Support" - depends on X86 && MMU + depends on X86 && !XEN && MMU select COMPACTION help Transparent Hugepages allows the kernel to use huge pages and