From: Linux Kernel Mailing List Subject: Linux: 2.6.34 Patch-mainline: 2.6.34 This patch contains the differences between 2.6.33 and 2.6.34. Acked-by: Jeff Mahoney Automatically created from "patches.kernel.org/patch-2.6.34" by xen-port-patches.py --- head-2010-05-25.orig/arch/x86/Kconfig 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/Kconfig 2010-03-25 16:41:03.000000000 +0100 @@ -106,7 +106,7 @@ config SBUS bool config NEED_DMA_MAP_STATE - def_bool (X86_64 || DMAR || DMA_API_DEBUG) + def_bool (X86_64 || DMAR || DMA_API_DEBUG || SWIOTLB) config GENERIC_ISA_DMA def_bool y --- head-2010-05-25.orig/arch/x86/ia32/ia32entry-xen.S 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/ia32/ia32entry-xen.S 2010-05-12 09:08:52.000000000 +0200 @@ -456,7 +456,7 @@ ia32_sys_call_table: .quad quiet_ni_syscall /* old mpx syscall holder */ .quad sys_setpgid .quad quiet_ni_syscall /* old ulimit syscall holder */ - .quad sys32_olduname + .quad sys_olduname .quad sys_umask /* 60 */ .quad sys_chroot .quad compat_sys_ustat @@ -479,7 +479,7 @@ ia32_sys_call_table: .quad compat_sys_settimeofday .quad sys_getgroups16 /* 80 */ .quad sys_setgroups16 - .quad sys32_old_select + .quad compat_sys_old_select .quad sys_symlink .quad sys_lstat .quad sys_readlink /* 85 */ @@ -506,7 +506,7 @@ ia32_sys_call_table: .quad compat_sys_newstat .quad compat_sys_newlstat .quad compat_sys_newfstat - .quad sys32_uname + .quad sys_uname .quad stub32_iopl /* 110 */ .quad sys_vhangup .quad quiet_ni_syscall /* old "idle" system call */ @@ -519,7 +519,7 @@ ia32_sys_call_table: .quad stub32_sigreturn .quad stub32_clone /* 120 */ .quad sys_setdomainname - .quad sys_uname + .quad sys_newuname .quad sys_modify_ldt .quad compat_sys_adjtimex .quad sys32_mprotect /* 125 */ --- head-2010-05-25.orig/arch/x86/include/asm/i8259.h 2010-05-25 09:31:21.000000000 +0200 +++ head-2010-05-25/arch/x86/include/asm/i8259.h 2010-03-25 11:31:58.000000000 +0100 @@ -54,11 +54,13 @@ extern struct irq_chip 
i8259A_chip; struct legacy_pic { int nr_legacy_irqs; +#ifndef CONFIG_XEN struct irq_chip *chip; void (*mask_all)(void); void (*restore_mask)(void); void (*init)(int auto_eoi); int (*irq_pending)(unsigned int irq); +#endif void (*make_irq)(unsigned int irq); }; --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap.h 2010-04-15 10:29:09.000000000 +0200 @@ -82,6 +82,9 @@ enum fixed_addresses { #endif FIX_DBGP_BASE, FIX_EARLYCON_MEM_BASE, +#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT + FIX_OHCI1394_BASE, +#endif #ifdef CONFIG_X86_LOCAL_APIC FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */ #endif @@ -125,17 +128,20 @@ enum fixed_addresses { * 256 temporary boot-time mappings, used by early_ioremap(), * before ioremap() is functional. * - * We round it up to the next 256 pages boundary so that we - * can have a single pgd entry and a single pte table: + * If necessary we round it up to the next 256 pages boundary so + * that we can have a single pgd entry and a single pte table: */ #define NR_FIX_BTMAPS 64 #define FIX_BTMAPS_SLOTS 4 - FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 - - (__end_of_permanent_fixed_addresses & 255), - FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1, -#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT - FIX_OHCI1394_BASE, -#endif +#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS) + FIX_BTMAP_END = + (__end_of_permanent_fixed_addresses ^ + (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) & + -PTRS_PER_PTE + ? 
__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - + (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1)) + : __end_of_permanent_fixed_addresses, + FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1, #ifdef CONFIG_X86_32 FIX_WP_TEST, #endif --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/io.h 2010-03-24 15:25:06.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/io.h 2010-05-12 09:09:25.000000000 +0200 @@ -1,8 +1,42 @@ #ifndef _ASM_X86_IO_H #define _ASM_X86_IO_H +/* + * This file contains the definitions for the x86 IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated + * to (a) handle it all in a way that makes gcc able to optimize it + * as well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + */ + +/* + * Thanks to James van Artsdalen for a better timing-fix than + * the two short jumps: using outb's to a nonexistent port seems + * to guarantee better timings even on fast machines. + * + * On the other hand, I'd like to be sure of a non-existent port: + * I feel a bit unsafe about using 0x80 (should be safe, though) + * + * Linus + */ + + /* + * Bit simplified and optimized by Jan Hubicka + * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999. 
+ * + * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added, + * isa_read[wl] and isa_write[wl] fixed + * - Arnaldo Carvalho de Melo + */ + #define ARCH_HAS_IOREMAP_WC +#include #include #include #include @@ -84,8 +118,6 @@ static inline void writeq(__u64 val, vol #define readq readq #define writeq writeq -#define native_io_delay xen_io_delay - /** * virt_to_phys - map virtual addresses to physical * @address: address to remap @@ -181,11 +213,110 @@ static inline void __iomem *ioremap(reso extern void iounmap(volatile void __iomem *addr); -#ifdef CONFIG_X86_32 -# include "../../asm/io_32.h" -#else -# include "../../asm/io_64.h" +#ifdef __KERNEL__ + +#include + +#include + +/* + * Convert a virtual cached pointer to an uncached pointer + */ +#define xlate_dev_kmem_ptr(p) p + +static inline void +memset_io(volatile void __iomem *addr, unsigned char val, size_t count) +{ + memset((void __force *)addr, val, count); +} + +static inline void +memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) +{ + memcpy(dst, (const void __force *)src, count); +} + +static inline void +memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) +{ + memcpy((void __force *)dst, src, count); +} + +/* + * Cache management + * + * This needed for two cases + * 1. Out of order aware processors + * 2. 
Accidentally out of order processors (PPro errata #51) + */ + +static inline void flush_write_buffers(void) +{ +#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) + asm volatile("lock; addl $0,0(%%esp)": : :"memory"); +#endif +} + +#endif /* __KERNEL__ */ + +extern void native_io_delay(void); + +extern int io_delay_type; +extern void io_delay_init(void); + +static inline void slow_down_io(void) +{ + native_io_delay(); +#ifdef REALLY_SLOW_IO + native_io_delay(); + native_io_delay(); + native_io_delay(); #endif +} + +#define BUILDIO(bwl, bw, type) \ +static inline void out##bwl(unsigned type value, int port) \ +{ \ + asm volatile("out" #bwl " %" #bw "0, %w1" \ + : : "a"(value), "Nd"(port)); \ +} \ + \ +static inline unsigned type in##bwl(int port) \ +{ \ + unsigned type value; \ + asm volatile("in" #bwl " %w1, %" #bw "0" \ + : "=a"(value) : "Nd"(port)); \ + return value; \ +} \ + \ +static inline void out##bwl##_p(unsigned type value, int port) \ +{ \ + out##bwl(value, port); \ + slow_down_io(); \ +} \ + \ +static inline unsigned type in##bwl##_p(int port) \ +{ \ + unsigned type value = in##bwl(port); \ + slow_down_io(); \ + return value; \ +} \ + \ +static inline void outs##bwl(int port, const void *addr, unsigned long count) \ +{ \ + asm volatile("rep; outs" #bwl \ + : "+S"(addr), "+c"(count) : "d"(port)); \ +} \ + \ +static inline void ins##bwl(int port, void *addr, unsigned long count) \ +{ \ + asm volatile("rep; ins" #bwl \ + : "+D"(addr), "+c"(count) : "d"(port)); \ +} + +BUILDIO(b, b, char) +BUILDIO(w, w, short) +BUILDIO(l, , int) #if defined(__KERNEL__) && !defined(__ASSEMBLY__) @@ -200,8 +331,6 @@ extern void iounmap(volatile void __iome && bvec_to_pseudophys(vec1) + (vec1)->bv_len \ == bvec_to_pseudophys(vec2)) -#undef __ISA_IO_base - #endif extern void *xlate_dev_mem_ptr(unsigned long phys); @@ -223,6 +352,7 @@ extern void __iomem *early_ioremap(resou extern void __iomem *early_memremap(resource_size_t phys_addr, unsigned long size); extern 
void early_iounmap(void __iomem *addr, unsigned long size); +extern void fixup_early_ioremap(void); #define IO_SPACE_LIMIT 0xffff --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/irq_vectors.h 2010-03-29 18:11:31.000000000 +0200 @@ -3,11 +3,9 @@ #define MCE_VECTOR 0x12 +#define IA32_SYSCALL_VECTOR 0x80 #ifdef CONFIG_X86_32 # define SYSCALL_VECTOR 0x80 -# define IA32_SYSCALL_VECTOR 0x80 -#else -# define IA32_SYSCALL_VECTOR 0x80 #endif #define RESCHEDULE_VECTOR 0 @@ -57,9 +55,17 @@ static inline int invalid_vm86_irq(int i * are bound using the provided bind/unbind functions. */ #define PIRQ_BASE 0 +/* PHYSDEVOP_pirq_eoi_gmfn restriction: */ +#define PIRQ_MAX(n) ((n) < (1 << (PAGE_SHIFT + 3)) - NR_VECTORS \ + ? (n) : (1 << (PAGE_SHIFT + 3)) - NR_VECTORS) + +#define IO_APIC_VECTOR_LIMIT PIRQ_MAX(32 * MAX_IO_APICS) -#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS ) -#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS ) +#ifdef CONFIG_SPARSE_IRQ +# define CPU_VECTOR_LIMIT PIRQ_MAX(64 * NR_CPUS) +#else +# define CPU_VECTOR_LIMIT PIRQ_MAX(32 * NR_CPUS) +#endif #ifdef CONFIG_X86_IO_APIC # if !defined(NR_CPUS) || !defined(MAX_IO_APICS) @@ -69,10 +75,11 @@ static inline int invalid_vm86_irq(int i (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \ (NR_VECTORS + CPU_VECTOR_LIMIT) : \ (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) -# elif NR_CPUS < MAX_IO_APICS -# define NR_PIRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT) # else -# define NR_PIRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT) +# define NR_PIRQS \ + (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? 
\ + (NR_VECTORS + CPU_VECTOR_LIMIT) : \ + (NR_VECTORS + IO_APIC_VECTOR_LIMIT)) # endif #elif defined(CONFIG_XEN_PCIDEV_FRONTEND) # define NR_PIRQS (NR_VECTORS + CPU_VECTOR_LIMIT) --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pci.h 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/pci.h 2010-03-25 17:13:42.000000000 +0100 @@ -48,8 +48,15 @@ static inline int pci_proc_domain(struct #ifdef CONFIG_PCI extern unsigned int pcibios_assign_all_busses(void); +extern int pci_legacy_init(void); +# ifdef CONFIG_ACPI +# define x86_default_pci_init pci_acpi_init +# else +# define x86_default_pci_init pci_legacy_init +# endif #else -#define pcibios_assign_all_busses() 0 +# define pcibios_assign_all_busses() 0 +# define x86_default_pci_init NULL #endif #include @@ -97,41 +104,14 @@ extern void pci_iommu_alloc(void); #define PCI_DMA_BUS_IS_PHYS 0 -#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) \ - || defined(CONFIG_SWIOTLB) - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ - dma_addr_t ADDR_NAME; -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ - __u32 LEN_NAME; -#define pci_unmap_addr(PTR, ADDR_NAME) \ - ((PTR)->ADDR_NAME) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ - (((PTR)->ADDR_NAME) = (VAL)) -#define pci_unmap_len(PTR, LEN_NAME) \ - ((PTR)->LEN_NAME) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ - (((PTR)->LEN_NAME) = (VAL)) - -#else - -#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0]; -#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0]; -#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME) -#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ - do { break; } while (pci_unmap_addr(PTR, ADDR_NAME)) -#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME) -#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ - do { break; } while (pci_unmap_len(PTR, LEN_NAME)) - -#endif - #endif /* __KERNEL__ */ #ifdef CONFIG_X86_64 #include "../../asm/pci_64.h" #endif +void 
dma32_reserve_bootmem(void); + /* implement the pci_ DMA API in terms of the generic device dma_ one */ #include --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgalloc.h 2010-03-25 16:41:03.000000000 +0100 @@ -27,6 +27,11 @@ pmd_t *early_get_pmd(unsigned long va); #endif /* + * Flags to use when allocating a user page table page. + */ +extern gfp_t __userpte_alloc_gfp; + +/* * Allocate and free page tables. */ extern pgd_t *pgd_alloc(struct mm_struct *); --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-04-15 10:48:32.000000000 +0200 @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -79,7 +78,7 @@ do { \ * The i386 doesn't have any external MMU info: the kernel page * tables contain all the necessary information. */ -#define update_mmu_cache(vma, address, pte) do { } while (0) +#define update_mmu_cache(vma, address, ptep) do { } while (0) void make_lowmem_page_readonly(void *va, unsigned int feature); void make_lowmem_page_writable(void *va, unsigned int feature); --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-25 16:41:03.000000000 +0100 @@ -136,7 +136,7 @@ static inline int pgd_large(pgd_t pgd) { #define pte_unmap(pte) /* NOP */ #define pte_unmap_nested(pte) /* NOP */ -#define update_mmu_cache(vma, address, pte) do { } while (0) +#define update_mmu_cache(vma, address, ptep) do { } while (0) /* Encode and de-code a swap entry */ #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp.h 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp.h 2010-04-26 11:32:06.000000000 +0200 @@ -135,6 +135,8 @@ int 
native_cpu_disable(void); void native_cpu_die(unsigned int cpu); void native_play_dead(void); void play_dead_common(void); +void wbinvd_on_cpu(int cpu); +int wbinvd_on_all_cpus(void); #else /* CONFIG_XEN */ @@ -162,8 +164,19 @@ static inline int num_booting_cpus(void) { return cpumask_weight(cpu_callout_mask); } +#elif /* !CONFIG_SMP && */ !defined(CONFIG_XEN) +#define wbinvd_on_cpu(cpu) wbinvd() +static inline int wbinvd_on_all_cpus(void) +{ + wbinvd(); + return 0; +} #endif /* CONFIG_SMP */ +#ifdef CONFIG_XEN +int wbinvd_on_all_cpus(void); +#endif + extern unsigned disabled_cpus __cpuinitdata; #include --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system.h 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/system.h 2010-03-25 16:41:14.000000000 +0100 @@ -31,7 +31,7 @@ extern void show_regs_common(void); "movl %P[task_canary](%[next]), %%ebx\n\t" \ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t" #define __switch_canary_oparam \ - , [stack_canary] "=m" (per_cpu_var(stack_canary.canary)) + , [stack_canary] "=m" (stack_canary.canary) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ @@ -113,7 +113,7 @@ do { \ "movq %P[task_canary](%%rsi),%%r8\n\t" \ "movq %%r8,"__percpu_arg([gs_canary])"\n\t" #define __switch_canary_oparam \ - , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary)) + , [gs_canary] "=m" (irq_stack_union.stack_canary) #define __switch_canary_iparam \ , [task_canary] "i" (offsetof(struct task_struct, stack_canary)) #else /* CC_STACKPROTECTOR */ @@ -132,7 +132,7 @@ do { \ __switch_canary \ "movq %P[thread_info](%%rsi),%%r8\n\t" \ "movq %%rax,%%rdi\n\t" \ - "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ + "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \ "jnz ret_from_fork\n\t" \ RESTORE_CONTEXT \ : "=a" (last) \ @@ -142,7 +142,7 @@ do { \ [ti_flags] "i" (offsetof(struct thread_info, flags)), \ [_tif_fork] "i" (_TIF_FORK), \ 
[thread_info] "i" (offsetof(struct task_struct, stack)), \ - [current_task] "m" (per_cpu_var(current_task)) \ + [current_task] "m" (current_task) \ __switch_canary_iparam \ : "memory", "cc" __EXTRA_CLOBBER) #endif --- head-2010-05-25.orig/arch/x86/kernel/apic/io_apic-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/apic/io_apic-xen.c 2010-05-12 09:09:25.000000000 +0200 @@ -36,6 +36,7 @@ #include #include #include /* time_after() */ +#include #ifdef CONFIG_ACPI #include #endif @@ -69,9 +70,12 @@ #include /* Fake i8259 */ -#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq))) -#define disable_8259A_irq(_irq) ((void)0) -#define i8259A_irq_pending(_irq) (0) +static void make_8259A_irq(unsigned int irq) { io_apic_irqs &= ~(1UL<nr_legacy_irqs) { + nr_irqs_gsi = 0; + io_apic_irqs = ~0UL; + } + cfg = irq_cfgx; count = ARRAY_SIZE(irq_cfgx); node= cpu_to_node(boot_cpu_id); @@ -205,8 +187,14 @@ int __init arch_early_irq_init(void) desc->chip_data = &cfg[i]; zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); - if (i < nr_legacy_irqs) - cpumask_setall(cfg[i].domain); + /* + * For legacy IRQ's, start with assigning irq0 to irq15 to + * IRQ0_VECTOR to IRQ15_VECTOR on cpu 0. + */ + if (i < legacy_pic->nr_legacy_irqs) { + cfg[i].vector = IRQ0_VECTOR + i; + cpumask_set_cpu(0, cfg[i].domain); + } } return 0; @@ -451,7 +439,7 @@ static bool io_apic_level_ack_pending(st struct irq_pin_list *entry; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); for_each_irq_pin(entry, cfg->irq_2_pin) { unsigned int reg; int pin; @@ -460,11 +448,11 @@ static bool io_apic_level_ack_pending(st reg = io_apic_read(entry->apic, 0x10 + pin*2); /* Is the remote IRR bit set? 
*/ if (reg & IO_APIC_REDIR_REMOTE_IRR) { - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return true; } } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return false; } @@ -480,10 +468,10 @@ static struct IO_APIC_route_entry ioapic { union entry_union eu; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return eu.entry; } #endif @@ -507,9 +495,9 @@ __ioapic_write_entry(int apic, int pin, void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) { unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __ioapic_write_entry(apic, pin, e); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } #ifndef CONFIG_XEN @@ -523,10 +511,10 @@ static void ioapic_mask_entry(int apic, unsigned long flags; union entry_union eu = { .entry.mask = 1 }; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic, 0x10 + 2*pin, eu.w1); io_apic_write(apic, 0x11 + 2*pin, eu.w2); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } /* @@ -653,9 +641,9 @@ static void mask_IO_APIC_irq_desc(struct BUG_ON(!cfg); - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __mask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) @@ -663,9 +651,9 @@ static void unmask_IO_APIC_irq_desc(stru struct irq_cfg *cfg = desc->chip_data; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + 
raw_spin_lock_irqsave(&ioapic_lock, flags); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void mask_IO_APIC_irq(unsigned int irq) @@ -922,7 +910,7 @@ static int __init find_isa_irq_apic(int */ static int EISA_ELCR(unsigned int irq) { - if (irq < nr_legacy_irqs) { + if (irq < legacy_pic->nr_legacy_irqs) { unsigned int port = 0x4d0 + (irq >> 3); return (inb(port) >> (irq & 7)) & 1; } @@ -1198,12 +1186,12 @@ void lock_vector_lock(void) /* Used to the online set of cpus does not change * during assign_irq_vector. */ - spin_lock(&vector_lock); + raw_spin_lock(&vector_lock); } void unlock_vector_lock(void) { - spin_unlock(&vector_lock); + raw_spin_unlock(&vector_lock); } static int @@ -1220,7 +1208,8 @@ __assign_irq_vector(int irq, struct irq_ * Also, we've got to be careful not to trash gate * 0x80, because int 0x80 is hm, kind of importantish. ;) */ - static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; + static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START; + static int current_offset = VECTOR_OFFSET_START % 8; unsigned int old_vector; int cpu, err; cpumask_var_t tmp_mask; @@ -1256,7 +1245,7 @@ next: if (vector >= first_system_vector) { /* If out of vectors on large boxen, must share them. 
*/ offset = (offset + 1) % 8; - vector = FIRST_DEVICE_VECTOR + offset; + vector = FIRST_EXTERNAL_VECTOR + offset; } if (unlikely(current_vector == vector)) continue; @@ -1294,9 +1283,9 @@ int assign_irq_vector(int irq, struct ir int err; unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); err = __assign_irq_vector(irq, cfg, mask); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); return err; } @@ -1330,14 +1319,27 @@ static void __clear_irq_vector(int irq, void __setup_vector_irq(int cpu) { /* Initialize vector_irq on a new cpu */ - /* This function must be called with vector_lock held */ int irq, vector; struct irq_cfg *cfg; struct irq_desc *desc; + /* + * vector_lock will make sure that we don't run into irq vector + * assignments that might be happening on another cpu in parallel, + * while we setup our initial vector to irq mappings. + */ + raw_spin_lock(&vector_lock); /* Mark the inuse vectors */ for_each_irq_desc(irq, desc) { cfg = desc->chip_data; + + /* + * If it is a legacy IRQ handled by the legacy PIC, this cpu + * will be part of the irq_cfg's domain. + */ + if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq)) + cpumask_set_cpu(cpu, cfg->domain); + if (!cpumask_test_cpu(cpu, cfg->domain)) continue; vector = cfg->vector; @@ -1353,6 +1355,7 @@ void __setup_vector_irq(int cpu) if (!cpumask_test_cpu(cpu, cfg->domain)) per_cpu(vector_irq, cpu)[vector] = -1; } + raw_spin_unlock(&vector_lock); } static struct irq_chip ioapic_chip; @@ -1508,6 +1511,16 @@ static void setup_IO_APIC_irq(int apic_i cfg = desc->chip_data; +#ifndef CONFIG_XEN + /* + * For legacy irqs, cfg->domain starts with cpu 0 for legacy + * controllers like 8259. Now that IO-APIC can handle this irq, update + * the cfg->domain. 
+ */ + if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain)) + apic->vector_allocation_domain(0, cfg->domain); +#endif + if (assign_irq_vector(irq, cfg, apic->target_cpus())) return; @@ -1533,8 +1546,10 @@ static void setup_IO_APIC_irq(int apic_i } ioapic_register_intr(irq, desc, trigger); - if (irq < nr_legacy_irqs) - disable_8259A_irq(irq); +#ifndef CONFIG_XEN + if (irq < legacy_pic->nr_legacy_irqs) + legacy_pic->chip->mask(irq); +#endif ioapic_write_entry(apic_id, pin, entry); } @@ -1545,7 +1560,7 @@ static struct { static void __init setup_IO_APIC_irqs(void) { - int apic_id = 0, pin, idx, irq; + int apic_id, pin, idx, irq; int notcon = 0; struct irq_desc *desc; struct irq_cfg *cfg; @@ -1553,14 +1568,7 @@ static void __init setup_IO_APIC_irqs(vo apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); -#ifdef CONFIG_ACPI - if (!acpi_disabled && acpi_ioapic) { - apic_id = mp_find_ioapic(0); - if (apic_id < 0) - apic_id = 0; - } -#endif - + for (apic_id = 0; apic_id < nr_ioapics; apic_id++) for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { idx = find_irq_entry(apic_id, pin, mp_INT); if (idx == -1) { @@ -1582,6 +1590,9 @@ static void __init setup_IO_APIC_irqs(vo irq = pin_2_irq(idx, apic_id, pin); + if ((apic_id > 0) && (irq > 16)) + continue; + #ifdef CONFIG_XEN if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) continue; @@ -1615,6 +1626,60 @@ static void __init setup_IO_APIC_irqs(vo " (apicid-pin) not connected\n"); } +/* + * for the gsit that is not in first ioapic + * but could not use acpi_register_gsi() + * like some special sci in IBM x3330 + */ +void setup_IO_APIC_irq_extra(u32 gsi) +{ + int apic_id = 0, pin, idx, irq; + int node = cpu_to_node(boot_cpu_id); + struct irq_desc *desc; + struct irq_cfg *cfg; + + /* + * Convert 'gsi' to 'ioapic.pin'. 
+ */ + apic_id = mp_find_ioapic(gsi); + if (apic_id < 0) + return; + + pin = mp_find_ioapic_pin(apic_id, gsi); + idx = find_irq_entry(apic_id, pin, mp_INT); + if (idx == -1) + return; + + irq = pin_2_irq(idx, apic_id, pin); +#ifdef CONFIG_XEN + if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) + return; +#endif +#ifdef CONFIG_SPARSE_IRQ + desc = irq_to_desc(irq); + if (desc) + return; +#endif + desc = irq_to_desc_alloc_node(irq, node); + if (!desc) { + printk(KERN_INFO "can not get irq_desc for %d\n", irq); + return; + } + + cfg = desc->chip_data; + add_pin_to_irq_node(cfg, node, apic_id, pin); + + if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) { + pr_debug("Pin %d-%d already programmed\n", + mp_ioapics[apic_id].apicid, pin); + return; + } + set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed); + + setup_IO_APIC_irq(apic_id, pin, irq, desc, + irq_trigger(idx), irq_polarity(idx)); +} + #ifndef CONFIG_XEN /* * Set up the timer pin, possibly with the 8259A-master behind. @@ -1679,14 +1744,14 @@ __apicdebuginit(void) print_IO_APIC(void for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic, 0); reg_01.raw = io_apic_read(apic, 1); if (reg_01.bits.version >= 0x10) reg_02.raw = io_apic_read(apic, 2); if (reg_01.bits.version >= 0x20) reg_03.raw = io_apic_read(apic, 3); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); printk("\n"); printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid); @@ -1725,7 +1790,7 @@ __apicdebuginit(void) print_IO_APIC(void printk(KERN_DEBUG ".... 
IRQ redirection table:\n"); printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol" - " Stat Dmod Deli Vect: \n"); + " Stat Dmod Deli Vect:\n"); for (i = 0; i <= reg_01.bits.entries; i++) { struct IO_APIC_route_entry entry; @@ -1903,12 +1968,12 @@ __apicdebuginit(void) print_PIC(void) unsigned int v; unsigned long flags; - if (!nr_legacy_irqs) + if (!legacy_pic->nr_legacy_irqs) return; printk(KERN_DEBUG "\nprinting PIC contents\n"); - spin_lock_irqsave(&i8259A_lock, flags); + raw_spin_lock_irqsave(&i8259A_lock, flags); v = inb(0xa1) << 8 | inb(0x21); printk(KERN_DEBUG "... PIC IMR: %04x\n", v); @@ -1922,7 +1987,7 @@ __apicdebuginit(void) print_PIC(void) outb(0x0a,0xa0); outb(0x0a,0x20); - spin_unlock_irqrestore(&i8259A_lock, flags); + raw_spin_unlock_irqrestore(&i8259A_lock, flags); printk(KERN_DEBUG "... PIC ISR: %04x\n", v); @@ -1984,13 +2049,13 @@ void __init enable_IO_APIC(void) * The number of IO-APIC IRQ registers (== #pins): */ for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); nr_ioapic_registers[apic] = reg_01.bits.entries+1; } - if (!nr_legacy_irqs) + if (!legacy_pic->nr_legacy_irqs) return; #ifndef CONFIG_XEN @@ -2052,7 +2117,7 @@ void disable_IO_APIC(void) */ clear_IO_APIC(); - if (!nr_legacy_irqs) + if (!legacy_pic->nr_legacy_irqs) return; /* @@ -2131,9 +2196,9 @@ void __init setup_ioapic_ids_from_mpc(vo for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { /* Read the register 0 value */ - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); old_id = mp_ioapics[apic_id].apicid; @@ -2192,16 +2257,16 @@ void __init setup_ioapic_ids_from_mpc(vo mp_ioapics[apic_id].apicid); reg_00.bits.ID 
= mp_ioapics[apic_id].apicid; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(apic_id, 0, reg_00.raw); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* * Sanity check */ - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(apic_id, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (reg_00.bits.ID != mp_ioapics[apic_id].apicid) printk("could not set ID!\n"); else @@ -2284,15 +2349,15 @@ static unsigned int startup_ioapic_irq(u unsigned long flags; struct irq_cfg *cfg; - spin_lock_irqsave(&ioapic_lock, flags); - if (irq < nr_legacy_irqs) { - disable_8259A_irq(irq); - if (i8259A_irq_pending(irq)) + raw_spin_lock_irqsave(&ioapic_lock, flags); + if (irq < legacy_pic->nr_legacy_irqs) { + legacy_pic->chip->mask(irq); + if (legacy_pic->irq_pending(irq)) was_pending = 1; } cfg = irq_cfg(irq); __unmask_IO_APIC_irq(cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return was_pending; } @@ -2303,9 +2368,9 @@ static int ioapic_retrigger_irq(unsigned struct irq_cfg *cfg = irq_cfg(irq); unsigned long flags; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); return 1; } @@ -2398,14 +2463,14 @@ set_ioapic_affinity_irq_desc(struct irq_ irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); ret = set_desc_affinity(desc, mask, &dest); if (!ret) { /* Only the high 8 bits are valid. 
*/ dest = SET_APIC_LOGICAL_ID(dest); __target_IO_APIC_irq(irq, dest, cfg); } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return ret; } @@ -2575,6 +2640,9 @@ void irq_force_complete_move(int irq) struct irq_desc *desc = irq_to_desc(irq); struct irq_cfg *cfg = desc->chip_data; + if (!cfg) + return; + __irq_complete_move(&desc, cfg->vector); } #else @@ -2640,9 +2708,9 @@ static void eoi_ioapic_irq(struct irq_de irq = desc->irq; cfg = desc->chip_data; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); __eoi_ioapic_irq(irq, cfg); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); } static void ack_apic_level(unsigned int irq) @@ -2825,8 +2893,8 @@ static inline void init_IO_APIC_traps(vo * so default to an old-fashioned 8259 * interrupt if we can.. */ - if (irq < nr_legacy_irqs) - make_8259A_irq(irq); + if (irq < legacy_pic->nr_legacy_irqs) + legacy_pic->make_irq(irq); else /* Strange. Oh, well.. */ desc->chip = &no_irq_chip; @@ -2984,7 +3052,7 @@ static inline void __init check_timer(vo /* * get/set the timer IRQ vector: */ - disable_8259A_irq(0); + legacy_pic->chip->mask(0); assign_irq_vector(0, cfg, apic->target_cpus()); /* @@ -2997,7 +3065,7 @@ static inline void __init check_timer(vo * automatically. 
*/ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); - init_8259A(1); + legacy_pic->init(1); #ifdef CONFIG_X86_32 { unsigned int ver; @@ -3056,7 +3124,7 @@ static inline void __init check_timer(vo if (timer_irq_works()) { if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); @@ -3079,14 +3147,14 @@ static inline void __init check_timer(vo */ replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); timer_through_8259 = 1; if (nmi_watchdog == NMI_IO_APIC) { - disable_8259A_irq(0); + legacy_pic->chip->mask(0); setup_nmi(); - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); } goto out; } @@ -3094,7 +3162,7 @@ static inline void __init check_timer(vo * Cleanup, just in case ... */ local_irq_disable(); - disable_8259A_irq(0); + legacy_pic->chip->mask(0); clear_IO_APIC_pin(apic2, pin2); apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); } @@ -3113,22 +3181,22 @@ static inline void __init check_timer(vo lapic_register_intr(0, desc); apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ - enable_8259A_irq(0); + legacy_pic->chip->unmask(0); if (timer_irq_works()) { apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); goto out; } local_irq_disable(); - disable_8259A_irq(0); + legacy_pic->chip->mask(0); apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_QUIET, KERN_INFO "..... 
failed.\n"); apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer as ExtINT IRQ...\n"); - init_8259A(0); - make_8259A_irq(0); + legacy_pic->init(0); + legacy_pic->make_irq(0); apic_write(APIC_LVT0, APIC_DM_EXTINT); unlock_ExtINT_logic(); @@ -3177,7 +3245,7 @@ void __init setup_IO_APIC(void) * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ #endif - io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; + io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); /* @@ -3190,7 +3258,7 @@ void __init setup_IO_APIC(void) #endif setup_IO_APIC_irqs(); init_IO_APIC_traps(); - if (nr_legacy_irqs) + if (legacy_pic->nr_legacy_irqs) check_timer(); } @@ -3248,13 +3316,13 @@ static int ioapic_resume(struct sys_devi data = container_of(dev, struct sysfs_ioapic_data, dev); entry = data->entry; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(dev->id, 0); if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) { reg_00.bits.ID = mp_ioapics[dev->id].apicid; io_apic_write(dev->id, 0, reg_00.raw); } - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); for (i = 0; i < nr_ioapic_registers[dev->id]; i++) ioapic_write_entry(dev->id, i, entry[i]); @@ -3317,7 +3385,7 @@ unsigned int create_irq_nr(unsigned int if (irq_want < nr_irqs_gsi) irq_want = nr_irqs_gsi; - spin_lock_irqsave(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); for (new = irq_want; new < nr_irqs; new++) { desc_new = irq_to_desc_alloc_node(new, node); if (!desc_new) { @@ -3336,14 +3404,11 @@ unsigned int create_irq_nr(unsigned int irq = new; break; } - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_unlock_irqrestore(&vector_lock, flags); + + if (irq > 0) + dynamic_irq_init_keep_chip_data(irq); - if (irq > 0) { - dynamic_irq_init(irq); - /* restore it, in case dynamic_irq_init clear it */ - if (desc_new) - 
desc_new->chip_data = cfg_new; - } return irq; } @@ -3365,20 +3430,13 @@ int create_irq(void) void destroy_irq(unsigned int irq) { unsigned long flags; - struct irq_cfg *cfg; - struct irq_desc *desc; - /* store it, in case dynamic_irq_cleanup clear it */ - desc = irq_to_desc(irq); - cfg = desc->chip_data; - dynamic_irq_cleanup(irq); - /* connect back irq_cfg */ - desc->chip_data = cfg; + dynamic_irq_cleanup_keep_chip_data(irq); free_irte(irq); - spin_lock_irqsave(&vector_lock, flags); - __clear_irq_vector(irq, cfg); - spin_unlock_irqrestore(&vector_lock, flags); + raw_spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq, get_irq_chip_data(irq)); + raw_spin_unlock_irqrestore(&vector_lock, flags); } #endif /* CONFIG_XEN */ @@ -3916,9 +3974,9 @@ int __init io_apic_get_redir_entries (in union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.entries; } @@ -4010,7 +4068,7 @@ static int __io_apic_set_pci_routing(str /* * IRQs < 16 are already in the irq_2_pin[] map */ - if (irq >= nr_legacy_irqs) { + if (irq >= legacy_pic->nr_legacy_irqs) { cfg = desc->chip_data; if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { printk(KERN_INFO "can not add pin %d for irq %d\n", @@ -4090,9 +4148,9 @@ int __init io_apic_get_unique_id(int ioa if (physids_empty(apic_id_map)) apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map); - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); if (apic_id >= get_physical_broadcast()) { printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying " @@ -4126,10 +4184,10 @@ int __init io_apic_get_unique_id(int ioa if 
(reg_00.bits.ID != apic_id) { reg_00.bits.ID = apic_id; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); io_apic_write(ioapic, 0, reg_00.raw); reg_00.raw = io_apic_read(ioapic, 0); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); /* Sanity check */ if (reg_00.bits.ID != apic_id) { @@ -4151,9 +4209,9 @@ int __init io_apic_get_version(int ioapi union IO_APIC_reg_01 reg_01; unsigned long flags; - spin_lock_irqsave(&ioapic_lock, flags); + raw_spin_lock_irqsave(&ioapic_lock, flags); reg_01.raw = io_apic_read(ioapic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); + raw_spin_unlock_irqrestore(&ioapic_lock, flags); return reg_01.bits.version; } @@ -4186,27 +4244,23 @@ int acpi_get_override_irq(int bus_irq, i #ifdef CONFIG_SMP void __init setup_ioapic_dest(void) { - int pin, ioapic = 0, irq, irq_entry; + int pin, ioapic, irq, irq_entry; struct irq_desc *desc; const struct cpumask *mask; if (skip_ioapic_setup == 1) return; -#ifdef CONFIG_ACPI - if (!acpi_disabled && acpi_ioapic) { - ioapic = mp_find_ioapic(0); - if (ioapic < 0) - ioapic = 0; - } -#endif - + for (ioapic = 0; ioapic < nr_ioapics; ioapic++) for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { irq_entry = find_irq_entry(ioapic, pin, mp_INT); if (irq_entry == -1) continue; irq = pin_2_irq(irq_entry, ioapic, pin); + if ((ioapic > 0) && (irq > 16)) + continue; + desc = irq_to_desc(irq); /* @@ -4394,3 +4448,26 @@ void __init mp_register_ioapic(int id, u nr_ioapics++; } + +#ifdef CONFIG_X86_MRST +/* Enable IOAPIC early just for system timer */ +void __init pre_init_apic_IRQ0(void) +{ + struct irq_cfg *cfg; + struct irq_desc *desc; + + printk(KERN_INFO "Early APIC setup for system timer0\n"); +#ifndef CONFIG_SMP + phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid); +#endif + desc = irq_to_desc_alloc_node(0, 0); + + setup_local_APIC(); + + cfg = irq_cfg(0); + add_pin_to_irq_node(cfg, 0, 0, 0); + 
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); + + setup_IO_APIC_irq(0, 0, 0, desc, 0, 0); +} +#endif --- head-2010-05-25.orig/arch/x86/kernel/cpu/intel_cacheinfo.c 2010-05-25 09:20:14.000000000 +0200 +++ head-2010-05-25/arch/x86/kernel/cpu/intel_cacheinfo.c 2010-05-25 09:25:34.000000000 +0200 @@ -301,7 +301,7 @@ struct _cache_attr { ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count); }; -#ifdef CONFIG_CPU_SUP_AMD +#if defined(CONFIG_CPU_SUP_AMD) && !defined(CONFIG_XEN) static unsigned int __cpuinit amd_calc_l3_indices(void) { /* @@ -873,7 +873,7 @@ static struct attribute *default_attrs[] static struct attribute *default_l3_attrs[] = { DEFAULT_SYSFS_CACHE_ATTRS, -#ifdef CONFIG_CPU_SUP_AMD +#if defined(CONFIG_CPU_SUP_AMD) && !defined(CONFIG_XEN) &cache_disable_0.attr, &cache_disable_1.attr, #endif --- head-2010-05-25.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2010-04-15 13:39:58.000000000 +0200 @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-25 11:33:03.000000000 +0100 @@ -25,12 +25,12 @@ void generic_get_mtrr(unsigned int reg, *type = op.u.read_memtype.type; } -struct mtrr_ops generic_mtrr_ops = { +const struct mtrr_ops generic_mtrr_ops = { .use_intel_if = 1, .get = generic_get_mtrr, }; -struct mtrr_ops *mtrr_if = &generic_mtrr_ops; +const struct mtrr_ops *mtrr_if = &generic_mtrr_ops; unsigned int num_var_ranges; unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; --- head-2010-05-25.orig/arch/x86/kernel/e820-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/e820-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -12,17 +12,10 @@ #include #include #include -#include -#include -#include -#include -#include #include 
#include #include -#include -#include #include #include #include @@ -550,31 +543,55 @@ u64 __init e820_remove_range(u64 start, int checktype) { int i; + u64 end; u64 real_removed_size = 0; if (size > (ULLONG_MAX - start)) size = ULLONG_MAX - start; + end = start + size; + printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ", + (unsigned long long) start, + (unsigned long long) end); + if (checktype) + e820_print_type(old_type); + printk(KERN_CONT "\n"); + for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; u64 final_start, final_end; + u64 ei_end; if (checktype && ei->type != old_type) continue; + + ei_end = ei->addr + ei->size; /* totally covered? */ - if (ei->addr >= start && - (ei->addr + ei->size) <= (start + size)) { + if (ei->addr >= start && ei_end <= end) { real_removed_size += ei->size; memset(ei, 0, sizeof(struct e820entry)); continue; } + + /* new range is totally covered? */ + if (ei->addr < start && ei_end > end) { + e820_add_region(end, ei_end - end, ei->type); + ei->size = start - ei->addr; + real_removed_size += size; + continue; + } + /* partially covered */ final_start = max(start, ei->addr); - final_end = min(start + size, ei->addr + ei->size); + final_end = min(end, ei_end); if (final_start >= final_end) continue; real_removed_size += final_end - final_start; + /* + * left range could be head or tail, so need to update + * size at first. + */ ei->size -= final_end - final_start; if (ei->addr < final_start) continue; @@ -769,320 +786,44 @@ core_initcall(e820_mark_nvs_memory); #endif /* - * Early reserved memory areas. 
- */ -#define MAX_EARLY_RES 32 - -struct early_res { - u64 start, end; - char name[16]; - char overlap_ok; -}; -static struct early_res early_res[MAX_EARLY_RES] __initdata = { -#ifndef CONFIG_XEN - { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */ -#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE) - /* - * But first pinch a few for the stack/trampoline stuff - * FIXME: Don't need the extra page at 4K, but need to fix - * trampoline before removing it. (see the GDT stuff) - */ - { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 }, -#endif -#endif - {} -}; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. - */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. + * Find a free area with specified alignment in a specific range. 
*/ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) +u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) { int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[16]; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - r = &early_res[i]; + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + u64 addr; + u64 ei_start, ei_last; - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) + if (ei->type != E820_RAM) continue; - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. Add back in any non-overlapping ranges. 
*/ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= MAX_EARLY_RES) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name?name:"", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. - */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} - -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. 
- */ -void __init reserve_early(u64 start, u64 end, char *name) -{ - if (start >= end) - return; - - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init free_early(u64 start, u64 end) -{ - struct early_res *r; - int i; - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= MAX_EARLY_RES || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init early_res_to_bootmem(u64 start, u64 end) -{ - int i, count; - u64 final_start, final_end; - - count = 0; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) - count++; - - printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count, start, end); - for (i = 0; i < count; i++) { - struct early_res *r = &early_res[i]; - printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); - if (final_start >= final_end) { - printk(KERN_CONT "\n"); - continue; - } - printk(KERN_CONT " ==> [%010llx - %010llx]\n", - final_start, final_end); - reserve_bootmem_generic(final_start, final_end - final_start, - BOOTMEM_DEFAULT); - } -} + ei_last = ei->addr + ei->size; + ei_start = ei->addr; + addr = find_early_area(ei_start, ei_last, start, end, + size, align); -/* Check for already reserved areas */ -static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) -{ - int i; - u64 addr = *addrp; - int changed = 0; - struct early_res *r; -again: - i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < MAX_EARLY_RES && r->end) { - *addrp = addr = round_up(r->end, align); - changed = 1; - goto again; + if (addr != -1ULL) + return addr; } - return changed; + return -1ULL; } -/* Check for already reserved areas */ -static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align) +u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, 
u64 align) { - int i; - u64 addr = *addrp, last; - u64 size = *sizep; - int changed = 0; -again: - last = addr + size; - for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; - changed = 1; - goto again; - } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); - size = last - addr; - changed = 1; - goto again; - } - if (last <= r->end && addr >= r->start) { - (*sizep)++; - return 0; - } - } - if (changed) { - *addrp = addr; - *sizep = size; - } - return changed; + return find_e820_area(start, end, size, align); } -/* - * Find a free area with specified alignment in a specific range. - */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) +u64 __init get_max_mapped(void) { - int i; + u64 end = max_pfn_mapped; - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr, last; - u64 ei_last; + end <<= PAGE_SHIFT; - if (ei->type != E820_RAM) - continue; - addr = round_up(ei->addr, align); - ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - while (bad_addr(&addr, size, align) && addr+size <= ei_last) - ; - last = addr + size; - if (last > ei_last) - continue; - if (last > end) - continue; - return addr; - } - return -1ULL; + return end; } - /* * Find next free range after *start */ @@ -1092,25 +833,19 @@ u64 __init find_e820_area_size(u64 start for (i = 0; i < e820.nr_map; i++) { struct e820entry *ei = &e820.map[i]; - u64 addr, last; - u64 ei_last; + u64 addr; + u64 ei_start, ei_last; if (ei->type != E820_RAM) continue; - addr = round_up(ei->addr, align); + ei_last = ei->addr + ei->size; - if (addr < start) - addr = round_up(start, align); - if (addr >= ei_last) - continue; - *sizep = ei_last - addr; - while (bad_addr_size(&addr, sizep, align) && - addr + *sizep <= ei_last) - ; - last = addr + *sizep; - if (last > ei_last) - 
continue; - return addr; + ei_start = ei->addr; + addr = find_early_area_size(ei_start, ei_last, start, + sizep, align); + + if (addr != -1ULL) + return addr; } return -1ULL; @@ -1544,6 +1279,8 @@ void __init e820_reserve_resources_late( end = MAX_RESOURCE_SIZE; if (start >= end) continue; + printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ", + start, end); reserve_region_with_split(&iomem_resource, start, end, "RAM buffer"); } --- head-2010-05-25.orig/arch/x86/kernel/head32-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/head32-xen.c 2010-04-15 10:29:09.000000000 +0200 @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -32,15 +33,26 @@ static void __init i386_default_early_se void __init i386_start_kernel(void) { +#ifdef CONFIG_X86_TRAMPOLINE + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, + "EX TRAMPOLINE"); +#endif + reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); #ifndef CONFIG_XEN #ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } #endif --- head-2010-05-25.orig/arch/x86/kernel/head_32-xen.S 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/head_32-xen.S 2010-03-25 11:52:54.000000000 +0100 @@ -67,8 +67,8 @@ ENTRY(startup_32) * The linker can't handle this by relocation. Manually set * base address in stack canary segment descriptor. 
*/ - movl $per_cpu__gdt_page,%eax - movl $per_cpu__stack_canary,%ecx + movl $gdt_page,%eax + movl $stack_canary,%ecx movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) shrl $16, %ecx movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) @@ -79,7 +79,7 @@ ENTRY(startup_32) # need to be preserved. movl XEN_START_mfn_list(%esi), %ebx - movl $(per_cpu__gdt_page - __PAGE_OFFSET), %eax + movl $(gdt_page - __PAGE_OFFSET), %eax shrl $PAGE_SHIFT, %eax movl (%ebx,%eax,4), %ecx pushl %ecx # frame number for set_gdt below @@ -89,7 +89,7 @@ ENTRY(startup_32) shldl $PAGE_SHIFT, %ecx, %edx shll $PAGE_SHIFT, %ecx orl $_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY, %ecx - movl $per_cpu__gdt_page, %ebx + movl $gdt_page, %ebx movl $__HYPERVISOR_update_va_mapping, %eax int $0x82 --- head-2010-05-25.orig/arch/x86/kernel/ldt-xen.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/ldt-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -7,6 +7,7 @@ */ #include +#include #include #include #include --- head-2010-05-25.orig/arch/x86/kernel/mpparse-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/mpparse-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -677,7 +677,7 @@ static void __init smp_reserve_memory(st { unsigned long size = get_mpc_size(mpf->physptr); - reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc"); + reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc"); } #endif @@ -710,7 +710,7 @@ static int __init smp_scan_config(unsign mpf, (u64)virt_to_phys(mpf)); mem = virt_to_phys(mpf); - reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf"); + reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf"); if (mpf->physptr) smp_reserve_memory(mpf); #else --- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -38,7 +39,7 @@ int 
iommu_detected __read_mostly = 0; * This variable becomes 1 if iommu=pt is passed on the kernel command line. * If this variable is 1, IOMMU implementations do no DMA translation for * devices and allow every device to access to whole physical memory. This is - * useful if a user want to use an IOMMU only for KVM device assignment to + * useful if a user wants to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation. */ int iommu_pass_through __read_mostly; @@ -65,7 +66,7 @@ int dma_set_mask(struct device *dev, u64 } EXPORT_SYMBOL(dma_set_mask); -#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) +#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) && !defined(CONFIG_XEN) static __initdata void *dma32_bootmem_ptr; static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); @@ -116,6 +117,14 @@ static void __init dma32_free_bootmem(vo dma32_bootmem_ptr = NULL; dma32_bootmem_size = 0; } +#else +void __init dma32_reserve_bootmem(void) +{ +} +static void __init dma32_free_bootmem(void) +{ +} + #endif static struct dma_map_ops swiotlb_dma_ops = { @@ -137,10 +146,9 @@ static struct dma_map_ops swiotlb_dma_op void __init pci_iommu_alloc(void) { -#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); -#endif + if (pci_swiotlb_detect()) goto out; --- head-2010-05-25.orig/arch/x86/kernel/process-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/process-xen.c 2010-05-25 09:30:59.000000000 +0200 @@ -94,6 +94,13 @@ void exit_thread(void) } } +void show_regs(struct pt_regs *regs) +{ + show_registers(regs); + show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs), + regs->bp); +} + void show_regs_common(void) { const char *board, *product; @@ -503,21 +510,39 @@ static int __cpuinit mwait_usable(const } /* - * Check for AMD CPUs, which have potentially C1E support + * Check for AMD CPUs, where APIC timer interrupt 
does not wake up CPU from C1e. + * For more information see + * - Erratum #400 for NPT family 0xf and family 0x10 CPUs + * - Erratum #365 for family 0x11 (not affected because C1e not in use) */ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) { + u64 val; if (c->x86_vendor != X86_VENDOR_AMD) - return 0; - - if (c->x86 < 0x0F) - return 0; + goto no_c1e_idle; /* Family 0x0f models < rev F do not have C1E */ - if (c->x86 == 0x0f && c->x86_model < 0x40) - return 0; + if (c->x86 == 0x0F && c->x86_model >= 0x40) + return 1; - return 1; + if (c->x86 == 0x10) { + /* + * check OSVW bit for CPUs that are not affected + * by erratum #400 + */ + if (cpu_has(c, X86_FEATURE_OSVW)) { + rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val); + if (val >= 2) { + rdmsrl(MSR_AMD64_OSVW_STATUS, val); + if (!(val & BIT(1))) + goto no_c1e_idle; + } + } + return 1; + } + +no_c1e_idle: + return 0; } static cpumask_var_t c1e_mask; @@ -586,7 +611,7 @@ void __cpuinit select_idle_routine(const #ifndef CONFIG_XEN #ifdef CONFIG_SMP if (pm_idle == poll_idle && smp_num_siblings > 1) { - printk(KERN_WARNING "WARNING: polling idle and HT enabled," + printk_once(KERN_WARNING "WARNING: polling idle and HT enabled," " performance may degrade.\n"); } #endif --- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/process_32-xen.c 2010-03-25 10:38:31.000000000 +0100 @@ -179,12 +179,6 @@ void __show_regs(struct pt_regs *regs, i d6, d7); } -void show_regs(struct pt_regs *regs) -{ - show_registers(regs); - show_trace(NULL, regs, &regs->sp, regs->bp); -} - void release_thread(struct task_struct *dead_task) { BUG_ON(dead_task->mm); --- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/process_64-xen.c 2010-05-12 09:09:00.000000000 +0200 @@ -219,12 +219,6 @@ void __show_regs(struct pt_regs *regs, i printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6,
d7); } -void show_regs(struct pt_regs *regs) -{ - show_registers(regs); - show_trace(NULL, regs, (void *)(regs + 1), regs->bp); -} - void xen_load_gs_index(unsigned gs) { WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs)); @@ -295,12 +289,12 @@ int copy_thread(unsigned long clone_flag set_tsk_thread_flag(p, TIF_FORK); - p->thread.fs = me->thread.fs; - p->thread.gs = me->thread.gs; p->thread.io_bitmap_ptr = NULL; savesegment(gs, p->thread.gsindex); + p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs; savesegment(fs, p->thread.fsindex); + p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs; savesegment(es, p->thread.es); savesegment(ds, p->thread.ds); --- head-2010-05-25.orig/arch/x86/kernel/setup-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/setup-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -55,7 +55,6 @@ #include #include #include -#include #include #include @@ -151,7 +150,9 @@ EXPORT_SYMBOL(xen_start_info); unsigned long max_low_pfn_mapped; unsigned long max_pfn_mapped; +#ifdef CONFIG_DMI RESERVE_BRK(dmi_alloc, 65536); +#endif unsigned int boot_cpu_id __read_mostly; @@ -348,15 +349,17 @@ static void __init reserve_brk(void) static void __init relocate_initrd(void) { #ifndef CONFIG_XEN + /* Assume only end is not page aligned */ u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; + u64 area_size = PAGE_ALIGN(ramdisk_size); u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; u64 ramdisk_here; unsigned long slop, clen, mapaddr; char *p, *q; /* We need to move the initrd down into lowmem */ - ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size, + ramdisk_here = find_e820_area(0, end_of_lowmem, area_size, PAGE_SIZE); if (ramdisk_here == -1ULL) @@ -365,7 +368,7 @@ static void __init relocate_initrd(void) /* Note: this includes all the lowmem currently occupied by the initrd, we rely on that fact to keep the data intact. 
*/ - reserve_early(ramdisk_here, ramdisk_here + ramdisk_size, + reserve_early(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK"); initrd_start = ramdisk_here + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; @@ -416,10 +419,11 @@ static void __init relocate_initrd(void) static void __init reserve_initrd(void) { + /* Assume only end is not page aligned */ #ifndef CONFIG_XEN u64 ramdisk_image = boot_params.hdr.ramdisk_image; u64 ramdisk_size = boot_params.hdr.ramdisk_size; - u64 ramdisk_end = ramdisk_image + ramdisk_size; + u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; if (!boot_params.hdr.type_of_loader || @@ -428,7 +432,7 @@ static void __init reserve_initrd(void) #else unsigned long ramdisk_image = __pa(xen_start_info->mod_start); unsigned long ramdisk_size = xen_start_info->mod_len; - unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); unsigned long end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; if (!xen_start_info->mod_start || !ramdisk_size) @@ -671,6 +675,18 @@ static int __init setup_elfcorehdr(char early_param("elfcorehdr", setup_elfcorehdr); #endif +static __init void reserve_ibft_region(void) +{ + unsigned long addr, size = 0; + + addr = find_ibft_region(&size); + +#ifndef CONFIG_XEN + if (size) + reserve_early_overlap_ok(addr, addr + size, "ibft"); +#endif +} + #ifdef CONFIG_X86_RESERVE_LOW_64K static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) { @@ -734,6 +750,25 @@ static struct dmi_system_id __initdata b {} }; +#ifndef CONFIG_XEN +static void __init trim_bios_range(void) +{ + /* + * A special case is the first 4Kb of memory; + * This is a BIOS owned area, not kernel ram, but generally + * not listed as such in the E820 table. 
+ */ + e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED); + /* + * special case: Some BIOSen report the PC BIOS + * area (640->1Mb) as ram even though it is not. + * take them out. + */ + e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1); + sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); +} +#endif + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -940,7 +975,7 @@ void __init setup_arch(char **cmdline_p) insert_resource(&iomem_resource, &data_resource); insert_resource(&iomem_resource, &bss_resource); - + trim_bios_range(); #ifdef CONFIG_X86_32 if (ppro_with_ram_bug()) { e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM, @@ -1007,6 +1042,8 @@ void __init setup_arch(char **cmdline_p) */ find_smp_config(); + reserve_ibft_region(); + reserve_trampoline_memory(); #ifdef CONFIG_ACPI_SLEEP @@ -1077,17 +1114,11 @@ void __init setup_arch(char **cmdline_p) #endif initmem_init(0, max_pfn, acpi, k8); - -#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN) - /* - * dma32_reserve_bootmem() allocates bootmem which may conflict - * with the crashkernel command line, so do that after - * reserve_crashkernel() - */ - dma32_reserve_bootmem(); +#ifndef CONFIG_NO_BOOTMEM + early_res_to_bootmem(0, max_low_pfn< #include #include +#include #include #include --- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-05-12 09:02:50.000000000 +0200 +++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-05-12 09:03:15.000000000 +0200 @@ -597,7 +597,7 @@ static cycle_t xen_clocksource_read(stru #endif } -static void xen_clocksource_resume(void) +static void xen_clocksource_resume(struct clocksource *cs) { extern void time_resume(void); @@ -619,18 +619,18 @@ static struct clocksource clocksource_xe struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu) { struct vcpu_register_runstate_memory_area area; - struct vcpu_runstate_info 
*runstate = &per_cpu(runstate, cpu); + struct vcpu_runstate_info *rs = &per_cpu(runstate, cpu); int rc; - set_xen_guest_handle(area.addr.h, runstate); + set_xen_guest_handle(area.addr.h, rs); rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area); if (rc) { BUILD_BUG_ON(RUNSTATE_running); - memset(runstate, 0, sizeof(*runstate)); + memset(rs, 0, sizeof(*rs)); WARN_ON(rc != -ENOSYS); } - return runstate; + return rs; } static void init_missing_ticks_accounting(unsigned int cpu) --- head-2010-05-25.orig/arch/x86/kernel/traps-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/traps-xen.c 2010-03-25 16:41:03.000000000 +0100 @@ -527,6 +527,9 @@ dotraplinkage void __kprobes do_debug(st get_debugreg(dr6, 6); + /* Filter out all the reserved bits which are preset to 1 */ + dr6 &= ~DR6_RESERVED; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; --- head-2010-05-25.orig/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/vsyscall_64-xen.c 2010-03-25 10:38:31.000000000 +0100 @@ -308,7 +308,8 @@ static int __init vsyscall_init(void) register_sysctl_table(kernel_root_table2); #endif on_each_cpu(cpu_vsyscall_init, NULL, 1); - hotcpu_notifier(cpu_vsyscall_notifier, 0); + /* notifier priority > KVM */ + hotcpu_notifier(cpu_vsyscall_notifier, 30); return 0; } --- head-2010-05-25.orig/arch/x86/kernel/x86_init-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/x86_init-xen.c 2010-03-25 17:21:48.000000000 +0100 @@ -5,8 +5,12 @@ */ #include #include +#include +#include +#include #include +#include #include #include #include @@ -68,6 +72,12 @@ struct x86_init_ops x86_init __initdata .iommu = { .iommu_init = iommu_init_noop, }, + + .pci = { + .init = x86_default_pci_init, + .init_irq = x86_default_pci_init_irq, + .fixup_irqs = x86_default_pci_fixup_irqs, + }, }; struct x86_platform_ops x86_platform = { --- 
head-2010-05-25.orig/arch/x86/lib/Makefile 2010-03-24 15:01:37.000000000 +0100 +++ head-2010-05-25/arch/x86/lib/Makefile 2010-04-28 16:13:29.000000000 +0200 @@ -15,6 +15,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o +obj-$(CONFIG_XEN) += cache-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2010-05-25/arch/x86/lib/cache-smp-xen.c 2010-05-07 11:12:27.000000000 +0200 @@ -0,0 +1,27 @@ +#include +#include +#include + +static void __wbinvd(void *dummy) +{ + wbinvd(); +} + +#ifndef CONFIG_XEN +void wbinvd_on_cpu(int cpu) +{ + smp_call_function_single(cpu, __wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_cpu); +#endif + +int wbinvd_on_all_cpus(void) +{ + struct mmuext_op op = { .cmd = MMUEXT_FLUSH_CACHE_GLOBAL }; + + if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) == 0) + return 0; + /* Best effort as fallback. */ + return on_each_cpu(__wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_all_cpus); --- head-2010-05-25.orig/arch/x86/mm/init-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/init-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -1,3 +1,4 @@ +#include #include #include #include @@ -283,12 +284,7 @@ unsigned long __init_refok init_memory_m if (!after_bootmem) find_early_table_space(end, use_pse, use_gbpages); -#ifdef CONFIG_X86_32 - for (i = 0; i < nr_range; i++) - kernel_physical_mapping_init(mr[i].start, mr[i].end, - mr[i].page_size_mask); - ret = end; -#else /* CONFIG_X86_64 */ +#ifdef CONFIG_X86_64 #define addr_to_page(addr) \ ((unsigned long *) \ ((mfn_to_pfn(((addr) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) \ @@ -320,12 +316,12 @@ unsigned long __init_refok init_memory_m va += PAGE_SIZE; } } +#undef addr_to_page +#endif for (i = 0; i < nr_range; i++) ret = kernel_physical_mapping_init(mr[i].start, mr[i].end, mr[i].page_size_mask); -#undef addr_to_page -#endif #ifdef CONFIG_X86_32 early_ioremap_page_table_range_init(); 
@@ -377,11 +373,23 @@ int devmem_is_allowed(unsigned long page void free_init_pages(char *what, unsigned long begin, unsigned long end) { - unsigned long addr = begin; + unsigned long addr; + unsigned long begin_aligned, end_aligned; + + /* Make sure boundaries are page aligned */ + begin_aligned = PAGE_ALIGN(begin); + end_aligned = end & PAGE_MASK; + + if (WARN_ON(begin_aligned != begin || end_aligned != end)) { + begin = begin_aligned; + end = end_aligned; + } - if (addr >= end) + if (begin >= end) return; + addr = begin; + /* * If debugging page accesses then do not free this memory but * mark them not present - any buggy init-section access will @@ -389,7 +397,7 @@ void free_init_pages(char *what, unsigne */ #ifdef CONFIG_DEBUG_PAGEALLOC printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n", - begin, PAGE_ALIGN(end)); + begin, end); set_memory_np(begin, (end - begin) >> PAGE_SHIFT); #else /* @@ -404,8 +412,7 @@ void free_init_pages(char *what, unsigne for (; addr < end; addr += PAGE_SIZE) { ClearPageReserved(virt_to_page(addr)); init_page_count(virt_to_page(addr)); - memset((void *)(addr & ~(PAGE_SIZE-1)), - POISON_FREE_INITMEM, PAGE_SIZE); + memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); #ifdef CONFIG_X86_64 if (addr >= __START_KERNEL_map) { /* make_readonly() reports all kernel addresses. 
*/ @@ -434,6 +441,15 @@ void free_initmem(void) #ifdef CONFIG_BLK_DEV_INITRD void free_initrd_mem(unsigned long start, unsigned long end) { - free_init_pages("initrd memory", start, end); + /* + * end could be not aligned, and we can not align that, + * decompressor could be confused by aligned initrd_end + * We already reserve the end partial page before in + * - i386_start_kernel() + * - x86_64_start_kernel() + * - relocate_initrd() + * So here we can do PAGE_ALIGN() safely to get partial page to be freed + */ + free_init_pages("initrd memory", start, PAGE_ALIGN(end)); } #endif --- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-04-15 10:51:33.000000000 +0200 @@ -25,11 +25,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include @@ -257,6 +257,7 @@ kernel_physical_mapping_init(unsigned lo unsigned long page_size_mask) { int use_pse = page_size_mask == (1<nr_pages); @@ -787,11 +791,17 @@ void __init setup_bootmem_allocator(void if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); +#elif defined(CONFIG_XEN) + if (max_low_pfn > xen_start_info->nr_pages) + reserve_early(xen_start_info->nr_pages << PAGE_SHIFT, + max_low_pfn << PAGE_SHIFT, "BALLOON"); +#endif printk(KERN_INFO " mapped low ram: 0 - %08lx\n", max_pfn_mapped< #include #include +#include #include #include @@ -403,9 +404,13 @@ static inline int __meminit make_readonl * No need for writable mapping of kernel image. This also ensures that * page and descriptor tables embedded inside don't have writable * mappings. Exclude the vsyscall area here, allowing alternative - * instruction patching to work. + * instruction patching to work.
The range must be in sync with that + * passed to reserve_early() (as "TEXT DATA BSS"), since all other + * regions can be allocated from under CONFIG_NO_BOOTMEM and thus must + * be writable. */ - if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa(_brk_end)) + if ((paddr >= __pa_symbol(&_text)) + && (paddr < (__pa_symbol(__bss_stop) & PAGE_MASK)) && !(paddr >= __pa_symbol(&__vsyscall_0) && paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE)) readonly = 1; @@ -813,6 +818,7 @@ kernel_physical_mapping_init(unsigned lo void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn, int acpi, int k8) { +#ifndef CONFIG_NO_BOOTMEM unsigned long bootmap_size, bootmap; e820_register_active_regions(0, start_pfn, end_pfn); @@ -825,12 +831,19 @@ void __init initmem_init(unsigned long s PAGE_SIZE); if (bootmap == -1L) panic("Cannot find bootmem map of size %ld\n", bootmap_size); + reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP"); /* don't touch min_low_pfn */ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT, 0, end_pfn); free_bootmem_with_active_regions(0, end_pfn); - early_res_to_bootmem(0, end_pfn< xen_start_info->nr_pages) + reserve_early(xen_start_info->nr_pages << PAGE_SHIFT, + end_pfn << PAGE_SHIFT, "BALLOON"); +#endif +#endif } #endif @@ -1243,7 +1256,7 @@ vmemmap_populate(struct page *start_page if (pmd_none(*pmd)) { pte_t entry; - p = vmemmap_alloc_block(PMD_SIZE, node); + p = vmemmap_alloc_block_buf(PMD_SIZE, node); if (!p) return -ENOMEM; --- head-2010-05-25.orig/arch/x86/mm/ioremap-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/ioremap-xen.c 2010-05-12 09:13:00.000000000 +0200 @@ -142,6 +142,11 @@ int direct_kernel_remap_pfn_range(unsign } EXPORT_SYMBOL(direct_kernel_remap_pfn_range); +int page_is_ram(unsigned long pagenr) +{ + return pagenr < max_pfn; +} + static int lookup_pte_fn( pte_t *pte, struct page *pmd_page, unsigned long addr, void *data) { @@ -177,45 +182,6 @@ int touch_pte_range(struct 
mm_struct *mm EXPORT_SYMBOL(touch_pte_range); -int page_is_ram(unsigned long pagenr) -{ - resource_size_t addr, end; - int i; - -#ifndef CONFIG_XEN - /* - * A special case is the first 4Kb of memory; - * This is a BIOS owned area, not kernel ram, but generally - * not listed as such in the E820 table. - */ - if (pagenr == 0) - return 0; - - /* - * Second special case: Some BIOSen report the PC BIOS - * area (640->1Mb) as ram even though it is not. - */ - if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) && - pagenr < (BIOS_END >> PAGE_SHIFT)) - return 0; -#endif - - for (i = 0; i < e820.nr_map; i++) { - /* - * Not usable memory: - */ - if (e820.map[i].type != E820_RAM) - continue; - addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT; - end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; - - - if ((pagenr >= addr) && (pagenr < end)) - return 1; - } - return 0; -} - /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. @@ -606,6 +572,10 @@ void __init early_ioremap_init(void) * The boot-ioremap range spans multiple pmds, for which * we are not prepared: */ +#define __FIXADDR_TOP (-PAGE_SIZE) + BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT) + != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT)); +#undef __FIXADDR_TOP if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) { WARN_ON(1); printk(KERN_WARNING "pmd %p != %p\n", @@ -665,6 +635,22 @@ static inline void __init early_clear_fi static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; +#ifndef CONFIG_XEN +void __init fixup_early_ioremap(void) +{ + int i; + + for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { + if (prev_map[i]) { + WARN_ON(1); + break; + } + } + + early_ioremap_init(); +} +#endif + static int __init check_early_ioremap_leak(void) { int count = 0; --- head-2010-05-25.orig/arch/x86/mm/pageattr-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/pageattr-xen.c 2010-04-15 
10:48:32.000000000 +0200 @@ -6,13 +6,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include #include @@ -293,8 +293,29 @@ static inline pgprot_t static_protection */ if (kernel_set_to_readonly && within(address, (unsigned long)_text, - (unsigned long)__end_rodata_hpage_align)) - pgprot_val(forbidden) |= _PAGE_RW; + (unsigned long)__end_rodata_hpage_align)) { + unsigned int level; + + /* + * Don't enforce the !RW mapping for the kernel text mapping, + * if the current mapping is already using small page mapping. + * No need to work hard to preserve large page mappings in this + * case. + * + * This also fixes the Linux Xen paravirt guest boot failure + * (because of unexpected read-only mappings for kernel identity + * mappings). In this paravirt guest case, the kernel text + * mapping and the kernel identity mapping share the same + * page-table pages. Thus we can't really use different + * protections for the kernel text and identity mappings. Also, + * these shared mappings are made of small page mappings. + * Thus this "don't enforce !RW mapping for small page kernel + * text mapping" logic will help Linux Xen paravirt guest boot + * as well.
+ */ + if (lookup_address(address, &level) && (level != PG_LEVEL_4K)) + pgprot_val(forbidden) |= _PAGE_RW; + } #endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); --- head-2010-05-25.orig/arch/x86/mm/pat-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/pat-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include --- head-2010-05-25.orig/arch/x86/mm/pgtable-xen.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/pgtable-xen.c 2010-04-15 10:53:40.000000000 +0200 @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -10,6 +11,14 @@ #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO +#ifdef CONFIG_HIGHPTE +#define PGALLOC_USER_GFP __GFP_HIGHMEM +#else +#define PGALLOC_USER_GFP 0 +#endif + +gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP; + pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { pte_t *pte = (pte_t *)__get_free_page(PGALLOC_GFP); @@ -28,11 +37,7 @@ pgtable_t pte_alloc_one(struct mm_struct { struct page *pte; -#ifdef CONFIG_HIGHPTE - pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); -#else - pte = alloc_pages(PGALLOC_GFP, 0); -#endif + pte = alloc_pages(__userpte_alloc_gfp, 0); if (pte) { pgtable_page_ctor(pte); SetPageForeign(pte, _pte_free); @@ -41,6 +46,23 @@ pgtable_t pte_alloc_one(struct mm_struct return pte; } +static int __init setup_userpte(char *arg) +{ + if (!arg) + return -EINVAL; + + /* + * "userpte=nohigh" disables allocation of user pagetables in + * high memory. 
+ */ + if (strcmp(arg, "nohigh") == 0) + __userpte_alloc_gfp &= ~__GFP_HIGHMEM; + else + return -EINVAL; + return 0; +} +early_param("userpte", setup_userpte); + void __pte_free(pgtable_t pte) { if (!PageHighMem(pte)) { --- head-2010-05-25.orig/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:25:06.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/pgtable_32-xen.c 2010-05-12 09:09:25.000000000 +0200 @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -142,6 +141,7 @@ static int __init parse_reservetop(char address = memparse(arg, &arg); reserve_top_address(address); + fixup_early_ioremap(); return 0; } early_param("reservetop", parse_reservetop); --- head-2010-05-25.orig/arch/x86/pci/irq-xen.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/arch/x86/pci/irq-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -53,7 +52,7 @@ struct irq_router_handler { int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device); }; -int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL; +int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq; void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL; /* @@ -596,6 +595,8 @@ static __init int intel_router_probe(str case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: + case PCI_DEVICE_ID_INTEL_CPT_LPC1: + case PCI_DEVICE_ID_INTEL_CPT_LPC2: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; @@ -1022,7 +1023,7 @@ static int pcibios_lookup_irq(struct pci return 1; } -static void __init pcibios_fixup_irqs(void) +void __init pcibios_fixup_irqs(void) { struct pci_dev *dev = NULL; u8 pin; @@ -1116,12 +1117,12 @@ static struct dmi_system_id __initdata p { } }; -int __init pcibios_irq_init(void) +void __init pcibios_irq_init(void) { DBG(KERN_DEBUG "PCI: IRQ init\n"); - if (pcibios_enable_irq || raw_pci_ops == NULL) - return 0; + if (raw_pci_ops == NULL) + 
return; dmi_check_system(pciirq_dmi_table); @@ -1148,9 +1149,7 @@ int __init pcibios_irq_init(void) pirq_table = NULL; } - pcibios_enable_irq = pirq_enable_irq; - - pcibios_fixup_irqs(); + x86_init.pci.fixup_irqs(); if (io_apic_assign_pci_irqs && pci_routeirq) { struct pci_dev *dev = NULL; @@ -1163,8 +1162,6 @@ int __init pcibios_irq_init(void) for_each_pci_dev(dev) pirq_enable_irq(dev); } - - return 0; } static void pirq_penalize_isa_irq(int irq, int active) --- head-2010-05-25.orig/drivers/char/tpm/tpm_vtpm.c 2010-03-24 15:12:46.000000000 +0100 +++ head-2010-05-25/drivers/char/tpm/tpm_vtpm.c 2010-04-15 13:41:04.000000000 +0200 @@ -16,6 +16,7 @@ #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/char/tpm/tpm_xen.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/char/tpm/tpm_xen.c 2010-04-15 13:41:56.000000000 +0200 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/misc/Kconfig 2010-05-25 09:31:21.000000000 +0200 +++ head-2010-05-25/drivers/misc/Kconfig 2010-04-29 10:01:27.000000000 +0200 @@ -313,7 +313,7 @@ config TI_DAC7512 config VMWARE_BALLOON tristate "VMware Balloon Driver" - depends on X86 + depends on X86 && !XEN help This is VMware physical memory management driver which acts like a "balloon" that can be inflated to reclaim physical pages --- head-2010-05-25.orig/drivers/pci/msi-xen.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/drivers/pci/msi-xen.c 2010-04-15 10:48:32.000000000 +0200 @@ -18,6 +18,7 @@ #include #include #include +#include #include --- head-2010-05-25.orig/drivers/xen/Kconfig 2010-03-31 14:01:28.000000000 +0200 +++ head-2010-05-25/drivers/xen/Kconfig 2010-03-31 14:08:31.000000000 +0200 @@ -23,6 +23,7 @@ config XEN_UNPRIVILEGED_GUEST select PM_SLEEP select PM_SLEEP_SMP if SMP select PM_RUNTIME if PCI + select PM_OPS if PCI select SUSPEND config XEN_PRIVCMD @@ -336,6 +337,10 @@ config HAVE_IRQ_IGNORE_UNHANDLED 
config NO_IDLE_HZ def_bool y +config ARCH_HAS_WALK_MEMORY + def_bool y + depends on X86 + config XEN_SMPBOOT def_bool y depends on SMP && !PPC_XEN @@ -375,7 +380,6 @@ config XEN_SCRUB_PAGES config XEN_DEV_EVTCHN tristate "Xen /dev/xen/evtchn device" - depends on XEN || PARAVIRT_XEN default PARAVIRT_XEN || XEN_PRIVILEGED_GUEST || m help The evtchn driver allows a userspace process to triger event --- head-2010-05-25.orig/drivers/xen/balloon/balloon.c 2010-04-15 10:11:45.000000000 +0200 +++ head-2010-05-25/drivers/xen/balloon/balloon.c 2010-04-15 11:00:29.000000000 +0200 @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/blkback/blkback-pagemap.c 2009-06-09 15:01:37.000000000 +0200 +++ head-2010-05-25/drivers/xen/blkback/blkback-pagemap.c 2010-04-15 13:39:30.000000000 +0200 @@ -1,4 +1,5 @@ #include +#include #include "blkback-pagemap.h" static int blkback_pagemap_size; --- head-2010-05-25.orig/drivers/xen/blkfront/vbd.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/drivers/xen/blkfront/vbd.c 2010-03-25 16:41:12.000000000 +0100 @@ -314,15 +314,14 @@ xlvbd_init_blk_queue(struct gendisk *gd, /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, sector_size); - blk_queue_max_sectors(rq, 512); + blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); blk_queue_max_segment_size(rq, PAGE_SIZE); /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); /* Make sure buffer addresses are sector-aligned. 
*/ blk_queue_dma_alignment(rq, 511); --- head-2010-05-25.orig/drivers/xen/blktap2/blktap.h 2010-03-24 15:12:36.000000000 +0100 +++ head-2010-05-25/drivers/xen/blktap2/blktap.h 2010-04-15 11:24:08.000000000 +0200 @@ -1,6 +1,7 @@ #ifndef _BLKTAP_H_ #define _BLKTAP_H_ +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/blktap2/device.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/drivers/xen/blktap2/device.c 2010-04-19 14:54:02.000000000 +0200 @@ -991,15 +991,14 @@ blktap_device_configure(struct blktap *t /* Hard sector size and max sectors impersonate the equiv. hardware. */ blk_queue_logical_block_size(rq, tap->params.sector_size); - blk_queue_max_sectors(rq, 512); + blk_queue_max_hw_sectors(rq, 512); /* Each segment in a request is up to an aligned page in size. */ blk_queue_segment_boundary(rq, PAGE_SIZE - 1); blk_queue_max_segment_size(rq, PAGE_SIZE); /* Ensure a merged request will fit in a single I/O ring slot. */ - blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); - blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); + blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST); /* Make sure buffer addresses are sector-aligned. 
*/ blk_queue_dma_alignment(rq, 511); --- head-2010-05-25.orig/drivers/xen/blktap2/sysfs.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/drivers/xen/blktap2/sysfs.c 2010-05-25 09:25:30.000000000 +0200 @@ -379,13 +379,15 @@ blktap_sysfs_destroy(struct blktap *tap) } static ssize_t -blktap_sysfs_show_verbosity(struct class *class, char *buf) +blktap_sysfs_show_verbosity(struct class *class, struct class_attribute *attr, + char *buf) { return sprintf(buf, "%d\n", blktap_debug_level); } static ssize_t -blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size) +blktap_sysfs_set_verbosity(struct class *class, struct class_attribute *attr, + const char *buf, size_t size) { int level; @@ -400,7 +402,8 @@ CLASS_ATTR(verbosity, S_IRUSR | S_IWUSR, blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity); static ssize_t -blktap_sysfs_show_devices(struct class *class, char *buf) +blktap_sysfs_show_devices(struct class *class, struct class_attribute *attr, + char *buf) { int i, ret; struct blktap *tap; --- head-2010-05-25.orig/drivers/xen/char/mem.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/drivers/xen/char/mem.c 2010-04-15 10:48:32.000000000 +0200 @@ -3,7 +3,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * - * Added devfs support. + * Added devfs support. * Jan-11-1998, C. Scott Ananian * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar */ @@ -65,10 +65,10 @@ static inline int range_is_allowed(unsig } /* - * This funcion reads the *physical* memory. The f_pos points directly to the - * memory location. + * This function reads the *physical* memory. The f_pos points directly to the + * memory location.
*/ -static ssize_t read_mem(struct file * file, char __user * buf, +static ssize_t read_mem(struct file *file, char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos; @@ -114,7 +114,7 @@ static ssize_t read_mem(struct file * fi return read; } -static ssize_t write_mem(struct file * file, const char __user * buf, +static ssize_t write_mem(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { unsigned long p = *ppos, ignored; @@ -161,7 +161,7 @@ static struct vm_operations_struct mmap_ #endif }; -static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma) +static int xen_mmap_mem(struct file *file, struct vm_area_struct *vma) { size_t size = vma->vm_end - vma->vm_start; @@ -191,24 +191,26 @@ static int xen_mmap_mem(struct file * fi * also note that seeking relative to the "end of file" isn't supported: * it has no meaning, so it returns -EINVAL. */ -static loff_t memory_lseek(struct file * file, loff_t offset, int orig) +static loff_t memory_lseek(struct file *file, loff_t offset, int orig) { loff_t ret; mutex_lock(&file->f_path.dentry->d_inode->i_mutex); switch (orig) { - case 0: - file->f_pos = offset; - ret = file->f_pos; - force_successful_syscall_return(); + case SEEK_CUR: + offset += file->f_pos; + case SEEK_SET: + /* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */ + if ((unsigned long long)offset >= ~0xFFFULL) { + ret = -EOVERFLOW; break; - case 1: - file->f_pos += offset; - ret = file->f_pos; - force_successful_syscall_return(); - break; - default: - ret = -EINVAL; + } + file->f_pos = offset; + ret = file->f_pos; + force_successful_syscall_return(); + break; + default: + ret = -EINVAL; } mutex_unlock(&file->f_path.dentry->d_inode->i_mutex); return ret; --- head-2010-05-25.orig/drivers/xen/core/evtchn.c 2010-03-31 14:37:57.000000000 +0200 +++ head-2010-05-25/drivers/xen/core/evtchn.c 2010-04-15 11:03:28.000000000 +0200 @@ -31,6 +31,7 @@ */ #include +#include #include #include #include --- 
head-2010-05-25.orig/drivers/xen/core/gnttab.c 2010-03-24 15:12:46.000000000 +0100 +++ head-2010-05-25/drivers/xen/core/gnttab.c 2010-04-15 11:04:07.000000000 +0200 @@ -32,6 +32,7 @@ */ #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/core/hypervisor_sysfs.c 2010-03-24 15:10:37.000000000 +0100 +++ head-2010-05-25/drivers/xen/core/hypervisor_sysfs.c 2010-03-25 14:27:48.000000000 +0100 @@ -36,7 +36,7 @@ static ssize_t hyp_sysfs_store(struct ko return 0; } -static struct sysfs_ops hyp_sysfs_ops = { +static const struct sysfs_ops hyp_sysfs_ops = { .show = hyp_sysfs_show, .store = hyp_sysfs_store, }; --- head-2010-05-25.orig/drivers/xen/core/reboot.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/drivers/xen/core/reboot.c 2010-04-15 11:07:05.000000000 +0200 @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/core/spinlock.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/drivers/xen/core/spinlock.c 2010-04-15 10:14:50.000000000 +0200 @@ -22,7 +22,7 @@ struct spinning { unsigned int ticket; struct spinning *prev; }; -static DEFINE_PER_CPU(struct spinning *, spinning); +static DEFINE_PER_CPU(struct spinning *, _spinning); /* * Protect removal of objects: Addition can be done lockless, and even * removal itself doesn't need protection - what needs to be prevented is @@ -78,7 +78,7 @@ static unsigned int spin_adjust(struct s unsigned int xen_spin_adjust(const arch_spinlock_t *lock, unsigned int token) { - return spin_adjust(percpu_read(spinning), lock, token); + return spin_adjust(percpu_read(_spinning), lock, token); } bool xen_spin_wait(arch_spinlock_t *lock, unsigned int *ptok, @@ -97,9 +97,9 @@ bool xen_spin_wait(arch_spinlock_t *lock /* announce we're spinning */ spinning.ticket = *ptok >> TICKET_SHIFT; spinning.lock = lock; - spinning.prev = percpu_read(spinning); + spinning.prev = percpu_read(_spinning); smp_wmb(); - percpu_write(spinning, &spinning); + 
percpu_write(_spinning, &spinning); upcall_mask = current_vcpu_info()->evtchn_upcall_mask; do { @@ -184,7 +184,7 @@ bool xen_spin_wait(arch_spinlock_t *lock /* announce we're done */ other = spinning.prev; - percpu_write(spinning, other); + percpu_write(_spinning, other); rm_lock = &__get_cpu_var(spinning_rm_lock); raw_local_irq_disable(); arch_write_lock(rm_lock); @@ -228,7 +228,7 @@ void xen_spin_kick(arch_spinlock_t *lock raw_local_irq_save(flags); arch_read_lock(rm_lock); - spinning = per_cpu(spinning, cpu); + spinning = per_cpu(_spinning, cpu); smp_rmb(); while (spinning) { if (spinning->lock == lock && spinning->ticket == token) --- head-2010-05-25.orig/drivers/xen/core/xen_sysfs.c 2010-03-24 15:10:37.000000000 +0100 +++ head-2010-05-25/drivers/xen/core/xen_sysfs.c 2010-04-15 11:04:56.000000000 +0200 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/fbfront/xenfb.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/drivers/xen/fbfront/xenfb.c 2010-04-15 11:11:34.000000000 +0200 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/fbfront/xenkbd.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/drivers/xen/fbfront/xenkbd.c 2010-04-15 11:11:42.000000000 +0200 @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/gntdev/gntdev.c 2010-03-24 15:12:46.000000000 +0100 +++ head-2010-05-25/drivers/xen/gntdev/gntdev.c 2010-04-15 11:13:05.000000000 +0200 @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/netfront/netfront.h 2010-03-24 15:10:29.000000000 +0100 +++ head-2010-05-25/drivers/xen/netfront/netfront.h 2010-04-15 11:15:06.000000000 +0200 @@ -34,6 +34,7 @@ #define NETFRONT_H #include +#include #include #include #include --- 
head-2010-05-25.orig/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200 +++ head-2010-05-25/drivers/xen/pciback/conf_space_capability_msi.c 2010-04-15 11:21:45.000000000 +0200 @@ -1,12 +1,10 @@ /* * PCI Backend -- Configuration overlay for MSI capability */ -#include -#include +#include "pciback.h" #include "conf_space.h" #include "conf_space_capability.h" #include -#include "pciback.h" int pciback_enable_msi(struct pciback_device *pdev, struct pci_dev *dev, struct xen_pci_op *op) --- head-2010-05-25.orig/drivers/xen/pciback/pciback.h 2010-03-24 15:08:58.000000000 +0100 +++ head-2010-05-25/drivers/xen/pciback/pciback.h 2010-04-15 11:20:39.000000000 +0200 @@ -6,6 +6,7 @@ #ifndef __XEN_PCIBACK_H__ #define __XEN_PCIBACK_H__ +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/pciback/slot.c 2009-03-18 10:39:32.000000000 +0100 +++ head-2010-05-25/drivers/xen/pciback/slot.c 2010-04-15 11:21:14.000000000 +0200 @@ -6,10 +6,6 @@ * Author: Tristan Gingold , from vpci.c */ -#include -#include -#include -#include #include "pciback.h" /* There are at most 32 slots in a pci bus. 
*/ --- head-2010-05-25.orig/drivers/xen/pciback/vpci.c 2009-03-18 10:39:32.000000000 +0100 +++ head-2010-05-25/drivers/xen/pciback/vpci.c 2010-04-15 11:21:09.000000000 +0200 @@ -5,10 +5,6 @@ * Author: Ryan Wilson */ -#include -#include -#include -#include #include "pciback.h" #define PCI_SLOT_MAX 32 --- head-2010-05-25.orig/drivers/xen/pcifront/pcifront.h 2010-03-24 15:08:58.000000000 +0100 +++ head-2010-05-25/drivers/xen/pcifront/pcifront.h 2010-04-15 11:14:10.000000000 +0200 @@ -6,6 +6,7 @@ #ifndef __XEN_PCIFRONT_H__ #define __XEN_PCIFRONT_H__ +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/scsiback/xenbus.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/drivers/xen/scsiback/xenbus.c 2010-03-25 14:20:20.000000000 +0100 @@ -353,7 +353,7 @@ fail: } -static struct xenbus_device_id scsiback_ids[] = { +static const struct xenbus_device_id scsiback_ids[] = { { "vscsi" }, { "" } }; --- head-2010-05-25.orig/drivers/xen/scsifront/xenbus.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/drivers/xen/scsifront/xenbus.c 2010-04-15 11:07:44.000000000 +0200 @@ -30,6 +30,7 @@ #include +#include #include "common.h" #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) @@ -393,7 +394,7 @@ static void scsifront_backend_changed(st } -static struct xenbus_device_id scsifront_ids[] = { +static const struct xenbus_device_id scsifront_ids[] = { { "vscsi" }, { "" } }; --- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel.h 2010-03-24 15:12:46.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netfront/accel.h 2010-04-15 11:23:26.000000000 +0200 @@ -35,6 +35,7 @@ #include #include +#include #include #include --- head-2010-05-25.orig/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2008-02-20 09:32:49.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2010-04-15 11:11:11.000000000 +0200 @@ -24,6 +24,7 @@ #include /* needed for linux/random.h */ #include +#include #include "accel_cuckoo_hash.h" #include "accel_util.h" --- 
head-2010-05-25.orig/drivers/xen/sfc_netutil/accel_util.c 2010-01-04 11:56:34.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netutil/accel_util.c 2010-04-15 11:10:59.000000000 +0200 @@ -22,6 +22,7 @@ **************************************************************************** */ +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_client.c 2010-03-24 15:17:58.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenbus/xenbus_client.c 2010-04-15 11:16:49.000000000 +0200 @@ -30,8 +30,8 @@ * IN THE SOFTWARE. */ -#if defined(CONFIG_XEN) || defined(MODULE) #include +#if defined(CONFIG_XEN) || defined(MODULE) #include #include #include --- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_dev.c 2010-03-24 15:32:27.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenbus/xenbus_dev.c 2010-04-15 11:19:13.000000000 +0200 @@ -33,6 +33,7 @@ */ #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe.c 2010-04-15 11:18:19.000000000 +0200 @@ -47,6 +47,7 @@ #include #include #include +#include #include #include --- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe_backend.c 2010-04-15 11:18:42.000000000 +0200 @@ -42,6 +42,7 @@ #include #include #include +#include #include #include --- head-2010-05-25.orig/fs/proc/kcore.c 2010-05-25 09:31:21.000000000 +0200 +++ head-2010-05-25/fs/proc/kcore.c 2010-04-15 10:15:01.000000000 +0200 @@ -130,7 +130,7 @@ static void __kcore_update_ram(struct li } -#ifdef CONFIG_HIGHMEM +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_XEN) /* * If no highmem, we can assume [0...max_low_pfn) continuous range of memory * because memory hole is not as big as !HIGHMEM case. 
@@ -146,7 +146,11 @@ static int kcore_update_ram(void) if (!ent) return -ENOMEM; ent->addr = (unsigned long)__va(0); +#ifdef CONFIG_HIGHMEM ent->size = max_low_pfn << PAGE_SHIFT; +#else + ent->size = max_pfn << PAGE_SHIFT; +#endif ent->type = KCORE_RAM; list_add(&ent->list, &head); __kcore_update_ram(&head); --- head-2010-05-25.orig/include/xen/xenbus.h 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/include/xen/xenbus.h 2010-04-15 11:30:32.000000000 +0200 @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include --- head-2010-05-25.orig/kernel/early_res.c 2010-05-25 09:31:21.000000000 +0200 +++ head-2010-05-25/kernel/early_res.c 2010-04-15 10:17:11.000000000 +0200 @@ -321,11 +321,19 @@ void __init free_early(u64 start, u64 en i = find_overlapped_early(start, end); r = &early_res[i]; +#ifdef CONFIG_XEN /* Shouldn't it always be this way? */ + if (i >= max_early_res || r->end < end || r->start > start) + panic("free_early on not reserved area: %llx-%llx!", + start, end - 1); + + drop_range_partial(i, start, end); +#else if (i >= max_early_res || r->end != end || r->start != start) panic("free_early on not reserved area: %llx-%llx!", start, end - 1); drop_range(i); +#endif } void __init free_early_partial(u64 start, u64 end) @@ -393,9 +401,7 @@ static void __init subtract_early_res(st int __init get_free_all_memory_range(struct range **rangep, int nodeid) { int i, count; - u64 start = 0, end; - u64 size; - u64 mem; + u64 end, size, mem = -1ULL; struct range *range; int nr_range; @@ -409,9 +415,11 @@ int __init get_free_all_memory_range(str end = get_max_mapped(); #ifdef MAX_DMA32_PFN if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) - start = MAX_DMA32_PFN << PAGE_SHIFT; + mem = find_fw_memmap_area(MAX_DMA32_PFN << PAGE_SHIFT, end, + size, sizeof(struct range)); #endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); + if (mem == -1ULL) + mem = find_fw_memmap_area(0, end, size, sizeof(struct range)); if (mem == -1ULL) 
panic("can not find more space for range free"); --- head-2010-05-25.orig/kernel/resource.c 2010-05-25 09:31:21.000000000 +0200 +++ head-2010-05-25/kernel/resource.c 2010-04-15 10:17:16.000000000 +0200 @@ -343,6 +343,7 @@ int walk_system_ram_range(unsigned long #endif +#if !defined(CONFIG_XEN) || !defined(CONFIG_ARCH_HAS_WALK_MEMORY) static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) { return 1; @@ -355,6 +356,7 @@ int __weak page_is_ram(unsigned long pfn { return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1; } +#endif /* * Find empty slot in the resource tree given range and alignment. --- head-2010-05-25.orig/lib/swiotlb-xen.c 2010-03-24 16:00:05.000000000 +0100 +++ head-2010-05-25/lib/swiotlb-xen.c 2010-04-15 10:54:48.000000000 +0200 @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include --- head-2010-05-25.orig/mm/page_alloc.c 2010-03-24 15:25:21.000000000 +0100 +++ head-2010-05-25/mm/page_alloc.c 2010-03-25 16:31:14.000000000 +0100 @@ -4696,7 +4696,8 @@ static void __setup_per_zone_wmarks(void high = percpu_pagelist_fraction ? zone->present_pages / percpu_pagelist_fraction : 5 * zone_batchsize(zone); - setup_pagelist_highmark(zone_pcp(zone, cpu), high); + setup_pagelist_highmark( + per_cpu_ptr(zone->pageset, cpu), high); } } #endif