qubes-linux-kernel/patches.xen/xen3-patch-2.6.34

3738 lines
114 KiB
Plaintext
Raw Normal View History

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.34
Patch-mainline: 2.6.34
This patch contains the differences between 2.6.33 and 2.6.34.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.34" by xen-port-patches.py
--- head-2010-05-25.orig/arch/x86/Kconfig 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/Kconfig 2010-03-25 16:41:03.000000000 +0100
@@ -106,7 +106,7 @@ config SBUS
bool
config NEED_DMA_MAP_STATE
- def_bool (X86_64 || DMAR || DMA_API_DEBUG)
+ def_bool (X86_64 || DMAR || DMA_API_DEBUG || SWIOTLB)
config GENERIC_ISA_DMA
def_bool y
--- head-2010-05-25.orig/arch/x86/ia32/ia32entry-xen.S 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/ia32/ia32entry-xen.S 2010-05-12 09:08:52.000000000 +0200
@@ -456,7 +456,7 @@ ia32_sys_call_table:
.quad quiet_ni_syscall /* old mpx syscall holder */
.quad sys_setpgid
.quad quiet_ni_syscall /* old ulimit syscall holder */
- .quad sys32_olduname
+ .quad sys_olduname
.quad sys_umask /* 60 */
.quad sys_chroot
.quad compat_sys_ustat
@@ -479,7 +479,7 @@ ia32_sys_call_table:
.quad compat_sys_settimeofday
.quad sys_getgroups16 /* 80 */
.quad sys_setgroups16
- .quad sys32_old_select
+ .quad compat_sys_old_select
.quad sys_symlink
.quad sys_lstat
.quad sys_readlink /* 85 */
@@ -506,7 +506,7 @@ ia32_sys_call_table:
.quad compat_sys_newstat
.quad compat_sys_newlstat
.quad compat_sys_newfstat
- .quad sys32_uname
+ .quad sys_uname
.quad stub32_iopl /* 110 */
.quad sys_vhangup
.quad quiet_ni_syscall /* old "idle" system call */
@@ -519,7 +519,7 @@ ia32_sys_call_table:
.quad stub32_sigreturn
.quad stub32_clone /* 120 */
.quad sys_setdomainname
- .quad sys_uname
+ .quad sys_newuname
.quad sys_modify_ldt
.quad compat_sys_adjtimex
.quad sys32_mprotect /* 125 */
--- head-2010-05-25.orig/arch/x86/include/asm/i8259.h 2010-05-25 09:31:21.000000000 +0200
+++ head-2010-05-25/arch/x86/include/asm/i8259.h 2010-03-25 11:31:58.000000000 +0100
@@ -54,11 +54,13 @@ extern struct irq_chip i8259A_chip;
struct legacy_pic {
int nr_legacy_irqs;
+#ifndef CONFIG_XEN
struct irq_chip *chip;
void (*mask_all)(void);
void (*restore_mask)(void);
void (*init)(int auto_eoi);
int (*irq_pending)(unsigned int irq);
+#endif
void (*make_irq)(unsigned int irq);
};
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap.h 2010-04-15 10:29:09.000000000 +0200
@@ -82,6 +82,9 @@ enum fixed_addresses {
#endif
FIX_DBGP_BASE,
FIX_EARLYCON_MEM_BASE,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ FIX_OHCI1394_BASE,
+#endif
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
@@ -125,17 +128,20 @@ enum fixed_addresses {
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*
- * We round it up to the next 256 pages boundary so that we
- * can have a single pgd entry and a single pte table:
+ * If necessary we round it up to the next 256 pages boundary so
+ * that we can have a single pgd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_SLOTS 4
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
- (__end_of_permanent_fixed_addresses & 255),
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
- FIX_OHCI1394_BASE,
-#endif
+#define TOTAL_FIX_BTMAPS (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+ FIX_BTMAP_END =
+ (__end_of_permanent_fixed_addresses ^
+ (__end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS - 1)) &
+ -PTRS_PER_PTE
+ ? __end_of_permanent_fixed_addresses + TOTAL_FIX_BTMAPS -
+ (__end_of_permanent_fixed_addresses & (TOTAL_FIX_BTMAPS - 1))
+ : __end_of_permanent_fixed_addresses,
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
#ifdef CONFIG_X86_32
FIX_WP_TEST,
#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/io.h 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/io.h 2010-05-12 09:09:25.000000000 +0200
@@ -1,8 +1,42 @@
#ifndef _ASM_X86_IO_H
#define _ASM_X86_IO_H
+/*
+ * This file contains the definitions for the x86 IO instructions
+ * inb/inw/inl/outb/outw/outl and the "string versions" of the same
+ * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing"
+ * versions of the single-IO instructions (inb_p/inw_p/..).
+ *
+ * This file is not meant to be obfuscating: it's just complicated
+ * to (a) handle it all in a way that makes gcc able to optimize it
+ * as well as possible and (b) trying to avoid writing the same thing
+ * over and over again with slight variations and possibly making a
+ * mistake somewhere.
+ */
+
+/*
+ * Thanks to James van Artsdalen for a better timing-fix than
+ * the two short jumps: using outb's to a nonexistent port seems
+ * to guarantee better timings even on fast machines.
+ *
+ * On the other hand, I'd like to be sure of a non-existent port:
+ * I feel a bit unsafe about using 0x80 (should be safe, though)
+ *
+ * Linus
+ */
+
+ /*
+ * Bit simplified and optimized by Jan Hubicka
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999.
+ *
+ * isa_memset_io, isa_memcpy_fromio, isa_memcpy_toio added,
+ * isa_read[wl] and isa_write[wl] fixed
+ * - Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ */
+
#define ARCH_HAS_IOREMAP_WC
+#include <linux/string.h>
#include <linux/compiler.h>
#include <asm-generic/int-ll64.h>
#include <asm/page.h>
@@ -84,8 +118,6 @@ static inline void writeq(__u64 val, vol
#define readq readq
#define writeq writeq
-#define native_io_delay xen_io_delay
-
/**
* virt_to_phys - map virtual addresses to physical
* @address: address to remap
@@ -181,11 +213,110 @@ static inline void __iomem *ioremap(reso
extern void iounmap(volatile void __iomem *addr);
-#ifdef CONFIG_X86_32
-# include "../../asm/io_32.h"
-#else
-# include "../../asm/io_64.h"
+#ifdef __KERNEL__
+
+#include <asm-generic/iomap.h>
+
+#include <linux/vmalloc.h>
+
+/*
+ * Convert a virtual cached pointer to an uncached pointer
+ */
+#define xlate_dev_kmem_ptr(p) p
+
+static inline void
+memset_io(volatile void __iomem *addr, unsigned char val, size_t count)
+{
+ memset((void __force *)addr, val, count);
+}
+
+static inline void
+memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count)
+{
+ memcpy(dst, (const void __force *)src, count);
+}
+
+static inline void
+memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)
+{
+ memcpy((void __force *)dst, src, count);
+}
+
+/*
+ * Cache management
+ *
+ * This is needed for two cases:
+ * 1. Out of order aware processors
+ * 2. Accidentally out of order processors (PPro errata #51)
+ */
+
+static inline void flush_write_buffers(void)
+{
+#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+ asm volatile("lock; addl $0,0(%%esp)": : :"memory");
+#endif
+}
+
+#endif /* __KERNEL__ */
+
+extern void native_io_delay(void);
+
+extern int io_delay_type;
+extern void io_delay_init(void);
+
+static inline void slow_down_io(void)
+{
+ native_io_delay();
+#ifdef REALLY_SLOW_IO
+ native_io_delay();
+ native_io_delay();
+ native_io_delay();
#endif
+}
+
+#define BUILDIO(bwl, bw, type) \
+static inline void out##bwl(unsigned type value, int port) \
+{ \
+ asm volatile("out" #bwl " %" #bw "0, %w1" \
+ : : "a"(value), "Nd"(port)); \
+} \
+ \
+static inline unsigned type in##bwl(int port) \
+{ \
+ unsigned type value; \
+ asm volatile("in" #bwl " %w1, %" #bw "0" \
+ : "=a"(value) : "Nd"(port)); \
+ return value; \
+} \
+ \
+static inline void out##bwl##_p(unsigned type value, int port) \
+{ \
+ out##bwl(value, port); \
+ slow_down_io(); \
+} \
+ \
+static inline unsigned type in##bwl##_p(int port) \
+{ \
+ unsigned type value = in##bwl(port); \
+ slow_down_io(); \
+ return value; \
+} \
+ \
+static inline void outs##bwl(int port, const void *addr, unsigned long count) \
+{ \
+ asm volatile("rep; outs" #bwl \
+ : "+S"(addr), "+c"(count) : "d"(port)); \
+} \
+ \
+static inline void ins##bwl(int port, void *addr, unsigned long count) \
+{ \
+ asm volatile("rep; ins" #bwl \
+ : "+D"(addr), "+c"(count) : "d"(port)); \
+}
+
+BUILDIO(b, b, char)
+BUILDIO(w, w, short)
+BUILDIO(l, , int)
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
@@ -200,8 +331,6 @@ extern void iounmap(volatile void __iome
&& bvec_to_pseudophys(vec1) + (vec1)->bv_len \
== bvec_to_pseudophys(vec2))
-#undef __ISA_IO_base
-
#endif
extern void *xlate_dev_mem_ptr(unsigned long phys);
@@ -223,6 +352,7 @@ extern void __iomem *early_ioremap(resou
extern void __iomem *early_memremap(resource_size_t phys_addr,
unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
+extern void fixup_early_ioremap(void);
#define IO_SPACE_LIMIT 0xffff
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irq_vectors.h 2010-03-29 18:11:31.000000000 +0200
@@ -3,11 +3,9 @@
#define MCE_VECTOR 0x12
+#define IA32_SYSCALL_VECTOR 0x80
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
-# define IA32_SYSCALL_VECTOR 0x80
-#else
-# define IA32_SYSCALL_VECTOR 0x80
#endif
#define RESCHEDULE_VECTOR 0
@@ -57,9 +55,17 @@ static inline int invalid_vm86_irq(int i
* are bound using the provided bind/unbind functions.
*/
#define PIRQ_BASE 0
+/* PHYSDEVOP_pirq_eoi_gmfn restriction: */
+#define PIRQ_MAX(n) ((n) < (1 << (PAGE_SHIFT + 3)) - NR_VECTORS \
+ ? (n) : (1 << (PAGE_SHIFT + 3)) - NR_VECTORS)
+
+#define IO_APIC_VECTOR_LIMIT PIRQ_MAX(32 * MAX_IO_APICS)
-#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
-#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
+#ifdef CONFIG_SPARSE_IRQ
+# define CPU_VECTOR_LIMIT PIRQ_MAX(64 * NR_CPUS)
+#else
+# define CPU_VECTOR_LIMIT PIRQ_MAX(32 * NR_CPUS)
+#endif
#ifdef CONFIG_X86_IO_APIC
# if !defined(NR_CPUS) || !defined(MAX_IO_APICS)
@@ -69,10 +75,11 @@ static inline int invalid_vm86_irq(int i
(CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
(NR_VECTORS + CPU_VECTOR_LIMIT) : \
(NR_VECTORS + IO_APIC_VECTOR_LIMIT))
-# elif NR_CPUS < MAX_IO_APICS
-# define NR_PIRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT)
# else
-# define NR_PIRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
+# define NR_PIRQS \
+ (CPU_VECTOR_LIMIT < IO_APIC_VECTOR_LIMIT ? \
+ (NR_VECTORS + CPU_VECTOR_LIMIT) : \
+ (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
# endif
#elif defined(CONFIG_XEN_PCIDEV_FRONTEND)
# define NR_PIRQS (NR_VECTORS + CPU_VECTOR_LIMIT)
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pci.h 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pci.h 2010-03-25 17:13:42.000000000 +0100
@@ -48,8 +48,15 @@ static inline int pci_proc_domain(struct
#ifdef CONFIG_PCI
extern unsigned int pcibios_assign_all_busses(void);
+extern int pci_legacy_init(void);
+# ifdef CONFIG_ACPI
+# define x86_default_pci_init pci_acpi_init
+# else
+# define x86_default_pci_init pci_legacy_init
+# endif
#else
-#define pcibios_assign_all_busses() 0
+# define pcibios_assign_all_busses() 0
+# define x86_default_pci_init NULL
#endif
#include <asm/hypervisor.h>
@@ -97,41 +104,14 @@ extern void pci_iommu_alloc(void);
#define PCI_DMA_BUS_IS_PHYS 0
-#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) \
- || defined(CONFIG_SWIOTLB)
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
- dma_addr_t ADDR_NAME;
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
- __u32 LEN_NAME;
-#define pci_unmap_addr(PTR, ADDR_NAME) \
- ((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
- (((PTR)->ADDR_NAME) = (VAL))
-#define pci_unmap_len(PTR, LEN_NAME) \
- ((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
- (((PTR)->LEN_NAME) = (VAL))
-
-#else
-
-#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
-#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
-#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
-#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
- do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
-#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
-#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
- do { break; } while (pci_unmap_len(PTR, LEN_NAME))
-
-#endif
-
#endif /* __KERNEL__ */
#ifdef CONFIG_X86_64
#include "../../asm/pci_64.h"
#endif
+void dma32_reserve_bootmem(void);
+
/* implement the pci_ DMA API in terms of the generic device dma_ one */
#include <asm-generic/pci-dma-compat.h>
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgalloc.h 2010-03-25 16:41:03.000000000 +0100
@@ -27,6 +27,11 @@ pmd_t *early_get_pmd(unsigned long va);
#endif
/*
+ * Flags to use when allocating a user page table page.
+ */
+extern gfp_t __userpte_alloc_gfp;
+
+/*
* Allocate and free page tables.
*/
extern pgd_t *pgd_alloc(struct mm_struct *);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-04-15 10:48:32.000000000 +0200
@@ -18,7 +18,6 @@
#include <linux/threads.h>
#include <linux/bitops.h>
-#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/sched.h>
@@ -79,7 +78,7 @@ do { \
* The i386 doesn't have any external MMU info: the kernel page
* tables contain all the necessary information.
*/
-#define update_mmu_cache(vma, address, pte) do { } while (0)
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
void make_lowmem_page_readonly(void *va, unsigned int feature);
void make_lowmem_page_writable(void *va, unsigned int feature);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-25 16:41:03.000000000 +0100
@@ -136,7 +136,7 @@ static inline int pgd_large(pgd_t pgd) {
#define pte_unmap(pte) /* NOP */
#define pte_unmap_nested(pte) /* NOP */
-#define update_mmu_cache(vma, address, pte) do { } while (0)
+#define update_mmu_cache(vma, address, ptep) do { } while (0)
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp.h 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp.h 2010-04-26 11:32:06.000000000 +0200
@@ -135,6 +135,8 @@ int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);
void native_play_dead(void);
void play_dead_common(void);
+void wbinvd_on_cpu(int cpu);
+int wbinvd_on_all_cpus(void);
#else /* CONFIG_XEN */
@@ -162,8 +164,19 @@ static inline int num_booting_cpus(void)
{
return cpumask_weight(cpu_callout_mask);
}
+#elif /* !CONFIG_SMP && */ !defined(CONFIG_XEN)
+#define wbinvd_on_cpu(cpu) wbinvd()
+static inline int wbinvd_on_all_cpus(void)
+{
+ wbinvd();
+ return 0;
+}
#endif /* CONFIG_SMP */
+#ifdef CONFIG_XEN
+int wbinvd_on_all_cpus(void);
+#endif
+
extern unsigned disabled_cpus __cpuinitdata;
#include <asm/smp-processor-id.h>
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system.h 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system.h 2010-03-25 16:41:14.000000000 +0100
@@ -31,7 +31,7 @@ extern void show_regs_common(void);
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
- , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
+ , [stack_canary] "=m" (stack_canary.canary)
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
@@ -113,7 +113,7 @@ do { \
"movq %P[task_canary](%%rsi),%%r8\n\t" \
"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
#define __switch_canary_oparam \
- , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+ , [gs_canary] "=m" (irq_stack_union.stack_canary)
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
@@ -132,7 +132,7 @@ do { \
__switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
"movq %%rax,%%rdi\n\t" \
- "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
+ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
"jnz ret_from_fork\n\t" \
RESTORE_CONTEXT \
: "=a" (last) \
@@ -142,7 +142,7 @@ do { \
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
[_tif_fork] "i" (_TIF_FORK), \
[thread_info] "i" (offsetof(struct task_struct, stack)), \
- [current_task] "m" (per_cpu_var(current_task)) \
+ [current_task] "m" (current_task) \
__switch_canary_iparam \
: "memory", "cc" __EXTRA_CLOBBER)
#endif
--- head-2010-05-25.orig/arch/x86/kernel/apic/io_apic-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/apic/io_apic-xen.c 2010-05-12 09:09:25.000000000 +0200
@@ -36,6 +36,7 @@
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/jiffies.h> /* time_after() */
+#include <linux/slab.h>
#ifdef CONFIG_ACPI
#include <acpi/acpi_bus.h>
#endif
@@ -69,9 +70,12 @@
#include <xen/evtchn.h>
/* Fake i8259 */
-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
-#define disable_8259A_irq(_irq) ((void)0)
-#define i8259A_irq_pending(_irq) (0)
+static void make_8259A_irq(unsigned int irq) { io_apic_irqs &= ~(1UL<<irq); }
+static const struct legacy_pic xen_legacy_pic = {
+ .nr_legacy_irqs = NR_IRQS_LEGACY,
+ .make_irq = make_8259A_irq
+};
+#define legacy_pic (&xen_legacy_pic)
unsigned long io_apic_irqs;
#endif /* CONFIG_XEN */
@@ -86,9 +90,9 @@ unsigned long io_apic_irqs;
*/
int sis_apic_bug = -1;
-static DEFINE_SPINLOCK(ioapic_lock);
+static DEFINE_RAW_SPINLOCK(ioapic_lock);
#ifndef CONFIG_XEN
-static DEFINE_SPINLOCK(vector_lock);
+static DEFINE_RAW_SPINLOCK(vector_lock);
#endif
/*
@@ -110,12 +114,8 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCE
int mp_irq_entries;
#ifndef CONFIG_XEN
-/* Number of legacy interrupts */
-static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
/* GSI interrupts */
static int nr_irqs_gsi = NR_IRQS_LEGACY;
-#else
-#define nr_legacy_irqs NR_IRQS_LEGACY
#endif
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
@@ -160,33 +160,10 @@ static struct irq_pin_list *get_one_free
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg irq_cfgx[] = {
+static struct irq_cfg irq_cfgx[NR_IRQS_LEGACY];
#else
-static struct irq_cfg irq_cfgx[NR_IRQS] = {
+static struct irq_cfg irq_cfgx[NR_IRQS];
#endif
- [0] = { .vector = IRQ0_VECTOR, },
- [1] = { .vector = IRQ1_VECTOR, },
- [2] = { .vector = IRQ2_VECTOR, },
- [3] = { .vector = IRQ3_VECTOR, },
- [4] = { .vector = IRQ4_VECTOR, },
- [5] = { .vector = IRQ5_VECTOR, },
- [6] = { .vector = IRQ6_VECTOR, },
- [7] = { .vector = IRQ7_VECTOR, },
- [8] = { .vector = IRQ8_VECTOR, },
- [9] = { .vector = IRQ9_VECTOR, },
- [10] = { .vector = IRQ10_VECTOR, },
- [11] = { .vector = IRQ11_VECTOR, },
- [12] = { .vector = IRQ12_VECTOR, },
- [13] = { .vector = IRQ13_VECTOR, },
- [14] = { .vector = IRQ14_VECTOR, },
- [15] = { .vector = IRQ15_VECTOR, },
-};
-
-void __init io_apic_disable_legacy(void)
-{
- nr_legacy_irqs = 0;
- nr_irqs_gsi = 0;
-}
int __init arch_early_irq_init(void)
{
@@ -196,6 +173,11 @@ int __init arch_early_irq_init(void)
int node;
int i;
+ if (!legacy_pic->nr_legacy_irqs) {
+ nr_irqs_gsi = 0;
+ io_apic_irqs = ~0UL;
+ }
+
cfg = irq_cfgx;
count = ARRAY_SIZE(irq_cfgx);
node= cpu_to_node(boot_cpu_id);
@@ -205,8 +187,14 @@ int __init arch_early_irq_init(void)
desc->chip_data = &cfg[i];
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
- if (i < nr_legacy_irqs)
- cpumask_setall(cfg[i].domain);
+ /*
+ * For legacy IRQs, start by assigning irq0..irq15 to
+ * IRQ0_VECTOR..IRQ15_VECTOR on cpu 0.
+ */
+ if (i < legacy_pic->nr_legacy_irqs) {
+ cfg[i].vector = IRQ0_VECTOR + i;
+ cpumask_set_cpu(0, cfg[i].domain);
+ }
}
return 0;
@@ -451,7 +439,7 @@ static bool io_apic_level_ack_pending(st
struct irq_pin_list *entry;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
int pin;
@@ -460,11 +448,11 @@ static bool io_apic_level_ack_pending(st
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
if (reg & IO_APIC_REDIR_REMOTE_IRR) {
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return true;
}
}
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return false;
}
@@ -480,10 +468,10 @@ static struct IO_APIC_route_entry ioapic
{
union entry_union eu;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return eu.entry;
}
#endif
@@ -507,9 +495,9 @@ __ioapic_write_entry(int apic, int pin,
void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__ioapic_write_entry(apic, pin, e);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
#ifndef CONFIG_XEN
@@ -523,10 +511,10 @@ static void ioapic_mask_entry(int apic,
unsigned long flags;
union entry_union eu = { .entry.mask = 1 };
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
/*
@@ -653,9 +641,9 @@ static void mask_IO_APIC_irq_desc(struct
BUG_ON(!cfg);
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__mask_IO_APIC_irq(cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
@@ -663,9 +651,9 @@ static void unmask_IO_APIC_irq_desc(stru
struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__unmask_IO_APIC_irq(cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void mask_IO_APIC_irq(unsigned int irq)
@@ -922,7 +910,7 @@ static int __init find_isa_irq_apic(int
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < nr_legacy_irqs) {
+ if (irq < legacy_pic->nr_legacy_irqs) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -1198,12 +1186,12 @@ void lock_vector_lock(void)
/* Used to the online set of cpus does not change
* during assign_irq_vector.
*/
- spin_lock(&vector_lock);
+ raw_spin_lock(&vector_lock);
}
void unlock_vector_lock(void)
{
- spin_unlock(&vector_lock);
+ raw_spin_unlock(&vector_lock);
}
static int
@@ -1220,7 +1208,8 @@ __assign_irq_vector(int irq, struct irq_
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
+ static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+ static int current_offset = VECTOR_OFFSET_START % 8;
unsigned int old_vector;
int cpu, err;
cpumask_var_t tmp_mask;
@@ -1256,7 +1245,7 @@ next:
if (vector >= first_system_vector) {
/* If out of vectors on large boxen, must share them. */
offset = (offset + 1) % 8;
- vector = FIRST_DEVICE_VECTOR + offset;
+ vector = FIRST_EXTERNAL_VECTOR + offset;
}
if (unlikely(current_vector == vector))
continue;
@@ -1294,9 +1283,9 @@ int assign_irq_vector(int irq, struct ir
int err;
unsigned long flags;
- spin_lock_irqsave(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
err = __assign_irq_vector(irq, cfg, mask);
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
return err;
}
@@ -1330,14 +1319,27 @@ static void __clear_irq_vector(int irq,
void __setup_vector_irq(int cpu)
{
/* Initialize vector_irq on a new cpu */
- /* This function must be called with vector_lock held */
int irq, vector;
struct irq_cfg *cfg;
struct irq_desc *desc;
+ /*
+ * vector_lock will make sure that we don't run into irq vector
+ * assignments that might be happening on another cpu in parallel,
+ * while we set up our initial vector to irq mappings.
+ */
+ raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
for_each_irq_desc(irq, desc) {
cfg = desc->chip_data;
+
+ /*
+ * If it is a legacy IRQ handled by the legacy PIC, this cpu
+ * will be part of the irq_cfg's domain.
+ */
+ if (irq < legacy_pic->nr_legacy_irqs && !IO_APIC_IRQ(irq))
+ cpumask_set_cpu(cpu, cfg->domain);
+
if (!cpumask_test_cpu(cpu, cfg->domain))
continue;
vector = cfg->vector;
@@ -1353,6 +1355,7 @@ void __setup_vector_irq(int cpu)
if (!cpumask_test_cpu(cpu, cfg->domain))
per_cpu(vector_irq, cpu)[vector] = -1;
}
+ raw_spin_unlock(&vector_lock);
}
static struct irq_chip ioapic_chip;
@@ -1508,6 +1511,16 @@ static void setup_IO_APIC_irq(int apic_i
cfg = desc->chip_data;
+#ifndef CONFIG_XEN
+ /*
+ * For legacy irqs, cfg->domain starts with cpu 0 for legacy
+ * controllers like 8259. Now that IO-APIC can handle this irq, update
+ * the cfg->domain.
+ */
+ if (irq < legacy_pic->nr_legacy_irqs && cpumask_test_cpu(0, cfg->domain))
+ apic->vector_allocation_domain(0, cfg->domain);
+#endif
+
if (assign_irq_vector(irq, cfg, apic->target_cpus()))
return;
@@ -1533,8 +1546,10 @@ static void setup_IO_APIC_irq(int apic_i
}
ioapic_register_intr(irq, desc, trigger);
- if (irq < nr_legacy_irqs)
- disable_8259A_irq(irq);
+#ifndef CONFIG_XEN
+ if (irq < legacy_pic->nr_legacy_irqs)
+ legacy_pic->chip->mask(irq);
+#endif
ioapic_write_entry(apic_id, pin, entry);
}
@@ -1545,7 +1560,7 @@ static struct {
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id = 0, pin, idx, irq;
+ int apic_id, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
struct irq_cfg *cfg;
@@ -1553,14 +1568,7 @@ static void __init setup_IO_APIC_irqs(vo
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
-#ifdef CONFIG_ACPI
- if (!acpi_disabled && acpi_ioapic) {
- apic_id = mp_find_ioapic(0);
- if (apic_id < 0)
- apic_id = 0;
- }
-#endif
-
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
idx = find_irq_entry(apic_id, pin, mp_INT);
if (idx == -1) {
@@ -1582,6 +1590,9 @@ static void __init setup_IO_APIC_irqs(vo
irq = pin_2_irq(idx, apic_id, pin);
+ if ((apic_id > 0) && (irq > 16))
+ continue;
+
#ifdef CONFIG_XEN
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
continue;
@@ -1615,6 +1626,60 @@ static void __init setup_IO_APIC_irqs(vo
" (apicid-pin) not connected\n");
}
+/*
+ * For a GSI that is not on the first IO-APIC
+ * but that could not use acpi_register_gsi(),
+ * like some special SCI in IBM x3330
+ */
+void setup_IO_APIC_irq_extra(u32 gsi)
+{
+ int apic_id = 0, pin, idx, irq;
+ int node = cpu_to_node(boot_cpu_id);
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+
+ /*
+ * Convert 'gsi' to 'ioapic.pin'.
+ */
+ apic_id = mp_find_ioapic(gsi);
+ if (apic_id < 0)
+ return;
+
+ pin = mp_find_ioapic_pin(apic_id, gsi);
+ idx = find_irq_entry(apic_id, pin, mp_INT);
+ if (idx == -1)
+ return;
+
+ irq = pin_2_irq(idx, apic_id, pin);
+#ifdef CONFIG_XEN
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
+ return;
+#endif
+#ifdef CONFIG_SPARSE_IRQ
+ desc = irq_to_desc(irq);
+ if (desc)
+ return;
+#endif
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ return;
+ }
+
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, apic_id, pin);
+
+ if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[apic_id].apicid, pin);
+ return;
+ }
+ set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
+
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
+}
+
#ifndef CONFIG_XEN
/*
* Set up the timer pin, possibly with the 8259A-master behind.
@@ -1679,14 +1744,14 @@ __apicdebuginit(void) print_IO_APIC(void
for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic, 0);
reg_01.raw = io_apic_read(apic, 1);
if (reg_01.bits.version >= 0x10)
reg_02.raw = io_apic_read(apic, 2);
if (reg_01.bits.version >= 0x20)
reg_03.raw = io_apic_read(apic, 3);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
printk("\n");
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
@@ -1725,7 +1790,7 @@ __apicdebuginit(void) print_IO_APIC(void
printk(KERN_DEBUG ".... IRQ redirection table:\n");
printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
- " Stat Dmod Deli Vect: \n");
+ " Stat Dmod Deli Vect:\n");
for (i = 0; i <= reg_01.bits.entries; i++) {
struct IO_APIC_route_entry entry;
@@ -1903,12 +1968,12 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (!nr_legacy_irqs)
+ if (!legacy_pic->nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
- spin_lock_irqsave(&i8259A_lock, flags);
+ raw_spin_lock_irqsave(&i8259A_lock, flags);
v = inb(0xa1) << 8 | inb(0x21);
printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
@@ -1922,7 +1987,7 @@ __apicdebuginit(void) print_PIC(void)
outb(0x0a,0xa0);
outb(0x0a,0x20);
- spin_unlock_irqrestore(&i8259A_lock, flags);
+ raw_spin_unlock_irqrestore(&i8259A_lock, flags);
printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
@@ -1984,13 +2049,13 @@ void __init enable_IO_APIC(void)
* The number of IO-APIC IRQ registers (== #pins):
*/
for (apic = 0; apic < nr_ioapics; apic++) {
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(apic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
}
- if (!nr_legacy_irqs)
+ if (!legacy_pic->nr_legacy_irqs)
return;
#ifndef CONFIG_XEN
@@ -2052,7 +2117,7 @@ void disable_IO_APIC(void)
*/
clear_IO_APIC();
- if (!nr_legacy_irqs)
+ if (!legacy_pic->nr_legacy_irqs)
return;
/*
@@ -2131,9 +2196,9 @@ void __init setup_ioapic_ids_from_mpc(vo
for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
/* Read the register 0 value */
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic_id, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
old_id = mp_ioapics[apic_id].apicid;
@@ -2192,16 +2257,16 @@ void __init setup_ioapic_ids_from_mpc(vo
mp_ioapics[apic_id].apicid);
reg_00.bits.ID = mp_ioapics[apic_id].apicid;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(apic_id, 0, reg_00.raw);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
/*
* Sanity check
*/
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(apic_id, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
printk("could not set ID!\n");
else
@@ -2284,15 +2349,15 @@ static unsigned int startup_ioapic_irq(u
unsigned long flags;
struct irq_cfg *cfg;
- spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < nr_legacy_irqs) {
- disable_8259A_irq(irq);
- if (i8259A_irq_pending(irq))
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
+ if (irq < legacy_pic->nr_legacy_irqs) {
+ legacy_pic->chip->mask(irq);
+ if (legacy_pic->irq_pending(irq))
was_pending = 1;
}
cfg = irq_cfg(irq);
__unmask_IO_APIC_irq(cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
}
@@ -2303,9 +2368,9 @@ static int ioapic_retrigger_irq(unsigned
struct irq_cfg *cfg = irq_cfg(irq);
unsigned long flags;
- spin_lock_irqsave(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
return 1;
}
@@ -2398,14 +2463,14 @@ set_ioapic_affinity_irq_desc(struct irq_
irq = desc->irq;
cfg = desc->chip_data;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
ret = set_desc_affinity(desc, mask, &dest);
if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
__target_IO_APIC_irq(irq, dest, cfg);
}
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return ret;
}
@@ -2575,6 +2640,9 @@ void irq_force_complete_move(int irq)
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg = desc->chip_data;
+ if (!cfg)
+ return;
+
__irq_complete_move(&desc, cfg->vector);
}
#else
@@ -2640,9 +2708,9 @@ static void eoi_ioapic_irq(struct irq_de
irq = desc->irq;
cfg = desc->chip_data;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
__eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
static void ack_apic_level(unsigned int irq)
@@ -2825,8 +2893,8 @@ static inline void init_IO_APIC_traps(vo
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < nr_legacy_irqs)
- make_8259A_irq(irq);
+ if (irq < legacy_pic->nr_legacy_irqs)
+ legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
desc->chip = &no_irq_chip;
@@ -2984,7 +3052,7 @@ static inline void __init check_timer(vo
/*
* get/set the timer IRQ vector:
*/
- disable_8259A_irq(0);
+ legacy_pic->chip->mask(0);
assign_irq_vector(0, cfg, apic->target_cpus());
/*
@@ -2997,7 +3065,7 @@ static inline void __init check_timer(vo
* automatically.
*/
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
+ legacy_pic->init(1);
#ifdef CONFIG_X86_32
{
unsigned int ver;
@@ -3056,7 +3124,7 @@ static inline void __init check_timer(vo
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- enable_8259A_irq(0);
+ legacy_pic->chip->unmask(0);
}
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
@@ -3079,14 +3147,14 @@ static inline void __init check_timer(vo
*/
replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- enable_8259A_irq(0);
+ legacy_pic->chip->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
+ legacy_pic->chip->mask(0);
setup_nmi();
- enable_8259A_irq(0);
+ legacy_pic->chip->unmask(0);
}
goto out;
}
@@ -3094,7 +3162,7 @@ static inline void __init check_timer(vo
* Cleanup, just in case ...
*/
local_irq_disable();
- disable_8259A_irq(0);
+ legacy_pic->chip->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
@@ -3113,22 +3181,22 @@ static inline void __init check_timer(vo
lapic_register_intr(0, desc);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
- enable_8259A_irq(0);
+ legacy_pic->chip->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
local_irq_disable();
- disable_8259A_irq(0);
+ legacy_pic->chip->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as ExtINT IRQ...\n");
- init_8259A(0);
- make_8259A_irq(0);
+ legacy_pic->init(0);
+ legacy_pic->make_irq(0);
apic_write(APIC_LVT0, APIC_DM_EXTINT);
unlock_ExtINT_logic();
@@ -3177,7 +3245,7 @@ void __init setup_IO_APIC(void)
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
#endif
- io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
+ io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
/*
@@ -3190,7 +3258,7 @@ void __init setup_IO_APIC(void)
#endif
setup_IO_APIC_irqs();
init_IO_APIC_traps();
- if (nr_legacy_irqs)
+ if (legacy_pic->nr_legacy_irqs)
check_timer();
}
@@ -3248,13 +3316,13 @@ static int ioapic_resume(struct sys_devi
data = container_of(dev, struct sysfs_ioapic_data, dev);
entry = data->entry;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(dev->id, 0);
if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
reg_00.bits.ID = mp_ioapics[dev->id].apicid;
io_apic_write(dev->id, 0, reg_00.raw);
}
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
ioapic_write_entry(dev->id, i, entry[i]);
@@ -3317,7 +3385,7 @@ unsigned int create_irq_nr(unsigned int
if (irq_want < nr_irqs_gsi)
irq_want = nr_irqs_gsi;
- spin_lock_irqsave(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
for (new = irq_want; new < nr_irqs; new++) {
desc_new = irq_to_desc_alloc_node(new, node);
if (!desc_new) {
@@ -3336,14 +3404,11 @@ unsigned int create_irq_nr(unsigned int
irq = new;
break;
}
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+ if (irq > 0)
+ dynamic_irq_init_keep_chip_data(irq);
- if (irq > 0) {
- dynamic_irq_init(irq);
- /* restore it, in case dynamic_irq_init clear it */
- if (desc_new)
- desc_new->chip_data = cfg_new;
- }
return irq;
}
@@ -3365,20 +3430,13 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
unsigned long flags;
- struct irq_cfg *cfg;
- struct irq_desc *desc;
- /* store it, in case dynamic_irq_cleanup clear it */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- dynamic_irq_cleanup(irq);
- /* connect back irq_cfg */
- desc->chip_data = cfg;
+ dynamic_irq_cleanup_keep_chip_data(irq);
free_irte(irq);
- spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, cfg);
- spin_unlock_irqrestore(&vector_lock, flags);
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ __clear_irq_vector(irq, get_irq_chip_data(irq));
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
}
#endif /* CONFIG_XEN */
@@ -3916,9 +3974,9 @@ int __init io_apic_get_redir_entries (in
union IO_APIC_reg_01 reg_01;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return reg_01.bits.entries;
}
@@ -4010,7 +4068,7 @@ static int __io_apic_set_pci_routing(str
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
- if (irq >= nr_legacy_irqs) {
+ if (irq >= legacy_pic->nr_legacy_irqs) {
cfg = desc->chip_data;
if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
printk(KERN_INFO "can not add pin %d for irq %d\n",
@@ -4090,9 +4148,9 @@ int __init io_apic_get_unique_id(int ioa
if (physids_empty(apic_id_map))
apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
if (apic_id >= get_physical_broadcast()) {
printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
@@ -4126,10 +4184,10 @@ int __init io_apic_get_unique_id(int ioa
if (reg_00.bits.ID != apic_id) {
reg_00.bits.ID = apic_id;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_write(ioapic, 0, reg_00.raw);
reg_00.raw = io_apic_read(ioapic, 0);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
/* Sanity check */
if (reg_00.bits.ID != apic_id) {
@@ -4151,9 +4209,9 @@ int __init io_apic_get_version(int ioapi
union IO_APIC_reg_01 reg_01;
unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
reg_01.raw = io_apic_read(ioapic, 1);
- spin_unlock_irqrestore(&ioapic_lock, flags);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return reg_01.bits.version;
}
@@ -4186,27 +4244,23 @@ int acpi_get_override_irq(int bus_irq, i
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
- int pin, ioapic = 0, irq, irq_entry;
+ int pin, ioapic, irq, irq_entry;
struct irq_desc *desc;
const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
-#ifdef CONFIG_ACPI
- if (!acpi_disabled && acpi_ioapic) {
- ioapic = mp_find_ioapic(0);
- if (ioapic < 0)
- ioapic = 0;
- }
-#endif
-
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
irq_entry = find_irq_entry(ioapic, pin, mp_INT);
if (irq_entry == -1)
continue;
irq = pin_2_irq(irq_entry, ioapic, pin);
+ if ((ioapic > 0) && (irq > 16))
+ continue;
+
desc = irq_to_desc(irq);
/*
@@ -4394,3 +4448,26 @@ void __init mp_register_ioapic(int id, u
nr_ioapics++;
}
+
+#ifdef CONFIG_X86_MRST
+/* Enable IOAPIC early just for system timer */
+void __init pre_init_apic_IRQ0(void)
+{
+ struct irq_cfg *cfg;
+ struct irq_desc *desc;
+
+ printk(KERN_INFO "Early APIC setup for system timer0\n");
+#ifndef CONFIG_SMP
+ phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
+#endif
+ desc = irq_to_desc_alloc_node(0, 0);
+
+ setup_local_APIC();
+
+ cfg = irq_cfg(0);
+ add_pin_to_irq_node(cfg, 0, 0, 0);
+ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
+
+ setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
+}
+#endif
--- head-2010-05-25.orig/arch/x86/kernel/cpu/intel_cacheinfo.c 2010-05-25 09:20:14.000000000 +0200
+++ head-2010-05-25/arch/x86/kernel/cpu/intel_cacheinfo.c 2010-05-25 09:25:34.000000000 +0200
@@ -301,7 +301,7 @@ struct _cache_attr {
ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};
-#ifdef CONFIG_CPU_SUP_AMD
+#if defined(CONFIG_CPU_SUP_AMD) && !defined(CONFIG_XEN)
static unsigned int __cpuinit amd_calc_l3_indices(void)
{
/*
@@ -873,7 +873,7 @@ static struct attribute *default_attrs[]
static struct attribute *default_l3_attrs[] = {
DEFAULT_SYSFS_CACHE_ATTRS,
-#ifdef CONFIG_CPU_SUP_AMD
+#if defined(CONFIG_CPU_SUP_AMD) && !defined(CONFIG_XEN)
&cache_disable_0.attr,
&cache_disable_1.attr,
#endif
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2010-04-15 13:39:58.000000000 +0200
@@ -1,6 +1,7 @@
#include <linux/init.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <xen/interface/xen.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-25 11:33:03.000000000 +0100
@@ -25,12 +25,12 @@ void generic_get_mtrr(unsigned int reg,
*type = op.u.read_memtype.type;
}
-struct mtrr_ops generic_mtrr_ops = {
+const struct mtrr_ops generic_mtrr_ops = {
.use_intel_if = 1,
.get = generic_get_mtrr,
};
-struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
+const struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
unsigned int num_var_ranges;
unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
--- head-2010-05-25.orig/arch/x86/kernel/e820-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/e820-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -12,17 +12,10 @@
#include <linux/types.h>
#include <linux/init.h>
#include <linux/bootmem.h>
-#include <linux/ioport.h>
-#include <linux/string.h>
-#include <linux/kexec.h>
-#include <linux/module.h>
-#include <linux/mm.h>
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/firmware-map.h>
-#include <asm/pgtable.h>
-#include <asm/page.h>
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/setup.h>
@@ -550,31 +543,55 @@ u64 __init e820_remove_range(u64 start,
int checktype)
{
int i;
+ u64 end;
u64 real_removed_size = 0;
if (size > (ULLONG_MAX - start))
size = ULLONG_MAX - start;
+ end = start + size;
+ printk(KERN_DEBUG "e820 remove range: %016Lx - %016Lx ",
+ (unsigned long long) start,
+ (unsigned long long) end);
+ if (checktype)
+ e820_print_type(old_type);
+ printk(KERN_CONT "\n");
+
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
u64 final_start, final_end;
+ u64 ei_end;
if (checktype && ei->type != old_type)
continue;
+
+ ei_end = ei->addr + ei->size;
/* totally covered? */
- if (ei->addr >= start &&
- (ei->addr + ei->size) <= (start + size)) {
+ if (ei->addr >= start && ei_end <= end) {
real_removed_size += ei->size;
memset(ei, 0, sizeof(struct e820entry));
continue;
}
+
+ /* range to remove lies strictly inside this entry? */
+ if (ei->addr < start && ei_end > end) {
+ e820_add_region(end, ei_end - end, ei->type);
+ ei->size = start - ei->addr;
+ real_removed_size += size;
+ continue;
+ }
+
/* partially covered */
final_start = max(start, ei->addr);
- final_end = min(start + size, ei->addr + ei->size);
+ final_end = min(end, ei_end);
if (final_start >= final_end)
continue;
real_removed_size += final_end - final_start;
+ /*
+ * the remaining part could be the head or the tail, so the
+ * size must be updated first.
+ */
ei->size -= final_end - final_start;
if (ei->addr < final_start)
continue;
@@ -769,320 +786,44 @@ core_initcall(e820_mark_nvs_memory);
#endif
/*
- * Early reserved memory areas.
- */
-#define MAX_EARLY_RES 32
-
-struct early_res {
- u64 start, end;
- char name[16];
- char overlap_ok;
-};
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
-#ifndef CONFIG_XEN
- { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
-#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
-#endif
-#endif
- {}
-};
-
-static int __init find_overlapped_early(u64 start, u64 end)
-{
- int i;
- struct early_res *r;
-
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- r = &early_res[i];
- if (end > r->start && start < r->end)
- break;
- }
-
- return i;
-}
-
-/*
- * Drop the i-th range from the early reservation map,
- * by copying any higher ranges down one over it, and
- * clearing what had been the last slot.
- */
-static void __init drop_range(int i)
-{
- int j;
-
- for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
- ;
-
- memmove(&early_res[i], &early_res[i + 1],
- (j - 1 - i) * sizeof(struct early_res));
-
- early_res[j - 1].end = 0;
-}
-
-/*
- * Split any existing ranges that:
- * 1) are marked 'overlap_ok', and
- * 2) overlap with the stated range [start, end)
- * into whatever portion (if any) of the existing range is entirely
- * below or entirely above the stated range. Drop the portion
- * of the existing range that overlaps with the stated range,
- * which will allow the caller of this routine to then add that
- * stated range without conflicting with any existing range.
+ * Find a free area with specified alignment in a specific range.
*/
-static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
{
int i;
- struct early_res *r;
- u64 lower_start, lower_end;
- u64 upper_start, upper_end;
- char name[16];
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- r = &early_res[i];
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 addr;
+ u64 ei_start, ei_last;
- /* Continue past non-overlapping ranges */
- if (end <= r->start || start >= r->end)
+ if (ei->type != E820_RAM)
continue;
- /*
- * Leave non-ok overlaps as is; let caller
- * panic "Overlapping early reservations"
- * when it hits this overlap.
- */
- if (!r->overlap_ok)
- return;
-
- /*
- * We have an ok overlap. We will drop it from the early
- * reservation map, and add back in any non-overlapping
- * portions (lower or upper) as separate, overlap_ok,
- * non-overlapping ranges.
- */
-
- /* 1. Note any non-overlapping (lower or upper) ranges. */
- strncpy(name, r->name, sizeof(name) - 1);
-
- lower_start = lower_end = 0;
- upper_start = upper_end = 0;
- if (r->start < start) {
- lower_start = r->start;
- lower_end = start;
- }
- if (r->end > end) {
- upper_start = end;
- upper_end = r->end;
- }
-
- /* 2. Drop the original ok overlapping range */
- drop_range(i);
-
- i--; /* resume for-loop on copied down entry */
-
- /* 3. Add back in any non-overlapping ranges. */
- if (lower_end)
- reserve_early_overlap_ok(lower_start, lower_end, name);
- if (upper_end)
- reserve_early_overlap_ok(upper_start, upper_end, name);
- }
-}
-
-static void __init __reserve_early(u64 start, u64 end, char *name,
- int overlap_ok)
-{
- int i;
- struct early_res *r;
-
- i = find_overlapped_early(start, end);
- if (i >= MAX_EARLY_RES)
- panic("Too many early reservations");
- r = &early_res[i];
- if (r->end)
- panic("Overlapping early reservations "
- "%llx-%llx %s to %llx-%llx %s\n",
- start, end - 1, name?name:"", r->start,
- r->end - 1, r->name);
- r->start = start;
- r->end = end;
- r->overlap_ok = overlap_ok;
- if (name)
- strncpy(r->name, name, sizeof(r->name) - 1);
-}
-
-/*
- * A few early reservtations come here.
- *
- * The 'overlap_ok' in the name of this routine does -not- mean it
- * is ok for these reservations to overlap an earlier reservation.
- * Rather it means that it is ok for subsequent reservations to
- * overlap this one.
- *
- * Use this entry point to reserve early ranges when you are doing
- * so out of "Paranoia", reserving perhaps more memory than you need,
- * just in case, and don't mind a subsequent overlapping reservation
- * that is known to be needed.
- *
- * The drop_overlaps_that_are_ok() call here isn't really needed.
- * It would be needed if we had two colliding 'overlap_ok'
- * reservations, so that the second such would not panic on the
- * overlap with the first. We don't have any such as of this
- * writing, but might as well tolerate such if it happens in
- * the future.
- */
-void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
-{
- drop_overlaps_that_are_ok(start, end);
- __reserve_early(start, end, name, 1);
-}
-
-/*
- * Most early reservations come here.
- *
- * We first have drop_overlaps_that_are_ok() drop any pre-existing
- * 'overlap_ok' ranges, so that we can then reserve this memory
- * range without risk of panic'ing on an overlapping overlap_ok
- * early reservation.
- */
-void __init reserve_early(u64 start, u64 end, char *name)
-{
- if (start >= end)
- return;
-
- drop_overlaps_that_are_ok(start, end);
- __reserve_early(start, end, name, 0);
-}
-
-void __init free_early(u64 start, u64 end)
-{
- struct early_res *r;
- int i;
-
- i = find_overlapped_early(start, end);
- r = &early_res[i];
- if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
- panic("free_early on not reserved area: %llx-%llx!",
- start, end - 1);
-
- drop_range(i);
-}
-
-void __init early_res_to_bootmem(u64 start, u64 end)
-{
- int i, count;
- u64 final_start, final_end;
-
- count = 0;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
- count++;
-
- printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
- count, start, end);
- for (i = 0; i < count; i++) {
- struct early_res *r = &early_res[i];
- printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
- r->start, r->end, r->name);
- final_start = max(start, r->start);
- final_end = min(end, r->end);
- if (final_start >= final_end) {
- printk(KERN_CONT "\n");
- continue;
- }
- printk(KERN_CONT " ==> [%010llx - %010llx]\n",
- final_start, final_end);
- reserve_bootmem_generic(final_start, final_end - final_start,
- BOOTMEM_DEFAULT);
- }
-}
+ ei_last = ei->addr + ei->size;
+ ei_start = ei->addr;
+ addr = find_early_area(ei_start, ei_last, start, end,
+ size, align);
-/* Check for already reserved areas */
-static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
-{
- int i;
- u64 addr = *addrp;
- int changed = 0;
- struct early_res *r;
-again:
- i = find_overlapped_early(addr, addr + size);
- r = &early_res[i];
- if (i < MAX_EARLY_RES && r->end) {
- *addrp = addr = round_up(r->end, align);
- changed = 1;
- goto again;
+ if (addr != -1ULL)
+ return addr;
}
- return changed;
+ return -1ULL;
}
-/* Check for already reserved areas */
-static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
+u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
{
- int i;
- u64 addr = *addrp, last;
- u64 size = *sizep;
- int changed = 0;
-again:
- last = addr + size;
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
- struct early_res *r = &early_res[i];
- if (last > r->start && addr < r->start) {
- size = r->start - addr;
- changed = 1;
- goto again;
- }
- if (last > r->end && addr < r->end) {
- addr = round_up(r->end, align);
- size = last - addr;
- changed = 1;
- goto again;
- }
- if (last <= r->end && addr >= r->start) {
- (*sizep)++;
- return 0;
- }
- }
- if (changed) {
- *addrp = addr;
- *sizep = size;
- }
- return changed;
+ return find_e820_area(start, end, size, align);
}
-/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
+u64 __init get_max_mapped(void)
{
- int i;
+ u64 end = max_pfn_mapped;
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 addr, last;
- u64 ei_last;
+ end <<= PAGE_SHIFT;
- if (ei->type != E820_RAM)
- continue;
- addr = round_up(ei->addr, align);
- ei_last = ei->addr + ei->size;
- if (addr < start)
- addr = round_up(start, align);
- if (addr >= ei_last)
- continue;
- while (bad_addr(&addr, size, align) && addr+size <= ei_last)
- ;
- last = addr + size;
- if (last > ei_last)
- continue;
- if (last > end)
- continue;
- return addr;
- }
- return -1ULL;
+ return end;
}
-
/*
* Find next free range after *start
*/
@@ -1092,25 +833,19 @@ u64 __init find_e820_area_size(u64 start
for (i = 0; i < e820.nr_map; i++) {
struct e820entry *ei = &e820.map[i];
- u64 addr, last;
- u64 ei_last;
+ u64 addr;
+ u64 ei_start, ei_last;
if (ei->type != E820_RAM)
continue;
- addr = round_up(ei->addr, align);
+
ei_last = ei->addr + ei->size;
- if (addr < start)
- addr = round_up(start, align);
- if (addr >= ei_last)
- continue;
- *sizep = ei_last - addr;
- while (bad_addr_size(&addr, sizep, align) &&
- addr + *sizep <= ei_last)
- ;
- last = addr + *sizep;
- if (last > ei_last)
- continue;
- return addr;
+ ei_start = ei->addr;
+ addr = find_early_area_size(ei_start, ei_last, start,
+ sizep, align);
+
+ if (addr != -1ULL)
+ return addr;
}
return -1ULL;
@@ -1544,6 +1279,8 @@ void __init e820_reserve_resources_late(
end = MAX_RESOURCE_SIZE;
if (start >= end)
continue;
+ printk(KERN_DEBUG "reserve RAM buffer: %016llx - %016llx ",
+ start, end);
reserve_region_with_split(&iomem_resource, start, end,
"RAM buffer");
}
--- head-2010-05-25.orig/arch/x86/kernel/head32-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head32-xen.c 2010-04-15 10:29:09.000000000 +0200
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/start_kernel.h>
+#include <linux/mm.h>
#include <asm/setup.h>
#include <asm/sections.h>
@@ -32,15 +33,26 @@ static void __init i386_default_early_se
void __init i386_start_kernel(void)
{
+#ifdef CONFIG_X86_TRAMPOLINE
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
+ "EX TRAMPOLINE");
+#endif
+
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifndef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
/* Reserve INITRD */
if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ /* Assume only end is not page aligned */
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
- u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
--- head-2010-05-25.orig/arch/x86/kernel/head_32-xen.S 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head_32-xen.S 2010-03-25 11:52:54.000000000 +0100
@@ -67,8 +67,8 @@ ENTRY(startup_32)
* The linker can't handle this by relocation. Manually set
* base address in stack canary segment descriptor.
*/
- movl $per_cpu__gdt_page,%eax
- movl $per_cpu__stack_canary,%ecx
+ movl $gdt_page,%eax
+ movl $stack_canary,%ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -79,7 +79,7 @@ ENTRY(startup_32)
# need to be preserved.
movl XEN_START_mfn_list(%esi), %ebx
- movl $(per_cpu__gdt_page - __PAGE_OFFSET), %eax
+ movl $(gdt_page - __PAGE_OFFSET), %eax
shrl $PAGE_SHIFT, %eax
movl (%ebx,%eax,4), %ecx
pushl %ecx # frame number for set_gdt below
@@ -89,7 +89,7 @@ ENTRY(startup_32)
shldl $PAGE_SHIFT, %ecx, %edx
shll $PAGE_SHIFT, %ecx
orl $_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY, %ecx
- movl $per_cpu__gdt_page, %ebx
+ movl $gdt_page, %ebx
movl $__HYPERVISOR_update_va_mapping, %eax
int $0x82
--- head-2010-05-25.orig/arch/x86/kernel/ldt-xen.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/ldt-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -7,6 +7,7 @@
*/
#include <linux/errno.h>
+#include <linux/gfp.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
--- head-2010-05-25.orig/arch/x86/kernel/mpparse-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/mpparse-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -677,7 +677,7 @@ static void __init smp_reserve_memory(st
{
unsigned long size = get_mpc_size(mpf->physptr);
- reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
+ reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
}
#endif
@@ -710,7 +710,7 @@ static int __init smp_scan_config(unsign
mpf, (u64)virt_to_phys(mpf));
mem = virt_to_phys(mpf);
- reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
+ reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
if (mpf->physptr)
smp_reserve_memory(mpf);
#else
--- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -2,6 +2,7 @@
#include <linux/dma-debug.h>
#include <linux/dmar.h>
#include <linux/bootmem.h>
+#include <linux/gfp.h>
#include <linux/pci.h>
#include <linux/kmemleak.h>
@@ -38,7 +39,7 @@ int iommu_detected __read_mostly = 0;
* This variable becomes 1 if iommu=pt is passed on the kernel command line.
* If this variable is 1, IOMMU implementations do no DMA translation for
* devices and allow every device to access to whole physical memory. This is
- * useful if a user want to use an IOMMU only for KVM device assignment to
+ * useful if a user wants to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation.
*/
int iommu_pass_through __read_mostly;
@@ -65,7 +66,7 @@ int dma_set_mask(struct device *dev, u64
}
EXPORT_SYMBOL(dma_set_mask);
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+#if defined(CONFIG_X86_64) && !defined(CONFIG_NUMA) && !defined(CONFIG_XEN)
static __initdata void *dma32_bootmem_ptr;
static unsigned long dma32_bootmem_size __initdata = (128ULL<<20);
@@ -116,6 +117,14 @@ static void __init dma32_free_bootmem(vo
dma32_bootmem_ptr = NULL;
dma32_bootmem_size = 0;
}
+#else
+void __init dma32_reserve_bootmem(void)
+{
+}
+static void __init dma32_free_bootmem(void)
+{
+}
+
#endif
static struct dma_map_ops swiotlb_dma_ops = {
@@ -137,10 +146,9 @@ static struct dma_map_ops swiotlb_dma_op
void __init pci_iommu_alloc(void)
{
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
-#endif
+
if (pci_swiotlb_detect())
goto out;
--- head-2010-05-25.orig/arch/x86/kernel/process-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process-xen.c 2010-05-25 09:30:59.000000000 +0200
@@ -94,6 +94,13 @@ void exit_thread(void)
}
}
+void show_regs(struct pt_regs *regs)
+{
+ show_registers(regs);
+ show_trace(NULL, regs, (unsigned long *)kernel_stack_pointer(regs),
+ regs->bp);
+}
+
void show_regs_common(void)
{
const char *board, *product;
@@ -503,21 +510,39 @@ static int __cpuinit mwait_usable(const
}
/*
- * Check for AMD CPUs, which have potentially C1E support
+ * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e.
+ * For more information see
+ * - Erratum #400 for NPT family 0xf and family 0x10 CPUs
+ * - Erratum #365 for family 0x11 (not affected because C1e not in use)
*/
static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
{
+ u64 val;
if (c->x86_vendor != X86_VENDOR_AMD)
- return 0;
-
- if (c->x86 < 0x0F)
- return 0;
+ goto no_c1e_idle;
/* Family 0x0f models < rev F do not have C1E */
- if (c->x86 == 0x0f && c->x86_model < 0x40)
- return 0;
+ if (c->x86 == 0x0F && c->x86_model >= 0x40)
+ return 1;
- return 1;
+ if (c->x86 == 0x10) {
+ /*
+ * check OSVW bit for CPUs that are not affected
+ * by erratum #400
+ */
+ if (cpu_has(c, X86_FEATURE_OSVW)) {
+ rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
+ if (val >= 2) {
+ rdmsrl(MSR_AMD64_OSVW_STATUS, val);
+ if (!(val & BIT(1)))
+ goto no_c1e_idle;
+ }
+ }
+ return 1;
+ }
+
+no_c1e_idle:
+ return 0;
}
static cpumask_var_t c1e_mask;
@@ -586,7 +611,7 @@ void __cpuinit select_idle_routine(const
#ifndef CONFIG_XEN
#ifdef CONFIG_SMP
if (pm_idle == poll_idle && smp_num_siblings > 1) {
- printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
+ printk_once(KERN_WARNING "WARNING: polling idle and HT enabled,"
" performance may degrade.\n");
}
#endif
--- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_32-xen.c 2010-03-25 10:38:31.000000000 +0100
@@ -179,12 +179,6 @@ void __show_regs(struct pt_regs *regs, i
d6, d7);
}
-void show_regs(struct pt_regs *regs)
-{
- show_registers(regs);
- show_trace(NULL, regs, &regs->sp, regs->bp);
-}
-
void release_thread(struct task_struct *dead_task)
{
BUG_ON(dead_task->mm);
--- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_64-xen.c 2010-05-12 09:09:00.000000000 +0200
@@ -219,12 +219,6 @@ void __show_regs(struct pt_regs *regs, i
printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
-void show_regs(struct pt_regs *regs)
-{
- show_registers(regs);
- show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
-}
-
void xen_load_gs_index(unsigned gs)
{
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
@@ -295,12 +289,12 @@ int copy_thread(unsigned long clone_flag
set_tsk_thread_flag(p, TIF_FORK);
- p->thread.fs = me->thread.fs;
- p->thread.gs = me->thread.gs;
p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
+ p->thread.gs = p->thread.gsindex ? 0 : me->thread.gs;
savesegment(fs, p->thread.fsindex);
+ p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
--- head-2010-05-25.orig/arch/x86/kernel/setup-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/setup-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -55,7 +55,6 @@
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
-#include <linux/slab.h>
#include <linux/user.h>
#include <linux/delay.h>
@@ -151,7 +150,9 @@ EXPORT_SYMBOL(xen_start_info);
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
+#ifdef CONFIG_DMI
RESERVE_BRK(dmi_alloc, 65536);
+#endif
unsigned int boot_cpu_id __read_mostly;
@@ -348,15 +349,17 @@ static void __init reserve_brk(void)
static void __init relocate_initrd(void)
{
#ifndef CONFIG_XEN
+ /* Assume only end is not page aligned */
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 area_size = PAGE_ALIGN(ramdisk_size);
u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
/* We need to move the initrd down into lowmem */
- ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
+ ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
PAGE_SIZE);
if (ramdisk_here == -1ULL)
@@ -365,7 +368,7 @@ static void __init relocate_initrd(void)
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
+ reserve_early(ramdisk_here, ramdisk_here + area_size,
"NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
@@ -416,10 +419,11 @@ static void __init relocate_initrd(void)
static void __init reserve_initrd(void)
{
+ /* Assume only end is not page aligned */
#ifndef CONFIG_XEN
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
- u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
if (!boot_params.hdr.type_of_loader ||
@@ -428,7 +432,7 @@ static void __init reserve_initrd(void)
#else
unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
unsigned long ramdisk_size = xen_start_info->mod_len;
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
+ unsigned long ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
unsigned long end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
if (!xen_start_info->mod_start || !ramdisk_size)
@@ -671,6 +675,18 @@ static int __init setup_elfcorehdr(char
early_param("elfcorehdr", setup_elfcorehdr);
#endif
+static __init void reserve_ibft_region(void)
+{
+ unsigned long addr, size = 0;
+
+ addr = find_ibft_region(&size);
+ /* Reserve the region reported above, if any; compiled out under CONFIG_XEN. */
+#ifndef CONFIG_XEN
+ if (size)
+ reserve_early_overlap_ok(addr, addr + size, "ibft");
+#endif /* !CONFIG_XEN */
+}
+
#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
@@ -734,6 +750,25 @@ static struct dmi_system_id __initdata b
{}
};
+#ifndef CONFIG_XEN
+static void __init trim_bios_range(void)
+{
+ /*
+ * Special case: the first page (4Kb) of memory is a BIOS owned
+ * area, not kernel RAM, but it is generally not listed as such
+ * in the E820 table, so convert it from RAM to reserved here.
+ */
+ e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+ /*
+ * Special case: some BIOSes report the PC BIOS area
+ * (640Kb -> 1Mb) as RAM even though it is not; remove that
+ * range from the map and then re-sanitize the map.
+ */
+ e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+#endif /* !CONFIG_XEN */
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -940,7 +975,7 @@ void __init setup_arch(char **cmdline_p)
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
-
+ trim_bios_range();
#ifdef CONFIG_X86_32
if (ppro_with_ram_bug()) {
e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
@@ -1007,6 +1042,8 @@ void __init setup_arch(char **cmdline_p)
*/
find_smp_config();
+ reserve_ibft_region();
+
reserve_trampoline_memory();
#ifdef CONFIG_ACPI_SLEEP
@@ -1077,17 +1114,11 @@ void __init setup_arch(char **cmdline_p)
#endif
initmem_init(0, max_pfn, acpi, k8);
-
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
- /*
- * dma32_reserve_bootmem() allocates bootmem which may conflict
- * with the crashkernel command line, so do that after
- * reserve_crashkernel()
- */
- dma32_reserve_bootmem();
+#ifndef CONFIG_NO_BOOTMEM
+ early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
#endif
- reserve_ibft_region();
+ dma32_reserve_bootmem();
#ifdef CONFIG_KVM_CLOCK
kvmclock_init();
--- head-2010-05-25.orig/arch/x86/kernel/smp-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/smp-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -21,6 +21,7 @@
#include <linux/cache.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
+#include <linux/gfp.h>
#include <asm/mtrr.h>
#include <asm/tlbflush.h>
--- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-05-12 09:02:50.000000000 +0200
+++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-05-12 09:03:15.000000000 +0200
@@ -597,7 +597,7 @@ static cycle_t xen_clocksource_read(stru
#endif
}
-static void xen_clocksource_resume(void)
+static void xen_clocksource_resume(struct clocksource *cs)
{
extern void time_resume(void);
@@ -619,18 +619,18 @@ static struct clocksource clocksource_xe
struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu)
{
struct vcpu_register_runstate_memory_area area;
- struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
+ struct vcpu_runstate_info *rs = &per_cpu(runstate, cpu);
int rc;
- set_xen_guest_handle(area.addr.h, runstate);
+ set_xen_guest_handle(area.addr.h, rs);
rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
if (rc) {
BUILD_BUG_ON(RUNSTATE_running);
- memset(runstate, 0, sizeof(*runstate));
+ memset(rs, 0, sizeof(*rs));
WARN_ON(rc != -ENOSYS);
}
- return runstate;
+ return rs;
}
static void init_missing_ticks_accounting(unsigned int cpu)
--- head-2010-05-25.orig/arch/x86/kernel/traps-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/traps-xen.c 2010-03-25 16:41:03.000000000 +0100
@@ -527,6 +527,9 @@ dotraplinkage void __kprobes do_debug(st
get_debugreg(dr6, 6);
+ /* Filter out all the reserved bits which are preset to 1 */
+ dr6 &= ~DR6_RESERVED;
+
/* Catch kmemcheck conditions first of all! */
if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
--- head-2010-05-25.orig/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/vsyscall_64-xen.c 2010-03-25 10:38:31.000000000 +0100
@@ -308,7 +308,8 @@ static int __init vsyscall_init(void)
register_sysctl_table(kernel_root_table2);
#endif
on_each_cpu(cpu_vsyscall_init, NULL, 1);
- hotcpu_notifier(cpu_vsyscall_notifier, 0);
+ /* notifier priority > KVM */
+ hotcpu_notifier(cpu_vsyscall_notifier, 30);
return 0;
}
--- head-2010-05-25.orig/arch/x86/kernel/x86_init-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/x86_init-xen.c 2010-03-25 17:21:48.000000000 +0100
@@ -5,8 +5,12 @@
*/
#include <linux/bitmap.h>
#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/list.h>
+#include <linux/spinlock_types.h>
#include <linux/threads.h>
+#include <asm/pci_x86.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
#include <asm/apic.h>
@@ -68,6 +72,12 @@ struct x86_init_ops x86_init __initdata
.iommu = {
.iommu_init = iommu_init_noop,
},
+
+ .pci = {
+ .init = x86_default_pci_init,
+ .init_irq = x86_default_pci_init_irq,
+ .fixup_irqs = x86_default_pci_fixup_irqs,
+ },
};
struct x86_platform_ops x86_platform = {
--- head-2010-05-25.orig/arch/x86/lib/Makefile 2010-03-24 15:01:37.000000000 +0100
+++ head-2010-05-25/arch/x86/lib/Makefile 2010-04-28 16:13:29.000000000 +0200
@@ -15,6 +15,7 @@ $(obj)/inat.o: $(obj)/inat-tables.c
clean-files := inat-tables.c
obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
+obj-$(CONFIG_XEN) += cache-smp.o
lib-y := delay.o
lib-y += thunk_$(BITS).o
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/lib/cache-smp-xen.c 2010-05-07 11:12:27.000000000 +0200
@@ -0,0 +1,27 @@
+#include <linux/smp.h>
+#include <linux/module.h>
+#include <asm/hypervisor.h>
+
+static void __wbinvd(void *dummy)
+{
+ wbinvd(); /* dummy is unused; this is the callback handed to the cross-CPU calls below */
+}
+
+#ifndef CONFIG_XEN
+void wbinvd_on_cpu(int cpu)
+{
+ smp_call_function_single(cpu, __wbinvd, NULL, 1); /* run __wbinvd on the given cpu */
+}
+EXPORT_SYMBOL(wbinvd_on_cpu);
+#endif /* !CONFIG_XEN */
+
+int wbinvd_on_all_cpus(void)
+{
+ struct mmuext_op op = { .cmd = MMUEXT_FLUSH_CACHE_GLOBAL }; /* tried first, via hypercall */
+
+ if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) == 0)
+ return 0;
+ /* Hypercall failed: best effort fallback, call __wbinvd via on_each_cpu(). */
+ return on_each_cpu(__wbinvd, NULL, 1);
+}
+EXPORT_SYMBOL(wbinvd_on_all_cpus);
--- head-2010-05-25.orig/arch/x86/mm/init-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/init-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -1,3 +1,4 @@
+#include <linux/gfp.h>
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
@@ -283,12 +284,7 @@ unsigned long __init_refok init_memory_m
if (!after_bootmem)
find_early_table_space(end, use_pse, use_gbpages);
-#ifdef CONFIG_X86_32
- for (i = 0; i < nr_range; i++)
- kernel_physical_mapping_init(mr[i].start, mr[i].end,
- mr[i].page_size_mask);
- ret = end;
-#else /* CONFIG_X86_64 */
+#ifdef CONFIG_X86_64
#define addr_to_page(addr) \
((unsigned long *) \
((mfn_to_pfn(((addr) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) \
@@ -320,12 +316,12 @@ unsigned long __init_refok init_memory_m
va += PAGE_SIZE;
}
}
+#undef addr_to_page
+#endif
for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
mr[i].page_size_mask);
-#undef addr_to_page
-#endif
#ifdef CONFIG_X86_32
early_ioremap_page_table_range_init();
@@ -377,11 +373,23 @@ int devmem_is_allowed(unsigned long page
void free_init_pages(char *what, unsigned long begin, unsigned long end)
{
- unsigned long addr = begin;
+ unsigned long addr;
+ unsigned long begin_aligned, end_aligned;
+
+ /* Make sure boundaries are page aligned */
+ begin_aligned = PAGE_ALIGN(begin);
+ end_aligned = end & PAGE_MASK;
+
+ if (WARN_ON(begin_aligned != begin || end_aligned != end)) {
+ begin = begin_aligned;
+ end = end_aligned;
+ }
- if (addr >= end)
+ if (begin >= end)
return;
+ addr = begin;
+
/*
* If debugging page accesses then do not free this memory but
* mark them not present - any buggy init-section access will
@@ -389,7 +397,7 @@ void free_init_pages(char *what, unsigne
*/
#ifdef CONFIG_DEBUG_PAGEALLOC
printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
- begin, PAGE_ALIGN(end));
+ begin, end);
set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
#else
/*
@@ -404,8 +412,7 @@ void free_init_pages(char *what, unsigne
for (; addr < end; addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
- memset((void *)(addr & ~(PAGE_SIZE-1)),
- POISON_FREE_INITMEM, PAGE_SIZE);
+ memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
#ifdef CONFIG_X86_64
if (addr >= __START_KERNEL_map) {
/* make_readonly() reports all kernel addresses. */
@@ -434,6 +441,15 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ /*
+ * end may not be page aligned, and we cannot align initrd_end itself:
+ * the decompressor could be confused by an aligned initrd_end.
+ * The partial page at the end has already been reserved in
+ * - i386_start_kernel()
+ * - x86_64_start_kernel()
+ * - relocate_initrd()
+ * so here we can safely PAGE_ALIGN() end to get the partial page freed.
+ */
+ free_init_pages("initrd memory", start, PAGE_ALIGN(end));
}
#endif
--- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-04-15 10:51:33.000000000 +0200
@@ -25,11 +25,11 @@
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
-#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
#include <linux/cpumask.h>
+#include <linux/gfp.h>
#include <linux/dma-mapping.h>
#include <linux/scatterlist.h>
@@ -257,6 +257,7 @@ kernel_physical_mapping_init(unsigned lo
unsigned long page_size_mask)
{
int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
+ unsigned long last_map_addr = end;
unsigned long start_pfn, end_pfn;
pgd_t *pgd_base = swapper_pg_dir;
int pgd_idx, pmd_idx, pte_ofs;
@@ -376,9 +377,10 @@ repeat:
prot = PAGE_KERNEL_EXEC;
pages_4k++;
- if (mapping_iter == 1)
+ if (mapping_iter == 1) {
set_pte(pte, pfn_pte(pfn, init_prot));
- else
+ last_map_addr = (pfn << PAGE_SHIFT) + PAGE_SIZE;
+ } else
set_pte(pte, pfn_pte(pfn, prot));
}
}
@@ -404,7 +406,7 @@ repeat:
mapping_iter = 2;
goto repeat;
}
- return 0;
+ return last_map_addr;
}
pte_t *kmap_pte;
@@ -750,6 +752,7 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
+#ifndef CONFIG_NO_BOOTMEM
static unsigned long __init setup_node_bootmem(int nodeid,
unsigned long start_pfn,
unsigned long end_pfn,
@@ -766,13 +769,14 @@ static unsigned long __init setup_node_b
printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
nodeid, bootmap, bootmap + bootmap_size);
free_bootmem_with_active_regions(nodeid, end_pfn);
- early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
return bootmap + bootmap_size;
}
+#endif
void __init setup_bootmem_allocator(void)
{
+#ifndef CONFIG_NO_BOOTMEM
int nodeid;
unsigned long bootmap_size, bootmap;
unsigned long end_xen_pfn = min(max_low_pfn, xen_start_info->nr_pages);
@@ -787,11 +791,17 @@ void __init setup_bootmem_allocator(void
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
+#elif defined(CONFIG_XEN)
+ if (max_low_pfn > xen_start_info->nr_pages)
+ reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
+ max_low_pfn << PAGE_SHIFT, "BALLOON");
+#endif
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
+#ifndef CONFIG_NO_BOOTMEM
for_each_online_node(nodeid) {
unsigned long start_pfn, end_pfn;
@@ -809,6 +819,7 @@ void __init setup_bootmem_allocator(void
bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
bootmap);
}
+#endif
after_bootmem = 1;
}
--- head-2010-05-25.orig/arch/x86/mm/init_64-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/init_64-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/memory_hotplug.h>
#include <linux/nmi.h>
+#include <linux/gfp.h>
#include <asm/processor.h>
#include <asm/bios_ebda.h>
@@ -403,9 +404,13 @@ static inline int __meminit make_readonl
* No need for writable mapping of kernel image. This also ensures that
* page and descriptor tables embedded inside don't have writable
* mappings. Exclude the vsyscall area here, allowing alternative
- * instruction patching to work.
+ * instruction patching to work. The range must be in sync with that
+ * passed to reserve_early() (as "TEXT DATA BSS"), since all other
+ * regions can be allocated from under CONFIG_NO_BOOTMEM and thus must
+ * be writable.
*/
- if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa(_brk_end))
+ if ((paddr >= __pa_symbol(&_text))
+ && (paddr < (__pa_symbol(__bss_stop) & PAGE_MASK))
&& !(paddr >= __pa_symbol(&__vsyscall_0)
&& paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE))
readonly = 1;
@@ -813,6 +818,7 @@ kernel_physical_mapping_init(unsigned lo
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
+#ifndef CONFIG_NO_BOOTMEM
unsigned long bootmap_size, bootmap;
e820_register_active_regions(0, start_pfn, end_pfn);
@@ -825,12 +831,19 @@ void __init initmem_init(unsigned long s
PAGE_SIZE);
if (bootmap == -1L)
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
/* don't touch min_low_pfn */
bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
0, end_pfn);
free_bootmem_with_active_regions(0, end_pfn);
- early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
+#else
+ e820_register_active_regions(0, start_pfn, end_pfn);
+#ifdef CONFIG_XEN
+ if (end_pfn > xen_start_info->nr_pages)
+ reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
+ end_pfn << PAGE_SHIFT, "BALLOON");
+#endif
+#endif
}
#endif
@@ -1243,7 +1256,7 @@ vmemmap_populate(struct page *start_page
if (pmd_none(*pmd)) {
pte_t entry;
- p = vmemmap_alloc_block(PMD_SIZE, node);
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (!p)
return -ENOMEM;
--- head-2010-05-25.orig/arch/x86/mm/ioremap-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/ioremap-xen.c 2010-05-12 09:13:00.000000000 +0200
@@ -142,6 +142,11 @@ int direct_kernel_remap_pfn_range(unsign
}
EXPORT_SYMBOL(direct_kernel_remap_pfn_range);
+int page_is_ram(unsigned long pagenr)
+{
+ return pagenr < max_pfn; /* nonzero for every page frame number below max_pfn */
+}
+
static int lookup_pte_fn(
pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
{
@@ -177,45 +182,6 @@ int touch_pte_range(struct mm_struct *mm
EXPORT_SYMBOL(touch_pte_range);
-int page_is_ram(unsigned long pagenr)
-{
- resource_size_t addr, end;
- int i;
-
-#ifndef CONFIG_XEN
- /*
- * A special case is the first 4Kb of memory;
- * This is a BIOS owned area, not kernel ram, but generally
- * not listed as such in the E820 table.
- */
- if (pagenr == 0)
- return 0;
-
- /*
- * Second special case: Some BIOSen report the PC BIOS
- * area (640->1Mb) as ram even though it is not.
- */
- if (pagenr >= (BIOS_BEGIN >> PAGE_SHIFT) &&
- pagenr < (BIOS_END >> PAGE_SHIFT))
- return 0;
-#endif
-
- for (i = 0; i < e820.nr_map; i++) {
- /*
- * Not usable memory:
- */
- if (e820.map[i].type != E820_RAM)
- continue;
- addr = (e820.map[i].addr + PAGE_SIZE-1) >> PAGE_SHIFT;
- end = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
-
- if ((pagenr >= addr) && (pagenr < end))
- return 1;
- }
- return 0;
-}
-
/*
* Fix up the linear direct mapping of the kernel to avoid cache attribute
* conflicts.
@@ -606,6 +572,10 @@ void __init early_ioremap_init(void)
* The boot-ioremap range spans multiple pmds, for which
* we are not prepared:
*/
+#define __FIXADDR_TOP (-PAGE_SIZE)
+ BUILD_BUG_ON((__fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT)
+ != (__fix_to_virt(FIX_BTMAP_END) >> PMD_SHIFT));
+#undef __FIXADDR_TOP
if (pmd != early_ioremap_pmd(fix_to_virt(FIX_BTMAP_END))) {
WARN_ON(1);
printk(KERN_WARNING "pmd %p != %p\n",
@@ -665,6 +635,22 @@ static inline void __init early_clear_fi
static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
+#ifndef CONFIG_XEN
+void __init fixup_early_ioremap(void)
+{
+ int i;
+
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { /* warn once if any prev_map[] slot is set */
+ if (prev_map[i]) {
+ WARN_ON(1);
+ break;
+ }
+ }
+ /* Whether or not a warning was raised, call early_ioremap_init(). */
+ early_ioremap_init();
+}
+#endif /* !CONFIG_XEN */
+
static int __init check_early_ioremap_leak(void)
{
int count = 0;
--- head-2010-05-25.orig/arch/x86/mm/pageattr-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pageattr-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -6,13 +6,13 @@
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
#include <linux/percpu.h>
+#include <linux/gfp.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -293,8 +293,29 @@ static inline pgprot_t static_protection
*/
if (kernel_set_to_readonly &&
within(address, (unsigned long)_text,
- (unsigned long)__end_rodata_hpage_align))
- pgprot_val(forbidden) |= _PAGE_RW;
+ (unsigned long)__end_rodata_hpage_align)) {
+ unsigned int level;
+
+ /*
+ * Don't enforce the !RW mapping for the kernel text mapping,
+ * if the current mapping is already using small page mapping.
+ * No need to work hard to preserve large page mappings in this
+ * case.
+ *
+ * This also fixes the Linux Xen paravirt guest boot failure
+ * (because of unexpected read-only mappings for kernel identity
+ * mappings). In this paravirt guest case, the kernel text
+ * mapping and the kernel identity mapping share the same
+ * page-table pages. Thus we can't really use different
+ * protections for the kernel text and identity mappings. Also,
+ * these shared mappings are made of small page mappings.
+ * Thus not enforcing the !RW mapping for small page kernel
+ * text mappings will help the Linux Xen paravirt guest boot
+ * as well.
+ */
+ if (lookup_address(address, &level) && (level != PG_LEVEL_4K))
+ pgprot_val(forbidden) |= _PAGE_RW;
+ }
#endif
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
--- head-2010-05-25.orig/arch/x86/mm/pat-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pat-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -12,7 +12,7 @@
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/gfp.h>
+#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
--- head-2010-05-25.orig/arch/x86/mm/pgtable-xen.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pgtable-xen.c 2010-04-15 10:53:40.000000000 +0200
@@ -1,4 +1,5 @@
#include <linux/mm.h>
+#include <linux/gfp.h>
#include <linux/module.h>
#include <xen/features.h>
#include <asm/pgalloc.h>
@@ -10,6 +11,14 @@
#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
+#ifdef CONFIG_HIGHPTE
+#define PGALLOC_USER_GFP __GFP_HIGHMEM
+#else
+#define PGALLOC_USER_GFP 0
+#endif
+
+gfp_t __userpte_alloc_gfp = PGALLOC_GFP | PGALLOC_USER_GFP;
+
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
pte_t *pte = (pte_t *)__get_free_page(PGALLOC_GFP);
@@ -28,11 +37,7 @@ pgtable_t pte_alloc_one(struct mm_struct
{
struct page *pte;
-#ifdef CONFIG_HIGHPTE
- pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
-#else
- pte = alloc_pages(PGALLOC_GFP, 0);
-#endif
+ pte = alloc_pages(__userpte_alloc_gfp, 0);
if (pte) {
pgtable_page_ctor(pte);
SetPageForeign(pte, _pte_free);
@@ -41,6 +46,23 @@ pgtable_t pte_alloc_one(struct mm_struct
return pte;
}
+static int __init setup_userpte(char *arg)
+{
+ if (!arg)
+ return -EINVAL;
+
+ /*
+ * "userpte=nohigh" clears __GFP_HIGHMEM from the user pagetable
+ * allocation mask; a missing or any other value yields -EINVAL.
+ */
+ if (strcmp(arg, "nohigh") == 0)
+ __userpte_alloc_gfp &= ~__GFP_HIGHMEM;
+ else
+ return -EINVAL;
+ return 0;
+}
+early_param("userpte", setup_userpte);
+
void __pte_free(pgtable_t pte)
{
if (!PageHighMem(pte)) {
--- head-2010-05-25.orig/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pgtable_32-xen.c 2010-05-12 09:09:25.000000000 +0200
@@ -6,7 +6,6 @@
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
-#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
@@ -142,6 +141,7 @@ static int __init parse_reservetop(char
address = memparse(arg, &arg);
reserve_top_address(address);
+ fixup_early_ioremap();
return 0;
}
early_param("reservetop", parse_reservetop);
--- head-2010-05-25.orig/arch/x86/pci/irq-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/pci/irq-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -8,7 +8,6 @@
#include <linux/kernel.h>
#include <linux/pci.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/dmi.h>
#include <linux/io.h>
@@ -53,7 +52,7 @@ struct irq_router_handler {
int (*probe)(struct irq_router *r, struct pci_dev *router, u16 device);
};
-int (*pcibios_enable_irq)(struct pci_dev *dev) = NULL;
+int (*pcibios_enable_irq)(struct pci_dev *dev) = pirq_enable_irq;
void (*pcibios_disable_irq)(struct pci_dev *dev) = NULL;
/*
@@ -596,6 +595,8 @@ static __init int intel_router_probe(str
case PCI_DEVICE_ID_INTEL_ICH10_1:
case PCI_DEVICE_ID_INTEL_ICH10_2:
case PCI_DEVICE_ID_INTEL_ICH10_3:
+ case PCI_DEVICE_ID_INTEL_CPT_LPC1:
+ case PCI_DEVICE_ID_INTEL_CPT_LPC2:
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
@@ -1022,7 +1023,7 @@ static int pcibios_lookup_irq(struct pci
return 1;
}
-static void __init pcibios_fixup_irqs(void)
+void __init pcibios_fixup_irqs(void)
{
struct pci_dev *dev = NULL;
u8 pin;
@@ -1116,12 +1117,12 @@ static struct dmi_system_id __initdata p
{ }
};
-int __init pcibios_irq_init(void)
+void __init pcibios_irq_init(void)
{
DBG(KERN_DEBUG "PCI: IRQ init\n");
- if (pcibios_enable_irq || raw_pci_ops == NULL)
- return 0;
+ if (raw_pci_ops == NULL)
+ return;
dmi_check_system(pciirq_dmi_table);
@@ -1148,9 +1149,7 @@ int __init pcibios_irq_init(void)
pirq_table = NULL;
}
- pcibios_enable_irq = pirq_enable_irq;
-
- pcibios_fixup_irqs();
+ x86_init.pci.fixup_irqs();
if (io_apic_assign_pci_irqs && pci_routeirq) {
struct pci_dev *dev = NULL;
@@ -1163,8 +1162,6 @@ int __init pcibios_irq_init(void)
for_each_pci_dev(dev)
pirq_enable_irq(dev);
}
-
- return 0;
}
static void pirq_penalize_isa_irq(int irq, int active)
--- head-2010-05-25.orig/drivers/char/tpm/tpm_vtpm.c 2010-03-24 15:12:46.000000000 +0100
+++ head-2010-05-25/drivers/char/tpm/tpm_vtpm.c 2010-04-15 13:41:04.000000000 +0200
@@ -16,6 +16,7 @@
#include <asm/uaccess.h>
#include <linux/list.h>
+#include <linux/slab.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/platform_device.h>
--- head-2010-05-25.orig/drivers/char/tpm/tpm_xen.c 2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/drivers/char/tpm/tpm_xen.c 2010-04-15 13:41:56.000000000 +0200
@@ -37,6 +37,7 @@
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/mutex.h>
+#include <linux/slab.h>
#include <asm/uaccess.h>
#include <xen/evtchn.h>
#include <xen/interface/grant_table.h>
--- head-2010-05-25.orig/drivers/misc/Kconfig 2010-05-25 09:31:21.000000000 +0200
+++ head-2010-05-25/drivers/misc/Kconfig 2010-04-29 10:01:27.000000000 +0200
@@ -313,7 +313,7 @@ config TI_DAC7512
config VMWARE_BALLOON
tristate "VMware Balloon Driver"
- depends on X86
+ depends on X86 && !XEN
help
This is VMware physical memory management driver which acts
like a "balloon" that can be inflated to reclaim physical pages
--- head-2010-05-25.orig/drivers/pci/msi-xen.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/drivers/pci/msi-xen.c 2010-04-15 10:48:32.000000000 +0200
@@ -18,6 +18,7 @@
#include <linux/smp.h>
#include <linux/errno.h>
#include <linux/io.h>
+#include <linux/slab.h>
#include <xen/evtchn.h>
--- head-2010-05-25.orig/drivers/xen/Kconfig 2010-03-31 14:01:28.000000000 +0200
+++ head-2010-05-25/drivers/xen/Kconfig 2010-03-31 14:08:31.000000000 +0200
@@ -23,6 +23,7 @@ config XEN_UNPRIVILEGED_GUEST
select PM_SLEEP
select PM_SLEEP_SMP if SMP
select PM_RUNTIME if PCI
+ select PM_OPS if PCI
select SUSPEND
config XEN_PRIVCMD
@@ -336,6 +337,10 @@ config HAVE_IRQ_IGNORE_UNHANDLED
config NO_IDLE_HZ
def_bool y
+config ARCH_HAS_WALK_MEMORY
+ def_bool y
+ depends on X86
+
config XEN_SMPBOOT
def_bool y
depends on SMP && !PPC_XEN
@@ -375,7 +380,6 @@ config XEN_SCRUB_PAGES
config XEN_DEV_EVTCHN
tristate "Xen /dev/xen/evtchn device"
- depends on XEN || PARAVIRT_XEN
default PARAVIRT_XEN || XEN_PRIVILEGED_GUEST || m
help
The evtchn driver allows a userspace process to triger event
--- head-2010-05-25.orig/drivers/xen/balloon/balloon.c 2010-04-15 10:11:45.000000000 +0200
+++ head-2010-05-25/drivers/xen/balloon/balloon.c 2010-04-15 11:00:29.000000000 +0200
@@ -43,7 +43,7 @@
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
-#include <linux/vmalloc.h>
+#include <linux/slab.h>
#include <linux/mutex.h>
#include <xen/xen_proc.h>
#include <asm/hypervisor.h>
--- head-2010-05-25.orig/drivers/xen/blkback/blkback-pagemap.c 2009-06-09 15:01:37.000000000 +0200
+++ head-2010-05-25/drivers/xen/blkback/blkback-pagemap.c 2010-04-15 13:39:30.000000000 +0200
@@ -1,4 +1,5 @@
#include <linux/module.h>
+#include <linux/slab.h>
#include "blkback-pagemap.h"
static int blkback_pagemap_size;
--- head-2010-05-25.orig/drivers/xen/blkfront/vbd.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/drivers/xen/blkfront/vbd.c 2010-03-25 16:41:12.000000000 +0100
@@ -314,15 +314,14 @@ xlvbd_init_blk_queue(struct gendisk *gd,
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_logical_block_size(rq, sector_size);
- blk_queue_max_sectors(rq, 512);
+ blk_queue_max_hw_sectors(rq, 512);
/* Each segment in a request is up to an aligned page in size. */
blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
blk_queue_max_segment_size(rq, PAGE_SIZE);
/* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
--- head-2010-05-25.orig/drivers/xen/blktap2/blktap.h 2010-03-24 15:12:36.000000000 +0100
+++ head-2010-05-25/drivers/xen/blktap2/blktap.h 2010-04-15 11:24:08.000000000 +0200
@@ -1,6 +1,7 @@
#ifndef _BLKTAP_H_
#define _BLKTAP_H_
+#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/cdev.h>
--- head-2010-05-25.orig/drivers/xen/blktap2/device.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/drivers/xen/blktap2/device.c 2010-04-19 14:54:02.000000000 +0200
@@ -991,15 +991,14 @@ blktap_device_configure(struct blktap *t
/* Hard sector size and max sectors impersonate the equiv. hardware. */
blk_queue_logical_block_size(rq, tap->params.sector_size);
- blk_queue_max_sectors(rq, 512);
+ blk_queue_max_hw_sectors(rq, 512);
/* Each segment in a request is up to an aligned page in size. */
blk_queue_segment_boundary(rq, PAGE_SIZE - 1);
blk_queue_max_segment_size(rq, PAGE_SIZE);
/* Ensure a merged request will fit in a single I/O ring slot. */
- blk_queue_max_phys_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
- blk_queue_max_hw_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
+ blk_queue_max_segments(rq, BLKIF_MAX_SEGMENTS_PER_REQUEST);
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
--- head-2010-05-25.orig/drivers/xen/blktap2/sysfs.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/drivers/xen/blktap2/sysfs.c 2010-05-25 09:25:30.000000000 +0200
@@ -379,13 +379,15 @@ blktap_sysfs_destroy(struct blktap *tap)
}
static ssize_t
-blktap_sysfs_show_verbosity(struct class *class, char *buf)
+blktap_sysfs_show_verbosity(struct class *class, struct class_attribute *attr,
+ char *buf)
{
return sprintf(buf, "%d\n", blktap_debug_level);
}
static ssize_t
-blktap_sysfs_set_verbosity(struct class *class, const char *buf, size_t size)
+blktap_sysfs_set_verbosity(struct class *class, struct class_attribute *attr,
+ const char *buf, size_t size)
{
int level;
@@ -400,7 +402,8 @@ CLASS_ATTR(verbosity, S_IRUSR | S_IWUSR,
blktap_sysfs_show_verbosity, blktap_sysfs_set_verbosity);
static ssize_t
-blktap_sysfs_show_devices(struct class *class, char *buf)
+blktap_sysfs_show_devices(struct class *class, struct class_attribute *attr,
+ char *buf)
{
int i, ret;
struct blktap *tap;
--- head-2010-05-25.orig/drivers/xen/char/mem.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/drivers/xen/char/mem.c 2010-04-15 10:48:32.000000000 +0200
@@ -3,7 +3,7 @@
*
* Copyright (C) 1991, 1992 Linus Torvalds
*
- * Added devfs support.
+ * Added devfs support.
* Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
* Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
*/
@@ -65,10 +65,10 @@ static inline int range_is_allowed(unsig
}
/*
- * This funcion reads the *physical* memory. The f_pos points directly to the
- * memory location.
+ * This function reads the *physical* memory. The f_pos points directly to the
+ * memory location.
*/
-static ssize_t read_mem(struct file * file, char __user * buf,
+static ssize_t read_mem(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long p = *ppos;
@@ -114,7 +114,7 @@ static ssize_t read_mem(struct file * fi
return read;
}
-static ssize_t write_mem(struct file * file, const char __user * buf,
+static ssize_t write_mem(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long p = *ppos, ignored;
@@ -161,7 +161,7 @@ static struct vm_operations_struct mmap_
#endif
};
-static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
+static int xen_mmap_mem(struct file *file, struct vm_area_struct *vma)
{
size_t size = vma->vm_end - vma->vm_start;
@@ -191,24 +191,26 @@ static int xen_mmap_mem(struct file * fi
* also note that seeking relative to the "end of file" isn't supported:
* it has no meaning, so it returns -EINVAL.
*/
-static loff_t memory_lseek(struct file * file, loff_t offset, int orig)
+static loff_t memory_lseek(struct file *file, loff_t offset, int orig)
{
loff_t ret;
mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
switch (orig) {
- case 0:
- file->f_pos = offset;
- ret = file->f_pos;
- force_successful_syscall_return();
+ case SEEK_CUR:
+ offset += file->f_pos;
+ case SEEK_SET:
+ /* to avoid userland mistaking f_pos=-9 as -EBADF=-9 */
+ if ((unsigned long long)offset >= ~0xFFFULL) {
+ ret = -EOVERFLOW;
break;
- case 1:
- file->f_pos += offset;
- ret = file->f_pos;
- force_successful_syscall_return();
- break;
- default:
- ret = -EINVAL;
+ }
+ file->f_pos = offset;
+ ret = file->f_pos;
+ force_successful_syscall_return();
+ break;
+ default:
+ ret = -EINVAL;
}
mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
return ret;
--- head-2010-05-25.orig/drivers/xen/core/evtchn.c 2010-03-31 14:37:57.000000000 +0200
+++ head-2010-05-25/drivers/xen/core/evtchn.c 2010-04-15 11:03:28.000000000 +0200
@@ -31,6 +31,7 @@
*/
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
--- head-2010-05-25.orig/drivers/xen/core/gnttab.c 2010-03-24 15:12:46.000000000 +0100
+++ head-2010-05-25/drivers/xen/core/gnttab.c 2010-04-15 11:04:07.000000000 +0200
@@ -32,6 +32,7 @@
*/
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/seqlock.h>
--- head-2010-05-25.orig/drivers/xen/core/hypervisor_sysfs.c 2010-03-24 15:10:37.000000000 +0100
+++ head-2010-05-25/drivers/xen/core/hypervisor_sysfs.c 2010-03-25 14:27:48.000000000 +0100
@@ -36,7 +36,7 @@ static ssize_t hyp_sysfs_store(struct ko
return 0;
}
-static struct sysfs_ops hyp_sysfs_ops = {
+static const struct sysfs_ops hyp_sysfs_ops = {
.show = hyp_sysfs_show,
.store = hyp_sysfs_store,
};
--- head-2010-05-25.orig/drivers/xen/core/reboot.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/drivers/xen/core/reboot.c 2010-04-15 11:07:05.000000000 +0200
@@ -3,6 +3,7 @@
#include <linux/kernel.h>
#include <linux/unistd.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/reboot.h>
#include <linux/sched.h>
#include <linux/sysrq.h>
--- head-2010-05-25.orig/drivers/xen/core/spinlock.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/drivers/xen/core/spinlock.c 2010-04-15 10:14:50.000000000 +0200
@@ -22,7 +22,7 @@ struct spinning {
unsigned int ticket;
struct spinning *prev;
};
-static DEFINE_PER_CPU(struct spinning *, spinning);
+static DEFINE_PER_CPU(struct spinning *, _spinning);
/*
* Protect removal of objects: Addition can be done lockless, and even
* removal itself doesn't need protection - what needs to be prevented is
@@ -78,7 +78,7 @@ static unsigned int spin_adjust(struct s
unsigned int xen_spin_adjust(const arch_spinlock_t *lock, unsigned int token)
{
- return spin_adjust(percpu_read(spinning), lock, token);
+ return spin_adjust(percpu_read(_spinning), lock, token);
}
bool xen_spin_wait(arch_spinlock_t *lock, unsigned int *ptok,
@@ -97,9 +97,9 @@ bool xen_spin_wait(arch_spinlock_t *lock
/* announce we're spinning */
spinning.ticket = *ptok >> TICKET_SHIFT;
spinning.lock = lock;
- spinning.prev = percpu_read(spinning);
+ spinning.prev = percpu_read(_spinning);
smp_wmb();
- percpu_write(spinning, &spinning);
+ percpu_write(_spinning, &spinning);
upcall_mask = current_vcpu_info()->evtchn_upcall_mask;
do {
@@ -184,7 +184,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
/* announce we're done */
other = spinning.prev;
- percpu_write(spinning, other);
+ percpu_write(_spinning, other);
rm_lock = &__get_cpu_var(spinning_rm_lock);
raw_local_irq_disable();
arch_write_lock(rm_lock);
@@ -228,7 +228,7 @@ void xen_spin_kick(arch_spinlock_t *lock
raw_local_irq_save(flags);
arch_read_lock(rm_lock);
- spinning = per_cpu(spinning, cpu);
+ spinning = per_cpu(_spinning, cpu);
smp_rmb();
while (spinning) {
if (spinning->lock == lock && spinning->ticket == token)
--- head-2010-05-25.orig/drivers/xen/core/xen_sysfs.c 2010-03-24 15:10:37.000000000 +0100
+++ head-2010-05-25/drivers/xen/core/xen_sysfs.c 2010-04-15 11:04:56.000000000 +0200
@@ -11,6 +11,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <asm/hypervisor.h>
#include <xen/features.h>
#include <xen/hypervisor_sysfs.h>
--- head-2010-05-25.orig/drivers/xen/fbfront/xenfb.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/fbfront/xenfb.c 2010-04-15 11:11:34.000000000 +0200
@@ -25,6 +25,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/freezer.h>
#include <asm/hypervisor.h>
--- head-2010-05-25.orig/drivers/xen/fbfront/xenkbd.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/fbfront/xenkbd.c 2010-04-15 11:11:42.000000000 +0200
@@ -20,6 +20,7 @@
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/module.h>
+#include <linux/slab.h>
#include <linux/input.h>
#include <asm/hypervisor.h>
#include <xen/evtchn.h>
--- head-2010-05-25.orig/drivers/xen/gntdev/gntdev.c 2010-03-24 15:12:46.000000000 +0100
+++ head-2010-05-25/drivers/xen/gntdev/gntdev.c 2010-04-15 11:13:05.000000000 +0200
@@ -23,6 +23,7 @@
#include <linux/fs.h>
#include <linux/device.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/mman.h>
#include <asm/uaccess.h>
#include <asm/io.h>
--- head-2010-05-25.orig/drivers/xen/netfront/netfront.h 2010-03-24 15:10:29.000000000 +0100
+++ head-2010-05-25/drivers/xen/netfront/netfront.h 2010-04-15 11:15:06.000000000 +0200
@@ -34,6 +34,7 @@
#define NETFRONT_H
#include <xen/interface/io/netif.h>
+#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/list.h>
--- head-2010-05-25.orig/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200
+++ head-2010-05-25/drivers/xen/pciback/conf_space_capability_msi.c 2010-04-15 11:21:45.000000000 +0200
@@ -1,12 +1,10 @@
/*
* PCI Backend -- Configuration overlay for MSI capability
*/
-#include <linux/pci.h>
-#include <linux/slab.h>
+#include "pciback.h"
#include "conf_space.h"
#include "conf_space_capability.h"
#include <xen/interface/io/pciif.h>
-#include "pciback.h"
int pciback_enable_msi(struct pciback_device *pdev,
struct pci_dev *dev, struct xen_pci_op *op)
--- head-2010-05-25.orig/drivers/xen/pciback/pciback.h 2010-03-24 15:08:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/pciback/pciback.h 2010-04-15 11:20:39.000000000 +0200
@@ -6,6 +6,7 @@
#ifndef __XEN_PCIBACK_H__
#define __XEN_PCIBACK_H__
+#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <xen/xenbus.h>
--- head-2010-05-25.orig/drivers/xen/pciback/slot.c 2009-03-18 10:39:32.000000000 +0100
+++ head-2010-05-25/drivers/xen/pciback/slot.c 2010-04-15 11:21:14.000000000 +0200
@@ -6,10 +6,6 @@
* Author: Tristan Gingold <tristan.gingold@bull.net>, from vpci.c
*/
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
#include "pciback.h"
/* There are at most 32 slots in a pci bus. */
--- head-2010-05-25.orig/drivers/xen/pciback/vpci.c 2009-03-18 10:39:32.000000000 +0100
+++ head-2010-05-25/drivers/xen/pciback/vpci.c 2010-04-15 11:21:09.000000000 +0200
@@ -5,10 +5,6 @@
* Author: Ryan Wilson <hap9@epoch.ncsc.mil>
*/
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/pci.h>
-#include <linux/spinlock.h>
#include "pciback.h"
#define PCI_SLOT_MAX 32
--- head-2010-05-25.orig/drivers/xen/pcifront/pcifront.h 2010-03-24 15:08:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/pcifront/pcifront.h 2010-04-15 11:14:10.000000000 +0200
@@ -6,6 +6,7 @@
#ifndef __XEN_PCIFRONT_H__
#define __XEN_PCIFRONT_H__
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <xen/xenbus.h>
--- head-2010-05-25.orig/drivers/xen/scsiback/xenbus.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/scsiback/xenbus.c 2010-03-25 14:20:20.000000000 +0100
@@ -353,7 +353,7 @@ fail:
}
-static struct xenbus_device_id scsiback_ids[] = {
+static const struct xenbus_device_id scsiback_ids[] = {
{ "vscsi" },
{ "" }
};
--- head-2010-05-25.orig/drivers/xen/scsifront/xenbus.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/scsifront/xenbus.c 2010-04-15 11:07:44.000000000 +0200
@@ -30,6 +30,7 @@
#include <linux/version.h>
+#include <linux/slab.h>
#include "common.h"
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
@@ -393,7 +394,7 @@ static void scsifront_backend_changed(st
}
-static struct xenbus_device_id scsifront_ids[] = {
+static const struct xenbus_device_id scsifront_ids[] = {
{ "vscsi" },
{ "" }
};
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel.h 2010-03-24 15:12:46.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel.h 2010-04-15 11:23:26.000000000 +0200
@@ -35,6 +35,7 @@
#include <xen/evtchn.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/version.h>
#include <linux/list.h>
--- head-2010-05-25.orig/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2008-02-20 09:32:49.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netutil/accel_cuckoo_hash.c 2010-04-15 11:11:11.000000000 +0200
@@ -24,6 +24,7 @@
#include <linux/types.h> /* needed for linux/random.h */
#include <linux/random.h>
+#include <linux/slab.h>
#include "accel_cuckoo_hash.h"
#include "accel_util.h"
--- head-2010-05-25.orig/drivers/xen/sfc_netutil/accel_util.c 2010-01-04 11:56:34.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netutil/accel_util.c 2010-04-15 11:10:59.000000000 +0200
@@ -22,6 +22,7 @@
****************************************************************************
*/
+#include <linux/slab.h>
#include <linux/if_ether.h>
#include <linux/delay.h>
#include <asm/io.h>
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_client.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_client.c 2010-04-15 11:16:49.000000000 +0200
@@ -30,8 +30,8 @@
* IN THE SOFTWARE.
*/
-#if defined(CONFIG_XEN) || defined(MODULE)
#include <linux/slab.h>
+#if defined(CONFIG_XEN) || defined(MODULE)
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/driver_util.h>
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_dev.c 2010-03-24 15:32:27.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_dev.c 2010-04-15 11:19:13.000000000 +0200
@@ -33,6 +33,7 @@
*/
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/notifier.h>
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe.c 2010-04-15 11:18:19.000000000 +0200
@@ -47,6 +47,7 @@
#include <linux/notifier.h>
#include <linux/mutex.h>
#include <linux/io.h>
+#include <linux/slab.h>
#include <asm/page.h>
#include <asm/pgtable.h>
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe_backend.c 2010-04-15 11:18:42.000000000 +0200
@@ -42,6 +42,7 @@
#include <linux/ctype.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/notifier.h>
#include <asm/io.h>
--- head-2010-05-25.orig/fs/proc/kcore.c 2010-05-25 09:31:21.000000000 +0200
+++ head-2010-05-25/fs/proc/kcore.c 2010-04-15 10:15:01.000000000 +0200
@@ -130,7 +130,7 @@ static void __kcore_update_ram(struct li
}
-#ifdef CONFIG_HIGHMEM
+#if defined(CONFIG_HIGHMEM) || defined(CONFIG_XEN)
/*
* If no highmem, we can assume [0...max_low_pfn) continuous range of memory
* because memory hole is not as big as !HIGHMEM case.
@@ -146,7 +146,11 @@ static int kcore_update_ram(void)
if (!ent)
return -ENOMEM;
ent->addr = (unsigned long)__va(0);
+#ifdef CONFIG_HIGHMEM
ent->size = max_low_pfn << PAGE_SHIFT;
+#else
+ ent->size = max_pfn << PAGE_SHIFT;
+#endif
ent->type = KCORE_RAM;
list_add(&ent->list, &head);
__kcore_update_ram(&head);
--- head-2010-05-25.orig/include/xen/xenbus.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/include/xen/xenbus.h 2010-04-15 11:30:32.000000000 +0200
@@ -39,6 +39,7 @@
#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/init.h>
+#include <linux/slab.h>
#include <linux/err.h>
#include <xen/interface/xen.h>
#include <xen/interface/grant_table.h>
--- head-2010-05-25.orig/kernel/early_res.c 2010-05-25 09:31:21.000000000 +0200
+++ head-2010-05-25/kernel/early_res.c 2010-04-15 10:17:11.000000000 +0200
@@ -321,11 +321,19 @@ void __init free_early(u64 start, u64 en
i = find_overlapped_early(start, end);
r = &early_res[i];
+#ifdef CONFIG_XEN /* Shouldn't it always be this way? */
+ if (i >= max_early_res || r->end < end || r->start > start)
+ panic("free_early on not reserved area: %llx-%llx!",
+ start, end - 1);
+
+ drop_range_partial(i, start, end);
+#else
if (i >= max_early_res || r->end != end || r->start != start)
panic("free_early on not reserved area: %llx-%llx!",
start, end - 1);
drop_range(i);
+#endif
}
void __init free_early_partial(u64 start, u64 end)
@@ -393,9 +401,7 @@ static void __init subtract_early_res(st
int __init get_free_all_memory_range(struct range **rangep, int nodeid)
{
int i, count;
- u64 start = 0, end;
- u64 size;
- u64 mem;
+ u64 end, size, mem = -1ULL;
struct range *range;
int nr_range;
@@ -409,9 +415,11 @@ int __init get_free_all_memory_range(str
end = get_max_mapped();
#ifdef MAX_DMA32_PFN
if (end > (MAX_DMA32_PFN << PAGE_SHIFT))
- start = MAX_DMA32_PFN << PAGE_SHIFT;
+ mem = find_fw_memmap_area(MAX_DMA32_PFN << PAGE_SHIFT, end,
+ size, sizeof(struct range));
#endif
- mem = find_fw_memmap_area(start, end, size, sizeof(struct range));
+ if (mem == -1ULL)
+ mem = find_fw_memmap_area(0, end, size, sizeof(struct range));
if (mem == -1ULL)
panic("can not find more space for range free");
--- head-2010-05-25.orig/kernel/resource.c 2010-05-25 09:31:21.000000000 +0200
+++ head-2010-05-25/kernel/resource.c 2010-04-15 10:17:16.000000000 +0200
@@ -343,6 +343,7 @@ int walk_system_ram_range(unsigned long
#endif
+#if !defined(CONFIG_XEN) || !defined(CONFIG_ARCH_HAS_WALK_MEMORY)
static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg)
{
return 1;
@@ -355,6 +356,7 @@ int __weak page_is_ram(unsigned long pfn
{
return walk_system_ram_range(pfn, 1, NULL, __is_ram) == 1;
}
+#endif
/*
* Find empty slot in the resource tree given range and alignment.
--- head-2010-05-25.orig/lib/swiotlb-xen.c 2010-03-24 16:00:05.000000000 +0100
+++ head-2010-05-25/lib/swiotlb-xen.c 2010-04-15 10:54:48.000000000 +0200
@@ -25,6 +25,8 @@
#include <linux/bootmem.h>
#include <linux/iommu-helper.h>
#include <linux/highmem.h>
+#include <linux/gfp.h>
+
#include <asm/io.h>
#include <asm/pci.h>
#include <asm/dma.h>
--- head-2010-05-25.orig/mm/page_alloc.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/mm/page_alloc.c 2010-03-25 16:31:14.000000000 +0100
@@ -4696,7 +4696,8 @@ static void __setup_per_zone_wmarks(void
high = percpu_pagelist_fraction
? zone->present_pages / percpu_pagelist_fraction
: 5 * zone_batchsize(zone);
- setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+ setup_pagelist_highmark(
+ per_cpu_ptr(zone->pageset, cpu), high);
}
}
#endif