You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qubes-linux-kernel/patches.xen/xen3-patch-2.6.31

7755 lines
230 KiB

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.31
Patch-mainline: 2.6.31
This patch contains the differences between 2.6.30 and 2.6.31.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.31" by xen-port-patches.py
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 14:50:44.000000000 +0100
@@ -21,7 +21,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
- select HAVE_PERF_EVENTS
+ select HAVE_PERF_EVENTS if !XEN
select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
@@ -879,7 +879,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
config X86_MCE
bool "Machine Check / overheating reporting"
- depends on !X86_XEN && !XEN_UNPRIVILEGED_GUEST
+ depends on !XEN_UNPRIVILEGED_GUEST
---help---
Machine Check support allows the processor to notify the
kernel if it detects a problem (e.g. overheating, data corruption).
@@ -912,7 +912,7 @@ config X86_MCE_AMD
config X86_ANCIENT_MCE
bool "Support for old Pentium 5 / WinChip machine checks"
- depends on X86_32 && X86_MCE
+ depends on X86_32 && X86_MCE && !XEN
---help---
Include support for machine check handling on old Pentium 5 or WinChip
systems. These typically need to be enabled explicitely on the command
@@ -1609,6 +1609,7 @@ config KEXEC_JUMP
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP || XEN)
+ default 0x100000 if XEN
default "0x1000000"
---help---
This gives the physical address where the kernel is loaded.
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:50:44.000000000 +0100
@@ -770,9 +770,11 @@ ia32_sys_call_table:
.quad compat_sys_signalfd4
.quad sys_eventfd2
.quad sys_epoll_create1
- .quad sys_dup3 /* 330 */
+ .quad sys_dup3 /* 330 */
.quad sys_pipe2
.quad sys_inotify_init1
.quad compat_sys_preadv
.quad compat_sys_pwritev
+ .quad compat_sys_rt_tgsigqueueinfo /* 335 */
+ .quad sys_perf_counter_open
ia32_syscall_end:
--- head-2011-03-17.orig/arch/x86/include/asm/hw_irq.h 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/arch/x86/include/asm/hw_irq.h 2011-02-01 14:50:44.000000000 +0100
@@ -142,6 +142,7 @@ extern asmlinkage void smp_invalidate_in
extern irqreturn_t smp_reschedule_interrupt(int, void *);
extern irqreturn_t smp_call_function_interrupt(int, void *);
extern irqreturn_t smp_call_function_single_interrupt(int, void *);
+extern irqreturn_t smp_reboot_interrupt(int, void *);
#endif
#endif
--- head-2011-03-17.orig/arch/x86/include/asm/required-features.h 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/arch/x86/include/asm/required-features.h 2011-02-01 14:50:44.000000000 +0100
@@ -48,7 +48,7 @@
#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#if defined(CONFIG_PARAVIRT) || defined(CONFIG_XEN)
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
#define NEED_PGE 0
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/agp.h 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/agp.h 2011-02-01 14:50:44.000000000 +0100
@@ -48,6 +48,7 @@
/* Convert a physical address to an address suitable for the GART. */
#define phys_to_gart(x) phys_to_machine(x)
#define gart_to_phys(x) machine_to_phys(x)
+#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x))
/* GATT allocation. Returns/accepts GATT kernel virtual address. */
#define alloc_gatt_pages(order) ({ \
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:50:44.000000000 +0100
@@ -1,7 +1,6 @@
#ifndef _ASM_X86_DESC_H
#define _ASM_X86_DESC_H
-#ifndef __ASSEMBLY__
#include <asm/desc_defs.h>
#include <asm/ldt.h>
#include <asm/mmu.h>
@@ -406,29 +405,4 @@ static inline void set_system_intr_gate_
}
#endif
-#else
-/*
- * GET_DESC_BASE reads the descriptor base of the specified segment.
- *
- * Args:
- * idx - descriptor index
- * gdt - GDT pointer
- * base - 32bit register to which the base will be written
- * lo_w - lo word of the "base" register
- * lo_b - lo byte of the "base" register
- * hi_b - hi byte of the low word of the "base" register
- *
- * Example:
- * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
- */
-#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
- movb idx * 8 + 4(gdt), lo_b; \
- movb idx * 8 + 7(gdt), hi_b; \
- shll $16, base; \
- movw idx * 8 + 2(gdt), lo_w;
-
-
-#endif /* __ASSEMBLY__ */
-
#endif /* _ASM_X86_DESC_H */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 14:50:44.000000000 +0100
@@ -118,12 +118,9 @@ enum fixed_addresses {
#ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
#endif
- FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */
- FIX_TEXT_POKE1,
+ FIX_TEXT_POKE1, /* reserve 2 pages for text_poke() */
+ FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
__end_of_permanent_fixed_addresses,
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
- FIX_OHCI1394_BASE,
-#endif
/*
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
@@ -136,6 +133,9 @@ enum fixed_addresses {
FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
(__end_of_permanent_fixed_addresses & 255),
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
+ FIX_OHCI1394_BASE,
+#endif
#ifdef CONFIG_X86_32
FIX_WP_TEST,
#endif
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypercall.h 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypercall.h 2011-02-01 14:50:44.000000000 +0100
@@ -265,7 +265,7 @@ HYPERVISOR_memory_op(
unsigned int cmd, void *arg)
{
if (arch_use_lazy_mmu_mode())
- xen_multicall_flush(false);
+ xen_multicall_flush();
return _hypercall2(int, memory_op, cmd, arg);
}
@@ -336,7 +336,7 @@ HYPERVISOR_grant_table_op(
int rc;
if (arch_use_lazy_mmu_mode())
- xen_multicall_flush(false);
+ xen_multicall_flush();
#ifdef GNTTABOP_map_grant_ref
if (cmd == GNTTABOP_map_grant_ref)
#endif
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypervisor.h 2011-03-11 11:13:19.000000000 +0100
@@ -144,7 +144,7 @@ void scrub_pages(void *, unsigned int);
DECLARE_PER_CPU(bool, xen_lazy_mmu);
-void xen_multicall_flush(bool);
+void xen_multicall_flush(void);
int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
unsigned long flags);
@@ -162,7 +162,7 @@ static inline void arch_enter_lazy_mmu_m
static inline void arch_leave_lazy_mmu_mode(void)
{
percpu_write(xen_lazy_mmu, false);
- xen_multicall_flush(false);
+ xen_multicall_flush();
}
#define arch_use_lazy_mmu_mode() unlikely(percpu_read(xen_lazy_mmu))
@@ -176,13 +176,13 @@ static inline void arch_leave_lazy_mmu_m
static inline void arch_flush_lazy_mmu_mode(void)
{
if (arch_use_lazy_mmu_mode())
- xen_multicall_flush(false);
+ xen_multicall_flush();
}
#endif
#else /* !CONFIG_XEN || MODULE */
-static inline void xen_multicall_flush(bool ignore) {}
+static inline void xen_multicall_flush(void) {}
#define arch_use_lazy_mmu_mode() false
#define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
#define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
@@ -356,4 +356,9 @@ MULTI_grant_table_op(multicall_entry_t *
#define uvm_multi(cpumask) ((unsigned long)cpus_addr(cpumask) | UVMF_MULTI)
+#ifdef LINUX
+/* drivers/staging/ use Windows-style types, including VOID */
+#undef VOID
+#endif
+
#endif /* __HYPERVISOR_H__ */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:33:07.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:33:45.000000000 +0100
@@ -1,8 +1,11 @@
#ifndef _ASM_X86_IRQ_VECTORS_H
#define _ASM_X86_IRQ_VECTORS_H
+#define MCE_VECTOR 0x12
+
#ifdef CONFIG_X86_32
# define SYSCALL_VECTOR 0x80
+# define IA32_SYSCALL_VECTOR 0x80
#else
# define IA32_SYSCALL_VECTOR 0x80
#endif
@@ -11,7 +14,8 @@
#define CALL_FUNCTION_VECTOR 1
#define NMI_VECTOR 0x02
#define CALL_FUNC_SINGLE_VECTOR 3
-#define NR_IPIS 4
+#define REBOOT_VECTOR 4
+#define NR_IPIS 5
/*
* The maximum number of vectors supported by i386 processors
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:50:44.000000000 +0100
@@ -97,7 +97,8 @@ extern void pci_iommu_alloc(void);
#define PCI_DMA_BUS_IS_PHYS 0
-#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) || defined(CONFIG_SWIOTLB)
+#if defined(CONFIG_X86_64) || defined(CONFIG_DMAR) || defined(CONFIG_DMA_API_DEBUG) \
+ || defined(CONFIG_SWIOTLB)
#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
dma_addr_t ADDR_NAME;
@@ -136,6 +137,7 @@ extern void pci_iommu_alloc(void);
/* generic pci stuff */
#include <asm-generic/pci.h>
+#define PCIBIOS_MAX_MEM_32 0xffffffff
#ifdef CONFIG_NUMA
/* Returns the node based on pci bus */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 14:50:44.000000000 +0100
@@ -51,7 +51,13 @@ static inline void pte_free(struct mm_st
__pte_free(pte);
}
-extern void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+extern void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte);
+
+static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte,
+ unsigned long address)
+{
+ ___pte_free_tlb(tlb, pte); /* address is accepted but not used here */
+}
static inline void pmd_populate_kernel(struct mm_struct *mm,
pmd_t *pmd, pte_t *pte)
@@ -92,7 +98,13 @@ static inline void pmd_free(struct mm_st
__pmd_free(virt_to_page(pmd));
}
-extern void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+extern void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd);
+
+static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
+ unsigned long address)
+{
+ ___pmd_free_tlb(tlb, pmd); /* address is accepted but not used here */
+}
#ifdef CONFIG_X86_PAE
extern void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd);
@@ -145,7 +157,14 @@ static inline void pud_free(struct mm_st
__pmd_free(virt_to_page(pud));
}
-extern void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
+
+static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
+ unsigned long address)
+{
+ ___pud_free_tlb(tlb, pud); /* address is accepted but not used here */
+}
+
#endif /* PAGETABLE_LEVELS > 3 */
#endif /* PAGETABLE_LEVELS > 2 */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:50:44.000000000 +0100
@@ -2,6 +2,7 @@
#define _ASM_X86_PGTABLE_H
#include <asm/page.h>
+#include <asm/e820.h>
#include <asm/pgtable_types.h>
@@ -78,6 +79,8 @@ static inline void __init paravirt_paget
#define pte_val(x) xen_pte_val(x)
#define __pte(x) xen_make_pte(x)
+#define arch_end_context_switch(prev) do {} while(0)
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -264,10 +267,17 @@ static inline pgprot_t pgprot_modify(pgp
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
-static inline int is_new_memtype_allowed(unsigned long flags,
- unsigned long new_flags)
+static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
+ unsigned long flags,
+ unsigned long new_flags)
{
/*
+ * PAT type is always WB for ISA. So no need to check.
+ */
+ if (is_ISA_range(paddr, paddr + size - 1))
+ return 1;
+
+ /*
* Certain new memtypes are not allowed with certain
* requested memtype:
* - request is uncached, return cannot be write-back
@@ -312,6 +322,11 @@ static inline int pte_present(pte_t a)
return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
}
+static inline int pte_hidden(pte_t pte)
+{
+ return pte_flags(pte) & _PAGE_HIDDEN; /* _PAGE_HIDDEN is 0 unless CONFIG_KMEMCHECK, so this is then always 0 */
+}
+
static inline int pmd_present(pmd_t pmd)
{
#if CONFIG_XEN_COMPAT <= 0x030002
@@ -511,6 +526,8 @@ static inline int pgd_none(pgd_t pgd)
#ifndef __ASSEMBLY__
+#define direct_gbpages 0
+
/* local pte updates need not use xchg for locking */
static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
{
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:50:44.000000000 +0100
@@ -48,13 +48,17 @@ extern void set_pmd_pfn(unsigned long, u
#endif
#if defined(CONFIG_HIGHPTE)
+#define __KM_PTE \
+ (in_nmi() ? KM_NMI_PTE : \
+ in_irq() ? KM_IRQ_PTE : \
+ KM_PTE0) /* KM_PTE0 when neither in_nmi() nor in_irq() holds */
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
pte_index((address)))
#define pte_offset_map_nested(dir, address) \
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0)
+#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
#else
#define pte_offset_map(dir, address) \
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:50:44.000000000 +0100
@@ -33,10 +33,6 @@ extern pgd_t init_level4_pgt[];
extern void paging_init(void);
-#endif /* !__ASSEMBLY__ */
-
-#ifndef __ASSEMBLY__
-
#define pte_ERROR(e) \
printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \
__FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
@@ -142,8 +138,6 @@ static inline int pgd_large(pgd_t pgd) {
#define update_mmu_cache(vma, address, pte) do { } while (0)
-#define direct_gbpages 0
-
/* Encode and de-code a swap entry */
#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1)
@@ -178,10 +172,7 @@ extern void cleanup_highmap(void);
/* fs/proc/kcore.c */
#define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK)
-#define kc_offset_to_vaddr(o) \
- (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \
- ? ((o) | ~__VIRTUAL_MASK) \
- : (o))
+#define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK)
#define __HAVE_ARCH_PTE_SAME
#endif /* !__ASSEMBLY__ */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64_types.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64_types.h 2011-02-01 14:50:44.000000000 +0100
@@ -51,11 +51,12 @@ typedef union { pteval_t pte; unsigned i
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
+/* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */
#define MAX_PHYSMEM_BITS 43
#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
-#define VMALLOC_START _AC(0xffffc20000000000, UL)
-#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
-#define VMEMMAP_START _AC(0xffffe20000000000, UL)
+#define VMALLOC_START _AC(0xffffc90000000000, UL)
+#define VMALLOC_END _AC(0xffffe8ffffffffff, UL)
+#define VMEMMAP_START _AC(0xffffea0000000000, UL)
#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
#define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_types.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_types.h 2011-02-01 14:50:44.000000000 +0100
@@ -18,7 +18,7 @@
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
-#define _PAGE_BIT_UNUSED3 11
+#define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
@@ -41,13 +41,18 @@
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
-#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
#define __HAVE_ARCH_PTE_SPECIAL
+#ifdef CONFIG_KMEMCHECK
+#define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN)
+#else
+#define _PAGE_HIDDEN (_AT(pteval_t, 0))
+#endif
+
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
#else
@@ -330,7 +335,6 @@ typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
extern int nx_enabled;
-extern void set_nx(void);
#define pgprot_writecombine pgprot_writecombine
extern pgprot_t pgprot_writecombine(pgprot_t prot);
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:45:53.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:46:07.000000000 +0100
@@ -146,7 +146,8 @@ struct cpuinfo_x86 {
extern struct cpuinfo_x86 boot_cpu_data;
extern struct cpuinfo_x86 new_cpu_data;
-extern __u32 cleared_cpu_caps[NCAPINTS];
+extern __u32 cpu_caps_cleared[NCAPINTS];
+extern __u32 cpu_caps_set[NCAPINTS];
#ifdef CONFIG_SMP
DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
@@ -427,9 +428,6 @@ DECLARE_PER_CPU(unsigned long, stack_can
extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
-extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
-extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
-extern unsigned short num_cache_leaves;
struct thread_struct {
/* Cached TLS descriptors: */
@@ -444,8 +442,12 @@ struct thread_struct {
unsigned short fsindex;
unsigned short gsindex;
#endif
+#ifdef CONFIG_X86_32
unsigned long ip;
+#endif
+#ifdef CONFIG_X86_64
unsigned long fs;
+#endif
unsigned long gs;
/* Hardware debugging registers: */
unsigned long debugreg0;
@@ -474,14 +476,8 @@ struct thread_struct {
unsigned io_bitmap_max;
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
unsigned long debugctlmsr;
-#ifdef CONFIG_X86_DS
-/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
+ /* Debug Store context; see asm/ds.h */
struct ds_context *ds_ctx;
-#endif /* CONFIG_X86_DS */
-#ifdef CONFIG_X86_PTRACE_BTS
-/* the signal to send on a bts buffer overflow */
- unsigned int bts_ovfl_signal;
-#endif /* CONFIG_X86_PTRACE_BTS */
};
static inline unsigned long xen_get_debugreg(int regno)
@@ -751,6 +747,21 @@ static inline unsigned long get_debugctl
return debugctlmsr;
}
+static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
+{
+ u64 debugctlmsr = 0;
+ u32 val1, val2; /* low and high 32 bits of the MSR value */
+
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return 0; /* family < 6 without CONFIG_X86_DEBUGCTLMSR: MSR is not read */
+#endif
+ rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
+ debugctlmsr = val1 | ((u64)val2 << 32);
+
+ return debugctlmsr; /* NOTE(review): u64 returned as unsigned long; confirm narrowing is acceptable on 32-bit */
+}
+
static inline void update_debugctlmsr(unsigned long debugctlmsr)
{
#ifndef CONFIG_X86_DEBUGCTLMSR
@@ -760,6 +771,18 @@ static inline void update_debugctlmsr(un
wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
}
+static inline void update_debugctlmsr_on_cpu(int cpu,
+ unsigned long debugctlmsr)
+{
+#ifndef CONFIG_X86_DEBUGCTLMSR
+ if (boot_cpu_data.x86 < 6)
+ return; /* family < 6 without CONFIG_X86_DEBUGCTLMSR: MSR is not written */
+#endif
+ wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
+ (u32)((u64)debugctlmsr), /* low 32 bits */
+ (u32)((u64)debugctlmsr >> 32)); /* high 32 bits */
+}
+
/*
* from system description table in BIOS. Mostly for MCA use, but
* others may find it useful:
@@ -770,6 +793,7 @@ extern unsigned int BIOS_revision;
/* Boot loader type from the setup header: */
extern int bootloader_type;
+extern int bootloader_version;
extern char ignore_fpu_irq;
@@ -830,7 +854,6 @@ static inline void spin_lock_prefetch(co
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
- .fs = __KERNEL_PERCPU, \
}
/*
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp.h 2011-02-01 14:50:44.000000000 +0100
@@ -198,7 +198,7 @@ extern unsigned disabled_cpus __cpuinitd
static inline int logical_smp_processor_id(void)
{
/* we don't want to mark this access volatile - bad code generation */
- return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
+ return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR));
}
#endif
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:50:44.000000000 +0100
@@ -432,4 +432,8 @@ static inline void __raw_write_unlock(ra
#define _raw_read_relax(lock) cpu_relax()
#define _raw_write_relax(lock) cpu_relax()
+/* The {read|write|spin}_lock() on x86 are full memory barriers. */
+static inline void smp_mb__after_lock(void) { }
+#define ARCH_HAS_SMP_MB_AFTER_LOCK
+
#endif /* _ASM_X86_SPINLOCK_H */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/tlbflush.h 2011-02-01 14:50:44.000000000 +0100
@@ -111,6 +111,6 @@ static inline void flush_tlb_kernel_rang
flush_tlb_all();
}
-extern void zap_low_mappings(void);
+extern void zap_low_mappings(bool early);
#endif /* _ASM_X86_TLBFLUSH_H */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/xor.h 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/xor.h 2011-02-01 14:50:44.000000000 +0100
@@ -1,4 +1,7 @@
-#ifdef CONFIG_X86_32
+#ifdef CONFIG_KMEMCHECK
+/* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */
+# include <asm-generic/xor.h>
+#elif defined(CONFIG_X86_32)
# include "../../asm/xor_32.h"
#else
# include "xor_64.h"
--- head-2011-03-17.orig/arch/x86/kernel/Makefile 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/Makefile 2011-02-01 14:50:44.000000000 +0100
@@ -128,6 +128,6 @@ ifeq ($(CONFIG_X86_64),y)
endif
disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
- i8259.o irqinit_$(BITS).o pci-swiotlb.o reboot.o smpboot.o tsc.o \
- tsc_sync.o uv_%.o vsmp_64.o
+ i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o \
+ uv_%.o vsmp_64.o
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -107,7 +107,7 @@ int acpi_save_state_mem(void)
initial_gs = per_cpu_offset(smp_processor_id());
#endif
initial_code = (unsigned long)wakeup_long64;
- saved_magic = 0x123456789abcdef0;
+ saved_magic = 0x123456789abcdef0L;
#endif /* CONFIG_64BIT */
#endif
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -51,6 +51,7 @@
#include <asm/i8259.h>
#include <asm/nmi.h>
#include <asm/setup.h>
+#include <asm/hw_irq.h>
#include <asm/apic.h>
@@ -135,12 +136,9 @@ struct irq_pin_list {
struct irq_pin_list *next;
};
-static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
+static struct irq_pin_list *get_one_free_irq_2_pin(int node)
{
struct irq_pin_list *pin;
- int node;
-
- node = cpu_to_node(cpu);
pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
@@ -154,9 +152,6 @@ struct irq_cfg {
unsigned move_cleanup_count;
u8 vector;
u8 move_in_progress : 1;
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- u8 move_desc_pending : 1;
-#endif
};
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -188,16 +183,18 @@ int __init arch_early_irq_init(void)
struct irq_cfg *cfg;
struct irq_desc *desc;
int count;
+ int node;
int i;
cfg = irq_cfgx;
count = ARRAY_SIZE(irq_cfgx);
+ node= cpu_to_node(boot_cpu_id);
for (i = 0; i < count; i++) {
desc = irq_to_desc(i);
desc->chip_data = &cfg[i];
- alloc_bootmem_cpumask_var(&cfg[i].domain);
- alloc_bootmem_cpumask_var(&cfg[i].old_domain);
+ zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
+ zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
if (i < NR_IRQS_LEGACY)
cpumask_setall(cfg[i].domain);
}
@@ -218,12 +215,9 @@ static struct irq_cfg *irq_cfg(unsigned
return cfg;
}
-static struct irq_cfg *get_one_free_irq_cfg(int cpu)
+static struct irq_cfg *get_one_free_irq_cfg(int node)
{
struct irq_cfg *cfg;
- int node;
-
- node = cpu_to_node(cpu);
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
@@ -244,13 +238,13 @@ static struct irq_cfg *get_one_free_irq_
return cfg;
}
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+int arch_init_chip_data(struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
cfg = desc->chip_data;
if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(cpu);
+ desc->chip_data = get_one_free_irq_cfg(node);
if (!desc->chip_data) {
printk(KERN_ERR "can not alloc irq_cfg\n");
BUG_ON(1);
@@ -260,10 +254,9 @@ int arch_init_chip_data(struct irq_desc
return 0;
}
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
-
+/* for move_irq_desc */
static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
{
struct irq_pin_list *old_entry, *head, *tail, *entry;
@@ -272,7 +265,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_
if (!old_entry)
return;
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry)
return;
@@ -282,7 +275,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_
tail = entry;
old_entry = old_entry->next;
while (old_entry) {
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry) {
entry = head;
while (entry) {
@@ -322,12 +315,12 @@ static void free_irq_2_pin(struct irq_cf
}
void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int cpu)
+ struct irq_desc *desc, int node)
{
struct irq_cfg *cfg;
struct irq_cfg *old_cfg;
- cfg = get_one_free_irq_cfg(cpu);
+ cfg = get_one_free_irq_cfg(node);
if (!cfg)
return;
@@ -338,7 +331,7 @@ void arch_init_copy_chip_data(struct irq
memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
- init_copy_irq_2_pin(old_cfg, cfg, cpu);
+ init_copy_irq_2_pin(old_cfg, cfg, node);
}
static void free_irq_cfg(struct irq_cfg *old_cfg)
@@ -362,19 +355,7 @@ void arch_free_chip_data(struct irq_desc
old_desc->chip_data = NULL;
}
}
-
-static void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg = desc->chip_data;
-
- if (!cfg->move_in_progress) {
- /* it means that domain is not changed */
- if (!cpumask_intersects(desc->affinity, mask))
- cfg->move_desc_pending = 1;
- }
-}
-#endif
+/* end for move_irq_desc */
#else
static struct irq_cfg *irq_cfg(unsigned int irq)
@@ -384,13 +365,6 @@ static struct irq_cfg *irq_cfg(unsigned
#endif
-#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
-static inline void
-set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
-}
-#endif
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -522,7 +496,8 @@ static struct IO_APIC_route_entry ioapic
static void
__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
- union entry_union eu;
+ union entry_union eu = {{0, 0}};
+
eu.entry = e;
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
@@ -553,132 +528,18 @@ static void ioapic_mask_entry(int apic,
spin_unlock_irqrestore(&ioapic_lock, flags);
}
-#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
-{
- cpumask_var_t cleanup_mask;
-
- if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
- unsigned int i;
- cfg->move_cleanup_count = 0;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- cfg->move_cleanup_count++;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
- } else {
- cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
- apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
- free_cpumask_var(cleanup_mask);
- }
- cfg->move_in_progress = 0;
-}
-
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
- u8 vector = cfg->vector;
-
- entry = cfg->irq_2_pin;
- for (;;) {
- unsigned int reg;
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- /*
- * With interrupt-remapping, destination information comes
- * from interrupt-remapping table entry.
- */
- if (!irq_remapped(irq))
- io_apic_write(apic, 0x11 + pin*2, dest);
- reg = io_apic_read(apic, 0x10 + pin*2);
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
- reg |= vector;
- io_apic_modify(apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = entry->next;
- }
-}
-
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
-/*
- * Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
- * leaves desc->affinity untouched.
- */
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg;
- unsigned int irq;
-
- if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
-
- irq = desc->irq;
- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
- return BAD_APICID;
-
- /* check that before desc->addinity get updated */
- set_extra_move_desc(desc, mask);
-
- cpumask_copy(desc->affinity, mask);
-
- return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
-}
-
-static void
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int dest;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- dest = set_desc_affinity(desc, mask);
- if (dest != BAD_APICID) {
- /* Only the high 8 bits are valid. */
- dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, cfg);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-static void
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
-
- set_ioapic_affinity_irq_desc(desc, mask);
-}
-#endif /* CONFIG_SMP */
-
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list *entry;
entry = cfg->irq_2_pin;
if (!entry) {
- entry = get_one_free_irq_2_pin(cpu);
+ entry = get_one_free_irq_2_pin(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
apic, pin);
@@ -698,7 +559,7 @@ static void add_pin_to_irq_cpu(struct ir
entry = entry->next;
}
- entry->next = get_one_free_irq_2_pin(cpu);
+ entry->next = get_one_free_irq_2_pin(node);
entry = entry->next;
entry->apic = apic;
entry->pin = pin;
@@ -707,7 +568,7 @@ static void add_pin_to_irq_cpu(struct ir
/*
* Reroute an IRQ to a different pin.
*/
-static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
+static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
int oldapic, int oldpin,
int newapic, int newpin)
{
@@ -727,7 +588,7 @@ static void __init replace_pin_at_irq_cp
/* why? call replace before add? */
if (!replaced)
- add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
+ add_pin_to_irq_node(cfg, node, newapic, newpin);
}
static inline void io_apic_modify_irq(struct irq_cfg *cfg,
@@ -847,7 +708,7 @@ static void clear_IO_APIC (void)
clear_IO_APIC_pin(apic, pin);
}
#else
-#define add_pin_to_irq_cpu(cfg, cpu, apic, pin)
+#define add_pin_to_irq_node(cfg, node, apic, pin)
#endif /* !CONFIG_XEN */
#ifdef CONFIG_X86_32
@@ -888,7 +749,7 @@ static int __init ioapic_pirq_setup(char
__setup("pirq=", ioapic_pirq_setup);
#endif /* CONFIG_X86_32 */
-#ifdef CONFIG_INTR_REMAP
+#ifndef CONFIG_XEN
struct IO_APIC_route_entry **alloc_ioapic_entries(void)
{
int apic;
@@ -986,20 +847,6 @@ int restore_IO_APIC_setup(struct IO_APIC
return 0;
}
-void reinit_intr_remapped_IO_APIC(int intr_remapping,
- struct IO_APIC_route_entry **ioapic_entries)
-
-{
- /*
- * for now plain restore of previous settings.
- * TBD: In the case of OS enabling interrupt-remapping,
- * IO-APIC RTE's need to be setup to point to interrupt-remapping
- * table entries. for now, do a plain restore, and wait for
- * the setup_IO_APIC_irqs() to do proper initialization.
- */
- restore_IO_APIC_setup(ioapic_entries);
-}
-
void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
{
int apic;
@@ -1009,7 +856,7 @@ void free_ioapic_entries(struct IO_APIC_
kfree(ioapic_entries);
}
-#endif
+#endif /* CONFIG_XEN */
/*
* Find the IRQ entry number of a certain pin.
@@ -1072,54 +919,6 @@ static int __init find_isa_irq_apic(int
}
#endif
-/*
- * Find a specific PCI IRQ entry.
- * Not an __init, possibly needed by modules
- */
-static int pin_2_irq(int idx, int apic, int pin);
-
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
-{
- int apic, i, best_guess = -1;
-
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
- bus, slot, pin);
- if (test_bit(bus, mp_bus_not_pci)) {
- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
- return -1;
- }
- for (i = 0; i < mp_irq_entries; i++) {
- int lbus = mp_irqs[i].srcbus;
-
- for (apic = 0; apic < nr_ioapics; apic++)
- if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
- mp_irqs[i].dstapic == MP_APIC_ALL)
- break;
-
- if (!test_bit(lbus, mp_bus_not_pci) &&
- !mp_irqs[i].irqtype &&
- (bus == lbus) &&
- (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
- int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
-
- if (!(apic || IO_APIC_IRQ(irq)))
- continue;
-
- if (pin == (mp_irqs[i].srcbusirq & 3))
- return irq;
- /*
- * Use the first all-but-pin matching entry as a
- * best-guess fuzzy result for broken mptables.
- */
- if (best_guess < 0)
- best_guess = irq;
- }
- }
- return best_guess;
-}
-
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
-
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
/*
* EISA Edge/Level control register, ELCR
@@ -1338,6 +1137,64 @@ static int pin_2_irq(int idx, int apic,
return irq;
}
+/*
+ * Find a specific PCI IRQ entry.
+ * Not an __init, possibly needed by modules
+ */
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int apic, i, best_guess = -1;
+
+ apic_printk(APIC_DEBUG,
+ "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
+ bus, slot, pin);
+ if (test_bit(bus, mp_bus_not_pci)) {
+ apic_printk(APIC_VERBOSE,
+ "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
+ return -1;
+ }
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].srcbus;
+
+ for (apic = 0; apic < nr_ioapics; apic++)
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
+ mp_irqs[i].dstapic == MP_APIC_ALL)
+ break;
+
+ if (!test_bit(lbus, mp_bus_not_pci) &&
+ !mp_irqs[i].irqtype &&
+ (bus == lbus) &&
+ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
+ int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
+
+ if (!(apic || IO_APIC_IRQ(irq)))
+ continue;
+
+ if (pin == (mp_irqs[i].srcbusirq & 3)) {
+ set_io_apic_irq_attr(irq_attr, apic,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ return irq;
+ }
+ /*
+ * Use the first all-but-pin matching entry as a
+ * best-guess fuzzy result for broken mptables.
+ */
+ if (best_guess < 0) {
+ set_io_apic_irq_attr(irq_attr, apic,
+ mp_irqs[i].dstirq,
+ irq_trigger(i),
+ irq_polarity(i));
+ best_guess = irq;
+ }
+ }
+ }
+ return best_guess;
+}
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
+
#ifndef CONFIG_XEN
void lock_vector_lock(void)
{
@@ -1609,6 +1466,9 @@ int setup_ioapic_entry(int apic_id, int
irte.vector = vector;
irte.dest_id = IRTE_DEST(destination);
+ /* Set source-id of interrupt request */
+ set_ioapic_sid(&irte, apic_id);
+
modify_irte(irq, &irte);
ir_entry->index2 = (index >> 15) & 0x1;
@@ -1684,63 +1544,75 @@ static void setup_IO_APIC_irq(int apic_i
ioapic_write_entry(apic_id, pin, entry);
}
+static struct {
+ DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
+} mp_ioapic_routing[MAX_IO_APICS];
+
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id, pin, idx, irq;
+ int apic_id = 0, pin, idx, irq;
int notcon = 0;
struct irq_desc *desc;
struct irq_cfg *cfg;
- int cpu = boot_cpu_id;
+ int node = cpu_to_node(boot_cpu_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
- for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
- for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+#ifdef CONFIG_ACPI
+ if (!acpi_disabled && acpi_ioapic) {
+ apic_id = mp_find_ioapic(0);
+ if (apic_id < 0)
+ apic_id = 0;
+ }
+#endif
- idx = find_irq_entry(apic_id, pin, mp_INT);
- if (idx == -1) {
- if (!notcon) {
- notcon = 1;
- apic_printk(APIC_VERBOSE,
- KERN_DEBUG " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- } else
- apic_printk(APIC_VERBOSE, " %d-%d",
- mp_ioapics[apic_id].apicid, pin);
- continue;
- }
- if (notcon) {
+ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+ idx = find_irq_entry(apic_id, pin, mp_INT);
+ if (idx == -1) {
+ if (!notcon) {
+ notcon = 1;
apic_printk(APIC_VERBOSE,
- " (apicid-pin) not connected\n");
- notcon = 0;
- }
+ KERN_DEBUG " %d-%d",
+ mp_ioapics[apic_id].apicid, pin);
+ } else
+ apic_printk(APIC_VERBOSE, " %d-%d",
+ mp_ioapics[apic_id].apicid, pin);
+ continue;
+ }
+ if (notcon) {
+ apic_printk(APIC_VERBOSE,
+ " (apicid-pin) not connected\n");
+ notcon = 0;
+ }
- irq = pin_2_irq(idx, apic_id, pin);
+ irq = pin_2_irq(idx, apic_id, pin);
#ifdef CONFIG_XEN
- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
- continue;
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
+ continue;
#else
- /*
- * Skip the timer IRQ if there's a quirk handler
- * installed and if it returns 1:
- */
- if (apic->multi_timer_check &&
- apic->multi_timer_check(apic_id, irq))
- continue;
+ /*
+ * Skip the timer IRQ if there's a quirk handler
+ * installed and if it returns 1:
+ */
+ if (apic->multi_timer_check &&
+ apic->multi_timer_check(apic_id, irq))
+ continue;
#endif
- desc = irq_to_desc_alloc_cpu(irq, cpu);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
- continue;
- }
- cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
-
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
- irq_trigger(idx), irq_polarity(idx));
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ continue;
}
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, apic_id, pin);
+ /*
+ * don't mark it in pin_programmed, so later acpi could
+ * set it correctly when irq < 16
+ */
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ irq_trigger(idx), irq_polarity(idx));
}
if (notcon)
@@ -1908,36 +1780,30 @@ __apicdebuginit(void) print_IO_APIC(void
return;
}
-__apicdebuginit(void) print_APIC_bitfield(int base)
+__apicdebuginit(void) print_APIC_field(int base)
{
- unsigned int v;
- int i, j;
+ int i;
if (apic_verbosity == APIC_QUIET)
return;
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
- for (i = 0; i < 8; i++) {
- v = apic_read(base + i*0x10);
- for (j = 0; j < 32; j++) {
- if (v & (1<<j))
- printk("1");
- else
- printk("0");
- }
- printk("\n");
- }
+ printk(KERN_DEBUG);
+
+ for (i = 0; i < 8; i++)
+ printk(KERN_CONT "%08x", apic_read(base + i*0x10));
+
+ printk(KERN_CONT "\n");
}
__apicdebuginit(void) print_local_APIC(void *dummy)
{
- unsigned int v, ver, maxlvt;
+ unsigned int i, v, ver, maxlvt;
u64 icr;
if (apic_verbosity == APIC_QUIET)
return;
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
+ printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
@@ -1978,11 +1844,11 @@ __apicdebuginit(void) print_local_APIC(v
printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
printk(KERN_DEBUG "... APIC ISR field:\n");
- print_APIC_bitfield(APIC_ISR);
+ print_APIC_field(APIC_ISR);
printk(KERN_DEBUG "... APIC TMR field:\n");
- print_APIC_bitfield(APIC_TMR);
+ print_APIC_field(APIC_TMR);
printk(KERN_DEBUG "... APIC IRR field:\n");
- print_APIC_bitfield(APIC_IRR);
+ print_APIC_field(APIC_IRR);
if (APIC_INTEGRATED(ver)) { /* !82489DX */
if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
@@ -2019,6 +1885,18 @@ __apicdebuginit(void) print_local_APIC(v
printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
v = apic_read(APIC_TDCR);
printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
+
+ if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+ v = apic_read(APIC_EFEAT);
+ maxlvt = (v >> 16) & 0xff;
+ printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
+ v = apic_read(APIC_ECTRL);
+ printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
+ for (i = 0; i < maxlvt; i++) {
+ v = apic_read(APIC_EILVTn(i));
+ printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
+ }
+ }
printk("\n");
}
@@ -2067,6 +1945,11 @@ __apicdebuginit(void) print_PIC(void)
__apicdebuginit(int) print_all_ICs(void)
{
print_PIC();
+
+ /* don't print out if apic is not there */
+ if (!cpu_has_apic || disable_apic)
+ return 0;
+
print_all_local_APICs();
print_IO_APIC();
@@ -2188,7 +2071,9 @@ void disable_IO_APIC(void)
/*
* Use virtual wire A mode when interrupt remapping is enabled.
*/
- disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
+ if (cpu_has_apic)
+ disconnect_bsp_APIC(!intr_remapping_enabled &&
+ ioapic_i8259.pin != -1);
}
#ifdef CONFIG_X86_32
@@ -2427,7 +2312,119 @@ static int ioapic_retrigger_irq(unsigned
* races.
*/
-#ifdef CONFIG_SMP
+#ifdef CONFIG_SMP
+static void send_cleanup_vector(struct irq_cfg *cfg)
+{
+ cpumask_var_t cleanup_mask;
+
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+ unsigned int i;
+ cfg->move_cleanup_count = 0;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ cfg->move_cleanup_count++;
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
+ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
+ } else {
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
+ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+ free_cpumask_var(cleanup_mask);
+ }
+ cfg->move_in_progress = 0;
+}
+
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+ u8 vector = cfg->vector;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+ unsigned int reg;
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ /*
+ * With interrupt-remapping, destination information comes
+ * from interrupt-remapping table entry.
+ */
+ if (!irq_remapped(irq))
+ io_apic_write(apic, 0x11 + pin*2, dest);
+ reg = io_apic_read(apic, 0x10 + pin*2);
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
+ reg |= vector;
+ io_apic_modify(apic, 0x10 + pin*2, reg);
+ if (!entry->next)
+ break;
+ entry = entry->next;
+ }
+}
+
+static int
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
+
+/*
+ * Either sets desc->affinity to a valid value, and returns
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * leaves desc->affinity untouched.
+ */
+static unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned int irq;
+
+ if (!cpumask_intersects(mask, cpu_online_mask))
+ return BAD_APICID;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+ if (assign_irq_vector(irq, cfg, mask))
+ return BAD_APICID;
+
+ cpumask_copy(desc->affinity, mask);
+
+ return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+}
+
+static int
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int dest;
+ unsigned int irq;
+ int ret = -1;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ dest = set_desc_affinity(desc, mask);
+ if (dest != BAD_APICID) {
+ /* Only the high 8 bits are valid. */
+ dest = SET_APIC_LOGICAL_ID(dest);
+ __target_IO_APIC_irq(irq, dest, cfg);
+ ret = 0;
+ }
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+ return ret;
+}
+
+static int
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
+{
+ struct irq_desc *desc;
+
+ desc = irq_to_desc(irq);
+
+ return set_ioapic_affinity_irq_desc(desc, mask);
+}
#ifdef CONFIG_INTR_REMAP
@@ -2442,26 +2439,25 @@ static int ioapic_retrigger_irq(unsigned
* Real vector that is used for interrupting cpu will be coming from
* the interrupt-remapping table entry.
*/
-static void
+static int
migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
{
struct irq_cfg *cfg;
struct irte irte;
unsigned int dest;
unsigned int irq;
+ int ret = -1;
if (!cpumask_intersects(mask, cpu_online_mask))
- return;
+ return ret;
irq = desc->irq;
if (get_irte(irq, &irte))
- return;
+ return ret;
cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return;
-
- set_extra_move_desc(desc, mask);
+ return ret;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
@@ -2477,27 +2473,30 @@ migrate_ioapic_irq_desc(struct irq_desc
send_cleanup_vector(cfg);
cpumask_copy(desc->affinity, mask);
+
+ return 0;
}
/*
* Migrates the IRQ destination in the process context.
*/
-static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
- migrate_ioapic_irq_desc(desc, mask);
+ return migrate_ioapic_irq_desc(desc, mask);
}
-static void set_ir_ioapic_affinity_irq(unsigned int irq,
+static int set_ir_ioapic_affinity_irq(unsigned int irq,
const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
- set_ir_ioapic_affinity_irq_desc(desc, mask);
+ return set_ir_ioapic_affinity_irq_desc(desc, mask);
}
#else
-static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
+static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
const struct cpumask *mask)
{
+ return 0;
}
#endif
@@ -2559,86 +2558,19 @@ static void irq_complete_move(struct irq
struct irq_cfg *cfg = desc->chip_data;
unsigned vector, me;
- if (likely(!cfg->move_in_progress)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- if (likely(!cfg->move_desc_pending))
- return;
-
- /* domain has not changed, but affinity did */
- me = smp_processor_id();
- if (cpumask_test_cpu(me, desc->affinity)) {
- *descp = desc = move_irq_desc(desc, me);
- /* get the new one */
- cfg = desc->chip_data;
- cfg->move_desc_pending = 0;
- }
-#endif
+ if (likely(!cfg->move_in_progress))
return;
- }
vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
- if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
-#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
- *descp = desc = move_irq_desc(desc, me);
- /* get the new one */
- cfg = desc->chip_data;
-#endif
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
- }
}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- int apic, pin;
- struct irq_pin_list *entry;
-
- entry = cfg->irq_2_pin;
- for (;;) {
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- io_apic_eoi(apic, pin);
- entry = entry->next;
- }
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifdef CONFIG_X86_X2APIC
-static void ack_x2apic_level(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
- ack_x2APIC_irq();
- eoi_ioapic_irq(desc);
-}
-
-static void ack_x2apic_edge(unsigned int irq)
-{
- ack_x2APIC_irq();
-}
-#endif
-
static void ack_apic_edge(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2702,9 +2634,6 @@ static void ack_apic_level(unsigned int
*/
ack_APIC_irq();
- if (irq_remapped(irq))
- eoi_ioapic_irq(desc);
-
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2751,22 +2680,50 @@ static void ack_apic_level(unsigned int
}
#ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ int apic, pin;
+ struct irq_pin_list *entry;
+
+ entry = cfg->irq_2_pin;
+ for (;;) {
+
+ if (!entry)
+ break;
+
+ apic = entry->apic;
+ pin = entry->pin;
+ io_apic_eoi(apic, pin);
+ entry = entry->next;
+ }
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_irq(irq, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void ir_ack_apic_edge(unsigned int irq)
{
-#ifdef CONFIG_X86_X2APIC
- if (x2apic_enabled())
- return ack_x2apic_edge(irq);
-#endif
- return ack_apic_edge(irq);
+ ack_APIC_irq();
}
static void ir_ack_apic_level(unsigned int irq)
{
-#ifdef CONFIG_X86_X2APIC
- if (x2apic_enabled())
- return ack_x2apic_level(irq);
-#endif
- return ack_apic_level(irq);
+ struct irq_desc *desc = irq_to_desc(irq);
+
+ ack_APIC_irq();
+ eoi_ioapic_irq(desc);
}
#endif /* CONFIG_INTR_REMAP */
@@ -2977,7 +2934,7 @@ static inline void __init check_timer(vo
{
struct irq_desc *desc = irq_to_desc(0);
struct irq_cfg *cfg = desc->chip_data;
- int cpu = boot_cpu_id;
+ int node = cpu_to_node(boot_cpu_id);
int apic1, pin1, apic2, pin2;
unsigned long flags;
int no_pin1 = 0;
@@ -3043,7 +3000,7 @@ static inline void __init check_timer(vo
* Ok, does IRQ0 through the IOAPIC work?
*/
if (no_pin1) {
- add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
+ add_pin_to_irq_node(cfg, node, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
} else {
/* for edge trigger, setup_IO_APIC_irq already
@@ -3080,7 +3037,7 @@ static inline void __init check_timer(vo
/*
* legacy devices should be connected to IO APIC #0
*/
- replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
+ replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
enable_8259A_irq(0);
if (timer_irq_works()) {
@@ -3310,14 +3267,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY;
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int irq_want)
+unsigned int create_irq_nr(unsigned int irq_want, int node)
{
/* Allocate an unused irq */
unsigned int irq;
unsigned int new;
unsigned long flags;
struct irq_cfg *cfg_new = NULL;
- int cpu = boot_cpu_id;
struct irq_desc *desc_new = NULL;
irq = 0;
@@ -3326,7 +3282,7 @@ unsigned int create_irq_nr(unsigned int
spin_lock_irqsave(&vector_lock, flags);
for (new = irq_want; new < nr_irqs; new++) {
- desc_new = irq_to_desc_alloc_cpu(new, cpu);
+ desc_new = irq_to_desc_alloc_node(new, node);
if (!desc_new) {
printk(KERN_INFO "can not get irq_desc for %d\n", new);
continue;
@@ -3335,6 +3291,9 @@ unsigned int create_irq_nr(unsigned int
if (cfg_new->vector != 0)
continue;
+
+ desc_new = move_irq_desc(desc_new, node);
+
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
break;
@@ -3352,11 +3311,12 @@ unsigned int create_irq_nr(unsigned int
int create_irq(void)
{
+ int node = cpu_to_node(boot_cpu_id);
unsigned int irq_want;
int irq;
irq_want = nr_irqs_gsi;
- irq = create_irq_nr(irq_want);
+ irq = create_irq_nr(irq_want, node);
if (irq == 0)
irq = -1;
@@ -3422,6 +3382,9 @@ static int msi_compose_msg(struct pci_de
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
+ /* Set source-id of interrupt request */
+ set_msi_sid(&irte, pdev);
+
modify_irte(irq, &irte);
msg->address_hi = MSI_ADDR_BASE_HI;
@@ -3459,7 +3422,7 @@ static int msi_compose_msg(struct pci_de
}
#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3468,7 +3431,7 @@ static void set_msi_irq_affinity(unsigne
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3480,13 +3443,15 @@ static void set_msi_irq_affinity(unsigne
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg_desc(desc, &msg);
+
+ return 0;
}
#ifdef CONFIG_INTR_REMAP
/*
* Migrate the MSI irq to another cpumask. This migration is
* done in the process context using interrupt-remapping hardware.
*/
-static void
+static int
ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -3495,11 +3460,11 @@ ir_set_msi_irq_affinity(unsigned int irq
struct irte irte;
if (get_irte(irq, &irte))
- return;
+ return -1;
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
irte.vector = cfg->vector;
irte.dest_id = IRTE_DEST(dest);
@@ -3516,6 +3481,8 @@ ir_set_msi_irq_affinity(unsigned int irq
*/
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
+
+ return 0;
}
#endif
@@ -3611,15 +3578,17 @@ int arch_setup_msi_irqs(struct pci_dev *
unsigned int irq_want;
struct intel_iommu *iommu = NULL;
int index = 0;
+ int node;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
return 1;
+ node = dev_to_node(&dev->dev);
irq_want = nr_irqs_gsi;
sub_handle = 0;
list_for_each_entry(msidesc, &dev->msi_list, list) {
- irq = create_irq_nr(irq_want);
+ irq = create_irq_nr(irq_want, node);
if (irq == 0)
return -1;
irq_want = irq + 1;
@@ -3669,7 +3638,7 @@ void arch_teardown_msi_irq(unsigned int
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
#ifdef CONFIG_SMP
-static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3678,7 +3647,7 @@ static void dmar_msi_set_affinity(unsign
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3690,11 +3659,13 @@ static void dmar_msi_set_affinity(unsign
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
dmar_msi_write(irq, &msg);
+
+ return 0;
}
#endif /* CONFIG_SMP */
-struct irq_chip dmar_msi_type = {
+static struct irq_chip dmar_msi_type = {
.name = "DMAR_MSI",
.unmask = dmar_msi_unmask,
.mask = dmar_msi_mask,
@@ -3723,7 +3694,7 @@ int arch_setup_dmar_msi(unsigned int irq
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3732,7 +3703,7 @@ static void hpet_msi_set_affinity(unsign
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
@@ -3744,6 +3715,8 @@ static void hpet_msi_set_affinity(unsign
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
hpet_msi_write(irq, &msg);
+
+ return 0;
}
#endif /* CONFIG_SMP */
@@ -3800,7 +3773,7 @@ static void target_ht_irq(unsigned int i
write_ht_irq_msg(irq, &msg);
}
-static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
{
struct irq_desc *desc = irq_to_desc(irq);
struct irq_cfg *cfg;
@@ -3808,11 +3781,13 @@ static void set_ht_irq_affinity(unsigned
dest = set_desc_affinity(desc, mask);
if (dest == BAD_APICID)
- return;
+ return -1;
cfg = desc->chip_data;
target_ht_irq(irq, dest, cfg->vector);
+
+ return 0;
}
#endif
@@ -3887,6 +3862,8 @@ int arch_enable_uv_irq(char *irq_name, u
unsigned long flags;
int err;
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
cfg = irq_cfg(irq);
err = assign_irq_vector(irq, cfg, eligible_cpu);
@@ -3900,19 +3877,20 @@ int arch_enable_uv_irq(char *irq_name, u
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
+ entry->vector = cfg->vector;
+ entry->delivery_mode = apic->irq_delivery_mode;
+ entry->dest_mode = apic->irq_dest_mode;
+ entry->polarity = 0;
+ entry->trigger = 0;
+ entry->mask = 0;
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
mmr_pnode = uv_blade_to_pnode(mmr_blade);
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
+ if (cfg->move_in_progress)
+ send_cleanup_vector(cfg);
+
return irq;
}
@@ -3926,10 +3904,10 @@ void arch_disable_uv_irq(int mmr_blade,
struct uv_IO_APIC_route_entry *entry;
int mmr_pnode;
+ BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
+
mmr_value = 0;
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
entry->mask = 1;
mmr_pnode = uv_blade_to_pnode(mmr_blade);
@@ -3995,14 +3973,85 @@ int __init arch_probe_nr_irqs(void)
#endif
#endif /* CONFIG_XEN */
+static int __io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+ int node;
+ int ioapic, pin;
+ int trigger, polarity;
+
+ ioapic = irq_attr->ioapic;
+#ifdef CONFIG_XEN
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
+ ioapic, irq);
+ return -EINVAL;
+ }
+#endif
+ if (!IO_APIC_IRQ(irq)) {
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
+ ioapic);
+ return -EINVAL;
+ }
+
+ if (dev)
+ node = dev_to_node(dev);
+ else
+ node = cpu_to_node(boot_cpu_id);
+
+ desc = irq_to_desc_alloc_node(irq, node);
+ if (!desc) {
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ return 0;
+ }
+
+ pin = irq_attr->ioapic_pin;
+ trigger = irq_attr->trigger;
+ polarity = irq_attr->polarity;
+
+ /*
+ * IRQs < 16 are already in the irq_2_pin[] map
+ */
+ if (irq >= NR_IRQS_LEGACY) {
+ cfg = desc->chip_data;
+ add_pin_to_irq_node(cfg, node, ioapic, pin);
+ }
+
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+
+ return 0;
+}
+
+int io_apic_set_pci_routing(struct device *dev, int irq,
+ struct io_apic_irq_attr *irq_attr)
+{
+ int ioapic, pin;
+ /*
+ * Avoid pin reprogramming. PRTs typically include entries
+ * with redundant pin->gsi mappings (but unique PCI devices);
+ * we only program the IOAPIC on the first.
+ */
+ ioapic = irq_attr->ioapic;
+ pin = irq_attr->ioapic_pin;
+ if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) {
+ pr_debug("Pin %d-%d already programmed\n",
+ mp_ioapics[ioapic].apicid, pin);
+ return 0;
+ }
+ set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed);
+
+ return __io_apic_set_pci_routing(dev, irq, irq_attr);
+}
+
/* --------------------------------------------------------------------------
ACPI-based IOAPIC Configuration
-------------------------------------------------------------------------- */
#ifdef CONFIG_ACPI
-#ifdef CONFIG_X86_32
-#ifndef CONFIG_XEN
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
int __init io_apic_get_unique_id(int ioapic, int apic_id)
{
union IO_APIC_reg_00 reg_00;
@@ -4076,7 +4125,7 @@ int __init io_apic_get_unique_id(int ioa
return apic_id;
}
-#endif /* !CONFIG_XEN */
+#endif
int __init io_apic_get_version(int ioapic)
{
@@ -4089,47 +4138,6 @@ int __init io_apic_get_version(int ioapi
return reg_01.bits.version;
}
-#endif
-
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
-{
- struct irq_desc *desc;
- struct irq_cfg *cfg;
- int cpu = boot_cpu_id;
-
-#ifdef CONFIG_XEN
- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
- ioapic, irq);
- return -EINVAL;
- }
-#endif
-
- if (!IO_APIC_IRQ(irq)) {
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
- ioapic);
- return -EINVAL;
- }
-
- desc = irq_to_desc_alloc_cpu(irq, cpu);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc %d\n", irq);
- return 0;
- }
-
- /*
- * IRQs < 16 are already in the irq_2_pin[] map
- */
- if (irq >= NR_IRQS_LEGACY) {
- cfg = desc->chip_data;
- add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
- }
-
- setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
-
- return 0;
-}
-
int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
{
@@ -4161,51 +4169,44 @@ int acpi_get_override_irq(int bus_irq, i
#ifdef CONFIG_SMP
void __init setup_ioapic_dest(void)
{
- int pin, ioapic, irq, irq_entry;
+ int pin, ioapic = 0, irq, irq_entry;
struct irq_desc *desc;
- struct irq_cfg *cfg;
const struct cpumask *mask;
if (skip_ioapic_setup == 1)
return;
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
- if (irq_entry == -1)
- continue;
- irq = pin_2_irq(irq_entry, ioapic, pin);
-
- /* setup_IO_APIC_irqs could fail to get vector for some device
- * when you have too many devices, because at that time only boot
- * cpu is online.
- */
- desc = irq_to_desc(irq);
- cfg = desc->chip_data;
- if (!cfg->vector) {
- setup_IO_APIC_irq(ioapic, pin, irq, desc,
- irq_trigger(irq_entry),
- irq_polarity(irq_entry));
- continue;
+#ifdef CONFIG_ACPI
+ if (!acpi_disabled && acpi_ioapic) {
+ ioapic = mp_find_ioapic(0);
+ if (ioapic < 0)
+ ioapic = 0;
+ }
+#endif
- }
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
- /*
- * Honour affinities which have been set in early boot
- */
- if (desc->status &
- (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->affinity;
- else
- mask = apic->target_cpus();
+ desc = irq_to_desc(irq);
- if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq_desc(desc, mask);
- else
- set_ioapic_affinity_irq_desc(desc, mask);
- }
+ /*
+ * Honour affinities which have been set in early boot
+ */
+ if (desc->status &
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
+ mask = desc->affinity;
+ else
+ mask = apic->target_cpus();
+ if (intr_remapping_enabled)
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
+ else
+ set_ioapic_affinity_irq_desc(desc, mask);
}
+
}
#endif
@@ -4288,29 +4289,21 @@ fake_ioapic_page:
}
}
-static int __init ioapic_insert_resources(void)
+void __init ioapic_insert_resources(void)
{
int i;
struct resource *r = ioapic_resources;
if (!r) {
- if (nr_ioapics > 0) {
+ if (nr_ioapics > 0)
printk(KERN_ERR
"IO APIC resources couldn't be allocated.\n");
- return -1;
- }
- return 0;
+ return;
}
for (i = 0; i < nr_ioapics; i++) {
insert_resource(&iomem_resource, r);
r++;
}
-
- return 0;
}
-
-/* Insert the IO APIC resources after PCI initialization has occured to handle
- * IO APICS that are mapped in on a BAR in PCI space. */
-late_initcall(ioapic_insert_resources);
#endif /* !CONFIG_XEN */
--- head-2011-03-17.orig/arch/x86/kernel/apic/probe_32-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/probe_32-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -20,23 +20,12 @@
#include <asm/apic.h>
#include <asm/setup.h>
-#include <linux/threads.h>
-#include <linux/cpumask.h>
-#include <asm/mpspec.h>
-#include <asm/fixmap.h>
-#include <asm/apicdef.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
#include <linux/smp.h>
-#include <linux/init.h>
#include <asm/ipi.h>
-#include <linux/smp.h>
-#include <linux/init.h>
#include <linux/interrupt.h>
#include <asm/acpi.h>
#include <asm/e820.h>
-#include <asm/setup.h>
static int xen_phys_pkg_id(int cpuid_apic, int index_msb)
{
--- head-2011-03-17.orig/arch/x86/kernel/cpu/amd.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/amd.c 2011-02-01 14:50:44.000000000 +0100
@@ -415,7 +415,7 @@ static void __cpuinit early_init_amd(str
(c->x86_model == 8 && c->x86_mask >= 8))
set_cpu_cap(c, X86_FEATURE_K6_MTRR);
#endif
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI)
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) && !defined(CONFIG_XEN)
/* check CPU config space for extended APIC ID */
if (cpu_has_apic && c->x86 >= 0xf) {
unsigned int val;
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:07.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:17.000000000 +0100
@@ -13,6 +13,7 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
+#include <asm/perf_counter.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
@@ -66,7 +67,30 @@ void __init setup_cpu_local_masks(void)
#endif
}
-static const struct cpu_dev *this_cpu __cpuinitdata;
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_X86_64
+ display_cacheinfo(c);
+#else
+ /* Not much we can do here... */
+ /* Check if at least it has cpuid */
+ if (c->cpuid_level == -1) {
+ /* No cpuid. It must be an ancient CPU */
+ if (c->x86 == 4)
+ strcpy(c->x86_model_id, "486");
+ else if (c->x86 == 3)
+ strcpy(c->x86_model_id, "386");
+ }
+#endif
+}
+
+static const struct cpu_dev __cpuinitconst default_cpu = {
+ .c_init = default_init,
+ .c_vendor = "Unknown",
+ .c_x86_vendor = X86_VENDOR_UNKNOWN,
+};
+
+static const struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
#ifdef CONFIG_X86_64
@@ -116,7 +140,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
/* data */
[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
+ [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
#endif
[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
GDT_STACK_CANARY_INIT
@@ -312,7 +336,8 @@ static const char *__cpuinit table_looku
return NULL; /* Not found */
}
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata;
+__u32 cpu_caps_set[NCAPINTS] __cpuinitdata;
void load_percpu_segment(int cpu)
{
@@ -361,29 +386,6 @@ void switch_to_new_gdt(int cpu)
static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
-{
-#ifdef CONFIG_X86_64
- display_cacheinfo(c);
-#else
- /* Not much we can do here... */
- /* Check if at least it has cpuid */
- if (c->cpuid_level == -1) {
- /* No cpuid. It must be an ancient CPU */
- if (c->x86 == 4)
- strcpy(c->x86_model_id, "486");
- else if (c->x86 == 3)
- strcpy(c->x86_model_id, "386");
- }
-#endif
-}
-
-static const struct cpu_dev __cpuinitconst default_cpu = {
- .c_init = default_init,
- .c_vendor = "Unknown",
- .c_x86_vendor = X86_VENDOR_UNKNOWN,
-};
-
static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
{
unsigned int *v;
@@ -516,7 +518,6 @@ out:
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
{
char *v = c->x86_vendor_id;
- static int printed;
int i;
for (i = 0; i < X86_VENDOR_NUM; i++) {
@@ -533,13 +534,9 @@ static void __cpuinit get_cpu_vendor(str
}
}
- if (!printed) {
- printed++;
- printk(KERN_ERR
- "CPU: vendor_id '%s' unknown, using generic init.\n", v);
-
- printk(KERN_ERR "CPU: Your system may be unstable.\n");
- }
+ printk_once(KERN_ERR
+ "CPU: vendor_id '%s' unknown, using generic init.\n" \
+ "CPU: Your system may be unstable.\n", v);
c->x86_vendor = X86_VENDOR_UNKNOWN;
this_cpu = &default_cpu;
@@ -805,6 +802,12 @@ static void __cpuinit identify_cpu(struc
if (this_cpu->c_identify)
this_cpu->c_identify(c);
+ /* Clear/Set all flags overridden by options, after probe */
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
#endif
@@ -850,6 +853,16 @@ static void __cpuinit identify_cpu(struc
#endif
init_hypervisor(c);
+
+ /*
+ * Clear/Set all flags overridden by options; this needs to be done
+ * before the following SMP all-CPUs capability AND.
+ */
+ for (i = 0; i < NCAPINTS; i++) {
+ c->x86_capability[i] &= ~cpu_caps_cleared[i];
+ c->x86_capability[i] |= cpu_caps_set[i];
+ }
+
/*
* On SMP, boot_cpu_data holds the common feature set between
* all CPUs; so make sure that we indicate which features are
@@ -862,10 +875,6 @@ static void __cpuinit identify_cpu(struc
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
- /* Clear all flags overriden by options */
- for (i = 0; i < NCAPINTS; i++)
- c->x86_capability[i] &= ~cleared_cpu_caps[i];
-
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
@@ -898,6 +907,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
+ init_hw_perf_counters();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
--- head-2011-03-17.orig/arch/x86/kernel/cpu/mcheck/Makefile 2011-01-31 17:29:16.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/mcheck/Makefile 2011-02-01 14:50:44.000000000 +0100
@@ -11,5 +11,3 @@ obj-$(CONFIG_X86_MCE_INJECT) += mce-inje
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
obj-$(CONFIG_ACPI_APEI) += mce-apei.o
-
-disabled-obj-$(CONFIG_XEN) := therm_throt.o
--- head-2011-03-17.orig/arch/x86/kernel/cpu/mcheck/mce.c 2011-01-31 14:53:50.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/mcheck/mce.c 2011-02-01 14:50:44.000000000 +0100
@@ -137,10 +137,12 @@ void mce_setup(struct mce *m)
m->time = get_seconds();
m->cpuvendor = boot_cpu_data.x86_vendor;
m->cpuid = cpuid_eax(1);
+#ifndef CONFIG_XEN
#ifdef CONFIG_SMP
m->socketid = cpu_data(m->extcpu).phys_proc_id;
#endif
m->apicid = cpu_data(m->extcpu).initial_apicid;
+#endif
rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap);
}
@@ -483,7 +485,9 @@ static inline void mce_get_rip(struct mc
*/
asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs)
{
+#ifndef CONFIG_XEN
ack_APIC_irq();
+#endif
exit_idle();
irq_enter();
mce_notify_irq();
@@ -506,7 +510,7 @@ static void mce_report_event(struct pt_r
return;
}
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
/*
* Without APIC do not notify. The event will be picked
* up eventually.
@@ -2167,7 +2171,7 @@ static __init int mcheck_init_device(voi
#ifdef CONFIG_X86_XEN_MCE
if (is_initial_xendomain()) {
/* Register vIRQ handler for MCE LOG processing */
- extern void bind_virq_for_mce(void);
+ extern int bind_virq_for_mce(void);
printk(KERN_DEBUG "MCE: bind virq for DOM0 logging\n");
bind_virq_for_mce();
--- head-2011-03-17.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2011-02-01 14:50:44.000000000 +0100
@@ -7,12 +7,17 @@
#include <asm/hypercall.h>
#include <asm/mce.h>
+static xen_mc_logical_cpu_t *g_physinfo;
+static unsigned int ncpus;
+
static int convert_log(struct mc_info *mi)
{
struct mcinfo_common *mic = NULL;
struct mcinfo_global *mc_global;
struct mcinfo_bank *mc_bank;
struct mce m;
+ unsigned int i;
+ bool found = false;
x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
if (mic == NULL)
@@ -21,9 +26,23 @@ static int convert_log(struct mc_info *m
return -1;
}
+ mce_setup(&m);
mc_global = (struct mcinfo_global*)mic;
m.mcgstatus = mc_global->mc_gstatus;
- m.cpu = mc_global->mc_coreid;/*for test*/
+ m.apicid = mc_global->mc_apicid;
+
+ for (i = 0; i < ncpus; i++)
+ if (g_physinfo[i].mc_apicid == m.apicid) {
+ found = true;
+ break;
+ }
+ /* No match leaves i == ncpus; bail out rather than read past g_physinfo[] */
+ if (WARN_ON_ONCE(!found))
+ return -1;
+ m.socketid = mc_global->mc_socketid;
+ m.cpu = m.extcpu = g_physinfo[i].mc_cpunr;
+ m.cpuvendor = (__u8)g_physinfo[i].mc_vendor;
+
x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK);
do
{
@@ -36,7 +55,6 @@ static int convert_log(struct mc_info *m
m.status = mc_bank->mc_status;
m.addr = mc_bank->mc_addr;
m.tsc = mc_bank->mc_tsc;
- m.res1 = mc_bank->mc_ctrl2;
m.bank = mc_bank->mc_bank;
printk(KERN_DEBUG "[CPU%d, BANK%d, addr %llx, state %llx]\n",
- m.bank, m.cpu, m.addr, m.status);
+ m.cpu, m.bank, m.addr, m.status);
@@ -116,18 +134,55 @@ end:
return IRQ_HANDLED;
}
-void bind_virq_for_mce(void)
+int __init bind_virq_for_mce(void)
{
int ret;
+ xen_mc_t mc_op;
+
+ g_mi = kmalloc(sizeof(*g_mi), GFP_KERNEL);
+ if (!g_mi)
+ return -ENOMEM;
+
+ /* fetch physical CPU count */
+ mc_op.cmd = XEN_MC_physcpuinfo;
+ mc_op.interface_version = XEN_MCA_INTERFACE_VERSION;
+ set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, NULL);
+ ret = HYPERVISOR_mca(&mc_op);
+ if (ret) {
+ pr_err("MCE: Failed to get physical CPU count\n");
+ kfree(g_mi);
+ return ret;
+ }
+
+ /* fetch CPU physical info for later reference */
+ ncpus = mc_op.u.mc_physcpuinfo.ncpus;
+ g_physinfo = kcalloc(ncpus, sizeof(*g_physinfo), GFP_KERNEL);
+ if (!g_physinfo) {
+ kfree(g_mi);
+ return -ENOMEM;
+ }
+ set_xen_guest_handle(mc_op.u.mc_physcpuinfo.info, g_physinfo);
+ ret = HYPERVISOR_mca(&mc_op);
+ if (ret) {
+ pr_err("MCE: Failed to get physical CPUs' info\n");
+ kfree(g_mi);
+ kfree(g_physinfo);
+ return ret;
+ }
ret = bind_virq_to_irqhandler(VIRQ_MCA, 0,
mce_dom0_interrupt, 0, "mce", NULL);
- g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL);
- if (ret < 0)
- pr_err("MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
+ if (ret < 0) {
+ pr_err("MCE: Failed to bind vIRQ for Dom0\n");
+ kfree(g_mi);
+ kfree(g_physinfo);
+ return ret;
+ }
/* Log the machine checks left over from the previous reset. */
mce_dom0_interrupt(VIRQ_MCA, NULL);
+
+ return 0;
}
--- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -659,7 +659,7 @@ __init int e820_search_gap(unsigned long
*/
__init void e820_setup_gap(void)
{
- unsigned long gapstart, gapsize, round;
+ unsigned long gapstart, gapsize;
int found;
gapstart = 0x10000000;
@@ -668,24 +668,18 @@ __init void e820_setup_gap(void)
#ifdef CONFIG_X86_64
if (!found) {
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
- "address range\n"
- KERN_ERR "PCI: Unassigned devices with 32bit resource "
- "registers may break!\n");
+ printk(KERN_ERR
+ "PCI: Warning: Cannot find a gap in the 32bit address range\n"
+ "PCI: Unassigned devices with 32bit resource registers may break!\n");
found = e820_search_gap(&gapstart, &gapsize, MAX_GAP_END, 0);
WARN_ON(!found);
}
#endif
/*
- * See how much we want to round up: start off with
- * rounding to the next 1MB area.
+ * e820_reserve_resources_late() already protects stolen RAM
*/
- round = 0x100000;
- while ((gapsize >> 4) > round)
- round += round;
- /* Fun with two's complement */
- pci_mem_start = (gapstart + round) & -round;
+ pci_mem_start = gapstart;
printk(KERN_INFO
"Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
@@ -1495,6 +1489,25 @@ void __init e820_reserve_resources(void)
}
}
+/* How much should we pad RAM ending depending on where it is? */
+static unsigned long ram_alignment(resource_size_t pos)
+{
+ unsigned long mb = pos >> 20;
+
+ /* To 64kB in the first megabyte */
+ if (!mb)
+ return 64*1024;
+
+ /* To 1MB in the first 16MB */
+ if (mb < 16)
+ return 1024*1024;
+
+ /* To 32MB for anything above that */
+ return 32*1024*1024;
+}
+
+#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
+
void __init e820_reserve_resources_late(void)
{
int i;
@@ -1506,6 +1519,26 @@ void __init e820_reserve_resources_late(
insert_resource_expand_to_fit(&iomem_resource, res);
res++;
}
+
+ /*
+ * Try to bump up RAM regions to reasonable boundaries to
+ * avoid stolen RAM:
+ */
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *entry = &e820.map[i];
+ u64 start, end;
+
+ if (entry->type != E820_RAM)
+ continue;
+ start = entry->addr + entry->size;
+ end = round_up(start, ram_alignment(start)) - 1;
+ if (end > MAX_RESOURCE_SIZE)
+ end = MAX_RESOURCE_SIZE;
+ if (start >= end)
+ continue;
+ reserve_region_with_split(&iomem_resource, start, end,
+ "RAM buffer");
+ }
}
#undef e820
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:50:44.000000000 +0100
@@ -48,7 +48,6 @@
#include <asm/segment.h>
#include <asm/smp.h>
#include <asm/page_types.h>
-#include <asm/desc.h>
#include <asm/percpu.h>
#include <asm/dwarf2.h>
#include <asm/processor-flags.h>
@@ -88,7 +87,7 @@ NMI_MASK = 0x80000000
#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
#else
#define preempt_stop(clobbers)
-#define resume_kernel restore_nocheck
+#define resume_kernel restore_all
#endif
.macro TRACE_IRQS_IRET
@@ -376,7 +375,7 @@ END(ret_from_exception)
ENTRY(resume_kernel)
DISABLE_INTERRUPTS(CLBR_ANY)
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
- jnz restore_nocheck
+ jnz restore_all
need_resched:
movl TI_flags(%ebp), %ecx # need_resched set ?
testb $_TIF_NEED_RESCHED, %cl
@@ -569,6 +568,8 @@ syscall_exit:
jne syscall_exit_work
restore_all:
+ TRACE_IRQS_IRET
+restore_all_notrace:
#ifndef CONFIG_XEN
movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
# Warning: PT_OLDSS(%esp) contains the wrong/random values if we
@@ -594,8 +595,6 @@ restore_nocheck:
CFI_REMEMBER_STATE
jnz restore_all_enable_events # != 0 => enable event delivery
#endif
- TRACE_IRQS_IRET
-restore_nocheck_notrace:
RESTORE_REGS 4 # skip orig_eax/error_code
CFI_ADJUST_CFA_OFFSET -4
irq_return:
@@ -632,22 +631,34 @@ ldt_ss:
jne restore_nocheck
#endif
- /* If returning to userspace with 16bit stack,
- * try to fix the higher word of ESP, as the CPU
- * won't restore it.
- * This is an "official" bug of all the x86-compatible
- * CPUs, which we can try to work around to make
- * dosemu and wine happy. */
- movl PT_OLDESP(%esp), %eax
- movl %esp, %edx
- call patch_espfix_desc
+/*
+ * Setup and switch to ESPFIX stack
+ *
+ * We're returning to userspace with a 16 bit stack. The CPU will not
+ * restore the high word of ESP for us on executing iret... This is an
+ * "official" bug of all the x86-compatible CPUs, which we can work
+ * around to make dosemu and wine happy. We do this by preloading the
+ * high word of ESP with the high word of the userspace ESP while
+ * compensating for the offset by changing to the ESPFIX segment with
+ * a base address that matches for the difference.
+ */
+ mov %esp, %edx /* load kernel esp */
+ mov PT_OLDESP(%esp), %eax /* load userspace esp */
+ mov %dx, %ax /* eax: new kernel esp */
+ sub %eax, %edx /* offset (low word is 0) */
+ PER_CPU(gdt_page, %ebx)
+ shr $16, %edx
+ mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
+ mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
pushl $__ESPFIX_SS
CFI_ADJUST_CFA_OFFSET 4
- pushl %eax
+ push %eax /* new kernel esp */
CFI_ADJUST_CFA_OFFSET 4
+ /* Disable interrupts, but do not irqtrace this section: we
+ * will soon execute iret and the tracer was already set to
+ * the irqstate after the iret */
DISABLE_INTERRUPTS(CLBR_EAX)
- TRACE_IRQS_OFF
- lss (%esp), %esp
+ lss (%esp), %esp /* switch to espfix segment */
CFI_ADJUST_CFA_OFFSET -8
jmp restore_nocheck
#else
@@ -786,15 +797,24 @@ PTREGSCALL(vm86old)
#ifndef CONFIG_XEN
.macro FIXUP_ESPFIX_STACK
- /* since we are on a wrong stack, we cant make it a C code :( */
+/*
+ * Switch back from the ESPFIX stack to the normal zero-based stack
+ *
+ * We can't call C functions using the ESPFIX stack. This code reads
+ * the high word of the segment base from the GDT and switches to the
+ * normal stack and adjusts ESP with the matching offset.
+ */
+ /* fixup the stack */
PER_CPU(gdt_page, %ebx)
- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
- addl %esp, %eax
+ mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
+ mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+ shl $16, %eax
+ addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
CFI_ADJUST_CFA_OFFSET 4
pushl %eax
CFI_ADJUST_CFA_OFFSET 4
- lss (%esp), %esp
+ lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8
.endm
.macro UNWIND_ESPFIX_STACK
@@ -1284,6 +1304,7 @@ ENTRY(ftrace_graph_caller)
pushl %edx
movl 0xc(%esp), %edx
lea 0x4(%ebp), %eax
+ movl (%ebp), %ecx
subl $MCOUNT_INSN_SIZE, %edx
call prepare_ftrace_return
popl %edx
@@ -1298,6 +1319,7 @@ return_to_handler:
pushl %eax
pushl %ecx
pushl %edx
+ movl %ebp, %eax
call ftrace_return_to_handler
movl %eax, 0xc(%esp)
popl %edx
@@ -1593,7 +1615,7 @@ nmi_stack_correct:
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
call do_nmi
- jmp restore_nocheck_notrace
+ jmp restore_all_notrace
CFI_ENDPROC
nmi_stack_fixup:
--- head-2011-03-17.orig/arch/x86/kernel/entry_64.S 2011-02-16 16:02:30.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_64.S 2011-02-16 16:02:54.000000000 +0100
@@ -1363,7 +1363,7 @@ apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
paranoidzeroentry_ist debug do_debug DEBUG_STACK
paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
paranoiderrorentry stack_segment do_stack_segment
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
zeroentry xen_debug do_debug
zeroentry xen_int3 do_int3
errorentry xen_stack_segment do_stack_segment
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:50:44.000000000 +0100
@@ -139,6 +139,7 @@ ENTRY(ftrace_graph_caller)
leaq 8(%rbp), %rdi
movq 0x38(%rsp), %rsi
+ movq (%rbp), %rdx
subq $MCOUNT_INSN_SIZE, %rsi
call prepare_ftrace_return
@@ -151,27 +152,15 @@ END(ftrace_graph_caller)
GLOBAL(return_to_handler)
subq $80, %rsp
+ /* Save the return values */
movq %rax, (%rsp)
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
- movq %r10, 56(%rsp)
- movq %r11, 64(%rsp)
+ movq %rdx, 8(%rsp)
+ movq %rbp, %rdi
call ftrace_return_to_handler
movq %rax, 72(%rsp)
- movq 64(%rsp), %r11
- movq 56(%rsp), %r10
- movq 48(%rsp), %r9
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
+ movq 8(%rsp), %rdx
movq (%rsp), %rax
addq $72, %rsp
retq
@@ -869,6 +858,8 @@ END(\sym)
#ifdef CONFIG_SMP
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
+apicinterrupt REBOOT_VECTOR \
+ reboot_interrupt smp_reboot_interrupt
#endif
#ifdef CONFIG_X86_UV
@@ -900,10 +891,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_STAR
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
- threshold_interrupt mce_threshold_interrupt
+ threshold_interrupt smp_threshold_interrupt
apicinterrupt THERMAL_APIC_VECTOR \
thermal_interrupt smp_thermal_interrupt
+#ifdef CONFIG_X86_MCE
+apicinterrupt MCE_SELF_VECTOR \
+ mce_self_interrupt smp_mce_self_interrupt
+#endif
+
#ifdef CONFIG_SMP
apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
call_function_single_interrupt smp_call_function_single_interrupt
@@ -917,6 +913,11 @@ apicinterrupt ERROR_APIC_VECTOR \
error_interrupt smp_error_interrupt
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
+
+#ifdef CONFIG_PERF_COUNTERS
+apicinterrupt LOCAL_PENDING_VECTOR \
+ perf_pending_interrupt smp_perf_pending_interrupt
+#endif
#endif /* !CONFIG_XEN */
/*
@@ -1219,7 +1220,7 @@ paranoiderrorentry stack_segment do_stac
errorentry general_protection do_general_protection
errorentry page_fault do_page_fault
#ifdef CONFIG_X86_MCE
-paranoidzeroentry machine_check do_machine_check
+paranoidzeroentry machine_check *machine_check_vector(%rip)
#endif
#ifndef CONFIG_XEN
--- head-2011-03-17.orig/arch/x86/kernel/head_32-xen.S 2011-03-03 16:23:08.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head_32-xen.S 2011-03-03 16:23:25.000000000 +0100
@@ -118,12 +118,6 @@ ENTRY(hypercall_page)
CFI_ENDPROC
/*
- * Real beginning of normal "text" segment
- */
-ENTRY(stext)
-ENTRY(_stext)
-
-/*
* BSS section
*/
.section ".bss.page_aligned","wa"
--- head-2011-03-17.orig/arch/x86/kernel/head_64-xen.S 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head_64-xen.S 2011-02-01 14:50:44.000000000 +0100
@@ -15,7 +15,6 @@
#include <linux/threads.h>
#include <linux/init.h>
#include <linux/elfnote.h>
-#include <asm/desc.h>
#include <asm/segment.h>
#include <asm/page.h>
#include <asm/msr.h>
--- head-2011-03-17.orig/arch/x86/kernel/init_task.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/init_task.c 2011-02-01 14:50:44.000000000 +0100
@@ -31,6 +31,7 @@ union thread_union init_thread_union __i
struct task_struct init_task = INIT_TASK(init_task);
EXPORT_SYMBOL(init_task);
+#ifndef CONFIG_X86_NO_TSS
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
* no more per-task TSS's. The TSS size is kept cacheline-aligned
@@ -39,4 +40,4 @@ EXPORT_SYMBOL(init_task);
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
-
+#endif
--- head-2011-03-17.orig/arch/x86/kernel/irq-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/irq-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -12,6 +12,8 @@
#include <asm/io_apic.h>
#include <asm/irq.h>
#include <asm/idle.h>
+#include <asm/mce.h>
+#include <asm/hw_irq.h>
atomic_t irq_err_count;
@@ -26,9 +28,10 @@ void (*generic_interrupt_extension)(void
*/
void ack_bad_irq(unsigned int irq)
{
- printk(KERN_ERR "unexpected IRQ trap at irq %02x\n", irq);
+ if (printk_ratelimit())
+ pr_err("unexpected IRQ trap at vector %02x\n", irq);
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
/*
* Currently unexpected vectors happen only on SMP and APIC.
* We _must_ ack these because every local APIC has only N
@@ -38,8 +41,7 @@ void ack_bad_irq(unsigned int irq)
* completely.
* But only ack when the APIC is enabled -AK
*/
- if (cpu_has_apic)
- ack_APIC_irq();
+ ack_APIC_irq();
#endif
}
@@ -65,6 +67,14 @@ static int show_other_interrupts(struct
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
seq_printf(p, " Spurious interrupts\n");
+ seq_printf(p, "%*s: ", prec, "CNT");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
+ seq_printf(p, " Performance counter interrupts\n");
+ seq_printf(p, "%*s: ", prec, "PND");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
+ seq_printf(p, " Performance pending work\n");
#endif
#ifndef CONFIG_XEN
if (generic_interrupt_extension) {
@@ -95,17 +105,27 @@ static int show_other_interrupts(struct
seq_printf(p, " Spinlock wakeups\n");
#endif
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
-# ifdef CONFIG_X86_64
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
-# endif
+#endif
+#ifdef CONFIG_X86_NEW_MCE
+ seq_printf(p, "%*s: ", prec, "MCE");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
+ seq_printf(p, " Machine check exceptions\n");
+ seq_printf(p, "%*s: ", prec, "MCP");
+ for_each_online_cpu(j)
+ seq_printf(p, "%10u ", per_cpu(mce_poll_count, j));
+ seq_printf(p, " Machine check polls\n");
#endif
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
#if defined(CONFIG_X86_IO_APIC)
@@ -177,6 +197,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_X86_LOCAL_APIC
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
+ sum += irq_stats(cpu)->apic_perf_irqs;
+ sum += irq_stats(cpu)->apic_pending_irqs;
#endif
#ifndef CONFIG_XEN
if (generic_interrupt_extension)
@@ -191,11 +213,15 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_lock_count;
#endif
#endif
-#ifdef CONFIG_X86_MCE
+#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;
-# ifdef CONFIG_X86_64
+#endif
+#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
#endif
+#ifdef CONFIG_X86_NEW_MCE
+ sum += per_cpu(mce_exception_count, cpu);
+ sum += per_cpu(mce_poll_count, cpu);
#endif
return sum;
}
@@ -231,14 +257,11 @@ unsigned int __irq_entry do_IRQ(struct p
irq = __get_cpu_var(vector_irq)[vector];
if (!handle_irq(irq, regs)) {
-#ifdef CONFIG_X86_64
- if (!disable_apic)
- ack_APIC_irq();
-#endif
+ ack_APIC_irq();
if (printk_ratelimit())
- printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
- __func__, smp_processor_id(), vector, irq);
+ pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n",
+ __func__, smp_processor_id(), vector, irq);
}
irq_exit();
--- head-2011-03-17.orig/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -22,27 +22,21 @@
* 2 of the License, or (at your option) any later version.
*/
#include <linux/platform_device.h>
-#include <linux/capability.h>
#include <linux/miscdevice.h>
-#include <linux/firmware.h>
+#include <linux/capability.h>
#include <linux/smp_lock.h>
-#include <linux/spinlock.h>
-#include <linux/cpumask.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/cpu.h>
#include <linux/fs.h>
#include <linux/mm.h>
+#include <linux/firmware.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
#include <asm/microcode.h>
#include <asm/processor.h>
-#include <asm/msr.h>
MODULE_DESCRIPTION("Microcode Update Driver");
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
@@ -53,7 +47,18 @@ module_param(verbose, int, 0644);
#define MICROCODE_VERSION "2.00-xen"
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+/*
+ * Synchronization.
+ *
+ * All non cpu-hotplug-callback call sites use:
+ *
+ * - microcode_mutex to synchronize with each other;
+ * - get/put_online_cpus() to synchronize with
+ * the cpu-hotplug-callback call sites.
+ *
+ * We guarantee that only a single cpu is being
+ * updated at any particular moment of time.
+ */
static DEFINE_MUTEX(microcode_mutex);
#ifdef CONFIG_MICROCODE_OLD_INTERFACE
@@ -90,18 +95,16 @@ static int microcode_open(struct inode *
static ssize_t microcode_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
- ssize_t ret;
+ ssize_t ret = -EINVAL;
if ((len >> PAGE_SHIFT) > num_physpages) {
- printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
- num_physpages);
- return -EINVAL;
+ pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+ return ret;
}
mutex_lock(&microcode_mutex);
- ret = do_microcode_update(buf, len);
- if (!ret)
+ if (do_microcode_update(buf, len) == 0)
ret = (ssize_t)len;
mutex_unlock(&microcode_mutex);
@@ -110,15 +113,16 @@ static ssize_t microcode_write(struct fi
}
static const struct file_operations microcode_fops = {
- .owner = THIS_MODULE,
- .write = microcode_write,
- .open = microcode_open,
+ .owner = THIS_MODULE,
+ .write = microcode_write,
+ .open = microcode_open,
};
static struct miscdevice microcode_dev = {
- .minor = MICROCODE_MINOR,
- .name = "microcode",
- .fops = &microcode_fops,
+ .minor = MICROCODE_MINOR,
+ .name = "microcode",
+ .devnode = "cpu/microcode",
+ .fops = &microcode_fops,
};
static int __init microcode_dev_init(void)
@@ -127,9 +131,7 @@ static int __init microcode_dev_init(voi
error = misc_register(&microcode_dev);
if (error) {
- printk(KERN_ERR
- "microcode: can't misc_register on minor=%d\n",
- MICROCODE_MINOR);
+ pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
return error;
}
@@ -188,38 +190,38 @@ static int __init microcode_init(void)
else if (c->x86_vendor == X86_VENDOR_AMD)
fw_name = "amd-ucode/microcode_amd.bin";
else {
- printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+ pr_err("microcode: no support for this CPU vendor\n");
return -ENODEV;
}
- error = microcode_dev_init();
- if (error)
- return error;
microcode_pdev = platform_device_register_simple("microcode", -1,
NULL, 0);
if (IS_ERR(microcode_pdev)) {
- microcode_dev_exit();
return PTR_ERR(microcode_pdev);
}
+ error = microcode_dev_init();
+ if (error) {
+ /* Don't leak the platform device registered above. */
+ platform_device_unregister(microcode_pdev);
+ return error;
+ }
+
request_microcode(fw_name);
- printk(KERN_INFO
- "Microcode Update Driver: v" MICROCODE_VERSION
+ pr_info("Microcode Update Driver: v" MICROCODE_VERSION
" <tigran@aivazian.fsnet.co.uk>,"
" Peter Oruba\n");
return 0;
}
+module_init(microcode_init);
static void __exit microcode_exit(void)
{
microcode_dev_exit();
platform_device_unregister(microcode_pdev);
- printk(KERN_INFO
- "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+ pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
}
-
-module_init(microcode_init);
module_exit(microcode_exit);
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -17,6 +17,7 @@
#include <linux/acpi.h>
#include <linux/module.h>
#include <linux/smp.h>
+#include <linux/pci.h>
#include <asm/mtrr.h>
#include <asm/mpspec.h>
@@ -904,24 +905,17 @@ static
inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
#endif /* CONFIG_X86_IO_APIC */
-static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
- int count)
+static int
+check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count)
{
- if (!mpc_new_phys) {
- pr_info("No spare slots, try to append...take your risk, "
- "new mpc_length %x\n", count);
- } else {
- if (count <= mpc_new_length)
- pr_info("No spare slots, try to append..., "
- "new mpc_length %x\n", count);
- else {
- pr_err("mpc_new_length %lx is too small\n",
- mpc_new_length);
- return -1;
- }
+ int ret = 0;
+
+ if (!mpc_new_phys || count <= mpc_new_length) {
+ WARN(1, "update_mptable: No spare slots (length: %x)\n", count);
+ return -1;
}
- return 0;
+ return ret;
}
static int __init replace_intsrc_all(struct mpc_table *mpc,
@@ -980,7 +974,7 @@ static int __init replace_intsrc_all(st
} else {
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
count += sizeof(struct mpc_intsrc);
- if (!check_slot(mpc_new_phys, mpc_new_length, count))
+ if (check_slot(mpc_new_phys, mpc_new_length, count) < 0)
goto out;
assign_to_mpc_intsrc(&mp_irqs[i], m);
mpc->length = count;
@@ -997,11 +991,14 @@ out:
return 0;
}
-static int __initdata enable_update_mptable;
+int enable_update_mptable;
static int __init update_mptable_setup(char *str)
{
enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+ pci_routeirq = 1;
+#endif
return 0;
}
early_param("update_mptable", update_mptable_setup);
@@ -1014,6 +1011,9 @@ static int __initdata alloc_mptable;
static int __init parse_alloc_mptable_opt(char *p)
{
enable_update_mptable = 1;
+#ifdef CONFIG_PCI
+ pci_routeirq = 1;
+#endif
alloc_mptable = 1;
if (!p)
return 0;
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -32,6 +32,8 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
+int iommu_pass_through; /* set to 1 by the "pt" option in iommu_setup() */
+
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
@@ -264,6 +266,10 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
+ if (!strncmp(p, "pt", 2)) {
+ iommu_pass_through = 1;
+ return 1;
+ }
gart_parse_options(p);
@@ -371,6 +377,8 @@ static int __init pci_iommu_init(void)
void pci_iommu_shutdown(void)
{
gart_iommu_shutdown();
+
+ amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
fs_initcall(pci_iommu_init);
--- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:06:40.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:07:25.000000000 +0100
@@ -8,12 +8,15 @@
#include <linux/module.h>
#include <linux/pm.h>
#include <linux/clockchips.h>
+#include <linux/random.h>
#include <trace/power.h>
#include <asm/system.h>
#include <asm/apic.h>
+#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include <asm/ds.h>
#include <xen/evtchn.h>
unsigned long idle_halt;
@@ -46,6 +49,8 @@ void free_thread_xstate(struct task_stru
kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
tsk->thread.xstate = NULL;
}
+
+ WARN(tsk->thread.ds_ctx, "leaking DS context\n");
}
void free_thread_info(struct thread_info *ti)
@@ -59,7 +64,7 @@ void arch_task_cache_init(void)
task_xstate_cachep =
kmem_cache_create("task_xstate", xstate_size,
__alignof__(union thread_xstate),
- SLAB_PANIC, NULL);
+ SLAB_PANIC | SLAB_NOTRACK, NULL);
}
/*
@@ -85,8 +90,6 @@ void exit_thread(void)
t->io_bitmap_max = 0;
kfree(bp);
}
-
- ds_exit_thread(current);
}
void flush_thread(void)
@@ -471,16 +474,12 @@ static void c1e_idle(void)
if (!cpumask_test_cpu(cpu, c1e_mask)) {
cpumask_set_cpu(cpu, c1e_mask);
/*
- * Force broadcast so ACPI can not interfere. Needs
- * to run with interrupts enabled as it uses
- * smp_function_call.
+ * Force broadcast so ACPI can not interfere.
*/
- local_irq_enable();
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
&cpu);
printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
cpu);
- local_irq_disable();
}
clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
@@ -575,3 +574,16 @@ static int __init idle_setup(char *str)
}
early_param("idle", idle_setup);
+unsigned long arch_align_stack(unsigned long sp)
+{
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
+ sp -= get_random_int() % 8192; /* lower sp by 0..8191 bytes */
+ return sp & ~0xf; /* clear the low 4 bits: 16-byte alignment */
+}
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+ unsigned long range_end = mm->brk + 0x02000000; /* brk + 32 MiB */
+ return randomize_range(mm->brk, range_end, 0) ? : mm->brk; /* 0 => keep mm->brk */
+}
+
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:37:24.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:37:43.000000000 +0100
@@ -9,8 +9,6 @@
* This file handles the architecture-dependent parts of process handling..
*/
-#include <stdarg.h>
-
#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
@@ -33,7 +31,6 @@
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/ptrace.h>
-#include <linux/random.h>
#include <linux/personality.h>
#include <linux/tick.h>
#include <linux/percpu.h>
@@ -297,7 +294,8 @@ int copy_thread(unsigned long clone_flag
p->thread.io_bitmap_max = 0;
}
- ds_copy_thread(p, current);
+ clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+ p->thread.ds_ctx = NULL;
clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
p->thread.debugctlmsr = 0;
@@ -468,7 +466,7 @@ __switch_to(struct task_struct *prev_p,
* done before math_state_restore, so the TS bit is up
* to date.
*/
- arch_leave_lazy_cpu_mode();
+ arch_end_context_switch(next_p);
/* If the task has used fpu the last 5 timeslices, just do a full
* restore of the math state immediately to avoid the trap; the
@@ -558,15 +556,3 @@ unsigned long get_wchan(struct task_stru
return 0;
}
-unsigned long arch_align_stack(unsigned long sp)
-{
- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() % 8192;
- return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- unsigned long range_end = mm->brk + 0x02000000;
- return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:37:17.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:37:47.000000000 +0100
@@ -17,8 +17,6 @@
* This file handles the architecture-dependent parts of process handling..
*/
-#include <stdarg.h>
-
#include <linux/stackprotector.h>
#include <linux/cpu.h>
#include <linux/errno.h>
@@ -35,7 +33,6 @@
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
-#include <linux/random.h>
#include <linux/notifier.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
@@ -344,7 +341,8 @@ int copy_thread(unsigned long clone_flag
}
p->thread.iopl = current->thread.iopl;
- ds_copy_thread(p, me);
+ clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
+ p->thread.ds_ctx = NULL;
clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
p->thread.debugctlmsr = 0;
@@ -506,7 +504,7 @@ __switch_to(struct task_struct *prev_p,
* done before math_state_restore, so the TS bit is up
* to date.
*/
- arch_leave_lazy_cpu_mode();
+ arch_end_context_switch(next_p);
/*
* Switch FS and GS.
@@ -723,15 +721,3 @@ long sys_arch_prctl(int code, unsigned l
return do_arch_prctl(current, code, addr);
}
-unsigned long arch_align_stack(unsigned long sp)
-{
- if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
- sp -= get_random_int() % 8192;
- return sp & ~0xf;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
- unsigned long range_end = mm->brk + 0x02000000;
- return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
-}
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-03 16:22:49.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:23:32.000000000 +0100
@@ -142,6 +142,14 @@ EXPORT_SYMBOL(xen_start_info);
#define ARCH_SETUP
#endif
+/*
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
+ * apertures, ACPI and other tables without having to play with fixmaps.
+ */
+unsigned long max_low_pfn_mapped;
+unsigned long max_pfn_mapped;
+
RESERVE_BRK(dmi_alloc, 65536);
unsigned int boot_cpu_id __read_mostly;
@@ -247,8 +255,8 @@ unsigned long mmu_cr4_features;
unsigned long mmu_cr4_features = X86_CR4_PAE;
#endif
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
-int bootloader_type;
+/* Boot loader ID and version as integers, for the benefit of proc_dointvec */
+int bootloader_type, bootloader_version;
/*
* Setup options
@@ -316,6 +324,20 @@ void * __init extend_brk(size_t size, si
return ret;
}
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
+static void __init init_gbpages(void)
+{
+ if (direct_gbpages && cpu_has_gbpages)
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
+ else
+ direct_gbpages = 0; /* cleared unless both conditions hold */
+}
+#else /* not 64-bit, or CONFIG_XEN: empty stub */
+static inline void init_gbpages(void)
+{
+}
+#endif
+
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
@@ -328,15 +350,13 @@ static void __init reserve_brk(void)
#ifdef CONFIG_BLK_DEV_INITRD
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-
#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
static void __init relocate_initrd(void)
{
-
+#ifndef CONFIG_XEN
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
- u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
u64 ramdisk_here;
unsigned long slop, clen, mapaddr;
char *p, *q;
@@ -391,8 +411,14 @@ static void __init relocate_initrd(void)
" %08llx - %08llx\n",
ramdisk_image, ramdisk_image + ramdisk_size - 1,
ramdisk_here, ramdisk_here + ramdisk_size - 1);
-}
+#else
+ printk(KERN_ERR "initrd extends beyond end of memory "
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
+ __pa(xen_start_info->mod_start) + xen_start_info->mod_len,
+ max_low_pfn_mapped << PAGE_SHIFT);
+ initrd_start = 0;
#endif
+}
static void __init reserve_initrd(void)
{
@@ -400,7 +426,7 @@ static void __init reserve_initrd(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = ramdisk_image + ramdisk_size;
- u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -409,7 +435,7 @@ static void __init reserve_initrd(void)
unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
unsigned long ramdisk_size = xen_start_info->mod_len;
unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
+ unsigned long end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT;
if (!xen_start_info->mod_start || !ramdisk_size)
return; /* No initrd provided by bootloader */
@@ -442,14 +468,8 @@ static void __init reserve_initrd(void)
return;
}
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
relocate_initrd();
-#else
- printk(KERN_ERR "initrd extends beyond end of memory "
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- ramdisk_end, end_of_lowmem);
- initrd_start = 0;
-#endif
+
free_early(ramdisk_image, ramdisk_end);
}
#else
@@ -721,6 +741,19 @@ static struct dmi_system_id __initdata b
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
},
},
+ {
+ /*
+ * AMI BIOS with low memory corruption was found on Intel DG45ID board.
+ * It has a different DMI_BIOS_VENDOR = "Intel Corp.", so for now we will
+ * match only DMI_BOARD_NAME and see if there are more bad products
+ * with this vendor.
+ */
+ .callback = dmi_low_memory_corruption,
+ .ident = "AMI BIOS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
+ },
+ },
#endif
{}
};
@@ -788,6 +821,12 @@ void __init setup_arch(char **cmdline_p)
#endif
saved_video_mode = boot_params.hdr.vid_mode;
bootloader_type = boot_params.hdr.type_of_loader;
+ if ((bootloader_type >> 4) == 0xe) {
+ bootloader_type &= 0xf;
+ bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4;
+ }
+ bootloader_version = bootloader_type & 0xf;
+ bootloader_version |= boot_params.hdr.ext_loader_ver << 4;
#ifdef CONFIG_BLK_DEV_RAM
rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
@@ -970,14 +1009,22 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = max_pfn;
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+#ifndef CONFIG_XEN
+ max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
+#endif
#endif
#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
setup_bios_corruption_check();
#endif
+ printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+ max_pfn_mapped<<PAGE_SHIFT);
+
reserve_brk();
+ init_gbpages();
+
/* max_pfn_mapped is updated here */
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
max_pfn_mapped = max_low_pfn_mapped;
@@ -1219,24 +1266,6 @@ void __init setup_arch(char **cmdline_p)
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
/**
- * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
- *
- * Description:
- * Perform any necessary interrupt initialisation prior to setting up
- * the "ordinary" interrupt call gates. For legacy reasons, the ISA
- * interrupts should be initialised here if the machine emulates a PC
- * in any way.
- **/
-void __init x86_quirk_pre_intr_init(void)
-{
- if (x86_quirks->arch_pre_intr_init) {
- if (x86_quirks->arch_pre_intr_init())
- return;
- }
- init_ISA_irqs();
-}
-
-/**
* x86_quirk_intr_init - post gate setup interrupt initialisation
*
* Description:
--- head-2011-03-17.orig/arch/x86/kernel/smp-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/smp-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -135,11 +135,36 @@ void xen_send_call_func_ipi(const struct
* this function calls the 'stop' function on all other CPUs in the system.
*/
+irqreturn_t smp_reboot_interrupt(int irq, void *dev_id)
+{
+ stop_this_cpu(NULL); /* irq and dev_id are ignored */
+
+ return IRQ_HANDLED; /* always reported as handled */
+}
+
void xen_smp_send_stop(void)
{
unsigned long flags;
+ unsigned long wait;
+
+ /*
+ * Use a dedicated vector here because smp_call_function
+ * does lots of things not suitable in a panic situation.
+ * On most systems we could also use an NMI here,
+ * but there are a few systems around where NMI
+ * is problematic, so stay with a non-NMI vector for now
+ * (this implies we currently cannot stop CPUs spinning
+ * with IRQs off).
+ */
+ if (num_online_cpus() > 1) {
+ xen_send_IPI_allbutself(REBOOT_VECTOR);
+
+ /* Don't wait longer than a second */
+ wait = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && wait--)
+ udelay(1);
+ }
- smp_call_function(stop_this_cpu, NULL, 0);
local_irq_save(flags);
disable_all_local_evtchn();
local_irq_restore(flags);
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -45,6 +45,7 @@
#include <linux/edac.h>
#endif
+#include <asm/kmemcheck.h>
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
@@ -53,6 +54,7 @@
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/i387.h>
+#include <asm/mce.h>
#include <asm/mach_traps.h>
@@ -64,8 +66,6 @@
#include <asm/setup.h>
#include <asm/traps.h>
-#include "cpu/mcheck/mce.h"
-
asmlinkage int system_call(void);
/* Do we ignore FPU interrupts ? */
@@ -347,6 +347,9 @@ io_check_error(unsigned char reason, str
printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
show_registers(regs);
+ if (panic_on_io_nmi)
+ panic("NMI IOCK error: Not continuing");
+
/* Re-enable the IOCK line, wait for a few seconds */
clear_io_check_error(reason);
}
@@ -527,6 +530,10 @@ dotraplinkage void __kprobes do_debug(st
get_debugreg(condition, 6);
+ /* Catch kmemcheck conditions first of all! */
+ if (condition & DR_STEP && kmemcheck_trap(regs))
+ return;
+
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
@@ -792,15 +799,15 @@ unsigned long patch_espfix_desc(unsigned
return new_kesp;
}
-#else
+#endif
+
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
}
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
{
}
-#endif
#endif /* CONFIG_XEN */
/*
@@ -834,9 +841,6 @@ asmlinkage void math_state_restore(void)
}
/* NB. 'clts' is done for us by Xen during virtual trap. */
-#ifdef CONFIG_X86_32
- restore_fpu(tsk);
-#else
/*
* Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/
@@ -845,7 +849,7 @@ asmlinkage void math_state_restore(void)
force_sig(SIGSEGV, tsk);
return;
}
-#endif
+
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
tsk->fpu_counter++;
}
--- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeo
return;
}
- /*
- * Surround the RDTSC by barriers, to make sure it's not
- * speculated to outside the seqlock critical section and
- * does not cause time warps:
- */
- rdtsc_barrier();
now = vread();
- rdtsc_barrier();
-
base = __vsyscall_gtod_data.clock.cycle_last;
mask = __vsyscall_gtod_data.clock.mask;
mult = __vsyscall_gtod_data.clock.mult;
--- head-2011-03-17.orig/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -173,13 +173,14 @@ static void note_page(struct seq_file *m
st->current_address >= st->marker[1].start_address) {
const char *unit = units;
unsigned long delta;
+ int width = sizeof(unsigned long) * 2;
/*
* Now print the actual finished series
*/
- seq_printf(m, "0x%p-0x%p ",
- (void *)st->start_address,
- (void *)st->current_address);
+ seq_printf(m, "0x%0*lx-0x%0*lx ",
+ width, st->start_address,
+ width, st->current_address);
delta = (st->current_address - st->start_address) >> 10;
while (!(delta & 1023) && unit[1]) {
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -3,40 +3,18 @@
* Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
* Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
*/
-#include <linux/interrupt.h>
-#include <linux/mmiotrace.h>
-#include <linux/bootmem.h>
-#include <linux/compiler.h>
-#include <linux/highmem.h>
-#include <linux/kprobes.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/vt_kern.h>
-#include <linux/signal.h>
-#include <linux/kernel.h>
-#include <linux/ptrace.h>
-#include <linux/string.h>
-#include <linux/module.h>
-#include <linux/kdebug.h>
-#include <linux/errno.h>
-#include <linux/magic.h>
-#include <linux/sched.h>
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/mman.h>
-#include <linux/tty.h>
-#include <linux/smp.h>
-#include <linux/mm.h>
-
-#include <asm-generic/sections.h>
-
-#include <asm/tlbflush.h>
-#include <asm/pgalloc.h>
-#include <asm/segment.h>
-#include <asm/system.h>
-#include <asm/proto.h>
-#include <asm/traps.h>
-#include <asm/desc.h>
+#include <linux/magic.h> /* STACK_END_MAGIC */
+#include <linux/sched.h> /* test_thread_flag(), ... */
+#include <linux/kdebug.h> /* oops_begin/end, ... */
+#include <linux/module.h> /* search_exception_table */
+#include <linux/bootmem.h> /* max_low_pfn */
+#include <linux/kprobes.h> /* __kprobes, ... */
+#include <linux/mmiotrace.h> /* kmmio_handler, ... */
+#include <linux/perf_counter.h> /* perf_swcounter_event */
+
+#include <asm/traps.h> /* dotraplinkage, ... */
+#include <asm/pgalloc.h> /* pgd_*(), ... */
+#include <asm/kmemcheck.h> /* kmemcheck_*(), ... */
/*
* Page fault error code bits:
@@ -228,10 +206,7 @@ static inline pmd_t *vmalloc_sync_one(pg
if (!pmd_present(*pmd_k))
return NULL;
- if (!pmd_present(*pmd)) {
- bool lazy = percpu_read(xen_lazy_mmu);
-
- percpu_write(xen_lazy_mmu, false);
+ if (!pmd_present(*pmd))
#if CONFIG_XEN_COMPAT > 0x030002
set_pmd(pmd, *pmd_k);
#else
@@ -241,10 +216,8 @@ static inline pmd_t *vmalloc_sync_one(pg
*/
set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
#endif
- percpu_write(xen_lazy_mmu, lazy);
- } else {
+ else
BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
- }
return pmd_k;
}
@@ -474,10 +447,11 @@ static noinline int vmalloc_fault(unsign
}
static const char errata93_warning[] =
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
-KERN_ERR "******* Please consider a BIOS update.\n"
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
+KERN_ERR
+"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
+"******* Working around it, but it may cause SEGVs or burn power.\n"
+"******* Please consider a BIOS update.\n"
+"******* Disabling USB legacy in the BIOS may also help.\n";
/*
* No vm86 mode in 64-bit mode:
@@ -562,8 +536,6 @@ bad:
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
- static int once;
-
if (address != regs->ip)
return 0;
@@ -573,10 +545,7 @@ static int is_errata93(struct pt_regs *r
address |= 0xffffffffUL << 32;
if ((address >= (u64)_stext && address <= (u64)_etext) ||
(address >= MODULES_VADDR && address <= MODULES_END)) {
- if (!once) {
- printk(errata93_warning);
- once = 1;
- }
+ printk_once(errata93_warning);
regs->ip = address;
return 1;
}
@@ -749,7 +718,7 @@ show_signal_msg(struct pt_regs *regs, un
if (!printk_ratelimit())
return;
- printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
+ printk("%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
tsk->comm, task_pid_nr(tsk), address,
(void *)regs->ip, (void *)regs->sp, error_code);
@@ -1011,11 +980,17 @@ do_page_fault(struct pt_regs *regs, unsi
tsk = current;
mm = tsk->mm;
- prefetchw(&mm->mmap_sem);
-
/* Get the faulting address: */
address = read_cr2();
+ /*
+ * Detect and handle instructions that would cause a page fault for
+ * both a tracked kernel page and a userspace page.
+ */
+ if (kmemcheck_active(regs))
+ kmemcheck_hide(regs);
+ prefetchw(&mm->mmap_sem);
+
if (unlikely(kmmio_fault(regs, address)))
return;
@@ -1044,9 +1019,13 @@ do_page_fault(struct pt_regs *regs, unsi
return;
}
- if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
- vmalloc_fault(address) >= 0)
- return;
+ if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) {
+ if (vmalloc_fault(address) >= 0)
+ return;
+
+ if (kmemcheck_fault(regs, address, error_code))
+ return;
+ }
/* Can handle a stale RO->RW TLB: */
if (spurious_fault(error_code, address))
@@ -1085,6 +1064,8 @@ do_page_fault(struct pt_regs *regs, unsi
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+
/*
* If we're in an interrupt, have no user context or are running
* in an atomic region then we must not take the fault:
@@ -1171,17 +1152,22 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
- fault = handle_mm_fault(mm, vma, address, write);
+ fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
return;
}
- if (fault & VM_FAULT_MAJOR)
+ if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- else
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, address);
+ } else {
tsk->min_flt++;
+ perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, address);
+ }
check_v8086_mode(regs, address, tsk);
--- head-2011-03-17.orig/arch/x86/mm/highmem_32-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/highmem_32-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
- /*arch_flush_lazy_mmu_mode();*/
return (void *)vaddr;
}
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km
#endif
}
- /*arch_flush_lazy_mmu_mode();*/
pagefault_enable();
}
@@ -150,6 +148,7 @@ EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kmap_atomic_prot);
#ifdef CONFIG_HIGHPTE
EXPORT_SYMBOL(kmap_atomic_to_page);
#endif
--- head-2011-03-17.orig/arch/x86/mm/hypervisor.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/hypervisor.c 2011-02-01 14:50:44.000000000 +0100
@@ -116,8 +116,8 @@ static int _xen_multicall_flush(bool ret
return 0;
}
-void xen_multicall_flush(bool force) {
- if (force || use_lazy_mmu_mode())
+void xen_multicall_flush(void) {
+ if (use_lazy_mmu_mode())
_xen_multicall_flush(false);
}
--- head-2011-03-17.orig/arch/x86/mm/init-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -1,3 +1,4 @@
+#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
#include <linux/bootmem.h>
@@ -11,6 +12,10 @@
#include <asm/setup.h>
#include <asm/system.h>
#include <asm/tlbflush.h>
+#include <asm/tlb.h>
+#include <asm/proto.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long __meminitdata e820_table_start;
unsigned long __meminitdata e820_table_end;
@@ -31,6 +36,69 @@ extern unsigned long extend_init_mapping
extern void xen_finish_init_mapping(void);
#endif
+int nx_enabled; /* set to 1 by set_nx() after it sets EFER_NX */
+
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
+static int disable_nx __cpuinitdata;
+
+/*
+ * noexec = on|off
+ *
+ * Control non-executable mappings for processes.
+ *
+ * on Enable
+ * off Disable
+ */
+static int __init noexec_setup(char *str)
+{
+ if (!str)
+ return -EINVAL; /* NULL argument is the only error case */
+ if (!strncmp(str, "on", 2)) { /* compares the first 2 chars only */
+ __supported_pte_mask |= _PAGE_NX;
+ disable_nx = 0;
+ } else if (!strncmp(str, "off", 3)) { /* first 3 chars only */
+ disable_nx = 1;
+ __supported_pte_mask &= ~_PAGE_NX;
+ }
+ return 0; /* any other value changes nothing and still returns 0 */
+}
+early_param("noexec", noexec_setup);
+#endif
+
+#ifdef CONFIG_X86_PAE
+static void __init set_nx(void)
+{
+ unsigned int v[4], l, h; /* cpuid outputs; two halves of MSR_EFER */
+
+ if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
+ cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
+ /* Bit 20 of the last cpuid output (EDX) is the NX feature flag. */
+ if ((v[3] & (1 << 20)) && !disable_nx) {
+ rdmsr(MSR_EFER, l, h);
+ l |= EFER_NX; /* h is written back unchanged */
+ wrmsr(MSR_EFER, l, h);
+ nx_enabled = 1;
+ __supported_pte_mask |= _PAGE_NX;
+ }
+ }
+}
+#else /* !CONFIG_X86_PAE: empty stub */
+static inline void set_nx(void)
+{
+}
+#endif
+
+#ifdef CONFIG_X86_64
+void __cpuinit check_efer(void)
+{
+ unsigned long efer;
+
+ rdmsrl(MSR_EFER, efer);
+ if (!(efer & EFER_NX) || disable_nx)
+ __supported_pte_mask &= ~_PAGE_NX; /* EFER_NX clear or disable_nx set */
+}
+#endif /* CONFIG_X86_64 */
+
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
@@ -127,20 +195,6 @@ static int __meminit save_mr(struct map_
return nr_range;
}
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
-static void __init init_gbpages(void)
-{
- if (direct_gbpages && cpu_has_gbpages)
- printk(KERN_INFO "Using GB pages for direct mapping\n");
- else
- direct_gbpages = 0;
-}
-#else
-static inline void init_gbpages(void)
-{
-}
-#endif
-
/*
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
* This runs before bootmem is initialized and gets pages directly from
@@ -160,10 +214,7 @@ unsigned long __init_refok init_memory_m
printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
- if (!after_bootmem)
- init_gbpages();
-
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
/*
* For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
* This will simplify cpa(), which otherwise needs to support splitting
@@ -175,12 +226,9 @@ unsigned long __init_refok init_memory_m
use_gbpages = direct_gbpages;
#endif
-#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_PAE
set_nx();
if (nx_enabled)
printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-#endif
/* Enable PSE if available */
if (cpu_has_pse)
@@ -191,7 +239,6 @@ unsigned long __init_refok init_memory_m
set_in_cr4(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
}
-#endif
if (use_gbpages)
page_size_mask |= 1 << PG_LEVEL_1G;
--- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -52,12 +52,9 @@
#include <asm/swiotlb.h>
#include <asm/setup.h>
#include <asm/cacheflush.h>
+#include <asm/page_types.h>
#include <asm/init.h>
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
unsigned long highstart_pfn, highend_pfn;
static noinline int do_test_wp_bit(void);
@@ -122,7 +119,7 @@ static pte_t * __init one_page_table_ini
pte_t *page_table = NULL;
if (after_bootmem) {
-#ifdef CONFIG_DEBUG_PAGEALLOC
+#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
#endif
if (!page_table)
@@ -569,7 +566,7 @@ static inline void save_pg_dir(void)
}
#endif /* !CONFIG_ACPI_SLEEP */
-void zap_low_mappings(void)
+void zap_low_mappings(bool early)
{
int i;
@@ -586,64 +583,16 @@ void zap_low_mappings(void)
set_pgd(swapper_pg_dir+i, __pgd(0));
#endif
}
- flush_tlb_all();
-}
-int nx_enabled;
+ if (early)
+ __flush_tlb();
+ else
+ flush_tlb_all();
+}
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
EXPORT_SYMBOL_GPL(__supported_pte_mask);
-#ifdef CONFIG_X86_PAE
-
-static int disable_nx __initdata;
-
-/*
- * noexec = on|off
- *
- * Control non executable mappings.
- *
- * on Enable
- * off Disable
- */
-static int __init noexec_setup(char *str)
-{
- if (!str || !strcmp(str, "on")) {
- if (cpu_has_nx) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- }
- } else {
- if (!strcmp(str, "off")) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- } else {
- return -EINVAL;
- }
- }
-
- return 0;
-}
-early_param("noexec", noexec_setup);
-
-void __init set_nx(void)
-{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
- }
- }
-}
-#endif
-
/* user-defined highmem size */
static unsigned int highmem_pages = -1;
@@ -763,15 +712,15 @@ void __init initmem_init(unsigned long s
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > max_low_pfn)
highstart_pfn = max_low_pfn;
- memory_present(0, 0, highend_pfn);
e820_register_active_regions(0, 0, highend_pfn);
+ sparse_memory_present_with_active_regions(0);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- memory_present(0, 0, max_low_pfn);
e820_register_active_regions(0, 0, max_low_pfn);
+ sparse_memory_present_with_active_regions(0);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
#endif
@@ -1074,7 +1023,7 @@ void __init mem_init(void)
test_wp_bit();
save_pg_dir();
- zap_low_mappings();
+ zap_low_mappings(true);
SetPagePinned(virt_to_page(init_mm.pgd));
}
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -56,21 +56,11 @@
#include <xen/features.h>
-/*
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
- * apertures, ACPI and other tables without having to play with fixmaps.
- */
-unsigned long max_low_pfn_mapped;
-unsigned long max_pfn_mapped;
-
#if CONFIG_XEN_COMPAT <= 0x030002
unsigned int __kernel_page_user;
EXPORT_SYMBOL(__kernel_page_user);
#endif
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
@@ -151,39 +141,6 @@ early_param("gbpages", parse_direct_gbpa
pteval_t __supported_pte_mask __read_mostly = ~0UL;
EXPORT_SYMBOL_GPL(__supported_pte_mask);
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec=on|off
- * Control non-executable mappings for 64-bit processes.
- *
- * on Enable (default)
- * off Disable
- */
-static int __init nonx_setup(char *str)
-{
- if (!str)
- return -EINVAL;
- if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
- return 0;
-}
-early_param("noexec", nonx_setup);
-
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || disable_nx)
- __supported_pte_mask &= ~_PAGE_NX;
-}
-
int force_personality32;
/*
@@ -213,7 +170,7 @@ static __ref void *spp_getpage(void)
void *ptr;
if (after_bootmem)
- ptr = (void *) get_zeroed_page(GFP_ATOMIC);
+ ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
else if (e820_table_end < e820_table_top) {
ptr = __va(e820_table_end << PAGE_SHIFT);
e820_table_end++;
@@ -399,7 +356,7 @@ static __ref void *alloc_low_page(unsign
void *adr;
if (after_bootmem) {
- adr = (void *)get_zeroed_page(GFP_ATOMIC);
+ adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK);
*phys = __pa(adr);
return adr;
@@ -804,7 +761,7 @@ void __init xen_finish_init_mapping(void
e820_table_top = e820_table_end;
}
-unsigned long __init
+unsigned long __meminit
kernel_physical_mapping_init(unsigned long start,
unsigned long end,
unsigned long page_size_mask)
@@ -873,6 +830,7 @@ void __init initmem_init(unsigned long s
early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
}
+#endif
void __init paging_init(void)
{
@@ -883,13 +841,21 @@ void __init paging_init(void)
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
max_zone_pfns[ZONE_NORMAL] = max_pfn;
- memory_present(0, 0, max_pfn);
+ sparse_memory_present_with_active_regions(MAX_NUMNODES);
sparse_init();
+
+ /*
+ * Clear the default N_NORMAL_MEMORY setting for node 0.
+ * Note: don't use nodes_clear() here; it really clears the mask even
+ * when NUMA support is not compiled in, and a later node_set_state()
+ * will not set it back.
+ */
+ node_clear_state(0, N_NORMAL_MEMORY);
+
free_area_init_nodes(max_zone_pfns);
SetPagePinned(virt_to_page(init_mm.pgd));
}
-#endif
/*
* Memory hotplug specific functions
@@ -1084,7 +1050,7 @@ int __init reserve_bootmem_generic(unsig
return ret;
#else
- reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
+ reserve_bootmem(phys, len, flags);
#endif
#ifndef CONFIG_XEN
--- head-2011-03-17.orig/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -84,7 +84,6 @@ iounmap_atomic(void *kvaddr, enum km_typ
if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
kpte_clear_flush(kmap_pte-idx, vaddr);
- /*arch_flush_lazy_mmu_mode();*/
pagefault_enable();
}
EXPORT_SYMBOL_GPL(iounmap_atomic);
--- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -11,6 +11,7 @@
#include <linux/interrupt.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
+#include <linux/pfn.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -486,7 +487,7 @@ static int split_large_page(pte_t *kpte,
if (!debug_pagealloc)
spin_unlock(&cpa_lock);
- base = alloc_pages(GFP_KERNEL, 0);
+ base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0);
if (!debug_pagealloc)
spin_lock(&cpa_lock);
if (!base)
@@ -610,9 +611,12 @@ static int __change_page_attr(struct cpa
unsigned int level;
pte_t *kpte, old_pte;
- if (cpa->flags & CPA_PAGES_ARRAY)
- address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
- else if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ address = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
@@ -702,8 +706,9 @@ static int __change_page_attr_set_clr(st
static int cpa_process_alias(struct cpa_data *cpa)
{
struct cpa_data alias_cpa;
- int ret = 0;
- unsigned long temp_cpa_vaddr, vaddr;
+ unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
+ unsigned long vaddr, remapped;
+ int ret;
if (cpa->pfn >= max_pfn_mapped)
return 0;
@@ -716,9 +721,12 @@ static int cpa_process_alias(struct cpa_
* No need to redo, when the primary call touched the direct
* mapping already:
*/
- if (cpa->flags & CPA_PAGES_ARRAY)
- vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
- else if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY) {
+ struct page *page = cpa->pages[cpa->curpage];
+ if (unlikely(PageHighMem(page)))
+ return 0;
+ vaddr = (unsigned long)page_address(page);
+ } else if (cpa->flags & CPA_ARRAY)
vaddr = cpa->vaddr[cpa->curpage];
else
vaddr = *cpa->vaddr;
@@ -727,42 +735,55 @@ static int cpa_process_alias(struct cpa_
PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
alias_cpa = *cpa;
- temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
- alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.vaddr = &laddr;
alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
-
ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ if (ret)
+ return ret;
}
#ifdef CONFIG_X86_64
- if (ret)
- return ret;
- /*
- * No need to redo, when the primary call touched the high
- * mapping already:
- */
- if (within(vaddr, (unsigned long) _text, _brk_end))
- return 0;
-
/*
- * If the physical address is inside the kernel map, we need
+ * If the primary call didn't touch the high mapping already
+ * and the physical address is inside the kernel map, we need
* to touch the high mapped kernel as well:
*/
- if (!within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn()))
- return 0;
+ if (!within(vaddr, (unsigned long)_text, _brk_end) &&
+ within(cpa->pfn, highmap_start_pfn(), highmap_end_pfn())) {
+ unsigned long temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) +
+ __START_KERNEL_map;
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = &temp_cpa_vaddr;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
- alias_cpa = *cpa;
- temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map;
- alias_cpa.vaddr = &temp_cpa_vaddr;
- alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+ /*
+ * The high mapping range is imprecise, so ignore the
+ * return value.
+ */
+ __change_page_attr_set_clr(&alias_cpa, 0);
+ }
+#endif
/*
- * The high mapping range is imprecise, so ignore the return value.
- */
- __change_page_attr_set_clr(&alias_cpa, 0);
-#endif
- return ret;
+ * If the PMD page was partially used for per-cpu remapping,
+ * the recycled area needs to be split and modified. Because
+ * the area is always a proper subset of a PMD page,
+ * cpa->numpages is guaranteed to be 1 for these areas, so
+ * there's no need to loop over and check for further remaps.
+ */
+ remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
+ if (remapped) {
+ WARN_ON(cpa->numpages > 1);
+ alias_cpa = *cpa;
+ alias_cpa.vaddr = &remapped;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
+ ret = __change_page_attr_set_clr(&alias_cpa, 0);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
}
static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
@@ -860,15 +881,6 @@ static int change_page_attr_set_clr(unsi
vm_unmap_aliases();
- /*
- * If we're called with lazy mmu updates enabled, the
- * in-memory pte state may be stale. Flush pending updates to
- * bring them up to date.
- *
- arch_flush_lazy_mmu_mode();*/
- if (arch_use_lazy_mmu_mode())
- xen_multicall_flush(true);
-
cpa.vaddr = addr;
cpa.pages = pages;
cpa.numpages = numpages;
@@ -913,14 +925,6 @@ static int change_page_attr_set_clr(unsi
} else
cpa_flush_all(cache);
- /*
- * If we've been called with lazy mmu updates enabled, then
- * make sure that everything gets flushed out before we
- * return.
- *
- arch_flush_lazy_mmu_mode();*/
- WARN_ON_ONCE(arch_use_lazy_mmu_mode() && !irq_count());
-
out:
return ret;
}
@@ -1065,12 +1069,15 @@ EXPORT_SYMBOL(set_memory_array_uc);
int _set_memory_wc(unsigned long addr, int numpages)
{
int ret;
+ unsigned long addr_copy = addr;
+
ret = change_page_attr_set(&addr, numpages,
__pgprot(_PAGE_CACHE_UC_MINUS), 0);
-
if (!ret) {
- ret = change_page_attr_set(&addr, numpages,
- __pgprot(_PAGE_CACHE_WC), 0);
+ ret = change_page_attr_set_clr(&addr_copy, numpages,
+ __pgprot(_PAGE_CACHE_WC),
+ __pgprot(_PAGE_CACHE_MASK),
+ 0, 0, NULL);
}
return ret;
}
@@ -1187,7 +1194,9 @@ int set_pages_array_uc(struct page **pag
int free_idx;
for (i = 0; i < addrinarray; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
goto err_out;
@@ -1200,7 +1209,9 @@ int set_pages_array_uc(struct page **pag
err_out:
free_idx = i;
for (i = 0; i < free_idx; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
}
@@ -1229,7 +1240,9 @@ int set_pages_array_wb(struct page **pag
return retval;
for (i = 0; i < addrinarray; i++) {
- start = (unsigned long)page_address(pages[i]);
+ if (PageHighMem(pages[i]))
+ continue;
+ start = page_to_pfn(pages[i]) << PAGE_SHIFT;
end = start + PAGE_SIZE;
free_memtype(start, end);
}
--- head-2011-03-17.orig/arch/x86/mm/pat-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pat-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -639,7 +639,8 @@ static int reserve_pfn_range(u64 paddr,
return ret;
if (flags != want_flags) {
- if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
+ if (strict_prot ||
+ !is_new_memtype_allowed(paddr, size, want_flags, flags)) {
free_memtype(paddr, paddr + size);
printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
" for %Lx-%Lx, got %s\n",
--- head-2011-03-17.orig/arch/x86/mm/pgtable-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pgtable-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -8,9 +8,11 @@
#include <asm/hypervisor.h>
#include <asm/mmu_context.h>
+#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
{
- pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+ pte_t *pte = (pte_t *)__get_free_page(PGALLOC_GFP);
if (pte)
make_lowmem_page_readonly(pte, XENFEAT_writable_page_tables);
return pte;
@@ -27,9 +29,9 @@ pgtable_t pte_alloc_one(struct mm_struct
struct page *pte;
#ifdef CONFIG_HIGHPTE
- pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0);
+ pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0);
#else
- pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ pte = alloc_pages(PGALLOC_GFP, 0);
#endif
if (pte) {
pgtable_page_ctor(pte);
@@ -65,7 +67,7 @@ void __pte_free(pgtable_t pte)
__free_page(pte);
}
-void __pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
+void ___pte_free_tlb(struct mmu_gather *tlb, struct page *pte)
{
pgtable_page_dtor(pte);
paravirt_release_pte(page_to_pfn(pte));
@@ -83,7 +85,7 @@ pmd_t *pmd_alloc_one(struct mm_struct *m
{
struct page *pmd;
- pmd = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0);
+ pmd = alloc_pages(PGALLOC_GFP, 0);
if (!pmd)
return NULL;
SetPageForeign(pmd, _pmd_free);
@@ -107,14 +109,14 @@ void __pmd_free(pgtable_t pmd)
__free_page(pmd);
}
-void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
+void ___pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd)
{
paravirt_release_pmd(__pa(pmd) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pmd));
}
#if PAGETABLE_LEVELS > 3
-void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
+void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud)
{
paravirt_release_pud(__pa(pud) >> PAGE_SHIFT);
tlb_remove_page(tlb, virt_to_page(pud));
@@ -609,7 +611,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
pmd_t *pmds[PREALLOCATED_PMDS];
unsigned long flags;
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
+ pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER);
if (pgd == NULL)
goto out;
--- head-2011-03-17.orig/arch/x86/pci/i386.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/arch/x86/pci/i386.c 2011-02-01 14:50:44.000000000 +0100
@@ -239,12 +239,14 @@ void __init pcibios_resource_survey(void
pcibios_allocate_resources(1);
e820_reserve_resources_late();
+#ifndef CONFIG_XEN
/*
* Insert the IO APIC resources after PCI initialization has
* occured to handle IO APICS that are mapped in on a BAR in
* PCI space, but before trying to assign unassigned pci res.
*/
ioapic_insert_resources();
+#endif
}
/**
--- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -895,6 +895,9 @@ static int pcibios_lookup_irq(struct pci
return 0;
}
+ if (io_apic_assign_pci_irqs)
+ return 0;
+
/* Find IRQ routing entry */
if (!pirq_table)
@@ -1045,56 +1048,15 @@ static void __init pcibios_fixup_irqs(vo
pirq_penalty[dev->irq]++;
}
+ if (io_apic_assign_pci_irqs)
+ return;
+
dev = NULL;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (!pin)
continue;
-#ifdef CONFIG_X86_IO_APIC
- /*
- * Recalculate IRQ numbers if we use the I/O APIC.
- */
- if (io_apic_assign_pci_irqs) {
- int irq;
-
- /*
- * interrupt pins are numbered starting from 1
- */
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
- PCI_SLOT(dev->devfn), pin - 1);
- /*
- * Busses behind bridges are typically not listed in the
- * MP-table. In this case we have to look up the IRQ
- * based on the parent bus, parent slot, and pin number.
- * The SMP code detects such bridged busses itself so we
- * should get into this branch reliably.
- */
- if (irq < 0 && dev->bus->parent) {
- /* go back to the bridge */
- struct pci_dev *bridge = dev->bus->self;
- int bus;
-
- pin = pci_swizzle_interrupt_pin(dev, pin);
- bus = bridge->bus->number;
- irq = IO_APIC_get_PCI_irq_vector(bus,
- PCI_SLOT(bridge->devfn), pin - 1);
- if (irq >= 0)
- dev_warn(&dev->dev,
- "using bridge %s INT %c to "
- "get IRQ %d\n",
- pci_name(bridge),
- 'A' + pin - 1, irq);
- }
- if (irq >= 0) {
- dev_info(&dev->dev,
- "PCI->APIC IRQ transform: INT %c "
- "-> IRQ %d\n",
- 'A' + pin - 1, irq);
- dev->irq = irq;
- }
- }
-#endif
/*
* Still no IRQ? Try to lookup one...
*/
@@ -1189,6 +1151,19 @@ int __init pcibios_irq_init(void)
pcibios_enable_irq = pirq_enable_irq;
pcibios_fixup_irqs();
+
+ if (io_apic_assign_pci_irqs && pci_routeirq) {
+ struct pci_dev *dev = NULL;
+ /*
+ * PCI IRQ routing is set up by pci_enable_device(), but we
+ * also do it here in case there are still broken drivers that
+ * don't use pci_enable_device().
+ */
+ printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+ for_each_pci_dev(dev)
+ pirq_enable_irq(dev);
+ }
+
return 0;
}
@@ -1219,16 +1194,23 @@ void pcibios_penalize_isa_irq(int irq, i
static int pirq_enable_irq(struct pci_dev *dev)
{
u8 pin;
- struct pci_dev *temp_dev;
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
- if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
+ if (pin && !pcibios_lookup_irq(dev, 1)) {
char *msg = "";
+ if (!io_apic_assign_pci_irqs && dev->irq)
+ return 0;
+
if (io_apic_assign_pci_irqs) {
+#ifdef CONFIG_X86_IO_APIC
+ struct pci_dev *temp_dev;
int irq;
+ struct io_apic_irq_attr irq_attr;
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
+ PCI_SLOT(dev->devfn),
+ pin - 1, &irq_attr);
/*
* Busses behind bridges are typically not listed in the MP-table.
* In this case we have to look up the IRQ based on the parent bus,
@@ -1241,7 +1223,8 @@ static int pirq_enable_irq(struct pci_de
pin = pci_swizzle_interrupt_pin(dev, pin);
irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
- PCI_SLOT(bridge->devfn), pin - 1);
+ PCI_SLOT(bridge->devfn),
+ pin - 1, &irq_attr);
if (irq >= 0)
dev_warn(&dev->dev, "using bridge %s "
"INT %c to get IRQ %d\n",
@@ -1251,12 +1234,15 @@ static int pirq_enable_irq(struct pci_de
}
dev = temp_dev;
if (irq >= 0) {
+ io_apic_set_pci_routing(&dev->dev, irq,
+ &irq_attr);
+ dev->irq = irq;
dev_info(&dev->dev, "PCI->APIC IRQ transform: "
"INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
- dev->irq = irq;
return 0;
} else
msg = "; probably buggy MP table";
+#endif
} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
msg = "";
else
--- head-2011-03-17.orig/arch/x86/pci/pcifront.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/arch/x86/pci/pcifront.c 2011-02-01 14:50:44.000000000 +0100
@@ -6,6 +6,7 @@
*/
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/irq.h>
#include <linux/pci.h>
#include <asm/acpi.h>
#include <asm/pci_x86.h>
@@ -15,6 +16,8 @@ static int pcifront_enable_irq(struct pc
{
u8 irq;
pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
+ if (!irq_to_desc_alloc_node(irq, numa_node_id()))
+ return -ENOMEM;
evtchn_register_pirq(irq);
dev->irq = irq;
--- head-2011-03-17.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -377,6 +377,8 @@ int arch_setup_additional_pages(struct l
}
}
+ current->mm->context.vdso = (void *)addr;
+
if (compat_uses_vma || !compat) {
/*
* MAYWRITE to allow gdb to COW and set breakpoints
@@ -397,11 +399,13 @@ int arch_setup_additional_pages(struct l
goto up_fail;
}
- current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return =
VDSO32_SYMBOL(addr, SYSENTER_RETURN);
up_fail:
+ if (ret)
+ current->mm->context.vdso = NULL;
+
up_write(&mm->mmap_sem);
return ret;
--- head-2011-03-17.orig/drivers/acpi/processor_driver.c 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/drivers/acpi/processor_driver.c 2011-02-01 14:50:44.000000000 +0100
@@ -340,7 +340,14 @@ static int acpi_processor_get_info(struc
* generated as the following format:
* CPU+CPU ID.
*/
- sprintf(acpi_device_bid(device), "CPU%X", pr->id);
+ if (pr->id != -1)
+ sprintf(acpi_device_bid(device), "CPU%X", pr->id);
+ else
+ snprintf(acpi_device_bid(device),
+ ARRAY_SIZE(acpi_device_bid(device)),
+ "#%0*X",
+ (int)ARRAY_SIZE(acpi_device_bid(device)) - 2,
+ pr->acpi_id);
ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Processor [%d:%d]\n", pr->id,
pr->acpi_id));
--- head-2011-03-17.orig/drivers/char/agp/intel-gtt.c 2011-03-11 10:53:08.000000000 +0100
+++ head-2011-03-17/drivers/char/agp/intel-gtt.c 2011-03-11 11:00:05.000000000 +0100
@@ -282,7 +282,11 @@ static struct agp_memory *alloc_agpphysm
new->page_count = pg_count;
new->num_scratch_pages = pg_count;
new->type = AGP_PHYS_MEMORY;
+#ifndef CONFIG_XEN
new->physical = page_to_phys(new->pages[0]);
+#else
+ new->physical = page_to_pseudophys(new->pages[0]);
+#endif
return new;
}
--- head-2011-03-17.orig/drivers/edac/Kconfig 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/edac/Kconfig 2011-02-01 14:50:44.000000000 +0100
@@ -77,6 +77,7 @@ config EDAC_MCE
config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h"
depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
+ depends on !XEN
help
Support for error detection and correction of DRAM ECC errors on
the AMD64 families of memory controllers (K8 and F10h)
--- head-2011-03-17.orig/drivers/gpu/drm/ttm/ttm_bo.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/gpu/drm/ttm/ttm_bo.c 2011-02-01 14:50:44.000000000 +0100
@@ -1440,6 +1440,14 @@ int ttm_bo_global_init(struct drm_global
ret = -ENOMEM;
goto out_no_drp;
}
+#ifdef CONFIG_XEN
+ ret = xen_limit_pages_to_max_mfn(glob->dummy_read_page, 0, 32);
+ if (!ret)
+ clear_page(page_address(glob->dummy_read_page));
+ else
+ printk(KERN_WARNING
+ "Error restricting dummy read page: %d\n", ret);
+#endif
INIT_LIST_HEAD(&glob->swap_lru);
INIT_LIST_HEAD(&glob->device_list);
--- head-2011-03-17.orig/drivers/gpu/drm/ttm/ttm_bo_vm.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/gpu/drm/ttm/ttm_bo_vm.c 2011-03-02 11:54:22.000000000 +0100
@@ -169,7 +169,13 @@ static int ttm_bo_vm_fault(struct vm_are
if (bo->mem.bus.is_iomem) {
vma->vm_page_prot = ttm_io_prot(bo->mem.placement,
vma->vm_page_prot);
+#if defined(CONFIG_XEN) && defined(_PAGE_IOMAP)
+ pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP;
+#endif
} else {
+#if defined(CONFIG_XEN) && defined(_PAGE_IOMAP)
+ pgprot_val(vma->vm_page_prot) &= ~_PAGE_IOMAP;
+#endif
ttm = bo->ttm;
vma->vm_page_prot = (bo->mem.placement & TTM_PL_FLAG_CACHED) ?
vm_get_page_prot(vma->vm_flags) :
--- head-2011-03-17.orig/drivers/pci/msi-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/pci/msi-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -54,22 +54,17 @@ int arch_msi_check_device(struct pci_dev
}
#endif
-static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
+static void msi_set_enable(struct pci_dev *dev, int pos, int enable)
{
u16 control;
- if (pos) {
- pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
- control &= ~PCI_MSI_FLAGS_ENABLE;
- if (enable)
- control |= PCI_MSI_FLAGS_ENABLE;
- pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
- }
-}
+ BUG_ON(!pos);
-static void msi_set_enable(struct pci_dev *dev, int enable)
-{
- __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
+ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
+ control &= ~PCI_MSI_FLAGS_ENABLE;
+ if (enable)
+ control |= PCI_MSI_FLAGS_ENABLE;
+ pci_write_config_word(dev, pos + PCI_MSI_FLAGS, control);
}
static void msix_set_enable(struct pci_dev *dev, int enable)
@@ -294,8 +289,11 @@ void pci_restore_msi_state(struct pci_de
return;
pci_intx_for_msi(dev, 0);
- if (dev->msi_enabled)
- msi_set_enable(dev, 0);
+ if (dev->msi_enabled) {
+ int pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+
+ msi_set_enable(dev, pos, 0);
+ }
if (dev->msix_enabled)
msix_set_enable(dev, 0);
@@ -322,9 +320,9 @@ static int msi_capability_init(struct pc
int pos, pirq;
u16 control;
- msi_set_enable(dev, 0); /* Ensure msi is disabled as I set it up */
-
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ msi_set_enable(dev, pos, 0); /* Disable MSI during set up */
+
pci_read_config_word(dev, msi_control_reg(pos), &control);
WARN_ON(nvec > 1); /* XXX */
@@ -334,7 +332,7 @@ static int msi_capability_init(struct pc
/* Set MSI enabled bits */
pci_intx_for_msi(dev, 0);
- msi_set_enable(dev, 1);
+ msi_set_enable(dev, pos, 1);
dev->msi_enabled = 1;
dev->irq = pirq;
@@ -356,6 +354,7 @@ static int msix_capability_init(struct p
{
u64 table_base;
int pirq, i, j, mapped, pos;
+ u16 control;
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
struct msi_pirq_entry *pirq_entry;
@@ -365,11 +364,24 @@ static int msix_capability_init(struct p
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
+ pci_read_config_word(dev, pos + PCI_MSIX_FLAGS, &control);
+
+ /* Ensure MSI-X is disabled while it is set up */
+ control &= ~PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
table_base = find_table_base(dev, pos);
if (!table_base)
return -ENODEV;
- /* MSI-X Table Initialization */
+ /*
+ * Some devices require MSI-X to be enabled before we can touch the
+ * MSI-X registers. We need to mask all the vectors to prevent
+ * interrupts coming in before they're fully set up.
+ */
+ control |= PCI_MSIX_FLAGS_MASKALL | PCI_MSIX_FLAGS_ENABLE;
+ pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
for (i = 0; i < nvec; i++) {
mapped = 0;
list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) {
@@ -406,10 +418,13 @@ static int msix_capability_init(struct p
return avail;
}
+ /* Set MSI-X enabled bits and unmask the function */
pci_intx_for_msi(dev, 0);
- msix_set_enable(dev, 1);
dev->msix_enabled = 1;
+ control &= ~PCI_MSIX_FLAGS_MASKALL;
+ pci_write_config_word(dev, pos + PCI_MSIX_FLAGS, control);
+
return 0;
}
@@ -531,7 +546,7 @@ EXPORT_SYMBOL(pci_enable_msi_block);
extern void pci_frontend_disable_msi(struct pci_dev* dev);
void pci_msi_shutdown(struct pci_dev *dev)
{
- int pirq;
+ int pirq, pos;
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
if (!pci_msi_enable || !dev || !dev->msi_enabled)
@@ -553,7 +568,8 @@ void pci_msi_shutdown(struct pci_dev *de
msi_unmap_pirq(dev, pirq);
/* Disable MSI mode */
- msi_set_enable(dev, 0);
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
+ msi_set_enable(dev, pos, 0);
pci_intx_for_msi(dev, 1);
dev->msi_enabled = 0;
}
@@ -593,8 +609,8 @@ int pci_msix_table_size(struct pci_dev *
* indicates the successful configuration of MSI-X capability structure
* with new allocated MSI-X irqs. A return of < 0 indicates a failure.
* Or a return of > 0 indicates that driver request is exceeding the number
- * of irqs available. Driver should use the returned value to re-send
- * its request.
+ * of irqs or MSI-X vectors available. Driver should use the returned value to
+ * re-send its request.
**/
extern int pci_frontend_enable_msix(struct pci_dev *dev,
struct msix_entry *entries, int nvec);
@@ -650,7 +666,7 @@ int pci_enable_msix(struct pci_dev* dev,
nr_entries = pci_msix_table_size(dev);
if (nvec > nr_entries)
- return -EINVAL;
+ return nr_entries;
/* Check for any invalid entries */
for (i = 0; i < nvec; i++) {
--- head-2011-03-17.orig/drivers/staging/vt6655/ttype.h 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/staging/vt6655/ttype.h 2010-06-22 15:50:05.000000000 +0200
@@ -30,6 +30,9 @@
#ifndef __TTYPE_H__
#define __TTYPE_H__
+#ifdef CONFIG_XEN
+#include <asm/hypervisor.h>
+#endif
/******* Common definitions and typedefs ***********************************/
--- head-2011-03-17.orig/drivers/xen/Kconfig 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/Kconfig 2011-02-02 15:37:23.000000000 +0100
@@ -371,7 +371,8 @@ config XEN_SCRUB_PAGES
config XEN_DEV_EVTCHN
tristate "Xen /dev/xen/evtchn device"
- default y
+ depends on XEN || PARAVIRT_XEN
+ default PARAVIRT_XEN || XEN_PRIVILEGED_GUEST || m
help
The evtchn driver allows a userspace process to triger event
channels and to receive notification of an event channel
@@ -411,7 +412,7 @@ config XEN_COMPAT_XENFS
config XEN_SYS_HYPERVISOR
bool "Create xen entries under /sys/hypervisor"
- depends on SYSFS
+ depends on PARAVIRT_XEN && SYSFS
select SYS_HYPERVISOR
default y
help
--- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-24 14:09:54.000000000 +0100
+++ head-2011-03-17/drivers/xen/Makefile 2011-02-24 14:10:06.000000000 +0100
@@ -5,7 +5,6 @@ xen-balloon-$(CONFIG_PARAVIRT_XEN) := ba
xen-balloon-$(CONFIG_XEN) := balloon/
obj-$(CONFIG_XEN) += core/
obj-$(CONFIG_XEN) += console/
-obj-$(CONFIG_XEN) += evtchn/
obj-y += xenbus/
obj-$(CONFIG_XEN) += char/
@@ -15,7 +14,9 @@ obj-$(CONFIG_XEN) += features.o $(xen-
obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
+obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
obj-$(CONFIG_XENFS) += xenfs/
+obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ blktap2-new/
--- head-2011-03-17.orig/drivers/xen/balloon/balloon.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/balloon/balloon.c 2011-02-01 14:50:44.000000000 +0100
@@ -321,7 +321,7 @@ static int increase_reservation(unsigned
balloon_unlock(flags);
#ifndef MODULE
- setup_per_zone_pages_min();
+ setup_per_zone_wmarks();
if (rc > 0)
kswapd_run(0);
if (need_zonelists_rebuild)
--- head-2011-03-17.orig/drivers/xen/blkback/blkback.c 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkback/blkback.c 2011-02-01 14:50:44.000000000 +0100
@@ -495,7 +495,7 @@ static void dispatch_rw_block_io(blkif_t
for (i = 0; i < nseg; i++) {
if (((int)preq.sector_number|(int)seg[i].nsec) &
- ((bdev_hardsect_size(preq.bdev) >> 9) - 1)) {
+ ((bdev_logical_block_size(preq.bdev) >> 9) - 1)) {
DPRINTK("Misaligned I/O request from domain %d",
blkif->domid);
goto fail_put_bio;
--- head-2011-03-17.orig/drivers/xen/blkback/vbd.c 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkback/vbd.c 2011-02-01 14:50:44.000000000 +0100
@@ -47,7 +47,7 @@ unsigned int vbd_info(struct vbd *vbd)
unsigned long vbd_secsize(struct vbd *vbd)
{
- return bdev_hardsect_size(vbd->bdev);
+ return bdev_logical_block_size(vbd->bdev);
}
int vbd_create(blkif_t *blkif, blkif_vdev_t handle, unsigned major,
--- head-2011-03-17.orig/drivers/xen/blkback/xenbus.c 2011-01-31 17:49:31.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkback/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -108,7 +108,7 @@ static void update_blkif_status(blkif_t
if (!get_device(_dev)) \
return ret; \
dev = to_xenbus_device(_dev); \
- if ((be = dev->dev.driver_data) != NULL) \
+ if ((be = dev_get_drvdata(&dev->dev)) != NULL) \
ret = sprintf(buf, format, ##args); \
put_device(_dev); \
return ret; \
@@ -173,7 +173,7 @@ void xenvbd_sysfs_delif(struct xenbus_de
static int blkback_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
DPRINTK("");
@@ -194,7 +194,7 @@ static int blkback_remove(struct xenbus_
}
kfree(be);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -229,7 +229,7 @@ static int blkback_probe(struct xenbus_d
return -ENOMEM;
}
be->dev = dev;
- dev->dev.driver_data = be;
+ dev_set_drvdata(&dev->dev, be);
be->blkif = blkif_alloc(dev->otherend_id);
if (IS_ERR(be->blkif)) {
@@ -352,7 +352,7 @@ static void backend_changed(struct xenbu
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
int err;
DPRINTK("%s", xenbus_strstate(frontend_state));
--- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-02-01 14:50:44.000000000 +0100
@@ -119,12 +119,12 @@ static int blkfront_probe(struct xenbus_
/* Front end dir is a number, which is used as the id. */
info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
- dev->dev.driver_data = info;
+ dev_set_drvdata(&dev->dev, info);
err = talk_to_backend(dev, info);
if (err) {
kfree(info);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return err;
}
@@ -140,7 +140,7 @@ static int blkfront_probe(struct xenbus_
*/
static int blkfront_resume(struct xenbus_device *dev)
{
- struct blkfront_info *info = dev->dev.driver_data;
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
int err;
DPRINTK("blkfront_resume: %s\n", dev->nodename);
@@ -265,7 +265,7 @@ fail:
static void backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
- struct blkfront_info *info = dev->dev.driver_data;
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
struct block_device *bd;
DPRINTK("blkfront:backend_changed.\n");
@@ -433,7 +433,7 @@ static void blkfront_closing(struct blkf
static int blkfront_remove(struct xenbus_device *dev)
{
- struct blkfront_info *info = dev->dev.driver_data;
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
DPRINTK("blkfront_remove: %s removed\n", dev->nodename);
@@ -682,7 +682,7 @@ static int blkif_queue_request(struct re
info->shadow[id].request = (unsigned long)req;
ring_req->id = id;
- ring_req->sector_number = (blkif_sector_t)req->sector;
+ ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
ring_req->handle = info->handle;
ring_req->operation = rq_data_dir(req) ?
@@ -738,25 +738,25 @@ void do_blkif_request(struct request_que
queued = 0;
- while ((req = elv_next_request(rq)) != NULL) {
+ while ((req = blk_peek_request(rq)) != NULL) {
info = req->rq_disk->private_data;
- if (!blk_fs_request(req)) {
- end_request(req, 0);
- continue;
- }
if (RING_FULL(&info->ring))
goto wait;
- DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
- "(%u/%li) buffer:%p [%s]\n",
- req, req->cmd, (long long)req->sector,
- req->current_nr_sectors,
- req->nr_sectors, req->buffer,
- rq_data_dir(req) ? "write" : "read");
+ blk_start_request(req);
+ if (!blk_fs_request(req)) {
+ __blk_end_request_all(req, -EIO);
+ continue;
+ }
+
+ DPRINTK("do_blk_req %p: cmd %p, sec %llx, "
+ "(%u/%u) buffer:%p [%s]\n",
+ req, req->cmd, (long long)blk_rq_pos(req),
+ blk_rq_cur_sectors(req), blk_rq_sectors(req),
+ req->buffer, rq_data_dir(req) ? "write" : "read");
- blkdev_dequeue_request(req);
if (blkif_queue_request(req)) {
blk_requeue_request(rq, req);
wait:
@@ -822,8 +822,7 @@ static irqreturn_t blkif_int(int irq, vo
DPRINTK("Bad return from blkdev data "
"request: %x\n", bret->status);
- ret = __blk_end_request(req, ret, blk_rq_bytes(req));
- BUG_ON(ret);
+ __blk_end_request_all(req, ret);
break;
default:
BUG();
@@ -953,7 +952,7 @@ static int blkif_recover(struct blkfront
int blkfront_is_ready(struct xenbus_device *dev)
{
- struct blkfront_info *info = dev->dev.driver_data;
+ struct blkfront_info *info = dev_get_drvdata(&dev->dev);
return info->is_ready && info->xbdev;
}
--- head-2011-03-17.orig/drivers/xen/blkfront/vbd.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/vbd.c 2011-02-01 14:50:44.000000000 +0100
@@ -310,7 +310,7 @@ xlvbd_init_blk_queue(struct gendisk *gd,
#endif
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_hardsect_size(rq, sector_size);
+ blk_queue_logical_block_size(rq, sector_size);
blk_queue_max_sectors(rq, 512);
/* Each segment in a request is up to an aligned page in size. */
@@ -499,7 +499,7 @@ static ssize_t show_media(struct device
struct device_attribute *attr, char *buf)
{
struct xenbus_device *xendev = to_xenbus_device(dev);
- struct blkfront_info *info = xendev->dev.driver_data;
+ struct blkfront_info *info = dev_get_drvdata(&xendev->dev);
if (info->gd->flags & GENHD_FL_CD)
return sprintf(buf, "cdrom\n");
--- head-2011-03-17.orig/drivers/xen/blktap/blktap.c 2011-02-17 10:11:18.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap/blktap.c 2011-02-17 10:16:17.000000000 +0100
@@ -279,6 +279,15 @@ static inline unsigned int OFFSET_TO_SEG
} while(0)
+static char *blktap_nodename(struct device *dev)
+{
+ return kasprintf(GFP_KERNEL, "xen/blktap%u", MINOR(dev->devt));
+}
+
+static struct device_type blktap_type = {
+ .nodename = blktap_nodename
+};
+
/******************************************************************
* BLKTAP VM OPS
*/
@@ -438,7 +447,6 @@ static const struct file_operations blkt
static tap_blkif_t *get_next_free_dev(void)
{
- struct class *class;
tap_blkif_t *info;
int minor;
@@ -501,9 +509,9 @@ found:
wmb();
tapfds[minor] = info;
- if ((class = get_xen_class()) != NULL)
- device_create(class, NULL, MKDEV(blktap_major, minor),
- NULL, "blktap%d", minor);
+ xen_class_device_create(&blktap_type, NULL,
+ MKDEV(blktap_major, minor),
+ NULL, "blktap%d", minor);
}
out:
@@ -546,7 +554,8 @@ void signal_tapdisk(int idx)
return;
if (info->pid > 0) {
- ptask = find_task_by_pid_ns(info->pid, info->pid_ns);
+ ptask = pid_task(find_pid_ns(info->pid, info->pid_ns),
+ PIDTYPE_PID);
if (ptask)
info->status = CLEANSHUTDOWN;
}
@@ -1700,7 +1709,6 @@ static void make_response(blkif_t *blkif
static int __init blkif_init(void)
{
int i, ret;
- struct class *class;
if (!is_running_on_xen())
return -ENODEV;
@@ -1736,7 +1744,7 @@ static int __init blkif_init(void)
DPRINTK("Created misc_dev %d:0 [/dev/xen/blktap0]\n", ret);
/* Make sure the xen class exists */
- if ((class = get_xen_class()) != NULL) {
+ if (get_xen_class()) {
/*
* This will allow udev to create the blktap ctrl device.
* We only want to create blktap0 first. We don't want
@@ -1744,8 +1752,9 @@ static int __init blkif_init(void)
* We only create the device when a request of a new device is
* made.
*/
- device_create(class, NULL, MKDEV(blktap_major, 0), NULL,
- "blktap0");
+ xen_class_device_create(&blktap_type, NULL,
+ MKDEV(blktap_major, 0), NULL,
+ "blktap0");
} else {
/* this is bad, but not fatal */
WPRINTK("blktap: sysfs xen_class not created\n");
--- head-2011-03-17.orig/drivers/xen/blktap/xenbus.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -128,7 +128,7 @@ static int blktap_name(blkif_t *blkif, c
if (!get_device(_dev)) \
return ret; \
dev = to_xenbus_device(_dev); \
- if ((be = dev->dev.driver_data) != NULL) \
+ if ((be = dev_get_drvdata(&dev->dev)) != NULL) \
ret = sprintf(buf, format, ##args); \
put_device(_dev); \
return ret; \
@@ -158,7 +158,7 @@ static struct attribute_group tapstat_gr
int xentap_sysfs_addif(struct xenbus_device *dev)
{
int err;
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
err = sysfs_create_group(&dev->dev.kobj, &tapstat_group);
if (!err)
be->group_added = 1;
@@ -167,14 +167,14 @@ int xentap_sysfs_addif(struct xenbus_dev
void xentap_sysfs_delif(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
sysfs_remove_group(&dev->dev.kobj, &tapstat_group);
be->group_added = 0;
}
static int blktap_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
if (be->group_added)
xentap_sysfs_delif(be->dev);
@@ -192,7 +192,7 @@ static int blktap_remove(struct xenbus_d
be->blkif = NULL;
}
kfree(be);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -261,7 +261,7 @@ static int blktap_probe(struct xenbus_de
}
be->dev = dev;
- dev->dev.driver_data = be;
+ dev_set_drvdata(&dev->dev, be);
be->xenbus_id = get_id(dev->nodename);
be->blkif = tap_alloc_blkif(dev->otherend_id);
@@ -351,7 +351,7 @@ static void blkif_disconnect(blkif_t *bl
static void tap_frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
int err;
DPRINTK("fe_changed(%s,%d)\n", dev->nodename, frontend_state);
--- head-2011-03-17.orig/drivers/xen/blktap2/blktap.h 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/blktap.h 2011-02-01 14:50:44.000000000 +0100
@@ -25,6 +25,8 @@ extern int blktap_debug_level;
#define BTWARN(_f, _a...) BTPRINTK(0, KERN_WARNING, 0, _f, ##_a)
#define BTERR(_f, _a...) BTPRINTK(0, KERN_ERR, 0, _f, ##_a)
+#define BLKTAP2_DEV_DIR "xen/blktap-2/"
+
#define MAX_BLKTAP_DEVICE 256
#define BLKTAP_CONTROL 1
--- head-2011-03-17.orig/drivers/xen/blktap2/control.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/control.c 2011-02-01 14:50:44.000000000 +0100
@@ -154,6 +154,7 @@ static const struct file_operations blkt
static struct miscdevice blktap_misc = {
.minor = MISC_DYNAMIC_MINOR,
.name = "blktap-control",
+ .devnode = BLKTAP2_DEV_DIR "control",
.fops = &blktap_control_file_operations,
};
--- head-2011-03-17.orig/drivers/xen/blktap2/device.c 2011-02-07 14:14:26.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/device.c 2011-02-01 14:50:44.000000000 +0100
@@ -206,13 +206,6 @@ flush_tlb_kernel_page(unsigned long kvad
#endif
}
-static void
-blktap_device_end_dequeued_request(struct request *req, int ret)
-{
- if (blk_end_request(req, ret, blk_rq_bytes(req)))
- BUG();
-}
-
/*
* tap->tap_sem held on entry
*/
@@ -378,7 +371,7 @@ blktap_device_fail_pending_requests(stru
blktap_unmap(tap, request);
req = (struct request *)(unsigned long)request->id;
- blktap_device_end_dequeued_request(req, -ENODEV);
+ blk_end_request_all(req, -ENODEV);
blktap_request_free(tap, request);
}
@@ -417,7 +410,7 @@ blktap_device_finish_request(struct blkt
if (unlikely(res->status != BLKIF_RSP_OKAY))
BTERR("Bad return from device data "
"request: %x\n", res->status);
- blktap_device_end_dequeued_request(req,
+ blk_end_request_all(req,
res->status == BLKIF_RSP_OKAY ? 0 : -EIO);
break;
default:
@@ -647,7 +640,7 @@ blktap_device_process_request(struct blk
ring = &tap->ring;
usr_idx = request->usr_idx;
blkif_req.id = usr_idx;
- blkif_req.sector_number = (blkif_sector_t)req->sector;
+ blkif_req.sector_number = (blkif_sector_t)blk_rq_pos(req);
blkif_req.handle = 0;
blkif_req.operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
@@ -844,20 +837,22 @@ blktap_device_run_queue(struct blktap *t
BTDBG("running queue for %d\n", tap->minor);
- while ((req = elv_next_request(rq)) != NULL) {
+ while ((req = blk_peek_request(rq)) != NULL) {
if (!blk_fs_request(req)) {
- end_request(req, 0);
+ blk_start_request(req);
+ __blk_end_request_all(req, -EIO);
continue;
}
if (blk_barrier_rq(req)) {
- end_request(req, 0);
+ blk_start_request(req);
+ __blk_end_request_all(req, -EOPNOTSUPP);
continue;
}
#ifdef ENABLE_PASSTHROUGH
if (test_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse)) {
- blkdev_dequeue_request(req);
+ blk_start_request(req);
blktap_device_forward_request(tap, req);
continue;
}
@@ -877,13 +872,13 @@ blktap_device_run_queue(struct blktap *t
goto wait;
}
- BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%lx) "
+ BTDBG("req %p: dev %d cmd %p, sec 0x%llx, (0x%x/0x%x) "
"buffer:%p [%s], pending: %p\n", req, tap->minor,
- req->cmd, (unsigned long long)req->sector,
- req->current_nr_sectors, req->nr_sectors, req->buffer,
+ req->cmd, (unsigned long long)blk_rq_pos(req),
+ blk_rq_cur_sectors(req), blk_rq_sectors(req), req->buffer,
rq_data_dir(req) ? "write" : "read", request);
- blkdev_dequeue_request(req);
+ blk_start_request(req);
spin_unlock_irq(&dev->lock);
down_write(&tap->tap_sem);
@@ -892,7 +887,7 @@ blktap_device_run_queue(struct blktap *t
if (!err)
queued++;
else {
- blktap_device_end_dequeued_request(req, err);
+ blk_end_request_all(req, err);
blktap_request_free(tap, request);
}
@@ -932,11 +927,12 @@ blktap_device_do_request(struct request_
return;
fail:
- while ((req = elv_next_request(rq))) {
+ while ((req = blk_fetch_request(rq))) {
BTERR("device closed: failing secs %llu - %llu\n",
- (unsigned long long)req->sector,
- (unsigned long long)req->sector + req->nr_sectors);
- end_request(req, 0);
+ (unsigned long long)blk_rq_pos(req),
+ (unsigned long long)blk_rq_pos(req)
+ + blk_rq_sectors(req));
+ __blk_end_request_all(req, -EIO);
}
}
@@ -991,7 +987,7 @@ blktap_device_configure(struct blktap *t
set_capacity(dev->gd, tap->params.capacity);
/* Hard sector size and max sectors impersonate the equiv. hardware. */
- blk_queue_hardsect_size(rq, tap->params.sector_size);
+ blk_queue_logical_block_size(rq, tap->params.sector_size);
blk_queue_max_sectors(rq, 512);
/* Each segment in a request is up to an aligned page in size. */
@@ -1089,6 +1085,12 @@ blktap_device_destroy(struct blktap *tap
return 0;
}
+static char *blktap_nodename(struct gendisk *gd)
+{
+ return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "tapdev%u",
+ gd->first_minor);
+}
+
int
blktap_device_create(struct blktap *tap)
{
@@ -1125,6 +1127,7 @@ blktap_device_create(struct blktap *tap)
gd->major = blktap_device_major;
gd->first_minor = minor;
+ gd->nodename = blktap_nodename;
gd->fops = &blktap_device_file_operations;
gd->private_data = dev;
--- head-2011-03-17.orig/drivers/xen/blktap2/sysfs.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/sysfs.c 2011-02-01 14:50:44.000000000 +0100
@@ -436,6 +436,12 @@ blktap_sysfs_free(void)
class_destroy(class);
}
+static char *blktap_nodename(struct device *dev)
+{
+ return kasprintf(GFP_KERNEL, BLKTAP2_DEV_DIR "blktap%u",
+ MINOR(dev->devt));
+}
+
int __init
blktap_sysfs_init(void)
{
@@ -449,6 +455,8 @@ blktap_sysfs_init(void)
if (IS_ERR(cls))
return PTR_ERR(cls);
+ cls->nodename = blktap_nodename;
+
err = class_create_file(cls, &class_attr_verbosity);
if (!err) {
err = class_create_file(cls, &class_attr_devices);
--- head-2011-03-17.orig/drivers/xen/console/console.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/console/console.c 2011-02-01 14:50:44.000000000 +0100
@@ -46,7 +46,6 @@
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/console.h>
-#include <linux/bootmem.h>
#include <linux/sysrq.h>
#include <linux/screen_info.h>
#include <linux/vt.h>
@@ -236,7 +235,7 @@ static int __init xen_console_init(void)
goto out;
}
- wbuf = alloc_bootmem(wbuf_size);
+ wbuf = kmalloc(wbuf_size, GFP_KERNEL);
register_console(&kcons_info);
@@ -632,8 +631,8 @@ static void xencons_close(struct tty_str
tty->closing = 1;
tty_wait_until_sent(tty, 0);
tty_driver_flush_buffer(tty);
- if (tty->ldisc.ops->flush_buffer != NULL)
- tty->ldisc.ops->flush_buffer(tty);
+ if (tty->ldisc->ops->flush_buffer)
+ tty->ldisc->ops->flush_buffer(tty);
tty->closing = 0;
spin_lock_irqsave(&xencons_lock, flags);
xencons_tty = NULL;
--- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-09 13:57:45.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-01 14:50:44.000000000 +0100
@@ -35,7 +35,6 @@
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
-#include <linux/bootmem.h>
#include <linux/ftrace.h>
#include <linux/version.h>
#include <asm/atomic.h>
@@ -138,6 +137,12 @@ static inline unsigned int type_from_irq
return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND;
}
+unsigned int irq_from_evtchn(unsigned int port)
+{
+ return evtchn_to_irq[port];
+}
+EXPORT_SYMBOL_GPL(irq_from_evtchn);
+
/* IRQ <-> VIRQ mapping. */
DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
@@ -328,6 +333,8 @@ asmlinkage void __irq_entry evtchn_do_up
}
do {
+ bool handled = false;
+
masked_l2 = l2 & ((~0UL) << l2i);
if (masked_l2 == 0)
break;
@@ -338,13 +345,12 @@ asmlinkage void __irq_entry evtchn_do_up
mask_evtchn(port);
if ((irq = evtchn_to_irq[port]) != -1) {
clear_evtchn(port);
- if (!handle_irq(irq, regs)
- && printk_ratelimit())
- pr_emerg("No handler for "
- "irq %d (port %u)\n",
- irq, port);
- } else
- evtchn_device_upcall(port);
+ handled = handle_irq(irq, regs);
+ }
+ if (!handled && printk_ratelimit())
+ pr_emerg("No handler for irq %d"
+ " (port %u)\n",
+ irq, port);
l2i = (l2i + 1) % BITS_PER_LONG;
@@ -371,16 +377,26 @@ asmlinkage void __irq_entry evtchn_do_up
set_irq_regs(old_regs);
}
-static int find_unbound_irq(unsigned int cpu, struct irq_chip *chip)
+static int find_unbound_irq(unsigned int node, struct irq_chip *chip)
{
static int warned;
int irq;
for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
- struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu);
- struct irq_cfg *cfg = desc->chip_data;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
- if (!cfg->bindcount) {
+ desc = irq_to_desc(irq);
+ if (!desc)
+ desc = irq_to_desc_alloc_node(irq, node);
+ else if (desc->chip != &no_irq_chip &&
+ desc->chip != &dynirq_chip)
+ continue;
+ if (!desc)
+ return -ENOMEM;
+
+ cfg = desc->chip_data;
+ if (cfg && !cfg->bindcount) {
desc->status |= IRQ_NOPROBE;
set_irq_chip_and_handler_name(irq, chip,
handle_fasteoi_irq,
@@ -407,7 +423,7 @@ static int bind_caller_port_to_irq(unsig
spin_lock(&irq_mapping_update_lock);
if ((irq = evtchn_to_irq[caller_port]) == -1) {
- if ((irq = find_unbound_irq(smp_processor_id(), &dynirq_chip)) < 0)
+ if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0)
goto out;
evtchn_to_irq[caller_port] = irq;
@@ -430,9 +446,8 @@ static int bind_local_port_to_irq(unsign
BUG_ON(evtchn_to_irq[local_port] != -1);
- if ((irq = find_unbound_irq(smp_processor_id(), &dynirq_chip)) < 0) {
- struct evtchn_close close = { .port = local_port };
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+ if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0) {
+ if (close_evtchn(local_port))
BUG();
goto out;
}
@@ -483,7 +498,8 @@ static int bind_virq_to_irq(unsigned int
spin_lock(&irq_mapping_update_lock);
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
- if ((irq = find_unbound_irq(cpu, &dynirq_chip)) < 0)
+ if ((irq = find_unbound_irq(cpu_to_node(cpu),
+ &dynirq_chip)) < 0)
goto out;
bind_virq.virq = virq;
@@ -516,7 +532,8 @@ static int bind_ipi_to_irq(unsigned int
spin_lock(&irq_mapping_update_lock);
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
- if ((irq = find_unbound_irq(cpu, &dynirq_chip)) < 0)
+ if ((irq = find_unbound_irq(cpu_to_node(cpu),
+ &dynirq_chip)) < 0)
goto out;
bind_ipi.vcpu = cpu;
@@ -542,16 +559,14 @@ static int bind_ipi_to_irq(unsigned int
static void unbind_from_irq(unsigned int irq)
{
- struct evtchn_close close;
unsigned int cpu;
int evtchn = evtchn_from_irq(irq);
spin_lock(&irq_mapping_update_lock);
if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) {
- close.port = evtchn;
if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
- HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
+ close_evtchn(evtchn))
BUG();
switch (type_from_irq(irq)) {
@@ -732,9 +747,11 @@ static void rebind_irq_to_cpu(unsigned i
rebind_evtchn_to_cpu(evtchn, tcpu);
}
-static void set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int set_affinity_irq(unsigned int irq, const struct cpumask *dest)
{
rebind_irq_to_cpu(irq, cpumask_first(dest));
+
+ return 0;
}
#endif
@@ -908,7 +925,6 @@ static unsigned int startup_pirq(unsigne
static void shutdown_pirq(unsigned int irq)
{
- struct evtchn_close close;
int evtchn = evtchn_from_irq(irq);
if (!VALID_EVTCHN(evtchn))
@@ -916,8 +932,7 @@ static void shutdown_pirq(unsigned int i
mask_evtchn(evtchn);
- close.port = evtchn;
- if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0)
+ if (close_evtchn(evtchn))
BUG();
bind_evtchn_to_cpu(evtchn, 0);
@@ -1252,7 +1267,7 @@ int evtchn_map_pirq(int irq, int xen_pir
if (irq < 0) {
#ifdef CONFIG_SPARSE_IRQ
spin_lock(&irq_mapping_update_lock);
- irq = find_unbound_irq(smp_processor_id(), &pirq_chip);
+ irq = find_unbound_irq(numa_node_id(), &pirq_chip);
if (irq >= 0) {
struct irq_desc *desc;
struct irq_cfg *cfg;
@@ -1280,7 +1295,7 @@ int evtchn_map_pirq(int irq, int xen_pir
if (identity_mapped_irq(irq))
continue;
- desc = irq_to_desc_alloc_cpu(irq, smp_processor_id());
+ desc = irq_to_desc_alloc_node(irq, numa_node_id());
cfg = desc->chip_data;
if (!index_from_irq(irq)) {
BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
@@ -1340,8 +1355,9 @@ void __init xen_init_IRQ(void)
#else
i = nr_pirqs;
#endif
- pirq_needs_eoi = alloc_bootmem_pages(sizeof(unsigned long)
- * BITS_TO_LONGS(ALIGN(i, PAGE_SIZE * 8)));
+ i = get_order(sizeof(unsigned long) * BITS_TO_LONGS(i));
+ pirq_needs_eoi = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, i);
+ BUILD_BUG_ON(NR_PIRQS > PAGE_SIZE * 8);
eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
pirq_eoi_does_unmask = true;
--- head-2011-03-17.orig/drivers/xen/core/smpboot.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/smpboot.c 2011-02-01 14:50:44.000000000 +0100
@@ -40,9 +40,11 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
static DEFINE_PER_CPU(int, call1func_irq);
+static DEFINE_PER_CPU(int, reboot_irq);
static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];
static char call1func_name[NR_CPUS][15];
+static char reboot_name[NR_CPUS][15];
void __init prefill_possible_map(void)
{
@@ -74,7 +76,7 @@ static int __cpuinit xen_smp_intr_init(u
int rc;
per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
- per_cpu(call1func_irq, cpu) = -1;
+ per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1;
sprintf(resched_name[cpu], "resched%u", cpu);
rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
@@ -109,6 +111,17 @@ static int __cpuinit xen_smp_intr_init(u
goto fail;
per_cpu(call1func_irq, cpu) = rc;
+ sprintf(reboot_name[cpu], "reboot%u", cpu);
+ rc = bind_ipi_to_irqhandler(REBOOT_VECTOR,
+ cpu,
+ smp_reboot_interrupt,
+ IRQF_DISABLED|IRQF_NOBALANCING,
+ reboot_name[cpu],
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(reboot_irq, cpu) = rc;
+
rc = xen_spinlock_init(cpu);
if (rc < 0)
goto fail;
@@ -125,6 +138,8 @@ static int __cpuinit xen_smp_intr_init(u
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
if (per_cpu(call1func_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
+ if (per_cpu(reboot_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
xen_spinlock_cleanup(cpu);
return rc;
}
@@ -138,6 +153,7 @@ static void __cpuinit xen_smp_intr_exit(
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
+ unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
xen_spinlock_cleanup(cpu);
}
#endif
--- head-2011-03-17.orig/drivers/xen/evtchn.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/xen/evtchn.c 2011-02-01 14:50:44.000000000 +0100
@@ -48,10 +48,17 @@
#include <linux/mutex.h>
#include <linux/cpu.h>
+#ifdef CONFIG_PARAVIRT_XEN
#include <xen/xen.h>
#include <xen/events.h>
#include <xen/evtchn.h>
#include <asm/xen/hypervisor.h>
+#else
+#include <xen/evtchn.h>
+#include <xen/public/evtchn.h>
+#define xen_domain() is_running_on_xen()
+#define bind_evtchn_to_irqhandler bind_caller_port_to_irqhandler
+#endif
struct per_user_data {
struct mutex bind_mutex; /* serialize bind/unbind operations */
@@ -278,6 +285,9 @@ static void evtchn_unbind_from_user(stru
int irq = irq_from_evtchn(port);
unbind_from_irqhandler(irq, (void *)(unsigned long)port);
+#ifdef CONFIG_XEN
+ WARN_ON(close_evtchn(port));
+#endif
set_port_user(port, NULL);
}
@@ -450,7 +460,8 @@ static int evtchn_open(struct inode *ino
if (u == NULL)
return -ENOMEM;
- u->name = kasprintf(GFP_KERNEL, "evtchn:%s", current->comm);
+ u->name = kasprintf(GFP_KERNEL, "evtchn:%s[%d]",
+ current->comm, current->pid);
if (u->name == NULL) {
kfree(u);
return -ENOMEM;
@@ -519,6 +530,7 @@ static const struct file_operations evtc
static struct miscdevice evtchn_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
.name = "xen/evtchn",
+ .devnode = "xen/evtchn",
.fops = &evtchn_fops,
};
static int __init evtchn_init(void)
@@ -534,10 +546,10 @@ static int __init evtchn_init(void)
spin_lock_init(&port_user_lock);
- /* Create '/dev/misc/evtchn'. */
+ /* Create '/dev/xen/evtchn'. */
err = misc_register(&evtchn_miscdev);
if (err != 0) {
- printk(KERN_ALERT "Could not register /dev/misc/evtchn\n");
+ pr_alert("Could not register /dev/xen/evtchn\n");
return err;
}
--- head-2011-03-17.orig/drivers/xen/fbfront/xenfb.c 2011-02-17 10:11:23.000000000 +0100
+++ head-2011-03-17/drivers/xen/fbfront/xenfb.c 2011-02-17 10:16:12.000000000 +0100
@@ -597,7 +597,7 @@ static int __devinit xenfb_probe(struct
fb_size = XENFB_DEFAULT_FB_LEN;
}
- dev->dev.driver_data = info;
+ dev_set_drvdata(&dev->dev, info);
info->xbdev = dev;
info->irq = -1;
info->x1 = info->y1 = INT_MAX;
@@ -701,7 +701,7 @@ static int __devinit xenfb_probe(struct
static int xenfb_resume(struct xenbus_device *dev)
{
- struct xenfb_info *info = dev->dev.driver_data;
+ struct xenfb_info *info = dev_get_drvdata(&dev->dev);
xenfb_disconnect_backend(info);
xenfb_init_shared_page(info, info->fb_info);
@@ -710,7 +710,7 @@ static int xenfb_resume(struct xenbus_de
static int xenfb_remove(struct xenbus_device *dev)
{
- struct xenfb_info *info = dev->dev.driver_data;
+ struct xenfb_info *info = dev_get_drvdata(&dev->dev);
del_timer(&info->refresh);
if (info->kthread)
@@ -819,7 +819,7 @@ static void xenfb_disconnect_backend(str
static void xenfb_backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
- struct xenfb_info *info = dev->dev.driver_data;
+ struct xenfb_info *info = dev_get_drvdata(&dev->dev);
int val;
switch (backend_state) {
--- head-2011-03-17.orig/drivers/xen/fbfront/xenkbd.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/fbfront/xenkbd.c 2011-02-01 14:50:44.000000000 +0100
@@ -113,7 +113,7 @@ int __devinit xenkbd_probe(struct xenbus
xenbus_dev_fatal(dev, -ENOMEM, "allocating info structure");
return -ENOMEM;
}
- dev->dev.driver_data = info;
+ dev_set_drvdata(&dev->dev, info);
info->xbdev = dev;
snprintf(info->phys, sizeof(info->phys), "xenbus/%s", dev->nodename);
@@ -186,7 +186,7 @@ int __devinit xenkbd_probe(struct xenbus
static int xenkbd_resume(struct xenbus_device *dev)
{
- struct xenkbd_info *info = dev->dev.driver_data;
+ struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
xenkbd_disconnect_backend(info);
info->page->in_cons = info->page->in_prod = 0;
@@ -196,7 +196,7 @@ static int xenkbd_resume(struct xenbus_d
static int xenkbd_remove(struct xenbus_device *dev)
{
- struct xenkbd_info *info = dev->dev.driver_data;
+ struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
xenkbd_disconnect_backend(info);
input_unregister_device(info->kbd);
@@ -262,7 +262,7 @@ static void xenkbd_disconnect_backend(st
static void xenkbd_backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
- struct xenkbd_info *info = dev->dev.driver_data;
+ struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
int ret, val;
switch (backend_state) {
--- head-2011-03-17.orig/drivers/xen/gntdev/gntdev.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/gntdev/gntdev.c 2011-02-01 14:50:44.000000000 +0100
@@ -371,10 +371,18 @@ nomem_out:
/* Interface functions. */
+static char *gntdev_nodename(struct device *dev)
+{
+ return kstrdup("xen/" GNTDEV_NAME, GFP_KERNEL);
+}
+
+static struct device_type gntdev_type = {
+ .nodename = gntdev_nodename
+};
+
/* Initialises the driver. Called when the module is loaded. */
static int __init gntdev_init(void)
{
- struct class *class;
struct device *device;
if (!is_running_on_xen()) {
@@ -393,14 +401,9 @@ static int __init gntdev_init(void)
* device, and output the major number so that the device can be
* created manually using mknod.
*/
- if ((class = get_xen_class()) == NULL) {
- pr_err("Error setting up xen_class\n");
- pr_err("gntdev created, major number = %d\n", gntdev_major);
- return 0;
- }
-
- device = device_create(class, NULL, MKDEV(gntdev_major, 0),
- NULL, GNTDEV_NAME);
+ device = xen_class_device_create(&gntdev_type, NULL,
+ MKDEV(gntdev_major, 0),
+ NULL, GNTDEV_NAME);
if (IS_ERR(device)) {
pr_err("Error creating gntdev device in xen_class\n");
pr_err("gntdev created, major number = %d\n", gntdev_major);
--- head-2011-03-17.orig/drivers/xen/netback/accel.c 2011-01-31 17:29:16.000000000 +0100
+++ head-2011-03-17/drivers/xen/netback/accel.c 2011-02-01 14:50:44.000000000 +0100
@@ -103,7 +103,7 @@ static int netback_accelerator_probe_bac
struct xenbus_device *xendev = to_xenbus_device(dev);
if (!strcmp("vif", xendev->devicetype)) {
- struct backend_info *be = xendev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&xendev->dev);
if (match_accelerator(xendev, be, accelerator) &&
try_module_get(accelerator->hooks->owner)) {
@@ -124,7 +124,7 @@ static int netback_accelerator_remove_ba
(struct netback_accelerator *)arg;
if (!strcmp("vif", xendev->devicetype)) {
- struct backend_info *be = xendev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&xendev->dev);
if (be->accelerator == accelerator) {
be->accelerator->hooks->remove(xendev);
--- head-2011-03-17.orig/drivers/xen/netback/loopback.c 2011-03-01 11:52:05.000000000 +0100
+++ head-2011-03-17/drivers/xen/netback/loopback.c 2011-02-01 14:50:44.000000000 +0100
@@ -139,8 +139,8 @@ static int loopback_start_xmit(struct sk
return 0;
}
- dst_release(skb->dst);
- skb->dst = NULL;
+ dst_release(skb_dst(skb));
+ skb_dst_set(skb, NULL);
skb_orphan(skb);
--- head-2011-03-17.orig/drivers/xen/netback/xenbus.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/netback/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -38,7 +38,7 @@ static void netback_disconnect(struct de
static int netback_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
netback_remove_accelerators(be, dev);
@@ -49,7 +49,7 @@ static int netback_remove(struct xenbus_
static void netback_disconnect(struct device *xbdev_dev, int clear)
{
- struct backend_info *be = xbdev_dev->driver_data;
+ struct backend_info *be = dev_get_drvdata(xbdev_dev);
if (be->netif)
kobject_uevent(&xbdev_dev->kobj, KOBJ_OFFLINE);
@@ -60,7 +60,7 @@ static void netback_disconnect(struct de
be->netif = NULL;
}
if (clear)
- xbdev_dev->driver_data = NULL;
+ dev_set_drvdata(xbdev_dev, NULL);
up_write(&teardown_sem);
}
@@ -84,7 +84,7 @@ static int netback_probe(struct xenbus_d
}
be->dev = dev;
- dev->dev.driver_data = be;
+ dev_set_drvdata(&dev->dev, be);
sg = 1;
if (netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB)
@@ -181,7 +181,7 @@ static int netback_uevent(struct xenbus_
kfree(val);
down_read(&teardown_sem);
- be = xdev->dev.driver_data;
+ be = dev_get_drvdata(&xdev->dev);
if (be && be->netif)
add_uevent_var(env, "vif=%s", be->netif->dev->name);
up_read(&teardown_sem);
@@ -224,7 +224,7 @@ static void backend_create_netif(struct
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
DPRINTK("%s", xenbus_strstate(frontend_state));
--- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-09 16:04:51.000000000 +0100
+++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-01 14:50:44.000000000 +0100
@@ -256,7 +256,7 @@ static int __devinit netfront_probe(stru
}
info = netdev_priv(netdev);
- dev->dev.driver_data = info;
+ dev_set_drvdata(&dev->dev, info);
err = register_netdev(info->netdev);
if (err) {
@@ -277,13 +277,13 @@ static int __devinit netfront_probe(stru
fail:
free_netdev(netdev);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return err;
}
static int __devexit netfront_remove(struct xenbus_device *dev)
{
- struct netfront_info *info = dev->dev.driver_data;
+ struct netfront_info *info = dev_get_drvdata(&dev->dev);
DPRINTK("%s\n", dev->nodename);
@@ -305,14 +305,14 @@ static int __devexit netfront_remove(str
static int netfront_suspend(struct xenbus_device *dev)
{
- struct netfront_info *info = dev->dev.driver_data;
+ struct netfront_info *info = dev_get_drvdata(&dev->dev);
return netfront_accelerator_suspend(info, dev);
}
static int netfront_suspend_cancel(struct xenbus_device *dev)
{
- struct netfront_info *info = dev->dev.driver_data;
+ struct netfront_info *info = dev_get_drvdata(&dev->dev);
return netfront_accelerator_suspend_cancel(info, dev);
}
@@ -325,7 +325,7 @@ static int netfront_suspend_cancel(struc
*/
static int netfront_resume(struct xenbus_device *dev)
{
- struct netfront_info *info = dev->dev.driver_data;
+ struct netfront_info *info = dev_get_drvdata(&dev->dev);
DPRINTK("%s\n", dev->nodename);
@@ -530,7 +530,7 @@ static int setup_device(struct xenbus_de
static void backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
- struct netfront_info *np = dev->dev.driver_data;
+ struct netfront_info *np = dev_get_drvdata(&dev->dev);
struct net_device *netdev = np->netdev;
DPRINTK("%s\n", xenbus_strstate(backend_state));
--- head-2011-03-17.orig/drivers/xen/pciback/xenbus.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/pciback/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -24,7 +24,7 @@ static struct pciback_device *alloc_pdev
dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev);
pdev->xdev = xdev;
- xdev->dev.driver_data = pdev;
+ dev_set_drvdata(&xdev->dev, pdev);
spin_lock_init(&pdev->dev_lock);
@@ -74,7 +74,7 @@ static void free_pdev(struct pciback_dev
pciback_release_devices(pdev);
- pdev->xdev->dev.driver_data = NULL;
+ dev_set_drvdata(&pdev->xdev->dev, NULL);
pdev->xdev = NULL;
kfree(pdev);
@@ -475,7 +475,7 @@ static int pciback_reconfigure(struct pc
static void pciback_frontend_changed(struct xenbus_device *xdev,
enum xenbus_state fe_state)
{
- struct pciback_device *pdev = xdev->dev.driver_data;
+ struct pciback_device *pdev = dev_get_drvdata(&xdev->dev);
dev_dbg(&xdev->dev, "fe state changed %d\n", fe_state);
@@ -668,7 +668,7 @@ static int pciback_xenbus_probe(struct x
static int pciback_xenbus_remove(struct xenbus_device *dev)
{
- struct pciback_device *pdev = dev->dev.driver_data;
+ struct pciback_device *pdev = dev_get_drvdata(&dev->dev);
if (pdev != NULL)
free_pdev(pdev);
--- head-2011-03-17.orig/drivers/xen/pcifront/pci_op.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/pcifront/pci_op.c 2011-02-01 14:50:44.000000000 +0100
@@ -416,7 +416,7 @@ void pci_frontend_disable_msi(struct pci
#endif /* CONFIG_PCI_MSI */
/* Claim resources for the PCI frontend as-is, backend won't allow changes */
-static void pcifront_claim_resource(struct pci_dev *dev, void *data)
+static int pcifront_claim_resource(struct pci_dev *dev, void *data)
{
struct pcifront_device *pdev = data;
int i;
@@ -431,6 +431,8 @@ static void pcifront_claim_resource(stru
pci_claim_resource(dev, i);
}
}
+
+ return 0;
}
int __devinit pcifront_scan_root(struct pcifront_device *pdev,
--- head-2011-03-17.orig/drivers/xen/pcifront/xenbus.c 2011-01-31 17:32:29.000000000 +0100
+++ head-2011-03-17/drivers/xen/pcifront/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -34,7 +34,7 @@ static struct pcifront_device *alloc_pde
/*Flag for registering PV AER handler*/
set_bit(_XEN_PCIB_AERHANDLER, (void*)&pdev->sh_info->flags);
- xdev->dev.driver_data = pdev;
+ dev_set_drvdata(&xdev->dev, pdev);
pdev->xdev = xdev;
INIT_LIST_HEAD(&pdev->root_buses);
@@ -75,7 +75,7 @@ static void free_pdev(struct pcifront_de
else
free_page((unsigned long)pdev->sh_info);
- pdev->xdev->dev.driver_data = NULL;
+ dev_set_drvdata(&pdev->xdev->dev, NULL);
kfree(pdev);
}
@@ -394,7 +394,7 @@ static int pcifront_detach_devices(struc
static void __init_refok pcifront_backend_changed(struct xenbus_device *xdev,
enum xenbus_state be_state)
{
- struct pcifront_device *pdev = xdev->dev.driver_data;
+ struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
switch (be_state) {
case XenbusStateUnknown:
@@ -446,8 +446,8 @@ static int pcifront_xenbus_probe(struct
static int pcifront_xenbus_remove(struct xenbus_device *xdev)
{
- if (xdev->dev.driver_data)
- free_pdev(xdev->dev.driver_data);
+ if (dev_get_drvdata(&xdev->dev))
+ free_pdev(dev_get_drvdata(&xdev->dev));
return 0;
}
--- head-2011-03-17.orig/drivers/xen/scsiback/scsiback.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/scsiback/scsiback.c 2011-02-01 14:50:44.000000000 +0100
@@ -224,7 +224,7 @@ static void scsiback_cmd_done(struct req
int errors;
sense_buffer = req->sense;
- resid = req->data_len;
+ resid = blk_rq_bytes(req);
errors = req->errors;
if (errors != 0) {
@@ -331,21 +331,6 @@ fail_flush:
return -ENOMEM;
}
-/* quoted scsi_lib.c/scsi_merge_bio */
-static int scsiback_merge_bio(struct request *rq, struct bio *bio)
-{
- struct request_queue *q = rq->q;
-
- bio->bi_flags &= ~(1 << BIO_SEG_VALID);
- if (rq_data_dir(rq) == WRITE)
- bio->bi_rw |= (1 << BIO_RW);
-
- blk_queue_bounce(q, &bio);
-
- return blk_rq_append_bio(q, rq, bio);
-}
-
-
/* quoted scsi_lib.c/scsi_bi_endio */
static void scsiback_bi_endio(struct bio *bio, int error)
{
@@ -355,29 +340,28 @@ static void scsiback_bi_endio(struct bio
/* quoted scsi_lib.c/scsi_req_map_sg . */
-static int request_map_sg(struct request *rq, pending_req_t *pending_req, unsigned int count)
+static struct bio *request_map_sg(pending_req_t *pending_req)
{
- struct request_queue *q = rq->q;
- int nr_pages;
- unsigned int nsegs = count;
- unsigned int data_len = 0, len, bytes, off;
+ struct request_queue *q = pending_req->sdev->request_queue;
+ unsigned int nsegs = (unsigned int)pending_req->nr_segments;
+ unsigned int i, len, bytes, off, nr_pages, nr_vecs = 0;
struct scatterlist *sg;
struct page *page;
- struct bio *bio = NULL;
- int i, err, nr_vecs = 0;
+ struct bio *bio = NULL, *bio_first = NULL, *bio_last = NULL;
+ int err;
for_each_sg (pending_req->sgl, sg, nsegs, i) {
page = sg_page(sg);
off = sg->offset;
len = sg->length;
- data_len += len;
nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT;
while (len > 0) {
bytes = min_t(unsigned int, len, PAGE_SIZE - off);
if (!bio) {
- nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
+ nr_vecs = min_t(unsigned int, BIO_MAX_PAGES,
+ nr_pages);
nr_pages -= nr_vecs;
bio = bio_alloc(GFP_KERNEL, nr_vecs);
if (!bio) {
@@ -385,6 +369,11 @@ static int request_map_sg(struct request
goto free_bios;
}
bio->bi_end_io = scsiback_bi_endio;
+ if (bio_last)
+ bio_last->bi_next = bio;
+ else
+ bio_first = bio;
+ bio_last = bio;
}
if (bio_add_pc_page(q, bio, page, bytes, off) !=
@@ -395,11 +384,9 @@ static int request_map_sg(struct request
}
if (bio->bi_vcnt >= nr_vecs) {
- err = scsiback_merge_bio(rq, bio);
- if (err) {
- bio_endio(bio, 0);
- goto free_bios;
- }
+ bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+ if (pending_req->sc_data_direction == WRITE)
+ bio->bi_rw |= (1 << BIO_RW);
bio = NULL;
}
@@ -409,21 +396,15 @@ static int request_map_sg(struct request
}
}
- rq->buffer = rq->data = NULL;
- rq->data_len = data_len;
-
- return 0;
+ return bio_first;
free_bios:
- while ((bio = rq->bio) != NULL) {
- rq->bio = bio->bi_next;
- /*
- * call endio instead of bio_put incase it was bounced
- */
- bio_endio(bio, 0);
+ while ((bio = bio_first) != NULL) {
+ bio_first = bio->bi_next;
+ bio_put(bio);
}
- return err;
+ return ERR_PTR(err);
}
@@ -431,7 +412,6 @@ void scsiback_cmd_exec(pending_req_t *pe
{
int cmd_len = (int)pending_req->cmd_len;
int data_dir = (int)pending_req->sc_data_direction;
- unsigned int nr_segments = (unsigned int)pending_req->nr_segments;
unsigned int timeout;
struct request *rq;
int write;
@@ -445,7 +425,37 @@ void scsiback_cmd_exec(pending_req_t *pe
timeout = VSCSIIF_TIMEOUT;
write = (data_dir == DMA_TO_DEVICE);
- rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
+	if (pending_req->nr_segments) {
+		struct bio *bio = request_map_sg(pending_req);
+
+		if (IS_ERR(bio)) {
+			pr_err("scsiback: SG Request Map Error\n");
+			return;
+		}
+
+		rq = blk_make_request(pending_req->sdev->request_queue, bio,
+				      GFP_KERNEL);
+		if (IS_ERR(rq)) {
+			/* The bio chain is still ours on failure; free it. */
+			while (bio) {
+				struct bio *next = bio->bi_next;
+
+				bio_put(bio);
+				bio = next;
+			}
+			pr_err("scsiback: Make Request Error\n");
+			return;
+		}
+
+		rq->buffer = NULL;
+	} else {
+		rq = blk_get_request(pending_req->sdev->request_queue, write,
+				     GFP_KERNEL);
+		if (unlikely(!rq)) {
+			pr_err("scsiback: Get Request Error\n");
+			return;
+		}
+	}
rq->cmd_type = REQ_TYPE_BLOCK_PC;
rq->cmd_len = cmd_len;
@@ -460,14 +470,6 @@ void scsiback_cmd_exec(pending_req_t *pe
rq->timeout = timeout;
rq->end_io_data = pending_req;
- if (nr_segments) {
-
- if (request_map_sg(rq, pending_req, nr_segments)) {
- pr_err("scsiback: SG Request Map Error\n");
- return;
- }
- }
-
scsiback_get(pending_req->info);
blk_execute_rq_nowait(rq->q, NULL, rq, 1, scsiback_cmd_done);
--- head-2011-03-17.orig/drivers/xen/scsiback/xenbus.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/scsiback/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -226,7 +226,7 @@ static void scsiback_do_lun_hotplug(stru
static void scsiback_frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
int err;
switch (frontend_state) {
@@ -283,7 +283,7 @@ static void scsiback_frontend_changed(st
static int scsiback_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
if (be->info) {
scsiback_disconnect(be->info);
@@ -293,7 +293,7 @@ static int scsiback_remove(struct xenbus
}
kfree(be);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -316,7 +316,7 @@ static int scsiback_probe(struct xenbus_
return -ENOMEM;
}
be->dev = dev;
- dev->dev.driver_data = be;
+ dev_set_drvdata(&dev->dev, be);
be->info = vscsibk_info_alloc(dev->otherend_id);
if (IS_ERR(be->info)) {
--- head-2011-03-17.orig/drivers/xen/scsifront/xenbus.c 2011-02-08 10:04:06.000000000 +0100
+++ head-2011-03-17/drivers/xen/scsifront/xenbus.c 2011-02-08 10:05:30.000000000 +0100
@@ -189,7 +189,7 @@ static int scsifront_probe(struct xenbus
info->host = host;
- dev->dev.driver_data = info;
+ dev_set_drvdata(&dev->dev, info);
info->dev = dev;
for (i = 0; i < VSCSIIF_MAX_REQS; i++) {
@@ -243,7 +243,7 @@ free_sring:
static int scsifront_remove(struct xenbus_device *dev)
{
- struct vscsifrnt_info *info = dev->dev.driver_data;
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
DPRINTK("%s: %s removed\n",__FUNCTION__ ,dev->nodename);
@@ -355,7 +355,7 @@ static void scsifront_do_lun_hotplug(str
static void scsifront_backend_changed(struct xenbus_device *dev,
enum xenbus_state backend_state)
{
- struct vscsifrnt_info *info = dev->dev.driver_data;
+ struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
DPRINTK("%p %u %u\n", dev, dev->state, backend_state);
--- head-2011-03-17.orig/drivers/xen/sfc_netback/accel_xenbus.c 2011-01-31 17:29:16.000000000 +0100
+++ head-2011-03-17/drivers/xen/sfc_netback/accel_xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -36,7 +36,7 @@
#define NODENAME_PATH_FMT "backend/vif/%d/%d"
#define NETBACK_ACCEL_FROM_XENBUS_DEVICE(_dev) (struct netback_accel *) \
- ((struct backend_info *)(_dev)->dev.driver_data)->netback_accel_priv
+ ((struct backend_info *)dev_get_drvdata(&(_dev)->dev))->netback_accel_priv
/* List of all the bends currently in existence. */
struct netback_accel *bend_list = NULL;
@@ -615,7 +615,7 @@ int netback_accel_probe(struct xenbus_de
mutex_lock(&bend->bend_mutex);
/* ...and store it where we can get at it */
- binfo = (struct backend_info *) dev->dev.driver_data;
+ binfo = dev_get_drvdata(&dev->dev);
binfo->netback_accel_priv = bend;
/* And vice-versa */
bend->hdev_data = dev;
@@ -729,7 +729,7 @@ int netback_accel_remove(struct xenbus_d
struct netback_accel *bend;
int frontend_state;
- binfo = (struct backend_info *) dev->dev.driver_data;
+ binfo = dev_get_drvdata(&dev->dev);
bend = (struct netback_accel *) binfo->netback_accel_priv;
DPRINTK("%s: dev %p bend %p\n", __FUNCTION__, dev, bend);
--- head-2011-03-17.orig/drivers/xen/sfc_netfront/accel_xenbus.c 2011-01-31 17:32:29.000000000 +0100
+++ head-2011-03-17/drivers/xen/sfc_netfront/accel_xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -727,8 +727,7 @@ int netfront_accel_probe(struct net_devi
int netfront_accel_remove(struct xenbus_device *dev)
{
- struct netfront_info *np =
- (struct netfront_info *)dev->dev.driver_data;
+ struct netfront_info *np = dev_get_drvdata(&dev->dev);
netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv;
DPRINTK("%s %s\n", __FUNCTION__, dev->nodename);
--- head-2011-03-17.orig/drivers/xen/sys-hypervisor.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/xen/sys-hypervisor.c 2011-02-01 14:50:44.000000000 +0100
@@ -20,6 +20,8 @@
#include <xen/interface/xen.h>
#include <xen/interface/version.h>
+#include "xenbus/xenbus_comms.h"
+
#define HYPERVISOR_ATTR_RO(_name) \
static struct hyp_sysfs_attr _name##_attr = __ATTR_RO(_name)
@@ -118,9 +120,8 @@ static ssize_t uuid_show(struct hyp_sysf
{
char *vm, *val;
int ret;
- extern int xenstored_ready;
- if (!xenstored_ready)
+ if (!is_xenstored_ready())
return -EBUSY;
vm = xenbus_read(XBT_NIL, "vm", "", NULL);
--- head-2011-03-17.orig/drivers/xen/tpmback/xenbus.c 2011-01-31 17:32:22.000000000 +0100
+++ head-2011-03-17/drivers/xen/tpmback/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -54,7 +54,7 @@ long int tpmback_get_instance(struct bac
static int tpmback_remove(struct xenbus_device *dev)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
if (!be) return 0;
@@ -70,7 +70,7 @@ static int tpmback_remove(struct xenbus_
be->tpmif = NULL;
}
kfree(be);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -89,7 +89,7 @@ static int tpmback_probe(struct xenbus_d
be->is_instance_set = 0;
be->dev = dev;
- dev->dev.driver_data = be;
+ dev_set_drvdata(&dev->dev, be);
err = xenbus_watch_path2(dev, dev->nodename,
"instance", &be->backend_watch,
@@ -139,7 +139,7 @@ static void backend_changed(struct xenbu
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- struct backend_info *be = dev->dev.driver_data;
+ struct backend_info *be = dev_get_drvdata(&dev->dev);
int err;
switch (frontend_state) {
--- head-2011-03-17.orig/drivers/xen/usbback/usbback.h 2011-01-31 17:49:31.000000000 +0100
+++ head-2011-03-17/drivers/xen/usbback/usbback.h 2011-02-01 14:50:44.000000000 +0100
@@ -63,6 +63,12 @@
struct usbstub;
+#ifdef BUS_ID_SIZE
+#define USBBACK_BUS_ID_SIZE BUS_ID_SIZE
+#else
+#define USBBACK_BUS_ID_SIZE 20 /* used when the kernel lacks BUS_ID_SIZE */
+#endif
+
#define USB_DEV_ADDR_SIZE 128
typedef struct usbif_st {
@@ -110,7 +116,7 @@ typedef struct usbif_st {
struct vusb_port_id {
struct list_head id_list;
- char phys_bus[BUS_ID_SIZE];
+ char phys_bus[USBBACK_BUS_ID_SIZE];
domid_t domid;
unsigned int handle;
int portnum;
--- head-2011-03-17.orig/drivers/xen/usbback/usbstub.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/usbback/usbstub.c 2011-02-01 14:50:44.000000000 +0100
@@ -56,7 +56,7 @@ struct vusb_port_id *find_portid_by_busi
spin_lock_irqsave(&port_list_lock, flags);
list_for_each_entry(portid, &port_list, id_list) {
- if (!(strncmp(portid->phys_bus, busid, BUS_ID_SIZE))) {
+ if (!(strncmp(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE))) {
found = 1;
break;
}
@@ -110,7 +110,7 @@ int portid_add(const char *busid,
portid->handle = handle;
portid->portnum = portnum;
- strncpy(portid->phys_bus, busid, BUS_ID_SIZE);
+ strncpy(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE);
spin_lock_irqsave(&port_list_lock, flags);
list_add(&portid->id_list, &port_list);
@@ -228,7 +228,7 @@ static int usbstub_probe(struct usb_inte
usbbk_hotplug_notify(usbif, portid->portnum, udev->speed);
} else {
/* maybe already called and connected by other intf */
- if (strncmp(stub->portid->phys_bus, busid, BUS_ID_SIZE))
+ if (strncmp(stub->portid->phys_bus, busid, USBBACK_BUS_ID_SIZE))
goto out; /* invalid call */
}
--- head-2011-03-17.orig/drivers/xen/usbback/xenbus.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/usbback/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -112,7 +112,7 @@ again:
*/
portid = find_portid(usbif->domid, usbif->handle, i);
if (portid) {
- if ((strncmp(portid->phys_bus, busid, BUS_ID_SIZE)))
+ if ((strncmp(portid->phys_bus, busid, USBBACK_BUS_ID_SIZE)))
xenbus_dev_fatal(dev, err,
"can't add port/%d, remove first", i);
else
@@ -142,7 +142,7 @@ abort:
static int usbback_remove(struct xenbus_device *dev)
{
- usbif_t *usbif = dev->dev.driver_data;
+ usbif_t *usbif = dev_get_drvdata(&dev->dev);
int i;
if (usbif->backend_watch.node) {
@@ -158,7 +158,7 @@ static int usbback_remove(struct xenbus_
usbif_disconnect(usbif);
usbif_free(usbif);;
}
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return 0;
}
@@ -182,7 +182,7 @@ static int usbback_probe(struct xenbus_d
return -ENOMEM;
}
usbif->xbdev = dev;
- dev->dev.driver_data = usbif;
+ dev_set_drvdata(&dev->dev, usbif);
err = xenbus_scanf(XBT_NIL, dev->nodename,
"num-ports", "%d", &num_ports);
@@ -260,7 +260,7 @@ static int connect_rings(usbif_t *usbif)
static void frontend_changed(struct xenbus_device *dev,
enum xenbus_state frontend_state)
{
- usbif_t *usbif = dev->dev.driver_data;
+ usbif_t *usbif = dev_get_drvdata(&dev->dev);
int err;
switch (frontend_state) {
--- head-2011-03-17.orig/drivers/xen/usbfront/xenbus.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/usbfront/xenbus.c 2011-02-01 14:50:44.000000000 +0100
@@ -187,7 +187,7 @@ out:
static int connect(struct xenbus_device *dev)
{
- struct usbfront_info *info = dev->dev.driver_data;
+ struct usbfront_info *info = dev_get_drvdata(&dev->dev);
usbif_conn_request_t *req;
int i, idx, err;
@@ -299,7 +299,7 @@ static int usbfront_probe(struct xenbus_
}
info = hcd_to_info(hcd);
- dev->dev.driver_data = info;
+ dev_set_drvdata(&dev->dev, info);
err = usb_add_hcd(hcd, 0, 0);
if (err != 0) {
@@ -314,13 +314,13 @@ static int usbfront_probe(struct xenbus_
fail:
usb_put_hcd(hcd);
- dev->dev.driver_data = NULL;
+ dev_set_drvdata(&dev->dev, NULL);
return err;
}
static void usbfront_disconnect(struct xenbus_device *dev)
{
- struct usbfront_info *info = dev->dev.driver_data;
+ struct usbfront_info *info = dev_get_drvdata(&dev->dev);
struct usb_hcd *hcd = info_to_hcd(info);
usb_remove_hcd(hcd);
@@ -364,7 +364,7 @@ static void backend_changed(struct xenbu
static int usbfront_remove(struct xenbus_device *dev)
{
- struct usbfront_info *info = dev->dev.driver_data;
+ struct usbfront_info *info = dev_get_drvdata(&dev->dev);
struct usb_hcd *hcd = info_to_hcd(info);
destroy_rings(info);
--- head-2011-03-17.orig/drivers/xen/util.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/util.c 2011-02-01 14:50:44.000000000 +0100
@@ -1,20 +1,74 @@
#include <linux/err.h>
#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
#include <xen/driver_util.h>
-struct class *get_xen_class(void)
+static struct class *_get_xen_class(void)
{
static struct class *xen_class;
+ static DEFINE_MUTEX(xc_mutex);
- if (xen_class)
- return xen_class;
-
- xen_class = class_create(THIS_MODULE, "xen");
- if (IS_ERR(xen_class)) {
+ mutex_lock(&xc_mutex);
+ if (IS_ERR_OR_NULL(xen_class))
+ xen_class = class_create(THIS_MODULE, "xen");
+ mutex_unlock(&xc_mutex);
+ if (IS_ERR(xen_class))
pr_err("failed to create xen sysfs class\n");
- xen_class = NULL;
- }
return xen_class;
}
+
+struct class *get_xen_class(void)
+{
+	struct class *xc = _get_xen_class();
+
+	return IS_ERR(xc) ? NULL : xc;	/* failure is reported as NULL */
+}
EXPORT_SYMBOL_GPL(get_xen_class);
+
+static void xcdev_release(struct device *dev)
+{
+ kfree(dev); /* allocated with kzalloc() by xen_class_device_create() */
+}
+
+/* Register a new "xen" class device named by @fmt; ERR_PTR() on failure. */
+struct device *xen_class_device_create(struct device_type *type,
+				       struct device *parent,
+				       dev_t devt, void *drvdata,
+				       const char *fmt, ...)
+{
+	struct class *class = _get_xen_class();
+	struct device *dev;
+	va_list vargs;
+	int err;
+
+	/* Resolve the class before naming: kfree(dev) won't free the name. */
+	if (IS_ERR(class))
+		return ERR_CAST(class);
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	va_start(vargs, fmt);
+	err = kobject_set_name_vargs(&dev->kobj, fmt, vargs);
+	va_end(vargs);
+	if (err) {
+		kfree(dev);
+		return ERR_PTR(err);
+	}
+
+	dev->devt = devt;
+	dev->class = class;
+	dev->type = type;
+	dev->parent = parent;
+	dev->release = xcdev_release;
+	dev_set_drvdata(dev, drvdata);
+	err = device_register(dev);
+	if (!err)
+		return dev;
+	/* Registration failed: dropping the reference frees dev via release. */
+	put_device(dev);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(xen_class_device_create);
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:50:44.000000000 +0100
@@ -92,6 +92,11 @@ static int xenbus_probe_frontend(const c
static void xenbus_dev_shutdown(struct device *_dev);
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state);
+static int xenbus_dev_resume(struct device *dev);
+#endif
+
/* If something in array of ids matches this device, return it. */
static const struct xenbus_device_id *
match_device(const struct xenbus_device_id *arr, struct xenbus_device *dev)
@@ -228,6 +233,10 @@ static struct xen_bus_type xenbus_fronte
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
.dev_attrs = xenbus_dev_attrs,
#endif
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ .suspend = xenbus_dev_suspend,
+ .resume = xenbus_dev_resume,
+#endif
},
#if defined(CONFIG_XEN) || defined(MODULE)
.dev = {
@@ -767,6 +776,9 @@ void xenbus_dev_changed(const char *node
kfree(root);
}
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+EXPORT_SYMBOL_GPL(xenbus_dev_changed);
+#endif
static void frontend_changed(struct xenbus_watch *watch,
const char **vec, unsigned int len)
@@ -782,7 +794,11 @@ static struct xenbus_watch fe_watch = {
.callback = frontend_changed,
};
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_suspend(struct device *dev, pm_message_t state)
+#else
static int suspend_dev(struct device *dev, void *data)
+#endif
{
int err = 0;
struct xenbus_driver *drv;
@@ -795,13 +811,18 @@ static int suspend_dev(struct device *de
drv = to_xenbus_driver(dev->driver);
xdev = container_of(dev, struct xenbus_device, dev);
if (drv->suspend)
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ err = drv->suspend(xdev, state);
+#else
err = drv->suspend(xdev);
+#endif
if (err)
pr_warning("xenbus: suspend %s failed: %i\n",
dev_name(dev), err);
return 0;
}
+#if defined(CONFIG_XEN) || defined(MODULE)
static int suspend_cancel_dev(struct device *dev, void *data)
{
int err = 0;
@@ -821,8 +842,13 @@ static int suspend_cancel_dev(struct dev
dev_name(dev), err);
return 0;
}
+#endif
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+static int xenbus_dev_resume(struct device *dev)
+#else
static int resume_dev(struct device *dev, void *data)
+#endif
{
int err;
struct xenbus_driver *drv;
@@ -864,6 +890,7 @@ static int resume_dev(struct device *dev
return 0;
}
+#if defined(CONFIG_XEN) || defined(MODULE)
void xenbus_suspend(void)
{
DPRINTK("");
@@ -893,6 +920,7 @@ void xenbus_suspend_cancel(void)
xenbus_backend_resume(suspend_cancel_dev);
}
EXPORT_SYMBOL_GPL(xenbus_suspend_cancel);
+#endif
/* A flag to determine if xenstored is 'ready' (i.e. has started) */
atomic_t xenbus_xsd_state = ATOMIC_INIT(XENBUS_XSD_UNCOMMITTED);
@@ -995,13 +1023,6 @@ static int xsd_port_read(char *page, cha
#endif
#if defined(CONFIG_XEN_XENBUS_DEV) || defined(MODULE)
-static int xb_free_port(evtchn_port_t port)
-{
- struct evtchn_close close;
- close.port = port;
- return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
-}
-
int xenbus_conn(domid_t remote_dom, unsigned long *grant_ref, evtchn_port_t *local_port)
{
struct evtchn_alloc_unbound alloc_unbound;
@@ -1015,7 +1036,7 @@ int xenbus_conn(domid_t remote_dom, unsi
remove_xen_proc_entry("xsd_port");
#endif
- rc = xb_free_port(xen_store_evtchn);
+ rc = close_evtchn(xen_store_evtchn);
if (rc != 0)
goto fail0;
@@ -1041,7 +1062,7 @@ int xenbus_conn(domid_t remote_dom, unsi
return 0;
fail1:
- rc2 = xb_free_port(xen_store_evtchn);
+ rc2 = close_evtchn(xen_store_evtchn);
if (rc2 != 0)
pr_warning("XENBUS: Error freeing xenstore event channel:"
" %d\n", rc2);
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_xs.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_xs.c 2011-02-01 14:50:44.000000000 +0100
@@ -718,6 +718,10 @@ void xs_resume(void)
struct xenbus_watch *watch;
char token[sizeof(watch) * 2 + 1];
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ xb_init_comms();
+#endif
+
mutex_unlock(&xs_state.response_mutex);
mutex_unlock(&xs_state.request_mutex);
transaction_resume();
--- head-2011-03-17.orig/include/Kbuild 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/include/Kbuild 2011-02-01 14:50:44.000000000 +0100
@@ -8,6 +8,5 @@ header-y += mtd/
header-y += rdma/
header-y += video/
header-y += drm/
-header-y += xen/public/
header-y += xen/
header-y += scsi/
--- head-2011-03-17.orig/include/xen/Kbuild 2011-01-31 14:31:28.000000000 +0100
+++ head-2011-03-17/include/xen/Kbuild 2011-02-01 14:50:44.000000000 +0100
@@ -1,3 +1,2 @@
-header-y += evtchn.h
header-y += privcmd.h
header-y += public/
--- head-2011-03-17.orig/include/xen/driver_util.h 2011-01-31 17:49:31.000000000 +0100
+++ head-2011-03-17/include/xen/driver_util.h 2011-02-01 14:50:44.000000000 +0100
@@ -1,8 +1,14 @@
#ifndef __XEN_DRIVER_UTIL_H__
#define __XEN_DRIVER_UTIL_H__
+#include <linux/compiler.h>
#include <linux/device.h>
extern struct class *get_xen_class(void);
+extern struct device *xen_class_device_create(struct device_type *,
+ struct device *parent,
+ dev_t devt, void *drvdata,
+ const char *fmt, ...)
+ __printf(5, 6);
#endif /* __XEN_DRIVER_UTIL_H__ */
--- head-2011-03-17.orig/include/xen/evtchn.h 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/include/xen/evtchn.h 2011-02-01 14:50:44.000000000 +0100
@@ -113,9 +113,6 @@ void irq_resume(void);
/* Entry point for notifications into Linux subsystems. */
asmlinkage void evtchn_do_upcall(struct pt_regs *regs);
-/* Entry point for notifications into the userland character device. */
-void evtchn_device_upcall(int port);
-
/* Mark a PIRQ as unavailable for dynamic allocation. */
void evtchn_register_pirq(int irq);
/* Map a Xen-supplied PIRQ to a dynamically allocated one. */
@@ -126,6 +123,7 @@ int evtchn_get_xen_pirq(int irq);
void mask_evtchn(int port);
void disable_all_local_evtchn(void);
void unmask_evtchn(int port);
+unsigned int irq_from_evtchn(unsigned int port);
#ifdef CONFIG_SMP
void rebind_evtchn_to_cpu(int port, unsigned int cpu);
@@ -163,6 +161,12 @@ static inline void notify_remote_via_evt
VOID(HYPERVISOR_event_channel_op(EVTCHNOP_send, &send));
}
+static inline int close_evtchn(int port)
+{
+	struct evtchn_close op = { .port = port };
+	return HYPERVISOR_event_channel_op(EVTCHNOP_close, &op);
+}
+
/*
* Use these to access the event channel underlying the IRQ handle returned
* by bind_*_to_irqhandler().
--- head-2011-03-17.orig/include/xen/xenbus.h 2011-02-02 16:58:42.000000000 +0100
+++ head-2011-03-17/include/xen/xenbus.h 2011-02-02 16:59:07.000000000 +0100
@@ -104,8 +104,12 @@ struct xenbus_driver {
void (*otherend_changed)(struct xenbus_device *dev,
enum xenbus_state backend_state);
int (*remove)(struct xenbus_device *dev);
+#if !defined(CONFIG_XEN) && !defined(HAVE_XEN_PLATFORM_COMPAT_H)
+ int (*suspend)(struct xenbus_device *dev, pm_message_t state);
+#else
int (*suspend)(struct xenbus_device *dev);
int (*suspend_cancel)(struct xenbus_device *dev);
+#endif
int (*resume)(struct xenbus_device *dev);
int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
struct device_driver driver;
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 14:50:44.000000000 +0100
@@ -47,8 +47,8 @@ int swiotlb;
int swiotlb_force;
/*
- * Used to do a quick range check in swiotlb_unmap_single and
- * swiotlb_sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in unmap_single and
+ * sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
static char *io_tlb_start, *io_tlb_end;
@@ -167,7 +167,7 @@ dma_addr_t swiotlb_phys_to_bus(struct de
return phys_to_machine(paddr);
}
-phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
+phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
{
return machine_to_phys(baddr);
}
@@ -178,9 +178,15 @@ static dma_addr_t swiotlb_virt_to_bus(st
return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
}
-static void *swiotlb_bus_to_virt(dma_addr_t address)
+void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
{
- return phys_to_virt(swiotlb_bus_to_phys(address));
+ return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
+}
+
+int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
+ dma_addr_t addr, size_t size)
+{
+ return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
}
int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
@@ -315,7 +321,7 @@ static void swiotlb_bounce(phys_addr_t p
unsigned long flags;
while (size) {
- sz = min((size_t)(PAGE_SIZE - offset), size);
+ sz = min_t(size_t, PAGE_SIZE - offset, size);
local_irq_save(flags);
buffer = kmap_atomic(pfn_to_page(pfn),
@@ -449,7 +455,7 @@ found:
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
*/
static void
-unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -544,7 +550,7 @@ swiotlb_full(struct device *dev, size_t
* PCI address to use is returned.
*
* Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
*/
dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
@@ -558,7 +564,7 @@ dma_addr_t swiotlb_map_page(struct devic
BUG_ON(dir == DMA_NONE);
/*
- * If the pointer passed in happens to be in the device's DMA window,
+ * If the address happens to be in the device's DMA window,
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
@@ -583,23 +589,32 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
/*
* Unmap a single streaming mode DMA translation. The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_single call. All
+ * match what was provided for in a previous swiotlb_map_page call. All
* other usages are undefined.
*
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+			 size_t size, int dir)
+{
+	char *vaddr = swiotlb_bus_to_virt(hwdev, dev_addr);
+
+	BUG_ON(dir == DMA_NONE);
+
+	/* Only bounce-buffer addresses own a slot that must be released. */
+	if (!is_swiotlb_buffer(dev_addr)) {
+		gnttab_dma_unmap_page(dev_addr);
+		return;
+	}
+	do_unmap_single(hwdev, vaddr, size, dir);
+}
+
void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir,
struct dma_attrs *attrs)
{
- char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
- BUG_ON(dir == DMA_NONE);
- if (is_swiotlb_buffer(dev_addr))
- unmap_single(hwdev, dma_addr, size, dir);
- else
- gnttab_dma_unmap_page(dev_addr);
+ unmap_single(hwdev, dev_addr, size, dir);
}
EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
@@ -607,7 +622,7 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
* Make physical memory consistent for a single streaming mode DMA translation
* after a transfer.
*
- * If you perform a swiotlb_map_single() but wish to interrogate the buffer
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
* using the cpu, yet do not wish to teardown the PCI dma mapping, you must
* call this function before doing so. At the next point you give the PCI dma
* address back to the card, you must first perform a
@@ -617,9 +632,10 @@ static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, int target)
{
- char *dma_addr = swiotlb_bus_to_virt(dev_addr);
+ char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
+
if (is_swiotlb_buffer(dev_addr))
sync_single(hwdev, dma_addr, size, dir, target);
}
@@ -648,11 +664,7 @@ swiotlb_sync_single_range(struct device
unsigned long offset, size_t size,
int dir, int target)
{
- char *dma_addr = swiotlb_bus_to_virt(dev_addr);
-
- BUG_ON(dir == DMA_NONE);
- if (is_swiotlb_buffer(dev_addr))
- sync_single(hwdev, dma_addr + offset, size, dir, target);
+ swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target);
}
void
@@ -677,7 +689,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra
/*
* Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_single
+ * This is the scatter-gather version of the above swiotlb_map_page
* interface. Here the scatter gather list elements are each tagged with the
* appropriate dma address and length. They are obtained via
* sg_dma_{address,length}(SG).
@@ -688,7 +700,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra
* The routine returns the number of addr/length pairs actually
* used, at most nents.
*
- * Device ownership issues as mentioned above for swiotlb_map_single are the
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
* same here.
*/
int
@@ -741,7 +753,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
/*
* Unmap a set of streaming mode DMA translations. Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_single() above.
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
*/
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
@@ -752,13 +764,9 @@ swiotlb_unmap_sg_attrs(struct device *hw
BUG_ON(dir == DMA_NONE);
- for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != sg_phys(sg))
- unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
- sg->dma_length, dir);
- else
- gnttab_dma_unmap_page(sg->dma_address);
- }
+ for_each_sg(sgl, sg, nelems, i)
+ unmap_single(hwdev, sg->dma_address, sg->dma_length, dir);
+
}
EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
@@ -784,13 +792,9 @@ swiotlb_sync_sg(struct device *hwdev, st
struct scatterlist *sg;
int i;
- BUG_ON(dir == DMA_NONE);
-
- for_each_sg(sgl, sg, nelems, i) {
- if (sg->dma_address != sg_phys(sg))
- sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
+ for_each_sg(sgl, sg, nelems, i)
+ swiotlb_sync_single(hwdev, sg->dma_address,
sg->dma_length, dir, target);
- }
}
void
--- head-2011-03-17.orig/mm/init-mm.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/mm/init-mm.c 2011-02-01 14:50:44.000000000 +0100
@@ -13,6 +13,10 @@
#define INIT_MM_CONTEXT(name)
#endif
+#ifdef CONFIG_X86_XEN
+#define swapper_pg_dir ((pgd_t *)NULL)
+#endif
+
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir,
--- head-2011-03-17.orig/mm/memory.c 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-17/mm/memory.c 2011-02-01 14:50:44.000000000 +0100
@@ -1522,7 +1522,7 @@ int __get_user_pages(struct task_struct
vmas[i] = vma;
i++;
start += PAGE_SIZE;
- len--;
+ nr_pages--;
continue;
}
}
--- head-2011-03-17.orig/mm/page_alloc.c 2011-02-08 10:05:20.000000000 +0100
+++ head-2011-03-17/mm/page_alloc.c 2011-02-01 14:50:44.000000000 +0100
@@ -649,6 +649,7 @@ static bool free_pages_prepare(struct pa
#ifdef CONFIG_XEN
if (PageForeign(page)) {
+ WARN_ON(wasMlocked);
PageForeignDestructor(page, order);
return;
}