qubes-linux-kernel/patches.xen/xen3-patch-2.6.37


From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.37
Patch-mainline: 2.6.37
This patch contains the differences between 2.6.36 and 2.6.37.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.37" by xen-port-patches.py
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-17 13:43:12.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 15:09:47.000000000 +0100
@@ -1782,7 +1782,6 @@ config USE_PERCPU_NUMA_NODE_ID
depends on NUMA
menu "Power management and ACPI options"
- depends on !XEN_UNPRIVILEGED_GUEST
config ARCH_HIBERNATION_HEADER
def_bool y
@@ -1790,6 +1789,8 @@ config ARCH_HIBERNATION_HEADER
source "kernel/power/Kconfig"
+if !XEN_UNPRIVILEGED_GUEST
+
source "drivers/acpi/Kconfig"
source "drivers/sfi/Kconfig"
@@ -1925,6 +1926,8 @@ source "drivers/cpuidle/Kconfig"
source "drivers/idle/Kconfig"
+endif # !XEN_UNPRIVILEGED_GUEST
+
endmenu
@@ -2005,7 +2008,7 @@ config PCI_OLPC
config PCI_XEN
def_bool y
- depends on PCI && XEN
+ depends on PCI && PARAVIRT_XEN
select SWIOTLB_XEN
config PCI_DOMAINS
@@ -2030,21 +2033,6 @@ config PCI_CNB20LE_QUIRK
You should say N unless you know you need this.
-config XEN_PCIDEV_FRONTEND
- def_bool y
- prompt "Xen PCI Frontend" if X86_64
- depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
- select HOTPLUG
- help
- The PCI device frontend driver allows the kernel to import arbitrary
- PCI devices from a PCI backend to support PCI driver domains.
-
-config XEN_PCIDEV_FE_DEBUG
- bool "Xen PCI Frontend Debugging"
- depends on XEN_PCIDEV_FRONTEND
- help
- Enables some debug statements within the PCI Frontend.
-
config DMAR
bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
depends on PCI_MSI && ACPI && !XEN && EXPERIMENTAL
--- head-2011-03-17.orig/arch/x86/include/asm/hw_irq.h 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/include/asm/hw_irq.h 2011-02-01 15:09:47.000000000 +0100
@@ -78,6 +78,7 @@ static inline void set_io_apic_irq_attr(
irq_attr->polarity = polarity;
}
+#ifndef CONFIG_XEN
struct irq_2_iommu {
struct intel_iommu *iommu;
u16 irte_index;
@@ -85,7 +86,6 @@ struct irq_2_iommu {
u8 irte_mask;
};
-#ifndef CONFIG_XEN
/*
* This is performance-critical, we want to do it O(1)
*
@@ -147,6 +147,7 @@ extern irqreturn_t smp_reschedule_interr
extern irqreturn_t smp_call_function_interrupt(int, void *);
extern irqreturn_t smp_call_function_single_interrupt(int, void *);
extern irqreturn_t smp_reboot_interrupt(int, void *);
+extern irqreturn_t smp_irq_work_interrupt(int, void *);
#endif
#endif
--- head-2011-03-17.orig/arch/x86/include/asm/io.h 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/arch/x86/include/asm/io.h 2011-02-01 15:09:47.000000000 +0100
@@ -353,7 +353,7 @@ extern void early_iounmap(void __iomem *
extern void fixup_early_ioremap(void);
extern bool is_early_ioremap_ptep(pte_t *ptep);
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
struct bio_vec;
extern bool xen_biovec_phys_mergeable(const struct bio_vec *vec1,
@@ -362,7 +362,7 @@ extern bool xen_biovec_phys_mergeable(co
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
(__BIOVEC_PHYS_MERGEABLE(vec1, vec2) && \
(!xen_domain() || xen_biovec_phys_mergeable(vec1, vec2)))
-#endif /* CONFIG_XEN */
+#endif /* CONFIG_PARAVIRT_XEN */
#define IO_SPACE_LIMIT 0xffff
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 15:09:47.000000000 +0100
@@ -217,5 +217,20 @@ static inline unsigned long virt_to_fix(
BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
return __virt_to_fix(vaddr);
}
+
+/* Return a pointer with offset calculated */
+static __always_inline unsigned long
+__set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
+{
+ __set_fixmap(idx, phys, flags);
+ return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1));
+}
+
+#define set_fixmap_offset(idx, phys) \
+ __set_fixmap_offset(idx, phys, PAGE_KERNEL)
+
+#define set_fixmap_offset_nocache(idx, phys) \
+ __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE)
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_FIXMAP_H */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 15:09:47.000000000 +0100
@@ -58,15 +58,16 @@ extern void kunmap_high(struct page *pag
void *kmap(struct page *page);
void kunmap(struct page *page);
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
-void *kmap_atomic(struct page *page, enum km_type type);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
+
+void *kmap_atomic_prot(struct page *page, pgprot_t prot);
+void *__kmap_atomic(struct page *page);
+void __kunmap_atomic(void *kvaddr);
+void *kmap_atomic_pfn(unsigned long pfn);
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
-#define kmap_atomic_pte(page, type) \
- kmap_atomic_prot(page, type, \
+#define kmap_atomic_pte(page) \
+ kmap_atomic_prot(page, \
PagePinned(page) ? PAGE_KERNEL_RO : kmap_prot)
#define flush_cache_kmaps() do { } while (0)
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/io.h 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/io.h 2011-02-01 15:09:47.000000000 +0100
@@ -212,6 +212,7 @@ static inline void __iomem *ioremap(reso
extern void iounmap(volatile void __iomem *addr);
+extern void set_iounmap_nonlazy(void);
#ifdef __KERNEL__
@@ -353,6 +354,7 @@ extern void __iomem *early_memremap(reso
unsigned long size);
extern void early_iounmap(void __iomem *addr, unsigned long size);
extern void fixup_early_ioremap(void);
+extern bool is_early_ioremap_ptep(pte_t *ptep);
#define IO_SPACE_LIMIT 0xffff
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:49:16.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:50:13.000000000 +0100
@@ -13,7 +13,12 @@
#define NMI_VECTOR 0x02
#define CALL_FUNC_SINGLE_VECTOR 3
#define REBOOT_VECTOR 4
+#ifdef CONFIG_IRQ_WORK
+#define IRQ_WORK_VECTOR 5
+#define NR_IPIS 6
+#else
#define NR_IPIS 5
+#endif
/*
* The maximum number of vectors supported by i386 processors
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irqflags.h 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/irqflags.h 2011-02-01 15:09:47.000000000 +0100
@@ -47,19 +47,19 @@ void xen_safe_halt(void);
void xen_halt(void);
-#define __raw_local_save_flags() xen_save_fl()
+#define arch_local_save_flags() xen_save_fl()
-#define raw_local_irq_restore(flags) xen_restore_fl(flags)
+#define arch_local_irq_restore(flags) xen_restore_fl(flags)
-#define raw_local_irq_disable() xen_irq_disable()
+#define arch_local_irq_disable() xen_irq_disable()
-#define raw_local_irq_enable() xen_irq_enable()
+#define arch_local_irq_enable() xen_irq_enable()
/*
* Used in the idle loop; sti takes one instruction cycle
* to complete:
*/
-static inline void raw_safe_halt(void)
+static inline void arch_safe_halt(void)
{
xen_safe_halt();
}
@@ -76,11 +76,11 @@ static inline void halt(void)
/*
* For spinlocks, etc:
*/
-#define __raw_local_irq_save() \
+#define arch_local_irq_save() \
({ \
- unsigned long flags = __raw_local_save_flags(); \
+ unsigned long flags = arch_local_save_flags(); \
\
- raw_local_irq_disable(); \
+ arch_local_irq_disable(); \
\
flags; \
})
@@ -140,22 +140,16 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
#endif /* __ASSEMBLY__ */
#ifndef __ASSEMBLY__
-#define raw_local_save_flags(flags) \
- do { (flags) = __raw_local_save_flags(); } while (0)
-
-#define raw_local_irq_save(flags) \
- do { (flags) = __raw_local_irq_save(); } while (0)
-
-static inline int raw_irqs_disabled_flags(unsigned long flags)
+static inline int arch_irqs_disabled_flags(unsigned long flags)
{
return (flags != 0);
}
-#define raw_irqs_disabled() \
+#define arch_irqs_disabled() \
({ \
- unsigned long flags = __raw_local_save_flags(); \
+ unsigned long flags = arch_local_save_flags(); \
\
- raw_irqs_disabled_flags(flags); \
+ arch_irqs_disabled_flags(flags); \
})
#else
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:09:47.000000000 +0100
@@ -7,6 +7,7 @@
#include <linux/string.h>
#include <asm/scatterlist.h>
#include <asm/io.h>
+#include <asm/x86_init.h>
#ifdef __KERNEL__
@@ -100,9 +101,36 @@ static inline void early_quirks(void) {
extern void pci_iommu_alloc(void);
-/* MSI arch hooks */
-#define arch_setup_msi_irqs arch_setup_msi_irqs
-#define arch_teardown_msi_irqs arch_teardown_msi_irqs
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
+/* MSI arch specific hooks */
+static inline int x86_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+ return x86_msi.setup_msi_irqs(dev, nvec, type);
+}
+
+static inline void x86_teardown_msi_irqs(struct pci_dev *dev)
+{
+ x86_msi.teardown_msi_irqs(dev);
+}
+
+static inline void x86_teardown_msi_irq(unsigned int irq)
+{
+ x86_msi.teardown_msi_irq(irq);
+}
+#define arch_setup_msi_irqs x86_setup_msi_irqs
+#define arch_teardown_msi_irqs x86_teardown_msi_irqs
+#define arch_teardown_msi_irq x86_teardown_msi_irq
+/* implemented in arch/x86/kernel/apic/io_apic. */
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
+void native_teardown_msi_irq(unsigned int irq);
+/* default to the implementation in drivers/lib/msi.c */
+#define HAVE_DEFAULT_MSI_TEARDOWN_IRQS
+void default_teardown_msi_irqs(struct pci_dev *dev);
+#else
+#define native_setup_msi_irqs NULL
+#define native_teardown_msi_irq NULL
+#define default_teardown_msi_irqs NULL
+#endif
#define PCI_DMA_BUS_IS_PHYS 0
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 15:09:47.000000000 +0100
@@ -28,6 +28,8 @@ extern unsigned long empty_zero_page[PAG
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
+extern struct mm_struct *pgd_page_get_mm(struct page *page);
+
#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
@@ -637,6 +639,8 @@ static inline void ptep_set_wrprotect(st
set_pte_at(mm, addr, ptep, pte_wrprotect(pte));
}
+#define flush_tlb_fix_spurious_fault(vma, address)
+
/*
* clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
*
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 15:09:47.000000000 +0100
@@ -25,7 +25,7 @@
struct vm_area_struct;
extern pgd_t *swapper_pg_dir;
-extern pgd_t trampoline_pg_dir[1024];
+extern pgd_t initial_page_table[1024];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
@@ -48,24 +48,14 @@ extern void set_pmd_pfn(unsigned long, u
#endif
#if defined(CONFIG_HIGHPTE)
-#define __KM_PTE \
- (in_nmi() ? KM_NMI_PTE : \
- in_irq() ? KM_IRQ_PTE : \
- KM_PTE0)
#define pte_offset_map(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir))) + \
pte_index((address)))
-#define pte_offset_map_nested(dir, address) \
- ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \
- pte_index((address)))
-#define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE)
-#define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1)
+#define pte_unmap(pte) kunmap_atomic((pte))
#else
#define pte_offset_map(dir, address) \
((pte_t *)page_address(pmd_page(*(dir))) + pte_index((address)))
-#define pte_offset_map_nested(dir, address) pte_offset_map((dir), (address))
#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte) do { } while (0)
#endif
/* Clear a kernel PTE and flush it from the TLB */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 15:09:47.000000000 +0100
@@ -109,6 +109,8 @@ static inline void xen_pgd_clear(pgd_t *
#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
+extern void sync_global_pgds(unsigned long start, unsigned long end);
+
/*
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
@@ -132,9 +134,7 @@ static inline int pgd_large(pgd_t pgd) {
/* x86-64 always has all page tables mapped. */
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
-#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
#define pte_unmap(pte) ((void)(pte))/* NOP */
-#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:27.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:48.000000000 +0100
@@ -120,6 +120,8 @@ struct cpuinfo_x86 {
u16 phys_proc_id;
/* Core id: */
u16 cpu_core_id;
+ /* Compute unit id */
+ u8 compute_unit_id;
#endif
#ifdef CONFIG_SMP
/* Index into per_cpu list: */
@@ -556,7 +558,7 @@ extern unsigned long mmu_cr4_features;
static inline void set_in_cr4(unsigned long mask)
{
- unsigned cr4;
+ unsigned long cr4;
mmu_cr4_features |= mask;
cr4 = read_cr4();
@@ -566,7 +568,7 @@ static inline void set_in_cr4(unsigned l
static inline void clear_in_cr4(unsigned long mask)
{
- unsigned cr4;
+ unsigned long cr4;
mmu_cr4_features &= ~mask;
cr4 = read_cr4();
@@ -718,31 +720,6 @@ extern unsigned long idle_halt;
extern unsigned long idle_nomwait;
extern bool c1e_detected;
-#ifndef CONFIG_XEN
-/*
- * on systems with caches, caches must be flashed as the absolute
- * last instruction before going into a suspended halt. Otherwise,
- * dirty data can linger in the cache and become stale on resume,
- * leading to strange errors.
- *
- * perform a variety of operations to guarantee that the compiler
- * will not reorder instructions. wbinvd itself is serializing
- * so the processor will not reorder.
- *
- * Systems without cache can just go into halt.
- */
-static inline void wbinvd_halt(void)
-{
- mb();
- /* check for clflush to determine if wbinvd is legal */
- if (cpu_has_clflush)
- asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
- else
- while (1)
- halt();
-}
-#endif
-
extern void enable_sep_cpu(void);
extern int sysenter_setup(void);
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:10:16.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp.h 2011-03-03 16:12:15.000000000 +0100
@@ -57,7 +57,7 @@ struct smp_ops {
void (*smp_prepare_cpus)(unsigned max_cpus);
void (*smp_cpus_done)(unsigned max_cpus);
- void (*smp_send_stop)(void);
+ void (*stop_other_cpus)(int wait);
void (*smp_send_reschedule)(int cpu);
int (*cpu_up)(unsigned cpu);
@@ -76,7 +76,12 @@ extern struct smp_ops smp_ops;
static inline void smp_send_stop(void)
{
- smp_ops.smp_send_stop();
+ smp_ops.stop_other_cpus(0);
+}
+
+static inline void stop_other_cpus(void)
+{
+ smp_ops.stop_other_cpus(1);
}
static inline void smp_prepare_boot_cpu(void)
@@ -148,12 +153,16 @@ void smp_store_cpu_info(int id);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
-void xen_smp_send_stop(void);
+void xen_stop_other_cpus(int wait);
void xen_smp_send_reschedule(int cpu);
void xen_send_call_func_ipi(const struct cpumask *mask);
void xen_send_call_func_single_ipi(int cpu);
-#define smp_send_stop xen_smp_send_stop
+static inline void smp_send_stop(void)
+{
+ xen_stop_other_cpus(0);
+}
+
#define smp_send_reschedule xen_smp_send_reschedule
#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
#define arch_send_call_function_ipi_mask xen_send_call_func_ipi
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 15:09:47.000000000 +0100
@@ -200,16 +200,16 @@ static inline int __ticket_spin_is_conte
static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
unsigned int token, count;
- unsigned int flags = __raw_local_irq_save();
+ unsigned int flags = arch_local_irq_save();
bool free;
__ticket_spin_lock_preamble;
if (likely(free)) {
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
return;
}
token = xen_spin_adjust(lock, token);
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
do {
count = 1 << 10;
__ticket_spin_lock_body;
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/swiotlb.h 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/swiotlb.h 2011-02-01 15:09:47.000000000 +0100
@@ -1,6 +1,4 @@
#include_next <asm/swiotlb.h>
-#define pci_swiotlb_detect() 1
-
dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
int dir);
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/tlbflush.h 2011-02-01 15:09:47.000000000 +0100
@@ -111,6 +111,4 @@ static inline void flush_tlb_kernel_rang
flush_tlb_all();
}
-extern void zap_low_mappings(bool early);
-
#endif /* _ASM_X86_TLBFLUSH_H */
--- head-2011-03-17.orig/arch/x86/kernel/Makefile 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/Makefile 2011-02-01 15:09:47.000000000 +0100
@@ -125,7 +125,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-y += vsmp_64.o
endif
-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
- i8259.o irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o \
- uv_%.o vsmp_64.o
+disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259.o \
+ irqinit.o pci-swiotlb.o reboot.o smpboot.o tsc.o tsc_sync.o vsmp_64.o
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -7,11 +7,16 @@
#include <linux/acpi.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/dmi.h>
#include <linux/cpumask.h>
#include <asm/segment.h>
#include <asm/desc.h>
+#ifdef CONFIG_X86_32
+#include <asm/pgtable.h>
+#endif
+
#include "realmode/wakeup.h"
#include "sleep.h"
@@ -93,7 +98,7 @@ int acpi_save_state_mem(void)
#ifndef CONFIG_64BIT
header->pmode_entry = (u32)&wakeup_pmode_return;
- header->pmode_cr3 = (u32)(swsusp_pg_dir - __PAGE_OFFSET);
+ header->pmode_cr3 = (u32)__pa(&initial_page_table);
saved_magic = 0x12345678;
#else /* CONFIG_64BIT */
header->trampoline_segment = setup_trampoline() >> 4;
@@ -130,7 +135,7 @@ void acpi_restore_state_mem(void)
void __init acpi_reserve_wakeup_memory(void)
{
#ifndef CONFIG_ACPI_PV_SLEEP
- unsigned long mem;
+ phys_addr_t mem;
if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
printk(KERN_ERR
@@ -138,15 +143,15 @@ void __init acpi_reserve_wakeup_memory(v
return;
}
- mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
+ mem = memblock_find_in_range(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
- if (mem == -1L) {
+ if (mem == MEMBLOCK_ERROR) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
return;
}
acpi_realmode = (unsigned long) phys_to_virt(mem);
acpi_wakeup_address = mem;
- reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
+ memblock_x86_reserve_range(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
#endif
}
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -144,13 +144,9 @@ struct irq_pin_list {
struct irq_pin_list *next;
};
-static struct irq_pin_list *get_one_free_irq_2_pin(int node)
+static struct irq_pin_list *alloc_irq_pin_list(int node)
{
- struct irq_pin_list *pin;
-
- pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
-
- return pin;
+ return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
}
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
@@ -163,10 +159,7 @@ static struct irq_cfg irq_cfgx[NR_IRQS];
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
- int count;
- int node;
- int i;
+ int count, node, i;
if (!legacy_pic->nr_legacy_irqs) {
nr_irqs_gsi = 0;
@@ -175,13 +168,15 @@ int __init arch_early_irq_init(void)
cfg = irq_cfgx;
count = ARRAY_SIZE(irq_cfgx);
- node= cpu_to_node(boot_cpu_id);
+ node = cpu_to_node(0);
+
+ /* Make sure the legacy interrupts are marked in the bitmap */
+ irq_reserve_irqs(0, legacy_pic->nr_legacy_irqs);
for (i = 0; i < count; i++) {
- desc = irq_to_desc(i);
- desc->chip_data = &cfg[i];
- zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
- zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
+ set_irq_chip_data(i, &cfg[i]);
+ zalloc_cpumask_var_node(&cfg[i].domain, GFP_KERNEL, node);
+ zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_KERNEL, node);
/*
* For legacy IRQ's, start with assigning irq0 to irq15 to
* IRQ0_VECTOR to IRQ15_VECTOR on cpu 0.
@@ -196,170 +191,88 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
-struct irq_cfg *irq_cfg(unsigned int irq)
+static struct irq_cfg *irq_cfg(unsigned int irq)
{
- struct irq_cfg *cfg = NULL;
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
- if (desc)
- cfg = desc->chip_data;
-
- return cfg;
+ return get_irq_chip_data(irq);
}
-static struct irq_cfg *get_one_free_irq_cfg(int node)
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
{
struct irq_cfg *cfg;
- cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
- if (cfg) {
- if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
- kfree(cfg);
- cfg = NULL;
- } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
- GFP_ATOMIC, node)) {
- free_cpumask_var(cfg->domain);
- kfree(cfg);
- cfg = NULL;
- }
- }
-
+ cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+ if (!cfg)
+ return NULL;
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
+ goto out_cfg;
+ if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
+ goto out_domain;
return cfg;
+out_domain:
+ free_cpumask_var(cfg->domain);
+out_cfg:
+ kfree(cfg);
+ return NULL;
}
-int arch_init_chip_data(struct irq_desc *desc, int node)
-{
- struct irq_cfg *cfg;
-
- cfg = desc->chip_data;
- if (!cfg) {
- desc->chip_data = get_one_free_irq_cfg(node);
- if (!desc->chip_data) {
- printk(KERN_ERR "can not alloc irq_cfg\n");
- BUG_ON(1);
- }
- }
-
- return 0;
-}
-
-/* for move_irq_desc */
-static void
-init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node)
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
{
- struct irq_pin_list *old_entry, *head, *tail, *entry;
-
- cfg->irq_2_pin = NULL;
- old_entry = old_cfg->irq_2_pin;
- if (!old_entry)
- return;
-
- entry = get_one_free_irq_2_pin(node);
- if (!entry)
+ if (!cfg)
return;
+ set_irq_chip_data(at, NULL);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
+}
- entry->apic = old_entry->apic;
- entry->pin = old_entry->pin;
- head = entry;
- tail = entry;
- old_entry = old_entry->next;
- while (old_entry) {
- entry = get_one_free_irq_2_pin(node);
- if (!entry) {
- entry = head;
- while (entry) {
- head = entry->next;
- kfree(entry);
- entry = head;
- }
- /* still use the old one */
- return;
- }
- entry->apic = old_entry->apic;
- entry->pin = old_entry->pin;
- tail->next = entry;
- tail = entry;
- old_entry = old_entry->next;
- }
+#else
- tail->next = NULL;
- cfg->irq_2_pin = head;
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
-static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
{
- struct irq_pin_list *entry, *next;
-
- if (old_cfg->irq_2_pin == cfg->irq_2_pin)
- return;
+ return irq_cfgx + irq;
+}
- entry = old_cfg->irq_2_pin;
+static inline void free_irq_cfg(unsigned int at, struct irq_cfg *cfg) { }
- while (entry) {
- next = entry->next;
- kfree(entry);
- entry = next;
- }
- old_cfg->irq_2_pin = NULL;
-}
+#endif
-void arch_init_copy_chip_data(struct irq_desc *old_desc,
- struct irq_desc *desc, int node)
+static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
{
+ int res = irq_alloc_desc_at(at, node);
struct irq_cfg *cfg;
- struct irq_cfg *old_cfg;
-
- cfg = get_one_free_irq_cfg(node);
-
- if (!cfg)
- return;
-
- desc->chip_data = cfg;
- old_cfg = old_desc->chip_data;
-
- cfg->vector = old_cfg->vector;
- cfg->move_in_progress = old_cfg->move_in_progress;
- cpumask_copy(cfg->domain, old_cfg->domain);
- cpumask_copy(cfg->old_domain, old_cfg->old_domain);
-
- init_copy_irq_2_pin(old_cfg, cfg, node);
-}
+ if (res < 0) {
+ if (res != -EEXIST)
+ return NULL;
+ cfg = get_irq_chip_data(at);
+ if (cfg)
+ return cfg;
+ }
-static void free_irq_cfg(struct irq_cfg *cfg)
-{
- free_cpumask_var(cfg->domain);
- free_cpumask_var(cfg->old_domain);
- kfree(cfg);
+ cfg = alloc_irq_cfg(at, node);
+ if (cfg)
+ set_irq_chip_data(at, cfg);
+ else
+ irq_free_desc(at);
+ return cfg;
}
-void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
+static int alloc_irq_from(unsigned int from, int node)
{
- struct irq_cfg *old_cfg, *cfg;
-
- old_cfg = old_desc->chip_data;
- cfg = desc->chip_data;
-
- if (old_cfg == cfg)
- return;
-
- if (old_cfg) {
- free_irq_2_pin(old_cfg, cfg);
- free_irq_cfg(old_cfg);
- old_desc->chip_data = NULL;
- }
+ return irq_alloc_desc_from(from, node);
}
-/* end for move_irq_desc */
-#else
-struct irq_cfg *irq_cfg(unsigned int irq)
+static void free_irq_at(unsigned int at, struct irq_cfg *cfg)
{
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
+ free_irq_cfg(at, cfg);
+ irq_free_desc(at);
}
-#endif
-
struct io_apic {
unsigned int index;
unsigned int unused[3];
@@ -492,7 +405,7 @@ __ioapic_write_entry(int apic, int pin,
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
}
-void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
{
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
@@ -523,7 +436,7 @@ static void ioapic_mask_entry(int apic,
* fast in the common case, and fast for shared ISA-space IRQs.
*/
static int
-add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
+__add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
struct irq_pin_list **last, *entry;
@@ -535,7 +448,7 @@ add_pin_to_irq_node_nopanic(struct irq_c
last = &entry->next;
}
- entry = get_one_free_irq_2_pin(node);
+ entry = alloc_irq_pin_list(node);
if (!entry) {
printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
node, apic, pin);
@@ -550,7 +463,7 @@ add_pin_to_irq_node_nopanic(struct irq_c
static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
{
- if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+ if (__add_pin_to_irq_node(cfg, node, apic, pin))
panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}
@@ -613,11 +526,6 @@ static void __unmask_and_level_IO_APIC_i
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
-static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
-}
-
static void io_apic_sync(struct irq_pin_list *entry)
{
/*
@@ -629,44 +537,37 @@ static void io_apic_sync(struct irq_pin_
readl(&io_apic->data);
}
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
+static void mask_ioapic(struct irq_cfg *cfg)
{
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
+ raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
+static void mask_ioapic_irq(struct irq_data *data)
{
- struct irq_cfg *cfg = desc->chip_data;
- unsigned long flags;
-
- BUG_ON(!cfg);
+ mask_ioapic(data->chip_data);
+}
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- __mask_IO_APIC_irq(cfg);
- raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+static void __unmask_ioapic(struct irq_cfg *cfg)
+{
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
-static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
+static void unmask_ioapic(struct irq_cfg *cfg)
{
- struct irq_cfg *cfg = desc->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- __unmask_IO_APIC_irq(cfg);
+ __unmask_ioapic(cfg);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void mask_IO_APIC_irq(unsigned int irq)
+static void unmask_ioapic_irq(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- mask_IO_APIC_irq_desc(desc);
-}
-static void unmask_IO_APIC_irq(unsigned int irq)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- unmask_IO_APIC_irq_desc(desc);
+ unmask_ioapic(data->chip_data);
}
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
@@ -693,7 +594,7 @@ static void clear_IO_APIC (void)
}
#else
#define add_pin_to_irq_node(cfg, node, apic, pin)
-#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0
+#define __add_pin_to_irq_node(cfg, node, apic, pin) 0
#endif /* !CONFIG_XEN */
#ifdef CONFIG_X86_32
@@ -741,14 +642,14 @@ struct IO_APIC_route_entry **alloc_ioapi
struct IO_APIC_route_entry **ioapic_entries;
ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
- GFP_ATOMIC);
+ GFP_KERNEL);
if (!ioapic_entries)
return 0;
for (apic = 0; apic < nr_ioapics; apic++) {
ioapic_entries[apic] =
kzalloc(sizeof(struct IO_APIC_route_entry) *
- nr_ioapic_registers[apic], GFP_ATOMIC);
+ nr_ioapic_registers[apic], GFP_KERNEL);
if (!ioapic_entries[apic])
goto nomem;
}
@@ -1314,7 +1215,6 @@ void __setup_vector_irq(int cpu)
/* Initialize vector_irq on a new cpu */
int irq, vector;
struct irq_cfg *cfg;
- struct irq_desc *desc;
/*
* vector_lock will make sure that we don't run into irq vector
@@ -1323,9 +1223,10 @@ void __setup_vector_irq(int cpu)
*/
raw_spin_lock(&vector_lock);
/* Mark the inuse vectors */
- for_each_irq_desc(irq, desc) {
- cfg = desc->chip_data;
-
+ for_each_active_irq(irq) {
+ cfg = get_irq_chip_data(irq);
+ if (!cfg)
+ continue;
/*
* If it is a legacy IRQ handled by the legacy PIC, this cpu
* will be part of the irq_cfg's domain.
@@ -1382,17 +1283,17 @@ static inline int IO_APIC_irq_trigger(in
}
#endif
-static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
+static void ioapic_register_intr(unsigned int irq, unsigned long trigger)
{
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
trigger == IOAPIC_LEVEL)
- desc->status |= IRQ_LEVEL;
+ irq_set_status_flags(irq, IRQ_LEVEL);
else
- desc->status &= ~IRQ_LEVEL;
+ irq_clear_status_flags(irq, IRQ_LEVEL);
- if (irq_remapped(irq)) {
- desc->status |= IRQ_MOVE_PCNTXT;
+ if (irq_remapped(get_irq_chip_data(irq))) {
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
if (trigger)
set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
handle_fasteoi_irq,
@@ -1414,13 +1315,13 @@ static void ioapic_register_intr(int irq
}
#else /* !CONFIG_XEN */
#define __clear_irq_vector(irq, cfg) ((void)0)
-#define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq)
+#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
#endif
-int setup_ioapic_entry(int apic_id, int irq,
- struct IO_APIC_route_entry *entry,
- unsigned int destination, int trigger,
- int polarity, int vector, int pin)
+static int setup_ioapic_entry(int apic_id, int irq,
+ struct IO_APIC_route_entry *entry,
+ unsigned int destination, int trigger,
+ int polarity, int vector, int pin)
{
/*
* add it to the IO-APIC irq-routing table:
@@ -1442,21 +1343,7 @@ int setup_ioapic_entry(int apic_id, int
if (index < 0)
panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
- memset(&irte, 0, sizeof(irte));
-
- irte.present = 1;
- irte.dst_mode = apic->irq_dest_mode;
- /*
- * Trigger mode in the IRTE will always be edge, and the
- * actual level or edge trigger will be setup in the IO-APIC
- * RTE. This will help simplify level triggered irq migration.
- * For more details, see the comments above explainig IO-APIC
- * irq migration in the presence of interrupt-remapping.
- */
- irte.trigger_mode = 0;
- irte.dlvry_mode = apic->irq_delivery_mode;
- irte.vector = vector;
- irte.dest_id = IRTE_DEST(destination);
+ prepare_irte(&irte, vector, destination);
/* Set source-id of interrupt request */
set_ioapic_sid(&irte, apic_id);
@@ -1493,18 +1380,14 @@ int setup_ioapic_entry(int apic_id, int
return 0;
}
-static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
- int trigger, int polarity)
+static void setup_ioapic_irq(int apic_id, int pin, unsigned int irq,
+ struct irq_cfg *cfg, int trigger, int polarity)
{
- struct irq_cfg *cfg;
struct IO_APIC_route_entry entry;
unsigned int dest;
if (!IO_APIC_IRQ(irq))
return;
-
- cfg = desc->chip_data;
-
#ifndef CONFIG_XEN
/*
* For legacy irqs, cfg->domain starts with cpu 0 for legacy
@@ -1539,10 +1422,10 @@ static void setup_IO_APIC_irq(int apic_i
return;
}
- ioapic_register_intr(irq, desc, trigger);
+ ioapic_register_intr(irq, trigger);
#ifndef CONFIG_XEN
if (irq < legacy_pic->nr_legacy_irqs)
- legacy_pic->chip->mask(irq);
+ legacy_pic->mask(irq);
#endif
ioapic_write_entry(apic_id, pin, entry);
@@ -1554,11 +1437,9 @@ static struct {
static void __init setup_IO_APIC_irqs(void)
{
- int apic_id, pin, idx, irq;
- int notcon = 0;
- struct irq_desc *desc;
+ int apic_id, pin, idx, irq, notcon = 0;
+ int node = cpu_to_node(0);
struct irq_cfg *cfg;
- int node = cpu_to_node(boot_cpu_id);
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
@@ -1600,19 +1481,17 @@ static void __init setup_IO_APIC_irqs(vo
continue;
#endif
- desc = irq_to_desc_alloc_node(irq, node);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+ cfg = alloc_irq_and_cfg_at(irq, node);
+ if (!cfg)
continue;
- }
- cfg = desc->chip_data;
+
add_pin_to_irq_node(cfg, node, apic_id, pin);
/*
* don't mark it in pin_programmed, so later acpi could
* set it correctly when irq < 16
*/
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
- irq_trigger(idx), irq_polarity(idx));
+ setup_ioapic_irq(apic_id, pin, irq, cfg, irq_trigger(idx),
+ irq_polarity(idx));
}
if (notcon)
@@ -1627,9 +1506,7 @@ static void __init setup_IO_APIC_irqs(vo
*/
void setup_IO_APIC_irq_extra(u32 gsi)
{
- int apic_id = 0, pin, idx, irq;
- int node = cpu_to_node(boot_cpu_id);
- struct irq_desc *desc;
+ int apic_id = 0, pin, idx, irq, node = cpu_to_node(0);
struct irq_cfg *cfg;
/*
@@ -1649,18 +1526,15 @@ void setup_IO_APIC_irq_extra(u32 gsi)
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
return;
#endif
-#ifdef CONFIG_SPARSE_IRQ
- desc = irq_to_desc(irq);
- if (desc)
+
+ /* Only handle the non legacy irqs on secondary ioapics */
+ if (apic_id == 0 || irq < NR_IRQS_LEGACY)
return;
-#endif
- desc = irq_to_desc_alloc_node(irq, node);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc for %d\n", irq);
+
+ cfg = alloc_irq_and_cfg_at(irq, node);
+ if (!cfg)
return;
- }
- cfg = desc->chip_data;
add_pin_to_irq_node(cfg, node, apic_id, pin);
if (test_bit(pin, mp_ioapic_routing[apic_id].pin_programmed)) {
@@ -1670,7 +1544,7 @@ void setup_IO_APIC_irq_extra(u32 gsi)
}
set_bit(pin, mp_ioapic_routing[apic_id].pin_programmed);
- setup_IO_APIC_irq(apic_id, pin, irq, desc,
+ setup_ioapic_irq(apic_id, pin, irq, cfg,
irq_trigger(idx), irq_polarity(idx));
}
@@ -1722,7 +1596,6 @@ __apicdebuginit(void) print_IO_APIC(void
union IO_APIC_reg_03 reg_03;
unsigned long flags;
struct irq_cfg *cfg;
- struct irq_desc *desc;
unsigned int irq;
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
@@ -1809,10 +1682,10 @@ __apicdebuginit(void) print_IO_APIC(void
}
}
printk(KERN_DEBUG "IRQ to pin mappings:\n");
- for_each_irq_desc(irq, desc) {
+ for_each_active_irq(irq) {
struct irq_pin_list *entry;
- cfg = desc->chip_data;
+ cfg = get_irq_chip_data(irq);
if (!cfg)
continue;
entry = cfg->irq_2_pin;
@@ -2319,29 +2192,26 @@ static int __init timer_irq_works(void)
* an edge even if it isn't on the 8259A...
*/
-static unsigned int startup_ioapic_irq(unsigned int irq)
+static unsigned int startup_ioapic_irq(struct irq_data *data)
{
- int was_pending = 0;
+ int was_pending = 0, irq = data->irq;
unsigned long flags;
- struct irq_cfg *cfg;
raw_spin_lock_irqsave(&ioapic_lock, flags);
if (irq < legacy_pic->nr_legacy_irqs) {
- legacy_pic->chip->mask(irq);
+ legacy_pic->mask(irq);
if (legacy_pic->irq_pending(irq))
was_pending = 1;
}
- cfg = irq_cfg(irq);
- __unmask_IO_APIC_irq(cfg);
+ __unmask_ioapic(data->chip_data);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
return was_pending;
}
-static int ioapic_retrigger_irq(unsigned int irq)
+static int ioapic_retrigger_irq(struct irq_data *data)
{
-
- struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_cfg *cfg = data->chip_data;
unsigned long flags;
raw_spin_lock_irqsave(&vector_lock, flags);
@@ -2392,7 +2262,7 @@ static void __target_IO_APIC_irq(unsigne
* With interrupt-remapping, destination information comes
* from interrupt-remapping table entry.
*/
- if (!irq_remapped(irq))
+ if (!irq_remapped(cfg))
io_apic_write(apic, 0x11 + pin*2, dest);
reg = io_apic_read(apic, 0x10 + pin*2);
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
@@ -2402,65 +2272,46 @@ static void __target_IO_APIC_irq(unsigne
}
/*
- * Either sets desc->affinity to a valid value, and returns
+ * Either sets data->affinity to a valid value, and returns
* ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves desc->affinity untouched.
+ * leaves data->affinity untouched.
*/
-unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
- unsigned int *dest_id)
+int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ unsigned int *dest_id)
{
- struct irq_cfg *cfg;
- unsigned int irq;
+ struct irq_cfg *cfg = data->chip_data;
if (!cpumask_intersects(mask, cpu_online_mask))
return -1;
- irq = desc->irq;
- cfg = desc->chip_data;
- if (assign_irq_vector(irq, cfg, mask))
+ if (assign_irq_vector(data->irq, data->chip_data, mask))
return -1;
- cpumask_copy(desc->affinity, mask);
+ cpumask_copy(data->affinity, mask);
- *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+ *dest_id = apic->cpu_mask_to_apicid_and(mask, cfg->domain);
return 0;
}
static int
-set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_cfg *cfg;
+ unsigned int dest, irq = data->irq;
unsigned long flags;
- unsigned int dest;
- unsigned int irq;
- int ret = -1;
-
- irq = desc->irq;
- cfg = desc->chip_data;
+ int ret;
raw_spin_lock_irqsave(&ioapic_lock, flags);
- ret = set_desc_affinity(desc, mask, &dest);
+ ret = __ioapic_set_affinity(data, mask, &dest);
if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
- __target_IO_APIC_irq(irq, dest, cfg);
+ __target_IO_APIC_irq(irq, dest, data->chip_data);
}
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
-
return ret;
}
-static int
-set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
-{
- struct irq_desc *desc;
-
- desc = irq_to_desc(irq);
-
- return set_ioapic_affinity_irq_desc(desc, mask);
-}
-
#ifdef CONFIG_INTR_REMAP
/*
@@ -2475,24 +2326,21 @@ set_ioapic_affinity_irq(unsigned int irq
* the interrupt-remapping table entry.
*/
static int
-migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
struct irte irte;
- unsigned int dest;
- unsigned int irq;
- int ret = -1;
if (!cpumask_intersects(mask, cpu_online_mask))
- return ret;
+ return -EINVAL;
- irq = desc->irq;
if (get_irte(irq, &irte))
- return ret;
+ return -EBUSY;
- cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return ret;
+ return -EBUSY;
dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
@@ -2507,29 +2355,14 @@ migrate_ioapic_irq_desc(struct irq_desc
if (cfg->move_in_progress)
send_cleanup_vector(cfg);
- cpumask_copy(desc->affinity, mask);
-
+ cpumask_copy(data->affinity, mask);
return 0;
}
-/*
- * Migrates the IRQ destination in the process context.
- */
-static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
- const struct cpumask *mask)
-{
- return migrate_ioapic_irq_desc(desc, mask);
-}
-static int set_ir_ioapic_affinity_irq(unsigned int irq,
- const struct cpumask *mask)
-{
- struct irq_desc *desc = irq_to_desc(irq);
-
- return set_ir_ioapic_affinity_irq_desc(desc, mask);
-}
#else
-static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
- const struct cpumask *mask)
+static inline int
+ir_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
return 0;
}
@@ -2591,10 +2424,8 @@ unlock:
irq_exit();
}
-static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
{
- struct irq_desc *desc = *descp;
- struct irq_cfg *cfg = desc->chip_data;
unsigned me;
if (likely(!cfg->move_in_progress))
@@ -2606,31 +2437,28 @@ static void __irq_complete_move(struct i
send_cleanup_vector(cfg);
}
-static void irq_complete_move(struct irq_desc **descp)
+static void irq_complete_move(struct irq_cfg *cfg)
{
- __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+ __irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
}
void irq_force_complete_move(int irq)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
+ struct irq_cfg *cfg = get_irq_chip_data(irq);
if (!cfg)
return;
- __irq_complete_move(&desc, cfg->vector);
+ __irq_complete_move(cfg, cfg->vector);
}
#else
-static inline void irq_complete_move(struct irq_desc **descp) {}
+static inline void irq_complete_move(struct irq_cfg *cfg) { }
#endif
-static void ack_apic_edge(unsigned int irq)
+static void ack_apic_edge(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
- irq_complete_move(&desc);
- move_native_irq(irq);
+ irq_complete_move(data->chip_data);
+ move_native_irq(data->irq);
ack_APIC_irq();
}
@@ -2652,10 +2480,12 @@ atomic_t irq_mis_count;
* Otherwise, we simulate the EOI message manually by changing the trigger
* mode to edge and then back to level, with RTE being masked during this.
*/
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+static void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
struct irq_pin_list *entry;
+ unsigned long flags;
+ raw_spin_lock_irqsave(&ioapic_lock, flags);
for_each_irq_pin(entry, cfg->irq_2_pin) {
if (mp_ioapics[entry->apic].apicver >= 0x20) {
/*
@@ -2664,7 +2494,7 @@ static void __eoi_ioapic_irq(unsigned in
* intr-remapping table entry. Hence for the io-apic
* EOI we use the pin number.
*/
- if (irq_remapped(irq))
+ if (irq_remapped(cfg))
io_apic_eoi(entry->apic, entry->pin);
else
io_apic_eoi(entry->apic, cfg->vector);
@@ -2673,36 +2503,21 @@ static void __eoi_ioapic_irq(unsigned in
__unmask_and_level_IO_APIC_irq(entry);
}
}
-}
-
-static void eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- raw_spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
raw_spin_unlock_irqrestore(&ioapic_lock, flags);
}
-static void ack_apic_level(unsigned int irq)
+static void ack_apic_level(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = data->chip_data;
+ int i, do_unmask_irq = 0, irq = data->irq;
unsigned long v;
- int i;
- struct irq_cfg *cfg;
- int do_unmask_irq = 0;
- irq_complete_move(&desc);
+ irq_complete_move(cfg);
#ifdef CONFIG_GENERIC_PENDING_IRQ
/* If we are moving the irq we need to mask it */
- if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+ if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
do_unmask_irq = 1;
- mask_IO_APIC_irq_desc(desc);
+ mask_ioapic(cfg);
}
#endif
@@ -2738,7 +2553,6 @@ static void ack_apic_level(unsigned int
* we use the above logic (mask+edge followed by unmask+level) from
* Manfred Spraul to clear the remote IRR.
*/
- cfg = desc->chip_data;
i = cfg->vector;
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
@@ -2758,7 +2572,7 @@ static void ack_apic_level(unsigned int
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
- eoi_ioapic_irq(desc);
+ eoi_ioapic_irq(irq, cfg);
}
/* Now we can move and renable the irq */
@@ -2789,62 +2603,58 @@ static void ack_apic_level(unsigned int
* accurate and is causing problems then it is a hardware bug
* and you can go talk to the chipset vendor about it.
*/
- cfg = desc->chip_data;
if (!io_apic_level_ack_pending(cfg))
move_masked_irq(irq);
- unmask_IO_APIC_irq_desc(desc);
+ unmask_ioapic(cfg);
}
}
#ifdef CONFIG_INTR_REMAP
-static void ir_ack_apic_edge(unsigned int irq)
+static void ir_ack_apic_edge(struct irq_data *data)
{
ack_APIC_irq();
}
-static void ir_ack_apic_level(unsigned int irq)
+static void ir_ack_apic_level(struct irq_data *data)
{
- struct irq_desc *desc = irq_to_desc(irq);
-
ack_APIC_irq();
- eoi_ioapic_irq(desc);
+ eoi_ioapic_irq(data->irq, data->chip_data);
}
#endif /* CONFIG_INTR_REMAP */
static struct irq_chip ioapic_chip __read_mostly = {
- .name = "IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_apic_edge,
- .eoi = ack_apic_level,
+ .name = "IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
+ .irq_ack = ack_apic_edge,
+ .irq_eoi = ack_apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity_irq,
+ .irq_set_affinity = ioapic_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
static struct irq_chip ir_ioapic_chip __read_mostly = {
- .name = "IR-IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
+ .name = "IR-IO-APIC",
+ .irq_startup = startup_ioapic_irq,
+ .irq_mask = mask_ioapic_irq,
+ .irq_unmask = unmask_ioapic_irq,
#ifdef CONFIG_INTR_REMAP
- .ack = ir_ack_apic_edge,
- .eoi = ir_ack_apic_level,
+ .irq_ack = ir_ack_apic_edge,
+ .irq_eoi = ir_ack_apic_level,
#ifdef CONFIG_SMP
- .set_affinity = set_ir_ioapic_affinity_irq,
+ .irq_set_affinity = ir_ioapic_set_affinity,
#endif
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
#endif /* !CONFIG_XEN */
static inline void init_IO_APIC_traps(void)
{
- int irq;
- struct irq_desc *desc;
struct irq_cfg *cfg;
+ unsigned int irq;
/*
* NOTE! The local APIC isn't very good at handling
@@ -2857,12 +2667,12 @@ static inline void init_IO_APIC_traps(vo
* Also, we've got to be careful not to trash gate
* 0x80, because int 0x80 is hm, kind of importantish. ;)
*/
- for_each_irq_desc(irq, desc) {
+ for_each_active_irq(irq) {
#ifdef CONFIG_XEN
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
continue;
#endif
- cfg = desc->chip_data;
+ cfg = get_irq_chip_data(irq);
if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
/*
* Hmm.. We don't have an entry for this,
@@ -2873,7 +2683,7 @@ static inline void init_IO_APIC_traps(vo
legacy_pic->make_irq(irq);
else
/* Strange. Oh, well.. */
- desc->chip = &no_irq_chip;
+ set_irq_chip(irq, &no_irq_chip);
}
}
}
@@ -2883,7 +2693,7 @@ static inline void init_IO_APIC_traps(vo
* The local APIC irq-chip implementation:
*/
-static void mask_lapic_irq(unsigned int irq)
+static void mask_lapic_irq(struct irq_data *data)
{
unsigned long v;
@@ -2891,7 +2701,7 @@ static void mask_lapic_irq(unsigned int
apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
}
-static void unmask_lapic_irq(unsigned int irq)
+static void unmask_lapic_irq(struct irq_data *data)
{
unsigned long v;
@@ -2899,21 +2709,21 @@ static void unmask_lapic_irq(unsigned in
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
}
-static void ack_lapic_irq(unsigned int irq)
+static void ack_lapic_irq(struct irq_data *data)
{
ack_APIC_irq();
}
static struct irq_chip lapic_chip __read_mostly = {
.name = "local-APIC",
- .mask = mask_lapic_irq,
- .unmask = unmask_lapic_irq,
- .ack = ack_lapic_irq,
+ .irq_mask = mask_lapic_irq,
+ .irq_unmask = unmask_lapic_irq,
+ .irq_ack = ack_lapic_irq,
};
-static void lapic_register_intr(int irq, struct irq_desc *desc)
+static void lapic_register_intr(int irq)
{
- desc->status &= ~IRQ_LEVEL;
+ irq_clear_status_flags(irq, IRQ_LEVEL);
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
"edge");
}
@@ -3016,9 +2826,8 @@ int timer_through_8259 __initdata;
*/
static inline void __init check_timer(void)
{
- struct irq_desc *desc = irq_to_desc(0);
- struct irq_cfg *cfg = desc->chip_data;
- int node = cpu_to_node(boot_cpu_id);
+ struct irq_cfg *cfg = get_irq_chip_data(0);
+ int node = cpu_to_node(0);
int apic1, pin1, apic2, pin2;
unsigned long flags;
int no_pin1 = 0;
@@ -3028,7 +2837,7 @@ static inline void __init check_timer(vo
/*
* get/set the timer IRQ vector:
*/
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
assign_irq_vector(0, cfg, apic->target_cpus());
/*
@@ -3087,7 +2896,7 @@ static inline void __init check_timer(vo
add_pin_to_irq_node(cfg, node, apic1, pin1);
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
} else {
- /* for edge trigger, setup_IO_APIC_irq already
+ /* for edge trigger, setup_ioapic_irq already
* leave it unmasked.
* so only need to unmask if it is level-trigger
* do we really have level trigger timer?
@@ -3095,12 +2904,12 @@ static inline void __init check_timer(vo
int idx;
idx = find_irq_entry(apic1, pin1, mp_INT);
if (idx != -1 && irq_trigger(idx))
- unmask_IO_APIC_irq_desc(desc);
+ unmask_ioapic(cfg);
}
if (timer_irq_works()) {
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
}
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(0, pin1);
@@ -3123,14 +2932,14 @@ static inline void __init check_timer(vo
*/
replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2);
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
timer_through_8259 = 1;
if (nmi_watchdog == NMI_IO_APIC) {
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
setup_nmi();
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
}
goto out;
}
@@ -3138,7 +2947,7 @@ static inline void __init check_timer(vo
* Cleanup, just in case ...
*/
local_irq_disable();
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
clear_IO_APIC_pin(apic2, pin2);
apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
}
@@ -3155,16 +2964,16 @@ static inline void __init check_timer(vo
apic_printk(APIC_QUIET, KERN_INFO
"...trying to set up timer as Virtual Wire IRQ...\n");
- lapic_register_intr(0, desc);
+ lapic_register_intr(0);
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
- legacy_pic->chip->unmask(0);
+ legacy_pic->unmask(0);
if (timer_irq_works()) {
apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
goto out;
}
local_irq_disable();
- legacy_pic->chip->mask(0);
+ legacy_pic->mask(0);
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3344,49 +3153,42 @@ device_initcall(ioapic_init_sysfs);
/*
* Dynamic irq allocate and deallocation
*/
-unsigned int create_irq_nr(unsigned int irq_want, int node)
+unsigned int create_irq_nr(unsigned int from, int node)
{
- /* Allocate an unused irq */
- unsigned int irq;
- unsigned int new;
+ struct irq_cfg *cfg;
unsigned long flags;
- struct irq_cfg *cfg_new = NULL;
- struct irq_desc *desc_new = NULL;
-
- irq = 0;
- if (irq_want < nr_irqs_gsi)
- irq_want = nr_irqs_gsi;
-
- raw_spin_lock_irqsave(&vector_lock, flags);
- for (new = irq_want; new < nr_irqs; new++) {
- desc_new = irq_to_desc_alloc_node(new, node);
- if (!desc_new) {
- printk(KERN_INFO "can not get irq_desc for %d\n", new);
- continue;
- }
- cfg_new = desc_new->chip_data;
-
- if (cfg_new->vector != 0)
- continue;
+ unsigned int ret = 0;
+ int irq;
- desc_new = move_irq_desc(desc_new, node);
- cfg_new = desc_new->chip_data;
+ if (from < nr_irqs_gsi)
+ from = nr_irqs_gsi;
- if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
- irq = new;
- break;
+ irq = alloc_irq_from(from, node);
+ if (irq < 0)
+ return 0;
+ cfg = alloc_irq_cfg(irq, node);
+ if (!cfg) {
+ free_irq_at(irq, NULL);
+ return 0;
}
- raw_spin_unlock_irqrestore(&vector_lock, flags);
- if (irq > 0)
- dynamic_irq_init_keep_chip_data(irq);
+ raw_spin_lock_irqsave(&vector_lock, flags);
+ if (!__assign_irq_vector(irq, cfg, apic->target_cpus()))
+ ret = irq;
+ raw_spin_unlock_irqrestore(&vector_lock, flags);
- return irq;
+ if (ret) {
+ set_irq_chip_data(irq, cfg);
+ irq_clear_status_flags(irq, IRQ_NOREQUEST);
+ } else {
+ free_irq_at(irq, cfg);
+ }
+ return ret;
}
int create_irq(void)
{
- int node = cpu_to_node(boot_cpu_id);
+ int node = cpu_to_node(0);
unsigned int irq_want;
int irq;
@@ -3401,14 +3203,17 @@ int create_irq(void)
void destroy_irq(unsigned int irq)
{
+ struct irq_cfg *cfg = get_irq_chip_data(irq);
unsigned long flags;
- dynamic_irq_cleanup_keep_chip_data(irq);
+ irq_set_status_flags(irq, IRQ_NOREQUEST|IRQ_NOPROBE);
- free_irte(irq);
+ if (irq_remapped(cfg))
+ free_irte(irq);
raw_spin_lock_irqsave(&vector_lock, flags);
- __clear_irq_vector(irq, get_irq_chip_data(irq));
+ __clear_irq_vector(irq, cfg);
raw_spin_unlock_irqrestore(&vector_lock, flags);
+ free_irq_at(irq, cfg);
}
#endif /* !CONFIG_XEN */
@@ -3433,7 +3238,7 @@ static int msi_compose_msg(struct pci_de
dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
- if (irq_remapped(irq)) {
+ if (irq_remapped(get_irq_chip_data(irq))) {
struct irte irte;
int ir_index;
u16 sub_handle;
@@ -3441,14 +3246,7 @@ static int msi_compose_msg(struct pci_de
ir_index = map_irq_to_irte_handle(irq, &sub_handle);
BUG_ON(ir_index == -1);
- memset (&irte, 0, sizeof(irte));
-
- irte.present = 1;
- irte.dst_mode = apic->irq_dest_mode;
- irte.trigger_mode = 0; /* edge */
- irte.dlvry_mode = apic->irq_delivery_mode;
- irte.vector = cfg->vector;
- irte.dest_id = IRTE_DEST(dest);
+ prepare_irte(&irte, cfg->vector, dest);
/* Set source-id of interrupt request */
if (pdev)
@@ -3493,26 +3291,24 @@ static int msi_compose_msg(struct pci_de
}
#ifdef CONFIG_SMP
-static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
- get_cached_msi_msg_desc(desc, &msg);
+ __get_cached_msi_msg(data->msi_desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- write_msi_msg_desc(desc, &msg);
+ __write_msi_msg(data->msi_desc, &msg);
return 0;
}
@@ -3522,17 +3318,17 @@ static int set_msi_irq_affinity(unsigned
* done in the process context using interrupt-remapping hardware.
*/
static int
-ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+ir_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg = desc->chip_data;
- unsigned int dest;
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
struct irte irte;
if (get_irte(irq, &irte))
return -1;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
irte.vector = cfg->vector;
@@ -3562,27 +3358,27 @@ ir_set_msi_irq_affinity(unsigned int irq
* which implement the MSI or MSI-X Capability Structure.
*/
static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
- .ack = ack_apic_edge,
+ .name = "PCI-MSI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = set_msi_irq_affinity,
+ .irq_set_affinity = msi_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
static struct irq_chip msi_ir_chip = {
- .name = "IR-PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
+ .name = "IR-PCI-MSI",
+ .irq_unmask = unmask_msi_irq,
+ .irq_mask = mask_msi_irq,
#ifdef CONFIG_INTR_REMAP
- .ack = ir_ack_apic_edge,
+ .irq_ack = ir_ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = ir_set_msi_irq_affinity,
+ .irq_set_affinity = ir_msi_set_affinity,
#endif
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
/*
@@ -3614,8 +3410,8 @@ static int msi_alloc_irte(struct pci_dev
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
{
- int ret;
struct msi_msg msg;
+ int ret;
ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
@@ -3624,12 +3420,8 @@ static int setup_msi_irq(struct pci_dev
set_irq_msi(irq, msidesc);
write_msi_msg(irq, &msg);
- if (irq_remapped(irq)) {
- struct irq_desc *desc = irq_to_desc(irq);
- /*
- * irq migration in process context
- */
- desc->status |= IRQ_MOVE_PCNTXT;
+ if (irq_remapped(get_irq_chip_data(irq))) {
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
} else
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
@@ -3639,15 +3431,12 @@ static int setup_msi_irq(struct pci_dev
return 0;
}
-int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- unsigned int irq;
- int ret, sub_handle;
+ int node, ret, sub_handle, index = 0;
+ unsigned int irq, irq_want;
struct msi_desc *msidesc;
- unsigned int irq_want;
struct intel_iommu *iommu = NULL;
- int index = 0;
- int node;
/* x86 doesn't support multiple MSI yet */
if (type == PCI_CAP_ID_MSI && nvec > 1)
@@ -3700,31 +3489,31 @@ error:
return ret;
}
-void arch_teardown_msi_irq(unsigned int irq)
+void native_teardown_msi_irq(unsigned int irq)
{
destroy_irq(irq);
}
#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
#ifdef CONFIG_SMP
-static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+ bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
+ unsigned int dest, irq = data->irq;
struct msi_msg msg;
- unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
dmar_msi_read(irq, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+ msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
dmar_msi_write(irq, &msg);
@@ -3734,14 +3523,14 @@ static int dmar_msi_set_affinity(unsigne
#endif /* CONFIG_SMP */
static struct irq_chip dmar_msi_type = {
- .name = "DMAR_MSI",
- .unmask = dmar_msi_unmask,
- .mask = dmar_msi_mask,
- .ack = ack_apic_edge,
+ .name = "DMAR_MSI",
+ .irq_unmask = dmar_msi_unmask,
+ .irq_mask = dmar_msi_mask,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = dmar_msi_set_affinity,
+ .irq_set_affinity = dmar_msi_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_dmar_msi(unsigned int irq)
@@ -3762,26 +3551,24 @@ int arch_setup_dmar_msi(unsigned int irq
#ifdef CONFIG_HPET_TIMER
#ifdef CONFIG_SMP
-static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
+static int hpet_msi_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
struct msi_msg msg;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
- hpet_msi_read(irq, &msg);
+ hpet_msi_read(data->handler_data, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
- hpet_msi_write(irq, &msg);
+ hpet_msi_write(data->handler_data, &msg);
return 0;
}
@@ -3789,34 +3576,33 @@ static int hpet_msi_set_affinity(unsigne
#endif /* CONFIG_SMP */
static struct irq_chip ir_hpet_msi_type = {
- .name = "IR-HPET_MSI",
- .unmask = hpet_msi_unmask,
- .mask = hpet_msi_mask,
+ .name = "IR-HPET_MSI",
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
#ifdef CONFIG_INTR_REMAP
- .ack = ir_ack_apic_edge,
+ .irq_ack = ir_ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = ir_set_msi_irq_affinity,
+ .irq_set_affinity = ir_msi_set_affinity,
#endif
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
- .unmask = hpet_msi_unmask,
- .mask = hpet_msi_mask,
- .ack = ack_apic_edge,
+ .irq_unmask = hpet_msi_unmask,
+ .irq_mask = hpet_msi_mask,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = hpet_msi_set_affinity,
+ .irq_set_affinity = hpet_msi_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
- int ret;
struct msi_msg msg;
- struct irq_desc *desc = irq_to_desc(irq);
+ int ret;
if (intr_remapping_enabled) {
struct intel_iommu *iommu = map_hpet_to_ir(id);
@@ -3834,9 +3620,9 @@ int arch_setup_hpet_msi(unsigned int irq
if (ret < 0)
return ret;
- hpet_msi_write(irq, &msg);
- desc->status |= IRQ_MOVE_PCNTXT;
- if (irq_remapped(irq))
+ hpet_msi_write(get_irq_data(irq), &msg);
+ irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+ if (irq_remapped(get_irq_chip_data(irq)))
set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
handle_edge_irq, "edge");
else
@@ -3869,33 +3655,30 @@ static void target_ht_irq(unsigned int i
write_ht_irq_msg(irq, &msg);
}
-static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
{
- struct irq_desc *desc = irq_to_desc(irq);
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = data->chip_data;
unsigned int dest;
- if (set_desc_affinity(desc, mask, &dest))
+ if (__ioapic_set_affinity(data, mask, &dest))
return -1;
- cfg = desc->chip_data;
-
- target_ht_irq(irq, dest, cfg->vector);
-
+ target_ht_irq(data->irq, dest, cfg->vector);
return 0;
}
#endif
static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .mask = mask_ht_irq,
- .unmask = unmask_ht_irq,
- .ack = ack_apic_edge,
+ .name = "PCI-HT",
+ .irq_mask = mask_ht_irq,
+ .irq_unmask = unmask_ht_irq,
+ .irq_ack = ack_apic_edge,
#ifdef CONFIG_SMP
- .set_affinity = set_ht_irq_affinity,
+ .irq_set_affinity = ht_set_affinity,
#endif
- .retrigger = ioapic_retrigger_irq,
+ .irq_retrigger = ioapic_retrigger_irq,
};
int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
@@ -3969,6 +3752,11 @@ void __init probe_nr_irqs_gsi(void)
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
}
+int get_nr_irqs_gsi(void)
+{
+ return nr_irqs_gsi;
+}
+
#ifdef CONFIG_SPARSE_IRQ
int __init arch_probe_nr_irqs(void)
{
@@ -3987,7 +3775,7 @@ int __init arch_probe_nr_irqs(void)
if (nr < nr_irqs)
nr_irqs = nr;
- return 0;
+ return NR_IRQS_LEGACY;
}
#endif
#endif /* CONFIG_XEN */
@@ -3995,7 +3783,6 @@ int __init arch_probe_nr_irqs(void)
static int __io_apic_set_pci_routing(struct device *dev, int irq,
struct io_apic_irq_attr *irq_attr)
{
- struct irq_desc *desc;
struct irq_cfg *cfg;
int node;
int ioapic, pin;
@@ -4018,13 +3805,11 @@ static int __io_apic_set_pci_routing(str
if (dev)
node = dev_to_node(dev);
else
- node = cpu_to_node(boot_cpu_id);
+ node = cpu_to_node(0);
- desc = irq_to_desc_alloc_node(irq, node);
- if (!desc) {
- printk(KERN_INFO "can not get irq_desc %d\n", irq);
+ cfg = alloc_irq_and_cfg_at(irq, node);
+ if (!cfg)
return 0;
- }
pin = irq_attr->ioapic_pin;
trigger = irq_attr->trigger;
@@ -4034,15 +3819,14 @@ static int __io_apic_set_pci_routing(str
* IRQs < 16 are already in the irq_2_pin[] map
*/
if (irq >= legacy_pic->nr_legacy_irqs) {
- cfg = desc->chip_data;
- if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+ if (__add_pin_to_irq_node(cfg, node, ioapic, pin)) {
printk(KERN_INFO "can not add pin %d for irq %d\n",
pin, irq);
return 0;
}
}
- setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
+ setup_ioapic_irq(ioapic, pin, irq, cfg, trigger, polarity);
return 0;
}
@@ -4238,14 +4022,14 @@ void __init setup_ioapic_dest(void)
*/
if (desc->status &
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
- mask = desc->affinity;
+ mask = desc->irq_data.affinity;
else
mask = apic->target_cpus();
if (intr_remapping_enabled)
- set_ir_ioapic_affinity_irq_desc(desc, mask);
+ ir_ioapic_set_affinity(&desc->irq_data, mask, false);
else
- set_ioapic_affinity_irq_desc(desc, mask);
+ ioapic_set_affinity(&desc->irq_data, mask, false);
}
}
@@ -4433,20 +4217,19 @@ void __init mp_register_ioapic(int id, u
void __init pre_init_apic_IRQ0(void)
{
struct irq_cfg *cfg;
- struct irq_desc *desc;
printk(KERN_INFO "Early APIC setup for system timer0\n");
#ifndef CONFIG_SMP
phys_cpu_present_map = physid_mask_of_physid(boot_cpu_physical_apicid);
#endif
- desc = irq_to_desc_alloc_node(0, 0);
+ /* Make sure the irq descriptor is set up */
+ cfg = alloc_irq_and_cfg_at(0, 0);
setup_local_APIC();
- cfg = irq_cfg(0);
add_pin_to_irq_node(cfg, 0, 0, 0);
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
- setup_IO_APIC_irq(0, 0, 0, desc, 0, 0);
+ setup_ioapic_irq(0, 0, 0, cfg, 0, 0);
}
#endif
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:43:00.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:43:08.000000000 +0100
@@ -696,7 +696,7 @@ static void __init early_identify_cpu(st
this_cpu->c_early_init(c);
#ifdef CONFIG_SMP
- c->cpu_index = boot_cpu_id;
+ c->cpu_index = 0;
#endif
filter_cpuid_features(c, false);
}
@@ -735,16 +735,21 @@ void __init early_cpu_init(void)
}
/*
- * The NOPL instruction is supposed to exist on all CPUs with
- * family >= 6; unfortunately, that's not true in practice because
- * of early VIA chips and (more importantly) broken virtualizers that
- * are not easy to detect. In the latter case it doesn't even *fail*
- * reliably, so probing for it doesn't even work. Disable it completely
+ * The NOPL instruction is supposed to exist on all CPUs of family >= 6;
+ * unfortunately, that's not true in practice because of early VIA
+ * chips and (more importantly) broken virtualizers that are not easy
+ * to detect. In the latter case it doesn't even *fail* reliably, so
+ * probing for it doesn't even work. Disable it completely on 32-bit
* unless we can find a reliable way to detect all the broken cases.
+ * Enable it explicitly on 64-bit for non-constant inputs of cpu_has().
*/
static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
{
+#ifdef CONFIG_X86_32
clear_cpu_cap(c, X86_FEATURE_NOPL);
+#else
+ set_cpu_cap(c, X86_FEATURE_NOPL);
+#endif
}
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
@@ -1355,13 +1360,6 @@ void __cpuinit cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
- /*
- * Force FPU initialization:
- */
- current_thread_info()->status = 0;
- clear_used_math();
- mxcsr_feature_mask_init();
-
fpu_init();
xsave_init();
}
--- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -15,6 +15,7 @@
#include <linux/pfn.h>
#include <linux/suspend.h>
#include <linux/firmware-map.h>
+#include <linux/memblock.h>
#include <asm/e820.h>
#include <asm/proto.h>
@@ -786,73 +787,7 @@ core_initcall(e820_mark_nvs_memory);
#endif
/*
- * Find a free area with specified alignment in a specific range.
- */
-u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 addr;
- u64 ei_start, ei_last;
-
- if (ei->type != E820_RAM)
- continue;
-
- ei_last = ei->addr + ei->size;
- ei_start = ei->addr;
- addr = find_early_area(ei_start, ei_last, start, end,
- size, align);
-
- if (addr != -1ULL)
- return addr;
- }
- return -1ULL;
-}
-
-u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align)
-{
- return find_e820_area(start, end, size, align);
-}
-
-u64 __init get_max_mapped(void)
-{
- u64 end = max_pfn_mapped;
-
- end <<= PAGE_SHIFT;
-
- return end;
-}
-/*
- * Find next free range after *start
- */
-u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
-{
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- struct e820entry *ei = &e820.map[i];
- u64 addr;
- u64 ei_start, ei_last;
-
- if (ei->type != E820_RAM)
- continue;
-
- ei_last = ei->addr + ei->size;
- ei_start = ei->addr;
- addr = find_early_area_size(ei_start, ei_last, start,
- sizep, align);
-
- if (addr != -1ULL)
- return addr;
- }
-
- return -1ULL;
-}
-
-/*
- * pre allocated 4k and reserved it in e820
+ * Pre-allocate 4k and reserve it in memblock and e820_saved
*/
u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
{
@@ -869,8 +804,8 @@ u64 __init early_reserve_e820(u64 startt
}
#endif
for (start = startt; ; start += size) {
- start = find_e820_area_size(start, &size, align);
- if (!(start + 1))
+ start = memblock_x86_find_in_range_size(start, &size, align);
+ if (start == MEMBLOCK_ERROR)
return 0;
if (size >= sizet)
break;
@@ -924,10 +859,9 @@ u64 __init early_reserve_e820(u64 startt
return 0;
}
#endif
- e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
+ memblock_x86_reserve_range(addr, addr + sizet, "new next");
e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
- printk(KERN_INFO "update e820 for early_reserve_e820\n");
- update_e820();
+ printk(KERN_INFO "update e820_saved for early_reserve_e820\n");
update_e820_saved();
return addr;
@@ -989,83 +923,6 @@ unsigned long __init e820_end_of_low_ram
{
return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
}
-/*
- * Finds an active region in the address range from start_pfn to last_pfn and
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
- */
-int __init e820_find_active_region(const struct e820entry *ei,
- unsigned long start_pfn,
- unsigned long last_pfn,
- unsigned long *ei_startpfn,
- unsigned long *ei_endpfn)
-{
- u64 align = PAGE_SIZE;
-
-#ifdef CONFIG_XEN
- if (last_pfn > xen_start_info->nr_pages)
- last_pfn = xen_start_info->nr_pages;
-#endif
-
- *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
- *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
-
- /* Skip map entries smaller than a page */
- if (*ei_startpfn >= *ei_endpfn)
- return 0;
-
- /* Skip if map is outside the node */
- if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
- *ei_startpfn >= last_pfn)
- return 0;
-
- /* Check for overlaps */
- if (*ei_startpfn < start_pfn)
- *ei_startpfn = start_pfn;
- if (*ei_endpfn > last_pfn)
- *ei_endpfn = last_pfn;
-
- return 1;
-}
-
-/* Walk the e820 map and register active regions within a node */
-void __init e820_register_active_regions(int nid, unsigned long start_pfn,
- unsigned long last_pfn)
-{
- unsigned long ei_startpfn;
- unsigned long ei_endpfn;
- int i;
-
- for (i = 0; i < e820.nr_map; i++)
- if (e820_find_active_region(&e820.map[i],
- start_pfn, last_pfn,
- &ei_startpfn, &ei_endpfn))
- add_active_range(nid, ei_startpfn, ei_endpfn);
-#ifdef CONFIG_XEN
- BUG_ON(nid);
- add_active_range(nid, last_pfn, last_pfn);
-#endif
-}
-
-/*
- * Find the hole size (in bytes) in the memory range.
- * @start: starting address of the memory range to scan
- * @end: ending address of the memory range to scan
- */
-u64 __init e820_hole_size(u64 start, u64 end)
-{
- unsigned long start_pfn = start >> PAGE_SHIFT;
- unsigned long last_pfn = end >> PAGE_SHIFT;
- unsigned long ei_startpfn, ei_endpfn, ram = 0;
- int i;
-
- for (i = 0; i < e820.nr_map; i++) {
- if (e820_find_active_region(&e820.map[i],
- start_pfn, last_pfn,
- &ei_startpfn, &ei_endpfn))
- ram += ei_endpfn - ei_startpfn;
- }
- return end - start - ((u64)ram << PAGE_SHIFT);
-}
static void early_panic(char *msg)
{
@@ -1344,3 +1201,48 @@ void __init setup_memory_map(void)
printk(KERN_INFO "Xen-provided physical RAM map:\n");
_e820_print_map(&e820, who);
}
+
+void __init memblock_x86_fill(void)
+{
+ int i;
+ u64 end;
+
+ /*
+ * EFI may have more than 128 entries
+	 * We are safe to enable resizing, because memblock_x86_fill()
+	 * is called rather late for x86
+ */
+ memblock_can_resize = 1;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+
+ end = ei->addr + ei->size;
+ if (end != (resource_size_t)end)
+ continue;
+
+ if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
+ continue;
+
+ memblock_add(ei->addr, ei->size);
+ }
+
+ memblock_analyze();
+ memblock_dump_all();
+}
+
+void __init memblock_find_dma_reserve(void)
+{
+#ifdef CONFIG_X86_64
+ u64 free_size_pfn;
+ u64 mem_size_pfn;
+ /*
+	 * Find out the used area below MAX_DMA_PFN: use memblock to get
+	 * the free size in [0, MAX_DMA_PFN] first, and assume boot_mem
+	 * will not take memory below MAX_DMA_PFN.
+ */
+ mem_size_pfn = memblock_x86_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ free_size_pfn = memblock_x86_free_memory_in_range(0, MAX_DMA_PFN << PAGE_SHIFT) >> PAGE_SHIFT;
+ set_dma_reserve(mem_size_pfn - free_size_pfn);
+#endif
+}
--- head-2011-03-17.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -13,6 +13,7 @@
#include <asm/setup.h>
#include <asm/pci-direct.h>
#include <asm/fixmap.h>
+#include <asm/mrst.h>
#include <asm/pgtable.h>
#include <linux/usb/ehci_def.h>
@@ -271,6 +272,18 @@ static int __init setup_early_printk(cha
if (!strncmp(buf, "xen", 3))
early_console_register(&xenboot_console, keep);
#endif
+#ifdef CONFIG_X86_MRST_EARLY_PRINTK
+ if (!strncmp(buf, "mrst", 4)) {
+ mrst_early_console_init();
+ early_console_register(&early_mrst_console, keep);
+ }
+
+ if (!strncmp(buf, "hsu", 3)) {
+ hsu_early_console_init();
+ early_console_register(&early_hsu_console, keep);
+ }
+
+#endif
buf++;
}
return 0;
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:09:47.000000000 +0100
@@ -119,8 +119,7 @@ NMI_MASK = 0x80000000
/* unfortunately push/pop can't be no-op */
.macro PUSH_GS
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
.endm
.macro POP_GS pop=0
addl $(4 + \pop), %esp
@@ -144,14 +143,12 @@ NMI_MASK = 0x80000000
#else /* CONFIG_X86_32_LAZY_GS */
.macro PUSH_GS
- pushl %gs
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %gs
/*CFI_REL_OFFSET gs, 0*/
.endm
.macro POP_GS pop=0
-98: popl %gs
- CFI_ADJUST_CFA_OFFSET -4
+98: popl_cfi %gs
/*CFI_RESTORE gs*/
.if \pop <> 0
add $\pop, %esp
@@ -199,35 +196,25 @@ NMI_MASK = 0x80000000
.macro SAVE_ALL
cld
PUSH_GS
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %fs
/*CFI_REL_OFFSET fs, 0;*/
- pushl %es
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %es
/*CFI_REL_OFFSET es, 0;*/
- pushl %ds
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0;*/
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
CFI_REL_OFFSET eax, 0
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx, 0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
movl $(__USER_DS), %edx
movl %edx, %ds
@@ -238,39 +225,29 @@ NMI_MASK = 0x80000000
.endm
.macro RESTORE_INT_REGS
- popl %ebx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebx
CFI_RESTORE ebx
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
CFI_RESTORE ecx
- popl %edx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edx
CFI_RESTORE edx
- popl %esi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %esi
CFI_RESTORE esi
- popl %edi
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %edi
CFI_RESTORE edi
- popl %ebp
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ebp
CFI_RESTORE ebp
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
CFI_RESTORE eax
.endm
.macro RESTORE_REGS pop=0
RESTORE_INT_REGS
-1: popl %ds
- CFI_ADJUST_CFA_OFFSET -4
+1: popl_cfi %ds
/*CFI_RESTORE ds;*/
-2: popl %es
- CFI_ADJUST_CFA_OFFSET -4
+2: popl_cfi %es
/*CFI_RESTORE es;*/
-3: popl %fs
- CFI_ADJUST_CFA_OFFSET -4
+3: popl_cfi %fs
/*CFI_RESTORE fs;*/
POP_GS \pop
.pushsection .fixup, "ax"
@@ -324,16 +301,12 @@ NMI_MASK = 0x80000000
ENTRY(ret_from_fork)
CFI_STARTPROC
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
call schedule_tail
GET_THREAD_INFO(%ebp)
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
- pushl $0x0202 # Reset kernel eflags
- CFI_ADJUST_CFA_OFFSET 4
- popfl
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
+ pushl_cfi $0x0202 # Reset kernel eflags
+ popfl_cfi
jmp syscall_exit
CFI_ENDPROC
END(ret_from_fork)
@@ -413,29 +386,23 @@ sysenter_past_esp:
* enough kernel state to call TRACE_IRQS_OFF can be called - but
* we immediately enable interrupts at that point anyway.
*/
- pushl $(__USER_DS)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__USER_DS
/*CFI_REL_OFFSET ss, 0*/
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET esp, 0
- pushfl
+ pushfl_cfi
orl $X86_EFLAGS_IF, (%esp)
- CFI_ADJUST_CFA_OFFSET 4
- pushl $(__USER_CS)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__USER_CS
/*CFI_REL_OFFSET cs, 0*/
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
- pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi ((TI_sysenter_return)-THREAD_SIZE_asm+8+4*4)(%esp)
CFI_REL_OFFSET eip, 0
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
ENABLE_INTERRUPTS(CLBR_NONE)
@@ -490,8 +457,7 @@ sysenter_audit:
movl %eax,%edx /* 2nd arg: syscall number */
movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
call audit_syscall_entry
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
movl PT_EAX(%esp),%eax /* reload syscall number */
jmp sysenter_do_call
@@ -535,8 +501,7 @@ ENTRY(ia32pv_sysenter_target)
addl $4,%esp
CFI_ADJUST_CFA_OFFSET -4
/* +5*4 is SS:ESP,EFLAGS,CS:EIP. +8 is esp0 setting. */
- pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
/*
* Load the potential sixth argument from user stack.
* Careful about security.
@@ -559,8 +524,7 @@ ENDPROC(ia32pv_sysenter_target)
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
- pushl %eax # save orig_eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax # save orig_eax
SAVE_ALL
GET_THREAD_INFO(%ebp)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
@@ -609,7 +573,6 @@ restore_nocheck:
jnz restore_all_enable_events # != 0 => enable event delivery
#endif
RESTORE_REGS 4 # skip orig_eax/error_code
- CFI_ADJUST_CFA_OFFSET -4
irq_return:
INTERRUPT_RETURN
.section .fixup,"ax"
@@ -663,10 +626,8 @@ ldt_ss:
shr $16, %edx
mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
- pushl $__ESPFIX_SS
- CFI_ADJUST_CFA_OFFSET 4
- push %eax /* new kernel esp */
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__ESPFIX_SS
+ pushl_cfi %eax /* new kernel esp */
/* Disable interrupts, but do not irqtrace this section: we
* will soon execute iret and the tracer was already set to
* the irqstate after the iret */
@@ -735,11 +696,9 @@ work_notifysig: # deal with pending s
ALIGN
work_notifysig_v86:
- pushl %ecx # save ti_flags for do_notify_resume
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx # save ti_flags for do_notify_resume
call save_v86_state # %eax contains pt_regs pointer
- popl %ecx
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %ecx
movl %eax, %esp
#else
movl %esp, %eax
@@ -819,14 +778,18 @@ ptregs_##name: \
#define PTREGSCALL3(name) \
ALIGN; \
ptregs_##name: \
+ CFI_STARTPROC; \
leal 4(%esp),%eax; \
- pushl %eax; \
+ pushl_cfi %eax; \
movl PT_EDX(%eax),%ecx; \
movl PT_ECX(%eax),%edx; \
movl PT_EBX(%eax),%eax; \
call sys_##name; \
addl $4,%esp; \
- ret
+ CFI_ADJUST_CFA_OFFSET -4; \
+ ret; \
+ CFI_ENDPROC; \
+ENDPROC(ptregs_##name)
PTREGSCALL1(iopl)
PTREGSCALL0(fork)
@@ -841,15 +804,19 @@ PTREGSCALL1(vm86old)
/* Clone is an oddball. The 4th arg is in %edi */
ALIGN;
ptregs_clone:
+ CFI_STARTPROC
leal 4(%esp),%eax
- pushl %eax
- pushl PT_EDI(%eax)
+ pushl_cfi %eax
+ pushl_cfi PT_EDI(%eax)
movl PT_EDX(%eax),%ecx
movl PT_ECX(%eax),%edx
movl PT_EBX(%eax),%eax
call sys_clone
addl $8,%esp
+ CFI_ADJUST_CFA_OFFSET -8
ret
+ CFI_ENDPROC
+ENDPROC(ptregs_clone)
#ifndef CONFIG_XEN
.macro FIXUP_ESPFIX_STACK
@@ -865,10 +832,8 @@ ptregs_clone:
mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
shl $16, %eax
addl %esp, %eax /* the adjusted stack pointer */
- pushl $__KERNEL_DS
- CFI_ADJUST_CFA_OFFSET 4
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $__KERNEL_DS
+ pushl_cfi %eax
lss (%esp), %esp /* switch to the normal stack segment */
CFI_ADJUST_CFA_OFFSET -8
.endm
@@ -905,8 +870,7 @@ vector=FIRST_EXTERNAL_VECTOR
.if vector <> FIRST_EXTERNAL_VECTOR
CFI_ADJUST_CFA_OFFSET -4
.endif
-1: pushl $(~vector+0x80) /* Note: always in signed byte range */
- CFI_ADJUST_CFA_OFFSET 4
+1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
.if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
jmp 2f
.endif
@@ -946,8 +910,7 @@ ENDPROC(common_interrupt)
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
- pushl $~(nr); \
- CFI_ADJUST_CFA_OFFSET 4; \
+ pushl_cfi $~(nr); \
SAVE_ALL; \
TRACE_IRQS_OFF \
movl %esp,%eax; \
@@ -984,8 +947,7 @@ ENDPROC(name)
# so we can simply throw away the new one.
ENTRY(hypervisor_callback)
RING0_INT_FRAME
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
movl PT_CS(%esp),%ecx
movl PT_EIP(%esp),%eax
@@ -1005,8 +967,7 @@ ENTRY(hypervisor_callback)
addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame.
#endif
.Ldo_upcall:
- push %esp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esp
call evtchn_do_upcall
add $4,%esp
CFI_ADJUST_CFA_OFFSET -4
@@ -1111,21 +1072,18 @@ ENTRY(failsafe_callback)
ENTRY(coprocessor_error)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_coprocessor_error
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_coprocessor_error
jmp error_code
CFI_ENDPROC
END(coprocessor_error)
ENTRY(simd_coprocessor_error)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661: pushl $do_general_protection
+661: pushl_cfi $do_general_protection
662:
.section .altinstructions,"a"
.balign 4
@@ -1140,19 +1098,16 @@ ENTRY(simd_coprocessor_error)
664:
.previous
#else
- pushl $do_simd_coprocessor_error
+ pushl_cfi $do_simd_coprocessor_error
#endif
- CFI_ADJUST_CFA_OFFSET 4
jmp error_code
CFI_ENDPROC
END(simd_coprocessor_error)
ENTRY(device_not_available)
RING0_INT_FRAME
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_device_not_available
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # mark this as an int
+ pushl_cfi $do_device_not_available
jmp error_code
CFI_ENDPROC
END(device_not_available)
@@ -1174,82 +1129,68 @@ END(native_irq_enable_sysexit)
ENTRY(overflow)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_overflow
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_overflow
jmp error_code
CFI_ENDPROC
END(overflow)
ENTRY(bounds)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_bounds
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_bounds
jmp error_code
CFI_ENDPROC
END(bounds)
ENTRY(invalid_op)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_invalid_op
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_invalid_op
jmp error_code
CFI_ENDPROC
END(invalid_op)
ENTRY(coprocessor_segment_overrun)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_coprocessor_segment_overrun
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_coprocessor_segment_overrun
jmp error_code
CFI_ENDPROC
END(coprocessor_segment_overrun)
ENTRY(invalid_TSS)
RING0_EC_FRAME
- pushl $do_invalid_TSS
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_invalid_TSS
jmp error_code
CFI_ENDPROC
END(invalid_TSS)
ENTRY(segment_not_present)
RING0_EC_FRAME
- pushl $do_segment_not_present
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_segment_not_present
jmp error_code
CFI_ENDPROC
END(segment_not_present)
ENTRY(stack_segment)
RING0_EC_FRAME
- pushl $do_stack_segment
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_stack_segment
jmp error_code
CFI_ENDPROC
END(stack_segment)
ENTRY(alignment_check)
RING0_EC_FRAME
- pushl $do_alignment_check
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_alignment_check
jmp error_code
CFI_ENDPROC
END(alignment_check)
ENTRY(divide_error)
RING0_INT_FRAME
- pushl $0 # no error code
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_divide_error
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0 # no error code
+ pushl_cfi $do_divide_error
jmp error_code
CFI_ENDPROC
END(divide_error)
@@ -1257,10 +1198,8 @@ END(divide_error)
#ifdef CONFIG_X86_MCE
ENTRY(machine_check)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl machine_check_vector
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi machine_check_vector
jmp error_code
CFI_ENDPROC
END(machine_check)
@@ -1269,18 +1208,15 @@ END(machine_check)
#ifndef CONFIG_XEN
ENTRY(spurious_interrupt_bug)
RING0_INT_FRAME
- pushl $0
- CFI_ADJUST_CFA_OFFSET 4
- pushl $do_spurious_interrupt_bug
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $0
+ pushl_cfi $do_spurious_interrupt_bug
jmp error_code
CFI_ENDPROC
#endif /* !CONFIG_XEN */
ENTRY(fixup_4gb_segment)
RING0_EC_FRAME
- pushl $do_fixup_4gb_segment
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_fixup_4gb_segment
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
@@ -1413,8 +1349,7 @@ ENTRY(ia32pv_cstar_target)
movl %ebp,%ecx
movl $__USER_CS,4(%esp)
movl 12(%esp),%ebp
- pushl %eax # save orig_eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax # save orig_eax
/*
* Load the potential sixth argument from user stack.
* Careful about security.
@@ -1545,40 +1480,29 @@ mask=0
ENTRY(page_fault)
RING0_EC_FRAME
- pushl $do_page_fault
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_page_fault
ALIGN
error_code:
/* the function address is in %gs's slot on the stack */
- pushl %fs
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %fs
/*CFI_REL_OFFSET fs, 0*/
- pushl %es
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %es
/*CFI_REL_OFFSET es, 0*/
- pushl %ds
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0*/
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
CFI_REL_OFFSET eax, 0
- pushl %ebp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebp
CFI_REL_OFFSET ebp, 0
- pushl %edi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edi
CFI_REL_OFFSET edi, 0
- pushl %esi
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %esi
CFI_REL_OFFSET esi, 0
- pushl %edx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %edx
CFI_REL_OFFSET edx, 0
- pushl %ecx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ecx
CFI_REL_OFFSET ecx, 0
- pushl %ebx
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ebx
CFI_REL_OFFSET ebx, 0
cld
movl $(__KERNEL_PERCPU), %ecx
@@ -1621,12 +1545,9 @@ END(page_fault)
movl TSS_sysenter_sp0 + \offset(%esp), %esp
CFI_DEF_CFA esp, 0
CFI_UNDEFINED eip
- pushfl
- CFI_ADJUST_CFA_OFFSET 4
- pushl $__KERNEL_CS
- CFI_ADJUST_CFA_OFFSET 4
- pushl $sysenter_past_esp
- CFI_ADJUST_CFA_OFFSET 4
+ pushfl_cfi
+ pushl_cfi $__KERNEL_CS
+ pushl_cfi $sysenter_past_esp
CFI_REL_OFFSET eip, 0
.endm
#endif /* CONFIG_XEN */
@@ -1639,8 +1560,7 @@ ENTRY(debug)
FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
debug_stack_correct:
#endif /* !CONFIG_XEN */
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # error code 0
@@ -1660,33 +1580,28 @@ END(debug)
*/
ENTRY(nmi)
RING0_INT_FRAME
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
#ifndef CONFIG_XEN
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
je nmi_espfix_stack
cmpl $ia32_sysenter_target,(%esp)
je nmi_stack_fixup
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
movl %esp,%eax
/* Do not access memory above the end of our stack page,
* it might not exist.
*/
andl $(THREAD_SIZE-1),%eax
cmpl $(THREAD_SIZE-20),%eax
- popl %eax
- CFI_ADJUST_CFA_OFFSET -4
+ popl_cfi %eax
jae nmi_stack_correct
cmpl $ia32_sysenter_target,12(%esp)
je nmi_debug_stack_check
nmi_stack_correct:
/* We have a RING0_INT_FRAME here */
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
xorl %edx,%edx # zero error code
movl %esp,%eax # pt_regs pointer
@@ -1715,18 +1630,14 @@ nmi_espfix_stack:
*
* create the pointer to lss back
*/
- pushl %ss
- CFI_ADJUST_CFA_OFFSET 4
- pushl %esp
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %ss
+ pushl_cfi %esp
addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
- pushl 16(%esp)
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi 16(%esp)
.endr
- pushl %eax
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi %eax
SAVE_ALL
FIXUP_ESPFIX_STACK # %eax == %esp
xorl %edx,%edx # zero error code
@@ -1748,8 +1659,7 @@ END(nmi)
ENTRY(int3)
RING0_INT_FRAME
- pushl $-1 # mark this as an int
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $-1 # mark this as an int
SAVE_ALL
TRACE_IRQS_OFF
xorl %edx,%edx # zero error code
@@ -1761,8 +1671,7 @@ END(int3)
ENTRY(general_protection)
RING0_EC_FRAME
- pushl $do_general_protection
- CFI_ADJUST_CFA_OFFSET 4
+ pushl_cfi $do_general_protection
jmp error_code
CFI_ENDPROC
END(general_protection)
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:09:47.000000000 +0100
@@ -204,23 +204,17 @@ NMI_MASK = 0x80000000
.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
xorl %eax, %eax
- pushq $__KERNEL_DS /* ss */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__KERNEL_DS /* ss */
/*CFI_REL_OFFSET ss,0*/
- pushq %rax /* rsp */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax /* rsp */
CFI_REL_OFFSET rsp,0
- pushq $X86_EFLAGS_IF /* eflags - interrupts on */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $X86_EFLAGS_IF /* eflags - interrupts on */
/*CFI_REL_OFFSET rflags,0*/
- pushq $__KERNEL_CS /* cs */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $__KERNEL_CS /* cs */
/*CFI_REL_OFFSET cs,0*/
- pushq \child_rip /* rip */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi \child_rip /* rip */
CFI_REL_OFFSET rip,0
- pushq %rax /* orig rax */
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rax /* orig rax */
.endm
.macro UNFAKE_STACK_FRAME
@@ -333,6 +327,7 @@ NMI_MASK = 0x80000000
#ifndef CONFIG_XEN
/* save partial stack frame */
+ .pushsection .kprobes.text, "ax"
ENTRY(save_args)
XCPT_FRAME
cld
@@ -372,6 +367,7 @@ ENTRY(save_args)
ret
CFI_ENDPROC
END(save_args)
+ .popsection
#endif
ENTRY(save_rest)
@@ -433,10 +429,8 @@ ENTRY(ret_from_fork)
LOCK ; btr $TIF_FORK,TI_flags(%r8)
- push kernel_eflags(%rip)
- CFI_ADJUST_CFA_OFFSET 8
- popf # reset kernel eflags
- CFI_ADJUST_CFA_OFFSET -8
+ pushq_cfi kernel_eflags(%rip)
+ popfq_cfi # reset kernel eflags
call schedule_tail # rdi: 'prev' task parameter
@@ -532,11 +526,9 @@ sysret_careful:
jnc sysret_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
jmp sysret_check
/* Handle a signal */
@@ -649,11 +641,9 @@ int_careful:
jnc int_very_careful
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
@@ -667,12 +657,10 @@ int_check_syscall_exit_work:
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
leaq 8(%rsp),%rdi # &ptregs -> arg1
call syscall_trace_leave
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
jmp int_restore_rest
@@ -729,9 +717,8 @@ END(ptregscall_common)
ENTRY(stub_execve)
CFI_STARTPROC
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
- CFI_REGISTER rip, r11
+ addq $8, %rsp
+ PARTIAL_FRAME 0
SAVE_REST
FIXUP_TOP_OF_STACK %r11
movq %rsp, %rcx
@@ -750,7 +737,7 @@ END(stub_execve)
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
addq $8, %rsp
- CFI_ADJUST_CFA_OFFSET -8
+ PARTIAL_FRAME 0
SAVE_REST
movq %rsp,%rdi
FIXUP_TOP_OF_STACK %r11
@@ -792,11 +779,9 @@ retint_careful:
jnc retint_signal
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- pushq %rdi
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rdi
call schedule
- popq %rdi
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rdi
GET_THREAD_INFO(%rcx)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -842,8 +827,7 @@ END(retint_check)
.macro apicinterrupt num sym do_sym
ENTRY(\sym)
INTR_FRAME
- pushq $~(\num)
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $~(\num)
interrupt \do_sym
jmp error_entry
CFI_ENDPROC
@@ -867,22 +851,10 @@ apicinterrupt X86_PLATFORM_IPI_VECTOR \
x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
-apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
- invalidate_interrupt0 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
- invalidate_interrupt1 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
- invalidate_interrupt2 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
- invalidate_interrupt3 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
- invalidate_interrupt4 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
- invalidate_interrupt5 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
- invalidate_interrupt6 smp_invalidate_interrupt
-apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
- invalidate_interrupt7 smp_invalidate_interrupt
+.irpc idx, "01234567"
+apicinterrupt (INVALIDATE_TLB_VECTOR_START)+\idx \
+ invalidate_interrupt\idx smp_invalidate_interrupt
+.endr
#endif
apicinterrupt THRESHOLD_APIC_VECTOR \
@@ -909,9 +881,9 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
-#ifdef CONFIG_PERF_EVENTS
-apicinterrupt LOCAL_PENDING_VECTOR \
- perf_pending_interrupt smp_perf_pending_interrupt
+#ifdef CONFIG_IRQ_WORK
+apicinterrupt IRQ_WORK_VECTOR \
+ irq_work_interrupt smp_irq_work_interrupt
#endif
#endif /* !CONFIG_XEN */
@@ -926,8 +898,8 @@ ENTRY(\sym)
movq 8(%rsp),%r11
CFI_RESTORE r11
movq $-1,8(%rsp) /* ORIG_RAX: no syscall to restart */
- subq $(15-1)*8,%rsp
- CFI_ADJUST_CFA_OFFSET (15-1)*8
+ subq $ORIG_RAX-R15-1*8,%rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15-1*8
call error_entry
DEFAULT_FRAME 0
movq %rsp,%rdi /* pt_regs pointer */
@@ -953,8 +925,8 @@ ENTRY(\sym)
CFI_RESTORE rcx
movq 8(%rsp),%r11
CFI_RESTORE r11
- subq $(15-2)*8,%rsp
- CFI_ADJUST_CFA_OFFSET (15-2)*8
+ subq $ORIG_RAX-R15-2*8,%rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15-2*8
call error_entry
DEFAULT_FRAME 0
movq %rsp,%rdi /* pt_regs pointer */
@@ -1074,8 +1046,7 @@ ENTRY(failsafe_callback)
CFI_RESTORE r11
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
- pushq $0
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi $0
SAVE_ALL
jmp error_exit
CFI_ENDPROC
@@ -1143,8 +1114,7 @@ END(kernel_execve)
/* Call softirq on interrupt stack. Interrupts are off. */
ENTRY(call_softirq)
CFI_STARTPROC
- push %rbp
- CFI_ADJUST_CFA_OFFSET 8
+ pushq_cfi %rbp
CFI_REL_OFFSET rbp,0
mov %rsp,%rbp
CFI_DEF_CFA_REGISTER rbp
@@ -1153,6 +1123,7 @@ ENTRY(call_softirq)
push %rbp # backlink for old unwinder
call __do_softirq
leaveq
+ CFI_RESTORE rbp
CFI_DEF_CFA_REGISTER rsp
CFI_ADJUST_CFA_OFFSET -8
decl PER_CPU_VAR(irq_count)
@@ -1191,7 +1162,7 @@ paranoidzeroentry machine_check *machine
/* ebx: no swapgs flag */
ENTRY(paranoid_exit)
- INTR_FRAME
+ DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
testl %ebx,%ebx /* swapgs needed? */
@@ -1271,7 +1242,6 @@ error_sti:
#endif
TRACE_IRQS_OFF
ret
- CFI_ENDPROC
#ifndef CONFIG_XEN
/*
@@ -1298,6 +1268,7 @@ bstep_iret:
movq %rcx,RIP+8(%rsp)
jmp error_swapgs
#endif
+ CFI_ENDPROC
END(error_entry)
@@ -1338,11 +1309,9 @@ END(do_nmi_callback)
#ifndef CONFIG_IA32_EMULATION
ENTRY(ignore_sysret)
INTR_FRAME
- popq %rcx
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %rcx
CFI_RESTORE rcx
- popq %r11
- CFI_ADJUST_CFA_OFFSET -8
+ popq_cfi %r11
CFI_RESTORE r11
mov $-ENOSYS,%eax
HYPERVISOR_IRET 0
--- head-2011-03-17.orig/arch/x86/kernel/head-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/memblock.h>
#include <linux/pci.h>
#include <asm/setup.h>
@@ -53,7 +54,7 @@ void __init reserve_ebda_region(void)
lowmem = 0x9f000;
/* reserve all memory between lowmem and the 1MB mark */
- reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
+ memblock_x86_reserve_range(lowmem, 0x100000, "* BIOS reserved");
}
#else /* CONFIG_XEN */
#include <linux/module.h>
@@ -103,10 +104,12 @@ void __init xen_start_kernel(void)
WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
VMASST_TYPE_writable_pagetables));
- reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
- __pa(xen_start_info->pt_base)
- + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
- "Xen provided");
+ memblock_init();
+ memblock_x86_reserve_range(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
+ __pa(xen_start_info->pt_base)
+ + (xen_start_info->nr_pt_frames
+ << PAGE_SHIFT),
+ "Xen provided");
#ifdef CONFIG_X86_32
{
--- head-2011-03-17.orig/arch/x86/kernel/head32-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/start_kernel.h>
#include <linux/mm.h>
+#include <linux/memblock.h>
#include <asm/setup.h>
#include <asm/sections.h>
@@ -17,6 +18,7 @@
#include <asm/apic.h>
#include <asm/io_apic.h>
#include <asm/bios_ebda.h>
+#include <asm/tlbflush.h>
static void __init i386_default_early_setup(void)
{
@@ -49,17 +51,18 @@ void __init i386_start_kernel(void)
BUG_ON(pte_index(hypervisor_virt_start));
#endif
+ memblock_init();
+
#ifdef CONFIG_X86_TRAMPOLINE
/*
* But first pinch a few for the stack/trampoline stuff
* FIXME: Don't need the extra page at 4K, but need to fix
* trampoline before removing it. (see the GDT stuff)
*/
- reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE,
- "EX TRAMPOLINE");
+ memblock_x86_reserve_range(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE");
#endif
- reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+ memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifndef CONFIG_XEN
#ifdef CONFIG_BLK_DEV_INITRD
@@ -69,7 +72,7 @@ void __init i386_start_kernel(void)
u64 ramdisk_image = boot_params.hdr.ramdisk_image;
u64 ramdisk_size = boot_params.hdr.ramdisk_size;
u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size);
- reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+ memblock_x86_reserve_range(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
--- head-2011-03-17.orig/arch/x86/kernel/head64-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head64-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -15,6 +15,7 @@
#include <linux/percpu.h>
#include <linux/start_kernel.h>
#include <linux/io.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/proto.h>
@@ -119,7 +120,9 @@ void __init x86_64_start_reservations(ch
{
copy_bootdata(__va(real_mode_data));
- reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
+ memblock_init();
+
+ memblock_x86_reserve_range(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
/*
* At this point everything still needed from the boot loader
--- head-2011-03-17.orig/arch/x86/kernel/irq-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/irq-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -71,10 +71,10 @@ static int show_other_interrupts(struct
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
seq_printf(p, " Performance monitoring interrupts\n");
- seq_printf(p, "%*s: ", prec, "PND");
+ seq_printf(p, "%*s: ", prec, "IWI");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
- seq_printf(p, " Performance pending work\n");
+ seq_printf(p, "%10u ", irq_stats(j)->apic_irq_work_irqs);
+ seq_printf(p, " IRQ work interrupts\n");
#endif
#ifndef CONFIG_XEN
if (x86_platform_ipi_callback) {
@@ -172,7 +172,7 @@ int show_interrupts(struct seq_file *p,
seq_printf(p, "%*d: ", prec, i);
for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
- seq_printf(p, " %8s", desc->chip->name);
+ seq_printf(p, " %8s", desc->irq_data.chip->name);
seq_printf(p, "-%-8s", desc->name);
if (action) {
@@ -198,7 +198,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_timer_irqs;
sum += irq_stats(cpu)->irq_spurious_count;
sum += irq_stats(cpu)->apic_perf_irqs;
- sum += irq_stats(cpu)->apic_pending_irqs;
+ sum += irq_stats(cpu)->apic_irq_work_irqs;
#endif
#ifndef CONFIG_XEN
if (x86_platform_ipi_callback)
@@ -302,6 +302,7 @@ void fixup_irqs(void)
unsigned int irq;
static int warned;
struct irq_desc *desc;
+ struct irq_data *data;
static DECLARE_BITMAP(irqs_used, NR_IRQS);
for_each_irq_desc(irq, desc) {
@@ -317,7 +318,8 @@ void fixup_irqs(void)
/* interrupt's are disabled at this point */
raw_spin_lock(&desc->lock);
- affinity = desc->affinity;
+ data = &desc->irq_data;
+ affinity = data->affinity;
if (!irq_has_action(irq) ||
cpumask_subset(affinity, cpu_online_mask)) {
raw_spin_unlock(&desc->lock);
@@ -332,16 +334,16 @@ void fixup_irqs(void)
affinity = cpu_all_mask;
}
- if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
- desc->chip->mask(irq);
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_mask)
+ data->chip->irq_mask(data);
- if (desc->chip->set_affinity)
- desc->chip->set_affinity(irq, affinity);
- else if (desc->chip != &no_irq_chip && !(warned++))
+ if (data->chip->irq_set_affinity)
+ data->chip->irq_set_affinity(data, affinity, true);
+ else if (data->chip != &no_irq_chip && !(warned++))
set_affinity = 0;
- if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
- desc->chip->unmask(irq);
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && data->chip->irq_unmask)
+ data->chip->irq_unmask(data);
raw_spin_unlock(&desc->lock);
@@ -367,9 +369,10 @@ void fixup_irqs(void)
continue;
if (xen_test_irq_pending(irq)) {
+ data = irq_get_irq_data(irq);
raw_spin_lock(&desc->lock);
- if (desc->chip->retrigger)
- desc->chip->retrigger(irq);
+ if (data->chip->irq_retrigger)
+ data->chip->irq_retrigger(data);
raw_spin_unlock(&desc->lock);
}
}
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-17/arch/x86/kernel/irq_work-xen.c 2011-02-03 11:19:35.000000000 +0100
@@ -0,0 +1,23 @@
+/*
+ * x86/Xen specific code for irq_work
+ */
+
+#include <linux/kernel.h>
+#include <linux/irq_work.h>
+#include <linux/hardirq.h>
+#include <asm/ipi.h>
+
+#ifdef CONFIG_SMP
+irqreturn_t smp_irq_work_interrupt(int irq, void *dev_id)
+{
+ inc_irq_stat(apic_irq_work_irqs);
+ irq_work_run();
+
+ return IRQ_HANDLED;
+}
+
+void arch_irq_work_raise(void)
+{
+ xen_send_IPI_self(IRQ_WORK_VECTOR);
+}
+#endif
--- head-2011-03-17.orig/arch/x86/kernel/microcode_core-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/microcode_core-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -12,7 +12,7 @@
* Software Developer's Manual
* Order Number 253668 or free download from:
*
- * http://developer.intel.com/design/pentium4/manuals/253668.htm
+ * http://developer.intel.com/Assets/PDF/manual/253668.pdf
*
* For more information, go to http://www.urbanmyth.org/microcode
*
@@ -117,6 +117,7 @@ static const struct file_operations micr
.owner = THIS_MODULE,
.write = microcode_write,
.open = microcode_open,
+ .llseek = no_llseek,
};
static struct miscdevice microcode_dev = {
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -11,6 +11,7 @@
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/kernel_stat.h>
#include <linux/mc146818rtc.h>
#include <linux/bitops.h>
@@ -686,7 +687,7 @@ static void __init smp_reserve_memory(st
{
unsigned long size = get_mpc_size(mpf->physptr);
- reserve_early_overlap_ok(mpf->physptr, mpf->physptr+size, "MP-table mpc");
+ memblock_x86_reserve_range(mpf->physptr, mpf->physptr+size, "* MP-table mpc");
}
#endif
@@ -719,7 +720,7 @@ static int __init smp_scan_config(unsign
mpf, (u64)virt_to_phys(mpf));
mem = virt_to_phys(mpf);
- reserve_early_overlap_ok(mem, mem + sizeof(*mpf), "MP-table mpf");
+ memblock_x86_reserve_range(mem, mem + sizeof(*mpf), "* MP-table mpf");
if (mpf->physptr)
smp_reserve_memory(mpf);
#else
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -11,8 +11,8 @@
#include <asm/iommu.h>
#include <asm/gart.h>
#include <asm/calgary.h>
-#include <asm/amd_iommu.h>
#include <asm/x86_init.h>
+#include <asm/iommu_table.h>
static int forbid_dac __read_mostly;
@@ -44,6 +44,8 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
+extern struct iommu_table_entry __iommu_table[], __iommu_table_end[];
+
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
@@ -142,7 +144,10 @@ static struct dma_map_ops swiotlb_dma_op
.dma_supported = swiotlb_dma_supported
};
-#define pci_xen_swiotlb_detect() 1
+static int __init pci_xen_swiotlb_detect(void)
+{
+ return 1;
+}
static void __init pci_xen_swiotlb_init(void)
{
@@ -153,26 +158,28 @@ static void __init pci_xen_swiotlb_init(
}
}
+IOMMU_INIT_FINISH(pci_xen_swiotlb_detect, NULL, pci_xen_swiotlb_init, NULL);
+
void __init pci_iommu_alloc(void)
{
+ struct iommu_table_entry *p;
+
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
- if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
- goto out;
-
- gart_iommu_hole_init();
-
- detect_calgary();
+ sort_iommu_table(__iommu_table, __iommu_table_end);
+ check_iommu_entries(__iommu_table, __iommu_table_end);
- detect_intel_iommu();
-
- /* needs to be called after gart_iommu_hole_init */
- amd_iommu_detect();
-out:
- pci_xen_swiotlb_init();
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && p->detect && p->detect() > 0) {
+ p->flags |= IOMMU_DETECTED;
+ if (p->early_init)
+ p->early_init();
+ if (p->flags & IOMMU_FINISH_IF_DETECTED)
+ break;
+ }
+ }
}
-
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_addr, gfp_t flag)
{
@@ -375,6 +382,7 @@ EXPORT_SYMBOL(dma_supported);
static int __init pci_iommu_init(void)
{
+ struct iommu_table_entry *p;
dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
#ifdef CONFIG_PCI
@@ -382,14 +390,10 @@ static int __init pci_iommu_init(void)
#endif
x86_init.iommu.iommu_init();
-#ifndef CONFIG_XEN
- if (swiotlb || xen_swiotlb) {
- printk(KERN_INFO "PCI-DMA: "
- "Using software bounce buffering for IO (SWIOTLB)\n");
- swiotlb_print_info();
- } else
- swiotlb_free();
-#endif
+ for (p = __iommu_table; p < __iommu_table_end; p++) {
+ if (p && (p->flags & IOMMU_DETECTED) && p->late_init)
+ p->late_init();
+ }
return 0;
}
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-03 16:25:01.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:25:11.000000000 +0100
@@ -31,6 +31,7 @@
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/console.h>
#include <linux/mca.h>
@@ -83,7 +84,6 @@
#include <asm/dmi.h>
#include <asm/io_apic.h>
#include <asm/ist.h>
-#include <asm/vmi.h>
#include <asm/setup_arch.h>
#include <asm/bios_ebda.h>
#include <asm/cacheflush.h>
@@ -107,11 +107,12 @@
#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
-#include <asm/k8.h>
+#include <asm/amd_nb.h>
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>
+#include <asm/alternative.h>
#ifdef CONFIG_XEN
#include <asm/hypervisor.h>
@@ -155,7 +156,6 @@ unsigned long max_pfn_mapped;
RESERVE_BRK(dmi_alloc, 65536);
#endif
-unsigned int boot_cpu_id __read_mostly;
static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
@@ -337,7 +337,7 @@ static inline void init_gbpages(void)
static void __init reserve_brk(void)
{
if (_brk_end > _brk_start)
- reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
+ memblock_x86_reserve_range(__pa(_brk_start), __pa(_brk_end), "BRK");
/* Mark brk area as locked down and no longer taking any
new allocations */
@@ -360,17 +360,16 @@ static void __init relocate_initrd(void)
char *p, *q;
/* We need to move the initrd down into lowmem */
- ramdisk_here = find_e820_area(0, end_of_lowmem, area_size,
+ ramdisk_here = memblock_find_in_range(0, end_of_lowmem, area_size,
PAGE_SIZE);
- if (ramdisk_here == -1ULL)
+ if (ramdisk_here == MEMBLOCK_ERROR)
panic("Cannot find place for new RAMDISK of size %lld\n",
ramdisk_size);
/* Note: this includes all the lowmem currently occupied by
the initrd, we rely on that fact to keep the data intact. */
- reserve_early(ramdisk_here, ramdisk_here + area_size,
- "NEW RAMDISK");
+ memblock_x86_reserve_range(ramdisk_here, ramdisk_here + area_size, "NEW RAMDISK");
initrd_start = ramdisk_here + PAGE_OFFSET;
initrd_end = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
@@ -443,7 +442,7 @@ static void __init reserve_initrd(void)
initrd_start = 0;
if (ramdisk_size >= (end_of_lowmem>>1)) {
- free_early(ramdisk_image, ramdisk_end);
+ memblock_x86_free_range(ramdisk_image, ramdisk_end);
printk(KERN_ERR "initrd too large to handle, "
"disabling initrd\n");
return;
@@ -469,7 +468,7 @@ static void __init reserve_initrd(void)
relocate_initrd();
- free_early(ramdisk_image, ramdisk_end);
+ memblock_x86_free_range(ramdisk_image, ramdisk_end);
}
#else
static void __init reserve_initrd(void)
@@ -529,7 +528,7 @@ static void __init e820_reserve_setup_da
#endif
}
-static void __init reserve_early_setup_data(void)
+static void __init memblock_x86_reserve_range_setup_data(void)
{
#ifndef CONFIG_XEN
struct setup_data *data;
@@ -542,7 +541,7 @@ static void __init reserve_early_setup_d
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
sprintf(buf, "setup data %x", data->type);
- reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
+ memblock_x86_reserve_range(pa_data, pa_data+sizeof(*data)+data->len, buf);
pa_data = data->next;
early_iounmap(data, sizeof(*data));
}
@@ -565,6 +564,18 @@ static inline unsigned long long get_tot
return total << PAGE_SHIFT;
}
+/*
+ * Keep the crash kernel below this limit. On 32 bits earlier kernels
+ * would limit the kernel to the low 512 MiB due to mapping restrictions.
+ * On 64 bits, kexec-tools currently limits us to 896 MiB; increase this
+ * limit once kexec-tools are fixed.
+ */
+#ifdef CONFIG_X86_32
+# define CRASH_KERNEL_ADDR_MAX (512 << 20)
+#else
+# define CRASH_KERNEL_ADDR_MAX (896 << 20)
+#endif
+
static void __init reserve_crashkernel(void)
{
unsigned long long total_mem;
@@ -582,23 +593,27 @@ static void __init reserve_crashkernel(v
if (crash_base <= 0) {
const unsigned long long alignment = 16<<20; /* 16M */
- crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
- alignment);
- if (crash_base == -1ULL) {
+ /*
+	 * kexec wants the bzImage to be below CRASH_KERNEL_ADDR_MAX
+ */
+ crash_base = memblock_find_in_range(alignment,
+ CRASH_KERNEL_ADDR_MAX, crash_size, alignment);
+
+ if (crash_base == MEMBLOCK_ERROR) {
pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
unsigned long long start;
- start = find_e820_area(crash_base, ULONG_MAX, crash_size,
- 1<<20);
+ start = memblock_find_in_range(crash_base,
+ crash_base + crash_size, crash_size, 1<<20);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
- reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
+ memblock_x86_reserve_range(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -684,93 +699,27 @@ static __init void reserve_ibft_region(v
#ifndef CONFIG_XEN
if (size)
- reserve_early_overlap_ok(addr, addr + size, "ibft");
+ memblock_x86_reserve_range(addr, addr + size, "* ibft");
#endif
}
-#ifdef CONFIG_X86_RESERVE_LOW_64K
-static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
-{
- printk(KERN_NOTICE
- "%s detected: BIOS may corrupt low RAM, working around it.\n",
- d->ident);
-
- e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
- sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
-
- return 0;
-}
-#endif
-
-/* List of systems that have known low memory corruption BIOS problems */
-static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
-#ifdef CONFIG_X86_RESERVE_LOW_64K
- {
- .callback = dmi_low_memory_corruption,
- .ident = "AMI BIOS",
- .matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
- },
- },
- {
- .callback = dmi_low_memory_corruption,
- .ident = "Phoenix BIOS",
- .matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
- },
- },
- {
- .callback = dmi_low_memory_corruption,
- .ident = "Phoenix/MSC BIOS",
- .matches = {
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
- },
- },
- /*
- * AMI BIOS with low memory corruption was found on Intel DG45ID and
- * DG45FC boards.
- * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
- * match only DMI_BOARD_NAME and see if there is more bad products
- * with this vendor.
- */
- {
- .callback = dmi_low_memory_corruption,
- .ident = "AMI BIOS",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
- },
- },
- {
- .callback = dmi_low_memory_corruption,
- .ident = "AMI BIOS",
- .matches = {
- DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
- },
- },
- /*
- * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so
- * match on the product name.
- */
- {
- .callback = dmi_low_memory_corruption,
- .ident = "Phoenix BIOS",
- .matches = {
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"),
- },
- },
-#endif
- {}
-};
-
#ifndef CONFIG_XEN
+static unsigned reserve_low = CONFIG_X86_RESERVE_LOW << 10;
+
static void __init trim_bios_range(void)
{
/*
* A special case is the first 4Kb of memory;
* This is a BIOS owned area, not kernel ram, but generally
* not listed as such in the E820 table.
+ *
+ * This typically reserves additional memory (64KiB by default)
+ * since some BIOSes are known to corrupt low memory. See the
+ * Kconfig help text for X86_RESERVE_LOW.
*/
- e820_update_range(0, PAGE_SIZE, E820_RAM, E820_RESERVED);
+ e820_update_range(0, ALIGN(reserve_low, PAGE_SIZE),
+ E820_RAM, E820_RESERVED);
+
/*
* special case: Some BIOSen report the PC BIOS
* area (640->1Mb) as ram even though it is not.
@@ -779,8 +728,39 @@ static void __init trim_bios_range(void)
e820_remove_range(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_RAM, 1);
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
+
+static int __init parse_reservelow(char *p)
+{
+ unsigned long long size;
+
+ if (!p)
+ return -EINVAL;
+
+ size = memparse(p, &p);
+
+ if (size < 4096)
+ size = 4096;
+
+ if (size > 640*1024)
+ size = 640*1024;
+
+ reserve_low = size;
+
+ return 0;
+}
+
+early_param("reservelow", parse_reservelow);
#endif
+static u64 __init get_max_mapped(void)
+{
+ u64 end = max_pfn_mapped;
+
+ end <<= PAGE_SHIFT;
+
+ return end;
+}
+
/*
* Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures
@@ -798,6 +778,7 @@ void __init setup_arch(char **cmdline_p)
{
int acpi = 0;
int k8 = 0;
+ unsigned long flags;
#ifdef CONFIG_XEN
unsigned int i;
unsigned long p2m_pages;
@@ -820,14 +801,27 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_X86_32
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
visws_early_detect();
+
+#ifndef CONFIG_XEN
+ /*
+ * copy kernel address range established so far and switch
+ * to the proper swapper page table
+ */
+ clone_pgd_range(swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ initial_page_table + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+
+ load_cr3(swapper_pg_dir);
+ __flush_tlb_all();
+#endif
#else
printk(KERN_INFO "Command line: %s\n", boot_command_line);
#endif
- /* VMI may relocate the fixmap; do this before touching ioremap area */
- vmi_init();
-
- /* OFW also may relocate the fixmap */
+ /*
+ * If we have OLPC OFW, we might end up relocating the fixmap due to
+ * reserve_top(), so do this before touching the ioremap area.
+ */
olpc_ofw_detect();
early_trap_init();
@@ -873,7 +867,7 @@ void __init setup_arch(char **cmdline_p)
#endif
4)) {
efi_enabled = 1;
- efi_reserve_early();
+ efi_memblock_x86_reserve_range();
}
#endif
#else /* CONFIG_XEN */
@@ -901,6 +895,7 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.arch_setup();
+ iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1;
setup_memory_map();
parse_setup_data();
/* update the e820_saved too */
@@ -953,11 +948,8 @@ void __init setup_arch(char **cmdline_p)
x86_report_nx();
- /* Must be before kernel pagetables are setup */
- vmi_activate();
-
/* after early param, so could get panic from serial */
- reserve_early_setup_data();
+ memblock_x86_reserve_range_setup_data();
if (acpi_mps_check()) {
#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
@@ -976,12 +968,9 @@ void __init setup_arch(char **cmdline_p)
if (efi_enabled)
efi_init();
- if (is_initial_xendomain()) {
+ if (is_initial_xendomain())
dmi_scan_machine();
- dmi_check_system(bad_bios_dmi_table);
- }
-
/*
* VMware detection requires dmi to be available, so this
* needs to be done after dmi_scan_machine, for the BP.
@@ -1016,8 +1005,6 @@ void __init setup_arch(char **cmdline_p)
*/
max_pfn = e820_end_of_ram_pfn();
- /* preallocate 4k for mptable mpc */
- early_reserve_e820_mpc_new();
/* update e820 for memory not covered by WB MTRRs */
mtrr_bp_init();
#ifndef CONFIG_XEN
@@ -1044,20 +1031,8 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = max_pfn;
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
-#ifndef CONFIG_XEN
- max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT;
-#endif
#endif
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
- setup_bios_corruption_check();
-#endif
-
- printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
- max_pfn_mapped<<PAGE_SHIFT);
-
- reserve_brk();
-
/*
* Find and reserve possible boot-time SMP configuration:
*/
@@ -1065,6 +1040,26 @@ void __init setup_arch(char **cmdline_p)
reserve_ibft_region();
+ /*
+	 * Need to conclude brk before memblock_x86_fill();
+	 * it could use memblock_find_in_range(), which could
+	 * overlap with the brk area.
+ */
+ reserve_brk();
+
+ memblock.current_limit = get_max_mapped();
+ memblock_x86_fill();
+
+ /* preallocate 4k for mptable mpc */
+ early_reserve_e820_mpc_new();
+
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
+ setup_bios_corruption_check();
+#endif
+
+ printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n",
+ max_pfn_mapped<<PAGE_SHIFT);
+
reserve_trampoline_memory();
#ifdef CONFIG_ACPI_SLEEP
@@ -1088,6 +1083,7 @@ void __init setup_arch(char **cmdline_p)
max_low_pfn = max_pfn;
}
#endif
+ memblock.current_limit = get_max_mapped();
/*
* NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -1135,10 +1131,7 @@ void __init setup_arch(char **cmdline_p)
#endif
initmem_init(0, max_pfn, acpi, k8);
-#ifndef CONFIG_NO_BOOTMEM
- early_res_to_bootmem(0, max_low_pfn<<PAGE_SHIFT);
-#endif
-
+ memblock_find_dma_reserve();
dma32_reserve_bootmem();
#ifdef CONFIG_KVM_CLOCK
@@ -1149,7 +1142,12 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
- setup_trampoline_page_table();
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+ /* sync back kernel address range */
+ clone_pgd_range(initial_page_table + KERNEL_PGD_BOUNDARY,
+ swapper_pg_dir + KERNEL_PGD_BOUNDARY,
+ KERNEL_PGD_PTRS);
+#endif
tboot_probe();
@@ -1305,6 +1303,10 @@ void __init setup_arch(char **cmdline_p)
x86_init.oem.banner();
mcheck_init();
+
+ local_irq_save(flags);
+ arch_init_ideal_nop5();
+ local_irq_restore(flags);
}
#ifdef CONFIG_X86_32
--- head-2011-03-17.orig/arch/x86/kernel/smp-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/smp-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -143,10 +143,10 @@ irqreturn_t smp_reboot_interrupt(int irq
return IRQ_HANDLED;
}
-void xen_smp_send_stop(void)
+void xen_stop_other_cpus(int wait)
{
unsigned long flags;
- unsigned long wait;
+ unsigned long timeout;
/*
* Use an own vector here because smp_call_function
@@ -160,9 +160,12 @@ void xen_smp_send_stop(void)
if (num_online_cpus() > 1) {
xen_send_IPI_allbutself(REBOOT_VECTOR);
- /* Don't wait longer than a second */
- wait = USEC_PER_SEC;
- while (num_online_cpus() > 1 && wait--)
+ /*
+ * Don't wait longer than a second if the caller
+ * didn't ask us to wait.
+ */
+ timeout = USEC_PER_SEC;
+ while (num_online_cpus() > 1 && (wait || timeout--))
udelay(1);
}
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -568,6 +568,7 @@ dotraplinkage void __kprobes do_debug(st
if (regs->flags & X86_VM_MASK) {
handle_vm86_trap((struct kernel_vm86_regs *) regs,
error_code, 1);
+ preempt_conditional_cli(regs);
return;
}
@@ -770,21 +771,10 @@ asmlinkage void math_state_restore(void)
}
EXPORT_SYMBOL_GPL(math_state_restore);
-#ifndef CONFIG_MATH_EMULATION
-void math_emulate(struct math_emu_info *info)
-{
- printk(KERN_EMERG
- "math-emulation not enabled and no coprocessor found.\n");
- printk(KERN_EMERG "killing %s.\n", current->comm);
- force_sig(SIGFPE, current);
- schedule();
-}
-#endif /* CONFIG_MATH_EMULATION */
-
dotraplinkage void __kprobes
do_device_not_available(struct pt_regs *regs, long error_code)
{
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
+#ifdef CONFIG_MATH_EMULATION
if (read_cr0() & X86_CR0_EM) {
struct math_emu_info info = { };
@@ -792,12 +782,12 @@ do_device_not_available(struct pt_regs *
info.regs = regs;
math_emulate(&info);
- } else {
- math_state_restore(); /* interrupts still off */
- conditional_sti(regs);
+ return;
}
-#else
- math_state_restore();
+#endif
+ math_state_restore(); /* interrupts still off */
+#ifdef CONFIG_X86_32
+ conditional_sti(regs);
#endif
}
@@ -880,20 +870,6 @@ void __init trap_init(void)
if (ret)
printk("HYPERVISOR_set_trap_table failed (%d)\n", ret);
-#ifdef CONFIG_X86_32
- if (cpu_has_fxsr) {
- printk(KERN_INFO "Enabling fast FPU save and restore... ");
- set_in_cr4(X86_CR4_OSFXSR);
- printk("done.\n");
- }
- if (cpu_has_xmm) {
- printk(KERN_INFO
- "Enabling unmasked SIMD FPU exception support... ");
- set_in_cr4(X86_CR4_OSXMMEXCPT);
- printk("done.\n");
- }
-
-#endif
/*
* Should be a barrier for any external CPU state:
*/
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -11,6 +11,7 @@
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
#include <linux/perf_event.h> /* perf_sw_event */
+#include <linux/hugetlb.h> /* hstate_index_to_shift */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -160,15 +161,20 @@ is_prefetch(struct pt_regs *regs, unsign
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk)
+ struct task_struct *tsk, int fault)
{
+ unsigned lsb = 0;
siginfo_t info;
info.si_signo = si_signo;
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
- info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
+ if (fault & VM_FAULT_HWPOISON_LARGE)
+ lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
+ if (fault & VM_FAULT_HWPOISON)
+ lsb = PAGE_SHIFT;
+ info.si_addr_lsb = lsb;
force_sig_info(si_signo, &info, tsk);
}
@@ -176,9 +182,6 @@ force_sig_info_fault(int si_signo, int s
DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);
-#define pgd_page_table(what, pg) \
- spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
-
#ifdef CONFIG_X86_32
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
@@ -240,13 +243,16 @@ void vmalloc_sync_all(void)
spin_lock_irqsave(&pgd_lock, flags);
list_for_each_entry(page, &pgd_list, lru) {
- pmd_t *pmd;
+ spinlock_t *pgt_lock;
+ pmd_t *ret;
+
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
- pgd_page_table(lock, page);
- pmd = vmalloc_sync_one(page_address(page), address);
- pgd_page_table(unlock, page);
+ spin_lock(pgt_lock);
+ ret = vmalloc_sync_one(page_address(page), address);
+ spin_unlock(pgt_lock);
- if (!pmd)
+ if (!ret)
break;
}
spin_unlock_irqrestore(&pgd_lock, flags);
@@ -268,6 +274,8 @@ static noinline __kprobes int vmalloc_fa
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
+ WARN_ON_ONCE(in_nmi());
+
/*
* Synchronize this task's top level page-table
* with the 'reference' page table.
@@ -343,31 +351,7 @@ out:
void vmalloc_sync_all(void)
{
- unsigned long address;
-
- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
- address += PGDIR_SIZE) {
-
- const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
-
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- pgd_page_table(lock, page);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- pgd_page_table(unlock, page);
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- }
+ sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END);
}
/*
@@ -388,6 +372,8 @@ static noinline __kprobes int vmalloc_fa
if (!(address >= VMALLOC_START && address < VMALLOC_END))
return -1;
+ WARN_ON_ONCE(in_nmi());
+
/*
* Copy kernel mappings over when needed. This can also
* happen within a race in page table update. In the later
@@ -750,7 +736,7 @@ __bad_area_nosemaphore(struct pt_regs *r
tsk->thread.error_code = error_code | (address >= TASK_SIZE);
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, 0);
return;
}
@@ -835,14 +821,14 @@ do_sigbus(struct pt_regs *regs, unsigned
tsk->thread.trap_no = 14;
#ifdef CONFIG_MEMORY_FAILURE
- if (fault & VM_FAULT_HWPOISON) {
+ if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
printk(KERN_ERR
"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
tsk->comm, tsk->pid, address);
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk);
+ force_sig_info_fault(SIGBUS, code, address, tsk, fault);
}
static noinline void
@@ -852,7 +838,8 @@ mm_fault_error(struct pt_regs *regs, uns
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
+ VM_FAULT_HWPOISON_LARGE))
do_sigbus(regs, error_code, address, fault);
else
BUG();
@@ -913,8 +900,14 @@ spurious_fault(unsigned long error_code,
if (pmd_large(*pmd))
return spurious_fault_check(error_code, (pte_t *) pmd);
+ /*
+ * Note: don't use pte_present() here, since it returns true
+ * if the _PAGE_PROTNONE bit is set. However, this aliases the
+	 * _PAGE_GLOBAL bit, which for kernel pages gives false positives
+ * when CONFIG_DEBUG_PAGEALLOC is used.
+ */
pte = pte_offset_kernel(pmd, address);
- if (!pte_present(*pte))
+ if (!(pte_flags(*pte) & _PAGE_PRESENT))
return 0;
ret = spurious_fault_check(error_code, pte);
@@ -934,9 +927,9 @@ spurious_fault(unsigned long error_code,
int show_unhandled_signals = 1;
static inline int
-access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
+access_error(unsigned long error_code, struct vm_area_struct *vma)
{
- if (write) {
+ if (error_code & PF_WRITE) {
/* write, present and write, not present: */
if (unlikely(!(vma->vm_flags & VM_WRITE)))
return 1;
@@ -971,8 +964,10 @@ do_page_fault(struct pt_regs *regs, unsi
struct task_struct *tsk;
unsigned long address;
struct mm_struct *mm;
- int write;
int fault;
+ int write = error_code & PF_WRITE;
+ unsigned int flags = FAULT_FLAG_ALLOW_RETRY |
+ (write ? FAULT_FLAG_WRITE : 0);
/* Set the "privileged fault" bit to something sane. */
if (user_mode_vm(regs))
@@ -1100,6 +1095,7 @@ do_page_fault(struct pt_regs *regs, unsi
bad_area_nosemaphore(regs, error_code, address);
return;
}
+retry:
down_read(&mm->mmap_sem);
} else {
/*
@@ -1143,9 +1139,7 @@ do_page_fault(struct pt_regs *regs, unsi
* we can handle it..
*/
good_area:
- write = error_code & PF_WRITE;
-
- if (unlikely(access_error(error_code, write, vma))) {
+ if (unlikely(access_error(error_code, vma))) {
bad_area_access_error(regs, error_code, address);
return;
}
@@ -1155,21 +1149,34 @@ good_area:
* make sure we exit gracefully rather than endlessly redo
* the fault:
*/
- fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0);
+ fault = handle_mm_fault(mm, vma, address, flags);
if (unlikely(fault & VM_FAULT_ERROR)) {
mm_fault_error(regs, error_code, address, fault);
return;
}
- if (fault & VM_FAULT_MAJOR) {
- tsk->maj_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
- regs, address);
- } else {
- tsk->min_flt++;
- perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
- regs, address);
+ /*
+ * Major/minor page fault accounting is only done on the
+ * initial attempt. If we go through a retry, it is extremely
+ * likely that the page will be found in page cache at that point.
+ */
+ if (flags & FAULT_FLAG_ALLOW_RETRY) {
+ if (fault & VM_FAULT_MAJOR) {
+ tsk->maj_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ regs, address);
+ } else {
+ tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ regs, address);
+ }
+ if (fault & VM_FAULT_RETRY) {
+ /* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+ * of starvation. */
+ flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ goto retry;
+ }
}
check_v8086_mode(regs, address, tsk);
--- head-2011-03-17.orig/arch/x86/mm/highmem_32-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/highmem_32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -9,6 +9,7 @@ void *kmap(struct page *page)
return page_address(page);
return kmap_high(page);
}
+EXPORT_SYMBOL(kmap);
void kunmap(struct page *page)
{
@@ -18,6 +19,7 @@ void kunmap(struct page *page)
return;
kunmap_high(page);
}
+EXPORT_SYMBOL(kunmap);
/*
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
@@ -27,10 +29,10 @@ void kunmap(struct page *page)
* However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
-void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
- enum fixed_addresses idx;
unsigned long vaddr;
+ int idx, type;
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
pagefault_disable();
@@ -38,8 +40,7 @@ void *kmap_atomic_prot(struct page *page
if (!PageHighMem(page))
return page_address(page);
- debug_kmap_atomic(type);
-
+ type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR*smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
BUG_ON(!pte_none(*(kmap_pte-idx)));
@@ -47,44 +48,57 @@ void *kmap_atomic_prot(struct page *page
return (void *)vaddr;
}
+EXPORT_SYMBOL(kmap_atomic_prot);
-void *kmap_atomic(struct page *page, enum km_type type)
+void *__kmap_atomic(struct page *page)
+{
+ return kmap_atomic_prot(page, kmap_prot);
+}
+EXPORT_SYMBOL(__kmap_atomic);
+
+/*
+ * This is the same as kmap_atomic() but can map memory that doesn't
+ * have a struct page associated with it.
+ */
+void *kmap_atomic_pfn(unsigned long pfn)
{
- return kmap_atomic_prot(page, type, kmap_prot);
+ return kmap_atomic_prot_pfn(pfn, kmap_prot);
}
+EXPORT_SYMBOL_GPL(kmap_atomic_pfn);
-void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
+void __kunmap_atomic(void *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- /*
- * Force other mappings to Oops if they'll try to access this pte
- * without first remap it. Keeping stale mappings around is a bad idea
- * also, in case the page changes cacheability attributes or becomes
- * a protected page in a hypervisor.
- */
- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ int idx, type;
+
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+ /*
+ * Force other mappings to Oops if they'll try to access this
+		 * pte without first remapping it. Keeping stale mappings around
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
kpte_clear_flush(kmap_pte-idx, vaddr);
- else {
+ kmap_atomic_idx_pop();
+ }
#ifdef CONFIG_DEBUG_HIGHMEM
+ else {
BUG_ON(vaddr < PAGE_OFFSET);
BUG_ON(vaddr >= (unsigned long)high_memory);
-#endif
}
+#endif
pagefault_enable();
}
-
-/*
- * This is the same as kmap_atomic() but can map memory that doesn't
- * have a struct page associated with it.
- */
-void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
-{
- return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
-}
-EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
+EXPORT_SYMBOL(__kunmap_atomic);
struct page *kmap_atomic_to_page(void *ptr)
{
@@ -98,6 +112,7 @@ struct page *kmap_atomic_to_page(void *p
pte = kmap_pte - (idx - FIX_KMAP_BEGIN);
return pte_page(*pte);
}
+EXPORT_SYMBOL(kmap_atomic_to_page);
void clear_highpage(struct page *page)
{
@@ -117,6 +132,7 @@ void clear_highpage(struct page *page)
clear_page(kaddr);
kunmap_atomic(kaddr, KM_USER0);
}
+EXPORT_SYMBOL(clear_highpage);
void copy_highpage(struct page *to, struct page *from)
{
@@ -143,14 +159,6 @@ void copy_highpage(struct page *to, stru
kunmap_atomic(vfrom, KM_USER0);
kunmap_atomic(vto, KM_USER1);
}
-
-EXPORT_SYMBOL(kmap);
-EXPORT_SYMBOL(kunmap);
-EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic_notypecheck);
-EXPORT_SYMBOL(kmap_atomic_prot);
-EXPORT_SYMBOL(kmap_atomic_to_page);
-EXPORT_SYMBOL(clear_highpage);
EXPORT_SYMBOL(copy_highpage);
void __init set_highmem_pages_init(void)
--- head-2011-03-17.orig/arch/x86/mm/init-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -2,6 +2,7 @@
#include <linux/initrd.h>
#include <linux/ioport.h>
#include <linux/swap.h>
+#include <linux/memblock.h>
#include <linux/bootmem.h>
#include <asm/cacheflush.h>
@@ -340,7 +341,7 @@ unsigned long __init_refok init_memory_m
__flush_tlb_all();
if (!after_bootmem && e820_table_top > e820_table_start)
- reserve_early(e820_table_start << PAGE_SHIFT,
+ memblock_x86_reserve_range(e820_table_start << PAGE_SHIFT,
e820_table_top << PAGE_SHIFT, "PGTABLE");
if (!after_bootmem)
--- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -25,6 +25,7 @@
#include <linux/pfn.h>
#include <linux/poison.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/memory_hotplug.h>
#include <linux/initrd.h>
@@ -70,7 +71,7 @@ static __init void *alloc_low_page(void)
panic("alloc_low_page: ran out of memory");
adr = __va(pfn * PAGE_SIZE);
- memset(adr, 0, PAGE_SIZE);
+ clear_page(adr);
return adr;
}
@@ -458,49 +459,28 @@ static void __init add_one_highpage_init
totalhigh_pages++;
}
-struct add_highpages_data {
- unsigned long start_pfn;
- unsigned long end_pfn;
-};
-
-static int __init add_highpages_work_fn(unsigned long start_pfn,
- unsigned long end_pfn, void *datax)
-{
- int node_pfn;
- struct page *page;
- unsigned long final_start_pfn, final_end_pfn;
- struct add_highpages_data *data;
-
- data = (struct add_highpages_data *)datax;
-
- final_start_pfn = max(start_pfn, data->start_pfn);
- final_end_pfn = min(end_pfn, data->end_pfn);
- if (final_start_pfn >= final_end_pfn)
- return 0;
-
- for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
- node_pfn++) {
- if (!pfn_valid(node_pfn))
- continue;
- page = pfn_to_page(node_pfn);
- add_one_highpage_init(page);
- }
-
- return 0;
-
-}
-
-void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
+void __init add_highpages_with_active_regions(int nid,
+ unsigned long start_pfn, unsigned long end_pfn)
{
- struct add_highpages_data data;
+ struct range *range;
+ int nr_range;
+ int i;
- data.start_pfn = start_pfn;
- data.end_pfn = end_pfn;
+ nr_range = __get_free_all_memory_range(&range, nid, start_pfn, end_pfn);
- work_with_active_regions(nid, add_highpages_work_fn, &data);
+ for (i = 0; i < nr_range; i++) {
+ struct page *page;
+ int node_pfn;
+
+ for (node_pfn = range[i].start; node_pfn < range[i].end;
+ node_pfn++) {
+ if (!pfn_valid(node_pfn))
+ continue;
+ page = pfn_to_page(node_pfn);
+ add_one_highpage_init(page);
+ }
+ }
}
-
#else
static inline void permanent_kmaps_init(pgd_t *pgd_base)
{
@@ -550,48 +530,6 @@ static void __init pagetable_init(void)
permanent_kmaps_init(pgd_base);
}
-#if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN)
-/*
- * ACPI suspend needs this for resume, because things like the intel-agp
- * driver might have split up a kernel 4MB mapping.
- */
-char swsusp_pg_dir[PAGE_SIZE]
- __attribute__ ((aligned(PAGE_SIZE)));
-
-static inline void save_pg_dir(void)
-{
- memcpy(swsusp_pg_dir, swapper_pg_dir, PAGE_SIZE);
-}
-#else /* !CONFIG_ACPI_SLEEP */
-static inline void save_pg_dir(void)
-{
-}
-#endif /* !CONFIG_ACPI_SLEEP */
-
-void zap_low_mappings(bool early)
-{
- int i;
-
- /*
- * Zap initial low-memory mappings.
- *
- * Note that "pgd_clear()" doesn't do it for
- * us, because pgd_clear() is a no-op on i386.
- */
- for (i = 0; i < KERNEL_PGD_BOUNDARY; i++) {
-#if defined(CONFIG_X86_PAE) && !defined(CONFIG_XEN)
- set_pgd(swapper_pg_dir+i, __pgd(1 + __pa(empty_zero_page)));
-#else
- set_pgd(swapper_pg_dir+i, __pgd(0));
-#endif
- }
-
- if (early)
- __flush_tlb();
- else
- flush_tlb_all();
-}
-
pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
EXPORT_SYMBOL_GPL(__supported_pte_mask);
@@ -714,14 +652,14 @@ void __init initmem_init(unsigned long s
highstart_pfn = highend_pfn = max_pfn;
if (max_pfn > max_low_pfn)
highstart_pfn = max_low_pfn;
- e820_register_active_regions(0, 0, highend_pfn);
+ memblock_x86_register_active_regions(0, 0, highend_pfn);
sparse_memory_present_with_active_regions(0);
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
pages_to_mb(highend_pfn - highstart_pfn));
num_physpages = highend_pfn;
high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
#else
- e820_register_active_regions(0, 0, max_low_pfn);
+ memblock_x86_register_active_regions(0, 0, max_low_pfn);
sparse_memory_present_with_active_regions(0);
num_physpages = max_low_pfn;
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
@@ -752,75 +690,18 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
-#ifndef CONFIG_NO_BOOTMEM
-static unsigned long __init setup_node_bootmem(int nodeid,
- unsigned long start_pfn,
- unsigned long end_pfn,
- unsigned long bootmap)
-{
- unsigned long bootmap_size;
-
- /* don't touch min_low_pfn */
- bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
- bootmap >> PAGE_SHIFT,
- start_pfn, end_pfn);
- printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
- nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
- printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
- nodeid, bootmap, bootmap + bootmap_size);
- free_bootmem_with_active_regions(nodeid, end_pfn);
-
- return bootmap + bootmap_size;
-}
-#endif
-
void __init setup_bootmem_allocator(void)
{
-#ifndef CONFIG_NO_BOOTMEM
- int nodeid;
- unsigned long bootmap_size, bootmap;
- unsigned long end_xen_pfn = min(max_low_pfn, xen_start_info->nr_pages);
-
- /*
- * Initialize the boot-time allocator (with low memory only):
- */
- bootmap_size = bootmem_bootmap_pages(end_xen_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(0, min(max_pfn_mapped,
- xen_start_info->nr_pages)<<PAGE_SHIFT,
- bootmap_size, PAGE_SIZE);
- if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n", bootmap_size);
- reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
-#elif defined(CONFIG_XEN)
+#ifdef CONFIG_XEN
if (max_low_pfn > xen_start_info->nr_pages)
- reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
- max_low_pfn << PAGE_SHIFT, "BALLOON");
+ memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
+ max_low_pfn << PAGE_SHIFT, "BALLOON");
#endif
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
max_pfn_mapped<<PAGE_SHIFT);
printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
-#ifndef CONFIG_NO_BOOTMEM
- for_each_online_node(nodeid) {
- unsigned long start_pfn, end_pfn;
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- start_pfn = node_start_pfn[nodeid];
- end_pfn = node_end_pfn[nodeid];
- if (start_pfn > end_xen_pfn)
- continue;
- if (end_pfn > end_xen_pfn)
- end_pfn = end_xen_pfn;
-#else
- start_pfn = 0;
- end_pfn = end_xen_pfn;
-#endif
- bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
- bootmap);
- }
-#endif
-
after_bootmem = 1;
}
@@ -870,8 +751,8 @@ unsigned long __init extend_init_mapping
}
if (start_pfn > start)
- reserve_early(start << PAGE_SHIFT,
- start_pfn << PAGE_SHIFT, "INITMAP");
+ memblock_x86_reserve_range(start << PAGE_SHIFT,
+ start_pfn << PAGE_SHIFT, "INITMAP");
return start_pfn;
}
@@ -1026,9 +907,6 @@ void __init mem_init(void)
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
- save_pg_dir();
- zap_low_mappings(true);
-
SetPagePinned(virt_to_page(init_mm.pgd));
}
@@ -1139,8 +1017,3 @@ void mark_rodata_ro(void)
}
#endif
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
- int flags)
-{
- return reserve_bootmem(phys, len, flags);
-}
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -24,6 +24,7 @@
#include <linux/initrd.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/proc_fs.h>
#include <linux/pci.h>
#include <linux/pfn.h>
@@ -54,7 +55,6 @@
#include <asm/cacheflush.h>
#include <asm/init.h>
#include <asm/setup.h>
-#include <linux/bootmem.h>
#include <xen/features.h>
@@ -164,6 +164,43 @@ static int __init nonx32_setup(char *str
__setup("noexec32=", nonx32_setup);
/*
+ * When memory is added/removed, make sure all processes' MMs have
+ * suitable PGD entries in the local PGD level page.
+ */
+void sync_global_pgds(unsigned long start, unsigned long end)
+{
+ unsigned long address;
+
+ for (address = start; address <= end; address += PGDIR_SIZE) {
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ spinlock_t *pgt_lock;
+
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
+ spin_lock(pgt_lock);
+
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd)
+ != pgd_page_vaddr(*pgd_ref));
+
+ spin_unlock(pgt_lock);
+ }
+ spin_unlock_irqrestore(&pgd_lock, flags);
+ }
+}
+
+/*
* NOTE: This function is marked __ref because it calls __init function
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
*/
@@ -370,7 +407,7 @@ static __ref void *alloc_low_page(unsign
panic("alloc_low_page: ran out of memory");
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
- memset(adr, 0, PAGE_SIZE);
+ clear_page(adr);
*phys = pfn * PAGE_SIZE;
return adr;
}
@@ -772,11 +809,13 @@ kernel_physical_mapping_init(unsigned lo
unsigned long end,
unsigned long page_size_mask)
{
-
+ bool pgd_changed = false;
unsigned long next, last_map_addr = end;
+ unsigned long addr;
start = (unsigned long)__va(start);
end = (unsigned long)__va(end);
+ addr = start;
for (; start < end; start = next) {
pgd_t *pgd = pgd_offset_k(start);
@@ -808,9 +847,13 @@ kernel_physical_mapping_init(unsigned lo
spin_lock(&init_mm.page_table_lock);
pgd_populate(&init_mm, pgd, __va(pud_phys));
spin_unlock(&init_mm.page_table_lock);
+ pgd_changed = true;
}
}
+ if (pgd_changed)
+ sync_global_pgds(addr, end);
+
return last_map_addr;
}
@@ -818,31 +861,11 @@ kernel_physical_mapping_init(unsigned lo
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
int acpi, int k8)
{
-#ifndef CONFIG_NO_BOOTMEM
- unsigned long bootmap_size, bootmap;
-
- e820_register_active_regions(0, start_pfn, end_pfn);
-#ifdef CONFIG_XEN
- if (end_pfn > xen_start_info->nr_pages)
- end_pfn = xen_start_info->nr_pages;
-#endif
- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
- bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
- PAGE_SIZE);
- if (bootmap == -1L)
- panic("Cannot find bootmem map of size %ld\n", bootmap_size);
- reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
- /* don't touch min_low_pfn */
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
- 0, end_pfn);
- free_bootmem_with_active_regions(0, end_pfn);
-#else
- e820_register_active_regions(0, start_pfn, end_pfn);
+ memblock_x86_register_active_regions(0, start_pfn, end_pfn);
#ifdef CONFIG_XEN
if (end_pfn > xen_start_info->nr_pages)
- reserve_early(xen_start_info->nr_pages << PAGE_SHIFT,
- end_pfn << PAGE_SHIFT, "BALLOON");
-#endif
+ memblock_x86_reserve_range(xen_start_info->nr_pages << PAGE_SHIFT,
+ end_pfn << PAGE_SHIFT, "BALLOON");
#endif
}
#endif
@@ -1062,54 +1085,6 @@ void mark_rodata_ro(void)
#endif
-int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
- int flags)
-{
-#ifdef CONFIG_NUMA
- int nid, next_nid;
- int ret;
-#endif
- unsigned long pfn = phys >> PAGE_SHIFT;
-
- if (pfn >= max_pfn) {
- /*
- * This can happen with kdump kernels when accessing
- * firmware tables:
- */
- if (pfn < max_pfn_mapped)
- return -EFAULT;
-
- printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
- phys, len);
- return -EFAULT;
- }
-
- /* Should check here against the e820 map to avoid double free */
-#ifdef CONFIG_NUMA
- nid = phys_to_nid(phys);
- next_nid = phys_to_nid(phys + len - 1);
- if (nid == next_nid)
- ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
- else
- ret = reserve_bootmem(phys, len, flags);
-
- if (ret != 0)
- return ret;
-
-#else
- reserve_bootmem(phys, len, flags);
-#endif
-
-#ifndef CONFIG_XEN
- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
- dma_reserve += len / PAGE_SIZE;
- set_dma_reserve(dma_reserve);
- }
-#endif
-
- return 0;
-}
-
int kern_addr_valid(unsigned long addr)
{
unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
@@ -1281,6 +1256,7 @@ vmemmap_populate(struct page *start_page
}
}
+ sync_global_pgds((unsigned long)start_page, end);
return 0;
}
--- head-2011-03-17.orig/arch/x86/mm/iomap_32-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/iomap_32-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -49,21 +49,20 @@ int iomap_create_wc(resource_size_t base
}
EXPORT_SYMBOL_GPL(iomap_create_wc);
-void
-iomap_free(resource_size_t base, unsigned long size)
+void iomap_free(resource_size_t base, unsigned long size)
{
io_free_memtype(base, base + size);
}
EXPORT_SYMBOL_GPL(iomap_free);
-void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
+void *kmap_atomic_prot_pfn(unsigned long pfn, pgprot_t prot)
{
- enum fixed_addresses idx;
unsigned long vaddr;
+ int idx, type;
pagefault_disable();
- debug_kmap_atomic(type);
+ type = kmap_atomic_idx_push();
idx = type + KM_TYPE_NR * smp_processor_id();
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
set_pte_at(&init_mm, vaddr, kmap_pte - idx, pfn_pte(pfn, prot));
@@ -73,10 +72,10 @@ void *kmap_atomic_prot_pfn(unsigned long
}
/*
- * Map 'mfn' using fixed map 'type' and protections 'prot'
+ * Map 'mfn' using protections 'prot'
*/
void __iomem *
-iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
+iomap_atomic_prot_pfn(unsigned long mfn, pgprot_t prot)
{
/*
* For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
@@ -88,24 +87,34 @@ iomap_atomic_prot_pfn(unsigned long mfn,
prot = PAGE_KERNEL_UC_MINUS;
pgprot_val(prot) |= _PAGE_IOMAP;
- return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, type, prot);
+ return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
void
-iounmap_atomic(void __iomem *kvaddr, enum km_type type)
+iounmap_atomic(void __iomem *kvaddr)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
- enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
- /*
- * Force other mappings to Oops if they'll try to access this pte
- * without first remap it. Keeping stale mappings around is a bad idea
- * also, in case the page changes cacheability attributes or becomes
- * a protected page in a hypervisor.
- */
- if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ if (vaddr >= __fix_to_virt(FIX_KMAP_END) &&
+ vaddr <= __fix_to_virt(FIX_KMAP_BEGIN)) {
+ int idx, type;
+
+ type = kmap_atomic_idx();
+ idx = type + KM_TYPE_NR * smp_processor_id();
+
+#ifdef CONFIG_DEBUG_HIGHMEM
+ WARN_ON_ONCE(vaddr != __fix_to_virt(FIX_KMAP_BEGIN + idx));
+#endif
+ /*
+ * Force other mappings to Oops if they'll try to access this
+	 * pte without first remapping it. Keeping stale mappings around
+ * is a bad idea also, in case the page changes cacheability
+ * attributes or becomes a protected page in a hypervisor.
+ */
kpte_clear_flush(kmap_pte-idx, vaddr);
+ kmap_atomic_idx_pop();
+ }
pagefault_enable();
}
--- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:42:02.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:42:09.000000000 +0100
@@ -532,6 +532,11 @@ static inline pte_t * __init early_iorem
return &bm_pte[pte_index(addr)];
}
+bool __init is_early_ioremap_ptep(pte_t *ptep)
+{
+ return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)];
+}
+
static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
void __init early_ioremap_init(void)
--- head-2011-03-17.orig/arch/x86/mm/memblock.c 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/memblock.c 2011-02-01 15:09:47.000000000 +0100
@@ -293,6 +293,11 @@ static int __init memblock_x86_find_acti
{
u64 align = PAGE_SIZE;
+#ifdef CONFIG_XEN
+ if (last_pfn > xen_start_info->nr_pages)
+ last_pfn = xen_start_info->nr_pages;
+#endif
+
*ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT;
*ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT;
@@ -325,6 +330,11 @@ void __init memblock_x86_register_active
if (memblock_x86_find_active_region(r, start_pfn, last_pfn,
&ei_startpfn, &ei_endpfn))
add_active_range(nid, ei_startpfn, ei_endpfn);
+
+#ifdef CONFIG_XEN
+ BUG_ON(nid);
+ add_active_range(nid, last_pfn, last_pfn);
+#endif
}
/*
--- head-2011-03-17.orig/arch/x86/mm/pgtable-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pgtable-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -428,7 +428,19 @@ static inline void pgd_list_del(pgd_t *p
#define UNSHARED_PTRS_PER_PGD \
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
-static void pgd_ctor(pgd_t *pgd)
+
+static void pgd_set_mm(pgd_t *pgd, struct mm_struct *mm)
+{
+ BUILD_BUG_ON(sizeof(virt_to_page(pgd)->index) < sizeof(mm));
+ virt_to_page(pgd)->index = (pgoff_t)mm;
+}
+
+struct mm_struct *pgd_page_get_mm(struct page *page)
+{
+ return (struct mm_struct *)page->index;
+}
+
+static void pgd_ctor(struct mm_struct *mm, pgd_t *pgd)
{
pgd_test_and_unpin(pgd);
@@ -441,10 +453,6 @@ static void pgd_ctor(pgd_t *pgd)
clone_pgd_range(pgd + KERNEL_PGD_BOUNDARY,
swapper_pg_dir + KERNEL_PGD_BOUNDARY,
KERNEL_PGD_PTRS);
- paravirt_alloc_pmd_clone(__pa(pgd) >> PAGE_SHIFT,
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
- KERNEL_PGD_BOUNDARY,
- KERNEL_PGD_PTRS);
}
#ifdef CONFIG_X86_64
@@ -454,8 +462,10 @@ static void pgd_ctor(pgd_t *pgd)
#endif
/* list required to sync kernel mapping updates */
- if (!SHARED_KERNEL_PMD)
+ if (!SHARED_KERNEL_PMD) {
+ pgd_set_mm(pgd, mm);
pgd_list_add(pgd);
+ }
}
static void pgd_dtor(pgd_t *pgd)
@@ -662,12 +672,9 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
}
#endif
- pgd_ctor(pgd);
+ pgd_ctor(mm, pgd);
pgd_prepopulate_pmd(mm, pgd, pmds);
- /* Store a back link for vmalloc_sync_all(). */
- set_page_private(virt_to_page(pgd), (unsigned long)mm);
-
spin_unlock_irqrestore(&pgd_lock, flags);
return pgd;
--- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -590,27 +590,28 @@ static __init int intel_router_probe(str
case PCI_DEVICE_ID_INTEL_ICH9_3:
case PCI_DEVICE_ID_INTEL_ICH9_4:
case PCI_DEVICE_ID_INTEL_ICH9_5:
- case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
+ case PCI_DEVICE_ID_INTEL_EP80579_0:
case PCI_DEVICE_ID_INTEL_ICH10_0:
case PCI_DEVICE_ID_INTEL_ICH10_1:
case PCI_DEVICE_ID_INTEL_ICH10_2:
case PCI_DEVICE_ID_INTEL_ICH10_3:
+ case PCI_DEVICE_ID_INTEL_PATSBURG_LPC:
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
return 1;
}
- if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) &&
- (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
+ if ((device >= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MIN) &&
+ (device <= PCI_DEVICE_ID_INTEL_5_3400_SERIES_LPC_MAX)) {
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
return 1;
}
- if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) &&
- (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) {
+ if ((device >= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MIN) &&
+ (device <= PCI_DEVICE_ID_INTEL_COUGARPOINT_LPC_MAX)) {
r->name = "PIIX/ICH";
r->get = pirq_piix_get;
r->set = pirq_piix_set;
--- head-2011-03-17.orig/arch/x86/pci/pcifront.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/pci/pcifront.c 2011-02-01 15:09:47.000000000 +0100
@@ -16,7 +16,7 @@ static int pcifront_enable_irq(struct pc
{
u8 irq;
pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &irq);
- if (!irq_to_desc_alloc_node(irq, numa_node_id()))
+ if (!alloc_irq_and_cfg_at(irq, numa_node_id()))
return -ENOMEM;
evtchn_register_pirq(irq);
dev->irq = irq;
--- head-2011-03-17.orig/arch/x86/xen/Kconfig 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/arch/x86/xen/Kconfig 2011-02-01 15:09:47.000000000 +0100
@@ -15,13 +15,16 @@ config PARAVIRT_XEN
config XEN_DOM0
def_bool y
- depends on XEN && PCI_XEN && SWIOTLB_XEN
+ depends on PARAVIRT_XEN && PCI_XEN && SWIOTLB_XEN
depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI
# Dummy symbol since people have come to rely on the PRIVILEGED_GUEST
# name in tools.
-config XEN_PRIVILEGED_GUEST
- def_bool XEN_DOM0
+# This doesn't work together with our identical symbol in drivers/xen/Kconfig
+# (produces a recursive dependency), and renaming it is pointless given that
+# it's meant as a compatibility thing.
+#config XEN_PRIVILEGED_GUEST
+# def_bool XEN_DOM0
config XEN_PVHVM
def_bool y
--- head-2011-03-17.orig/drivers/pci/Kconfig 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/drivers/pci/Kconfig 2011-02-01 15:09:47.000000000 +0100
@@ -61,9 +61,9 @@ config PCI_STUB
When in doubt, say N.
-config XEN_PCIDEV_FRONTEND
+config PARAVIRT_XEN_PCIDEV_FRONTEND
tristate "Xen PCI Frontend"
- depends on PCI && X86 && XEN
+ depends on PCI && X86 && PARAVIRT_XEN
select HOTPLUG
select PCI_XEN
select XEN_XENBUS_FRONTEND
@@ -72,9 +72,18 @@ config XEN_PCIDEV_FRONTEND
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
+config XEN_PCIDEV_FRONTEND
+ def_bool y
+ prompt "Xen PCI Frontend" if X86_64
+ depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
+ select HOTPLUG
+ help
+ The PCI device frontend driver allows the kernel to import arbitrary
+ PCI devices from a PCI backend to support PCI driver domains.
+
config XEN_PCIDEV_FE_DEBUG
bool "Xen PCI Frontend debugging"
- depends on XEN_PCIDEV_FRONTEND && PCI_DEBUG
+ depends on XEN_PCIDEV_FRONTEND || (PARAVIRT_XEN_PCIDEV_FRONTEND && PCI_DEBUG)
help
Say Y here if you want the Xen PCI frontend to produce a bunch of debug
messages to the system log. Select this if you are having a
--- head-2011-03-17.orig/drivers/pci/Makefile 2011-01-31 14:32:40.000000000 +0100
+++ head-2011-03-17/drivers/pci/Makefile 2011-02-01 15:09:47.000000000 +0100
@@ -71,6 +71,6 @@ obj-$(CONFIG_PCI_SYSCALL) += syscall.o
obj-$(CONFIG_PCI_STUB) += pci-stub.o
-obj-$(CONFIG_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
+obj-$(CONFIG_PARAVIRT_XEN_PCIDEV_FRONTEND) += xen-pcifront.o
ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
--- head-2011-03-17.orig/drivers/pci/xen-pcifront.c 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/drivers/pci/xen-pcifront.c 2011-02-01 15:09:47.000000000 +0100
@@ -1118,7 +1118,6 @@ static const struct xenbus_device_id xen
static struct xenbus_driver xenbus_pcifront_driver = {
.name = "pcifront",
- .owner = THIS_MODULE,
.ids = xenpci_ids,
.probe = pcifront_xenbus_probe,
.remove = pcifront_xenbus_remove,
--- head-2011-03-17.orig/drivers/xen/Kconfig 2011-02-02 15:37:53.000000000 +0100
+++ head-2011-03-17/drivers/xen/Kconfig 2011-02-01 15:09:47.000000000 +0100
@@ -20,10 +20,6 @@ config XEN_PRIVILEGED_GUEST
config XEN_UNPRIVILEGED_GUEST
def_bool !XEN_PRIVILEGED_GUEST
select PM
- select PM_SLEEP
- select PM_SLEEP_SMP if SMP
- select PM_RUNTIME if PCI
- select PM_OPS if PCI
select SUSPEND
config XEN_PRIVCMD
--- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-24 15:05:06.000000000 +0100
+++ head-2011-03-17/drivers/xen/Makefile 2011-02-24 15:17:40.000000000 +0100
@@ -1,6 +1,8 @@
obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o
+xen-biomerge-$(CONFIG_PARAVIRT_XEN) := biomerge.o
xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
+xen-evtchn-name-$(CONFIG_PARAVIRT_XEN) := xen-evtchn
xen-balloon-$(CONFIG_XEN) := balloon/
obj-$(CONFIG_XEN) += core/
@@ -9,6 +11,7 @@ obj-y += xenbus/
obj-$(CONFIG_XEN) += char/
xen-backend-$(CONFIG_XEN_BACKEND) := util.o
+xen-evtchn-name-$(CONFIG_XEN) := evtchn
nostackp := $(call cc-option, -fno-stack-protector)
ifeq ($(CONFIG_PARAVIRT_XEN),y)
@@ -16,14 +19,19 @@ CFLAGS_features.o := $(nostackp)
endif
obj-$(CONFIG_XEN) += features.o $(xen-backend-y) $(xen-backend-m)
+obj-$(CONFIG_BLOCK) += $(xen-biomerge-y)
obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
-obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
+obj-$(CONFIG_XEN_DEV_EVTCHN) += $(xen-evtchn-name-y).o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
+obj-$(CONFIG_XEN_DOM0) += pci.o
+
+xen-evtchn-y := evtchn.o
+
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ blktap2-new/
--- head-2011-03-17.orig/drivers/xen/balloon/balloon.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/xen/balloon/balloon.c 2011-02-01 15:09:47.000000000 +0100
@@ -39,7 +39,6 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/mman.h>
-#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/highmem.h>
--- head-2011-03-17.orig/drivers/xen/blkback/blkback.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkback/blkback.c 2011-02-01 15:09:47.000000000 +0100
@@ -406,7 +406,7 @@ static void dispatch_rw_block_io(blkif_t
operation = WRITE;
break;
case BLKIF_OP_WRITE_BARRIER:
- operation = WRITE_BARRIER;
+ operation = WRITE_FLUSH_FUA;
break;
default:
operation = 0; /* make gcc happy */
@@ -415,7 +415,7 @@ static void dispatch_rw_block_io(blkif_t
/* Check that number of segments is sane. */
nseg = req->nr_segments;
- if (unlikely(nseg == 0 && operation != WRITE_BARRIER) ||
+ if (unlikely(nseg == 0 && req->operation != BLKIF_OP_WRITE_BARRIER) ||
unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
DPRINTK("Bad number of segments in request (%d)\n", nseg);
goto fail_response;
@@ -525,7 +525,7 @@ static void dispatch_rw_block_io(blkif_t
}
if (!bio) {
- BUG_ON(operation != WRITE_BARRIER);
+ BUG_ON(!(operation & (REQ_FLUSH|REQ_FUA)));
bio = bio_alloc(GFP_KERNEL, 0);
if (unlikely(bio == NULL))
goto fail_put_bio;
@@ -540,7 +540,7 @@ static void dispatch_rw_block_io(blkif_t
if (operation == READ)
blkif->st_rd_sect += preq.nr_sects;
- else if (operation == WRITE || operation == WRITE_BARRIER)
+ else
blkif->st_wr_sect += preq.nr_sects;
return;
--- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-02-01 15:09:47.000000000 +0100
@@ -369,20 +369,23 @@ static void connect(struct blkfront_info
/*
* If there's no "feature-barrier" defined, then it means
* we're dealing with a very old backend which writes
- * synchronously; draining will do what needs to get done.
+ * synchronously; nothing to do.
*
- * If there are barriers, then we can do full queued writes
- * with tagged barriers.
- *
- * If barriers are not supported, then there's no much we can
- * do, so just set ordering to NONE.
+ * If there are barriers, then we use flush.
*/
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ if (!err && barrier)
+ info->feature_flush = REQ_FLUSH | REQ_FUA;
+ else
+ info->feature_flush = 0;
+#else
if (err)
- info->feature_barrier = QUEUE_ORDERED_DRAIN;
+ info->feature_flush = QUEUE_ORDERED_DRAIN;
else if (barrier)
- info->feature_barrier = QUEUE_ORDERED_TAG;
+ info->feature_flush = QUEUE_ORDERED_TAG;
else
- info->feature_barrier = QUEUE_ORDERED_NONE;
+ info->feature_flush = QUEUE_ORDERED_NONE;
+#endif
err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
if (err) {
@@ -477,7 +480,7 @@ static inline void ADD_ID_TO_FREELIST(
struct blkfront_info *info, unsigned long id)
{
info->shadow[id].req.id = info->shadow_free;
- info->shadow[id].request = 0;
+ info->shadow[id].request = NULL;
info->shadow_free = id;
}
@@ -658,14 +661,11 @@ int blkif_getgeo(struct block_device *bd
/*
- * blkif_queue_request
+ * Generate a Xen blkfront IO request from a blk layer request. Reads
+ * and writes are handled as expected. Since we lack a loose flush
+ * request, we map flushes into a full ordered barrier.
*
- * request block io
- *
- * id: for guest use only.
- * operation: BLKIF_OP_{READ,WRITE,PROBE}
- * buffer: buffer to read/write into. this should be a
- * virtual address in the guest os.
+ * @req: a request struct
*/
static int blkif_queue_request(struct request *req)
{
@@ -694,7 +694,7 @@ static int blkif_queue_request(struct re
/* Fill out a communications ring structure. */
ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
id = GET_ID_FROM_FREELIST(info);
- info->shadow[id].request = (unsigned long)req;
+ info->shadow[id].request = req;
ring_req->id = id;
ring_req->sector_number = (blkif_sector_t)blk_rq_pos(req);
@@ -702,7 +702,11 @@ static int blkif_queue_request(struct re
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ if (req->cmd_flags & (REQ_FLUSH | REQ_FUA))
+#else
if (req->cmd_flags & REQ_HARDBARRIER)
+#endif
ring_req->operation = BLKIF_OP_WRITE_BARRIER;
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -813,7 +817,7 @@ static irqreturn_t blkif_int(int irq, vo
bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
- req = (struct request *)info->shadow[id].request;
+ req = info->shadow[id].request;
blkif_completion(&info->shadow[id]);
@@ -827,8 +831,23 @@ static irqreturn_t blkif_int(int irq, vo
" write barrier op failed\n",
info->gd->disk_name);
ret = -EOPNOTSUPP;
- info->feature_barrier = QUEUE_ORDERED_NONE;
- xlvbd_barrier(info);
+ }
+ if (unlikely(bret->status == BLKIF_RSP_ERROR &&
+ info->shadow[id].req.nr_segments == 0)) {
+ pr_warning("blkfront: %s:"
+ " empty write barrier op failed\n",
+ info->gd->disk_name);
+ ret = -EOPNOTSUPP;
+ }
+ if (unlikely(ret)) {
+ if (ret == -EOPNOTSUPP)
+ ret = 0;
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ info->feature_flush = 0;
+#else
+ info->feature_flush = QUEUE_ORDERED_NONE;
+#endif
+ xlvbd_flush(info);
}
/* fall through */
case BLKIF_OP_READ:
@@ -919,7 +938,7 @@ static int blkif_recover(struct blkfront
/* Stage 3: Find pending requests and requeue them. */
for (i = 0; i < BLK_RING_SIZE; i++) {
/* Not in use? */
- if (copy[i].request == 0)
+ if (!copy[i].request)
continue;
/* Grab a request slot and copy shadow state into it. */
@@ -937,8 +956,7 @@ static int blkif_recover(struct blkfront
req->seg[j].gref,
info->xbdev->otherend_id,
pfn_to_mfn(info->shadow[req->id].frame[j]),
- rq_data_dir((struct request *)
- info->shadow[req->id].request) ?
+ rq_data_dir(info->shadow[req->id].request) ?
GTF_readonly : 0);
info->shadow[req->id].req = *req;
--- head-2011-03-17.orig/drivers/xen/blkfront/block.h 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/block.h 2011-02-01 15:09:47.000000000 +0100
@@ -83,7 +83,7 @@ struct xlbd_major_info
struct blk_shadow {
blkif_request_t req;
- unsigned long request;
+ struct request *request;
unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
};
@@ -111,7 +111,7 @@ struct blkfront_info
struct gnttab_free_callback callback;
struct blk_shadow shadow[BLK_RING_SIZE];
unsigned long shadow_free;
- int feature_barrier;
+ int feature_flush;
int is_ready;
/**
@@ -146,7 +146,7 @@ extern void do_blkif_request (struct req
int xlvbd_add(blkif_sector_t capacity, int device,
u16 vdisk_info, u16 sector_size, struct blkfront_info *info);
void xlvbd_del(struct blkfront_info *info);
-int xlvbd_barrier(struct blkfront_info *info);
+void xlvbd_flush(struct blkfront_info *info);
#ifdef CONFIG_SYSFS
int xlvbd_sysfs_addif(struct blkfront_info *info);
--- head-2011-03-17.orig/drivers/xen/blkfront/vbd.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/vbd.c 2011-02-01 15:09:47.000000000 +0100
@@ -422,7 +422,7 @@ xlvbd_add(blkif_sector_t capacity, int v
info->rq = gd->queue;
info->gd = gd;
- xlvbd_barrier(info);
+ xlvbd_flush(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -468,36 +468,35 @@ xlvbd_del(struct blkfront_info *info)
info->rq = NULL;
}
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
-int
-xlvbd_barrier(struct blkfront_info *info)
+void
+xlvbd_flush(struct blkfront_info *info)
{
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,37)
+ blk_queue_flush(info->rq, info->feature_flush);
+ pr_info("blkfront: %s: barriers %s\n",
+ info->gd->disk_name,
+ info->feature_flush ? "enabled" : "disabled");
+#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
int err;
const char *barrier;
- switch (info->feature_barrier) {
+ switch (info->feature_flush) {
case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break;
case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break;
case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
default: return -EINVAL;
}
- err = blk_queue_ordered(info->rq, info->feature_barrier);
+ err = blk_queue_ordered(info->rq, info->feature_flush);
if (err)
return err;
pr_info("blkfront: %s: barriers %s\n",
info->gd->disk_name, barrier);
- return 0;
-}
#else
-int
-xlvbd_barrier(struct blkfront_info *info)
-{
- if (info->feature_barrier)
+ if (info->feature_flush)
pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
- return -ENOSYS;
-}
#endif
+}
#ifdef CONFIG_SYSFS
static ssize_t show_media(struct device *dev,
--- head-2011-03-17.orig/drivers/xen/blktap/blktap.c 2011-02-17 10:19:19.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap/blktap.c 2011-02-17 10:19:26.000000000 +0100
@@ -441,6 +441,7 @@ static const struct file_operations blkt
.unlocked_ioctl = blktap_ioctl,
.open = blktap_open,
.release = blktap_release,
+ .llseek = no_llseek,
.mmap = blktap_mmap,
};
@@ -573,6 +574,8 @@ static int blktap_open(struct inode *ino
tap_blkif_t *info;
int i;
+ nonseekable_open(inode, filp);
+
/* ctrl device, treat differently */
if (!idx)
return 0;
--- head-2011-03-17.orig/drivers/xen/blktap2/device.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/device.c 2011-02-01 15:09:47.000000000 +0100
@@ -844,7 +844,7 @@ blktap_device_run_queue(struct blktap *t
continue;
}
- if (req->cmd_flags & REQ_HARDBARRIER) {
+ if (req->cmd_flags & (REQ_FLUSH|REQ_FUA)) {
blk_start_request(req);
__blk_end_request_all(req, -EOPNOTSUPP);
continue;
--- head-2011-03-17.orig/drivers/xen/blktap2-new/device.c 2011-02-24 16:23:08.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2-new/device.c 2011-02-24 16:31:17.000000000 +0100
@@ -302,9 +302,6 @@ blktap_device_configure(struct blktap *t
/* Make sure buffer addresses are sector-aligned. */
blk_queue_dma_alignment(rq, 511);
- /* We are reordering, but cacheless. */
- blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN);
-
spin_unlock_irq(&dev->lock);
}
--- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-09 12:45:24.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-03 11:12:32.000000000 +0100
@@ -33,6 +33,7 @@
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/irq.h>
+#include <linux/irqdesc.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
@@ -89,14 +90,17 @@ static struct irq_cfg _irq_cfg[] = {
static inline struct irq_cfg *__pure irq_cfg(unsigned int irq)
{
#ifdef CONFIG_SPARSE_IRQ
- struct irq_desc *desc = irq_to_desc(irq);
-
- return desc ? desc->chip_data : NULL;
+ return get_irq_chip_data(irq);
#else
return irq < NR_IRQS ? _irq_cfg + irq : NULL;
#endif
}
+static inline struct irq_cfg *__pure irq_data_cfg(struct irq_data *data)
+{
+ return irq_data_get_irq_chip_data(data);
+}
+
/* Constructor for packed IRQ information. */
static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
{
@@ -116,26 +120,47 @@ static inline u32 mk_irq_info(u32 type,
* Accessors for packed IRQ information.
*/
+static inline unsigned int evtchn_from_irq_cfg(const struct irq_cfg *cfg)
+{
+ return cfg->info & ((1U << _EVTCHN_BITS) - 1);
+}
+
+static inline unsigned int evtchn_from_irq_data(struct irq_data *data)
+{
+ const struct irq_cfg *cfg = irq_data_cfg(data);
+
+ return cfg ? evtchn_from_irq_cfg(cfg) : 0;
+}
+
static inline unsigned int evtchn_from_irq(int irq)
{
- const struct irq_cfg *cfg = irq_cfg(irq);
+ struct irq_data *data = irq_get_irq_data(irq);
- return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0;
+ return data ? evtchn_from_irq_data(data) : 0;
+}
+
+static inline unsigned int index_from_irq_cfg(const struct irq_cfg *cfg)
+{
+ return (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
}
static inline unsigned int index_from_irq(int irq)
{
const struct irq_cfg *cfg = irq_cfg(irq);
- return cfg ? (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1)
- : 0;
+ return cfg ? index_from_irq_cfg(cfg) : 0;
+}
+
+static inline unsigned int type_from_irq_cfg(const struct irq_cfg *cfg)
+{
+ return cfg->info >> (32 - _IRQT_BITS);
}
static inline unsigned int type_from_irq(int irq)
{
const struct irq_cfg *cfg = irq_cfg(irq);
- return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND;
+ return cfg ? type_from_irq_cfg(cfg) : IRQT_UNBOUND;
}
unsigned int irq_from_evtchn(unsigned int port)
@@ -180,7 +205,7 @@ static void bind_evtchn_to_cpu(unsigned
BUG_ON(!test_bit(chn, s->evtchn_mask));
if (irq != -1)
- cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
+ cpumask_copy(irq_get_irq_data(irq)->affinity, cpumask_of(cpu));
clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn]));
set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
@@ -193,10 +218,10 @@ static void init_evtchn_cpu_bindings(voi
/* By default all event channels notify CPU#0. */
for (i = 0; i < nr_irqs; i++) {
- struct irq_desc *desc = irq_to_desc(i);
+ struct irq_data *data = irq_get_irq_data(i);
- if (desc)
- cpumask_copy(desc->affinity, cpumask_of(0));
+ if (data)
+ cpumask_copy(data->affinity, cpumask_of(0));
}
memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
@@ -378,26 +403,24 @@ asmlinkage void __irq_entry evtchn_do_up
set_irq_regs(old_regs);
}
-static int find_unbound_irq(unsigned int node, struct irq_chip *chip)
+static int find_unbound_irq(unsigned int node, struct irq_cfg **pcfg,
+ struct irq_chip *chip)
{
static int warned;
int irq;
for (irq = DYNIRQ_BASE; irq < nr_irqs; irq++) {
- struct irq_desc *desc;
- struct irq_cfg *cfg;
+ struct irq_cfg *cfg = alloc_irq_and_cfg_at(irq, node);
+ struct irq_desc *desc = irq_to_desc(irq);
- desc = irq_to_desc(irq);
- if (!desc)
- desc = irq_to_desc_alloc_node(irq, node);
- else if (desc->chip != &no_irq_chip &&
- desc->chip != &dynirq_chip)
- continue;
- if (!desc)
+ if (unlikely(!cfg))
return -ENOMEM;
+ if (desc->irq_data.chip != &no_irq_chip &&
+ desc->irq_data.chip != chip)
+ continue;
- cfg = desc->chip_data;
- if (cfg && !cfg->bindcount) {
+ if (!cfg->bindcount) {
+ *pcfg = cfg;
desc->status |= IRQ_NOPROBE;
set_irq_chip_and_handler_name(irq, chip,
handle_fasteoi_irq,
@@ -419,20 +442,22 @@ static struct irq_chip dynirq_chip;
static int bind_caller_port_to_irq(unsigned int caller_port)
{
+ struct irq_cfg *cfg;
int irq;
spin_lock(&irq_mapping_update_lock);
if ((irq = evtchn_to_irq[caller_port]) == -1) {
- if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0)
+ if ((irq = find_unbound_irq(numa_node_id(), &cfg,
+ &dynirq_chip)) < 0)
goto out;
evtchn_to_irq[caller_port] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_CALLER_PORT,
- 0, caller_port);
- }
+ cfg->info = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
+ } else
+ cfg = irq_cfg(irq);
- irq_cfg(irq)->bindcount++;
+ cfg->bindcount++;
out:
spin_unlock(&irq_mapping_update_lock);
@@ -441,21 +466,22 @@ static int bind_caller_port_to_irq(unsig
static int bind_local_port_to_irq(unsigned int local_port)
{
+ struct irq_cfg *cfg;
int irq;
spin_lock(&irq_mapping_update_lock);
BUG_ON(evtchn_to_irq[local_port] != -1);
- if ((irq = find_unbound_irq(numa_node_id(), &dynirq_chip)) < 0) {
+ if ((irq = find_unbound_irq(numa_node_id(), &cfg, &dynirq_chip)) < 0) {
if (close_evtchn(local_port))
BUG();
goto out;
}
evtchn_to_irq[local_port] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
- irq_cfg(irq)->bindcount++;
+ cfg->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
+ cfg->bindcount++;
out:
spin_unlock(&irq_mapping_update_lock);
@@ -494,12 +520,13 @@ static int bind_interdomain_evtchn_to_ir
static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
{
struct evtchn_bind_virq bind_virq;
+ struct irq_cfg *cfg;
int evtchn, irq;
spin_lock(&irq_mapping_update_lock);
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
- if ((irq = find_unbound_irq(cpu_to_node(cpu),
+ if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
&dynirq_chip)) < 0)
goto out;
@@ -511,14 +538,15 @@ static int bind_virq_to_irq(unsigned int
evtchn = bind_virq.port;
evtchn_to_irq[evtchn] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
+ cfg->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
per_cpu(virq_to_irq, cpu)[virq] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
- }
+ } else
+ cfg = irq_cfg(irq);
- irq_cfg(irq)->bindcount++;
+ cfg->bindcount++;
out:
spin_unlock(&irq_mapping_update_lock);
@@ -528,12 +556,13 @@ static int bind_virq_to_irq(unsigned int
static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
{
struct evtchn_bind_ipi bind_ipi;
+ struct irq_cfg *cfg;
int evtchn, irq;
spin_lock(&irq_mapping_update_lock);
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
- if ((irq = find_unbound_irq(cpu_to_node(cpu),
+ if ((irq = find_unbound_irq(cpu_to_node(cpu), &cfg,
&dynirq_chip)) < 0)
goto out;
@@ -544,14 +573,15 @@ static int bind_ipi_to_irq(unsigned int
evtchn = bind_ipi.port;
evtchn_to_irq[evtchn] = irq;
- irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
+ cfg->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
bind_evtchn_to_cpu(evtchn, cpu);
- }
+ } else
+ cfg = irq_cfg(irq);
- irq_cfg(irq)->bindcount++;
+ cfg->bindcount++;
out:
spin_unlock(&irq_mapping_update_lock);
@@ -561,23 +591,24 @@ static int bind_ipi_to_irq(unsigned int
static void unbind_from_irq(unsigned int irq)
{
unsigned int cpu;
- int evtchn = evtchn_from_irq(irq);
+ struct irq_cfg *cfg = irq_cfg(irq);
+ int evtchn = evtchn_from_irq_cfg(cfg);
spin_lock(&irq_mapping_update_lock);
- if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) {
- if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
+ if (!--cfg->bindcount && VALID_EVTCHN(evtchn)) {
+ if ((type_from_irq_cfg(cfg) != IRQT_CALLER_PORT) &&
close_evtchn(evtchn))
BUG();
- switch (type_from_irq(irq)) {
+ switch (type_from_irq_cfg(cfg)) {
case IRQT_VIRQ:
per_cpu(virq_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
+ [index_from_irq_cfg(cfg)] = -1;
break;
case IRQT_IPI:
per_cpu(ipi_to_irq, cpu_from_evtchn(evtchn))
- [index_from_irq(irq)] = -1;
+ [index_from_irq_cfg(cfg)] = -1;
break;
default:
break;
@@ -587,7 +618,7 @@ static void unbind_from_irq(unsigned int
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
- irq_cfg(irq)->info = IRQ_UNBOUND;
+ cfg->info = IRQ_UNBOUND;
/* Zap stats across IRQ changes of use. */
for_each_possible_cpu(cpu)
@@ -740,25 +771,26 @@ void rebind_evtchn_to_cpu(int port, unsi
unmask_evtchn(port);
}
-static void rebind_irq_to_cpu(unsigned int irq, unsigned int tcpu)
+static void rebind_irq_to_cpu(struct irq_data *data, unsigned int tcpu)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
rebind_evtchn_to_cpu(evtchn, tcpu);
}
-static int set_affinity_irq(unsigned int irq, const struct cpumask *dest)
+static int set_affinity_irq(struct irq_data *data,
+ const struct cpumask *dest, bool force)
{
- rebind_irq_to_cpu(irq, cpumask_first(dest));
+ rebind_irq_to_cpu(data, cpumask_first(dest));
return 0;
}
#endif
-int resend_irq_on_evtchn(unsigned int irq)
+int resend_irq_on_evtchn(struct irq_data *data)
{
- int masked, evtchn = evtchn_from_irq(irq);
+ int masked, evtchn = evtchn_from_irq_data(data);
if (!VALID_EVTCHN(evtchn))
return 1;
@@ -775,52 +807,51 @@ int resend_irq_on_evtchn(unsigned int ir
* Interface to generic handling in irq.c
*/
-static void unmask_dynirq(unsigned int irq)
+static void unmask_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
unmask_evtchn(evtchn);
}
-static void mask_dynirq(unsigned int irq)
+static void mask_dynirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
mask_evtchn(evtchn);
}
-static unsigned int startup_dynirq(unsigned int irq)
+static unsigned int startup_dynirq(struct irq_data *data)
{
- unmask_dynirq(irq);
+ unmask_dynirq(data);
return 0;
}
#define shutdown_dynirq mask_dynirq
-static void end_dynirq(unsigned int irq)
+static void end_dynirq(struct irq_data *data)
{
- if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) {
- move_masked_irq(irq);
- unmask_dynirq(irq);
+ if (!(irq_to_desc(data->irq)->status & IRQ_DISABLED)) {
+ move_masked_irq(data->irq);
+ unmask_dynirq(data);
}
}
static struct irq_chip dynirq_chip = {
- .name = "Dynamic",
- .startup = startup_dynirq,
- .shutdown = shutdown_dynirq,
- .enable = unmask_dynirq,
- .disable = mask_dynirq,
- .mask = mask_dynirq,
- .unmask = unmask_dynirq,
- .end = end_dynirq,
- .eoi = end_dynirq,
+ .name = "Dynamic",
+ .irq_startup = startup_dynirq,
+ .irq_shutdown = shutdown_dynirq,
+ .irq_enable = unmask_dynirq,
+ .irq_disable = mask_dynirq,
+ .irq_mask = mask_dynirq,
+ .irq_unmask = unmask_dynirq,
+ .irq_eoi = end_dynirq,
#ifdef CONFIG_SMP
- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,
#endif
- .retrigger = resend_irq_on_evtchn,
+ .irq_retrigger = resend_irq_on_evtchn,
};
/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
@@ -873,18 +904,20 @@ static inline void pirq_query_unmask(int
set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
}
-static int set_type_pirq(unsigned int irq, unsigned int type)
+static int set_type_pirq(struct irq_data *data, unsigned int type)
{
if (type != IRQ_TYPE_PROBE)
return -EINVAL;
- set_bit(irq - PIRQ_BASE, probing_pirq);
+ set_bit(data->irq - PIRQ_BASE, probing_pirq);
return 0;
}
-static void enable_pirq(unsigned int irq)
+static void enable_pirq(struct irq_data *data)
{
struct evtchn_bind_pirq bind_pirq;
- int evtchn = evtchn_from_irq(irq);
+ unsigned int irq = data->irq;
+ struct irq_cfg *cfg = irq_data_cfg(data);
+ int evtchn = evtchn_from_irq_cfg(cfg);
unsigned int pirq = irq - PIRQ_BASE;
if (VALID_EVTCHN(evtchn)) {
@@ -910,7 +943,7 @@ static void enable_pirq(unsigned int irq
evtchn_to_irq[evtchn] = irq;
bind_evtchn_to_cpu(evtchn, 0);
- irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
+ cfg->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
out:
pirq_unmask_and_notify(evtchn, irq);
@@ -918,15 +951,16 @@ static void enable_pirq(unsigned int irq
#define disable_pirq mask_pirq
-static unsigned int startup_pirq(unsigned int irq)
+static unsigned int startup_pirq(struct irq_data *data)
{
- enable_pirq(irq);
+ enable_pirq(data);
return 0;
}
-static void shutdown_pirq(unsigned int irq)
+static void shutdown_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ struct irq_cfg *cfg = irq_data_cfg(data);
+ int evtchn = evtchn_from_irq_cfg(cfg);
if (!VALID_EVTCHN(evtchn))
return;
@@ -938,48 +972,47 @@ static void shutdown_pirq(unsigned int i
bind_evtchn_to_cpu(evtchn, 0);
evtchn_to_irq[evtchn] = -1;
- irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
+ cfg->info = mk_irq_info(IRQT_PIRQ, index_from_irq_cfg(cfg), 0);
}
-static void unmask_pirq(unsigned int irq)
+static void unmask_pirq(struct irq_data *data)
{
- int evtchn = evtchn_from_irq(irq);
+ int evtchn = evtchn_from_irq_data(data);
if (VALID_EVTCHN(evtchn))
- pirq_unmask_and_notify(evtchn, irq);
+ pirq_unmask_and_notify(evtchn, data->irq);
}
#define mask_pirq mask_dynirq
-static void end_pirq(unsigned int irq)
+static void end_pirq(struct irq_data *data)
{
- const struct irq_desc *desc = irq_to_desc(irq);
+ const struct irq_desc *desc = irq_to_desc(data->irq);
if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
(IRQ_DISABLED|IRQ_PENDING))
- shutdown_pirq(irq);
+ shutdown_pirq(data);
else {
if (!(desc->status & IRQ_DISABLED))
- move_masked_irq(irq);
- unmask_pirq(irq);
+ move_masked_irq(data->irq);
+ unmask_pirq(data);
}
}
static struct irq_chip pirq_chip = {
- .name = "Phys",
- .startup = startup_pirq,
- .shutdown = shutdown_pirq,
- .enable = enable_pirq,
- .disable = disable_pirq,
- .mask = mask_pirq,
- .unmask = unmask_pirq,
- .end = end_pirq,
- .eoi = end_pirq,
- .set_type = set_type_pirq,
+ .name = "Phys",
+ .irq_startup = startup_pirq,
+ .irq_shutdown = shutdown_pirq,
+ .irq_enable = enable_pirq,
+ .irq_disable = disable_pirq,
+ .irq_mask = mask_pirq,
+ .irq_unmask = unmask_pirq,
+ .irq_eoi = end_pirq,
+ .irq_set_type = set_type_pirq,
#ifdef CONFIG_SMP
- .set_affinity = set_affinity_irq,
+ .irq_set_affinity = set_affinity_irq,
#endif
- .retrigger = resend_irq_on_evtchn,
+ .irq_retrigger = resend_irq_on_evtchn,
};
int irq_ignore_unhandled(unsigned int irq)
@@ -1169,28 +1202,39 @@ int __init arch_early_irq_init(void)
unsigned int i;
for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++)
- irq_to_desc(i)->chip_data = _irq_cfg + i;
+ set_irq_chip_data(i, _irq_cfg + i);
return 0;
}
-#ifdef CONFIG_SPARSE_IRQ
-int arch_init_chip_data(struct irq_desc *desc, int cpu)
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
{
- if (!desc->chip_data) {
- /* By default all event channels notify CPU#0. */
- cpumask_copy(desc->affinity, cpumask_of(0));
+ int res = irq_alloc_desc_at(at, node);
+ struct irq_cfg *cfg = NULL;
- desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC);
- }
- if (!desc->chip_data) {
- pr_emerg("cannot alloc irq_cfg\n");
- BUG();
+ if (res < 0) {
+ if (res != -EEXIST)
+ return NULL;
+ cfg = get_irq_chip_data(at);
+ if (cfg)
+ return cfg;
}
- return 0;
-}
+#ifdef CONFIG_SPARSE_IRQ
+ /* By default all event channels notify CPU#0. */
+ cpumask_copy(irq_get_irq_data(at)->affinity, cpumask_of(0));
+
+ cfg = kzalloc(sizeof(*cfg), GFP_KERNEL);
+ if (cfg)
+ set_irq_chip_data(at, cfg);
+ else
+ irq_free_desc(at);
+
+ return cfg;
+#else
+ return irq_cfg(at);
#endif
+}
#ifdef CONFIG_SPARSE_IRQ
int nr_pirqs = NR_PIRQS;
@@ -1223,7 +1267,7 @@ int __init arch_probe_nr_irqs(void)
printk(KERN_DEBUG "nr_pirqs: %d\n", nr_pirqs);
- return 0;
+ return ARRAY_SIZE(_irq_cfg);
}
#endif
@@ -1255,10 +1299,12 @@ int assign_irq_vector(int irq, struct ir
void evtchn_register_pirq(int irq)
{
+ struct irq_cfg *cfg = irq_cfg(irq);
+
BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs);
- if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
+ if (identity_mapped_irq(irq) || type_from_irq_cfg(cfg) != IRQT_UNBOUND)
return;
- irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0);
+ cfg->info = mk_irq_info(IRQT_PIRQ, irq, 0);
set_irq_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
"fasteoi");
}
@@ -1267,15 +1313,12 @@ int evtchn_map_pirq(int irq, int xen_pir
{
if (irq < 0) {
#ifdef CONFIG_SPARSE_IRQ
+ struct irq_cfg *cfg;
+
spin_lock(&irq_mapping_update_lock);
- irq = find_unbound_irq(numa_node_id(), &pirq_chip);
+ irq = find_unbound_irq(numa_node_id(), &cfg, &pirq_chip);
if (irq >= 0) {
- struct irq_desc *desc;
- struct irq_cfg *cfg;
-
- desc = irq_to_desc_alloc_node(irq, numa_node_id());
- cfg = desc->chip_data;
- BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
+ BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
cfg->bindcount++;
cfg->info = mk_irq_info(IRQT_PIRQ, xen_pirq, 0);
}
@@ -1291,15 +1334,17 @@ int evtchn_map_pirq(int irq, int xen_pir
irq = PIRQ_BASE + nr_pirqs - 1;
spin_lock(&irq_alloc_lock);
do {
- struct irq_desc *desc;
struct irq_cfg *cfg;
if (identity_mapped_irq(irq))
continue;
- desc = irq_to_desc_alloc_node(irq, numa_node_id());
- cfg = desc->chip_data;
- if (!index_from_irq(irq)) {
- BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
+ cfg = alloc_irq_and_cfg_at(irq, numa_node_id());
+ if (unlikely(!cfg)) {
+ spin_unlock(&irq_alloc_lock);
+ return -ENOMEM;
+ }
+ if (!index_from_irq_cfg(cfg)) {
+ BUG_ON(type_from_irq_cfg(cfg) != IRQT_UNBOUND);
cfg->info = mk_irq_info(IRQT_PIRQ,
xen_pirq, 0);
break;
@@ -1312,7 +1357,9 @@ int evtchn_map_pirq(int irq, int xen_pir
handle_fasteoi_irq, "fasteoi");
#endif
} else if (!xen_pirq) {
- if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
+ struct irq_cfg *cfg = irq_cfg(irq);
+
+ if (!cfg || unlikely(type_from_irq_cfg(cfg) != IRQT_PIRQ))
return -EINVAL;
/*
* dynamic_irq_cleanup(irq) would seem to be the correct thing
@@ -1321,9 +1368,9 @@ int evtchn_map_pirq(int irq, int xen_pir
* then causes a warning in dynamic_irq_cleanup().
*/
set_irq_chip_and_handler(irq, NULL, NULL);
- irq_cfg(irq)->info = IRQ_UNBOUND;
+ cfg->info = IRQ_UNBOUND;
#ifdef CONFIG_SPARSE_IRQ
- irq_cfg(irq)->bindcount--;
+ cfg->bindcount--;
#endif
return 0;
} else if (type_from_irq(irq) != IRQT_PIRQ
@@ -1338,10 +1385,12 @@ int evtchn_map_pirq(int irq, int xen_pir
int evtchn_get_xen_pirq(int irq)
{
+ struct irq_cfg *cfg = irq_cfg(irq);
+
if (identity_mapped_irq(irq))
return irq;
- BUG_ON(type_from_irq(irq) != IRQT_PIRQ);
- return index_from_irq(irq);
+ BUG_ON(type_from_irq_cfg(cfg) != IRQT_PIRQ);
+ return index_from_irq_cfg(cfg);
}
void __init xen_init_IRQ(void)
--- head-2011-03-17.orig/drivers/xen/core/smpboot.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/smpboot.c 2011-03-03 16:11:42.000000000 +0100
@@ -11,7 +11,6 @@
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
-#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
@@ -34,7 +33,7 @@ extern void smp_trap_init(trap_info_t *)
cpumask_var_t vcpu_initialized_mask;
-DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
+DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);
static DEFINE_PER_CPU(int, resched_irq);
@@ -46,6 +45,11 @@ static char callfunc_name[NR_CPUS][15];
static char call1func_name[NR_CPUS][15];
static char reboot_name[NR_CPUS][15];
+#ifdef CONFIG_IRQ_WORK
+static DEFINE_PER_CPU(int, irq_work_irq);
+static char irq_work_name[NR_CPUS][15];
+#endif
+
void __init prefill_possible_map(void)
{
int i, rc;
@@ -76,6 +80,9 @@ static int __cpuinit xen_smp_intr_init(u
int rc;
per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
+#ifdef CONFIG_IRQ_WORK
+ per_cpu(irq_work_irq, cpu) =
+#endif
per_cpu(call1func_irq, cpu) = per_cpu(reboot_irq, cpu) = -1;
sprintf(resched_name[cpu], "resched%u", cpu);
@@ -122,6 +129,19 @@ static int __cpuinit xen_smp_intr_init(u
goto fail;
per_cpu(reboot_irq, cpu) = rc;
+#ifdef CONFIG_IRQ_WORK
+ sprintf(irq_work_name[cpu], "irqwork%u", cpu);
+ rc = bind_ipi_to_irqhandler(IRQ_WORK_VECTOR,
+ cpu,
+ smp_irq_work_interrupt,
+ IRQF_DISABLED|IRQF_NOBALANCING,
+ irq_work_name[cpu],
+ NULL);
+ if (rc < 0)
+ goto fail;
+ per_cpu(irq_work_irq, cpu) = rc;
+#endif
+
rc = xen_spinlock_init(cpu);
if (rc < 0)
goto fail;
@@ -140,6 +160,10 @@ static int __cpuinit xen_smp_intr_init(u
unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
if (per_cpu(reboot_irq, cpu) >= 0)
unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
+#ifdef CONFIG_IRQ_WORK
+ if (per_cpu(irq_work_irq, cpu) >= 0)
+ unbind_from_irqhandler(per_cpu(irq_work_irq, cpu), NULL);
+#endif
xen_spinlock_cleanup(cpu);
return rc;
}
@@ -154,6 +178,9 @@ static void __cpuinit xen_smp_intr_exit(
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
unbind_from_irqhandler(per_cpu(reboot_irq, cpu), NULL);
+#ifdef CONFIG_IRQ_WORK
+ unbind_from_irqhandler(per_cpu(irq_work_irq, cpu), NULL);
+#endif
xen_spinlock_cleanup(cpu);
}
#endif
--- head-2011-03-17.orig/drivers/xen/core/spinlock.c 2011-03-15 16:33:52.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/spinlock.c 2011-03-03 16:11:45.000000000 +0100
@@ -21,7 +21,7 @@ struct spinning {
struct spinning *prev;
};
static DEFINE_PER_CPU(struct spinning *, _spinning);
-static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn);
+static DEFINE_PER_CPU_READ_MOSTLY(evtchn_port_t, poll_evtchn);
/*
* Protect removal of objects: Addition can be done lockless, and even
* removal itself doesn't need protection - what needs to be prevented is
@@ -153,7 +153,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
*/
arch_spinlock_t *lock = other->lock;
- raw_local_irq_disable();
+ arch_local_irq_disable();
while (lock->cur == other->ticket) {
unsigned int token;
bool kick, free;
@@ -175,7 +175,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
}
/*
- * No need to use raw_local_irq_restore() here, as the
+ * No need to use arch_local_irq_restore() here, as the
* intended event processing will happen with the poll
* call.
*/
@@ -200,7 +200,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
/* announce we're done */
other = spinning.prev;
percpu_write(_spinning, other);
- raw_local_irq_disable();
+ arch_local_irq_disable();
rm_idx = percpu_read(rm_seq.idx);
smp_wmb();
percpu_write(rm_seq.idx, rm_idx + 1);
@@ -229,7 +229,7 @@ bool xen_spin_wait(arch_spinlock_t *lock
rm_idx &= 1;
while (percpu_read(rm_seq.ctr[rm_idx].counter))
cpu_relax();
- raw_local_irq_restore(upcall_mask);
+ arch_local_irq_restore(upcall_mask);
*ptok = lock->cur | (spinning.ticket << TICKET_SHIFT);
return rc;
@@ -256,7 +256,7 @@ void xen_spin_kick(arch_spinlock_t *lock
return;
}
- flags = __raw_local_irq_save();
+ flags = arch_local_irq_save();
for (;;) {
unsigned int rm_idx = per_cpu(rm_seq.idx, cpu);
@@ -281,7 +281,7 @@ void xen_spin_kick(arch_spinlock_t *lock
}
atomic_dec(rm_ctr);
- raw_local_irq_restore(flags);
+ arch_local_irq_restore(flags);
if (unlikely(spinning)) {
notify_remote_via_evtchn(per_cpu(poll_evtchn, cpu));
--- head-2011-03-17.orig/drivers/xen/evtchn.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/xen/evtchn.c 2011-02-01 15:09:47.000000000 +0100
@@ -528,7 +528,11 @@ static const struct file_operations evtc
static struct miscdevice evtchn_miscdev = {
.minor = MISC_DYNAMIC_MINOR,
+#ifdef CONFIG_PARAVIRT_XEN
.name = "xen/evtchn",
+#else
+ .name = "evtchn",
+#endif
.nodename = "xen/evtchn",
.fops = &evtchn_fops,
};
--- head-2011-03-17.orig/drivers/xen/gntdev/gntdev.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/xen/gntdev/gntdev.c 2011-02-01 15:09:47.000000000 +0100
@@ -145,6 +145,7 @@ static long gntdev_ioctl(struct file *fl
static const struct file_operations gntdev_fops = {
.owner = THIS_MODULE,
.open = gntdev_open,
+ .llseek = no_llseek,
.release = gntdev_release,
.mmap = gntdev_mmap,
.unlocked_ioctl = gntdev_ioctl
@@ -430,6 +431,8 @@ static int gntdev_open(struct inode *ino
{
gntdev_file_private_data_t *private_data;
+ nonseekable_open(inode, flip);
+
try_module_get(THIS_MODULE);
/* Allocate space for the per-instance private data. */
--- head-2011-03-17.orig/drivers/xen/privcmd/privcmd.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/privcmd/privcmd.c 2011-02-01 15:09:47.000000000 +0100
@@ -14,7 +14,6 @@
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/swap.h>
-#include <linux/smp_lock.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
@@ -416,7 +415,8 @@ static int privcmd_mmap(struct file * fi
if (xen_feature(XENFEAT_auto_translated_physmap))
return -ENOSYS;
- /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+ /* DONTCOPY is essential for Xen because copy_page_range doesn't know
+ * how to recreate these mappings */
vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
vma->vm_ops = &privcmd_vm_ops;
vma->vm_private_data = NULL;
@@ -426,6 +426,8 @@ static int privcmd_mmap(struct file * fi
#endif
static const struct file_operations privcmd_file_ops = {
+ .open = nonseekable_open,
+ .llseek = no_llseek,
.unlocked_ioctl = privcmd_ioctl,
.mmap = privcmd_mmap,
};
--- head-2011-03-17.orig/drivers/xen/scsifront/scsifront.c 2011-02-08 10:04:41.000000000 +0100
+++ head-2011-03-17/drivers/xen/scsifront/scsifront.c 2011-02-08 10:08:14.000000000 +0100
@@ -315,11 +315,12 @@ big_to_sg:
return ref_cnt;
}
-static int scsifront_queuecommand(struct scsi_cmnd *sc,
- void (*done)(struct scsi_cmnd *))
+static int scsifront_queuecommand(struct Scsi_Host *shost,
+ struct scsi_cmnd *sc)
{
- struct vscsifrnt_info *info = shost_priv(sc->device->host);
+ struct vscsifrnt_info *info = shost_priv(shost);
vscsiif_request_t *ring_req;
+ unsigned long flags;
int ref_cnt;
uint16_t rqid;
@@ -328,11 +329,12 @@ static int scsifront_queuecommand(struct
sc->cmnd[0],sc->cmnd[1],sc->cmnd[2],sc->cmnd[3],sc->cmnd[4],
sc->cmnd[5],sc->cmnd[6],sc->cmnd[7],sc->cmnd[8],sc->cmnd[9]);
*/
+ spin_lock_irqsave(shost->host_lock, flags);
if (RING_FULL(&info->ring)) {
- goto out_host_busy;
+ spin_unlock_irqrestore(shost->host_lock, flags);
+ return SCSI_MLQUEUE_HOST_BUSY;
}
- sc->scsi_done = done;
sc->result = 0;
ring_req = scsifront_pre_request(info);
@@ -361,27 +363,21 @@ static int scsifront_queuecommand(struct
ref_cnt = map_data_for_request(info, sc, ring_req, rqid);
if (ref_cnt < 0) {
add_id_to_freelist(info, rqid);
+ spin_unlock_irqrestore(shost->host_lock, flags);
if (ref_cnt == (-ENOMEM))
- goto out_host_busy;
- else {
- sc->result = (DID_ERROR << 16);
- goto out_fail_command;
- }
+ return SCSI_MLQUEUE_HOST_BUSY;
+ sc->result = (DID_ERROR << 16);
+ sc->scsi_done(sc);
+ return 0;
}
ring_req->nr_segments = (uint8_t)ref_cnt;
info->shadow[rqid].nr_segments = ref_cnt;
scsifront_do_request(info);
+ spin_unlock_irqrestore(shost->host_lock, flags);
return 0;
-
-out_host_busy:
- return SCSI_MLQUEUE_HOST_BUSY;
-
-out_fail_command:
- done(sc);
- return 0;
}
--- head-2011-03-17.orig/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2008-02-20 09:32:49.000000000 +0100
+++ head-2011-03-17/drivers/xen/sfc_netback/ci/tools/platform/linux_kernel.h 2011-02-01 15:09:47.000000000 +0100
@@ -54,7 +54,6 @@
#include <linux/in6.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
-#include <linux/smp_lock.h>
#include <linux/ctype.h>
#include <linux/uio.h>
#include <asm/current.h>
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_dev.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_dev.c 2011-02-01 15:09:47.000000000 +0100
@@ -454,6 +454,7 @@ static const struct file_operations xenb
.write = xenbus_dev_write,
.open = xenbus_dev_open,
.release = xenbus_dev_release,
+ .llseek = no_llseek,
.poll = xenbus_dev_poll,
#ifdef HAVE_UNLOCKED_IOCTL
.unlocked_ioctl = xenbus_dev_ioctl
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:09:47.000000000 +0100
@@ -82,10 +82,13 @@
int xen_store_evtchn;
#if !defined(CONFIG_XEN) && !defined(MODULE)
-EXPORT_SYMBOL(xen_store_evtchn);
+EXPORT_SYMBOL_GPL(xen_store_evtchn);
#endif
struct xenstore_domain_interface *xen_store_interface;
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+EXPORT_SYMBOL_GPL(xen_store_interface);
+#endif
static unsigned long xen_store_mfn;
@@ -1102,9 +1105,7 @@ int __devinit xenbus_init(void)
#endif
{
int err = 0;
-#if defined(CONFIG_XEN) || defined(MODULE)
unsigned long page = 0;
-#endif
DPRINTK("");
@@ -1122,7 +1123,6 @@ int __devinit xenbus_init(void)
* Domain0 doesn't have a store_evtchn or store_mfn yet.
*/
if (is_initial_xendomain()) {
-#if defined(CONFIG_XEN) || defined(MODULE)
struct evtchn_alloc_unbound alloc_unbound;
/* Allocate Xenstore page */
@@ -1161,9 +1161,6 @@ int __devinit xenbus_init(void)
if (xsd_port_intf)
xsd_port_intf->read_proc = xsd_port_read;
#endif
-#else
- /* dom0 not yet supported */
-#endif
xen_store_interface = mfn_to_virt(xen_store_mfn);
} else {
#if !defined(CONFIG_XEN) && !defined(MODULE)
@@ -1249,10 +1246,8 @@ int __devinit xenbus_init(void)
* registered.
*/
-#if defined(CONFIG_XEN) || defined(MODULE)
if (page != 0)
free_page(page);
-#endif
return err;
}
--- head-2011-03-17.orig/include/xen/Kbuild 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/include/xen/Kbuild 2011-02-01 15:09:47.000000000 +0100
@@ -1,2 +1 @@
-header-y += privcmd.h
header-y += public/
--- head-2011-03-17.orig/include/xen/evtchn.h 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/include/xen/evtchn.h 2011-02-01 15:09:47.000000000 +0100
@@ -58,6 +58,7 @@ struct irq_cfg {
#endif
};
};
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
#endif
/*
--- head-2011-03-17.orig/include/xen/interface/memory.h 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/include/xen/interface/memory.h 2011-02-01 15:09:47.000000000 +0100
@@ -198,6 +198,7 @@ struct xen_machphys_mapping {
xen_ulong_t v_start, v_end; /* Start and end virtual addresses. */
xen_ulong_t max_mfn; /* Maximum MFN that can be looked up. */
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping);
typedef struct xen_machphys_mapping xen_machphys_mapping_t;
DEFINE_XEN_GUEST_HANDLE(xen_machphys_mapping_t);
@@ -252,6 +253,7 @@ struct xen_memory_map {
*/
XEN_GUEST_HANDLE(void) buffer;
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_map);
typedef struct xen_memory_map xen_memory_map_t;
DEFINE_XEN_GUEST_HANDLE(xen_memory_map_t);
--- head-2011-03-17.orig/include/xen/privcmd.h 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/include/xen/privcmd.h 2011-02-01 15:09:47.000000000 +0100
@@ -1,77 +1,3 @@
-/******************************************************************************
- * privcmd.h
- *
- * Interface to /proc/xen/privcmd.
- *
- * Copyright (c) 2003-2005, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __LINUX_PUBLIC_PRIVCMD_H__
-#define __LINUX_PUBLIC_PRIVCMD_H__
-
-#include <linux/types.h>
-#include <linux/compiler.h>
-
-typedef unsigned long xen_pfn_t;
-
-struct privcmd_hypercall {
- __u64 op;
- __u64 arg[5];
-};
-
-struct privcmd_mmap_entry {
- __u64 va;
- __u64 mfn;
- __u64 npages;
-};
-
-struct privcmd_mmap {
- int num;
- domid_t dom; /* target domain */
- struct privcmd_mmap_entry __user *entry;
-};
-
-struct privcmd_mmapbatch {
- int num; /* number of pages to populate */
- domid_t dom; /* target domain */
- __u64 addr; /* virtual address */
- xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
-};
-
-/*
- * @cmd: IOCTL_PRIVCMD_HYPERCALL
- * @arg: &privcmd_hypercall_t
- * Return: Value returned from execution of the specified hypercall.
- */
-#define IOCTL_PRIVCMD_HYPERCALL \
- _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
-#define IOCTL_PRIVCMD_MMAP \
- _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
-#define IOCTL_PRIVCMD_MMAPBATCH \
- _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
-
-#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */
+#if defined(CONFIG_PARAVIRT_XEN) || !defined(__KERNEL__)
+#include "public/privcmd.h"
+#endif
--- head-2011-03-17.orig/include/xen/public/privcmd.h 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/include/xen/public/privcmd.h 2011-02-01 15:09:47.000000000 +0100
@@ -34,6 +34,7 @@
#define __LINUX_PUBLIC_PRIVCMD_H__
#include <linux/types.h>
+#include <linux/compiler.h>
typedef struct privcmd_hypercall
{
--- head-2011-03-17.orig/kernel/power/Kconfig 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/kernel/power/Kconfig 2011-02-01 15:09:47.000000000 +0100
@@ -65,7 +65,7 @@ config PM_TRACE
config PM_TRACE_RTC
bool "Suspend/resume event tracing"
depends on CAN_PM_TRACE
- depends on X86
+ depends on X86 && !XEN_UNPRIVILEGED_GUEST
select PM_TRACE
default n
---help---
@@ -111,7 +111,7 @@ config SUSPEND
config PM_TEST_SUSPEND
bool "Test suspend/resume and wakealarm during bootup"
- depends on SUSPEND && PM_DEBUG && RTC_CLASS=y
+ depends on SUSPEND && PM_DEBUG && RTC_CLASS=y && !XEN_UNPRIVILEGED_GUEST
---help---
This option will let you suspend your machine during bootup, and
make it wake up a few seconds later using an RTC wakeup alarm.
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 15:04:27.000000000 +0100
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 15:09:47.000000000 +0100
@@ -58,7 +58,7 @@ static unsigned long io_tlb_nslabs;
*/
static unsigned long io_tlb_overflow = 32*1024;
-void *io_tlb_overflow_buffer;
+static void *io_tlb_overflow_buffer;
/*
* This is a free list describing the number of free entries available from
@@ -174,16 +174,16 @@ void __init swiotlb_init_with_tbl(char *
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
*/
- io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
+ io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
for (i = 0; i < io_tlb_nslabs; i++)
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
io_tlb_index = 0;
- io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
+ io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
/*
* Get the overflow emergency buffer
*/
- io_tlb_overflow_buffer = alloc_bootmem(io_tlb_overflow);
+ io_tlb_overflow_buffer = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_overflow));
if (!io_tlb_overflow_buffer)
panic("Cannot allocate SWIOTLB overflow buffer!\n");
@@ -218,7 +218,7 @@ swiotlb_init_with_default_size(size_t de
/*
* Get IO TLB memory from the low pages
*/
- io_tlb_start = alloc_bootmem_pages(bytes);
+ io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
if (!io_tlb_start)
panic("Cannot allocate SWIOTLB buffer");
--- head-2011-03-17.orig/mm/vmalloc.c 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/mm/vmalloc.c 2011-02-01 15:09:47.000000000 +0100
@@ -478,8 +478,6 @@ static void vmap_debug_free_range(unsign
#ifdef CONFIG_DEBUG_PAGEALLOC
vunmap_page_range(start, end);
flush_tlb_kernel_range(start, end);
-#elif defined(CONFIG_XEN) && defined(CONFIG_X86)
- vunmap_page_range(start, end);
#endif
}