11363 lines
320 KiB
Plaintext
11363 lines
320 KiB
Plaintext
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
|
|
Subject: Linux: 2.6.29
|
|
Patch-mainline: 2.6.29
|
|
|
|
This patch contains the differences between 2.6.28 and 2.6.29.
|
|
|
|
Acked-by: Jeff Mahoney <jeffm@suse.com>
|
|
Automatically created from "patches.kernel.org/patch-2.6.29" by xen-port-patches.py
|
|
|
|
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -311,7 +311,6 @@ config X86_XEN
|
|
select X86_PAE
|
|
select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
|
|
select X86_UP_IOAPIC if !SMP && XEN_PRIVILEGED_GUEST
|
|
- select SWIOTLB
|
|
help
|
|
Choose this option if you plan to run this kernel on top of the
|
|
Xen Hypervisor.
|
|
@@ -349,7 +348,6 @@ config X86_64_XEN
|
|
bool "Enable Xen compatible kernel"
|
|
depends on X86_64
|
|
select XEN
|
|
- select SWIOTLB
|
|
help
|
|
This option will compile a kernel compatible with Xen hypervisor
|
|
|
|
@@ -747,7 +745,7 @@ config AMD_IOMMU_STATS
|
|
|
|
# need this always selected by IOMMU for the VIA workaround
|
|
config SWIOTLB
|
|
- def_bool y if X86_64
|
|
+ def_bool y if X86_64 || XEN
|
|
---help---
|
|
Support for software bounce buffers used on x86-64 systems
|
|
which don't have a hardware IOMMU (e.g. the current generation
|
|
@@ -862,7 +860,7 @@ config X86_XEN_GENAPIC
|
|
|
|
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
|
|
bool "Reroute for broken boot IRQs"
|
|
- depends on X86_IO_APIC
|
|
+ depends on X86_IO_APIC && !XEN
|
|
---help---
|
|
This option enables a workaround that fixes a source of
|
|
spurious interrupts. This is recommended when threaded
|
|
--- head-2011-03-17.orig/arch/x86/Makefile 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/Makefile 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -158,8 +158,8 @@ BOOT_TARGETS = bzlilo bzdisk fdimage fdi
|
|
PHONY += bzImage vmlinuz $(BOOT_TARGETS)
|
|
|
|
ifdef CONFIG_XEN
|
|
-KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
|
|
- -I$(srctree)/arch/x86/include/mach-xen $(KBUILD_CPPFLAGS)
|
|
+LINUXINCLUDE := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
|
|
+ -I$(srctree)/arch/x86/include/mach-xen $(LINUXINCLUDE)
|
|
|
|
ifdef CONFIG_X86_64
|
|
LDFLAGS_vmlinux := -e startup_64
|
|
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -363,9 +363,9 @@ ENTRY(ia32_syscall)
|
|
orl $TS_COMPAT,TI_status(%r10)
|
|
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
|
|
jnz ia32_tracesys
|
|
-ia32_do_syscall:
|
|
cmpl $(IA32_NR_syscalls-1),%eax
|
|
- ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
|
|
+ ja ia32_badsys
|
|
+ia32_do_call:
|
|
IA32_ARG_FIXUP
|
|
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
|
|
ia32_sysret:
|
|
@@ -380,7 +380,9 @@ ia32_tracesys:
|
|
call syscall_trace_enter
|
|
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
|
|
RESTORE_REST
|
|
- jmp ia32_do_syscall
|
|
+ cmpl $(IA32_NR_syscalls-1),%eax
|
|
+ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
|
|
+ jmp ia32_do_call
|
|
END(ia32_syscall)
|
|
|
|
ia32_badsys:
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/hw_irq.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/hw_irq.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -145,7 +145,9 @@ extern irqreturn_t smp_call_function_sin
|
|
#endif
|
|
#endif
|
|
|
|
+#ifndef CONFIG_XEN
|
|
extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
|
|
+#endif
|
|
|
|
typedef int vector_irq_t[NR_VECTORS];
|
|
DECLARE_PER_CPU(vector_irq_t, vector_irq);
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/hypervisor.h 2011-03-17 14:35:45.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/hypervisor.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -60,3 +60,7 @@ static inline bool hypervisor_x2apic_ava
|
|
}
|
|
|
|
#endif
|
|
+
|
|
+#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
+#include_next <asm/hypervisor.h>
|
|
+#endif
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/kexec.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/kexec.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -12,13 +12,10 @@
|
|
/*
|
|
* The hypervisor interface implicitly requires that all entries (except
|
|
* for possibly the final one) are arranged in matching PA_/VA_ pairs.
|
|
+# define VA_PGD 3
|
|
*/
|
|
-# define PA_PMD_0 8
|
|
-# define VA_PMD_0 9
|
|
-# define PA_PMD_1 10
|
|
-# define VA_PMD_1 11
|
|
-# define PA_SWAP_PAGE 12
|
|
-# define PAGES_NR 13
|
|
+# define PA_SWAP_PAGE 4
|
|
+# define PAGES_NR 5
|
|
# endif /* CONFIG_XEN */
|
|
#else
|
|
# define PA_CONTROL_PAGE 0
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -342,16 +342,14 @@ static inline void set_intr_gate(unsigne
|
|
_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
|
|
}
|
|
|
|
-#define SYS_VECTOR_FREE 0
|
|
-#define SYS_VECTOR_ALLOCED 1
|
|
-
|
|
extern int first_system_vector;
|
|
-extern char system_vectors[];
|
|
+/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
|
|
+extern unsigned long used_vectors[];
|
|
|
|
static inline void alloc_system_vector(int vector)
|
|
{
|
|
- if (system_vectors[vector] == SYS_VECTOR_FREE) {
|
|
- system_vectors[vector] = SYS_VECTOR_ALLOCED;
|
|
+ if (!test_bit(vector, used_vectors)) {
|
|
+ set_bit(vector, used_vectors);
|
|
if (first_system_vector > vector)
|
|
first_system_vector = vector;
|
|
} else
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -16,7 +16,6 @@
|
|
#include <asm/apicdef.h>
|
|
#include <asm/page.h>
|
|
#include <asm/vsyscall.h>
|
|
-#include <asm/efi.h>
|
|
#include <asm/acpi.h>
|
|
|
|
/*
|
|
@@ -52,11 +51,6 @@ enum fixed_addresses {
|
|
FIX_ISAMAP_END,
|
|
FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
#endif
|
|
-#ifdef CONFIG_EFI
|
|
- FIX_EFI_IO_MAP_LAST_PAGE,
|
|
- FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
|
|
- + MAX_EFI_IO_PAGES - 1,
|
|
-#endif
|
|
#ifdef CONFIG_PARAVIRT
|
|
FIX_PARAVIRT_BOOTMAP,
|
|
#else
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -79,6 +79,7 @@ static inline void clear_user_highpage(s
|
|
clear_highpage(page);
|
|
}
|
|
#define __HAVE_ARCH_CLEAR_HIGHPAGE
|
|
+#define clear_user_highpage clear_user_highpage
|
|
#define __HAVE_ARCH_CLEAR_USER_HIGHPAGE
|
|
|
|
void copy_highpage(struct page *to, struct page *from);
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -69,6 +69,8 @@ extern start_info_t *xen_start_info;
|
|
#define is_initial_xendomain() 0
|
|
#endif
|
|
|
|
+#define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN))
|
|
+
|
|
struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
|
|
|
|
/* arch/xen/kernel/evtchn.c */
|
|
@@ -139,7 +141,7 @@ void scrub_pages(void *, unsigned int);
|
|
|
|
DECLARE_PER_CPU(bool, xen_lazy_mmu);
|
|
|
|
-int xen_multicall_flush(bool);
|
|
+void xen_multicall_flush(bool);
|
|
|
|
int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
|
|
unsigned long flags);
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/io.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/io.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -4,6 +4,7 @@
|
|
#define ARCH_HAS_IOREMAP_WC
|
|
|
|
#include <linux/compiler.h>
|
|
+#include <asm-generic/int-ll64.h>
|
|
|
|
#define build_mmio_read(name, size, type, reg, barrier) \
|
|
static inline type name(const volatile void __iomem *addr) \
|
|
@@ -45,21 +46,39 @@ build_mmio_write(__writel, "l", unsigned
|
|
#define mmiowb() barrier()
|
|
|
|
#ifdef CONFIG_X86_64
|
|
+
|
|
build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
|
|
-build_mmio_read(__readq, "q", unsigned long, "=r", )
|
|
build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
|
|
-build_mmio_write(__writeq, "q", unsigned long, "r", )
|
|
|
|
-#define readq_relaxed(a) __readq(a)
|
|
-#define __raw_readq __readq
|
|
-#define __raw_writeq writeq
|
|
-
|
|
-/* Let people know we have them */
|
|
-#define readq readq
|
|
-#define writeq writeq
|
|
+#else
|
|
+
|
|
+static inline __u64 readq(const volatile void __iomem *addr)
|
|
+{
|
|
+ const volatile u32 __iomem *p = addr;
|
|
+ u32 low, high;
|
|
+
|
|
+ low = readl(p);
|
|
+ high = readl(p + 1);
|
|
+
|
|
+ return low + ((u64)high << 32);
|
|
+}
|
|
+
|
|
+static inline void writeq(__u64 val, volatile void __iomem *addr)
|
|
+{
|
|
+ writel(val, addr);
|
|
+ writel(val >> 32, addr+4);
|
|
+}
|
|
+
|
|
#endif
|
|
|
|
-extern int iommu_bio_merge;
|
|
+#define readq_relaxed(a) readq(a)
|
|
+
|
|
+#define __raw_readq(a) readq(a)
|
|
+#define __raw_writeq(val, addr) writeq(val, addr)
|
|
+
|
|
+/* Let people know that we have them */
|
|
+#define readq readq
|
|
+#define writeq writeq
|
|
|
|
#define native_io_delay xen_io_delay
|
|
|
|
@@ -120,7 +139,6 @@ extern void __iomem *ioremap_wc(unsigned
|
|
* A boot-time mapping is currently limited to at most 16 pages.
|
|
*/
|
|
extern void early_ioremap_init(void);
|
|
-extern void early_ioremap_clear(void);
|
|
extern void early_ioremap_reset(void);
|
|
extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
|
|
extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:31:50.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:32:00.000000000 +0100
|
|
@@ -24,6 +24,8 @@
|
|
#define LAST_VM86_IRQ 15
|
|
#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
|
|
|
|
+#define NR_IRQS_LEGACY 16
|
|
+
|
|
/*
|
|
* The flat IRQ space is divided into two regions:
|
|
* 1. A one-to-one mapping of real physical IRQs. This space is only used
|
|
@@ -36,8 +38,10 @@
|
|
|
|
#define PIRQ_BASE 0
|
|
#if defined(NR_CPUS) && defined(MAX_IO_APICS)
|
|
-# if NR_CPUS < MAX_IO_APICS
|
|
+# if !defined(CONFIG_SPARSE_IRQ) && NR_CPUS < MAX_IO_APICS
|
|
# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS)
|
|
+# elif defined(CONFIG_SPARSE_IRQ) && 8 * NR_CPUS > 32 * MAX_IO_APICS
|
|
+# define NR_PIRQS (NR_VECTORS + 8 * NR_CPUS)
|
|
# else
|
|
# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS)
|
|
# endif
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -3,10 +3,9 @@
|
|
|
|
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
{
|
|
-#if 0 /* XEN: no lazy tlb */
|
|
- unsigned cpu = smp_processor_id();
|
|
- if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK)
|
|
- per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY;
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
+ if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
|
|
+ x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
|
|
#endif
|
|
}
|
|
|
|
@@ -38,9 +37,9 @@ static inline void switch_mm(struct mm_s
|
|
|
|
/* stop flush ipis for the previous mm */
|
|
cpu_clear(cpu, prev->cpu_vm_mask);
|
|
-#if 0 /* XEN: no lazy tlb */
|
|
- per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
|
|
- per_cpu(cpu_tlbstate, cpu).active_mm = next;
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
+ x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
|
|
+ x86_write_percpu(cpu_tlbstate.active_mm, next);
|
|
#endif
|
|
cpu_set(cpu, next->cpu_vm_mask);
|
|
|
|
@@ -62,10 +61,10 @@ static inline void switch_mm(struct mm_s
|
|
|
|
BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
|
|
}
|
|
-#if 0 /* XEN: no lazy tlb */
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
else {
|
|
- per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK;
|
|
- BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next);
|
|
+ x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
|
|
+ BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
|
|
|
|
if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
|
/* We were in lazy tlb mode and leave_mm disabled
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -22,6 +22,8 @@ struct pci_sysdata {
|
|
};
|
|
|
|
extern int pci_routeirq;
|
|
+extern int noioapicquirk;
|
|
+extern int noioapicreroute;
|
|
|
|
/* scan a bus after allocating a pci_sysdata for it */
|
|
extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
|
|
@@ -88,6 +90,8 @@ static inline void pci_dma_burst_advice(
|
|
static inline void early_quirks(void) { }
|
|
#endif
|
|
|
|
+extern void pci_iommu_alloc(void);
|
|
+
|
|
#endif /* __KERNEL__ */
|
|
|
|
#ifdef CONFIG_X86_32
|
|
@@ -104,9 +108,9 @@ static inline void early_quirks(void) {
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/* Returns the node based on pci bus */
|
|
-static inline int __pcibus_to_node(struct pci_bus *bus)
|
|
+static inline int __pcibus_to_node(const struct pci_bus *bus)
|
|
{
|
|
- struct pci_sysdata *sd = bus->sysdata;
|
|
+ const struct pci_sysdata *sd = bus->sysdata;
|
|
|
|
return sd->node;
|
|
}
|
|
@@ -115,6 +119,12 @@ static inline cpumask_t __pcibus_to_cpum
|
|
{
|
|
return node_to_cpumask(__pcibus_to_node(bus));
|
|
}
|
|
+
|
|
+static inline const struct cpumask *
|
|
+cpumask_of_pcibus(const struct pci_bus *bus)
|
|
+{
|
|
+ return cpumask_of_node(__pcibus_to_node(bus));
|
|
+}
|
|
#endif
|
|
|
|
#endif /* _ASM_X86_PCI_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-07 15:41:11.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -22,12 +22,10 @@
|
|
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
|
|
|
|
/* If _PAGE_BIT_PRESENT is clear, we use these: */
|
|
-
|
|
-/* set: nonlinear file mapping, saved PTE; unset:swap */
|
|
-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
|
|
-
|
|
-/* if the user mapped it with PROT_NONE; pte_present gives true */
|
|
+/* - if the user mapped it with PROT_NONE; pte_present gives true */
|
|
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
|
|
+/* - set: nonlinear file mapping, saved PTE; unset:swap */
|
|
+#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
|
|
|
|
#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
|
|
#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
|
|
@@ -176,8 +174,19 @@ extern unsigned int __kernel_page_user;
|
|
#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
|
|
#endif
|
|
|
|
+/*
|
|
+ * Macro to mark a page protection value as UC-
|
|
+ */
|
|
+#define pgprot_noncached(prot) \
|
|
+ ((boot_cpu_data.x86 > 3) \
|
|
+ ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \
|
|
+ : (prot))
|
|
+
|
|
#ifndef __ASSEMBLY__
|
|
|
|
+#define pgprot_writecombine pgprot_writecombine
|
|
+extern pgprot_t pgprot_writecombine(pgprot_t prot);
|
|
+
|
|
/*
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
* for zero-mapped memory areas etc..
|
|
@@ -309,41 +318,42 @@ static inline pte_t pte_mkspecial(pte_t
|
|
|
|
extern pteval_t __supported_pte_mask;
|
|
|
|
-static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
|
|
+/*
|
|
+ * Mask out unsupported bits in a present pgprot. Non-present pgprots
|
|
+ * can use those bits for other purposes, so leave them be.
|
|
+ */
|
|
+static inline pgprotval_t massage_pgprot(pgprot_t pgprot)
|
|
{
|
|
- pgprotval_t prot = pgprot_val(pgprot);
|
|
+ pgprotval_t protval = pgprot_val(pgprot);
|
|
+
|
|
+ if (protval & _PAGE_PRESENT)
|
|
+ protval &= __supported_pte_mask;
|
|
|
|
- if (prot & _PAGE_PRESENT)
|
|
- prot &= __supported_pte_mask;
|
|
- return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
|
|
+ return protval;
|
|
}
|
|
|
|
-static inline pte_t pfn_pte_ma(phys_addr_t page_nr, pgprot_t pgprot)
|
|
+static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
|
|
{
|
|
- pgprotval_t prot = pgprot_val(pgprot);
|
|
+ return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) |
|
|
+ massage_pgprot(pgprot));
|
|
+}
|
|
|
|
- if (prot & _PAGE_PRESENT)
|
|
- prot &= __supported_pte_mask;
|
|
- return __pte_ma((page_nr << PAGE_SHIFT) | prot);
|
|
+static inline pte_t pfn_pte_ma(phys_addr_t page_nr, pgprot_t pgprot)
|
|
+{
|
|
+ return __pte_ma((page_nr << PAGE_SHIFT) | massage_pgprot(pgprot));
|
|
}
|
|
|
|
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
|
|
{
|
|
- pgprotval_t prot = pgprot_val(pgprot);
|
|
-
|
|
- if (prot & _PAGE_PRESENT)
|
|
- prot &= __supported_pte_mask;
|
|
- return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
|
|
+ return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
|
|
+ massage_pgprot(pgprot));
|
|
}
|
|
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
{
|
|
- pgprotval_t prot = pgprot_val(newprot);
|
|
pteval_t val = pte_val(pte) & _PAGE_CHG_MASK;
|
|
|
|
- if (prot & _PAGE_PRESENT)
|
|
- prot &= __supported_pte_mask;
|
|
- val |= prot & ~_PAGE_CHG_MASK;
|
|
+ val |= massage_pgprot(newprot) & ~_PAGE_CHG_MASK;
|
|
|
|
return __pte(val);
|
|
}
|
|
@@ -359,11 +369,33 @@ static inline pgprot_t pgprot_modify(pgp
|
|
|
|
#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
|
|
|
|
-#define canon_pgprot(p) __pgprot(pgprot_val(p) & _PAGE_PRESENT \
|
|
- ? pgprot_val(p) & __supported_pte_mask \
|
|
- : pgprot_val(p))
|
|
+#define canon_pgprot(p) __pgprot(massage_pgprot(p))
|
|
+
|
|
+static inline int is_new_memtype_allowed(unsigned long flags,
|
|
+ unsigned long new_flags)
|
|
+{
|
|
+ /*
|
|
+ * Certain new memtypes are not allowed with certain
|
|
+ * requested memtype:
|
|
+ * - request is uncached, return cannot be write-back
|
|
+ * - request is write-combine, return cannot be write-back
|
|
+ */
|
|
+ if ((flags == _PAGE_CACHE_UC_MINUS &&
|
|
+ new_flags == _PAGE_CACHE_WB) ||
|
|
+ (flags == _PAGE_CACHE_WC &&
|
|
+ new_flags == _PAGE_CACHE_WB)) {
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
|
|
#ifndef __ASSEMBLY__
|
|
+#ifndef CONFIG_XEN
|
|
+/* Indicate that x86 has its own track and untrack pfn vma functions */
|
|
+#define __HAVE_PFNMAP_TRACKING
|
|
+#endif
|
|
+
|
|
#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
struct file;
|
|
pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -151,6 +151,7 @@ static inline int pte_none(pte_t pte)
|
|
#define PTE_FILE_MAX_BITS 32
|
|
|
|
/* Encode and de-code a swap entry */
|
|
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5)
|
|
#define __swp_type(x) (((x).val) & 0x1f)
|
|
#define __swp_offset(x) ((x).val >> 5)
|
|
#define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5})
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -107,15 +107,6 @@ extern unsigned long pg0[];
|
|
#endif
|
|
|
|
/*
|
|
- * Macro to mark a page protection value as "uncacheable".
|
|
- * On processors which do not support it, this is a no-op.
|
|
- */
|
|
-#define pgprot_noncached(prot) \
|
|
- ((boot_cpu_data.x86 > 3) \
|
|
- ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \
|
|
- : (prot))
|
|
-
|
|
-/*
|
|
* Conversion functions: convert a page and protection to a page entry,
|
|
* and a page entry and page directory to the page they refer to.
|
|
*/
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -149,8 +149,8 @@ static inline void xen_pgd_clear(pgd_t *
|
|
#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
|
|
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
|
|
|
-
|
|
-#define MAXMEM _AC(0x000004ffffffffff, UL)
|
|
+#define MAX_PHYSMEM_BITS 43
|
|
+#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
|
#define VMALLOC_START _AC(0xffffc20000000000, UL)
|
|
#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
|
|
#define VMEMMAP_START _AC(0xffffe20000000000, UL)
|
|
@@ -183,12 +183,6 @@ static inline int pmd_bad(pmd_t pmd)
|
|
#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
|
|
/*
|
|
- * Macro to mark a page protection value as "uncacheable".
|
|
- */
|
|
-#define pgprot_noncached(prot) \
|
|
- (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT))
|
|
-
|
|
-/*
|
|
* Conversion functions: convert a page and protection to a page entry,
|
|
* and a page entry and page directory to the page they refer to.
|
|
*/
|
|
@@ -270,6 +264,8 @@ static inline int pud_large(pud_t pte)
|
|
#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1)
|
|
#endif
|
|
|
|
+#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS)
|
|
+
|
|
#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \
|
|
& ((1U << SWP_TYPE_BITS) - 1))
|
|
#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT)
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:45:14.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:45:38.000000000 +0100
|
|
@@ -121,6 +121,7 @@ struct cpuinfo_x86 {
|
|
/* Index into per_cpu list: */
|
|
u16 cpu_index;
|
|
#endif
|
|
+ unsigned int x86_hyper_vendor;
|
|
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
|
|
|
|
#define X86_VENDOR_INTEL 0
|
|
@@ -134,6 +135,10 @@ struct cpuinfo_x86 {
|
|
|
|
#define X86_VENDOR_UNKNOWN 0xff
|
|
|
|
+#define X86_HYPER_VENDOR_NONE 0
|
|
+#define X86_HYPER_VENDOR_VMWARE 1
|
|
+#define X86_HYPER_VENDOR_XEN 'X'
|
|
+
|
|
/*
|
|
* capabilities of CPUs
|
|
*/
|
|
@@ -364,7 +369,7 @@ struct i387_soft_struct {
|
|
u8 no_update;
|
|
u8 rm;
|
|
u8 alimit;
|
|
- struct info *info;
|
|
+ struct math_emu_info *info;
|
|
u32 entry_eip;
|
|
};
|
|
|
|
@@ -705,6 +710,19 @@ extern void switch_to_new_gdt(void);
|
|
extern void cpu_init(void);
|
|
extern void init_gdt(int cpu);
|
|
|
|
+static inline unsigned long get_debugctlmsr(void)
|
|
+{
|
|
+ unsigned long debugctlmsr = 0;
|
|
+
|
|
+#ifndef CONFIG_X86_DEBUGCTLMSR
|
|
+ if (boot_cpu_data.x86 < 6)
|
|
+ return 0;
|
|
+#endif
|
|
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
|
|
+
|
|
+ return debugctlmsr;
|
|
+}
|
|
+
|
|
static inline void update_debugctlmsr(unsigned long debugctlmsr)
|
|
{
|
|
#ifndef CONFIG_X86_DEBUGCTLMSR
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -18,9 +18,26 @@
|
|
#include <asm/pda.h>
|
|
#include <asm/thread_info.h>
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
+
|
|
+#define cpu_callin_mask cpu_possible_mask
|
|
+#define cpu_callout_mask cpu_possible_mask
|
|
+extern cpumask_var_t cpu_initialized_mask;
|
|
+extern cpumask_var_t cpu_sibling_setup_mask;
|
|
+
|
|
+#else /* CONFIG_X86_32 */
|
|
+
|
|
+#define cpu_callin_map cpu_possible_map
|
|
#define cpu_callout_map cpu_possible_map
|
|
extern cpumask_t cpu_initialized;
|
|
-#define cpu_callin_map cpu_possible_map
|
|
+extern cpumask_t cpu_sibling_setup_map;
|
|
+
|
|
+#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
|
|
+#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
|
|
+#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
|
|
+#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
|
|
+
|
|
+#endif /* CONFIG_X86_32 */
|
|
|
|
extern void (*mtrr_hook)(void);
|
|
extern void zap_low_mappings(void);
|
|
@@ -28,7 +45,6 @@ extern void zap_low_mappings(void);
|
|
extern int __cpuinit get_local_pda(int cpu);
|
|
|
|
extern unsigned int num_processors;
|
|
-extern cpumask_t cpu_initialized;
|
|
|
|
#ifndef CONFIG_XEN
|
|
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
|
|
@@ -39,6 +55,16 @@ DECLARE_PER_CPU(u16, cpu_llc_id);
|
|
DECLARE_PER_CPU(int, cpu_number);
|
|
#endif
|
|
|
|
+static inline const struct cpumask *cpu_sibling_mask(int cpu)
|
|
+{
|
|
+ return cpumask_of(cpu);
|
|
+}
|
|
+
|
|
+static inline const struct cpumask *cpu_core_mask(int cpu)
|
|
+{
|
|
+ return cpumask_of(cpu);
|
|
+}
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
|
|
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
|
|
@@ -67,7 +93,7 @@ struct smp_ops {
|
|
void (*cpu_die)(unsigned int cpu);
|
|
void (*play_dead)(void);
|
|
|
|
- void (*send_call_func_ipi)(cpumask_t mask);
|
|
+ void (*send_call_func_ipi)(const struct cpumask *mask);
|
|
void (*send_call_func_single_ipi)(int cpu);
|
|
};
|
|
|
|
@@ -128,7 +154,7 @@ static inline void arch_send_call_functi
|
|
|
|
static inline void arch_send_call_function_ipi(cpumask_t mask)
|
|
{
|
|
- smp_ops.send_call_func_ipi(mask);
|
|
+ smp_ops.send_call_func_ipi(&mask);
|
|
}
|
|
|
|
void cpu_disable_common(void);
|
|
@@ -147,13 +173,13 @@ extern int __cpu_disable(void);
|
|
extern void __cpu_die(unsigned int cpu);
|
|
void xen_smp_send_stop(void);
|
|
void xen_smp_send_reschedule(int cpu);
|
|
-void xen_send_call_func_ipi(cpumask_t mask);
|
|
+void xen_send_call_func_ipi(const struct cpumask *mask);
|
|
void xen_send_call_func_single_ipi(int cpu);
|
|
|
|
#define smp_send_stop xen_smp_send_stop
|
|
#define smp_send_reschedule xen_smp_send_reschedule
|
|
#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
|
|
-#define arch_send_call_function_ipi xen_send_call_func_ipi
|
|
+#define arch_send_call_function_ipi(m) xen_send_call_func_ipi(&(m))
|
|
|
|
void play_dead(void);
|
|
|
|
@@ -167,7 +193,7 @@ void smp_store_cpu_info(int id);
|
|
/* We don't mark CPUs online until __cpu_up(), so we need another measure */
|
|
static inline int num_booting_cpus(void)
|
|
{
|
|
- return cpus_weight(cpu_callout_map);
|
|
+ return cpumask_weight(cpu_callout_mask);
|
|
}
|
|
#else
|
|
static inline void prefill_possible_map(void)
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -337,6 +337,7 @@ static inline int __raw_spin_is_contende
|
|
{
|
|
return __raw_spin(is_contended)(lock);
|
|
}
|
|
+#define __raw_spin_is_contended __raw_spin_is_contended
|
|
|
|
static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
|
|
{
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:01:23.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:05:49.000000000 +0100
|
|
@@ -18,12 +18,12 @@
|
|
# define AT_VECTOR_SIZE_ARCH 1
|
|
#endif
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-
|
|
struct task_struct; /* one of the stranger aspects of C forward declarations */
|
|
struct task_struct *__switch_to(struct task_struct *prev,
|
|
struct task_struct *next);
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+
|
|
/*
|
|
* Saving eflags is important. It switches not only IOPL between tasks,
|
|
* it also protects other tasks from NT leaking through sysenter etc.
|
|
@@ -300,6 +300,8 @@ extern void free_init_pages(char *what,
|
|
|
|
void xen_idle(void);
|
|
|
|
+void stop_this_cpu(void *dummy);
|
|
+
|
|
/*
|
|
* Force strict CPU ordering.
|
|
* And yes, this is required on UP too when we're talking
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/thread_info.h 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/thread_info.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -154,7 +154,7 @@ struct thread_info {
|
|
|
|
#else
|
|
#define _TIF_WORK_CTXSW (_TIF_NOTSC \
|
|
- /*todo | _TIF_DEBUGCTLMSR | _TIF_DS_AREA_MSR | _TIF_BTS_TRACE_TS*/)
|
|
+ /*todo | _TIF_DEBUGCTLMSR | _TIF_DS_AREA_MSR */)
|
|
#endif
|
|
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
|
|
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
|
|
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -163,6 +163,8 @@ static int __init acpi_sleep_setup(char
|
|
#ifdef CONFIG_HIBERNATION
|
|
if (strncmp(str, "s4_nohwsig", 10) == 0)
|
|
acpi_no_s4_hw_signature();
|
|
+ if (strncmp(str, "s4_nonvs", 8) == 0)
|
|
+ acpi_s4_no_nvs();
|
|
#endif
|
|
if (strncmp(str, "old_ordering", 12) == 0)
|
|
acpi_old_suspend_ordering();
|
|
--- head-2011-03-17.orig/arch/x86/kernel/apic/apic-xen.c 2011-02-24 15:49:32.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/apic-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -32,7 +32,7 @@ static int __init apic_set_verbosity(cha
|
|
else if (strcmp("verbose", arg) == 0)
|
|
apic_verbosity = APIC_VERBOSE;
|
|
else {
|
|
- printk(KERN_WARNING "APIC Verbosity level %s not recognised"
|
|
+ pr_warning("APIC Verbosity level %s not recognised"
|
|
" use apic=verbose or apic=debug\n", arg);
|
|
return -EINVAL;
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/Makefile 2011-02-03 14:29:46.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/Makefile 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
|
|
|
|
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
|
|
|
|
-disabled-obj-$(CONFIG_XEN) := perfctr-watchdog.o
|
|
+disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o vmware.o
|
|
|
|
quiet_cmd_mkcapflags = MKCAP $@
|
|
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:40:32.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:41:35.000000000 +0100
|
|
@@ -38,17 +38,45 @@
|
|
#include <asm/proto.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/setup.h>
|
|
+#include <asm/hypervisor.h>
|
|
|
|
#ifdef CONFIG_XEN
|
|
#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_LOCAL_APIC)
|
|
#define phys_pkg_id(a,b) a
|
|
#endif
|
|
-#include <asm/hypervisor.h>
|
|
#include <xen/interface/callback.h>
|
|
#endif
|
|
|
|
#include "cpu.h"
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
+
|
|
+/* all of these masks are initialized in setup_cpu_local_masks() */
|
|
+#ifndef CONFIG_XEN
|
|
+cpumask_var_t cpu_callin_mask;
|
|
+cpumask_var_t cpu_callout_mask;
|
|
+#endif
|
|
+cpumask_var_t cpu_initialized_mask;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/* representing cpus for which sibling maps can be computed */
|
|
+cpumask_var_t cpu_sibling_setup_mask;
|
|
+#endif
|
|
+
|
|
+#else /* CONFIG_X86_32 */
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+cpumask_t cpu_callin_map;
|
|
+cpumask_t cpu_callout_map;
|
|
+#endif
|
|
+cpumask_t cpu_initialized;
|
|
+#ifndef CONFIG_XEN
|
|
+cpumask_t cpu_sibling_setup_map;
|
|
+#endif
|
|
+
|
|
+#endif /* CONFIG_X86_32 */
|
|
+
|
|
+
|
|
static struct cpu_dev *this_cpu __cpuinitdata;
|
|
|
|
#ifdef CONFIG_X86_64
|
|
@@ -380,7 +408,7 @@ void __cpuinit detect_ht(struct cpuinfo_
|
|
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
} else if (smp_num_siblings > 1) {
|
|
|
|
- if (smp_num_siblings > NR_CPUS) {
|
|
+ if (smp_num_siblings > nr_cpu_ids) {
|
|
printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
|
|
smp_num_siblings);
|
|
smp_num_siblings = 1;
|
|
@@ -735,6 +763,7 @@ static void __cpuinit identify_cpu(struc
|
|
detect_ht(c);
|
|
#endif
|
|
|
|
+ init_hypervisor(c);
|
|
/*
|
|
* On SMP, boot_cpu_data holds the common feature set between
|
|
* all CPUs; so make sure that we indicate which features are
|
|
@@ -886,8 +915,6 @@ static __init int setup_disablecpuid(cha
|
|
}
|
|
__setup("clearcpuid=", setup_disablecpuid);
|
|
|
|
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
|
-
|
|
#ifdef CONFIG_X86_64
|
|
struct x8664_pda **_cpu_pda __read_mostly;
|
|
EXPORT_SYMBOL(_cpu_pda);
|
|
@@ -896,7 +923,7 @@ EXPORT_SYMBOL(_cpu_pda);
|
|
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
|
#endif
|
|
|
|
-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
|
+static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
|
|
|
static void __ref switch_pt(int cpu)
|
|
{
|
|
@@ -956,8 +983,8 @@ void __cpuinit pda_init(int cpu)
|
|
}
|
|
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
|
- DEBUG_STKSZ] __page_aligned_bss;
|
|
+static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
|
+ DEBUG_STKSZ] __page_aligned_bss;
|
|
#endif
|
|
|
|
extern asmlinkage void ignore_sysret(void);
|
|
@@ -1045,7 +1072,7 @@ void __cpuinit cpu_init(void)
|
|
|
|
me = current;
|
|
|
|
- if (cpu_test_and_set(cpu, cpu_initialized))
|
|
+ if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
|
|
panic("CPU#%d already initialized!\n", cpu);
|
|
|
|
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
@@ -1170,7 +1197,7 @@ void __cpuinit cpu_init(void)
|
|
#endif
|
|
struct thread_struct *thread = &curr->thread;
|
|
|
|
- if (cpu_test_and_set(cpu, cpu_initialized)) {
|
|
+ if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
|
|
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
|
|
for (;;) local_irq_enable();
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/intel.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/intel.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -36,10 +36,15 @@ static void __cpuinit early_init_intel(s
|
|
rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
|
|
|
|
if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
|
|
+#ifndef CONFIG_XEN
|
|
misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
|
|
wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
|
|
c->cpuid_level = cpuid_eax(0);
|
|
get_cpu_cap(c);
|
|
+#else
|
|
+ pr_warning("CPUID levels are restricted -"
|
|
+ " update hypervisor\n");
|
|
+#endif
|
|
}
|
|
}
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/mtrr/main-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -33,7 +33,7 @@ struct mtrr_ops generic_mtrr_ops = {
|
|
|
|
struct mtrr_ops *mtrr_if = &generic_mtrr_ops;
|
|
unsigned int num_var_ranges;
|
|
-unsigned int mtrr_usage_table[MAX_VAR_RANGES];
|
|
+unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
|
|
|
|
static u64 tom2;
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -719,6 +719,27 @@ void __init e820_mark_nosave_regions(uns
|
|
}
|
|
}
|
|
#endif
|
|
+
|
|
+#ifdef CONFIG_HIBERNATION
|
|
+/**
|
|
+ * Mark ACPI NVS memory region, so that we can save/restore it during
|
|
+ * hibernation and the subsequent resume.
|
|
+ */
|
|
+static int __init e820_mark_nvs_memory(void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+
|
|
+ if (ei->type == E820_NVS)
|
|
+ hibernate_nvs_register(ei->addr, ei->size);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+core_initcall(e820_mark_nvs_memory);
|
|
+#endif
|
|
#endif
|
|
|
|
/*
|
|
@@ -734,22 +755,6 @@ struct early_res {
|
|
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
|
#ifndef CONFIG_XEN
|
|
{ 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
|
|
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
|
|
- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
|
|
-#endif
|
|
-#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
|
|
- /*
|
|
- * But first pinch a few for the stack/trampoline stuff
|
|
- * FIXME: Don't need the extra page at 4K, but need to fix
|
|
- * trampoline before removing it. (see the GDT stuff)
|
|
- */
|
|
- { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
|
|
- /*
|
|
- * Has to be in very low memory so we can execute
|
|
- * real-mode AP code.
|
|
- */
|
|
- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
|
|
-#endif
|
|
#endif
|
|
{}
|
|
};
|
|
--- head-2011-03-17.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/early_printk-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -904,49 +904,6 @@ static struct console early_dbgp_console
|
|
};
|
|
#endif
|
|
|
|
-/* Console interface to a host file on AMD's SimNow! */
|
|
-
|
|
-static int simnow_fd;
|
|
-
|
|
-enum {
|
|
- MAGIC1 = 0xBACCD00A,
|
|
- MAGIC2 = 0xCA110000,
|
|
- XOPEN = 5,
|
|
- XWRITE = 4,
|
|
-};
|
|
-
|
|
-static noinline long simnow(long cmd, long a, long b, long c)
|
|
-{
|
|
- long ret;
|
|
-
|
|
- asm volatile("cpuid" :
|
|
- "=a" (ret) :
|
|
- "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
|
|
- return ret;
|
|
-}
|
|
-
|
|
-static void __init simnow_init(char *str)
|
|
-{
|
|
- char *fn = "klog";
|
|
-
|
|
- if (*str == '=')
|
|
- fn = ++str;
|
|
- /* error ignored */
|
|
- simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644);
|
|
-}
|
|
-
|
|
-static void simnow_write(struct console *con, const char *s, unsigned n)
|
|
-{
|
|
- simnow(XWRITE, simnow_fd, (unsigned long)s, n);
|
|
-}
|
|
-
|
|
-static struct console simnow_console = {
|
|
- .name = "simnow",
|
|
- .write = simnow_write,
|
|
- .flags = CON_PRINTBUFFER,
|
|
- .index = -1,
|
|
-};
|
|
-
|
|
/* Direct interface for emergencies */
|
|
static struct console *early_console = &early_vga_console;
|
|
static int __initdata early_console_initialized;
|
|
@@ -958,7 +915,7 @@ asmlinkage void early_printk(const char
|
|
va_list ap;
|
|
|
|
va_start(ap, fmt);
|
|
- n = vscnprintf(buf, 512, fmt, ap);
|
|
+ n = vscnprintf(buf, sizeof(buf), fmt, ap);
|
|
early_console->write(early_console, buf, n);
|
|
va_end(ap);
|
|
}
|
|
@@ -991,10 +948,6 @@ static int __init setup_early_printk(cha
|
|
current_ypos = boot_params.screen_info.orig_y;
|
|
#endif
|
|
early_console = &early_vga_console;
|
|
- } else if (!strncmp(buf, "simnow", 6)) {
|
|
- simnow_init(buf + 6);
|
|
- early_console = &simnow_console;
|
|
- keep_early = 1;
|
|
#ifdef CONFIG_EARLY_PRINTK_DBGP
|
|
} else if (!strncmp(buf, "dbgp", 4)) {
|
|
if (early_dbgp_init(buf+4) < 0)
|
|
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -690,28 +690,37 @@ END(syscall_badsys)
|
|
27:;
|
|
|
|
/*
|
|
- * Build the entry stubs and pointer table with
|
|
- * some assembler magic.
|
|
+ * Build the entry stubs and pointer table with some assembler magic.
|
|
+ * We pack 7 stubs into a single 32-byte chunk, which will fit in a
|
|
+ * single cache line on all modern x86 implementations.
|
|
*/
|
|
-.section .rodata,"a"
|
|
+.section .init.rodata,"a"
|
|
ENTRY(interrupt)
|
|
.text
|
|
-
|
|
+ .p2align 5
|
|
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
|
|
ENTRY(irq_entries_start)
|
|
RING0_INT_FRAME
|
|
-vector=0
|
|
-.rept NR_VECTORS
|
|
- ALIGN
|
|
- .if vector
|
|
+vector=FIRST_EXTERNAL_VECTOR
|
|
+.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
|
|
+ .balign 32
|
|
+ .rept 7
|
|
+ .if vector < NR_VECTORS
|
|
+ .if vector <> FIRST_EXTERNAL_VECTOR
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
- .endif
|
|
-1: pushl $~(vector)
|
|
+ .endif
|
|
+1: pushl $(~vector+0x80) /* Note: always in signed byte range */
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
- jmp common_interrupt
|
|
- .previous
|
|
+ .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
|
|
+ jmp 2f
|
|
+ .endif
|
|
+ .previous
|
|
.long 1b
|
|
- .text
|
|
+ .text
|
|
vector=vector+1
|
|
+ .endif
|
|
+ .endr
|
|
+2: jmp common_interrupt
|
|
.endr
|
|
END(irq_entries_start)
|
|
|
|
@@ -723,8 +732,9 @@ END(interrupt)
|
|
* the CPU automatically disables interrupts when executing an IRQ vector,
|
|
* so IRQ-flags tracing has to follow that:
|
|
*/
|
|
- ALIGN
|
|
+ .p2align CONFIG_X86_L1_CACHE_SHIFT
|
|
common_interrupt:
|
|
+ addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */
|
|
SAVE_ALL
|
|
TRACE_IRQS_OFF
|
|
movl %esp,%eax
|
|
@@ -751,68 +761,7 @@ ENDPROC(name)
|
|
|
|
#else
|
|
#define UNWIND_ESPFIX_STACK
|
|
-#endif
|
|
-
|
|
-KPROBE_ENTRY(page_fault)
|
|
- RING0_EC_FRAME
|
|
- pushl $do_page_fault
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- ALIGN
|
|
-error_code:
|
|
- /* the function address is in %fs's slot on the stack */
|
|
- pushl %es
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- /*CFI_REL_OFFSET es, 0*/
|
|
- pushl %ds
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- /*CFI_REL_OFFSET ds, 0*/
|
|
- pushl %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET eax, 0
|
|
- pushl %ebp
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET ebp, 0
|
|
- pushl %edi
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET edi, 0
|
|
- pushl %esi
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET esi, 0
|
|
- pushl %edx
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET edx, 0
|
|
- pushl %ecx
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET ecx, 0
|
|
- pushl %ebx
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- CFI_REL_OFFSET ebx, 0
|
|
- cld
|
|
- pushl %fs
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- /*CFI_REL_OFFSET fs, 0*/
|
|
- movl $(__KERNEL_PERCPU), %ecx
|
|
- movl %ecx, %fs
|
|
- UNWIND_ESPFIX_STACK
|
|
- popl %ecx
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
- /*CFI_REGISTER es, ecx*/
|
|
- movl PT_FS(%esp), %edi # get the function address
|
|
- movl PT_ORIG_EAX(%esp), %edx # get the error code
|
|
- movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
|
|
- mov %ecx, PT_FS(%esp)
|
|
- /*CFI_REL_OFFSET fs, ES*/
|
|
- movl $(__USER_DS), %ecx
|
|
- movl %ecx, %ds
|
|
- movl %ecx, %es
|
|
- TRACE_IRQS_OFF
|
|
- movl %esp,%eax # pt_regs pointer
|
|
- call *%edi
|
|
- jmp ret_from_exception
|
|
- CFI_ENDPROC
|
|
-KPROBE_END(page_fault)
|
|
|
|
-#ifdef CONFIG_XEN
|
|
# A note on the "critical region" in our callback handler.
|
|
# We want to avoid stacking callback handlers due to events occurring
|
|
# during handling of the last event. To do this, we keep events disabled
|
|
@@ -981,158 +930,6 @@ ENTRY(device_not_available)
|
|
CFI_ENDPROC
|
|
END(device_not_available)
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Debug traps and NMI can happen at the one SYSENTER instruction
|
|
- * that sets up the real kernel stack. Check here, since we can't
|
|
- * allow the wrong stack to be used.
|
|
- *
|
|
- * "SYSENTER_stack_sp0+12" is because the NMI/debug handler will have
|
|
- * already pushed 3 words if it hits on the sysenter instruction:
|
|
- * eflags, cs and eip.
|
|
- *
|
|
- * We just load the right stack, and push the three (known) values
|
|
- * by hand onto the new stack - while updating the return eip past
|
|
- * the instruction that would have done it for sysenter.
|
|
- */
|
|
-#define FIX_STACK(offset, ok, label) \
|
|
- cmpw $__KERNEL_CS,4(%esp); \
|
|
- jne ok; \
|
|
-label: \
|
|
- movl SYSENTER_stack_sp0+offset(%esp),%esp; \
|
|
- CFI_DEF_CFA esp, 0; \
|
|
- CFI_UNDEFINED eip; \
|
|
- pushfl; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- pushl $__KERNEL_CS; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- pushl $sysenter_past_esp; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- CFI_REL_OFFSET eip, 0
|
|
-#endif /* CONFIG_XEN */
|
|
-
|
|
-KPROBE_ENTRY(debug)
|
|
- RING0_INT_FRAME
|
|
-#ifndef CONFIG_XEN
|
|
- cmpl $ia32_sysenter_target,(%esp)
|
|
- jne debug_stack_correct
|
|
- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
|
|
-debug_stack_correct:
|
|
-#endif /* !CONFIG_XEN */
|
|
- pushl $-1 # mark this as an int
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- SAVE_ALL
|
|
- TRACE_IRQS_OFF
|
|
- xorl %edx,%edx # error code 0
|
|
- movl %esp,%eax # pt_regs pointer
|
|
- call do_debug
|
|
- jmp ret_from_exception
|
|
- CFI_ENDPROC
|
|
-KPROBE_END(debug)
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * NMI is doubly nasty. It can happen _while_ we're handling
|
|
- * a debug fault, and the debug fault hasn't yet been able to
|
|
- * clear up the stack. So we first check whether we got an
|
|
- * NMI on the sysenter entry path, but after that we need to
|
|
- * check whether we got an NMI on the debug path where the debug
|
|
- * fault happened on the sysenter path.
|
|
- */
|
|
-KPROBE_ENTRY(nmi)
|
|
- RING0_INT_FRAME
|
|
- pushl %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- movl %ss, %eax
|
|
- cmpw $__ESPFIX_SS, %ax
|
|
- popl %eax
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
- je nmi_espfix_stack
|
|
- cmpl $ia32_sysenter_target,(%esp)
|
|
- je nmi_stack_fixup
|
|
- pushl %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- movl %esp,%eax
|
|
- /* Do not access memory above the end of our stack page,
|
|
- * it might not exist.
|
|
- */
|
|
- andl $(THREAD_SIZE-1),%eax
|
|
- cmpl $(THREAD_SIZE-20),%eax
|
|
- popl %eax
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
- jae nmi_stack_correct
|
|
- cmpl $ia32_sysenter_target,12(%esp)
|
|
- je nmi_debug_stack_check
|
|
-nmi_stack_correct:
|
|
- /* We have a RING0_INT_FRAME here */
|
|
- pushl %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- SAVE_ALL
|
|
- TRACE_IRQS_OFF
|
|
- xorl %edx,%edx # zero error code
|
|
- movl %esp,%eax # pt_regs pointer
|
|
- call do_nmi
|
|
- jmp restore_nocheck_notrace
|
|
- CFI_ENDPROC
|
|
-
|
|
-nmi_stack_fixup:
|
|
- RING0_INT_FRAME
|
|
- FIX_STACK(12,nmi_stack_correct, 1)
|
|
- jmp nmi_stack_correct
|
|
-
|
|
-nmi_debug_stack_check:
|
|
- /* We have a RING0_INT_FRAME here */
|
|
- cmpw $__KERNEL_CS,16(%esp)
|
|
- jne nmi_stack_correct
|
|
- cmpl $debug,(%esp)
|
|
- jb nmi_stack_correct
|
|
- cmpl $debug_esp_fix_insn,(%esp)
|
|
- ja nmi_stack_correct
|
|
- FIX_STACK(24,nmi_stack_correct, 1)
|
|
- jmp nmi_stack_correct
|
|
-
|
|
-nmi_espfix_stack:
|
|
- /* We have a RING0_INT_FRAME here.
|
|
- *
|
|
- * create the pointer to lss back
|
|
- */
|
|
- pushl %ss
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- pushl %esp
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- addw $4, (%esp)
|
|
- /* copy the iret frame of 12 bytes */
|
|
- .rept 3
|
|
- pushl 16(%esp)
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- .endr
|
|
- pushl %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- SAVE_ALL
|
|
- TRACE_IRQS_OFF
|
|
- FIXUP_ESPFIX_STACK # %eax == %esp
|
|
- xorl %edx,%edx # zero error code
|
|
- call do_nmi
|
|
- RESTORE_REGS
|
|
- lss 12+4(%esp), %esp # back to espfix stack
|
|
- CFI_ADJUST_CFA_OFFSET -24
|
|
- jmp irq_return
|
|
- CFI_ENDPROC
|
|
-#else
|
|
-KPROBE_ENTRY(nmi)
|
|
- RING0_INT_FRAME
|
|
- pushl %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- SAVE_ALL
|
|
- xorl %edx,%edx # zero error code
|
|
- movl %esp,%eax # pt_regs pointer
|
|
- call do_nmi
|
|
- orl $NMI_MASK, PT_EFLAGS(%esp)
|
|
- jmp restore_all
|
|
- CFI_ENDPROC
|
|
-#endif
|
|
-KPROBE_END(nmi)
|
|
-
|
|
#ifdef CONFIG_PARAVIRT
|
|
ENTRY(native_iret)
|
|
iret
|
|
@@ -1148,19 +945,6 @@ ENTRY(native_irq_enable_sysexit)
|
|
END(native_irq_enable_sysexit)
|
|
#endif
|
|
|
|
-KPROBE_ENTRY(int3)
|
|
- RING0_INT_FRAME
|
|
- pushl $-1 # mark this as an int
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- SAVE_ALL
|
|
- TRACE_IRQS_OFF
|
|
- xorl %edx,%edx # zero error code
|
|
- movl %esp,%eax # pt_regs pointer
|
|
- call do_int3
|
|
- jmp ret_from_exception
|
|
- CFI_ENDPROC
|
|
-KPROBE_END(int3)
|
|
-
|
|
ENTRY(overflow)
|
|
RING0_INT_FRAME
|
|
pushl $0
|
|
@@ -1225,14 +1009,6 @@ ENTRY(stack_segment)
|
|
CFI_ENDPROC
|
|
END(stack_segment)
|
|
|
|
-KPROBE_ENTRY(general_protection)
|
|
- RING0_EC_FRAME
|
|
- pushl $do_general_protection
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- jmp error_code
|
|
- CFI_ENDPROC
|
|
-KPROBE_END(general_protection)
|
|
-
|
|
ENTRY(alignment_check)
|
|
RING0_EC_FRAME
|
|
pushl $do_alignment_check
|
|
@@ -1292,6 +1068,7 @@ ENTRY(kernel_thread_helper)
|
|
push %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
call do_exit
|
|
+ ud2 # padding for call trace
|
|
CFI_ENDPROC
|
|
ENDPROC(kernel_thread_helper)
|
|
|
|
@@ -1303,6 +1080,9 @@ ENTRY(mcount)
|
|
END(mcount)
|
|
|
|
ENTRY(ftrace_caller)
|
|
+ cmpl $0, function_trace_stop
|
|
+ jne ftrace_stub
|
|
+
|
|
pushl %eax
|
|
pushl %ecx
|
|
pushl %edx
|
|
@@ -1317,6 +1097,11 @@ ftrace_call:
|
|
popl %edx
|
|
popl %ecx
|
|
popl %eax
|
|
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
+.globl ftrace_graph_call
|
|
+ftrace_graph_call:
|
|
+ jmp ftrace_stub
|
|
+#endif
|
|
|
|
.globl ftrace_stub
|
|
ftrace_stub:
|
|
@@ -1326,8 +1111,18 @@ END(ftrace_caller)
|
|
#else /* ! CONFIG_DYNAMIC_FTRACE */
|
|
|
|
ENTRY(mcount)
|
|
+ cmpl $0, function_trace_stop
|
|
+ jne ftrace_stub
|
|
+
|
|
cmpl $ftrace_stub, ftrace_trace_function
|
|
jnz trace
|
|
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
+ cmpl $ftrace_stub, ftrace_graph_return
|
|
+ jnz ftrace_graph_caller
|
|
+
|
|
+ cmpl $ftrace_graph_entry_stub, ftrace_graph_entry
|
|
+ jnz ftrace_graph_caller
|
|
+#endif
|
|
.globl ftrace_stub
|
|
ftrace_stub:
|
|
ret
|
|
@@ -1346,12 +1141,43 @@ trace:
|
|
popl %edx
|
|
popl %ecx
|
|
popl %eax
|
|
-
|
|
jmp ftrace_stub
|
|
END(mcount)
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
#endif /* CONFIG_FUNCTION_TRACER */
|
|
|
|
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
+ENTRY(ftrace_graph_caller)
|
|
+ cmpl $0, function_trace_stop
|
|
+ jne ftrace_stub
|
|
+
|
|
+ pushl %eax
|
|
+ pushl %ecx
|
|
+ pushl %edx
|
|
+ movl 0xc(%esp), %edx
|
|
+ lea 0x4(%ebp), %eax
|
|
+ subl $MCOUNT_INSN_SIZE, %edx
|
|
+ call prepare_ftrace_return
|
|
+ popl %edx
|
|
+ popl %ecx
|
|
+ popl %eax
|
|
+ ret
|
|
+END(ftrace_graph_caller)
|
|
+
|
|
+.globl return_to_handler
|
|
+return_to_handler:
|
|
+ pushl $0
|
|
+ pushl %eax
|
|
+ pushl %ecx
|
|
+ pushl %edx
|
|
+ call ftrace_return_to_handler
|
|
+ movl %eax, 0xc(%esp)
|
|
+ popl %edx
|
|
+ popl %ecx
|
|
+ popl %eax
|
|
+ ret
|
|
+#endif
|
|
+
|
|
#include <asm/alternative-asm.h>
|
|
|
|
# pv syscall call handler stub
|
|
@@ -1485,3 +1311,238 @@ mask=0
|
|
#undef sys_fork
|
|
#undef sys_clone
|
|
#undef sys_vfork
|
|
+
|
|
+/*
|
|
+ * Some functions should be protected against kprobes
|
|
+ */
|
|
+ .pushsection .kprobes.text, "ax"
|
|
+
|
|
+ENTRY(page_fault)
|
|
+ RING0_EC_FRAME
|
|
+ pushl $do_page_fault
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ ALIGN
|
|
+error_code:
|
|
+ /* the function address is in %fs's slot on the stack */
|
|
+ pushl %es
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET es, 0*/
|
|
+ pushl %ds
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET ds, 0*/
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET eax, 0
|
|
+ pushl %ebp
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET ebp, 0
|
|
+ pushl %edi
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET edi, 0
|
|
+ pushl %esi
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET esi, 0
|
|
+ pushl %edx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET edx, 0
|
|
+ pushl %ecx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET ecx, 0
|
|
+ pushl %ebx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET ebx, 0
|
|
+ cld
|
|
+ pushl %fs
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET fs, 0*/
|
|
+ movl $(__KERNEL_PERCPU), %ecx
|
|
+ movl %ecx, %fs
|
|
+ UNWIND_ESPFIX_STACK
|
|
+ popl %ecx
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ /*CFI_REGISTER es, ecx*/
|
|
+ movl PT_FS(%esp), %edi # get the function address
|
|
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
|
|
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
|
|
+ mov %ecx, PT_FS(%esp)
|
|
+ /*CFI_REL_OFFSET fs, ES*/
|
|
+ movl $(__USER_DS), %ecx
|
|
+ movl %ecx, %ds
|
|
+ movl %ecx, %es
|
|
+ TRACE_IRQS_OFF
|
|
+ movl %esp,%eax # pt_regs pointer
|
|
+ call *%edi
|
|
+ jmp ret_from_exception
|
|
+ CFI_ENDPROC
|
|
+END(page_fault)
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Debug traps and NMI can happen at the one SYSENTER instruction
|
|
+ * that sets up the real kernel stack. Check here, since we can't
|
|
+ * allow the wrong stack to be used.
|
|
+ *
|
|
+ * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have
|
|
+ * already pushed 3 words if it hits on the sysenter instruction:
|
|
+ * eflags, cs and eip.
|
|
+ *
|
|
+ * We just load the right stack, and push the three (known) values
|
|
+ * by hand onto the new stack - while updating the return eip past
|
|
+ * the instruction that would have done it for sysenter.
|
|
+ */
|
|
+#define FIX_STACK(offset, ok, label) \
|
|
+ cmpw $__KERNEL_CS,4(%esp); \
|
|
+ jne ok; \
|
|
+label: \
|
|
+ movl TSS_sysenter_sp0+offset(%esp),%esp; \
|
|
+ CFI_DEF_CFA esp, 0; \
|
|
+ CFI_UNDEFINED eip; \
|
|
+ pushfl; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
+ pushl $__KERNEL_CS; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
+ pushl $sysenter_past_esp; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
+ CFI_REL_OFFSET eip, 0
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+ENTRY(debug)
|
|
+ RING0_INT_FRAME
|
|
+#ifndef CONFIG_XEN
|
|
+ cmpl $ia32_sysenter_target,(%esp)
|
|
+ jne debug_stack_correct
|
|
+ FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
|
|
+debug_stack_correct:
|
|
+#endif /* !CONFIG_XEN */
|
|
+ pushl $-1 # mark this as an int
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ SAVE_ALL
|
|
+ TRACE_IRQS_OFF
|
|
+ xorl %edx,%edx # error code 0
|
|
+ movl %esp,%eax # pt_regs pointer
|
|
+ call do_debug
|
|
+ jmp ret_from_exception
|
|
+ CFI_ENDPROC
|
|
+END(debug)
|
|
+
|
|
+/*
|
|
+ * NMI is doubly nasty. It can happen _while_ we're handling
|
|
+ * a debug fault, and the debug fault hasn't yet been able to
|
|
+ * clear up the stack. So we first check whether we got an
|
|
+ * NMI on the sysenter entry path, but after that we need to
|
|
+ * check whether we got an NMI on the debug path where the debug
|
|
+ * fault happened on the sysenter path.
|
|
+ */
|
|
+ENTRY(nmi)
|
|
+ RING0_INT_FRAME
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+#ifndef CONFIG_XEN
|
|
+ movl %ss, %eax
|
|
+ cmpw $__ESPFIX_SS, %ax
|
|
+ popl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ je nmi_espfix_stack
|
|
+ cmpl $ia32_sysenter_target,(%esp)
|
|
+ je nmi_stack_fixup
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ movl %esp,%eax
|
|
+ /* Do not access memory above the end of our stack page,
|
|
+ * it might not exist.
|
|
+ */
|
|
+ andl $(THREAD_SIZE-1),%eax
|
|
+ cmpl $(THREAD_SIZE-20),%eax
|
|
+ popl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ jae nmi_stack_correct
|
|
+ cmpl $ia32_sysenter_target,12(%esp)
|
|
+ je nmi_debug_stack_check
|
|
+nmi_stack_correct:
|
|
+ /* We have a RING0_INT_FRAME here */
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ SAVE_ALL
|
|
+ xorl %edx,%edx # zero error code
|
|
+ movl %esp,%eax # pt_regs pointer
|
|
+ call do_nmi
|
|
+ jmp restore_nocheck_notrace
|
|
+ CFI_ENDPROC
|
|
+
|
|
+nmi_stack_fixup:
|
|
+ RING0_INT_FRAME
|
|
+ FIX_STACK(12,nmi_stack_correct, 1)
|
|
+ jmp nmi_stack_correct
|
|
+
|
|
+nmi_debug_stack_check:
|
|
+ /* We have a RING0_INT_FRAME here */
|
|
+ cmpw $__KERNEL_CS,16(%esp)
|
|
+ jne nmi_stack_correct
|
|
+ cmpl $debug,(%esp)
|
|
+ jb nmi_stack_correct
|
|
+ cmpl $debug_esp_fix_insn,(%esp)
|
|
+ ja nmi_stack_correct
|
|
+ FIX_STACK(24,nmi_stack_correct, 1)
|
|
+ jmp nmi_stack_correct
|
|
+
|
|
+nmi_espfix_stack:
|
|
+ /* We have a RING0_INT_FRAME here.
|
|
+ *
|
|
+ * create the pointer to lss back
|
|
+ */
|
|
+ pushl %ss
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ pushl %esp
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ addw $4, (%esp)
|
|
+ /* copy the iret frame of 12 bytes */
|
|
+ .rept 3
|
|
+ pushl 16(%esp)
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ .endr
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ SAVE_ALL
|
|
+ FIXUP_ESPFIX_STACK # %eax == %esp
|
|
+ xorl %edx,%edx # zero error code
|
|
+ call do_nmi
|
|
+ RESTORE_REGS
|
|
+ lss 12+4(%esp), %esp # back to espfix stack
|
|
+ CFI_ADJUST_CFA_OFFSET -24
|
|
+ jmp irq_return
|
|
+#else
|
|
+ SAVE_ALL
|
|
+ xorl %edx,%edx # zero error code
|
|
+ movl %esp,%eax # pt_regs pointer
|
|
+ call do_nmi
|
|
+ orl $NMI_MASK, PT_EFLAGS(%esp)
|
|
+ jmp restore_all
|
|
+#endif
|
|
+ CFI_ENDPROC
|
|
+END(nmi)
|
|
+
|
|
+ENTRY(int3)
|
|
+ RING0_INT_FRAME
|
|
+ pushl $-1 # mark this as an int
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ SAVE_ALL
|
|
+ TRACE_IRQS_OFF
|
|
+ xorl %edx,%edx # zero error code
|
|
+ movl %esp,%eax # pt_regs pointer
|
|
+ call do_int3
|
|
+ jmp ret_from_exception
|
|
+ CFI_ENDPROC
|
|
+END(int3)
|
|
+
|
|
+ENTRY(general_protection)
|
|
+ RING0_EC_FRAME
|
|
+ pushl $do_general_protection
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ jmp error_code
|
|
+ CFI_ENDPROC
|
|
+END(general_protection)
|
|
+
|
|
+/*
|
|
+ * End of kprobes section
|
|
+ */
|
|
+ .popsection
|
|
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -14,15 +14,15 @@
|
|
*
|
|
* NOTE: This code handles signal-recognition, which happens every time
|
|
* after an interrupt and after each system call.
|
|
- *
|
|
- * Normal syscalls and interrupts don't save a full stack frame, this is
|
|
+ *
|
|
+ * Normal syscalls and interrupts don't save a full stack frame, this is
|
|
* only done for syscall tracing, signals or fork/exec et.al.
|
|
- *
|
|
- * A note on terminology:
|
|
- * - top of stack: Architecture defined interrupt frame from SS to RIP
|
|
- * at the top of the kernel process stack.
|
|
+ *
|
|
+ * A note on terminology:
|
|
+ * - top of stack: Architecture defined interrupt frame from SS to RIP
|
|
+ * at the top of the kernel process stack.
|
|
* - partial stack frame: partially saved registers upto R11.
|
|
- * - full stack frame: Like partial stack frame, but all register saved.
|
|
+ * - full stack frame: Like partial stack frame, but all register saved.
|
|
*
|
|
* Some macro usage:
|
|
* - CFI macros are used to generate dwarf2 unwind information for better
|
|
@@ -65,7 +65,6 @@
|
|
#define __AUDIT_ARCH_LE 0x40000000
|
|
|
|
.code64
|
|
-
|
|
#ifdef CONFIG_FUNCTION_TRACER
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
ENTRY(mcount)
|
|
@@ -73,16 +72,10 @@ ENTRY(mcount)
|
|
END(mcount)
|
|
|
|
ENTRY(ftrace_caller)
|
|
+ cmpl $0, function_trace_stop
|
|
+ jne ftrace_stub
|
|
|
|
- /* taken from glibc */
|
|
- subq $0x38, %rsp
|
|
- movq %rax, (%rsp)
|
|
- movq %rcx, 8(%rsp)
|
|
- movq %rdx, 16(%rsp)
|
|
- movq %rsi, 24(%rsp)
|
|
- movq %rdi, 32(%rsp)
|
|
- movq %r8, 40(%rsp)
|
|
- movq %r9, 48(%rsp)
|
|
+ MCOUNT_SAVE_FRAME
|
|
|
|
movq 0x38(%rsp), %rdi
|
|
movq 8(%rbp), %rsi
|
|
@@ -92,14 +85,13 @@ ENTRY(ftrace_caller)
|
|
ftrace_call:
|
|
call ftrace_stub
|
|
|
|
- movq 48(%rsp), %r9
|
|
- movq 40(%rsp), %r8
|
|
- movq 32(%rsp), %rdi
|
|
- movq 24(%rsp), %rsi
|
|
- movq 16(%rsp), %rdx
|
|
- movq 8(%rsp), %rcx
|
|
- movq (%rsp), %rax
|
|
- addq $0x38, %rsp
|
|
+ MCOUNT_RESTORE_FRAME
|
|
+
|
|
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
+.globl ftrace_graph_call
|
|
+ftrace_graph_call:
|
|
+ jmp ftrace_stub
|
|
+#endif
|
|
|
|
.globl ftrace_stub
|
|
ftrace_stub:
|
|
@@ -108,15 +100,63 @@ END(ftrace_caller)
|
|
|
|
#else /* ! CONFIG_DYNAMIC_FTRACE */
|
|
ENTRY(mcount)
|
|
+ cmpl $0, function_trace_stop
|
|
+ jne ftrace_stub
|
|
+
|
|
cmpq $ftrace_stub, ftrace_trace_function
|
|
jnz trace
|
|
+
|
|
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
+ cmpq $ftrace_stub, ftrace_graph_return
|
|
+ jnz ftrace_graph_caller
|
|
+
|
|
+ cmpq $ftrace_graph_entry_stub, ftrace_graph_entry
|
|
+ jnz ftrace_graph_caller
|
|
+#endif
|
|
+
|
|
.globl ftrace_stub
|
|
ftrace_stub:
|
|
retq
|
|
|
|
trace:
|
|
- /* taken from glibc */
|
|
- subq $0x38, %rsp
|
|
+ MCOUNT_SAVE_FRAME
|
|
+
|
|
+ movq 0x38(%rsp), %rdi
|
|
+ movq 8(%rbp), %rsi
|
|
+ subq $MCOUNT_INSN_SIZE, %rdi
|
|
+
|
|
+ call *ftrace_trace_function
|
|
+
|
|
+ MCOUNT_RESTORE_FRAME
|
|
+
|
|
+ jmp ftrace_stub
|
|
+END(mcount)
|
|
+#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
+#endif /* CONFIG_FUNCTION_TRACER */
|
|
+
|
|
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
+ENTRY(ftrace_graph_caller)
|
|
+ cmpl $0, function_trace_stop
|
|
+ jne ftrace_stub
|
|
+
|
|
+ MCOUNT_SAVE_FRAME
|
|
+
|
|
+ leaq 8(%rbp), %rdi
|
|
+ movq 0x38(%rsp), %rsi
|
|
+ subq $MCOUNT_INSN_SIZE, %rsi
|
|
+
|
|
+ call prepare_ftrace_return
|
|
+
|
|
+ MCOUNT_RESTORE_FRAME
|
|
+
|
|
+ retq
|
|
+END(ftrace_graph_caller)
|
|
+
|
|
+
|
|
+.globl return_to_handler
|
|
+return_to_handler:
|
|
+ subq $80, %rsp
|
|
+
|
|
movq %rax, (%rsp)
|
|
movq %rcx, 8(%rsp)
|
|
movq %rdx, 16(%rsp)
|
|
@@ -124,13 +164,14 @@ trace:
|
|
movq %rdi, 32(%rsp)
|
|
movq %r8, 40(%rsp)
|
|
movq %r9, 48(%rsp)
|
|
+ movq %r10, 56(%rsp)
|
|
+ movq %r11, 64(%rsp)
|
|
|
|
- movq 0x38(%rsp), %rdi
|
|
- movq 8(%rbp), %rsi
|
|
- subq $MCOUNT_INSN_SIZE, %rdi
|
|
-
|
|
- call *ftrace_trace_function
|
|
+ call ftrace_return_to_handler
|
|
|
|
+ movq %rax, 72(%rsp)
|
|
+ movq 64(%rsp), %r11
|
|
+ movq 56(%rsp), %r10
|
|
movq 48(%rsp), %r9
|
|
movq 40(%rsp), %r8
|
|
movq 32(%rsp), %rdi
|
|
@@ -138,16 +179,14 @@ trace:
|
|
movq 16(%rsp), %rdx
|
|
movq 8(%rsp), %rcx
|
|
movq (%rsp), %rax
|
|
- addq $0x38, %rsp
|
|
+ addq $72, %rsp
|
|
+ retq
|
|
+#endif
|
|
|
|
- jmp ftrace_stub
|
|
-END(mcount)
|
|
-#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
-#endif /* CONFIG_FUNCTION_TRACER */
|
|
|
|
#ifndef CONFIG_PREEMPT
|
|
#define retint_kernel retint_restore_args
|
|
-#endif
|
|
+#endif
|
|
|
|
|
|
.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
|
|
@@ -162,20 +201,20 @@ END(mcount)
|
|
NMI_MASK = 0x80000000
|
|
|
|
/*
|
|
- * C code is not supposed to know about undefined top of stack. Every time
|
|
- * a C function with an pt_regs argument is called from the SYSCALL based
|
|
+ * C code is not supposed to know about undefined top of stack. Every time
|
|
+ * a C function with a pt_regs argument is called from the SYSCALL based
|
|
* fast path FIXUP_TOP_OF_STACK is needed.
|
|
* RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
|
|
* manipulation.
|
|
- */
|
|
-
|
|
- /* %rsp:at FRAMEEND */
|
|
- .macro FIXUP_TOP_OF_STACK tmp
|
|
- movq $__USER_CS,CS(%rsp)
|
|
- movq $-1,RCX(%rsp)
|
|
+ */
|
|
+
|
|
+ /* %rsp:at FRAMEEND */
|
|
+ .macro FIXUP_TOP_OF_STACK tmp offset=0
|
|
+ movq $__USER_CS,CS+\offset(%rsp)
|
|
+ movq $-1,RCX+\offset(%rsp)
|
|
.endm
|
|
|
|
- .macro RESTORE_TOP_OF_STACK tmp,offset=0
|
|
+ .macro RESTORE_TOP_OF_STACK tmp offset=0
|
|
.endm
|
|
|
|
.macro FAKE_STACK_FRAME child_rip
|
|
@@ -187,7 +226,7 @@ NMI_MASK = 0x80000000
|
|
pushq %rax /* rsp */
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
CFI_REL_OFFSET rsp,0
|
|
- pushq $(1<<9) /* eflags - interrupts on */
|
|
+ pushq $X86_EFLAGS_IF /* eflags - interrupts on */
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
/*CFI_REL_OFFSET rflags,0*/
|
|
pushq $__KERNEL_CS /* cs */
|
|
@@ -205,36 +244,80 @@ NMI_MASK = 0x80000000
|
|
CFI_ADJUST_CFA_OFFSET -(6*8)
|
|
.endm
|
|
|
|
- .macro CFI_DEFAULT_STACK start=1,adj=0
|
|
+/*
|
|
+ * empty frame: only establishes the CFA at 8+\offset from %rsp, no register slots
|
|
+ */
|
|
+ .macro EMPTY_FRAME start=1 offset=0
|
|
.if \start
|
|
- CFI_STARTPROC simple
|
|
+ CFI_STARTPROC simple
|
|
CFI_SIGNAL_FRAME
|
|
- CFI_DEF_CFA rsp,SS+8 - \adj*ARGOFFSET
|
|
+ CFI_DEF_CFA rsp,8+\offset
|
|
.else
|
|
- CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET
|
|
+ CFI_DEF_CFA_OFFSET 8+\offset
|
|
.endif
|
|
- .if \adj == 0
|
|
- CFI_REL_OFFSET r15,R15
|
|
- CFI_REL_OFFSET r14,R14
|
|
- CFI_REL_OFFSET r13,R13
|
|
- CFI_REL_OFFSET r12,R12
|
|
- CFI_REL_OFFSET rbp,RBP
|
|
- CFI_REL_OFFSET rbx,RBX
|
|
+ .endm
|
|
+
|
|
+/*
|
|
+ * initial frame state for syscall
|
|
+ */
|
|
+ .macro BASIC_FRAME start=1 offset=0
|
|
+ EMPTY_FRAME \start, SS+8+\offset-RIP
|
|
+ /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
|
|
+ CFI_REL_OFFSET rsp, RSP+\offset-RIP
|
|
+ /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
|
|
+ /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
|
|
+ CFI_REL_OFFSET rip, RIP+\offset-RIP
|
|
+ .endm
|
|
+
|
|
+/*
|
|
+ * initial frame state for interrupts (and exceptions without error code)
|
|
+ */
|
|
+ .macro INTR_FRAME start=1 offset=0
|
|
+ .if \start == 1
|
|
+ BASIC_FRAME 1, \offset+2*8
|
|
+ CFI_REL_OFFSET rcx, 0+\offset
|
|
+ CFI_REL_OFFSET r11, 8+\offset
|
|
+ .else
|
|
+ BASIC_FRAME \start, \offset
|
|
.endif
|
|
- CFI_REL_OFFSET r11,R11 - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET r10,R10 - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET r9,R9 - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET r8,R8 - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET rax,RAX - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET rcx,RCX - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET rdx,RDX - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET rsi,RSI - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET rdi,RDI - \adj*ARGOFFSET
|
|
- CFI_REL_OFFSET rip,RIP - \adj*ARGOFFSET
|
|
- /*CFI_REL_OFFSET cs,CS - \adj*ARGOFFSET*/
|
|
- /*CFI_REL_OFFSET rflags,EFLAGS - \adj*ARGOFFSET*/
|
|
- CFI_REL_OFFSET rsp,RSP - \adj*ARGOFFSET
|
|
- /*CFI_REL_OFFSET ss,SS - \adj*ARGOFFSET*/
|
|
+ .endm
|
|
+
|
|
+/*
|
|
+ * initial frame state for exceptions with error code (and interrupts
|
|
+ * with vector already pushed)
|
|
+ */
|
|
+ .macro XCPT_FRAME start=1 offset=0
|
|
+ INTR_FRAME \start, RIP+\offset-ORIG_RAX
|
|
+ /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/
|
|
+ .endm
|
|
+
|
|
+/*
|
|
+ * frame that enables calling into C.
|
|
+ */
|
|
+ .macro PARTIAL_FRAME start=1 offset=0
|
|
+ XCPT_FRAME 2*\start, ORIG_RAX+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
|
|
+ CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
|
|
+ .endm
|
|
+
|
|
+/*
|
|
+ * frame that enables passing a complete pt_regs to a C function.
|
|
+ */
|
|
+ .macro DEFAULT_FRAME start=1 offset=0
|
|
+ PARTIAL_FRAME \start, R11+\offset-R15
|
|
+ CFI_REL_OFFSET rbx, RBX+\offset
|
|
+ CFI_REL_OFFSET rbp, RBP+\offset
|
|
+ CFI_REL_OFFSET r12, R12+\offset
|
|
+ CFI_REL_OFFSET r13, R13+\offset
|
|
+ CFI_REL_OFFSET r14, R14+\offset
|
|
+ CFI_REL_OFFSET r15, R15+\offset
|
|
.endm
|
|
|
|
/*
|
|
@@ -264,70 +347,149 @@ NMI_MASK = 0x80000000
|
|
jmp hypercall_page + (__HYPERVISOR_iret * 32)
|
|
.endm
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+/* save partial stack frame */
|
|
+ENTRY(save_args)
|
|
+ XCPT_FRAME
|
|
+ cld
|
|
+ movq_cfi rdi, RDI+16-ARGOFFSET
|
|
+ movq_cfi rsi, RSI+16-ARGOFFSET
|
|
+ movq_cfi rdx, RDX+16-ARGOFFSET
|
|
+ movq_cfi rcx, RCX+16-ARGOFFSET
|
|
+ movq_cfi rax, RAX+16-ARGOFFSET
|
|
+ movq_cfi r8, R8+16-ARGOFFSET
|
|
+ movq_cfi r9, R9+16-ARGOFFSET
|
|
+ movq_cfi r10, R10+16-ARGOFFSET
|
|
+ movq_cfi r11, R11+16-ARGOFFSET
|
|
+
|
|
+ leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */
|
|
+ movq_cfi rbp, 8 /* push %rbp */
|
|
+ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */
|
|
+ testl $3, CS(%rdi)
|
|
+ je 1f
|
|
+ SWAPGS
|
|
+ /*
|
|
+ * irqcount is used to check if a CPU is already on an interrupt stack
|
|
+ * or not. While this is essentially redundant with preempt_count it is
|
|
+ * a little cheaper to use a separate counter in the PDA (short of
|
|
+ * moving irq_enter into assembly, which would be too much work)
|
|
+ */
|
|
+1: incl %gs:pda_irqcount
|
|
+ jne 2f
|
|
+ popq_cfi %rax /* move return address... */
|
|
+ mov %gs:pda_irqstackptr,%rsp
|
|
+ EMPTY_FRAME 0
|
|
+ pushq_cfi %rbp /* backlink for unwinder */
|
|
+ pushq_cfi %rax /* ... to the new stack */
|
|
+ /*
|
|
+ * We entered an interrupt context - irqs are off:
|
|
+ */
|
|
+2: TRACE_IRQS_OFF
|
|
+ ret
|
|
+ CFI_ENDPROC
|
|
+END(save_args)
|
|
+#endif
|
|
+
|
|
+ENTRY(save_rest)
|
|
+ PARTIAL_FRAME 1 REST_SKIP+8
|
|
+ movq 5*8+16(%rsp), %r11 /* save return address */
|
|
+ movq_cfi rbx, RBX+16
|
|
+ movq_cfi rbp, RBP+16
|
|
+ movq_cfi r12, R12+16
|
|
+ movq_cfi r13, R13+16
|
|
+ movq_cfi r14, R14+16
|
|
+ movq_cfi r15, R15+16
|
|
+ movq %r11, 8(%rsp) /* return address */
|
|
+ FIXUP_TOP_OF_STACK %r11, 16
|
|
+ ret
|
|
+ CFI_ENDPROC
|
|
+END(save_rest)
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/* save complete stack frame */
|
|
+ENTRY(save_paranoid)
|
|
+ XCPT_FRAME 1 RDI+8
|
|
+ cld
|
|
+ movq_cfi rdi, RDI+8
|
|
+ movq_cfi rsi, RSI+8
|
|
+ movq_cfi rdx, RDX+8
|
|
+ movq_cfi rcx, RCX+8
|
|
+ movq_cfi rax, RAX+8
|
|
+ movq_cfi r8, R8+8
|
|
+ movq_cfi r9, R9+8
|
|
+ movq_cfi r10, R10+8
|
|
+ movq_cfi r11, R11+8
|
|
+ movq_cfi rbx, RBX+8
|
|
+ movq_cfi rbp, RBP+8
|
|
+ movq_cfi r12, R12+8
|
|
+ movq_cfi r13, R13+8
|
|
+ movq_cfi r14, R14+8
|
|
+ movq_cfi r15, R15+8
|
|
+ movl $1,%ebx
|
|
+ movl $MSR_GS_BASE,%ecx
|
|
+ rdmsr
|
|
+ testl %edx,%edx
|
|
+ js 1f /* negative -> in kernel */
|
|
+ SWAPGS
|
|
+ xorl %ebx,%ebx
|
|
+1: ret
|
|
+ CFI_ENDPROC
|
|
+END(save_paranoid)
|
|
+#endif
|
|
+
|
|
/*
|
|
- * A newly forked process directly context switches into this.
|
|
- */
|
|
-/* rdi: prev */
|
|
+ * A newly forked process directly context switches into this address.
|
|
+ *
|
|
+ * rdi: prev task we switched from
|
|
+ */
|
|
ENTRY(ret_from_fork)
|
|
- CFI_DEFAULT_STACK
|
|
+ DEFAULT_FRAME
|
|
+
|
|
push kernel_eflags(%rip)
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
- popf # reset kernel eflags
|
|
+ popf # reset kernel eflags
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
- call schedule_tail
|
|
+
|
|
+ call schedule_tail # rdi: 'prev' task parameter
|
|
+
|
|
GET_THREAD_INFO(%rcx)
|
|
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
|
|
- jnz rff_trace
|
|
-rff_action:
|
|
+
|
|
+ CFI_REMEMBER_STATE
|
|
RESTORE_REST
|
|
- testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
|
|
+
|
|
+ testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
|
|
je int_ret_from_sys_call
|
|
- testl $_TIF_IA32,TI_flags(%rcx)
|
|
+
|
|
+ testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
|
|
jnz int_ret_from_sys_call
|
|
- RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
|
|
- jmp ret_from_sys_call
|
|
-rff_trace:
|
|
- movq %rsp,%rdi
|
|
- call syscall_trace_leave
|
|
- GET_THREAD_INFO(%rcx)
|
|
- jmp rff_action
|
|
+
|
|
+ RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
|
|
+ jmp ret_from_sys_call # go to the SYSRET fastpath
|
|
+
|
|
+ CFI_RESTORE_STATE
|
|
CFI_ENDPROC
|
|
END(ret_from_fork)
|
|
|
|
/*
|
|
- * initial frame state for interrupts and exceptions
|
|
- */
|
|
- .macro _frame ref
|
|
- CFI_STARTPROC simple
|
|
- CFI_SIGNAL_FRAME
|
|
- CFI_DEF_CFA rsp,SS+8-\ref
|
|
- /*CFI_REL_OFFSET ss,SS-\ref*/
|
|
- CFI_REL_OFFSET rsp,RSP-\ref
|
|
- /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/
|
|
- /*CFI_REL_OFFSET cs,CS-\ref*/
|
|
- CFI_REL_OFFSET rip,RIP-\ref
|
|
- .endm
|
|
-
|
|
-/*
|
|
* System call entry. Upto 6 arguments in registers are supported.
|
|
*
|
|
* SYSCALL does not save anything on the stack and does not change the
|
|
* stack pointer.
|
|
*/
|
|
-
|
|
+
|
|
/*
|
|
- * Register setup:
|
|
+ * Register setup:
|
|
* rax system call number
|
|
* rdi arg0
|
|
- * rcx return address for syscall/sysret, C arg3
|
|
+ * rcx return address for syscall/sysret, C arg3
|
|
* rsi arg1
|
|
- * rdx arg2
|
|
+ * rdx arg2
|
|
* r10 arg3 (--> moved to rcx for C)
|
|
* r8 arg4
|
|
* r9 arg5
|
|
* r11 eflags for syscall/sysret, temporary for C
|
|
- * r12-r15,rbp,rbx saved by C code, not touched.
|
|
- *
|
|
+ * r12-r15,rbp,rbx saved by C code, not touched.
|
|
+ *
|
|
* Interrupts are enabled on entry.
|
|
* Only called from user space.
|
|
*
|
|
@@ -337,10 +499,10 @@ END(ret_from_fork)
|
|
* When user can change the frames always force IRET. That is because
|
|
* it deals with uncanonical addresses better. SYSRET has trouble
|
|
* with them due to bugs in both AMD and Intel CPUs.
|
|
- */
|
|
+ */
|
|
|
|
ENTRY(system_call)
|
|
- _frame (RIP-0x10)
|
|
+ INTR_FRAME start=2 offset=2*8
|
|
SAVE_ARGS -8,0
|
|
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
|
|
GET_THREAD_INFO(%rcx)
|
|
@@ -354,19 +516,19 @@ system_call_fastpath:
|
|
movq %rax,RAX-ARGOFFSET(%rsp)
|
|
/*
|
|
* Syscall return path ending with SYSRET (fast path)
|
|
- * Has incomplete stack frame and undefined top of stack.
|
|
- */
|
|
+ * Has incomplete stack frame and undefined top of stack.
|
|
+ */
|
|
ret_from_sys_call:
|
|
movl $_TIF_ALLWORK_MASK,%edi
|
|
/* edi: flagmask */
|
|
-sysret_check:
|
|
+sysret_check:
|
|
LOCKDEP_SYS_EXIT
|
|
GET_THREAD_INFO(%rcx)
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%rcx),%edx
|
|
andl %edi,%edx
|
|
- jnz sysret_careful
|
|
+ jnz sysret_careful
|
|
CFI_REMEMBER_STATE
|
|
/*
|
|
* sysretq will re-enable interrupts:
|
|
@@ -378,7 +540,7 @@ sysret_check:
|
|
|
|
CFI_RESTORE_STATE
|
|
/* Handle reschedules */
|
|
- /* edx: work, edi: workmask */
|
|
+ /* edx: work, edi: workmask */
|
|
sysret_careful:
|
|
bt $TIF_NEED_RESCHED,%edx
|
|
jnc sysret_signal
|
|
@@ -391,7 +553,7 @@ sysret_careful:
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
jmp sysret_check
|
|
|
|
- /* Handle a signal */
|
|
+ /* Handle a signal */
|
|
sysret_signal:
|
|
TRACE_IRQS_ON
|
|
ENABLE_INTERRUPTS(CLBR_NONE)
|
|
@@ -400,17 +562,20 @@ sysret_signal:
|
|
jc sysret_audit
|
|
#endif
|
|
/* edx: work flags (arg3) */
|
|
- leaq do_notify_resume(%rip),%rax
|
|
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
|
|
xorl %esi,%esi # oldset -> arg2
|
|
- call ptregscall_common
|
|
+ SAVE_REST
|
|
+ FIXUP_TOP_OF_STACK %r11
|
|
+ call do_notify_resume
|
|
+ RESTORE_TOP_OF_STACK %r11
|
|
+ RESTORE_REST
|
|
movl $_TIF_WORK_MASK,%edi
|
|
/* Use IRET because user could have changed frame. This
|
|
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
jmp int_with_check
|
|
-
|
|
+
|
|
badsys:
|
|
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
|
|
jmp ret_from_sys_call
|
|
@@ -449,7 +614,7 @@ sysret_audit:
|
|
#endif /* CONFIG_AUDITSYSCALL */
|
|
|
|
/* Do syscall tracing */
|
|
-tracesys:
|
|
+tracesys:
|
|
#ifdef CONFIG_AUDITSYSCALL
|
|
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
|
|
jz auditsys
|
|
@@ -472,8 +637,8 @@ tracesys:
|
|
call *sys_call_table(,%rax,8)
|
|
movq %rax,RAX-ARGOFFSET(%rsp)
|
|
/* Use IRET because user could have changed frame */
|
|
-
|
|
-/*
|
|
+
|
|
+/*
|
|
* Syscall return path ending with IRET.
|
|
* Has correct top of stack, but partial stack frame.
|
|
*/
|
|
@@ -521,18 +686,18 @@ int_very_careful:
|
|
TRACE_IRQS_ON
|
|
ENABLE_INTERRUPTS(CLBR_NONE)
|
|
SAVE_REST
|
|
- /* Check for syscall exit trace */
|
|
+ /* Check for syscall exit trace */
|
|
testl $_TIF_WORK_SYSCALL_EXIT,%edx
|
|
jz int_signal
|
|
pushq %rdi
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
- leaq 8(%rsp),%rdi # &ptregs -> arg1
|
|
+ leaq 8(%rsp),%rdi # &ptregs -> arg1
|
|
call syscall_trace_leave
|
|
popq %rdi
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
|
|
jmp int_restore_rest
|
|
-
|
|
+
|
|
int_signal:
|
|
testl $_TIF_DO_NOTIFY_MASK,%edx
|
|
jz 1f
|
|
@@ -547,22 +712,24 @@ int_restore_rest:
|
|
jmp int_with_check
|
|
CFI_ENDPROC
|
|
END(system_call)
|
|
-
|
|
-/*
|
|
+
|
|
+/*
|
|
* Certain special system calls that need to save a complete full stack frame.
|
|
- */
|
|
-
|
|
+ */
|
|
.macro PTREGSCALL label,func,arg
|
|
- .globl \label
|
|
-\label:
|
|
- leaq \func(%rip),%rax
|
|
- leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */
|
|
- jmp ptregscall_common
|
|
+ENTRY(\label)
|
|
+ PARTIAL_FRAME 1 8 /* offset 8: return address */
|
|
+ subq $REST_SKIP, %rsp
|
|
+ CFI_ADJUST_CFA_OFFSET REST_SKIP
|
|
+ call save_rest
|
|
+ DEFAULT_FRAME 0 8 /* offset 8: return address */
|
|
+ leaq 8(%rsp), \arg /* pt_regs pointer */
|
|
+ call \func
|
|
+ jmp ptregscall_common
|
|
+ CFI_ENDPROC
|
|
END(\label)
|
|
.endm
|
|
|
|
- CFI_STARTPROC
|
|
-
|
|
PTREGSCALL stub_clone, sys_clone, %r8
|
|
PTREGSCALL stub_fork, sys_fork, %rdi
|
|
PTREGSCALL stub_vfork, sys_vfork, %rdi
|
|
@@ -570,25 +737,18 @@ END(\label)
|
|
PTREGSCALL stub_iopl, sys_iopl, %rsi
|
|
|
|
ENTRY(ptregscall_common)
|
|
- popq %r11
|
|
- CFI_ADJUST_CFA_OFFSET -8
|
|
- CFI_REGISTER rip, r11
|
|
- SAVE_REST
|
|
- movq %r11, %r15
|
|
- CFI_REGISTER rip, r15
|
|
- FIXUP_TOP_OF_STACK %r11
|
|
- call *%rax
|
|
- RESTORE_TOP_OF_STACK %r11
|
|
- movq %r15, %r11
|
|
- CFI_REGISTER rip, r11
|
|
- RESTORE_REST
|
|
- pushq %r11
|
|
- CFI_ADJUST_CFA_OFFSET 8
|
|
- CFI_REL_OFFSET rip, 0
|
|
- ret
|
|
+ DEFAULT_FRAME 1 8 /* offset 8: return address */
|
|
+ RESTORE_TOP_OF_STACK %r11, 8
|
|
+ movq_cfi_restore R15+8, r15
|
|
+ movq_cfi_restore R14+8, r14
|
|
+ movq_cfi_restore R13+8, r13
|
|
+ movq_cfi_restore R12+8, r12
|
|
+ movq_cfi_restore RBP+8, rbp
|
|
+ movq_cfi_restore RBX+8, rbx
|
|
+ ret $REST_SKIP /* pop extended registers */
|
|
CFI_ENDPROC
|
|
END(ptregscall_common)
|
|
-
|
|
+
|
|
ENTRY(stub_execve)
|
|
CFI_STARTPROC
|
|
popq %r11
|
|
@@ -604,11 +764,11 @@ ENTRY(stub_execve)
|
|
jmp int_ret_from_sys_call
|
|
CFI_ENDPROC
|
|
END(stub_execve)
|
|
-
|
|
+
|
|
/*
|
|
* sigreturn is special because it needs to restore all registers on return.
|
|
* This cannot be done with SYSRET, so use the IRET return path instead.
|
|
- */
|
|
+ */
|
|
ENTRY(stub_rt_sigreturn)
|
|
CFI_STARTPROC
|
|
addq $8, %rsp
|
|
@@ -623,24 +783,12 @@ ENTRY(stub_rt_sigreturn)
|
|
CFI_ENDPROC
|
|
END(stub_rt_sigreturn)
|
|
|
|
-/* initial frame state for interrupts (and exceptions without error code) */
|
|
-#define INTR_FRAME _frame (RIP-0x10); \
|
|
- CFI_REL_OFFSET rcx,0; \
|
|
- CFI_REL_OFFSET r11,8
|
|
-
|
|
-/* initial frame state for exceptions with error code (and interrupts with
|
|
- vector already pushed) */
|
|
-#define XCPT_FRAME _frame (RIP-0x18); \
|
|
- CFI_REL_OFFSET rcx,0; \
|
|
- CFI_REL_OFFSET r11,8
|
|
-
|
|
-/*
|
|
+/*
|
|
* Interrupt exit.
|
|
- *
|
|
*/
|
|
|
|
retint_with_reschedule:
|
|
- CFI_DEFAULT_STACK adj=1
|
|
+ PARTIAL_FRAME
|
|
movl $_TIF_WORK_MASK,%edi
|
|
retint_check:
|
|
LOCKDEP_SYS_EXIT_IRQ
|
|
@@ -669,20 +817,20 @@ retint_careful:
|
|
pushq %rdi
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
call schedule
|
|
- popq %rdi
|
|
+ popq %rdi
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
GET_THREAD_INFO(%rcx)
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
jmp retint_check
|
|
-
|
|
+
|
|
retint_signal:
|
|
testl $_TIF_DO_NOTIFY_MASK,%edx
|
|
jz retint_restore_args
|
|
TRACE_IRQS_ON
|
|
ENABLE_INTERRUPTS(CLBR_NONE)
|
|
SAVE_REST
|
|
- movq $-1,ORIG_RAX(%rsp)
|
|
+ movq $-1,ORIG_RAX(%rsp)
|
|
xorl %esi,%esi # oldset
|
|
movq %rsp,%rdi # &pt_regs
|
|
call do_notify_resume
|
|
@@ -704,324 +852,132 @@ ENTRY(retint_kernel)
|
|
jnc retint_restore_args
|
|
call preempt_schedule_irq
|
|
jmp retint_kernel /* check again */
|
|
-#endif
|
|
+#endif
|
|
|
|
CFI_ENDPROC
|
|
END(retint_check)
|
|
-
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
* APIC interrupts.
|
|
- */
|
|
- .macro apicinterrupt num,func
|
|
+ */
|
|
+.macro apicinterrupt num sym do_sym
|
|
+ENTRY(\sym)
|
|
INTR_FRAME
|
|
pushq $~(\num)
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
- interrupt \func
|
|
+ interrupt \do_sym
|
|
jmp error_entry
|
|
CFI_ENDPROC
|
|
- .endm
|
|
+END(\sym)
|
|
+.endm
|
|
|
|
-ENTRY(thermal_interrupt)
|
|
- apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
|
|
-END(thermal_interrupt)
|
|
-
|
|
-ENTRY(threshold_interrupt)
|
|
- apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
|
|
-END(threshold_interrupt)
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-ENTRY(reschedule_interrupt)
|
|
- apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
|
|
-END(reschedule_interrupt)
|
|
-
|
|
- .macro INVALIDATE_ENTRY num
|
|
-ENTRY(invalidate_interrupt\num)
|
|
- apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt
|
|
-END(invalidate_interrupt\num)
|
|
- .endm
|
|
+#ifdef CONFIG_SMP
|
|
+apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
|
|
+ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
|
|
+#endif
|
|
+
|
|
+apicinterrupt UV_BAU_MESSAGE \
|
|
+ uv_bau_message_intr1 uv_bau_message_interrupt
|
|
+apicinterrupt LOCAL_TIMER_VECTOR \
|
|
+ apic_timer_interrupt smp_apic_timer_interrupt
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
|
|
+ invalidate_interrupt0 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \
|
|
+ invalidate_interrupt1 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \
|
|
+ invalidate_interrupt2 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \
|
|
+ invalidate_interrupt3 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \
|
|
+ invalidate_interrupt4 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \
|
|
+ invalidate_interrupt5 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \
|
|
+ invalidate_interrupt6 smp_invalidate_interrupt
|
|
+apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \
|
|
+ invalidate_interrupt7 smp_invalidate_interrupt
|
|
+#endif
|
|
|
|
- INVALIDATE_ENTRY 0
|
|
- INVALIDATE_ENTRY 1
|
|
- INVALIDATE_ENTRY 2
|
|
- INVALIDATE_ENTRY 3
|
|
- INVALIDATE_ENTRY 4
|
|
- INVALIDATE_ENTRY 5
|
|
- INVALIDATE_ENTRY 6
|
|
- INVALIDATE_ENTRY 7
|
|
-
|
|
-ENTRY(call_function_interrupt)
|
|
- apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
|
|
-END(call_function_interrupt)
|
|
-ENTRY(call_function_single_interrupt)
|
|
- apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
|
|
-END(call_function_single_interrupt)
|
|
-ENTRY(irq_move_cleanup_interrupt)
|
|
- apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
|
|
-END(irq_move_cleanup_interrupt)
|
|
+apicinterrupt THRESHOLD_APIC_VECTOR \
|
|
+ threshold_interrupt mce_threshold_interrupt
|
|
+apicinterrupt THERMAL_APIC_VECTOR \
|
|
+ thermal_interrupt smp_thermal_interrupt
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \
|
|
+ call_function_single_interrupt smp_call_function_single_interrupt
|
|
+apicinterrupt CALL_FUNCTION_VECTOR \
|
|
+ call_function_interrupt smp_call_function_interrupt
|
|
+apicinterrupt RESCHEDULE_VECTOR \
|
|
+ reschedule_interrupt smp_reschedule_interrupt
|
|
#endif
|
|
|
|
-ENTRY(apic_timer_interrupt)
|
|
- apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
|
|
-END(apic_timer_interrupt)
|
|
-
|
|
-ENTRY(uv_bau_message_intr1)
|
|
- apicinterrupt 220,uv_bau_message_interrupt
|
|
-END(uv_bau_message_intr1)
|
|
-
|
|
-ENTRY(error_interrupt)
|
|
- apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
|
|
-END(error_interrupt)
|
|
-
|
|
-ENTRY(spurious_interrupt)
|
|
- apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
|
|
-END(spurious_interrupt)
|
|
+apicinterrupt ERROR_APIC_VECTOR \
|
|
+ error_interrupt smp_error_interrupt
|
|
+apicinterrupt SPURIOUS_APIC_VECTOR \
|
|
+ spurious_interrupt smp_spurious_interrupt
|
|
#endif /* !CONFIG_XEN */
|
|
-
|
|
+
|
|
/*
|
|
* Exception entry points.
|
|
- */
|
|
- .macro zeroentry sym
|
|
+ */
|
|
+.macro zeroentry sym do_sym
|
|
+ENTRY(\sym)
|
|
INTR_FRAME
|
|
movq (%rsp),%rcx
|
|
CFI_RESTORE rcx
|
|
movq 8(%rsp),%r11
|
|
CFI_RESTORE r11
|
|
- addq $0x10,%rsp /* skip rcx and r11 */
|
|
- CFI_ADJUST_CFA_OFFSET -0x10
|
|
- pushq $0 /* push error code/oldrax */
|
|
- CFI_ADJUST_CFA_OFFSET 8
|
|
- pushq %rax /* push real oldrax to the rdi slot */
|
|
- CFI_ADJUST_CFA_OFFSET 8
|
|
- CFI_REL_OFFSET rax,0
|
|
- leaq \sym(%rip),%rax
|
|
- jmp error_entry
|
|
+ movq $-1,8(%rsp) /* ORIG_RAX: no syscall to restart */
|
|
+ subq $(15-1)*8,%rsp
|
|
+ CFI_ADJUST_CFA_OFFSET (15-1)*8
|
|
+ call error_entry
|
|
+ DEFAULT_FRAME 0
|
|
+ movq %rsp,%rdi /* pt_regs pointer */
|
|
+ xorl %esi,%esi /* no error code */
|
|
+ call \do_sym
|
|
+ jmp error_exit /* %ebx: no swapgs flag */
|
|
CFI_ENDPROC
|
|
- .endm
|
|
+END(\sym)
|
|
+.endm
|
|
+
|
|
+.macro paranoidzeroentry sym do_sym
|
|
+ zeroentry \sym \do_sym
|
|
+.endm
|
|
+
|
|
+.macro paranoidzeroentry_ist sym do_sym ist
|
|
+ zeroentry \sym \do_sym
|
|
+.endm
|
|
|
|
- .macro errorentry sym
|
|
+.macro errorentry sym do_sym
|
|
+ENTRY(\sym)
|
|
XCPT_FRAME
|
|
movq (%rsp),%rcx
|
|
CFI_RESTORE rcx
|
|
movq 8(%rsp),%r11
|
|
CFI_RESTORE r11
|
|
- addq $0x10,%rsp /* rsp points to the error code */
|
|
- CFI_ADJUST_CFA_OFFSET -0x10
|
|
- pushq %rax
|
|
- CFI_ADJUST_CFA_OFFSET 8
|
|
- CFI_REL_OFFSET rax,0
|
|
- leaq \sym(%rip),%rax
|
|
- jmp error_entry
|
|
+ subq $(15-2)*8,%rsp
|
|
+ CFI_ADJUST_CFA_OFFSET (15-2)*8
|
|
+ call error_entry
|
|
+ DEFAULT_FRAME 0
|
|
+ movq %rsp,%rdi /* pt_regs pointer */
|
|
+ movq ORIG_RAX(%rsp),%rsi /* get error code */
|
|
+ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */
|
|
+ call \do_sym
|
|
+ jmp error_exit /* %ebx: no swapgs flag */
|
|
CFI_ENDPROC
|
|
- .endm
|
|
+END(\sym)
|
|
+.endm
|
|
|
|
-#if 0 /* not XEN */
|
|
/* error code is on the stack already */
|
|
- /* handle NMI like exceptions that can happen everywhere */
|
|
- .macro paranoidentry sym, ist=0, irqtrace=1
|
|
- movq (%rsp),%rcx
|
|
- movq 8(%rsp),%r11
|
|
- addq $0x10,%rsp /* skip rcx and r11 */
|
|
- SAVE_ALL
|
|
- cld
|
|
-#if 0 /* not XEN */
|
|
- movl $1,%ebx
|
|
- movl $MSR_GS_BASE,%ecx
|
|
- rdmsr
|
|
- testl %edx,%edx
|
|
- js 1f
|
|
- SWAPGS
|
|
- xorl %ebx,%ebx
|
|
-1:
|
|
-#endif
|
|
- .if \ist
|
|
- movq %gs:pda_data_offset, %rbp
|
|
- .endif
|
|
- .if \irqtrace
|
|
- TRACE_IRQS_OFF
|
|
- .endif
|
|
- movq %rsp,%rdi
|
|
- movq ORIG_RAX(%rsp),%rsi
|
|
- movq $-1,ORIG_RAX(%rsp)
|
|
- .if \ist
|
|
- subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
|
|
- .endif
|
|
- call \sym
|
|
- .if \ist
|
|
- addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp)
|
|
- .endif
|
|
- DISABLE_INTERRUPTS(CLBR_NONE)
|
|
- .if \irqtrace
|
|
- TRACE_IRQS_OFF
|
|
- .endif
|
|
- .endm
|
|
-
|
|
- /*
|
|
- * "Paranoid" exit path from exception stack.
|
|
- * Paranoid because this is used by NMIs and cannot take
|
|
- * any kernel state for granted.
|
|
- * We don't do kernel preemption checks here, because only
|
|
- * NMI should be common and it does not enable IRQs and
|
|
- * cannot get reschedule ticks.
|
|
- *
|
|
- * "trace" is 0 for the NMI handler only, because irq-tracing
|
|
- * is fundamentally NMI-unsafe. (we cannot change the soft and
|
|
- * hard flags at once, atomically)
|
|
- */
|
|
- .macro paranoidexit trace=1
|
|
- /* ebx: no swapgs flag */
|
|
-paranoid_exit\trace:
|
|
- testl %ebx,%ebx /* swapgs needed? */
|
|
- jnz paranoid_restore\trace
|
|
- testl $3,CS(%rsp)
|
|
- jnz paranoid_userspace\trace
|
|
-paranoid_swapgs\trace:
|
|
- .if \trace
|
|
- TRACE_IRQS_IRETQ 0
|
|
- .endif
|
|
- SWAPGS_UNSAFE_STACK
|
|
-paranoid_restore\trace:
|
|
- RESTORE_ALL 8
|
|
- jmp irq_return
|
|
-paranoid_userspace\trace:
|
|
- GET_THREAD_INFO(%rcx)
|
|
- movl TI_flags(%rcx),%ebx
|
|
- andl $_TIF_WORK_MASK,%ebx
|
|
- jz paranoid_swapgs\trace
|
|
- movq %rsp,%rdi /* &pt_regs */
|
|
- call sync_regs
|
|
- movq %rax,%rsp /* switch stack for scheduling */
|
|
- testl $_TIF_NEED_RESCHED,%ebx
|
|
- jnz paranoid_schedule\trace
|
|
- movl %ebx,%edx /* arg3: thread flags */
|
|
- .if \trace
|
|
- TRACE_IRQS_ON
|
|
- .endif
|
|
- ENABLE_INTERRUPTS(CLBR_NONE)
|
|
- xorl %esi,%esi /* arg2: oldset */
|
|
- movq %rsp,%rdi /* arg1: &pt_regs */
|
|
- call do_notify_resume
|
|
- DISABLE_INTERRUPTS(CLBR_NONE)
|
|
- .if \trace
|
|
- TRACE_IRQS_OFF
|
|
- .endif
|
|
- jmp paranoid_userspace\trace
|
|
-paranoid_schedule\trace:
|
|
- .if \trace
|
|
- TRACE_IRQS_ON
|
|
- .endif
|
|
- ENABLE_INTERRUPTS(CLBR_ANY)
|
|
- call schedule
|
|
- DISABLE_INTERRUPTS(CLBR_ANY)
|
|
- .if \trace
|
|
- TRACE_IRQS_OFF
|
|
- .endif
|
|
- jmp paranoid_userspace\trace
|
|
- CFI_ENDPROC
|
|
- .endm
|
|
-#endif
|
|
+.macro paranoiderrorentry sym do_sym
|
|
+ errorentry \sym \do_sym
|
|
+.endm
|
|
|
|
/*
|
|
- * Exception entry point. This expects an error code/orig_rax on the stack
|
|
- * and the exception handler in %rax.
|
|
- */
|
|
-KPROBE_ENTRY(error_entry)
|
|
- _frame RDI
|
|
- CFI_REL_OFFSET rax,0
|
|
- /* rdi slot contains rax, oldrax contains error code */
|
|
- cld
|
|
- subq $14*8,%rsp
|
|
- CFI_ADJUST_CFA_OFFSET (14*8)
|
|
- movq %rsi,13*8(%rsp)
|
|
- CFI_REL_OFFSET rsi,RSI
|
|
- movq 14*8(%rsp),%rsi /* load rax from rdi slot */
|
|
- CFI_REGISTER rax,rsi
|
|
- movq %rdx,12*8(%rsp)
|
|
- CFI_REL_OFFSET rdx,RDX
|
|
- movq %rcx,11*8(%rsp)
|
|
- CFI_REL_OFFSET rcx,RCX
|
|
- movq %rsi,10*8(%rsp) /* store rax */
|
|
- CFI_REL_OFFSET rax,RAX
|
|
- movq %r8, 9*8(%rsp)
|
|
- CFI_REL_OFFSET r8,R8
|
|
- movq %r9, 8*8(%rsp)
|
|
- CFI_REL_OFFSET r9,R9
|
|
- movq %r10,7*8(%rsp)
|
|
- CFI_REL_OFFSET r10,R10
|
|
- movq %r11,6*8(%rsp)
|
|
- CFI_REL_OFFSET r11,R11
|
|
- movq %rbx,5*8(%rsp)
|
|
- CFI_REL_OFFSET rbx,RBX
|
|
- movq %rbp,4*8(%rsp)
|
|
- CFI_REL_OFFSET rbp,RBP
|
|
- movq %r12,3*8(%rsp)
|
|
- CFI_REL_OFFSET r12,R12
|
|
- movq %r13,2*8(%rsp)
|
|
- CFI_REL_OFFSET r13,R13
|
|
- movq %r14,1*8(%rsp)
|
|
- CFI_REL_OFFSET r14,R14
|
|
- movq %r15,(%rsp)
|
|
- CFI_REL_OFFSET r15,R15
|
|
-#if 0
|
|
- cmpl $__KERNEL_CS,CS(%rsp)
|
|
- CFI_REMEMBER_STATE
|
|
- je error_kernelspace
|
|
-#endif
|
|
-error_call_handler:
|
|
- movq %rdi, RDI(%rsp)
|
|
- CFI_REL_OFFSET rdi,RDI
|
|
- movq %rsp,%rdi
|
|
- movq ORIG_RAX(%rsp),%rsi # get error code
|
|
- movq $-1,ORIG_RAX(%rsp)
|
|
- call *%rax
|
|
-error_exit:
|
|
- RESTORE_REST
|
|
- DISABLE_INTERRUPTS(CLBR_NONE)
|
|
- TRACE_IRQS_OFF
|
|
- GET_THREAD_INFO(%rcx)
|
|
- testb $3,CS-ARGOFFSET(%rsp)
|
|
- jz retint_kernel
|
|
- LOCKDEP_SYS_EXIT_IRQ
|
|
- movl TI_flags(%rcx),%edx
|
|
- movl $_TIF_WORK_MASK,%edi
|
|
- andl %edi,%edx
|
|
- jnz retint_careful
|
|
- jmp retint_restore_args
|
|
-
|
|
-#if 0
|
|
- /*
|
|
- * We need to re-write the logic here because we don't do iretq to
|
|
- * to return to user mode. It's still possible that we get trap/fault
|
|
- * in the kernel (when accessing buffers pointed to by system calls,
|
|
- * for example).
|
|
- *
|
|
- */
|
|
- CFI_RESTORE_STATE
|
|
-error_kernelspace:
|
|
- incl %ebx
|
|
- /* There are two places in the kernel that can potentially fault with
|
|
- usergs. Handle them here. The exception handlers after
|
|
- iret run with kernel gs again, so don't set the user space flag.
|
|
- B stepping K8s sometimes report an truncated RIP for IRET
|
|
- exceptions returning to compat mode. Check for these here too. */
|
|
- leaq irq_return(%rip),%rcx
|
|
- cmpq %rcx,RIP(%rsp)
|
|
- je error_swapgs
|
|
- movl %ecx,%ecx /* zero extend */
|
|
- cmpq %rcx,RIP(%rsp)
|
|
- je error_swapgs
|
|
- cmpq $gs_change,RIP(%rsp)
|
|
- je error_swapgs
|
|
- jmp error_sti
|
|
-#endif
|
|
- CFI_ENDPROC
|
|
-KPROBE_END(error_entry)
|
|
-
|
|
-ENTRY(hypervisor_callback)
|
|
- zeroentry do_hypervisor_callback
|
|
-END(hypervisor_callback)
|
|
-
|
|
-/*
|
|
* Copied from arch/xen/i386/kernel/entry.S
|
|
*/
|
|
# A note on the "critical region" in our callback handler.
|
|
@@ -1041,7 +997,7 @@ ENTRY(do_hypervisor_callback) # do_hyp
|
|
# see the correct pointer to the pt_regs
|
|
movq %rdi, %rsp # we don't return, adjust the stack frame
|
|
CFI_ENDPROC
|
|
- CFI_DEFAULT_STACK
|
|
+ DEFAULT_FRAME
|
|
11: incl %gs:pda_irqcount
|
|
movq %rsp,%rbp
|
|
CFI_DEF_CFA_REGISTER rbp
|
|
@@ -1057,7 +1013,7 @@ END(do_hypervisor_callback)
|
|
|
|
ALIGN
|
|
restore_all_enable_events:
|
|
- CFI_DEFAULT_STACK adj=1
|
|
+ PARTIAL_FRAME
|
|
TRACE_IRQS_ON
|
|
__ENABLE_INTERRUPTS
|
|
|
|
@@ -1093,9 +1049,7 @@ ecrit: /**** END OF CRITICAL REGION ***
|
|
# We distinguish between categories by comparing each saved segment register
|
|
# with its current contents: any discrepancy means we in category 1.
|
|
ENTRY(failsafe_callback)
|
|
- _frame (RIP-0x30)
|
|
- CFI_REL_OFFSET rcx, 0
|
|
- CFI_REL_OFFSET r11, 8
|
|
+ INTR_FRAME offset=4*8
|
|
movw %ds,%cx
|
|
cmpw %cx,0x10(%rsp)
|
|
CFI_REMEMBER_STATE
|
|
@@ -1131,20 +1085,19 @@ ENTRY(failsafe_callback)
|
|
SAVE_ALL
|
|
jmp error_exit
|
|
CFI_ENDPROC
|
|
-#if 0
|
|
- .section __ex_table,"a"
|
|
- .align 8
|
|
- .quad gs_change,bad_gs
|
|
- .previous
|
|
- .section .fixup,"ax"
|
|
- /* running with kernelgs */
|
|
-bad_gs:
|
|
-/* swapgs */ /* switch back to user gs */
|
|
- xorl %eax,%eax
|
|
- movl %eax,%gs
|
|
- jmp 2b
|
|
- .previous
|
|
-#endif
|
|
+
|
|
+zeroentry divide_error do_divide_error
|
|
+zeroentry overflow do_overflow
|
|
+zeroentry bounds do_bounds
|
|
+zeroentry invalid_op do_invalid_op
|
|
+zeroentry device_not_available do_device_not_available
|
|
+zeroentry hypervisor_callback do_hypervisor_callback
|
|
+zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun
|
|
+errorentry invalid_TSS do_invalid_TSS
|
|
+errorentry segment_not_present do_segment_not_present
|
|
+zeroentry coprocessor_error do_coprocessor_error
|
|
+errorentry alignment_check do_alignment_check
|
|
+zeroentry simd_coprocessor_error do_simd_coprocessor_error
|
|
|
|
/*
|
|
* Create a kernel thread.
|
|
@@ -1168,7 +1121,7 @@ ENTRY(kernel_thread)
|
|
|
|
xorl %r8d,%r8d
|
|
xorl %r9d,%r9d
|
|
-
|
|
+
|
|
# clone now
|
|
call do_fork
|
|
movq %rax,RAX(%rsp)
|
|
@@ -1179,15 +1132,15 @@ ENTRY(kernel_thread)
|
|
* so internally to the x86_64 port you can rely on kernel_thread()
|
|
* not to reschedule the child before returning, this avoids the need
|
|
* of hacks for example to fork off the per-CPU idle tasks.
|
|
- * [Hopefully no generic code relies on the reschedule -AK]
|
|
+ * [Hopefully no generic code relies on the reschedule -AK]
|
|
*/
|
|
RESTORE_ALL
|
|
UNFAKE_STACK_FRAME
|
|
ret
|
|
CFI_ENDPROC
|
|
-ENDPROC(kernel_thread)
|
|
-
|
|
-child_rip:
|
|
+END(kernel_thread)
|
|
+
|
|
+ENTRY(child_rip)
|
|
pushq $0 # fake return address
|
|
CFI_STARTPROC
|
|
/*
|
|
@@ -1200,8 +1153,9 @@ child_rip:
|
|
# exit
|
|
mov %eax, %edi
|
|
call do_exit
|
|
+ ud2 # padding for call trace
|
|
CFI_ENDPROC
|
|
-ENDPROC(child_rip)
|
|
+END(child_rip)
|
|
|
|
/*
|
|
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
|
|
@@ -1221,10 +1175,10 @@ ENDPROC(child_rip)
|
|
ENTRY(kernel_execve)
|
|
CFI_STARTPROC
|
|
FAKE_STACK_FRAME $0
|
|
- SAVE_ALL
|
|
+ SAVE_ALL
|
|
movq %rsp,%rcx
|
|
call sys_execve
|
|
- movq %rax, RAX(%rsp)
|
|
+ movq %rax, RAX(%rsp)
|
|
RESTORE_REST
|
|
testq %rax,%rax
|
|
jne 1f
|
|
@@ -1233,132 +1187,7 @@ ENTRY(kernel_execve)
|
|
UNFAKE_STACK_FRAME
|
|
ret
|
|
CFI_ENDPROC
|
|
-ENDPROC(kernel_execve)
|
|
-
|
|
-KPROBE_ENTRY(page_fault)
|
|
- errorentry do_page_fault
|
|
-KPROBE_END(page_fault)
|
|
-
|
|
-ENTRY(coprocessor_error)
|
|
- zeroentry do_coprocessor_error
|
|
-END(coprocessor_error)
|
|
-
|
|
-ENTRY(simd_coprocessor_error)
|
|
- zeroentry do_simd_coprocessor_error
|
|
-END(simd_coprocessor_error)
|
|
-
|
|
-ENTRY(device_not_available)
|
|
- zeroentry do_device_not_available
|
|
-END(device_not_available)
|
|
-
|
|
- /* runs on exception stack */
|
|
-KPROBE_ENTRY(debug)
|
|
-/* INTR_FRAME
|
|
- PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
- pushq $0
|
|
- CFI_ADJUST_CFA_OFFSET 8 */
|
|
- zeroentry do_debug
|
|
-/* paranoidexit
|
|
- CFI_ENDPROC */
|
|
-KPROBE_END(debug)
|
|
-
|
|
-KPROBE_ENTRY(nmi)
|
|
- zeroentry do_nmi_callback
|
|
-KPROBE_END(nmi)
|
|
-do_nmi_callback:
|
|
- CFI_STARTPROC
|
|
- addq $8, %rsp
|
|
- CFI_ENDPROC
|
|
- CFI_DEFAULT_STACK
|
|
- call do_nmi
|
|
- orl $NMI_MASK,EFLAGS(%rsp)
|
|
- RESTORE_REST
|
|
- DISABLE_INTERRUPTS(CLBR_NONE)
|
|
- TRACE_IRQS_OFF
|
|
- GET_THREAD_INFO(%rcx)
|
|
- jmp retint_restore_args
|
|
- CFI_ENDPROC
|
|
-END(do_nmi_callback)
|
|
-
|
|
-KPROBE_ENTRY(int3)
|
|
-/* INTR_FRAME
|
|
- PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
- pushq $0
|
|
- CFI_ADJUST_CFA_OFFSET 8 */
|
|
- zeroentry do_int3
|
|
-/* jmp paranoid_exit1
|
|
- CFI_ENDPROC */
|
|
-KPROBE_END(int3)
|
|
-
|
|
-ENTRY(overflow)
|
|
- zeroentry do_overflow
|
|
-END(overflow)
|
|
-
|
|
-ENTRY(bounds)
|
|
- zeroentry do_bounds
|
|
-END(bounds)
|
|
-
|
|
-ENTRY(invalid_op)
|
|
- zeroentry do_invalid_op
|
|
-END(invalid_op)
|
|
-
|
|
-ENTRY(coprocessor_segment_overrun)
|
|
- zeroentry do_coprocessor_segment_overrun
|
|
-END(coprocessor_segment_overrun)
|
|
-
|
|
-#if 0
|
|
- /* runs on exception stack */
|
|
-ENTRY(double_fault)
|
|
- XCPT_FRAME
|
|
- PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
- paranoidentry do_double_fault
|
|
- jmp paranoid_exit1
|
|
- CFI_ENDPROC
|
|
-END(double_fault)
|
|
-#endif
|
|
-
|
|
-ENTRY(invalid_TSS)
|
|
- errorentry do_invalid_TSS
|
|
-END(invalid_TSS)
|
|
-
|
|
-ENTRY(segment_not_present)
|
|
- errorentry do_segment_not_present
|
|
-END(segment_not_present)
|
|
-
|
|
- /* runs on exception stack */
|
|
-ENTRY(stack_segment)
|
|
-/* XCPT_FRAME
|
|
- PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
- paranoidentry do_stack_segment */
|
|
- errorentry do_stack_segment
|
|
-/* jmp paranoid_exit1
|
|
- CFI_ENDPROC */
|
|
-END(stack_segment)
|
|
-
|
|
-KPROBE_ENTRY(general_protection)
|
|
- errorentry do_general_protection
|
|
-KPROBE_END(general_protection)
|
|
-
|
|
-ENTRY(alignment_check)
|
|
- errorentry do_alignment_check
|
|
-END(alignment_check)
|
|
-
|
|
-ENTRY(divide_error)
|
|
- zeroentry do_divide_error
|
|
-END(divide_error)
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-ENTRY(spurious_interrupt_bug)
|
|
- zeroentry do_spurious_interrupt_bug
|
|
-END(spurious_interrupt_bug)
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_X86_MCE
|
|
- /* runs on exception stack */
|
|
-KPROBE_ENTRY(machine_check)
|
|
- zeroentry do_machine_check
|
|
-END(machine_check)
|
|
-#endif
|
|
+END(kernel_execve)
|
|
|
|
/* Call softirq on interrupt stack. Interrupts are off. */
|
|
ENTRY(call_softirq)
|
|
@@ -1378,24 +1207,191 @@ ENTRY(call_softirq)
|
|
decl %gs:pda_irqcount
|
|
ret
|
|
CFI_ENDPROC
|
|
-ENDPROC(call_softirq)
|
|
+END(call_softirq)
|
|
+
|
|
+/*
|
|
+ * Some functions should be protected against kprobes
|
|
+ */
|
|
+ .pushsection .kprobes.text, "ax"
|
|
+
|
|
+paranoidzeroentry_ist debug do_debug DEBUG_STACK
|
|
+zeroentry nmi do_nmi_callback
|
|
+paranoidzeroentry_ist int3 do_int3 DEBUG_STACK
|
|
+paranoiderrorentry stack_segment do_stack_segment
|
|
+errorentry general_protection do_general_protection
|
|
+errorentry page_fault do_page_fault
|
|
+#ifdef CONFIG_X86_MCE
|
|
+paranoidzeroentry machine_check do_machine_check
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ /*
|
|
+ * "Paranoid" exit path from exception stack.
|
|
+ * Paranoid because this is used by NMIs and cannot take
|
|
+ * any kernel state for granted.
|
|
+ * We don't do kernel preemption checks here, because only
|
|
+ * NMI should be common and it does not enable IRQs and
|
|
+ * cannot get reschedule ticks.
|
|
+ *
|
|
+ * "trace" is 0 for the NMI handler only, because irq-tracing
|
|
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
|
|
+ * hard flags at once, atomically)
|
|
+ */
|
|
+
|
|
+ /* ebx: no swapgs flag */
|
|
+ENTRY(paranoid_exit)
|
|
+ INTR_FRAME
|
|
+ DISABLE_INTERRUPTS(CLBR_NONE)
|
|
+ TRACE_IRQS_OFF
|
|
+ testl %ebx,%ebx /* swapgs needed? */
|
|
+ jnz paranoid_restore
|
|
+ testl $3,CS(%rsp)
|
|
+ jnz paranoid_userspace
|
|
+paranoid_swapgs:
|
|
+ TRACE_IRQS_IRETQ 0
|
|
+ SWAPGS_UNSAFE_STACK
|
|
+paranoid_restore:
|
|
+ RESTORE_ALL 8
|
|
+ jmp irq_return
|
|
+paranoid_userspace:
|
|
+ GET_THREAD_INFO(%rcx)
|
|
+ movl TI_flags(%rcx),%ebx
|
|
+ andl $_TIF_WORK_MASK,%ebx
|
|
+ jz paranoid_swapgs
|
|
+ movq %rsp,%rdi /* &pt_regs */
|
|
+ call sync_regs
|
|
+ movq %rax,%rsp /* switch stack for scheduling */
|
|
+ testl $_TIF_NEED_RESCHED,%ebx
|
|
+ jnz paranoid_schedule
|
|
+ movl %ebx,%edx /* arg3: thread flags */
|
|
+ TRACE_IRQS_ON
|
|
+ ENABLE_INTERRUPTS(CLBR_NONE)
|
|
+ xorl %esi,%esi /* arg2: oldset */
|
|
+ movq %rsp,%rdi /* arg1: &pt_regs */
|
|
+ call do_notify_resume
|
|
+ DISABLE_INTERRUPTS(CLBR_NONE)
|
|
+ TRACE_IRQS_OFF
|
|
+ jmp paranoid_userspace
|
|
+paranoid_schedule:
|
|
+ TRACE_IRQS_ON
|
|
+ ENABLE_INTERRUPTS(CLBR_ANY)
|
|
+ call schedule
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY)
|
|
+ TRACE_IRQS_OFF
|
|
+ jmp paranoid_userspace
|
|
+ CFI_ENDPROC
|
|
+END(paranoid_exit)
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Exception entry point. This expects an error code/orig_rax on the stack.
|
|
+ * Returns the "no swapgs flag" in %ebx.
|
|
+ */
|
|
+ENTRY(error_entry)
|
|
+ XCPT_FRAME 2
|
|
+ CFI_ADJUST_CFA_OFFSET 15*8
|
|
+ /* oldrax contains error code */
|
|
+ cld
|
|
+ movq_cfi rdi, RDI+8
|
|
+ movq_cfi rsi, RSI+8
|
|
+ movq_cfi rdx, RDX+8
|
|
+ movq_cfi rcx, RCX+8
|
|
+ movq_cfi rax, RAX+8
|
|
+ movq_cfi r8, R8+8
|
|
+ movq_cfi r9, R9+8
|
|
+ movq_cfi r10, R10+8
|
|
+ movq_cfi r11, R11+8
|
|
+ movq_cfi rbx, RBX+8
|
|
+ movq_cfi rbp, RBP+8
|
|
+ movq_cfi r12, R12+8
|
|
+ movq_cfi r13, R13+8
|
|
+ movq_cfi r14, R14+8
|
|
+ movq_cfi r15, R15+8
|
|
+#ifndef CONFIG_XEN
|
|
+ xorl %ebx,%ebx
|
|
+ testl $3,CS+8(%rsp)
|
|
+ je error_kernelspace
|
|
+error_swapgs:
|
|
+ SWAPGS
|
|
+error_sti:
|
|
+#endif
|
|
+ TRACE_IRQS_OFF
|
|
+ ret
|
|
+ CFI_ENDPROC
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * There are two places in the kernel that can potentially fault with
|
|
+ * usergs. Handle them here. The exception handlers after iret run with
|
|
+ * kernel gs again, so don't set the user space flag. B stepping K8s
|
|
+ * sometimes report a truncated RIP for IRET exceptions returning to
|
|
+ * compat mode. Check for these here too.
|
|
+ */
|
|
+error_kernelspace:
|
|
+ incl %ebx
|
|
+ leaq irq_return(%rip),%rcx
|
|
+ cmpq %rcx,RIP+8(%rsp)
|
|
+ je error_swapgs
|
|
+ movl %ecx,%ecx /* zero extend */
|
|
+ cmpq %rcx,RIP+8(%rsp)
|
|
+ je error_swapgs
|
|
+ cmpq $gs_change,RIP+8(%rsp)
|
|
+ je error_swapgs
|
|
+ jmp error_sti
|
|
+#endif
|
|
+END(error_entry)
|
|
+
|
|
+
|
|
+ENTRY(error_exit)
|
|
+ DEFAULT_FRAME
|
|
+ RESTORE_REST
|
|
+ DISABLE_INTERRUPTS(CLBR_NONE)
|
|
+ TRACE_IRQS_OFF
|
|
+ GET_THREAD_INFO(%rcx)
|
|
+ testb $3,CS-ARGOFFSET(%rsp)
|
|
+ jz retint_kernel
|
|
+ LOCKDEP_SYS_EXIT_IRQ
|
|
+ movl TI_flags(%rcx),%edx
|
|
+ movl $_TIF_WORK_MASK,%edi
|
|
+ andl %edi,%edx
|
|
+ jnz retint_careful
|
|
+ jmp retint_restore_args
|
|
+ CFI_ENDPROC
|
|
+END(error_exit)
|
|
+
|
|
+
|
|
+do_nmi_callback:
|
|
+ CFI_STARTPROC
|
|
+ addq $8, %rsp
|
|
+ CFI_ENDPROC
|
|
+ DEFAULT_FRAME
|
|
+ call do_nmi
|
|
+ orl $NMI_MASK,EFLAGS(%rsp)
|
|
+ RESTORE_REST
|
|
+ DISABLE_INTERRUPTS(CLBR_NONE)
|
|
+ TRACE_IRQS_OFF
|
|
+ GET_THREAD_INFO(%rcx)
|
|
+ jmp retint_restore_args
|
|
+ CFI_ENDPROC
|
|
+END(do_nmi_callback)
|
|
+
|
|
|
|
#ifndef CONFIG_IA32_EMULATION
|
|
-KPROBE_ENTRY(ignore_sysret)
|
|
- CFI_STARTPROC simple
|
|
- CFI_SIGNAL_FRAME
|
|
- CFI_DEF_CFA rsp,SS+8-RIP+16
|
|
-/* CFI_REL_OFFSET ss,SS-RIP+16 */
|
|
- CFI_REL_OFFSET rsp,RSP-RIP+16
|
|
-/* CFI_REL_OFFSET rflags,EFLAGS-RIP+16 */
|
|
-/* CFI_REL_OFFSET cs,CS-RIP+16 */
|
|
- CFI_REL_OFFSET rip,RIP-RIP+16
|
|
+ENTRY(ignore_sysret)
|
|
+ INTR_FRAME
|
|
popq %rcx
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
+ CFI_RESTORE rcx
|
|
popq %r11
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
+ CFI_RESTORE r11
|
|
mov $-ENOSYS,%eax
|
|
HYPERVISOR_IRET 0
|
|
CFI_ENDPROC
|
|
-ENDPROC(ignore_sysret)
|
|
+END(ignore_sysret)
|
|
#endif
|
|
+
|
|
+/*
|
|
+ * End of kprobes section
|
|
+ */
|
|
+ .popsection
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -36,7 +36,6 @@ void __init reserve_ebda_region(void)
|
|
|
|
/* start of EBDA area */
|
|
ebda_addr = get_bios_ebda();
|
|
- printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);
|
|
|
|
/* Fixup: bios puts an EBDA in the top 64K segment */
|
|
/* of conventional memory, but does not adjust lowmem. */
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head32-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -12,9 +12,12 @@
|
|
#include <asm/sections.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/bios_ebda.h>
|
|
+#include <asm/trampoline.h>
|
|
|
|
void __init i386_start_kernel(void)
|
|
{
|
|
+ reserve_trampoline_memory();
|
|
+
|
|
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
|
|
|
#ifndef CONFIG_XEN
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head64-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head64-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -31,9 +31,10 @@
|
|
#include <asm/kdebug.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/bios_ebda.h>
|
|
+#include <asm/trampoline.h>
|
|
|
|
/* boot cpu pda */
|
|
-static struct x8664_pda _boot_cpu_pda __read_mostly;
|
|
+static struct x8664_pda _boot_cpu_pda;
|
|
|
|
#ifdef CONFIG_SMP
|
|
/*
|
|
@@ -163,6 +164,8 @@ void __init x86_64_start_reservations(ch
|
|
{
|
|
copy_bootdata(__va(real_mode_data));
|
|
|
|
+ reserve_trampoline_memory();
|
|
+
|
|
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
|
|
|
reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
|
|
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -112,102 +112,276 @@ static int __init parse_noapic(char *str
|
|
}
|
|
early_param("noapic", parse_noapic);
|
|
|
|
+#ifndef CONFIG_XEN
|
|
struct irq_pin_list;
|
|
+
|
|
+/*
|
|
+ * This is performance-critical, we want to do it O(1)
|
|
+ *
|
|
+ * the indexing order of this array favors 1:1 mappings
|
|
+ * between pins and IRQs.
|
|
+ */
|
|
+
|
|
+struct irq_pin_list {
|
|
+ int apic, pin;
|
|
+ struct irq_pin_list *next;
|
|
+};
|
|
+
|
|
+static struct irq_pin_list *get_one_free_irq_2_pin(int cpu)
|
|
+{
|
|
+ struct irq_pin_list *pin;
|
|
+ int node;
|
|
+
|
|
+ node = cpu_to_node(cpu);
|
|
+
|
|
+ pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node);
|
|
+
|
|
+ return pin;
|
|
+}
|
|
+
|
|
struct irq_cfg {
|
|
-#ifndef CONFIG_XEN
|
|
- unsigned int irq;
|
|
struct irq_pin_list *irq_2_pin;
|
|
- cpumask_t domain;
|
|
- cpumask_t old_domain;
|
|
+ cpumask_var_t domain;
|
|
+ cpumask_var_t old_domain;
|
|
unsigned move_cleanup_count;
|
|
-#endif
|
|
u8 vector;
|
|
-#ifndef CONFIG_XEN
|
|
u8 move_in_progress : 1;
|
|
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
|
+ u8 move_desc_pending : 1;
|
|
#endif
|
|
};
|
|
|
|
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+static struct irq_cfg irq_cfgx[] = {
|
|
+#else
|
|
static struct irq_cfg irq_cfgx[NR_IRQS] = {
|
|
- [0] = { .irq = 0 },
|
|
- [1] = { .irq = 1 },
|
|
- [2] = { .irq = 2 },
|
|
- [3] = { .irq = 3 },
|
|
- [4] = { .irq = 4 },
|
|
- [5] = { .irq = 5 },
|
|
- [6] = { .irq = 6 },
|
|
- [7] = { .irq = 7 },
|
|
- [8] = { .irq = 8 },
|
|
- [9] = { .irq = 9 },
|
|
- [10] = { .irq = 10 },
|
|
- [11] = { .irq = 11 },
|
|
- [12] = { .irq = 12 },
|
|
- [13] = { .irq = 13 },
|
|
- [14] = { .irq = 14 },
|
|
- [15] = { .irq = 15 },
|
|
+#endif
|
|
+ [0] = { .vector = IRQ0_VECTOR, },
|
|
+ [1] = { .vector = IRQ1_VECTOR, },
|
|
+ [2] = { .vector = IRQ2_VECTOR, },
|
|
+ [3] = { .vector = IRQ3_VECTOR, },
|
|
+ [4] = { .vector = IRQ4_VECTOR, },
|
|
+ [5] = { .vector = IRQ5_VECTOR, },
|
|
+ [6] = { .vector = IRQ6_VECTOR, },
|
|
+ [7] = { .vector = IRQ7_VECTOR, },
|
|
+ [8] = { .vector = IRQ8_VECTOR, },
|
|
+ [9] = { .vector = IRQ9_VECTOR, },
|
|
+ [10] = { .vector = IRQ10_VECTOR, },
|
|
+ [11] = { .vector = IRQ11_VECTOR, },
|
|
+ [12] = { .vector = IRQ12_VECTOR, },
|
|
+ [13] = { .vector = IRQ13_VECTOR, },
|
|
+ [14] = { .vector = IRQ14_VECTOR, },
|
|
+ [15] = { .vector = IRQ15_VECTOR, },
|
|
};
|
|
|
|
-#define for_each_irq_cfg(irq, cfg) \
|
|
- for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
|
|
+int __init arch_early_irq_init(void)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc;
|
|
+ int count;
|
|
+ int i;
|
|
+
|
|
+ cfg = irq_cfgx;
|
|
+ count = ARRAY_SIZE(irq_cfgx);
|
|
|
|
+ for (i = 0; i < count; i++) {
|
|
+ desc = irq_to_desc(i);
|
|
+ desc->chip_data = &cfg[i];
|
|
+ alloc_bootmem_cpumask_var(&cfg[i].domain);
|
|
+ alloc_bootmem_cpumask_var(&cfg[i].old_domain);
|
|
+ if (i < NR_IRQS_LEGACY)
|
|
+ cpumask_setall(cfg[i].domain);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
static struct irq_cfg *irq_cfg(unsigned int irq)
|
|
{
|
|
- return irq < nr_irqs ? irq_cfgx + irq : NULL;
|
|
+ struct irq_cfg *cfg = NULL;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+ if (desc)
|
|
+ cfg = desc->chip_data;
|
|
+
|
|
+ return cfg;
|
|
}
|
|
|
|
-static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
|
|
+static struct irq_cfg *get_one_free_irq_cfg(int cpu)
|
|
{
|
|
- return irq_cfg(irq);
|
|
+ struct irq_cfg *cfg;
|
|
+ int node;
|
|
+
|
|
+ node = cpu_to_node(cpu);
|
|
+
|
|
+ cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
|
|
+ if (cfg) {
|
|
+ if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
|
|
+ kfree(cfg);
|
|
+ cfg = NULL;
|
|
+ } else if (!alloc_cpumask_var_node(&cfg->old_domain,
|
|
+ GFP_ATOMIC, node)) {
|
|
+ free_cpumask_var(cfg->domain);
|
|
+ kfree(cfg);
|
|
+ cfg = NULL;
|
|
+ } else {
|
|
+ cpumask_clear(cfg->domain);
|
|
+ cpumask_clear(cfg->old_domain);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return cfg;
|
|
}
|
|
|
|
-#ifdef CONFIG_XEN
|
|
-#define irq_2_pin_init()
|
|
-#define add_pin_to_irq(irq, apic, pin)
|
|
-#else
|
|
-/*
|
|
- * Rough estimation of how many shared IRQs there are, can be changed
|
|
- * anytime.
|
|
- */
|
|
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
|
|
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
|
|
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
|
|
-/*
|
|
- * This is performance-critical, we want to do it O(1)
|
|
- *
|
|
- * the indexing order of this array favors 1:1 mappings
|
|
- * between pins and IRQs.
|
|
- */
|
|
+ cfg = desc->chip_data;
|
|
+ if (!cfg) {
|
|
+ desc->chip_data = get_one_free_irq_cfg(cpu);
|
|
+ if (!desc->chip_data) {
|
|
+ printk(KERN_ERR "can not alloc irq_cfg\n");
|
|
+ BUG_ON(1);
|
|
+ }
|
|
+ }
|
|
|
|
-struct irq_pin_list {
|
|
- int apic, pin;
|
|
- struct irq_pin_list *next;
|
|
-};
|
|
+ return 0;
|
|
+}
|
|
|
|
-static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
|
|
-static struct irq_pin_list *irq_2_pin_ptr;
|
|
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
|
|
|
-static void __init irq_2_pin_init(void)
|
|
+static void
|
|
+init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu)
|
|
{
|
|
- struct irq_pin_list *pin = irq_2_pin_head;
|
|
- int i;
|
|
+ struct irq_pin_list *old_entry, *head, *tail, *entry;
|
|
+
|
|
+ cfg->irq_2_pin = NULL;
|
|
+ old_entry = old_cfg->irq_2_pin;
|
|
+ if (!old_entry)
|
|
+ return;
|
|
+
|
|
+ entry = get_one_free_irq_2_pin(cpu);
|
|
+ if (!entry)
|
|
+ return;
|
|
+
|
|
+ entry->apic = old_entry->apic;
|
|
+ entry->pin = old_entry->pin;
|
|
+ head = entry;
|
|
+ tail = entry;
|
|
+ old_entry = old_entry->next;
|
|
+ while (old_entry) {
|
|
+ entry = get_one_free_irq_2_pin(cpu);
|
|
+ if (!entry) {
|
|
+ entry = head;
|
|
+ while (entry) {
|
|
+ head = entry->next;
|
|
+ kfree(entry);
|
|
+ entry = head;
|
|
+ }
|
|
+ /* still use the old one */
|
|
+ return;
|
|
+ }
|
|
+ entry->apic = old_entry->apic;
|
|
+ entry->pin = old_entry->pin;
|
|
+ tail->next = entry;
|
|
+ tail = entry;
|
|
+ old_entry = old_entry->next;
|
|
+ }
|
|
|
|
- for (i = 1; i < PIN_MAP_SIZE; i++)
|
|
- pin[i-1].next = &pin[i];
|
|
+ tail->next = NULL;
|
|
+ cfg->irq_2_pin = head;
|
|
+}
|
|
+
|
|
+static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg)
|
|
+{
|
|
+ struct irq_pin_list *entry, *next;
|
|
+
|
|
+ if (old_cfg->irq_2_pin == cfg->irq_2_pin)
|
|
+ return;
|
|
+
|
|
+ entry = old_cfg->irq_2_pin;
|
|
|
|
- irq_2_pin_ptr = &pin[0];
|
|
+ while (entry) {
|
|
+ next = entry->next;
|
|
+ kfree(entry);
|
|
+ entry = next;
|
|
+ }
|
|
+ old_cfg->irq_2_pin = NULL;
|
|
}
|
|
|
|
-static struct irq_pin_list *get_one_free_irq_2_pin(void)
|
|
+void arch_init_copy_chip_data(struct irq_desc *old_desc,
|
|
+ struct irq_desc *desc, int cpu)
|
|
{
|
|
- struct irq_pin_list *pin = irq_2_pin_ptr;
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_cfg *old_cfg;
|
|
|
|
- if (!pin)
|
|
- panic("can not get more irq_2_pin\n");
|
|
+ cfg = get_one_free_irq_cfg(cpu);
|
|
|
|
- irq_2_pin_ptr = pin->next;
|
|
- pin->next = NULL;
|
|
- return pin;
|
|
+ if (!cfg)
|
|
+ return;
|
|
+
|
|
+ desc->chip_data = cfg;
|
|
+
|
|
+ old_cfg = old_desc->chip_data;
|
|
+
|
|
+ memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
|
|
+
|
|
+ init_copy_irq_2_pin(old_cfg, cfg, cpu);
|
|
+}
|
|
+
|
|
+static void free_irq_cfg(struct irq_cfg *old_cfg)
|
|
+{
|
|
+ kfree(old_cfg);
|
|
+}
|
|
+
|
|
+void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
|
|
+{
|
|
+ struct irq_cfg *old_cfg, *cfg;
|
|
+
|
|
+ old_cfg = old_desc->chip_data;
|
|
+ cfg = desc->chip_data;
|
|
+
|
|
+ if (old_cfg == cfg)
|
|
+ return;
|
|
+
|
|
+ if (old_cfg) {
|
|
+ free_irq_2_pin(old_cfg, cfg);
|
|
+ free_irq_cfg(old_cfg);
|
|
+ old_desc->chip_data = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+static void
|
|
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
|
|
+{
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
+
|
|
+ if (!cfg->move_in_progress) {
|
|
+ /* it means that domain is not changed */
|
|
+ if (!cpumask_intersects(&desc->affinity, mask))
|
|
+ cfg->move_desc_pending = 1;
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+#else
|
|
+static struct irq_cfg *irq_cfg(unsigned int irq)
|
|
+{
|
|
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
|
+static inline void
|
|
+set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
|
|
+{
|
|
}
|
|
+#endif
|
|
|
|
struct io_apic {
|
|
unsigned int index;
|
|
@@ -220,7 +394,7 @@ static __attribute_const__ struct io_api
|
|
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
+ (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
}
|
|
-#endif
|
|
+#endif /* !CONFIG_XEN */
|
|
|
|
static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
{
|
|
@@ -275,11 +449,10 @@ static inline void io_apic_modify(unsign
|
|
writel(value, &io_apic->data);
|
|
}
|
|
|
|
-static bool io_apic_level_ack_pending(unsigned int irq)
|
|
+static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
|
|
{
|
|
struct irq_pin_list *entry;
|
|
unsigned long flags;
|
|
- struct irq_cfg *cfg = irq_cfg(irq);
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
entry = cfg->irq_2_pin;
|
|
@@ -365,13 +538,32 @@ static void ioapic_mask_entry(int apic,
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
+static void send_cleanup_vector(struct irq_cfg *cfg)
|
|
+{
|
|
+ cpumask_var_t cleanup_mask;
|
|
+
|
|
+ if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
|
|
+ unsigned int i;
|
|
+ cfg->move_cleanup_count = 0;
|
|
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
|
|
+ cfg->move_cleanup_count++;
|
|
+ for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
|
|
+ send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ } else {
|
|
+ cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
|
|
+ cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
|
|
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ free_cpumask_var(cleanup_mask);
|
|
+ }
|
|
+ cfg->move_in_progress = 0;
|
|
+}
|
|
+
|
|
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
|
|
{
|
|
int apic, pin;
|
|
- struct irq_cfg *cfg;
|
|
struct irq_pin_list *entry;
|
|
+ u8 vector = cfg->vector;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
entry = cfg->irq_2_pin;
|
|
for (;;) {
|
|
unsigned int reg;
|
|
@@ -401,36 +593,61 @@ static void __target_IO_APIC_irq(unsigne
|
|
}
|
|
}
|
|
|
|
-static int assign_irq_vector(int irq, cpumask_t mask);
|
|
+static int
|
|
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
|
|
+
|
|
+/*
|
|
+ * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
|
|
+ * of that, or returns BAD_APICID and leaves desc->affinity untouched.
|
|
+ */
|
|
+static unsigned int
|
|
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ unsigned int irq;
|
|
+
|
|
+ if (!cpumask_intersects(mask, cpu_online_mask))
|
|
+ return BAD_APICID;
|
|
+
|
|
+ irq = desc->irq;
|
|
+ cfg = desc->chip_data;
|
|
+ if (assign_irq_vector(irq, cfg, mask))
|
|
+ return BAD_APICID;
|
|
+
|
|
+ cpumask_and(&desc->affinity, cfg->domain, mask);
|
|
+ set_extra_move_desc(desc, mask);
|
|
+ return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
|
|
+}
|
|
|
|
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
|
|
+static void
|
|
+set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
unsigned long flags;
|
|
unsigned int dest;
|
|
- cpumask_t tmp;
|
|
- struct irq_desc *desc;
|
|
+ unsigned int irq;
|
|
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- return;
|
|
+ irq = desc->irq;
|
|
+ cfg = desc->chip_data;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ dest = set_desc_affinity(desc, mask);
|
|
+ if (dest != BAD_APICID) {
|
|
+ /* Only the high 8 bits are valid. */
|
|
+ dest = SET_APIC_LOGICAL_ID(dest);
|
|
+ __target_IO_APIC_irq(irq, dest, cfg);
|
|
+ }
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
- /*
|
|
- * Only the high 8 bits are valid.
|
|
- */
|
|
- dest = SET_APIC_LOGICAL_ID(dest);
|
|
+static void
|
|
+set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask)
|
|
+{
|
|
+ struct irq_desc *desc;
|
|
|
|
desc = irq_to_desc(irq);
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __target_IO_APIC_irq(irq, dest, cfg->vector);
|
|
- desc->affinity = mask;
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ set_ioapic_affinity_irq_desc(desc, mask);
|
|
}
|
|
#endif /* CONFIG_SMP */
|
|
|
|
@@ -439,16 +656,18 @@ static void set_ioapic_affinity_irq(unsi
|
|
* shared ISA-space IRQs, so we have to support them. We are super
|
|
* fast in the common case, and fast for shared ISA-space IRQs.
|
|
*/
|
|
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
|
|
+static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin)
|
|
{
|
|
- struct irq_cfg *cfg;
|
|
struct irq_pin_list *entry;
|
|
|
|
- /* first time to refer irq_cfg, so with new */
|
|
- cfg = irq_cfg_alloc(irq);
|
|
entry = cfg->irq_2_pin;
|
|
if (!entry) {
|
|
- entry = get_one_free_irq_2_pin();
|
|
+ entry = get_one_free_irq_2_pin(cpu);
|
|
+ if (!entry) {
|
|
+ printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
|
|
+ apic, pin);
|
|
+ return;
|
|
+ }
|
|
cfg->irq_2_pin = entry;
|
|
entry->apic = apic;
|
|
entry->pin = pin;
|
|
@@ -463,7 +682,7 @@ static void add_pin_to_irq(unsigned int
|
|
entry = entry->next;
|
|
}
|
|
|
|
- entry->next = get_one_free_irq_2_pin();
|
|
+ entry->next = get_one_free_irq_2_pin(cpu);
|
|
entry = entry->next;
|
|
entry->apic = apic;
|
|
entry->pin = pin;
|
|
@@ -472,11 +691,10 @@ static void add_pin_to_irq(unsigned int
|
|
/*
|
|
* Reroute an IRQ to a different pin.
|
|
*/
|
|
-static void __init replace_pin_at_irq(unsigned int irq,
|
|
+static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu,
|
|
int oldapic, int oldpin,
|
|
int newapic, int newpin)
|
|
{
|
|
- struct irq_cfg *cfg = irq_cfg(irq);
|
|
struct irq_pin_list *entry = cfg->irq_2_pin;
|
|
int replaced = 0;
|
|
|
|
@@ -493,18 +711,16 @@ static void __init replace_pin_at_irq(un
|
|
|
|
/* why? call replace before add? */
|
|
if (!replaced)
|
|
- add_pin_to_irq(irq, newapic, newpin);
|
|
+ add_pin_to_irq_cpu(cfg, cpu, newapic, newpin);
|
|
}
|
|
|
|
-static inline void io_apic_modify_irq(unsigned int irq,
|
|
+static inline void io_apic_modify_irq(struct irq_cfg *cfg,
|
|
int mask_and, int mask_or,
|
|
void (*final)(struct irq_pin_list *entry))
|
|
{
|
|
int pin;
|
|
- struct irq_cfg *cfg;
|
|
struct irq_pin_list *entry;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
|
|
unsigned int reg;
|
|
pin = entry->pin;
|
|
@@ -517,13 +733,13 @@ static inline void io_apic_modify_irq(un
|
|
}
|
|
}
|
|
|
|
-static void __unmask_IO_APIC_irq(unsigned int irq)
|
|
+static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
|
|
{
|
|
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
|
|
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_64
|
|
-void io_apic_sync(struct irq_pin_list *entry)
|
|
+static void io_apic_sync(struct irq_pin_list *entry)
|
|
{
|
|
/*
|
|
* Synchronize the IO-APIC and the CPU by doing
|
|
@@ -534,47 +750,64 @@ void io_apic_sync(struct irq_pin_list *e
|
|
readl(&io_apic->data);
|
|
}
|
|
|
|
-static void __mask_IO_APIC_irq(unsigned int irq)
|
|
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
|
|
{
|
|
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
|
|
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
|
|
}
|
|
#else /* CONFIG_X86_32 */
|
|
-static void __mask_IO_APIC_irq(unsigned int irq)
|
|
+static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
|
|
{
|
|
- io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
|
|
+ io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
|
|
}
|
|
|
|
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
|
|
+static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
|
|
{
|
|
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
IO_APIC_REDIR_MASKED, NULL);
|
|
}
|
|
|
|
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
|
|
+static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
|
|
{
|
|
- io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
|
|
+ io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
|
|
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
|
|
}
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
-static void mask_IO_APIC_irq (unsigned int irq)
|
|
+static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
|
|
{
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
unsigned long flags;
|
|
|
|
+ BUG_ON(!cfg);
|
|
+
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __mask_IO_APIC_irq(irq);
|
|
+ __mask_IO_APIC_irq(cfg);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
-static void unmask_IO_APIC_irq (unsigned int irq)
|
|
+static void unmask_IO_APIC_irq_desc(struct irq_desc *desc)
|
|
{
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __unmask_IO_APIC_irq(irq);
|
|
+ __unmask_IO_APIC_irq(cfg);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
+static void mask_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ mask_IO_APIC_irq_desc(desc);
|
|
+}
|
|
+static void unmask_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ unmask_IO_APIC_irq_desc(desc);
|
|
+}
|
|
+
|
|
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
@@ -614,6 +847,8 @@ void send_IPI_self(int vector)
|
|
apic_write(APIC_ICR, cfg);
|
|
}
|
|
#endif /* !CONFIG_SMP && CONFIG_X86_32*/
|
|
+#else
|
|
+#define add_pin_to_irq_cpu(cfg, cpu, apic, pin)
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
#ifdef CONFIG_X86_32
|
|
@@ -854,7 +1089,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector
|
|
*/
|
|
static int EISA_ELCR(unsigned int irq)
|
|
{
|
|
- if (irq < 16) {
|
|
+ if (irq < NR_IRQS_LEGACY) {
|
|
unsigned int port = 0x4d0 + (irq >> 3);
|
|
return (inb(port) >> (irq & 7)) & 1;
|
|
}
|
|
@@ -1079,52 +1314,118 @@ void unlock_vector_lock(void)
|
|
{
|
|
spin_unlock(&vector_lock);
|
|
}
|
|
-#endif
|
|
|
|
-static int assign_irq_vector(int irq, cpumask_t mask)
|
|
+static int
|
|
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
|
|
{
|
|
- struct physdev_irq irq_op;
|
|
- struct irq_cfg *cfg;
|
|
-
|
|
- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
- return -EINVAL;
|
|
+ /*
|
|
+ * NOTE! The local APIC isn't very good at handling
|
|
+ * multiple interrupts at the same interrupt level.
|
|
+ * As the interrupt level is determined by taking the
|
|
+ * vector number and shifting that right by 4, we
|
|
+ * want to spread these out a bit so that they don't
|
|
+ * all fall in the same interrupt level.
|
|
+ *
|
|
+ * Also, we've got to be careful not to trash gate
|
|
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
|
|
+ */
|
|
+ static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0;
|
|
+ unsigned int old_vector;
|
|
+ int cpu, err;
|
|
+ cpumask_var_t tmp_mask;
|
|
+
|
|
+ if ((cfg->move_in_progress) || cfg->move_cleanup_count)
|
|
+ return -EBUSY;
|
|
+
|
|
+ if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
|
|
+ return -ENOMEM;
|
|
+
|
|
+ old_vector = cfg->vector;
|
|
+ if (old_vector) {
|
|
+ cpumask_and(tmp_mask, mask, cpu_online_mask);
|
|
+ cpumask_and(tmp_mask, cfg->domain, tmp_mask);
|
|
+ if (!cpumask_empty(tmp_mask)) {
|
|
+ free_cpumask_var(tmp_mask);
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
|
|
- cfg = irq_cfg(irq);
|
|
+ /* Only try and allocate irqs on cpus that are present */
|
|
+ err = -ENOSPC;
|
|
+ for_each_cpu_and(cpu, mask, cpu_online_mask) {
|
|
+ int new_cpu;
|
|
+ int vector, offset;
|
|
+
|
|
+ vector_allocation_domain(cpu, tmp_mask);
|
|
+
|
|
+ vector = current_vector;
|
|
+ offset = current_offset;
|
|
+next:
|
|
+ vector += 8;
|
|
+ if (vector >= first_system_vector) {
|
|
+ /* If out of vectors on large boxen, must share them. */
|
|
+ offset = (offset + 1) % 8;
|
|
+ vector = FIRST_DEVICE_VECTOR + offset;
|
|
+ }
|
|
+ if (unlikely(current_vector == vector))
|
|
+ continue;
|
|
|
|
- if (cfg->vector)
|
|
- return 0;
|
|
+ if (test_bit(vector, used_vectors))
|
|
+ goto next;
|
|
|
|
- irq_op.irq = irq;
|
|
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
- return -ENOSPC;
|
|
+#ifdef CONFIG_KDB
|
|
+ if (vector == KDBENTER_VECTOR)
|
|
+ goto next;
|
|
+#endif /* CONFIG_KDB */
|
|
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
|
|
+ if (per_cpu(vector_irq, new_cpu)[vector] != -1)
|
|
+ goto next;
|
|
+ /* Found one! */
|
|
+ current_vector = vector;
|
|
+ current_offset = offset;
|
|
+ if (old_vector) {
|
|
+ cfg->move_in_progress = 1;
|
|
+ cpumask_copy(cfg->old_domain, cfg->domain);
|
|
+ }
|
|
+ for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
|
|
+ per_cpu(vector_irq, new_cpu)[vector] = irq;
|
|
+ cfg->vector = vector;
|
|
+ cpumask_copy(cfg->domain, tmp_mask);
|
|
+ err = 0;
|
|
+ break;
|
|
+ }
|
|
+ free_cpumask_var(tmp_mask);
|
|
+ return err;
|
|
+}
|
|
|
|
- cfg->vector = irq_op.vector;
|
|
+static int
|
|
+assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
|
|
+{
|
|
+ int err;
|
|
+ unsigned long flags;
|
|
|
|
- return 0;
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ err = __assign_irq_vector(irq, cfg, mask);
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+ return err;
|
|
}
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-static void __clear_irq_vector(int irq)
|
|
+static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
|
|
{
|
|
- struct irq_cfg *cfg;
|
|
- cpumask_t mask;
|
|
int cpu, vector;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
BUG_ON(!cfg->vector);
|
|
|
|
vector = cfg->vector;
|
|
- cpus_and(mask, cfg->domain, cpu_online_map);
|
|
- for_each_cpu_mask_nr(cpu, mask)
|
|
+ for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
|
|
per_cpu(vector_irq, cpu)[vector] = -1;
|
|
|
|
cfg->vector = 0;
|
|
- cpus_clear(cfg->domain);
|
|
+ cpumask_clear(cfg->domain);
|
|
|
|
if (likely(!cfg->move_in_progress))
|
|
return;
|
|
- cpus_and(mask, cfg->old_domain, cpu_online_map);
|
|
- for_each_cpu_mask_nr(cpu, mask) {
|
|
+ for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
|
|
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
|
|
vector++) {
|
|
if (per_cpu(vector_irq, cpu)[vector] != irq)
|
|
@@ -1142,10 +1443,12 @@ void __setup_vector_irq(int cpu)
|
|
/* This function must be called with vector_lock held */
|
|
int irq, vector;
|
|
struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc;
|
|
|
|
/* Mark the inuse vectors */
|
|
- for_each_irq_cfg(irq, cfg) {
|
|
- if (!cpu_isset(cpu, cfg->domain))
|
|
+ for_each_irq_desc(irq, desc) {
|
|
+ cfg = desc->chip_data;
|
|
+ if (!cpumask_test_cpu(cpu, cfg->domain))
|
|
continue;
|
|
vector = cfg->vector;
|
|
per_cpu(vector_irq, cpu)[vector] = irq;
|
|
@@ -1157,7 +1460,7 @@ void __setup_vector_irq(int cpu)
|
|
continue;
|
|
|
|
cfg = irq_cfg(irq);
|
|
- if (!cpu_isset(cpu, cfg->domain))
|
|
+ if (!cpumask_test_cpu(cpu, cfg->domain))
|
|
per_cpu(vector_irq, cpu)[vector] = -1;
|
|
}
|
|
}
|
|
@@ -1195,11 +1498,8 @@ static inline int IO_APIC_irq_trigger(in
|
|
}
|
|
#endif
|
|
|
|
-static void ioapic_register_intr(int irq, unsigned long trigger)
|
|
+static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger)
|
|
{
|
|
- struct irq_desc *desc;
|
|
-
|
|
- desc = irq_to_desc(irq);
|
|
|
|
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
trigger == IOAPIC_LEVEL)
|
|
@@ -1230,8 +1530,8 @@ static void ioapic_register_intr(int irq
|
|
handle_edge_irq, "edge");
|
|
}
|
|
#else /* !CONFIG_XEN */
|
|
-#define __clear_irq_vector(irq) ((void)(irq))
|
|
-#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
|
|
+#define __clear_irq_vector(irq, cfg) ((void)0)
|
|
+#define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq)
|
|
#endif
|
|
|
|
static int setup_ioapic_entry(int apic, int irq,
|
|
@@ -1295,24 +1595,25 @@ static int setup_ioapic_entry(int apic,
|
|
return 0;
|
|
}
|
|
|
|
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
|
|
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
|
|
int trigger, int polarity)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
struct IO_APIC_route_entry entry;
|
|
- cpumask_t mask;
|
|
+ unsigned int dest;
|
|
|
|
if (!IO_APIC_IRQ(irq))
|
|
return;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
+ cfg = desc->chip_data;
|
|
|
|
- mask = TARGET_CPUS;
|
|
- if (assign_irq_vector(irq, mask))
|
|
+ if (assign_irq_vector(irq, cfg, TARGET_CPUS))
|
|
return;
|
|
|
|
#ifndef CONFIG_XEN
|
|
- cpus_and(mask, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
|
|
+#else
|
|
+ dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
#endif
|
|
|
|
apic_printk(APIC_VERBOSE,KERN_DEBUG
|
|
@@ -1323,16 +1624,15 @@ static void setup_IO_APIC_irq(int apic,
|
|
|
|
|
|
if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
|
|
- cpu_mask_to_apicid(mask), trigger, polarity,
|
|
- cfg->vector)) {
|
|
+ dest, trigger, polarity, cfg->vector)) {
|
|
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
|
|
mp_ioapics[apic].mp_apicid, pin);
|
|
- __clear_irq_vector(irq);
|
|
+ __clear_irq_vector(irq, cfg);
|
|
return;
|
|
}
|
|
|
|
- ioapic_register_intr(irq, trigger);
|
|
- if (irq < 16)
|
|
+ ioapic_register_intr(irq, desc, trigger);
|
|
+ if (irq < NR_IRQS_LEGACY)
|
|
disable_8259A_irq(irq);
|
|
|
|
ioapic_write_entry(apic, pin, entry);
|
|
@@ -1342,6 +1642,9 @@ static void __init setup_IO_APIC_irqs(vo
|
|
{
|
|
int apic, pin, idx, irq;
|
|
int notcon = 0;
|
|
+ struct irq_desc *desc;
|
|
+ struct irq_cfg *cfg;
|
|
+ int cpu = boot_cpu_id;
|
|
|
|
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
|
|
@@ -1376,9 +1679,15 @@ static void __init setup_IO_APIC_irqs(vo
|
|
if (multi_timer_check(apic, irq))
|
|
continue;
|
|
#endif
|
|
- add_pin_to_irq(irq, apic, pin);
|
|
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
|
|
+ if (!desc) {
|
|
+ printk(KERN_INFO "can not get irq_desc for %d\n", irq);
|
|
+ continue;
|
|
+ }
|
|
+ cfg = desc->chip_data;
|
|
+ add_pin_to_irq_cpu(cfg, cpu, apic, pin);
|
|
|
|
- setup_IO_APIC_irq(apic, pin, irq,
|
|
+ setup_IO_APIC_irq(apic, pin, irq, desc,
|
|
irq_trigger(idx), irq_polarity(idx));
|
|
}
|
|
}
|
|
@@ -1438,6 +1747,7 @@ __apicdebuginit(void) print_IO_APIC(void
|
|
union IO_APIC_reg_03 reg_03;
|
|
unsigned long flags;
|
|
struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc;
|
|
unsigned int irq;
|
|
|
|
if (apic_verbosity == APIC_QUIET)
|
|
@@ -1527,8 +1837,11 @@ __apicdebuginit(void) print_IO_APIC(void
|
|
}
|
|
}
|
|
printk(KERN_DEBUG "IRQ to pin mappings:\n");
|
|
- for_each_irq_cfg(irq, cfg) {
|
|
- struct irq_pin_list *entry = cfg->irq_2_pin;
|
|
+ for_each_irq_desc(irq, desc) {
|
|
+ struct irq_pin_list *entry;
|
|
+
|
|
+ cfg = desc->chip_data;
|
|
+ entry = cfg->irq_2_pin;
|
|
if (!entry)
|
|
continue;
|
|
printk(KERN_DEBUG "IRQ%d ", irq);
|
|
@@ -2018,14 +2331,16 @@ static unsigned int startup_ioapic_irq(u
|
|
{
|
|
int was_pending = 0;
|
|
unsigned long flags;
|
|
+ struct irq_cfg *cfg;
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- if (irq < 16) {
|
|
+ if (irq < NR_IRQS_LEGACY) {
|
|
disable_8259A_irq(irq);
|
|
if (i8259A_irq_pending(irq))
|
|
was_pending = 1;
|
|
}
|
|
- __unmask_IO_APIC_irq(irq);
|
|
+ cfg = irq_cfg(irq);
|
|
+ __unmask_IO_APIC_irq(cfg);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
return was_pending;
|
|
@@ -2039,7 +2354,7 @@ static int ioapic_retrigger_irq(unsigned
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
|
|
+ send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
return 1;
|
|
@@ -2088,35 +2403,35 @@ static DECLARE_DELAYED_WORK(ir_migration
|
|
* as simple as edge triggered migration and we can do the irq migration
|
|
* with a simple atomic update to IO-APIC RTE.
|
|
*/
|
|
-static void migrate_ioapic_irq(int irq, cpumask_t mask)
|
|
+static void
|
|
+migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
- struct irq_desc *desc;
|
|
- cpumask_t tmp, cleanup_mask;
|
|
struct irte irte;
|
|
int modify_ioapic_rte;
|
|
unsigned int dest;
|
|
unsigned long flags;
|
|
+ unsigned int irq;
|
|
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
+ if (!cpumask_intersects(mask, cpu_online_mask))
|
|
return;
|
|
|
|
+ irq = desc->irq;
|
|
if (get_irte(irq, &irte))
|
|
return;
|
|
|
|
- if (assign_irq_vector(irq, mask))
|
|
+ cfg = desc->chip_data;
|
|
+ if (assign_irq_vector(irq, cfg, mask))
|
|
return;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ set_extra_move_desc(desc, mask);
|
|
+
|
|
+ dest = cpu_mask_to_apicid_and(cfg->domain, mask);
|
|
|
|
- desc = irq_to_desc(irq);
|
|
modify_ioapic_rte = desc->status & IRQ_LEVEL;
|
|
if (modify_ioapic_rte) {
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __target_IO_APIC_irq(irq, dest, cfg->vector);
|
|
+ __target_IO_APIC_irq(irq, dest, cfg);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
@@ -2128,24 +2443,20 @@ static void migrate_ioapic_irq(int irq,
|
|
*/
|
|
modify_irte(irq, &irte);
|
|
|
|
- if (cfg->move_in_progress) {
|
|
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
- cfg->move_in_progress = 0;
|
|
- }
|
|
+ if (cfg->move_in_progress)
|
|
+ send_cleanup_vector(cfg);
|
|
|
|
- desc->affinity = mask;
|
|
+ cpumask_copy(&desc->affinity, mask);
|
|
}
|
|
|
|
-static int migrate_irq_remapped_level(int irq)
|
|
+static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
|
|
{
|
|
int ret = -1;
|
|
- struct irq_desc *desc = irq_to_desc(irq);
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
|
|
- mask_IO_APIC_irq(irq);
|
|
+ mask_IO_APIC_irq_desc(desc);
|
|
|
|
- if (io_apic_level_ack_pending(irq)) {
|
|
+ if (io_apic_level_ack_pending(cfg)) {
|
|
/*
|
|
* Interrupt in progress. Migrating irq now will change the
|
|
* vector information in the IO-APIC RTE and that will confuse
|
|
@@ -2157,14 +2468,15 @@ static int migrate_irq_remapped_level(in
|
|
}
|
|
|
|
/* everthing is clear. we have right of way */
|
|
- migrate_ioapic_irq(irq, desc->pending_mask);
|
|
+ migrate_ioapic_irq_desc(desc, &desc->pending_mask);
|
|
|
|
ret = 0;
|
|
desc->status &= ~IRQ_MOVE_PENDING;
|
|
- cpus_clear(desc->pending_mask);
|
|
+ cpumask_clear(&desc->pending_mask);
|
|
|
|
unmask:
|
|
- unmask_IO_APIC_irq(irq);
|
|
+ unmask_IO_APIC_irq_desc(desc);
|
|
+
|
|
return ret;
|
|
}
|
|
|
|
@@ -2185,7 +2497,7 @@ static void ir_irq_migration(struct work
|
|
continue;
|
|
}
|
|
|
|
- desc->chip->set_affinity(irq, desc->pending_mask);
|
|
+ desc->chip->set_affinity(irq, &desc->pending_mask);
|
|
spin_unlock_irqrestore(&desc->lock, flags);
|
|
}
|
|
}
|
|
@@ -2194,28 +2506,33 @@ static void ir_irq_migration(struct work
|
|
/*
|
|
* Migrates the IRQ destination in the process context.
|
|
*/
|
|
-static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
|
|
+static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
|
|
+ const struct cpumask *mask)
|
|
{
|
|
- struct irq_desc *desc = irq_to_desc(irq);
|
|
-
|
|
if (desc->status & IRQ_LEVEL) {
|
|
desc->status |= IRQ_MOVE_PENDING;
|
|
- desc->pending_mask = mask;
|
|
- migrate_irq_remapped_level(irq);
|
|
+ cpumask_copy(&desc->pending_mask, mask);
|
|
+ migrate_irq_remapped_level_desc(desc);
|
|
return;
|
|
}
|
|
|
|
- migrate_ioapic_irq(irq, mask);
|
|
+ migrate_ioapic_irq_desc(desc, mask);
|
|
+}
|
|
+static void set_ir_ioapic_affinity_irq(unsigned int irq,
|
|
+ const struct cpumask *mask)
|
|
+{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
|
|
}
|
|
#endif
|
|
|
|
asmlinkage void smp_irq_move_cleanup_interrupt(void)
|
|
{
|
|
unsigned vector, me;
|
|
+
|
|
ack_APIC_irq();
|
|
-#ifdef CONFIG_X86_64
|
|
exit_idle();
|
|
-#endif
|
|
irq_enter();
|
|
|
|
me = smp_processor_id();
|
|
@@ -2225,6 +2542,9 @@ asmlinkage void smp_irq_move_cleanup_int
|
|
struct irq_cfg *cfg;
|
|
irq = __get_cpu_var(vector_irq)[vector];
|
|
|
|
+ if (irq == -1)
|
|
+ continue;
|
|
+
|
|
desc = irq_to_desc(irq);
|
|
if (!desc)
|
|
continue;
|
|
@@ -2234,7 +2554,7 @@ asmlinkage void smp_irq_move_cleanup_int
|
|
if (!cfg->move_cleanup_count)
|
|
goto unlock;
|
|
|
|
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
|
|
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
|
|
goto unlock;
|
|
|
|
__get_cpu_var(vector_irq)[vector] = -1;
|
|
@@ -2246,28 +2566,45 @@ unlock:
|
|
irq_exit();
|
|
}
|
|
|
|
-static void irq_complete_move(unsigned int irq)
|
|
+static void irq_complete_move(struct irq_desc **descp)
|
|
{
|
|
- struct irq_cfg *cfg = irq_cfg(irq);
|
|
+ struct irq_desc *desc = *descp;
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
unsigned vector, me;
|
|
|
|
- if (likely(!cfg->move_in_progress))
|
|
+ if (likely(!cfg->move_in_progress)) {
|
|
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
|
+ if (likely(!cfg->move_desc_pending))
|
|
+ return;
|
|
+
|
|
+ /* domain has not changed, but affinity did */
|
|
+ me = smp_processor_id();
|
|
+ if (cpu_isset(me, desc->affinity)) {
|
|
+ *descp = desc = move_irq_desc(desc, me);
|
|
+ /* get the new one */
|
|
+ cfg = desc->chip_data;
|
|
+ cfg->move_desc_pending = 0;
|
|
+ }
|
|
+#endif
|
|
return;
|
|
+ }
|
|
|
|
vector = ~get_irq_regs()->orig_ax;
|
|
me = smp_processor_id();
|
|
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
|
|
- cpumask_t cleanup_mask;
|
|
|
|
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
- cfg->move_in_progress = 0;
|
|
+ if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) {
|
|
+#ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC
|
|
+ *descp = desc = move_irq_desc(desc, me);
|
|
+ /* get the new one */
|
|
+ cfg = desc->chip_data;
|
|
+#endif
|
|
+ send_cleanup_vector(cfg);
|
|
}
|
|
}
|
|
#else
|
|
-static inline void irq_complete_move(unsigned int irq) {}
|
|
+static inline void irq_complete_move(struct irq_desc **descp) {}
|
|
#endif
|
|
+
|
|
#ifdef CONFIG_INTR_REMAP
|
|
static void ack_x2apic_level(unsigned int irq)
|
|
{
|
|
@@ -2278,11 +2615,14 @@ static void ack_x2apic_edge(unsigned int
|
|
{
|
|
ack_x2APIC_irq();
|
|
}
|
|
+
|
|
#endif
|
|
|
|
static void ack_apic_edge(unsigned int irq)
|
|
{
|
|
- irq_complete_move(irq);
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ irq_complete_move(&desc);
|
|
move_native_irq(irq);
|
|
ack_APIC_irq();
|
|
}
|
|
@@ -2291,18 +2631,21 @@ atomic_t irq_mis_count;
|
|
|
|
static void ack_apic_level(unsigned int irq)
|
|
{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
#ifdef CONFIG_X86_32
|
|
unsigned long v;
|
|
int i;
|
|
#endif
|
|
+ struct irq_cfg *cfg;
|
|
int do_unmask_irq = 0;
|
|
|
|
- irq_complete_move(irq);
|
|
+ irq_complete_move(&desc);
|
|
#ifdef CONFIG_GENERIC_PENDING_IRQ
|
|
/* If we are moving the irq we need to mask it */
|
|
- if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
|
|
+ if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
|
|
do_unmask_irq = 1;
|
|
- mask_IO_APIC_irq(irq);
|
|
+ mask_IO_APIC_irq_desc(desc);
|
|
}
|
|
#endif
|
|
|
|
@@ -2326,7 +2669,8 @@ static void ack_apic_level(unsigned int
|
|
* operation to prevent an edge-triggered interrupt escaping meanwhile.
|
|
* The idea is from Manfred Spraul. --macro
|
|
*/
|
|
- i = irq_cfg(irq)->vector;
|
|
+ cfg = desc->chip_data;
|
|
+ i = cfg->vector;
|
|
|
|
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
|
|
#endif
|
|
@@ -2365,17 +2709,18 @@ static void ack_apic_level(unsigned int
|
|
* accurate and is causing problems then it is a hardware bug
|
|
* and you can go talk to the chipset vendor about it.
|
|
*/
|
|
- if (!io_apic_level_ack_pending(irq))
|
|
+ cfg = desc->chip_data;
|
|
+ if (!io_apic_level_ack_pending(cfg))
|
|
move_masked_irq(irq);
|
|
- unmask_IO_APIC_irq(irq);
|
|
+ unmask_IO_APIC_irq_desc(desc);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
if (!(v & (1 << (i & 0x1f)))) {
|
|
atomic_inc(&irq_mis_count);
|
|
spin_lock(&ioapic_lock);
|
|
- __mask_and_edge_IO_APIC_irq(irq);
|
|
- __unmask_and_level_IO_APIC_irq(irq);
|
|
+ __mask_and_edge_IO_APIC_irq(cfg);
|
|
+ __unmask_and_level_IO_APIC_irq(cfg);
|
|
spin_unlock(&ioapic_lock);
|
|
}
|
|
#endif
|
|
@@ -2427,24 +2772,23 @@ static inline void init_IO_APIC_traps(vo
|
|
* Also, we've got to be careful not to trash gate
|
|
* 0x80, because int 0x80 is hm, kind of importantish. ;)
|
|
*/
|
|
- for_each_irq_cfg(irq, cfg) {
|
|
+ for_each_irq_desc(irq, desc) {
|
|
#ifdef CONFIG_XEN
|
|
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS)
|
|
continue;
|
|
#endif
|
|
- if (IO_APIC_IRQ(irq) && !cfg->vector) {
|
|
+ cfg = desc->chip_data;
|
|
+ if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) {
|
|
/*
|
|
* Hmm.. We don't have an entry for this,
|
|
* so default to an old-fashioned 8259
|
|
* interrupt if we can..
|
|
*/
|
|
- if (irq < 16)
|
|
+ if (irq < NR_IRQS_LEGACY)
|
|
make_8259A_irq(irq);
|
|
- else {
|
|
- desc = irq_to_desc(irq);
|
|
+ else
|
|
/* Strange. Oh, well.. */
|
|
desc->chip = &no_irq_chip;
|
|
- }
|
|
}
|
|
}
|
|
}
|
|
@@ -2470,7 +2814,7 @@ static void unmask_lapic_irq(unsigned in
|
|
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
}
|
|
|
|
-static void ack_lapic_irq (unsigned int irq)
|
|
+static void ack_lapic_irq(unsigned int irq)
|
|
{
|
|
ack_APIC_irq();
|
|
}
|
|
@@ -2482,11 +2826,8 @@ static struct irq_chip lapic_chip __read
|
|
.ack = ack_lapic_irq,
|
|
};
|
|
|
|
-static void lapic_register_intr(int irq)
|
|
+static void lapic_register_intr(int irq, struct irq_desc *desc)
|
|
{
|
|
- struct irq_desc *desc;
|
|
-
|
|
- desc = irq_to_desc(irq);
|
|
desc->status &= ~IRQ_LEVEL;
|
|
set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
|
"edge");
|
|
@@ -2590,7 +2931,9 @@ int timer_through_8259 __initdata;
|
|
*/
|
|
static inline void __init check_timer(void)
|
|
{
|
|
- struct irq_cfg *cfg = irq_cfg(0);
|
|
+ struct irq_desc *desc = irq_to_desc(0);
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
+ int cpu = boot_cpu_id;
|
|
int apic1, pin1, apic2, pin2;
|
|
unsigned long flags;
|
|
unsigned int ver;
|
|
@@ -2605,7 +2948,7 @@ static inline void __init check_timer(vo
|
|
* get/set the timer IRQ vector:
|
|
*/
|
|
disable_8259A_irq(0);
|
|
- assign_irq_vector(0, TARGET_CPUS);
|
|
+ assign_irq_vector(0, cfg, TARGET_CPUS);
|
|
|
|
/*
|
|
* As IRQ0 is to be enabled in the 8259A, the virtual
|
|
@@ -2656,10 +2999,10 @@ static inline void __init check_timer(vo
|
|
* Ok, does IRQ0 through the IOAPIC work?
|
|
*/
|
|
if (no_pin1) {
|
|
- add_pin_to_irq(0, apic1, pin1);
|
|
+ add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
|
|
setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
|
|
}
|
|
- unmask_IO_APIC_irq(0);
|
|
+ unmask_IO_APIC_irq_desc(desc);
|
|
if (timer_irq_works()) {
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
setup_nmi();
|
|
@@ -2685,9 +3028,9 @@ static inline void __init check_timer(vo
|
|
/*
|
|
* legacy devices should be connected to IO APIC #0
|
|
*/
|
|
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
|
+ replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
|
|
setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
|
|
- unmask_IO_APIC_irq(0);
|
|
+ unmask_IO_APIC_irq_desc(desc);
|
|
enable_8259A_irq(0);
|
|
if (timer_irq_works()) {
|
|
apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
|
@@ -2719,7 +3062,7 @@ static inline void __init check_timer(vo
|
|
apic_printk(APIC_QUIET, KERN_INFO
|
|
"...trying to set up timer as Virtual Wire IRQ...\n");
|
|
|
|
- lapic_register_intr(0);
|
|
+ lapic_register_intr(0, desc);
|
|
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
|
enable_8259A_irq(0);
|
|
|
|
@@ -2918,22 +3261,26 @@ unsigned int create_irq_nr(unsigned int
|
|
unsigned int irq;
|
|
unsigned int new;
|
|
unsigned long flags;
|
|
- struct irq_cfg *cfg_new;
|
|
-
|
|
- irq_want = nr_irqs - 1;
|
|
+ struct irq_cfg *cfg_new = NULL;
|
|
+ int cpu = boot_cpu_id;
|
|
+ struct irq_desc *desc_new = NULL;
|
|
|
|
irq = 0;
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
- for (new = irq_want; new > 0; new--) {
|
|
+ for (new = irq_want; new < NR_IRQS; new++) {
|
|
if (platform_legacy_irq(new))
|
|
continue;
|
|
- cfg_new = irq_cfg(new);
|
|
- if (cfg_new && cfg_new->vector != 0)
|
|
+
|
|
+ desc_new = irq_to_desc_alloc_cpu(new, cpu);
|
|
+ if (!desc_new) {
|
|
+ printk(KERN_INFO "can not get irq_desc for %d\n", new);
|
|
+ continue;
|
|
+ }
|
|
+ cfg_new = desc_new->chip_data;
|
|
+
|
|
+ if (cfg_new->vector != 0)
|
|
continue;
|
|
- /* check if need to create one */
|
|
- if (!cfg_new)
|
|
- cfg_new = irq_cfg_alloc(new);
|
|
- if (__assign_irq_vector(new, TARGET_CPUS) == 0)
|
|
+ if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
|
|
irq = new;
|
|
break;
|
|
}
|
|
@@ -2941,15 +3288,21 @@ unsigned int create_irq_nr(unsigned int
|
|
|
|
if (irq > 0) {
|
|
dynamic_irq_init(irq);
|
|
+ /* restore it, in case dynamic_irq_init clear it */
|
|
+ if (desc_new)
|
|
+ desc_new->chip_data = cfg_new;
|
|
}
|
|
return irq;
|
|
}
|
|
|
|
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
|
|
int create_irq(void)
|
|
{
|
|
+ unsigned int irq_want;
|
|
int irq;
|
|
|
|
- irq = create_irq_nr(nr_irqs - 1);
|
|
+ irq_want = nr_irqs_gsi;
|
|
+ irq = create_irq_nr(irq_want);
|
|
|
|
if (irq == 0)
|
|
irq = -1;
|
|
@@ -2960,14 +3313,22 @@ int create_irq(void)
|
|
void destroy_irq(unsigned int irq)
|
|
{
|
|
unsigned long flags;
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc;
|
|
|
|
+ /* store it, in case dynamic_irq_cleanup clear it */
|
|
+ desc = irq_to_desc(irq);
|
|
+ cfg = desc->chip_data;
|
|
dynamic_irq_cleanup(irq);
|
|
+ /* connect back irq_cfg */
|
|
+ if (desc)
|
|
+ desc->chip_data = cfg;
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
free_irte(irq);
|
|
#endif
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
- __clear_irq_vector(irq);
|
|
+ __clear_irq_vector(irq, cfg);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
}
|
|
#endif /* !CONFIG_XEN */
|
|
@@ -2981,16 +3342,13 @@ static int msi_compose_msg(struct pci_de
|
|
struct irq_cfg *cfg;
|
|
int err;
|
|
unsigned dest;
|
|
- cpumask_t tmp;
|
|
|
|
- tmp = TARGET_CPUS;
|
|
- err = assign_irq_vector(irq, tmp);
|
|
+ cfg = irq_cfg(irq);
|
|
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
|
|
if (err)
|
|
return err;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, tmp);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
if (irq_remapped(irq)) {
|
|
@@ -3044,64 +3402,48 @@ static int msi_compose_msg(struct pci_de
|
|
}
|
|
|
|
#ifdef CONFIG_SMP
|
|
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
|
|
{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
struct irq_cfg *cfg;
|
|
struct msi_msg msg;
|
|
unsigned int dest;
|
|
- cpumask_t tmp;
|
|
- struct irq_desc *desc;
|
|
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
+ dest = set_desc_affinity(desc, mask);
|
|
+ if (dest == BAD_APICID)
|
|
return;
|
|
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ cfg = desc->chip_data;
|
|
|
|
- read_msi_msg(irq, &msg);
|
|
+ read_msi_msg_desc(desc, &msg);
|
|
|
|
msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
msg.data |= MSI_DATA_VECTOR(cfg->vector);
|
|
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
|
|
- write_msi_msg(irq, &msg);
|
|
- desc = irq_to_desc(irq);
|
|
- desc->affinity = mask;
|
|
+ write_msi_msg_desc(desc, &msg);
|
|
}
|
|
-
|
|
#ifdef CONFIG_INTR_REMAP
|
|
/*
|
|
* Migrate the MSI irq to another cpumask. This migration is
|
|
* done in the process context using interrupt-remapping hardware.
|
|
*/
|
|
-static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+static void
|
|
+ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
|
|
{
|
|
- struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
unsigned int dest;
|
|
- cpumask_t tmp, cleanup_mask;
|
|
struct irte irte;
|
|
- struct irq_desc *desc;
|
|
-
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- return;
|
|
|
|
if (get_irte(irq, &irte))
|
|
return;
|
|
|
|
- if (assign_irq_vector(irq, mask))
|
|
+ dest = set_desc_affinity(desc, mask);
|
|
+ if (dest == BAD_APICID)
|
|
return;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
irte.vector = cfg->vector;
|
|
irte.dest_id = IRTE_DEST(dest);
|
|
|
|
@@ -3115,16 +3457,10 @@ static void ir_set_msi_irq_affinity(unsi
|
|
* at the new destination. So, time to cleanup the previous
|
|
* vector allocation.
|
|
*/
|
|
- if (cfg->move_in_progress) {
|
|
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
- cfg->move_in_progress = 0;
|
|
- }
|
|
-
|
|
- desc = irq_to_desc(irq);
|
|
- desc->affinity = mask;
|
|
+ if (cfg->move_in_progress)
|
|
+ send_cleanup_vector(cfg);
|
|
}
|
|
+
|
|
#endif
|
|
#endif /* CONFIG_SMP */
|
|
|
|
@@ -3183,7 +3519,7 @@ static int msi_alloc_irte(struct pci_dev
|
|
}
|
|
#endif
|
|
|
|
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
|
|
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
|
|
{
|
|
int ret;
|
|
struct msi_msg msg;
|
|
@@ -3192,7 +3528,7 @@ static int setup_msi_irq(struct pci_dev
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
- set_irq_msi(irq, desc);
|
|
+ set_irq_msi(irq, msidesc);
|
|
write_msi_msg(irq, &msg);
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
@@ -3212,26 +3548,13 @@ static int setup_msi_irq(struct pci_dev
|
|
return 0;
|
|
}
|
|
|
|
-static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
|
|
-{
|
|
- unsigned int irq;
|
|
-
|
|
- irq = dev->bus->number;
|
|
- irq <<= 8;
|
|
- irq |= dev->devfn;
|
|
- irq <<= 12;
|
|
-
|
|
- return irq;
|
|
-}
|
|
-
|
|
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
|
|
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
|
|
{
|
|
unsigned int irq;
|
|
int ret;
|
|
unsigned int irq_want;
|
|
|
|
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
|
|
-
|
|
+ irq_want = nr_irqs_gsi;
|
|
irq = create_irq_nr(irq_want);
|
|
if (irq == 0)
|
|
return -1;
|
|
@@ -3245,7 +3568,7 @@ int arch_setup_msi_irq(struct pci_dev *d
|
|
goto error;
|
|
no_ir:
|
|
#endif
|
|
- ret = setup_msi_irq(dev, desc, irq);
|
|
+ ret = setup_msi_irq(dev, msidesc, irq);
|
|
if (ret < 0) {
|
|
destroy_irq(irq);
|
|
return ret;
|
|
@@ -3263,7 +3586,7 @@ int arch_setup_msi_irqs(struct pci_dev *
|
|
{
|
|
unsigned int irq;
|
|
int ret, sub_handle;
|
|
- struct msi_desc *desc;
|
|
+ struct msi_desc *msidesc;
|
|
unsigned int irq_want;
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
@@ -3271,10 +3594,11 @@ int arch_setup_msi_irqs(struct pci_dev *
|
|
int index = 0;
|
|
#endif
|
|
|
|
- irq_want = build_irq_for_pci_dev(dev) + 0x100;
|
|
+ irq_want = nr_irqs_gsi;
|
|
sub_handle = 0;
|
|
- list_for_each_entry(desc, &dev->msi_list, list) {
|
|
- irq = create_irq_nr(irq_want--);
|
|
+ list_for_each_entry(msidesc, &dev->msi_list, list) {
|
|
+ irq = create_irq_nr(irq_want);
|
|
+ irq_want++;
|
|
if (irq == 0)
|
|
return -1;
|
|
#ifdef CONFIG_INTR_REMAP
|
|
@@ -3306,7 +3630,7 @@ int arch_setup_msi_irqs(struct pci_dev *
|
|
}
|
|
no_ir:
|
|
#endif
|
|
- ret = setup_msi_irq(dev, desc, irq);
|
|
+ ret = setup_msi_irq(dev, msidesc, irq);
|
|
if (ret < 0)
|
|
goto error;
|
|
sub_handle++;
|
|
@@ -3325,24 +3649,18 @@ void arch_teardown_msi_irq(unsigned int
|
|
|
|
#ifdef CONFIG_DMAR
|
|
#ifdef CONFIG_SMP
|
|
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
|
|
+static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
|
|
{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
struct irq_cfg *cfg;
|
|
struct msi_msg msg;
|
|
unsigned int dest;
|
|
- cpumask_t tmp;
|
|
- struct irq_desc *desc;
|
|
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
+ dest = set_desc_affinity(desc, mask);
|
|
+ if (dest == BAD_APICID)
|
|
return;
|
|
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ cfg = desc->chip_data;
|
|
|
|
dmar_msi_read(irq, &msg);
|
|
|
|
@@ -3352,9 +3670,8 @@ static void dmar_msi_set_affinity(unsign
|
|
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
|
|
dmar_msi_write(irq, &msg);
|
|
- desc = irq_to_desc(irq);
|
|
- desc->affinity = mask;
|
|
}
|
|
+
|
|
#endif /* CONFIG_SMP */
|
|
|
|
struct irq_chip dmar_msi_type = {
|
|
@@ -3386,24 +3703,18 @@ int arch_setup_dmar_msi(unsigned int irq
|
|
#ifdef CONFIG_HPET_TIMER
|
|
|
|
#ifdef CONFIG_SMP
|
|
-static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
|
|
+static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
|
|
{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
struct irq_cfg *cfg;
|
|
- struct irq_desc *desc;
|
|
struct msi_msg msg;
|
|
unsigned int dest;
|
|
- cpumask_t tmp;
|
|
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
+ dest = set_desc_affinity(desc, mask);
|
|
+ if (dest == BAD_APICID)
|
|
return;
|
|
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ cfg = desc->chip_data;
|
|
|
|
hpet_msi_read(irq, &msg);
|
|
|
|
@@ -3413,9 +3724,8 @@ static void hpet_msi_set_affinity(unsign
|
|
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
|
|
hpet_msi_write(irq, &msg);
|
|
- desc = irq_to_desc(irq);
|
|
- desc->affinity = mask;
|
|
}
|
|
+
|
|
#endif /* CONFIG_SMP */
|
|
|
|
struct irq_chip hpet_msi_type = {
|
|
@@ -3468,28 +3778,21 @@ static void target_ht_irq(unsigned int i
|
|
write_ht_irq_msg(irq, &msg);
|
|
}
|
|
|
|
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask)
|
|
{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
struct irq_cfg *cfg;
|
|
unsigned int dest;
|
|
- cpumask_t tmp;
|
|
- struct irq_desc *desc;
|
|
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
+ dest = set_desc_affinity(desc, mask);
|
|
+ if (dest == BAD_APICID)
|
|
return;
|
|
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ cfg = desc->chip_data;
|
|
|
|
target_ht_irq(irq, dest, cfg->vector);
|
|
- desc = irq_to_desc(irq);
|
|
- desc->affinity = mask;
|
|
}
|
|
+
|
|
#endif
|
|
|
|
static struct irq_chip ht_irq_chip = {
|
|
@@ -3507,17 +3810,14 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
{
|
|
struct irq_cfg *cfg;
|
|
int err;
|
|
- cpumask_t tmp;
|
|
|
|
- tmp = TARGET_CPUS;
|
|
- err = assign_irq_vector(irq, tmp);
|
|
+ cfg = irq_cfg(irq);
|
|
+ err = assign_irq_vector(irq, cfg, TARGET_CPUS);
|
|
if (!err) {
|
|
struct ht_irq_msg msg;
|
|
unsigned dest;
|
|
|
|
- cfg = irq_cfg(irq);
|
|
- cpus_and(tmp, cfg->domain, tmp);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
+ dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
|
|
|
|
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
|
|
|
@@ -3553,7 +3853,7 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
|
|
unsigned long mmr_offset)
|
|
{
|
|
- const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
|
|
+ const struct cpumask *eligible_cpu = cpumask_of(cpu);
|
|
struct irq_cfg *cfg;
|
|
int mmr_pnode;
|
|
unsigned long mmr_value;
|
|
@@ -3561,7 +3861,9 @@ int arch_enable_uv_irq(char *irq_name, u
|
|
unsigned long flags;
|
|
int err;
|
|
|
|
- err = assign_irq_vector(irq, *eligible_cpu);
|
|
+ cfg = irq_cfg(irq);
|
|
+
|
|
+ err = assign_irq_vector(irq, cfg, eligible_cpu);
|
|
if (err != 0)
|
|
return err;
|
|
|
|
@@ -3570,8 +3872,6 @@ int arch_enable_uv_irq(char *irq_name, u
|
|
irq_name);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
- cfg = irq_cfg(irq);
|
|
-
|
|
mmr_value = 0;
|
|
entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
|
|
BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
|
|
@@ -3582,7 +3882,7 @@ int arch_enable_uv_irq(char *irq_name, u
|
|
entry->polarity = 0;
|
|
entry->trigger = 0;
|
|
entry->mask = 0;
|
|
- entry->dest = cpu_mask_to_apicid(*eligible_cpu);
|
|
+ entry->dest = cpu_mask_to_apicid(eligible_cpu);
|
|
|
|
mmr_pnode = uv_blade_to_pnode(mmr_blade);
|
|
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
|
|
@@ -3623,10 +3923,29 @@ int __init io_apic_get_redir_entries (in
|
|
return reg_01.bits.entries;
|
|
}
|
|
|
|
-int __init probe_nr_irqs(void)
|
|
+#ifndef CONFIG_XEN
|
|
+void __init probe_nr_irqs_gsi(void)
|
|
{
|
|
- return NR_IRQS;
|
|
+ int nr = 0;
|
|
+
|
|
+ nr = acpi_probe_gsi();
|
|
+ if (nr > nr_irqs_gsi) {
|
|
+ nr_irqs_gsi = nr;
|
|
+ } else {
|
|
+ /* for acpi=off or acpi is not compiled in */
|
|
+ int idx;
|
|
+
|
|
+ nr = 0;
|
|
+ for (idx = 0; idx < nr_ioapics; idx++)
|
|
+ nr += io_apic_get_redir_entries(idx) + 1;
|
|
+
|
|
+ if (nr > nr_irqs_gsi)
|
|
+ nr_irqs_gsi = nr;
|
|
+ }
|
|
+
|
|
+ printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
|
|
}
|
|
+#endif
|
|
|
|
/* --------------------------------------------------------------------------
|
|
ACPI-based IOAPIC Configuration
|
|
@@ -3726,6 +4045,10 @@ int __init io_apic_get_version(int ioapi
|
|
|
|
int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
|
|
{
|
|
+ struct irq_desc *desc;
|
|
+ struct irq_cfg *cfg;
|
|
+ int cpu = boot_cpu_id;
|
|
+
|
|
#ifdef CONFIG_XEN
|
|
if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) {
|
|
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
|
|
@@ -3740,13 +4063,21 @@ int io_apic_set_pci_routing (int ioapic,
|
|
return -EINVAL;
|
|
}
|
|
|
|
+ desc = irq_to_desc_alloc_cpu(irq, cpu);
|
|
+ if (!desc) {
|
|
+ printk(KERN_INFO "can not get irq_desc %d\n", irq);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
/*
|
|
* IRQs < 16 are already in the irq_2_pin[] map
|
|
*/
|
|
- if (irq >= 16)
|
|
- add_pin_to_irq(irq, ioapic, pin);
|
|
+ if (irq >= NR_IRQS_LEGACY) {
|
|
+ cfg = desc->chip_data;
|
|
+ add_pin_to_irq_cpu(cfg, cpu, ioapic, pin);
|
|
+ }
|
|
|
|
- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
|
|
+ setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity);
|
|
|
|
return 0;
|
|
}
|
|
@@ -3785,7 +4116,7 @@ void __init setup_ioapic_dest(void)
|
|
int pin, ioapic, irq, irq_entry;
|
|
struct irq_desc *desc;
|
|
struct irq_cfg *cfg;
|
|
- cpumask_t mask;
|
|
+ const struct cpumask *mask;
|
|
|
|
if (skip_ioapic_setup == 1)
|
|
return;
|
|
@@ -3801,9 +4132,10 @@ void __init setup_ioapic_dest(void)
|
|
* when you have too many devices, because at that time only boot
|
|
* cpu is online.
|
|
*/
|
|
- cfg = irq_cfg(irq);
|
|
+ desc = irq_to_desc(irq);
|
|
+ cfg = desc->chip_data;
|
|
if (!cfg->vector) {
|
|
- setup_IO_APIC_irq(ioapic, pin, irq,
|
|
+ setup_IO_APIC_irq(ioapic, pin, irq, desc,
|
|
irq_trigger(irq_entry),
|
|
irq_polarity(irq_entry));
|
|
continue;
|
|
@@ -3813,19 +4145,18 @@ void __init setup_ioapic_dest(void)
|
|
/*
|
|
* Honour affinities which have been set in early boot
|
|
*/
|
|
- desc = irq_to_desc(irq);
|
|
if (desc->status &
|
|
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
|
|
- mask = desc->affinity;
|
|
+ mask = &desc->affinity;
|
|
else
|
|
mask = TARGET_CPUS;
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
if (intr_remapping_enabled)
|
|
- set_ir_ioapic_affinity_irq(irq, mask);
|
|
+ set_ir_ioapic_affinity_irq_desc(desc, mask);
|
|
else
|
|
#endif
|
|
- set_ioapic_affinity_irq(irq, mask);
|
|
+ set_ioapic_affinity_irq_desc(desc, mask);
|
|
}
|
|
|
|
}
|
|
@@ -3874,7 +4205,6 @@ void __init ioapic_init_mappings(void)
|
|
struct resource *ioapic_res;
|
|
int i;
|
|
|
|
- irq_2_pin_init();
|
|
ioapic_res = ioapic_setup_resources();
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
if (smp_found_config) {
|
|
--- head-2011-03-17.orig/arch/x86/kernel/ioport-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/ioport-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -36,7 +36,7 @@ static void set_bitmap(unsigned long *bi
|
|
*/
|
|
asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
|
|
{
|
|
- struct thread_struct * t = &current->thread;
|
|
+ struct thread_struct *t = &current->thread;
|
|
struct physdev_set_iobitmap set_iobitmap;
|
|
|
|
if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
|
|
--- head-2011-03-17.orig/arch/x86/kernel/apic/ipi-xen.c 2011-02-21 13:56:51.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/ipi-xen.c 2011-02-21 13:56:59.000000000 +0100
|
|
@@ -40,21 +40,29 @@ void send_IPI_self(int vector)
|
|
__send_IPI_shortcut(APIC_DEST_SELF, vector);
|
|
}
|
|
|
|
-void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
|
|
+void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
|
|
{
|
|
- cpumask_t mask;
|
|
unsigned int cpu;
|
|
|
|
- cpus_andnot(mask, cpumask, cpu_online_map);
|
|
- WARN_ON(!cpus_empty(mask));
|
|
- for_each_online_cpu(cpu)
|
|
- if (cpu_isset(cpu, cpumask))
|
|
- __send_IPI_one(cpu, vector);
|
|
+ WARN_ON(!cpumask_subset(cpumask, cpu_online_mask));
|
|
+ for_each_cpu_and(cpu, cpumask, cpu_online_mask)
|
|
+ __send_IPI_one(cpu, vector);
|
|
}
|
|
|
|
-void send_IPI_mask_sequence(cpumask_t mask, int vector)
|
|
+void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
|
|
{
|
|
send_IPI_mask_bitmask(mask, vector);
|
|
}
|
|
|
|
+void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
|
|
+{
|
|
+ unsigned int query_cpu;
|
|
+ unsigned int this_cpu = smp_processor_id();
|
|
+
|
|
+ WARN_ON(!cpumask_subset(mask, cpu_online_mask));
|
|
+ for_each_cpu_and(query_cpu, mask, cpu_online_mask)
|
|
+ if (query_cpu != this_cpu)
|
|
+ __send_IPI_one(query_cpu, vector);
|
|
+}
|
|
+
|
|
#endif
|
|
--- head-2011-03-17.orig/arch/x86/kernel/irq-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/irq-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -5,10 +5,11 @@
|
|
#include <linux/interrupt.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/seq_file.h>
|
|
+#include <linux/smp.h>
|
|
|
|
#include <asm/apic.h>
|
|
#include <asm/io_apic.h>
|
|
-#include <asm/smp.h>
|
|
+#include <asm/irq.h>
|
|
|
|
atomic_t irq_err_count;
|
|
|
|
@@ -43,62 +44,62 @@ void ack_bad_irq(unsigned int irq)
|
|
/*
|
|
* /proc/interrupts printing:
|
|
*/
|
|
-static int show_other_interrupts(struct seq_file *p)
|
|
+static int show_other_interrupts(struct seq_file *p, int prec)
|
|
{
|
|
int j;
|
|
|
|
- seq_printf(p, "NMI: ");
|
|
+ seq_printf(p, "%*s: ", prec, "NMI");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
|
|
seq_printf(p, " Non-maskable interrupts\n");
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
- seq_printf(p, "LOC: ");
|
|
+ seq_printf(p, "%*s: ", prec, "LOC");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
|
|
seq_printf(p, " Local timer interrupts\n");
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
- seq_printf(p, "RES: ");
|
|
+ seq_printf(p, "%*s: ", prec, "RES");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
|
|
seq_printf(p, " Rescheduling interrupts\n");
|
|
- seq_printf(p, "CAL: ");
|
|
+ seq_printf(p, "%*s: ", prec, "CAL");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
|
|
seq_printf(p, " Function call interrupts\n");
|
|
#ifndef CONFIG_XEN
|
|
- seq_printf(p, "TLB: ");
|
|
+ seq_printf(p, "%*s: ", prec, "TLB");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
|
|
seq_printf(p, " TLB shootdowns\n");
|
|
#else
|
|
- seq_printf(p, "LCK: ");
|
|
+ seq_printf(p, "%*s: ", prec, "LCK");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_lock_count);
|
|
seq_printf(p, " Spinlock wakeups\n");
|
|
#endif
|
|
#endif
|
|
#ifdef CONFIG_X86_MCE
|
|
- seq_printf(p, "TRM: ");
|
|
+ seq_printf(p, "%*s: ", prec, "TRM");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
|
|
seq_printf(p, " Thermal event interrupts\n");
|
|
# ifdef CONFIG_X86_64
|
|
- seq_printf(p, "THR: ");
|
|
+ seq_printf(p, "%*s: ", prec, "THR");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
|
|
seq_printf(p, " Threshold APIC interrupts\n");
|
|
# endif
|
|
#endif
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
- seq_printf(p, "SPU: ");
|
|
+ seq_printf(p, "%*s: ", prec, "SPU");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
|
|
seq_printf(p, " Spurious interrupts\n");
|
|
#endif
|
|
- seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
|
|
+ seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
|
|
#if defined(CONFIG_X86_IO_APIC)
|
|
- seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
|
|
+ seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
|
|
#endif
|
|
return 0;
|
|
}
|
|
@@ -106,25 +107,31 @@ static int show_other_interrupts(struct
|
|
int show_interrupts(struct seq_file *p, void *v)
|
|
{
|
|
unsigned long flags, any_count = 0;
|
|
- int i = *(loff_t *) v, j;
|
|
+ int i = *(loff_t *) v, j, prec;
|
|
struct irqaction *action;
|
|
struct irq_desc *desc;
|
|
|
|
if (i > nr_irqs)
|
|
return 0;
|
|
|
|
+ for (prec = 3, j = 1000; prec < 10 && j <= nr_irqs; ++prec)
|
|
+ j *= 10;
|
|
+
|
|
if (i == nr_irqs)
|
|
- return show_other_interrupts(p);
|
|
+ return show_other_interrupts(p, prec);
|
|
|
|
/* print header */
|
|
if (i == 0) {
|
|
- seq_printf(p, " ");
|
|
+ seq_printf(p, "%*s", prec + 8, "");
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "CPU%-8d", j);
|
|
seq_putc(p, '\n');
|
|
}
|
|
|
|
desc = irq_to_desc(i);
|
|
+ if (!desc)
|
|
+ return 0;
|
|
+
|
|
spin_lock_irqsave(&desc->lock, flags);
|
|
#ifndef CONFIG_SMP
|
|
any_count = kstat_irqs(i);
|
|
@@ -136,7 +143,7 @@ int show_interrupts(struct seq_file *p,
|
|
if (!action && !any_count)
|
|
goto out;
|
|
|
|
- seq_printf(p, "%3d: ", i);
|
|
+ seq_printf(p, "%*d: ", prec, i);
|
|
#ifndef CONFIG_SMP
|
|
seq_printf(p, "%10u ", kstat_irqs(i));
|
|
#else
|
|
--- head-2011-03-17.orig/arch/x86/kernel/ldt-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/ldt-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -12,8 +12,8 @@
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/vmalloc.h>
|
|
+#include <linux/uaccess.h>
|
|
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/system.h>
|
|
#include <asm/ldt.h>
|
|
#include <asm/desc.h>
|
|
--- head-2011-03-17.orig/arch/x86/kernel/machine_kexec_32.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/machine_kexec_32.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -46,6 +46,17 @@ static int machine_kexec_alloc_page_tabl
|
|
{
|
|
image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL);
|
|
#ifdef CONFIG_X86_PAE
|
|
+#ifdef CONFIG_XEN /* machine address must fit into xki->page_list[PA_PGD] */
|
|
+ if (image->arch.pgd) {
|
|
+ struct page *pg = virt_to_page(image->arch.pgd);
|
|
+
|
|
+ if (xen_limit_pages_to_max_mfn(pg, 0, BITS_PER_LONG) < 0) {
|
|
+ image->arch.pgd = NULL;
|
|
+ __free_page(pg);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
|
|
image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL);
|
|
#endif
|
|
@@ -123,13 +134,7 @@ void machine_kexec_setup_load_arg(xen_ke
|
|
memcpy(control_page, relocate_kernel, PAGE_SIZE);
|
|
|
|
xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
|
|
- xki->page_list[PA_PGD] = __ma(kexec_pgd);
|
|
-#ifdef CONFIG_X86_PAE
|
|
- xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
|
|
- xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
|
|
-#endif
|
|
- xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
|
|
- xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
|
|
+ xki->page_list[PA_PGD] = __ma(image->arch.pgd);
|
|
|
|
if (image->type == KEXEC_TYPE_DEFAULT)
|
|
xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page);
|
|
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -2,7 +2,7 @@
|
|
* Intel Multiprocessor Specification 1.1 and 1.4
|
|
* compliant MP-table parsing routines.
|
|
*
|
|
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
|
|
+ * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
|
|
* (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
|
|
* (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
|
|
*/
|
|
@@ -16,18 +16,18 @@
|
|
#include <linux/bitops.h>
|
|
#include <linux/acpi.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/smp.h>
|
|
|
|
-#include <asm/smp.h>
|
|
#include <asm/mtrr.h>
|
|
#include <asm/mpspec.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/io_apic.h>
|
|
#include <asm/proto.h>
|
|
-#include <asm/acpi.h>
|
|
#include <asm/bios_ebda.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/trampoline.h>
|
|
#include <asm/setup.h>
|
|
+#include <asm/smp.h>
|
|
|
|
#include <mach_apic.h>
|
|
#ifdef CONFIG_X86_32
|
|
@@ -54,13 +54,13 @@ static int __init mpf_checksum(unsigned
|
|
return sum & 0xFF;
|
|
}
|
|
|
|
-static void __init MP_processor_info(struct mpc_config_processor *m)
|
|
+static void __init MP_processor_info(struct mpc_cpu *m)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
int apicid;
|
|
char *bootup_cpu = "";
|
|
|
|
- if (!(m->mpc_cpuflag & CPU_ENABLED)) {
|
|
+ if (!(m->cpuflag & CPU_ENABLED)) {
|
|
disabled_cpus++;
|
|
return;
|
|
}
|
|
@@ -68,57 +68,57 @@ static void __init MP_processor_info(str
|
|
if (x86_quirks->mpc_apic_id)
|
|
apicid = x86_quirks->mpc_apic_id(m);
|
|
else
|
|
- apicid = m->mpc_apicid;
|
|
+ apicid = m->apicid;
|
|
|
|
- if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
|
|
+ if (m->cpuflag & CPU_BOOTPROCESSOR) {
|
|
bootup_cpu = " (Bootup-CPU)";
|
|
- boot_cpu_physical_apicid = m->mpc_apicid;
|
|
+ boot_cpu_physical_apicid = m->apicid;
|
|
}
|
|
|
|
- printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
|
|
- generic_processor_info(apicid, m->mpc_apicver);
|
|
+ printk(KERN_INFO "Processor #%d%s\n", m->apicid, bootup_cpu);
|
|
+ generic_processor_info(apicid, m->apicver);
|
|
#else /* CONFIG_XEN */
|
|
num_processors++;
|
|
#endif
|
|
}
|
|
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
-static void __init MP_bus_info(struct mpc_config_bus *m)
|
|
+static void __init MP_bus_info(struct mpc_bus *m)
|
|
{
|
|
char str[7];
|
|
- memcpy(str, m->mpc_bustype, 6);
|
|
+ memcpy(str, m->bustype, 6);
|
|
str[6] = 0;
|
|
|
|
if (x86_quirks->mpc_oem_bus_info)
|
|
x86_quirks->mpc_oem_bus_info(m, str);
|
|
else
|
|
- apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
|
|
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
|
|
|
|
#if MAX_MP_BUSSES < 256
|
|
- if (m->mpc_busid >= MAX_MP_BUSSES) {
|
|
+ if (m->busid >= MAX_MP_BUSSES) {
|
|
printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
|
|
" is too large, max. supported is %d\n",
|
|
- m->mpc_busid, str, MAX_MP_BUSSES - 1);
|
|
+ m->busid, str, MAX_MP_BUSSES - 1);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) {
|
|
- set_bit(m->mpc_busid, mp_bus_not_pci);
|
|
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
|
|
+ set_bit(m->busid, mp_bus_not_pci);
|
|
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
+ mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
|
|
#endif
|
|
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
|
|
if (x86_quirks->mpc_oem_pci_bus)
|
|
x86_quirks->mpc_oem_pci_bus(m);
|
|
|
|
- clear_bit(m->mpc_busid, mp_bus_not_pci);
|
|
-#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
|
|
+ clear_bit(m->busid, mp_bus_not_pci);
|
|
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
+ mp_bus_id_to_type[m->busid] = MP_BUS_PCI;
|
|
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
|
|
+ mp_bus_id_to_type[m->busid] = MP_BUS_EISA;
|
|
} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA) - 1) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
|
|
+ mp_bus_id_to_type[m->busid] = MP_BUS_MCA;
|
|
#endif
|
|
} else
|
|
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
|
|
@@ -142,32 +142,31 @@ static int bad_ioapic(unsigned long addr
|
|
return 0;
|
|
}
|
|
|
|
-static void __init MP_ioapic_info(struct mpc_config_ioapic *m)
|
|
+static void __init MP_ioapic_info(struct mpc_ioapic *m)
|
|
{
|
|
- if (!(m->mpc_flags & MPC_APIC_USABLE))
|
|
+ if (!(m->flags & MPC_APIC_USABLE))
|
|
return;
|
|
|
|
printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n",
|
|
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
|
|
+ m->apicid, m->apicver, m->apicaddr);
|
|
|
|
- if (bad_ioapic(m->mpc_apicaddr))
|
|
+ if (bad_ioapic(m->apicaddr))
|
|
return;
|
|
|
|
- mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
|
|
- mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
|
|
- mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
|
|
- mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
|
|
- mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
|
|
+ mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
|
|
+ mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
|
|
+ mp_ioapics[nr_ioapics].mp_type = m->type;
|
|
+ mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
|
|
+ mp_ioapics[nr_ioapics].mp_flags = m->flags;
|
|
nr_ioapics++;
|
|
}
|
|
|
|
-static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
|
|
+static void print_MP_intsrc_info(struct mpc_intsrc *m)
|
|
{
|
|
apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
|
|
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
|
|
- m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
|
|
- m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
|
|
+ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbus,
|
|
+ m->srcbusirq, m->dstapic, m->dstirq);
|
|
}
|
|
|
|
static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
|
|
@@ -179,52 +178,52 @@ static void __init print_mp_irq_info(str
|
|
mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
|
|
}
|
|
|
|
-static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
|
|
+static void __init assign_to_mp_irq(struct mpc_intsrc *m,
|
|
struct mp_config_intsrc *mp_irq)
|
|
{
|
|
- mp_irq->mp_dstapic = m->mpc_dstapic;
|
|
- mp_irq->mp_type = m->mpc_type;
|
|
- mp_irq->mp_irqtype = m->mpc_irqtype;
|
|
- mp_irq->mp_irqflag = m->mpc_irqflag;
|
|
- mp_irq->mp_srcbus = m->mpc_srcbus;
|
|
- mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
|
|
- mp_irq->mp_dstirq = m->mpc_dstirq;
|
|
+ mp_irq->mp_dstapic = m->dstapic;
|
|
+ mp_irq->mp_type = m->type;
|
|
+ mp_irq->mp_irqtype = m->irqtype;
|
|
+ mp_irq->mp_irqflag = m->irqflag;
|
|
+ mp_irq->mp_srcbus = m->srcbus;
|
|
+ mp_irq->mp_srcbusirq = m->srcbusirq;
|
|
+ mp_irq->mp_dstirq = m->dstirq;
|
|
}
|
|
|
|
static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
|
|
- struct mpc_config_intsrc *m)
|
|
+ struct mpc_intsrc *m)
|
|
{
|
|
- m->mpc_dstapic = mp_irq->mp_dstapic;
|
|
- m->mpc_type = mp_irq->mp_type;
|
|
- m->mpc_irqtype = mp_irq->mp_irqtype;
|
|
- m->mpc_irqflag = mp_irq->mp_irqflag;
|
|
- m->mpc_srcbus = mp_irq->mp_srcbus;
|
|
- m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
|
|
- m->mpc_dstirq = mp_irq->mp_dstirq;
|
|
+ m->dstapic = mp_irq->mp_dstapic;
|
|
+ m->type = mp_irq->mp_type;
|
|
+ m->irqtype = mp_irq->mp_irqtype;
|
|
+ m->irqflag = mp_irq->mp_irqflag;
|
|
+ m->srcbus = mp_irq->mp_srcbus;
|
|
+ m->srcbusirq = mp_irq->mp_srcbusirq;
|
|
+ m->dstirq = mp_irq->mp_dstirq;
|
|
}
|
|
|
|
static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
|
|
- struct mpc_config_intsrc *m)
|
|
+ struct mpc_intsrc *m)
|
|
{
|
|
- if (mp_irq->mp_dstapic != m->mpc_dstapic)
|
|
+ if (mp_irq->mp_dstapic != m->dstapic)
|
|
return 1;
|
|
- if (mp_irq->mp_type != m->mpc_type)
|
|
+ if (mp_irq->mp_type != m->type)
|
|
return 2;
|
|
- if (mp_irq->mp_irqtype != m->mpc_irqtype)
|
|
+ if (mp_irq->mp_irqtype != m->irqtype)
|
|
return 3;
|
|
- if (mp_irq->mp_irqflag != m->mpc_irqflag)
|
|
+ if (mp_irq->mp_irqflag != m->irqflag)
|
|
return 4;
|
|
- if (mp_irq->mp_srcbus != m->mpc_srcbus)
|
|
+ if (mp_irq->mp_srcbus != m->srcbus)
|
|
return 5;
|
|
- if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
|
|
+ if (mp_irq->mp_srcbusirq != m->srcbusirq)
|
|
return 6;
|
|
- if (mp_irq->mp_dstirq != m->mpc_dstirq)
|
|
+ if (mp_irq->mp_dstirq != m->dstirq)
|
|
return 7;
|
|
|
|
return 0;
|
|
}
|
|
|
|
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
|
|
+static void __init MP_intsrc_info(struct mpc_intsrc *m)
|
|
{
|
|
int i;
|
|
|
|
@@ -242,59 +241,57 @@ static void __init MP_intsrc_info(struct
|
|
|
|
#endif
|
|
|
|
-static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
|
|
+static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
|
|
{
|
|
apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
|
|
" IRQ %02x, APIC ID %x, APIC LINT %02x\n",
|
|
- m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
|
|
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
|
|
+ m->irqtype, m->irqflag & 3, (m->irqflag >> 2) & 3, m->srcbusid,
|
|
+ m->srcbusirq, m->destapic, m->destapiclint);
|
|
}
|
|
|
|
/*
|
|
* Read/parse the MPC
|
|
*/
|
|
|
|
-static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
|
|
- char *str)
|
|
+static int __init smp_check_mpc(struct mpc_table *mpc, char *oem, char *str)
|
|
{
|
|
|
|
- if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
|
|
+ if (memcmp(mpc->signature, MPC_SIGNATURE, 4)) {
|
|
printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
|
|
- mpc->mpc_signature[0], mpc->mpc_signature[1],
|
|
- mpc->mpc_signature[2], mpc->mpc_signature[3]);
|
|
+ mpc->signature[0], mpc->signature[1],
|
|
+ mpc->signature[2], mpc->signature[3]);
|
|
return 0;
|
|
}
|
|
- if (mpf_checksum((unsigned char *)mpc, mpc->mpc_length)) {
|
|
+ if (mpf_checksum((unsigned char *)mpc, mpc->length)) {
|
|
printk(KERN_ERR "MPTABLE: checksum error!\n");
|
|
return 0;
|
|
}
|
|
- if (mpc->mpc_spec != 0x01 && mpc->mpc_spec != 0x04) {
|
|
+ if (mpc->spec != 0x01 && mpc->spec != 0x04) {
|
|
printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
|
|
- mpc->mpc_spec);
|
|
+ mpc->spec);
|
|
return 0;
|
|
}
|
|
- if (!mpc->mpc_lapic) {
|
|
+ if (!mpc->lapic) {
|
|
printk(KERN_ERR "MPTABLE: null local APIC address!\n");
|
|
return 0;
|
|
}
|
|
- memcpy(oem, mpc->mpc_oem, 8);
|
|
+ memcpy(oem, mpc->oem, 8);
|
|
oem[8] = 0;
|
|
printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
|
|
|
|
- memcpy(str, mpc->mpc_productid, 12);
|
|
+ memcpy(str, mpc->productid, 12);
|
|
str[12] = 0;
|
|
|
|
printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
|
|
|
|
#ifndef CONFIG_XEN
|
|
- printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
|
|
+ printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->lapic);
|
|
#endif
|
|
|
|
return 1;
|
|
}
|
|
|
|
-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
|
|
+static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
|
|
{
|
|
char str[16];
|
|
char oem[10];
|
|
@@ -320,15 +317,15 @@ static int __init smp_read_mpc(struct mp
|
|
#ifndef CONFIG_XEN
|
|
/* save the local APIC address, it might be non-default */
|
|
if (!acpi_lapic)
|
|
- mp_lapic_addr = mpc->mpc_lapic;
|
|
+ mp_lapic_addr = mpc->lapic;
|
|
#endif
|
|
|
|
if (early)
|
|
return 1;
|
|
|
|
- if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
|
|
- struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
|
|
- x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
|
|
+ if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
|
|
+ struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
|
|
+ x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
|
|
}
|
|
|
|
/*
|
|
@@ -337,12 +334,11 @@ static int __init smp_read_mpc(struct mp
|
|
if (x86_quirks->mpc_record)
|
|
*x86_quirks->mpc_record = 0;
|
|
|
|
- while (count < mpc->mpc_length) {
|
|
+ while (count < mpc->length) {
|
|
switch (*mpt) {
|
|
case MP_PROCESSOR:
|
|
{
|
|
- struct mpc_config_processor *m =
|
|
- (struct mpc_config_processor *)mpt;
|
|
+ struct mpc_cpu *m = (struct mpc_cpu *)mpt;
|
|
/* ACPI may have already provided this data */
|
|
if (!acpi_lapic)
|
|
MP_processor_info(m);
|
|
@@ -352,8 +348,7 @@ static int __init smp_read_mpc(struct mp
|
|
}
|
|
case MP_BUS:
|
|
{
|
|
- struct mpc_config_bus *m =
|
|
- (struct mpc_config_bus *)mpt;
|
|
+ struct mpc_bus *m = (struct mpc_bus *)mpt;
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
MP_bus_info(m);
|
|
#endif
|
|
@@ -364,30 +359,28 @@ static int __init smp_read_mpc(struct mp
|
|
case MP_IOAPIC:
|
|
{
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
- struct mpc_config_ioapic *m =
|
|
- (struct mpc_config_ioapic *)mpt;
|
|
+ struct mpc_ioapic *m = (struct mpc_ioapic *)mpt;
|
|
MP_ioapic_info(m);
|
|
#endif
|
|
- mpt += sizeof(struct mpc_config_ioapic);
|
|
- count += sizeof(struct mpc_config_ioapic);
|
|
+ mpt += sizeof(struct mpc_ioapic);
|
|
+ count += sizeof(struct mpc_ioapic);
|
|
break;
|
|
}
|
|
case MP_INTSRC:
|
|
{
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
- struct mpc_config_intsrc *m =
|
|
- (struct mpc_config_intsrc *)mpt;
|
|
+ struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
|
|
|
|
MP_intsrc_info(m);
|
|
#endif
|
|
- mpt += sizeof(struct mpc_config_intsrc);
|
|
- count += sizeof(struct mpc_config_intsrc);
|
|
+ mpt += sizeof(struct mpc_intsrc);
|
|
+ count += sizeof(struct mpc_intsrc);
|
|
break;
|
|
}
|
|
case MP_LINTSRC:
|
|
{
|
|
- struct mpc_config_lintsrc *m =
|
|
- (struct mpc_config_lintsrc *)mpt;
|
|
+ struct mpc_lintsrc *m =
|
|
+ (struct mpc_lintsrc *)mpt;
|
|
MP_lintsrc_info(m);
|
|
mpt += sizeof(*m);
|
|
count += sizeof(*m);
|
|
@@ -398,8 +391,8 @@ static int __init smp_read_mpc(struct mp
|
|
printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
|
|
printk(KERN_ERR "type %x\n", *mpt);
|
|
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
|
|
- 1, mpc, mpc->mpc_length, 1);
|
|
- count = mpc->mpc_length;
|
|
+ 1, mpc, mpc->length, 1);
|
|
+ count = mpc->length;
|
|
break;
|
|
}
|
|
if (x86_quirks->mpc_record)
|
|
@@ -430,16 +423,16 @@ static int __init ELCR_trigger(unsigned
|
|
|
|
static void __init construct_default_ioirq_mptable(int mpc_default_type)
|
|
{
|
|
- struct mpc_config_intsrc intsrc;
|
|
+ struct mpc_intsrc intsrc;
|
|
int i;
|
|
int ELCR_fallback = 0;
|
|
|
|
- intsrc.mpc_type = MP_INTSRC;
|
|
- intsrc.mpc_irqflag = 0; /* conforming */
|
|
- intsrc.mpc_srcbus = 0;
|
|
- intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
|
|
+ intsrc.type = MP_INTSRC;
|
|
+ intsrc.irqflag = 0; /* conforming */
|
|
+ intsrc.srcbus = 0;
|
|
+ intsrc.dstapic = mp_ioapics[0].mp_apicid;
|
|
|
|
- intsrc.mpc_irqtype = mp_INT;
|
|
+ intsrc.irqtype = mp_INT;
|
|
|
|
/*
|
|
* If true, we have an ISA/PCI system with no IRQ entries
|
|
@@ -482,30 +475,30 @@ static void __init construct_default_ioi
|
|
* irqflag field (level sensitive, active high polarity).
|
|
*/
|
|
if (ELCR_trigger(i))
|
|
- intsrc.mpc_irqflag = 13;
|
|
+ intsrc.irqflag = 13;
|
|
else
|
|
- intsrc.mpc_irqflag = 0;
|
|
+ intsrc.irqflag = 0;
|
|
}
|
|
|
|
- intsrc.mpc_srcbusirq = i;
|
|
- intsrc.mpc_dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
|
|
+ intsrc.srcbusirq = i;
|
|
+ intsrc.dstirq = i ? i : 2; /* IRQ0 to INTIN2 */
|
|
MP_intsrc_info(&intsrc);
|
|
}
|
|
|
|
- intsrc.mpc_irqtype = mp_ExtINT;
|
|
- intsrc.mpc_srcbusirq = 0;
|
|
- intsrc.mpc_dstirq = 0; /* 8259A to INTIN0 */
|
|
+ intsrc.irqtype = mp_ExtINT;
|
|
+ intsrc.srcbusirq = 0;
|
|
+ intsrc.dstirq = 0; /* 8259A to INTIN0 */
|
|
MP_intsrc_info(&intsrc);
|
|
}
|
|
|
|
|
|
static void __init construct_ioapic_table(int mpc_default_type)
|
|
{
|
|
- struct mpc_config_ioapic ioapic;
|
|
- struct mpc_config_bus bus;
|
|
+ struct mpc_ioapic ioapic;
|
|
+ struct mpc_bus bus;
|
|
|
|
- bus.mpc_type = MP_BUS;
|
|
- bus.mpc_busid = 0;
|
|
+ bus.type = MP_BUS;
|
|
+ bus.busid = 0;
|
|
switch (mpc_default_type) {
|
|
default:
|
|
printk(KERN_ERR "???\nUnknown standard configuration %d\n",
|
|
@@ -513,29 +506,29 @@ static void __init construct_ioapic_tabl
|
|
/* fall through */
|
|
case 1:
|
|
case 5:
|
|
- memcpy(bus.mpc_bustype, "ISA ", 6);
|
|
+ memcpy(bus.bustype, "ISA ", 6);
|
|
break;
|
|
case 2:
|
|
case 6:
|
|
case 3:
|
|
- memcpy(bus.mpc_bustype, "EISA ", 6);
|
|
+ memcpy(bus.bustype, "EISA ", 6);
|
|
break;
|
|
case 4:
|
|
case 7:
|
|
- memcpy(bus.mpc_bustype, "MCA ", 6);
|
|
+ memcpy(bus.bustype, "MCA ", 6);
|
|
}
|
|
MP_bus_info(&bus);
|
|
if (mpc_default_type > 4) {
|
|
- bus.mpc_busid = 1;
|
|
- memcpy(bus.mpc_bustype, "PCI ", 6);
|
|
+ bus.busid = 1;
|
|
+ memcpy(bus.bustype, "PCI ", 6);
|
|
MP_bus_info(&bus);
|
|
}
|
|
|
|
- ioapic.mpc_type = MP_IOAPIC;
|
|
- ioapic.mpc_apicid = 2;
|
|
- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
- ioapic.mpc_flags = MPC_APIC_USABLE;
|
|
- ioapic.mpc_apicaddr = 0xFEC00000;
|
|
+ ioapic.type = MP_IOAPIC;
|
|
+ ioapic.apicid = 2;
|
|
+ ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
+ ioapic.flags = MPC_APIC_USABLE;
|
|
+ ioapic.apicaddr = 0xFEC00000;
|
|
MP_ioapic_info(&ioapic);
|
|
|
|
/*
|
|
@@ -549,8 +542,8 @@ static inline void __init construct_ioap
|
|
|
|
static inline void __init construct_default_ISA_mptable(int mpc_default_type)
|
|
{
|
|
- struct mpc_config_processor processor;
|
|
- struct mpc_config_lintsrc lintsrc;
|
|
+ struct mpc_cpu processor;
|
|
+ struct mpc_lintsrc lintsrc;
|
|
int linttypes[2] = { mp_ExtINT, mp_NMI };
|
|
int i;
|
|
|
|
@@ -564,30 +557,30 @@ static inline void __init construct_defa
|
|
/*
|
|
* 2 CPUs, numbered 0 & 1.
|
|
*/
|
|
- processor.mpc_type = MP_PROCESSOR;
|
|
+ processor.type = MP_PROCESSOR;
|
|
/* Either an integrated APIC or a discrete 82489DX. */
|
|
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
- processor.mpc_cpuflag = CPU_ENABLED;
|
|
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
|
|
+ processor.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
+ processor.cpuflag = CPU_ENABLED;
|
|
+ processor.cpufeature = (boot_cpu_data.x86 << 8) |
|
|
(boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
|
|
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
|
|
- processor.mpc_reserved[0] = 0;
|
|
- processor.mpc_reserved[1] = 0;
|
|
+ processor.featureflag = boot_cpu_data.x86_capability[0];
|
|
+ processor.reserved[0] = 0;
|
|
+ processor.reserved[1] = 0;
|
|
for (i = 0; i < 2; i++) {
|
|
- processor.mpc_apicid = i;
|
|
+ processor.apicid = i;
|
|
MP_processor_info(&processor);
|
|
}
|
|
|
|
construct_ioapic_table(mpc_default_type);
|
|
|
|
- lintsrc.mpc_type = MP_LINTSRC;
|
|
- lintsrc.mpc_irqflag = 0; /* conforming */
|
|
- lintsrc.mpc_srcbusid = 0;
|
|
- lintsrc.mpc_srcbusirq = 0;
|
|
- lintsrc.mpc_destapic = MP_APIC_ALL;
|
|
+ lintsrc.type = MP_LINTSRC;
|
|
+ lintsrc.irqflag = 0; /* conforming */
|
|
+ lintsrc.srcbusid = 0;
|
|
+ lintsrc.srcbusirq = 0;
|
|
+ lintsrc.destapic = MP_APIC_ALL;
|
|
for (i = 0; i < 2; i++) {
|
|
- lintsrc.mpc_irqtype = linttypes[i];
|
|
- lintsrc.mpc_destapiclint = i;
|
|
+ lintsrc.irqtype = linttypes[i];
|
|
+ lintsrc.destapiclint = i;
|
|
MP_lintsrc_info(&lintsrc);
|
|
}
|
|
}
|
|
@@ -606,26 +599,23 @@ void __init get_smp_config(void)
|
|
{
|
|
struct intel_mp_floating *mpf = mpf_found;
|
|
|
|
- if (x86_quirks->mach_get_smp_config) {
|
|
- if (x86_quirks->mach_get_smp_config(early))
|
|
- return;
|
|
- }
|
|
+ if (!mpf)
|
|
+ return;
|
|
+
|
|
if (acpi_lapic && early)
|
|
return;
|
|
+
|
|
/*
|
|
- * ACPI supports both logical (e.g. Hyper-Threading) and physical
|
|
- * processors, where MPS only supports physical.
|
|
+ * MPS doesn't support hyperthreading, aka only have
|
|
+ * thread 0 apic id in MPS table
|
|
*/
|
|
- if (acpi_lapic && acpi_ioapic) {
|
|
- printk(KERN_INFO "Using ACPI (MADT) for SMP configuration "
|
|
- "information\n");
|
|
+ if (acpi_lapic && acpi_ioapic)
|
|
return;
|
|
- } else if (acpi_lapic)
|
|
- printk(KERN_INFO "Using ACPI for processor (LAPIC) "
|
|
- "configuration information\n");
|
|
|
|
- if (!mpf)
|
|
- return;
|
|
+ if (x86_quirks->mach_get_smp_config) {
|
|
+ if (x86_quirks->mach_get_smp_config(early))
|
|
+ return;
|
|
+ }
|
|
|
|
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
|
|
mpf->mpf_specification);
|
|
@@ -682,15 +672,15 @@ void __init get_smp_config(void)
|
|
* ISA defaults and hope it will work.
|
|
*/
|
|
if (!mp_irq_entries) {
|
|
- struct mpc_config_bus bus;
|
|
+ struct mpc_bus bus;
|
|
|
|
printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
|
|
"using default mptable. "
|
|
"(tell your hw vendor)\n");
|
|
|
|
- bus.mpc_type = MP_BUS;
|
|
- bus.mpc_busid = 0;
|
|
- memcpy(bus.mpc_bustype, "ISA ", 6);
|
|
+ bus.type = MP_BUS;
|
|
+ bus.busid = 0;
|
|
+ memcpy(bus.bustype, "ISA ", 6);
|
|
MP_bus_info(&bus);
|
|
|
|
construct_default_ioirq_mptable(0);
|
|
@@ -839,14 +829,14 @@ void __init find_smp_config(void)
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
static u8 __initdata irq_used[MAX_IRQ_SOURCES];
|
|
|
|
-static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
|
|
+static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
|
|
{
|
|
int i;
|
|
|
|
- if (m->mpc_irqtype != mp_INT)
|
|
+ if (m->irqtype != mp_INT)
|
|
return 0;
|
|
|
|
- if (m->mpc_irqflag != 0x0f)
|
|
+ if (m->irqflag != 0x0f)
|
|
return 0;
|
|
|
|
/* not legacy */
|
|
@@ -858,9 +848,9 @@ static int __init get_MP_intsrc_index(s
|
|
if (mp_irqs[i].mp_irqflag != 0x0f)
|
|
continue;
|
|
|
|
- if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
|
|
+ if (mp_irqs[i].mp_srcbus != m->srcbus)
|
|
continue;
|
|
- if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
|
|
+ if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
|
|
continue;
|
|
if (irq_used[i]) {
|
|
/* already claimed */
|
|
@@ -876,10 +866,10 @@ static int __init get_MP_intsrc_index(s
|
|
|
|
#define SPARE_SLOT_NUM 20
|
|
|
|
-static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
|
|
+static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
|
|
#endif
|
|
|
|
-static int __init replace_intsrc_all(struct mp_config_table *mpc,
|
|
+static int __init replace_intsrc_all(struct mpc_table *mpc,
|
|
unsigned long mpc_new_phys,
|
|
unsigned long mpc_new_length)
|
|
{
|
|
@@ -891,36 +881,33 @@ static int __init replace_intsrc_all(st
|
|
int count = sizeof(*mpc);
|
|
unsigned char *mpt = ((unsigned char *)mpc) + count;
|
|
|
|
- printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
|
|
- while (count < mpc->mpc_length) {
|
|
+ printk(KERN_INFO "mpc_length %x\n", mpc->length);
|
|
+ while (count < mpc->length) {
|
|
switch (*mpt) {
|
|
case MP_PROCESSOR:
|
|
{
|
|
- struct mpc_config_processor *m =
|
|
- (struct mpc_config_processor *)mpt;
|
|
+ struct mpc_cpu *m = (struct mpc_cpu *)mpt;
|
|
mpt += sizeof(*m);
|
|
count += sizeof(*m);
|
|
break;
|
|
}
|
|
case MP_BUS:
|
|
{
|
|
- struct mpc_config_bus *m =
|
|
- (struct mpc_config_bus *)mpt;
|
|
+ struct mpc_bus *m = (struct mpc_bus *)mpt;
|
|
mpt += sizeof(*m);
|
|
count += sizeof(*m);
|
|
break;
|
|
}
|
|
case MP_IOAPIC:
|
|
{
|
|
- mpt += sizeof(struct mpc_config_ioapic);
|
|
- count += sizeof(struct mpc_config_ioapic);
|
|
+ mpt += sizeof(struct mpc_ioapic);
|
|
+ count += sizeof(struct mpc_ioapic);
|
|
break;
|
|
}
|
|
case MP_INTSRC:
|
|
{
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
- struct mpc_config_intsrc *m =
|
|
- (struct mpc_config_intsrc *)mpt;
|
|
+ struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
|
|
|
|
apic_printk(APIC_VERBOSE, "OLD ");
|
|
print_MP_intsrc_info(m);
|
|
@@ -941,14 +928,14 @@ static int __init replace_intsrc_all(st
|
|
nr_m_spare++;
|
|
}
|
|
#endif
|
|
- mpt += sizeof(struct mpc_config_intsrc);
|
|
- count += sizeof(struct mpc_config_intsrc);
|
|
+ mpt += sizeof(struct mpc_intsrc);
|
|
+ count += sizeof(struct mpc_intsrc);
|
|
break;
|
|
}
|
|
case MP_LINTSRC:
|
|
{
|
|
- struct mpc_config_lintsrc *m =
|
|
- (struct mpc_config_lintsrc *)mpt;
|
|
+ struct mpc_lintsrc *m =
|
|
+ (struct mpc_lintsrc *)mpt;
|
|
mpt += sizeof(*m);
|
|
count += sizeof(*m);
|
|
break;
|
|
@@ -958,7 +945,7 @@ static int __init replace_intsrc_all(st
|
|
printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
|
|
printk(KERN_ERR "type %x\n", *mpt);
|
|
print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
|
|
- 1, mpc, mpc->mpc_length, 1);
|
|
+ 1, mpc, mpc->length, 1);
|
|
goto out;
|
|
}
|
|
}
|
|
@@ -980,9 +967,8 @@ static int __init replace_intsrc_all(st
|
|
assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
|
|
m_spare[nr_m_spare] = NULL;
|
|
} else {
|
|
- struct mpc_config_intsrc *m =
|
|
- (struct mpc_config_intsrc *)mpt;
|
|
- count += sizeof(struct mpc_config_intsrc);
|
|
+ struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
|
|
+ count += sizeof(struct mpc_intsrc);
|
|
if (!mpc_new_phys) {
|
|
printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
|
|
} else {
|
|
@@ -994,17 +980,16 @@ static int __init replace_intsrc_all(st
|
|
}
|
|
}
|
|
assign_to_mpc_intsrc(&mp_irqs[i], m);
|
|
- mpc->mpc_length = count;
|
|
- mpt += sizeof(struct mpc_config_intsrc);
|
|
+ mpc->length = count;
|
|
+ mpt += sizeof(struct mpc_intsrc);
|
|
}
|
|
print_mp_irq_info(&mp_irqs[i]);
|
|
}
|
|
#endif
|
|
out:
|
|
/* update checksum */
|
|
- mpc->mpc_checksum = 0;
|
|
- mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
|
|
- mpc->mpc_length);
|
|
+ mpc->checksum = 0;
|
|
+ mpc->checksum -= mpf_checksum((unsigned char *)mpc, mpc->length);
|
|
|
|
return 0;
|
|
}
|
|
@@ -1050,8 +1035,7 @@ static int __init update_mp_table(void)
|
|
char str[16];
|
|
char oem[10];
|
|
struct intel_mp_floating *mpf;
|
|
- struct mp_config_table *mpc;
|
|
- struct mp_config_table *mpc_new;
|
|
+ struct mpc_table *mpc, *mpc_new;
|
|
|
|
if (!enable_update_mptable)
|
|
return 0;
|
|
@@ -1077,7 +1061,7 @@ static int __init update_mp_table(void)
|
|
printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf));
|
|
printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
|
|
|
|
- if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
|
|
+ if (mpc_new_phys && mpc->length > mpc_new_length) {
|
|
mpc_new_phys = 0;
|
|
printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
|
|
mpc_new_length);
|
|
@@ -1086,10 +1070,10 @@ static int __init update_mp_table(void)
|
|
if (!mpc_new_phys) {
|
|
unsigned char old, new;
|
|
/* check if we can change the postion */
|
|
- mpc->mpc_checksum = 0;
|
|
- old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
|
|
- mpc->mpc_checksum = 0xff;
|
|
- new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
|
|
+ mpc->checksum = 0;
|
|
+ old = mpf_checksum((unsigned char *)mpc, mpc->length);
|
|
+ mpc->checksum = 0xff;
|
|
+ new = mpf_checksum((unsigned char *)mpc, mpc->length);
|
|
if (old == new) {
|
|
printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
|
|
return 0;
|
|
@@ -1101,7 +1085,7 @@ static int __init update_mp_table(void)
|
|
mpc_new_bus = phys_to_machine(mpc_new_phys);
|
|
mpf->mpf_physptr = mpc_new_bus;
|
|
mpc_new = phys_to_virt(mpc_new_phys);
|
|
- memcpy(mpc_new, mpc, mpc->mpc_length);
|
|
+ memcpy(mpc_new, mpc, mpc->length);
|
|
mpc = mpc_new;
|
|
/* check if we can modify that */
|
|
if (mpc_new_bus - mpf->mpf_physptr) {
|
|
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -6,6 +6,7 @@
|
|
#include <asm/proto.h>
|
|
#include <asm/dma.h>
|
|
#include <asm/iommu.h>
|
|
+#include <asm/gart.h>
|
|
#include <asm/calgary.h>
|
|
#include <asm/amd_iommu.h>
|
|
|
|
@@ -30,11 +31,6 @@ int no_iommu __read_mostly;
|
|
/* Set this to 1 if there is a HW IOMMU in the system */
|
|
int iommu_detected __read_mostly = 0;
|
|
|
|
-/* This tells the BIO block layer to assume merging. Default to off
|
|
- because we cannot guarantee merging later. */
|
|
-int iommu_bio_merge __read_mostly = 0;
|
|
-EXPORT_SYMBOL(iommu_bio_merge);
|
|
-
|
|
dma_addr_t bad_dma_address __read_mostly = 0;
|
|
EXPORT_SYMBOL(bad_dma_address);
|
|
|
|
@@ -42,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address);
|
|
be probably a smaller DMA mask, but this is bug-to-bug compatible
|
|
to older i386. */
|
|
struct device x86_dma_fallback_dev = {
|
|
- .bus_id = "fallback device",
|
|
+ .init_name = "fallback device",
|
|
.coherent_dma_mask = DMA_32BIT_MASK,
|
|
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
|
|
};
|
|
@@ -105,8 +101,6 @@ static void __init dma32_free_bootmem(vo
|
|
dma32_bootmem_ptr = NULL;
|
|
dma32_bootmem_size = 0;
|
|
}
|
|
-#else
|
|
-#define dma32_free_bootmem() ((void)0)
|
|
#endif
|
|
|
|
static struct dma_mapping_ops swiotlb_dma_ops = {
|
|
@@ -128,8 +122,11 @@ static struct dma_mapping_ops swiotlb_dm
|
|
|
|
void __init pci_iommu_alloc(void)
|
|
{
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
/* free the range so iommu could get some range less than 4G */
|
|
dma32_free_bootmem();
|
|
+#endif
|
|
+
|
|
/*
|
|
* The order of these functions is important for
|
|
* fall-back/fail-over reasons
|
|
@@ -149,16 +146,6 @@ void __init pci_iommu_alloc(void)
|
|
}
|
|
}
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
|
|
-{
|
|
- unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
|
|
-
|
|
- return size >> PAGE_SHIFT;
|
|
-}
|
|
-EXPORT_SYMBOL(iommu_nr_pages);
|
|
-#endif
|
|
-
|
|
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
|
|
dma_addr_t *dma_addr, gfp_t flag)
|
|
{
|
|
@@ -246,7 +233,6 @@ static __init int iommu_setup(char *p)
|
|
}
|
|
|
|
if (!strncmp(p, "biomerge", 8)) {
|
|
- iommu_bio_merge = 4096;
|
|
iommu_merge = 1;
|
|
force_iommu = 1;
|
|
}
|
|
@@ -385,8 +371,8 @@ fs_initcall(pci_iommu_init);
|
|
static __devinit void via_no_dac(struct pci_dev *dev)
|
|
{
|
|
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
|
|
- printk(KERN_INFO "PCI: VIA PCI bridge detected."
|
|
- "Disabling DAC.\n");
|
|
+ printk(KERN_INFO
|
|
+ "PCI: VIA PCI bridge detected. Disabling DAC.\n");
|
|
forbid_dac = 1;
|
|
}
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:00:33.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:05:57.000000000 +0100
|
|
@@ -1,13 +1,17 @@
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
+#include <asm/idle.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pm.h>
|
|
#include <linux/clockchips.h>
|
|
+#include <linux/ftrace.h>
|
|
#include <asm/system.h>
|
|
+#include <asm/apic.h>
|
|
+#include <xen/evtchn.h>
|
|
|
|
unsigned long idle_halt;
|
|
EXPORT_SYMBOL(idle_halt);
|
|
@@ -70,6 +74,9 @@ EXPORT_SYMBOL(pm_idle);
|
|
*/
|
|
void xen_idle(void)
|
|
{
|
|
+ struct power_trace it;
|
|
+
|
|
+ trace_power_start(&it, POWER_CSTATE, 1);
|
|
current_thread_info()->status &= ~TS_POLLING;
|
|
/*
|
|
* TS_POLLING-cleared state must be visible before we
|
|
@@ -82,11 +89,27 @@ void xen_idle(void)
|
|
else
|
|
local_irq_enable();
|
|
current_thread_info()->status |= TS_POLLING;
|
|
+ trace_power_end(&it);
|
|
}
|
|
#ifdef CONFIG_APM_MODULE
|
|
EXPORT_SYMBOL(default_idle);
|
|
#endif
|
|
|
|
+void stop_this_cpu(void *dummy)
|
|
+{
|
|
+ local_irq_disable();
|
|
+ /*
|
|
+ * Remove this CPU:
|
|
+ */
|
|
+ cpu_clear(smp_processor_id(), cpu_online_map);
|
|
+ disable_all_local_evtchn();
|
|
+
|
|
+ for (;;) {
|
|
+ if (hlt_works(smp_processor_id()))
|
|
+ halt();
|
|
+ }
|
|
+}
|
|
+
|
|
static void do_nothing(void *unused)
|
|
{
|
|
}
|
|
@@ -120,24 +143,37 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
|
|
*/
|
|
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
|
|
{
|
|
+ struct power_trace it;
|
|
+
|
|
+ trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
|
|
if (!need_resched()) {
|
|
+ if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
|
|
+ clflush((void *)&current_thread_info()->flags);
|
|
+
|
|
__monitor((void *)&current_thread_info()->flags, 0, 0);
|
|
smp_mb();
|
|
if (!need_resched())
|
|
__mwait(ax, cx);
|
|
}
|
|
+ trace_power_end(&it);
|
|
}
|
|
|
|
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
|
|
static void mwait_idle(void)
|
|
{
|
|
+ struct power_trace it;
|
|
if (!need_resched()) {
|
|
+ trace_power_start(&it, POWER_CSTATE, 1);
|
|
+ if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
|
|
+ clflush((void *)&current_thread_info()->flags);
|
|
+
|
|
__monitor((void *)&current_thread_info()->flags, 0, 0);
|
|
smp_mb();
|
|
if (!need_resched())
|
|
__sti_mwait(0, 0);
|
|
else
|
|
local_irq_enable();
|
|
+ trace_power_end(&it);
|
|
} else
|
|
local_irq_enable();
|
|
}
|
|
@@ -150,9 +186,13 @@ static void mwait_idle(void)
|
|
*/
|
|
static void poll_idle(void)
|
|
{
|
|
+ struct power_trace it;
|
|
+
|
|
+ trace_power_start(&it, POWER_CSTATE, 0);
|
|
local_irq_enable();
|
|
while (!need_resched())
|
|
cpu_relax();
|
|
+ trace_power_end(&it);
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
@@ -238,7 +278,7 @@ static void c1e_idle(void)
|
|
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
|
|
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
|
|
c1e_detected = 1;
|
|
- if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
|
|
+ if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
|
|
mark_tsc_unstable("TSC halt in AMD C1E");
|
|
printk(KERN_INFO "System has AMD C1E enabled\n");
|
|
set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:34:28.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:36:38.000000000 +0100
|
|
@@ -38,11 +38,13 @@
|
|
#include <linux/percpu.h>
|
|
#include <linux/prctl.h>
|
|
#include <linux/dmi.h>
|
|
+#include <linux/ftrace.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/io.h>
|
|
+#include <linux/kdebug.h>
|
|
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/system.h>
|
|
-#include <asm/io.h>
|
|
#include <asm/ldt.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/i387.h>
|
|
@@ -57,10 +59,9 @@
|
|
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/cpu.h>
|
|
-#include <asm/kdebug.h>
|
|
#include <asm/idle.h>
|
|
#include <asm/syscalls.h>
|
|
-#include <asm/smp.h>
|
|
+#include <asm/ds.h>
|
|
|
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
|
|
@@ -106,9 +107,6 @@ void cpu_idle(void)
|
|
check_pgt_cache();
|
|
rmb();
|
|
|
|
- if (rcu_pending(cpu))
|
|
- rcu_check_callbacks(cpu, 0);
|
|
-
|
|
if (cpu_is_offline(cpu))
|
|
play_dead();
|
|
|
|
@@ -206,7 +204,7 @@ extern void kernel_thread_helper(void);
|
|
/*
|
|
* Create a kernel thread
|
|
*/
|
|
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
|
|
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
|
|
{
|
|
struct pt_regs regs;
|
|
|
|
@@ -245,14 +243,8 @@ void exit_thread(void)
|
|
t->io_bitmap_ptr = NULL;
|
|
clear_thread_flag(TIF_IO_BITMAP);
|
|
}
|
|
-#ifdef CONFIG_X86_DS
|
|
- /* Free any DS contexts that have not been properly released. */
|
|
- if (unlikely(current->thread.ds_ctx)) {
|
|
- /* we clear debugctl to make sure DS is not used. */
|
|
- update_debugctlmsr(0);
|
|
- ds_free(current->thread.ds_ctx);
|
|
- }
|
|
-#endif /* CONFIG_X86_DS */
|
|
+
|
|
+ ds_exit_thread(current);
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -265,7 +257,7 @@ void flush_thread(void)
|
|
tsk->thread.debugreg3 = 0;
|
|
tsk->thread.debugreg6 = 0;
|
|
tsk->thread.debugreg7 = 0;
|
|
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
clear_tsk_thread_flag(tsk, TIF_DEBUG);
|
|
/*
|
|
* Forget coprocessor state..
|
|
@@ -292,9 +284,9 @@ void prepare_to_copy(struct task_struct
|
|
|
|
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
unsigned long unused,
|
|
- struct task_struct * p, struct pt_regs * regs)
|
|
+ struct task_struct *p, struct pt_regs *regs)
|
|
{
|
|
- struct pt_regs * childregs;
|
|
+ struct pt_regs *childregs;
|
|
struct task_struct *tsk;
|
|
int err;
|
|
|
|
@@ -338,13 +330,19 @@ int copy_thread(int nr, unsigned long cl
|
|
kfree(p->thread.io_bitmap_ptr);
|
|
p->thread.io_bitmap_max = 0;
|
|
}
|
|
+
|
|
+ ds_copy_thread(p, current);
|
|
+
|
|
+ clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
|
|
+ p->thread.debugctlmsr = 0;
|
|
+
|
|
return err;
|
|
}
|
|
|
|
void
|
|
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
{
|
|
- __asm__("movl %0, %%gs" :: "r"(0));
|
|
+ __asm__("movl %0, %%gs" : : "r"(0));
|
|
regs->fs = 0;
|
|
set_fs(USER_DS);
|
|
regs->ds = __USER_DS;
|
|
@@ -418,47 +416,18 @@ int set_tsc_mode(unsigned int val)
|
|
return 0;
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_DS
|
|
-static int update_debugctl(struct thread_struct *prev,
|
|
- struct thread_struct *next, unsigned long debugctl)
|
|
-{
|
|
- unsigned long ds_prev = 0;
|
|
- unsigned long ds_next = 0;
|
|
-
|
|
- if (prev->ds_ctx)
|
|
- ds_prev = (unsigned long)prev->ds_ctx->ds;
|
|
- if (next->ds_ctx)
|
|
- ds_next = (unsigned long)next->ds_ctx->ds;
|
|
-
|
|
- if (ds_next != ds_prev) {
|
|
- /* we clear debugctl to make sure DS
|
|
- * is not in use when we change it */
|
|
- debugctl = 0;
|
|
- update_debugctlmsr(0);
|
|
- wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
|
|
- }
|
|
- return debugctl;
|
|
-}
|
|
-#else
|
|
-static int update_debugctl(struct thread_struct *prev,
|
|
- struct thread_struct *next, unsigned long debugctl)
|
|
-{
|
|
- return debugctl;
|
|
-}
|
|
-#endif /* CONFIG_X86_DS */
|
|
-
|
|
static noinline void
|
|
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
|
|
{
|
|
struct thread_struct *prev, *next;
|
|
- unsigned long debugctl;
|
|
|
|
prev = &prev_p->thread;
|
|
next = &next_p->thread;
|
|
|
|
- debugctl = update_debugctl(prev, next, prev->debugctlmsr);
|
|
-
|
|
- if (next->debugctlmsr != debugctl)
|
|
+ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
|
|
+ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
|
|
+ ds_switch_to(prev_p, next_p);
|
|
+ else if (next->debugctlmsr != prev->debugctlmsr)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
|
|
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
@@ -479,14 +448,6 @@ __switch_to_xtra(struct task_struct *pre
|
|
else
|
|
hard_enable_TSC();
|
|
}
|
|
-
|
|
-#ifdef CONFIG_X86_PTRACE_BTS
|
|
- if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
|
|
- ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
|
|
-
|
|
- if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
|
|
- ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
|
|
-#endif /* CONFIG_X86_PTRACE_BTS */
|
|
}
|
|
|
|
/*
|
|
@@ -516,7 +477,8 @@ __switch_to_xtra(struct task_struct *pre
|
|
* the task-switch, and shows up in ret_from_fork in entry.S,
|
|
* for example.
|
|
*/
|
|
-struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
+__notrace_funcgraph struct task_struct *
|
|
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
{
|
|
struct thread_struct *prev = &prev_p->thread,
|
|
*next = &next_p->thread;
|
|
@@ -696,7 +658,7 @@ asmlinkage int sys_vfork(struct pt_regs
|
|
asmlinkage int sys_execve(struct pt_regs regs)
|
|
{
|
|
int error;
|
|
- char * filename;
|
|
+ char *filename;
|
|
|
|
filename = getname((char __user *) regs.bx);
|
|
error = PTR_ERR(filename);
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:34:22.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:36:43.000000000 +0100
|
|
@@ -42,6 +42,8 @@
|
|
#include <linux/prctl.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/io.h>
|
|
+#include <linux/ftrace.h>
|
|
+#include <linux/dmi.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/system.h>
|
|
@@ -57,6 +59,7 @@
|
|
#include <asm/ia32.h>
|
|
#include <asm/idle.h>
|
|
#include <asm/syscalls.h>
|
|
+#include <asm/ds.h>
|
|
|
|
asmlinkage extern void ret_from_fork(void);
|
|
|
|
@@ -154,14 +157,18 @@ void __show_regs(struct pt_regs *regs, i
|
|
unsigned long d0, d1, d2, d3, d6, d7;
|
|
unsigned int fsindex, gsindex;
|
|
unsigned int ds, cs, es;
|
|
+ const char *board;
|
|
|
|
printk("\n");
|
|
print_modules();
|
|
- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
+ board = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
+ if (!board)
|
|
+ board = "";
|
|
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
|
|
current->pid, current->comm, print_tainted(),
|
|
init_utsname()->release,
|
|
(int)strcspn(init_utsname()->version, " "),
|
|
- init_utsname()->version);
|
|
+ init_utsname()->version, board);
|
|
printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
|
|
printk_address(regs->ip, 1);
|
|
printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
|
|
@@ -252,14 +259,8 @@ void exit_thread(void)
|
|
#endif
|
|
t->io_bitmap_max = 0;
|
|
}
|
|
-#ifdef CONFIG_X86_DS
|
|
- /* Free any DS contexts that have not been properly released. */
|
|
- if (unlikely(t->ds_ctx)) {
|
|
- /* we clear debugctl to make sure DS is not used. */
|
|
- update_debugctlmsr(0);
|
|
- ds_free(t->ds_ctx);
|
|
- }
|
|
-#endif /* CONFIG_X86_DS */
|
|
+
|
|
+ ds_exit_thread(current);
|
|
}
|
|
|
|
void xen_load_gs_index(unsigned gs)
|
|
@@ -395,6 +396,11 @@ int copy_thread(int nr, unsigned long cl
|
|
}
|
|
p->thread.iopl = current->thread.iopl;
|
|
|
|
+ ds_copy_thread(p, me);
|
|
+
|
|
+ clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
|
|
+ p->thread.debugctlmsr = 0;
|
|
+
|
|
err = 0;
|
|
out:
|
|
if (err && p->thread.io_bitmap_ptr) {
|
|
@@ -491,35 +497,14 @@ static inline void __switch_to_xtra(stru
|
|
struct task_struct *next_p)
|
|
{
|
|
struct thread_struct *prev, *next;
|
|
- unsigned long debugctl;
|
|
|
|
prev = &prev_p->thread,
|
|
next = &next_p->thread;
|
|
|
|
- debugctl = prev->debugctlmsr;
|
|
-
|
|
-#ifdef CONFIG_X86_DS
|
|
- {
|
|
- unsigned long ds_prev = 0, ds_next = 0;
|
|
-
|
|
- if (prev->ds_ctx)
|
|
- ds_prev = (unsigned long)prev->ds_ctx->ds;
|
|
- if (next->ds_ctx)
|
|
- ds_next = (unsigned long)next->ds_ctx->ds;
|
|
-
|
|
- if (ds_next != ds_prev) {
|
|
- /*
|
|
- * We clear debugctl to make sure DS
|
|
- * is not in use when we change it:
|
|
- */
|
|
- debugctl = 0;
|
|
- update_debugctlmsr(0);
|
|
- wrmsrl(MSR_IA32_DS_AREA, ds_next);
|
|
- }
|
|
- }
|
|
-#endif /* CONFIG_X86_DS */
|
|
-
|
|
- if (next->debugctlmsr != debugctl)
|
|
+ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
|
|
+ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
|
|
+ ds_switch_to(prev_p, next_p);
|
|
+ else if (next->debugctlmsr != prev->debugctlmsr)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
|
|
if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
@@ -540,14 +525,6 @@ static inline void __switch_to_xtra(stru
|
|
else
|
|
hard_enable_TSC();
|
|
}
|
|
-
|
|
-#ifdef CONFIG_X86_PTRACE_BTS
|
|
- if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
|
|
- ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
|
|
-
|
|
- if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
|
|
- ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
|
|
-#endif /* CONFIG_X86_PTRACE_BTS */
|
|
}
|
|
|
|
/*
|
|
@@ -558,8 +535,9 @@ static inline void __switch_to_xtra(stru
|
|
* - could test fs/gs bitsliced
|
|
*
|
|
* Kprobes not supported here. Set the probe on schedule instead.
|
|
+ * Function graph tracer not supported too.
|
|
*/
|
|
-struct task_struct *
|
|
+__notrace_funcgraph struct task_struct *
|
|
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
{
|
|
struct thread_struct *prev = &prev_p->thread;
|
|
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-03 16:22:12.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:22:27.000000000 +0100
|
|
@@ -93,11 +93,13 @@
|
|
#include <asm/desc.h>
|
|
#include <asm/dma.h>
|
|
#include <asm/iommu.h>
|
|
+#include <asm/gart.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/proto.h>
|
|
|
|
#include <mach_apic.h>
|
|
#include <asm/paravirt.h>
|
|
+#include <asm/hypervisor.h>
|
|
|
|
#include <asm/percpu.h>
|
|
#include <asm/topology.h>
|
|
@@ -508,6 +510,7 @@ static void __init reserve_early_setup_d
|
|
* @size: Size of the crashkernel memory to reserve.
|
|
* Returns the base address on success, and -1ULL on failure.
|
|
*/
|
|
+static
|
|
unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
|
|
{
|
|
const unsigned long long alignment = 16<<20; /* 16M */
|
|
@@ -650,165 +653,32 @@ static int __init setup_elfcorehdr(char
|
|
early_param("elfcorehdr", setup_elfcorehdr);
|
|
#endif
|
|
|
|
-static struct x86_quirks default_x86_quirks __initdata;
|
|
-
|
|
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
|
|
-
|
|
-/*
|
|
- * Some BIOSes seem to corrupt the low 64k of memory during events
|
|
- * like suspend/resume and unplugging an HDMI cable. Reserve all
|
|
- * remaining free memory in that area and fill it with a distinct
|
|
- * pattern.
|
|
- */
|
|
-#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
|
|
-#define MAX_SCAN_AREAS 8
|
|
-
|
|
-static int __read_mostly memory_corruption_check = -1;
|
|
-
|
|
-static unsigned __read_mostly corruption_check_size = 64*1024;
|
|
-static unsigned __read_mostly corruption_check_period = 60; /* seconds */
|
|
-
|
|
-static struct e820entry scan_areas[MAX_SCAN_AREAS];
|
|
-static int num_scan_areas;
|
|
-
|
|
-
|
|
-static int set_corruption_check(char *arg)
|
|
-{
|
|
- char *end;
|
|
-
|
|
- memory_corruption_check = simple_strtol(arg, &end, 10);
|
|
-
|
|
- return (*end == 0) ? 0 : -EINVAL;
|
|
-}
|
|
-early_param("memory_corruption_check", set_corruption_check);
|
|
-
|
|
-static int set_corruption_check_period(char *arg)
|
|
-{
|
|
- char *end;
|
|
-
|
|
- corruption_check_period = simple_strtoul(arg, &end, 10);
|
|
-
|
|
- return (*end == 0) ? 0 : -EINVAL;
|
|
-}
|
|
-early_param("memory_corruption_check_period", set_corruption_check_period);
|
|
-
|
|
-static int set_corruption_check_size(char *arg)
|
|
+#ifndef CONFIG_XEN
|
|
+static int __init default_update_genapic(void)
|
|
{
|
|
- char *end;
|
|
- unsigned size;
|
|
-
|
|
- size = memparse(arg, &end);
|
|
-
|
|
- if (*end == '\0')
|
|
- corruption_check_size = size;
|
|
+#ifdef CONFIG_X86_SMP
|
|
+# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
|
|
+ genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
|
|
+# endif
|
|
+#endif
|
|
|
|
- return (size == corruption_check_size) ? 0 : -EINVAL;
|
|
+ return 0;
|
|
}
|
|
-early_param("memory_corruption_check_size", set_corruption_check_size);
|
|
-
|
|
-
|
|
-static void __init setup_bios_corruption_check(void)
|
|
-{
|
|
- u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
|
|
-
|
|
- if (memory_corruption_check == -1) {
|
|
- memory_corruption_check =
|
|
-#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
|
|
- 1
|
|
#else
|
|
- 0
|
|
+#define default_update_genapic NULL
|
|
#endif
|
|
- ;
|
|
- }
|
|
-
|
|
- if (corruption_check_size == 0)
|
|
- memory_corruption_check = 0;
|
|
-
|
|
- if (!memory_corruption_check)
|
|
- return;
|
|
-
|
|
- corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
|
|
-
|
|
- while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
|
|
- u64 size;
|
|
- addr = find_e820_area_size(addr, &size, PAGE_SIZE);
|
|
-
|
|
- if (addr == 0)
|
|
- break;
|
|
-
|
|
- if ((addr + size) > corruption_check_size)
|
|
- size = corruption_check_size - addr;
|
|
|
|
- if (size == 0)
|
|
- break;
|
|
-
|
|
- e820_update_range(addr, size, E820_RAM, E820_RESERVED);
|
|
- scan_areas[num_scan_areas].addr = addr;
|
|
- scan_areas[num_scan_areas].size = size;
|
|
- num_scan_areas++;
|
|
-
|
|
- /* Assume we've already mapped this early memory */
|
|
- memset(__va(addr), 0, size);
|
|
-
|
|
- addr += size;
|
|
- }
|
|
-
|
|
- printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
|
|
- num_scan_areas);
|
|
- update_e820();
|
|
-}
|
|
-
|
|
-static struct timer_list periodic_check_timer;
|
|
-
|
|
-void check_for_bios_corruption(void)
|
|
-{
|
|
- int i;
|
|
- int corruption = 0;
|
|
-
|
|
- if (!memory_corruption_check)
|
|
- return;
|
|
-
|
|
- for(i = 0; i < num_scan_areas; i++) {
|
|
- unsigned long *addr = __va(scan_areas[i].addr);
|
|
- unsigned long size = scan_areas[i].size;
|
|
-
|
|
- for(; size; addr++, size -= sizeof(unsigned long)) {
|
|
- if (!*addr)
|
|
- continue;
|
|
- printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
|
|
- addr, __pa(addr), *addr);
|
|
- corruption = 1;
|
|
- *addr = 0;
|
|
- }
|
|
- }
|
|
-
|
|
- WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
|
|
-}
|
|
-
|
|
-static void periodic_check_for_corruption(unsigned long data)
|
|
-{
|
|
- check_for_bios_corruption();
|
|
- mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
|
|
-}
|
|
-
|
|
-void start_periodic_check_for_corruption(void)
|
|
-{
|
|
- if (!memory_corruption_check || corruption_check_period == 0)
|
|
- return;
|
|
-
|
|
- printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
|
|
- corruption_check_period);
|
|
+static struct x86_quirks default_x86_quirks __initdata = {
|
|
+ .update_genapic = default_update_genapic,
|
|
+};
|
|
|
|
- init_timer(&periodic_check_timer);
|
|
- periodic_check_timer.function = &periodic_check_for_corruption;
|
|
- periodic_check_for_corruption(0);
|
|
-}
|
|
-#endif
|
|
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
|
|
|
|
+#ifdef CONFIG_X86_RESERVE_LOW_64K
|
|
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
|
|
{
|
|
printk(KERN_NOTICE
|
|
- "%s detected: BIOS may corrupt low RAM, working it around.\n",
|
|
+ "%s detected: BIOS may corrupt low RAM, working around it.\n",
|
|
d->ident);
|
|
|
|
e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
|
|
@@ -816,6 +686,7 @@ static int __init dmi_low_memory_corrupt
|
|
|
|
return 0;
|
|
}
|
|
+#endif
|
|
|
|
/* List of systems that have known low memory corruption BIOS problems */
|
|
static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
|
|
@@ -1025,15 +896,25 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
finish_e820_parsing();
|
|
|
|
+ if (efi_enabled)
|
|
+ efi_init();
|
|
+
|
|
if (is_initial_xendomain()) {
|
|
dmi_scan_machine();
|
|
|
|
dmi_check_system(bad_bios_dmi_table);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * VMware detection requires dmi to be available, so this
|
|
+ * needs to be done after dmi_scan_machine, for the BP.
|
|
+ */
|
|
+ init_hypervisor(&boot_cpu_data);
|
|
|
|
#ifdef CONFIG_X86_32
|
|
+ if (is_initial_xendomain())
|
|
probe_roms();
|
|
#endif
|
|
- }
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* after parse_early_param, so could debug it */
|
|
@@ -1041,8 +922,6 @@ void __init setup_arch(char **cmdline_p)
|
|
insert_resource(&iomem_resource, &data_resource);
|
|
insert_resource(&iomem_resource, &bss_resource);
|
|
|
|
- if (efi_enabled)
|
|
- efi_init();
|
|
|
|
#ifdef CONFIG_X86_32
|
|
if (ppro_with_ram_bug()) {
|
|
@@ -1297,7 +1176,7 @@ void __init setup_arch(char **cmdline_p)
|
|
ioapic_init_mappings();
|
|
|
|
/* need to wait for io_apic is mapped */
|
|
- nr_irqs = probe_nr_irqs();
|
|
+ probe_nr_irqs_gsi();
|
|
|
|
kvm_guest_init();
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/smp-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/smp-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -1,7 +1,7 @@
|
|
/*
|
|
* Intel SMP support routines.
|
|
*
|
|
- * (c) 1995 Alan Cox, Building #3 <alan@redhat.com>
|
|
+ * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
|
|
* (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
|
|
* (c) 2002,2003 Andi Kleen, SuSE Labs.
|
|
*
|
|
@@ -118,30 +118,17 @@ void xen_smp_send_reschedule(int cpu)
|
|
WARN_ON(1);
|
|
return;
|
|
}
|
|
- send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
|
|
+ send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
|
|
}
|
|
|
|
void xen_send_call_func_single_ipi(int cpu)
|
|
{
|
|
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_VECTOR);
|
|
+ send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_VECTOR);
|
|
}
|
|
|
|
-void xen_send_call_func_ipi(cpumask_t mask)
|
|
+void xen_send_call_func_ipi(const struct cpumask *mask)
|
|
{
|
|
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
|
|
-}
|
|
-
|
|
-static void stop_this_cpu(void *dummy)
|
|
-{
|
|
- local_irq_disable();
|
|
- /*
|
|
- * Remove this CPU:
|
|
- */
|
|
- cpu_clear(smp_processor_id(), cpu_online_map);
|
|
- disable_all_local_evtchn();
|
|
- if (hlt_works(smp_processor_id()))
|
|
- for (;;) halt();
|
|
- for (;;);
|
|
+ send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
|
|
}
|
|
|
|
/*
|
|
@@ -165,22 +152,14 @@ void xen_smp_send_stop(void)
|
|
*/
|
|
irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
|
|
{
|
|
-#ifdef CONFIG_X86_32
|
|
- __get_cpu_var(irq_stat).irq_resched_count++;
|
|
-#else
|
|
- add_pda(irq_resched_count, 1);
|
|
-#endif
|
|
+ inc_irq_stat(irq_resched_count);
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
|
|
{
|
|
generic_smp_call_function_interrupt();
|
|
-#ifdef CONFIG_X86_32
|
|
- __get_cpu_var(irq_stat).irq_call_count++;
|
|
-#else
|
|
- add_pda(irq_call_count, 1);
|
|
-#endif
|
|
+ inc_irq_stat(irq_call_count);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
@@ -188,11 +167,7 @@ irqreturn_t smp_call_function_interrupt(
|
|
irqreturn_t smp_call_function_single_interrupt(int irq, void *dev_id)
|
|
{
|
|
generic_smp_call_function_single_interrupt();
|
|
-#ifdef CONFIG_X86_32
|
|
- __get_cpu_var(irq_stat).irq_call_count++;
|
|
-#else
|
|
- add_pda(irq_call_count, 1);
|
|
-#endif
|
|
+ inc_irq_stat(irq_call_count);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/time-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/time-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -455,11 +455,7 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
struct vcpu_runstate_info runstate;
|
|
|
|
/* Keep nmi watchdog up to date */
|
|
-#ifdef __i386__
|
|
- x86_add_percpu(irq_stat.irq0_irqs, 1);
|
|
-#else
|
|
- add_pda(irq0_irqs, 1);
|
|
-#endif
|
|
+ inc_irq_stat(irq0_irqs);
|
|
|
|
/*
|
|
* Here we are in the timer irq handler. We just have irqs locally
|
|
@@ -521,7 +517,6 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
|
|
/*
|
|
* Account stolen ticks.
|
|
- * HACK: Passing NULL to account_steal_time()
|
|
* ensures that the ticks are accounted as stolen.
|
|
*/
|
|
stolen = runstate.time[RUNSTATE_runnable]
|
|
@@ -534,12 +529,11 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
do_div(stolen, NS_PER_TICK);
|
|
per_cpu(processed_stolen_time, cpu) += stolen * NS_PER_TICK;
|
|
per_cpu(processed_system_time, cpu) += stolen * NS_PER_TICK;
|
|
- account_steal_time(NULL, (cputime_t)stolen);
|
|
+ account_steal_ticks(stolen);
|
|
}
|
|
|
|
/*
|
|
* Account blocked ticks.
|
|
- * HACK: Passing idle_task to account_steal_time()
|
|
* ensures that the ticks are accounted as idle/wait.
|
|
*/
|
|
blocked = runstate.time[RUNSTATE_blocked]
|
|
@@ -551,18 +545,23 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
do_div(blocked, NS_PER_TICK);
|
|
per_cpu(processed_blocked_time, cpu) += blocked * NS_PER_TICK;
|
|
per_cpu(processed_system_time, cpu) += blocked * NS_PER_TICK;
|
|
- account_steal_time(idle_task(cpu), (cputime_t)blocked);
|
|
+ account_idle_ticks(blocked);
|
|
}
|
|
|
|
/* Account user/system ticks. */
|
|
if (delta_cpu > 0) {
|
|
+ cputime_t ct;
|
|
+
|
|
do_div(delta_cpu, NS_PER_TICK);
|
|
per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
|
|
+ ct = jiffies_to_cputime(delta_cpu);
|
|
if (user_mode_vm(get_irq_regs()))
|
|
- account_user_time(current, (cputime_t)delta_cpu);
|
|
- else
|
|
+ account_user_time(current, ct, cputime_to_scaled(ct));
|
|
+ else if (current != idle_task(cpu))
|
|
account_system_time(current, HARDIRQ_OFFSET,
|
|
- (cputime_t)delta_cpu);
|
|
+ ct, cputime_to_scaled(ct));
|
|
+ else
|
|
+ account_idle_ticks(delta_cpu);
|
|
}
|
|
|
|
/* Offlined for more than a few seconds? Avoid lockup warnings. */
|
|
@@ -791,7 +790,7 @@ static void stop_hz_timer(void)
|
|
unsigned long j;
|
|
int rc;
|
|
|
|
- cpu_set(cpu, nohz_cpu_mask);
|
|
+ cpumask_set_cpu(cpu, nohz_cpu_mask);
|
|
|
|
/* See matching smp_mb in rcu_start_batch in rcupdate.c. These mbs */
|
|
/* ensure that if __rcu_pending (nested in rcu_needs_cpu) fetches a */
|
|
@@ -807,7 +806,7 @@ static void stop_hz_timer(void)
|
|
local_softirq_pending() ||
|
|
(j = get_next_timer_interrupt(jiffies),
|
|
time_before_eq(j, jiffies))) {
|
|
- cpu_clear(cpu, nohz_cpu_mask);
|
|
+ cpumask_clear_cpu(cpu, nohz_cpu_mask);
|
|
j = jiffies + 1;
|
|
}
|
|
|
|
@@ -838,7 +837,7 @@ static void start_hz_timer(void)
|
|
}
|
|
#endif
|
|
BUG_ON(rc);
|
|
- cpu_clear(cpu, nohz_cpu_mask);
|
|
+ cpumask_clear_cpu(cpu, nohz_cpu_mask);
|
|
}
|
|
|
|
void xen_safe_halt(void)
|
|
@@ -848,14 +847,12 @@ void xen_safe_halt(void)
|
|
HYPERVISOR_block();
|
|
start_hz_timer();
|
|
}
|
|
-EXPORT_SYMBOL(xen_safe_halt);
|
|
|
|
void xen_halt(void)
|
|
{
|
|
if (irqs_disabled())
|
|
VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
|
|
}
|
|
-EXPORT_SYMBOL(xen_halt);
|
|
|
|
/* No locking required. Interrupts are disabled on all CPUs. */
|
|
void time_resume(void)
|
|
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -20,7 +20,6 @@
|
|
#include <linux/module.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/string.h>
|
|
-#include <linux/unwind.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/kexec.h>
|
|
@@ -51,7 +50,6 @@
|
|
#include <asm/debugreg.h>
|
|
#include <asm/atomic.h>
|
|
#include <asm/system.h>
|
|
-#include <asm/unwind.h>
|
|
#include <asm/traps.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/i387.h>
|
|
@@ -65,18 +63,10 @@
|
|
#else
|
|
#include <asm/processor-flags.h>
|
|
#include <asm/arch_hooks.h>
|
|
-#include <asm/nmi.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/io.h>
|
|
#include <asm/traps.h>
|
|
|
|
#include "cpu/mcheck/mce.h"
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
|
|
-EXPORT_SYMBOL_GPL(used_vectors);
|
|
-#endif
|
|
-
|
|
asmlinkage int system_call(void);
|
|
|
|
/* Do we ignore FPU interrupts ? */
|
|
@@ -93,6 +83,11 @@ gate_desc idt_table[256]
|
|
#endif
|
|
#endif
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
|
|
+EXPORT_SYMBOL_GPL(used_vectors);
|
|
+#endif
|
|
+
|
|
static int ignore_nmis;
|
|
|
|
static inline void conditional_sti(struct pt_regs *regs)
|
|
@@ -108,6 +103,12 @@ static inline void preempt_conditional_s
|
|
local_irq_enable();
|
|
}
|
|
|
|
+static inline void conditional_cli(struct pt_regs *regs)
|
|
+{
|
|
+ if (regs->flags & X86_EFLAGS_IF)
|
|
+ local_irq_disable();
|
|
+}
|
|
+
|
|
static inline void preempt_conditional_cli(struct pt_regs *regs)
|
|
{
|
|
if (regs->flags & X86_EFLAGS_IF)
|
|
@@ -298,8 +299,10 @@ dotraplinkage void do_double_fault(struc
|
|
tsk->thread.error_code = error_code;
|
|
tsk->thread.trap_no = 8;
|
|
|
|
- /* This is always a kernel trap and never fixable (and thus must
|
|
- never return). */
|
|
+ /*
|
|
+ * This is always a kernel trap and never fixable (and thus must
|
|
+ * never return).
|
|
+ */
|
|
for (;;)
|
|
die(str, regs, error_code);
|
|
}
|
|
@@ -476,11 +479,7 @@ do_nmi(struct pt_regs *regs, long error_
|
|
{
|
|
nmi_enter();
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
|
|
-#else
|
|
- add_pda(__nmi_count, 1);
|
|
-#endif
|
|
+ inc_irq_stat(__nmi_count);
|
|
|
|
if (!ignore_nmis)
|
|
default_do_nmi(regs);
|
|
@@ -519,9 +518,11 @@ dotraplinkage void __kprobes do_int3(str
|
|
}
|
|
|
|
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
-/* Help handler running on IST stack to switch back to user stack
|
|
- for scheduling or signal handling. The actual stack switch is done in
|
|
- entry.S */
|
|
+/*
|
|
+ * Help handler running on IST stack to switch back to user stack
|
|
+ * for scheduling or signal handling. The actual stack switch is done in
|
|
+ * entry.S
|
|
+ */
|
|
asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
|
|
{
|
|
struct pt_regs *regs = eregs;
|
|
@@ -531,8 +532,10 @@ asmlinkage __kprobes struct pt_regs *syn
|
|
/* Exception from user space */
|
|
else if (user_mode(eregs))
|
|
regs = task_pt_regs(current);
|
|
- /* Exception from kernel and interrupts are enabled. Move to
|
|
- kernel process stack. */
|
|
+ /*
|
|
+ * Exception from kernel and interrupts are enabled. Move to
|
|
+ * kernel process stack.
|
|
+ */
|
|
else if (eregs->flags & X86_EFLAGS_IF)
|
|
regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
|
|
if (eregs != regs)
|
|
@@ -624,8 +627,10 @@ clear_dr7:
|
|
|
|
#ifdef CONFIG_X86_32
|
|
debug_vm86:
|
|
+ /* reenable preemption: handle_vm86_trap() might sleep */
|
|
+ dec_preempt_count();
|
|
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
|
|
- preempt_conditional_cli(regs);
|
|
+ conditional_cli(regs);
|
|
return;
|
|
#endif
|
|
|
|
@@ -659,7 +664,7 @@ void math_error(void __user *ip)
|
|
{
|
|
struct task_struct *task;
|
|
siginfo_t info;
|
|
- unsigned short cwd, swd;
|
|
+ unsigned short cwd, swd, err;
|
|
|
|
/*
|
|
* Save the info for the exception handler and clear the error.
|
|
@@ -670,7 +675,6 @@ void math_error(void __user *ip)
|
|
task->thread.error_code = 0;
|
|
info.si_signo = SIGFPE;
|
|
info.si_errno = 0;
|
|
- info.si_code = __SI_FAULT;
|
|
info.si_addr = ip;
|
|
/*
|
|
* (~cwd & swd) will mask out exceptions that are not set to unmasked
|
|
@@ -684,34 +688,30 @@ void math_error(void __user *ip)
|
|
*/
|
|
cwd = get_fpu_cwd(task);
|
|
swd = get_fpu_swd(task);
|
|
- switch (swd & ~cwd & 0x3f) {
|
|
- case 0x000: /* No unmasked exception */
|
|
-#ifdef CONFIG_X86_32
|
|
- return;
|
|
-#endif
|
|
- default: /* Multiple exceptions */
|
|
- break;
|
|
- case 0x001: /* Invalid Op */
|
|
+
|
|
+ err = swd & ~cwd;
|
|
+
|
|
+ if (err & 0x001) { /* Invalid op */
|
|
/*
|
|
* swd & 0x240 == 0x040: Stack Underflow
|
|
* swd & 0x240 == 0x240: Stack Overflow
|
|
* User must clear the SF bit (0x40) if set
|
|
*/
|
|
info.si_code = FPE_FLTINV;
|
|
- break;
|
|
- case 0x002: /* Denormalize */
|
|
- case 0x010: /* Underflow */
|
|
- info.si_code = FPE_FLTUND;
|
|
- break;
|
|
- case 0x004: /* Zero Divide */
|
|
+ } else if (err & 0x004) { /* Divide by Zero */
|
|
info.si_code = FPE_FLTDIV;
|
|
- break;
|
|
- case 0x008: /* Overflow */
|
|
+ } else if (err & 0x008) { /* Overflow */
|
|
info.si_code = FPE_FLTOVF;
|
|
- break;
|
|
- case 0x020: /* Precision */
|
|
+ } else if (err & 0x012) { /* Denormal, Underflow */
|
|
+ info.si_code = FPE_FLTUND;
|
|
+ } else if (err & 0x020) { /* Precision */
|
|
info.si_code = FPE_FLTRES;
|
|
- break;
|
|
+ } else {
|
|
+ /*
|
|
+ * If we're using IRQ 13, or supposedly even some trap 16
|
|
+ * implementations, it's possible we get a spurious trap...
|
|
+ */
|
|
+ return; /* Spurious trap, no error */
|
|
}
|
|
force_sig_info(SIGFPE, &info, task);
|
|
}
|
|
@@ -901,7 +901,7 @@ asmlinkage void math_state_restore(void)
|
|
EXPORT_SYMBOL_GPL(math_state_restore);
|
|
|
|
#ifndef CONFIG_MATH_EMULATION
|
|
-asmlinkage void math_emulate(long arg)
|
|
+void math_emulate(struct math_emu_info *info)
|
|
{
|
|
printk(KERN_EMERG
|
|
"math-emulation not enabled and no coprocessor found.\n");
|
|
@@ -911,16 +911,19 @@ asmlinkage void math_emulate(long arg)
|
|
}
|
|
#endif /* CONFIG_MATH_EMULATION */
|
|
|
|
-dotraplinkage void __kprobes
|
|
-do_device_not_available(struct pt_regs *regs, long error)
|
|
+dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
|
|
{
|
|
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
if (read_cr0() & X86_CR0_EM) {
|
|
- conditional_sti(regs);
|
|
- math_emulate(0);
|
|
+ struct math_emu_info info = { };
|
|
+
|
|
+ conditional_sti(®s);
|
|
+
|
|
+ info.regs = ®s;
|
|
+ math_emulate(&info);
|
|
} else {
|
|
math_state_restore(); /* interrupts still off */
|
|
- conditional_sti(regs);
|
|
+ conditional_sti(®s);
|
|
}
|
|
#else
|
|
math_state_restore();
|
|
--- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -17,6 +17,9 @@
|
|
* want per guest time just set the kernel.vsyscall64 sysctl to 0.
|
|
*/
|
|
|
|
+/* Disable profiling for userspace code: */
|
|
+#define DISABLE_BRANCH_PROFILING
|
|
+
|
|
#include <linux/time.h>
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
@@ -128,7 +131,16 @@ static __always_inline void do_vgettimeo
|
|
gettimeofday(tv,NULL);
|
|
return;
|
|
}
|
|
+
|
|
+ /*
|
|
+ * Surround the RDTSC by barriers, to make sure it's not
|
|
+ * speculated to outside the seqlock critical section and
|
|
+ * does not cause time warps:
|
|
+ */
|
|
+ rdtsc_barrier();
|
|
now = vread();
|
|
+ rdtsc_barrier();
|
|
+
|
|
base = __vsyscall_gtod_data.clock.cycle_last;
|
|
mask = __vsyscall_gtod_data.clock.mask;
|
|
mult = __vsyscall_gtod_data.clock.mult;
|
|
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -53,7 +53,7 @@
|
|
|
|
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
|
|
{
|
|
-#ifdef CONFIG_MMIOTRACE_HOOKS
|
|
+#ifdef CONFIG_MMIOTRACE
|
|
if (unlikely(is_kmmio_active()))
|
|
if (kmmio_handler(regs, addr) == 1)
|
|
return -1;
|
|
@@ -406,7 +406,7 @@ static void show_fault_oops(struct pt_re
|
|
if (pte && pte_present(*pte) && !pte_exec(*pte))
|
|
printk(KERN_CRIT "kernel tried to execute "
|
|
"NX-protected page - exploit attempt? "
|
|
- "(uid: %d)\n", current->uid);
|
|
+ "(uid: %d)\n", current_uid());
|
|
}
|
|
#endif
|
|
|
|
@@ -426,6 +426,7 @@ static noinline void pgtable_bad(unsigne
|
|
unsigned long error_code)
|
|
{
|
|
unsigned long flags = oops_begin();
|
|
+ int sig = SIGKILL;
|
|
struct task_struct *tsk;
|
|
|
|
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
|
|
@@ -436,8 +437,8 @@ static noinline void pgtable_bad(unsigne
|
|
tsk->thread.trap_no = 14;
|
|
tsk->thread.error_code = error_code;
|
|
if (__die("Bad pagetable", regs, error_code))
|
|
- regs = NULL;
|
|
- oops_end(flags, regs, SIGKILL);
|
|
+ sig = 0;
|
|
+ oops_end(flags, regs, sig);
|
|
}
|
|
#endif
|
|
|
|
@@ -546,10 +547,7 @@ static int vmalloc_fault(unsigned long a
|
|
happen within a race in page table update. In the later
|
|
case just flush. */
|
|
|
|
- /* On Xen the line below does not always work. Needs investigating! */
|
|
- /*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
|
|
- pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK);
|
|
- pgd += pgd_index(address);
|
|
+ pgd = pgd_offset(current->active_mm, address);
|
|
pgd_ref = pgd_offset_k(address);
|
|
if (pgd_none(*pgd_ref))
|
|
return -1;
|
|
@@ -606,6 +604,7 @@ void __kprobes do_page_fault(struct pt_r
|
|
int fault;
|
|
#ifdef CONFIG_X86_64
|
|
unsigned long flags;
|
|
+ int sig;
|
|
#endif
|
|
|
|
/* Set the "privileged fault" bit to something sane. */
|
|
@@ -623,8 +622,6 @@ void __kprobes do_page_fault(struct pt_r
|
|
|
|
si_code = SEGV_MAPERR;
|
|
|
|
- if (notify_page_fault(regs))
|
|
- return;
|
|
if (unlikely(kmmio_fault(regs, address)))
|
|
return;
|
|
|
|
@@ -663,6 +660,9 @@ void __kprobes do_page_fault(struct pt_r
|
|
if (spurious_fault(address, error_code))
|
|
return;
|
|
|
|
+ /* kprobes don't want to hook the spurious faults. */
|
|
+ if (notify_page_fault(regs))
|
|
+ return;
|
|
/*
|
|
* Don't take the mm semaphore here. If we fixup a prefetch
|
|
* fault we could otherwise deadlock.
|
|
@@ -670,6 +670,9 @@ void __kprobes do_page_fault(struct pt_r
|
|
goto bad_area_nosemaphore;
|
|
}
|
|
|
|
+ /* kprobes don't want to hook the spurious faults. */
|
|
+ if (notify_page_fault(regs))
|
|
+ return;
|
|
|
|
/*
|
|
* It's safe to allow irq's after cr2 has been saved and the
|
|
@@ -696,7 +699,6 @@ void __kprobes do_page_fault(struct pt_r
|
|
if (unlikely(in_atomic() || !mm))
|
|
goto bad_area_nosemaphore;
|
|
|
|
-again:
|
|
/*
|
|
* When running in the kernel we expect faults to occur only to
|
|
* addresses in user space. All other faults represent errors in the
|
|
@@ -880,32 +882,22 @@ no_context:
|
|
bust_spinlocks(0);
|
|
do_exit(SIGKILL);
|
|
#else
|
|
+ sig = SIGKILL;
|
|
if (__die("Oops", regs, error_code))
|
|
- regs = NULL;
|
|
+ sig = 0;
|
|
/* Executive summary in case the body of the oops scrolled away */
|
|
printk(KERN_EMERG "CR2: %016lx\n", address);
|
|
- oops_end(flags, regs, SIGKILL);
|
|
+ oops_end(flags, regs, sig);
|
|
#endif
|
|
|
|
-/*
|
|
- * We ran out of memory, or some other thing happened to us that made
|
|
- * us unable to handle the page fault gracefully.
|
|
- */
|
|
out_of_memory:
|
|
+ /*
|
|
+ * We ran out of memory, call the OOM killer, and return to userspace
|
|
+ * (which will retry the fault, or kill us if we got oom-killed).
|
|
+ */
|
|
up_read(&mm->mmap_sem);
|
|
- if (is_global_init(tsk)) {
|
|
- yield();
|
|
- /*
|
|
- * Re-lookup the vma - in theory the vma tree might
|
|
- * have changed:
|
|
- */
|
|
- goto again;
|
|
- }
|
|
-
|
|
- printk("VM: killing process %s\n", tsk->comm);
|
|
- if (error_code & PF_USER)
|
|
- do_group_exit(SIGKILL);
|
|
- goto no_context;
|
|
+ pagefault_out_of_memory();
|
|
+ return;
|
|
|
|
do_sigbus:
|
|
up_read(&mm->mmap_sem);
|
|
--- head-2011-03-17.orig/arch/x86/mm/hypervisor.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/hypervisor.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -78,12 +78,12 @@ static void multicall_failed(const multi
|
|
BUG();
|
|
}
|
|
|
|
-int xen_multicall_flush(bool ret_last) {
|
|
+static int _xen_multicall_flush(bool ret_last) {
|
|
struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
|
|
multicall_entry_t *mc = lazy->mc;
|
|
unsigned int count = lazy->nr_mc;
|
|
|
|
- if (!count || !use_lazy_mmu_mode())
|
|
+ if (!count)
|
|
return 0;
|
|
|
|
lazy->nr_mc = 0;
|
|
@@ -112,6 +112,11 @@ int xen_multicall_flush(bool ret_last) {
|
|
return 0;
|
|
}
|
|
|
|
+void xen_multicall_flush(bool force) {
|
|
+ if (force || use_lazy_mmu_mode())
|
|
+ _xen_multicall_flush(false);
|
|
+}
|
|
+
|
|
int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
|
|
unsigned long uvmf)
|
|
{
|
|
@@ -128,7 +133,7 @@ int xen_multi_update_va_mapping(unsigned
|
|
#endif
|
|
|
|
if (unlikely(lazy->nr_mc == NR_MC))
|
|
- xen_multicall_flush(false);
|
|
+ _xen_multicall_flush(false);
|
|
|
|
mc = lazy->mc + lazy->nr_mc++;
|
|
mc->op = __HYPERVISOR_update_va_mapping;
|
|
@@ -167,7 +172,7 @@ int xen_multi_mmu_update(mmu_update_t *s
|
|
merge = lazy->nr_mc && !commit
|
|
&& mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
|
|
if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
|
|
- xen_multicall_flush(false);
|
|
+ _xen_multicall_flush(false);
|
|
mc = lazy->mc;
|
|
commit = count > NR_MMU || success_count;
|
|
}
|
|
@@ -205,7 +210,7 @@ int xen_multi_mmu_update(mmu_update_t *s
|
|
break;
|
|
}
|
|
|
|
- return commit ? xen_multicall_flush(true) : 0;
|
|
+ return commit ? _xen_multicall_flush(true) : 0;
|
|
}
|
|
|
|
int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
|
|
@@ -289,7 +294,7 @@ int xen_multi_mmuext_op(struct mmuext_op
|
|
merge = lazy->nr_mc && !commit
|
|
&& mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
|
|
if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
|
|
- xen_multicall_flush(false);
|
|
+ _xen_multicall_flush(false);
|
|
mc = lazy->mc;
|
|
commit = count > NR_MMUEXT || success_count;
|
|
}
|
|
@@ -336,7 +341,7 @@ int xen_multi_mmuext_op(struct mmuext_op
|
|
break;
|
|
}
|
|
|
|
- return commit ? xen_multicall_flush(true) : 0;
|
|
+ return commit ? _xen_multicall_flush(true) : 0;
|
|
}
|
|
|
|
void xen_l1_entry_update(pte_t *ptr, pte_t val)
|
|
--- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -71,7 +71,7 @@ static unsigned long __initdata table_to
|
|
|
|
static int __initdata after_init_bootmem;
|
|
|
|
-static __init void *alloc_low_page(unsigned long *phys)
|
|
+static __init void *alloc_low_page(void)
|
|
{
|
|
unsigned long pfn = table_end++;
|
|
void *adr;
|
|
@@ -81,7 +81,6 @@ static __init void *alloc_low_page(unsig
|
|
|
|
adr = __va(pfn * PAGE_SIZE);
|
|
memset(adr, 0, PAGE_SIZE);
|
|
- *phys = pfn * PAGE_SIZE;
|
|
return adr;
|
|
}
|
|
|
|
@@ -96,17 +95,18 @@ static pmd_t * __init one_md_table_init(
|
|
pmd_t *pmd_table;
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
- unsigned long phys;
|
|
if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
|
|
if (after_init_bootmem)
|
|
pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
|
|
else
|
|
- pmd_table = (pmd_t *)alloc_low_page(&phys);
|
|
+ pmd_table = (pmd_t *)alloc_low_page();
|
|
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
|
|
make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
|
|
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
|
pud = pud_offset(pgd, 0);
|
|
BUG_ON(pmd_table != pmd_offset(pud, 0));
|
|
+
|
|
+ return pmd_table;
|
|
}
|
|
#endif
|
|
pud = pud_offset(pgd, 0);
|
|
@@ -135,10 +135,8 @@ static pte_t * __init one_page_table_ini
|
|
if (!page_table)
|
|
page_table =
|
|
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
|
|
- } else {
|
|
- unsigned long phys;
|
|
- page_table = (pte_t *)alloc_low_page(&phys);
|
|
- }
|
|
+ } else
|
|
+ page_table = (pte_t *)alloc_low_page();
|
|
|
|
paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
|
|
make_lowmem_page_readonly(page_table,
|
|
@@ -150,6 +148,51 @@ static pte_t * __init one_page_table_ini
|
|
return pte_offset_kernel(pmd, 0);
|
|
}
|
|
|
|
+static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
|
|
+ unsigned long vaddr, pte_t *lastpte)
|
|
+{
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ /*
|
|
+ * Something (early fixmap) may already have put a pte
|
|
+ * page here, which causes the page table allocation
|
|
+ * to become nonlinear. Attempt to fix it, and if it
|
|
+ * is still nonlinear then we have to bug.
|
|
+ */
|
|
+ int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
|
|
+ int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
|
|
+
|
|
+ if (pmd_idx_kmap_begin != pmd_idx_kmap_end
|
|
+ && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
|
|
+ && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
|
|
+ && ((__pa(pte) >> PAGE_SHIFT) < table_start
|
|
+ || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
|
|
+ pte_t *newpte;
|
|
+ int i;
|
|
+
|
|
+ BUG_ON(after_init_bootmem);
|
|
+ newpte = alloc_low_page();
|
|
+ for (i = 0; i < PTRS_PER_PTE; i++)
|
|
+ set_pte(newpte + i, pte[i]);
|
|
+
|
|
+ paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
|
|
+ make_lowmem_page_readonly(newpte,
|
|
+ XENFEAT_writable_page_tables);
|
|
+ set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
|
|
+ BUG_ON(newpte != pte_offset_kernel(pmd, 0));
|
|
+ __flush_tlb_all();
|
|
+
|
|
+ paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
|
|
+ make_lowmem_page_writable(pte,
|
|
+ XENFEAT_writable_page_tables);
|
|
+ pte = newpte;
|
|
+ }
|
|
+ BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
|
|
+ && vaddr > fix_to_virt(FIX_KMAP_END)
|
|
+ && lastpte && lastpte + PTRS_PER_PTE != pte);
|
|
+#endif
|
|
+ return pte;
|
|
+}
|
|
+
|
|
/*
|
|
* This function initializes a certain range of kernel virtual memory
|
|
* with new bootmem page tables, everywhere page tables are missing in
|
|
@@ -166,6 +209,7 @@ page_table_range_init(unsigned long star
|
|
unsigned long vaddr;
|
|
pgd_t *pgd;
|
|
pmd_t *pmd;
|
|
+ pte_t *pte = NULL;
|
|
|
|
vaddr = start;
|
|
pgd_idx = pgd_index(vaddr);
|
|
@@ -177,8 +221,10 @@ page_table_range_init(unsigned long star
|
|
pmd = pmd + pmd_index(vaddr);
|
|
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
|
|
pmd++, pmd_idx++) {
|
|
- if (vaddr < hypervisor_virt_start)
|
|
- one_page_table_init(pmd);
|
|
+ if (vaddr >= hypervisor_virt_start)
|
|
+ break;
|
|
+ pte = page_table_kmap_check(one_page_table_init(pmd),
|
|
+ pmd, vaddr, pte);
|
|
|
|
vaddr += PMD_SIZE;
|
|
}
|
|
@@ -361,6 +407,8 @@ int devmem_is_allowed(unsigned long page
|
|
{
|
|
if (pagenr <= 256)
|
|
return 1;
|
|
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
|
|
+ return 0;
|
|
if (mfn_to_local_pfn(pagenr) >= max_pfn)
|
|
return 1;
|
|
return 0;
|
|
@@ -476,8 +524,12 @@ static void __init set_highmem_pages_ini
|
|
#endif /* !CONFIG_NUMA */
|
|
|
|
#else
|
|
-# define permanent_kmaps_init(pgd_base) do { } while (0)
|
|
-# define set_highmem_pages_init() do { } while (0)
|
|
+static inline void permanent_kmaps_init(pgd_t *pgd_base)
|
|
+{
|
|
+}
|
|
+static inline void set_highmem_pages_init(void)
|
|
+{
|
|
+}
|
|
#endif /* CONFIG_HIGHMEM */
|
|
|
|
pgd_t *swapper_pg_dir;
|
|
@@ -509,7 +561,6 @@ static void __init early_ioremap_page_ta
|
|
* Fixed mappings, only the page table structure has to be
|
|
* created - mappings will be set by set_fixmap():
|
|
*/
|
|
- early_ioremap_clear();
|
|
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
|
|
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
|
|
page_table_range_init(vaddr, end, pgd_base);
|
|
@@ -856,10 +907,7 @@ static void __init find_early_table_spac
|
|
tables += PAGE_ALIGN(ptes * sizeof(pte_t));
|
|
|
|
/* for fixmap */
|
|
- tables += PAGE_SIZE
|
|
- * ((((FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK)
|
|
- - (__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK))
|
|
- >> PMD_SHIFT);
|
|
+ tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
|
|
|
|
table_start = extend_init_mapping(tables);
|
|
|
|
@@ -1023,8 +1071,6 @@ void __init mem_init(void)
|
|
|
|
pci_iommu_alloc();
|
|
|
|
- start_periodic_check_for_corruption();
|
|
-
|
|
#ifdef CONFIG_FLATMEM
|
|
BUG_ON(!mem_map);
|
|
#endif
|
|
@@ -1099,11 +1145,25 @@ void __init mem_init(void)
|
|
(unsigned long)&_text, (unsigned long)&_etext,
|
|
((unsigned long)&_etext - (unsigned long)&_text) >> 10);
|
|
|
|
+ /*
|
|
+ * Check boundaries twice: Some fundamental inconsistencies can
|
|
+ * be detected at build time already.
|
|
+ */
|
|
+#define __FIXADDR_TOP (-PAGE_SIZE)
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
|
|
+ BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE);
|
|
+#endif
|
|
+#define high_memory (-128UL << 20)
|
|
+ BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
|
|
+#undef high_memory
|
|
+#undef __FIXADDR_TOP
|
|
+
|
|
#ifdef CONFIG_HIGHMEM
|
|
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
|
|
BUG_ON(VMALLOC_END > PKMAP_BASE);
|
|
#endif
|
|
- BUG_ON(VMALLOC_START > VMALLOC_END);
|
|
+ BUG_ON(VMALLOC_START >= VMALLOC_END);
|
|
BUG_ON((unsigned long)high_memory > VMALLOC_START);
|
|
|
|
if (boot_cpu_data.wp_works_ok < 0)
|
|
@@ -1123,7 +1183,7 @@ int arch_add_memory(int nid, u64 start,
|
|
unsigned long start_pfn = start >> PAGE_SHIFT;
|
|
unsigned long nr_pages = size >> PAGE_SHIFT;
|
|
|
|
- return __add_pages(zone, start_pfn, nr_pages);
|
|
+ return __add_pages(nid, zone, start_pfn, nr_pages);
|
|
}
|
|
#endif
|
|
|
|
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -841,7 +841,7 @@ static void __init init_gbpages(void)
|
|
#endif
|
|
}
|
|
|
|
-static unsigned long __init kernel_physical_mapping_init(unsigned long start,
|
|
+static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
|
|
unsigned long end,
|
|
unsigned long page_size_mask)
|
|
{
|
|
@@ -966,6 +966,8 @@ unsigned long __init_refok init_memory_m
|
|
pos = start_pfn << PAGE_SHIFT;
|
|
end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
|
<< (PMD_SHIFT - PAGE_SHIFT);
|
|
+ if (end_pfn > (end >> PAGE_SHIFT))
|
|
+ end_pfn = end >> PAGE_SHIFT;
|
|
if (start_pfn < end_pfn) {
|
|
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
pos = end_pfn << PAGE_SHIFT;
|
|
@@ -1146,7 +1148,7 @@ int arch_add_memory(int nid, u64 start,
|
|
if (last_mapped_pfn > max_pfn_mapped)
|
|
max_pfn_mapped = last_mapped_pfn;
|
|
|
|
- ret = __add_pages(zone, start_pfn, nr_pages);
|
|
+ ret = __add_pages(nid, zone, start_pfn, nr_pages);
|
|
WARN_ON_ONCE(ret);
|
|
|
|
return ret;
|
|
@@ -1177,6 +1179,8 @@ int devmem_is_allowed(unsigned long page
|
|
{
|
|
if (pagenr <= 256)
|
|
return 1;
|
|
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
|
|
+ return 0;
|
|
if (mfn_to_local_pfn(pagenr) >= max_pfn)
|
|
return 1;
|
|
return 0;
|
|
@@ -1192,8 +1196,6 @@ void __init mem_init(void)
|
|
unsigned long absent_pages;
|
|
unsigned long pfn;
|
|
|
|
- start_periodic_check_for_corruption();
|
|
-
|
|
pci_iommu_alloc();
|
|
|
|
/* clear_bss() already clear the empty_zero_page */
|
|
--- head-2011-03-17.orig/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -17,9 +17,21 @@
|
|
*/
|
|
|
|
#include <asm/iomap.h>
|
|
+#include <asm/pat.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/module.h>
|
|
|
|
+int is_io_mapping_possible(resource_size_t base, unsigned long size)
|
|
+{
|
|
+#ifndef CONFIG_X86_PAE
|
|
+ /* There is no way to map an address greater than 1 << 32 without PAE */
|
|
+ if (base + size > 0x100000000ULL)
|
|
+ return 0;
|
|
+#endif
|
|
+ return 1;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(is_io_mapping_possible);
|
|
+
|
|
/* Map 'mfn' using fixed map 'type' and protections 'prot'
|
|
*/
|
|
void *
|
|
@@ -30,6 +42,15 @@ iomap_atomic_prot_pfn(unsigned long mfn,
|
|
|
|
pagefault_disable();
|
|
|
|
+ /*
|
|
+ * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
|
|
+ * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
|
|
+ * MTRR is UC or WC. UC_MINUS gets the real intention of the
|
|
+ * user, which is "WC if the MTRR is WC, UC if you can't do that."
|
|
+ */
|
|
+ if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
|
|
+ prot = PAGE_KERNEL_UC_MINUS;
|
|
+
|
|
idx = type + KM_TYPE_NR*smp_processor_id();
|
|
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
pgprot_val(prot) |= _PAGE_IOMAP;
|
|
--- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:07.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:20.000000000 +0100
|
|
@@ -274,25 +274,6 @@ int page_is_ram(unsigned long pagenr)
|
|
return 0;
|
|
}
|
|
|
|
-int pagerange_is_ram(unsigned long start, unsigned long end)
|
|
-{
|
|
- int ram_page = 0, not_rampage = 0;
|
|
- unsigned long page_nr;
|
|
-
|
|
- for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
|
|
- ++page_nr) {
|
|
- if (page_is_ram(mfn_to_local_pfn(page_nr)))
|
|
- ram_page = 1;
|
|
- else
|
|
- not_rampage = 1;
|
|
-
|
|
- if (ram_page == not_rampage)
|
|
- return -1;
|
|
- }
|
|
-
|
|
- return ram_page;
|
|
-}
|
|
-
|
|
/*
|
|
* Fix up the linear direct mapping of the kernel to avoid cache attribute
|
|
* conflicts.
|
|
@@ -383,7 +364,8 @@ static void __iomem *__ioremap_caller(re
|
|
* Check if the request spans more than any BAR in the iomem resource
|
|
* tree.
|
|
*/
|
|
- WARN_ON(iomem_map_sanity_check(phys_addr, size));
|
|
+ WARN_ONCE(iomem_map_sanity_check(phys_addr, size),
|
|
+ KERN_INFO "Info: mapping multiple BARs. Your kernel is fine.");
|
|
|
|
/*
|
|
* Don't allow anybody to remap normal RAM that we're using..
|
|
@@ -727,38 +709,10 @@ void __init early_ioremap_init(void)
|
|
}
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-void __init early_ioremap_clear(void)
|
|
-{
|
|
- pmd_t *pmd;
|
|
-
|
|
- if (early_ioremap_debug)
|
|
- printk(KERN_INFO "early_ioremap_clear()\n");
|
|
-
|
|
- pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
|
|
- pmd_clear(pmd);
|
|
- make_lowmem_page_writable(bm_pte, XENFEAT_writable_page_tables);
|
|
- /* paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT); */
|
|
- __flush_tlb_all();
|
|
-}
|
|
-
|
|
void __init early_ioremap_reset(void)
|
|
{
|
|
- enum fixed_addresses idx;
|
|
- unsigned long addr, phys;
|
|
- pte_t *pte;
|
|
-
|
|
after_paging_init = 1;
|
|
- for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
|
|
- addr = fix_to_virt(idx);
|
|
- pte = early_ioremap_pte(addr);
|
|
- if (pte_present(*pte)) {
|
|
- phys = __pte_val(*pte) & PAGE_MASK;
|
|
- set_fixmap(idx, phys);
|
|
- }
|
|
- }
|
|
}
|
|
-#endif /* CONFIG_X86_32 */
|
|
|
|
static void __init __early_set_fixmap(enum fixed_addresses idx,
|
|
unsigned long phys, pgprot_t flags)
|
|
--- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -524,22 +524,28 @@ static int split_large_page(pte_t *kpte,
|
|
set_pte(&pbase[i], pfn_pte_ma(mfn, ref_prot));
|
|
|
|
/*
|
|
- * Install the new, split up pagetable. Important details here:
|
|
+ * Install the new, split up pagetable.
|
|
*
|
|
- * On Intel the NX bit of all levels must be cleared to make a
|
|
- * page executable. See section 4.13.2 of Intel 64 and IA-32
|
|
- * Architectures Software Developer's Manual).
|
|
- *
|
|
- * Mark the entry present. The current mapping might be
|
|
- * set to not present, which we preserved above.
|
|
+ * We use the standard kernel pagetable protections for the new
|
|
+ * pagetable protections, the actual ptes set above control the
|
|
+ * primary protection behavior:
|
|
*/
|
|
if (!xen_feature(XENFEAT_writable_page_tables) &&
|
|
HYPERVISOR_update_va_mapping((unsigned long)pbase,
|
|
mk_pte(base, PAGE_KERNEL_RO), 0))
|
|
BUG();
|
|
- ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
|
|
- pgprot_val(ref_prot) |= _PAGE_PRESENT;
|
|
- __set_pmd_pte(kpte, address, level, mk_pte(base, ref_prot));
|
|
+ __set_pmd_pte(kpte, address, level, mk_pte(base, __pgprot(_KERNPG_TABLE)));
|
|
+
|
|
+ /*
|
|
+ * Intel Atom errata AAH41 workaround.
|
|
+ *
|
|
+ * The real fix should be in hw or in a microcode update, but
|
|
+ * we also probabilistically try to reduce the window of having
|
|
+ * a large TLB mixed with 4K TLBs while instruction fetches are
|
|
+ * going on.
|
|
+ */
|
|
+ __flush_tlb_all();
|
|
+
|
|
base = NULL;
|
|
|
|
out_unlock:
|
|
@@ -554,6 +560,36 @@ out_unlock:
|
|
return 0;
|
|
}
|
|
|
|
+static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
|
|
+ int primary)
|
|
+{
|
|
+ /*
|
|
+ * Ignore all non primary paths.
|
|
+ */
|
|
+ if (!primary)
|
|
+ return 0;
|
|
+
|
|
+ /*
|
|
+ * Ignore the NULL PTE for kernel identity mapping, as it is expected
|
|
+ * to have holes.
|
|
+ * Also set numpages to '1' indicating that we processed cpa req for
|
|
+ * one virtual address page and its pfn. TBD: numpages can be set based
|
|
+ * on the initial value and the level returned by lookup_address().
|
|
+ */
|
|
+ if (within(vaddr, PAGE_OFFSET,
|
|
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
|
|
+ cpa->numpages = 1;
|
|
+ cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
|
|
+ return 0;
|
|
+ } else {
|
|
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
|
|
+ "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
|
|
+ *cpa->vaddr);
|
|
+
|
|
+ return -EFAULT;
|
|
+ }
|
|
+}
|
|
+
|
|
static int __change_page_attr(struct cpa_data *cpa, int primary)
|
|
{
|
|
unsigned long address;
|
|
@@ -565,21 +601,14 @@ static int __change_page_attr(struct cpa
|
|
address = cpa->vaddr[cpa->curpage];
|
|
else
|
|
address = *cpa->vaddr;
|
|
-
|
|
repeat:
|
|
kpte = lookup_address(address, &level);
|
|
if (!kpte)
|
|
- return 0;
|
|
+ return __cpa_process_fault(cpa, address, primary);
|
|
|
|
old_pte = *kpte;
|
|
- if (!__pte_val(old_pte)) {
|
|
- if (!primary)
|
|
- return 0;
|
|
- WARN(1, KERN_WARNING "CPA: called for zero pte. "
|
|
- "vaddr = %lx cpa->vaddr = %lx\n", address,
|
|
- *cpa->vaddr);
|
|
- return -EINVAL;
|
|
- }
|
|
+ if (!__pte_val(old_pte))
|
|
+ return __cpa_process_fault(cpa, address, primary);
|
|
|
|
if (level == PG_LEVEL_4K) {
|
|
pte_t new_pte;
|
|
@@ -678,12 +707,7 @@ static int cpa_process_alias(struct cpa_
|
|
vaddr = *cpa->vaddr;
|
|
|
|
if (!(within(vaddr, PAGE_OFFSET,
|
|
- PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
|
|
-#ifdef CONFIG_X86_64
|
|
- || within(vaddr, PAGE_OFFSET + (1UL<<32),
|
|
- PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
|
|
-#endif
|
|
- )) {
|
|
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
|
|
|
|
alias_cpa = *cpa;
|
|
temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
|
|
@@ -814,6 +838,15 @@ static int change_page_attr_set_clr(unsi
|
|
|
|
vm_unmap_aliases();
|
|
|
|
+ /*
|
|
+ * If we're called with lazy mmu updates enabled, the
|
|
+ * in-memory pte state may be stale. Flush pending updates to
|
|
+ * bring them up to date.
|
|
+ *
|
|
+ arch_flush_lazy_mmu_mode();*/
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ xen_multicall_flush(true);
|
|
+
|
|
cpa.vaddr = addr;
|
|
cpa.numpages = numpages;
|
|
cpa.mask_set = mask_set;
|
|
@@ -856,6 +889,14 @@ static int change_page_attr_set_clr(unsi
|
|
} else
|
|
cpa_flush_all(cache);
|
|
|
|
+ /*
|
|
+ * If we've been called with lazy mmu updates enabled, then
|
|
+ * make sure that everything gets flushed out before we
|
|
+ * return.
|
|
+ *
|
|
+ arch_flush_lazy_mmu_mode();*/
|
|
+ WARN_ON_ONCE(arch_use_lazy_mmu_mode() && !irq_count());
|
|
+
|
|
out:
|
|
return ret;
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/mm/pat-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pat-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -11,6 +11,7 @@
|
|
#include <linux/bootmem.h>
|
|
#include <linux/debugfs.h>
|
|
#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
@@ -167,11 +168,12 @@ struct memtype {
|
|
static LIST_HEAD(memtype_list);
|
|
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
|
|
|
|
+static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end);
|
|
static inline u8 _mtrr_type_lookup(u64 start, u64 end)
|
|
{
|
|
if (is_initial_xendomain())
|
|
return mtrr_type_lookup(start, end);
|
|
- return pagerange_is_ram(start, end) > 0
|
|
+ return pat_pagerange_is_ram(start, end) > 0
|
|
? MTRR_TYPE_WRCOMB : MTRR_TYPE_UNCACHABLE;
|
|
}
|
|
#define mtrr_type_lookup _mtrr_type_lookup
|
|
@@ -232,6 +234,33 @@ chk_conflict(struct memtype *new, struct
|
|
static struct memtype *cached_entry;
|
|
static u64 cached_start;
|
|
|
|
+static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
|
|
+{
|
|
+ int ram_page = 0, not_rampage = 0;
|
|
+ unsigned long page_nr;
|
|
+
|
|
+ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
|
|
+ ++page_nr) {
|
|
+ /*
|
|
+ * For legacy reasons, physical address range in the legacy ISA
|
|
+ * region is tracked as non-RAM. This will allow users of
|
|
+ * /dev/mem to map portions of legacy ISA region, even when
|
|
+ * some of those portions are listed (or not even listed) with
|
|
+ * different e820 types(RAM/reserved/..)
|
|
+ */
|
|
+ if (page_nr >= (ISA_END_ADDRESS >> PAGE_SHIFT) &&
|
|
+ page_is_ram(mfn_to_local_pfn(page_nr)))
|
|
+ ram_page = 1;
|
|
+ else
|
|
+ not_rampage = 1;
|
|
+
|
|
+ if (ram_page == not_rampage)
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return ram_page;
|
|
+}
|
|
+
|
|
/*
|
|
* For RAM pages, mark the pages as non WB memory type using
|
|
* PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
|
|
@@ -360,9 +389,13 @@ int reserve_memtype(u64 start, u64 end,
|
|
req_type & _PAGE_CACHE_MASK);
|
|
}
|
|
|
|
- is_range_ram = pagerange_is_ram(start, end);
|
|
+ if (new_type)
|
|
+ *new_type = actual_type;
|
|
+
|
|
+ is_range_ram = pat_pagerange_is_ram(start, end);
|
|
if (is_range_ram == 1)
|
|
- return reserve_ram_pages_type(start, end, req_type, new_type);
|
|
+ return reserve_ram_pages_type(start, end, req_type,
|
|
+ new_type);
|
|
else if (is_range_ram < 0)
|
|
return -EINVAL;
|
|
|
|
@@ -374,9 +407,6 @@ int reserve_memtype(u64 start, u64 end,
|
|
new->end = end;
|
|
new->type = actual_type;
|
|
|
|
- if (new_type)
|
|
- *new_type = actual_type;
|
|
-
|
|
spin_lock(&memtype_lock);
|
|
|
|
if (cached_entry && start >= cached_start)
|
|
@@ -464,7 +494,7 @@ int free_memtype(u64 start, u64 end)
|
|
if (is_ISA_range(start, end - 1))
|
|
return 0;
|
|
|
|
- is_range_ram = pagerange_is_ram(start, end);
|
|
+ is_range_ram = pat_pagerange_is_ram(start, end);
|
|
if (is_range_ram == 1)
|
|
return free_ram_pages_type(start, end);
|
|
else if (is_range_ram < 0)
|
|
@@ -623,6 +653,254 @@ void unmap_devmem(unsigned long mfn, uns
|
|
free_memtype(addr, addr + size);
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Internal interface to reserve a range of physical memory with prot.
|
|
+ * Reserves non-RAM regions only; after a successful reserve_memtype,
|
|
+ * this func also keeps identity mapping (if any) in sync with this new prot.
|
|
+ */
|
|
+static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
|
|
+ int strict_prot)
|
|
+{
|
|
+ int is_ram = 0;
|
|
+ int id_sz, ret;
|
|
+ unsigned long flags;
|
|
+ unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
|
|
+
|
|
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
|
|
+
|
|
+ /*
|
|
+ * reserve_pfn_range() doesn't support RAM pages.
|
|
+ */
|
|
+ if (is_ram != 0)
|
|
+ return -EINVAL;
|
|
+
|
|
+ ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (flags != want_flags) {
|
|
+ if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
|
|
+ free_memtype(paddr, paddr + size);
|
|
+ printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
|
|
+ " for %Lx-%Lx, got %s\n",
|
|
+ current->comm, current->pid,
|
|
+ cattr_name(want_flags),
|
|
+ (unsigned long long)paddr,
|
|
+ (unsigned long long)(paddr + size),
|
|
+ cattr_name(flags));
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ /*
|
|
+ * We allow returning different type than the one requested in
|
|
+ * non strict case.
|
|
+ */
|
|
+ *vma_prot = __pgprot((pgprot_val(*vma_prot) &
|
|
+ (~_PAGE_CACHE_MASK)) |
|
|
+ flags);
|
|
+ }
|
|
+
|
|
+ /* Need to keep identity mapping in sync */
|
|
+ if (paddr >= __pa(high_memory))
|
|
+ return 0;
|
|
+
|
|
+ id_sz = (__pa(high_memory) < paddr + size) ?
|
|
+ __pa(high_memory) - paddr :
|
|
+ size;
|
|
+
|
|
+ if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
|
|
+ free_memtype(paddr, paddr + size);
|
|
+ printk(KERN_ERR
|
|
+ "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
|
|
+ "for %Lx-%Lx\n",
|
|
+ current->comm, current->pid,
|
|
+ cattr_name(flags),
|
|
+ (unsigned long long)paddr,
|
|
+ (unsigned long long)(paddr + size));
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Internal interface to free a range of physical memory.
|
|
+ * Frees non RAM regions only.
|
|
+ */
|
|
+static void free_pfn_range(u64 paddr, unsigned long size)
|
|
+{
|
|
+ int is_ram;
|
|
+
|
|
+ is_ram = pat_pagerange_is_ram(paddr, paddr + size);
|
|
+ if (is_ram == 0)
|
|
+ free_memtype(paddr, paddr + size);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
|
|
+ * copied through copy_page_range().
|
|
+ *
|
|
+ * If the vma has a linear pfn mapping for the entire range, we get the prot
|
|
+ * from pte and reserve the entire vma range with single reserve_pfn_range call.
|
|
+ * Otherwise, we reserve the entire vma range, by going through the PTEs page
|
|
+ * by page to get physical address and protection.
|
|
+ */
|
|
+int track_pfn_vma_copy(struct vm_area_struct *vma)
|
|
+{
|
|
+ int retval = 0;
|
|
+ unsigned long i, j;
|
|
+ resource_size_t paddr;
|
|
+ unsigned long prot;
|
|
+ unsigned long vma_start = vma->vm_start;
|
|
+ unsigned long vma_end = vma->vm_end;
|
|
+ unsigned long vma_size = vma_end - vma_start;
|
|
+ pgprot_t pgprot;
|
|
+
|
|
+ if (!pat_enabled)
|
|
+ return 0;
|
|
+
|
|
+ if (is_linear_pfn_mapping(vma)) {
|
|
+ /*
|
|
+ * reserve the whole chunk covered by vma. We need the
|
|
+ * starting address and protection from pte.
|
|
+ */
|
|
+ if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
|
|
+ WARN_ON_ONCE(1);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+ pgprot = __pgprot(prot);
|
|
+ return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
|
|
+ }
|
|
+
|
|
+ /* reserve entire vma page by page, using pfn and prot from pte */
|
|
+ for (i = 0; i < vma_size; i += PAGE_SIZE) {
|
|
+ if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
|
|
+ continue;
|
|
+
|
|
+ pgprot = __pgprot(prot);
|
|
+ retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
|
|
+ if (retval)
|
|
+ goto cleanup_ret;
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+cleanup_ret:
|
|
+ /* Reserve error: Cleanup partial reservation and return error */
|
|
+ for (j = 0; j < i; j += PAGE_SIZE) {
|
|
+ if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
|
|
+ continue;
|
|
+
|
|
+ free_pfn_range(paddr, PAGE_SIZE);
|
|
+ }
|
|
+
|
|
+ return retval;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * track_pfn_vma_new is called when a _new_ pfn mapping is being established
|
|
+ * for physical range indicated by pfn and size.
|
|
+ *
|
|
+ * prot is passed in as a parameter for the new mapping. If the vma has a
|
|
+ * linear pfn mapping for the entire range reserve the entire vma range with
|
|
+ * single reserve_pfn_range call.
|
|
+ * Otherwise, we look at the pfn and size and reserve only the specified range
|
|
+ * page by page.
|
|
+ *
|
|
+ * Note that this function can be called with caller trying to map only a
|
|
+ * subrange/page inside the vma.
|
|
+ */
|
|
+int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
|
|
+ unsigned long pfn, unsigned long size)
|
|
+{
|
|
+ int retval = 0;
|
|
+ unsigned long i, j;
|
|
+ resource_size_t base_paddr;
|
|
+ resource_size_t paddr;
|
|
+ unsigned long vma_start = vma->vm_start;
|
|
+ unsigned long vma_end = vma->vm_end;
|
|
+ unsigned long vma_size = vma_end - vma_start;
|
|
+
|
|
+ if (!pat_enabled)
|
|
+ return 0;
|
|
+
|
|
+ if (is_linear_pfn_mapping(vma)) {
|
|
+ /* reserve the whole chunk starting from vm_pgoff */
|
|
+ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
|
|
+ return reserve_pfn_range(paddr, vma_size, prot, 0);
|
|
+ }
|
|
+
|
|
+ /* reserve page by page using pfn and size */
|
|
+ base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
|
|
+ for (i = 0; i < size; i += PAGE_SIZE) {
|
|
+ paddr = base_paddr + i;
|
|
+ retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
|
|
+ if (retval)
|
|
+ goto cleanup_ret;
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+cleanup_ret:
|
|
+ /* Reserve error: Cleanup partial reservation and return error */
|
|
+ for (j = 0; j < i; j += PAGE_SIZE) {
|
|
+ paddr = base_paddr + j;
|
|
+ free_pfn_range(paddr, PAGE_SIZE);
|
|
+ }
|
|
+
|
|
+ return retval;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * untrack_pfn_vma is called while unmapping a pfnmap for a region.
|
|
+ * untrack can be called for a specific region indicated by pfn and size or
|
|
+ * can be for the entire vma (in which case size can be zero).
|
|
+ */
|
|
+void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
|
|
+ unsigned long size)
|
|
+{
|
|
+ unsigned long i;
|
|
+ resource_size_t paddr;
|
|
+ unsigned long prot;
|
|
+ unsigned long vma_start = vma->vm_start;
|
|
+ unsigned long vma_end = vma->vm_end;
|
|
+ unsigned long vma_size = vma_end - vma_start;
|
|
+
|
|
+ if (!pat_enabled)
|
|
+ return;
|
|
+
|
|
+ if (is_linear_pfn_mapping(vma)) {
|
|
+ /* free the whole chunk starting from vm_pgoff */
|
|
+ paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
|
|
+ free_pfn_range(paddr, vma_size);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (size != 0 && size != vma_size) {
|
|
+ /* free page by page, using pfn and size */
|
|
+ paddr = (resource_size_t)pfn << PAGE_SHIFT;
|
|
+ for (i = 0; i < size; i += PAGE_SIZE) {
|
|
+ paddr = paddr + i;
|
|
+ free_pfn_range(paddr, PAGE_SIZE);
|
|
+ }
|
|
+ } else {
|
|
+ /* free entire vma, page by page, using the pfn from pte */
|
|
+ for (i = 0; i < vma_size; i += PAGE_SIZE) {
|
|
+ if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
|
|
+ continue;
|
|
+
|
|
+ free_pfn_range(paddr, PAGE_SIZE);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+pgprot_t pgprot_writecombine(pgprot_t prot)
|
|
+{
|
|
+ if (pat_enabled)
|
|
+ return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC);
|
|
+ else
|
|
+ return pgprot_noncached(prot);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(pgprot_writecombine);
|
|
+
|
|
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
|
|
|
|
/* get Nth element of the linked list */
|
|
--- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -16,8 +16,7 @@
|
|
#include <asm/io_apic.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/acpi.h>
|
|
-
|
|
-#include "pci.h"
|
|
+#include <asm/pci_x86.h>
|
|
|
|
#define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24))
|
|
#define PIRQ_VERSION 0x0100
|
|
@@ -540,7 +539,7 @@ static int pirq_bios_set(struct pci_dev
|
|
{
|
|
struct pci_dev *bridge;
|
|
int pin = pci_get_interrupt_pin(dev, &bridge);
|
|
- return pcibios_set_irq_routing(bridge, pin, irq);
|
|
+ return pcibios_set_irq_routing(bridge, pin - 1, irq);
|
|
}
|
|
|
|
#endif
|
|
@@ -579,6 +578,7 @@ static __init int intel_router_probe(str
|
|
case PCI_DEVICE_ID_INTEL_ICH7_1:
|
|
case PCI_DEVICE_ID_INTEL_ICH7_30:
|
|
case PCI_DEVICE_ID_INTEL_ICH7_31:
|
|
+ case PCI_DEVICE_ID_INTEL_TGP_LPC:
|
|
case PCI_DEVICE_ID_INTEL_ESB2_0:
|
|
case PCI_DEVICE_ID_INTEL_ICH8_0:
|
|
case PCI_DEVICE_ID_INTEL_ICH8_1:
|
|
@@ -894,7 +894,6 @@ static int pcibios_lookup_irq(struct pci
|
|
dev_dbg(&dev->dev, "no interrupt pin\n");
|
|
return 0;
|
|
}
|
|
- pin = pin - 1;
|
|
|
|
/* Find IRQ routing entry */
|
|
|
|
@@ -904,17 +903,17 @@ static int pcibios_lookup_irq(struct pci
|
|
info = pirq_get_info(dev);
|
|
if (!info) {
|
|
dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
|
|
- 'A' + pin);
|
|
+ 'A' + pin - 1);
|
|
return 0;
|
|
}
|
|
- pirq = info->irq[pin].link;
|
|
- mask = info->irq[pin].bitmap;
|
|
+ pirq = info->irq[pin - 1].link;
|
|
+ mask = info->irq[pin - 1].bitmap;
|
|
if (!pirq) {
|
|
- dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin);
|
|
+ dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1);
|
|
return 0;
|
|
}
|
|
dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
|
|
- 'A' + pin, pirq, mask, pirq_table->exclusive_irqs);
|
|
+ 'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs);
|
|
mask &= pcibios_irq_mask;
|
|
|
|
/* Work around broken HP Pavilion Notebooks which assign USB to
|
|
@@ -956,7 +955,7 @@ static int pcibios_lookup_irq(struct pci
|
|
newirq = i;
|
|
}
|
|
}
|
|
- dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq);
|
|
+ dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq);
|
|
|
|
/* Check if it is hardcoded */
|
|
if ((pirq & 0xf0) == 0xf0) {
|
|
@@ -984,18 +983,18 @@ static int pcibios_lookup_irq(struct pci
|
|
return 0;
|
|
}
|
|
}
|
|
- dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq);
|
|
+ dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);
|
|
|
|
/* Update IRQ for all devices with the same pirq value */
|
|
while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
|
|
pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
|
|
if (!pin)
|
|
continue;
|
|
- pin--;
|
|
+
|
|
info = pirq_get_info(dev2);
|
|
if (!info)
|
|
continue;
|
|
- if (info->irq[pin].link == pirq) {
|
|
+ if (info->irq[pin - 1].link == pirq) {
|
|
/*
|
|
* We refuse to override the dev->irq
|
|
* information. Give a warning!
|
|
@@ -1049,6 +1048,9 @@ static void __init pcibios_fixup_irqs(vo
|
|
dev = NULL;
|
|
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
|
|
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
|
|
+ if (!pin)
|
|
+ continue;
|
|
+
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
/*
|
|
* Recalculate IRQ numbers if we use the I/O APIC.
|
|
@@ -1056,15 +1058,11 @@ static void __init pcibios_fixup_irqs(vo
|
|
if (io_apic_assign_pci_irqs) {
|
|
int irq;
|
|
|
|
- if (!pin)
|
|
- continue;
|
|
-
|
|
/*
|
|
* interrupt pins are numbered starting from 1
|
|
*/
|
|
- pin--;
|
|
irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
|
|
- PCI_SLOT(dev->devfn), pin);
|
|
+ PCI_SLOT(dev->devfn), pin - 1);
|
|
/*
|
|
* Busses behind bridges are typically not listed in the
|
|
* MP-table. In this case we have to look up the IRQ
|
|
@@ -1077,22 +1075,22 @@ static void __init pcibios_fixup_irqs(vo
|
|
struct pci_dev *bridge = dev->bus->self;
|
|
int bus;
|
|
|
|
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
|
|
+ pin = pci_swizzle_interrupt_pin(dev, pin);
|
|
bus = bridge->bus->number;
|
|
irq = IO_APIC_get_PCI_irq_vector(bus,
|
|
- PCI_SLOT(bridge->devfn), pin);
|
|
+ PCI_SLOT(bridge->devfn), pin - 1);
|
|
if (irq >= 0)
|
|
dev_warn(&dev->dev,
|
|
"using bridge %s INT %c to "
|
|
"get IRQ %d\n",
|
|
pci_name(bridge),
|
|
- 'A' + pin, irq);
|
|
+ 'A' + pin - 1, irq);
|
|
}
|
|
if (irq >= 0) {
|
|
dev_info(&dev->dev,
|
|
"PCI->APIC IRQ transform: INT %c "
|
|
"-> IRQ %d\n",
|
|
- 'A' + pin, irq);
|
|
+ 'A' + pin - 1, irq);
|
|
dev->irq = irq;
|
|
}
|
|
}
|
|
@@ -1100,7 +1098,7 @@ static void __init pcibios_fixup_irqs(vo
|
|
/*
|
|
* Still no IRQ? Try to lookup one...
|
|
*/
|
|
- if (pin && !dev->irq)
|
|
+ if (!dev->irq)
|
|
pcibios_lookup_irq(dev, 0);
|
|
}
|
|
}
|
|
@@ -1227,12 +1225,10 @@ static int pirq_enable_irq(struct pci_de
|
|
if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
|
|
char *msg = "";
|
|
|
|
- pin--; /* interrupt pins are numbered starting from 1 */
|
|
-
|
|
if (io_apic_assign_pci_irqs) {
|
|
int irq;
|
|
|
|
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
|
|
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1);
|
|
/*
|
|
* Busses behind bridges are typically not listed in the MP-table.
|
|
* In this case we have to look up the IRQ based on the parent bus,
|
|
@@ -1243,20 +1239,20 @@ static int pirq_enable_irq(struct pci_de
|
|
while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
|
|
struct pci_dev *bridge = dev->bus->self;
|
|
|
|
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
|
|
+ pin = pci_swizzle_interrupt_pin(dev, pin);
|
|
irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
|
|
- PCI_SLOT(bridge->devfn), pin);
|
|
+ PCI_SLOT(bridge->devfn), pin - 1);
|
|
if (irq >= 0)
|
|
dev_warn(&dev->dev, "using bridge %s "
|
|
"INT %c to get IRQ %d\n",
|
|
- pci_name(bridge), 'A' + pin,
|
|
+ pci_name(bridge), 'A' + pin - 1,
|
|
irq);
|
|
dev = bridge;
|
|
}
|
|
dev = temp_dev;
|
|
if (irq >= 0) {
|
|
dev_info(&dev->dev, "PCI->APIC IRQ transform: "
|
|
- "INT %c -> IRQ %d\n", 'A' + pin, irq);
|
|
+ "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
|
|
dev->irq = irq;
|
|
return 0;
|
|
} else
|
|
@@ -1275,7 +1271,7 @@ static int pirq_enable_irq(struct pci_de
|
|
return 0;
|
|
|
|
dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n",
|
|
- 'A' + pin, msg);
|
|
+ 'A' + pin - 1, msg);
|
|
}
|
|
return 0;
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/pci/pcifront.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/pci/pcifront.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -8,8 +8,8 @@
|
|
#include <linux/init.h>
|
|
#include <linux/pci.h>
|
|
#include <asm/acpi.h>
|
|
+#include <asm/pci_x86.h>
|
|
#include <xen/evtchn.h>
|
|
-#include "pci.h"
|
|
|
|
static int pcifront_enable_irq(struct pci_dev *dev)
|
|
{
|
|
--- head-2011-03-17.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -349,7 +349,7 @@ int __init sysenter_setup(void)
|
|
}
|
|
|
|
/* Setup a VMA at program startup for the vsyscall page */
|
|
-int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
|
|
+int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
|
|
{
|
|
struct mm_struct *mm = current->mm;
|
|
unsigned long addr;
|
|
--- head-2011-03-17.orig/drivers/acpi/Kconfig 2011-01-31 14:42:03.000000000 +0100
|
|
+++ head-2011-03-17/drivers/acpi/Kconfig 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -196,7 +196,7 @@ config ACPI_DOCK
|
|
config ACPI_PROCESSOR
|
|
tristate "Processor"
|
|
select THERMAL
|
|
- select CPU_IDLE
|
|
+ select CPU_IDLE if !PROCESSOR_EXTERNAL_CONTROL
|
|
default y
|
|
help
|
|
This driver installs ACPI as the idle handler for Linux and uses
|
|
--- head-2011-03-17.orig/drivers/acpi/processor_core.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/acpi/processor_core.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -192,7 +192,7 @@ int acpi_get_cpuid(acpi_handle handle, i
|
|
* stub enforcing a 1:1 mapping, we keep it undefined to catch bad
|
|
* uses. Return as if there was a 1:1 mapping.
|
|
*/
|
|
- if (apic_id < NR_CPUS && cpu_possible(apic_id))
|
|
+ if (apic_id < nr_cpu_ids && cpu_possible(apic_id))
|
|
return apic_id;
|
|
#endif
|
|
return -1;
|
|
--- head-2011-03-17.orig/drivers/acpi/processor_idle.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/drivers/acpi/processor_idle.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -125,6 +125,7 @@ static struct dmi_system_id __cpuinitdat
|
|
};
|
|
|
|
|
|
+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
/*
|
|
* Callers should disable interrupts before the call and enable
|
|
* interrupts after return.
|
|
@@ -143,6 +144,7 @@ static void acpi_safe_halt(void)
|
|
}
|
|
current_thread_info()->status |= TS_POLLING;
|
|
}
|
|
+#endif
|
|
|
|
#ifdef ARCH_APICTIMER_STOPS_ON_C3
|
|
|
|
@@ -213,7 +215,7 @@ static void lapic_timer_state_broadcast(
|
|
static void lapic_timer_check_state(int state, struct acpi_processor *pr,
|
|
struct acpi_processor_cx *cstate) { }
|
|
static void lapic_timer_propagate_broadcast(struct acpi_processor *pr) { }
|
|
-static void lapic_timer_state_broadcast(struct acpi_processor *pr,
|
|
+static inline void lapic_timer_state_broadcast(struct acpi_processor *pr,
|
|
struct acpi_processor_cx *cx,
|
|
int broadcast)
|
|
{
|
|
@@ -261,7 +263,7 @@ int acpi_processor_resume(struct acpi_de
|
|
return 0;
|
|
}
|
|
|
|
-#if defined(CONFIG_X86)
|
|
+#if defined(CONFIG_X86) && !defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
|
|
static void tsc_check_state(int state)
|
|
{
|
|
switch (boot_cpu_data.x86_vendor) {
|
|
@@ -621,7 +623,9 @@ static int acpi_processor_power_verify(s
|
|
unsigned int i;
|
|
unsigned int working = 0;
|
|
|
|
+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
pr->power.timer_broadcast_on_state = INT_MAX;
|
|
+#endif
|
|
|
|
for (i = 1; i < ACPI_PROCESSOR_MAX_POWER && i <= max_cstate; i++) {
|
|
struct acpi_processor_cx *cx = &pr->power.states[i];
|
|
@@ -693,6 +697,7 @@ static int acpi_processor_get_power_info
|
|
return 0;
|
|
}
|
|
|
|
+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
/**
|
|
* acpi_idle_bm_check - checks if bus master activity was detected
|
|
*/
|
|
@@ -1064,6 +1069,13 @@ static int acpi_processor_setup_cpuidle(
|
|
return 0;
|
|
}
|
|
|
|
+#else /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
|
|
+static inline int acpi_processor_setup_cpuidle(struct acpi_processor *pr)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
|
|
+
|
|
int acpi_processor_cst_has_changed(struct acpi_processor *pr)
|
|
{
|
|
int ret = 0;
|
|
--- head-2011-03-17.orig/drivers/gpu/drm/i915/i915_drv.c 2011-03-17 14:35:45.000000000 +0100
|
|
+++ head-2011-03-17/drivers/gpu/drm/i915/i915_drv.c 2011-03-17 14:13:15.000000000 +0100
|
|
@@ -722,7 +722,7 @@ static struct drm_driver driver = {
|
|
.open = drm_open,
|
|
.release = drm_release,
|
|
.unlocked_ioctl = drm_ioctl,
|
|
- .mmap = drm_gem_mmap,
|
|
+ .mmap = i915_gem_mmap,
|
|
.poll = drm_poll,
|
|
.fasync = drm_fasync,
|
|
.read = drm_read,
|
|
--- head-2011-03-17.orig/drivers/gpu/drm/i915/i915_drv.h 2011-03-17 14:35:45.000000000 +0100
|
|
+++ head-2011-03-17/drivers/gpu/drm/i915/i915_drv.h 2011-03-17 14:13:13.000000000 +0100
|
|
@@ -1162,6 +1162,11 @@ int __must_check i915_do_wait_request(st
|
|
uint32_t seqno,
|
|
bool interruptible,
|
|
struct intel_ring_buffer *ring);
|
|
+#ifdef CONFIG_XEN
|
|
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma);
|
|
+#else
|
|
+#define i915_gem_mmap drm_gem_mmap
|
|
+#endif
|
|
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
|
|
int __must_check
|
|
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
|
|
--- head-2011-03-17.orig/drivers/gpu/drm/i915/i915_gem.c 2011-03-17 14:35:45.000000000 +0100
|
|
+++ head-2011-03-17/drivers/gpu/drm/i915/i915_gem.c 2011-02-08 10:05:05.000000000 +0100
|
|
@@ -1152,6 +1152,17 @@ i915_gem_mmap_ioctl(struct drm_device *d
|
|
return 0;
|
|
}
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+int i915_gem_mmap(struct file *filp, struct vm_area_struct *vma)
|
|
+{
|
|
+ int ret = drm_gem_mmap(filp, vma);
|
|
+
|
|
+ pgprot_val(vma->vm_page_prot) |= _PAGE_IOMAP;
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+#endif
|
|
+
|
|
/**
|
|
* i915_gem_fault - fault a page into the GTT
|
|
* vma: VMA in question
|
|
--- head-2011-03-17.orig/drivers/oprofile/buffer_sync.c 2011-01-31 17:01:49.000000000 +0100
|
|
+++ head-2011-03-17/drivers/oprofile/buffer_sync.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -538,7 +538,6 @@ void sync_buffer(int cpu)
|
|
int cpu_mode = CPU_MODE_KERNEL;
|
|
sync_buffer_state state = sb_buffer_start;
|
|
unsigned int i;
|
|
- int domain_switch = 0;
|
|
unsigned long available;
|
|
unsigned long flags;
|
|
struct op_entry entry;
|
|
@@ -563,15 +562,6 @@ void sync_buffer(int cpu)
|
|
if (!sample)
|
|
break;
|
|
|
|
-#ifdef CONFIG_XEN
|
|
- if (domain_switch) {
|
|
- cpu_current_domain[cpu] = sample->eip;
|
|
- add_domain_switch(sample->eip);
|
|
- domain_switch = 0;
|
|
- continue;
|
|
- }
|
|
-#endif
|
|
-
|
|
if (is_code(sample->eip)) {
|
|
flags = sample->event;
|
|
if (flags & TRACE_BEGIN) {
|
|
@@ -597,8 +587,11 @@ void sync_buffer(int cpu)
|
|
add_user_ctx_switch(new, cookie);
|
|
}
|
|
#ifdef CONFIG_XEN
|
|
- if (flags & DOMAIN_SWITCH)
|
|
- domain_switch = 1;
|
|
+ if ((flags & DOMAIN_SWITCH)
|
|
+ && op_cpu_buffer_get_data(&entry, &val)) {
|
|
+ cpu_current_domain[cpu] = val;
|
|
+ add_domain_switch(val);
|
|
+ }
|
|
#endif
|
|
if (op_cpu_buffer_get_size(&entry))
|
|
add_data(&entry, mm);
|
|
--- head-2011-03-17.orig/drivers/oprofile/cpu_buffer.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/drivers/oprofile/cpu_buffer.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -417,34 +417,15 @@ void oprofile_add_pc(unsigned long pc, i
|
|
|
|
#ifdef CONFIG_XEN
|
|
/*
|
|
- * This is basically log_sample(b, ESCAPE_CODE, cpu_mode, CPU_TRACE_BEGIN),
|
|
+ * This is basically log_sample(b, ESCAPE_CODE, 1, cpu_mode, CPU_TRACE_BEGIN),
|
|
* as was previously accessible through oprofile_add_pc().
|
|
*/
|
|
void oprofile_add_mode(int cpu_mode)
|
|
{
|
|
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
|
|
- struct task_struct *task;
|
|
|
|
- if (nr_available_slots(cpu_buf) < 3) {
|
|
+ if (op_add_code(cpu_buf, 1, cpu_mode, current))
|
|
cpu_buf->sample_lost_overflow++;
|
|
- return;
|
|
- }
|
|
-
|
|
- task = current;
|
|
-
|
|
- /* notice a switch from user->kernel or vice versa */
|
|
- if (cpu_buf->last_cpu_mode != cpu_mode) {
|
|
- cpu_buf->last_cpu_mode = cpu_mode;
|
|
- add_code(cpu_buf, cpu_mode);
|
|
- }
|
|
-
|
|
- /* notice a task switch */
|
|
- if (cpu_buf->last_task != task) {
|
|
- cpu_buf->last_task = task;
|
|
- add_code(cpu_buf, (unsigned long)task);
|
|
- }
|
|
-
|
|
- add_code(cpu_buf, CPU_TRACE_BEGIN);
|
|
}
|
|
#endif
|
|
|
|
@@ -475,17 +456,18 @@ fail:
|
|
#ifdef CONFIG_XEN
|
|
int oprofile_add_domain_switch(int32_t domain_id)
|
|
{
|
|
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
|
|
+ struct op_entry entry;
|
|
+ struct op_sample *sample;
|
|
|
|
- /* should have space for switching into and out of domain
|
|
- (2 slots each) plus one sample and one cpu mode switch */
|
|
- if (((nr_available_slots(cpu_buf) < 6) &&
|
|
- (domain_id != COORDINATOR_DOMAIN)) ||
|
|
- (nr_available_slots(cpu_buf) < 2))
|
|
+ sample = op_cpu_buffer_write_reserve(&entry, 1);
|
|
+ if (!sample)
|
|
return 0;
|
|
|
|
- add_code(cpu_buf, DOMAIN_SWITCH);
|
|
- add_sample(cpu_buf, domain_id, 0);
|
|
+ sample->eip = ESCAPE_CODE;
|
|
+ sample->event = DOMAIN_SWITCH;
|
|
+
|
|
+ op_cpu_buffer_add_data(&entry, domain_id);
|
|
+ op_cpu_buffer_write_commit(&entry);
|
|
|
|
current_domain = domain_id;
|
|
|
|
--- head-2011-03-17.orig/drivers/pci/msi-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/pci/msi-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -724,30 +724,21 @@ void pci_no_msi(void)
|
|
pci_msi_enable = 0;
|
|
}
|
|
|
|
+/**
|
|
+ * pci_msi_enabled - is MSI enabled?
|
|
+ *
|
|
+ * Returns true if MSI has not been disabled by the command-line option
|
|
+ * pci=nomsi.
|
|
+ **/
|
|
+int pci_msi_enabled(void)
|
|
+{
|
|
+ return pci_msi_enable;
|
|
+}
|
|
+EXPORT_SYMBOL(pci_msi_enabled);
|
|
+
|
|
void pci_msi_init_pci_dev(struct pci_dev *dev)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
INIT_LIST_HEAD(&dev->msi_list);
|
|
#endif
|
|
}
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
-#include <linux/acpi.h>
|
|
-#include <linux/pci-acpi.h>
|
|
-static void __devinit msi_acpi_init(void)
|
|
-{
|
|
- if (acpi_pci_disabled)
|
|
- return;
|
|
- pci_osc_support_set(OSC_MSI_SUPPORT);
|
|
- pcie_osc_support_set(OSC_MSI_SUPPORT);
|
|
-}
|
|
-#else
|
|
-static inline void msi_acpi_init(void) { }
|
|
-#endif /* CONFIG_ACPI */
|
|
-
|
|
-void __devinit msi_init(void)
|
|
-{
|
|
- if (!pci_msi_enable)
|
|
- return;
|
|
- msi_acpi_init();
|
|
-}
|
|
--- head-2011-03-17.orig/drivers/xen/Kconfig 2011-02-02 15:36:33.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/Kconfig 2011-02-02 15:37:07.000000000 +0100
|
|
@@ -393,6 +393,7 @@ config XEN_BACKEND
|
|
|
|
config XENFS
|
|
tristate "Xen filesystem"
|
|
+ depends on PARAVIRT_XEN
|
|
default y
|
|
help
|
|
The xen filesystem provides a way for domains to share
|
|
--- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/Makefile 2011-02-24 14:09:54.000000000 +0100
|
|
@@ -15,6 +15,7 @@ obj-$(CONFIG_XEN) += features.o $(xen-
|
|
obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
|
|
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
|
|
obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
|
|
+obj-$(CONFIG_XENFS) += xenfs/
|
|
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
|
|
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
|
|
obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ blktap2-new/
|
|
--- head-2011-03-17.orig/drivers/xen/balloon/sysfs.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/balloon/sysfs.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -67,7 +67,7 @@ static ssize_t store_target_kb(struct sy
|
|
struct sysdev_attribute *attr,
|
|
const char *buf, size_t count)
|
|
{
|
|
- char memstring[64], *endchar;
|
|
+ char *endchar;
|
|
unsigned long long target_bytes;
|
|
|
|
if (!capable(CAP_SYS_ADMIN))
|
|
@@ -75,11 +75,8 @@ static ssize_t store_target_kb(struct sy
|
|
|
|
if (count <= 1)
|
|
return -EBADMSG; /* runt */
|
|
- if (count > sizeof(memstring))
|
|
- return -EFBIG; /* too long */
|
|
- strcpy(memstring, buf);
|
|
|
|
- target_bytes = memparse(memstring, &endchar);
|
|
+ target_bytes = simple_strtoull(buf, &endchar, 0) << 10;
|
|
balloon_set_new_target(target_bytes >> PAGE_SHIFT);
|
|
|
|
return count;
|
|
@@ -88,8 +85,40 @@ static ssize_t store_target_kb(struct sy
|
|
static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
|
|
show_target_kb, store_target_kb);
|
|
|
|
+static ssize_t show_target(struct sys_device *dev,
|
|
+ struct sysdev_attribute *attr, char *buf)
|
|
+{
|
|
+ return sprintf(buf, "%llu\n",
|
|
+ (unsigned long long)balloon_stats.target_pages
|
|
+ << PAGE_SHIFT);
|
|
+}
|
|
+
|
|
+static ssize_t store_target(struct sys_device *dev,
|
|
+ struct sysdev_attribute *attr,
|
|
+ const char *buf,
|
|
+ size_t count)
|
|
+{
|
|
+ char *endchar;
|
|
+ unsigned long long target_bytes;
|
|
+
|
|
+ if (!capable(CAP_SYS_ADMIN))
|
|
+ return -EPERM;
|
|
+
|
|
+ if (count <= 1)
|
|
+ return -EBADMSG; /* runt */
|
|
+
|
|
+ target_bytes = memparse(buf, &endchar);
|
|
+ balloon_set_new_target(target_bytes >> PAGE_SHIFT);
|
|
+
|
|
+ return count;
|
|
+}
|
|
+
|
|
+static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
|
|
+ show_target, store_target);
|
|
+
|
|
static struct sysdev_attribute *balloon_attrs[] = {
|
|
&attr_target_kb,
|
|
+ &attr_target,
|
|
};
|
|
|
|
static struct attribute *balloon_info_attrs[] = {
|
|
--- head-2011-03-17.orig/drivers/xen/blkfront/vbd.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/blkfront/vbd.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -305,6 +305,10 @@ xlvbd_init_blk_queue(struct gendisk *gd,
|
|
if (rq == NULL)
|
|
return -1;
|
|
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
|
|
+ queue_flag_set_unlocked(QUEUE_FLAG_VIRT, rq);
|
|
+#endif
|
|
+
|
|
/* Hard sector size and max sectors impersonate the equiv. hardware. */
|
|
blk_queue_hardsect_size(rq, sector_size);
|
|
blk_queue_max_sectors(rq, 512);
|
|
--- head-2011-03-17.orig/drivers/xen/core/cpu_hotplug.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/core/cpu_hotplug.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -11,10 +11,10 @@
|
|
* Set of CPUs that remote admin software will allow us to bring online.
|
|
* Notified to us via xenbus.
|
|
*/
|
|
-static cpumask_t xenbus_allowed_cpumask;
|
|
+static cpumask_var_t xenbus_allowed_cpumask;
|
|
|
|
/* Set of CPUs that local admin will allow us to bring online. */
|
|
-static cpumask_t local_allowed_cpumask = CPU_MASK_ALL;
|
|
+static cpumask_var_t local_allowed_cpumask;
|
|
|
|
static int local_cpu_hotplug_request(void)
|
|
{
|
|
@@ -41,11 +41,11 @@ static void vcpu_hotplug(unsigned int cp
|
|
}
|
|
|
|
if (strcmp(state, "online") == 0) {
|
|
- cpu_set(cpu, xenbus_allowed_cpumask);
|
|
+ cpumask_set_cpu(cpu, xenbus_allowed_cpumask);
|
|
if (!cpu_up(cpu) && dev)
|
|
kobject_uevent(&dev->kobj, KOBJ_ONLINE);
|
|
} else if (strcmp(state, "offline") == 0) {
|
|
- cpu_clear(cpu, xenbus_allowed_cpumask);
|
|
+ cpumask_clear_cpu(cpu, xenbus_allowed_cpumask);
|
|
if (!cpu_down(cpu) && dev)
|
|
kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
|
|
} else {
|
|
@@ -78,7 +78,7 @@ static int smpboot_cpu_notify(struct not
|
|
* as it's always executed from within a stopmachine kthread.
|
|
*/
|
|
if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request())
|
|
- cpu_clear(cpu, local_allowed_cpumask);
|
|
+ cpumask_clear_cpu(cpu, local_allowed_cpumask);
|
|
|
|
return NOTIFY_OK;
|
|
}
|
|
@@ -157,21 +157,26 @@ int cpu_up_check(unsigned int cpu)
|
|
int rc = 0;
|
|
|
|
if (local_cpu_hotplug_request()) {
|
|
- cpu_set(cpu, local_allowed_cpumask);
|
|
- if (!cpu_isset(cpu, xenbus_allowed_cpumask)) {
|
|
+ cpumask_set_cpu(cpu, local_allowed_cpumask);
|
|
+ if (!cpumask_test_cpu(cpu, xenbus_allowed_cpumask)) {
|
|
pr_warning("%s: attempt to bring up CPU %u disallowed "
|
|
"by remote admin.\n", __FUNCTION__, cpu);
|
|
rc = -EBUSY;
|
|
}
|
|
- } else if (!cpu_isset(cpu, local_allowed_cpumask) ||
|
|
- !cpu_isset(cpu, xenbus_allowed_cpumask)) {
|
|
+ } else if (!cpumask_test_cpu(cpu, local_allowed_cpumask) ||
|
|
+ !cpumask_test_cpu(cpu, xenbus_allowed_cpumask)) {
|
|
rc = -EBUSY;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
-void init_xenbus_allowed_cpumask(void)
|
|
+void __init init_xenbus_allowed_cpumask(void)
|
|
{
|
|
- xenbus_allowed_cpumask = cpu_present_map;
|
|
+ if (!alloc_cpumask_var(&xenbus_allowed_cpumask, GFP_KERNEL))
|
|
+ BUG();
|
|
+ cpumask_copy(xenbus_allowed_cpumask, cpu_present_mask);
|
|
+ if (!alloc_cpumask_var(&local_allowed_cpumask, GFP_KERNEL))
|
|
+ BUG();
|
|
+ cpumask_setall(local_allowed_cpumask);
|
|
}
|
|
--- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -36,6 +36,7 @@
|
|
#include <linux/sched.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/bootmem.h>
|
|
+#include <linux/ftrace.h>
|
|
#include <linux/version.h>
|
|
#include <asm/atomic.h>
|
|
#include <asm/system.h>
|
|
@@ -57,9 +58,6 @@ static DEFINE_SPINLOCK(irq_mapping_updat
|
|
static int evtchn_to_irq[NR_EVENT_CHANNELS] = {
|
|
[0 ... NR_EVENT_CHANNELS-1] = -1 };
|
|
|
|
-/* Packed IRQ information: binding type, sub-type index, and event channel. */
|
|
-static u32 irq_info[NR_IRQS];
|
|
-
|
|
/* Binding types. */
|
|
enum {
|
|
IRQT_UNBOUND,
|
|
@@ -75,6 +73,30 @@ enum {
|
|
#define _EVTCHN_BITS 12
|
|
#define _INDEX_BITS (32 - _IRQT_BITS - _EVTCHN_BITS)
|
|
|
|
+/* Convenient shorthand for packed representation of an unbound IRQ. */
|
|
+#define IRQ_UNBOUND (IRQT_UNBOUND << (32 - _IRQT_BITS))
|
|
+
|
|
+static struct irq_cfg _irq_cfg[] = {
|
|
+ [0 ...
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+ BUILD_BUG_ON_ZERO(PIRQ_BASE) + NR_IRQS_LEGACY
|
|
+#else
|
|
+ NR_IRQS
|
|
+#endif
|
|
+ - 1].info = IRQ_UNBOUND
|
|
+};
|
|
+
|
|
+static inline struct irq_cfg *__pure irq_cfg(unsigned int irq)
|
|
+{
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ return desc ? desc->chip_data : NULL;
|
|
+#else
|
|
+ return irq < NR_IRQS ? _irq_cfg + irq : NULL;
|
|
+#endif
|
|
+}
|
|
+
|
|
/* Constructor for packed IRQ information. */
|
|
static inline u32 mk_irq_info(u32 type, u32 index, u32 evtchn)
|
|
{
|
|
@@ -90,26 +112,30 @@ static inline u32 mk_irq_info(u32 type,
|
|
return ((type << (32 - _IRQT_BITS)) | (index << _EVTCHN_BITS) | evtchn);
|
|
}
|
|
|
|
-/* Convenient shorthand for packed representation of an unbound IRQ. */
|
|
-#define IRQ_UNBOUND mk_irq_info(IRQT_UNBOUND, 0, 0)
|
|
-
|
|
/*
|
|
* Accessors for packed IRQ information.
|
|
*/
|
|
|
|
static inline unsigned int evtchn_from_irq(int irq)
|
|
{
|
|
- return irq_info[irq] & ((1U << _EVTCHN_BITS) - 1);
|
|
+ const struct irq_cfg *cfg = irq_cfg(irq);
|
|
+
|
|
+ return cfg ? cfg->info & ((1U << _EVTCHN_BITS) - 1) : 0;
|
|
}
|
|
|
|
static inline unsigned int index_from_irq(int irq)
|
|
{
|
|
- return (irq_info[irq] >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1);
|
|
+ const struct irq_cfg *cfg = irq_cfg(irq);
|
|
+
|
|
+ return cfg ? (cfg->info >> _EVTCHN_BITS) & ((1U << _INDEX_BITS) - 1)
|
|
+ : 0;
|
|
}
|
|
|
|
static inline unsigned int type_from_irq(int irq)
|
|
{
|
|
- return irq_info[irq] >> (32 - _IRQT_BITS);
|
|
+ const struct irq_cfg *cfg = irq_cfg(irq);
|
|
+
|
|
+ return cfg ? cfg->info >> (32 - _IRQT_BITS) : IRQT_UNBOUND;
|
|
}
|
|
|
|
/* IRQ <-> VIRQ mapping. */
|
|
@@ -121,9 +147,6 @@ DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS
|
|
#endif
|
|
DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
|
|
|
|
-/* Reference counts for bindings to IRQs. */
|
|
-static int irq_bindcount[NR_IRQS];
|
|
-
|
|
#ifdef CONFIG_SMP
|
|
|
|
#if CONFIG_NR_CPUS <= 256
|
|
@@ -161,8 +184,12 @@ static void init_evtchn_cpu_bindings(voi
|
|
int i;
|
|
|
|
/* By default all event channels notify CPU#0. */
|
|
- for (i = 0; i < NR_IRQS; i++)
|
|
- irq_to_desc(i)->affinity = cpumask_of_cpu(0);
|
|
+ for (i = 0; i < nr_irqs; i++) {
|
|
+ struct irq_desc *desc = irq_to_desc(i);
|
|
+
|
|
+ if (desc)
|
|
+ desc->affinity = cpumask_of_cpu(0);
|
|
+ }
|
|
|
|
memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
|
|
for_each_possible_cpu(i)
|
|
@@ -239,7 +266,7 @@ static DEFINE_PER_CPU(unsigned int, curr
|
|
static DEFINE_PER_CPU(unsigned int, current_l2i);
|
|
|
|
/* NB. Interrupts are disabled on entry. */
|
|
-asmlinkage void evtchn_do_upcall(struct pt_regs *regs)
|
|
+asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs)
|
|
{
|
|
unsigned long l1, l2;
|
|
unsigned long masked_l1, masked_l2;
|
|
@@ -341,14 +368,25 @@ asmlinkage void evtchn_do_upcall(struct
|
|
irq_exit();
|
|
}
|
|
|
|
-static int find_unbound_irq(void)
|
|
+static struct irq_chip dynirq_chip;
|
|
+
|
|
+static int find_unbound_irq(unsigned int cpu)
|
|
{
|
|
static int warned;
|
|
int irq;
|
|
|
|
- for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++)
|
|
- if (irq_bindcount[irq] == 0)
|
|
+ for (irq = DYNIRQ_BASE; irq < (DYNIRQ_BASE + NR_DYNIRQS); irq++) {
|
|
+ struct irq_desc *desc = irq_to_desc_alloc_cpu(irq, cpu);
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
+
|
|
+ if (!cfg->bindcount) {
|
|
+ desc->status |= IRQ_NOPROBE;
|
|
+ set_irq_chip_and_handler_name(irq, &dynirq_chip,
|
|
+ handle_fasteoi_irq,
|
|
+ "fasteoi");
|
|
return irq;
|
|
+ }
|
|
+ }
|
|
|
|
if (!warned) {
|
|
warned = 1;
|
|
@@ -366,14 +404,15 @@ static int bind_caller_port_to_irq(unsig
|
|
spin_lock(&irq_mapping_update_lock);
|
|
|
|
if ((irq = evtchn_to_irq[caller_port]) == -1) {
|
|
- if ((irq = find_unbound_irq()) < 0)
|
|
+ if ((irq = find_unbound_irq(smp_processor_id())) < 0)
|
|
goto out;
|
|
|
|
evtchn_to_irq[caller_port] = irq;
|
|
- irq_info[irq] = mk_irq_info(IRQT_CALLER_PORT, 0, caller_port);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_CALLER_PORT,
|
|
+ 0, caller_port);
|
|
}
|
|
|
|
- irq_bindcount[irq]++;
|
|
+ irq_cfg(irq)->bindcount++;
|
|
|
|
out:
|
|
spin_unlock(&irq_mapping_update_lock);
|
|
@@ -388,7 +427,7 @@ static int bind_local_port_to_irq(unsign
|
|
|
|
BUG_ON(evtchn_to_irq[local_port] != -1);
|
|
|
|
- if ((irq = find_unbound_irq()) < 0) {
|
|
+ if ((irq = find_unbound_irq(smp_processor_id())) < 0) {
|
|
struct evtchn_close close = { .port = local_port };
|
|
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
|
|
BUG();
|
|
@@ -396,8 +435,8 @@ static int bind_local_port_to_irq(unsign
|
|
}
|
|
|
|
evtchn_to_irq[local_port] = irq;
|
|
- irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
|
|
- irq_bindcount[irq]++;
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
|
|
+ irq_cfg(irq)->bindcount++;
|
|
|
|
out:
|
|
spin_unlock(&irq_mapping_update_lock);
|
|
@@ -441,7 +480,7 @@ static int bind_virq_to_irq(unsigned int
|
|
spin_lock(&irq_mapping_update_lock);
|
|
|
|
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) {
|
|
- if ((irq = find_unbound_irq()) < 0)
|
|
+ if ((irq = find_unbound_irq(cpu)) < 0)
|
|
goto out;
|
|
|
|
bind_virq.virq = virq;
|
|
@@ -452,14 +491,14 @@ static int bind_virq_to_irq(unsigned int
|
|
evtchn = bind_virq.port;
|
|
|
|
evtchn_to_irq[evtchn] = irq;
|
|
- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
|
|
|
|
per_cpu(virq_to_irq, cpu)[virq] = irq;
|
|
|
|
bind_evtchn_to_cpu(evtchn, cpu);
|
|
}
|
|
|
|
- irq_bindcount[irq]++;
|
|
+ irq_cfg(irq)->bindcount++;
|
|
|
|
out:
|
|
spin_unlock(&irq_mapping_update_lock);
|
|
@@ -474,7 +513,7 @@ static int bind_ipi_to_irq(unsigned int
|
|
spin_lock(&irq_mapping_update_lock);
|
|
|
|
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1) {
|
|
- if ((irq = find_unbound_irq()) < 0)
|
|
+ if ((irq = find_unbound_irq(cpu)) < 0)
|
|
goto out;
|
|
|
|
bind_ipi.vcpu = cpu;
|
|
@@ -484,14 +523,14 @@ static int bind_ipi_to_irq(unsigned int
|
|
evtchn = bind_ipi.port;
|
|
|
|
evtchn_to_irq[evtchn] = irq;
|
|
- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
|
|
|
|
per_cpu(ipi_to_irq, cpu)[ipi] = irq;
|
|
|
|
bind_evtchn_to_cpu(evtchn, cpu);
|
|
}
|
|
|
|
- irq_bindcount[irq]++;
|
|
+ irq_cfg(irq)->bindcount++;
|
|
|
|
out:
|
|
spin_unlock(&irq_mapping_update_lock);
|
|
@@ -506,7 +545,7 @@ static void unbind_from_irq(unsigned int
|
|
|
|
spin_lock(&irq_mapping_update_lock);
|
|
|
|
- if ((--irq_bindcount[irq] == 0) && VALID_EVTCHN(evtchn)) {
|
|
+ if (!--irq_cfg(irq)->bindcount && VALID_EVTCHN(evtchn)) {
|
|
close.port = evtchn;
|
|
if ((type_from_irq(irq) != IRQT_CALLER_PORT) &&
|
|
HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
|
|
@@ -529,11 +568,15 @@ static void unbind_from_irq(unsigned int
|
|
bind_evtchn_to_cpu(evtchn, 0);
|
|
|
|
evtchn_to_irq[evtchn] = -1;
|
|
- irq_info[irq] = IRQ_UNBOUND;
|
|
+ irq_cfg(irq)->info = IRQ_UNBOUND;
|
|
|
|
/* Zap stats across IRQ changes of use. */
|
|
for_each_possible_cpu(cpu)
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+ irq_to_desc(irq)->kstat_irqs[cpu] = 0;
|
|
+#else
|
|
kstat_cpu(cpu).irqs[irq] = 0;
|
|
+#endif
|
|
}
|
|
|
|
spin_unlock(&irq_mapping_update_lock);
|
|
@@ -685,10 +728,9 @@ static void rebind_irq_to_cpu(unsigned i
|
|
rebind_evtchn_to_cpu(evtchn, tcpu);
|
|
}
|
|
|
|
-static void set_affinity_irq(unsigned int irq, cpumask_t dest)
|
|
+static void set_affinity_irq(unsigned int irq, const struct cpumask *dest)
|
|
{
|
|
- unsigned tcpu = first_cpu(dest);
|
|
- rebind_irq_to_cpu(irq, tcpu);
|
|
+ rebind_irq_to_cpu(irq, cpumask_first(dest));
|
|
}
|
|
#endif
|
|
|
|
@@ -854,7 +896,7 @@ static void enable_pirq(unsigned int irq
|
|
|
|
evtchn_to_irq[evtchn] = irq;
|
|
bind_evtchn_to_cpu(evtchn, 0);
|
|
- irq_info[irq] = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, bind_pirq.pirq, evtchn);
|
|
|
|
out:
|
|
pirq_unmask_and_notify(evtchn, irq);
|
|
@@ -884,7 +926,7 @@ static void shutdown_pirq(unsigned int i
|
|
|
|
bind_evtchn_to_cpu(evtchn, 0);
|
|
evtchn_to_irq[evtchn] = -1;
|
|
- irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
|
|
}
|
|
|
|
static void unmask_pirq(unsigned int irq)
|
|
@@ -1009,7 +1051,7 @@ static void restore_cpu_virqs(unsigned i
|
|
if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1)
|
|
continue;
|
|
|
|
- BUG_ON(irq_info[irq] != mk_irq_info(IRQT_VIRQ, virq, 0));
|
|
+ BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_VIRQ, virq, 0));
|
|
|
|
/* Get a new binding from Xen. */
|
|
bind_virq.virq = virq;
|
|
@@ -1021,7 +1063,7 @@ static void restore_cpu_virqs(unsigned i
|
|
|
|
/* Record the new mapping. */
|
|
evtchn_to_irq[evtchn] = irq;
|
|
- irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_VIRQ, virq, evtchn);
|
|
bind_evtchn_to_cpu(evtchn, cpu);
|
|
|
|
/* Ready for use. */
|
|
@@ -1038,7 +1080,7 @@ static void restore_cpu_ipis(unsigned in
|
|
if ((irq = per_cpu(ipi_to_irq, cpu)[ipi]) == -1)
|
|
continue;
|
|
|
|
- BUG_ON(irq_info[irq] != mk_irq_info(IRQT_IPI, ipi, 0));
|
|
+ BUG_ON(irq_cfg(irq)->info != mk_irq_info(IRQT_IPI, ipi, 0));
|
|
|
|
/* Get a new binding from Xen. */
|
|
bind_ipi.vcpu = cpu;
|
|
@@ -1049,7 +1091,7 @@ static void restore_cpu_ipis(unsigned in
|
|
|
|
/* Record the new mapping. */
|
|
evtchn_to_irq[evtchn] = irq;
|
|
- irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_IPI, ipi, evtchn);
|
|
bind_evtchn_to_cpu(evtchn, cpu);
|
|
|
|
/* Ready for use. */
|
|
@@ -1061,6 +1103,7 @@ static void restore_cpu_ipis(unsigned in
|
|
void irq_resume(void)
|
|
{
|
|
unsigned int cpu, irq, evtchn;
|
|
+ struct irq_cfg *cfg;
|
|
|
|
init_evtchn_cpu_bindings();
|
|
|
|
@@ -1077,12 +1120,17 @@ void irq_resume(void)
|
|
mask_evtchn(evtchn);
|
|
|
|
/* Check that no PIRQs are still bound. */
|
|
- for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++)
|
|
- BUG_ON(irq_info[irq] != IRQ_UNBOUND);
|
|
+ for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++) {
|
|
+ cfg = irq_cfg(irq);
|
|
+ BUG_ON(cfg && cfg->info != IRQ_UNBOUND);
|
|
+ }
|
|
|
|
/* No IRQ <-> event-channel mappings. */
|
|
- for (irq = 0; irq < NR_IRQS; irq++)
|
|
- irq_info[irq] &= ~((1U << _EVTCHN_BITS) - 1);
|
|
+ for (irq = 0; irq < nr_irqs; irq++) {
|
|
+ cfg = irq_cfg(irq);
|
|
+ if (cfg)
|
|
+ cfg->info &= ~((1U << _EVTCHN_BITS) - 1);
|
|
+ }
|
|
for (evtchn = 0; evtchn < NR_EVENT_CHANNELS; evtchn++)
|
|
evtchn_to_irq[evtchn] = -1;
|
|
|
|
@@ -1094,10 +1142,56 @@ void irq_resume(void)
|
|
}
|
|
#endif
|
|
|
|
+int __init arch_early_irq_init(void)
|
|
+{
|
|
+ unsigned int i;
|
|
+
|
|
+ for (i = 0; i < ARRAY_SIZE(_irq_cfg); i++)
|
|
+ irq_to_desc(i)->chip_data = _irq_cfg + i;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+int arch_init_chip_data(struct irq_desc *desc, int cpu)
|
|
+{
|
|
+ if (!desc->chip_data) {
|
|
+ /* By default all event channels notify CPU#0. */
|
|
+ desc->affinity = cpumask_of_cpu(0);
|
|
+
|
|
+ desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC);
|
|
+ }
|
|
+ if (!desc->chip_data) {
|
|
+ pr_emerg("cannot alloc irq_cfg\n");
|
|
+ BUG();
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
#if defined(CONFIG_X86_IO_APIC)
|
|
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
|
|
+{
|
|
+ struct physdev_irq irq_op;
|
|
+
|
|
+ if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (cfg->vector)
|
|
+ return 0;
|
|
+
|
|
+ irq_op.irq = irq;
|
|
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
+ return -ENOSPC;
|
|
+
|
|
+ cfg->vector = irq_op.vector;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE))
|
|
#elif defined(CONFIG_X86)
|
|
-#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < 16)
|
|
+#define identity_mapped_irq(irq) (((irq) - PIRQ_BASE) < NR_IRQS_LEGACY)
|
|
#else
|
|
#define identity_mapped_irq(irq) (1)
|
|
#endif
|
|
@@ -1107,7 +1201,7 @@ void evtchn_register_pirq(int irq)
|
|
BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS);
|
|
if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
|
|
return;
|
|
- irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
|
|
+ irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0);
|
|
set_irq_chip_and_handler_name(irq, &pirq_chip, handle_fasteoi_irq,
|
|
"fasteoi");
|
|
}
|
|
@@ -1120,12 +1214,17 @@ int evtchn_map_pirq(int irq, int xen_pir
|
|
irq = PIRQ_BASE + NR_PIRQS - 1;
|
|
spin_lock(&irq_alloc_lock);
|
|
do {
|
|
+ struct irq_desc *desc;
|
|
+ struct irq_cfg *cfg;
|
|
+
|
|
if (identity_mapped_irq(irq))
|
|
continue;
|
|
+ desc = irq_to_desc_alloc_cpu(irq, smp_processor_id());
|
|
+ cfg = desc->chip_data;
|
|
if (!index_from_irq(irq)) {
|
|
BUG_ON(type_from_irq(irq) != IRQT_UNBOUND);
|
|
- irq_info[irq] = mk_irq_info(IRQT_PIRQ,
|
|
- xen_pirq, 0);
|
|
+ cfg->info = mk_irq_info(IRQT_PIRQ,
|
|
+ xen_pirq, 0);
|
|
break;
|
|
}
|
|
} while (--irq >= PIRQ_BASE);
|
|
@@ -1144,7 +1243,7 @@ int evtchn_map_pirq(int irq, int xen_pir
|
|
* then causes a warning in dynamic_irq_cleanup().
|
|
*/
|
|
set_irq_chip_and_handler(irq, NULL, NULL);
|
|
- irq_info[irq] = IRQ_UNBOUND;
|
|
+ irq_cfg(irq)->info = IRQ_UNBOUND;
|
|
return 0;
|
|
} else if (type_from_irq(irq) != IRQT_PIRQ
|
|
|| index_from_irq(irq) != xen_pirq) {
|
|
@@ -1181,23 +1280,17 @@ void __init xen_init_IRQ(void)
|
|
for (i = 0; i < NR_EVENT_CHANNELS; i++)
|
|
mask_evtchn(i);
|
|
|
|
- /* No IRQ -> event-channel mappings. */
|
|
- for (i = 0; i < NR_IRQS; i++)
|
|
- irq_info[i] = IRQ_UNBOUND;
|
|
-
|
|
- /* Dynamic IRQ space is currently unbound. Zero the refcnts. */
|
|
+#ifndef CONFIG_SPARSE_IRQ
|
|
for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
|
|
- irq_bindcount[i] = 0;
|
|
-
|
|
irq_to_desc(i)->status |= IRQ_NOPROBE;
|
|
set_irq_chip_and_handler_name(i, &dynirq_chip,
|
|
handle_fasteoi_irq, "fasteoi");
|
|
}
|
|
|
|
- /* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
|
|
for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) {
|
|
- irq_bindcount[i] = 1;
|
|
-
|
|
+#else
|
|
+ for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_IRQS_LEGACY); i++) {
|
|
+#endif
|
|
if (!identity_mapped_irq(i))
|
|
continue;
|
|
|
|
--- head-2011-03-17.orig/drivers/xen/core/machine_reboot.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/core/machine_reboot.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -19,6 +19,9 @@
|
|
#include <xen/interface/vcpu.h>
|
|
|
|
#if defined(__i386__) || defined(__x86_64__)
|
|
+#include <asm/pci_x86.h>
|
|
+/* TBD: Dom0 should propagate the determined value to Xen. */
|
|
+bool port_cf9_safe = false;
|
|
|
|
/*
|
|
* Power off function, if any
|
|
@@ -79,7 +82,7 @@ static void post_suspend(int suspend_can
|
|
pfn_to_mfn(xen_start_info->console.domU.mfn);
|
|
} else {
|
|
#ifdef CONFIG_SMP
|
|
- cpu_initialized_map = cpu_online_map;
|
|
+ cpumask_copy(vcpu_initialized_mask, cpu_online_mask);
|
|
#endif
|
|
for_each_possible_cpu(i)
|
|
setup_runstate_area(i);
|
|
@@ -219,6 +222,12 @@ int __xen_suspend(int fast_suspend, void
|
|
if (num_possible_cpus() == 1)
|
|
fast_suspend = 0;
|
|
|
|
+ if (fast_suspend) {
|
|
+ err = stop_machine_create();
|
|
+ if (err)
|
|
+ return err;
|
|
+ }
|
|
+
|
|
suspend.fast_suspend = fast_suspend;
|
|
suspend.resume_notifier = resume_notifier;
|
|
|
|
@@ -245,6 +254,8 @@ int __xen_suspend(int fast_suspend, void
|
|
|
|
if (!fast_suspend)
|
|
smp_resume();
|
|
+ else
|
|
+ stop_machine_destroy();
|
|
|
|
return 0;
|
|
}
|
|
--- head-2011-03-17.orig/drivers/xen/core/smpboot.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/core/smpboot.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -33,11 +33,7 @@ extern void failsafe_callback(void);
|
|
extern void system_call(void);
|
|
extern void smp_trap_init(trap_info_t *);
|
|
|
|
-cpumask_t cpu_online_map;
|
|
-EXPORT_SYMBOL(cpu_online_map);
|
|
-cpumask_t cpu_possible_map;
|
|
-EXPORT_SYMBOL(cpu_possible_map);
|
|
-cpumask_t cpu_initialized_map;
|
|
+cpumask_var_t vcpu_initialized_mask;
|
|
|
|
DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
|
|
EXPORT_PER_CPU_SYMBOL(cpu_info);
|
|
@@ -64,10 +60,14 @@ void __init prefill_possible_map(void)
|
|
#endif
|
|
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
|
|
if (rc >= 0) {
|
|
- cpu_set(i, cpu_possible_map);
|
|
+ set_cpu_possible(i, true);
|
|
nr_cpu_ids = i + 1;
|
|
}
|
|
}
|
|
+ total_cpus = num_possible_cpus();
|
|
+ for (; HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) >= 0; ++i)
|
|
+ if (i != smp_processor_id())
|
|
+ ++total_cpus;
|
|
}
|
|
|
|
static int __cpuinit xen_smp_intr_init(unsigned int cpu)
|
|
@@ -167,7 +167,7 @@ static void __cpuinit cpu_initialize_con
|
|
|
|
struct task_struct *idle = idle_task(cpu);
|
|
|
|
- if (cpu_test_and_set(cpu, cpu_initialized_map))
|
|
+ if (cpumask_test_and_set_cpu(cpu, vcpu_initialized_mask))
|
|
return;
|
|
|
|
spin_lock(&ctxt_lock);
|
|
@@ -237,13 +237,15 @@ void __init smp_prepare_cpus(unsigned in
|
|
if (xen_smp_intr_init(0))
|
|
BUG();
|
|
|
|
- cpu_initialized_map = cpumask_of_cpu(0);
|
|
+ if (!alloc_cpumask_var(&vcpu_initialized_mask, GFP_KERNEL))
|
|
+ BUG();
|
|
+ cpumask_copy(vcpu_initialized_mask, cpumask_of(0));
|
|
|
|
/* Restrict the possible_map according to max_cpus. */
|
|
while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
|
|
- for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
|
|
+ for (cpu = nr_cpu_ids-1; !cpu_possible(cpu); cpu--)
|
|
continue;
|
|
- cpu_clear(cpu, cpu_possible_map);
|
|
+ set_cpu_possible(cpu, false);
|
|
}
|
|
|
|
for_each_possible_cpu (cpu) {
|
|
@@ -278,10 +280,8 @@ void __init smp_prepare_cpus(unsigned in
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
if (is_initial_xendomain())
|
|
- cpu_set(cpu, cpu_present_map);
|
|
-#else
|
|
- cpu_set(cpu, cpu_present_map);
|
|
#endif
|
|
+ set_cpu_present(cpu, true);
|
|
}
|
|
|
|
init_xenbus_allowed_cpumask();
|
|
@@ -314,22 +314,24 @@ void __init smp_prepare_boot_cpu(void)
|
|
*/
|
|
static int __init initialize_cpu_present_map(void)
|
|
{
|
|
- cpu_present_map = cpu_possible_map;
|
|
+ unsigned int cpu;
|
|
+
|
|
+ for_each_possible_cpu(cpu)
|
|
+ set_cpu_present(cpu, true);
|
|
+
|
|
return 0;
|
|
}
|
|
core_initcall(initialize_cpu_present_map);
|
|
|
|
int __cpuinit __cpu_disable(void)
|
|
{
|
|
- cpumask_t map = cpu_online_map;
|
|
unsigned int cpu = smp_processor_id();
|
|
|
|
if (cpu == 0)
|
|
return -EBUSY;
|
|
|
|
- cpu_clear(cpu, map);
|
|
- fixup_irqs(map);
|
|
- cpu_clear(cpu, cpu_online_map);
|
|
+ set_cpu_online(cpu, false);
|
|
+ fixup_irqs();
|
|
|
|
return 0;
|
|
}
|
|
@@ -369,7 +371,7 @@ int __cpuinit __cpu_up(unsigned int cpu)
|
|
if (rc)
|
|
return rc;
|
|
|
|
- cpu_set(cpu, cpu_online_map);
|
|
+ set_cpu_online(cpu, true);
|
|
|
|
rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
|
|
BUG_ON(rc);
|
|
@@ -381,7 +383,7 @@ void __ref play_dead(void)
|
|
{
|
|
idle_task_exit();
|
|
local_irq_disable();
|
|
- cpu_clear(smp_processor_id(), cpu_initialized);
|
|
+ cpumask_clear_cpu(smp_processor_id(), cpu_initialized_mask);
|
|
preempt_enable_no_resched();
|
|
VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
--- head-2011-03-17.orig/drivers/xen/netback/interface.c 2011-03-17 14:12:41.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/netback/interface.c 2011-02-17 10:15:18.000000000 +0100
|
|
@@ -222,6 +222,15 @@ static struct ethtool_ops network_ethtoo
|
|
.get_strings = netbk_get_strings,
|
|
};
|
|
|
|
+static const struct net_device_ops netif_be_netdev_ops = {
|
|
+ .ndo_open = net_open,
|
|
+ .ndo_stop = net_close,
|
|
+ .ndo_start_xmit = netif_be_start_xmit,
|
|
+ .ndo_change_mtu = netbk_change_mtu,
|
|
+ .ndo_set_mac_address = eth_mac_addr,
|
|
+ .ndo_validate_addr = eth_validate_addr,
|
|
+};
|
|
+
|
|
netif_t *netif_alloc(struct device *parent, domid_t domid, unsigned int handle)
|
|
{
|
|
int err = 0;
|
|
@@ -258,10 +267,7 @@ netif_t *netif_alloc(struct device *pare
|
|
|
|
init_timer(&netif->tx_queue_timeout);
|
|
|
|
- dev->hard_start_xmit = netif_be_start_xmit;
|
|
- dev->open = net_open;
|
|
- dev->stop = net_close;
|
|
- dev->change_mtu = netbk_change_mtu;
|
|
+ dev->netdev_ops = &netif_be_netdev_ops;
|
|
|
|
netif_set_features(netif);
|
|
|
|
--- head-2011-03-17.orig/drivers/xen/netback/loopback.c 2011-01-31 17:32:29.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/netback/loopback.c 2011-03-01 11:52:05.000000000 +0100
|
|
@@ -155,7 +155,6 @@ static int loopback_start_xmit(struct sk
|
|
|
|
skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
|
|
skb->protocol = eth_type_trans(skb, dev);
|
|
- dev->last_rx = jiffies;
|
|
|
|
/* Flush netfilter context: rx'ed skbuffs not expected to have any. */
|
|
nf_reset(skb);
|
|
@@ -194,6 +193,14 @@ static void loopback_set_multicast_list(
|
|
{
|
|
}
|
|
|
|
+static const struct net_device_ops loopback_netdev_ops = {
|
|
+ .ndo_open = loopback_open,
|
|
+ .ndo_stop = loopback_close,
|
|
+ .ndo_start_xmit = loopback_start_xmit,
|
|
+ .ndo_set_multicast_list = loopback_set_multicast_list,
|
|
+ .ndo_change_mtu = NULL, /* allow arbitrary mtu */
|
|
+};
|
|
+
|
|
static void loopback_construct(struct net_device *dev, struct net_device *lo,
|
|
int loop_idx)
|
|
{
|
|
@@ -202,12 +209,7 @@ static void loopback_construct(struct ne
|
|
np->loopback_dev = lo;
|
|
np->loop_idx = loop_idx;
|
|
|
|
- dev->open = loopback_open;
|
|
- dev->stop = loopback_close;
|
|
- dev->hard_start_xmit = loopback_start_xmit;
|
|
- dev->set_multicast_list = loopback_set_multicast_list;
|
|
- dev->change_mtu = NULL; /* allow arbitrary mtu */
|
|
-
|
|
+ dev->netdev_ops = &loopback_netdev_ops;
|
|
dev->tx_queue_len = 0;
|
|
|
|
dev->features = (NETIF_F_HIGHDMA |
|
|
--- head-2011-03-17.orig/drivers/xen/netback/netback.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/netback/netback.c 2011-03-01 11:52:09.000000000 +0100
|
|
@@ -363,7 +363,7 @@ static void xen_network_done_notify(void
|
|
static struct net_device *eth0_dev = NULL;
|
|
if (unlikely(eth0_dev == NULL))
|
|
eth0_dev = __dev_get_by_name(&init_net, "eth0");
|
|
- netif_rx_schedule(eth0_dev, ???);
|
|
+ netif_rx_schedule(???);
|
|
}
|
|
/*
|
|
* Add following to poll() function in NAPI driver (Tigon3 is example):
|
|
@@ -1495,7 +1495,6 @@ static void net_tx_action(unsigned long
|
|
dev->stats.rx_packets++;
|
|
|
|
netif_rx(skb);
|
|
- dev->last_rx = jiffies;
|
|
}
|
|
|
|
out:
|
|
--- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-09 16:04:26.000000000 +0100
|
|
@@ -632,7 +632,7 @@ static int network_open(struct net_devic
|
|
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
|
|
netfront_accelerator_call_stop_napi_irq(np, dev);
|
|
|
|
- netif_rx_schedule(dev, &np->napi);
|
|
+ netif_rx_schedule(&np->napi);
|
|
}
|
|
}
|
|
spin_unlock_bh(&np->rx_lock);
|
|
@@ -703,7 +703,7 @@ static void rx_refill_timeout(unsigned l
|
|
|
|
netfront_accelerator_call_stop_napi_irq(np, dev);
|
|
|
|
- netif_rx_schedule(dev, &np->napi);
|
|
+ netif_rx_schedule(&np->napi);
|
|
}
|
|
|
|
static void network_alloc_rx_buffers(struct net_device *dev)
|
|
@@ -1057,8 +1057,7 @@ static irqreturn_t netif_int(int irq, vo
|
|
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
|
|
netfront_accelerator_call_stop_napi_irq(np, dev);
|
|
|
|
- netif_rx_schedule(dev, &np->napi);
|
|
- dev->last_rx = jiffies;
|
|
+ netif_rx_schedule(&np->napi);
|
|
}
|
|
}
|
|
|
|
@@ -1474,7 +1473,6 @@ err:
|
|
|
|
/* Pass it up. */
|
|
netif_receive_skb(skb);
|
|
- dev->last_rx = jiffies;
|
|
}
|
|
|
|
/* If we get a callback with very few responses, reduce fill target. */
|
|
@@ -1516,7 +1514,7 @@ err:
|
|
}
|
|
|
|
if (!more_to_do && !accel_more_to_do)
|
|
- __netif_rx_complete(dev, napi);
|
|
+ __netif_rx_complete(napi);
|
|
|
|
local_irq_restore(flags);
|
|
}
|
|
@@ -2069,6 +2067,18 @@ static void network_set_multicast_list(s
|
|
{
|
|
}
|
|
|
|
+static const struct net_device_ops xennet_netdev_ops = {
|
|
+ .ndo_uninit = netif_uninit,
|
|
+ .ndo_open = network_open,
|
|
+ .ndo_stop = network_close,
|
|
+ .ndo_start_xmit = network_start_xmit,
|
|
+ .ndo_set_multicast_list = network_set_multicast_list,
|
|
+ .ndo_set_mac_address = xennet_set_mac_address,
|
|
+ .ndo_validate_addr = eth_validate_addr,
|
|
+ .ndo_change_mtu = xennet_change_mtu,
|
|
+ .ndo_get_stats = network_get_stats,
|
|
+};
|
|
+
|
|
static struct net_device * __devinit create_netdev(struct xenbus_device *dev)
|
|
{
|
|
int i, err = 0;
|
|
@@ -2124,15 +2134,8 @@ static struct net_device * __devinit cre
|
|
goto exit_free_tx;
|
|
}
|
|
|
|
- netdev->open = network_open;
|
|
- netdev->hard_start_xmit = network_start_xmit;
|
|
- netdev->stop = network_close;
|
|
- netdev->get_stats = network_get_stats;
|
|
+ netdev->netdev_ops = &xennet_netdev_ops;
|
|
netif_napi_add(netdev, &np->napi, netif_poll, 64);
|
|
- netdev->set_multicast_list = network_set_multicast_list;
|
|
- netdev->uninit = netif_uninit;
|
|
- netdev->set_mac_address = xennet_set_mac_address;
|
|
- netdev->change_mtu = xennet_change_mtu;
|
|
netdev->features = NETIF_F_IP_CSUM;
|
|
|
|
SET_ETHTOOL_OPS(netdev, &network_ethtool_ops);
|
|
@@ -2163,7 +2166,7 @@ inetdev_notify(struct notifier_block *th
|
|
struct net_device *dev = ifa->ifa_dev->dev;
|
|
|
|
/* UP event and is it one of our devices? */
|
|
- if (event == NETDEV_UP && dev->open == network_open)
|
|
+ if (event == NETDEV_UP && dev->netdev_ops->ndo_open == network_open)
|
|
send_fake_arp(dev);
|
|
|
|
return NOTIFY_DONE;
|
|
--- head-2011-03-17.orig/drivers/xen/sfc_netfront/accel_msg.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/sfc_netfront/accel_msg.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -47,7 +47,7 @@ static void vnic_start_interrupts(netfro
|
|
netfront_accel_disable_net_interrupts(vnic);
|
|
vnic->irq_enabled = 0;
|
|
NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
|
|
- netif_rx_schedule(vnic->net_dev, &np->napi);
|
|
+ netif_rx_schedule(&np->napi);
|
|
} else {
|
|
/*
|
|
* Nothing yet, make sure we get interrupts through
|
|
@@ -532,7 +532,7 @@ irqreturn_t netfront_accel_net_channel_i
|
|
vnic->stats.event_count_since_irq;
|
|
vnic->stats.event_count_since_irq = 0;
|
|
#endif
|
|
- netif_rx_schedule(net_dev, &np->napi);
|
|
+ netif_rx_schedule(&np->napi);
|
|
}
|
|
else {
|
|
spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
|
|
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_client.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_client.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -169,7 +169,6 @@ EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
|
|
/**
|
|
* xenbus_switch_state
|
|
* @dev: xenbus device
|
|
- * @xbt: transaction handle
|
|
* @state: new state
|
|
*
|
|
* Advertise in the store a change of the given driver to the given new_state.
|
|
@@ -302,7 +301,7 @@ EXPORT_SYMBOL_GPL(xenbus_dev_error);
|
|
* @fmt: error message format
|
|
*
|
|
* Equivalent to xenbus_dev_error(dev, err, fmt, args), followed by
|
|
- * xenbus_switch_state(dev, NULL, XenbusStateClosing) to schedule an orderly
|
|
+ * xenbus_switch_state(dev, XenbusStateClosing) to schedule an orderly
|
|
* closedown of this driver and its peer.
|
|
*/
|
|
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt, ...)
|
|
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -42,6 +42,7 @@
|
|
#include <linux/ctype.h>
|
|
#include <linux/fcntl.h>
|
|
#include <linux/mm.h>
|
|
+#include <linux/proc_fs.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/mutex.h>
|
|
#include <linux/io.h>
|
|
@@ -73,6 +74,10 @@
|
|
#endif
|
|
|
|
int xen_store_evtchn;
|
|
+#if !defined(CONFIG_XEN) && !defined(MODULE)
|
|
+EXPORT_SYMBOL(xen_store_evtchn);
|
|
+#endif
|
|
+
|
|
struct xenstore_domain_interface *xen_store_interface;
|
|
|
|
static unsigned long xen_store_mfn;
|
|
@@ -198,6 +203,12 @@ static int xenbus_uevent_frontend(struct
|
|
}
|
|
#endif
|
|
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
|
|
+static struct device_attribute xenbus_dev_attrs[] = {
|
|
+ __ATTR_NULL
|
|
+};
|
|
+#endif
|
|
+
|
|
/* Bus type for frontend drivers. */
|
|
static struct xen_bus_type xenbus_frontend = {
|
|
.root = "device",
|
|
@@ -206,13 +217,16 @@ static struct xen_bus_type xenbus_fronte
|
|
.probe = xenbus_probe_frontend,
|
|
.error = -ENODEV,
|
|
.bus = {
|
|
- .name = "xen",
|
|
- .match = xenbus_match,
|
|
+ .name = "xen",
|
|
+ .match = xenbus_match,
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)
|
|
- .probe = xenbus_dev_probe,
|
|
- .remove = xenbus_dev_remove,
|
|
- .shutdown = xenbus_dev_shutdown,
|
|
- .uevent = xenbus_uevent_frontend,
|
|
+ .probe = xenbus_dev_probe,
|
|
+ .remove = xenbus_dev_remove,
|
|
+ .shutdown = xenbus_dev_shutdown,
|
|
+ .uevent = xenbus_uevent_frontend,
|
|
+#endif
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,29)
|
|
+ .dev_attrs = xenbus_dev_attrs,
|
|
#endif
|
|
},
|
|
#if defined(CONFIG_XEN) || defined(MODULE)
|
|
@@ -586,7 +600,17 @@ int xenbus_probe_node(struct xen_bus_typ
|
|
xendev->dev.bus = &bus->bus;
|
|
xendev->dev.release = xenbus_dev_release;
|
|
|
|
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,26)
|
|
+ {
|
|
+ char devname[XEN_BUS_ID_SIZE];
|
|
+
|
|
+ err = bus->get_bus_id(devname, xendev->nodename);
|
|
+ if (!err)
|
|
+ dev_set_name(&xendev->dev, "%s", devname); /* devname is data from xenstore, never a format string */
|
|
+ }
|
|
+#else
|
|
err = bus->get_bus_id(xendev->dev.bus_id, xendev->nodename);
|
|
+#endif
|
|
if (err)
|
|
goto fail;
|
|
|
|
@@ -774,7 +798,7 @@ static int suspend_dev(struct device *de
|
|
err = drv->suspend(xdev);
|
|
if (err)
|
|
pr_warning("xenbus: suspend %s failed: %i\n",
|
|
- dev->bus_id, err);
|
|
+ dev_name(dev), err);
|
|
return 0;
|
|
}
|
|
|
|
@@ -794,7 +818,7 @@ static int suspend_cancel_dev(struct dev
|
|
err = drv->suspend_cancel(xdev);
|
|
if (err)
|
|
pr_warning("xenbus: suspend_cancel %s failed: %i\n",
|
|
- dev->bus_id, err);
|
|
+ dev_name(dev), err);
|
|
return 0;
|
|
}
|
|
|
|
@@ -815,7 +839,7 @@ static int resume_dev(struct device *dev
|
|
err = talk_to_otherend(xdev);
|
|
if (err) {
|
|
pr_warning("xenbus: resume (talk_to_otherend) %s failed: %i\n",
|
|
- dev->bus_id, err);
|
|
+ dev_name(dev), err);
|
|
return err;
|
|
}
|
|
|
|
@@ -825,7 +849,7 @@ static int resume_dev(struct device *dev
|
|
err = drv->resume(xdev);
|
|
if (err) {
|
|
pr_warning("xenbus: resume %s failed: %i\n",
|
|
- dev->bus_id, err);
|
|
+ dev_name(dev), err);
|
|
return err;
|
|
}
|
|
}
|
|
@@ -833,7 +857,7 @@ static int resume_dev(struct device *dev
|
|
err = watch_otherend(xdev);
|
|
if (err) {
|
|
pr_warning("xenbus_probe: resume (watch_otherend) %s failed:"
|
|
- " %d\n", dev->bus_id, err);
|
|
+ " %d\n", dev_name(dev), err);
|
|
return err;
|
|
}
|
|
|
|
@@ -1143,6 +1167,14 @@ static int __devinit xenbus_probe_init(v
|
|
if (!is_initial_xendomain())
|
|
xenbus_probe(NULL);
|
|
|
|
+#if defined(CONFIG_XEN_COMPAT_XENFS) && !defined(MODULE)
|
|
+ /*
|
|
+ * Create xenfs mountpoint in /proc for compatibility with
|
|
+ * utilities that expect to find "xenbus" under "/proc/xen".
|
|
+ */
|
|
+ proc_mkdir("xen", NULL);
|
|
+#endif
|
|
+
|
|
return 0;
|
|
|
|
err:
|
|
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.h 2011-02-07 14:42:39.000000000 +0100
|
|
@@ -43,6 +43,8 @@
|
|
#ifdef CONFIG_PARAVIRT_XEN
|
|
#define is_running_on_xen() xen_domain()
|
|
#define is_initial_xendomain() xen_initial_domain()
|
|
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
|
|
+#define dev_name(dev) ((dev)->bus_id)
|
|
#endif
|
|
|
|
#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE)
|
|
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe_backend.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -36,6 +36,7 @@
|
|
__FUNCTION__, __LINE__, ##args)
|
|
|
|
#include <linux/kernel.h>
|
|
+#include <linux/version.h>
|
|
#include <linux/err.h>
|
|
#include <linux/string.h>
|
|
#include <linux/ctype.h>
|
|
@@ -108,6 +109,10 @@ static int backend_bus_id(char bus_id[XE
|
|
return 0;
|
|
}
|
|
|
|
+static struct device_attribute xenbus_backend_attrs[] = {
|
|
+ __ATTR_NULL
|
|
+};
|
|
+
|
|
static struct xen_bus_type xenbus_backend = {
|
|
.root = "backend",
|
|
.levels = 3, /* backend/type/<frontend>/<id> */
|
|
@@ -115,12 +120,13 @@ static struct xen_bus_type xenbus_backen
|
|
.probe = xenbus_probe_backend,
|
|
.error = -ENODEV,
|
|
.bus = {
|
|
- .name = "xen-backend",
|
|
- .match = xenbus_match,
|
|
- .probe = xenbus_dev_probe,
|
|
- .remove = xenbus_dev_remove,
|
|
-// .shutdown = xenbus_dev_shutdown,
|
|
- .uevent = xenbus_uevent_backend,
|
|
+ .name = "xen-backend",
|
|
+ .match = xenbus_match,
|
|
+ .probe = xenbus_dev_probe,
|
|
+ .remove = xenbus_dev_remove,
|
|
+// .shutdown = xenbus_dev_shutdown,
|
|
+ .uevent = xenbus_uevent_backend,
|
|
+ .dev_attrs = xenbus_backend_attrs,
|
|
},
|
|
.dev = {
|
|
.bus_id = "xen-backend",
|
|
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_xs.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_xs.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -226,6 +226,9 @@ void *xenbus_dev_request_and_reply(struc
|
|
|
|
return ret;
|
|
}
|
|
+#if !defined(CONFIG_XEN) && !defined(MODULE)
|
|
+EXPORT_SYMBOL(xenbus_dev_request_and_reply);
|
|
+#endif
|
|
|
|
/* Send message to xs, get kmalloc'ed reply. ERR_PTR() on error. */
|
|
static void *xs_talkv(struct xenbus_transaction t,
|
|
--- head-2011-03-17.orig/drivers/xen/xenoprof/xenoprofile.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenoprof/xenoprofile.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -49,7 +49,7 @@ static int xenoprof_enabled = 0;
|
|
static int xenoprof_is_primary = 0;
|
|
static int active_defined;
|
|
|
|
-extern unsigned long backtrace_depth;
|
|
+extern unsigned long oprofile_backtrace_depth;
|
|
|
|
/* Number of buffers in shared area (one per VCPU) */
|
|
static int nbuf;
|
|
@@ -338,11 +338,11 @@ static int xenoprof_setup(void)
|
|
active_defined = 1;
|
|
}
|
|
|
|
- if (backtrace_depth > 0) {
|
|
+ if (oprofile_backtrace_depth > 0) {
|
|
ret = HYPERVISOR_xenoprof_op(XENOPROF_set_backtrace,
|
|
- &backtrace_depth);
|
|
+ &oprofile_backtrace_depth);
|
|
if (ret)
|
|
- backtrace_depth = 0;
|
|
+ oprofile_backtrace_depth = 0;
|
|
}
|
|
|
|
ret = HYPERVISOR_xenoprof_op(XENOPROF_reserve_counters, NULL);
|
|
--- head-2011-03-17.orig/include/acpi/processor.h 2011-01-31 14:53:38.000000000 +0100
|
|
+++ head-2011-03-17/include/acpi/processor.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -91,13 +91,24 @@ struct acpi_processor_cx {
|
|
};
|
|
|
|
struct acpi_processor_power {
|
|
+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
+ union { /* 'dev' is actually only used for taking its address. */
|
|
+#endif
|
|
struct cpuidle_device dev;
|
|
+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
struct acpi_processor_cx *state;
|
|
unsigned long bm_check_timestamp;
|
|
u32 default_state;
|
|
+#else
|
|
+ struct {
|
|
+#endif
|
|
int count;
|
|
struct acpi_processor_cx states[ACPI_PROCESSOR_MAX_POWER];
|
|
+#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
int timer_broadcast_on_state;
|
|
+#else
|
|
+ }; };
|
|
+#endif
|
|
};
|
|
|
|
/* Performance Management */
|
|
--- head-2011-03-17.orig/include/xen/cpu_hotplug.h 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/include/xen/cpu_hotplug.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -5,7 +5,7 @@
|
|
#include <linux/cpumask.h>
|
|
|
|
#if defined(CONFIG_X86) && defined(CONFIG_SMP)
|
|
-extern cpumask_t cpu_initialized_map;
|
|
+extern cpumask_var_t vcpu_initialized_mask;
|
|
#endif
|
|
|
|
#if defined(CONFIG_HOTPLUG_CPU)
|
|
--- head-2011-03-17.orig/include/xen/evtchn.h 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/include/xen/evtchn.h 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -48,6 +48,18 @@
|
|
* LOW-LEVEL DEFINITIONS
|
|
*/
|
|
|
|
+struct irq_cfg {
|
|
+ u32 info;
|
|
+ union {
|
|
+ int bindcount; /* for dynamic IRQs */
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+ u8 vector; /* for physical IRQs */
|
|
+#endif
|
|
+ };
|
|
+};
|
|
+
|
|
+int assign_irq_vector(int irq, struct irq_cfg *, const struct cpumask *);
|
|
+
|
|
/*
|
|
* Dynamically bind an event source to an IRQ-like callback handler.
|
|
* On some platforms this may not be implemented via the Linux IRQ subsystem.
|
|
--- head-2011-03-17.orig/include/xen/xenbus.h 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/include/xen/xenbus.h 2011-02-02 16:58:42.000000000 +0100
|
|
@@ -325,7 +325,9 @@ void xenbus_dev_error(struct xenbus_devi
|
|
void xenbus_dev_fatal(struct xenbus_device *dev, int err, const char *fmt,
|
|
...);
|
|
|
|
+#if defined(CONFIG_XEN) || defined(HAVE_XEN_PLATFORM_COMPAT_H)
|
|
int xenbus_dev_init(void);
|
|
+#endif
|
|
|
|
const char *xenbus_strstate(enum xenbus_state state);
|
|
int xenbus_dev_is_online(struct xenbus_device *dev);
|
|
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 14:42:26.000000000 +0100
|
|
@@ -8,6 +8,7 @@
|
|
* Copyright (C) 2000, 2003 Hewlett-Packard Co
|
|
* David Mosberger-Tang <davidm@hpl.hp.com>
|
|
* Copyright (C) 2005 Keir Fraser <keir@xensource.com>
|
|
+ * 08/12/11 beckyb Add highmem support
|
|
*/
|
|
|
|
#include <linux/cache.h>
|
|
@@ -16,6 +17,8 @@
|
|
#include <linux/pci.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/string.h>
|
|
+#include <linux/swiotlb.h>
|
|
+#include <linux/pfn.h>
|
|
#include <linux/types.h>
|
|
#include <linux/ctype.h>
|
|
#include <linux/init.h>
|
|
@@ -30,24 +33,9 @@
|
|
#include <xen/interface/memory.h>
|
|
#include <asm/gnttab_dma.h>
|
|
|
|
-int swiotlb;
|
|
-
|
|
#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
|
|
|
|
/*
|
|
- * Maximum allowable number of contiguous slabs to map,
|
|
- * must be a power of 2. What is the appropriate value ?
|
|
- * The complexity of {map,unmap}_single is linearly dependent on this value.
|
|
- */
|
|
-#define IO_TLB_SEGSIZE 128
|
|
-
|
|
-/*
|
|
- * log of the size of each IO TLB slab. The number of slabs is command line
|
|
- * controllable.
|
|
- */
|
|
-#define IO_TLB_SHIFT 11
|
|
-
|
|
-/*
|
|
* Enumeration for sync targets
|
|
*/
|
|
enum dma_sync_target {
|
|
@@ -55,10 +43,9 @@ enum dma_sync_target {
|
|
SYNC_FOR_DEVICE = 1,
|
|
};
|
|
|
|
+int swiotlb;
|
|
int swiotlb_force;
|
|
|
|
-static unsigned long iotlb_nslabs;
|
|
-
|
|
/*
|
|
* Used to do a quick range check in swiotlb_unmap_single and
|
|
* swiotlb_sync_single_*, to see if the memory was in fact allocated by this
|
|
@@ -67,6 +54,12 @@ static unsigned long iotlb_nslabs;
|
|
static char *io_tlb_start, *io_tlb_end;
|
|
|
|
/*
|
|
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
|
|
+ * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
|
|
+ */
|
|
+static unsigned long io_tlb_nslabs;
|
|
+
|
|
+/*
|
|
* When the IOMMU overflows we return a fallback buffer. This sets the size.
|
|
*/
|
|
static unsigned long io_tlb_overflow = 32*1024;
|
|
@@ -84,10 +77,7 @@ static unsigned int io_tlb_index;
|
|
* We need to save away the original address corresponding to a mapped entry
|
|
* for the sync operations.
|
|
*/
|
|
-static struct phys_addr {
|
|
- struct page *page;
|
|
- unsigned int offset;
|
|
-} *io_tlb_orig_addr;
|
|
+static phys_addr_t *io_tlb_orig_addr;
|
|
|
|
/*
|
|
* Protect the above data structures in the map and unmap calls
|
|
@@ -109,9 +99,9 @@ setup_io_tlb_npages(char *str)
|
|
{
|
|
/* Unlike ia64, the size is aperture in megabytes, not 'slabs'! */
|
|
if (isdigit(*str)) {
|
|
- iotlb_nslabs = simple_strtoul(str, &str, 0) <<
|
|
+ io_tlb_nslabs = simple_strtoul(str, &str, 0) <<
|
|
(20 - IO_TLB_SHIFT);
|
|
- iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
|
|
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
|
|
}
|
|
if (*str == ',')
|
|
++str;
|
|
@@ -129,35 +119,17 @@ setup_io_tlb_npages(char *str)
|
|
__setup("swiotlb=", setup_io_tlb_npages);
|
|
/* make io_tlb_overflow tunable too? */
|
|
|
|
-/*
|
|
- * Statically reserve bounce buffer space and initialize bounce buffer data
|
|
- * structures for the software IO TLB used to implement the PCI DMA API.
|
|
- */
|
|
-void __init
|
|
-swiotlb_init_with_default_size(size_t default_size)
|
|
+void *__init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
|
|
{
|
|
- unsigned long i, bytes;
|
|
+ void *start = alloc_bootmem_pages(size);
|
|
+ unsigned int i;
|
|
int rc;
|
|
|
|
- if (!iotlb_nslabs) {
|
|
- iotlb_nslabs = (default_size >> IO_TLB_SHIFT);
|
|
- iotlb_nslabs = ALIGN(iotlb_nslabs, IO_TLB_SEGSIZE);
|
|
- }
|
|
-
|
|
- bytes = iotlb_nslabs * (1UL << IO_TLB_SHIFT);
|
|
-
|
|
- /*
|
|
- * Get IO TLB memory from the low pages
|
|
- */
|
|
- io_tlb_start = alloc_bootmem_pages(bytes);
|
|
- if (!io_tlb_start)
|
|
- panic("Cannot allocate SWIOTLB buffer!\n");
|
|
-
|
|
dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
|
|
- for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) {
|
|
+ for (i = 0; i < nslabs; i += IO_TLB_SEGSIZE) {
|
|
do {
|
|
rc = xen_create_contiguous_region(
|
|
- (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT),
|
|
+ (unsigned long)start + (i << IO_TLB_SHIFT),
|
|
get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
|
|
dma_bits);
|
|
} while (rc && dma_bits++ < max_dma_bits);
|
|
@@ -166,12 +138,12 @@ swiotlb_init_with_default_size(size_t de
|
|
panic("No suitable physical memory available for SWIOTLB buffer!\n"
|
|
"Use dom0_mem Xen boot parameter to reserve\n"
|
|
"some DMA memory (e.g., dom0_mem=-128M).\n");
|
|
- iotlb_nslabs = i;
|
|
+ io_tlb_nslabs = i;
|
|
i <<= IO_TLB_SHIFT;
|
|
- free_bootmem(__pa(io_tlb_start + i), bytes - i);
|
|
- bytes = i;
|
|
+ free_bootmem(__pa(start + i), size - i);
|
|
+ size = i;
|
|
for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
|
|
- unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1));
|
|
+ unsigned int bits = fls64(virt_to_bus(start + i - 1));
|
|
|
|
if (bits > dma_bits)
|
|
dma_bits = bits;
|
|
@@ -179,18 +151,88 @@ swiotlb_init_with_default_size(size_t de
|
|
break;
|
|
}
|
|
}
|
|
+
|
|
+ return start;
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
|
|
+{
|
|
+ return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
|
|
+}
|
|
+#endif
|
|
+
|
|
+dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
|
|
+{
|
|
+ return phys_to_machine(paddr);
|
|
+}
|
|
+
|
|
+phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr)
|
|
+{
|
|
+ return machine_to_phys(baddr);
|
|
+}
|
|
+
|
|
+static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
|
|
+ volatile void *address)
|
|
+{
|
|
+ return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
|
|
+}
|
|
+
|
|
+static void *swiotlb_bus_to_virt(dma_addr_t address)
|
|
+{
|
|
+ return phys_to_virt(swiotlb_bus_to_phys(address));
|
|
+}
|
|
+
|
|
+int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void swiotlb_print_info(unsigned long bytes)
|
|
+{
|
|
+ printk(KERN_INFO "Software IO TLB enabled: \n"
|
|
+ " Aperture: %lu megabytes\n"
|
|
+ " Address size: %u bits\n"
|
|
+ " Kernel range: %p - %p\n",
|
|
+ bytes >> 20, dma_bits,
|
|
+ io_tlb_start, io_tlb_end);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Statically reserve bounce buffer space and initialize bounce buffer data
|
|
+ * structures for the software IO TLB used to implement the PCI DMA API.
|
|
+ */
|
|
+void __init
|
|
+swiotlb_init_with_default_size(size_t default_size)
|
|
+{
|
|
+ unsigned long i, bytes;
|
|
+ int rc;
|
|
+
|
|
+ if (!io_tlb_nslabs) {
|
|
+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
|
|
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
|
|
+ }
|
|
+
|
|
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
|
|
+
|
|
+ /*
|
|
+ * Get IO TLB memory from the low pages
|
|
+ */
|
|
+ io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
|
|
+ if (!io_tlb_start)
|
|
+ panic("Cannot allocate SWIOTLB buffer!\n");
|
|
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
|
|
io_tlb_end = io_tlb_start + bytes;
|
|
|
|
/*
|
|
* Allocate and initialize the free list array. This array is used
|
|
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE.
|
|
*/
|
|
- io_tlb_list = alloc_bootmem(iotlb_nslabs * sizeof(int));
|
|
- for (i = 0; i < iotlb_nslabs; i++)
|
|
+ io_tlb_list = alloc_bootmem(io_tlb_nslabs * sizeof(int));
|
|
+ for (i = 0; i < io_tlb_nslabs; i++)
|
|
io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
|
|
io_tlb_index = 0;
|
|
- io_tlb_orig_addr = alloc_bootmem(
|
|
- iotlb_nslabs * sizeof(*io_tlb_orig_addr));
|
|
+ io_tlb_orig_addr = alloc_bootmem(io_tlb_nslabs * sizeof(phys_addr_t));
|
|
|
|
/*
|
|
* Get the overflow emergency buffer
|
|
@@ -208,13 +250,7 @@ swiotlb_init_with_default_size(size_t de
|
|
if (rc)
|
|
panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");
|
|
|
|
- printk(KERN_INFO "Software IO TLB enabled: \n"
|
|
- " Aperture: %lu megabytes\n"
|
|
- " Kernel range: %p - %p\n"
|
|
- " Address size: %u bits\n",
|
|
- bytes >> 20,
|
|
- io_tlb_start, io_tlb_end,
|
|
- dma_bits);
|
|
+ swiotlb_print_info(bytes);
|
|
}
|
|
|
|
void __init
|
|
@@ -241,6 +277,11 @@ swiotlb_init(void)
|
|
printk(KERN_INFO "Software IO TLB disabled\n");
|
|
}
|
|
|
|
+static inline int range_needs_mapping(phys_addr_t pa, size_t size)
|
|
+{
|
|
+ return range_straddles_page_boundary(pa, size);
|
|
+}
|
|
+
|
|
static int is_swiotlb_buffer(dma_addr_t addr)
|
|
{
|
|
unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
|
|
@@ -254,46 +295,50 @@ static int is_swiotlb_buffer(dma_addr_t
|
|
}
|
|
|
|
/*
|
|
+ * Bounce: copy the swiotlb buffer back to the original dma location
|
|
+ *
|
|
* We use __copy_to_user_inatomic to transfer to the host buffer because the
|
|
* buffer may be mapped read-only (e.g, in blkback driver) but lower-level
|
|
* drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an
|
|
* unnecessary copy from the aperture to the host buffer, and a page fault.
|
|
*/
|
|
-static void
|
|
-__sync_single(struct phys_addr buffer, char *dma_addr, size_t size, int dir)
|
|
+static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
|
|
+ enum dma_data_direction dir)
|
|
{
|
|
- if (PageHighMem(buffer.page)) {
|
|
- size_t len, bytes;
|
|
- char *dev, *host, *kmp;
|
|
- len = size;
|
|
- while (len != 0) {
|
|
- unsigned long flags;
|
|
-
|
|
- if (((bytes = len) + buffer.offset) > PAGE_SIZE)
|
|
- bytes = PAGE_SIZE - buffer.offset;
|
|
- local_irq_save(flags); /* protects KM_BOUNCE_READ */
|
|
- kmp = kmap_atomic(buffer.page, KM_BOUNCE_READ);
|
|
- dev = dma_addr + size - len;
|
|
- host = kmp + buffer.offset;
|
|
- if (dir == DMA_FROM_DEVICE) {
|
|
- if (__copy_to_user_inatomic(host, dev, bytes))
|
|
- /* inaccessible */;
|
|
- } else
|
|
- memcpy(dev, host, bytes);
|
|
- kunmap_atomic(kmp, KM_BOUNCE_READ);
|
|
+ unsigned long pfn = PFN_DOWN(phys);
|
|
+
|
|
+ if (PageHighMem(pfn_to_page(pfn))) {
|
|
+ /* The buffer does not have a mapping. Map it in and copy */
|
|
+ unsigned int offset = phys & ~PAGE_MASK;
|
|
+ char *buffer;
|
|
+ unsigned int sz = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ while (size) {
|
|
+ sz = min((size_t)(PAGE_SIZE - offset), size);
|
|
+
|
|
+ local_irq_save(flags);
|
|
+ buffer = kmap_atomic(pfn_to_page(pfn),
|
|
+ KM_BOUNCE_READ);
|
|
+ if (dir == DMA_TO_DEVICE)
|
|
+ memcpy(dma_addr, buffer + offset, sz);
|
|
+ else if (__copy_to_user_inatomic(buffer + offset,
|
|
+ dma_addr, sz))
|
|
+ /* inaccessible */;
|
|
+ kunmap_atomic(buffer, KM_BOUNCE_READ);
|
|
local_irq_restore(flags);
|
|
- len -= bytes;
|
|
- buffer.page++;
|
|
- buffer.offset = 0;
|
|
+
|
|
+ size -= sz;
|
|
+ pfn++;
|
|
+ dma_addr += sz;
|
|
+ offset = 0;
|
|
}
|
|
} else {
|
|
- char *host = (char *)phys_to_virt(
|
|
- page_to_pseudophys(buffer.page)) + buffer.offset;
|
|
- if (dir == DMA_FROM_DEVICE) {
|
|
- if (__copy_to_user_inatomic(host, dma_addr, size))
|
|
- /* inaccessible */;
|
|
- } else if (dir == DMA_TO_DEVICE)
|
|
- memcpy(dma_addr, host, size);
|
|
+ if (dir == DMA_TO_DEVICE)
|
|
+ memcpy(dma_addr, phys_to_virt(phys), size);
|
|
+ else if (__copy_to_user_inatomic(phys_to_virt(phys),
|
|
+ dma_addr, size))
|
|
+ /* inaccessible */;
|
|
}
|
|
}
|
|
|
|
@@ -301,12 +346,11 @@ __sync_single(struct phys_addr buffer, c
|
|
* Allocates bounce buffer and returns its kernel virtual address.
|
|
*/
|
|
static void *
|
|
-map_single(struct device *hwdev, struct phys_addr buffer, size_t size, int dir)
|
|
+map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
|
|
{
|
|
unsigned long flags;
|
|
char *dma_addr;
|
|
unsigned int nslots, stride, index, wrap;
|
|
- struct phys_addr slot_buf;
|
|
int i;
|
|
unsigned long mask;
|
|
unsigned long offset_slots;
|
|
@@ -314,6 +358,10 @@ map_single(struct device *hwdev, struct
|
|
|
|
mask = dma_get_seg_boundary(hwdev);
|
|
offset_slots = -IO_TLB_SEGSIZE;
|
|
+
|
|
+ /*
|
|
+ * Carefully handle integer overflow which can occur when mask == ~0UL.
|
|
+ */
|
|
max_slots = mask + 1
|
|
? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
|
|
: 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
|
|
@@ -336,7 +384,7 @@ map_single(struct device *hwdev, struct
|
|
*/
|
|
spin_lock_irqsave(&io_tlb_lock, flags);
|
|
index = ALIGN(io_tlb_index, stride);
|
|
- if (index >= iotlb_nslabs)
|
|
+ if (index >= io_tlb_nslabs)
|
|
index = 0;
|
|
wrap = index;
|
|
|
|
@@ -344,7 +392,7 @@ map_single(struct device *hwdev, struct
|
|
while (iommu_is_span_boundary(index, nslots, offset_slots,
|
|
max_slots)) {
|
|
index += stride;
|
|
- if (index >= iotlb_nslabs)
|
|
+ if (index >= io_tlb_nslabs)
|
|
index = 0;
|
|
if (index == wrap)
|
|
goto not_found;
|
|
@@ -368,13 +416,13 @@ map_single(struct device *hwdev, struct
|
|
* Update the indices to avoid searching in the next
|
|
* round.
|
|
*/
|
|
- io_tlb_index = ((index + nslots) < iotlb_nslabs
|
|
+ io_tlb_index = ((index + nslots) < io_tlb_nslabs
|
|
? (index + nslots) : 0);
|
|
|
|
goto found;
|
|
}
|
|
index += stride;
|
|
- if (index >= iotlb_nslabs)
|
|
+ if (index >= io_tlb_nslabs)
|
|
index = 0;
|
|
} while (index != wrap);
|
|
|
|
@@ -389,29 +437,14 @@ found:
|
|
* This is needed when we sync the memory. Then we sync the buffer if
|
|
* needed.
|
|
*/
|
|
- slot_buf = buffer;
|
|
- for (i = 0; i < nslots; i++) {
|
|
- slot_buf.page += slot_buf.offset >> PAGE_SHIFT;
|
|
- slot_buf.offset &= PAGE_SIZE - 1;
|
|
- io_tlb_orig_addr[index+i] = slot_buf;
|
|
- slot_buf.offset += 1 << IO_TLB_SHIFT;
|
|
- }
|
|
- if ((dir == DMA_TO_DEVICE) || (dir == DMA_BIDIRECTIONAL))
|
|
- __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
|
|
+ for (i = 0; i < nslots; i++)
|
|
+ io_tlb_orig_addr[index+i] = phys + (i << IO_TLB_SHIFT);
|
|
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
|
|
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
|
|
|
|
return dma_addr;
|
|
}
|
|
|
|
-static struct phys_addr dma_addr_to_phys_addr(char *dma_addr)
|
|
-{
|
|
- int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
|
|
- struct phys_addr buffer = io_tlb_orig_addr[index];
|
|
- buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
|
|
- buffer.page += buffer.offset >> PAGE_SHIFT;
|
|
- buffer.offset &= PAGE_SIZE - 1;
|
|
- return buffer;
|
|
-}
|
|
-
|
|
/*
|
|
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
|
|
*/
|
|
@@ -421,13 +454,13 @@ unmap_single(struct device *hwdev, char
|
|
unsigned long flags;
|
|
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
|
|
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
|
|
- struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr);
|
|
+ phys_addr_t phys = io_tlb_orig_addr[index];
|
|
|
|
/*
|
|
* First, sync the memory before unmapping the entry
|
|
*/
|
|
- if ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))
|
|
- __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
|
|
+ if (phys && ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
|
|
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
|
|
|
|
/*
|
|
* Return the buffer to the free list by setting the corresponding
|
|
@@ -462,17 +495,21 @@ static void
|
|
sync_single(struct device *hwdev, char *dma_addr, size_t size,
|
|
int dir, int target)
|
|
{
|
|
- struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr);
|
|
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
|
|
+ phys_addr_t phys = io_tlb_orig_addr[index];
|
|
+
|
|
+ phys += ((unsigned long)dma_addr & ((1 << IO_TLB_SHIFT) - 1));
|
|
+
|
|
switch (target) {
|
|
case SYNC_FOR_CPU:
|
|
if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
|
|
- __sync_single(buffer, dma_addr, size, DMA_FROM_DEVICE);
|
|
+ swiotlb_bounce(phys, dma_addr, size, DMA_FROM_DEVICE);
|
|
else
|
|
BUG_ON(dir != DMA_TO_DEVICE);
|
|
break;
|
|
case SYNC_FOR_DEVICE:
|
|
if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
|
|
- __sync_single(buffer, dma_addr, size, DMA_TO_DEVICE);
|
|
+ swiotlb_bounce(phys, dma_addr, size, DMA_TO_DEVICE);
|
|
else
|
|
BUG_ON(dir != DMA_FROM_DEVICE);
|
|
break;
|
|
@@ -492,7 +529,7 @@ swiotlb_full(struct device *dev, size_t
|
|
* the damage, or panic when the transfer is too big.
|
|
*/
|
|
printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
|
|
- "device %s\n", size, dev ? dev->bus_id : "?");
|
|
+ "device %s\n", size, dev ? dev_name(dev) : "?");
|
|
|
|
if (size > io_tlb_overflow && do_panic) {
|
|
if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
|
|
@@ -517,7 +554,6 @@ _swiotlb_map_single(struct device *hwdev
|
|
dma_addr_t dev_addr = gnttab_dma_map_page(page) +
|
|
offset_in_page(paddr);
|
|
void *map;
|
|
- struct phys_addr buffer;
|
|
|
|
BUG_ON(dir == DMA_NONE);
|
|
|
|
@@ -526,23 +562,21 @@ _swiotlb_map_single(struct device *hwdev
|
|
* we can safely return the device addr and not worry about bounce
|
|
* buffering it.
|
|
*/
|
|
- if (!range_straddles_page_boundary(paddr, size) &&
|
|
- !address_needs_mapping(hwdev, dev_addr, size))
|
|
+ if (!address_needs_mapping(hwdev, dev_addr, size) &&
|
|
+ !range_needs_mapping(paddr, size))
|
|
return dev_addr;
|
|
|
|
/*
|
|
* Oh well, have to allocate and map a bounce buffer.
|
|
*/
|
|
gnttab_dma_unmap_page(dev_addr);
|
|
- buffer.page = page;
|
|
- buffer.offset = offset_in_page(paddr);
|
|
- map = map_single(hwdev, buffer, size, dir);
|
|
+ map = map_single(hwdev, paddr, size, dir);
|
|
if (!map) {
|
|
swiotlb_full(hwdev, size, dir, 1);
|
|
map = io_tlb_overflow_buffer;
|
|
}
|
|
|
|
- dev_addr = virt_to_bus(map);
|
|
+ dev_addr = swiotlb_virt_to_bus(hwdev, map);
|
|
return dev_addr;
|
|
}
|
|
|
|
@@ -559,6 +593,7 @@ swiotlb_map_single(struct device *hwdev,
|
|
{
|
|
return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_map_single);
|
|
|
|
dma_addr_t
|
|
swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
|
|
@@ -578,7 +613,7 @@ void
|
|
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
|
|
size_t size, int dir, struct dma_attrs *attrs)
|
|
{
|
|
- char *dma_addr = bus_to_virt(dev_addr);
|
|
+ char *dma_addr = swiotlb_bus_to_virt(dev_addr);
|
|
|
|
BUG_ON(dir == DMA_NONE);
|
|
if (is_swiotlb_buffer(dev_addr))
|
|
@@ -594,6 +629,8 @@ swiotlb_unmap_single(struct device *hwde
|
|
{
|
|
return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_unmap_single);
|
|
+
|
|
/*
|
|
* Make physical memory consistent for a single streaming mode DMA translation
|
|
* after a transfer.
|
|
@@ -608,7 +645,7 @@ static void
|
|
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
|
|
size_t size, int dir, int target)
|
|
{
|
|
- char *dma_addr = bus_to_virt(dev_addr);
|
|
+ char *dma_addr = swiotlb_bus_to_virt(dev_addr);
|
|
|
|
BUG_ON(dir == DMA_NONE);
|
|
if (is_swiotlb_buffer(dev_addr))
|
|
@@ -621,6 +658,7 @@ swiotlb_sync_single_for_cpu(struct devic
|
|
{
|
|
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
|
|
|
|
void
|
|
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
|
|
@@ -628,6 +666,7 @@ swiotlb_sync_single_for_device(struct de
|
|
{
|
|
swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_sync_single_for_device);
|
|
|
|
/*
|
|
* Same as above, but for a sub-range of the mapping.
|
|
@@ -637,7 +676,7 @@ swiotlb_sync_single_range(struct device
|
|
unsigned long offset, size_t size,
|
|
int dir, int target)
|
|
{
|
|
- char *dma_addr = bus_to_virt(dev_addr);
|
|
+ char *dma_addr = swiotlb_bus_to_virt(dev_addr);
|
|
|
|
BUG_ON(dir == DMA_NONE);
|
|
if (is_swiotlb_buffer(dev_addr))
|
|
@@ -651,6 +690,7 @@ swiotlb_sync_single_range_for_cpu(struct
|
|
swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
|
|
SYNC_FOR_CPU);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu);
|
|
|
|
void
|
|
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
|
|
@@ -659,9 +699,8 @@ swiotlb_sync_single_range_for_device(str
|
|
swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir,
|
|
SYNC_FOR_DEVICE);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device);
|
|
|
|
-void swiotlb_unmap_sg_attrs(struct device *, struct scatterlist *, int, int,
|
|
- struct dma_attrs *);
|
|
/*
|
|
* Map a set of buffers described by scatterlist in streaming mode for DMA.
|
|
* This is the scatter-gather version of the above swiotlb_map_single
|
|
@@ -683,23 +722,23 @@ swiotlb_map_sg_attrs(struct device *hwde
|
|
int dir, struct dma_attrs *attrs)
|
|
{
|
|
struct scatterlist *sg;
|
|
- struct phys_addr buffer;
|
|
- dma_addr_t dev_addr;
|
|
- char *map;
|
|
int i;
|
|
|
|
BUG_ON(dir == DMA_NONE);
|
|
|
|
for_each_sg(sgl, sg, nelems, i) {
|
|
- dev_addr = gnttab_dma_map_page(sg_page(sg)) + sg->offset;
|
|
+ dma_addr_t dev_addr = gnttab_dma_map_page(sg_page(sg))
|
|
+ + sg->offset;
|
|
+ phys_addr_t paddr = page_to_pseudophys(sg_page(sg))
|
|
+ + sg->offset;
|
|
|
|
- if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg))
|
|
- + sg->offset, sg->length)
|
|
+ if (range_needs_mapping(paddr, sg->length)
|
|
|| address_needs_mapping(hwdev, dev_addr, sg->length)) {
|
|
+ void *map;
|
|
+
|
|
gnttab_dma_unmap_page(dev_addr);
|
|
- buffer.page = sg_page(sg);
|
|
- buffer.offset = sg->offset;
|
|
- map = map_single(hwdev, buffer, sg->length, dir);
|
|
+ map = map_single(hwdev, paddr,
|
|
+ sg->length, dir);
|
|
if (!map) {
|
|
/* Don't panic here, we expect map_sg users
|
|
to do proper error handling. */
|
|
@@ -709,7 +748,7 @@ swiotlb_map_sg_attrs(struct device *hwde
|
|
sgl[0].dma_length = 0;
|
|
return 0;
|
|
}
|
|
- sg->dma_address = virt_to_bus(map);
|
|
+ sg->dma_address = swiotlb_virt_to_bus(hwdev, map);
|
|
} else
|
|
sg->dma_address = dev_addr;
|
|
sg->dma_length = sg->length;
|
|
@@ -724,6 +763,7 @@ swiotlb_map_sg(struct device *hwdev, str
|
|
{
|
|
return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_map_sg);
|
|
|
|
/*
|
|
* Unmap a set of streaming mode DMA translations. Again, cpu read rules
|
|
@@ -740,7 +780,7 @@ swiotlb_unmap_sg_attrs(struct device *hw
|
|
|
|
for_each_sg(sgl, sg, nelems, i) {
|
|
if (sg->dma_address != sg_phys(sg))
|
|
- unmap_single(hwdev, bus_to_virt(sg->dma_address),
|
|
+ unmap_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
|
|
sg->dma_length, dir);
|
|
else
|
|
gnttab_dma_unmap_page(sg->dma_address);
|
|
@@ -754,6 +794,7 @@ swiotlb_unmap_sg(struct device *hwdev, s
|
|
{
|
|
return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_unmap_sg);
|
|
|
|
/*
|
|
* Make physical memory consistent for a set of streaming mode DMA translations
|
|
@@ -773,7 +814,7 @@ swiotlb_sync_sg(struct device *hwdev, st
|
|
|
|
for_each_sg(sgl, sg, nelems, i) {
|
|
if (sg->dma_address != sg_phys(sg))
|
|
- sync_single(hwdev, bus_to_virt(sg->dma_address),
|
|
+ sync_single(hwdev, swiotlb_bus_to_virt(sg->dma_address),
|
|
sg->dma_length, dir, target);
|
|
}
|
|
}
|
|
@@ -784,6 +825,7 @@ swiotlb_sync_sg_for_cpu(struct device *h
|
|
{
|
|
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
|
|
|
|
void
|
|
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
|
|
@@ -791,12 +833,14 @@ swiotlb_sync_sg_for_device(struct device
|
|
{
|
|
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
|
|
|
|
int
|
|
swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
|
|
{
|
|
- return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
|
|
+ return (dma_addr == swiotlb_virt_to_bus(hwdev, io_tlb_overflow_buffer));
|
|
}
|
|
+EXPORT_SYMBOL(swiotlb_dma_mapping_error);
|
|
|
|
/*
|
|
* Return whether the given PCI device DMA address mask can be supported
|
|
@@ -809,14 +853,4 @@ swiotlb_dma_supported (struct device *hw
|
|
{
|
|
return (mask >= ((1UL << dma_bits) - 1));
|
|
}
|
|
-
|
|
-EXPORT_SYMBOL(swiotlb_map_single);
|
|
-EXPORT_SYMBOL(swiotlb_unmap_single);
|
|
-EXPORT_SYMBOL(swiotlb_map_sg);
|
|
-EXPORT_SYMBOL(swiotlb_unmap_sg);
|
|
-EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
|
|
-EXPORT_SYMBOL(swiotlb_sync_single_for_device);
|
|
-EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);
|
|
-EXPORT_SYMBOL(swiotlb_sync_sg_for_device);
|
|
-EXPORT_SYMBOL(swiotlb_dma_mapping_error);
|
|
EXPORT_SYMBOL(swiotlb_dma_supported);
|