From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] Linux: Update to 2.6.28
Patch-mainline: 2.6.28

This patch contains the differences between Linux 2.6.27 and 2.6.28.

Acked-by: Jeff Mahoney <jeffm@suse.com>

Automatically created from "patches.kernel.org/patch-2.6.28" by xen-port-patches.py

--- head-2011-03-17.orig/arch/ia64/Kconfig 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/Kconfig 2011-02-01 14:39:24.000000000 +0100
@@ -230,7 +230,7 @@ config IA64_HP_SIM
config IA64_XEN_GUEST
bool "Xen guest"
select SWIOTLB
- depends on XEN
+ depends on PARAVIRT_XEN
help
Build a kernel that runs on Xen guest domain. At this moment only
16KB page size in supported.
--- head-2011-03-17.orig/arch/ia64/Makefile 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/Makefile 2011-02-01 14:39:24.000000000 +0100
@@ -55,7 +55,7 @@ core-$(CONFIG_IA64_XEN_GUEST) += arch/ia
core-$(CONFIG_IA64_SGI_SN2) += arch/ia64/sn/
core-$(CONFIG_IA64_SGI_UV) += arch/ia64/uv/
core-$(CONFIG_KVM) += arch/ia64/kvm/
-core-$(CONFIG_XEN) += arch/ia64/xen/
+core-$(CONFIG_PARAVIRT_XEN) += arch/ia64/xen/
drivers-$(CONFIG_PCI) += arch/ia64/pci/
drivers-$(CONFIG_IA64_HP_SIM) += arch/ia64/hp/sim/
--- head-2011-03-17.orig/arch/ia64/include/asm/xen/hypervisor.h 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/include/asm/xen/hypervisor.h 2011-02-01 14:39:24.000000000 +0100
@@ -40,7 +40,7 @@
#include <xen/xen.h>
#include <asm/xen/hypercall.h>
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;
--- head-2011-03-17.orig/arch/ia64/include/asm/xen/interface.h 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/include/asm/xen/interface.h 2011-02-01 14:39:24.000000000 +0100
@@ -56,29 +56,21 @@
#ifndef _ASM_IA64_XEN_INTERFACE_H
#define _ASM_IA64_XEN_INTERFACE_H
-#define __DEFINE_GUEST_HANDLE(name, type) \
+#define __DEFINE_XEN_GUEST_HANDLE(name, type) \
typedef struct { type *p; } __guest_handle_ ## name
#define DEFINE_GUEST_HANDLE_STRUCT(name) \
- __DEFINE_GUEST_HANDLE(name, struct name)
-#define DEFINE_GUEST_HANDLE(name) __DEFINE_GUEST_HANDLE(name, name)
-#define GUEST_HANDLE(name) __guest_handle_ ## name
-#define GUEST_HANDLE_64(name) GUEST_HANDLE(name)
+ __DEFINE_XEN_GUEST_HANDLE(name, struct name)
+#define DEFINE_XEN_GUEST_HANDLE(name) __DEFINE_XEN_GUEST_HANDLE(name, name)
+#define XEN_GUEST_HANDLE(name) __guest_handle_ ## name
+#define XEN_GUEST_HANDLE_64(name) XEN_GUEST_HANDLE(name)
#define set_xen_guest_handle(hnd, val) do { (hnd).p = val; } while (0)
#ifndef __ASSEMBLY__
-/* Guest handles for primitive C types. */
-__DEFINE_GUEST_HANDLE(uchar, unsigned char);
-__DEFINE_GUEST_HANDLE(uint, unsigned int);
-__DEFINE_GUEST_HANDLE(ulong, unsigned long);
-__DEFINE_GUEST_HANDLE(u64, unsigned long);
-DEFINE_GUEST_HANDLE(char);
-DEFINE_GUEST_HANDLE(int);
-DEFINE_GUEST_HANDLE(long);
-DEFINE_GUEST_HANDLE(void);
+__DEFINE_XEN_GUEST_HANDLE(u64, unsigned long);
+typedef unsigned long xen_ulong_t;
typedef unsigned long xen_pfn_t;
-DEFINE_GUEST_HANDLE(xen_pfn_t);
#define PRI_xen_pfn "lx"
#endif
@@ -90,7 +82,7 @@ DEFINE_GUEST_HANDLE(xen_pfn_t);
/* Maximum number of virtual CPUs in multi-processor guests. */
/* keep sizeof(struct shared_page) <= PAGE_SIZE.
* this is checked in arch/ia64/xen/hypervisor.c. */
-#define MAX_VIRT_CPUS 64
+#define XEN_LEGACY_MAX_VCPUS 64
#ifndef __ASSEMBLY__
--- head-2011-03-17.orig/arch/ia64/kernel/asm-offsets.c 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/kernel/asm-offsets.c 2011-02-01 14:39:24.000000000 +0100
@@ -290,7 +290,7 @@ void foo(void)
DEFINE(IA64_ITC_LASTCYCLE_OFFSET,
offsetof (struct itc_jitter_data_t, itc_lastcycle));
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
BLANK();
DEFINE(XEN_NATIVE_ASM, XEN_NATIVE);
--- head-2011-03-17.orig/arch/ia64/xen/Kconfig 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/xen/Kconfig 2011-02-02 15:36:46.000000000 +0100
@@ -2,7 +2,7 @@
# This Kconfig describes xen/ia64 options
#
-config XEN
+config PARAVIRT_XEN
bool "Xen hypervisor support"
default y
depends on PARAVIRT && MCKINLEY && IA64_PAGE_SIZE_16KB && EXPERIMENTAL
@@ -16,10 +16,6 @@ config XEN
Enable Xen hypervisor support. Resulting kernel runs
both as a guest OS on Xen and natively on hardware.
-config XEN_XENCOMM
- depends on XEN
- bool
-
config NO_IDLE_HZ
- depends on XEN
+ depends on PARAVIRT_XEN
bool
--- head-2011-03-17.orig/arch/ia64/xen/xcom_hcall.c 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/ia64/xen/xcom_hcall.c 2011-02-01 14:39:24.000000000 +0100
@@ -343,7 +343,7 @@ xencommize_memory_reservation(struct xen
int
xencomm_hypercall_memory_op(unsigned int cmd, void *arg)
{
- GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} };
+ XEN_GUEST_HANDLE(xen_pfn_t) extent_start_va[2] = { {NULL}, {NULL} };
struct xen_memory_reservation *xmr = NULL;
int rc;
struct xencomm_handle *desc;
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 14:39:24.000000000 +0100
@@ -1028,7 +1028,7 @@ config MICROCODE
config MICROCODE_INTEL
bool "Intel microcode patch loading support"
- depends on MICROCODE
+ depends on MICROCODE && !XEN
default MICROCODE
select FW_LOADER
---help---
@@ -1041,7 +1041,7 @@ config MICROCODE_INTEL
config MICROCODE_AMD
bool "AMD microcode patch loading support"
- depends on MICROCODE
+ depends on MICROCODE && !XEN
select FW_LOADER
---help---
If you select this option, microcode patch loading support for AMD
@@ -1342,6 +1342,7 @@ config HIGHPTE
config X86_CHECK_BIOS_CORRUPTION
bool "Check for low memory corruption"
+ depends on !XEN
---help---
Periodically check for memory corruption in low memory, which
is suspected to be caused by BIOS. Even when enabled in the
@@ -1372,6 +1373,7 @@ config X86_BOOTPARAM_MEMORY_CORRUPTION_C
config X86_RESERVE_LOW
int "Amount of low memory, in kilobytes, to reserve for the BIOS"
+ depends on !XEN
default 64
range 4 640
---help---
@@ -1495,8 +1497,8 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAU
config X86_PAT
def_bool y
- prompt "x86 PAT support" if EXPERT
- depends on MTRR
+ prompt "x86 PAT support" if EXPERT || XEN_UNPRIVILEGED_GUEST
+ depends on MTRR || (XEN_UNPRIVILEGED_GUEST && XEN_PCIDEV_FRONTEND)
---help---
Use PAT attributes to setup page level cache control.
@@ -2091,7 +2093,7 @@ config DMAR_FLOPPY_WA
config INTR_REMAP
bool "Support for Interrupt Remapping (EXPERIMENTAL)"
- depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
+ depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && !XEN && EXPERIMENTAL
---help---
Supports Interrupt remapping for IO-APIC and MSI devices.
To use x2apic mode in the CPU's which support x2APIC enhancements or
--- head-2011-03-17.orig/arch/x86/Kconfig.cpu 2011-03-03 17:48:58.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig.cpu 2011-03-03 16:02:15.000000000 +0100
@@ -446,7 +446,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on !64BIT && !XEN
---help---
This enables detection, tunings and quirks for Cyrix processors
@@ -486,7 +486,7 @@ config CPU_SUP_CENTAUR
config CPU_SUP_TRANSMETA_32
default y
bool "Support Transmeta processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on !64BIT && !XEN
---help---
This enables detection, tunings and quirks for Transmeta processors
@@ -500,7 +500,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
- depends on !64BIT
+ depends on !64BIT && !XEN
---help---
This enables detection, tunings and quirks for UMC processors
--- head-2011-03-17.orig/arch/x86/Makefile 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/Makefile 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -117,7 +117,7 @@ endif
|
|
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
|
|
|
|
# Xen subarch support
|
|
-mflags-$(CONFIG_XEN) := -Iinclude/asm-x86/mach-xen
|
|
+mflags-$(CONFIG_XEN) := -Iarch/x86/include/mach-xen
|
|
mcore-$(CONFIG_XEN) := arch/x86/mach-xen/
|
|
|
|
KBUILD_CFLAGS += $(mflags-y)
|
|
@@ -159,7 +159,7 @@ PHONY += bzImage vmlinuz $(BOOT_TARGETS)
|
|
|
|
ifdef CONFIG_XEN
|
|
KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
|
|
- -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(KBUILD_CPPFLAGS)
|
|
+ -I$(srctree)/arch/x86/include/mach-xen $(KBUILD_CPPFLAGS)
|
|
|
|
ifdef CONFIG_X86_64
|
|
LDFLAGS_vmlinux := -e startup_64
|
|
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -39,11 +39,11 @@
|
|
.endm
|
|
|
|
/* clobbers %eax */
|
|
- .macro CLEAR_RREGS
|
|
+ .macro CLEAR_RREGS _r9=rax
|
|
xorl %eax,%eax
|
|
movq %rax,R11(%rsp)
|
|
movq %rax,R10(%rsp)
|
|
- movq %rax,R9(%rsp)
|
|
+ movq %\_r9,R9(%rsp)
|
|
movq %rax,R8(%rsp)
|
|
.endm
|
|
|
|
@@ -52,11 +52,10 @@
|
|
* We don't reload %eax because syscall_trace_enter() returned
|
|
* the value it wants us to use in the table lookup.
|
|
*/
|
|
- .macro LOAD_ARGS32 offset
|
|
- movl \offset(%rsp),%r11d
|
|
- movl \offset+8(%rsp),%r10d
|
|
+ .macro LOAD_ARGS32 offset, _r9=0
|
|
+ .if \_r9
|
|
movl \offset+16(%rsp),%r9d
|
|
- movl \offset+24(%rsp),%r8d
|
|
+ .endif
|
|
movl \offset+40(%rsp),%ecx
|
|
movl \offset+48(%rsp),%edx
|
|
movl \offset+56(%rsp),%esi
|
|
@@ -135,7 +134,7 @@ ENTRY(ia32_sysenter_target)
|
|
SAVE_ARGS 0,0,1
|
|
/* no need to do an access_ok check here because rbp has been
|
|
32bit zero extended */
|
|
-1: movl (%rbp),%r9d
|
|
+1: movl (%rbp),%ebp
|
|
.section __ex_table,"a"
|
|
.quad 1b,ia32_badarg
|
|
.previous
|
|
@@ -146,7 +145,7 @@ ENTRY(ia32_sysenter_target)
|
|
cmpl $(IA32_NR_syscalls-1),%eax
|
|
ja ia32_badsys
|
|
sysenter_do_call:
|
|
- IA32_ARG_FIXUP 1
|
|
+ IA32_ARG_FIXUP
|
|
sysenter_dispatch:
|
|
call *ia32_sys_call_table(,%rax,8)
|
|
movq %rax,RAX-ARGOFFSET(%rsp)
|
|
@@ -204,20 +203,17 @@ sysexit_audit:
|
|
#endif
|
|
|
|
sysenter_tracesys:
|
|
- xchgl %r9d,%ebp
|
|
#ifdef CONFIG_AUDITSYSCALL
|
|
testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
|
|
jz sysenter_auditsys
|
|
#endif
|
|
SAVE_REST
|
|
CLEAR_RREGS
|
|
- movq %r9,R9(%rsp)
|
|
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
|
|
movq %rsp,%rdi /* &pt_regs -> arg1 */
|
|
call syscall_trace_enter
|
|
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
|
|
RESTORE_REST
|
|
- xchgl %ebp,%r9d
|
|
cmpl $(IA32_NR_syscalls-1),%eax
|
|
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
|
|
jmp sysenter_do_call
|
|
@@ -272,9 +268,9 @@ ENTRY(ia32_cstar_target)
|
|
orl $TS_COMPAT,TI_status(%r10)
|
|
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
|
|
jnz cstar_tracesys
|
|
-cstar_do_call:
|
|
cmpl $IA32_NR_syscalls-1,%eax
|
|
ja ia32_badsys
|
|
+cstar_do_call:
|
|
IA32_ARG_FIXUP 1
|
|
cstar_dispatch:
|
|
call *ia32_sys_call_table(,%rax,8)
|
|
@@ -303,15 +299,13 @@ cstar_tracesys:
|
|
#endif
|
|
xchgl %r9d,%ebp
|
|
SAVE_REST
|
|
- CLEAR_RREGS
|
|
- movq %r9,R9(%rsp)
|
|
+ CLEAR_RREGS r9
|
|
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
|
|
movq %rsp,%rdi /* &pt_regs -> arg1 */
|
|
call syscall_trace_enter
|
|
- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
|
|
+ LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
|
|
RESTORE_REST
|
|
xchgl %ebp,%r9d
|
|
- movl RSP-ARGOFFSET(%rsp), %r8d
|
|
cmpl $(IA32_NR_syscalls-1),%eax
|
|
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
|
|
jmp cstar_do_call
|
|
@@ -522,8 +516,8 @@ ia32_sys_call_table:
|
|
.quad compat_sys_setrlimit /* 75 */
|
|
.quad compat_sys_old_getrlimit /* old_getrlimit */
|
|
.quad compat_sys_getrusage
|
|
- .quad sys32_gettimeofday
|
|
- .quad sys32_settimeofday
|
|
+ .quad compat_sys_gettimeofday
|
|
+ .quad compat_sys_settimeofday
|
|
.quad sys_getgroups16 /* 80 */
|
|
.quad sys_setgroups16
|
|
.quad sys32_old_select
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/agp.h 2011-03-17 14:35:46.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/agp.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -15,6 +15,9 @@
|
|
#define map_page_into_agp(page) set_pages_uc(page, 1)
|
|
#define unmap_page_from_agp(page) set_pages_wb(page, 1)
|
|
|
|
+#define map_pages_into_agp set_pages_array_uc
|
|
+#define unmap_pages_from_agp set_pages_array_wb
|
|
+
|
|
/*
|
|
* Could use CLFLUSH here if the cpu supports it. But then it would
|
|
* need to be called for each cacheline of the whole page so it may
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/cpufeature.h 2011-03-17 14:35:46.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/cpufeature.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -276,7 +276,11 @@ extern const char * const x86_power_flag
|
|
#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1)
|
|
#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
|
|
#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
|
|
+#ifndef CONFIG_XEN
|
|
#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
|
|
+#else
|
|
+#define cpu_has_xsave boot_cpu_has(X86_FEATURE_OSXSAVE)
|
|
+#endif
|
|
#define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR)
|
|
#define cpu_has_pclmulqdq boot_cpu_has(X86_FEATURE_PCLMULQDQ)
|
|
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/hw_irq.h 2011-03-17 14:35:46.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/hw_irq.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -128,6 +128,7 @@ extern void smp_error_interrupt(struct p
|
|
extern asmlinkage void smp_irq_move_cleanup_interrupt(void);
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
+#ifndef CONFIG_XEN
|
|
extern void smp_reschedule_interrupt(struct pt_regs *);
|
|
extern void smp_call_function_interrupt(struct pt_regs *);
|
|
extern void smp_call_function_single_interrupt(struct pt_regs *);
|
|
@@ -136,6 +137,12 @@ extern void smp_invalidate_interrupt(str
|
|
#else
|
|
extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *);
|
|
#endif
|
|
+#else
|
|
+#include <linux/irqreturn.h>
|
|
+extern irqreturn_t smp_reschedule_interrupt(int, void *);
|
|
+extern irqreturn_t smp_call_function_interrupt(int, void *);
|
|
+extern irqreturn_t smp_call_function_single_interrupt(int, void *);
|
|
+#endif
|
|
#endif
|
|
|
|
extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/segment.h 2011-03-17 14:35:46.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/segment.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -186,7 +186,9 @@
|
|
#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
|
|
#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
|
|
#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
|
|
-#ifndef CONFIG_PARAVIRT
|
|
+#if defined(CONFIG_X86_XEN)
|
|
+#define get_kernel_rpl() (xen_feature(XENFEAT_supervisor_mode_kernel)?0:1)
|
|
+#elif !defined(CONFIG_PARAVIRT)
|
|
#define get_kernel_rpl() 0
|
|
#endif
|
|
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/agp.h 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/agp.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -21,6 +21,23 @@
|
|
/* only a fallback: xen_destroy_contiguous_region uses PAGE_KERNEL */ \
|
|
set_pages_wb(page, 1))
|
|
|
|
+#define map_pages_into_agp(pages, nr) ({ \
|
|
+ __typeof__(nr) n__; \
|
|
+ int rc__ = 0; \
|
|
+ for (n__ = 0; n__ < (nr) && !rc__; ++n__) \
|
|
+ rc__ = xen_create_contiguous_region( \
|
|
+ (unsigned long)page_address((pages)[n__]), 0, 32); \
|
|
+ rc__ ?: set_pages_array_uc(pages, nr); \
|
|
+})
|
|
+#define unmap_pages_from_agp(pages, nr) ({ \
|
|
+ __typeof__(nr) n__; \
|
|
+ for (n__ = 0; n__ < nr; ++n__) \
|
|
+ xen_destroy_contiguous_region( \
|
|
+ (unsigned long)page_address((pages)[n__]), 0); \
|
|
+ /* only a fallback: xen_destroy_contiguous_region uses PAGE_KERNEL */ \
|
|
+ set_pages_array_wb(pages, nr); \
|
|
+})
|
|
+
|
|
/*
|
|
* Could use CLFLUSH here if the cpu supports it. But then it would
|
|
* need to be called for each cacheline of the whole page so it may
|
|
@@ -40,4 +57,4 @@
|
|
#define free_gatt_pages(table, order) \
|
|
dma_free_coherent(NULL,PAGE_SIZE<<(order),(table),virt_to_bus(table))
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_AGP_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _ASM_DESC_H_
|
|
-#define _ASM_DESC_H_
|
|
+#ifndef _ASM_X86_DESC_H
|
|
+#define _ASM_X86_DESC_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <asm/desc_defs.h>
|
|
@@ -24,6 +24,11 @@ static inline void fill_ldt(struct desc_
|
|
desc->d = info->seg_32bit;
|
|
desc->g = info->limit_in_pages;
|
|
desc->base2 = (info->base_addr & 0xff000000) >> 24;
|
|
+ /*
|
|
+ * Don't allow setting of the lm bit. It is useless anyway
|
|
+ * because 64bit system calls require __USER_CS:
|
|
+ */
|
|
+ desc->l = 0;
|
|
}
|
|
|
|
#ifndef CONFIG_X86_NO_IDT
|
|
@@ -98,6 +103,14 @@ static inline int desc_empty(const void
|
|
#define write_idt_entry(dt, entry, g) \
|
|
native_write_idt_entry(dt, entry, g)
|
|
|
|
+static inline void paravirt_alloc_ldt(struct desc_struct *ldt, unsigned entries)
|
|
+{
|
|
+}
|
|
+
|
|
+static inline void paravirt_free_ldt(struct desc_struct *ldt, unsigned entries)
|
|
+{
|
|
+}
|
|
+
|
|
static inline void native_write_idt_entry(gate_desc *idt, int entry,
|
|
const gate_desc *gate)
|
|
{
|
|
@@ -360,20 +373,16 @@ static inline void set_system_intr_gate(
|
|
_set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
|
|
}
|
|
|
|
-static inline void set_trap_gate(unsigned int n, void *addr)
|
|
+static inline void set_system_trap_gate(unsigned int n, void *addr)
|
|
{
|
|
BUG_ON((unsigned)n > 0xFF);
|
|
- _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
|
|
+ _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
|
|
}
|
|
|
|
-static inline void set_system_gate(unsigned int n, void *addr)
|
|
+static inline void set_trap_gate(unsigned int n, void *addr)
|
|
{
|
|
BUG_ON((unsigned)n > 0xFF);
|
|
-#ifdef CONFIG_X86_32
|
|
- _set_gate(n, GATE_TRAP, addr, 0x3, 0, __KERNEL_CS);
|
|
-#else
|
|
- _set_gate(n, GATE_INTERRUPT, addr, 0x3, 0, __KERNEL_CS);
|
|
-#endif
|
|
+ _set_gate(n, GATE_TRAP, addr, 0, 0, __KERNEL_CS);
|
|
}
|
|
|
|
static inline void set_task_gate(unsigned int n, unsigned int gdt_entry)
|
|
@@ -388,7 +397,7 @@ static inline void set_intr_gate_ist(int
|
|
_set_gate(n, GATE_INTERRUPT, addr, 0, ist, __KERNEL_CS);
|
|
}
|
|
|
|
-static inline void set_system_gate_ist(int n, void *addr, unsigned ist)
|
|
+static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
|
|
{
|
|
BUG_ON((unsigned)n > 0xFF);
|
|
_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
|
|
@@ -420,4 +429,4 @@ static inline void set_system_gate_ist(i
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_DESC_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/dma-mapping.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/dma-mapping.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,17 +1,12 @@
|
|
-#ifndef _ASM_DMA_MAPPING_H_
|
|
+#ifndef _ASM_X86_DMA_MAPPING_H_
|
|
|
|
#include_next <asm/dma-mapping.h>
|
|
|
|
-static inline int
|
|
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
|
|
-{
|
|
- dma_addr_t mask = 0xffffffff;
|
|
- /* If the device has a mask, use it, otherwise default to 32 bits */
|
|
- if (hwdev && hwdev->dma_mask)
|
|
- mask = *hwdev->dma_mask;
|
|
- return (addr & ~mask) != 0;
|
|
-}
|
|
+void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
|
|
+
|
|
+#define address_needs_mapping(hwdev, addr, size) \
|
|
+ !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size)
|
|
|
|
extern int range_straddles_page_boundary(paddr_t p, size_t size);
|
|
|
|
-#endif /* _ASM_DMA_MAPPING_H_ */
|
|
+#endif /* _ASM_X86_DMA_MAPPING_H_ */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _ASM_FIXMAP_H
|
|
-#define _ASM_FIXMAP_H
|
|
+#ifndef _ASM_X86_FIXMAP_H
|
|
+#define _ASM_X86_FIXMAP_H
|
|
|
|
#ifdef CONFIG_X86_32
|
|
# include "fixmap_32.h"
|
|
@@ -9,6 +9,10 @@
|
|
|
|
extern int fixmaps_set;
|
|
|
|
+extern pte_t *kmap_pte;
|
|
+extern pgprot_t kmap_prot;
|
|
+extern pte_t *pkmap_page_table;
|
|
+
|
|
void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t);
|
|
|
|
static inline void __set_fixmap(enum fixed_addresses idx,
|
|
@@ -61,4 +65,4 @@ static inline unsigned long virt_to_fix(
|
|
BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
|
|
return __virt_to_fix(vaddr);
|
|
}
|
|
-#endif
|
|
+#endif /* _ASM_X86_FIXMAP_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap_32.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -10,8 +10,8 @@
|
|
* Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
|
*/
|
|
|
|
-#ifndef _ASM_FIXMAP_32_H
|
|
-#define _ASM_FIXMAP_32_H
|
|
+#ifndef _ASM_X86_FIXMAP_32_H
|
|
+#define _ASM_X86_FIXMAP_32_H
|
|
|
|
/* used by vmalloc.c, vsyscall.lds.S.
|
|
*
|
|
@@ -27,10 +27,8 @@ extern unsigned long __FIXADDR_TOP;
|
|
#include <asm/acpi.h>
|
|
#include <asm/apicdef.h>
|
|
#include <asm/page.h>
|
|
-#ifdef CONFIG_HIGHMEM
|
|
#include <linux/threads.h>
|
|
#include <asm/kmap_types.h>
|
|
-#endif
|
|
|
|
/*
|
|
* Here we define all the compile-time 'special' virtual
|
|
@@ -81,10 +79,8 @@ enum fixed_addresses {
|
|
#ifdef CONFIG_X86_CYCLONE_TIMER
|
|
FIX_CYCLONE_TIMER, /*cyclone timer register*/
|
|
#endif
|
|
-#ifdef CONFIG_HIGHMEM
|
|
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
|
|
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
|
|
-#endif
|
|
#ifdef CONFIG_PCI_MMCONFIG
|
|
FIX_PCIE_MCFG,
|
|
#endif
|
|
@@ -100,10 +96,10 @@ enum fixed_addresses {
|
|
* can have a single pgd entry and a single pte table:
|
|
*/
|
|
#define NR_FIX_BTMAPS 64
|
|
-#define FIX_BTMAPS_NESTING 4
|
|
+#define FIX_BTMAPS_SLOTS 4
|
|
FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
|
|
(__end_of_permanent_fixed_addresses & 255),
|
|
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
|
|
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
|
|
FIX_WP_TEST,
|
|
#ifdef CONFIG_ACPI
|
|
FIX_ACPI_BEGIN,
|
|
@@ -126,4 +122,4 @@ extern void reserve_top_address(unsigned
|
|
#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
-#endif
|
|
+#endif /* _ASM_X86_FIXMAP_32_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -8,8 +8,8 @@
|
|
* Copyright (C) 1998 Ingo Molnar
|
|
*/
|
|
|
|
-#ifndef _ASM_FIXMAP_64_H
|
|
-#define _ASM_FIXMAP_64_H
|
|
+#ifndef _ASM_X86_FIXMAP_64_H
|
|
+#define _ASM_X86_FIXMAP_64_H
|
|
|
|
#include <linux/kernel.h>
|
|
#include <asm/acpi.h>
|
|
@@ -47,6 +47,10 @@ enum fixed_addresses {
|
|
#ifndef CONFIG_XEN
|
|
FIX_IO_APIC_BASE_0,
|
|
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
|
|
+#else
|
|
+#define NR_FIX_ISAMAPS 256
|
|
+ FIX_ISAMAP_END,
|
|
+ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
#endif
|
|
#ifdef CONFIG_EFI
|
|
FIX_EFI_IO_MAP_LAST_PAGE,
|
|
@@ -58,29 +62,26 @@ enum fixed_addresses {
|
|
#else
|
|
FIX_SHARED_INFO,
|
|
#endif
|
|
+ __end_of_permanent_fixed_addresses,
|
|
#ifdef CONFIG_ACPI
|
|
FIX_ACPI_BEGIN,
|
|
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
|
|
#endif
|
|
-#define NR_FIX_ISAMAPS 256
|
|
- FIX_ISAMAP_END,
|
|
- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
FIX_OHCI1394_BASE,
|
|
#endif
|
|
- __end_of_permanent_fixed_addresses,
|
|
/*
|
|
* 256 temporary boot-time mappings, used by early_ioremap(),
|
|
* before ioremap() is functional.
|
|
*
|
|
- * We round it up to the next 512 pages boundary so that we
|
|
+ * We round it up to the next 256 pages boundary so that we
|
|
* can have a single pgd entry and a single pte table:
|
|
*/
|
|
#define NR_FIX_BTMAPS 64
|
|
-#define FIX_BTMAPS_NESTING 4
|
|
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
|
|
- (__end_of_permanent_fixed_addresses & 511),
|
|
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
|
|
+#define FIX_BTMAPS_SLOTS 4
|
|
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
|
|
+ (__end_of_permanent_fixed_addresses & 255),
|
|
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
|
|
__end_of_fixed_addresses
|
|
};
|
|
|
|
@@ -92,4 +93,4 @@ enum fixed_addresses {
|
|
#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
|
|
#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_FIXMAP_64_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -15,8 +15,8 @@
|
|
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
|
|
*/
|
|
|
|
-#ifndef _ASM_HIGHMEM_H
|
|
-#define _ASM_HIGHMEM_H
|
|
+#ifndef _ASM_X86_HIGHMEM_H
|
|
+#define _ASM_X86_HIGHMEM_H
|
|
|
|
#ifdef __KERNEL__
|
|
|
|
@@ -24,14 +24,11 @@
|
|
#include <linux/threads.h>
|
|
#include <asm/kmap_types.h>
|
|
#include <asm/tlbflush.h>
|
|
+#include <asm/fixmap.h>
|
|
|
|
/* declarations for highmem.c */
|
|
extern unsigned long highstart_pfn, highend_pfn;
|
|
|
|
-extern pte_t *kmap_pte;
|
|
-extern pgprot_t kmap_prot;
|
|
-extern pte_t *pkmap_page_table;
|
|
-
|
|
/*
|
|
* Right now we initialize only a single pte table. It can be extended
|
|
* easily, subsequent pte tables have to be allocated in one physical
|
|
@@ -95,4 +92,4 @@ static inline void copy_user_highpage(st
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
-#endif /* _ASM_HIGHMEM_H */
|
|
+#endif /* _ASM_X86_HIGHMEM_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/io.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/io.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -5,20 +5,6 @@
|
|
|
|
#include <linux/compiler.h>
|
|
|
|
-/*
|
|
- * early_ioremap() and early_iounmap() are for temporary early boot-time
|
|
- * mappings, before the real ioremap() is functional.
|
|
- * A boot-time mapping is currently limited to at most 16 pages.
|
|
- */
|
|
-#ifndef __ASSEMBLY__
|
|
-extern void early_ioremap_init(void);
|
|
-extern void early_ioremap_clear(void);
|
|
-extern void early_ioremap_reset(void);
|
|
-extern void *early_ioremap(unsigned long offset, unsigned long size);
|
|
-extern void early_iounmap(void *addr, unsigned long size);
|
|
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
|
|
-#endif
|
|
-
|
|
#define build_mmio_read(name, size, type, reg, barrier) \
|
|
static inline type name(const volatile void __iomem *addr) \
|
|
{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
|
|
@@ -73,12 +59,14 @@ build_mmio_write(__writeq, "q", unsigned
|
|
#define writeq writeq
|
|
#endif
|
|
|
|
+extern int iommu_bio_merge;
|
|
+
|
|
#define native_io_delay xen_io_delay
|
|
|
|
#ifdef CONFIG_X86_32
|
|
-# include "../../io_32.h"
|
|
+# include "../../asm/io_32.h"
|
|
#else
|
|
-# include "../../io_64.h"
|
|
+# include "../../asm/io_64.h"
|
|
#endif
|
|
|
|
#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
|
|
@@ -95,7 +83,7 @@ build_mmio_write(__writeq, "q", unsigned
|
|
(unsigned long)(bv)->bv_offset)
|
|
|
|
#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
|
|
- (bvec_to_phys(vec1) + (vec1)->bv_len == bvec_to_phys(vec2) \
|
|
+ (__BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
|
|
&& bvec_to_pseudophys(vec1) + (vec1)->bv_len \
|
|
== bvec_to_pseudophys(vec2))
|
|
|
|
@@ -134,8 +122,9 @@ extern void __iomem *ioremap_wc(unsigned
|
|
extern void early_ioremap_init(void);
|
|
extern void early_ioremap_clear(void);
|
|
extern void early_ioremap_reset(void);
|
|
-extern void *early_ioremap(unsigned long offset, unsigned long size);
|
|
-extern void early_iounmap(void *addr, unsigned long size);
|
|
+extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
|
|
+extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
|
|
+extern void early_iounmap(void __iomem *addr, unsigned long size);
|
|
extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
|
|
|
|
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:27:18.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:31:50.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _ASM_IRQ_VECTORS_H
|
|
-#define _ASM_IRQ_VECTORS_H
|
|
+#ifndef _ASM_X86_IRQ_VECTORS_H
|
|
+#define _ASM_X86_IRQ_VECTORS_H
|
|
|
|
#ifdef CONFIG_X86_32
|
|
# define SYSCALL_VECTOR 0x80
|
|
@@ -47,6 +47,5 @@
|
|
#define NR_DYNIRQS 256
|
|
|
|
#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
|
|
-#define NR_IRQ_VECTORS NR_IRQS
|
|
|
|
-#endif /* _ASM_IRQ_VECTORS_H */
|
|
+#endif /* _ASM_X86_IRQ_VECTORS_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irqflags.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/irqflags.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -157,23 +157,6 @@ static inline int raw_irqs_disabled_flag
|
|
raw_irqs_disabled_flags(flags); \
|
|
})
|
|
|
|
-/*
|
|
- * makes the traced hardirq state match with the machine state
|
|
- *
|
|
- * should be a rarely used function, only in places where its
|
|
- * otherwise impossible to know the irq state, like in traps.
|
|
- */
|
|
-static inline void trace_hardirqs_fixup_flags(unsigned long flags)
|
|
-{
|
|
- if (raw_irqs_disabled_flags(flags))
|
|
- trace_hardirqs_off();
|
|
- else
|
|
- trace_hardirqs_on();
|
|
-}
|
|
-
|
|
-#define trace_hardirqs_fixup() \
|
|
- trace_hardirqs_fixup_flags(__raw_local_save_flags())
|
|
-
|
|
#else
|
|
|
|
#ifdef CONFIG_X86_64
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/maddr.h 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/maddr.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -59,10 +59,10 @@ static inline unsigned long mfn_to_pfn(u
|
|
|
|
/* The array access can fail (e.g., device space beyond end of RAM). */
|
|
asm (
|
|
- "1: "_ASM_MOV_UL" %1,%0\n"
|
|
+ "1: "_ASM_MOV" %1,%0\n"
|
|
"2:\n"
|
|
".section .fixup,\"ax\"\n"
|
|
- "3: "_ASM_MOV_UL" %2,%0\n"
|
|
+ "3: "_ASM_MOV" %2,%0\n"
|
|
" jmp 2b\n"
|
|
".previous\n"
|
|
_ASM_EXTABLE(1b,3b)
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __ASM_X86_MMU_CONTEXT_H
|
|
-#define __ASM_X86_MMU_CONTEXT_H
|
|
+#ifndef _ASM_X86_MMU_CONTEXT_H
|
|
+#define _ASM_X86_MMU_CONTEXT_H
|
|
|
|
#include <asm/desc.h>
|
|
#include <asm/atomic.h>
|
|
@@ -39,4 +39,4 @@ do { \
|
|
} while (0);
|
|
|
|
|
|
-#endif /* __ASM_X86_MMU_CONTEXT_H */
|
|
+#endif /* _ASM_X86_MMU_CONTEXT_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __I386_SCHED_H
|
|
-#define __I386_SCHED_H
|
|
+#ifndef _ASM_X86_MMU_CONTEXT_32_H
|
|
+#define _ASM_X86_MMU_CONTEXT_32_H
|
|
|
|
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
{
|
|
@@ -81,4 +81,4 @@ static inline void switch_mm(struct mm_s
|
|
#define deactivate_mm(tsk, mm) \
|
|
asm("movl %0,%%gs": :"r" (0));
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_MMU_CONTEXT_32_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __X86_64_MMU_CONTEXT_H
|
|
-#define __X86_64_MMU_CONTEXT_H
|
|
+#ifndef _ASM_X86_MMU_CONTEXT_64_H
|
|
+#define _ASM_X86_MMU_CONTEXT_64_H
|
|
|
|
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
{
|
|
@@ -103,4 +103,4 @@ do { \
|
|
asm volatile("movl %0,%%fs"::"r"(0)); \
|
|
} while (0)
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_MMU_CONTEXT_64_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __x86_PCI_H
|
|
-#define __x86_PCI_H
|
|
+#ifndef _ASM_X86_PCI_H
|
|
+#define _ASM_X86_PCI_H
|
|
|
|
#include <linux/mm.h> /* for struct page */
|
|
#include <linux/types.h>
|
|
@@ -93,7 +93,7 @@ static inline void early_quirks(void) {
|
|
#ifdef CONFIG_X86_32
|
|
# include "pci_32.h"
|
|
#else
|
|
-# include "pci_64.h"
|
|
+# include "../../asm/pci_64.h"
|
|
#endif
|
|
|
|
/* implement the pci_ DMA API in terms of the generic device dma_ one */
|
|
@@ -117,4 +117,4 @@ static inline cpumask_t __pcibus_to_cpum
|
|
}
|
|
#endif
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_PCI_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -149,4 +149,4 @@ extern void __pud_free_tlb(struct mmu_ga
|
|
#endif /* PAGETABLE_LEVELS > 3 */
|
|
#endif /* PAGETABLE_LEVELS > 2 */
|
|
|
|
-#endif /* _ASM_X86_PGALLOC_H */
|
|
+#endif /* _ASM_X86_PGALLOC_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-07 15:40:30.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-07 15:41:11.000000000 +0100
|
|
@@ -14,11 +14,11 @@
|
|
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
|
|
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
|
|
#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
|
|
-#define _PAGE_BIT_UNUSED2 10
|
|
-#define _PAGE_BIT_IO 11 /* Mapped page is I/O or foreign and
|
|
- * has no associated page struct. */
|
|
+#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
|
|
+#define _PAGE_BIT_UNUSED3 11
|
|
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
|
|
#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
|
|
+#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
|
|
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
|
|
|
|
/* If _PAGE_BIT_PRESENT is clear, we use these: */
|
|
@@ -39,11 +39,12 @@
|
|
#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
|
|
#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
|
|
#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
|
|
-#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2)
|
|
-#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO)
|
|
+#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
|
|
+#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
|
|
#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
|
|
#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
|
|
#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
|
|
+#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
|
|
#define __HAVE_ARCH_PTE_SPECIAL
|
|
|
|
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
|
@@ -69,7 +70,7 @@ extern unsigned int __kernel_page_user;
|
|
_PAGE_DIRTY | __kernel_page_user)
|
|
|
|
/* Set of bits not changed in pte_modify */
|
|
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
|
|
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \
|
|
_PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
|
|
/*
|
|
@@ -116,6 +117,11 @@ extern unsigned int __kernel_page_user;
|
|
#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
|
|
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
|
|
|
+#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
|
|
+#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
|
|
+#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
|
|
+#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
|
|
+
|
|
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
|
|
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
|
|
#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
|
|
@@ -130,6 +136,11 @@ extern unsigned int __kernel_page_user;
|
|
#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
|
|
#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
|
|
|
|
+#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
|
|
+#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
|
|
+#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
|
|
+#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
|
|
+
|
|
/* xwr */
|
|
#define __P000 PAGE_NONE
|
|
#define __P001 PAGE_READONLY
|
|
@@ -149,6 +160,22 @@ extern unsigned int __kernel_page_user;
|
|
#define __S110 PAGE_SHARED_EXEC
|
|
#define __S111 PAGE_SHARED_EXEC
|
|
|
|
+/*
|
|
+ * early identity mapping pte attrib macros.
|
|
+ */
|
|
+#ifdef CONFIG_X86_64
|
|
+#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
|
|
+#else
|
|
+/*
|
|
+ * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
|
|
+ * bits are combined, this will alow user to access the high address mapped
|
|
+ * VDSO in the presence of CONFIG_COMPAT_VDSO
|
|
+ */
|
|
+#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
|
|
+#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
|
|
+#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
|
|
+#endif
|
|
+
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
|
|
@@ -205,6 +232,15 @@ static inline int pte_special(pte_t pte)
|
|
return pte_flags(pte) & _PAGE_SPECIAL;
|
|
}
|
|
|
|
+#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
|
|
+ __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
|
|
+#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_IOMAP ? max_mapnr : \
|
|
+ (_pte).pte_low & _PAGE_PRESENT ? \
|
|
+ mfn_to_local_pfn(__pte_mfn(_pte)) : \
|
|
+ __pte_mfn(_pte))
|
|
+
|
|
+#define pte_page(pte) pfn_to_page(pte_pfn(pte))
|
|
+
|
|
static inline int pmd_large(pmd_t pte)
|
|
{
|
|
return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
|
|
@@ -347,6 +383,9 @@ static inline void xen_pagetable_setup_s
|
|
static inline void xen_pagetable_setup_done(pgd_t *base) {}
|
|
#endif
|
|
|
|
+struct seq_file;
|
|
+extern void arch_report_meminfo(struct seq_file *m);
|
|
+
|
|
#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
|
|
#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
|
|
|
|
@@ -638,4 +677,4 @@ int create_lookup_pte_addr(struct mm_str
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
-#endif /* _ASM_X86_PGTABLE_H */
|
|
+#endif /* _ASM_X86_PGTABLE_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _I386_PGTABLE_3LEVEL_H
|
|
-#define _I386_PGTABLE_3LEVEL_H
|
|
+#ifndef _ASM_X86_PGTABLE_3LEVEL_H
|
|
+#define _ASM_X86_PGTABLE_3LEVEL_H
|
|
|
|
/*
|
|
* Intel Physical Address Extension (PAE) Mode - three-level page
|
|
@@ -102,13 +102,13 @@ static inline void pud_clear(pud_t *pudp
|
|
xen_tlb_flush();
|
|
}
|
|
|
|
-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK))
|
|
+#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
|
|
|
|
#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
|
|
|
|
|
|
/* Find an entry in the second-level page table.. */
|
|
-#define pmd_offset(pud, address) ((pmd_t *)pud_page(*(pud)) + \
|
|
+#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \
|
|
pmd_index(address))
|
|
|
|
#ifdef CONFIG_SMP
|
|
@@ -133,8 +133,6 @@ static inline int pte_same(pte_t a, pte_
|
|
return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
|
|
}
|
|
|
|
-#define pte_page(x) pfn_to_page(pte_pfn(x))
|
|
-
|
|
static inline int pte_none(pte_t pte)
|
|
{
|
|
return !(pte.pte_low | pte.pte_high);
|
|
@@ -142,12 +140,6 @@ static inline int pte_none(pte_t pte)
|
|
|
|
#define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
|
|
((_pte).pte_high << (32-PAGE_SHIFT)))
|
|
-#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
|
|
- __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
|
|
-#define pte_pfn(_pte) ((_pte).pte_low & _PAGE_IO ? max_mapnr : \
|
|
- (_pte).pte_low & _PAGE_PRESENT ? \
|
|
- mfn_to_local_pfn(__pte_mfn(_pte)) : \
|
|
- __pte_mfn(_pte))
|
|
|
|
/*
|
|
* Bits 0, 6 and 7 are taken in the low part of the pte,
|
|
@@ -165,4 +157,4 @@ static inline int pte_none(pte_t pte)
|
|
#define __pte_to_swp_entry(pte) ((swp_entry_t){ (pte).pte_high })
|
|
#define __swp_entry_to_pte(x) ((pte_t){ { .pte_high = (x).val } })
|
|
|
|
-#endif /* _I386_PGTABLE_3LEVEL_H */
|
|
+#endif /* _ASM_X86_PGTABLE_3LEVEL_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2011-01-31 17:32:29.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
|
|
-#define _I386_PGTABLE_3LEVEL_DEFS_H
|
|
+#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
|
|
+#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
|
|
|
|
#define SHARED_KERNEL_PMD 0
|
|
|
|
@@ -21,4 +21,4 @@
|
|
*/
|
|
#define PTRS_PER_PTE 512
|
|
|
|
-#endif /* _I386_PGTABLE_3LEVEL_DEFS_H */
|
|
+#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _I386_PGTABLE_H
|
|
-#define _I386_PGTABLE_H
|
|
+#ifndef _ASM_X86_PGTABLE_32_H
|
|
+#define _ASM_X86_PGTABLE_32_H
|
|
|
|
/*
|
|
* The Linux memory management assumes a three-level page table setup. On
|
|
@@ -29,6 +29,7 @@ static inline void pgtable_cache_init(vo
|
|
static inline void check_pgt_cache(void) { }
|
|
void paging_init(void);
|
|
|
|
+extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
|
|
|
|
/*
|
|
* The Linux x86 paging architecture is 'compile-time dual-mode', it
|
|
@@ -54,8 +55,7 @@ void paging_init(void);
|
|
* area for the same reason. ;)
|
|
*/
|
|
#define VMALLOC_OFFSET (8 * 1024 * 1024)
|
|
-#define VMALLOC_START (((unsigned long)high_memory + 2 * VMALLOC_OFFSET - 1) \
|
|
- & ~(VMALLOC_OFFSET - 1))
|
|
+#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
|
|
#ifdef CONFIG_X86_PAE
|
|
#define LAST_PKMAP 512
|
|
#else
|
|
@@ -71,6 +71,8 @@ void paging_init(void);
|
|
# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
|
|
#endif
|
|
|
|
+#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
|
|
+
|
|
/*
|
|
* Define this if things work differently on an i386 and an i486:
|
|
* it will (on an i486) warn about kernel memory accesses that are
|
|
@@ -195,4 +197,4 @@ void make_lowmem_page_writable(void *va,
|
|
#define io_remap_pfn_range(vma, from, pfn, size, prot) \
|
|
direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
|
|
|
|
-#endif /* _I386_PGTABLE_H */
|
|
+#endif /* _ASM_X86_PGTABLE_32_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _X86_64_PGTABLE_H
|
|
-#define _X86_64_PGTABLE_H
|
|
+#ifndef _ASM_X86_PGTABLE_64_H
|
|
+#define _ASM_X86_PGTABLE_64_H
|
|
|
|
#include <linux/const.h>
|
|
#ifndef __ASSEMBLY__
|
|
@@ -65,14 +65,14 @@ extern void paging_init(void);
|
|
printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
|
|
#define pmd_ERROR(e) \
|
|
- printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \
|
|
+ printk("%s:%d: bad pmd %p(%016lx pfn %010Lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
|
|
#define pud_ERROR(e) \
|
|
- printk("%s:%d: bad pud %p(%016lx pfn %010lx).\n", \
|
|
+ printk("%s:%d: bad pud %p(%016lx pfn %010Lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pud_val(e), \
|
|
(pud_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
|
|
#define pgd_ERROR(e) \
|
|
- printk("%s:%d: bad pgd %p(%016lx pfn %010lx).\n", \
|
|
+ printk("%s:%d: bad pgd %p(%016lx pfn %010Lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pgd_val(e), \
|
|
(pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
|
|
|
|
@@ -181,14 +181,6 @@ static inline int pmd_bad(pmd_t pmd)
|
|
#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
|
|
|
|
#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
-#define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
|
|
- __pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
|
|
-#define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? max_mapnr : \
|
|
- (_pte).pte & _PAGE_PRESENT ? \
|
|
- mfn_to_local_pfn(__pte_mfn(_pte)) : \
|
|
- __pte_mfn(_pte))
|
|
-
|
|
-#define pte_page(x) pfn_to_page(pte_pfn((x)))
|
|
|
|
/*
|
|
* Macro to mark a page protection value as "uncacheable".
|
|
@@ -312,4 +304,4 @@ extern void cleanup_highmap(void);
|
|
#define __HAVE_ARCH_PTE_SAME
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
-#endif /* _X86_64_PGTABLE_H */
|
|
+#endif /* _ASM_X86_PGTABLE_64_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:44:23.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:45:14.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __ASM_X86_PROCESSOR_H
|
|
-#define __ASM_X86_PROCESSOR_H
|
|
+#ifndef _ASM_X86_PROCESSOR_H
|
|
+#define _ASM_X86_PROCESSOR_H
|
|
|
|
#include <asm/processor-flags.h>
|
|
|
|
@@ -20,6 +20,7 @@ struct mm_struct;
|
|
#include <asm/msr.h>
|
|
#include <asm/desc_defs.h>
|
|
#include <asm/nops.h>
|
|
+#include <asm/ds.h>
|
|
|
|
#include <linux/personality.h>
|
|
#include <linux/cpumask.h>
|
|
@@ -72,21 +73,21 @@ struct cpuinfo_x86 {
|
|
char rfu;
|
|
char fdiv_bug;
|
|
char f00f_bug;
|
|
-#endif
|
|
char coma_bug;
|
|
char pad0;
|
|
+#endif
|
|
#else
|
|
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
|
|
int x86_tlbsize;
|
|
__u8 x86_virt_bits;
|
|
__u8 x86_phys_bits;
|
|
+#endif
|
|
#ifndef CONFIG_XEN
|
|
/* CPUID returned core id bits: */
|
|
__u8 x86_coreid_bits;
|
|
#endif
|
|
/* Max extended CPUID function supported: */
|
|
__u32 extended_cpuid_level;
|
|
-#endif
|
|
/* Maximum supported CPUID level, -1=no CPUID: */
|
|
int cpuid_level;
|
|
__u32 x86_capability[NCAPINTS];
|
|
@@ -150,6 +151,8 @@ DECLARE_PER_CPU(struct cpuinfo_x86, cpu_
|
|
#define current_cpu_data boot_cpu_data
|
|
#endif
|
|
|
|
+extern const struct seq_operations cpuinfo_op;
|
|
+
|
|
static inline int hlt_works(int cpu)
|
|
{
|
|
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
@@ -163,6 +166,8 @@ static inline int hlt_works(int cpu)
|
|
|
|
extern void cpu_detect(struct cpuinfo_x86 *c);
|
|
|
|
+extern struct pt_regs *idle_regs(struct pt_regs *);
|
|
+
|
|
extern void early_cpu_init(void);
|
|
extern void identify_boot_cpu(void);
|
|
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
|
|
@@ -171,11 +176,8 @@ extern void init_scattered_cpuid_feature
|
|
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
|
|
extern unsigned short num_cache_leaves;
|
|
|
|
-#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64)
|
|
+extern void detect_extended_topology(struct cpuinfo_x86 *c);
|
|
extern void detect_ht(struct cpuinfo_x86 *c);
|
|
-#else
|
|
-static inline void detect_ht(struct cpuinfo_x86 *c) {}
|
|
-#endif
|
|
|
|
static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx,
|
|
unsigned int *ecx, unsigned int *edx)
|
|
@@ -337,7 +339,12 @@ struct i387_fxsave_struct {
|
|
/* 16*16 bytes for each XMM-reg = 256 bytes: */
|
|
u32 xmm_space[64];
|
|
|
|
- u32 padding[24];
|
|
+ u32 padding[12];
|
|
+
|
|
+ union {
|
|
+ u32 padding1[12];
|
|
+ u32 sw_reserved[12];
|
|
+ };
|
|
|
|
} __attribute__((aligned(16)));
|
|
|
|
@@ -361,10 +368,23 @@ struct i387_soft_struct {
|
|
u32 entry_eip;
|
|
};
|
|
|
|
+struct xsave_hdr_struct {
|
|
+ u64 xstate_bv;
|
|
+ u64 reserved1[2];
|
|
+ u64 reserved2[5];
|
|
+} __attribute__((packed));
|
|
+
|
|
+struct xsave_struct {
|
|
+ struct i387_fxsave_struct i387;
|
|
+ struct xsave_hdr_struct xsave_hdr;
|
|
+ /* new processor state extensions will go here */
|
|
+} __attribute__ ((packed, aligned (64)));
|
|
+
|
|
union thread_xstate {
|
|
struct i387_fsave_struct fsave;
|
|
struct i387_fxsave_struct fxsave;
|
|
struct i387_soft_struct soft;
|
|
+ struct xsave_struct xsave;
|
|
};
|
|
|
|
#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
|
|
@@ -422,9 +442,14 @@ struct thread_struct {
|
|
unsigned io_bitmap_max;
|
|
/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */
|
|
unsigned long debugctlmsr;
|
|
-/* Debug Store - if not 0 points to a DS Save Area configuration;
|
|
- * goes into MSR_IA32_DS_AREA */
|
|
- unsigned long ds_area_msr;
|
|
+#ifdef CONFIG_X86_DS
|
|
+/* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */
|
|
+ struct ds_context *ds_ctx;
|
|
+#endif /* CONFIG_X86_DS */
|
|
+#ifdef CONFIG_X86_PTRACE_BTS
|
|
+/* the signal to send on a bts buffer overflow */
|
|
+ unsigned int bts_ovfl_signal;
|
|
+#endif /* CONFIG_X86_PTRACE_BTS */
|
|
};
|
|
|
|
static inline unsigned long xen_get_debugreg(int regno)
|
|
@@ -512,41 +537,6 @@ static inline void clear_in_cr4(unsigned
|
|
write_cr4(cr4);
|
|
}
|
|
|
|
-struct microcode_header {
|
|
- unsigned int hdrver;
|
|
- unsigned int rev;
|
|
- unsigned int date;
|
|
- unsigned int sig;
|
|
- unsigned int cksum;
|
|
- unsigned int ldrver;
|
|
- unsigned int pf;
|
|
- unsigned int datasize;
|
|
- unsigned int totalsize;
|
|
- unsigned int reserved[3];
|
|
-};
|
|
-
|
|
-struct microcode {
|
|
- struct microcode_header hdr;
|
|
- unsigned int bits[0];
|
|
-};
|
|
-
|
|
-typedef struct microcode microcode_t;
|
|
-typedef struct microcode_header microcode_header_t;
|
|
-
|
|
-/* microcode format is extended from prescott processors */
|
|
-struct extended_signature {
|
|
- unsigned int sig;
|
|
- unsigned int pf;
|
|
- unsigned int cksum;
|
|
-};
|
|
-
|
|
-struct extended_sigtable {
|
|
- unsigned int count;
|
|
- unsigned int cksum;
|
|
- unsigned int reserved[3];
|
|
- struct extended_signature sigs[0];
|
|
-};
|
|
-
|
|
typedef struct {
|
|
unsigned long seg;
|
|
} mm_segment_t;
|
|
@@ -894,4 +884,4 @@ extern void start_thread(struct pt_regs
|
|
extern int get_tsc_mode(unsigned long adr);
|
|
extern int set_tsc_mode(unsigned int val);
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_PROCESSOR_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _ASM_X86_SMP_H_
|
|
-#define _ASM_X86_SMP_H_
|
|
+#ifndef _ASM_X86_SMP_H
|
|
+#define _ASM_X86_SMP_H
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/cpumask.h>
|
|
#include <linux/init.h>
|
|
@@ -34,7 +34,12 @@ extern cpumask_t cpu_initialized;
|
|
DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
|
|
DECLARE_PER_CPU(cpumask_t, cpu_core_map);
|
|
DECLARE_PER_CPU(u16, cpu_llc_id);
|
|
+#endif
|
|
+#ifdef CONFIG_X86_32
|
|
+DECLARE_PER_CPU(int, cpu_number);
|
|
+#endif
|
|
|
|
+#ifndef CONFIG_XEN
|
|
DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
|
|
DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
|
|
#endif
|
|
@@ -52,12 +57,16 @@ extern struct {
|
|
struct smp_ops {
|
|
void (*smp_prepare_boot_cpu)(void);
|
|
void (*smp_prepare_cpus)(unsigned max_cpus);
|
|
- int (*cpu_up)(unsigned cpu);
|
|
void (*smp_cpus_done)(unsigned max_cpus);
|
|
|
|
void (*smp_send_stop)(void);
|
|
void (*smp_send_reschedule)(int cpu);
|
|
|
|
+ int (*cpu_up)(unsigned cpu);
|
|
+ int (*cpu_disable)(void);
|
|
+ void (*cpu_die)(unsigned int cpu);
|
|
+ void (*play_dead)(void);
|
|
+
|
|
void (*send_call_func_ipi)(cpumask_t mask);
|
|
void (*send_call_func_single_ipi)(int cpu);
|
|
};
|
|
@@ -92,6 +101,21 @@ static inline int __cpu_up(unsigned int
|
|
return smp_ops.cpu_up(cpu);
|
|
}
|
|
|
|
+static inline int __cpu_disable(void)
|
|
+{
|
|
+ return smp_ops.cpu_disable();
|
|
+}
|
|
+
|
|
+static inline void __cpu_die(unsigned int cpu)
|
|
+{
|
|
+ smp_ops.cpu_die(cpu);
|
|
+}
|
|
+
|
|
+static inline void play_dead(void)
|
|
+{
|
|
+ smp_ops.play_dead();
|
|
+}
|
|
+
|
|
static inline void smp_send_reschedule(int cpu)
|
|
{
|
|
smp_ops.smp_send_reschedule(cpu);
|
|
@@ -107,13 +131,20 @@ static inline void arch_send_call_functi
|
|
smp_ops.send_call_func_ipi(mask);
|
|
}
|
|
|
|
+void cpu_disable_common(void);
|
|
void native_smp_prepare_boot_cpu(void);
|
|
void native_smp_prepare_cpus(unsigned int max_cpus);
|
|
void native_smp_cpus_done(unsigned int max_cpus);
|
|
int native_cpu_up(unsigned int cpunum);
|
|
+int native_cpu_disable(void);
|
|
+void native_cpu_die(unsigned int cpu);
|
|
+void native_play_dead(void);
|
|
+void play_dead_common(void);
|
|
|
|
#else /* CONFIG_XEN */
|
|
|
|
+extern int __cpu_disable(void);
|
|
+extern void __cpu_die(unsigned int cpu);
|
|
void xen_smp_send_stop(void);
|
|
void xen_smp_send_reschedule(int cpu);
|
|
void xen_send_call_func_ipi(cpumask_t mask);
|
|
@@ -124,10 +155,11 @@ void xen_send_call_func_single_ipi(int c
|
|
#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
|
|
#define arch_send_call_function_ipi xen_send_call_func_ipi
|
|
|
|
+void play_dead(void);
|
|
+
|
|
#endif /* CONFIG_XEN */
|
|
|
|
-extern int __cpu_disable(void);
|
|
-extern void __cpu_die(unsigned int cpu);
|
|
+extern void prefill_possible_map(void);
|
|
|
|
void smp_store_cpu_info(int id);
|
|
#define cpu_physical_id(cpu) (cpu)
|
|
@@ -137,15 +169,11 @@ static inline int num_booting_cpus(void)
|
|
{
|
|
return cpus_weight(cpu_callout_map);
|
|
}
|
|
-#endif /* CONFIG_SMP */
|
|
-
|
|
-#if defined(CONFIG_SMP) && (defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_XEN))
|
|
-extern void prefill_possible_map(void);
|
|
#else
|
|
static inline void prefill_possible_map(void)
|
|
{
|
|
}
|
|
-#endif
|
|
+#endif /* CONFIG_SMP */
|
|
|
|
extern unsigned disabled_cpus __cpuinitdata;
|
|
|
|
@@ -155,7 +183,6 @@ extern unsigned disabled_cpus __cpuinitd
|
|
* from the initial startup. We map APIC_BASE very early in page_setup(),
|
|
* so this is correct in the x86 case.
|
|
*/
|
|
-DECLARE_PER_CPU(int, cpu_number);
|
|
#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
|
|
#define safe_smp_processor_id() smp_processor_id()
|
|
|
|
@@ -178,30 +205,33 @@ DECLARE_PER_CPU(int, cpu_number);
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
|
|
+#ifndef CONFIG_X86_64
|
|
static inline int logical_smp_processor_id(void)
|
|
{
|
|
/* we don't want to mark this access volatile - bad code generation */
|
|
return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
|
|
}
|
|
|
|
-#ifndef CONFIG_X86_64
|
|
+#include <mach_apicdef.h>
|
|
static inline unsigned int read_apic_id(void)
|
|
{
|
|
- return *(u32 *)(APIC_BASE + APIC_ID);
|
|
+ unsigned int reg;
|
|
+
|
|
+ reg = *(u32 *)(APIC_BASE + APIC_ID);
|
|
+
|
|
+ return GET_APIC_ID(reg);
|
|
}
|
|
-#else
|
|
-extern unsigned int read_apic_id(void);
|
|
#endif
|
|
|
|
|
|
-# ifdef APIC_DEFINITION
|
|
+# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
|
|
extern int hard_smp_processor_id(void);
|
|
# else
|
|
-# include <mach_apicdef.h>
|
|
+#include <mach_apicdef.h>
|
|
static inline int hard_smp_processor_id(void)
|
|
{
|
|
/* we don't want to mark this access volatile - bad code generation */
|
|
- return GET_APIC_ID(read_apic_id());
|
|
+ return read_apic_id();
|
|
}
|
|
# endif /* APIC_DEFINITION */
|
|
|
|
@@ -213,9 +243,11 @@ static inline int hard_smp_processor_id(
|
|
|
|
#endif /* CONFIG_X86_LOCAL_APIC */
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
-extern void cpu_uninit(void);
|
|
+#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
|
|
+extern unsigned char boot_cpu_id;
|
|
+#else
|
|
+#define boot_cpu_id 0
|
|
#endif
|
|
|
|
#endif /* __ASSEMBLY__ */
|
|
-#endif
|
|
+#endif /* _ASM_X86_SMP_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _X86_SPINLOCK_H_
|
|
-#define _X86_SPINLOCK_H_
|
|
+#ifndef _ASM_X86_SPINLOCK_H
|
|
+#define _ASM_X86_SPINLOCK_H
|
|
|
|
#include <asm/atomic.h>
|
|
#include <asm/rwlock.h>
|
|
@@ -453,4 +453,4 @@ static inline void __raw_write_unlock(ra
|
|
#define _raw_read_relax(lock) cpu_relax()
|
|
#define _raw_write_relax(lock) cpu_relax()
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_SPINLOCK_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock_types.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock_types.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __ASM_SPINLOCK_TYPES_H
|
|
-#define __ASM_SPINLOCK_TYPES_H
|
|
+#ifndef _ASM_X86_SPINLOCK_TYPES_H
|
|
+#define _ASM_X86_SPINLOCK_TYPES_H
|
|
|
|
#ifndef __LINUX_SPINLOCK_TYPES_H
|
|
# error "please don't include this file directly"
|
|
@@ -38,4 +38,4 @@ typedef struct {
|
|
|
|
#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_SPINLOCK_TYPES_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h 2011-03-03 15:58:55.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:01:23.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef _ASM_X86_SYSTEM_H_
|
|
-#define _ASM_X86_SYSTEM_H_
|
|
+#ifndef _ASM_X86_SYSTEM_H
|
|
+#define _ASM_X86_SYSTEM_H
|
|
|
|
#include <asm/asm.h>
|
|
#include <asm/segment.h>
|
|
@@ -65,7 +65,10 @@ do { \
|
|
\
|
|
/* regparm parameters for __switch_to(): */ \
|
|
[prev] "a" (prev), \
|
|
- [next] "d" (next)); \
|
|
+ [next] "d" (next) \
|
|
+ \
|
|
+ : /* reloaded segment registers */ \
|
|
+ "memory"); \
|
|
} while (0)
|
|
|
|
#ifndef CONFIG_XEN
|
|
@@ -405,4 +408,4 @@ static inline void rdtsc_barrier(void)
|
|
alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
|
|
}
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_SYSTEM_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system_64.h 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef __ASM_SYSTEM_H
|
|
-#define __ASM_SYSTEM_H
|
|
+#ifndef _ASM_X86_SYSTEM_64_H
|
|
+#define _ASM_X86_SYSTEM_64_H
|
|
|
|
#include <asm/segment.h>
|
|
#include <asm/cmpxchg.h>
|
|
@@ -17,4 +17,4 @@ static inline void write_cr8(unsigned lo
|
|
|
|
#include <linux/irqflags.h>
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_SYSTEM_64_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/tlbflush.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -63,6 +63,10 @@ static inline void flush_tlb_range(struc
|
|
__flush_tlb();
|
|
}
|
|
|
|
+static inline void reset_lazy_tlbstate(void)
|
|
+{
|
|
+}
|
|
+
|
|
#else /* SMP */
|
|
|
|
#include <asm/smp.h>
|
|
@@ -92,6 +96,12 @@ struct tlb_state {
|
|
char __cacheline_padding[L1_CACHE_BYTES-8];
|
|
};
|
|
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
|
|
+
|
|
+void reset_lazy_tlbstate(void);
|
|
+#else
|
|
+static inline void reset_lazy_tlbstate(void)
|
|
+{
|
|
+}
|
|
#endif
|
|
|
|
#endif /* SMP */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/vga.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/vga.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -4,8 +4,8 @@
|
|
* (c) 1998 Martin Mares <mj@ucw.cz>
|
|
*/
|
|
|
|
-#ifndef _LINUX_ASM_VGA_H_
|
|
-#define _LINUX_ASM_VGA_H_
|
|
+#ifndef _ASM_X86_VGA_H
|
|
+#define _ASM_X86_VGA_H
|
|
|
|
/*
|
|
* On the PC, we can just recalculate addresses and then
|
|
@@ -17,4 +17,4 @@
|
|
#define vga_readb(x) (*(x))
|
|
#define vga_writeb(x, y) (*(y) = (x))
|
|
|
|
-#endif
|
|
+#endif /* _ASM_X86_VGA_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/xor.h 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/xor.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
#ifdef CONFIG_X86_32
|
|
-# include "../../xor_32.h"
|
|
+# include "../../asm/xor_32.h"
|
|
#else
|
|
# include "xor_64.h"
|
|
#endif
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/xor_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/xor_64.h 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
-#ifndef ASM_X86__XOR_64_H
|
|
-#define ASM_X86__XOR_64_H
|
|
+#ifndef _ASM_X86_XOR_64_H
|
|
+#define _ASM_X86_XOR_64_H
|
|
|
|
/*
|
|
* x86-64 changes / gcc fixes from Andi Kleen.
|
|
@@ -334,4 +334,4 @@ do { \
|
|
deals with a load to a line that is being prefetched. */
|
|
#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
|
|
|
|
-#endif /* ASM_X86__XOR_64_H */
|
|
+#endif /* _ASM_X86_XOR_64_H */
|
|
--- head-2011-03-17.orig/arch/x86/kernel/Makefile 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/Makefile 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -129,7 +129,7 @@ ifeq ($(CONFIG_X86_64),y)
|
|
time_64-$(CONFIG_XEN) += time_32.o
|
|
endif
|
|
|
|
-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
|
|
- i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o smpboot.o \
|
|
- tlb_$(BITS).o tsc.o tsc_sync.o vsmp_64.o
|
|
+disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o genx2apic_%.o \
|
|
+ hpet.o i8253.o i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o \
|
|
+ smpboot.o tlb_$(BITS).o tsc.o tsc_sync.o uv_%.o vsmp_64.o
|
|
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
|
|
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -10,6 +10,7 @@
|
|
#include <linux/dmi.h>
|
|
#include <linux/cpumask.h>
|
|
#include <asm/segment.h>
|
|
+#include <asm/desc.h>
|
|
|
|
#include "realmode/wakeup.h"
|
|
#include "sleep.h"
|
|
@@ -22,7 +23,7 @@ unsigned long acpi_realmode_flags;
|
|
static unsigned long acpi_realmode;
|
|
|
|
#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
|
|
-static char temp_stack[10240];
|
|
+static char temp_stack[4096];
|
|
#endif
|
|
#endif
|
|
|
|
@@ -100,7 +101,9 @@ int acpi_save_state_mem(void)
|
|
#else /* CONFIG_64BIT */
|
|
header->trampoline_segment = setup_trampoline() >> 4;
|
|
#ifdef CONFIG_SMP
|
|
- stack_start.sp = temp_stack + 4096;
|
|
+ stack_start.sp = temp_stack + sizeof(temp_stack);
|
|
+ early_gdt_descr.address =
|
|
+ (unsigned long)get_cpu_gdt_table(smp_processor_id());
|
|
#endif
|
|
initial_code = (unsigned long)wakeup_long64;
|
|
saved_magic = 0x123456789abcdef0;
|
|
--- head-2011-03-17.orig/arch/x86/kernel/apic/apic-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/apic-xen.c 2011-02-24 15:49:32.000000000 +0100
|
|
@@ -1,60 +1,13 @@
|
|
/*
|
|
- * Local APIC handling, local APIC timers
|
|
- *
|
|
- * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com>
|
|
- *
|
|
- * Fixes
|
|
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
|
|
- * thanks to Eric Gilmore
|
|
- * and Rolf G. Tews
|
|
- * for testing these extensively.
|
|
- * Maciej W. Rozycki : Various updates and fixes.
|
|
- * Mikael Pettersson : Power Management for UP-APIC.
|
|
- * Pavel Machek and
|
|
- * Mikael Pettersson : PM converted to driver model.
|
|
+ * Local APIC handling stubs
|
|
*/
|
|
|
|
#include <linux/init.h>
|
|
-
|
|
-#include <linux/mm.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/bootmem.h>
|
|
#include <linux/interrupt.h>
|
|
-#include <linux/mc146818rtc.h>
|
|
-#include <linux/kernel_stat.h>
|
|
-#include <linux/sysdev.h>
|
|
-#include <linux/cpu.h>
|
|
-#include <linux/clockchips.h>
|
|
-#include <linux/acpi_pmtmr.h>
|
|
-#include <linux/module.h>
|
|
|
|
-#include <asm/atomic.h>
|
|
#include <asm/smp.h>
|
|
-#include <asm/mtrr.h>
|
|
-#include <asm/mpspec.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/arch_hooks.h>
|
|
-#include <asm/hpet.h>
|
|
-#include <asm/i8253.h>
|
|
-#include <asm/nmi.h>
|
|
-
|
|
-#include <mach_apic.h>
|
|
-#include <mach_apicdef.h>
|
|
-#include <mach_ipi.h>
|
|
-
|
|
-#include "io_ports.h"
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * cpu_mask that denotes the CPUs that needs timer interrupt coming in as
|
|
- * IPIs in place of local APIC timers
|
|
- */
|
|
-static cpumask_t timer_bcast_ipi;
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Knob to control our willingness to enable the local APIC.
|
|
- */
|
|
+#include <asm/proto.h>
|
|
+#include <asm/apic.h>
|
|
|
|
/*
|
|
* Debug level, exported for io_apic.c
|
|
@@ -64,21 +17,29 @@ unsigned int apic_verbosity;
|
|
/* Have we found an MP table */
|
|
int smp_found_config;
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-static int modern_apic(void)
|
|
+static int __init apic_set_verbosity(char *arg)
|
|
{
|
|
- /* AMD systems use old APIC versions, so check the CPU */
|
|
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
|
|
- boot_cpu_data.x86 >= 0xf)
|
|
- return 1;
|
|
- return lapic_get_version() >= 0x14;
|
|
-}
|
|
-#endif /* !CONFIG_XEN */
|
|
+ if (!arg) {
|
|
+#ifdef CONFIG_X86_64
|
|
+ skip_ioapic_setup = 0;
|
|
+ return 0;
|
|
+#endif
|
|
+ return -EINVAL;
|
|
+ }
|
|
|
|
-int get_physical_broadcast(void)
|
|
-{
|
|
- return 0xff;
|
|
+ if (strcmp("debug", arg) == 0)
|
|
+ apic_verbosity = APIC_DEBUG;
|
|
+ else if (strcmp("verbose", arg) == 0)
|
|
+ apic_verbosity = APIC_VERBOSE;
|
|
+ else {
|
|
+ printk(KERN_WARNING "APIC Verbosity level %s not recognised"
|
|
+ " use apic=verbose or apic=debug\n", arg);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
}
|
|
+early_param("apic", apic_set_verbosity);
|
|
|
|
int setup_profiling_timer(unsigned int multiplier)
|
|
{
|
|
@@ -93,9 +54,12 @@ int setup_profiling_timer(unsigned int m
|
|
int __init APIC_init_uniprocessor(void)
|
|
{
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
- if (smp_found_config)
|
|
- if (!skip_ioapic_setup && nr_ioapics)
|
|
- setup_IO_APIC();
|
|
+ if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
|
|
+ setup_IO_APIC();
|
|
+# ifdef CONFIG_X86_64
|
|
+ else
|
|
+ nr_ioapics = 0;
|
|
+# endif
|
|
#endif
|
|
|
|
return 0;
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/topology.c 2011-03-17 14:35:46.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/topology.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -28,7 +28,7 @@
|
|
*/
|
|
void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
|
|
{
|
|
-#ifdef CONFIG_SMP
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
unsigned int eax, ebx, ecx, edx, sub_index;
|
|
unsigned int ht_mask_width, core_plus_mask_width;
|
|
unsigned int core_select_mask, core_level_siblings;
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:40:32.000000000 +0100
|
|
@@ -1,33 +1,73 @@
|
|
#include <linux/init.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/sched.h>
|
|
#include <linux/string.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/bitops.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kgdb.h>
|
|
+#include <linux/topology.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/module.h>
|
|
#include <linux/percpu.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <asm/processor.h>
|
|
#include <asm/i387.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/io.h>
|
|
+#include <asm/linkage.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/mtrr.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/pat.h>
|
|
#include <asm/asm.h>
|
|
+#include <asm/numa.h>
|
|
+#include <asm/smp.h>
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
#include <asm/mpspec.h>
|
|
#include <asm/apic.h>
|
|
#include <mach_apic.h>
|
|
-#else
|
|
+#include <asm/genapic.h>
|
|
+#elif defined(CONFIG_X86_64_XEN)
|
|
+#include <mach_apic.h>
|
|
+#endif
|
|
+
|
|
+#include <asm/pda.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/processor.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/proto.h>
|
|
+#include <asm/sections.h>
|
|
+#include <asm/setup.h>
|
|
+
|
|
#ifdef CONFIG_XEN
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_LOCAL_APIC)
|
|
#define phys_pkg_id(a,b) a
|
|
#endif
|
|
-#endif
|
|
#include <asm/hypervisor.h>
|
|
+#include <xen/interface/callback.h>
|
|
+#endif
|
|
|
|
#include "cpu.h"
|
|
|
|
+static struct cpu_dev *this_cpu __cpuinitdata;
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+/* We need valid kernel segments for data and code in long mode too
|
|
+ * IRET will check the segment types kkeil 2000/10/28
|
|
+ * Also sysret mandates a special GDT layout
|
|
+ */
|
|
+/* The TLS descriptors are currently at a different place compared to i386.
|
|
+ Hopefully nobody expects them at a fixed place (Wine?) */
|
|
DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
|
+ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
|
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
|
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
|
+} };
|
|
+#else
|
|
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
|
|
[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
|
|
[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
|
|
[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
|
|
@@ -63,17 +103,171 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page
|
|
#endif
|
|
[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
|
|
} };
|
|
+#endif
|
|
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
|
|
|
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
-
|
|
+#ifdef CONFIG_X86_32
|
|
static int cachesize_override __cpuinitdata = -1;
|
|
+
|
|
+static int __init cachesize_setup(char *str)
|
|
+{
|
|
+ get_option(&str, &cachesize_override);
|
|
+ return 1;
|
|
+}
|
|
+__setup("cachesize=", cachesize_setup);
|
|
+
|
|
+static int __init x86_fxsr_setup(char *s)
|
|
+{
|
|
+ setup_clear_cpu_cap(X86_FEATURE_FXSR);
|
|
+ setup_clear_cpu_cap(X86_FEATURE_XMM);
|
|
+ return 1;
|
|
+}
|
|
+__setup("nofxsr", x86_fxsr_setup);
|
|
+
|
|
+static int __init x86_sep_setup(char *s)
|
|
+{
|
|
+ setup_clear_cpu_cap(X86_FEATURE_SEP);
|
|
+ return 1;
|
|
+}
|
|
+__setup("nosep", x86_sep_setup);
|
|
+#endif
|
|
+
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
+/* Standard macro to see if a specific flag is changeable */
|
|
+static inline int flag_is_changeable_p(u32 flag)
|
|
+{
|
|
+ u32 f1, f2;
|
|
+
|
|
+ /*
|
|
+ * Cyrix and IDT cpus allow disabling of CPUID
|
|
+ * so the code below may return different results
|
|
+ * when it is executed before and after enabling
|
|
+ * the CPUID. Add "volatile" to not allow gcc to
|
|
+ * optimize the subsequent calls to this function.
|
|
+ */
|
|
+ asm volatile ("pushfl\n\t"
|
|
+ "pushfl\n\t"
|
|
+ "popl %0\n\t"
|
|
+ "movl %0,%1\n\t"
|
|
+ "xorl %2,%0\n\t"
|
|
+ "pushl %0\n\t"
|
|
+ "popfl\n\t"
|
|
+ "pushfl\n\t"
|
|
+ "popl %0\n\t"
|
|
+ "popfl\n\t"
|
|
+ : "=&r" (f1), "=&r" (f2)
|
|
+ : "ir" (flag));
|
|
+
|
|
+ return ((f1^f2) & flag) != 0;
|
|
+}
|
|
+
|
|
+/* Probe for the CPUID instruction */
|
|
+static int __cpuinit have_cpuid_p(void)
|
|
+{
|
|
+ return flag_is_changeable_p(X86_EFLAGS_ID);
|
|
+}
|
|
+
|
|
static int disable_x86_serial_nr __cpuinitdata = 1;
|
|
|
|
-struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
|
+static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
|
|
+ /* Disable processor serial number */
|
|
+ unsigned long lo, hi;
|
|
+ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
+ lo |= 0x200000;
|
|
+ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
|
|
+ clear_cpu_cap(c, X86_FEATURE_PN);
|
|
+
|
|
+ /* Disabling the serial number may affect the cpuid level */
|
|
+ c->cpuid_level = cpuid_eax(0);
|
|
+ }
|
|
+}
|
|
+
|
|
+static int __init x86_serial_nr_setup(char *s)
|
|
+{
|
|
+ disable_x86_serial_nr = 0;
|
|
+ return 1;
|
|
+}
|
|
+__setup("serialnumber", x86_serial_nr_setup);
|
|
+#else
|
|
+static inline int flag_is_changeable_p(u32 flag)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+/* Probe for the CPUID instruction */
|
|
+static inline int have_cpuid_p(void)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
|
|
+{
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Naming convention should be: <Name> [(<Codename>)]
|
|
+ * This table only is used unless init_<vendor>() below doesn't set it;
|
|
+ * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
|
|
+ *
|
|
+ */
|
|
+
|
|
+/* Look up CPU names by table lookup. */
|
|
+static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ struct cpu_model_info *info;
|
|
+
|
|
+ if (c->x86_model >= 16)
|
|
+ return NULL; /* Range check */
|
|
+
|
|
+ if (!this_cpu)
|
|
+ return NULL;
|
|
+
|
|
+ info = this_cpu->c_models;
|
|
+
|
|
+ while (info && info->family) {
|
|
+ if (info->family == c->x86)
|
|
+ return info->model_names[c->x86_model];
|
|
+ info++;
|
|
+ }
|
|
+ return NULL; /* Not found */
|
|
+}
|
|
+
|
|
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
+
|
|
+/* Current gdt points %fs at the "master" per-cpu area: after this,
|
|
+ * it's on the real one. */
|
|
+void switch_to_new_gdt(void)
|
|
+{
|
|
+ struct desc_ptr gdt_descr;
|
|
+ unsigned long va, frames[16];
|
|
+ int f;
|
|
+
|
|
+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
|
+ gdt_descr.size = GDT_SIZE - 1;
|
|
+
|
|
+ for (va = gdt_descr.address, f = 0;
|
|
+ va < gdt_descr.address + gdt_descr.size;
|
|
+ va += PAGE_SIZE, f++) {
|
|
+ frames[f] = virt_to_mfn(va);
|
|
+ make_lowmem_page_readonly(
|
|
+ (void *)va, XENFEAT_writable_descriptor_tables);
|
|
+ }
|
|
+ if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8))
|
|
+ BUG();
|
|
+#ifdef CONFIG_X86_32
|
|
+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
|
|
+#endif
|
|
+}
|
|
+
|
|
+static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
|
|
|
static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
|
{
|
|
+#ifdef CONFIG_X86_64
|
|
+ display_cacheinfo(c);
|
|
+#else
|
|
/* Not much we can do here... */
|
|
/* Check if at least it has cpuid */
|
|
if (c->cpuid_level == -1) {
|
|
@@ -83,28 +277,22 @@ static void __cpuinit default_init(struc
|
|
else if (c->x86 == 3)
|
|
strcpy(c->x86_model_id, "386");
|
|
}
|
|
+#endif
|
|
}
|
|
|
|
static struct cpu_dev __cpuinitdata default_cpu = {
|
|
.c_init = default_init,
|
|
.c_vendor = "Unknown",
|
|
+ .c_x86_vendor = X86_VENDOR_UNKNOWN,
|
|
};
|
|
-static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
|
|
-
|
|
-static int __init cachesize_setup(char *str)
|
|
-{
|
|
- get_option(&str, &cachesize_override);
|
|
- return 1;
|
|
-}
|
|
-__setup("cachesize=", cachesize_setup);
|
|
|
|
-int __cpuinit get_model_name(struct cpuinfo_x86 *c)
|
|
+static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
|
|
{
|
|
unsigned int *v;
|
|
char *p, *q;
|
|
|
|
- if (cpuid_eax(0x80000000) < 0x80000004)
|
|
- return 0;
|
|
+ if (c->extended_cpuid_level < 0x80000004)
|
|
+ return;
|
|
|
|
v = (unsigned int *) c->x86_model_id;
|
|
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
|
|
@@ -123,30 +311,34 @@ int __cpuinit get_model_name(struct cpui
|
|
while (q <= &c->x86_model_id[48])
|
|
*q++ = '\0'; /* Zero-pad the rest */
|
|
}
|
|
-
|
|
- return 1;
|
|
}
|
|
|
|
-
|
|
void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
|
|
{
|
|
- unsigned int n, dummy, ecx, edx, l2size;
|
|
+ unsigned int n, dummy, ebx, ecx, edx, l2size;
|
|
|
|
- n = cpuid_eax(0x80000000);
|
|
+ n = c->extended_cpuid_level;
|
|
|
|
if (n >= 0x80000005) {
|
|
- cpuid(0x80000005, &dummy, &dummy, &ecx, &edx);
|
|
+ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
|
|
printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
|
|
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
|
- c->x86_cache_size = (ecx>>24)+(edx>>24);
|
|
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
|
+ c->x86_cache_size = (ecx>>24) + (edx>>24);
|
|
+#ifdef CONFIG_X86_64
|
|
+ /* On K8 L1 TLB is inclusive, so don't count it */
|
|
+ c->x86_tlbsize = 0;
|
|
+#endif
|
|
}
|
|
|
|
if (n < 0x80000006) /* Some chips just has a large L1. */
|
|
return;
|
|
|
|
- ecx = cpuid_ecx(0x80000006);
|
|
+ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
|
|
l2size = ecx >> 16;
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
+ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
|
|
+#else
|
|
/* do processor-specific cache resizing */
|
|
if (this_cpu->c_size_cache)
|
|
l2size = this_cpu->c_size_cache(c, l2size);
|
|
@@ -157,116 +349,106 @@ void __cpuinit display_cacheinfo(struct
|
|
|
|
if (l2size == 0)
|
|
return; /* Again, no L2 cache is possible */
|
|
+#endif
|
|
|
|
c->x86_cache_size = l2size;
|
|
|
|
printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
|
|
- l2size, ecx & 0xFF);
|
|
+ l2size, ecx & 0xFF);
|
|
}
|
|
|
|
-/*
|
|
- * Naming convention should be: <Name> [(<Codename>)]
|
|
- * This table only is used unless init_<vendor>() below doesn't set it;
|
|
- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
|
|
- *
|
|
- */
|
|
-
|
|
-/* Look up CPU names by table lookup. */
|
|
-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
|
|
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
{
|
|
- struct cpu_model_info *info;
|
|
+#ifdef CONFIG_X86_HT
|
|
+ u32 eax, ebx, ecx, edx;
|
|
+ int index_msb, core_bits;
|
|
|
|
- if (c->x86_model >= 16)
|
|
- return NULL; /* Range check */
|
|
+ if (!cpu_has(c, X86_FEATURE_HT))
|
|
+ return;
|
|
|
|
- if (!this_cpu)
|
|
- return NULL;
|
|
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
|
|
+ goto out;
|
|
|
|
- info = this_cpu->c_models;
|
|
+ if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
|
|
+ return;
|
|
|
|
- while (info && info->family) {
|
|
- if (info->family == c->x86)
|
|
- return info->model_names[c->x86_model];
|
|
- info++;
|
|
+ cpuid(1, &eax, &ebx, &ecx, &edx);
|
|
+
|
|
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
|
|
+
|
|
+ if (smp_num_siblings == 1) {
|
|
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
+ } else if (smp_num_siblings > 1) {
|
|
+
|
|
+ if (smp_num_siblings > NR_CPUS) {
|
|
+ printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
|
|
+ smp_num_siblings);
|
|
+ smp_num_siblings = 1;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ index_msb = get_count_order(smp_num_siblings);
|
|
+#ifdef CONFIG_X86_64
|
|
+ c->phys_proc_id = phys_pkg_id(index_msb);
|
|
+#else
|
|
+ c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
|
|
+#endif
|
|
+
|
|
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
+
|
|
+ index_msb = get_count_order(smp_num_siblings);
|
|
+
|
|
+ core_bits = get_count_order(c->x86_max_cores);
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ c->cpu_core_id = phys_pkg_id(index_msb) &
|
|
+ ((1 << core_bits) - 1);
|
|
+#else
|
|
+ c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
|
|
+ ((1 << core_bits) - 1);
|
|
+#endif
|
|
}
|
|
- return NULL; /* Not found */
|
|
-}
|
|
|
|
+out:
|
|
+ if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
|
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
|
+ c->phys_proc_id);
|
|
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
|
+ c->cpu_core_id);
|
|
+ }
|
|
+#endif
|
|
+}
|
|
|
|
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early)
|
|
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
|
|
{
|
|
char *v = c->x86_vendor_id;
|
|
int i;
|
|
static int printed;
|
|
|
|
for (i = 0; i < X86_VENDOR_NUM; i++) {
|
|
- if (cpu_devs[i]) {
|
|
- if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
|
|
- (cpu_devs[i]->c_ident[1] &&
|
|
- !strcmp(v, cpu_devs[i]->c_ident[1]))) {
|
|
- c->x86_vendor = i;
|
|
- if (!early)
|
|
- this_cpu = cpu_devs[i];
|
|
- return;
|
|
- }
|
|
+ if (!cpu_devs[i])
|
|
+ break;
|
|
+
|
|
+ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
|
|
+ (cpu_devs[i]->c_ident[1] &&
|
|
+ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
|
|
+ this_cpu = cpu_devs[i];
|
|
+ c->x86_vendor = this_cpu->c_x86_vendor;
|
|
+ return;
|
|
}
|
|
}
|
|
+
|
|
if (!printed) {
|
|
printed++;
|
|
- printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
|
|
+ printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
|
|
printk(KERN_ERR "CPU: Your system may be unstable.\n");
|
|
}
|
|
+
|
|
c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
this_cpu = &default_cpu;
|
|
}
|
|
|
|
-
|
|
-static int __init x86_fxsr_setup(char *s)
|
|
-{
|
|
- setup_clear_cpu_cap(X86_FEATURE_FXSR);
|
|
- setup_clear_cpu_cap(X86_FEATURE_XMM);
|
|
- return 1;
|
|
-}
|
|
-__setup("nofxsr", x86_fxsr_setup);
|
|
-
|
|
-
|
|
-static int __init x86_sep_setup(char *s)
|
|
-{
|
|
- setup_clear_cpu_cap(X86_FEATURE_SEP);
|
|
- return 1;
|
|
-}
|
|
-__setup("nosep", x86_sep_setup);
|
|
-
|
|
-
|
|
-/* Standard macro to see if a specific flag is changeable */
|
|
-static inline int flag_is_changeable_p(u32 flag)
|
|
-{
|
|
- u32 f1, f2;
|
|
-
|
|
- asm("pushfl\n\t"
|
|
- "pushfl\n\t"
|
|
- "popl %0\n\t"
|
|
- "movl %0,%1\n\t"
|
|
- "xorl %2,%0\n\t"
|
|
- "pushl %0\n\t"
|
|
- "popfl\n\t"
|
|
- "pushfl\n\t"
|
|
- "popl %0\n\t"
|
|
- "popfl\n\t"
|
|
- : "=&r" (f1), "=&r" (f2)
|
|
- : "ir" (flag));
|
|
-
|
|
- return ((f1^f2) & flag) != 0;
|
|
-}
|
|
-
|
|
-
|
|
-/* Probe for the CPUID instruction */
|
|
-static int __cpuinit have_cpuid_p(void)
|
|
-{
|
|
- return flag_is_changeable_p(X86_EFLAGS_ID);
|
|
-}
|
|
-
|
|
-void __init cpu_detect(struct cpuinfo_x86 *c)
|
|
+void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
|
|
{
|
|
/* Get vendor name */
|
|
cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
|
|
@@ -275,48 +457,85 @@ void __init cpu_detect(struct cpuinfo_x8
|
|
(unsigned int *)&c->x86_vendor_id[4]);
|
|
|
|
c->x86 = 4;
|
|
+ /* Intel-defined flags: level 0x00000001 */
|
|
if (c->cpuid_level >= 0x00000001) {
|
|
u32 junk, tfms, cap0, misc;
|
|
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
|
|
- c->x86 = (tfms >> 8) & 15;
|
|
- c->x86_model = (tfms >> 4) & 15;
|
|
+ c->x86 = (tfms >> 8) & 0xf;
|
|
+ c->x86_model = (tfms >> 4) & 0xf;
|
|
+ c->x86_mask = tfms & 0xf;
|
|
if (c->x86 == 0xf)
|
|
c->x86 += (tfms >> 20) & 0xff;
|
|
if (c->x86 >= 0x6)
|
|
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
|
|
- c->x86_mask = tfms & 15;
|
|
+ c->x86_model += ((tfms >> 16) & 0xf) << 4;
|
|
if (cap0 & (1<<19)) {
|
|
- c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
|
|
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
|
|
+ c->x86_cache_alignment = c->x86_clflush_size;
|
|
}
|
|
}
|
|
}
|
|
-static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
|
|
+
|
|
+static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
|
|
{
|
|
u32 tfms, xlvl;
|
|
- unsigned int ebx;
|
|
+ u32 ebx;
|
|
|
|
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
- if (have_cpuid_p()) {
|
|
- /* Intel-defined flags: level 0x00000001 */
|
|
- if (c->cpuid_level >= 0x00000001) {
|
|
- u32 capability, excap;
|
|
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
|
|
- c->x86_capability[0] = capability;
|
|
- c->x86_capability[4] = excap;
|
|
- }
|
|
+ /* Intel-defined flags: level 0x00000001 */
|
|
+ if (c->cpuid_level >= 0x00000001) {
|
|
+ u32 capability, excap;
|
|
+ cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
|
|
+ c->x86_capability[0] = capability;
|
|
+ c->x86_capability[4] = excap;
|
|
+ }
|
|
|
|
- /* AMD-defined flags: level 0x80000001 */
|
|
- xlvl = cpuid_eax(0x80000000);
|
|
- if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
- if (xlvl >= 0x80000001) {
|
|
- c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
- c->x86_capability[6] = cpuid_ecx(0x80000001);
|
|
- }
|
|
+ /* AMD-defined flags: level 0x80000001 */
|
|
+ xlvl = cpuid_eax(0x80000000);
|
|
+ c->extended_cpuid_level = xlvl;
|
|
+ if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
+ if (xlvl >= 0x80000001) {
|
|
+ c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
|
|
}
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (c->extended_cpuid_level >= 0x80000008) {
|
|
+ u32 eax = cpuid_eax(0x80000008);
|
|
|
|
+ c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
+ c->x86_phys_bits = eax & 0xff;
|
|
}
|
|
+#endif
|
|
+
|
|
+ if (c->extended_cpuid_level >= 0x80000007)
|
|
+ c->x86_power = cpuid_edx(0x80000007);
|
|
+
|
|
+}
|
|
+
|
|
+static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
|
|
+{
|
|
+#ifdef CONFIG_X86_32
|
|
+ int i;
|
|
|
|
+ /*
|
|
+ * First of all, decide if this is a 486 or higher
|
|
+ * It's a 486 if we can modify the AC flag
|
|
+ */
|
|
+ if (flag_is_changeable_p(X86_EFLAGS_AC))
|
|
+ c->x86 = 4;
|
|
+ else
|
|
+ c->x86 = 3;
|
|
+
|
|
+ for (i = 0; i < X86_VENDOR_NUM; i++)
|
|
+ if (cpu_devs[i] && cpu_devs[i]->c_identify) {
|
|
+ c->x86_vendor_id[0] = 0;
|
|
+ cpu_devs[i]->c_identify(c);
|
|
+ if (c->x86_vendor_id[0]) {
|
|
+ get_cpu_vendor(c);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
}
|
|
|
|
/*
|
|
@@ -328,25 +547,65 @@ static void __cpuinit early_get_cap(stru
|
|
* WARNING: this function is only called on the BP. Don't add code here
|
|
* that is supposed to run on all CPUs.
|
|
*/
|
|
-static void __init early_cpu_detect(void)
|
|
+static void __init early_identify_cpu(struct cpuinfo_x86 *c)
|
|
{
|
|
- struct cpuinfo_x86 *c = &boot_cpu_data;
|
|
-
|
|
- c->x86_cache_alignment = 32;
|
|
+#ifdef CONFIG_X86_64
|
|
+ c->x86_clflush_size = 64;
|
|
+#else
|
|
c->x86_clflush_size = 32;
|
|
+#endif
|
|
+ c->x86_cache_alignment = c->x86_clflush_size;
|
|
+
|
|
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
+ c->extended_cpuid_level = 0;
|
|
+
|
|
+ if (!have_cpuid_p())
|
|
+ identify_cpu_without_cpuid(c);
|
|
|
|
+ /* cyrix could have cpuid enabled via c_identify()*/
|
|
if (!have_cpuid_p())
|
|
return;
|
|
|
|
cpu_detect(c);
|
|
|
|
- get_cpu_vendor(c, 1);
|
|
+ get_cpu_vendor(c);
|
|
+
|
|
+ get_cpu_cap(c);
|
|
+
|
|
+ if (this_cpu->c_early_init)
|
|
+ this_cpu->c_early_init(c);
|
|
+
|
|
+ validate_pat_support(c);
|
|
|
|
- early_get_cap(c);
|
|
+#ifdef CONFIG_SMP
|
|
+ c->cpu_index = boot_cpu_id;
|
|
+#endif
|
|
+}
|
|
+
|
|
+void __init early_cpu_init(void)
|
|
+{
|
|
+ struct cpu_dev **cdev;
|
|
+ int count = 0;
|
|
+
|
|
+ printk("KERNEL supported cpus:\n");
|
|
+ for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
|
|
+ struct cpu_dev *cpudev = *cdev;
|
|
+ unsigned int j;
|
|
+
|
|
+ if (count >= X86_VENDOR_NUM)
|
|
+ break;
|
|
+ cpu_devs[count] = cpudev;
|
|
+ count++;
|
|
+
|
|
+ for (j = 0; j < 2; j++) {
|
|
+ if (!cpudev->c_ident[j])
|
|
+ continue;
|
|
+ printk(" %s %s\n", cpudev->c_vendor,
|
|
+ cpudev->c_ident[j]);
|
|
+ }
|
|
+ }
|
|
|
|
- if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
|
|
- cpu_devs[c->x86_vendor]->c_early_init)
|
|
- cpu_devs[c->x86_vendor]->c_early_init(c);
|
|
+ early_identify_cpu(&boot_cpu_data);
|
|
}
|
|
|
|
/*
|
|
@@ -364,88 +623,43 @@ static void __cpuinit detect_nopl(struct
|
|
|
|
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
|
|
{
|
|
- u32 tfms, xlvl;
|
|
- unsigned int ebx;
|
|
+ c->extended_cpuid_level = 0;
|
|
|
|
- if (have_cpuid_p()) {
|
|
- /* Get vendor name */
|
|
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
|
|
- (unsigned int *)&c->x86_vendor_id[0],
|
|
- (unsigned int *)&c->x86_vendor_id[8],
|
|
- (unsigned int *)&c->x86_vendor_id[4]);
|
|
-
|
|
- get_cpu_vendor(c, 0);
|
|
- /* Initialize the standard set of capabilities */
|
|
- /* Note that the vendor-specific code below might override */
|
|
- /* Intel-defined flags: level 0x00000001 */
|
|
- if (c->cpuid_level >= 0x00000001) {
|
|
- u32 capability, excap;
|
|
- cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
|
|
- c->x86_capability[0] = capability;
|
|
- c->x86_capability[4] = excap;
|
|
- c->x86 = (tfms >> 8) & 15;
|
|
- c->x86_model = (tfms >> 4) & 15;
|
|
- if (c->x86 == 0xf)
|
|
- c->x86 += (tfms >> 20) & 0xff;
|
|
- if (c->x86 >= 0x6)
|
|
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
|
|
- c->x86_mask = tfms & 15;
|
|
-#ifndef CONFIG_XEN
|
|
- c->initial_apicid = (ebx >> 24) & 0xFF;
|
|
-#ifdef CONFIG_X86_HT
|
|
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
|
|
- c->phys_proc_id = c->initial_apicid;
|
|
-#else
|
|
- c->apicid = c->initial_apicid;
|
|
-#endif
|
|
-#endif
|
|
- if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
|
|
- c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
|
|
- } else {
|
|
- /* Have CPUID level 0 only - unheard of */
|
|
- c->x86 = 4;
|
|
- }
|
|
+ if (!have_cpuid_p())
|
|
+ identify_cpu_without_cpuid(c);
|
|
|
|
- /* AMD-defined flags: level 0x80000001 */
|
|
- xlvl = cpuid_eax(0x80000000);
|
|
- if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
- if (xlvl >= 0x80000001) {
|
|
- c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
- c->x86_capability[6] = cpuid_ecx(0x80000001);
|
|
- }
|
|
- if (xlvl >= 0x80000004)
|
|
- get_model_name(c); /* Default name */
|
|
- }
|
|
+ /* cyrix could have cpuid enabled via c_identify()*/
|
|
+ if (!have_cpuid_p())
|
|
+ return;
|
|
|
|
- init_scattered_cpuid_features(c);
|
|
- detect_nopl(c);
|
|
- }
|
|
-}
|
|
+ cpu_detect(c);
|
|
|
|
-static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
|
|
-{
|
|
- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
|
|
- /* Disable processor serial number */
|
|
- unsigned long lo, hi;
|
|
- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
- lo |= 0x200000;
|
|
- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
- printk(KERN_NOTICE "CPU serial number disabled.\n");
|
|
- clear_cpu_cap(c, X86_FEATURE_PN);
|
|
+ get_cpu_vendor(c);
|
|
|
|
- /* Disabling the serial number may affect the cpuid level */
|
|
- c->cpuid_level = cpuid_eax(0);
|
|
- }
|
|
-}
|
|
+ get_cpu_cap(c);
|
|
|
|
-static int __init x86_serial_nr_setup(char *s)
|
|
-{
|
|
- disable_x86_serial_nr = 0;
|
|
- return 1;
|
|
-}
|
|
-__setup("serialnumber", x86_serial_nr_setup);
|
|
+#ifndef CONFIG_XEN
|
|
+ if (c->cpuid_level >= 0x00000001) {
|
|
+ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
|
|
+#ifdef CONFIG_X86_32
|
|
+# ifdef CONFIG_X86_HT
|
|
+ c->apicid = phys_pkg_id(c->initial_apicid, 0);
|
|
+# else
|
|
+ c->apicid = c->initial_apicid;
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_HT
|
|
+ c->phys_proc_id = c->initial_apicid;
|
|
+#endif
|
|
+ }
|
|
+#endif
|
|
|
|
+ get_model_name(c); /* Default name */
|
|
|
|
+ init_scattered_cpuid_features(c);
|
|
+ detect_nopl(c);
|
|
+}
|
|
|
|
/*
|
|
* This does the hard work of actually picking apart the CPU stuff...
|
|
@@ -457,34 +671,33 @@ static void __cpuinit identify_cpu(struc
|
|
c->loops_per_jiffy = loops_per_jiffy;
|
|
c->x86_cache_size = -1;
|
|
c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
- c->cpuid_level = -1; /* CPUID not detected */
|
|
c->x86_model = c->x86_mask = 0; /* So far unknown... */
|
|
c->x86_vendor_id[0] = '\0'; /* Unset */
|
|
c->x86_model_id[0] = '\0'; /* Unset */
|
|
#ifndef CONFIG_XEN
|
|
c->x86_max_cores = 1;
|
|
+ c->x86_coreid_bits = 0;
|
|
#endif
|
|
+#ifdef CONFIG_X86_64
|
|
+ c->x86_clflush_size = 64;
|
|
+#else
|
|
+ c->cpuid_level = -1; /* CPUID not detected */
|
|
c->x86_clflush_size = 32;
|
|
+#endif
|
|
+ c->x86_cache_alignment = c->x86_clflush_size;
|
|
memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
if (boot_cpu_has(X86_FEATURE_SYSCALL32))
|
|
set_cpu_cap(c, X86_FEATURE_SYSCALL32);
|
|
|
|
- if (!have_cpuid_p()) {
|
|
- /*
|
|
- * First of all, decide if this is a 486 or higher
|
|
- * It's a 486 if we can modify the AC flag
|
|
- */
|
|
- if (flag_is_changeable_p(X86_EFLAGS_AC))
|
|
- c->x86 = 4;
|
|
- else
|
|
- c->x86 = 3;
|
|
- }
|
|
-
|
|
generic_identify(c);
|
|
|
|
if (this_cpu->c_identify)
|
|
this_cpu->c_identify(c);
|
|
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+ c->apicid = phys_pkg_id(0);
|
|
+#endif
|
|
+
|
|
/*
|
|
* Vendor-specific initialization. In this section we
|
|
* canonicalize the feature flags, meaning if there are
|
|
@@ -518,6 +731,10 @@ static void __cpuinit identify_cpu(struc
|
|
c->x86, c->x86_model);
|
|
}
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
+ detect_ht(c);
|
|
+#endif
|
|
+
|
|
/*
|
|
* On SMP, boot_cpu_data holds the common feature set between
|
|
* all CPUs; so make sure that we indicate which features are
|
|
@@ -526,7 +743,7 @@ static void __cpuinit identify_cpu(struc
|
|
*/
|
|
if (c != &boot_cpu_data) {
|
|
/* AND the already accumulated flags with these */
|
|
- for (i = 0 ; i < NCAPINTS ; i++)
|
|
+ for (i = 0; i < NCAPINTS; i++)
|
|
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
|
|
}
|
|
|
|
@@ -534,72 +751,91 @@ static void __cpuinit identify_cpu(struc
|
|
for (i = 0; i < NCAPINTS; i++)
|
|
c->x86_capability[i] &= ~cleared_cpu_caps[i];
|
|
|
|
+#ifdef CONFIG_X86_MCE
|
|
/* Init Machine Check Exception if available. */
|
|
mcheck_init(c);
|
|
+#endif
|
|
|
|
select_idle_routine(c);
|
|
+
|
|
+#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
|
|
+ numa_add_cpu(smp_processor_id());
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+static void vgetcpu_set_mode(void)
|
|
+{
|
|
+ if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
|
|
+ vgetcpu_mode = VGETCPU_RDTSCP;
|
|
+ else
|
|
+ vgetcpu_mode = VGETCPU_LSL;
|
|
}
|
|
+#endif
|
|
|
|
void __init identify_boot_cpu(void)
|
|
{
|
|
identify_cpu(&boot_cpu_data);
|
|
+#ifdef CONFIG_X86_32
|
|
sysenter_setup();
|
|
enable_sep_cpu();
|
|
+#else
|
|
+ vgetcpu_set_mode();
|
|
+#endif
|
|
}
|
|
|
|
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
|
{
|
|
BUG_ON(c == &boot_cpu_data);
|
|
identify_cpu(c);
|
|
+#ifdef CONFIG_X86_32
|
|
enable_sep_cpu();
|
|
+#endif
|
|
mtrr_ap_init();
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_HT
|
|
-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
-{
|
|
- u32 eax, ebx, ecx, edx;
|
|
- int index_msb, core_bits;
|
|
-
|
|
- cpuid(1, &eax, &ebx, &ecx, &edx);
|
|
-
|
|
- if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
|
|
- return;
|
|
+struct msr_range {
|
|
+ unsigned min;
|
|
+ unsigned max;
|
|
+};
|
|
|
|
- smp_num_siblings = (ebx & 0xff0000) >> 16;
|
|
+static struct msr_range msr_range_array[] __cpuinitdata = {
|
|
+ { 0x00000000, 0x00000418},
|
|
+ { 0xc0000000, 0xc000040b},
|
|
+ { 0xc0010000, 0xc0010142},
|
|
+ { 0xc0011000, 0xc001103b},
|
|
+};
|
|
|
|
- if (smp_num_siblings == 1) {
|
|
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
- } else if (smp_num_siblings > 1) {
|
|
+static void __cpuinit print_cpu_msr(void)
|
|
+{
|
|
+ unsigned index;
|
|
+ u64 val;
|
|
+ int i;
|
|
+ unsigned index_min, index_max;
|
|
|
|
- if (smp_num_siblings > NR_CPUS) {
|
|
- printk(KERN_WARNING "CPU: Unsupported number of the "
|
|
- "siblings %d", smp_num_siblings);
|
|
- smp_num_siblings = 1;
|
|
- return;
|
|
+ for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
|
|
+ index_min = msr_range_array[i].min;
|
|
+ index_max = msr_range_array[i].max;
|
|
+ for (index = index_min; index < index_max; index++) {
|
|
+ if (rdmsrl_amd_safe(index, &val))
|
|
+ continue;
|
|
+ printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
|
|
}
|
|
+ }
|
|
+}
|
|
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
|
|
-
|
|
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
|
- c->phys_proc_id);
|
|
-
|
|
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
-
|
|
- index_msb = get_count_order(smp_num_siblings) ;
|
|
-
|
|
- core_bits = get_count_order(c->x86_max_cores);
|
|
+static int show_msr __cpuinitdata;
|
|
+static __init int setup_show_msr(char *arg)
|
|
+{
|
|
+ int num;
|
|
|
|
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
|
|
- ((1 << core_bits) - 1);
|
|
+ get_option(&arg, &num);
|
|
|
|
- if (c->x86_max_cores > 1)
|
|
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
|
- c->cpu_core_id);
|
|
- }
|
|
+ if (num > 0)
|
|
+ show_msr = num;
|
|
+ return 1;
|
|
}
|
|
-#endif
|
|
+__setup("show_msr=", setup_show_msr);
|
|
|
|
static __init int setup_noclflush(char *arg)
|
|
{
|
|
@@ -617,18 +853,26 @@ void __cpuinit print_cpu_info(struct cpu
|
|
else if (c->cpuid_level >= 0)
|
|
vendor = c->x86_vendor_id;
|
|
|
|
- if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
|
|
- printk("%s ", vendor);
|
|
+ if (vendor && !strstr(c->x86_model_id, vendor))
|
|
+ printk(KERN_CONT "%s ", vendor);
|
|
|
|
- if (!c->x86_model_id[0])
|
|
- printk("%d86", c->x86);
|
|
+ if (c->x86_model_id[0])
|
|
+ printk(KERN_CONT "%s", c->x86_model_id);
|
|
else
|
|
- printk("%s", c->x86_model_id);
|
|
+ printk(KERN_CONT "%d86", c->x86);
|
|
|
|
if (c->x86_mask || c->cpuid_level >= 0)
|
|
- printk(" stepping %02x\n", c->x86_mask);
|
|
+ printk(KERN_CONT " stepping %02x\n", c->x86_mask);
|
|
else
|
|
- printk("\n");
|
|
+ printk(KERN_CONT "\n");
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+ if (c->cpu_index < show_msr)
|
|
+ print_cpu_msr();
|
|
+#else
|
|
+ if (show_msr)
|
|
+ print_cpu_msr();
|
|
+#endif
|
|
}
|
|
|
|
static __init int setup_disablecpuid(char *arg)
|
|
@@ -644,19 +888,124 @@ __setup("clearcpuid=", setup_disablecpui
|
|
|
|
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
|
|
|
-void __init early_cpu_init(void)
|
|
+#ifdef CONFIG_X86_64
|
|
+struct x8664_pda **_cpu_pda __read_mostly;
|
|
+EXPORT_SYMBOL(_cpu_pda);
|
|
+
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
|
+#endif
|
|
+
|
|
+char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
|
+
|
|
+static void __ref switch_pt(int cpu)
|
|
+{
|
|
+#ifdef CONFIG_XEN
|
|
+ if (cpu == 0)
|
|
+ xen_init_pt();
|
|
+ xen_pt_switch(__pa_symbol(init_level4_pgt));
|
|
+ xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
|
|
+#endif
|
|
+}
|
|
+
|
|
+void __cpuinit pda_init(int cpu)
|
|
+{
|
|
+ struct x8664_pda *pda = cpu_pda(cpu);
|
|
+
|
|
+ /* Setup up data that may be needed in __get_free_pages early */
|
|
+ loadsegment(fs, 0);
|
|
+ loadsegment(gs, 0);
|
|
+#ifndef CONFIG_XEN
|
|
+ /* Memory clobbers used to order PDA accessed */
|
|
+ mb();
|
|
+ wrmsrl(MSR_GS_BASE, pda);
|
|
+ mb();
|
|
+#else
|
|
+ if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
|
|
+ (unsigned long)pda))
|
|
+ BUG();
|
|
+#endif
|
|
+
|
|
+ pda->cpunumber = cpu;
|
|
+ pda->irqcount = -1;
|
|
+ pda->kernelstack = (unsigned long)stack_thread_info() -
|
|
+ PDA_STACKOFFSET + THREAD_SIZE;
|
|
+ pda->active_mm = &init_mm;
|
|
+ pda->mmu_state = 0;
|
|
+
|
|
+ if (cpu == 0) {
|
|
+ /* others are initialized in smpboot.c */
|
|
+ pda->pcurrent = &init_task;
|
|
+ pda->irqstackptr = boot_cpu_stack;
|
|
+ pda->irqstackptr += IRQSTACKSIZE - 64;
|
|
+ } else {
|
|
+ if (!pda->irqstackptr) {
|
|
+ pda->irqstackptr = (char *)
|
|
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
|
|
+ if (!pda->irqstackptr)
|
|
+ panic("cannot allocate irqstack for cpu %d",
|
|
+ cpu);
|
|
+ pda->irqstackptr += IRQSTACKSIZE - 64;
|
|
+ }
|
|
+
|
|
+ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
|
|
+ pda->nodenumber = cpu_to_node(cpu);
|
|
+ }
|
|
+
|
|
+ switch_pt(cpu);
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
|
+ DEBUG_STKSZ] __page_aligned_bss;
|
|
+#endif
|
|
+
|
|
+extern asmlinkage void ignore_sysret(void);
|
|
+
|
|
+void __cpuinit syscall_init(void)
|
|
{
|
|
- struct cpu_vendor_dev *cvdev;
|
|
+#ifndef CONFIG_XEN
|
|
+ /*
|
|
+ * LSTAR and STAR live in a bit strange symbiosis.
|
|
+ * They both write to the same internal register. STAR allows to
|
|
+ * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
|
+ */
|
|
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
|
+ wrmsrl(MSR_LSTAR, system_call);
|
|
+ wrmsrl(MSR_CSTAR, ignore_sysret);
|
|
+#endif
|
|
|
|
- for (cvdev = __x86cpuvendor_start ;
|
|
- cvdev < __x86cpuvendor_end ;
|
|
- cvdev++)
|
|
- cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
|
|
+#ifdef CONFIG_IA32_EMULATION
|
|
+ syscall32_cpu_init();
|
|
+#elif defined(CONFIG_XEN)
|
|
+ static const struct callback_register __cpuinitconst cstar = {
|
|
+ .type = CALLBACKTYPE_syscall32,
|
|
+ .address = (unsigned long)ignore_sysret
|
|
+ };
|
|
|
|
- early_cpu_detect();
|
|
- validate_pat_support(&boot_cpu_data);
|
|
+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
|
|
+ printk(KERN_WARNING "Unable to register CSTAR callback\n");
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ /* Flags to clear on syscall */
|
|
+ wrmsrl(MSR_SYSCALL_MASK,
|
|
+ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
|
|
+#endif
|
|
}
|
|
|
|
+unsigned long kernel_eflags;
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+/*
|
|
+ * Copies of the original ist values from the tss are only accessed during
|
|
+ * debugging, no special alignment required.
|
|
+ */
|
|
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
|
+#endif
|
|
+
|
|
+#else
|
|
+
|
|
/* Make sure %fs is initialized properly in idle threads */
|
|
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
|
|
{
|
|
@@ -664,36 +1013,154 @@ struct pt_regs * __cpuinit idle_regs(str
|
|
regs->fs = __KERNEL_PERCPU;
|
|
return regs;
|
|
}
|
|
+#endif
|
|
|
|
-/* Current gdt points %fs at the "master" per-cpu area: after this,
|
|
- * it's on the real one. */
|
|
-void switch_to_new_gdt(void)
|
|
+/*
|
|
+ * cpu_init() initializes state that is per-CPU. Some data is already
|
|
+ * initialized (naturally) in the bootstrap process, such as the GDT
|
|
+ * and IDT. We reload them nevertheless, this function acts as a
|
|
+ * 'CPU state barrier', nothing should get across.
|
|
+ * A lot of state is already set up in PDA init for 64 bit
|
|
+ */
|
|
+#ifdef CONFIG_X86_64
|
|
+void __cpuinit cpu_init(void)
|
|
{
|
|
- struct desc_ptr gdt_descr;
|
|
- unsigned long va, frames[16];
|
|
- int f;
|
|
+ int cpu = stack_smp_processor_id();
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
|
|
+ struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
|
|
+ unsigned long v;
|
|
+ char *estacks = NULL;
|
|
+ int i;
|
|
+#endif
|
|
+ struct task_struct *me;
|
|
|
|
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
|
- gdt_descr.size = GDT_SIZE - 1;
|
|
+ /* CPU 0 is initialised in head64.c */
|
|
+ if (cpu != 0)
|
|
+ pda_init(cpu);
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ else
|
|
+ estacks = boot_exception_stacks;
|
|
+#endif
|
|
|
|
- for (va = gdt_descr.address, f = 0;
|
|
- va < gdt_descr.address + gdt_descr.size;
|
|
- va += PAGE_SIZE, f++) {
|
|
- frames[f] = virt_to_mfn(va);
|
|
- make_lowmem_page_readonly(
|
|
- (void *)va, XENFEAT_writable_descriptor_tables);
|
|
+ me = current;
|
|
+
|
|
+ if (cpu_test_and_set(cpu, cpu_initialized))
|
|
+ panic("CPU#%d already initialized!\n", cpu);
|
|
+
|
|
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
+
|
|
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
+
|
|
+ /*
|
|
+ * Initialize the per-CPU GDT with the boot GDT,
|
|
+ * and set up the GDT descriptor:
|
|
+ */
|
|
+
|
|
+ switch_to_new_gdt();
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
+ load_idt((const struct desc_ptr *)&idt_descr);
|
|
+#endif
|
|
+
|
|
+ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
|
|
+ syscall_init();
|
|
+
|
|
+ wrmsrl(MSR_FS_BASE, 0);
|
|
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
|
|
+ barrier();
|
|
+
|
|
+ check_efer();
|
|
+#ifndef CONFIG_XEN
|
|
+ if (cpu != 0 && x2apic)
|
|
+ enable_x2apic();
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ /*
|
|
+ * set up and load the per-CPU TSS
|
|
+ */
|
|
+ if (!orig_ist->ist[0]) {
|
|
+ static const unsigned int order[N_EXCEPTION_STACKS] = {
|
|
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
|
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
|
+ };
|
|
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
|
+ if (cpu) {
|
|
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
|
|
+ if (!estacks)
|
|
+ panic("Cannot allocate exception "
|
|
+ "stack %ld %d\n", v, cpu);
|
|
+ }
|
|
+ estacks += PAGE_SIZE << order[v];
|
|
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
|
|
+ (unsigned long)estacks;
|
|
+ }
|
|
}
|
|
- if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8))
|
|
+
|
|
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
|
+ /*
|
|
+ * <= is required because the CPU will access up to
|
|
+ * 8 bits beyond the end of the IO permission bitmap.
|
|
+ */
|
|
+ for (i = 0; i <= IO_BITMAP_LONGS; i++)
|
|
+ t->io_bitmap[i] = ~0UL;
|
|
+#endif
|
|
+
|
|
+ atomic_inc(&init_mm.mm_count);
|
|
+ me->active_mm = &init_mm;
|
|
+ if (me->mm)
|
|
BUG();
|
|
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
|
|
+ enter_lazy_tlb(&init_mm, me);
|
|
+
|
|
+ load_sp0(t, &current->thread);
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ set_tss_desc(cpu, t);
|
|
+ load_TR_desc();
|
|
+#endif
|
|
+ load_LDT(&init_mm.context);
|
|
+
|
|
+#ifdef CONFIG_KGDB
|
|
+ /*
|
|
+ * If the kgdb is connected no debug regs should be altered. This
|
|
+ * is only applicable when KGDB and a KGDB I/O module are built
|
|
+ * into the kernel and you are using early debugging with
|
|
+ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
|
|
+ */
|
|
+ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
|
|
+ arch_kgdb_ops.correct_hw_break();
|
|
+ else {
|
|
+#endif
|
|
+ /*
|
|
+ * Clear all 6 debug registers:
|
|
+ */
|
|
+
|
|
+ set_debugreg(0UL, 0);
|
|
+ set_debugreg(0UL, 1);
|
|
+ set_debugreg(0UL, 2);
|
|
+ set_debugreg(0UL, 3);
|
|
+ set_debugreg(0UL, 6);
|
|
+ set_debugreg(0UL, 7);
|
|
+#ifdef CONFIG_KGDB
|
|
+ /* If the kgdb is connected no debug regs should be altered. */
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ fpu_init();
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ raw_local_save_flags(kernel_eflags);
|
|
+#else
|
|
+ asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
|
|
+ if (raw_irqs_disabled())
|
|
+ kernel_eflags &= ~X86_EFLAGS_IF;
|
|
+#endif
|
|
+
|
|
+ if (is_uv_system())
|
|
+ uv_cpu_init();
|
|
}
|
|
|
|
-/*
|
|
- * cpu_init() initializes state that is per-CPU. Some data is already
|
|
- * initialized (naturally) in the bootstrap process, such as the GDT
|
|
- * and IDT. We reload them nevertheless, this function acts as a
|
|
- * 'CPU state barrier', nothing should get across.
|
|
- */
|
|
+#else
|
|
+
|
|
void __cpuinit cpu_init(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
@@ -747,19 +1214,21 @@ void __cpuinit cpu_init(void)
|
|
/*
|
|
* Force FPU initialization:
|
|
*/
|
|
- current_thread_info()->status = 0;
|
|
+ if (cpu_has_xsave)
|
|
+ current_thread_info()->status = TS_XSAVE;
|
|
+ else
|
|
+ current_thread_info()->status = 0;
|
|
clear_used_math();
|
|
mxcsr_feature_mask_init();
|
|
-}
|
|
|
|
-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
|
|
-void __cpuinit cpu_uninit(void)
|
|
-{
|
|
- int cpu = raw_smp_processor_id();
|
|
- cpu_clear(cpu, cpu_initialized);
|
|
+ /*
|
|
+ * Boot processor to setup the FP and extended state context info.
|
|
+ */
|
|
+ if (smp_processor_id() == boot_cpu_id)
|
|
+ init_thread_xstate();
|
|
|
|
- /* lazy TLB state */
|
|
- per_cpu(cpu_tlbstate, cpu).state = 0;
|
|
- per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
|
|
+ xsave_init();
|
|
}
|
|
+
|
|
+
|
|
#endif
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,777 +0,0 @@
|
|
-#include <linux/init.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/bitops.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/kgdb.h>
|
|
-#include <linux/topology.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/smp.h>
|
|
-#include <linux/percpu.h>
|
|
-#include <asm/i387.h>
|
|
-#include <asm/msr.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/linkage.h>
|
|
-#include <asm/mmu_context.h>
|
|
-#include <asm/mtrr.h>
|
|
-#include <asm/mce.h>
|
|
-#include <asm/pat.h>
|
|
-#include <asm/asm.h>
|
|
-#include <asm/numa.h>
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-#include <asm/mpspec.h>
|
|
-#include <asm/apic.h>
|
|
-#include <mach_apic.h>
|
|
-#elif defined(CONFIG_XEN)
|
|
-#include <mach_apic.h>
|
|
-#endif
|
|
-#include <asm/pda.h>
|
|
-#include <asm/pgtable.h>
|
|
-#include <asm/processor.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/sections.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/genapic.h>
|
|
-
|
|
-#include "cpu.h"
|
|
-
|
|
-/* We need valid kernel segments for data and code in long mode too
|
|
- * IRET will check the segment types kkeil 2000/10/28
|
|
- * Also sysret mandates a special GDT layout
|
|
- */
|
|
-/* The TLS descriptors are currently at a different place compared to i386.
|
|
- Hopefully nobody expects them at a fixed place (Wine?) */
|
|
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
|
- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
|
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
|
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
|
-} };
|
|
-EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
|
-
|
|
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
-
|
|
-/* Current gdt points %fs at the "master" per-cpu area: after this,
|
|
- * it's on the real one. */
|
|
-void switch_to_new_gdt(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct desc_ptr gdt_descr;
|
|
-
|
|
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
|
- gdt_descr.size = GDT_SIZE - 1;
|
|
- load_gdt(&gdt_descr);
|
|
-#else
|
|
- void *va, *gdt_addr = get_cpu_gdt_table(smp_processor_id());
|
|
- unsigned long frames[16];
|
|
- unsigned int f = 0;
|
|
-
|
|
- for (va = gdt_addr; va < gdt_addr + GDT_SIZE; va += PAGE_SIZE) {
|
|
- frames[f++] = virt_to_mfn(va);
|
|
- make_page_readonly(va, XENFEAT_writable_descriptor_tables);
|
|
- }
|
|
- if (HYPERVISOR_set_gdt(frames, GDT_SIZE / sizeof(struct desc_struct)))
|
|
- BUG();
|
|
-#endif
|
|
-}
|
|
-
|
|
-struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
|
-
|
|
-static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
|
-{
|
|
- display_cacheinfo(c);
|
|
-}
|
|
-
|
|
-static struct cpu_dev __cpuinitdata default_cpu = {
|
|
- .c_init = default_init,
|
|
- .c_vendor = "Unknown",
|
|
-};
|
|
-static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
|
|
-
|
|
-int __cpuinit get_model_name(struct cpuinfo_x86 *c)
|
|
-{
|
|
- unsigned int *v;
|
|
-
|
|
- if (c->extended_cpuid_level < 0x80000004)
|
|
- return 0;
|
|
-
|
|
- v = (unsigned int *) c->x86_model_id;
|
|
- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
|
|
- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
|
|
- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
|
|
- c->x86_model_id[48] = 0;
|
|
- return 1;
|
|
-}
|
|
-
|
|
-
|
|
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
|
|
-{
|
|
- unsigned int n, dummy, ebx, ecx, edx;
|
|
-
|
|
- n = c->extended_cpuid_level;
|
|
-
|
|
- if (n >= 0x80000005) {
|
|
- cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
|
|
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
|
|
- "D cache %dK (%d bytes/line)\n",
|
|
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
|
- c->x86_cache_size = (ecx>>24) + (edx>>24);
|
|
- /* On K8 L1 TLB is inclusive, so don't count it */
|
|
- c->x86_tlbsize = 0;
|
|
- }
|
|
-
|
|
- if (n >= 0x80000006) {
|
|
- cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
|
|
- ecx = cpuid_ecx(0x80000006);
|
|
- c->x86_cache_size = ecx >> 16;
|
|
- c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
|
|
-
|
|
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
|
|
- c->x86_cache_size, ecx & 0xFF);
|
|
- }
|
|
-}
|
|
-
|
|
-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
-{
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- u32 eax, ebx, ecx, edx;
|
|
- int index_msb, core_bits;
|
|
-
|
|
- cpuid(1, &eax, &ebx, &ecx, &edx);
|
|
-
|
|
-
|
|
- if (!cpu_has(c, X86_FEATURE_HT))
|
|
- return;
|
|
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
|
|
- goto out;
|
|
-
|
|
- smp_num_siblings = (ebx & 0xff0000) >> 16;
|
|
-
|
|
- if (smp_num_siblings == 1) {
|
|
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
- } else if (smp_num_siblings > 1) {
|
|
-
|
|
- if (smp_num_siblings > NR_CPUS) {
|
|
- printk(KERN_WARNING "CPU: Unsupported number of "
|
|
- "siblings %d", smp_num_siblings);
|
|
- smp_num_siblings = 1;
|
|
- return;
|
|
- }
|
|
-
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
- c->phys_proc_id = phys_pkg_id(index_msb);
|
|
-
|
|
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
-
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
-
|
|
- core_bits = get_count_order(c->x86_max_cores);
|
|
-
|
|
- c->cpu_core_id = phys_pkg_id(index_msb) &
|
|
- ((1 << core_bits) - 1);
|
|
- }
|
|
-out:
|
|
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
|
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
|
- c->phys_proc_id);
|
|
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
|
- c->cpu_core_id);
|
|
- }
|
|
-
|
|
-#endif
|
|
-}
|
|
-
|
|
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
|
|
-{
|
|
- char *v = c->x86_vendor_id;
|
|
- int i;
|
|
- static int printed;
|
|
-
|
|
- for (i = 0; i < X86_VENDOR_NUM; i++) {
|
|
- if (cpu_devs[i]) {
|
|
- if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
|
|
- (cpu_devs[i]->c_ident[1] &&
|
|
- !strcmp(v, cpu_devs[i]->c_ident[1]))) {
|
|
- c->x86_vendor = i;
|
|
- this_cpu = cpu_devs[i];
|
|
- return;
|
|
- }
|
|
- }
|
|
- }
|
|
- if (!printed) {
|
|
- printed++;
|
|
- printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
|
|
- printk(KERN_ERR "CPU: Your system may be unstable.\n");
|
|
- }
|
|
- c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
-}
|
|
-
|
|
-static void __init early_cpu_support_print(void)
|
|
-{
|
|
- int i,j;
|
|
- struct cpu_dev *cpu_devx;
|
|
-
|
|
- printk("KERNEL supported cpus:\n");
|
|
- for (i = 0; i < X86_VENDOR_NUM; i++) {
|
|
- cpu_devx = cpu_devs[i];
|
|
- if (!cpu_devx)
|
|
- continue;
|
|
- for (j = 0; j < 2; j++) {
|
|
- if (!cpu_devx->c_ident[j])
|
|
- continue;
|
|
- printk(" %s %s\n", cpu_devx->c_vendor,
|
|
- cpu_devx->c_ident[j]);
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
- * The NOPL instruction is supposed to exist on all CPUs with
|
|
- * family >= 6, unfortunately, that's not true in practice because
|
|
- * of early VIA chips and (more importantly) broken virtualizers that
|
|
- * are not easy to detect. Hence, probe for it based on first
|
|
- * principles.
|
|
- *
|
|
- * Note: no 64-bit chip is known to lack these, but put the code here
|
|
- * for consistency with 32 bits, and to make it utterly trivial to
|
|
- * diagnose the problem should it ever surface.
|
|
- */
|
|
-static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
|
|
-{
|
|
- const u32 nopl_signature = 0x888c53b1; /* Random number */
|
|
- u32 has_nopl = nopl_signature;
|
|
-
|
|
- clear_cpu_cap(c, X86_FEATURE_NOPL);
|
|
- if (c->x86 >= 6) {
|
|
- asm volatile("\n"
|
|
- "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
|
|
- "2:\n"
|
|
- " .section .fixup,\"ax\"\n"
|
|
- "3: xor %0,%0\n"
|
|
- " jmp 2b\n"
|
|
- " .previous\n"
|
|
- _ASM_EXTABLE(1b,3b)
|
|
- : "+a" (has_nopl));
|
|
-
|
|
- if (has_nopl == nopl_signature)
|
|
- set_cpu_cap(c, X86_FEATURE_NOPL);
|
|
- }
|
|
-}
|
|
-
|
|
-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
|
|
-
|
|
-void __init early_cpu_init(void)
|
|
-{
|
|
- struct cpu_vendor_dev *cvdev;
|
|
-
|
|
- for (cvdev = __x86cpuvendor_start ;
|
|
- cvdev < __x86cpuvendor_end ;
|
|
- cvdev++)
|
|
- cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
|
|
- early_cpu_support_print();
|
|
- early_identify_cpu(&boot_cpu_data);
|
|
-}
|
|
-
|
|
-/* Do some early cpuid on the boot CPU to get some parameter that are
|
|
- needed before check_bugs. Everything advanced is in identify_cpu
|
|
- below. */
|
|
-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
|
|
-{
|
|
- u32 tfms, xlvl;
|
|
-
|
|
- c->loops_per_jiffy = loops_per_jiffy;
|
|
- c->x86_cache_size = -1;
|
|
- c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
|
|
- c->x86_vendor_id[0] = '\0'; /* Unset */
|
|
- c->x86_model_id[0] = '\0'; /* Unset */
|
|
- c->x86_clflush_size = 64;
|
|
- c->x86_cache_alignment = c->x86_clflush_size;
|
|
-#ifndef CONFIG_XEN
|
|
- c->x86_max_cores = 1;
|
|
- c->x86_coreid_bits = 0;
|
|
-#endif
|
|
- c->extended_cpuid_level = 0;
|
|
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
-
|
|
- /* Get vendor name */
|
|
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
|
|
- (unsigned int *)&c->x86_vendor_id[0],
|
|
- (unsigned int *)&c->x86_vendor_id[8],
|
|
- (unsigned int *)&c->x86_vendor_id[4]);
|
|
-
|
|
- get_cpu_vendor(c);
|
|
-
|
|
- /* Initialize the standard set of capabilities */
|
|
- /* Note that the vendor-specific code below might override */
|
|
-
|
|
- /* Intel-defined flags: level 0x00000001 */
|
|
- if (c->cpuid_level >= 0x00000001) {
|
|
- __u32 misc;
|
|
- cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
|
|
- &c->x86_capability[0]);
|
|
- c->x86 = (tfms >> 8) & 0xf;
|
|
- c->x86_model = (tfms >> 4) & 0xf;
|
|
- c->x86_mask = tfms & 0xf;
|
|
- if (c->x86 == 0xf)
|
|
- c->x86 += (tfms >> 20) & 0xff;
|
|
- if (c->x86 >= 0x6)
|
|
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
|
|
- if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
|
|
- c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
|
|
- } else {
|
|
- /* Have CPUID level 0 only - unheard of */
|
|
- c->x86 = 4;
|
|
- }
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
|
|
-#ifdef CONFIG_SMP
|
|
- c->phys_proc_id = c->initial_apicid;
|
|
-#endif
|
|
-#endif
|
|
- /* AMD-defined flags: level 0x80000001 */
|
|
- xlvl = cpuid_eax(0x80000000);
|
|
- c->extended_cpuid_level = xlvl;
|
|
- if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
- if (xlvl >= 0x80000001) {
|
|
- c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
- c->x86_capability[6] = cpuid_ecx(0x80000001);
|
|
- }
|
|
- if (xlvl >= 0x80000004)
|
|
- get_model_name(c); /* Default name */
|
|
- }
|
|
-
|
|
- /* Transmeta-defined flags: level 0x80860001 */
|
|
- xlvl = cpuid_eax(0x80860000);
|
|
- if ((xlvl & 0xffff0000) == 0x80860000) {
|
|
- /* Don't set x86_cpuid_level here for now to not confuse. */
|
|
- if (xlvl >= 0x80860001)
|
|
- c->x86_capability[2] = cpuid_edx(0x80860001);
|
|
- }
|
|
-
|
|
- if (c->extended_cpuid_level >= 0x80000007)
|
|
- c->x86_power = cpuid_edx(0x80000007);
|
|
-
|
|
- if (c->extended_cpuid_level >= 0x80000008) {
|
|
- u32 eax = cpuid_eax(0x80000008);
|
|
-
|
|
- c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
- c->x86_phys_bits = eax & 0xff;
|
|
- }
|
|
-
|
|
- detect_nopl(c);
|
|
-
|
|
- if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
|
|
- cpu_devs[c->x86_vendor]->c_early_init)
|
|
- cpu_devs[c->x86_vendor]->c_early_init(c);
|
|
-
|
|
- validate_pat_support(c);
|
|
-}
|
|
-
|
|
-/*
|
|
- * This does the hard work of actually picking apart the CPU stuff...
|
|
- */
|
|
-static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
-{
|
|
- int i;
|
|
-
|
|
- early_identify_cpu(c);
|
|
-
|
|
- init_scattered_cpuid_features(c);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- c->apicid = phys_pkg_id(0);
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Vendor-specific initialization. In this section we
|
|
- * canonicalize the feature flags, meaning if there are
|
|
- * features a certain CPU supports which CPUID doesn't
|
|
- * tell us, CPUID claiming incorrect flags, or other bugs,
|
|
- * we handle them here.
|
|
- *
|
|
- * At the end of this section, c->x86_capability better
|
|
- * indicate the features this CPU genuinely supports!
|
|
- */
|
|
- if (this_cpu->c_init)
|
|
- this_cpu->c_init(c);
|
|
-
|
|
- detect_ht(c);
|
|
-
|
|
- /*
|
|
- * On SMP, boot_cpu_data holds the common feature set between
|
|
- * all CPUs; so make sure that we indicate which features are
|
|
- * common between the CPUs. The first time this routine gets
|
|
- * executed, c == &boot_cpu_data.
|
|
- */
|
|
- if (c != &boot_cpu_data) {
|
|
- /* AND the already accumulated flags with these */
|
|
- for (i = 0; i < NCAPINTS; i++)
|
|
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
|
|
- }
|
|
-
|
|
- /* Clear all flags overriden by options */
|
|
- for (i = 0; i < NCAPINTS; i++)
|
|
- c->x86_capability[i] &= ~cleared_cpu_caps[i];
|
|
-
|
|
-#ifdef CONFIG_X86_MCE
|
|
- mcheck_init(c);
|
|
-#endif
|
|
- select_idle_routine(c);
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
- numa_add_cpu(smp_processor_id());
|
|
-#endif
|
|
-
|
|
-}
|
|
-
|
|
-void __cpuinit identify_boot_cpu(void)
|
|
-{
|
|
- identify_cpu(&boot_cpu_data);
|
|
-}
|
|
-
|
|
-void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
|
-{
|
|
- BUG_ON(c == &boot_cpu_data);
|
|
- identify_cpu(c);
|
|
- mtrr_ap_init();
|
|
-}
|
|
-
|
|
-static __init int setup_noclflush(char *arg)
|
|
-{
|
|
- setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
|
|
- return 1;
|
|
-}
|
|
-__setup("noclflush", setup_noclflush);
|
|
-
|
|
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
|
|
-{
|
|
- if (c->x86_model_id[0])
|
|
- printk(KERN_CONT "%s", c->x86_model_id);
|
|
-
|
|
- if (c->x86_mask || c->cpuid_level >= 0)
|
|
- printk(KERN_CONT " stepping %02x\n", c->x86_mask);
|
|
- else
|
|
- printk(KERN_CONT "\n");
|
|
-}
|
|
-
|
|
-static __init int setup_disablecpuid(char *arg)
|
|
-{
|
|
- int bit;
|
|
- if (get_option(&arg, &bit) && bit < NCAPINTS*32)
|
|
- setup_clear_cpu_cap(bit);
|
|
- else
|
|
- return 0;
|
|
- return 1;
|
|
-}
|
|
-__setup("clearcpuid=", setup_disablecpuid);
|
|
-
|
|
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
|
-
|
|
-struct x8664_pda **_cpu_pda __read_mostly;
|
|
-EXPORT_SYMBOL(_cpu_pda);
|
|
-
|
|
-#ifndef CONFIG_X86_NO_IDT
|
|
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
|
-#endif
|
|
-
|
|
-char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
|
-
|
|
-unsigned long __supported_pte_mask __read_mostly = ~0UL;
|
|
-EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
|
-
|
|
-static int do_not_nx __cpuinitdata;
|
|
-
|
|
-/* noexec=on|off
|
|
-Control non executable mappings for 64bit processes.
|
|
-
|
|
-on Enable(default)
|
|
-off Disable
|
|
-*/
|
|
-static int __init nonx_setup(char *str)
|
|
-{
|
|
- if (!str)
|
|
- return -EINVAL;
|
|
- if (!strncmp(str, "on", 2)) {
|
|
- __supported_pte_mask |= _PAGE_NX;
|
|
- do_not_nx = 0;
|
|
- } else if (!strncmp(str, "off", 3)) {
|
|
- do_not_nx = 1;
|
|
- __supported_pte_mask &= ~_PAGE_NX;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-early_param("noexec", nonx_setup);
|
|
-
|
|
-int force_personality32;
|
|
-
|
|
-/* noexec32=on|off
|
|
-Control non executable heap for 32bit processes.
|
|
-To control the stack too use noexec=off
|
|
-
|
|
-on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
|
|
-off PROT_READ implies PROT_EXEC
|
|
-*/
|
|
-static int __init nonx32_setup(char *str)
|
|
-{
|
|
- if (!strcmp(str, "on"))
|
|
- force_personality32 &= ~READ_IMPLIES_EXEC;
|
|
- else if (!strcmp(str, "off"))
|
|
- force_personality32 |= READ_IMPLIES_EXEC;
|
|
- return 1;
|
|
-}
|
|
-__setup("noexec32=", nonx32_setup);
|
|
-
|
|
-static void __init_refok switch_pt(int cpu)
|
|
-{
|
|
-#ifdef CONFIG_XEN
|
|
- if (cpu == 0)
|
|
- xen_init_pt();
|
|
- xen_pt_switch(__pa_symbol(init_level4_pgt));
|
|
- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
|
|
-#endif
|
|
-}
|
|
-
|
|
-void pda_init(int cpu)
|
|
-{
|
|
- struct x8664_pda *pda = cpu_pda(cpu);
|
|
-
|
|
- /* Setup up data that may be needed in __get_free_pages early */
|
|
- loadsegment(fs, 0);
|
|
- loadsegment(gs, 0);
|
|
-#ifndef CONFIG_XEN
|
|
- /* Memory clobbers used to order PDA accessed */
|
|
- mb();
|
|
- wrmsrl(MSR_GS_BASE, pda);
|
|
- mb();
|
|
-#else
|
|
- if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
|
|
- (unsigned long)pda))
|
|
- BUG();
|
|
-#endif
|
|
-
|
|
- pda->cpunumber = cpu;
|
|
- pda->irqcount = -1;
|
|
- pda->kernelstack = (unsigned long)stack_thread_info() -
|
|
- PDA_STACKOFFSET + THREAD_SIZE;
|
|
- pda->active_mm = &init_mm;
|
|
- pda->mmu_state = 0;
|
|
-
|
|
- if (cpu == 0) {
|
|
- /* others are initialized in smpboot.c */
|
|
- pda->pcurrent = &init_task;
|
|
- pda->irqstackptr = boot_cpu_stack;
|
|
- pda->irqstackptr += IRQSTACKSIZE - 64;
|
|
- } else {
|
|
- if (!pda->irqstackptr) {
|
|
- pda->irqstackptr = (char *)
|
|
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
|
|
- if (!pda->irqstackptr)
|
|
- panic("cannot allocate irqstack for cpu %d",
|
|
- cpu);
|
|
- pda->irqstackptr += IRQSTACKSIZE - 64;
|
|
- }
|
|
-
|
|
- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
|
|
- pda->nodenumber = cpu_to_node(cpu);
|
|
- }
|
|
-
|
|
- switch_pt(cpu);
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
|
- DEBUG_STKSZ] __page_aligned_bss;
|
|
-#endif
|
|
-
|
|
-extern asmlinkage void ignore_sysret(void);
|
|
-
|
|
-void __cpuinit syscall_init(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * LSTAR and STAR live in a bit strange symbiosis.
|
|
- * They both write to the same internal register. STAR allows to
|
|
- * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
|
- */
|
|
- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
|
- wrmsrl(MSR_LSTAR, system_call);
|
|
- wrmsrl(MSR_CSTAR, ignore_sysret);
|
|
-
|
|
- /* Flags to clear on syscall */
|
|
- wrmsrl(MSR_SYSCALL_MASK,
|
|
- X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
|
|
-#endif
|
|
-#ifdef CONFIG_IA32_EMULATION
|
|
- syscall32_cpu_init();
|
|
-#else
|
|
- static const struct callback_register __cpuinitconst cstar = {
|
|
- .type = CALLBACKTYPE_syscall32,
|
|
- .address = (unsigned long)ignore_sysret
|
|
- };
|
|
-
|
|
- if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
|
|
- printk(KERN_WARNING "Unable to register CSTAR callback\n");
|
|
-#endif
|
|
-}
|
|
-
|
|
-void __cpuinit check_efer(void)
|
|
-{
|
|
- unsigned long efer;
|
|
-
|
|
- rdmsrl(MSR_EFER, efer);
|
|
- if (!(efer & EFER_NX) || do_not_nx)
|
|
- __supported_pte_mask &= ~_PAGE_NX;
|
|
-}
|
|
-
|
|
-unsigned long kernel_eflags;
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
-/*
|
|
- * Copies of the original ist values from the tss are only accessed during
|
|
- * debugging, no special alignment required.
|
|
- */
|
|
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * cpu_init() initializes state that is per-CPU. Some data is already
|
|
- * initialized (naturally) in the bootstrap process, such as the GDT
|
|
- * and IDT. We reload them nevertheless, this function acts as a
|
|
- * 'CPU state barrier', nothing should get across.
|
|
- * A lot of state is already set up in PDA init.
|
|
- */
|
|
-void __cpuinit cpu_init(void)
|
|
-{
|
|
- int cpu = stack_smp_processor_id();
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- struct tss_struct *t = &per_cpu(init_tss, cpu);
|
|
- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
|
|
- unsigned long v;
|
|
- char *estacks = NULL;
|
|
- int i;
|
|
-#endif
|
|
- struct task_struct *me;
|
|
-
|
|
- /* CPU 0 is initialised in head64.c */
|
|
- if (cpu != 0)
|
|
- pda_init(cpu);
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- else
|
|
- estacks = boot_exception_stacks;
|
|
-#endif
|
|
-
|
|
- me = current;
|
|
-
|
|
- if (cpu_test_and_set(cpu, cpu_initialized))
|
|
- panic("CPU#%d already initialized!\n", cpu);
|
|
-
|
|
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
-
|
|
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
-
|
|
- /*
|
|
- * Initialize the per-CPU GDT with the boot GDT,
|
|
- * and set up the GDT descriptor:
|
|
- */
|
|
-
|
|
- switch_to_new_gdt();
|
|
-#ifndef CONFIG_X86_NO_IDT
|
|
- load_idt((const struct desc_ptr *)&idt_descr);
|
|
-#endif
|
|
-
|
|
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
|
|
- syscall_init();
|
|
-
|
|
- wrmsrl(MSR_FS_BASE, 0);
|
|
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
|
|
- barrier();
|
|
-
|
|
- check_efer();
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- /*
|
|
- * set up and load the per-CPU TSS
|
|
- */
|
|
- if (!orig_ist->ist[0]) {
|
|
- static const unsigned int order[N_EXCEPTION_STACKS] = {
|
|
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
|
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
|
- };
|
|
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
|
- if (cpu) {
|
|
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
|
|
- if (!estacks)
|
|
- panic("Cannot allocate exception "
|
|
- "stack %ld %d\n", v, cpu);
|
|
- }
|
|
- estacks += PAGE_SIZE << order[v];
|
|
- orig_ist->ist[v] = t->x86_tss.ist[v] =
|
|
- (unsigned long)estacks;
|
|
- }
|
|
- }
|
|
-
|
|
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
|
- /*
|
|
- * <= is required because the CPU will access up to
|
|
- * 8 bits beyond the end of the IO permission bitmap.
|
|
- */
|
|
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
|
|
- t->io_bitmap[i] = ~0UL;
|
|
-#endif
|
|
-
|
|
- atomic_inc(&init_mm.mm_count);
|
|
- me->active_mm = &init_mm;
|
|
- if (me->mm)
|
|
- BUG();
|
|
- enter_lazy_tlb(&init_mm, me);
|
|
-
|
|
- load_sp0(t, &current->thread);
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- set_tss_desc(cpu, t);
|
|
- load_TR_desc();
|
|
-#endif
|
|
- load_LDT(&init_mm.context);
|
|
-
|
|
-#ifdef CONFIG_KGDB
|
|
- /*
|
|
- * If the kgdb is connected no debug regs should be altered. This
|
|
- * is only applicable when KGDB and a KGDB I/O module are built
|
|
- * into the kernel and you are using early debugging with
|
|
- * kgdbwait. KGDB will control the kernel HW breakpoint registers.
|
|
- */
|
|
- if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
|
|
- arch_kgdb_ops.correct_hw_break();
|
|
- else {
|
|
-#endif
|
|
- /*
|
|
- * Clear all 6 debug registers:
|
|
- */
|
|
-
|
|
- set_debugreg(0UL, 0);
|
|
- set_debugreg(0UL, 1);
|
|
- set_debugreg(0UL, 2);
|
|
- set_debugreg(0UL, 3);
|
|
- set_debugreg(0UL, 6);
|
|
- set_debugreg(0UL, 7);
|
|
-#ifdef CONFIG_KGDB
|
|
- /* If the kgdb is connected no debug regs should be altered. */
|
|
- }
|
|
-#endif
|
|
-
|
|
- fpu_init();
|
|
-
|
|
- asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
|
|
- if (raw_irqs_disabled())
|
|
- kernel_eflags &= ~X86_EFLAGS_IF;
|
|
-
|
|
- if (is_uv_system())
|
|
- uv_cpu_init();
|
|
-}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/dumpstack_64.c 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/dumpstack_64.c 2011-02-01 14:39:24.000000000 +0100
@@ -21,6 +21,7 @@
#define N_EXCEPTION_STACKS_END \
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)

+#ifndef CONFIG_X86_NO_TSS
static char x86_stack_ids[][8] = {
[ DEBUG_STACK-1 ] = "#DB",
[ NMI_STACK-1 ] = "NMI",
@@ -32,10 +33,12 @@ static char x86_stack_ids[][8] = {
N_EXCEPTION_STACKS_END ] = "#DB[?]"
#endif
};
+#endif

static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
unsigned *usedp, char **idp)
{
+#ifndef CONFIG_X86_NO_TSS
unsigned k;

/*
@@ -95,6 +98,7 @@ static unsigned long *in_exception_stack
}
#endif
}
+#endif /* CONFIG_X86_NO_TSS */
return NULL;
}

--- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -167,6 +167,9 @@ static void __init _e820_print_map(const
case E820_NVS:
printk(KERN_CONT "(ACPI NVS)\n");
break;
+ case E820_UNUSABLE:
+ printk("(unusable)\n");
+ break;
default:
printk(KERN_CONT "type %u\n", e820->map[i].type);
break;
@@ -1399,6 +1402,7 @@ static inline const char *e820_type_to_s
case E820_RAM: return "System RAM";
case E820_ACPI: return "ACPI Tables";
case E820_NVS: return "ACPI Non-volatile Storage";
+ case E820_UNUSABLE: return "Unusable memory";
default: return "reserved";
}
}
@@ -1410,6 +1414,7 @@ static inline const char *e820_type_to_s
/*
* Mark e820 reserved areas as busy for the resource manager.
*/
+static struct resource __initdata *e820_res;
void __init e820_reserve_resources(void)
{
int i;
@@ -1417,20 +1422,28 @@ void __init e820_reserve_resources(void)
u64 end;

res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+ e820_res = res;
for (i = 0; i < e820.nr_map; i++) {
end = e820.map[i].addr + e820.map[i].size - 1;
-#ifndef CONFIG_RESOURCES_64BIT
- if (end > 0x100000000ULL) {
+ if (end != (resource_size_t)end) {
res++;
continue;
}
-#endif
res->name = e820_type_to_string(e820.map[i].type);
res->start = e820.map[i].addr;
res->end = end;

- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- insert_resource(&iomem_resource, res);
+ res->flags = IORESOURCE_MEM;
+
+ /*
+ * don't register the region that could be conflicted with
+ * pci device BAR resource and insert them later in
+ * pcibios_resource_survey()
+ */
+ if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20)) {
+ res->flags |= IORESOURCE_BUSY;
+ insert_resource(&iomem_resource, res);
+ }
res++;
}

@@ -1442,6 +1455,19 @@ void __init e820_reserve_resources(void)
}
}

+void __init e820_reserve_resources_late(void)
+{
+ int i;
+ struct resource *res;
+
+ res = e820_res;
+ for (i = 0; i < e820.nr_map; i++) {
+ if (!res->parent && res->end)
+ insert_resource_expand_to_fit(&iomem_resource, res);
+ res++;
+ }
+}
+
#undef e820

#ifndef CONFIG_XEN
--- head-2011-03-17.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/early_printk-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -3,10 +3,18 @@
|
|
#include <linux/init.h>
|
|
#include <linux/string.h>
|
|
#include <linux/screen_info.h>
|
|
+#include <linux/usb/ch9.h>
|
|
+#include <linux/pci_regs.h>
|
|
+#include <linux/pci_ids.h>
|
|
+#include <linux/errno.h>
|
|
#include <asm/io.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/fcntl.h>
|
|
#include <asm/setup.h>
|
|
+#include <asm/pci-direct.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/fixmap.h>
|
|
+#include <linux/usb/ehci_def.h>
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* Simple VGA output */
|
|
@@ -78,6 +86,7 @@ static int early_serial_base = 0x3f8; /
|
|
static int early_serial_putc(unsigned char ch)
|
|
{
|
|
unsigned timeout = 0xffff;
|
|
+
|
|
while ((inb(early_serial_base + LSR) & XMTRDY) == 0 && --timeout)
|
|
cpu_relax();
|
|
outb(ch, early_serial_base + TXR);
|
|
@@ -111,7 +120,7 @@ static __init void early_serial_init(cha
|
|
if (!strncmp(s, "0x", 2)) {
|
|
early_serial_base = simple_strtoul(s, &e, 16);
|
|
} else {
|
|
- static int bases[] = { 0x3f8, 0x2f8 };
|
|
+ static const int __initconst bases[] = { 0x3f8, 0x2f8 };
|
|
|
|
if (!strncmp(s, "ttyS", 4))
|
|
s += 4;
|
|
@@ -180,6 +189,721 @@ static struct console early_serial_conso
|
|
.index = -1,
|
|
};
|
|
|
|
+#ifdef CONFIG_EARLY_PRINTK_DBGP
|
|
+
|
|
+static struct ehci_caps __iomem *ehci_caps;
|
|
+static struct ehci_regs __iomem *ehci_regs;
|
|
+static struct ehci_dbg_port __iomem *ehci_debug;
|
|
+static unsigned int dbgp_endpoint_out;
|
|
+
|
|
+struct ehci_dev {
|
|
+ u32 bus;
|
|
+ u32 slot;
|
|
+ u32 func;
|
|
+};
|
|
+
|
|
+static struct ehci_dev ehci_dev;
|
|
+
|
|
+#define USB_DEBUG_DEVNUM 127
|
|
+
|
|
+#define DBGP_DATA_TOGGLE 0x8800
|
|
+
|
|
+static inline u32 dbgp_pid_update(u32 x, u32 tok)
|
|
+{
|
|
+ return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
|
|
+}
|
|
+
|
|
+static inline u32 dbgp_len_update(u32 x, u32 len)
|
|
+{
|
|
+ return (x & ~0x0f) | (len & 0x0f);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * USB Packet IDs (PIDs)
|
|
+ */
|
|
+
|
|
+/* token */
|
|
+#define USB_PID_OUT 0xe1
|
|
+#define USB_PID_IN 0x69
|
|
+#define USB_PID_SOF 0xa5
|
|
+#define USB_PID_SETUP 0x2d
|
|
+/* handshake */
|
|
+#define USB_PID_ACK 0xd2
|
|
+#define USB_PID_NAK 0x5a
|
|
+#define USB_PID_STALL 0x1e
|
|
+#define USB_PID_NYET 0x96
|
|
+/* data */
|
|
+#define USB_PID_DATA0 0xc3
|
|
+#define USB_PID_DATA1 0x4b
|
|
+#define USB_PID_DATA2 0x87
|
|
+#define USB_PID_MDATA 0x0f
|
|
+/* Special */
|
|
+#define USB_PID_PREAMBLE 0x3c
|
|
+#define USB_PID_ERR 0x3c
|
|
+#define USB_PID_SPLIT 0x78
|
|
+#define USB_PID_PING 0xb4
|
|
+#define USB_PID_UNDEF_0 0xf0
|
|
+
|
|
+#define USB_PID_DATA_TOGGLE 0x88
|
|
+#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
|
|
+
|
|
+#define PCI_CAP_ID_EHCI_DEBUG 0xa
|
|
+
|
|
+#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
|
|
+#define HUB_SHORT_RESET_TIME 10
|
|
+#define HUB_LONG_RESET_TIME 200
|
|
+#define HUB_RESET_TIMEOUT 500
|
|
+
|
|
+#define DBGP_MAX_PACKET 8
|
|
+
|
|
+static int dbgp_wait_until_complete(void)
|
|
+{
|
|
+ u32 ctrl;
|
|
+ int loop = 0x100000;
|
|
+
|
|
+ do {
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ /* Stop when the transaction is finished */
|
|
+ if (ctrl & DBGP_DONE)
|
|
+ break;
|
|
+ } while (--loop > 0);
|
|
+
|
|
+ if (!loop)
|
|
+ return -1;
|
|
+
|
|
+ /*
|
|
+ * Now that we have observed the completed transaction,
|
|
+ * clear the done bit.
|
|
+ */
|
|
+ writel(ctrl | DBGP_DONE, &ehci_debug->control);
|
|
+ return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
|
|
+}
|
|
+
|
|
+static void dbgp_mdelay(int ms)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ while (ms--) {
|
|
+ for (i = 0; i < 1000; i++)
|
|
+ outb(0x1, 0x80);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void dbgp_breath(void)
|
|
+{
|
|
+ /* Sleep to give the debug port a chance to breathe */
|
|
+}
|
|
+
|
|
+static int dbgp_wait_until_done(unsigned ctrl)
|
|
+{
|
|
+ u32 pids, lpid;
|
|
+ int ret;
|
|
+ int loop = 3;
|
|
+
|
|
+retry:
|
|
+ writel(ctrl | DBGP_GO, &ehci_debug->control);
|
|
+ ret = dbgp_wait_until_complete();
|
|
+ pids = readl(&ehci_debug->pids);
|
|
+ lpid = DBGP_PID_GET(pids);
|
|
+
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ /*
|
|
+ * If the port is getting full or it has dropped data
|
|
+ * start pacing ourselves, not necessary but it's friendly.
|
|
+ */
|
|
+ if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
|
|
+ dbgp_breath();
|
|
+
|
|
+ /* If I get a NACK reissue the transmission */
|
|
+ if (lpid == USB_PID_NAK) {
|
|
+ if (--loop > 0)
|
|
+ goto retry;
|
|
+ }
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void dbgp_set_data(const void *buf, int size)
|
|
+{
|
|
+ const unsigned char *bytes = buf;
|
|
+ u32 lo, hi;
|
|
+ int i;
|
|
+
|
|
+ lo = hi = 0;
|
|
+ for (i = 0; i < 4 && i < size; i++)
|
|
+ lo |= bytes[i] << (8*i);
|
|
+ for (; i < 8 && i < size; i++)
|
|
+ hi |= bytes[i] << (8*(i - 4));
|
|
+ writel(lo, &ehci_debug->data03);
|
|
+ writel(hi, &ehci_debug->data47);
|
|
+}
|
|
+
|
|
+static void dbgp_get_data(void *buf, int size)
|
|
+{
|
|
+ unsigned char *bytes = buf;
|
|
+ u32 lo, hi;
|
|
+ int i;
|
|
+
|
|
+ lo = readl(&ehci_debug->data03);
|
|
+ hi = readl(&ehci_debug->data47);
|
|
+ for (i = 0; i < 4 && i < size; i++)
|
|
+ bytes[i] = (lo >> (8*i)) & 0xff;
|
|
+ for (; i < 8 && i < size; i++)
|
|
+ bytes[i] = (hi >> (8*(i - 4))) & 0xff;
|
|
+}
|
|
+
|
|
+static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
|
|
+ const char *bytes, int size)
|
|
+{
|
|
+ u32 pids, addr, ctrl;
|
|
+ int ret;
|
|
+
|
|
+ if (size > DBGP_MAX_PACKET)
|
|
+ return -1;
|
|
+
|
|
+ addr = DBGP_EPADDR(devnum, endpoint);
|
|
+
|
|
+ pids = readl(&ehci_debug->pids);
|
|
+ pids = dbgp_pid_update(pids, USB_PID_OUT);
|
|
+
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ ctrl = dbgp_len_update(ctrl, size);
|
|
+ ctrl |= DBGP_OUT;
|
|
+ ctrl |= DBGP_GO;
|
|
+
|
|
+ dbgp_set_data(bytes, size);
|
|
+ writel(addr, &ehci_debug->address);
|
|
+ writel(pids, &ehci_debug->pids);
|
|
+
|
|
+ ret = dbgp_wait_until_done(ctrl);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
|
|
+ int size)
|
|
+{
|
|
+ u32 pids, addr, ctrl;
|
|
+ int ret;
|
|
+
|
|
+ if (size > DBGP_MAX_PACKET)
|
|
+ return -1;
|
|
+
|
|
+ addr = DBGP_EPADDR(devnum, endpoint);
|
|
+
|
|
+ pids = readl(&ehci_debug->pids);
|
|
+ pids = dbgp_pid_update(pids, USB_PID_IN);
|
|
+
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ ctrl = dbgp_len_update(ctrl, size);
|
|
+ ctrl &= ~DBGP_OUT;
|
|
+ ctrl |= DBGP_GO;
|
|
+
|
|
+ writel(addr, &ehci_debug->address);
|
|
+ writel(pids, &ehci_debug->pids);
|
|
+ ret = dbgp_wait_until_done(ctrl);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ if (size > ret)
|
|
+ size = ret;
|
|
+ dbgp_get_data(data, size);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
|
|
+ int value, int index, void *data, int size)
|
|
+{
|
|
+ u32 pids, addr, ctrl;
|
|
+ struct usb_ctrlrequest req;
|
|
+ int read;
|
|
+ int ret;
|
|
+
|
|
+ read = (requesttype & USB_DIR_IN) != 0;
|
|
+ if (size > (read ? DBGP_MAX_PACKET:0))
|
|
+ return -1;
|
|
+
|
|
+ /* Compute the control message */
|
|
+ req.bRequestType = requesttype;
|
|
+ req.bRequest = request;
|
|
+ req.wValue = cpu_to_le16(value);
|
|
+ req.wIndex = cpu_to_le16(index);
|
|
+ req.wLength = cpu_to_le16(size);
|
|
+
|
|
+ pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
|
|
+ addr = DBGP_EPADDR(devnum, 0);
|
|
+
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ ctrl = dbgp_len_update(ctrl, sizeof(req));
|
|
+ ctrl |= DBGP_OUT;
|
|
+ ctrl |= DBGP_GO;
|
|
+
|
|
+ /* Send the setup message */
|
|
+ dbgp_set_data(&req, sizeof(req));
|
|
+ writel(addr, &ehci_debug->address);
|
|
+ writel(pids, &ehci_debug->pids);
|
|
+ ret = dbgp_wait_until_done(ctrl);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ /* Read the result */
|
|
+ return dbgp_bulk_read(devnum, 0, data, size);
|
|
+}
|
|
+
|
|
+
|
|
+/* Find a PCI capability */
|
|
+static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
|
|
+{
|
|
+ u8 pos;
|
|
+ int bytes;
|
|
+
|
|
+ if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
|
|
+ PCI_STATUS_CAP_LIST))
|
|
+ return 0;
|
|
+
|
|
+ pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
|
|
+ for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
|
|
+ u8 id;
|
|
+
|
|
+ pos &= ~3;
|
|
+ id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
|
|
+ if (id == 0xff)
|
|
+ break;
|
|
+ if (id == cap)
|
|
+ return pos;
|
|
+
|
|
+ pos = read_pci_config_byte(num, slot, func,
|
|
+ pos+PCI_CAP_LIST_NEXT);
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
|
|
+{
|
|
+ u32 class;
|
|
+
|
|
+ class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
|
|
+ if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
|
|
+ return 0;
|
|
+
|
|
+ return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
|
|
+}
|
|
+
|
|
+static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
|
|
+{
|
|
+ u32 bus, slot, func;
|
|
+
|
|
+ for (bus = 0; bus < 256; bus++) {
|
|
+ for (slot = 0; slot < 32; slot++) {
|
|
+ for (func = 0; func < 8; func++) {
|
|
+ unsigned cap;
|
|
+
|
|
+ cap = __find_dbgp(bus, slot, func);
|
|
+
|
|
+ if (!cap)
|
|
+ continue;
|
|
+ if (ehci_num-- != 0)
|
|
+ continue;
|
|
+ *rbus = bus;
|
|
+ *rslot = slot;
|
|
+ *rfunc = func;
|
|
+ return cap;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int ehci_reset_port(int port)
|
|
+{
|
|
+ u32 portsc;
|
|
+ u32 delay_time, delay;
|
|
+ int loop;
|
|
+
|
|
+ /* Reset the usb debug port */
|
|
+ portsc = readl(&ehci_regs->port_status[port - 1]);
|
|
+ portsc &= ~PORT_PE;
|
|
+ portsc |= PORT_RESET;
|
|
+ writel(portsc, &ehci_regs->port_status[port - 1]);
|
|
+
|
|
+ delay = HUB_ROOT_RESET_TIME;
|
|
+ for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
|
|
+ delay_time += delay) {
|
|
+ dbgp_mdelay(delay);
|
|
+
|
|
+ portsc = readl(&ehci_regs->port_status[port - 1]);
|
|
+ if (portsc & PORT_RESET) {
|
|
+ /* force reset to complete */
|
|
+ loop = 2;
|
|
+ writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
|
|
+ &ehci_regs->port_status[port - 1]);
|
|
+ do {
|
|
+ portsc = readl(&ehci_regs->port_status[port-1]);
|
|
+ } while ((portsc & PORT_RESET) && (--loop > 0));
|
|
+ }
|
|
+
|
|
+ /* Device went away? */
|
|
+ if (!(portsc & PORT_CONNECT))
|
|
+ return -ENOTCONN;
|
|
+
|
|
+ /* bomb out completely if something weird happend */
|
|
+ if ((portsc & PORT_CSC))
|
|
+ return -EINVAL;
|
|
+
|
|
+ /* If we've finished resetting, then break out of the loop */
|
|
+ if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
|
|
+ return 0;
|
|
+ }
|
|
+ return -EBUSY;
|
|
+}
|
|
+
|
|
+static int ehci_wait_for_port(int port)
|
|
+{
|
|
+ u32 status;
|
|
+ int ret, reps;
|
|
+
|
|
+ for (reps = 0; reps < 3; reps++) {
|
|
+ dbgp_mdelay(100);
|
|
+ status = readl(&ehci_regs->status);
|
|
+ if (status & STS_PCD) {
|
|
+ ret = ehci_reset_port(port);
|
|
+ if (ret == 0)
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+ return -ENOTCONN;
|
|
+}
|
|
+
|
|
+#ifdef DBGP_DEBUG
|
|
+# define dbgp_printk early_printk
|
|
+#else
|
|
+static inline void dbgp_printk(const char *fmt, ...) { }
|
|
+#endif
|
|
+
|
|
+typedef void (*set_debug_port_t)(int port);
|
|
+
|
|
+static void default_set_debug_port(int port)
|
|
+{
|
|
+}
|
|
+
|
|
+static set_debug_port_t set_debug_port = default_set_debug_port;
|
|
+
|
|
+static void nvidia_set_debug_port(int port)
|
|
+{
|
|
+ u32 dword;
|
|
+ dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
|
|
+ 0x74);
|
|
+ dword &= ~(0x0f<<12);
|
|
+ dword |= ((port & 0x0f)<<12);
|
|
+ write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
|
|
+ dword);
|
|
+ dbgp_printk("set debug port to %d\n", port);
|
|
+}
|
|
+
|
|
+static void __init detect_set_debug_port(void)
|
|
+{
|
|
+ u32 vendorid;
|
|
+
|
|
+ vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
|
|
+ 0x00);
|
|
+
|
|
+ if ((vendorid & 0xffff) == 0x10de) {
|
|
+ dbgp_printk("using nvidia set_debug_port\n");
|
|
+ set_debug_port = nvidia_set_debug_port;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int __init ehci_setup(void)
|
|
+{
|
|
+ struct usb_debug_descriptor dbgp_desc;
|
|
+ u32 cmd, ctrl, status, portsc, hcs_params;
|
|
+ u32 debug_port, new_debug_port = 0, n_ports;
|
|
+ u32 devnum;
|
|
+ int ret, i;
|
|
+ int loop;
|
|
+ int port_map_tried;
|
|
+ int playtimes = 3;
|
|
+
|
|
+try_next_time:
|
|
+ port_map_tried = 0;
|
|
+
|
|
+try_next_port:
|
|
+
|
|
+ hcs_params = readl(&ehci_caps->hcs_params);
|
|
+ debug_port = HCS_DEBUG_PORT(hcs_params);
|
|
+ n_ports = HCS_N_PORTS(hcs_params);
|
|
+
|
|
+ dbgp_printk("debug_port: %d\n", debug_port);
|
|
+ dbgp_printk("n_ports: %d\n", n_ports);
|
|
+
|
|
+ for (i = 1; i <= n_ports; i++) {
|
|
+ portsc = readl(&ehci_regs->port_status[i-1]);
|
|
+ dbgp_printk("portstatus%d: %08x\n", i, portsc);
|
|
+ }
|
|
+
|
|
+ if (port_map_tried && (new_debug_port != debug_port)) {
|
|
+ if (--playtimes) {
|
|
+ set_debug_port(new_debug_port);
|
|
+ goto try_next_time;
|
|
+ }
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ loop = 10;
|
|
+ /* Reset the EHCI controller */
|
|
+ cmd = readl(&ehci_regs->command);
|
|
+ cmd |= CMD_RESET;
|
|
+ writel(cmd, &ehci_regs->command);
|
|
+ do {
|
|
+ cmd = readl(&ehci_regs->command);
|
|
+ } while ((cmd & CMD_RESET) && (--loop > 0));
|
|
+
|
|
+ if (!loop) {
|
|
+ dbgp_printk("can not reset ehci\n");
|
|
+ return -1;
|
|
+ }
|
|
+ dbgp_printk("ehci reset done\n");
|
|
+
|
|
+ /* Claim ownership, but do not enable yet */
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ ctrl |= DBGP_OWNER;
|
|
+ ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
|
|
+ writel(ctrl, &ehci_debug->control);
|
|
+
|
|
+ /* Start the ehci running */
|
|
+ cmd = readl(&ehci_regs->command);
|
|
+ cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
|
|
+ cmd |= CMD_RUN;
|
|
+ writel(cmd, &ehci_regs->command);
|
|
+
|
|
+ /* Ensure everything is routed to the EHCI */
|
|
+ writel(FLAG_CF, &ehci_regs->configured_flag);
|
|
+
|
|
+ /* Wait until the controller is no longer halted */
|
|
+ loop = 10;
|
|
+ do {
|
|
+ status = readl(&ehci_regs->status);
|
|
+ } while ((status & STS_HALT) && (--loop > 0));
|
|
+
|
|
+ if (!loop) {
|
|
+ dbgp_printk("ehci can be started\n");
|
|
+ return -1;
|
|
+ }
|
|
+ dbgp_printk("ehci started\n");
|
|
+
|
|
+ /* Wait for a device to show up in the debug port */
|
|
+ ret = ehci_wait_for_port(debug_port);
|
|
+ if (ret < 0) {
|
|
+ dbgp_printk("No device found in debug port\n");
|
|
+ goto next_debug_port;
|
|
+ }
|
|
+ dbgp_printk("ehci wait for port done\n");
|
|
+
|
|
+ /* Enable the debug port */
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ ctrl |= DBGP_CLAIM;
|
|
+ writel(ctrl, &ehci_debug->control);
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
|
|
+ dbgp_printk("No device in debug port\n");
|
|
+ writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
|
|
+ goto err;
|
|
+ }
|
|
+ dbgp_printk("debug ported enabled\n");
|
|
+
|
|
+ /* Completely transfer the debug device to the debug controller */
|
|
+ portsc = readl(&ehci_regs->port_status[debug_port - 1]);
|
|
+ portsc &= ~PORT_PE;
|
|
+ writel(portsc, &ehci_regs->port_status[debug_port - 1]);
|
|
+
|
|
+ dbgp_mdelay(100);
|
|
+
|
|
+ /* Find the debug device and make it device number 127 */
|
|
+ for (devnum = 0; devnum <= 127; devnum++) {
|
|
+ ret = dbgp_control_msg(devnum,
|
|
+ USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
|
|
+ USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
|
|
+ &dbgp_desc, sizeof(dbgp_desc));
|
|
+ if (ret > 0)
|
|
+ break;
|
|
+ }
|
|
+ if (devnum > 127) {
|
|
+ dbgp_printk("Could not find attached debug device\n");
|
|
+ goto err;
|
|
+ }
|
|
+ if (ret < 0) {
|
|
+ dbgp_printk("Attached device is not a debug device\n");
|
|
+ goto err;
|
|
+ }
|
|
+ dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
|
|
+
|
|
+ /* Move the device to 127 if it isn't already there */
|
|
+ if (devnum != USB_DEBUG_DEVNUM) {
|
|
+ ret = dbgp_control_msg(devnum,
|
|
+ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
|
|
+ USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
|
|
+ if (ret < 0) {
|
|
+ dbgp_printk("Could not move attached device to %d\n",
|
|
+ USB_DEBUG_DEVNUM);
|
|
+ goto err;
|
|
+ }
|
|
+ devnum = USB_DEBUG_DEVNUM;
|
|
+ dbgp_printk("debug device renamed to 127\n");
|
|
+ }
|
|
+
|
|
+ /* Enable the debug interface */
|
|
+ ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
|
|
+ USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
|
|
+ USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
|
|
+ if (ret < 0) {
|
|
+ dbgp_printk(" Could not enable the debug device\n");
|
|
+ goto err;
|
|
+ }
|
|
+ dbgp_printk("debug interface enabled\n");
|
|
+
|
|
+ /* Perform a small write to get the even/odd data state in sync
|
|
+ */
|
|
+ ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
|
|
+ if (ret < 0) {
|
|
+ dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
|
|
+ goto err;
|
|
+ }
|
|
+ dbgp_printk("small write doned\n");
|
|
+
|
|
+ return 0;
|
|
+err:
|
|
+ /* Things didn't work so remove my claim */
|
|
+ ctrl = readl(&ehci_debug->control);
|
|
+ ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
|
|
+ writel(ctrl, &ehci_debug->control);
|
|
+ return -1;
|
|
+
|
|
+next_debug_port:
|
|
+ port_map_tried |= (1<<(debug_port - 1));
|
|
+ new_debug_port = ((debug_port-1+1)%n_ports) + 1;
|
|
+ if (port_map_tried != ((1<<n_ports) - 1)) {
|
|
+ set_debug_port(new_debug_port);
|
|
+ goto try_next_port;
|
|
+ }
|
|
+ if (--playtimes) {
|
|
+ set_debug_port(new_debug_port);
|
|
+ goto try_next_time;
|
|
+ }
|
|
+
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+static int __init early_dbgp_init(char *s)
|
|
+{
|
|
+ u32 debug_port, bar, offset;
|
|
+ u32 bus, slot, func, cap;
|
|
+ void __iomem *ehci_bar;
|
|
+ u32 dbgp_num;
|
|
+ u32 bar_val;
|
|
+ char *e;
|
|
+ int ret;
|
|
+ u8 byte;
|
|
+
|
|
+ if (!early_pci_allowed())
|
|
+ return -1;
|
|
+
|
|
+ dbgp_num = 0;
|
|
+ if (*s)
|
|
+ dbgp_num = simple_strtoul(s, &e, 10);
|
|
+ dbgp_printk("dbgp_num: %d\n", dbgp_num);
|
|
+
|
|
+ cap = find_dbgp(dbgp_num, &bus, &slot, &func);
|
|
+ if (!cap)
|
|
+ return -1;
|
|
+
|
|
+ dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
|
|
+ func);
|
|
+
|
|
+ debug_port = read_pci_config(bus, slot, func, cap);
|
|
+ bar = (debug_port >> 29) & 0x7;
|
|
+ bar = (bar * 4) + 0xc;
|
|
+ offset = (debug_port >> 16) & 0xfff;
|
|
+ dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
|
|
+ if (bar != PCI_BASE_ADDRESS_0) {
|
|
+ dbgp_printk("only debug ports on bar 1 handled.\n");
|
|
+
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
|
|
+ dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
|
|
+ if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
|
|
+ dbgp_printk("only simple 32bit mmio bars supported\n");
|
|
+
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /* double check if the mem space is enabled */
|
|
+ byte = read_pci_config_byte(bus, slot, func, 0x04);
|
|
+ if (!(byte & 0x2)) {
|
|
+ byte |= 0x02;
|
|
+ write_pci_config_byte(bus, slot, func, 0x04, byte);
|
|
+ dbgp_printk("mmio for ehci enabled\n");
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * FIXME I don't have the bar size so just guess PAGE_SIZE is more
|
|
+ * than enough. 1K is the biggest I have seen.
|
|
+ */
|
|
+ set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
|
|
+ ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
|
|
+ ehci_bar += bar_val & ~PAGE_MASK;
|
|
+ dbgp_printk("ehci_bar: %p\n", ehci_bar);
|
|
+
|
|
+ ehci_caps = ehci_bar;
|
|
+ ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
|
|
+ ehci_debug = ehci_bar + offset;
|
|
+ ehci_dev.bus = bus;
|
|
+ ehci_dev.slot = slot;
|
|
+ ehci_dev.func = func;
|
|
+
|
|
+ detect_set_debug_port();
|
|
+
|
|
+ ret = ehci_setup();
|
|
+ if (ret < 0) {
|
|
+ dbgp_printk("ehci_setup failed\n");
|
|
+ ehci_debug = NULL;
|
|
+
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void early_dbgp_write(struct console *con, const char *str, u32 n)
|
|
+{
|
|
+ int chunk, ret;
|
|
+
|
|
+ if (!ehci_debug)
|
|
+ return;
|
|
+ while (n > 0) {
|
|
+ chunk = n;
|
|
+ if (chunk > DBGP_MAX_PACKET)
|
|
+ chunk = DBGP_MAX_PACKET;
|
|
+ ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
|
|
+ dbgp_endpoint_out, str, chunk);
|
|
+ str += chunk;
|
|
+ n -= chunk;
|
|
+ }
|
|
+}
|
|
+
|
|
+static struct console early_dbgp_console = {
|
|
+ .name = "earlydbg",
|
|
+ .write = early_dbgp_write,
|
|
+ .flags = CON_PRINTBUFFER,
|
|
+ .index = -1,
|
|
+};
|
|
+#endif
|
|
+
|
|
/* Console interface to a host file on AMD's SimNow! */
|
|
|
|
static int simnow_fd;
|
|
@@ -194,6 +918,7 @@ enum {
|
|
static noinline long simnow(long cmd, long a, long b, long c)
|
|
{
|
|
long ret;
|
|
+
|
|
asm volatile("cpuid" :
|
|
"=a" (ret) :
|
|
"b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2));
|
|
@@ -203,6 +928,7 @@ static noinline long simnow(long cmd, lo
|
|
static void __init simnow_init(char *str)
|
|
{
|
|
char *fn = "klog";
|
|
+
|
|
if (*str == '=')
|
|
fn = ++str;
|
|
/* error ignored */
|
|
@@ -223,7 +949,7 @@ static struct console simnow_console = {
|
|
|
|
/* Direct interface for emergencies */
|
|
static struct console *early_console = &early_vga_console;
|
|
-static int early_console_initialized;
|
|
+static int __initdata early_console_initialized;
|
|
|
|
asmlinkage void early_printk(const char *fmt, ...)
|
|
{
|
|
@@ -237,10 +963,11 @@ asmlinkage void early_printk(const char
|
|
va_end(ap);
|
|
}
|
|
|
|
-static int __initdata keep_early;
|
|
|
|
static int __init setup_early_printk(char *buf)
|
|
{
|
|
+ int keep_early;
|
|
+
|
|
if (!buf)
|
|
return 0;
|
|
|
|
@@ -248,8 +975,7 @@ static int __init setup_early_printk(cha
|
|
return 0;
|
|
early_console_initialized = 1;
|
|
|
|
- if (strstr(buf, "keep"))
|
|
- keep_early = 1;
|
|
+ keep_early = (strstr(buf, "keep") != NULL);
|
|
|
|
if (!strncmp(buf, "serial", 6)) {
|
|
early_serial_init(buf + 6);
|
|
@@ -269,6 +995,17 @@ static int __init setup_early_printk(cha
|
|
simnow_init(buf + 6);
|
|
early_console = &simnow_console;
|
|
keep_early = 1;
|
|
+#ifdef CONFIG_EARLY_PRINTK_DBGP
|
|
+ } else if (!strncmp(buf, "dbgp", 4)) {
|
|
+ if (early_dbgp_init(buf+4) < 0)
|
|
+ return 0;
|
|
+ early_console = &early_dbgp_console;
|
|
+ /*
|
|
+ * usb subsys will reset ehci controller, so don't keep
|
|
+ * that early console
|
|
+ */
|
|
+ keep_early = 0;
|
|
+#endif
|
|
#ifdef CONFIG_XEN
|
|
} else if (!strncmp(buf, "xen", 3)) {
|
|
early_console = &xenboot_console;
|
|
@@ -282,4 +1019,5 @@ static int __init setup_early_printk(cha
|
|
register_console(early_console);
|
|
return 0;
|
|
}
|
|
+
|
|
early_param("earlyprintk", setup_early_printk);
|
|
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -700,7 +700,7 @@ ENTRY(interrupt)
|
|
ENTRY(irq_entries_start)
|
|
RING0_INT_FRAME
|
|
vector=0
|
|
-.rept NR_IRQS
|
|
+.rept NR_VECTORS
|
|
ALIGN
|
|
.if vector
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
@@ -805,6 +805,7 @@ error_code:
|
|
movl $(__USER_DS), %ecx
|
|
movl %ecx, %ds
|
|
movl %ecx, %es
|
|
+ TRACE_IRQS_OFF
|
|
movl %esp,%eax # pt_regs pointer
|
|
call *%edi
|
|
jmp ret_from_exception
|
|
@@ -974,22 +975,9 @@ ENTRY(device_not_available)
|
|
RING0_INT_FRAME
|
|
pushl $-1 # mark this as an int
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
- SAVE_ALL
|
|
-#ifndef CONFIG_XEN
|
|
- GET_CR0_INTO_EAX
|
|
- testl $0x4, %eax # EM (math emulation bit)
|
|
- je device_available_emulate
|
|
- pushl $0 # temporary storage for ORIG_EIP
|
|
+ pushl $do_device_not_available
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
- call math_emulate
|
|
- addl $4, %esp
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
- jmp ret_from_exception
|
|
-device_available_emulate:
|
|
-#endif
|
|
- preempt_stop(CLBR_ANY)
|
|
- call math_state_restore
|
|
- jmp ret_from_exception
|
|
+ jmp error_code
|
|
CFI_ENDPROC
|
|
END(device_not_available)
|
|
|
|
@@ -1034,6 +1022,7 @@ debug_stack_correct:
|
|
pushl $-1 # mark this as an int
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
+ TRACE_IRQS_OFF
|
|
xorl %edx,%edx # error code 0
|
|
movl %esp,%eax # pt_regs pointer
|
|
call do_debug
|
|
@@ -1079,6 +1068,7 @@ nmi_stack_correct:
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
+ TRACE_IRQS_OFF
|
|
xorl %edx,%edx # zero error code
|
|
movl %esp,%eax # pt_regs pointer
|
|
call do_nmi
|
|
@@ -1119,6 +1109,7 @@ nmi_espfix_stack:
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
+ TRACE_IRQS_OFF
|
|
FIXUP_ESPFIX_STACK # %eax == %esp
|
|
xorl %edx,%edx # zero error code
|
|
call do_nmi
|
|
@@ -1162,6 +1153,7 @@ KPROBE_ENTRY(int3)
|
|
pushl $-1 # mark this as an int
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
+ TRACE_IRQS_OFF
|
|
xorl %edx,%edx # zero error code
|
|
movl %esp,%eax # pt_regs pointer
|
|
call do_int3
|
|
@@ -1303,24 +1295,10 @@ ENTRY(kernel_thread_helper)
|
|
CFI_ENDPROC
|
|
ENDPROC(kernel_thread_helper)
|
|
|
|
-#ifdef CONFIG_FTRACE
|
|
+#ifdef CONFIG_FUNCTION_TRACER
|
|
#ifdef CONFIG_DYNAMIC_FTRACE
|
|
|
|
ENTRY(mcount)
|
|
- pushl %eax
|
|
- pushl %ecx
|
|
- pushl %edx
|
|
- movl 0xc(%esp), %eax
|
|
- subl $MCOUNT_INSN_SIZE, %eax
|
|
-
|
|
-.globl mcount_call
|
|
-mcount_call:
|
|
- call ftrace_stub
|
|
-
|
|
- popl %edx
|
|
- popl %ecx
|
|
- popl %eax
|
|
-
|
|
ret
|
|
END(mcount)
|
|
|
|
@@ -1372,7 +1350,7 @@ trace:
|
|
jmp ftrace_stub
|
|
END(mcount)
|
|
#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
-#endif /* CONFIG_FTRACE */
|
|
+#endif /* CONFIG_FUNCTION_TRACER */
|
|
|
|
#include <asm/alternative-asm.h>
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:39:24.000000000 +0100
@@ -66,35 +66,9 @@

.code64

-#ifdef CONFIG_FTRACE
+#ifdef CONFIG_FUNCTION_TRACER
#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(mcount)
-
- subq $0x38, %rsp
- movq %rax, (%rsp)
- movq %rcx, 8(%rsp)
- movq %rdx, 16(%rsp)
- movq %rsi, 24(%rsp)
- movq %rdi, 32(%rsp)
- movq %r8, 40(%rsp)
- movq %r9, 48(%rsp)
-
- movq 0x38(%rsp), %rdi
- subq $MCOUNT_INSN_SIZE, %rdi
-
-.globl mcount_call
-mcount_call:
- call ftrace_stub
-
- movq 48(%rsp), %r9
- movq 40(%rsp), %r8
- movq 32(%rsp), %rdi
- movq 24(%rsp), %rsi
- movq 16(%rsp), %rdx
- movq 8(%rsp), %rcx
- movq (%rsp), %rax
- addq $0x38, %rsp
-
retq
END(mcount)

@@ -169,7 +143,7 @@ trace:
jmp ftrace_stub
END(mcount)
#endif /* CONFIG_DYNAMIC_FTRACE */
-#endif /* CONFIG_FTRACE */
+#endif /* CONFIG_FUNCTION_TRACER */

#ifndef CONFIG_PREEMPT
#define retint_kernel retint_restore_args
@@ -297,9 +271,9 @@ NMI_MASK = 0x80000000
ENTRY(ret_from_fork)
CFI_DEFAULT_STACK
push kernel_eflags(%rip)
- CFI_ADJUST_CFA_OFFSET 4
+ CFI_ADJUST_CFA_OFFSET 8
popf # reset kernel eflags
- CFI_ADJUST_CFA_OFFSET -4
+ CFI_ADJUST_CFA_OFFSET -8
call schedule_tail
GET_THREAD_INFO(%rcx)
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
@@ -863,6 +837,9 @@ END(spurious_interrupt)
.if \ist
movq %gs:pda_data_offset, %rbp
.endif
+ .if \irqtrace
+ TRACE_IRQS_OFF
+ .endif
movq %rsp,%rdi
movq ORIG_RAX(%rsp),%rsi
movq $-1,ORIG_RAX(%rsp)
@@ -1271,7 +1248,7 @@ ENTRY(simd_coprocessor_error)
END(simd_coprocessor_error)

ENTRY(device_not_available)
- zeroentry math_state_restore
+ zeroentry do_device_not_available
END(device_not_available)

/* runs on exception stack */
@@ -1370,9 +1347,11 @@ ENTRY(divide_error)
zeroentry do_divide_error
END(divide_error)

+#ifndef CONFIG_XEN
ENTRY(spurious_interrupt_bug)
zeroentry do_spurious_interrupt_bug
END(spurious_interrupt_bug)
+#endif

#ifdef CONFIG_X86_MCE
/* runs on exception stack */
--- head-2011-03-17.orig/arch/x86/kernel/fixup.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/fixup.c 2011-02-01 14:39:24.000000000 +0100
@@ -37,7 +37,7 @@

#define DP(_f, _args...) pr_alert(" " _f "\n" , ## _args )

-void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
+dotraplinkage void do_fixup_4gb_segment(struct pt_regs *regs, long error_code)
{
static unsigned long printed = 0;
char info[100];
--- head-2011-03-17.orig/arch/x86/kernel/head-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -36,6 +36,7 @@ void __init reserve_ebda_region(void)

/* start of EBDA area */
ebda_addr = get_bios_ebda();
+ printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem);

/* Fixup: bios puts an EBDA in the top 64K segment */
/* of conventional memory, but does not adjust lowmem. */
--- head-2011-03-17.orig/arch/x86/kernel/head64-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head64-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -151,12 +151,11 @@ void __init x86_64_start_kernel(char * r
load_idt((const struct desc_ptr *)&idt_descr);
#endif

- early_printk("Kernel alive\n");
+ if (console_loglevel == 10)
+ early_printk("Kernel alive\n");

x86_64_init_pda();

- early_printk("Kernel really alive\n");
-
x86_64_start_reservations(real_mode_data);
}
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -0,0 +1,3937 @@
|
|
+/*
|
|
+ * Intel IO-APIC support for multi-Pentium hosts.
|
|
+ *
|
|
+ * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
|
|
+ *
|
|
+ * Many thanks to Stig Venaas for trying out countless experimental
|
|
+ * patches and reporting/debugging problems patiently!
|
|
+ *
|
|
+ * (c) 1999, Multiple IO-APIC support, developed by
|
|
+ * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
|
|
+ * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
|
|
+ * further tested and cleaned up by Zach Brown <zab@redhat.com>
|
|
+ * and Ingo Molnar <mingo@redhat.com>
|
|
+ *
|
|
+ * Fixes
|
|
+ * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
|
|
+ * thanks to Eric Gilmore
|
|
+ * and Rolf G. Tews
|
|
+ * for testing these extensively
|
|
+ * Paul Diefenbaugh : Added full ACPI support
|
|
+ */
|
|
+
|
|
+#include <linux/mm.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/delay.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/mc146818rtc.h>
|
|
+#include <linux/compiler.h>
|
|
+#include <linux/acpi.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/sysdev.h>
|
|
+#include <linux/freezer.h>
|
|
+#include <linux/kthread.h>
|
|
+#include <linux/jiffies.h> /* time_after() */
|
|
+#ifdef CONFIG_ACPI
|
|
+#include <acpi/acpi_bus.h>
|
|
+#endif
|
|
+#include <linux/bootmem.h>
|
|
+
|
|
+#include <asm/idle.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/smp.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/proto.h>
|
|
+#include <asm/acpi.h>
|
|
+#include <asm/dma.h>
|
|
+#include <asm/timer.h>
|
|
+#include <asm/i8259.h>
|
|
+#include <asm/nmi.h>
|
|
+#include <asm/setup.h>
|
|
+
|
|
+#include <mach_ipi.h>
|
|
+#include <mach_apic.h>
|
|
+#include <mach_apicdef.h>
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/physdev.h>
|
|
+#include <xen/evtchn.h>
|
|
+
|
|
+/* Fake i8259 */
|
|
+#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
|
|
+#define disable_8259A_irq(_irq) ((void)0)
|
|
+#define i8259A_irq_pending(_irq) (0)
|
|
+
|
|
+unsigned long io_apic_irqs;
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+#define __apicdebuginit(type) static type __init
|
|
+
|
|
+/*
|
|
+ * Is the SiS APIC rmw bug present ?
|
|
+ * -1 = don't know, 0 = no, 1 = yes
|
|
+ */
|
|
+int sis_apic_bug = -1;
|
|
+
|
|
+static DEFINE_SPINLOCK(ioapic_lock);
|
|
+#ifndef CONFIG_XEN
|
|
+static DEFINE_SPINLOCK(vector_lock);
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * # of IRQ routing registers
|
|
+ */
|
|
+int nr_ioapic_registers[MAX_IO_APICS];
|
|
+
|
|
+/* I/O APIC entries */
|
|
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
+int nr_ioapics;
|
|
+
|
|
+/* MP IRQ source entries */
|
|
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
+
|
|
+/* # of MP IRQ source entries */
|
|
+int mp_irq_entries;
|
|
+
|
|
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
|
|
+int mp_bus_id_to_type[MAX_MP_BUSSES];
|
|
+#endif
|
|
+
|
|
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
+
|
|
+int skip_ioapic_setup;
|
|
+
|
|
+static int __init parse_noapic(char *str)
|
|
+{
|
|
+ /* disable IO-APIC */
|
|
+ disable_ioapic_setup();
|
|
+ return 0;
|
|
+}
|
|
+early_param("noapic", parse_noapic);
|
|
+
|
|
+struct irq_pin_list;
|
|
+struct irq_cfg {
|
|
+#ifndef CONFIG_XEN
|
|
+ unsigned int irq;
|
|
+ struct irq_pin_list *irq_2_pin;
|
|
+ cpumask_t domain;
|
|
+ cpumask_t old_domain;
|
|
+ unsigned move_cleanup_count;
|
|
+#endif
|
|
+ u8 vector;
|
|
+#ifndef CONFIG_XEN
|
|
+ u8 move_in_progress : 1;
|
|
+#endif
|
|
+};
|
|
+
|
|
+/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
|
+static struct irq_cfg irq_cfgx[NR_IRQS] = {
|
|
+ [0] = { .irq = 0 },
|
|
+ [1] = { .irq = 1 },
|
|
+ [2] = { .irq = 2 },
|
|
+ [3] = { .irq = 3 },
|
|
+ [4] = { .irq = 4 },
|
|
+ [5] = { .irq = 5 },
|
|
+ [6] = { .irq = 6 },
|
|
+ [7] = { .irq = 7 },
|
|
+ [8] = { .irq = 8 },
|
|
+ [9] = { .irq = 9 },
|
|
+ [10] = { .irq = 10 },
|
|
+ [11] = { .irq = 11 },
|
|
+ [12] = { .irq = 12 },
|
|
+ [13] = { .irq = 13 },
|
|
+ [14] = { .irq = 14 },
|
|
+ [15] = { .irq = 15 },
|
|
+};
|
|
+
|
|
+#define for_each_irq_cfg(irq, cfg) \
|
|
+ for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++)
|
|
+
|
|
+static struct irq_cfg *irq_cfg(unsigned int irq)
|
|
+{
|
|
+ return irq < nr_irqs ? irq_cfgx + irq : NULL;
|
|
+}
|
|
+
|
|
+static struct irq_cfg *irq_cfg_alloc(unsigned int irq)
|
|
+{
|
|
+ return irq_cfg(irq);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+#define irq_2_pin_init()
|
|
+#define add_pin_to_irq(irq, apic, pin)
|
|
+#else
|
|
+/*
|
|
+ * Rough estimation of how many shared IRQs there are; can be changed
|
|
+ * anytime.
|
|
+ */
|
|
+#define MAX_PLUS_SHARED_IRQS NR_IRQS
|
|
+#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
|
|
+
|
|
+/*
|
|
+ * This is performance-critical; we want to do it O(1).
+ *
+ * The indexing order of this array favors 1:1 mappings
|
|
+ * between pins and IRQs.
|
|
+ */
|
|
+
|
|
+struct irq_pin_list {
|
|
+ int apic, pin;
|
|
+ struct irq_pin_list *next;
|
|
+};
|
|
+
|
|
+static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE];
|
|
+static struct irq_pin_list *irq_2_pin_ptr;
|
|
+
|
|
+static void __init irq_2_pin_init(void)
|
|
+{
|
|
+ struct irq_pin_list *pin = irq_2_pin_head;
|
|
+ int i;
|
|
+
|
|
+ for (i = 1; i < PIN_MAP_SIZE; i++)
|
|
+ pin[i-1].next = &pin[i];
|
|
+
|
|
+ irq_2_pin_ptr = &pin[0];
|
|
+}
|
|
+
|
|
+static struct irq_pin_list *get_one_free_irq_2_pin(void)
|
|
+{
|
|
+ struct irq_pin_list *pin = irq_2_pin_ptr;
|
|
+
|
|
+ if (!pin)
|
|
+ panic("can not get more irq_2_pin\n");
|
|
+
|
|
+ irq_2_pin_ptr = pin->next;
|
|
+ pin->next = NULL;
|
|
+ return pin;
|
|
+}
|
|
+
|
|
+struct io_apic {
|
|
+ unsigned int index;
|
|
+ unsigned int unused[3];
|
|
+ unsigned int data;
|
|
+};
|
|
+
|
|
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
+{
|
|
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
+ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
+}
|
|
+#endif
|
|
+
|
|
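+/*
+ * In the CONFIG_XEN case below, I/O APIC registers are accessed through the
+ * PHYSDEVOP_apic_read/PHYSDEVOP_apic_write hypercalls rather than the MMIO
+ * window used natively.
+ */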
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(reg, &io_apic->index);
|
|
+ return readl(&io_apic->data);
|
|
+#else
|
|
+ struct physdev_apic apic_op;
|
|
+ int ret;
|
|
+
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
+ apic_op.reg = reg;
|
|
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ return apic_op.value;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(reg, &io_apic->index);
|
|
+ writel(value, &io_apic->data);
|
|
+#else
|
|
+ struct physdev_apic apic_op;
|
|
+
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
+ apic_op.reg = reg;
|
|
+ apic_op.value = value;
|
|
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+#define io_apic_modify io_apic_write
|
|
+#else
|
|
+/*
|
|
+ * Re-write a value: to be used for read-modify-write
|
|
+ * cycles where the read already set up the index register.
|
|
+ *
|
|
+ * Older SiS APIC requires we rewrite the index register
|
|
+ */
|
|
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
|
|
+{
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+
|
|
+ if (sis_apic_bug)
|
|
+ writel(reg, &io_apic->index);
|
|
+ writel(value, &io_apic->data);
|
|
+}
|
|
+
|
|
+static bool io_apic_level_ack_pending(unsigned int irq)
|
|
+{
|
|
+ struct irq_pin_list *entry;
|
|
+ unsigned long flags;
|
|
+ struct irq_cfg *cfg = irq_cfg(irq);
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ entry = cfg->irq_2_pin;
|
|
+ for (;;) {
|
|
+ unsigned int reg;
|
|
+ int pin;
|
|
+
|
|
+ if (!entry)
|
|
+ break;
|
|
+ pin = entry->pin;
|
|
+ reg = io_apic_read(entry->apic, 0x10 + pin*2);
|
|
+ /* Is the remote IRR bit set? */
|
|
+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ return true;
|
|
+ }
|
|
+ if (!entry->next)
|
|
+ break;
|
|
+ entry = entry->next;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ return false;
|
|
+}
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+union entry_union {
|
|
+ struct { u32 w1, w2; };
|
|
+ struct IO_APIC_route_entry entry;
|
|
+};
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
|
|
+{
|
|
+ union entry_union eu;
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
|
|
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ return eu.entry;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * When we write a new IO APIC routing entry, we need to write the high
|
|
+ * word first! If the mask bit in the low word is clear, we will enable
|
|
+ * the interrupt, and we need to make sure the entry is fully populated
|
|
+ * before that happens.
|
|
+ */
|
|
+static void
|
|
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+{
|
|
+ union entry_union eu;
|
|
+ eu.entry = e;
|
|
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+}
|
|
+
|
|
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __ioapic_write_entry(apic, pin, e);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * When we mask an IO APIC routing entry, we need to write the low
|
|
+ * word first, in order to set the mask bit before we change the
|
|
+ * high bits!
|
|
+ */
|
|
+static void ioapic_mask_entry(int apic, int pin)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ union entry_union eu = { .entry.mask = 1 };
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
+{
|
|
+ int apic, pin;
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_pin_list *entry;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ entry = cfg->irq_2_pin;
|
|
+ for (;;) {
|
|
+ unsigned int reg;
|
|
+
|
|
+ if (!entry)
|
|
+ break;
|
|
+
|
|
+ apic = entry->apic;
|
|
+ pin = entry->pin;
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ /*
|
|
+ * With interrupt-remapping, destination information comes
|
|
+ * from interrupt-remapping table entry.
|
|
+ */
|
|
+ if (!irq_remapped(irq))
|
|
+ io_apic_write(apic, 0x11 + pin*2, dest);
|
|
+#else
|
|
+ io_apic_write(apic, 0x11 + pin*2, dest);
|
|
+#endif
|
|
+ reg = io_apic_read(apic, 0x10 + pin*2);
|
|
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
|
|
+ reg |= vector;
|
|
+ io_apic_modify(apic, 0x10 + pin*2, reg);
|
|
+ if (!entry->next)
|
|
+ break;
|
|
+ entry = entry->next;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int assign_irq_vector(int irq, cpumask_t mask);
|
|
+
|
|
+static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ unsigned long flags;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+ /*
|
|
+ * Only the high 8 bits are valid.
|
|
+ */
|
|
+ dest = SET_APIC_LOGICAL_ID(dest);
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __target_IO_APIC_irq(irq, dest, cfg->vector);
|
|
+ desc->affinity = mask;
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+/*
|
|
+ * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
|
|
+ * shared ISA-space IRQs, so we have to support them. We are super
|
|
+ * fast in the common case, and fast for shared ISA-space IRQs.
|
|
+ */
|
|
+static void add_pin_to_irq(unsigned int irq, int apic, int pin)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_pin_list *entry;
|
|
+
|
|
+	/* first time this irq_cfg is referenced, so allocate it */
|
|
+ cfg = irq_cfg_alloc(irq);
|
|
+ entry = cfg->irq_2_pin;
|
|
+ if (!entry) {
|
|
+ entry = get_one_free_irq_2_pin();
|
|
+ cfg->irq_2_pin = entry;
|
|
+ entry->apic = apic;
|
|
+ entry->pin = pin;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ while (entry->next) {
|
|
+ /* not again, please */
|
|
+ if (entry->apic == apic && entry->pin == pin)
|
|
+ return;
|
|
+
|
|
+ entry = entry->next;
|
|
+ }
|
|
+
|
|
+ entry->next = get_one_free_irq_2_pin();
|
|
+ entry = entry->next;
|
|
+ entry->apic = apic;
|
|
+ entry->pin = pin;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Reroute an IRQ to a different pin.
|
|
+ */
|
|
+static void __init replace_pin_at_irq(unsigned int irq,
|
|
+ int oldapic, int oldpin,
|
|
+ int newapic, int newpin)
|
|
+{
|
|
+ struct irq_cfg *cfg = irq_cfg(irq);
|
|
+ struct irq_pin_list *entry = cfg->irq_2_pin;
|
|
+ int replaced = 0;
|
|
+
|
|
+ while (entry) {
|
|
+ if (entry->apic == oldapic && entry->pin == oldpin) {
|
|
+ entry->apic = newapic;
|
|
+ entry->pin = newpin;
|
|
+ replaced = 1;
|
|
+ /* every one is different, right? */
|
|
+ break;
|
|
+ }
|
|
+ entry = entry->next;
|
|
+ }
|
|
+
|
|
+ /* why? call replace before add? */
|
|
+ if (!replaced)
|
|
+ add_pin_to_irq(irq, newapic, newpin);
|
|
+}
|
|
+
|
|
+static inline void io_apic_modify_irq(unsigned int irq,
|
|
+ int mask_and, int mask_or,
|
|
+ void (*final)(struct irq_pin_list *entry))
|
|
+{
|
|
+ int pin;
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_pin_list *entry;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
|
|
+ unsigned int reg;
|
|
+ pin = entry->pin;
|
|
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
|
|
+ reg &= mask_and;
|
|
+ reg |= mask_or;
|
|
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
|
|
+ if (final)
|
|
+ final(entry);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void __unmask_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+void io_apic_sync(struct irq_pin_list *entry)
|
|
+{
|
|
+ /*
|
|
+ * Synchronize the IO-APIC and the CPU by doing
|
|
+ * a dummy read from the IO-APIC
|
|
+ */
|
|
+ struct io_apic __iomem *io_apic;
|
|
+ io_apic = io_apic_base(entry->apic);
|
|
+ readl(&io_apic->data);
|
|
+}
|
|
+
|
|
+static void __mask_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
|
|
+}
|
|
+#else /* CONFIG_X86_32 */
|
|
+static void __mask_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL);
|
|
+}
|
|
+
|
|
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
+ IO_APIC_REDIR_MASKED, NULL);
|
|
+}
|
|
+
|
|
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
|
|
+{
|
|
+ io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED,
|
|
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
|
|
+}
|
|
+#endif /* CONFIG_X86_32 */
|
|
+
|
|
+static void mask_IO_APIC_irq (unsigned int irq)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __mask_IO_APIC_irq(irq);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
+static void unmask_IO_APIC_irq (unsigned int irq)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __unmask_IO_APIC_irq(irq);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
+static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
+{
|
|
+ struct IO_APIC_route_entry entry;
|
|
+
|
|
+ /* Check delivery_mode to be sure we're not clearing an SMI pin */
|
|
+ entry = ioapic_read_entry(apic, pin);
|
|
+ if (entry.delivery_mode == dest_SMI)
|
|
+ return;
|
|
+ /*
|
|
+ * Disable it in the IO-APIC irq-routing table:
|
|
+ */
|
|
+ ioapic_mask_entry(apic, pin);
|
|
+}
|
|
+
|
|
+static void clear_IO_APIC (void)
|
|
+{
|
|
+ int apic, pin;
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++)
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
+ clear_IO_APIC_pin(apic, pin);
|
|
+}
|
|
+
|
|
+#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
|
|
+void send_IPI_self(int vector)
|
|
+{
|
|
+ unsigned int cfg;
|
|
+
|
|
+ /*
|
|
+ * Wait for idle.
|
|
+ */
|
|
+ apic_wait_icr_idle();
|
|
+ cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
|
|
+ /*
|
|
+ * Send the IPI. The write to APIC_ICR fires this off.
|
|
+ */
|
|
+ apic_write(APIC_ICR, cfg);
|
|
+}
|
|
+#endif /* !CONFIG_SMP && CONFIG_X86_32*/
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+/*
|
|
+ * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
|
|
+ * specific CPU-side IRQs.
|
|
+ */
|
|
+
|
|
+#define MAX_PIRQS 8
|
|
+static int pirq_entries [MAX_PIRQS];
|
|
+static int pirqs_enabled;
|
|
+
|
|
+static int __init ioapic_pirq_setup(char *str)
|
|
+{
|
|
+ int i, max;
|
|
+ int ints[MAX_PIRQS+1];
|
|
+
|
|
+ get_options(str, ARRAY_SIZE(ints), ints);
|
|
+
|
|
+ for (i = 0; i < MAX_PIRQS; i++)
|
|
+ pirq_entries[i] = -1;
|
|
+
|
|
+ pirqs_enabled = 1;
|
|
+ apic_printk(APIC_VERBOSE, KERN_INFO
|
|
+ "PIRQ redirection, working around broken MP-BIOS.\n");
|
|
+ max = MAX_PIRQS;
|
|
+ if (ints[0] < MAX_PIRQS)
|
|
+ max = ints[0];
|
|
+
|
|
+ for (i = 0; i < max; i++) {
|
|
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
+ "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
|
|
+ /*
|
|
+ * PIRQs are mapped upside down, usually.
|
|
+ */
|
|
+ pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
|
|
+ }
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+__setup("pirq=", ioapic_pirq_setup);
|
|
+#endif /* CONFIG_X86_32 */
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+/* I/O APIC RTE contents at the OS boot up */
|
|
+static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
|
|
+
|
|
+/*
|
|
+ * Saves and masks all the unmasked IO-APIC RTE's
|
|
+ */
|
|
+int save_mask_IO_APIC_setup(void)
|
|
+{
|
|
+ union IO_APIC_reg_01 reg_01;
|
|
+ unsigned long flags;
|
|
+ int apic, pin;
|
|
+
|
|
+ /*
|
|
+ * The number of IO-APIC IRQ registers (== #pins):
|
|
+ */
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_01.raw = io_apic_read(apic, 1);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
+ }
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ early_ioapic_entries[apic] =
|
|
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
|
|
+ nr_ioapic_registers[apic], GFP_KERNEL);
|
|
+ if (!early_ioapic_entries[apic])
|
|
+ goto nomem;
|
|
+ }
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++)
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
+ struct IO_APIC_route_entry entry;
|
|
+
|
|
+ entry = early_ioapic_entries[apic][pin] =
|
|
+ ioapic_read_entry(apic, pin);
|
|
+ if (!entry.mask) {
|
|
+ entry.mask = 1;
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+nomem:
|
|
+ while (apic >= 0)
|
|
+ kfree(early_ioapic_entries[apic--]);
|
|
+	memset(early_ioapic_entries, 0,
+	       sizeof(early_ioapic_entries));
|
|
+
|
|
+ return -ENOMEM;
|
|
+}
|
|
+
|
|
+void restore_IO_APIC_setup(void)
|
|
+{
|
|
+ int apic, pin;
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ if (!early_ioapic_entries[apic])
|
|
+ break;
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
+ ioapic_write_entry(apic, pin,
|
|
+ early_ioapic_entries[apic][pin]);
|
|
+ kfree(early_ioapic_entries[apic]);
|
|
+ early_ioapic_entries[apic] = NULL;
|
|
+ }
|
|
+}
|
|
+
|
|
+void reinit_intr_remapped_IO_APIC(int intr_remapping)
|
|
+{
|
|
+ /*
|
|
+	 * For now, a plain restore of the previous settings.
+	 * TBD: In the case of the OS enabling interrupt-remapping,
+	 * IO-APIC RTEs need to be set up to point to interrupt-remapping
+	 * table entries. For now, do a plain restore, and wait for
|
|
+ * the setup_IO_APIC_irqs() to do proper initialization.
|
|
+ */
|
|
+ restore_IO_APIC_setup();
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Find the IRQ entry number of a certain pin.
|
|
+ */
|
|
+static int find_irq_entry(int apic, int pin, int type)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < mp_irq_entries; i++)
|
|
+ if (mp_irqs[i].mp_irqtype == type &&
|
|
+ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
+ mp_irqs[i].mp_dstirq == pin)
|
|
+ return i;
|
|
+
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Find the pin to which IRQ[irq] (ISA) is connected
|
|
+ */
|
|
+static int __init find_isa_irq_pin(int irq, int type)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < mp_irq_entries; i++) {
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
+
|
|
+ if (test_bit(lbus, mp_bus_not_pci) &&
|
|
+ (mp_irqs[i].mp_irqtype == type) &&
|
|
+ (mp_irqs[i].mp_srcbusirq == irq))
|
|
+
|
|
+ return mp_irqs[i].mp_dstirq;
|
|
+ }
|
|
+ return -1;
|
|
+}
|
|
+
|
|
+static int __init find_isa_irq_apic(int irq, int type)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < mp_irq_entries; i++) {
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
+
|
|
+ if (test_bit(lbus, mp_bus_not_pci) &&
|
|
+ (mp_irqs[i].mp_irqtype == type) &&
|
|
+ (mp_irqs[i].mp_srcbusirq == irq))
|
|
+ break;
|
|
+ }
|
|
+ if (i < mp_irq_entries) {
|
|
+ int apic;
|
|
+ for(apic = 0; apic < nr_ioapics; apic++) {
|
|
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
|
+ return apic;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return -1;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Find a specific PCI IRQ entry.
|
|
+ * Not an __init, possibly needed by modules
|
|
+ */
|
|
+static int pin_2_irq(int idx, int apic, int pin);
|
|
+
|
|
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
|
|
+{
|
|
+ int apic, i, best_guess = -1;
|
|
+
|
|
+ apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
|
|
+ bus, slot, pin);
|
|
+ if (test_bit(bus, mp_bus_not_pci)) {
|
|
+ apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
|
+ return -1;
|
|
+ }
|
|
+ for (i = 0; i < mp_irq_entries; i++) {
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++)
|
|
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
|
+ break;
|
|
+
|
|
+ if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
+ !mp_irqs[i].mp_irqtype &&
|
|
+ (bus == lbus) &&
|
|
+ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
|
|
+
|
|
+ if (!(apic || IO_APIC_IRQ(irq)))
|
|
+ continue;
|
|
+
|
|
+ if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
|
+ return irq;
|
|
+ /*
|
|
+ * Use the first all-but-pin matching entry as a
|
|
+ * best-guess fuzzy result for broken mptables.
|
|
+ */
|
|
+ if (best_guess < 0)
|
|
+ best_guess = irq;
|
|
+ }
|
|
+ }
|
|
+ return best_guess;
|
|
+}
|
|
+
|
|
+EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
|
|
+
|
|
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
+/*
|
|
+ * EISA Edge/Level control register, ELCR
|
|
+ */
|
|
+static int EISA_ELCR(unsigned int irq)
|
|
+{
|
|
+ if (irq < 16) {
|
|
+ unsigned int port = 0x4d0 + (irq >> 3);
|
|
+ return (inb(port) >> (irq & 7)) & 1;
|
|
+ }
|
|
+ apic_printk(APIC_VERBOSE, KERN_INFO
|
|
+ "Broken MPtable reports ISA irq %d\n", irq);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#endif
|
|
+
|
|
+/* ISA interrupts are always polarity zero edge triggered,
|
|
+ * when listed as conforming in the MP table. */
|
|
+
|
|
+#define default_ISA_trigger(idx) (0)
|
|
+#define default_ISA_polarity(idx) (0)
|
|
+
|
|
+/* EISA interrupts are always polarity zero and can be edge or level
|
|
+ * trigger depending on the ELCR value. If an interrupt is listed as
|
|
+ * EISA conforming in the MP table, that means its trigger type must
|
|
+ * be read in from the ELCR */
|
|
+
|
|
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
|
|
+#define default_EISA_polarity(idx) default_ISA_polarity(idx)
|
|
+
|
|
+/* PCI interrupts are always polarity one level triggered,
|
|
+ * when listed as conforming in the MP table. */
|
|
+
|
|
+#define default_PCI_trigger(idx) (1)
|
|
+#define default_PCI_polarity(idx) (1)
|
|
+
|
|
+/* MCA interrupts are always polarity zero level triggered,
|
|
+ * when listed as conforming in the MP table. */
|
|
+
|
|
+#define default_MCA_trigger(idx) (1)
|
|
+#define default_MCA_polarity(idx) default_ISA_polarity(idx)
|
|
+
|
|
+static int MPBIOS_polarity(int idx)
|
|
+{
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
+ int polarity;
|
|
+
|
|
+ /*
|
|
+ * Determine IRQ line polarity (high active or low active):
|
|
+ */
|
|
+ switch (mp_irqs[idx].mp_irqflag & 3)
|
|
+ {
|
|
+ case 0: /* conforms, ie. bus-type dependent polarity */
|
|
+ if (test_bit(bus, mp_bus_not_pci))
|
|
+ polarity = default_ISA_polarity(idx);
|
|
+ else
|
|
+ polarity = default_PCI_polarity(idx);
|
|
+ break;
|
|
+ case 1: /* high active */
|
|
+ {
|
|
+ polarity = 0;
|
|
+ break;
|
|
+ }
|
|
+ case 2: /* reserved */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ polarity = 1;
|
|
+ break;
|
|
+ }
|
|
+ case 3: /* low active */
|
|
+ {
|
|
+ polarity = 1;
|
|
+ break;
|
|
+ }
|
|
+ default: /* invalid */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ polarity = 1;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ return polarity;
|
|
+}
|
|
+
|
|
+static int MPBIOS_trigger(int idx)
|
|
+{
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
+ int trigger;
|
|
+
|
|
+ /*
|
|
+ * Determine IRQ trigger mode (edge or level sensitive):
|
|
+ */
|
|
+ switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
|
|
+ {
|
|
+ case 0: /* conforms, ie. bus-type dependent */
|
|
+ if (test_bit(bus, mp_bus_not_pci))
|
|
+ trigger = default_ISA_trigger(idx);
|
|
+ else
|
|
+ trigger = default_PCI_trigger(idx);
|
|
+#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
+ switch (mp_bus_id_to_type[bus]) {
|
|
+ case MP_BUS_ISA: /* ISA pin */
|
|
+ {
|
|
+ /* set before the switch */
|
|
+ break;
|
|
+ }
|
|
+ case MP_BUS_EISA: /* EISA pin */
|
|
+ {
|
|
+ trigger = default_EISA_trigger(idx);
|
|
+ break;
|
|
+ }
|
|
+ case MP_BUS_PCI: /* PCI pin */
|
|
+ {
|
|
+ /* set before the switch */
|
|
+ break;
|
|
+ }
|
|
+ case MP_BUS_MCA: /* MCA pin */
|
|
+ {
|
|
+ trigger = default_MCA_trigger(idx);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ trigger = 1;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+ break;
|
|
+ case 1: /* edge */
|
|
+ {
|
|
+ trigger = 0;
|
|
+ break;
|
|
+ }
|
|
+ case 2: /* reserved */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ trigger = 1;
|
|
+ break;
|
|
+ }
|
|
+ case 3: /* level */
|
|
+ {
|
|
+ trigger = 1;
|
|
+ break;
|
|
+ }
|
|
+ default: /* invalid */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ trigger = 0;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ return trigger;
|
|
+}
|
|
+
|
|
+static inline int irq_polarity(int idx)
|
|
+{
|
|
+ return MPBIOS_polarity(idx);
|
|
+}
|
|
+
|
|
+static inline int irq_trigger(int idx)
|
|
+{
|
|
+ return MPBIOS_trigger(idx);
|
|
+}
|
|
+
|
|
+int (*ioapic_renumber_irq)(int ioapic, int irq);
|
|
+static int pin_2_irq(int idx, int apic, int pin)
|
|
+{
|
|
+ int irq, i;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
+
|
|
+ /*
|
|
+ * Debugging check, we are in big trouble if this message pops up!
|
|
+ */
|
|
+ if (mp_irqs[idx].mp_dstirq != pin)
|
|
+ printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
+
|
|
+ if (test_bit(bus, mp_bus_not_pci)) {
|
|
+ irq = mp_irqs[idx].mp_srcbusirq;
|
|
+ } else {
|
|
+ /*
|
|
+ * PCI IRQs are mapped in order
|
|
+ */
|
|
+ i = irq = 0;
|
|
+ while (i < apic)
|
|
+ irq += nr_ioapic_registers[i++];
|
|
+ irq += pin;
|
|
+ /*
|
|
+ * For MPS mode, so far only needed by ES7000 platform
|
|
+ */
|
|
+ if (ioapic_renumber_irq)
|
|
+ irq = ioapic_renumber_irq(apic, irq);
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * PCI IRQ command line redirection. Yes, limits are hardcoded.
|
|
+ */
|
|
+ if ((pin >= 16) && (pin <= 23)) {
|
|
+ if (pirq_entries[pin-16] != -1) {
|
|
+ if (!pirq_entries[pin-16]) {
|
|
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
+ "disabling PIRQ%d\n", pin-16);
|
|
+ } else {
|
|
+ irq = pirq_entries[pin-16];
|
|
+ apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
+ "using PIRQ%d -> IRQ %d\n",
|
|
+ pin-16, irq);
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+void lock_vector_lock(void)
|
|
+{
|
|
+	/* Used to ensure that the online set of cpus does not change
+	 * during assign_irq_vector.
+	 */
|
|
+ spin_lock(&vector_lock);
|
|
+}
|
|
+
|
|
+void unlock_vector_lock(void)
|
|
+{
|
|
+ spin_unlock(&vector_lock);
|
|
+}
|
|
+#endif
|
|
+
|
|
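+/*
+ * Xen variant: the vector is handed out by the hypervisor via
+ * PHYSDEVOP_alloc_irq_vector and cached in irq_cfg; the cpumask argument is
+ * kept only for interface compatibility.
+ */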
+static int assign_irq_vector(int irq, cpumask_t mask)
|
|
+{
|
|
+ struct physdev_irq irq_op;
|
|
+ struct irq_cfg *cfg;
|
|
+
|
|
+ if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
+ return -EINVAL;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+
|
|
+ if (cfg->vector)
|
|
+ return 0;
|
|
+
|
|
+ irq_op.irq = irq;
|
|
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
+ return -ENOSPC;
|
|
+
|
|
+ cfg->vector = irq_op.vector;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+static void __clear_irq_vector(int irq)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ cpumask_t mask;
|
|
+ int cpu, vector;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ BUG_ON(!cfg->vector);
|
|
+
|
|
+ vector = cfg->vector;
|
|
+ cpus_and(mask, cfg->domain, cpu_online_map);
|
|
+ for_each_cpu_mask_nr(cpu, mask)
|
|
+ per_cpu(vector_irq, cpu)[vector] = -1;
|
|
+
|
|
+ cfg->vector = 0;
|
|
+ cpus_clear(cfg->domain);
|
|
+
|
|
+ if (likely(!cfg->move_in_progress))
|
|
+ return;
|
|
+ cpus_and(mask, cfg->old_domain, cpu_online_map);
|
|
+ for_each_cpu_mask_nr(cpu, mask) {
|
|
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS;
|
|
+ vector++) {
|
|
+ if (per_cpu(vector_irq, cpu)[vector] != irq)
|
|
+ continue;
|
|
+ per_cpu(vector_irq, cpu)[vector] = -1;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ cfg->move_in_progress = 0;
|
|
+}
|
|
+
|
|
+void __setup_vector_irq(int cpu)
|
|
+{
|
|
+ /* Initialize vector_irq on a new cpu */
|
|
+ /* This function must be called with vector_lock held */
|
|
+ int irq, vector;
|
|
+ struct irq_cfg *cfg;
|
|
+
|
|
+ /* Mark the inuse vectors */
|
|
+ for_each_irq_cfg(irq, cfg) {
|
|
+ if (!cpu_isset(cpu, cfg->domain))
|
|
+ continue;
|
|
+ vector = cfg->vector;
|
|
+ per_cpu(vector_irq, cpu)[vector] = irq;
|
|
+ }
|
|
+ /* Mark the free vectors */
|
|
+ for (vector = 0; vector < NR_VECTORS; ++vector) {
|
|
+ irq = per_cpu(vector_irq, cpu)[vector];
|
|
+ if (irq < 0)
|
|
+ continue;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ if (!cpu_isset(cpu, cfg->domain))
|
|
+ per_cpu(vector_irq, cpu)[vector] = -1;
|
|
+ }
|
|
+}
|
|
+
|
|
+static struct irq_chip ioapic_chip;
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+static struct irq_chip ir_ioapic_chip;
|
|
+#endif
|
|
+
|
|
+#define IOAPIC_AUTO -1
|
|
+#define IOAPIC_EDGE 0
|
|
+#define IOAPIC_LEVEL 1
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+static inline int IO_APIC_irq_trigger(int irq)
|
|
+{
|
|
+ int apic, idx, pin;
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
+ idx = find_irq_entry(apic, pin, mp_INT);
|
|
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
|
|
+ return irq_trigger(idx);
|
|
+ }
|
|
+ }
|
|
+ /*
|
|
+ * nonexistent IRQs are edge default
|
|
+ */
|
|
+ return 0;
|
|
+}
|
|
+#else
|
|
+static inline int IO_APIC_irq_trigger(int irq)
|
|
+{
|
|
+ return 1;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void ioapic_register_intr(int irq, unsigned long trigger)
|
|
+{
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+
|
|
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
+ trigger == IOAPIC_LEVEL)
|
|
+ desc->status |= IRQ_LEVEL;
|
|
+ else
|
|
+ desc->status &= ~IRQ_LEVEL;
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (irq_remapped(irq)) {
|
|
+ desc->status |= IRQ_MOVE_PCNTXT;
|
|
+ if (trigger)
|
|
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
|
|
+ handle_fasteoi_irq,
|
|
+ "fasteoi");
|
|
+ else
|
|
+ set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
+ trigger == IOAPIC_LEVEL)
|
|
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
+ handle_fasteoi_irq,
|
|
+ "fasteoi");
|
|
+ else
|
|
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+}
|
|
+#else /* !CONFIG_XEN */
|
|
+#define __clear_irq_vector(irq) ((void)(irq))
|
|
+#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
|
|
+#endif
|
|
+
|
|
+static int setup_ioapic_entry(int apic, int irq,
|
|
+ struct IO_APIC_route_entry *entry,
|
|
+ unsigned int destination, int trigger,
|
|
+ int polarity, int vector)
|
|
+{
|
|
+ /*
|
|
+ * add it to the IO-APIC irq-routing table:
|
|
+ */
|
|
+ memset(entry,0,sizeof(*entry));
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (intr_remapping_enabled) {
|
|
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic);
|
|
+ struct irte irte;
|
|
+ struct IR_IO_APIC_route_entry *ir_entry =
|
|
+ (struct IR_IO_APIC_route_entry *) entry;
|
|
+ int index;
|
|
+
|
|
+ if (!iommu)
|
|
+ panic("No mapping iommu for ioapic %d\n", apic);
|
|
+
|
|
+ index = alloc_irte(iommu, irq, 1);
|
|
+ if (index < 0)
|
|
+ panic("Failed to allocate IRTE for ioapic %d\n", apic);
|
|
+
|
|
+ memset(&irte, 0, sizeof(irte));
|
|
+
|
|
+ irte.present = 1;
|
|
+ irte.dst_mode = INT_DEST_MODE;
|
|
+ irte.trigger_mode = trigger;
|
|
+ irte.dlvry_mode = INT_DELIVERY_MODE;
|
|
+ irte.vector = vector;
|
|
+ irte.dest_id = IRTE_DEST(destination);
|
|
+
|
|
+ modify_irte(irq, &irte);
|
|
+
|
|
+ ir_entry->index2 = (index >> 15) & 0x1;
|
|
+ ir_entry->zero = 0;
|
|
+ ir_entry->format = 1;
|
|
+ ir_entry->index = (index & 0x7fff);
|
|
+ } else
|
|
+#endif
|
|
+ {
|
|
+ entry->delivery_mode = INT_DELIVERY_MODE;
|
|
+ entry->dest_mode = INT_DEST_MODE;
|
|
+ entry->dest = destination;
|
|
+ }
|
|
+
|
|
+ entry->mask = 0; /* enable IRQ */
|
|
+ entry->trigger = trigger;
|
|
+ entry->polarity = polarity;
|
|
+ entry->vector = vector;
|
|
+
|
|
+ /* Mask level triggered irqs.
|
|
+ * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
|
|
+ */
|
|
+ if (trigger)
|
|
+ entry->mask = 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
|
|
+ int trigger, int polarity)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct IO_APIC_route_entry entry;
|
|
+ cpumask_t mask;
|
|
+
|
|
+ if (!IO_APIC_IRQ(irq))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+
|
|
+ mask = TARGET_CPUS;
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ cpus_and(mask, cfg->domain, mask);
|
|
+#endif
|
|
+
|
|
+ apic_printk(APIC_VERBOSE,KERN_DEBUG
|
|
+ "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
|
|
+ "IRQ %d Mode:%i Active:%i)\n",
|
|
+ apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
|
|
+ irq, trigger, polarity);
|
|
+
|
|
+
|
|
+ if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
|
|
+ cpu_mask_to_apicid(mask), trigger, polarity,
|
|
+ cfg->vector)) {
|
|
+ printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
|
|
+ mp_ioapics[apic].mp_apicid, pin);
|
|
+ __clear_irq_vector(irq);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ ioapic_register_intr(irq, trigger);
|
|
+ if (irq < 16)
|
|
+ disable_8259A_irq(irq);
|
|
+
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
+}
|
|
+
|
|
+static void __init setup_IO_APIC_irqs(void)
|
|
+{
|
|
+ int apic, pin, idx, irq;
|
|
+ int notcon = 0;
|
|
+
|
|
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
+
|
|
+ idx = find_irq_entry(apic, pin, mp_INT);
|
|
+ if (idx == -1) {
|
|
+ if (!notcon) {
|
|
+ notcon = 1;
|
|
+ apic_printk(APIC_VERBOSE,
|
|
+ KERN_DEBUG " %d-%d",
|
|
+ mp_ioapics[apic].mp_apicid,
|
|
+ pin);
|
|
+ } else
|
|
+ apic_printk(APIC_VERBOSE, " %d-%d",
|
|
+ mp_ioapics[apic].mp_apicid,
|
|
+ pin);
|
|
+ continue;
|
|
+ }
|
|
+ if (notcon) {
|
|
+ apic_printk(APIC_VERBOSE,
|
|
+ " (apicid-pin) not connected\n");
|
|
+ notcon = 0;
|
|
+ }
|
|
+
|
|
+ irq = pin_2_irq(idx, apic, pin);
|
|
+#if defined(CONFIG_XEN)
|
|
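+			/*
+			 * Only IRQs inside the guest's PIRQ range get an
+			 * IO-APIC routing entry set up here.
+			 */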
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS)
|
|
+ continue;
|
|
+#elif defined(CONFIG_X86_32)
|
|
+ if (multi_timer_check(apic, irq))
|
|
+ continue;
|
|
+#endif
|
|
+ add_pin_to_irq(irq, apic, pin);
|
|
+
|
|
+ setup_IO_APIC_irq(apic, pin, irq,
|
|
+ irq_trigger(idx), irq_polarity(idx));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (notcon)
|
|
+ apic_printk(APIC_VERBOSE,
|
|
+ " (apicid-pin) not connected\n");
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Set up the timer pin, possibly with the 8259A-master behind.
|
|
+ */
|
|
+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
+ int vector)
|
|
+{
|
|
+ struct IO_APIC_route_entry entry;
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (intr_remapping_enabled)
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+ memset(&entry, 0, sizeof(entry));
|
|
+
|
|
+ /*
|
|
+ * We use logical delivery to get the timer IRQ
|
|
+ * to the first CPU.
|
|
+ */
|
|
+ entry.dest_mode = INT_DEST_MODE;
|
|
+ entry.mask = 1; /* mask IRQ now */
|
|
+ entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
+ entry.delivery_mode = INT_DELIVERY_MODE;
|
|
+ entry.polarity = 0;
|
|
+ entry.trigger = 0;
|
|
+ entry.vector = vector;
|
|
+
|
|
+ /*
|
|
+ * The timer IRQ doesn't have to know that behind the
|
|
+	 * scenes we may have an 8259A-master in AEOI mode ...
|
|
+ */
|
|
+ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
|
|
+
|
|
+ /*
|
|
+ * Add it to the IO-APIC irq-routing table:
|
|
+ */
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
+}
|
|
+
|
|
+
|
|
+__apicdebuginit(void) print_IO_APIC(void)
|
|
+{
|
|
+ int apic, i;
|
|
+ union IO_APIC_reg_00 reg_00;
|
|
+ union IO_APIC_reg_01 reg_01;
|
|
+ union IO_APIC_reg_02 reg_02;
|
|
+ union IO_APIC_reg_03 reg_03;
|
|
+ unsigned long flags;
|
|
+ struct irq_cfg *cfg;
|
|
+ unsigned int irq;
|
|
+
|
|
+ if (apic_verbosity == APIC_QUIET)
|
|
+ return;
|
|
+
|
|
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
+ for (i = 0; i < nr_ioapics; i++)
|
|
+ printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
+ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
|
+
|
|
+ /*
|
|
+ * We are a bit conservative about what we expect. We have to
|
|
+ * know about every hardware change ASAP.
|
|
+ */
|
|
+ printk(KERN_INFO "testing the IO APIC.......................\n");
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_00.raw = io_apic_read(apic, 0);
|
|
+ reg_01.raw = io_apic_read(apic, 1);
|
|
+ if (reg_01.bits.version >= 0x10)
|
|
+ reg_02.raw = io_apic_read(apic, 2);
|
|
+ if (reg_01.bits.version >= 0x20)
|
|
+ reg_03.raw = io_apic_read(apic, 3);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ printk("\n");
|
|
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
|
+ printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
+ printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
+ printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
|
|
+ printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
|
|
+
|
|
+	printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
|
|
+ printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
|
|
+
|
|
+ printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
|
|
+ printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
|
|
+
|
|
+ /*
|
|
+ * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
|
|
+ * but the value of reg_02 is read as the previous read register
|
|
+ * value, so ignore it if reg_02 == reg_01.
|
|
+ */
|
|
+ if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
|
|
+ printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
|
|
+ printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
|
|
+ * or reg_03, but the value of reg_0[23] is read as the previous read
|
|
+ * register value, so ignore it if reg_03 == reg_0[12].
|
|
+ */
|
|
+ if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
|
|
+ reg_03.raw != reg_01.raw) {
|
|
+ printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
|
|
+ printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
|
|
+ }
|
|
+
|
|
+ printk(KERN_DEBUG ".... IRQ redirection table:\n");
|
|
+
|
|
+ printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
|
|
+ " Stat Dmod Deli Vect: \n");
|
|
+
|
|
+ for (i = 0; i <= reg_01.bits.entries; i++) {
|
|
+ struct IO_APIC_route_entry entry;
|
|
+
|
|
+ entry = ioapic_read_entry(apic, i);
|
|
+
|
|
+ printk(KERN_DEBUG " %02x %03X ",
|
|
+ i,
|
|
+ entry.dest
|
|
+ );
|
|
+
|
|
+ printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
|
|
+ entry.mask,
|
|
+ entry.trigger,
|
|
+ entry.irr,
|
|
+ entry.polarity,
|
|
+ entry.delivery_status,
|
|
+ entry.dest_mode,
|
|
+ entry.delivery_mode,
|
|
+ entry.vector
|
|
+ );
|
|
+ }
|
|
+ }
|
|
+ printk(KERN_DEBUG "IRQ to pin mappings:\n");
|
|
+ for_each_irq_cfg(irq, cfg) {
|
|
+ struct irq_pin_list *entry = cfg->irq_2_pin;
|
|
+ if (!entry)
|
|
+ continue;
|
|
+ printk(KERN_DEBUG "IRQ%d ", irq);
|
|
+ for (;;) {
|
|
+ printk("-> %d:%d", entry->apic, entry->pin);
|
|
+ if (!entry->next)
|
|
+ break;
|
|
+ entry = entry->next;
|
|
+ }
|
|
+ printk("\n");
|
|
+ }
|
|
+
|
|
+ printk(KERN_INFO ".................................... done.\n");
|
|
+
|
|
+ return;
|
|
+}
|
|
+
|
|
+__apicdebuginit(void) print_APIC_bitfield(int base)
|
|
+{
|
|
+ unsigned int v;
|
|
+ int i, j;
|
|
+
|
|
+ if (apic_verbosity == APIC_QUIET)
|
|
+ return;
|
|
+
|
|
+ printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
|
|
+ for (i = 0; i < 8; i++) {
|
|
+ v = apic_read(base + i*0x10);
|
|
+ for (j = 0; j < 32; j++) {
|
|
+ if (v & (1<<j))
|
|
+ printk("1");
|
|
+ else
|
|
+ printk("0");
|
|
+ }
|
|
+ printk("\n");
|
|
+ }
|
|
+}
|
|
+
|
|
+__apicdebuginit(void) print_local_APIC(void *dummy)
|
|
+{
|
|
+ unsigned int v, ver, maxlvt;
|
|
+ u64 icr;
|
|
+
|
|
+ if (apic_verbosity == APIC_QUIET)
|
|
+ return;
|
|
+
|
|
+ printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
|
+ smp_processor_id(), hard_smp_processor_id());
|
|
+ v = apic_read(APIC_ID);
|
|
+ printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
|
|
+ v = apic_read(APIC_LVR);
|
|
+ printk(KERN_INFO "... APIC VERSION: %08x\n", v);
|
|
+ ver = GET_APIC_VERSION(v);
|
|
+ maxlvt = lapic_get_maxlvt();
|
|
+
|
|
+ v = apic_read(APIC_TASKPRI);
|
|
+ printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
|
|
+
|
|
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
|
|
+ if (!APIC_XAPIC(ver)) {
|
|
+ v = apic_read(APIC_ARBPRI);
|
|
+ printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
|
|
+ v & APIC_ARBPRI_MASK);
|
|
+ }
|
|
+ v = apic_read(APIC_PROCPRI);
|
|
+ printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Remote read supported only in the 82489DX and local APIC for
|
|
+ * Pentium processors.
|
|
+ */
|
|
+ if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
|
|
+ v = apic_read(APIC_RRR);
|
|
+ printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
|
|
+ }
|
|
+
|
|
+ v = apic_read(APIC_LDR);
|
|
+ printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
|
|
+ if (!x2apic_enabled()) {
|
|
+ v = apic_read(APIC_DFR);
|
|
+ printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
|
|
+ }
|
|
+ v = apic_read(APIC_SPIV);
|
|
+ printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
|
|
+
|
|
+ printk(KERN_DEBUG "... APIC ISR field:\n");
|
|
+ print_APIC_bitfield(APIC_ISR);
|
|
+ printk(KERN_DEBUG "... APIC TMR field:\n");
|
|
+ print_APIC_bitfield(APIC_TMR);
|
|
+ printk(KERN_DEBUG "... APIC IRR field:\n");
|
|
+ print_APIC_bitfield(APIC_IRR);
|
|
+
|
|
+ if (APIC_INTEGRATED(ver)) { /* !82489DX */
|
|
+ if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
|
|
+ apic_write(APIC_ESR, 0);
|
|
+
|
|
+ v = apic_read(APIC_ESR);
|
|
+ printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
|
|
+ }
|
|
+
|
|
+ icr = apic_icr_read();
|
|
+ printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
|
|
+ printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
|
|
+
|
|
+ v = apic_read(APIC_LVTT);
|
|
+ printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
|
|
+
|
|
+ if (maxlvt > 3) { /* PC is LVT#4. */
|
|
+ v = apic_read(APIC_LVTPC);
|
|
+ printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
|
|
+ }
|
|
+ v = apic_read(APIC_LVT0);
|
|
+ printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
|
|
+ v = apic_read(APIC_LVT1);
|
|
+ printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
|
|
+
|
|
+ if (maxlvt > 2) { /* ERR is LVT#3. */
|
|
+ v = apic_read(APIC_LVTERR);
|
|
+ printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
|
|
+ }
|
|
+
|
|
+ v = apic_read(APIC_TMICT);
|
|
+ printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
|
|
+ v = apic_read(APIC_TMCCT);
|
|
+ printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
|
|
+ v = apic_read(APIC_TDCR);
|
|
+ printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
|
|
+ printk("\n");
|
|
+}
|
|
+
|
|
+__apicdebuginit(void) print_all_local_APICs(void)
|
|
+{
|
|
+ int cpu;
|
|
+
|
|
+ preempt_disable();
|
|
+ for_each_online_cpu(cpu)
|
|
+ smp_call_function_single(cpu, print_local_APIC, NULL, 1);
|
|
+ preempt_enable();
|
|
+}
|
|
+
|
|
+__apicdebuginit(void) print_PIC(void)
|
|
+{
|
|
+ unsigned int v;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (apic_verbosity == APIC_QUIET)
|
|
+ return;
|
|
+
|
|
+ printk(KERN_DEBUG "\nprinting PIC contents\n");
|
|
+
|
|
+ spin_lock_irqsave(&i8259A_lock, flags);
|
|
+
|
|
+ v = inb(0xa1) << 8 | inb(0x21);
|
|
+ printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
|
|
+
|
|
+ v = inb(0xa0) << 8 | inb(0x20);
|
|
+ printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
|
|
+
|
|
+ outb(0x0b,0xa0);
|
|
+ outb(0x0b,0x20);
|
|
+ v = inb(0xa0) << 8 | inb(0x20);
|
|
+ outb(0x0a,0xa0);
|
|
+ outb(0x0a,0x20);
|
|
+
|
|
+ spin_unlock_irqrestore(&i8259A_lock, flags);
|
|
+
|
|
+ printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
|
|
+
|
|
+ v = inb(0x4d1) << 8 | inb(0x4d0);
|
|
+ printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
|
|
+}
|
|
+
|
|
+__apicdebuginit(int) print_all_ICs(void)
|
|
+{
|
|
+ print_PIC();
|
|
+ print_all_local_APICs();
|
|
+ print_IO_APIC();
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+fs_initcall(print_all_ICs);
|
|
+
|
|
+
|
|
+/* Where, if anywhere, is the i8259 connected in external int mode */
|
|
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+void __init enable_IO_APIC(void)
|
|
+{
|
|
+ union IO_APIC_reg_01 reg_01;
|
|
+#ifndef CONFIG_XEN
|
|
+ int i8259_apic, i8259_pin;
|
|
+#endif
|
|
+ int apic;
|
|
+ unsigned long flags;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ int i;
|
|
+ if (!pirqs_enabled)
|
|
+ for (i = 0; i < MAX_PIRQS; i++)
|
|
+ pirq_entries[i] = -1;
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * The number of IO-APIC IRQ registers (== #pins):
|
|
+ */
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_01.raw = io_apic_read(apic, 1);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
+ }
|
|
+#ifndef CONFIG_XEN
|
|
+ for(apic = 0; apic < nr_ioapics; apic++) {
|
|
+ int pin;
|
|
+ /* See if any of the pins is in ExtINT mode */
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
+ struct IO_APIC_route_entry entry;
|
|
+ entry = ioapic_read_entry(apic, pin);
|
|
+
|
|
+ /* If the interrupt line is enabled and in ExtInt mode
|
|
+ * I have found the pin where the i8259 is connected.
|
|
+ */
|
|
+ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
|
|
+ ioapic_i8259.apic = apic;
|
|
+ ioapic_i8259.pin = pin;
|
|
+ goto found_i8259;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ found_i8259:
|
|
+	/* Look to see whether the MP table has reported the ExtINT */
|
|
+ /* If we could not find the appropriate pin by looking at the ioapic
|
|
+	 * the i8259 probably is not connected to the ioapic, but give the
|
|
+ * mptable a chance anyway.
|
|
+ */
|
|
+ i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
|
|
+ i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
|
|
+ /* Trust the MP table if nothing is setup in the hardware */
|
|
+ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
|
|
+ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
|
|
+ ioapic_i8259.pin = i8259_pin;
|
|
+ ioapic_i8259.apic = i8259_apic;
|
|
+ }
|
|
+ /* Complain if the MP table and the hardware disagree */
|
|
+ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
|
|
+ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
|
|
+ {
|
|
+ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Do not trust the IO-APIC being empty at bootup
|
|
+ */
|
|
+ clear_IO_APIC();
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Not an __init, needed by the reboot code
|
|
+ */
|
|
+void disable_IO_APIC(void)
|
|
+{
|
|
+ /*
|
|
+ * Clear the IO-APIC before rebooting:
|
|
+ */
|
|
+ clear_IO_APIC();
|
|
+
|
|
+ /*
|
|
+ * If the i8259 is routed through an IOAPIC
|
|
+ * Put that IOAPIC in virtual wire mode
|
|
+ * so legacy interrupts can be delivered.
|
|
+ */
|
|
+ if (ioapic_i8259.pin != -1) {
|
|
+ struct IO_APIC_route_entry entry;
|
|
+
|
|
+ memset(&entry, 0, sizeof(entry));
|
|
+ entry.mask = 0; /* Enabled */
|
|
+ entry.trigger = 0; /* Edge */
|
|
+ entry.irr = 0;
|
|
+ entry.polarity = 0; /* High */
|
|
+ entry.delivery_status = 0;
|
|
+ entry.dest_mode = 0; /* Physical */
|
|
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
|
|
+ entry.vector = 0;
|
|
+ entry.dest = read_apic_id();
|
|
+
|
|
+ /*
|
|
+ * Add it to the IO-APIC irq-routing table:
|
|
+ */
|
|
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
|
+ }
|
|
+
|
|
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+/*
|
|
+ * function to set the IO-APIC physical IDs based on the
|
|
+ * values stored in the MPC table.
|
|
+ *
|
|
+ * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
|
|
+ */
|
|
+
|
|
+static void __init setup_ioapic_ids_from_mpc(void)
|
|
+{
|
|
+ union IO_APIC_reg_00 reg_00;
|
|
+ physid_mask_t phys_id_present_map;
|
|
+ int apic;
|
|
+ int i;
|
|
+ unsigned char old_id;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * Don't check I/O APIC IDs for xAPIC systems. They have
|
|
+ * no meaning without the serial APIC bus.
|
|
+ */
|
|
+ if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
|
+ || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
|
|
+ return;
|
|
+ /*
|
|
+ * This is broken; anything with a real cpu count has to
|
|
+ * circumvent this idiocy regardless.
|
|
+ */
|
|
+ phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
|
|
+
|
|
+ /*
|
|
+ * Set the IOAPIC ID to the value stored in the MPC table.
|
|
+ */
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+
|
|
+ /* Read the register 0 value */
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_00.raw = io_apic_read(apic, 0);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ old_id = mp_ioapics[apic].mp_apicid;
|
|
+
|
|
+ if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
|
|
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
|
|
+ apic, mp_ioapics[apic].mp_apicid);
|
|
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
+ reg_00.bits.ID);
|
|
+ mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Sanity check, is the ID really free? Every APIC in a
|
|
+ * system must have a unique ID or we get lots of nice
|
|
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
+ */
|
|
+ if (check_apicid_used(phys_id_present_map,
|
|
+ mp_ioapics[apic].mp_apicid)) {
|
|
+ printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
|
|
+ apic, mp_ioapics[apic].mp_apicid);
|
|
+ for (i = 0; i < get_physical_broadcast(); i++)
|
|
+ if (!physid_isset(i, phys_id_present_map))
|
|
+ break;
|
|
+ if (i >= get_physical_broadcast())
|
|
+ panic("Max APIC ID exceeded!\n");
|
|
+ printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
+ i);
|
|
+ physid_set(i, phys_id_present_map);
|
|
+ mp_ioapics[apic].mp_apicid = i;
|
|
+ } else {
|
|
+ physid_mask_t tmp;
|
|
+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
|
|
+ apic_printk(APIC_VERBOSE, "Setting %d in the "
|
|
+ "phys_id_present_map\n",
|
|
+ mp_ioapics[apic].mp_apicid);
|
|
+ physids_or(phys_id_present_map, phys_id_present_map, tmp);
|
|
+ }
|
|
+
|
|
+
|
|
+ /*
|
|
+ * We need to adjust the IRQ routing table
|
|
+ * if the ID changed.
|
|
+ */
|
|
+ if (old_id != mp_ioapics[apic].mp_apicid)
|
|
+ for (i = 0; i < mp_irq_entries; i++)
|
|
+ if (mp_irqs[i].mp_dstapic == old_id)
|
|
+ mp_irqs[i].mp_dstapic
|
|
+ = mp_ioapics[apic].mp_apicid;
|
|
+
|
|
+ /*
|
|
+ * Read the right value from the MPC table and
|
|
+ * write it into the ID register.
|
|
+ */
|
|
+ apic_printk(APIC_VERBOSE, KERN_INFO
|
|
+ "...changing IO-APIC physical APIC ID to %d ...",
|
|
+ mp_ioapics[apic].mp_apicid);
|
|
+
|
|
+ reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(apic, 0, reg_00.raw);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ /*
|
|
+ * Sanity check
|
|
+ */
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_00.raw = io_apic_read(apic, 0);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
|
|
+ printk("could not set ID!\n");
|
|
+ else
|
|
+ apic_printk(APIC_VERBOSE, " ok.\n");
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+int no_timer_check __initdata;
|
|
+
|
|
+static int __init notimercheck(char *s)
|
|
+{
|
|
+ no_timer_check = 1;
|
|
+ return 1;
|
|
+}
|
|
+__setup("no_timer_check", notimercheck);
|
|
+
|
|
+/*
|
|
+ * There is a nasty bug in some older SMP boards: their mptable lies
|
|
+ * about the timer IRQ. We do the following to work around the situation:
|
|
+ *
|
|
+ * - timer IRQ defaults to IO-APIC IRQ
|
|
+ * - if this function detects that timer IRQs are defunct, then we fall
|
|
+ * back to ISA timer IRQs
|
|
+ */
|
|
+static int __init timer_irq_works(void)
|
|
+{
|
|
+ unsigned long t1 = jiffies;
|
|
+ unsigned long flags;
|
|
+
|
|
+ if (no_timer_check)
|
|
+ return 1;
|
|
+
|
|
+ local_save_flags(flags);
|
|
+ local_irq_enable();
|
|
+ /* Let ten ticks pass... */
|
|
+ mdelay((10 * 1000) / HZ);
|
|
+ local_irq_restore(flags);
|
|
+
|
|
+ /*
|
|
+ * Expect a few ticks at least, to be sure some possible
|
|
+	 * glue logic does not lock up after the first one or two
|
|
+ * ticks in a non-ExtINT mode. Also the local APIC
|
|
+ * might have cached one ExtINT interrupt. Finally, at
|
|
+ * least one tick may be lost due to delays.
|
|
+ */
|
|
+
|
|
+ /* jiffies wrap? */
|
|
+ if (time_after(jiffies, t1 + 4))
|
|
+ return 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * In the SMP+IOAPIC case it might happen that there are an unspecified
|
|
+ * number of pending IRQ events unhandled. These cases are very rare,
|
|
+ * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
|
|
+ * better to do it this way because then we do not have to be aware of
|
|
+ * 'pending' interrupts in the IRQ path, except at this point.
|
|
+ */
|
|
+/*
|
|
+ * Edge-triggered handling needs to resend any interrupt
|
|
+ * that was delayed but this is now handled in the device
|
|
+ * independent code.
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * Starting up an edge-triggered IO-APIC interrupt is
+ * nasty - we need to make sure that we get the edge.
+ * If it is already asserted for some reason, we need to
+ * return 1 to indicate that it was pending.
|
|
+ *
|
|
+ * This is not complete - we should be able to fake
|
|
+ * an edge even if it isn't on the 8259A...
|
|
+ */
|
|
+
|
|
+static unsigned int startup_ioapic_irq(unsigned int irq)
|
|
+{
|
|
+ int was_pending = 0;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ if (irq < 16) {
|
|
+ disable_8259A_irq(irq);
|
|
+ if (i8259A_irq_pending(irq))
|
|
+ was_pending = 1;
|
|
+ }
|
|
+ __unmask_IO_APIC_irq(irq);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ return was_pending;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+static int ioapic_retrigger_irq(unsigned int irq)
|
|
+{
|
|
+
|
|
+ struct irq_cfg *cfg = irq_cfg(irq);
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+#else
|
|
+static int ioapic_retrigger_irq(unsigned int irq)
|
|
+{
|
|
+ send_IPI_self(irq_cfg(irq)->vector);
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Level and edge triggered IO-APIC interrupts need different handling,
|
|
+ * so we use two separate IRQ descriptors. Edge triggered IRQs can be
|
|
+ * handled with the level-triggered descriptor, but that one has slightly
|
|
+ * more overhead. Level-triggered interrupts cannot be handled with the
|
|
+ * edge-triggered handler, without risking IRQ storms and other ugly
|
|
+ * races.
|
|
+ */
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+static void ir_irq_migration(struct work_struct *work);
|
|
+
|
|
+static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
|
|
+
|
|
+/*
|
|
+ * Migrate the IO-APIC irq in the presence of intr-remapping.
|
|
+ *
|
|
+ * For edge triggered, irq migration is a simple atomic update (of vector
|
|
+ * and cpu destination) of IRTE and flush the hardware cache.
|
|
+ *
|
|
+ * For level triggered, we need to modify the io-apic RTE as well with the updated
+ * vector information, along with modifying the IRTE with vector and destination.
+ * So irq migration for level triggered is a little bit more complex compared to
|
|
+ * edge triggered migration. But the good news is, we use the same algorithm
|
|
+ * for level triggered migration as we have today, only difference being,
|
|
+ * we now initiate the irq migration from process context instead of the
|
|
+ * interrupt context.
|
|
+ *
|
|
+ * In future, when we do a directed EOI (combined with cpu EOI broadcast
|
|
+ * suppression) to the IO-APIC, level triggered irq migration will also be
|
|
+ * as simple as edge triggered migration and we can do the irq migration
|
|
+ * with a simple atomic update to IO-APIC RTE.
|
|
+ */
|
|
+static void migrate_ioapic_irq(int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc;
|
|
+ cpumask_t tmp, cleanup_mask;
|
|
+ struct irte irte;
|
|
+ int modify_ioapic_rte;
|
|
+ unsigned int dest;
|
|
+ unsigned long flags;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ if (get_irte(irq, &irte))
|
|
+ return;
|
|
+
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+ modify_ioapic_rte = desc->status & IRQ_LEVEL;
|
|
+ if (modify_ioapic_rte) {
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __target_IO_APIC_irq(irq, dest, cfg->vector);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ }
|
|
+
|
|
+ irte.vector = cfg->vector;
|
|
+ irte.dest_id = IRTE_DEST(dest);
|
|
+
|
|
+ /*
|
|
+	 * Modify the IRTE and flush the interrupt entry cache.
|
|
+ */
|
|
+ modify_irte(irq, &irte);
|
|
+
|
|
+ if (cfg->move_in_progress) {
|
|
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ cfg->move_in_progress = 0;
|
|
+ }
|
|
+
|
|
+ desc->affinity = mask;
|
|
+}
|
|
+
|
|
+static int migrate_irq_remapped_level(int irq)
|
|
+{
|
|
+ int ret = -1;
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ mask_IO_APIC_irq(irq);
|
|
+
|
|
+ if (io_apic_level_ack_pending(irq)) {
|
|
+ /*
|
|
+ * Interrupt in progress. Migrating irq now will change the
|
|
+ * vector information in the IO-APIC RTE and that will confuse
|
|
+ * the EOI broadcast performed by cpu.
|
|
+ * So, delay the irq migration to the next instance.
|
|
+ */
|
|
+ schedule_delayed_work(&ir_migration_work, 1);
|
|
+ goto unmask;
|
|
+ }
|
|
+
|
|
+ /* everything is clear. we have right of way */
|
|
+ migrate_ioapic_irq(irq, desc->pending_mask);
|
|
+
|
|
+ ret = 0;
|
|
+ desc->status &= ~IRQ_MOVE_PENDING;
|
|
+ cpus_clear(desc->pending_mask);
|
|
+
|
|
+unmask:
|
|
+ unmask_IO_APIC_irq(irq);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void ir_irq_migration(struct work_struct *work)
|
|
+{
|
|
+ unsigned int irq;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ for_each_irq_desc(irq, desc) {
|
|
+ if (desc->status & IRQ_MOVE_PENDING) {
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&desc->lock, flags);
|
|
+ if (!desc->chip->set_affinity ||
|
|
+ !(desc->status & IRQ_MOVE_PENDING)) {
|
|
+ desc->status &= ~IRQ_MOVE_PENDING;
|
|
+ spin_unlock_irqrestore(&desc->lock, flags);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ desc->chip->set_affinity(irq, desc->pending_mask);
|
|
+ spin_unlock_irqrestore(&desc->lock, flags);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Migrates the IRQ destination in the process context.
|
|
+ */
|
|
+static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+
|
|
+ if (desc->status & IRQ_LEVEL) {
|
|
+ desc->status |= IRQ_MOVE_PENDING;
|
|
+ desc->pending_mask = mask;
|
|
+ migrate_irq_remapped_level(irq);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ migrate_ioapic_irq(irq, mask);
|
|
+}
|
|
+#endif
|
|
+
|
|
+asmlinkage void smp_irq_move_cleanup_interrupt(void)
|
|
+{
|
|
+ unsigned vector, me;
|
|
+ ack_APIC_irq();
|
|
+#ifdef CONFIG_X86_64
|
|
+ exit_idle();
|
|
+#endif
|
|
+ irq_enter();
|
|
+
|
|
+ me = smp_processor_id();
|
|
+ for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
|
+ unsigned int irq;
|
|
+ struct irq_desc *desc;
|
|
+ struct irq_cfg *cfg;
|
|
+ irq = __get_cpu_var(vector_irq)[vector];
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+ if (!desc)
|
|
+ continue;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ spin_lock(&desc->lock);
|
|
+ if (!cfg->move_cleanup_count)
|
|
+ goto unlock;
|
|
+
|
|
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
|
|
+ goto unlock;
|
|
+
|
|
+ __get_cpu_var(vector_irq)[vector] = -1;
|
|
+ cfg->move_cleanup_count--;
|
|
+unlock:
|
|
+ spin_unlock(&desc->lock);
|
|
+ }
|
|
+
|
|
+ irq_exit();
|
|
+}
|
|
+
|
|
+static void irq_complete_move(unsigned int irq)
|
|
+{
|
|
+ struct irq_cfg *cfg = irq_cfg(irq);
|
|
+ unsigned vector, me;
|
|
+
|
|
+ if (likely(!cfg->move_in_progress))
|
|
+ return;
|
|
+
|
|
+ vector = ~get_irq_regs()->orig_ax;
|
|
+ me = smp_processor_id();
|
|
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
|
|
+ cpumask_t cleanup_mask;
|
|
+
|
|
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ cfg->move_in_progress = 0;
|
|
+ }
|
|
+}
|
|
+#else
|
|
+static inline void irq_complete_move(unsigned int irq) {}
|
|
+#endif
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+static void ack_x2apic_level(unsigned int irq)
|
|
+{
|
|
+ ack_x2APIC_irq();
|
|
+}
|
|
+
|
|
+static void ack_x2apic_edge(unsigned int irq)
|
|
+{
|
|
+ ack_x2APIC_irq();
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void ack_apic_edge(unsigned int irq)
|
|
+{
|
|
+ irq_complete_move(irq);
|
|
+ move_native_irq(irq);
|
|
+ ack_APIC_irq();
|
|
+}
|
|
+
|
|
+atomic_t irq_mis_count;
|
|
+
|
|
+static void ack_apic_level(unsigned int irq)
|
|
+{
|
|
+#ifdef CONFIG_X86_32
|
|
+ unsigned long v;
|
|
+ int i;
|
|
+#endif
|
|
+ int do_unmask_irq = 0;
|
|
+
|
|
+ irq_complete_move(irq);
|
|
+#ifdef CONFIG_GENERIC_PENDING_IRQ
|
|
+ /* If we are moving the irq we need to mask it */
|
|
+ if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
|
|
+ do_unmask_irq = 1;
|
|
+ mask_IO_APIC_irq(irq);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * It appears there is an erratum which affects at least version 0x11
|
|
+ * of I/O APIC (that's the 82093AA and cores integrated into various
|
|
+ * chipsets). Under certain conditions a level-triggered interrupt is
|
|
+ * erroneously delivered as an edge-triggered one but the respective IRR
|
|
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
|
|
+ * message but it will never arrive and further interrupts are blocked
|
|
+ * from the source. The exact reason is so far unknown, but the
|
|
+ * phenomenon was observed when two consecutive interrupt requests
|
|
+ * from a given source get delivered to the same CPU and the source is
|
|
+ * temporarily disabled in between.
|
|
+ *
|
|
+ * A workaround is to simulate an EOI message manually. We achieve it
|
|
+ * by setting the trigger mode to edge and then to level when the edge
|
|
+ * trigger mode gets detected in the TMR of a local APIC for a
|
|
+ * level-triggered interrupt. We mask the source for the time of the
|
|
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
|
|
+ * The idea is from Manfred Spraul. --macro
|
|
+ */
|
|
+ i = irq_cfg(irq)->vector;
|
|
+
|
|
+ v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * We must acknowledge the irq before we move it or the acknowledge will
|
|
+ * not propagate properly.
|
|
+ */
|
|
+ ack_APIC_irq();
|
|
+
|
|
+ /* Now we can move and re-enable the irq */
|
|
+ if (unlikely(do_unmask_irq)) {
|
|
+ /* Only migrate the irq if the ack has been received.
|
|
+ *
|
|
+ * On rare occasions the broadcast level triggered ack gets
|
|
+ * delayed going to ioapics, and if we reprogram the
|
|
+ * vector while Remote IRR is still set the irq will never
|
|
+ * fire again.
|
|
+ *
|
|
+ * To prevent this scenario we read the Remote IRR bit
|
|
+ * of the ioapic. This has two effects.
|
|
+ * - On any sane system the read of the ioapic will
|
|
+ * flush writes (and acks) going to the ioapic from
|
|
+ * this cpu.
|
|
+ * - We get to see if the ACK has actually been delivered.
|
|
+ *
|
|
+ * Based on failed experiments of reprogramming the
|
|
+ * ioapic entry from outside of irq context starting
|
|
+ * with masking the ioapic entry and then polling until
|
|
+ * Remote IRR was clear before reprogramming the
|
|
+ * ioapic I don't trust the Remote IRR bit to be
|
|
+ * completely accurate.
|
|
+ *
|
|
+ * However there appears to be no other way to plug
|
|
+ * this race, so if the Remote IRR bit is not
|
|
+ * accurate and is causing problems then it is a hardware bug
|
|
+ * and you can go talk to the chipset vendor about it.
|
|
+ */
|
|
+ if (!io_apic_level_ack_pending(irq))
|
|
+ move_masked_irq(irq);
|
|
+ unmask_IO_APIC_irq(irq);
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (!(v & (1 << (i & 0x1f)))) {
|
|
+ atomic_inc(&irq_mis_count);
|
|
+ spin_lock(&ioapic_lock);
|
|
+ __mask_and_edge_IO_APIC_irq(irq);
|
|
+ __unmask_and_level_IO_APIC_irq(irq);
|
|
+ spin_unlock(&ioapic_lock);
|
|
+ }
|
|
+#endif
|
|
+}
|
|
+
|
|
+static struct irq_chip ioapic_chip __read_mostly = {
|
|
+ .name = "IO-APIC",
|
|
+ .startup = startup_ioapic_irq,
|
|
+ .mask = mask_IO_APIC_irq,
|
|
+ .unmask = unmask_IO_APIC_irq,
|
|
+ .ack = ack_apic_edge,
|
|
+ .eoi = ack_apic_level,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_ioapic_affinity_irq,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+static struct irq_chip ir_ioapic_chip __read_mostly = {
|
|
+ .name = "IR-IO-APIC",
|
|
+ .startup = startup_ioapic_irq,
|
|
+ .mask = mask_IO_APIC_irq,
|
|
+ .unmask = unmask_IO_APIC_irq,
|
|
+ .ack = ack_x2apic_edge,
|
|
+ .eoi = ack_x2apic_level,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_ir_ioapic_affinity_irq,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+#endif
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+static inline void init_IO_APIC_traps(void)
|
|
+{
|
|
+ int irq;
|
|
+ struct irq_desc *desc;
|
|
+ struct irq_cfg *cfg;
|
|
+
|
|
+ /*
|
|
+ * NOTE! The local APIC isn't very good at handling
|
|
+ * multiple interrupts at the same interrupt level.
|
|
+ * As the interrupt level is determined by taking the
|
|
+ * vector number and shifting that right by 4, we
|
|
+ * want to spread these out a bit so that they don't
|
|
+ * all fall in the same interrupt level.
|
|
+ *
|
|
+ * Also, we've got to be careful not to trash gate
|
|
+ * 0x80, because int 0x80 is hm, kind of importantish. ;)
|
|
+ */
|
|
+ for_each_irq_cfg(irq, cfg) {
|
|
+#ifdef CONFIG_XEN
|
|
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS)
|
|
+ continue;
|
|
+#endif
|
|
+ if (IO_APIC_IRQ(irq) && !cfg->vector) {
|
|
+ /*
|
|
+ * Hmm.. We don't have an entry for this,
|
|
+ * so default to an old-fashioned 8259
|
|
+ * interrupt if we can..
|
|
+ */
|
|
+ if (irq < 16)
|
|
+ make_8259A_irq(irq);
|
|
+ else {
|
|
+ desc = irq_to_desc(irq);
|
|
+ /* Strange. Oh, well.. */
|
|
+ desc->chip = &no_irq_chip;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * The local APIC irq-chip implementation:
|
|
+ */
|
|
+
|
|
+static void mask_lapic_irq(unsigned int irq)
|
|
+{
|
|
+ unsigned long v;
|
|
+
|
|
+ v = apic_read(APIC_LVT0);
|
|
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
|
|
+}
|
|
+
|
|
+static void unmask_lapic_irq(unsigned int irq)
|
|
+{
|
|
+ unsigned long v;
|
|
+
|
|
+ v = apic_read(APIC_LVT0);
|
|
+ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
+}
|
|
+
|
|
+static void ack_lapic_irq (unsigned int irq)
|
|
+{
|
|
+ ack_APIC_irq();
|
|
+}
|
|
+
|
|
+static struct irq_chip lapic_chip __read_mostly = {
|
|
+ .name = "local-APIC",
|
|
+ .mask = mask_lapic_irq,
|
|
+ .unmask = unmask_lapic_irq,
|
|
+ .ack = ack_lapic_irq,
|
|
+};
|
|
+
|
|
+static void lapic_register_intr(int irq)
|
|
+{
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+ desc->status &= ~IRQ_LEVEL;
|
|
+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
|
+ "edge");
|
|
+}
|
|
+
|
|
+static void __init setup_nmi(void)
|
|
+{
|
|
+ /*
|
|
+ * Dirty trick to enable the NMI watchdog ...
|
|
+ * We put the 8259A master into AEOI mode and
|
|
+ * unmask on all local APICs LVT0 as NMI.
|
|
+ *
|
|
+ * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
|
|
+ * is from Maciej W. Rozycki - so we do not have to EOI from
|
|
+ * the NMI handler or the timer interrupt.
|
|
+ */
|
|
+ apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
|
|
+
|
|
+ enable_NMI_through_LVT0();
|
|
+
|
|
+ apic_printk(APIC_VERBOSE, " done.\n");
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This looks a bit hackish but it's about the only way of sending
|
|
+ * a few INTA cycles to 8259As and any associated glue logic. ICR does
|
|
+ * not support the ExtINT mode, unfortunately. We need to send these
|
|
+ * cycles as some i82489DX-based boards have glue logic that keeps the
|
|
+ * 8259A interrupt line asserted until INTA. --macro
|
|
+ */
|
|
+static inline void __init unlock_ExtINT_logic(void)
|
|
+{
|
|
+ int apic, pin, i;
|
|
+ struct IO_APIC_route_entry entry0, entry1;
|
|
+ unsigned char save_control, save_freq_select;
|
|
+
|
|
+ pin = find_isa_irq_pin(8, mp_INT);
|
|
+ if (pin == -1) {
|
|
+ WARN_ON_ONCE(1);
|
|
+ return;
|
|
+ }
|
|
+ apic = find_isa_irq_apic(8, mp_INT);
|
|
+ if (apic == -1) {
|
|
+ WARN_ON_ONCE(1);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ entry0 = ioapic_read_entry(apic, pin);
|
|
+ clear_IO_APIC_pin(apic, pin);
|
|
+
|
|
+ memset(&entry1, 0, sizeof(entry1));
|
|
+
|
|
+ entry1.dest_mode = 0; /* physical delivery */
|
|
+ entry1.mask = 0; /* unmask IRQ now */
|
|
+ entry1.dest = hard_smp_processor_id();
|
|
+ entry1.delivery_mode = dest_ExtINT;
|
|
+ entry1.polarity = entry0.polarity;
|
|
+ entry1.trigger = 0;
|
|
+ entry1.vector = 0;
|
|
+
|
|
+ ioapic_write_entry(apic, pin, entry1);
|
|
+
|
|
+ save_control = CMOS_READ(RTC_CONTROL);
|
|
+ save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
|
|
+ CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
|
|
+ RTC_FREQ_SELECT);
|
|
+ CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
|
|
+
|
|
+ i = 100;
|
|
+ while (i-- > 0) {
|
|
+ mdelay(10);
|
|
+ if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
|
|
+ i -= 10;
|
|
+ }
|
|
+
|
|
+ CMOS_WRITE(save_control, RTC_CONTROL);
|
|
+ CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
|
|
+ clear_IO_APIC_pin(apic, pin);
|
|
+
|
|
+ ioapic_write_entry(apic, pin, entry0);
|
|
+}
|
|
+
|
|
+static int disable_timer_pin_1 __initdata;
|
|
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
|
|
+static int __init disable_timer_pin_setup(char *arg)
|
|
+{
|
|
+ disable_timer_pin_1 = 1;
|
|
+ return 0;
|
|
+}
|
|
+early_param("disable_timer_pin_1", disable_timer_pin_setup);
|
|
+
|
|
+int timer_through_8259 __initdata;
|
|
+
|
|
+/*
|
|
+ * This code may look a bit paranoid, but it's supposed to cooperate with
|
|
+ * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
|
|
+ * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
|
|
+ * fanatically on his truly buggy board.
|
|
+ *
|
|
+ * FIXME: really need to revamp this for all platforms.
|
|
+ */
|
|
+static inline void __init check_timer(void)
|
|
+{
|
|
+ struct irq_cfg *cfg = irq_cfg(0);
|
|
+ int apic1, pin1, apic2, pin2;
|
|
+ unsigned long flags;
|
|
+ unsigned int ver;
|
|
+ int no_pin1 = 0;
|
|
+
|
|
+ local_irq_save(flags);
|
|
+
|
|
+ ver = apic_read(APIC_LVR);
|
|
+ ver = GET_APIC_VERSION(ver);
|
|
+
|
|
+ /*
|
|
+ * get/set the timer IRQ vector:
|
|
+ */
|
|
+ disable_8259A_irq(0);
|
|
+ assign_irq_vector(0, TARGET_CPUS);
|
|
+
|
|
+ /*
|
|
+ * As IRQ0 is to be enabled in the 8259A, the virtual
|
|
+ * wire has to be disabled in the local APIC. Also
|
|
+ * timer interrupts need to be acknowledged manually in
|
|
+ * the 8259A for the i82489DX when using the NMI
|
|
+ * watchdog as that APIC treats NMIs as level-triggered.
|
|
+ * The AEOI mode will finish them in the 8259A
|
|
+ * automatically.
|
|
+ */
|
|
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
+ init_8259A(1);
|
|
+#ifdef CONFIG_X86_32
|
|
+ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
|
|
+#endif
|
|
+
|
|
+ pin1 = find_isa_irq_pin(0, mp_INT);
|
|
+ apic1 = find_isa_irq_apic(0, mp_INT);
|
|
+ pin2 = ioapic_i8259.pin;
|
|
+ apic2 = ioapic_i8259.apic;
|
|
+
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
|
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
+ cfg->vector, apic1, pin1, apic2, pin2);
|
|
+
|
|
+ /*
|
|
+ * Some BIOS writers are clueless and report the ExtINTA
|
|
+ * I/O APIC input from the cascaded 8259A as the timer
|
|
+ * interrupt input. So just in case, if only one pin
|
|
+ * was found above, try it both directly and through the
|
|
+ * 8259A.
|
|
+ */
|
|
+ if (pin1 == -1) {
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (intr_remapping_enabled)
|
|
+ panic("BIOS bug: timer not connected to IO-APIC");
|
|
+#endif
|
|
+ pin1 = pin2;
|
|
+ apic1 = apic2;
|
|
+ no_pin1 = 1;
|
|
+ } else if (pin2 == -1) {
|
|
+ pin2 = pin1;
|
|
+ apic2 = apic1;
|
|
+ }
|
|
+
|
|
+ if (pin1 != -1) {
|
|
+ /*
|
|
+ * Ok, does IRQ0 through the IOAPIC work?
|
|
+ */
|
|
+ if (no_pin1) {
|
|
+ add_pin_to_irq(0, apic1, pin1);
|
|
+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
|
|
+ }
|
|
+ unmask_IO_APIC_irq(0);
|
|
+ if (timer_irq_works()) {
|
|
+ if (nmi_watchdog == NMI_IO_APIC) {
|
|
+ setup_nmi();
|
|
+ enable_8259A_irq(0);
|
|
+ }
|
|
+ if (disable_timer_pin_1 > 0)
|
|
+ clear_IO_APIC_pin(0, pin1);
|
|
+ goto out;
|
|
+ }
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (intr_remapping_enabled)
|
|
+ panic("timer doesn't work through Interrupt-remapped IO-APIC");
|
|
+#endif
|
|
+ clear_IO_APIC_pin(apic1, pin1);
|
|
+ if (!no_pin1)
|
|
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
|
+ "8254 timer not connected to IO-APIC\n");
|
|
+
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
|
+ "(IRQ0) through the 8259A ...\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
|
|
+ /*
|
|
+ * legacy devices should be connected to IO APIC #0
|
|
+ */
|
|
+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
|
+ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
|
|
+ unmask_IO_APIC_irq(0);
|
|
+ enable_8259A_irq(0);
|
|
+ if (timer_irq_works()) {
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
|
+ timer_through_8259 = 1;
|
|
+ if (nmi_watchdog == NMI_IO_APIC) {
|
|
+ disable_8259A_irq(0);
|
|
+ setup_nmi();
|
|
+ enable_8259A_irq(0);
|
|
+ }
|
|
+ goto out;
|
|
+ }
|
|
+ /*
|
|
+ * Cleanup, just in case ...
|
|
+ */
|
|
+ disable_8259A_irq(0);
|
|
+ clear_IO_APIC_pin(apic2, pin2);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
|
+ }
|
|
+
|
|
+ if (nmi_watchdog == NMI_IO_APIC) {
|
|
+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
|
+ "through the IO-APIC - disabling NMI Watchdog!\n");
|
|
+ nmi_watchdog = NMI_NONE;
|
|
+ }
|
|
+#ifdef CONFIG_X86_32
|
|
+ timer_ack = 0;
|
|
+#endif
|
|
+
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "...trying to set up timer as Virtual Wire IRQ...\n");
|
|
+
|
|
+ lapic_register_intr(0);
|
|
+ apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
|
+ enable_8259A_irq(0);
|
|
+
|
|
+ if (timer_irq_works()) {
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
+ goto out;
|
|
+ }
|
|
+ disable_8259A_irq(0);
|
|
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
|
+
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "...trying to set up timer as ExtINT IRQ...\n");
|
|
+
|
|
+ init_8259A(0);
|
|
+ make_8259A_irq(0);
|
|
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
|
|
+
|
|
+ unlock_ExtINT_logic();
|
|
+
|
|
+ if (timer_irq_works()) {
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
+ goto out;
|
|
+ }
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
|
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
|
+ "report. Then try booting with the 'noapic' option.\n");
|
|
+out:
|
|
+ local_irq_restore(flags);
|
|
+}
|
|
+#else
|
|
+#define check_timer() ((void)0)
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
|
|
+ * to devices. However there may be an I/O APIC pin available for
|
|
+ * this interrupt regardless. The pin may be left unconnected, but
|
|
+ * typically it will be reused as an ExtINT cascade interrupt for
|
|
+ * the master 8259A. In the MPS case such a pin will normally be
|
|
+ * reported as an ExtINT interrupt in the MP table. With ACPI
|
|
+ * there is no provision for ExtINT interrupts, and in the absence
|
|
+ * of an override it would be treated as an ordinary ISA I/O APIC
|
|
+ * interrupt, that is edge-triggered and unmasked by default. We
|
|
+ * used to do this, but it caused problems on some systems because
|
|
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
|
|
+ * the same ExtINT cascade interrupt to drive the local APIC of the
|
|
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
|
|
+ * the I/O APIC in all cases now. No actual device should request
|
|
+ * it anyway. --macro
|
|
+ */
|
|
+#define PIC_IRQS (1 << PIC_CASCADE_IR)
|
|
+
|
|
+void __init setup_IO_APIC(void)
|
|
+{
|
|
+
|
|
+#if defined(CONFIG_X86_32) || defined(CONFIG_XEN)
|
|
+ enable_IO_APIC();
|
|
+#else
|
|
+ /*
|
|
+ * calling enable_IO_APIC() is moved to setup_local_APIC for BP
|
|
+ */
|
|
+#endif
|
|
+
|
|
+ io_apic_irqs = ~PIC_IRQS;
|
|
+
|
|
+ apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
|
|
+ /*
|
|
+ * Set up IO-APIC IRQ routing.
|
|
+ */
|
|
+#ifndef CONFIG_XEN
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (!acpi_ioapic)
|
|
+ setup_ioapic_ids_from_mpc();
|
|
+#endif
|
|
+ sync_Arb_IDs();
|
|
+#endif
|
|
+ setup_IO_APIC_irqs();
|
|
+ init_IO_APIC_traps();
|
|
+ check_timer();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Called after all the initialization is done. If we didn't find any
|
|
+ * APIC bugs then we can allow the modify fast path
|
|
+ */
|
|
+
|
|
+static int __init io_apic_bug_finalize(void)
|
|
+{
|
|
+ if (sis_apic_bug == -1)
|
|
+ sis_apic_bug = 0;
|
|
+#ifdef CONFIG_X86_XEN
|
|
+ if (is_initial_xendomain()) {
|
|
+ struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
|
|
+ op.u.platform_quirk.quirk_id = sis_apic_bug ?
|
|
+ QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
|
|
+ VOID(HYPERVISOR_platform_op(&op));
|
|
+ }
|
|
+#endif
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+late_initcall(io_apic_bug_finalize);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+struct sysfs_ioapic_data {
|
|
+ struct sys_device dev;
|
|
+ struct IO_APIC_route_entry entry[0];
|
|
+};
|
|
+static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
|
|
+
|
|
+static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
|
|
+{
|
|
+ struct IO_APIC_route_entry *entry;
|
|
+ struct sysfs_ioapic_data *data;
|
|
+ int i;
|
|
+
|
|
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
+ entry = data->entry;
|
|
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
|
|
+ *entry = ioapic_read_entry(dev->id, i);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int ioapic_resume(struct sys_device *dev)
|
|
+{
|
|
+ struct IO_APIC_route_entry *entry;
|
|
+ struct sysfs_ioapic_data *data;
|
|
+ unsigned long flags;
|
|
+ union IO_APIC_reg_00 reg_00;
|
|
+ int i;
|
|
+
|
|
+ data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
+ entry = data->entry;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_00.raw = io_apic_read(dev->id, 0);
|
|
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
|
|
+ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
|
|
+ io_apic_write(dev->id, 0, reg_00.raw);
|
|
+ }
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
|
|
+ ioapic_write_entry(dev->id, i, entry[i]);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct sysdev_class ioapic_sysdev_class = {
|
|
+ .name = "ioapic",
|
|
+ .suspend = ioapic_suspend,
|
|
+ .resume = ioapic_resume,
|
|
+};
|
|
+
|
|
+static int __init ioapic_init_sysfs(void)
|
|
+{
|
|
+ struct sys_device * dev;
|
|
+ int i, size, error;
|
|
+
|
|
+ error = sysdev_class_register(&ioapic_sysdev_class);
|
|
+ if (error)
|
|
+ return error;
|
|
+
|
|
+ for (i = 0; i < nr_ioapics; i++ ) {
|
|
+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
|
|
+ * sizeof(struct IO_APIC_route_entry);
|
|
+ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
|
|
+ if (!mp_ioapic_data[i]) {
|
|
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
|
|
+ continue;
|
|
+ }
|
|
+ dev = &mp_ioapic_data[i]->dev;
|
|
+ dev->id = i;
|
|
+ dev->cls = &ioapic_sysdev_class;
|
|
+ error = sysdev_register(dev);
|
|
+ if (error) {
|
|
+ kfree(mp_ioapic_data[i]);
|
|
+ mp_ioapic_data[i] = NULL;
|
|
+ printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
|
|
+ continue;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+device_initcall(ioapic_init_sysfs);
|
|
+
|
|
+/*
|
|
+ * Dynamic irq allocation and deallocation
|
|
+ */
|
|
+unsigned int create_irq_nr(unsigned int irq_want)
|
|
+{
|
|
+ /* Allocate an unused irq */
|
|
+ unsigned int irq;
|
|
+ unsigned int new;
|
|
+ unsigned long flags;
|
|
+ struct irq_cfg *cfg_new;
|
|
+
|
|
+ irq_want = nr_irqs - 1;
|
|
+
|
|
+ irq = 0;
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ for (new = irq_want; new > 0; new--) {
|
|
+ if (platform_legacy_irq(new))
|
|
+ continue;
|
|
+ cfg_new = irq_cfg(new);
|
|
+ if (cfg_new && cfg_new->vector != 0)
|
|
+ continue;
|
|
+ /* check if we need to create one */
|
|
+ if (!cfg_new)
|
|
+ cfg_new = irq_cfg_alloc(new);
|
|
+ if (__assign_irq_vector(new, TARGET_CPUS) == 0)
|
|
+ irq = new;
|
|
+ break;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+
|
|
+ if (irq > 0) {
|
|
+ dynamic_irq_init(irq);
|
|
+ }
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+int create_irq(void)
|
|
+{
|
|
+ int irq;
|
|
+
|
|
+ irq = create_irq_nr(nr_irqs - 1);
|
|
+
|
|
+ if (irq == 0)
|
|
+ irq = -1;
|
|
+
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+void destroy_irq(unsigned int irq)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ dynamic_irq_cleanup(irq);
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ free_irte(irq);
|
|
+#endif
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ __clear_irq_vector(irq);
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+}
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+/*
|
|
+ * MSI message composition
|
|
+ */
|
|
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
|
|
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ int err;
|
|
+ unsigned dest;
|
|
+ cpumask_t tmp;
|
|
+
|
|
+ tmp = TARGET_CPUS;
|
|
+ err = assign_irq_vector(irq, tmp);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, tmp);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (irq_remapped(irq)) {
|
|
+ struct irte irte;
|
|
+ int ir_index;
|
|
+ u16 sub_handle;
|
|
+
|
|
+ ir_index = map_irq_to_irte_handle(irq, &sub_handle);
|
|
+ BUG_ON(ir_index == -1);
|
|
+
|
|
+ memset (&irte, 0, sizeof(irte));
|
|
+
|
|
+ irte.present = 1;
|
|
+ irte.dst_mode = INT_DEST_MODE;
|
|
+ irte.trigger_mode = 0; /* edge */
|
|
+ irte.dlvry_mode = INT_DELIVERY_MODE;
|
|
+ irte.vector = cfg->vector;
|
|
+ irte.dest_id = IRTE_DEST(dest);
|
|
+
|
|
+ modify_irte(irq, &irte);
|
|
+
|
|
+ msg->address_hi = MSI_ADDR_BASE_HI;
|
|
+ msg->data = sub_handle;
|
|
+ msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
|
|
+ MSI_ADDR_IR_SHV |
|
|
+ MSI_ADDR_IR_INDEX1(ir_index) |
|
|
+ MSI_ADDR_IR_INDEX2(ir_index);
|
|
+ } else
|
|
+#endif
|
|
+ {
|
|
+ msg->address_hi = MSI_ADDR_BASE_HI;
|
|
+ msg->address_lo =
|
|
+ MSI_ADDR_BASE_LO |
|
|
+ ((INT_DEST_MODE == 0) ?
|
|
+ MSI_ADDR_DEST_MODE_PHYSICAL:
|
|
+ MSI_ADDR_DEST_MODE_LOGICAL) |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ MSI_ADDR_REDIRECTION_CPU:
|
|
+ MSI_ADDR_REDIRECTION_LOWPRI) |
|
|
+ MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ msg->data =
|
|
+ MSI_DATA_TRIGGER_EDGE |
|
|
+ MSI_DATA_LEVEL_ASSERT |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ MSI_DATA_DELIVERY_FIXED:
|
|
+ MSI_DATA_DELIVERY_LOWPRI) |
|
|
+ MSI_DATA_VECTOR(cfg->vector);
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct msi_msg msg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ read_msi_msg(irq, &msg);
|
|
+
|
|
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
|
|
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ write_msi_msg(irq, &msg);
|
|
+ desc = irq_to_desc(irq);
|
|
+ desc->affinity = mask;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+/*
|
|
+ * Migrate the MSI irq to another cpumask. This migration is
|
|
+ * done in the process context using interrupt-remapping hardware.
|
|
+ */
|
|
+static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp, cleanup_mask;
|
|
+ struct irte irte;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ if (get_irte(irq, &irte))
|
|
+ return;
|
|
+
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ irte.vector = cfg->vector;
|
|
+ irte.dest_id = IRTE_DEST(dest);
|
|
+
|
|
+ /*
|
|
+ * atomically update the IRTE with the new destination and vector.
|
|
+ */
|
|
+ modify_irte(irq, &irte);
|
|
+
|
|
+ /*
|
|
+ * After this point, all the interrupts will start arriving
|
|
+ * at the new destination. So, time to cleanup the previous
|
|
+ * vector allocation.
|
|
+ */
|
|
+ if (cfg->move_in_progress) {
|
|
+ cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
+ cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
+ send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ cfg->move_in_progress = 0;
|
|
+ }
|
|
+
|
|
+ desc = irq_to_desc(irq);
|
|
+ desc->affinity = mask;
|
|
+}
|
|
+#endif
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+/*
|
|
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
|
|
+ * which implement the MSI or MSI-X Capability Structure.
|
|
+ */
|
|
+static struct irq_chip msi_chip = {
|
|
+ .name = "PCI-MSI",
|
|
+ .unmask = unmask_msi_irq,
|
|
+ .mask = mask_msi_irq,
|
|
+ .ack = ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_msi_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+static struct irq_chip msi_ir_chip = {
|
|
+ .name = "IR-PCI-MSI",
|
|
+ .unmask = unmask_msi_irq,
|
|
+ .mask = mask_msi_irq,
|
|
+ .ack = ack_x2apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = ir_set_msi_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Map the PCI dev to the corresponding remapping hardware unit
|
|
+ * and allocate 'nvec' consecutive interrupt-remapping table entries
|
|
+ * in it.
|
|
+ */
|
|
+static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
|
|
+{
|
|
+ struct intel_iommu *iommu;
|
|
+ int index;
|
|
+
|
|
+ iommu = map_dev_to_ir(dev);
|
|
+ if (!iommu) {
|
|
+ printk(KERN_ERR
|
|
+ "Unable to map PCI %s to iommu\n", pci_name(dev));
|
|
+ return -ENOENT;
|
|
+ }
|
|
+
|
|
+ index = alloc_irte(iommu, irq, nvec);
|
|
+ if (index < 0) {
|
|
+ printk(KERN_ERR
|
|
+ "Unable to allocate %d IRTE for PCI %s\n", nvec,
|
|
+ pci_name(dev));
|
|
+ return -ENOSPC;
|
|
+ }
|
|
+ return index;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
|
|
+{
|
|
+ int ret;
|
|
+ struct msi_msg msg;
|
|
+
|
|
+ ret = msi_compose_msg(dev, irq, &msg);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ set_irq_msi(irq, desc);
|
|
+ write_msi_msg(irq, &msg);
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (irq_remapped(irq)) {
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+ /*
|
|
+ * irq migration in process context
|
|
+ */
|
|
+ desc->status |= IRQ_MOVE_PCNTXT;
|
|
+ set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
|
|
+ } else
|
|
+#endif
|
|
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
|
|
+
|
|
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static unsigned int build_irq_for_pci_dev(struct pci_dev *dev)
|
|
+{
|
|
+ unsigned int irq;
|
|
+
|
|
+ irq = dev->bus->number;
|
|
+ irq <<= 8;
|
|
+ irq |= dev->devfn;
|
|
+ irq <<= 12;
|
|
+
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
|
|
+{
|
|
+ unsigned int irq;
|
|
+ int ret;
|
|
+ unsigned int irq_want;
|
|
+
|
|
+ irq_want = build_irq_for_pci_dev(dev) + 0x100;
|
|
+
|
|
+ irq = create_irq_nr(irq_want);
|
|
+ if (irq == 0)
|
|
+ return -1;
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (!intr_remapping_enabled)
|
|
+ goto no_ir;
|
|
+
|
|
+ ret = msi_alloc_irte(dev, irq, 1);
|
|
+ if (ret < 0)
|
|
+ goto error;
|
|
+no_ir:
|
|
+#endif
|
|
+ ret = setup_msi_irq(dev, desc, irq);
|
|
+ if (ret < 0) {
|
|
+ destroy_irq(irq);
|
|
+ return ret;
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+error:
|
|
+ destroy_irq(irq);
|
|
+ return ret;
|
|
+#endif
|
|
+}
|
|
+
|
|
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
+{
|
|
+ unsigned int irq;
|
|
+ int ret, sub_handle;
|
|
+ struct msi_desc *desc;
|
|
+ unsigned int irq_want;
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ struct intel_iommu *iommu = 0;
|
|
+ int index = 0;
|
|
+#endif
|
|
+
|
|
+ irq_want = build_irq_for_pci_dev(dev) + 0x100;
|
|
+ sub_handle = 0;
|
|
+ list_for_each_entry(desc, &dev->msi_list, list) {
|
|
+ irq = create_irq_nr(irq_want--);
|
|
+ if (irq == 0)
|
|
+ return -1;
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (!intr_remapping_enabled)
|
|
+ goto no_ir;
|
|
+
|
|
+ if (!sub_handle) {
|
|
+ /*
|
|
+ * allocate the consecutive block of IRTEs
|
|
+ * for 'nvec'
|
|
+ */
|
|
+ index = msi_alloc_irte(dev, irq, nvec);
|
|
+ if (index < 0) {
|
|
+ ret = index;
|
|
+ goto error;
|
|
+ }
|
|
+ } else {
|
|
+ iommu = map_dev_to_ir(dev);
|
|
+ if (!iommu) {
|
|
+ ret = -ENOENT;
|
|
+ goto error;
|
|
+ }
|
|
+ /*
|
|
+ * setup the mapping between the irq and the IRTE
|
|
+ * base index, the sub_handle pointing to the
|
|
+ * appropriate interrupt remap table entry.
|
|
+ */
|
|
+ set_irte_irq(irq, iommu, index, sub_handle);
|
|
+ }
|
|
+no_ir:
|
|
+#endif
|
|
+ ret = setup_msi_irq(dev, desc, irq);
|
|
+ if (ret < 0)
|
|
+ goto error;
|
|
+ sub_handle++;
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+error:
|
|
+ destroy_irq(irq);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+void arch_teardown_msi_irq(unsigned int irq)
|
|
+{
|
|
+ destroy_irq(irq);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_DMAR
|
|
+#ifdef CONFIG_SMP
|
|
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct msi_msg msg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ dmar_msi_read(irq, &msg);
|
|
+
|
|
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
|
|
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ dmar_msi_write(irq, &msg);
|
|
+ desc = irq_to_desc(irq);
|
|
+ desc->affinity = mask;
|
|
+}
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+struct irq_chip dmar_msi_type = {
|
|
+ .name = "DMAR_MSI",
|
|
+ .unmask = dmar_msi_unmask,
|
|
+ .mask = dmar_msi_mask,
|
|
+ .ack = ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = dmar_msi_set_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_dmar_msi(unsigned int irq)
|
|
+{
|
|
+ int ret;
|
|
+ struct msi_msg msg;
|
|
+
|
|
+ ret = msi_compose_msg(NULL, irq, &msg);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+ dmar_msi_write(irq, &msg);
|
|
+ set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
|
|
+ "edge");
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_HPET_TIMER
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ struct irq_desc *desc;
|
|
+ struct msi_msg msg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ hpet_msi_read(irq, &msg);
|
|
+
|
|
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
|
|
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ hpet_msi_write(irq, &msg);
|
|
+ desc = irq_to_desc(irq);
|
|
+ desc->affinity = mask;
|
|
+}
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+struct irq_chip hpet_msi_type = {
|
|
+ .name = "HPET_MSI",
|
|
+ .unmask = hpet_msi_unmask,
|
|
+ .mask = hpet_msi_mask,
|
|
+ .ack = ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = hpet_msi_set_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_hpet_msi(unsigned int irq)
|
|
+{
|
|
+ int ret;
|
|
+ struct msi_msg msg;
|
|
+
|
|
+ ret = msi_compose_msg(NULL, irq, &msg);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ hpet_msi_write(irq, &msg);
|
|
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
|
|
+ "edge");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
+#endif /* CONFIG_PCI_MSI */
|
|
+/*
|
|
+ * Hypertransport interrupt support
|
|
+ */
|
|
+#ifdef CONFIG_HT_IRQ
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+
|
|
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
+{
|
|
+ struct ht_irq_msg msg;
|
|
+ fetch_ht_irq_msg(irq, &msg);
|
|
+
|
|
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
|
|
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
|
|
+
|
|
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
|
|
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
|
|
+
|
|
+ write_ht_irq_msg(irq, &msg);
|
|
+}
|
|
+
|
|
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ struct irq_desc *desc;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ return;
|
|
+
|
|
+ if (assign_irq_vector(irq, mask))
|
|
+ return;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, mask);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ target_ht_irq(irq, dest, cfg->vector);
|
|
+ desc = irq_to_desc(irq);
|
|
+ desc->affinity = mask;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static struct irq_chip ht_irq_chip = {
|
|
+ .name = "PCI-HT",
|
|
+ .mask = mask_ht_irq,
|
|
+ .unmask = unmask_ht_irq,
|
|
+ .ack = ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_ht_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ int err;
|
|
+ cpumask_t tmp;
|
|
+
|
|
+ tmp = TARGET_CPUS;
|
|
+ err = assign_irq_vector(irq, tmp);
|
|
+ if (!err) {
|
|
+ struct ht_irq_msg msg;
|
|
+ unsigned dest;
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+ cpus_and(tmp, cfg->domain, tmp);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
|
+
|
|
+ msg.address_lo =
|
|
+ HT_IRQ_LOW_BASE |
|
|
+ HT_IRQ_LOW_DEST_ID(dest) |
|
|
+ HT_IRQ_LOW_VECTOR(cfg->vector) |
|
|
+ ((INT_DEST_MODE == 0) ?
|
|
+ HT_IRQ_LOW_DM_PHYSICAL :
|
|
+ HT_IRQ_LOW_DM_LOGICAL) |
|
|
+ HT_IRQ_LOW_RQEOI_EDGE |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ HT_IRQ_LOW_MT_FIXED :
|
|
+ HT_IRQ_LOW_MT_ARBITRATED) |
|
|
+ HT_IRQ_LOW_IRQ_MASKED;
|
|
+
|
|
+ write_ht_irq_msg(irq, &msg);
|
|
+
|
|
+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+
|
|
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
|
|
+ }
|
|
+ return err;
|
|
+}
|
|
+#endif /* CONFIG_HT_IRQ */
|
|
+
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+/*
|
|
+ * Re-target the irq to the specified CPU and enable the specified MMR located
|
|
+ * on the specified blade to allow the sending of MSIs to the specified CPU.
|
|
+ */
|
|
+int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
|
|
+ unsigned long mmr_offset)
|
|
+{
|
|
+ const cpumask_t *eligible_cpu = get_cpu_mask(cpu);
|
|
+ struct irq_cfg *cfg;
|
|
+ int mmr_pnode;
|
|
+ unsigned long mmr_value;
|
|
+ struct uv_IO_APIC_route_entry *entry;
|
|
+ unsigned long flags;
|
|
+ int err;
|
|
+
|
|
+ err = assign_irq_vector(irq, *eligible_cpu);
|
|
+ if (err != 0)
|
|
+ return err;
|
|
+
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
|
|
+ irq_name);
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+
|
|
+ cfg = irq_cfg(irq);
|
|
+
|
|
+ mmr_value = 0;
|
|
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
|
|
+ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
|
|
+
|
|
+ entry->vector = cfg->vector;
|
|
+ entry->delivery_mode = INT_DELIVERY_MODE;
|
|
+ entry->dest_mode = INT_DEST_MODE;
|
|
+ entry->polarity = 0;
|
|
+ entry->trigger = 0;
|
|
+ entry->mask = 0;
|
|
+ entry->dest = cpu_mask_to_apicid(*eligible_cpu);
|
|
+
|
|
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
|
|
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
|
|
+
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Disable the specified MMR located on the specified blade so that MSIs are
|
|
+ * no longer allowed to be sent.
|
|
+ */
|
|
+void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
|
|
+{
|
|
+ unsigned long mmr_value;
|
|
+ struct uv_IO_APIC_route_entry *entry;
|
|
+ int mmr_pnode;
|
|
+
|
|
+ mmr_value = 0;
|
|
+ entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
|
|
+ BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
|
|
+
|
|
+ entry->mask = 1;
|
|
+
|
|
+ mmr_pnode = uv_blade_to_pnode(mmr_blade);
|
|
+ uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
|
|
+}
|
|
+#endif /* CONFIG_X86_64 */
|
|
+
|
|
+int __init io_apic_get_redir_entries (int ioapic)
|
|
+{
|
|
+ union IO_APIC_reg_01 reg_01;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_01.raw = io_apic_read(ioapic, 1);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ return reg_01.bits.entries;
|
|
+}
|
|
+
|
|
+int __init probe_nr_irqs(void)
|
|
+{
|
|
+ return NR_IRQS;
|
|
+}
|
|
+
|
|
+/* --------------------------------------------------------------------------
|
|
+ ACPI-based IOAPIC Configuration
|
|
+ -------------------------------------------------------------------------- */
|
|
+
|
|
+#ifdef CONFIG_ACPI
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+#ifndef CONFIG_XEN
|
|
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
|
|
+{
|
|
+ union IO_APIC_reg_00 reg_00;
|
|
+ static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
|
|
+ physid_mask_t tmp;
|
|
+ unsigned long flags;
|
|
+ int i = 0;
|
|
+
|
|
+ /*
|
|
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
|
|
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
|
|
+ * support up to 16 on one shared APIC bus.
|
|
+ *
|
|
+ * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
|
|
+ * advantage of new APIC bus architecture.
|
|
+ */
|
|
+
|
|
+ if (physids_empty(apic_id_map))
|
|
+ apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_00.raw = io_apic_read(ioapic, 0);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ if (apic_id >= get_physical_broadcast()) {
|
|
+ printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
|
|
+ "%d\n", ioapic, apic_id, reg_00.bits.ID);
|
|
+ apic_id = reg_00.bits.ID;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Every APIC in a system must have a unique ID or we get lots of nice
|
|
+ * 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
+ */
|
|
+ if (check_apicid_used(apic_id_map, apic_id)) {
|
|
+
|
|
+ for (i = 0; i < get_physical_broadcast(); i++) {
|
|
+ if (!check_apicid_used(apic_id_map, i))
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ if (i == get_physical_broadcast())
|
|
+ panic("Max apic_id exceeded!\n");
|
|
+
|
|
+ printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
|
|
+ "trying %d\n", ioapic, apic_id, i);
|
|
+
|
|
+ apic_id = i;
|
|
+ }
|
|
+
|
|
+ tmp = apicid_to_cpu_present(apic_id);
|
|
+ physids_or(apic_id_map, apic_id_map, tmp);
|
|
+
|
|
+ if (reg_00.bits.ID != apic_id) {
|
|
+ reg_00.bits.ID = apic_id;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(ioapic, 0, reg_00.raw);
|
|
+ reg_00.raw = io_apic_read(ioapic, 0);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ /* Sanity check */
|
|
+ if (reg_00.bits.ID != apic_id) {
|
|
+ printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
|
|
+ return -1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ apic_printk(APIC_VERBOSE, KERN_INFO
|
|
+ "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
|
|
+
|
|
+ return apic_id;
|
|
+}
|
|
+#endif /* !CONFIG_XEN */
|
|
+
|
|
+int __init io_apic_get_version(int ioapic)
|
|
+{
|
|
+ union IO_APIC_reg_01 reg_01;
|
|
+ unsigned long flags;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ reg_01.raw = io_apic_read(ioapic, 1);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+ return reg_01.bits.version;
|
|
+}
|
|
+#endif
|
|
+
|
|
+int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
|
|
+{
|
|
+#ifdef CONFIG_XEN
|
|
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) {
|
|
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
|
|
+ ioapic, irq);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ if (!IO_APIC_IRQ(irq)) {
|
|
+ apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
|
|
+ ioapic);
|
|
+ return -EINVAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * IRQs < 16 are already in the irq_2_pin[] map
|
|
+ */
|
|
+ if (irq >= 16)
|
|
+ add_pin_to_irq(irq, ioapic, pin);
|
|
+
|
|
+ setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+
|
|
+int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ if (skip_ioapic_setup)
|
|
+ return -1;
|
|
+
|
|
+ for (i = 0; i < mp_irq_entries; i++)
|
|
+ if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
+ mp_irqs[i].mp_srcbusirq == bus_irq)
|
|
+ break;
|
|
+ if (i >= mp_irq_entries)
|
|
+ return -1;
|
|
+
|
|
+ *trigger = irq_trigger(i);
|
|
+ *polarity = irq_polarity(i);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+#endif /* CONFIG_ACPI */
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * This function currently is only a helper for the i386 smp boot process where
|
|
+ * we need to reprogram the ioredtbls to cater for the cpus which have come online,
|
|
+ * so the mask in all cases should simply be TARGET_CPUS
|
|
+ */
|
|
+#ifdef CONFIG_SMP
|
|
+void __init setup_ioapic_dest(void)
|
|
+{
|
|
+ int pin, ioapic, irq, irq_entry;
|
|
+ struct irq_desc *desc;
|
|
+ struct irq_cfg *cfg;
|
|
+ cpumask_t mask;
|
|
+
|
|
+ if (skip_ioapic_setup == 1)
|
|
+ return;
|
|
+
|
|
+ for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
|
|
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
|
|
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
|
|
+ if (irq_entry == -1)
|
|
+ continue;
|
|
+ irq = pin_2_irq(irq_entry, ioapic, pin);
|
|
+
|
|
+ /* setup_IO_APIC_irqs could fail to get a vector for some device
|
|
+ * when you have too many devices, because at that time only the boot
|
|
+ * cpu is online.
|
|
+ */
|
|
+ cfg = irq_cfg(irq);
|
|
+ if (!cfg->vector) {
|
|
+ setup_IO_APIC_irq(ioapic, pin, irq,
|
|
+ irq_trigger(irq_entry),
|
|
+ irq_polarity(irq_entry));
|
|
+ continue;
|
|
+
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Honour affinities which have been set in early boot
|
|
+ */
|
|
+ desc = irq_to_desc(irq);
|
|
+ if (desc->status &
|
|
+ (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
|
|
+ mask = desc->affinity;
|
|
+ else
|
|
+ mask = TARGET_CPUS;
|
|
+
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ if (intr_remapping_enabled)
|
|
+ set_ir_ioapic_affinity_irq(irq, mask);
|
|
+ else
|
|
+#endif
|
|
+ set_ioapic_affinity_irq(irq, mask);
|
|
+ }
|
|
+
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+#define IOAPIC_RESOURCE_NAME_SIZE 11
|
|
+
|
|
+static struct resource *ioapic_resources;
|
|
+
|
|
+static struct resource * __init ioapic_setup_resources(void)
|
|
+{
|
|
+ unsigned long n;
|
|
+ struct resource *res;
|
|
+ char *mem;
|
|
+ int i;
|
|
+
|
|
+ if (nr_ioapics <= 0)
|
|
+ return NULL;
|
|
+
|
|
+ n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
|
|
+ n *= nr_ioapics;
|
|
+
|
|
+ mem = alloc_bootmem(n);
|
|
+ res = (void *)mem;
|
|
+
|
|
+ if (mem != NULL) {
|
|
+ mem += sizeof(struct resource) * nr_ioapics;
|
|
+
|
|
+ for (i = 0; i < nr_ioapics; i++) {
|
|
+ res[i].name = mem;
|
|
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
+ sprintf(mem, "IOAPIC %u", i);
|
|
+ mem += IOAPIC_RESOURCE_NAME_SIZE;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ ioapic_resources = res;
|
|
+
|
|
+ return res;
|
|
+}
|
|
+
|
|
+void __init ioapic_init_mappings(void)
|
|
+{
|
|
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
|
|
+ struct resource *ioapic_res;
|
|
+ int i;
|
|
+
|
|
+ irq_2_pin_init();
|
|
+ ioapic_res = ioapic_setup_resources();
|
|
+ for (i = 0; i < nr_ioapics; i++) {
|
|
+ if (smp_found_config) {
|
|
+ ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (!ioapic_phys) {
|
|
+ printk(KERN_ERR
|
|
+ "WARNING: bogus zero IO-APIC "
|
|
+ "address found in MPTABLE, "
|
|
+ "disabling IO/APIC support!\n");
|
|
+ smp_found_config = 0;
|
|
+ skip_ioapic_setup = 1;
|
|
+ goto fake_ioapic_page;
|
|
+ }
|
|
+#endif
|
|
+ } else {
|
|
+#ifdef CONFIG_X86_32
|
|
+fake_ioapic_page:
|
|
+#endif
|
|
+ ioapic_phys = (unsigned long)
|
|
+ alloc_bootmem_pages(PAGE_SIZE);
|
|
+ ioapic_phys = __pa(ioapic_phys);
|
|
+ }
|
|
+ set_fixmap_nocache(idx, ioapic_phys);
|
|
+ apic_printk(APIC_VERBOSE,
|
|
+ "mapped IOAPIC to %08lx (%08lx)\n",
|
|
+ __fix_to_virt(idx), ioapic_phys);
|
|
+ idx++;
|
|
+
|
|
+ if (ioapic_res != NULL) {
|
|
+ ioapic_res->start = ioapic_phys;
|
|
+ ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
|
|
+ ioapic_res++;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static int __init ioapic_insert_resources(void)
|
|
+{
|
|
+ int i;
|
|
+ struct resource *r = ioapic_resources;
|
|
+
|
|
+ if (!r) {
|
|
+ printk(KERN_ERR
|
|
+ "IO APIC resources could be not be allocated.\n");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < nr_ioapics; i++) {
|
|
+ insert_resource(&iomem_resource, r);
|
|
+ r++;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* Insert the IO APIC resources after PCI initialization has occurred to handle
|
|
+ * IO APICS that are mapped in on a BAR in PCI space. */
|
|
+late_initcall(ioapic_insert_resources);
|
|
+#endif /* !CONFIG_XEN */
|
|
--- head-2011-03-17.orig/arch/x86/kernel/io_apic_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,2985 +0,0 @@
|
|
-/*
|
|
- * Intel IO-APIC support for multi-Pentium hosts.
|
|
- *
|
|
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
|
|
- *
|
|
- * Many thanks to Stig Venaas for trying out countless experimental
|
|
- * patches and reporting/debugging problems patiently!
|
|
- *
|
|
- * (c) 1999, Multiple IO-APIC support, developed by
|
|
- * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
|
|
- * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
|
|
- * further tested and cleaned up by Zach Brown <zab@redhat.com>
|
|
- * and Ingo Molnar <mingo@redhat.com>
|
|
- *
|
|
- * Fixes
|
|
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
|
|
- * thanks to Eric Gilmore
|
|
- * and Rolf G. Tews
|
|
- * for testing these extensively
|
|
- * Paul Diefenbaugh : Added full ACPI support
|
|
- */
|
|
-
|
|
-#include <linux/mm.h>
|
|
-#include <linux/interrupt.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/mc146818rtc.h>
|
|
-#include <linux/compiler.h>
|
|
-#include <linux/acpi.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/sysdev.h>
|
|
-#include <linux/pci.h>
|
|
-#include <linux/msi.h>
|
|
-#include <linux/htirq.h>
|
|
-#include <linux/freezer.h>
|
|
-#include <linux/kthread.h>
|
|
-#include <linux/jiffies.h> /* time_after() */
|
|
-
|
|
-#include <asm/io.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/timer.h>
|
|
-#include <asm/i8259.h>
|
|
-#include <asm/nmi.h>
|
|
-#include <asm/msidef.h>
|
|
-#include <asm/hypertransport.h>
|
|
-
|
|
-#include <mach_apic.h>
|
|
-#include <mach_apicdef.h>
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-#include <xen/interface/xen.h>
|
|
-#include <xen/interface/physdev.h>
|
|
-#include <xen/evtchn.h>
|
|
-
|
|
-/* Fake i8259 */
|
|
-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
|
|
-#define disable_8259A_irq(_irq) ((void)0)
|
|
-#define i8259A_irq_pending(_irq) (0)
|
|
-
|
|
-unsigned long io_apic_irqs;
|
|
-
|
|
-#define clear_IO_APIC() ((void)0)
|
|
-#endif /* CONFIG_XEN */
|
|
-
|
|
-int (*ioapic_renumber_irq)(int ioapic, int irq);
|
|
-atomic_t irq_mis_count;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/* Where if anywhere is the i8259 connect in external int mode */
|
|
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
|
-#endif
|
|
-
|
|
-static DEFINE_SPINLOCK(ioapic_lock);
|
|
-static DEFINE_SPINLOCK(vector_lock);
|
|
-
|
|
-int timer_through_8259 __initdata;
|
|
-
|
|
-/*
|
|
- * Is the SiS APIC rmw bug present ?
|
|
- * -1 = don't know, 0 = no, 1 = yes
|
|
- */
|
|
-int sis_apic_bug = -1;
|
|
-
|
|
-/*
|
|
- * # of IRQ routing registers
|
|
- */
|
|
-int nr_ioapic_registers[MAX_IO_APICS];
|
|
-
|
|
-/* I/O APIC entries */
|
|
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
-int nr_ioapics;
|
|
-
|
|
-/* MP IRQ source entries */
|
|
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
-
|
|
-/* # of MP IRQ source entries */
|
|
-int mp_irq_entries;
|
|
-
|
|
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
|
|
-int mp_bus_id_to_type[MAX_MP_BUSSES];
|
|
-#endif
|
|
-
|
|
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
-
|
|
-static int disable_timer_pin_1 __initdata;
|
|
-
|
|
-/*
|
|
- * Rough estimation of how many shared IRQs there are, can
|
|
- * be changed anytime.
|
|
- */
|
|
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
|
|
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
|
|
-
|
|
-/*
|
|
- * This is performance-critical, we want to do it O(1)
|
|
- *
|
|
- * the indexing order of this array favors 1:1 mappings
|
|
- * between pins and IRQs.
|
|
- */
|
|
-
|
|
-static struct irq_pin_list {
|
|
- int apic, pin, next;
|
|
-} irq_2_pin[PIN_MAP_SIZE];
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-struct io_apic {
|
|
- unsigned int index;
|
|
- unsigned int unused[3];
|
|
- unsigned int data;
|
|
-};
|
|
-
|
|
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
-{
|
|
- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
-}
|
|
-#endif
|
|
-
|
|
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- writel(reg, &io_apic->index);
|
|
- return readl(&io_apic->data);
|
|
-#else
|
|
- struct physdev_apic apic_op;
|
|
- int ret;
|
|
-
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
- apic_op.reg = reg;
|
|
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
- if (ret)
|
|
- return ret;
|
|
- return apic_op.value;
|
|
-#endif
|
|
-}
|
|
-
|
|
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- writel(reg, &io_apic->index);
|
|
- writel(value, &io_apic->data);
|
|
-#else
|
|
- struct physdev_apic apic_op;
|
|
-
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
- apic_op.reg = reg;
|
|
- apic_op.value = value;
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
-#endif
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Re-write a value: to be used for read-modify-write
|
|
- * cycles where the read already set up the index register.
|
|
- *
|
|
- * Older SiS APIC requires we rewrite the index register
|
|
- */
|
|
-static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
|
|
-{
|
|
- volatile struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- if (sis_apic_bug)
|
|
- writel(reg, &io_apic->index);
|
|
- writel(value, &io_apic->data);
|
|
-}
|
|
-#else
|
|
-#define io_apic_modify io_apic_write
|
|
-#endif
|
|
-
|
|
-union entry_union {
|
|
- struct { u32 w1, w2; };
|
|
- struct IO_APIC_route_entry entry;
|
|
-};
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
|
|
-{
|
|
- union entry_union eu;
|
|
- unsigned long flags;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- return eu.entry;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * When we write a new IO APIC routing entry, we need to write the high
|
|
- * word first! If the mask bit in the low word is clear, we will enable
|
|
- * the interrupt, and we need to make sure the entry is fully populated
|
|
- * before that happens.
|
|
- */
|
|
-static void
|
|
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
-{
|
|
- union entry_union eu;
- eu.entry = e;
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
-}
-
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
-{
- unsigned long flags;
- spin_lock_irqsave(&ioapic_lock, flags);
- __ioapic_write_entry(apic, pin, e);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
-#ifndef CONFIG_XEN
-/*
- * When we mask an IO APIC routing entry, we need to write the low
- * word first, in order to set the mask bit before we change the
- * high bits!
- */
-static void ioapic_mask_entry(int apic, int pin)
-{
- unsigned long flags;
- union entry_union eu = { .entry.mask = 1 };
-
- spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-#endif
-
-/*
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
|
|
- * shared ISA-space IRQs, so we have to support them. We are super
|
|
- * fast in the common case, and fast for shared ISA-space IRQs.
|
|
- */
|
|
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
|
|
-{
|
|
- static int first_free_entry = NR_IRQS;
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
-
|
|
- while (entry->next)
|
|
- entry = irq_2_pin + entry->next;
|
|
-
|
|
- if (entry->pin != -1) {
|
|
- entry->next = first_free_entry;
|
|
- entry = irq_2_pin + entry->next;
|
|
- if (++first_free_entry >= PIN_MAP_SIZE)
|
|
- panic("io_apic.c: whoops");
|
|
- }
|
|
- entry->apic = apic;
|
|
- entry->pin = pin;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Reroute an IRQ to a different pin.
|
|
- */
|
|
-static void __init replace_pin_at_irq(unsigned int irq,
|
|
- int oldapic, int oldpin,
|
|
- int newapic, int newpin)
|
|
-{
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
-
|
|
- while (1) {
|
|
- if (entry->apic == oldapic && entry->pin == oldpin) {
|
|
- entry->apic = newapic;
|
|
- entry->pin = newpin;
|
|
- }
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
-}
|
|
-
|
|
-static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
|
|
-{
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
- unsigned int pin, reg;
|
|
-
|
|
- for (;;) {
|
|
- pin = entry->pin;
|
|
- if (pin == -1)
|
|
- break;
|
|
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
|
|
- reg &= ~disable;
|
|
- reg |= enable;
|
|
- io_apic_modify(entry->apic, 0x10 + pin*2, reg);
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
-}
|
|
-
|
|
-/* mask = 1 */
|
|
-static void __mask_IO_APIC_irq(unsigned int irq)
|
|
-{
|
|
- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
|
|
-}
|
|
-
|
|
-/* mask = 0 */
|
|
-static void __unmask_IO_APIC_irq(unsigned int irq)
|
|
-{
|
|
- __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
|
|
-}
|
|
-
|
|
-/* mask = 1, trigger = 0 */
|
|
-static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
|
|
-{
|
|
- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
|
|
- IO_APIC_REDIR_LEVEL_TRIGGER);
|
|
-}
|
|
-
|
|
-/* mask = 0, trigger = 1 */
|
|
-static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
|
|
-{
|
|
- __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
- IO_APIC_REDIR_MASKED);
|
|
-}
|
|
-
|
|
-static void mask_IO_APIC_irq(unsigned int irq)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __mask_IO_APIC_irq(irq);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-static void unmask_IO_APIC_irq(unsigned int irq)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __unmask_IO_APIC_irq(irq);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
-{
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
|
|
- entry = ioapic_read_entry(apic, pin);
|
|
- if (entry.delivery_mode == dest_SMI)
|
|
- return;
|
|
-
|
|
- /*
|
|
- * Disable it in the IO-APIC irq-routing table:
|
|
- */
|
|
- ioapic_mask_entry(apic, pin);
|
|
-}
|
|
-
|
|
-static void clear_IO_APIC(void)
|
|
-{
|
|
- int apic, pin;
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++)
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
- clear_IO_APIC_pin(apic, pin);
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t cpumask)
|
|
-{
|
|
- unsigned long flags;
|
|
- int pin;
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
- unsigned int apicid_value;
|
|
- cpumask_t tmp;
|
|
-
|
|
- cpus_and(tmp, cpumask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- tmp = TARGET_CPUS;
|
|
-
|
|
- cpus_and(cpumask, tmp, CPU_MASK_ALL);
|
|
-
|
|
- apicid_value = cpu_mask_to_apicid(cpumask);
|
|
- /* Prepare to do the io_apic_write */
|
|
- apicid_value = apicid_value << 24;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- for (;;) {
|
|
- pin = entry->pin;
|
|
- if (pin == -1)
|
|
- break;
|
|
- io_apic_write(entry->apic, 0x10 + 1 + pin*2, apicid_value);
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
- irq_desc[irq].affinity = cpumask;
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-#if defined(CONFIG_IRQBALANCE)
|
|
-# include <asm/processor.h> /* kernel_thread() */
|
|
-# include <linux/kernel_stat.h> /* kstat */
|
|
-# include <linux/slab.h> /* kmalloc() */
|
|
-# include <linux/timer.h>
|
|
-
|
|
-#define IRQBALANCE_CHECK_ARCH -999
|
|
-#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
|
|
-#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
|
|
-#define BALANCED_IRQ_MORE_DELTA (HZ/10)
|
|
-#define BALANCED_IRQ_LESS_DELTA (HZ)
|
|
-
|
|
-static int irqbalance_disabled __read_mostly = IRQBALANCE_CHECK_ARCH;
|
|
-static int physical_balance __read_mostly;
|
|
-static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
|
|
-
|
|
-static struct irq_cpu_info {
|
|
- unsigned long *last_irq;
|
|
- unsigned long *irq_delta;
|
|
- unsigned long irq;
|
|
-} irq_cpu_data[NR_CPUS];
|
|
-
|
|
-#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
|
|
-#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
|
|
-#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
|
|
-
|
|
-#define IDLE_ENOUGH(cpu,now) \
|
|
- (idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
|
|
-
|
|
-#define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask)
|
|
-
|
|
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
|
|
-
|
|
-static cpumask_t balance_irq_affinity[NR_IRQS] = {
|
|
- [0 ... NR_IRQS-1] = CPU_MASK_ALL
|
|
-};
|
|
-
|
|
-void set_balance_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
-{
|
|
- balance_irq_affinity[irq] = mask;
|
|
-}
|
|
-
|
|
-static unsigned long move(int curr_cpu, cpumask_t allowed_mask,
|
|
- unsigned long now, int direction)
|
|
-{
|
|
- int search_idle = 1;
|
|
- int cpu = curr_cpu;
|
|
-
|
|
- goto inside;
|
|
-
|
|
- do {
|
|
- if (unlikely(cpu == curr_cpu))
|
|
- search_idle = 0;
|
|
-inside:
|
|
- if (direction == 1) {
|
|
- cpu++;
|
|
- if (cpu >= NR_CPUS)
|
|
- cpu = 0;
|
|
- } else {
|
|
- cpu--;
|
|
- if (cpu == -1)
|
|
- cpu = NR_CPUS-1;
|
|
- }
|
|
- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
|
|
- (search_idle && !IDLE_ENOUGH(cpu, now)));
|
|
-
|
|
- return cpu;
|
|
-}
|
|
-
|
|
-static inline void balance_irq(int cpu, int irq)
|
|
-{
|
|
- unsigned long now = jiffies;
|
|
- cpumask_t allowed_mask;
|
|
- unsigned int new_cpu;
|
|
-
|
|
- if (irqbalance_disabled)
|
|
- return;
|
|
-
|
|
- cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
|
|
- new_cpu = move(cpu, allowed_mask, now, 1);
|
|
- if (cpu != new_cpu)
|
|
- set_pending_irq(irq, cpumask_of_cpu(new_cpu));
|
|
-}
|
|
-
|
|
-static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
|
|
-{
|
|
- int i, j;
|
|
-
|
|
- for_each_online_cpu(i) {
|
|
- for (j = 0; j < NR_IRQS; j++) {
|
|
- if (!irq_desc[j].action)
|
|
- continue;
|
|
- /* Is it a significant load ? */
|
|
- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
|
|
- useful_load_threshold)
|
|
- continue;
|
|
- balance_irq(i, j);
|
|
- }
|
|
- }
|
|
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
|
|
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
|
- return;
|
|
-}
|
|
-
|
|
-static void do_irq_balance(void)
|
|
-{
|
|
- int i, j;
|
|
- unsigned long max_cpu_irq = 0, min_cpu_irq = (~0);
|
|
- unsigned long move_this_load = 0;
|
|
- int max_loaded = 0, min_loaded = 0;
|
|
- int load;
|
|
- unsigned long useful_load_threshold = balanced_irq_interval + 10;
|
|
- int selected_irq;
|
|
- int tmp_loaded, first_attempt = 1;
|
|
- unsigned long tmp_cpu_irq;
|
|
- unsigned long imbalance = 0;
|
|
- cpumask_t allowed_mask, target_cpu_mask, tmp;
|
|
-
|
|
- for_each_possible_cpu(i) {
|
|
- int package_index;
|
|
- CPU_IRQ(i) = 0;
|
|
- if (!cpu_online(i))
|
|
- continue;
|
|
- package_index = CPU_TO_PACKAGEINDEX(i);
|
|
- for (j = 0; j < NR_IRQS; j++) {
|
|
- unsigned long value_now, delta;
|
|
- /* Is this an active IRQ or balancing disabled ? */
|
|
- if (!irq_desc[j].action || irq_balancing_disabled(j))
|
|
- continue;
|
|
- if (package_index == i)
|
|
- IRQ_DELTA(package_index, j) = 0;
|
|
- /* Determine the total count per processor per IRQ */
|
|
- value_now = (unsigned long) kstat_cpu(i).irqs[j];
|
|
-
|
|
- /* Determine the activity per processor per IRQ */
|
|
- delta = value_now - LAST_CPU_IRQ(i, j);
|
|
-
|
|
- /* Update last_cpu_irq[][] for the next time */
|
|
- LAST_CPU_IRQ(i, j) = value_now;
|
|
-
|
|
- /* Ignore IRQs whose rate is less than the clock */
|
|
- if (delta < useful_load_threshold)
|
|
- continue;
|
|
- /* update the load for the processor or package total */
|
|
- IRQ_DELTA(package_index, j) += delta;
|
|
-
|
|
- /* Keep track of the higher numbered sibling as well */
|
|
- if (i != package_index)
|
|
- CPU_IRQ(i) += delta;
|
|
- /*
|
|
- * We have sibling A and sibling B in the package
|
|
- *
|
|
- * cpu_irq[A] = load for cpu A + load for cpu B
|
|
- * cpu_irq[B] = load for cpu B
|
|
- */
|
|
- CPU_IRQ(package_index) += delta;
|
|
- }
|
|
- }
|
|
- /* Find the least loaded processor package */
|
|
- for_each_online_cpu(i) {
|
|
- if (i != CPU_TO_PACKAGEINDEX(i))
|
|
- continue;
|
|
- if (min_cpu_irq > CPU_IRQ(i)) {
|
|
- min_cpu_irq = CPU_IRQ(i);
|
|
- min_loaded = i;
|
|
- }
|
|
- }
|
|
- max_cpu_irq = ULONG_MAX;
|
|
-
|
|
-tryanothercpu:
|
|
- /*
|
|
- * Look for heaviest loaded processor.
|
|
- * We may come back to get the next heaviest loaded processor.
|
|
- * Skip processors with trivial loads.
|
|
- */
|
|
- tmp_cpu_irq = 0;
|
|
- tmp_loaded = -1;
|
|
- for_each_online_cpu(i) {
|
|
- if (i != CPU_TO_PACKAGEINDEX(i))
|
|
- continue;
|
|
- if (max_cpu_irq <= CPU_IRQ(i))
|
|
- continue;
|
|
- if (tmp_cpu_irq < CPU_IRQ(i)) {
|
|
- tmp_cpu_irq = CPU_IRQ(i);
|
|
- tmp_loaded = i;
|
|
- }
|
|
- }
|
|
-
|
|
- if (tmp_loaded == -1) {
|
|
- /*
|
|
- * In the case of small number of heavy interrupt sources,
|
|
- * loading some of the cpus too much. We use Ingo's original
|
|
- * approach to rotate them around.
|
|
- */
|
|
- if (!first_attempt && imbalance >= useful_load_threshold) {
|
|
- rotate_irqs_among_cpus(useful_load_threshold);
|
|
- return;
|
|
- }
|
|
- goto not_worth_the_effort;
|
|
- }
|
|
-
|
|
- first_attempt = 0; /* heaviest search */
|
|
- max_cpu_irq = tmp_cpu_irq; /* load */
|
|
- max_loaded = tmp_loaded; /* processor */
|
|
- imbalance = (max_cpu_irq - min_cpu_irq) / 2;
|
|
-
|
|
- /*
|
|
- * if imbalance is less than approx 10% of max load, then
|
|
- * observe diminishing returns action. - quit
|
|
- */
|
|
- if (imbalance < (max_cpu_irq >> 3))
|
|
- goto not_worth_the_effort;
|
|
-
|
|
-tryanotherirq:
|
|
- /* if we select an IRQ to move that can't go where we want, then
|
|
- * see if there is another one to try.
|
|
- */
|
|
- move_this_load = 0;
|
|
- selected_irq = -1;
|
|
- for (j = 0; j < NR_IRQS; j++) {
|
|
- /* Is this an active IRQ? */
|
|
- if (!irq_desc[j].action)
|
|
- continue;
|
|
- if (imbalance <= IRQ_DELTA(max_loaded, j))
|
|
- continue;
|
|
- /* Try to find the IRQ that is closest to the imbalance
|
|
- * without going over.
|
|
- */
|
|
- if (move_this_load < IRQ_DELTA(max_loaded, j)) {
|
|
- move_this_load = IRQ_DELTA(max_loaded, j);
|
|
- selected_irq = j;
|
|
- }
|
|
- }
|
|
- if (selected_irq == -1)
|
|
- goto tryanothercpu;
|
|
-
|
|
- imbalance = move_this_load;
|
|
-
|
|
- /* For physical_balance case, we accumulated both load
|
|
- * values in the one of the siblings cpu_irq[],
|
|
- * to use the same code for physical and logical processors
|
|
- * as much as possible.
|
|
- *
|
|
- * NOTE: the cpu_irq[] array holds the sum of the load for
|
|
- * sibling A and sibling B in the slot for the lowest numbered
|
|
- * sibling (A), _AND_ the load for sibling B in the slot for
|
|
- * the higher numbered sibling.
|
|
- *
|
|
- * We seek the least loaded sibling by making the comparison
|
|
- * (A+B)/2 vs B
|
|
- */
|
|
- load = CPU_IRQ(min_loaded) >> 1;
|
|
- for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
|
|
- if (load > CPU_IRQ(j)) {
|
|
- /* This won't change cpu_sibling_map[min_loaded] */
|
|
- load = CPU_IRQ(j);
|
|
- min_loaded = j;
|
|
- }
|
|
- }
|
|
-
|
|
- cpus_and(allowed_mask,
|
|
- cpu_online_map,
|
|
- balance_irq_affinity[selected_irq]);
|
|
- target_cpu_mask = cpumask_of_cpu(min_loaded);
|
|
- cpus_and(tmp, target_cpu_mask, allowed_mask);
|
|
-
|
|
- if (!cpus_empty(tmp)) {
|
|
- /* mark for change destination */
|
|
- set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
|
|
-
|
|
- /* Since we made a change, come back sooner to
|
|
- * check for more variation.
|
|
- */
|
|
- balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
|
|
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
|
- return;
|
|
- }
|
|
- goto tryanotherirq;
|
|
-
|
|
-not_worth_the_effort:
|
|
- /*
|
|
- * if we did not find an IRQ to move, then adjust the time interval
|
|
- * upward
|
|
- */
|
|
- balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
|
|
- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
|
|
- return;
|
|
-}
|
|
-
|
|
-static int balanced_irq(void *unused)
|
|
-{
|
|
- int i;
|
|
- unsigned long prev_balance_time = jiffies;
|
|
- long time_remaining = balanced_irq_interval;
|
|
-
|
|
- /* push everything to CPU 0 to give us a starting point. */
|
|
- for (i = 0 ; i < NR_IRQS ; i++) {
|
|
- irq_desc[i].pending_mask = cpumask_of_cpu(0);
|
|
- set_pending_irq(i, cpumask_of_cpu(0));
|
|
- }
|
|
-
|
|
- set_freezable();
|
|
- for ( ; ; ) {
|
|
- time_remaining = schedule_timeout_interruptible(time_remaining);
|
|
- try_to_freeze();
|
|
- if (time_after(jiffies,
|
|
- prev_balance_time+balanced_irq_interval)) {
|
|
- preempt_disable();
|
|
- do_irq_balance();
|
|
- prev_balance_time = jiffies;
|
|
- time_remaining = balanced_irq_interval;
|
|
- preempt_enable();
|
|
- }
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int __init balanced_irq_init(void)
|
|
-{
|
|
- int i;
|
|
- struct cpuinfo_x86 *c;
|
|
- cpumask_t tmp;
|
|
-
|
|
- cpus_shift_right(tmp, cpu_online_map, 2);
|
|
- c = &boot_cpu_data;
|
|
- /* When not overwritten by the command line ask subarchitecture. */
|
|
- if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
|
|
- irqbalance_disabled = NO_BALANCE_IRQ;
|
|
- if (irqbalance_disabled)
|
|
- return 0;
|
|
-
|
|
- /* disable irqbalance completely if there is only one processor online */
|
|
- if (num_online_cpus() < 2) {
|
|
- irqbalance_disabled = 1;
|
|
- return 0;
|
|
- }
|
|
- /*
|
|
- * Enable physical balance only if more than 1 physical processor
|
|
- * is present
|
|
- */
|
|
- if (smp_num_siblings > 1 && !cpus_empty(tmp))
|
|
- physical_balance = 1;
|
|
-
|
|
- for_each_online_cpu(i) {
|
|
- irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
|
|
- irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
|
|
- if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
|
|
- printk(KERN_ERR "balanced_irq_init: out of memory");
|
|
- goto failed;
|
|
- }
|
|
- }
|
|
-
|
|
- printk(KERN_INFO "Starting balanced_irq\n");
|
|
- if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
|
|
- return 0;
|
|
- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
|
|
-failed:
|
|
- for_each_possible_cpu(i) {
|
|
- kfree(irq_cpu_data[i].irq_delta);
|
|
- irq_cpu_data[i].irq_delta = NULL;
|
|
- kfree(irq_cpu_data[i].last_irq);
|
|
- irq_cpu_data[i].last_irq = NULL;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-int __devinit irqbalance_disable(char *str)
|
|
-{
|
|
- irqbalance_disabled = 1;
|
|
- return 1;
|
|
-}
|
|
-
|
|
-__setup("noirqbalance", irqbalance_disable);
|
|
-
|
|
-late_initcall(balanced_irq_init);
|
|
-#endif /* CONFIG_IRQBALANCE */
|
|
-#endif /* CONFIG_SMP */
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_SMP
|
|
-void send_IPI_self(int vector)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- unsigned int cfg;
|
|
-
|
|
- /*
|
|
- * Wait for idle.
|
|
- */
|
|
- apic_wait_icr_idle();
|
|
- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
|
|
- /*
|
|
- * Send the IPI. The write to APIC_ICR fires this off.
|
|
- */
|
|
- apic_write(APIC_ICR, cfg);
|
|
-#endif
|
|
-}
|
|
-#endif /* !CONFIG_SMP */
|
|
-
|
|
-
|
|
-/*
|
|
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
|
|
- * specific CPU-side IRQs.
|
|
- */
|
|
-
|
|
-#define MAX_PIRQS 8
|
|
-static int pirq_entries [MAX_PIRQS];
|
|
-static int pirqs_enabled;
|
|
-int skip_ioapic_setup;
|
|
-
|
|
-static int __init ioapic_pirq_setup(char *str)
|
|
-{
|
|
- int i, max;
|
|
- int ints[MAX_PIRQS+1];
|
|
-
|
|
- get_options(str, ARRAY_SIZE(ints), ints);
|
|
-
|
|
- for (i = 0; i < MAX_PIRQS; i++)
|
|
- pirq_entries[i] = -1;
|
|
-
|
|
- pirqs_enabled = 1;
|
|
- apic_printk(APIC_VERBOSE, KERN_INFO
|
|
- "PIRQ redirection, working around broken MP-BIOS.\n");
|
|
- max = MAX_PIRQS;
|
|
- if (ints[0] < MAX_PIRQS)
|
|
- max = ints[0];
|
|
-
|
|
- for (i = 0; i < max; i++) {
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
- "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
|
|
- /*
|
|
- * PIRQs are mapped upside down, usually.
|
|
- */
|
|
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
|
|
- }
|
|
- return 1;
|
|
-}
|
|
-
|
|
-__setup("pirq=", ioapic_pirq_setup);
|
|
-
|
|
-/*
|
|
- * Find the IRQ entry number of a certain pin.
|
|
- */
|
|
-static int find_irq_entry(int apic, int pin, int type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_irqtype == type &&
|
|
- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
- mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
- mp_irqs[i].mp_dstirq == pin)
|
|
- return i;
|
|
-
|
|
- return -1;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Find the pin to which IRQ[irq] (ISA) is connected
|
|
- */
|
|
-static int __init find_isa_irq_pin(int irq, int type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
-
|
|
- if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mp_irqtype == type) &&
|
|
- (mp_irqs[i].mp_srcbusirq == irq))
|
|
-
|
|
- return mp_irqs[i].mp_dstirq;
|
|
- }
|
|
- return -1;
|
|
-}
|
|
-
|
|
-static int __init find_isa_irq_apic(int irq, int type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
-
|
|
- if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mp_irqtype == type) &&
|
|
- (mp_irqs[i].mp_srcbusirq == irq))
|
|
- break;
|
|
- }
|
|
- if (i < mp_irq_entries) {
|
|
- int apic;
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
|
- return apic;
|
|
- }
|
|
- }
|
|
-
|
|
- return -1;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Find a specific PCI IRQ entry.
|
|
- * Not an __init, possibly needed by modules
|
|
- */
|
|
-static int pin_2_irq(int idx, int apic, int pin);
|
|
-
|
|
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
|
|
-{
|
|
- int apic, i, best_guess = -1;
|
|
-
|
|
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
|
|
- "slot:%d, pin:%d.\n", bus, slot, pin);
|
|
- if (test_bit(bus, mp_bus_not_pci)) {
|
|
- printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
|
- return -1;
|
|
- }
|
|
- for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++)
|
|
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
- mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
|
- break;
|
|
-
|
|
- if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
- !mp_irqs[i].mp_irqtype &&
|
|
- (bus == lbus) &&
|
|
- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
- int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
|
|
-
|
|
- if (!(apic || IO_APIC_IRQ(irq)))
|
|
- continue;
|
|
-
|
|
- if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
|
- return irq;
|
|
- /*
|
|
- * Use the first all-but-pin matching entry as a
|
|
- * best-guess fuzzy result for broken mptables.
|
|
- */
|
|
- if (best_guess < 0)
|
|
- best_guess = irq;
|
|
- }
|
|
- }
|
|
- return best_guess;
|
|
-}
|
|
-EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
|
|
-
|
|
-/*
|
|
- * This function currently is only a helper for the i386 smp boot process where
|
|
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
|
|
- * so mask in all cases should simply be TARGET_CPUS
|
|
- */
|
|
-#ifdef CONFIG_SMP
|
|
-#ifndef CONFIG_XEN
|
|
-void __init setup_ioapic_dest(void)
|
|
-{
|
|
- int pin, ioapic, irq, irq_entry;
|
|
-
|
|
- if (skip_ioapic_setup == 1)
|
|
- return;
|
|
-
|
|
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
|
|
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
|
|
- if (irq_entry == -1)
|
|
- continue;
|
|
- irq = pin_2_irq(irq_entry, ioapic, pin);
|
|
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
|
|
- }
|
|
-
|
|
- }
|
|
-}
|
|
-#endif /* !CONFIG_XEN */
|
|
-#endif
|
|
-
|
|
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
-/*
|
|
- * EISA Edge/Level control register, ELCR
|
|
- */
|
|
-static int EISA_ELCR(unsigned int irq)
|
|
-{
|
|
- if (irq < 16) {
|
|
- unsigned int port = 0x4d0 + (irq >> 3);
|
|
- return (inb(port) >> (irq & 7)) & 1;
|
|
- }
|
|
- apic_printk(APIC_VERBOSE, KERN_INFO
|
|
- "Broken MPtable reports ISA irq %d\n", irq);
|
|
- return 0;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/* ISA interrupts are always polarity zero edge triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_ISA_trigger(idx) (0)
|
|
-#define default_ISA_polarity(idx) (0)
|
|
-
|
|
-/* EISA interrupts are always polarity zero and can be edge or level
|
|
- * trigger depending on the ELCR value. If an interrupt is listed as
|
|
- * EISA conforming in the MP table, that means its trigger type must
|
|
- * be read in from the ELCR */
|
|
-
|
|
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
|
|
-#define default_EISA_polarity(idx) default_ISA_polarity(idx)
|
|
-
|
|
-/* PCI interrupts are always polarity one level triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_PCI_trigger(idx) (1)
|
|
-#define default_PCI_polarity(idx) (1)
|
|
-
|
|
-/* MCA interrupts are always polarity zero level triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_MCA_trigger(idx) (1)
|
|
-#define default_MCA_polarity(idx) default_ISA_polarity(idx)
|
|
-
|
|
-static int MPBIOS_polarity(int idx)
|
|
-{
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
- int polarity;
|
|
-
|
|
- /*
|
|
- * Determine IRQ line polarity (high active or low active):
|
|
- */
|
|
- switch (mp_irqs[idx].mp_irqflag & 3) {
|
|
- case 0: /* conforms, ie. bus-type dependent polarity */
|
|
- {
|
|
- polarity = test_bit(bus, mp_bus_not_pci)?
|
|
- default_ISA_polarity(idx):
|
|
- default_PCI_polarity(idx);
|
|
- break;
|
|
- }
|
|
- case 1: /* high active */
|
|
- {
|
|
- polarity = 0;
|
|
- break;
|
|
- }
|
|
- case 2: /* reserved */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- case 3: /* low active */
|
|
- {
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- default: /* invalid */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- }
|
|
- return polarity;
|
|
-}
|
|
-
|
|
-static int MPBIOS_trigger(int idx)
|
|
-{
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
- int trigger;
|
|
-
|
|
- /*
|
|
- * Determine IRQ trigger mode (edge or level sensitive):
|
|
- */
|
|
- switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
|
|
- case 0: /* conforms, ie. bus-type dependent */
|
|
- {
|
|
- trigger = test_bit(bus, mp_bus_not_pci)?
|
|
- default_ISA_trigger(idx):
|
|
- default_PCI_trigger(idx);
|
|
-#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
- switch (mp_bus_id_to_type[bus]) {
|
|
- case MP_BUS_ISA: /* ISA pin */
|
|
- {
|
|
- /* set before the switch */
|
|
- break;
|
|
- }
|
|
- case MP_BUS_EISA: /* EISA pin */
|
|
- {
|
|
- trigger = default_EISA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_PCI: /* PCI pin */
|
|
- {
|
|
- /* set before the switch */
|
|
- break;
|
|
- }
|
|
- case MP_BUS_MCA: /* MCA pin */
|
|
- {
|
|
- trigger = default_MCA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- default:
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- }
|
|
-#endif
|
|
- break;
|
|
- }
|
|
- case 1: /* edge */
|
|
- {
|
|
- trigger = 0;
|
|
- break;
|
|
- }
|
|
- case 2: /* reserved */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- case 3: /* level */
|
|
- {
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- default: /* invalid */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 0;
|
|
- break;
|
|
- }
|
|
- }
|
|
- return trigger;
|
|
-}
|
|
-
|
|
-static inline int irq_polarity(int idx)
|
|
-{
|
|
- return MPBIOS_polarity(idx);
|
|
-}
|
|
-
|
|
-static inline int irq_trigger(int idx)
|
|
-{
|
|
- return MPBIOS_trigger(idx);
|
|
-}
|
|
-
|
|
-static int pin_2_irq(int idx, int apic, int pin)
|
|
-{
|
|
- int irq, i;
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
-
|
|
- /*
|
|
- * Debugging check, we are in big trouble if this message pops up!
|
|
- */
|
|
- if (mp_irqs[idx].mp_dstirq != pin)
|
|
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
-
|
|
- if (test_bit(bus, mp_bus_not_pci))
|
|
- irq = mp_irqs[idx].mp_srcbusirq;
|
|
- else {
|
|
- /*
|
|
- * PCI IRQs are mapped in order
|
|
- */
|
|
- i = irq = 0;
|
|
- while (i < apic)
|
|
- irq += nr_ioapic_registers[i++];
|
|
- irq += pin;
|
|
-
|
|
- /*
|
|
- * For MPS mode, so far only needed by ES7000 platform
|
|
- */
|
|
- if (ioapic_renumber_irq)
|
|
- irq = ioapic_renumber_irq(apic, irq);
|
|
- }
|
|
-
|
|
- /*
|
|
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
|
|
- */
|
|
- if ((pin >= 16) && (pin <= 23)) {
|
|
- if (pirq_entries[pin-16] != -1) {
|
|
- if (!pirq_entries[pin-16]) {
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
- "disabling PIRQ%d\n", pin-16);
|
|
- } else {
|
|
- irq = pirq_entries[pin-16];
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
- "using PIRQ%d -> IRQ %d\n",
|
|
- pin-16, irq);
|
|
- }
|
|
- }
|
|
- }
|
|
- return irq;
|
|
-}
|
|
-
|
|
-static inline int IO_APIC_irq_trigger(int irq)
|
|
-{
|
|
- int apic, idx, pin;
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
- idx = find_irq_entry(apic, pin, mp_INT);
|
|
- if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
|
|
- return irq_trigger(idx);
|
|
- }
|
|
- }
|
|
- /*
|
|
- * nonexistent IRQs are edge default
|
|
- */
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
|
|
-static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
|
|
-
|
|
-static int __assign_irq_vector(int irq)
|
|
-{
|
|
- int vector;
|
|
- struct physdev_irq irq_op;
|
|
-
|
|
- BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
|
|
-
|
|
- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
- return -EINVAL;
|
|
-
|
|
- if (irq_vector[irq] > 0)
|
|
- return irq_vector[irq];
|
|
-
|
|
- irq_op.irq = irq;
|
|
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
- return -ENOSPC;
|
|
-
|
|
- vector = irq_op.vector;
|
|
- irq_vector[irq] = vector;
|
|
-
|
|
- return vector;
|
|
-}
|
|
-
|
|
-static int assign_irq_vector(int irq)
|
|
-{
|
|
- unsigned long flags;
|
|
- int vector;
|
|
-
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- vector = __assign_irq_vector(irq);
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
-
|
|
- return vector;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static struct irq_chip ioapic_chip;
|
|
-
|
|
-#define IOAPIC_AUTO -1
|
|
-#define IOAPIC_EDGE 0
|
|
-#define IOAPIC_LEVEL 1
|
|
-
|
|
-static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
|
|
-{
|
|
- if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
- trigger == IOAPIC_LEVEL) {
|
|
- irq_desc[irq].status |= IRQ_LEVEL;
|
|
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
- handle_fasteoi_irq, "fasteoi");
|
|
- } else {
|
|
- irq_desc[irq].status &= ~IRQ_LEVEL;
|
|
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
- handle_edge_irq, "edge");
|
|
- }
|
|
- set_intr_gate(vector, interrupt[irq]);
|
|
-}
|
|
-#else
|
|
-#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
|
|
-#endif
|
|
-
|
|
-static void __init setup_IO_APIC_irqs(void)
|
|
-{
|
|
- struct IO_APIC_route_entry entry;
|
|
- int apic, pin, idx, irq, first_notcon = 1, vector;
|
|
-
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
-
|
|
- /*
|
|
- * add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- memset(&entry, 0, sizeof(entry));
|
|
-
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 0; /* enable IRQ */
|
|
- entry.dest.logical.logical_dest =
|
|
- cpu_mask_to_apicid(TARGET_CPUS);
|
|
-
|
|
- idx = find_irq_entry(apic, pin, mp_INT);
|
|
- if (idx == -1) {
|
|
- if (first_notcon) {
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
- " IO-APIC (apicid-pin) %d-%d",
|
|
- mp_ioapics[apic].mp_apicid,
|
|
- pin);
|
|
- first_notcon = 0;
|
|
- } else
|
|
- apic_printk(APIC_VERBOSE, ", %d-%d",
|
|
- mp_ioapics[apic].mp_apicid, pin);
|
|
- continue;
|
|
- }
|
|
-
|
|
- if (!first_notcon) {
|
|
- apic_printk(APIC_VERBOSE, " not connected.\n");
|
|
- first_notcon = 1;
|
|
- }
|
|
-
|
|
- entry.trigger = irq_trigger(idx);
|
|
- entry.polarity = irq_polarity(idx);
|
|
-
|
|
- if (irq_trigger(idx)) {
|
|
- entry.trigger = 1;
|
|
- entry.mask = 1;
|
|
- }
|
|
-
|
|
- irq = pin_2_irq(idx, apic, pin);
|
|
- /*
|
|
- * skip adding the timer int on secondary nodes, which causes
|
|
- * a small but painful rift in the time-space continuum
|
|
- */
|
|
- if (multi_timer_check(apic, irq))
|
|
- continue;
|
|
- else
|
|
- add_pin_to_irq(irq, apic, pin);
|
|
-
|
|
- if (/*!apic &&*/ !IO_APIC_IRQ(irq))
|
|
- continue;
|
|
-
|
|
- if (IO_APIC_IRQ(irq)) {
|
|
- vector = assign_irq_vector(irq);
|
|
- entry.vector = vector;
|
|
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
|
|
-
|
|
- if (!apic && (irq < 16))
|
|
- disable_8259A_irq(irq);
|
|
- }
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
- }
|
|
- }
|
|
-
|
|
- if (!first_notcon)
|
|
- apic_printk(APIC_VERBOSE, " not connected.\n");
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Set up the timer pin, possibly with the 8259A-master behind.
|
|
- */
|
|
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
- int vector)
|
|
-{
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- memset(&entry, 0, sizeof(entry));
|
|
-
|
|
- /*
|
|
- * We use logical delivery to get the timer IRQ
|
|
- * to the first CPU.
|
|
- */
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 1; /* mask IRQ now */
|
|
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
- entry.polarity = 0;
|
|
- entry.trigger = 0;
|
|
- entry.vector = vector;
|
|
-
|
|
- /*
|
|
- * The timer IRQ doesn't have to know that behind the
|
|
- * scene we may have a 8259A-master in AEOI mode ...
|
|
- */
|
|
- ioapic_register_intr(0, vector, IOAPIC_EDGE);
|
|
-
|
|
- /*
|
|
- * Add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
-}
|
|
-
|
|
-void __init print_IO_APIC(void)
|
|
-{
|
|
- int apic, i;
|
|
- union IO_APIC_reg_00 reg_00;
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- union IO_APIC_reg_02 reg_02;
|
|
- union IO_APIC_reg_03 reg_03;
|
|
- unsigned long flags;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
- for (i = 0; i < nr_ioapics; i++)
|
|
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
|
-
|
|
- /*
|
|
- * We are a bit conservative about what we expect. We have to
|
|
- * know about every hardware change ASAP.
|
|
- */
|
|
- printk(KERN_INFO "testing the IO APIC.......................\n");
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
- reg_01.raw = io_apic_read(apic, 1);
|
|
- if (reg_01.bits.version >= 0x10)
|
|
- reg_02.raw = io_apic_read(apic, 2);
|
|
- if (reg_01.bits.version >= 0x20)
|
|
- reg_03.raw = io_apic_read(apic, 3);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
|
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
- printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
|
|
- printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
|
|
-
|
|
- printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
|
|
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
|
|
-
|
|
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
|
|
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
|
|
-
|
|
- /*
|
|
- * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
|
|
- * but the value of reg_02 is read as the previous read register
|
|
- * value, so ignore it if reg_02 == reg_01.
|
|
- */
|
|
- if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
|
|
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
|
|
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
|
|
- }
|
|
-
|
|
- /*
|
|
- * Some Intel chipsets with IO APIC VERSION of 0x2? don't have reg_02
|
|
- * or reg_03, but the value of reg_0[23] is read as the previous read
|
|
- * register value, so ignore it if reg_03 == reg_0[12].
|
|
- */
|
|
- if (reg_01.bits.version >= 0x20 && reg_03.raw != reg_02.raw &&
|
|
- reg_03.raw != reg_01.raw) {
|
|
- printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
|
|
- printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
|
|
- }
|
|
-
|
|
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
|
|
-
|
|
- printk(KERN_DEBUG " NR Log Phy Mask Trig IRR Pol"
|
|
- " Stat Dest Deli Vect: \n");
|
|
-
|
|
- for (i = 0; i <= reg_01.bits.entries; i++) {
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- entry = ioapic_read_entry(apic, i);
|
|
-
|
|
- printk(KERN_DEBUG " %02x %03X %02X ",
|
|
- i,
|
|
- entry.dest.logical.logical_dest,
|
|
- entry.dest.physical.physical_dest
|
|
- );
|
|
-
|
|
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
|
|
- entry.mask,
|
|
- entry.trigger,
|
|
- entry.irr,
|
|
- entry.polarity,
|
|
- entry.delivery_status,
|
|
- entry.dest_mode,
|
|
- entry.delivery_mode,
|
|
- entry.vector
|
|
- );
|
|
- }
|
|
- }
|
|
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
|
|
- for (i = 0; i < NR_IRQS; i++) {
|
|
- struct irq_pin_list *entry = irq_2_pin + i;
|
|
- if (entry->pin < 0)
|
|
- continue;
|
|
- printk(KERN_DEBUG "IRQ%d ", i);
|
|
- for (;;) {
|
|
- printk("-> %d:%d", entry->apic, entry->pin);
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
- printk("\n");
|
|
- }
|
|
-
|
|
- printk(KERN_INFO ".................................... done.\n");
|
|
-
|
|
- return;
|
|
-}
|
|
-
|
|
-static void print_APIC_bitfield(int base)
|
|
-{
|
|
- unsigned int v;
|
|
- int i, j;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
|
|
- for (i = 0; i < 8; i++) {
|
|
- v = apic_read(base + i*0x10);
|
|
- for (j = 0; j < 32; j++) {
|
|
- if (v & (1<<j))
|
|
- printk("1");
|
|
- else
|
|
- printk("0");
|
|
- }
|
|
- printk("\n");
|
|
- }
|
|
-}
|
|
-
|
|
-void /*__init*/ print_local_APIC(void *dummy)
|
|
-{
|
|
- unsigned int v, ver, maxlvt;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
|
- smp_processor_id(), hard_smp_processor_id());
|
|
- v = apic_read(APIC_ID);
|
|
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
|
|
- GET_APIC_ID(read_apic_id()));
|
|
- v = apic_read(APIC_LVR);
|
|
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
|
|
- ver = GET_APIC_VERSION(v);
|
|
- maxlvt = lapic_get_maxlvt();
|
|
-
|
|
- v = apic_read(APIC_TASKPRI);
|
|
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
|
|
-
|
|
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
|
|
- v = apic_read(APIC_ARBPRI);
|
|
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
|
|
- v & APIC_ARBPRI_MASK);
|
|
- v = apic_read(APIC_PROCPRI);
|
|
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
|
|
- }
|
|
-
|
|
- v = apic_read(APIC_EOI);
|
|
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
|
|
- v = apic_read(APIC_RRR);
|
|
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
|
|
- v = apic_read(APIC_LDR);
|
|
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
|
|
- v = apic_read(APIC_DFR);
|
|
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
|
|
- v = apic_read(APIC_SPIV);
|
|
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
|
|
-
|
|
- printk(KERN_DEBUG "... APIC ISR field:\n");
|
|
- print_APIC_bitfield(APIC_ISR);
|
|
- printk(KERN_DEBUG "... APIC TMR field:\n");
|
|
- print_APIC_bitfield(APIC_TMR);
|
|
- printk(KERN_DEBUG "... APIC IRR field:\n");
|
|
- print_APIC_bitfield(APIC_IRR);
|
|
-
|
|
- if (APIC_INTEGRATED(ver)) { /* !82489DX */
|
|
- if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
|
|
- apic_write(APIC_ESR, 0);
|
|
- v = apic_read(APIC_ESR);
|
|
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
|
|
- }
|
|
-
|
|
- v = apic_read(APIC_ICR);
|
|
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
|
|
- v = apic_read(APIC_ICR2);
|
|
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
|
|
-
|
|
- v = apic_read(APIC_LVTT);
|
|
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
|
|
-
|
|
- if (maxlvt > 3) { /* PC is LVT#4. */
|
|
- v = apic_read(APIC_LVTPC);
|
|
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
|
|
- }
|
|
- v = apic_read(APIC_LVT0);
|
|
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
|
|
- v = apic_read(APIC_LVT1);
|
|
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
|
|
-
|
|
- if (maxlvt > 2) { /* ERR is LVT#3. */
|
|
- v = apic_read(APIC_LVTERR);
|
|
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
|
|
- }
|
|
-
|
|
- v = apic_read(APIC_TMICT);
|
|
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
|
|
- v = apic_read(APIC_TMCCT);
|
|
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
|
|
- v = apic_read(APIC_TDCR);
|
|
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-void print_all_local_APICs(void)
|
|
-{
|
|
- on_each_cpu(print_local_APIC, NULL, 1);
|
|
-}
|
|
-
|
|
-void /*__init*/ print_PIC(void)
|
|
-{
|
|
- unsigned int v;
|
|
- unsigned long flags;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk(KERN_DEBUG "\nprinting PIC contents\n");
|
|
-
|
|
- spin_lock_irqsave(&i8259A_lock, flags);
|
|
-
|
|
- v = inb(0xa1) << 8 | inb(0x21);
|
|
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
|
|
-
|
|
- v = inb(0xa0) << 8 | inb(0x20);
|
|
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
|
|
-
|
|
- outb(0x0b, 0xa0);
|
|
- outb(0x0b, 0x20);
|
|
- v = inb(0xa0) << 8 | inb(0x20);
|
|
- outb(0x0a, 0xa0);
|
|
- outb(0x0a, 0x20);
|
|
-
|
|
- spin_unlock_irqrestore(&i8259A_lock, flags);
|
|
-
|
|
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
|
|
-
|
|
- v = inb(0x4d1) << 8 | inb(0x4d0);
|
|
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
|
|
-}
|
|
-#else
|
|
-void __init print_IO_APIC(void) {}
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-static void __init enable_IO_APIC(void)
|
|
-{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
-#ifndef CONFIG_XEN
|
|
- int i8259_apic, i8259_pin;
|
|
-#endif
|
|
- int i, apic;
|
|
- unsigned long flags;
|
|
-
|
|
- for (i = 0; i < PIN_MAP_SIZE; i++) {
|
|
- irq_2_pin[i].pin = -1;
|
|
- irq_2_pin[i].next = 0;
|
|
- }
|
|
- if (!pirqs_enabled)
|
|
- for (i = 0; i < MAX_PIRQS; i++)
|
|
- pirq_entries[i] = -1;
|
|
-
|
|
- /*
|
|
- * The number of IO-APIC IRQ registers (== #pins):
|
|
- */
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(apic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
- }
|
|
-#ifndef CONFIG_XEN
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- int pin;
|
|
- /* See if any of the pins is in ExtINT mode */
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
- struct IO_APIC_route_entry entry;
|
|
- entry = ioapic_read_entry(apic, pin);
|
|
-
|
|
-
|
|
- /* If the interrupt line is enabled and in ExtInt mode
|
|
- * I have found the pin where the i8259 is connected.
|
|
- */
|
|
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
|
|
- ioapic_i8259.apic = apic;
|
|
- ioapic_i8259.pin = pin;
|
|
- goto found_i8259;
|
|
- }
|
|
- }
|
|
- }
|
|
- found_i8259:
|
|
- /* Look to see what if the MP table has reported the ExtINT */
|
|
- /* If we could not find the appropriate pin by looking at the ioapic
|
|
- * the i8259 probably is not connected the ioapic but give the
|
|
- * mptable a chance anyway.
|
|
- */
|
|
- i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
|
|
- i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
|
|
- /* Trust the MP table if nothing is setup in the hardware */
|
|
- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
|
|
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
|
|
- ioapic_i8259.pin = i8259_pin;
|
|
- ioapic_i8259.apic = i8259_apic;
|
|
- }
|
|
- /* Complain if the MP table and the hardware disagree */
|
|
- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
|
|
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
|
|
- {
|
|
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
|
|
- }
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Do not trust the IO-APIC being empty at bootup
|
|
- */
|
|
- clear_IO_APIC();
|
|
-}
|
|
-
|
|
-/*
|
|
- * Not an __init, needed by the reboot code
|
|
- */
|
|
-void disable_IO_APIC(void)
|
|
-{
|
|
- /*
|
|
- * Clear the IO-APIC before rebooting:
|
|
- */
|
|
- clear_IO_APIC();
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * If the i8259 is routed through an IOAPIC
|
|
- * Put that IOAPIC in virtual wire mode
|
|
- * so legacy interrupts can be delivered.
|
|
- */
|
|
- if (ioapic_i8259.pin != -1) {
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- memset(&entry, 0, sizeof(entry));
|
|
- entry.mask = 0; /* Enabled */
|
|
- entry.trigger = 0; /* Edge */
|
|
- entry.irr = 0;
|
|
- entry.polarity = 0; /* High */
|
|
- entry.delivery_status = 0;
|
|
- entry.dest_mode = 0; /* Physical */
|
|
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
|
|
- entry.vector = 0;
|
|
- entry.dest.physical.physical_dest =
|
|
- GET_APIC_ID(read_apic_id());
|
|
-
|
|
- /*
|
|
- * Add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
|
- }
|
|
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
|
-#endif
|
|
-}
|
|
-
|
|
-/*
|
|
- * function to set the IO-APIC physical IDs based on the
|
|
- * values stored in the MPC table.
|
|
- *
|
|
- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
|
|
- */
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static void __init setup_ioapic_ids_from_mpc(void)
|
|
-{
|
|
- union IO_APIC_reg_00 reg_00;
|
|
- physid_mask_t phys_id_present_map;
|
|
- int apic;
|
|
- int i;
|
|
- unsigned char old_id;
|
|
- unsigned long flags;
|
|
-
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- if (found_numaq)
|
|
- return;
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Don't check I/O APIC IDs for xAPIC systems. They have
|
|
- * no meaning without the serial APIC bus.
|
|
- */
|
|
- if (!(boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
|
|
- || APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
|
|
- return;
|
|
- /*
|
|
- * This is broken; anything with a real cpu count has to
|
|
- * circumvent this idiocy regardless.
|
|
- */
|
|
- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
|
|
-
|
|
- /*
|
|
- * Set the IOAPIC ID to the value stored in the MPC table.
|
|
- */
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
-
|
|
- /* Read the register 0 value */
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- old_id = mp_ioapics[apic].mp_apicid;
|
|
-
|
|
- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
|
|
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
|
|
- apic, mp_ioapics[apic].mp_apicid);
|
|
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
- reg_00.bits.ID);
|
|
- mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
|
|
- }
|
|
-
|
|
- /*
|
|
- * Sanity check, is the ID really free? Every APIC in a
|
|
- * system must have a unique ID or we get lots of nice
|
|
- * 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
- */
|
|
- if (check_apicid_used(phys_id_present_map,
|
|
- mp_ioapics[apic].mp_apicid)) {
|
|
- printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
|
|
- apic, mp_ioapics[apic].mp_apicid);
|
|
- for (i = 0; i < get_physical_broadcast(); i++)
|
|
- if (!physid_isset(i, phys_id_present_map))
|
|
- break;
|
|
- if (i >= get_physical_broadcast())
|
|
- panic("Max APIC ID exceeded!\n");
|
|
- printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
- i);
|
|
- physid_set(i, phys_id_present_map);
|
|
- mp_ioapics[apic].mp_apicid = i;
|
|
- } else {
|
|
- physid_mask_t tmp;
|
|
- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
|
|
- apic_printk(APIC_VERBOSE, "Setting %d in the "
|
|
- "phys_id_present_map\n",
|
|
- mp_ioapics[apic].mp_apicid);
|
|
- physids_or(phys_id_present_map, phys_id_present_map, tmp);
|
|
- }
|
|
-
|
|
-
|
|
- /*
|
|
- * We need to adjust the IRQ routing table
|
|
- * if the ID changed.
|
|
- */
|
|
- if (old_id != mp_ioapics[apic].mp_apicid)
|
|
- for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_dstapic == old_id)
|
|
- mp_irqs[i].mp_dstapic
|
|
- = mp_ioapics[apic].mp_apicid;
|
|
-
|
|
- /*
|
|
- * Read the right value from the MPC table and
|
|
- * write it into the ID register.
|
|
- */
|
|
- apic_printk(APIC_VERBOSE, KERN_INFO
|
|
- "...changing IO-APIC physical APIC ID to %d ...",
|
|
- mp_ioapics[apic].mp_apicid);
|
|
-
|
|
- reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0, reg_00.raw);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- /*
|
|
- * Sanity check
|
|
- */
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
|
|
- printk("could not set ID!\n");
|
|
- else
|
|
- apic_printk(APIC_VERBOSE, " ok.\n");
|
|
- }
|
|
-}
|
|
-
|
|
-int no_timer_check __initdata;
|
|
-
|
|
-static int __init notimercheck(char *s)
|
|
-{
|
|
- no_timer_check = 1;
|
|
- return 1;
|
|
-}
|
|
-__setup("no_timer_check", notimercheck);
|
|
-
|
|
-/*
|
|
- * There is a nasty bug in some older SMP boards, their mptable lies
|
|
- * about the timer IRQ. We do the following to work around the situation:
|
|
- *
|
|
- * - timer IRQ defaults to IO-APIC IRQ
|
|
- * - if this function detects that timer IRQs are defunct, then we fall
|
|
- * back to ISA timer IRQs
|
|
- */
|
|
-static int __init timer_irq_works(void)
|
|
-{
|
|
- unsigned long t1 = jiffies;
|
|
- unsigned long flags;
|
|
-
|
|
- if (no_timer_check)
|
|
- return 1;
|
|
-
|
|
- local_save_flags(flags);
|
|
- local_irq_enable();
|
|
- /* Let ten ticks pass... */
|
|
- mdelay((10 * 1000) / HZ);
|
|
- local_irq_restore(flags);
|
|
-
|
|
- /*
|
|
- * Expect a few ticks at least, to be sure some possible
|
|
- * glue logic does not lock up after one or two first
|
|
- * ticks in a non-ExtINT mode. Also the local APIC
|
|
- * might have cached one ExtINT interrupt. Finally, at
|
|
- * least one tick may be lost due to delays.
|
|
- */
|
|
- if (time_after(jiffies, t1 + 4))
|
|
- return 1;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * In the SMP+IOAPIC case it might happen that there are an unspecified
|
|
- * number of pending IRQ events unhandled. These cases are very rare,
|
|
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
|
|
- * better to do it this way as thus we do not have to be aware of
|
|
- * 'pending' interrupts in the IRQ path, except at this point.
|
|
- */
|
|
-/*
|
|
- * Edge triggered needs to resend any interrupt
|
|
- * that was delayed but this is now handled in the device
|
|
- * independent code.
|
|
- */
|
|
-
|
|
-/*
|
|
- * Startup quirk:
|
|
- *
|
|
- * Starting up a edge-triggered IO-APIC interrupt is
|
|
- * nasty - we need to make sure that we get the edge.
|
|
- * If it is already asserted for some reason, we need
|
|
- * return 1 to indicate that is was pending.
|
|
- *
|
|
- * This is not complete - we should be able to fake
|
|
- * an edge even if it isn't on the 8259A...
|
|
- *
|
|
- * (We do this for level-triggered IRQs too - it cannot hurt.)
|
|
- */
|
|
-static unsigned int startup_ioapic_irq(unsigned int irq)
|
|
-{
|
|
- int was_pending = 0;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- if (irq < 16) {
|
|
- disable_8259A_irq(irq);
|
|
- if (i8259A_irq_pending(irq))
|
|
- was_pending = 1;
|
|
- }
|
|
- __unmask_IO_APIC_irq(irq);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- return was_pending;
|
|
-}
|
|
-
|
|
-static void ack_ioapic_irq(unsigned int irq)
|
|
-{
|
|
- move_native_irq(irq);
|
|
- ack_APIC_irq();
|
|
-}
|
|
-
|
|
-static void ack_ioapic_quirk_irq(unsigned int irq)
|
|
-{
|
|
- unsigned long v;
|
|
- int i;
|
|
-
|
|
- move_native_irq(irq);
|
|
-/*
|
|
- * It appears there is an erratum which affects at least version 0x11
|
|
- * of I/O APIC (that's the 82093AA and cores integrated into various
|
|
- * chipsets). Under certain conditions a level-triggered interrupt is
|
|
- * erroneously delivered as edge-triggered one but the respective IRR
|
|
- * bit gets set nevertheless. As a result the I/O unit expects an EOI
|
|
- * message but it will never arrive and further interrupts are blocked
|
|
- * from the source. The exact reason is so far unknown, but the
|
|
- * phenomenon was observed when two consecutive interrupt requests
|
|
- * from a given source get delivered to the same CPU and the source is
|
|
- * temporarily disabled in between.
|
|
- *
|
|
- * A workaround is to simulate an EOI message manually. We achieve it
|
|
- * by setting the trigger mode to edge and then to level when the edge
|
|
- * trigger mode gets detected in the TMR of a local APIC for a
|
|
- * level-triggered interrupt. We mask the source for the time of the
|
|
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
|
|
- * The idea is from Manfred Spraul. --macro
|
|
- */
|
|
- i = irq_vector[irq];
|
|
-
|
|
- v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
|
|
-
|
|
- ack_APIC_irq();
|
|
-
|
|
- if (!(v & (1 << (i & 0x1f)))) {
|
|
- atomic_inc(&irq_mis_count);
|
|
- spin_lock(&ioapic_lock);
|
|
- __mask_and_edge_IO_APIC_irq(irq);
|
|
- __unmask_and_level_IO_APIC_irq(irq);
|
|
- spin_unlock(&ioapic_lock);
|
|
- }
|
|
-}
|
|
-
|
|
-static int ioapic_retrigger_irq(unsigned int irq)
|
|
-{
|
|
- send_IPI_self(irq_vector[irq]);
-
- return 1;
-}
-
-static struct irq_chip ioapic_chip __read_mostly = {
- .name = "IO-APIC",
- .startup = startup_ioapic_irq,
- .mask = mask_IO_APIC_irq,
- .unmask = unmask_IO_APIC_irq,
- .ack = ack_ioapic_irq,
- .eoi = ack_ioapic_quirk_irq,
-#ifdef CONFIG_SMP
- .set_affinity = set_ioapic_affinity_irq,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-#endif /* !CONFIG_XEN */
-
-static inline void init_IO_APIC_traps(void)
-{
- int irq;
-
- /*
- * NOTE! The local APIC isn't very good at handling
- * multiple interrupts at the same interrupt level.
- * As the interrupt level is determined by taking the
- * vector number and shifting that right by 4, we
- * want to spread these out a bit so that they don't
- * all fall in the same interrupt level.
- *
- * Also, we've got to be careful not to trash gate
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
- */
- for (irq = 0; irq < NR_IRQS ; irq++) {
- if (IO_APIC_IRQ(irq) && !irq_vector[irq]) {
- /*
- * Hmm.. We don't have an entry for this,
- * so default to an old-fashioned 8259
- * interrupt if we can..
- */
- if (irq < 16)
- make_8259A_irq(irq);
-#ifndef CONFIG_XEN
- else
- /* Strange. Oh, well.. */
- irq_desc[irq].chip = &no_irq_chip;
-#endif
- }
- }
-}
-
-#ifndef CONFIG_XEN
-/*
- * The local APIC irq-chip implementation:
- */
-
-static void ack_lapic_irq(unsigned int irq)
-{
- ack_APIC_irq();
-}
-
-static void mask_lapic_irq(unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
-}
-
-static void unmask_lapic_irq(unsigned int irq)
-{
- unsigned long v;
-
- v = apic_read(APIC_LVT0);
- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
-}
-
-static struct irq_chip lapic_chip __read_mostly = {
- .name = "local-APIC",
- .mask = mask_lapic_irq,
- .unmask = unmask_lapic_irq,
- .ack = ack_lapic_irq,
-};
-
-static void lapic_register_intr(int irq, int vector)
-{
- irq_desc[irq].status &= ~IRQ_LEVEL;
- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
- "edge");
- set_intr_gate(vector, interrupt[irq]);
-}
-
-static void __init setup_nmi(void)
-{
- /*
- * Dirty trick to enable the NMI watchdog ...
- * We put the 8259A master into AEOI mode and
- * unmask on all local APICs LVT0 as NMI.
- *
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
- * is from Maciej W. Rozycki - so we do not have to EOI from
- * the NMI handler or the timer interrupt.
- */
- apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
-
- enable_NMI_through_LVT0();
-
- apic_printk(APIC_VERBOSE, " done.\n");
-}
-
-/*
- * This looks a bit hackish but it's about the only one way of sending
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
- * not support the ExtINT mode, unfortunately. We need to send these
- * cycles as some i82489DX-based boards have glue logic that keeps the
- * 8259A interrupt line asserted until INTA. --macro
- */
-static inline void __init unlock_ExtINT_logic(void)
-{
- int apic, pin, i;
- struct IO_APIC_route_entry entry0, entry1;
- unsigned char save_control, save_freq_select;
-
- pin = find_isa_irq_pin(8, mp_INT);
- if (pin == -1) {
- WARN_ON_ONCE(1);
- return;
- }
- apic = find_isa_irq_apic(8, mp_INT);
- if (apic == -1) {
- WARN_ON_ONCE(1);
- return;
- }
-
- entry0 = ioapic_read_entry(apic, pin);
- clear_IO_APIC_pin(apic, pin);
-
- memset(&entry1, 0, sizeof(entry1));
-
- entry1.dest_mode = 0; /* physical delivery */
- entry1.mask = 0; /* unmask IRQ now */
- entry1.dest.physical.physical_dest = hard_smp_processor_id();
- entry1.delivery_mode = dest_ExtINT;
- entry1.polarity = entry0.polarity;
- entry1.trigger = 0;
- entry1.vector = 0;
-
- ioapic_write_entry(apic, pin, entry1);
-
- save_control = CMOS_READ(RTC_CONTROL);
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
- RTC_FREQ_SELECT);
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
-
- i = 100;
- while (i-- > 0) {
- mdelay(10);
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
- i -= 10;
- }
-
- CMOS_WRITE(save_control, RTC_CONTROL);
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(apic, pin);
-
- ioapic_write_entry(apic, pin, entry0);
-}
-
-/*
- * This code may look a bit paranoid, but it's supposed to cooperate with
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
- * fanatically on his truly buggy board.
- */
-static inline void __init check_timer(void)
-{
- int apic1, pin1, apic2, pin2;
- int no_pin1 = 0;
- int vector;
- unsigned int ver;
- unsigned long flags;
-
- local_irq_save(flags);
-
- ver = apic_read(APIC_LVR);
- ver = GET_APIC_VERSION(ver);
-
- /*
- * get/set the timer IRQ vector:
- */
- disable_8259A_irq(0);
- vector = assign_irq_vector(0);
- set_intr_gate(vector, interrupt[0]);
-
- /*
- * As IRQ0 is to be enabled in the 8259A, the virtual
- * wire has to be disabled in the local APIC. Also
- * timer interrupts need to be acknowledged manually in
- * the 8259A for the i82489DX when using the NMI
- * watchdog as that APIC treats NMIs as level-triggered.
- * The AEOI mode will finish them in the 8259A
- * automatically.
- */
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
- init_8259A(1);
- timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
-
- pin1 = find_isa_irq_pin(0, mp_INT);
- apic1 = find_isa_irq_apic(0, mp_INT);
- pin2 = ioapic_i8259.pin;
- apic2 = ioapic_i8259.apic;
-
- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
- "apic1=%d pin1=%d apic2=%d pin2=%d\n",
- vector, apic1, pin1, apic2, pin2);
-
- /*
- * Some BIOS writers are clueless and report the ExtINTA
- * I/O APIC input from the cascaded 8259A as the timer
- * interrupt input. So just in case, if only one pin
- * was found above, try it both directly and through the
- * 8259A.
- */
- if (pin1 == -1) {
- pin1 = pin2;
- apic1 = apic2;
- no_pin1 = 1;
- } else if (pin2 == -1) {
- pin2 = pin1;
- apic2 = apic1;
- }
-
- if (pin1 != -1) {
- /*
- * Ok, does IRQ0 through the IOAPIC work?
- */
- if (no_pin1) {
- add_pin_to_irq(0, apic1, pin1);
- setup_timer_IRQ0_pin(apic1, pin1, vector);
- }
- unmask_IO_APIC_irq(0);
- if (timer_irq_works()) {
- if (nmi_watchdog == NMI_IO_APIC) {
- setup_nmi();
- enable_8259A_irq(0);
- }
- if (disable_timer_pin_1 > 0)
- clear_IO_APIC_pin(0, pin1);
- goto out;
- }
- clear_IO_APIC_pin(apic1, pin1);
- if (!no_pin1)
- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
- "8254 timer not connected to IO-APIC\n");
-
- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
- "(IRQ0) through the 8259A ...\n");
- apic_printk(APIC_QUIET, KERN_INFO
- "..... (found apic %d pin %d) ...\n", apic2, pin2);
- /*
- * legacy devices should be connected to IO APIC #0
- */
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
- setup_timer_IRQ0_pin(apic2, pin2, vector);
- unmask_IO_APIC_irq(0);
- enable_8259A_irq(0);
- if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
- timer_through_8259 = 1;
- if (nmi_watchdog == NMI_IO_APIC) {
- disable_8259A_irq(0);
- setup_nmi();
- enable_8259A_irq(0);
- }
- goto out;
- }
- /*
- * Cleanup, just in case ...
- */
- disable_8259A_irq(0);
- clear_IO_APIC_pin(apic2, pin2);
- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
- }
-
- if (nmi_watchdog == NMI_IO_APIC) {
- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
- "through the IO-APIC - disabling NMI Watchdog!\n");
- nmi_watchdog = NMI_NONE;
- }
- timer_ack = 0;
-
- apic_printk(APIC_QUIET, KERN_INFO
- "...trying to set up timer as Virtual Wire IRQ...\n");
-
- lapic_register_intr(0, vector);
- apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
- enable_8259A_irq(0);
-
- if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
- goto out;
- }
- disable_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
-
- apic_printk(APIC_QUIET, KERN_INFO
- "...trying to set up timer as ExtINT IRQ...\n");
-
- init_8259A(0);
- make_8259A_irq(0);
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
-
- unlock_ExtINT_logic();
-
- if (timer_irq_works()) {
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
- goto out;
- }
- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
- "report. Then try booting with the 'noapic' option.\n");
-out:
- local_irq_restore(flags);
-}
-#else
-int timer_uses_ioapic_pin_0 = 0;
-#define check_timer() ((void)0)
-#endif
-
-/*
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
- * to devices. However there may be an I/O APIC pin available for
- * this interrupt regardless. The pin may be left unconnected, but
- * typically it will be reused as an ExtINT cascade interrupt for
- * the master 8259A. In the MPS case such a pin will normally be
- * reported as an ExtINT interrupt in the MP table. With ACPI
- * there is no provision for ExtINT interrupts, and in the absence
- * of an override it would be treated as an ordinary ISA I/O APIC
- * interrupt, that is edge-triggered and unmasked by default. We
- * used to do this, but it caused problems on some systems because
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
- * the same ExtINT cascade interrupt to drive the local APIC of the
- * bootstrap processor. Therefore we refrain from routing IRQ2 to
- * the I/O APIC in all cases now. No actual device should request
- * it anyway. --macro
- */
-#define PIC_IRQS (1 << PIC_CASCADE_IR)
-
-void __init setup_IO_APIC(void)
-{
-#ifndef CONFIG_XEN
- int i;
-
- /* Reserve all the system vectors. */
- for (i = first_system_vector; i < NR_VECTORS; i++)
- set_bit(i, used_vectors);
-#endif
-
- enable_IO_APIC();
-
- io_apic_irqs = ~PIC_IRQS;
-
- printk("ENABLING IO-APIC IRQs\n");
-
-#ifndef CONFIG_XEN
- /*
- * Set up IO-APIC IRQ routing.
- */
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
- sync_Arb_IDs();
-#endif
- setup_IO_APIC_irqs();
- init_IO_APIC_traps();
- check_timer();
- if (!acpi_ioapic)
- print_IO_APIC();
-}
-
-/*
- * Called after all the initialization is done. If we didnt find any
- * APIC bugs then we can allow the modify fast path
- */
-
-static int __init io_apic_bug_finalize(void)
-{
- if (sis_apic_bug == -1)
- sis_apic_bug = 0;
- if (is_initial_xendomain()) {
- struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
- op.u.platform_quirk.quirk_id = sis_apic_bug ?
- QUIRK_IOAPIC_BAD_REGSEL : QUIRK_IOAPIC_GOOD_REGSEL;
- VOID(HYPERVISOR_platform_op(&op));
- }
- return 0;
-}
-
-late_initcall(io_apic_bug_finalize);
-
-#ifndef CONFIG_XEN
-
-struct sysfs_ioapic_data {
- struct sys_device dev;
- struct IO_APIC_route_entry entry[0];
-};
-static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];
-
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
- entry[i] = ioapic_read_entry(dev->id, i);
-
- return 0;
-}
-
-static int ioapic_resume(struct sys_device *dev)
-{
- struct IO_APIC_route_entry *entry;
- struct sysfs_ioapic_data *data;
- unsigned long flags;
- union IO_APIC_reg_00 reg_00;
- int i;
-
- data = container_of(dev, struct sysfs_ioapic_data, dev);
- entry = data->entry;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
- io_apic_write(dev->id, 0, reg_00.raw);
- }
- spin_unlock_irqrestore(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
- ioapic_write_entry(dev->id, i, entry[i]);
-
- return 0;
-}
-
-static struct sysdev_class ioapic_sysdev_class = {
- .name = "ioapic",
- .suspend = ioapic_suspend,
- .resume = ioapic_resume,
-};
-
-static int __init ioapic_init_sysfs(void)
-{
- struct sys_device *dev;
- int i, size, error = 0;
-
- error = sysdev_class_register(&ioapic_sysdev_class);
- if (error)
- return error;
-
- for (i = 0; i < nr_ioapics; i++) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
- * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
- if (!mp_ioapic_data[i]) {
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
- dev->cls = &ioapic_sysdev_class;
- error = sysdev_register(dev);
- if (error) {
- kfree(mp_ioapic_data[i]);
- mp_ioapic_data[i] = NULL;
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
- continue;
- }
- }
-
- return 0;
-}
-
-device_initcall(ioapic_init_sysfs);
-
-/*
- * Dynamic irq allocate and deallocation
- */
-int create_irq(void)
-{
- /* Allocate an unused irq */
- int irq, new, vector = 0;
- unsigned long flags;
-
- irq = -ENOSPC;
- spin_lock_irqsave(&vector_lock, flags);
- for (new = (NR_IRQS - 1); new >= 0; new--) {
- if (platform_legacy_irq(new))
- continue;
- if (irq_vector[new] != 0)
- continue;
- vector = __assign_irq_vector(new);
- if (likely(vector > 0))
- irq = new;
- break;
- }
- spin_unlock_irqrestore(&vector_lock, flags);
-
- if (irq >= 0) {
- set_intr_gate(vector, interrupt[irq]);
- dynamic_irq_init(irq);
- }
- return irq;
-}
-
-void destroy_irq(unsigned int irq)
-{
- unsigned long flags;
-
- dynamic_irq_cleanup(irq);
-
- spin_lock_irqsave(&vector_lock, flags);
- clear_bit(irq_vector[irq], used_vectors);
- irq_vector[irq] = 0;
- spin_unlock_irqrestore(&vector_lock, flags);
-}
-
-#endif /* CONFIG_XEN */
-
-/*
- * MSI message composition
- */
-#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
-{
- int vector;
- unsigned dest;
-
- vector = assign_irq_vector(irq);
- if (vector >= 0) {
- dest = cpu_mask_to_apicid(TARGET_CPUS);
-
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo =
- MSI_ADDR_BASE_LO |
- ((INT_DEST_MODE == 0) ?
-MSI_ADDR_DEST_MODE_PHYSICAL:
- MSI_ADDR_DEST_MODE_LOGICAL) |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
- MSI_ADDR_DEST_ID(dest);
-
- msg->data =
- MSI_DATA_TRIGGER_EDGE |
- MSI_DATA_LEVEL_ASSERT |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
-MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
- MSI_DATA_VECTOR(vector);
- }
- return vector;
-}
-
-#ifdef CONFIG_SMP
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- struct msi_msg msg;
- unsigned int dest;
- cpumask_t tmp;
- int vector;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- vector = assign_irq_vector(irq);
- if (vector < 0)
- return;
-
- dest = cpu_mask_to_apicid(mask);
-
- read_msi_msg(irq, &msg);
-
- msg.data &= ~MSI_DATA_VECTOR_MASK;
- msg.data |= MSI_DATA_VECTOR(vector);
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
- write_msi_msg(irq, &msg);
- irq_desc[irq].affinity = mask;
-}
-#endif /* CONFIG_SMP */
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
- .name = "PCI-MSI",
- .unmask = unmask_msi_irq,
- .mask = mask_msi_irq,
- .ack = ack_ioapic_irq,
-#ifdef CONFIG_SMP
- .set_affinity = set_msi_irq_affinity,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
- struct msi_msg msg;
- int irq, ret;
- irq = create_irq();
- if (irq < 0)
- return irq;
-
- ret = msi_compose_msg(dev, irq, &msg);
- if (ret < 0) {
- destroy_irq(irq);
- return ret;
- }
-
- set_irq_msi(irq, desc);
- write_msi_msg(irq, &msg);
-
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
- "edge");
-
- return 0;
-}
-
-void arch_teardown_msi_irq(unsigned int irq)
-{
- destroy_irq(irq);
-}
-
-#endif /* CONFIG_PCI_MSI */
-
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-#ifdef CONFIG_SMP
-
-static void target_ht_irq(unsigned int irq, unsigned int dest)
-{
- struct ht_irq_msg msg;
- fetch_ht_irq_msg(irq, &msg);
-
- msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
- msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
- write_ht_irq_msg(irq, &msg);
-}
-
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
-{
- unsigned int dest;
- cpumask_t tmp;
-
- cpus_and(tmp, mask, cpu_online_map);
- if (cpus_empty(tmp))
- tmp = TARGET_CPUS;
-
- cpus_and(mask, tmp, CPU_MASK_ALL);
-
- dest = cpu_mask_to_apicid(mask);
-
- target_ht_irq(irq, dest);
- irq_desc[irq].affinity = mask;
-}
-#endif
-
-static struct irq_chip ht_irq_chip = {
- .name = "PCI-HT",
- .mask = mask_ht_irq,
- .unmask = unmask_ht_irq,
- .ack = ack_ioapic_irq,
-#ifdef CONFIG_SMP
- .set_affinity = set_ht_irq_affinity,
-#endif
- .retrigger = ioapic_retrigger_irq,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
- int vector;
-
- vector = assign_irq_vector(irq);
- if (vector >= 0) {
- struct ht_irq_msg msg;
- unsigned dest;
- cpumask_t tmp;
-
- cpus_clear(tmp);
- cpu_set(vector >> 8, tmp);
- dest = cpu_mask_to_apicid(tmp);
-
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
- msg.address_lo =
- HT_IRQ_LOW_BASE |
- HT_IRQ_LOW_DEST_ID(dest) |
- HT_IRQ_LOW_VECTOR(vector) |
- ((INT_DEST_MODE == 0) ?
- HT_IRQ_LOW_DM_PHYSICAL :
- HT_IRQ_LOW_DM_LOGICAL) |
- HT_IRQ_LOW_RQEOI_EDGE |
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- HT_IRQ_LOW_MT_FIXED :
- HT_IRQ_LOW_MT_ARBITRATED) |
- HT_IRQ_LOW_IRQ_MASKED;
-
- write_ht_irq_msg(irq, &msg);
-
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
- handle_edge_irq, "edge");
- }
- return vector;
-}
-#endif /* CONFIG_HT_IRQ */
-
|
|
-/* --------------------------------------------------------------------------
|
|
- ACPI-based IOAPIC Configuration
|
|
- -------------------------------------------------------------------------- */
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-int __init io_apic_get_unique_id(int ioapic, int apic_id)
|
|
-{
|
|
- union IO_APIC_reg_00 reg_00;
|
|
- static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
|
|
- physid_mask_t tmp;
|
|
- unsigned long flags;
|
|
- int i = 0;
|
|
-
|
|
- /*
|
|
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
|
|
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
|
|
- * supports up to 16 on one shared APIC bus.
|
|
- *
|
|
- * TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
|
|
- * advantage of new APIC bus architecture.
|
|
- */
|
|
-
|
|
- if (physids_empty(apic_id_map))
|
|
- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(ioapic, 0);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- if (apic_id >= get_physical_broadcast()) {
|
|
- printk(KERN_WARNING "IOAPIC[%d]: Invalid apic_id %d, trying "
|
|
- "%d\n", ioapic, apic_id, reg_00.bits.ID);
|
|
- apic_id = reg_00.bits.ID;
|
|
- }
|
|
-
|
|
- /*
|
|
- * Every APIC in a system must have a unique ID or we get lots of nice
|
|
- * 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
- */
|
|
- if (check_apicid_used(apic_id_map, apic_id)) {
|
|
-
|
|
- for (i = 0; i < get_physical_broadcast(); i++) {
|
|
- if (!check_apicid_used(apic_id_map, i))
|
|
- break;
|
|
- }
|
|
-
|
|
- if (i == get_physical_broadcast())
|
|
- panic("Max apic_id exceeded!\n");
|
|
-
|
|
- printk(KERN_WARNING "IOAPIC[%d]: apic_id %d already used, "
|
|
- "trying %d\n", ioapic, apic_id, i);
|
|
-
|
|
- apic_id = i;
|
|
- }
|
|
-
|
|
- tmp = apicid_to_cpu_present(apic_id);
|
|
- physids_or(apic_id_map, apic_id_map, tmp);
|
|
-
|
|
- if (reg_00.bits.ID != apic_id) {
|
|
- reg_00.bits.ID = apic_id;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(ioapic, 0, reg_00.raw);
|
|
- reg_00.raw = io_apic_read(ioapic, 0);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- /* Sanity check */
|
|
- if (reg_00.bits.ID != apic_id) {
|
|
- printk("IOAPIC[%d]: Unable to change apic_id!\n", ioapic);
|
|
- return -1;
|
|
- }
|
|
- }
|
|
-
|
|
- apic_printk(APIC_VERBOSE, KERN_INFO
|
|
- "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id);
|
|
-
|
|
- return apic_id;
|
|
-}
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-
|
|
-int __init io_apic_get_version(int ioapic)
|
|
-{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(ioapic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- return reg_01.bits.version;
|
|
-}
|
|
-
|
|
-
|
|
-int __init io_apic_get_redir_entries(int ioapic)
|
|
-{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(ioapic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- return reg_01.bits.entries;
|
|
-}
|
|
-
|
|
-
|
|
-int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
|
|
-{
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- if (!IO_APIC_IRQ(irq)) {
|
|
- printk(KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
|
|
- ioapic);
|
|
- return -EINVAL;
|
|
- }
|
|
-
|
|
- /*
|
|
- * Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
|
|
- * Note that we mask (disable) IRQs now -- these get enabled when the
|
|
- * corresponding device driver registers for this IRQ.
|
|
- */
|
|
-
|
|
- memset(&entry, 0, sizeof(entry));
|
|
-
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
- entry.trigger = edge_level;
|
|
- entry.polarity = active_high_low;
|
|
- entry.mask = 1;
|
|
-
|
|
- /*
|
|
- * IRQs < 16 are already in the irq_2_pin[] map
|
|
- */
|
|
- if (irq >= 16)
|
|
- add_pin_to_irq(irq, ioapic, pin);
|
|
-
|
|
- entry.vector = assign_irq_vector(irq);
|
|
-
|
|
- apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
|
|
- "(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
|
|
- mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
|
|
- edge_level, active_high_low);
|
|
-
|
|
- ioapic_register_intr(irq, entry.vector, edge_level);
|
|
-
|
|
- if (!ioapic && (irq < 16))
|
|
- disable_8259A_irq(irq);
|
|
-
|
|
- ioapic_write_entry(ioapic, pin, entry);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
|
|
-{
|
|
- int i;
|
|
-
|
|
- if (skip_ioapic_setup)
|
|
- return -1;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
- mp_irqs[i].mp_srcbusirq == bus_irq)
|
|
- break;
|
|
- if (i >= mp_irq_entries)
|
|
- return -1;
|
|
-
|
|
- *trigger = irq_trigger(i);
|
|
- *polarity = irq_polarity(i);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-#endif /* CONFIG_ACPI */
|
|
-
|
|
-static int __init parse_disable_timer_pin_1(char *arg)
|
|
-{
|
|
- disable_timer_pin_1 = 1;
|
|
- return 0;
|
|
-}
|
|
-early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
|
|
-
|
|
-static int __init parse_enable_timer_pin_1(char *arg)
|
|
-{
|
|
- disable_timer_pin_1 = -1;
|
|
- return 0;
|
|
-}
|
|
-early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
|
|
-
|
|
-static int __init parse_noapic(char *arg)
|
|
-{
|
|
- /* disable IO-APIC */
|
|
- disable_ioapic_setup();
|
|
- return 0;
|
|
-}
|
|
-early_param("noapic", parse_noapic);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-void __init ioapic_init_mappings(void)
|
|
-{
|
|
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < nr_ioapics; i++) {
|
|
- if (smp_found_config) {
|
|
- ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
|
- if (!ioapic_phys) {
|
|
- printk(KERN_ERR
|
|
- "WARNING: bogus zero IO-APIC "
|
|
- "address found in MPTABLE, "
|
|
- "disabling IO/APIC support!\n");
|
|
- smp_found_config = 0;
|
|
- skip_ioapic_setup = 1;
|
|
- goto fake_ioapic_page;
|
|
- }
|
|
- } else {
|
|
-fake_ioapic_page:
|
|
- ioapic_phys = (unsigned long)
|
|
- alloc_bootmem_pages(PAGE_SIZE);
|
|
- ioapic_phys = __pa(ioapic_phys);
|
|
- }
|
|
- set_fixmap_nocache(idx, ioapic_phys);
|
|
- printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
|
|
- __fix_to_virt(idx), ioapic_phys);
|
|
- idx++;
|
|
- }
|
|
-}
|
|
-#endif
|
|
--- head-2011-03-17.orig/arch/x86/kernel/io_apic_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,2448 +0,0 @@
|
|
-/*
|
|
- * Intel IO-APIC support for multi-Pentium hosts.
|
|
- *
|
|
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
|
|
- *
|
|
- * Many thanks to Stig Venaas for trying out countless experimental
|
|
- * patches and reporting/debugging problems patiently!
|
|
- *
|
|
- * (c) 1999, Multiple IO-APIC support, developed by
|
|
- * Ken-ichi Yaku <yaku@css1.kbnes.nec.co.jp> and
|
|
- * Hidemi Kishimoto <kisimoto@css1.kbnes.nec.co.jp>,
|
|
- * further tested and cleaned up by Zach Brown <zab@redhat.com>
|
|
- * and Ingo Molnar <mingo@redhat.com>
|
|
- *
|
|
- * Fixes
|
|
- * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
|
|
- * thanks to Eric Gilmore
|
|
- * and Rolf G. Tews
|
|
- * for testing these extensively
|
|
- * Paul Diefenbaugh : Added full ACPI support
|
|
- */
|
|
-
|
|
-#include <linux/mm.h>
|
|
-#include <linux/interrupt.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/pci.h>
|
|
-#include <linux/mc146818rtc.h>
|
|
-#include <linux/acpi.h>
|
|
-#include <linux/sysdev.h>
|
|
-#include <linux/msi.h>
|
|
-#include <linux/htirq.h>
|
|
-#include <linux/dmar.h>
|
|
-#include <linux/jiffies.h>
|
|
-#ifdef CONFIG_ACPI
|
|
-#include <acpi/acpi_bus.h>
|
|
-#endif
|
|
-#include <linux/bootmem.h>
|
|
-
|
|
-#include <asm/idle.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/acpi.h>
|
|
-#include <asm/dma.h>
|
|
-#include <asm/i8259.h>
|
|
-#include <asm/nmi.h>
|
|
-#include <asm/msidef.h>
|
|
-#include <asm/hypertransport.h>
|
|
-
|
|
-#include <mach_ipi.h>
|
|
-#include <mach_apic.h>
|
|
-
|
|
-struct irq_cfg {
|
|
-#ifndef CONFIG_XEN
|
|
- cpumask_t domain;
|
|
- cpumask_t old_domain;
|
|
-#endif
|
|
- unsigned move_cleanup_count;
|
|
- u8 vector;
|
|
- u8 move_in_progress : 1;
|
|
-};
|
|
-
|
|
-/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
|
-static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
|
|
-
|
|
-static int assign_irq_vector(int irq, cpumask_t mask);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-int first_system_vector = 0xfe;
|
|
-
|
|
-char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
|
|
-#endif
|
|
-
|
|
-#define __apicdebuginit __init
|
|
-
|
|
-int sis_apic_bug; /* not actually supported, dummy for compile */
|
|
-
|
|
-static int no_timer_check;
|
|
-
|
|
-static int disable_timer_pin_1 __initdata;
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-#include <xen/interface/xen.h>
|
|
-#include <xen/interface/physdev.h>
|
|
-#include <xen/evtchn.h>
|
|
-
|
|
-/* Fake i8259 */
|
|
-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
|
|
-#define disable_8259A_irq(_irq) ((void)0)
|
|
-#define i8259A_irq_pending(_irq) (0)
|
|
-
|
|
-unsigned long io_apic_irqs;
|
|
-
|
|
-#define clear_IO_APIC() ((void)0)
|
|
-#else
|
|
-int timer_through_8259 __initdata;
|
|
-
|
|
-/* Where if anywhere is the i8259 connect in external int mode */
|
|
-static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
|
-#endif
|
|
-
|
|
-static DEFINE_SPINLOCK(ioapic_lock);
|
|
-static DEFINE_SPINLOCK(vector_lock);
|
|
-
|
|
-/*
|
|
- * # of IRQ routing registers
|
|
- */
|
|
-int nr_ioapic_registers[MAX_IO_APICS];
|
|
-
|
|
-/* I/O APIC entries */
|
|
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
-int nr_ioapics;
|
|
-
|
|
-/* MP IRQ source entries */
|
|
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
-
|
|
-/* # of MP IRQ source entries */
|
|
-int mp_irq_entries;
|
|
-
|
|
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
-
|
|
-/*
|
|
- * Rough estimation of how many shared IRQs there are, can
|
|
- * be changed anytime.
|
|
- */
|
|
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
|
|
-#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
|
|
-
|
|
-/*
|
|
- * This is performance-critical, we want to do it O(1)
|
|
- *
|
|
- * the indexing order of this array favors 1:1 mappings
|
|
- * between pins and IRQs.
|
|
- */
|
|
-
|
|
-static struct irq_pin_list {
|
|
- short apic, pin, next;
|
|
-} irq_2_pin[PIN_MAP_SIZE];
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-struct io_apic {
|
|
- unsigned int index;
|
|
- unsigned int unused[3];
|
|
- unsigned int data;
|
|
-};
|
|
-
|
|
-static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
-{
|
|
- return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
-}
|
|
-#endif
|
|
-
|
|
-static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- writel(reg, &io_apic->index);
|
|
- return readl(&io_apic->data);
|
|
-#else
|
|
- struct physdev_apic apic_op;
|
|
- int ret;
|
|
-
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
- apic_op.reg = reg;
|
|
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
- if (ret)
|
|
- return ret;
|
|
- return apic_op.value;
|
|
-#endif
|
|
-}
|
|
-
|
|
-static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- writel(reg, &io_apic->index);
|
|
- writel(value, &io_apic->data);
|
|
-#else
|
|
- struct physdev_apic apic_op;
|
|
-
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
- apic_op.reg = reg;
|
|
- apic_op.value = value;
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
-#endif
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-#define io_apic_modify io_apic_write
|
|
-#else
|
|
-/*
|
|
- * Re-write a value: to be used for read-modify-write
|
|
- * cycles where the read already set up the index register.
|
|
- */
|
|
-static inline void io_apic_modify(unsigned int apic, unsigned int value)
|
|
-{
|
|
- struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- writel(value, &io_apic->data);
|
|
-}
|
|
-
|
|
-static bool io_apic_level_ack_pending(unsigned int irq)
|
|
-{
|
|
- struct irq_pin_list *entry;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- entry = irq_2_pin + irq;
|
|
- for (;;) {
|
|
- unsigned int reg;
|
|
- int pin;
|
|
-
|
|
- pin = entry->pin;
|
|
- if (pin == -1)
|
|
- break;
|
|
- reg = io_apic_read(entry->apic, 0x10 + pin*2);
|
|
- /* Is the remote IRR bit set? */
|
|
- if (reg & IO_APIC_REDIR_REMOTE_IRR) {
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- return true;
|
|
- }
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- return false;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Synchronize the IO-APIC and the CPU by doing
|
|
- * a dummy read from the IO-APIC
|
|
- */
|
|
-static inline void io_apic_sync(unsigned int apic)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
- readl(&io_apic->data);
|
|
-#endif
|
|
-}
|
|
-
|
|
-union entry_union {
|
|
- struct { u32 w1, w2; };
|
|
- struct IO_APIC_route_entry entry;
|
|
-};
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
|
|
-{
|
|
- union entry_union eu;
|
|
- unsigned long flags;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- return eu.entry;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * When we write a new IO APIC routing entry, we need to write the high
|
|
- * word first! If the mask bit in the low word is clear, we will enable
|
|
- * the interrupt, and we need to make sure the entry is fully populated
|
|
- * before that happens.
|
|
- */
|
|
-static void
|
|
-__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
-{
|
|
- union entry_union eu;
|
|
- eu.entry = e;
|
|
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
-}
|
|
-
|
|
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
-{
|
|
- unsigned long flags;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __ioapic_write_entry(apic, pin, e);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * When we mask an IO APIC routing entry, we need to write the low
|
|
- * word first, in order to set the mask bit before we change the
|
|
- * high bits!
|
|
- */
|
|
-static void ioapic_mask_entry(int apic, int pin)
|
|
-{
|
|
- unsigned long flags;
|
|
- union entry_union eu = { .entry.mask = 1 };
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
- io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
-{
|
|
- int apic, pin;
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
-
|
|
- BUG_ON(irq >= NR_IRQS);
|
|
- for (;;) {
|
|
- unsigned int reg;
|
|
- apic = entry->apic;
|
|
- pin = entry->pin;
|
|
- if (pin == -1)
|
|
- break;
|
|
- io_apic_write(apic, 0x11 + pin*2, dest);
|
|
- reg = io_apic_read(apic, 0x10 + pin*2);
|
|
- reg &= ~IO_APIC_REDIR_VECTOR_MASK;
|
|
- reg |= vector;
|
|
- io_apic_modify(apic, reg);
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
-}
|
|
-
|
|
-static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- unsigned long flags;
|
|
- unsigned int dest;
|
|
- cpumask_t tmp;
|
|
-
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- return;
|
|
-
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
- /*
|
|
- * Only the high 8 bits are valid.
|
|
- */
|
|
- dest = SET_APIC_LOGICAL_ID(dest);
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __target_IO_APIC_irq(irq, dest, cfg->vector);
|
|
- irq_desc[irq].affinity = mask;
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
|
|
- * shared ISA-space IRQs, so we have to support them. We are super
|
|
- * fast in the common case, and fast for shared ISA-space IRQs.
|
|
- */
|
|
-static void add_pin_to_irq(unsigned int irq, int apic, int pin)
|
|
-{
|
|
- static int first_free_entry = NR_IRQS;
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
-
|
|
- BUG_ON(irq >= NR_IRQS);
|
|
- while (entry->next)
|
|
- entry = irq_2_pin + entry->next;
|
|
-
|
|
- if (entry->pin != -1) {
|
|
- entry->next = first_free_entry;
|
|
- entry = irq_2_pin + entry->next;
|
|
- if (++first_free_entry >= PIN_MAP_SIZE)
|
|
- panic("io_apic.c: ran out of irq_2_pin entries!");
|
|
- }
|
|
- entry->apic = apic;
|
|
- entry->pin = pin;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Reroute an IRQ to a different pin.
|
|
- */
|
|
-static void __init replace_pin_at_irq(unsigned int irq,
|
|
- int oldapic, int oldpin,
|
|
- int newapic, int newpin)
|
|
-{
|
|
- struct irq_pin_list *entry = irq_2_pin + irq;
|
|
-
|
|
- while (1) {
|
|
- if (entry->apic == oldapic && entry->pin == oldpin) {
|
|
- entry->apic = newapic;
|
|
- entry->pin = newpin;
|
|
- }
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
-}
|
|
-
|
|
-#define __DO_ACTION(R, ACTION, FINAL) \
|
|
- \
|
|
-{ \
|
|
- int pin; \
|
|
- struct irq_pin_list *entry = irq_2_pin + irq; \
|
|
- \
|
|
- BUG_ON(irq >= NR_IRQS); \
|
|
- for (;;) { \
|
|
- unsigned int reg; \
|
|
- pin = entry->pin; \
|
|
- if (pin == -1) \
|
|
- break; \
|
|
- reg = io_apic_read(entry->apic, 0x10 + R + pin*2); \
|
|
- reg ACTION; \
|
|
- io_apic_modify(entry->apic, reg); \
|
|
- FINAL; \
|
|
- if (!entry->next) \
|
|
- break; \
|
|
- entry = irq_2_pin + entry->next; \
|
|
- } \
|
|
-}
|
|
-
|
|
-#define DO_ACTION(name,R,ACTION, FINAL) \
|
|
- \
|
|
- static void name##_IO_APIC_irq (unsigned int irq) \
|
|
- __DO_ACTION(R, ACTION, FINAL)
|
|
-
|
|
-/* mask = 1 */
|
|
-DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
|
|
-
|
|
-/* mask = 0 */
|
|
-DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
|
|
-
|
|
-static void mask_IO_APIC_irq (unsigned int irq)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __mask_IO_APIC_irq(irq);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-static void unmask_IO_APIC_irq (unsigned int irq)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __unmask_IO_APIC_irq(irq);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
-static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
-{
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- /* Check delivery_mode to be sure we're not clearing an SMI pin */
|
|
- entry = ioapic_read_entry(apic, pin);
|
|
- if (entry.delivery_mode == dest_SMI)
|
|
- return;
|
|
- /*
|
|
- * Disable it in the IO-APIC irq-routing table:
|
|
- */
|
|
- ioapic_mask_entry(apic, pin);
|
|
-}
|
|
-
|
|
-static void clear_IO_APIC (void)
|
|
-{
|
|
- int apic, pin;
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++)
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
- clear_IO_APIC_pin(apic, pin);
|
|
-}
|
|
-
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-int skip_ioapic_setup;
|
|
-int ioapic_force;
|
|
-
|
|
-static int __init parse_noapic(char *str)
|
|
-{
|
|
- disable_ioapic_setup();
|
|
- return 0;
|
|
-}
|
|
-early_param("noapic", parse_noapic);
|
|
-
|
|
-/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
|
|
-static int __init disable_timer_pin_setup(char *arg)
|
|
-{
|
|
- disable_timer_pin_1 = 1;
|
|
- return 1;
|
|
-}
|
|
-__setup("disable_timer_pin_1", disable_timer_pin_setup);
|
|
-
|
|
-
|
|
-/*
|
|
- * Find the IRQ entry number of a certain pin.
|
|
- */
|
|
-static int find_irq_entry(int apic, int pin, int type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_irqtype == type &&
|
|
- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
- mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
- mp_irqs[i].mp_dstirq == pin)
|
|
- return i;
|
|
-
|
|
- return -1;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Find the pin to which IRQ[irq] (ISA) is connected
|
|
- */
|
|
-static int __init find_isa_irq_pin(int irq, int type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
-
|
|
- if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mp_irqtype == type) &&
|
|
- (mp_irqs[i].mp_srcbusirq == irq))
|
|
-
|
|
- return mp_irqs[i].mp_dstirq;
|
|
- }
|
|
- return -1;
|
|
-}
|
|
-
|
|
-static int __init find_isa_irq_apic(int irq, int type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
-
|
|
- if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mp_irqtype == type) &&
|
|
- (mp_irqs[i].mp_srcbusirq == irq))
|
|
- break;
|
|
- }
|
|
- if (i < mp_irq_entries) {
|
|
- int apic;
|
|
- for(apic = 0; apic < nr_ioapics; apic++) {
|
|
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
|
- return apic;
|
|
- }
|
|
- }
|
|
-
|
|
- return -1;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Find a specific PCI IRQ entry.
|
|
- * Not an __init, possibly needed by modules
|
|
- */
|
|
-static int pin_2_irq(int idx, int apic, int pin);
|
|
-
|
|
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
|
|
-{
|
|
- int apic, i, best_guess = -1;
|
|
-
|
|
- apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
|
|
- bus, slot, pin);
|
|
- if (test_bit(bus, mp_bus_not_pci)) {
|
|
- apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
|
- return -1;
|
|
- }
|
|
- for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++)
|
|
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
- mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
|
- break;
|
|
-
|
|
- if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
- !mp_irqs[i].mp_irqtype &&
|
|
- (bus == lbus) &&
|
|
- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
- int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
|
|
-
|
|
- if (!(apic || IO_APIC_IRQ(irq)))
|
|
- continue;
|
|
-
|
|
- if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
|
- return irq;
|
|
- /*
|
|
- * Use the first all-but-pin matching entry as a
|
|
- * best-guess fuzzy result for broken mptables.
|
|
- */
|
|
- if (best_guess < 0)
|
|
- best_guess = irq;
|
|
- }
|
|
- }
|
|
- BUG_ON(best_guess >= NR_IRQS);
|
|
- return best_guess;
|
|
-}
|
|
-
|
|
-/* ISA interrupts are always polarity zero edge triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_ISA_trigger(idx) (0)
|
|
-#define default_ISA_polarity(idx) (0)
|
|
-
|
|
-/* PCI interrupts are always polarity one level triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_PCI_trigger(idx) (1)
|
|
-#define default_PCI_polarity(idx) (1)
|
|
-
|
|
-static int MPBIOS_polarity(int idx)
|
|
-{
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
- int polarity;
|
|
-
|
|
- /*
|
|
- * Determine IRQ line polarity (high active or low active):
|
|
- */
|
|
- switch (mp_irqs[idx].mp_irqflag & 3)
|
|
- {
|
|
- case 0: /* conforms, ie. bus-type dependent polarity */
|
|
- if (test_bit(bus, mp_bus_not_pci))
|
|
- polarity = default_ISA_polarity(idx);
|
|
- else
|
|
- polarity = default_PCI_polarity(idx);
|
|
- break;
|
|
- case 1: /* high active */
|
|
- {
|
|
- polarity = 0;
|
|
- break;
|
|
- }
|
|
- case 2: /* reserved */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- case 3: /* low active */
|
|
- {
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- default: /* invalid */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- }
|
|
- return polarity;
|
|
-}
|
|
-
|
|
-static int MPBIOS_trigger(int idx)
|
|
-{
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
- int trigger;
|
|
-
|
|
- /*
|
|
- * Determine IRQ trigger mode (edge or level sensitive):
|
|
- */
|
|
- switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
|
|
- {
|
|
- case 0: /* conforms, ie. bus-type dependent */
|
|
- if (test_bit(bus, mp_bus_not_pci))
|
|
- trigger = default_ISA_trigger(idx);
|
|
- else
|
|
- trigger = default_PCI_trigger(idx);
|
|
- break;
|
|
- case 1: /* edge */
|
|
- {
|
|
- trigger = 0;
|
|
- break;
|
|
- }
|
|
- case 2: /* reserved */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- case 3: /* level */
|
|
- {
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- default: /* invalid */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 0;
|
|
- break;
|
|
- }
|
|
- }
|
|
- return trigger;
|
|
-}
|
|
-
|
|
-static inline int irq_polarity(int idx)
|
|
-{
|
|
- return MPBIOS_polarity(idx);
|
|
-}
|
|
-
|
|
-static inline int irq_trigger(int idx)
|
|
-{
|
|
- return MPBIOS_trigger(idx);
|
|
-}
|
|
-
|
|
-static int pin_2_irq(int idx, int apic, int pin)
|
|
-{
|
|
- int irq, i;
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
-
|
|
- /*
|
|
- * Debugging check, we are in big trouble if this message pops up!
|
|
- */
|
|
- if (mp_irqs[idx].mp_dstirq != pin)
|
|
- printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
-
|
|
- if (test_bit(bus, mp_bus_not_pci)) {
|
|
- irq = mp_irqs[idx].mp_srcbusirq;
|
|
- } else {
|
|
- /*
|
|
- * PCI IRQs are mapped in order
|
|
- */
|
|
- i = irq = 0;
|
|
- while (i < apic)
|
|
- irq += nr_ioapic_registers[i++];
|
|
- irq += pin;
|
|
- }
|
|
- BUG_ON(irq >= NR_IRQS);
|
|
- return irq;
|
|
-}
|
|
-
|
|
-void lock_vector_lock(void)
|
|
-{
|
|
- /* Used to the online set of cpus does not change
|
|
- * during assign_irq_vector.
|
|
- */
|
|
- spin_lock(&vector_lock);
|
|
-}
|
|
-
|
|
-void unlock_vector_lock(void)
|
|
-{
|
|
- spin_unlock(&vector_lock);
|
|
-}
|
|
-
|
|
-static int __assign_irq_vector(int irq, cpumask_t mask)
|
|
-{
|
|
- struct physdev_irq irq_op;
|
|
- struct irq_cfg *cfg;
|
|
-
|
|
- BUG_ON((unsigned)irq >= NR_IRQS);
|
|
-
|
|
- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
- return -EINVAL;
|
|
-
|
|
- cfg = &irq_cfg[irq];
|
|
-
|
|
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
|
|
- return -EBUSY;
|
|
-
|
|
- if (cfg->vector)
|
|
- return 0;
|
|
-
|
|
- irq_op.irq = irq;
|
|
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
- return -ENOSPC;
|
|
-
|
|
- cfg->vector = irq_op.vector;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int assign_irq_vector(int irq, cpumask_t mask)
|
|
-{
|
|
- int err;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- err = __assign_irq_vector(irq, mask);
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
- return err;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static void __clear_irq_vector(int irq)
|
|
-{
|
|
- struct irq_cfg *cfg;
|
|
- cpumask_t mask;
|
|
- int cpu, vector;
|
|
-
|
|
- BUG_ON((unsigned)irq >= NR_IRQS);
|
|
- cfg = &irq_cfg[irq];
|
|
- BUG_ON(!cfg->vector);
|
|
-
|
|
- vector = cfg->vector;
|
|
- cpus_and(mask, cfg->domain, cpu_online_map);
|
|
- for_each_cpu_mask_nr(cpu, mask)
|
|
- per_cpu(vector_irq, cpu)[vector] = -1;
|
|
-
|
|
- cfg->vector = 0;
|
|
- cpus_clear(cfg->domain);
|
|
-}
|
|
-
|
|
-void __setup_vector_irq(int cpu)
|
|
-{
|
|
- /* Initialize vector_irq on a new cpu */
|
|
- /* This function must be called with vector_lock held */
|
|
- int irq, vector;
|
|
-
|
|
- /* Mark the inuse vectors */
|
|
- for (irq = 0; irq < NR_IRQS; ++irq) {
|
|
- if (!cpu_isset(cpu, irq_cfg[irq].domain))
|
|
- continue;
|
|
- vector = irq_cfg[irq].vector;
|
|
- per_cpu(vector_irq, cpu)[vector] = irq;
|
|
- }
|
|
- /* Mark the free vectors */
|
|
- for (vector = 0; vector < NR_VECTORS; ++vector) {
|
|
- irq = per_cpu(vector_irq, cpu)[vector];
|
|
- if (irq < 0)
|
|
- continue;
|
|
- if (!cpu_isset(cpu, irq_cfg[irq].domain))
|
|
- per_cpu(vector_irq, cpu)[vector] = -1;
|
|
- }
|
|
-}
|
|
-
|
|
-static struct irq_chip ioapic_chip;
|
|
-
|
|
-static void ioapic_register_intr(int irq, unsigned long trigger)
|
|
-{
|
|
- if (trigger) {
|
|
- irq_desc[irq].status |= IRQ_LEVEL;
|
|
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
- handle_fasteoi_irq, "fasteoi");
|
|
- } else {
|
|
- irq_desc[irq].status &= ~IRQ_LEVEL;
|
|
- set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
- handle_edge_irq, "edge");
|
|
- }
|
|
-}
|
|
-#else
|
|
-#define ioapic_register_intr(irq, trigger) evtchn_register_pirq(irq)
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
|
|
- int trigger, int polarity)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- struct IO_APIC_route_entry entry;
|
|
- cpumask_t mask;
|
|
-
|
|
- if (!IO_APIC_IRQ(irq))
|
|
- return;
|
|
-
|
|
- mask = TARGET_CPUS;
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- cpus_and(mask, cfg->domain, mask);
|
|
-#endif
|
|
-
|
|
- apic_printk(APIC_VERBOSE,KERN_DEBUG
|
|
- "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
|
|
- "IRQ %d Mode:%i Active:%i)\n",
|
|
- apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
|
|
- irq, trigger, polarity);
|
|
-
|
|
- /*
|
|
- * add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- memset(&entry,0,sizeof(entry));
|
|
-
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.dest = cpu_mask_to_apicid(mask);
|
|
- entry.mask = 0; /* enable IRQ */
|
|
- entry.trigger = trigger;
|
|
- entry.polarity = polarity;
|
|
- entry.vector = cfg->vector;
|
|
-
|
|
- /* Mask level triggered irqs.
|
|
- * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
|
|
- */
|
|
- if (trigger)
|
|
- entry.mask = 1;
|
|
-
|
|
- ioapic_register_intr(irq, trigger);
|
|
- if (irq < 16)
|
|
- disable_8259A_irq(irq);
|
|
-
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
-}
|
|
-
|
|
-static void __init setup_IO_APIC_irqs(void)
|
|
-{
|
|
- int apic, pin, idx, irq, first_notcon = 1;
|
|
-
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
-
|
|
- idx = find_irq_entry(apic,pin,mp_INT);
|
|
- if (idx == -1) {
|
|
- if (first_notcon) {
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
|
|
- first_notcon = 0;
|
|
- } else
|
|
- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
|
|
- continue;
|
|
- }
|
|
- if (!first_notcon) {
|
|
- apic_printk(APIC_VERBOSE, " not connected.\n");
|
|
- first_notcon = 1;
|
|
- }
|
|
-
|
|
- irq = pin_2_irq(idx, apic, pin);
|
|
- add_pin_to_irq(irq, apic, pin);
|
|
-
|
|
- setup_IO_APIC_irq(apic, pin, irq,
|
|
- irq_trigger(idx), irq_polarity(idx));
|
|
- }
|
|
- }
|
|
-
|
|
- if (!first_notcon)
|
|
- apic_printk(APIC_VERBOSE, " not connected.\n");
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Set up the timer pin, possibly with the 8259A-master behind.
|
|
- */
|
|
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
- int vector)
|
|
-{
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- memset(&entry, 0, sizeof(entry));
|
|
-
|
|
- /*
|
|
- * We use logical delivery to get the timer IRQ
|
|
- * to the first CPU.
|
|
- */
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 1; /* mask IRQ now */
|
|
- entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
- entry.polarity = 0;
|
|
- entry.trigger = 0;
|
|
- entry.vector = vector;
|
|
-
|
|
- /*
|
|
- * The timer IRQ doesn't have to know that behind the
|
|
- * scene we may have a 8259A-master in AEOI mode ...
|
|
- */
|
|
- set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
|
|
-
|
|
- /*
|
|
- * Add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
-}
|
|
-
|
|
-void __apicdebuginit print_IO_APIC(void)
|
|
-{
|
|
- int apic, i;
|
|
- union IO_APIC_reg_00 reg_00;
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- union IO_APIC_reg_02 reg_02;
|
|
- unsigned long flags;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
- for (i = 0; i < nr_ioapics; i++)
|
|
- printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
|
-
|
|
- /*
|
|
- * We are a bit conservative about what we expect. We have to
|
|
- * know about every hardware change ASAP.
|
|
- */
|
|
- printk(KERN_INFO "testing the IO APIC.......................\n");
|
|
-
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
- reg_01.raw = io_apic_read(apic, 1);
|
|
- if (reg_01.bits.version >= 0x10)
|
|
- reg_02.raw = io_apic_read(apic, 2);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- printk("\n");
|
|
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
|
- printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
- printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
-
|
|
- printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
|
|
- printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
|
|
-
|
|
- printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
|
|
- printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
|
|
-
|
|
- if (reg_01.bits.version >= 0x10) {
|
|
- printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
|
|
- printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
|
|
- }
|
|
-
|
|
- printk(KERN_DEBUG ".... IRQ redirection table:\n");
|
|
-
|
|
- printk(KERN_DEBUG " NR Dst Mask Trig IRR Pol"
|
|
- " Stat Dmod Deli Vect: \n");
|
|
-
|
|
- for (i = 0; i <= reg_01.bits.entries; i++) {
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- entry = ioapic_read_entry(apic, i);
|
|
-
|
|
- printk(KERN_DEBUG " %02x %03X ",
|
|
- i,
|
|
- entry.dest
|
|
- );
|
|
-
|
|
- printk("%1d %1d %1d %1d %1d %1d %1d %02X\n",
|
|
- entry.mask,
|
|
- entry.trigger,
|
|
- entry.irr,
|
|
- entry.polarity,
|
|
- entry.delivery_status,
|
|
- entry.dest_mode,
|
|
- entry.delivery_mode,
|
|
- entry.vector
|
|
- );
|
|
- }
|
|
- }
|
|
- printk(KERN_DEBUG "IRQ to pin mappings:\n");
|
|
- for (i = 0; i < NR_IRQS; i++) {
|
|
- struct irq_pin_list *entry = irq_2_pin + i;
|
|
- if (entry->pin < 0)
|
|
- continue;
|
|
- printk(KERN_DEBUG "IRQ%d ", i);
|
|
- for (;;) {
|
|
- printk("-> %d:%d", entry->apic, entry->pin);
|
|
- if (!entry->next)
|
|
- break;
|
|
- entry = irq_2_pin + entry->next;
|
|
- }
|
|
- printk("\n");
|
|
- }
|
|
-
|
|
- printk(KERN_INFO ".................................... done.\n");
|
|
-
|
|
- return;
|
|
-}
|
|
-
|
|
-static __apicdebuginit void print_APIC_bitfield (int base)
|
|
-{
|
|
- unsigned int v;
|
|
- int i, j;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk(KERN_DEBUG "0123456789abcdef0123456789abcdef\n" KERN_DEBUG);
|
|
- for (i = 0; i < 8; i++) {
|
|
- v = apic_read(base + i*0x10);
|
|
- for (j = 0; j < 32; j++) {
|
|
- if (v & (1<<j))
|
|
- printk("1");
|
|
- else
|
|
- printk("0");
|
|
- }
|
|
- printk("\n");
|
|
- }
|
|
-}
|
|
-
|
|
-void __apicdebuginit print_local_APIC(void * dummy)
|
|
-{
|
|
- unsigned int v, ver, maxlvt;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
|
- smp_processor_id(), hard_smp_processor_id());
|
|
- v = apic_read(APIC_ID);
|
|
- printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
|
|
- v = apic_read(APIC_LVR);
|
|
- printk(KERN_INFO "... APIC VERSION: %08x\n", v);
|
|
- ver = GET_APIC_VERSION(v);
|
|
- maxlvt = lapic_get_maxlvt();
|
|
-
|
|
- v = apic_read(APIC_TASKPRI);
|
|
- printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
|
|
-
|
|
- v = apic_read(APIC_ARBPRI);
|
|
- printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
|
|
- v & APIC_ARBPRI_MASK);
|
|
- v = apic_read(APIC_PROCPRI);
|
|
- printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
|
|
-
|
|
- v = apic_read(APIC_EOI);
|
|
- printk(KERN_DEBUG "... APIC EOI: %08x\n", v);
|
|
- v = apic_read(APIC_RRR);
|
|
- printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
|
|
- v = apic_read(APIC_LDR);
|
|
- printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
|
|
- v = apic_read(APIC_DFR);
|
|
- printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
|
|
- v = apic_read(APIC_SPIV);
|
|
- printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
|
|
-
|
|
- printk(KERN_DEBUG "... APIC ISR field:\n");
|
|
- print_APIC_bitfield(APIC_ISR);
|
|
- printk(KERN_DEBUG "... APIC TMR field:\n");
|
|
- print_APIC_bitfield(APIC_TMR);
|
|
- printk(KERN_DEBUG "... APIC IRR field:\n");
|
|
- print_APIC_bitfield(APIC_IRR);
|
|
-
|
|
- v = apic_read(APIC_ESR);
|
|
- printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
|
|
-
|
|
- v = apic_read(APIC_ICR);
|
|
- printk(KERN_DEBUG "... APIC ICR: %08x\n", v);
|
|
- v = apic_read(APIC_ICR2);
|
|
- printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
|
|
-
|
|
- v = apic_read(APIC_LVTT);
|
|
- printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
|
|
-
|
|
- if (maxlvt > 3) { /* PC is LVT#4. */
|
|
- v = apic_read(APIC_LVTPC);
|
|
- printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
|
|
- }
|
|
- v = apic_read(APIC_LVT0);
|
|
- printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
|
|
- v = apic_read(APIC_LVT1);
|
|
- printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
|
|
-
|
|
- if (maxlvt > 2) { /* ERR is LVT#3. */
|
|
- v = apic_read(APIC_LVTERR);
|
|
- printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
|
|
- }
|
|
-
|
|
- v = apic_read(APIC_TMICT);
|
|
- printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
|
|
- v = apic_read(APIC_TMCCT);
|
|
- printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
|
|
- v = apic_read(APIC_TDCR);
|
|
- printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-void print_all_local_APICs (void)
|
|
-{
|
|
- on_each_cpu(print_local_APIC, NULL, 1);
|
|
-}
|
|
-
|
|
-void __apicdebuginit print_PIC(void)
|
|
-{
|
|
- unsigned int v;
|
|
- unsigned long flags;
|
|
-
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
- printk(KERN_DEBUG "\nprinting PIC contents\n");
|
|
-
|
|
- spin_lock_irqsave(&i8259A_lock, flags);
|
|
-
|
|
- v = inb(0xa1) << 8 | inb(0x21);
|
|
- printk(KERN_DEBUG "... PIC IMR: %04x\n", v);
|
|
-
|
|
- v = inb(0xa0) << 8 | inb(0x20);
|
|
- printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
|
|
-
|
|
- outb(0x0b,0xa0);
|
|
- outb(0x0b,0x20);
|
|
- v = inb(0xa0) << 8 | inb(0x20);
|
|
- outb(0x0a,0xa0);
|
|
- outb(0x0a,0x20);
|
|
-
|
|
- spin_unlock_irqrestore(&i8259A_lock, flags);
|
|
-
|
|
- printk(KERN_DEBUG "... PIC ISR: %04x\n", v);
|
|
-
|
|
- v = inb(0x4d1) << 8 | inb(0x4d0);
|
|
- printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
|
|
-}
|
|
-#else
|
|
-void __apicdebuginit print_IO_APIC(void) {}
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-void __init enable_IO_APIC(void)
|
|
-{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
-#ifndef CONFIG_XEN
|
|
- int i8259_apic, i8259_pin;
|
|
-#endif
|
|
- int i, apic;
|
|
- unsigned long flags;
|
|
-
|
|
- for (i = 0; i < PIN_MAP_SIZE; i++) {
|
|
- irq_2_pin[i].pin = -1;
|
|
- irq_2_pin[i].next = 0;
|
|
- }
|
|
-
|
|
- /*
|
|
- * The number of IO-APIC IRQ registers (== #pins):
|
|
- */
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(apic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
- }
|
|
-#ifndef CONFIG_XEN
|
|
- for(apic = 0; apic < nr_ioapics; apic++) {
|
|
- int pin;
|
|
- /* See if any of the pins is in ExtINT mode */
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
- struct IO_APIC_route_entry entry;
|
|
- entry = ioapic_read_entry(apic, pin);
|
|
-
|
|
- /* If the interrupt line is enabled and in ExtInt mode
|
|
- * I have found the pin where the i8259 is connected.
|
|
- */
|
|
- if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
|
|
- ioapic_i8259.apic = apic;
|
|
- ioapic_i8259.pin = pin;
|
|
- goto found_i8259;
|
|
- }
|
|
- }
|
|
- }
|
|
- found_i8259:
|
|
- /* Look to see what if the MP table has reported the ExtINT */
|
|
- i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
|
|
- i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
|
|
- /* Trust the MP table if nothing is setup in the hardware */
|
|
- if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
|
|
- printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
|
|
- ioapic_i8259.pin = i8259_pin;
|
|
- ioapic_i8259.apic = i8259_apic;
|
|
- }
|
|
- /* Complain if the MP table and the hardware disagree */
|
|
- if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
|
|
- (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
|
|
- {
|
|
- printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
|
|
- }
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Do not trust the IO-APIC being empty at bootup
|
|
- */
|
|
- clear_IO_APIC();
|
|
-}
|
|
-
|
|
-/*
|
|
- * Not an __init, needed by the reboot code
|
|
- */
|
|
-void disable_IO_APIC(void)
|
|
-{
|
|
- /*
|
|
- * Clear the IO-APIC before rebooting:
|
|
- */
|
|
- clear_IO_APIC();
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * If the i8259 is routed through an IOAPIC
|
|
- * Put that IOAPIC in virtual wire mode
|
|
- * so legacy interrupts can be delivered.
|
|
- */
|
|
- if (ioapic_i8259.pin != -1) {
|
|
- struct IO_APIC_route_entry entry;
|
|
-
|
|
- memset(&entry, 0, sizeof(entry));
|
|
- entry.mask = 0; /* Enabled */
|
|
- entry.trigger = 0; /* Edge */
|
|
- entry.irr = 0;
|
|
- entry.polarity = 0; /* High */
|
|
- entry.delivery_status = 0;
|
|
- entry.dest_mode = 0; /* Physical */
|
|
- entry.delivery_mode = dest_ExtINT; /* ExtInt */
|
|
- entry.vector = 0;
|
|
- entry.dest = GET_APIC_ID(read_apic_id());
|
|
-
|
|
- /*
|
|
- * Add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
|
- }
|
|
-
|
|
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
|
-#endif
|
|
-}
|
|
-
|
|
-/*
|
|
- * There is a nasty bug in some older SMP boards, their mptable lies
|
|
- * about the timer IRQ. We do the following to work around the situation:
|
|
- *
|
|
- * - timer IRQ defaults to IO-APIC IRQ
|
|
- * - if this function detects that timer IRQs are defunct, then we fall
|
|
- * back to ISA timer IRQs
|
|
- */
|
|
-#ifndef CONFIG_XEN
|
|
-static int __init timer_irq_works(void)
|
|
-{
|
|
- unsigned long t1 = jiffies;
|
|
- unsigned long flags;
|
|
-
|
|
- local_save_flags(flags);
|
|
- local_irq_enable();
|
|
- /* Let ten ticks pass... */
|
|
- mdelay((10 * 1000) / HZ);
|
|
- local_irq_restore(flags);
|
|
-
|
|
- /*
|
|
- * Expect a few ticks at least, to be sure some possible
|
|
- * glue logic does not lock up after one or two first
|
|
- * ticks in a non-ExtINT mode. Also the local APIC
|
|
- * might have cached one ExtINT interrupt. Finally, at
|
|
- * least one tick may be lost due to delays.
|
|
- */
|
|
-
|
|
- /* jiffies wrap? */
|
|
- if (time_after(jiffies, t1 + 4))
|
|
- return 1;
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * In the SMP+IOAPIC case it might happen that there are an unspecified
|
|
- * number of pending IRQ events unhandled. These cases are very rare,
|
|
- * so we 'resend' these IRQs via IPIs, to the same CPU. It's much
|
|
- * better to do it this way as thus we do not have to be aware of
|
|
- * 'pending' interrupts in the IRQ path, except at this point.
|
|
- */
|
|
-/*
|
|
- * Edge triggered needs to resend any interrupt
|
|
- * that was delayed but this is now handled in the device
|
|
- * independent code.
|
|
- */
|
|
-
|
|
-/*
|
|
- * Starting up a edge-triggered IO-APIC interrupt is
|
|
- * nasty - we need to make sure that we get the edge.
|
|
- * If it is already asserted for some reason, we need
|
|
- * return 1 to indicate that is was pending.
|
|
- *
|
|
- * This is not complete - we should be able to fake
|
|
- * an edge even if it isn't on the 8259A...
|
|
- */
|
|
-
|
|
-static unsigned int startup_ioapic_irq(unsigned int irq)
|
|
-{
|
|
- int was_pending = 0;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- if (irq < 16) {
|
|
- disable_8259A_irq(irq);
|
|
- if (i8259A_irq_pending(irq))
|
|
- was_pending = 1;
|
|
- }
|
|
- __unmask_IO_APIC_irq(irq);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- return was_pending;
|
|
-}
|
|
-
|
|
-static int ioapic_retrigger_irq(unsigned int irq)
|
|
-{
|
|
- struct irq_cfg *cfg = &irq_cfg[irq];
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
-
|
|
- return 1;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Level and edge triggered IO-APIC interrupts need different handling,
|
|
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
|
|
- * handled with the level-triggered descriptor, but that one has slightly
|
|
- * more overhead. Level-triggered interrupts cannot be handled with the
|
|
- * edge-triggered handler, without risking IRQ storms and other ugly
|
|
- * races.
|
|
- */
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-asmlinkage void smp_irq_move_cleanup_interrupt(void)
|
|
-{
|
|
- unsigned vector, me;
|
|
- ack_APIC_irq();
|
|
- exit_idle();
|
|
- irq_enter();
|
|
-
|
|
- me = smp_processor_id();
|
|
- for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
|
- unsigned int irq;
|
|
- struct irq_desc *desc;
|
|
- struct irq_cfg *cfg;
|
|
- irq = __get_cpu_var(vector_irq)[vector];
|
|
- if (irq >= NR_IRQS)
|
|
- continue;
|
|
-
|
|
- desc = irq_desc + irq;
|
|
- cfg = irq_cfg + irq;
|
|
- spin_lock(&desc->lock);
|
|
- if (!cfg->move_cleanup_count)
|
|
- goto unlock;
|
|
-
|
|
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain))
|
|
- goto unlock;
|
|
-
|
|
- __get_cpu_var(vector_irq)[vector] = -1;
|
|
- cfg->move_cleanup_count--;
|
|
-unlock:
|
|
- spin_unlock(&desc->lock);
|
|
- }
|
|
-
|
|
- irq_exit();
|
|
-}
|
|
-
|
|
-static void irq_complete_move(unsigned int irq)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- unsigned vector, me;
|
|
-
|
|
- if (likely(!cfg->move_in_progress))
|
|
- return;
|
|
-
|
|
- vector = ~get_irq_regs()->orig_ax;
|
|
- me = smp_processor_id();
|
|
- if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
|
|
- cpumask_t cleanup_mask;
|
|
-
|
|
- cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
- cfg->move_cleanup_count = cpus_weight(cleanup_mask);
|
|
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
- cfg->move_in_progress = 0;
|
|
- }
|
|
-}
|
|
-#else
|
|
-static inline void irq_complete_move(unsigned int irq) {}
|
|
-#endif
|
|
-
|
|
-static void ack_apic_edge(unsigned int irq)
|
|
-{
|
|
- irq_complete_move(irq);
|
|
- move_native_irq(irq);
|
|
- ack_APIC_irq();
|
|
-}
|
|
-
|
|
-static void ack_apic_level(unsigned int irq)
|
|
-{
|
|
- int do_unmask_irq = 0;
|
|
-
|
|
- irq_complete_move(irq);
|
|
-#ifdef CONFIG_GENERIC_PENDING_IRQ
|
|
- /* If we are moving the irq we need to mask it */
|
|
- if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
|
|
- do_unmask_irq = 1;
|
|
- mask_IO_APIC_irq(irq);
|
|
- }
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * We must acknowledge the irq before we move it or the acknowledge will
|
|
- * not propagate properly.
|
|
- */
|
|
- ack_APIC_irq();
|
|
-
|
|
- /* Now we can move and renable the irq */
|
|
- if (unlikely(do_unmask_irq)) {
|
|
- /* Only migrate the irq if the ack has been received.
|
|
- *
|
|
- * On rare occasions the broadcast level triggered ack gets
|
|
- * delayed going to ioapics, and if we reprogram the
|
|
- * vector while Remote IRR is still set the irq will never
|
|
- * fire again.
|
|
- *
|
|
- * To prevent this scenario we read the Remote IRR bit
|
|
- * of the ioapic. This has two effects.
|
|
- * - On any sane system the read of the ioapic will
|
|
- * flush writes (and acks) going to the ioapic from
|
|
- * this cpu.
|
|
- * - We get to see if the ACK has actually been delivered.
|
|
- *
|
|
- * Based on failed experiments of reprogramming the
|
|
- * ioapic entry from outside of irq context starting
|
|
- * with masking the ioapic entry and then polling until
|
|
- * Remote IRR was clear before reprogramming the
|
|
- * ioapic I don't trust the Remote IRR bit to be
|
|
- * completey accurate.
|
|
- *
|
|
- * However there appears to be no other way to plug
|
|
- * this race, so if the Remote IRR bit is not
|
|
- * accurate and is causing problems then it is a hardware bug
|
|
- * and you can go talk to the chipset vendor about it.
|
|
- */
|
|
- if (!io_apic_level_ack_pending(irq))
|
|
- move_masked_irq(irq);
|
|
- unmask_IO_APIC_irq(irq);
|
|
- }
|
|
-}
|
|
-
|
|
-static struct irq_chip ioapic_chip __read_mostly = {
|
|
- .name = "IO-APIC",
|
|
- .startup = startup_ioapic_irq,
|
|
- .mask = mask_IO_APIC_irq,
|
|
- .unmask = unmask_IO_APIC_irq,
|
|
- .ack = ack_apic_edge,
|
|
- .eoi = ack_apic_level,
|
|
-#ifdef CONFIG_SMP
|
|
- .set_affinity = set_ioapic_affinity_irq,
|
|
-#endif
|
|
- .retrigger = ioapic_retrigger_irq,
|
|
-};
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-static inline void init_IO_APIC_traps(void)
|
|
-{
|
|
- int irq;
|
|
-
|
|
- /*
|
|
- * NOTE! The local APIC isn't very good at handling
|
|
- * multiple interrupts at the same interrupt level.
|
|
- * As the interrupt level is determined by taking the
|
|
- * vector number and shifting that right by 4, we
|
|
- * want to spread these out a bit so that they don't
|
|
- * all fall in the same interrupt level.
|
|
- *
|
|
- * Also, we've got to be careful not to trash gate
|
|
- * 0x80, because int 0x80 is hm, kind of importantish. ;)
|
|
- */
|
|
- for (irq = 0; irq < NR_IRQS ; irq++) {
|
|
- if (IO_APIC_IRQ(irq) && !irq_cfg[irq].vector) {
|
|
- /*
|
|
- * Hmm.. We don't have an entry for this,
|
|
- * so default to an old-fashioned 8259
|
|
- * interrupt if we can..
|
|
- */
|
|
- if (irq < 16)
|
|
- make_8259A_irq(irq);
|
|
-#ifndef CONFIG_XEN
|
|
- else
|
|
- /* Strange. Oh, well.. */
|
|
- irq_desc[irq].chip = &no_irq_chip;
|
|
-#endif
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static void unmask_lapic_irq(unsigned int irq)
|
|
-{
|
|
- unsigned long v;
|
|
-
|
|
- v = apic_read(APIC_LVT0);
|
|
- apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
-}
|
|
-
|
|
-static void mask_lapic_irq(unsigned int irq)
|
|
-{
|
|
- unsigned long v;
|
|
-
|
|
- v = apic_read(APIC_LVT0);
|
|
- apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
|
|
-}
|
|
-
|
|
-static void ack_lapic_irq (unsigned int irq)
|
|
-{
|
|
- ack_APIC_irq();
|
|
-}
|
|
-
|
|
-static struct irq_chip lapic_chip __read_mostly = {
|
|
- .name = "local-APIC",
|
|
- .mask = mask_lapic_irq,
|
|
- .unmask = unmask_lapic_irq,
|
|
- .ack = ack_lapic_irq,
|
|
-};
|
|
-
|
|
-static void lapic_register_intr(int irq)
|
|
-{
|
|
- irq_desc[irq].status &= ~IRQ_LEVEL;
|
|
- set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
|
- "edge");
|
|
-}
|
|
-
|
|
-static void __init setup_nmi(void)
|
|
-{
|
|
- /*
|
|
- * Dirty trick to enable the NMI watchdog ...
|
|
- * We put the 8259A master into AEOI mode and
|
|
- * unmask on all local APICs LVT0 as NMI.
|
|
- *
|
|
- * The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
|
|
- * is from Maciej W. Rozycki - so we do not have to EOI from
|
|
- * the NMI handler or the timer interrupt.
|
|
- */
|
|
- printk(KERN_INFO "activating NMI Watchdog ...");
|
|
-
|
|
- enable_NMI_through_LVT0();
|
|
-
|
|
- printk(" done.\n");
|
|
-}
|
|
-
|
|
-/*
|
|
- * This looks a bit hackish but it's about the only one way of sending
|
|
- * a few INTA cycles to 8259As and any associated glue logic. ICR does
|
|
- * not support the ExtINT mode, unfortunately. We need to send these
|
|
- * cycles as some i82489DX-based boards have glue logic that keeps the
|
|
- * 8259A interrupt line asserted until INTA. --macro
|
|
- */
|
|
-static inline void __init unlock_ExtINT_logic(void)
|
|
-{
|
|
- int apic, pin, i;
|
|
- struct IO_APIC_route_entry entry0, entry1;
|
|
- unsigned char save_control, save_freq_select;
|
|
-
|
|
- pin = find_isa_irq_pin(8, mp_INT);
|
|
- apic = find_isa_irq_apic(8, mp_INT);
|
|
- if (pin == -1)
|
|
- return;
|
|
-
|
|
- entry0 = ioapic_read_entry(apic, pin);
|
|
-
|
|
- clear_IO_APIC_pin(apic, pin);
|
|
-
|
|
- memset(&entry1, 0, sizeof(entry1));
|
|
-
|
|
- entry1.dest_mode = 0; /* physical delivery */
|
|
- entry1.mask = 0; /* unmask IRQ now */
|
|
- entry1.dest = hard_smp_processor_id();
|
|
- entry1.delivery_mode = dest_ExtINT;
|
|
- entry1.polarity = entry0.polarity;
|
|
- entry1.trigger = 0;
|
|
- entry1.vector = 0;
|
|
-
|
|
- ioapic_write_entry(apic, pin, entry1);
|
|
-
|
|
- save_control = CMOS_READ(RTC_CONTROL);
|
|
- save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
|
|
- CMOS_WRITE((save_freq_select & ~RTC_RATE_SELECT) | 0x6,
|
|
- RTC_FREQ_SELECT);
|
|
- CMOS_WRITE(save_control | RTC_PIE, RTC_CONTROL);
|
|
-
|
|
- i = 100;
|
|
- while (i-- > 0) {
|
|
- mdelay(10);
|
|
- if ((CMOS_READ(RTC_INTR_FLAGS) & RTC_PF) == RTC_PF)
|
|
- i -= 10;
|
|
- }
|
|
-
|
|
- CMOS_WRITE(save_control, RTC_CONTROL);
|
|
- CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
|
|
- clear_IO_APIC_pin(apic, pin);
|
|
-
|
|
- ioapic_write_entry(apic, pin, entry0);
|
|
-}
|
|
-
|
|
-/*
|
|
- * This code may look a bit paranoid, but it's supposed to cooperate with
|
|
- * a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
|
|
- * is so screwy. Thanks to Brian Perkins for testing/hacking this beast
|
|
- * fanatically on his truly buggy board.
|
|
- *
|
|
- * FIXME: really need to revamp this for modern platforms only.
|
|
- */
|
|
-static inline void __init check_timer(void)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + 0;
|
|
- int apic1, pin1, apic2, pin2;
|
|
- unsigned long flags;
|
|
- int no_pin1 = 0;
|
|
-
|
|
- local_irq_save(flags);
|
|
-
|
|
- /*
|
|
- * get/set the timer IRQ vector:
|
|
- */
|
|
- disable_8259A_irq(0);
|
|
- assign_irq_vector(0, TARGET_CPUS);
|
|
-
|
|
- /*
|
|
- * As IRQ0 is to be enabled in the 8259A, the virtual
|
|
- * wire has to be disabled in the local APIC.
|
|
- */
|
|
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
- init_8259A(1);
|
|
-
|
|
- pin1 = find_isa_irq_pin(0, mp_INT);
|
|
- apic1 = find_isa_irq_apic(0, mp_INT);
|
|
- pin2 = ioapic_i8259.pin;
|
|
- apic2 = ioapic_i8259.apic;
|
|
-
|
|
- apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
|
- "apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
- cfg->vector, apic1, pin1, apic2, pin2);
|
|
-
|
|
- /*
|
|
- * Some BIOS writers are clueless and report the ExtINTA
|
|
- * I/O APIC input from the cascaded 8259A as the timer
|
|
- * interrupt input. So just in case, if only one pin
|
|
- * was found above, try it both directly and through the
|
|
- * 8259A.
|
|
- */
|
|
- if (pin1 == -1) {
|
|
- pin1 = pin2;
|
|
- apic1 = apic2;
|
|
- no_pin1 = 1;
|
|
- } else if (pin2 == -1) {
|
|
- pin2 = pin1;
|
|
- apic2 = apic1;
|
|
- }
|
|
-
|
|
- if (pin1 != -1) {
|
|
- /*
|
|
- * Ok, does IRQ0 through the IOAPIC work?
|
|
- */
|
|
- if (no_pin1) {
|
|
- add_pin_to_irq(0, apic1, pin1);
|
|
- setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
|
|
- }
|
|
- unmask_IO_APIC_irq(0);
|
|
- if (!no_timer_check && timer_irq_works()) {
|
|
- if (nmi_watchdog == NMI_IO_APIC) {
|
|
- setup_nmi();
|
|
- enable_8259A_irq(0);
|
|
- }
|
|
- if (disable_timer_pin_1 > 0)
|
|
- clear_IO_APIC_pin(0, pin1);
|
|
- goto out;
|
|
- }
|
|
- clear_IO_APIC_pin(apic1, pin1);
|
|
- if (!no_pin1)
|
|
- apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
|
- "8254 timer not connected to IO-APIC\n");
|
|
-
|
|
- apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
|
- "(IRQ0) through the 8259A ...\n");
|
|
- apic_printk(APIC_QUIET, KERN_INFO
|
|
- "..... (found apic %d pin %d) ...\n", apic2, pin2);
|
|
- /*
|
|
- * legacy devices should be connected to IO APIC #0
|
|
- */
|
|
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
|
- setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
|
|
- unmask_IO_APIC_irq(0);
|
|
- enable_8259A_irq(0);
|
|
- if (timer_irq_works()) {
|
|
- apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
|
- timer_through_8259 = 1;
|
|
- if (nmi_watchdog == NMI_IO_APIC) {
|
|
- disable_8259A_irq(0);
|
|
- setup_nmi();
|
|
- enable_8259A_irq(0);
|
|
- }
|
|
- goto out;
|
|
- }
|
|
- /*
|
|
- * Cleanup, just in case ...
|
|
- */
|
|
- disable_8259A_irq(0);
|
|
- clear_IO_APIC_pin(apic2, pin2);
|
|
- apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
|
- }
|
|
-
|
|
- if (nmi_watchdog == NMI_IO_APIC) {
|
|
- apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
|
- "through the IO-APIC - disabling NMI Watchdog!\n");
|
|
- nmi_watchdog = NMI_NONE;
|
|
- }
|
|
-
|
|
- apic_printk(APIC_QUIET, KERN_INFO
|
|
- "...trying to set up timer as Virtual Wire IRQ...\n");
|
|
-
|
|
- lapic_register_intr(0);
|
|
- apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
|
- enable_8259A_irq(0);
|
|
-
|
|
- if (timer_irq_works()) {
|
|
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
- goto out;
|
|
- }
|
|
- disable_8259A_irq(0);
|
|
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
|
|
- apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
|
-
|
|
- apic_printk(APIC_QUIET, KERN_INFO
|
|
- "...trying to set up timer as ExtINT IRQ...\n");
|
|
-
|
|
- init_8259A(0);
|
|
- make_8259A_irq(0);
|
|
- apic_write(APIC_LVT0, APIC_DM_EXTINT);
|
|
-
|
|
- unlock_ExtINT_logic();
|
|
-
|
|
- if (timer_irq_works()) {
|
|
- apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
- goto out;
|
|
- }
|
|
- apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
|
- panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
|
- "report. Then try booting with the 'noapic' option.\n");
|
|
-out:
|
|
- local_irq_restore(flags);
|
|
-}
|
|
-#else
|
|
-#define check_timer() ((void)0)
|
|
-int timer_uses_ioapic_pin_0 = 0;
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-static int __init notimercheck(char *s)
|
|
-{
|
|
- no_timer_check = 1;
|
|
- return 1;
|
|
-}
|
|
-__setup("no_timer_check", notimercheck);
|
|
-
|
|
-/*
|
|
- *
|
|
- * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
|
|
- * to devices. However there may be an I/O APIC pin available for
|
|
- * this interrupt regardless. The pin may be left unconnected, but
|
|
- * typically it will be reused as an ExtINT cascade interrupt for
|
|
- * the master 8259A. In the MPS case such a pin will normally be
|
|
- * reported as an ExtINT interrupt in the MP table. With ACPI
|
|
- * there is no provision for ExtINT interrupts, and in the absence
|
|
- * of an override it would be treated as an ordinary ISA I/O APIC
|
|
- * interrupt, that is edge-triggered and unmasked by default. We
|
|
- * used to do this, but it caused problems on some systems because
|
|
- * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
|
|
- * the same ExtINT cascade interrupt to drive the local APIC of the
|
|
- * bootstrap processor. Therefore we refrain from routing IRQ2 to
|
|
- * the I/O APIC in all cases now. No actual device should request
|
|
- * it anyway. --macro
|
|
- */
|
|
-#define PIC_IRQS (1<<2)
|
|
-
|
|
-void __init setup_IO_APIC(void)
|
|
-{
|
|
- enable_IO_APIC();
|
|
-
|
|
- io_apic_irqs = ~PIC_IRQS;
|
|
-
|
|
- apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- sync_Arb_IDs();
|
|
-#endif /* !CONFIG_XEN */
|
|
- setup_IO_APIC_irqs();
|
|
- init_IO_APIC_traps();
|
|
- check_timer();
|
|
- if (!acpi_ioapic)
|
|
- print_IO_APIC();
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-
|
|
-struct sysfs_ioapic_data {
|
|
- struct sys_device dev;
|
|
- struct IO_APIC_route_entry entry[0];
|
|
-};
|
|
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
|
|
-
|
|
-static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
|
|
-{
|
|
- struct IO_APIC_route_entry *entry;
|
|
- struct sysfs_ioapic_data *data;
|
|
- int i;
|
|
-
|
|
- data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
- entry = data->entry;
|
|
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
|
|
- *entry = ioapic_read_entry(dev->id, i);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int ioapic_resume(struct sys_device *dev)
|
|
-{
|
|
- struct IO_APIC_route_entry *entry;
|
|
- struct sysfs_ioapic_data *data;
|
|
- unsigned long flags;
|
|
- union IO_APIC_reg_00 reg_00;
|
|
- int i;
|
|
-
|
|
- data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
- entry = data->entry;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(dev->id, 0);
|
|
- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
|
|
- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
|
|
- io_apic_write(dev->id, 0, reg_00.raw);
|
|
- }
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
|
|
- ioapic_write_entry(dev->id, i, entry[i]);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static struct sysdev_class ioapic_sysdev_class = {
|
|
- .name = "ioapic",
|
|
- .suspend = ioapic_suspend,
|
|
- .resume = ioapic_resume,
|
|
-};
|
|
-
|
|
-static int __init ioapic_init_sysfs(void)
|
|
-{
|
|
- struct sys_device * dev;
|
|
- int i, size, error;
|
|
-
|
|
- error = sysdev_class_register(&ioapic_sysdev_class);
|
|
- if (error)
|
|
- return error;
|
|
-
|
|
- for (i = 0; i < nr_ioapics; i++ ) {
|
|
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
|
|
- * sizeof(struct IO_APIC_route_entry);
|
|
- mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
|
|
- if (!mp_ioapic_data[i]) {
|
|
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
|
|
- continue;
|
|
- }
|
|
- dev = &mp_ioapic_data[i]->dev;
|
|
- dev->id = i;
|
|
- dev->cls = &ioapic_sysdev_class;
|
|
- error = sysdev_register(dev);
|
|
- if (error) {
|
|
- kfree(mp_ioapic_data[i]);
|
|
- mp_ioapic_data[i] = NULL;
|
|
- printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
|
|
- continue;
|
|
- }
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-device_initcall(ioapic_init_sysfs);
|
|
-
|
|
-/*
|
|
- * Dynamic irq allocate and deallocation
|
|
- */
|
|
-int create_irq(void)
|
|
-{
|
|
- /* Allocate an unused irq */
|
|
- int irq;
|
|
- int new;
|
|
- unsigned long flags;
|
|
-
|
|
- irq = -ENOSPC;
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- for (new = (NR_IRQS - 1); new >= 0; new--) {
|
|
- if (platform_legacy_irq(new))
|
|
- continue;
|
|
- if (irq_cfg[new].vector != 0)
|
|
- continue;
|
|
- if (__assign_irq_vector(new, TARGET_CPUS) == 0)
|
|
- irq = new;
|
|
- break;
|
|
- }
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
-
|
|
- if (irq >= 0) {
|
|
- dynamic_irq_init(irq);
|
|
- }
|
|
- return irq;
|
|
-}
|
|
-
|
|
-void destroy_irq(unsigned int irq)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- dynamic_irq_cleanup(irq);
|
|
-
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- __clear_irq_vector(irq);
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
-}
|
|
-
|
|
-#endif /* CONFIG_XEN */
|
|
-
|
|
-/*
|
|
- * MSI message composition
|
|
- */
|
|
-#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
|
|
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- int err;
|
|
- unsigned dest;
|
|
- cpumask_t tmp;
|
|
-
|
|
- tmp = TARGET_CPUS;
|
|
- err = assign_irq_vector(irq, tmp);
|
|
- if (!err) {
|
|
- cpus_and(tmp, cfg->domain, tmp);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
- msg->address_hi = MSI_ADDR_BASE_HI;
|
|
- msg->address_lo =
|
|
- MSI_ADDR_BASE_LO |
|
|
- ((INT_DEST_MODE == 0) ?
|
|
- MSI_ADDR_DEST_MODE_PHYSICAL:
|
|
- MSI_ADDR_DEST_MODE_LOGICAL) |
|
|
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
- MSI_ADDR_REDIRECTION_CPU:
|
|
- MSI_ADDR_REDIRECTION_LOWPRI) |
|
|
- MSI_ADDR_DEST_ID(dest);
|
|
-
|
|
- msg->data =
|
|
- MSI_DATA_TRIGGER_EDGE |
|
|
- MSI_DATA_LEVEL_ASSERT |
|
|
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
- MSI_DATA_DELIVERY_FIXED:
|
|
- MSI_DATA_DELIVERY_LOWPRI) |
|
|
- MSI_DATA_VECTOR(cfg->vector);
|
|
- }
|
|
- return err;
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- struct msi_msg msg;
|
|
- unsigned int dest;
|
|
- cpumask_t tmp;
|
|
-
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- return;
|
|
-
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
- read_msi_msg(irq, &msg);
|
|
-
|
|
- msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
|
|
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
-
|
|
- write_msi_msg(irq, &msg);
|
|
- irq_desc[irq].affinity = mask;
|
|
-}
|
|
-#endif /* CONFIG_SMP */
|
|
-
|
|
-/*
|
|
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
|
|
- * which implement the MSI or MSI-X Capability Structure.
|
|
- */
|
|
-static struct irq_chip msi_chip = {
|
|
- .name = "PCI-MSI",
|
|
- .unmask = unmask_msi_irq,
|
|
- .mask = mask_msi_irq,
|
|
- .ack = ack_apic_edge,
|
|
-#ifdef CONFIG_SMP
|
|
- .set_affinity = set_msi_irq_affinity,
|
|
-#endif
|
|
- .retrigger = ioapic_retrigger_irq,
|
|
-};
|
|
-
|
|
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
|
|
-{
|
|
- struct msi_msg msg;
|
|
- int irq, ret;
|
|
- irq = create_irq();
|
|
- if (irq < 0)
|
|
- return irq;
|
|
-
|
|
- ret = msi_compose_msg(dev, irq, &msg);
|
|
- if (ret < 0) {
|
|
- destroy_irq(irq);
|
|
- return ret;
|
|
- }
|
|
-
|
|
- set_irq_msi(irq, desc);
|
|
- write_msi_msg(irq, &msg);
|
|
-
|
|
- set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-void arch_teardown_msi_irq(unsigned int irq)
|
|
-{
|
|
- destroy_irq(irq);
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_DMAR
|
|
-#ifdef CONFIG_SMP
|
|
-static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- struct msi_msg msg;
|
|
- unsigned int dest;
|
|
- cpumask_t tmp;
|
|
-
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- return;
|
|
-
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
- dmar_msi_read(irq, &msg);
|
|
-
|
|
- msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
- msg.data |= MSI_DATA_VECTOR(cfg->vector);
|
|
- msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
- msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
-
|
|
- dmar_msi_write(irq, &msg);
|
|
- irq_desc[irq].affinity = mask;
|
|
-}
|
|
-#endif /* CONFIG_SMP */
|
|
-
|
|
-struct irq_chip dmar_msi_type = {
|
|
- .name = "DMAR_MSI",
|
|
- .unmask = dmar_msi_unmask,
|
|
- .mask = dmar_msi_mask,
|
|
- .ack = ack_apic_edge,
|
|
-#ifdef CONFIG_SMP
|
|
- .set_affinity = dmar_msi_set_affinity,
|
|
-#endif
|
|
- .retrigger = ioapic_retrigger_irq,
|
|
-};
|
|
-
|
|
-int arch_setup_dmar_msi(unsigned int irq)
|
|
-{
|
|
- int ret;
|
|
- struct msi_msg msg;
|
|
-
|
|
- ret = msi_compose_msg(NULL, irq, &msg);
|
|
- if (ret < 0)
|
|
- return ret;
|
|
- dmar_msi_write(irq, &msg);
|
|
- set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
|
|
- "edge");
|
|
- return 0;
|
|
-}
|
|
-#endif
|
|
-
|
|
-#endif /* CONFIG_PCI_MSI */
|
|
-/*
|
|
- * Hypertransport interrupt support
|
|
- */
|
|
-#ifdef CONFIG_HT_IRQ
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-
|
|
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
-{
|
|
- struct ht_irq_msg msg;
|
|
- fetch_ht_irq_msg(irq, &msg);
|
|
-
|
|
- msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
|
|
- msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
|
|
-
|
|
- msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
|
|
- msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
|
|
-
|
|
- write_ht_irq_msg(irq, &msg);
|
|
-}
|
|
-
|
|
-static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- unsigned int dest;
|
|
- cpumask_t tmp;
|
|
-
|
|
- cpus_and(tmp, mask, cpu_online_map);
|
|
- if (cpus_empty(tmp))
|
|
- return;
|
|
-
|
|
- if (assign_irq_vector(irq, mask))
|
|
- return;
|
|
-
|
|
- cpus_and(tmp, cfg->domain, mask);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
- target_ht_irq(irq, dest, cfg->vector);
|
|
- irq_desc[irq].affinity = mask;
|
|
-}
|
|
-#endif
|
|
-
|
|
-static struct irq_chip ht_irq_chip = {
|
|
- .name = "PCI-HT",
|
|
- .mask = mask_ht_irq,
|
|
- .unmask = unmask_ht_irq,
|
|
- .ack = ack_apic_edge,
|
|
-#ifdef CONFIG_SMP
|
|
- .set_affinity = set_ht_irq_affinity,
|
|
-#endif
|
|
- .retrigger = ioapic_retrigger_irq,
|
|
-};
|
|
-
|
|
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
|
|
-{
|
|
- struct irq_cfg *cfg = irq_cfg + irq;
|
|
- int err;
|
|
- cpumask_t tmp;
|
|
-
|
|
- tmp = TARGET_CPUS;
|
|
- err = assign_irq_vector(irq, tmp);
|
|
- if (!err) {
|
|
- struct ht_irq_msg msg;
|
|
- unsigned dest;
|
|
-
|
|
- cpus_and(tmp, cfg->domain, tmp);
|
|
- dest = cpu_mask_to_apicid(tmp);
|
|
-
|
|
- msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
|
-
|
|
- msg.address_lo =
|
|
- HT_IRQ_LOW_BASE |
|
|
- HT_IRQ_LOW_DEST_ID(dest) |
|
|
- HT_IRQ_LOW_VECTOR(cfg->vector) |
|
|
- ((INT_DEST_MODE == 0) ?
|
|
- HT_IRQ_LOW_DM_PHYSICAL :
|
|
- HT_IRQ_LOW_DM_LOGICAL) |
|
|
- HT_IRQ_LOW_RQEOI_EDGE |
|
|
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
- HT_IRQ_LOW_MT_FIXED :
|
|
- HT_IRQ_LOW_MT_ARBITRATED) |
|
|
- HT_IRQ_LOW_IRQ_MASKED;
|
|
-
|
|
- write_ht_irq_msg(irq, &msg);
|
|
-
|
|
- set_irq_chip_and_handler_name(irq, &ht_irq_chip,
|
|
- handle_edge_irq, "edge");
|
|
- }
|
|
- return err;
|
|
-}
|
|
-#endif /* CONFIG_HT_IRQ */
|
|
-
|
|
-/* --------------------------------------------------------------------------
|
|
- ACPI-based IOAPIC Configuration
|
|
- -------------------------------------------------------------------------- */
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
-
|
|
-#define IO_APIC_MAX_ID 0xFE
|
|
-
|
|
-int __init io_apic_get_redir_entries (int ioapic)
|
|
-{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(ioapic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- return reg_01.bits.entries;
|
|
-}
|
|
-
|
|
-
|
|
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity)
|
|
-{
|
|
- if (!IO_APIC_IRQ(irq)) {
|
|
- apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
|
|
- ioapic);
|
|
- return -EINVAL;
|
|
- }
|
|
-
|
|
- /*
|
|
- * IRQs < 16 are already in the irq_2_pin[] map
|
|
- */
|
|
- if (irq >= 16)
|
|
- add_pin_to_irq(irq, ioapic, pin);
|
|
-
|
|
- setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-
|
|
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
|
|
-{
|
|
- int i;
|
|
-
|
|
- if (skip_ioapic_setup)
|
|
- return -1;
|
|
-
|
|
- for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
- mp_irqs[i].mp_srcbusirq == bus_irq)
|
|
- break;
|
|
- if (i >= mp_irq_entries)
|
|
- return -1;
|
|
-
|
|
- *trigger = irq_trigger(i);
|
|
- *polarity = irq_polarity(i);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-#endif /* CONFIG_ACPI */
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * This function currently is only a helper for the i386 smp boot process where
|
|
- * we need to reprogram the ioredtbls to cater for the cpus which have come online
|
|
- * so mask in all cases should simply be TARGET_CPUS
|
|
- */
|
|
-#ifdef CONFIG_SMP
|
|
-void __init setup_ioapic_dest(void)
|
|
-{
|
|
- int pin, ioapic, irq, irq_entry;
|
|
-
|
|
- if (skip_ioapic_setup == 1)
|
|
- return;
|
|
-
|
|
- for (ioapic = 0; ioapic < nr_ioapics; ioapic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
|
|
- irq_entry = find_irq_entry(ioapic, pin, mp_INT);
|
|
- if (irq_entry == -1)
|
|
- continue;
|
|
- irq = pin_2_irq(irq_entry, ioapic, pin);
|
|
-
|
|
- /* setup_IO_APIC_irqs could fail to get vector for some device
|
|
- * when you have too many devices, because at that time only boot
|
|
- * cpu is online.
|
|
- */
|
|
- if (!irq_cfg[irq].vector)
|
|
- setup_IO_APIC_irq(ioapic, pin, irq,
|
|
- irq_trigger(irq_entry),
|
|
- irq_polarity(irq_entry));
|
|
- else
|
|
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
|
|
- }
|
|
-
|
|
- }
|
|
-}
|
|
-#endif
|
|
-
|
|
-#define IOAPIC_RESOURCE_NAME_SIZE 11
|
|
-
|
|
-static struct resource *ioapic_resources;
|
|
-
|
|
-static struct resource * __init ioapic_setup_resources(void)
|
|
-{
|
|
- unsigned long n;
|
|
- struct resource *res;
|
|
- char *mem;
|
|
- int i;
|
|
-
|
|
- if (nr_ioapics <= 0)
|
|
- return NULL;
|
|
-
|
|
- n = IOAPIC_RESOURCE_NAME_SIZE + sizeof(struct resource);
|
|
- n *= nr_ioapics;
|
|
-
|
|
- mem = alloc_bootmem(n);
|
|
- res = (void *)mem;
|
|
-
|
|
- if (mem != NULL) {
|
|
- mem += sizeof(struct resource) * nr_ioapics;
|
|
-
|
|
- for (i = 0; i < nr_ioapics; i++) {
|
|
- res[i].name = mem;
|
|
- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
- sprintf(mem, "IOAPIC %u", i);
|
|
- mem += IOAPIC_RESOURCE_NAME_SIZE;
|
|
- }
|
|
- }
|
|
-
|
|
- ioapic_resources = res;
|
|
-
|
|
- return res;
|
|
-}
|
|
-
|
|
-void __init ioapic_init_mappings(void)
|
|
-{
|
|
- unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
|
|
- struct resource *ioapic_res;
|
|
- int i;
|
|
-
|
|
- ioapic_res = ioapic_setup_resources();
|
|
- for (i = 0; i < nr_ioapics; i++) {
|
|
- if (smp_found_config) {
|
|
- ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
|
- } else {
|
|
- ioapic_phys = (unsigned long)
|
|
- alloc_bootmem_pages(PAGE_SIZE);
|
|
- ioapic_phys = __pa(ioapic_phys);
|
|
- }
|
|
- set_fixmap_nocache(idx, ioapic_phys);
|
|
- apic_printk(APIC_VERBOSE,
|
|
- "mapped IOAPIC to %016lx (%016lx)\n",
|
|
- __fix_to_virt(idx), ioapic_phys);
|
|
- idx++;
|
|
-
|
|
- if (ioapic_res != NULL) {
|
|
- ioapic_res->start = ioapic_phys;
|
|
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
|
|
- ioapic_res++;
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-static int __init ioapic_insert_resources(void)
|
|
-{
|
|
- int i;
|
|
- struct resource *r = ioapic_resources;
|
|
-
|
|
- if (!r) {
|
|
- printk(KERN_ERR
|
|
- "IO APIC resources could be not be allocated.\n");
|
|
- return -1;
|
|
- }
|
|
-
|
|
- for (i = 0; i < nr_ioapics; i++) {
|
|
- insert_resource(&iomem_resource, r);
|
|
- r++;
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* Insert the IO APIC resources after PCI initialization has occured to handle
|
|
- * IO APICS that are mapped in on a BAR in PCI space. */
|
|
-late_initcall(ioapic_insert_resources);
|
|
-#endif /* !CONFIG_XEN */
|
|
--- head-2011-03-17.orig/arch/x86/kernel/ioport-xen.c	2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/ioport-xen.c	2011-02-01 14:39:24.000000000 +0100
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/thread_info.h>
 #include <linux/syscalls.h>
+#include <asm/syscalls.h>
 #include <xen/interface/physdev.h>
 
 /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
--- head-2011-03-17.orig/arch/x86/kernel/apic/ipi-xen.c	2011-02-21 13:56:33.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/ipi-xen.c	2011-02-21 13:56:51.000000000 +0100
@@ -57,7 +57,4 @@ void send_IPI_mask_sequence(cpumask_t ma
 	send_IPI_mask_bitmask(mask, vector);
 }
 
-/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
-
 #endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-17/arch/x86/kernel/irq-xen.c	2011-02-01 14:39:24.000000000 +0100
@@ -0,0 +1,200 @@
+/*
+ * Common interrupt code for 32 and 64 bit
+ */
+#include <linux/cpu.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/seq_file.h>
+
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/smp.h>
+
+atomic_t irq_err_count;
+
+/*
+ * 'what should we do if we get a hw irq event on an illegal vector'.
+ * each architecture has to answer this themselves.
+ */
+void ack_bad_irq(unsigned int irq)
+{
+	printk(KERN_ERR "unexpected IRQ trap at irq %02x\n", irq);
+
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
+	/*
+	 * Currently unexpected vectors happen only on SMP and APIC.
+	 * We _must_ ack these because every local APIC has only N
+	 * irq slots per priority level, and a 'hanging, unacked' IRQ
+	 * holds up an irq slot - in excessive cases (when multiple
+	 * unexpected vectors occur) that might lock up the APIC
+	 * completely.
+	 * But only ack when the APIC is enabled -AK
+	 */
+	if (cpu_has_apic)
+		ack_APIC_irq();
+#endif
+}
+
+#ifdef CONFIG_X86_32
+# define irq_stats(x)	(&per_cpu(irq_stat, x))
+#else
+# define irq_stats(x)	cpu_pda(x)
+#endif
+/*
+ * /proc/interrupts printing:
+ */
+static int show_other_interrupts(struct seq_file *p)
+{
+	int j;
+
+	seq_printf(p, "NMI: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->__nmi_count);
+	seq_printf(p, "  Non-maskable interrupts\n");
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "LOC: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
+	seq_printf(p, "  Local timer interrupts\n");
+#endif
+#ifdef CONFIG_SMP
+	seq_printf(p, "RES: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_resched_count);
+	seq_printf(p, "  Rescheduling interrupts\n");
+	seq_printf(p, "CAL: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+	seq_printf(p, "  Function call interrupts\n");
+#ifndef CONFIG_XEN
+	seq_printf(p, "TLB: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
+	seq_printf(p, "  TLB shootdowns\n");
+#else
+	seq_printf(p, "LCK: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_lock_count);
+	seq_printf(p, "  Spinlock wakeups\n");
+#endif
+#endif
+#ifdef CONFIG_X86_MCE
+	seq_printf(p, "TRM: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
+	seq_printf(p, "  Thermal event interrupts\n");
+# ifdef CONFIG_X86_64
+	seq_printf(p, "THR: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
+	seq_printf(p, "  Threshold APIC interrupts\n");
+# endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	seq_printf(p, "SPU: ");
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
+	seq_printf(p, "  Spurious interrupts\n");
+#endif
+	seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
+#if defined(CONFIG_X86_IO_APIC)
+	seq_printf(p, "MIS: %10u\n", atomic_read(&irq_mis_count));
+#endif
+	return 0;
+}
+
+int show_interrupts(struct seq_file *p, void *v)
+{
+	unsigned long flags, any_count = 0;
+	int i = *(loff_t *) v, j;
+	struct irqaction *action;
+	struct irq_desc *desc;
+
+	if (i > nr_irqs)
+		return 0;
+
+	if (i == nr_irqs)
+		return show_other_interrupts(p);
+
+	/* print header */
+	if (i == 0) {
+		seq_printf(p, "           ");
+		for_each_online_cpu(j)
+			seq_printf(p, "CPU%-8d", j);
+		seq_putc(p, '\n');
+	}
+
+	desc = irq_to_desc(i);
+	spin_lock_irqsave(&desc->lock, flags);
+#ifndef CONFIG_SMP
+	any_count = kstat_irqs(i);
+#else
+	for_each_online_cpu(j)
+		any_count |= kstat_irqs_cpu(i, j);
+#endif
+	action = desc->action;
+	if (!action && !any_count)
+		goto out;
+
+	seq_printf(p, "%3d: ", i);
+#ifndef CONFIG_SMP
+	seq_printf(p, "%10u ", kstat_irqs(i));
+#else
+	for_each_online_cpu(j)
+		seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
+#endif
+	seq_printf(p, " %8s", desc->chip->name);
+	seq_printf(p, "-%-8s", desc->name);
+
+	if (action) {
+		seq_printf(p, "  %s", action->name);
+		while ((action = action->next) != NULL)
+			seq_printf(p, ", %s", action->name);
+	}
+
+	seq_putc(p, '\n');
+out:
+	spin_unlock_irqrestore(&desc->lock, flags);
+	return 0;
+}
+
+/*
+ * /proc/stat helpers
+ */
+u64 arch_irq_stat_cpu(unsigned int cpu)
+{
+	u64 sum = irq_stats(cpu)->__nmi_count;
+
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->apic_timer_irqs;
+#endif
+#ifdef CONFIG_SMP
+	sum += irq_stats(cpu)->irq_resched_count;
+	sum += irq_stats(cpu)->irq_call_count;
+#ifndef CONFIG_XEN
+	sum += irq_stats(cpu)->irq_tlb_count;
+#else
+	sum += irq_stats(cpu)->irq_lock_count;
+#endif
+#endif
+#ifdef CONFIG_X86_MCE
+	sum += irq_stats(cpu)->irq_thermal_count;
+# ifdef CONFIG_X86_64
+	sum += irq_stats(cpu)->irq_threshold_count;
+#endif
+#endif
+#ifdef CONFIG_X86_LOCAL_APIC
+	sum += irq_stats(cpu)->irq_spurious_count;
+#endif
+	return sum;
+}
+
+u64 arch_irq_stat(void)
+{
+	u64 sum = atomic_read(&irq_err_count);
+
+#ifdef CONFIG_X86_IO_APIC
+	sum += atomic_read(&irq_mis_count);
+#endif
+	return sum;
+}
--- head-2011-03-17.orig/arch/x86/kernel/ldt-xen.c	2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/ldt-xen.c	2011-02-01 14:39:24.000000000 +0100
@@ -18,6 +18,7 @@
 #include <asm/ldt.h>
 #include <asm/desc.h>
 #include <asm/mmu_context.h>
+#include <asm/syscalls.h>
 
 #ifdef CONFIG_SMP
 static void flush_ldt(void *current_mm)
--- head-2011-03-17.orig/arch/x86/kernel/microcode-xen.c	2011-02-01 14:38:38.000000000 +0100
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,214 +0,0 @@
-/*
- *	Intel CPU Microcode Update Driver for Linux
- *
- *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
- *		      2006	Shaohua Li <shaohua.li@intel.com>
- *
- *	This driver allows to upgrade microcode on Intel processors
- *	belonging to IA-32 family - PentiumPro, Pentium II,
- *	Pentium III, Xeon, Pentium 4, etc.
- *
- *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
- *	Software Developer's Manual
- *	Order Number 253668 or free download from:
- *
- *	http://developer.intel.com/design/pentium4/manuals/253668.htm
- *
- *	For more information, go to http://www.urbanmyth.org/microcode
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-
-//#define DEBUG /* pr_debug */
-#include <linux/capability.h>
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/smp_lock.h>
-#include <linux/cpumask.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/miscdevice.h>
-#include <linux/spinlock.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/mutex.h>
-#include <linux/cpu.h>
-#include <linux/firmware.h>
-#include <linux/platform_device.h>
-
-#include <asm/msr.h>
-#include <asm/uaccess.h>
-#include <asm/processor.h>
-
-MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
-MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
-MODULE_LICENSE("GPL");
-
-static int verbose;
-module_param(verbose, int, 0644);
-
-#define MICROCODE_VERSION	"1.14a-xen"
-
-#define DEFAULT_UCODE_DATASIZE	(2000)	/* 2000 bytes */
-#define MC_HEADER_SIZE		(sizeof (microcode_header_t))	/* 48 bytes */
-#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) /* 2048 bytes */
-
-/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
-static DEFINE_MUTEX(microcode_mutex);
-
-#ifdef CONFIG_MICROCODE_OLD_INTERFACE
-static int do_microcode_update (const void __user *ubuf, size_t len)
-{
-	int err;
-	void *kbuf;
-
-	kbuf = vmalloc(len);
-	if (!kbuf)
-		return -ENOMEM;
-
-	if (copy_from_user(kbuf, ubuf, len) == 0) {
-		struct xen_platform_op op;
-
-		op.cmd = XENPF_microcode_update;
-		set_xen_guest_handle(op.u.microcode.data, kbuf);
-		op.u.microcode.length = len;
-		err = HYPERVISOR_platform_op(&op);
-	} else
-		err = -EFAULT;
-
-	vfree(kbuf);
-
-	return err;
-}
-
-static int microcode_open (struct inode *unused1, struct file *unused2)
-{
-	cycle_kernel_lock();
-	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
-}
-
-static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
-{
-	ssize_t ret;
-
-	if (len < MC_HEADER_SIZE) {
-		printk(KERN_ERR "microcode: not enough data\n");
-		return -EINVAL;
-	}
-
-	mutex_lock(&microcode_mutex);
-
-	ret = do_microcode_update(buf, len);
-	if (!ret)
-		ret = (ssize_t)len;
-
-	mutex_unlock(&microcode_mutex);
-
-	return ret;
-}
-
-static const struct file_operations microcode_fops = {
-	.owner		= THIS_MODULE,
-	.write		= microcode_write,
-	.open		= microcode_open,
-};
-
-static struct miscdevice microcode_dev = {
-	.minor		= MICROCODE_MINOR,
-	.name		= "microcode",
-	.fops		= &microcode_fops,
-};
-
-static int __init microcode_dev_init (void)
-{
-	int error;
-
-	error = misc_register(&microcode_dev);
-	if (error) {
-		printk(KERN_ERR
-			"microcode: can't misc_register on minor=%d\n",
-			MICROCODE_MINOR);
-		return error;
-	}
-
-	return 0;
-}
-
-static void microcode_dev_exit (void)
-{
-	misc_deregister(&microcode_dev);
-}
-
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
-#else
-#define microcode_dev_init() 0
-#define microcode_dev_exit() do { } while(0)
-#endif
-
-/* fake device for request_firmware */
-static struct platform_device *microcode_pdev;
-
-static int request_microcode(void)
-{
-	char name[30];
-	const struct cpuinfo_x86 *c = &boot_cpu_data;
-	const struct firmware *firmware;
-	int error;
-	struct xen_platform_op op;
-
-	sprintf(name,"intel-ucode/%02x-%02x-%02x",
-		c->x86, c->x86_model, c->x86_mask);
-	error = request_firmware(&firmware, name, &microcode_pdev->dev);
-	if (error) {
-		pr_debug("microcode: data file %s load failed\n", name);
-		return error;
-	}
-
-	op.cmd = XENPF_microcode_update;
-	set_xen_guest_handle(op.u.microcode.data, firmware->data);
-	op.u.microcode.length = firmware->size;
-	error = HYPERVISOR_platform_op(&op);
-
-	release_firmware(firmware);
-
-	if (error)
-		pr_debug("ucode load failed\n");
-
-	return error;
-}
-
-static int __init microcode_init (void)
-{
-	int error;
-
-	printk(KERN_INFO
-		"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
-
-	error = microcode_dev_init();
-	if (error)
-		return error;
-	microcode_pdev = platform_device_register_simple("microcode", -1,
-							 NULL, 0);
-	if (IS_ERR(microcode_pdev)) {
-		microcode_dev_exit();
-		return PTR_ERR(microcode_pdev);
-	}
-
-	request_microcode();
-
-	return 0;
-}
-
-static void __exit microcode_exit (void)
-{
-	microcode_dev_exit();
-	platform_device_unregister(microcode_pdev);
-}
-
-module_init(microcode_init)
-module_exit(microcode_exit)
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-17/arch/x86/kernel/microcode_core-xen.c	2011-02-01 14:39:24.000000000 +0100
@@ -0,0 +1,225 @@
+/*
+ *	Intel CPU Microcode Update Driver for Linux
+ *
+ *	Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
+ *		      2006	Shaohua Li <shaohua.li@intel.com>
+ *
+ *	This driver allows to upgrade microcode on Intel processors
+ *	belonging to IA-32 family - PentiumPro, Pentium II,
+ *	Pentium III, Xeon, Pentium 4, etc.
+ *
+ *	Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
+ *	Software Developer's Manual
+ *	Order Number 253668 or free download from:
+ *
+ *	http://developer.intel.com/design/pentium4/manuals/253668.htm
+ *
+ *	For more information, go to http://www.urbanmyth.org/microcode
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+#include <linux/capability.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/smp_lock.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/mm.h>
+#include <linux/fs.h>
+#include <linux/mutex.h>
+#include <linux/cpu.h>
+#include <linux/firmware.h>
+#include <linux/platform_device.h>
+
+#include <asm/msr.h>
+#include <asm/uaccess.h>
+#include <asm/processor.h>
+#include <asm/microcode.h>
+
+MODULE_DESCRIPTION("Microcode Update Driver");
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
+MODULE_LICENSE("GPL");
+
+static int verbose;
+module_param(verbose, int, 0644);
+
+#define MICROCODE_VERSION	"2.00-xen"
+
+/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
+static DEFINE_MUTEX(microcode_mutex);
+
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
+static int do_microcode_update(const void __user *ubuf, size_t len)
+{
+	int err;
+	void *kbuf;
+
+	kbuf = vmalloc(len);
+	if (!kbuf)
+		return -ENOMEM;
+
+	if (copy_from_user(kbuf, ubuf, len) == 0) {
+		struct xen_platform_op op;
+
+		op.cmd = XENPF_microcode_update;
+		set_xen_guest_handle(op.u.microcode.data, kbuf);
+		op.u.microcode.length = len;
+		err = HYPERVISOR_platform_op(&op);
+	} else
+		err = -EFAULT;
+
+	vfree(kbuf);
+
+	return err;
+}
+
+static int microcode_open(struct inode *unused1, struct file *unused2)
+{
+	cycle_kernel_lock();
+	return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
+}
+
+static ssize_t microcode_write(struct file *file, const char __user *buf,
+			       size_t len, loff_t *ppos)
+{
+	ssize_t ret;
+
+	if ((len >> PAGE_SHIFT) > num_physpages) {
+		printk(KERN_ERR "microcode: too much data (max %ld pages)\n",
+		       num_physpages);
+		return -EINVAL;
+	}
+
+	mutex_lock(&microcode_mutex);
+
+	ret = do_microcode_update(buf, len);
+	if (!ret)
+		ret = (ssize_t)len;
+
+	mutex_unlock(&microcode_mutex);
+
+	return ret;
+}
+
+static const struct file_operations microcode_fops = {
+	.owner		= THIS_MODULE,
+	.write		= microcode_write,
+	.open		= microcode_open,
+};
+
+static struct miscdevice microcode_dev = {
+	.minor		= MICROCODE_MINOR,
+	.name		= "microcode",
+	.fops		= &microcode_fops,
+};
+
+static int __init microcode_dev_init(void)
+{
+	int error;
+
+	error = misc_register(&microcode_dev);
+	if (error) {
+		printk(KERN_ERR
+			"microcode: can't misc_register on minor=%d\n",
+			MICROCODE_MINOR);
+		return error;
+	}
+
+	return 0;
+}
+
+static void microcode_dev_exit(void)
+{
+	misc_deregister(&microcode_dev);
+}
+
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
+#else
+#define microcode_dev_init() 0
+#define microcode_dev_exit() do { } while (0)
+#endif
+
+/* fake device for request_firmware */
+static struct platform_device *microcode_pdev;
+
+static int request_microcode(const char *name)
+{
+	const struct firmware *firmware;
+	int error;
+	struct xen_platform_op op;
+
+	error = request_firmware(&firmware, name, &microcode_pdev->dev);
+	if (error) {
+		pr_debug("microcode: data file %s load failed\n", name);
+		return error;
+	}
+
+	op.cmd = XENPF_microcode_update;
+	set_xen_guest_handle(op.u.microcode.data, firmware->data);
+	op.u.microcode.length = firmware->size;
+	error = HYPERVISOR_platform_op(&op);
+
+	release_firmware(firmware);
+
+	if (error)
+		pr_debug("ucode load failed\n");
+
+	return error;
+}
+
+static int __init microcode_init(void)
+{
+	const struct cpuinfo_x86 *c = &boot_cpu_data;
+	char buf[32];
+	const char *fw_name = buf;
+	int error;
+
+	if (c->x86_vendor == X86_VENDOR_INTEL)
+		sprintf(buf, "intel-ucode/%02x-%02x-%02x",
+			c->x86, c->x86_model, c->x86_mask);
+	else if (c->x86_vendor == X86_VENDOR_AMD)
+		fw_name = "amd-ucode/microcode_amd.bin";
+	else {
+		printk(KERN_ERR "microcode: no support for this CPU vendor\n");
+		return -ENODEV;
+	}
+
+	error = microcode_dev_init();
+	if (error)
+		return error;
+	microcode_pdev = platform_device_register_simple("microcode", -1,
+							 NULL, 0);
+	if (IS_ERR(microcode_pdev)) {
+		microcode_dev_exit();
+		return PTR_ERR(microcode_pdev);
+	}
+
+	request_microcode(fw_name);
+
+	printk(KERN_INFO
+	       "Microcode Update Driver: v" MICROCODE_VERSION
+	       " <tigran@aivazian.fsnet.co.uk>,"
+	       " Peter Oruba\n");
+
+	return 0;
+}
+
+static void __exit microcode_exit(void)
+{
+	microcode_dev_exit();
+	platform_device_unregister(microcode_pdev);
+
+	printk(KERN_INFO
+	       "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n");
+}
+
+module_init(microcode_init);
+module_exit(microcode_exit);
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -410,7 +410,9 @@ static int __init smp_read_mpc(struct mp
generic_bigsmp_probe();
#endif
 
+#ifdef CONFIG_X86_32
setup_apic_routing();
+#endif
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
@@ -622,6 +624,9 @@ void __init get_smp_config(void)
printk(KERN_INFO "Using ACPI for processor (LAPIC) "
"configuration information\n");
 
+ if (!mpf)
+ return;
+
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->mpf_specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -41,11 +41,12 @@ EXPORT_SYMBOL(bad_dma_address);
|
|
/* Dummy device used for NULL arguments (normally ISA). Better would
|
|
be probably a smaller DMA mask, but this is bug-to-bug compatible
|
|
to older i386. */
|
|
-struct device fallback_dev = {
|
|
+struct device x86_dma_fallback_dev = {
|
|
.bus_id = "fallback device",
|
|
.coherent_dma_mask = DMA_32BIT_MASK,
|
|
- .dma_mask = &fallback_dev.coherent_dma_mask,
|
|
+ .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
|
|
};
|
|
+EXPORT_SYMBOL(x86_dma_fallback_dev);
|
|
|
|
int dma_set_mask(struct device *dev, u64 mask)
|
|
{
|
|
@@ -82,7 +83,7 @@ void __init dma32_reserve_bootmem(void)
|
|
* using 512M as goal
|
|
*/
|
|
align = 64ULL<<20;
|
|
- size = round_up(dma32_bootmem_size, align);
|
|
+ size = roundup(dma32_bootmem_size, align);
|
|
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
|
|
512ULL<<20);
|
|
if (dma32_bootmem_ptr)
|
|
@@ -109,6 +110,8 @@ static void __init dma32_free_bootmem(vo
|
|
#endif
|
|
|
|
static struct dma_mapping_ops swiotlb_dma_ops = {
|
|
+ .alloc_coherent = dma_generic_alloc_coherent,
|
|
+ .free_coherent = dma_generic_free_coherent,
|
|
.mapping_error = swiotlb_dma_mapping_error,
|
|
.map_single = swiotlb_map_single_phys,
|
|
.unmap_single = swiotlb_unmap_single,
|
|
@@ -147,13 +150,77 @@ void __init pci_iommu_alloc(void)
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
|
|
+unsigned long iommu_nr_pages(unsigned long addr, unsigned long len)
|
|
{
|
|
unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
|
|
|
|
return size >> PAGE_SHIFT;
|
|
}
|
|
-EXPORT_SYMBOL(iommu_num_pages);
|
|
+EXPORT_SYMBOL(iommu_nr_pages);
|
|
+#endif
|
|
+
|
|
+void *dma_generic_alloc_coherent(struct device *dev, size_t size,
|
|
+ dma_addr_t *dma_addr, gfp_t flag)
|
|
+{
|
|
+ unsigned long dma_mask;
|
|
+ struct page *page;
|
|
+#ifndef CONFIG_XEN
|
|
+ dma_addr_t addr;
|
|
+#else
|
|
+ void *memory;
|
|
+#endif
|
|
+ unsigned int order = get_order(size);
|
|
+
|
|
+ dma_mask = dma_alloc_coherent_mask(dev, flag);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ flag |= __GFP_ZERO;
|
|
+again:
|
|
+#else
|
|
+ flag &= ~(__GFP_DMA | __GFP_DMA32);
|
|
+#endif
|
|
+ page = alloc_pages_node(dev_to_node(dev), flag, order);
|
|
+ if (!page)
|
|
+ return NULL;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ addr = page_to_phys(page);
|
|
+ if (!is_buffer_dma_capable(dma_mask, addr, size)) {
|
|
+ __free_pages(page, order);
|
|
+
|
|
+ if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) {
|
|
+ flag = (flag & ~GFP_DMA32) | GFP_DMA;
|
|
+ goto again;
|
|
+ }
|
|
+
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ *dma_addr = addr;
|
|
+ return page_address(page);
|
|
+#else
|
|
+ memory = page_address(page);
|
|
+ if (xen_create_contiguous_region((unsigned long)memory, order,
|
|
+ fls64(dma_mask))) {
|
|
+ __free_pages(page, order);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
+ *dma_addr = virt_to_bus(memory);
|
|
+ return memset(memory, 0, size);
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+void dma_generic_free_coherent(struct device *dev, size_t size, void *vaddr,
|
|
+ dma_addr_t dma_addr)
|
|
+{
|
|
+ unsigned int order = get_order(size);
|
|
+ unsigned long va = (unsigned long)vaddr;
|
|
+
|
|
+ xen_destroy_contiguous_region(va, order);
|
|
+ free_pages(va, order);
|
|
+}
|
|
#endif
|
|
|
|
/*
|
|
@@ -291,164 +358,6 @@ int dma_supported(struct device *dev, u6
|
|
}
|
|
EXPORT_SYMBOL(dma_supported);
|
|
|
|
-/* Allocate DMA memory on node near device */
|
|
-static struct page *
|
|
-dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order)
|
|
-{
|
|
- int node;
|
|
-
|
|
- node = dev_to_node(dev);
|
|
-
|
|
- return alloc_pages_node(node, gfp, order);
|
|
-}
|
|
-
|
|
-/*
|
|
- * Allocate memory for a coherent mapping.
|
|
- */
|
|
-void *
|
|
-dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
|
|
- gfp_t gfp)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct dma_mapping_ops *ops = get_dma_ops(dev);
|
|
-#endif
|
|
- void *memory = NULL;
|
|
- struct page *page;
|
|
- unsigned long dma_mask = 0;
|
|
- int noretry = 0;
|
|
- unsigned int order = get_order(size);
|
|
-
|
|
- /* ignore region specifiers */
|
|
- gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
|
|
-
|
|
- if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
|
|
- return memory;
|
|
-
|
|
- if (!dev) {
|
|
- dev = &fallback_dev;
|
|
- gfp |= GFP_DMA;
|
|
- }
|
|
- dma_mask = dev->coherent_dma_mask;
|
|
- if (dma_mask == 0)
|
|
- dma_mask = (gfp & GFP_DMA) ? DMA_24BIT_MASK : DMA_32BIT_MASK;
|
|
-
|
|
- /* Device not DMA able */
|
|
- if (dev->dma_mask == NULL)
|
|
- return NULL;
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- gfp &= ~(__GFP_DMA | __GFP_DMA32);
|
|
-#else
|
|
- /* Don't invoke OOM killer or retry in lower 16MB DMA zone */
|
|
- if (gfp & __GFP_DMA)
|
|
- noretry = 1;
|
|
-
|
|
-#ifdef CONFIG_X86_64
|
|
- /* Why <=? Even when the mask is smaller than 4GB it is often
|
|
- larger than 16MB and in this case we have a chance of
|
|
- finding fitting memory in the next higher zone first. If
|
|
- not retry with true GFP_DMA. -AK */
|
|
- if (dma_mask <= DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
|
|
- gfp |= GFP_DMA32;
|
|
-#endif
|
|
-
|
|
- again:
|
|
-#endif
|
|
- page = dma_alloc_pages(dev,
|
|
- noretry ? gfp | __GFP_NORETRY : gfp, order);
|
|
- if (page == NULL)
|
|
- return NULL;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- {
|
|
- int high, mmu;
|
|
- dma_addr_t bus = page_to_phys(page);
|
|
- memory = page_address(page);
|
|
- high = (bus + size) >= dma_mask;
|
|
- mmu = high;
|
|
- if (force_iommu && !(gfp & GFP_DMA))
|
|
- mmu = 1;
|
|
- else if (high) {
|
|
- free_pages((unsigned long)memory, order);
|
|
-
|
|
- /* Don't use the 16MB ZONE_DMA unless absolutely
|
|
- needed. It's better to use remapping first. */
|
|
- if (dma_mask < DMA_32BIT_MASK && !(gfp & GFP_DMA)) {
|
|
- gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
|
|
- goto again;
|
|
- }
|
|
-
|
|
- /* Let low level make its own zone decisions */
|
|
- gfp &= ~(GFP_DMA32|GFP_DMA);
|
|
-
|
|
- if (ops->alloc_coherent)
|
|
- return ops->alloc_coherent(dev, size,
|
|
- dma_handle, gfp);
|
|
- return NULL;
|
|
- }
|
|
-
|
|
- memset(memory, 0, size);
|
|
- if (!mmu) {
|
|
- *dma_handle = bus;
|
|
- return memory;
|
|
- }
|
|
- }
|
|
-
|
|
- if (ops->alloc_coherent) {
|
|
- free_pages((unsigned long)memory, order);
|
|
- gfp &= ~(GFP_DMA|GFP_DMA32);
|
|
- return ops->alloc_coherent(dev, size, dma_handle, gfp);
|
|
- }
|
|
-
|
|
- if (ops->map_simple) {
|
|
- *dma_handle = ops->map_simple(dev, virt_to_bus(memory),
|
|
- size,
|
|
- PCI_DMA_BIDIRECTIONAL);
|
|
- if (*dma_handle != bad_dma_address)
|
|
- return memory;
|
|
- }
|
|
-#else
|
|
- memory = page_address(page);
|
|
- if (xen_create_contiguous_region((unsigned long)memory, order,
|
|
- fls64(dma_mask)) == 0) {
|
|
- memset(memory, 0, size);
|
|
- *dma_handle = virt_to_bus(memory);
|
|
- return memory;
|
|
- }
|
|
-#endif
|
|
-
|
|
- if (panic_on_overflow)
|
|
- panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",
|
|
- (unsigned long)size);
|
|
- free_pages((unsigned long)memory, order);
|
|
- return NULL;
|
|
-}
|
|
-EXPORT_SYMBOL(dma_alloc_coherent);
|
|
-
|
|
-/*
|
|
- * Unmap coherent memory.
|
|
- * The caller must ensure that the device has finished accessing the mapping.
|
|
- */
|
|
-void dma_free_coherent(struct device *dev, size_t size,
|
|
- void *vaddr, dma_addr_t bus)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct dma_mapping_ops *ops = get_dma_ops(dev);
|
|
-#endif
|
|
-
|
|
- int order = get_order(size);
|
|
- WARN_ON(irqs_disabled()); /* for portability */
|
|
- if (dma_release_from_coherent(dev, order, vaddr))
|
|
- return;
|
|
-#ifndef CONFIG_XEN
|
|
- if (ops->unmap_single)
|
|
- ops->unmap_single(dev, bus, size, 0);
|
|
-#endif
|
|
- xen_destroy_contiguous_region((unsigned long)vaddr, order);
|
|
- free_pages((unsigned long)vaddr, order);
|
|
-}
|
|
-EXPORT_SYMBOL(dma_free_coherent);
|
|
-
|
|
static int __init pci_iommu_init(void)
|
|
{
|
|
calgary_iommu_init();
|
|
--- head-2011-03-17.orig/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -5,6 +5,7 @@
|
|
|
|
#include <xen/gnttab.h>
|
|
|
|
+#include <asm/iommu.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/dma.h>
|
|
#include <asm/swiotlb.h>
|
|
@@ -36,7 +37,7 @@ gnttab_map_sg(struct device *hwdev, stru
|
|
gnttab_dma_map_page(sg_page(sg)) + sg->offset;
|
|
sg->dma_length = sg->length;
|
|
IOMMU_BUG_ON(address_needs_mapping(
|
|
- hwdev, sg->dma_address));
|
|
+ hwdev, sg->dma_address, sg->length));
|
|
IOMMU_BUG_ON(range_straddles_page_boundary(
|
|
page_to_pseudophys(sg_page(sg)) + sg->offset,
|
|
sg->length));
|
|
@@ -67,7 +68,7 @@ gnttab_map_single(struct device *dev, ph
|
|
dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
|
|
offset_in_page(paddr);
|
|
IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
|
|
- IOMMU_BUG_ON(address_needs_mapping(dev, dma));
|
|
+ IOMMU_BUG_ON(address_needs_mapping(dev, dma, size));
|
|
|
|
return dma;
|
|
}
|
|
@@ -84,7 +85,9 @@ static int nommu_dma_supported(struct de
|
|
return 1;
|
|
}
|
|
|
|
-static struct dma_mapping_ops nommu_dma_ops = {
|
|
+struct dma_mapping_ops nommu_dma_ops = {
|
|
+ .alloc_coherent = dma_generic_alloc_coherent,
|
|
+ .free_coherent = dma_generic_free_coherent,
|
|
.map_single = gnttab_map_single,
|
|
.unmap_single = gnttab_unmap_single,
|
|
.map_sg = gnttab_map_sg,
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 15:59:49.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:00:33.000000000 +0100
|
|
@@ -151,7 +151,8 @@ static void mwait_idle(void)
|
|
static void poll_idle(void)
|
|
{
|
|
local_irq_enable();
|
|
- cpu_relax();
|
|
+ while (!need_resched())
|
|
+ cpu_relax();
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:34:28.000000000 +0100
|
|
@@ -37,6 +37,7 @@
|
|
#include <linux/tick.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/prctl.h>
|
|
+#include <linux/dmi.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -51,8 +52,6 @@
|
|
#endif
|
|
|
|
#include <xen/interface/physdev.h>
|
|
-#include <xen/interface/vcpu.h>
|
|
-#include <xen/cpu_hotplug.h>
|
|
|
|
#include <linux/err.h>
|
|
|
|
@@ -60,6 +59,8 @@
|
|
#include <asm/cpu.h>
|
|
#include <asm/kdebug.h>
|
|
#include <asm/idle.h>
|
|
+#include <asm/syscalls.h>
|
|
+#include <asm/smp.h>
|
|
|
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
|
|
@@ -78,42 +79,12 @@ unsigned long thread_saved_pc(struct tas
|
|
return ((unsigned long *)tsk->thread.sp)[3];
|
|
}
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
-#ifndef CONFIG_XEN
|
|
-#include <asm/nmi.h>
|
|
-
|
|
-static void cpu_exit_clear(void)
|
|
-{
|
|
- int cpu = raw_smp_processor_id();
|
|
-
|
|
- idle_task_exit();
|
|
-
|
|
- cpu_uninit();
|
|
- irq_ctx_exit(cpu);
|
|
-
|
|
- cpu_clear(cpu, cpu_callout_map);
|
|
- cpu_clear(cpu, cpu_callin_map);
|
|
-
|
|
- numa_remove_cpu(cpu);
|
|
- c1e_remove_cpu(cpu);
|
|
-}
|
|
-#endif
|
|
-
|
|
-static inline void play_dead(void)
|
|
-{
|
|
- idle_task_exit();
|
|
- local_irq_disable();
|
|
- cpu_clear(smp_processor_id(), cpu_initialized);
|
|
- preempt_enable_no_resched();
|
|
- VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
|
|
- cpu_bringup();
|
|
-}
|
|
-#else
|
|
+#ifndef CONFIG_SMP
|
|
static inline void play_dead(void)
|
|
{
|
|
BUG();
|
|
}
|
|
-#endif /* CONFIG_HOTPLUG_CPU */
|
|
+#endif
|
|
|
|
/*
|
|
* The idle thread. There's no useful work to be
|
|
@@ -155,12 +126,13 @@ void cpu_idle(void)
|
|
}
|
|
}
|
|
|
|
-void __show_registers(struct pt_regs *regs, int all)
|
|
+void __show_regs(struct pt_regs *regs, int all)
|
|
{
|
|
unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
|
|
unsigned long d0, d1, d2, d3, d6, d7;
|
|
unsigned long sp;
|
|
unsigned short ss, gs;
|
|
+ const char *board;
|
|
|
|
if (user_mode_vm(regs)) {
|
|
sp = regs->sp;
|
|
@@ -173,11 +145,15 @@ void __show_registers(struct pt_regs *re
|
|
}
|
|
|
|
printk("\n");
|
|
- printk("Pid: %d, comm: %s %s (%s %.*s)\n",
|
|
+
|
|
+ board = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
+ if (!board)
|
|
+ board = "";
|
|
+ printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
|
|
task_pid_nr(current), current->comm,
|
|
print_tainted(), init_utsname()->release,
|
|
(int)strcspn(init_utsname()->version, " "),
|
|
- init_utsname()->version);
|
|
+ init_utsname()->version, board);
|
|
|
|
printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
|
|
(u16)regs->cs, regs->ip, regs->flags,
|
|
@@ -216,7 +192,7 @@ void __show_registers(struct pt_regs *re
|
|
|
|
void show_regs(struct pt_regs *regs)
|
|
{
|
|
- __show_registers(regs, 1);
|
|
+ __show_regs(regs, 1);
 show_trace(NULL, regs, &regs->sp, regs->bp);
}
|
|
|
|
@@ -269,6 +245,14 @@ void exit_thread(void)
|
|
t->io_bitmap_ptr = NULL;
|
|
clear_thread_flag(TIF_IO_BITMAP);
|
|
}
|
|
+#ifdef CONFIG_X86_DS
|
|
+ /* Free any DS contexts that have not been properly released. */
|
|
+ if (unlikely(current->thread.ds_ctx)) {
|
|
+ /* we clear debugctl to make sure DS is not used. */
|
|
+ update_debugctlmsr(0);
|
|
+ ds_free(current->thread.ds_ctx);
|
|
+ }
|
|
+#endif /* CONFIG_X86_DS */
|
|
}
|
|
|
|
void flush_thread(void)
|
|
@@ -434,6 +418,35 @@ int set_tsc_mode(unsigned int val)
|
|
return 0;
|
|
}
|
|
|
|
+#ifdef CONFIG_X86_DS
|
|
+static int update_debugctl(struct thread_struct *prev,
|
|
+ struct thread_struct *next, unsigned long debugctl)
|
|
+{
|
|
+ unsigned long ds_prev = 0;
|
|
+ unsigned long ds_next = 0;
|
|
+
|
|
+ if (prev->ds_ctx)
|
|
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
|
|
+ if (next->ds_ctx)
|
|
+ ds_next = (unsigned long)next->ds_ctx->ds;
|
|
+
|
|
+ if (ds_next != ds_prev) {
|
|
+ /* we clear debugctl to make sure DS
|
|
+ * is not in use when we change it */
|
|
+ debugctl = 0;
|
|
+ update_debugctlmsr(0);
|
|
+ wrmsr(MSR_IA32_DS_AREA, ds_next, 0);
|
|
+ }
|
|
+ return debugctl;
|
|
+}
|
|
+#else
|
|
+static int update_debugctl(struct thread_struct *prev,
|
|
+ struct thread_struct *next, unsigned long debugctl)
|
|
+{
|
|
+ return debugctl;
|
|
+}
|
|
+#endif /* CONFIG_X86_DS */
|
|
+
|
|
static noinline void
|
|
__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
|
|
{
|
|
@@ -443,14 +456,7 @@ __switch_to_xtra(struct task_struct *pre
|
|
prev = &prev_p->thread;
|
|
next = &next_p->thread;
|
|
|
|
- debugctl = prev->debugctlmsr;
|
|
- if (next->ds_area_msr != prev->ds_area_msr) {
|
|
- /* we clear debugctl to make sure DS
|
|
- * is not in use when we change it */
|
|
- debugctl = 0;
|
|
- update_debugctlmsr(0);
|
|
- wrmsr(MSR_IA32_DS_AREA, next->ds_area_msr, 0);
|
|
- }
|
|
+ debugctl = update_debugctl(prev, next, prev->debugctlmsr);
|
|
|
|
if (next->debugctlmsr != debugctl)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
@@ -474,13 +480,13 @@ __switch_to_xtra(struct task_struct *pre
|
|
hard_enable_TSC();
|
|
}
|
|
|
|
-#ifdef X86_BTS
|
|
+#ifdef CONFIG_X86_PTRACE_BTS
|
|
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
|
|
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
|
|
|
|
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
|
|
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
|
|
-#endif
|
|
+#endif /* CONFIG_X86_PTRACE_BTS */
|
|
}
|
|
|
|
/*
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:34:01.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:34:22.000000000 +0100
|
|
@@ -40,25 +40,23 @@
|
|
#include <linux/kdebug.h>
|
|
#include <linux/tick.h>
|
|
#include <linux/prctl.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/io.h>
|
|
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/system.h>
|
|
-#include <asm/io.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/i387.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/pda.h>
|
|
#include <asm/prctl.h>
|
|
#include <xen/interface/physdev.h>
|
|
-#include <xen/interface/vcpu.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/hardirq.h>
|
|
#include <asm/ia32.h>
|
|
#include <asm/idle.h>
|
|
-
|
|
-#include <xen/cpu_hotplug.h>
|
|
+#include <asm/syscalls.h>
|
|
|
|
asmlinkage extern void ret_from_fork(void);
|
|
|
|
@@ -70,6 +68,13 @@ void idle_notifier_register(struct notif
|
|
{
|
|
atomic_notifier_chain_register(&idle_notifier, n);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(idle_notifier_register);
|
|
+
|
|
+void idle_notifier_unregister(struct notifier_block *n)
|
|
+{
|
|
+ atomic_notifier_chain_unregister(&idle_notifier, n);
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(idle_notifier_unregister);
|
|
|
|
void enter_idle(void)
|
|
{
|
|
@@ -93,25 +98,12 @@ void exit_idle(void)
|
|
__exit_idle();
|
|
}
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
-static inline void play_dead(void)
|
|
-{
|
|
- idle_task_exit();
|
|
-#ifndef CONFIG_XEN
|
|
- c1e_remove_cpu(raw_smp_processor_id());
|
|
-#endif
|
|
- local_irq_disable();
|
|
- cpu_clear(smp_processor_id(), cpu_initialized);
|
|
- preempt_enable_no_resched();
|
|
- VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
|
|
- cpu_bringup();
|
|
-}
|
|
-#else
|
|
+#ifndef CONFIG_SMP
|
|
static inline void play_dead(void)
|
|
{
|
|
BUG();
|
|
}
|
|
-#endif /* CONFIG_HOTPLUG_CPU */
|
|
+#endif
|
|
|
|
/*
|
|
* The idle thread. There's no useful work to be
|
|
@@ -156,63 +148,74 @@ void cpu_idle(void)
|
|
}
|
|
|
|
/* Prints also some state that isn't saved in the pt_regs */
|
|
-void __show_regs(struct pt_regs * regs)
|
|
+void __show_regs(struct pt_regs *regs, int all)
|
|
{
|
|
- unsigned long fs, gs, shadowgs;
|
|
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
|
|
unsigned long d0, d1, d2, d3, d6, d7;
|
|
unsigned int fsindex, gsindex;
|
|
unsigned int ds, cs, es;
|
|
|
|
printk("\n");
|
|
print_modules();
|
|
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
+ printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
current->pid, current->comm, print_tainted(),
|
|
init_utsname()->release,
|
|
(int)strcspn(init_utsname()->version, " "),
|
|
init_utsname()->version);
|
|
- printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
|
|
+ printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
|
|
printk_address(regs->ip, 1);
|
|
- printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->sp,
|
|
- regs->flags);
|
|
- printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
|
|
+ printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
|
|
+ regs->sp, regs->flags);
|
|
+ printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
|
|
regs->ax, regs->bx, regs->cx);
|
|
- printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
|
|
+ printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
|
|
regs->dx, regs->si, regs->di);
|
|
- printk("RBP: %016lx R08: %016lx R09: %016lx\n",
|
|
+ printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
|
|
regs->bp, regs->r8, regs->r9);
|
|
- printk("R10: %016lx R11: %016lx R12: %016lx\n",
|
|
- regs->r10, regs->r11, regs->r12);
|
|
- printk("R13: %016lx R14: %016lx R15: %016lx\n",
|
|
- regs->r13, regs->r14, regs->r15);
|
|
-
|
|
- asm("mov %%ds,%0" : "=r" (ds));
|
|
- asm("mov %%cs,%0" : "=r" (cs));
|
|
- asm("mov %%es,%0" : "=r" (es));
|
|
+ printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
|
|
+ regs->r10, regs->r11, regs->r12);
|
|
+ printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
|
|
+ regs->r13, regs->r14, regs->r15);
|
|
+
|
|
+ asm("movl %%ds,%0" : "=r" (ds));
|
|
+ asm("movl %%cs,%0" : "=r" (cs));
|
|
+ asm("movl %%es,%0" : "=r" (es));
|
|
asm("mov %%fs,%0" : "=r" (fsindex));
|
|
asm("mov %%gs,%0" : "=r" (gsindex));
|
|
|
|
rdmsrl(MSR_FS_BASE, fs);
|
|
- rdmsrl(MSR_GS_BASE, gs);
|
|
- rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
|
|
+ rdmsrl(MSR_GS_BASE, gs);
|
|
+ rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
|
|
|
|
- printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
|
|
- fs,fsindex,gs,gsindex,shadowgs);
|
|
- printk("CS: %04x DS: %04x ES: %04x\n", cs, ds, es);
|
|
+ if (!all)
|
|
+ return;
|
|
+
|
|
+ cr0 = read_cr0();
|
|
+ cr2 = read_cr2();
|
|
+ cr3 = read_cr3();
|
|
+ cr4 = read_cr4();
|
|
+
|
|
+ printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
|
|
+ fs, fsindex, gs, gsindex, shadowgs);
|
|
+ printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
|
|
+ es, cr0);
|
|
+ printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
|
|
+ cr4);
|
|
|
|
get_debugreg(d0, 0);
|
|
get_debugreg(d1, 1);
|
|
get_debugreg(d2, 2);
|
|
- printk("DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
|
|
+ printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
|
|
get_debugreg(d3, 3);
|
|
get_debugreg(d6, 6);
|
|
get_debugreg(d7, 7);
|
|
- printk("DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
|
|
+ printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
|
|
}
|
|
|
|
void show_regs(struct pt_regs *regs)
|
|
{
|
|
- printk("CPU %d:", smp_processor_id());
|
|
- __show_regs(regs);
|
|
+ printk(KERN_INFO "CPU %d:", smp_processor_id());
|
|
+ __show_regs(regs, 1);
|
|
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
|
|
}
|
|
|
|
@@ -249,6 +252,14 @@ void exit_thread(void)
|
|
#endif
|
|
t->io_bitmap_max = 0;
|
|
}
|
|
+#ifdef CONFIG_X86_DS
|
|
+ /* Free any DS contexts that have not been properly released. */
|
|
+ if (unlikely(t->ds_ctx)) {
|
|
+ /* we clear debugctl to make sure DS is not used. */
|
|
+ update_debugctlmsr(0);
|
|
+ ds_free(t->ds_ctx);
|
|
+ }
|
|
+#endif /* CONFIG_X86_DS */
|
|
}
|
|
|
|
void xen_load_gs_index(unsigned gs)
|
|
@@ -329,10 +340,10 @@ void prepare_to_copy(struct task_struct
|
|
|
|
int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
unsigned long unused,
|
|
- struct task_struct * p, struct pt_regs * regs)
|
|
+ struct task_struct *p, struct pt_regs *regs)
|
|
{
|
|
int err;
|
|
- struct pt_regs * childregs;
|
|
+ struct pt_regs *childregs;
|
|
struct task_struct *me = current;
|
|
|
|
childregs = ((struct pt_regs *)
|
|
@@ -376,10 +387,10 @@ int copy_thread(int nr, unsigned long cl
|
|
if (test_thread_flag(TIF_IA32))
|
|
err = do_set_thread_area(p, -1,
|
|
(struct user_desc __user *)childregs->si, 0);
|
|
- else
|
|
-#endif
|
|
- err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
|
|
- if (err)
|
|
+ else
|
|
+#endif
|
|
+ err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
|
|
+ if (err)
|
|
goto out;
|
|
}
|
|
p->thread.iopl = current->thread.iopl;
|
|
@@ -486,13 +497,27 @@ static inline void __switch_to_xtra(stru
|
|
next = &next_p->thread;
|
|
|
|
debugctl = prev->debugctlmsr;
|
|
- if (next->ds_area_msr != prev->ds_area_msr) {
|
|
- /* we clear debugctl to make sure DS
|
|
- * is not in use when we change it */
|
|
- debugctl = 0;
|
|
- update_debugctlmsr(0);
|
|
- wrmsrl(MSR_IA32_DS_AREA, next->ds_area_msr);
|
|
+
|
|
+#ifdef CONFIG_X86_DS
|
|
+ {
|
|
+ unsigned long ds_prev = 0, ds_next = 0;
|
|
+
|
|
+ if (prev->ds_ctx)
|
|
+ ds_prev = (unsigned long)prev->ds_ctx->ds;
|
|
+ if (next->ds_ctx)
|
|
+ ds_next = (unsigned long)next->ds_ctx->ds;
|
|
+
|
|
+ if (ds_next != ds_prev) {
|
|
+ /*
|
|
+ * We clear debugctl to make sure DS
|
|
+ * is not in use when we change it:
|
|
+ */
|
|
+ debugctl = 0;
|
|
+ update_debugctlmsr(0);
|
|
+ wrmsrl(MSR_IA32_DS_AREA, ds_next);
|
|
+ }
|
|
}
|
|
+#endif /* CONFIG_X86_DS */
|
|
|
|
if (next->debugctlmsr != debugctl)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
@@ -516,13 +541,13 @@ static inline void __switch_to_xtra(stru
|
|
hard_enable_TSC();
|
|
}
|
|
|
|
-#ifdef X86_BTS
|
|
+#ifdef CONFIG_X86_PTRACE_BTS
|
|
if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS))
|
|
ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS);
|
|
|
|
if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS))
|
|
ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES);
|
|
-#endif
|
|
+#endif /* CONFIG_X86_PTRACE_BTS */
|
|
}
|
|
|
|
/*
|
|
@@ -554,7 +579,7 @@ __switch_to(struct task_struct *prev_p,
|
|
multicall_entry_t _mcl[8], *mcl = _mcl;
|
|
|
|
/* we're going to use this soon, after a few expensive things */
|
|
- if (next_p->fpu_counter>5)
|
|
+ if (next_p->fpu_counter > 5)
|
|
prefetch(next->xstate);
|
|
|
|
/*
|
|
@@ -635,12 +660,12 @@ __switch_to(struct task_struct *prev_p,
|
|
if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
|
|
BUG();
|
|
|
|
- /*
|
|
+ /*
|
|
* Switch DS and ES.
|
|
* This won't pick up thread selector changes, but I guess that is ok.
|
|
*/
|
|
if (unlikely(next->es))
|
|
- loadsegment(es, next->es);
|
|
+ loadsegment(es, next->es);
|
|
|
|
if (unlikely(next->ds))
|
|
loadsegment(ds, next->ds);
|
|
@@ -654,7 +679,7 @@ __switch_to(struct task_struct *prev_p,
|
|
*/
|
|
arch_leave_lazy_cpu_mode();
|
|
|
|
- /*
|
|
+ /*
|
|
* Switch FS and GS.
|
|
*
|
|
* Segment register != 0 always requires a reload. Also
|
|
@@ -673,10 +698,10 @@ __switch_to(struct task_struct *prev_p,
|
|
if (next->gs)
|
|
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER, next->gs));
|
|
|
|
- /*
|
|
+ /*
|
|
* Switch the PDA context.
|
|
*/
|
|
- write_pda(pcurrent, next_p);
|
|
+ write_pda(pcurrent, next_p);
|
|
write_pda(kernelstack,
|
|
(unsigned long)task_stack_page(next_p) +
|
|
THREAD_SIZE - PDA_STACKOFFSET);
|
|
@@ -717,7 +742,7 @@ long sys_execve(char __user *name, char
|
|
char __user * __user *envp, struct pt_regs *regs)
|
|
{
|
|
long error;
|
|
- char * filename;
|
|
+ char *filename;
|
|
|
|
filename = getname(name);
|
|
error = PTR_ERR(filename);
|
|
@@ -775,56 +800,56 @@ asmlinkage long sys_vfork(struct pt_regs
|
|
unsigned long get_wchan(struct task_struct *p)
|
|
{
|
|
unsigned long stack;
|
|
- u64 fp,ip;
|
|
+ u64 fp, ip;
|
|
int count = 0;
|
|
|
|
- if (!p || p == current || p->state==TASK_RUNNING)
|
|
- return 0;
|
|
+ if (!p || p == current || p->state == TASK_RUNNING)
|
|
+ return 0;
|
|
stack = (unsigned long)task_stack_page(p);
|
|
- if (p->thread.sp < stack || p->thread.sp > stack+THREAD_SIZE)
|
|
+ if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE)
|
|
return 0;
|
|
fp = *(u64 *)(p->thread.sp);
|
|
- do {
|
|
+ do {
|
|
if (fp < (unsigned long)stack ||
|
|
- fp > (unsigned long)stack+THREAD_SIZE)
|
|
- return 0;
|
|
+ fp >= (unsigned long)stack+THREAD_SIZE)
|
|
+ return 0;
|
|
ip = *(u64 *)(fp+8);
|
|
if (!in_sched_functions(ip))
|
|
return ip;
|
|
- fp = *(u64 *)fp;
|
|
- } while (count++ < 16);
|
|
+ fp = *(u64 *)fp;
|
|
+ } while (count++ < 16);
|
|
return 0;
|
|
}
|
|
|
|
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
|
|
-{
|
|
- int ret = 0;
|
|
+{
|
|
+ int ret = 0;
|
|
int doit = task == current;
|
|
int cpu;
|
|
|
|
- switch (code) {
|
|
+ switch (code) {
|
|
case ARCH_SET_GS:
|
|
if (addr >= TASK_SIZE_OF(task))
|
|
- return -EPERM;
|
|
+ return -EPERM;
|
|
cpu = get_cpu();
|
|
- /* handle small bases via the GDT because that's faster to
|
|
+ /* handle small bases via the GDT because that's faster to
|
|
switch. */
|
|
- if (addr <= 0xffffffff) {
|
|
- set_32bit_tls(task, GS_TLS, addr);
|
|
- if (doit) {
|
|
+ if (addr <= 0xffffffff) {
|
|
+ set_32bit_tls(task, GS_TLS, addr);
|
|
+ if (doit) {
|
|
load_TLS(&task->thread, cpu);
|
|
- load_gs_index(GS_TLS_SEL);
|
|
+ load_gs_index(GS_TLS_SEL);
|
|
}
|
|
- task->thread.gsindex = GS_TLS_SEL;
|
|
+ task->thread.gsindex = GS_TLS_SEL;
|
|
task->thread.gs = 0;
|
|
- } else {
|
|
+ } else {
|
|
task->thread.gsindex = 0;
|
|
task->thread.gs = addr;
|
|
if (doit) {
|
|
load_gs_index(0);
|
|
ret = HYPERVISOR_set_segment_base(
|
|
SEGBASE_GS_USER, addr);
|
|
- }
|
|
+ }
|
|
}
|
|
put_cpu();
|
|
break;
|
|
@@ -879,8 +904,7 @@ long do_arch_prctl(struct task_struct *t
|
|
rdmsrl(MSR_KERNEL_GS_BASE, base);
|
|
else
|
|
base = task->thread.gs;
|
|
- }
|
|
- else
|
|
+ } else
|
|
base = task->thread.gs;
|
|
ret = put_user(base, (unsigned long __user *)addr);
|
|
break;
|
|
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-04 15:09:03.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:22:12.000000000 +0100
|
|
@@ -261,6 +261,9 @@ unsigned long saved_video_mode;
|
|
#define RAMDISK_LOAD_FLAG 0x4000
|
|
|
|
static char __initdata command_line[COMMAND_LINE_SIZE];
|
|
+#ifdef CONFIG_CMDLINE_BOOL
|
|
+static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE;
|
|
+#endif
|
|
|
|
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
struct edd edd;
|
|
@@ -339,7 +342,7 @@ static void __init relocate_initrd(void)
|
|
if (clen > MAX_MAP_CHUNK-slop)
|
|
clen = MAX_MAP_CHUNK-slop;
|
|
mapaddr = ramdisk_image & PAGE_MASK;
|
|
- p = early_ioremap(mapaddr, clen+slop);
|
|
+ p = early_memremap(mapaddr, clen+slop);
|
|
memcpy(q, p+slop, clen);
|
|
early_iounmap(p, clen+slop);
|
|
q += clen;
|
|
@@ -430,7 +433,7 @@ static void __init parse_setup_data(void
|
|
return;
|
|
pa_data = boot_params.hdr.setup_data;
|
|
while (pa_data) {
|
|
- data = early_ioremap(pa_data, PAGE_SIZE);
|
|
+ data = early_memremap(pa_data, PAGE_SIZE);
|
|
switch (data->type) {
|
|
case SETUP_E820_EXT:
|
|
parse_e820_ext(data, pa_data);
|
|
@@ -455,7 +458,7 @@ static void __init e820_reserve_setup_da
|
|
return;
|
|
pa_data = boot_params.hdr.setup_data;
|
|
while (pa_data) {
|
|
- data = early_ioremap(pa_data, sizeof(*data));
|
|
+ data = early_memremap(pa_data, sizeof(*data));
|
|
e820_update_range(pa_data, sizeof(*data)+data->len,
|
|
E820_RAM, E820_RESERVED_KERN);
|
|
found = 1;
|
|
@@ -483,7 +486,7 @@ static void __init reserve_early_setup_d
|
|
return;
|
|
pa_data = boot_params.hdr.setup_data;
|
|
while (pa_data) {
|
|
- data = early_ioremap(pa_data, sizeof(*data));
|
|
+ data = early_memremap(pa_data, sizeof(*data));
|
|
sprintf(buf, "setup data %x", data->type);
|
|
reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
|
|
pa_data = data->next;
|
|
@@ -625,7 +628,13 @@ static void __init reserve_standard_io_r
|
|
|
|
}
|
|
|
|
-#ifdef CONFIG_PROC_VMCORE
|
|
+/*
|
|
+ * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by
|
|
+ * is_kdump_kernel() to determine if we are booting after a panic. Hence
|
|
+ * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE.
|
|
+ */
|
|
+
|
|
+#ifdef CONFIG_CRASH_DUMP
|
|
/* elfcorehdr= specifies the location of elf core header
|
|
* stored by the crashed kernel. This option will be passed
|
|
* by kexec loader to the capture kernel.
|
|
@@ -646,6 +655,190 @@ static struct x86_quirks default_x86_qui
|
|
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
|
|
|
|
/*
|
|
+ * Some BIOSes seem to corrupt the low 64k of memory during events
|
|
+ * like suspend/resume and unplugging an HDMI cable. Reserve all
|
|
+ * remaining free memory in that area and fill it with a distinct
|
|
+ * pattern.
|
|
+ */
|
|
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
|
|
+#define MAX_SCAN_AREAS 8
|
|
+
|
|
+static int __read_mostly memory_corruption_check = -1;
|
|
+
|
|
+static unsigned __read_mostly corruption_check_size = 64*1024;
|
|
+static unsigned __read_mostly corruption_check_period = 60; /* seconds */
|
|
+
|
|
+static struct e820entry scan_areas[MAX_SCAN_AREAS];
|
|
+static int num_scan_areas;
|
|
+
|
|
+
|
|
+static int set_corruption_check(char *arg)
|
|
+{
|
|
+ char *end;
|
|
+
|
|
+ memory_corruption_check = simple_strtol(arg, &end, 10);
|
|
+
|
|
+ return (*end == 0) ? 0 : -EINVAL;
|
|
+}
|
|
+early_param("memory_corruption_check", set_corruption_check);
|
|
+
|
|
+static int set_corruption_check_period(char *arg)
|
|
+{
|
|
+ char *end;
|
|
+
|
|
+ corruption_check_period = simple_strtoul(arg, &end, 10);
|
|
+
|
|
+ return (*end == 0) ? 0 : -EINVAL;
|
|
+}
|
|
+early_param("memory_corruption_check_period", set_corruption_check_period);
|
|
+
|
|
+static int set_corruption_check_size(char *arg)
|
|
+{
|
|
+ char *end;
|
|
+ unsigned size;
|
|
+
|
|
+ size = memparse(arg, &end);
|
|
+
|
|
+ if (*end == '\0')
|
|
+ corruption_check_size = size;
|
|
+
|
|
+ return (size == corruption_check_size) ? 0 : -EINVAL;
|
|
+}
|
|
+early_param("memory_corruption_check_size", set_corruption_check_size);
|
|
+
|
|
+
|
|
+static void __init setup_bios_corruption_check(void)
|
|
+{
|
|
+ u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */
|
|
+
|
|
+ if (memory_corruption_check == -1) {
|
|
+ memory_corruption_check =
|
|
+#ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK
|
|
+ 1
|
|
+#else
|
|
+ 0
|
|
+#endif
|
|
+ ;
|
|
+ }
|
|
+
|
|
+ if (corruption_check_size == 0)
|
|
+ memory_corruption_check = 0;
|
|
+
|
|
+ if (!memory_corruption_check)
|
|
+ return;
|
|
+
|
|
+ corruption_check_size = round_up(corruption_check_size, PAGE_SIZE);
|
|
+
|
|
+ while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) {
|
|
+ u64 size;
|
|
+ addr = find_e820_area_size(addr, &size, PAGE_SIZE);
|
|
+
|
|
+ if (addr == 0)
|
|
+ break;
|
|
+
|
|
+ if ((addr + size) > corruption_check_size)
|
|
+ size = corruption_check_size - addr;
|
|
+
|
|
+ if (size == 0)
|
|
+ break;
|
|
+
|
|
+ e820_update_range(addr, size, E820_RAM, E820_RESERVED);
|
|
+ scan_areas[num_scan_areas].addr = addr;
|
|
+ scan_areas[num_scan_areas].size = size;
|
|
+ num_scan_areas++;
|
|
+
|
|
+ /* Assume we've already mapped this early memory */
|
|
+ memset(__va(addr), 0, size);
|
|
+
|
|
+ addr += size;
|
|
+ }
|
|
+
|
|
+ printk(KERN_INFO "Scanning %d areas for low memory corruption\n",
|
|
+ num_scan_areas);
|
|
+ update_e820();
|
|
+}
|
|
+
|
|
+static struct timer_list periodic_check_timer;
|
|
+
|
|
+void check_for_bios_corruption(void)
|
|
+{
|
|
+ int i;
|
|
+ int corruption = 0;
|
|
+
|
|
+ if (!memory_corruption_check)
|
|
+ return;
|
|
+
|
|
+ for(i = 0; i < num_scan_areas; i++) {
|
|
+ unsigned long *addr = __va(scan_areas[i].addr);
|
|
+ unsigned long size = scan_areas[i].size;
|
|
+
|
|
+ for(; size; addr++, size -= sizeof(unsigned long)) {
|
|
+ if (!*addr)
|
|
+ continue;
|
|
+ printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n",
|
|
+ addr, __pa(addr), *addr);
|
|
+ corruption = 1;
|
|
+ *addr = 0;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n");
|
|
+}
|
|
+
|
|
+static void periodic_check_for_corruption(unsigned long data)
|
|
+{
|
|
+ check_for_bios_corruption();
|
|
+ mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ));
|
|
+}
|
|
+
|
|
+void start_periodic_check_for_corruption(void)
|
|
+{
|
|
+ if (!memory_corruption_check || corruption_check_period == 0)
|
|
+ return;
|
|
+
|
|
+ printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n",
|
|
+ corruption_check_period);
|
|
+
|
|
+ init_timer(&periodic_check_timer);
|
|
+ periodic_check_timer.function = &periodic_check_for_corruption;
|
|
+ periodic_check_for_corruption(0);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
|
|
+{
|
|
+ printk(KERN_NOTICE
|
|
+ "%s detected: BIOS may corrupt low RAM, working it around.\n",
|
|
+ d->ident);
|
|
+
|
|
+ e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
|
|
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/* List of systems that have known low memory corruption BIOS problems */
|
|
+static struct dmi_system_id __initdata bad_bios_dmi_table[] = {
|
|
+#ifdef CONFIG_X86_RESERVE_LOW_64K
|
|
+ {
|
|
+ .callback = dmi_low_memory_corruption,
|
|
+ .ident = "AMI BIOS",
|
|
+ .matches = {
|
|
+ DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
|
|
+ },
|
|
+ },
|
|
+ {
|
|
+ .callback = dmi_low_memory_corruption,
|
|
+ .ident = "Phoenix BIOS",
|
|
+ .matches = {
|
|
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies"),
|
|
+ },
|
|
+ },
|
|
+#endif
|
|
+ {}
|
|
+};
|
|
+
|
|
+/*
|
|
* Determine if we were loaded by an EFI loader. If so, then we have also been
|
|
* passed the efi memmap, systab, etc., so we should use these data structures
|
|
* for initialization. Note, the efi init code path is determined by the
|
|
@@ -693,6 +886,9 @@ void __init setup_arch(char **cmdline_p)
|
|
printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
|
#endif
|
|
|
|
+ /* VMI may relocate the fixmap; do this before touching ioremap area */
|
|
+ vmi_init();
|
|
+
|
|
early_cpu_init();
|
|
early_ioremap_init();
|
|
|
|
@@ -787,6 +983,19 @@ void __init setup_arch(char **cmdline_p)
|
|
bss_resource.start = virt_to_phys(&__bss_start);
|
|
bss_resource.end = virt_to_phys(&__bss_stop)-1;
|
|
|
|
+#ifdef CONFIG_CMDLINE_BOOL
|
|
+#ifdef CONFIG_CMDLINE_OVERRIDE
|
|
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
|
|
+#else
|
|
+ if (builtin_cmdline[0]) {
|
|
+ /* append boot loader cmdline to builtin */
|
|
+ strlcat(builtin_cmdline, " ", COMMAND_LINE_SIZE);
|
|
+ strlcat(builtin_cmdline, boot_command_line, COMMAND_LINE_SIZE);
|
|
+ strlcpy(boot_command_line, builtin_cmdline, COMMAND_LINE_SIZE);
|
|
+ }
|
|
+#endif
|
|
+#endif
|
|
+
|
|
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
|
*cmdline_p = command_line;
|
|
|
|
@@ -796,13 +1005,8 @@ void __init setup_arch(char **cmdline_p)
|
|
check_efer();
|
|
#endif
|
|
|
|
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
|
|
- /*
|
|
- * Must be before kernel pagetables are setup
|
|
- * or fixmap area is touched.
|
|
- */
|
|
- vmi_init();
|
|
-#endif
|
|
+ /* Must be before kernel pagetables are setup */
|
|
+ vmi_activate();
|
|
|
|
/* after early param, so could get panic from serial */
|
|
reserve_early_setup_data();
|
|
@@ -821,10 +1025,15 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
finish_e820_parsing();
|
|
|
|
+ if (is_initial_xendomain()) {
|
|
+ dmi_scan_machine();
|
|
+
|
|
+ dmi_check_system(bad_bios_dmi_table);
|
|
+
|
|
#ifdef CONFIG_X86_32
|
|
- if (is_initial_xendomain())
|
|
probe_roms();
|
|
#endif
|
|
+ }
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* after parse_early_param, so could debug it */
|
|
@@ -870,6 +1079,10 @@ void __init setup_arch(char **cmdline_p)
|
|
num_physpages = max_pfn;
|
|
max_mapnr = max_pfn;
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+ if (cpu_has_x2apic)
|
|
+ check_x2apic();
|
|
+#endif
|
|
|
|
/* How many end-of-memory variables you have, grandma! */
|
|
/* need this before calling reserve_initrd */
|
|
@@ -881,6 +1094,10 @@ void __init setup_arch(char **cmdline_p)
|
|
high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
|
|
#endif
|
|
|
|
+#ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION
|
|
+ setup_bios_corruption_check();
|
|
+#endif
|
|
+
|
|
/* max_pfn_mapped is updated here */
|
|
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
|
|
max_pfn_mapped = max_low_pfn_mapped;
|
|
@@ -909,9 +1126,6 @@ void __init setup_arch(char **cmdline_p)
|
|
vsmp_init();
|
|
#endif
|
|
|
|
- if (is_initial_xendomain())
|
|
- dmi_scan_machine();
|
|
-
|
|
io_delay_init();
|
|
|
|
#ifdef CONFIG_ACPI
|
|
@@ -926,6 +1140,8 @@ void __init setup_arch(char **cmdline_p)
|
|
*/
|
|
acpi_boot_table_init();
|
|
|
|
+ early_acpi_boot_init();
|
|
+
|
|
#ifdef CONFIG_ACPI_NUMA
|
|
/*
|
|
* Parse SRAT to discover nodes.
|
|
@@ -1071,6 +1287,7 @@ void __init setup_arch(char **cmdline_p)
|
|
#endif
|
|
|
|
prefill_possible_map();
|
|
+
|
|
#ifdef CONFIG_X86_64
|
|
init_cpu_to_node();
|
|
#endif
|
|
@@ -1079,6 +1296,9 @@ void __init setup_arch(char **cmdline_p)
|
|
init_apic_mappings();
|
|
ioapic_init_mappings();
|
|
|
|
+ /* need to wait for io_apic is mapped */
|
|
+ nr_irqs = probe_nr_irqs();
|
|
+
|
|
kvm_guest_init();
|
|
|
|
e820_reserve_resources();
|
|
--- head-2011-03-17.orig/arch/x86/kernel/time-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/time-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -41,6 +41,7 @@
|
|
|
|
#include <asm/delay.h>
|
|
#include <asm/time.h>
|
|
+#include <asm/timer.h>
|
|
|
|
#include <xen/evtchn.h>
|
|
#include <xen/sysctl.h>
|
|
@@ -415,14 +416,9 @@ unsigned long profile_pc(struct pt_regs
|
|
unsigned long pc = instruction_pointer(regs);
|
|
|
|
#if defined(CONFIG_SMP) || defined(__x86_64__)
|
|
-# ifdef __i386__
|
|
- if (!v8086_mode(regs) && SEGMENT_IS_KERNEL_CODE(regs->cs)
|
|
-# else
|
|
- if (!user_mode(regs)
|
|
-# endif
|
|
- && in_lock_functions(pc)) {
|
|
+ if (!user_mode_vm(regs) && in_lock_functions(pc)) {
|
|
# ifdef CONFIG_FRAME_POINTER
|
|
- return ((unsigned long *)regs->bp)[1];
|
|
+ return *(unsigned long *)(regs->bp + sizeof(long));
|
|
# else
|
|
# ifdef __i386__
 unsigned long *sp = (unsigned long *)&regs->sp;
@@ -577,6 +573,7 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
run_local_timers();
|
|
if (rcu_pending(cpu))
|
|
rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
|
|
+ printk_tick();
|
|
scheduler_tick();
|
|
run_posix_cpu_timers(current);
|
|
profile_tick(CPU_PROFILING);
|
|
@@ -806,7 +803,8 @@ static void stop_hz_timer(void)
|
|
smp_mb();
|
|
|
|
/* Leave ourselves in tick mode if rcu or softirq or timer pending. */
|
|
- if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
|
|
+ if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
|
|
+ local_softirq_pending() ||
|
|
(j = get_next_timer_interrupt(jiffies),
|
|
time_before_eq(j, jiffies))) {
|
|
cpu_clear(cpu, nohz_cpu_mask);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -0,0 +1,1022 @@
|
|
+/*
|
|
+ * Copyright (C) 1991, 1992 Linus Torvalds
|
|
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
|
|
+ *
|
|
+ * Pentium III FXSR, SSE support
|
|
+ * Gareth Hughes <gareth@valinux.com>, May 2000
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * Handle hardware traps and faults.
|
|
+ */
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/kallsyms.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/utsname.h>
|
|
+#include <linux/kdebug.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/unwind.h>
|
|
+#include <linux/delay.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/kexec.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/timer.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/bug.h>
|
|
+#include <linux/nmi.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/io.h>
|
|
+
|
|
+#ifdef CONFIG_EISA
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/eisa.h>
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_MCA
|
|
+#include <linux/mca.h>
|
|
+#endif
|
|
+
|
|
+#if defined(CONFIG_EDAC)
|
|
+#include <linux/edac.h>
|
|
+#endif
|
|
+
|
|
+#include <asm/stacktrace.h>
|
|
+#include <asm/processor.h>
|
|
+#include <asm/debugreg.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/system.h>
|
|
+#include <asm/unwind.h>
|
|
+#include <asm/traps.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/i387.h>
|
|
+
|
|
+#include <mach_traps.h>
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/proto.h>
|
|
+#include <asm/pda.h>
|
|
+#else
|
|
+#include <asm/processor-flags.h>
|
|
+#include <asm/arch_hooks.h>
|
|
+#include <asm/nmi.h>
|
|
+#include <asm/smp.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/traps.h>
|
|
+
|
|
+#include "cpu/mcheck/mce.h"
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+DECLARE_BITMAP(used_vectors, NR_VECTORS);
|
|
+EXPORT_SYMBOL_GPL(used_vectors);
|
|
+#endif
|
|
+
|
|
+asmlinkage int system_call(void);
|
|
+
|
|
+/* Do we ignore FPU interrupts ? */
|
|
+char ignore_fpu_irq;
|
|
+
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
+/*
|
|
+ * The IDT has to be page-aligned to simplify the Pentium
|
|
+ * F0 0F bug workaround.. We have a special link segment
|
|
+ * for this.
|
|
+ */
|
|
+gate_desc idt_table[256]
|
|
+ __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+static int ignore_nmis;
|
|
+
|
|
+static inline void conditional_sti(struct pt_regs *regs)
|
|
+{
|
|
+ if (regs->flags & X86_EFLAGS_IF)
|
|
+ local_irq_enable();
|
|
+}
|
|
+
|
|
+static inline void preempt_conditional_sti(struct pt_regs *regs)
|
|
+{
|
|
+ inc_preempt_count();
|
|
+ if (regs->flags & X86_EFLAGS_IF)
|
|
+ local_irq_enable();
|
|
+}
|
|
+
|
|
+static inline void preempt_conditional_cli(struct pt_regs *regs)
|
|
+{
|
|
+ if (regs->flags & X86_EFLAGS_IF)
|
|
+ local_irq_disable();
|
|
+ dec_preempt_count();
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+static inline void
|
|
+die_if_kernel(const char *str, struct pt_regs *regs, long err)
|
|
+{
|
|
+ if (!user_mode_vm(regs))
|
|
+ die(str, regs, err);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
|
|
+ * invalid offset set (the LAZY one) and the faulting thread has
|
|
+ * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
|
|
+ * we set the offset field correctly and return 1.
|
|
+ */
|
|
+static int lazy_iobitmap_copy(void)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct thread_struct *thread;
|
|
+ struct tss_struct *tss;
|
|
+ int cpu;
|
|
+
|
|
+ cpu = get_cpu();
|
|
+ tss = &per_cpu(init_tss, cpu);
+ thread = &current->thread;
+
|
|
+ if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
|
|
+ thread->io_bitmap_ptr) {
|
|
+ memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
|
|
+ thread->io_bitmap_max);
|
|
+ /*
|
|
+ * If the previously set map was extending to higher ports
|
|
+ * than the current one, pad extra space with 0xff (no access).
|
|
+ */
|
|
+ if (thread->io_bitmap_max < tss->io_bitmap_max) {
|
|
+ memset((char *) tss->io_bitmap +
|
|
+ thread->io_bitmap_max, 0xff,
|
|
+ tss->io_bitmap_max - thread->io_bitmap_max);
|
|
+ }
|
|
+ tss->io_bitmap_max = thread->io_bitmap_max;
|
|
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
|
+ tss->io_bitmap_owner = thread;
|
|
+ put_cpu();
|
|
+
|
|
+ return 1;
|
|
+ }
|
|
+ put_cpu();
|
|
+#endif
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void __kprobes
|
|
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
|
|
+ long error_code, siginfo_t *info)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (regs->flags & X86_VM_MASK) {
|
|
+ /*
|
|
+ * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
|
|
+ * On nmi (interrupt 2), do_trap should not be called.
|
|
+ */
|
|
+ if (trapnr < 6)
|
|
+ goto vm86_trap;
|
|
+ goto trap_signal;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ if (!user_mode(regs))
|
|
+ goto kernel_trap;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+trap_signal:
|
|
+#endif
|
|
+ /*
|
|
+ * We want error_code and trap_no set for userspace faults and
|
|
+ * kernelspace faults which result in die(), but not
|
|
+ * kernelspace faults which are fixed up. die() gives the
|
|
+ * process no chance to handle the signal and notice the
|
|
+ * kernel fault information, so that won't result in polluting
|
|
+ * the information about previously queued, but not yet
|
|
+ * delivered, faults. See also do_general_protection below.
|
|
+ */
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
|
|
+ printk_ratelimit()) {
|
|
+ printk(KERN_INFO
|
|
+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
|
|
+ tsk->comm, tsk->pid, str,
|
|
+ regs->ip, regs->sp, error_code);
|
|
+ print_vma_addr(" in ", regs->ip);
|
|
+ printk("\n");
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ if (info)
|
|
+ force_sig_info(signr, info, tsk);
|
|
+ else
|
|
+ force_sig(signr, tsk);
|
|
+ return;
|
|
+
|
|
+kernel_trap:
|
|
+ if (!fixup_exception(regs)) {
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
+ die(str, regs, error_code);
|
|
+ }
|
|
+ return;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+vm86_trap:
|
|
+ if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
|
|
+ error_code, trapnr))
|
|
+ goto trap_signal;
|
|
+ return;
|
|
+#endif
|
|
+}
|
|
+
|
|
+#define DO_ERROR(trapnr, signr, str, name) \
|
|
+dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
|
|
+{ \
|
|
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
+ == NOTIFY_STOP) \
|
|
+ return; \
|
|
+ conditional_sti(regs); \
|
|
+ do_trap(trapnr, signr, str, regs, error_code, NULL); \
|
|
+}
|
|
+
|
|
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
|
|
+dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \
|
|
+{ \
|
|
+ siginfo_t info; \
|
|
+ info.si_signo = signr; \
|
|
+ info.si_errno = 0; \
|
|
+ info.si_code = sicode; \
|
|
+ info.si_addr = (void __user *)siaddr; \
|
|
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
+ == NOTIFY_STOP) \
|
|
+ return; \
|
|
+ conditional_sti(regs); \
|
|
+ do_trap(trapnr, signr, str, regs, error_code, &info); \
|
|
+}
|
|
+
|
|
+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
+DO_ERROR(4, SIGSEGV, "overflow", overflow)
|
|
+DO_ERROR(5, SIGSEGV, "bounds", bounds)
|
|
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
|
|
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
+DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
|
|
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
+#ifdef CONFIG_X86_32
|
|
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
|
|
+#endif
|
|
+DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+/* Runs on IST stack */
|
|
+dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
|
|
+ 12, SIGBUS) == NOTIFY_STOP)
|
|
+ return;
|
|
+ preempt_conditional_sti(regs);
|
|
+ do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
|
|
+ preempt_conditional_cli(regs);
|
|
+}
|
|
+
|
|
+dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ static const char str[] = "double fault";
|
|
+ struct task_struct *tsk = current;
|
|
+
|
|
+ /* Return not checked because double check cannot be ignored */
|
|
+ notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
|
|
+
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 8;
|
|
+
|
|
+ /* This is always a kernel trap and never fixable (and thus must
|
|
+ never return). */
|
|
+ for (;;)
|
|
+ die(str, regs, error_code);
|
|
+}
|
|
+#endif
|
|
+
|
|
+dotraplinkage void __kprobes
|
|
+do_general_protection(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ struct task_struct *tsk;
|
|
+
|
|
+ conditional_sti(regs);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (lazy_iobitmap_copy()) {
|
|
+ /* restart the faulting instruction */
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (regs->flags & X86_VM_MASK)
|
|
+ goto gp_in_vm86;
|
|
+#endif
|
|
+
|
|
+ tsk = current;
|
|
+ if (!user_mode(regs))
|
|
+ goto gp_in_kernel;
|
|
+
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
+
|
|
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
+ printk_ratelimit()) {
|
|
+ printk(KERN_INFO
|
|
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
+ tsk->comm, task_pid_nr(tsk),
|
|
+ regs->ip, regs->sp, error_code);
|
|
+ print_vma_addr(" in ", regs->ip);
|
|
+ printk("\n");
|
|
+ }
|
|
+
|
|
+ force_sig(SIGSEGV, tsk);
|
|
+ return;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+gp_in_vm86:
|
|
+ local_irq_enable();
|
|
+ handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+gp_in_kernel:
|
|
+ if (fixup_exception(regs))
|
|
+ return;
|
|
+
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
+ if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
+ error_code, 13, SIGSEGV) == NOTIFY_STOP)
|
|
+ return;
|
|
+ die("general protection fault", regs, error_code);
|
|
+}
|
|
+
|
|
+static notrace __kprobes void
|
|
+mem_parity_error(unsigned char reason, struct pt_regs *regs)
|
|
+{
|
|
+ printk(KERN_EMERG
|
|
+ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
|
|
+ reason, smp_processor_id());
|
|
+
|
|
+ printk(KERN_EMERG
|
|
+ "You have some hardware problem, likely on the PCI bus.\n");
|
|
+
|
|
+#if defined(CONFIG_EDAC)
|
|
+ if (edac_handler_set()) {
|
|
+ edac_atomic_assert_error();
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ if (panic_on_unrecovered_nmi)
|
|
+ panic("NMI: Not continuing");
|
|
+
|
|
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
+
|
|
+ /* Clear and disable the memory parity error line. */
|
|
+ clear_mem_error(reason);
|
|
+}
|
|
+
|
|
+static notrace __kprobes void
|
|
+io_check_error(unsigned char reason, struct pt_regs *regs)
|
|
+{
|
|
+ printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
|
|
+ show_registers(regs);
|
|
+
|
|
+ /* Re-enable the IOCK line, wait for a few seconds */
|
|
+ clear_io_check_error(reason);
|
|
+}
|
|
+
|
|
+static notrace __kprobes void
|
|
+unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
|
|
+{
|
|
+ if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
|
|
+ NOTIFY_STOP)
|
|
+ return;
|
|
+#ifdef CONFIG_MCA
|
|
+ /*
|
|
+ * Might actually be able to figure out what the guilty party
|
|
+ * is:
|
|
+ */
|
|
+ if (MCA_bus) {
|
|
+ mca_handle_nmi();
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ printk(KERN_EMERG
|
|
+ "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
|
|
+ reason, smp_processor_id());
|
|
+
|
|
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
|
|
+ if (panic_on_unrecovered_nmi)
|
|
+ panic("NMI: Not continuing");
|
|
+
|
|
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
+}
|
|
+
|
|
+static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|
+{
|
|
+ unsigned char reason = 0;
|
|
+ int cpu;
|
|
+
|
|
+ cpu = smp_processor_id();
|
|
+
|
|
+ /* Only the BSP gets external NMIs from the system. */
|
|
+ if (!cpu)
|
|
+ reason = get_nmi_reason();
|
|
+
|
|
+ if (!(reason & 0xc0)) {
|
|
+ if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
|
|
+ == NOTIFY_STOP)
|
|
+ return;
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
+ /*
|
|
+ * Ok, so this is none of the documented NMI sources,
|
|
+ * so it must be the NMI watchdog.
|
|
+ */
|
|
+ if (nmi_watchdog_tick(regs, reason))
|
|
+ return;
|
|
+ if (!do_nmi_callback(regs, cpu))
|
|
+ unknown_nmi_error(reason, regs);
|
|
+#else
|
|
+ unknown_nmi_error(reason, regs);
|
|
+#endif
|
|
+
|
|
+ return;
|
|
+ }
|
|
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
+ return;
|
|
+
|
|
+ /* AK: following checks seem to be broken on modern chipsets. FIXME */
|
|
+ if (reason & 0x80)
|
|
+ mem_parity_error(reason, regs);
|
|
+ if (reason & 0x40)
|
|
+ io_check_error(reason, regs);
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * Reassert NMI in case it became active meanwhile
|
|
+ * as it's edge-triggered:
|
|
+ */
|
|
+ reassert_nmi();
|
|
+#endif
|
|
+}
|
|
+
|
|
+dotraplinkage notrace __kprobes void
|
|
+do_nmi(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ nmi_enter();
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); }
|
|
+#else
|
|
+ add_pda(__nmi_count, 1);
|
|
+#endif
|
|
+
|
|
+ if (!ignore_nmis)
|
|
+ default_do_nmi(regs);
|
|
+
|
|
+ nmi_exit();
|
|
+}
|
|
+
|
|
+void stop_nmi(void)
|
|
+{
|
|
+ acpi_nmi_disable();
|
|
+ ignore_nmis++;
|
|
+}
|
|
+
|
|
+void restart_nmi(void)
|
|
+{
|
|
+ ignore_nmis--;
|
|
+ acpi_nmi_enable();
|
|
+}
|
|
+
|
|
+/* May run on IST stack. */
|
|
+dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+#ifdef CONFIG_KPROBES
|
|
+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
|
|
+ == NOTIFY_STOP)
|
|
+ return;
|
|
+#else
|
|
+ if (notify_die(DIE_TRAP, "int3", regs, error_code, 3, SIGTRAP)
|
|
+ == NOTIFY_STOP)
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+ preempt_conditional_sti(regs);
|
|
+ do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
|
|
+ preempt_conditional_cli(regs);
|
|
+}
|
|
+
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+/* Help handler running on IST stack to switch back to user stack
|
|
+ for scheduling or signal handling. The actual stack switch is done in
|
|
+ entry.S */
|
|
+asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
|
|
+{
|
|
+ struct pt_regs *regs = eregs;
|
|
+ /* Did already sync */
|
|
+ if (eregs == (struct pt_regs *)eregs->sp)
|
|
+ ;
|
|
+ /* Exception from user space */
|
|
+ else if (user_mode(eregs))
|
|
+ regs = task_pt_regs(current);
|
|
+ /* Exception from kernel and interrupts are enabled. Move to
|
|
+ kernel process stack. */
|
|
+ else if (eregs->flags & X86_EFLAGS_IF)
|
|
+ regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
|
|
+ if (eregs != regs)
|
|
+ *regs = *eregs;
|
|
+ return regs;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Our handling of the processor debug registers is non-trivial.
|
|
+ * We do not clear them on entry and exit from the kernel. Therefore
|
|
+ * it is possible to get a watchpoint trap here from inside the kernel.
|
|
+ * However, the code in ./ptrace.c has ensured that the user can
|
|
+ * only set watchpoints on userspace addresses. Therefore the in-kernel
|
|
+ * watchpoint trap can only occur in code which is reading/writing
|
|
+ * from user space. Such code must not hold kernel locks (since it
|
|
+ * can equally take a page fault), therefore it is safe to call
|
|
+ * force_sig_info even though that claims and releases locks.
|
|
+ *
|
|
+ * Code in ./signal.c ensures that the debug control register
|
|
+ * is restored before we deliver any signal, and therefore that
|
|
+ * user code runs with the correct debug control register even though
|
|
+ * we clear it here.
|
|
+ *
|
|
+ * Being careful here means that we don't have to be as careful in a
|
|
+ * lot of more complicated places (task switching can be a bit lazy
|
|
+ * about restoring all the debug state, and ptrace doesn't have to
|
|
+ * find every occurrence of the TF bit that could be saved away even
|
|
+ * by user code)
|
|
+ *
|
|
+ * May run on IST stack.
|
|
+ */
|
|
+dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
+ unsigned long condition;
|
|
+ int si_code;
|
|
+
|
|
+ get_debugreg(condition, 6);
|
|
+
|
|
+ /*
|
|
+ * The processor cleared BTF, so don't mark that we need it set.
|
|
+ */
|
|
+ clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
|
|
+ tsk->thread.debugctlmsr = 0;
|
|
+
|
|
+ if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
|
|
+ SIGTRAP) == NOTIFY_STOP)
|
|
+ return;
|
|
+
|
|
+ /* It's safe to allow irq's after DR6 has been saved */
|
|
+ preempt_conditional_sti(regs);
|
|
+
|
|
+ /* Mask out spurious debug traps due to lazy DR7 setting */
|
|
+ if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
|
|
+ if (!tsk->thread.debugreg7)
|
|
+ goto clear_dr7;
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (regs->flags & X86_VM_MASK)
|
|
+ goto debug_vm86;
|
|
+#endif
|
|
+
|
|
+ /* Save debug status register where ptrace can see it */
|
|
+ tsk->thread.debugreg6 = condition;
|
|
+
|
|
+ /*
|
|
+ * Single-stepping through TF: make sure we ignore any events in
|
|
+ * kernel space (but re-enable TF when returning to user mode).
|
|
+ */
|
|
+ if (condition & DR_STEP) {
|
|
+ if (!user_mode(regs))
|
|
+ goto clear_TF_reenable;
|
|
+ }
|
|
+
|
|
+ si_code = get_si_code(condition);
|
|
+ /* Ok, finally something we can handle */
|
|
+ send_sigtrap(tsk, regs, error_code, si_code);
|
|
+
|
|
+ /*
|
|
+ * Disable additional traps. They'll be re-enabled when
|
|
+ * the signal is delivered.
|
|
+ */
|
|
+clear_dr7:
|
|
+ set_debugreg(0, 7);
|
|
+ preempt_conditional_cli(regs);
|
|
+ return;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+debug_vm86:
|
|
+ handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
|
|
+ preempt_conditional_cli(regs);
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+clear_TF_reenable:
|
|
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
|
+ regs->flags &= ~X86_EFLAGS_TF;
|
|
+ preempt_conditional_cli(regs);
|
|
+ return;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
|
|
+{
|
|
+ if (fixup_exception(regs))
|
|
+ return 1;
|
|
+
|
|
+ notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
|
|
+ /* Illegal floating point operation in the kernel */
|
|
+ current->thread.trap_no = trapnr;
|
|
+ die(str, regs, 0);
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Note that we play around with the 'TS' bit in an attempt to get
|
|
+ * the correct behaviour even in the presence of the asynchronous
|
|
+ * IRQ13 behaviour
|
|
+ */
|
|
+void math_error(void __user *ip)
|
|
+{
|
|
+ struct task_struct *task;
|
|
+ siginfo_t info;
|
|
+ unsigned short cwd, swd;
|
|
+
|
|
+ /*
|
|
+ * Save the info for the exception handler and clear the error.
|
|
+ */
|
|
+ task = current;
|
|
+ save_init_fpu(task);
|
|
+ task->thread.trap_no = 16;
|
|
+ task->thread.error_code = 0;
|
|
+ info.si_signo = SIGFPE;
|
|
+ info.si_errno = 0;
|
|
+ info.si_code = __SI_FAULT;
|
|
+ info.si_addr = ip;
|
|
+ /*
|
|
+ * (~cwd & swd) will mask out exceptions that are not set to unmasked
|
|
+ * status. 0x3f is the exception bits in these regs, 0x200 is the
|
|
+ * C1 reg you need in case of a stack fault, 0x040 is the stack
|
|
+ * fault bit. We should only be taking one exception at a time,
|
|
+ * so if this combination doesn't produce any single exception,
|
|
+ * then we have a bad program that isn't synchronizing its FPU usage
|
|
+ * and it will suffer the consequences since we won't be able to
|
|
+ * fully reproduce the context of the exception
|
|
+ */
|
|
+ cwd = get_fpu_cwd(task);
|
|
+ swd = get_fpu_swd(task);
|
|
+ switch (swd & ~cwd & 0x3f) {
|
|
+ case 0x000: /* No unmasked exception */
|
|
+#ifdef CONFIG_X86_32
|
|
+ return;
|
|
+#endif
|
|
+ default: /* Multiple exceptions */
|
|
+ break;
|
|
+ case 0x001: /* Invalid Op */
|
|
+ /*
|
|
+ * swd & 0x240 == 0x040: Stack Underflow
|
|
+ * swd & 0x240 == 0x240: Stack Overflow
|
|
+ * User must clear the SF bit (0x40) if set
|
|
+ */
|
|
+ info.si_code = FPE_FLTINV;
|
|
+ break;
|
|
+ case 0x002: /* Denormalize */
|
|
+ case 0x010: /* Underflow */
|
|
+ info.si_code = FPE_FLTUND;
|
|
+ break;
|
|
+ case 0x004: /* Zero Divide */
|
|
+ info.si_code = FPE_FLTDIV;
|
|
+ break;
|
|
+ case 0x008: /* Overflow */
|
|
+ info.si_code = FPE_FLTOVF;
|
|
+ break;
|
|
+ case 0x020: /* Precision */
|
|
+ info.si_code = FPE_FLTRES;
|
|
+ break;
|
|
+ }
|
|
+ force_sig_info(SIGFPE, &info, task);
|
|
+}
|
|
+
|
|
+dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ conditional_sti(regs);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ ignore_fpu_irq = 1;
|
|
+#else
|
|
+ if (!user_mode(regs) &&
|
|
+ kernel_math_error(regs, "kernel x87 math error", 16))
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+ math_error((void __user *)regs->ip);
|
|
+}
|
|
+
|
|
+static void simd_math_error(void __user *ip)
|
|
+{
|
|
+ struct task_struct *task;
|
|
+ siginfo_t info;
|
|
+ unsigned short mxcsr;
|
|
+
|
|
+ /*
|
|
+ * Save the info for the exception handler and clear the error.
|
|
+ */
|
|
+ task = current;
|
|
+ save_init_fpu(task);
|
|
+ task->thread.trap_no = 19;
|
|
+ task->thread.error_code = 0;
|
|
+ info.si_signo = SIGFPE;
|
|
+ info.si_errno = 0;
|
|
+ info.si_code = __SI_FAULT;
|
|
+ info.si_addr = ip;
|
|
+ /*
|
|
+ * The SIMD FPU exceptions are handled a little differently, as there
|
|
+ * is only a single status/control register. Thus, to determine which
|
|
+ * unmasked exception was caught we must mask the exception mask bits
|
|
+ * at 0x1f80, and then use these to mask the exception bits at 0x3f.
|
|
+ */
|
|
+ mxcsr = get_fpu_mxcsr(task);
|
|
+ switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
|
|
+ case 0x000:
|
|
+ default:
|
|
+ break;
|
|
+ case 0x001: /* Invalid Op */
|
|
+ info.si_code = FPE_FLTINV;
|
|
+ break;
|
|
+ case 0x002: /* Denormalize */
|
|
+ case 0x010: /* Underflow */
|
|
+ info.si_code = FPE_FLTUND;
|
|
+ break;
|
|
+ case 0x004: /* Zero Divide */
|
|
+ info.si_code = FPE_FLTDIV;
|
|
+ break;
|
|
+ case 0x008: /* Overflow */
|
|
+ info.si_code = FPE_FLTOVF;
|
|
+ break;
|
|
+ case 0x020: /* Precision */
|
|
+ info.si_code = FPE_FLTRES;
|
|
+ break;
|
|
+ }
|
|
+ force_sig_info(SIGFPE, &info, task);
|
|
+}
|
|
+
|
|
+dotraplinkage void
|
|
+do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ conditional_sti(regs);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (cpu_has_xmm) {
|
|
+ /* Handle SIMD FPU exceptions on PIII+ processors. */
|
|
+ ignore_fpu_irq = 1;
|
|
+ simd_math_error((void __user *)regs->ip);
|
|
+ return;
|
|
+ }
|
|
+ /*
|
|
+ * Handle strange cache flush from user space exception
|
|
+ * in all other cases. This is undocumented behaviour.
|
|
+ */
|
|
+ if (regs->flags & X86_VM_MASK) {
|
|
+ handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
|
|
+ return;
|
|
+ }
|
|
+ current->thread.trap_no = 19;
|
|
+ current->thread.error_code = error_code;
|
|
+ die_if_kernel("cache flush denied", regs, error_code);
|
|
+ force_sig(SIGSEGV, current);
|
|
+#else
|
|
+ if (!user_mode(regs) &&
|
|
+ kernel_math_error(regs, "kernel simd math error", 19))
|
|
+ return;
|
|
+ simd_math_error((void __user *)regs->ip);
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+dotraplinkage void
|
|
+do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ conditional_sti(regs);
|
|
+#if 0
|
|
+ /* No need to warn about this any longer. */
|
|
+ printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
|
|
+{
|
|
+ struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
|
|
+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
|
|
+ unsigned long new_kesp = kesp - base;
|
|
+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
|
|
+ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
|
|
+
|
|
+ /* Set up base for espfix segment */
|
|
+ desc &= 0x00f0ff0000000000ULL;
|
|
+ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
|
|
+ ((((__u64)base) << 32) & 0xff00000000000000ULL) |
|
|
+ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
|
|
+ (lim_pages & 0xffff);
|
|
+ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
|
|
+
|
|
+ return new_kesp;
|
|
+}
|
|
+#else
|
|
+asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
|
|
+{
|
|
+}
|
|
+
|
|
+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
|
|
+{
|
|
+}
|
|
+#endif
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+/*
|
|
+ * 'math_state_restore()' saves the current math information in the
|
|
+ * old math state array, and gets the new ones from the current task
|
|
+ *
|
|
+ * Careful.. There are problems with IBM-designed IRQ13 behaviour.
|
|
+ * Don't touch unless you *really* know how it works.
|
|
+ *
|
|
+ * Must be called with kernel preemption disabled (in this case,
|
|
+ * local interrupts are disabled at the call-site in entry.S).
|
|
+ */
|
|
+asmlinkage void math_state_restore(void)
|
|
+{
|
|
+ struct thread_info *thread = current_thread_info();
|
|
+ struct task_struct *tsk = thread->task;
|
|
+
|
|
+ if (!tsk_used_math(tsk)) {
|
|
+ local_irq_enable();
|
|
+ /*
|
|
+ * does a slab alloc which can sleep
|
|
+ */
|
|
+ if (init_fpu(tsk)) {
|
|
+ /*
|
|
+ * ran out of memory!
|
|
+ */
|
|
+ do_group_exit(SIGKILL);
|
|
+ return;
|
|
+ }
|
|
+ local_irq_disable();
|
|
+ }
|
|
+
|
|
+ /* NB. 'clts' is done for us by Xen during virtual trap. */
|
|
+#ifdef CONFIG_X86_32
|
|
+ restore_fpu(tsk);
|
|
+#else
|
|
+ /*
|
|
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
|
|
+ */
|
|
+ if (unlikely(restore_fpu_checking(tsk))) {
|
|
+ stts();
|
|
+ force_sig(SIGSEGV, tsk);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
|
|
+ tsk->fpu_counter++;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(math_state_restore);
|
|
+
|
|
+#ifndef CONFIG_MATH_EMULATION
|
|
+asmlinkage void math_emulate(long arg)
|
|
+{
|
|
+ printk(KERN_EMERG
|
|
+ "math-emulation not enabled and no coprocessor found.\n");
|
|
+ printk(KERN_EMERG "killing %s.\n", current->comm);
|
|
+ force_sig(SIGFPE, current);
|
|
+ schedule();
|
|
+}
|
|
+#endif /* CONFIG_MATH_EMULATION */
|
|
+
|
|
+dotraplinkage void __kprobes
|
|
+do_device_not_available(struct pt_regs *regs, long error)
|
|
+{
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
+ if (read_cr0() & X86_CR0_EM) {
|
|
+ conditional_sti(regs);
|
|
+ math_emulate(0);
|
|
+ } else {
|
|
+ math_state_restore(); /* interrupts still off */
|
|
+ conditional_sti(regs);
|
|
+ }
|
|
+#else
|
|
+ math_state_restore();
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ siginfo_t info;
|
|
+ local_irq_enable();
|
|
+
|
|
+ info.si_signo = SIGILL;
|
|
+ info.si_errno = 0;
|
|
+ info.si_code = ILL_BADSTK;
|
|
+ info.si_addr = 0;
|
|
+ if (notify_die(DIE_TRAP, "iret exception",
|
|
+ regs, error_code, 32, SIGILL) == NOTIFY_STOP)
|
|
+ return;
|
|
+ do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * NB. All these are "trap gates" (i.e. events_mask isn't set) except
|
|
+ * for those that specify <dpl>|4 in the second field.
|
|
+ */
|
|
+static const trap_info_t __cpuinitconst trap_table[] = {
|
|
+#ifdef CONFIG_X86_32
|
|
+#define X 0
|
|
+#else
|
|
+#define X 4
|
|
+#endif
|
|
+ { 0, 0|X, __KERNEL_CS, (unsigned long)divide_error },
|
|
+ { 1, 0|4, __KERNEL_CS, (unsigned long)debug },
|
|
+ { 3, 3|4, __KERNEL_CS, (unsigned long)int3 },
|
|
+ { 4, 3|X, __KERNEL_CS, (unsigned long)overflow },
|
|
+ { 5, 0|X, __KERNEL_CS, (unsigned long)bounds },
|
|
+ { 6, 0|X, __KERNEL_CS, (unsigned long)invalid_op },
|
|
+ { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available },
|
|
+ { 9, 0|X, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
|
|
+ { 10, 0|X, __KERNEL_CS, (unsigned long)invalid_TSS },
|
|
+ { 11, 0|X, __KERNEL_CS, (unsigned long)segment_not_present },
|
|
+ { 12, 0|X, __KERNEL_CS, (unsigned long)stack_segment },
|
|
+ { 13, 0|X, __KERNEL_CS, (unsigned long)general_protection },
|
|
+ { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault },
|
|
+ { 16, 0|X, __KERNEL_CS, (unsigned long)coprocessor_error },
|
|
+ { 17, 0|X, __KERNEL_CS, (unsigned long)alignment_check },
|
|
+#ifdef CONFIG_X86_MCE
|
|
+ { 18, 0|X, __KERNEL_CS, (unsigned long)machine_check },
|
|
+#endif
|
|
+ { 19, 0|X, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
|
|
+#ifdef CONFIG_X86_32
|
|
+ { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment },
|
|
+ { SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)system_call },
|
|
+#elif defined(CONFIG_IA32_EMULATION)
|
|
+ { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall },
|
|
+#endif
|
|
+ { 0, 0, 0, 0 }
|
|
+};
|
|
+
|
|
+void __init trap_init(void)
|
|
+{
|
|
+ int ret;
|
|
+
|
|
+ ret = HYPERVISOR_set_trap_table(trap_table);
|
|
+ if (ret)
|
|
+ printk(KERN_ERR "HYPERVISOR_set_trap_table failed (%d)\n", ret);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (cpu_has_fxsr) {
|
|
+ printk(KERN_INFO "Enabling fast FPU save and restore... ");
|
|
+ set_in_cr4(X86_CR4_OSFXSR);
|
|
+ printk("done.\n");
|
|
+ }
|
|
+ if (cpu_has_xmm) {
|
|
+ printk(KERN_INFO
|
|
+ "Enabling unmasked SIMD FPU exception support... ");
|
|
+ set_in_cr4(X86_CR4_OSXMMEXCPT);
|
|
+ printk("done.\n");
|
|
+ }
|
|
+
|
|
+#endif
|
|
+ /*
|
|
+ * Should be a barrier for any external CPU state:
|
|
+ */
|
|
+ cpu_init();
|
|
+}
|
|
+
|
|
+void __cpuinit smp_trap_init(trap_info_t *trap_ctxt)
|
|
+{
|
|
+ const trap_info_t *t = trap_table;
|
|
+
|
|
+ for (t = trap_table; t->address; t++) {
|
|
+ trap_ctxt[t->vector].flags = t->flags;
|
|
+ trap_ctxt[t->vector].cs = t->cs;
|
|
+ trap_ctxt[t->vector].address = t->address;
|
|
+ }
|
|
+}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/traps_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,1222 +0,0 @@
|
|
-/*
|
|
- * Copyright (C) 1991, 1992 Linus Torvalds
|
|
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
|
|
- *
|
|
- * Pentium III FXSR, SSE support
|
|
- * Gareth Hughes <gareth@valinux.com>, May 2000
|
|
- */
|
|
-
|
|
-/*
|
|
- * 'Traps.c' handles hardware traps and faults after we have saved some
|
|
- * state in 'asm.s'.
|
|
- */
|
|
-#include <linux/interrupt.h>
|
|
-#include <linux/kallsyms.h>
|
|
-#include <linux/spinlock.h>
|
|
-#include <linux/highmem.h>
|
|
-#include <linux/kprobes.h>
|
|
-#include <linux/uaccess.h>
|
|
-#include <linux/utsname.h>
|
|
-#include <linux/kdebug.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/ptrace.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/unwind.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/errno.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/timer.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/bug.h>
|
|
-#include <linux/nmi.h>
|
|
-#include <linux/mm.h>
|
|
-
|
|
-#ifdef CONFIG_EISA
|
|
-#include <linux/ioport.h>
|
|
-#include <linux/eisa.h>
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_MCA
|
|
-#include <linux/mca.h>
|
|
-#endif
|
|
-
|
|
-#if defined(CONFIG_EDAC)
|
|
-#include <linux/edac.h>
|
|
-#endif
|
|
-
|
|
-#include <asm/arch_hooks.h>
|
|
-#include <asm/stacktrace.h>
|
|
-#include <asm/processor.h>
|
|
-#include <asm/debugreg.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/system.h>
|
|
-#include <asm/unwind.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/i387.h>
|
|
-#include <asm/nmi.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/traps.h>
|
|
-
|
|
-#include "mach_traps.h"
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-DECLARE_BITMAP(used_vectors, NR_VECTORS);
|
|
-EXPORT_SYMBOL_GPL(used_vectors);
|
|
-#endif
|
|
-
|
|
-asmlinkage int system_call(void);
|
|
-
|
|
-/* Do we ignore FPU interrupts ? */
|
|
-char ignore_fpu_irq;
|
|
-
|
|
-#ifndef CONFIG_X86_NO_IDT
|
|
-/*
|
|
- * The IDT has to be page-aligned to simplify the Pentium
|
|
- * F0 0F bug workaround.. We have a special link segment
|
|
- * for this.
|
|
- */
|
|
-gate_desc idt_table[256]
|
|
- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
|
|
-#endif
|
|
-
|
|
-int panic_on_unrecovered_nmi;
|
|
-int kstack_depth_to_print = 24;
|
|
-static unsigned int code_bytes = 64;
|
|
-static int ignore_nmis;
|
|
-static int die_counter;
|
|
-
|
|
-void printk_address(unsigned long address, int reliable)
|
|
-{
|
|
-#ifdef CONFIG_KALLSYMS
|
|
- unsigned long offset = 0;
|
|
- unsigned long symsize;
|
|
- const char *symname;
|
|
- char *modname;
|
|
- char *delim = ":";
|
|
- char namebuf[KSYM_NAME_LEN];
|
|
- char reliab[4] = "";
|
|
-
|
|
- symname = kallsyms_lookup(address, &symsize, &offset,
|
|
- &modname, namebuf);
|
|
- if (!symname) {
|
|
- printk(" [<%08lx>]\n", address);
|
|
- return;
|
|
- }
|
|
- if (!reliable)
|
|
- strcpy(reliab, "? ");
|
|
-
|
|
- if (!modname)
|
|
- modname = delim = "";
|
|
- printk(" [<%08lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
|
|
- address, reliab, delim, modname, delim, symname, offset, symsize);
|
|
-#else
|
|
- printk(" [<%08lx>]\n", address);
|
|
-#endif
|
|
-}
|
|
-
|
|
-static inline int valid_stack_ptr(struct thread_info *tinfo,
|
|
- void *p, unsigned int size)
|
|
-{
|
|
- void *t = tinfo;
|
|
- return p > t && p <= t + THREAD_SIZE - size;
|
|
-}
|
|
-
|
|
-/* The form of the top of the frame on the stack */
|
|
-struct stack_frame {
|
|
- struct stack_frame *next_frame;
|
|
- unsigned long return_address;
|
|
-};
|
|
-
|
|
-static inline unsigned long
|
|
-print_context_stack(struct thread_info *tinfo,
|
|
- unsigned long *stack, unsigned long bp,
|
|
- const struct stacktrace_ops *ops, void *data)
|
|
-{
|
|
- struct stack_frame *frame = (struct stack_frame *)bp;
|
|
-
|
|
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack))) {
|
|
- unsigned long addr;
|
|
-
|
|
- addr = *stack;
|
|
- if (__kernel_text_address(addr)) {
|
|
- if ((unsigned long) stack == bp + 4) {
|
|
- ops->address(data, addr, 1);
|
|
- frame = frame->next_frame;
|
|
- bp = (unsigned long) frame;
|
|
- } else {
|
|
- ops->address(data, addr, bp == 0);
|
|
- }
|
|
- }
|
|
- stack++;
|
|
- }
|
|
- return bp;
|
|
-}
|
|
-
|
|
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp,
|
|
- const struct stacktrace_ops *ops, void *data)
|
|
-{
|
|
- if (!task)
|
|
- task = current;
|
|
-
|
|
- if (!stack) {
|
|
- unsigned long dummy;
|
|
- stack = &dummy;
|
|
- if (task != current)
|
|
- stack = (unsigned long *)task->thread.sp;
|
|
- }
|
|
-
|
|
-#ifdef CONFIG_FRAME_POINTER
|
|
- if (!bp) {
|
|
- if (task == current) {
|
|
- /* Grab bp right from our regs */
|
|
- asm("movl %%ebp, %0" : "=r" (bp) :);
|
|
- } else {
|
|
- /* bp is the last reg pushed by switch_to */
|
|
- bp = *(unsigned long *) task->thread.sp;
|
|
- }
|
|
- }
|
|
-#endif
|
|
-
|
|
- for (;;) {
|
|
- struct thread_info *context;
|
|
-
|
|
- context = (struct thread_info *)
|
|
- ((unsigned long)stack & (~(THREAD_SIZE - 1)));
|
|
- bp = print_context_stack(context, stack, bp, ops, data);
|
|
- /*
|
|
- * Should be after the line below, but somewhere
|
|
- * in early boot context comes out corrupted and we
|
|
- * can't reference it:
|
|
- */
|
|
- if (ops->stack(data, "IRQ") < 0)
|
|
- break;
|
|
- stack = (unsigned long *)context->previous_esp;
|
|
- if (!stack)
|
|
- break;
|
|
- touch_nmi_watchdog();
|
|
- }
|
|
-}
|
|
-EXPORT_SYMBOL(dump_trace);
|
|
-
|
|
-static void
|
|
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
|
|
-{
|
|
- printk(data);
|
|
- print_symbol(msg, symbol);
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-static void print_trace_warning(void *data, char *msg)
|
|
-{
|
|
- printk("%s%s\n", (char *)data, msg);
|
|
-}
|
|
-
|
|
-static int print_trace_stack(void *data, char *name)
|
|
-{
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Print one address/symbol entries per line.
|
|
- */
|
|
-static void print_trace_address(void *data, unsigned long addr, int reliable)
|
|
-{
|
|
- printk("%s [<%08lx>] ", (char *)data, addr);
|
|
- if (!reliable)
|
|
- printk("? ");
|
|
- print_symbol("%s\n", addr);
|
|
- touch_nmi_watchdog();
|
|
-}
|
|
-
|
|
-static const struct stacktrace_ops print_trace_ops = {
|
|
- .warning = print_trace_warning,
|
|
- .warning_symbol = print_trace_warning_symbol,
|
|
- .stack = print_trace_stack,
|
|
- .address = print_trace_address,
|
|
-};
|
|
-
|
|
-static void
|
|
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp, char *log_lvl)
|
|
-{
|
|
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
|
- printk("%s =======================\n", log_lvl);
|
|
-}
|
|
-
|
|
-void show_trace(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp)
|
|
-{
|
|
- show_trace_log_lvl(task, regs, stack, bp, "");
|
|
-}
|
|
-
|
|
-static void
|
|
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *sp, unsigned long bp, char *log_lvl)
|
|
-{
|
|
- unsigned long *stack;
|
|
- int i;
|
|
-
|
|
- if (sp == NULL) {
|
|
- if (task)
|
|
- sp = (unsigned long *)task->thread.sp;
|
|
- else
|
|
- sp = (unsigned long *)&sp;
|
|
- }
|
|
-
|
|
- stack = sp;
|
|
- for (i = 0; i < kstack_depth_to_print; i++) {
|
|
- if (kstack_end(stack))
|
|
- break;
|
|
- if (i && ((i % 8) == 0))
|
|
- printk("\n%s ", log_lvl);
|
|
- printk("%08lx ", *stack++);
|
|
- }
|
|
- printk("\n%sCall Trace:\n", log_lvl);
|
|
-
|
|
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
|
|
-}
|
|
-
|
|
-void show_stack(struct task_struct *task, unsigned long *sp)
|
|
-{
|
|
- printk(" ");
|
|
- show_stack_log_lvl(task, NULL, sp, 0, "");
|
|
-}
|
|
-
|
|
-/*
|
|
- * The architecture-independent dump_stack generator
|
|
- */
|
|
-void dump_stack(void)
|
|
-{
|
|
- unsigned long bp = 0;
|
|
- unsigned long stack;
|
|
-
|
|
-#ifdef CONFIG_FRAME_POINTER
|
|
- if (!bp)
|
|
- asm("movl %%ebp, %0" : "=r" (bp):);
|
|
-#endif
|
|
-
|
|
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
- current->pid, current->comm, print_tainted(),
|
|
- init_utsname()->release,
|
|
- (int)strcspn(init_utsname()->version, " "),
|
|
- init_utsname()->version);
|
|
-
|
|
- show_trace(current, NULL, &stack, bp);
|
|
-}
|
|
-
|
|
-EXPORT_SYMBOL(dump_stack);
|
|
-
|
|
-void show_registers(struct pt_regs *regs)
|
|
-{
|
|
- int i;
|
|
-
|
|
- print_modules();
|
|
- __show_registers(regs, 0);
|
|
-
|
|
- printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
|
|
- TASK_COMM_LEN, current->comm, task_pid_nr(current),
|
|
- current_thread_info(), current, task_thread_info(current));
|
|
- /*
|
|
- * When in-kernel, we also print out the stack and code at the
|
|
- * time of the fault..
|
|
- */
|
|
- if (!user_mode_vm(regs)) {
|
|
- unsigned int code_prologue = code_bytes * 43 / 64;
|
|
- unsigned int code_len = code_bytes;
|
|
- unsigned char c;
|
|
- u8 *ip;
|
|
-
|
|
- printk("\n" KERN_EMERG "Stack: ");
- show_stack_log_lvl(NULL, regs, &regs->sp, 0, KERN_EMERG);
-
|
|
- printk(KERN_EMERG "Code: ");
|
|
-
|
|
- ip = (u8 *)regs->ip - code_prologue;
|
|
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
|
|
- /* try starting at EIP */
|
|
- ip = (u8 *)regs->ip;
|
|
- code_len = code_len - code_prologue + 1;
|
|
- }
|
|
- for (i = 0; i < code_len; i++, ip++) {
|
|
- if (ip < (u8 *)PAGE_OFFSET ||
|
|
- probe_kernel_address(ip, c)) {
|
|
- printk(" Bad EIP value.");
|
|
- break;
|
|
- }
|
|
- if (ip == (u8 *)regs->ip)
|
|
- printk("<%02x> ", c);
|
|
- else
|
|
- printk("%02x ", c);
|
|
- }
|
|
- }
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-int is_valid_bugaddr(unsigned long ip)
|
|
-{
|
|
- unsigned short ud2;
|
|
-
|
|
- if (ip < PAGE_OFFSET)
|
|
- return 0;
|
|
- if (probe_kernel_address((unsigned short *)ip, ud2))
|
|
- return 0;
|
|
-
|
|
- return ud2 == 0x0b0f;
|
|
-}
|
|
-
|
|
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
|
|
-static int die_owner = -1;
|
|
-static unsigned int die_nest_count;
|
|
-
|
|
-unsigned __kprobes long oops_begin(void)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- oops_enter();
|
|
-
|
|
- if (die_owner != raw_smp_processor_id()) {
|
|
- console_verbose();
|
|
- raw_local_irq_save(flags);
|
|
- __raw_spin_lock(&die_lock);
|
|
- die_owner = smp_processor_id();
|
|
- die_nest_count = 0;
|
|
- bust_spinlocks(1);
|
|
- } else {
|
|
- raw_local_irq_save(flags);
|
|
- }
|
|
- die_nest_count++;
|
|
- return flags;
|
|
-}
|
|
-
|
|
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
|
|
-{
|
|
- bust_spinlocks(0);
|
|
- die_owner = -1;
|
|
- add_taint(TAINT_DIE);
|
|
- __raw_spin_unlock(&die_lock);
|
|
- raw_local_irq_restore(flags);
|
|
-
|
|
- if (!regs)
|
|
- return;
|
|
-
|
|
- if (kexec_should_crash(current))
|
|
- crash_kexec(regs);
|
|
-
|
|
- if (in_interrupt())
|
|
- panic("Fatal exception in interrupt");
|
|
-
|
|
- if (panic_on_oops)
|
|
- panic("Fatal exception");
|
|
-
|
|
- oops_exit();
|
|
- do_exit(signr);
|
|
-}
|
|
-
|
|
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
|
|
-{
|
|
- unsigned short ss;
|
|
- unsigned long sp;
|
|
-
|
|
- printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter);
|
|
-#ifdef CONFIG_PREEMPT
|
|
- printk("PREEMPT ");
|
|
-#endif
|
|
-#ifdef CONFIG_SMP
|
|
- printk("SMP ");
|
|
-#endif
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- printk("DEBUG_PAGEALLOC");
|
|
-#endif
|
|
- printk("\n");
|
|
- if (notify_die(DIE_OOPS, str, regs, err,
|
|
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
|
|
- return 1;
|
|
-
|
|
- show_registers(regs);
|
|
- /* Executive summary in case the oops scrolled away */
- sp = (unsigned long) (&regs->sp);
- savesegment(ss, ss);
|
|
- if (user_mode(regs)) {
|
|
- sp = regs->sp;
|
|
- ss = regs->ss & 0xffff;
|
|
- }
|
|
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
|
|
- print_symbol("%s", regs->ip);
|
|
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * This is gone through when something in the kernel has done something bad
|
|
- * and is about to be terminated:
|
|
- */
|
|
-void die(const char *str, struct pt_regs *regs, long err)
|
|
-{
|
|
- unsigned long flags = oops_begin();
|
|
-
|
|
- if (die_nest_count < 3) {
|
|
- report_bug(regs->ip, regs);
|
|
-
|
|
- if (__die(str, regs, err))
|
|
- regs = NULL;
|
|
- } else {
|
|
- printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
|
|
- }
|
|
-
|
|
- oops_end(flags, regs, SIGSEGV);
|
|
-}
|
|
-
|
|
-static inline void
|
|
-die_if_kernel(const char *str, struct pt_regs *regs, long err)
|
|
-{
|
|
- if (!user_mode_vm(regs))
|
|
- die(str, regs, err);
|
|
-}
|
|
-
|
|
-static void __kprobes
|
|
-do_trap(int trapnr, int signr, char *str, int vm86, struct pt_regs *regs,
|
|
- long error_code, siginfo_t *info)
|
|
-{
|
|
- struct task_struct *tsk = current;
|
|
-
|
|
- if (regs->flags & X86_VM_MASK) {
|
|
- if (vm86)
|
|
- goto vm86_trap;
|
|
- goto trap_signal;
|
|
- }
|
|
-
|
|
- if (!user_mode(regs))
|
|
- goto kernel_trap;
|
|
-
|
|
-trap_signal:
|
|
- /*
|
|
- * We want error_code and trap_no set for userspace faults and
|
|
- * kernelspace faults which result in die(), but not
|
|
- * kernelspace faults which are fixed up. die() gives the
|
|
- * process no chance to handle the signal and notice the
|
|
- * kernel fault information, so that won't result in polluting
|
|
- * the information about previously queued, but not yet
|
|
- * delivered, faults. See also do_general_protection below.
|
|
- */
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
-
|
|
- if (info)
|
|
- force_sig_info(signr, info, tsk);
|
|
- else
|
|
- force_sig(signr, tsk);
|
|
- return;
|
|
-
|
|
-kernel_trap:
|
|
- if (!fixup_exception(regs)) {
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
- die(str, regs, error_code);
|
|
- }
|
|
- return;
|
|
-
|
|
-vm86_trap:
|
|
- if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
|
|
- error_code, trapnr))
|
|
- goto trap_signal;
|
|
- return;
|
|
-}
|
|
-
|
|
-#define DO_ERROR(trapnr, signr, str, name) \
|
|
-void do_##name(struct pt_regs *regs, long error_code) \
|
|
-{ \
|
|
- trace_hardirqs_fixup(); \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
- do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
|
|
-}
|
|
-
|
|
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr, irq) \
|
|
-void do_##name(struct pt_regs *regs, long error_code) \
|
|
-{ \
|
|
- siginfo_t info; \
|
|
- if (irq) \
|
|
- local_irq_enable(); \
|
|
- info.si_signo = signr; \
|
|
- info.si_errno = 0; \
|
|
- info.si_code = sicode; \
|
|
- info.si_addr = (void __user *)siaddr; \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
- do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
|
|
-}
|
|
-
|
|
-#define DO_VM86_ERROR(trapnr, signr, str, name) \
|
|
-void do_##name(struct pt_regs *regs, long error_code) \
|
|
-{ \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
- do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
|
|
-}
|
|
-
|
|
-#define DO_VM86_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
|
|
-void do_##name(struct pt_regs *regs, long error_code) \
|
|
-{ \
|
|
- siginfo_t info; \
|
|
- info.si_signo = signr; \
|
|
- info.si_errno = 0; \
|
|
- info.si_code = sicode; \
|
|
- info.si_addr = (void __user *)siaddr; \
|
|
- trace_hardirqs_fixup(); \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
- do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
|
|
-}
|
|
-
|
|
-DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
-#ifndef CONFIG_KPROBES
|
|
-DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
|
|
-#endif
|
|
-DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
|
|
-DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
|
|
-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
|
|
-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
|
|
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
|
|
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
|
|
-DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
|
|
-
|
|
-void __kprobes
|
|
-do_general_protection(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- struct task_struct *tsk;
|
|
- struct thread_struct *thread;
|
|
-
- thread = &current->thread;
-
|
|
- if (regs->flags & X86_VM_MASK)
|
|
- goto gp_in_vm86;
|
|
-
|
|
- tsk = current;
|
|
- if (!user_mode(regs))
|
|
- goto gp_in_kernel;
|
|
-
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 13;
|
|
-
|
|
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
- printk_ratelimit()) {
|
|
- printk(KERN_INFO
|
|
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
- tsk->comm, task_pid_nr(tsk),
|
|
- regs->ip, regs->sp, error_code);
|
|
- print_vma_addr(" in ", regs->ip);
|
|
- printk("\n");
|
|
- }
|
|
-
|
|
- force_sig(SIGSEGV, tsk);
|
|
- return;
|
|
-
|
|
-gp_in_vm86:
|
|
- local_irq_enable();
|
|
- handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
|
|
- return;
|
|
-
|
|
-gp_in_kernel:
|
|
- if (fixup_exception(regs))
|
|
- return;
|
|
-
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 13;
|
|
- if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
|
|
- return;
|
|
- die("general protection fault", regs, error_code);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void
|
|
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
|
|
-{
|
|
- printk(KERN_EMERG
|
|
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
|
|
- reason, smp_processor_id());
|
|
-
|
|
- printk(KERN_EMERG
|
|
- "You have some hardware problem, likely on the PCI bus.\n");
|
|
-
|
|
-#if defined(CONFIG_EDAC)
|
|
- if (edac_handler_set()) {
|
|
- edac_atomic_assert_error();
|
|
- return;
|
|
- }
|
|
-#endif
|
|
-
|
|
- if (panic_on_unrecovered_nmi)
|
|
- panic("NMI: Not continuing");
|
|
-
|
|
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
-
|
|
- /* Clear and disable the memory parity error line. */
|
|
- clear_mem_error(reason);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void
|
|
-io_check_error(unsigned char reason, struct pt_regs *regs)
|
|
-{
|
|
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
|
|
- show_registers(regs);
|
|
-
|
|
- /* Re-enable the IOCK line, wait for a few seconds */
|
|
- clear_io_check_error(reason);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void
|
|
-unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
|
|
-{
|
|
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
-#ifdef CONFIG_MCA
|
|
- /*
|
|
- * Might actually be able to figure out what the guilty party
|
|
- * is:
|
|
- */
|
|
- if (MCA_bus) {
|
|
- mca_handle_nmi();
|
|
- return;
|
|
- }
|
|
-#endif
|
|
- printk(KERN_EMERG
|
|
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
|
|
- reason, smp_processor_id());
|
|
-
|
|
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
|
|
- if (panic_on_unrecovered_nmi)
|
|
- panic("NMI: Not continuing");
|
|
-
|
|
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
-}
|
|
-
|
|
-static DEFINE_SPINLOCK(nmi_print_lock);
|
|
-
|
|
-void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
|
|
-{
|
|
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
-
|
|
- spin_lock(&nmi_print_lock);
|
|
- /*
|
|
- * We are in trouble anyway, lets at least try
|
|
- * to get a message out:
|
|
- */
|
|
- bust_spinlocks(1);
|
|
- printk(KERN_EMERG "%s", str);
|
|
- printk(" on CPU%d, ip %08lx, registers:\n",
|
|
- smp_processor_id(), regs->ip);
|
|
- show_registers(regs);
|
|
- if (do_panic)
|
|
- panic("Non maskable interrupt");
|
|
- console_silent();
|
|
- spin_unlock(&nmi_print_lock);
|
|
- bust_spinlocks(0);
|
|
-
|
|
- /*
|
|
- * If we are in kernel we are probably nested up pretty bad
|
|
- * and might aswell get out now while we still can:
|
|
- */
|
|
- if (!user_mode_vm(regs)) {
|
|
- current->thread.trap_no = 2;
|
|
- crash_kexec(regs);
|
|
- }
|
|
-
|
|
- do_exit(SIGSEGV);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|
-{
|
|
- unsigned char reason = 0;
|
|
- int cpu;
|
|
-
|
|
- cpu = smp_processor_id();
|
|
-
|
|
- /* Only the BSP gets external NMIs from the system. */
|
|
- if (!cpu)
|
|
- reason = get_nmi_reason();
|
|
-
|
|
- if (!(reason & 0xc0)) {
|
|
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
|
|
- == NOTIFY_STOP)
|
|
- return;
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- /*
|
|
- * Ok, so this is none of the documented NMI sources,
|
|
- * so it must be the NMI watchdog.
|
|
- */
|
|
- if (nmi_watchdog_tick(regs, reason))
|
|
- return;
|
|
- if (!do_nmi_callback(regs, cpu))
|
|
- unknown_nmi_error(reason, regs);
|
|
-#else
|
|
- unknown_nmi_error(reason, regs);
|
|
-#endif
|
|
-
|
|
- return;
|
|
- }
|
|
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
-
|
|
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
|
|
- if (reason & 0x80)
|
|
- mem_parity_error(reason, regs);
|
|
- if (reason & 0x40)
|
|
- io_check_error(reason, regs);
|
|
- /*
|
|
- * Reassert NMI in case it became active meanwhile
|
|
- * as it's edge-triggered:
|
|
- */
|
|
- reassert_nmi();
|
|
-}
|
|
-
|
|
-notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- int cpu;
|
|
-
|
|
- nmi_enter();
|
|
-
|
|
- cpu = smp_processor_id();
|
|
-
|
|
- ++nmi_count(cpu);
|
|
-
|
|
- if (!ignore_nmis)
|
|
- default_do_nmi(regs);
|
|
-
|
|
- nmi_exit();
|
|
-}
|
|
-
|
|
-void stop_nmi(void)
|
|
-{
|
|
- acpi_nmi_disable();
|
|
- ignore_nmis++;
|
|
-}
|
|
-
|
|
-void restart_nmi(void)
|
|
-{
|
|
- ignore_nmis--;
|
|
- acpi_nmi_enable();
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_KPROBES
|
|
-void __kprobes do_int3(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- trace_hardirqs_fixup();
|
|
-
|
|
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
|
|
- == NOTIFY_STOP)
|
|
- return;
|
|
- /*
|
|
- * This is an interrupt gate, because kprobes wants interrupts
|
|
- * disabled. Normal trap handlers don't.
|
|
- */
|
|
- restore_interrupts(regs);
|
|
-
|
|
- do_trap(3, SIGTRAP, "int3", 1, regs, error_code, NULL);
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Our handling of the processor debug registers is non-trivial.
|
|
- * We do not clear them on entry and exit from the kernel. Therefore
|
|
- * it is possible to get a watchpoint trap here from inside the kernel.
|
|
- * However, the code in ./ptrace.c has ensured that the user can
|
|
- * only set watchpoints on userspace addresses. Therefore the in-kernel
|
|
- * watchpoint trap can only occur in code which is reading/writing
|
|
- * from user space. Such code must not hold kernel locks (since it
|
|
- * can equally take a page fault), therefore it is safe to call
|
|
- * force_sig_info even though that claims and releases locks.
|
|
- *
|
|
- * Code in ./signal.c ensures that the debug control register
|
|
- * is restored before we deliver any signal, and therefore that
|
|
- * user code runs with the correct debug control register even though
|
|
- * we clear it here.
|
|
- *
|
|
- * Being careful here means that we don't have to be as careful in a
|
|
- * lot of more complicated places (task switching can be a bit lazy
|
|
- * about restoring all the debug state, and ptrace doesn't have to
|
|
- * find every occurrence of the TF bit that could be saved away even
|
|
- * by user code)
|
|
- */
|
|
-void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- struct task_struct *tsk = current;
|
|
- unsigned int condition;
|
|
-
|
|
- trace_hardirqs_fixup();
|
|
-
|
|
- get_debugreg(condition, 6);
|
|
-
|
|
- /*
|
|
- * The processor cleared BTF, so don't mark that we need it set.
|
|
- */
|
|
- clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
|
|
- tsk->thread.debugctlmsr = 0;
|
|
-
|
|
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
|
|
- SIGTRAP) == NOTIFY_STOP)
|
|
- return;
|
|
- /* It's safe to allow irq's after DR6 has been saved */
|
|
- if (regs->flags & X86_EFLAGS_IF)
|
|
- local_irq_enable();
|
|
-
|
|
- /* Mask out spurious debug traps due to lazy DR7 setting */
|
|
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
|
|
- if (!tsk->thread.debugreg7)
|
|
- goto clear_dr7;
|
|
- }
|
|
-
|
|
- if (regs->flags & X86_VM_MASK)
|
|
- goto debug_vm86;
|
|
-
|
|
- /* Save debug status register where ptrace can see it */
|
|
- tsk->thread.debugreg6 = condition;
|
|
-
|
|
- /*
|
|
- * Single-stepping through TF: make sure we ignore any events in
|
|
- * kernel space (but re-enable TF when returning to user mode).
|
|
- */
|
|
- if (condition & DR_STEP) {
|
|
- /*
|
|
- * We already checked v86 mode above, so we can
|
|
- * check for kernel mode by just checking the CPL
|
|
- * of CS.
|
|
- */
|
|
- if (!user_mode(regs))
|
|
- goto clear_TF_reenable;
|
|
- }
|
|
-
|
|
- /* Ok, finally something we can handle */
|
|
- send_sigtrap(tsk, regs, error_code);
|
|
-
|
|
- /*
|
|
- * Disable additional traps. They'll be re-enabled when
|
|
- * the signal is delivered.
|
|
- */
|
|
-clear_dr7:
|
|
- set_debugreg(0, 7);
|
|
- return;
|
|
-
|
|
-debug_vm86:
|
|
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
|
|
- return;
|
|
-
|
|
-clear_TF_reenable:
|
|
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
|
- regs->flags &= ~X86_EFLAGS_TF;
|
|
- return;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Note that we play around with the 'TS' bit in an attempt to get
|
|
- * the correct behaviour even in the presence of the asynchronous
|
|
- * IRQ13 behaviour
|
|
- */
|
|
-void math_error(void __user *ip)
|
|
-{
|
|
- struct task_struct *task;
|
|
- siginfo_t info;
|
|
- unsigned short cwd, swd;
|
|
-
|
|
- /*
|
|
- * Save the info for the exception handler and clear the error.
|
|
- */
|
|
- task = current;
|
|
- save_init_fpu(task);
|
|
- task->thread.trap_no = 16;
|
|
- task->thread.error_code = 0;
|
|
- info.si_signo = SIGFPE;
|
|
- info.si_errno = 0;
|
|
- info.si_code = __SI_FAULT;
|
|
- info.si_addr = ip;
|
|
- /*
|
|
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
|
|
- * status. 0x3f is the exception bits in these regs, 0x200 is the
|
|
- * C1 reg you need in case of a stack fault, 0x040 is the stack
|
|
- * fault bit. We should only be taking one exception at a time,
|
|
- * so if this combination doesn't produce any single exception,
|
|
- * then we have a bad program that isn't synchronizing its FPU usage
|
|
- * and it will suffer the consequences since we won't be able to
|
|
- * fully reproduce the context of the exception
|
|
- */
|
|
- cwd = get_fpu_cwd(task);
|
|
- swd = get_fpu_swd(task);
|
|
- switch (swd & ~cwd & 0x3f) {
|
|
- case 0x000: /* No unmasked exception */
|
|
- return;
|
|
- default: /* Multiple exceptions */
|
|
- break;
|
|
- case 0x001: /* Invalid Op */
|
|
- /*
|
|
- * swd & 0x240 == 0x040: Stack Underflow
|
|
- * swd & 0x240 == 0x240: Stack Overflow
|
|
- * User must clear the SF bit (0x40) if set
|
|
- */
|
|
- info.si_code = FPE_FLTINV;
|
|
- break;
|
|
- case 0x002: /* Denormalize */
|
|
- case 0x010: /* Underflow */
|
|
- info.si_code = FPE_FLTUND;
|
|
- break;
|
|
- case 0x004: /* Zero Divide */
|
|
- info.si_code = FPE_FLTDIV;
|
|
- break;
|
|
- case 0x008: /* Overflow */
|
|
- info.si_code = FPE_FLTOVF;
|
|
- break;
|
|
- case 0x020: /* Precision */
|
|
- info.si_code = FPE_FLTRES;
|
|
- break;
|
|
- }
|
|
- force_sig_info(SIGFPE, &info, task);
|
|
-}
|
|
-
|
|
-void do_coprocessor_error(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- ignore_fpu_irq = 1;
|
|
- math_error((void __user *)regs->ip);
|
|
-}
|
|
-
|
|
-static void simd_math_error(void __user *ip)
|
|
-{
|
|
- struct task_struct *task;
|
|
- siginfo_t info;
|
|
- unsigned short mxcsr;
|
|
-
|
|
- /*
|
|
- * Save the info for the exception handler and clear the error.
|
|
- */
|
|
- task = current;
|
|
- save_init_fpu(task);
|
|
- task->thread.trap_no = 19;
|
|
- task->thread.error_code = 0;
|
|
- info.si_signo = SIGFPE;
|
|
- info.si_errno = 0;
|
|
- info.si_code = __SI_FAULT;
|
|
- info.si_addr = ip;
|
|
- /*
|
|
- * The SIMD FPU exceptions are handled a little differently, as there
|
|
- * is only a single status/control register. Thus, to determine which
|
|
- * unmasked exception was caught we must mask the exception mask bits
|
|
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
|
|
- */
|
|
- mxcsr = get_fpu_mxcsr(task);
|
|
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
|
|
- case 0x000:
|
|
- default:
|
|
- break;
|
|
- case 0x001: /* Invalid Op */
|
|
- info.si_code = FPE_FLTINV;
|
|
- break;
|
|
- case 0x002: /* Denormalize */
|
|
- case 0x010: /* Underflow */
|
|
- info.si_code = FPE_FLTUND;
|
|
- break;
|
|
- case 0x004: /* Zero Divide */
|
|
- info.si_code = FPE_FLTDIV;
|
|
- break;
|
|
- case 0x008: /* Overflow */
|
|
- info.si_code = FPE_FLTOVF;
|
|
- break;
|
|
- case 0x020: /* Precision */
|
|
- info.si_code = FPE_FLTRES;
|
|
- break;
|
|
- }
|
|
- force_sig_info(SIGFPE, &info, task);
|
|
-}
|
|
-
|
|
-void do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- if (cpu_has_xmm) {
|
|
- /* Handle SIMD FPU exceptions on PIII+ processors. */
|
|
- ignore_fpu_irq = 1;
|
|
- simd_math_error((void __user *)regs->ip);
|
|
- return;
|
|
- }
|
|
- /*
|
|
- * Handle strange cache flush from user space exception
|
|
- * in all other cases. This is undocumented behaviour.
|
|
- */
|
|
- if (regs->flags & X86_VM_MASK) {
|
|
- handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code);
|
|
- return;
|
|
- }
|
|
- current->thread.trap_no = 19;
|
|
- current->thread.error_code = error_code;
|
|
- die_if_kernel("cache flush denied", regs, error_code);
|
|
- force_sig(SIGSEGV, current);
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-void do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
|
|
-{
|
|
-#if 0
|
|
- /* No need to warn about this any longer. */
|
|
- printk(KERN_INFO "Ignoring P6 Local APIC Spurious Interrupt Bug...\n");
|
|
-#endif
|
|
-}
|
|
-
|
|
-unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
|
|
-{
|
|
- struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
|
|
- unsigned long base = (kesp - uesp) & -THREAD_SIZE;
|
|
- unsigned long new_kesp = kesp - base;
|
|
- unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
|
|
- __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
|
|
-
|
|
- /* Set up base for espfix segment */
|
|
- desc &= 0x00f0ff0000000000ULL;
|
|
- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
|
|
- ((((__u64)base) << 32) & 0xff00000000000000ULL) |
|
|
- ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
|
|
- (lim_pages & 0xffff);
|
|
- *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
|
|
-
|
|
- return new_kesp;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * 'math_state_restore()' saves the current math information in the
|
|
- * old math state array, and gets the new ones from the current task
|
|
- *
|
|
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
|
|
- * Don't touch unless you *really* know how it works.
|
|
- *
|
|
- * Must be called with kernel preemption disabled (in this case,
|
|
- * local interrupts are disabled at the call-site in entry.S).
|
|
- */
|
|
-asmlinkage void math_state_restore(void)
|
|
-{
|
|
- struct thread_info *thread = current_thread_info();
|
|
- struct task_struct *tsk = thread->task;
|
|
-
|
|
- if (!tsk_used_math(tsk)) {
|
|
- local_irq_enable();
|
|
- /*
|
|
- * does a slab alloc which can sleep
|
|
- */
|
|
- if (init_fpu(tsk)) {
|
|
- /*
|
|
- * ran out of memory!
|
|
- */
|
|
- do_group_exit(SIGKILL);
|
|
- return;
|
|
- }
|
|
- local_irq_disable();
|
|
- }
|
|
-
|
|
- /* NB. 'clts' is done for us by Xen during virtual trap. */
|
|
- restore_fpu(tsk);
|
|
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
|
|
- tsk->fpu_counter++;
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(math_state_restore);
|
|
-
|
|
-#ifndef CONFIG_MATH_EMULATION
|
|
-
|
|
-asmlinkage void math_emulate(long arg)
|
|
-{
|
|
- printk(KERN_EMERG
|
|
- "math-emulation not enabled and no coprocessor found.\n");
|
|
- printk(KERN_EMERG "killing %s.\n", current->comm);
|
|
- force_sig(SIGFPE, current);
|
|
- schedule();
|
|
-}
|
|
-
|
|
-#endif /* CONFIG_MATH_EMULATION */
|
|
-
|
|
-/*
|
|
- * NB. All these are "trap gates" (i.e. events_mask isn't set) except
|
|
- * for those that specify <dpl>|4 in the second field.
|
|
- */
|
|
-static const trap_info_t __cpuinitconst trap_table[] = {
|
|
- { 0, 0, __KERNEL_CS, (unsigned long)divide_error },
|
|
- { 1, 0|4, __KERNEL_CS, (unsigned long)debug },
|
|
- { 3, 3|4, __KERNEL_CS, (unsigned long)int3 },
|
|
- { 4, 3, __KERNEL_CS, (unsigned long)overflow },
|
|
- { 5, 0, __KERNEL_CS, (unsigned long)bounds },
|
|
- { 6, 0, __KERNEL_CS, (unsigned long)invalid_op },
|
|
- { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available },
|
|
- { 9, 0, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun },
|
|
- { 10, 0, __KERNEL_CS, (unsigned long)invalid_TSS },
|
|
- { 11, 0, __KERNEL_CS, (unsigned long)segment_not_present },
|
|
- { 12, 0, __KERNEL_CS, (unsigned long)stack_segment },
|
|
- { 13, 0, __KERNEL_CS, (unsigned long)general_protection },
|
|
- { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault },
|
|
- { 15, 0, __KERNEL_CS, (unsigned long)fixup_4gb_segment },
|
|
- { 16, 0, __KERNEL_CS, (unsigned long)coprocessor_error },
|
|
- { 17, 0, __KERNEL_CS, (unsigned long)alignment_check },
|
|
-#ifdef CONFIG_X86_MCE
|
|
- { 18, 0, __KERNEL_CS, (unsigned long)machine_check },
|
|
-#endif
|
|
- { 19, 0, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
|
|
- { SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)system_call },
|
|
- { 0, 0, 0, 0 }
|
|
-};
|
|
-
|
|
-void __init trap_init(void)
|
|
-{
|
|
- int ret;
|
|
-
|
|
- ret = HYPERVISOR_set_trap_table(trap_table);
|
|
- if (ret)
|
|
- printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
|
|
-
|
|
- if (cpu_has_fxsr) {
|
|
- printk(KERN_INFO "Enabling fast FPU save and restore... ");
|
|
- set_in_cr4(X86_CR4_OSFXSR);
|
|
- printk("done.\n");
|
|
- }
|
|
- if (cpu_has_xmm) {
|
|
- printk(KERN_INFO
|
|
- "Enabling unmasked SIMD FPU exception support... ");
|
|
- set_in_cr4(X86_CR4_OSXMMEXCPT);
|
|
- printk("done.\n");
|
|
- }
|
|
-
|
|
- init_thread_xstate();
|
|
- /*
|
|
- * Should be a barrier for any external CPU state:
|
|
- */
|
|
- cpu_init();
|
|
-}
|
|
-
|
|
-void __cpuinit smp_trap_init(trap_info_t *trap_ctxt)
|
|
-{
|
|
- const trap_info_t *t = trap_table;
|
|
-
|
|
- for (t = trap_table; t->address; t++) {
|
|
- trap_ctxt[t->vector].flags = t->flags;
|
|
- trap_ctxt[t->vector].cs = t->cs;
|
|
- trap_ctxt[t->vector].address = t->address;
|
|
- }
|
|
-}
|
|
-
|
|
-static int __init kstack_setup(char *s)
|
|
-{
|
|
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
|
|
-
|
|
- return 1;
|
|
-}
|
|
-__setup("kstack=", kstack_setup);
|
|
-
|
|
-static int __init code_bytes_setup(char *s)
|
|
-{
|
|
- code_bytes = simple_strtoul(s, NULL, 0);
|
|
- if (code_bytes > 8192)
|
|
- code_bytes = 8192;
|
|
-
|
|
- return 1;
|
|
-}
|
|
-__setup("code_bytes=", code_bytes_setup);
|
|
--- head-2011-03-17.orig/arch/x86/kernel/traps_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,1238 +0,0 @@
|
|
-/*
|
|
- * Copyright (C) 1991, 1992 Linus Torvalds
|
|
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
|
|
- *
|
|
- * Pentium III FXSR, SSE support
|
|
- * Gareth Hughes <gareth@valinux.com>, May 2000
|
|
- */
|
|
-
|
|
-/*
|
|
- * 'Traps.c' handles hardware traps and faults after we have saved some
|
|
- * state in 'entry.S'.
|
|
- */
|
|
-#include <linux/moduleparam.h>
|
|
-#include <linux/interrupt.h>
|
|
-#include <linux/kallsyms.h>
|
|
-#include <linux/spinlock.h>
|
|
-#include <linux/kprobes.h>
|
|
-#include <linux/uaccess.h>
|
|
-#include <linux/utsname.h>
|
|
-#include <linux/kdebug.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/ptrace.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/unwind.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/errno.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/timer.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/bug.h>
|
|
-#include <linux/nmi.h>
|
|
-#include <linux/mm.h>
|
|
-
|
|
-#if defined(CONFIG_EDAC)
|
|
-#include <linux/edac.h>
|
|
-#endif
|
|
-
|
|
-#include <asm/stacktrace.h>
|
|
-#include <asm/processor.h>
|
|
-#include <asm/debugreg.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/system.h>
|
|
-#include <asm/unwind.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/i387.h>
|
|
-#include <asm/nmi.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/pgalloc.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/pda.h>
|
|
-#include <asm/traps.h>
|
|
-
|
|
-#include <mach_traps.h>
|
|
-
|
|
-int panic_on_unrecovered_nmi;
|
|
-int kstack_depth_to_print = 12;
|
|
-static unsigned int code_bytes = 64;
|
|
-static int ignore_nmis;
|
|
-static int die_counter;
|
|
-
|
|
-static inline void conditional_sti(struct pt_regs *regs)
|
|
-{
|
|
- if (regs->flags & X86_EFLAGS_IF)
|
|
- local_irq_enable();
|
|
-}
|
|
-
|
|
-static inline void preempt_conditional_sti(struct pt_regs *regs)
|
|
-{
|
|
- inc_preempt_count();
|
|
- if (regs->flags & X86_EFLAGS_IF)
|
|
- local_irq_enable();
|
|
-}
|
|
-
|
|
-static inline void preempt_conditional_cli(struct pt_regs *regs)
|
|
-{
|
|
- if (regs->flags & X86_EFLAGS_IF)
|
|
- local_irq_disable();
|
|
- /* Make sure to not schedule here because we could be running
|
|
- on an exception stack. */
|
|
- dec_preempt_count();
|
|
-}
|
|
-
|
|
-void printk_address(unsigned long address, int reliable)
|
|
-{
|
|
- printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
|
|
-}
|
|
-
|
|
-static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
|
|
- unsigned *usedp, char **idp)
|
|
-{
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- static char ids[][8] = {
|
|
- [DEBUG_STACK - 1] = "#DB",
|
|
- [NMI_STACK - 1] = "NMI",
|
|
- [DOUBLEFAULT_STACK - 1] = "#DF",
|
|
- [STACKFAULT_STACK - 1] = "#SS",
|
|
- [MCE_STACK - 1] = "#MC",
|
|
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
|
|
- [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]"
|
|
-#endif
|
|
- };
|
|
- unsigned k;
|
|
-
|
|
- /*
|
|
- * Iterate over all exception stacks, and figure out whether
|
|
- * 'stack' is in one of them:
|
|
- */
|
|
- for (k = 0; k < N_EXCEPTION_STACKS; k++) {
|
|
- unsigned long end = per_cpu(orig_ist, cpu).ist[k];
|
|
- /*
|
|
- * Is 'stack' above this exception frame's end?
|
|
- * If yes then skip to the next frame.
|
|
- */
|
|
- if (stack >= end)
|
|
- continue;
|
|
- /*
|
|
- * Is 'stack' above this exception frame's start address?
|
|
- * If yes then we found the right frame.
|
|
- */
|
|
- if (stack >= end - EXCEPTION_STKSZ) {
|
|
- /*
|
|
- * Make sure we only iterate through an exception
|
|
- * stack once. If it comes up for the second time
|
|
- * then there's something wrong going on - just
|
|
- * break out and return NULL:
|
|
- */
|
|
- if (*usedp & (1U << k))
|
|
- break;
|
|
- *usedp |= 1U << k;
|
|
- *idp = ids[k];
|
|
- return (unsigned long *)end;
|
|
- }
|
|
- /*
|
|
- * If this is a debug stack, and if it has a larger size than
|
|
- * the usual exception stacks, then 'stack' might still
|
|
- * be within the lower portion of the debug stack:
|
|
- */
|
|
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
|
|
- if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) {
|
|
- unsigned j = N_EXCEPTION_STACKS - 1;
|
|
-
|
|
- /*
|
|
- * Black magic. A large debug stack is composed of
|
|
- * multiple exception stack entries, which we
|
|
- * iterate through now. Dont look:
|
|
- */
|
|
- do {
|
|
- ++j;
|
|
- end -= EXCEPTION_STKSZ;
|
|
- ids[j][4] = '1' + (j - N_EXCEPTION_STACKS);
|
|
- } while (stack < end - EXCEPTION_STKSZ);
|
|
- if (*usedp & (1U << j))
|
|
- break;
|
|
- *usedp |= 1U << j;
|
|
- *idp = ids[j];
|
|
- return (unsigned long *)end;
|
|
- }
|
|
-#endif
|
|
- }
|
|
-#endif
|
|
- return NULL;
|
|
-}
|
|
-
|
|
-/*
|
|
- * x86-64 can have up to three kernel stacks:
|
|
- * process stack
|
|
- * interrupt stack
|
|
- * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
|
|
- */
|
|
-
|
|
-static inline int valid_stack_ptr(struct thread_info *tinfo,
|
|
- void *p, unsigned int size, void *end)
|
|
-{
|
|
- void *t = tinfo;
|
|
- if (end) {
|
|
- if (p < end && p >= (end-THREAD_SIZE))
|
|
- return 1;
|
|
- else
|
|
- return 0;
|
|
- }
|
|
- return p > t && p < t + THREAD_SIZE - size;
|
|
-}
|
|
-
|
|
-/* The form of the top of the frame on the stack */
|
|
-struct stack_frame {
|
|
- struct stack_frame *next_frame;
|
|
- unsigned long return_address;
|
|
-};
|
|
-
|
|
-static inline unsigned long
|
|
-print_context_stack(struct thread_info *tinfo,
|
|
- unsigned long *stack, unsigned long bp,
|
|
- const struct stacktrace_ops *ops, void *data,
|
|
- unsigned long *end)
|
|
-{
|
|
- struct stack_frame *frame = (struct stack_frame *)bp;
|
|
-
|
|
- while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) {
|
|
- unsigned long addr;
|
|
-
|
|
- addr = *stack;
|
|
- if (__kernel_text_address(addr)) {
|
|
- if ((unsigned long) stack == bp + 8) {
|
|
- ops->address(data, addr, 1);
|
|
- frame = frame->next_frame;
|
|
- bp = (unsigned long) frame;
|
|
- } else {
|
|
- ops->address(data, addr, bp == 0);
|
|
- }
|
|
- }
|
|
- stack++;
|
|
- }
|
|
- return bp;
|
|
-}
|
|
-
|
|
-void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp,
|
|
- const struct stacktrace_ops *ops, void *data)
|
|
-{
|
|
- const unsigned cpu = get_cpu();
|
|
- unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
|
|
- unsigned used = 0;
|
|
- struct thread_info *tinfo;
|
|
-
|
|
- if (!task)
|
|
- task = current;
|
|
-
|
|
- if (!stack) {
|
|
- unsigned long dummy;
|
|
- stack = &dummy;
|
|
- if (task && task != current)
|
|
- stack = (unsigned long *)task->thread.sp;
|
|
- }
|
|
-
|
|
-#ifdef CONFIG_FRAME_POINTER
|
|
- if (!bp) {
|
|
- if (task == current) {
|
|
- /* Grab bp right from our regs */
|
|
- asm("movq %%rbp, %0" : "=r" (bp) :);
|
|
- } else {
|
|
- /* bp is the last reg pushed by switch_to */
|
|
- bp = *(unsigned long *) task->thread.sp;
|
|
- }
|
|
- }
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Print function call entries in all stacks, starting at the
|
|
- * current stack address. If the stacks consist of nested
|
|
- * exceptions
|
|
- */
|
|
- tinfo = task_thread_info(task);
|
|
- for (;;) {
|
|
- char *id;
|
|
- unsigned long *estack_end;
|
|
- estack_end = in_exception_stack(cpu, (unsigned long)stack,
|
|
- &used, &id);
|
|
-
|
|
- if (estack_end) {
|
|
- if (ops->stack(data, id) < 0)
|
|
- break;
|
|
-
|
|
- bp = print_context_stack(tinfo, stack, bp, ops,
|
|
- data, estack_end);
|
|
- ops->stack(data, "<EOE>");
|
|
- /*
|
|
- * We link to the next stack via the
|
|
- * second-to-last pointer (index -2 to end) in the
|
|
- * exception stack:
|
|
- */
|
|
- stack = (unsigned long *) estack_end[-2];
|
|
- continue;
|
|
- }
|
|
- if (irqstack_end) {
|
|
- unsigned long *irqstack;
|
|
- irqstack = irqstack_end -
|
|
- (IRQSTACKSIZE - 64) / sizeof(*irqstack);
|
|
-
|
|
- if (stack >= irqstack && stack < irqstack_end) {
|
|
- if (ops->stack(data, "IRQ") < 0)
|
|
- break;
|
|
- bp = print_context_stack(tinfo, stack, bp,
|
|
- ops, data, irqstack_end);
|
|
- /*
|
|
- * We link to the next stack (which would be
|
|
- * the process stack normally) the last
|
|
- * pointer (index -1 to end) in the IRQ stack:
|
|
- */
|
|
- stack = (unsigned long *) (irqstack_end[-1]);
|
|
- irqstack_end = NULL;
|
|
- ops->stack(data, "EOI");
|
|
- continue;
|
|
- }
|
|
- }
|
|
- break;
|
|
- }
|
|
-
|
|
- /*
|
|
- * This handles the process stack:
|
|
- */
|
|
- bp = print_context_stack(tinfo, stack, bp, ops, data, NULL);
|
|
- put_cpu();
|
|
-}
|
|
-EXPORT_SYMBOL(dump_trace);
|
|
-
|
|
-static void
|
|
-print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
|
|
-{
|
|
- print_symbol(msg, symbol);
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-static void print_trace_warning(void *data, char *msg)
|
|
-{
|
|
- printk("%s\n", msg);
|
|
-}
|
|
-
|
|
-static int print_trace_stack(void *data, char *name)
|
|
-{
|
|
- printk(" <%s> ", name);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static void print_trace_address(void *data, unsigned long addr, int reliable)
|
|
-{
|
|
- touch_nmi_watchdog();
|
|
- printk_address(addr, reliable);
|
|
-}
|
|
-
|
|
-static const struct stacktrace_ops print_trace_ops = {
|
|
- .warning = print_trace_warning,
|
|
- .warning_symbol = print_trace_warning_symbol,
|
|
- .stack = print_trace_stack,
|
|
- .address = print_trace_address,
|
|
-};
|
|
-
|
|
-static void
|
|
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp, char *log_lvl)
|
|
-{
|
|
- printk("\nCall Trace:\n");
|
|
- dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-void show_trace(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp)
|
|
-{
|
|
- show_trace_log_lvl(task, regs, stack, bp, "");
|
|
-}
|
|
-
|
|
-static void
|
|
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *sp, unsigned long bp, char *log_lvl)
|
|
-{
|
|
- unsigned long *stack;
|
|
- int i;
|
|
- const int cpu = smp_processor_id();
|
|
- unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
|
|
- unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
|
|
-
|
|
- // debugging aid: "show_stack(NULL, NULL);" prints the
|
|
- // back trace for this cpu.
|
|
-
|
|
- if (sp == NULL) {
|
|
- if (task)
|
|
- sp = (unsigned long *)task->thread.sp;
|
|
- else
|
|
- sp = (unsigned long *)&sp;
|
|
- }
|
|
-
|
|
- stack = sp;
|
|
- for (i = 0; i < kstack_depth_to_print; i++) {
|
|
- if (stack >= irqstack && stack <= irqstack_end) {
|
|
- if (stack == irqstack_end) {
|
|
- stack = (unsigned long *) (irqstack_end[-1]);
|
|
- printk(" <EOI> ");
|
|
- }
|
|
- } else {
|
|
- if (((long) stack & (THREAD_SIZE-1)) == 0)
|
|
- break;
|
|
- }
|
|
- if (i && ((i % 4) == 0))
|
|
- printk("\n");
|
|
- printk(" %016lx", *stack++);
|
|
- touch_nmi_watchdog();
|
|
- }
|
|
- show_trace_log_lvl(task, regs, sp, bp, log_lvl);
|
|
-}
|
|
-
|
|
-void show_stack(struct task_struct *task, unsigned long *sp)
|
|
-{
|
|
- show_stack_log_lvl(task, NULL, sp, 0, "");
|
|
-}
|
|
-
|
|
-/*
|
|
- * The architecture-independent dump_stack generator
|
|
- */
|
|
-void dump_stack(void)
|
|
-{
|
|
- unsigned long bp = 0;
|
|
- unsigned long stack;
|
|
-
|
|
-#ifdef CONFIG_FRAME_POINTER
|
|
- if (!bp)
|
|
- asm("movq %%rbp, %0" : "=r" (bp):);
|
|
-#endif
|
|
-
|
|
- printk("Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
- current->pid, current->comm, print_tainted(),
|
|
- init_utsname()->release,
|
|
- (int)strcspn(init_utsname()->version, " "),
|
|
- init_utsname()->version);
|
|
- show_trace(NULL, NULL, &stack, bp);
|
|
-}
|
|
-
|
|
-EXPORT_SYMBOL(dump_stack);
|
|
-
|
|
-void show_registers(struct pt_regs *regs)
|
|
-{
|
|
- int i;
|
|
- unsigned long sp;
|
|
- const int cpu = smp_processor_id();
|
|
- struct task_struct *cur = cpu_pda(cpu)->pcurrent;
|
|
-
|
|
- sp = regs->sp;
|
|
- printk("CPU %d ", cpu);
|
|
- __show_regs(regs);
|
|
- printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
|
|
- cur->comm, cur->pid, task_thread_info(cur), cur);
|
|
-
|
|
- /*
|
|
- * When in-kernel, we also print out the stack and code at the
|
|
- * time of the fault..
|
|
- */
|
|
- if (!user_mode(regs)) {
|
|
- unsigned int code_prologue = code_bytes * 43 / 64;
|
|
- unsigned int code_len = code_bytes;
|
|
- unsigned char c;
|
|
- u8 *ip;
|
|
-
|
|
- printk("Stack: ");
|
|
- show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
|
|
- regs->bp, "");
|
|
- printk("\n");
|
|
-
|
|
- printk(KERN_EMERG "Code: ");
|
|
-
|
|
- ip = (u8 *)regs->ip - code_prologue;
|
|
- if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
|
|
- /* try starting at RIP */
|
|
- ip = (u8 *)regs->ip;
|
|
- code_len = code_len - code_prologue + 1;
|
|
- }
|
|
- for (i = 0; i < code_len; i++, ip++) {
|
|
- if (ip < (u8 *)PAGE_OFFSET ||
|
|
- probe_kernel_address(ip, c)) {
|
|
- printk(" Bad RIP value.");
|
|
- break;
|
|
- }
|
|
- if (ip == (u8 *)regs->ip)
|
|
- printk("<%02x> ", c);
|
|
- else
|
|
- printk("%02x ", c);
|
|
- }
|
|
- }
|
|
- printk("\n");
|
|
-}
|
|
-
|
|
-int is_valid_bugaddr(unsigned long ip)
|
|
-{
|
|
- unsigned short ud2;
|
|
-
|
|
- if (__copy_from_user(&ud2, (const void __user *) ip, sizeof(ud2)))
|
|
- return 0;
|
|
-
|
|
- return ud2 == 0x0b0f;
|
|
-}
|
|
-
|
|
-static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
|
|
-static int die_owner = -1;
|
|
-static unsigned int die_nest_count;
|
|
-
|
|
-unsigned __kprobes long oops_begin(void)
|
|
-{
|
|
- int cpu;
|
|
- unsigned long flags;
|
|
-
|
|
- oops_enter();
|
|
-
|
|
- /* racy, but better than risking deadlock. */
|
|
- raw_local_irq_save(flags);
|
|
- cpu = smp_processor_id();
|
|
- if (!__raw_spin_trylock(&die_lock)) {
|
|
- if (cpu == die_owner)
|
|
- /* nested oops. should stop eventually */;
|
|
- else
|
|
- __raw_spin_lock(&die_lock);
|
|
- }
|
|
- die_nest_count++;
|
|
- die_owner = cpu;
|
|
- console_verbose();
|
|
- bust_spinlocks(1);
|
|
- return flags;
|
|
-}
|
|
-
|
|
-void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
|
|
-{
|
|
- die_owner = -1;
|
|
- bust_spinlocks(0);
|
|
- die_nest_count--;
|
|
- if (!die_nest_count)
|
|
- /* Nest count reaches zero, release the lock. */
|
|
- __raw_spin_unlock(&die_lock);
|
|
- raw_local_irq_restore(flags);
|
|
- if (!regs) {
|
|
- oops_exit();
|
|
- return;
|
|
- }
|
|
- if (panic_on_oops)
|
|
- panic("Fatal exception");
|
|
- oops_exit();
|
|
- do_exit(signr);
|
|
-}
|
|
-
|
|
-int __kprobes __die(const char *str, struct pt_regs *regs, long err)
|
|
-{
|
|
- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
|
|
-#ifdef CONFIG_PREEMPT
|
|
- printk("PREEMPT ");
|
|
-#endif
|
|
-#ifdef CONFIG_SMP
|
|
- printk("SMP ");
|
|
-#endif
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- printk("DEBUG_PAGEALLOC");
|
|
-#endif
|
|
- printk("\n");
|
|
- if (notify_die(DIE_OOPS, str, regs, err,
|
|
- current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
|
|
- return 1;
|
|
-
|
|
- show_registers(regs);
|
|
- add_taint(TAINT_DIE);
|
|
- /* Executive summary in case the oops scrolled away */
|
|
- printk(KERN_ALERT "RIP ");
|
|
- printk_address(regs->ip, 1);
|
|
- printk(" RSP <%016lx>\n", regs->sp);
|
|
- if (kexec_should_crash(current))
|
|
- crash_kexec(regs);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-void die(const char *str, struct pt_regs *regs, long err)
|
|
-{
|
|
- unsigned long flags = oops_begin();
|
|
-
|
|
- if (!user_mode(regs))
|
|
- report_bug(regs->ip, regs);
|
|
-
|
|
- if (__die(str, regs, err))
|
|
- regs = NULL;
|
|
- oops_end(flags, regs, SIGSEGV);
|
|
-}
|
|
-
|
|
-#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
|
|
-notrace __kprobes void
|
|
-die_nmi(char *str, struct pt_regs *regs, int do_panic)
|
|
-{
|
|
- unsigned long flags;
|
|
-
|
|
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
-
|
|
- flags = oops_begin();
|
|
- /*
|
|
- * We are in trouble anyway, lets at least try
|
|
- * to get a message out.
|
|
- */
|
|
- printk(KERN_EMERG "%s", str);
|
|
- printk(" on CPU%d, ip %08lx, registers:\n",
|
|
- smp_processor_id(), regs->ip);
|
|
- show_registers(regs);
|
|
- if (kexec_should_crash(current))
|
|
- crash_kexec(regs);
|
|
- if (do_panic || panic_on_oops)
|
|
- panic("Non maskable interrupt");
|
|
- oops_end(flags, NULL, SIGBUS);
|
|
- nmi_exit();
|
|
- local_irq_enable();
|
|
- do_exit(SIGBUS);
|
|
-}
|
|
-#endif
|
|
-
|
|
-static void __kprobes
|
|
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
|
|
- long error_code, siginfo_t *info)
|
|
-{
|
|
- struct task_struct *tsk = current;
|
|
-
|
|
- if (!user_mode(regs))
|
|
- goto kernel_trap;
|
|
-
|
|
- /*
|
|
- * We want error_code and trap_no set for userspace faults and
|
|
- * kernelspace faults which result in die(), but not
|
|
- * kernelspace faults which are fixed up. die() gives the
|
|
- * process no chance to handle the signal and notice the
|
|
- * kernel fault information, so that won't result in polluting
|
|
- * the information about previously queued, but not yet
|
|
- * delivered, faults. See also do_general_protection below.
|
|
- */
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
-
|
|
- if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
|
|
- printk_ratelimit()) {
|
|
- printk(KERN_INFO
|
|
- "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
|
|
- tsk->comm, tsk->pid, str,
|
|
- regs->ip, regs->sp, error_code);
|
|
- print_vma_addr(" in ", regs->ip);
|
|
- printk("\n");
|
|
- }
|
|
-
|
|
- if (info)
|
|
- force_sig_info(signr, info, tsk);
|
|
- else
|
|
- force_sig(signr, tsk);
|
|
- return;
|
|
-
|
|
-kernel_trap:
|
|
- if (!fixup_exception(regs)) {
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
- die(str, regs, error_code);
|
|
- }
|
|
- return;
|
|
-}
|
|
-
|
|
-#define DO_ERROR(trapnr, signr, str, name) \
|
|
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
|
|
-{ \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
- conditional_sti(regs); \
|
|
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
|
|
-}
|
|
-
|
|
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
|
|
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
|
|
-{ \
|
|
- siginfo_t info; \
|
|
- info.si_signo = signr; \
|
|
- info.si_errno = 0; \
|
|
- info.si_code = sicode; \
|
|
- info.si_addr = (void __user *)siaddr; \
|
|
- trace_hardirqs_fixup(); \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
- conditional_sti(regs); \
|
|
- do_trap(trapnr, signr, str, regs, error_code, &info); \
|
|
-}
|
|
-
|
|
-DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
-DO_ERROR(4, SIGSEGV, "overflow", overflow)
|
|
-DO_ERROR(5, SIGSEGV, "bounds", bounds)
|
|
-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
|
|
-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
-DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
|
|
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
-DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
|
|
-
|
|
-/* Runs on IST stack */
|
|
-asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
|
|
- 12, SIGBUS) == NOTIFY_STOP)
|
|
- return;
|
|
- preempt_conditional_sti(regs);
|
|
- do_trap(12, SIGBUS, "stack segment", regs, error_code, NULL);
|
|
- preempt_conditional_cli(regs);
|
|
-}
|
|
-
|
|
-asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
|
|
-{
|
|
- static const char str[] = "double fault";
|
|
- struct task_struct *tsk = current;
|
|
-
|
|
- /* Return not checked because double check cannot be ignored */
|
|
- notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV);
|
|
-
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 8;
|
|
-
|
|
- /* This is always a kernel trap and never fixable (and thus must
|
|
- never return). */
|
|
- for (;;)
|
|
- die(str, regs, error_code);
|
|
-}
|
|
-
|
|
-asmlinkage void __kprobes
|
|
-do_general_protection(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- struct task_struct *tsk;
|
|
-
|
|
- conditional_sti(regs);
|
|
-
|
|
- tsk = current;
|
|
- if (!user_mode(regs))
|
|
- goto gp_in_kernel;
|
|
-
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 13;
|
|
-
|
|
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
- printk_ratelimit()) {
|
|
- printk(KERN_INFO
|
|
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
- tsk->comm, tsk->pid,
|
|
- regs->ip, regs->sp, error_code);
|
|
- print_vma_addr(" in ", regs->ip);
|
|
- printk("\n");
|
|
- }
|
|
-
|
|
- force_sig(SIGSEGV, tsk);
|
|
- return;
|
|
-
|
|
-gp_in_kernel:
|
|
- if (fixup_exception(regs))
|
|
- return;
|
|
-
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 13;
|
|
- if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
- error_code, 13, SIGSEGV) == NOTIFY_STOP)
|
|
- return;
|
|
- die("general protection fault", regs, error_code);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void
|
|
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
|
|
-{
|
|
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
|
|
- reason);
|
|
- printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
|
|
-
|
|
-#if defined(CONFIG_EDAC)
|
|
- if (edac_handler_set()) {
|
|
- edac_atomic_assert_error();
|
|
- return;
|
|
- }
|
|
-#endif
|
|
-
|
|
- if (panic_on_unrecovered_nmi)
|
|
- panic("NMI: Not continuing");
|
|
-
|
|
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
-
|
|
- /* Clear and disable the memory parity error line. */
|
|
- clear_mem_error(reason);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void
|
|
-io_check_error(unsigned char reason, struct pt_regs *regs)
|
|
-{
|
|
- printk("NMI: IOCK error (debug interrupt?)\n");
|
|
- show_registers(regs);
|
|
-
|
|
- /* Re-enable the IOCK line, wait for a few seconds */
|
|
- clear_io_check_error(reason);
|
|
-}
|
|
-
|
|
-static notrace __kprobes void
|
|
-unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
|
|
-{
|
|
- if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
- printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
|
|
- reason);
|
|
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
|
|
-
|
|
- if (panic_on_unrecovered_nmi)
|
|
- panic("NMI: Not continuing");
|
|
-
|
|
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
-}
|
|
-
|
|
-/* Runs on IST stack. This code must keep interrupts off all the time.
|
|
- Nested NMIs are prevented by the CPU. */
|
|
-asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|
-{
|
|
- unsigned char reason = 0;
|
|
- int cpu;
|
|
-
|
|
- cpu = smp_processor_id();
|
|
-
|
|
- /* Only the BSP gets external NMIs from the system. */
|
|
- if (!cpu)
|
|
- reason = get_nmi_reason();
|
|
-
|
|
- if (!(reason & 0xc0)) {
|
|
- if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
|
|
- == NOTIFY_STOP)
|
|
- return;
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- /*
|
|
- * Ok, so this is none of the documented NMI sources,
|
|
- * so it must be the NMI watchdog.
|
|
- */
|
|
- if (nmi_watchdog_tick(regs, reason))
|
|
- return;
|
|
-#endif
|
|
- if (!do_nmi_callback(regs, cpu))
|
|
- unknown_nmi_error(reason, regs);
|
|
-
|
|
- return;
|
|
- }
|
|
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
-
|
|
- /* AK: following checks seem to be broken on modern chipsets. FIXME */
|
|
- if (reason & 0x80)
|
|
- mem_parity_error(reason, regs);
|
|
- if (reason & 0x40)
|
|
- io_check_error(reason, regs);
|
|
-}
|
|
-
|
|
-asmlinkage notrace __kprobes void
|
|
-do_nmi(struct pt_regs *regs, long error_code)
|
|
-{
|
|
- nmi_enter();
|
|
-
|
|
- add_pda(__nmi_count, 1);
|
|
-
|
|
- if (!ignore_nmis)
|
|
- default_do_nmi(regs);
|
|
-
|
|
- nmi_exit();
|
|
-}
|
|
-
|
|
-void stop_nmi(void)
|
|
-{
|
|
- acpi_nmi_disable();
|
|
- ignore_nmis++;
|
|
-}
|
|
-
|
|
-void restart_nmi(void)
|
|
-{
|
|
- ignore_nmis--;
|
|
- acpi_nmi_enable();
|
|
-}
|
|
-
|
|
-/* runs on IST stack. */
|
|
-asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
|
|
-{
|
|
- trace_hardirqs_fixup();
|
|
-
|
|
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
|
|
- == NOTIFY_STOP)
|
|
- return;
|
|
-
|
|
- preempt_conditional_sti(regs);
|
|
- do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
|
|
- preempt_conditional_cli(regs);
|
|
-}
|
|
-
|
|
-/* Help handler running on IST stack to switch back to user stack
|
|
- for scheduling or signal handling. The actual stack switch is done in
|
|
- entry.S */
|
|
-asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
|
|
-{
|
|
- struct pt_regs *regs = eregs;
|
|
- /* Did already sync */
|
|
- if (eregs == (struct pt_regs *)eregs->sp)
|
|
- ;
|
|
- /* Exception from user space */
|
|
- else if (user_mode(eregs))
|
|
- regs = task_pt_regs(current);
|
|
- /* Exception from kernel and interrupts are enabled. Move to
|
|
- kernel process stack. */
|
|
- else if (eregs->flags & X86_EFLAGS_IF)
|
|
- regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs));
|
|
- if (eregs != regs)
|
|
- *regs = *eregs;
|
|
- return regs;
|
|
-}
|
|
-
|
|
-/* runs on IST stack. */
|
|
-asmlinkage void __kprobes do_debug(struct pt_regs * regs,
|
|
- unsigned long error_code)
|
|
-{
|
|
- struct task_struct *tsk = current;
|
|
- unsigned long condition;
|
|
- siginfo_t info;
|
|
-
|
|
- trace_hardirqs_fixup();
|
|
-
|
|
- get_debugreg(condition, 6);
|
|
-
|
|
- /*
|
|
- * The processor cleared BTF, so don't mark that we need it set.
|
|
- */
|
|
- clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
|
|
- tsk->thread.debugctlmsr = 0;
|
|
-
|
|
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
|
|
- SIGTRAP) == NOTIFY_STOP)
|
|
- return;
|
|
-
|
|
- preempt_conditional_sti(regs);
|
|
-
|
|
- /* Mask out spurious debug traps due to lazy DR7 setting */
|
|
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
|
|
- if (!tsk->thread.debugreg7)
|
|
- goto clear_dr7;
|
|
- }
|
|
-
|
|
- tsk->thread.debugreg6 = condition;
|
|
-
|
|
- /*
|
|
- * Single-stepping through TF: make sure we ignore any events in
|
|
- * kernel space (but re-enable TF when returning to user mode).
|
|
- */
|
|
- if (condition & DR_STEP) {
|
|
- if (!user_mode(regs))
|
|
- goto clear_TF_reenable;
|
|
- }
|
|
-
|
|
- /* Ok, finally something we can handle */
|
|
- tsk->thread.trap_no = 1;
|
|
- tsk->thread.error_code = error_code;
|
|
- info.si_signo = SIGTRAP;
|
|
- info.si_errno = 0;
|
|
- info.si_code = TRAP_BRKPT;
|
|
- info.si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
|
|
- force_sig_info(SIGTRAP, &info, tsk);
|
|
-
|
|
-clear_dr7:
|
|
- set_debugreg(0, 7);
|
|
- preempt_conditional_cli(regs);
|
|
- return;
|
|
-
|
|
-clear_TF_reenable:
|
|
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
|
- regs->flags &= ~X86_EFLAGS_TF;
|
|
- preempt_conditional_cli(regs);
|
|
- return;
|
|
-}
|
|
-
|
|
-static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
|
|
-{
|
|
- if (fixup_exception(regs))
|
|
- return 1;
|
|
-
|
|
- notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE);
|
|
- /* Illegal floating point operation in the kernel */
|
|
- current->thread.trap_no = trapnr;
|
|
- die(str, regs, 0);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Note that we play around with the 'TS' bit in an attempt to get
|
|
- * the correct behaviour even in the presence of the asynchronous
|
|
- * IRQ13 behaviour
|
|
- */
|
|
-asmlinkage void do_coprocessor_error(struct pt_regs *regs)
|
|
-{
|
|
- void __user *ip = (void __user *)(regs->ip);
|
|
- struct task_struct *task;
|
|
- siginfo_t info;
|
|
- unsigned short cwd, swd;
|
|
-
|
|
- conditional_sti(regs);
|
|
- if (!user_mode(regs) &&
|
|
- kernel_math_error(regs, "kernel x87 math error", 16))
|
|
- return;
|
|
-
|
|
- /*
|
|
- * Save the info for the exception handler and clear the error.
|
|
- */
|
|
- task = current;
|
|
- save_init_fpu(task);
|
|
- task->thread.trap_no = 16;
|
|
- task->thread.error_code = 0;
|
|
- info.si_signo = SIGFPE;
|
|
- info.si_errno = 0;
|
|
- info.si_code = __SI_FAULT;
|
|
- info.si_addr = ip;
|
|
- /*
|
|
- * (~cwd & swd) will mask out exceptions that are not set to unmasked
|
|
- * status. 0x3f is the exception bits in these regs, 0x200 is the
|
|
- * C1 reg you need in case of a stack fault, 0x040 is the stack
|
|
- * fault bit. We should only be taking one exception at a time,
|
|
- * so if this combination doesn't produce any single exception,
|
|
- * then we have a bad program that isn't synchronizing its FPU usage
|
|
- * and it will suffer the consequences since we won't be able to
|
|
- * fully reproduce the context of the exception
|
|
- */
|
|
- cwd = get_fpu_cwd(task);
|
|
- swd = get_fpu_swd(task);
|
|
- switch (swd & ~cwd & 0x3f) {
|
|
- case 0x000: /* No unmasked exception */
|
|
- default: /* Multiple exceptions */
|
|
- break;
|
|
- case 0x001: /* Invalid Op */
|
|
- /*
|
|
- * swd & 0x240 == 0x040: Stack Underflow
|
|
- * swd & 0x240 == 0x240: Stack Overflow
|
|
- * User must clear the SF bit (0x40) if set
|
|
- */
|
|
- info.si_code = FPE_FLTINV;
|
|
- break;
|
|
- case 0x002: /* Denormalize */
|
|
- case 0x010: /* Underflow */
|
|
- info.si_code = FPE_FLTUND;
|
|
- break;
|
|
- case 0x004: /* Zero Divide */
|
|
- info.si_code = FPE_FLTDIV;
|
|
- break;
|
|
- case 0x008: /* Overflow */
|
|
- info.si_code = FPE_FLTOVF;
|
|
- break;
|
|
- case 0x020: /* Precision */
|
|
- info.si_code = FPE_FLTRES;
|
|
- break;
|
|
- }
|
|
- force_sig_info(SIGFPE, &info, task);
|
|
-}
|
|
-
|
|
-asmlinkage void bad_intr(void)
|
|
-{
|
|
- printk("bad interrupt");
|
|
-}
|
|
-
|
|
-asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
|
|
-{
|
|
- void __user *ip = (void __user *)(regs->ip);
|
|
- struct task_struct *task;
|
|
- siginfo_t info;
|
|
- unsigned short mxcsr;
|
|
-
|
|
- conditional_sti(regs);
|
|
- if (!user_mode(regs) &&
|
|
- kernel_math_error(regs, "kernel simd math error", 19))
|
|
- return;
|
|
-
|
|
- /*
|
|
- * Save the info for the exception handler and clear the error.
|
|
- */
|
|
- task = current;
|
|
- save_init_fpu(task);
|
|
- task->thread.trap_no = 19;
|
|
- task->thread.error_code = 0;
|
|
- info.si_signo = SIGFPE;
|
|
- info.si_errno = 0;
|
|
- info.si_code = __SI_FAULT;
|
|
- info.si_addr = ip;
|
|
- /*
|
|
- * The SIMD FPU exceptions are handled a little differently, as there
|
|
- * is only a single status/control register. Thus, to determine which
|
|
- * unmasked exception was caught we must mask the exception mask bits
|
|
- * at 0x1f80, and then use these to mask the exception bits at 0x3f.
|
|
- */
|
|
- mxcsr = get_fpu_mxcsr(task);
|
|
- switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
|
|
- case 0x000:
|
|
- default:
|
|
- break;
|
|
- case 0x001: /* Invalid Op */
|
|
- info.si_code = FPE_FLTINV;
|
|
- break;
|
|
- case 0x002: /* Denormalize */
|
|
- case 0x010: /* Underflow */
|
|
- info.si_code = FPE_FLTUND;
|
|
- break;
|
|
- case 0x004: /* Zero Divide */
|
|
- info.si_code = FPE_FLTDIV;
|
|
- break;
|
|
- case 0x008: /* Overflow */
|
|
- info.si_code = FPE_FLTOVF;
|
|
- break;
|
|
- case 0x020: /* Precision */
|
|
- info.si_code = FPE_FLTRES;
|
|
- break;
|
|
- }
|
|
- force_sig_info(SIGFPE, &info, task);
|
|
-}
|
|
-
|
|
-asmlinkage void do_spurious_interrupt_bug(struct pt_regs * regs)
|
|
-{
|
|
-}
|
|
-
|
|
-#if 0
|
|
-asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
|
|
-{
|
|
-}
|
|
-#endif
|
|
-
|
|
-asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
|
|
-{
|
|
-}
|
|
-
|
|
-/*
|
|
- * 'math_state_restore()' saves the current math information in the
|
|
- * old math state array, and gets the new ones from the current task
|
|
- *
|
|
- * Careful.. There are problems with IBM-designed IRQ13 behaviour.
|
|
- * Don't touch unless you *really* know how it works.
|
|
- */
|
|
-asmlinkage void math_state_restore(void)
|
|
-{
|
|
- struct task_struct *me = current;
|
|
-
|
|
- if (!used_math()) {
|
|
- local_irq_enable();
|
|
- /*
|
|
- * does a slab alloc which can sleep
|
|
- */
|
|
- if (init_fpu(me)) {
|
|
- /*
|
|
- * ran out of memory!
|
|
- */
|
|
- do_group_exit(SIGKILL);
|
|
- return;
|
|
- }
|
|
- local_irq_disable();
|
|
- }
|
|
-
|
|
- /* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */
|
|
-
|
|
- /*
|
|
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
|
|
- */
|
|
- if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
|
|
- stts();
|
|
- force_sig(SIGSEGV, me);
|
|
- return;
|
|
- }
|
|
- task_thread_info(me)->status |= TS_USEDFPU;
|
|
- me->fpu_counter++;
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(math_state_restore);
|
|
-
|
|
-
|
|
-/*
|
|
- * NB. All these are "interrupt gates" (i.e. events_mask is set) because we
|
|
- * specify <dpl>|4 in the second field.
|
|
- */
|
|
-static const trap_info_t __cpuinitconst trap_table[] = {
|
|
- { 0, 0|4, __KERNEL_CS, (unsigned long)divide_error },
|
|
- { 1, 0|4, __KERNEL_CS, (unsigned long)debug },
|
|
- { 3, 3|4, __KERNEL_CS, (unsigned long)int3 },
|
|
- { 4, 3|4, __KERNEL_CS, (unsigned long)overflow },
|
|
- { 5, 0|4, __KERNEL_CS, (unsigned long)bounds },
|
|
- { 6, 0|4, __KERNEL_CS, (unsigned long)invalid_op },
|
|
- { 7, 0|4, __KERNEL_CS, (unsigned long)device_not_available },
|
|
- { 9, 0|4, __KERNEL_CS, (unsigned long)coprocessor_segment_overrun},
|
|
- { 10, 0|4, __KERNEL_CS, (unsigned long)invalid_TSS },
|
|
- { 11, 0|4, __KERNEL_CS, (unsigned long)segment_not_present },
|
|
- { 12, 0|4, __KERNEL_CS, (unsigned long)stack_segment },
|
|
- { 13, 0|4, __KERNEL_CS, (unsigned long)general_protection },
|
|
- { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault },
|
|
- { 15, 0|4, __KERNEL_CS, (unsigned long)spurious_interrupt_bug },
|
|
- { 16, 0|4, __KERNEL_CS, (unsigned long)coprocessor_error },
|
|
- { 17, 0|4, __KERNEL_CS, (unsigned long)alignment_check },
|
|
-#ifdef CONFIG_X86_MCE
|
|
- { 18, 0|4, __KERNEL_CS, (unsigned long)machine_check },
|
|
-#endif
|
|
- { 19, 0|4, __KERNEL_CS, (unsigned long)simd_coprocessor_error },
|
|
-#ifdef CONFIG_IA32_EMULATION
|
|
- { IA32_SYSCALL_VECTOR, 3, __KERNEL_CS, (unsigned long)ia32_syscall},
|
|
-#endif
|
|
- { 0, 0, 0, 0 }
|
|
-};
|
|
-
|
|
-void __init trap_init(void)
|
|
-{
|
|
- int ret;
|
|
-
|
|
- ret = HYPERVISOR_set_trap_table(trap_table);
|
|
- if (ret)
|
|
- printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
|
|
- /*
|
|
- * initialize the per thread extended state:
|
|
- */
|
|
- init_thread_xstate();
|
|
- /*
|
|
- * Should be a barrier for any external CPU state:
|
|
- */
|
|
- cpu_init();
|
|
-}
|
|
-
|
|
-void __cpuinit smp_trap_init(trap_info_t *trap_ctxt)
|
|
-{
|
|
- const trap_info_t *t = trap_table;
|
|
-
|
|
- for (t = trap_table; t->address; t++) {
|
|
- trap_ctxt[t->vector].flags = t->flags;
|
|
- trap_ctxt[t->vector].cs = t->cs;
|
|
- trap_ctxt[t->vector].address = t->address;
|
|
- }
|
|
-}
|
|
-
|
|
-static int __init oops_setup(char *s)
|
|
-{
|
|
- if (!s)
|
|
- return -EINVAL;
|
|
- if (!strcmp(s, "panic"))
|
|
- panic_on_oops = 1;
|
|
- return 0;
|
|
-}
|
|
-early_param("oops", oops_setup);
|
|
-
|
|
-static int __init kstack_setup(char *s)
|
|
-{
|
|
- if (!s)
|
|
- return -EINVAL;
|
|
- kstack_depth_to_print = simple_strtoul(s, NULL, 0);
|
|
- return 0;
|
|
-}
|
|
-early_param("kstack", kstack_setup);
|
|
-
|
|
-static int __init code_bytes_setup(char *s)
|
|
-{
|
|
- code_bytes = simple_strtoul(s, NULL, 0);
|
|
- if (code_bytes > 8192)
|
|
- code_bytes = 8192;
|
|
-
|
|
- return 1;
|
|
-}
|
|
-__setup("code_bytes=", code_bytes_setup);
|
|
--- head-2011-03-17.orig/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -160,8 +160,8 @@ static void note_page(struct seq_file *m
* we have now. "break" is either changing perms, levels or
|
|
* address space marker.
|
|
*/
|
|
- prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
|
|
- cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);
|
|
+ prot = pgprot_val(new_prot) & PTE_FLAGS_MASK;
|
|
+ cur = pgprot_val(st->current_prot) & PTE_FLAGS_MASK;
|
|
|
|
if (!st->level) {
|
|
/* First entry */
|
|
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -35,6 +35,7 @@
#include <asm/tlbflush.h>
|
|
#include <asm/proto.h>
|
|
#include <asm-generic/sections.h>
|
|
+#include <asm/traps.h>
|
|
|
|
/*
|
|
* Page fault error code bits
|
|
@@ -370,8 +371,6 @@ static int is_errata100(struct pt_regs *
|
|
return 0;
|
|
}
|
|
|
|
-void do_invalid_op(struct pt_regs *, unsigned long);
|
|
-
|
|
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
|
|
{
|
|
#ifdef CONFIG_X86_F00F_BUG
|
|
@@ -609,11 +608,6 @@ void __kprobes do_page_fault(struct pt_r
|
|
unsigned long flags;
|
|
#endif
|
|
|
|
- /*
|
|
- * We can fault from pretty much anywhere, with unknown IRQ state.
|
|
- */
|
|
- trace_hardirqs_fixup();
|
|
-
|
|
/* Set the "privileged fault" bit to something sane. */
|
|
if (user_mode_vm(regs))
|
|
error_code |= PF_USER;
|
|
@@ -677,24 +671,23 @@ void __kprobes do_page_fault(struct pt_r
|
|
}
|
|
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- /* It's safe to allow irq's after cr2 has been saved and the vmalloc
|
|
- fault has been handled. */
|
|
- if (regs->flags & (X86_EFLAGS_IF | X86_VM_MASK))
|
|
- local_irq_enable();
|
|
-
|
|
/*
|
|
- * If we're in an interrupt, have no user context or are running in an
|
|
- * atomic region then we must not take the fault.
|
|
+ * It's safe to allow irq's after cr2 has been saved and the
|
|
+ * vmalloc fault has been handled.
|
|
+ *
|
|
+ * User-mode registers count as a user access even for any
|
|
+ * potential system fault or CPU buglet.
|
|
*/
|
|
- if (in_atomic() || !mm)
|
|
- goto bad_area_nosemaphore;
|
|
-#else /* CONFIG_X86_64 */
|
|
- if (likely(regs->flags & X86_EFLAGS_IF))
|
|
+ if (user_mode_vm(regs)) {
|
|
+ local_irq_enable();
|
|
+ error_code |= PF_USER;
|
|
+ } else if (regs->flags & X86_EFLAGS_IF)
|
|
local_irq_enable();
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
if (unlikely(error_code & PF_RSVD))
|
|
pgtable_bad(address, regs, error_code);
|
|
+#endif
|
|
|
|
/*
|
|
* If we're in an interrupt, have no user context or are running in an
|
|
@@ -703,15 +696,9 @@ void __kprobes do_page_fault(struct pt_r
|
|
if (unlikely(in_atomic() || !mm))
|
|
goto bad_area_nosemaphore;
|
|
|
|
- /*
|
|
- * User-mode registers count as a user access even for any
|
|
- * potential system fault or CPU buglet.
|
|
- */
|
|
- if (user_mode_vm(regs))
|
|
- error_code |= PF_USER;
|
|
again:
|
|
-#endif
|
|
- /* When running in the kernel we expect faults to occur only to
|
|
+ /*
|
|
+ * When running in the kernel we expect faults to occur only to
|
|
* addresses in user space. All other faults represent errors in the
|
|
* kernel and should generate an OOPS. Unfortunately, in the case of an
|
|
* erroneous fault occurring in a code path which already holds mmap_sem
|
|
@@ -774,9 +761,6 @@ good_area:
|
|
goto bad_area;
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-survive:
|
|
-#endif
|
|
/*
|
|
* If for any reason at all we couldn't handle the fault,
|
|
* make sure we exit gracefully rather than endlessly redo
|
|
@@ -911,12 +895,11 @@ out_of_memory:
|
|
up_read(&mm->mmap_sem);
|
|
if (is_global_init(tsk)) {
|
|
yield();
|
|
-#ifdef CONFIG_X86_32
|
|
- down_read(&mm->mmap_sem);
|
|
- goto survive;
|
|
-#else
|
|
+ /*
|
|
+ * Re-lookup the vma - in theory the vma tree might
|
|
+ * have changed:
|
|
+ */
|
|
goto again;
|
|
-#endif
|
|
}
|
|
|
|
printk("VM: killing process %s\n", tsk->comm);
|
|
@@ -949,14 +932,15 @@ LIST_HEAD(pgd_list);
|
|
|
|
void vmalloc_sync_all(void)
|
|
{
|
|
-#ifdef CONFIG_X86_32
|
|
- unsigned long address = VMALLOC_START & PGDIR_MASK;
|
|
+ unsigned long address;
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
if (SHARED_KERNEL_PMD)
|
|
return;
|
|
|
|
- BUILD_BUG_ON(TASK_SIZE & ~PMD_MASK);
|
|
- for (; address < hypervisor_virt_start; address += PMD_SIZE) {
|
|
+ for (address = VMALLOC_START & PMD_MASK;
|
|
+ address >= TASK_SIZE && address < FIXADDR_TOP;
|
|
+ address += PMD_SIZE) {
|
|
unsigned long flags;
|
|
struct page *page;
|
|
|
|
@@ -974,10 +958,8 @@ void vmalloc_sync_all(void)
|
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
|
}
|
|
#else /* CONFIG_X86_64 */
|
|
- unsigned long start = VMALLOC_START & PGDIR_MASK;
|
|
- unsigned long address;
|
|
-
|
|
- for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
|
|
+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
|
|
+ address += PGDIR_SIZE) {
|
|
const pgd_t *pgd_ref = pgd_offset_k(address);
|
|
unsigned long flags;
|
|
struct page *page;
|
|
--- head-2011-03-17.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/highmem_32-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -137,6 +137,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
|
|
|
|
return (void*) vaddr;
|
|
}
|
|
+EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
|
|
|
|
struct page *kmap_atomic_to_page(void *ptr)
|
|
{
|
|
--- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -34,6 +34,7 @@
#include <linux/scatterlist.h>
|
|
|
|
#include <asm/asm.h>
|
|
+#include <asm/bios_ebda.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/system.h>
|
|
#include <asm/uaccess.h>
|
|
@@ -51,6 +52,7 @@
|
|
#include <asm/swiotlb.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/cacheflush.h>
|
|
+#include <asm/smp.h>
|
|
|
|
unsigned int __VMALLOC_RESERVE = 128 << 20;
|
|
|
|
@@ -206,11 +208,32 @@ static void __init kernel_physical_mappi
|
|
pgd_t *pgd;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
- unsigned pages_2m = 0, pages_4k = 0;
|
|
+ unsigned pages_2m, pages_4k;
|
|
+ int mapping_iter;
|
|
|
|
- if (!cpu_has_pse)
|
|
+ /*
|
|
+ * First iteration will setup identity mapping using large/small pages
|
|
+ * based on use_pse, with other attributes same as set by
|
|
+ * the early code in head_32.S
|
|
+ *
|
|
+ * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
|
|
+ * as desired for the kernel identity mapping.
|
|
+ *
|
|
+ * This two pass mechanism conforms to the TLB app note which says:
|
|
+ *
|
|
+ * "Software should not write to a paging-structure entry in a way
|
|
+ * that would change, for any linear address, both the page size
|
|
+ * and either the page frame or attributes."
|
|
+ */
|
|
+ mapping_iter = 1;
|
|
+
|
|
+ if (!cpu_has_pse) {
|
|
use_pse = 0;
|
|
+ mapping_iter = 0;
|
|
+ }
|
|
|
|
+repeat:
|
|
+ pages_2m = pages_4k = 0;
|
|
pfn = start_pfn;
|
|
pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
|
|
pgd = pgd_base + pgd_idx;
|
|
@@ -250,6 +273,13 @@ static void __init kernel_physical_mappi
|
|
if (use_pse) {
|
|
unsigned int addr2;
|
|
pgprot_t prot = PAGE_KERNEL_LARGE;
|
|
+ /*
|
|
+ * first pass will use the same initial
|
|
+ * identity mapping attribute + _PAGE_PSE.
|
|
+ */
|
|
+ pgprot_t init_prot =
|
|
+ __pgprot(PTE_IDENT_ATTR |
|
|
+ _PAGE_PSE);
|
|
|
|
addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
|
|
PAGE_OFFSET + PAGE_SIZE-1;
|
|
@@ -259,7 +289,10 @@ static void __init kernel_physical_mappi
|
|
prot = PAGE_KERNEL_LARGE_EXEC;
|
|
|
|
pages_2m++;
|
|
- set_pmd(pmd, pfn_pmd(pfn, prot));
|
|
+ if (mapping_iter == 1)
|
|
+ set_pmd(pmd, pfn_pmd(pfn, init_prot));
|
|
+ else
|
|
+ set_pmd(pmd, pfn_pmd(pfn, prot));
|
|
|
|
pfn += PTRS_PER_PTE;
|
|
continue;
|
|
@@ -271,6 +304,11 @@ static void __init kernel_physical_mappi
|
|
for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
|
|
pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
|
|
pgprot_t prot = PAGE_KERNEL;
|
|
+ /*
|
|
+ * first pass will use the same initial
|
|
+ * identity mapping attribute.
|
|
+ */
|
|
+ pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);
|
|
|
|
/* XEN: Only map initial RAM allocation. */
|
|
if (pfn >= xen_start_info->nr_pages || pte_present(*pte))
|
|
@@ -279,12 +317,34 @@ static void __init kernel_physical_mappi
|
|
prot = PAGE_KERNEL_EXEC;
|
|
|
|
pages_4k++;
|
|
- set_pte(pte, pfn_pte(pfn, prot));
|
|
+ if (mapping_iter == 1)
|
|
+ set_pte(pte, pfn_pte(pfn, init_prot));
|
|
+ else
|
|
+ set_pte(pte, pfn_pte(pfn, prot));
|
|
}
|
|
}
|
|
}
|
|
- update_page_count(PG_LEVEL_2M, pages_2m);
|
|
- update_page_count(PG_LEVEL_4K, pages_4k);
|
|
+ if (mapping_iter <= 1) {
|
|
+ /*
|
|
+ * update direct mapping page count only in the first
|
|
+ * iteration.
|
|
+ */
|
|
+ update_page_count(PG_LEVEL_2M, pages_2m);
|
|
+ update_page_count(PG_LEVEL_4K, pages_4k);
|
|
+ }
|
|
+ if (mapping_iter == 1) {
|
|
+ /*
|
|
+ * local global flush tlb, which will flush the previous
|
|
+ * mappings present in both small and large page TLB's.
|
|
+ */
|
|
+ __flush_tlb_all();
|
|
+
|
|
+ /*
|
|
+ * Second iteration will set the actual desired PTE attributes.
|
|
+ */
|
|
+ mapping_iter = 2;
|
|
+ goto repeat;
|
|
+ }
|
|
}
|
|
|
|
/*
|
|
@@ -306,7 +366,6 @@ int devmem_is_allowed(unsigned long page
|
|
return 0;
|
|
}
|
|
|
|
-#ifdef CONFIG_HIGHMEM
|
|
pte_t *kmap_pte;
|
|
pgprot_t kmap_prot;
|
|
|
|
@@ -329,6 +388,7 @@ static void __init kmap_init(void)
|
|
kmap_prot = PAGE_KERNEL;
|
|
}
|
|
|
|
+#ifdef CONFIG_HIGHMEM
|
|
static void __init permanent_kmaps_init(pgd_t *pgd_base)
|
|
{
|
|
unsigned long vaddr;
|
|
@@ -416,7 +476,6 @@ static void __init set_highmem_pages_ini
|
|
#endif /* !CONFIG_NUMA */
|
|
|
|
#else
|
|
-# define kmap_init() do { } while (0)
|
|
# define permanent_kmaps_init(pgd_base) do { } while (0)
|
|
# define set_highmem_pages_init() do { } while (0)
|
|
#endif /* CONFIG_HIGHMEM */
|
|
@@ -775,7 +834,7 @@ static unsigned long __init extend_init_
|
|
return start_pfn;
|
|
}
|
|
|
|
-static void __init find_early_table_space(unsigned long end)
|
|
+static void __init find_early_table_space(unsigned long end, int use_pse)
|
|
{
|
|
unsigned long puds, pmds, ptes, tables;
|
|
|
|
@@ -785,7 +844,7 @@ static void __init find_early_table_spac
|
|
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
|
|
|
|
- if (cpu_has_pse) {
|
|
+ if (use_pse) {
|
|
unsigned long extra;
|
|
|
|
extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
|
|
@@ -818,12 +877,22 @@ unsigned long __init_refok init_memory_m
|
|
pgd_t *pgd_base = swapper_pg_dir;
|
|
unsigned long start_pfn, end_pfn;
|
|
unsigned long big_page_start;
|
|
+#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
+ /*
|
|
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
|
+ * This will simplify cpa(), which otherwise needs to support splitting
|
|
+ * large pages into small in interrupt context, etc.
|
|
+ */
|
|
+ int use_pse = 0;
|
|
+#else
|
|
+ int use_pse = cpu_has_pse;
|
|
+#endif
|
|
|
|
/*
|
|
* Find space for the kernel direct mapping tables.
|
|
*/
|
|
if (!after_init_bootmem)
|
|
- find_early_table_space(end);
|
|
+ find_early_table_space(end, use_pse);
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
set_nx();
|
|
@@ -869,7 +938,7 @@ unsigned long __init_refok init_memory_m
|
|
end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
if (start_pfn < end_pfn)
|
|
kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
|
|
- cpu_has_pse);
|
|
+ use_pse);
|
|
|
|
/* tail is not big page alignment ? */
|
|
start_pfn = end_pfn;
|
|
@@ -954,6 +1023,8 @@ void __init mem_init(void)
|
|
|
|
pci_iommu_alloc();
|
|
|
|
+ start_periodic_check_for_corruption();
|
|
+
|
|
#ifdef CONFIG_FLATMEM
|
|
BUG_ON(!mem_map);
|
|
#endif
|
|
@@ -1038,7 +1109,6 @@ void __init mem_init(void)
|
|
if (boot_cpu_data.wp_works_ok < 0)
|
|
test_wp_bit();
|
|
|
|
- cpa_init();
|
|
save_pg_dir();
|
|
zap_low_mappings();
|
|
|
|
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -34,6 +34,7 @@
#include <linux/nmi.h>
|
|
|
|
#include <asm/processor.h>
|
|
+#include <asm/bios_ebda.h>
|
|
#include <asm/system.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -157,6 +158,62 @@ static unsigned long __meminitdata table
|
|
static unsigned long __meminitdata table_cur;
|
|
static unsigned long __meminitdata table_top;
|
|
|
|
+pteval_t __supported_pte_mask __read_mostly = ~0UL;
|
|
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
|
+
|
|
+static int do_not_nx __cpuinitdata;
|
|
+
|
|
+/*
|
|
+ * noexec=on|off
|
|
+ * Control non-executable mappings for 64-bit processes.
|
|
+ *
|
|
+ * on Enable (default)
|
|
+ * off Disable
|
|
+ */
|
|
+static int __init nonx_setup(char *str)
|
|
+{
|
|
+ if (!str)
|
|
+ return -EINVAL;
|
|
+ if (!strncmp(str, "on", 2)) {
|
|
+ __supported_pte_mask |= _PAGE_NX;
|
|
+ do_not_nx = 0;
|
|
+ } else if (!strncmp(str, "off", 3)) {
|
|
+ do_not_nx = 1;
|
|
+ __supported_pte_mask &= ~_PAGE_NX;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+early_param("noexec", nonx_setup);
|
|
+
|
|
+void __cpuinit check_efer(void)
|
|
+{
|
|
+ unsigned long efer;
|
|
+
|
|
+ rdmsrl(MSR_EFER, efer);
|
|
+ if (!(efer & EFER_NX) || do_not_nx)
|
|
+ __supported_pte_mask &= ~_PAGE_NX;
|
|
+}
|
|
+
|
|
+int force_personality32;
|
|
+
|
|
+/*
|
|
+ * noexec32=on|off
|
|
+ * Control non executable heap for 32bit processes.
|
|
+ * To control the stack too use noexec=off
|
|
+ *
|
|
+ * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
|
|
+ * off PROT_READ implies PROT_EXEC
|
|
+ */
|
|
+static int __init nonx32_setup(char *str)
|
|
+{
|
|
+ if (!strcmp(str, "on"))
|
|
+ force_personality32 &= ~READ_IMPLIES_EXEC;
|
|
+ else if (!strcmp(str, "off"))
|
|
+ force_personality32 |= READ_IMPLIES_EXEC;
|
|
+ return 1;
|
|
+}
|
|
+__setup("noexec32=", nonx32_setup);
|
|
+
|
|
/*
|
|
* NOTE: This function is marked __ref because it calls __init function
|
|
* (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
|
|
@@ -214,14 +271,6 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig
|
|
}
|
|
|
|
pte = pte_offset_kernel(pmd, vaddr);
|
|
- if (!pte_none(*pte) && __pte_val(new_pte) &&
|
|
-#ifdef CONFIG_ACPI
|
|
- /* __acpi_map_table() fails to properly call clear_fixmap() */
|
|
- (vaddr < __fix_to_virt(FIX_ACPI_END) ||
|
|
- vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) &&
|
|
-#endif
|
|
- __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
|
|
- pte_ERROR(*pte);
|
|
set_pte(pte, new_pte);
|
|
|
|
/*
|
|
@@ -306,7 +355,7 @@ void __init init_extra_mapping_uc(unsign
|
|
void __init cleanup_highmap(void)
|
|
{
|
|
unsigned long vaddr = __START_KERNEL_map;
|
|
- unsigned long end = round_up((unsigned long)_end, PMD_SIZE) - 1;
|
|
+ unsigned long end = roundup((unsigned long)_end, PMD_SIZE) - 1;
|
|
pmd_t *pmd = level2_kernel_pgt;
|
|
pmd_t *last_pmd = pmd + PTRS_PER_PMD;
|
|
|
|
@@ -336,7 +385,7 @@ static __ref void *alloc_low_page(unsign
|
|
if (pfn >= table_top)
|
|
panic("alloc_low_page: ran out of memory");
|
|
|
|
- adr = early_ioremap(pfn_to_mfn(pfn) * PAGE_SIZE, PAGE_SIZE);
|
|
+ adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
|
|
memset(adr, 0, PAGE_SIZE);
|
|
*phys = pfn * PAGE_SIZE;
|
|
return adr;
|
|
@@ -382,7 +431,8 @@ static inline int __meminit make_readonl
|
|
}
|
|
|
|
static unsigned long __meminit
|
|
-phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
|
|
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end,
|
|
+ pgprot_t prot)
|
|
{
|
|
unsigned pages = 0;
|
|
unsigned long last_map_addr = end;
|
|
@@ -391,49 +441,58 @@ phys_pte_init(pte_t *pte_page, unsigned
|
|
pte_t *pte = pte_page + pte_index(addr);
|
|
|
|
for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
|
|
- unsigned long pteval = addr | __PAGE_KERNEL;
|
|
+ unsigned long pteval = addr | pgprot_val(prot);
|
|
|
|
if (addr >= end ||
|
|
(!after_bootmem &&
|
|
(addr >> PAGE_SHIFT) >= xen_start_info->nr_pages))
|
|
break;
|
|
|
|
- if (__pte_val(*pte))
|
|
+ /*
|
|
+ * We will re-use the existing mapping.
|
|
+ * Xen for example has some special requirements, like mapping
|
|
+ * pagetable pages as RO. So assume someone who pre-setup
|
|
+ * these mappings are more intelligent.
|
|
+ */
|
|
+ if (__pte_val(*pte)) {
|
|
+ pages++;
|
|
continue;
|
|
+ }
|
|
|
|
if (make_readonly(addr))
|
|
pteval &= ~_PAGE_RW;
|
|
if (0)
|
|
printk(" pte=%p addr=%lx pte=%016lx\n",
|
|
pte, addr, pteval);
|
|
+ pages++;
|
|
if (!after_bootmem)
|
|
*pte = __pte(pteval & __supported_pte_mask);
|
|
else
|
|
set_pte(pte, __pte(pteval & __supported_pte_mask));
|
|
last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
|
|
- pages++;
|
|
}
|
|
+
|
|
update_page_count(PG_LEVEL_4K, pages);
|
|
|
|
return last_map_addr;
|
|
}
|
|
|
|
static unsigned long __meminit
|
|
-phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
|
|
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end,
|
|
+ pgprot_t prot)
|
|
{
|
|
pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
|
|
|
|
BUG_ON(!max_pfn_mapped);
|
|
- return phys_pte_init(pte, address, end);
|
|
+ return phys_pte_init(pte, address, end, prot);
|
|
}
|
|
|
|
static unsigned long __meminit
|
|
phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
|
- unsigned long page_size_mask)
|
|
+ unsigned long page_size_mask, pgprot_t prot)
|
|
{
|
|
unsigned long pages = 0;
|
|
unsigned long last_map_addr = end;
|
|
- unsigned long start = address;
|
|
|
|
int i = pmd_index(address);
|
|
|
|
@@ -441,6 +500,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
|
|
unsigned long pte_phys;
|
|
pmd_t *pmd = pmd_page + pmd_index(address);
|
|
pte_t *pte;
|
|
+ pgprot_t new_prot = prot;
|
|
|
|
if (address >= end)
|
|
break;
|
|
@@ -449,27 +509,42 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
|
|
if (!pmd_large(*pmd)) {
|
|
spin_lock(&init_mm.page_table_lock);
|
|
last_map_addr = phys_pte_update(pmd, address,
|
|
- end);
|
|
+ end, prot);
|
|
spin_unlock(&init_mm.page_table_lock);
|
|
+ continue;
|
|
}
|
|
- /* Count entries we're using from level2_ident_pgt */
|
|
- if (start == 0)
|
|
+ /*
|
|
+ * If we are ok with PG_LEVEL_2M mapping, then we will
|
|
+ * use the existing mapping,
|
|
+ *
|
|
+ * Otherwise, we will split the large page mapping but
|
|
+ * use the same existing protection bits except for
|
|
+ * large page, so that we don't violate Intel's TLB
|
|
+ * Application note (317080) which says, while changing
|
|
+ * the page sizes, new and old translations should
|
|
+ * not differ with respect to page frame and
|
|
+ * attributes.
|
|
+ */
|
|
+ if (page_size_mask & (1 << PG_LEVEL_2M)) {
|
|
pages++;
|
|
- continue;
|
|
+ continue;
|
|
+ }
|
|
+ new_prot = pte_pgprot(pte_clrhuge(*(pte_t *)pmd));
|
|
}
|
|
|
|
if (page_size_mask & (1<<PG_LEVEL_2M)) {
|
|
pages++;
|
|
spin_lock(&init_mm.page_table_lock);
|
|
set_pte((pte_t *)pmd,
|
|
- pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
|
|
+ pfn_pte(address >> PAGE_SHIFT,
|
|
+ __pgprot(pgprot_val(prot) | _PAGE_PSE)));
|
|
spin_unlock(&init_mm.page_table_lock);
|
|
last_map_addr = (address & PMD_MASK) + PMD_SIZE;
|
|
continue;
|
|
}
|
|
|
|
pte = alloc_low_page(&pte_phys);
|
|
- last_map_addr = phys_pte_init(pte, address, end);
|
|
+ last_map_addr = phys_pte_init(pte, address, end, new_prot);
|
|
unmap_low_page(pte);
|
|
|
|
if (!after_bootmem) {
|
|
@@ -490,13 +565,13 @@ phys_pmd_init(pmd_t *pmd_page, unsigned
|
|
|
|
static unsigned long __meminit
|
|
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
|
|
- unsigned long page_size_mask)
|
|
+ unsigned long page_size_mask, pgprot_t prot)
|
|
{
|
|
pmd_t *pmd = pmd_offset(pud, 0);
|
|
unsigned long last_map_addr;
|
|
|
|
BUG_ON(!max_pfn_mapped);
|
|
- last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
|
|
+ last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask, prot);
|
|
__flush_tlb_all();
|
|
return last_map_addr;
|
|
}
|
|
@@ -513,15 +588,34 @@ phys_pud_init(pud_t *pud_page, unsigned
|
|
unsigned long pmd_phys;
|
|
pud_t *pud = pud_page + pud_index(addr);
|
|
pmd_t *pmd;
|
|
+ pgprot_t prot = PAGE_KERNEL;
|
|
|
|
if (addr >= end)
|
|
break;
|
|
|
|
if (__pud_val(*pud)) {
|
|
- if (!pud_large(*pud))
|
|
+ if (!pud_large(*pud)) {
|
|
last_map_addr = phys_pmd_update(pud, addr, end,
|
|
- page_size_mask);
|
|
- continue;
|
|
+ page_size_mask, prot);
|
|
+ continue;
|
|
+ }
|
|
+ /*
|
|
+ * If we are ok with PG_LEVEL_1G mapping, then we will
|
|
+ * use the existing mapping.
|
|
+ *
|
|
+ * Otherwise, we will split the gbpage mapping but use
|
|
+ * the same existing protection bits except for large
|
|
+ * page, so that we don't violate Intel's TLB
|
|
+ * Application note (317080) which says, while changing
|
|
+ * the page sizes, new and old translations should
|
|
+ * not differ with respect to page frame and
|
|
+ * attributes.
|
|
+ */
|
|
+ if (page_size_mask & (1 << PG_LEVEL_1G)) {
|
|
+ pages++;
|
|
+ continue;
|
|
+ }
|
|
+ prot = pte_pgprot(pte_clrhuge(*(pte_t *)pud));
|
|
}
|
|
|
|
if (page_size_mask & (1<<PG_LEVEL_1G)) {
|
|
@@ -535,7 +629,8 @@ phys_pud_init(pud_t *pud_page, unsigned
|
|
}
|
|
|
|
pmd = alloc_low_page(&pmd_phys);
|
|
- last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
|
|
+ last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask,
|
|
+ prot);
|
|
unmap_low_page(pmd);
|
|
|
|
if (!after_bootmem) {
|
|
@@ -554,6 +649,7 @@ phys_pud_init(pud_t *pud_page, unsigned
|
|
}
|
|
}
|
|
__flush_tlb_all();
|
|
+
|
|
update_page_count(PG_LEVEL_1G, pages);
|
|
|
|
return last_map_addr;
|
|
@@ -651,7 +747,8 @@ void __init xen_init_pt(void)
|
|
}
|
|
}
|
|
|
|
-static void __init find_early_table_space(unsigned long end)
|
|
+static void __init find_early_table_space(unsigned long end, int use_pse,
|
|
+ int use_gbpages)
|
|
{
|
|
unsigned long puds, pmds, ptes, tables;
|
|
|
|
@@ -676,6 +773,7 @@ static void __init find_early_table_spac
|
|
*/
|
|
table_start = table_top;
|
|
}
|
|
+ __flush_tlb_all();
|
|
|
|
table_top = table_cur + (tables >> PAGE_SHIFT);
|
|
|
|
@@ -825,11 +923,13 @@ unsigned long __init_refok init_memory_m
|
|
unsigned long last_map_addr = 0;
|
|
unsigned long page_size_mask = 0;
|
|
unsigned long start_pfn, end_pfn;
|
|
+ unsigned long pos;
|
|
|
|
struct map_range mr[NR_RANGE_MR];
|
|
int nr_range, i;
|
|
+ int use_pse, use_gbpages;
|
|
|
|
- printk(KERN_INFO "init_memory_mapping\n");
|
|
+ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
|
|
|
|
/*
|
|
* Find space for the kernel direct mapping tables.
|
|
@@ -841,9 +941,21 @@ unsigned long __init_refok init_memory_m
|
|
if (!after_bootmem)
|
|
init_gbpages();
|
|
|
|
- if (direct_gbpages)
|
|
+#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
+ /*
|
|
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
|
+ * This will simplify cpa(), which otherwise needs to support splitting
|
|
+ * large pages into small in interrupt context, etc.
|
|
+ */
|
|
+ use_pse = use_gbpages = 0;
|
|
+#else
|
|
+ use_pse = cpu_has_pse;
|
|
+ use_gbpages = direct_gbpages;
|
|
+#endif
|
|
+
|
|
+ if (use_gbpages)
|
|
page_size_mask |= 1 << PG_LEVEL_1G;
|
|
- if (cpu_has_pse)
|
|
+ if (use_pse)
|
|
page_size_mask |= 1 << PG_LEVEL_2M;
|
|
|
|
memset(mr, 0, sizeof(mr));
|
|
@@ -851,35 +963,50 @@ unsigned long __init_refok init_memory_m
|
|
|
|
/* head if not big page alignment ?*/
|
|
start_pfn = start >> PAGE_SHIFT;
|
|
- end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
|
+ pos = start_pfn << PAGE_SHIFT;
|
|
+ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
|
<< (PMD_SHIFT - PAGE_SHIFT);
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
|
|
/* big page (2M) range*/
|
|
- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
<< (PMD_SHIFT - PAGE_SHIFT);
|
|
- end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
+ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
<< (PUD_SHIFT - PAGE_SHIFT);
|
|
- if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
|
|
- end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
- page_size_mask & (1<<PG_LEVEL_2M));
|
|
+ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
|
|
+ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask & (1<<PG_LEVEL_2M));
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
|
|
/* big page (1G) range */
|
|
- start_pfn = end_pfn;
|
|
- end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
+ << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
page_size_mask &
|
|
((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
|
|
/* tail is not big page (1G) alignment */
|
|
- start_pfn = end_pfn;
|
|
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
- page_size_mask & (1<<PG_LEVEL_2M));
|
|
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask & (1<<PG_LEVEL_2M));
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
|
|
/* tail is not big page (2M) alignment */
|
|
- start_pfn = end_pfn;
|
|
+ start_pfn = pos>>PAGE_SHIFT;
|
|
end_pfn = end>>PAGE_SHIFT;
|
|
nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
|
|
@@ -904,7 +1031,7 @@ unsigned long __init_refok init_memory_m
|
|
(mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
|
|
|
|
if (!after_bootmem)
|
|
- find_early_table_space(end);
|
|
+ find_early_table_space(end, use_pse, use_gbpages);
|
|
|
|
if (!start) {
|
|
unsigned long addr, va = __START_KERNEL_map;
|
|
@@ -1015,12 +1142,12 @@ int arch_add_memory(int nid, u64 start,
|
|
unsigned long nr_pages = size >> PAGE_SHIFT;
|
|
int ret;
|
|
|
|
- last_mapped_pfn = init_memory_mapping(start, start + size-1);
|
|
+ last_mapped_pfn = init_memory_mapping(start, start + size);
|
|
if (last_mapped_pfn > max_pfn_mapped)
|
|
max_pfn_mapped = last_mapped_pfn;
|
|
|
|
ret = __add_pages(zone, start_pfn, nr_pages);
|
|
- WARN_ON(1);
|
|
+ WARN_ON_ONCE(ret);
|
|
|
|
return ret;
|
|
}
|
|
@@ -1062,8 +1189,11 @@ static struct kcore_list kcore_mem, kcor
|
|
void __init mem_init(void)
|
|
{
|
|
long codesize, reservedpages, datasize, initsize;
|
|
+ unsigned long absent_pages;
|
|
unsigned long pfn;
|
|
|
|
+ start_periodic_check_for_corruption();
|
|
+
|
|
pci_iommu_alloc();
|
|
|
|
/* clear_bss() already clear the empty_zero_page */
|
|
@@ -1076,13 +1206,15 @@ void __init mem_init(void)
|
|
#else
|
|
totalram_pages = free_all_bootmem();
|
|
#endif
|
|
+
|
|
/* XEN: init pages outside initial allocation. */
|
|
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
|
|
ClearPageReserved(pfn_to_page(pfn));
|
|
init_page_count(pfn_to_page(pfn));
|
|
}
|
|
- reservedpages = max_pfn - totalram_pages -
|
|
- absent_pages_in_range(0, max_pfn);
|
|
+
|
|
+ absent_pages = absent_pages_in_range(0, max_pfn);
|
|
+ reservedpages = max_pfn - totalram_pages - absent_pages;
|
|
after_bootmem = 1;
|
|
|
|
codesize = (unsigned long) &_etext - (unsigned long) &_text;
|
|
@@ -1099,15 +1231,14 @@ void __init mem_init(void)
|
|
VSYSCALL_END - VSYSCALL_START);
|
|
|
|
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
|
|
- "%ldk reserved, %ldk data, %ldk init)\n",
|
|
+ "%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
|
|
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
|
|
max_pfn << (PAGE_SHIFT-10),
|
|
codesize >> 10,
|
|
+ absent_pages << (PAGE_SHIFT-10),
|
|
reservedpages << (PAGE_SHIFT-10),
|
|
datasize >> 10,
|
|
initsize >> 10);
|
|
-
|
|
- cpa_init();
|
|
}
|
|
|
|
void free_init_pages(char *what, unsigned long begin, unsigned long end)
|
|
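The init_memory_mapping() hunks above carve the physical range [start, end) into a 4k head, a 2M body, a 1G body and 2M/4k tails, tracking progress in 'pos' so that empty sub-ranges are skipped instead of being handed to save_mr(). The following standalone sketch mirrors that alignment arithmetic only as an illustration; ALIGN_UP/ALIGN_DOWN, emit() and the example range are stand-ins, not part of the patch.

	#include <stdio.h>

	#define PMD_SHIFT   21                      /* 2M pages */
	#define PUD_SHIFT   30                      /* 1G pages */
	#define ALIGN_UP(x, s)   ((((x) + (1UL << (s)) - 1) >> (s)) << (s))
	#define ALIGN_DOWN(x, s) (((x) >> (s)) << (s))

	static void emit(unsigned long s, unsigned long e, const char *size)
	{
		if (s < e)              /* skip empty sub-ranges, like the 'pos' checks */
			printf("%016lx-%016lx %s\n", s, e, size);
	}

	int main(void)
	{
		unsigned long start = 0x100000UL;       /* example range: 1MB..4GB */
		unsigned long end   = 0x100000000UL;
		unsigned long pos = start, next;

		/* head that is not 2M aligned: 4k pages */
		next = ALIGN_UP(pos, PMD_SHIFT);
		if (next > end)
			next = end;
		emit(pos, next, "4k");
		pos = next;

		/* 2M pages up to the first 1G boundary (clipped to a 2M-aligned end) */
		next = ALIGN_UP(pos, PUD_SHIFT);
		if (next > ALIGN_DOWN(end, PMD_SHIFT))
			next = ALIGN_DOWN(end, PMD_SHIFT);
		emit(pos, next, "2M");
		if (next > pos)
			pos = next;

		/* 1G pages for the middle */
		next = ALIGN_DOWN(end, PUD_SHIFT);
		emit(pos, next, "1G");
		if (next > pos)
			pos = next;

		/* 2M tail, then 4k tail */
		next = ALIGN_DOWN(end, PMD_SHIFT);
		emit(pos, next, "2M");
		if (next > pos)
			pos = next;
		emit(pos, end, "4k");
		return 0;
	}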
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-17/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -0,0 +1,61 @@
|
|
+/*
|
|
+ * Copyright © 2008 Ingo Molnar
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; either version 2 of the License, or
|
|
+ * (at your option) any later version.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful, but
|
|
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
+ * General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License along
|
|
+ * with this program; if not, write to the Free Software Foundation, Inc.,
|
|
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
|
|
+ */
|
|
+
|
|
+#include <asm/iomap.h>
|
|
+#include <linux/bitops.h>
|
|
+#include <linux/module.h>
|
|
+
|
|
+/* Map 'mfn' using fixed map 'type' and protections 'prot'
|
|
+ */
|
|
+void *
|
|
+iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
|
|
+{
|
|
+ enum fixed_addresses idx;
|
|
+ unsigned long vaddr;
|
|
+
|
|
+ pagefault_disable();
|
|
+
|
|
+ idx = type + KM_TYPE_NR*smp_processor_id();
|
|
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
+ pgprot_val(prot) |= _PAGE_IOMAP;
|
|
+ set_pte_at(&init_mm, vaddr, kmap_pte-idx, pfn_pte_ma(mfn, prot));
|
|
+ /*arch_flush_lazy_mmu_mode()*/;
|
|
+
|
|
+ return (void*) vaddr;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
|
|
+
|
|
+void
|
|
+iounmap_atomic(void *kvaddr, enum km_type type)
|
|
+{
|
|
+ unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
|
|
+ enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
|
|
+
|
|
+ /*
|
|
+ * Force other mappings to Oops if they try to access this pte
|
|
+ * without first remapping it. Keeping stale mappings around is also
|
|
+ * a bad idea, in case the page changes cacheability attributes or
|
|
+ * becomes a protected page in a hypervisor.
|
|
+ */
|
|
+ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
|
|
+ kpte_clear_flush(kmap_pte-idx, vaddr);
|
|
+
|
|
+ /*arch_flush_lazy_mmu_mode();*/
|
|
+ pagefault_enable();
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(iounmap_atomic);
|
|
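A minimal usage sketch for the iomap_atomic_prot_pfn()/iounmap_atomic() pair added above; the caller, the KM_USER0 slot, the frame number and the offset handling are assumptions for illustration, not part of the patch. The mapping lives in a per-CPU fixmap slot with pagefaults disabled, so it must be torn down on the same CPU before anything can sleep.

	/* Hypothetical caller: poke one 32-bit register in an I/O frame. */
	static void example_poke_frame(unsigned long mfn, unsigned long offset, u32 val)
	{
		void *va = iomap_atomic_prot_pfn(mfn, KM_USER0, PAGE_KERNEL_IO_NOCACHE);

		/* pagefaults are disabled and the slot is per-CPU: keep this short */
		*(volatile u32 *)((char *)va + (offset & ~PAGE_MASK)) = val;

		iounmap_atomic(va, KM_USER0);
	}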
--- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:40:39.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:07.000000000 +0100
|
|
@@ -25,20 +25,51 @@
|
|
|
|
#ifdef CONFIG_X86_64
|
|
|
|
-#ifndef CONFIG_XEN
|
|
+static inline int phys_addr_valid(unsigned long addr)
|
|
+{
|
|
+ return addr < (1UL << boot_cpu_data.x86_phys_bits);
|
|
+}
|
|
+
|
|
+#define phys_base 0
|
|
+
|
|
unsigned long __phys_addr(unsigned long x)
|
|
{
|
|
- if (x >= __START_KERNEL_map)
|
|
- return x - __START_KERNEL_map + phys_base;
|
|
- return x - PAGE_OFFSET;
|
|
+ if (x >= __START_KERNEL_map) {
|
|
+ x -= __START_KERNEL_map;
|
|
+ VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
|
|
+ x += phys_base;
|
|
+ } else {
|
|
+ VIRTUAL_BUG_ON(x < PAGE_OFFSET);
|
|
+ x -= PAGE_OFFSET;
|
|
+ VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
|
|
+ !phys_addr_valid(x));
|
|
+ }
|
|
+ return x;
|
|
}
|
|
EXPORT_SYMBOL(__phys_addr);
|
|
-#endif
|
|
|
|
-static inline int phys_addr_valid(unsigned long addr)
|
|
+bool __virt_addr_valid(unsigned long x)
|
|
{
|
|
- return addr < (1UL << boot_cpu_data.x86_phys_bits);
|
|
+ if (x >= __START_KERNEL_map) {
|
|
+ x -= __START_KERNEL_map;
|
|
+ if (x >= KERNEL_IMAGE_SIZE)
|
|
+ return false;
|
|
+ x += phys_base;
|
|
+ } else {
|
|
+ if (x < PAGE_OFFSET)
|
|
+ return false;
|
|
+ x -= PAGE_OFFSET;
|
|
+ if (system_state == SYSTEM_BOOTING ?
|
|
+ x > MAXMEM : !phys_addr_valid(x)) {
|
|
+ return false;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return pfn_valid(x >> PAGE_SHIFT);
|
|
}
|
|
+EXPORT_SYMBOL(__virt_addr_valid);
|
|
+
|
|
+#undef phys_base
|
|
|
|
#else
|
|
|
|
@@ -47,6 +78,28 @@ static inline int phys_addr_valid(unsign
|
|
return 1;
|
|
}
|
|
|
|
+#ifdef CONFIG_DEBUG_VIRTUAL
|
|
+unsigned long __phys_addr(unsigned long x)
|
|
+{
|
|
+ /* VMALLOC_* aren't constants; they are not available at boot time */
|
|
+ VIRTUAL_BUG_ON(x < PAGE_OFFSET);
|
|
+ VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING &&
|
|
+ is_vmalloc_addr((void *) x));
|
|
+ return x - PAGE_OFFSET;
|
|
+}
|
|
+EXPORT_SYMBOL(__phys_addr);
|
|
+#endif
|
|
+
|
|
+bool __virt_addr_valid(unsigned long x)
|
|
+{
|
|
+ if (x < PAGE_OFFSET)
|
|
+ return false;
|
|
+ if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x))
|
|
+ return false;
|
|
+ return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
|
|
+}
|
|
+EXPORT_SYMBOL(__virt_addr_valid);
|
|
+
|
|
#endif
|
|
|
|
static int direct_remap_area_pte_fn(pte_t *pte,
|
|
@@ -103,7 +156,7 @@ static int __direct_remap_pfn_range(stru
|
|
* Fill in the machine address: PTE ptr is done later by
|
|
* apply_to_page_range().
|
|
*/
|
|
- pgprot_val(prot) |= _PAGE_IO;
|
|
+ pgprot_val(prot) |= _PAGE_IOMAP;
|
|
v->val = __pte_val(pte_mkspecial(pfn_pte_ma(mfn, prot)));
|
|
|
|
mfn++;
|
|
@@ -221,6 +274,25 @@ int page_is_ram(unsigned long pagenr)
|
|
return 0;
|
|
}
|
|
|
|
+int pagerange_is_ram(unsigned long start, unsigned long end)
|
|
+{
|
|
+ int ram_page = 0, not_rampage = 0;
|
|
+ unsigned long page_nr;
|
|
+
|
|
+ for (page_nr = (start >> PAGE_SHIFT); page_nr < (end >> PAGE_SHIFT);
|
|
+ ++page_nr) {
|
|
+ if (page_is_ram(mfn_to_local_pfn(page_nr)))
|
|
+ ram_page = 1;
|
|
+ else
|
|
+ not_rampage = 1;
|
|
+
|
|
+ if (ram_page == not_rampage)
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ return ram_page;
|
|
+}
|
|
+
|
|
/*
|
|
* Fix up the linear direct mapping of the kernel to avoid cache attribute
|
|
* conflicts.
|
|
@@ -308,6 +380,12 @@ static void __iomem *__ioremap_caller(re
|
|
return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr);
|
|
|
|
/*
|
|
+ * Check if the request spans more than any BAR in the iomem resource
|
|
+ * tree.
|
|
+ */
|
|
+ WARN_ON(iomem_map_sanity_check(phys_addr, size));
|
|
+
|
|
+ /*
|
|
* Don't allow anybody to remap normal RAM that we're using..
|
|
*/
|
|
for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
|
|
@@ -362,16 +440,16 @@ static void __iomem *__ioremap_caller(re
|
|
switch (prot_val) {
|
|
case _PAGE_CACHE_UC:
|
|
default:
|
|
- prot = PAGE_KERNEL_NOCACHE;
|
|
+ prot = PAGE_KERNEL_IO_NOCACHE;
|
|
break;
|
|
case _PAGE_CACHE_UC_MINUS:
|
|
- prot = PAGE_KERNEL_UC_MINUS;
|
|
+ prot = PAGE_KERNEL_IO_UC_MINUS;
|
|
break;
|
|
case _PAGE_CACHE_WC:
|
|
- prot = PAGE_KERNEL_WC;
|
|
+ prot = PAGE_KERNEL_IO_WC;
|
|
break;
|
|
case _PAGE_CACHE_WB:
|
|
- prot = PAGE_KERNEL;
|
|
+ prot = PAGE_KERNEL_IO;
|
|
break;
|
|
}
|
|
|
|
@@ -471,7 +549,7 @@ static void __iomem *ioremap_default(res
|
|
unsigned long size)
|
|
{
|
|
unsigned long flags;
|
|
- void *ret;
|
|
+ void __iomem *ret;
|
|
int err;
|
|
|
|
/*
|
|
@@ -483,11 +561,11 @@ static void __iomem *ioremap_default(res
|
|
if (err < 0)
|
|
return NULL;
|
|
|
|
- ret = (void *) __ioremap_caller(phys_addr, size, flags,
|
|
- __builtin_return_address(0));
|
|
+ ret = __ioremap_caller(phys_addr, size, flags,
|
|
+ __builtin_return_address(0));
|
|
|
|
free_memtype(phys_addr, phys_addr + size);
|
|
- return (void __iomem *)ret;
|
|
+ return ret;
|
|
}
|
|
#endif
|
|
|
|
@@ -583,7 +661,7 @@ void unxlate_dev_mem_ptr(unsigned long p
|
|
}
|
|
#endif
|
|
|
|
-int __initdata early_ioremap_debug;
|
|
+static int __initdata early_ioremap_debug;
|
|
|
|
static int __init early_ioremap_debug_setup(char *str)
|
|
{
|
|
@@ -702,12 +780,12 @@ static void __init __early_set_fixmap(en
|
|
}
|
|
|
|
static inline void __init early_set_fixmap(enum fixed_addresses idx,
|
|
- unsigned long phys)
|
|
+ unsigned long phys, pgprot_t prot)
|
|
{
|
|
if (after_paging_init)
|
|
- set_fixmap(idx, phys);
|
|
+ __set_fixmap(idx, phys, prot);
|
|
else
|
|
- __early_set_fixmap(idx, phys, PAGE_KERNEL);
|
|
+ __early_set_fixmap(idx, phys, prot);
|
|
}
|
|
|
|
static inline void __init early_clear_fixmap(enum fixed_addresses idx)
|
|
@@ -718,16 +796,22 @@ static inline void __init early_clear_fi
|
|
__early_set_fixmap(idx, 0, __pgprot(0));
|
|
}
|
|
|
|
-
|
|
-int __initdata early_ioremap_nested;
|
|
-
|
|
+static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
|
|
+static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
|
|
static int __init check_early_ioremap_leak(void)
|
|
{
|
|
- if (!early_ioremap_nested)
|
|
+ int count = 0;
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
|
|
+ if (prev_map[i])
|
|
+ count++;
|
|
+
|
|
+ if (!count)
|
|
return 0;
|
|
WARN(1, KERN_WARNING
|
|
"Debug warning: early ioremap leak of %d areas detected.\n",
|
|
- early_ioremap_nested);
|
|
+ count);
|
|
printk(KERN_WARNING
|
|
"please boot with early_ioremap_debug and report the dmesg.\n");
|
|
|
|
@@ -735,18 +819,33 @@ static int __init check_early_ioremap_le
|
|
}
|
|
late_initcall(check_early_ioremap_leak);
|
|
|
|
-void __init *early_ioremap(unsigned long phys_addr, unsigned long size)
|
|
+static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
|
|
{
|
|
unsigned long offset, last_addr;
|
|
- unsigned int nrpages, nesting;
|
|
+ unsigned int nrpages;
|
|
enum fixed_addresses idx0, idx;
|
|
+ int i, slot;
|
|
|
|
WARN_ON(system_state != SYSTEM_BOOTING);
|
|
|
|
- nesting = early_ioremap_nested;
|
|
+ slot = -1;
|
|
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
|
|
+ if (!prev_map[i]) {
|
|
+ slot = i;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (slot < 0) {
|
|
+ printk(KERN_INFO "early_ioremap(%08lx, %08lx): no free slot\n",
|
|
+ phys_addr, size);
|
|
+ WARN_ON(1);
|
|
+ return NULL;
|
|
+ }
|
|
+
|
|
if (early_ioremap_debug) {
|
|
printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
|
|
- phys_addr, size, nesting);
|
|
+ phys_addr, size, slot);
|
|
dump_stack();
|
|
}
|
|
|
|
@@ -757,17 +856,13 @@ void __init *early_ioremap(unsigned long
|
|
return NULL;
|
|
}
|
|
|
|
- if (nesting >= FIX_BTMAPS_NESTING) {
|
|
- WARN_ON(1);
|
|
- return NULL;
|
|
- }
|
|
- early_ioremap_nested++;
|
|
+ prev_size[slot] = size;
|
|
/*
|
|
* Mappings have to be page-aligned
|
|
*/
|
|
offset = phys_addr & ~PAGE_MASK;
|
|
phys_addr &= PAGE_MASK;
|
|
- size = PAGE_ALIGN(last_addr) - phys_addr;
|
|
+ size = PAGE_ALIGN(last_addr + 1) - phys_addr;
|
|
|
|
/*
|
|
* Mappings have to fit in the FIX_BTMAP area.
|
|
@@ -781,10 +876,10 @@ void __init *early_ioremap(unsigned long
|
|
/*
|
|
* Ok, go for it..
|
|
*/
|
|
- idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
|
|
+ idx0 = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
|
|
idx = idx0;
|
|
while (nrpages > 0) {
|
|
- early_set_fixmap(idx, phys_addr);
|
|
+ early_set_fixmap(idx, phys_addr, prot);
|
|
phys_addr += PAGE_SIZE;
|
|
--idx;
|
|
--nrpages;
|
|
@@ -792,24 +887,55 @@ void __init *early_ioremap(unsigned long
|
|
if (early_ioremap_debug)
|
|
printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
|
|
|
|
- return (void *) (offset + fix_to_virt(idx0));
|
|
+ prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
|
|
+ return prev_map[slot];
|
|
}
|
|
|
|
-void __init early_iounmap(void *addr, unsigned long size)
|
|
+/* Remap an IO device */
|
|
+void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
|
|
+{
|
|
+ return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
|
|
+}
|
|
+
|
|
+/* Remap memory */
|
|
+void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
|
|
+{
|
|
+ return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL);
|
|
+}
|
|
+
|
|
+void __init early_iounmap(void __iomem *addr, unsigned long size)
|
|
{
|
|
unsigned long virt_addr;
|
|
unsigned long offset;
|
|
unsigned int nrpages;
|
|
enum fixed_addresses idx;
|
|
- int nesting;
|
|
+ int i, slot;
|
|
|
|
- nesting = --early_ioremap_nested;
|
|
- if (WARN_ON(nesting < 0))
|
|
+ slot = -1;
|
|
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
|
|
+ if (prev_map[i] == addr) {
|
|
+ slot = i;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if (slot < 0) {
|
|
+ printk(KERN_INFO "early_iounmap(%p, %08lx): slot not found\n",
|
|
+ addr, size);
|
|
+ WARN_ON(1);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (prev_size[slot] != size) {
|
|
+ printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size does not match mapped size %08lx\n",
|
|
+ addr, size, slot, prev_size[slot]);
|
|
+ WARN_ON(1);
|
|
return;
|
|
+ }
|
|
|
|
if (early_ioremap_debug) {
|
|
printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr,
|
|
- size, nesting);
|
|
+ size, slot);
|
|
dump_stack();
|
|
}
|
|
|
|
@@ -821,12 +947,13 @@ void __init early_iounmap(void *addr, un
|
|
offset = virt_addr & ~PAGE_MASK;
|
|
nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
|
|
|
|
- idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*nesting;
|
|
+ idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
|
|
while (nrpages > 0) {
|
|
early_clear_fixmap(idx);
|
|
--idx;
|
|
--nrpages;
|
|
}
|
|
+ prev_map[slot] = NULL;
|
|
}
|
|
|
|
void __this_fixmap_does_not_exist(void)
|
|
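The early_ioremap() rework above replaces the single nesting counter with a small table of slots (prev_map[]/prev_size[]), so early mappings no longer have to be released in strict LIFO order and any mapping still live at late_initcall time shows up as a leaked slot. A condensed sketch of that bookkeeping, with hypothetical helper names and the same FIX_BTMAPS_SLOTS sizing as the patch:

	#define FIX_BTMAPS_SLOTS 4

	static void *prev_map[FIX_BTMAPS_SLOTS];
	static unsigned long prev_size[FIX_BTMAPS_SLOTS];

	/* claim a free slot; the index selects which fixmap window to use */
	static int slot_alloc(void *addr, unsigned long size)
	{
		int i;

		for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
			if (!prev_map[i]) {
				prev_map[i] = addr;
				prev_size[i] = size;
				return i;
			}
		return -1;	/* all slots busy: the caller WARNs and bails out */
	}

	/* release the slot that maps 'addr'; size must match what was mapped */
	static int slot_free(void *addr, unsigned long size)
	{
		int i;

		for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
			if (prev_map[i] == addr && prev_size[i] == size) {
				prev_map[i] = NULL;
				return i;
			}
		return -1;	/* not a live early mapping */
	}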
--- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -25,15 +25,27 @@
|
|
* The current flushing context - we pass it instead of 5 arguments:
|
|
*/
|
|
struct cpa_data {
|
|
- unsigned long vaddr;
|
|
+ unsigned long *vaddr;
|
|
pgprot_t mask_set;
|
|
pgprot_t mask_clr;
|
|
int numpages;
|
|
- int flushtlb;
|
|
+ int flags;
|
|
unsigned long pfn;
|
|
unsigned force_split : 1;
|
|
+ int curpage;
|
|
};
|
|
|
|
+/*
|
|
+ * Serialize cpa() (for !DEBUG_PAGEALLOC, which uses large identity mappings)
|
|
+ * with cpa_lock, so that no other CPU with stale large TLB entries can
|
|
+ * change a page attribute in parallel while some other CPU is splitting
|
|
+ * a large page entry and changing the attribute at the same time.
|
|
+ */
|
|
+static DEFINE_SPINLOCK(cpa_lock);
|
|
+
|
|
+#define CPA_FLUSHTLB 1
|
|
+#define CPA_ARRAY 2
|
|
+
|
|
#ifdef CONFIG_PROC_FS
|
|
static unsigned long direct_pages_count[PG_LEVEL_NUM];
|
|
|
|
@@ -53,23 +65,22 @@ static void split_page_count(int level)
|
|
direct_pages_count[level - 1] += PTRS_PER_PTE;
|
|
}
|
|
|
|
-int arch_report_meminfo(char *page)
|
|
+void arch_report_meminfo(struct seq_file *m)
|
|
{
|
|
- int n = sprintf(page, "DirectMap4k: %8lu kB\n",
|
|
+ seq_printf(m, "DirectMap4k: %8lu kB\n",
|
|
direct_pages_count[PG_LEVEL_4K] << 2);
|
|
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
|
- n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
|
|
+ seq_printf(m, "DirectMap2M: %8lu kB\n",
|
|
direct_pages_count[PG_LEVEL_2M] << 11);
|
|
#else
|
|
- n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
|
|
+ seq_printf(m, "DirectMap4M: %8lu kB\n",
|
|
direct_pages_count[PG_LEVEL_2M] << 12);
|
|
#endif
|
|
#ifdef CONFIG_X86_64
|
|
if (direct_gbpages)
|
|
- n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
|
|
+ seq_printf(m, "DirectMap1G: %8lu kB\n",
|
|
direct_pages_count[PG_LEVEL_1G] << 20);
|
|
#endif
|
|
- return n;
|
|
}
|
|
#else
|
|
static inline void split_page_count(int level) { }
|
|
@@ -84,7 +95,7 @@ static inline unsigned long highmap_star
|
|
|
|
static inline unsigned long highmap_end_pfn(void)
|
|
{
|
|
- return __pa(round_up((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
|
|
+ return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
|
|
}
|
|
|
|
#endif
|
|
@@ -190,6 +201,41 @@ static void cpa_flush_range(unsigned lon
|
|
}
|
|
}
|
|
|
|
+static void cpa_flush_array(unsigned long *start, int numpages, int cache)
|
|
+{
|
|
+ unsigned int i, level;
|
|
+ unsigned long *addr;
|
|
+
|
|
+ BUG_ON(irqs_disabled());
|
|
+
|
|
+ on_each_cpu(__cpa_flush_range, NULL, 1);
|
|
+
|
|
+ if (!cache)
|
|
+ return;
|
|
+
|
|
+ /* 4M threshold */
|
|
+ if (numpages >= 1024) {
|
|
+ if (boot_cpu_data.x86_model >= 4)
|
|
+ wbinvd();
|
|
+ return;
|
|
+ }
|
|
+ /*
|
|
+ * We only need to flush on one CPU,
|
|
+ * clflush is a MESI-coherent instruction that
|
|
+ * will cause all other CPUs to flush the same
|
|
+ * cachelines:
|
|
+ */
|
|
+ for (i = 0, addr = start; i < numpages; i++, addr++) {
|
|
+ pte_t *pte = lookup_address(*addr, &level);
|
|
+
|
|
+ /*
|
|
+ * Only flush present addresses:
|
|
+ */
|
|
+ if (pte && (__pte_val(*pte) & _PAGE_PRESENT))
|
|
+ clflush_cache_range((void *) *addr, PAGE_SIZE);
|
|
+ }
|
|
+}
|
|
+
|
|
/*
|
|
* Certain areas of memory on x86 require very specific protection flags,
|
|
* for example the BIOS area or kernel text. Callers don't always get this
|
|
@@ -414,7 +460,7 @@ try_preserve_large_page(pte_t *kpte, uns
|
|
*/
|
|
new_pte = pfn_pte_ma(__pte_mfn(old_pte), canon_pgprot(new_prot));
|
|
__set_pmd_pte(kpte, address, level, new_pte);
|
|
- cpa->flushtlb = 1;
|
|
+ cpa->flags |= CPA_FLUSHTLB;
|
|
do_split = 0;
|
|
}
|
|
|
|
@@ -424,84 +470,6 @@ out_unlock:
|
|
return do_split;
|
|
}
|
|
|
|
-static LIST_HEAD(page_pool);
|
|
-static unsigned long pool_size, pool_pages, pool_low;
|
|
-static unsigned long pool_used, pool_failed;
|
|
-
|
|
-static void cpa_fill_pool(struct page **ret)
|
|
-{
|
|
- gfp_t gfp = GFP_KERNEL;
|
|
- unsigned long flags;
|
|
- struct page *p;
|
|
-
|
|
- /*
|
|
- * Avoid recursion (on debug-pagealloc) and also signal
|
|
- * our priority to get to these pagetables:
|
|
- */
|
|
- if (current->flags & PF_MEMALLOC)
|
|
- return;
|
|
- current->flags |= PF_MEMALLOC;
|
|
-
|
|
- /*
|
|
- * Allocate atomically from atomic contexts:
|
|
- */
|
|
- if (in_atomic() || irqs_disabled() || debug_pagealloc)
|
|
- gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
|
|
-
|
|
- while (pool_pages < pool_size || (ret && !*ret)) {
|
|
- p = alloc_pages(gfp, 0);
|
|
- if (!p) {
|
|
- pool_failed++;
|
|
- break;
|
|
- }
|
|
- /*
|
|
- * If the call site needs a page right now, provide it:
|
|
- */
|
|
- if (ret && !*ret) {
|
|
- *ret = p;
|
|
- continue;
|
|
- }
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
- list_add(&p->lru, &page_pool);
|
|
- pool_pages++;
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
- }
|
|
-
|
|
- current->flags &= ~PF_MEMALLOC;
|
|
-}
|
|
-
|
|
-#define SHIFT_MB (20 - PAGE_SHIFT)
|
|
-#define ROUND_MB_GB ((1 << 10) - 1)
|
|
-#define SHIFT_MB_GB 10
|
|
-#define POOL_PAGES_PER_GB 16
|
|
-
|
|
-void __init cpa_init(void)
|
|
-{
|
|
- struct sysinfo si;
|
|
- unsigned long gb;
|
|
-
|
|
- si_meminfo(&si);
|
|
- /*
|
|
- * Calculate the number of pool pages:
|
|
- *
|
|
- * Convert totalram (nr of pages) to MiB and round to the next
|
|
- * GiB. Shift MiB to Gib and multiply the result by
|
|
- * POOL_PAGES_PER_GB:
|
|
- */
|
|
- if (debug_pagealloc) {
|
|
- gb = ((si.totalram >> SHIFT_MB) + ROUND_MB_GB) >> SHIFT_MB_GB;
|
|
- pool_size = POOL_PAGES_PER_GB * gb;
|
|
- } else {
|
|
- pool_size = 1;
|
|
- }
|
|
- pool_low = pool_size;
|
|
-
|
|
- cpa_fill_pool(NULL);
|
|
- printk(KERN_DEBUG
|
|
- "CPA: page pool initialized %lu of %lu pages preallocated\n",
|
|
- pool_pages, pool_size);
|
|
-}
|
|
-
|
|
static int split_large_page(pte_t *kpte, unsigned long address)
|
|
{
|
|
unsigned long flags, mfn, mfninc = 1;
|
|
@@ -510,28 +478,15 @@ static int split_large_page(pte_t *kpte,
|
|
pgprot_t ref_prot;
|
|
struct page *base;
|
|
|
|
- /*
|
|
- * Get a page from the pool. The pool list is protected by the
|
|
- * pgd_lock, which we have to take anyway for the split
|
|
- * operation:
|
|
- */
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
- if (list_empty(&page_pool)) {
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
- base = NULL;
|
|
- cpa_fill_pool(&base);
|
|
- if (!base)
|
|
- return -ENOMEM;
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
- } else {
|
|
- base = list_first_entry(&page_pool, struct page, lru);
|
|
- list_del(&base->lru);
|
|
- pool_pages--;
|
|
-
|
|
- if (pool_pages < pool_low)
|
|
- pool_low = pool_pages;
|
|
- }
|
|
+ if (!debug_pagealloc)
|
|
+ spin_unlock(&cpa_lock);
|
|
+ base = alloc_pages(GFP_KERNEL, 0);
|
|
+ if (!debug_pagealloc)
|
|
+ spin_lock(&cpa_lock);
|
|
+ if (!base)
|
|
+ return -ENOMEM;
|
|
|
|
+ spin_lock_irqsave(&pgd_lock, flags);
|
|
/*
|
|
* Check for races, another CPU might have split this page
|
|
* up for us already:
|
|
@@ -592,11 +547,8 @@ out_unlock:
|
|
* If we dropped out via the lookup_address check under
|
|
* pgd_lock then stick the page back into the pool:
|
|
*/
|
|
- if (base) {
|
|
- list_add(&base->lru, &page_pool);
|
|
- pool_pages++;
|
|
- } else
|
|
- pool_used++;
|
|
+ if (base)
|
|
+ __free_page(base);
|
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
|
|
|
return 0;
|
|
@@ -604,11 +556,16 @@ out_unlock:
|
|
|
|
static int __change_page_attr(struct cpa_data *cpa, int primary)
|
|
{
|
|
- unsigned long address = cpa->vaddr;
|
|
+ unsigned long address;
|
|
int do_split, err;
|
|
unsigned int level;
|
|
pte_t *kpte, old_pte;
|
|
|
|
+ if (cpa->flags & CPA_ARRAY)
|
|
+ address = cpa->vaddr[cpa->curpage];
|
|
+ else
|
|
+ address = *cpa->vaddr;
|
|
+
|
|
repeat:
|
|
kpte = lookup_address(address, &level);
|
|
if (!kpte)
|
|
@@ -620,7 +577,7 @@ repeat:
|
|
return 0;
|
|
WARN(1, KERN_WARNING "CPA: called for zero pte. "
|
|
"vaddr = %lx cpa->vaddr = %lx\n", address,
|
|
- cpa->vaddr);
|
|
+ *cpa->vaddr);
|
|
return -EINVAL;
|
|
}
|
|
|
|
@@ -647,7 +604,7 @@ repeat:
|
|
*/
|
|
if (__pte_val(old_pte) != __pte_val(new_pte)) {
|
|
set_pte_atomic(kpte, new_pte);
|
|
- cpa->flushtlb = 1;
|
|
+ cpa->flags |= CPA_FLUSHTLB;
|
|
}
|
|
cpa->numpages = 1;
|
|
return 0;
|
|
@@ -671,7 +628,25 @@ repeat:
|
|
*/
|
|
err = split_large_page(kpte, address);
|
|
if (!err) {
|
|
- cpa->flushtlb = 1;
|
|
+ /*
|
|
+ * Do a global TLB flush after splitting the large page
|
|
+ * and before we do the actual page attribute change in the PTE.
|
|
+ *
|
|
+ * Without this, we violate the TLB application note, which says
|
|
+ * "The TLBs may contain both ordinary and large-page
|
|
+ * translations for a 4-KByte range of linear addresses. This
|
|
+ * may occur if software modifies the paging structures so that
|
|
+ * the page size used for the address range changes. If the two
|
|
+ * translations differ with respect to page frame or attributes
|
|
+ * (e.g., permissions), processor behavior is undefined and may
|
|
+ * be implementation-specific."
|
|
+ *
|
|
+ * We do this global TLB flush inside the cpa_lock, so that we
|
|
+ * don't allow any other CPU with stale TLB entries to change the
|
|
+ * page attribute in parallel for an address that also falls into the
|
|
+ * just-split large page entry.
|
|
+ */
|
|
+ flush_tlb_all();
|
|
goto repeat;
|
|
}
|
|
|
|
@@ -684,6 +659,7 @@ static int cpa_process_alias(struct cpa_
|
|
{
|
|
struct cpa_data alias_cpa;
|
|
int ret = 0;
|
|
+ unsigned long temp_cpa_vaddr, vaddr;
|
|
|
|
if (cpa->pfn >= max_pfn_mapped)
|
|
return 0;
|
|
@@ -696,16 +672,24 @@ static int cpa_process_alias(struct cpa_
|
|
* No need to redo, when the primary call touched the direct
|
|
* mapping already:
|
|
*/
|
|
- if (!(within(cpa->vaddr, PAGE_OFFSET,
|
|
+ if (cpa->flags & CPA_ARRAY)
|
|
+ vaddr = cpa->vaddr[cpa->curpage];
|
|
+ else
|
|
+ vaddr = *cpa->vaddr;
|
|
+
|
|
+ if (!(within(vaddr, PAGE_OFFSET,
|
|
PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
|
|
#ifdef CONFIG_X86_64
|
|
- || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
|
|
+ || within(vaddr, PAGE_OFFSET + (1UL<<32),
|
|
PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
|
|
#endif
|
|
)) {
|
|
|
|
alias_cpa = *cpa;
|
|
- alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
|
|
+ temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
|
|
+ alias_cpa.vaddr = &temp_cpa_vaddr;
|
|
+ alias_cpa.flags &= ~CPA_ARRAY;
|
|
+
|
|
|
|
ret = __change_page_attr_set_clr(&alias_cpa, 0);
|
|
}
|
|
@@ -717,7 +701,7 @@ static int cpa_process_alias(struct cpa_
|
|
* No need to redo, when the primary call touched the high
|
|
* mapping already:
|
|
*/
|
|
- if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
|
|
+ if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
|
|
return 0;
|
|
|
|
/*
|
|
@@ -728,8 +712,9 @@ static int cpa_process_alias(struct cpa_
|
|
return 0;
|
|
|
|
alias_cpa = *cpa;
|
|
- alias_cpa.vaddr =
|
|
- (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map;
|
|
+ temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map;
|
|
+ alias_cpa.vaddr = &temp_cpa_vaddr;
|
|
+ alias_cpa.flags &= ~CPA_ARRAY;
|
|
|
|
/*
|
|
* The high mapping range is imprecise, so ignore the return value.
|
|
@@ -749,8 +734,15 @@ static int __change_page_attr_set_clr(st
|
|
* preservation check.
|
|
*/
|
|
cpa->numpages = numpages;
|
|
+ /* For array changes, we cannot use large pages */
|
|
+ if (cpa->flags & CPA_ARRAY)
|
|
+ cpa->numpages = 1;
|
|
|
|
+ if (!debug_pagealloc)
|
|
+ spin_lock(&cpa_lock);
|
|
ret = __change_page_attr(cpa, checkalias);
|
|
+ if (!debug_pagealloc)
|
|
+ spin_unlock(&cpa_lock);
|
|
if (ret)
|
|
return ret;
|
|
|
|
@@ -767,7 +759,11 @@ static int __change_page_attr_set_clr(st
|
|
*/
|
|
BUG_ON(cpa->numpages > numpages);
|
|
numpages -= cpa->numpages;
|
|
- cpa->vaddr += cpa->numpages * PAGE_SIZE;
|
|
+ if (cpa->flags & CPA_ARRAY)
|
|
+ cpa->curpage++;
|
|
+ else
|
|
+ *cpa->vaddr += cpa->numpages * PAGE_SIZE;
|
|
+
|
|
}
|
|
return 0;
|
|
}
|
|
@@ -778,9 +774,9 @@ static inline int cache_attr(pgprot_t at
|
|
(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
|
|
}
|
|
|
|
-static int change_page_attr_set_clr(unsigned long addr, int numpages,
|
|
+static int change_page_attr_set_clr(unsigned long *addr, int numpages,
|
|
pgprot_t mask_set, pgprot_t mask_clr,
|
|
- int force_split)
|
|
+ int force_split, int array)
|
|
{
|
|
struct cpa_data cpa;
|
|
int ret, cache, checkalias;
|
|
@@ -795,21 +791,40 @@ static int change_page_attr_set_clr(unsi
|
|
return 0;
|
|
|
|
/* Ensure we are PAGE_SIZE aligned */
|
|
- if (addr & ~PAGE_MASK) {
|
|
- addr &= PAGE_MASK;
|
|
- /*
|
|
- * People should not be passing in unaligned addresses:
|
|
- */
|
|
- WARN_ON_ONCE(1);
|
|
+ if (!array) {
|
|
+ if (*addr & ~PAGE_MASK) {
|
|
+ *addr &= PAGE_MASK;
|
|
+ /*
|
|
+ * People should not be passing in unaligned addresses:
|
|
+ */
|
|
+ WARN_ON_ONCE(1);
|
|
+ }
|
|
+ } else {
|
|
+ int i;
|
|
+ for (i = 0; i < numpages; i++) {
|
|
+ if (addr[i] & ~PAGE_MASK) {
|
|
+ addr[i] &= PAGE_MASK;
|
|
+ WARN_ON_ONCE(1);
|
|
+ }
|
|
+ }
|
|
}
|
|
|
|
+ /* Must avoid aliasing mappings in the highmem code */
|
|
+ kmap_flush_unused();
|
|
+
|
|
+ vm_unmap_aliases();
|
|
+
|
|
cpa.vaddr = addr;
|
|
cpa.numpages = numpages;
|
|
cpa.mask_set = mask_set;
|
|
cpa.mask_clr = mask_clr;
|
|
- cpa.flushtlb = 0;
|
|
+ cpa.flags = 0;
|
|
+ cpa.curpage = 0;
|
|
cpa.force_split = force_split;
|
|
|
|
+ if (array)
|
|
+ cpa.flags |= CPA_ARRAY;
|
|
+
|
|
/* No alias checking for _NX bit modifications */
|
|
checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
|
|
|
|
@@ -818,7 +833,7 @@ static int change_page_attr_set_clr(unsi
|
|
/*
|
|
* Check whether we really changed something:
|
|
*/
|
|
- if (!cpa.flushtlb)
|
|
+ if (!(cpa.flags & CPA_FLUSHTLB))
|
|
goto out;
|
|
|
|
/*
|
|
@@ -833,27 +848,30 @@ static int change_page_attr_set_clr(unsi
|
|
* error case we fall back to cpa_flush_all (which uses
|
|
* wbindv):
|
|
*/
|
|
- if (!ret && cpu_has_clflush)
|
|
- cpa_flush_range(addr, numpages, cache);
|
|
- else
|
|
+ if (!ret && cpu_has_clflush) {
|
|
+ if (cpa.flags & CPA_ARRAY)
|
|
+ cpa_flush_array(addr, numpages, cache);
|
|
+ else
|
|
+ cpa_flush_range(*addr, numpages, cache);
|
|
+ } else
|
|
cpa_flush_all(cache);
|
|
|
|
out:
|
|
- cpa_fill_pool(NULL);
|
|
-
|
|
return ret;
|
|
}
|
|
|
|
-static inline int change_page_attr_set(unsigned long addr, int numpages,
|
|
- pgprot_t mask)
|
|
+static inline int change_page_attr_set(unsigned long *addr, int numpages,
|
|
+ pgprot_t mask, int array)
|
|
{
|
|
- return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
|
|
+ return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
|
|
+ array);
|
|
}
|
|
|
|
-static inline int change_page_attr_clear(unsigned long addr, int numpages,
|
|
- pgprot_t mask)
|
|
+static inline int change_page_attr_clear(unsigned long *addr, int numpages,
|
|
+ pgprot_t mask, int array)
|
|
{
|
|
- return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
|
|
+ return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
|
|
+ array);
|
|
}
|
|
|
|
#ifdef CONFIG_XEN
|
|
@@ -906,8 +924,8 @@ int _set_memory_uc(unsigned long addr, i
|
|
/*
|
|
* for now UC MINUS. see comments in ioremap_nocache()
|
|
*/
|
|
- return change_page_attr_set(addr, numpages,
|
|
- __pgprot(_PAGE_CACHE_UC_MINUS));
|
|
+ return change_page_attr_set(&addr, numpages,
|
|
+ __pgprot(_PAGE_CACHE_UC_MINUS), 0);
|
|
}
|
|
|
|
int set_memory_uc(unsigned long addr, int numpages)
|
|
@@ -923,10 +941,48 @@ int set_memory_uc(unsigned long addr, in
|
|
}
|
|
EXPORT_SYMBOL(set_memory_uc);
|
|
|
|
+int set_memory_array_uc(unsigned long *addr, int addrinarray)
|
|
+{
|
|
+ unsigned long start;
|
|
+ unsigned long end;
|
|
+ int i;
|
|
+ /*
|
|
+ * for now UC MINUS. see comments in ioremap_nocache()
|
|
+ */
|
|
+ for (i = 0; i < addrinarray; i++) {
|
|
+ start = __pa(addr[i]);
|
|
+ for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
|
|
+ if (end != __pa(addr[i + 1]))
|
|
+ break;
|
|
+ i++;
|
|
+ }
|
|
+ if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
|
|
+ goto out;
|
|
+ }
|
|
+
|
|
+ return change_page_attr_set(addr, addrinarray,
|
|
+ __pgprot(_PAGE_CACHE_UC_MINUS), 1);
|
|
+out:
|
|
+ for (i = 0; i < addrinarray; i++) {
|
|
+ unsigned long tmp = __pa(addr[i]);
|
|
+
|
|
+ if (tmp == start)
|
|
+ break;
|
|
+ for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
|
|
+ if (end != __pa(addr[i + 1]))
|
|
+ break;
|
|
+ i++;
|
|
+ }
|
|
+ free_memtype(tmp, end);
|
|
+ }
|
|
+ return -EINVAL;
|
|
+}
|
|
+EXPORT_SYMBOL(set_memory_array_uc);
|
|
+
|
|
int _set_memory_wc(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_set(addr, numpages,
|
|
- __pgprot(_PAGE_CACHE_WC));
|
|
+ return change_page_attr_set(&addr, numpages,
|
|
+ __pgprot(_PAGE_CACHE_WC), 0);
|
|
}
|
|
|
|
int set_memory_wc(unsigned long addr, int numpages)
|
|
@@ -944,8 +1000,8 @@ EXPORT_SYMBOL(set_memory_wc);
|
|
|
|
int _set_memory_wb(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_clear(addr, numpages,
|
|
- __pgprot(_PAGE_CACHE_MASK));
|
|
+ return change_page_attr_clear(&addr, numpages,
|
|
+ __pgprot(_PAGE_CACHE_MASK), 0);
|
|
}
|
|
|
|
int set_memory_wb(unsigned long addr, int numpages)
|
|
@@ -956,37 +1012,59 @@ int set_memory_wb(unsigned long addr, in
|
|
}
|
|
EXPORT_SYMBOL(set_memory_wb);
|
|
|
|
+int set_memory_array_wb(unsigned long *addr, int addrinarray)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < addrinarray; i++) {
|
|
+ unsigned long start = __pa(addr[i]);
|
|
+ unsigned long end;
|
|
+
|
|
+ for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
|
|
+ if (end != __pa(addr[i + 1]))
|
|
+ break;
|
|
+ i++;
|
|
+ }
|
|
+ free_memtype(start, end);
|
|
+ }
|
|
+ return change_page_attr_clear(addr, addrinarray,
|
|
+ __pgprot(_PAGE_CACHE_MASK), 1);
|
|
+}
|
|
+EXPORT_SYMBOL(set_memory_array_wb);
|
|
+
|
|
int set_memory_x(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
|
|
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
|
|
}
|
|
EXPORT_SYMBOL(set_memory_x);
|
|
|
|
int set_memory_nx(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
|
|
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
|
|
}
|
|
EXPORT_SYMBOL(set_memory_nx);
|
|
|
|
int set_memory_ro(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
|
|
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(set_memory_ro);
|
|
|
|
int set_memory_rw(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
|
|
+ return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(set_memory_rw);
|
|
|
|
int set_memory_np(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
|
|
+ return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
|
|
}
|
|
|
|
int set_memory_4k(unsigned long addr, int numpages)
|
|
{
|
|
- return change_page_attr_set_clr(addr, numpages, __pgprot(0),
|
|
- __pgprot(0), 1);
|
|
+ return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
|
|
+ __pgprot(0), 1, 0);
|
|
}
|
|
|
|
int set_pages_uc(struct page *page, int numpages)
|
|
@@ -1039,22 +1117,38 @@ int set_pages_rw(struct page *page, int
|
|
|
|
static int __set_pages_p(struct page *page, int numpages)
|
|
{
|
|
- struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
|
|
+ unsigned long tempaddr = (unsigned long) page_address(page);
|
|
+ struct cpa_data cpa = { .vaddr = &tempaddr,
|
|
.numpages = numpages,
|
|
.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
|
|
- .mask_clr = __pgprot(0)};
|
|
+ .mask_clr = __pgprot(0),
|
|
+ .flags = 0};
|
|
|
|
- return __change_page_attr_set_clr(&cpa, 1);
|
|
+ /*
|
|
+ * No alias checking needed for setting the present flag. Otherwise,
|
|
+ * we may need to break large pages for 64-bit kernel text
|
|
+ * mappings (which adds complexity, especially if we want to do this
|
|
+ * from atomic context). Let's keep it simple!
|
|
+ */
|
|
+ return __change_page_attr_set_clr(&cpa, 0);
|
|
}
|
|
|
|
static int __set_pages_np(struct page *page, int numpages)
|
|
{
|
|
- struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
|
|
+ unsigned long tempaddr = (unsigned long) page_address(page);
|
|
+ struct cpa_data cpa = { .vaddr = &tempaddr,
|
|
.numpages = numpages,
|
|
.mask_set = __pgprot(0),
|
|
- .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
|
|
+ .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
|
|
+ .flags = 0};
|
|
|
|
- return __change_page_attr_set_clr(&cpa, 1);
|
|
+ /*
|
|
+ * No alias checking needed for clearing the present flag. Otherwise,
|
|
+ * we may need to break large pages for 64-bit kernel text
|
|
+ * mappings (which adds complexity, especially if we want to do this
|
|
+ * from atomic context). Let's keep it simple!
|
|
+ */
|
|
+ return __change_page_attr_set_clr(&cpa, 0);
|
|
}
|
|
|
|
void kernel_map_pages(struct page *page, int numpages, int enable)
|
|
@@ -1074,11 +1168,8 @@ void kernel_map_pages(struct page *page,
|
|
|
|
/*
|
|
* The return value is ignored as the calls cannot fail.
|
|
- * Large pages are kept enabled at boot time, and are
|
|
- * split up quickly with DEBUG_PAGEALLOC. If a splitup
|
|
- * fails here (due to temporary memory shortage) no damage
|
|
- * is done because we just keep the largepage intact up
|
|
- * to the next attempt when it will likely be split up:
|
|
+ * Large pages for identity mappings are not used at boot time,
|
|
+ * and hence there are no memory allocations during large page split.
|
|
*/
|
|
if (enable)
|
|
__set_pages_p(page, numpages);
|
|
@@ -1090,53 +1181,8 @@ void kernel_map_pages(struct page *page,
|
|
* but that can deadlock->flush only current cpu:
|
|
*/
|
|
__flush_tlb_all();
|
|
-
|
|
- /*
|
|
- * Try to refill the page pool here. We can do this only after
|
|
- * the tlb flush.
|
|
- */
|
|
- cpa_fill_pool(NULL);
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_DEBUG_FS
|
|
-static int dpa_show(struct seq_file *m, void *v)
|
|
-{
|
|
- seq_puts(m, "DEBUG_PAGEALLOC\n");
|
|
- seq_printf(m, "pool_size : %lu\n", pool_size);
|
|
- seq_printf(m, "pool_pages : %lu\n", pool_pages);
|
|
- seq_printf(m, "pool_low : %lu\n", pool_low);
|
|
- seq_printf(m, "pool_used : %lu\n", pool_used);
|
|
- seq_printf(m, "pool_failed : %lu\n", pool_failed);
|
|
-
|
|
- return 0;
|
|
}
|
|
|
|
-static int dpa_open(struct inode *inode, struct file *filp)
|
|
-{
|
|
- return single_open(filp, dpa_show, NULL);
|
|
-}
|
|
-
|
|
-static const struct file_operations dpa_fops = {
|
|
- .open = dpa_open,
|
|
- .read = seq_read,
|
|
- .llseek = seq_lseek,
|
|
- .release = single_release,
|
|
-};
|
|
-
|
|
-static int __init debug_pagealloc_proc_init(void)
|
|
-{
|
|
- struct dentry *de;
|
|
-
|
|
- de = debugfs_create_file("debug_pagealloc", 0600, NULL, NULL,
|
|
- &dpa_fops);
|
|
- if (!de)
|
|
- return -ENOMEM;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-__initcall(debug_pagealloc_proc_init);
|
|
-#endif
|
|
-
|
|
#ifdef CONFIG_HIBERNATION
|
|
|
|
bool kernel_page_present(struct page *page)
|
|
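The CPA_ARRAY changes above let a single change_page_attr_set_clr() call walk an array of (not necessarily contiguous) page addresses, and cpa_flush_array() then clflushes exactly those pages instead of a whole range. A hedged caller sketch using the set_memory_array_uc()/set_memory_array_wb() entry points exported by this file; the driver-side function and buffer handling are made up for the example:

	static int example_make_buffers_uc(struct page **pages, int count,
					   unsigned long *addrs)
	{
		int i, ret;

		for (i = 0; i < count; i++)
			addrs[i] = (unsigned long)page_address(pages[i]);

		ret = set_memory_array_uc(addrs, count);   /* one batched CPA + flush */
		if (ret)
			return ret;

		/* ... use the pages as uncached descriptors ... */

		return set_memory_array_wb(addrs, count);  /* restore write-back */
	}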
--- head-2011-03-17.orig/arch/x86/mm/pat-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pat-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -7,24 +7,24 @@
|
|
* Loosely based on earlier PAT patchset from Eric Biederman and Andi Kleen.
|
|
*/
|
|
|
|
-#include <linux/mm.h>
|
|
+#include <linux/seq_file.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/debugfs.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/gfp.h>
|
|
+#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/debugfs.h>
|
|
-#include <linux/seq_file.h>
|
|
|
|
-#include <asm/msr.h>
|
|
-#include <asm/tlbflush.h>
|
|
+#include <asm/cacheflush.h>
|
|
#include <asm/processor.h>
|
|
-#include <asm/page.h>
|
|
+#include <asm/tlbflush.h>
|
|
#include <asm/pgtable.h>
|
|
-#include <asm/pat.h>
|
|
-#include <asm/e820.h>
|
|
-#include <asm/cacheflush.h>
|
|
#include <asm/fcntl.h>
|
|
+#include <asm/e820.h>
|
|
#include <asm/mtrr.h>
|
|
+#include <asm/page.h>
|
|
+#include <asm/msr.h>
|
|
+#include <asm/pat.h>
|
|
#include <asm/io.h>
|
|
|
|
#ifdef CONFIG_X86_PAT
|
|
@@ -46,6 +46,7 @@ early_param("nopat", nopat);
|
|
|
|
|
|
static int debug_enable;
|
|
+
|
|
static int __init pat_debug_setup(char *str)
|
|
{
|
|
debug_enable = 1;
|
|
@@ -157,14 +158,23 @@ static char *cattr_name(unsigned long fl
|
|
*/
|
|
|
|
struct memtype {
|
|
- u64 start;
|
|
- u64 end;
|
|
- unsigned long type;
|
|
- struct list_head nd;
|
|
+ u64 start;
|
|
+ u64 end;
|
|
+ unsigned long type;
|
|
+ struct list_head nd;
|
|
};
|
|
|
|
static LIST_HEAD(memtype_list);
|
|
-static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
|
|
+static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
|
|
+
|
|
+static inline u8 _mtrr_type_lookup(u64 start, u64 end)
|
|
+{
|
|
+ if (is_initial_xendomain())
|
|
+ return mtrr_type_lookup(start, end);
|
|
+ return pagerange_is_ram(start, end) > 0
|
|
+ ? MTRR_TYPE_WRCOMB : MTRR_TYPE_UNCACHABLE;
|
|
+}
|
|
+#define mtrr_type_lookup _mtrr_type_lookup
|
|
|
|
/*
|
|
* Does intersection of PAT memory type and MTRR memory type and returns
|
|
@@ -192,8 +202,8 @@ static unsigned long pat_x_mtrr_type(u64
|
|
return req_type;
|
|
}
|
|
|
|
-static int chk_conflict(struct memtype *new, struct memtype *entry,
|
|
- unsigned long *type)
|
|
+static int
|
|
+chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type)
|
|
{
|
|
if (new->type != entry->type) {
|
|
if (type) {
|
|
@@ -223,6 +233,72 @@ static struct memtype *cached_entry;
|
|
static u64 cached_start;
|
|
|
|
/*
|
|
+ * For RAM pages, mark the pages as having a non-WB memory type using
|
|
+ * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
|
|
+ * set_memory_wc() on a RAM page at a time before marking it as WB again.
|
|
+ * This is OK, because only one driver will own the page and issue
|
|
+ * set_memory_*() calls on it.
|
|
+ *
|
|
+ * For now, we use PageNonWB to track that the RAM page is being mapped
|
|
+ * as non-WB. In the future, we will have to use one more flag
|
|
+ * (or some other mechanism in struct page) to distinguish between
|
|
+ * UC and WC mappings.
|
|
+ */
|
|
+static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
|
|
+ unsigned long *new_type)
|
|
+{
|
|
+ struct page *page;
|
|
+ unsigned long mfn, end_mfn;
|
|
+
|
|
+ for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
|
|
+ unsigned long pfn = mfn_to_local_pfn(mfn);
|
|
+
|
|
+ BUG_ON(!pfn_valid(pfn));
|
|
+ page = pfn_to_page(pfn);
|
|
+ if (page_mapped(page) || PageNonWB(page))
|
|
+ goto out;
|
|
+
|
|
+ SetPageNonWB(page);
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+out:
|
|
+ end_mfn = mfn;
|
|
+ for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
|
|
+ page = pfn_to_page(mfn_to_local_pfn(mfn));
|
|
+ ClearPageNonWB(page);
|
|
+ }
|
|
+
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+static int free_ram_pages_type(u64 start, u64 end)
|
|
+{
|
|
+ struct page *page;
|
|
+ unsigned long mfn, end_mfn;
|
|
+
|
|
+ for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
|
|
+ unsigned long pfn = mfn_to_local_pfn(mfn);
|
|
+
|
|
+ BUG_ON(!pfn_valid(pfn));
|
|
+ page = pfn_to_page(pfn);
|
|
+ if (page_mapped(page) || !PageNonWB(page))
|
|
+ goto out;
|
|
+
|
|
+ ClearPageNonWB(page);
|
|
+ }
|
|
+ return 0;
|
|
+
|
|
+out:
|
|
+ end_mfn = mfn;
|
|
+ for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
|
|
+ page = pfn_to_page(mfn_to_local_pfn(mfn));
|
|
+ SetPageNonWB(page);
|
|
+ }
|
|
+ return -EINVAL;
|
|
+}
|
|
+
|
|
+/*
|
|
* req_type typically has one of the:
|
|
* - _PAGE_CACHE_WB
|
|
* - _PAGE_CACHE_WC
|
|
@@ -238,14 +314,15 @@ static u64 cached_start;
|
|
* it will return a negative return value.
|
|
*/
|
|
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
|
|
- unsigned long *new_type)
|
|
+ unsigned long *new_type)
|
|
{
|
|
struct memtype *new, *entry;
|
|
unsigned long actual_type;
|
|
struct list_head *where;
|
|
+ int is_range_ram;
|
|
int err = 0;
|
|
|
|
- BUG_ON(start >= end); /* end is exclusive */
|
|
+ BUG_ON(start >= end); /* end is exclusive */
|
|
|
|
if (!pat_enabled) {
|
|
/* This is identical to page table setting without PAT */
|
|
@@ -278,17 +355,24 @@ int reserve_memtype(u64 start, u64 end,
|
|
actual_type = _PAGE_CACHE_WB;
|
|
else
|
|
actual_type = _PAGE_CACHE_UC_MINUS;
|
|
- } else
|
|
+ } else {
|
|
actual_type = pat_x_mtrr_type(start, end,
|
|
req_type & _PAGE_CACHE_MASK);
|
|
+ }
|
|
+
|
|
+ is_range_ram = pagerange_is_ram(start, end);
|
|
+ if (is_range_ram == 1)
|
|
+ return reserve_ram_pages_type(start, end, req_type, new_type);
|
|
+ else if (is_range_ram < 0)
|
|
+ return -EINVAL;
|
|
|
|
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
|
|
if (!new)
|
|
return -ENOMEM;
|
|
|
|
- new->start = start;
|
|
- new->end = end;
|
|
- new->type = actual_type;
|
|
+ new->start = start;
|
|
+ new->end = end;
|
|
+ new->type = actual_type;
|
|
|
|
if (new_type)
|
|
*new_type = actual_type;
|
|
@@ -347,6 +431,7 @@ int reserve_memtype(u64 start, u64 end,
|
|
start, end, cattr_name(new->type), cattr_name(req_type));
|
|
kfree(new);
|
|
spin_unlock(&memtype_lock);
|
|
+
|
|
return err;
|
|
}
|
|
|
|
@@ -370,6 +455,7 @@ int free_memtype(u64 start, u64 end)
|
|
{
|
|
struct memtype *entry;
|
|
int err = -EINVAL;
|
|
+ int is_range_ram;
|
|
|
|
if (!pat_enabled)
|
|
return 0;
|
|
@@ -378,6 +464,12 @@ int free_memtype(u64 start, u64 end)
|
|
if (is_ISA_range(start, end - 1))
|
|
return 0;
|
|
|
|
+ is_range_ram = pagerange_is_ram(start, end);
|
|
+ if (is_range_ram == 1)
|
|
+ return free_ram_pages_type(start, end);
|
|
+ else if (is_range_ram < 0)
|
|
+ return -EINVAL;
|
|
+
|
|
spin_lock(&memtype_lock);
|
|
list_for_each_entry(entry, &memtype_list, nd) {
|
|
if (entry->start == start && entry->end == end) {
|
|
@@ -398,6 +490,7 @@ int free_memtype(u64 start, u64 end)
|
|
}
|
|
|
|
dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
|
|
+
|
|
return err;
|
|
}
|
|
|
|
@@ -415,12 +508,16 @@ static inline int range_is_allowed(unsig
|
|
return 1;
|
|
}
|
|
#else
|
|
+/* This check is needed to avoid cache aliasing when PAT is enabled */
|
|
static inline int range_is_allowed(unsigned long mfn, unsigned long size)
|
|
{
|
|
u64 from = ((u64)mfn) << PAGE_SHIFT;
|
|
u64 to = from + size;
|
|
u64 cursor = from;
|
|
|
|
+ if (!pat_enabled)
|
|
+ return 1;
|
|
+
|
|
while (cursor < to) {
|
|
if (!devmem_is_allowed(mfn)) {
|
|
printk(KERN_INFO
|
|
@@ -504,9 +601,9 @@ int phys_mem_access_prot_allowed(struct
|
|
|
|
void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
|
|
{
|
|
+ unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
|
|
u64 addr = (u64)mfn << PAGE_SHIFT;
|
|
unsigned long flags;
|
|
- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
|
|
|
|
reserve_memtype(addr, addr + size, want_flags, &flags);
|
|
if (flags != want_flags) {
|
|
@@ -526,7 +623,7 @@ void unmap_devmem(unsigned long mfn, uns
|
|
free_memtype(addr, addr + size);
|
|
}
|
|
|
|
-#if defined(CONFIG_DEBUG_FS)
|
|
+#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT)
|
|
|
|
/* get Nth element of the linked list */
|
|
static struct memtype *memtype_get_idx(loff_t pos)
|
|
@@ -549,6 +646,7 @@ static struct memtype *memtype_get_idx(l
|
|
}
|
|
spin_unlock(&memtype_lock);
|
|
kfree(print_entry);
|
|
+
|
|
return NULL;
|
|
}
|
|
|
|
@@ -579,6 +677,7 @@ static int memtype_seq_show(struct seq_f
|
|
seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
|
|
print_entry->start, print_entry->end);
|
|
kfree(print_entry);
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -610,4 +709,4 @@ static int __init pat_memtype_list_init(
|
|
|
|
late_initcall(pat_memtype_list_init);
|
|
|
|
-#endif /* CONFIG_DEBUG_FS */
|
|
+#endif /* CONFIG_DEBUG_FS && CONFIG_X86_PAT */
|
|
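For reference, the tri-state contract of the pagerange_is_ram() helper that the PAT hunks above rely on: 1 means the whole range is RAM (tracked per page via PageNonWB), 0 means none of it is RAM (tracked in the memtype list), and -1 means a RAM/non-RAM mix, which reserve_memtype()/free_memtype() reject. A small sketch of that dispatch, with a hypothetical wrapper name:

	static int classify_range(u64 start, u64 end)
	{
		int is_range_ram = pagerange_is_ram(start, end);

		if (is_range_ram == 1)
			return 1;		/* all RAM: use the PageNonWB flag */
		if (is_range_ram < 0)
			return -EINVAL;		/* mixed range: refuse outright */
		return 0;			/* pure I/O: use the memtype list */
	}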
--- head-2011-03-17.orig/arch/x86/mm/pgtable-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pgtable-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -129,7 +129,7 @@ void __pud_free_tlb(struct mmu_gather *t
|
|
static void _pin_lock(struct mm_struct *mm, int lock) {
|
|
if (lock)
|
|
spin_lock(&mm->page_table_lock);
|
|
-#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
|
|
+#if USE_SPLIT_PTLOCKS
|
|
/* While mm->page_table_lock protects us against insertions and
|
|
* removals of higher level page table pages, it doesn't protect
|
|
* against updates of pte-s. Such updates, however, require the
|
|
@@ -408,10 +408,8 @@ static inline void pgd_list_del(pgd_t *p
|
|
#define UNSHARED_PTRS_PER_PGD \
|
|
(SHARED_KERNEL_PMD ? KERNEL_PGD_BOUNDARY : PTRS_PER_PGD)
|
|
|
|
-static void pgd_ctor(void *p)
|
|
+static void pgd_ctor(pgd_t *pgd)
|
|
{
|
|
- pgd_t *pgd = p;
|
|
-
|
|
pgd_test_and_unpin(pgd);
|
|
|
|
/* If the pgd points to a shared pagetable level (either the
|
|
@@ -440,7 +438,7 @@ static void pgd_ctor(void *p)
|
|
pgd_list_add(pgd);
|
|
}
|
|
|
|
-static void pgd_dtor(void *pgd)
|
|
+static void pgd_dtor(pgd_t *pgd)
|
|
{
|
|
unsigned long flags; /* can be called from interrupt context */
|
|
|
|
--- head-2011-03-17.orig/arch/x86/mm/pgtable_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pgtable_32-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -122,7 +122,6 @@ void __init reserve_top_address(unsigned
|
|
printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
|
|
(int)-reserve);
|
|
__FIXADDR_TOP = -reserve - PAGE_SIZE;
|
|
- __VMALLOC_RESERVE += reserve;
|
|
}
|
|
|
|
/*
|
|
@@ -135,7 +134,8 @@ static int __init parse_vmalloc(char *ar
|
|
if (!arg)
|
|
return -EINVAL;
|
|
|
|
- __VMALLOC_RESERVE = memparse(arg, &arg);
|
|
+ /* Add VMALLOC_OFFSET to the parsed value due to vm area guard hole*/
|
|
+ __VMALLOC_RESERVE = memparse(arg, &arg) + VMALLOC_OFFSET;
|
|
return 0;
|
|
}
|
|
early_param("vmalloc", parse_vmalloc);
|
|
--- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -499,7 +499,7 @@ static int pirq_amd756_get(struct pci_de
|
|
if (pirq <= 4)
|
|
irq = read_config_nybble(router, 0x56, pirq - 1);
|
|
dev_info(&dev->dev,
|
|
- "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
|
|
+ "AMD756: dev [%04x:%04x], router PIRQ %d get IRQ %d\n",
|
|
dev->vendor, dev->device, pirq, irq);
|
|
return irq;
|
|
}
|
|
@@ -507,7 +507,7 @@ static int pirq_amd756_get(struct pci_de
|
|
static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
|
|
{
|
|
dev_info(&dev->dev,
|
|
- "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
|
|
+ "AMD756: dev [%04x:%04x], router PIRQ %d set IRQ %d\n",
|
|
dev->vendor, dev->device, pirq, irq);
|
|
if (pirq <= 4)
|
|
write_config_nybble(router, 0x56, pirq - 1, irq);
|
|
@@ -596,13 +596,20 @@ static __init int intel_router_probe(str
|
|
case PCI_DEVICE_ID_INTEL_ICH10_1:
|
|
case PCI_DEVICE_ID_INTEL_ICH10_2:
|
|
case PCI_DEVICE_ID_INTEL_ICH10_3:
|
|
- case PCI_DEVICE_ID_INTEL_PCH_0:
|
|
- case PCI_DEVICE_ID_INTEL_PCH_1:
|
|
r->name = "PIIX/ICH";
|
|
r->get = pirq_piix_get;
|
|
r->set = pirq_piix_set;
|
|
return 1;
|
|
}
|
|
+
|
|
+ if ((device >= PCI_DEVICE_ID_INTEL_PCH_LPC_MIN) &&
|
|
+ (device <= PCI_DEVICE_ID_INTEL_PCH_LPC_MAX)) {
|
|
+ r->name = "PIIX/ICH";
|
|
+ r->get = pirq_piix_get;
|
|
+ r->set = pirq_piix_set;
|
|
+ return 1;
|
|
+ }
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -829,7 +836,7 @@ static void __init pirq_find_router(stru
|
|
r->get = NULL;
|
|
r->set = NULL;
|
|
|
|
- DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
|
|
+ DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for [%04x:%04x]\n",
|
|
rt->rtr_vendor, rt->rtr_device);
|
|
|
|
pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
|
|
@@ -849,7 +856,7 @@ static void __init pirq_find_router(stru
|
|
h->probe(r, pirq_router_dev, pirq_router_dev->device))
|
|
break;
|
|
}
|
|
- dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
|
|
+ dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x:%04x]\n",
|
|
pirq_router.name,
|
|
pirq_router_dev->vendor, pirq_router_dev->device);
|
|
|
|
@@ -1049,35 +1056,44 @@ static void __init pcibios_fixup_irqs(vo
|
|
if (io_apic_assign_pci_irqs) {
|
|
int irq;
|
|
|
|
- if (pin) {
|
|
- /*
|
|
- * interrupt pins are numbered starting
|
|
- * from 1
|
|
- */
|
|
- pin--;
|
|
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
|
|
- PCI_SLOT(dev->devfn), pin);
|
|
- /*
|
|
- * Busses behind bridges are typically not listed in the MP-table.
|
|
- * In this case we have to look up the IRQ based on the parent bus,
|
|
- * parent slot, and pin number. The SMP code detects such bridged
|
|
- * busses itself so we should get into this branch reliably.
|
|
- */
|
|
- if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
|
|
- struct pci_dev *bridge = dev->bus->self;
|
|
+ if (!pin)
|
|
+ continue;
|
|
+
|
|
+ /*
|
|
+ * interrupt pins are numbered starting from 1
|
|
+ */
|
|
+ pin--;
|
|
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
|
|
+ PCI_SLOT(dev->devfn), pin);
|
|
+ /*
|
|
+ * Busses behind bridges are typically not listed in the
|
|
+ * MP-table. In this case we have to look up the IRQ
|
|
+ * based on the parent bus, parent slot, and pin number.
|
|
+ * The SMP code detects such bridged busses itself so we
|
|
+ * should get into this branch reliably.
|
|
+ */
|
|
+ if (irq < 0 && dev->bus->parent) {
|
|
+ /* go back to the bridge */
|
|
+ struct pci_dev *bridge = dev->bus->self;
|
|
+ int bus;
|
|
|
|
- pin = (pin + PCI_SLOT(dev->devfn)) % 4;
|
|
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
|
|
- PCI_SLOT(bridge->devfn), pin);
|
|
- if (irq >= 0)
|
|
- dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
|
|
- pci_name(bridge),
|
|
- 'A' + pin, irq);
|
|
- }
|
|
- if (irq >= 0) {
|
|
- dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
|
|
- dev->irq = irq;
|
|
- }
|
|
+ pin = (pin + PCI_SLOT(dev->devfn)) % 4;
|
|
+ bus = bridge->bus->number;
|
|
+ irq = IO_APIC_get_PCI_irq_vector(bus,
|
|
+ PCI_SLOT(bridge->devfn), pin);
|
|
+ if (irq >= 0)
|
|
+ dev_warn(&dev->dev,
|
|
+ "using bridge %s INT %c to "
|
|
+ "get IRQ %d\n",
|
|
+ pci_name(bridge),
|
|
+ 'A' + pin, irq);
|
|
+ }
|
|
+ if (irq >= 0) {
|
|
+ dev_info(&dev->dev,
|
|
+ "PCI->APIC IRQ transform: INT %c "
|
|
+ "-> IRQ %d\n",
|
|
+ 'A' + pin, irq);
|
|
+ dev->irq = irq;
|
|
}
|
|
}
|
|
#endif
|
|
--- head-2011-03-17.orig/arch/x86/xen/Kconfig 2011-02-01 14:38:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/xen/Kconfig 2011-02-01 14:39:24.000000000 +0100
|
|
@@ -43,7 +43,7 @@ config XEN_SAVE_RESTORE
|
|
|
|
config XEN_DEBUG_FS
|
|
bool "Enable Xen debug and tuning parameters in debugfs"
|
|
- depends on XEN && DEBUG_FS
|
|
+ depends on PARAVIRT_XEN && DEBUG_FS
|
|
default n
|
|
help
|
|
Enable statistics output and various tuning options in debugfs.
|
|
--- head-2011-03-17.orig/drivers/acpi/acpica/hwsleep.c 2011-01-31 17:01:49.000000000 +0100
+++ head-2011-03-17/drivers/acpi/acpica/hwsleep.c 2011-02-01 14:39:24.000000000 +0100
@@ -396,8 +396,7 @@ acpi_status asmlinkage acpi_enter_sleep_
err = acpi_notify_hypervisor_state(sleep_state,
PM1Acontrol, PM1Bcontrol);
if (err) {
- ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
- "Hypervisor failure [%d]\n", err));
+ printk(KERN_ERR "ACPI: Hypervisor failure [%d]\n", err);
return_ACPI_STATUS(AE_ERROR);
}
#endif
--- head-2011-03-17.orig/drivers/acpi/processor_core.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/acpi/processor_core.c 2011-02-01 14:39:24.000000000 +0100
@@ -165,13 +165,20 @@ exit:

int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id)
{
- int i;
+ int i = 0;
int apic_id = -1;

+ if (type < 0) {
+ if (!processor_cntl_external())
+ return -1;
+ type = ~type;
+ i = 1;
+ }
+
apic_id = map_mat_entry(handle, type, acpi_id);
if (apic_id == -1)
apic_id = map_madt_entry(type, acpi_id);
- if (apic_id == -1)
+ if (apic_id == -1 || i)
return apic_id;

#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
--- head-2011-03-17.orig/drivers/acpi/processor_driver.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/acpi/processor_driver.c 2011-02-01 14:39:24.000000000 +0100
@@ -326,7 +326,8 @@ static int acpi_processor_get_info(struc
if (pr->id == -1) {
if (ACPI_FAILURE
(acpi_processor_hotadd_init(pr->handle, &pr->id)) &&
- !processor_cntl_external()) {
+ get_cpu_id(pr->handle, ~device_declaration,
+ pr->acpi_id) < 0) {
return -ENODEV;
}
}
--- head-2011-03-17.orig/drivers/acpi/processor_extcntl.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/acpi/processor_extcntl.c 2011-02-01 14:39:24.000000000 +0100
@@ -30,7 +30,6 @@

#include <acpi/processor.h>

-#define ACPI_PROCESSOR_COMPONENT 0x01000000
#define ACPI_PROCESSOR_CLASS "processor"
#define _COMPONENT ACPI_PROCESSOR_COMPONENT
ACPI_MODULE_NAME("processor_extcntl")
--- head-2011-03-17.orig/drivers/char/agp/generic.c 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/drivers/char/agp/generic.c 2011-02-01 14:39:24.000000000 +0100
@@ -1217,7 +1217,7 @@ int agp_generic_alloc_pages(struct agp_b
}

#ifdef CONFIG_X86
- set_pages_array_uc(mem->pages, num_pages);
+ map_pages_into_agp(mem->pages, num_pages);
#endif
ret = 0;
out:
@@ -1250,7 +1250,7 @@ void agp_generic_destroy_pages(struct ag
return;

#ifdef CONFIG_X86
- set_pages_array_wb(mem->pages, mem->page_count);
+ unmap_pages_from_agp(mem->pages, mem->page_count);
#endif

for (i = 0; i < mem->page_count; i++) {
--- head-2011-03-17.orig/drivers/firmware/dmi_scan.c 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/drivers/firmware/dmi_scan.c 2011-02-17 10:11:37.000000000 +0100
@@ -482,6 +482,11 @@ static bool dmi_matches(const struct dmi
{
int i;

+#ifdef CONFIG_XEN
+ if (!is_initial_xendomain())
+ return false;
+#endif
+
WARN(!dmi_initialized, KERN_ERR "dmi check: not initialized yet.\n");

for (i = 0; i < ARRAY_SIZE(dmi->matches); i++) {
--- head-2011-03-17.orig/drivers/idle/Kconfig 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/drivers/idle/Kconfig 2011-02-01 14:39:24.000000000 +0100
@@ -10,7 +10,7 @@ config INTEL_IDLE
processors intel_idle does not support.

menu "Memory power savings"
-depends on X86_64
+depends on X86_64 && !XEN

config I7300_IDLE_IOAT_CHANNEL
bool
--- head-2011-03-17.orig/drivers/pci/msi-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/pci/msi-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -266,8 +266,16 @@ static int msi_map_vector(struct pci_dev
* dev->irq in dom0 will be 'Xen pirq' if this device belongs to
* to another domain, and will be 'Linux irq' if it belongs to dom0.
*/
- return ((domid != DOMID_SELF) ?
- map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq));
+ if (domid == DOMID_SELF) {
+ rc = evtchn_map_pirq(-1, map_irq.pirq);
+ dev_printk(KERN_DEBUG, &dev->dev,
+ "irq %d (%d) for MSI/MSI-X\n",
+ rc, map_irq.pirq);
+ return rc;
+ }
+ dev_printk(KERN_DEBUG, &dev->dev, "irq %d for dom%d MSI/MSI-X\n",
+ map_irq.pirq, domid);
+ return map_irq.pirq;
}

static void pci_intx_for_msi(struct pci_dev *dev, int enable)
@@ -722,3 +730,24 @@ void pci_msi_init_pci_dev(struct pci_dev
INIT_LIST_HEAD(&dev->msi_list);
#endif
}
+
+#ifdef CONFIG_ACPI
+#include <linux/acpi.h>
+#include <linux/pci-acpi.h>
+static void __devinit msi_acpi_init(void)
+{
+ if (acpi_pci_disabled)
+ return;
+ pci_osc_support_set(OSC_MSI_SUPPORT);
+ pcie_osc_support_set(OSC_MSI_SUPPORT);
+}
+#else
+static inline void msi_acpi_init(void) { }
+#endif /* CONFIG_ACPI */
+
+void __devinit msi_init(void)
+{
+ if (!pci_msi_enable)
+ return;
+ msi_acpi_init();
+}
--- head-2011-03-17.orig/drivers/pci/probe.c 2011-03-17 14:35:46.000000000 +0100
+++ head-2011-03-17/drivers/pci/probe.c 2011-02-01 14:39:24.000000000 +0100
@@ -1214,6 +1214,11 @@ static void pci_init_capabilities(struct
/* Vital Product Data */
pci_vpd_pci22_init(dev);

+#ifdef CONFIG_XEN
+ if (!is_initial_xendomain())
+ return;
+#endif
+
/* Alternative Routing-ID Forwarding */
pci_enable_ari(dev);

--- head-2011-03-17.orig/drivers/xen/Kconfig 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/Kconfig 2011-02-02 15:36:33.000000000 +0100
@@ -344,9 +344,6 @@ config XEN_SMPBOOT
def_bool y
depends on SMP && !PPC_XEN

-config XEN_XENCOMM
- bool
-
config XEN_DEVMEM
def_bool y

@@ -452,4 +449,7 @@ config SWIOTLB_XEN
depends on PCI
select SWIOTLB

+config XEN_XENCOMM
+ bool
+
endmenu
--- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/Makefile 2011-02-01 14:39:24.000000000 +0100
@@ -1,4 +1,5 @@
obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o
+xen-hotplug-$(CONFIG_PARAVIRT_XEN) := cpu_hotplug.o
xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o

xen-balloon-$(CONFIG_XEN) := balloon/
@@ -11,6 +12,7 @@ obj-$(CONFIG_XEN) += char/
xen-backend-$(CONFIG_XEN_BACKEND) := util.o

obj-$(CONFIG_XEN) += features.o $(xen-backend-y) $(xen-backend-m)
+obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
obj-$(CONFIG_XEN_BALLOON) += $(xen-balloon-y)
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
--- head-2011-03-17.orig/drivers/xen/blkback/vbd.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkback/vbd.c 2011-02-01 14:39:24.000000000 +0100
@@ -95,7 +95,8 @@ int vbd_create(blkif_t *blkif, blkif_vde
void vbd_free(struct vbd *vbd)
{
if (vbd->bdev)
- blkdev_put(vbd->bdev);
+ blkdev_put(vbd->bdev,
+ vbd->readonly ? FMODE_READ : FMODE_WRITE);
vbd->bdev = NULL;
}

--- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-02-01 14:39:24.000000000 +0100
@@ -342,6 +342,7 @@ static void connect(struct blkfront_info
return;
pr_info("Setting capacity to %Lu\n", sectors);
set_capacity(info->gd, sectors);
+ revalidate_disk(info->gd);

/* fall through */
case BLKIF_STATE_SUSPENDED:
@@ -500,9 +501,15 @@ static void blkif_restart_queue_callback
schedule_work(&info->work);
}

+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
int blkif_open(struct inode *inode, struct file *filep)
{
- struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ struct block_device *bd = inode->i_bdev;
+#else
+int blkif_open(struct block_device *bd, fmode_t mode)
+{
+#endif
+ struct blkfront_info *info = bd->bd_disk->private_data;

if (!info->xbdev)
return -ENODEV;
@@ -511,9 +518,16 @@ int blkif_open(struct inode *inode, stru
}


+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
int blkif_release(struct inode *inode, struct file *filep)
{
- struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ struct gendisk *disk = inode->i_bdev->bd_disk;
+#else
+int blkif_release(struct gendisk *disk, fmode_t mode)
+{
+#endif
+ struct blkfront_info *info = disk->private_data;
+
info->users--;
if (info->users == 0) {
/* Check whether we have been instructed to close. We will
@@ -532,10 +546,17 @@ int blkif_release(struct inode *inode, s
}


+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
int blkif_ioctl(struct inode *inode, struct file *filep,
unsigned command, unsigned long argument)
{
- struct blkfront_info *info = inode->i_bdev->bd_disk->private_data;
+ struct block_device *bd = inode->i_bdev;
+#else
+int blkif_ioctl(struct block_device *bd, fmode_t mode,
+ unsigned command, unsigned long argument)
+{
+#endif
+ struct blkfront_info *info = bd->bd_disk->private_data;
int i;

DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
@@ -544,7 +565,6 @@ int blkif_ioctl(struct inode *inode, str
switch (command) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
case HDIO_GETGEO: {
- struct block_device *bd = inode->i_bdev;
struct hd_geometry geo;
int ret;

@@ -586,10 +606,14 @@ int blkif_ioctl(struct inode *inode, str
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
return scsi_cmd_ioctl(filep, info->gd, command,
(void __user *)argument);
-#else
+#elif LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
return scsi_cmd_ioctl(filep, info->rq,
info->gd, command,
(void __user *)argument);
+#else
+ return scsi_cmd_ioctl(info->rq, info->gd,
+ mode, command,
+ (void __user *)argument);
#endif
}
}
--- head-2011-03-17.orig/drivers/xen/blkfront/block.h 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/block.h 2011-02-01 14:39:24.000000000 +0100
@@ -123,10 +123,17 @@ struct blkfront_info

extern spinlock_t blkif_io_lock;

+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
extern int blkif_open(struct inode *inode, struct file *filep);
extern int blkif_release(struct inode *inode, struct file *filep);
extern int blkif_ioctl(struct inode *inode, struct file *filep,
unsigned command, unsigned long argument);
+#else
+extern int blkif_open(struct block_device *bdev, fmode_t mode);
+extern int blkif_release(struct gendisk *disk, fmode_t mode);
+extern int blkif_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned command, unsigned long argument);
+#endif
extern int blkif_getgeo(struct block_device *, struct hd_geometry *);
extern int blkif_check(dev_t dev);
extern int blkif_revalidate(dev_t dev);
--- head-2011-03-17.orig/drivers/xen/blktap2/device.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/device.c 2011-02-07 14:14:26.000000000 +0100
@@ -36,10 +36,10 @@ dev_to_blktap(struct blktap_device *dev)
}

static int
-blktap_device_open(struct inode *inode, struct file *filep)
+blktap_device_open(struct block_device *bd, fmode_t mode)
{
struct blktap *tap;
- struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
+ struct blktap_device *dev = bd->bd_disk->private_data;

if (!dev)
return -ENOENT;
@@ -55,9 +55,9 @@ blktap_device_open(struct inode *inode,
}

static int
-blktap_device_release(struct inode *inode, struct file *filep)
+blktap_device_release(struct gendisk *disk, fmode_t mode)
{
- struct blktap_device *dev = inode->i_bdev->bd_disk->private_data;
+ struct blktap_device *dev = disk->private_data;
struct blktap *tap = dev_to_blktap(dev);

dev->users--;
@@ -85,18 +85,17 @@ blktap_device_getgeo(struct block_device
}

static int
-blktap_device_ioctl(struct inode *inode, struct file *filep,
+blktap_device_ioctl(struct block_device *bd, fmode_t mode,
unsigned command, unsigned long argument)
{
int i;

- DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
- command, (long)argument, inode->i_rdev);
+ DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx\n",
+ command, (long)argument);

switch (command) {
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16)
case HDIO_GETGEO: {
- struct block_device *bd = inode->i_bdev;
struct hd_geometry geo;
int ret;

@@ -762,7 +761,7 @@ blktap_device_close_bdev(struct blktap *
dev = &tap->device;

if (dev->bdev)
- blkdev_put(dev->bdev);
+ blkdev_put(dev->bdev, FMODE_WRITE);

dev->bdev = NULL;
clear_bit(BLKTAP_PASSTHROUGH, &tap->dev_inuse);
@@ -786,7 +785,7 @@ blktap_device_open_bdev(struct blktap *t
if (!bdev->bd_disk) {
BTERR("device %x:%x doesn't exist\n",
MAJOR(pdev), MINOR(pdev));
- blkdev_put(dev->bdev);
+ blkdev_put(bdev, FMODE_WRITE);
return -ENOENT;
}

--- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-01 14:39:24.000000000 +0100
@@ -149,7 +149,7 @@ static void bind_evtchn_to_cpu(unsigned
BUG_ON(!test_bit(chn, s->evtchn_mask));

if (irq != -1)
- irq_desc[irq].affinity = cpumask_of_cpu(cpu);
+ irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);

clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
@@ -162,7 +162,7 @@ static void init_evtchn_cpu_bindings(voi

/* By default all event channels notify CPU#0. */
for (i = 0; i < NR_IRQS; i++)
- irq_desc[i].affinity = cpumask_of_cpu(0);
+ irq_to_desc(i)->affinity = cpumask_of_cpu(0);

memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
for_each_possible_cpu(i)
@@ -747,7 +747,7 @@ static void ack_dynirq(unsigned int irq)

static void end_dynirq(unsigned int irq)
{
- if (!(irq_desc[irq].status & IRQ_DISABLED)) {
+ if (!(irq_to_desc(irq)->status & IRQ_DISABLED)) {
move_masked_irq(irq);
unmask_dynirq(irq);
}
@@ -841,7 +841,7 @@ static void enable_pirq(unsigned int irq
bind_pirq.pirq = evtchn_get_xen_pirq(irq);
/* NB. We are happy to share unless we are probing. */
bind_pirq.flags = test_and_clear_bit(irq - PIRQ_BASE, probing_pirq)
- || (irq_desc[irq].status & IRQ_AUTODETECT)
+ || (irq_to_desc(irq)->status & IRQ_AUTODETECT)
? 0 : BIND_PIRQ__WILL_SHARE;
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
if (bind_pirq.flags)
@@ -900,11 +900,13 @@ static void unmask_pirq(unsigned int irq

static void end_pirq(unsigned int irq)
{
- if ((irq_desc[irq].status & (IRQ_DISABLED|IRQ_PENDING)) ==
+ const struct irq_desc *desc = irq_to_desc(irq);
+
+ if ((desc->status & (IRQ_DISABLED|IRQ_PENDING)) ==
(IRQ_DISABLED|IRQ_PENDING))
shutdown_pirq(irq);
else {
- if (!(irq_desc[irq].status & IRQ_DISABLED))
+ if (!(desc->status & IRQ_DISABLED))
move_masked_irq(irq);
unmask_pirq(irq);
}
@@ -1051,7 +1053,7 @@ static void restore_cpu_ipis(unsigned in
bind_evtchn_to_cpu(evtchn, cpu);

/* Ready for use. */
- if (!(irq_desc[irq].status & IRQ_DISABLED))
+ if (!(irq_to_desc(irq)->status & IRQ_DISABLED))
unmask_evtchn(evtchn);
}
}
@@ -1187,7 +1189,7 @@ void __init xen_init_IRQ(void)
for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
irq_bindcount[i] = 0;

- irq_desc[i].status |= IRQ_NOPROBE;
+ irq_to_desc(i)->status |= IRQ_NOPROBE;
set_irq_chip_and_handler_name(i, &dynirq_chip,
handle_fasteoi_irq, "fasteoi");
}
--- head-2011-03-17.orig/drivers/xen/core/smpboot.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/smpboot.c 2011-02-01 14:39:24.000000000 +0100
@@ -25,10 +25,6 @@
#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>

-extern irqreturn_t smp_reschedule_interrupt(int, void *);
-extern irqreturn_t smp_call_function_interrupt(int, void *);
-extern irqreturn_t smp_call_function_single_interrupt(int, void *);
-
extern int local_setup_timer(unsigned int cpu);
extern void local_teardown_timer(unsigned int cpu);

@@ -147,7 +143,7 @@ static void __cpuinit xen_smp_intr_exit(
}
#endif

-void __cpuinit cpu_bringup(void)
+static void __cpuinit cpu_bringup(void)
{
cpu_init();
identify_secondary_cpu(&current_cpu_data);
@@ -381,6 +377,20 @@ int __cpuinit __cpu_up(unsigned int cpu)
return 0;
}

+void __ref play_dead(void)
+{
+ idle_task_exit();
+ local_irq_disable();
+ cpu_clear(smp_processor_id(), cpu_initialized);
+ preempt_enable_no_resched();
+ VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
+#ifdef CONFIG_HOTPLUG_CPU
+ cpu_bringup();
+#else
+ BUG();
+#endif
+}
+
void __init smp_cpus_done(unsigned int max_cpus)
{
}
--- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-01 14:39:24.000000000 +0100
@@ -952,7 +952,7 @@ static int network_start_xmit(struct sk_
return 0;
}

- frags += (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
+ frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
if (unlikely(frags > MAX_SKB_FRAGS + 1)) {
pr_alert("xennet: skb rides the rocket: %d frags\n", frags);
dump_stack();
--- head-2011-03-17.orig/drivers/xen/scsifront/scsifront.c 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-17/drivers/xen/scsifront/scsifront.c 2011-02-08 10:04:41.000000000 +0100
@@ -352,7 +352,7 @@ static int scsifront_queuecommand(struct
memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);

ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
- ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
+ ring_req->timeout_per_command = (sc->request->timeout / HZ);

info->shadow[rqid].req_scsi_cmnd = (unsigned long)sc;
info->shadow[rqid].sc_data_direction = sc->sc_data_direction;
@@ -421,7 +421,7 @@ static int scsifront_dev_reset_handler(s
memset(ring_req->cmnd, 0, VSCSIIF_MAX_COMMAND_SIZE);

ring_req->sc_data_direction = (uint8_t)sc->sc_data_direction;
- ring_req->timeout_per_command = (sc->timeout_per_command / HZ);
+ ring_req->timeout_per_command = (sc->request->timeout / HZ);
ring_req->nr_segments = 0;

scsifront_do_request(info);
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.h 2011-01-31 17:49:31.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.h 2011-02-01 14:39:24.000000000 +0100
@@ -40,6 +40,11 @@
#define XEN_BUS_ID_SIZE BUS_ID_SIZE
#endif

+#ifdef CONFIG_PARAVIRT_XEN
+#define is_running_on_xen() xen_domain()
+#define is_initial_xendomain() xen_initial_domain()
+#endif
+
#if defined(CONFIG_XEN_BACKEND) || defined(CONFIG_XEN_BACKEND_MODULE)
extern void xenbus_backend_suspend(int (*fn)(struct device *, void *));
extern void xenbus_backend_resume(int (*fn)(struct device *, void *));
--- head-2011-03-17.orig/include/xen/cpu_hotplug.h 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/include/xen/cpu_hotplug.h 2011-02-01 14:39:24.000000000 +0100
@@ -15,8 +15,6 @@ void init_xenbus_allowed_cpumask(void);
int smp_suspend(void);
void smp_resume(void);

-void cpu_bringup(void);
-
#else /* !defined(CONFIG_HOTPLUG_CPU) */

#define cpu_up_check(cpu) (0)
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 14:39:24.000000000 +0100
@@ -57,7 +57,6 @@ enum dma_sync_target {

int swiotlb_force;

-static char *iotlb_virt_start;
static unsigned long iotlb_nslabs;

/*
@@ -65,16 +64,7 @@ static unsigned long iotlb_nslabs;
* swiotlb_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
-static unsigned long iotlb_pfn_start, iotlb_pfn_end;
-
-/* Does the given dma address reside within the swiotlb aperture? */
-static inline int in_swiotlb_aperture(dma_addr_t dev_addr)
-{
- unsigned long pfn = mfn_to_local_pfn(dev_addr >> PAGE_SHIFT);
- return (pfn_valid(pfn)
- && (pfn >= iotlb_pfn_start)
- && (pfn < iotlb_pfn_end));
-}
+static char *io_tlb_start, *io_tlb_end;

/*
* When the IOMMU overflows we return a fallback buffer. This sets the size.
@@ -159,15 +149,15 @@ swiotlb_init_with_default_size(size_t de
/*
* Get IO TLB memory from the low pages
*/
- iotlb_virt_start = alloc_bootmem_pages(bytes);
- if (!iotlb_virt_start)
+ io_tlb_start = alloc_bootmem_pages(bytes);
+ if (!io_tlb_start)
panic("Cannot allocate SWIOTLB buffer!\n");

dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
for (i = 0; i < iotlb_nslabs; i += IO_TLB_SEGSIZE) {
do {
rc = xen_create_contiguous_region(
- (unsigned long)iotlb_virt_start + (i << IO_TLB_SHIFT),
+ (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT),
get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
dma_bits);
} while (rc && dma_bits++ < max_dma_bits);
@@ -178,10 +168,10 @@ swiotlb_init_with_default_size(size_t de
"some DMA memory (e.g., dom0_mem=-128M).\n");
iotlb_nslabs = i;
i <<= IO_TLB_SHIFT;
- free_bootmem(__pa(iotlb_virt_start + i), bytes - i);
+ free_bootmem(__pa(io_tlb_start + i), bytes - i);
bytes = i;
for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
- unsigned int bits = fls64(virt_to_bus(iotlb_virt_start + i - 1));
+ unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1));

if (bits > dma_bits)
dma_bits = bits;
@@ -189,6 +179,7 @@ swiotlb_init_with_default_size(size_t de
break;
}
}
+ io_tlb_end = io_tlb_start + bytes;

/*
* Allocate and initialize the free list array. This array is used
@@ -217,15 +208,12 @@ swiotlb_init_with_default_size(size_t de
if (rc)
panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");

- iotlb_pfn_start = __pa(iotlb_virt_start) >> PAGE_SHIFT;
- iotlb_pfn_end = iotlb_pfn_start + (bytes >> PAGE_SHIFT);
-
printk(KERN_INFO "Software IO TLB enabled: \n"
" Aperture: %lu megabytes\n"
" Kernel range: %p - %p\n"
" Address size: %u bits\n",
bytes >> 20,
- iotlb_virt_start, iotlb_virt_start + bytes,
+ io_tlb_start, io_tlb_end,
dma_bits);
}

@@ -253,6 +241,18 @@ swiotlb_init(void)
printk(KERN_INFO "Software IO TLB disabled\n");
}

+static int is_swiotlb_buffer(dma_addr_t addr)
+{
+ unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
+ char *va = pfn_valid(pfn) ? __va(pfn << PAGE_SHIFT) : NULL;
+
+#ifdef CONFIG_HIGHMEM
+ if (pfn >= highstart_pfn)
+ return 0;
+#endif
+ return va >= io_tlb_start && va < io_tlb_end;
+}
+
/*
* We use __copy_to_user_inatomic to transfer to the host buffer because the
* buffer may be mapped read-only (e.g, in blkback driver) but lower-level
@@ -362,7 +362,7 @@ map_single(struct device *hwdev, struct
io_tlb_list[i] = 0;
for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
io_tlb_list[i] = ++count;
- dma_addr = iotlb_virt_start + (index << IO_TLB_SHIFT);
+ dma_addr = io_tlb_start + (index << IO_TLB_SHIFT);

/*
* Update the indices to avoid searching in the next
@@ -404,7 +404,7 @@ found:

static struct phys_addr dma_addr_to_phys_addr(char *dma_addr)
{
- int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
struct phys_addr buffer = io_tlb_orig_addr[index];
buffer.offset += (long)dma_addr & ((1 << IO_TLB_SHIFT) - 1);
buffer.page += buffer.offset >> PAGE_SHIFT;
@@ -420,7 +420,7 @@ unmap_single(struct device *hwdev, char
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (dma_addr - iotlb_virt_start) >> IO_TLB_SHIFT;
+ int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
struct phys_addr buffer = dma_addr_to_phys_addr(dma_addr);

/*
@@ -527,7 +527,7 @@ _swiotlb_map_single(struct device *hwdev
* buffering it.
*/
if (!range_straddles_page_boundary(paddr, size) &&
- !address_needs_mapping(hwdev, dev_addr))
+ !address_needs_mapping(hwdev, dev_addr, size))
return dev_addr;

/*
@@ -578,9 +578,11 @@ void
swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, struct dma_attrs *attrs)
{
+ char *dma_addr = bus_to_virt(dev_addr);
+
BUG_ON(dir == DMA_NONE);
- if (in_swiotlb_aperture(dev_addr))
- unmap_single(hwdev, bus_to_virt(dev_addr), size, dir);
+ if (is_swiotlb_buffer(dev_addr))
+ unmap_single(hwdev, dma_addr, size, dir);
else
gnttab_dma_unmap_page(dev_addr);
}
@@ -606,9 +608,11 @@ static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir, int target)
{
+ char *dma_addr = bus_to_virt(dev_addr);
+
BUG_ON(dir == DMA_NONE);
- if (in_swiotlb_aperture(dev_addr))
- sync_single(hwdev, bus_to_virt(dev_addr), size, dir, target);
+ if (is_swiotlb_buffer(dev_addr))
+ sync_single(hwdev, dma_addr, size, dir, target);
}

void
@@ -633,10 +637,11 @@ swiotlb_sync_single_range(struct device
unsigned long offset, size_t size,
int dir, int target)
{
+ char *dma_addr = bus_to_virt(dev_addr);
+
BUG_ON(dir == DMA_NONE);
- if (in_swiotlb_aperture(dev_addr))
- sync_single(hwdev, bus_to_virt(dev_addr + offset), size,
- dir, target);
+ if (is_swiotlb_buffer(dev_addr))
+ sync_single(hwdev, dma_addr + offset, size, dir, target);
}

void
@@ -690,7 +695,7 @@ swiotlb_map_sg_attrs(struct device *hwde

if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg))
+ sg->offset, sg->length)
- || address_needs_mapping(hwdev, dev_addr)) {
+ || address_needs_mapping(hwdev, dev_addr, sg->length)) {
gnttab_dma_unmap_page(dev_addr);
buffer.page = sg_page(sg);
buffer.offset = sg->offset;
@@ -734,7 +739,7 @@ swiotlb_unmap_sg_attrs(struct device *hw
BUG_ON(dir == DMA_NONE);

for_each_sg(sgl, sg, nelems, i) {
- if (in_swiotlb_aperture(sg->dma_address))
+ if (sg->dma_address != sg_phys(sg))
unmap_single(hwdev, bus_to_virt(sg->dma_address),
sg->dma_length, dir);
else
@@ -767,7 +772,7 @@ swiotlb_sync_sg(struct device *hwdev, st
BUG_ON(dir == DMA_NONE);

for_each_sg(sgl, sg, nelems, i) {
- if (in_swiotlb_aperture(sg->dma_address))
+ if (sg->dma_address != sg_phys(sg))
sync_single(hwdev, bus_to_virt(sg->dma_address),
sg->dma_length, dir, target);
}
--- head-2011-03-17.orig/mm/vmalloc.c 2011-01-31 17:32:29.000000000 +0100
+++ head-2011-03-17/mm/vmalloc.c 2011-02-01 14:39:24.000000000 +0100
@@ -478,6 +478,8 @@ static void vmap_debug_free_range(unsign
#ifdef CONFIG_DEBUG_PAGEALLOC
vunmap_page_range(start, end);
flush_tlb_kernel_range(start, end);
+#elif defined(CONFIG_XEN) && defined(CONFIG_X86)
+ vunmap_page_range(start, end);
#endif
}