From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: [PATCH] Linux: Update to 2.6.27
Patch-mainline: 2.6.27

This patch contains the differences between Linux 2.6.26 and 2.6.27.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.27" by xen-port-patches.py

Removed adjustments NO_HZ -> NO_HZ || NO_IDLE_HZ from kernel/{hr,}timer.c,
as they would get removed again by xen-clockevents (and really shouldn't
have been needed - see SLE11 SPn).

--- head-2011-03-11.orig/arch/x86/Kconfig 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/Kconfig 2011-02-01 14:38:38.000000000 +0100
@@ -723,7 +723,7 @@ config AMD_IOMMU
bool "AMD IOMMU support"
select SWIOTLB
select PCI_MSI
- depends on X86_64 && PCI && ACPI
+ depends on X86_64 && PCI && ACPI && !XEN
---help---
With this option you can enable support for AMD IOMMU hardware in
your system. An IOMMU is a hardware component which provides
@@ -1465,7 +1465,7 @@ config MTRR
config MTRR_SANITIZER
def_bool y
prompt "MTRR cleanup support"
- depends on MTRR
+ depends on MTRR && !XEN
---help---
Convert MTRR layout from continuous to discrete, so X drivers can
add writeback entries.
--- head-2011-03-11.orig/arch/x86/Kconfig.debug 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-11/arch/x86/Kconfig.debug 2011-02-01 14:38:38.000000000 +0100
@@ -25,6 +25,7 @@ config STRICT_DEVMEM
config X86_VERBOSE_BOOTUP
bool "Enable verbose x86 bootup info messages"
default y
+ depends on !XEN
---help---
Enables the informational output from the decompression stage
(e.g. bzImage) of the boot. If you disable this you will still
@@ -179,6 +180,7 @@ config IOMMU_LEAK
config HAVE_MMIOTRACE_SUPPORT
def_bool y
+ depends on !XEN
config X86_DECODER_SELFTEST
bool "x86 instruction decoder selftest"
--- head-2011-03-11.orig/arch/x86/Makefile 2011-02-01 14:11:04.000000000 +0100
+++ head-2011-03-11/arch/x86/Makefile 2011-02-01 14:38:38.000000000 +0100
@@ -117,8 +117,8 @@ endif
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)
# Xen subarch support
-mflags-$(CONFIG_X86_XEN) := -Iinclude/asm-x86/mach-xen
-mcore-$(CONFIG_X86_XEN) := arch/x86/mach-xen/
+mflags-$(CONFIG_XEN) := -Iinclude/asm-x86/mach-xen
+mcore-$(CONFIG_XEN) := arch/x86/mach-xen/
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)
--- head-2011-03-11.orig/arch/x86/ia32/ia32entry-xen.S 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:38:38.000000000 +0100
@@ -15,6 +15,16 @@
#include <asm/irqflags.h>
#include <linux/linkage.h>
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+#include <linux/elf-em.h>
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
+#define __AUDIT_ARCH_LE 0x40000000
+
+#ifndef CONFIG_AUDITSYSCALL
+#define sysexit_audit int_ret_from_sys_call
+#define sysretl_audit int_ret_from_sys_call
+#endif
+
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
@@ -37,6 +47,11 @@
movq %rax,R8(%rsp)
.endm
+ /*
+ * Reload arg registers from stack in case ptrace changed them.
+ * We don't reload %eax because syscall_trace_enter() returned
+ * the value it wants us to use in the table lookup.
+ */
.macro LOAD_ARGS32 offset
movl \offset(%rsp),%r11d
movl \offset+8(%rsp),%r10d
@@ -46,7 +61,6 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
- movl \offset+72(%rsp),%eax
.endm
.macro CFI_STARTPROC32 simple
@@ -61,6 +75,19 @@
CFI_UNDEFINED r15
.endm
+#ifdef CONFIG_PARAVIRT
+ENTRY(native_usergs_sysret32)
+ swapgs
+ sysretl
+ENDPROC(native_usergs_sysret32)
+
+ENTRY(native_irq_enable_sysexit)
+ swapgs
+ sti
+ sysexit
+ENDPROC(native_irq_enable_sysexit)
+#endif
+
/*
* 32bit SYSENTER instruction entry.
*
@@ -98,7 +125,7 @@ ENTRY(ia32_sysenter_target)
CFI_RESTORE rcx
movl %ebp,%ebp /* zero extension */
movl %eax,%eax
- movl 48-THREAD_SIZE+threadinfo_sysenter_return(%rsp),%r10d
+ movl 48-THREAD_SIZE+TI_sysenter_return(%rsp),%r10d
movl $__USER32_DS,40(%rsp)
movq %rbp,32(%rsp)
movl $__USER32_CS,16(%rsp)
@@ -113,19 +140,75 @@ ENTRY(ia32_sysenter_target)
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,threadinfo_status(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz sysenter_tracesys
-sysenter_do_call:
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
+sysenter_do_call:
IA32_ARG_FIXUP 1
+sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
+ jnz sysexit_audit
jmp int_ret_from_sys_call
+#ifdef CONFIG_AUDITSYSCALL
+ .macro auditsys_entry_common
+ movl %esi,%r9d /* 6th arg: 4th syscall arg */
+ movl %edx,%r8d /* 5th arg: 3rd syscall arg */
+ /* (already in %ecx) 4th arg: 2nd syscall arg */
+ movl %ebx,%edx /* 3rd arg: 1st syscall arg */
+ movl %eax,%esi /* 2nd arg: syscall number */
+ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
+ cmpl $(IA32_NR_syscalls-1),%eax
+ ja ia32_badsys
+ movl %ebx,%edi /* reload 1st syscall arg */
+ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
+ movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
+ movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
+ movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
+ .endm
+
+ .macro auditsys_exit exit,ebpsave=RBP
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+ jnz int_ret_from_sys_call
+ TRACE_IRQS_ON
+ ENABLE_INTERRUPTS(CLBR_NONE)
+ movl %eax,%esi /* second arg, syscall return value */
+ cmpl $0,%eax /* is it < 0? */
+ setl %al /* 1 if so, 0 if not */
+ movzbl %al,%edi /* zero-extend that into %edi */
+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
+ call audit_syscall_exit
+ movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF
+ jmp int_with_check
+ .endm
+
+sysenter_auditsys:
+ auditsys_entry_common
+ movl %ebp,%r9d /* reload 6th syscall arg */
+ jmp sysenter_dispatch
+
+sysexit_audit:
+ auditsys_exit sysexit_from_sys_call
+#endif
+
sysenter_tracesys:
xchgl %r9d,%ebp
+#ifdef CONFIG_AUDITSYSCALL
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+ jz sysenter_auditsys
+#endif
SAVE_REST
CLEAR_RREGS
movq %r9,R9(%rsp)
@@ -186,18 +269,38 @@ ENTRY(ia32_cstar_target)
.quad 1b,ia32_badarg
.previous
GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,threadinfo_status(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz cstar_tracesys
cstar_do_call:
cmpl $IA32_NR_syscalls-1,%eax
ja ia32_badsys
IA32_ARG_FIXUP 1
+cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
movq %rax,RAX-ARGOFFSET(%rsp)
+ GET_THREAD_INFO(%r10)
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
+ jnz sysretl_audit
jmp int_ret_from_sys_call
-cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+cstar_auditsys:
+ movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
+ auditsys_entry_common
+ movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
+ jmp cstar_dispatch
+
+sysretl_audit:
+ auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+#endif
+
+cstar_tracesys:
+#ifdef CONFIG_AUDITSYSCALL
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
+ jz cstar_auditsys
+#endif
xchgl %r9d,%ebp
SAVE_REST
CLEAR_RREGS
@@ -263,8 +366,8 @@ ENTRY(ia32_syscall)
this could be a problem. */
SAVE_ARGS 0,0,1
GET_THREAD_INFO(%r10)
- orl $TS_COMPAT,threadinfo_status(%r10)
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
ia32_do_syscall:
cmpl $(IA32_NR_syscalls-1),%eax
@@ -309,13 +412,11 @@ quiet_ni_syscall:
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
- PTREGSCALL stub32_sigsuspend, sys32_sigsuspend, %rcx
PTREGSCALL stub32_execve, sys32_execve, %rcx
PTREGSCALL stub32_fork, sys_fork, %rdi
PTREGSCALL stub32_clone, sys32_clone, %rdx
PTREGSCALL stub32_vfork, sys_vfork, %rdi
PTREGSCALL stub32_iopl, sys_iopl, %rsi
- PTREGSCALL stub32_rt_sigsuspend, sys_rt_sigsuspend, %rdx
ENTRY(ia32_ptregs_common)
popq %r11
@@ -415,7 +516,7 @@ ia32_sys_call_table:
.quad sys_ssetmask
.quad sys_setreuid16 /* 70 */
.quad sys_setregid16
- .quad stub32_sigsuspend
+ .quad sys32_sigsuspend
.quad compat_sys_sigpending
.quad sys_sethostname
.quad compat_sys_setrlimit /* 75 */
@@ -522,7 +623,7 @@ ia32_sys_call_table:
.quad sys32_rt_sigpending
.quad compat_sys_rt_sigtimedwait
.quad sys32_rt_sigqueueinfo
- .quad stub32_rt_sigsuspend
+ .quad sys_rt_sigsuspend
.quad sys32_pread /* 180 */
.quad sys32_pwrite
.quad sys_chown16
@@ -670,4 +771,10 @@ ia32_sys_call_table:
.quad sys32_fallocate
.quad compat_sys_timerfd_settime /* 325 */
.quad compat_sys_timerfd_gettime
+ .quad compat_sys_signalfd4
+ .quad sys_eventfd2
+ .quad sys_epoll_create1
+ .quad sys_dup3 /* 330 */
+ .quad sys_pipe2
+ .quad sys_inotify_init1
ia32_syscall_end:
--- head-2011-03-11.orig/arch/x86/kernel/Makefile 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/kernel/Makefile 2011-02-01 14:38:38.000000000 +0100
@@ -125,9 +125,11 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
- obj-$(CONFIG_XEN) += nmi_64.o
+ obj-$(CONFIG_XEN) += nmi.o
time_64-$(CONFIG_XEN) += time_32.o
endif
-disabled-obj-$(CONFIG_XEN) := crash.o early-quirks.o hpet.o i8253.o i8259_$(BITS).o \
- pci-swiotlb_64.o reboot.o smpboot.o tlb_$(BITS).o tsc_$(BITS).o tsc_sync.o vsmp_64.o
+disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
+ i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o smpboot.o \
+ tlb_$(BITS).o tsc.o tsc_sync.o vsmp_64.o
+disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
--- head-2011-03-11.orig/arch/x86/kernel/acpi/boot.c 2011-03-11 10:59:02.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/acpi/boot.c 2011-03-11 10:59:30.000000000 +0100
|
|
@@ -1349,6 +1349,7 @@ static int __init dmi_disable_acpi(const
|
|
return 0;
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
/*
|
|
* Force ignoring BIOS IRQ0 pin2 override
|
|
*/
|
|
@@ -1366,6 +1367,7 @@ static int __init dmi_ignore_irq0_timer_
|
|
}
|
|
return 0;
|
|
}
|
|
+#endif
|
|
|
|
static int __init force_acpi_rsdt(const struct dmi_system_id *d)
|
|
{
|
|
@@ -1486,6 +1488,7 @@ static struct dmi_system_id __initdata a
|
|
{}
|
|
};
|
|
|
|
+#ifndef CONFIG_XEN
|
|
/* second table for DMI checks that should run after early-quirks */
|
|
static struct dmi_system_id __initdata acpi_dmi_table_late[] = {
|
|
/*
|
|
@@ -1532,6 +1535,7 @@ static struct dmi_system_id __initdata a
|
|
},
|
|
{}
|
|
};
|
|
+#endif
|
|
|
|
/*
|
|
* acpi_boot_table_init() and acpi_boot_init()
|
|
@@ -1604,8 +1608,10 @@ int __init early_acpi_boot_init(void)
|
|
|
|
int __init acpi_boot_init(void)
|
|
{
|
|
+#ifndef CONFIG_XEN
|
|
/* those are executed after early-quirks are executed */
|
|
dmi_check_system(acpi_dmi_table_late);
|
|
+#endif
|
|
|
|
/*
|
|
* If acpi_disabled, bail out
|
|
--- head-2011-03-11.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -9,6 +9,7 @@
|
|
#include <linux/bootmem.h>
|
|
#include <linux/dmi.h>
|
|
#include <linux/cpumask.h>
|
|
+#include <asm/segment.h>
|
|
|
|
#include "realmode/wakeup.h"
|
|
#include "sleep.h"
|
|
@@ -20,7 +21,7 @@ unsigned long acpi_realmode_flags;
|
|
/* address in low memory of the wakeup routine. */
|
|
static unsigned long acpi_realmode;
|
|
|
|
-#ifdef CONFIG_64BIT
|
|
+#if defined(CONFIG_SMP) && defined(CONFIG_64BIT)
|
|
static char temp_stack[10240];
|
|
#endif
|
|
#endif
|
|
@@ -54,18 +55,27 @@ int acpi_save_state_mem(void)
|
|
header->video_mode = saved_video_mode;
|
|
|
|
header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
|
|
+
|
|
+ /*
|
|
+ * Set up the wakeup GDT. We set these up as Big Real Mode,
|
|
+ * that is, with limits set to 4 GB. At least the Lenovo
|
|
+ * Thinkpad X61 is known to need this for the video BIOS
|
|
+ * initialization quirk to work; this is likely to also
|
|
+ * be the case for other laptops or integrated video devices.
|
|
+ */
|
|
+
|
|
/* GDT[0]: GDT self-pointer */
|
|
header->wakeup_gdt[0] =
|
|
(u64)(sizeof(header->wakeup_gdt) - 1) +
|
|
((u64)(acpi_wakeup_address +
|
|
((char *)&header->wakeup_gdt - (char *)acpi_realmode))
|
|
<< 16);
|
|
- /* GDT[1]: real-mode-like code segment */
|
|
- header->wakeup_gdt[1] = (0x009bULL << 40) +
|
|
- ((u64)acpi_wakeup_address << 16) + 0xffff;
|
|
- /* GDT[2]: real-mode-like data segment */
|
|
- header->wakeup_gdt[2] = (0x0093ULL << 40) +
|
|
- ((u64)acpi_wakeup_address << 16) + 0xffff;
|
|
+ /* GDT[1]: big real mode-like code segment */
|
|
+ header->wakeup_gdt[1] =
|
|
+ GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
|
|
+ /* GDT[2]: big real mode-like data segment */
|
|
+ header->wakeup_gdt[2] =
|
|
+ GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
|
|
|
|
#ifndef CONFIG_64BIT
|
|
store_gdt((struct desc_ptr *)&header->pmode_gdt);
|
|
@@ -79,7 +89,7 @@ int acpi_save_state_mem(void)
|
|
#endif /* !CONFIG_64BIT */
|
|
|
|
header->pmode_cr0 = read_cr0();
|
|
- header->pmode_cr4 = read_cr4();
|
|
+ header->pmode_cr4 = read_cr4_safe();
|
|
header->realmode_flags = acpi_realmode_flags;
|
|
header->real_magic = 0x12345678;
|
|
|
|
@@ -89,7 +99,9 @@ int acpi_save_state_mem(void)
|
|
saved_magic = 0x12345678;
|
|
#else /* CONFIG_64BIT */
|
|
header->trampoline_segment = setup_trampoline() >> 4;
|
|
- init_rsp = (unsigned long)temp_stack + 4096;
|
|
+#ifdef CONFIG_SMP
|
|
+ stack_start.sp = temp_stack + 4096;
|
|
+#endif
|
|
initial_code = (unsigned long)wakeup_long64;
|
|
saved_magic = 0x123456789abcdef0;
|
|
#endif /* CONFIG_64BIT */
|
|
@@ -145,6 +157,12 @@ static int __init acpi_sleep_setup(char
|
|
acpi_realmode_flags |= 2;
|
|
if (strncmp(str, "s3_beep", 7) == 0)
|
|
acpi_realmode_flags |= 4;
|
|
+#ifdef CONFIG_HIBERNATION
|
|
+ if (strncmp(str, "s4_nohwsig", 10) == 0)
|
|
+ acpi_no_s4_hw_signature();
|
|
+#endif
|
|
+ if (strncmp(str, "old_ordering", 12) == 0)
|
|
+ acpi_old_suspend_ordering();
|
|
str = strchr(str, ',');
|
|
if (str != NULL)
|
|
str += strspn(str, ", \t");
|
|
--- head-2011-03-11.orig/arch/x86/kernel/amd_nb.c 2011-03-15 16:45:55.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/amd_nb.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -15,6 +15,10 @@ static u32 *flush_words;
|
|
struct pci_device_id amd_nb_misc_ids[] = {
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
|
|
+#ifdef CONFIG_XEN
|
|
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_11H_NB_MISC) },
|
|
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, /* Fam12, Fam14 */
|
|
+#endif
|
|
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
|
|
{}
|
|
};
|
|
--- head-2011-03-11.orig/arch/x86/kernel/apic/apic-xen.c 2011-02-24 15:45:13.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/apic/apic-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -59,7 +59,10 @@ static cpumask_t timer_bcast_ipi;
|
|
/*
|
|
* Debug level, exported for io_apic.c
|
|
*/
|
|
-int apic_verbosity;
|
|
+unsigned int apic_verbosity;
|
|
+
|
|
+/* Have we found an MP table */
|
|
+int smp_found_config;
|
|
|
|
#ifndef CONFIG_XEN
|
|
static int modern_apic(void)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/asm-offsets_64.c 2010-01-19 16:00:48.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/asm-offsets_64.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -132,7 +132,7 @@ int main(void)
|
|
|
|
BLANK();
|
|
DEFINE(PAGE_SIZE_asm, PAGE_SIZE);
|
|
-#ifdef CONFIG_XEN
|
|
+#ifdef CONFIG_PARAVIRT_XEN
|
|
BLANK();
|
|
OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask);
|
|
OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending);
|
|
--- head-2011-03-11.orig/arch/x86/kernel/cpu/amd.c 2011-03-15 16:45:55.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/cpu/amd.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -575,6 +575,7 @@ static void __cpuinit init_amd(struct cp
|
|
fam10h_check_enable_mmcfg();
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
if (c == &boot_cpu_data && c->x86 >= 0xf) {
|
|
unsigned long long tseg;
|
|
|
|
@@ -594,6 +595,7 @@ static void __cpuinit init_amd(struct cp
|
|
}
|
|
}
|
|
#endif
|
|
+#endif
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
--- head-2011-03-11.orig/arch/x86/kernel/cpu/bugs_64.c 2011-03-15 16:45:55.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/cpu/bugs_64.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -20,6 +20,7 @@ void __init check_bugs(void)
|
|
#endif
|
|
alternative_instructions();
|
|
|
|
+#ifndef CONFIG_XEN
|
|
/*
|
|
* Make sure the first 2MB area is not mapped by huge pages
|
|
* There are typically fixed size MTRRs in there and overlapping
|
|
@@ -30,4 +31,5 @@ void __init check_bugs(void)
|
|
*/
|
|
if (!direct_gbpages)
|
|
set_memory_4k((unsigned long)__va(0), 1);
|
|
+#endif
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/cpu/common-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/cpu/common-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -13,6 +13,7 @@
|
|
#include <asm/mtrr.h>
|
|
#include <asm/mce.h>
|
|
#include <asm/pat.h>
|
|
+#include <asm/asm.h>
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
#include <asm/mpspec.h>
|
|
#include <asm/apic.h>
|
|
@@ -341,11 +342,24 @@ static void __init early_cpu_detect(void
|
|
|
|
get_cpu_vendor(c, 1);
|
|
|
|
+ early_get_cap(c);
|
|
+
|
|
if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
|
|
cpu_devs[c->x86_vendor]->c_early_init)
|
|
cpu_devs[c->x86_vendor]->c_early_init(c);
|
|
+}
|
|
|
|
- early_get_cap(c);
|
|
+/*
|
|
+ * The NOPL instruction is supposed to exist on all CPUs with
|
|
+ * family >= 6; unfortunately, that's not true in practice because
|
|
+ * of early VIA chips and (more importantly) broken virtualizers that
|
|
+ * are not easy to detect. In the latter case it doesn't even *fail*
|
|
+ * reliably, so probing for it doesn't even work. Disable it completely
|
|
+ * unless we can find a reliable way to detect all the broken cases.
|
|
+ */
|
|
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
|
|
}
|
|
|
|
static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
|
|
@@ -404,8 +418,8 @@ static void __cpuinit generic_identify(s
|
|
}
|
|
|
|
init_scattered_cpuid_features(c);
|
|
+ detect_nopl(c);
|
|
}
|
|
-
|
|
}
|
|
|
|
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
|
|
@@ -436,7 +450,7 @@ __setup("serialnumber", x86_serial_nr_se
|
|
/*
|
|
* This does the hard work of actually picking apart the CPU stuff...
|
|
*/
|
|
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
{
|
|
int i;
|
|
|
|
@@ -452,6 +466,8 @@ void __cpuinit identify_cpu(struct cpuin
|
|
#endif
|
|
c->x86_clflush_size = 32;
|
|
memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
+ if (boot_cpu_has(X86_FEATURE_SYSCALL32))
|
|
+ set_cpu_cap(c, X86_FEATURE_SYSCALL32);
|
|
|
|
if (!have_cpuid_p()) {
|
|
/*
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-11/arch/x86/kernel/cpu/common_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -0,0 +1,777 @@
|
|
+#include <linux/init.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/bitops.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/kgdb.h>
|
|
+#include <linux/topology.h>
|
|
+#include <linux/delay.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/percpu.h>
|
|
+#include <asm/i387.h>
|
|
+#include <asm/msr.h>
|
|
+#include <asm/io.h>
|
|
+#include <asm/linkage.h>
|
|
+#include <asm/mmu_context.h>
|
|
+#include <asm/mtrr.h>
|
|
+#include <asm/mce.h>
|
|
+#include <asm/pat.h>
|
|
+#include <asm/asm.h>
|
|
+#include <asm/numa.h>
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
+#include <asm/mpspec.h>
|
|
+#include <asm/apic.h>
|
|
+#include <mach_apic.h>
|
|
+#elif defined(CONFIG_XEN)
|
|
+#include <mach_apic.h>
|
|
+#endif
|
|
+#include <asm/pda.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/processor.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/proto.h>
|
|
+#include <asm/sections.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/genapic.h>
|
|
+
|
|
+#include "cpu.h"
|
|
+
|
|
+/* We need valid kernel segments for data and code in long mode too
|
|
+ * IRET will check the segment types kkeil 2000/10/28
|
|
+ * Also sysret mandates a special GDT layout
|
|
+ */
|
|
+/* The TLS descriptors are currently at a different place compared to i386.
|
|
+ Hopefully nobody expects them at a fixed place (Wine?) */
|
|
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
|
+ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
|
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
|
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
|
+} };
|
|
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
|
+
|
|
+__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
+
|
|
+/* Current gdt points %fs at the "master" per-cpu area: after this,
|
|
+ * it's on the real one. */
|
|
+void switch_to_new_gdt(void)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct desc_ptr gdt_descr;
|
|
+
|
|
+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
|
+ gdt_descr.size = GDT_SIZE - 1;
|
|
+ load_gdt(&gdt_descr);
|
|
+#else
|
|
+ void *va, *gdt_addr = get_cpu_gdt_table(smp_processor_id());
|
|
+ unsigned long frames[16];
|
|
+ unsigned int f = 0;
|
|
+
|
|
+ for (va = gdt_addr; va < gdt_addr + GDT_SIZE; va += PAGE_SIZE) {
|
|
+ frames[f++] = virt_to_mfn(va);
|
|
+ make_page_readonly(va, XENFEAT_writable_descriptor_tables);
|
|
+ }
|
|
+ if (HYPERVISOR_set_gdt(frames, GDT_SIZE / sizeof(struct desc_struct)))
|
|
+ BUG();
|
|
+#endif
|
|
+}
|
|
+
|
|
+struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
|
+
|
|
+static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ display_cacheinfo(c);
|
|
+}
|
|
+
|
|
+static struct cpu_dev __cpuinitdata default_cpu = {
|
|
+ .c_init = default_init,
|
|
+ .c_vendor = "Unknown",
|
|
+};
|
|
+static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
|
|
+
|
|
+int __cpuinit get_model_name(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ unsigned int *v;
|
|
+
|
|
+ if (c->extended_cpuid_level < 0x80000004)
|
|
+ return 0;
|
|
+
|
|
+ v = (unsigned int *) c->x86_model_id;
|
|
+ cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
|
|
+ cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
|
|
+ cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
|
|
+ c->x86_model_id[48] = 0;
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+
|
|
+void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ unsigned int n, dummy, ebx, ecx, edx;
|
|
+
|
|
+ n = c->extended_cpuid_level;
|
|
+
|
|
+ if (n >= 0x80000005) {
|
|
+ cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
|
|
+ printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
|
|
+ "D cache %dK (%d bytes/line)\n",
|
|
+ edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
|
+ c->x86_cache_size = (ecx>>24) + (edx>>24);
|
|
+ /* On K8 L1 TLB is inclusive, so don't count it */
|
|
+ c->x86_tlbsize = 0;
|
|
+ }
|
|
+
|
|
+ if (n >= 0x80000006) {
|
|
+ cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
|
|
+ ecx = cpuid_ecx(0x80000006);
|
|
+ c->x86_cache_size = ecx >> 16;
|
|
+ c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
|
|
+
|
|
+ printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
|
|
+ c->x86_cache_size, ecx & 0xFF);
|
|
+ }
|
|
+}
|
|
+
|
|
+void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
+{
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
+ u32 eax, ebx, ecx, edx;
|
|
+ int index_msb, core_bits;
|
|
+
|
|
+ cpuid(1, &eax, &ebx, &ecx, &edx);
|
|
+
|
|
+
|
|
+ if (!cpu_has(c, X86_FEATURE_HT))
|
|
+ return;
|
|
+ if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
|
|
+ goto out;
|
|
+
|
|
+ smp_num_siblings = (ebx & 0xff0000) >> 16;
|
|
+
|
|
+ if (smp_num_siblings == 1) {
|
|
+ printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
+ } else if (smp_num_siblings > 1) {
|
|
+
|
|
+ if (smp_num_siblings > NR_CPUS) {
|
|
+ printk(KERN_WARNING "CPU: Unsupported number of "
|
|
+ "siblings %d", smp_num_siblings);
|
|
+ smp_num_siblings = 1;
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ index_msb = get_count_order(smp_num_siblings);
|
|
+ c->phys_proc_id = phys_pkg_id(index_msb);
|
|
+
|
|
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
+
|
|
+ index_msb = get_count_order(smp_num_siblings);
|
|
+
|
|
+ core_bits = get_count_order(c->x86_max_cores);
|
|
+
|
|
+ c->cpu_core_id = phys_pkg_id(index_msb) &
|
|
+ ((1 << core_bits) - 1);
|
|
+ }
|
|
+out:
|
|
+ if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
|
+ printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
|
+ c->phys_proc_id);
|
|
+ printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
|
+ c->cpu_core_id);
|
|
+ }
|
|
+
|
|
+#endif
|
|
+}
|
|
+
|
|
+static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ char *v = c->x86_vendor_id;
|
|
+ int i;
|
|
+ static int printed;
|
|
+
|
|
+ for (i = 0; i < X86_VENDOR_NUM; i++) {
|
|
+ if (cpu_devs[i]) {
|
|
+ if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
|
|
+ (cpu_devs[i]->c_ident[1] &&
|
|
+ !strcmp(v, cpu_devs[i]->c_ident[1]))) {
|
|
+ c->x86_vendor = i;
|
|
+ this_cpu = cpu_devs[i];
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ if (!printed) {
|
|
+ printed++;
|
|
+ printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
|
|
+ printk(KERN_ERR "CPU: Your system may be unstable.\n");
|
|
+ }
|
|
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
+}
|
|
+
|
|
+static void __init early_cpu_support_print(void)
|
|
+{
|
|
+ int i,j;
|
|
+ struct cpu_dev *cpu_devx;
|
|
+
|
|
+ printk("KERNEL supported cpus:\n");
|
|
+ for (i = 0; i < X86_VENDOR_NUM; i++) {
|
|
+ cpu_devx = cpu_devs[i];
|
|
+ if (!cpu_devx)
|
|
+ continue;
|
|
+ for (j = 0; j < 2; j++) {
|
|
+ if (!cpu_devx->c_ident[j])
|
|
+ continue;
|
|
+ printk(" %s %s\n", cpu_devx->c_vendor,
|
|
+ cpu_devx->c_ident[j]);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * The NOPL instruction is supposed to exist on all CPUs with
|
|
+ * family >= 6, unfortunately, that's not true in practice because
|
|
+ * of early VIA chips and (more importantly) broken virtualizers that
|
|
+ * are not easy to detect. Hence, probe for it based on first
|
|
+ * principles.
|
|
+ *
|
|
+ * Note: no 64-bit chip is known to lack these, but put the code here
|
|
+ * for consistency with 32 bits, and to make it utterly trivial to
|
|
+ * diagnose the problem should it ever surface.
|
|
+ */
|
|
+static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ const u32 nopl_signature = 0x888c53b1; /* Random number */
|
|
+ u32 has_nopl = nopl_signature;
|
|
+
|
|
+ clear_cpu_cap(c, X86_FEATURE_NOPL);
|
|
+ if (c->x86 >= 6) {
|
|
+ asm volatile("\n"
|
|
+ "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
|
|
+ "2:\n"
|
|
+ " .section .fixup,\"ax\"\n"
|
|
+ "3: xor %0,%0\n"
|
|
+ " jmp 2b\n"
|
|
+ " .previous\n"
|
|
+ _ASM_EXTABLE(1b,3b)
|
|
+ : "+a" (has_nopl));
|
|
+
|
|
+ if (has_nopl == nopl_signature)
|
|
+ set_cpu_cap(c, X86_FEATURE_NOPL);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
|
|
+
|
|
+void __init early_cpu_init(void)
|
|
+{
|
|
+ struct cpu_vendor_dev *cvdev;
|
|
+
|
|
+ for (cvdev = __x86cpuvendor_start ;
|
|
+ cvdev < __x86cpuvendor_end ;
|
|
+ cvdev++)
|
|
+ cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
|
|
+ early_cpu_support_print();
|
|
+ early_identify_cpu(&boot_cpu_data);
|
|
+}
|
|
+
|
|
+/* Do some early cpuid on the boot CPU to get some parameter that are
|
|
+ needed before check_bugs. Everything advanced is in identify_cpu
|
|
+ below. */
|
|
+static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ u32 tfms, xlvl;
|
|
+
|
|
+ c->loops_per_jiffy = loops_per_jiffy;
|
|
+ c->x86_cache_size = -1;
|
|
+ c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
+ c->x86_model = c->x86_mask = 0; /* So far unknown... */
|
|
+ c->x86_vendor_id[0] = '\0'; /* Unset */
|
|
+ c->x86_model_id[0] = '\0'; /* Unset */
|
|
+ c->x86_clflush_size = 64;
|
|
+ c->x86_cache_alignment = c->x86_clflush_size;
|
|
+#ifndef CONFIG_XEN
|
|
+ c->x86_max_cores = 1;
|
|
+ c->x86_coreid_bits = 0;
|
|
+#endif
|
|
+ c->extended_cpuid_level = 0;
|
|
+ memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
+
|
|
+ /* Get vendor name */
|
|
+ cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
|
|
+ (unsigned int *)&c->x86_vendor_id[0],
|
|
+ (unsigned int *)&c->x86_vendor_id[8],
|
|
+ (unsigned int *)&c->x86_vendor_id[4]);
|
|
+
|
|
+ get_cpu_vendor(c);
|
|
+
|
|
+ /* Initialize the standard set of capabilities */
|
|
+ /* Note that the vendor-specific code below might override */
|
|
+
|
|
+ /* Intel-defined flags: level 0x00000001 */
|
|
+ if (c->cpuid_level >= 0x00000001) {
|
|
+ __u32 misc;
|
|
+ cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
|
|
+ &c->x86_capability[0]);
|
|
+ c->x86 = (tfms >> 8) & 0xf;
|
|
+ c->x86_model = (tfms >> 4) & 0xf;
|
|
+ c->x86_mask = tfms & 0xf;
|
|
+ if (c->x86 == 0xf)
|
|
+ c->x86 += (tfms >> 20) & 0xff;
|
|
+ if (c->x86 >= 0x6)
|
|
+ c->x86_model += ((tfms >> 16) & 0xF) << 4;
|
|
+ if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
|
|
+ c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
|
|
+ } else {
|
|
+ /* Have CPUID level 0 only - unheard of */
|
|
+ c->x86 = 4;
|
|
+ }
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
|
|
+#ifdef CONFIG_SMP
|
|
+ c->phys_proc_id = c->initial_apicid;
|
|
+#endif
|
|
+#endif
|
|
+ /* AMD-defined flags: level 0x80000001 */
|
|
+ xlvl = cpuid_eax(0x80000000);
|
|
+ c->extended_cpuid_level = xlvl;
|
|
+ if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
+ if (xlvl >= 0x80000001) {
|
|
+ c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
+ c->x86_capability[6] = cpuid_ecx(0x80000001);
|
|
+ }
|
|
+ if (xlvl >= 0x80000004)
|
|
+ get_model_name(c); /* Default name */
|
|
+ }
|
|
+
|
|
+ /* Transmeta-defined flags: level 0x80860001 */
|
|
+ xlvl = cpuid_eax(0x80860000);
|
|
+ if ((xlvl & 0xffff0000) == 0x80860000) {
|
|
+ /* Don't set x86_cpuid_level here for now to not confuse. */
|
|
+ if (xlvl >= 0x80860001)
|
|
+ c->x86_capability[2] = cpuid_edx(0x80860001);
|
|
+ }
|
|
+
|
|
+ if (c->extended_cpuid_level >= 0x80000007)
|
|
+ c->x86_power = cpuid_edx(0x80000007);
|
|
+
|
|
+ if (c->extended_cpuid_level >= 0x80000008) {
|
|
+ u32 eax = cpuid_eax(0x80000008);
|
|
+
|
|
+ c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
+ c->x86_phys_bits = eax & 0xff;
|
|
+ }
|
|
+
|
|
+ detect_nopl(c);
|
|
+
|
|
+ if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
|
|
+ cpu_devs[c->x86_vendor]->c_early_init)
|
|
+ cpu_devs[c->x86_vendor]->c_early_init(c);
|
|
+
|
|
+ validate_pat_support(c);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This does the hard work of actually picking apart the CPU stuff...
|
|
+ */
|
|
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ early_identify_cpu(c);
|
|
+
|
|
+ init_scattered_cpuid_features(c);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ c->apicid = phys_pkg_id(0);
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * Vendor-specific initialization. In this section we
|
|
+ * canonicalize the feature flags, meaning if there are
|
|
+ * features a certain CPU supports which CPUID doesn't
|
|
+ * tell us, CPUID claiming incorrect flags, or other bugs,
|
|
+ * we handle them here.
|
|
+ *
|
|
+ * At the end of this section, c->x86_capability better
|
|
+ * indicate the features this CPU genuinely supports!
|
|
+ */
|
|
+ if (this_cpu->c_init)
|
|
+ this_cpu->c_init(c);
|
|
+
|
|
+ detect_ht(c);
|
|
+
|
|
+ /*
|
|
+ * On SMP, boot_cpu_data holds the common feature set between
|
|
+ * all CPUs; so make sure that we indicate which features are
|
|
+ * common between the CPUs. The first time this routine gets
|
|
+ * executed, c == &boot_cpu_data.
|
|
+ */
|
|
+ if (c != &boot_cpu_data) {
|
|
+ /* AND the already accumulated flags with these */
|
|
+ for (i = 0; i < NCAPINTS; i++)
|
|
+ boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
|
|
+ }
|
|
+
|
|
+ /* Clear all flags overriden by options */
|
|
+ for (i = 0; i < NCAPINTS; i++)
|
|
+ c->x86_capability[i] &= ~cleared_cpu_caps[i];
|
|
+
|
|
+#ifdef CONFIG_X86_MCE
|
|
+ mcheck_init(c);
|
|
+#endif
|
|
+ select_idle_routine(c);
|
|
+
|
|
+#ifdef CONFIG_NUMA
|
|
+ numa_add_cpu(smp_processor_id());
|
|
+#endif
|
|
+
|
|
+}
|
|
+
|
|
+void __cpuinit identify_boot_cpu(void)
|
|
+{
|
|
+ identify_cpu(&boot_cpu_data);
|
|
+}
|
|
+
|
|
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ BUG_ON(c == &boot_cpu_data);
|
|
+ identify_cpu(c);
|
|
+ mtrr_ap_init();
|
|
+}
|
|
+
|
|
+static __init int setup_noclflush(char *arg)
|
|
+{
|
|
+ setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
|
|
+ return 1;
|
|
+}
|
|
+__setup("noclflush", setup_noclflush);
|
|
+
|
|
+void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ if (c->x86_model_id[0])
|
|
+ printk(KERN_CONT "%s", c->x86_model_id);
|
|
+
|
|
+ if (c->x86_mask || c->cpuid_level >= 0)
|
|
+ printk(KERN_CONT " stepping %02x\n", c->x86_mask);
|
|
+ else
|
|
+ printk(KERN_CONT "\n");
|
|
+}
|
|
+
|
|
+static __init int setup_disablecpuid(char *arg)
|
|
+{
|
|
+ int bit;
|
|
+ if (get_option(&arg, &bit) && bit < NCAPINTS*32)
|
|
+ setup_clear_cpu_cap(bit);
|
|
+ else
|
|
+ return 0;
|
|
+ return 1;
|
|
+}
|
|
+__setup("clearcpuid=", setup_disablecpuid);
|
|
+
|
|
+cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
|
+
|
|
+struct x8664_pda **_cpu_pda __read_mostly;
|
|
+EXPORT_SYMBOL(_cpu_pda);
|
|
+
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
+struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
|
+#endif
|
|
+
|
|
+char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
|
+
|
|
+unsigned long __supported_pte_mask __read_mostly = ~0UL;
|
|
+EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
|
+
|
|
+static int do_not_nx __cpuinitdata;
|
|
+
|
|
+/* noexec=on|off
|
|
+Control non executable mappings for 64bit processes.
|
|
+
|
|
+on Enable(default)
|
|
+off Disable
|
|
+*/
|
|
+static int __init nonx_setup(char *str)
|
|
+{
|
|
+ if (!str)
|
|
+ return -EINVAL;
|
|
+ if (!strncmp(str, "on", 2)) {
|
|
+ __supported_pte_mask |= _PAGE_NX;
|
|
+ do_not_nx = 0;
|
|
+ } else if (!strncmp(str, "off", 3)) {
|
|
+ do_not_nx = 1;
|
|
+ __supported_pte_mask &= ~_PAGE_NX;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+early_param("noexec", nonx_setup);
|
|
+
|
|
+int force_personality32;
|
|
+
|
|
+/* noexec32=on|off
|
|
+Control non executable heap for 32bit processes.
|
|
+To control the stack too use noexec=off
|
|
+
|
|
+on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
|
|
+off PROT_READ implies PROT_EXEC
|
|
+*/
|
|
+static int __init nonx32_setup(char *str)
|
|
+{
|
|
+ if (!strcmp(str, "on"))
|
|
+ force_personality32 &= ~READ_IMPLIES_EXEC;
|
|
+ else if (!strcmp(str, "off"))
|
|
+ force_personality32 |= READ_IMPLIES_EXEC;
|
|
+ return 1;
|
|
+}
|
|
+__setup("noexec32=", nonx32_setup);
|
|
+
|
|
+static void __init_refok switch_pt(int cpu)
|
|
+{
|
|
+#ifdef CONFIG_XEN
|
|
+ if (cpu == 0)
|
|
+ xen_init_pt();
|
|
+ xen_pt_switch(__pa_symbol(init_level4_pgt));
|
|
+ xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
|
|
+#endif
|
|
+}
|
|
+
|
|
+void pda_init(int cpu)
|
|
+{
|
|
+ struct x8664_pda *pda = cpu_pda(cpu);
|
|
+
|
|
+ /* Setup up data that may be needed in __get_free_pages early */
|
|
+ loadsegment(fs, 0);
|
|
+ loadsegment(gs, 0);
|
|
+#ifndef CONFIG_XEN
|
|
+ /* Memory clobbers used to order PDA accessed */
|
|
+ mb();
|
|
+ wrmsrl(MSR_GS_BASE, pda);
|
|
+ mb();
|
|
+#else
|
|
+ if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
|
|
+ (unsigned long)pda))
|
|
+ BUG();
|
|
+#endif
|
|
+
|
|
+ pda->cpunumber = cpu;
|
|
+ pda->irqcount = -1;
|
|
+ pda->kernelstack = (unsigned long)stack_thread_info() -
|
|
+ PDA_STACKOFFSET + THREAD_SIZE;
|
|
+ pda->active_mm = &init_mm;
|
|
+ pda->mmu_state = 0;
|
|
+
|
|
+ if (cpu == 0) {
|
|
+ /* others are initialized in smpboot.c */
|
|
+ pda->pcurrent = &init_task;
|
|
+ pda->irqstackptr = boot_cpu_stack;
|
|
+ pda->irqstackptr += IRQSTACKSIZE - 64;
|
|
+ } else {
|
|
+ if (!pda->irqstackptr) {
|
|
+ pda->irqstackptr = (char *)
|
|
+ __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
|
|
+ if (!pda->irqstackptr)
|
|
+ panic("cannot allocate irqstack for cpu %d",
|
|
+ cpu);
|
|
+ pda->irqstackptr += IRQSTACKSIZE - 64;
|
|
+ }
|
|
+
|
|
+ if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
|
|
+ pda->nodenumber = cpu_to_node(cpu);
|
|
+ }
|
|
+
|
|
+ switch_pt(cpu);
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
|
|
+ DEBUG_STKSZ] __page_aligned_bss;
|
|
+#endif
|
|
+
|
|
+extern asmlinkage void ignore_sysret(void);
|
|
+
|
|
+void __cpuinit syscall_init(void)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ /*
|
|
+ * LSTAR and STAR live in a bit strange symbiosis.
|
|
+ * They both write to the same internal register. STAR allows to
|
|
+ * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
|
|
+ */
|
|
+ wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
|
+ wrmsrl(MSR_LSTAR, system_call);
|
|
+ wrmsrl(MSR_CSTAR, ignore_sysret);
|
|
+
|
|
+ /* Flags to clear on syscall */
|
|
+ wrmsrl(MSR_SYSCALL_MASK,
|
|
+ X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
|
|
+#endif
|
|
+#ifdef CONFIG_IA32_EMULATION
|
|
+ syscall32_cpu_init();
|
|
+#else
|
|
+ static const struct callback_register __cpuinitconst cstar = {
|
|
+ .type = CALLBACKTYPE_syscall32,
|
|
+ .address = (unsigned long)ignore_sysret
|
|
+ };
|
|
+
|
|
+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
|
|
+ printk(KERN_WARNING "Unable to register CSTAR callback\n");
|
|
+#endif
|
|
+}
|
|
+
|
|
+void __cpuinit check_efer(void)
|
|
+{
|
|
+ unsigned long efer;
|
|
+
|
|
+ rdmsrl(MSR_EFER, efer);
|
|
+ if (!(efer & EFER_NX) || do_not_nx)
|
|
+ __supported_pte_mask &= ~_PAGE_NX;
|
|
+}
|
|
+
|
|
+unsigned long kernel_eflags;
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+/*
|
|
+ * Copies of the original ist values from the tss are only accessed during
|
|
+ * debugging, no special alignment required.
|
|
+ */
|
|
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * cpu_init() initializes state that is per-CPU. Some data is already
|
|
+ * initialized (naturally) in the bootstrap process, such as the GDT
|
|
+ * and IDT. We reload them nevertheless, this function acts as a
|
|
+ * 'CPU state barrier', nothing should get across.
|
|
+ * A lot of state is already set up in PDA init.
|
|
+ */
|
|
+void __cpuinit cpu_init(void)
|
|
+{
|
|
+ int cpu = stack_smp_processor_id();
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ struct tss_struct *t = &per_cpu(init_tss, cpu);
|
|
+ struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
|
|
+ unsigned long v;
|
|
+ char *estacks = NULL;
|
|
+ int i;
|
|
+#endif
|
|
+ struct task_struct *me;
|
|
+
|
|
+ /* CPU 0 is initialised in head64.c */
|
|
+ if (cpu != 0)
|
|
+ pda_init(cpu);
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ else
|
|
+ estacks = boot_exception_stacks;
|
|
+#endif
|
|
+
|
|
+ me = current;
|
|
+
|
|
+ if (cpu_test_and_set(cpu, cpu_initialized))
|
|
+ panic("CPU#%d already initialized!\n", cpu);
|
|
+
|
|
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
+
|
|
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
+
|
|
+ /*
|
|
+ * Initialize the per-CPU GDT with the boot GDT,
|
|
+ * and set up the GDT descriptor:
|
|
+ */
|
|
+
|
|
+ switch_to_new_gdt();
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
+ load_idt((const struct desc_ptr *)&idt_descr);
|
|
+#endif
|
|
+
|
|
+ memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
|
|
+ syscall_init();
|
|
+
|
|
+ wrmsrl(MSR_FS_BASE, 0);
|
|
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
|
|
+ barrier();
|
|
+
|
|
+ check_efer();
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ /*
|
|
+ * set up and load the per-CPU TSS
|
|
+ */
|
|
+ if (!orig_ist->ist[0]) {
|
|
+ static const unsigned int order[N_EXCEPTION_STACKS] = {
|
|
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
|
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
|
+ };
|
|
+ for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
|
+ if (cpu) {
|
|
+ estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
|
|
+ if (!estacks)
|
|
+ panic("Cannot allocate exception "
|
|
+ "stack %ld %d\n", v, cpu);
|
|
+ }
|
|
+ estacks += PAGE_SIZE << order[v];
|
|
+ orig_ist->ist[v] = t->x86_tss.ist[v] =
|
|
+ (unsigned long)estacks;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
|
+ /*
|
|
+ * <= is required because the CPU will access up to
|
|
+ * 8 bits beyond the end of the IO permission bitmap.
|
|
+ */
|
|
+ for (i = 0; i <= IO_BITMAP_LONGS; i++)
|
|
+ t->io_bitmap[i] = ~0UL;
|
|
+#endif
|
|
+
|
|
+ atomic_inc(&init_mm.mm_count);
|
|
+ me->active_mm = &init_mm;
|
|
+ if (me->mm)
|
|
+ BUG();
|
|
+ enter_lazy_tlb(&init_mm, me);
|
|
+
|
|
+ load_sp0(t, ¤t->thread);
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ set_tss_desc(cpu, t);
|
|
+ load_TR_desc();
|
|
+#endif
|
|
+ load_LDT(&init_mm.context);
|
|
+
|
|
+#ifdef CONFIG_KGDB
|
|
+ /*
|
|
+ * If the kgdb is connected no debug regs should be altered. This
|
|
+ * is only applicable when KGDB and a KGDB I/O module are built
|
|
+ * into the kernel and you are using early debugging with
|
|
+ * kgdbwait. KGDB will control the kernel HW breakpoint registers.
|
|
+ */
|
|
+ if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
|
|
+ arch_kgdb_ops.correct_hw_break();
|
|
+ else {
|
|
+#endif
|
|
+ /*
|
|
+ * Clear all 6 debug registers:
|
|
+ */
|
|
+
|
|
+ set_debugreg(0UL, 0);
|
|
+ set_debugreg(0UL, 1);
|
|
+ set_debugreg(0UL, 2);
|
|
+ set_debugreg(0UL, 3);
|
|
+ set_debugreg(0UL, 6);
|
|
+ set_debugreg(0UL, 7);
|
|
+#ifdef CONFIG_KGDB
|
|
+ /* If the kgdb is connected no debug regs should be altered. */
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ fpu_init();
|
|
+
|
|
+ asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
|
|
+ if (raw_irqs_disabled())
|
|
+ kernel_eflags &= ~X86_EFLAGS_IF;
|
|
+
|
|
+ if (is_uv_system())
|
|
+ uv_cpu_init();
|
|
+}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-11/arch/x86/kernel/e820-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -0,0 +1,1553 @@
|
|
+/*
|
|
+ * Handle the memory map.
|
|
+ * The functions here do the job until bootmem takes over.
|
|
+ *
|
|
+ * Getting sanitize_e820_map() in sync with i386 version by applying change:
|
|
+ * - Provisions for empty E820 memory regions (reported by certain BIOSes).
|
|
+ * Alex Achenbach <xela@slit.de>, December 2002.
|
|
+ * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
|
|
+ *
|
|
+ */
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/kexec.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/pfn.h>
|
|
+#include <linux/suspend.h>
|
|
+#include <linux/firmware-map.h>
|
|
+
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/page.h>
|
|
+#include <asm/e820.h>
|
|
+#include <asm/proto.h>
|
|
+#include <asm/setup.h>
|
|
+#include <xen/interface/memory.h>
|
|
+
|
|
+/*
|
|
+ * The e820 map is the map that gets modified e.g. with command line parameters
|
|
+ * and that is also registered with modifications in the kernel resource tree
|
|
+ * with the iomem_resource as parent.
|
|
+ *
|
|
+ * The e820_saved is directly saved after the BIOS-provided memory map is
|
|
+ * copied. It doesn't get modified afterwards. It's registered for the
|
|
+ * /sys/firmware/memmap interface.
|
|
+ *
|
|
+ * That memory map is not modified and is used as base for kexec. The kexec'd
|
|
+ * kernel should get the same memory map as the firmware provides. Then the
|
|
+ * user can e.g. boot the original kernel with mem=1G while still booting the
|
|
+ * next kernel with full memory.
|
|
+ */
|
|
+struct e820map e820;
|
|
+#ifndef CONFIG_XEN
|
|
+struct e820map e820_saved;
|
|
+#else
|
|
+static struct e820map machine_e820;
|
|
+#define e820_saved machine_e820
|
|
+#endif
|
|
+
|
|
+/* For PCI or other memory-mapped resources */
|
|
+unsigned long pci_mem_start = 0xaeedbabe;
|
|
+#ifdef CONFIG_PCI
|
|
+EXPORT_SYMBOL(pci_mem_start);
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * This function checks if any part of the range <start,end> is mapped
|
|
+ * with type.
|
|
+ */
|
|
+int
|
|
+e820_any_mapped(u64 start, u64 end, unsigned type)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+#else
|
|
+ if (!is_initial_xendomain())
|
|
+ return 0;
|
|
+ for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
+ const struct e820entry *ei = &machine_e820.map[i];
|
|
+#endif
|
|
+
|
|
+ if (type && ei->type != type)
|
|
+ continue;
|
|
+ if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
+ continue;
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(e820_any_mapped);
|
|
+
|
|
+/*
|
|
+ * This function checks if the entire range <start,end> is mapped with type.
|
|
+ *
|
|
+ * Note: this function only works correct if the e820 table is sorted and
|
|
+ * not-overlapping, which is the case
|
|
+ */
|
|
+int __init e820_all_mapped(u64 start, u64 end, unsigned type)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+#else
|
|
+ if (!is_initial_xendomain())
|
|
+ return 0;
|
|
+ for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
+ const struct e820entry *ei = &machine_e820.map[i];
|
|
+#endif
|
|
+
|
|
+ if (type && ei->type != type)
|
|
+ continue;
|
|
+ /* is the region (part) in overlap with the current region ?*/
|
|
+ if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
+ continue;
|
|
+
|
|
+ /* if the region is at the beginning of <start,end> we move
|
|
+ * start to the end of the region since it's ok until there
|
|
+ */
|
|
+ if (ei->addr <= start)
|
|
+ start = ei->addr + ei->size;
|
|
+ /*
|
|
+ * if start is now at or beyond end, we're done, full
|
|
+ * coverage
|
|
+ */
|
|
+ if (start >= end)
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Add a memory region to the kernel e820 map.
|
|
+ */
|
|
+void __init e820_add_region(u64 start, u64 size, int type)
|
|
+{
|
|
+ int x = e820.nr_map;
|
|
+
|
|
+ if (x == ARRAY_SIZE(e820.map)) {
|
|
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ e820.map[x].addr = start;
|
|
+ e820.map[x].size = size;
|
|
+ e820.map[x].type = type;
|
|
+ e820.nr_map++;
|
|
+}
|
|
+
|
|
+static void __init _e820_print_map(const struct e820map *e820, const char *who)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820->nr_map; i++) {
|
|
+ printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
|
|
+ (unsigned long long) e820->map[i].addr,
|
|
+ (unsigned long long)
|
|
+ (e820->map[i].addr + e820->map[i].size));
|
|
+ switch (e820->map[i].type) {
|
|
+ case E820_RAM:
|
|
+ case E820_RESERVED_KERN:
|
|
+ printk(KERN_CONT "(usable)\n");
|
|
+ break;
|
|
+ case E820_RESERVED:
|
|
+ printk(KERN_CONT "(reserved)\n");
|
|
+ break;
|
|
+ case E820_ACPI:
|
|
+ printk(KERN_CONT "(ACPI data)\n");
|
|
+ break;
|
|
+ case E820_NVS:
|
|
+ printk(KERN_CONT "(ACPI NVS)\n");
|
|
+ break;
|
|
+ default:
|
|
+ printk(KERN_CONT "type %u\n", e820->map[i].type);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Sanitize the BIOS e820 map.
|
|
+ *
|
|
+ * Some e820 responses include overlapping entries. The following
|
|
+ * replaces the original e820 map with a new one, removing overlaps,
|
|
+ * and resolving conflicting memory types in favor of highest
|
|
+ * numbered type.
|
|
+ *
|
|
+ * The input parameter biosmap points to an array of 'struct
|
|
+ * e820entry' which on entry has elements in the range [0, *pnr_map)
|
|
+ * valid, and which has space for up to max_nr_map entries.
|
|
+ * On return, the resulting sanitized e820 map entries will be in
|
|
+ * overwritten in the same location, starting at biosmap.
|
|
+ *
|
|
+ * The integer pointed to by pnr_map must be valid on entry (the
|
|
+ * current number of valid entries located at biosmap) and will
|
|
+ * be updated on return, with the new number of valid entries
|
|
+ * (something no more than max_nr_map.)
|
|
+ *
|
|
+ * The return value from sanitize_e820_map() is zero if it
|
|
+ * successfully 'sanitized' the map entries passed in, and is -1
|
|
+ * if it did nothing, which can happen if either of (1) it was
|
|
+ * only passed one map entry, or (2) any of the input map entries
|
|
+ * were invalid (start + size < start, meaning that the size was
|
|
+ * so big the described memory range wrapped around through zero.)
|
|
+ *
|
|
+ * Visually we're performing the following
|
|
+ * (1,2,3,4 = memory types)...
|
|
+ *
|
|
+ * Sample memory map (w/overlaps):
|
|
+ * ____22__________________
|
|
+ * ______________________4_
|
|
+ * ____1111________________
|
|
+ * _44_____________________
|
|
+ * 11111111________________
|
|
+ * ____________________33__
|
|
+ * ___________44___________
|
|
+ * __________33333_________
|
|
+ * ______________22________
|
|
+ * ___________________2222_
|
|
+ * _________111111111______
|
|
+ * _____________________11_
|
|
+ * _________________4______
|
|
+ *
|
|
+ * Sanitized equivalent (no overlap):
|
|
+ * 1_______________________
|
|
+ * _44_____________________
|
|
+ * ___1____________________
|
|
+ * ____22__________________
|
|
+ * ______11________________
|
|
+ * _________1______________
|
|
+ * __________3_____________
|
|
+ * ___________44___________
|
|
+ * _____________33_________
|
|
+ * _______________2________
|
|
+ * ________________1_______
|
|
+ * _________________4______
|
|
+ * ___________________2____
|
|
+ * ____________________33__
|
|
+ * ______________________4_
|
|
+ */
|
|
+
|
|
+int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
|
|
+ int *pnr_map)
|
|
+{
|
|
+ struct change_member {
|
|
+ struct e820entry *pbios; /* pointer to original bios entry */
|
|
+ unsigned long long addr; /* address for this change point */
|
|
+ };
|
|
+ static struct change_member change_point_list[2*E820_X_MAX] __initdata;
|
|
+ static struct change_member *change_point[2*E820_X_MAX] __initdata;
|
|
+ static struct e820entry *overlap_list[E820_X_MAX] __initdata;
|
|
+ static struct e820entry new_bios[E820_X_MAX] __initdata;
|
|
+ struct change_member *change_tmp;
|
|
+ unsigned long current_type, last_type;
|
|
+ unsigned long long last_addr;
|
|
+ int chgidx, still_changing;
|
|
+ int overlap_entries;
|
|
+ int new_bios_entry;
|
|
+ int old_nr, new_nr, chg_nr;
|
|
+ int i;
|
|
+
|
|
+ /* if there's only one memory region, don't bother */
|
|
+#ifdef CONFIG_XEN
|
|
+ if (*pnr_map == 1)
|
|
+ return 0;
|
|
+#endif
|
|
+ if (*pnr_map < 2)
|
|
+ return -1;
|
|
+
|
|
+ old_nr = *pnr_map;
|
|
+ BUG_ON(old_nr > max_nr_map);
|
|
+
|
|
+ /* bail out if we find any unreasonable addresses in bios map */
|
|
+ for (i = 0; i < old_nr; i++)
+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
+ return -1;
+
+ /* create pointers for initial change-point information (for sorting) */
+ for (i = 0; i < 2 * old_nr; i++)
+ change_point[i] = &change_point_list[i];
+
+ /* record all known change-points (starting and ending addresses),
+ omitting those that are for empty memory regions */
+ chgidx = 0;
+ for (i = 0; i < old_nr; i++) {
+ if (biosmap[i].size != 0) {
+ change_point[chgidx]->addr = biosmap[i].addr;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ change_point[chgidx]->addr = biosmap[i].addr +
+ biosmap[i].size;
+ change_point[chgidx++]->pbios = &biosmap[i];
+ }
+ }
+ chg_nr = chgidx;
+
+ /* sort change-point list by memory addresses (low -> high) */
+ still_changing = 1;
+ while (still_changing) {
+ still_changing = 0;
+ for (i = 1; i < chg_nr; i++) {
+ unsigned long long curaddr, lastaddr;
+ unsigned long long curpbaddr, lastpbaddr;
+
+ curaddr = change_point[i]->addr;
+ lastaddr = change_point[i - 1]->addr;
+ curpbaddr = change_point[i]->pbios->addr;
+ lastpbaddr = change_point[i - 1]->pbios->addr;
+
+ /*
+ * swap entries, when:
+ *
+ * curaddr > lastaddr or
+ * curaddr == lastaddr and curaddr == curpbaddr and
+ * lastaddr != lastpbaddr
+ */
+ if (curaddr < lastaddr ||
+ (curaddr == lastaddr && curaddr == curpbaddr &&
+ lastaddr != lastpbaddr)) {
+ change_tmp = change_point[i];
+ change_point[i] = change_point[i-1];
+ change_point[i-1] = change_tmp;
+ still_changing = 1;
+ }
+ }
+ }
+
+ /* create a new bios memory map, removing overlaps */
+ overlap_entries = 0; /* number of entries in the overlap table */
+ new_bios_entry = 0; /* index for creating new bios map entries */
+ last_type = 0; /* start with undefined memory type */
+ last_addr = 0; /* start with 0 as last starting address */
+
+ /* loop through change-points, determining affect on the new bios map */
+ for (chgidx = 0; chgidx < chg_nr; chgidx++) {
+ /* keep track of all overlapping bios entries */
+ if (change_point[chgidx]->addr ==
+ change_point[chgidx]->pbios->addr) {
+ /*
+ * add map entry to overlap list (> 1 entry
+ * implies an overlap)
+ */
+ overlap_list[overlap_entries++] =
+ change_point[chgidx]->pbios;
+ } else {
+ /*
+ * remove entry from list (order independent,
+ * so swap with last)
+ */
+ for (i = 0; i < overlap_entries; i++) {
+ if (overlap_list[i] ==
+ change_point[chgidx]->pbios)
+ overlap_list[i] =
+ overlap_list[overlap_entries-1];
+ }
+ overlap_entries--;
+ }
+ /*
+ * if there are overlapping entries, decide which
+ * "type" to use (larger value takes precedence --
+ * 1=usable, 2,3,4,4+=unusable)
+ */
+ current_type = 0;
+ for (i = 0; i < overlap_entries; i++)
+ if (overlap_list[i]->type > current_type)
+ current_type = overlap_list[i]->type;
+ /*
+ * continue building up new bios map based on this
+ * information
+ */
+ if (current_type != last_type) {
+ if (last_type != 0) {
+ new_bios[new_bios_entry].size =
+ change_point[chgidx]->addr - last_addr;
+ /*
+ * move forward only if the new size
+ * was non-zero
+ */
+ if (new_bios[new_bios_entry].size != 0)
+ /*
+ * no more space left for new
+ * bios entries ?
+ */
+ if (++new_bios_entry >= max_nr_map)
+ break;
+ }
+ if (current_type != 0) {
+ new_bios[new_bios_entry].addr =
+ change_point[chgidx]->addr;
+ new_bios[new_bios_entry].type = current_type;
+ last_addr = change_point[chgidx]->addr;
+ }
+ last_type = current_type;
+ }
+ }
+ /* retain count for new bios entries */
+ new_nr = new_bios_entry;
+
+ /* copy new bios mapping into original location */
+ memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
+ *pnr_map = new_nr;
+
+ return 0;
+}
+
+static int __init __append_e820_map(struct e820entry *biosmap, int nr_map)
+{
+ while (nr_map) {
+ u64 start = biosmap->addr;
+ u64 size = biosmap->size;
+ u64 end = start + size;
+ u32 type = biosmap->type;
+
+ /* Overflow in 64 bits? Ignore the memory map. */
+ if (start > end)
+ return -1;
+
+ e820_add_region(start, size, type);
+
+ biosmap++;
+ nr_map--;
+ }
+ return 0;
+}
+
+/*
+ * Copy the BIOS e820 map into a safe place.
+ *
+ * Sanity-check it while we're at it..
+ *
+ * If we're lucky and live on a modern system, the setup code
+ * will have given us a memory map that we can use to properly
+ * set up memory. If we aren't, we'll fake a memory map.
+ */
+static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
+{
+#ifndef CONFIG_XEN
+ /* Only one memory region (or negative)? Ignore it */
+ if (nr_map < 2)
+ return -1;
+#else
+ BUG_ON(nr_map < 1);
+#endif
+
+ return __append_e820_map(biosmap, nr_map);
+}
+
+static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
+ u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ unsigned int i, x;
+ u64 real_updated_size = 0;
+
+ BUG_ON(old_type == new_type);
+
+ if (size > (ULLONG_MAX - start))
+ size = ULLONG_MAX - start;
+
+ for (i = 0; i < e820x->nr_map; i++) {
+ struct e820entry *ei = &e820x->map[i];
+ u64 final_start, final_end;
+ if (ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start &&
+ (ei->addr + ei->size) <= (start + size)) {
+ ei->type = new_type;
+ real_updated_size += ei->size;
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+
+ x = e820x->nr_map;
+ if (x == ARRAY_SIZE(e820x->map)) {
+ printk(KERN_ERR "Too many memory map entries!\n");
+ break;
+ }
+ e820x->map[x].addr = final_start;
+ e820x->map[x].size = final_end - final_start;
+ e820x->map[x].type = new_type;
+ e820x->nr_map++;
+
+ real_updated_size += final_end - final_start;
+
+ if (ei->addr < final_start)
+ continue;
+ ei->addr = final_end;
+ ei->size -= final_end - final_start;
+ }
+ return real_updated_size;
+}
+
+u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
+ unsigned new_type)
+{
+ return e820_update_range_map(&e820, start, size, old_type, new_type);
+}
+
+static u64 __init e820_update_range_saved(u64 start, u64 size,
+ unsigned old_type, unsigned new_type)
+{
+#ifdef CONFIG_XEN
+ if (is_initial_xendomain())
+ return e820_update_range_map(&machine_e820,
+ phys_to_machine(start), size,
+ old_type, new_type);
+#endif
+ return e820_update_range_map(&e820_saved, start, size, old_type,
+ new_type);
+}
+
+/* make e820 not cover the range */
+u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
+ int checktype)
+{
+ int i;
+ u64 real_removed_size = 0;
+
+ if (size > (ULLONG_MAX - start))
+ size = ULLONG_MAX - start;
+
+ for (i = 0; i < e820.nr_map; i++) {
+ struct e820entry *ei = &e820.map[i];
+ u64 final_start, final_end;
+
+ if (checktype && ei->type != old_type)
+ continue;
+ /* totally covered? */
+ if (ei->addr >= start &&
+ (ei->addr + ei->size) <= (start + size)) {
+ real_removed_size += ei->size;
+ memset(ei, 0, sizeof(struct e820entry));
+ continue;
+ }
+ /* partially covered */
+ final_start = max(start, ei->addr);
+ final_end = min(start + size, ei->addr + ei->size);
+ if (final_start >= final_end)
+ continue;
+ real_removed_size += final_end - final_start;
+
+ ei->size -= final_end - final_start;
+ if (ei->addr < final_start)
+ continue;
+ ei->addr = final_end;
+ }
+ return real_removed_size;
+}
+
+void __init update_e820(void)
+{
+ int nr_map;
+
+ nr_map = e820.nr_map;
+ if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
+ return;
+ e820.nr_map = nr_map;
+ printk(KERN_INFO "modified physical RAM map:\n");
+ _e820_print_map(&e820, "modified");
+}
+static void __init update_e820_saved(void)
+{
+ int nr_map;
+
+ nr_map = e820_saved.nr_map;
+ if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
+ return;
+ e820_saved.nr_map = nr_map;
+}
+
+#ifdef CONFIG_XEN
+#define e820 machine_e820
+#endif
+
+#define MAX_GAP_END 0x100000000ull
+/*
+ * Search for a gap in the e820 memory space from start_addr to end_addr.
+ */
+__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize,
+ unsigned long start_addr, unsigned long long end_addr)
+{
+ unsigned long long last;
+ int i = e820.nr_map;
+ int found = 0;
+
+ last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END;
+#ifdef CONFIG_X86_64
+ if (start_addr >= MAX_GAP_END)
+ last = end_addr ?: (1UL << boot_cpu_data.x86_phys_bits);
+#endif
+
+ while (--i >= 0) {
+ unsigned long long start = e820.map[i].addr;
+ unsigned long long end = start + e820.map[i].size;
+
+ if (end < start_addr)
+ continue;
+
+ /*
+ * Since "last" is at most 4GB, we know we'll
+ * fit in 32 bits if this condition is true
+ */
+ if (last > end) {
+ unsigned long gap = last - end;
+
+ if (gap >= *gapsize) {
+ *gapsize = gap;
+ *gapstart = end;
+ found = 1;
+ }
+ }
+ if (start < last)
+ last = start;
+ }
+ return found;
+}
+
|
|
+/*
|
|
+ * Search for the biggest gap in the low 32 bits of the e820
|
|
+ * memory space. We pass this space to PCI to assign MMIO resources
|
|
+ * for hotplug or unconfigured devices in.
|
|
+ * Hopefully the BIOS let enough space left.
|
|
+ */
|
|
+__init void e820_setup_gap(void)
|
|
+{
|
|
+ unsigned long gapstart, gapsize, round;
|
|
+ int found;
|
|
+
|
|
+ gapstart = 0x10000000;
|
|
+ gapsize = 0x400000;
|
|
+ found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END);
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (!found) {
|
|
+ printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
|
|
+ "address range\n"
|
|
+ KERN_ERR "PCI: Unassigned devices with 32bit resource "
|
|
+ "registers may break!\n");
|
|
+ found = e820_search_gap(&gapstart, &gapsize, MAX_GAP_END, 0);
|
|
+ WARN_ON(!found);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * See how much we want to round up: start off with
|
|
+ * rounding to the next 1MB area.
|
|
+ */
|
|
+ round = 0x100000;
|
|
+ while ((gapsize >> 4) > round)
|
|
+ round += round;
|
|
+ /* Fun with two's complement */
|
|
+ pci_mem_start = (gapstart + round) & -round;
|
|
+
|
|
+ printk(KERN_INFO
|
|
+ "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
|
|
+ pci_mem_start, gapstart, gapsize);
|
|
+}
|
|
+
|
|
+#undef e820
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/**
|
|
+ * Because of the size limitation of struct boot_params, only first
|
|
+ * 128 E820 memory entries are passed to kernel via
|
|
+ * boot_params.e820_map, others are passed via SETUP_E820_EXT node of
|
|
+ * linked list of struct setup_data, which is parsed here.
|
|
+ */
|
|
+void __init parse_e820_ext(struct setup_data *sdata, unsigned long pa_data)
|
|
+{
|
|
+ u32 map_len;
|
|
+ int entries;
|
|
+ struct e820entry *extmap;
|
|
+
|
|
+ entries = sdata->len / sizeof(struct e820entry);
|
|
+ map_len = sdata->len + sizeof(struct setup_data);
|
|
+ if (map_len > PAGE_SIZE)
|
|
+ sdata = early_ioremap(pa_data, map_len);
|
|
+ extmap = (struct e820entry *)(sdata->data);
|
|
+ __append_e820_map(extmap, entries);
|
|
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
|
+ if (map_len > PAGE_SIZE)
|
|
+ early_iounmap(sdata, map_len);
|
|
+ printk(KERN_INFO "extended physical RAM map:\n");
|
|
+ _e820_print_map(&e820, "extended");
|
|
+}
|
|
+
|
|
+#if defined(CONFIG_X86_64) || \
|
|
+ (defined(CONFIG_X86_32) && defined(CONFIG_HIBERNATION))
|
|
+/**
|
|
+ * Find the ranges of physical addresses that do not correspond to
|
|
+ * e820 RAM areas and mark the corresponding pages as nosave for
|
|
+ * hibernation (32 bit) or software suspend and suspend to RAM (64 bit).
|
|
+ *
|
|
+ * This function requires the e820 map to be sorted and without any
|
|
+ * overlapping entries and assumes the first e820 area to be RAM.
|
|
+ */
|
|
+void __init e820_mark_nosave_regions(unsigned long limit_pfn)
|
|
+{
|
|
+ int i;
|
|
+ unsigned long pfn;
|
|
+
|
|
+ pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
|
|
+ for (i = 1; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+
|
|
+ if (pfn < PFN_UP(ei->addr))
|
|
+ register_nosave_region(pfn, PFN_UP(ei->addr));
|
|
+
|
|
+ pfn = PFN_DOWN(ei->addr + ei->size);
|
|
+ if (ei->type != E820_RAM && ei->type != E820_RESERVED_KERN)
|
|
+ register_nosave_region(PFN_UP(ei->addr), pfn);
|
|
+
|
|
+ if (pfn >= limit_pfn)
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Early reserved memory areas.
|
|
+ */
|
|
+#define MAX_EARLY_RES 20
|
|
+
|
|
+struct early_res {
|
|
+ u64 start, end;
|
|
+ char name[16];
|
|
+ char overlap_ok;
|
|
+};
|
|
+static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
|
+#ifndef CONFIG_XEN
|
|
+ { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
|
|
+#if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE)
|
|
+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
|
|
+#endif
|
|
+#if defined(CONFIG_X86_32) && defined(CONFIG_SMP)
|
|
+ /*
|
|
+ * But first pinch a few for the stack/trampoline stuff
|
|
+ * FIXME: Don't need the extra page at 4K, but need to fix
|
|
+ * trampoline before removing it. (see the GDT stuff)
|
|
+ */
|
|
+ { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" },
|
|
+ /*
|
|
+ * Has to be in very low memory so we can execute
|
|
+ * real-mode AP code.
|
|
+ */
|
|
+ { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" },
|
|
+#endif
|
|
+#endif
|
|
+ {}
|
|
+};
|
|
+
|
|
+static int __init find_overlapped_early(u64 start, u64 end)
|
|
+{
|
|
+ int i;
|
|
+ struct early_res *r;
|
|
+
|
|
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
+ r = &early_res[i];
|
|
+ if (end > r->start && start < r->end)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return i;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Drop the i-th range from the early reservation map,
|
|
+ * by copying any higher ranges down one over it, and
|
|
+ * clearing what had been the last slot.
|
|
+ */
|
|
+static void __init drop_range(int i)
|
|
+{
|
|
+ int j;
|
|
+
|
|
+ for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
|
|
+ ;
|
|
+
|
|
+ memmove(&early_res[i], &early_res[i + 1],
|
|
+ (j - 1 - i) * sizeof(struct early_res));
|
|
+
|
|
+ early_res[j - 1].end = 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Split any existing ranges that:
|
|
+ * 1) are marked 'overlap_ok', and
|
|
+ * 2) overlap with the stated range [start, end)
|
|
+ * into whatever portion (if any) of the existing range is entirely
|
|
+ * below or entirely above the stated range. Drop the portion
|
|
+ * of the existing range that overlaps with the stated range,
|
|
+ * which will allow the caller of this routine to then add that
|
|
+ * stated range without conflicting with any existing range.
|
|
+ */
|
|
+static void __init drop_overlaps_that_are_ok(u64 start, u64 end)
|
|
+{
|
|
+ int i;
|
|
+ struct early_res *r;
|
|
+ u64 lower_start, lower_end;
|
|
+ u64 upper_start, upper_end;
|
|
+ char name[16];
|
|
+
|
|
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
+ r = &early_res[i];
|
|
+
|
|
+ /* Continue past non-overlapping ranges */
|
|
+ if (end <= r->start || start >= r->end)
|
|
+ continue;
|
|
+
|
|
+ /*
|
|
+ * Leave non-ok overlaps as is; let caller
|
|
+ * panic "Overlapping early reservations"
|
|
+ * when it hits this overlap.
|
|
+ */
|
|
+ if (!r->overlap_ok)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * We have an ok overlap. We will drop it from the early
|
|
+ * reservation map, and add back in any non-overlapping
|
|
+ * portions (lower or upper) as separate, overlap_ok,
|
|
+ * non-overlapping ranges.
|
|
+ */
|
|
+
|
|
+ /* 1. Note any non-overlapping (lower or upper) ranges. */
|
|
+ strncpy(name, r->name, sizeof(name) - 1);
|
|
+
|
|
+ lower_start = lower_end = 0;
|
|
+ upper_start = upper_end = 0;
|
|
+ if (r->start < start) {
|
|
+ lower_start = r->start;
|
|
+ lower_end = start;
|
|
+ }
|
|
+ if (r->end > end) {
|
|
+ upper_start = end;
|
|
+ upper_end = r->end;
|
|
+ }
|
|
+
|
|
+ /* 2. Drop the original ok overlapping range */
|
|
+ drop_range(i);
|
|
+
|
|
+ i--; /* resume for-loop on copied down entry */
|
|
+
|
|
+ /* 3. Add back in any non-overlapping ranges. */
|
|
+ if (lower_end)
|
|
+ reserve_early_overlap_ok(lower_start, lower_end, name);
|
|
+ if (upper_end)
|
|
+ reserve_early_overlap_ok(upper_start, upper_end, name);
|
|
+ }
|
|
+}
|
|
+
|
|
+static void __init __reserve_early(u64 start, u64 end, char *name,
|
|
+ int overlap_ok)
|
|
+{
|
|
+ int i;
|
|
+ struct early_res *r;
|
|
+
|
|
+ i = find_overlapped_early(start, end);
|
|
+ if (i >= MAX_EARLY_RES)
|
|
+ panic("Too many early reservations");
|
|
+ r = &early_res[i];
|
|
+ if (r->end)
|
|
+ panic("Overlapping early reservations "
|
|
+ "%llx-%llx %s to %llx-%llx %s\n",
|
|
+ start, end - 1, name?name:"", r->start,
|
|
+ r->end - 1, r->name);
|
|
+ r->start = start;
|
|
+ r->end = end;
|
|
+ r->overlap_ok = overlap_ok;
|
|
+ if (name)
|
|
+ strncpy(r->name, name, sizeof(r->name) - 1);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * A few early reservations come here.
+ *
|
|
+ * The 'overlap_ok' in the name of this routine does -not- mean it
|
|
+ * is ok for these reservations to overlap an earlier reservation.
|
|
+ * Rather it means that it is ok for subsequent reservations to
|
|
+ * overlap this one.
|
|
+ *
|
|
+ * Use this entry point to reserve early ranges when you are doing
|
|
+ * so out of "Paranoia", reserving perhaps more memory than you need,
|
|
+ * just in case, and don't mind a subsequent overlapping reservation
|
|
+ * that is known to be needed.
|
|
+ *
|
|
+ * The drop_overlaps_that_are_ok() call here isn't really needed.
|
|
+ * It would be needed if we had two colliding 'overlap_ok'
|
|
+ * reservations, so that the second such would not panic on the
|
|
+ * overlap with the first. We don't have any such as of this
|
|
+ * writing, but might as well tolerate such if it happens in
|
|
+ * the future.
|
|
+ */
|
|
+void __init reserve_early_overlap_ok(u64 start, u64 end, char *name)
|
|
+{
|
|
+ drop_overlaps_that_are_ok(start, end);
|
|
+ __reserve_early(start, end, name, 1);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Most early reservations come here.
|
|
+ *
|
|
+ * We first have drop_overlaps_that_are_ok() drop any pre-existing
|
|
+ * 'overlap_ok' ranges, so that we can then reserve this memory
|
|
+ * range without risk of panic'ing on an overlapping overlap_ok
|
|
+ * early reservation.
|
|
+ */
|
|
+void __init reserve_early(u64 start, u64 end, char *name)
|
|
+{
|
|
+ drop_overlaps_that_are_ok(start, end);
|
|
+ __reserve_early(start, end, name, 0);
|
|
+}
|
|
+
|
|
+void __init free_early(u64 start, u64 end)
|
|
+{
|
|
+ struct early_res *r;
|
|
+ int i;
|
|
+
|
|
+ i = find_overlapped_early(start, end);
|
|
+ r = &early_res[i];
|
|
+ if (i >= MAX_EARLY_RES || r->end != end || r->start != start)
|
|
+ panic("free_early on not reserved area: %llx-%llx!",
|
|
+ start, end - 1);
|
|
+
|
|
+ drop_range(i);
|
|
+}
|
|
+
|
|
+void __init early_res_to_bootmem(u64 start, u64 end)
|
|
+{
|
|
+ int i, count;
|
|
+ u64 final_start, final_end;
|
|
+
|
|
+ count = 0;
|
|
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++)
|
|
+ count++;
|
|
+
|
|
+ printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n",
|
|
+ count, start, end);
|
|
+ for (i = 0; i < count; i++) {
|
|
+ struct early_res *r = &early_res[i];
|
|
+ printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i,
|
|
+ r->start, r->end, r->name);
|
|
+ final_start = max(start, r->start);
|
|
+ final_end = min(end, r->end);
|
|
+ if (final_start >= final_end) {
|
|
+ printk(KERN_CONT "\n");
|
|
+ continue;
|
|
+ }
|
|
+ printk(KERN_CONT " ==> [%010llx - %010llx]\n",
|
|
+ final_start, final_end);
|
|
+ reserve_bootmem_generic(final_start, final_end - final_start,
|
|
+ BOOTMEM_DEFAULT);
|
|
+ }
|
|
+}
|
|
+
|
|
+/* Check for already reserved areas */
|
|
+static inline int __init bad_addr(u64 *addrp, u64 size, u64 align)
|
|
+{
|
|
+ int i;
|
|
+ u64 addr = *addrp;
|
|
+ int changed = 0;
|
|
+ struct early_res *r;
|
|
+again:
|
|
+ i = find_overlapped_early(addr, addr + size);
|
|
+ r = &early_res[i];
|
|
+ if (i < MAX_EARLY_RES && r->end) {
|
|
+ *addrp = addr = round_up(r->end, align);
|
|
+ changed = 1;
|
|
+ goto again;
|
|
+ }
|
|
+ return changed;
|
|
+}
|
|
+
|
|
+/* Check for already reserved areas */
|
|
+static inline int __init bad_addr_size(u64 *addrp, u64 *sizep, u64 align)
|
|
+{
|
|
+ int i;
|
|
+ u64 addr = *addrp, last;
|
|
+ u64 size = *sizep;
|
|
+ int changed = 0;
|
|
+again:
|
|
+ last = addr + size;
|
|
+ for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
+ struct early_res *r = &early_res[i];
|
|
+ if (last > r->start && addr < r->start) {
|
|
+ size = r->start - addr;
|
|
+ changed = 1;
|
|
+ goto again;
|
|
+ }
|
|
+ if (last > r->end && addr < r->end) {
|
|
+ addr = round_up(r->end, align);
|
|
+ size = last - addr;
|
|
+ changed = 1;
|
|
+ goto again;
|
|
+ }
|
|
+ if (last <= r->end && addr >= r->start) {
|
|
+ (*sizep)++;
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+ if (changed) {
|
|
+ *addrp = addr;
|
|
+ *sizep = size;
|
|
+ }
|
|
+ return changed;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Find a free area with specified alignment in a specific range.
|
|
+ */
|
|
+u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+ u64 addr, last;
|
|
+ u64 ei_last;
|
|
+
|
|
+ if (ei->type != E820_RAM)
|
|
+ continue;
|
|
+ addr = round_up(ei->addr, align);
|
|
+ ei_last = ei->addr + ei->size;
|
|
+ if (addr < start)
|
|
+ addr = round_up(start, align);
|
|
+ if (addr >= ei_last)
|
|
+ continue;
|
|
+ while (bad_addr(&addr, size, align) && addr+size <= ei_last)
|
|
+ ;
|
|
+ last = addr + size;
|
|
+ if (last > ei_last)
|
|
+ continue;
|
|
+ if (last > end)
|
|
+ continue;
|
|
+ return addr;
|
|
+ }
|
|
+ return -1ULL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Find next free range after *start
|
|
+ */
|
|
+u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+ u64 addr, last;
|
|
+ u64 ei_last;
|
|
+
|
|
+ if (ei->type != E820_RAM)
|
|
+ continue;
|
|
+ addr = round_up(ei->addr, align);
|
|
+ ei_last = ei->addr + ei->size;
|
|
+ if (addr < start)
|
|
+ addr = round_up(start, align);
|
|
+ if (addr >= ei_last)
|
|
+ continue;
|
|
+ *sizep = ei_last - addr;
|
|
+ while (bad_addr_size(&addr, sizep, align) &&
|
|
+ addr + *sizep <= ei_last)
|
|
+ ;
|
|
+ last = addr + *sizep;
|
|
+ if (last > ei_last)
|
|
+ continue;
|
|
+ return addr;
|
|
+ }
|
|
+
|
|
+ return -1ULL;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * pre allocated 4k and reserved it in e820
|
|
+ */
|
|
+u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
|
|
+{
|
|
+ u64 size = 0;
|
|
+ u64 addr;
|
|
+ u64 start;
|
|
+#ifdef CONFIG_XEN
|
|
+ unsigned int order = get_order(sizet);
|
|
+
|
|
+ if (is_initial_xendomain()) {
|
|
+ sizet = PAGE_SIZE << order;
|
|
+ if (align < PAGE_SIZE)
|
|
+ align = PAGE_SIZE;
|
|
+ }
|
|
+#endif
|
|
+ for (start = startt; ; start += size) {
|
|
+ start = find_e820_area_size(start, &size, align);
|
|
+ if (!(start + 1))
|
|
+ return 0;
|
|
+ if (size >= sizet)
|
|
+ break;
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (start >= MAXMEM)
|
|
+ return 0;
|
|
+ if (start + size > MAXMEM)
|
|
+ size = MAXMEM - start;
|
|
+#endif
|
|
+#ifdef CONFIG_XEN
|
|
+ if ((start >> PAGE_SHIFT) >= xen_start_info->nr_pages)
|
|
+ return 0;
|
|
+ if (PFN_UP(start + size) > xen_start_info->nr_pages)
|
|
+ size = ((u64)xen_start_info->nr_pages << PAGE_SHIFT) - start;
|
|
+#endif
|
|
+
|
|
+ addr = round_down(start + size - sizet, align);
|
|
+ if (addr < start)
|
|
+ return 0;
|
|
+#ifdef CONFIG_XEN
|
|
+ if (is_initial_xendomain()) {
|
|
+ int rc;
|
|
+ unsigned long max_initmap_pfn;
|
|
+
|
|
+ max_initmap_pfn = ALIGN(PFN_UP(__pa(xen_start_info->pt_base))
|
|
+ + xen_start_info->nr_pt_frames
|
|
+ + 1 + (1 << (19 - PAGE_SHIFT)),
|
|
+ 1UL << (22 - PAGE_SHIFT));
|
|
+#ifdef CONFIG_X86_32
|
|
+ if ((addr >> PAGE_SHIFT)
|
|
+ < max(max_initmap_pfn, max_pfn_mapped))
|
|
+ rc = xen_create_contiguous_region((unsigned long)
|
|
+ __va(addr),
|
|
+ order, 32);
|
|
+#else
|
|
+ if ((addr >> PAGE_SHIFT) < max_pfn_mapped)
|
|
+ rc = xen_create_contiguous_region((unsigned long)
|
|
+ __va(addr),
|
|
+ order, 32);
|
|
+ else if ((addr >> PAGE_SHIFT) < max_initmap_pfn)
|
|
+ rc = xen_create_contiguous_region(__START_KERNEL_map
|
|
+ + addr,
|
|
+ order, 32);
|
|
+#endif
|
|
+ else
|
|
+ rc = early_create_contiguous_region(addr >> PAGE_SHIFT,
|
|
+ order, 32);
|
|
+ if (rc)
|
|
+ return 0;
|
|
+ }
|
|
+#endif
|
|
+ e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
|
|
+ e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
|
|
+ printk(KERN_INFO "update e820 for early_reserve_e820\n");
|
|
+ update_e820();
|
|
+ update_e820_saved();
|
|
+
|
|
+ return addr;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+# ifdef CONFIG_X86_PAE
|
|
+# define MAX_ARCH_PFN (1ULL<<(40-PAGE_SHIFT))
|
|
+# else
|
|
+# define MAX_ARCH_PFN (1ULL<<(32-PAGE_SHIFT))
|
|
+# endif
|
|
+#else /* CONFIG_X86_32 */
|
|
+# define MAX_ARCH_PFN MAXMEM>>PAGE_SHIFT
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Find the highest page frame number we have available
|
|
+ */
|
|
+static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
|
|
+{
|
|
+ int i;
|
|
+ unsigned long last_pfn = 0;
|
|
+ unsigned long max_arch_pfn = MAX_ARCH_PFN;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+ unsigned long start_pfn;
|
|
+ unsigned long end_pfn;
|
|
+
|
|
+ if (ei->type != type)
|
|
+ continue;
|
|
+
|
|
+ start_pfn = ei->addr >> PAGE_SHIFT;
|
|
+ end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
|
|
+
|
|
+ if (start_pfn >= limit_pfn)
|
|
+ continue;
|
|
+ if (end_pfn > limit_pfn) {
|
|
+ last_pfn = limit_pfn;
|
|
+ break;
|
|
+ }
|
|
+ if (end_pfn > last_pfn)
|
|
+ last_pfn = end_pfn;
|
|
+ }
|
|
+
|
|
+ if (last_pfn > max_arch_pfn)
|
|
+ last_pfn = max_arch_pfn;
|
|
+
|
|
+ printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
|
|
+ last_pfn, max_arch_pfn);
|
|
+ return last_pfn;
|
|
+}
|
|
+unsigned long __init e820_end_of_ram_pfn(void)
|
|
+{
|
|
+ return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
|
|
+}
|
|
+
|
|
+unsigned long __init e820_end_of_low_ram_pfn(void)
|
|
+{
|
|
+ return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
|
|
+}
|
|
+/*
|
|
+ * Finds an active region in the address range from start_pfn to last_pfn and
|
|
+ * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
|
|
+ */
|
|
+int __init e820_find_active_region(const struct e820entry *ei,
|
|
+ unsigned long start_pfn,
|
|
+ unsigned long last_pfn,
|
|
+ unsigned long *ei_startpfn,
|
|
+ unsigned long *ei_endpfn)
|
|
+{
|
|
+ u64 align = PAGE_SIZE;
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ if (last_pfn > xen_start_info->nr_pages)
|
|
+ last_pfn = xen_start_info->nr_pages;
|
|
+#endif
|
|
+
|
|
+ *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT;
|
|
+ *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT;
|
|
+
|
|
+ /* Skip map entries smaller than a page */
|
|
+ if (*ei_startpfn >= *ei_endpfn)
|
|
+ return 0;
|
|
+
|
|
+ /* Skip if map is outside the node */
|
|
+ if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
|
|
+ *ei_startpfn >= last_pfn)
|
|
+ return 0;
|
|
+
|
|
+ /* Check for overlaps */
|
|
+ if (*ei_startpfn < start_pfn)
|
|
+ *ei_startpfn = start_pfn;
|
|
+ if (*ei_endpfn > last_pfn)
|
|
+ *ei_endpfn = last_pfn;
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/* Walk the e820 map and register active regions within a node */
|
|
+void __init e820_register_active_regions(int nid, unsigned long start_pfn,
|
|
+ unsigned long last_pfn)
|
|
+{
|
|
+ unsigned long ei_startpfn;
|
|
+ unsigned long ei_endpfn;
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++)
|
|
+ if (e820_find_active_region(&e820.map[i],
|
|
+ start_pfn, last_pfn,
|
|
+ &ei_startpfn, &ei_endpfn))
|
|
+ add_active_range(nid, ei_startpfn, ei_endpfn);
|
|
+#ifdef CONFIG_XEN
|
|
+ BUG_ON(nid);
|
|
+ add_active_range(nid, last_pfn, last_pfn);
|
|
+#endif
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Find the hole size (in bytes) in the memory range.
|
|
+ * @start: starting address of the memory range to scan
|
|
+ * @end: ending address of the memory range to scan
|
|
+ */
|
|
+u64 __init e820_hole_size(u64 start, u64 end)
|
|
+{
|
|
+ unsigned long start_pfn = start >> PAGE_SHIFT;
|
|
+ unsigned long last_pfn = end >> PAGE_SHIFT;
|
|
+ unsigned long ei_startpfn, ei_endpfn, ram = 0;
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ if (e820_find_active_region(&e820.map[i],
|
|
+ start_pfn, last_pfn,
|
|
+ &ei_startpfn, &ei_endpfn))
|
|
+ ram += ei_endpfn - ei_startpfn;
|
|
+ }
|
|
+ return end - start - ((u64)ram << PAGE_SHIFT);
|
|
+}
|
|
+
|
|
+static void early_panic(char *msg)
|
|
+{
|
|
+ early_printk(msg);
|
|
+ panic(msg);
|
|
+}
|
|
+
|
|
+static int userdef __initdata;
|
|
+
|
|
+/* "mem=nopentium" disables the 4MB page tables. */
|
|
+static int __init parse_memopt(char *p)
|
|
+{
|
|
+ u64 mem_size, current_end;
|
|
+ unsigned int i;
|
|
+
|
|
+ if (!p)
|
|
+ return -EINVAL;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (!strcmp(p, "nopentium")) {
|
|
+ setup_clear_cpu_cap(X86_FEATURE_PSE);
|
|
+ return 0;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ userdef = 1;
|
|
+ mem_size = memparse(p, &p);
|
|
+ e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
|
|
+
|
|
+ i = e820.nr_map - 1;
|
|
+ current_end = e820.map[i].addr + e820.map[i].size;
|
|
+ if (current_end < mem_size) {
|
|
+ /*
|
|
+ * The e820 map ends before our requested size so
|
|
+ * extend the final entry to the requested address.
|
|
+ */
|
|
+ if (e820.map[i].type == E820_RAM)
|
|
+ e820.map[i].size = mem_size - e820.map[i].addr;
|
|
+ else
|
|
+ e820_add_region(current_end, mem_size - current_end, E820_RAM);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+early_param("mem", parse_memopt);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+static int __init parse_memmap_opt(char *p)
|
|
+{
|
|
+ char *oldp;
|
|
+ u64 start_at, mem_size;
|
|
+
|
|
+ if (!p)
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (!strncmp(p, "exactmap", 8)) {
|
|
+#ifdef CONFIG_CRASH_DUMP
|
|
+ /*
|
|
+ * If we are doing a crash dump, we still need to know
|
|
+ * the real mem size before original memory map is
|
|
+ * reset.
|
|
+ */
|
|
+ saved_max_pfn = e820_end_of_ram_pfn();
|
|
+#endif
|
|
+ e820.nr_map = 0;
|
|
+ userdef = 1;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ oldp = p;
|
|
+ mem_size = memparse(p, &p);
|
|
+ if (p == oldp)
|
|
+ return -EINVAL;
|
|
+
|
|
+ userdef = 1;
|
|
+ if (*p == '@') {
|
|
+ start_at = memparse(p+1, &p);
|
|
+ e820_add_region(start_at, mem_size, E820_RAM);
|
|
+ } else if (*p == '#') {
|
|
+ start_at = memparse(p+1, &p);
|
|
+ e820_add_region(start_at, mem_size, E820_ACPI);
|
|
+ } else if (*p == '$') {
|
|
+ start_at = memparse(p+1, &p);
|
|
+ e820_add_region(start_at, mem_size, E820_RESERVED);
|
|
+ } else
|
|
+ e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
|
|
+
|
|
+ return *p == '\0' ? 0 : -EINVAL;
|
|
+}
|
|
+early_param("memmap", parse_memmap_opt);
|
|
+#endif
|
|
+
|
|
+void __init finish_e820_parsing(void)
|
|
+{
|
|
+ if (userdef) {
|
|
+ int nr = e820.nr_map;
|
|
+
|
|
+ if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
|
|
+ early_panic("Invalid user supplied memory map");
|
|
+ e820.nr_map = nr;
|
|
+
|
|
+ printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
+ _e820_print_map(&e820, "user");
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline const char *e820_type_to_string(int e820_type)
|
|
+{
|
|
+ switch (e820_type) {
|
|
+ case E820_RESERVED_KERN:
|
|
+ case E820_RAM: return "System RAM";
|
|
+ case E820_ACPI: return "ACPI Tables";
|
|
+ case E820_NVS: return "ACPI Non-volatile Storage";
|
|
+ default: return "reserved";
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+#define e820 machine_e820
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Mark e820 reserved areas as busy for the resource manager.
|
|
+ */
|
|
+void __init e820_reserve_resources(void)
|
|
+{
|
|
+ int i;
|
|
+ struct resource *res;
|
|
+ u64 end;
|
|
+
|
|
+ res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ end = e820.map[i].addr + e820.map[i].size - 1;
|
|
+#ifndef CONFIG_RESOURCES_64BIT
|
|
+ if (end > 0x100000000ULL) {
|
|
+ res++;
|
|
+ continue;
|
|
+ }
|
|
+#endif
|
|
+ res->name = e820_type_to_string(e820.map[i].type);
|
|
+ res->start = e820.map[i].addr;
|
|
+ res->end = end;
|
|
+
|
|
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
+ insert_resource(&iomem_resource, res);
|
|
+ res++;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < e820_saved.nr_map; i++) {
|
|
+ struct e820entry *entry = &e820_saved.map[i];
|
|
+ firmware_map_add_early(entry->addr,
|
|
+ entry->addr + entry->size - 1,
|
|
+ e820_type_to_string(entry->type));
|
|
+ }
|
|
+}
|
|
+
|
|
+#undef e820
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+char *__init default_machine_specific_memory_setup(void)
|
|
+{
|
|
+ char *who = "BIOS-e820";
|
|
+ int new_nr;
|
|
+ /*
|
|
+ * Try to copy the BIOS-supplied E820-map.
|
|
+ *
|
|
+ * Otherwise fake a memory map; one section from 0k->640k,
|
|
+ * the next section from 1mb->appropriate_mem_k
|
|
+ */
|
|
+ new_nr = boot_params.e820_entries;
|
|
+ sanitize_e820_map(boot_params.e820_map,
|
|
+ ARRAY_SIZE(boot_params.e820_map),
|
|
+ &new_nr);
|
|
+ boot_params.e820_entries = new_nr;
|
|
+ if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
|
|
+ < 0) {
|
|
+ u64 mem_size;
|
|
+
|
|
+ /* compare results from other methods and take the greater */
|
|
+ if (boot_params.alt_mem_k
|
|
+ < boot_params.screen_info.ext_mem_k) {
|
|
+ mem_size = boot_params.screen_info.ext_mem_k;
|
|
+ who = "BIOS-88";
|
|
+ } else {
|
|
+ mem_size = boot_params.alt_mem_k;
|
|
+ who = "BIOS-e801";
|
|
+ }
|
|
+
|
|
+ e820.nr_map = 0;
|
|
+ e820_add_region(0, LOWMEMSIZE(), E820_RAM);
|
|
+ e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
|
|
+ }
|
|
+
|
|
+ /* In case someone cares... */
|
|
+ return who;
|
|
+}
|
|
+
|
|
+char *__init __attribute__((weak)) machine_specific_memory_setup(void)
|
|
+{
|
|
+ if (x86_quirks->arch_memory_setup) {
|
|
+ char *who = x86_quirks->arch_memory_setup();
|
|
+
|
|
+ if (who)
|
|
+ return who;
|
|
+ }
|
|
+ return default_machine_specific_memory_setup();
|
|
+}
|
|
+#endif
|
|
+
|
|
+static char * __init _memory_setup(void)
|
|
+{
|
|
+ int rc, nr_map;
|
|
+ struct xen_memory_map memmap;
|
|
+ static struct e820entry __initdata map[E820MAX];
|
|
+
|
|
+ memmap.nr_entries = E820MAX;
|
|
+ set_xen_guest_handle(memmap.buffer, map);
|
|
+
|
|
+ rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
|
|
+ if (rc == -ENOSYS) {
|
|
+ memmap.nr_entries = 1;
|
|
+ map[0].addr = 0ULL;
|
|
+ map[0].size = PFN_PHYS((unsigned long long)xen_start_info->nr_pages);
|
|
+ /* 8MB slack (to balance backend allocations). */
|
|
+ map[0].size += 8ULL << 20;
|
|
+ map[0].type = E820_RAM;
|
|
+ rc = 0;
|
|
+ }
|
|
+ BUG_ON(rc);
|
|
+
|
|
+ nr_map = memmap.nr_entries;
|
|
+ sanitize_e820_map(map, ARRAY_SIZE(map), &nr_map);
|
|
+
|
|
+ if (append_e820_map(map, nr_map) < 0)
|
|
+ BUG();
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ if (is_initial_xendomain()) {
|
|
+ memmap.nr_entries = E820MAX;
|
|
+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
|
|
+
|
|
+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
|
|
+ BUG();
|
|
+ machine_e820.nr_map = memmap.nr_entries;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ return "Xen";
|
|
+}
|
|
+
|
|
+void __init setup_memory_map(void)
|
|
+{
|
|
+ char *who;
|
|
+
|
|
+ who = _memory_setup();
|
|
+#ifdef CONFIG_XEN
|
|
+ if (is_initial_xendomain()) {
|
|
+ printk(KERN_INFO "Xen-provided machine memory map:\n");
|
|
+ _e820_print_map(&machine_e820, "BIOS");
|
|
+ } else
|
|
+#endif
|
|
+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
|
|
+ printk(KERN_INFO "Xen-provided physical RAM map:\n");
|
|
+ _e820_print_map(&e820, who);
|
|
+}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/e820_32-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,860 +0,0 @@
-#include <linux/kernel.h>
|
|
-#include <linux/types.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/ioport.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/mm.h>
|
|
-#include <linux/pfn.h>
|
|
-#include <linux/uaccess.h>
|
|
-#include <linux/suspend.h>
|
|
-
|
|
-#include <asm/pgtable.h>
|
|
-#include <asm/page.h>
|
|
-#include <asm/e820.h>
|
|
-#include <asm/setup.h>
|
|
-#include <xen/interface/memory.h>
|
|
-
|
|
-struct e820map e820;
|
|
-struct change_member {
|
|
- struct e820entry *pbios; /* pointer to original bios entry */
|
|
- unsigned long long addr; /* address for this change point */
|
|
-};
|
|
-static struct change_member change_point_list[2*E820MAX] __initdata;
|
|
-static struct change_member *change_point[2*E820MAX] __initdata;
|
|
-static struct e820entry *overlap_list[E820MAX] __initdata;
|
|
-static struct e820entry new_bios[E820MAX] __initdata;
|
|
-/* For PCI or other memory-mapped resources */
|
|
-unsigned long pci_mem_start = 0x10000000;
|
|
-#ifdef CONFIG_PCI
|
|
-EXPORT_SYMBOL(pci_mem_start);
|
|
-#endif
|
|
-extern int user_defined_memmap;
|
|
-
|
|
-static struct resource system_rom_resource = {
|
|
- .name = "System ROM",
|
|
- .start = 0xf0000,
|
|
- .end = 0xfffff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource extension_rom_resource = {
|
|
- .name = "Extension ROM",
|
|
- .start = 0xe0000,
|
|
- .end = 0xeffff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource adapter_rom_resources[] = { {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0xc8000,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-} };
|
|
-
|
|
-static struct resource video_rom_resource = {
|
|
- .name = "Video ROM",
|
|
- .start = 0xc0000,
|
|
- .end = 0xc7fff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-#define ROMSIGNATURE 0xaa55
|
|
-
|
|
-static int __init romsignature(const unsigned char *rom)
|
|
-{
|
|
- const unsigned short * const ptr = (const unsigned short *)rom;
|
|
- unsigned short sig;
|
|
-
|
|
- return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
|
|
-}
|
|
-
|
|
-static int __init romchecksum(const unsigned char *rom, unsigned long length)
|
|
-{
|
|
- unsigned char sum, c;
|
|
-
|
|
- for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
|
|
- sum += c;
|
|
- return !length && !sum;
|
|
-}
|
|
-
|
|
-static void __init probe_roms(void)
|
|
-{
|
|
- const unsigned char *rom;
|
|
- unsigned long start, length, upper;
|
|
- unsigned char c;
|
|
- int i;
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- /* Nothing to do if not running in dom0. */
|
|
- if (!is_initial_xendomain())
|
|
- return;
|
|
-#endif
|
|
-
|
|
- /* video rom */
|
|
- upper = adapter_rom_resources[0].start;
|
|
- for (start = video_rom_resource.start; start < upper; start += 2048) {
|
|
- rom = isa_bus_to_virt(start);
|
|
- if (!romsignature(rom))
|
|
- continue;
|
|
-
|
|
- video_rom_resource.start = start;
|
|
-
|
|
- if (probe_kernel_address(rom + 2, c) != 0)
|
|
- continue;
|
|
-
|
|
- /* 0 < length <= 0x7f * 512, historically */
|
|
- length = c * 512;
|
|
-
|
|
- /* if checksum okay, trust length byte */
|
|
- if (length && romchecksum(rom, length))
|
|
- video_rom_resource.end = start + length - 1;
|
|
-
|
|
- request_resource(&iomem_resource, &video_rom_resource);
|
|
- break;
|
|
- }
|
|
-
|
|
- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
|
|
- if (start < upper)
|
|
- start = upper;
|
|
-
|
|
- /* system rom */
|
|
- request_resource(&iomem_resource, &system_rom_resource);
|
|
- upper = system_rom_resource.start;
|
|
-
|
|
- /* check for extension rom (ignore length byte!) */
|
|
- rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
|
|
- if (romsignature(rom)) {
|
|
- length = extension_rom_resource.end - extension_rom_resource.start + 1;
|
|
- if (romchecksum(rom, length)) {
|
|
- request_resource(&iomem_resource, &extension_rom_resource);
|
|
- upper = extension_rom_resource.start;
|
|
- }
|
|
- }
|
|
-
|
|
- /* check for adapter roms on 2k boundaries */
|
|
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
|
|
- rom = isa_bus_to_virt(start);
|
|
- if (!romsignature(rom))
|
|
- continue;
|
|
-
|
|
- if (probe_kernel_address(rom + 2, c) != 0)
|
|
- continue;
|
|
-
|
|
- /* 0 < length <= 0x7f * 512, historically */
|
|
- length = c * 512;
|
|
-
|
|
- /* but accept any length that fits if checksum okay */
|
|
- if (!length || start + length > upper || !romchecksum(rom, length))
|
|
- continue;
|
|
-
|
|
- adapter_rom_resources[i].start = start;
|
|
- adapter_rom_resources[i].end = start + length - 1;
|
|
- request_resource(&iomem_resource, &adapter_rom_resources[i]);
|
|
-
|
|
- start = adapter_rom_resources[i++].end & ~2047UL;
|
|
- }
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-static struct e820map machine_e820;
|
|
-#define e820 machine_e820
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Request address space for all standard RAM and ROM resources
|
|
- * and also for regions reported as reserved by the e820.
|
|
- */
|
|
-void __init init_iomem_resources(struct resource *code_resource,
|
|
- struct resource *data_resource,
|
|
- struct resource *bss_resource)
|
|
-{
|
|
- int i;
|
|
-
|
|
- probe_roms();
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct resource *res;
|
|
-#ifndef CONFIG_RESOURCES_64BIT
|
|
- if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
|
|
- continue;
|
|
-#endif
|
|
- res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
|
|
- switch (e820.map[i].type) {
|
|
- case E820_RAM: res->name = "System RAM"; break;
|
|
- case E820_ACPI: res->name = "ACPI Tables"; break;
|
|
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
|
|
- default: res->name = "reserved";
|
|
- }
|
|
- res->start = e820.map[i].addr;
|
|
- res->end = res->start + e820.map[i].size - 1;
|
|
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
- if (request_resource(&iomem_resource, res)) {
|
|
- kfree(res);
|
|
- continue;
|
|
- }
|
|
- if (e820.map[i].type == E820_RAM) {
|
|
- /*
|
|
- * We don't know which RAM region contains kernel data,
|
|
- * so we try it repeatedly and let the resource manager
|
|
- * test it.
|
|
- */
|
|
-#ifndef CONFIG_XEN
|
|
- request_resource(res, code_resource);
|
|
- request_resource(res, data_resource);
|
|
- request_resource(res, bss_resource);
|
|
-#endif
|
|
-#ifdef CONFIG_KEXEC
|
|
- if (crashk_res.start != crashk_res.end)
|
|
- request_resource(res, &crashk_res);
|
|
-#ifdef CONFIG_XEN
|
|
- xen_machine_kexec_register_resources(res);
|
|
-#endif
|
|
-#endif
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-#undef e820
|
|
-
|
|
-#if defined(CONFIG_PM) && defined(CONFIG_HIBERNATION)
|
|
-/**
|
|
- * e820_mark_nosave_regions - Find the ranges of physical addresses that do not
|
|
- * correspond to e820 RAM areas and mark the corresponding pages as nosave for
|
|
- * hibernation.
|
|
- *
|
|
- * This function requires the e820 map to be sorted and without any
|
|
- * overlapping entries and assumes the first e820 area to be RAM.
|
|
- */
|
|
-void __init e820_mark_nosave_regions(void)
|
|
-{
|
|
- int i;
|
|
- unsigned long pfn;
|
|
-
|
|
- pfn = PFN_DOWN(e820.map[0].addr + e820.map[0].size);
|
|
- for (i = 1; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
-
|
|
- if (pfn < PFN_UP(ei->addr))
|
|
- register_nosave_region(pfn, PFN_UP(ei->addr));
|
|
-
|
|
- pfn = PFN_DOWN(ei->addr + ei->size);
|
|
- if (ei->type != E820_RAM)
|
|
- register_nosave_region(PFN_UP(ei->addr), pfn);
|
|
-
|
|
- if (pfn >= max_low_pfn)
|
|
- break;
|
|
- }
|
|
-}
|
|
-#endif
|
|
-
|
|
-void __init add_memory_region(unsigned long long start,
|
|
- unsigned long long size, int type)
|
|
-{
|
|
- int x;
|
|
-
|
|
- x = e820.nr_map;
|
|
-
|
|
- if (x == E820MAX) {
|
|
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
|
- return;
|
|
- }
|
|
-
|
|
- e820.map[x].addr = start;
|
|
- e820.map[x].size = size;
|
|
- e820.map[x].type = type;
|
|
- e820.nr_map++;
|
|
-} /* add_memory_region */
|
|
-
|
|
-/*
|
|
- * Sanitize the BIOS e820 map.
|
|
- *
|
|
- * Some e820 responses include overlapping entries. The following
|
|
- * replaces the original e820 map with a new one, removing overlaps.
|
|
- *
|
|
- */
|
|
-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
|
-{
|
|
- struct change_member *change_tmp;
|
|
- unsigned long current_type, last_type;
|
|
- unsigned long long last_addr;
|
|
- int chgidx, still_changing;
|
|
- int overlap_entries;
|
|
- int new_bios_entry;
|
|
- int old_nr, new_nr, chg_nr;
|
|
- int i;
|
|
-
|
|
- /*
|
|
- Visually we're performing the following (1,2,3,4 = memory types)...
|
|
-
|
|
- Sample memory map (w/overlaps):
|
|
- ____22__________________
|
|
- ______________________4_
|
|
- ____1111________________
|
|
- _44_____________________
|
|
- 11111111________________
|
|
- ____________________33__
|
|
- ___________44___________
|
|
- __________33333_________
|
|
- ______________22________
|
|
- ___________________2222_
|
|
- _________111111111______
|
|
- _____________________11_
|
|
- _________________4______
|
|
-
|
|
- Sanitized equivalent (no overlap):
|
|
- 1_______________________
|
|
- _44_____________________
|
|
- ___1____________________
|
|
- ____22__________________
|
|
- ______11________________
|
|
- _________1______________
|
|
- __________3_____________
|
|
- ___________44___________
|
|
- _____________33_________
|
|
- _______________2________
|
|
- ________________1_______
|
|
- _________________4______
|
|
- ___________________2____
|
|
- ____________________33__
|
|
- ______________________4_
|
|
- */
|
|
- /* if there's only one memory region, don't bother */
|
|
- if (*pnr_map < 2) {
|
|
- return -1;
|
|
- }
|
|
-
|
|
- old_nr = *pnr_map;
|
|
-
|
|
- /* bail out if we find any unreasonable addresses in bios map */
|
|
- for (i=0; i<old_nr; i++)
|
|
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
|
|
- return -1;
|
|
- }
|
|
-
|
|
- /* create pointers for initial change-point information (for sorting) */
|
|
- for (i=0; i < 2*old_nr; i++)
|
|
- change_point[i] = &change_point_list[i];
|
|
-
|
|
- /* record all known change-points (starting and ending addresses),
|
|
- omitting those that are for empty memory regions */
|
|
- chgidx = 0;
|
|
- for (i=0; i < old_nr; i++) {
|
|
- if (biosmap[i].size != 0) {
|
|
- change_point[chgidx]->addr = biosmap[i].addr;
|
|
- change_point[chgidx++]->pbios = &biosmap[i];
|
|
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
|
|
- change_point[chgidx++]->pbios = &biosmap[i];
|
|
- }
|
|
- }
|
|
- chg_nr = chgidx; /* true number of change-points */
|
|
-
|
|
- /* sort change-point list by memory addresses (low -> high) */
|
|
- still_changing = 1;
|
|
- while (still_changing) {
|
|
- still_changing = 0;
|
|
- for (i=1; i < chg_nr; i++) {
|
|
- /* if <current_addr> > <last_addr>, swap */
|
|
- /* or, if current=<start_addr> & last=<end_addr>, swap */
|
|
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
|
|
- ((change_point[i]->addr == change_point[i-1]->addr) &&
|
|
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
|
|
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
|
|
- )
|
|
- {
|
|
- change_tmp = change_point[i];
|
|
- change_point[i] = change_point[i-1];
|
|
- change_point[i-1] = change_tmp;
|
|
- still_changing=1;
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- /* create a new bios memory map, removing overlaps */
|
|
- overlap_entries=0; /* number of entries in the overlap table */
|
|
- new_bios_entry=0; /* index for creating new bios map entries */
|
|
- last_type = 0; /* start with undefined memory type */
|
|
- last_addr = 0; /* start with 0 as last starting address */
|
|
- /* loop through change-points, determining affect on the new bios map */
|
|
- for (chgidx=0; chgidx < chg_nr; chgidx++)
|
|
- {
|
|
- /* keep track of all overlapping bios entries */
|
|
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
|
|
- {
|
|
- /* add map entry to overlap list (> 1 entry implies an overlap) */
|
|
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
|
|
- }
|
|
- else
|
|
- {
|
|
- /* remove entry from list (order independent, so swap with last) */
|
|
- for (i=0; i<overlap_entries; i++)
|
|
- {
|
|
- if (overlap_list[i] == change_point[chgidx]->pbios)
|
|
- overlap_list[i] = overlap_list[overlap_entries-1];
|
|
- }
|
|
- overlap_entries--;
|
|
- }
|
|
- /* if there are overlapping entries, decide which "type" to use */
|
|
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
|
|
- current_type = 0;
|
|
- for (i=0; i<overlap_entries; i++)
|
|
- if (overlap_list[i]->type > current_type)
|
|
- current_type = overlap_list[i]->type;
|
|
- /* continue building up new bios map based on this information */
|
|
- if (current_type != last_type) {
|
|
- if (last_type != 0) {
|
|
- new_bios[new_bios_entry].size =
|
|
- change_point[chgidx]->addr - last_addr;
|
|
- /* move forward only if the new size was non-zero */
|
|
- if (new_bios[new_bios_entry].size != 0)
|
|
- if (++new_bios_entry >= E820MAX)
|
|
- break; /* no more space left for new bios entries */
|
|
- }
|
|
- if (current_type != 0) {
|
|
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
|
|
- new_bios[new_bios_entry].type = current_type;
|
|
- last_addr=change_point[chgidx]->addr;
|
|
- }
|
|
- last_type = current_type;
|
|
- }
|
|
- }
|
|
- new_nr = new_bios_entry; /* retain count for new bios entries */
|
|
-
|
|
- /* copy new bios mapping into original location */
|
|
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
|
|
- *pnr_map = new_nr;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Copy the BIOS e820 map into a safe place.
|
|
- *
|
|
- * Sanity-check it while we're at it..
|
|
- *
|
|
- * If we're lucky and live on a modern system, the setup code
|
|
- * will have given us a memory map that we can use to properly
|
|
- * set up memory. If we aren't, we'll fake a memory map.
|
|
- *
|
|
- * We check to see that the memory map contains at least 2 elements
|
|
- * before we'll use it, because the detection code in setup.S may
|
|
- * not be perfect and most every PC known to man has two memory
|
|
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
|
|
- * thinkpad 560x, for example, does not cooperate with the memory
|
|
- * detection code.)
|
|
- */
|
|
-int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- /* Only one memory region (or negative)? Ignore it */
|
|
- if (nr_map < 2)
|
|
- return -1;
|
|
-#else
|
|
- BUG_ON(nr_map < 1);
|
|
-#endif
|
|
-
|
|
- do {
|
|
- u64 start = biosmap->addr;
|
|
- u64 size = biosmap->size;
|
|
- u64 end = start + size;
|
|
- u32 type = biosmap->type;
|
|
-
|
|
- /* Overflow in 64 bits? Ignore the memory map. */
|
|
- if (start > end)
|
|
- return -1;
|
|
-
|
|
- add_memory_region(start, size, type);
|
|
- } while (biosmap++, --nr_map);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- if (is_initial_xendomain()) {
|
|
- struct xen_memory_map memmap;
|
|
-
|
|
- memmap.nr_entries = E820MAX;
|
|
- set_xen_guest_handle(memmap.buffer, machine_e820.map);
|
|
-
|
|
- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
|
|
- BUG();
|
|
- machine_e820.nr_map = memmap.nr_entries;
|
|
- } else
|
|
- machine_e820 = e820;
|
|
-#endif
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Find the highest page frame number we have available
|
|
- */
|
|
-void __init propagate_e820_map(void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- max_pfn = 0;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- unsigned long start, end;
|
|
- /* RAM? */
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
- start = PFN_UP(e820.map[i].addr);
|
|
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
|
- if (start >= end)
|
|
- continue;
|
|
- if (end > max_pfn)
|
|
- max_pfn = end;
|
|
- memory_present(0, start, end);
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
- * Register fully available low RAM pages with the bootmem allocator.
|
|
- */
|
|
-void __init register_bootmem_low_pages(unsigned long max_low_pfn)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- unsigned long curr_pfn, last_pfn, size;
|
|
- /*
|
|
- * Reserve usable low memory
|
|
- */
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
- /*
|
|
- * We are rounding up the start address of usable memory:
|
|
- */
|
|
- curr_pfn = PFN_UP(e820.map[i].addr);
|
|
- if (curr_pfn >= max_low_pfn)
|
|
- continue;
|
|
- /*
|
|
- * ... and at the end of the usable range downwards:
|
|
- */
|
|
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- /*
|
|
- * Truncate to the number of actual pages currently
|
|
- * present.
|
|
- */
|
|
- if (last_pfn > xen_start_info->nr_pages)
|
|
- last_pfn = xen_start_info->nr_pages;
|
|
-#endif
|
|
-
|
|
- if (last_pfn > max_low_pfn)
|
|
- last_pfn = max_low_pfn;
|
|
-
|
|
- /*
|
|
- * .. finally, did all the rounding and playing
|
|
- * around just make the area go away?
|
|
- */
|
|
- if (last_pfn <= curr_pfn)
|
|
- continue;
|
|
-
|
|
- size = last_pfn - curr_pfn;
|
|
- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
|
|
- }
|
|
-}
|
|
-
|
|
-void __init e820_register_memory(void)
|
|
-{
|
|
- unsigned long gapstart, gapsize, round;
|
|
- unsigned long long last;
|
|
- int i;
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-#define e820 machine_e820
|
|
-#endif
|
|
- /*
|
|
- * Search for the biggest gap in the low 32 bits of the e820
|
|
- * memory space.
|
|
- */
|
|
- last = 0x100000000ull;
|
|
- gapstart = 0x10000000;
|
|
- gapsize = 0x400000;
|
|
- i = e820.nr_map;
|
|
- while (--i >= 0) {
|
|
- unsigned long long start = e820.map[i].addr;
|
|
- unsigned long long end = start + e820.map[i].size;
|
|
-
|
|
- /*
|
|
- * Since "last" is at most 4GB, we know we'll
|
|
- * fit in 32 bits if this condition is true
|
|
- */
|
|
- if (last > end) {
|
|
- unsigned long gap = last - end;
|
|
-
|
|
- if (gap > gapsize) {
|
|
- gapsize = gap;
|
|
- gapstart = end;
|
|
- }
|
|
- }
|
|
- if (start < last)
|
|
- last = start;
|
|
- }
|
|
-#undef e820
|
|
-
|
|
- /*
|
|
- * See how much we want to round up: start off with
|
|
- * rounding to the next 1MB area.
|
|
- */
|
|
- round = 0x100000;
|
|
- while ((gapsize >> 4) > round)
|
|
- round += round;
|
|
- /* Fun with two's complement */
|
|
- pci_mem_start = (gapstart + round) & -round;
|
|
-
|
|
- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
|
|
- pci_mem_start, gapstart, gapsize);
|
|
-}
|
|
-
|
|
-void __init print_memory_map(char *who)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- printk(" %s: %016Lx - %016Lx ", who,
|
|
- e820.map[i].addr,
|
|
- e820.map[i].addr + e820.map[i].size);
|
|
- switch (e820.map[i].type) {
|
|
- case E820_RAM: printk("(usable)\n");
|
|
- break;
|
|
- case E820_RESERVED:
|
|
- printk("(reserved)\n");
|
|
- break;
|
|
- case E820_ACPI:
|
|
- printk("(ACPI data)\n");
|
|
- break;
|
|
- case E820_NVS:
|
|
- printk("(ACPI NVS)\n");
|
|
- break;
|
|
- default: printk("type %u\n", e820.map[i].type);
|
|
- break;
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-void __init limit_regions(unsigned long long size)
|
|
-{
|
|
- unsigned long long current_addr = 0;
|
|
- int i;
|
|
-
|
|
- print_memory_map("limit_regions start");
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- current_addr = e820.map[i].addr + e820.map[i].size;
|
|
- if (current_addr < size)
|
|
- continue;
|
|
-
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
-
|
|
- if (e820.map[i].addr >= size) {
|
|
- /*
|
|
- * This region starts past the end of the
|
|
- * requested size, skip it completely.
|
|
- */
|
|
- e820.nr_map = i;
|
|
- } else {
|
|
- e820.nr_map = i + 1;
|
|
- e820.map[i].size -= current_addr - size;
|
|
- }
|
|
- print_memory_map("limit_regions endfor");
|
|
- return;
|
|
- }
|
|
-#ifdef CONFIG_XEN
|
|
- if (current_addr < size) {
|
|
- /*
|
|
- * The e820 map finished before our requested size so
|
|
- * extend the final entry to the requested address.
|
|
- */
|
|
- --i;
|
|
- if (e820.map[i].type == E820_RAM)
|
|
- e820.map[i].size -= current_addr - size;
|
|
- else
|
|
- add_memory_region(current_addr, size - current_addr, E820_RAM);
|
|
- }
|
|
-#endif
|
|
- print_memory_map("limit_regions endfunc");
|
|
-}
|
|
-
|
|
-/*
|
|
- * This function checks if any part of the range <start,end> is mapped
|
|
- * with type.
|
|
- */
|
|
-int
|
|
-e820_any_mapped(u64 start, u64 end, unsigned type)
|
|
-{
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- const struct e820entry *ei = &e820.map[i];
|
|
-#else
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
- for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
- const struct e820entry *ei = &machine_e820.map[i];
|
|
-#endif
|
|
-
|
|
- if (type && ei->type != type)
|
|
- continue;
|
|
- if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
- continue;
|
|
- return 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(e820_any_mapped);
|
|
-
|
|
- /*
|
|
- * This function checks if the entire range <start,end> is mapped with type.
|
|
- *
|
|
- * Note: this function only works correct if the e820 table is sorted and
|
|
- * not-overlapping, which is the case
|
|
- */
|
|
-int __init
|
|
-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
|
|
-{
|
|
- u64 start = s;
|
|
- u64 end = e;
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
-#else
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
- for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
- const struct e820entry *ei = &machine_e820.map[i];
|
|
-#endif
|
|
-
|
|
- if (type && ei->type != type)
|
|
- continue;
|
|
- /* is the region (part) in overlap with the current region ?*/
|
|
- if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
- continue;
|
|
- /* if the region is at the beginning of <start,end> we move
|
|
- * start to the end of the region since it's ok until there
|
|
- */
|
|
- if (ei->addr <= start)
|
|
- start = ei->addr + ei->size;
|
|
- /* if start is now at or beyond end, we're done, full
|
|
- * coverage */
|
|
- if (start >= end)
|
|
- return 1; /* we're done */
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int __init parse_memmap(char *arg)
|
|
-{
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- if (strcmp(arg, "exactmap") == 0) {
|
|
-#ifdef CONFIG_CRASH_DUMP
|
|
- /* If we are doing a crash dump, we
|
|
- * still need to know the real mem
|
|
- * size before original memory map is
|
|
- * reset.
|
|
- */
|
|
- propagate_e820_map();
|
|
- saved_max_pfn = max_pfn;
|
|
-#endif
|
|
- e820.nr_map = 0;
|
|
- user_defined_memmap = 1;
|
|
- } else {
|
|
- /* If the user specifies memory size, we
|
|
- * limit the BIOS-provided memory map to
|
|
- * that size. exactmap can be used to specify
|
|
- * the exact map. mem=number can be used to
|
|
- * trim the existing memory map.
|
|
- */
|
|
- unsigned long long start_at, mem_size;
|
|
-
|
|
- mem_size = memparse(arg, &arg);
|
|
- if (*arg == '@') {
|
|
- start_at = memparse(arg+1, &arg);
|
|
- add_memory_region(start_at, mem_size, E820_RAM);
|
|
- } else if (*arg == '#') {
|
|
- start_at = memparse(arg+1, &arg);
|
|
- add_memory_region(start_at, mem_size, E820_ACPI);
|
|
- } else if (*arg == '$') {
|
|
- start_at = memparse(arg+1, &arg);
|
|
- add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
- } else {
|
|
- limit_regions(mem_size);
|
|
- user_defined_memmap = 1;
|
|
- }
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-early_param("memmap", parse_memmap);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
|
|
- unsigned new_type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- BUG_ON(old_type == new_type);
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- u64 final_start, final_end;
|
|
- if (ei->type != old_type)
|
|
- continue;
|
|
- /* totally covered? */
|
|
- if (ei->addr >= start && ei->size <= size) {
|
|
- ei->type = new_type;
|
|
- continue;
|
|
- }
|
|
- /* partially covered */
|
|
- final_start = max(start, ei->addr);
|
|
- final_end = min(start + size, ei->addr + ei->size);
|
|
- if (final_start >= final_end)
|
|
- continue;
|
|
- add_memory_region(final_start, final_end - final_start,
|
|
- new_type);
|
|
- }
|
|
-}
|
|
-
|
|
-void __init update_e820(void)
|
|
-{
|
|
- u8 nr_map;
|
|
-
|
|
- nr_map = e820.nr_map;
|
|
- if (sanitize_e820_map(e820.map, &nr_map))
|
|
- return;
|
|
- e820.nr_map = nr_map;
|
|
- printk(KERN_INFO "modified physical RAM map:\n");
|
|
- print_memory_map("modified");
|
|
-}
|
|
-#endif
|
|
--- head-2011-03-11.orig/arch/x86/kernel/e820_64-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,1052 +0,0 @@
|
|
-/*
|
|
- * Handle the memory map.
|
|
- * The functions here do the job until bootmem takes over.
|
|
- *
|
|
- * Getting sanitize_e820_map() in sync with i386 version by applying change:
|
|
- * - Provisions for empty E820 memory regions (reported by certain BIOSes).
|
|
- * Alex Achenbach <xela@slit.de>, December 2002.
|
|
- * Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
|
|
- *
|
|
- */
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/types.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/ioport.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/mm.h>
|
|
-#include <linux/suspend.h>
|
|
-#include <linux/pfn.h>
|
|
-
|
|
-#include <asm/pgtable.h>
|
|
-#include <asm/page.h>
|
|
-#include <asm/e820.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/sections.h>
|
|
-#include <asm/kdebug.h>
|
|
-#include <xen/interface/memory.h>
|
|
-
|
|
-struct e820map e820 __initdata;
|
|
-#ifdef CONFIG_XEN
|
|
-struct e820map machine_e820;
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * PFN of last memory page.
|
|
- */
|
|
-unsigned long end_pfn;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
|
|
- * The direct mapping extends to max_pfn_mapped, so that we can directly access
|
|
- * apertures, ACPI and other tables without having to play with fixmaps.
|
|
- */
|
|
-unsigned long max_pfn_mapped;
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Last pfn which the user wants to use.
|
|
- */
|
|
-static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
|
|
-
|
|
-/*
|
|
- * Early reserved memory areas.
|
|
- */
|
|
-#define MAX_EARLY_RES 20
|
|
-
|
|
-struct early_res {
|
|
- unsigned long start, end;
|
|
- char name[16];
|
|
-};
|
|
-static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
|
-#ifndef CONFIG_XEN
|
|
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
|
|
-#ifdef CONFIG_X86_TRAMPOLINE
|
|
- { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" },
|
|
-#endif
|
|
-#endif
|
|
- {}
|
|
-};
|
|
-
|
|
-void __init reserve_early(unsigned long start, unsigned long end, char *name)
|
|
-{
|
|
- int i;
|
|
- struct early_res *r;
|
|
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
- r = &early_res[i];
|
|
- if (end > r->start && start < r->end)
|
|
- panic("Overlapping early reservations %lx-%lx %s to %lx-%lx %s\n",
|
|
- start, end - 1, name?name:"", r->start, r->end - 1, r->name);
|
|
- }
|
|
- if (i >= MAX_EARLY_RES)
|
|
- panic("Too many early reservations");
|
|
- r = &early_res[i];
|
|
- r->start = start;
|
|
- r->end = end;
|
|
- if (name)
|
|
- strncpy(r->name, name, sizeof(r->name) - 1);
|
|
-}
|
|
-
|
|
-void __init free_early(unsigned long start, unsigned long end)
|
|
-{
|
|
- struct early_res *r;
|
|
- int i, j;
|
|
-
|
|
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
- r = &early_res[i];
|
|
- if (start == r->start && end == r->end)
|
|
- break;
|
|
- }
|
|
- if (i >= MAX_EARLY_RES || !early_res[i].end)
|
|
- panic("free_early on not reserved area: %lx-%lx!", start, end);
|
|
-
|
|
- for (j = i + 1; j < MAX_EARLY_RES && early_res[j].end; j++)
|
|
- ;
|
|
-
|
|
- memmove(&early_res[i], &early_res[i + 1],
|
|
- (j - 1 - i) * sizeof(struct early_res));
|
|
-
|
|
- early_res[j - 1].end = 0;
|
|
-}
|
|
-
|
|
-void __init early_res_to_bootmem(unsigned long start, unsigned long end)
|
|
-{
|
|
- int i;
|
|
- unsigned long final_start, final_end;
|
|
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
- struct early_res *r = &early_res[i];
|
|
- final_start = max(start, r->start);
|
|
- final_end = min(end, r->end);
|
|
- if (final_start >= final_end)
|
|
- continue;
|
|
- printk(KERN_INFO " early res: %d [%lx-%lx] %s\n", i,
|
|
- final_start, final_end - 1, r->name);
|
|
- reserve_bootmem_generic(final_start, final_end - final_start);
|
|
- }
|
|
-}
|
|
-
|
|
-/* Check for already reserved areas */
|
|
-static inline int __init
|
|
-bad_addr(unsigned long *addrp, unsigned long size, unsigned long align)
|
|
-{
|
|
- int i;
|
|
- unsigned long addr = *addrp, last;
|
|
- int changed = 0;
|
|
-again:
|
|
- last = addr + size;
|
|
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
- struct early_res *r = &early_res[i];
|
|
- if (last >= r->start && addr < r->end) {
|
|
- *addrp = addr = round_up(r->end, align);
|
|
- changed = 1;
|
|
- goto again;
|
|
- }
|
|
- }
|
|
- return changed;
|
|
-}
|
|
-
|
|
-/* Check for already reserved areas */
|
|
-static inline int __init
|
|
-bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align)
|
|
-{
|
|
- int i;
|
|
- unsigned long addr = *addrp, last;
|
|
- unsigned long size = *sizep;
|
|
- int changed = 0;
|
|
-again:
|
|
- last = addr + size;
|
|
- for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) {
|
|
- struct early_res *r = &early_res[i];
|
|
- if (last > r->start && addr < r->start) {
|
|
- size = r->start - addr;
|
|
- changed = 1;
|
|
- goto again;
|
|
- }
|
|
- if (last > r->end && addr < r->end) {
|
|
- addr = round_up(r->end, align);
|
|
- size = last - addr;
|
|
- changed = 1;
|
|
- goto again;
|
|
- }
|
|
- if (last <= r->end && addr >= r->start) {
|
|
- (*sizep)++;
|
|
- return 0;
|
|
- }
|
|
- }
|
|
- if (changed) {
|
|
- *addrp = addr;
|
|
- *sizep = size;
|
|
- }
|
|
- return changed;
|
|
-}
|
|
-/*
|
|
- * This function checks if any part of the range <start,end> is mapped
|
|
- * with type.
|
|
- */
|
|
-int
|
|
-e820_any_mapped(unsigned long start, unsigned long end, unsigned type)
|
|
-{
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
-#else
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
- for (i = 0; i < machine_e820.nr_map; i++) {
|
|
- const struct e820entry *ei = &machine_e820.map[i];
|
|
-#endif
|
|
-
|
|
- if (type && ei->type != type)
|
|
- continue;
|
|
- if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
- continue;
|
|
- return 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(e820_any_mapped);
|
|
-
|
|
-/*
|
|
- * This function checks if the entire range <start,end> is mapped with type.
|
|
- *
|
|
- * Note: this function only works correct if the e820 table is sorted and
|
|
- * not-overlapping, which is the case
|
|
- */
|
|
-int __init e820_all_mapped(unsigned long start, unsigned long end,
|
|
- unsigned type)
|
|
-{
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
-#else
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
- for (i = 0; i < machine_e820.nr_map; i++) {
|
|
- const struct e820entry *ei = &machine_e820.map[i];
|
|
-#endif
|
|
-
|
|
- if (type && ei->type != type)
|
|
- continue;
|
|
- /* is the region (part) in overlap with the current region ?*/
|
|
- if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
- continue;
|
|
-
|
|
- /* if the region is at the beginning of <start,end> we move
|
|
- * start to the end of the region since it's ok until there
|
|
- */
|
|
- if (ei->addr <= start)
|
|
- start = ei->addr + ei->size;
|
|
- /*
|
|
- * if start is now at or beyond end, we're done, full
|
|
- * coverage
|
|
- */
|
|
- if (start >= end)
|
|
- return 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Find a free area with specified alignment in a specific range.
|
|
- */
|
|
-unsigned long __init find_e820_area(unsigned long start, unsigned long end,
|
|
- unsigned long size, unsigned long align)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- unsigned long addr, last;
|
|
- unsigned long ei_last;
|
|
-
|
|
- if (ei->type != E820_RAM)
|
|
- continue;
|
|
- addr = round_up(ei->addr, align);
|
|
- ei_last = ei->addr + ei->size;
|
|
- if (addr < start)
|
|
- addr = round_up(start, align);
|
|
- if (addr >= ei_last)
|
|
- continue;
|
|
- while (bad_addr(&addr, size, align) && addr+size <= ei_last)
|
|
- ;
|
|
- last = addr + size;
|
|
- if (last > ei_last)
|
|
- continue;
|
|
- if (last > end)
|
|
- continue;
|
|
- return addr;
|
|
- }
|
|
- return -1UL;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Find next free range after *start
|
|
- */
|
|
-unsigned long __init find_e820_area_size(unsigned long start,
|
|
- unsigned long *sizep,
|
|
- unsigned long align)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- unsigned long addr, last;
|
|
- unsigned long ei_last;
|
|
-
|
|
- if (ei->type != E820_RAM)
|
|
- continue;
|
|
- addr = round_up(ei->addr, align);
|
|
- ei_last = ei->addr + ei->size;
|
|
- if (addr < start)
|
|
- addr = round_up(start, align);
|
|
- if (addr >= ei_last)
|
|
- continue;
|
|
- *sizep = ei_last - addr;
|
|
- while (bad_addr_size(&addr, sizep, align) &&
|
|
- addr + *sizep <= ei_last)
|
|
- ;
|
|
- last = addr + *sizep;
|
|
- if (last > ei_last)
|
|
- continue;
|
|
- return addr;
|
|
- }
|
|
- return -1UL;
|
|
-
|
|
-}
|
|
-/*
|
|
- * Find the highest page frame number we have available
|
|
- */
|
|
-unsigned long __init e820_end_of_ram(void)
|
|
-{
|
|
- unsigned long end_pfn;
|
|
-
|
|
- end_pfn = find_max_pfn_with_active_regions();
|
|
-
|
|
- if (end_pfn > max_pfn_mapped)
|
|
- max_pfn_mapped = end_pfn;
|
|
- if (max_pfn_mapped > MAXMEM>>PAGE_SHIFT)
|
|
- max_pfn_mapped = MAXMEM>>PAGE_SHIFT;
|
|
- if (end_pfn > end_user_pfn)
|
|
- end_pfn = end_user_pfn;
|
|
- if (end_pfn > max_pfn_mapped)
|
|
- end_pfn = max_pfn_mapped;
|
|
-
|
|
- printk(KERN_INFO "max_pfn_mapped = %lu\n", max_pfn_mapped);
|
|
- return end_pfn;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Mark e820 reserved areas as busy for the resource manager.
|
|
- */
|
|
-void __init e820_reserve_resources(struct e820entry *e820, int nr_map)
|
|
-{
|
|
- int i;
|
|
- struct resource *res;
|
|
-
|
|
- res = alloc_bootmem_low(sizeof(struct resource) * nr_map);
|
|
- for (i = 0; i < nr_map; i++) {
|
|
- switch (e820[i].type) {
|
|
- case E820_RAM: res->name = "System RAM"; break;
|
|
- case E820_ACPI: res->name = "ACPI Tables"; break;
|
|
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
|
|
- default: res->name = "reserved";
|
|
- }
|
|
- res->start = e820[i].addr;
|
|
- res->end = res->start + e820[i].size - 1;
|
|
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
- insert_resource(&iomem_resource, res);
|
|
- res++;
|
|
- }
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * Find the ranges of physical addresses that do not correspond to
|
|
- * e820 RAM areas and mark the corresponding pages as nosave for software
|
|
- * suspend and suspend to RAM.
|
|
- *
|
|
- * This function requires the e820 map to be sorted and without any
|
|
- * overlapping entries and assumes the first e820 area to be RAM.
|
|
- */
|
|
-void __init e820_mark_nosave_regions(void)
|
|
-{
|
|
- int i;
|
|
- unsigned long paddr;
|
|
-
|
|
- paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
|
|
- for (i = 1; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
-
|
|
- if (paddr < ei->addr)
|
|
- register_nosave_region(PFN_DOWN(paddr),
|
|
- PFN_UP(ei->addr));
|
|
-
|
|
- paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
|
|
- if (ei->type != E820_RAM)
|
|
- register_nosave_region(PFN_UP(ei->addr),
|
|
- PFN_DOWN(paddr));
|
|
-
|
|
- if (paddr >= (end_pfn << PAGE_SHIFT))
|
|
- break;
|
|
- }
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Finds an active region in the address range from start_pfn to end_pfn and
|
|
- * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
|
|
- */
|
|
-static int __init e820_find_active_region(const struct e820entry *ei,
|
|
- unsigned long start_pfn,
|
|
- unsigned long end_pfn,
|
|
- unsigned long *ei_startpfn,
|
|
- unsigned long *ei_endpfn)
|
|
-{
|
|
-#ifdef CONFIG_XEN
|
|
- if (end_pfn > xen_start_info->nr_pages)
|
|
- end_pfn = xen_start_info->nr_pages;
|
|
-#endif
|
|
-
|
|
- *ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
|
|
- *ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE) >> PAGE_SHIFT;
|
|
-
|
|
- /* Skip map entries smaller than a page */
|
|
- if (*ei_startpfn >= *ei_endpfn)
|
|
- return 0;
|
|
-
|
|
- /* Check if max_pfn_mapped should be updated */
|
|
- if (ei->type != E820_RAM && *ei_endpfn > max_pfn_mapped)
|
|
- max_pfn_mapped = *ei_endpfn;
|
|
-
|
|
- /* Skip if map is outside the node */
|
|
- if (ei->type != E820_RAM || *ei_endpfn <= start_pfn ||
|
|
- *ei_startpfn >= end_pfn)
|
|
- return 0;
|
|
-
|
|
- /* Check for overlaps */
|
|
- if (*ei_startpfn < start_pfn)
|
|
- *ei_startpfn = start_pfn;
|
|
- if (*ei_endpfn > end_pfn)
|
|
- *ei_endpfn = end_pfn;
|
|
-
|
|
- /* Obey end_user_pfn to save on memmap */
|
|
- if (*ei_startpfn >= end_user_pfn)
|
|
- return 0;
|
|
- if (*ei_endpfn > end_user_pfn)
|
|
- *ei_endpfn = end_user_pfn;
|
|
-
|
|
- return 1;
|
|
-}
|
|
-
|
|
-/* Walk the e820 map and register active regions within a node */
|
|
-void __init
|
|
-e820_register_active_regions(int nid, unsigned long start_pfn,
|
|
- unsigned long end_pfn)
|
|
-{
|
|
- unsigned long ei_startpfn;
|
|
- unsigned long ei_endpfn;
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++)
|
|
- if (e820_find_active_region(&e820.map[i],
|
|
- start_pfn, end_pfn,
|
|
- &ei_startpfn, &ei_endpfn))
|
|
- add_active_range(nid, ei_startpfn, ei_endpfn);
|
|
-#ifdef CONFIG_XEN
|
|
- BUG_ON(nid);
|
|
- add_active_range(nid, end_pfn, end_pfn);
|
|
-#endif
|
|
-}
|
|
-
|
|
-/*
|
|
- * Add a memory region to the kernel e820 map.
|
|
- */
|
|
-void __init add_memory_region(unsigned long start, unsigned long size, int type)
|
|
-{
|
|
- int x = e820.nr_map;
|
|
-
|
|
- if (x == E820MAX) {
|
|
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
|
- return;
|
|
- }
|
|
-
|
|
- e820.map[x].addr = start;
|
|
- e820.map[x].size = size;
|
|
- e820.map[x].type = type;
|
|
- e820.nr_map++;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Find the hole size (in bytes) in the memory range.
|
|
- * @start: starting address of the memory range to scan
|
|
- * @end: ending address of the memory range to scan
|
|
- */
|
|
-unsigned long __init e820_hole_size(unsigned long start, unsigned long end)
|
|
-{
|
|
- unsigned long start_pfn = start >> PAGE_SHIFT;
|
|
- unsigned long end_pfn = end >> PAGE_SHIFT;
|
|
- unsigned long ei_startpfn, ei_endpfn, ram = 0;
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- if (e820_find_active_region(&e820.map[i],
|
|
- start_pfn, end_pfn,
|
|
- &ei_startpfn, &ei_endpfn))
|
|
- ram += ei_endpfn - ei_startpfn;
|
|
- }
|
|
- return end - start - (ram << PAGE_SHIFT);
|
|
-}
|
|
-
|
|
-static void __init e820_print_map(char *who)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- printk(KERN_INFO " %s: %016Lx - %016Lx ", who,
|
|
- (unsigned long long) e820.map[i].addr,
|
|
- (unsigned long long)
|
|
- (e820.map[i].addr + e820.map[i].size));
|
|
- switch (e820.map[i].type) {
|
|
- case E820_RAM:
|
|
- printk(KERN_CONT "(usable)\n");
|
|
- break;
|
|
- case E820_RESERVED:
|
|
- printk(KERN_CONT "(reserved)\n");
|
|
- break;
|
|
- case E820_ACPI:
|
|
- printk(KERN_CONT "(ACPI data)\n");
|
|
- break;
|
|
- case E820_NVS:
|
|
- printk(KERN_CONT "(ACPI NVS)\n");
|
|
- break;
|
|
- default:
|
|
- printk(KERN_CONT "type %u\n", e820.map[i].type);
|
|
- break;
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
- * Sanitize the BIOS e820 map.
|
|
- *
|
|
- * Some e820 responses include overlapping entries. The following
|
|
- * replaces the original e820 map with a new one, removing overlaps.
|
|
- *
|
|
- */
|
|
-static int __init sanitize_e820_map(struct e820entry *biosmap, char *pnr_map)
|
|
-{
|
|
- struct change_member {
|
|
- struct e820entry *pbios; /* pointer to original bios entry */
|
|
- unsigned long long addr; /* address for this change point */
|
|
- };
|
|
- static struct change_member change_point_list[2*E820MAX] __initdata;
|
|
- static struct change_member *change_point[2*E820MAX] __initdata;
|
|
- static struct e820entry *overlap_list[E820MAX] __initdata;
|
|
- static struct e820entry new_bios[E820MAX] __initdata;
|
|
- struct change_member *change_tmp;
|
|
- unsigned long current_type, last_type;
|
|
- unsigned long long last_addr;
|
|
- int chgidx, still_changing;
|
|
- int overlap_entries;
|
|
- int new_bios_entry;
|
|
- int old_nr, new_nr, chg_nr;
|
|
- int i;
|
|
-
|
|
- /*
|
|
- Visually we're performing the following
|
|
- (1,2,3,4 = memory types)...
|
|
-
|
|
- Sample memory map (w/overlaps):
|
|
- ____22__________________
|
|
- ______________________4_
|
|
- ____1111________________
|
|
- _44_____________________
|
|
- 11111111________________
|
|
- ____________________33__
|
|
- ___________44___________
|
|
- __________33333_________
|
|
- ______________22________
|
|
- ___________________2222_
|
|
- _________111111111______
|
|
- _____________________11_
|
|
- _________________4______
|
|
-
|
|
- Sanitized equivalent (no overlap):
|
|
- 1_______________________
|
|
- _44_____________________
|
|
- ___1____________________
|
|
- ____22__________________
|
|
- ______11________________
|
|
- _________1______________
|
|
- __________3_____________
|
|
- ___________44___________
|
|
- _____________33_________
|
|
- _______________2________
|
|
- ________________1_______
|
|
- _________________4______
|
|
- ___________________2____
|
|
- ____________________33__
|
|
- ______________________4_
|
|
- */
|
|
-
|
|
- /* if there's only one memory region, don't bother */
|
|
- if (*pnr_map < 2)
|
|
- return -1;
|
|
-
|
|
- old_nr = *pnr_map;
|
|
-
|
|
- /* bail out if we find any unreasonable addresses in bios map */
|
|
- for (i = 0; i < old_nr; i++)
|
|
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
|
|
- return -1;
|
|
-
|
|
- /* create pointers for initial change-point information (for sorting) */
|
|
- for (i = 0; i < 2 * old_nr; i++)
|
|
- change_point[i] = &change_point_list[i];
|
|
-
|
|
- /* record all known change-points (starting and ending addresses),
|
|
- omitting those that are for empty memory regions */
|
|
- chgidx = 0;
|
|
- for (i = 0; i < old_nr; i++) {
|
|
- if (biosmap[i].size != 0) {
|
|
- change_point[chgidx]->addr = biosmap[i].addr;
|
|
- change_point[chgidx++]->pbios = &biosmap[i];
|
|
- change_point[chgidx]->addr = biosmap[i].addr +
|
|
- biosmap[i].size;
|
|
- change_point[chgidx++]->pbios = &biosmap[i];
|
|
- }
|
|
- }
|
|
- chg_nr = chgidx;
|
|
-
|
|
- /* sort change-point list by memory addresses (low -> high) */
|
|
- still_changing = 1;
|
|
- while (still_changing) {
|
|
- still_changing = 0;
|
|
- for (i = 1; i < chg_nr; i++) {
|
|
- unsigned long long curaddr, lastaddr;
|
|
- unsigned long long curpbaddr, lastpbaddr;
|
|
-
|
|
- curaddr = change_point[i]->addr;
|
|
- lastaddr = change_point[i - 1]->addr;
|
|
- curpbaddr = change_point[i]->pbios->addr;
|
|
- lastpbaddr = change_point[i - 1]->pbios->addr;
|
|
-
|
|
- /*
|
|
- * swap entries, when:
|
|
- *
|
|
- * curaddr > lastaddr or
|
|
- * curaddr == lastaddr and curaddr == curpbaddr and
|
|
- * lastaddr != lastpbaddr
|
|
- */
|
|
- if (curaddr < lastaddr ||
|
|
- (curaddr == lastaddr && curaddr == curpbaddr &&
|
|
- lastaddr != lastpbaddr)) {
|
|
- change_tmp = change_point[i];
|
|
- change_point[i] = change_point[i-1];
|
|
- change_point[i-1] = change_tmp;
|
|
- still_changing = 1;
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- /* create a new bios memory map, removing overlaps */
|
|
- overlap_entries = 0; /* number of entries in the overlap table */
|
|
- new_bios_entry = 0; /* index for creating new bios map entries */
|
|
- last_type = 0; /* start with undefined memory type */
|
|
- last_addr = 0; /* start with 0 as last starting address */
|
|
-
|
|
- /* loop through change-points, determining affect on the new bios map */
|
|
- for (chgidx = 0; chgidx < chg_nr; chgidx++) {
|
|
- /* keep track of all overlapping bios entries */
|
|
- if (change_point[chgidx]->addr ==
|
|
- change_point[chgidx]->pbios->addr) {
|
|
- /*
|
|
- * add map entry to overlap list (> 1 entry
|
|
- * implies an overlap)
|
|
- */
|
|
- overlap_list[overlap_entries++] =
|
|
- change_point[chgidx]->pbios;
|
|
- } else {
|
|
- /*
|
|
- * remove entry from list (order independent,
|
|
- * so swap with last)
|
|
- */
|
|
- for (i = 0; i < overlap_entries; i++) {
|
|
- if (overlap_list[i] ==
|
|
- change_point[chgidx]->pbios)
|
|
- overlap_list[i] =
|
|
- overlap_list[overlap_entries-1];
|
|
- }
|
|
- overlap_entries--;
|
|
- }
|
|
- /*
|
|
- * if there are overlapping entries, decide which
|
|
- * "type" to use (larger value takes precedence --
|
|
- * 1=usable, 2,3,4,4+=unusable)
|
|
- */
|
|
- current_type = 0;
|
|
- for (i = 0; i < overlap_entries; i++)
|
|
- if (overlap_list[i]->type > current_type)
|
|
- current_type = overlap_list[i]->type;
|
|
- /*
|
|
- * continue building up new bios map based on this
|
|
- * information
|
|
- */
|
|
- if (current_type != last_type) {
|
|
- if (last_type != 0) {
|
|
- new_bios[new_bios_entry].size =
|
|
- change_point[chgidx]->addr - last_addr;
|
|
- /*
|
|
- * move forward only if the new size
|
|
- * was non-zero
|
|
- */
|
|
- if (new_bios[new_bios_entry].size != 0)
|
|
- /*
|
|
- * no more space left for new
|
|
- * bios entries ?
|
|
- */
|
|
- if (++new_bios_entry >= E820MAX)
|
|
- break;
|
|
- }
|
|
- if (current_type != 0) {
|
|
- new_bios[new_bios_entry].addr =
|
|
- change_point[chgidx]->addr;
|
|
- new_bios[new_bios_entry].type = current_type;
|
|
- last_addr = change_point[chgidx]->addr;
|
|
- }
|
|
- last_type = current_type;
|
|
- }
|
|
- }
|
|
- /* retain count for new bios entries */
|
|
- new_nr = new_bios_entry;
|
|
-
|
|
- /* copy new bios mapping into original location */
|
|
- memcpy(biosmap, new_bios, new_nr * sizeof(struct e820entry));
|
|
- *pnr_map = new_nr;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Copy the BIOS e820 map into a safe place.
|
|
- *
|
|
- * Sanity-check it while we're at it..
|
|
- *
|
|
- * If we're lucky and live on a modern system, the setup code
|
|
- * will have given us a memory map that we can use to properly
|
|
- * set up memory. If we aren't, we'll fake a memory map.
|
|
- */
|
|
-static int __init copy_e820_map(struct e820entry *biosmap, int nr_map)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- /* Only one memory region (or negative)? Ignore it */
|
|
- if (nr_map < 2)
|
|
- return -1;
|
|
-#else
|
|
- BUG_ON(nr_map < 1);
|
|
-#endif
|
|
-
|
|
- do {
|
|
- u64 start = biosmap->addr;
|
|
- u64 size = biosmap->size;
|
|
- u64 end = start + size;
|
|
- u32 type = biosmap->type;
|
|
-
|
|
- /* Overflow in 64 bits? Ignore the memory map. */
|
|
- if (start > end)
|
|
- return -1;
|
|
-
|
|
- add_memory_region(start, size, type);
|
|
- } while (biosmap++, --nr_map);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- if (is_initial_xendomain()) {
|
|
- struct xen_memory_map memmap;
|
|
-
|
|
- memmap.nr_entries = E820MAX;
|
|
- set_xen_guest_handle(memmap.buffer, machine_e820.map);
|
|
-
|
|
- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
|
|
- BUG();
|
|
- machine_e820.nr_map = memmap.nr_entries;
|
|
- } else
|
|
- machine_e820 = e820;
|
|
-#endif
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static void early_panic(char *msg)
|
|
-{
|
|
- early_printk(msg);
|
|
- panic(msg);
|
|
-}
|
|
-
|
|
-/* We're not void only for x86 32-bit compat */
|
|
-char * __init machine_specific_memory_setup(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- char *who = "BIOS-e820";
|
|
- /*
|
|
- * Try to copy the BIOS-supplied E820-map.
|
|
- *
|
|
- * Otherwise fake a memory map; one section from 0k->640k,
|
|
- * the next section from 1mb->appropriate_mem_k
|
|
- */
|
|
- sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
|
|
- if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
|
|
- early_panic("Cannot find a valid memory map");
|
|
-#else /* CONFIG_XEN */
|
|
- char *who = "Xen";
|
|
- int rc;
|
|
- struct xen_memory_map memmap;
|
|
- static struct e820entry __initdata map[E820MAX];
|
|
-
|
|
- memmap.nr_entries = E820MAX;
|
|
- set_xen_guest_handle(memmap.buffer, map);
|
|
-
|
|
- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
|
|
- if ( rc == -ENOSYS ) {
|
|
- memmap.nr_entries = 1;
|
|
- map[0].addr = 0ULL;
|
|
- map[0].size = xen_start_info->nr_pages << PAGE_SHIFT;
|
|
- /* 8MB slack (to balance backend allocations). */
|
|
- map[0].size += 8 << 20;
|
|
- map[0].type = E820_RAM;
|
|
- rc = 0;
|
|
- }
|
|
- BUG_ON(rc);
|
|
-
|
|
- sanitize_e820_map(map, (char *)&memmap.nr_entries);
|
|
-
|
|
- if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
|
|
- early_panic("Cannot find a valid memory map");
|
|
-#endif
|
|
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
|
- e820_print_map(who);
|
|
-
|
|
- /* In case someone cares... */
|
|
- return who;
|
|
-}
|
|
-
|
|
-static int __init parse_memopt(char *p)
|
|
-{
|
|
- int i;
|
|
- unsigned long current_end;
|
|
- unsigned long end;
|
|
-
|
|
- if (!p)
|
|
- return -EINVAL;
|
|
- end_user_pfn = memparse(p, &p);
|
|
- end_user_pfn >>= PAGE_SHIFT;
|
|
-
|
|
- end = end_user_pfn<<PAGE_SHIFT;
|
|
- i = e820.nr_map-1;
|
|
- current_end = e820.map[i].addr + e820.map[i].size;
|
|
-
|
|
- if (current_end < end) {
|
|
- /*
|
|
- * The e820 map ends before our requested size so
|
|
- * extend the final entry to the requested address.
|
|
- */
|
|
- if (e820.map[i].type == E820_RAM)
|
|
- e820.map[i].size = end - e820.map[i].addr;
|
|
- else
|
|
- add_memory_region(current_end, end - current_end, E820_RAM);
|
|
- }
|
|
-
|
|
- return 0;
|
|
-}
|
|
-early_param("mem", parse_memopt);
|
|
-
|
|
-static int userdef __initdata;
|
|
-
|
|
-static int __init parse_memmap_opt(char *p)
|
|
-{
|
|
- char *oldp;
|
|
- unsigned long long start_at, mem_size;
|
|
-
|
|
- if (!strcmp(p, "exactmap")) {
|
|
-#ifdef CONFIG_CRASH_DUMP
|
|
- /*
|
|
- * If we are doing a crash dump, we still need to know
|
|
- * the real mem size before original memory map is
|
|
- * reset.
|
|
- */
|
|
- e820_register_active_regions(0, 0, -1UL);
|
|
- saved_max_pfn = e820_end_of_ram();
|
|
- remove_all_active_ranges();
|
|
-#endif
|
|
- max_pfn_mapped = 0;
|
|
- e820.nr_map = 0;
|
|
- userdef = 1;
|
|
- return 0;
|
|
- }
|
|
-
|
|
- oldp = p;
|
|
- mem_size = memparse(p, &p);
|
|
- if (p == oldp)
|
|
- return -EINVAL;
|
|
-
|
|
- userdef = 1;
|
|
- if (*p == '@') {
|
|
- start_at = memparse(p+1, &p);
|
|
- add_memory_region(start_at, mem_size, E820_RAM);
|
|
- } else if (*p == '#') {
|
|
- start_at = memparse(p+1, &p);
|
|
- add_memory_region(start_at, mem_size, E820_ACPI);
|
|
- } else if (*p == '$') {
|
|
- start_at = memparse(p+1, &p);
|
|
- add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
- } else {
|
|
- end_user_pfn = (mem_size >> PAGE_SHIFT);
|
|
- }
|
|
- return *p == '\0' ? 0 : -EINVAL;
|
|
-}
|
|
-early_param("memmap", parse_memmap_opt);
|
|
-
|
|
-void __init finish_e820_parsing(void)
|
|
-{
|
|
- if (userdef) {
|
|
- char nr = e820.nr_map;
|
|
-
|
|
- if (sanitize_e820_map(e820.map, &nr) < 0)
|
|
- early_panic("Invalid user supplied memory map");
|
|
- e820.nr_map = nr;
|
|
-
|
|
- printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
- e820_print_map("user");
|
|
- }
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-void __init update_memory_range(u64 start, u64 size, unsigned old_type,
|
|
- unsigned new_type)
|
|
-{
|
|
- int i;
|
|
-
|
|
- BUG_ON(old_type == new_type);
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- u64 final_start, final_end;
|
|
- if (ei->type != old_type)
|
|
- continue;
|
|
- /* totally covered? */
|
|
- if (ei->addr >= start && ei->size <= size) {
|
|
- ei->type = new_type;
|
|
- continue;
|
|
- }
|
|
- /* partially covered */
|
|
- final_start = max(start, ei->addr);
|
|
- final_end = min(start + size, ei->addr + ei->size);
|
|
- if (final_start >= final_end)
|
|
- continue;
|
|
- add_memory_region(final_start, final_end - final_start,
|
|
- new_type);
|
|
- }
|
|
-}
|
|
-
|
|
-void __init update_e820(void)
|
|
-{
|
|
- u8 nr_map;
|
|
-
|
|
- nr_map = e820.nr_map;
|
|
- if (sanitize_e820_map(e820.map, &nr_map))
|
|
- return;
|
|
- e820.nr_map = nr_map;
|
|
- printk(KERN_INFO "modified physical RAM map:\n");
|
|
- e820_print_map("modified");
|
|
-}
|
|
-#endif
|
|
-
|
|
-unsigned long pci_mem_start = 0xaeedbabe;
|
|
-EXPORT_SYMBOL(pci_mem_start);
|
|
-
|
|
-/*
|
|
- * Search for the biggest gap in the low 32 bits of the e820
|
|
- * memory space. We pass this space to PCI to assign MMIO resources
|
|
- * for hotplug or unconfigured devices in.
|
|
- * Hopefully the BIOS let enough space left.
|
|
- */
|
|
-__init void e820_setup_gap(struct e820entry *e820, int nr_map)
|
|
-{
|
|
- unsigned long gapstart, gapsize, round;
|
|
- unsigned long last;
|
|
- int i;
|
|
- int found = 0;
|
|
-
|
|
- last = 0x100000000ull;
|
|
- gapstart = 0x10000000;
|
|
- gapsize = 0x400000;
|
|
- i = nr_map;
|
|
- while (--i >= 0) {
|
|
- unsigned long long start = e820[i].addr;
|
|
- unsigned long long end = start + e820[i].size;
|
|
-
|
|
- /*
|
|
- * Since "last" is at most 4GB, we know we'll
|
|
- * fit in 32 bits if this condition is true
|
|
- */
|
|
- if (last > end) {
|
|
- unsigned long gap = last - end;
|
|
-
|
|
- if (gap > gapsize) {
|
|
- gapsize = gap;
|
|
- gapstart = end;
|
|
- found = 1;
|
|
- }
|
|
- }
|
|
- if (start < last)
|
|
- last = start;
|
|
- }
|
|
-
|
|
- if (!found) {
|
|
- gapstart = (end_pfn << PAGE_SHIFT) + 1024*1024;
|
|
- printk(KERN_ERR "PCI: Warning: Cannot find a gap in the 32bit "
|
|
- "address range\n"
|
|
- KERN_ERR "PCI: Unassigned devices with 32bit resource "
|
|
- "registers may break!\n");
|
|
- }
|
|
-
|
|
- /*
|
|
- * See how much we want to round up: start off with
|
|
- * rounding to the next 1MB area.
|
|
- */
|
|
- round = 0x100000;
|
|
- while ((gapsize >> 4) > round)
|
|
- round += round;
|
|
- /* Fun with two's complement */
|
|
- pci_mem_start = (gapstart + round) & -round;
|
|
-
|
|
- printk(KERN_INFO
|
|
- "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
|
|
- pci_mem_start, gapstart, gapsize);
|
|
-}
|
|
-
|
|
-int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
|
|
-{
|
|
- int i;
|
|
-
|
|
- if (slot < 0 || slot >= e820.nr_map)
|
|
- return -1;
|
|
- for (i = slot; i < e820.nr_map; i++) {
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
- break;
|
|
- }
|
|
- if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
|
|
- return -1;
|
|
- *addr = e820.map[i].addr;
|
|
- *size = min_t(u64, e820.map[i].size + e820.map[i].addr,
|
|
- max_pfn << PAGE_SHIFT) - *addr;
|
|
- return i + 1;
|
|
-}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/early_printk-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/kernel/early_printk-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -225,7 +225,7 @@ static struct console simnow_console = {
 static struct console *early_console = &early_vga_console;
 static int early_console_initialized;

-void early_printk(const char *fmt, ...)
+asmlinkage void early_printk(const char *fmt, ...)
 {
 char buf[512];
 int n;
--- head-2011-03-11.orig/arch/x86/kernel/entry_32-xen.S 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -51,15 +51,26 @@
|
|
#include <asm/percpu.h>
|
|
#include <asm/dwarf2.h>
|
|
#include <asm/processor-flags.h>
|
|
-#include "irq_vectors.h"
|
|
+#include <asm/ftrace.h>
|
|
+#include <asm/irq_vectors.h>
|
|
#include <xen/interface/xen.h>
|
|
|
|
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
|
|
+#include <linux/elf-em.h>
|
|
+#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
|
|
+#define __AUDIT_ARCH_LE 0x40000000
|
|
+
|
|
+#ifndef CONFIG_AUDITSYSCALL
|
|
+#define sysenter_audit syscall_trace_entry
|
|
+#define sysexit_audit syscall_exit_work
|
|
+#endif
|
|
+
|
|
/*
|
|
* We use macros for low-level operations which need to be overridden
|
|
* for paravirtualization. The following will never clobber any registers:
|
|
* INTERRUPT_RETURN (aka. "iret")
|
|
* GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
|
|
- * ENABLE_INTERRUPTS_SYSCALL_RET (aka "sti; sysexit").
|
|
+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
|
|
*
|
|
* For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
|
|
* specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
|
|
@@ -277,11 +288,6 @@ END(resume_kernel)
|
|
#endif
|
|
CFI_ENDPROC
|
|
|
|
- .macro test_tif ti_reg # system call tracing in operation / emulation
|
|
- /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
|
|
- testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(\ti_reg)
|
|
- .endm
|
|
-
|
|
/* SYSENTER_RETURN points to after the "sysenter" instruction in
|
|
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
|
|
|
|
@@ -338,8 +344,9 @@ sysenter_past_esp:
|
|
.previous
|
|
|
|
GET_THREAD_INFO(%ebp)
|
|
- test_tif %ebp
|
|
- jnz syscall_trace_entry
|
|
+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
+ jnz sysenter_audit
|
|
+sysenter_do_call:
|
|
cmpl $(nr_syscalls), %eax
|
|
jae syscall_badsys
|
|
call *sys_call_table(,%eax,4)
|
|
@@ -349,14 +356,54 @@ sysenter_past_esp:
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%ebp), %ecx
|
|
testw $_TIF_ALLWORK_MASK, %cx
|
|
- jne syscall_exit_work
|
|
+ jne sysexit_audit
|
|
+sysenter_exit:
|
|
/* if something modifies registers it must also disable sysexit */
|
|
movl PT_EIP(%esp), %edx
|
|
movl PT_OLDESP(%esp), %ecx
|
|
xorl %ebp,%ebp
|
|
TRACE_IRQS_ON
|
|
1: mov PT_FS(%esp), %fs
|
|
- ENABLE_INTERRUPTS_SYSCALL_RET
|
|
+ ENABLE_INTERRUPTS_SYSEXIT
|
|
+
|
|
+#ifdef CONFIG_AUDITSYSCALL
|
|
+sysenter_audit:
|
|
+ testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
|
|
+ jnz syscall_trace_entry
|
|
+ addl $4,%esp
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ /* %esi already in 8(%esp) 6th arg: 4th syscall arg */
|
|
+ /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */
|
|
+ /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */
|
|
+ movl %ebx,%ecx /* 3rd arg: 1st syscall arg */
|
|
+ movl %eax,%edx /* 2nd arg: syscall number */
|
|
+ movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */
|
|
+ call audit_syscall_entry
|
|
+ pushl %ebx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ movl PT_EAX(%esp),%eax /* reload syscall number */
|
|
+ jmp sysenter_do_call
|
|
+
|
|
+sysexit_audit:
|
|
+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
|
|
+ jne syscall_exit_work
|
|
+ TRACE_IRQS_ON
|
|
+ ENABLE_INTERRUPTS(CLBR_ANY)
|
|
+ movl %eax,%edx /* second arg, syscall return value */
|
|
+ cmpl $0,%eax /* is it < 0? */
|
|
+ setl %al /* 1 if so, 0 if not */
|
|
+ movzbl %al,%eax /* zero-extend that */
|
|
+ inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
|
|
+ call audit_syscall_exit
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY)
|
|
+ TRACE_IRQS_OFF
|
|
+ movl TI_flags(%ebp), %ecx
|
|
+ testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
|
|
+ jne syscall_exit_work
|
|
+ movl PT_EAX(%esp),%eax /* reload syscall return value */
|
|
+ jmp sysenter_exit
|
|
+#endif
|
|
+
|
|
CFI_ENDPROC
|
|
.pushsection .fixup,"ax"
|
|
2: movl $0,PT_FS(%esp)
|
|
@@ -400,7 +447,7 @@ ENTRY(system_call)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- test_tif %ebp
|
|
+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
jnz syscall_trace_entry
|
|
cmpl $(nr_syscalls), %eax
|
|
jae syscall_badsys
|
|
@@ -413,10 +460,6 @@ syscall_exit:
|
|
# setting need_resched or sigpending
|
|
# between sampling and the iret
|
|
TRACE_IRQS_OFF
|
|
- testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
|
|
- jz no_singlestep
|
|
- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
|
|
-no_singlestep:
|
|
movl TI_flags(%ebp), %ecx
|
|
testw $_TIF_ALLWORK_MASK, %cx # current->work
|
|
jne syscall_exit_work
|
|
@@ -588,12 +631,8 @@ END(work_pending)
|
|
syscall_trace_entry:
|
|
movl $-ENOSYS,PT_EAX(%esp)
|
|
movl %esp, %eax
|
|
- xorl %edx,%edx
|
|
- call do_syscall_trace
|
|
- cmpl $0, %eax
|
|
- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
|
|
- # so must skip actual syscall
|
|
- movl PT_ORIG_EAX(%esp), %eax
|
|
+ call syscall_trace_enter
|
|
+ /* What it returned is what we'll actually use. */
|
|
cmpl $(nr_syscalls), %eax
|
|
jnae syscall_call
|
|
jmp syscall_exit
|
|
@@ -602,14 +641,13 @@ END(syscall_trace_entry)
|
|
# perform syscall exit tracing
|
|
ALIGN
|
|
syscall_exit_work:
|
|
- testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
|
|
+ testb $_TIF_WORK_SYSCALL_EXIT, %cl
|
|
jz work_pending
|
|
TRACE_IRQS_ON
|
|
- ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
|
|
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
|
|
# schedule() instead
|
|
movl %esp, %eax
|
|
- movl $1, %edx
|
|
- call do_syscall_trace
|
|
+ call syscall_trace_leave
|
|
jmp resume_userspace
|
|
END(syscall_exit_work)
|
|
CFI_ENDPROC
|
|
@@ -1113,10 +1151,10 @@ ENTRY(native_iret)
|
|
.previous
|
|
END(native_iret)
|
|
|
|
-ENTRY(native_irq_enable_syscall_ret)
|
|
+ENTRY(native_irq_enable_sysexit)
|
|
sti
|
|
sysexit
|
|
-END(native_irq_enable_syscall_ret)
|
|
+END(native_irq_enable_sysexit)
|
|
#endif
|
|
|
|
KPROBE_ENTRY(int3)
|
|
@@ -1265,6 +1303,77 @@ ENTRY(kernel_thread_helper)
|
|
CFI_ENDPROC
|
|
ENDPROC(kernel_thread_helper)
|
|
|
|
+#ifdef CONFIG_FTRACE
|
|
+#ifdef CONFIG_DYNAMIC_FTRACE
|
|
+
|
|
+ENTRY(mcount)
|
|
+ pushl %eax
|
|
+ pushl %ecx
|
|
+ pushl %edx
|
|
+ movl 0xc(%esp), %eax
|
|
+ subl $MCOUNT_INSN_SIZE, %eax
|
|
+
|
|
+.globl mcount_call
|
|
+mcount_call:
|
|
+ call ftrace_stub
|
|
+
|
|
+ popl %edx
|
|
+ popl %ecx
|
|
+ popl %eax
|
|
+
|
|
+ ret
|
|
+END(mcount)
|
|
+
|
|
+ENTRY(ftrace_caller)
|
|
+ pushl %eax
|
|
+ pushl %ecx
|
|
+ pushl %edx
|
|
+ movl 0xc(%esp), %eax
|
|
+ movl 0x4(%ebp), %edx
|
|
+ subl $MCOUNT_INSN_SIZE, %eax
|
|
+
|
|
+.globl ftrace_call
|
|
+ftrace_call:
|
|
+ call ftrace_stub
|
|
+
|
|
+ popl %edx
|
|
+ popl %ecx
|
|
+ popl %eax
|
|
+
|
|
+.globl ftrace_stub
|
|
+ftrace_stub:
|
|
+ ret
|
|
+END(ftrace_caller)
|
|
+
|
|
+#else /* ! CONFIG_DYNAMIC_FTRACE */
|
|
+
|
|
+ENTRY(mcount)
|
|
+ cmpl $ftrace_stub, ftrace_trace_function
|
|
+ jnz trace
|
|
+.globl ftrace_stub
|
|
+ftrace_stub:
|
|
+ ret
|
|
+
|
|
+ /* taken from glibc */
|
|
+trace:
|
|
+ pushl %eax
|
|
+ pushl %ecx
|
|
+ pushl %edx
|
|
+ movl 0xc(%esp), %eax
|
|
+ movl 0x4(%ebp), %edx
|
|
+ subl $MCOUNT_INSN_SIZE, %eax
|
|
+
|
|
+ call *ftrace_trace_function
|
|
+
|
|
+ popl %edx
|
|
+ popl %ecx
|
|
+ popl %eax
|
|
+
|
|
+ jmp ftrace_stub
|
|
+END(mcount)
|
|
+#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
+#endif /* CONFIG_FTRACE */
|
|
+
|
|
#include <asm/alternative-asm.h>
|
|
|
|
# pv syscall call handler stub
|
|
@@ -1290,7 +1399,7 @@ ENTRY(ia32pv_cstar_target)
|
|
.previous
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- test_tif %ebp
|
|
+ testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
jnz cstar_trace_entry
|
|
cmpl $nr_syscalls,%eax
|
|
jae cstar_badsys
|
|
@@ -1324,29 +1433,21 @@ cstar_trace_entry:
|
|
btl %eax,cstar_special
|
|
jc .Lcstar_trace_special
|
|
1: movl %esp,%eax
|
|
- xorl %edx,%edx
|
|
LOCK_PREFIX
|
|
orl $_TIF_CSTAR,TI_flags(%ebp)
|
|
- call do_syscall_trace
|
|
+ call syscall_trace_enter
|
|
LOCK_PREFIX
|
|
andl $~_TIF_CSTAR,TI_flags(%ebp)
|
|
- testl %eax,%eax
|
|
- jne .Lcstar_resume # ret != 0 -> running under PTRACE_SYSEMU,
|
|
- # so must skip actual syscall
|
|
- movl PT_ORIG_EAX(%esp),%eax
|
|
+ /* What it returned is what we'll actually use. */
|
|
cmpl $nr_syscalls,%eax
|
|
jb .Lcstar_call
|
|
jmp .Lcstar_exit
|
|
.Lcstar_trace_special:
|
|
movl PT_ECX(%esp),%ecx
|
|
movl %esp,%eax
|
|
- xorl %edx,%edx
|
|
movl %ecx,PT_EBP(%esp) # put user EBP back in place
|
|
- call do_syscall_trace
|
|
- testl %eax,%eax
|
|
- jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
|
|
- # so must skip actual syscall
|
|
- movl PT_ORIG_EAX(%esp),%eax
|
|
+ call syscall_trace_enter
|
|
+ /* What it returned is what we'll actually use. */
|
|
cmpl $nr_syscalls,%eax
|
|
jb syscall_call
|
|
jmp syscall_exit
|
|
--- head-2011-03-11.orig/arch/x86/kernel/entry_64.S 2011-03-15 16:45:55.000000000 +0100
+++ head-2011-03-11/arch/x86/kernel/entry_64.S 2011-02-16 16:02:30.000000000 +0100
@@ -1253,7 +1253,7 @@ ENTRY(arch_unwind_init_running)
 END(arch_unwind_init_running)
 #endif

-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
 zeroentry xen_hypervisor_callback xen_do_hypervisor_callback

 /*
@@ -1353,7 +1353,7 @@ END(xen_failsafe_callback)
 apicinterrupt XEN_HVM_EVTCHN_CALLBACK \
 xen_hvm_callback_vector xen_evtchn_do_upcall

-#endif /* CONFIG_XEN */
+#endif /* CONFIG_PARAVIRT_XEN */

 /*
 * Some functions should be protected against kprobes
--- head-2011-03-11.orig/arch/x86/kernel/entry_64-xen.S 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -53,12 +53,124 @@
|
|
#include <asm/hw_irq.h>
|
|
#include <asm/page.h>
|
|
#include <asm/irqflags.h>
|
|
+#include <asm/ftrace.h>
|
|
#include <asm/errno.h>
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/features.h>
|
|
|
|
+/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
|
|
+#include <linux/elf-em.h>
|
|
+#define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
|
|
+#define __AUDIT_ARCH_64BIT 0x80000000
|
|
+#define __AUDIT_ARCH_LE 0x40000000
|
|
+
|
|
.code64
|
|
|
|
+#ifdef CONFIG_FTRACE
|
|
+#ifdef CONFIG_DYNAMIC_FTRACE
|
|
+ENTRY(mcount)
|
|
+
|
|
+ subq $0x38, %rsp
|
|
+ movq %rax, (%rsp)
|
|
+ movq %rcx, 8(%rsp)
|
|
+ movq %rdx, 16(%rsp)
|
|
+ movq %rsi, 24(%rsp)
|
|
+ movq %rdi, 32(%rsp)
|
|
+ movq %r8, 40(%rsp)
|
|
+ movq %r9, 48(%rsp)
|
|
+
|
|
+ movq 0x38(%rsp), %rdi
|
|
+ subq $MCOUNT_INSN_SIZE, %rdi
|
|
+
|
|
+.globl mcount_call
|
|
+mcount_call:
|
|
+ call ftrace_stub
|
|
+
|
|
+ movq 48(%rsp), %r9
|
|
+ movq 40(%rsp), %r8
|
|
+ movq 32(%rsp), %rdi
|
|
+ movq 24(%rsp), %rsi
|
|
+ movq 16(%rsp), %rdx
|
|
+ movq 8(%rsp), %rcx
|
|
+ movq (%rsp), %rax
|
|
+ addq $0x38, %rsp
|
|
+
|
|
+ retq
|
|
+END(mcount)
|
|
+
|
|
+ENTRY(ftrace_caller)
|
|
+
|
|
+ /* taken from glibc */
|
|
+ subq $0x38, %rsp
|
|
+ movq %rax, (%rsp)
|
|
+ movq %rcx, 8(%rsp)
|
|
+ movq %rdx, 16(%rsp)
|
|
+ movq %rsi, 24(%rsp)
|
|
+ movq %rdi, 32(%rsp)
|
|
+ movq %r8, 40(%rsp)
|
|
+ movq %r9, 48(%rsp)
|
|
+
|
|
+ movq 0x38(%rsp), %rdi
|
|
+ movq 8(%rbp), %rsi
|
|
+ subq $MCOUNT_INSN_SIZE, %rdi
|
|
+
|
|
+.globl ftrace_call
|
|
+ftrace_call:
|
|
+ call ftrace_stub
|
|
+
|
|
+ movq 48(%rsp), %r9
|
|
+ movq 40(%rsp), %r8
|
|
+ movq 32(%rsp), %rdi
|
|
+ movq 24(%rsp), %rsi
|
|
+ movq 16(%rsp), %rdx
|
|
+ movq 8(%rsp), %rcx
|
|
+ movq (%rsp), %rax
|
|
+ addq $0x38, %rsp
|
|
+
|
|
+.globl ftrace_stub
|
|
+ftrace_stub:
|
|
+ retq
|
|
+END(ftrace_caller)
|
|
+
|
|
+#else /* ! CONFIG_DYNAMIC_FTRACE */
|
|
+ENTRY(mcount)
|
|
+ cmpq $ftrace_stub, ftrace_trace_function
|
|
+ jnz trace
|
|
+.globl ftrace_stub
|
|
+ftrace_stub:
|
|
+ retq
|
|
+
|
|
+trace:
|
|
+ /* taken from glibc */
|
|
+ subq $0x38, %rsp
|
|
+ movq %rax, (%rsp)
|
|
+ movq %rcx, 8(%rsp)
|
|
+ movq %rdx, 16(%rsp)
|
|
+ movq %rsi, 24(%rsp)
|
|
+ movq %rdi, 32(%rsp)
|
|
+ movq %r8, 40(%rsp)
|
|
+ movq %r9, 48(%rsp)
|
|
+
|
|
+ movq 0x38(%rsp), %rdi
|
|
+ movq 8(%rbp), %rsi
|
|
+ subq $MCOUNT_INSN_SIZE, %rdi
|
|
+
|
|
+ call *ftrace_trace_function
|
|
+
|
|
+ movq 48(%rsp), %r9
|
|
+ movq 40(%rsp), %r8
|
|
+ movq 32(%rsp), %rdi
|
|
+ movq 24(%rsp), %rsi
|
|
+ movq 16(%rsp), %rdx
|
|
+ movq 8(%rsp), %rcx
|
|
+ movq (%rsp), %rax
|
|
+ addq $0x38, %rsp
|
|
+
|
|
+ jmp ftrace_stub
|
|
+END(mcount)
|
|
+#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
+#endif /* CONFIG_FTRACE */
|
|
+
|
|
#ifndef CONFIG_PREEMPT
|
|
#define retint_kernel retint_restore_args
|
|
#endif
|
|
@@ -95,7 +207,7 @@ NMI_MASK = 0x80000000
|
|
.macro FAKE_STACK_FRAME child_rip
|
|
/* push in order ss, rsp, eflags, cs, rip */
|
|
xorl %eax, %eax
|
|
- pushq %rax /* ss */
|
|
+ pushq $__KERNEL_DS /* ss */
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
/*CFI_REL_OFFSET ss,0*/
|
|
pushq %rax /* rsp */
|
|
@@ -190,13 +302,13 @@ ENTRY(ret_from_fork)
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
call schedule_tail
|
|
GET_THREAD_INFO(%rcx)
|
|
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx)
|
|
+ testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
|
|
jnz rff_trace
|
|
rff_action:
|
|
RESTORE_REST
|
|
testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
|
|
je int_ret_from_sys_call
|
|
- testl $_TIF_IA32,threadinfo_flags(%rcx)
|
|
+ testl $_TIF_IA32,TI_flags(%rcx)
|
|
jnz int_ret_from_sys_call
|
|
RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
|
|
jmp ret_from_sys_call
|
|
@@ -258,8 +370,9 @@ ENTRY(system_call)
|
|
SAVE_ARGS -8,0
|
|
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
|
|
GET_THREAD_INFO(%rcx)
|
|
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
|
|
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx)
|
|
jnz tracesys
|
|
+system_call_fastpath:
|
|
cmpq $__NR_syscall_max,%rax
|
|
ja badsys
|
|
movq %r10,%rcx
|
|
@@ -277,7 +390,7 @@ sysret_check:
|
|
GET_THREAD_INFO(%rcx)
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
- movl threadinfo_flags(%rcx),%edx
|
|
+ movl TI_flags(%rcx),%edx
|
|
andl %edi,%edx
|
|
jnz sysret_careful
|
|
CFI_REMEMBER_STATE
|
|
@@ -308,16 +421,16 @@ sysret_careful:
|
|
sysret_signal:
|
|
TRACE_IRQS_ON
|
|
ENABLE_INTERRUPTS(CLBR_NONE)
|
|
- testl $_TIF_DO_NOTIFY_MASK,%edx
|
|
- jz 1f
|
|
-
|
|
- /* Really a signal */
|
|
+#ifdef CONFIG_AUDITSYSCALL
|
|
+ bt $TIF_SYSCALL_AUDIT,%edx
|
|
+ jc sysret_audit
|
|
+#endif
|
|
/* edx: work flags (arg3) */
|
|
leaq do_notify_resume(%rip),%rax
|
|
leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
|
|
xorl %esi,%esi # oldset -> arg2
|
|
call ptregscall_common
|
|
-1: movl $_TIF_NEED_RESCHED,%edi
|
|
+ movl $_TIF_WORK_MASK,%edi
|
|
/* Use IRET because user could have changed frame. This
|
|
works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
@@ -328,14 +441,56 @@ badsys:
|
|
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
|
|
jmp ret_from_sys_call
|
|
|
|
+#ifdef CONFIG_AUDITSYSCALL
|
|
+ /*
|
|
+ * Fast path for syscall audit without full syscall trace.
|
|
+ * We just call audit_syscall_entry() directly, and then
|
|
+ * jump back to the normal fast path.
|
|
+ */
|
|
+auditsys:
|
|
+ movq %r10,%r9 /* 6th arg: 4th syscall arg */
|
|
+ movq %rdx,%r8 /* 5th arg: 3rd syscall arg */
|
|
+ movq %rsi,%rcx /* 4th arg: 2nd syscall arg */
|
|
+ movq %rdi,%rdx /* 3rd arg: 1st syscall arg */
|
|
+ movq %rax,%rsi /* 2nd arg: syscall number */
|
|
+ movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */
|
|
+ call audit_syscall_entry
|
|
+ LOAD_ARGS 0 /* reload call-clobbered registers */
|
|
+ jmp system_call_fastpath
|
|
+
|
|
+ /*
|
|
+ * Return fast path for syscall audit. Call audit_syscall_exit()
|
|
+ * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT
|
|
+ * masked off.
|
|
+ */
|
|
+sysret_audit:
|
|
+ movq %rax,%rsi /* second arg, syscall return value */
|
|
+ cmpq $0,%rax /* is it < 0? */
|
|
+ setl %al /* 1 if so, 0 if not */
|
|
+ movzbl %al,%edi /* zero-extend that into %edi */
|
|
+ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
|
|
+ call audit_syscall_exit
|
|
+ movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
|
|
+ jmp sysret_check
|
|
+#endif /* CONFIG_AUDITSYSCALL */
|
|
+
|
|
/* Do syscall tracing */
|
|
tracesys:
|
|
+#ifdef CONFIG_AUDITSYSCALL
|
|
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
|
|
+ jz auditsys
|
|
+#endif
|
|
SAVE_REST
|
|
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
|
|
FIXUP_TOP_OF_STACK %rdi
|
|
movq %rsp,%rdi
|
|
call syscall_trace_enter
|
|
- LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
|
|
+ /*
|
|
+ * Reload arg registers from stack in case ptrace changed them.
|
|
+ * We don't reload %rax because syscall_trace_enter() returned
|
|
+ * the value it wants us to use in the table lookup.
|
|
+ */
|
|
+ LOAD_ARGS ARGOFFSET, 1
|
|
RESTORE_REST
|
|
cmpq $__NR_syscall_max,%rax
|
|
ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */
|
|
@@ -349,6 +504,7 @@ tracesys:
|
|
* Has correct top of stack, but partial stack frame.
|
|
*/
|
|
.globl int_ret_from_sys_call
|
|
+ .globl int_with_check
|
|
int_ret_from_sys_call:
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
@@ -363,10 +519,10 @@ int_ret_from_sys_call:
|
|
int_with_check:
|
|
LOCKDEP_SYS_EXIT_IRQ
|
|
GET_THREAD_INFO(%rcx)
|
|
- movl threadinfo_flags(%rcx),%edx
|
|
+ movl TI_flags(%rcx),%edx
|
|
andl %edi,%edx
|
|
jnz int_careful
|
|
- andl $~TS_COMPAT,threadinfo_status(%rcx)
|
|
+ andl $~TS_COMPAT,TI_status(%rcx)
|
|
jmp retint_restore_args
|
|
|
|
/* Either reschedule or signal or syscall exit tracking needed. */
|
|
@@ -392,7 +548,7 @@ int_very_careful:
|
|
ENABLE_INTERRUPTS(CLBR_NONE)
|
|
SAVE_REST
|
|
/* Check for syscall exit trace */
|
|
- testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx
|
|
+ testl $_TIF_WORK_SYSCALL_EXIT,%edx
|
|
jz int_signal
|
|
pushq %rdi
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
@@ -400,7 +556,7 @@ int_very_careful:
|
|
call syscall_trace_leave
|
|
popq %rdi
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
- andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
|
|
+ andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi
|
|
jmp int_restore_rest
|
|
|
|
int_signal:
|
|
@@ -409,7 +565,7 @@ int_signal:
|
|
movq %rsp,%rdi # &ptregs -> arg1
|
|
xorl %esi,%esi # oldset -> arg2
|
|
call do_notify_resume
|
|
-1: movl $_TIF_NEED_RESCHED,%edi
|
|
+1: movl $_TIF_WORK_MASK,%edi
|
|
int_restore_rest:
|
|
RESTORE_REST
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
@@ -436,7 +592,6 @@ END(\label)
|
|
PTREGSCALL stub_clone, sys_clone, %r8
|
|
PTREGSCALL stub_fork, sys_fork, %rdi
|
|
PTREGSCALL stub_vfork, sys_vfork, %rdi
|
|
- PTREGSCALL stub_rt_sigsuspend, sys_rt_sigsuspend, %rdx
|
|
PTREGSCALL stub_sigaltstack, sys_sigaltstack, %rdx
|
|
PTREGSCALL stub_iopl, sys_iopl, %rsi
|
|
|
|
@@ -510,10 +665,12 @@ END(stub_rt_sigreturn)
|
|
*
|
|
*/
|
|
|
|
-retint_check:
|
|
+retint_with_reschedule:
|
|
CFI_DEFAULT_STACK adj=1
|
|
+ movl $_TIF_WORK_MASK,%edi
|
|
+retint_check:
|
|
LOCKDEP_SYS_EXIT_IRQ
|
|
- movl threadinfo_flags(%rcx),%edx
|
|
+ movl TI_flags(%rcx),%edx
|
|
andl %edi,%edx
|
|
CFI_REMEMBER_STATE
|
|
jnz retint_careful
|
|
@@ -558,17 +715,16 @@ retint_signal:
|
|
RESTORE_REST
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
- movl $_TIF_NEED_RESCHED,%edi
|
|
GET_THREAD_INFO(%rcx)
|
|
- jmp retint_check
|
|
+ jmp retint_with_reschedule
|
|
|
|
#ifdef CONFIG_PREEMPT
|
|
/* Returning to kernel space. Check if we need preemption */
|
|
/* rcx: threadinfo. interrupts off. */
|
|
ENTRY(retint_kernel)
|
|
- cmpl $0,threadinfo_preempt_count(%rcx)
|
|
+ cmpl $0,TI_preempt_count(%rcx)
|
|
jnz retint_restore_args
|
|
- bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
|
|
+ bt $TIF_NEED_RESCHED,TI_flags(%rcx)
|
|
jnc retint_restore_args
|
|
bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
|
|
jnc retint_restore_args
|
|
@@ -623,6 +779,9 @@ END(invalidate_interrupt\num)
|
|
ENTRY(call_function_interrupt)
|
|
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
|
|
END(call_function_interrupt)
|
|
+ENTRY(call_function_single_interrupt)
|
|
+ apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
|
|
+END(call_function_single_interrupt)
|
|
ENTRY(irq_move_cleanup_interrupt)
|
|
apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
|
|
END(irq_move_cleanup_interrupt)
|
|
@@ -632,6 +791,10 @@ ENTRY(apic_timer_interrupt)
|
|
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
|
|
END(apic_timer_interrupt)
|
|
|
|
+ENTRY(uv_bau_message_intr1)
|
|
+ apicinterrupt 220,uv_bau_message_interrupt
|
|
+END(uv_bau_message_intr1)
|
|
+
|
|
ENTRY(error_interrupt)
|
|
apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt
|
|
END(error_interrupt)
|
|
@@ -745,7 +908,7 @@ paranoid_restore\trace:
|
|
jmp irq_return
|
|
paranoid_userspace\trace:
|
|
GET_THREAD_INFO(%rcx)
|
|
- movl threadinfo_flags(%rcx),%ebx
|
|
+ movl TI_flags(%rcx),%ebx
|
|
andl $_TIF_WORK_MASK,%ebx
|
|
jz paranoid_swapgs\trace
|
|
movq %rsp,%rdi /* &pt_regs */
|
|
@@ -842,7 +1005,7 @@ error_exit:
|
|
testb $3,CS-ARGOFFSET(%rsp)
|
|
jz retint_kernel
|
|
LOCKDEP_SYS_EXIT_IRQ
|
|
- movl threadinfo_flags(%rcx),%edx
|
|
+ movl TI_flags(%rcx),%edx
|
|
movl $_TIF_WORK_MASK,%edi
|
|
andl %edi,%edx
|
|
jnz retint_careful
|
|
@@ -864,11 +1027,11 @@ error_kernelspace:
|
|
iret run with kernel gs again, so don't set the user space flag.
|
|
B stepping K8s sometimes report an truncated RIP for IRET
|
|
exceptions returning to compat mode. Check for these here too. */
|
|
- leaq irq_return(%rip),%rbp
|
|
- cmpq %rbp,RIP(%rsp)
|
|
+ leaq irq_return(%rip),%rcx
|
|
+ cmpq %rcx,RIP(%rsp)
|
|
je error_swapgs
|
|
- movl %ebp,%ebp /* zero extend */
|
|
- cmpq %rbp,RIP(%rsp)
|
|
+ movl %ecx,%ecx /* zero extend */
|
|
+ cmpq %rcx,RIP(%rsp)
|
|
je error_swapgs
|
|
cmpq $gs_change,RIP(%rsp)
|
|
je error_swapgs
|
|
@@ -1114,6 +1277,7 @@ END(device_not_available)
|
|
/* runs on exception stack */
|
|
KPROBE_ENTRY(debug)
|
|
/* INTR_FRAME
|
|
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
pushq $0
|
|
CFI_ADJUST_CFA_OFFSET 8 */
|
|
zeroentry do_debug
|
|
@@ -1141,6 +1305,7 @@ END(do_nmi_callback)
|
|
|
|
KPROBE_ENTRY(int3)
|
|
/* INTR_FRAME
|
|
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
|
|
pushq $0
|
|
CFI_ADJUST_CFA_OFFSET 8 */
|
|
zeroentry do_int3
|
|
@@ -1164,14 +1329,11 @@ ENTRY(coprocessor_segment_overrun)
|
|
zeroentry do_coprocessor_segment_overrun
|
|
END(coprocessor_segment_overrun)
|
|
|
|
-ENTRY(reserved)
|
|
- zeroentry do_reserved
-END(reserved)
-
#if 0
/* runs on exception stack */
ENTRY(double_fault)
XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_double_fault
jmp paranoid_exit1
CFI_ENDPROC
@@ -1189,6 +1351,7 @@ END(segment_not_present)
/* runs on exception stack */
ENTRY(stack_segment)
/* XCPT_FRAME
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
paranoidentry do_stack_segment */
errorentry do_stack_segment
/* jmp paranoid_exit1
--- head-2011-03-11.orig/arch/x86/kernel/fixup.c 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-11/arch/x86/kernel/fixup.c 2011-02-01 14:38:38.000000000 +0100
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/delay.h>
#include <linux/version.h>
+#include <asm/traps.h>
#define DP(_f, _args...) pr_alert(" " _f "\n" , ## _args )
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-11/arch/x86/kernel/head-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include <linux/init.h>
+
+#include <asm/setup.h>
+#include <asm/bios_ebda.h>
+
+#define BIOS_LOWMEM_KILOBYTES 0x413
+
+/*
+ * The BIOS places the EBDA/XBDA at the top of conventional
+ * memory, and usually decreases the reported amount of
+ * conventional memory (int 0x12) too. This also contains a
+ * workaround for Dell systems that neglect to reserve EBDA.
+ * The same workaround also avoids a problem with the AMD768MPX
+ * chipset: reserve a page before VGA to prevent PCI prefetch
+ * into it (errata #56). Usually the page is reserved anyways,
+ * unless you have no PS/2 mouse plugged in.
+ */
+void __init reserve_ebda_region(void)
+{
+#ifndef CONFIG_XEN
+ unsigned int lowmem, ebda_addr;
+
+ /* To determine the position of the EBDA and the */
+ /* end of conventional memory, we need to look at */
+ /* the BIOS data area. In a paravirtual environment */
+ /* that area is absent. We'll just have to assume */
+ /* that the paravirt case can handle memory setup */
+ /* correctly, without our help. */
+ if (paravirt_enabled())
+ return;
+
+ /* end of low (conventional) memory */
+ lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
+ lowmem <<= 10;
+
+ /* start of EBDA area */
+ ebda_addr = get_bios_ebda();
+
+ /* Fixup: bios puts an EBDA in the top 64K segment */
+ /* of conventional memory, but does not adjust lowmem. */
+ if ((lowmem - ebda_addr) <= 0x10000)
+ lowmem = ebda_addr;
+
+ /* Fixup: bios does not report an EBDA at all. */
+ /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
+ if ((ebda_addr == 0) && (lowmem >= 0x9f000))
+ lowmem = 0x9f000;
+
+ /* Paranoia: should never happen, but... */
+ if ((lowmem == 0) || (lowmem >= 0x100000))
+ lowmem = 0x9f000;
+
+ /* reserve all memory between lowmem and the 1MB mark */
+ reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
+#endif
+}
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-11/arch/x86/kernel/head32-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -0,0 +1,57 @@
+/*
+ * linux/arch/i386/kernel/head32.c -- prepare to run common code
+ *
+ * Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2007 Eric Biederman <ebiederm@xmission.com>
+ */
+
+#include <linux/init.h>
+#include <linux/start_kernel.h>
+
+#include <asm/setup.h>
+#include <asm/sections.h>
+#include <asm/e820.h>
+#include <asm/bios_ebda.h>
+
+void __init i386_start_kernel(void)
+{
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
+
+#ifndef CONFIG_XEN
+#ifdef CONFIG_BLK_DEV_INITRD
+ /* Reserve INITRD */
+ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
+ reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
+ }
+#endif
+ reserve_early(init_pg_tables_start, init_pg_tables_end,
+ "INIT_PG_TABLE");
+#else
+ reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
+ __pa(xen_start_info->pt_base)
+ + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
+ "Xen provided");
+
+ {
+ int max_cmdline;
+
+ if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
+ max_cmdline = COMMAND_LINE_SIZE;
+ memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
+ boot_command_line[max_cmdline-1] = '\0';
+ }
+#endif
+
+ reserve_ebda_region();
+
+ /*
+ * At this point everything still needed from the boot loader
+ * or BIOS or kernel text should be early reserved or marked not
+ * RAM in e820. All other memory is free game.
+ */
+
+ start_kernel();
+}
--- head-2011-03-11.orig/arch/x86/kernel/head64-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/head64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -32,7 +32,26 @@
|
|
#include <asm/e820.h>
|
|
#include <asm/bios_ebda.h>
|
|
|
|
-unsigned long start_pfn;
|
|
+/* boot cpu pda */
|
|
+static struct x8664_pda _boot_cpu_pda __read_mostly;
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+/*
|
|
+ * We install an empty cpu_pda pointer table to indicate to early users
|
|
+ * (numa_set_node) that the cpu_pda pointer table for cpus other than
|
|
+ * the boot cpu is not yet setup.
|
|
+ */
|
|
+static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
|
|
+#else
|
|
+static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
|
|
+#endif
|
|
+
|
|
+void __init x86_64_init_pda(void)
|
|
+{
|
|
+ _cpu_pda = __cpu_pda;
|
|
+ cpu_pda(0) = &_boot_cpu_pda;
|
|
+ pda_init(0);
|
|
+}
|
|
|
|
#ifndef CONFIG_XEN
|
|
static void __init zap_identity_mappings(void)
|
|
@@ -77,83 +96,10 @@ EXPORT_SYMBOL(machine_to_phys_mapping);
|
|
unsigned int machine_to_phys_order;
|
|
EXPORT_SYMBOL(machine_to_phys_order);
|
|
|
|
-#define BIOS_LOWMEM_KILOBYTES 0x413
|
|
-
|
|
-/*
|
|
- * The BIOS places the EBDA/XBDA at the top of conventional
|
|
- * memory, and usually decreases the reported amount of
|
|
- * conventional memory (int 0x12) too. This also contains a
|
|
- * workaround for Dell systems that neglect to reserve EBDA.
|
|
- * The same workaround also avoids a problem with the AMD768MPX
|
|
- * chipset: reserve a page before VGA to prevent PCI prefetch
|
|
- * into it (errata #56). Usually the page is reserved anyways,
|
|
- * unless you have no PS/2 mouse plugged in.
|
|
- */
|
|
-static void __init reserve_ebda_region(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- unsigned int lowmem, ebda_addr;
|
|
-
|
|
- /* To determine the position of the EBDA and the */
|
|
- /* end of conventional memory, we need to look at */
|
|
- /* the BIOS data area. In a paravirtual environment */
|
|
- /* that area is absent. We'll just have to assume */
|
|
- /* that the paravirt case can handle memory setup */
|
|
- /* correctly, without our help. */
|
|
- if (paravirt_enabled())
|
|
- return;
|
|
-
|
|
- /* end of low (conventional) memory */
|
|
- lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
|
|
- lowmem <<= 10;
|
|
-
|
|
- /* start of EBDA area */
|
|
- ebda_addr = get_bios_ebda();
|
|
-
|
|
- /* Fixup: bios puts an EBDA in the top 64K segment */
|
|
- /* of conventional memory, but does not adjust lowmem. */
|
|
- if ((lowmem - ebda_addr) <= 0x10000)
|
|
- lowmem = ebda_addr;
|
|
-
|
|
- /* Fixup: bios does not report an EBDA at all. */
|
|
- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
|
|
- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
|
|
- lowmem = 0x9f000;
|
|
-
|
|
- /* Paranoia: should never happen, but... */
|
|
- if ((lowmem == 0) || (lowmem >= 0x100000))
|
|
- lowmem = 0x9f000;
|
|
-
|
|
- /* reserve all memory between lowmem and the 1MB mark */
|
|
- reserve_early(lowmem, 0x100000, "BIOS reserved");
|
|
-#endif
|
|
-}
|
|
-
|
|
-static void __init reserve_setup_data(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct setup_data *data;
|
|
- unsigned long pa_data;
|
|
- char buf[32];
|
|
-
|
|
- if (boot_params.hdr.version < 0x0209)
|
|
- return;
|
|
- pa_data = boot_params.hdr.setup_data;
|
|
- while (pa_data) {
|
|
- data = early_ioremap(pa_data, sizeof(*data));
|
|
- sprintf(buf, "setup data %x", data->type);
|
|
- reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
|
|
- pa_data = data->next;
|
|
- early_iounmap(data, sizeof(*data));
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
void __init x86_64_start_kernel(char * real_mode_data)
|
|
{
|
|
struct xen_machphys_mapping mapping;
|
|
unsigned long machine_to_phys_nr_ents;
|
|
- int i;
|
|
|
|
/*
|
|
* Build-time sanity checks on the kernel image and module
|
|
@@ -167,6 +113,7 @@ void __init x86_64_start_kernel(char * r
|
|
BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL));
|
|
BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) ==
|
|
(__START_KERNEL & PGDIR_MASK)));
|
|
+ BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
|
|
|
|
xen_setup_features();
|
|
|
|
@@ -174,8 +121,6 @@ void __init x86_64_start_kernel(char * r
|
|
if (!xen_feature(XENFEAT_auto_translated_physmap))
|
|
phys_to_machine_mapping =
|
|
(unsigned long *)xen_start_info->mfn_list;
|
|
- start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
|
|
- xen_start_info->nr_pt_frames;
|
|
|
|
machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
|
|
machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
|
|
@@ -208,19 +153,23 @@ void __init x86_64_start_kernel(char * r
|
|
|
|
early_printk("Kernel alive\n");
|
|
|
|
- for (i = 0; i < NR_CPUS; i++)
|
|
- cpu_pda(i) = &boot_cpu_pda[i];
|
|
+ x86_64_init_pda();
|
|
|
|
- pda_init(0);
|
|
+ early_printk("Kernel really alive\n");
|
|
+
|
|
+ x86_64_start_reservations(real_mode_data);
|
|
+}
|
|
+
|
|
+void __init x86_64_start_reservations(char *real_mode_data)
|
|
+{
|
|
copy_bootdata(__va(real_mode_data));
|
|
|
|
reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
|
|
|
reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
|
|
- start_pfn << PAGE_SHIFT, "Xen provided");
|
|
-
|
|
- reserve_ebda_region();
|
|
- reserve_setup_data();
|
|
+ __pa(xen_start_info->pt_base)
|
|
+ + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
|
|
+ "Xen provided");
|
|
|
|
/*
|
|
* At this point everything still needed from the boot loader
|
|
--- head-2011-03-11.orig/arch/x86/kernel/head_32-xen.S 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/head_32-xen.S 2011-03-03 16:19:21.000000000 +0100
|
|
@@ -61,8 +61,6 @@ ENTRY(startup_32)
|
|
movb %cl,X86_MASK
|
|
movl %edx,X86_CAPABILITY
|
|
|
|
- movb $1,X86_HARD_MATH
|
|
-
|
|
xorl %eax,%eax # Clear GS
|
|
movl %eax,%gs
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/head_64-xen.S 2011-01-31 17:49:31.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/head_64-xen.S 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -92,53 +92,6 @@ NEXT_PAGE(hypercall_page)
|
|
|
|
#undef NEXT_PAGE
|
|
|
|
- .data
|
|
-
|
|
- .align 16
|
|
- .globl cpu_gdt_descr
|
|
-cpu_gdt_descr:
|
|
- .word gdt_end-cpu_gdt_table-1
|
|
-gdt:
|
|
- .quad cpu_gdt_table
|
|
-#ifdef CONFIG_SMP
|
|
- .rept NR_CPUS-1
|
|
- .word 0
|
|
- .quad 0
|
|
- .endr
|
|
-#endif
|
|
-
|
|
-/* We need valid kernel segments for data and code in long mode too
|
|
- * IRET will check the segment types kkeil 2000/10/28
|
|
- * Also sysret mandates a special GDT layout
|
|
- */
|
|
-
|
|
- .section .data.page_aligned, "aw"
|
|
- .align PAGE_SIZE
|
|
-
|
|
-/* The TLS descriptors are currently at a different place compared to i386.
|
|
- Hopefully nobody expects them at a fixed place (Wine?) */
|
|
-
|
|
-ENTRY(cpu_gdt_table)
|
|
- .quad 0x0000000000000000 /* NULL descriptor */
|
|
- .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
|
|
- .quad 0x00af9b000000ffff /* __KERNEL_CS */
|
|
- .quad 0x00cf93000000ffff /* __KERNEL_DS */
|
|
- .quad 0x00cffb000000ffff /* __USER32_CS */
|
|
- .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
|
|
- .quad 0x00affb000000ffff /* __USER_CS */
|
|
- .quad 0x0 /* unused */
|
|
- .quad 0,0 /* TSS */
|
|
- .quad 0,0 /* LDT */
|
|
- .quad 0,0,0 /* three TLS descriptors */
|
|
- .quad 0x0000f40000000000 /* node/CPU stored in limit */
|
|
-gdt_end:
|
|
- /* asm/segment.h:GDT_ENTRIES must match this */
|
|
- /* This should be a multiple of the cache line size */
|
|
- /* GDTs of other CPUs are now dynamically allocated */
|
|
-
|
|
- /* zero the remaining page */
|
|
- .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0
|
|
-
|
|
.section .bss.page_aligned, "aw", @nobits
|
|
.align PAGE_SIZE
|
|
ENTRY(empty_zero_page)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/io_apic_32-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/io_apic_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -25,6 +25,7 @@
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/sched.h>
|
|
+#include <linux/bootmem.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/acpi.h>
|
|
@@ -75,7 +76,7 @@ static struct { int pin, apic; } ioapic_
|
|
static DEFINE_SPINLOCK(ioapic_lock);
|
|
static DEFINE_SPINLOCK(vector_lock);
|
|
|
|
-int timer_over_8254 __initdata = 1;
|
|
+int timer_through_8259 __initdata;
|
|
|
|
/*
|
|
* Is the SiS APIC rmw bug present ?
|
|
@@ -89,15 +90,21 @@ int sis_apic_bug = -1;
|
|
int nr_ioapic_registers[MAX_IO_APICS];
|
|
|
|
/* I/O APIC entries */
|
|
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
int nr_ioapics;
|
|
|
|
/* MP IRQ source entries */
|
|
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
|
|
/* # of MP IRQ source entries */
|
|
int mp_irq_entries;
|
|
|
|
+#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
|
|
+int mp_bus_id_to_type[MAX_MP_BUSSES];
|
|
+#endif
|
|
+
|
|
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
+
|
|
static int disable_timer_pin_1 __initdata;
|
|
|
|
/*
|
|
@@ -128,7 +135,7 @@ struct io_apic {
|
|
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
{
|
|
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
|
|
+ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
}
|
|
#endif
|
|
|
|
@@ -142,7 +149,7 @@ static inline unsigned int io_apic_read(
|
|
struct physdev_apic apic_op;
|
|
int ret;
|
|
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
apic_op.reg = reg;
|
|
ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
if (ret)
|
|
@@ -160,7 +167,7 @@ static inline void io_apic_write(unsigne
|
|
#else
|
|
struct physdev_apic apic_op;
|
|
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
apic_op.reg = reg;
|
|
apic_op.value = value;
|
|
WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
@@ -288,7 +295,7 @@ static void __init replace_pin_at_irq(un
|
|
}
|
|
}
|
|
|
|
-static void __modify_IO_APIC_irq (unsigned int irq, unsigned long enable, unsigned long disable)
|
|
+static void __modify_IO_APIC_irq(unsigned int irq, unsigned long enable, unsigned long disable)
|
|
{
|
|
struct irq_pin_list *entry = irq_2_pin + irq;
|
|
unsigned int pin, reg;
|
|
@@ -308,30 +315,32 @@ static void __modify_IO_APIC_irq (unsign
|
|
}
|
|
|
|
/* mask = 1 */
|
|
-static void __mask_IO_APIC_irq (unsigned int irq)
|
|
+static void __mask_IO_APIC_irq(unsigned int irq)
|
|
{
|
|
- __modify_IO_APIC_irq(irq, 0x00010000, 0);
|
|
+ __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED, 0);
|
|
}
|
|
|
|
/* mask = 0 */
|
|
-static void __unmask_IO_APIC_irq (unsigned int irq)
|
|
+static void __unmask_IO_APIC_irq(unsigned int irq)
|
|
{
|
|
- __modify_IO_APIC_irq(irq, 0, 0x00010000);
|
|
+ __modify_IO_APIC_irq(irq, 0, IO_APIC_REDIR_MASKED);
|
|
}
|
|
|
|
/* mask = 1, trigger = 0 */
|
|
-static void __mask_and_edge_IO_APIC_irq (unsigned int irq)
|
|
+static void __mask_and_edge_IO_APIC_irq(unsigned int irq)
|
|
{
|
|
- __modify_IO_APIC_irq(irq, 0x00010000, 0x00008000);
|
|
+ __modify_IO_APIC_irq(irq, IO_APIC_REDIR_MASKED,
|
|
+ IO_APIC_REDIR_LEVEL_TRIGGER);
|
|
}
|
|
|
|
/* mask = 0, trigger = 1 */
|
|
-static void __unmask_and_level_IO_APIC_irq (unsigned int irq)
|
|
+static void __unmask_and_level_IO_APIC_irq(unsigned int irq)
|
|
{
|
|
- __modify_IO_APIC_irq(irq, 0x00008000, 0x00010000);
|
|
+ __modify_IO_APIC_irq(irq, IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
+ IO_APIC_REDIR_MASKED);
|
|
}
|
|
|
|
-static void mask_IO_APIC_irq (unsigned int irq)
|
|
+static void mask_IO_APIC_irq(unsigned int irq)
|
|
{
|
|
unsigned long flags;
|
|
|
|
@@ -340,7 +349,7 @@ static void mask_IO_APIC_irq (unsigned i
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
-static void unmask_IO_APIC_irq (unsigned int irq)
|
|
+static void unmask_IO_APIC_irq(unsigned int irq)
|
|
{
|
|
unsigned long flags;
|
|
|
|
@@ -352,7 +361,7 @@ static void unmask_IO_APIC_irq (unsigned
|
|
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
-
|
|
+
|
|
/* Check delivery_mode to be sure we're not clearing an SMI pin */
|
|
entry = ioapic_read_entry(apic, pin);
|
|
if (entry.delivery_mode == dest_SMI)
|
|
@@ -364,7 +373,7 @@ static void clear_IO_APIC_pin(unsigned i
|
|
ioapic_mask_entry(apic, pin);
|
|
}
|
|
|
|
-static void clear_IO_APIC (void)
|
|
+static void clear_IO_APIC(void)
|
|
{
|
|
int apic, pin;
|
|
|
|
@@ -381,7 +390,7 @@ static void set_ioapic_affinity_irq(unsi
|
|
struct irq_pin_list *entry = irq_2_pin + irq;
|
|
unsigned int apicid_value;
|
|
cpumask_t tmp;
|
|
-
|
|
+
|
|
cpus_and(tmp, cpumask, cpu_online_map);
|
|
if (cpus_empty(tmp))
|
|
tmp = TARGET_CPUS;
|
|
@@ -410,7 +419,7 @@ static void set_ioapic_affinity_irq(unsi
|
|
# include <linux/kernel_stat.h> /* kstat */
|
|
# include <linux/slab.h> /* kmalloc() */
|
|
# include <linux/timer.h>
|
|
-
|
|
+
|
|
#define IRQBALANCE_CHECK_ARCH -999
|
|
#define MAX_BALANCED_IRQ_INTERVAL (5*HZ)
|
|
#define MIN_BALANCED_IRQ_INTERVAL (HZ/2)
|
|
@@ -422,14 +431,14 @@ static int physical_balance __read_mostl
|
|
static long balanced_irq_interval __read_mostly = MAX_BALANCED_IRQ_INTERVAL;
|
|
|
|
static struct irq_cpu_info {
|
|
- unsigned long * last_irq;
|
|
- unsigned long * irq_delta;
|
|
+ unsigned long *last_irq;
|
|
+ unsigned long *irq_delta;
|
|
unsigned long irq;
|
|
} irq_cpu_data[NR_CPUS];
|
|
|
|
#define CPU_IRQ(cpu) (irq_cpu_data[cpu].irq)
|
|
-#define LAST_CPU_IRQ(cpu,irq) (irq_cpu_data[cpu].last_irq[irq])
|
|
-#define IRQ_DELTA(cpu,irq) (irq_cpu_data[cpu].irq_delta[irq])
|
|
+#define LAST_CPU_IRQ(cpu, irq) (irq_cpu_data[cpu].last_irq[irq])
|
|
+#define IRQ_DELTA(cpu, irq) (irq_cpu_data[cpu].irq_delta[irq])
|
|
|
|
#define IDLE_ENOUGH(cpu,now) \
|
|
(idle_cpu(cpu) && ((now) - per_cpu(irq_stat, (cpu)).idle_timestamp > 1))
|
|
@@ -468,8 +477,8 @@ inside:
|
|
if (cpu == -1)
|
|
cpu = NR_CPUS-1;
|
|
}
|
|
- } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu,allowed_mask) ||
|
|
- (search_idle && !IDLE_ENOUGH(cpu,now)));
|
|
+ } while (!cpu_online(cpu) || !IRQ_ALLOWED(cpu, allowed_mask) ||
|
|
+ (search_idle && !IDLE_ENOUGH(cpu, now)));
|
|
|
|
return cpu;
|
|
}
|
|
@@ -479,15 +488,14 @@ static inline void balance_irq(int cpu,
|
|
unsigned long now = jiffies;
|
|
cpumask_t allowed_mask;
|
|
unsigned int new_cpu;
|
|
-
|
|
+
|
|
if (irqbalance_disabled)
|
|
- return;
|
|
+ return;
|
|
|
|
cpus_and(allowed_mask, cpu_online_map, balance_irq_affinity[irq]);
|
|
new_cpu = move(cpu, allowed_mask, now, 1);
|
|
- if (cpu != new_cpu) {
|
|
+ if (cpu != new_cpu)
|
|
set_pending_irq(irq, cpumask_of_cpu(new_cpu));
|
|
- }
|
|
}
|
|
|
|
static inline void rotate_irqs_among_cpus(unsigned long useful_load_threshold)
|
|
@@ -499,14 +507,14 @@ static inline void rotate_irqs_among_cpu
|
|
if (!irq_desc[j].action)
|
|
continue;
|
|
/* Is it a significant load ? */
|
|
- if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i),j) <
|
|
+ if (IRQ_DELTA(CPU_TO_PACKAGEINDEX(i), j) <
|
|
useful_load_threshold)
|
|
continue;
|
|
balance_irq(i, j);
|
|
}
|
|
}
|
|
balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
|
|
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
|
+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
|
return;
|
|
}
|
|
|
|
@@ -535,22 +543,22 @@ static void do_irq_balance(void)
|
|
/* Is this an active IRQ or balancing disabled ? */
|
|
if (!irq_desc[j].action || irq_balancing_disabled(j))
|
|
continue;
|
|
- if ( package_index == i )
|
|
- IRQ_DELTA(package_index,j) = 0;
|
|
+ if (package_index == i)
|
|
+ IRQ_DELTA(package_index, j) = 0;
|
|
/* Determine the total count per processor per IRQ */
|
|
value_now = (unsigned long) kstat_cpu(i).irqs[j];
|
|
|
|
/* Determine the activity per processor per IRQ */
|
|
- delta = value_now - LAST_CPU_IRQ(i,j);
|
|
+ delta = value_now - LAST_CPU_IRQ(i, j);
|
|
|
|
/* Update last_cpu_irq[][] for the next time */
|
|
- LAST_CPU_IRQ(i,j) = value_now;
|
|
+ LAST_CPU_IRQ(i, j) = value_now;
|
|
|
|
/* Ignore IRQs whose rate is less than the clock */
|
|
if (delta < useful_load_threshold)
|
|
continue;
|
|
/* update the load for the processor or package total */
|
|
- IRQ_DELTA(package_index,j) += delta;
|
|
+ IRQ_DELTA(package_index, j) += delta;
|
|
|
|
/* Keep track of the higher numbered sibling as well */
|
|
if (i != package_index)
|
|
@@ -576,7 +584,8 @@ static void do_irq_balance(void)
|
|
max_cpu_irq = ULONG_MAX;
|
|
|
|
tryanothercpu:
|
|
- /* Look for heaviest loaded processor.
|
|
+ /*
|
|
+ * Look for heaviest loaded processor.
|
|
* We may come back to get the next heaviest loaded processor.
|
|
* Skip processors with trivial loads.
|
|
*/
|
|
@@ -585,7 +594,7 @@ tryanothercpu:
|
|
for_each_online_cpu(i) {
|
|
if (i != CPU_TO_PACKAGEINDEX(i))
|
|
continue;
|
|
- if (max_cpu_irq <= CPU_IRQ(i))
|
|
+ if (max_cpu_irq <= CPU_IRQ(i))
|
|
continue;
|
|
if (tmp_cpu_irq < CPU_IRQ(i)) {
|
|
tmp_cpu_irq = CPU_IRQ(i);
|
|
@@ -594,8 +603,9 @@ tryanothercpu:
|
|
}
|
|
|
|
if (tmp_loaded == -1) {
|
|
- /* In the case of small number of heavy interrupt sources,
|
|
- * loading some of the cpus too much. We use Ingo's original
|
|
+ /*
|
|
+ * In the case of small number of heavy interrupt sources,
|
|
+ * loading some of the cpus too much. We use Ingo's original
|
|
* approach to rotate them around.
|
|
*/
|
|
if (!first_attempt && imbalance >= useful_load_threshold) {
|
|
@@ -604,13 +614,14 @@ tryanothercpu:
|
|
}
|
|
goto not_worth_the_effort;
|
|
}
|
|
-
|
|
+
|
|
first_attempt = 0; /* heaviest search */
|
|
max_cpu_irq = tmp_cpu_irq; /* load */
|
|
max_loaded = tmp_loaded; /* processor */
|
|
imbalance = (max_cpu_irq - min_cpu_irq) / 2;
|
|
-
|
|
- /* if imbalance is less than approx 10% of max load, then
|
|
+
|
|
+ /*
|
|
+ * if imbalance is less than approx 10% of max load, then
|
|
* observe diminishing returns action. - quit
|
|
*/
|
|
if (imbalance < (max_cpu_irq >> 3))
|
|
@@ -626,26 +637,25 @@ tryanotherirq:
|
|
/* Is this an active IRQ? */
|
|
if (!irq_desc[j].action)
|
|
continue;
|
|
- if (imbalance <= IRQ_DELTA(max_loaded,j))
|
|
+ if (imbalance <= IRQ_DELTA(max_loaded, j))
|
|
continue;
|
|
/* Try to find the IRQ that is closest to the imbalance
|
|
* without going over.
|
|
*/
|
|
- if (move_this_load < IRQ_DELTA(max_loaded,j)) {
|
|
- move_this_load = IRQ_DELTA(max_loaded,j);
|
|
+ if (move_this_load < IRQ_DELTA(max_loaded, j)) {
|
|
+ move_this_load = IRQ_DELTA(max_loaded, j);
|
|
selected_irq = j;
|
|
}
|
|
}
|
|
- if (selected_irq == -1) {
|
|
+ if (selected_irq == -1)
|
|
goto tryanothercpu;
|
|
- }
|
|
|
|
imbalance = move_this_load;
|
|
-
|
|
+
|
|
/* For physical_balance case, we accumulated both load
|
|
* values in the one of the siblings cpu_irq[],
|
|
* to use the same code for physical and logical processors
|
|
- * as much as possible.
|
|
+ * as much as possible.
|
|
*
|
|
* NOTE: the cpu_irq[] array holds the sum of the load for
|
|
* sibling A and sibling B in the slot for the lowest numbered
|
|
@@ -674,11 +684,11 @@ tryanotherirq:
|
|
/* mark for change destination */
|
|
set_pending_irq(selected_irq, cpumask_of_cpu(min_loaded));
|
|
|
|
- /* Since we made a change, come back sooner to
|
|
+ /* Since we made a change, come back sooner to
|
|
* check for more variation.
|
|
*/
|
|
balanced_irq_interval = max((long)MIN_BALANCED_IRQ_INTERVAL,
|
|
- balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
|
+ balanced_irq_interval - BALANCED_IRQ_LESS_DELTA);
|
|
return;
|
|
}
|
|
goto tryanotherirq;
|
|
@@ -689,7 +699,7 @@ not_worth_the_effort:
|
|
* upward
|
|
*/
|
|
balanced_irq_interval = min((long)MAX_BALANCED_IRQ_INTERVAL,
|
|
- balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
|
|
+ balanced_irq_interval + BALANCED_IRQ_MORE_DELTA);
|
|
return;
|
|
}
|
|
|
|
@@ -728,13 +738,13 @@ static int __init balanced_irq_init(void
|
|
cpumask_t tmp;
|
|
|
|
cpus_shift_right(tmp, cpu_online_map, 2);
|
|
- c = &boot_cpu_data;
|
|
+ c = &boot_cpu_data;
|
|
/* When not overwritten by the command line ask subarchitecture. */
|
|
if (irqbalance_disabled == IRQBALANCE_CHECK_ARCH)
|
|
irqbalance_disabled = NO_BALANCE_IRQ;
|
|
if (irqbalance_disabled)
|
|
return 0;
|
|
-
|
|
+
|
|
/* disable irqbalance completely if there is only one processor online */
|
|
if (num_online_cpus() < 2) {
|
|
irqbalance_disabled = 1;
|
|
@@ -748,16 +758,14 @@ static int __init balanced_irq_init(void
|
|
physical_balance = 1;
|
|
|
|
for_each_online_cpu(i) {
|
|
- irq_cpu_data[i].irq_delta = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
|
|
- irq_cpu_data[i].last_irq = kmalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
|
|
+ irq_cpu_data[i].irq_delta = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
|
|
+ irq_cpu_data[i].last_irq = kzalloc(sizeof(unsigned long) * NR_IRQS, GFP_KERNEL);
|
|
if (irq_cpu_data[i].irq_delta == NULL || irq_cpu_data[i].last_irq == NULL) {
|
|
printk(KERN_ERR "balanced_irq_init: out of memory");
|
|
goto failed;
|
|
}
|
|
- memset(irq_cpu_data[i].irq_delta,0,sizeof(unsigned long) * NR_IRQS);
|
|
- memset(irq_cpu_data[i].last_irq,0,sizeof(unsigned long) * NR_IRQS);
|
|
}
|
|
-
|
|
+
|
|
printk(KERN_INFO "Starting balanced_irq\n");
|
|
if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
|
|
return 0;
|
|
@@ -799,7 +807,7 @@ void send_IPI_self(int vector)
|
|
/*
|
|
* Send the IPI. The write to APIC_ICR fires this off.
|
|
*/
|
|
- apic_write_around(APIC_ICR, cfg);
|
|
+ apic_write(APIC_ICR, cfg);
|
|
#endif
|
|
}
|
|
#endif /* !CONFIG_SMP */
|
|
@@ -853,10 +861,10 @@ static int find_irq_entry(int apic, int
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mpc_irqtype == type &&
|
|
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
|
|
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
|
|
- mp_irqs[i].mpc_dstirq == pin)
|
|
+ if (mp_irqs[i].mp_irqtype == type &&
|
|
+ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
+ mp_irqs[i].mp_dstirq == pin)
|
|
return i;
|
|
|
|
return -1;
|
|
@@ -871,13 +879,13 @@ static int __init find_isa_irq_pin(int i
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mpc_srcbus;
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mpc_irqtype == type) &&
|
|
- (mp_irqs[i].mpc_srcbusirq == irq))
|
|
+ (mp_irqs[i].mp_irqtype == type) &&
|
|
+ (mp_irqs[i].mp_srcbusirq == irq))
|
|
|
|
- return mp_irqs[i].mpc_dstirq;
|
|
+ return mp_irqs[i].mp_dstirq;
|
|
}
|
|
return -1;
|
|
}
|
|
@@ -887,17 +895,17 @@ static int __init find_isa_irq_apic(int
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mpc_srcbus;
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mpc_irqtype == type) &&
|
|
- (mp_irqs[i].mpc_srcbusirq == irq))
|
|
+ (mp_irqs[i].mp_irqtype == type) &&
|
|
+ (mp_irqs[i].mp_srcbusirq == irq))
|
|
break;
|
|
}
|
|
if (i < mp_irq_entries) {
|
|
int apic;
|
|
- for(apic = 0; apic < nr_ioapics; apic++) {
|
|
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
|
return apic;
|
|
}
|
|
}
|
|
@@ -918,28 +926,28 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
|
|
apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, "
|
|
"slot:%d, pin:%d.\n", bus, slot, pin);
|
|
- if (mp_bus_id_to_pci_bus[bus] == -1) {
|
|
+ if (test_bit(bus, mp_bus_not_pci)) {
|
|
printk(KERN_WARNING "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
|
return -1;
|
|
}
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mpc_srcbus;
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
|
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
|
|
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
|
|
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
|
break;
|
|
|
|
if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
- !mp_irqs[i].mpc_irqtype &&
|
|
+ !mp_irqs[i].mp_irqtype &&
|
|
(bus == lbus) &&
|
|
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
|
|
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
|
|
+ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
+ int irq = pin_2_irq(i, apic, mp_irqs[i].mp_dstirq);
|
|
|
|
if (!(apic || IO_APIC_IRQ(irq)))
|
|
continue;
|
|
|
|
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
|
|
+ if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
|
return irq;
|
|
/*
|
|
* Use the first all-but-pin matching entry as a
|
|
@@ -954,7 +962,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
|
|
|
|
/*
|
|
- * This function currently is only a helper for the i386 smp boot process where
|
|
+ * This function currently is only a helper for the i386 smp boot process where
|
|
* we need to reprogram the ioredtbls to cater for the cpus which have come online
|
|
* so mask in all cases should simply be TARGET_CPUS
|
|
*/
|
|
@@ -1008,7 +1016,7 @@ static int EISA_ELCR(unsigned int irq)
|
|
* EISA conforming in the MP table, that means its trigger type must
|
|
* be read in from the ELCR */
|
|
|
|
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
|
|
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
|
|
#define default_EISA_polarity(idx) default_ISA_polarity(idx)
|
|
|
|
/* PCI interrupts are always polarity one level triggered,
|
|
@@ -1025,118 +1033,115 @@ static int EISA_ELCR(unsigned int irq)
|
|
|
|
static int MPBIOS_polarity(int idx)
|
|
{
|
|
- int bus = mp_irqs[idx].mpc_srcbus;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
int polarity;
|
|
|
|
/*
|
|
* Determine IRQ line polarity (high active or low active):
|
|
*/
|
|
- switch (mp_irqs[idx].mpc_irqflag & 3)
|
|
+ switch (mp_irqs[idx].mp_irqflag & 3) {
|
|
+ case 0: /* conforms, ie. bus-type dependent polarity */
|
|
{
|
|
- case 0: /* conforms, ie. bus-type dependent polarity */
|
|
- {
|
|
- polarity = test_bit(bus, mp_bus_not_pci)?
|
|
- default_ISA_polarity(idx):
|
|
- default_PCI_polarity(idx);
|
|
- break;
|
|
- }
|
|
- case 1: /* high active */
|
|
- {
|
|
- polarity = 0;
|
|
- break;
|
|
- }
|
|
- case 2: /* reserved */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- case 3: /* low active */
|
|
- {
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- default: /* invalid */
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
+ polarity = test_bit(bus, mp_bus_not_pci)?
|
|
+ default_ISA_polarity(idx):
|
|
+ default_PCI_polarity(idx);
|
|
+ break;
|
|
+ }
|
|
+ case 1: /* high active */
|
|
+ {
|
|
+ polarity = 0;
|
|
+ break;
|
|
+ }
|
|
+ case 2: /* reserved */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ polarity = 1;
|
|
+ break;
|
|
+ }
|
|
+ case 3: /* low active */
|
|
+ {
|
|
+ polarity = 1;
|
|
+ break;
|
|
+ }
|
|
+ default: /* invalid */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ polarity = 1;
|
|
+ break;
|
|
+ }
|
|
}
|
|
return polarity;
|
|
}
|
|
|
|
static int MPBIOS_trigger(int idx)
|
|
{
|
|
- int bus = mp_irqs[idx].mpc_srcbus;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
int trigger;
|
|
|
|
/*
|
|
* Determine IRQ trigger mode (edge or level sensitive):
|
|
*/
|
|
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
|
|
+ switch ((mp_irqs[idx].mp_irqflag>>2) & 3) {
|
|
+ case 0: /* conforms, ie. bus-type dependent */
|
|
{
|
|
- case 0: /* conforms, ie. bus-type dependent */
|
|
- {
|
|
- trigger = test_bit(bus, mp_bus_not_pci)?
|
|
- default_ISA_trigger(idx):
|
|
- default_PCI_trigger(idx);
|
|
+ trigger = test_bit(bus, mp_bus_not_pci)?
|
|
+ default_ISA_trigger(idx):
|
|
+ default_PCI_trigger(idx);
|
|
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
|
|
- switch (mp_bus_id_to_type[bus])
|
|
- {
|
|
- case MP_BUS_ISA: /* ISA pin */
|
|
- {
|
|
- /* set before the switch */
|
|
- break;
|
|
- }
|
|
- case MP_BUS_EISA: /* EISA pin */
|
|
- {
|
|
- trigger = default_EISA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_PCI: /* PCI pin */
|
|
- {
|
|
- /* set before the switch */
|
|
- break;
|
|
- }
|
|
- case MP_BUS_MCA: /* MCA pin */
|
|
- {
|
|
- trigger = default_MCA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- default:
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- }
|
|
-#endif
|
|
+ switch (mp_bus_id_to_type[bus]) {
|
|
+ case MP_BUS_ISA: /* ISA pin */
|
|
+ {
|
|
+ /* set before the switch */
|
|
break;
|
|
}
|
|
- case 1: /* edge */
|
|
+ case MP_BUS_EISA: /* EISA pin */
|
|
{
|
|
- trigger = 0;
|
|
+ trigger = default_EISA_trigger(idx);
|
|
break;
|
|
}
|
|
- case 2: /* reserved */
|
|
+ case MP_BUS_PCI: /* PCI pin */
|
|
{
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 1;
|
|
+ /* set before the switch */
|
|
break;
|
|
}
|
|
- case 3: /* level */
|
|
+ case MP_BUS_MCA: /* MCA pin */
|
|
{
|
|
- trigger = 1;
|
|
+ trigger = default_MCA_trigger(idx);
|
|
break;
|
|
}
|
|
- default: /* invalid */
|
|
+ default:
|
|
{
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 0;
|
|
+ trigger = 1;
|
|
break;
|
|
}
|
|
}
|
|
+#endif
|
|
+ break;
|
|
+ }
|
|
+ case 1: /* edge */
|
|
+ {
|
|
+ trigger = 0;
|
|
+ break;
|
|
+ }
|
|
+ case 2: /* reserved */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ trigger = 1;
|
|
+ break;
|
|
+ }
|
|
+ case 3: /* level */
|
|
+ {
|
|
+ trigger = 1;
|
|
+ break;
|
|
+ }
|
|
+ default: /* invalid */
|
|
+ {
|
|
+ printk(KERN_WARNING "broken BIOS!!\n");
|
|
+ trigger = 0;
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
return trigger;
|
|
}
|
|
|
|
@@ -1153,16 +1158,16 @@ static inline int irq_trigger(int idx)
|
|
static int pin_2_irq(int idx, int apic, int pin)
|
|
{
|
|
int irq, i;
|
|
- int bus = mp_irqs[idx].mpc_srcbus;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
|
|
/*
|
|
* Debugging check, we are in big trouble if this message pops up!
|
|
*/
|
|
- if (mp_irqs[idx].mpc_dstirq != pin)
|
|
+ if (mp_irqs[idx].mp_dstirq != pin)
|
|
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
- irq = mp_irqs[idx].mpc_srcbusirq;
|
|
+ irq = mp_irqs[idx].mp_srcbusirq;
|
|
else {
|
|
/*
|
|
* PCI IRQs are mapped in order
|
|
@@ -1204,8 +1209,8 @@ static inline int IO_APIC_irq_trigger(in
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++) {
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
- idx = find_irq_entry(apic,pin,mp_INT);
|
|
- if ((idx != -1) && (irq == pin_2_irq(idx,apic,pin)))
|
|
+ idx = find_irq_entry(apic, pin, mp_INT);
|
|
+ if ((idx != -1) && (irq == pin_2_irq(idx, apic, pin)))
|
|
return irq_trigger(idx);
|
|
}
|
|
}
|
|
@@ -1291,25 +1296,25 @@ static void __init setup_IO_APIC_irqs(vo
|
|
/*
|
|
* add it to the IO-APIC irq-routing table:
|
|
*/
|
|
- memset(&entry,0,sizeof(entry));
|
|
+ memset(&entry, 0, sizeof(entry));
|
|
|
|
entry.delivery_mode = INT_DELIVERY_MODE;
|
|
entry.dest_mode = INT_DEST_MODE;
|
|
entry.mask = 0; /* enable IRQ */
|
|
- entry.dest.logical.logical_dest =
|
|
+ entry.dest.logical.logical_dest =
|
|
cpu_mask_to_apicid(TARGET_CPUS);
|
|
|
|
- idx = find_irq_entry(apic,pin,mp_INT);
|
|
+ idx = find_irq_entry(apic, pin, mp_INT);
|
|
if (idx == -1) {
|
|
if (first_notcon) {
|
|
apic_printk(APIC_VERBOSE, KERN_DEBUG
|
|
" IO-APIC (apicid-pin) %d-%d",
|
|
- mp_ioapics[apic].mpc_apicid,
|
|
+ mp_ioapics[apic].mp_apicid,
|
|
pin);
|
|
first_notcon = 0;
|
|
} else
|
|
apic_printk(APIC_VERBOSE, ", %d-%d",
|
|
- mp_ioapics[apic].mpc_apicid, pin);
|
|
+ mp_ioapics[apic].mp_apicid, pin);
|
|
continue;
|
|
}
|
|
|
|
@@ -1343,7 +1348,7 @@ static void __init setup_IO_APIC_irqs(vo
|
|
vector = assign_irq_vector(irq);
|
|
entry.vector = vector;
|
|
ioapic_register_intr(irq, vector, IOAPIC_AUTO);
|
|
-
|
|
+
|
|
if (!apic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
}
|
|
@@ -1355,27 +1360,23 @@ static void __init setup_IO_APIC_irqs(vo
|
|
apic_printk(APIC_VERBOSE, " not connected.\n");
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
/*
|
|
- * Set up the 8259A-master output pin:
|
|
+ * Set up the timer pin, possibly with the 8259A-master behind.
|
|
*/
|
|
-#ifndef CONFIG_XEN
|
|
-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
|
|
+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
+ int vector)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
- memset(&entry,0,sizeof(entry));
|
|
-
|
|
- disable_8259A_irq(0);
|
|
-
|
|
- /* mask LVT0 */
|
|
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
+ memset(&entry, 0, sizeof(entry));
|
|
|
|
/*
|
|
* We use logical delivery to get the timer IRQ
|
|
* to the first CPU.
|
|
*/
|
|
entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 0; /* unmask IRQ now */
|
|
+ entry.mask = 1; /* mask IRQ now */
|
|
entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
entry.delivery_mode = INT_DELIVERY_MODE;
|
|
entry.polarity = 0;
|
|
@@ -1384,17 +1385,14 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
|
|
/*
|
|
* The timer IRQ doesn't have to know that behind the
|
|
- * scene we have a 8259A-master in AEOI mode ...
|
|
+ * scene we may have a 8259A-master in AEOI mode ...
|
|
*/
|
|
- irq_desc[0].chip = &ioapic_chip;
|
|
- set_irq_handler(0, handle_edge_irq);
|
|
+ ioapic_register_intr(0, vector, IOAPIC_EDGE);
|
|
|
|
/*
|
|
* Add it to the IO-APIC irq-routing table:
|
|
*/
|
|
ioapic_write_entry(apic, pin, entry);
|
|
-
|
|
- enable_8259A_irq(0);
|
|
}
|
|
|
|
void __init print_IO_APIC(void)
|
|
@@ -1409,10 +1407,10 @@ void __init print_IO_APIC(void)
|
|
if (apic_verbosity == APIC_QUIET)
|
|
return;
|
|
|
|
- printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
+ printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
for (i = 0; i < nr_ioapics; i++)
|
|
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
|
|
+ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
|
|
|
/*
|
|
* We are a bit conservative about what we expect. We have to
|
|
@@ -1431,7 +1429,7 @@ void __init print_IO_APIC(void)
|
|
reg_03.raw = io_apic_read(apic, 3);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
|
|
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
|
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
|
|
@@ -1512,7 +1510,7 @@ void __init print_IO_APIC(void)
|
|
return;
|
|
}
|
|
|
|
-static void print_APIC_bitfield (int base)
|
|
+static void print_APIC_bitfield(int base)
|
|
{
|
|
unsigned int v;
|
|
int i, j;
|
|
@@ -1533,7 +1531,7 @@ static void print_APIC_bitfield (int bas
|
|
}
|
|
}
|
|
|
|
-void /*__init*/ print_local_APIC(void * dummy)
|
|
+void /*__init*/ print_local_APIC(void *dummy)
|
|
{
|
|
unsigned int v, ver, maxlvt;
|
|
|
|
@@ -1542,6 +1540,7 @@ void /*__init*/ print_local_APIC(void *
|
|
|
|
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
|
smp_processor_id(), hard_smp_processor_id());
|
|
+ v = apic_read(APIC_ID);
|
|
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
|
|
GET_APIC_ID(read_apic_id()));
|
|
v = apic_read(APIC_LVR);
|
|
@@ -1616,9 +1615,9 @@ void /*__init*/ print_local_APIC(void *
|
|
printk("\n");
|
|
}
|
|
|
|
-void print_all_local_APICs (void)
|
|
+void print_all_local_APICs(void)
|
|
{
|
|
- on_each_cpu(print_local_APIC, NULL, 1, 1);
|
|
+ on_each_cpu(print_local_APIC, NULL, 1);
|
|
}
|
|
|
|
void /*__init*/ print_PIC(void)
|
|
@@ -1639,11 +1638,11 @@ void /*__init*/ print_PIC(void)
|
|
v = inb(0xa0) << 8 | inb(0x20);
|
|
printk(KERN_DEBUG "... PIC IRR: %04x\n", v);
|
|
|
|
- outb(0x0b,0xa0);
|
|
- outb(0x0b,0x20);
|
|
+ outb(0x0b, 0xa0);
|
|
+ outb(0x0b, 0x20);
|
|
v = inb(0xa0) << 8 | inb(0x20);
|
|
- outb(0x0a,0xa0);
|
|
- outb(0x0a,0x20);
|
|
+ outb(0x0a, 0xa0);
|
|
+ outb(0x0a, 0x20);
|
|
|
|
spin_unlock_irqrestore(&i8259A_lock, flags);
|
|
|
|
@@ -1652,6 +1651,8 @@ void /*__init*/ print_PIC(void)
|
|
v = inb(0x4d1) << 8 | inb(0x4d0);
|
|
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
|
|
}
|
|
+#else
|
|
+void __init print_IO_APIC(void) {}
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
static void __init enable_IO_APIC(void)
|
|
@@ -1681,7 +1682,7 @@ static void __init enable_IO_APIC(void)
|
|
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
}
|
|
#ifndef CONFIG_XEN
|
|
- for(apic = 0; apic < nr_ioapics; apic++) {
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
int pin;
|
|
/* See if any of the pins is in ExtINT mode */
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
@@ -1774,7 +1775,7 @@ void disable_IO_APIC(void)
|
|
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
|
|
*/
|
|
|
|
-#if !defined(CONFIG_XEN) && !defined(CONFIG_X86_NUMAQ)
|
|
+#ifndef CONFIG_XEN
|
|
static void __init setup_ioapic_ids_from_mpc(void)
|
|
{
|
|
union IO_APIC_reg_00 reg_00;
|
|
@@ -1784,6 +1785,11 @@ static void __init setup_ioapic_ids_from
|
|
unsigned char old_id;
|
|
unsigned long flags;
|
|
|
|
+#ifdef CONFIG_X86_NUMAQ
|
|
+ if (found_numaq)
|
|
+ return;
|
|
+#endif
|
|
+
|
|
/*
|
|
* Don't check I/O APIC IDs for xAPIC systems. They have
|
|
* no meaning without the serial APIC bus.
|
|
@@ -1806,15 +1812,15 @@ static void __init setup_ioapic_ids_from
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
reg_00.raw = io_apic_read(apic, 0);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- old_id = mp_ioapics[apic].mpc_apicid;
|
|
|
|
- if (mp_ioapics[apic].mpc_apicid >= get_physical_broadcast()) {
|
|
+ old_id = mp_ioapics[apic].mp_apicid;
|
|
+
|
|
+ if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
|
|
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
|
|
- apic, mp_ioapics[apic].mpc_apicid);
|
|
+ apic, mp_ioapics[apic].mp_apicid);
|
|
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
reg_00.bits.ID);
|
|
- mp_ioapics[apic].mpc_apicid = reg_00.bits.ID;
|
|
+ mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
|
|
}
|
|
|
|
/*
|
|
@@ -1823,9 +1829,9 @@ static void __init setup_ioapic_ids_from
|
|
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
*/
|
|
if (check_apicid_used(phys_id_present_map,
|
|
- mp_ioapics[apic].mpc_apicid)) {
|
|
+ mp_ioapics[apic].mp_apicid)) {
|
|
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
|
|
- apic, mp_ioapics[apic].mpc_apicid);
|
|
+ apic, mp_ioapics[apic].mp_apicid);
|
|
for (i = 0; i < get_physical_broadcast(); i++)
|
|
if (!physid_isset(i, phys_id_present_map))
|
|
break;
|
|
@@ -1834,13 +1840,13 @@ static void __init setup_ioapic_ids_from
|
|
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
i);
|
|
physid_set(i, phys_id_present_map);
|
|
- mp_ioapics[apic].mpc_apicid = i;
|
|
+ mp_ioapics[apic].mp_apicid = i;
|
|
} else {
|
|
physid_mask_t tmp;
|
|
- tmp = apicid_to_cpu_present(mp_ioapics[apic].mpc_apicid);
|
|
+ tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
|
|
apic_printk(APIC_VERBOSE, "Setting %d in the "
|
|
"phys_id_present_map\n",
|
|
- mp_ioapics[apic].mpc_apicid);
|
|
+ mp_ioapics[apic].mp_apicid);
|
|
physids_or(phys_id_present_map, phys_id_present_map, tmp);
|
|
}
|
|
|
|
@@ -1849,21 +1855,21 @@ static void __init setup_ioapic_ids_from
|
|
* We need to adjust the IRQ routing table
|
|
* if the ID changed.
|
|
*/
|
|
- if (old_id != mp_ioapics[apic].mpc_apicid)
|
|
+ if (old_id != mp_ioapics[apic].mp_apicid)
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mpc_dstapic == old_id)
|
|
- mp_irqs[i].mpc_dstapic
|
|
- = mp_ioapics[apic].mpc_apicid;
|
|
+ if (mp_irqs[i].mp_dstapic == old_id)
|
|
+ mp_irqs[i].mp_dstapic
|
|
+ = mp_ioapics[apic].mp_apicid;
|
|
|
|
/*
|
|
* Read the right value from the MPC table and
|
|
* write it into the ID register.
|
|
- */
|
|
+ */
|
|
apic_printk(APIC_VERBOSE, KERN_INFO
|
|
"...changing IO-APIC physical APIC ID to %d ...",
|
|
- mp_ioapics[apic].mpc_apicid);
|
|
+ mp_ioapics[apic].mp_apicid);
|
|
|
|
- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
|
|
+ reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
io_apic_write(apic, 0, reg_00.raw);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
@@ -1874,17 +1880,13 @@ static void __init setup_ioapic_ids_from
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
reg_00.raw = io_apic_read(apic, 0);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
|
|
+ if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
|
|
printk("could not set ID!\n");
|
|
else
|
|
apic_printk(APIC_VERBOSE, " ok.\n");
|
|
}
|
|
}
|
|
-#else
|
|
-static void __init setup_ioapic_ids_from_mpc(void) { }
|
|
-#endif
|
|
|
|
-#ifndef CONFIG_XEN
|
|
int no_timer_check __initdata;
|
|
|
|
static int __init notimercheck(char *s)
|
|
@@ -2077,45 +2079,53 @@ static inline void init_IO_APIC_traps(vo
|
|
* The local APIC irq-chip implementation:
|
|
*/
|
|
|
|
-static void ack_apic(unsigned int irq)
|
|
+static void ack_lapic_irq(unsigned int irq)
|
|
{
|
|
ack_APIC_irq();
|
|
}
|
|
|
|
-static void mask_lapic_irq (unsigned int irq)
|
|
+static void mask_lapic_irq(unsigned int irq)
|
|
{
|
|
unsigned long v;
|
|
|
|
v = apic_read(APIC_LVT0);
|
|
- apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
|
|
+ apic_write(APIC_LVT0, v | APIC_LVT_MASKED);
|
|
}
|
|
|
|
-static void unmask_lapic_irq (unsigned int irq)
|
|
+static void unmask_lapic_irq(unsigned int irq)
|
|
{
|
|
unsigned long v;
|
|
|
|
v = apic_read(APIC_LVT0);
|
|
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
+ apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
}
|
|
|
|
static struct irq_chip lapic_chip __read_mostly = {
|
|
- .name = "local-APIC-edge",
|
|
+ .name = "local-APIC",
|
|
.mask = mask_lapic_irq,
|
|
.unmask = unmask_lapic_irq,
|
|
- .eoi = ack_apic,
|
|
+ .ack = ack_lapic_irq,
|
|
};
|
|
|
|
+static void lapic_register_intr(int irq, int vector)
|
|
+{
|
|
+ irq_desc[irq].status &= ~IRQ_LEVEL;
|
|
+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
|
+ "edge");
|
|
+ set_intr_gate(vector, interrupt[irq]);
|
|
+}
|
|
+
|
|
static void __init setup_nmi(void)
|
|
{
|
|
/*
|
|
- * Dirty trick to enable the NMI watchdog ...
|
|
+ * Dirty trick to enable the NMI watchdog ...
|
|
* We put the 8259A master into AEOI mode and
|
|
* unmask on all local APICs LVT0 as NMI.
|
|
*
|
|
* The idea to use the 8259A in AEOI mode ('8259A Virtual Wire')
|
|
* is from Maciej W. Rozycki - so we do not have to EOI from
|
|
* the NMI handler or the timer interrupt.
|
|
- */
|
|
+ */
|
|
apic_printk(APIC_VERBOSE, KERN_INFO "activating NMI Watchdog ...");
|
|
|
|
enable_NMI_through_LVT0();
|
|
@@ -2191,11 +2201,16 @@ static inline void __init unlock_ExtINT_
|
|
static inline void __init check_timer(void)
|
|
{
|
|
int apic1, pin1, apic2, pin2;
|
|
+ int no_pin1 = 0;
|
|
int vector;
|
|
+ unsigned int ver;
|
|
unsigned long flags;
|
|
|
|
local_irq_save(flags);
|
|
|
|
+ ver = apic_read(APIC_LVR);
|
|
+ ver = GET_APIC_VERSION(ver);
|
|
+
|
|
/*
|
|
* get/set the timer IRQ vector:
|
|
*/
|
|
@@ -2204,34 +2219,54 @@ static inline void __init check_timer(vo
|
|
set_intr_gate(vector, interrupt[0]);
|
|
|
|
/*
|
|
- * Subtle, code in do_timer_interrupt() expects an AEOI
|
|
- * mode for the 8259A whenever interrupts are routed
|
|
- * through I/O APICs. Also IRQ0 has to be enabled in
|
|
- * the 8259A which implies the virtual wire has to be
|
|
- * disabled in the local APIC.
|
|
+ * As IRQ0 is to be enabled in the 8259A, the virtual
|
|
+ * wire has to be disabled in the local APIC. Also
|
|
+ * timer interrupts need to be acknowledged manually in
|
|
+ * the 8259A for the i82489DX when using the NMI
|
|
+ * watchdog as that APIC treats NMIs as level-triggered.
|
|
+ * The AEOI mode will finish them in the 8259A
|
|
+ * automatically.
|
|
*/
|
|
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
init_8259A(1);
|
|
- timer_ack = 1;
|
|
- if (timer_over_8254 > 0)
|
|
- enable_8259A_irq(0);
|
|
+ timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
|
|
|
|
pin1 = find_isa_irq_pin(0, mp_INT);
|
|
apic1 = find_isa_irq_apic(0, mp_INT);
|
|
pin2 = ioapic_i8259.pin;
|
|
apic2 = ioapic_i8259.apic;
|
|
|
|
- printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
- vector, apic1, pin1, apic2, pin2);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
|
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
+ vector, apic1, pin1, apic2, pin2);
|
|
+
|
|
+ /*
|
|
+ * Some BIOS writers are clueless and report the ExtINTA
|
|
+ * I/O APIC input from the cascaded 8259A as the timer
|
|
+ * interrupt input. So just in case, if only one pin
|
|
+ * was found above, try it both directly and through the
|
|
+ * 8259A.
|
|
+ */
|
|
+ if (pin1 == -1) {
|
|
+ pin1 = pin2;
|
|
+ apic1 = apic2;
|
|
+ no_pin1 = 1;
|
|
+ } else if (pin2 == -1) {
|
|
+ pin2 = pin1;
|
|
+ apic2 = apic1;
|
|
+ }
|
|
|
|
if (pin1 != -1) {
|
|
/*
|
|
* Ok, does IRQ0 through the IOAPIC work?
|
|
*/
|
|
+ if (no_pin1) {
|
|
+ add_pin_to_irq(0, apic1, pin1);
|
|
+ setup_timer_IRQ0_pin(apic1, pin1, vector);
|
|
+ }
|
|
unmask_IO_APIC_irq(0);
|
|
if (timer_irq_works()) {
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
- disable_8259A_irq(0);
|
|
setup_nmi();
|
|
enable_8259A_irq(0);
|
|
}
|
|
@@ -2240,71 +2275,77 @@ static inline void __init check_timer(vo
|
|
goto out;
|
|
}
|
|
clear_IO_APIC_pin(apic1, pin1);
|
|
- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
|
|
- "IO-APIC\n");
|
|
- }
|
|
-
|
|
- printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
|
|
- if (pin2 != -1) {
|
|
- printk("\n..... (found pin %d) ...", pin2);
|
|
+ if (!no_pin1)
|
|
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
|
+ "8254 timer not connected to IO-APIC\n");
|
|
+
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
|
+ "(IRQ0) through the 8259A ...\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
|
|
/*
|
|
* legacy devices should be connected to IO APIC #0
|
|
*/
|
|
- setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
|
|
+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
|
+ setup_timer_IRQ0_pin(apic2, pin2, vector);
|
|
+ unmask_IO_APIC_irq(0);
|
|
+ enable_8259A_irq(0);
|
|
if (timer_irq_works()) {
|
|
- printk("works.\n");
|
|
- if (pin1 != -1)
|
|
- replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
|
- else
|
|
- add_pin_to_irq(0, apic2, pin2);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
|
+ timer_through_8259 = 1;
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
+ disable_8259A_irq(0);
|
|
setup_nmi();
|
|
+ enable_8259A_irq(0);
|
|
}
|
|
goto out;
|
|
}
|
|
/*
|
|
* Cleanup, just in case ...
|
|
*/
|
|
+ disable_8259A_irq(0);
|
|
clear_IO_APIC_pin(apic2, pin2);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
|
}
|
|
- printk(" failed.\n");
|
|
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
|
|
- nmi_watchdog = 0;
|
|
+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
|
+ "through the IO-APIC - disabling NMI Watchdog!\n");
|
|
+ nmi_watchdog = NMI_NONE;
|
|
}
|
|
+ timer_ack = 0;
|
|
|
|
- printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "...trying to set up timer as Virtual Wire IRQ...\n");
|
|
|
|
- disable_8259A_irq(0);
|
|
- set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
|
|
- "fasteoi");
|
|
- apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
|
|
+ lapic_register_intr(0, vector);
|
|
+ apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
|
|
enable_8259A_irq(0);
|
|
|
|
if (timer_irq_works()) {
|
|
- printk(" works.\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
goto out;
|
|
}
|
|
- apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
|
|
- printk(" failed.\n");
|
|
+ disable_8259A_irq(0);
|
|
+ apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
|
|
|
- printk(KERN_INFO "...trying to set up timer as ExtINT IRQ...");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "...trying to set up timer as ExtINT IRQ...\n");
|
|
|
|
- timer_ack = 0;
|
|
init_8259A(0);
|
|
make_8259A_irq(0);
|
|
- apic_write_around(APIC_LVT0, APIC_DM_EXTINT);
|
|
+ apic_write(APIC_LVT0, APIC_DM_EXTINT);
|
|
|
|
unlock_ExtINT_logic();
|
|
|
|
if (timer_irq_works()) {
|
|
- printk(" works.\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
goto out;
|
|
}
|
|
- printk(" failed :(.\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
|
panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
|
- "report. Then try booting with the 'noapic' option");
|
|
+ "report. Then try booting with the 'noapic' option.\n");
|
|
out:
|
|
local_irq_restore(flags);
|
|
}
|
|
@@ -2314,11 +2355,21 @@ int timer_uses_ioapic_pin_0 = 0;
|
|
#endif
|
|
|
|
/*
|
|
- *
|
|
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
|
|
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
|
|
- * Linux doesn't really care, as it's not actually used
|
|
- * for any interrupt handling anyway.
|
|
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
|
|
+ * to devices. However there may be an I/O APIC pin available for
|
|
+ * this interrupt regardless. The pin may be left unconnected, but
|
|
+ * typically it will be reused as an ExtINT cascade interrupt for
|
|
+ * the master 8259A. In the MPS case such a pin will normally be
|
|
+ * reported as an ExtINT interrupt in the MP table. With ACPI
|
|
+ * there is no provision for ExtINT interrupts, and in the absence
|
|
+ * of an override it would be treated as an ordinary ISA I/O APIC
|
|
+ * interrupt, that is edge-triggered and unmasked by default. We
|
|
+ * used to do this, but it caused problems on some systems because
|
|
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
|
|
+ * the same ExtINT cascade interrupt to drive the local APIC of the
|
|
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
|
|
+ * the I/O APIC in all cases now. No actual device should request
|
|
+ * it anyway. --macro
|
|
*/
|
|
#define PIC_IRQS (1 << PIC_CASCADE_IR)
|
|
|
|
@@ -2328,25 +2379,22 @@ void __init setup_IO_APIC(void)
 int i;

 /* Reserve all the system vectors. */
- for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+ for (i = first_system_vector; i < NR_VECTORS; i++)
 set_bit(i, used_vectors);
 #endif

 enable_IO_APIC();

- if (acpi_ioapic)
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
- else
- io_apic_irqs = ~PIC_IRQS;
+ io_apic_irqs = ~PIC_IRQS;

 printk("ENABLING IO-APIC IRQs\n");

+#ifndef CONFIG_XEN
 /*
 * Set up IO-APIC IRQ routing.
 */
 if (!acpi_ioapic)
 setup_ioapic_ids_from_mpc();
-#ifndef CONFIG_XEN
 sync_Arb_IDs();
 #endif
 setup_IO_APIC_irqs();
@@ -2356,28 +2404,14 @@ void __init setup_IO_APIC(void)
 print_IO_APIC();
 }

-static int __init setup_disable_8254_timer(char *s)
-{
- timer_over_8254 = -1;
- return 1;
-}
-static int __init setup_enable_8254_timer(char *s)
-{
- timer_over_8254 = 2;
- return 1;
-}
-
-__setup("disable_8254_timer", setup_disable_8254_timer);
-__setup("enable_8254_timer", setup_enable_8254_timer);
-
 /*
 * Called after all the initialization is done. If we didnt find any
 * APIC bugs then we can allow the modify fast path
 */
-
+
 static int __init io_apic_bug_finalize(void)
 {
- if(sis_apic_bug == -1)
+ if (sis_apic_bug == -1)
 sis_apic_bug = 0;
 if (is_initial_xendomain()) {
 struct xen_platform_op op = { .cmd = XENPF_platform_quirk };
@@ -2396,17 +2430,17 @@ struct sysfs_ioapic_data {
 struct sys_device dev;
 struct IO_APIC_route_entry entry[0];
 };
-static struct sysfs_ioapic_data * mp_ioapic_data[MAX_IO_APICS];
+static struct sysfs_ioapic_data *mp_ioapic_data[MAX_IO_APICS];

 static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
 {
 struct IO_APIC_route_entry *entry;
 struct sysfs_ioapic_data *data;
 int i;
-
+
 data = container_of(dev, struct sysfs_ioapic_data, dev);
 entry = data->entry;
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
 entry[i] = ioapic_read_entry(dev->id, i);

 return 0;
@@ -2419,18 +2453,18 @@ static int ioapic_resume(struct sys_devi
 unsigned long flags;
 union IO_APIC_reg_00 reg_00;
 int i;
-
+
 data = container_of(dev, struct sysfs_ioapic_data, dev);
 entry = data->entry;

 spin_lock_irqsave(&ioapic_lock, flags);
 reg_00.raw = io_apic_read(dev->id, 0);
- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
+ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
 io_apic_write(dev->id, 0, reg_00.raw);
 }
 spin_unlock_irqrestore(&ioapic_lock, flags);
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
 ioapic_write_entry(dev->id, i, entry[i]);

 return 0;
@@ -2444,24 +2478,23 @@ static struct sysdev_class ioapic_sysdev

 static int __init ioapic_init_sysfs(void)
 {
- struct sys_device * dev;
+ struct sys_device *dev;
 int i, size, error = 0;

 error = sysdev_class_register(&ioapic_sysdev_class);
 if (error)
 return error;

- for (i = 0; i < nr_ioapics; i++ ) {
- size = sizeof(struct sys_device) + nr_ioapic_registers[i]
+ for (i = 0; i < nr_ioapics; i++) {
+ size = sizeof(struct sys_device) + nr_ioapic_registers[i]
 * sizeof(struct IO_APIC_route_entry);
- mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+ mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
 if (!mp_ioapic_data[i]) {
 printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 continue;
 }
- memset(mp_ioapic_data[i], 0, size);
 dev = &mp_ioapic_data[i]->dev;
- dev->id = i;
+ dev->id = i;
 dev->cls = &ioapic_sysdev_class;
 error = sysdev_register(dev);
 if (error) {
@@ -2538,7 +2571,7 @@ static int msi_compose_msg(struct pci_de
 msg->address_lo =
 MSI_ADDR_BASE_LO |
 ((INT_DEST_MODE == 0) ?
- MSI_ADDR_DEST_MODE_PHYSICAL:
+MSI_ADDR_DEST_MODE_PHYSICAL:
 MSI_ADDR_DEST_MODE_LOGICAL) |
 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
 MSI_ADDR_REDIRECTION_CPU:
@@ -2549,7 +2582,7 @@ static int msi_compose_msg(struct pci_de
 MSI_DATA_TRIGGER_EDGE |
 MSI_DATA_LEVEL_ASSERT |
 ((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
+MSI_DATA_DELIVERY_FIXED:
 MSI_DATA_DELIVERY_LOWPRI) |
 MSI_DATA_VECTOR(vector);
 }
@@ -2720,13 +2753,13 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
#endif /* CONFIG_HT_IRQ */
|
|
|
|
/* --------------------------------------------------------------------------
|
|
- ACPI-based IOAPIC Configuration
|
|
+ ACPI-based IOAPIC Configuration
|
|
-------------------------------------------------------------------------- */
|
|
|
|
#ifdef CONFIG_ACPI
|
|
|
|
#ifndef CONFIG_XEN
|
|
-int __init io_apic_get_unique_id (int ioapic, int apic_id)
|
|
+int __init io_apic_get_unique_id(int ioapic, int apic_id)
|
|
{
|
|
union IO_APIC_reg_00 reg_00;
|
|
static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
|
|
@@ -2735,10 +2768,10 @@ int __init io_apic_get_unique_id (int io
|
|
int i = 0;
|
|
|
|
/*
|
|
- * The P4 platform supports up to 256 APIC IDs on two separate APIC
|
|
- * buses (one for LAPICs, one for IOAPICs), where predecessors only
|
|
+ * The P4 platform supports up to 256 APIC IDs on two separate APIC
|
|
+ * buses (one for LAPICs, one for IOAPICs), where predecessors only
|
|
* supports up to 16 on one shared APIC bus.
|
|
- *
|
|
+ *
|
|
* TBD: Expand LAPIC/IOAPIC support on P4-class systems to take full
|
|
* advantage of new APIC bus architecture.
|
|
*/
|
|
@@ -2757,7 +2790,7 @@ int __init io_apic_get_unique_id (int io
|
|
}
|
|
|
|
/*
|
|
- * Every APIC in a system must have a unique ID or we get lots of nice
|
|
+ * Every APIC in a system must have a unique ID or we get lots of nice
|
|
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
*/
|
|
if (check_apicid_used(apic_id_map, apic_id)) {
|
|
@@ -2774,7 +2807,7 @@ int __init io_apic_get_unique_id (int io
|
|
"trying %d\n", ioapic, apic_id, i);
|
|
|
|
apic_id = i;
|
|
- }
|
|
+ }
|
|
|
|
tmp = apicid_to_cpu_present(apic_id);
|
|
physids_or(apic_id_map, apic_id_map, tmp);
|
|
@@ -2802,7 +2835,7 @@ int __init io_apic_get_unique_id (int io
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
|
|
-int __init io_apic_get_version (int ioapic)
|
|
+int __init io_apic_get_version(int ioapic)
|
|
{
|
|
union IO_APIC_reg_01 reg_01;
|
|
unsigned long flags;
|
|
@@ -2815,7 +2848,7 @@ int __init io_apic_get_version (int ioap
|
|
}
|
|
|
|
|
|
-int __init io_apic_get_redir_entries (int ioapic)
|
|
+int __init io_apic_get_redir_entries(int ioapic)
|
|
{
|
|
union IO_APIC_reg_01 reg_01;
|
|
unsigned long flags;
|
|
@@ -2828,7 +2861,7 @@ int __init io_apic_get_redir_entries (in
|
|
}
|
|
|
|
|
|
-int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int active_high_low)
|
|
+int io_apic_set_pci_routing(int ioapic, int pin, int irq, int edge_level, int active_high_low)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
@@ -2844,7 +2877,7 @@ int io_apic_set_pci_routing (int ioapic,
|
|
* corresponding device driver registers for this IRQ.
|
|
*/
|
|
|
|
- memset(&entry,0,sizeof(entry));
|
|
+ memset(&entry, 0, sizeof(entry));
|
|
|
|
entry.delivery_mode = INT_DELIVERY_MODE;
|
|
entry.dest_mode = INT_DEST_MODE;
|
|
@@ -2863,7 +2896,7 @@ int io_apic_set_pci_routing (int ioapic,
|
|
|
|
apic_printk(APIC_DEBUG, KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry "
|
|
"(%d-%d -> 0x%x -> IRQ %d Mode:%i Active:%i)\n", ioapic,
|
|
- mp_ioapics[ioapic].mpc_apicid, pin, entry.vector, irq,
|
|
+ mp_ioapics[ioapic].mp_apicid, pin, entry.vector, irq,
|
|
edge_level, active_high_low);
|
|
|
|
ioapic_register_intr(irq, entry.vector, edge_level);
|
|
@@ -2884,8 +2917,8 @@ int acpi_get_override_irq(int bus_irq, i
|
|
return -1;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mpc_irqtype == mp_INT &&
|
|
- mp_irqs[i].mpc_srcbusirq == bus_irq)
|
|
+ if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
+ mp_irqs[i].mp_srcbusirq == bus_irq)
|
|
break;
|
|
if (i >= mp_irq_entries)
|
|
return -1;
|
|
@@ -2918,3 +2951,35 @@ static int __init parse_noapic(char *arg
|
|
return 0;
|
|
}
|
|
early_param("noapic", parse_noapic);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+void __init ioapic_init_mappings(void)
|
|
+{
|
|
+ unsigned long ioapic_phys, idx = FIX_IO_APIC_BASE_0;
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < nr_ioapics; i++) {
|
|
+ if (smp_found_config) {
|
|
+ ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
|
+ if (!ioapic_phys) {
|
|
+ printk(KERN_ERR
|
|
+ "WARNING: bogus zero IO-APIC "
|
|
+ "address found in MPTABLE, "
|
|
+ "disabling IO/APIC support!\n");
|
|
+ smp_found_config = 0;
|
|
+ skip_ioapic_setup = 1;
|
|
+ goto fake_ioapic_page;
|
|
+ }
|
|
+ } else {
|
|
+fake_ioapic_page:
|
|
+ ioapic_phys = (unsigned long)
|
|
+ alloc_bootmem_pages(PAGE_SIZE);
|
|
+ ioapic_phys = __pa(ioapic_phys);
|
|
+ }
|
|
+ set_fixmap_nocache(idx, ioapic_phys);
|
|
+ printk(KERN_DEBUG "mapped IOAPIC to %08lx (%08lx)\n",
|
|
+ __fix_to_virt(idx), ioapic_phys);
|
|
+ idx++;
|
|
+ }
|
|
+}
|
|
+#endif
|
|
--- head-2011-03-11.orig/arch/x86/kernel/io_apic_64-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/io_apic_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -45,6 +45,7 @@
|
|
#include <asm/proto.h>
|
|
#include <asm/acpi.h>
|
|
#include <asm/dma.h>
|
|
+#include <asm/i8259.h>
|
|
#include <asm/nmi.h>
|
|
#include <asm/msidef.h>
|
|
#include <asm/hypertransport.h>
|
|
@@ -63,10 +64,16 @@ struct irq_cfg {
|
|
};
|
|
|
|
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
|
-struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
|
|
+static struct irq_cfg irq_cfg[NR_IRQS] __read_mostly;
|
|
|
|
static int assign_irq_vector(int irq, cpumask_t mask);
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+int first_system_vector = 0xfe;
|
|
+
|
|
+char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
|
|
+#endif
|
|
+
|
|
#define __apicdebuginit __init
|
|
|
|
int sis_apic_bug; /* not actually supported, dummy for compile */
|
|
@@ -89,14 +96,14 @@ unsigned long io_apic_irqs;
|
|
|
|
#define clear_IO_APIC() ((void)0)
|
|
#else
|
|
-int timer_over_8254 __initdata = 1;
|
|
+int timer_through_8259 __initdata;
|
|
|
|
/* Where if anywhere is the i8259 connect in external int mode */
|
|
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
|
#endif
|
|
|
|
static DEFINE_SPINLOCK(ioapic_lock);
|
|
-DEFINE_SPINLOCK(vector_lock);
|
|
+static DEFINE_SPINLOCK(vector_lock);
|
|
|
|
/*
|
|
* # of IRQ routing registers
|
|
@@ -104,15 +111,17 @@ DEFINE_SPINLOCK(vector_lock);
|
|
int nr_ioapic_registers[MAX_IO_APICS];
|
|
|
|
/* I/O APIC entries */
|
|
-struct mpc_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
+struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
int nr_ioapics;
|
|
|
|
/* MP IRQ source entries */
|
|
-struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
+struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
|
|
/* # of MP IRQ source entries */
|
|
int mp_irq_entries;
|
|
|
|
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
+
|
|
/*
|
|
* Rough estimation of how many shared IRQs there are, can
|
|
* be changed anytime.
|
|
@@ -141,7 +150,7 @@ struct io_apic {
|
|
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
{
|
|
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
- + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
|
|
+ + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
}
|
|
#endif
|
|
|
|
@@ -155,7 +164,7 @@ static inline unsigned int io_apic_read(
|
|
struct physdev_apic apic_op;
|
|
int ret;
|
|
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
apic_op.reg = reg;
|
|
ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
if (ret)
|
|
@@ -173,7 +182,7 @@ static inline void io_apic_write(unsigne
|
|
#else
|
|
struct physdev_apic apic_op;
|
|
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
apic_op.reg = reg;
|
|
apic_op.value = value;
|
|
WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
@@ -209,7 +218,7 @@ static bool io_apic_level_ack_pending(un
|
|
break;
|
|
reg = io_apic_read(entry->apic, 0x10 + pin*2);
|
|
/* Is the remote IRR bit set? */
|
|
- if ((reg >> 14) & 1) {
|
|
+ if (reg & IO_APIC_REDIR_REMOTE_IRR) {
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
return true;
|
|
}
|
|
@@ -308,7 +317,7 @@ static void __target_IO_APIC_irq(unsigne
|
|
break;
|
|
io_apic_write(apic, 0x11 + pin*2, dest);
|
|
reg = io_apic_read(apic, 0x10 + pin*2);
|
|
- reg &= ~0x000000ff;
|
|
+ reg &= ~IO_APIC_REDIR_VECTOR_MASK;
|
|
reg |= vector;
|
|
io_apic_modify(apic, reg);
|
|
if (!entry->next)
|
|
@@ -372,6 +381,26 @@ static void add_pin_to_irq(unsigned int
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Reroute an IRQ to a different pin.
|
|
+ */
|
|
+static void __init replace_pin_at_irq(unsigned int irq,
|
|
+ int oldapic, int oldpin,
|
|
+ int newapic, int newpin)
|
|
+{
|
|
+ struct irq_pin_list *entry = irq_2_pin + irq;
|
|
+
|
|
+ while (1) {
|
|
+ if (entry->apic == oldapic && entry->pin == oldpin) {
|
|
+ entry->apic = newapic;
|
|
+ entry->pin = newpin;
|
|
+ }
|
|
+ if (!entry->next)
|
|
+ break;
|
|
+ entry = irq_2_pin + entry->next;
|
|
+ }
|
|
+}
|
|
+
|
|
#define __DO_ACTION(R, ACTION, FINAL) \
|
|
\
|
|
{ \
|
|
@@ -399,10 +428,11 @@ static void add_pin_to_irq(unsigned int
|
|
static void name##_IO_APIC_irq (unsigned int irq) \
|
|
__DO_ACTION(R, ACTION, FINAL)
|
|
|
|
-DO_ACTION( __mask, 0, |= 0x00010000, io_apic_sync(entry->apic) )
|
|
- /* mask = 1 */
|
|
-DO_ACTION( __unmask, 0, &= 0xfffeffff, )
|
|
- /* mask = 0 */
|
|
+/* mask = 1 */
|
|
+DO_ACTION(__mask, 0, |= IO_APIC_REDIR_MASKED, io_apic_sync(entry->apic))
|
|
+
|
|
+/* mask = 0 */
|
|
+DO_ACTION(__unmask, 0, &= ~IO_APIC_REDIR_MASKED, )
|
|
|
|
static void mask_IO_APIC_irq (unsigned int irq)
|
|
{
|
|
@@ -465,22 +495,6 @@ static int __init disable_timer_pin_setu
|
|
}
|
|
__setup("disable_timer_pin_1", disable_timer_pin_setup);
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-static int __init setup_disable_8254_timer(char *s)
|
|
-{
|
|
- timer_over_8254 = -1;
|
|
- return 1;
|
|
-}
|
|
-static int __init setup_enable_8254_timer(char *s)
|
|
-{
|
|
- timer_over_8254 = 2;
|
|
- return 1;
|
|
-}
|
|
-
|
|
-__setup("disable_8254_timer", setup_disable_8254_timer);
|
|
-__setup("enable_8254_timer", setup_enable_8254_timer);
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
|
|
/*
|
|
* Find the IRQ entry number of a certain pin.
|
|
@@ -490,10 +504,10 @@ static int find_irq_entry(int apic, int
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mpc_irqtype == type &&
|
|
- (mp_irqs[i].mpc_dstapic == mp_ioapics[apic].mpc_apicid ||
|
|
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL) &&
|
|
- mp_irqs[i].mpc_dstirq == pin)
|
|
+ if (mp_irqs[i].mp_irqtype == type &&
|
|
+ (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
+ mp_irqs[i].mp_dstirq == pin)
|
|
return i;
|
|
|
|
return -1;
|
|
@@ -508,13 +522,13 @@ static int __init find_isa_irq_pin(int i
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mpc_srcbus;
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mpc_irqtype == type) &&
|
|
- (mp_irqs[i].mpc_srcbusirq == irq))
|
|
+ (mp_irqs[i].mp_irqtype == type) &&
|
|
+ (mp_irqs[i].mp_srcbusirq == irq))
|
|
|
|
- return mp_irqs[i].mpc_dstirq;
|
|
+ return mp_irqs[i].mp_dstirq;
|
|
}
|
|
return -1;
|
|
}
|
|
@@ -524,17 +538,17 @@ static int __init find_isa_irq_apic(int
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mpc_srcbus;
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mpc_irqtype == type) &&
|
|
- (mp_irqs[i].mpc_srcbusirq == irq))
|
|
+ (mp_irqs[i].mp_irqtype == type) &&
|
|
+ (mp_irqs[i].mp_srcbusirq == irq))
|
|
break;
|
|
}
|
|
if (i < mp_irq_entries) {
|
|
int apic;
|
|
for(apic = 0; apic < nr_ioapics; apic++) {
|
|
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
|
|
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
|
return apic;
|
|
}
|
|
}
|
|
@@ -555,28 +569,28 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
|
|
apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n",
|
|
bus, slot, pin);
|
|
- if (mp_bus_id_to_pci_bus[bus] == -1) {
|
|
+ if (test_bit(bus, mp_bus_not_pci)) {
|
|
apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus);
|
|
return -1;
|
|
}
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mpc_srcbus;
|
|
+ int lbus = mp_irqs[i].mp_srcbus;
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
|
- if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic ||
|
|
- mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
|
|
+ if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
+ mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
|
break;
|
|
|
|
if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
- !mp_irqs[i].mpc_irqtype &&
|
|
+ !mp_irqs[i].mp_irqtype &&
|
|
(bus == lbus) &&
|
|
- (slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
|
|
- int irq = pin_2_irq(i,apic,mp_irqs[i].mpc_dstirq);
|
|
+ (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
+ int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
|
|
|
|
if (!(apic || IO_APIC_IRQ(irq)))
|
|
continue;
|
|
|
|
- if (pin == (mp_irqs[i].mpc_srcbusirq & 3))
|
|
+ if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
|
return irq;
|
|
/*
|
|
* Use the first all-but-pin matching entry as a
|
|
@@ -604,13 +618,13 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
|
|
static int MPBIOS_polarity(int idx)
|
|
{
|
|
- int bus = mp_irqs[idx].mpc_srcbus;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
int polarity;
|
|
|
|
/*
|
|
* Determine IRQ line polarity (high active or low active):
|
|
*/
|
|
- switch (mp_irqs[idx].mpc_irqflag & 3)
|
|
+ switch (mp_irqs[idx].mp_irqflag & 3)
|
|
{
|
|
case 0: /* conforms, ie. bus-type dependent polarity */
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
@@ -646,13 +660,13 @@ static int MPBIOS_polarity(int idx)
|
|
|
|
static int MPBIOS_trigger(int idx)
|
|
{
|
|
- int bus = mp_irqs[idx].mpc_srcbus;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
int trigger;
|
|
|
|
/*
|
|
* Determine IRQ trigger mode (edge or level sensitive):
|
|
*/
|
|
- switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
|
|
+ switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
|
|
{
|
|
case 0: /* conforms, ie. bus-type dependent */
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
@@ -699,16 +713,16 @@ static inline int irq_trigger(int idx)
|
|
static int pin_2_irq(int idx, int apic, int pin)
|
|
{
|
|
int irq, i;
|
|
- int bus = mp_irqs[idx].mpc_srcbus;
|
|
+ int bus = mp_irqs[idx].mp_srcbus;
|
|
|
|
/*
|
|
* Debugging check, we are in big trouble if this message pops up!
|
|
*/
|
|
- if (mp_irqs[idx].mpc_dstirq != pin)
|
|
+ if (mp_irqs[idx].mp_dstirq != pin)
|
|
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
|
|
if (test_bit(bus, mp_bus_not_pci)) {
|
|
- irq = mp_irqs[idx].mpc_srcbusirq;
|
|
+ irq = mp_irqs[idx].mp_srcbusirq;
|
|
} else {
|
|
/*
|
|
* PCI IRQs are mapped in order
|
|
@@ -722,6 +736,19 @@ static int pin_2_irq(int idx, int apic,
|
|
return irq;
|
|
}
|
|
|
|
+void lock_vector_lock(void)
|
|
+{
|
|
+ /* Used to the online set of cpus does not change
|
|
+ * during assign_irq_vector.
|
|
+ */
|
|
+ spin_lock(&vector_lock);
|
|
+}
|
|
+
|
|
+void unlock_vector_lock(void)
|
|
+{
|
|
+ spin_unlock(&vector_lock);
|
|
+}
|
|
+
|
|
static int __assign_irq_vector(int irq, cpumask_t mask)
|
|
{
|
|
struct physdev_irq irq_op;
|
|
@@ -773,7 +800,7 @@ static void __clear_irq_vector(int irq)
|
|
|
|
vector = cfg->vector;
|
|
cpus_and(mask, cfg->domain, cpu_online_map);
|
|
- for_each_cpu_mask(cpu, mask)
|
|
+ for_each_cpu_mask_nr(cpu, mask)
|
|
per_cpu(vector_irq, cpu)[vector] = -1;
|
|
|
|
cfg->vector = 0;
|
|
@@ -842,7 +869,7 @@ static void setup_IO_APIC_irq(int apic,
|
|
apic_printk(APIC_VERBOSE,KERN_DEBUG
|
|
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
|
|
"IRQ %d Mode:%i Active:%i)\n",
|
|
- apic, mp_ioapics[apic].mpc_apicid, pin, cfg->vector,
|
|
+ apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
|
|
irq, trigger, polarity);
|
|
|
|
/*
|
|
@@ -883,10 +910,10 @@ static void __init setup_IO_APIC_irqs(vo
|
|
idx = find_irq_entry(apic,pin,mp_INT);
|
|
if (idx == -1) {
|
|
if (first_notcon) {
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mpc_apicid, pin);
|
|
+ apic_printk(APIC_VERBOSE, KERN_DEBUG " IO-APIC (apicid-pin) %d-%d", mp_ioapics[apic].mp_apicid, pin);
|
|
first_notcon = 0;
|
|
} else
|
|
- apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
|
|
+ apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mp_apicid, pin);
|
|
continue;
|
|
}
|
|
if (!first_notcon) {
|
|
@@ -908,26 +935,21 @@ static void __init setup_IO_APIC_irqs(vo
|
|
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
- * Set up the 8259A-master output pin as broadcast to all
|
|
- * CPUs.
|
|
+ * Set up the timer pin, possibly with the 8259A-master behind.
|
|
*/
|
|
-static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
|
|
+static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
+ int vector)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
memset(&entry, 0, sizeof(entry));
|
|
|
|
- disable_8259A_irq(0);
|
|
-
|
|
- /* mask LVT0 */
|
|
- apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
-
|
|
/*
|
|
* We use logical delivery to get the timer IRQ
|
|
* to the first CPU.
|
|
*/
|
|
entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 0; /* unmask IRQ now */
|
|
+ entry.mask = 1; /* mask IRQ now */
|
|
entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
entry.delivery_mode = INT_DELIVERY_MODE;
|
|
entry.polarity = 0;
|
|
@@ -936,7 +958,7 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
|
|
/*
|
|
* The timer IRQ doesn't have to know that behind the
|
|
- * scene we have a 8259A-master in AEOI mode ...
|
|
+ * scene we may have a 8259A-master in AEOI mode ...
|
|
*/
|
|
set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
|
|
|
|
@@ -944,8 +966,6 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
* Add it to the IO-APIC irq-routing table:
|
|
*/
|
|
ioapic_write_entry(apic, pin, entry);
|
|
-
|
|
- enable_8259A_irq(0);
|
|
}
|
|
|
|
void __apicdebuginit print_IO_APIC(void)
|
|
@@ -962,7 +982,7 @@ void __apicdebuginit print_IO_APIC(void)
|
|
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
for (i = 0; i < nr_ioapics; i++)
|
|
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
- mp_ioapics[i].mpc_apicid, nr_ioapic_registers[i]);
|
|
+ mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
|
|
|
/*
|
|
* We are a bit conservative about what we expect. We have to
|
|
@@ -980,7 +1000,7 @@ void __apicdebuginit print_IO_APIC(void)
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
printk("\n");
|
|
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
|
|
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
|
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
|
|
@@ -1072,6 +1092,7 @@ void __apicdebuginit print_local_APIC(vo
|
|
|
|
printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
|
smp_processor_id(), hard_smp_processor_id());
|
|
+ v = apic_read(APIC_ID);
|
|
printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id()));
|
|
v = apic_read(APIC_LVR);
|
|
printk(KERN_INFO "... APIC VERSION: %08x\n", v);
|
|
@@ -1141,7 +1162,7 @@ void __apicdebuginit print_local_APIC(vo
|
|
|
|
void print_all_local_APICs (void)
|
|
{
|
|
- on_each_cpu(print_local_APIC, NULL, 1, 1);
|
|
+ on_each_cpu(print_local_APIC, NULL, 1);
|
|
}
|
|
|
|
void __apicdebuginit print_PIC(void)
|
|
@@ -1175,6 +1196,8 @@ void __apicdebuginit print_PIC(void)
|
|
v = inb(0x4d1) << 8 | inb(0x4d0);
|
|
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
|
|
}
|
|
+#else
|
|
+void __apicdebuginit print_IO_APIC(void) {}
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
void __init enable_IO_APIC(void)
|
|
@@ -1359,12 +1382,10 @@ static unsigned int startup_ioapic_irq(u
|
|
static int ioapic_retrigger_irq(unsigned int irq)
|
|
{
|
|
struct irq_cfg *cfg = &irq_cfg[irq];
|
|
- cpumask_t mask;
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
- mask = cpumask_of_cpu(first_cpu(cfg->domain));
|
|
- send_IPI_mask(mask, cfg->vector);
|
|
+ send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
return 1;
|
|
@@ -1545,7 +1566,7 @@ static inline void init_IO_APIC_traps(vo
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-static void enable_lapic_irq (unsigned int irq)
|
|
+static void unmask_lapic_irq(unsigned int irq)
|
|
{
|
|
unsigned long v;
|
|
|
|
@@ -1553,7 +1574,7 @@ static void enable_lapic_irq (unsigned i
|
|
apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
}
|
|
|
|
-static void disable_lapic_irq (unsigned int irq)
|
|
+static void mask_lapic_irq(unsigned int irq)
|
|
{
|
|
unsigned long v;
|
|
|
|
@@ -1566,19 +1587,20 @@ static void ack_lapic_irq (unsigned int
|
|
ack_APIC_irq();
|
|
}
|
|
|
|
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
|
|
-
|
|
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
|
|
- .name = "local-APIC",
|
|
- .typename = "local-APIC-edge",
|
|
- .startup = NULL, /* startup_irq() not used for IRQ0 */
|
|
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
|
|
- .enable = enable_lapic_irq,
|
|
- .disable = disable_lapic_irq,
|
|
- .ack = ack_lapic_irq,
|
|
- .end = end_lapic_irq,
|
|
+static struct irq_chip lapic_chip __read_mostly = {
|
|
+ .name = "local-APIC",
|
|
+ .mask = mask_lapic_irq,
|
|
+ .unmask = unmask_lapic_irq,
|
|
+ .ack = ack_lapic_irq,
|
|
};
|
|
|
|
+static void lapic_register_intr(int irq)
|
|
+{
|
|
+ irq_desc[irq].status &= ~IRQ_LEVEL;
|
|
+ set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
|
|
+ "edge");
|
|
+}
|
|
+
|
|
static void __init setup_nmi(void)
|
|
{
|
|
/*
|
|
@@ -1664,6 +1686,7 @@ static inline void __init check_timer(vo
|
|
struct irq_cfg *cfg = irq_cfg + 0;
|
|
int apic1, pin1, apic2, pin2;
|
|
unsigned long flags;
|
|
+ int no_pin1 = 0;
|
|
|
|
local_irq_save(flags);
|
|
|
|
@@ -1674,34 +1697,48 @@ static inline void __init check_timer(vo
|
|
assign_irq_vector(0, TARGET_CPUS);
|
|
|
|
/*
|
|
- * Subtle, code in do_timer_interrupt() expects an AEOI
|
|
- * mode for the 8259A whenever interrupts are routed
|
|
- * through I/O APICs. Also IRQ0 has to be enabled in
|
|
- * the 8259A which implies the virtual wire has to be
|
|
- * disabled in the local APIC.
|
|
+ * As IRQ0 is to be enabled in the 8259A, the virtual
|
|
+ * wire has to be disabled in the local APIC.
|
|
*/
|
|
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
|
|
init_8259A(1);
|
|
- if (timer_over_8254 > 0)
|
|
- enable_8259A_irq(0);
|
|
|
|
pin1 = find_isa_irq_pin(0, mp_INT);
|
|
apic1 = find_isa_irq_apic(0, mp_INT);
|
|
pin2 = ioapic_i8259.pin;
|
|
apic2 = ioapic_i8259.apic;
|
|
|
|
- apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
- cfg->vector, apic1, pin1, apic2, pin2);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X "
|
|
+ "apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
+ cfg->vector, apic1, pin1, apic2, pin2);
|
|
+
|
|
+ /*
|
|
+ * Some BIOS writers are clueless and report the ExtINTA
|
|
+ * I/O APIC input from the cascaded 8259A as the timer
|
|
+ * interrupt input. So just in case, if only one pin
|
|
+ * was found above, try it both directly and through the
|
|
+ * 8259A.
|
|
+ */
|
|
+ if (pin1 == -1) {
|
|
+ pin1 = pin2;
|
|
+ apic1 = apic2;
|
|
+ no_pin1 = 1;
|
|
+ } else if (pin2 == -1) {
|
|
+ pin2 = pin1;
|
|
+ apic2 = apic1;
|
|
+ }
|
|
|
|
if (pin1 != -1) {
|
|
/*
|
|
* Ok, does IRQ0 through the IOAPIC work?
|
|
*/
|
|
+ if (no_pin1) {
|
|
+ add_pin_to_irq(0, apic1, pin1);
|
|
+ setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
|
|
+ }
|
|
unmask_IO_APIC_irq(0);
|
|
if (!no_timer_check && timer_irq_works()) {
|
|
- nmi_watchdog_default();
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
- disable_8259A_irq(0);
|
|
setup_nmi();
|
|
enable_8259A_irq(0);
|
|
}
|
|
@@ -1710,54 +1747,62 @@ static inline void __init check_timer(vo
|
|
goto out;
|
|
}
|
|
clear_IO_APIC_pin(apic1, pin1);
|
|
- apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
|
|
- "connected to IO-APIC\n");
|
|
- }
|
|
-
|
|
- apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) "
|
|
- "through the 8259A ... ");
|
|
- if (pin2 != -1) {
|
|
- apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...",
|
|
- apic2, pin2);
|
|
+ if (!no_pin1)
|
|
+ apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
|
|
+ "8254 timer not connected to IO-APIC\n");
|
|
+
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer "
|
|
+ "(IRQ0) through the 8259A ...\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "..... (found apic %d pin %d) ...\n", apic2, pin2);
|
|
/*
|
|
* legacy devices should be connected to IO APIC #0
|
|
*/
|
|
- setup_ExtINT_IRQ0_pin(apic2, pin2, cfg->vector);
|
|
+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
|
|
+ setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
|
|
+ unmask_IO_APIC_irq(0);
|
|
+ enable_8259A_irq(0);
|
|
if (timer_irq_works()) {
|
|
- apic_printk(APIC_VERBOSE," works.\n");
|
|
- nmi_watchdog_default();
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
|
|
+ timer_through_8259 = 1;
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
+ disable_8259A_irq(0);
|
|
setup_nmi();
|
|
+ enable_8259A_irq(0);
|
|
}
|
|
goto out;
|
|
}
|
|
/*
|
|
* Cleanup, just in case ...
|
|
*/
|
|
+ disable_8259A_irq(0);
|
|
clear_IO_APIC_pin(apic2, pin2);
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
|
|
}
|
|
- apic_printk(APIC_VERBOSE," failed.\n");
|
|
|
|
if (nmi_watchdog == NMI_IO_APIC) {
|
|
- printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n");
|
|
- nmi_watchdog = 0;
|
|
+ apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work "
|
|
+ "through the IO-APIC - disabling NMI Watchdog!\n");
|
|
+ nmi_watchdog = NMI_NONE;
|
|
}
|
|
|
|
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "...trying to set up timer as Virtual Wire IRQ...\n");
|
|
|
|
- disable_8259A_irq(0);
|
|
- irq_desc[0].chip = &lapic_irq_type;
|
|
+ lapic_register_intr(0);
|
|
apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
|
|
enable_8259A_irq(0);
|
|
|
|
if (timer_irq_works()) {
|
|
- apic_printk(APIC_VERBOSE," works.\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
goto out;
|
|
}
|
|
+ disable_8259A_irq(0);
|
|
apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
|
|
- apic_printk(APIC_VERBOSE," failed.\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
|
|
|
|
- apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ...");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO
|
|
+ "...trying to set up timer as ExtINT IRQ...\n");
|
|
|
|
init_8259A(0);
|
|
make_8259A_irq(0);
|
|
@@ -1766,11 +1811,12 @@ static inline void __init check_timer(vo
|
|
unlock_ExtINT_logic();
|
|
|
|
if (timer_irq_works()) {
|
|
- apic_printk(APIC_VERBOSE," works.\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
|
|
goto out;
|
|
}
|
|
- apic_printk(APIC_VERBOSE," failed :(.\n");
|
|
- panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
|
|
+ apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
|
|
+ panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
|
|
+ "report. Then try booting with the 'noapic' option.\n");
|
|
out:
|
|
local_irq_restore(flags);
|
|
}
|
|
@@ -1788,10 +1834,21 @@ __setup("no_timer_check", notimercheck);
|
|
|
|
/*
|
|
*
|
|
- * IRQs that are handled by the PIC in the MPS IOAPIC case.
|
|
- * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
|
|
- * Linux doesn't really care, as it's not actually used
|
|
- * for any interrupt handling anyway.
|
|
+ * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
|
|
+ * to devices. However there may be an I/O APIC pin available for
|
|
+ * this interrupt regardless. The pin may be left unconnected, but
|
|
+ * typically it will be reused as an ExtINT cascade interrupt for
|
|
+ * the master 8259A. In the MPS case such a pin will normally be
|
|
+ * reported as an ExtINT interrupt in the MP table. With ACPI
|
|
+ * there is no provision for ExtINT interrupts, and in the absence
|
|
+ * of an override it would be treated as an ordinary ISA I/O APIC
|
|
+ * interrupt, that is edge-triggered and unmasked by default. We
|
|
+ * used to do this, but it caused problems on some systems because
|
|
+ * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
|
|
+ * the same ExtINT cascade interrupt to drive the local APIC of the
|
|
+ * bootstrap processor. Therefore we refrain from routing IRQ2 to
|
|
+ * the I/O APIC in all cases now. No actual device should request
|
|
+ * it anyway. --macro
|
|
*/
|
|
#define PIC_IRQS (1<<2)
|
|
|
|
@@ -1799,10 +1856,7 @@ void __init setup_IO_APIC(void)
|
|
{
|
|
enable_IO_APIC();
|
|
|
|
- if (acpi_ioapic)
|
|
- io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
|
|
- else
|
|
- io_apic_irqs = ~PIC_IRQS;
|
|
+ io_apic_irqs = ~PIC_IRQS;
|
|
|
|
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
|
|
|
|
@@ -1851,8 +1905,8 @@ static int ioapic_resume(struct sys_devi
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
reg_00.raw = io_apic_read(dev->id, 0);
|
|
- if (reg_00.bits.ID != mp_ioapics[dev->id].mpc_apicid) {
|
|
- reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
|
|
+ if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
|
|
+ reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
|
|
io_apic_write(dev->id, 0, reg_00.raw);
|
|
}
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
@@ -2254,8 +2308,8 @@ int acpi_get_override_irq(int bus_irq, i
|
|
return -1;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mpc_irqtype == mp_INT &&
|
|
- mp_irqs[i].mpc_srcbusirq == bus_irq)
|
|
+ if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
+ mp_irqs[i].mp_srcbusirq == bus_irq)
|
|
break;
|
|
if (i >= mp_irq_entries)
|
|
return -1;
|
|
@@ -2349,7 +2403,7 @@ void __init ioapic_init_mappings(void)
|
|
ioapic_res = ioapic_setup_resources();
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
if (smp_found_config) {
|
|
- ioapic_phys = mp_ioapics[i].mpc_apicaddr;
|
|
+ ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
|
} else {
|
|
ioapic_phys = (unsigned long)
|
|
alloc_bootmem_pages(PAGE_SIZE);
|
|
--- head-2011-03-11.orig/arch/x86/kernel/ldt-xen.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/ldt-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -20,9 +20,9 @@
|
|
#include <asm/mmu_context.h>
|
|
|
|
#ifdef CONFIG_SMP
|
|
-static void flush_ldt(void *null)
|
|
+static void flush_ldt(void *current_mm)
|
|
{
|
|
- if (current->active_mm)
|
|
+ if (current->active_mm == current_mm)
|
|
load_LDT(¤t->active_mm->context);
|
|
}
|
|
#endif
|
|
@@ -62,8 +62,6 @@ static int alloc_ldt(mm_context_t *pc, i
|
|
|
|
if (reload) {
|
|
#ifdef CONFIG_SMP
|
|
- cpumask_t mask;
|
|
-
|
|
preempt_disable();
|
|
#endif
|
|
make_pages_readonly(newldt,
|
|
@@ -71,9 +69,9 @@ static int alloc_ldt(mm_context_t *pc, i
|
|
XENFEAT_writable_descriptor_tables);
|
|
load_LDT(pc);
|
|
#ifdef CONFIG_SMP
|
|
- mask = cpumask_of_cpu(smp_processor_id());
|
|
- if (!cpus_equal(current->mm->cpu_vm_mask, mask))
|
|
- smp_call_function(flush_ldt, NULL, 1, 1);
|
|
+ if (!cpus_equal(current->mm->cpu_vm_mask,
|
|
+ cpumask_of_cpu(smp_processor_id())))
|
|
+ smp_call_function(flush_ldt, current->mm, 1);
|
|
preempt_enable();
|
|
#endif
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/machine_kexec_32.c 2011-01-31 14:54:00.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/machine_kexec_32.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -131,6 +131,8 @@ void machine_kexec_setup_load_arg(xen_ke
|
|
xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
|
|
xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
|
|
|
|
+ if (image->type == KEXEC_TYPE_DEFAULT)
|
|
+ xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page);
|
|
}
|
|
|
|
int __init machine_kexec_setup_resources(struct resource *hypervisor,
|
|
--- head-2011-03-11.orig/arch/x86/kernel/microcode-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/microcode-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -5,13 +5,14 @@
|
|
* 2006 Shaohua Li <shaohua.li@intel.com>
|
|
*
|
|
* This driver allows to upgrade microcode on Intel processors
|
|
- * belonging to IA-32 family - PentiumPro, Pentium II,
|
|
+ * belonging to IA-32 family - PentiumPro, Pentium II,
|
|
* Pentium III, Xeon, Pentium 4, etc.
|
|
*
|
|
- * Reference: Section 8.10 of Volume III, Intel Pentium 4 Manual,
|
|
- * Order Number 245472 or free download from:
|
|
- *
|
|
- * http://developer.intel.com/design/pentium4/manuals/245472.htm
|
|
+ * Reference: Section 8.11 of Volume 3a, IA-32 Intel? Architecture
|
|
+ * Software Developer's Manual
|
|
+ * Order Number 253668 or free download from:
|
|
+ *
|
|
+ * http://developer.intel.com/design/pentium4/manuals/253668.htm
|
|
*
|
|
* For more information, go to http://www.urbanmyth.org/microcode
|
|
*
|
|
@@ -26,6 +27,7 @@
|
|
#include <linux/kernel.h>
|
|
#include <linux/init.h>
|
|
#include <linux/sched.h>
|
|
+#include <linux/smp_lock.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/module.h>
|
|
#include <linux/slab.h>
|
|
@@ -86,6 +88,7 @@ static int do_microcode_update (const vo
|
|
|
|
static int microcode_open (struct inode *unused1, struct file *unused2)
|
|
{
|
|
+ cycle_kernel_lock();
|
|
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
|
|
}
|
|
|
|
@@ -162,7 +165,7 @@ static int request_microcode(void)
|
|
c->x86, c->x86_model, c->x86_mask);
|
|
error = request_firmware(&firmware, name, µcode_pdev->dev);
|
|
if (error) {
|
|
- pr_debug("microcode: ucode data file %s load failed\n", name);
|
|
+ pr_debug("microcode: data file %s load failed\n", name);
|
|
return error;
|
|
}
|
|
|
|
@@ -183,6 +186,9 @@ static int __init microcode_init (void)
|
|
{
|
|
int error;
|
|
|
|
+ printk(KERN_INFO
|
|
+ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
|
|
+
|
|
error = microcode_dev_init();
|
|
if (error)
|
|
return error;
|
|
@@ -195,8 +201,6 @@ static int __init microcode_init (void)
|
|
|
|
request_microcode();
|
|
|
|
- printk(KERN_INFO
|
|
- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
|
|
return 0;
|
|
}
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/mpparse-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -25,6 +25,9 @@
|
|
#include <asm/proto.h>
|
|
#include <asm/acpi.h>
|
|
#include <asm/bios_ebda.h>
|
|
+#include <asm/e820.h>
|
|
+#include <asm/trampoline.h>
|
|
+#include <asm/setup.h>
|
|
|
|
#include <mach_apic.h>
|
|
#ifdef CONFIG_X86_32
|
|
@@ -32,27 +35,10 @@
|
|
#include <mach_mpparse.h>
|
|
#endif
|
|
|
|
-/* Have we found an MP table */
|
|
-int smp_found_config;
|
|
-
|
|
-/*
|
|
- * Various Linux-internal data structures created from the
|
|
- * MP-table.
|
|
- */
|
|
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
|
|
-int mp_bus_id_to_type[MAX_MP_BUSSES];
|
|
-#endif
|
|
-
|
|
-DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
-int mp_bus_id_to_pci_bus[MAX_MP_BUSSES] = {[0 ... MAX_MP_BUSSES - 1] = -1 };
|
|
-
|
|
-static int mp_current_pci_id;
|
|
-
|
|
-int pic_mode;
|
|
-
|
|
-/*
|
|
- * Intel MP BIOS table parsing routines:
|
|
- */
|
|
+static void *_bus_to_virt(unsigned long ma)
|
|
+{
|
|
+ return is_ISA_range(ma, ma) ? isa_bus_to_virt(ma) : bus_to_virt(ma);
|
|
+}
|
|
|
|
/*
|
|
* Checksum an MP configuration block.
|
|
@@ -68,19 +54,7 @@ static int __init mpf_checksum(unsigned
|
|
return sum & 0xFF;
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
-/*
|
|
- * Have to match translation table entries to main table entries by counter
|
|
- * hence the mpc_record variable .... can't see a less disgusting way of
|
|
- * doing this ....
|
|
- */
|
|
-
|
|
-static int mpc_record;
|
|
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
|
|
- __cpuinitdata;
|
|
-#endif
|
|
-
|
|
-static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
|
|
+static void __init MP_processor_info(struct mpc_config_processor *m)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
int apicid;
|
|
@@ -90,11 +64,12 @@ static void __cpuinit MP_processor_info(
|
|
disabled_cpus++;
|
|
return;
|
|
}
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- apicid = mpc_apic_id(m, translation_table[mpc_record]);
|
|
-#else
|
|
- apicid = m->mpc_apicid;
|
|
-#endif
|
|
+
|
|
+ if (x86_quirks->mpc_apic_id)
|
|
+ apicid = x86_quirks->mpc_apic_id(m);
|
|
+ else
|
|
+ apicid = m->mpc_apicid;
|
|
+
|
|
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
|
|
bootup_cpu = " (Bootup-CPU)";
|
|
boot_cpu_physical_apicid = m->mpc_apicid;
|
|
@@ -107,18 +82,17 @@ static void __cpuinit MP_processor_info(
|
|
#endif
|
|
}
|
|
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
static void __init MP_bus_info(struct mpc_config_bus *m)
|
|
{
|
|
char str[7];
|
|
-
|
|
memcpy(str, m->mpc_bustype, 6);
|
|
str[6] = 0;
|
|
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- mpc_oem_bus_info(m, str, translation_table[mpc_record]);
|
|
-#else
|
|
- Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
|
|
-#endif
|
|
+ if (x86_quirks->mpc_oem_bus_info)
|
|
+ x86_quirks->mpc_oem_bus_info(m, str);
|
|
+ else
|
|
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str);
|
|
|
|
#if MAX_MP_BUSSES < 256
|
|
if (m->mpc_busid >= MAX_MP_BUSSES) {
|
|
@@ -135,12 +109,10 @@ static void __init MP_bus_info(struct mp
|
|
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
|
|
#endif
|
|
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- mpc_oem_pci_bus(m, translation_table[mpc_record]);
|
|
-#endif
|
|
+ if (x86_quirks->mpc_oem_pci_bus)
|
|
+ x86_quirks->mpc_oem_pci_bus(m);
|
|
+
|
|
clear_bit(m->mpc_busid, mp_bus_not_pci);
|
|
- mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
|
|
- mp_current_pci_id++;
|
|
#if defined(CONFIG_EISA) || defined (CONFIG_MCA)
|
|
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
|
|
} else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) {
|
|
@@ -151,6 +123,7 @@ static void __init MP_bus_info(struct mp
|
|
} else
|
|
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
|
|
}
|
|
+#endif
|
|
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
|
|
@@ -180,117 +153,111 @@ static void __init MP_ioapic_info(struct
|
|
if (bad_ioapic(m->mpc_apicaddr))
|
|
return;
|
|
|
|
- mp_ioapics[nr_ioapics] = *m;
|
|
+ mp_ioapics[nr_ioapics].mp_apicaddr = m->mpc_apicaddr;
|
|
+ mp_ioapics[nr_ioapics].mp_apicid = m->mpc_apicid;
|
|
+ mp_ioapics[nr_ioapics].mp_type = m->mpc_type;
|
|
+ mp_ioapics[nr_ioapics].mp_apicver = m->mpc_apicver;
|
|
+ mp_ioapics[nr_ioapics].mp_flags = m->mpc_flags;
|
|
nr_ioapics++;
|
|
}
|
|
|
|
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
|
|
+static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
|
|
{
|
|
- mp_irqs[mp_irq_entries] = *m;
|
|
- Dprintk("Int: type %d, pol %d, trig %d, bus %d,"
|
|
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
|
|
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
|
|
m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
|
|
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
|
|
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
|
- panic("Max # of irq sources exceeded!!\n");
|
|
}
|
|
|
|
-#endif
|
|
-
|
|
-static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
|
|
+static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
|
|
{
|
|
- Dprintk("Lint: type %d, pol %d, trig %d, bus %d,"
|
|
- " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
|
|
- m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
- (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
|
|
- m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
|
|
+ apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
|
|
+ " IRQ %02x, APIC ID %x, APIC INT %02x\n",
|
|
+ mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
|
|
+ (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
|
|
+ mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
-static void __init MP_translation_info(struct mpc_config_translation *m)
|
|
+static void __init assign_to_mp_irq(struct mpc_config_intsrc *m,
|
|
+ struct mp_config_intsrc *mp_irq)
|
|
{
|
|
- printk(KERN_INFO
|
|
- "Translation: record %d, type %d, quad %d, global %d, local %d\n",
|
|
- mpc_record, m->trans_type, m->trans_quad, m->trans_global,
|
|
- m->trans_local);
|
|
+ mp_irq->mp_dstapic = m->mpc_dstapic;
|
|
+ mp_irq->mp_type = m->mpc_type;
|
|
+ mp_irq->mp_irqtype = m->mpc_irqtype;
|
|
+ mp_irq->mp_irqflag = m->mpc_irqflag;
|
|
+ mp_irq->mp_srcbus = m->mpc_srcbus;
|
|
+ mp_irq->mp_srcbusirq = m->mpc_srcbusirq;
|
|
+ mp_irq->mp_dstirq = m->mpc_dstirq;
|
|
+}
|
|
|
|
- if (mpc_record >= MAX_MPC_ENTRY)
|
|
- printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
|
|
- else
|
|
- translation_table[mpc_record] = m; /* stash this for later */
|
|
- if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
|
|
- node_set_online(m->trans_quad);
|
|
+static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
|
|
+ struct mpc_config_intsrc *m)
|
|
+{
|
|
+ m->mpc_dstapic = mp_irq->mp_dstapic;
|
|
+ m->mpc_type = mp_irq->mp_type;
|
|
+ m->mpc_irqtype = mp_irq->mp_irqtype;
|
|
+ m->mpc_irqflag = mp_irq->mp_irqflag;
|
|
+ m->mpc_srcbus = mp_irq->mp_srcbus;
|
|
+ m->mpc_srcbusirq = mp_irq->mp_srcbusirq;
|
|
+ m->mpc_dstirq = mp_irq->mp_dstirq;
|
|
}
|
|
|
|
-/*
|
|
- * Read/parse the MPC oem tables
|
|
- */
|
|
+static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
|
|
+ struct mpc_config_intsrc *m)
|
|
+{
|
|
+ if (mp_irq->mp_dstapic != m->mpc_dstapic)
|
|
+ return 1;
|
|
+ if (mp_irq->mp_type != m->mpc_type)
|
|
+ return 2;
|
|
+ if (mp_irq->mp_irqtype != m->mpc_irqtype)
|
|
+ return 3;
|
|
+ if (mp_irq->mp_irqflag != m->mpc_irqflag)
|
|
+ return 4;
|
|
+ if (mp_irq->mp_srcbus != m->mpc_srcbus)
|
|
+ return 5;
|
|
+ if (mp_irq->mp_srcbusirq != m->mpc_srcbusirq)
|
|
+ return 6;
|
|
+ if (mp_irq->mp_dstirq != m->mpc_dstirq)
|
|
+ return 7;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
|
|
-static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
|
|
- unsigned short oemsize)
|
|
+static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
|
|
{
|
|
- int count = sizeof(*oemtable); /* the header size */
|
|
- unsigned char *oemptr = ((unsigned char *)oemtable) + count;
|
|
+ int i;
|
|
|
|
- mpc_record = 0;
|
|
- printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
|
|
- oemtable);
|
|
- if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) {
|
|
- printk(KERN_WARNING
|
|
- "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
|
|
- oemtable->oem_signature[0], oemtable->oem_signature[1],
|
|
- oemtable->oem_signature[2], oemtable->oem_signature[3]);
|
|
- return;
|
|
- }
|
|
- if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) {
|
|
- printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
|
|
- return;
|
|
- }
|
|
- while (count < oemtable->oem_length) {
|
|
- switch (*oemptr) {
|
|
- case MP_TRANSLATION:
|
|
- {
|
|
- struct mpc_config_translation *m =
|
|
- (struct mpc_config_translation *)oemptr;
|
|
- MP_translation_info(m);
|
|
- oemptr += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- ++mpc_record;
|
|
- break;
|
|
- }
|
|
- default:
|
|
- {
|
|
- printk(KERN_WARNING
|
|
- "Unrecognised OEM table entry type! - %d\n",
|
|
- (int)*oemptr);
|
|
- return;
|
|
- }
|
|
- }
|
|
+ print_MP_intsrc_info(m);
|
|
+
|
|
+ for (i = 0; i < mp_irq_entries; i++) {
|
|
+ if (!mp_irq_mpc_intsrc_cmp(&mp_irqs[i], m))
|
|
+ return;
|
|
}
|
|
+
|
|
+ assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
|
|
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
|
+ panic("Max # of irq sources exceeded!!\n");
|
|
}
|
|
|
|
-static inline void mps_oem_check(struct mp_config_table *mpc, char *oem,
|
|
- char *productid)
|
|
+#endif
|
|
+
|
|
+static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m)
|
|
{
|
|
- if (strncmp(oem, "IBM NUMA", 8))
|
|
- printk("Warning! May not be a NUMA-Q system!\n");
|
|
- if (mpc->mpc_oemptr)
|
|
- smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr,
|
|
- mpc->mpc_oemsize);
|
|
+ apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x,"
|
|
+ " IRQ %02x, APIC ID %x, APIC LINT %02x\n",
|
|
+ m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
+ (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid,
|
|
+ m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
|
|
}
|
|
-#endif /* CONFIG_X86_NUMAQ */
|
|
|
|
/*
|
|
* Read/parse the MPC
|
|
*/
|
|
|
|
-static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
|
|
+static int __init smp_check_mpc(struct mp_config_table *mpc, char *oem,
|
|
+ char *str)
|
|
{
|
|
- char str[16];
|
|
- char oem[10];
|
|
- int count = sizeof(*mpc);
|
|
- unsigned char *mpt = ((unsigned char *)mpc) + count;
|
|
|
|
if (memcmp(mpc->mpc_signature, MPC_SIGNATURE, 4)) {
|
|
printk(KERN_ERR "MPTABLE: bad signature [%c%c%c%c]!\n",
|
|
@@ -313,20 +280,44 @@ static int __init smp_read_mpc(struct mp
|
|
}
|
|
memcpy(oem, mpc->mpc_oem, 8);
|
|
oem[8] = 0;
|
|
- printk(KERN_INFO "MPTABLE: OEM ID: %s ", oem);
|
|
+ printk(KERN_INFO "MPTABLE: OEM ID: %s\n", oem);
|
|
|
|
memcpy(str, mpc->mpc_productid, 12);
|
|
str[12] = 0;
|
|
- printk("Product ID: %s ", str);
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- mps_oem_check(mpc, oem, str);
|
|
-#endif
|
|
- printk(KERN_INFO "MPTABLE: Product ID: %s ", str);
|
|
+ printk(KERN_INFO "MPTABLE: Product ID: %s\n", str);
|
|
|
|
#ifndef CONFIG_XEN
|
|
printk(KERN_INFO "MPTABLE: APIC at: 0x%X\n", mpc->mpc_lapic);
|
|
+#endif
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
|
|
+{
|
|
+ char str[16];
|
|
+ char oem[10];
|
|
+
|
|
+ int count = sizeof(*mpc);
|
|
+ unsigned char *mpt = ((unsigned char *)mpc) + count;
|
|
+
|
|
+ if (!smp_check_mpc(mpc, oem, str))
|
|
+ return 0;
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * need to make sure summit and es7000's mps_oem_check is safe to be
|
|
+ * called early via genericarch 's mps_oem_check
|
|
+ */
|
|
+ if (early) {
|
|
+#ifdef CONFIG_X86_NUMAQ
|
|
+ numaq_mps_oem_check(mpc, oem, str);
|
|
+#endif
|
|
+ } else
|
|
+ mps_oem_check(mpc, oem, str);
|
|
+#endif
|
|
+#ifndef CONFIG_XEN
|
|
/* save the local APIC address, it might be non-default */
|
|
if (!acpi_lapic)
|
|
mp_lapic_addr = mpc->mpc_lapic;
|
|
@@ -335,12 +326,17 @@ static int __init smp_read_mpc(struct mp
|
|
if (early)
|
|
return 1;
|
|
|
|
+ if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) {
|
|
+ struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr;
|
|
+ x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize);
|
|
+ }
|
|
+
|
|
/*
|
|
* Now process the configuration blocks.
|
|
*/
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- mpc_record = 0;
|
|
-#endif
|
|
+ if (x86_quirks->mpc_record)
|
|
+ *x86_quirks->mpc_record = 0;
|
|
+
|
|
while (count < mpc->mpc_length) {
|
|
switch (*mpt) {
|
|
case MP_PROCESSOR:
|
|
@@ -358,7 +354,9 @@ static int __init smp_read_mpc(struct mp
|
|
{
|
|
struct mpc_config_bus *m =
|
|
(struct mpc_config_bus *)mpt;
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
MP_bus_info(m);
|
|
+#endif
|
|
mpt += sizeof(*m);
|
|
count += sizeof(*m);
|
|
break;
|
|
@@ -404,10 +402,14 @@ static int __init smp_read_mpc(struct mp
|
|
count = mpc->mpc_length;
|
|
break;
|
|
}
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- ++mpc_record;
|
|
-#endif
|
|
+ if (x86_quirks->mpc_record)
|
|
+ (*x86_quirks->mpc_record)++;
|
|
}
|
|
+
|
|
+#ifdef CONFIG_X86_GENERICARCH
|
|
+ generic_bigsmp_probe();
|
|
+#endif
|
|
+
|
|
setup_apic_routing();
|
|
if (!num_processors)
|
|
printk(KERN_ERR "MPTABLE: no processors registered!\n");
|
|
@@ -433,7 +435,7 @@ static void __init construct_default_ioi
|
|
intsrc.mpc_type = MP_INTSRC;
|
|
intsrc.mpc_irqflag = 0; /* conforming */
|
|
intsrc.mpc_srcbus = 0;
|
|
- intsrc.mpc_dstapic = mp_ioapics[0].mpc_apicid;
|
|
+ intsrc.mpc_dstapic = mp_ioapics[0].mp_apicid;
|
|
|
|
intsrc.mpc_irqtype = mp_INT;
|
|
|
|
@@ -494,42 +496,11 @@ static void __init construct_default_ioi
|
|
MP_intsrc_info(&intsrc);
|
|
}
|
|
|
|
-#endif
|
|
|
|
-static inline void __init construct_default_ISA_mptable(int mpc_default_type)
|
|
+static void __init construct_ioapic_table(int mpc_default_type)
|
|
{
|
|
- struct mpc_config_processor processor;
|
|
- struct mpc_config_bus bus;
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
struct mpc_config_ioapic ioapic;
|
|
-#endif
|
|
- struct mpc_config_lintsrc lintsrc;
|
|
- int linttypes[2] = { mp_ExtINT, mp_NMI };
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * local APIC has default address
|
|
- */
|
|
- mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * 2 CPUs, numbered 0 & 1.
|
|
- */
|
|
- processor.mpc_type = MP_PROCESSOR;
|
|
- /* Either an integrated APIC or a discrete 82489DX. */
|
|
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
- processor.mpc_cpuflag = CPU_ENABLED;
|
|
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
|
|
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
|
|
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
|
|
- processor.mpc_reserved[0] = 0;
|
|
- processor.mpc_reserved[1] = 0;
|
|
- for (i = 0; i < 2; i++) {
|
|
- processor.mpc_apicid = i;
|
|
- MP_processor_info(&processor);
|
|
- }
|
|
+ struct mpc_config_bus bus;
|
|
|
|
bus.mpc_type = MP_BUS;
|
|
bus.mpc_busid = 0;
|
|
@@ -558,7 +529,6 @@ static inline void __init construct_defa
|
|
MP_bus_info(&bus);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
ioapic.mpc_type = MP_IOAPIC;
|
|
ioapic.mpc_apicid = 2;
|
|
ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
@@ -570,7 +540,44 @@ static inline void __init construct_defa
|
|
* We set up most of the low 16 IO-APIC pins according to MPS rules.
|
|
*/
|
|
construct_default_ioirq_mptable(mpc_default_type);
|
|
+}
|
|
+#else
|
|
+static inline void __init construct_ioapic_table(int mpc_default_type) { }
|
|
+#endif
|
|
+
|
|
+static inline void __init construct_default_ISA_mptable(int mpc_default_type)
|
|
+{
|
|
+ struct mpc_config_processor processor;
|
|
+ struct mpc_config_lintsrc lintsrc;
|
|
+ int linttypes[2] = { mp_ExtINT, mp_NMI };
|
|
+ int i;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ /*
|
|
+ * local APIC has default address
|
|
+ */
|
|
+ mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
|
|
#endif
|
|
+
|
|
+ /*
|
|
+ * 2 CPUs, numbered 0 & 1.
|
|
+ */
|
|
+ processor.mpc_type = MP_PROCESSOR;
|
|
+ /* Either an integrated APIC or a discrete 82489DX. */
|
|
+ processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
+ processor.mpc_cpuflag = CPU_ENABLED;
|
|
+ processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
|
|
+ (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
|
|
+ processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
|
|
+ processor.mpc_reserved[0] = 0;
|
|
+ processor.mpc_reserved[1] = 0;
|
|
+ for (i = 0; i < 2; i++) {
|
|
+ processor.mpc_apicid = i;
|
|
+ MP_processor_info(&processor);
|
|
+ }
|
|
+
|
|
+ construct_ioapic_table(mpc_default_type);
|
|
+
|
|
lintsrc.mpc_type = MP_LINTSRC;
|
|
lintsrc.mpc_irqflag = 0; /* conforming */
|
|
lintsrc.mpc_srcbusid = 0;
|
|
@@ -589,7 +596,7 @@ static struct intel_mp_floating *mpf_fou
|
|
* Scan the memory blocks for an SMP configuration block.
|
|
*/
|
|
#ifndef CONFIG_XEN
|
|
-static void __init __get_smp_config(unsigned early)
|
|
+static void __init __get_smp_config(unsigned int early)
|
|
#else
|
|
void __init get_smp_config(void)
|
|
#define early 0
|
|
@@ -597,6 +604,10 @@ void __init get_smp_config(void)
|
|
{
|
|
struct intel_mp_floating *mpf = mpf_found;
|
|
|
|
+ if (x86_quirks->mach_get_smp_config) {
|
|
+ if (x86_quirks->mach_get_smp_config(early))
|
|
+ return;
|
|
+ }
|
|
if (acpi_lapic && early)
|
|
return;
|
|
/*
|
|
@@ -613,7 +624,7 @@ void __init get_smp_config(void)
|
|
|
|
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
|
|
mpf->mpf_specification);
|
|
-#ifdef CONFIG_X86_32
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
if (mpf->mpf_feature2 & (1 << 7)) {
|
|
printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
|
|
pic_mode = 1;
|
|
@@ -646,8 +657,10 @@ void __init get_smp_config(void)
|
|
* Read the physical hardware table. Anything here will
|
|
* override the defaults.
|
|
*/
|
|
- if (!smp_read_mpc(isa_bus_to_virt(mpf->mpf_physptr), early)) {
|
|
+ if (!smp_read_mpc(_bus_to_virt(mpf->mpf_physptr), early)) {
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
smp_found_config = 0;
|
|
+#endif
|
|
printk(KERN_ERR
|
|
"BIOS bug, MP table errors detected!...\n");
|
|
printk(KERN_ERR "... disabling SMP support. "
|
|
@@ -704,10 +717,11 @@ void __init get_smp_config(void)
|
|
static int __init smp_scan_config(unsigned long base, unsigned long length,
|
|
unsigned reserve)
|
|
{
|
|
- unsigned int *bp = isa_bus_to_virt(base);
|
|
+ unsigned int *bp = _bus_to_virt(base);
|
|
struct intel_mp_floating *mpf;
|
|
|
|
- Dprintk("Scan SMP from %p for %ld bytes.\n", bp, length);
|
|
+ apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
|
|
+ bp, length);
|
|
BUILD_BUG_ON(sizeof(*mpf) != 16);
|
|
|
|
while (length > 0) {
|
|
@@ -717,16 +731,22 @@ static int __init smp_scan_config(unsign
|
|
!mpf_checksum((unsigned char *)bp, 16) &&
|
|
((mpf->mpf_specification == 1)
|
|
|| (mpf->mpf_specification == 4))) {
|
|
-
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
smp_found_config = 1;
|
|
+#endif
|
|
mpf_found = mpf;
|
|
-#ifdef CONFIG_X86_32
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
|
|
mpf, virt_to_phys(mpf));
|
|
- reserve_bootmem(virt_to_phys(mpf), PAGE_SIZE,
|
|
+
|
|
+ if (!reserve)
|
|
+ return 1;
|
|
+ reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
|
|
BOOTMEM_DEFAULT);
|
|
if (mpf->mpf_physptr) {
|
|
+ unsigned long size = PAGE_SIZE;
|
|
+#ifdef CONFIG_X86_32
|
|
/*
|
|
* We cannot access to MPC table to compute
|
|
* table size yet, as only few megabytes from
|
|
@@ -736,27 +756,18 @@ static int __init smp_scan_config(unsign
|
|
* PAGE_SIZE from mpg->mpf_physptr yields BUG()
|
|
* in reserve_bootmem.
|
|
*/
|
|
- unsigned long size = PAGE_SIZE;
|
|
unsigned long end = max_low_pfn * PAGE_SIZE;
|
|
if (mpf->mpf_physptr + size > end)
|
|
size = end - mpf->mpf_physptr;
|
|
- reserve_bootmem(mpf->mpf_physptr, size,
|
|
+#endif
|
|
+ reserve_bootmem_generic(mpf->mpf_physptr, size,
|
|
BOOTMEM_DEFAULT);
|
|
}
|
|
#else
|
|
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
|
|
- mpf, ((void *)bp - isa_bus_to_virt(base)) + base);
|
|
-#endif
|
|
-#elif !defined(CONFIG_XEN)
|
|
- if (!reserve)
|
|
- return 1;
|
|
-
|
|
- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE);
|
|
- if (mpf->mpf_physptr)
|
|
- reserve_bootmem_generic(mpf->mpf_physptr,
|
|
- PAGE_SIZE);
|
|
+ mpf, ((void *)bp - _bus_to_virt(base)) + base);
|
|
#endif
|
|
- return 1;
|
|
+ return 1;
|
|
}
|
|
bp += 4;
|
|
length -= 16;
|
|
@@ -764,12 +775,16 @@ static int __init smp_scan_config(unsign
|
|
return 0;
|
|
}
|
|
|
|
-static void __init __find_smp_config(unsigned reserve)
|
|
+static void __init __find_smp_config(unsigned int reserve)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
unsigned int address;
|
|
#endif
|
|
|
|
+ if (x86_quirks->mach_find_smp_config) {
|
|
+ if (x86_quirks->mach_find_smp_config(reserve))
|
|
+ return;
|
|
+ }
|
|
/*
|
|
* FIXME: Linux assumes you have 640K of base ram..
|
|
* this continues the error...
|
|
@@ -816,302 +831,297 @@ void __init find_smp_config(void)
|
|
__find_smp_config(1);
|
|
}
|
|
|
|
-/* --------------------------------------------------------------------------
|
|
- ACPI-based MP Configuration
|
|
- -------------------------------------------------------------------------- */
|
|
-
|
|
-/*
|
|
- * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
|
|
- */
|
|
-int es7000_plat;
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+static u8 __initdata irq_used[MAX_IRQ_SOURCES];
|
|
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
+static int __init get_MP_intsrc_index(struct mpc_config_intsrc *m)
|
|
+{
|
|
+ int i;
|
|
|
|
-#define MP_ISA_BUS 0
|
|
+ if (m->mpc_irqtype != mp_INT)
|
|
+ return 0;
|
|
|
|
-extern struct mp_ioapic_routing mp_ioapic_routing[MAX_IO_APICS];
|
|
+ if (m->mpc_irqflag != 0x0f)
|
|
+ return 0;
|
|
|
|
-static int mp_find_ioapic(int gsi)
|
|
-{
|
|
- int i = 0;
|
|
+ /* not legacy */
|
|
|
|
- /* Find the IOAPIC that manages this GSI. */
|
|
- for (i = 0; i < nr_ioapics; i++) {
|
|
- if ((gsi >= mp_ioapic_routing[i].gsi_base)
|
|
- && (gsi <= mp_ioapic_routing[i].gsi_end))
|
|
- return i;
|
|
+ for (i = 0; i < mp_irq_entries; i++) {
|
|
+ if (mp_irqs[i].mp_irqtype != mp_INT)
|
|
+ continue;
|
|
+
|
|
+ if (mp_irqs[i].mp_irqflag != 0x0f)
|
|
+ continue;
|
|
+
|
|
+ if (mp_irqs[i].mp_srcbus != m->mpc_srcbus)
|
|
+ continue;
|
|
+ if (mp_irqs[i].mp_srcbusirq != m->mpc_srcbusirq)
|
|
+ continue;
|
|
+ if (irq_used[i]) {
|
|
+ /* already claimed */
|
|
+ return -2;
|
|
+ }
|
|
+ irq_used[i] = 1;
|
|
+ return i;
|
|
}
|
|
|
|
- printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
|
|
+ /* not found */
|
|
return -1;
|
|
}
|
|
|
|
-static u8 __init uniq_ioapic_id(u8 id)
|
|
-{
|
|
-#ifdef CONFIG_X86_32
|
|
-#ifndef CONFIG_XEN
|
|
- if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
|
|
- !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
|
|
- return io_apic_get_unique_id(nr_ioapics, id);
|
|
- else
|
|
-#endif
|
|
- return id;
|
|
-#else
|
|
- int i;
|
|
- DECLARE_BITMAP(used, 256);
|
|
- bitmap_zero(used, 256);
|
|
- for (i = 0; i < nr_ioapics; i++) {
|
|
- struct mpc_config_ioapic *ia = &mp_ioapics[i];
|
|
- __set_bit(ia->mpc_apicid, used);
|
|
- }
|
|
- if (!test_bit(id, used))
|
|
- return id;
|
|
- return find_first_zero_bit(used, 256);
|
|
+#define SPARE_SLOT_NUM 20
|
|
+
|
|
+static struct mpc_config_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
|
|
#endif
|
|
-}
|
|
|
|
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
|
|
+static int __init replace_intsrc_all(struct mp_config_table *mpc,
|
|
+ unsigned long mpc_new_phys,
|
|
+ unsigned long mpc_new_length)
|
|
{
|
|
- int idx = 0;
|
|
-
|
|
- if (bad_ioapic(address))
|
|
- return;
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+ int i;
|
|
+ int nr_m_spare = 0;
|
|
+#endif
|
|
|
|
- idx = nr_ioapics;
|
|
+ int count = sizeof(*mpc);
|
|
+ unsigned char *mpt = ((unsigned char *)mpc) + count;
|
|
|
|
- mp_ioapics[idx].mpc_type = MP_IOAPIC;
|
|
- mp_ioapics[idx].mpc_flags = MPC_APIC_USABLE;
|
|
- mp_ioapics[idx].mpc_apicaddr = address;
|
|
+ printk(KERN_INFO "mpc_length %x\n", mpc->mpc_length);
|
|
+ while (count < mpc->mpc_length) {
|
|
+ switch (*mpt) {
|
|
+ case MP_PROCESSOR:
|
|
+ {
|
|
+ struct mpc_config_processor *m =
|
|
+ (struct mpc_config_processor *)mpt;
|
|
+ mpt += sizeof(*m);
|
|
+ count += sizeof(*m);
|
|
+ break;
|
|
+ }
|
|
+ case MP_BUS:
|
|
+ {
|
|
+ struct mpc_config_bus *m =
|
|
+ (struct mpc_config_bus *)mpt;
|
|
+ mpt += sizeof(*m);
|
|
+ count += sizeof(*m);
|
|
+ break;
|
|
+ }
|
|
+ case MP_IOAPIC:
|
|
+ {
|
|
+ mpt += sizeof(struct mpc_config_ioapic);
|
|
+ count += sizeof(struct mpc_config_ioapic);
|
|
+ break;
|
|
+ }
|
|
+ case MP_INTSRC:
|
|
+ {
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+ struct mpc_config_intsrc *m =
|
|
+ (struct mpc_config_intsrc *)mpt;
|
|
|
|
-#ifndef CONFIG_XEN
|
|
- set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
|
|
+ apic_printk(APIC_VERBOSE, "OLD ");
|
|
+ print_MP_intsrc_info(m);
|
|
+ i = get_MP_intsrc_index(m);
|
|
+ if (i > 0) {
|
|
+ assign_to_mpc_intsrc(&mp_irqs[i], m);
|
|
+ apic_printk(APIC_VERBOSE, "NEW ");
|
|
+ print_mp_irq_info(&mp_irqs[i]);
|
|
+ } else if (!i) {
|
|
+ /* legacy, do nothing */
|
|
+ } else if (nr_m_spare < SPARE_SLOT_NUM) {
|
|
+ /*
|
|
+ * not found (-1), or duplicated (-2)
|
|
+ * are invalid entries,
|
|
+ * we need to use the slot later
|
|
+ */
|
|
+ m_spare[nr_m_spare] = m;
|
|
+ nr_m_spare++;
|
|
+ }
|
|
#endif
|
|
- mp_ioapics[idx].mpc_apicid = uniq_ioapic_id(id);
|
|
-#ifdef CONFIG_X86_32
|
|
- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
|
|
-#else
|
|
- mp_ioapics[idx].mpc_apicver = 0;
|
|
+ mpt += sizeof(struct mpc_config_intsrc);
|
|
+ count += sizeof(struct mpc_config_intsrc);
|
|
+ break;
|
|
+ }
|
|
+ case MP_LINTSRC:
|
|
+ {
|
|
+ struct mpc_config_lintsrc *m =
|
|
+ (struct mpc_config_lintsrc *)mpt;
|
|
+ mpt += sizeof(*m);
|
|
+ count += sizeof(*m);
|
|
+ break;
|
|
+ }
|
|
+ default:
|
|
+ /* wrong mptable */
|
|
+ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
|
|
+ printk(KERN_ERR "type %x\n", *mpt);
|
|
+ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
|
|
+ 1, mpc, mpc->mpc_length, 1);
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+ for (i = 0; i < mp_irq_entries; i++) {
|
|
+ if (irq_used[i])
|
|
+ continue;
|
|
+
|
|
+ if (mp_irqs[i].mp_irqtype != mp_INT)
|
|
+ continue;
|
|
+
|
|
+ if (mp_irqs[i].mp_irqflag != 0x0f)
|
|
+ continue;
|
|
+
|
|
+ if (nr_m_spare > 0) {
|
|
+ apic_printk(APIC_VERBOSE, "*NEW* found\n");
|
|
+ nr_m_spare--;
|
|
+ assign_to_mpc_intsrc(&mp_irqs[i], m_spare[nr_m_spare]);
|
|
+ m_spare[nr_m_spare] = NULL;
|
|
+ } else {
|
|
+ struct mpc_config_intsrc *m =
|
|
+ (struct mpc_config_intsrc *)mpt;
|
|
+ count += sizeof(struct mpc_config_intsrc);
|
|
+ if (!mpc_new_phys) {
|
|
+ printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
|
|
+ } else {
|
|
+ if (count <= mpc_new_length)
|
|
+ printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
|
|
+ else {
|
|
+ printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
|
|
+ goto out;
|
|
+ }
|
|
+ }
|
|
+ assign_to_mpc_intsrc(&mp_irqs[i], m);
|
|
+ mpc->mpc_length = count;
|
|
+ mpt += sizeof(struct mpc_config_intsrc);
|
|
+ }
|
|
+ print_mp_irq_info(&mp_irqs[i]);
|
|
+ }
|
|
#endif
|
|
- /*
|
|
- * Build basic GSI lookup table to facilitate gsi->io_apic lookups
|
|
- * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
|
|
- */
|
|
- mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mpc_apicid;
|
|
- mp_ioapic_routing[idx].gsi_base = gsi_base;
|
|
- mp_ioapic_routing[idx].gsi_end = gsi_base +
|
|
- io_apic_get_redir_entries(idx);
|
|
-
|
|
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
|
|
- "GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
|
|
- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
|
|
- mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
|
|
+out:
|
|
+ /* update checksum */
|
|
+ mpc->mpc_checksum = 0;
|
|
+ mpc->mpc_checksum -= mpf_checksum((unsigned char *)mpc,
|
|
+ mpc->mpc_length);
|
|
|
|
- nr_ioapics++;
|
|
+ return 0;
|
|
}
|
|
|
|
-void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
|
|
-{
|
|
- struct mpc_config_intsrc intsrc;
|
|
- int ioapic = -1;
|
|
- int pin = -1;
|
|
-
|
|
- /*
|
|
- * Convert 'gsi' to 'ioapic.pin'.
|
|
- */
|
|
- ioapic = mp_find_ioapic(gsi);
|
|
- if (ioapic < 0)
|
|
- return;
|
|
- pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
|
|
+static int __initdata enable_update_mptable;
|
|
|
|
- /*
|
|
- * TBD: This check is for faulty timer entries, where the override
|
|
- * erroneously sets the trigger to level, resulting in a HUGE
|
|
- * increase of timer interrupts!
|
|
- */
|
|
- if ((bus_irq == 0) && (trigger == 3))
|
|
- trigger = 1;
|
|
+static int __init update_mptable_setup(char *str)
|
|
+{
|
|
+ enable_update_mptable = 1;
|
|
+ return 0;
|
|
+}
|
|
+early_param("update_mptable", update_mptable_setup);
|
|
|
|
- intsrc.mpc_type = MP_INTSRC;
|
|
- intsrc.mpc_irqtype = mp_INT;
|
|
- intsrc.mpc_irqflag = (trigger << 2) | polarity;
|
|
- intsrc.mpc_srcbus = MP_ISA_BUS;
|
|
- intsrc.mpc_srcbusirq = bus_irq; /* IRQ */
|
|
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid; /* APIC ID */
|
|
- intsrc.mpc_dstirq = pin; /* INTIN# */
|
|
+static unsigned long __initdata mpc_new_phys;
|
|
+static unsigned long mpc_new_length __initdata = 4096;
|
|
|
|
- MP_intsrc_info(&intsrc);
|
|
+/* alloc_mptable or alloc_mptable=4k */
|
|
+static int __initdata alloc_mptable;
|
|
+static int __init parse_alloc_mptable_opt(char *p)
|
|
+{
|
|
+ enable_update_mptable = 1;
|
|
+ alloc_mptable = 1;
|
|
+ if (!p)
|
|
+ return 0;
|
|
+ mpc_new_length = memparse(p, &p);
|
|
+ return 0;
|
|
}
|
|
+early_param("alloc_mptable", parse_alloc_mptable_opt);
|
|
|
|
-void __init mp_config_acpi_legacy_irqs(void)
|
|
+void __init early_reserve_e820_mpc_new(void)
|
|
{
|
|
- struct mpc_config_intsrc intsrc;
|
|
- int i = 0;
|
|
- int ioapic = -1;
|
|
-
|
|
-#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
|
|
- /*
|
|
- * Fabricate the legacy ISA bus (bus #31).
|
|
- */
|
|
- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
|
|
-#endif
|
|
- set_bit(MP_ISA_BUS, mp_bus_not_pci);
|
|
- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
|
|
-
|
|
- /*
|
|
- * Older generations of ES7000 have no legacy identity mappings
|
|
- */
|
|
- if (es7000_plat == 1)
|
|
- return;
|
|
-
|
|
- /*
|
|
- * Locate the IOAPIC that manages the ISA IRQs (0-15).
|
|
- */
|
|
- ioapic = mp_find_ioapic(0);
|
|
- if (ioapic < 0)
|
|
- return;
|
|
-
|
|
- intsrc.mpc_type = MP_INTSRC;
|
|
- intsrc.mpc_irqflag = 0; /* Conforming */
|
|
- intsrc.mpc_srcbus = MP_ISA_BUS;
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- intsrc.mpc_dstapic = mp_ioapics[ioapic].mpc_apicid;
|
|
+ if (enable_update_mptable && alloc_mptable) {
|
|
+ u64 startt = 0;
|
|
+#ifdef CONFIG_X86_TRAMPOLINE
|
|
+ startt = TRAMPOLINE_BASE;
|
|
#endif
|
|
- /*
|
|
- * Use the default configuration for the IRQs 0-15. Unless
|
|
- * overridden by (MADT) interrupt source override entries.
|
|
- */
|
|
- for (i = 0; i < 16; i++) {
|
|
- int idx;
|
|
-
|
|
- for (idx = 0; idx < mp_irq_entries; idx++) {
|
|
- struct mpc_config_intsrc *irq = mp_irqs + idx;
|
|
-
|
|
- /* Do we already have a mapping for this ISA IRQ? */
|
|
- if (irq->mpc_srcbus == MP_ISA_BUS
|
|
- && irq->mpc_srcbusirq == i)
|
|
- break;
|
|
-
|
|
- /* Do we already have a mapping for this IOAPIC pin */
|
|
- if ((irq->mpc_dstapic == intsrc.mpc_dstapic) &&
|
|
- (irq->mpc_dstirq == i))
|
|
- break;
|
|
- }
|
|
-
|
|
- if (idx != mp_irq_entries) {
|
|
- printk(KERN_DEBUG "ACPI: IRQ%d used by override.\n", i);
|
|
- continue; /* IRQ already used */
|
|
- }
|
|
-
|
|
- intsrc.mpc_irqtype = mp_INT;
|
|
- intsrc.mpc_srcbusirq = i; /* Identity mapped */
|
|
- intsrc.mpc_dstirq = i;
|
|
-
|
|
- MP_intsrc_info(&intsrc);
|
|
+ mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
|
|
}
|
|
}
|
|
|
|
-int mp_register_gsi(u32 gsi, int triggering, int polarity)
|
|
+static int __init update_mp_table(void)
|
|
{
|
|
- int ioapic;
|
|
- int ioapic_pin;
|
|
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
-#define MAX_GSI_NUM 4096
|
|
-#define IRQ_COMPRESSION_START 64
|
|
+ char str[16];
|
|
+ char oem[10];
|
|
+ struct intel_mp_floating *mpf;
|
|
+ struct mp_config_table *mpc;
|
|
+ struct mp_config_table *mpc_new;
|
|
+
|
|
+ if (!enable_update_mptable)
|
|
+ return 0;
|
|
+
|
|
+ mpf = mpf_found;
|
|
+ if (!mpf)
|
|
+ return 0;
|
|
|
|
- static int pci_irq = IRQ_COMPRESSION_START;
|
|
/*
|
|
- * Mapping between Global System Interrupts, which
|
|
- * represent all possible interrupts, and IRQs
|
|
- * assigned to actual devices.
|
|
+ * Now see if we need to go further.
|
|
*/
|
|
- static int gsi_to_irq[MAX_GSI_NUM];
|
|
-#else
|
|
+ if (mpf->mpf_feature1 != 0)
|
|
+ return 0;
|
|
|
|
- if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
|
|
- return gsi;
|
|
-#endif
|
|
+ if (!mpf->mpf_physptr)
|
|
+ return 0;
|
|
|
|
- /* Don't set up the ACPI SCI because it's already set up */
|
|
- if (acpi_gbl_FADT.sci_interrupt == gsi)
|
|
- return gsi;
|
|
+ mpc = _bus_to_virt(mpf->mpf_physptr);
|
|
|
|
- ioapic = mp_find_ioapic(gsi);
|
|
- if (ioapic < 0) {
|
|
- printk(KERN_WARNING "No IOAPIC for GSI %u\n", gsi);
|
|
- return gsi;
|
|
- }
|
|
+ if (!smp_check_mpc(mpc, oem, str))
|
|
+ return 0;
|
|
|
|
- ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base;
|
|
+ printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf));
|
|
+ printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
|
|
|
|
-#ifndef CONFIG_X86_32
|
|
- if (ioapic_renumber_irq)
|
|
- gsi = ioapic_renumber_irq(ioapic, gsi);
|
|
-#endif
|
|
+ if (mpc_new_phys && mpc->mpc_length > mpc_new_length) {
|
|
+ mpc_new_phys = 0;
|
|
+ printk(KERN_INFO "mpc_new_length is %ld, please use alloc_mptable=8k\n",
|
|
+ mpc_new_length);
|
|
+ }
|
|
+
|
|
+ if (!mpc_new_phys) {
|
|
+ unsigned char old, new;
|
|
+ /* check if we can change the postion */
|
|
+ mpc->mpc_checksum = 0;
|
|
+ old = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
|
|
+ mpc->mpc_checksum = 0xff;
|
|
+ new = mpf_checksum((unsigned char *)mpc, mpc->mpc_length);
|
|
+ if (old == new) {
|
|
+ printk(KERN_INFO "mpc is readonly, please try alloc_mptable instead\n");
|
|
+ return 0;
|
|
+ }
|
|
+ printk(KERN_INFO "use in-positon replacing\n");
|
|
+ } else {
|
|
+ maddr_t mpc_new_bus;
|
|
|
|
- /*
|
|
- * Avoid pin reprogramming. PRTs typically include entries
|
|
- * with redundant pin->gsi mappings (but unique PCI devices);
|
|
- * we only program the IOAPIC on the first.
|
|
- */
|
|
- if (ioapic_pin > MP_MAX_IOAPIC_PIN) {
|
|
- printk(KERN_ERR "Invalid reference to IOAPIC pin "
|
|
- "%d-%d\n", mp_ioapic_routing[ioapic].apic_id,
|
|
- ioapic_pin);
|
|
- return gsi;
|
|
- }
|
|
- if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) {
|
|
- Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
|
|
- mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
|
|
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
- return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]);
|
|
-#else
|
|
- return gsi;
|
|
-#endif
|
|
+ mpc_new_bus = phys_to_machine(mpc_new_phys);
|
|
+ mpf->mpf_physptr = mpc_new_bus;
|
|
+ mpc_new = phys_to_virt(mpc_new_phys);
|
|
+ memcpy(mpc_new, mpc, mpc->mpc_length);
|
|
+ mpc = mpc_new;
|
|
+ /* check if we can modify that */
|
|
+ if (mpc_new_bus - mpf->mpf_physptr) {
|
|
+ struct intel_mp_floating *mpf_new;
|
|
+ /* steal 16 bytes from [0, 1k) */
|
|
+ printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
|
|
+ mpf_new = isa_bus_to_virt(0x400 - 16);
|
|
+ memcpy(mpf_new, mpf, 16);
|
|
+ mpf = mpf_new;
|
|
+ mpf->mpf_physptr = mpc_new_bus;
|
|
+ }
|
|
+ mpf->mpf_checksum = 0;
|
|
+ mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
|
|
+ printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
|
|
}
|
|
|
|
- set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed);
|
|
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
/*
|
|
- * For GSI >= 64, use IRQ compression
|
|
+ * only replace the one with mp_INT and
|
|
+ * MP_IRQ_TRIGGER_LEVEL|MP_IRQ_POLARITY_LOW,
|
|
+ * already in mp_irqs , stored by ... and mp_config_acpi_gsi,
|
|
+ * may need pci=routeirq for all coverage
|
|
*/
|
|
- if ((gsi >= IRQ_COMPRESSION_START)
|
|
- && (triggering == ACPI_LEVEL_SENSITIVE)) {
|
|
- /*
|
|
- * For PCI devices assign IRQs in order, avoiding gaps
|
|
- * due to unused I/O APIC pins.
|
|
- */
|
|
- int irq = gsi;
|
|
- if (gsi < MAX_GSI_NUM) {
|
|
- /*
|
|
- * Retain the VIA chipset work-around (gsi > 15), but
|
|
- * avoid a problem where the 8254 timer (IRQ0) is setup
|
|
- * via an override (so it's not on pin 0 of the ioapic),
|
|
- * and at the same time, the pin 0 interrupt is a PCI
|
|
- * type. The gsi > 15 test could cause these two pins
|
|
- * to be shared as IRQ0, and they are not shareable.
|
|
- * So test for this condition, and if necessary, avoid
|
|
- * the pin collision.
|
|
- */
|
|
- gsi = pci_irq++;
|
|
- /*
|
|
- * Don't assign IRQ used by ACPI SCI
|
|
- */
|
|
- if (gsi == acpi_gbl_FADT.sci_interrupt)
|
|
- gsi = pci_irq++;
|
|
- gsi_to_irq[irq] = gsi;
|
|
- } else {
|
|
- printk(KERN_ERR "GSI %u is too high\n", gsi);
|
|
- return gsi;
|
|
- }
|
|
- }
|
|
-#endif
|
|
- io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
|
|
- triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
|
|
- polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
|
|
- return gsi;
|
|
+ replace_intsrc_all(mpc, mpc_new_phys, mpc_new_length);
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
-#endif /* CONFIG_X86_IO_APIC */
|
|
-#endif /* CONFIG_ACPI */
|
|
+late_initcall(update_mp_table);
|
|
--- head-2011-03-11.orig/arch/x86/kernel/pci-dma-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -5,13 +5,13 @@
|
|
|
|
#include <asm/proto.h>
|
|
#include <asm/dma.h>
|
|
-#include <asm/gart.h>
|
|
+#include <asm/iommu.h>
|
|
#include <asm/calgary.h>
|
|
+#include <asm/amd_iommu.h>
|
|
|
|
-int forbid_dac __read_mostly;
|
|
-EXPORT_SYMBOL(forbid_dac);
|
|
+static int forbid_dac __read_mostly;
|
|
|
|
-const struct dma_mapping_ops *dma_ops;
|
|
+struct dma_mapping_ops *dma_ops;
|
|
EXPORT_SYMBOL(dma_ops);
|
|
|
|
static int iommu_sac_force __read_mostly;
|
|
@@ -74,13 +74,17 @@ early_param("dma32_size", parse_dma32_si
|
|
void __init dma32_reserve_bootmem(void)
|
|
{
|
|
unsigned long size, align;
|
|
- if (end_pfn <= MAX_DMA32_PFN)
|
|
+ if (max_pfn <= MAX_DMA32_PFN)
|
|
return;
|
|
|
|
+ /*
|
|
+ * check aperture_64.c allocate_aperture() for reason about
|
|
+ * using 512M as goal
|
|
+ */
|
|
align = 64ULL<<20;
|
|
size = round_up(dma32_bootmem_size, align);
|
|
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
|
|
- __pa(MAX_DMA_ADDRESS));
|
|
+ 512ULL<<20);
|
|
if (dma32_bootmem_ptr)
|
|
dma32_bootmem_size = size;
|
|
else
|
|
@@ -88,17 +92,14 @@ void __init dma32_reserve_bootmem(void)
|
|
}
|
|
static void __init dma32_free_bootmem(void)
|
|
{
|
|
- int node;
|
|
|
|
- if (end_pfn <= MAX_DMA32_PFN)
|
|
+ if (max_pfn <= MAX_DMA32_PFN)
|
|
return;
|
|
|
|
if (!dma32_bootmem_ptr)
|
|
return;
|
|
|
|
- for_each_online_node(node)
|
|
- free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr),
|
|
- dma32_bootmem_size);
|
|
+ free_bootmem(__pa(dma32_bootmem_ptr), dma32_bootmem_size);
|
|
|
|
dma32_bootmem_ptr = NULL;
|
|
dma32_bootmem_size = 0;
|
|
@@ -107,7 +108,7 @@ static void __init dma32_free_bootmem(vo
|
|
#define dma32_free_bootmem() ((void)0)
|
|
#endif
|
|
|
|
-static const struct dma_mapping_ops swiotlb_dma_ops = {
|
|
+static struct dma_mapping_ops swiotlb_dma_ops = {
|
|
.mapping_error = swiotlb_dma_mapping_error,
|
|
.map_single = swiotlb_map_single_phys,
|
|
.unmap_single = swiotlb_unmap_single,
|
|
@@ -130,25 +131,31 @@ void __init pci_iommu_alloc(void)
|
|
* The order of these functions is important for
|
|
* fall-back/fail-over reasons
|
|
*/
|
|
-#ifdef CONFIG_GART_IOMMU
|
|
gart_iommu_hole_init();
|
|
-#endif
|
|
|
|
-#ifdef CONFIG_CALGARY_IOMMU
|
|
detect_calgary();
|
|
-#endif
|
|
|
|
detect_intel_iommu();
|
|
|
|
-#ifdef CONFIG_SWIOTLB
|
|
+ amd_iommu_detect();
|
|
+
|
|
swiotlb_init();
|
|
if (swiotlb) {
|
|
printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
|
|
dma_ops = &swiotlb_dma_ops;
|
|
}
|
|
-#endif
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+unsigned long iommu_num_pages(unsigned long addr, unsigned long len)
|
|
+{
|
|
+ unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE);
|
|
+
|
|
+ return size >> PAGE_SHIFT;
|
|
+}
|
|
+EXPORT_SYMBOL(iommu_num_pages);
|
|
+#endif
|
|
+
|
|
/*
|
|
* See <Documentation/x86_64/boot-options.txt> for the iommu kernel parameter
|
|
* documentation.
|
|
@@ -201,9 +208,7 @@ static __init int iommu_setup(char *p)
|
|
swiotlb = 1;
|
|
#endif
|
|
|
|
-#ifdef CONFIG_GART_IOMMU
|
|
gart_parse_options(p);
|
|
-#endif
|
|
|
|
#ifdef CONFIG_CALGARY_IOMMU
|
|
if (!strncmp(p, "calgary", 7))
|
|
@@ -245,136 +250,19 @@ int range_straddles_page_boundary(paddr_
|
|
!check_pages_physically_contiguous(pfn, offset, size));
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
|
|
- dma_addr_t device_addr, size_t size, int flags)
|
|
-{
|
|
- void __iomem *mem_base = NULL;
|
|
- int pages = size >> PAGE_SHIFT;
|
|
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
|
|
-
|
|
- if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
|
|
- goto out;
|
|
- if (!size)
|
|
- goto out;
|
|
- if (dev->dma_mem)
|
|
- goto out;
|
|
-
|
|
- /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */
|
|
-
|
|
- mem_base = ioremap(bus_addr, size);
|
|
- if (!mem_base)
|
|
- goto out;
|
|
-
|
|
- dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
|
|
- if (!dev->dma_mem)
|
|
- goto out;
|
|
- dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
|
|
- if (!dev->dma_mem->bitmap)
|
|
- goto free1_out;
|
|
-
|
|
- dev->dma_mem->virt_base = mem_base;
|
|
- dev->dma_mem->device_base = device_addr;
|
|
- dev->dma_mem->size = pages;
|
|
- dev->dma_mem->flags = flags;
|
|
-
|
|
- if (flags & DMA_MEMORY_MAP)
|
|
- return DMA_MEMORY_MAP;
|
|
-
|
|
- return DMA_MEMORY_IO;
|
|
-
|
|
- free1_out:
|
|
- kfree(dev->dma_mem);
|
|
- out:
|
|
- if (mem_base)
|
|
- iounmap(mem_base);
|
|
- return 0;
|
|
-}
|
|
-EXPORT_SYMBOL(dma_declare_coherent_memory);
|
|
-
|
|
-void dma_release_declared_memory(struct device *dev)
|
|
-{
|
|
- struct dma_coherent_mem *mem = dev->dma_mem;
|
|
-
|
|
- if (!mem)
|
|
- return;
|
|
- dev->dma_mem = NULL;
|
|
- iounmap(mem->virt_base);
|
|
- kfree(mem->bitmap);
|
|
- kfree(mem);
|
|
-}
|
|
-EXPORT_SYMBOL(dma_release_declared_memory);
|
|
-
|
|
-void *dma_mark_declared_memory_occupied(struct device *dev,
|
|
- dma_addr_t device_addr, size_t size)
|
|
-{
|
|
- struct dma_coherent_mem *mem = dev->dma_mem;
|
|
- int pos, err;
|
|
- int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1);
|
|
-
|
|
- pages >>= PAGE_SHIFT;
|
|
-
|
|
- if (!mem)
|
|
- return ERR_PTR(-EINVAL);
|
|
-
|
|
- pos = (device_addr - mem->device_base) >> PAGE_SHIFT;
|
|
- err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages));
|
|
- if (err != 0)
|
|
- return ERR_PTR(err);
|
|
- return mem->virt_base + (pos << PAGE_SHIFT);
|
|
-}
|
|
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
|
|
-
|
|
-static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size,
|
|
- dma_addr_t *dma_handle, void **ret)
|
|
-{
|
|
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
|
|
- int order = get_order(size);
|
|
-
|
|
- if (mem) {
|
|
- int page = bitmap_find_free_region(mem->bitmap, mem->size,
|
|
- order);
|
|
- if (page >= 0) {
|
|
- *dma_handle = mem->device_base + (page << PAGE_SHIFT);
|
|
- *ret = mem->virt_base + (page << PAGE_SHIFT);
|
|
- memset(*ret, 0, size);
|
|
- }
|
|
- if (mem->flags & DMA_MEMORY_EXCLUSIVE)
|
|
- *ret = NULL;
|
|
- }
|
|
- return (mem != NULL);
|
|
-}
|
|
-
|
|
-static int dma_release_coherent(struct device *dev, int order, void *vaddr)
|
|
-{
|
|
- struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
|
|
-
|
|
- if (mem && vaddr >= mem->virt_base && vaddr <
|
|
- (mem->virt_base + (mem->size << PAGE_SHIFT))) {
|
|
- int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
|
|
-
|
|
- bitmap_release_region(mem->bitmap, page, order);
|
|
- return 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-#else
|
|
-#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0)
|
|
-#define dma_release_coherent(dev, order, vaddr) (0)
|
|
-#endif /* CONFIG_X86_32 */
|
|
-
|
|
int dma_supported(struct device *dev, u64 mask)
|
|
{
|
|
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
|
|
+
|
|
#ifdef CONFIG_PCI
|
|
if (mask > 0xffffffff && forbid_dac > 0) {
|
|
- printk(KERN_INFO "PCI: Disallowing DAC for device %s\n",
|
|
- dev->bus_id);
|
|
+ dev_info(dev, "PCI: Disallowing DAC for device\n");
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
- if (dma_ops->dma_supported)
|
|
- return dma_ops->dma_supported(dev, mask);
|
|
+ if (ops->dma_supported)
|
|
+ return ops->dma_supported(dev, mask);
|
|
|
|
/* Copied from i386. Doesn't make much sense, because it will
|
|
only work for pci_alloc_coherent.
|
|
@@ -395,8 +283,7 @@ int dma_supported(struct device *dev, u6
|
|
type. Normally this doesn't make any difference, but gives
|
|
more gentle handling of IOMMU overflow. */
|
|
if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
|
|
- printk(KERN_INFO "%s: Force SAC with mask %Lx\n",
|
|
- dev->bus_id, mask);
|
|
+ dev_info(dev, "Force SAC with mask %Lx\n", mask);
|
|
return 0;
|
|
}
|
|
|
|
@@ -422,6 +309,9 @@ void *
|
|
dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle,
|
|
gfp_t gfp)
|
|
{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
|
|
+#endif
|
|
void *memory = NULL;
|
|
struct page *page;
|
|
unsigned long dma_mask = 0;
|
|
@@ -431,7 +321,7 @@ dma_alloc_coherent(struct device *dev, s
|
|
/* ignore region specifiers */
|
|
gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32);
|
|
|
|
- if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory))
|
|
+ if (dma_alloc_from_coherent(dev, size, dma_handle, &memory))
|
|
return memory;
|
|
|
|
if (!dev) {
|
|
@@ -491,8 +381,8 @@ dma_alloc_coherent(struct device *dev, s
|
|
/* Let low level make its own zone decisions */
|
|
gfp &= ~(GFP_DMA32|GFP_DMA);
|
|
|
|
- if (dma_ops->alloc_coherent)
|
|
- return dma_ops->alloc_coherent(dev, size,
|
|
+ if (ops->alloc_coherent)
|
|
+ return ops->alloc_coherent(dev, size,
|
|
dma_handle, gfp);
|
|
return NULL;
|
|
}
|
|
@@ -504,14 +394,14 @@ dma_alloc_coherent(struct device *dev, s
|
|
}
|
|
}
|
|
|
|
- if (dma_ops->alloc_coherent) {
|
|
+ if (ops->alloc_coherent) {
|
|
free_pages((unsigned long)memory, order);
|
|
gfp &= ~(GFP_DMA|GFP_DMA32);
|
|
- return dma_ops->alloc_coherent(dev, size, dma_handle, gfp);
|
|
+ return ops->alloc_coherent(dev, size, dma_handle, gfp);
|
|
}
|
|
|
|
- if (dma_ops->map_simple) {
|
|
- *dma_handle = dma_ops->map_simple(dev, virt_to_bus(memory),
|
|
+ if (ops->map_simple) {
|
|
+ *dma_handle = ops->map_simple(dev, virt_to_bus(memory),
|
|
size,
|
|
PCI_DMA_BIDIRECTIONAL);
|
|
if (*dma_handle != bad_dma_address)
|
|
@@ -542,13 +432,17 @@ EXPORT_SYMBOL(dma_alloc_coherent);
|
|
void dma_free_coherent(struct device *dev, size_t size,
|
|
void *vaddr, dma_addr_t bus)
|
|
{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct dma_mapping_ops *ops = get_dma_ops(dev);
|
|
+#endif
|
|
+
|
|
int order = get_order(size);
|
|
WARN_ON(irqs_disabled()); /* for portability */
|
|
- if (dma_release_coherent(dev, order, vaddr))
|
|
+ if (dma_release_from_coherent(dev, order, vaddr))
|
|
return;
|
|
#ifndef CONFIG_XEN
|
|
- if (dma_ops->unmap_single)
|
|
- dma_ops->unmap_single(dev, bus, size, 0);
|
|
+ if (ops->unmap_single)
|
|
+ ops->unmap_single(dev, bus, size, 0);
|
|
#endif
|
|
xen_destroy_contiguous_region((unsigned long)vaddr, order);
|
|
free_pages((unsigned long)vaddr, order);
|
|
@@ -557,15 +451,13 @@ EXPORT_SYMBOL(dma_free_coherent);
|
|
|
|
static int __init pci_iommu_init(void)
|
|
{
|
|
-#ifdef CONFIG_CALGARY_IOMMU
|
|
calgary_iommu_init();
|
|
-#endif
|
|
|
|
intel_iommu_init();
|
|
|
|
-#ifdef CONFIG_GART_IOMMU
|
|
+ amd_iommu_init();
|
|
+
|
|
gart_iommu_init();
|
|
-#endif
|
|
|
|
no_iommu_init();
|
|
return 0;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/pci-nommu-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -84,18 +84,12 @@ static int nommu_dma_supported(struct de
|
|
return 1;
|
|
}
|
|
|
|
-static int nommu_mapping_error(dma_addr_t dma_addr)
|
|
-{
|
|
- return (dma_addr == bad_dma_address);
|
|
-}
|
|
-
|
|
-static const struct dma_mapping_ops nommu_dma_ops = {
|
|
+static struct dma_mapping_ops nommu_dma_ops = {
|
|
.map_single = gnttab_map_single,
|
|
.unmap_single = gnttab_unmap_single,
|
|
.map_sg = gnttab_map_sg,
|
|
.unmap_sg = gnttab_unmap_sg,
|
|
.dma_supported = nommu_dma_supported,
|
|
- .mapping_error = nommu_mapping_error
|
|
};
|
|
|
|
void __init no_iommu_init(void)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/probe_roms_32.c 2011-03-15 16:45:55.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/probe_roms_32.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -131,7 +131,7 @@ void __init probe_roms(void)
|
|
upper = system_rom_resource.start;
|
|
|
|
/* check for extension rom (ignore length byte!) */
|
|
- rom = isa_bus_to_virt(extension_rom_resource.start);
|
|
+ rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
|
|
if (romsignature(rom)) {
|
|
length = extension_rom_resource.end - extension_rom_resource.start + 1;
|
|
if (romchecksum(rom, length)) {
|
|
--- head-2011-03-11.orig/arch/x86/kernel/process-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/process-xen.c 2011-03-03 15:59:49.000000000 +0100
|
|
@@ -6,6 +6,13 @@
|
|
#include <linux/sched.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pm.h>
|
|
+#include <linux/clockchips.h>
|
|
+#include <asm/system.h>
|
|
+
|
|
+unsigned long idle_halt;
|
|
+EXPORT_SYMBOL(idle_halt);
|
|
+unsigned long idle_nomwait;
|
|
+EXPORT_SYMBOL(idle_nomwait);
|
|
|
|
struct kmem_cache *task_xstate_cachep;
|
|
|
|
@@ -45,6 +52,41 @@ void arch_task_cache_init(void)
|
|
SLAB_PANIC, NULL);
|
|
}
|
|
|
|
+/*
|
|
+ * Idle related variables and functions
|
|
+ */
|
|
+unsigned long boot_option_idle_override = 0;
|
|
+EXPORT_SYMBOL(boot_option_idle_override);
|
|
+
|
|
+/*
|
|
+ * Powermanagement idle function, if any..
|
|
+ */
|
|
+void (*pm_idle)(void);
|
|
+EXPORT_SYMBOL(pm_idle);
|
|
+
|
|
+/*
|
|
+ * We use this if we don't have any better
|
|
+ * idle routine..
|
|
+ */
|
|
+void xen_idle(void)
|
|
+{
|
|
+ current_thread_info()->status &= ~TS_POLLING;
|
|
+ /*
|
|
+ * TS_POLLING-cleared state must be visible before we
|
|
+ * test NEED_RESCHED:
|
|
+ */
|
|
+ smp_mb();
|
|
+
|
|
+ if (!need_resched())
|
|
+ safe_halt(); /* enables interrupts racelessly */
|
|
+ else
|
|
+ local_irq_enable();
|
|
+ current_thread_info()->status |= TS_POLLING;
|
|
+}
|
|
+#ifdef CONFIG_APM_MODULE
|
|
+EXPORT_SYMBOL(default_idle);
|
|
+#endif
|
|
+
|
|
static void do_nothing(void *unused)
|
|
{
|
|
}
|
|
@@ -61,7 +103,7 @@ void cpu_idle_wait(void)
|
|
{
|
|
smp_mb();
|
|
/* kick all the CPUs so that they exit out of pm_idle */
|
|
- smp_call_function(do_nothing, NULL, 0, 1);
|
|
+ smp_call_function(do_nothing, NULL, 1);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_idle_wait);
|
|
|
|
@@ -125,60 +167,175 @@ static void poll_idle(void)
|
|
*
|
|
* idle=mwait overrides this decision and forces the usage of mwait.
|
|
*/
|
|
+static int __cpuinitdata force_mwait;
|
|
+
|
|
+#define MWAIT_INFO 0x05
|
|
+#define MWAIT_ECX_EXTENDED_INFO 0x01
|
|
+#define MWAIT_EDX_C1 0xf0
|
|
+
|
|
static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
|
|
{
|
|
+ u32 eax, ebx, ecx, edx;
|
|
+
|
|
if (force_mwait)
|
|
return 1;
|
|
|
|
- if (c->x86_vendor == X86_VENDOR_AMD) {
|
|
- switch(c->x86) {
|
|
- case 0x10:
|
|
- case 0x11:
|
|
- return 0;
|
|
+ if (c->cpuid_level < MWAIT_INFO)
|
|
+ return 0;
|
|
+
|
|
+ cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
|
|
+ /* Check, whether EDX has extended info about MWAIT */
|
|
+ if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
|
|
+ return 1;
|
|
+
|
|
+ /*
|
|
+ * edx enumeratios MONITOR/MWAIT extensions. Check, whether
|
|
+ * C1 supports MWAIT
|
|
+ */
|
|
+ return (edx & MWAIT_EDX_C1);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Check for AMD CPUs, which have potentially C1E support
|
|
+ */
|
|
+static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
|
|
+{
|
|
+ if (c->x86_vendor != X86_VENDOR_AMD)
|
|
+ return 0;
|
|
+
|
|
+ if (c->x86 < 0x0F)
|
|
+ return 0;
|
|
+
|
|
+ /* Family 0x0f models < rev F do not have C1E */
|
|
+ if (c->x86 == 0x0f && c->x86_model < 0x40)
|
|
+ return 0;
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+static cpumask_t c1e_mask = CPU_MASK_NONE;
|
|
+static int c1e_detected;
|
|
+
|
|
+void c1e_remove_cpu(int cpu)
|
|
+{
|
|
+ cpu_clear(cpu, c1e_mask);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * C1E aware idle routine. We check for C1E active in the interrupt
|
|
+ * pending message MSR. If we detect C1E, then we handle it the same
|
|
+ * way as C3 power states (local apic timer and TSC stop)
|
|
+ */
|
|
+static void c1e_idle(void)
|
|
+{
|
|
+ if (need_resched())
|
|
+ return;
|
|
+
|
|
+ if (!c1e_detected) {
|
|
+ u32 lo, hi;
|
|
+
|
|
+ rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
|
|
+ if (lo & K8_INTP_C1E_ACTIVE_MASK) {
|
|
+ c1e_detected = 1;
|
|
+ if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
|
|
+ mark_tsc_unstable("TSC halt in AMD C1E");
|
|
+ printk(KERN_INFO "System has AMD C1E enabled\n");
|
|
+ set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
|
|
}
|
|
}
|
|
- return 1;
|
|
+
|
|
+ if (c1e_detected) {
|
|
+ int cpu = smp_processor_id();
|
|
+
|
|
+ if (!cpu_isset(cpu, c1e_mask)) {
|
|
+ cpu_set(cpu, c1e_mask);
|
|
+ /*
|
|
+ * Force broadcast so ACPI can not interfere. Needs
|
|
+ * to run with interrupts enabled as it uses
|
|
+ * smp_function_call.
|
|
+ */
|
|
+ local_irq_enable();
|
|
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
|
|
+ &cpu);
|
|
+ printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
|
|
+ cpu);
|
|
+ local_irq_disable();
|
|
+ }
|
|
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
|
|
+
|
|
+ default_idle();
|
|
+
|
|
+ /*
|
|
+ * The switch back from broadcast mode needs to be
|
|
+ * called with interrupts disabled.
|
|
+ */
|
|
+ local_irq_disable();
|
|
+ clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
|
|
+ local_irq_enable();
|
|
+ } else
|
|
+ default_idle();
|
|
}
|
|
#endif
|
|
|
|
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
- static int selected;
|
|
-
|
|
- if (selected)
|
|
- return;
|
|
#ifdef CONFIG_X86_SMP
|
|
if (pm_idle == poll_idle && smp_num_siblings > 1) {
|
|
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
|
|
" performance may degrade.\n");
|
|
}
|
|
#endif
|
|
+ if (pm_idle)
|
|
+ return;
|
|
+
|
|
if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
|
|
/*
|
|
- * Skip, if setup has overridden idle.
|
|
* One CPU supports mwait => All CPUs supports mwait
|
|
*/
|
|
- if (!pm_idle) {
|
|
- printk(KERN_INFO "using mwait in idle threads.\n");
|
|
- pm_idle = mwait_idle;
|
|
- }
|
|
- }
|
|
- selected = 1;
|
|
+ printk(KERN_INFO "using mwait in idle threads.\n");
|
|
+ pm_idle = mwait_idle;
|
|
+ } else if (check_c1e_idle(c)) {
|
|
+ printk(KERN_INFO "using C1E aware idle routine\n");
|
|
+ pm_idle = c1e_idle;
|
|
+ } else
|
|
+ pm_idle = default_idle;
|
|
#endif
|
|
}
|
|
|
|
static int __init idle_setup(char *str)
|
|
{
|
|
+ if (!str)
|
|
+ return -EINVAL;
|
|
+
|
|
if (!strcmp(str, "poll")) {
|
|
printk("using polling idle threads.\n");
|
|
pm_idle = poll_idle;
|
|
- }
|
|
#ifndef CONFIG_XEN
|
|
- else if (!strcmp(str, "mwait"))
|
|
+ } else if (!strcmp(str, "mwait"))
|
|
force_mwait = 1;
|
|
+ else if (!strcmp(str, "halt")) {
|
|
+ /*
|
|
+ * When the boot option of idle=halt is added, halt is
|
|
+ * forced to be used for CPU idle. In such case CPU C2/C3
|
|
+ * won't be used again.
|
|
+ * To continue to load the CPU idle driver, don't touch
|
|
+ * the boot_option_idle_override.
|
|
+ */
|
|
+ pm_idle = default_idle;
|
|
+ idle_halt = 1;
|
|
+ return 0;
|
|
+ } else if (!strcmp(str, "nomwait")) {
|
|
+ /*
|
|
+ * If the boot option of "idle=nomwait" is added,
|
|
+ * it means that mwait will be disabled for CPU C2/C3
|
|
+ * states. In such case it won't touch the variable
|
|
+ * of boot_option_idle_override.
|
|
+ */
|
|
+ idle_nomwait = 1;
|
|
+ return 0;
|
|
#endif
|
|
- else
|
|
+ } else
|
|
return -1;
|
|
|
|
boot_option_idle_override = 1;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/process_32-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/process_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -59,15 +59,11 @@
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/cpu.h>
|
|
#include <asm/kdebug.h>
|
|
+#include <asm/idle.h>
|
|
|
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
|
|
|
|
-static int hlt_counter;
|
|
-
|
|
-unsigned long boot_option_idle_override = 0;
|
|
-EXPORT_SYMBOL(boot_option_idle_override);
|
|
-
|
|
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
|
EXPORT_PER_CPU_SYMBOL(current_task);
|
|
|
|
@@ -82,46 +78,27 @@ unsigned long thread_saved_pc(struct tas
|
|
return ((unsigned long *)tsk->thread.sp)[3];
|
|
}
|
|
|
|
-/*
|
|
- * Powermanagement idle function, if any..
|
|
- */
|
|
-void (*pm_idle)(void);
|
|
-EXPORT_SYMBOL(pm_idle);
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+#ifndef CONFIG_XEN
|
|
+#include <asm/nmi.h>
|
|
|
|
-void disable_hlt(void)
|
|
+static void cpu_exit_clear(void)
|
|
{
|
|
- hlt_counter++;
|
|
-}
|
|
+ int cpu = raw_smp_processor_id();
|
|
|
|
-EXPORT_SYMBOL(disable_hlt);
|
|
-
|
|
-void enable_hlt(void)
|
|
-{
|
|
- hlt_counter--;
|
|
-}
|
|
+ idle_task_exit();
|
|
|
|
-EXPORT_SYMBOL(enable_hlt);
|
|
+ cpu_uninit();
|
|
+ irq_ctx_exit(cpu);
|
|
|
|
-static void xen_idle(void)
|
|
-{
|
|
- current_thread_info()->status &= ~TS_POLLING;
|
|
- /*
|
|
- * TS_POLLING-cleared state must be visible before we
|
|
- * test NEED_RESCHED:
|
|
- */
|
|
- smp_mb();
|
|
+ cpu_clear(cpu, cpu_callout_map);
|
|
+ cpu_clear(cpu, cpu_callin_map);
|
|
|
|
- if (!need_resched())
|
|
- safe_halt(); /* enables interrupts racelessly */
|
|
- else
|
|
- local_irq_enable();
|
|
- current_thread_info()->status |= TS_POLLING;
|
|
+ numa_remove_cpu(cpu);
|
|
+ c1e_remove_cpu(cpu);
|
|
}
|
|
-#ifdef CONFIG_APM_MODULE
|
|
-EXPORT_SYMBOL(default_idle);
|
|
#endif
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
static inline void play_dead(void)
|
|
{
|
|
idle_task_exit();
|
|
@@ -152,13 +129,11 @@ void cpu_idle(void)
|
|
|
|
/* endless idle loop with no priority at all */
|
|
while (1) {
|
|
- tick_nohz_stop_sched_tick();
|
|
+ tick_nohz_stop_sched_tick(1);
|
|
while (!need_resched()) {
|
|
- void (*idle)(void);
|
|
|
|
check_pgt_cache();
|
|
rmb();
|
|
- idle = xen_idle; /* no alternatives */
|
|
|
|
if (rcu_pending(cpu))
|
|
rcu_check_callbacks(cpu, 0);
|
|
@@ -168,7 +143,10 @@ void cpu_idle(void)
|
|
|
|
local_irq_disable();
|
|
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
|
|
- idle();
|
|
+ /* Don't trace irqs off for idle */
|
|
+ stop_critical_timings();
|
|
+ xen_idle();
|
|
+ start_critical_timings();
|
|
}
|
|
tick_nohz_restart_sched_tick();
|
|
preempt_enable_no_resched();
|
|
--- head-2011-03-11.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:32:46.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/process_64-xen.c 2011-02-02 08:34:01.000000000 +0100
|
|
@@ -64,15 +64,6 @@ asmlinkage extern void ret_from_fork(voi
|
|
|
|
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
|
|
|
|
-unsigned long boot_option_idle_override = 0;
|
|
-EXPORT_SYMBOL(boot_option_idle_override);
|
|
-
|
|
-/*
|
|
- * Powermanagement idle function, if any..
|
|
- */
|
|
-void (*pm_idle)(void);
|
|
-EXPORT_SYMBOL(pm_idle);
|
|
-
|
|
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
|
|
|
|
void idle_notifier_register(struct notifier_block *n)
|
|
@@ -102,25 +93,13 @@ void exit_idle(void)
|
|
__exit_idle();
|
|
}
|
|
|
|
-static void xen_idle(void)
|
|
-{
|
|
- current_thread_info()->status &= ~TS_POLLING;
|
|
- /*
|
|
- * TS_POLLING-cleared state must be visible before we
|
|
- * test NEED_RESCHED:
|
|
- */
|
|
- smp_mb();
|
|
- if (!need_resched())
|
|
- safe_halt(); /* enables interrupts racelessly */
|
|
- else
|
|
- local_irq_enable();
|
|
- current_thread_info()->status |= TS_POLLING;
|
|
-}
|
|
-
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
static inline void play_dead(void)
|
|
{
|
|
idle_task_exit();
|
|
+#ifndef CONFIG_XEN
|
|
+ c1e_remove_cpu(raw_smp_processor_id());
|
|
+#endif
|
|
local_irq_disable();
|
|
cpu_clear(smp_processor_id(), cpu_initialized);
|
|
preempt_enable_no_resched();
|
|
@@ -145,12 +124,11 @@ void cpu_idle(void)
|
|
current_thread_info()->status |= TS_POLLING;
|
|
/* endless idle loop with no priority at all */
|
|
while (1) {
|
|
- tick_nohz_stop_sched_tick();
|
|
+ tick_nohz_stop_sched_tick(1);
|
|
while (!need_resched()) {
|
|
- void (*idle)(void);
|
|
|
|
rmb();
|
|
- idle = xen_idle; /* no alternatives */
|
|
+
|
|
if (cpu_is_offline(smp_processor_id()))
|
|
play_dead();
|
|
/*
|
|
@@ -160,7 +138,10 @@ void cpu_idle(void)
|
|
*/
|
|
local_irq_disable();
|
|
enter_idle();
|
|
- idle();
|
|
+ /* Don't trace irqs off for idle */
|
|
+ stop_critical_timings();
|
|
+ xen_idle();
|
|
+ start_critical_timings();
|
|
/* In many cases the interrupt that ended idle
|
|
has already called exit_idle. But some idle
|
|
loops can be woken up without interrupt. */
|
|
@@ -270,7 +251,7 @@ void exit_thread(void)
|
|
}
|
|
}
|
|
|
|
-void load_gs_index(unsigned gs)
|
|
+void xen_load_gs_index(unsigned gs)
|
|
{
|
|
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
|
|
}
|
|
@@ -371,10 +352,10 @@ int copy_thread(int nr, unsigned long cl
|
|
p->thread.fs = me->thread.fs;
|
|
p->thread.gs = me->thread.gs;
|
|
|
|
- asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
|
|
- asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
|
|
- asm("mov %%es,%0" : "=m" (p->thread.es));
|
|
- asm("mov %%ds,%0" : "=m" (p->thread.ds));
|
|
+ savesegment(gs, p->thread.gsindex);
|
|
+ savesegment(fs, p->thread.fsindex);
|
|
+ savesegment(es, p->thread.es);
|
|
+ savesegment(ds, p->thread.ds);
|
|
|
|
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
|
|
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
|
|
@@ -415,7 +396,9 @@ out:
|
|
void
|
|
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
{
|
|
- asm volatile("movl %0, %%fs; movl %0, %%es; movl %0, %%ds" :: "r"(0));
|
|
+ loadsegment(fs, 0);
|
|
+ loadsegment(es, 0);
|
|
+ loadsegment(ds, 0);
|
|
load_gs_index(0);
|
|
regs->ip = new_ip;
|
|
regs->sp = new_sp;
|
|
@@ -554,8 +537,8 @@ static inline void __switch_to_xtra(stru
|
|
struct task_struct *
|
|
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
|
|
{
|
|
- struct thread_struct *prev = &prev_p->thread,
|
|
- *next = &next_p->thread;
|
|
+ struct thread_struct *prev = &prev_p->thread;
|
|
+ struct thread_struct *next = &next_p->thread;
|
|
int cpu = smp_processor_id();
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
struct tss_struct *tss = &per_cpu(init_tss, cpu);
|
|
@@ -658,12 +641,25 @@ __switch_to(struct task_struct *prev_p,
|
|
*/
|
|
if (unlikely(next->es))
|
|
loadsegment(es, next->es);
|
|
-
|
|
+
|
|
if (unlikely(next->ds))
|
|
loadsegment(ds, next->ds);
|
|
|
|
+ /*
|
|
+ * Leave lazy mode, flushing any hypercalls made here.
|
|
+ * This must be done before restoring TLS segments so
|
|
+ * the GDT and LDT are properly updated, and must be
|
|
+ * done before math_state_restore, so the TS bit is up
|
|
+ * to date.
|
|
+ */
|
|
+ arch_leave_lazy_cpu_mode();
|
|
+
|
|
/*
|
|
* Switch FS and GS.
|
|
+ *
|
|
+ * Segment register != 0 always requires a reload. Also
|
|
+ * reload when it has changed. When prev process used 64bit
|
|
+ * base always reload to avoid an information leak.
|
|
*/
|
|
if (unlikely(next->fsindex))
|
|
loadsegment(fs, next->fsindex);
|
|
@@ -682,7 +678,8 @@ __switch_to(struct task_struct *prev_p,
|
|
*/
|
|
write_pda(pcurrent, next_p);
|
|
write_pda(kernelstack,
|
|
- (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
|
|
+ (unsigned long)task_stack_page(next_p) +
|
|
+ THREAD_SIZE - PDA_STACKOFFSET);
|
|
#ifdef CONFIG_CC_STACKPROTECTOR
|
|
write_pda(stack_canary, next_p->stack_canary);
|
|
|
|
@@ -843,7 +840,7 @@ long do_arch_prctl(struct task_struct *t
|
|
set_32bit_tls(task, FS_TLS, addr);
|
|
if (doit) {
|
|
load_TLS(&task->thread, cpu);
|
|
- asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
|
|
+ loadsegment(fs, FS_TLS_SEL);
|
|
}
|
|
task->thread.fsindex = FS_TLS_SEL;
|
|
task->thread.fs = 0;
|
|
@@ -853,7 +850,7 @@ long do_arch_prctl(struct task_struct *t
|
|
if (doit) {
|
|
/* set the selector to 0 to not confuse
|
|
__switch_to */
|
|
- asm volatile("movl %0,%%fs" :: "r" (0));
|
|
+ loadsegment(fs, 0);
|
|
ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
|
|
addr);
|
|
}
|
|
@@ -877,7 +874,7 @@ long do_arch_prctl(struct task_struct *t
|
|
if (task->thread.gsindex == GS_TLS_SEL)
|
|
base = read_32bit_tls(task, GS_TLS);
|
|
else if (doit) {
|
|
- asm("movl %%gs,%0" : "=r" (gsindex));
|
|
+ savesegment(gs, gsindex);
|
|
if (gsindex)
|
|
rdmsrl(MSR_KERNEL_GS_BASE, base);
|
|
else
|
|
--- head-2011-03-11.orig/arch/x86/kernel/setup-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/setup-xen.c 2011-03-04 15:09:03.000000000 +0100
|
|
@@ -1,143 +1,1132 @@
|
|
-#include <linux/kernel.h>
|
|
+/*
|
|
+ * Copyright (C) 1995 Linus Torvalds
|
|
+ *
|
|
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
|
+ *
|
|
+ * Memory region support
|
|
+ * David Parsons <orc@pell.chi.il.us>, July-August 1999
|
|
+ *
|
|
+ * Added E820 sanitization routine (removes overlapping memory regions);
|
|
+ * Brian Moyle <bmoyle@mvista.com>, February 2001
|
|
+ *
|
|
+ * Moved CPU detection code to cpu/${cpu}.c
|
|
+ * Patrick Mochel <mochel@osdl.org>, March 2002
|
|
+ *
|
|
+ * Provisions for empty E820 memory regions (reported by certain BIOSes).
|
|
+ * Alex Achenbach <xela@slit.de>, December 2002.
|
|
+ *
|
|
+ */
|
|
+
|
|
+/*
|
|
+ * This file handles the architecture-dependent parts of initialization
|
|
+ */
|
|
+
|
|
+#include <linux/sched.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/mmzone.h>
|
|
+#include <linux/screen_info.h>
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/acpi.h>
|
|
+#include <linux/apm_bios.h>
|
|
+#include <linux/initrd.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/seq_file.h>
|
|
+#include <linux/console.h>
|
|
+#include <linux/mca.h>
|
|
+#include <linux/root_dev.h>
|
|
+#include <linux/highmem.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/efi.h>
|
|
#include <linux/init.h>
|
|
-#include <linux/bootmem.h>
|
|
+#include <linux/edd.h>
|
|
+#include <linux/iscsi_ibft.h>
|
|
+#include <linux/nodemask.h>
|
|
+#include <linux/kexec.h>
|
|
+#include <linux/dmi.h>
|
|
+#include <linux/pfn.h>
|
|
+#include <linux/pci.h>
|
|
+#include <asm/pci-direct.h>
|
|
+#include <linux/init_ohci1394_dma.h>
|
|
+#include <linux/kvm_para.h>
|
|
+
|
|
+#include <linux/errno.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/stddef.h>
|
|
+#include <linux/unistd.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/user.h>
|
|
+#include <linux/delay.h>
|
|
+
|
|
+#include <linux/kallsyms.h>
|
|
+#include <linux/cpufreq.h>
|
|
+#include <linux/dma-mapping.h>
|
|
+#include <linux/ctype.h>
|
|
+#include <linux/uaccess.h>
|
|
+
|
|
#include <linux/percpu.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/percpu.h>
|
|
+#include <linux/crash_dump.h>
|
|
+
|
|
+#include <video/edid.h>
|
|
+
|
|
+#include <asm/mtrr.h>
|
|
+#include <asm/apic.h>
|
|
+#include <asm/e820.h>
|
|
+#include <asm/mpspec.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/arch_hooks.h>
|
|
+#include <asm/efi.h>
|
|
#include <asm/sections.h>
|
|
+#include <asm/dmi.h>
|
|
+#include <asm/io_apic.h>
|
|
+#include <asm/ist.h>
|
|
+#include <asm/vmi.h>
|
|
+#include <setup_arch.h>
|
|
+#include <asm/bios_ebda.h>
|
|
+#include <asm/cacheflush.h>
|
|
#include <asm/processor.h>
|
|
-#include <asm/setup.h>
|
|
+#include <asm/bugs.h>
|
|
+
|
|
+#include <asm/system.h>
|
|
+#include <asm/vsyscall.h>
|
|
+#include <asm/smp.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/dma.h>
|
|
+#include <asm/iommu.h>
|
|
+#include <asm/mmu_context.h>
|
|
+#include <asm/proto.h>
|
|
+
|
|
+#include <mach_apic.h>
|
|
+#include <asm/paravirt.h>
|
|
+
|
|
+#include <asm/percpu.h>
|
|
#include <asm/topology.h>
|
|
-#include <asm/mpspec.h>
|
|
#include <asm/apicdef.h>
|
|
+#ifdef CONFIG_X86_64
|
|
+#include <asm/numa_64.h>
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+#include <asm/hypervisor.h>
|
|
+#include <xen/interface/kexec.h>
|
|
+#include <xen/interface/memory.h>
|
|
+#include <xen/interface/nmi.h>
|
|
+#include <xen/interface/physdev.h>
|
|
+#include <xen/features.h>
|
|
+#include <xen/firmware.h>
|
|
+#include <xen/xencons.h>
|
|
+
|
|
+shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
+
|
|
+static int xen_panic_event(struct notifier_block *, unsigned long, void *);
|
|
+static struct notifier_block xen_panic_block = {
|
|
+ xen_panic_event, NULL, 0 /* try to go last */
|
|
+};
|
|
+
|
|
+unsigned long *phys_to_machine_mapping;
|
|
+EXPORT_SYMBOL(phys_to_machine_mapping);
|
|
+
|
|
+unsigned long *pfn_to_mfn_frame_list_list,
|
|
+#ifdef CONFIG_X86_64
|
|
+ *pfn_to_mfn_frame_list[512];
|
|
+#else
|
|
+ *pfn_to_mfn_frame_list[128];
|
|
+#endif
|
|
+
|
|
+/* Raw start-of-day parameters from the hypervisor. */
|
|
+start_info_t *xen_start_info;
|
|
+EXPORT_SYMBOL(xen_start_info);
|
|
+#endif
|
|
+
|
|
+#ifndef ARCH_SETUP
|
|
+#define ARCH_SETUP
|
|
+#endif
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-unsigned int num_processors;
|
|
-unsigned disabled_cpus __cpuinitdata;
|
|
#ifndef CONFIG_XEN
|
|
-/* Processor that is doing the boot up */
|
|
-unsigned int boot_cpu_physical_apicid = -1U;
|
|
-EXPORT_SYMBOL(boot_cpu_physical_apicid);
|
|
+#ifndef CONFIG_DEBUG_BOOT_PARAMS
|
|
+struct boot_params __initdata boot_params;
|
|
+#else
|
|
+struct boot_params boot_params;
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Machine setup..
|
|
+ */
|
|
+static struct resource data_resource = {
|
|
+ .name = "Kernel data",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource code_resource = {
|
|
+ .name = "Kernel code",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource bss_resource = {
|
|
+ .name = "Kernel bss",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
|
|
-DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
|
|
-EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
|
|
|
|
-/* Bitmask of physically existing CPUs */
|
|
-physid_mask_t phys_cpu_present_map;
|
|
+#ifdef CONFIG_X86_32
|
|
+#ifndef CONFIG_XEN
|
|
+/* This value is set up by the early boot code to point to the value
|
|
+ immediately after the boot time page tables. It contains a *physical*
|
|
+ address, and must not be in the .bss segment! */
|
|
+unsigned long init_pg_tables_start __initdata = ~0UL;
|
|
+unsigned long init_pg_tables_end __initdata = ~0UL;
|
|
#endif
|
|
+
|
|
+static struct resource video_ram_resource = {
|
|
+ .name = "Video RAM area",
|
|
+ .start = 0xa0000,
|
|
+ .end = 0xbffff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+/* cpu data as detected by the assembly code in head.S */
|
|
+struct cpuinfo_x86 new_cpu_data __cpuinitdata = { .wp_works_ok = 1, .hard_math = 1 };
|
|
+/* common cpu data for all cpus */
|
|
+struct cpuinfo_x86 boot_cpu_data __read_mostly = { .wp_works_ok = 1, .hard_math = 1 };
|
|
+EXPORT_SYMBOL(boot_cpu_data);
|
|
+#ifndef CONFIG_XEN
|
|
+static void set_mca_bus(int x)
|
|
+{
|
|
+#ifdef CONFIG_MCA
|
|
+ MCA_bus = x;
|
|
+#endif
|
|
+}
|
|
+
|
|
+unsigned int def_to_bigsmp;
|
|
+
|
|
+/* for MCA, but anyone else can use it if they want */
|
|
+unsigned int machine_id;
|
|
+unsigned int machine_submodel_id;
|
|
+unsigned int BIOS_revision;
|
|
+
|
|
+struct apm_info apm_info;
|
|
+EXPORT_SYMBOL(apm_info);
|
|
+#endif
|
|
+
|
|
+#if defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
|
|
+struct ist_info ist_info;
|
|
+EXPORT_SYMBOL(ist_info);
|
|
+#elif defined(CONFIG_X86_SPEEDSTEP_SMI)
|
|
+struct ist_info ist_info;
|
|
#endif
|
|
|
|
-#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP)
|
|
+#else
|
|
+struct cpuinfo_x86 boot_cpu_data __read_mostly;
|
|
+EXPORT_SYMBOL(boot_cpu_data);
|
|
+#endif
|
|
+
|
|
+
|
|
+#if !defined(CONFIG_X86_PAE) || defined(CONFIG_X86_64)
|
|
+unsigned long mmu_cr4_features;
|
|
+#else
|
|
+unsigned long mmu_cr4_features = X86_CR4_PAE;
|
|
+#endif
|
|
+
|
|
+/* Boot loader ID as an integer, for the benefit of proc_dointvec */
|
|
+int bootloader_type;
|
|
+
|
|
/*
|
|
- * Copy data used in early init routines from the initial arrays to the
|
|
- * per cpu data areas. These arrays then become expendable and the
|
|
- * *_early_ptr's are zeroed indicating that the static arrays are gone.
|
|
+ * Early DMI memory
|
|
+ */
|
|
+int dmi_alloc_index;
|
|
+char dmi_alloc_data[DMI_MAX_DATA];
|
|
+
|
|
+/*
|
|
+ * Setup options
|
|
+ */
|
|
+struct screen_info screen_info;
|
|
+EXPORT_SYMBOL(screen_info);
|
|
+struct edid_info edid_info;
|
|
+EXPORT_SYMBOL_GPL(edid_info);
|
|
+
|
|
+extern int root_mountflags;
|
|
+
|
|
+unsigned long saved_video_mode;
|
|
+
|
|
+#define RAMDISK_IMAGE_START_MASK 0x07FF
|
|
+#define RAMDISK_PROMPT_FLAG 0x8000
|
|
+#define RAMDISK_LOAD_FLAG 0x4000
|
|
+
|
|
+static char __initdata command_line[COMMAND_LINE_SIZE];
|
|
+
|
|
+#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
+struct edd edd;
|
|
+#ifdef CONFIG_EDD_MODULE
|
|
+EXPORT_SYMBOL(edd);
|
|
+#endif
|
|
+#ifndef CONFIG_XEN
|
|
+/**
|
|
+ * copy_edd() - Copy the BIOS EDD information
|
|
+ * from boot_params into a safe place.
|
|
+ *
|
|
*/
|
|
-static void __init setup_per_cpu_maps(void)
|
|
+static inline void copy_edd(void)
|
|
+{
|
|
+ memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
|
|
+ sizeof(edd.mbr_signature));
|
|
+ memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
|
|
+ edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
|
|
+ edd.edd_info_nr = boot_params.eddbuf_entries;
|
|
+}
|
|
+#endif
|
|
+#else
|
|
+static inline void copy_edd(void)
|
|
+{
|
|
+}
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_BLK_DEV_INITRD
|
|
+
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
+
|
|
+#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
|
|
+static void __init relocate_initrd(void)
|
|
+{
|
|
+
|
|
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
|
|
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
|
|
+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
|
|
+ u64 ramdisk_here;
|
|
+ unsigned long slop, clen, mapaddr;
|
|
+ char *p, *q;
|
|
+
|
|
+ /* We need to move the initrd down into lowmem */
|
|
+ ramdisk_here = find_e820_area(0, end_of_lowmem, ramdisk_size,
|
|
+ PAGE_SIZE);
|
|
+
|
|
+ if (ramdisk_here == -1ULL)
|
|
+ panic("Cannot find place for new RAMDISK of size %lld\n",
|
|
+ ramdisk_size);
|
|
+
|
|
+ /* Note: this includes all the lowmem currently occupied by
|
|
+ the initrd, we rely on that fact to keep the data intact. */
|
|
+ reserve_early(ramdisk_here, ramdisk_here + ramdisk_size,
|
|
+ "NEW RAMDISK");
|
|
+ initrd_start = ramdisk_here + PAGE_OFFSET;
|
|
+ initrd_end = initrd_start + ramdisk_size;
|
|
+ printk(KERN_INFO "Allocated new RAMDISK: %08llx - %08llx\n",
|
|
+ ramdisk_here, ramdisk_here + ramdisk_size);
|
|
+
|
|
+ q = (char *)initrd_start;
|
|
+
|
|
+ /* Copy any lowmem portion of the initrd */
|
|
+ if (ramdisk_image < end_of_lowmem) {
|
|
+ clen = end_of_lowmem - ramdisk_image;
|
|
+ p = (char *)__va(ramdisk_image);
|
|
+ memcpy(q, p, clen);
|
|
+ q += clen;
|
|
+ ramdisk_image += clen;
|
|
+ ramdisk_size -= clen;
|
|
+ }
|
|
+
|
|
+ /* Copy the highmem portion of the initrd */
|
|
+ while (ramdisk_size) {
|
|
+ slop = ramdisk_image & ~PAGE_MASK;
|
|
+ clen = ramdisk_size;
|
|
+ if (clen > MAX_MAP_CHUNK-slop)
|
|
+ clen = MAX_MAP_CHUNK-slop;
|
|
+ mapaddr = ramdisk_image & PAGE_MASK;
|
|
+ p = early_ioremap(mapaddr, clen+slop);
|
|
+ memcpy(q, p+slop, clen);
|
|
+ early_iounmap(p, clen+slop);
|
|
+ q += clen;
|
|
+ ramdisk_image += clen;
|
|
+ ramdisk_size -= clen;
|
|
+ }
|
|
+ /* high pages is not converted by early_res_to_bootmem */
|
|
+ ramdisk_image = boot_params.hdr.ramdisk_image;
|
|
+ ramdisk_size = boot_params.hdr.ramdisk_size;
|
|
+ printk(KERN_INFO "Move RAMDISK from %016llx - %016llx to"
|
|
+ " %08llx - %08llx\n",
|
|
+ ramdisk_image, ramdisk_image + ramdisk_size - 1,
|
|
+ ramdisk_here, ramdisk_here + ramdisk_size - 1);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static void __init reserve_initrd(void)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
- int cpu;
|
|
+ u64 ramdisk_image = boot_params.hdr.ramdisk_image;
|
|
+ u64 ramdisk_size = boot_params.hdr.ramdisk_size;
|
|
+ u64 ramdisk_end = ramdisk_image + ramdisk_size;
|
|
+ u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT;
|
|
|
|
- for_each_possible_cpu(cpu) {
|
|
- per_cpu(x86_cpu_to_apicid, cpu) = x86_cpu_to_apicid_init[cpu];
|
|
- per_cpu(x86_bios_cpu_apicid, cpu) =
|
|
- x86_bios_cpu_apicid_init[cpu];
|
|
-#ifdef CONFIG_NUMA
|
|
- per_cpu(x86_cpu_to_node_map, cpu) =
|
|
- x86_cpu_to_node_map_init[cpu];
|
|
+ if (!boot_params.hdr.type_of_loader ||
|
|
+ !ramdisk_image || !ramdisk_size)
|
|
+ return; /* No initrd provided by bootloader */
|
|
+#else
|
|
+ unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
|
|
+ unsigned long ramdisk_size = xen_start_info->mod_len;
|
|
+ unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
|
|
+ unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
|
|
+
|
|
+ if (!xen_start_info->mod_start || !ramdisk_size)
|
|
+ return; /* No initrd provided by bootloader */
|
|
#endif
|
|
+
|
|
+ initrd_start = 0;
|
|
+
|
|
+ if (ramdisk_size >= (end_of_lowmem>>1)) {
|
|
+ free_early(ramdisk_image, ramdisk_end);
|
|
+ printk(KERN_ERR "initrd too large to handle, "
|
|
+ "disabling initrd\n");
|
|
+ return;
|
|
}
|
|
|
|
- /* indicate the early static arrays will soon be gone */
|
|
- x86_cpu_to_apicid_early_ptr = NULL;
|
|
- x86_bios_cpu_apicid_early_ptr = NULL;
|
|
-#ifdef CONFIG_NUMA
|
|
- x86_cpu_to_node_map_early_ptr = NULL;
|
|
+ printk(KERN_INFO "RAMDISK: %08lx - %08lx\n", ramdisk_image,
|
|
+ ramdisk_end);
|
|
+
|
|
+
|
|
+ if (ramdisk_end <= end_of_lowmem) {
|
|
+ /* All in lowmem, easy case */
|
|
+ /*
|
|
+ * don't need to reserve again, already reserved early
|
|
+ * in i386_start_kernel
|
|
+ */
|
|
+ initrd_start = ramdisk_image + PAGE_OFFSET;
|
|
+ initrd_end = initrd_start + ramdisk_size;
|
|
+#ifdef CONFIG_X86_64_XEN
|
|
+ initrd_below_start_ok = 1;
|
|
#endif
|
|
+ return;
|
|
+ }
|
|
+
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
+ relocate_initrd();
|
|
+#else
|
|
+ printk(KERN_ERR "initrd extends beyond end of memory "
|
|
+ "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
|
+ ramdisk_end, end_of_lowmem);
|
|
+ initrd_start = 0;
|
|
#endif
|
|
+ free_early(ramdisk_image, ramdisk_end);
|
|
+}
|
|
+#else
|
|
+static void __init reserve_initrd(void)
|
|
+{
|
|
}
|
|
+#endif /* CONFIG_BLK_DEV_INITRD */
|
|
|
|
-#ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP
|
|
-cpumask_t *cpumask_of_cpu_map __read_mostly;
|
|
-EXPORT_SYMBOL(cpumask_of_cpu_map);
|
|
+static void __init parse_setup_data(void)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct setup_data *data;
|
|
+ u64 pa_data;
|
|
+
|
|
+ if (boot_params.hdr.version < 0x0209)
|
|
+ return;
|
|
+ pa_data = boot_params.hdr.setup_data;
|
|
+ while (pa_data) {
|
|
+ data = early_ioremap(pa_data, PAGE_SIZE);
|
|
+ switch (data->type) {
|
|
+ case SETUP_E820_EXT:
|
|
+ parse_e820_ext(data, pa_data);
|
|
+ break;
|
|
+ default:
|
|
+ break;
|
|
+ }
|
|
+ pa_data = data->next;
|
|
+ early_iounmap(data, PAGE_SIZE);
|
|
+ }
|
|
+#endif
|
|
+}
|
|
|
|
-/* requires nr_cpu_ids to be initialized */
|
|
-static void __init setup_cpumask_of_cpu(void)
|
|
+static void __init e820_reserve_setup_data(void)
|
|
{
|
|
- int i;
|
|
+#ifndef CONFIG_XEN
|
|
+ struct setup_data *data;
|
|
+ u64 pa_data;
|
|
+ int found = 0;
|
|
|
|
- /* alloc_bootmem zeroes memory */
|
|
- cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids);
|
|
- for (i = 0; i < nr_cpu_ids; i++)
|
|
- cpu_set(i, cpumask_of_cpu_map[i]);
|
|
+ if (boot_params.hdr.version < 0x0209)
|
|
+ return;
|
|
+ pa_data = boot_params.hdr.setup_data;
|
|
+ while (pa_data) {
|
|
+ data = early_ioremap(pa_data, sizeof(*data));
|
|
+ e820_update_range(pa_data, sizeof(*data)+data->len,
|
|
+ E820_RAM, E820_RESERVED_KERN);
|
|
+ found = 1;
|
|
+ pa_data = data->next;
|
|
+ early_iounmap(data, sizeof(*data));
|
|
+ }
|
|
+ if (!found)
|
|
+ return;
|
|
+
|
|
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
|
+ memcpy(&e820_saved, &e820, sizeof(struct e820map));
|
|
+ printk(KERN_INFO "extended physical RAM map:\n");
|
|
+ e820_print_map("reserve setup_data");
|
|
+#endif
|
|
}
|
|
-#else
|
|
-static inline void setup_cpumask_of_cpu(void) { }
|
|
+
|
|
+static void __init reserve_early_setup_data(void)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct setup_data *data;
|
|
+ u64 pa_data;
|
|
+ char buf[32];
|
|
+
|
|
+ if (boot_params.hdr.version < 0x0209)
|
|
+ return;
|
|
+ pa_data = boot_params.hdr.setup_data;
|
|
+ while (pa_data) {
|
|
+ data = early_ioremap(pa_data, sizeof(*data));
|
|
+ sprintf(buf, "setup data %x", data->type);
|
|
+ reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
|
|
+ pa_data = data->next;
|
|
+ early_iounmap(data, sizeof(*data));
|
|
+ }
|
|
#endif
|
|
+}
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
/*
|
|
- * Great future not-so-futuristic plan: make i386 and x86_64 do it
|
|
- * the same way
|
|
+ * --------- Crashkernel reservation ------------------------------
|
|
*/
|
|
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
|
|
-EXPORT_SYMBOL(__per_cpu_offset);
|
|
+
|
|
+#ifdef CONFIG_KEXEC
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/**
|
|
+ * Reserve @size bytes of crashkernel memory at any suitable offset.
|
|
+ *
|
|
+ * @size: Size of the crashkernel memory to reserve.
|
|
+ * Returns the base address on success, and -1ULL on failure.
|
|
+ */
|
|
+unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
|
|
+{
|
|
+ const unsigned long long alignment = 16<<20; /* 16M */
|
|
+ unsigned long long start = 0LL;
|
|
+
|
|
+ while (1) {
|
|
+ int ret;
|
|
+
|
|
+ start = find_e820_area(start, ULONG_MAX, size, alignment);
|
|
+ if (start == -1ULL)
|
|
+ return start;
|
|
+
|
|
+ /* try to reserve it */
|
|
+ ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
|
|
+ if (ret >= 0)
|
|
+ return start;
|
|
+
|
|
+ start += alignment;
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline unsigned long long get_total_mem(void)
|
|
+{
|
|
+ unsigned long long total;
|
|
+
|
|
+ total = max_low_pfn - min_low_pfn;
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ total += highend_pfn - highstart_pfn;
|
|
#endif
|
|
|
|
+ return total << PAGE_SHIFT;
|
|
+}
|
|
+
|
|
+static void __init reserve_crashkernel(void)
|
|
+{
|
|
+ unsigned long long total_mem;
|
|
+ unsigned long long crash_size, crash_base;
|
|
+ int ret;
|
|
+
|
|
+ total_mem = get_total_mem();
|
|
+
|
|
+ ret = parse_crashkernel(boot_command_line, total_mem,
|
|
+ &crash_size, &crash_base);
|
|
+ if (ret != 0 || crash_size <= 0)
|
|
+ return;
|
|
+
|
|
+ /* 0 means: find the address automatically */
|
|
+ if (crash_base <= 0) {
|
|
+ crash_base = find_and_reserve_crashkernel(crash_size);
|
|
+ if (crash_base == -1ULL) {
|
|
+ pr_info("crashkernel reservation failed. "
|
|
+ "No suitable area found.\n");
|
|
+ return;
|
|
+ }
|
|
+ } else {
|
|
+ ret = reserve_bootmem_generic(crash_base, crash_size,
|
|
+ BOOTMEM_EXCLUSIVE);
|
|
+ if (ret < 0) {
|
|
+ pr_info("crashkernel reservation failed - "
|
|
+ "memory is in use\n");
|
|
+ return;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
|
|
+ "for crashkernel (System RAM: %ldMB)\n",
|
|
+ (unsigned long)(crash_size >> 20),
|
|
+ (unsigned long)(crash_base >> 20),
|
|
+ (unsigned long)(total_mem >> 20));
|
|
+
|
|
+ crashk_res.start = crash_base;
|
|
+ crashk_res.end = crash_base + crash_size - 1;
|
|
+ insert_resource(&iomem_resource, &crashk_res);
|
|
+}
|
|
+#else
|
|
+#define reserve_crashkernel xen_machine_kexec_setup_resources
|
|
+#endif
|
|
+#else
|
|
+static void __init reserve_crashkernel(void)
|
|
+{
|
|
+}
|
|
+#endif
|
|
+
|
|
+static struct resource standard_io_resources[] = {
|
|
+ { .name = "dma1", .start = 0x00, .end = 0x1f,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "pic1", .start = 0x20, .end = 0x21,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "timer0", .start = 0x40, .end = 0x43,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "timer1", .start = 0x50, .end = 0x53,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "keyboard", .start = 0x60, .end = 0x60,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "keyboard", .start = 0x64, .end = 0x64,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "dma page reg", .start = 0x80, .end = 0x8f,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "pic2", .start = 0xa0, .end = 0xa1,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "dma2", .start = 0xc0, .end = 0xdf,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
+ { .name = "fpu", .start = 0xf0, .end = 0xff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO }
|
|
+};
|
|
+
|
|
+static void __init reserve_standard_io_resources(void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ /* Nothing to do if not running in dom0. */
|
|
+ if (!is_initial_xendomain())
|
|
+ return;
|
|
+
|
|
+ /* request I/O space for devices used on all i[345]86 PCs */
|
|
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
+ request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
+
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_PROC_VMCORE
|
|
+/* elfcorehdr= specifies the location of elf core header
|
|
+ * stored by the crashed kernel. This option will be passed
|
|
+ * by kexec loader to the capture kernel.
|
|
+ */
|
|
+static int __init setup_elfcorehdr(char *arg)
|
|
+{
|
|
+ char *end;
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
+ elfcorehdr_addr = memparse(arg, &end);
|
|
+ return end > arg ? 0 : -EINVAL;
|
|
+}
|
|
+early_param("elfcorehdr", setup_elfcorehdr);
|
|
+#endif
|
|
+
|
|
+static struct x86_quirks default_x86_quirks __initdata;
|
|
+
|
|
+struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
|
|
+
|
|
/*
|
|
- * Great future plan:
|
|
- * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
|
|
- * Always point %gs to its beginning
|
|
+ * Determine if we were loaded by an EFI loader. If so, then we have also been
|
|
+ * passed the efi memmap, systab, etc., so we should use these data structures
|
|
+ * for initialization. Note, the efi init code path is determined by the
|
|
+ * global efi_enabled. This allows the same kernel image to be used on existing
|
|
+ * systems (with a traditional BIOS) as well as on EFI systems.
|
|
*/
|
|
-void __init setup_per_cpu_areas(void)
|
|
+/*
|
|
+ * setup_arch - architecture-specific boot-time initializations
|
|
+ *
|
|
+ * Note: On x86_64, fixmaps are ready for use even before this is called.
|
|
+ */
|
|
+
|
|
+void __init setup_arch(char **cmdline_p)
|
|
{
|
|
- int i, highest_cpu = 0;
|
|
- unsigned long size;
|
|
+#ifdef CONFIG_XEN
|
|
+ unsigned int i;
|
|
+ unsigned long p2m_pages;
|
|
+ struct physdev_set_iopl set_iopl;
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
- prefill_possible_map();
|
|
+#ifdef CONFIG_X86_32
|
|
+ /* Force a quick death if the kernel panics (not domain 0). */
|
|
+ extern int panic_timeout;
|
|
+ if (!panic_timeout && !is_initial_xendomain())
|
|
+ panic_timeout = 1;
|
|
#endif
|
|
|
|
- /* Copy section for each CPU (we discard the original) */
|
|
- size = PERCPU_ENOUGH_ROOM;
|
|
- printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
|
|
- size);
|
|
-
|
|
- for_each_possible_cpu(i) {
|
|
- char *ptr;
|
|
-#ifndef CONFIG_NEED_MULTIPLE_NODES
|
|
- ptr = alloc_bootmem_pages(size);
|
|
-#else
|
|
- int node = early_cpu_to_node(i);
|
|
- if (!node_online(node) || !NODE_DATA(node)) {
|
|
- ptr = alloc_bootmem_pages(size);
|
|
- printk(KERN_INFO
|
|
- "cpu %d has no node or node-local memory\n", i);
|
|
- }
|
|
- else
|
|
- ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
|
|
+ /* Register a call for panic conditions. */
|
|
+ atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
|
|
+
|
|
+ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
+ VMASST_TYPE_writable_pagetables));
|
|
+#ifdef CONFIG_X86_32
|
|
+ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
+ VMASST_TYPE_4gb_segments));
|
|
+#endif
|
|
+ set_iopl.iopl = 1;
|
|
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
|
|
+ visws_early_detect();
|
|
+ pre_setup_arch_hook();
|
|
+#else
|
|
+ printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
|
+#endif
|
|
+
|
|
+ early_cpu_init();
|
|
+ early_ioremap_init();
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
|
|
+ screen_info = boot_params.screen_info;
|
|
+ edid_info = boot_params.edid_info;
|
|
+#ifdef CONFIG_X86_32
|
|
+ apm_info.bios = boot_params.apm_bios_info;
|
|
+ ist_info = boot_params.ist_info;
|
|
+ if (boot_params.sys_desc_table.length != 0) {
|
|
+ set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
|
|
+ machine_id = boot_params.sys_desc_table.table[0];
|
|
+ machine_submodel_id = boot_params.sys_desc_table.table[1];
|
|
+ BIOS_revision = boot_params.sys_desc_table.table[2];
|
|
+ }
|
|
+#endif
|
|
+ saved_video_mode = boot_params.hdr.vid_mode;
|
|
+ bootloader_type = boot_params.hdr.type_of_loader;
|
|
+
|
|
+#ifdef CONFIG_BLK_DEV_RAM
|
|
+ rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
|
+ rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
|
|
+ rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
|
|
+#endif
|
|
+#ifdef CONFIG_EFI
|
|
+ if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
|
+#ifdef CONFIG_X86_32
|
|
+ "EL32",
|
|
+#else
|
|
+ "EL64",
|
|
#endif
|
|
- if (!ptr)
|
|
- panic("Cannot allocate cpu data for CPU %d\n", i);
|
|
+ 4)) {
|
|
+ efi_enabled = 1;
|
|
+ efi_reserve_early();
|
|
+ }
|
|
+#endif
|
|
+#else /* CONFIG_XEN */
|
|
+#ifdef CONFIG_X86_32
|
|
+ /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
|
|
+ properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
|
|
+ */
|
|
+ ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
|
|
+#else
|
|
+ ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
|
|
+#endif
|
|
+ if (is_initial_xendomain()) {
|
|
+ const struct dom0_vga_console_info *info =
|
|
+ (void *)((char *)xen_start_info +
|
|
+ xen_start_info->console.dom0.info_off);
|
|
+
|
|
+ dom0_init_screen_info(info,
|
|
+ xen_start_info->console.dom0.info_size);
|
|
+ xen_start_info->console.domU.mfn = 0;
|
|
+ xen_start_info->console.domU.evtchn = 0;
|
|
+ } else
|
|
+ screen_info.orig_video_isVGA = 0;
|
|
+ copy_edid();
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+ ARCH_SETUP
|
|
+
|
|
+ setup_memory_map();
|
|
+ parse_setup_data();
|
|
+ /* update the e820_saved too */
|
|
+ e820_reserve_setup_data();
|
|
+
|
|
+ copy_edd();
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ if (!boot_params.hdr.root_flags)
|
|
+ root_mountflags &= ~MS_RDONLY;
|
|
+#endif
|
|
+ init_mm.start_code = (unsigned long) _text;
|
|
+ init_mm.end_code = (unsigned long) _etext;
|
|
+ init_mm.end_data = (unsigned long) _edata;
|
|
+#ifdef CONFIG_X86_32
|
|
+#ifndef CONFIG_XEN
|
|
+ init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
|
|
+#else
|
|
+ init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
|
|
+ xen_start_info->nr_pt_frames) << PAGE_SHIFT;
|
|
+#endif
|
|
+#else
|
|
+ init_mm.brk = (unsigned long) &_end;
|
|
+#endif
|
|
+
|
|
+ code_resource.start = virt_to_phys(_text);
|
|
+ code_resource.end = virt_to_phys(_etext)-1;
|
|
+ data_resource.start = virt_to_phys(_etext);
|
|
+ data_resource.end = virt_to_phys(_edata)-1;
|
|
+ bss_resource.start = virt_to_phys(&__bss_start);
|
|
+ bss_resource.end = virt_to_phys(&__bss_stop)-1;
|
|
+
|
|
+ strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
|
+ *cmdline_p = command_line;
|
|
+
|
|
+ parse_early_param();
|
|
+
|
|
#ifdef CONFIG_X86_64
|
|
- cpu_pda(i)->data_offset = ptr - __per_cpu_start;
|
|
+ check_efer();
|
|
+#endif
|
|
+
|
|
+#if defined(CONFIG_VMI) && defined(CONFIG_X86_32)
|
|
+ /*
|
|
+ * Must be before kernel pagetables are setup
|
|
+ * or fixmap area is touched.
|
|
+ */
|
|
+ vmi_init();
|
|
+#endif
|
|
+
|
|
+ /* after early param, so could get panic from serial */
|
|
+ reserve_early_setup_data();
|
|
+
|
|
+ if (acpi_mps_check()) {
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
|
|
+ disable_apic = 1;
|
|
+#endif
|
|
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_PCI
|
|
+ if (pci_early_dump_regs)
|
|
+ early_dump_pci_devices();
|
|
+#endif
|
|
+
|
|
+ finish_e820_parsing();
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (is_initial_xendomain())
|
|
+ probe_roms();
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ /* after parse_early_param, so could debug it */
|
|
+ insert_resource(&iomem_resource, &code_resource);
|
|
+ insert_resource(&iomem_resource, &data_resource);
|
|
+ insert_resource(&iomem_resource, &bss_resource);
|
|
+
|
|
+ if (efi_enabled)
|
|
+ efi_init();
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (ppro_with_ram_bug()) {
|
|
+ e820_update_range(0x70000000ULL, 0x40000ULL, E820_RAM,
|
|
+ E820_RESERVED);
|
|
+ sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
|
|
+ printk(KERN_INFO "fixed physical RAM map:\n");
|
|
+ e820_print_map("bad_ppro");
|
|
+ }
|
|
#else
|
|
- __per_cpu_offset[i] = ptr - __per_cpu_start;
|
|
+ early_gart_iommu_check();
|
|
#endif
|
|
- memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
|
|
+#endif /* CONFIG_XEN */
|
|
|
|
- highest_cpu = i;
|
|
+ /*
|
|
+ * partially used pages are not usable - thus
|
|
+ * we are rounding upwards:
|
|
+ */
|
|
+ max_pfn = e820_end_of_ram_pfn();
|
|
+
|
|
+ /* preallocate 4k for mptable mpc */
|
|
+ early_reserve_e820_mpc_new();
|
|
+ /* update e820 for memory not covered by WB MTRRs */
|
|
+ mtrr_bp_init();
|
|
+#ifndef CONFIG_XEN
|
|
+ if (mtrr_trim_uncached_memory(max_pfn))
|
|
+ max_pfn = e820_end_of_ram_pfn();
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ /* max_low_pfn get updated here */
|
|
+ find_low_pfn_range();
|
|
+#else
|
|
+ num_physpages = max_pfn;
|
|
+ max_mapnr = max_pfn;
|
|
+
|
|
+
|
|
+ /* How many end-of-memory variables you have, grandma! */
|
|
+ /* need this before calling reserve_initrd */
|
|
+ if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
|
|
+ max_low_pfn = e820_end_of_low_ram_pfn();
|
|
+ else
|
|
+ max_low_pfn = max_pfn;
|
|
+
|
|
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
|
|
+#endif
|
|
+
|
|
+ /* max_pfn_mapped is updated here */
|
|
+ max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
|
|
+ max_pfn_mapped = max_low_pfn_mapped;
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (max_pfn > max_low_pfn) {
|
|
+ max_pfn_mapped = init_memory_mapping(1UL<<32,
|
|
+ max_pfn<<PAGE_SHIFT);
|
|
+ /* can we preseve max_low_pfn ?*/
|
|
+ max_low_pfn = max_pfn;
|
|
}
|
|
+#endif
|
|
|
|
- nr_cpu_ids = highest_cpu + 1;
|
|
- printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d\n", NR_CPUS, nr_cpu_ids);
|
|
+ /*
|
|
+ * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
|
|
+ */
|
|
|
|
- /* Setup percpu data maps */
|
|
- setup_per_cpu_maps();
|
|
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
+ if (init_ohci1394_dma_early)
|
|
+ init_ohci1394_dma_on_all_controllers();
|
|
+#endif
|
|
|
|
- /* Setup cpumask_of_cpu map */
|
|
- setup_cpumask_of_cpu();
|
|
-}
|
|
+ reserve_initrd();
|
|
+
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+ vsmp_init();
|
|
+#endif
|
|
+
|
|
+ if (is_initial_xendomain())
|
|
+ dmi_scan_machine();
|
|
+
|
|
+ io_delay_init();
|
|
+
|
|
+#ifdef CONFIG_ACPI
|
|
+ if (!is_initial_xendomain()) {
|
|
+ printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
|
|
+ disable_acpi();
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * Parse the ACPI tables for possible boot-time SMP configuration.
|
|
+ */
|
|
+ acpi_boot_table_init();
|
|
+
|
|
+#ifdef CONFIG_ACPI_NUMA
|
|
+ /*
|
|
+ * Parse SRAT to discover nodes.
|
|
+ */
|
|
+ acpi_numa_init();
|
|
+#endif
|
|
+
|
|
+ initmem_init(0, max_pfn);
|
|
+
|
|
+#ifdef CONFIG_ACPI_SLEEP
|
|
+ /*
|
|
+ * Reserve low memory region for sleep support.
|
|
+ */
|
|
+ acpi_reserve_bootmem();
|
|
+#endif
|
|
+#ifdef CONFIG_X86_FIND_SMP_CONFIG
|
|
+ /*
|
|
+ * Find and reserve possible boot-time SMP configuration:
|
|
+ */
|
|
+ find_smp_config();
|
|
+#endif
|
|
+ reserve_crashkernel();
|
|
+
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+ /*
|
|
+ * dma32_reserve_bootmem() allocates bootmem which may conflict
|
|
+ * with the crashkernel command line, so do that after
|
|
+ * reserve_crashkernel()
|
|
+ */
|
|
+ dma32_reserve_bootmem();
|
|
+#endif
|
|
+
|
|
+ reserve_ibft_region();
|
|
+
|
|
+#ifdef CONFIG_KVM_CLOCK
|
|
+ kvmclock_init();
|
|
+#endif
|
|
+
|
|
+ xen_pagetable_setup_start(swapper_pg_dir);
|
|
+ paging_init();
|
|
+ xen_pagetable_setup_done(swapper_pg_dir);
|
|
+ paravirt_post_allocator_init();
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ map_vsyscall();
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ p2m_pages = max_pfn;
|
|
+ if (xen_start_info->nr_pages > max_pfn) {
|
|
+ /*
|
|
+ * the max_pfn was shrunk (probably by mem= or highmem=
|
|
+ * kernel parameter); shrink reservation with the HV
|
|
+ */
|
|
+ struct xen_memory_reservation reservation = {
|
|
+ .address_bits = 0,
|
|
+ .extent_order = 0,
|
|
+ .domid = DOMID_SELF
|
|
+ };
|
|
+ unsigned int difference;
|
|
+ int ret;
|
|
+
|
|
+ difference = xen_start_info->nr_pages - max_pfn;
|
|
|
|
+ set_xen_guest_handle(reservation.extent_start,
|
|
+ ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
|
|
+ reservation.nr_extents = difference;
|
|
+ ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
|
|
+ &reservation);
|
|
+ BUG_ON(ret != difference);
|
|
+ }
|
|
+ else if (max_pfn > xen_start_info->nr_pages)
|
|
+ p2m_pages = xen_start_info->nr_pages;
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
+ unsigned long i, j;
|
|
+ unsigned int k, fpp;
|
|
+
|
|
+ /* Make sure we have a large enough P->M table. */
|
|
+ phys_to_machine_mapping = alloc_bootmem_pages(
|
|
+ max_pfn * sizeof(unsigned long));
|
|
+ memset(phys_to_machine_mapping, ~0,
|
|
+ max_pfn * sizeof(unsigned long));
|
|
+ memcpy(phys_to_machine_mapping,
|
|
+ (unsigned long *)xen_start_info->mfn_list,
|
|
+ p2m_pages * sizeof(unsigned long));
|
|
+ free_bootmem(
|
|
+ __pa(xen_start_info->mfn_list),
|
|
+ PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
|
|
+ sizeof(unsigned long))));
|
|
+
|
|
+ /*
|
|
+ * Initialise the list of the frames that specify the list of
|
|
+ * frames that make up the p2m table. Used by save/restore.
|
|
+ */
|
|
+ pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
|
|
+
|
|
+ fpp = PAGE_SIZE/sizeof(unsigned long);
|
|
+ for (i = j = 0, k = -1; i < max_pfn; i += fpp, j++) {
|
|
+ if (j == fpp)
|
|
+ j = 0;
|
|
+ if (j == 0) {
|
|
+ k++;
|
|
+ BUG_ON(k>=ARRAY_SIZE(pfn_to_mfn_frame_list));
|
|
+ pfn_to_mfn_frame_list[k] =
|
|
+ alloc_bootmem_pages(PAGE_SIZE);
|
|
+ pfn_to_mfn_frame_list_list[k] =
|
|
+ virt_to_mfn(pfn_to_mfn_frame_list[k]);
|
|
+ }
|
|
+ pfn_to_mfn_frame_list[k][j] =
|
|
+ virt_to_mfn(&phys_to_machine_mapping[i]);
|
|
+ }
|
|
+ HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
|
|
+ HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
|
|
+ virt_to_mfn(pfn_to_mfn_frame_list_list);
|
|
+ }
|
|
+
|
|
+ /* Mark all ISA DMA channels in-use - using them wouldn't work. */
|
|
+ for (i = 0; i < MAX_DMA_CHANNELS; ++i)
|
|
+ if (i != 4 && request_dma(i, "xen") != 0)
|
|
+ BUG();
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
+#ifdef CONFIG_X86_GENERICARCH
|
|
+ generic_apic_probe();
|
|
#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ early_quirks();
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * Read APIC and some other early information from ACPI tables.
|
|
+ */
|
|
+ acpi_boot_init();
|
|
+
|
|
+#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
|
|
+ /*
|
|
+ * get boot-time SMP configuration:
|
|
+ */
|
|
+ if (smp_found_config)
|
|
+ get_smp_config();
|
|
+#endif
|
|
+
|
|
+ prefill_possible_map();
|
|
+#ifdef CONFIG_X86_64
|
|
+ init_cpu_to_node();
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ init_apic_mappings();
|
|
+ ioapic_init_mappings();
|
|
+
|
|
+ kvm_guest_init();
|
|
+
|
|
+ e820_reserve_resources();
|
|
+ e820_mark_nosave_regions(max_low_pfn);
|
|
+#else
|
|
+ if (is_initial_xendomain())
|
|
+ e820_reserve_resources();
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (is_initial_xendomain())
|
|
+ request_resource(&iomem_resource, &video_ram_resource);
|
|
+#endif
|
|
+ reserve_standard_io_resources();
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ e820_setup_gap();
|
|
+
|
|
+#ifdef CONFIG_VT
|
|
+#if defined(CONFIG_VGA_CONSOLE)
|
|
+ if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
|
|
+ conswitchp = &vga_con;
|
|
+#elif defined(CONFIG_DUMMY_CONSOLE)
|
|
+ conswitchp = &dummy_con;
|
|
+#endif
|
|
+#endif
|
|
+#else /* CONFIG_XEN */
|
|
+ if (is_initial_xendomain())
|
|
+ e820_setup_gap();
|
|
+
|
|
+#ifdef CONFIG_VT
|
|
+#ifdef CONFIG_DUMMY_CONSOLE
|
|
+ conswitchp = &dummy_con;
|
|
+#endif
|
|
+#ifdef CONFIG_VGA_CONSOLE
|
|
+ if (is_initial_xendomain())
|
|
+ conswitchp = &vga_con;
|
|
+#endif
|
|
+#endif
|
|
+#endif /* CONFIG_XEN */
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+static int
|
|
+xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
|
|
+{
|
|
+ HYPERVISOR_shutdown(SHUTDOWN_crash);
|
|
+ /* we're never actually going to get here... */
|
|
+ return NOTIFY_DONE;
|
|
+}
|
|
+#endif /* !CONFIG_XEN */
|
|
--- head-2011-03-11.orig/arch/x86/kernel/setup64-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,370 +0,0 @@
|
|
-/*
|
|
- * X86-64 specific CPU setup.
|
|
- * Copyright (C) 1995 Linus Torvalds
|
|
- * Copyright 2001, 2002, 2003 SuSE Labs / Andi Kleen.
|
|
- * See setup.c for older changelog.
|
|
- *
|
|
- * Jun Nakajima <jun.nakajima@intel.com>
|
|
- * Modified for Xen
|
|
- *
|
|
- */
|
|
-#include <linux/init.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/bitops.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/kgdb.h>
|
|
-#include <asm/pda.h>
|
|
-#include <asm/pgtable.h>
|
|
-#include <asm/processor.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/mmu_context.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/i387.h>
|
|
-#include <asm/percpu.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/sections.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/genapic.h>
|
|
-#ifdef CONFIG_XEN
|
|
-#include <asm/hypervisor.h>
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
|
|
-struct boot_params __initdata boot_params;
|
|
-#else
|
|
-struct boot_params boot_params;
|
|
-#endif
|
|
-
|
|
-cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
|
-
|
|
-struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly;
|
|
-EXPORT_SYMBOL(_cpu_pda);
|
|
-struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned;
|
|
-
|
|
-#ifndef CONFIG_X86_NO_IDT
|
|
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
|
-#endif
|
|
-
|
|
-char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
|
|
-
|
|
-unsigned long __supported_pte_mask __read_mostly = ~0UL;
|
|
-EXPORT_SYMBOL(__supported_pte_mask);
|
|
-
|
|
-static int do_not_nx __cpuinitdata = 0;
|
|
-
|
|
-/* noexec=on|off
|
|
-Control non executable mappings for 64bit processes.
|
|
-
|
|
-on Enable(default)
|
|
-off Disable
|
|
-*/
|
|
-static int __init nonx_setup(char *str)
|
|
-{
|
|
- if (!str)
|
|
- return -EINVAL;
|
|
- if (!strncmp(str, "on", 2)) {
|
|
- __supported_pte_mask |= _PAGE_NX;
|
|
- do_not_nx = 0;
|
|
- } else if (!strncmp(str, "off", 3)) {
|
|
- do_not_nx = 1;
|
|
- __supported_pte_mask &= ~_PAGE_NX;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-early_param("noexec", nonx_setup);
|
|
-
|
|
-int force_personality32 = 0;
|
|
-
|
|
-/* noexec32=on|off
|
|
-Control non executable heap for 32bit processes.
|
|
-To control the stack too use noexec=off
|
|
-
|
|
-on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
|
|
-off PROT_READ implies PROT_EXEC
|
|
-*/
|
|
-static int __init nonx32_setup(char *str)
|
|
-{
|
|
- if (!strcmp(str, "on"))
|
|
- force_personality32 &= ~READ_IMPLIES_EXEC;
|
|
- else if (!strcmp(str, "off"))
|
|
- force_personality32 |= READ_IMPLIES_EXEC;
|
|
- return 1;
|
|
-}
|
|
-__setup("noexec32=", nonx32_setup);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-static void __init_refok switch_pt(int cpu)
|
|
-{
|
|
- if (cpu == 0)
|
|
- xen_init_pt();
|
|
- xen_pt_switch(__pa_symbol(init_level4_pgt));
|
|
- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
|
|
-}
|
|
-#define switch_pt() switch_pt(cpu)
|
|
-
|
|
-static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
|
|
-{
|
|
- unsigned long frames[16];
|
|
- unsigned long va;
|
|
- int f;
|
|
-
|
|
- for (va = gdt_descr->address, f = 0;
|
|
- va < gdt_descr->address + gdt_descr->size;
|
|
- va += PAGE_SIZE, f++) {
|
|
- frames[f] = virt_to_mfn(va);
|
|
- make_page_readonly(
|
|
- (void *)va, XENFEAT_writable_descriptor_tables);
|
|
- }
|
|
- if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) /
|
|
- sizeof (struct desc_struct)))
|
|
- BUG();
|
|
-}
|
|
-#else
|
|
-static void switch_pt(void)
|
|
-{
|
|
- asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
|
|
-}
|
|
-
|
|
-static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
|
|
-{
|
|
- load_gdt(gdt_descr);
|
|
- load_idt(idt_descr);
|
|
-}
|
|
-#endif
|
|
-
|
|
-void pda_init(int cpu)
|
|
-{
|
|
- struct x8664_pda *pda = cpu_pda(cpu);
|
|
-
|
|
- /* Setup up data that may be needed in __get_free_pages early */
|
|
- asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
|
|
-#ifndef CONFIG_XEN
|
|
- /* Memory clobbers used to order PDA accessed */
|
|
- mb();
|
|
- wrmsrl(MSR_GS_BASE, pda);
|
|
- mb();
|
|
-#else
|
|
- if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
|
|
- (unsigned long)pda))
|
|
- BUG();
|
|
-#endif
|
|
- pda->cpunumber = cpu;
|
|
- pda->irqcount = -1;
|
|
- pda->kernelstack =
|
|
- (unsigned long)stack_thread_info() - PDA_STACKOFFSET + THREAD_SIZE;
|
|
- pda->active_mm = &init_mm;
|
|
- pda->mmu_state = 0;
|
|
-
|
|
- if (cpu == 0) {
|
|
- /* others are initialized in smpboot.c */
|
|
- pda->pcurrent = &init_task;
|
|
- pda->irqstackptr = boot_cpu_stack;
|
|
- } else {
|
|
- pda->irqstackptr = (char *)
|
|
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
|
|
- if (!pda->irqstackptr)
|
|
- panic("cannot allocate irqstack for cpu %d", cpu);
|
|
- }
|
|
-
|
|
- switch_pt();
|
|
-
|
|
- pda->irqstackptr += IRQSTACKSIZE-64;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
-char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]
|
|
-__attribute__((section(".bss.page_aligned")));
|
|
-#endif
|
|
-
|
|
-extern asmlinkage void ignore_sysret(void);
|
|
-
|
|
-/* May not be marked __init: used by software suspend */
|
|
-void syscall_init(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * LSTAR and STAR live in a bit strange symbiosis.
|
|
- * They both write to the same internal register. STAR allows to set CS/DS
|
|
- * but only a 32bit target. LSTAR sets the 64bit rip.
|
|
- */
|
|
- wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
|
- wrmsrl(MSR_LSTAR, system_call);
|
|
- wrmsrl(MSR_CSTAR, ignore_sysret);
|
|
-
|
|
- /* Flags to clear on syscall */
|
|
- wrmsrl(MSR_SYSCALL_MASK,
|
|
- X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
|
|
-#endif
|
|
-#ifdef CONFIG_IA32_EMULATION
|
|
- syscall32_cpu_init ();
|
|
-#else
|
|
- {
|
|
- static const struct callback_register cstar = {
|
|
- .type = CALLBACKTYPE_syscall32,
|
|
- .address = (unsigned long)ignore_sysret
|
|
- };
|
|
- if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
|
|
- printk(KERN_WARNING "Unable to register CSTAR callback\n");
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
-void __cpuinit check_efer(void)
|
|
-{
|
|
- unsigned long efer;
|
|
-
|
|
- rdmsrl(MSR_EFER, efer);
|
|
- if (!(efer & EFER_NX) || do_not_nx) {
|
|
- __supported_pte_mask &= ~_PAGE_NX;
|
|
- }
|
|
-}
|
|
-
|
|
-unsigned long kernel_eflags;
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
-/*
|
|
- * Copies of the original ist values from the tss are only accessed during
|
|
- * debugging, no special alignment required.
|
|
- */
|
|
-DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * cpu_init() initializes state that is per-CPU. Some data is already
|
|
- * initialized (naturally) in the bootstrap process, such as the GDT
|
|
- * and IDT. We reload them nevertheless, this function acts as a
|
|
- * 'CPU state barrier', nothing should get across.
|
|
- * A lot of state is already set up in PDA init.
|
|
- */
|
|
-void __cpuinit cpu_init (void)
|
|
-{
|
|
- int cpu = stack_smp_processor_id();
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- struct tss_struct *t = &per_cpu(init_tss, cpu);
|
|
- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
|
|
- unsigned long v;
|
|
- char *estacks = NULL;
|
|
- unsigned i;
|
|
-#endif
|
|
- struct task_struct *me;
|
|
-
|
|
- /* CPU 0 is initialised in head64.c */
|
|
- if (cpu != 0) {
|
|
- pda_init(cpu);
|
|
- }
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- else
|
|
- estacks = boot_exception_stacks;
|
|
-#endif
|
|
-
|
|
- me = current;
|
|
-
|
|
- if (cpu_test_and_set(cpu, cpu_initialized))
|
|
- panic("CPU#%d already initialized!\n", cpu);
|
|
-
|
|
- printk("Initializing CPU#%d\n", cpu);
|
|
-
|
|
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
-
|
|
- /*
|
|
- * Initialize the per-CPU GDT with the boot GDT,
|
|
- * and set up the GDT descriptor:
|
|
- */
|
|
-#ifndef CONFIG_XEN
|
|
- if (cpu)
|
|
- memcpy(get_cpu_gdt_table(cpu), cpu_gdt_table, GDT_SIZE);
|
|
-#endif
|
|
-
|
|
- cpu_gdt_descr[cpu].size = GDT_SIZE;
|
|
- cpu_gdt_init(&cpu_gdt_descr[cpu]);
|
|
-
|
|
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
|
|
- syscall_init();
|
|
-
|
|
- wrmsrl(MSR_FS_BASE, 0);
|
|
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
|
|
- barrier();
|
|
-
|
|
- check_efer();
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- /*
|
|
- * set up and load the per-CPU TSS
|
|
- */
|
|
- for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
|
- static const unsigned int order[N_EXCEPTION_STACKS] = {
|
|
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
|
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
|
- };
|
|
- if (cpu) {
|
|
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
|
|
- if (!estacks)
|
|
- panic("Cannot allocate exception stack %ld %d\n",
|
|
- v, cpu);
|
|
- }
|
|
- estacks += PAGE_SIZE << order[v];
|
|
- orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks;
|
|
- }
|
|
-
|
|
- t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
|
|
- /*
|
|
- * <= is required because the CPU will access up to
|
|
- * 8 bits beyond the end of the IO permission bitmap.
|
|
- */
|
|
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
|
|
- t->io_bitmap[i] = ~0UL;
|
|
-#endif
|
|
-
|
|
- atomic_inc(&init_mm.mm_count);
|
|
- me->active_mm = &init_mm;
|
|
- if (me->mm)
|
|
- BUG();
|
|
- enter_lazy_tlb(&init_mm, me);
|
|
-
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- set_tss_desc(cpu, t);
|
|
-#endif
|
|
-#ifndef CONFIG_XEN
|
|
- load_TR_desc();
|
|
-#endif
|
|
- load_LDT(&init_mm.context);
|
|
-
|
|
-#ifdef CONFIG_KGDB
|
|
- /*
|
|
- * If the kgdb is connected no debug regs should be altered. This
|
|
- * is only applicable when KGDB and a KGDB I/O module are built
|
|
- * into the kernel and you are using early debugging with
|
|
- * kgdbwait. KGDB will control the kernel HW breakpoint registers.
|
|
- */
|
|
- if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
|
|
- arch_kgdb_ops.correct_hw_break();
|
|
- else {
|
|
-#endif
|
|
- /*
|
|
- * Clear all 6 debug registers:
|
|
- */
|
|
-
|
|
- set_debugreg(0UL, 0);
|
|
- set_debugreg(0UL, 1);
|
|
- set_debugreg(0UL, 2);
|
|
- set_debugreg(0UL, 3);
|
|
- set_debugreg(0UL, 6);
|
|
- set_debugreg(0UL, 7);
|
|
-#ifdef CONFIG_KGDB
|
|
- /* If the kgdb is connected no debug regs should be altered. */
|
|
- }
|
|
-#endif
|
|
-
|
|
- fpu_init();
|
|
-
|
|
- asm ("pushfq; popq %0" : "=rm" (kernel_eflags));
|
|
- if (raw_irqs_disabled())
|
|
- kernel_eflags &= ~X86_EFLAGS_IF;
|
|
-
|
|
- if (is_uv_system())
|
|
- uv_cpu_init();
|
|
-}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/setup_32-xen.c 2011-03-04 15:07:31.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,1153 +0,0 @@
|
|
-/*
|
|
- * Copyright (C) 1995 Linus Torvalds
|
|
- *
|
|
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
|
- *
|
|
- * Memory region support
|
|
- * David Parsons <orc@pell.chi.il.us>, July-August 1999
|
|
- *
|
|
- * Added E820 sanitization routine (removes overlapping memory regions);
|
|
- * Brian Moyle <bmoyle@mvista.com>, February 2001
|
|
- *
|
|
- * Moved CPU detection code to cpu/${cpu}.c
|
|
- * Patrick Mochel <mochel@osdl.org>, March 2002
|
|
- *
|
|
- * Provisions for empty E820 memory regions (reported by certain BIOSes).
|
|
- * Alex Achenbach <xela@slit.de>, December 2002.
|
|
- *
|
|
- */
|
|
-
|
|
-/*
|
|
- * This file handles the architecture-dependent parts of initialization
|
|
- */
|
|
-
|
|
-#include <linux/sched.h>
|
|
-#include <linux/mm.h>
|
|
-#include <linux/mmzone.h>
|
|
-#include <linux/screen_info.h>
|
|
-#include <linux/ioport.h>
|
|
-#include <linux/acpi.h>
|
|
-#include <linux/apm_bios.h>
|
|
-#include <linux/initrd.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/seq_file.h>
|
|
-#include <linux/console.h>
|
|
-#include <linux/mca.h>
|
|
-#include <linux/root_dev.h>
|
|
-#include <linux/highmem.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/efi.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/edd.h>
|
|
-#include <linux/iscsi_ibft.h>
|
|
-#include <linux/nodemask.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/percpu.h>
|
|
-#include <linux/notifier.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/crash_dump.h>
|
|
-#include <linux/dmi.h>
|
|
-#include <linux/pfn.h>
|
|
-#include <linux/pci.h>
|
|
-#include <linux/init_ohci1394_dma.h>
|
|
-#include <linux/kvm_para.h>
|
|
-
|
|
-#include <video/edid.h>
|
|
-
|
|
-#include <asm/mtrr.h>
|
|
-#include <asm/apic.h>
|
|
-#include <asm/e820.h>
|
|
-#include <asm/mpspec.h>
|
|
-#include <asm/mmzone.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/arch_hooks.h>
|
|
-#include <asm/sections.h>
|
|
-#include <asm/io_apic.h>
|
|
-#include <asm/ist.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/hypervisor.h>
|
|
-#include <xen/interface/physdev.h>
|
|
-#include <xen/interface/memory.h>
|
|
-#include <xen/features.h>
|
|
-#include <xen/firmware.h>
|
|
-#include <xen/xencons.h>
|
|
-#include <setup_arch.h>
|
|
-#include <asm/bios_ebda.h>
|
|
-#include <asm/cacheflush.h>
|
|
-#include <asm/processor.h>
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-#include <xen/interface/kexec.h>
|
|
-#endif
|
|
-
|
|
-static int xen_panic_event(struct notifier_block *, unsigned long, void *);
|
|
-static struct notifier_block xen_panic_block = {
|
|
- xen_panic_event, NULL, 0 /* try to go last */
|
|
-};
|
|
-
|
|
-/*
|
|
- * Machine setup..
|
|
- */
|
|
-static struct resource data_resource = {
|
|
- .name = "Kernel data",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource code_resource = {
|
|
- .name = "Kernel code",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource bss_resource = {
|
|
- .name = "Kernel bss",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource video_ram_resource = {
|
|
- .name = "Video RAM area",
|
|
- .start = 0xa0000,
|
|
- .end = 0xbffff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource standard_io_resources[] = { {
|
|
- .name = "dma1",
|
|
- .start = 0x0000,
|
|
- .end = 0x001f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "pic1",
|
|
- .start = 0x0020,
|
|
- .end = 0x0021,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "timer0",
|
|
- .start = 0x0040,
|
|
- .end = 0x0043,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "timer1",
|
|
- .start = 0x0050,
|
|
- .end = 0x0053,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "keyboard",
|
|
- .start = 0x0060,
|
|
- .end = 0x0060,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "keyboard",
|
|
- .start = 0x0064,
|
|
- .end = 0x0064,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "dma page reg",
|
|
- .start = 0x0080,
|
|
- .end = 0x008f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "pic2",
|
|
- .start = 0x00a0,
|
|
- .end = 0x00a1,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "dma2",
|
|
- .start = 0x00c0,
|
|
- .end = 0x00df,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "fpu",
|
|
- .start = 0x00f0,
|
|
- .end = 0x00ff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-} };
|
|
-
|
|
-/* cpu data as detected by the assembly code in head.S */
|
|
-struct cpuinfo_x86 new_cpu_data __cpuinitdata = { .wp_works_ok = 1 };
|
|
-/* common cpu data for all cpus */
|
|
-struct cpuinfo_x86 boot_cpu_data __read_mostly = { .wp_works_ok = 1 };
|
|
-EXPORT_SYMBOL(boot_cpu_data);
|
|
-
|
|
-unsigned int def_to_bigsmp;
|
|
-
|
|
-#ifndef CONFIG_X86_PAE
|
|
-unsigned long mmu_cr4_features;
|
|
-#else
|
|
-unsigned long mmu_cr4_features = X86_CR4_PAE;
|
|
-#endif
|
|
-
|
|
-/* for MCA, but anyone else can use it if they want */
|
|
-unsigned int machine_id;
|
|
-unsigned int machine_submodel_id;
|
|
-unsigned int BIOS_revision;
|
|
-
|
|
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
|
|
-int bootloader_type;
|
|
-
|
|
-/* user-defined highmem size */
|
|
-static unsigned int highmem_pages = -1;
|
|
-
|
|
-/*
|
|
- * Setup options
|
|
- */
|
|
-struct screen_info screen_info;
|
|
-EXPORT_SYMBOL(screen_info);
|
|
-struct apm_info apm_info;
|
|
-EXPORT_SYMBOL(apm_info);
|
|
-struct edid_info edid_info;
|
|
-EXPORT_SYMBOL_GPL(edid_info);
|
|
-#ifndef CONFIG_XEN
|
|
-#define copy_edid() (edid_info = boot_params.edid_info)
|
|
-#endif
|
|
-struct ist_info ist_info;
|
|
-#if defined(CONFIG_X86_SPEEDSTEP_SMI) || \
|
|
- defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
|
|
-EXPORT_SYMBOL(ist_info);
|
|
-#endif
|
|
-
|
|
-extern void early_cpu_init(void);
|
|
-extern int root_mountflags;
|
|
-
|
|
-unsigned long saved_video_mode;
|
|
-
|
|
-#define RAMDISK_IMAGE_START_MASK 0x07FF
|
|
-#define RAMDISK_PROMPT_FLAG 0x8000
|
|
-#define RAMDISK_LOAD_FLAG 0x4000
|
|
-
|
|
-static char __initdata command_line[COMMAND_LINE_SIZE];
|
|
-
|
|
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
|
|
-struct boot_params __initdata boot_params;
|
|
-#else
|
|
-struct boot_params boot_params;
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Point at the empty zero page to start with. We map the real shared_info
|
|
- * page as soon as fixmap is up and running.
|
|
- */
|
|
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
-
|
|
-unsigned long *phys_to_machine_mapping;
|
|
-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[16];
|
|
-EXPORT_SYMBOL(phys_to_machine_mapping);
|
|
-
|
|
-/* Raw start-of-day parameters from the hypervisor. */
|
|
-start_info_t *xen_start_info;
|
|
-EXPORT_SYMBOL(xen_start_info);
|
|
-
|
|
-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
-struct edd edd;
|
|
-#ifdef CONFIG_EDD_MODULE
|
|
-EXPORT_SYMBOL(edd);
|
|
-#endif
|
|
-#ifndef CONFIG_XEN
|
|
-/**
|
|
- * copy_edd() - Copy the BIOS EDD information
|
|
- * from boot_params into a safe place.
|
|
- *
|
|
- */
|
|
-static inline void copy_edd(void)
|
|
-{
|
|
- memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
|
|
- sizeof(edd.mbr_signature));
|
|
- memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
|
|
- edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
|
|
- edd.edd_info_nr = boot_params.eddbuf_entries;
|
|
-}
|
|
-#endif
|
|
-#else
|
|
-static inline void copy_edd(void)
|
|
-{
|
|
-}
|
|
-#endif
|
|
-
|
|
-int __initdata user_defined_memmap;
|
|
-
|
|
-/*
|
|
- * "mem=nopentium" disables the 4MB page tables.
|
|
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
|
|
- * to <mem>, overriding the bios size.
|
|
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
|
|
- * <start> to <start>+<mem>, overriding the bios size.
|
|
- *
|
|
- * HPA tells me bootloaders need to parse mem=, so no new
|
|
- * option should be mem= [also see Documentation/i386/boot.txt]
|
|
- */
|
|
-static int __init parse_mem(char *arg)
|
|
-{
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- if (strcmp(arg, "nopentium") == 0) {
|
|
- setup_clear_cpu_cap(X86_FEATURE_PSE);
|
|
- } else {
|
|
- /* If the user specifies memory size, we
|
|
- * limit the BIOS-provided memory map to
|
|
- * that size. exactmap can be used to specify
|
|
- * the exact map. mem=number can be used to
|
|
- * trim the existing memory map.
|
|
- */
|
|
- unsigned long long mem_size;
|
|
-
|
|
- mem_size = memparse(arg, &arg);
|
|
- limit_regions(mem_size);
|
|
- user_defined_memmap = 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-early_param("mem", parse_mem);
|
|
-
|
|
-#ifdef CONFIG_PROC_VMCORE
|
|
-/* elfcorehdr= specifies the location of elf core header
|
|
- * stored by the crashed kernel.
|
|
- */
|
|
-static int __init parse_elfcorehdr(char *arg)
|
|
-{
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- elfcorehdr_addr = memparse(arg, &arg);
|
|
- return 0;
|
|
-}
|
|
-early_param("elfcorehdr", parse_elfcorehdr);
|
|
-#endif /* CONFIG_PROC_VMCORE */
|
|
-
|
|
-/*
|
|
- * highmem=size forces highmem to be exactly 'size' bytes.
|
|
- * This works even on boxes that have no highmem otherwise.
|
|
- * This also works to reduce highmem size on bigger boxes.
|
|
- */
|
|
-static int __init parse_highmem(char *arg)
|
|
-{
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
|
|
- return 0;
|
|
-}
|
|
-early_param("highmem", parse_highmem);
|
|
-
|
|
-/*
|
|
- * vmalloc=size forces the vmalloc area to be exactly 'size'
|
|
- * bytes. This can be used to increase (or decrease) the
|
|
- * vmalloc area - the default is 128m.
|
|
- */
|
|
-static int __init parse_vmalloc(char *arg)
|
|
-{
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- __VMALLOC_RESERVE = memparse(arg, &arg);
|
|
- return 0;
|
|
-}
|
|
-early_param("vmalloc", parse_vmalloc);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * reservetop=size reserves a hole at the top of the kernel address space which
|
|
- * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
|
|
- * so relocating the fixmap can be done before paging initialization.
|
|
- */
|
|
-static int __init parse_reservetop(char *arg)
|
|
-{
|
|
- unsigned long address;
|
|
-
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- address = memparse(arg, &arg);
|
|
- reserve_top_address(address);
|
|
- return 0;
|
|
-}
|
|
-early_param("reservetop", parse_reservetop);
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Determine low and high memory ranges:
|
|
- */
|
|
-unsigned long __init find_max_low_pfn(void)
|
|
-{
|
|
- unsigned long max_low_pfn;
|
|
-
|
|
- max_low_pfn = max_pfn;
|
|
- if (max_low_pfn > MAXMEM_PFN) {
|
|
- if (highmem_pages == -1)
|
|
- highmem_pages = max_pfn - MAXMEM_PFN;
|
|
- if (highmem_pages + MAXMEM_PFN < max_pfn)
|
|
- max_pfn = MAXMEM_PFN + highmem_pages;
|
|
- if (highmem_pages + MAXMEM_PFN > max_pfn) {
|
|
- printk("only %luMB highmem pages available, ignoring highmem size of %uMB.\n", pages_to_mb(max_pfn - MAXMEM_PFN), pages_to_mb(highmem_pages));
|
|
- highmem_pages = 0;
|
|
- }
|
|
- max_low_pfn = MAXMEM_PFN;
|
|
-#ifndef CONFIG_HIGHMEM
|
|
- /* Maximum memory usable is what is directly addressable */
|
|
- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
|
|
- MAXMEM>>20);
|
|
- if (max_pfn > MAX_NONPAE_PFN)
|
|
- printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
|
|
- else
|
|
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
|
|
- max_pfn = MAXMEM_PFN;
|
|
-#else /* !CONFIG_HIGHMEM */
|
|
-#ifndef CONFIG_HIGHMEM64G
|
|
- if (max_pfn > MAX_NONPAE_PFN) {
|
|
- max_pfn = MAX_NONPAE_PFN;
|
|
- printk(KERN_WARNING "Warning only 4GB will be used.\n");
|
|
- printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
|
|
- }
|
|
-#endif /* !CONFIG_HIGHMEM64G */
|
|
-#endif /* !CONFIG_HIGHMEM */
|
|
- } else {
|
|
- if (highmem_pages == -1)
|
|
- highmem_pages = 0;
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- if (highmem_pages >= max_pfn) {
|
|
- printk(KERN_ERR "highmem size specified (%uMB) is bigger than pages available (%luMB)!.\n", pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
|
|
- highmem_pages = 0;
|
|
- }
|
|
- if (highmem_pages) {
|
|
- if (max_low_pfn-highmem_pages < 64*1024*1024/PAGE_SIZE){
|
|
- printk(KERN_ERR "highmem size %uMB results in smaller than 64MB lowmem, ignoring it.\n", pages_to_mb(highmem_pages));
|
|
- highmem_pages = 0;
|
|
- }
|
|
- max_low_pfn -= highmem_pages;
|
|
- }
|
|
-#else
|
|
- if (highmem_pages)
|
|
- printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
|
|
-#endif
|
|
- }
|
|
- return max_low_pfn;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-#define BIOS_LOWMEM_KILOBYTES 0x413
|
|
-
|
|
-/*
|
|
- * The BIOS places the EBDA/XBDA at the top of conventional
|
|
- * memory, and usually decreases the reported amount of
|
|
- * conventional memory (int 0x12) too. This also contains a
|
|
- * workaround for Dell systems that neglect to reserve EBDA.
|
|
- * The same workaround also avoids a problem with the AMD768MPX
|
|
- * chipset: reserve a page before VGA to prevent PCI prefetch
|
|
- * into it (errata #56). Usually the page is reserved anyways,
|
|
- * unless you have no PS/2 mouse plugged in.
|
|
- */
|
|
-static void __init reserve_ebda_region(void)
|
|
-{
|
|
- unsigned int lowmem, ebda_addr;
|
|
-
|
|
- /* To determine the position of the EBDA and the */
|
|
- /* end of conventional memory, we need to look at */
|
|
- /* the BIOS data area. In a paravirtual environment */
|
|
- /* that area is absent. We'll just have to assume */
|
|
- /* that the paravirt case can handle memory setup */
|
|
- /* correctly, without our help. */
|
|
- if (paravirt_enabled())
|
|
- return;
|
|
-
|
|
- /* end of low (conventional) memory */
|
|
- lowmem = *(unsigned short *)__va(BIOS_LOWMEM_KILOBYTES);
|
|
- lowmem <<= 10;
|
|
-
|
|
- /* start of EBDA area */
|
|
- ebda_addr = get_bios_ebda();
|
|
-
|
|
- /* Fixup: bios puts an EBDA in the top 64K segment */
|
|
- /* of conventional memory, but does not adjust lowmem. */
|
|
- if ((lowmem - ebda_addr) <= 0x10000)
|
|
- lowmem = ebda_addr;
|
|
-
|
|
- /* Fixup: bios does not report an EBDA at all. */
|
|
- /* Some old Dells seem to need 4k anyhow (bugzilla 2990) */
|
|
- if ((ebda_addr == 0) && (lowmem >= 0x9f000))
|
|
- lowmem = 0x9f000;
|
|
-
|
|
- /* Paranoia: should never happen, but... */
|
|
- if ((lowmem == 0) || (lowmem >= 0x100000))
|
|
- lowmem = 0x9f000;
|
|
-
|
|
- /* reserve all memory between lowmem and the 1MB mark */
|
|
- reserve_bootmem(lowmem, 0x100000 - lowmem, BOOTMEM_DEFAULT);
|
|
-}
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_NEED_MULTIPLE_NODES
|
|
-static void __init setup_bootmem_allocator(void);
|
|
-static unsigned long __init setup_memory(void)
|
|
-{
|
|
- /*
|
|
- * partially used pages are not usable - thus
|
|
- * we are rounding upwards:
|
|
- */
|
|
- min_low_pfn = PFN_UP(__pa(xen_start_info->pt_base)) +
|
|
- xen_start_info->nr_pt_frames;
|
|
-
|
|
- max_low_pfn = find_max_low_pfn();
|
|
-
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- highstart_pfn = highend_pfn = max_pfn;
|
|
- if (max_pfn > max_low_pfn) {
|
|
- highstart_pfn = max_low_pfn;
|
|
- }
|
|
- printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
|
|
- pages_to_mb(highend_pfn - highstart_pfn));
|
|
- num_physpages = highend_pfn;
|
|
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
|
|
-#else
|
|
- num_physpages = max_low_pfn;
|
|
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
|
|
-#endif
|
|
-#ifdef CONFIG_FLATMEM
|
|
- max_mapnr = num_physpages;
|
|
-#endif
|
|
- printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
|
|
- pages_to_mb(max_low_pfn));
|
|
-
|
|
- setup_bootmem_allocator();
|
|
-
|
|
- return max_low_pfn;
|
|
-}
|
|
-
|
|
-static void __init zone_sizes_init(void)
|
|
-{
|
|
- unsigned long max_zone_pfns[MAX_NR_ZONES];
|
|
- memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
|
|
- max_zone_pfns[ZONE_DMA] =
|
|
- virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
|
|
- max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
|
|
- add_active_range(0, 0, min(xen_start_info->nr_pages, highend_pfn));
|
|
- add_active_range(0, highend_pfn, highend_pfn);
|
|
-#else
|
|
- add_active_range(0, 0, min(xen_start_info->nr_pages, max_low_pfn));
|
|
- add_active_range(0, max_low_pfn, max_low_pfn);
|
|
-#endif
|
|
-
|
|
- free_area_init_nodes(max_zone_pfns);
|
|
-}
|
|
-#else
|
|
-extern unsigned long __init setup_memory(void);
|
|
-extern void zone_sizes_init(void);
|
|
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
|
|
-
|
|
-static inline unsigned long long get_total_mem(void)
|
|
-{
|
|
- unsigned long long total;
|
|
-
|
|
- total = max_low_pfn - min_low_pfn;
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- total += highend_pfn - highstart_pfn;
|
|
-#endif
|
|
-
|
|
- return total << PAGE_SHIFT;
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_KEXEC
|
|
-#ifndef CONFIG_XEN
-static void __init reserve_crashkernel(void)
-{
- unsigned long long total_mem;
- unsigned long long crash_size, crash_base;
- int ret;
-
- total_mem = get_total_mem();
-
- ret = parse_crashkernel(boot_command_line, total_mem,
- &crash_size, &crash_base);
- if (ret == 0 && crash_size > 0) {
- if (crash_base > 0) {
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
- "for crashkernel (System RAM: %ldMB)\n",
- (unsigned long)(crash_size >> 20),
- (unsigned long)(crash_base >> 20),
- (unsigned long)(total_mem >> 20));
-
- if (reserve_bootmem(crash_base, crash_size,
- BOOTMEM_EXCLUSIVE) < 0) {
- printk(KERN_INFO "crashkernel reservation "
- "failed - memory is in use\n");
- return;
- }
-
- crashk_res.start = crash_base;
- crashk_res.end = crash_base + crash_size - 1;
- } else
- printk(KERN_INFO "crashkernel reservation failed - "
- "you have to specify a base address\n");
- }
-}
-#else
-#define reserve_crashkernel xen_machine_kexec_setup_resources
-#endif
-#else
-static inline void __init reserve_crashkernel(void)
-{}
-#endif
-
-#ifdef CONFIG_BLK_DEV_INITRD
-
-static bool do_relocate_initrd = false;
-
-static void __init reserve_initrd(void)
-{
- unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
- unsigned long ramdisk_size = xen_start_info->mod_len;
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
- unsigned long ramdisk_here;
-
- initrd_start = 0;
-
- if (!xen_start_info->mod_start || !ramdisk_size)
- return; /* No initrd provided by bootloader */
-
- if (ramdisk_end < ramdisk_image) {
- printk(KERN_ERR "initrd wraps around end of memory, "
- "disabling initrd\n");
- return;
- }
- if (ramdisk_size >= end_of_lowmem/2) {
- printk(KERN_ERR "initrd too large to handle, "
- "disabling initrd\n");
- return;
- }
- if (ramdisk_end <= end_of_lowmem) {
- /* All in lowmem, easy case */
- reserve_bootmem(ramdisk_image, ramdisk_size, BOOTMEM_DEFAULT);
- initrd_start = ramdisk_image + PAGE_OFFSET;
- initrd_end = initrd_start+ramdisk_size;
- return;
- }
-
- /* We need to move the initrd down into lowmem */
- ramdisk_here = (end_of_lowmem - ramdisk_size) & PAGE_MASK;
-
- /* Note: this includes all the lowmem currently occupied by
- the initrd, we rely on that fact to keep the data intact. */
- reserve_bootmem(ramdisk_here, ramdisk_size, BOOTMEM_DEFAULT);
- initrd_start = ramdisk_here + PAGE_OFFSET;
- initrd_end = initrd_start + ramdisk_size;
-
- do_relocate_initrd = true;
-}
-
-#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
-
-static void __init relocate_initrd(void)
-{
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
- unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT;
- unsigned long ramdisk_here;
- unsigned long slop, clen, mapaddr;
- char *p, *q;
-
- if (!do_relocate_initrd)
- return;
-
- ramdisk_here = initrd_start - PAGE_OFFSET;
-
- q = (char *)initrd_start;
-
- /* Copy any lowmem portion of the initrd */
- if (ramdisk_image < end_of_lowmem) {
- clen = end_of_lowmem - ramdisk_image;
- p = (char *)__va(ramdisk_image);
- memcpy(q, p, clen);
- q += clen;
- ramdisk_image += clen;
- ramdisk_size -= clen;
- }
-
- /* Copy the highmem portion of the initrd */
- while (ramdisk_size) {
- slop = ramdisk_image & ~PAGE_MASK;
- clen = ramdisk_size;
- if (clen > MAX_MAP_CHUNK-slop)
- clen = MAX_MAP_CHUNK-slop;
- mapaddr = ramdisk_image & PAGE_MASK;
- p = early_ioremap(mapaddr, clen+slop);
- memcpy(q, p+slop, clen);
- early_iounmap(p, clen+slop);
- q += clen;
- ramdisk_image += clen;
- ramdisk_size -= clen;
- }
-}
-
-#endif /* CONFIG_BLK_DEV_INITRD */
-
-void __init setup_bootmem_allocator(void)
-{
- unsigned long bootmap_size;
- /*
- * Initialize the boot-time allocator (with low memory only):
- */
- bootmap_size = init_bootmem(min_low_pfn, max_low_pfn);
-
- register_bootmem_low_pages(max_low_pfn);
-
- /*
- * Reserve the bootmem bitmap itself as well. We do this in two
- * steps (first step was init_bootmem()) because this catches
- * the (very unlikely) case of us accidentally initializing the
- * bootmem allocator with an invalid RAM area.
- */
- reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
- bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text),
- BOOTMEM_DEFAULT);
-
-#ifndef CONFIG_XEN
- /*
- * reserve physical page 0 - it's a special BIOS page on many boxes,
- * enabling clean reboots, SMP operation, laptop functions.
- */
- reserve_bootmem(0, PAGE_SIZE, BOOTMEM_DEFAULT);
-
- /* reserve EBDA region */
- reserve_ebda_region();
-
-#ifdef CONFIG_SMP
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- */
- reserve_bootmem(PAGE_SIZE, PAGE_SIZE, BOOTMEM_DEFAULT);
-#endif
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
-#endif
-#endif /* !CONFIG_XEN */
-
-#ifdef CONFIG_BLK_DEV_INITRD
- reserve_initrd();
-#endif
- numa_kva_reserve();
- reserve_crashkernel();
-
- reserve_ibft_region();
-}
-
-/*
- * The node 0 pgdat is initialized before all of these because
- * it's needed for bootmem. node>0 pgdats have their virtual
- * space allocated before the pagetables are in place to access
|
|
- * them, so they can't be cleared then.
|
|
- *
|
|
- * This should all compile down to nothing when NUMA is off.
|
|
- */
|
|
-static void __init remapped_pgdat_init(void)
|
|
-{
|
|
- int nid;
|
|
-
|
|
- for_each_online_node(nid) {
|
|
- if (nid != 0)
|
|
- memset(NODE_DATA(nid), 0, sizeof(struct pglist_data));
|
|
- }
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_MCA
|
|
-static void set_mca_bus(int x)
|
|
-{
|
|
- MCA_bus = x;
|
|
-}
|
|
-#else
|
|
-static void set_mca_bus(int x) { }
|
|
-#endif
|
|
-
|
|
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
|
|
-char * __init __attribute__((weak)) memory_setup(void)
|
|
-{
|
|
- return machine_specific_memory_setup();
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
-/*
|
|
- * In the golden day, when everything among i386 and x86_64 will be
|
|
- * integrated, this will not live here
|
|
- */
|
|
-void *x86_cpu_to_node_map_early_ptr;
|
|
-int x86_cpu_to_node_map_init[NR_CPUS] = {
|
|
- [0 ... NR_CPUS-1] = NUMA_NO_NODE
|
|
-};
|
|
-DEFINE_PER_CPU(int, x86_cpu_to_node_map) = NUMA_NO_NODE;
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Determine if we were loaded by an EFI loader. If so, then we have also been
|
|
- * passed the efi memmap, systab, etc., so we should use these data structures
|
|
- * for initialization. Note, the efi init code path is determined by the
|
|
- * global efi_enabled. This allows the same kernel image to be used on existing
|
|
- * systems (with a traditional BIOS) as well as on EFI systems.
|
|
- */
|
|
-void __init setup_arch(char **cmdline_p)
|
|
-{
|
|
- int i, j, k, fpp;
|
|
- struct physdev_set_iopl set_iopl;
|
|
- unsigned long max_low_pfn;
|
|
- unsigned long p2m_pages;
|
|
-
|
|
- /* Force a quick death if the kernel panics (not domain 0). */
|
|
- extern int panic_timeout;
|
|
- if (!panic_timeout && !is_initial_xendomain())
|
|
- panic_timeout = 1;
|
|
-
|
|
- /* Register a call for panic conditions. */
|
|
- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
|
|
-
|
|
- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
- VMASST_TYPE_4gb_segments));
|
|
- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
- VMASST_TYPE_writable_pagetables));
|
|
-
|
|
- memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
|
|
- pre_setup_arch_hook();
|
|
- early_cpu_init();
|
|
- early_ioremap_init();
|
|
-#ifdef CONFIG_SMP
|
|
- prefill_possible_map();
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_EFI
|
|
- if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
|
- "EL32", 4))
|
|
- efi_enabled = 1;
|
|
-#endif
|
|
-
|
|
- /* This must be initialized to UNNAMED_MAJOR for ipconfig to work
|
|
- properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd.
|
|
- */
|
|
- ROOT_DEV = MKDEV(UNNAMED_MAJOR,0);
|
|
- screen_info = boot_params.screen_info;
|
|
- copy_edid();
|
|
- apm_info.bios = boot_params.apm_bios_info;
|
|
- ist_info = boot_params.ist_info;
|
|
- saved_video_mode = boot_params.hdr.vid_mode;
|
|
- if( boot_params.sys_desc_table.length != 0 ) {
|
|
- set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2);
|
|
- machine_id = boot_params.sys_desc_table.table[0];
|
|
- machine_submodel_id = boot_params.sys_desc_table.table[1];
|
|
- BIOS_revision = boot_params.sys_desc_table.table[2];
|
|
- }
|
|
- bootloader_type = boot_params.hdr.type_of_loader;
|
|
-
|
|
- if (is_initial_xendomain()) {
|
|
- const struct dom0_vga_console_info *info =
|
|
- (void *)((char *)xen_start_info +
|
|
- xen_start_info->console.dom0.info_off);
|
|
-
|
|
- dom0_init_screen_info(info,
|
|
- xen_start_info->console.dom0.info_size);
|
|
- xen_start_info->console.domU.mfn = 0;
|
|
- xen_start_info->console.domU.evtchn = 0;
|
|
- } else
|
|
- screen_info.orig_video_isVGA = 0;
|
|
-
|
|
-#ifdef CONFIG_BLK_DEV_RAM
|
|
- rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
|
- rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
|
|
- rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
|
|
-#endif
|
|
-
|
|
- ARCH_SETUP
|
|
-
|
|
- printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
|
- print_memory_map(memory_setup());
|
|
-
|
|
- copy_edd();
|
|
-
|
|
- if (!boot_params.hdr.root_flags)
|
|
- root_mountflags &= ~MS_RDONLY;
|
|
- init_mm.start_code = (unsigned long) _text;
|
|
- init_mm.end_code = (unsigned long) _etext;
|
|
- init_mm.end_data = (unsigned long) _edata;
|
|
- init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
|
|
- xen_start_info->nr_pt_frames) << PAGE_SHIFT;
|
|
-
|
|
- code_resource.start = virt_to_phys(_text);
|
|
- code_resource.end = virt_to_phys(_etext)-1;
|
|
- data_resource.start = virt_to_phys(_etext);
|
|
- data_resource.end = virt_to_phys(_edata)-1;
|
|
- bss_resource.start = virt_to_phys(&__bss_start);
|
|
- bss_resource.end = virt_to_phys(&__bss_stop)-1;
|
|
-
|
|
- if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
|
|
- i = COMMAND_LINE_SIZE;
|
|
- memcpy(boot_command_line, xen_start_info->cmd_line, i);
|
|
- boot_command_line[i - 1] = '\0';
|
|
- parse_early_param();
|
|
-
|
|
- if (user_defined_memmap) {
|
|
- printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
- print_memory_map("user");
|
|
- }
|
|
-
|
|
- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
|
- *cmdline_p = command_line;
|
|
-
|
|
- if (efi_enabled)
|
|
- efi_init();
|
|
-
|
|
- /* update e820 for memory not covered by WB MTRRs */
|
|
- propagate_e820_map();
|
|
- mtrr_bp_init();
|
|
-#ifndef CONFIG_XEN
|
|
- if (mtrr_trim_uncached_memory(max_pfn))
|
|
- propagate_e820_map();
|
|
-#endif
|
|
-
|
|
- max_low_pfn = setup_memory();
|
|
-
|
|
-#ifdef CONFIG_KVM_CLOCK
|
|
- kvmclock_init();
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_VMI
|
|
- /*
|
|
- * Must be after max_low_pfn is determined, and before kernel
|
|
- * pagetables are setup.
|
|
- */
|
|
- vmi_init();
|
|
-#endif
|
|
- kvm_guest_init();
|
|
-
|
|
- /*
|
|
- * NOTE: before this point _nobody_ is allowed to allocate
|
|
- * any memory using the bootmem allocator. Although the
|
|
- * allocator is now initialised only the first 8Mb of the kernel
|
|
- * virtual address space has been mapped. All allocations before
|
|
- * paging_init() has completed must use the alloc_bootmem_low_pages()
|
|
- * variant (which allocates DMA'able memory) and care must be taken
|
|
- * not to exceed the 8Mb limit.
|
|
- */
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
- smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
|
|
-#endif
|
|
- paging_init();
|
|
-
|
|
- /*
|
|
- * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
|
|
- */
|
|
-
|
|
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
- if (init_ohci1394_dma_early)
|
|
- init_ohci1394_dma_on_all_controllers();
|
|
-#endif
|
|
-
|
|
- remapped_pgdat_init();
|
|
- sparse_init();
|
|
- zone_sizes_init();
|
|
-
|
|
-#ifdef CONFIG_X86_FIND_SMP_CONFIG
|
|
- /*
|
|
- * Find and reserve possible boot-time SMP configuration:
|
|
- */
|
|
- find_smp_config();
|
|
-#endif
|
|
-
|
|
- p2m_pages = max_pfn;
|
|
- if (xen_start_info->nr_pages > max_pfn) {
|
|
- /*
|
|
- * the max_pfn was shrunk (probably by mem= or highmem=
|
|
- * kernel parameter); shrink reservation with the HV
|
|
- */
|
|
- struct xen_memory_reservation reservation = {
|
|
- .address_bits = 0,
|
|
- .extent_order = 0,
|
|
- .domid = DOMID_SELF
|
|
- };
|
|
- unsigned int difference;
|
|
- int ret;
|
|
-
|
|
- difference = xen_start_info->nr_pages - max_pfn;
|
|
-
|
|
- set_xen_guest_handle(reservation.extent_start,
|
|
- ((unsigned long *)xen_start_info->mfn_list) + max_pfn);
|
|
- reservation.nr_extents = difference;
|
|
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
|
|
- &reservation);
|
|
- BUG_ON (ret != difference);
|
|
- }
|
|
- else if (max_pfn > xen_start_info->nr_pages)
|
|
- p2m_pages = xen_start_info->nr_pages;
|
|
-
|
|
- /* Make sure we have a correctly sized P->M table. */
|
|
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
- phys_to_machine_mapping = alloc_bootmem_low_pages(
|
|
- max_pfn * sizeof(unsigned long));
|
|
- memset(phys_to_machine_mapping, ~0,
|
|
- max_pfn * sizeof(unsigned long));
|
|
- memcpy(phys_to_machine_mapping,
|
|
- (unsigned long *)xen_start_info->mfn_list,
|
|
- p2m_pages * sizeof(unsigned long));
|
|
- free_bootmem(
|
|
- __pa(xen_start_info->mfn_list),
|
|
- PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
|
|
- sizeof(unsigned long))));
|
|
-
|
|
- /*
|
|
- * Initialise the list of the frames that specify the list of
|
|
- * frames that make up the p2m table. Used by save/restore
|
|
- */
|
|
- pfn_to_mfn_frame_list_list = alloc_bootmem_low_pages(PAGE_SIZE);
|
|
-
|
|
- fpp = PAGE_SIZE/sizeof(unsigned long);
|
|
- for (i=0, j=0, k=-1; i< max_pfn; i+=fpp, j++) {
|
|
- if ((j % fpp) == 0) {
|
|
- k++;
|
|
- BUG_ON(k>=16);
|
|
- pfn_to_mfn_frame_list[k] =
|
|
- alloc_bootmem_low_pages(PAGE_SIZE);
|
|
- pfn_to_mfn_frame_list_list[k] =
|
|
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
|
|
- j=0;
|
|
- }
|
|
- pfn_to_mfn_frame_list[k][j] =
|
|
- virt_to_mfn(&phys_to_machine_mapping[i]);
|
|
- }
|
|
- HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
|
|
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
|
|
- virt_to_mfn(pfn_to_mfn_frame_list_list);
|
|
- }
|
|
-
|
|
- /* Mark all ISA DMA channels in-use - using them wouldn't work. */
|
|
- for (i = 0; i < MAX_DMA_CHANNELS; ++i)
|
|
- if (i != 4 && request_dma(i, "xen") != 0)
|
|
- BUG();
|
|
-
|
|
- /*
|
|
- * NOTE: at this point the bootmem allocator is fully available.
|
|
- */
|
|
-
|
|
-#ifdef CONFIG_BLK_DEV_INITRD
|
|
- relocate_initrd();
|
|
-#endif
|
|
-
|
|
- paravirt_post_allocator_init();
|
|
-
|
|
- if (is_initial_xendomain())
|
|
- dmi_scan_machine();
|
|
-
|
|
- io_delay_init();
|
|
-
|
|
-#if defined(CONFIG_X86_SMP) && !defined(CONFIG_XEN)
|
|
- /*
|
|
- * setup to use the early static init tables during kernel startup
|
|
- * X86_SMP will exclude sub-arches that don't deal well with it.
|
|
- */
|
|
- x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
|
|
- x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
|
|
-#ifdef CONFIG_NUMA
|
|
- x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_X86_GENERICARCH
|
|
- generic_apic_probe();
|
|
-#endif
|
|
-
|
|
- set_iopl.iopl = 1;
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
- if (!is_initial_xendomain()) {
|
|
- printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
|
|
- acpi_disabled = 1;
|
|
- acpi_ht = 0;
|
|
- }
|
|
-
|
|
- /*
|
|
- * Parse the ACPI tables for possible boot-time SMP configuration.
|
|
- */
|
|
- acpi_boot_table_init();
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- early_quirks();
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
- acpi_boot_init();
|
|
-
|
|
-#if defined(CONFIG_SMP) && defined(CONFIG_X86_PC)
|
|
- if (def_to_bigsmp)
|
|
- printk(KERN_WARNING "More than 8 CPUs detected and "
|
|
- "CONFIG_X86_PC cannot handle it.\nUse "
|
|
- "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n");
|
|
-#endif
|
|
-#endif
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- if (smp_found_config)
|
|
- get_smp_config();
|
|
-#endif
|
|
-
|
|
- e820_register_memory();
|
|
- e820_mark_nosave_regions();
|
|
-
|
|
- if (is_initial_xendomain()) {
|
|
-#ifdef CONFIG_VT
|
|
-#if defined(CONFIG_VGA_CONSOLE)
|
|
- if (!efi_enabled ||
|
|
- (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
|
|
- conswitchp = &vga_con;
|
|
-#elif defined(CONFIG_DUMMY_CONSOLE)
|
|
- conswitchp = &dummy_con;
|
|
-#endif
|
|
-#endif
|
|
- } else {
|
|
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
|
|
- conswitchp = &dummy_con;
|
|
-#endif
|
|
- }
|
|
-}
|
|
-
|
|
-static int
|
|
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
|
|
-{
|
|
- HYPERVISOR_shutdown(SHUTDOWN_crash);
|
|
- /* we're never actually going to get here... */
|
|
- return NOTIFY_DONE;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Request address space for all standard resources
|
|
- *
|
|
- * This is called just before pcibios_init(), which is also a
|
|
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
|
|
- */
|
|
-static int __init request_standard_resources(void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- /* Nothing to do if not running in dom0. */
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
-
|
|
- printk(KERN_INFO "Setting up standard PCI resources\n");
|
|
- init_iomem_resources(&code_resource, &data_resource, &bss_resource);
|
|
-
|
|
- request_resource(&iomem_resource, &video_ram_resource);
|
|
-
|
|
- /* request I/O space for devices used on all i[345]86 PCs */
|
|
- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
- request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-subsys_initcall(request_standard_resources);
--- head-2011-03-11.orig/arch/x86/kernel/setup_64-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
@@ -1,1442 +0,0 @@
-/*
|
|
- * Copyright (C) 1995 Linus Torvalds
|
|
- */
|
|
-
|
|
-/*
|
|
- * This file handles the architecture-dependent parts of initialization
|
|
- */
|
|
-
|
|
-#include <linux/errno.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/mm.h>
|
|
-#include <linux/stddef.h>
|
|
-#include <linux/unistd.h>
|
|
-#include <linux/ptrace.h>
|
|
-#include <linux/slab.h>
|
|
-#include <linux/user.h>
|
|
-#include <linux/screen_info.h>
|
|
-#include <linux/ioport.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/initrd.h>
|
|
-#include <linux/highmem.h>
|
|
-#include <linux/bootmem.h>
|
|
-#include <linux/module.h>
|
|
-#include <asm/processor.h>
|
|
-#include <linux/console.h>
|
|
-#include <linux/seq_file.h>
|
|
-#include <linux/crash_dump.h>
|
|
-#include <linux/root_dev.h>
|
|
-#include <linux/pci.h>
|
|
-#include <asm/pci-direct.h>
|
|
-#include <linux/efi.h>
|
|
-#include <linux/acpi.h>
|
|
-#include <linux/kallsyms.h>
|
|
-#include <linux/edd.h>
|
|
-#include <linux/iscsi_ibft.h>
|
|
-#include <linux/mmzone.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/cpufreq.h>
|
|
-#include <linux/dmi.h>
|
|
-#include <linux/dma-mapping.h>
|
|
-#include <linux/ctype.h>
|
|
-#include <linux/sort.h>
|
|
-#include <linux/uaccess.h>
|
|
-#include <linux/init_ohci1394_dma.h>
|
|
-#include <linux/kvm_para.h>
|
|
-
|
|
-#include <asm/mtrr.h>
|
|
-#include <asm/uaccess.h>
|
|
-#include <asm/system.h>
|
|
-#include <asm/vsyscall.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/smp.h>
|
|
-#include <asm/msr.h>
|
|
-#include <asm/desc.h>
|
|
-#include <video/edid.h>
|
|
-#include <asm/e820.h>
|
|
-#include <asm/dma.h>
|
|
-#include <asm/gart.h>
|
|
-#include <asm/mpspec.h>
|
|
-#include <asm/mmu_context.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/numa.h>
|
|
-#include <asm/sections.h>
|
|
-#include <asm/dmi.h>
|
|
-#include <asm/cacheflush.h>
|
|
-#include <asm/mce.h>
|
|
-#include <asm/ds.h>
|
|
-#include <asm/topology.h>
|
|
-#include <asm/pat.h>
|
|
-
|
|
-#include <mach_apic.h>
|
|
-#ifdef CONFIG_XEN
|
|
-#include <linux/percpu.h>
|
|
-#include <xen/interface/physdev.h>
|
|
-#include "setup_arch_pre.h"
|
|
-#include <asm/hypervisor.h>
|
|
-#include <xen/interface/nmi.h>
|
|
-#include <xen/features.h>
|
|
-#include <xen/firmware.h>
|
|
-#include <xen/xencons.h>
|
|
-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
|
|
-#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
|
|
-#include <asm/mach-xen/setup_arch_post.h>
|
|
-#include <xen/interface/memory.h>
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-#include <xen/interface/kexec.h>
|
|
-#endif
|
|
-
|
|
-extern unsigned long start_pfn;
|
|
-extern struct edid_info edid_info;
|
|
-
|
|
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
-
|
|
-static int xen_panic_event(struct notifier_block *, unsigned long, void *);
|
|
-static struct notifier_block xen_panic_block = {
|
|
- xen_panic_event, NULL, 0 /* try to go last */
|
|
-};
|
|
-
|
|
-unsigned long *phys_to_machine_mapping;
|
|
-unsigned long *pfn_to_mfn_frame_list_list, *pfn_to_mfn_frame_list[512];
|
|
-
|
|
-EXPORT_SYMBOL(phys_to_machine_mapping);
|
|
-
|
|
-DEFINE_PER_CPU(multicall_entry_t, multicall_list[8]);
|
|
-DEFINE_PER_CPU(int, nr_multicall_ents);
|
|
-
|
|
-/* Raw start-of-day parameters from the hypervisor. */
|
|
-start_info_t *xen_start_info;
|
|
-EXPORT_SYMBOL(xen_start_info);
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Machine setup..
|
|
- */
|
|
-
|
|
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
|
|
-EXPORT_SYMBOL(boot_cpu_data);
|
|
-
|
|
-__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
-
|
|
-unsigned long mmu_cr4_features;
|
|
-
|
|
-/* Boot loader ID as an integer, for the benefit of proc_dointvec */
|
|
-int bootloader_type;
|
|
-
|
|
-unsigned long saved_video_mode;
|
|
-
|
|
-int force_mwait __cpuinitdata;
|
|
-
|
|
-/*
|
|
- * Early DMI memory
|
|
- */
|
|
-int dmi_alloc_index;
|
|
-char dmi_alloc_data[DMI_MAX_DATA];
|
|
-
|
|
-/*
|
|
- * Setup options
|
|
- */
|
|
-struct screen_info screen_info;
|
|
-EXPORT_SYMBOL(screen_info);
|
|
-struct sys_desc_table_struct {
|
|
- unsigned short length;
|
|
- unsigned char table[0];
|
|
-};
|
|
-
|
|
-struct edid_info edid_info;
|
|
-EXPORT_SYMBOL_GPL(edid_info);
|
|
-
|
|
-extern int root_mountflags;
|
|
-
|
|
-char __initdata command_line[COMMAND_LINE_SIZE];
|
|
-
|
|
-static struct resource standard_io_resources[] = {
|
|
- { .name = "dma1", .start = 0x00, .end = 0x1f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "pic1", .start = 0x20, .end = 0x21,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "timer0", .start = 0x40, .end = 0x43,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "timer1", .start = 0x50, .end = 0x53,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "keyboard", .start = 0x60, .end = 0x60,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "keyboard", .start = 0x64, .end = 0x64,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "dma page reg", .start = 0x80, .end = 0x8f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "pic2", .start = 0xa0, .end = 0xa1,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "dma2", .start = 0xc0, .end = 0xdf,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO },
|
|
- { .name = "fpu", .start = 0xf0, .end = 0xff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO }
|
|
-};
|
|
-
|
|
-#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
|
|
-
|
|
-static struct resource data_resource = {
|
|
- .name = "Kernel data",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_RAM,
|
|
-};
|
|
-static struct resource code_resource = {
|
|
- .name = "Kernel code",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_RAM,
|
|
-};
|
|
-static struct resource bss_resource = {
|
|
- .name = "Kernel bss",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_RAM,
|
|
-};
|
|
-
|
|
-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
|
|
-
|
|
-#ifdef CONFIG_PROC_VMCORE
|
|
-/* elfcorehdr= specifies the location of elf core header
|
|
- * stored by the crashed kernel. This option will be passed
|
|
- * by kexec loader to the capture kernel.
|
|
- */
|
|
-static int __init setup_elfcorehdr(char *arg)
|
|
-{
|
|
- char *end;
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
- elfcorehdr_addr = memparse(arg, &end);
|
|
- return end > arg ? 0 : -EINVAL;
|
|
-}
|
|
-early_param("elfcorehdr", setup_elfcorehdr);
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_NUMA
|
|
-static void __init
|
|
-contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
|
|
-{
|
|
- unsigned long bootmap_size, bootmap;
|
|
-
|
|
- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
|
|
- bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
|
|
- PAGE_SIZE);
|
|
- if (bootmap == -1L)
|
|
- panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
|
- bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
|
|
- e820_register_active_regions(0, start_pfn, end_pfn);
|
|
-#ifdef CONFIG_XEN
|
|
- if (xen_start_info->nr_pages < end_pfn)
|
|
- end_pfn = xen_start_info->nr_pages;
|
|
-#endif
|
|
- free_bootmem_with_active_regions(0, end_pfn);
|
|
- early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
|
|
- reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
|
|
-}
|
|
-#endif
|
|
-
|
|
-#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
-struct edd edd;
|
|
-#ifdef CONFIG_EDD_MODULE
|
|
-EXPORT_SYMBOL(edd);
|
|
-#endif
|
|
-#ifndef CONFIG_XEN
|
|
-/**
|
|
- * copy_edd() - Copy the BIOS EDD information
|
|
- * from boot_params into a safe place.
|
|
- *
|
|
- */
|
|
-static inline void copy_edd(void)
|
|
-{
|
|
- memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
|
|
- sizeof(edd.mbr_signature));
|
|
- memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info));
|
|
- edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries;
|
|
- edd.edd_info_nr = boot_params.eddbuf_entries;
|
|
-}
|
|
-#endif
|
|
-#else
|
|
-static inline void copy_edd(void)
|
|
-{
|
|
-}
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_KEXEC
|
|
-#ifndef CONFIG_XEN
|
|
-static void __init reserve_crashkernel(void)
|
|
-{
|
|
- unsigned long long total_mem;
|
|
- unsigned long long crash_size, crash_base;
|
|
- int ret;
|
|
-
|
|
- total_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT;
|
|
-
|
|
- ret = parse_crashkernel(boot_command_line, total_mem,
|
|
- &crash_size, &crash_base);
|
|
- if (ret == 0 && crash_size) {
|
|
- if (crash_base <= 0) {
|
|
- printk(KERN_INFO "crashkernel reservation failed - "
|
|
- "you have to specify a base address\n");
|
|
- return;
|
|
- }
|
|
-
|
|
- if (reserve_bootmem(crash_base, crash_size,
|
|
- BOOTMEM_EXCLUSIVE) < 0) {
|
|
- printk(KERN_INFO "crashkernel reservation failed - "
|
|
- "memory is in use\n");
|
|
- return;
|
|
- }
|
|
-
|
|
- printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
|
|
- "for crashkernel (System RAM: %ldMB)\n",
|
|
- (unsigned long)(crash_size >> 20),
|
|
- (unsigned long)(crash_base >> 20),
|
|
- (unsigned long)(total_mem >> 20));
|
|
- crashk_res.start = crash_base;
|
|
- crashk_res.end = crash_base + crash_size - 1;
|
|
- insert_resource(&iomem_resource, &crashk_res);
|
|
- }
|
|
-}
|
|
-#else
|
|
-#define reserve_crashkernel xen_machine_kexec_setup_resources
|
|
-#endif
|
|
-#else
|
|
-static inline void __init reserve_crashkernel(void)
|
|
-{}
|
|
-#endif
|
|
-
|
|
-/* Overridden in paravirt.c if CONFIG_PARAVIRT */
|
|
-void __attribute__((weak)) __init memory_setup(void)
|
|
-{
|
|
- machine_specific_memory_setup();
|
|
-}
|
|
-
|
|
-static void __init parse_setup_data(void)
|
|
-{
|
|
- struct setup_data *data;
|
|
- unsigned long pa_data;
|
|
-
|
|
- if (boot_params.hdr.version < 0x0209)
|
|
- return;
|
|
- pa_data = boot_params.hdr.setup_data;
|
|
- while (pa_data) {
|
|
- data = early_ioremap(pa_data, PAGE_SIZE);
|
|
- switch (data->type) {
|
|
- default:
|
|
- break;
|
|
- }
|
|
-#ifndef CONFIG_DEBUG_BOOT_PARAMS
|
|
- free_early(pa_data, pa_data+sizeof(*data)+data->len);
|
|
-#endif
|
|
- pa_data = data->next;
|
|
- early_iounmap(data, PAGE_SIZE);
|
|
- }
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_PCI_MMCONFIG
|
|
-extern void __cpuinit fam10h_check_enable_mmcfg(void);
|
|
-extern void __init check_enable_amd_mmconf_dmi(void);
|
|
-#else
|
|
-void __cpuinit fam10h_check_enable_mmcfg(void)
|
|
-{
|
|
-}
|
|
-void __init check_enable_amd_mmconf_dmi(void)
|
|
-{
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * setup_arch - architecture-specific boot-time initializations
|
|
- *
|
|
- * Note: On x86_64, fixmaps are ready for use even before this is called.
|
|
- */
|
|
-void __init setup_arch(char **cmdline_p)
|
|
-{
|
|
- unsigned i;
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- extern struct e820map machine_e820;
|
|
-
|
|
- printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
|
-
|
|
- /* Register a call for panic conditions. */
|
|
- atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
|
|
-
|
|
- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
- VMASST_TYPE_writable_pagetables));
|
|
-
|
|
- early_ioremap_init();
|
|
-
|
|
- ROOT_DEV = MKDEV(RAMDISK_MAJOR,0);
|
|
- screen_info = boot_params.screen_info;
|
|
-
|
|
- if (is_initial_xendomain()) {
|
|
- const struct dom0_vga_console_info *info =
|
|
- (void *)((char *)xen_start_info +
|
|
- xen_start_info->console.dom0.info_off);
|
|
-
|
|
- dom0_init_screen_info(info,
|
|
- xen_start_info->console.dom0.info_size);
|
|
- xen_start_info->console.domU.mfn = 0;
|
|
- xen_start_info->console.domU.evtchn = 0;
|
|
- } else
|
|
- screen_info.orig_video_isVGA = 0;
|
|
-
|
|
- copy_edid();
|
|
-#else
|
|
- printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
|
-
|
|
- ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
|
|
- screen_info = boot_params.screen_info;
|
|
- edid_info = boot_params.edid_info;
|
|
-#endif /* !CONFIG_XEN */
|
|
- saved_video_mode = boot_params.hdr.vid_mode;
|
|
- bootloader_type = boot_params.hdr.type_of_loader;
|
|
-
|
|
-#ifdef CONFIG_BLK_DEV_RAM
|
|
- rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK;
|
|
- rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0);
|
|
- rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0);
|
|
-#endif
|
|
-#ifdef CONFIG_EFI
|
|
- if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature,
|
|
- "EL64", 4))
|
|
- efi_enabled = 1;
|
|
-#endif
|
|
-
|
|
- ARCH_SETUP
|
|
-
|
|
- memory_setup();
|
|
- copy_edd();
|
|
-
|
|
- if (!boot_params.hdr.root_flags)
|
|
- root_mountflags &= ~MS_RDONLY;
|
|
- init_mm.start_code = (unsigned long) &_text;
|
|
- init_mm.end_code = (unsigned long) &_etext;
|
|
- init_mm.end_data = (unsigned long) &_edata;
|
|
- init_mm.brk = (unsigned long) &_end;
|
|
-
|
|
- code_resource.start = virt_to_phys(&_text);
|
|
- code_resource.end = virt_to_phys(&_etext)-1;
|
|
- data_resource.start = virt_to_phys(&_etext);
|
|
- data_resource.end = virt_to_phys(&_edata)-1;
|
|
- bss_resource.start = virt_to_phys(&__bss_start);
|
|
- bss_resource.end = virt_to_phys(&__bss_stop)-1;
|
|
-
|
|
- early_identify_cpu(&boot_cpu_data);
|
|
-
|
|
- strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
|
- *cmdline_p = command_line;
|
|
-
|
|
- parse_setup_data();
|
|
-
|
|
- parse_early_param();
|
|
-
|
|
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
- if (init_ohci1394_dma_early)
|
|
- init_ohci1394_dma_on_all_controllers();
|
|
-#endif
|
|
-
|
|
- finish_e820_parsing();
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- /* after parse_early_param, so could debug it */
|
|
- insert_resource(&iomem_resource, &code_resource);
|
|
- insert_resource(&iomem_resource, &data_resource);
|
|
- insert_resource(&iomem_resource, &bss_resource);
|
|
-#endif
|
|
-
|
|
- early_gart_iommu_check();
|
|
-
|
|
- e820_register_active_regions(0, 0, -1UL);
|
|
- /*
|
|
- * partially used pages are not usable - thus
|
|
- * we are rounding upwards:
|
|
- */
|
|
- end_pfn = e820_end_of_ram();
|
|
- /* update e820 for memory not covered by WB MTRRs */
|
|
- mtrr_bp_init();
|
|
-#ifndef CONFIG_XEN
|
|
- if (mtrr_trim_uncached_memory(end_pfn)) {
|
|
- e820_register_active_regions(0, 0, -1UL);
|
|
- end_pfn = e820_end_of_ram();
|
|
- }
|
|
-#endif
|
|
-
|
|
- num_physpages = end_pfn;
|
|
- max_mapnr = end_pfn;
|
|
-
|
|
- check_efer();
|
|
-
|
|
- max_pfn_mapped = init_memory_mapping(0, (max_pfn_mapped << PAGE_SHIFT));
|
|
- if (efi_enabled)
|
|
- efi_init();
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- vsmp_init();
|
|
-#endif
|
|
-
|
|
- if (is_initial_xendomain())
|
|
- dmi_scan_machine();
|
|
-
|
|
- io_delay_init();
|
|
-
|
|
-#ifdef CONFIG_KVM_CLOCK
|
|
- kvmclock_init();
|
|
-#endif
|
|
-
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- /* setup to use the early static init tables during kernel startup */
|
|
- x86_cpu_to_apicid_early_ptr = (void *)x86_cpu_to_apicid_init;
|
|
- x86_bios_cpu_apicid_early_ptr = (void *)x86_bios_cpu_apicid_init;
|
|
-#ifdef CONFIG_NUMA
|
|
- x86_cpu_to_node_map_early_ptr = (void *)x86_cpu_to_node_map_init;
|
|
-#endif
|
|
-#endif
|
|
-
|
|
- /* How many end-of-memory variables you have, grandma! */
|
|
- max_low_pfn = end_pfn;
|
|
- max_pfn = end_pfn;
|
|
- high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
|
|
-
|
|
- /* Remove active ranges so rediscovery with NUMA-awareness happens */
|
|
- remove_all_active_ranges();
|
|
-
|
|
-#ifdef CONFIG_ACPI_NUMA
|
|
- /*
|
|
- * Parse SRAT to discover nodes.
|
|
- */
|
|
- acpi_numa_init();
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
- numa_initmem_init(0, end_pfn);
|
|
-#else
|
|
- contig_initmem_init(0, end_pfn);
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- dma32_reserve_bootmem();
|
|
-
|
|
-#ifdef CONFIG_ACPI_SLEEP
|
|
- /*
|
|
- * Reserve low memory region for sleep support.
|
|
- */
|
|
- acpi_reserve_bootmem();
|
|
-#endif
|
|
-
|
|
- if (efi_enabled)
|
|
- efi_reserve_bootmem();
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_BLK_DEV_INITRD
|
|
-#ifdef CONFIG_XEN
|
|
- if (xen_start_info->mod_start) {
|
|
- unsigned long ramdisk_image = __pa(xen_start_info->mod_start);
|
|
- unsigned long ramdisk_size = xen_start_info->mod_len;
|
|
-#else
|
|
- if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
|
|
- unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
|
|
- unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
|
|
-#endif
|
|
- unsigned long ramdisk_end = ramdisk_image + ramdisk_size;
|
|
- unsigned long end_of_mem = end_pfn << PAGE_SHIFT;
|
|
-
|
|
- if (ramdisk_end <= end_of_mem) {
|
|
- /*
|
|
- * don't need to reserve again, already reserved early
|
|
- * in x86_64_start_kernel, and early_res_to_bootmem
|
|
- * convert that to reserved in bootmem
|
|
- */
|
|
- initrd_start = ramdisk_image + PAGE_OFFSET;
|
|
- initrd_end = initrd_start+ramdisk_size;
|
|
-#ifdef CONFIG_XEN
|
|
- initrd_below_start_ok = 1;
|
|
-#endif
|
|
- } else {
|
|
- free_bootmem(ramdisk_image, ramdisk_size);
|
|
- printk(KERN_ERR "initrd extends beyond end of memory "
|
|
- "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
|
|
- ramdisk_end, end_of_mem);
|
|
- initrd_start = 0;
|
|
- }
|
|
- }
|
|
-#endif
|
|
- reserve_crashkernel();
|
|
-
|
|
- reserve_ibft_region();
|
|
-
|
|
- paging_init();
|
|
- map_vsyscall();
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- /*
|
|
- * Find and reserve possible boot-time SMP configuration:
|
|
- */
|
|
- find_smp_config();
|
|
-#endif
|
|
-#ifdef CONFIG_XEN
|
|
- {
|
|
- int i, j, k, fpp;
|
|
- unsigned long p2m_pages;
|
|
-
|
|
- p2m_pages = end_pfn;
|
|
- if (xen_start_info->nr_pages > end_pfn) {
|
|
- /*
|
|
- * the end_pfn was shrunk (probably by mem= or highmem=
|
|
- * kernel parameter); shrink reservation with the HV
|
|
- */
|
|
- struct xen_memory_reservation reservation = {
|
|
- .address_bits = 0,
|
|
- .extent_order = 0,
|
|
- .domid = DOMID_SELF
|
|
- };
|
|
- unsigned int difference;
|
|
- int ret;
|
|
-
|
|
- difference = xen_start_info->nr_pages - end_pfn;
|
|
-
|
|
- set_xen_guest_handle(reservation.extent_start,
|
|
- ((unsigned long *)xen_start_info->mfn_list) + end_pfn);
|
|
- reservation.nr_extents = difference;
|
|
- ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation,
|
|
- &reservation);
|
|
- BUG_ON (ret != difference);
|
|
- }
|
|
- else if (end_pfn > xen_start_info->nr_pages)
|
|
- p2m_pages = xen_start_info->nr_pages;
|
|
-
|
|
- if (!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
- /* Make sure we have a large enough P->M table. */
|
|
- phys_to_machine_mapping = alloc_bootmem_pages(
|
|
- end_pfn * sizeof(unsigned long));
|
|
- memset(phys_to_machine_mapping, ~0,
|
|
- end_pfn * sizeof(unsigned long));
|
|
- memcpy(phys_to_machine_mapping,
|
|
- (unsigned long *)xen_start_info->mfn_list,
|
|
- p2m_pages * sizeof(unsigned long));
|
|
- free_bootmem(
|
|
- __pa(xen_start_info->mfn_list),
|
|
- PFN_PHYS(PFN_UP(xen_start_info->nr_pages *
|
|
- sizeof(unsigned long))));
|
|
-
|
|
- /*
|
|
- * Initialise the list of the frames that specify the
|
|
- * list of frames that make up the p2m table. Used by
|
|
- * save/restore.
|
|
- */
|
|
- pfn_to_mfn_frame_list_list = alloc_bootmem_pages(PAGE_SIZE);
|
|
-
|
|
- fpp = PAGE_SIZE/sizeof(unsigned long);
|
|
- for (i=0, j=0, k=-1; i< end_pfn; i+=fpp, j++) {
|
|
- if ((j % fpp) == 0) {
|
|
- k++;
|
|
- BUG_ON(k>=fpp);
|
|
- pfn_to_mfn_frame_list[k] =
|
|
- alloc_bootmem_pages(PAGE_SIZE);
|
|
- pfn_to_mfn_frame_list_list[k] =
|
|
- virt_to_mfn(pfn_to_mfn_frame_list[k]);
|
|
- j=0;
|
|
- }
|
|
- pfn_to_mfn_frame_list[k][j] =
|
|
- virt_to_mfn(&phys_to_machine_mapping[i]);
|
|
- }
|
|
- HYPERVISOR_shared_info->arch.max_pfn = end_pfn;
|
|
- HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
|
|
- virt_to_mfn(pfn_to_mfn_frame_list_list);
|
|
- }
|
|
-
|
|
- /* Mark all ISA DMA channels in-use - using them wouldn't work. */
|
|
- for (i = 0; i < MAX_DMA_CHANNELS; ++i)
|
|
- if (i != 4 && request_dma(i, "xen") != 0)
|
|
- BUG();
|
|
- }
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
- if (!is_initial_xendomain()) {
|
|
- acpi_disabled = 1;
|
|
- acpi_ht = 0;
|
|
- }
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- early_quirks();
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
- /*
|
|
- * Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
|
|
- * Call this early for SRAT node setup.
|
|
- */
|
|
- acpi_boot_table_init();
|
|
-
|
|
- /*
|
|
- * Read APIC and some other early information from ACPI tables.
|
|
- */
|
|
- acpi_boot_init();
|
|
-#endif
|
|
-
|
|
- init_cpu_to_node();
|
|
-
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- /*
|
|
- * get boot-time SMP configuration:
|
|
- */
|
|
- if (smp_found_config)
|
|
- get_smp_config();
|
|
-#ifndef CONFIG_XEN
|
|
- init_apic_mappings();
|
|
- ioapic_init_mappings();
|
|
-#endif
|
|
-#endif
|
|
-#if defined(CONFIG_XEN) && defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU)
|
|
- prefill_possible_map();
|
|
-#endif
|
|
-
|
|
- kvm_guest_init();
|
|
-
|
|
- /*
|
|
- * We trust e820 completely. No explicit ROM probing in memory.
|
|
- */
|
|
-#ifdef CONFIG_XEN
|
|
- if (is_initial_xendomain())
|
|
- e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
|
|
-#else
|
|
- e820_reserve_resources(e820.map, e820.nr_map);
|
|
- e820_mark_nosave_regions();
|
|
-#endif
|
|
-
|
|
- /* request I/O space for devices used on all i[345]86 PCs */
|
|
- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
- request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- if (is_initial_xendomain())
|
|
- e820_setup_gap(machine_e820.map, machine_e820.nr_map);
|
|
-#else
|
|
- e820_setup_gap(e820.map, e820.nr_map);
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- {
|
|
- struct physdev_set_iopl set_iopl;
|
|
-
|
|
- set_iopl.iopl = 1;
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
|
|
-
|
|
- if (is_initial_xendomain()) {
|
|
-#ifdef CONFIG_VT
|
|
-#if defined(CONFIG_VGA_CONSOLE)
|
|
- conswitchp = &vga_con;
|
|
-#elif defined(CONFIG_DUMMY_CONSOLE)
|
|
- conswitchp = &dummy_con;
|
|
-#endif
|
|
-#endif
|
|
- } else {
|
|
-#if defined(CONFIG_VT) && defined(CONFIG_DUMMY_CONSOLE)
|
|
- conswitchp = &dummy_con;
|
|
-#endif
|
|
- }
|
|
- }
|
|
-#else /* CONFIG_XEN */
|
|
-
|
|
-#ifdef CONFIG_VT
|
|
-#if defined(CONFIG_VGA_CONSOLE)
|
|
- if (!efi_enabled || (efi_mem_type(0xa0000) != EFI_CONVENTIONAL_MEMORY))
|
|
- conswitchp = &vga_con;
|
|
-#elif defined(CONFIG_DUMMY_CONSOLE)
|
|
- conswitchp = &dummy_con;
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
- /* do this before identify_cpu for boot cpu */
|
|
- check_enable_amd_mmconf_dmi();
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-static int
|
|
-xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
|
|
-{
|
|
- HYPERVISOR_shutdown(SHUTDOWN_crash);
|
|
- /* we're never actually going to get here... */
|
|
- return NOTIFY_DONE;
|
|
-}
|
|
-#endif /* !CONFIG_XEN */
|
|
-
|
|
-
|
|
-static int __cpuinit get_model_name(struct cpuinfo_x86 *c)
|
|
-{
|
|
- unsigned int *v;
|
|
-
|
|
- if (c->extended_cpuid_level < 0x80000004)
|
|
- return 0;
|
|
-
|
|
- v = (unsigned int *) c->x86_model_id;
|
|
- cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
|
|
- cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
|
|
- cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
|
|
- c->x86_model_id[48] = 0;
|
|
- return 1;
|
|
-}
|
|
-
|
|
-
|
|
-static void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
|
|
-{
|
|
- unsigned int n, dummy, eax, ebx, ecx, edx;
|
|
-
|
|
- n = c->extended_cpuid_level;
|
|
-
|
|
- if (n >= 0x80000005) {
|
|
- cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
|
|
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
|
|
- "D cache %dK (%d bytes/line)\n",
|
|
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
|
- c->x86_cache_size = (ecx>>24) + (edx>>24);
|
|
- /* On K8 L1 TLB is inclusive, so don't count it */
|
|
- c->x86_tlbsize = 0;
|
|
- }
|
|
-
|
|
- if (n >= 0x80000006) {
|
|
- cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
|
|
- ecx = cpuid_ecx(0x80000006);
|
|
- c->x86_cache_size = ecx >> 16;
|
|
- c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
|
|
-
|
|
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
|
|
- c->x86_cache_size, ecx & 0xFF);
|
|
- }
|
|
- if (n >= 0x80000008) {
|
|
- cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
|
|
- c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
- c->x86_phys_bits = eax & 0xff;
|
|
- }
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
-static int __cpuinit nearby_node(int apicid)
|
|
-{
|
|
- int i, node;
|
|
-
|
|
- for (i = apicid - 1; i >= 0; i--) {
|
|
- node = apicid_to_node[i];
|
|
- if (node != NUMA_NO_NODE && node_online(node))
|
|
- return node;
|
|
- }
|
|
- for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
|
|
- node = apicid_to_node[i];
|
|
- if (node != NUMA_NO_NODE && node_online(node))
|
|
- return node;
|
|
- }
|
|
- return first_node(node_online_map); /* Shouldn't happen */
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
|
|
- * Assumes number of cores is a power of two.
|
|
- */
|
|
-static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
|
|
-{
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- unsigned bits;
|
|
-#ifdef CONFIG_NUMA
|
|
- int cpu = smp_processor_id();
|
|
- int node = 0;
|
|
- unsigned apicid = hard_smp_processor_id();
|
|
-#endif
|
|
- bits = c->x86_coreid_bits;
|
|
-
|
|
- /* Low order bits define the core id (index of core in socket) */
|
|
- c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
|
|
- /* Convert the initial APIC ID into the socket ID */
|
|
- c->phys_proc_id = c->initial_apicid >> bits;
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
- node = c->phys_proc_id;
|
|
- if (apicid_to_node[apicid] != NUMA_NO_NODE)
|
|
- node = apicid_to_node[apicid];
|
|
- if (!node_online(node)) {
|
|
- /* Two possibilities here:
|
|
- - The CPU is missing memory and no node was created.
|
|
- In that case try picking one from a nearby CPU
|
|
- - The APIC IDs differ from the HyperTransport node IDs
|
|
- which the K8 northbridge parsing fills in.
|
|
- Assume they are all increased by a constant offset,
|
|
- but in the same order as the HT nodeids.
|
|
- If that doesn't result in a usable node fall back to the
|
|
- path for the previous case. */
|
|
-
|
|
- int ht_nodeid = c->initial_apicid;
|
|
-
|
|
- if (ht_nodeid >= 0 &&
|
|
- apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
|
|
- node = apicid_to_node[ht_nodeid];
|
|
- /* Pick a nearby node */
|
|
- if (!node_online(node))
|
|
- node = nearby_node(apicid);
|
|
- }
|
|
- numa_set_node(cpu, node);
|
|
-
|
|
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
|
|
-#endif
|
|
-#endif
|
|
-}
|
|
-
|
|
-static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
|
|
-{
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- unsigned bits, ecx;
|
|
-
|
|
- /* Multi core CPU? */
|
|
- if (c->extended_cpuid_level < 0x80000008)
|
|
- return;
|
|
-
|
|
- ecx = cpuid_ecx(0x80000008);
|
|
-
|
|
- c->x86_max_cores = (ecx & 0xff) + 1;
|
|
-
|
|
- /* CPU telling us the core id bits shift? */
|
|
- bits = (ecx >> 12) & 0xF;
|
|
-
|
|
- /* Otherwise recompute */
|
|
- if (bits == 0) {
|
|
- while ((1 << bits) < c->x86_max_cores)
|
|
- bits++;
|
|
- }
|
|
-
|
|
- c->x86_coreid_bits = bits;
|
|
-
|
|
-#endif
|
|
-}
|
|
-
|
|
-#define ENABLE_C1E_MASK 0x18000000
|
|
-#define CPUID_PROCESSOR_SIGNATURE 1
|
|
-#define CPUID_XFAM 0x0ff00000
|
|
-#define CPUID_XFAM_K8 0x00000000
|
|
-#define CPUID_XFAM_10H 0x00100000
|
|
-#define CPUID_XFAM_11H 0x00200000
|
|
-#define CPUID_XMOD 0x000f0000
|
|
-#define CPUID_XMOD_REV_F 0x00040000
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/* AMD systems with C1E don't have a working lAPIC timer. Check for that. */
|
|
-static __cpuinit int amd_apic_timer_broken(void)
|
|
-{
|
|
- u32 lo, hi, eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE);
|
|
-
|
|
- switch (eax & CPUID_XFAM) {
|
|
- case CPUID_XFAM_K8:
|
|
- if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F)
|
|
- break;
|
|
- case CPUID_XFAM_10H:
|
|
- case CPUID_XFAM_11H:
|
|
- rdmsr(MSR_K8_ENABLE_C1E, lo, hi);
|
|
- if (lo & ENABLE_C1E_MASK)
|
|
- return 1;
|
|
- break;
|
|
- default:
|
|
- /* err on the side of caution */
|
|
- return 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-#endif
|
|
-
|
|
-static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
|
|
-{
|
|
- early_init_amd_mc(c);
|
|
-
|
|
- /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
|
|
- if (c->x86_power & (1<<8))
|
|
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
|
-}
|
|
-
|
|
-static void __cpuinit init_amd(struct cpuinfo_x86 *c)
|
|
-{
|
|
- unsigned level;
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
- unsigned long value;
|
|
-
|
|
- /*
|
|
- * Disable TLB flush filter by setting HWCR.FFDIS on K8
|
|
- * bit 6 of msr C001_0015
|
|
- *
|
|
- * Errata 63 for SH-B3 steppings
|
|
- * Errata 122 for all steppings (F+ have it disabled by default)
|
|
- */
|
|
- if (c->x86 == 15) {
|
|
- rdmsrl(MSR_K8_HWCR, value);
|
|
- value |= 1 << 6;
|
|
- wrmsrl(MSR_K8_HWCR, value);
|
|
- }
|
|
-#endif
|
|
-
|
|
- /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
|
|
- 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
|
|
- clear_cpu_cap(c, 0*32+31);
|
|
-
|
|
- /* On C+ stepping K8 rep microcode works well for copy/memset */
|
|
- level = cpuid_eax(1);
|
|
- if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) ||
|
|
- level >= 0x0f58))
|
|
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
|
- if (c->x86 == 0x10 || c->x86 == 0x11)
|
|
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
|
-
|
|
- /* Enable workaround for FXSAVE leak */
|
|
- if (c->x86 >= 6)
|
|
- set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
|
|
-
|
|
- level = get_model_name(c);
|
|
- if (!level) {
|
|
- switch (c->x86) {
|
|
- case 15:
|
|
- /* Should distinguish Models here, but this is only
|
|
- a fallback anyways. */
|
|
- strcpy(c->x86_model_id, "Hammer");
|
|
- break;
|
|
- }
|
|
- }
|
|
- display_cacheinfo(c);
|
|
-
|
|
- /* Multi core CPU? */
|
|
- if (c->extended_cpuid_level >= 0x80000008)
|
|
- amd_detect_cmp(c);
|
|
-
|
|
- if (c->extended_cpuid_level >= 0x80000006 &&
|
|
- (cpuid_edx(0x80000006) & 0xf000))
|
|
- num_cache_leaves = 4;
|
|
- else
|
|
- num_cache_leaves = 3;
|
|
-
|
|
- if (c->x86 == 0xf || c->x86 == 0x10 || c->x86 == 0x11)
|
|
- set_cpu_cap(c, X86_FEATURE_K8);
|
|
-
|
|
- /* MFENCE stops RDTSC speculation */
|
|
- set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
|
|
-
|
|
- if (c->x86 == 0x10)
|
|
- fam10h_check_enable_mmcfg();
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- if (amd_apic_timer_broken())
|
|
- disable_apic_timer = 1;
|
|
-
|
|
- if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
|
|
- unsigned long long tseg;
|
|
-
|
|
- /*
|
|
- * Split up direct mapping around the TSEG SMM area.
|
|
- * Don't do it for gbpages because there seems very little
|
|
- * benefit in doing so.
|
|
- */
|
|
- if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
|
|
- (tseg >> PMD_SHIFT) < (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
|
|
- set_memory_4k((unsigned long)__va(tseg), 1);
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
-void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
-{
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- u32 eax, ebx, ecx, edx;
|
|
- int index_msb, core_bits;
|
|
-
|
|
- cpuid(1, &eax, &ebx, &ecx, &edx);
|
|
-
|
|
-
|
|
- if (!cpu_has(c, X86_FEATURE_HT))
|
|
- return;
|
|
- if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
|
|
- goto out;
|
|
-
|
|
- smp_num_siblings = (ebx & 0xff0000) >> 16;
|
|
-
|
|
- if (smp_num_siblings == 1) {
|
|
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
- } else if (smp_num_siblings > 1) {
|
|
-
|
|
- if (smp_num_siblings > NR_CPUS) {
|
|
- printk(KERN_WARNING "CPU: Unsupported number of "
|
|
- "siblings %d", smp_num_siblings);
|
|
- smp_num_siblings = 1;
|
|
- return;
|
|
- }
|
|
-
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
- c->phys_proc_id = phys_pkg_id(index_msb);
|
|
-
|
|
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
-
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
-
|
|
- core_bits = get_count_order(c->x86_max_cores);
|
|
-
|
|
- c->cpu_core_id = phys_pkg_id(index_msb) &
|
|
- ((1 << core_bits) - 1);
|
|
- }
|
|
-out:
|
|
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
|
- printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
|
- c->phys_proc_id);
|
|
- printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
|
- c->cpu_core_id);
|
|
- }
|
|
-
|
|
-#endif
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * find out the number of processor cores on the die
|
|
- */
|
|
-static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
|
|
-{
|
|
- unsigned int eax, t;
|
|
-
|
|
- if (c->cpuid_level < 4)
|
|
- return 1;
|
|
-
|
|
- cpuid_count(4, 0, &eax, &t, &t, &t);
|
|
-
|
|
- if (eax & 0x1f)
|
|
- return ((eax >> 26) + 1);
|
|
- else
|
|
- return 1;
|
|
-}
|
|
-#endif
|
|
-
|
|
-static void __cpuinit srat_detect_node(void)
|
|
-{
|
|
-#ifdef CONFIG_NUMA
|
|
- unsigned node;
|
|
- int cpu = smp_processor_id();
|
|
- int apicid = hard_smp_processor_id();
|
|
-
|
|
- /* Don't do the funky fallback heuristics the AMD version employs
|
|
- for now. */
|
|
- node = apicid_to_node[apicid];
|
|
- if (node == NUMA_NO_NODE || !node_online(node))
|
|
- node = first_node(node_online_map);
|
|
- numa_set_node(cpu, node);
|
|
-
|
|
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
|
|
-#endif
|
|
-}
|
|
-
|
|
-static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
|
|
-{
|
|
- if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
|
- (c->x86 == 0x6 && c->x86_model >= 0x0e))
|
|
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
|
-}
|
|
-
|
|
-static void __cpuinit init_intel(struct cpuinfo_x86 *c)
|
|
-{
|
|
- /* Cache sizes */
|
|
- unsigned n;
|
|
-
|
|
- init_intel_cacheinfo(c);
|
|
- if (c->cpuid_level > 9) {
|
|
- unsigned eax = cpuid_eax(10);
|
|
- /* Check for version and the number of counters */
|
|
- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
|
|
- set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
|
|
- }
|
|
-
|
|
- if (cpu_has_ds) {
|
|
- unsigned int l1, l2;
|
|
- rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
|
|
- if (!(l1 & (1<<11)))
|
|
- set_cpu_cap(c, X86_FEATURE_BTS);
|
|
- if (!(l1 & (1<<12)))
|
|
- set_cpu_cap(c, X86_FEATURE_PEBS);
|
|
- }
|
|
-
|
|
-
|
|
- if (cpu_has_bts)
|
|
- ds_init_intel(c);
|
|
-
|
|
- n = c->extended_cpuid_level;
|
|
- if (n >= 0x80000008) {
|
|
- unsigned eax = cpuid_eax(0x80000008);
|
|
- c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
- c->x86_phys_bits = eax & 0xff;
|
|
- /* CPUID workaround for Intel 0F34 CPU */
|
|
- if (c->x86_vendor == X86_VENDOR_INTEL &&
|
|
- c->x86 == 0xF && c->x86_model == 0x3 &&
|
|
- c->x86_mask == 0x4)
|
|
- c->x86_phys_bits = 36;
|
|
- }
|
|
-
|
|
- if (c->x86 == 15)
|
|
- c->x86_cache_alignment = c->x86_clflush_size * 2;
|
|
- if (c->x86 == 6)
|
|
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
|
- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
|
-#ifndef CONFIG_XEN
|
|
- c->x86_max_cores = intel_num_cpu_cores(c);
|
|
-#endif
|
|
-
|
|
- srat_detect_node();
|
|
-}
|
|
-
|
|
-static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
|
|
-{
|
|
- if (c->x86 == 0x6 && c->x86_model >= 0xf)
|
|
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
|
-}
|
|
-
|
|
-static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
|
|
-{
|
|
- /* Cache sizes */
|
|
- unsigned n;
|
|
-
|
|
- n = c->extended_cpuid_level;
|
|
- if (n >= 0x80000008) {
|
|
- unsigned eax = cpuid_eax(0x80000008);
|
|
- c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
- c->x86_phys_bits = eax & 0xff;
|
|
- }
|
|
-
|
|
- if (c->x86 == 0x6 && c->x86_model >= 0xf) {
|
|
- c->x86_cache_alignment = c->x86_clflush_size * 2;
|
|
- set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
|
- set_cpu_cap(c, X86_FEATURE_REP_GOOD);
|
|
- }
|
|
- set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
|
|
-}
|
|
-
|
|
-static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
|
|
-{
|
|
- char *v = c->x86_vendor_id;
|
|
-
|
|
- if (!strcmp(v, "AuthenticAMD"))
|
|
- c->x86_vendor = X86_VENDOR_AMD;
|
|
- else if (!strcmp(v, "GenuineIntel"))
|
|
- c->x86_vendor = X86_VENDOR_INTEL;
|
|
- else if (!strcmp(v, "CentaurHauls"))
|
|
- c->x86_vendor = X86_VENDOR_CENTAUR;
|
|
- else
|
|
- c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
-}
|
|
-
|
|
-/* Do some early cpuid on the boot CPU to get some parameter that are
|
|
- needed before check_bugs. Everything advanced is in identify_cpu
|
|
- below. */
|
|
-static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
|
|
-{
|
|
- u32 tfms, xlvl;
|
|
-
|
|
- c->loops_per_jiffy = loops_per_jiffy;
|
|
- c->x86_cache_size = -1;
|
|
- c->x86_vendor = X86_VENDOR_UNKNOWN;
|
|
- c->x86_model = c->x86_mask = 0; /* So far unknown... */
|
|
- c->x86_vendor_id[0] = '\0'; /* Unset */
|
|
- c->x86_model_id[0] = '\0'; /* Unset */
|
|
- c->x86_clflush_size = 64;
|
|
- c->x86_cache_alignment = c->x86_clflush_size;
|
|
-#ifndef CONFIG_XEN
|
|
- c->x86_max_cores = 1;
|
|
- c->x86_coreid_bits = 0;
|
|
-#endif
|
|
- c->extended_cpuid_level = 0;
|
|
- memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
-
|
|
- /* Get vendor name */
|
|
- cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
|
|
- (unsigned int *)&c->x86_vendor_id[0],
|
|
- (unsigned int *)&c->x86_vendor_id[8],
|
|
- (unsigned int *)&c->x86_vendor_id[4]);
|
|
-
|
|
- get_cpu_vendor(c);
|
|
-
|
|
- /* Initialize the standard set of capabilities */
|
|
- /* Note that the vendor-specific code below might override */
|
|
-
|
|
- /* Intel-defined flags: level 0x00000001 */
|
|
- if (c->cpuid_level >= 0x00000001) {
|
|
- __u32 misc;
|
|
- cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
|
|
- &c->x86_capability[0]);
|
|
- c->x86 = (tfms >> 8) & 0xf;
|
|
- c->x86_model = (tfms >> 4) & 0xf;
|
|
- c->x86_mask = tfms & 0xf;
|
|
- if (c->x86 == 0xf)
|
|
- c->x86 += (tfms >> 20) & 0xff;
|
|
- if (c->x86 >= 0x6)
|
|
- c->x86_model += ((tfms >> 16) & 0xF) << 4;
|
|
- if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
|
|
- c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
|
|
- } else {
|
|
- /* Have CPUID level 0 only - unheard of */
|
|
- c->x86 = 4;
|
|
- }
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
|
|
-#ifdef CONFIG_SMP
|
|
- c->phys_proc_id = c->initial_apicid;
|
|
-#endif
|
|
-#endif
|
|
- /* AMD-defined flags: level 0x80000001 */
|
|
- xlvl = cpuid_eax(0x80000000);
|
|
- c->extended_cpuid_level = xlvl;
|
|
- if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
- if (xlvl >= 0x80000001) {
|
|
- c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
- c->x86_capability[6] = cpuid_ecx(0x80000001);
|
|
- }
|
|
- if (xlvl >= 0x80000004)
|
|
- get_model_name(c); /* Default name */
|
|
- }
|
|
-
|
|
- /* Transmeta-defined flags: level 0x80860001 */
|
|
- xlvl = cpuid_eax(0x80860000);
|
|
- if ((xlvl & 0xffff0000) == 0x80860000) {
|
|
- /* Don't set x86_cpuid_level here for now to not confuse. */
|
|
- if (xlvl >= 0x80860001)
|
|
- c->x86_capability[2] = cpuid_edx(0x80860001);
|
|
- }
|
|
-
|
|
- c->extended_cpuid_level = cpuid_eax(0x80000000);
|
|
- if (c->extended_cpuid_level >= 0x80000007)
|
|
- c->x86_power = cpuid_edx(0x80000007);
|
|
-
|
|
- switch (c->x86_vendor) {
|
|
- case X86_VENDOR_AMD:
|
|
- early_init_amd(c);
|
|
- break;
|
|
- case X86_VENDOR_INTEL:
|
|
- early_init_intel(c);
|
|
- break;
|
|
- case X86_VENDOR_CENTAUR:
|
|
- early_init_centaur(c);
|
|
- break;
|
|
- }
|
|
-
|
|
- validate_pat_support(c);
|
|
-}
|
|
-
|
|
-/*
|
|
- * This does the hard work of actually picking apart the CPU stuff...
|
|
- */
|
|
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
-{
|
|
- int i;
|
|
-
|
|
- early_identify_cpu(c);
|
|
-
|
|
- init_scattered_cpuid_features(c);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- c->apicid = phys_pkg_id(0);
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Vendor-specific initialization. In this section we
|
|
- * canonicalize the feature flags, meaning if there are
|
|
- * features a certain CPU supports which CPUID doesn't
|
|
- * tell us, CPUID claiming incorrect flags, or other bugs,
|
|
- * we handle them here.
|
|
- *
|
|
- * At the end of this section, c->x86_capability better
|
|
- * indicate the features this CPU genuinely supports!
|
|
- */
|
|
- switch (c->x86_vendor) {
|
|
- case X86_VENDOR_AMD:
|
|
- init_amd(c);
|
|
- break;
|
|
-
|
|
- case X86_VENDOR_INTEL:
|
|
- init_intel(c);
|
|
- break;
|
|
-
|
|
- case X86_VENDOR_CENTAUR:
|
|
- init_centaur(c);
|
|
- break;
|
|
-
|
|
- case X86_VENDOR_UNKNOWN:
|
|
- default:
|
|
- display_cacheinfo(c);
|
|
- break;
|
|
- }
|
|
-
|
|
- detect_ht(c);
|
|
-
|
|
- /*
|
|
- * On SMP, boot_cpu_data holds the common feature set between
|
|
- * all CPUs; so make sure that we indicate which features are
|
|
- * common between the CPUs. The first time this routine gets
|
|
- * executed, c == &boot_cpu_data.
|
|
- */
|
|
- if (c != &boot_cpu_data) {
|
|
- /* AND the already accumulated flags with these */
|
|
- for (i = 0; i < NCAPINTS; i++)
|
|
- boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
|
|
- }
|
|
-
|
|
- /* Clear all flags overriden by options */
|
|
- for (i = 0; i < NCAPINTS; i++)
|
|
- c->x86_capability[i] &= ~cleared_cpu_caps[i];
|
|
-
|
|
-#ifdef CONFIG_X86_MCE
|
|
- mcheck_init(c);
|
|
-#endif
|
|
- select_idle_routine(c);
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
- numa_add_cpu(smp_processor_id());
|
|
-#endif
|
|
-
|
|
-}
|
|
-
|
|
-void __cpuinit identify_boot_cpu(void)
|
|
-{
|
|
- identify_cpu(&boot_cpu_data);
|
|
-}
|
|
-
|
|
-void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
|
-{
|
|
- BUG_ON(c == &boot_cpu_data);
|
|
- identify_cpu(c);
|
|
- mtrr_ap_init();
|
|
-}
|
|
-
|
|
-static __init int setup_noclflush(char *arg)
|
|
-{
|
|
- setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
|
|
- return 1;
|
|
-}
|
|
-__setup("noclflush", setup_noclflush);
|
|
-
|
|
-void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
|
|
-{
|
|
- if (c->x86_model_id[0])
|
|
- printk(KERN_CONT "%s", c->x86_model_id);
|
|
-
|
|
- if (c->x86_mask || c->cpuid_level >= 0)
|
|
- printk(KERN_CONT " stepping %02x\n", c->x86_mask);
|
|
- else
|
|
- printk(KERN_CONT "\n");
|
|
-}
|
|
-
|
|
-static __init int setup_disablecpuid(char *arg)
|
|
-{
|
|
- int bit;
|
|
- if (get_option(&arg, &bit) && bit < NCAPINTS*32)
|
|
- setup_clear_cpu_cap(bit);
|
|
- else
|
|
- return 0;
|
|
- return 1;
|
|
-}
|
|
-__setup("clearcpuid=", setup_disablecpuid);
|
|
--- head-2011-03-11.orig/arch/x86/kernel/smp-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/smp-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -121,132 +121,14 @@ void xen_smp_send_reschedule(int cpu)
|
|
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
|
|
}
|
|
|
|
-/*
|
|
- * Structure and data for smp_call_function(). This is designed to minimise
|
|
- * static memory requirements. It also looks cleaner.
|
|
- */
|
|
-static DEFINE_SPINLOCK(call_lock);
|
|
-
|
|
-struct call_data_struct {
|
|
- void (*func) (void *info);
|
|
- void *info;
|
|
- atomic_t started;
|
|
- atomic_t finished;
|
|
- int wait;
|
|
-};
|
|
-
|
|
-void lock_ipi_call_lock(void)
|
|
+void xen_send_call_func_single_ipi(int cpu)
|
|
{
|
|
- spin_lock_irq(&call_lock);
|
|
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNC_SINGLE_VECTOR);
|
|
}
|
|
|
|
-void unlock_ipi_call_lock(void)
|
|
+void xen_send_call_func_ipi(cpumask_t mask)
|
|
{
|
|
- spin_unlock_irq(&call_lock);
|
|
-}
|
|
-
|
|
-static struct call_data_struct *call_data;
|
|
-
|
|
-static void __smp_call_function(void (*func) (void *info), void *info,
|
|
- int nonatomic, int wait)
|
|
-{
|
|
- struct call_data_struct data;
|
|
- int cpus = num_online_cpus() - 1;
|
|
-
|
|
- if (!cpus)
|
|
- return;
|
|
-
|
|
- data.func = func;
|
|
- data.info = info;
|
|
- atomic_set(&data.started, 0);
|
|
- data.wait = wait;
|
|
- if (wait)
|
|
- atomic_set(&data.finished, 0);
|
|
-
|
|
- call_data = &data;
|
|
- mb();
|
|
-
|
|
- /* Send a message to all other CPUs and wait for them to respond */
|
|
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
|
-
|
|
- /* Wait for response */
|
|
- while (atomic_read(&data.started) != cpus)
|
|
- cpu_relax();
|
|
-
|
|
- if (wait)
|
|
- while (atomic_read(&data.finished) != cpus)
|
|
- cpu_relax();
|
|
-}
|
|
-
|
|
-
|
|
-/**
|
|
- * smp_call_function_mask(): Run a function on a set of other CPUs.
|
|
- * @mask: The set of cpus to run on. Must not include the current cpu.
|
|
- * @func: The function to run. This must be fast and non-blocking.
|
|
- * @info: An arbitrary pointer to pass to the function.
|
|
- * @wait: If true, wait (atomically) until function has completed on other CPUs.
|
|
- *
|
|
- * Returns 0 on success, else a negative status code.
|
|
- *
|
|
- * If @wait is true, then returns once @func has returned; otherwise
|
|
- * it returns just before the target cpu calls @func.
|
|
- *
|
|
- * You must not call this function with disabled interrupts or from a
|
|
- * hardware interrupt handler or from a bottom half handler.
|
|
- */
|
|
-int
|
|
-xen_smp_call_function_mask(cpumask_t mask,
|
|
- void (*func)(void *), void *info,
|
|
- int wait)
|
|
-{
|
|
- struct call_data_struct data;
|
|
- cpumask_t allbutself;
|
|
- int cpus;
|
|
-
|
|
- /* Can deadlock when called with interrupts disabled */
|
|
- WARN_ON(irqs_disabled());
|
|
-
|
|
- /* Holding any lock stops cpus from going down. */
|
|
- spin_lock(&call_lock);
|
|
-
|
|
- allbutself = cpu_online_map;
|
|
- cpu_clear(smp_processor_id(), allbutself);
|
|
-
|
|
- cpus_and(mask, mask, allbutself);
|
|
- cpus = cpus_weight(mask);
|
|
-
|
|
- if (!cpus) {
|
|
- spin_unlock(&call_lock);
|
|
- return 0;
|
|
- }
|
|
-
|
|
- data.func = func;
|
|
- data.info = info;
|
|
- atomic_set(&data.started, 0);
|
|
- data.wait = wait;
|
|
- if (wait)
|
|
- atomic_set(&data.finished, 0);
|
|
-
|
|
- call_data = &data;
|
|
- wmb();
|
|
-
|
|
- /* Send a message to other CPUs */
|
|
- if (cpus_equal(mask, allbutself) &&
|
|
- cpus_equal(cpu_online_map, cpu_callout_map))
|
|
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
|
- else
|
|
- send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
|
|
-
|
|
- /* Wait for response */
|
|
- while (atomic_read(&data.started) != cpus)
|
|
- cpu_relax();
|
|
-
|
|
- if (wait)
|
|
- while (atomic_read(&data.finished) != cpus)
|
|
- cpu_relax();
|
|
- spin_unlock(&call_lock);
|
|
-
|
|
- return 0;
|
|
+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
|
|
}
|
|
|
|
static void stop_this_cpu(void *dummy)
|
|
@@ -268,15 +150,10 @@ static void stop_this_cpu(void *dummy)
|
|
|
|
void xen_smp_send_stop(void)
|
|
{
|
|
- int nolock;
|
|
unsigned long flags;
|
|
|
|
- /* Don't deadlock on the call lock in panic */
|
|
- nolock = !spin_trylock(&call_lock);
|
|
+ smp_call_function(stop_this_cpu, NULL, 0);
|
|
local_irq_save(flags);
|
|
- __smp_call_function(stop_this_cpu, NULL, 0, 0);
|
|
- if (!nolock)
|
|
- spin_unlock(&call_lock);
|
|
disable_all_local_evtchn();
|
|
local_irq_restore(flags);
|
|
}
|
|
@@ -298,30 +175,24 @@ irqreturn_t smp_reschedule_interrupt(int
|
|
|
|
irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
|
|
{
|
|
- void (*func) (void *info) = call_data->func;
|
|
- void *info = call_data->info;
|
|
- int wait = call_data->wait;
|
|
-
|
|
- /*
|
|
- * Notify initiating CPU that I've grabbed the data and am
|
|
- * about to execute the function
|
|
- */
|
|
- mb();
|
|
- atomic_inc(&call_data->started);
|
|
- /*
|
|
- * At this point the info structure may be out of scope unless wait==1
|
|
- */
|
|
- (*func)(info);
|
|
+ generic_smp_call_function_interrupt();
|
|
#ifdef CONFIG_X86_32
|
|
__get_cpu_var(irq_stat).irq_call_count++;
|
|
#else
|
|
add_pda(irq_call_count, 1);
|
|
#endif
|
|
|
|
- if (wait) {
|
|
- mb();
|
|
- atomic_inc(&call_data->finished);
|
|
- }
|
|
+ return IRQ_HANDLED;
|
|
+}
|
|
+
|
|
+irqreturn_t smp_call_function_single_interrupt(int irq, void *dev_id)
|
|
+{
|
|
+ generic_smp_call_function_single_interrupt();
|
|
+#ifdef CONFIG_X86_32
|
|
+ __get_cpu_var(irq_stat).irq_call_count++;
|
|
+#else
|
|
+ add_pda(irq_call_count, 1);
|
|
+#endif
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/time-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/time-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -460,7 +460,7 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
|
|
/* Keep nmi watchdog up to date */
|
|
#ifdef __i386__
|
|
- per_cpu(irq_stat, smp_processor_id()).irq0_irqs++;
|
|
+ x86_add_percpu(irq_stat.irq0_irqs, 1);
|
|
#else
|
|
add_pda(irq0_irqs, 1);
|
|
#endif
|
|
@@ -750,9 +750,7 @@ void __init time_init(void)
|
|
|
|
update_wallclock();
|
|
|
|
-#ifndef CONFIG_X86_64
|
|
use_tsc_delay();
|
|
-#endif
|
|
|
|
/* Cannot request_irq() until kmem is initialised. */
|
|
late_time_init = setup_cpu0_timer_irq;
|
|
@@ -809,7 +807,8 @@ static void stop_hz_timer(void)
|
|
|
|
/* Leave ourselves in tick mode if rcu or softirq or timer pending. */
|
|
if (rcu_needs_cpu(cpu) || local_softirq_pending() ||
|
|
- (j = next_timer_interrupt(), time_before_eq(j, jiffies))) {
|
|
+ (j = get_next_timer_interrupt(jiffies),
|
|
+ time_before_eq(j, jiffies))) {
|
|
cpu_clear(cpu, nohz_cpu_mask);
|
|
j = jiffies + 1;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/traps_32-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/traps_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -1,5 +1,6 @@
|
|
/*
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
|
|
*
|
|
* Pentium III FXSR, SSE support
|
|
* Gareth Hughes <gareth@valinux.com>, May 2000
|
|
@@ -57,11 +58,10 @@
|
|
#include <asm/nmi.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/io.h>
|
|
+#include <asm/traps.h>
|
|
|
|
#include "mach_traps.h"
|
|
|
|
-int panic_on_unrecovered_nmi;
|
|
-
|
|
#ifndef CONFIG_XEN
|
|
DECLARE_BITMAP(used_vectors, NR_VECTORS);
|
|
EXPORT_SYMBOL_GPL(used_vectors);
|
|
@@ -82,43 +82,22 @@ gate_desc idt_table[256]
|
|
__attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
|
|
#endif
|
|
|
|
-asmlinkage void divide_error(void);
|
|
-asmlinkage void debug(void);
|
|
-asmlinkage void nmi(void);
|
|
-asmlinkage void int3(void);
|
|
-asmlinkage void overflow(void);
|
|
-asmlinkage void bounds(void);
|
|
-asmlinkage void invalid_op(void);
|
|
-asmlinkage void device_not_available(void);
|
|
-asmlinkage void coprocessor_segment_overrun(void);
|
|
-asmlinkage void invalid_TSS(void);
|
|
-asmlinkage void segment_not_present(void);
|
|
-asmlinkage void stack_segment(void);
|
|
-asmlinkage void general_protection(void);
|
|
-asmlinkage void page_fault(void);
|
|
-asmlinkage void coprocessor_error(void);
|
|
-asmlinkage void simd_coprocessor_error(void);
|
|
-asmlinkage void alignment_check(void);
|
|
-#ifndef CONFIG_XEN
|
|
-asmlinkage void spurious_interrupt_bug(void);
|
|
-#else
|
|
-asmlinkage void fixup_4gb_segment(void);
|
|
-#endif
|
|
-asmlinkage void machine_check(void);
|
|
-
|
|
+int panic_on_unrecovered_nmi;
|
|
int kstack_depth_to_print = 24;
|
|
static unsigned int code_bytes = 64;
|
|
+static int ignore_nmis;
|
|
+static int die_counter;
|
|
|
|
void printk_address(unsigned long address, int reliable)
|
|
{
|
|
#ifdef CONFIG_KALLSYMS
|
|
- char namebuf[KSYM_NAME_LEN];
|
|
unsigned long offset = 0;
|
|
unsigned long symsize;
|
|
const char *symname;
|
|
- char reliab[4] = "";
|
|
- char *delim = ":";
|
|
char *modname;
|
|
+ char *delim = ":";
|
|
+ char namebuf[KSYM_NAME_LEN];
|
|
+ char reliab[4] = "";
|
|
|
|
symname = kallsyms_lookup(address, &symsize, &offset,
|
|
&modname, namebuf);
|
|
@@ -138,22 +117,23 @@ void printk_address(unsigned long addres
|
|
#endif
|
|
}
|
|
|
|
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size)
|
|
+static inline int valid_stack_ptr(struct thread_info *tinfo,
|
|
+ void *p, unsigned int size)
|
|
{
|
|
- return p > (void *)tinfo &&
|
|
- p <= (void *)tinfo + THREAD_SIZE - size;
|
|
+ void *t = tinfo;
|
|
+ return p > t && p <= t + THREAD_SIZE - size;
|
|
}
|
|
|
|
/* The form of the top of the frame on the stack */
|
|
struct stack_frame {
|
|
- struct stack_frame *next_frame;
|
|
- unsigned long return_address;
|
|
+ struct stack_frame *next_frame;
|
|
+ unsigned long return_address;
|
|
};
|
|
|
|
static inline unsigned long
|
|
print_context_stack(struct thread_info *tinfo,
|
|
- unsigned long *stack, unsigned long bp,
|
|
- const struct stacktrace_ops *ops, void *data)
|
|
+ unsigned long *stack, unsigned long bp,
|
|
+ const struct stacktrace_ops *ops, void *data)
|
|
{
|
|
struct stack_frame *frame = (struct stack_frame *)bp;
|
|
|
|
@@ -175,8 +155,6 @@ print_context_stack(struct thread_info *
|
|
return bp;
|
|
}
|
|
|
|
-#define MSG(msg) ops->warning(data, msg)
|
|
-
|
|
void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
|
unsigned long *stack, unsigned long bp,
|
|
const struct stacktrace_ops *ops, void *data)
|
|
@@ -186,7 +164,6 @@ void dump_trace(struct task_struct *task
|
|
|
|
if (!stack) {
|
|
unsigned long dummy;
|
|
-
|
|
stack = &dummy;
|
|
if (task != current)
|
|
stack = (unsigned long *)task->thread.sp;
|
|
@@ -204,7 +181,7 @@ void dump_trace(struct task_struct *task
|
|
}
|
|
#endif
|
|
|
|
- while (1) {
|
|
+ for (;;) {
|
|
struct thread_info *context;
|
|
|
|
context = (struct thread_info *)
|
|
@@ -256,15 +233,15 @@ static void print_trace_address(void *da
|
|
}
|
|
|
|
static const struct stacktrace_ops print_trace_ops = {
|
|
- .warning = print_trace_warning,
|
|
- .warning_symbol = print_trace_warning_symbol,
|
|
- .stack = print_trace_stack,
|
|
- .address = print_trace_address,
|
|
+ .warning = print_trace_warning,
|
|
+ .warning_symbol = print_trace_warning_symbol,
|
|
+ .stack = print_trace_stack,
|
|
+ .address = print_trace_address,
|
|
};
|
|
|
|
static void
|
|
show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, unsigned long bp, char *log_lvl)
|
|
+ unsigned long *stack, unsigned long bp, char *log_lvl)
|
|
{
|
|
dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
|
printk("%s =======================\n", log_lvl);
|
|
@@ -359,15 +336,14 @@ void show_registers(struct pt_regs *regs
|
|
printk(KERN_EMERG "Code: ");
|
|
|
|
ip = (u8 *)regs->ip - code_prologue;
|
|
- if (ip < (u8 *)PAGE_OFFSET ||
|
|
- probe_kernel_address(ip, c)) {
|
|
+ if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
|
|
/* try starting at EIP */
|
|
ip = (u8 *)regs->ip;
|
|
code_len = code_len - code_prologue + 1;
|
|
}
|
|
for (i = 0; i < code_len; i++, ip++) {
|
|
if (ip < (u8 *)PAGE_OFFSET ||
|
|
- probe_kernel_address(ip, c)) {
|
|
+ probe_kernel_address(ip, c)) {
|
|
printk(" Bad EIP value.");
|
|
break;
|
|
}
|
|
@@ -392,7 +368,53 @@ int is_valid_bugaddr(unsigned long ip)
|
|
return ud2 == 0x0b0f;
|
|
}
|
|
|
|
-static int die_counter;
|
|
+static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED;
|
|
+static int die_owner = -1;
|
|
+static unsigned int die_nest_count;
|
|
+
|
|
+unsigned __kprobes long oops_begin(void)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ oops_enter();
|
|
+
|
|
+ if (die_owner != raw_smp_processor_id()) {
|
|
+ console_verbose();
|
|
+ raw_local_irq_save(flags);
|
|
+ __raw_spin_lock(&die_lock);
|
|
+ die_owner = smp_processor_id();
|
|
+ die_nest_count = 0;
|
|
+ bust_spinlocks(1);
|
|
+ } else {
|
|
+ raw_local_irq_save(flags);
|
|
+ }
|
|
+ die_nest_count++;
|
|
+ return flags;
|
|
+}
|
|
+
|
|
+void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
|
|
+{
|
|
+ bust_spinlocks(0);
|
|
+ die_owner = -1;
|
|
+ add_taint(TAINT_DIE);
|
|
+ __raw_spin_unlock(&die_lock);
|
|
+ raw_local_irq_restore(flags);
|
|
+
|
|
+ if (!regs)
|
|
+ return;
|
|
+
|
|
+ if (kexec_should_crash(current))
|
|
+ crash_kexec(regs);
|
|
+
|
|
+ if (in_interrupt())
|
|
+ panic("Fatal exception in interrupt");
|
|
+
|
|
+ if (panic_on_oops)
|
|
+ panic("Fatal exception");
|
|
+
|
|
+ oops_exit();
|
|
+ do_exit(signr);
|
|
+}
|
|
|
|
int __kprobes __die(const char *str, struct pt_regs *regs, long err)
|
|
{
|
|
@@ -410,26 +432,22 @@ int __kprobes __die(const char *str, str
|
|
printk("DEBUG_PAGEALLOC");
|
|
#endif
|
|
printk("\n");
|
|
-
|
|
if (notify_die(DIE_OOPS, str, regs, err,
|
|
- current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) {
|
|
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
|
|
+ return 1;
|
|
|
|
- show_registers(regs);
|
|
- /* Executive summary in case the oops scrolled away */
|
|
- sp = (unsigned long) (®s->sp);
|
|
- savesegment(ss, ss);
|
|
- if (user_mode(regs)) {
|
|
- sp = regs->sp;
|
|
- ss = regs->ss & 0xffff;
|
|
- }
|
|
- printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
|
|
- print_symbol("%s", regs->ip);
|
|
- printk(" SS:ESP %04x:%08lx\n", ss, sp);
|
|
-
|
|
- return 0;
|
|
- }
|
|
-
|
|
- return 1;
|
|
+ show_registers(regs);
|
|
+ /* Executive summary in case the oops scrolled away */
|
|
+ sp = (unsigned long) (®s->sp);
|
|
+ savesegment(ss, ss);
|
|
+ if (user_mode(regs)) {
|
|
+ sp = regs->sp;
|
|
+ ss = regs->ss & 0xffff;
|
|
+ }
|
|
+ printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
|
|
+ print_symbol("%s", regs->ip);
|
|
+ printk(" SS:ESP %04x:%08lx\n", ss, sp);
|
|
+ return 0;
|
|
}
|
|
|
|
/*
|
|
@@ -438,31 +456,9 @@ int __kprobes __die(const char *str, str
|
|
*/
|
|
void die(const char *str, struct pt_regs *regs, long err)
|
|
{
|
|
- static struct {
|
|
- raw_spinlock_t lock;
|
|
- u32 lock_owner;
|
|
- int lock_owner_depth;
|
|
- } die = {
|
|
- .lock = __RAW_SPIN_LOCK_UNLOCKED,
|
|
- .lock_owner = -1,
|
|
- .lock_owner_depth = 0
|
|
- };
|
|
- unsigned long flags;
|
|
-
|
|
- oops_enter();
|
|
+ unsigned long flags = oops_begin();
|
|
|
|
- if (die.lock_owner != raw_smp_processor_id()) {
|
|
- console_verbose();
|
|
- raw_local_irq_save(flags);
|
|
- __raw_spin_lock(&die.lock);
|
|
- die.lock_owner = smp_processor_id();
|
|
- die.lock_owner_depth = 0;
|
|
- bust_spinlocks(1);
|
|
- } else {
|
|
- raw_local_irq_save(flags);
|
|
- }
|
|
-
|
|
- if (++die.lock_owner_depth < 3) {
|
|
+ if (die_nest_count < 3) {
|
|
report_bug(regs->ip, regs);
|
|
|
|
if (__die(str, regs, err))
|
|
@@ -471,26 +467,7 @@ void die(const char *str, struct pt_regs
|
|
printk(KERN_EMERG "Recursive die() failure, output suppressed\n");
|
|
}
|
|
|
|
- bust_spinlocks(0);
|
|
- die.lock_owner = -1;
|
|
- add_taint(TAINT_DIE);
|
|
- __raw_spin_unlock(&die.lock);
|
|
- raw_local_irq_restore(flags);
|
|
-
|
|
- if (!regs)
|
|
- return;
|
|
-
|
|
- if (kexec_should_crash(current))
|
|
- crash_kexec(regs);
|
|
-
|
|
- if (in_interrupt())
|
|
- panic("Fatal exception in interrupt");
|
|
-
|
|
- if (panic_on_oops)
|
|
- panic("Fatal exception");
|
|
-
|
|
- oops_exit();
|
|
- do_exit(SIGSEGV);
|
|
+ oops_end(flags, regs, SIGSEGV);
|
|
}
|
|
|
|
static inline void
|
|
@@ -554,7 +531,7 @@ void do_##name(struct pt_regs *regs, lon
|
|
{ \
|
|
trace_hardirqs_fixup(); \
|
|
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
+ == NOTIFY_STOP) \
|
|
return; \
|
|
do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
|
|
}
|
|
@@ -570,7 +547,7 @@ void do_##name(struct pt_regs *regs, lon
|
|
info.si_code = sicode; \
|
|
info.si_addr = (void __user *)siaddr; \
|
|
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
+ == NOTIFY_STOP) \
|
|
return; \
|
|
do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
|
|
}
|
|
@@ -579,7 +556,7 @@ void do_##name(struct pt_regs *regs, lon
|
|
void do_##name(struct pt_regs *regs, long error_code) \
|
|
{ \
|
|
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
+ == NOTIFY_STOP) \
|
|
return; \
|
|
do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
|
|
}
|
|
@@ -594,28 +571,29 @@ void do_##name(struct pt_regs *regs, lon
|
|
info.si_addr = (void __user *)siaddr; \
|
|
trace_hardirqs_fixup(); \
|
|
if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
+ == NOTIFY_STOP) \
|
|
return; \
|
|
do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
|
|
}
|
|
|
|
-DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
+DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
#ifndef CONFIG_KPROBES
|
|
DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
|
|
#endif
|
|
DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
|
|
DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
|
|
-DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
|
|
-DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
|
|
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
|
|
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
-DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
|
|
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
+DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
|
|
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
|
|
DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
|
|
|
|
-void __kprobes do_general_protection(struct pt_regs * regs,
|
|
- long error_code)
|
|
+void __kprobes
|
|
+do_general_protection(struct pt_regs *regs, long error_code)
|
|
{
|
|
+ struct task_struct *tsk;
|
|
struct thread_struct *thread;
|
|
|
|
thread = ¤t->thread;
|
|
@@ -623,23 +601,24 @@ void __kprobes do_general_protection(str
|
|
if (regs->flags & X86_VM_MASK)
|
|
goto gp_in_vm86;
|
|
|
|
+ tsk = current;
|
|
if (!user_mode(regs))
|
|
goto gp_in_kernel;
|
|
|
|
- current->thread.error_code = error_code;
|
|
- current->thread.trap_no = 13;
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
|
|
- if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) &&
|
|
- printk_ratelimit()) {
|
|
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
+ printk_ratelimit()) {
|
|
printk(KERN_INFO
|
|
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
- current->comm, task_pid_nr(current),
|
|
- regs->ip, regs->sp, error_code);
|
|
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
+ tsk->comm, task_pid_nr(tsk),
|
|
+ regs->ip, regs->sp, error_code);
|
|
print_vma_addr(" in ", regs->ip);
|
|
printk("\n");
|
|
}
|
|
|
|
- force_sig(SIGSEGV, current);
|
|
+ force_sig(SIGSEGV, tsk);
|
|
return;
|
|
|
|
gp_in_vm86:
|
|
@@ -648,14 +627,15 @@ gp_in_vm86:
|
|
return;
|
|
|
|
gp_in_kernel:
|
|
- if (!fixup_exception(regs)) {
|
|
- current->thread.error_code = error_code;
|
|
- current->thread.trap_no = 13;
|
|
- if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
+ if (fixup_exception(regs))
|
|
+ return;
|
|
+
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
+ if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
error_code, 13, SIGSEGV) == NOTIFY_STOP)
|
|
- return;
|
|
- die("general protection fault", regs, error_code);
|
|
- }
|
|
+ return;
|
|
+ die("general protection fault", regs, error_code);
|
|
}
|
|
|
|
static notrace __kprobes void
|
|
@@ -722,9 +702,9 @@ unknown_nmi_error(unsigned char reason,
|
|
|
|
static DEFINE_SPINLOCK(nmi_print_lock);
|
|
|
|
-void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg)
|
|
+void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
|
|
{
|
|
- if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP)
|
|
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
|
|
return;
|
|
|
|
spin_lock(&nmi_print_lock);
|
|
@@ -733,10 +713,12 @@ void notrace __kprobes die_nmi(struct pt
|
|
* to get a message out:
|
|
*/
|
|
bust_spinlocks(1);
|
|
- printk(KERN_EMERG "%s", msg);
|
|
+ printk(KERN_EMERG "%s", str);
|
|
printk(" on CPU%d, ip %08lx, registers:\n",
|
|
smp_processor_id(), regs->ip);
|
|
show_registers(regs);
|
|
+ if (do_panic)
|
|
+ panic("Non maskable interrupt");
|
|
console_silent();
|
|
spin_unlock(&nmi_print_lock);
|
|
bust_spinlocks(0);
|
|
@@ -756,14 +738,17 @@ void notrace __kprobes die_nmi(struct pt
|
|
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|
{
|
|
unsigned char reason = 0;
|
|
+ int cpu;
|
|
|
|
- /* Only the BSP gets external NMIs from the system: */
|
|
- if (!smp_processor_id())
|
|
+ cpu = smp_processor_id();
|
|
+
|
|
+ /* Only the BSP gets external NMIs from the system. */
|
|
+ if (!cpu)
|
|
reason = get_nmi_reason();
|
|
|
|
if (!(reason & 0xc0)) {
|
|
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
|
|
- == NOTIFY_STOP)
|
|
+ == NOTIFY_STOP)
|
|
return;
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
/*
|
|
@@ -772,7 +757,7 @@ static notrace __kprobes void default_do
|
|
*/
|
|
if (nmi_watchdog_tick(regs, reason))
|
|
return;
|
|
- if (!do_nmi_callback(regs, smp_processor_id()))
|
|
+ if (!do_nmi_callback(regs, cpu))
|
|
unknown_nmi_error(reason, regs);
|
|
#else
|
|
unknown_nmi_error(reason, regs);
|
|
@@ -782,6 +767,8 @@ static notrace __kprobes void default_do
|
|
}
|
|
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
return;
|
|
+
|
|
+ /* AK: following checks seem to be broken on modern chipsets. FIXME */
|
|
if (reason & 0x80)
|
|
mem_parity_error(reason, regs);
|
|
if (reason & 0x40)
|
|
@@ -793,8 +780,6 @@ static notrace __kprobes void default_do
|
|
reassert_nmi();
|
|
}
|
|
|
|
-static int ignore_nmis;
|
|
-
|
|
notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
|
|
{
|
|
int cpu;
|
|
@@ -879,7 +864,7 @@ void __kprobes do_debug(struct pt_regs *
|
|
tsk->thread.debugctlmsr = 0;
|
|
|
|
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
|
|
- SIGTRAP) == NOTIFY_STOP)
|
|
+ SIGTRAP) == NOTIFY_STOP)
|
|
return;
|
|
/* It's safe to allow irq's after DR6 has been saved */
|
|
if (regs->flags & X86_EFLAGS_IF)
|
|
@@ -940,9 +925,8 @@ clear_TF_reenable:
|
|
void math_error(void __user *ip)
|
|
{
|
|
struct task_struct *task;
|
|
- unsigned short cwd;
|
|
- unsigned short swd;
|
|
siginfo_t info;
|
|
+ unsigned short cwd, swd;
|
|
|
|
/*
|
|
* Save the info for the exception handler and clear the error.
|
|
@@ -961,7 +945,7 @@ void math_error(void __user *ip)
|
|
* C1 reg you need in case of a stack fault, 0x040 is the stack
|
|
* fault bit. We should only be taking one exception at a time,
|
|
* so if this combination doesn't produce any single exception,
|
|
- * then we have a bad program that isn't syncronizing its FPU usage
|
|
+ * then we have a bad program that isn't synchronizing its FPU usage
|
|
* and it will suffer the consequences since we won't be able to
|
|
* fully reproduce the context of the exception
|
|
*/
|
|
@@ -970,7 +954,7 @@ void math_error(void __user *ip)
|
|
switch (swd & ~cwd & 0x3f) {
|
|
case 0x000: /* No unmasked exception */
|
|
return;
|
|
- default: /* Multiple exceptions */
|
|
+ default: /* Multiple exceptions */
|
|
break;
|
|
case 0x001: /* Invalid Op */
|
|
/*
|
|
@@ -1006,8 +990,8 @@ void do_coprocessor_error(struct pt_regs
|
|
static void simd_math_error(void __user *ip)
|
|
{
|
|
struct task_struct *task;
|
|
- unsigned short mxcsr;
|
|
siginfo_t info;
|
|
+ unsigned short mxcsr;
|
|
|
|
/*
|
|
* Save the info for the exception handler and clear the error.
|
|
@@ -1084,7 +1068,7 @@ void do_spurious_interrupt_bug(struct pt
|
|
|
|
unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
|
|
{
|
|
- struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
|
|
+ struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
|
|
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
|
|
unsigned long new_kesp = kesp - base;
|
|
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/traps_64-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/traps_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -10,73 +10,56 @@
|
|
* 'Traps.c' handles hardware traps and faults after we have saved some
|
|
* state in 'entry.S'.
|
|
*/
|
|
-#include <linux/sched.h>
|
|
+#include <linux/moduleparam.h>
|
|
+#include <linux/interrupt.h>
|
|
+#include <linux/kallsyms.h>
|
|
+#include <linux/spinlock.h>
|
|
+#include <linux/kprobes.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/utsname.h>
|
|
+#include <linux/kdebug.h>
|
|
#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/ptrace.h>
|
|
#include <linux/string.h>
|
|
+#include <linux/unwind.h>
|
|
+#include <linux/delay.h>
|
|
#include <linux/errno.h>
|
|
-#include <linux/ptrace.h>
|
|
+#include <linux/kexec.h>
|
|
+#include <linux/sched.h>
|
|
#include <linux/timer.h>
|
|
-#include <linux/mm.h>
|
|
#include <linux/init.h>
|
|
-#include <linux/delay.h>
|
|
-#include <linux/spinlock.h>
|
|
-#include <linux/interrupt.h>
|
|
-#include <linux/kallsyms.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/moduleparam.h>
|
|
-#include <linux/nmi.h>
|
|
-#include <linux/kprobes.h>
|
|
-#include <linux/kexec.h>
|
|
-#include <linux/unwind.h>
|
|
-#include <linux/uaccess.h>
|
|
#include <linux/bug.h>
|
|
-#include <linux/kdebug.h>
|
|
-#include <linux/utsname.h>
|
|
-
|
|
-#include <mach_traps.h>
|
|
+#include <linux/nmi.h>
|
|
+#include <linux/mm.h>
|
|
|
|
#if defined(CONFIG_EDAC)
|
|
#include <linux/edac.h>
|
|
#endif
|
|
|
|
-#include <asm/system.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/atomic.h>
|
|
+#include <asm/stacktrace.h>
|
|
+#include <asm/processor.h>
|
|
#include <asm/debugreg.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/system.h>
|
|
+#include <asm/unwind.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/i387.h>
|
|
-#include <asm/processor.h>
|
|
-#include <asm/unwind.h>
|
|
+#include <asm/nmi.h>
|
|
#include <asm/smp.h>
|
|
+#include <asm/io.h>
|
|
#include <asm/pgalloc.h>
|
|
-#include <asm/pda.h>
|
|
#include <asm/proto.h>
|
|
-#include <asm/nmi.h>
|
|
-#include <asm/stacktrace.h>
|
|
+#include <asm/pda.h>
|
|
+#include <asm/traps.h>
|
|
|
|
-asmlinkage void divide_error(void);
|
|
-asmlinkage void debug(void);
|
|
-asmlinkage void nmi(void);
|
|
-asmlinkage void int3(void);
|
|
-asmlinkage void overflow(void);
|
|
-asmlinkage void bounds(void);
|
|
-asmlinkage void invalid_op(void);
|
|
-asmlinkage void device_not_available(void);
|
|
-asmlinkage void double_fault(void);
|
|
-asmlinkage void coprocessor_segment_overrun(void);
|
|
-asmlinkage void invalid_TSS(void);
|
|
-asmlinkage void segment_not_present(void);
|
|
-asmlinkage void stack_segment(void);
|
|
-asmlinkage void general_protection(void);
|
|
-asmlinkage void page_fault(void);
|
|
-asmlinkage void coprocessor_error(void);
|
|
-asmlinkage void simd_coprocessor_error(void);
|
|
-asmlinkage void reserved(void);
|
|
-asmlinkage void alignment_check(void);
|
|
-asmlinkage void machine_check(void);
|
|
-asmlinkage void spurious_interrupt_bug(void);
|
|
+#include <mach_traps.h>
|
|
|
|
+int panic_on_unrecovered_nmi;
|
|
+int kstack_depth_to_print = 12;
|
|
static unsigned int code_bytes = 64;
|
|
+static int ignore_nmis;
|
|
+static int die_counter;
|
|
|
|
static inline void conditional_sti(struct pt_regs *regs)
|
|
{
|
|
@@ -100,34 +83,9 @@ static inline void preempt_conditional_c
|
|
dec_preempt_count();
|
|
}
|
|
|
|
-int kstack_depth_to_print = 12;
|
|
-
|
|
void printk_address(unsigned long address, int reliable)
|
|
{
|
|
-#ifdef CONFIG_KALLSYMS
|
|
- unsigned long offset = 0, symsize;
|
|
- const char *symname;
|
|
- char *modname;
|
|
- char *delim = ":";
|
|
- char namebuf[KSYM_NAME_LEN];
|
|
- char reliab[4] = "";
|
|
-
|
|
- symname = kallsyms_lookup(address, &symsize, &offset,
|
|
- &modname, namebuf);
|
|
- if (!symname) {
|
|
- printk(" [<%016lx>]\n", address);
|
|
- return;
|
|
- }
|
|
- if (!reliable)
|
|
- strcpy(reliab, "? ");
|
|
-
|
|
- if (!modname)
|
|
- modname = delim = "";
|
|
- printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
|
|
- address, reliab, delim, modname, delim, symname, offset, symsize);
|
|
-#else
|
|
- printk(" [<%016lx>]\n", address);
|
|
-#endif
|
|
+ printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
|
|
}
|
|
|
|
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
|
|
@@ -206,8 +164,6 @@ static unsigned long *in_exception_stack
|
|
return NULL;
|
|
}
|
|
|
|
-#define MSG(txt) ops->warning(data, txt)
|
|
-
|
|
/*
|
|
* x86-64 can have up to three kernel stacks:
|
|
* process stack
|
|
@@ -234,11 +190,11 @@ struct stack_frame {
|
|
unsigned long return_address;
|
|
};
|
|
|
|
-
|
|
-static inline unsigned long print_context_stack(struct thread_info *tinfo,
|
|
- unsigned long *stack, unsigned long bp,
|
|
- const struct stacktrace_ops *ops, void *data,
|
|
- unsigned long *end)
|
|
+static inline unsigned long
|
|
+print_context_stack(struct thread_info *tinfo,
|
|
+ unsigned long *stack, unsigned long bp,
|
|
+ const struct stacktrace_ops *ops, void *data,
|
|
+ unsigned long *end)
|
|
{
|
|
struct stack_frame *frame = (struct stack_frame *)bp;
|
|
|
|
@@ -260,7 +216,7 @@ static inline unsigned long print_contex
|
|
return bp;
|
|
}
|
|
|
|
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
|
|
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
|
unsigned long *stack, unsigned long bp,
|
|
const struct stacktrace_ops *ops, void *data)
|
|
{
|
|
@@ -269,36 +225,34 @@ void dump_trace(struct task_struct *tsk,
|
|
unsigned used = 0;
|
|
struct thread_info *tinfo;
|
|
|
|
- if (!tsk)
|
|
- tsk = current;
|
|
- tinfo = task_thread_info(tsk);
|
|
+ if (!task)
|
|
+ task = current;
|
|
|
|
if (!stack) {
|
|
unsigned long dummy;
|
|
stack = &dummy;
|
|
- if (tsk && tsk != current)
|
|
- stack = (unsigned long *)tsk->thread.sp;
|
|
+ if (task && task != current)
|
|
+ stack = (unsigned long *)task->thread.sp;
|
|
}
|
|
|
|
#ifdef CONFIG_FRAME_POINTER
|
|
if (!bp) {
|
|
- if (tsk == current) {
|
|
+ if (task == current) {
|
|
/* Grab bp right from our regs */
|
|
- asm("movq %%rbp, %0" : "=r" (bp):);
|
|
+ asm("movq %%rbp, %0" : "=r" (bp) :);
|
|
} else {
|
|
/* bp is the last reg pushed by switch_to */
|
|
- bp = *(unsigned long *) tsk->thread.sp;
|
|
+ bp = *(unsigned long *) task->thread.sp;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
-
|
|
-
|
|
/*
|
|
* Print function call entries in all stacks, starting at the
|
|
* current stack address. If the stacks consist of nested
|
|
* exceptions
|
|
*/
|
|
+ tinfo = task_thread_info(task);
|
|
for (;;) {
|
|
char *id;
|
|
unsigned long *estack_end;
|
|
@@ -383,18 +337,24 @@ static const struct stacktrace_ops print
|
|
.address = print_trace_address,
|
|
};
|
|
|
|
-void
|
|
-show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack,
|
|
- unsigned long bp)
|
|
+static void
|
|
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
+ unsigned long *stack, unsigned long bp, char *log_lvl)
|
|
{
|
|
printk("\nCall Trace:\n");
|
|
- dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL);
|
|
+ dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl);
|
|
printk("\n");
|
|
}
|
|
|
|
+void show_trace(struct task_struct *task, struct pt_regs *regs,
|
|
+ unsigned long *stack, unsigned long bp)
|
|
+{
|
|
+ show_trace_log_lvl(task, regs, stack, bp, "");
|
|
+}
|
|
+
|
|
static void
|
|
-_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
|
|
- unsigned long bp)
|
|
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
+ unsigned long *sp, unsigned long bp, char *log_lvl)
|
|
{
|
|
unsigned long *stack;
|
|
int i;
|
|
@@ -406,14 +366,14 @@ _show_stack(struct task_struct *tsk, str
|
|
// back trace for this cpu.
|
|
|
|
if (sp == NULL) {
|
|
- if (tsk)
|
|
- sp = (unsigned long *)tsk->thread.sp;
|
|
+ if (task)
|
|
+ sp = (unsigned long *)task->thread.sp;
|
|
else
|
|
sp = (unsigned long *)&sp;
|
|
}
|
|
|
|
stack = sp;
|
|
- for(i=0; i < kstack_depth_to_print; i++) {
|
|
+ for (i = 0; i < kstack_depth_to_print; i++) {
|
|
if (stack >= irqstack && stack <= irqstack_end) {
|
|
if (stack == irqstack_end) {
|
|
stack = (unsigned long *) (irqstack_end[-1]);
|
|
@@ -428,12 +388,12 @@ _show_stack(struct task_struct *tsk, str
|
|
printk(" %016lx", *stack++);
|
|
touch_nmi_watchdog();
|
|
}
|
|
- show_trace(tsk, regs, sp, bp);
|
|
+ show_trace_log_lvl(task, regs, sp, bp, log_lvl);
|
|
}
|
|
|
|
-void show_stack(struct task_struct *tsk, unsigned long * sp)
|
|
+void show_stack(struct task_struct *task, unsigned long *sp)
|
|
{
|
|
- _show_stack(tsk, NULL, sp, 0);
|
|
+ show_stack_log_lvl(task, NULL, sp, 0, "");
|
|
}
|
|
|
|
/*
|
|
@@ -441,8 +401,8 @@ void show_stack(struct task_struct *tsk,
|
|
*/
|
|
void dump_stack(void)
|
|
{
|
|
- unsigned long dummy;
|
|
unsigned long bp = 0;
|
|
+ unsigned long stack;
|
|
|
|
#ifdef CONFIG_FRAME_POINTER
|
|
if (!bp)
|
|
@@ -454,7 +414,7 @@ void dump_stack(void)
|
|
init_utsname()->release,
|
|
(int)strcspn(init_utsname()->version, " "),
|
|
init_utsname()->version);
|
|
- show_trace(NULL, NULL, &dummy, bp);
|
|
+ show_trace(NULL, NULL, &stack, bp);
|
|
}
|
|
|
|
EXPORT_SYMBOL(dump_stack);
|
|
@@ -465,12 +425,8 @@ void show_registers(struct pt_regs *regs
|
|
unsigned long sp;
|
|
const int cpu = smp_processor_id();
|
|
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
|
|
- u8 *ip;
|
|
- unsigned int code_prologue = code_bytes * 43 / 64;
|
|
- unsigned int code_len = code_bytes;
|
|
|
|
sp = regs->sp;
|
|
- ip = (u8 *) regs->ip - code_prologue;
|
|
printk("CPU %d ", cpu);
|
|
__show_regs(regs);
|
|
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
|
|
@@ -481,15 +437,22 @@ void show_registers(struct pt_regs *regs
|
|
* time of the fault..
|
|
*/
|
|
if (!user_mode(regs)) {
|
|
+ unsigned int code_prologue = code_bytes * 43 / 64;
|
|
+ unsigned int code_len = code_bytes;
|
|
unsigned char c;
|
|
+ u8 *ip;
|
|
+
|
|
printk("Stack: ");
|
|
- _show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
|
|
+ show_stack_log_lvl(NULL, regs, (unsigned long *)sp,
|
|
+ regs->bp, "");
|
|
printk("\n");
|
|
|
|
printk(KERN_EMERG "Code: ");
|
|
+
|
|
+ ip = (u8 *)regs->ip - code_prologue;
|
|
if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
|
|
/* try starting at RIP */
|
|
- ip = (u8 *) regs->ip;
|
|
+ ip = (u8 *)regs->ip;
|
|
code_len = code_len - code_prologue + 1;
|
|
}
|
|
for (i = 0; i < code_len; i++, ip++) {
|
|
@@ -505,7 +468,7 @@ void show_registers(struct pt_regs *regs
|
|
}
|
|
}
|
|
printk("\n");
|
|
-}
|
|
+}
|
|
|
|
int is_valid_bugaddr(unsigned long ip)
|
|
{
|
|
@@ -545,7 +508,7 @@ unsigned __kprobes long oops_begin(void)
|
|
}
|
|
|
|
void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
|
|
-{
|
|
+{
|
|
die_owner = -1;
|
|
bust_spinlocks(0);
|
|
die_nest_count--;
|
|
@@ -563,10 +526,9 @@ void __kprobes oops_end(unsigned long fl
|
|
do_exit(signr);
|
|
}
|
|
|
|
-int __kprobes __die(const char * str, struct pt_regs * regs, long err)
|
|
+int __kprobes __die(const char *str, struct pt_regs *regs, long err)
|
|
{
|
|
- static int die_counter;
|
|
- printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
|
|
+ printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
|
|
#ifdef CONFIG_PREEMPT
|
|
printk("PREEMPT ");
|
|
#endif
|
|
@@ -577,8 +539,10 @@ int __kprobes __die(const char * str, st
|
|
printk("DEBUG_PAGEALLOC");
|
|
#endif
|
|
printk("\n");
|
|
- if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
|
|
+ if (notify_die(DIE_OOPS, str, regs, err,
|
|
+ current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
|
|
return 1;
|
|
+
|
|
show_registers(regs);
|
|
add_taint(TAINT_DIE);
|
|
/* Executive summary in case the oops scrolled away */
|
|
@@ -590,7 +554,7 @@ int __kprobes __die(const char * str, st
|
|
return 0;
|
|
}
|
|
|
|
-void die(const char * str, struct pt_regs * regs, long err)
|
|
+void die(const char *str, struct pt_regs *regs, long err)
|
|
{
|
|
unsigned long flags = oops_begin();
|
|
|
|
@@ -608,8 +572,7 @@ die_nmi(char *str, struct pt_regs *regs,
|
|
{
|
|
unsigned long flags;
|
|
|
|
- if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) ==
|
|
- NOTIFY_STOP)
|
|
+ if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
|
|
return;
|
|
|
|
flags = oops_begin();
|
|
@@ -617,7 +580,9 @@ die_nmi(char *str, struct pt_regs *regs,
|
|
* We are in trouble anyway, lets at least try
|
|
* to get a message out.
|
|
*/
|
|
- printk(str, smp_processor_id());
|
|
+ printk(KERN_EMERG "%s", str);
|
|
+ printk(" on CPU%d, ip %08lx, registers:\n",
|
|
+ smp_processor_id(), regs->ip);
|
|
show_registers(regs);
|
|
if (kexec_should_crash(current))
|
|
crash_kexec(regs);
|
|
@@ -630,44 +595,44 @@ die_nmi(char *str, struct pt_regs *regs,
|
|
}
|
|
#endif
|
|
|
|
-static void __kprobes do_trap(int trapnr, int signr, char *str,
|
|
- struct pt_regs * regs, long error_code,
|
|
- siginfo_t *info)
|
|
+static void __kprobes
|
|
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
|
|
+ long error_code, siginfo_t *info)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
|
|
- if (user_mode(regs)) {
|
|
- /*
|
|
- * We want error_code and trap_no set for userspace
|
|
- * faults and kernelspace faults which result in
|
|
- * die(), but not kernelspace faults which are fixed
|
|
- * up. die() gives the process no chance to handle
|
|
- * the signal and notice the kernel fault information,
|
|
- * so that won't result in polluting the information
|
|
- * about previously queued, but not yet delivered,
|
|
- * faults. See also do_general_protection below.
|
|
- */
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
+ if (!user_mode(regs))
|
|
+ goto kernel_trap;
|
|
|
|
- if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
|
|
- printk_ratelimit()) {
|
|
- printk(KERN_INFO
|
|
- "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
|
|
- tsk->comm, tsk->pid, str,
|
|
- regs->ip, regs->sp, error_code);
|
|
- print_vma_addr(" in ", regs->ip);
|
|
- printk("\n");
|
|
- }
|
|
+ /*
|
|
+ * We want error_code and trap_no set for userspace faults and
|
|
+ * kernelspace faults which result in die(), but not
|
|
+ * kernelspace faults which are fixed up. die() gives the
|
|
+ * process no chance to handle the signal and notice the
|
|
+ * kernel fault information, so that won't result in polluting
|
|
+ * the information about previously queued, but not yet
|
|
+ * delivered, faults. See also do_general_protection below.
|
|
+ */
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
|
|
- if (info)
|
|
- force_sig_info(signr, info, tsk);
|
|
- else
|
|
- force_sig(signr, tsk);
|
|
- return;
|
|
+ if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
|
|
+ printk_ratelimit()) {
|
|
+ printk(KERN_INFO
|
|
+ "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
|
|
+ tsk->comm, tsk->pid, str,
|
|
+ regs->ip, regs->sp, error_code);
|
|
+ print_vma_addr(" in ", regs->ip);
|
|
+ printk("\n");
|
|
}
|
|
|
|
+ if (info)
|
|
+ force_sig_info(signr, info, tsk);
|
|
+ else
|
|
+ force_sig(signr, tsk);
|
|
+ return;
|
|
|
|
+kernel_trap:
|
|
if (!fixup_exception(regs)) {
|
|
tsk->thread.error_code = error_code;
|
|
tsk->thread.trap_no = trapnr;
|
|
@@ -677,41 +642,39 @@ static void __kprobes do_trap(int trapnr
|
|
}
|
|
|
|
#define DO_ERROR(trapnr, signr, str, name) \
|
|
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
|
|
-{ \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
|
|
+{ \
|
|
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
+ == NOTIFY_STOP) \
|
|
+ return; \
|
|
conditional_sti(regs); \
|
|
- do_trap(trapnr, signr, str, regs, error_code, NULL); \
|
|
+ do_trap(trapnr, signr, str, regs, error_code, NULL); \
|
|
}
|
|
|
|
-#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
|
|
-asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
|
|
-{ \
|
|
- siginfo_t info; \
|
|
- info.si_signo = signr; \
|
|
- info.si_errno = 0; \
|
|
- info.si_code = sicode; \
|
|
- info.si_addr = (void __user *)siaddr; \
|
|
- trace_hardirqs_fixup(); \
|
|
- if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
- == NOTIFY_STOP) \
|
|
- return; \
|
|
+#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
|
|
+asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
|
|
+{ \
|
|
+ siginfo_t info; \
|
|
+ info.si_signo = signr; \
|
|
+ info.si_errno = 0; \
|
|
+ info.si_code = sicode; \
|
|
+ info.si_addr = (void __user *)siaddr; \
|
|
+ trace_hardirqs_fixup(); \
|
|
+ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
|
|
+ == NOTIFY_STOP) \
|
|
+ return; \
|
|
conditional_sti(regs); \
|
|
- do_trap(trapnr, signr, str, regs, error_code, &info); \
|
|
+ do_trap(trapnr, signr, str, regs, error_code, &info); \
|
|
}
|
|
|
|
-DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
-DO_ERROR( 4, SIGSEGV, "overflow", overflow)
|
|
-DO_ERROR( 5, SIGSEGV, "bounds", bounds)
|
|
-DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
|
|
-DO_ERROR( 7, SIGSEGV, "device not available", device_not_available)
|
|
-DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
+DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
|
|
+DO_ERROR(4, SIGSEGV, "overflow", overflow)
|
|
+DO_ERROR(5, SIGSEGV, "bounds", bounds)
|
|
+DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
|
|
+DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
|
|
DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
|
|
-DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
+DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
|
|
DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
|
|
-DO_ERROR(18, SIGSEGV, "reserved", reserved)
|
|
|
|
/* Runs on IST stack */
|
|
asmlinkage void do_stack_segment(struct pt_regs *regs, long error_code)
|
|
@@ -741,31 +704,34 @@ asmlinkage void do_double_fault(struct p
|
|
die(str, regs, error_code);
|
|
}
|
|
|
|
-asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
|
|
- long error_code)
|
|
+asmlinkage void __kprobes
|
|
+do_general_protection(struct pt_regs *regs, long error_code)
|
|
{
|
|
- struct task_struct *tsk = current;
|
|
+ struct task_struct *tsk;
|
|
|
|
conditional_sti(regs);
|
|
|
|
- if (user_mode(regs)) {
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 13;
|
|
+ tsk = current;
|
|
+ if (!user_mode(regs))
|
|
+ goto gp_in_kernel;
|
|
|
|
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
- printk_ratelimit()) {
|
|
- printk(KERN_INFO
|
|
- "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
- tsk->comm, tsk->pid,
|
|
- regs->ip, regs->sp, error_code);
|
|
- print_vma_addr(" in ", regs->ip);
|
|
- printk("\n");
|
|
- }
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
|
|
- force_sig(SIGSEGV, tsk);
|
|
- return;
|
|
- }
|
|
+ if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
+ printk_ratelimit()) {
|
|
+ printk(KERN_INFO
|
|
+ "%s[%d] general protection ip:%lx sp:%lx error:%lx",
|
|
+ tsk->comm, tsk->pid,
|
|
+ regs->ip, regs->sp, error_code);
|
|
+ print_vma_addr(" in ", regs->ip);
|
|
+ printk("\n");
|
|
+ }
|
|
|
|
+ force_sig(SIGSEGV, tsk);
|
|
+ return;
|
|
+
|
|
+gp_in_kernel:
|
|
if (fixup_exception(regs))
|
|
return;
|
|
|
|
@@ -778,14 +744,14 @@ asmlinkage void __kprobes do_general_pro
|
|
}
|
|
|
|
static notrace __kprobes void
|
|
-mem_parity_error(unsigned char reason, struct pt_regs * regs)
|
|
+mem_parity_error(unsigned char reason, struct pt_regs *regs)
|
|
{
|
|
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
|
|
reason);
|
|
printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
|
|
|
|
#if defined(CONFIG_EDAC)
|
|
- if(edac_handler_set()) {
|
|
+ if (edac_handler_set()) {
|
|
edac_atomic_assert_error();
|
|
return;
|
|
}
|
|
@@ -801,7 +767,7 @@ mem_parity_error(unsigned char reason, s
|
|
}
|
|
|
|
static notrace __kprobes void
|
|
-io_check_error(unsigned char reason, struct pt_regs * regs)
|
|
+io_check_error(unsigned char reason, struct pt_regs *regs)
|
|
{
|
|
printk("NMI: IOCK error (debug interrupt?)\n");
|
|
show_registers(regs);
|
|
@@ -827,14 +793,14 @@ unknown_nmi_error(unsigned char reason,
|
|
|
|
/* Runs on IST stack. This code must keep interrupts off all the time.
|
|
Nested NMIs are prevented by the CPU. */
|
|
-asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|
+asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
|
|
{
|
|
unsigned char reason = 0;
|
|
int cpu;
|
|
|
|
cpu = smp_processor_id();
|
|
|
|
- /* Only the BSP gets external NMIs from the system. */
|
|
+ /* Only the BSP gets external NMIs from the system. */
|
|
if (!cpu)
|
|
reason = get_nmi_reason();
|
|
|
|
@@ -847,33 +813,58 @@ asmlinkage notrace __kprobes void defau
|
|
* Ok, so this is none of the documented NMI sources,
|
|
* so it must be the NMI watchdog.
|
|
*/
|
|
- if (nmi_watchdog_tick(regs,reason))
|
|
+ if (nmi_watchdog_tick(regs, reason))
|
|
return;
|
|
#endif
|
|
- if (!do_nmi_callback(regs,cpu))
|
|
+ if (!do_nmi_callback(regs, cpu))
|
|
unknown_nmi_error(reason, regs);
|
|
|
|
return;
|
|
}
|
|
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
- return;
|
|
+ return;
|
|
|
|
/* AK: following checks seem to be broken on modern chipsets. FIXME */
|
|
-
|
|
if (reason & 0x80)
|
|
mem_parity_error(reason, regs);
|
|
if (reason & 0x40)
|
|
io_check_error(reason, regs);
|
|
}
|
|
|
|
+asmlinkage notrace __kprobes void
|
|
+do_nmi(struct pt_regs *regs, long error_code)
|
|
+{
|
|
+ nmi_enter();
|
|
+
|
|
+ add_pda(__nmi_count, 1);
|
|
+
|
|
+ if (!ignore_nmis)
|
|
+ default_do_nmi(regs);
|
|
+
|
|
+ nmi_exit();
|
|
+}
|
|
+
|
|
+void stop_nmi(void)
|
|
+{
|
|
+ acpi_nmi_disable();
|
|
+ ignore_nmis++;
|
|
+}
|
|
+
|
|
+void restart_nmi(void)
|
|
+{
|
|
+ ignore_nmis--;
|
|
+ acpi_nmi_enable();
|
|
+}
|
|
+
|
|
/* runs on IST stack. */
|
|
asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code)
|
|
{
|
|
trace_hardirqs_fixup();
|
|
|
|
- if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) {
|
|
+ if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
|
|
+ == NOTIFY_STOP)
|
|
return;
|
|
- }
|
|
+
|
|
preempt_conditional_sti(regs);
|
|
do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
|
|
preempt_conditional_cli(regs);
|
|
@@ -904,8 +895,8 @@ asmlinkage __kprobes struct pt_regs *syn
|
|
asmlinkage void __kprobes do_debug(struct pt_regs * regs,
unsigned long error_code)
{
- unsigned long condition;
struct task_struct *tsk = current;
+ unsigned long condition;
siginfo_t info;

trace_hardirqs_fixup();
@@ -926,21 +917,19 @@ asmlinkage void __kprobes do_debug(struc

/* Mask out spurious debug traps due to lazy DR7 setting */
if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7) {
+ if (!tsk->thread.debugreg7)
goto clear_dr7;
- }
}

tsk->thread.debugreg6 = condition;

-
/*
* Single-stepping through TF: make sure we ignore any events in
* kernel space (but re-enable TF when returning to user mode).
*/
if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if (!user_mode(regs))
+ goto clear_TF_reenable;
}

/* Ok, finally something we can handle */
@@ -953,7 +942,7 @@ asmlinkage void __kprobes do_debug(struc
force_sig_info(SIGTRAP, &info, tsk);

clear_dr7:
- set_debugreg(0UL, 7);
+ set_debugreg(0, 7);
preempt_conditional_cli(regs);
return;

@@ -961,6 +950,7 @@ clear_TF_reenable:
set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
regs->flags &= ~X86_EFLAGS_TF;
preempt_conditional_cli(regs);
+ return;
}

static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
@@ -983,7 +973,7 @@ static int kernel_math_error(struct pt_r
asmlinkage void do_coprocessor_error(struct pt_regs *regs)
{
void __user *ip = (void __user *)(regs->ip);
- struct task_struct * task;
+ struct task_struct *task;
siginfo_t info;
unsigned short cwd, swd;

@@ -1016,30 +1006,30 @@ asmlinkage void do_coprocessor_error(str
cwd = get_fpu_cwd(task);
swd = get_fpu_swd(task);
switch (swd & ~cwd & 0x3f) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- /*
- * swd & 0x240 == 0x040: Stack Underflow
- * swd & 0x240 == 0x240: Stack Overflow
- * User must clear the SF bit (0x40) if set
- */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
+ case 0x000: /* No unmasked exception */
+ default: /* Multiple exceptions */
+ break;
+ case 0x001: /* Invalid Op */
+ /*
+ * swd & 0x240 == 0x040: Stack Underflow
+ * swd & 0x240 == 0x240: Stack Overflow
+ * User must clear the SF bit (0x40) if set
+ */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
}
force_sig_info(SIGFPE, &info, task);
}
@@ -1052,7 +1042,7 @@ asmlinkage void bad_intr(void)
asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
{
void __user *ip = (void __user *)(regs->ip);
- struct task_struct * task;
+ struct task_struct *task;
siginfo_t info;
unsigned short mxcsr;

@@ -1080,25 +1070,25 @@ asmlinkage void do_simd_coprocessor_erro
*/
mxcsr = get_fpu_mxcsr(task);
switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
- case 0x000:
- default:
- break;
- case 0x001: /* Invalid Op */
- info.si_code = FPE_FLTINV;
- break;
- case 0x002: /* Denormalize */
- case 0x010: /* Underflow */
- info.si_code = FPE_FLTUND;
- break;
- case 0x004: /* Zero Divide */
- info.si_code = FPE_FLTDIV;
- break;
- case 0x008: /* Overflow */
- info.si_code = FPE_FLTOVF;
- break;
- case 0x020: /* Precision */
- info.si_code = FPE_FLTRES;
- break;
+ case 0x000:
+ default:
+ break;
+ case 0x001: /* Invalid Op */
+ info.si_code = FPE_FLTINV;
+ break;
+ case 0x002: /* Denormalize */
+ case 0x010: /* Underflow */
+ info.si_code = FPE_FLTUND;
+ break;
+ case 0x004: /* Zero Divide */
+ info.si_code = FPE_FLTDIV;
+ break;
+ case 0x008: /* Overflow */
+ info.si_code = FPE_FLTOVF;
+ break;
+ case 0x020: /* Precision */
+ info.si_code = FPE_FLTRES;
+ break;
}
force_sig_info(SIGFPE, &info, task);
}
@@ -1118,7 +1108,7 @@ asmlinkage void __attribute__((weak)) mc
}

/*
- * 'math_state_restore()' saves the current math information in the
+ * 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
* Careful.. There are problems with IBM-designed IRQ13 behaviour.
@@ -1145,7 +1135,14 @@ asmlinkage void math_state_restore(void)

/* clts(); */ /* 'clts' is done for us by Xen during virtual trap. */

- restore_fpu_checking(&me->thread.xstate->fxsave);
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) {
+ stts();
+ force_sig(SIGSEGV, me);
+ return;
+ }
task_thread_info(me)->status |= TS_USEDFPU;
me->fpu_counter++;
}
@@ -1190,13 +1187,12 @@ void __init trap_init(void)
ret = HYPERVISOR_set_trap_table(trap_table);
if (ret)
printk("HYPERVISOR_set_trap_table failed: error %d\n", ret);
-
/*
* initialize the per thread extended state:
*/
- init_thread_xstate();
+ init_thread_xstate();
/*
- * Should be a barrier for any external CPU state.
+ * Should be a barrier for any external CPU state:
*/
cpu_init();
}
@@ -1212,27 +1208,25 @@ void __cpuinit smp_trap_init(trap_info_t
}
}

-
static int __init oops_setup(char *s)
-{
+{
if (!s)
return -EINVAL;
if (!strcmp(s, "panic"))
panic_on_oops = 1;
return 0;
-}
+}
early_param("oops", oops_setup);

static int __init kstack_setup(char *s)
{
if (!s)
return -EINVAL;
- kstack_depth_to_print = simple_strtoul(s,NULL,0);
+ kstack_depth_to_print = simple_strtoul(s, NULL, 0);
return 0;
}
early_param("kstack", kstack_setup);

-
static int __init code_bytes_setup(char *s)
{
code_bytes = simple_strtoul(s, NULL, 0);
--- head-2011-03-11.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -42,7 +42,8 @@
#include <asm/topology.h>
#include <asm/vgtod.h>

-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+ __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
#define __syscall_clobber "r11","cx","memory"

/*
@@ -264,10 +265,7 @@ static void __cpuinit vsyscall_set_cpu(i
d |= cpu;
d |= (node & 0xf) << 12;
d |= (node >> 4) << 48;
- if (HYPERVISOR_update_descriptor(virt_to_machine(get_cpu_gdt_table(cpu)
- + GDT_ENTRY_PER_CPU),
- d))
- BUG();
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
}

static void __cpuinit cpu_vsyscall_init(void *arg)
@@ -281,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_bl
{
long cpu = (long)arg;
if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
- smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
return NOTIFY_DONE;
}

@@ -311,7 +309,7 @@ static int __init vsyscall_init(void)
#ifdef CONFIG_SYSCTL
register_sysctl_table(kernel_root_table2);
#endif
- on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+ on_each_cpu(cpu_vsyscall_init, NULL, 1);
hotcpu_notifier(cpu_vsyscall_notifier, 0);
return 0;
}
|
|
--- head-2011-03-11.orig/arch/x86/mach-xen/setup.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/mach-xen/setup.c 2011-02-03 14:23:14.000000000 +0100
@@ -17,6 +17,8 @@
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>

+#ifdef CONFIG_X86_32
+
#ifdef CONFIG_HOTPLUG_CPU
#define DEFAULT_SEND_IPI (1)
#else
@@ -44,47 +46,6 @@ static int __init print_ipi_mode(void)

late_initcall(print_ipi_mode);

-/**
- * machine_specific_memory_setup - Hook for machine specific memory setup.
- *
- * Description:
- * This is included late in kernel/setup.c so that it can make
- * use of all of the static functions.
- **/
-
-char * __init machine_specific_memory_setup(void)
-{
- int rc;
- struct xen_memory_map memmap;
- static struct e820entry __initdata map[E820MAX];
-
- memmap.nr_entries = E820MAX;
- set_xen_guest_handle(memmap.buffer, map);
-
- rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap);
- if ( rc == -ENOSYS ) {
- memmap.nr_entries = 1;
- map[0].addr = 0ULL;
- map[0].size = PFN_PHYS((unsigned long long)xen_start_info->nr_pages);
- /* 8MB slack (to balance backend allocations). */
- map[0].size += 8ULL << 20;
- map[0].type = E820_RAM;
- rc = 0;
- }
- BUG_ON(rc);
-
- sanitize_e820_map(map, (char *)&memmap.nr_entries);
-
- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
-
- return "Xen";
-}
-
-
-extern void hypervisor_callback(void);
-extern void failsafe_callback(void);
-extern void nmi(void);
-
unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
EXPORT_SYMBOL(machine_to_phys_mapping);
unsigned int machine_to_phys_order;
@@ -117,30 +78,60 @@ void __init pre_setup_arch_hook(void)
(unsigned long *)xen_start_info->mfn_list;
}

+#endif /* CONFIG_X86_32 */
+
+extern void hypervisor_callback(void);
+extern void failsafe_callback(void);
+extern void nmi(void);
+
+#ifdef CONFIG_X86_64
+#include <asm/proto.h>
+#define CALLBACK_ADDR(fn) ((unsigned long)(fn))
+#else
+#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) }
+#endif
+
void __init machine_specific_arch_setup(void)
{
int ret;
static struct callback_register __initdata event = {
.type = CALLBACKTYPE_event,
- .address = { __KERNEL_CS, (unsigned long)hypervisor_callback },
+ .address = CALLBACK_ADDR(hypervisor_callback)
};
static struct callback_register __initdata failsafe = {
.type = CALLBACKTYPE_failsafe,
- .address = { __KERNEL_CS, (unsigned long)failsafe_callback },
+ .address = CALLBACK_ADDR(failsafe_callback)
};
+#ifdef CONFIG_X86_64
+ static struct callback_register __initdata syscall = {
+ .type = CALLBACKTYPE_syscall,
+ .address = CALLBACK_ADDR(system_call)
+ };
+#endif
static struct callback_register __initdata nmi_cb = {
.type = CALLBACKTYPE_nmi,
- .address = { __KERNEL_CS, (unsigned long)nmi },
+ .address = CALLBACK_ADDR(nmi)
};

ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
if (ret == 0)
ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
+#ifdef CONFIG_X86_64
+ if (ret == 0)
+ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
+#endif
#if CONFIG_XEN_COMPAT <= 0x030002
+#ifdef CONFIG_X86_32
if (ret == -ENOSYS)
ret = HYPERVISOR_set_callbacks(
event.address.cs, event.address.eip,
failsafe.address.cs, failsafe.address.eip);
+#else
+ ret = HYPERVISOR_set_callbacks(
+ event.address,
+ failsafe.address,
+ syscall.address);
+#endif
#endif
BUG_ON(ret);

@@ -155,14 +146,41 @@ void __init machine_specific_arch_setup(
}
#endif

+#ifdef CONFIG_X86_32
/* Do an early initialization of the fixmap area */
{
extern pte_t swapper_pg_fixmap[PTRS_PER_PTE];
unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
pud_t *pud = pud_offset(swapper_pg_dir + pgd_index(addr), addr);
pmd_t *pmd = pmd_offset(pud, addr);
+ unsigned int i;

make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables);
set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
+
+#define __FIXADDR_TOP (-PAGE_SIZE)
+#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \
+ != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE)))
+ FIX_BUG_ON(SHARED_INFO);
+ FIX_BUG_ON(ISAMAP_BEGIN);
+ FIX_BUG_ON(ISAMAP_END);
+#undef __FIXADDR_TOP
+ BUG_ON(pte_index(hypervisor_virt_start));
+
+ /* Switch to the real shared_info page, and clear the
+ * dummy page. */
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
+ memset(empty_zero_page, 0, sizeof(empty_zero_page));
+
+ /* Setup mapping of lower 1st MB */
+ for (i = 0; i < NR_FIX_ISAMAPS; i++)
+ if (is_initial_xendomain())
+ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
+ else
+ __set_fixmap(FIX_ISAMAP_BEGIN - i,
+ virt_to_machine(empty_zero_page),
+ PAGE_KERNEL_RO);
}
+#endif
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/Makefile 2011-01-31 14:53:50.000000000 +0100
+++ head-2011-03-11/arch/x86/mm/Makefile 2011-02-01 14:38:38.000000000 +0100
@@ -27,6 +27,7 @@ obj-$(CONFIG_AMD_NUMA) += amdtopology_6
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o

obj-$(CONFIG_XEN) += hypervisor.o
+disabled-obj-$(CONFIG_XEN) := gup.o

obj-$(CONFIG_HAVE_MEMBLOCK) += memblock.o
|
|
|
|
--- head-2011-03-11.orig/arch/x86/mm/dump_pagetables-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -45,7 +45,7 @@ static struct addr_marker address_marker
{ 0, "User Space" },
#ifdef CONFIG_X86_64
{ HYPERVISOR_VIRT_START, "Hypervisor Space" },
- { HYPERVISOR_VIRT_END, "Low Kernel Mapping" },
+ { PAGE_OFFSET, "Low Kernel Mapping" },
{ VMALLOC_START, "vmalloc() Area" },
{ VMEMMAP_START, "Vmemmap" },
{ __START_KERNEL_map, "High Kernel Mapping" },
@@ -160,8 +160,8 @@ static void note_page(struct seq_file *m
* we have now. "break" is either changing perms, levels or
* address space marker.
*/
- prot = pgprot_val(new_prot) & ~(PTE_MASK);
- cur = pgprot_val(st->current_prot) & ~(PTE_MASK);
+ prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK);
+ cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK);

if (!st->level) {
/* First entry */
@@ -234,7 +234,7 @@ static void walk_pmd_level(struct seq_fi
st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT);
if (!hypervisor_space(st->current_address)
&& !pmd_none(*start)) {
- pgprotval_t prot = __pmd_val(*start) & ~PTE_MASK;
+ pgprotval_t prot = __pmd_val(*start) & PTE_FLAGS_MASK;

if (pmd_large(*start) || !pmd_present(*start))
note_page(m, st, __pgprot(prot), 3);
@@ -267,7 +267,7 @@ static void walk_pud_level(struct seq_fi
st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT);
if (!hypervisor_space(st->current_address)
&& !pud_none(*start)) {
- pgprotval_t prot = __pud_val(*start) & ~PTE_MASK;
+ pgprotval_t prot = __pud_val(*start) & PTE_FLAGS_MASK;

if (pud_large(*start) || !pud_present(*start))
note_page(m, st, __pgprot(prot), 2);
@@ -303,7 +303,7 @@ static void walk_pgd_level(struct seq_fi
for (i = 0; i < PTRS_PER_PGD; i++) {
st.current_address = normalize_addr(i * PGD_LEVEL_MULT);
if (!pgd_none(*start)) {
- pgprotval_t prot = __pgd_val(*start) & ~PTE_MASK;
+ pgprotval_t prot = __pgd_val(*start) & PTE_FLAGS_MASK;

if (pgd_large(*start) || !pgd_present(*start))
note_page(m, &st, __pgprot(prot), 1);
|
|
--- head-2011-03-11.orig/arch/x86/mm/fault-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/mm/fault-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -10,6 +10,7 @@
#include <linux/string.h>
#include <linux/types.h>
#include <linux/ptrace.h>
+#include <linux/mmiotrace.h>
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/smp.h>
@@ -49,17 +50,23 @@
#define PF_RSVD (1<<3)
#define PF_INSTR (1<<4)

+static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+{
+#ifdef CONFIG_MMIOTRACE_HOOKS
+ if (unlikely(is_kmmio_active()))
+ if (kmmio_handler(regs, addr) == 1)
+ return -1;
+#endif
+ return 0;
+}
+
static inline int notify_page_fault(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
int ret = 0;

/* kprobe_running() needs smp_processor_id() */
-#ifdef CONFIG_X86_32
if (!user_mode_vm(regs)) {
-#else
- if (!user_mode(regs)) {
-#endif
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -409,11 +416,7 @@ static void show_fault_oops(struct pt_re
printk(KERN_CONT "NULL pointer dereference");
else
printk(KERN_CONT "paging request");
-#ifdef CONFIG_X86_32
- printk(KERN_CONT " at %08lx\n", address);
-#else
- printk(KERN_CONT " at %016lx\n", address);
-#endif
+ printk(KERN_CONT " at %p\n", (void *) address);
printk(KERN_ALERT "IP:");
printk_address(regs->ip, 1);
dump_pagetable(address);
@@ -628,6 +631,8 @@ void __kprobes do_page_fault(struct pt_r

if (notify_page_fault(regs))
return;
+ if (unlikely(kmmio_fault(regs, address)))
+ return;

/*
* We fault-in kernel-space virtual memory on-demand. The
@@ -831,14 +836,10 @@ bad_area_nosemaphore:
if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
printk_ratelimit()) {
printk(
-#ifdef CONFIG_X86_32
- "%s%s[%d]: segfault at %lx ip %08lx sp %08lx error %lx",
-#else
- "%s%s[%d]: segfault at %lx ip %lx sp %lx error %lx",
-#endif
+ "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
- tsk->comm, task_pid_nr(tsk), address, regs->ip,
- regs->sp, error_code);
+ tsk->comm, task_pid_nr(tsk), address,
+ (void *) regs->ip, (void *) regs->sp, error_code);
print_vma_addr(" in ", regs->ip);
printk("\n");
}
@@ -949,89 +950,52 @@ LIST_HEAD(pgd_list);
void vmalloc_sync_all(void)
{
#ifdef CONFIG_X86_32
- /*
- * Note that races in the updates of insync and start aren't
- * problematic: insync can only get set bits added, and updates to
- * start are only improving performance (without affecting correctness
- * if undone).
- * XEN: To work on PAE, we need to iterate over PMDs rather than PGDs.
- * This change works just fine with 2-level paging too.
- */
-#define sync_index(a) ((a) >> PMD_SHIFT)
- static DECLARE_BITMAP(insync, PTRS_PER_PGD*PTRS_PER_PMD);
- static unsigned long start = TASK_SIZE;
- unsigned long address;
+ unsigned long address = VMALLOC_START & PGDIR_MASK;

if (SHARED_KERNEL_PMD)
return;

BUILD_BUG_ON(TASK_SIZE & ~PMD_MASK);
- for (address = start;
- address < hypervisor_virt_start;
- address += PMD_SIZE) {
- if (!test_bit(sync_index(address), insync)) {
- unsigned long flags;
- struct page *page;
-
- spin_lock_irqsave(&pgd_lock, flags);
- /* XEN: failure path assumes non-empty pgd_list. */
- if (unlikely(list_empty(&pgd_list))) {
- spin_unlock_irqrestore(&pgd_lock, flags);
- return;
- }
- list_for_each_entry(page, &pgd_list, lru) {
- pmd_t *pmd;
-
- pgd_page_table(lock, page);
- pmd = vmalloc_sync_one(page_address(page),
- address);
- pgd_page_table(unlock, page);
-
- if (!pmd)
- break;
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- if (!page)
- set_bit(sync_index(address), insync);
+ for (; address < hypervisor_virt_start; address += PMD_SIZE) {
+ unsigned long flags;
+ struct page *page;
+
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pmd_t *pmd;
+
+ pgd_page_table(lock, page);
+ pmd = vmalloc_sync_one(page_address(page), address);
+ pgd_page_table(unlock, page);
+
+ if (!pmd)
+ break;
}
- if (address == start && test_bit(sync_index(address), insync))
- start = address + PMD_SIZE;
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
#else /* CONFIG_X86_64 */
- /*
- * Note that races in the updates of insync and start aren't
- * problematic: insync can only get set bits added, and updates to
- * start are only improving performance (without affecting correctness
- * if undone).
- */
- static DECLARE_BITMAP(insync, PTRS_PER_PGD);
- static unsigned long start = VMALLOC_START & PGDIR_MASK;
+ unsigned long start = VMALLOC_START & PGDIR_MASK;
unsigned long address;

for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) {
- if (!test_bit(pgd_index(address), insync)) {
- const pgd_t *pgd_ref = pgd_offset_k(address);
- unsigned long flags;
- struct page *page;
-
- if (pgd_none(*pgd_ref))
- continue;
- spin_lock_irqsave(&pgd_lock, flags);
- list_for_each_entry(page, &pgd_list, lru) {
- pgd_t *pgd;
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
- pgd_page_table(lock, page);
- if (pgd_none(*pgd))
- set_pgd(pgd, *pgd_ref);
- else
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
- pgd_page_table(unlock, page);
- }
- spin_unlock_irqrestore(&pgd_lock, flags);
- set_bit(pgd_index(address), insync);
+ const pgd_t *pgd_ref = pgd_offset_k(address);
+ unsigned long flags;
+ struct page *page;
+
+ if (pgd_none(*pgd_ref))
+ continue;
+ spin_lock_irqsave(&pgd_lock, flags);
+ list_for_each_entry(page, &pgd_list, lru) {
+ pgd_t *pgd;
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
+ pgd_page_table(lock, page);
+ if (pgd_none(*pgd))
+ set_pgd(pgd, *pgd_ref);
+ else
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
+ pgd_page_table(unlock, page);
}
- if (address == start)
- start = address + PGDIR_SIZE;
+ spin_unlock_irqrestore(&pgd_lock, flags);
}
#endif
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/hypervisor.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/hypervisor.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -711,6 +711,72 @@ void xen_destroy_contiguous_region(unsig
|
|
}
|
|
EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
|
|
|
|
+int __init early_create_contiguous_region(unsigned long pfn,
|
|
+ unsigned int order,
|
|
+ unsigned int address_bits)
|
|
+{
|
|
+ unsigned long *in_frames = discontig_frames, out_frame = pfn;
|
|
+ unsigned int i;
|
|
+ int rc, success;
|
|
+ struct xen_memory_exchange exchange = {
|
|
+ .in = {
|
|
+ .nr_extents = 1UL << order,
|
|
+ .extent_order = 0,
|
|
+ .domid = DOMID_SELF
|
|
+ },
|
|
+ .out = {
|
|
+ .nr_extents = 1,
|
|
+ .extent_order = order,
|
|
+ .address_bits = address_bits,
|
|
+ .domid = DOMID_SELF
|
|
+ }
|
|
+ };
|
|
+
|
|
+ if (xen_feature(XENFEAT_auto_translated_physmap))
|
|
+ return 0;
|
|
+
|
|
+ if (unlikely(order > MAX_CONTIG_ORDER))
|
|
+ return -ENOMEM;
|
|
+
|
|
+ for (i = 0; i < (1U << order); ++i) {
|
|
+ in_frames[i] = pfn_to_mfn(pfn + i);
|
|
+ set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY);
|
|
+ }
|
|
+
|
|
+ set_xen_guest_handle(exchange.in.extent_start, in_frames);
|
|
+ set_xen_guest_handle(exchange.out.extent_start, &out_frame);
|
|
+
|
|
+ rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
|
|
+ success = (exchange.nr_exchanged == (1UL << order));
|
|
+ BUG_ON(!success && (exchange.nr_exchanged || !rc));
|
|
+ BUG_ON(success && rc);
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
+ if (unlikely(rc == -ENOSYS)) {
|
|
+ /* Compatibility when XENMEM_exchange is unavailable. */
|
|
+ if (HYPERVISOR_memory_op(XENMEM_decrease_reservation,
|
|
+ &exchange.in) != (1UL << order))
|
|
+ BUG();
|
|
+ success = (HYPERVISOR_memory_op(XENMEM_populate_physmap,
|
|
+ &exchange.out) == 1);
|
|
+ if (!success) {
|
|
+ for (i = 0; i < (1U << order); ++i)
|
|
+ in_frames[i] = pfn + i;
|
|
+ if (HYPERVISOR_memory_op(XENMEM_populate_physmap,
|
|
+ &exchange.in) != (1UL << order))
|
|
+ BUG();
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ for (i = 0; i < (1U << order); ++i, ++out_frame) {
|
|
+ if (!success)
|
|
+ out_frame = in_frames[i];
|
|
+ set_phys_to_machine(pfn + i, out_frame);
|
|
+ }
|
|
+
|
|
+ return success ? 0 : -ENOMEM;
|
|
+}
|
|
+
|
|
static void undo_limit_pages(struct page *pages, unsigned int order)
|
|
{
|
|
BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
@@ -877,42 +943,9 @@ int write_ldt_entry(struct desc_struct *
|
|
return HYPERVISOR_update_descriptor(mach_lp, *(const u64*)desc);
|
|
}
|
|
|
|
-#define MAX_BATCHED_FULL_PTES 32
|
|
-
|
|
-int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|
- unsigned long addr, unsigned long end, pgprot_t newprot,
|
|
- int dirty_accountable)
|
|
+int write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc,
|
|
+ int type)
|
|
{
|
|
- int rc = 0, i = 0;
|
|
- mmu_update_t u[MAX_BATCHED_FULL_PTES];
|
|
- pte_t *pte;
|
|
- spinlock_t *ptl;
|
|
-
|
|
- if (!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))
|
|
- return 0;
|
|
-
|
|
- pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
|
- do {
|
|
- if (pte_present(*pte)) {
|
|
- pte_t ptent = pte_modify(*pte, newprot);
|
|
-
|
|
- if (dirty_accountable && pte_dirty(ptent))
|
|
- ptent = pte_mkwrite(ptent);
|
|
- u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
|
|
- | ((unsigned long)pte & ~PAGE_MASK)
|
|
- | MMU_PT_UPDATE_PRESERVE_AD;
|
|
- u[i].val = __pte_val(ptent);
|
|
- if (++i == MAX_BATCHED_FULL_PTES) {
|
|
- if ((rc = HYPERVISOR_mmu_update(
|
|
- &u[0], i, NULL, DOMID_SELF)) != 0)
|
|
- break;
|
|
- i = 0;
|
|
- }
|
|
- }
|
|
- } while (pte++, addr += PAGE_SIZE, addr != end);
|
|
- if (i)
|
|
- rc = HYPERVISOR_mmu_update( &u[0], i, NULL, DOMID_SELF);
|
|
- pte_unmap_unlock(pte - 1, ptl);
|
|
- BUG_ON(rc && rc != -ENOSYS);
|
|
- return !rc;
|
|
+ maddr_t mach_gp = virt_to_machine(gdt + entry);
|
|
+ return HYPERVISOR_update_descriptor(mach_gp, *(const u64*)desc);
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/init_32-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/init_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -54,6 +54,7 @@
|
|
|
|
unsigned int __VMALLOC_RESERVE = 128 << 20;
|
|
|
|
+unsigned long max_low_pfn_mapped;
|
|
unsigned long max_pfn_mapped;
|
|
|
|
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
@@ -61,6 +62,27 @@ unsigned long highstart_pfn, highend_pfn
|
|
|
|
static noinline int do_test_wp_bit(void);
|
|
|
|
+
|
|
+static unsigned long __initdata table_start;
|
|
+static unsigned long __initdata table_end;
|
|
+static unsigned long __initdata table_top;
|
|
+
|
|
+static int __initdata after_init_bootmem;
|
|
+
|
|
+static __init void *alloc_low_page(unsigned long *phys)
|
|
+{
|
|
+ unsigned long pfn = table_end++;
|
|
+ void *adr;
|
|
+
|
|
+ if (pfn >= table_top)
|
|
+ panic("alloc_low_page: ran out of memory");
|
|
+
|
|
+ adr = __va(pfn * PAGE_SIZE);
|
|
+ memset(adr, 0, PAGE_SIZE);
|
|
+ *phys = pfn * PAGE_SIZE;
|
|
+ return adr;
|
|
+}
|
|
+
|
|
/*
|
|
* Creates a middle page table and puts a pointer to it in the
|
|
* given global directory entry. This only returns the gd entry
|
|
@@ -72,9 +94,12 @@ static pmd_t * __init one_md_table_init(
|
|
pmd_t *pmd_table;
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
+ unsigned long phys;
|
|
if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
|
|
- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
|
|
-
|
|
+ if (after_init_bootmem)
|
|
+ pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
|
|
+ else
|
|
+ pmd_table = (pmd_t *)alloc_low_page(&phys);
|
|
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
|
|
make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
|
|
set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
|
@@ -101,12 +126,16 @@ static pte_t * __init one_page_table_ini
|
|
#endif
|
|
pte_t *page_table = NULL;
|
|
|
|
+ if (after_init_bootmem) {
|
|
#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
|
|
+ page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
|
|
#endif
|
|
- if (!page_table) {
|
|
- page_table =
|
|
+ if (!page_table)
|
|
+ page_table =
|
|
(pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
|
|
+ } else {
|
|
+ unsigned long phys;
|
|
+ page_table = (pte_t *)alloc_low_page(&phys);
|
|
}
|
|
|
|
paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT);
|
|
@@ -167,24 +196,24 @@ static inline int is_kernel_text(unsigne
|
|
* of max_low_pfn pages, by creating page tables starting from address
|
|
* PAGE_OFFSET:
|
|
*/
|
|
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base)
|
|
+static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
|
|
+ unsigned long start_pfn,
|
|
+ unsigned long end_pfn,
|
|
+ int use_pse)
|
|
{
|
|
int pgd_idx, pmd_idx, pte_ofs;
|
|
unsigned long pfn;
|
|
pgd_t *pgd;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
+ unsigned pages_2m = 0, pages_4k = 0;
|
|
|
|
- unsigned long max_ram_pfn = xen_start_info->nr_pages;
|
|
- if (max_ram_pfn > max_low_pfn)
|
|
- max_ram_pfn = max_low_pfn;
|
|
+ if (!cpu_has_pse)
|
|
+ use_pse = 0;
|
|
|
|
- pgd_idx = pgd_index(PAGE_OFFSET);
|
|
+ pfn = start_pfn;
|
|
+ pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
|
|
pgd = pgd_base + pgd_idx;
|
|
- pfn = 0;
|
|
- pmd_idx = pmd_index(PAGE_OFFSET);
|
|
- pte_ofs = pte_index(PAGE_OFFSET);
|
|
-
|
|
for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
|
|
#ifdef CONFIG_XEN
|
|
/*
|
|
@@ -198,10 +227,16 @@ static void __init kernel_physical_mappi
|
|
#else
|
|
pmd = one_md_table_init(pgd);
|
|
#endif
|
|
- if (pfn >= max_low_pfn)
|
|
+
|
|
+ if (pfn >= end_pfn)
|
|
continue;
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
|
|
pmd += pmd_idx;
|
|
- for (; pmd_idx < PTRS_PER_PMD && pfn < max_low_pfn;
|
|
+#else
|
|
+ pmd_idx = 0;
|
|
+#endif
|
|
+ for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
|
|
pmd++, pmd_idx++) {
|
|
unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;
|
|
|
|
@@ -211,13 +246,8 @@ static void __init kernel_physical_mappi
|
|
/*
|
|
* Map with big pages if possible, otherwise
|
|
* create normal page tables:
|
|
- *
|
|
- * Don't use a large page for the first 2/4MB of memory
|
|
- * because there are often fixed size MTRRs in there
|
|
- * and overlapping MTRRs into large pages can cause
|
|
- * slowdowns.
|
|
*/
|
|
- if (cpu_has_pse && !(pgd_idx == 0 && pmd_idx == 0)) {
|
|
+ if (use_pse) {
|
|
unsigned int addr2;
|
|
pgprot_t prot = PAGE_KERNEL_LARGE;
|
|
|
|
@@ -228,49 +258,35 @@ static void __init kernel_physical_mappi
|
|
is_kernel_text(addr2))
|
|
prot = PAGE_KERNEL_LARGE_EXEC;
|
|
|
|
+ pages_2m++;
|
|
set_pmd(pmd, pfn_pmd(pfn, prot));
|
|
|
|
pfn += PTRS_PER_PTE;
|
|
- max_pfn_mapped = pfn;
|
|
continue;
|
|
}
|
|
pte = one_page_table_init(pmd);
|
|
|
|
- for (pte += pte_ofs;
|
|
- pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
|
|
+ pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
|
|
+ pte += pte_ofs;
|
|
+ for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
|
|
pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
|
|
pgprot_t prot = PAGE_KERNEL;
|
|
|
|
/* XEN: Only map initial RAM allocation. */
|
|
- if ((pfn >= max_ram_pfn) || pte_present(*pte))
|
|
+ if (pfn >= xen_start_info->nr_pages || pte_present(*pte))
|
|
continue;
|
|
if (is_kernel_text(addr))
|
|
prot = PAGE_KERNEL_EXEC;
|
|
|
|
+ pages_4k++;
|
|
set_pte(pte, pfn_pte(pfn, prot));
|
|
}
|
|
- max_pfn_mapped = pfn;
|
|
- pte_ofs = 0;
|
|
}
|
|
- pmd_idx = 0;
|
|
}
|
|
+ update_page_count(PG_LEVEL_2M, pages_2m);
|
|
+ update_page_count(PG_LEVEL_4K, pages_4k);
|
|
}
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-
|
|
-static inline int page_kills_ppro(unsigned long pagenr)
|
|
-{
|
|
- if (pagenr >= 0x70000 && pagenr <= 0x7003F)
|
|
- return 1;
|
|
- return 0;
|
|
-}
|
|
-
|
|
-#else
|
|
-
|
|
-#define page_kills_ppro(p) 0
|
|
-
|
|
-#endif
|
|
-
|
|
/*
|
|
* devmem_is_allowed() checks to see if /dev/mem access to a certain address
|
|
* is valid. The argument is a physical page number.
|
|
@@ -331,33 +347,66 @@ static void __init permanent_kmaps_init(
|
|
pkmap_page_table = pte;
|
|
}
|
|
|
|
-void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
|
|
+static void __init add_one_highpage_init(struct page *page, int pfn)
|
|
{
|
|
- if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
|
|
- ClearPageReserved(page);
|
|
- init_page_count(page);
|
|
- __free_page(page);
|
|
- totalhigh_pages++;
|
|
- } else
|
|
- SetPageReserved(page);
|
|
+ ClearPageReserved(page);
|
|
+ init_page_count(page);
|
|
+ __free_page(page);
|
|
+ totalhigh_pages++;
|
|
+}
|
|
+
|
|
+struct add_highpages_data {
|
|
+ unsigned long start_pfn;
|
|
+ unsigned long end_pfn;
|
|
+};
|
|
+
|
|
+static int __init add_highpages_work_fn(unsigned long start_pfn,
|
|
+ unsigned long end_pfn, void *datax)
|
|
+{
|
|
+ int node_pfn;
|
|
+ struct page *page;
|
|
+ unsigned long final_start_pfn, final_end_pfn;
|
|
+ struct add_highpages_data *data;
|
|
+
|
|
+ data = (struct add_highpages_data *)datax;
|
|
+
|
|
+ final_start_pfn = max(start_pfn, data->start_pfn);
|
|
+ final_end_pfn = min(end_pfn, data->end_pfn);
|
|
+ if (final_start_pfn >= final_end_pfn)
|
|
+ return 0;
|
|
+
|
|
+ for (node_pfn = final_start_pfn; node_pfn < final_end_pfn;
|
|
+ node_pfn++) {
|
|
+ if (!pfn_valid(node_pfn))
|
|
+ continue;
|
|
+ page = pfn_to_page(node_pfn);
|
|
+ add_one_highpage_init(page, node_pfn);
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+}
|
|
+
|
|
+void __init add_highpages_with_active_regions(int nid, unsigned long start_pfn,
|
|
+ unsigned long end_pfn)
|
|
+{
|
|
+ struct add_highpages_data data;
|
|
+
|
|
+ data.start_pfn = start_pfn;
|
|
+ data.end_pfn = end_pfn;
|
|
+
|
|
+ work_with_active_regions(nid, add_highpages_work_fn, &data);
|
|
}
|
|
|
|
#ifndef CONFIG_NUMA
|
|
-static void __init set_highmem_pages_init(int bad_ppro)
|
|
+static void __init set_highmem_pages_init(void)
|
|
{
|
|
int pfn;
|
|
|
|
- for (pfn = highstart_pfn; pfn < highend_pfn
|
|
- && pfn < xen_start_info->nr_pages; pfn++) {
|
|
- /*
|
|
- * Holes under sparsemem might not have no mem_map[]:
|
|
- */
|
|
- if (pfn_valid(pfn))
|
|
- add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
|
|
- }
|
|
+ add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
|
|
|
|
/* XEN: init high-mem pages outside initial allocation. */
|
|
- for (; pfn < highend_pfn; pfn++) {
|
|
+ for (pfn = xen_start_info->nr_pages; pfn < highend_pfn; pfn++) {
|
|
ClearPageReserved(pfn_to_page(pfn));
|
|
init_page_count(pfn_to_page(pfn));
|
|
}
|
|
@@ -369,24 +418,11 @@ static void __init set_highmem_pages_ini
|
|
#else
|
|
# define kmap_init() do { } while (0)
|
|
# define permanent_kmaps_init(pgd_base) do { } while (0)
|
|
-# define set_highmem_pages_init(bad_ppro) do { } while (0)
|
|
+# define set_highmem_pages_init() do { } while (0)
|
|
#endif /* CONFIG_HIGHMEM */
|
|
|
|
-pteval_t __PAGE_KERNEL = _PAGE_KERNEL;
|
|
-EXPORT_SYMBOL(__PAGE_KERNEL);
|
|
-
|
|
-pteval_t __PAGE_KERNEL_EXEC = _PAGE_KERNEL_EXEC;
|
|
-
|
|
pgd_t *swapper_pg_dir;
|
|
|
|
-static void __init xen_pagetable_setup_start(pgd_t *base)
|
|
-{
|
|
-}
|
|
-
|
|
-static void __init xen_pagetable_setup_done(pgd_t *base)
|
|
-{
|
|
-}
|
|
-
|
|
/*
|
|
* Build a proper pagetable for the kernel mappings. Up until this
|
|
* point, we've been running on some set of pagetables constructed by
|
|
@@ -406,27 +442,10 @@ static void __init xen_pagetable_setup_d
|
|
* be partially populated, and so it avoids stomping on any existing
|
|
* mappings.
|
|
*/
|
|
-static void __init pagetable_init(void)
|
|
+static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
|
|
{
|
|
- pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
|
|
unsigned long vaddr, end;
|
|
|
|
- xen_pagetable_setup_start(pgd_base);
|
|
-
|
|
- /* Enable PSE if available */
|
|
- if (cpu_has_pse)
|
|
- set_in_cr4(X86_CR4_PSE);
|
|
-
|
|
- /* Enable PGE if available */
|
|
- if (cpu_has_pge) {
|
|
- set_in_cr4(X86_CR4_PGE);
|
|
- __PAGE_KERNEL |= _PAGE_GLOBAL;
|
|
- __PAGE_KERNEL_EXEC |= _PAGE_GLOBAL;
|
|
- }
|
|
-
|
|
- kernel_physical_mapping_init(pgd_base);
|
|
- remap_numa_kva();
|
|
-
|
|
/*
|
|
* Fixed mappings, only the page table structure has to be
|
|
* created - mappings will be set by set_fixmap():
|
|
@@ -436,10 +455,13 @@ static void __init pagetable_init(void)
|
|
end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
|
|
page_table_range_init(vaddr, end, pgd_base);
|
|
early_ioremap_reset();
|
|
+}
|
|
|
|
- permanent_kmaps_init(pgd_base);
|
|
+static void __init pagetable_init(void)
|
|
+{
|
|
+ pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
|
|
|
|
- xen_pagetable_setup_done(pgd_base);
|
|
+ permanent_kmaps_init(pgd_base);
|
|
}
|
|
|
|
#if defined(CONFIG_ACPI_SLEEP) && !defined(CONFIG_XEN)
|
|
@@ -482,7 +504,7 @@ void zap_low_mappings(void)
|
|
|
|
int nx_enabled;
|
|
|
|
-pteval_t __supported_pte_mask __read_mostly = ~_PAGE_NX;
|
|
+pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL);
|
|
EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
@@ -535,42 +557,364 @@ static void __init set_nx(void)
|
|
}
|
|
#endif
|
|
|
|
+/* user-defined highmem size */
|
|
+static unsigned int highmem_pages = -1;
|
|
+
|
|
/*
|
|
- * paging_init() sets up the page tables - note that the first 8MB are
|
|
- * already mapped by head.S.
|
|
- *
|
|
- * This routines also unmaps the page at virtual kernel address 0, so
|
|
- * that we can trap those pesky NULL-reference errors in the kernel.
|
|
+ * highmem=size forces highmem to be exactly 'size' bytes.
|
|
+ * This works even on boxes that have no highmem otherwise.
|
|
+ * This also works to reduce highmem size on bigger boxes.
|
|
*/
|
|
-void __init paging_init(void)
|
|
+static int __init parse_highmem(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
+
|
|
+ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
|
|
+ return 0;
|
|
+}
|
|
+early_param("highmem", parse_highmem);
|
|
+
|
|
+/*
|
|
+ * Determine low and high memory ranges:
|
|
+ */
|
|
+void __init find_low_pfn_range(void)
|
|
+{
|
|
+ /* it could update max_pfn */
|
|
+
|
|
+ /* max_low_pfn is 0, we already have early_res support */
|
|
+
|
|
+ max_low_pfn = max_pfn;
|
|
+ if (max_low_pfn > MAXMEM_PFN) {
|
|
+ if (highmem_pages == -1)
|
|
+ highmem_pages = max_pfn - MAXMEM_PFN;
|
|
+ if (highmem_pages + MAXMEM_PFN < max_pfn)
|
|
+ max_pfn = MAXMEM_PFN + highmem_pages;
|
|
+ if (highmem_pages + MAXMEM_PFN > max_pfn) {
|
|
+ printk(KERN_WARNING "only %luMB highmem pages "
|
|
+ "available, ignoring highmem size of %uMB.\n",
|
|
+ pages_to_mb(max_pfn - MAXMEM_PFN),
|
|
+ pages_to_mb(highmem_pages));
|
|
+ highmem_pages = 0;
|
|
+ }
|
|
+ max_low_pfn = MAXMEM_PFN;
|
|
+#ifndef CONFIG_HIGHMEM
|
|
+ /* Maximum memory usable is what is directly addressable */
|
|
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n",
|
|
+ MAXMEM>>20);
|
|
+ if (max_pfn > MAX_NONPAE_PFN)
|
|
+ printk(KERN_WARNING
|
|
+ "Use a HIGHMEM64G enabled kernel.\n");
|
|
+ else
|
|
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
|
|
+ max_pfn = MAXMEM_PFN;
|
|
+#else /* !CONFIG_HIGHMEM */
|
|
+#ifndef CONFIG_HIGHMEM64G
|
|
+ if (max_pfn > MAX_NONPAE_PFN) {
|
|
+ max_pfn = MAX_NONPAE_PFN;
|
|
+ printk(KERN_WARNING "Warning only 4GB will be used."
|
|
+ "Use a HIGHMEM64G enabled kernel.\n");
|
|
+ }
|
|
+#endif /* !CONFIG_HIGHMEM64G */
|
|
+#endif /* !CONFIG_HIGHMEM */
|
|
+ } else {
|
|
+ if (highmem_pages == -1)
|
|
+ highmem_pages = 0;
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ if (highmem_pages >= max_pfn) {
|
|
+ printk(KERN_ERR "highmem size specified (%uMB) is "
|
|
+ "bigger than pages available (%luMB)!.\n",
|
|
+ pages_to_mb(highmem_pages),
|
|
+ pages_to_mb(max_pfn));
|
|
+ highmem_pages = 0;
|
|
+ }
|
|
+ if (highmem_pages) {
|
|
+ if (max_low_pfn - highmem_pages <
|
|
+ 64*1024*1024/PAGE_SIZE){
|
|
+ printk(KERN_ERR "highmem size %uMB results in "
|
|
+ "smaller than 64MB lowmem, ignoring it.\n"
|
|
+ , pages_to_mb(highmem_pages));
|
|
+ highmem_pages = 0;
|
|
+ }
|
|
+ max_low_pfn -= highmem_pages;
|
|
+ }
|
|
+#else
|
|
+ if (highmem_pages)
|
|
+ printk(KERN_ERR "ignoring highmem size on non-highmem"
|
|
+ " kernel!\n");
|
|
+#endif
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_NEED_MULTIPLE_NODES
|
|
+void __init initmem_init(unsigned long start_pfn,
|
|
+ unsigned long end_pfn)
|
|
+{
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ highstart_pfn = highend_pfn = max_pfn;
|
|
+ if (max_pfn > max_low_pfn)
|
|
+ highstart_pfn = max_low_pfn;
|
|
+ memory_present(0, 0, highend_pfn);
|
|
+ e820_register_active_regions(0, 0, highend_pfn);
|
|
+ printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
|
|
+ pages_to_mb(highend_pfn - highstart_pfn));
|
|
+ num_physpages = highend_pfn;
|
|
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
|
|
+#else
|
|
+ memory_present(0, 0, max_low_pfn);
|
|
+ e820_register_active_regions(0, 0, max_low_pfn);
|
|
+ num_physpages = max_low_pfn;
|
|
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
|
|
+#endif
|
|
+#ifdef CONFIG_FLATMEM
|
|
+ max_mapnr = num_physpages;
|
|
+#endif
|
|
+ printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
|
|
+ pages_to_mb(max_low_pfn));
|
|
+
|
|
+ setup_bootmem_allocator();
|
|
+}
|
|
+#endif /* !CONFIG_NEED_MULTIPLE_NODES */
|
|
+
|
|
+static void __init zone_sizes_init(void)
|
|
+{
|
|
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
|
|
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
|
|
+ max_zone_pfns[ZONE_DMA] =
|
|
+ virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
|
|
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
|
|
+#endif
|
|
+
|
|
+ free_area_init_nodes(max_zone_pfns);
|
|
+}
|
|
+
|
|
+void __init setup_bootmem_allocator(void)
|
|
{
|
|
int i;
|
|
+ unsigned long bootmap_size, bootmap;
|
|
+ unsigned long end_pfn = min(max_low_pfn, xen_start_info->nr_pages);
|
|
+
|
|
+ /*
|
|
+ * Initialize the boot-time allocator (with low memory only):
|
|
+ */
|
|
+ bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
|
|
+ bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
|
|
+ min(max_pfn_mapped, xen_start_info->nr_pages)<<PAGE_SHIFT,
|
|
+ bootmap_size, PAGE_SIZE);
|
|
+ if (bootmap == -1L)
|
|
+ panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
|
+ reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
|
|
+
|
|
+ /* don't touch min_low_pfn */
|
|
+ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
|
|
+ min_low_pfn, end_pfn);
|
|
+ printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
|
|
+ max_pfn_mapped<<PAGE_SHIFT);
|
|
+ printk(KERN_INFO " low ram: %08lx - %08lx\n",
|
|
+ min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
|
|
+ printk(KERN_INFO " bootmap %08lx - %08lx\n",
|
|
+ bootmap, bootmap + bootmap_size);
|
|
+ for_each_online_node(i)
|
|
+ free_bootmem_with_active_regions(i, end_pfn);
|
|
+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
|
|
+
|
|
+ after_init_bootmem = 1;
|
|
+}
|
|
+
|
|
+static unsigned long __init extend_init_mapping(unsigned long tables_space)
|
|
+{
|
|
+ unsigned long start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT)
|
|
+ + xen_start_info->nr_pt_frames;
|
|
+ unsigned long start = start_pfn, va = (unsigned long)&_text;
|
|
+ pgd_t *pgd;
|
|
+ pud_t *pud;
|
|
+ pmd_t *pmd;
|
|
+ pte_t *pte;
|
|
+
|
|
+ /* Ensure init mappings cover kernel text/data and initial tables. */
|
|
+ while (va < PAGE_OFFSET + (start_pfn << PAGE_SHIFT) + tables_space) {
|
|
+ pgd = pgd_offset_k(va);
|
|
+ pud = pud_offset(pgd, va);
|
|
+ pmd = pmd_offset(pud, va);
|
|
+ if (pmd_none(*pmd)) {
|
|
+ unsigned long pa = start_pfn++ << PAGE_SHIFT;
|
|
+
|
|
+ memset(__va(pa), 0, PAGE_SIZE);
|
|
+ make_lowmem_page_readonly(__va(pa),
|
|
+ XENFEAT_writable_page_tables);
|
|
+ xen_l2_entry_update(pmd, __pmd(pa | _KERNPG_TABLE));
|
|
+ }
|
|
+ pte = pte_offset_kernel(pmd, va);
|
|
+ if (pte_none(*pte)) {
|
|
+ pte_t new_pte = __pte(__pa(va) | _KERNPG_TABLE);
|
|
+
|
|
+ if (HYPERVISOR_update_va_mapping(va, new_pte, 0))
|
|
+ BUG();
|
|
+ }
|
|
+ va += PAGE_SIZE;
|
|
+ }
|
|
+
|
|
+ /* Finally, blow away any spurious initial mappings. */
|
|
+ while (1) {
|
|
+ pgd = pgd_offset_k(va);
|
|
+ pud = pud_offset(pgd, va);
|
|
+ pmd = pmd_offset(pud, va);
|
|
+ if (pmd_none(*pmd))
|
|
+ break;
|
|
+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
+ BUG();
|
|
+ va += PAGE_SIZE;
|
|
+ }
|
|
+
|
|
+ if (start_pfn > start)
|
|
+ reserve_early(start << PAGE_SHIFT,
|
|
+ start_pfn << PAGE_SHIFT, "INITMAP");
|
|
+
|
|
+ return start_pfn;
|
|
+}
|
|
+
|
|
+static void __init find_early_table_space(unsigned long end)
|
|
+{
|
|
+ unsigned long puds, pmds, ptes, tables;
|
|
+
|
|
+ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
|
+ tables = PAGE_ALIGN(puds * sizeof(pud_t));
|
|
+
|
|
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
+ tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
|
|
+
|
|
+ if (cpu_has_pse) {
|
|
+ unsigned long extra;
|
|
+
|
|
+ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
|
|
+ extra += PMD_SIZE;
|
|
+ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+ } else
|
|
+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+
|
|
+ tables += PAGE_ALIGN(ptes * sizeof(pte_t));
|
|
+
|
|
+ /* for fixmap */
|
|
+ tables += PAGE_SIZE
|
|
+ * ((((FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK)
|
|
+ - (__fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK))
|
|
+ >> PMD_SHIFT);
|
|
+
|
|
+ table_start = extend_init_mapping(tables);
|
|
+
|
|
+ table_end = table_start;
|
|
+ table_top = table_start + (tables>>PAGE_SHIFT);
|
|
+
|
|
+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
|
|
+ end, table_start << PAGE_SHIFT,
|
|
+ (table_start << PAGE_SHIFT) + tables);
|
|
+}
|
|
+
|
|
+unsigned long __init_refok init_memory_mapping(unsigned long start,
|
|
+ unsigned long end)
|
|
+{
|
|
+ pgd_t *pgd_base = swapper_pg_dir;
|
|
+ unsigned long start_pfn, end_pfn;
|
|
+ unsigned long big_page_start;
|
|
+
|
|
+ /*
|
|
+ * Find space for the kernel direct mapping tables.
|
|
+ */
|
|
+ if (!after_init_bootmem)
|
|
+ find_early_table_space(end);
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
set_nx();
|
|
if (nx_enabled)
|
|
printk(KERN_INFO "NX (Execute Disable) protection: active\n");
|
|
#endif
|
|
+
|
|
+ /* Enable PSE if available */
|
|
+ if (cpu_has_pse)
|
|
+ set_in_cr4(X86_CR4_PSE);
|
|
+
|
|
+ /* Enable PGE if available */
|
|
+ if (cpu_has_pge) {
|
|
+ set_in_cr4(X86_CR4_PGE);
|
|
+ __supported_pte_mask |= _PAGE_GLOBAL;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Don't use a large page for the first 2/4MB of memory
|
|
+ * because there are often fixed size MTRRs in there
|
|
+ * and overlapping MTRRs into large pages can cause
|
|
+ * slowdowns.
|
|
+ */
|
|
+ big_page_start = PMD_SIZE;
|
|
+
|
|
+ if (start < big_page_start) {
|
|
+ start_pfn = start >> PAGE_SHIFT;
|
|
+ end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
|
|
+ } else {
|
|
+ /* head is not big page alignment ? */
|
|
+ start_pfn = start >> PAGE_SHIFT;
|
|
+ end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ }
|
|
+ if (start_pfn < end_pfn)
|
|
+ kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
|
|
+
|
|
+ /* big page range */
|
|
+ start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ if (start_pfn < (big_page_start >> PAGE_SHIFT))
|
|
+ start_pfn = big_page_start >> PAGE_SHIFT;
|
|
+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ if (start_pfn < end_pfn)
|
|
+ kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
|
|
+ cpu_has_pse);
|
|
+
|
|
+ /* tail is not big page alignment ? */
|
|
+ start_pfn = end_pfn;
|
|
+ if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
|
|
+ end_pfn = end >> PAGE_SHIFT;
|
|
+ if (start_pfn < end_pfn)
|
|
+ kernel_physical_mapping_init(pgd_base, start_pfn,
|
|
+ end_pfn, 0);
|
|
+ }
|
|
+
|
|
+ early_ioremap_page_table_range_init(pgd_base);
|
|
+
|
|
+ __flush_tlb_all();
|
|
+
|
|
+ if (!after_init_bootmem)
|
|
+ reserve_early(table_start << PAGE_SHIFT,
|
|
+ table_end << PAGE_SHIFT, "PGTABLE");
|
|
+
|
|
+ if (!after_init_bootmem)
|
|
+ early_memtest(start, end);
|
|
+
|
|
+ return end >> PAGE_SHIFT;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * paging_init() sets up the page tables - note that the first 8MB are
|
|
+ * already mapped by head.S.
|
|
+ *
|
|
+ * This routines also unmaps the page at virtual kernel address 0, so
|
|
+ * that we can trap those pesky NULL-reference errors in the kernel.
|
|
+ */
|
|
+void __init paging_init(void)
|
|
+{
|
|
pagetable_init();
|
|
|
|
__flush_tlb_all();
|
|
|
|
kmap_init();
|
|
|
|
- /* Switch to the real shared_info page, and clear the
|
|
- * dummy page. */
|
|
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
|
|
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
|
|
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
|
|
-
|
|
- /* Setup mapping of lower 1st MB */
|
|
- for (i = 0; i < NR_FIX_ISAMAPS; i++)
|
|
- if (is_initial_xendomain())
|
|
- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
|
|
- else
|
|
- __set_fixmap(FIX_ISAMAP_BEGIN - i,
|
|
- virt_to_machine(empty_zero_page),
|
|
- PAGE_KERNEL_RO);
|
|
+ /*
|
|
+ * NOTE: at this point the bootmem allocator is fully available.
|
|
+ */
|
|
+ sparse_init();
|
|
+ zone_sizes_init();
|
|
}
|
|
|
|
/*
|
|
@@ -605,7 +949,7 @@ static struct kcore_list kcore_mem, kcor
|
|
void __init mem_init(void)
|
|
{
|
|
int codesize, reservedpages, datasize, initsize;
|
|
- int tmp, bad_ppro;
|
|
+ int tmp;
|
|
unsigned long pfn;
|
|
|
|
pci_iommu_alloc();
|
|
@@ -613,19 +957,6 @@ void __init mem_init(void)
|
|
#ifdef CONFIG_FLATMEM
|
|
BUG_ON(!mem_map);
|
|
#endif
|
|
- bad_ppro = ppro_with_ram_bug();
|
|
-
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- /* check that fixmap and pkmap do not overlap */
|
|
- if (PKMAP_BASE + LAST_PKMAP*PAGE_SIZE >= FIXADDR_START) {
|
|
- printk(KERN_ERR
|
|
- "fixmap and kmap areas overlap - this will crash\n");
|
|
- printk(KERN_ERR "pkstart: %lxh pkend: %lxh fixstart %lxh\n",
|
|
- PKMAP_BASE, PKMAP_BASE + LAST_PKMAP*PAGE_SIZE,
|
|
- FIXADDR_START);
|
|
- BUG();
|
|
- }
|
|
-#endif
|
|
/* this will put all low memory onto the freelists */
|
|
totalram_pages += free_all_bootmem();
|
|
/* XEN: init low-mem pages outside initial allocation. */
|
|
@@ -642,7 +973,7 @@ void __init mem_init(void)
|
|
if (page_is_ram(tmp) && PageReserved(pfn_to_page(tmp)))
|
|
reservedpages++;
|
|
|
|
- set_highmem_pages_init(bad_ppro);
|
|
+ set_highmem_pages_init();
|
|
|
|
codesize = (unsigned long) &_etext - (unsigned long) &_text;
|
|
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
|
|
@@ -663,7 +994,6 @@ void __init mem_init(void)
|
|
(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
|
|
);
|
|
|
|
-#if 1 /* double-sanity-check paranoia */
|
|
printk(KERN_INFO "virtual kernel memory layout:\n"
|
|
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
|
#ifdef CONFIG_HIGHMEM
|
|
@@ -704,7 +1034,6 @@ void __init mem_init(void)
|
|
#endif
|
|
BUG_ON(VMALLOC_START > VMALLOC_END);
|
|
BUG_ON((unsigned long)high_memory > VMALLOC_START);
|
|
-#endif /* double-sanity-check paranoia */
|
|
|
|
if (boot_cpu_data.wp_works_ok < 0)
|
|
test_wp_bit();
|
|
@@ -761,6 +1090,8 @@ void mark_rodata_ro(void)
|
|
unsigned long start = PFN_ALIGN(_text);
|
|
unsigned long size = PFN_ALIGN(_etext) - start;
|
|
|
|
+#ifndef CONFIG_DYNAMIC_FTRACE
|
|
+ /* Dynamic tracing modifies the kernel text section */
|
|
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
|
|
printk(KERN_INFO "Write protecting the kernel text: %luk\n",
|
|
size >> 10);
|
|
@@ -773,6 +1104,8 @@ void mark_rodata_ro(void)
|
|
printk(KERN_INFO "Testing CPA: write protecting again\n");
|
|
set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
|
|
#endif
|
|
+#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
+
|
|
start += size;
|
|
size = (unsigned long)__end_rodata - start;
|
|
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
|
|
@@ -835,3 +1168,9 @@ void free_initrd_mem(unsigned long start
|
|
free_init_pages("initrd memory", start, end);
|
|
}
|
|
#endif
|
|
+
|
|
+int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
|
|
+ int flags)
|
|
+{
|
|
+ return reserve_bootmem(phys, len, flags);
|
|
+}
|
|
--- head-2011-03-11.orig/arch/x86/mm/init_64-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/init_64-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -21,6 +21,7 @@
|
|
#include <linux/swap.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/init.h>
|
|
+#include <linux/initrd.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/bootmem.h>
|
|
#include <linux/proc_fs.h>
|
|
@@ -52,6 +53,14 @@
|
|
|
|
#include <xen/features.h>
|
|
|
|
+/*
|
|
+ * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
|
|
+ * The direct mapping extends to max_pfn_mapped, so that we can directly access
|
|
+ * apertures, ACPI and other tables without having to play with fixmaps.
|
|
+ */
|
|
+unsigned long max_low_pfn_mapped;
|
|
+unsigned long max_pfn_mapped;
|
|
+
|
|
#if CONFIG_XEN_COMPAT <= 0x030002
|
|
unsigned int __kernel_page_user;
|
|
EXPORT_SYMBOL(__kernel_page_user);
|
|
@@ -60,7 +69,6 @@ EXPORT_SYMBOL(__kernel_page_user);
|
|
int after_bootmem;
|
|
|
|
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
-extern unsigned long start_pfn;
|
|
|
|
extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
|
|
extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
|
|
@@ -118,7 +126,7 @@ void __meminit early_make_page_readonly(
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-int direct_gbpages __meminitdata
|
|
+int direct_gbpages
|
|
#ifdef CONFIG_DIRECT_GBPAGES
|
|
= 1
|
|
#endif
|
|
@@ -145,55 +153,23 @@ early_param("gbpages", parse_direct_gbpa
|
|
* around without checking the pgd every time.
|
|
*/
|
|
|
|
-void show_mem(void)
|
|
-{
|
|
- long i, total = 0, reserved = 0;
|
|
- long shared = 0, cached = 0;
|
|
- struct page *page;
|
|
- pg_data_t *pgdat;
|
|
-
|
|
- printk(KERN_INFO "Mem-info:\n");
|
|
- show_free_areas();
|
|
- for_each_online_pgdat(pgdat) {
|
|
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
|
|
- /*
|
|
- * This loop can take a while with 256 GB and
|
|
- * 4k pages so defer the NMI watchdog:
|
|
- */
|
|
- if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
|
|
- touch_nmi_watchdog();
|
|
-
|
|
- if (!pfn_valid(pgdat->node_start_pfn + i))
|
|
- continue;
|
|
-
|
|
- page = pfn_to_page(pgdat->node_start_pfn + i);
|
|
- total++;
|
|
- if (PageReserved(page))
|
|
- reserved++;
|
|
- else if (PageSwapCache(page))
|
|
- cached++;
|
|
- else if (page_count(page))
|
|
- shared += page_count(page) - 1;
|
|
- }
|
|
- }
|
|
- printk(KERN_INFO "%lu pages of RAM\n", total);
|
|
- printk(KERN_INFO "%lu reserved pages\n", reserved);
|
|
- printk(KERN_INFO "%lu pages shared\n", shared);
|
|
- printk(KERN_INFO "%lu pages swap cached\n", cached);
|
|
-}
|
|
-
|
|
static unsigned long __meminitdata table_start;
|
|
-static unsigned long __meminitdata table_end;
|
|
+static unsigned long __meminitdata table_cur;
|
|
+static unsigned long __meminitdata table_top;
|
|
|
|
-static __init void *spp_getpage(void)
|
|
+/*
|
|
+ * NOTE: This function is marked __ref because it calls __init function
|
|
+ * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
|
|
+ */
|
|
+static __ref void *spp_getpage(void)
|
|
{
|
|
void *ptr;
|
|
|
|
if (after_bootmem)
|
|
ptr = (void *) get_zeroed_page(GFP_ATOMIC);
|
|
- else if (start_pfn < table_end) {
|
|
- ptr = __va(start_pfn << PAGE_SHIFT);
|
|
- start_pfn++;
|
|
+ else if (table_cur < table_top) {
|
|
+ ptr = __va(table_cur << PAGE_SHIFT);
|
|
+ table_cur++;
|
|
memset(ptr, 0, PAGE_SIZE);
|
|
} else
|
|
ptr = alloc_bootmem_pages(PAGE_SIZE);
|
|
@@ -208,30 +184,18 @@ static __init void *spp_getpage(void)
|
|
return ptr;
|
|
}
|
|
|
|
-#define pgd_offset_u(address) (__user_pgd(init_level4_pgt) + pgd_index(address))
|
|
-#define pud_offset_u(address) (level3_user_pgt + pud_index(address))
|
|
-
|
|
-static __init void
|
|
-set_pte_phys(unsigned long vaddr, unsigned long phys, pgprot_t prot, int user_mode)
|
|
+void
|
|
+set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
|
|
{
|
|
- pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
- pte_t *pte, new_pte;
|
|
-
|
|
- pr_debug("set_pte_phys %lx to %lx\n", vaddr, phys);
|
|
+ pte_t *pte;
|
|
|
|
- pgd = (user_mode ? pgd_offset_u(vaddr) : pgd_offset_k(vaddr));
|
|
- if (pgd_none(*pgd)) {
|
|
- printk(KERN_ERR
|
|
- "PGD FIXMAP MISSING, it should be setup in head.S!\n");
|
|
- return;
|
|
- }
|
|
- pud = (user_mode ? pud_offset_u(vaddr) : pud_offset(pgd, vaddr));
|
|
+ pud = pud_page + pud_index(vaddr);
|
|
if (pud_none(*pud)) {
|
|
pmd = (pmd_t *) spp_getpage();
|
|
make_page_readonly(pmd, XENFEAT_writable_page_tables);
|
|
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
|
|
+ pud_populate(&init_mm, pud, pmd);
|
|
if (pmd != pmd_offset(pud, 0)) {
|
|
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
|
|
pmd, pmd_offset(pud, 0));
|
|
@@ -242,19 +206,20 @@ set_pte_phys(unsigned long vaddr, unsign
|
|
if (pmd_none(*pmd)) {
|
|
pte = (pte_t *) spp_getpage();
|
|
make_page_readonly(pte, XENFEAT_writable_page_tables);
|
|
- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
|
|
+ pmd_populate_kernel(&init_mm, pmd, pte);
|
|
if (pte != pte_offset_kernel(pmd, 0)) {
|
|
printk(KERN_ERR "PAGETABLE BUG #02!\n");
|
|
return;
|
|
}
|
|
}
|
|
- if (pgprot_val(prot))
|
|
- new_pte = pfn_pte(phys >> PAGE_SHIFT, prot);
|
|
- else
|
|
- new_pte = __pte(0);
|
|
|
|
pte = pte_offset_kernel(pmd, vaddr);
|
|
if (!pte_none(*pte) && __pte_val(new_pte) &&
|
|
+#ifdef CONFIG_ACPI
|
|
+ /* __acpi_map_table() fails to properly call clear_fixmap() */
|
|
+ (vaddr < __fix_to_virt(FIX_ACPI_END) ||
|
|
+ vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) &&
|
|
+#endif
|
|
__pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
|
|
pte_ERROR(*pte);
|
|
set_pte(pte, new_pte);
|
|
@@ -266,15 +231,13 @@ set_pte_phys(unsigned long vaddr, unsign
|
|
__flush_tlb_one(vaddr);
|
|
}
|
|
|
|
-static __init void
|
|
-set_pte_phys_ma(unsigned long vaddr, unsigned long phys, pgprot_t prot)
|
|
+void
|
|
+set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
|
{
|
|
pgd_t *pgd;
|
|
- pud_t *pud;
|
|
- pmd_t *pmd;
|
|
- pte_t *pte, new_pte;
|
|
+ pud_t *pud_page;
|
|
|
|
- pr_debug("set_pte_phys_ma %lx to %lx\n", vaddr, phys);
|
|
+ pr_debug("set_pte_vaddr %lx to %lx\n", vaddr, __pte_val(pteval));
|
|
|
|
pgd = pgd_offset_k(vaddr);
|
|
if (pgd_none(*pgd)) {
|
|
@@ -282,47 +245,51 @@ set_pte_phys_ma(unsigned long vaddr, uns
|
|
"PGD FIXMAP MISSING, it should be setup in head.S!\n");
|
|
return;
|
|
}
|
|
- pud = pud_offset(pgd, vaddr);
|
|
- if (pud_none(*pud)) {
|
|
- pmd = (pmd_t *) spp_getpage();
|
|
- make_page_readonly(pmd, XENFEAT_writable_page_tables);
|
|
- set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | _PAGE_USER));
|
|
- if (pmd != pmd_offset(pud, 0)) {
|
|
- printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
|
|
- pmd, pmd_offset(pud, 0));
|
|
+ pud_page = (pud_t*)pgd_page_vaddr(*pgd);
|
|
+ set_pte_vaddr_pud(pud_page, vaddr, pteval);
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Create large page table mappings for a range of physical addresses.
|
|
+ */
|
|
+static void __init __init_extra_mapping(unsigned long phys, unsigned long size,
|
|
+ pgprot_t prot)
|
|
+{
|
|
+ pgd_t *pgd;
|
|
+ pud_t *pud;
+ pmd_t *pmd;
+
+ BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK));
+ for (; size; phys += PMD_SIZE, size -= PMD_SIZE) {
+ pgd = pgd_offset_k((unsigned long)__va(phys));
+ if (pgd_none(*pgd)) {
+ pud = (pud_t *) spp_getpage();
+ set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE |
+ _PAGE_USER));
}
- }
- pmd = pmd_offset(pud, vaddr);
- if (pmd_none(*pmd)) {
- pte = (pte_t *) spp_getpage();
- make_page_readonly(pte, XENFEAT_writable_page_tables);
- set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE | _PAGE_USER));
- if (pte != pte_offset_kernel(pmd, 0)) {
- printk(KERN_ERR "PAGETABLE BUG #02!\n");
- return;
+ pud = pud_offset(pgd, (unsigned long)__va(phys));
+ if (pud_none(*pud)) {
+ pmd = (pmd_t *) spp_getpage();
+ set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE |
+ _PAGE_USER));
}
+ pmd = pmd_offset(pud, phys);
+ BUG_ON(!pmd_none(*pmd));
+ set_pmd(pmd, __pmd(phys | pgprot_val(prot)));
}
- new_pte = pfn_pte_ma(phys >> PAGE_SHIFT, prot);
+}
- pte = pte_offset_kernel(pmd, vaddr);
- if (!pte_none(*pte) && __pte_val(new_pte) &&
-#ifdef CONFIG_ACPI
- /* __acpi_map_table() fails to properly call clear_fixmap() */
- (vaddr < __fix_to_virt(FIX_ACPI_END) ||
- vaddr > __fix_to_virt(FIX_ACPI_BEGIN)) &&
-#endif
- __pte_val(*pte) != (__pte_val(new_pte) & __supported_pte_mask))
- pte_ERROR(*pte);
- set_pte(pte, new_pte);
+void __init init_extra_mapping_wb(unsigned long phys, unsigned long size)
+{
+ __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE);
+}
- /*
- * It's enough to flush this one mapping.
- * (PGE mappings get flushed as well)
- */
- __flush_tlb_one(vaddr);
+void __init init_extra_mapping_uc(unsigned long phys, unsigned long size)
+{
+ __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE);
}
-#ifndef CONFIG_XEN
/*
* The head.S code sets up the kernel high mapping:
*
@@ -352,63 +319,52 @@ void __init cleanup_highmap(void)
}
#endif
-/* NOTE: this is meant to be run only at boot */
-void __init __set_fixmap(enum fixed_addresses idx, unsigned long phys, pgprot_t prot)
+static __ref void *alloc_low_page(unsigned long *phys)
{
- unsigned long address = __fix_to_virt(idx);
-
- if (idx >= __end_of_fixed_addresses) {
- printk(KERN_ERR "Invalid __set_fixmap\n");
- return;
- }
- switch (idx) {
- case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
- set_pte_phys(address, phys, prot, 0);
- set_pte_phys(address, phys, prot, 1);
- break;
- case FIX_EARLYCON_MEM_BASE:
- xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
- pfn_pte_ma(phys >> PAGE_SHIFT, prot));
- break;
- default:
- set_pte_phys_ma(address, phys, prot);
- break;
- }
-}
-
-static __meminit void *alloc_static_page(unsigned long *phys)
-{
- unsigned long va = (start_pfn << PAGE_SHIFT) + __START_KERNEL_map;
+ unsigned long pfn;
+ void *adr;
if (after_bootmem) {
- void *adr = (void *)get_zeroed_page(GFP_ATOMIC);
+ adr = (void *)get_zeroed_page(GFP_ATOMIC);
*phys = __pa(adr);
return adr;
}
- *phys = start_pfn << PAGE_SHIFT;
- start_pfn++;
- memset((void *)va, 0, PAGE_SIZE);
- return (void *)va;
+ BUG_ON(!table_cur);
+ pfn = table_cur++;
+ if (pfn >= table_top)
+ panic("alloc_low_page: ran out of memory");
+
+ adr = early_ioremap(pfn_to_mfn(pfn) * PAGE_SIZE, PAGE_SIZE);
+ memset(adr, 0, PAGE_SIZE);
+ *phys = pfn * PAGE_SIZE;
+ return adr;
}
-#define PTE_SIZE PAGE_SIZE
|
|
+static __ref void unmap_low_page(void *adr)
|
|
+{
|
|
+ if (after_bootmem)
|
|
+ return;
|
|
+
|
|
+ early_iounmap(adr, PAGE_SIZE);
|
|
+}
|
|
|
|
static inline int __meminit make_readonly(unsigned long paddr)
|
|
{
|
|
extern char __vsyscall_0;
|
|
int readonly = 0;
|
|
|
|
- /* Make new page tables read-only. */
|
|
+ /* Make new page tables read-only on the first pass. */
|
|
if (!xen_feature(XENFEAT_writable_page_tables)
|
|
+ && !max_pfn_mapped
|
|
&& (paddr >= (table_start << PAGE_SHIFT))
|
|
- && (paddr < (table_end << PAGE_SHIFT)))
|
|
+ && (paddr < (table_top << PAGE_SHIFT)))
|
|
readonly = 1;
|
|
/* Make old page tables read-only. */
|
|
if (!xen_feature(XENFEAT_writable_page_tables)
|
|
&& (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
|
|
- && (paddr < (start_pfn << PAGE_SHIFT)))
|
|
+ && (paddr < (table_cur << PAGE_SHIFT)))
|
|
readonly = 1;
|
|
|
|
/*
|
|
@@ -425,118 +381,131 @@ static inline int __meminit make_readonl
|
|
return readonly;
|
|
}
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-/* Must run before zap_low_mappings */
|
|
-__meminit void *early_ioremap(unsigned long addr, unsigned long size)
|
|
+static unsigned long __meminit
|
|
+phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end)
|
|
{
|
|
- pmd_t *pmd, *last_pmd;
|
|
- unsigned long vaddr;
|
|
- int i, pmds;
|
|
-
|
|
- pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
|
|
- vaddr = __START_KERNEL_map;
|
|
- pmd = level2_kernel_pgt;
|
|
- last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
|
|
-
|
|
- for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
|
|
- for (i = 0; i < pmds; i++) {
|
|
- if (pmd_present(pmd[i]))
|
|
- goto continue_outer_loop;
|
|
- }
|
|
- vaddr += addr & ~PMD_MASK;
|
|
- addr &= PMD_MASK;
|
|
+ unsigned pages = 0;
|
|
+ unsigned long last_map_addr = end;
|
|
+ int i;
|
|
+
|
|
+ pte_t *pte = pte_page + pte_index(addr);
|
|
+
|
|
+ for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) {
|
|
+ unsigned long pteval = addr | __PAGE_KERNEL;
|
|
+
|
|
+ if (addr >= end ||
|
|
+ (!after_bootmem &&
|
|
+ (addr >> PAGE_SHIFT) >= xen_start_info->nr_pages))
|
|
+ break;
|
|
+
|
|
+ if (__pte_val(*pte))
|
|
+ continue;
|
|
|
|
- for (i = 0; i < pmds; i++, addr += PMD_SIZE)
|
|
- set_pmd(pmd+i, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
|
|
- __flush_tlb_all();
|
|
-
|
|
- return (void *)vaddr;
|
|
-continue_outer_loop:
|
|
- ;
|
|
+ if (make_readonly(addr))
|
|
+ pteval &= ~_PAGE_RW;
|
|
+ if (0)
|
|
+ printk(" pte=%p addr=%lx pte=%016lx\n",
|
|
+ pte, addr, pteval);
|
|
+ if (!after_bootmem)
|
|
+ *pte = __pte(pteval & __supported_pte_mask);
|
|
+ else
|
|
+ set_pte(pte, __pte(pteval & __supported_pte_mask));
|
|
+ last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE;
|
|
+ pages++;
|
|
}
|
|
- printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
|
|
- return NULL;
|
|
+ update_page_count(PG_LEVEL_4K, pages);
|
|
+
|
|
+ return last_map_addr;
|
|
}
|
|
|
|
-/*
|
|
- * To avoid virtual aliases later:
|
|
- */
|
|
-__meminit void early_iounmap(void *addr, unsigned long size)
|
|
+static unsigned long __meminit
|
|
+phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end)
|
|
{
|
|
- unsigned long vaddr;
|
|
- pmd_t *pmd;
|
|
- int i, pmds;
|
|
+ pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
|
|
|
|
- vaddr = (unsigned long)addr;
|
|
- pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
|
|
- pmd = level2_kernel_pgt + pmd_index(vaddr);
|
|
-
|
|
- for (i = 0; i < pmds; i++)
|
|
- pmd_clear(pmd + i);
|
|
-
|
|
- __flush_tlb_all();
|
|
+ BUG_ON(!max_pfn_mapped);
|
|
+ return phys_pte_init(pte, address, end);
|
|
}
|
|
-#endif
|
|
|
|
static unsigned long __meminit
|
|
-phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
|
|
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
{
|
|
+ unsigned long pages = 0;
|
|
+ unsigned long last_map_addr = end;
|
|
+ unsigned long start = address;
|
|
+
|
|
int i = pmd_index(address);
|
|
|
|
- for (; i < PTRS_PER_PMD; i++) {
|
|
+ for (; i < PTRS_PER_PMD; i++, address += PMD_SIZE) {
|
|
unsigned long pte_phys;
|
|
- pmd_t *pmd = pmd_page + i;
|
|
- pte_t *pte, *pte_save;
|
|
- int k;
|
|
+ pmd_t *pmd = pmd_page + pmd_index(address);
|
|
+ pte_t *pte;
|
|
|
|
if (address >= end)
|
|
break;
|
|
|
|
if (__pmd_val(*pmd)) {
|
|
- address += PMD_SIZE;
|
|
+ if (!pmd_large(*pmd)) {
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
+ last_map_addr = phys_pte_update(pmd, address,
|
|
+ end);
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
+ }
|
|
+ /* Count entries we're using from level2_ident_pgt */
|
|
+ if (start == 0)
|
|
+ pages++;
|
|
continue;
|
|
}
|
|
|
|
- pte = alloc_static_page(&pte_phys);
|
|
- pte_save = pte;
|
|
- for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
|
|
- unsigned long pteval = address | _PAGE_NX | _KERNPG_TABLE;
|
|
-
|
|
- if (address >= end ||
|
|
- (!after_bootmem &&
|
|
- (address >> PAGE_SHIFT) >= xen_start_info->nr_pages))
|
|
- pteval = 0;
|
|
- else if (make_readonly(address))
|
|
- pteval &= ~_PAGE_RW;
|
|
- set_pte(pte, __pte(pteval & __supported_pte_mask));
|
|
+ if (page_size_mask & (1<<PG_LEVEL_2M)) {
|
|
+ pages++;
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
+ set_pte((pte_t *)pmd,
|
|
+ pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
+ last_map_addr = (address & PMD_MASK) + PMD_SIZE;
|
|
+ continue;
|
|
}
|
|
+
|
|
+ pte = alloc_low_page(&pte_phys);
|
|
+ last_map_addr = phys_pte_init(pte, address, end);
|
|
+ unmap_low_page(pte);
|
|
+
|
|
if (!after_bootmem) {
|
|
- early_make_page_readonly(pte_save, XENFEAT_writable_page_tables);
|
|
- *pmd = __pmd(pte_phys | _KERNPG_TABLE);
|
|
+ if (max_pfn_mapped)
|
|
+ make_page_readonly(__va(pte_phys),
|
|
+ XENFEAT_writable_page_tables);
|
|
+ *pmd = __pmd(pte_phys | _PAGE_TABLE);
|
|
} else {
|
|
- make_page_readonly(pte_save, XENFEAT_writable_page_tables);
|
|
- set_pmd(pmd, __pmd(pte_phys | _KERNPG_TABLE));
|
|
+ make_page_readonly(pte, XENFEAT_writable_page_tables);
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
+ pmd_populate_kernel(&init_mm, pmd, __va(pte_phys));
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
}
|
|
}
|
|
- return address;
|
|
+ update_page_count(PG_LEVEL_2M, pages);
|
|
+ return last_map_addr;
|
|
}
|
|
|
|
static unsigned long __meminit
|
|
-phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
|
|
+phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
{
|
|
pmd_t *pmd = pmd_offset(pud, 0);
|
|
unsigned long last_map_addr;
|
|
|
|
- spin_lock(&init_mm.page_table_lock);
|
|
- last_map_addr = phys_pmd_init(pmd, address, end);
|
|
- spin_unlock(&init_mm.page_table_lock);
|
|
+ BUG_ON(!max_pfn_mapped);
|
|
+ last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask);
|
|
__flush_tlb_all();
|
|
return last_map_addr;
|
|
}
|
|
|
|
static unsigned long __meminit
|
|
-phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
|
|
+phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
{
|
|
+ unsigned long pages = 0;
|
|
unsigned long last_map_addr = end;
|
|
int i = pud_index(addr);
|
|
|
|
@@ -550,29 +519,55 @@ phys_pud_init(pud_t *pud_page, unsigned
|
|
|
|
if (__pud_val(*pud)) {
|
|
if (!pud_large(*pud))
|
|
- last_map_addr = phys_pmd_update(pud, addr, end);
|
|
+ last_map_addr = phys_pmd_update(pud, addr, end,
|
|
+ page_size_mask);
|
|
continue;
|
|
}
|
|
|
|
- if (direct_gbpages) {
|
|
+ if (page_size_mask & (1<<PG_LEVEL_1G)) {
|
|
+ pages++;
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
set_pte((pte_t *)pud,
|
|
pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE));
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
last_map_addr = (addr & PUD_MASK) + PUD_SIZE;
|
|
continue;
|
|
}
|
|
|
|
- pmd = alloc_static_page(&pmd_phys);
|
|
+ pmd = alloc_low_page(&pmd_phys);
|
|
+ last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask);
|
|
+ unmap_low_page(pmd);
|
|
|
|
- spin_lock(&init_mm.page_table_lock);
|
|
- *pud = __pud(pmd_phys | _KERNPG_TABLE);
|
|
- last_map_addr = phys_pmd_init(pmd, addr, end);
|
|
- spin_unlock(&init_mm.page_table_lock);
|
|
-
|
|
- early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
|
|
+ if (!after_bootmem) {
|
|
+ if (max_pfn_mapped)
|
|
+ make_page_readonly(__va(pmd_phys),
|
|
+ XENFEAT_writable_page_tables);
|
|
+ if (page_size_mask & (1 << PG_LEVEL_NUM))
|
|
+ xen_l3_entry_update(pud, __pud(pmd_phys | _PAGE_TABLE));
|
|
+ else
|
|
+ *pud = __pud(pmd_phys | _PAGE_TABLE);
|
|
+ } else {
|
|
+ make_page_readonly(pmd, XENFEAT_writable_page_tables);
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
+ pud_populate(&init_mm, pud, __va(pmd_phys));
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
+ }
|
|
}
|
|
__flush_tlb_all();
|
|
+ update_page_count(PG_LEVEL_1G, pages);
|
|
|
|
- return last_map_addr >> PAGE_SHIFT;
|
|
+ return last_map_addr;
|
|
+}
|
|
+
|
|
+static unsigned long __meminit
|
|
+phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
+{
|
|
+ pud_t *pud;
|
|
+
|
|
+ pud = (pud_t *)pgd_page_vaddr(*pgd);
|
|
+
|
|
+ return phys_pud_init(pud, addr, end, page_size_mask | (1 << PG_LEVEL_NUM));
|
|
}
|
|
|
|
void __init xen_init_pt(void)
|
|
@@ -656,112 +651,36 @@ void __init xen_init_pt(void)
|
|
}
|
|
}
|
|
|
|
-static void __init extend_init_mapping(unsigned long tables_space)
|
|
-{
|
|
- unsigned long va = __START_KERNEL_map;
|
|
- unsigned long start = start_pfn;
|
|
- unsigned long phys, addr, *pte_page;
|
|
- pmd_t *pmd;
|
|
- pte_t *pte, new_pte;
|
|
- unsigned long *page = (unsigned long *)init_level4_pgt;
|
|
-
|
|
- addr = page[pgd_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- addr = page[pud_index(va)];
|
|
- addr_to_page(addr, page);
|
|
-
|
|
- /* Kill mapping of low 1MB. */
|
|
- while (va < (unsigned long)&_text) {
|
|
- if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
- BUG();
|
|
- va += PAGE_SIZE;
|
|
- }
|
|
-
|
|
- /* Ensure init mappings cover kernel text/data and initial tables. */
|
|
- while (va < (__START_KERNEL_map
|
|
- + (start_pfn << PAGE_SHIFT)
|
|
- + tables_space)) {
|
|
- if (!(pmd_index(va) | pte_index(va))) {
|
|
- pud_t *pud;
|
|
-
|
|
- page = (unsigned long *)init_level4_pgt;
|
|
- addr = page[pgd_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- pud = (pud_t *)&page[pud_index(va)];
|
|
- if (pud_none(*pud)) {
|
|
- page = alloc_static_page(&phys);
|
|
- early_make_page_readonly(
|
|
- page, XENFEAT_writable_page_tables);
|
|
- set_pud(pud, __pud(phys | _KERNPG_TABLE));
|
|
- } else {
|
|
- addr = page[pud_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- }
|
|
- }
|
|
- pmd = (pmd_t *)&page[pmd_index(va)];
|
|
- if (pmd_none(*pmd)) {
|
|
- pte_page = alloc_static_page(&phys);
|
|
- early_make_page_readonly(
|
|
- pte_page, XENFEAT_writable_page_tables);
|
|
- set_pmd(pmd, __pmd(phys | _KERNPG_TABLE));
|
|
- } else {
|
|
- addr = page[pmd_index(va)];
|
|
- addr_to_page(addr, pte_page);
|
|
- }
|
|
- pte = (pte_t *)&pte_page[pte_index(va)];
|
|
- if (pte_none(*pte)) {
|
|
- new_pte = pfn_pte(
|
|
- (va - __START_KERNEL_map) >> PAGE_SHIFT,
|
|
- __pgprot(_KERNPG_TABLE));
|
|
- xen_l1_entry_update(pte, new_pte);
|
|
- }
|
|
- va += PAGE_SIZE;
|
|
- }
|
|
-
|
|
- /* Finally, blow away any spurious initial mappings. */
|
|
- while (1) {
|
|
- if (!(pmd_index(va) | pte_index(va))) {
|
|
- page = (unsigned long *)init_level4_pgt;
|
|
- addr = page[pgd_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- if (pud_none(((pud_t *)page)[pud_index(va)]))
|
|
- break;
|
|
- addr = page[pud_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- }
|
|
- pmd = (pmd_t *)&page[pmd_index(va)];
|
|
- if (pmd_none(*pmd))
|
|
- break;
|
|
- if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
- BUG();
|
|
- va += PAGE_SIZE;
|
|
- }
|
|
-
|
|
- if (start_pfn > start)
|
|
- reserve_early(start << PAGE_SHIFT,
|
|
- start_pfn << PAGE_SHIFT, "INITMAP");
|
|
-}
|
|
-
|
|
static void __init find_early_table_space(unsigned long end)
|
|
{
|
|
unsigned long puds, pmds, ptes, tables;
|
|
|
|
puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
|
+ tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
|
|
pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
- ptes = (end + PTE_SIZE - 1) >> PAGE_SHIFT;
|
|
+ tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
|
|
|
|
- tables = round_up(puds * 8, PAGE_SIZE) +
|
|
- round_up(pmds * 8, PAGE_SIZE) +
|
|
- round_up(ptes * 8, PAGE_SIZE);
|
|
+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+ tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
|
|
|
|
- extend_init_mapping(tables);
|
|
+ if (!table_top) {
|
|
+ table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
|
|
+ xen_start_info->nr_pt_frames;
|
|
+ table_cur = table_start;
|
|
+ } else {
|
|
+ /*
|
|
+ * [table_start, table_top) gets passed to reserve_early(),
|
|
+ * so we must not use table_cur here, despite continuing
|
|
+ * to allocate from there. table_cur possibly being below
|
|
+ * table_start is otoh not a problem.
|
|
+ */
|
|
+ table_start = table_top;
|
|
+ }
|
|
|
|
- table_start = start_pfn;
|
|
- table_end = table_start + (tables>>PAGE_SHIFT);
|
|
+ table_top = table_cur + (tables >> PAGE_SHIFT);
|
|
|
|
- early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n",
|
|
- end, table_start << PAGE_SHIFT,
|
|
- (table_start << PAGE_SHIFT) + tables);
|
|
+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
|
|
+ end, table_cur << PAGE_SHIFT, table_top << PAGE_SHIFT);
|
|
}
|
|
|
|
static void __init xen_finish_init_mapping(void)
|
|
@@ -783,18 +702,18 @@ static void __init xen_finish_init_mappi
|
|
xen_start_info->mod_start = (unsigned long)
|
|
__va(__pa(xen_start_info->mod_start));
|
|
|
|
- /* Destroy the Xen-created mappings beyond the kernel image as
|
|
- * well as the temporary mappings created above. Prevents
|
|
- * overlap with modules area (if init mapping is very big).
|
|
- */
|
|
+ /* Destroy the Xen-created mappings beyond the kernel image. */
|
|
start = PAGE_ALIGN((unsigned long)_end);
|
|
- end = __START_KERNEL_map + (table_end << PAGE_SHIFT);
|
|
+ end = __START_KERNEL_map + (table_start << PAGE_SHIFT);
|
|
for (; start < end; start += PAGE_SIZE)
|
|
if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0))
|
|
BUG();
|
|
|
|
- /* Allocate pte's for initial fixmaps from 'start_pfn' allocator. */
|
|
- table_end = ~0UL;
|
|
+ /* Allocate pte's for initial fixmaps from 'table_cur' allocator. */
|
|
+ start = table_top;
|
|
+ WARN(table_cur != start, "start=%lx cur=%lx top=%lx\n",
|
|
+ table_start, table_cur, start);
|
|
+ table_top = ~0UL;
|
|
|
|
/* Switch to the real shared_info page, and clear the dummy page. */
|
|
set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
|
|
@@ -811,8 +730,7 @@ static void __init xen_finish_init_mappi
|
|
<< PAGE_SHIFT,
|
|
PAGE_KERNEL_RO);
|
|
|
|
- /* Disable the 'start_pfn' allocator. */
|
|
- table_end = start_pfn;
|
|
+ table_top = max(table_cur, start);
|
|
}
|
|
|
|
static void __init init_gbpages(void)
|
|
@@ -825,126 +743,91 @@ static void __init init_gbpages(void)
|
|
#endif
|
|
}
|
|
|
|
-#ifdef CONFIG_MEMTEST_BOOTPARAM
|
|
-
|
|
-static void __init memtest(unsigned long start_phys, unsigned long size,
|
|
- unsigned pattern)
|
|
+static unsigned long __init kernel_physical_mapping_init(unsigned long start,
|
|
+ unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
{
|
|
- unsigned long i;
|
|
- unsigned long *start;
|
|
- unsigned long start_bad;
|
|
- unsigned long last_bad;
|
|
- unsigned long val;
|
|
- unsigned long start_phys_aligned;
|
|
- unsigned long count;
|
|
- unsigned long incr;
|
|
-
|
|
- switch (pattern) {
|
|
- case 0:
|
|
- val = 0UL;
|
|
- break;
|
|
- case 1:
|
|
- val = -1UL;
|
|
- break;
|
|
- case 2:
|
|
- val = 0x5555555555555555UL;
|
|
- break;
|
|
- case 3:
|
|
- val = 0xaaaaaaaaaaaaaaaaUL;
|
|
- break;
|
|
- default:
|
|
- return;
|
|
- }
|
|
|
|
- incr = sizeof(unsigned long);
|
|
- start_phys_aligned = ALIGN(start_phys, incr);
|
|
- count = (size - (start_phys_aligned - start_phys))/incr;
|
|
- start = __va(start_phys_aligned);
|
|
- start_bad = 0;
|
|
- last_bad = 0;
|
|
-
|
|
- for (i = 0; i < count; i++)
|
|
- start[i] = val;
|
|
- for (i = 0; i < count; i++, start++, start_phys_aligned += incr) {
|
|
- if (*start != val) {
|
|
- if (start_phys_aligned == last_bad + incr) {
|
|
- last_bad += incr;
|
|
- } else {
|
|
- if (start_bad) {
|
|
- printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
|
|
- val, start_bad, last_bad + incr);
|
|
- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
|
|
- }
|
|
- start_bad = last_bad = start_phys_aligned;
|
|
- }
|
|
- }
|
|
- }
|
|
- if (start_bad) {
|
|
- printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved",
|
|
- val, start_bad, last_bad + incr);
|
|
- reserve_early(start_bad, last_bad - start_bad, "BAD RAM");
|
|
- }
|
|
+ unsigned long next, last_map_addr = end;
|
|
|
|
-}
|
|
+ start = (unsigned long)__va(start);
|
|
+ end = (unsigned long)__va(end);
|
|
|
|
-static int memtest_pattern __initdata = CONFIG_MEMTEST_BOOTPARAM_VALUE;
|
|
+ for (; start < end; start = next) {
|
|
+ pgd_t *pgd = pgd_offset_k(start);
|
|
+ unsigned long pud_phys;
|
|
+ pud_t *pud;
|
|
|
|
-static int __init parse_memtest(char *arg)
|
|
-{
|
|
- if (arg)
|
|
- memtest_pattern = simple_strtoul(arg, NULL, 0);
|
|
- return 0;
|
|
-}
|
|
+ next = (start + PGDIR_SIZE) & PGDIR_MASK;
|
|
+ if (next > end)
|
|
+ next = end;
|
|
|
|
-early_param("memtest", parse_memtest);
|
|
+ if (__pgd_val(*pgd)) {
|
|
+ last_map_addr = phys_pud_update(pgd, __pa(start),
|
|
+ __pa(end), page_size_mask);
|
|
+ continue;
|
|
+ }
|
|
|
|
-static void __init early_memtest(unsigned long start, unsigned long end)
|
|
-{
|
|
- u64 t_start, t_size;
|
|
- unsigned pattern;
|
|
+ pud = alloc_low_page(&pud_phys);
|
|
+ last_map_addr = phys_pud_init(pud, __pa(start), __pa(next),
|
|
+ page_size_mask);
|
|
+ unmap_low_page(pud);
|
|
|
|
- if (!memtest_pattern)
|
|
- return;
|
|
+ if (!after_bootmem) {
|
|
+ if (max_pfn_mapped)
|
|
+ make_page_readonly(__va(pud_phys),
|
|
+ XENFEAT_writable_page_tables);
|
|
+ xen_l4_entry_update(pgd, __pgd(pud_phys | _PAGE_TABLE));
|
|
+ } else {
|
|
+ make_page_readonly(pud, XENFEAT_writable_page_tables);
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
+ pgd_populate(&init_mm, pgd, __va(pud_phys));
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
+ }
|
|
+ }
|
|
|
|
- printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern);
|
|
- for (pattern = 0; pattern < memtest_pattern; pattern++) {
|
|
- t_start = start;
|
|
- t_size = 0;
|
|
- while (t_start < end) {
|
|
- t_start = find_e820_area_size(t_start, &t_size, 1);
|
|
+ return last_map_addr;
|
|
+}
|
|
|
|
- /* done ? */
|
|
- if (t_start >= end)
|
|
- break;
|
|
- if (t_start + t_size > end)
|
|
- t_size = end - t_start;
|
|
+struct map_range {
|
|
+ unsigned long start;
|
|
+ unsigned long end;
|
|
+ unsigned page_size_mask;
|
|
+};
|
|
|
|
- printk(KERN_CONT "\n %016llx - %016llx pattern %d",
|
|
- (unsigned long long)t_start,
|
|
- (unsigned long long)t_start + t_size, pattern);
|
|
+#define NR_RANGE_MR 5
|
|
|
|
- memtest(t_start, t_size, pattern);
|
|
+static int save_mr(struct map_range *mr, int nr_range,
|
|
+ unsigned long start_pfn, unsigned long end_pfn,
|
|
+ unsigned long page_size_mask)
|
|
+{
|
|
|
|
- t_start += t_size;
|
|
- }
|
|
+ if (start_pfn < end_pfn) {
|
|
+ if (nr_range >= NR_RANGE_MR)
|
|
+ panic("run out of range for init_memory_mapping\n");
|
|
+ mr[nr_range].start = start_pfn<<PAGE_SHIFT;
|
|
+ mr[nr_range].end = end_pfn<<PAGE_SHIFT;
|
|
+ mr[nr_range].page_size_mask = page_size_mask;
|
|
+ nr_range++;
|
|
}
|
|
- printk(KERN_CONT "\n");
|
|
-}
|
|
-#else
|
|
-static void __init early_memtest(unsigned long start, unsigned long end)
|
|
-{
|
|
+
|
|
+ return nr_range;
|
|
}
|
|
-#endif
|
|
|
|
/*
|
|
* Setup the direct mapping of the physical memory at PAGE_OFFSET.
|
|
* This runs before bootmem is initialized and gets pages directly from
|
|
* the physical memory. To access them they are temporarily mapped.
|
|
*/
|
|
-unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end)
|
|
+unsigned long __init_refok init_memory_mapping(unsigned long start,
|
|
+ unsigned long end)
|
|
{
|
|
- unsigned long next, last_map_addr = end;
|
|
- unsigned long start_phys = start, end_phys = end;
|
|
+ unsigned long last_map_addr = 0;
|
|
+ unsigned long page_size_mask = 0;
|
|
+ unsigned long start_pfn, end_pfn;
|
|
+
|
|
+ struct map_range mr[NR_RANGE_MR];
|
|
+ int nr_range, i;
|
|
|
|
printk(KERN_INFO "init_memory_mapping\n");
|
|
|
|
@@ -955,51 +838,150 @@ unsigned long __init_refok init_memory_m
|
|
* memory mapped. Unfortunately this is done currently before the
|
|
* nodes are discovered.
|
|
*/
|
|
- if (!after_bootmem) {
|
|
+ if (!after_bootmem)
|
|
init_gbpages();
|
|
- find_early_table_space(end);
|
|
+
|
|
+ if (direct_gbpages)
|
|
+ page_size_mask |= 1 << PG_LEVEL_1G;
|
|
+ if (cpu_has_pse)
|
|
+ page_size_mask |= 1 << PG_LEVEL_2M;
|
|
+
|
|
+ memset(mr, 0, sizeof(mr));
|
|
+ nr_range = 0;
|
|
+
|
|
+ /* head if not big page alignment ?*/
|
|
+ start_pfn = start >> PAGE_SHIFT;
|
|
+ end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
+
|
|
+ /* big page (2M) range*/
|
|
+ start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
+ << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)))
|
|
+ end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT));
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask & (1<<PG_LEVEL_2M));
|
|
+
|
|
+ /* big page (1G) range */
|
|
+ start_pfn = end_pfn;
|
|
+ end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask &
|
|
+ ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
|
|
+
|
|
+ /* tail is not big page (1G) alignment */
|
|
+ start_pfn = end_pfn;
|
|
+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask & (1<<PG_LEVEL_2M));
|
|
+
|
|
+ /* tail is not big page (2M) alignment */
|
|
+ start_pfn = end_pfn;
|
|
+ end_pfn = end>>PAGE_SHIFT;
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
+
|
|
+ /* try to merge same page size and continuous */
|
|
+ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
|
|
+ unsigned long old_start;
|
|
+ if (mr[i].end != mr[i+1].start ||
|
|
+ mr[i].page_size_mask != mr[i+1].page_size_mask)
|
|
+ continue;
|
|
+ /* move it */
|
|
+ old_start = mr[i].start;
|
|
+ memmove(&mr[i], &mr[i+1],
|
|
+ (nr_range - 1 - i) * sizeof (struct map_range));
|
|
+ mr[i--].start = old_start;
|
|
+ nr_range--;
|
|
}
|
|
|
|
- start = (unsigned long)__va(start);
|
|
- end = (unsigned long)__va(end);
|
|
+ for (i = 0; i < nr_range; i++)
|
|
+ printk(KERN_DEBUG " %010lx - %010lx page %s\n",
|
|
+ mr[i].start, mr[i].end,
|
|
+ (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
|
|
+ (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
|
|
|
|
- for (; start < end; start = next) {
|
|
- pgd_t *pgd = pgd_offset_k(start);
|
|
- unsigned long pud_phys;
|
|
- pud_t *pud;
|
|
+ if (!after_bootmem)
|
|
+ find_early_table_space(end);
|
|
|
|
- if (after_bootmem)
|
|
- pud = pud_offset(pgd, start & PGDIR_MASK);
|
|
- else
|
|
- pud = alloc_static_page(&pud_phys);
|
|
- next = start + PGDIR_SIZE;
|
|
- if (next > end)
|
|
- next = end;
|
|
- last_map_addr = phys_pud_init(pud, __pa(start), __pa(next));
|
|
- if (!after_bootmem) {
|
|
- early_make_page_readonly(pud, XENFEAT_writable_page_tables);
|
|
- set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys));
|
|
+ if (!start) {
|
|
+ unsigned long addr, va = __START_KERNEL_map;
|
|
+ unsigned long *page = (unsigned long *)init_level4_pgt;
|
|
+
|
|
+ /* Kill mapping of memory below _text. */
|
|
+ while (va < (unsigned long)&_text) {
|
|
+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
+ BUG();
|
|
+ va += PAGE_SIZE;
|
|
+ }
|
|
+
|
|
+ /* Blow away any spurious initial mappings. */
|
|
+ va = __START_KERNEL_map + (table_start << PAGE_SHIFT);
|
|
+ addr = page[pgd_index(va)];
|
|
+ addr_to_page(addr, page);
|
|
+ addr = page[pud_index(va)];
|
|
+ addr_to_page(addr, page);
|
|
+ while (pmd_index(va) | pte_index(va)) {
|
|
+ if (pmd_none(*(pmd_t *)&page[pmd_index(va)]))
|
|
+ break;
|
|
+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
+ BUG();
|
|
+ va += PAGE_SIZE;
|
|
}
|
|
}
|
|
|
|
- if (!after_bootmem) {
|
|
- BUG_ON(start_pfn != table_end);
|
|
+ for (i = 0; i < nr_range; i++)
|
|
+ last_map_addr = kernel_physical_mapping_init(
|
|
+ mr[i].start, mr[i].end,
|
|
+ mr[i].page_size_mask);
|
|
+
|
|
+ BUG_ON(table_cur > table_top);
|
|
+ if (!start)
|
|
xen_finish_init_mapping();
|
|
- }
|
|
+ else if (table_cur < table_top)
|
|
+ /* Disable the 'table_cur' allocator. */
|
|
+ table_top = table_cur;
|
|
|
|
__flush_tlb_all();
|
|
|
|
- if (!after_bootmem)
|
|
+ if (!after_bootmem && table_top > table_start)
|
|
reserve_early(table_start << PAGE_SHIFT,
|
|
- table_end << PAGE_SHIFT, "PGTABLE");
|
|
+ table_top << PAGE_SHIFT, "PGTABLE");
|
|
+
|
|
+ printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
|
|
+ last_map_addr, end);
|
|
|
|
if (!after_bootmem)
|
|
- early_memtest(start_phys, end_phys);
|
|
+ early_memtest(start, end);
|
|
|
|
- return last_map_addr;
|
|
+ return last_map_addr >> PAGE_SHIFT;
|
|
}
|
|
|
|
#ifndef CONFIG_NUMA
|
|
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
|
|
+{
|
|
+ unsigned long bootmap_size, bootmap;
|
|
+
|
|
+ e820_register_active_regions(0, start_pfn, end_pfn);
|
|
+#ifdef CONFIG_XEN
|
|
+ if (end_pfn > xen_start_info->nr_pages)
|
|
+ end_pfn = xen_start_info->nr_pages;
|
|
+#endif
|
|
+ bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
|
|
+ bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size,
|
|
+ PAGE_SIZE);
|
|
+ if (bootmap == -1L)
|
|
+ panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
|
+ /* don't touch min_low_pfn */
|
|
+ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
|
|
+ 0, end_pfn);
|
|
+ free_bootmem_with_active_regions(0, end_pfn);
|
|
+ early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
|
|
+ reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT);
|
|
+}
|
|
+
|
|
void __init paging_init(void)
|
|
{
|
|
unsigned long max_zone_pfns[MAX_NR_ZONES];
|
|
@@ -1007,9 +989,9 @@ void __init paging_init(void)
|
|
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
|
|
max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
|
|
max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
|
|
- max_zone_pfns[ZONE_NORMAL] = end_pfn;
|
|
+ max_zone_pfns[ZONE_NORMAL] = max_pfn;
|
|
|
|
- memory_present(0, 0, end_pfn);
|
|
+ memory_present(0, 0, max_pfn);
|
|
sparse_init();
|
|
free_area_init_nodes(max_zone_pfns);
|
|
|
|
@@ -1099,8 +1081,8 @@ void __init mem_init(void)
|
|
ClearPageReserved(pfn_to_page(pfn));
|
|
init_page_count(pfn_to_page(pfn));
|
|
}
|
|
- reservedpages = end_pfn - totalram_pages -
|
|
- absent_pages_in_range(0, end_pfn);
|
|
+ reservedpages = max_pfn - totalram_pages -
|
|
+ absent_pages_in_range(0, max_pfn);
|
|
after_bootmem = 1;
|
|
|
|
codesize = (unsigned long) &_etext - (unsigned long) &_text;
|
|
@@ -1119,7 +1101,7 @@ void __init mem_init(void)
|
|
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
|
|
"%ldk reserved, %ldk data, %ldk init)\n",
|
|
(unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
|
|
- end_pfn << (PAGE_SHIFT-10),
|
|
+ max_pfn << (PAGE_SHIFT-10),
|
|
codesize >> 10,
|
|
reservedpages << (PAGE_SHIFT-10),
|
|
datasize >> 10,
|
|
@@ -1182,6 +1164,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data);
|
|
void mark_rodata_ro(void)
|
|
{
|
|
unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
|
|
+ unsigned long rodata_start =
|
|
+ ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
|
|
+
|
|
+#ifdef CONFIG_DYNAMIC_FTRACE
|
|
+ /* Dynamic tracing modifies the kernel text section */
|
|
+ start = rodata_start;
|
|
+#endif
|
|
|
|
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
|
|
(end - start) >> 10);
|
|
@@ -1191,8 +1180,7 @@ void mark_rodata_ro(void)
|
|
* The rodata section (but not the kernel text!) should also be
|
|
* not-executable.
|
|
*/
|
|
- start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
|
|
- set_memory_nx(start, (end - start) >> PAGE_SHIFT);
|
|
+ set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT);
|
|
|
|
rodata_test();
|
|
|
|
@@ -1214,24 +1202,26 @@ void free_initrd_mem(unsigned long start
|
|
}
|
|
#endif
|
|
|
|
-void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
|
|
+int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
|
|
+ int flags)
|
|
{
|
|
#ifdef CONFIG_NUMA
|
|
int nid, next_nid;
|
|
+ int ret;
|
|
#endif
|
|
unsigned long pfn = phys >> PAGE_SHIFT;
|
|
|
|
- if (pfn >= end_pfn) {
|
|
+ if (pfn >= max_pfn) {
|
|
/*
|
|
* This can happen with kdump kernels when accessing
|
|
* firmware tables:
|
|
*/
|
|
if (pfn < max_pfn_mapped)
|
|
- return;
|
|
+ return -EFAULT;
|
|
|
|
- printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
|
|
+ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %lu\n",
|
|
phys, len);
|
|
- return;
|
|
+ return -EFAULT;
|
|
}
|
|
|
|
/* Should check here against the e820 map to avoid double free */
|
|
@@ -1239,9 +1229,13 @@ void __init reserve_bootmem_generic(unsi
|
|
nid = phys_to_nid(phys);
|
|
next_nid = phys_to_nid(phys + len - 1);
|
|
if (nid == next_nid)
|
|
- reserve_bootmem_node(NODE_DATA(nid), phys, len, BOOTMEM_DEFAULT);
|
|
+ ret = reserve_bootmem_node(NODE_DATA(nid), phys, len, flags);
|
|
else
|
|
- reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
|
|
+ ret = reserve_bootmem(phys, len, flags);
|
|
+
|
|
+ if (ret != 0)
|
|
+ return ret;
|
|
+
|
|
#else
|
|
reserve_bootmem(phys, len, BOOTMEM_DEFAULT);
|
|
#endif
|
|
@@ -1252,6 +1246,8 @@ void __init reserve_bootmem_generic(unsi
|
|
set_dma_reserve(dma_reserve);
|
|
}
|
|
#endif
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
int kern_addr_valid(unsigned long addr)
|
|
@@ -1369,7 +1365,7 @@ vmemmap_populate(struct page *start_page
|
|
pmd_t *pmd;
|
|
|
|
for (; addr < end; addr = next) {
|
|
- next = pmd_addr_end(addr, end);
|
|
+ void *p = NULL;
|
|
|
|
pgd = vmemmap_pgd_populate(addr, node);
|
|
if (!pgd)
|
|
@@ -1379,33 +1375,51 @@ vmemmap_populate(struct page *start_page
|
|
if (!pud)
|
|
return -ENOMEM;
|
|
|
|
- pmd = pmd_offset(pud, addr);
|
|
- if (pmd_none(*pmd)) {
|
|
- pte_t entry;
|
|
- void *p;
|
|
+ if (!cpu_has_pse) {
|
|
+ next = (addr + PAGE_SIZE) & PAGE_MASK;
|
|
+ pmd = vmemmap_pmd_populate(pud, addr, node);
|
|
+
|
|
+ if (!pmd)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ p = vmemmap_pte_populate(pmd, addr, node);
|
|
|
|
- p = vmemmap_alloc_block(PMD_SIZE, node);
|
|
if (!p)
|
|
return -ENOMEM;
|
|
|
|
- entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
|
|
- PAGE_KERNEL_LARGE);
|
|
- set_pmd(pmd, __pmd_ma(__pte_val(entry)));
|
|
-
|
|
- /* check to see if we have contiguous blocks */
|
|
- if (p_end != p || node_start != node) {
|
|
- if (p_start)
|
|
- printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
|
|
- addr_start, addr_end-1, p_start, p_end-1, node_start);
|
|
- addr_start = addr;
|
|
- node_start = node;
|
|
- p_start = p;
|
|
- }
|
|
- addr_end = addr + PMD_SIZE;
|
|
- p_end = p + PMD_SIZE;
|
|
+ addr_end = addr + PAGE_SIZE;
|
|
+ p_end = p + PAGE_SIZE;
|
|
} else {
|
|
- vmemmap_verify((pte_t *)pmd, node, addr, next);
|
|
+ next = pmd_addr_end(addr, end);
|
|
+
|
|
+ pmd = pmd_offset(pud, addr);
|
|
+ if (pmd_none(*pmd)) {
|
|
+ pte_t entry;
|
|
+
|
|
+ p = vmemmap_alloc_block(PMD_SIZE, node);
|
|
+ if (!p)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
|
|
+ PAGE_KERNEL_LARGE);
|
|
+ set_pmd(pmd, __pmd_ma(__pte_val(entry)));
|
|
+
|
|
+ /* check to see if we have contiguous blocks */
|
|
+ if (p_end != p || node_start != node) {
|
|
+ if (p_start)
|
|
+ printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
|
|
+ addr_start, addr_end-1, p_start, p_end-1, node_start);
|
|
+ addr_start = addr;
|
|
+ node_start = node;
|
|
+ p_start = p;
|
|
+ }
|
|
+
|
|
+ addr_end = addr + PMD_SIZE;
|
|
+ p_end = p + PMD_SIZE;
|
|
+ } else
|
|
+ vmemmap_verify((pte_t *)pmd, node, addr, next);
|
|
}
|
|
+
|
|
}
|
|
return 0;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:39:13.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/ioremap-xen.c 2011-02-07 15:40:39.000000000 +0100
|
|
@@ -13,6 +13,7 @@
|
|
#include <linux/pfn.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/vmalloc.h>
|
|
+#include <linux/mmiotrace.h>
|
|
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/e820.h>
|
|
@@ -255,7 +256,8 @@ int ioremap_check_change_attr(unsigned l
|
|
for (sz = rc = 0; sz < size && !rc; ++mfn, sz += PAGE_SIZE) {
|
|
unsigned long pfn = mfn_to_local_pfn(mfn);
|
|
|
|
- if (pfn >= max_pfn_mapped)
|
|
+ if (pfn >= max_low_pfn_mapped &&
|
|
+ (pfn < (1UL<<(32 - PAGE_SHIFT)) || pfn >= max_pfn_mapped))
|
|
continue;
|
|
rc = ioremap_change_attr((unsigned long)__va(pfn << PAGE_SHIFT),
|
|
PAGE_SIZE, prot_val);
|
|
@@ -278,11 +280,14 @@ static void __iomem *__ioremap_caller(re
|
|
{
|
|
unsigned long offset, vaddr;
|
|
phys_addr_t mfn, last_addr;
|
|
+ const resource_size_t unaligned_phys_addr = phys_addr;
|
|
+ const unsigned long unaligned_size = size;
|
|
struct vm_struct *area;
|
|
unsigned long new_prot_val;
|
|
pgprot_t prot;
|
|
int retval;
|
|
domid_t domid = DOMID_IO;
|
|
+ void __iomem *ret_addr;
|
|
|
|
/* Don't allow wraparound or zero size */
|
|
last_addr = phys_addr + size - 1;
|
|
@@ -299,7 +304,7 @@ static void __iomem *__ioremap_caller(re
|
|
/*
|
|
* Don't remap the low PCI/ISA area, it's always mapped..
|
|
*/
|
|
- if (is_initial_xendomain() && last_addr < ISA_END_ADDRESS)
|
|
+ if (is_initial_xendomain() && is_ISA_range(phys_addr, last_addr))
|
|
return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr);
|
|
|
|
/*
|
|
@@ -323,7 +328,7 @@ static void __iomem *__ioremap_caller(re
|
|
phys_addr &= PAGE_MASK;
|
|
size = PAGE_ALIGN(last_addr+1) - phys_addr;
|
|
|
|
- retval = reserve_memtype(phys_addr, phys_addr + size,
|
|
+ retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
|
|
prot_val, &new_prot_val);
|
|
if (retval) {
|
|
pr_debug("Warning: reserve_memtype returned %d\n", retval);
|
|
@@ -391,7 +396,10 @@ static void __iomem *__ioremap_caller(re
|
|
return NULL;
|
|
}
|
|
|
|
- return (void __iomem *) (vaddr + offset);
|
|
+ ret_addr = (void __iomem *) (vaddr + offset);
|
|
+ mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
|
|
+
|
|
+ return ret_addr;
|
|
}
|
|
|
|
/**
|
|
@@ -419,7 +427,7 @@ void __iomem *ioremap_nocache(resource_s
|
|
{
|
|
/*
|
|
* Ideally, this should be:
|
|
- * pat_wc_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
|
|
+ * pat_enabled ? _PAGE_CACHE_UC : _PAGE_CACHE_UC_MINUS;
|
|
*
|
|
* Till we fix all X drivers to use ioremap_wc(), we will use
|
|
* UC MINUS.
|
|
@@ -443,7 +451,7 @@ EXPORT_SYMBOL(ioremap_nocache);
|
|
*/
|
|
void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
|
|
{
|
|
- if (pat_wc_enabled)
|
|
+ if (pat_enabled)
|
|
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
|
|
__builtin_return_address(0));
|
|
else
|
|
@@ -483,6 +491,14 @@ static void __iomem *ioremap_default(res
|
|
}
|
|
#endif
|
|
|
|
+void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
|
|
+ unsigned long prot_val)
|
|
+{
|
|
+ return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK),
|
|
+ __builtin_return_address(0));
|
|
+}
|
|
+EXPORT_SYMBOL(ioremap_prot);
|
|
+
|
|
/**
|
|
* iounmap - Free a IO remapping
|
|
* @addr: virtual address from ioremap_*
|
|
@@ -507,6 +523,8 @@ void iounmap(volatile void __iomem *addr
|
|
addr = (volatile void __iomem *)
|
|
(PAGE_MASK & (unsigned long __force)addr);
|
|
|
|
+ mmiotrace_iounmap(addr);
|
|
+
|
|
/* Use the vm area unlocked, assuming the caller
|
|
ensures there isn't another iounmap for the same address
|
|
in parallel. Reuse of the virtual address is prevented by
|
|
@@ -514,7 +532,7 @@ void iounmap(volatile void __iomem *addr
|
|
cpa takes care of the direct mappings. */
|
|
read_lock(&vmlist_lock);
|
|
for (p = vmlist; p; p = p->next) {
|
|
- if (p->addr == addr)
|
|
+ if (p->addr == (void __force *)addr)
|
|
break;
|
|
}
|
|
read_unlock(&vmlist_lock);
|
|
@@ -528,7 +546,7 @@ void iounmap(volatile void __iomem *addr
|
|
free_memtype(p->phys_addr, p->phys_addr + get_vm_area_size(p));
|
|
|
|
/* Finally remove it */
|
|
- o = remove_vm_area((void *)addr);
|
|
+ o = remove_vm_area((void __force *)addr);
|
|
BUG_ON(p != o || o == NULL);
|
|
kfree(p);
|
|
}
|
|
@@ -548,7 +566,7 @@ void *xlate_dev_mem_ptr(unsigned long ph
|
|
if (page_is_ram(start >> PAGE_SHIFT))
|
|
return __va(phys);
|
|
|
|
- addr = (void *)ioremap_default(start, PAGE_SIZE);
|
|
+ addr = (void __force *)ioremap_default(start, PAGE_SIZE);
|
|
if (addr)
|
|
addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
|
|
|
|
@@ -576,8 +594,7 @@ static int __init early_ioremap_debug_se
|
|
early_param("early_ioremap_debug", early_ioremap_debug_setup);
|
|
|
|
static __initdata int after_paging_init;
|
|
-static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)]
|
|
- __section(.bss.page_aligned);
|
|
+static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss;
|
|
|
|
#ifdef CONFIG_X86_32
|
|
static inline pmd_t * __init early_ioremap_pmd(unsigned long addr)
|
|
@@ -676,10 +693,11 @@ static void __init __early_set_fixmap(en
|
|
return;
|
|
}
|
|
pte = early_ioremap_pte(addr);
|
|
+
|
|
if (pgprot_val(flags))
|
|
set_pte(pte, pfn_pte_ma(phys >> PAGE_SHIFT, flags));
|
|
else
|
|
- pte_clear(NULL, addr, pte);
|
|
+ pte_clear(&init_mm, addr, pte);
|
|
__flush_tlb_one(addr);
|
|
}
|
|
|
|
@@ -707,13 +725,11 @@ static int __init check_early_ioremap_le
|
|
{
|
|
if (!early_ioremap_nested)
|
|
return 0;
|
|
-
|
|
- printk(KERN_WARNING
|
|
+ WARN(1, KERN_WARNING
|
|
"Debug warning: early ioremap leak of %d areas detected.\n",
|
|
- early_ioremap_nested);
|
|
+ early_ioremap_nested);
|
|
printk(KERN_WARNING
|
|
- "please boot with early_ioremap_debug and report the dmesg.\n");
|
|
- WARN_ON(1);
|
|
+ "please boot with early_ioremap_debug and report the dmesg.\n");
|
|
|
|
return 1;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/pageattr-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/pageattr-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -34,6 +34,47 @@ struct cpa_data {
|
|
unsigned force_split : 1;
|
|
};
|
|
|
|
+#ifdef CONFIG_PROC_FS
|
|
+static unsigned long direct_pages_count[PG_LEVEL_NUM];
|
|
+
|
|
+void update_page_count(int level, unsigned long pages)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ /* Protect against CPA */
|
|
+ spin_lock_irqsave(&pgd_lock, flags);
|
|
+ direct_pages_count[level] += pages;
|
|
+ spin_unlock_irqrestore(&pgd_lock, flags);
|
|
+}
|
|
+
|
|
+static void split_page_count(int level)
|
|
+{
|
|
+ direct_pages_count[level]--;
|
|
+ direct_pages_count[level - 1] += PTRS_PER_PTE;
|
|
+}
|
|
+
|
|
+int arch_report_meminfo(char *page)
|
|
+{
|
|
+ int n = sprintf(page, "DirectMap4k: %8lu kB\n",
|
|
+ direct_pages_count[PG_LEVEL_4K] << 2);
|
|
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
|
+ n += sprintf(page + n, "DirectMap2M: %8lu kB\n",
|
|
+ direct_pages_count[PG_LEVEL_2M] << 11);
|
|
+#else
|
|
+ n += sprintf(page + n, "DirectMap4M: %8lu kB\n",
|
|
+ direct_pages_count[PG_LEVEL_2M] << 12);
|
|
+#endif
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (direct_gbpages)
|
|
+ n += sprintf(page + n, "DirectMap1G: %8lu kB\n",
|
|
+ direct_pages_count[PG_LEVEL_1G] << 20);
|
|
+#endif
|
|
+ return n;
|
|
+}
|
|
+#else
|
|
+static inline void split_page_count(int level) { }
|
|
+#endif
|
|
+
|
|
#ifdef CONFIG_X86_64
|
|
|
|
static inline unsigned long highmap_start_pfn(void)
|
|
@@ -106,7 +147,7 @@ static void cpa_flush_all(unsigned long
|
|
{
|
|
BUG_ON(irqs_disabled());
|
|
|
|
- on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
|
|
+ on_each_cpu(__cpa_flush_all, (void *) cache, 1);
|
|
}
|
|
|
|
static void __cpa_flush_range(void *arg)
|
|
@@ -127,7 +168,7 @@ static void cpa_flush_range(unsigned lon
|
|
BUG_ON(irqs_disabled());
|
|
WARN_ON(PAGE_ALIGN(start) != start);
|
|
|
|
- on_each_cpu(__cpa_flush_range, NULL, 1, 1);
|
|
+ on_each_cpu(__cpa_flush_range, NULL, 1);
|
|
|
|
if (!cache)
|
|
return;
|
|
@@ -229,6 +270,7 @@ pte_t *lookup_address(unsigned long addr
|
|
|
|
return pte_offset_kernel(pmd, address);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(lookup_address);
|
|
|
|
/*
|
|
* Set the new pmd in all the pgds we know about:
|
|
@@ -509,6 +551,16 @@ static int split_large_page(pte_t *kpte,
|
|
}
|
|
#endif
|
|
|
|
+ if (address >= (unsigned long)__va(0) &&
|
|
+ address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT))
|
|
+ split_page_count(level);
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (address >= (unsigned long)__va(1UL<<32) &&
|
|
+ address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT))
|
|
+ split_page_count(level);
|
|
+#endif
|
|
+
|
|
/*
|
|
* Get the target mfn from the original entry:
|
|
*/
|
|
@@ -566,10 +618,9 @@ repeat:
|
|
if (!__pte_val(old_pte)) {
|
|
if (!primary)
|
|
return 0;
|
|
- printk(KERN_WARNING "CPA: called for zero pte. "
|
|
+ WARN(1, KERN_WARNING "CPA: called for zero pte. "
|
|
"vaddr = %lx cpa->vaddr = %lx\n", address,
|
|
cpa->vaddr);
|
|
- WARN_ON(1);
|
|
return -EINVAL;
|
|
}
|
|
|
|
@@ -634,15 +685,24 @@ static int cpa_process_alias(struct cpa_
|
|
struct cpa_data alias_cpa;
|
|
int ret = 0;
|
|
|
|
- if (cpa->pfn > max_pfn_mapped)
|
|
+ if (cpa->pfn >= max_pfn_mapped)
|
|
return 0;
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT)))
|
|
+ return 0;
|
|
+#endif
|
|
/*
|
|
* No need to redo, when the primary call touched the direct
|
|
* mapping already:
|
|
*/
|
|
- if (!within(cpa->vaddr, PAGE_OFFSET,
|
|
- PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
|
|
+ if (!(within(cpa->vaddr, PAGE_OFFSET,
|
|
+ PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
|
|
+#ifdef CONFIG_X86_64
|
|
+ || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
|
|
+ PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
|
|
+#endif
|
|
+ )) {
|
|
|
|
alias_cpa = *cpa;
|
|
alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
|
|
@@ -796,6 +856,51 @@ static inline int change_page_attr_clear
|
|
return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
|
|
}
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+static void _free_memtype(u64 pstart, u64 pend)
|
|
+{
|
|
+ u64 pa = pstart &= __PHYSICAL_MASK;
|
|
+ u64 ma = phys_to_machine(pa);
|
|
+
|
|
+ while ((pa += PAGE_SIZE) < pend) {
|
|
+ if (phys_to_machine(pa) != ma + (pa - pstart)) {
|
|
+ free_memtype(ma, ma + (pa - pstart));
|
|
+ pstart = pa;
|
|
+ ma = phys_to_machine(pa);
|
|
+ }
|
|
+ }
|
|
+ free_memtype(ma, ma + (pend - pstart));
|
|
+}
|
|
+#define free_memtype _free_memtype
|
|
+
|
|
+static int _reserve_memtype(u64 pstart, u64 pend, unsigned long req_type)
|
|
+{
|
|
+ u64 pcur = pstart &= __PHYSICAL_MASK, pa = pcur;
|
|
+ u64 ma = phys_to_machine(pa);
|
|
+ int rc = 0;
|
|
+
|
|
+ while ((pa += PAGE_SIZE) < pend) {
|
|
+ if (phys_to_machine(pa) != ma + (pa - pcur)) {
|
|
+ rc = reserve_memtype(ma, ma + (pa - pcur),
|
|
+ req_type, NULL);
|
|
+ if (rc)
|
|
+ break;
|
|
+ pcur = pa;
|
|
+ ma = phys_to_machine(pa);
|
|
+ }
|
|
+ }
|
|
+ if (likely(!rc))
|
|
+ rc = reserve_memtype(ma, ma + (pend - pcur), req_type, NULL);
|
|
+
|
|
+ if (unlikely(!rc) && pstart < pcur)
|
|
+ _free_memtype(pstart, pcur);
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+#define reserve_memtype(s, e, r, n) \
|
|
+ _reserve_memtype(s, e, BUILD_BUG_ON_ZERO(n) ?: (r))
|
|
+#endif
|
|
+
|
|
int _set_memory_uc(unsigned long addr, int numpages)
|
|
{
|
|
/*
|
|
@@ -810,7 +915,7 @@ int set_memory_uc(unsigned long addr, in
|
|
/*
|
|
* for now UC MINUS. see comments in ioremap_nocache()
|
|
*/
|
|
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
|
|
+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
|
|
_PAGE_CACHE_UC_MINUS, NULL))
|
|
return -EINVAL;
|
|
|
|
@@ -826,10 +931,10 @@ int _set_memory_wc(unsigned long addr, i
|
|
|
|
int set_memory_wc(unsigned long addr, int numpages)
|
|
{
|
|
- if (!pat_wc_enabled)
|
|
+ if (!pat_enabled)
|
|
return set_memory_uc(addr, numpages);
|
|
|
|
- if (reserve_memtype(addr, addr + numpages * PAGE_SIZE,
|
|
+ if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
|
|
_PAGE_CACHE_WC, NULL))
|
|
return -EINVAL;
|
|
|
|
@@ -845,7 +950,7 @@ int _set_memory_wb(unsigned long addr, i
|
|
|
|
int set_memory_wb(unsigned long addr, int numpages)
|
|
{
|
|
- free_memtype(addr, addr + numpages * PAGE_SIZE);
|
|
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
|
|
|
|
return _set_memory_wb(addr, numpages);
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/pat-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/pat-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -12,6 +12,8 @@
|
|
#include <linux/gfp.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/bootmem.h>
|
|
+#include <linux/debugfs.h>
|
|
+#include <linux/seq_file.h>
|
|
|
|
#include <asm/msr.h>
|
|
#include <asm/tlbflush.h>
|
|
@@ -26,11 +28,11 @@
|
|
#include <asm/io.h>
|
|
|
|
#ifdef CONFIG_X86_PAT
|
|
-int __read_mostly pat_wc_enabled = 1;
|
|
+int __read_mostly pat_enabled = 1;
|
|
|
|
void __cpuinit pat_disable(char *reason)
|
|
{
|
|
- pat_wc_enabled = 0;
|
|
+ pat_enabled = 0;
|
|
printk(KERN_INFO "%s\n", reason);
|
|
}
|
|
|
|
@@ -42,6 +44,19 @@ static int __init nopat(char *str)
|
|
early_param("nopat", nopat);
|
|
#endif
|
|
|
|
+
|
|
+static int debug_enable;
|
|
+static int __init pat_debug_setup(char *str)
|
|
+{
|
|
+ debug_enable = 1;
|
|
+ return 0;
|
|
+}
|
|
+__setup("debugpat", pat_debug_setup);
|
|
+
|
|
+#define dprintk(fmt, arg...) \
|
|
+ do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0)
|
|
+
|
|
+
|
|
static u64 __read_mostly boot_pat_state;
|
|
|
|
enum {
|
|
@@ -53,24 +68,25 @@ enum {
|
|
PAT_UC_MINUS = 7, /* UC, but can be overriden by MTRR */
|
|
};
|
|
|
|
-#define PAT(x,y) ((u64)PAT_ ## y << ((x)*8))
|
|
+#define PAT(x, y) ((u64)PAT_ ## y << ((x)*8))
|
|
|
|
void pat_init(void)
|
|
{
|
|
u64 pat;
|
|
|
|
- if (!pat_wc_enabled)
|
|
+ if (!pat_enabled)
|
|
return;
|
|
|
|
/* Paranoia check. */
|
|
- if (!cpu_has_pat) {
|
|
- printk(KERN_ERR "PAT enabled, but CPU feature cleared\n");
|
|
+ if (!cpu_has_pat && boot_pat_state) {
|
|
/*
|
|
- * Panic if this happens on the secondary CPU, and we
|
|
+ * If this happens we are on a secondary CPU, but
|
|
* switched to PAT on the boot CPU. We have no way to
|
|
* undo PAT.
|
|
- */
|
|
- BUG_ON(boot_pat_state);
|
|
+ */
|
|
+ printk(KERN_ERR "PAT enabled, "
|
|
+ "but not supported by secondary CPU\n");
|
|
+ BUG();
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
@@ -87,8 +103,8 @@ void pat_init(void)
|
|
* 011 UC _PAGE_CACHE_UC
|
|
* PAT bit unused
|
|
*/
|
|
- pat = PAT(0,WB) | PAT(1,WC) | PAT(2,UC_MINUS) | PAT(3,UC) |
|
|
- PAT(4,WB) | PAT(5,WC) | PAT(6,UC_MINUS) | PAT(7,UC);
|
|
+ pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
|
|
+ PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
|
|
|
|
/* Boot CPU check */
|
|
if (!boot_pat_state)
|
|
@@ -113,13 +129,13 @@ void pat_init(void)
|
|
static char *cattr_name(unsigned long flags)
|
|
{
|
|
switch (flags & _PAGE_CACHE_MASK) {
|
|
- case _PAGE_CACHE_UC: return "uncached";
|
|
- case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
|
|
- case _PAGE_CACHE_WB: return "write-back";
|
|
- case _PAGE_CACHE_WC: return "write-combining";
|
|
- case _PAGE_CACHE_WP: return "write-protected";
|
|
- case _PAGE_CACHE_WT: return "write-through";
|
|
- default: return "broken";
|
|
+ case _PAGE_CACHE_UC: return "uncached";
|
|
+ case _PAGE_CACHE_UC_MINUS: return "uncached-minus";
|
|
+ case _PAGE_CACHE_WB: return "write-back";
|
|
+ case _PAGE_CACHE_WC: return "write-combining";
|
|
+ case _PAGE_CACHE_WP: return "write-protected";
|
|
+ case _PAGE_CACHE_WT: return "write-through";
|
|
+ default: return "broken";
|
|
}
|
|
}
|
|
|
|
@@ -157,49 +173,55 @@ static DEFINE_SPINLOCK(memtype_lock); /
|
|
* The intersection is based on "Effective Memory Type" tables in IA-32
|
|
* SDM vol 3a
|
|
*/
|
|
-static int pat_x_mtrr_type(u64 start, u64 end, unsigned long prot,
|
|
- unsigned long *ret_prot)
|
|
+static unsigned long pat_x_mtrr_type(u64 start, u64 end, unsigned long req_type)
|
|
{
|
|
- unsigned long pat_type;
|
|
- u8 mtrr_type;
|
|
-
|
|
- pat_type = prot & _PAGE_CACHE_MASK;
|
|
- prot &= (~_PAGE_CACHE_MASK);
|
|
-
|
|
- /*
|
|
- * We return the PAT request directly for types where PAT takes
|
|
- * precedence with respect to MTRR and for UC_MINUS.
|
|
- * Consistency checks with other PAT requests is done later
|
|
- * while going through memtype list.
|
|
- */
|
|
- if (pat_type == _PAGE_CACHE_WC) {
|
|
- *ret_prot = prot | _PAGE_CACHE_WC;
|
|
- return 0;
|
|
- } else if (pat_type == _PAGE_CACHE_UC_MINUS) {
|
|
- *ret_prot = prot | _PAGE_CACHE_UC_MINUS;
|
|
- return 0;
|
|
- } else if (pat_type == _PAGE_CACHE_UC) {
|
|
- *ret_prot = prot | _PAGE_CACHE_UC;
|
|
- return 0;
|
|
- }
|
|
-
|
|
/*
|
|
* Look for MTRR hint to get the effective type in case where PAT
|
|
* request is for WB.
|
|
*/
|
|
- mtrr_type = mtrr_type_lookup(start, end);
|
|
+ if (req_type == _PAGE_CACHE_WB) {
|
|
+ u8 mtrr_type;
|
|
|
|
- if (mtrr_type == MTRR_TYPE_UNCACHABLE) {
|
|
- *ret_prot = prot | _PAGE_CACHE_UC;
|
|
- } else if (mtrr_type == MTRR_TYPE_WRCOMB) {
|
|
- *ret_prot = prot | _PAGE_CACHE_WC;
|
|
- } else {
|
|
- *ret_prot = prot | _PAGE_CACHE_WB;
|
|
+ mtrr_type = mtrr_type_lookup(start, end);
|
|
+ if (mtrr_type == MTRR_TYPE_UNCACHABLE)
|
|
+ return _PAGE_CACHE_UC;
|
|
+ if (mtrr_type == MTRR_TYPE_WRCOMB)
|
|
+ return _PAGE_CACHE_WC;
|
|
+ }
|
|
+
|
|
+ return req_type;
|
|
+}
|
|
+
|
|
+static int chk_conflict(struct memtype *new, struct memtype *entry,
|
|
+ unsigned long *type)
|
|
+{
|
|
+ if (new->type != entry->type) {
|
|
+ if (type) {
|
|
+ new->type = entry->type;
|
|
+ *type = entry->type;
|
|
+ } else
|
|
+ goto conflict;
|
|
}
|
|
|
|
+ /* check overlaps with more than one entry in the list */
|
|
+ list_for_each_entry_continue(entry, &memtype_list, nd) {
|
|
+ if (new->end <= entry->start)
|
|
+ break;
|
|
+ else if (new->type != entry->type)
|
|
+ goto conflict;
|
|
+ }
|
|
return 0;
|
|
+
|
|
+ conflict:
|
|
+ printk(KERN_INFO "%s:%d conflicting memory types "
|
|
+ "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start,
|
|
+ new->end, cattr_name(new->type), cattr_name(entry->type));
|
|
+ return -EBUSY;
|
|
}
|
|
|
|
+static struct memtype *cached_entry;
|
|
+static u64 cached_start;
|
|
+
|
|
/*
|
|
* req_type typically has one of the:
|
|
* - _PAGE_CACHE_WB
|
|
@@ -210,37 +232,36 @@ static int pat_x_mtrr_type(u64 start, u6
|
|
* req_type will have a special case value '-1', when requester want to inherit
|
|
* the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
|
|
*
|
|
- * If ret_type is NULL, function will return an error if it cannot reserve the
|
|
- * region with req_type. If ret_type is non-null, function will return
|
|
- * available type in ret_type in case of no error. In case of any error
|
|
+ * If new_type is NULL, function will return an error if it cannot reserve the
|
|
+ * region with req_type. If new_type is non-NULL, function will return
|
|
+ * available type in new_type in case of no error. In case of any error
|
|
* it will return a negative return value.
|
|
*/
|
|
int reserve_memtype(u64 start, u64 end, unsigned long req_type,
|
|
- unsigned long *ret_type)
|
|
+ unsigned long *new_type)
|
|
{
|
|
- struct memtype *new_entry = NULL;
|
|
- struct memtype *parse;
|
|
+ struct memtype *new, *entry;
|
|
unsigned long actual_type;
|
|
+ struct list_head *where;
|
|
int err = 0;
|
|
|
|
- /* Only track when pat_wc_enabled */
|
|
- if (!pat_wc_enabled) {
|
|
+ BUG_ON(start >= end); /* end is exclusive */
|
|
+
|
|
+ if (!pat_enabled) {
|
|
/* This is identical to page table setting without PAT */
|
|
- if (ret_type) {
|
|
- if (req_type == -1) {
|
|
- *ret_type = _PAGE_CACHE_WB;
|
|
- } else {
|
|
- *ret_type = req_type;
|
|
- }
|
|
+ if (new_type) {
|
|
+ if (req_type == -1)
|
|
+ *new_type = _PAGE_CACHE_WB;
|
|
+ else
|
|
+ *new_type = req_type & _PAGE_CACHE_MASK;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Low ISA region is always mapped WB in page table. No need to track */
|
|
- if (start >= ISA_START_ADDRESS && (end - 1) <= ISA_END_ADDRESS) {
|
|
- if (ret_type)
|
|
- *ret_type = _PAGE_CACHE_WB;
|
|
-
|
|
+ if (is_ISA_range(start, end - 1)) {
+ if (new_type)
+ *new_type = _PAGE_CACHE_WB;
return 0;
}
@@ -253,206 +274,118 @@ int reserve_memtype(u64 start, u64 end,
*/
u8 mtrr_type = mtrr_type_lookup(start, end);
- if (mtrr_type == MTRR_TYPE_WRBACK) {
- req_type = _PAGE_CACHE_WB;
+ if (mtrr_type == MTRR_TYPE_WRBACK)
actual_type = _PAGE_CACHE_WB;
- } else {
- req_type = _PAGE_CACHE_UC_MINUS;
+ else
actual_type = _PAGE_CACHE_UC_MINUS;
- }
- } else {
- req_type &= _PAGE_CACHE_MASK;
- err = pat_x_mtrr_type(start, end, req_type, &actual_type);
- }
-
- if (err) {
- if (ret_type)
- *ret_type = actual_type;
+ } else
+ actual_type = pat_x_mtrr_type(start, end,
+ req_type & _PAGE_CACHE_MASK);
- return -EINVAL;
- }
-
- new_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
- if (!new_entry)
+ new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
+ if (!new)
return -ENOMEM;
- new_entry->start = start;
- new_entry->end = end;
- new_entry->type = actual_type;
+ new->start = start;
+ new->end = end;
+ new->type = actual_type;
- if (ret_type)
- *ret_type = actual_type;
+ if (new_type)
+ *new_type = actual_type;
spin_lock(&memtype_lock);
- /* Search for existing mapping that overlaps the current range */
- list_for_each_entry(parse, &memtype_list, nd) {
- struct memtype *saved_ptr;
+ if (cached_entry && start >= cached_start)
+ entry = cached_entry;
+ else
+ entry = list_entry(&memtype_list, struct memtype, nd);
- if (parse->start >= end) {
- pr_debug("New Entry\n");
- list_add(&new_entry->nd, parse->nd.prev);
- new_entry = NULL;
+ /* Search for existing mapping that overlaps the current range */
+ where = NULL;
+ list_for_each_entry_continue(entry, &memtype_list, nd) {
+ if (end <= entry->start) {
+ where = entry->nd.prev;
+ cached_entry = list_entry(where, struct memtype, nd);
break;
- }
|
|
-
|
|
- if (start <= parse->start && end >= parse->start) {
|
|
- if (actual_type != parse->type && ret_type) {
|
|
- actual_type = parse->type;
|
|
- *ret_type = actual_type;
|
|
- new_entry->type = actual_type;
|
|
- }
|
|
-
|
|
- if (actual_type != parse->type) {
|
|
- printk(
|
|
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
|
|
- current->comm, current->pid,
|
|
- start, end,
|
|
- cattr_name(actual_type),
|
|
- cattr_name(parse->type));
|
|
- err = -EBUSY;
|
|
- break;
|
|
- }
|
|
-
|
|
- saved_ptr = parse;
|
|
- /*
|
|
- * Check to see whether the request overlaps more
|
|
- * than one entry in the list
|
|
- */
|
|
- list_for_each_entry_continue(parse, &memtype_list, nd) {
|
|
- if (end <= parse->start) {
|
|
- break;
|
|
- }
|
|
-
|
|
- if (actual_type != parse->type) {
|
|
- printk(
|
|
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
|
|
- current->comm, current->pid,
|
|
- start, end,
|
|
- cattr_name(actual_type),
|
|
- cattr_name(parse->type));
|
|
- err = -EBUSY;
|
|
- break;
|
|
- }
|
|
- }
|
|
-
|
|
- if (err) {
|
|
- break;
|
|
+ } else if (start <= entry->start) { /* end > entry->start */
|
|
+ err = chk_conflict(new, entry, new_type);
|
|
+ if (!err) {
|
|
+ dprintk("Overlap at 0x%Lx-0x%Lx\n",
|
|
+ entry->start, entry->end);
|
|
+ where = entry->nd.prev;
|
|
+ cached_entry = list_entry(where,
|
|
+ struct memtype, nd);
|
|
}
|
|
-
|
|
- pr_debug("Overlap at 0x%Lx-0x%Lx\n",
|
|
- saved_ptr->start, saved_ptr->end);
|
|
- /* No conflict. Go ahead and add this new entry */
|
|
- list_add(&new_entry->nd, saved_ptr->nd.prev);
|
|
- new_entry = NULL;
|
|
break;
|
|
- }
|
|
-
|
|
- if (start < parse->end) {
|
|
- if (actual_type != parse->type && ret_type) {
|
|
- actual_type = parse->type;
|
|
- *ret_type = actual_type;
|
|
- new_entry->type = actual_type;
|
|
- }
|
|
-
|
|
- if (actual_type != parse->type) {
|
|
- printk(
|
|
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
|
|
- current->comm, current->pid,
|
|
- start, end,
|
|
- cattr_name(actual_type),
|
|
- cattr_name(parse->type));
|
|
- err = -EBUSY;
|
|
- break;
|
|
- }
|
|
-
|
|
- saved_ptr = parse;
|
|
- /*
|
|
- * Check to see whether the request overlaps more
|
|
- * than one entry in the list
|
|
- */
|
|
- list_for_each_entry_continue(parse, &memtype_list, nd) {
|
|
- if (end <= parse->start) {
|
|
- break;
|
|
- }
|
|
-
|
|
- if (actual_type != parse->type) {
|
|
- printk(
|
|
- KERN_INFO "%s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
|
|
- current->comm, current->pid,
|
|
- start, end,
|
|
- cattr_name(actual_type),
|
|
- cattr_name(parse->type));
|
|
- err = -EBUSY;
|
|
- break;
|
|
+ } else if (start < entry->end) { /* start > entry->start */
|
|
+ err = chk_conflict(new, entry, new_type);
|
|
+ if (!err) {
|
|
+ dprintk("Overlap at 0x%Lx-0x%Lx\n",
|
|
+ entry->start, entry->end);
|
|
+ cached_entry = list_entry(entry->nd.prev,
|
|
+ struct memtype, nd);
|
|
+
|
|
+ /*
|
|
+ * Move to right position in the linked
|
|
+ * list to add this new entry
|
|
+ */
|
|
+ list_for_each_entry_continue(entry,
|
|
+ &memtype_list, nd) {
|
|
+ if (start <= entry->start) {
|
|
+ where = entry->nd.prev;
|
|
+ break;
|
|
+ }
|
|
}
|
|
}
|
|
-
|
|
- if (err) {
|
|
- break;
|
|
- }
|
|
-
|
|
- pr_debug(KERN_INFO "Overlap at 0x%Lx-0x%Lx\n",
|
|
- saved_ptr->start, saved_ptr->end);
|
|
- /* No conflict. Go ahead and add this new entry */
|
|
- list_add(&new_entry->nd, &saved_ptr->nd);
|
|
- new_entry = NULL;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (err) {
|
|
- printk(KERN_INFO
|
|
- "reserve_memtype failed 0x%Lx-0x%Lx, track %s, req %s\n",
|
|
- start, end, cattr_name(new_entry->type),
|
|
- cattr_name(req_type));
|
|
- kfree(new_entry);
|
|
+ printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, "
|
|
+ "track %s, req %s\n",
|
|
+ start, end, cattr_name(new->type), cattr_name(req_type));
|
|
+ kfree(new);
|
|
spin_unlock(&memtype_lock);
|
|
return err;
|
|
}
|
|
|
|
- if (new_entry) {
|
|
- /* No conflict. Not yet added to the list. Add to the tail */
|
|
- list_add_tail(&new_entry->nd, &memtype_list);
|
|
- pr_debug("New Entry\n");
|
|
- }
|
|
+ cached_start = start;
|
|
|
|
- if (ret_type) {
|
|
- pr_debug(
|
|
- "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
|
|
- start, end, cattr_name(actual_type),
|
|
- cattr_name(req_type), cattr_name(*ret_type));
|
|
- } else {
|
|
- pr_debug(
|
|
- "reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s\n",
|
|
- start, end, cattr_name(actual_type),
|
|
- cattr_name(req_type));
|
|
- }
|
|
+ if (where)
|
|
+ list_add(&new->nd, where);
|
|
+ else
|
|
+ list_add_tail(&new->nd, &memtype_list);
|
|
|
|
spin_unlock(&memtype_lock);
|
|
+
|
|
+ dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
|
|
+ start, end, cattr_name(new->type), cattr_name(req_type),
|
|
+ new_type ? cattr_name(*new_type) : "-");
|
|
+
|
|
return err;
|
|
}
|
|
|
|
int free_memtype(u64 start, u64 end)
|
|
{
|
|
- struct memtype *ml;
|
|
+ struct memtype *entry;
|
|
int err = -EINVAL;
|
|
|
|
- /* Only track when pat_wc_enabled */
|
|
- if (!pat_wc_enabled) {
|
|
+ if (!pat_enabled)
|
|
return 0;
|
|
- }
|
|
|
|
/* Low ISA region is always mapped WB. No need to track */
|
|
- if (start >= ISA_START_ADDRESS && end <= ISA_END_ADDRESS) {
|
|
+ if (is_ISA_range(start, end - 1))
|
|
return 0;
|
|
- }
|
|
|
|
spin_lock(&memtype_lock);
|
|
- list_for_each_entry(ml, &memtype_list, nd) {
|
|
- if (ml->start == start && ml->end == end) {
|
|
- list_del(&ml->nd);
|
|
- kfree(ml);
|
|
+ list_for_each_entry(entry, &memtype_list, nd) {
|
|
+ if (entry->start == start && entry->end == end) {
|
|
+ if (cached_entry == entry || cached_start == start)
|
|
+ cached_entry = NULL;
|
|
+
|
|
+ list_del(&entry->nd);
|
|
+ kfree(entry);
|
|
err = 0;
|
|
break;
|
|
}
|
|
@@ -464,27 +397,19 @@ int free_memtype(u64 start, u64 end)
|
|
current->comm, current->pid, start, end);
|
|
}
|
|
|
|
- pr_debug("free_memtype request 0x%Lx-0x%Lx\n", start, end);
|
|
+ dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end);
|
|
return err;
|
|
}
|
|
|
|
|
|
-/*
|
|
- * /dev/mem mmap interface. The memtype used for mapping varies:
|
|
- * - Use UC for mappings with O_SYNC flag
|
|
- * - Without O_SYNC flag, if there is any conflict in reserve_memtype,
|
|
- * inherit the memtype from existing mapping.
|
|
- * - Else use UC_MINUS memtype (for backward compatibility with existing
|
|
- * X drivers.
|
|
- */
|
|
pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
|
|
unsigned long size, pgprot_t vma_prot)
|
|
{
|
|
return vma_prot;
|
|
}
|
|
|
|
-#ifdef CONFIG_NONPROMISC_DEVMEM
|
|
-/* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/
|
|
+#ifdef CONFIG_STRICT_DEVMEM
|
|
+/* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/
|
|
static inline int range_is_allowed(unsigned long mfn, unsigned long size)
|
|
{
|
|
return 1;
|
|
@@ -508,20 +433,20 @@ static inline int range_is_allowed(unsig
|
|
}
|
|
return 1;
|
|
}
|
|
-#endif /* CONFIG_NONPROMISC_DEVMEM */
|
|
+#endif /* CONFIG_STRICT_DEVMEM */
|
|
|
|
int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
|
|
unsigned long size, pgprot_t *vma_prot)
|
|
{
|
|
u64 addr = (u64)mfn << PAGE_SHIFT;
|
|
- unsigned long flags = _PAGE_CACHE_UC_MINUS;
|
|
+ unsigned long flags = -1;
|
|
int retval;
|
|
|
|
if (!range_is_allowed(mfn, size))
|
|
return 0;
|
|
|
|
if (file->f_flags & O_SYNC) {
|
|
- flags = _PAGE_CACHE_UC;
|
|
+ flags = _PAGE_CACHE_UC_MINUS;
|
|
}
|
|
|
|
#ifndef CONFIG_X86_32
|
|
@@ -534,25 +459,26 @@ int phys_mem_access_prot_allowed(struct
|
|
* caching for the high addresses through the KEN pin, but
|
|
* we maintain the tradition of paranoia in this code.
|
|
*/
|
|
- if (!pat_wc_enabled &&
|
|
- ! ( test_bit(X86_FEATURE_MTRR, boot_cpu_data.x86_capability) ||
|
|
- test_bit(X86_FEATURE_K6_MTRR, boot_cpu_data.x86_capability) ||
|
|
- test_bit(X86_FEATURE_CYRIX_ARR, boot_cpu_data.x86_capability) ||
|
|
- test_bit(X86_FEATURE_CENTAUR_MCR, boot_cpu_data.x86_capability)) &&
|
|
- (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
|
|
+ if (!pat_enabled &&
|
|
+ !(boot_cpu_has(X86_FEATURE_MTRR) ||
|
|
+ boot_cpu_has(X86_FEATURE_K6_MTRR) ||
|
|
+ boot_cpu_has(X86_FEATURE_CYRIX_ARR) ||
|
|
+ boot_cpu_has(X86_FEATURE_CENTAUR_MCR)) &&
|
|
+ (pfn << PAGE_SHIFT) >= __pa(high_memory)) {
|
|
flags = _PAGE_CACHE_UC;
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
/*
|
|
- * With O_SYNC, we can only take UC mapping. Fail if we cannot.
|
|
+ * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
|
|
+ *
|
|
* Without O_SYNC, we want to get
|
|
* - WB for WB-able memory and no other conflicting mappings
|
|
* - UC_MINUS for non-WB-able memory with no other conflicting mappings
|
|
* - Inherit from confliting mappings otherwise
|
|
*/
|
|
- if (flags != _PAGE_CACHE_UC_MINUS) {
|
|
+ if (flags != -1) {
|
|
retval = reserve_memtype(addr, addr + size, flags, NULL);
|
|
} else {
|
|
retval = reserve_memtype(addr, addr + size, -1, &flags);
|
|
@@ -600,3 +526,88 @@ void unmap_devmem(unsigned long mfn, uns
|
|
free_memtype(addr, addr + size);
|
|
}
|
|
|
|
+#if defined(CONFIG_DEBUG_FS)
|
|
+
|
|
+/* get Nth element of the linked list */
|
|
+static struct memtype *memtype_get_idx(loff_t pos)
|
|
+{
|
|
+ struct memtype *list_node, *print_entry;
|
|
+ int i = 1;
|
|
+
|
|
+ print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL);
|
|
+ if (!print_entry)
|
|
+ return NULL;
|
|
+
|
|
+ spin_lock(&memtype_lock);
|
|
+ list_for_each_entry(list_node, &memtype_list, nd) {
|
|
+ if (pos == i) {
|
|
+ *print_entry = *list_node;
|
|
+ spin_unlock(&memtype_lock);
|
|
+ return print_entry;
|
|
+ }
|
|
+ ++i;
|
|
+ }
|
|
+ spin_unlock(&memtype_lock);
|
|
+ kfree(print_entry);
|
|
+ return NULL;
|
|
+}
|
|
+
|
|
+static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
|
|
+{
|
|
+ if (*pos == 0) {
|
|
+ ++*pos;
|
|
+ seq_printf(seq, "PAT memtype list:\n");
|
|
+ }
|
|
+
|
|
+ return memtype_get_idx(*pos);
|
|
+}
|
|
+
|
|
+static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
|
|
+{
|
|
+ ++*pos;
|
|
+ return memtype_get_idx(*pos);
|
|
+}
|
|
+
|
|
+static void memtype_seq_stop(struct seq_file *seq, void *v)
|
|
+{
|
|
+}
|
|
+
|
|
+static int memtype_seq_show(struct seq_file *seq, void *v)
|
|
+{
|
|
+ struct memtype *print_entry = (struct memtype *)v;
|
|
+
|
|
+ seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type),
|
|
+ print_entry->start, print_entry->end);
|
|
+ kfree(print_entry);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct seq_operations memtype_seq_ops = {
|
|
+ .start = memtype_seq_start,
|
|
+ .next = memtype_seq_next,
|
|
+ .stop = memtype_seq_stop,
|
|
+ .show = memtype_seq_show,
|
|
+};
|
|
+
|
|
+static int memtype_seq_open(struct inode *inode, struct file *file)
|
|
+{
|
|
+ return seq_open(file, &memtype_seq_ops);
|
|
+}
|
|
+
|
|
+static const struct file_operations memtype_fops = {
|
|
+ .open = memtype_seq_open,
|
|
+ .read = seq_read,
|
|
+ .llseek = seq_lseek,
|
|
+ .release = seq_release,
|
|
+};
|
|
+
|
|
+static int __init pat_memtype_list_init(void)
|
|
+{
|
|
+ debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
|
|
+ NULL, &memtype_fops);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+late_initcall(pat_memtype_list_init);
|
|
+
|
|
+#endif /* CONFIG_DEBUG_FS */
|
|
--- head-2011-03-11.orig/arch/x86/mm/pgtable-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/pgtable-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -4,6 +4,7 @@
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/tlb.h>
|
|
+#include <asm/fixmap.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <asm/mmu_context.h>
|
|
|
|
@@ -410,15 +411,9 @@ static inline void pgd_list_del(pgd_t *p
|
|
static void pgd_ctor(void *p)
|
|
{
|
|
pgd_t *pgd = p;
|
|
- unsigned long flags;
|
|
|
|
pgd_test_and_unpin(pgd);
|
|
|
|
- /* Clear usermode parts of PGD */
|
|
- memset(pgd, 0, KERNEL_PGD_BOUNDARY*sizeof(pgd_t));
|
|
-
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
-
|
|
/* If the pgd points to a shared pagetable level (either the
|
|
ptes in non-PAE, or shared PMD in PAE), then just copy the
|
|
references from swapper_pg_dir. */
|
|
@@ -440,13 +435,9 @@ static void pgd_ctor(void *p)
|
|
__pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
|
|
#endif
|
|
|
|
-#ifndef CONFIG_X86_PAE
|
|
/* list required to sync kernel mapping updates */
|
|
if (!SHARED_KERNEL_PMD)
|
|
pgd_list_add(pgd);
|
|
-#endif
|
|
-
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
}
|
|
|
|
static void pgd_dtor(void *pgd)
|
|
@@ -475,33 +466,6 @@ static void pgd_dtor(void *pgd)
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
/*
|
|
- * Mop up any pmd pages which may still be attached to the pgd.
|
|
- * Normally they will be freed by munmap/exit_mmap, but any pmd we
|
|
- * preallocate which never got a corresponding vma will need to be
|
|
- * freed manually.
|
|
- */
|
|
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for(i = 0; i < UNSHARED_PTRS_PER_PGD; i++) {
|
|
- pgd_t pgd = pgdp[i];
|
|
-
|
|
- if (__pgd_val(pgd) != 0) {
|
|
- pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
|
|
-
|
|
- pgdp[i] = xen_make_pgd(0);
|
|
-
|
|
- paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
|
|
- pmd_free(mm, pmd);
|
|
- }
|
|
- }
|
|
-
|
|
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
|
|
- xen_destroy_contiguous_region((unsigned long)pgdp, 0);
|
|
-}
|
|
-
|
|
-/*
|
|
* In PAE mode, we need to do a cr3 reload (=tlb flush) when
|
|
* updating the top-level pagetable entries to guarantee the
|
|
* processor notices the update. Since this is expensive, and
|
|
@@ -512,61 +476,7 @@ static void pgd_mop_up_pmds(struct mm_st
|
|
* not shared between pagetables (!SHARED_KERNEL_PMDS), we allocate
|
|
* and initialize the kernel pmds here.
|
|
*/
|
|
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
|
|
-{
|
|
- pud_t *pud;
|
|
- pmd_t *pmds[UNSHARED_PTRS_PER_PGD];
|
|
- unsigned long addr, flags;
|
|
- int i;
|
|
-
|
|
- /*
|
|
- * We can race save/restore (if we sleep during a GFP_KERNEL memory
|
|
- * allocation). We therefore store virtual addresses of pmds as they
|
|
- * do not change across save/restore, and poke the machine addresses
|
|
- * into the pgdir under the pgd_lock.
|
|
- */
|
|
- for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD; i++, addr += PUD_SIZE) {
|
|
- pmds[i] = pmd_alloc_one(mm, addr);
|
|
- if (!pmds[i])
|
|
- goto out_oom;
|
|
- }
|
|
-
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
-
|
|
- /* Protect against save/restore: move below 4GB under pgd_lock. */
|
|
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
|
|
- && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
-out_oom:
|
|
- while (i--)
|
|
- pmd_free(mm, pmds[i]);
|
|
- return 0;
|
|
- }
|
|
-
|
|
- /* Copy kernel pmd contents and write-protect the new pmds. */
|
|
- pud = pud_offset(pgd, 0);
|
|
- for (addr = i = 0; i < UNSHARED_PTRS_PER_PGD;
|
|
- i++, pud++, addr += PUD_SIZE) {
|
|
- if (i >= KERNEL_PGD_BOUNDARY) {
|
|
- memcpy(pmds[i],
|
|
- (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
|
|
- sizeof(pmd_t) * PTRS_PER_PMD);
|
|
- make_lowmem_page_readonly(
|
|
- pmds[i], XENFEAT_writable_page_tables);
|
|
- }
|
|
-
|
|
- /* It is safe to poke machine addresses of pmds under the pgd_lock. */
|
|
- pud_populate(mm, pud, pmds[i]);
|
|
- }
|
|
-
|
|
- /* List required to sync kernel mapping updates and
|
|
- * to pin/unpin on save/restore. */
|
|
- pgd_list_add(pgd);
|
|
-
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
-
|
|
- return 1;
|
|
-}
|
|
+#define PREALLOCATED_PMDS UNSHARED_PTRS_PER_PGD
|
|
|
|
void pud_populate(struct mm_struct *mm, pud_t *pudp, pmd_t *pmd)
|
|
{
|
|
@@ -596,16 +506,101 @@ void pud_populate(struct mm_struct *mm,
|
|
xen_tlb_flush();
|
|
}
|
|
#else /* !CONFIG_X86_PAE */
|
|
+
|
|
/* No need to prepopulate any pagetable entries in non-PAE modes. */
|
|
-static int pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd)
|
|
+#define PREALLOCATED_PMDS 0
|
|
+
|
|
+#endif /* CONFIG_X86_PAE */
|
|
+
|
|
+static void free_pmds(pmd_t *pmds[], struct mm_struct *mm, bool contig)
|
|
{
|
|
- return 1;
|
|
+ int i;
|
|
+
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ if (contig)
|
|
+ xen_destroy_contiguous_region((unsigned long)mm->pgd, 0);
|
|
+#endif
|
|
+
|
|
+ for(i = 0; i < PREALLOCATED_PMDS; i++)
|
|
+ if (pmds[i])
|
|
+ pmd_free(mm, pmds[i]);
|
|
}
|
|
|
|
-static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgd)
|
|
+static int preallocate_pmds(pmd_t *pmds[], struct mm_struct *mm)
|
|
{
|
|
+ int i;
|
|
+ bool failed = false;
|
|
+
|
|
+ for(i = 0; i < PREALLOCATED_PMDS; i++) {
|
|
+ pmd_t *pmd = pmd_alloc_one(mm, i << PUD_SHIFT);
|
|
+ if (pmd == NULL)
|
|
+ failed = true;
|
|
+ pmds[i] = pmd;
|
|
+ }
|
|
+
|
|
+ if (failed) {
|
|
+ free_pmds(pmds, mm, false);
|
|
+ return -ENOMEM;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Mop up any pmd pages which may still be attached to the pgd.
|
|
+ * Normally they will be freed by munmap/exit_mmap, but any pmd we
|
|
+ * preallocate which never got a corresponding vma will need to be
|
|
+ * freed manually.
|
|
+ */
|
|
+static void pgd_mop_up_pmds(struct mm_struct *mm, pgd_t *pgdp)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for(i = 0; i < PREALLOCATED_PMDS; i++) {
|
|
+ pgd_t pgd = pgdp[i];
|
|
+
|
|
+ if (__pgd_val(pgd) != 0) {
|
|
+ pmd_t *pmd = (pmd_t *)pgd_page_vaddr(pgd);
|
|
+
|
|
+ pgdp[i] = xen_make_pgd(0);
|
|
+
|
|
+ paravirt_release_pmd(pgd_val(pgd) >> PAGE_SHIFT);
|
|
+ pmd_free(mm, pmd);
|
|
+ }
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
|
|
+ xen_destroy_contiguous_region((unsigned long)pgdp, 0);
|
|
+#endif
|
|
+}
|
|
+
|
|
+static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
|
|
+{
|
|
+ pud_t *pud;
|
|
+ unsigned long addr;
|
|
+ int i;
|
|
+
|
|
+ if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
|
|
+ return;
|
|
+
|
|
+ pud = pud_offset(pgd, 0);
|
|
+ for (addr = i = 0; i < PREALLOCATED_PMDS;
|
|
+ i++, pud++, addr += PUD_SIZE) {
|
|
+ pmd_t *pmd = pmds[i];
|
|
+
|
|
+ if (i >= KERNEL_PGD_BOUNDARY) {
|
|
+ memcpy(pmd,
|
|
+ (pmd_t *)pgd_page_vaddr(swapper_pg_dir[i]),
|
|
+ sizeof(pmd_t) * PTRS_PER_PMD);
|
|
+ make_lowmem_page_readonly(
|
|
+ pmd, XENFEAT_writable_page_tables);
|
|
+ }
|
|
+
|
|
+ /* It is safe to poke machine addresses of pmds under the pgd_lock. */
|
|
+ pud_populate(mm, pud, pmd);
|
|
+ }
|
|
}
|
|
-#endif /* CONFIG_X86_PAE */
|
|
|
|
#ifdef CONFIG_X86_64
|
|
/* We allocate two contiguous pages for kernel and user. */
|
|
@@ -616,22 +611,55 @@ static void pgd_mop_up_pmds(struct mm_st
|
|
|
|
pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
{
|
|
- pgd_t *pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
|
|
+ pgd_t *pgd;
|
|
+ pmd_t *pmds[PREALLOCATED_PMDS];
|
|
+ unsigned long flags;
|
|
+
|
|
+ pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, PGD_ORDER);
|
|
+
|
|
+ if (pgd == NULL)
|
|
+ goto out;
|
|
|
|
- /* so that alloc_pd can use it */
|
|
mm->pgd = pgd;
|
|
- if (pgd) {
|
|
- /* Store a back link for vmalloc_sync_all(). */
|
|
- set_page_private(virt_to_page(pgd), (unsigned long)mm);
|
|
- pgd_ctor(pgd);
|
|
- }
|
|
|
|
- if (pgd && !pgd_prepopulate_pmd(mm, pgd)) {
|
|
- free_pages((unsigned long)pgd, PGD_ORDER);
|
|
- pgd = NULL;
|
|
+ if (preallocate_pmds(pmds, mm) != 0)
|
|
+ goto out_free_pgd;
|
|
+
|
|
+ if (paravirt_pgd_alloc(mm) != 0)
|
|
+ goto out_free_pmds;
|
|
+
|
|
+ /*
|
|
+ * Make sure that pre-populating the pmds is atomic with
|
|
+ * respect to anything walking the pgd_list, so that they
|
|
+ * never see a partially populated pgd.
|
|
+ */
|
|
+ spin_lock_irqsave(&pgd_lock, flags);
|
|
+
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ /* Protect against save/restore: move below 4GB under pgd_lock. */
|
|
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb)
|
|
+ && xen_create_contiguous_region((unsigned long)pgd, 0, 32)) {
|
|
+ spin_unlock_irqrestore(&pgd_lock, flags);
|
|
+ goto out_free_pmds;
|
|
}
|
|
+#endif
|
|
+
|
|
+ pgd_ctor(pgd);
|
|
+ pgd_prepopulate_pmd(mm, pgd, pmds);
|
|
+
|
|
+ /* Store a back link for vmalloc_sync_all(). */
|
|
+ set_page_private(virt_to_page(pgd), (unsigned long)mm);
|
|
+
|
|
+ spin_unlock_irqrestore(&pgd_lock, flags);
|
|
|
|
return pgd;
|
|
+
|
|
+out_free_pmds:
|
|
+ free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb));
|
|
+out_free_pgd:
|
|
+ free_pages((unsigned long)pgd, PGD_ORDER);
|
|
+out:
|
|
+ return NULL;
|
|
}
|
|
|
|
void pgd_free(struct mm_struct *mm, pgd_t *pgd)
|
|
@@ -647,6 +675,7 @@ void pgd_free(struct mm_struct *mm, pgd_
|
|
pgd_dtor(pgd);
|
|
|
|
pgd_mop_up_pmds(mm, pgd);
|
|
+ paravirt_pgd_free(mm, pgd);
|
|
free_pages((unsigned long)pgd, PGD_ORDER);
|
|
}
|
|
|
|
@@ -689,7 +718,7 @@ int ptep_test_and_clear_young(struct vm_
|
|
|
|
if (pte_young(*ptep))
|
|
ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,
|
|
- &ptep->pte);
|
|
+ (unsigned long *) &ptep->pte);
|
|
|
|
if (ret)
|
|
pte_update(vma->vm_mm, addr, ptep);
|
|
@@ -711,3 +740,42 @@ int ptep_clear_flush_young(struct vm_are
|
|
|
|
return young;
|
|
}
|
|
+
|
|
+int fixmaps_set;
|
|
+
|
|
+void xen_set_fixmap(enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
|
|
+{
|
|
+ unsigned long address = __fix_to_virt(idx);
|
|
+ pte_t pte;
|
|
+
|
|
+ if (idx >= __end_of_fixed_addresses) {
|
|
+ BUG();
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ switch (idx) {
|
|
+#ifdef CONFIG_X86_64
|
|
+ extern pte_t level1_fixmap_pgt[PTRS_PER_PTE];
|
|
+
|
|
+ case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE:
|
|
+ pte = pfn_pte(phys >> PAGE_SHIFT, flags);
|
|
+ set_pte_vaddr_pud(level3_user_pgt, address, pte);
|
|
+ break;
|
|
+ case FIX_EARLYCON_MEM_BASE:
|
|
+ xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
|
|
+ pfn_pte_ma(phys >> PAGE_SHIFT, flags));
|
|
+ fixmaps_set++;
|
|
+ return;
|
|
+#else
|
|
+ case FIX_WP_TEST:
|
|
+ case FIX_VDSO:
|
|
+ pte = pfn_pte(phys >> PAGE_SHIFT, flags);
|
|
+ break;
|
|
+#endif
|
|
+ default:
|
|
+ pte = pfn_pte_ma(phys >> PAGE_SHIFT, flags);
|
|
+ break;
|
|
+ }
|
|
+ set_pte_vaddr(address, pte);
|
|
+ fixmaps_set++;
|
|
+}
|
|
--- head-2011-03-11.orig/arch/x86/mm/pgtable_32-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/pgtable_32-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -25,51 +25,49 @@
|
|
#include <xen/features.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
-void show_mem(void)
|
|
+/*
|
|
+ * Associate a virtual page frame with a given physical page frame
|
|
+ * and protection flags for that frame.
|
|
+ */
|
|
+void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
|
{
|
|
- int total = 0, reserved = 0;
|
|
- int shared = 0, cached = 0;
|
|
- int highmem = 0;
|
|
- struct page *page;
|
|
- pg_data_t *pgdat;
|
|
- unsigned long i;
|
|
- unsigned long flags;
|
|
-
|
|
- printk(KERN_INFO "Mem-info:\n");
|
|
- show_free_areas();
|
|
- for_each_online_pgdat(pgdat) {
|
|
- pgdat_resize_lock(pgdat, &flags);
|
|
- for (i = 0; i < pgdat->node_spanned_pages; ++i) {
|
|
- if (unlikely(i % MAX_ORDER_NR_PAGES == 0))
|
|
- touch_nmi_watchdog();
|
|
- page = pgdat_page_nr(pgdat, i);
|
|
- total++;
|
|
- if (PageHighMem(page))
|
|
- highmem++;
|
|
- if (PageReserved(page))
|
|
- reserved++;
|
|
- else if (PageSwapCache(page))
|
|
- cached++;
|
|
- else if (page_count(page))
|
|
- shared += page_count(page) - 1;
|
|
- }
|
|
- pgdat_resize_unlock(pgdat, &flags);
|
|
- }
|
|
- printk(KERN_INFO "%d pages of RAM\n", total);
|
|
- printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
|
|
- printk(KERN_INFO "%d reserved pages\n", reserved);
|
|
- printk(KERN_INFO "%d pages shared\n", shared);
|
|
- printk(KERN_INFO "%d pages swap cached\n", cached);
|
|
-
|
|
- printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY));
|
|
- printk(KERN_INFO "%lu pages writeback\n",
|
|
- global_page_state(NR_WRITEBACK));
|
|
- printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
|
|
- printk(KERN_INFO "%lu pages slab\n",
|
|
- global_page_state(NR_SLAB_RECLAIMABLE) +
|
|
- global_page_state(NR_SLAB_UNRECLAIMABLE));
|
|
- printk(KERN_INFO "%lu pages pagetables\n",
|
|
- global_page_state(NR_PAGETABLE));
|
|
+#ifndef CONFIG_XEN
|
|
+ pgd_t *pgd;
|
|
+ pud_t *pud;
|
|
+ pmd_t *pmd;
|
|
+ pte_t *pte;
|
|
+
|
|
+ pgd = swapper_pg_dir + pgd_index(vaddr);
|
|
+ if (pgd_none(*pgd)) {
|
|
+ BUG();
|
|
+ return;
|
|
+ }
|
|
+ pud = pud_offset(pgd, vaddr);
|
|
+ if (pud_none(*pud)) {
|
|
+ BUG();
|
|
+ return;
|
|
+ }
|
|
+ pmd = pmd_offset(pud, vaddr);
|
|
+ if (pmd_none(*pmd)) {
|
|
+ BUG();
|
|
+ return;
|
|
+ }
|
|
+ pte = pte_offset_kernel(pmd, vaddr);
|
|
+ if (pte_val(pteval))
|
|
+ set_pte_present(&init_mm, vaddr, pte, pteval);
|
|
+ else
|
|
+ pte_clear(&init_mm, vaddr, pte);
|
|
+
|
|
+ /*
|
|
+ * It's enough to flush this one mapping.
|
|
+ * (PGE mappings get flushed as well)
|
|
+ */
|
|
+ __flush_tlb_one(vaddr);
|
|
+#else
|
|
+ if (HYPERVISOR_update_va_mapping(vaddr, pteval,
|
|
+ UVMF_INVLPG|UVMF_ALL))
|
|
+ BUG();
|
|
+#endif
|
|
}
|
|
|
|
/*
|
|
@@ -107,35 +105,10 @@ void set_pmd_pfn(unsigned long vaddr, un
|
|
__flush_tlb_one(vaddr);
|
|
}
|
|
|
|
-static int fixmaps;
|
|
unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
|
|
unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
|
|
EXPORT_SYMBOL(__FIXADDR_TOP);
|
|
|
|
-void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
|
|
-{
|
|
- unsigned long address = __fix_to_virt(idx);
|
|
- pte_t pte;
|
|
-
|
|
- if (idx >= __end_of_fixed_addresses) {
|
|
- BUG();
|
|
- return;
|
|
- }
|
|
- switch (idx) {
|
|
- case FIX_WP_TEST:
|
|
- case FIX_VDSO:
|
|
- pte = pfn_pte(phys >> PAGE_SHIFT, flags);
|
|
- break;
|
|
- default:
|
|
- pte = pfn_pte_ma(phys >> PAGE_SHIFT, flags);
|
|
- break;
|
|
- }
|
|
- if (HYPERVISOR_update_va_mapping(address, pte,
|
|
- UVMF_INVLPG|UVMF_ALL))
|
|
- BUG();
|
|
- fixmaps++;
|
|
-}
|
|
-
|
|
/**
|
|
* reserve_top_address - reserves a hole in the top of kernel address space
|
|
* @reserve - size of hole to reserve
|
|
@@ -145,13 +118,48 @@ void __set_fixmap (enum fixed_addresses
|
|
*/
|
|
void __init reserve_top_address(unsigned long reserve)
|
|
{
|
|
- BUG_ON(fixmaps > 0);
|
|
+ BUG_ON(fixmaps_set > 0);
|
|
printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
|
|
(int)-reserve);
|
|
__FIXADDR_TOP = -reserve - PAGE_SIZE;
|
|
__VMALLOC_RESERVE += reserve;
|
|
}
|
|
|
|
+/*
|
|
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
|
|
+ * bytes. This can be used to increase (or decrease) the
|
|
+ * vmalloc area - the default is 128m.
|
|
+ */
|
|
+static int __init parse_vmalloc(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
+
|
|
+ __VMALLOC_RESERVE = memparse(arg, &arg);
|
|
+ return 0;
|
|
+}
|
|
+early_param("vmalloc", parse_vmalloc);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * reservetop=size reserves a hole at the top of the kernel address space which
|
|
+ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
|
|
+ * so relocating the fixmap can be done before paging initialization.
|
|
+ */
|
|
+static int __init parse_reservetop(char *arg)
|
|
+{
|
|
+ unsigned long address;
|
|
+
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
+
|
|
+ address = memparse(arg, &arg);
|
|
+ reserve_top_address(address);
|
|
+ return 0;
|
|
+}
|
|
+early_param("reservetop", parse_reservetop);
|
|
+#endif
|
|
+
|
|
void make_lowmem_page_readonly(void *va, unsigned int feature)
|
|
{
|
|
pte_t *pte;
|
|
--- head-2011-03-11.orig/arch/x86/pci/amd_bus.c 2011-03-15 16:45:55.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/pci/amd_bus.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -350,6 +350,7 @@ static int __init early_fill_mp_bus_info
|
|
|
|
#define ENABLE_CF8_EXT_CFG (1ULL << 46)
|
|
|
|
+#ifndef CONFIG_XEN
|
|
static void enable_pci_io_ecs(void *unused)
|
|
{
|
|
u64 reg;
|
|
@@ -378,6 +379,7 @@ static int __cpuinit amd_cpu_notify(stru
|
|
static struct notifier_block __cpuinitdata amd_cpu_notifier = {
|
|
.notifier_call = amd_cpu_notify,
|
|
};
|
|
+#endif /* CONFIG_XEN */
|
|
|
|
static void __init pci_enable_pci_io_ecs(void)
|
|
{
|
|
@@ -419,10 +421,19 @@ static int __init pci_io_ecs_init(void)
|
|
if (early_pci_allowed())
|
|
pci_enable_pci_io_ecs();
|
|
|
|
+#ifndef CONFIG_XEN
|
|
register_cpu_notifier(&amd_cpu_notifier);
|
|
for_each_online_cpu(cpu)
|
|
amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
|
|
(void *)(long)cpu);
|
|
+#else
|
|
+ if (cpu = 1, cpu) {
|
|
+ u64 reg;
|
|
+ rdmsrl(MSR_AMD64_NB_CFG, reg);
|
|
+ if (!(reg & ENABLE_CF8_EXT_CFG))
|
|
+ return 0;
|
|
+ }
|
|
+#endif
|
|
pci_probe |= PCI_HAS_IO_ECS;
|
|
|
|
return 0;
|
|
@@ -430,6 +441,10 @@ static int __init pci_io_ecs_init(void)
|
|
|
|
static int __init amd_postcore_init(void)
|
|
{
|
|
+#ifdef CONFIG_XEN
|
|
+ if (!is_initial_xendomain())
|
|
+ return 0;
|
|
+#endif
|
|
if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
|
|
return 0;
|
|
|
|
--- head-2011-03-11.orig/arch/x86/pci/irq-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/pci/irq-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -11,8 +11,8 @@
|
|
#include <linux/slab.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/dmi.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/smp.h>
|
|
+#include <linux/io.h>
|
|
+#include <linux/smp.h>
|
|
#include <asm/io_apic.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/acpi.h>
|
|
@@ -45,7 +45,8 @@ struct irq_router {
|
|
char *name;
|
|
u16 vendor, device;
|
|
int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq);
|
|
- int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new);
|
|
+ int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq,
|
|
+ int new);
|
|
};
|
|
|
|
struct irq_router_handler {
|
|
@@ -61,7 +62,7 @@ void (*pcibios_disable_irq)(struct pci_d
|
|
* and perform checksum verification.
|
|
*/
|
|
|
|
-static inline struct irq_routing_table * pirq_check_routing_table(u8 *addr)
|
|
+static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr)
|
|
{
|
|
struct irq_routing_table *rt;
|
|
int i;
|
|
@@ -74,10 +75,11 @@ static inline struct irq_routing_table *
|
|
rt->size < sizeof(struct irq_routing_table))
|
|
return NULL;
|
|
sum = 0;
|
|
- for (i=0; i < rt->size; i++)
|
|
+ for (i = 0; i < rt->size; i++)
|
|
sum += addr[i];
|
|
if (!sum) {
|
|
- DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt);
|
|
+ DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n",
|
|
+ rt);
|
|
return rt;
|
|
}
|
|
return NULL;
|
|
@@ -104,7 +106,9 @@ static struct irq_routing_table * __init
|
|
return rt;
|
|
printk(KERN_WARNING "PCI: PIRQ table NOT found at pirqaddr\n");
|
|
}
|
|
- for(addr = (u8 *) isa_bus_to_virt(0xf0000); addr < (u8 *) isa_bus_to_virt(0x100000); addr += 16) {
|
|
+ for (addr = (u8 *) isa_bus_to_virt(0xf0000);
|
|
+ addr < (u8 *) isa_bus_to_virt(0x100000);
|
|
+ addr += 16) {
|
|
rt = pirq_check_routing_table(addr);
|
|
if (rt)
|
|
return rt;
|
|
@@ -126,20 +130,20 @@ static void __init pirq_peer_trick(void)
|
|
struct irq_info *e;
|
|
|
|
memset(busmap, 0, sizeof(busmap));
|
|
- for(i=0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
|
|
+ for (i = 0; i < (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); i++) {
|
|
e = &rt->slots[i];
|
|
#ifdef DEBUG
|
|
{
|
|
int j;
|
|
DBG(KERN_DEBUG "%02x:%02x slot=%02x", e->bus, e->devfn/8, e->slot);
|
|
- for(j=0; j<4; j++)
|
|
+ for (j = 0; j < 4; j++)
|
|
DBG(" %d:%02x/%04x", j, e->irq[j].link, e->irq[j].bitmap);
|
|
DBG("\n");
|
|
}
|
|
#endif
|
|
busmap[e->bus] = 1;
|
|
}
|
|
- for(i = 1; i < 256; i++) {
|
|
+ for (i = 1; i < 256; i++) {
|
|
int node;
|
|
if (!busmap[i] || pci_find_bus(0, i))
|
|
continue;
|
|
@@ -187,7 +191,8 @@ static unsigned int read_config_nybble(s
|
|
return (nr & 1) ? (x >> 4) : (x & 0xf);
|
|
}
|
|
|
|
-static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val)
|
|
+static void write_config_nybble(struct pci_dev *router, unsigned offset,
|
|
+ unsigned nr, unsigned int val)
|
|
{
|
|
u8 x;
|
|
unsigned reg = offset + (nr >> 1);
|
|
@@ -289,7 +294,7 @@ static int pirq_ite_get(struct pci_dev *
|
|
static const unsigned char pirqmap[4] = { 1, 0, 2, 3 };
|
|
|
|
WARN_ON_ONCE(pirq > 4);
|
|
- return read_config_nybble(router,0x43, pirqmap[pirq-1]);
|
|
+ return read_config_nybble(router, 0x43, pirqmap[pirq-1]);
|
|
}
|
|
|
|
static int pirq_ite_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
|
|
@@ -318,7 +323,7 @@ static int pirq_opti_set(struct pci_dev
|
|
|
|
/*
|
|
* Cyrix: nibble offset 0x5C
|
|
- * 0x5C bits 7:4 is INTB bits 3:0 is INTA
|
|
+ * 0x5C bits 7:4 is INTB bits 3:0 is INTA
|
|
* 0x5D bits 7:4 is INTD bits 3:0 is INTC
|
|
*/
|
|
static int pirq_cyrix_get(struct pci_dev *router, struct pci_dev *dev, int pirq)
|
|
@@ -354,7 +359,7 @@ static int pirq_cyrix_set(struct pci_dev
|
|
* Apparently there are systems implementing PCI routing table using
|
|
* link values 0x01-0x04 and others using 0x41-0x44 for PCI INTA..D.
|
|
* We try our best to handle both link mappings.
|
|
- *
|
|
+ *
|
|
* Currently (2003-05-21) it appears most SiS chipsets follow the
|
|
* definition of routing registers from the SiS-5595 southbridge.
|
|
* According to the SiS 5595 datasheets the revision id's of the
|
|
@@ -374,7 +379,7 @@ static int pirq_cyrix_set(struct pci_dev
|
|
*
|
|
* 0x62: USBIRQ:
|
|
* bit 6 OHCI function disabled (0), enabled (1)
|
|
- *
|
|
+ *
|
|
* 0x6a: ACPI/SCI IRQ: bits 4-6 reserved
|
|
*
|
|
* 0x7e: Data Acq. Module IRQ - bits 4-6 reserved
|
|
@@ -437,7 +442,7 @@ static int pirq_vlsi_get(struct pci_dev
|
|
{
|
|
WARN_ON_ONCE(pirq >= 9);
|
|
if (pirq > 8) {
|
|
- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
|
|
+ dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
|
|
return 0;
|
|
}
|
|
return read_config_nybble(router, 0x74, pirq-1);
|
|
@@ -447,7 +452,7 @@ static int pirq_vlsi_set(struct pci_dev
|
|
{
|
|
WARN_ON_ONCE(pirq >= 9);
|
|
if (pirq > 8) {
|
|
- printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq);
|
|
+ dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq);
|
|
return 0;
|
|
}
|
|
write_config_nybble(router, 0x74, pirq-1, irq);
|
|
@@ -471,7 +476,8 @@ static int pirq_serverworks_get(struct p
|
|
return inb(0xc01) & 0xf;
|
|
}
|
|
|
|
-static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
|
|
+static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev,
|
|
+ int pirq, int irq)
|
|
{
|
|
outb(pirq, 0xc00);
|
|
outb(irq, 0xc01);
|
|
@@ -491,22 +497,20 @@ static int pirq_amd756_get(struct pci_de
|
|
u8 irq;
|
|
irq = 0;
|
|
if (pirq <= 4)
|
|
- {
|
|
irq = read_config_nybble(router, 0x56, pirq - 1);
|
|
- }
|
|
- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n",
|
|
- dev->vendor, dev->device, pirq, irq);
|
|
+ dev_info(&dev->dev,
|
|
+ "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n",
|
|
+ dev->vendor, dev->device, pirq, irq);
|
|
return irq;
|
|
}
|
|
|
|
static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq)
|
|
{
|
|
- printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n",
|
|
- dev->vendor, dev->device, pirq, irq);
|
|
+ dev_info(&dev->dev,
|
|
+ "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n",
|
|
+ dev->vendor, dev->device, pirq, irq);
|
|
if (pirq <= 4)
|
|
- {
|
|
write_config_nybble(router, 0x56, pirq - 1, irq);
|
|
- }
|
|
return 1;
|
|
}
|
|
|
|
@@ -553,50 +557,51 @@ static __init int intel_router_probe(str
|
|
if (pci_dev_present(pirq_440gx))
|
|
return 0;
|
|
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_INTEL_82371FB_0:
|
|
- case PCI_DEVICE_ID_INTEL_82371SB_0:
|
|
- case PCI_DEVICE_ID_INTEL_82371AB_0:
|
|
- case PCI_DEVICE_ID_INTEL_82371MX:
|
|
- case PCI_DEVICE_ID_INTEL_82443MX_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801AA_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801AB_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801BA_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801BA_10:
|
|
- case PCI_DEVICE_ID_INTEL_82801CA_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801CA_12:
|
|
- case PCI_DEVICE_ID_INTEL_82801DB_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801E_0:
|
|
- case PCI_DEVICE_ID_INTEL_82801EB_0:
|
|
- case PCI_DEVICE_ID_INTEL_ESB_1:
|
|
- case PCI_DEVICE_ID_INTEL_ICH6_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH6_1:
|
|
- case PCI_DEVICE_ID_INTEL_ICH7_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH7_1:
|
|
- case PCI_DEVICE_ID_INTEL_ICH7_30:
|
|
- case PCI_DEVICE_ID_INTEL_ICH7_31:
|
|
- case PCI_DEVICE_ID_INTEL_ESB2_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH8_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH8_1:
|
|
- case PCI_DEVICE_ID_INTEL_ICH8_2:
|
|
- case PCI_DEVICE_ID_INTEL_ICH8_3:
|
|
- case PCI_DEVICE_ID_INTEL_ICH8_4:
|
|
- case PCI_DEVICE_ID_INTEL_ICH9_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH9_1:
|
|
- case PCI_DEVICE_ID_INTEL_ICH9_2:
|
|
- case PCI_DEVICE_ID_INTEL_ICH9_3:
|
|
- case PCI_DEVICE_ID_INTEL_ICH9_4:
|
|
- case PCI_DEVICE_ID_INTEL_ICH9_5:
|
|
- case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH10_0:
|
|
- case PCI_DEVICE_ID_INTEL_ICH10_1:
|
|
- case PCI_DEVICE_ID_INTEL_ICH10_2:
|
|
- case PCI_DEVICE_ID_INTEL_ICH10_3:
|
|
- r->name = "PIIX/ICH";
|
|
- r->get = pirq_piix_get;
|
|
- r->set = pirq_piix_set;
|
|
- return 1;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_INTEL_82371FB_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82371SB_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82371AB_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82371MX:
|
|
+ case PCI_DEVICE_ID_INTEL_82443MX_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801AA_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801AB_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801BA_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801BA_10:
|
|
+ case PCI_DEVICE_ID_INTEL_82801CA_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801CA_12:
|
|
+ case PCI_DEVICE_ID_INTEL_82801DB_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801E_0:
|
|
+ case PCI_DEVICE_ID_INTEL_82801EB_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ESB_1:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH6_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH6_1:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH7_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH7_1:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH7_30:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH7_31:
|
|
+ case PCI_DEVICE_ID_INTEL_ESB2_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH8_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH8_1:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH8_2:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH8_3:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH8_4:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH9_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH9_1:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH9_2:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH9_3:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH9_4:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH9_5:
|
|
+ case PCI_DEVICE_ID_INTEL_TOLAPAI_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH10_0:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH10_1:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH10_2:
|
|
+ case PCI_DEVICE_ID_INTEL_ICH10_3:
|
|
+ case PCI_DEVICE_ID_INTEL_PCH_0:
|
|
+ case PCI_DEVICE_ID_INTEL_PCH_1:
|
|
+ r->name = "PIIX/ICH";
|
|
+ r->get = pirq_piix_get;
|
|
+ r->set = pirq_piix_set;
|
|
+ return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
@@ -610,7 +615,7 @@ static __init int via_router_probe(struc
|
|
* workarounds for some buggy BIOSes
|
|
*/
|
|
if (device == PCI_DEVICE_ID_VIA_82C586_0) {
|
|
- switch(router->device) {
|
|
+ switch (router->device) {
|
|
case PCI_DEVICE_ID_VIA_82C686:
|
|
/*
|
|
* Asus k7m bios wrongly reports 82C686A
|
|
@@ -635,7 +640,7 @@ static __init int via_router_probe(struc
|
|
}
|
|
}
|
|
|
|
- switch(device) {
|
|
+ switch (device) {
|
|
case PCI_DEVICE_ID_VIA_82C586_0:
|
|
r->name = "VIA";
|
|
r->get = pirq_via586_get;
|
|
@@ -658,28 +663,27 @@ static __init int via_router_probe(struc
|
|
|
|
static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_VLSI_82C534:
|
|
- r->name = "VLSI 82C534";
|
|
- r->get = pirq_vlsi_get;
|
|
- r->set = pirq_vlsi_set;
|
|
- return 1;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_VLSI_82C534:
|
|
+ r->name = "VLSI 82C534";
|
|
+ r->get = pirq_vlsi_get;
|
|
+ r->set = pirq_vlsi_set;
|
|
+ return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
-static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
+static __init int serverworks_router_probe(struct irq_router *r,
|
|
+ struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_SERVERWORKS_OSB4:
|
|
- case PCI_DEVICE_ID_SERVERWORKS_CSB5:
|
|
- r->name = "ServerWorks";
|
|
- r->get = pirq_serverworks_get;
|
|
- r->set = pirq_serverworks_set;
|
|
- return 1;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_SERVERWORKS_OSB4:
|
|
+ case PCI_DEVICE_ID_SERVERWORKS_CSB5:
|
|
+ r->name = "ServerWorks";
|
|
+ r->get = pirq_serverworks_get;
|
|
+ r->set = pirq_serverworks_set;
|
|
+ return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
@@ -688,7 +692,7 @@ static __init int sis_router_probe(struc
|
|
{
|
|
if (device != PCI_DEVICE_ID_SI_503)
|
|
return 0;
|
|
-
|
|
+
|
|
r->name = "SIS";
|
|
r->get = pirq_sis_get;
|
|
r->set = pirq_sis_set;
|
|
@@ -697,50 +701,45 @@ static __init int sis_router_probe(struc
|
|
|
|
static __init int cyrix_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_CYRIX_5520:
|
|
- r->name = "NatSemi";
|
|
- r->get = pirq_cyrix_get;
|
|
- r->set = pirq_cyrix_set;
|
|
- return 1;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_CYRIX_5520:
|
|
+ r->name = "NatSemi";
|
|
+ r->get = pirq_cyrix_get;
|
|
+ r->set = pirq_cyrix_set;
|
|
+ return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static __init int opti_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_OPTI_82C700:
|
|
- r->name = "OPTI";
|
|
- r->get = pirq_opti_get;
|
|
- r->set = pirq_opti_set;
|
|
- return 1;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_OPTI_82C700:
|
|
+ r->name = "OPTI";
|
|
+ r->get = pirq_opti_get;
|
|
+ r->set = pirq_opti_set;
|
|
+ return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static __init int ite_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_ITE_IT8330G_0:
|
|
- r->name = "ITE";
|
|
- r->get = pirq_ite_get;
|
|
- r->set = pirq_ite_set;
|
|
- return 1;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_ITE_IT8330G_0:
|
|
+ r->name = "ITE";
|
|
+ r->get = pirq_ite_get;
|
|
+ r->set = pirq_ite_set;
|
|
+ return 1;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
+ switch (device) {
|
|
case PCI_DEVICE_ID_AL_M1533:
|
|
case PCI_DEVICE_ID_AL_M1563:
|
|
- printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n");
|
|
r->name = "ALI";
|
|
r->get = pirq_ali_get;
|
|
r->set = pirq_ali_set;
|
|
@@ -751,25 +750,24 @@ static __init int ali_router_probe(struc
|
|
|
|
static __init int amd_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
- switch(device)
|
|
- {
|
|
- case PCI_DEVICE_ID_AMD_VIPER_740B:
|
|
- r->name = "AMD756";
|
|
- break;
|
|
- case PCI_DEVICE_ID_AMD_VIPER_7413:
|
|
- r->name = "AMD766";
|
|
- break;
|
|
- case PCI_DEVICE_ID_AMD_VIPER_7443:
|
|
- r->name = "AMD768";
|
|
- break;
|
|
- default:
|
|
- return 0;
|
|
+ switch (device) {
|
|
+ case PCI_DEVICE_ID_AMD_VIPER_740B:
|
|
+ r->name = "AMD756";
|
|
+ break;
|
|
+ case PCI_DEVICE_ID_AMD_VIPER_7413:
|
|
+ r->name = "AMD766";
|
|
+ break;
|
|
+ case PCI_DEVICE_ID_AMD_VIPER_7443:
|
|
+ r->name = "AMD768";
|
|
+ break;
|
|
+ default:
|
|
+ return 0;
|
|
}
|
|
r->get = pirq_amd756_get;
|
|
r->set = pirq_amd756_set;
|
|
return 1;
|
|
}
|
|
-
|
|
+
|
|
static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
|
|
{
|
|
switch (device) {
|
|
@@ -811,7 +809,7 @@ static struct pci_dev *pirq_router_dev;
|
|
* FIXME: should we have an option to say "generic for
|
|
* chipset" ?
|
|
*/
|
|
-
|
|
+
|
|
static void __init pirq_find_router(struct irq_router *r)
|
|
{
|
|
struct irq_routing_table *rt = pirq_table;
|
|
@@ -830,7 +828,7 @@ static void __init pirq_find_router(stru
|
|
r->name = "default";
|
|
r->get = NULL;
|
|
r->set = NULL;
|
|
-
|
|
+
|
|
DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
|
|
rt->rtr_vendor, rt->rtr_device);
|
|
|
|
@@ -841,19 +839,19 @@ static void __init pirq_find_router(stru
|
|
return;
|
|
}
|
|
|
|
- for( h = pirq_routers; h->vendor; h++) {
|
|
+ for (h = pirq_routers; h->vendor; h++) {
|
|
/* First look for a router match */
|
|
- if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device))
|
|
+ if (rt->rtr_vendor == h->vendor &&
|
|
+ h->probe(r, pirq_router_dev, rt->rtr_device))
|
|
break;
|
|
/* Fall back to a device match */
|
|
- if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device))
|
|
+ if (pirq_router_dev->vendor == h->vendor &&
|
|
+ h->probe(r, pirq_router_dev, pirq_router_dev->device))
|
|
break;
|
|
}
|
|
- printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n",
|
|
- pirq_router.name,
|
|
- pirq_router_dev->vendor,
|
|
- pirq_router_dev->device,
|
|
- pci_name(pirq_router_dev));
|
|
+ dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n",
|
|
+ pirq_router.name,
|
|
+ pirq_router_dev->vendor, pirq_router_dev->device);
|
|
|
|
/* The device remains referenced for the kernel lifetime */
|
|
}
|
|
@@ -861,11 +859,13 @@ static void __init pirq_find_router(stru
|
|
static struct irq_info *pirq_get_info(struct pci_dev *dev)
|
|
{
|
|
struct irq_routing_table *rt = pirq_table;
|
|
- int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info);
|
|
+ int entries = (rt->size - sizeof(struct irq_routing_table)) /
|
|
+ sizeof(struct irq_info);
|
|
struct irq_info *info;
|
|
|
|
for (info = rt->slots; entries--; info++)
|
|
- if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
|
|
+ if (info->bus == dev->bus->number &&
|
|
+ PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn))
|
|
return info;
|
|
return NULL;
|
|
}
|
|
@@ -884,7 +884,7 @@ static int pcibios_lookup_irq(struct pci
|
|
/* Find IRQ pin */
|
|
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
|
|
if (!pin) {
|
|
- DBG(KERN_DEBUG " -> no interrupt pin\n");
|
|
+ dev_dbg(&dev->dev, "no interrupt pin\n");
|
|
return 0;
|
|
}
|
|
pin = pin - 1;
|
|
@@ -893,20 +893,21 @@ static int pcibios_lookup_irq(struct pci
|
|
|
|
if (!pirq_table)
|
|
return 0;
|
|
-
|
|
- DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin);
|
|
+
|
|
info = pirq_get_info(dev);
|
|
if (!info) {
|
|
- DBG(" -> not found in routing table\n" KERN_DEBUG);
|
|
+ dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n",
|
|
+ 'A' + pin);
|
|
return 0;
|
|
}
|
|
pirq = info->irq[pin].link;
|
|
mask = info->irq[pin].bitmap;
|
|
if (!pirq) {
|
|
- DBG(" -> not routed\n" KERN_DEBUG);
|
|
+ dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin);
|
|
return 0;
|
|
}
|
|
- DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs);
|
|
+ dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x",
|
|
+ 'A' + pin, pirq, mask, pirq_table->exclusive_irqs);
|
|
mask &= pcibios_irq_mask;
|
|
|
|
/* Work around broken HP Pavilion Notebooks which assign USB to
|
|
@@ -919,7 +920,8 @@ static int pcibios_lookup_irq(struct pci
|
|
}
|
|
|
|
/* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */
|
|
- if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) {
|
|
+ if (acer_tm360_irqrouting && dev->irq == 11 &&
|
|
+ dev->vendor == PCI_VENDOR_ID_O2) {
|
|
pirq = 0x68;
|
|
mask = 0x400;
|
|
dev->irq = r->get(pirq_router_dev, dev, pirq);
|
|
@@ -932,51 +934,50 @@ static int pcibios_lookup_irq(struct pci
|
|
*/
|
|
newirq = dev->irq;
|
|
if (newirq && !((1 << newirq) & mask)) {
|
|
- if ( pci_probe & PCI_USE_PIRQ_MASK) newirq = 0;
|
|
- else printk("\n" KERN_WARNING
|
|
- "PCI: IRQ %i for device %s doesn't match PIRQ mask "
|
|
- "- try pci=usepirqmask\n" KERN_DEBUG, newirq,
|
|
- pci_name(dev));
|
|
+ if (pci_probe & PCI_USE_PIRQ_MASK)
|
|
+ newirq = 0;
|
|
+ else
|
|
+ dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask "
|
|
+ "%#x; try pci=usepirqmask\n", newirq, mask);
|
|
}
|
|
if (!newirq && assign) {
|
|
for (i = 0; i < 16; i++) {
|
|
if (!(mask & (1 << i)))
|
|
continue;
|
|
- if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED))
|
|
+ if (pirq_penalty[i] < pirq_penalty[newirq] &&
|
|
+ can_request_irq(i, IRQF_SHARED))
|
|
newirq = i;
|
|
}
|
|
}
|
|
- DBG(" -> newirq=%d", newirq);
|
|
+ dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq);
|
|
|
|
/* Check if it is hardcoded */
|
|
if ((pirq & 0xf0) == 0xf0) {
|
|
irq = pirq & 0xf;
|
|
- DBG(" -> hardcoded IRQ %d\n", irq);
|
|
- msg = "Hardcoded";
|
|
- } else if ( r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
|
|
- ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask)) ) {
|
|
- DBG(" -> got IRQ %d\n", irq);
|
|
- msg = "Found";
|
|
+ msg = "hardcoded";
|
|
+ } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \
|
|
+ ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) {
|
|
+ msg = "found";
|
|
eisa_set_level_irq(irq);
|
|
- } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
|
|
- DBG(" -> assigning IRQ %d", newirq);
|
|
+ } else if (newirq && r->set &&
|
|
+ (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) {
|
|
if (r->set(pirq_router_dev, dev, pirq, newirq)) {
|
|
eisa_set_level_irq(newirq);
|
|
- DBG(" ... OK\n");
|
|
- msg = "Assigned";
|
|
+ msg = "assigned";
|
|
irq = newirq;
|
|
}
|
|
}
|
|
|
|
if (!irq) {
|
|
- DBG(" ... failed\n");
|
|
if (newirq && mask == (1 << newirq)) {
|
|
- msg = "Guessed";
|
|
+ msg = "guessed";
|
|
irq = newirq;
|
|
- } else
|
|
+ } else {
|
|
+ dev_dbg(&dev->dev, "can't route interrupt\n");
|
|
return 0;
|
|
+ }
|
|
}
|
|
- printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev));
|
|
+ dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq);
|
|
|
|
/* Update IRQ for all devices with the same pirq value */
|
|
while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
|
|
@@ -988,20 +989,25 @@ static int pcibios_lookup_irq(struct pci
|
|
if (!info)
|
|
continue;
|
|
if (info->irq[pin].link == pirq) {
|
|
- /* We refuse to override the dev->irq information. Give a warning! */
|
|
- if ( dev2->irq && dev2->irq != irq && \
|
|
+ /*
|
|
+ * We refuse to override the dev->irq
|
|
+ * information. Give a warning!
|
|
+ */
|
|
+ if (dev2->irq && dev2->irq != irq && \
|
|
(!(pci_probe & PCI_USE_PIRQ_MASK) || \
|
|
- ((1 << dev2->irq) & mask)) ) {
|
|
+ ((1 << dev2->irq) & mask))) {
|
|
#ifndef CONFIG_PCI_MSI
|
|
- printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n",
|
|
- pci_name(dev2), dev2->irq, irq);
|
|
+ dev_info(&dev2->dev, "IRQ routing conflict: "
|
|
+ "have IRQ %d, want IRQ %d\n",
|
|
+ dev2->irq, irq);
|
|
#endif
|
|
- continue;
|
|
- }
|
|
+ continue;
|
|
+ }
|
|
dev2->irq = irq;
|
|
pirq_penalty[irq]++;
|
|
if (dev != dev2)
|
|
- printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2));
|
|
+ dev_info(&dev->dev, "sharing IRQ %d with %s\n",
|
|
+ irq, pci_name(dev2));
|
|
}
|
|
}
|
|
return 1;
|
|
@@ -1015,15 +1021,20 @@ static void __init pcibios_fixup_irqs(vo
|
|
DBG(KERN_DEBUG "PCI: IRQ fixup\n");
|
|
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
|
|
/*
|
|
- * If the BIOS has set an out of range IRQ number, just ignore it.
|
|
- * Also keep track of which IRQ's are already in use.
|
|
+ * If the BIOS has set an out of range IRQ number, just
|
|
+ * ignore it. Also keep track of which IRQ's are
|
|
+ * already in use.
|
|
*/
|
|
if (dev->irq >= 16) {
|
|
- DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq);
|
|
+ dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq);
|
|
dev->irq = 0;
|
|
}
|
|
- /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */
|
|
- if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000)
|
|
+ /*
|
|
+ * If the IRQ is already assigned to a PCI device,
|
|
+ * ignore its ISA use penalty
|
|
+ */
|
|
+ if (pirq_penalty[dev->irq] >= 100 &&
|
|
+ pirq_penalty[dev->irq] < 100000)
|
|
pirq_penalty[dev->irq] = 0;
|
|
pirq_penalty[dev->irq]++;
|
|
}
|
|
@@ -1035,13 +1046,17 @@ static void __init pcibios_fixup_irqs(vo
|
|
/*
|
|
* Recalculate IRQ numbers if we use the I/O APIC.
|
|
*/
|
|
- if (io_apic_assign_pci_irqs)
|
|
- {
|
|
+ if (io_apic_assign_pci_irqs) {
|
|
int irq;
|
|
|
|
if (pin) {
|
|
- pin--; /* interrupt pins are numbered starting from 1 */
|
|
- irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin);
|
|
+ /*
|
|
+ * interrupt pins are numbered starting
|
|
+ * from 1
|
|
+ */
|
|
+ pin--;
|
|
+ irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
|
|
+ PCI_SLOT(dev->devfn), pin);
|
|
/*
|
|
* Busses behind bridges are typically not listed in the MP-table.
|
|
* In this case we have to look up the IRQ based on the parent bus,
|
|
@@ -1049,18 +1064,18 @@ static void __init pcibios_fixup_irqs(vo
|
|
* busses itself so we should get into this branch reliably.
|
|
*/
|
|
if (irq < 0 && dev->bus->parent) { /* go back to the bridge */
|
|
- struct pci_dev * bridge = dev->bus->self;
|
|
+ struct pci_dev *bridge = dev->bus->self;
|
|
|
|
pin = (pin + PCI_SLOT(dev->devfn)) % 4;
|
|
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
|
|
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
|
|
PCI_SLOT(bridge->devfn), pin);
|
|
if (irq >= 0)
|
|
- printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
|
|
- pci_name(bridge), 'A' + pin, irq);
|
|
+ dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n",
|
|
+ pci_name(bridge),
|
|
+ 'A' + pin, irq);
|
|
}
|
|
if (irq >= 0) {
|
|
- printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
|
|
- pci_name(dev), 'A' + pin, irq);
|
|
+ dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq);
|
|
dev->irq = irq;
|
|
}
|
|
}
|
|
@@ -1082,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq
{
if (!broken_hp_bios_irq9) {
broken_hp_bios_irq9 = 1;
- printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+ d->ident);
}
return 0;
}
@@ -1095,7 +1111,8 @@ static int __init fix_acer_tm360_irqrout
{
if (!acer_tm360_irqrouting) {
acer_tm360_irqrouting = 1;
- printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident);
+ printk(KERN_INFO "%s detected - fixing broken IRQ routing\n",
+ d->ident);
}
return 0;
}
@@ -1107,7 +1124,8 @@ static struct dmi_system_id __initdata p
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"),
+ DMI_MATCH(DMI_PRODUCT_VERSION,
+ "HP Pavilion Notebook Model GE"),
DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"),
},
},
@@ -1122,7 +1140,7 @@ static struct dmi_system_id __initdata p
{ }
};

-static int __init pcibios_irq_init(void)
+int __init pcibios_irq_init(void)
{
DBG(KERN_DEBUG "PCI: IRQ init\n");

@@ -1142,11 +1160,14 @@ static int __init pcibios_irq_init(void)
pirq_find_router(&pirq_router);
if (pirq_table->exclusive_irqs) {
int i;
- for (i=0; i<16; i++)
+ for (i = 0; i < 16; i++)
if (!(pirq_table->exclusive_irqs & (1 << i)))
pirq_penalty[i] += 100;
}
- /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */
+ /*
+ * If we're using the I/O APIC, avoid using the PCI IRQ
+ * routing table
+ */
if (io_apic_assign_pci_irqs)
pirq_table = NULL;
}
@@ -1157,9 +1178,6 @@ static int __init pcibios_irq_init(void)
return 0;
}

-subsys_initcall(pcibios_irq_init);
-
-
static void pirq_penalize_isa_irq(int irq, int active)
{
/*
@@ -1193,7 +1211,7 @@ static int pirq_enable_irq(struct pci_de
if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) {
char *msg = "";

- pin--; /* interrupt pins are numbered starting from 1 */
+ pin--; /* interrupt pins are numbered starting from 1 */

if (io_apic_assign_pci_irqs) {
int irq;
@@ -1207,35 +1225,41 @@ static int pirq_enable_irq(struct pci_de
*/
temp_dev = dev;
while (irq < 0 && dev->bus->parent) { /* go back to the bridge */
- struct pci_dev * bridge = dev->bus->self;
+ struct pci_dev *bridge = dev->bus->self;

pin = (pin + PCI_SLOT(dev->devfn)) % 4;
- irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
+ irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
PCI_SLOT(bridge->devfn), pin);
if (irq >= 0)
- printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n",
- pci_name(bridge), 'A' + pin, irq);
+ dev_warn(&dev->dev, "using bridge %s "
+ "INT %c to get IRQ %d\n",
+ pci_name(bridge), 'A' + pin,
+ irq);
dev = bridge;
}
dev = temp_dev;
if (irq >= 0) {
- printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
- pci_name(dev), 'A' + pin, irq);
+ dev_info(&dev->dev, "PCI->APIC IRQ transform: "
+ "INT %c -> IRQ %d\n", 'A' + pin, irq);
dev->irq = irq;
return 0;
} else
- msg = " Probably buggy MP table.";
+ msg = "; probably buggy MP table";
} else if (pci_probe & PCI_BIOS_IRQ_SCAN)
msg = "";
else
- msg = " Please try using pci=biosirq.";
+ msg = "; please try using pci=biosirq";

- /* With IDE legacy devices the IRQ lookup failure is not a problem.. */
- if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5))
+ /*
+ * With IDE legacy devices the IRQ lookup failure is not
+ * a problem..
+ */
+ if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE &&
+ !(dev->class & 0x5))
return 0;

- printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n",
- 'A' + pin, pci_name(dev), msg);
+ dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n",
+ 'A' + pin, msg);
}
return 0;
}
--- head-2011-03-11.orig/arch/x86/vdso/Makefile 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-11/arch/x86/vdso/Makefile 2011-02-01 14:38:38.000000000 +0100
@@ -65,9 +65,7 @@ obj-$(VDSO32-y) += vdso32-syms.lds
vdso32.so-$(VDSO32-y) += int80
vdso32.so-$(CONFIG_COMPAT) += syscall
vdso32.so-$(VDSO32-y) += sysenter
-xen-vdso32-$(subst 1,$(CONFIG_COMPAT),$(shell expr $(CONFIG_XEN_COMPAT)0 '<' 0x0302000)) += int80
-xen-vdso32-$(CONFIG_X86_32) += syscall
-vdso32.so-$(CONFIG_XEN) += $(xen-vdso32-y)
+vdso32.so-$(CONFIG_X86_XEN) += syscall

vdso32-images = $(vdso32.so-y:%=vdso32-%.so)

--- head-2011-03-11.orig/arch/x86/vdso/vdso32.S 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-11/arch/x86/vdso/vdso32.S 2011-02-01 14:38:38.000000000 +0100
@@ -9,7 +9,7 @@ vdso32_int80_end:

.globl vdso32_syscall_start, vdso32_syscall_end
vdso32_syscall_start:
-#ifdef CONFIG_COMPAT
+#if defined(CONFIG_COMPAT) || defined(CONFIG_X86_XEN)
.incbin "arch/x86/vdso/vdso32-syscall.so"
#endif
vdso32_syscall_end:
@@ -19,16 +19,4 @@ vdso32_sysenter_start:
.incbin "arch/x86/vdso/vdso32-sysenter.so"
vdso32_sysenter_end:

-#if defined(CONFIG_X86_64_XEN) && CONFIG_XEN_COMPAT < 0x030200
- .globl vdso32_int80_start, vdso32_int80_end
-vdso32_int80_start:
- .incbin "arch/x86/vdso/vdso32-int80.so"
-vdso32_int80_end:
-#elif defined(CONFIG_X86_XEN)
- .globl vdso32_syscall_start, vdso32_syscall_end
-vdso32_syscall_start:
- .incbin "arch/x86/vdso/vdso32-syscall.so"
-vdso32_syscall_end:
-#endif
-
__FINIT
--- head-2011-03-11.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:38:38.000000000 +0100
@@ -195,50 +195,28 @@ static __init void relocate_vdso(Elf32_E
}
}

-/*
- * These symbols are defined by vdso32.S to mark the bounds
- * of the ELF DSO images included therein.
- */
-extern const char vdso32_default_start, vdso32_default_end;
-extern const char vdso32_sysenter_start, vdso32_sysenter_end;
static struct page *vdso32_pages[1];

#ifdef CONFIG_X86_64

-#if CONFIG_XEN_COMPAT < 0x030200
-static int use_int80 = 1;
-#endif
-static int use_sysenter __read_mostly = -1;
-
-#define vdso32_sysenter() (use_sysenter > 0)
+#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32))
+#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))

-/* May not be __init: called during resume */
-void syscall32_cpu_init(void)
+void __cpuinit syscall32_cpu_init(void)
{
- static const struct callback_register cstar = {
+ static const struct callback_register __cpuinitconst cstar = {
.type = CALLBACKTYPE_syscall32,
.address = (unsigned long)ia32_cstar_target
};
- static const struct callback_register sysenter = {
+ static const struct callback_register __cpuinitconst sysenter = {
.type = CALLBACKTYPE_sysenter,
.address = (unsigned long)ia32_sysenter_target
};

- if ((HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0) ||
- (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0))
-#if CONFIG_XEN_COMPAT < 0x030200
- return;
- use_int80 = 0;
-#else
- BUG();
-#endif
-
- if (use_sysenter < 0) {
- if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
- use_sysenter = 1;
- if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR)
- use_sysenter = 1;
- }
+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) < 0)
+ setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) < 0)
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}

#define compat_uses_vma 1
@@ -250,6 +228,7 @@ static inline void map_compat_vdso(int m
#else /* CONFIG_X86_32 */

#define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP))
+#define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32))

extern asmlinkage void ia32pv_cstar_target(void);
static const struct callback_register __cpuinitconst cstar = {
@@ -265,13 +244,13 @@ void __cpuinit enable_sep_cpu(void)
.address = { __KERNEL_CS, (unsigned long)ia32pv_sysenter_target },
};

- if (boot_cpu_has(X86_FEATURE_SYSCALL)) {
+ if (vdso32_syscall()) {
if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0)
BUG();
return;
}

- if (!boot_cpu_has(X86_FEATURE_SEP))
+ if (!vdso32_sysenter())
return;

if (xen_feature(XENFEAT_supervisor_mode_kernel))
@@ -341,34 +320,26 @@ int __init sysenter_setup(void)

#ifdef CONFIG_X86_32
gate_vma_init();
-#endif

-#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT < 0x030200
- if (use_int80) {
- extern const char vdso32_int80_start, vdso32_int80_end;
-
- vsyscall = &vdso32_int80_start;
- vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
- } else
-#elif defined(CONFIG_X86_32)
- if (boot_cpu_has(X86_FEATURE_SYSCALL)
- && (boot_cpu_data.x86_vendor != X86_VENDOR_AMD
- || HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) != 0))
- setup_clear_cpu_cap(X86_FEATURE_SYSCALL);
- barrier(); /* until clear_bit()'s constraints are correct ... */
if (boot_cpu_has(X86_FEATURE_SYSCALL)) {
- extern const char vdso32_syscall_start, vdso32_syscall_end;
-
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD
+ && HYPERVISOR_callback_op(CALLBACKOP_register, &cstar) == 0)
+ setup_force_cpu_cap(X86_FEATURE_SYSCALL32);
+ else {
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL);
+ setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
+ }
+ }
+#endif
+ if (vdso32_syscall()) {
vsyscall = &vdso32_syscall_start;
vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start;
- } else
-#endif
- if (!vdso32_sysenter()) {
- vsyscall = &vdso32_default_start;
- vsyscall_len = &vdso32_default_end - &vdso32_default_start;
- } else {
+ } else if (vdso32_sysenter()){
vsyscall = &vdso32_sysenter_start;
vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start;
+ } else {
+ vsyscall = &vdso32_int80_start;
+ vsyscall_len = &vdso32_int80_end - &vdso32_int80_start;
}

memcpy(syscall_page, vsyscall, vsyscall_len);
--- head-2011-03-11.orig/arch/x86/xen/Kconfig 2011-01-31 17:49:31.000000000 +0100
+++ head-2011-03-11/arch/x86/xen/Kconfig 2011-02-01 14:38:38.000000000 +0100
@@ -31,14 +31,14 @@ config XEN_PVHVM
config XEN_MAX_DOMAIN_MEMORY
int
default 128
- depends on XEN
+ depends on PARAVIRT_XEN
help
This only affects the sizing of some bss arrays, the unused
portions of which are freed.

config XEN_SAVE_RESTORE
bool
- depends on XEN && PM
+ depends on PARAVIRT_XEN && PM
default y

config XEN_DEBUG_FS
--- head-2011-03-11.orig/drivers/acpi/processor_driver.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/acpi/processor_driver.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -512,10 +512,12 @@ static int __cpuinit acpi_processor_add(
|
|
per_cpu(processors, pr->id) = pr;
|
|
#endif
|
|
|
|
- sysdev = get_cpu_sysdev(pr->id);
|
|
- if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
|
|
- result = -EFAULT;
|
|
- goto err_free_cpumask;
|
|
+ if (pr->id != -1) {
|
|
+ sysdev = get_cpu_sysdev(pr->id);
|
|
+ if (sysfs_create_link(&device->dev.kobj, &sysdev->kobj, "sysdev")) {
|
|
+ result = -EFAULT;
|
|
+ goto err_free_cpumask;
|
|
+ }
|
|
}
|
|
|
|
#if defined(CONFIG_CPU_FREQ) || defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL)
|
|
@@ -599,7 +601,8 @@ static int acpi_processor_remove(struct
|
|
|
|
acpi_processor_power_exit(pr, device);
|
|
|
|
- sysfs_remove_link(&device->dev.kobj, "sysdev");
|
|
+ if (pr->id != -1)
|
|
+ sysfs_remove_link(&device->dev.kobj, "sysdev");
|
|
|
|
if (pr->cdev) {
|
|
sysfs_remove_link(&device->dev.kobj, "thermal_cooling");
|
|
--- head-2011-03-11.orig/drivers/acpi/processor_perflib.c 2011-01-31 17:02:29.000000000 +0100
|
|
+++ head-2011-03-11/drivers/acpi/processor_perflib.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -187,6 +187,12 @@ int acpi_processor_ppc_has_changed(struc
|
|
{
|
|
int ret;
|
|
|
|
+#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
+ /* Xen hypervisor can handle cpufreq _PPC event */
|
|
+ if (ignore_ppc < 0 && processor_pmperf_external())
|
|
+ ignore_ppc = 0;
|
|
+#endif
|
|
+
|
|
if (ignore_ppc) {
|
|
/*
|
|
* Only when it is notification event, the _OST object
|
|
--- head-2011-03-11.orig/drivers/char/tpm/tpm_vtpm.c 2011-01-31 14:53:38.000000000 +0100
|
|
+++ head-2011-03-11/drivers/char/tpm/tpm_vtpm.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -347,7 +347,7 @@ static int _vtpm_send_queued(struct tpm_
|
|
{
|
|
int rc;
|
|
int error = 0;
|
|
- long flags;
|
|
+ unsigned long flags;
|
|
unsigned char buffer[1];
|
|
struct vtpm_state *vtpms;
|
|
vtpms = (struct vtpm_state *)chip_get_private(chip);
|
|
--- head-2011-03-11.orig/drivers/dma/ioat/dma.h 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/dma/ioat/dma.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -363,6 +363,7 @@ __ioat_dca_init(struct pci_dev *pdev, vo
|
|
}
|
|
#define ioat_dca_init __ioat_dca_init
|
|
#define ioat2_dca_init __ioat_dca_init
|
|
+#define ioat3_dca_init __ioat_dca_init
|
|
#endif
|
|
|
|
#endif /* IOATDMA_H */
|
|
--- head-2011-03-11.orig/drivers/hwmon/coretemp-xen.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/drivers/hwmon/coretemp-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -360,10 +360,11 @@ static int coretemp_device_add(unsigned
|
|
if (err)
|
|
goto exit_entry_free;
|
|
|
|
- /* check if family 6, models 0xe, 0xf, 0x16, 0x17 */
|
|
+ /* check if family 6, models 0xe, 0xf, 0x16, 0x17, 0x1A */
|
|
if (info.x86 != 0x6 ||
|
|
!((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf) ||
|
|
- (pdev_entry->x86_model == 0x16) || (pdev_entry->x86_model == 0x17))) {
|
|
+ (pdev_entry->x86_model == 0x16) || (pdev_entry->x86_model == 0x17) ||
|
|
+ (pdev_entry->x86_model == 0x1A))) {
|
|
|
|
/* supported CPU not found, but report the unknown
|
|
family 6 CPU */
|
|
--- head-2011-03-11.orig/drivers/pci/msi-xen.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/pci/msi-xen.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -53,12 +53,10 @@ arch_msi_check_device(struct pci_dev *de
|
|
return 0;
|
|
}
|
|
|
|
-static void msi_set_enable(struct pci_dev *dev, int enable)
|
|
+static void __msi_set_enable(struct pci_dev *dev, int pos, int enable)
|
|
{
|
|
- int pos;
|
|
u16 control;
|
|
|
|
- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
|
|
if (pos) {
|
|
pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &control);
|
|
control &= ~PCI_MSI_FLAGS_ENABLE;
|
|
@@ -68,6 +66,11 @@ static void msi_set_enable(struct pci_de
|
|
}
|
|
}
|
|
|
|
+static void msi_set_enable(struct pci_dev *dev, int enable)
|
|
+{
|
|
+ __msi_set_enable(dev, pci_find_capability(dev, PCI_CAP_ID_MSI), enable);
|
|
+}
|
|
+
|
|
static void msix_set_enable(struct pci_dev *dev, int enable)
|
|
{
|
|
int pos;
|
|
@@ -180,8 +183,7 @@ static int msi_get_dev_owner(struct pci_
|
|
|
|
BUG_ON(!is_initial_xendomain());
|
|
if (get_owner && (owner = get_owner(dev)) >= 0) {
|
|
- printk(KERN_INFO "get owner for dev %x get %x \n",
|
|
- dev->devfn, owner);
|
|
+ dev_info(&dev->dev, "get owner: %x \n", owner);
|
|
return owner;
|
|
}
|
|
|
|
@@ -201,7 +203,7 @@ static int msi_unmap_pirq(struct pci_dev
|
|
? pirq : evtchn_get_xen_pirq(pirq);
|
|
|
|
if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_unmap_pirq, &unmap)))
|
|
- printk(KERN_WARNING "unmap irq %x failed\n", pirq);
|
|
+ dev_warn(&dev->dev, "unmap irq %d failed\n", pirq);
|
|
|
|
if (rc < 0)
|
|
return rc;
|
|
@@ -249,7 +251,7 @@ static int msi_map_vector(struct pci_dev
|
|
map_irq.table_base = table_base;
|
|
|
|
if ((rc = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq)))
|
|
- printk(KERN_WARNING "map irq failed\n");
|
|
+ dev_warn(&dev->dev, "map irq failed\n");
|
|
|
|
if (rc < 0)
|
|
return rc;
|
|
@@ -360,10 +362,9 @@ static int msix_capability_init(struct p
|
|
mapped = 0;
|
|
list_for_each_entry(pirq_entry, &msi_dev_entry->pirq_list_head, list) {
|
|
if (pirq_entry->entry_nr == entries[i].entry) {
|
|
- printk(KERN_WARNING "msix entry %d for dev %02x:%02x:%01x are \
|
|
- not freed before acquire again.\n", entries[i].entry,
|
|
- dev->bus->number, PCI_SLOT(dev->devfn),
|
|
- PCI_FUNC(dev->devfn));
|
|
+ dev_warn(&dev->dev,
|
|
+ "msix entry %d was not freed\n",
|
|
+ entries[i].entry);
|
|
(entries + i)->vector = pirq_entry->pirq;
|
|
mapped = 1;
|
|
break;
|
|
@@ -489,9 +490,8 @@ int pci_enable_msi(struct pci_dev* dev)
|
|
|
|
/* Check whether driver already requested for MSI-X irqs */
|
|
if (dev->msix_enabled) {
|
|
- printk(KERN_INFO "PCI: %s: Can't enable MSI. "
|
|
- "Device already has MSI-X enabled\n",
|
|
- pci_name(dev));
|
|
+ dev_info(&dev->dev, "can't enable MSI "
|
|
+ "(MSI-X already enabled)\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
@@ -573,7 +573,8 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
temp = dev->irq;
|
|
ret = pci_frontend_enable_msix(dev, entries, nvec);
|
|
if (ret) {
|
|
- printk("get %x from pci_frontend_enable_msix\n", ret);
|
|
+ dev_warn(&dev->dev,
|
|
+ "got %x from frontend_enable_msix\n", ret);
|
|
return ret;
|
|
}
|
|
dev->msix_enabled = 1;
|
|
@@ -624,9 +625,8 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
temp = dev->irq;
|
|
/* Check whether driver already requested for MSI vector */
|
|
if (dev->msi_enabled) {
|
|
- printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
|
|
- "Device already has an MSI irq assigned\n",
|
|
- pci_name(dev));
|
|
+ dev_info(&dev->dev, "can't enable MSI-X "
|
|
+ "(MSI IRQ already assigned)\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/Makefile 2011-02-28 15:13:33.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/Makefile 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -1,4 +1,4 @@
|
|
-obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o
|
|
+obj-$(CONFIG_PARAVIRT_XEN) += grant-table.o features.o events.o manage.o
|
|
xen-balloon-$(CONFIG_PARAVIRT_XEN) := balloon.o
|
|
|
|
xen-balloon-$(CONFIG_XEN) := balloon/
|
|
--- head-2011-03-11.orig/drivers/xen/balloon/balloon.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/balloon/balloon.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -82,7 +82,7 @@ struct balloon_stats balloon_stats;
|
|
/* We increase/decrease in batches which fit in a page */
|
|
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
|
|
|
|
-#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
|
|
+#ifdef CONFIG_HIGHMEM
|
|
#define inc_totalhigh_pages() (totalhigh_pages++)
|
|
#define dec_totalhigh_pages() (totalhigh_pages--)
|
|
#else
|
|
--- head-2011-03-11.orig/drivers/xen/balloon/sysfs.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/balloon/sysfs.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -45,6 +45,7 @@
|
|
|
|
#define BALLOON_SHOW(name, format, args...) \
|
|
static ssize_t show_##name(struct sys_device *dev, \
|
|
+ struct sysdev_attribute *attr, \
|
|
char *buf) \
|
|
{ \
|
|
return sprintf(buf, format, ##args); \
|
|
@@ -56,14 +57,15 @@ BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(b
|
|
BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(bs.balloon_high));
|
|
BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(bs.driver_pages));
|
|
|
|
-static ssize_t show_target_kb(struct sys_device *dev, char *buf)
|
|
+static ssize_t show_target_kb(struct sys_device *dev,
|
|
+ struct sysdev_attribute *attr, char *buf)
|
|
{
|
|
return sprintf(buf, "%lu\n", PAGES2KB(bs.target_pages));
|
|
}
|
|
|
|
static ssize_t store_target_kb(struct sys_device *dev,
|
|
- const char *buf,
|
|
- size_t count)
|
|
+ struct sysdev_attribute *attr,
|
|
+ const char *buf, size_t count)
|
|
{
|
|
char memstring[64], *endchar;
|
|
unsigned long long target_bytes;
|
|
--- head-2011-03-11.orig/drivers/xen/blktap/blktap.c 2011-02-17 10:11:08.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/blktap/blktap.c 2011-02-17 10:11:18.000000000 +0100
|
|
@@ -54,6 +54,7 @@
|
|
#include <linux/gfp.h>
|
|
#include <linux/poll.h>
|
|
#include <linux/delay.h>
|
|
+#include <linux/nsproxy.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
#define MAX_TAP_DEV 256 /*the maximum number of tapdisk ring devices */
|
|
@@ -502,7 +503,7 @@ found:
|
|
|
|
if ((class = get_xen_class()) != NULL)
|
|
device_create(class, NULL, MKDEV(blktap_major, minor),
|
|
- "blktap%d", minor);
|
|
+ NULL, "blktap%d", minor);
|
|
}
|
|
|
|
out:
|
|
@@ -1743,7 +1744,8 @@ static int __init blkif_init(void)
|
|
* We only create the device when a request of a new device is
|
|
* made.
|
|
*/
|
|
- device_create(class, NULL, MKDEV(blktap_major, 0), "blktap0");
|
|
+ device_create(class, NULL, MKDEV(blktap_major, 0), NULL,
|
|
+ "blktap0");
|
|
} else {
|
|
/* this is bad, but not fatal */
|
|
WPRINTK("blktap: sysfs xen_class not created\n");
|
|
--- head-2011-03-11.orig/drivers/xen/blktap2/device.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/blktap2/device.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -3,6 +3,7 @@
|
|
#include <linux/cdrom.h>
|
|
#include <linux/hdreg.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/version.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include <scsi/scsi.h>
|
|
--- head-2011-03-11.orig/drivers/xen/blktap2/sysfs.c 2011-03-11 10:58:58.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/blktap2/sysfs.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -307,8 +307,8 @@ blktap_sysfs_create(struct blktap *tap)
|
|
|
|
ring = &tap->ring;
|
|
|
|
- dev = device_create_drvdata(class, NULL, ring->devno, tap,
|
|
- "blktap%d", tap->minor);
|
|
+ dev = device_create(class, NULL, ring->devno, tap,
|
|
+ "blktap%d", tap->minor);
|
|
if (IS_ERR(dev))
|
|
return PTR_ERR(dev);
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/char/mem.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/char/mem.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -35,7 +35,7 @@ static inline int uncached_access(struct
|
|
|
|
static inline int range_is_allowed(unsigned long pfn, unsigned long size)
|
|
{
|
|
-#ifdef CONFIG_NONPROMISC_DEVMEM
|
|
+#ifdef CONFIG_STRICT_DEVMEM
|
|
u64 from = ((u64)pfn) << PAGE_SHIFT;
|
|
u64 to = from + size;
|
|
u64 cursor = from;
|
|
@@ -172,7 +172,10 @@ static void mmap_mem_close(struct vm_are
|
|
|
|
static struct vm_operations_struct mmap_mem_ops = {
|
|
.open = mmap_mem_open,
|
|
- .close = mmap_mem_close
|
|
+ .close = mmap_mem_close,
|
|
+#ifdef CONFIG_HAVE_IOREMAP_PROT
|
|
+ .access = generic_access_phys
|
|
+#endif
|
|
};
|
|
|
|
static int xen_mmap_mem(struct file * file, struct vm_area_struct * vma)
|
|
--- head-2011-03-11.orig/drivers/xen/console/console.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/console/console.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -431,9 +431,7 @@ static void __xencons_tx_flush(void)
|
|
|
|
if (work_done && (xencons_tty != NULL)) {
|
|
wake_up_interruptible(&xencons_tty->write_wait);
|
|
- if ((xencons_tty->flags & (1 << TTY_DO_WRITE_WAKEUP)) &&
|
|
- (xencons_tty->ldisc.write_wakeup != NULL))
|
|
- (xencons_tty->ldisc.write_wakeup)(xencons_tty);
|
|
+ tty_wakeup(xencons_tty);
|
|
}
|
|
}
|
|
|
|
@@ -634,8 +632,8 @@ static void xencons_close(struct tty_str
|
|
tty->closing = 1;
|
|
tty_wait_until_sent(tty, 0);
|
|
tty_driver_flush_buffer(tty);
|
|
- if (tty->ldisc.flush_buffer != NULL)
|
|
- tty->ldisc.flush_buffer(tty);
|
|
+ if (tty->ldisc.ops->flush_buffer != NULL)
|
|
+ tty->ldisc.ops->flush_buffer(tty);
|
|
tty->closing = 0;
|
|
spin_lock_irqsave(&xencons_lock, flags);
|
|
xencons_tty = NULL;
|
|
--- head-2011-03-11.orig/drivers/xen/core/evtchn.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/evtchn.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -126,7 +126,11 @@ static int irq_bindcount[NR_IRQS];
|
|
|
|
#ifdef CONFIG_SMP
|
|
|
|
+#if CONFIG_NR_CPUS <= 256
|
|
static u8 cpu_evtchn[NR_EVENT_CHANNELS];
|
|
+#else
|
|
+static u16 cpu_evtchn[NR_EVENT_CHANNELS];
|
|
+#endif
|
|
static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
|
|
|
|
static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
|
|
@@ -767,8 +771,9 @@ static struct irq_chip dynirq_chip = {
|
|
};
|
|
|
|
/* Bitmap indicating which PIRQs require Xen to be notified on unmask. */
|
|
-static int pirq_eoi_does_unmask;
|
|
+static bool pirq_eoi_does_unmask;
|
|
static unsigned long *pirq_needs_eoi;
|
|
+static DECLARE_BITMAP(probing_pirq, NR_PIRQS);
|
|
|
|
static void pirq_unmask_and_notify(unsigned int evtchn, unsigned int irq)
|
|
{
|
|
@@ -815,25 +820,31 @@ static inline void pirq_query_unmask(int
|
|
set_bit(irq - PIRQ_BASE, pirq_needs_eoi);
|
|
}
|
|
|
|
-/*
|
|
- * On startup, if there is no action associated with the IRQ then we are
|
|
- * probing. In this case we should not share with others as it will confuse us.
|
|
- */
|
|
-#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
|
|
+static int set_type_pirq(unsigned int irq, unsigned int type)
|
|
+{
|
|
+ if (type != IRQ_TYPE_PROBE)
|
|
+ return -EINVAL;
|
|
+ set_bit(irq - PIRQ_BASE, probing_pirq);
|
|
+ return 0;
|
|
+}
|
|
|
|
static void enable_pirq(unsigned int irq)
|
|
{
|
|
struct evtchn_bind_pirq bind_pirq;
|
|
int evtchn = evtchn_from_irq(irq);
|
|
|
|
- if (VALID_EVTCHN(evtchn))
|
|
+ if (VALID_EVTCHN(evtchn)) {
|
|
+ clear_bit(irq - PIRQ_BASE, probing_pirq);
|
|
goto out;
|
|
+ }
|
|
|
|
bind_pirq.pirq = evtchn_get_xen_pirq(irq);
|
|
/* NB. We are happy to share unless we are probing. */
|
|
- bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE;
|
|
+ bind_pirq.flags = test_and_clear_bit(irq - PIRQ_BASE, probing_pirq)
|
|
+ || (irq_desc[irq].status & IRQ_AUTODETECT)
|
|
+ ? 0 : BIND_PIRQ__WILL_SHARE;
|
|
if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) {
|
|
- if (!probing_irq(irq))
|
|
+ if (bind_pirq.flags)
|
|
pr_info("Failed to obtain physical IRQ %d\n", irq);
|
|
return;
|
|
}
|
|
@@ -910,6 +921,7 @@ static struct irq_chip pirq_chip = {
|
|
.ack = ack_pirq,
|
|
.end = end_pirq,
|
|
.eoi = end_pirq,
|
|
+ .set_type = set_type_pirq,
|
|
#ifdef CONFIG_SMP
|
|
.set_affinity = set_affinity_irq,
|
|
#endif
|
|
@@ -985,6 +997,7 @@ void disable_all_local_evtchn(void)
|
|
synch_set_bit(i, &s->evtchn_mask[0]);
|
|
}
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
static void restore_cpu_virqs(unsigned int cpu)
|
|
{
|
|
struct evtchn_bind_virq bind_virq;
|
|
@@ -1077,6 +1090,7 @@ void irq_resume(void)
|
|
}
|
|
|
|
}
|
|
+#endif
|
|
|
|
#if defined(CONFIG_X86_IO_APIC)
|
|
#define identity_mapped_irq(irq) (!IO_APIC_IRQ((irq) - PIRQ_BASE))
|
|
@@ -1159,7 +1173,7 @@ void __init xen_init_IRQ(void)
|
|
* BITS_TO_LONGS(ALIGN(NR_PIRQS, PAGE_SIZE * 8)));
|
|
eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
|
|
if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
|
|
- pirq_eoi_does_unmask = 1;
|
|
+ pirq_eoi_does_unmask = true;
|
|
|
|
/* No event channels are 'live' right now. */
|
|
for (i = 0; i < NR_EVENT_CHANNELS; i++)
|
|
--- head-2011-03-11.orig/drivers/xen/core/gnttab.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/gnttab.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -448,6 +448,7 @@ static int map_pte_fn(pte_t *pte, struct
|
|
return 0;
|
|
}
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
static int unmap_pte_fn(pte_t *pte, struct page *pmd_page,
|
|
unsigned long addr, void *data)
|
|
{
|
|
@@ -455,6 +456,7 @@ static int unmap_pte_fn(pte_t *pte, stru
|
|
set_pte_at(&init_mm, addr, pte, __pte(0));
|
|
return 0;
|
|
}
|
|
+#endif
|
|
|
|
void *arch_gnttab_alloc_shared(unsigned long *frames)
|
|
{
|
|
@@ -635,6 +637,75 @@ void __gnttab_dma_map_page(struct page *
|
|
} while (unlikely(read_seqretry(&gnttab_dma_lock, seq)));
|
|
}
|
|
|
|
+#ifdef __HAVE_ARCH_PTE_SPECIAL
|
|
+
|
|
+static unsigned int GNTMAP_pte_special;
|
|
+
|
|
+bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *map,
|
|
+ unsigned int count)
|
|
+{
|
|
+ unsigned int i;
|
|
+
|
|
+ if (unlikely(cmd != GNTTABOP_map_grant_ref))
|
|
+ count = 0;
|
|
+
|
|
+ for (i = 0; i < count; ++i, ++map) {
|
|
+ if (!(map->flags & GNTMAP_host_map)
|
|
+ || !(map->flags & GNTMAP_application_map))
|
|
+ continue;
|
|
+ if (GNTMAP_pte_special)
|
|
+ map->flags |= GNTMAP_pte_special;
|
|
+ else {
|
|
+ BUG_ON(xen_feature(XENFEAT_auto_translated_physmap));
|
|
+ return true;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return false;
|
|
+}
|
|
+EXPORT_SYMBOL(gnttab_pre_map_adjust);
|
|
+
|
|
+#if CONFIG_XEN_COMPAT < 0x030400
|
|
+int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *map, unsigned int count)
|
|
+{
|
|
+ unsigned int i;
|
|
+ int rc = 0;
|
|
+
|
|
+ for (i = 0; i < count && rc == 0; ++i, ++map) {
|
|
+ pte_t pte;
|
|
+
|
|
+ if (!(map->flags & GNTMAP_host_map)
|
|
+ || !(map->flags & GNTMAP_application_map))
|
|
+ continue;
|
|
+
|
|
+#ifdef CONFIG_X86
|
|
+ pte = __pte_ma((map->dev_bus_addr | _PAGE_PRESENT | _PAGE_USER
|
|
+ | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX
|
|
+ | _PAGE_SPECIAL)
|
|
+ & __supported_pte_mask);
|
|
+#else
|
|
+#error Architecture not yet supported.
|
|
+#endif
|
|
+ if (!(map->flags & GNTMAP_readonly))
|
|
+ pte = pte_mkwrite(pte);
|
|
+
|
|
+ if (map->flags & GNTMAP_contains_pte) {
|
|
+ mmu_update_t u;
|
|
+
|
|
+ u.ptr = map->host_addr;
|
|
+ u.val = __pte_val(pte);
|
|
+ rc = HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
|
|
+ } else
|
|
+ rc = HYPERVISOR_update_va_mapping(map->host_addr, pte, 0);
|
|
+ }
|
|
+
|
|
+ return rc;
|
|
+}
|
|
+EXPORT_SYMBOL(gnttab_post_map_adjust);
|
|
+#endif
|
|
+
|
|
+#endif /* __HAVE_ARCH_PTE_SPECIAL */
|
|
+
|
|
int gnttab_resume(void)
|
|
{
|
|
if (max_nr_grant_frames() < nr_grant_frames)
|
|
@@ -642,6 +713,7 @@ int gnttab_resume(void)
|
|
return gnttab_map(0, nr_grant_frames - 1);
|
|
}
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
int gnttab_suspend(void)
|
|
{
|
|
#ifdef CONFIG_X86
|
|
@@ -651,6 +723,7 @@ int gnttab_suspend(void)
|
|
#endif
|
|
return 0;
|
|
}
|
|
+#endif
|
|
|
|
#else /* !CONFIG_XEN */
|
|
|
|
@@ -761,6 +834,18 @@ int __devinit gnttab_init(void)
|
|
gnttab_free_count = nr_init_grefs - NR_RESERVED_ENTRIES;
|
|
gnttab_free_head = NR_RESERVED_ENTRIES;
|
|
|
|
+#if defined(CONFIG_XEN) && defined(__HAVE_ARCH_PTE_SPECIAL)
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap)
|
|
+ && xen_feature(XENFEAT_gnttab_map_avail_bits)) {
|
|
+#ifdef CONFIG_X86
|
|
+ GNTMAP_pte_special = (__pte_val(pte_mkspecial(__pte_ma(0)))
|
|
+ >> _PAGE_BIT_UNUSED1) << _GNTMAP_guest_avail0;
|
|
+#else
|
|
+#error Architecture not yet supported.
|
|
+#endif
|
|
+ }
|
|
+#endif
|
|
+
|
|
return 0;
|
|
|
|
ini_nomem:
|
|
--- head-2011-03-11.orig/drivers/xen/core/machine_kexec.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/machine_kexec.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -57,8 +57,7 @@ void __init xen_machine_kexec_setup_reso
|
|
|
|
/* allocate xen_phys_cpus */
|
|
|
|
- xen_phys_cpus = alloc_bootmem_low(k * sizeof(struct resource));
|
|
- BUG_ON(xen_phys_cpus == NULL);
|
|
+ xen_phys_cpus = alloc_bootmem(k * sizeof(struct resource));
|
|
|
|
/* fill in xen_phys_cpus with per-cpu crash note information */
|
|
|
|
@@ -91,7 +90,7 @@ void __init xen_machine_kexec_setup_reso
|
|
xen_hypervisor_res.start = range.start;
|
|
xen_hypervisor_res.end = range.start + range.size - 1;
|
|
xen_hypervisor_res.flags = IORESOURCE_BUSY | IORESOURCE_MEM;
|
|
-#ifdef CONFIG_X86_64
|
|
+#ifdef CONFIG_X86
|
|
insert_resource(&iomem_resource, &xen_hypervisor_res);
|
|
#endif
|
|
|
|
@@ -106,7 +105,7 @@ void __init xen_machine_kexec_setup_reso
|
|
if (range.size) {
|
|
crashk_res.start = range.start;
|
|
crashk_res.end = range.start + range.size - 1;
|
|
-#ifdef CONFIG_X86_64
|
|
+#ifdef CONFIG_X86
|
|
insert_resource(&iomem_resource, &crashk_res);
|
|
#endif
|
|
}
|
|
@@ -141,15 +140,13 @@ void __init xen_machine_kexec_setup_reso
|
|
xen_max_nr_phys_cpus))
|
|
goto err;
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
+#ifdef CONFIG_X86
|
|
for (k = 0; k < xen_max_nr_phys_cpus; k++) {
|
|
res = xen_phys_cpus + k;
|
|
if (!res->parent) /* outside of xen_hypervisor_res range */
|
|
insert_resource(&iomem_resource, res);
|
|
}
|
|
-#endif
|
|
|
|
-#ifdef CONFIG_X86
|
|
if (xen_create_contiguous_region((unsigned long)&vmcoreinfo_note,
|
|
get_order(sizeof(vmcoreinfo_note)),
|
|
BITS_PER_LONG))
|
|
@@ -168,7 +165,7 @@ void __init xen_machine_kexec_setup_reso
|
|
return;
|
|
}
|
|
|
|
-#ifndef CONFIG_X86_64
|
|
+#ifndef CONFIG_X86
|
|
void __init xen_machine_kexec_register_resources(struct resource *res)
|
|
{
|
|
int k;
|
|
--- head-2011-03-11.orig/drivers/xen/core/machine_reboot.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/machine_reboot.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -52,6 +52,7 @@ void machine_power_off(void)
|
|
HYPERVISOR_shutdown(SHUTDOWN_poweroff);
|
|
}
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
static void pre_suspend(void)
|
|
{
|
|
HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
@@ -108,6 +109,7 @@ static void post_suspend(int suspend_can
|
|
HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
|
|
virt_to_mfn(pfn_to_mfn_frame_list_list);
|
|
}
|
|
+#endif
|
|
|
|
#else /* !(defined(__i386__) || defined(__x86_64__)) */
|
|
|
|
@@ -126,6 +128,7 @@ static void post_suspend(int suspend_can
|
|
|
|
#endif
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
struct suspend {
|
|
int fast_suspend;
|
|
void (*resume_notifier)(int);
|
|
@@ -221,7 +224,8 @@ int __xen_suspend(int fast_suspend, void
|
|
|
|
if (fast_suspend) {
|
|
xenbus_suspend();
|
|
- err = stop_machine_run(take_machine_down, &suspend, 0);
|
|
+ err = stop_machine(take_machine_down, &suspend,
|
|
+ &cpumask_of_cpu(0));
|
|
if (err < 0)
|
|
xenbus_suspend_cancel();
|
|
} else {
|
|
@@ -244,3 +248,4 @@ int __xen_suspend(int fast_suspend, void
|
|
|
|
return 0;
|
|
}
|
|
+#endif
|
|
--- head-2011-03-11.orig/drivers/xen/core/reboot.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/reboot.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -28,17 +28,12 @@ MODULE_LICENSE("Dual BSD/GPL");
|
|
/* Ignore multiple shutdown requests. */
|
|
static int shutting_down = SHUTDOWN_INVALID;
|
|
|
|
-/* Was last suspend request cancelled? */
|
|
-static int suspend_cancelled;
|
|
-
|
|
/* Can we leave APs online when we suspend? */
|
|
static int fast_suspend;
|
|
|
|
static void __shutdown_handler(struct work_struct *unused);
|
|
static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler);
|
|
|
|
-static int setup_suspend_evtchn(void);
|
|
-
|
|
int __xen_suspend(int fast_suspend, void (*resume_notifier)(int));
|
|
|
|
static int shutdown_process(void *__unused)
|
|
@@ -68,6 +63,13 @@ static int shutdown_process(void *__unus
|
|
return 0;
|
|
}
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
+
|
|
+static int setup_suspend_evtchn(void);
|
|
+
|
|
+/* Was last suspend request cancelled? */
|
|
+static int suspend_cancelled;
|
|
+
|
|
static void xen_resume_notifier(int _suspend_cancelled)
|
|
{
|
|
int old_state = xchg(&shutting_down, SHUTDOWN_RESUMING);
|
|
@@ -117,6 +119,10 @@ static int xen_suspend(void *__unused)
|
|
return 0;
|
|
}
|
|
|
|
+#else
|
|
+# define xen_suspend NULL
|
|
+#endif
|
|
+
|
|
static void switch_shutdown_state(int new_state)
|
|
{
|
|
int prev_state, old_state = SHUTDOWN_INVALID;
|
|
@@ -193,8 +199,10 @@ static void shutdown_handler(struct xenb
|
|
new_state = SHUTDOWN_POWEROFF;
|
|
else if (strcmp(str, "reboot") == 0)
|
|
ctrl_alt_del();
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
else if (strcmp(str, "suspend") == 0)
|
|
new_state = SHUTDOWN_SUSPEND;
|
|
+#endif
|
|
else if (strcmp(str, "halt") == 0)
|
|
new_state = SHUTDOWN_HALT;
|
|
else
|
|
@@ -245,6 +253,7 @@ static struct xenbus_watch sysrq_watch =
|
|
.callback = sysrq_handler
|
|
};
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
static irqreturn_t suspend_int(int irq, void* dev_id)
|
|
{
|
|
switch_shutdown_state(SHUTDOWN_SUSPEND);
|
|
@@ -272,6 +281,9 @@ static int setup_suspend_evtchn(void)
|
|
|
|
return 0;
|
|
}
|
|
+#else
|
|
+#define setup_suspend_evtchn() 0
|
|
+#endif
|
|
|
|
static int setup_shutdown_watcher(void)
|
|
{
|
|
--- head-2011-03-11.orig/drivers/xen/core/smpboot.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/smpboot.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -27,6 +27,7 @@
|
|
|
|
extern irqreturn_t smp_reschedule_interrupt(int, void *);
|
|
extern irqreturn_t smp_call_function_interrupt(int, void *);
|
|
+extern irqreturn_t smp_call_function_single_interrupt(int, void *);
|
|
|
|
extern int local_setup_timer(unsigned int cpu);
|
|
extern void local_teardown_timer(unsigned int cpu);
|
|
@@ -47,8 +48,10 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
|
|
|
|
static DEFINE_PER_CPU(int, resched_irq);
|
|
static DEFINE_PER_CPU(int, callfunc_irq);
|
|
+static DEFINE_PER_CPU(int, call1func_irq);
|
|
static char resched_name[NR_CPUS][15];
|
|
static char callfunc_name[NR_CPUS][15];
|
|
+static char call1func_name[NR_CPUS][15];
|
|
|
|
void __init prefill_possible_map(void)
|
|
{
|
|
@@ -64,20 +67,19 @@ void __init prefill_possible_map(void)
|
|
break;
|
|
#endif
|
|
rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
|
|
- if (rc >= 0)
|
|
+ if (rc >= 0) {
|
|
cpu_set(i, cpu_possible_map);
|
|
+ nr_cpu_ids = i + 1;
|
|
+ }
|
|
}
|
|
}
|
|
|
|
-void __init smp_alloc_memory(void)
|
|
-{
|
|
-}
|
|
-
|
|
static int __cpuinit xen_smp_intr_init(unsigned int cpu)
|
|
{
|
|
int rc;
|
|
|
|
- per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
|
|
+ per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) =
|
|
+ per_cpu(call1func_irq, cpu) = -1;
|
|
|
|
sprintf(resched_name[cpu], "resched%u", cpu);
|
|
rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
|
|
@@ -101,6 +103,17 @@ static int __cpuinit xen_smp_intr_init(u
|
|
goto fail;
|
|
per_cpu(callfunc_irq, cpu) = rc;
|
|
|
|
+ sprintf(call1func_name[cpu], "call1func%u", cpu);
|
|
+ rc = bind_ipi_to_irqhandler(CALL_FUNC_SINGLE_VECTOR,
|
|
+ cpu,
|
|
+ smp_call_function_single_interrupt,
|
|
+ IRQF_DISABLED|IRQF_NOBALANCING,
|
|
+ call1func_name[cpu],
|
|
+ NULL);
|
|
+ if (rc < 0)
|
|
+ goto fail;
|
|
+ per_cpu(call1func_irq, cpu) = rc;
|
|
+
|
|
rc = xen_spinlock_init(cpu);
|
|
if (rc < 0)
|
|
goto fail;
|
|
@@ -115,6 +128,8 @@ static int __cpuinit xen_smp_intr_init(u
|
|
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
|
|
if (per_cpu(callfunc_irq, cpu) >= 0)
|
|
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
|
|
+ if (per_cpu(call1func_irq, cpu) >= 0)
|
|
+ unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
|
|
xen_spinlock_cleanup(cpu);
|
|
return rc;
|
|
}
|
|
@@ -127,6 +142,7 @@ static void __cpuinit xen_smp_intr_exit(
|
|
|
|
unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
|
|
unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
|
|
+ unbind_from_irqhandler(per_cpu(call1func_irq, cpu), NULL);
|
|
xen_spinlock_cleanup(cpu);
|
|
}
|
|
#endif
|
|
@@ -134,11 +150,7 @@ static void __cpuinit xen_smp_intr_exit(
|
|
void __cpuinit cpu_bringup(void)
|
|
{
|
|
cpu_init();
|
|
-#ifdef __i386__
|
|
identify_secondary_cpu(¤t_cpu_data);
|
|
-#else
|
|
- identify_cpu(¤t_cpu_data);
|
|
-#endif
|
|
touch_softlockup_watchdog();
|
|
preempt_disable();
|
|
local_irq_enable();
|
|
@@ -218,9 +230,6 @@ void __init smp_prepare_cpus(unsigned in
|
|
struct task_struct *idle;
|
|
int apicid;
|
|
struct vcpu_get_physid cpu_id;
|
|
-#ifdef __x86_64__
|
|
- struct desc_ptr *gdt_descr;
|
|
-#endif
|
|
void *gdt_addr;
|
|
|
|
apicid = 0;
|
|
@@ -249,20 +258,10 @@ void __init smp_prepare_cpus(unsigned in
|
|
if (IS_ERR(idle))
|
|
panic("failed fork for CPU %d", cpu);
|
|
|
|
-#ifdef __x86_64__
|
|
- gdt_descr = &cpu_gdt_descr[cpu];
|
|
- gdt_descr->address = get_zeroed_page(GFP_KERNEL);
|
|
- if (unlikely(!gdt_descr->address)) {
|
|
- pr_crit("CPU%d failed to allocate GDT\n", cpu);
|
|
- continue;
|
|
- }
|
|
- gdt_descr->size = GDT_SIZE;
|
|
- memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
|
|
- gdt_addr = (void *)gdt_descr->address;
|
|
-#else
|
|
+#ifdef __i386__
|
|
init_gdt(cpu);
|
|
- gdt_addr = get_cpu_gdt_table(cpu);
|
|
#endif
|
|
+ gdt_addr = get_cpu_gdt_table(cpu);
|
|
make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
|
|
|
|
apicid = cpu;
|
|
@@ -305,8 +304,8 @@ void __init smp_prepare_boot_cpu(void)
|
|
{
|
|
#ifdef __i386__
|
|
init_gdt(smp_processor_id());
|
|
- switch_to_new_gdt();
|
|
#endif
|
|
+ switch_to_new_gdt();
|
|
prefill_possible_map();
|
|
}
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/core/spinlock.c 2011-03-15 16:17:10.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/spinlock.c 2011-03-15 16:51:35.000000000 +0100
|
|
@@ -5,6 +5,10 @@
|
|
* portions of this file.
|
|
*/
|
|
#define XEN_SPINLOCK_SOURCE
|
|
+#include <linux/spinlock_types.h>
|
|
+
|
|
+#ifdef TICKET_SHIFT
|
|
+
|
|
#include <linux/init.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
@@ -55,6 +59,7 @@ void __cpuinit xen_spinlock_cleanup(unsi
|
|
WARN_ON(HYPERVISOR_event_channel_op(EVTCHNOP_close, &close));
|
|
}
|
|
|
|
+#ifdef CONFIG_PM_SLEEP
|
|
void __cpuinit spinlock_resume(void)
|
|
{
|
|
unsigned int cpu;
|
|
@@ -64,6 +69,7 @@ void __cpuinit spinlock_resume(void)
|
|
xen_spinlock_init(cpu);
|
|
}
|
|
}
|
|
+#endif
|
|
|
|
static unsigned int spin_adjust(struct spinning *spinning,
|
|
const raw_spinlock_t *lock,
|
|
@@ -86,7 +92,7 @@ static unsigned int spin_adjust(struct s
|
|
|
|
unsigned int xen_spin_adjust(const raw_spinlock_t *lock, unsigned int token)
|
|
{
|
|
- return spin_adjust(__get_cpu_var(spinning), lock, token);
|
|
+ return spin_adjust(x86_read_percpu(spinning), lock, token);
|
|
}
|
|
|
|
bool xen_spin_wait(raw_spinlock_t *lock, unsigned int *ptok,
|
|
@@ -99,21 +105,21 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
|
|
/* If kicker interrupt not initialized yet, just spin. */
|
|
if (unlikely(!cpu_online(raw_smp_processor_id()))
|
|
- || unlikely(!__get_cpu_var(poll_evtchn)))
|
|
+ || unlikely(!x86_read_percpu(poll_evtchn)))
|
|
return false;
|
|
|
|
/* announce we're spinning */
|
|
spinning.ticket = *ptok >> TICKET_SHIFT;
|
|
spinning.lock = lock;
|
|
- spinning.prev = __get_cpu_var(spinning);
|
|
+ spinning.prev = x86_read_percpu(spinning);
|
|
smp_wmb();
|
|
- __get_cpu_var(spinning) = &spinning;
|
|
+ x86_write_percpu(spinning, &spinning);
|
|
upcall_mask = current_vcpu_info()->evtchn_upcall_mask;
|
|
|
|
do {
|
|
bool nested = false;
|
|
|
|
- clear_evtchn(__get_cpu_var(poll_evtchn));
|
|
+ clear_evtchn(x86_read_percpu(poll_evtchn));
|
|
|
|
/*
|
|
* Check again to make sure it didn't become free while
|
|
@@ -126,7 +132,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
* without rechecking the lock.
|
|
*/
|
|
if (spinning.prev)
|
|
- set_evtchn(__get_cpu_var(poll_evtchn));
|
|
+ set_evtchn(x86_read_percpu(poll_evtchn));
|
|
rc = true;
|
|
break;
|
|
}
|
|
@@ -153,11 +159,11 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
bool kick, free;
|
|
|
|
other->ticket = -1;
|
|
- __raw_spin_unlock_body;
|
|
+ __ticket_spin_unlock_body;
|
|
if (!kick)
|
|
break;
|
|
xen_spin_kick(lock, token);
|
|
- __raw_spin_lock_preamble;
|
|
+ __ticket_spin_lock_preamble;
|
|
if (!free)
|
|
token = spin_adjust(
|
|
other->prev, lock,
|
|
@@ -181,7 +187,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
|
|
current_vcpu_info()->evtchn_upcall_mask = upcall_mask;
|
|
|
|
- rc = !test_evtchn(__get_cpu_var(poll_evtchn));
|
|
+ rc = !test_evtchn(x86_read_percpu(poll_evtchn));
|
|
if (!rc)
|
|
inc_irq_stat(irq_lock_count);
|
|
} while (spinning.prev || rc);
|
|
@@ -192,11 +198,12 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
*/
|
|
|
|
/* announce we're done */
|
|
- __get_cpu_var(spinning) = other = spinning.prev;
|
|
+ other = spinning.prev;
|
|
+ x86_write_percpu(spinning, other);
|
|
raw_local_irq_disable();
|
|
- rm_idx = __get_cpu_var(rm_seq.idx);
|
|
+ rm_idx = x86_read_percpu(rm_seq.idx);
|
|
smp_wmb();
|
|
- __get_cpu_var(rm_seq.idx) = rm_idx + 1;
|
|
+ x86_write_percpu(rm_seq.idx, rm_idx + 1);
|
|
mb();
|
|
|
|
/*
|
|
@@ -211,7 +218,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
if (other->ticket + 1)
|
|
continue;
|
|
lock = other->lock;
|
|
- __raw_spin_lock_preamble;
|
|
+ __ticket_spin_lock_preamble;
|
|
if (!free)
|
|
token = spin_adjust(other->prev, lock, token);
|
|
other->ticket = token >> TICKET_SHIFT;
|
|
@@ -220,7 +227,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
}
|
|
|
|
rm_idx &= 1;
|
|
- while (__get_cpu_var(rm_seq.ctr[rm_idx].counter))
|
|
+ while (x86_read_percpu(rm_seq.ctr[rm_idx].counter))
|
|
cpu_relax();
|
|
raw_local_irq_restore(upcall_mask);
|
|
*ptok = lock->cur | (spinning.ticket << TICKET_SHIFT);
|
|
@@ -283,3 +290,5 @@ void xen_spin_kick(raw_spinlock_t *lock,
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(xen_spin_kick);
|
|
+
|
|
+#endif /* TICKET_SHIFT */
|
|
--- head-2011-03-11.orig/drivers/xen/fbfront/xenfb.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/fbfront/xenfb.c 2011-02-17 10:11:23.000000000 +0100
|
|
@@ -18,6 +18,7 @@
|
|
* frame buffer.
|
|
*/
|
|
|
|
+#include <linux/console.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/fb.h>
|
|
@@ -545,6 +546,28 @@ static unsigned long vmalloc_to_mfn(void
|
|
return pfn_to_mfn(vmalloc_to_pfn(address));
|
|
}
|
|
|
|
+static __devinit void
|
|
+xenfb_make_preferred_console(void)
|
|
+{
|
|
+ struct console *c;
|
|
+
|
|
+ if (console_set_on_cmdline)
|
|
+ return;
|
|
+
|
|
+ acquire_console_sem();
|
|
+ for (c = console_drivers; c; c = c->next) {
|
|
+ if (!strcmp(c->name, "tty") && c->index == 0)
|
|
+ break;
|
|
+ }
|
|
+ release_console_sem();
|
|
+ if (c) {
|
|
+ unregister_console(c);
|
|
+ c->flags |= CON_CONSDEV;
|
|
+ c->flags &= ~CON_PRINTBUFFER; /* don't print again */
|
|
+ register_console(c);
|
|
+ }
|
|
+}
|
|
+
|
|
static int __devinit xenfb_probe(struct xenbus_device *dev,
|
|
const struct xenbus_device_id *id)
|
|
{
|
|
@@ -665,6 +688,7 @@ static int __devinit xenfb_probe(struct
|
|
if (ret < 0)
|
|
goto error;
|
|
|
|
+ xenfb_make_preferred_console();
|
|
return 0;
|
|
|
|
error_nomem:
|
|
@@ -884,4 +908,5 @@ static void __exit xenfb_cleanup(void)
|
|
module_init(xenfb_init);
|
|
module_exit(xenfb_cleanup);
|
|
|
|
+MODULE_DESCRIPTION("Xen virtual framebuffer device frontend");
|
|
MODULE_LICENSE("GPL");
|
|
--- head-2011-03-11.orig/drivers/xen/fbfront/xenkbd.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/fbfront/xenkbd.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -350,4 +350,5 @@ static void __exit xenkbd_cleanup(void)
|
|
module_init(xenkbd_init);
|
|
module_exit(xenkbd_cleanup);
|
|
|
|
+MODULE_DESCRIPTION("Xen virtual keyboard/pointer device frontend");
|
|
MODULE_LICENSE("GPL");
|
|
--- head-2011-03-11.orig/drivers/xen/gntdev/gntdev.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/gntdev/gntdev.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -400,7 +400,7 @@ static int __init gntdev_init(void)
|
|
}
|
|
|
|
device = device_create(class, NULL, MKDEV(gntdev_major, 0),
|
|
- GNTDEV_NAME);
|
|
+ NULL, GNTDEV_NAME);
|
|
if (IS_ERR(device)) {
|
|
pr_err("Error creating gntdev device in xen_class\n");
|
|
pr_err("gntdev created, major number = %d\n", gntdev_major);
|
|
--- head-2011-03-11.orig/drivers/xen/netback/netback.c 2011-02-09 15:55:20.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netback/netback.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -36,7 +36,7 @@
|
|
|
|
#include "common.h"
|
|
#include <linux/if_vlan.h>
|
|
-#include <linux/tcp.h>
|
|
+#include <net/tcp.h>
|
|
#include <xen/balloon.h>
|
|
#include <xen/interface/memory.h>
|
|
#include <xen/net-util.h>
|
|
@@ -115,7 +115,7 @@ static inline int netif_page_index(struc
|
|
*/
|
|
#define PKT_PROT_LEN (ETH_HLEN + VLAN_HLEN + \
|
|
sizeof(struct iphdr) + MAX_IPOPTLEN + \
|
|
- sizeof(struct tcphdr) + 40 /* MAX_TCP_OPTION_SPACE */)
|
|
+ sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE)
|
|
|
|
static struct pending_tx_info {
|
|
netif_tx_request_t req;
|
|
--- head-2011-03-11.orig/drivers/xen/netfront/accel.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netfront/accel.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -28,6 +28,7 @@
|
|
* IN THE SOFTWARE.
|
|
*/
|
|
|
|
+#include <linux/version.h>
|
|
#include <linux/netdevice.h>
|
|
#include <linux/skbuff.h>
|
|
#include <linux/list.h>
|
|
--- head-2011-03-11.orig/drivers/xen/netfront/netfront.c 2011-02-09 16:04:02.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netfront/netfront.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -637,7 +637,7 @@ static int network_open(struct net_devic
|
|
}
|
|
spin_unlock_bh(&np->rx_lock);
|
|
|
|
- network_maybe_wake_tx(dev);
|
|
+ netif_start_queue(dev);
|
|
|
|
return 0;
|
|
}
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netback/accel.h 2010-01-18 15:23:12.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/sfc_netback/accel.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -25,6 +25,7 @@
|
|
#ifndef NETBACK_ACCEL_H
|
|
#define NETBACK_ACCEL_H
|
|
|
|
+#include <linux/version.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/ip.h>
|
|
#include <linux/tcp.h>
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netfront/accel.h 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/sfc_netfront/accel.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -35,6 +35,7 @@
|
|
#include <xen/evtchn.h>
|
|
|
|
#include <linux/kernel.h>
|
|
+#include <linux/version.h>
|
|
#include <linux/list.h>
|
|
|
|
enum netfront_accel_post_status {
|
|
--- head-2011-03-11.orig/drivers/xen/xenbus/xenbus_client.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/xenbus/xenbus_client.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -149,7 +149,7 @@ int xenbus_watch_pathfmt(struct xenbus_d
|
|
char *path;
|
|
|
|
va_start(ap, pathfmt);
|
|
- path = kvasprintf(GFP_KERNEL, pathfmt, ap);
|
|
+ path = kvasprintf(GFP_NOIO | __GFP_HIGH, pathfmt, ap);
|
|
va_end(ap);
|
|
|
|
if (!path) {
|
|
--- head-2011-03-11.orig/drivers/xen/xenbus/xenbus_comms.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/xenbus/xenbus_comms.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -248,14 +248,11 @@ int xb_init_comms(void)
|
|
intf->rsp_cons = intf->rsp_prod;
|
|
}
|
|
|
|
+#if defined(CONFIG_XEN) || defined(MODULE)
|
|
if (xenbus_irq)
|
|
unbind_from_irqhandler(xenbus_irq, &xb_waitq);
|
|
|
|
-#if defined(CONFIG_XEN) || defined(MODULE)
|
|
err = bind_caller_port_to_irqhandler(
|
|
-#else
|
|
- err = bind_evtchn_to_irqhandler(
|
|
-#endif
|
|
xen_store_evtchn, wake_waiting,
|
|
0, "xenbus", &xb_waitq);
|
|
if (err <= 0) {
|
|
@@ -264,6 +261,20 @@ int xb_init_comms(void)
|
|
}
|
|
|
|
xenbus_irq = err;
|
|
+#else
|
|
+ if (xenbus_irq) {
|
|
+ /* Already have an irq; assume we're resuming */
|
|
+ rebind_evtchn_irq(xen_store_evtchn, xenbus_irq);
|
|
+ } else {
|
|
+ err = bind_evtchn_to_irqhandler(xen_store_evtchn, wake_waiting,
|
|
+ 0, "xenbus", &xb_waitq);
|
|
+ if (err <= 0) {
|
|
+ pr_err("XENBUS request irq failed %i\n", err);
|
|
+ return err;
|
|
+ }
|
|
+ xenbus_irq = err;
|
|
+ }
|
|
+#endif
|
|
|
|
return 0;
|
|
}
|
|
--- head-2011-03-11.orig/drivers/xen/xenbus/xenbus_probe.c 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -36,6 +36,7 @@
|
|
__FUNCTION__, __LINE__, ##args)
|
|
|
|
#include <linux/kernel.h>
|
|
+#include <linux/version.h>
|
|
#include <linux/err.h>
|
|
#include <linux/string.h>
|
|
#include <linux/ctype.h>
|
|
--- head-2011-03-11.orig/fs/aio.c 2011-03-11 10:58:46.000000000 +0100
|
|
+++ head-2011-03-11/fs/aio.c 2011-03-11 10:59:16.000000000 +0100
|
|
@@ -1307,7 +1307,7 @@ static int make_aio_fd(struct kioctx *io
|
|
int fd;
|
|
struct file *file;
|
|
|
|
- fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx);
|
|
+ fd = anon_inode_getfd("[aioq]", &aioq_fops, ioctx, 0);
|
|
if (fd < 0)
|
|
return fd;
|
|
|
|
--- head-2011-03-11.orig/include/Kbuild 2011-03-15 16:45:55.000000000 +0100
|
|
+++ head-2011-03-11/include/Kbuild 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -8,5 +8,6 @@ header-y += mtd/
|
|
header-y += rdma/
|
|
header-y += video/
|
|
header-y += drm/
|
|
+header-y += xen/public/
|
|
header-y += xen/
|
|
header-y += scsi/
|
|
--- head-2011-03-11.orig/include/asm-generic/pgtable.h 2011-03-11 10:54:24.000000000 +0100
|
|
+++ head-2011-03-11/include/asm-generic/pgtable.h 2011-03-11 10:59:22.000000000 +0100
|
|
@@ -156,10 +156,6 @@ static inline void pmdp_set_wrprotect(st
|
|
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
|
|
#endif
|
|
|
|
-#ifndef arch_change_pte_range
|
|
-#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
|
|
-#endif
|
|
-
|
|
#ifndef __HAVE_ARCH_PMDP_SPLITTING_FLUSH
|
|
extern pmd_t pmdp_splitting_flush(struct vm_area_struct *vma,
|
|
unsigned long address,
|
|
--- head-2011-03-11.orig/arch/x86/include/asm/kexec.h 2011-01-31 14:53:50.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/asm/kexec.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -5,8 +5,21 @@
|
|
# define PA_CONTROL_PAGE 0
|
|
# define VA_CONTROL_PAGE 1
|
|
# define PA_PGD 2
|
|
+# ifndef CONFIG_XEN
|
|
# define PA_SWAP_PAGE 3
|
|
# define PAGES_NR 4
|
|
+# else /* CONFIG_XEN */
|
|
+/*
|
|
+ * The hypervisor interface implicitly requires that all entries (except
|
|
+ * for possibly the final one) are arranged in matching PA_/VA_ pairs.
|
|
+ */
|
|
+# define PA_PMD_0 8
|
|
+# define VA_PMD_0 9
|
|
+# define PA_PMD_1 10
|
|
+# define VA_PMD_1 11
|
|
+# define PA_SWAP_PAGE 12
|
|
+# define PAGES_NR 13
|
|
+# endif /* CONFIG_XEN */
|
|
#else
|
|
# define PA_CONTROL_PAGE 0
|
|
# define VA_CONTROL_PAGE 1
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/desc.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/desc.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -31,11 +31,17 @@ extern struct desc_ptr idt_descr;
|
|
extern gate_desc idt_table[];
|
|
#endif
|
|
|
|
+struct gdt_page {
|
|
+ struct desc_struct gdt[GDT_ENTRIES];
|
|
+} __attribute__((aligned(PAGE_SIZE)));
|
|
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
|
|
+
|
|
+static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
|
|
+{
|
|
+ return per_cpu(gdt_page, cpu).gdt;
|
|
+}
|
|
+
|
|
#ifdef CONFIG_X86_64
|
|
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
|
|
-extern struct desc_ptr cpu_gdt_descr[];
|
|
-/* the cpu gdt accessor */
|
|
-#define get_cpu_gdt_table(x) ((struct desc_struct *)cpu_gdt_descr[x].address)
|
|
|
|
static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
|
|
unsigned dpl, unsigned ist, unsigned seg)
|
|
@@ -53,16 +59,6 @@ static inline void pack_gate(gate_desc *
|
|
}
|
|
|
|
#else
|
|
-struct gdt_page {
|
|
- struct desc_struct gdt[GDT_ENTRIES];
|
|
-} __attribute__((aligned(PAGE_SIZE)));
|
|
-DECLARE_PER_CPU(struct gdt_page, gdt_page);
|
|
-
|
|
-static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
|
|
-{
|
|
- return per_cpu(gdt_page, cpu).gdt;
|
|
-}
|
|
-
|
|
static inline void pack_gate(gate_desc *gate, unsigned char type,
|
|
unsigned long base, unsigned dpl, unsigned flags,
|
|
unsigned short seg)
|
|
@@ -333,6 +329,28 @@ static inline void set_intr_gate(unsigne
|
|
_set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS);
|
|
}
|
|
|
|
+#define SYS_VECTOR_FREE 0
|
|
+#define SYS_VECTOR_ALLOCED 1
|
|
+
|
|
+extern int first_system_vector;
|
|
+extern char system_vectors[];
|
|
+
|
|
+static inline void alloc_system_vector(int vector)
|
|
+{
|
|
+ if (system_vectors[vector] == SYS_VECTOR_FREE) {
|
|
+ system_vectors[vector] = SYS_VECTOR_ALLOCED;
|
|
+ if (first_system_vector > vector)
|
|
+ first_system_vector = vector;
|
|
+ } else
|
|
+ BUG();
|
|
+}
|
|
+
|
|
+static inline void alloc_intr_gate(unsigned int n, void *addr)
|
|
+{
|
|
+ alloc_system_vector(n);
|
|
+ set_intr_gate(n, addr);
|
|
+}
|
|
+
|
|
/*
|
|
* This routine sets up an interrupt gate at directory privilege level 3.
|
|
*/
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/fixmap.h 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/fixmap.h 2011-02-01 14:38:38.000000000 +0100
@@ -7,7 +7,58 @@
# include "fixmap_64.h"
#endif

+extern int fixmaps_set;
+
+void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t);
+
+static inline void __set_fixmap(enum fixed_addresses idx,
+ maddr_t phys, pgprot_t flags)
+{
+ xen_set_fixmap(idx, phys, flags);
+}
+
+#define set_fixmap(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL)
+
+/*
+ * Some hardware wants to get fixmapped without caching.
+ */
+#define set_fixmap_nocache(idx, phys) \
+ __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
+
#define clear_fixmap(idx) \
__set_fixmap(idx, 0, __pgprot(0))

+#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
+extern void __this_fixmap_does_not_exist(void);
+
+/*
+ * 'index to address' translation. If anyone tries to use the idx
+ * directly without translation, we catch the bug with a NULL-deference
+ * kernel oops. Illegal ranges of incoming indices are caught too.
+ */
+static __always_inline unsigned long fix_to_virt(const unsigned int idx)
+{
+ /*
+ * this branch gets completely eliminated after inlining,
+ * except when someone tries to use fixaddr indices in an
+ * illegal way. (such as mixing up address types or using
+ * out-of-range indices).
+ *
+ * If it doesn't get removed, the linker will complain
+ * loudly with a reasonably clear error message..
+ */
+ if (idx >= __end_of_fixed_addresses)
+ __this_fixmap_does_not_exist();
+
+ return __fix_to_virt(idx);
+}
+
+static inline unsigned long virt_to_fix(const unsigned long vaddr)
+{
+ BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
+ return __virt_to_fix(vaddr);
+}
#endif
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/fixmap_32.h 2011-02-01 14:38:38.000000000 +0100
@@ -58,10 +58,17 @@ enum fixed_addresses {
#ifdef CONFIG_X86_LOCAL_APIC
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
#endif
-#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
+#ifndef CONFIG_XEN
+#ifdef CONFIG_X86_IO_APIC
FIX_IO_APIC_BASE_0,
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
#endif
+#else
+ FIX_SHARED_INFO,
+#define NR_FIX_ISAMAPS 256
+ FIX_ISAMAP_END,
+ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
+#endif
#ifdef CONFIG_X86_VISWS_APIC
FIX_CO_CPU, /* Cobalt timer */
FIX_CO_APIC, /* Cobalt APIC Redirection Table */
@@ -78,51 +85,38 @@ enum fixed_addresses {
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
-#ifdef CONFIG_ACPI
- FIX_ACPI_BEGIN,
- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
-#endif
#ifdef CONFIG_PCI_MMCONFIG
FIX_PCIE_MCFG,
#endif
#ifdef CONFIG_PARAVIRT
FIX_PARAVIRT_BOOTMAP,
#endif
- FIX_SHARED_INFO,
-#define NR_FIX_ISAMAPS 256
- FIX_ISAMAP_END,
- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
__end_of_permanent_fixed_addresses,
/*
* 256 temporary boot-time mappings, used by early_ioremap(),
* before ioremap() is functional.
*
- * We round it up to the next 512 pages boundary so that we
+ * We round it up to the next 256 pages boundary so that we
* can have a single pgd entry and a single pte table:
*/
#define NR_FIX_BTMAPS 64
#define FIX_BTMAPS_NESTING 4
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
- (__end_of_permanent_fixed_addresses & 511),
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
+ (__end_of_permanent_fixed_addresses & 255),
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
FIX_WP_TEST,
+#ifdef CONFIG_ACPI
+ FIX_ACPI_BEGIN,
+ FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
+#endif
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
FIX_OHCI1394_BASE,
#endif
__end_of_fixed_addresses
};

-extern void __set_fixmap(enum fixed_addresses idx,
- maddr_t phys, pgprot_t flags);
extern void reserve_top_address(unsigned long reserve);

-#define set_fixmap(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL)
-/*
- * Some hardware wants to get fixmapped without caching.
- */
-#define set_fixmap_nocache(idx, phys) \
- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)

#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)

@@ -131,38 +125,5 @@ extern void reserve_top_address(unsigned
#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE)

-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
-#define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
-
-extern void __this_fixmap_does_not_exist(void);
-
-/*
- * 'index to address' translation. If anyone tries to use the idx
- * directly without tranlation, we catch the bug with a NULL-deference
- * kernel oops. Illegal ranges of incoming indices are caught too.
- */
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
-{
- /*
- * this branch gets completely eliminated after inlining,
- * except when someone tries to use fixaddr indices in an
- * illegal way. (such as mixing up address types or using
- * out-of-range indices).
- *
- * If it doesn't get removed, the linker will complain
- * loudly with a reasonably clear error message..
- */
- if (idx >= __end_of_fixed_addresses)
- __this_fixmap_does_not_exist();
-
- return __fix_to_virt(idx);
-}
-
-static inline unsigned long virt_to_fix(const unsigned long vaddr)
-{
- BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
- return __virt_to_fix(vaddr);
-}
-
#endif /* !__ASSEMBLY__ */
#endif
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -12,6 +12,7 @@
|
|
#define _ASM_FIXMAP_64_H
|
|
|
|
#include <linux/kernel.h>
|
|
+#include <asm/acpi.h>
|
|
#include <asm/apicdef.h>
|
|
#include <asm/page.h>
|
|
#include <asm/vsyscall.h>
|
|
@@ -40,7 +41,6 @@ enum fixed_addresses {
|
|
VSYSCALL_HPET,
|
|
FIX_DBGP_BASE,
|
|
FIX_EARLYCON_MEM_BASE,
|
|
- FIX_HPET_BASE,
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
|
|
#endif
|
|
@@ -53,14 +53,21 @@ enum fixed_addresses {
|
|
FIX_EFI_IO_MAP_FIRST_PAGE = FIX_EFI_IO_MAP_LAST_PAGE
|
|
+ MAX_EFI_IO_PAGES - 1,
|
|
#endif
|
|
+#ifdef CONFIG_PARAVIRT
|
|
+ FIX_PARAVIRT_BOOTMAP,
|
|
+#else
|
|
+ FIX_SHARED_INFO,
|
|
+#endif
|
|
#ifdef CONFIG_ACPI
|
|
FIX_ACPI_BEGIN,
|
|
FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
|
|
#endif
|
|
- FIX_SHARED_INFO,
|
|
#define NR_FIX_ISAMAPS 256
|
|
FIX_ISAMAP_END,
|
|
FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
+ FIX_OHCI1394_BASE,
|
|
+#endif
|
|
__end_of_permanent_fixed_addresses,
|
|
/*
|
|
* 256 temporary boot-time mappings, used by early_ioremap(),
|
|
@@ -71,27 +78,12 @@ enum fixed_addresses {
|
|
*/
|
|
#define NR_FIX_BTMAPS 64
|
|
#define FIX_BTMAPS_NESTING 4
|
|
- FIX_BTMAP_END =
|
|
- __end_of_permanent_fixed_addresses + 512 -
|
|
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 512 -
|
|
(__end_of_permanent_fixed_addresses & 511),
|
|
FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_NESTING - 1,
|
|
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
- FIX_OHCI1394_BASE,
|
|
-#endif
|
|
__end_of_fixed_addresses
|
|
};
|
|
|
|
-extern void __set_fixmap(enum fixed_addresses idx,
|
|
- unsigned long phys, pgprot_t flags);
|
|
-
|
|
-#define set_fixmap(idx, phys) \
|
|
- __set_fixmap(idx, phys, PAGE_KERNEL)
|
|
-/*
|
|
- * Some hardware wants to get fixmapped without caching.
|
|
- */
|
|
-#define set_fixmap_nocache(idx, phys) \
|
|
- __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE)
|
|
-
|
|
#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
|
|
#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
|
|
#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
|
@@ -100,30 +92,4 @@ extern void __set_fixmap(enum fixed_addr
|
|
#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
|
|
#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
|
|
|
|
-#define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
|
|
-
|
|
-extern void __this_fixmap_does_not_exist(void);
|
|
-
|
|
-/*
|
|
- * 'index to address' translation. If anyone tries to use the idx
|
|
- * directly without translation, we catch the bug with a NULL-deference
|
|
- * kernel oops. Illegal ranges of incoming indices are caught too.
|
|
- */
|
|
-static __always_inline unsigned long fix_to_virt(const unsigned int idx)
|
|
-{
|
|
- /*
|
|
- * this branch gets completely eliminated after inlining,
|
|
- * except when someone tries to use fixaddr indices in an
|
|
- * illegal way. (such as mixing up address types or using
|
|
- * out-of-range indices).
|
|
- *
|
|
- * If it doesn't get removed, the linker will complain
|
|
- * loudly with a reasonably clear error message..
|
|
- */
|
|
- if (idx >= __end_of_fixed_addresses)
|
|
- __this_fixmap_does_not_exist();
|
|
-
|
|
- return __fix_to_virt(idx);
|
|
-}
|
|
-
|
|
#endif
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 14:38:38.000000000 +0100
@@ -73,6 +73,9 @@ struct page *kmap_atomic_to_page(void *p

#define flush_cache_kmaps() do { } while (0)

+extern void add_highpages_with_active_regions(int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
+
void clear_highpage(struct page *);
static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
{
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/hypercall.h 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/hypercall.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -332,9 +332,19 @@ static inline int __must_check
|
|
HYPERVISOR_grant_table_op(
|
|
unsigned int cmd, void *uop, unsigned int count)
|
|
{
|
|
+ bool fixup = false;
|
|
+ int rc;
|
|
+
|
|
if (arch_use_lazy_mmu_mode())
|
|
xen_multicall_flush(false);
|
|
- return _hypercall3(int, grant_table_op, cmd, uop, count);
|
|
+#ifdef GNTTABOP_map_grant_ref
|
|
+ if (cmd == GNTTABOP_map_grant_ref)
|
|
+#endif
|
|
+ fixup = gnttab_pre_map_adjust(cmd, uop, count);
|
|
+ rc = _hypercall3(int, grant_table_op, cmd, uop, count);
|
|
+ if (rc == 0 && fixup)
|
|
+ rc = gnttab_post_map_adjust(uop, count);
|
|
+ return rc;
|
|
}
|
|
|
|
static inline int __must_check
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -35,7 +35,6 @@
|
|
|
|
#include <linux/types.h>
|
|
#include <linux/kernel.h>
|
|
-#include <linux/version.h>
|
|
#include <linux/errno.h>
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/platform.h>
|
|
@@ -119,6 +118,8 @@ int xen_create_contiguous_region(
|
|
unsigned long vstart, unsigned int order, unsigned int address_bits);
|
|
void xen_destroy_contiguous_region(
|
|
unsigned long vstart, unsigned int order);
|
|
+int early_create_contiguous_region(unsigned long pfn, unsigned int order,
|
|
+ unsigned int address_bits);
|
|
|
|
struct page;
|
|
|
|
@@ -188,6 +189,29 @@ static inline void xen_multicall_flush(b
|
|
|
|
#endif /* CONFIG_XEN && !MODULE */
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+
|
|
+struct gnttab_map_grant_ref;
|
|
+bool gnttab_pre_map_adjust(unsigned int cmd, struct gnttab_map_grant_ref *,
|
|
+ unsigned int count);
|
|
+#if CONFIG_XEN_COMPAT < 0x030400
|
|
+int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *, unsigned int);
|
|
+#else
|
|
+static inline int gnttab_post_map_adjust(const struct gnttab_map_grant_ref *m,
|
|
+ unsigned int count)
|
|
+{
|
|
+ BUG();
|
|
+ return -ENOSYS;
|
|
+}
|
|
+#endif
|
|
+
|
|
+#else /* !CONFIG_XEN */
|
|
+
|
|
+#define gnttab_pre_map_adjust(...) false
|
|
+#define gnttab_post_map_adjust(...) ({ BUG(); -ENOSYS; })
|
|
+
|
|
+#endif /* CONFIG_XEN */
|
|
+
|
|
#if defined(CONFIG_X86_64)
|
|
#define MULTI_UVMFLAGS_INDEX 2
|
|
#define MULTI_UVMDOMID_INDEX 3
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/io.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/io.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -3,20 +3,140 @@
|
|
|
|
#define ARCH_HAS_IOREMAP_WC
|
|
|
|
+#include <linux/compiler.h>
|
|
+
|
|
+/*
|
|
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
|
|
+ * mappings, before the real ioremap() is functional.
|
|
+ * A boot-time mapping is currently limited to at most 16 pages.
|
|
+ */
|
|
+#ifndef __ASSEMBLY__
|
|
+extern void early_ioremap_init(void);
|
|
+extern void early_ioremap_clear(void);
|
|
+extern void early_ioremap_reset(void);
|
|
+extern void *early_ioremap(unsigned long offset, unsigned long size);
|
|
+extern void early_iounmap(void *addr, unsigned long size);
|
|
+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
|
|
+#endif
|
|
+
|
|
+#define build_mmio_read(name, size, type, reg, barrier) \
|
|
+static inline type name(const volatile void __iomem *addr) \
|
|
+{ type ret; asm volatile("mov" size " %1,%0":reg (ret) \
|
|
+:"m" (*(volatile type __force *)addr) barrier); return ret; }
|
|
+
|
|
+#define build_mmio_write(name, size, type, reg, barrier) \
|
|
+static inline void name(type val, volatile void __iomem *addr) \
|
|
+{ asm volatile("mov" size " %0,%1": :reg (val), \
|
|
+"m" (*(volatile type __force *)addr) barrier); }
|
|
+
|
|
+build_mmio_read(readb, "b", unsigned char, "=q", :"memory")
|
|
+build_mmio_read(readw, "w", unsigned short, "=r", :"memory")
|
|
+build_mmio_read(readl, "l", unsigned int, "=r", :"memory")
|
|
+
|
|
+build_mmio_read(__readb, "b", unsigned char, "=q", )
|
|
+build_mmio_read(__readw, "w", unsigned short, "=r", )
|
|
+build_mmio_read(__readl, "l", unsigned int, "=r", )
|
|
+
|
|
+build_mmio_write(writeb, "b", unsigned char, "q", :"memory")
|
|
+build_mmio_write(writew, "w", unsigned short, "r", :"memory")
|
|
+build_mmio_write(writel, "l", unsigned int, "r", :"memory")
|
|
+
|
|
+build_mmio_write(__writeb, "b", unsigned char, "q", )
|
|
+build_mmio_write(__writew, "w", unsigned short, "r", )
|
|
+build_mmio_write(__writel, "l", unsigned int, "r", )
|
|
+
|
|
+#define readb_relaxed(a) __readb(a)
|
|
+#define readw_relaxed(a) __readw(a)
|
|
+#define readl_relaxed(a) __readl(a)
|
|
+#define __raw_readb __readb
|
|
+#define __raw_readw __readw
|
|
+#define __raw_readl __readl
|
|
+
|
|
+#define __raw_writeb __writeb
|
|
+#define __raw_writew __writew
|
|
+#define __raw_writel __writel
|
|
+
|
|
+#define mmiowb() barrier()
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+build_mmio_read(readq, "q", unsigned long, "=r", :"memory")
|
|
+build_mmio_read(__readq, "q", unsigned long, "=r", )
|
|
+build_mmio_write(writeq, "q", unsigned long, "r", :"memory")
|
|
+build_mmio_write(__writeq, "q", unsigned long, "r", )
|
|
+
|
|
+#define readq_relaxed(a) __readq(a)
|
|
+#define __raw_readq __readq
|
|
+#define __raw_writeq writeq
|
|
+
|
|
+/* Let people know we have them */
|
|
+#define readq readq
|
|
+#define writeq writeq
|
|
+#endif
|
|
+
|
|
+#define native_io_delay xen_io_delay
|
|
+
|
|
#ifdef CONFIG_X86_32
|
|
-# include "io_32.h"
|
|
+# include "../../io_32.h"
|
|
#else
|
|
-# include "io_64.h"
|
|
+# include "../../io_64.h"
|
|
+#endif
|
|
+
|
|
+#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
|
|
+
|
|
+/* We will be supplying our own /dev/mem implementation */
|
|
+#define ARCH_HAS_DEV_MEM
|
|
+
|
|
+#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
|
|
+#undef page_to_phys
|
|
+#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
|
|
+#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page)))
|
|
+
|
|
+#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \
|
|
+ (unsigned long)(bv)->bv_offset)
|
|
+
|
|
+#define BIOVEC_PHYS_MERGEABLE(vec1, vec2) \
|
|
+ (bvec_to_phys(vec1) + (vec1)->bv_len == bvec_to_phys(vec2) \
|
|
+ && bvec_to_pseudophys(vec1) + (vec1)->bv_len \
|
|
+ == bvec_to_pseudophys(vec2))
|
|
+
|
|
+#undef virt_to_bus
|
|
+#undef bus_to_virt
|
|
+#define virt_to_bus(_x) phys_to_machine(__pa(_x))
|
|
+#define bus_to_virt(_x) __va(machine_to_phys(_x))
|
|
+
|
|
+#include <asm/fixmap.h>
|
|
+
|
|
+#undef __ISA_IO_base
|
|
+#undef isa_virt_to_bus
|
|
+#undef isa_page_to_bus
|
|
+#undef isa_bus_to_virt
|
|
+#define isa_virt_to_bus(_x) ({ \
|
|
+ unsigned long _va_ = (unsigned long)(_x); \
|
|
+ _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \
|
|
+ ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \
|
|
+ : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); })
|
|
+#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x))
|
|
+
|
|
#endif
|
|
|
|
extern void *xlate_dev_mem_ptr(unsigned long phys);
|
|
extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
|
|
|
|
-extern void map_devmem(unsigned long pfn, unsigned long len, pgprot_t);
|
|
-extern void unmap_devmem(unsigned long pfn, unsigned long len, pgprot_t);
|
|
-
|
|
extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
|
|
unsigned long prot_val);
|
|
extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
|
|
|
|
+/*
|
|
+ * early_ioremap() and early_iounmap() are for temporary early boot-time
|
|
+ * mappings, before the real ioremap() is functional.
|
|
+ * A boot-time mapping is currently limited to at most 16 pages.
|
|
+ */
|
|
+extern void early_ioremap_init(void);
|
|
+extern void early_ioremap_clear(void);
|
|
+extern void early_ioremap_reset(void);
|
|
+extern void *early_ioremap(unsigned long offset, unsigned long size);
|
|
+extern void early_iounmap(void *addr, unsigned long size);
|
|
+extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
|
|
+
|
|
+
|
|
#endif /* _ASM_X86_IO_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/irq_vectors.h 2011-02-15 17:27:18.000000000 +0100
|
|
@@ -0,0 +1,52 @@
|
|
+#ifndef _ASM_IRQ_VECTORS_H
|
|
+#define _ASM_IRQ_VECTORS_H
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+# define SYSCALL_VECTOR 0x80
|
|
+#else
|
|
+# define IA32_SYSCALL_VECTOR 0x80
|
|
+#endif
|
|
+
|
|
+#define RESCHEDULE_VECTOR 0
|
|
+#define CALL_FUNCTION_VECTOR 1
|
|
+#define NMI_VECTOR 0x02
|
|
+#define CALL_FUNC_SINGLE_VECTOR 3
|
|
+#define NR_IPIS 4
|
|
+
|
|
+/*
|
|
+ * The maximum number of vectors supported by i386 processors
|
|
+ * is limited to 256. For processors other than i386, NR_VECTORS
|
|
+ * should be changed accordingly.
|
|
+ */
|
|
+#define NR_VECTORS 256
|
|
+
|
|
+#define FIRST_VM86_IRQ 3
|
|
+#define LAST_VM86_IRQ 15
|
|
+#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
|
|
+
|
|
+/*
|
|
+ * The flat IRQ space is divided into two regions:
|
|
+ * 1. A one-to-one mapping of real physical IRQs. This space is only used
|
|
+ * if we have physical device-access privilege. This region is at the
|
|
+ * start of the IRQ space so that existing device drivers do not need
|
|
+ * to be modified to translate physical IRQ numbers into our IRQ space.
|
|
+ * 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
|
|
+ * are bound using the provided bind/unbind functions.
|
|
+ */
|
|
+
|
|
+#define PIRQ_BASE 0
|
|
+#if defined(NR_CPUS) && defined(MAX_IO_APICS)
|
|
+# if NR_CPUS < MAX_IO_APICS
|
|
+# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS)
|
|
+# else
|
|
+# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS)
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
|
|
+#define NR_DYNIRQS 256
|
|
+
|
|
+#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
|
|
+#define NR_IRQ_VECTORS NR_IRQS
|
|
+
|
|
+#endif /* _ASM_IRQ_VECTORS_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/irqflags.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/irqflags.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -118,7 +118,7 @@ static inline void halt(void)
|
|
|
|
#ifndef CONFIG_X86_64
|
|
#define INTERRUPT_RETURN iret
|
|
-#define ENABLE_INTERRUPTS_SYSCALL_RET __ENABLE_INTERRUPTS ; \
|
|
+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
|
|
sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
__TEST_PENDING ; \
|
|
jnz 14f /* process more events if necessary... */ ; \
|
|
@@ -177,18 +177,6 @@ static inline void trace_hardirqs_fixup_
|
|
#else
|
|
|
|
#ifdef CONFIG_X86_64
|
|
-/*
|
|
- * Currently paravirt can't handle swapgs nicely when we
|
|
- * don't have a stack we can rely on (such as a user space
|
|
- * stack). So we either find a way around these or just fault
|
|
- * and emulate if a guest tries to call swapgs directly.
|
|
- *
|
|
- * Either way, this is a good way to document that we don't
|
|
- * have a reliable stack. x86_64 only.
|
|
- */
|
|
-#define SWAPGS_UNSAFE_STACK swapgs
|
|
-#define ARCH_TRACE_IRQS_ON call trace_hardirqs_on_thunk
|
|
-#define ARCH_TRACE_IRQS_OFF call trace_hardirqs_off_thunk
|
|
#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
|
|
#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
|
|
TRACE_IRQS_ON; \
|
|
@@ -200,24 +188,6 @@ static inline void trace_hardirqs_fixup_
|
|
TRACE_IRQS_OFF;
|
|
|
|
#else
|
|
-#define ARCH_TRACE_IRQS_ON \
|
|
- pushl %eax; \
|
|
- pushl %ecx; \
|
|
- pushl %edx; \
|
|
- call trace_hardirqs_on; \
|
|
- popl %edx; \
|
|
- popl %ecx; \
|
|
- popl %eax;
|
|
-
|
|
-#define ARCH_TRACE_IRQS_OFF \
|
|
- pushl %eax; \
|
|
- pushl %ecx; \
|
|
- pushl %edx; \
|
|
- call trace_hardirqs_off; \
|
|
- popl %edx; \
|
|
- popl %ecx; \
|
|
- popl %eax;
|
|
-
|
|
#define ARCH_LOCKDEP_SYS_EXIT \
|
|
pushl %eax; \
|
|
pushl %ecx; \
|
|
@@ -231,8 +201,8 @@ static inline void trace_hardirqs_fixup_
|
|
#endif
|
|
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
|
-# define TRACE_IRQS_ON ARCH_TRACE_IRQS_ON
|
|
-# define TRACE_IRQS_OFF ARCH_TRACE_IRQS_OFF
|
|
+# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
|
|
+# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
|
|
#else
|
|
# define TRACE_IRQS_ON
|
|
# define TRACE_IRQS_OFF
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -1,5 +1,42 @@
|
|
+#ifndef __ASM_X86_MMU_CONTEXT_H
|
|
+#define __ASM_X86_MMU_CONTEXT_H
|
|
+
|
|
+#include <asm/desc.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/tlbflush.h>
|
|
+
|
|
+void arch_exit_mmap(struct mm_struct *mm);
|
|
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
|
|
+
|
|
+void mm_pin(struct mm_struct *mm);
|
|
+void mm_unpin(struct mm_struct *mm);
|
|
+void mm_pin_all(void);
|
|
+
|
|
+static inline void xen_activate_mm(struct mm_struct *prev,
|
|
+ struct mm_struct *next)
|
|
+{
|
|
+ if (!PagePinned(virt_to_page(next->pgd)))
|
|
+ mm_pin(next);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Used for LDT copy/destruction.
|
|
+ */
|
|
+int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
|
+void destroy_context(struct mm_struct *mm);
|
|
+
|
|
#ifdef CONFIG_X86_32
|
|
# include "mmu_context_32.h"
|
|
#else
|
|
# include "mmu_context_64.h"
|
|
#endif
|
|
+
|
|
+#define activate_mm(prev, next) \
|
|
+do { \
|
|
+ xen_activate_mm(prev, next); \
|
|
+ switch_mm((prev), (next), NULL); \
|
|
+} while (0);
|
|
+
|
|
+
|
|
+#endif /* __ASM_X86_MMU_CONTEXT_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -1,32 +1,6 @@
|
|
#ifndef __I386_SCHED_H
|
|
#define __I386_SCHED_H
|
|
|
|
-#include <asm/desc.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/pgalloc.h>
|
|
-#include <asm/tlbflush.h>
|
|
-
|
|
-void arch_exit_mmap(struct mm_struct *mm);
|
|
-void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
|
|
-
|
|
-void mm_pin(struct mm_struct *mm);
|
|
-void mm_unpin(struct mm_struct *mm);
|
|
-void mm_pin_all(void);
|
|
-
|
|
-static inline void xen_activate_mm(struct mm_struct *prev,
|
|
- struct mm_struct *next)
|
|
-{
|
|
- if (!PagePinned(virt_to_page(next->pgd)))
|
|
- mm_pin(next);
|
|
-}
|
|
-
|
|
-/*
|
|
- * Used for LDT copy/destruction.
|
|
- */
|
|
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
|
-void destroy_context(struct mm_struct *mm);
|
|
-
|
|
-
|
|
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
{
|
|
#if 0 /* XEN: no lazy tlb */
|
|
@@ -107,10 +81,4 @@ static inline void switch_mm(struct mm_s
|
|
#define deactivate_mm(tsk, mm) \
|
|
asm("movl %0,%%gs": :"r" (0));
|
|
|
|
-#define activate_mm(prev, next) \
|
|
-do { \
|
|
- xen_activate_mm(prev, next); \
|
|
- switch_mm((prev), (next), NULL); \
|
|
-} while (0)
|
|
-
|
|
#endif
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/mmu_context_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -1,23 +1,6 @@
|
|
#ifndef __X86_64_MMU_CONTEXT_H
|
|
#define __X86_64_MMU_CONTEXT_H
|
|
|
|
-#include <asm/desc.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/pgalloc.h>
|
|
-#include <asm/page.h>
|
|
-#include <asm/pda.h>
|
|
-#include <asm/pgtable.h>
|
|
-#include <asm/tlbflush.h>
|
|
-
|
|
-void arch_exit_mmap(struct mm_struct *mm);
|
|
-void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
|
|
-
|
|
-/*
|
|
- * possibly do the LDT unload here?
|
|
- */
|
|
-int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
|
-void destroy_context(struct mm_struct *mm);
|
|
-
|
|
static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
{
|
|
#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
@@ -58,10 +41,6 @@ static inline void __prepare_arch_switch
|
|
}
|
|
}
|
|
|
|
-extern void mm_pin(struct mm_struct *mm);
|
|
-extern void mm_unpin(struct mm_struct *mm);
|
|
-void mm_pin_all(void);
|
|
-
|
|
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|
struct task_struct *tsk)
|
|
{
|
|
@@ -124,11 +103,4 @@ do { \
|
|
asm volatile("movl %0,%%fs"::"r"(0)); \
|
|
} while (0)
|
|
|
|
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
|
|
-{
|
|
- if (!PagePinned(virt_to_page(next->pgd)))
|
|
- mm_pin(next);
|
|
- switch_mm(prev, next, NULL);
|
|
-}
|
|
-
|
|
#endif
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pci.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -21,6 +21,8 @@ struct pci_sysdata {
|
|
#endif
|
|
};
|
|
|
|
+extern int pci_routeirq;
|
|
+
|
|
/* scan a bus after allocating a pci_sysdata for it */
|
|
extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops,
|
|
int node);
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -7,6 +7,9 @@
|
|
|
|
#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
|
|
|
|
+static inline int paravirt_pgd_alloc(struct mm_struct *mm) { return 0; }
|
|
+static inline void paravirt_pgd_free(struct mm_struct *mm, pgd_t *pgd) {}
|
|
+
|
|
static inline void paravirt_alloc_pte(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_alloc_pmd(struct mm_struct *mm, unsigned long pfn) {}
|
|
static inline void paravirt_alloc_pmd_clone(unsigned long pfn, unsigned long clonepfn,
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-07 15:40:30.000000000 +0100
|
|
@@ -13,11 +13,12 @@
|
|
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
|
|
#define _PAGE_BIT_PAT 7 /* on 4KB pages */
|
|
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
|
|
-#define _PAGE_BIT_IO 9 /* Mapped page is I/O or foreign and
|
|
+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
|
|
+#define _PAGE_BIT_UNUSED2 10
|
|
+#define _PAGE_BIT_IO 11 /* Mapped page is I/O or foreign and
|
|
* has no associated page struct. */
|
|
-#define _PAGE_BIT_UNUSED2 10 /* available for programmer */
|
|
-#define _PAGE_BIT_UNUSED3 11
|
|
#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
|
|
+#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
|
|
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
|
|
|
|
/* If _PAGE_BIT_PRESENT is clear, we use these: */
|
|
@@ -28,34 +29,31 @@
|
|
/* if the user mapped it with PROT_NONE; pte_present gives true */
|
|
#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
|
|
|
|
-/*
|
|
- * Note: we use _AC(1, L) instead of _AC(1, UL) so that we get a
|
|
- * sign-extended value on 32-bit with all 1's in the upper word,
|
|
- * which preserves the upper pte values on 64-bit ptes:
|
|
- */
|
|
-#define _PAGE_PRESENT (_AC(1, L)<<_PAGE_BIT_PRESENT)
|
|
-#define _PAGE_RW (_AC(1, L)<<_PAGE_BIT_RW)
|
|
-#define _PAGE_USER (_AC(1, L)<<_PAGE_BIT_USER)
|
|
-#define _PAGE_PWT (_AC(1, L)<<_PAGE_BIT_PWT)
|
|
-#define _PAGE_PCD (_AC(1, L)<<_PAGE_BIT_PCD)
|
|
-#define _PAGE_ACCESSED (_AC(1, L)<<_PAGE_BIT_ACCESSED)
|
|
-#define _PAGE_DIRTY (_AC(1, L)<<_PAGE_BIT_DIRTY)
|
|
-#define _PAGE_PSE (_AC(1, L)<<_PAGE_BIT_PSE) /* 2MB page */
|
|
-#define _PAGE_GLOBAL (_AC(1, L)<<_PAGE_BIT_GLOBAL) /* Global TLB entry */
|
|
-#define _PAGE_IO (_AC(1, L)<<_PAGE_BIT_IO)
|
|
-#define _PAGE_UNUSED2 (_AC(1, L)<<_PAGE_BIT_UNUSED2)
|
|
-#define _PAGE_UNUSED3 (_AC(1, L)<<_PAGE_BIT_UNUSED3)
|
|
-#define _PAGE_PAT (_AC(1, L)<<_PAGE_BIT_PAT)
|
|
-#define _PAGE_PAT_LARGE (_AC(1, L)<<_PAGE_BIT_PAT_LARGE)
|
|
+#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
|
|
+#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
|
|
+#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
|
|
+#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
|
|
+#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
|
|
+#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
|
|
+#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
|
|
+#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
|
|
+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
|
|
+#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
|
|
+#define _PAGE_UNUSED2 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED2)
|
|
+#define _PAGE_IO (_AT(pteval_t, 1) << _PAGE_BIT_IO)
|
|
+#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
|
|
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
|
|
+#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
|
|
+#define __HAVE_ARCH_PTE_SPECIAL
|
|
|
|
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
|
-#define _PAGE_NX (_AC(1, ULL) << _PAGE_BIT_NX)
|
|
+#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
|
|
#else
|
|
-#define _PAGE_NX 0
|
|
+#define _PAGE_NX (_AT(pteval_t, 0))
|
|
#endif
|
|
|
|
-#define _PAGE_FILE (_AC(1, L)<<_PAGE_BIT_FILE)
|
|
-#define _PAGE_PROTNONE (_AC(1, L)<<_PAGE_BIT_PROTNONE)
|
|
+#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
|
|
+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
|
|
@@ -71,8 +69,8 @@ extern unsigned int __kernel_page_user;
|
|
_PAGE_DIRTY | __kernel_page_user)
|
|
|
|
/* Set of bits not changed in pte_modify */
|
|
-#define _PAGE_CHG_MASK (PTE_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
|
|
- _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IO | \
|
|
+ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
|
|
/*
|
|
* PAT settings are part of the hypervisor interface, which sets the
|
|
@@ -102,19 +100,9 @@ extern unsigned int __kernel_page_user;
|
|
#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
_PAGE_ACCESSED)
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-#define _PAGE_KERNEL_EXEC \
|
|
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED)
|
|
-#define _PAGE_KERNEL (_PAGE_KERNEL_EXEC | _PAGE_NX)
|
|
-
|
|
-#ifndef __ASSEMBLY__
|
|
-extern pteval_t __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
|
|
-#endif /* __ASSEMBLY__ */
|
|
-#else
|
|
#define __PAGE_KERNEL_EXEC \
|
|
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user)
|
|
#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
|
|
-#endif
|
|
|
|
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
|
|
#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
|
|
@@ -125,25 +113,22 @@ extern pteval_t __PAGE_KERNEL, __PAGE_KE
|
|
#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
|
|
#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
|
|
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
|
|
+#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
|
|
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
|
|
|
-/*
|
|
- * We don't support GLOBAL page in xenolinux64
|
|
- */
|
|
-#define MAKE_GLOBAL(x) __pgprot((x))
|
|
-
|
|
-#define PAGE_KERNEL MAKE_GLOBAL(__PAGE_KERNEL)
|
|
-#define PAGE_KERNEL_RO MAKE_GLOBAL(__PAGE_KERNEL_RO)
|
|
-#define PAGE_KERNEL_EXEC MAKE_GLOBAL(__PAGE_KERNEL_EXEC)
|
|
-#define PAGE_KERNEL_RX MAKE_GLOBAL(__PAGE_KERNEL_RX)
|
|
-#define PAGE_KERNEL_WC MAKE_GLOBAL(__PAGE_KERNEL_WC)
|
|
-#define PAGE_KERNEL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_NOCACHE)
|
|
-#define PAGE_KERNEL_UC_MINUS MAKE_GLOBAL(__PAGE_KERNEL_UC_MINUS)
|
|
-#define PAGE_KERNEL_EXEC_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_EXEC_NOCACHE)
|
|
-#define PAGE_KERNEL_LARGE MAKE_GLOBAL(__PAGE_KERNEL_LARGE)
|
|
-#define PAGE_KERNEL_LARGE_EXEC MAKE_GLOBAL(__PAGE_KERNEL_LARGE_EXEC)
|
|
-#define PAGE_KERNEL_VSYSCALL MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL)
|
|
-#define PAGE_KERNEL_VSYSCALL_NOCACHE MAKE_GLOBAL(__PAGE_KERNEL_VSYSCALL_NOCACHE)
|
|
+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
|
|
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
|
|
+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
|
|
+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
|
|
+#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
|
|
+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
|
|
+#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
|
|
+#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
|
|
+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
|
|
+#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
|
|
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
|
|
+#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
|
|
+#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
|
|
|
|
/* xwr */
|
|
#define __P000 PAGE_NONE
|
|
@@ -182,27 +167,27 @@ extern struct list_head pgd_list;
|
|
*/
|
|
static inline int pte_dirty(pte_t pte)
|
|
{
|
|
- return __pte_val(pte) & _PAGE_DIRTY;
|
|
+ return pte_flags(pte) & _PAGE_DIRTY;
|
|
}
|
|
|
|
static inline int pte_young(pte_t pte)
|
|
{
|
|
- return __pte_val(pte) & _PAGE_ACCESSED;
|
|
+ return pte_flags(pte) & _PAGE_ACCESSED;
|
|
}
|
|
|
|
static inline int pte_write(pte_t pte)
|
|
{
|
|
- return __pte_val(pte) & _PAGE_RW;
|
|
+ return pte_flags(pte) & _PAGE_RW;
|
|
}
|
|
|
|
static inline int pte_file(pte_t pte)
|
|
{
|
|
- return __pte_val(pte) & _PAGE_FILE;
|
|
+ return pte_flags(pte) & _PAGE_FILE;
|
|
}
|
|
|
|
static inline int pte_huge(pte_t pte)
|
|
{
|
|
- return __pte_val(pte) & _PAGE_PSE;
|
|
+ return pte_flags(pte) & _PAGE_PSE;
|
|
}
|
|
|
|
static inline int pte_global(pte_t pte)
|
|
@@ -212,12 +197,12 @@ static inline int pte_global(pte_t pte)
|
|
|
|
static inline int pte_exec(pte_t pte)
|
|
{
|
|
- return !(__pte_val(pte) & _PAGE_NX);
|
|
+ return !(pte_flags(pte) & _PAGE_NX);
|
|
}
|
|
|
|
static inline int pte_special(pte_t pte)
|
|
{
|
|
- return 0;
|
|
+ return pte_flags(pte) & _PAGE_SPECIAL;
|
|
}
|
|
|
|
static inline int pmd_large(pmd_t pte)
|
|
@@ -228,22 +213,22 @@ static inline int pmd_large(pmd_t pte)
|
|
|
|
static inline pte_t pte_mkclean(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_DIRTY);
|
|
+ return __pte_ma(__pte_val(pte) & ~_PAGE_DIRTY);
|
|
}
|
|
|
|
static inline pte_t pte_mkold(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_ACCESSED);
|
|
+ return __pte_ma(__pte_val(pte) & ~_PAGE_ACCESSED);
|
|
}
|
|
|
|
static inline pte_t pte_wrprotect(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_RW);
|
|
+ return __pte_ma(__pte_val(pte) & ~_PAGE_RW);
|
|
}
|
|
|
|
static inline pte_t pte_mkexec(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_NX);
|
|
+ return __pte_ma(__pte_val(pte) & ~_PAGE_NX);
|
|
}
|
|
|
|
static inline pte_t pte_mkdirty(pte_t pte)
|
|
@@ -268,7 +253,7 @@ static inline pte_t pte_mkhuge(pte_t pte
|
|
|
|
static inline pte_t pte_clrhuge(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~(pteval_t)_PAGE_PSE);
|
|
+ return __pte_ma(__pte_val(pte) & ~_PAGE_PSE);
|
|
}
|
|
|
|
static inline pte_t pte_mkglobal(pte_t pte)
|
|
@@ -283,35 +268,46 @@ static inline pte_t pte_clrglobal(pte_t
|
|
|
|
static inline pte_t pte_mkspecial(pte_t pte)
|
|
{
|
|
- return pte;
|
|
+ return __pte_ma(__pte_val(pte) | _PAGE_SPECIAL);
|
|
}
|
|
|
|
extern pteval_t __supported_pte_mask;
|
|
|
|
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
|
|
{
|
|
- return __pte((((phys_addr_t)page_nr << PAGE_SHIFT) |
|
|
- pgprot_val(pgprot)) & __supported_pte_mask);
|
|
+ pgprotval_t prot = pgprot_val(pgprot);
|
|
+
|
|
+ if (prot & _PAGE_PRESENT)
|
|
+ prot &= __supported_pte_mask;
|
|
+ return __pte(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
|
|
}
|
|
|
|
static inline pte_t pfn_pte_ma(phys_addr_t page_nr, pgprot_t pgprot)
|
|
{
|
|
- return __pte_ma(((page_nr << PAGE_SHIFT) |
|
|
- pgprot_val(pgprot)) & __supported_pte_mask);
|
|
+ pgprotval_t prot = pgprot_val(pgprot);
|
|
+
|
|
+ if (prot & _PAGE_PRESENT)
|
|
+ prot &= __supported_pte_mask;
|
|
+ return __pte_ma((page_nr << PAGE_SHIFT) | prot);
|
|
}
|
|
|
|
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
|
|
{
|
|
- return __pmd((((phys_addr_t)page_nr << PAGE_SHIFT) |
|
|
- pgprot_val(pgprot)) & __supported_pte_mask);
|
|
+ pgprotval_t prot = pgprot_val(pgprot);
|
|
+
|
|
+ if (prot & _PAGE_PRESENT)
|
|
+ prot &= __supported_pte_mask;
|
|
+ return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) | prot);
|
|
}
|
|
|
|
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
|
|
{
|
|
- pteval_t val = pte_val(pte);
|
|
+ pgprotval_t prot = pgprot_val(newprot);
|
|
+ pteval_t val = pte_val(pte) & _PAGE_CHG_MASK;
|
|
|
|
- val &= _PAGE_CHG_MASK;
|
|
- val |= pgprot_val(newprot) & (~_PAGE_CHG_MASK) & __supported_pte_mask;
|
|
+ if (prot & _PAGE_PRESENT)
|
|
+ prot &= __supported_pte_mask;
|
|
+ val |= prot & ~_PAGE_CHG_MASK;
|
|
|
|
return __pte(val);
|
|
}
|
|
@@ -325,9 +321,11 @@ static inline pgprot_t pgprot_modify(pgp
|
|
return __pgprot(preservebits | addbits);
|
|
}
|
|
|
|
-#define pte_pgprot(x) __pgprot(__pte_val(x) & ~PTE_MASK)
|
|
+#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
|
|
|
|
-#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
|
|
+#define canon_pgprot(p) __pgprot(pgprot_val(p) & _PAGE_PRESENT \
|
|
+ ? pgprot_val(p) & __supported_pte_mask \
|
|
+ : pgprot_val(p))
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
@@ -338,6 +336,17 @@ int phys_mem_access_prot_allowed(struct
|
|
unsigned long size, pgprot_t *vma_prot);
|
|
#endif
|
|
|
|
+/* Install a pte for a particular vaddr in kernel space. */
|
|
+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+extern void native_pagetable_setup_start(pgd_t *base);
|
|
+extern void native_pagetable_setup_done(pgd_t *base);
|
|
+#else
|
|
+static inline void xen_pagetable_setup_start(pgd_t *base) {}
|
|
+static inline void xen_pagetable_setup_done(pgd_t *base) {}
|
|
+#endif
|
|
+
|
|
#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
|
|
#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
|
|
|
|
@@ -373,6 +382,26 @@ int phys_mem_access_prot_allowed(struct
|
|
# include "pgtable_64.h"
|
|
#endif
|
|
|
|
+/*
|
|
+ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
|
|
+ *
|
|
+ * this macro returns the index of the entry in the pgd page which would
|
|
+ * control the given virtual address
|
|
+ */
|
|
+#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
|
|
+
|
|
+/*
|
|
+ * pgd_offset() returns a (pgd_t *)
|
|
+ * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
|
|
+ */
|
|
+#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
|
|
+/*
|
|
+ * a shortcut which implies the use of the kernel's pgd, instead
|
|
+ * of a process's
|
|
+ */
|
|
+#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
|
|
+
|
|
+
|
|
#define KERNEL_PGD_BOUNDARY pgd_index(PAGE_OFFSET)
|
|
#define KERNEL_PGD_PTRS (PTRS_PER_PGD - KERNEL_PGD_BOUNDARY)
|
|
|
|
@@ -383,8 +412,15 @@ enum {
|
|
PG_LEVEL_4K,
|
|
PG_LEVEL_2M,
|
|
PG_LEVEL_1G,
|
|
+ PG_LEVEL_NUM
|
|
};
|
|
|
|
+#ifdef CONFIG_PROC_FS
|
|
+extern void update_page_count(int level, unsigned long pages);
|
|
+#else
|
|
+static inline void update_page_count(int level, unsigned long pages) { }
|
|
+#endif
|
|
+
|
|
/*
|
|
* Helper function that returns the kernel pagetable entry controlling
|
|
* the virtual address 'address'. NULL means no pagetable entry present.
|
|
@@ -441,6 +477,8 @@ static inline void xen_pte_clear(struct
|
|
* race with other CPU's that might be updating the dirty
|
|
* bit at the same time.
|
|
*/
|
|
+struct vm_area_struct;
|
|
+
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
extern int ptep_set_access_flags(struct vm_area_struct *vma,
|
|
unsigned long address, pte_t *ptep,
|
|
@@ -523,9 +561,6 @@ static inline void clone_pgd_range(pgd_t
|
|
memcpy(dst, src, count * sizeof(pgd_t));
|
|
}
|
|
|
|
-#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
|
|
- xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
|
|
-
|
|
#define arbitrary_virt_to_machine(va) \
|
|
({ \
|
|
unsigned int __lvl; \
|
|
@@ -548,6 +583,34 @@ struct page *kmap_atomic_to_page(void *)
|
|
#define ptep_to_machine(ptep) virt_to_machine(ptep)
|
|
#endif
|
|
|
|
+#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
|
|
+static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr,
|
|
+ pte_t *ptep)
|
|
+{
|
|
+#if CONFIG_XEN_COMPAT < 0x030300
|
|
+ if (unlikely(!xen_feature(XENFEAT_mmu_pt_update_preserve_ad)))
|
|
+ return ptep_get_and_clear(mm, addr, ptep);
|
|
+#endif
|
|
+ return *ptep;
|
|
+}
|
|
+
|
|
+static inline void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr,
|
|
+ pte_t *ptep, pte_t pte)
|
|
+{
|
|
+ mmu_update_t u;
|
|
+
|
|
+#if CONFIG_XEN_COMPAT < 0x030300
|
|
+ if (unlikely(!xen_feature(XENFEAT_mmu_pt_update_preserve_ad))) {
|
|
+ set_pte_at(mm, addr, ptep, pte);
|
|
+ return;
|
|
+ }
|
|
+#endif
|
|
+ u.ptr = ptep_to_machine(ptep) | MMU_PT_UPDATE_PRESERVE_AD;
|
|
+ u.val = __pte_val(pte);
|
|
+ if (HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF))
|
|
+ BUG();
|
|
+}
|
|
+
|
|
#include <asm-generic/pgtable.h>
|
|
|
|
#include <xen/features.h>
|
|
@@ -573,10 +636,6 @@ int create_lookup_pte_addr(struct mm_str
|
|
unsigned long address,
|
|
uint64_t *ptep);
|
|
|
|
-int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|
- unsigned long addr, unsigned long end, pgprot_t newprot,
|
|
- int dirty_accountable);
|
|
-
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
#endif /* _ASM_X86_PGTABLE_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -14,11 +14,11 @@
|
|
#define pmd_ERROR(e) \
|
|
printk("%s:%d: bad pmd %p(%016Lx pfn %08Lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pmd_val(e), \
|
|
- (pmd_val(e) & PTE_MASK) >> PAGE_SHIFT)
|
|
+ (pmd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
#define pgd_ERROR(e) \
|
|
printk("%s:%d: bad pgd %p(%016Lx pfn %08Lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pgd_val(e), \
|
|
- (pgd_val(e) & PTE_MASK) >> PAGE_SHIFT)
|
|
+ (pgd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
|
|
static inline int pud_none(pud_t pud)
|
|
{
|
|
@@ -27,7 +27,7 @@ static inline int pud_none(pud_t pud)
|
|
}
|
|
static inline int pud_bad(pud_t pud)
|
|
{
|
|
- return (__pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
|
|
+ return (__pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
|
|
}
|
|
|
|
static inline int pud_present(pud_t pud)
|
|
@@ -102,9 +102,9 @@ static inline void pud_clear(pud_t *pudp
|
|
xen_tlb_flush();
|
|
}
|
|
|
|
-#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_MASK))
|
|
+#define pud_page(pud) ((struct page *) __va(pud_val(pud) & PTE_PFN_MASK))
|
|
|
|
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_MASK))
|
|
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
|
|
|
|
|
|
/* Find an entry in the second-level page table.. */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -89,10 +89,10 @@ extern unsigned long pg0[];
|
|
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
|
|
can temporarily clear it. */
|
|
#define pmd_present(x) (__pmd_val(x))
|
|
-#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
|
|
+#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
|
|
#else
|
|
#define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
|
|
-#define pmd_bad(x) ((__pmd_val(x) & (~PTE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
|
+#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
|
#endif
|
|
|
|
|
|
@@ -119,26 +119,6 @@ extern unsigned long pg0[];
|
|
*/
|
|
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
|
|
-/*
|
|
- * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
|
|
- *
|
|
- * this macro returns the index of the entry in the pgd page which would
|
|
- * control the given virtual address
|
|
- */
|
|
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
|
|
-#define pgd_index_k(addr) pgd_index((addr))
|
|
-
|
|
-/*
|
|
- * pgd_offset() returns a (pgd_t *)
|
|
- * pgd_index() is used get the offset into the pgd page's array of pgd_t's;
|
|
- */
|
|
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
|
|
-
|
|
-/*
|
|
- * a shortcut which implies the use of the kernel's pgd, instead
|
|
- * of a process's
|
|
- */
|
|
-#define pgd_offset_k(address) pgd_offset(&init_mm, (address))
|
|
|
|
static inline int pud_large(pud_t pud) { return 0; }
|
|
|
|
@@ -165,7 +145,7 @@ static inline int pud_large(pud_t pud) {
|
|
#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
|
|
|
|
#define pmd_page_vaddr(pmd) \
|
|
- ((unsigned long)__va(pmd_val((pmd)) & PTE_MASK))
|
|
+ ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK))
|
|
|
|
#if defined(CONFIG_HIGHPTE)
|
|
#define pte_offset_map(dir, address) \
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 14:38:38.000000000 +0100
|
|
@@ -23,6 +23,8 @@ extern void xen_init_pt(void);
|
|
extern pud_t level3_kernel_pgt[512];
|
|
extern pud_t level3_ident_pgt[512];
|
|
extern pmd_t level2_kernel_pgt[512];
|
|
+extern pmd_t level2_fixmap_pgt[512];
|
|
+extern pmd_t level2_ident_pgt[512];
|
|
extern pgd_t init_level4_pgt[];
|
|
|
|
#define swapper_pg_dir init_level4_pgt
|
|
@@ -79,6 +81,9 @@ extern void paging_init(void);
|
|
|
|
struct mm_struct;
|
|
|
|
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
|
|
+
|
|
+
|
|
#define __xen_pte_clear(ptep) xen_set_pte(ptep, __pte(0))
|
|
|
|
static inline void xen_set_pte(pte_t *ptep, pte_t pte)
|
|
@@ -145,7 +150,7 @@ static inline void xen_pgd_clear(pgd_t *
|
|
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
|
|
|
|
|
-#define MAXMEM _AC(0x0000006fffffffff, UL)
|
|
+#define MAXMEM _AC(0x000004ffffffffff, UL)
|
|
#define VMALLOC_START _AC(0xffffc20000000000, UL)
|
|
#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
|
|
#define VMEMMAP_START _AC(0xffffe20000000000, UL)
|
|
@@ -157,17 +162,17 @@ static inline void xen_pgd_clear(pgd_t *
|
|
|
|
static inline int pgd_bad(pgd_t pgd)
|
|
{
|
|
- return (__pgd_val(pgd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
+ return (__pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
}
|
|
|
|
static inline int pud_bad(pud_t pud)
|
|
{
|
|
- return (__pud_val(pud) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
+ return (__pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
}
|
|
|
|
static inline int pmd_bad(pmd_t pmd)
|
|
{
|
|
- return (__pmd_val(pmd) & ~(PTE_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
+ return (__pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
}
|
|
|
|
#define pte_none(x) (!(x).pte)
|
|
@@ -175,7 +180,7 @@ static inline int pmd_bad(pmd_t pmd)
|
|
|
|
#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
|
|
|
|
-#define __pte_mfn(_pte) (((_pte).pte & PTE_MASK) >> PAGE_SHIFT)
|
|
+#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
#define pte_mfn(_pte) ((_pte).pte & _PAGE_PRESENT ? \
|
|
__pte_mfn(_pte) : pfn_to_mfn(__pte_mfn(_pte)))
|
|
#define pte_pfn(_pte) ((_pte).pte & _PAGE_IO ? max_mapnr : \
|
|
@@ -200,11 +205,8 @@ static inline int pmd_bad(pmd_t pmd)
|
|
* Level 4 access.
|
|
*/
|
|
#define pgd_page_vaddr(pgd) \
|
|
- ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_MASK))
|
|
+ ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK))
|
|
#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
|
|
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
|
|
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index((address)))
|
|
-#define pgd_offset_k(address) (init_level4_pgt + pgd_index((address)))
|
|
#define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
|
|
static inline int pgd_large(pgd_t pgd) { return 0; }
|
|
#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
|
|
@@ -226,7 +228,7 @@ static inline int pud_large(pud_t pte)
|
|
}
|
|
|
|
/* PMD - Level 2 access */
|
|
-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_MASK))
|
|
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK))
|
|
#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
|
|
|
|
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:42:13.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:44:23.000000000 +0100
|
|
@@ -144,7 +144,7 @@ extern __u32 cleared_cpu_caps[NCAPINTS
|
|
#ifdef CONFIG_SMP
|
|
DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
|
|
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
|
|
-#define current_cpu_data cpu_data(smp_processor_id())
|
|
+#define current_cpu_data __get_cpu_var(cpu_info)
|
|
#else
|
|
#define cpu_data(cpu) boot_cpu_data
|
|
#define current_cpu_data boot_cpu_data
|
|
@@ -163,7 +163,7 @@ static inline int hlt_works(int cpu)
|
|
|
|
extern void cpu_detect(struct cpuinfo_x86 *c);
|
|
|
|
-extern void identify_cpu(struct cpuinfo_x86 *);
|
|
+extern void early_cpu_init(void);
|
|
extern void identify_boot_cpu(void);
|
|
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
|
|
extern void print_cpu_info(struct cpuinfo_x86 *);
|
|
@@ -277,15 +277,11 @@ struct tss_struct {
|
|
struct thread_struct *io_bitmap_owner;
|
|
|
|
/*
|
|
- * Pad the TSS to be cacheline-aligned (size is 0x100):
|
|
- */
|
|
- unsigned long __cacheline_filler[35];
|
|
- /*
|
|
* .. and then another 0x100 bytes for the emergency kernel stack:
|
|
*/
|
|
unsigned long stack[64];
|
|
|
|
-} __attribute__((packed));
|
|
+} ____cacheline_aligned;
|
|
|
|
DECLARE_PER_CPU(struct tss_struct, init_tss);
|
|
|
|
@@ -677,11 +673,36 @@ static inline void __sti_mwait(unsigned
|
|
|
|
extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
|
|
|
|
-extern int force_mwait;
|
|
-
|
|
extern void select_idle_routine(const struct cpuinfo_x86 *c);
|
|
|
|
extern unsigned long boot_option_idle_override;
|
|
+extern unsigned long idle_halt;
|
|
+extern unsigned long idle_nomwait;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * on systems with caches, caches must be flashed as the absolute
|
|
+ * last instruction before going into a suspended halt. Otherwise,
|
|
+ * dirty data can linger in the cache and become stale on resume,
|
|
+ * leading to strange errors.
|
|
+ *
|
|
+ * perform a variety of operations to guarantee that the compiler
|
|
+ * will not reorder instructions. wbinvd itself is serializing
|
|
+ * so the processor will not reorder.
|
|
+ *
|
|
+ * Systems without cache can just go into halt.
|
|
+ */
|
|
+static inline void wbinvd_halt(void)
|
|
+{
|
|
+ mb();
|
|
+ /* check for clflush to determine if wbinvd is legal */
|
|
+ if (cpu_has_clflush)
|
|
+ asm volatile("cli; wbinvd; 1: hlt; jmp 1b" : : : "memory");
|
|
+ else
|
|
+ while (1)
|
|
+ halt();
|
|
+}
|
|
+#endif
|
|
|
|
extern void enable_sep_cpu(void);
|
|
extern int sysenter_setup(void);
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/smp.h	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/smp.h	2011-02-01 14:38:38.000000000 +0100
@@ -25,25 +25,18 @@ extern cpumask_t cpu_initialized;
 extern void (*mtrr_hook)(void);
 extern void zap_low_mappings(void);

+extern int __cpuinit get_local_pda(int cpu);
+
 extern unsigned int num_processors;
 extern cpumask_t cpu_initialized;

 #ifndef CONFIG_XEN
-#ifdef CONFIG_SMP
-extern u16 x86_cpu_to_apicid_init[];
-extern u16 x86_bios_cpu_apicid_init[];
-extern void *x86_cpu_to_apicid_early_ptr;
-extern void *x86_bios_cpu_apicid_early_ptr;
-#else
-#define x86_cpu_to_apicid_early_ptr NULL
-#define x86_bios_cpu_apicid_early_ptr NULL
-#endif
-
 DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
 DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 DECLARE_PER_CPU(u16, cpu_llc_id);
-DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
-DECLARE_PER_CPU(u16, x86_bios_cpu_apicid);
+
+DECLARE_EARLY_PER_CPU(u16, x86_cpu_to_apicid);
+DECLARE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid);
 #endif

 #ifdef CONFIG_SMP
@@ -64,9 +57,9 @@ struct smp_ops {

 	void (*smp_send_stop)(void);
 	void (*smp_send_reschedule)(int cpu);
-	int (*smp_call_function_mask)(cpumask_t mask,
-				      void (*func)(void *info), void *info,
-				      int wait);
+
+	void (*send_call_func_ipi)(cpumask_t mask);
+	void (*send_call_func_single_ipi)(int cpu);
 };

 /* Globals due to paravirt */
@@ -104,11 +97,14 @@ static inline void smp_send_reschedule(i
 	smp_ops.smp_send_reschedule(cpu);
 }

-static inline int smp_call_function_mask(cpumask_t mask,
-					 void (*func) (void *info), void *info,
-					 int wait)
+static inline void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_ops.send_call_func_single_ipi(cpu);
+}
+
+static inline void arch_send_call_function_ipi(cpumask_t mask)
 {
-	return smp_ops.smp_call_function_mask(mask, func, info, wait);
+	smp_ops.send_call_func_ipi(mask);
 }

 void native_smp_prepare_boot_cpu(void);
@@ -120,23 +116,19 @@ int native_cpu_up(unsigned int cpunum);

 void xen_smp_send_stop(void);
 void xen_smp_send_reschedule(int cpu);
-int xen_smp_call_function_mask(cpumask_t mask,
-			       void (*func) (void *info), void *info,
-			       int wait);
+void xen_send_call_func_ipi(cpumask_t mask);
+void xen_send_call_func_single_ipi(int cpu);

 #define smp_send_stop xen_smp_send_stop
 #define smp_send_reschedule xen_smp_send_reschedule
-#define smp_call_function_mask xen_smp_call_function_mask
-
-extern void prefill_possible_map(void);
+#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
+#define arch_send_call_function_ipi xen_send_call_func_ipi

 #endif /* CONFIG_XEN */

 extern int __cpu_disable(void);
 extern void __cpu_die(unsigned int cpu);

-extern void prefill_possible_map(void);
-
 void smp_store_cpu_info(int id);
 #define cpu_physical_id(cpu) (cpu)

@@ -147,6 +139,14 @@ static inline int num_booting_cpus(void)
 }
 #endif /* CONFIG_SMP */

+#if defined(CONFIG_SMP) && (defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_XEN))
+extern void prefill_possible_map(void);
+#else
+static inline void prefill_possible_map(void)
+{
+}
+#endif
+
 extern unsigned disabled_cpus __cpuinitdata;

 #ifdef CONFIG_X86_32_SMP
@@ -214,12 +214,8 @@ static inline int hard_smp_processor_id(
 #endif /* CONFIG_X86_LOCAL_APIC */

 #ifdef CONFIG_HOTPLUG_CPU
-extern void cpu_exit_clear(void);
 extern void cpu_uninit(void);
 #endif

-extern void smp_alloc_memory(void);
-extern void lock_ipi_call_lock(void);
-extern void unlock_ipi_call_lock(void);
 #endif /* __ASSEMBLY__ */
 #endif
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/spinlock.h	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/spinlock.h	2011-02-01 14:38:38.000000000 +0100
@@ -38,6 +38,8 @@
 # define UNLOCK_LOCK_PREFIX
 #endif

+#ifdef TICKET_SHIFT
+
 #include <asm/irqflags.h>

 int xen_spinlock_init(unsigned int cpu);
@@ -65,14 +67,14 @@ void xen_spin_kick(raw_spinlock_t *, uns
  * much between them in performance though, especially as locks are out of line.
  */
 #if TICKET_SHIFT == 8
-#define __raw_spin_lock_preamble \
+#define __ticket_spin_lock_preamble \
 	asm(LOCK_PREFIX "xaddw %w0, %2\n\t" \
 	    "cmpb %h0, %b0\n\t" \
 	    "sete %1" \
 	    : "=&Q" (token), "=qm" (free), "+m" (lock->slock) \
 	    : "0" (0x0100) \
 	    : "memory", "cc")
-#define __raw_spin_lock_body \
+#define __ticket_spin_lock_body \
 	asm("1:\t" \
 	    "cmpb %h0, %b0\n\t" \
 	    "je 2f\n\t" \
@@ -86,7 +88,7 @@ void xen_spin_kick(raw_spinlock_t *, uns
 	    : "+Q" (token), "+g" (count) \
 	    : "m" (lock->slock) \
 	    : "memory", "cc")
-#define __raw_spin_unlock_body \
+#define __ticket_spin_unlock_body \
 	asm(UNLOCK_LOCK_PREFIX "incb %2\n\t" \
 	    "movzwl %2, %0\n\t" \
 	    "cmpb %h0, %b0\n\t" \
@@ -95,7 +97,7 @@ void xen_spin_kick(raw_spinlock_t *, uns
 	    : \
 	    : "memory", "cc")

-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
 	int tmp, new;

@@ -114,7 +116,7 @@ static __always_inline int __raw_spin_tr
 	return tmp;
 }
 #elif TICKET_SHIFT == 16
-#define __raw_spin_lock_preamble \
+#define __ticket_spin_lock_preamble \
 	do { \
 		unsigned int tmp; \
 		asm(LOCK_PREFIX "xaddl %0, %2\n\t" \
@@ -126,7 +128,7 @@ static __always_inline int __raw_spin_tr
 		    : "0" (0x00010000) \
 		    : "memory", "cc"); \
 	} while (0)
-#define __raw_spin_lock_body \
+#define __ticket_spin_lock_body \
 	do { \
 		unsigned int tmp; \
 		asm("shldl $16, %0, %2\n" \
@@ -144,7 +146,7 @@ static __always_inline int __raw_spin_tr
 		    : "m" (lock->slock) \
 		    : "memory", "cc"); \
 	} while (0)
-#define __raw_spin_unlock_body \
+#define __ticket_spin_unlock_body \
 	do { \
 		unsigned int tmp; \
 		asm(UNLOCK_LOCK_PREFIX "incw %2\n\t" \
@@ -158,7 +160,7 @@ static __always_inline int __raw_spin_tr
 		    : "memory", "cc"); \
 	} while (0)

-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
 {
 	int tmp;
 	int new;
@@ -181,27 +183,27 @@ static __always_inline int __raw_spin_tr
 }
 #endif

-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);

 	return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
 }

-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
 {
 	int tmp = ACCESS_ONCE(lock->slock);

 	return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
 }

-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
 {
 	unsigned int token, count;
 	unsigned int flags = __raw_local_irq_save();
 	bool free;

-	__raw_spin_lock_preamble;
+	__ticket_spin_lock_preamble;
 	if (likely(free)) {
 		raw_local_irq_restore(flags);
 		return;
@@ -210,41 +212,154 @@ static __always_inline void __raw_spin_l
 	raw_local_irq_restore(flags);
 	do {
 		count = 1 << 10;
-		__raw_spin_lock_body;
+		__ticket_spin_lock_body;
 	} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
 }

-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
-						  unsigned long flags)
+static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
+						     unsigned long flags)
 {
 	unsigned int token, count;
 	bool free;

-	__raw_spin_lock_preamble;
+	__ticket_spin_lock_preamble;
 	if (likely(free))
 		return;
 	token = xen_spin_adjust(lock, token);
 	do {
 		count = 1 << 10;
-		__raw_spin_lock_body;
+		__ticket_spin_lock_body;
 	} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
 }

-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
 {
 	unsigned int token;
 	bool kick;

-	__raw_spin_unlock_body;
+	__ticket_spin_unlock_body;
 	if (kick)
 		xen_spin_kick(lock, token);
 }

 #ifndef XEN_SPINLOCK_SOURCE
-#undef __raw_spin_lock_preamble
-#undef __raw_spin_lock_body
-#undef __raw_spin_unlock_body
+#undef __ticket_spin_lock_preamble
+#undef __ticket_spin_lock_body
+#undef __ticket_spin_unlock_body
+#endif
+
+#define __raw_spin(n) __ticket_spin_##n
+
+#else /* TICKET_SHIFT */
+
+static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
+static inline void xen_spinlock_cleanup(unsigned int cpu) {}
+
+/*
+ * Define virtualization-friendly old-style lock byte lock, for use in
+ * pv_lock_ops if desired.
+ *
+ * This differs from the pre-2.6.24 spinlock by always using xchgb
+ * rather than decb to take the lock; this allows it to use a
+ * zero-initialized lock structure.  It also maintains a 1-byte
+ * contention counter, so that we can implement
+ * __byte_spin_is_contended.
+ */
+struct __byte_spinlock {
+	s8 lock;
+#if NR_CPUS < 256
+	s8 spinners;
+#else
+#error NR_CPUS >= 256 support not implemented
 #endif
+};
+
+static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	return bl->lock != 0;
+}
+
+static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	return bl->spinners != 0;
+}
+
+static inline void __byte_spin_lock(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	s8 val = 1;
+
+	asm("1: xchgb %1, %0\n"
+	    "   test %1,%1\n"
+	    "   jz 3f\n"
+	    "   " LOCK_PREFIX "incb %2\n"
+	    "2: rep;nop\n"
+	    "   cmpb $1, %0\n"
+	    "   je 2b\n"
+	    "   " LOCK_PREFIX "decb %2\n"
+	    "   jmp 1b\n"
+	    "3:"
+	    : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
+}
+
+#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
+
+static inline int __byte_spin_trylock(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	u8 old = 1;
+
+	asm("xchgb %1,%0"
+	    : "+m" (bl->lock), "+q" (old) : : "memory");
+
+	return old == 0;
+}
+
+static inline void __byte_spin_unlock(raw_spinlock_t *lock)
+{
+	struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
+	smp_wmb();
+	bl->lock = 0;
+}
+
+#define __raw_spin(n) __byte_spin_##n
+
+#endif /* TICKET_SHIFT */
+
+static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+{
+	return __raw_spin(is_locked)(lock);
+}
+
+static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+{
+	return __raw_spin(is_contended)(lock);
+}
+
+static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+{
+	__raw_spin(lock)(lock);
+}
+
+static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+{
+	return __raw_spin(trylock)(lock);
+}
+
+static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+{
+	__raw_spin(unlock)(lock);
+}
+
+static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+						  unsigned long flags)
+{
+	__raw_spin(lock_flags)(lock, flags);
+}
+
+#undef __raw_spin

 static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
 {
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/spinlock_types.h	2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/spinlock_types.h	2011-02-01 14:38:38.000000000 +0100
@@ -11,6 +11,10 @@ typedef union {
 	unsigned int slock;
 	struct {
 /*
+ * Xen versions prior to 3.2.x have a race condition with HYPERVISOR_poll().
+ */
+#if CONFIG_XEN_COMPAT >= 0x030200
+/*
  * On Xen we support a single level of interrupt re-enabling per lock. Hence
  * we can have twice as many outstanding tickets. Thus the cut-off for using
  * byte register pairs must be at half the number of CPUs.
@@ -22,6 +26,7 @@ typedef union {
 # define TICKET_SHIFT 16
 		u16 cur, seq;
 #endif
+#endif
 	};
 } raw_spinlock_t;

--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/system.h	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/system.h	2011-03-03 15:58:55.000000000 +0100
@@ -68,10 +68,12 @@ do { \
 		       [next] "d" (next)); \
 } while (0)

+#ifndef CONFIG_XEN
 /*
  * disable hlt during certain critical i/o operations
  */
 #define HAVE_DISABLE_HLT
+#endif
 #else
 #define __SAVE(reg, offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
 #define __RESTORE(reg, offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
@@ -137,7 +139,7 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
 #define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
 #define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))

-extern void load_gs_index(unsigned);
+extern void xen_load_gs_index(unsigned);

 /*
  * Load a segment. Fall back on loading the zero
@@ -154,14 +156,14 @@ extern void load_gs_index(unsigned);
 		     "jmp 2b\n" \
 		     ".previous\n" \
 		     _ASM_EXTABLE(1b,3b) \
-		     : :"r" (value), "r" (0))
+		     : :"r" (value), "r" (0) : "memory")


 /*
  * Save a segment register away
  */
 #define savesegment(seg, value) \
-	asm volatile("mov %%" #seg ",%0":"=rm" (value))
+	asm("mov %%" #seg ",%0":"=r" (value) : : "memory")

 static inline unsigned long get_limit(unsigned long segment)
 {
@@ -269,6 +271,7 @@ static inline void xen_wbinvd(void)
 #ifdef CONFIG_X86_64
 #define read_cr8() (xen_read_cr8())
 #define write_cr8(x) (xen_write_cr8(x))
+#define load_gs_index xen_load_gs_index
 #endif

 /* Clear the 'TS' bit */
@@ -287,13 +290,12 @@ static inline void clflush(volatile void
 void disable_hlt(void);
 void enable_hlt(void);

-extern int es7000_plat;
 void cpu_idle_wait(void);

 extern unsigned long arch_align_stack(unsigned long sp);
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);

-void default_idle(void);
+void xen_idle(void);

 /*
  * Force strict CPU ordering.
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/xor_64.h	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/xor_64.h	2011-02-01 14:38:38.000000000 +0100
@@ -1,3 +1,6 @@
+#ifndef ASM_X86__XOR_64_H
+#define ASM_X86__XOR_64_H
+
 /*
  * x86-64 changes / gcc fixes from Andi Kleen.
  * Copyright 2002 Andi Kleen, SuSE Labs.
@@ -330,3 +333,5 @@ do { \
    We may also be able to load into the L1 only depending on how the cpu
    deals with a load to a line that is being prefetched. */
 #define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_sse)
+
+#endif /* ASM_X86__XOR_64_H */
--- head-2011-03-11.orig/arch/x86/include/mach-xen/irq_vectors.h	2008-09-25 13:55:32.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,125 +0,0 @@
-/*
- * This file should contain #defines for all of the interrupt vector
- * numbers used by this architecture.
- *
- * In addition, there are some standard defines:
- *
- * FIRST_EXTERNAL_VECTOR:
- *	The first free place for external interrupts
- *
- * SYSCALL_VECTOR:
- *	The IRQ vector a syscall makes the user to kernel transition
- *	under.
- *
- * TIMER_IRQ:
- *	The IRQ number the timer interrupt comes in at.
- *
- * NR_IRQS:
- *	The total number of interrupt vectors (including all the
- *	architecture specific interrupts) needed.
- *
- */
-#ifndef _ASM_IRQ_VECTORS_H
-#define _ASM_IRQ_VECTORS_H
-
-/*
- * IDT vectors usable for external interrupt sources start
- * at 0x20:
- */
-#define FIRST_EXTERNAL_VECTOR	0x20
-
-#define SYSCALL_VECTOR		0x80
-
-/*
- * Vectors 0x20-0x2f are used for ISA interrupts.
- */
-
-#if 0
-/*
- * Special IRQ vectors used by the SMP architecture, 0xf0-0xff
- *
- * some of the following vectors are 'rare', they are merged
- * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
- * TLB, reschedule and local APIC vectors are performance-critical.
- *
- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
- */
-#define SPURIOUS_APIC_VECTOR	0xff
-#define ERROR_APIC_VECTOR	0xfe
-#define INVALIDATE_TLB_VECTOR	0xfd
-#define RESCHEDULE_VECTOR	0xfc
-#define CALL_FUNCTION_VECTOR	0xfb
-
-#define THERMAL_APIC_VECTOR	0xf0
-/*
- * Local APIC timer IRQ vector is on a different priority level,
- * to work around the 'lost local interrupt if more than 2 IRQ
- * sources per level' errata.
- */
-#define LOCAL_TIMER_VECTOR	0xef
-#endif
-
-#define SPURIOUS_APIC_VECTOR	0xff
-#define ERROR_APIC_VECTOR	0xfe
-
-/*
- * First APIC vector available to drivers: (vectors 0x30-0xee)
- * we start at 0x31 to spread out vectors evenly between priority
- * levels. (0x80 is the syscall vector)
- */
-#define FIRST_DEVICE_VECTOR	0x31
-#define FIRST_SYSTEM_VECTOR	0xef
-
-/*
- * 16 8259A IRQ's, 208 potential APIC interrupt sources.
- * Right now the APIC is mostly only used for SMP.
- * 256 vectors is an architectural limit. (we can have
- * more than 256 devices theoretically, but they will
- * have to use shared interrupts)
- * Since vectors 0x00-0x1f are used/reserved for the CPU,
- * the usable vector space is 0x20-0xff (224 vectors)
- */
-
-#define RESCHEDULE_VECTOR	0
-#define CALL_FUNCTION_VECTOR	1
-#define NR_IPIS			2
-
-/*
- * The maximum number of vectors supported by i386 processors
- * is limited to 256. For processors other than i386, NR_VECTORS
- * should be changed accordingly.
- */
-#define NR_VECTORS 256
-
-#define FPU_IRQ			13
-
-#define	FIRST_VM86_IRQ		3
-#define LAST_VM86_IRQ		15
-#define invalid_vm86_irq(irq)	((irq) < 3 || (irq) > 15)
-
-/*
- * The flat IRQ space is divided into two regions:
- *  1. A one-to-one mapping of real physical IRQs. This space is only used
- *     if we have physical device-access privilege. This region is at the
- *     start of the IRQ space so that existing device drivers do not need
- *     to be modified to translate physical IRQ numbers into our IRQ space.
- *  3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
- *     are bound using the provided bind/unbind functions.
- */
-
-#define PIRQ_BASE		0
-#if !defined(MAX_IO_APICS)
-# define NR_PIRQS		(NR_VECTORS + 32 * NR_CPUS)
-#elif NR_CPUS < MAX_IO_APICS
-# define NR_PIRQS		(NR_VECTORS + 32 * NR_CPUS)
-#else
-# define NR_PIRQS		(NR_VECTORS + 32 * MAX_IO_APICS)
-#endif
-
-#define DYNIRQ_BASE		(PIRQ_BASE + NR_PIRQS)
-#define NR_DYNIRQS		256
-
-#define NR_IRQS			(NR_PIRQS + NR_DYNIRQS)
-#define NR_IRQ_VECTORS		NR_IRQS
-
-#endif /* _ASM_IRQ_VECTORS_H */
--- head-2011-03-11.orig/arch/x86/include/mach-xen/setup_arch_post.h	2007-06-12 13:14:13.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,63 +0,0 @@
-/**
- * machine_specific_* - Hooks for machine specific setup.
- *
- * Description:
- *	This is included late in kernel/setup.c so that it can make
- *	use of all of the static functions.
- **/
-
-#include <xen/interface/callback.h>
-
-extern void hypervisor_callback(void);
-extern void failsafe_callback(void);
-extern void nmi(void);
-
-static void __init machine_specific_arch_setup(void)
-{
-	int ret;
-	static struct callback_register __initdata event = {
-		.type = CALLBACKTYPE_event,
-		.address = (unsigned long) hypervisor_callback,
-	};
-	static struct callback_register __initdata failsafe = {
-		.type = CALLBACKTYPE_failsafe,
-		.address = (unsigned long)failsafe_callback,
-	};
-	static struct callback_register __initdata syscall = {
-		.type = CALLBACKTYPE_syscall,
-		.address = (unsigned long)system_call,
-	};
-#ifdef CONFIG_X86_LOCAL_APIC
-	static struct callback_register __initdata nmi_cb = {
-		.type = CALLBACKTYPE_nmi,
-		.address = (unsigned long)nmi,
-	};
-#endif
-
-	ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
-	if (ret == 0)
-		ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
-	if (ret == 0)
-		ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (ret == -ENOSYS)
-		ret = HYPERVISOR_set_callbacks(
-			event.address,
-			failsafe.address,
-			syscall.address);
-#endif
-	BUG_ON(ret);
-
-#ifdef CONFIG_X86_LOCAL_APIC
-	ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (ret == -ENOSYS) {
-		static struct xennmi_callback __initdata cb = {
-			.handler_address = (unsigned long)nmi
-		};
-
-		HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
-	}
-#endif
-#endif
-}
--- head-2011-03-11.orig/arch/x86/include/mach-xen/setup_arch_pre.h	2007-06-12 13:14:13.000000000 +0200
+++ /dev/null	1970-01-01 00:00:00.000000000 +0000
@@ -1,5 +0,0 @@
-/* Hook to call BIOS initialisation function */
-
-#define ARCH_SETUP machine_specific_arch_setup();
-
-static void __init machine_specific_arch_setup(void);
--- head-2011-03-11.orig/arch/x86/include/asm/traps.h	2011-03-15 16:45:55.000000000 +0100
+++ head-2011-03-11/arch/x86/include/asm/traps.h	2011-02-01 14:38:38.000000000 +0100
@@ -38,6 +38,9 @@ asmlinkage void alignment_check(void);
 asmlinkage void machine_check(void);
 #endif /* CONFIG_X86_MCE */
 asmlinkage void simd_coprocessor_error(void);
+#ifdef CONFIG_X86_XEN
+asmlinkage void fixup_4gb_segment(void);
+#endif

 dotraplinkage void do_divide_error(struct pt_regs *, long);
 dotraplinkage void do_debug(struct pt_regs *, long);
@@ -66,6 +69,9 @@ dotraplinkage void do_machine_check(stru
 dotraplinkage void do_simd_coprocessor_error(struct pt_regs *, long);
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *, long);
+#ifdef CONFIG_XEN
+void do_fixup_4gb_segment(struct pt_regs *, long);
+#endif
 #endif

 static inline int get_si_code(unsigned long condition)
--- head-2011-03-11.orig/include/linux/page-flags.h	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/include/linux/page-flags.h	2011-02-01 14:38:38.000000000 +0100
@@ -125,12 +125,12 @@ enum pageflags {
 	PG_fscache = PG_private_2,	/* page backed by cache */

 	/* XEN */
-#ifdef CONFIG_XEN
+#if defined(CONFIG_XEN)
 	PG_pinned = PG_locked,	/* Cannot alias with PG_owner_priv_1 since
 				 * bad_page() checks should include this bit.
 				 * Should not use PG_arch_1 as that may have
 				 * a different purpose elsewhere. */
-#else
+#elif defined(CONFIG_PARAVIRT_XEN)
 	PG_pinned = PG_owner_priv_1,
 	PG_savepinned = PG_dirty,
 #endif
@@ -225,8 +225,12 @@ PAGEFLAG(Active, active) __CLEARPAGEFLAG
 	TESTCLEARFLAG(Active, active)
 __PAGEFLAG(Slab, slab)
 PAGEFLAG(Checked, checked)		/* Used by some filesystems */
+#if defined(CONFIG_XEN) || defined(CONFIG_PARAVIRT_XEN)
 PAGEFLAG(Pinned, pinned) TESTSCFLAG(Pinned, pinned)	/* Xen */
+#endif
+#ifdef CONFIG_PARAVIRT_XEN
 PAGEFLAG(SavePinned, savepinned);			/* Xen */
+#endif
 PAGEFLAG(Reserved, reserved) __CLEARPAGEFLAG(Reserved, reserved)
 PAGEFLAG(SwapBacked, swapbacked) __CLEARPAGEFLAG(SwapBacked, swapbacked)

--- head-2011-03-11.orig/include/xen/interface/memory.h	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/include/xen/interface/memory.h	2011-02-01 14:38:38.000000000 +0100
@@ -88,6 +88,7 @@ struct xen_memory_reservation {
  */
 	domid_t domid;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_reservation);
 typedef struct xen_memory_reservation xen_memory_reservation_t;
 DEFINE_XEN_GUEST_HANDLE(xen_memory_reservation_t);

@@ -173,11 +174,7 @@ struct xen_machphys_mfn_list {
  * any large discontiguities in the machine address space, 2MB gaps in
  * the machphys table will be represented by an MFN base of zero.
  */
-#ifndef CONFIG_PARAVIRT_XEN
 	XEN_GUEST_HANDLE(xen_pfn_t) extent_start;
-#else
-	ulong extent_start;
-#endif

 	/*
 	 * Number of extents written to the above array. This will be smaller
@@ -185,6 +182,7 @@ struct xen_machphys_mfn_list {
 	 */
 	unsigned int nr_extents;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
 typedef struct xen_machphys_mfn_list xen_machphys_mfn_list_t;
 DEFINE_XEN_GUEST_HANDLE(xen_machphys_mfn_list_t);

@@ -226,6 +224,7 @@ struct xen_add_to_physmap {
 	/* GPFN where the source mapping page should appear. */
 	xen_pfn_t gpfn;
 };
+DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
 typedef struct xen_add_to_physmap xen_add_to_physmap_t;
 DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);

--- head-2011-03-11.orig/include/xen/public/Kbuild	2011-01-31 14:31:28.000000000 +0100
+++ head-2011-03-11/include/xen/public/Kbuild	2011-02-01 14:38:38.000000000 +0100
@@ -1 +1,5 @@
+header-y += evtchn.h
+header-y += gntdev.h
 header-y += iomulti.h
+header-y += privcmd.h
+header-y += xenbus.h
--- head-2011-03-11.orig/include/xen/public/privcmd.h	2010-01-18 15:23:12.000000000 +0100
+++ head-2011-03-11/include/xen/public/privcmd.h	2011-02-01 14:38:38.000000000 +0100
@@ -35,10 +35,6 @@

 #include <linux/types.h>

-#ifndef __user
-#define __user
-#endif
-
 typedef struct privcmd_hypercall
 {
 	__u64 op;
--- head-2011-03-11.orig/include/xen/public/xenbus.h	2009-05-29 10:25:53.000000000 +0200
+++ head-2011-03-11/include/xen/public/xenbus.h	2011-02-01 14:38:38.000000000 +0100
@@ -35,10 +35,6 @@

 #include <linux/types.h>

-#ifndef __user
-#define __user
-#endif
-
 typedef struct xenbus_alloc {
 	domid_t dom;
 	__u32 port;
--- head-2011-03-11.orig/kernel/kexec.c	2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-11/kernel/kexec.c	2011-02-01 14:38:38.000000000 +0100
@@ -49,7 +49,7 @@ note_buf_t __percpu *crash_notes;
 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
 u32
 #if defined(CONFIG_XEN) && defined(CONFIG_X86)
-__attribute__((__section__(".bss.page_aligned"), __aligned__(PAGE_SIZE)))
+__page_aligned_bss
 #endif
 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
--- head-2011-03-11.orig/lib/swiotlb-xen.c	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-11/lib/swiotlb-xen.c	2011-02-01 14:38:38.000000000 +0100
@@ -788,7 +788,7 @@ swiotlb_sync_sg_for_device(struct device
 }

 int
-swiotlb_dma_mapping_error(dma_addr_t dma_addr)
+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
 	return (dma_addr == virt_to_bus(io_tlb_overflow_buffer));
 }
--- head-2011-03-11.orig/mm/mprotect.c	2011-01-31 17:29:16.000000000 +0100
+++ head-2011-03-11/mm/mprotect.c	2011-02-01 14:38:38.000000000 +0100
@@ -97,8 +97,6 @@ static inline void change_pmd_range(stru
 		}
 		if (pmd_none_or_clear_bad(pmd))
 			continue;
-		if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
-			continue;
 		change_pte_range(vma->vm_mm, pmd, addr, next, newprot,
 				dirty_accountable);
 	} while (pmd++, addr = next, addr != end);