From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux 2.6.30
Patch-mainline: 2.6.30

This patch contains the differences between 2.6.29 and 2.6.30.

Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.30" by xen-port-patches.py

--- head-2010-05-25.orig/arch/ia64/include/asm/xen/hypervisor.h 2010-03-24 15:14:47.000000000 +0100
+++ head-2010-05-25/arch/ia64/include/asm/xen/hypervisor.h 2010-03-24 15:25:06.000000000 +0100
@@ -34,13 +34,13 @@
#define _ASM_IA64_XEN_HYPERVISOR_H

#include <linux/err.h>
+#ifdef CONFIG_PARAVIRT_XEN
#include <xen/interface/xen.h>
#include <xen/interface/version.h> /* to compile feature.c */
#include <xen/features.h> /* to comiple xen-netfront.c */
#include <xen/xen.h>
#include <asm/xen/hypercall.h>

-#ifdef CONFIG_PARAVIRT_XEN
extern struct shared_info *HYPERVISOR_shared_info;
extern struct start_info *xen_start_info;

--- head-2010-05-25.orig/arch/ia64/kernel/vmlinux.lds.S 2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/arch/ia64/kernel/vmlinux.lds.S 2010-03-24 15:25:06.000000000 +0100
@@ -182,7 +182,7 @@ SECTIONS
__start_gate_section = .;
*(.data.gate)
__stop_gate_section = .;
-#ifdef CONFIG_XEN
+#ifdef CONFIG_PARAVIRT_XEN
. = ALIGN(PAGE_SIZE);
__xen_start_gate_section = .;
*(.data.gate.xen)
--- head-2010-05-25.orig/arch/x86/Kconfig 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/Kconfig 2010-03-24 15:25:06.000000000 +0100
@@ -49,8 +49,8 @@ config X86
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_DMA_API_DEBUG
select HAVE_KERNEL_GZIP
- select HAVE_KERNEL_BZIP2
- select HAVE_KERNEL_LZMA
+ select HAVE_KERNEL_BZIP2 if !XEN
+ select HAVE_KERNEL_LZMA if !XEN
select HAVE_KERNEL_LZO
select HAVE_HW_BREAKPOINT
select PERF_EVENTS
@@ -337,11 +337,11 @@ config X86_XEN

config X86_BIGSMP
bool "Support for big SMP systems with more than 8 CPUs"
- depends on X86_32 && SMP
+ depends on X86_32 && SMP && !XEN
---help---
This option is needed for the systems that have more than 8 CPUs

-if X86_32
+if X86_32 && !XEN
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -371,7 +371,7 @@ config X86_64_XEN
help
This option will compile a kernel compatible with Xen hypervisor

-if X86_64
+if X86_64 && !XEN
config X86_EXTENDED_PLATFORM
bool "Support for extended (non-PC) x86 platforms"
default y
@@ -842,7 +842,7 @@ config MAXSMP

config NR_CPUS
int "Maximum number of CPUs" if SMP && !MAXSMP
- range 2 8 if SMP && X86_32 && !X86_BIGSMP
+ range 2 8 if SMP && X86_32 && !X86_BIGSMP && !X86_XEN
range 2 512 if SMP && !MAXSMP
default "1" if !SMP
default "4096" if MAXSMP
@@ -916,10 +916,6 @@ config X86_VISWS_APIC
def_bool y
depends on X86_32 && X86_VISWS

-config X86_XEN_GENAPIC
- def_bool y
- depends on X86_64_XEN
-
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
bool "Reroute for broken boot IRQs"
default n
--- head-2010-05-25.orig/arch/x86/Makefile 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/Makefile 2010-03-24 15:25:06.000000000 +0100
@@ -111,10 +111,6 @@ endif
# prevent gcc from generating any FP code by mistake
KBUILD_CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,)

-# Xen subarch support
-mflags-$(CONFIG_XEN) := -Iarch/x86/include/mach-xen
-mcore-$(CONFIG_XEN) := arch/x86/mach-xen/
-
KBUILD_CFLAGS += $(mflags-y)
KBUILD_AFLAGS += $(mflags-y)

@@ -187,10 +183,10 @@ endif
$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
$(Q)ln -fsn ../../x86/boot/bzImage $(objtree)/arch/$(UTS_MACHINE)/boot/$@
+endif

$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-endif

PHONY += install
install:
--- head-2010-05-25.orig/arch/x86/boot/Makefile 2010-03-24 15:01:37.000000000 +0100
+++ head-2010-05-25/arch/x86/boot/Makefile 2010-03-24 15:25:06.000000000 +0100
@@ -204,6 +204,12 @@ $(obj)/vmlinux-stripped: OBJCOPYFLAGS :=
$(obj)/vmlinux-stripped: vmlinux FORCE
$(call if_changed,objcopy)

+ifndef CONFIG_XEN
+bzImage := bzImage
+else
+bzImage := vmlinuz
+endif
+
install:
- sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/bzImage \
+ sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(obj)/$(bzImage) \
System.map "$(INSTALL_PATH)"
--- head-2010-05-25.orig/arch/x86/ia32/ia32entry-xen.S 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/ia32/ia32entry-xen.S 2010-03-24 15:25:06.000000000 +0100
@@ -502,7 +502,7 @@ ia32_sys_call_table:
.quad sys32_olduname
.quad sys_umask /* 60 */
.quad sys_chroot
- .quad sys32_ustat
+ .quad compat_sys_ustat
.quad sys_dup2
.quad sys_getppid
.quad sys_getpgrp /* 65 */
@@ -773,4 +773,6 @@ ia32_sys_call_table:
.quad sys_dup3 /* 330 */
.quad sys_pipe2
.quad sys_inotify_init1
+ .quad compat_sys_preadv
+ .quad compat_sys_pwritev
ia32_syscall_end:
--- head-2010-05-25.orig/arch/x86/include/asm/kexec.h 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/include/asm/kexec.h 2010-03-24 15:25:06.000000000 +0100
@@ -21,8 +21,14 @@
# define PA_CONTROL_PAGE 0
# define VA_CONTROL_PAGE 1
# define PA_TABLE_PAGE 2
+# ifndef CONFIG_XEN
# define PA_SWAP_PAGE 3
# define PAGES_NR 4
+# else /* CONFIG_XEN, see comment above
+# define VA_TABLE_PAGE 3 */
+# define PA_SWAP_PAGE 4
+# define PAGES_NR 5
+# endif /* CONFIG_XEN */
#endif

# define KEXEC_CONTROL_CODE_MAX_SIZE 2048
--- head-2010-05-25.orig/arch/x86/include/asm/page_64_types.h 2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/arch/x86/include/asm/page_64_types.h 2010-03-24 15:25:06.000000000 +0100
@@ -69,7 +69,15 @@ extern void init_extra_mapping_wb(unsign
#endif /* !__ASSEMBLY__ */

#ifdef CONFIG_FLATMEM
+/*
+ * While max_pfn is not exported, max_mapnr never gets initialized for non-Xen
+ * other than for hotplugged memory.
+ */
+#ifndef CONFIG_XEN
#define pfn_valid(pfn) ((pfn) < max_pfn)
+#else
+#define pfn_valid(pfn) ((pfn) < max_mapnr)
+#endif
#endif

#endif /* _ASM_X86_PAGE_64_DEFS_H */
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/desc.h 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc.h 2010-03-24 15:25:06.000000000 +0100
@@ -39,7 +39,7 @@ extern gate_desc idt_table[];
struct gdt_page {
struct desc_struct gdt[GDT_ENTRIES];
} __attribute__((aligned(PAGE_SIZE)));
-DECLARE_PER_CPU(struct gdt_page, gdt_page);
+DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);

static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
{
@@ -91,7 +91,6 @@ static inline int desc_empty(const void
#define store_gdt(dtr) native_store_gdt(dtr)
#define store_idt(dtr) native_store_idt(dtr)
#define store_tr(tr) (tr = native_store_tr())
-#define store_ldt(ldt) asm("sldt %0":"=m" (ldt))

#define load_TLS(t, cpu) native_load_tls(t, cpu)
#define set_ldt native_set_ldt
@@ -111,6 +110,8 @@ static inline void paravirt_free_ldt(str
{
}

+#define store_ldt(ldt) asm("sldt %0" : "=m"(ldt))
+
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
{
@@ -251,6 +252,8 @@ static inline void native_load_tls(struc
gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
}
#else
+#include <asm/pgtable.h>
+
#define load_TLS(t, cpu) xen_load_tls(t, cpu)
#define set_ldt xen_set_ldt

@@ -265,8 +268,9 @@ static inline void xen_load_tls(struct t
struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN;

for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
- if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
- *(u64 *)&t->tls_array[i]))
+ if (HYPERVISOR_update_descriptor(
+ arbitrary_virt_to_machine(&gdt[i]),
+ *(u64 *)&t->tls_array[i]))
BUG();
}
#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,11 +1,154 @@
|
|
+/*
|
|
+ * fixmap.h: compile-time virtual memory allocation
|
|
+ *
|
|
+ * This file is subject to the terms and conditions of the GNU General Public
|
|
+ * License. See the file "COPYING" in the main directory of this archive
|
|
+ * for more details.
|
|
+ *
|
|
+ * Copyright (C) 1998 Ingo Molnar
|
|
+ *
|
|
+ * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
|
+ * x86_32 and x86_64 integration by Gustavo F. Padovan, February 2009
|
|
+ */
|
|
+
|
|
#ifndef _ASM_X86_FIXMAP_H
|
|
#define _ASM_X86_FIXMAP_H
|
|
|
|
+#ifndef __ASSEMBLY__
|
|
+#include <linux/kernel.h>
|
|
+#include <asm/acpi.h>
|
|
+#include <asm/apicdef.h>
|
|
+#include <asm/page.h>
|
|
+#ifdef CONFIG_X86_32
|
|
+#include <linux/threads.h>
|
|
+#include <asm/kmap_types.h>
|
|
+#else
|
|
+#include <asm/vsyscall.h>
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
|
|
+ * uses fixmaps that relies on FIXADDR_TOP for proper address calculation.
|
|
+ * Because of this, FIXADDR_TOP x86 integration was left as later work.
|
|
+ */
|
|
+#ifdef CONFIG_X86_32
|
|
+/* used by vmalloc.c, vsyscall.lds.S.
|
|
+ *
|
|
+ * Leave one empty page between vmalloc'ed areas and
|
|
+ * the start of the fixmap.
|
|
+ */
|
|
+extern unsigned long __FIXADDR_TOP;
|
|
+#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
|
|
+
|
|
+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
|
|
+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
|
|
+#else
|
|
+#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
|
|
+
|
|
+/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
|
|
+#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
|
|
+#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
|
|
+#endif
|
|
+
|
|
+
|
|
+/*
|
|
+ * Here we define all the compile-time 'special' virtual
|
|
+ * addresses. The point is to have a constant address at
|
|
+ * compile time, but to set the physical address only
|
|
+ * in the boot process.
|
|
+ * for x86_32: We allocate these special addresses
|
|
+ * from the end of virtual memory (0xfffff000) backwards.
|
|
+ * Also this lets us do fail-safe vmalloc(), we
|
|
+ * can guarantee that these special addresses and
|
|
+ * vmalloc()-ed addresses never overlap.
|
|
+ *
|
|
+ * These 'compile-time allocated' memory buffers are
|
|
+ * fixed-size 4k pages (or larger if used with an increment
|
|
+ * higher than 1). Use set_fixmap(idx,phys) to associate
|
|
+ * physical memory with fixmap indices.
|
|
+ *
|
|
+ * TLB entries of such buffers will not be flushed across
|
|
+ * task switches.
|
|
+ */
|
|
+enum fixed_addresses {
|
|
#ifdef CONFIG_X86_32
|
|
-# include "fixmap_32.h"
|
|
+ FIX_HOLE,
|
|
+ FIX_VDSO,
|
|
#else
|
|
-# include "fixmap_64.h"
|
|
+ VSYSCALL_LAST_PAGE,
|
|
+ VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
|
|
+ + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
|
|
+ VSYSCALL_HPET,
|
|
+#endif
|
|
+ FIX_DBGP_BASE,
|
|
+ FIX_EARLYCON_MEM_BASE,
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
+ FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
|
|
#endif
|
|
+#ifndef CONFIG_XEN
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+ FIX_IO_APIC_BASE_0,
|
|
+ FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
|
|
+#endif
|
|
+#else
|
|
+ FIX_SHARED_INFO,
|
|
+#define NR_FIX_ISAMAPS 256
|
|
+ FIX_ISAMAP_END,
|
|
+ FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
+#endif
|
|
+#ifdef CONFIG_X86_VISWS_APIC
|
|
+ FIX_CO_CPU, /* Cobalt timer */
|
|
+ FIX_CO_APIC, /* Cobalt APIC Redirection Table */
|
|
+ FIX_LI_PCIA, /* Lithium PCI Bridge A */
|
|
+ FIX_LI_PCIB, /* Lithium PCI Bridge B */
|
|
+#endif
|
|
+#ifdef CONFIG_X86_F00F_BUG
|
|
+ FIX_F00F_IDT, /* Virtual mapping for IDT */
|
|
+#endif
|
|
+#ifdef CONFIG_X86_CYCLONE_TIMER
|
|
+ FIX_CYCLONE_TIMER, /*cyclone timer register*/
|
|
+#endif
|
|
+#ifdef CONFIG_X86_32
|
|
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
|
|
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
|
|
+#ifdef CONFIG_PCI_MMCONFIG
|
|
+ FIX_PCIE_MCFG,
|
|
+#endif
|
|
+#endif
|
|
+#ifdef CONFIG_PARAVIRT
|
|
+ FIX_PARAVIRT_BOOTMAP,
|
|
+#endif
|
|
+ FIX_TEXT_POKE0, /* reserve 2 pages for text_poke() */
|
|
+ FIX_TEXT_POKE1,
|
|
+ __end_of_permanent_fixed_addresses,
|
|
+#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
+ FIX_OHCI1394_BASE,
|
|
+#endif
|
|
+ /*
|
|
+ * 256 temporary boot-time mappings, used by early_ioremap(),
|
|
+ * before ioremap() is functional.
|
|
+ *
|
|
+ * We round it up to the next 256 pages boundary so that we
|
|
+ * can have a single pgd entry and a single pte table:
|
|
+ */
|
|
+#define NR_FIX_BTMAPS 64
|
|
+#define FIX_BTMAPS_SLOTS 4
|
|
+ FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
|
|
+ (__end_of_permanent_fixed_addresses & 255),
|
|
+ FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
|
|
+#ifdef CONFIG_X86_32
|
|
+ FIX_WP_TEST,
|
|
+#endif
|
|
+ __end_of_fixed_addresses
|
|
+};
|
|
+
|
|
+
|
|
+extern void reserve_top_address(unsigned long reserve);
|
|
+
|
|
+#define FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
|
|
+#define FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
|
|
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
|
+#define FIXADDR_BOOT_START (FIXADDR_TOP - FIXADDR_BOOT_SIZE)
|
|
|
|
extern int fixmaps_set;
|
|
|
|
@@ -13,10 +156,10 @@ extern pte_t *kmap_pte;
|
|
extern pgprot_t kmap_prot;
|
|
extern pte_t *pkmap_page_table;
|
|
|
|
-void xen_set_fixmap(enum fixed_addresses, maddr_t, pgprot_t);
|
|
+void xen_set_fixmap(enum fixed_addresses, phys_addr_t, pgprot_t);
|
|
|
|
static inline void __set_fixmap(enum fixed_addresses idx,
|
|
- maddr_t phys, pgprot_t flags)
|
|
+ phys_addr_t phys, pgprot_t flags)
|
|
{
|
|
xen_set_fixmap(idx, phys, flags);
|
|
}
|
|
@@ -65,4 +208,5 @@ static inline unsigned long virt_to_fix(
|
|
BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START);
|
|
return __virt_to_fix(vaddr);
|
|
}
|
|
+#endif /* !__ASSEMBLY__ */
|
|
#endif /* _ASM_X86_FIXMAP_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,125 +0,0 @@
|
|
-/*
|
|
- * fixmap.h: compile-time virtual memory allocation
|
|
- *
|
|
- * This file is subject to the terms and conditions of the GNU General Public
|
|
- * License. See the file "COPYING" in the main directory of this archive
|
|
- * for more details.
|
|
- *
|
|
- * Copyright (C) 1998 Ingo Molnar
|
|
- *
|
|
- * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
|
|
- */
|
|
-
|
|
-#ifndef _ASM_X86_FIXMAP_32_H
|
|
-#define _ASM_X86_FIXMAP_32_H
|
|
-
|
|
-/* used by vmalloc.c, vsyscall.lds.S.
|
|
- *
|
|
- * Leave one empty page between vmalloc'ed areas and
|
|
- * the start of the fixmap.
|
|
- */
|
|
-extern unsigned long __FIXADDR_TOP;
|
|
-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
|
|
-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
|
|
-
|
|
-#ifndef __ASSEMBLY__
|
|
-#include <linux/kernel.h>
|
|
-#include <asm/acpi.h>
|
|
-#include <asm/apicdef.h>
|
|
-#include <asm/page.h>
|
|
-#include <linux/threads.h>
|
|
-#include <asm/kmap_types.h>
|
|
-
|
|
-/*
|
|
- * Here we define all the compile-time 'special' virtual
|
|
- * addresses. The point is to have a constant address at
|
|
- * compile time, but to set the physical address only
|
|
- * in the boot process. We allocate these special addresses
|
|
- * from the end of virtual memory (0xfffff000) backwards.
|
|
- * Also this lets us do fail-safe vmalloc(), we
|
|
- * can guarantee that these special addresses and
|
|
- * vmalloc()-ed addresses never overlap.
|
|
- *
|
|
- * these 'compile-time allocated' memory buffers are
|
|
- * fixed-size 4k pages. (or larger if used with an increment
|
|
- * highger than 1) use fixmap_set(idx,phys) to associate
|
|
- * physical memory with fixmap indices.
|
|
- *
|
|
- * TLB entries of such buffers will not be flushed across
|
|
- * task switches.
|
|
- */
|
|
-enum fixed_addresses {
|
|
- FIX_HOLE,
|
|
- FIX_VDSO,
|
|
- FIX_DBGP_BASE,
|
|
- FIX_EARLYCON_MEM_BASE,
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
|
|
-#endif
|
|
-#ifndef CONFIG_XEN
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- FIX_IO_APIC_BASE_0,
|
|
- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
|
|
-#endif
|
|
-#else
|
|
- FIX_SHARED_INFO,
|
|
-#define NR_FIX_ISAMAPS 256
|
|
- FIX_ISAMAP_END,
|
|
- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
-#endif
|
|
-#ifdef CONFIG_X86_VISWS_APIC
|
|
- FIX_CO_CPU, /* Cobalt timer */
|
|
- FIX_CO_APIC, /* Cobalt APIC Redirection Table */
|
|
- FIX_LI_PCIA, /* Lithium PCI Bridge A */
|
|
- FIX_LI_PCIB, /* Lithium PCI Bridge B */
|
|
-#endif
|
|
-#ifdef CONFIG_X86_F00F_BUG
|
|
- FIX_F00F_IDT, /* Virtual mapping for IDT */
|
|
-#endif
|
|
-#ifdef CONFIG_X86_CYCLONE_TIMER
|
|
- FIX_CYCLONE_TIMER, /*cyclone timer register*/
|
|
-#endif
|
|
- FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
|
|
- FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
|
|
-#ifdef CONFIG_PCI_MMCONFIG
|
|
- FIX_PCIE_MCFG,
|
|
-#endif
|
|
-#ifdef CONFIG_PARAVIRT
|
|
- FIX_PARAVIRT_BOOTMAP,
|
|
-#endif
|
|
- __end_of_permanent_fixed_addresses,
|
|
- /*
|
|
- * 256 temporary boot-time mappings, used by early_ioremap(),
|
|
- * before ioremap() is functional.
|
|
- *
|
|
- * We round it up to the next 256 pages boundary so that we
|
|
- * can have a single pgd entry and a single pte table:
|
|
- */
|
|
-#define NR_FIX_BTMAPS 64
|
|
-#define FIX_BTMAPS_SLOTS 4
|
|
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
|
|
- (__end_of_permanent_fixed_addresses & 255),
|
|
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
|
|
- FIX_WP_TEST,
|
|
-#ifdef CONFIG_ACPI
|
|
- FIX_ACPI_BEGIN,
|
|
- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
|
|
-#endif
|
|
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
- FIX_OHCI1394_BASE,
|
|
-#endif
|
|
- __end_of_fixed_addresses
|
|
-};
|
|
-
|
|
-extern void reserve_top_address(unsigned long reserve);
|
|
-
|
|
-
|
|
-#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP)
|
|
-
|
|
-#define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT)
|
|
-#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
|
|
-#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE)
|
|
-#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE)
|
|
-
|
|
-#endif /* !__ASSEMBLY__ */
|
|
-#endif /* _ASM_X86_FIXMAP_32_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,90 +0,0 @@
|
|
-/*
|
|
- * fixmap.h: compile-time virtual memory allocation
|
|
- *
|
|
- * This file is subject to the terms and conditions of the GNU General Public
|
|
- * License. See the file "COPYING" in the main directory of this archive
|
|
- * for more details.
|
|
- *
|
|
- * Copyright (C) 1998 Ingo Molnar
|
|
- */
|
|
-
|
|
-#ifndef _ASM_X86_FIXMAP_64_H
|
|
-#define _ASM_X86_FIXMAP_64_H
|
|
-
|
|
-#include <linux/kernel.h>
|
|
-#include <asm/acpi.h>
|
|
-#include <asm/apicdef.h>
|
|
-#include <asm/page.h>
|
|
-#include <asm/vsyscall.h>
|
|
-#include <asm/acpi.h>
|
|
-
|
|
-/*
|
|
- * Here we define all the compile-time 'special' virtual
|
|
- * addresses. The point is to have a constant address at
|
|
- * compile time, but to set the physical address only
|
|
- * in the boot process.
|
|
- *
|
|
- * These 'compile-time allocated' memory buffers are
|
|
- * fixed-size 4k pages (or larger if used with an increment
|
|
- * higher than 1). Use set_fixmap(idx,phys) to associate
|
|
- * physical memory with fixmap indices.
|
|
- *
|
|
- * TLB entries of such buffers will not be flushed across
|
|
- * task switches.
|
|
- */
|
|
-
|
|
-enum fixed_addresses {
|
|
- VSYSCALL_LAST_PAGE,
|
|
- VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
|
|
- + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
|
|
- VSYSCALL_HPET,
|
|
- FIX_DBGP_BASE,
|
|
- FIX_EARLYCON_MEM_BASE,
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
|
|
-#endif
|
|
-#ifndef CONFIG_XEN
|
|
- FIX_IO_APIC_BASE_0,
|
|
- FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
|
|
-#else
|
|
-#define NR_FIX_ISAMAPS 256
|
|
- FIX_ISAMAP_END,
|
|
- FIX_ISAMAP_BEGIN = FIX_ISAMAP_END + NR_FIX_ISAMAPS - 1,
|
|
-#endif
|
|
-#ifdef CONFIG_PARAVIRT
|
|
- FIX_PARAVIRT_BOOTMAP,
|
|
-#else
|
|
- FIX_SHARED_INFO,
|
|
-#endif
|
|
- __end_of_permanent_fixed_addresses,
|
|
-#ifdef CONFIG_ACPI
|
|
- FIX_ACPI_BEGIN,
|
|
- FIX_ACPI_END = FIX_ACPI_BEGIN + FIX_ACPI_PAGES - 1,
|
|
-#endif
|
|
-#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
|
|
- FIX_OHCI1394_BASE,
|
|
-#endif
|
|
- /*
|
|
- * 256 temporary boot-time mappings, used by early_ioremap(),
|
|
- * before ioremap() is functional.
|
|
- *
|
|
- * We round it up to the next 256 pages boundary so that we
|
|
- * can have a single pgd entry and a single pte table:
|
|
- */
|
|
-#define NR_FIX_BTMAPS 64
|
|
-#define FIX_BTMAPS_SLOTS 4
|
|
- FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
|
|
- (__end_of_permanent_fixed_addresses & 255),
|
|
- FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
|
|
- __end_of_fixed_addresses
|
|
-};
|
|
-
|
|
-#define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE)
|
|
-#define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT)
|
|
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
|
|
-
|
|
-/* Only covers 32bit vsyscalls currently. Need another set for 64bit. */
|
|
-#define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL)
|
|
-#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
|
|
-
|
|
-#endif /* _ASM_X86_FIXMAP_64_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/highmem.h 2010-03-24 17:05:16.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/highmem.h 2010-03-24 17:05:22.000000000 +0100
@@ -62,6 +62,7 @@ void *kmap_atomic_prot(struct page *page
void *kmap_atomic(struct page *page, enum km_type type);
void kunmap_atomic(void *kvaddr, enum km_type type);
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);

#define kmap_atomic_pte(page, type) \
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -46,7 +46,7 @@
|
|
#include <xen/interface/arch-x86/xen-mca.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/ptrace.h>
|
|
-#include <asm/page.h>
|
|
+#include <asm/pgtable_types.h>
|
|
|
|
extern shared_info_t *HYPERVISOR_shared_info;
|
|
|
|
@@ -153,20 +153,16 @@ int __must_check xen_multi_mmuext_op(str
|
|
#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
|
|
static inline void arch_enter_lazy_mmu_mode(void)
|
|
{
|
|
- __get_cpu_var(xen_lazy_mmu) = true;
|
|
+ percpu_write(xen_lazy_mmu, true);
|
|
}
|
|
|
|
static inline void arch_leave_lazy_mmu_mode(void)
|
|
{
|
|
- __get_cpu_var(xen_lazy_mmu) = false;
|
|
+ percpu_write(xen_lazy_mmu, false);
|
|
xen_multicall_flush(false);
|
|
}
|
|
|
|
-#if defined(CONFIG_X86_32)
|
|
-#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
|
|
-#elif !defined(arch_use_lazy_mmu_mode)
|
|
-#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
|
|
-#endif
|
|
+#define arch_use_lazy_mmu_mode() unlikely(percpu_read(xen_lazy_mmu))
|
|
|
|
#if 0 /* All uses are in places potentially called asynchronously, but
|
|
* asynchronous code should rather not make use of lazy mode at all.
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/io.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/io.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -5,6 +5,10 @@
|
|
|
|
#include <linux/compiler.h>
|
|
#include <asm-generic/int-ll64.h>
|
|
+#include <asm/page.h>
|
|
+#ifdef __KERNEL__
|
|
+#include <asm/fixmap.h>
|
|
+#endif
|
|
|
|
#define build_mmio_read(name, size, type, reg, barrier) \
|
|
static inline type name(const volatile void __iomem *addr) \
|
|
@@ -82,6 +86,101 @@ static inline void writeq(__u64 val, vol
|
|
|
|
#define native_io_delay xen_io_delay
|
|
|
|
+/**
|
|
+ * virt_to_phys - map virtual addresses to physical
|
|
+ * @address: address to remap
|
|
+ *
|
|
+ * The returned physical address is the physical (CPU) mapping for
|
|
+ * the memory address given. It is only valid to use this function on
|
|
+ * addresses directly mapped or allocated via kmalloc.
|
|
+ *
|
|
+ * This function does not give bus mappings for DMA transfers. In
|
|
+ * almost all conceivable cases a device driver should not be using
|
|
+ * this function
|
|
+ */
|
|
+
|
|
+static inline phys_addr_t virt_to_phys(volatile void *address)
|
|
+{
|
|
+ return __pa(address);
|
|
+}
|
|
+
|
|
+/**
|
|
+ * phys_to_virt - map physical address to virtual
|
|
+ * @address: address to remap
|
|
+ *
|
|
+ * The returned virtual address is a current CPU mapping for
|
|
+ * the memory address given. It is only valid to use this function on
|
|
+ * addresses that have a kernel mapping
|
|
+ *
|
|
+ * This function does not handle bus mappings for DMA transfers. In
|
|
+ * almost all conceivable cases a device driver should not be using
|
|
+ * this function
|
|
+ */
|
|
+
|
|
+static inline void *phys_to_virt(phys_addr_t address)
|
|
+{
|
|
+ return __va(address);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Change "struct page" to physical address.
|
|
+ */
|
|
+#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
|
|
+#undef page_to_phys
|
|
+#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
|
|
+#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page)))
|
|
+
|
|
+/*
|
|
+ * ISA I/O bus memory addresses are 1:1 with the physical address.
|
|
+ * However, we truncate the address to unsigned int to avoid undesirable
|
|
+ * promitions in legacy drivers.
|
|
+ */
|
|
+#define isa_virt_to_bus(_x) ({ \
|
|
+ unsigned long _va_ = (unsigned long)(_x); \
|
|
+ _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \
|
|
+ ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \
|
|
+ : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); })
|
|
+#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x))
|
|
+
|
|
+/*
|
|
+ * However PCI ones are not necessarily 1:1 and therefore these interfaces
|
|
+ * are forbidden in portable PCI drivers.
|
|
+ *
|
|
+ * Allow them on x86 for legacy drivers, though.
|
|
+ */
|
|
+#define virt_to_bus(_x) phys_to_machine(__pa(_x))
|
|
+#define bus_to_virt(_x) __va(machine_to_phys(_x))
|
|
+
|
|
+/**
|
|
+ * ioremap - map bus memory into CPU space
|
|
+ * @offset: bus address of the memory
|
|
+ * @size: size of the resource to map
|
|
+ *
|
|
+ * ioremap performs a platform specific sequence of operations to
|
|
+ * make bus memory CPU accessible via the readb/readw/readl/writeb/
|
|
+ * writew/writel functions and the other mmio helpers. The returned
|
|
+ * address is not guaranteed to be usable directly as a virtual
|
|
+ * address.
|
|
+ *
|
|
+ * If the area you are trying to map is a PCI BAR you should have a
|
|
+ * look at pci_iomap().
|
|
+ */
|
|
+extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size);
|
|
+extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size);
|
|
+extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size,
|
|
+ unsigned long prot_val);
|
|
+
|
|
+/*
|
|
+ * The default ioremap() behavior is non-cached:
|
|
+ */
|
|
+static inline void __iomem *ioremap(resource_size_t offset, unsigned long size)
|
|
+{
|
|
+ return ioremap_nocache(offset, size);
|
|
+}
|
|
+
|
|
+extern void iounmap(volatile void __iomem *addr);
|
|
+
|
|
+
|
|
#ifdef CONFIG_X86_32
|
|
# include "../../asm/io_32.h"
|
|
#else
|
|
@@ -93,11 +192,6 @@ static inline void writeq(__u64 val, vol
|
|
/* We will be supplying our own /dev/mem implementation */
|
|
#define ARCH_HAS_DEV_MEM
|
|
|
|
-#define page_to_pseudophys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT)
|
|
-#undef page_to_phys
|
|
-#define page_to_phys(page) (phys_to_machine(page_to_pseudophys(page)))
|
|
-#define page_to_bus(page) (phys_to_machine(page_to_pseudophys(page)))
|
|
-
|
|
#define bvec_to_pseudophys(bv) (page_to_pseudophys((bv)->bv_page) + \
|
|
(unsigned long)(bv)->bv_offset)
|
|
|
|
@@ -106,23 +200,7 @@ static inline void writeq(__u64 val, vol
|
|
&& bvec_to_pseudophys(vec1) + (vec1)->bv_len \
|
|
== bvec_to_pseudophys(vec2))
|
|
|
|
-#undef virt_to_bus
|
|
-#undef bus_to_virt
|
|
-#define virt_to_bus(_x) phys_to_machine(__pa(_x))
|
|
-#define bus_to_virt(_x) __va(machine_to_phys(_x))
|
|
-
|
|
-#include <asm/fixmap.h>
|
|
-
|
|
#undef __ISA_IO_base
|
|
-#undef isa_virt_to_bus
|
|
-#undef isa_page_to_bus
|
|
-#undef isa_bus_to_virt
|
|
-#define isa_virt_to_bus(_x) ({ \
|
|
- unsigned long _va_ = (unsigned long)(_x); \
|
|
- _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) < (NR_FIX_ISAMAPS << PAGE_SHIFT) \
|
|
- ? _va_ - fix_to_virt(FIX_ISAMAP_BEGIN) \
|
|
- : ({ BUG(); (unsigned long)virt_to_bus(_va_); }); })
|
|
-#define isa_bus_to_virt(_x) ((void *)fix_to_virt(FIX_ISAMAP_BEGIN) + (_x))
|
|
|
|
#endif
|
|
|
|
@@ -131,7 +209,7 @@ extern void unxlate_dev_mem_ptr(unsigned
|
|
|
|
extern int ioremap_check_change_attr(unsigned long mfn, unsigned long size,
|
|
unsigned long prot_val);
|
|
-extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
|
|
+extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
|
|
|
|
/*
|
|
* early_ioremap() and early_iounmap() are for temporary early boot-time
|
|
@@ -140,10 +218,12 @@ extern void __iomem *ioremap_wc(unsigned
|
|
*/
|
|
extern void early_ioremap_init(void);
|
|
extern void early_ioremap_reset(void);
|
|
-extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
|
|
-extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
|
|
+extern void __iomem *early_ioremap(resource_size_t phys_addr,
|
|
+ unsigned long size);
|
|
+extern void __iomem *early_memremap(resource_size_t phys_addr,
|
|
+ unsigned long size);
|
|
extern void early_iounmap(void __iomem *addr, unsigned long size);
|
|
-extern void __iomem *fix_ioremap(unsigned idx, unsigned long phys);
|
|
|
|
+#define IO_SPACE_LIMIT 0xffff
|
|
|
|
#endif /* _ASM_X86_IO_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/ipi.h 2010-03-24 15:25:06.000000000 +0100
@@ -0,0 +1,13 @@
+#ifndef _ASM_X86_IPI_H
+#define _ASM_X86_IPI_H
+
+#include <asm/hw_irq.h>
+#include <asm/smp.h>
+
+void xen_send_IPI_mask(const struct cpumask *, int vector);
+void xen_send_IPI_mask_allbutself(const struct cpumask *, int vector);
+void xen_send_IPI_allbutself(int vector);
+void xen_send_IPI_all(int vector);
+void xen_send_IPI_self(int vector);
+
+#endif /* _ASM_X86_IPI_H */
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irqflags.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irqflags.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -94,7 +94,7 @@ static inline void halt(void)
|
|
|
|
#ifdef CONFIG_X86_64
|
|
# define __REG_si %rsi
|
|
-# define __CPU_num %gs:pda_cpunumber
|
|
+# define __CPU_num PER_CPU_VAR(cpu_number)
|
|
#else
|
|
# define __REG_si %esi
|
|
# define __CPU_num TI_cpu(%ebp)
|
|
@@ -130,6 +130,7 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
|
|
mov $__KERNEL_PERCPU, %ecx ; \
|
|
push %esp ; \
|
|
mov %ecx, %fs ; \
|
|
+ SET_KERNEL_GS %ecx ; \
|
|
call evtchn_do_upcall ; \
|
|
add $4,%esp ; \
|
|
jmp ret_from_intr
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irq_vectors.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irq_vectors.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -2,29 +2,46 @@
|
|
#define _ASM_X86_IRQ_VECTORS_H
|
|
|
|
#ifdef CONFIG_X86_32
|
|
-# define SYSCALL_VECTOR 0x80
|
|
+# define SYSCALL_VECTOR 0x80
|
|
#else
|
|
-# define IA32_SYSCALL_VECTOR 0x80
|
|
+# define IA32_SYSCALL_VECTOR 0x80
|
|
#endif
|
|
|
|
-#define RESCHEDULE_VECTOR 0
|
|
-#define CALL_FUNCTION_VECTOR 1
|
|
-#define CALL_FUNC_SINGLE_VECTOR 2
|
|
-#define SPIN_UNLOCK_VECTOR 3
|
|
-#define NR_IPIS 4
|
|
+#define RESCHEDULE_VECTOR 0
|
|
+#define CALL_FUNCTION_VECTOR 1
|
|
+#define CALL_FUNC_SINGLE_VECTOR 2
|
|
+#define SPIN_UNLOCK_VECTOR 3
|
|
+#define NR_IPIS 4
|
|
|
|
/*
|
|
* The maximum number of vectors supported by i386 processors
|
|
* is limited to 256. For processors other than i386, NR_VECTORS
|
|
* should be changed accordingly.
|
|
*/
|
|
-#define NR_VECTORS 256
|
|
+#define NR_VECTORS 256
|
|
|
|
-#define FIRST_VM86_IRQ 3
|
|
-#define LAST_VM86_IRQ 15
|
|
-#define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15)
|
|
+#define FIRST_VM86_IRQ 3
|
|
+#define LAST_VM86_IRQ 15
|
|
|
|
-#define NR_IRQS_LEGACY 16
|
|
+#ifndef __ASSEMBLY__
|
|
+static inline int invalid_vm86_irq(int irq)
|
|
+{
|
|
+ return irq < FIRST_VM86_IRQ || irq > LAST_VM86_IRQ;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Size the maximum number of interrupts.
|
|
+ *
|
|
+ * If the irq_desc[] array has a sparse layout, we can size things
|
|
+ * generously - it scales up linearly with the maximum number of CPUs,
|
|
+ * and the maximum number of IO-APICs, whichever is higher.
|
|
+ *
|
|
+ * In other cases we size more conservatively, to not create too large
|
|
+ * static arrays.
|
|
+ */
|
|
+
|
|
+#define NR_IRQS_LEGACY 16
|
|
|
|
/*
|
|
* The flat IRQ space is divided into two regions:
|
|
@@ -35,21 +52,41 @@
|
|
* 3. A dynamic mapping of inter-domain and Xen-sourced virtual IRQs. These
|
|
* are bound using the provided bind/unbind functions.
|
|
*/
|
|
+#define PIRQ_BASE 0
|
|
|
|
-#define PIRQ_BASE 0
|
|
-#if defined(NR_CPUS) && defined(MAX_IO_APICS)
|
|
-# if !defined(CONFIG_SPARSE_IRQ) && NR_CPUS < MAX_IO_APICS
|
|
-# define NR_PIRQS (NR_VECTORS + 32 * NR_CPUS)
|
|
-# elif defined(CONFIG_SPARSE_IRQ) && 8 * NR_CPUS > 32 * MAX_IO_APICS
|
|
-# define NR_PIRQS (NR_VECTORS + 8 * NR_CPUS)
|
|
+#define CPU_VECTOR_LIMIT ( 8 * NR_CPUS )
|
|
+#define IO_APIC_VECTOR_LIMIT ( 32 * MAX_IO_APICS )
|
|
+
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+# if !defined(NR_CPUS) || !defined(MAX_IO_APICS)
|
|
+/* nothing */
|
|
+# elif defined(CONFIG_SPARSE_IRQ)
|
|
+# define NR_PIRQS \
|
|
+ (CPU_VECTOR_LIMIT > IO_APIC_VECTOR_LIMIT ? \
|
|
+ (NR_VECTORS + CPU_VECTOR_LIMIT) : \
|
|
+ (NR_VECTORS + IO_APIC_VECTOR_LIMIT))
|
|
+# elif NR_CPUS < MAX_IO_APICS
|
|
+# define NR_PIRQS (NR_VECTORS + 4*CPU_VECTOR_LIMIT)
|
|
# else
|
|
-# define NR_PIRQS (NR_VECTORS + 32 * MAX_IO_APICS)
|
|
+# define NR_PIRQS (NR_VECTORS + IO_APIC_VECTOR_LIMIT)
|
|
# endif
|
|
+#elif defined(CONFIG_XEN_PCIDEV_FRONTEND)
|
|
+# define NR_PIRQS (NR_VECTORS + CPU_VECTOR_LIMIT)
|
|
+#else /* !CONFIG_X86_IO_APIC: */
|
|
+# define NR_PIRQS NR_IRQS_LEGACY
|
|
+#endif
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+#if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SPARSE_IRQ)
|
|
+extern int nr_pirqs;
|
|
+#else
|
|
+# define nr_pirqs NR_PIRQS
|
|
+#endif
|
|
#endif
|
|
|
|
-#define DYNIRQ_BASE (PIRQ_BASE + NR_PIRQS)
|
|
-#define NR_DYNIRQS 256
|
|
+#define DYNIRQ_BASE (PIRQ_BASE + nr_pirqs)
|
|
+#define NR_DYNIRQS 256
|
|
|
|
-#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
|
|
+#define NR_IRQS (NR_PIRQS + NR_DYNIRQS)
|
|
|
|
#endif /* _ASM_X86_IRQ_VECTORS_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/mmu_context.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -26,11 +26,117 @@ static inline void xen_activate_mm(struc
|
|
int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
|
|
void destroy_context(struct mm_struct *mm);
|
|
|
|
+
|
|
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
+{
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
+ if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
|
|
+ percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
|
|
+#endif
|
|
+}
|
|
+
|
|
+#define prepare_arch_switch(next) __prepare_arch_switch()
|
|
+
|
|
+static inline void __prepare_arch_switch(void)
|
|
+{
|
|
#ifdef CONFIG_X86_32
|
|
-# include "mmu_context_32.h"
|
|
+ /*
|
|
+ * Save away %gs. No need to save %fs, as it was saved on the
|
|
+ * stack on entry. No need to save %es and %ds, as those are
|
|
+ * always kernel segments while inside the kernel.
|
|
+ */
|
|
+ lazy_save_gs(current->thread.gs);
|
|
+ lazy_load_gs(__KERNEL_STACK_CANARY);
|
|
#else
|
|
-# include "mmu_context_64.h"
|
|
+ /*
|
|
+ * Save away %es, %ds, %fs and %gs. Must happen before reload
|
|
+ * of cr3/ldt (i.e., not in __switch_to).
|
|
+ */
|
|
+ __asm__ __volatile__ (
|
|
+ "mov %%es,%0 ; mov %%ds,%1 ; mov %%fs,%2 ; mov %%gs,%3"
|
|
+ : "=m" (current->thread.es),
|
|
+ "=m" (current->thread.ds),
|
|
+ "=m" (current->thread.fsindex),
|
|
+ "=m" (current->thread.gsindex) );
|
|
+
|
|
+ if (current->thread.ds)
|
|
+ __asm__ __volatile__ ( "movl %0,%%ds" : : "r" (0) );
|
|
+
|
|
+ if (current->thread.es)
|
|
+ __asm__ __volatile__ ( "movl %0,%%es" : : "r" (0) );
|
|
+
|
|
+ if (current->thread.fsindex) {
|
|
+ __asm__ __volatile__ ( "movl %0,%%fs" : : "r" (0) );
|
|
+ current->thread.fs = 0;
|
|
+ }
|
|
+
|
|
+ if (current->thread.gsindex) {
|
|
+ load_gs_index(0);
|
|
+ current->thread.gs = 0;
|
|
+ }
|
|
+#endif
|
|
+}
|
|
+
|
|
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|
+ struct task_struct *tsk)
|
|
+{
|
|
+ unsigned cpu = smp_processor_id();
|
|
+ struct mmuext_op _op[2 + (sizeof(long) > 4)], *op = _op;
|
|
+
|
|
+ if (likely(prev != next)) {
|
|
+ BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
|
|
+ !PagePinned(virt_to_page(next->pgd)));
|
|
+
|
|
+ /* stop flush ipis for the previous mm */
|
|
+ cpu_clear(cpu, prev->cpu_vm_mask);
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
|
+ percpu_write(cpu_tlbstate.active_mm, next);
|
|
#endif
|
|
+ cpu_set(cpu, next->cpu_vm_mask);
|
|
+
|
|
+ /* Re-load page tables: load_cr3(next->pgd) */
|
|
+ op->cmd = MMUEXT_NEW_BASEPTR;
|
|
+ op->arg1.mfn = virt_to_mfn(next->pgd);
|
|
+ op++;
|
|
+
|
|
+ /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
|
|
+#ifdef CONFIG_X86_64
|
|
+ op->cmd = MMUEXT_NEW_USER_BASEPTR;
|
|
+ op->arg1.mfn = virt_to_mfn(__user_pgd(next->pgd));
|
|
+ op++;
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * load the LDT, if the LDT is different:
|
|
+ */
|
|
+ if (unlikely(prev->context.ldt != next->context.ldt)) {
|
|
+ /* load_LDT_nolock(&next->context) */
|
|
+ op->cmd = MMUEXT_SET_LDT;
|
|
+ op->arg1.linear_addr = (unsigned long)next->context.ldt;
|
|
+ op->arg2.nr_ents = next->context.size;
|
|
+ op++;
|
|
+ }
|
|
+
|
|
+ BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
|
|
+ }
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
+ else {
|
|
+ percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
|
|
+ BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
|
|
+
|
|
+ if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
|
+ /* We were in lazy tlb mode and leave_mm disabled
|
|
+ * tlb flush IPI delivery. We must reload CR3
|
|
+ * to make sure to use no freed page tables.
|
|
+ */
|
|
+ load_cr3(next->pgd);
|
|
+ xen_new_user_pt(__pa(__user_pgd(next->pgd)));
|
|
+ load_LDT_nolock(&next->context);
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+}
|
|
|
|
#define activate_mm(prev, next) \
|
|
do { \
|
|
@@ -38,5 +144,17 @@ do { \
|
|
switch_mm((prev), (next), NULL); \
|
|
} while (0);
|
|
|
|
+#ifdef CONFIG_X86_32
|
|
+#define deactivate_mm(tsk, mm) \
|
|
+do { \
|
|
+ lazy_load_gs(0); \
|
|
+} while (0)
|
|
+#else
|
|
+#define deactivate_mm(tsk, mm) \
|
|
+do { \
|
|
+ load_gs_index(0); \
|
|
+ loadsegment(fs, 0); \
|
|
+} while (0)
|
|
+#endif
|
|
|
|
#endif /* _ASM_X86_MMU_CONTEXT_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,83 +0,0 @@
|
|
-#ifndef _ASM_X86_MMU_CONTEXT_32_H
|
|
-#define _ASM_X86_MMU_CONTEXT_32_H
|
|
-
|
|
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
-{
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
- if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
|
|
- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
|
|
-#endif
|
|
-}
|
|
-
|
|
-#define prepare_arch_switch(next) __prepare_arch_switch()
|
|
-
|
|
-static inline void __prepare_arch_switch(void)
|
|
-{
|
|
- /*
|
|
- * Save away %gs. No need to save %fs, as it was saved on the
|
|
- * stack on entry. No need to save %es and %ds, as those are
|
|
- * always kernel segments while inside the kernel.
|
|
- */
|
|
- asm volatile ( "mov %%gs,%0"
|
|
- : "=m" (current->thread.gs));
|
|
- asm volatile ( "movl %0,%%gs"
|
|
- : : "r" (0) );
|
|
-}
|
|
-
|
|
-static inline void switch_mm(struct mm_struct *prev,
|
|
- struct mm_struct *next,
|
|
- struct task_struct *tsk)
|
|
-{
|
|
- int cpu = smp_processor_id();
|
|
- struct mmuext_op _op[2], *op = _op;
|
|
-
|
|
- if (likely(prev != next)) {
|
|
- BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
|
|
- !PagePinned(virt_to_page(next->pgd)));
|
|
-
|
|
- /* stop flush ipis for the previous mm */
|
|
- cpu_clear(cpu, prev->cpu_vm_mask);
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
|
|
- x86_write_percpu(cpu_tlbstate.active_mm, next);
|
|
-#endif
|
|
- cpu_set(cpu, next->cpu_vm_mask);
|
|
-
|
|
- /* Re-load page tables: load_cr3(next->pgd) */
|
|
- op->cmd = MMUEXT_NEW_BASEPTR;
|
|
- op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
|
|
- op++;
|
|
-
|
|
- /*
|
|
- * load the LDT, if the LDT is different:
|
|
- */
|
|
- if (unlikely(prev->context.ldt != next->context.ldt)) {
|
|
- /* load_LDT_nolock(&next->context, cpu) */
|
|
- op->cmd = MMUEXT_SET_LDT;
|
|
- op->arg1.linear_addr = (unsigned long)next->context.ldt;
|
|
- op->arg2.nr_ents = next->context.size;
|
|
- op++;
|
|
- }
|
|
-
|
|
- BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
|
|
- }
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
|
|
- else {
|
|
- x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
|
|
- BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
|
|
-
|
|
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
|
- /* We were in lazy tlb mode and leave_mm disabled
|
|
- * tlb flush IPI delivery. We must reload %cr3.
|
|
- */
|
|
- load_cr3(next->pgd);
|
|
- load_LDT_nolock(&next->context);
|
|
- }
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
-#define deactivate_mm(tsk, mm) \
|
|
- asm("movl %0,%%gs": :"r" (0));
|
|
-
|
|
-#endif /* _ASM_X86_MMU_CONTEXT_32_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,106 +0,0 @@
|
|
-#ifndef _ASM_X86_MMU_CONTEXT_64_H
|
|
-#define _ASM_X86_MMU_CONTEXT_64_H
|
|
-
|
|
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
|
|
-{
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- if (read_pda(mmu_state) == TLBSTATE_OK)
|
|
- write_pda(mmu_state, TLBSTATE_LAZY);
|
|
-#endif
|
|
-}
|
|
-
|
|
-#define prepare_arch_switch(next) __prepare_arch_switch()
|
|
-
|
|
-static inline void __prepare_arch_switch(void)
|
|
-{
|
|
- /*
|
|
- * Save away %es, %ds, %fs and %gs. Must happen before reload
|
|
- * of cr3/ldt (i.e., not in __switch_to).
|
|
- */
|
|
- __asm__ __volatile__ (
|
|
- "mov %%es,%0 ; mov %%ds,%1 ; mov %%fs,%2 ; mov %%gs,%3"
|
|
- : "=m" (current->thread.es),
|
|
- "=m" (current->thread.ds),
|
|
- "=m" (current->thread.fsindex),
|
|
- "=m" (current->thread.gsindex) );
|
|
-
|
|
- if (current->thread.ds)
|
|
- __asm__ __volatile__ ( "movl %0,%%ds" : : "r" (0) );
|
|
-
|
|
- if (current->thread.es)
|
|
- __asm__ __volatile__ ( "movl %0,%%es" : : "r" (0) );
|
|
-
|
|
- if (current->thread.fsindex) {
|
|
- __asm__ __volatile__ ( "movl %0,%%fs" : : "r" (0) );
|
|
- current->thread.fs = 0;
|
|
- }
|
|
-
|
|
- if (current->thread.gsindex) {
|
|
- load_gs_index(0);
|
|
- current->thread.gs = 0;
|
|
- }
|
|
-}
|
|
-
|
|
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
|
|
- struct task_struct *tsk)
|
|
-{
|
|
- unsigned cpu = smp_processor_id();
|
|
- struct mmuext_op _op[3], *op = _op;
|
|
-
|
|
- if (likely(prev != next)) {
|
|
- BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
|
|
- !PagePinned(virt_to_page(next->pgd)));
|
|
-
|
|
- /* stop flush ipis for the previous mm */
|
|
- cpu_clear(cpu, prev->cpu_vm_mask);
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- write_pda(mmu_state, TLBSTATE_OK);
|
|
- write_pda(active_mm, next);
|
|
-#endif
|
|
- cpu_set(cpu, next->cpu_vm_mask);
|
|
-
|
|
- /* load_cr3(next->pgd) */
|
|
- op->cmd = MMUEXT_NEW_BASEPTR;
|
|
- op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
|
|
- op++;
|
|
-
|
|
- /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
|
|
- op->cmd = MMUEXT_NEW_USER_BASEPTR;
|
|
- op->arg1.mfn = pfn_to_mfn(__pa(__user_pgd(next->pgd)) >> PAGE_SHIFT);
|
|
- op++;
|
|
-
|
|
- if (unlikely(next->context.ldt != prev->context.ldt)) {
|
|
- /* load_LDT_nolock(&next->context) */
|
|
- op->cmd = MMUEXT_SET_LDT;
|
|
- op->arg1.linear_addr = (unsigned long)next->context.ldt;
|
|
- op->arg2.nr_ents = next->context.size;
|
|
- op++;
|
|
- }
|
|
-
|
|
- BUG_ON(HYPERVISOR_mmuext_op(_op, op-_op, NULL, DOMID_SELF));
|
|
- }
|
|
-#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
- else {
|
|
- write_pda(mmu_state, TLBSTATE_OK);
|
|
- if (read_pda(active_mm) != next)
|
|
- BUG();
|
|
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
|
|
- /* We were in lazy tlb mode and leave_mm disabled
|
|
- * tlb flush IPI delivery. We must reload CR3
|
|
- * to make sure to use no freed page tables.
|
|
- */
|
|
- load_cr3(next->pgd);
|
|
- xen_new_user_pt(__pa(__user_pgd(next->pgd)));
|
|
- load_LDT_nolock(&next->context);
|
|
- }
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
-#define deactivate_mm(tsk, mm) \
|
|
-do { \
|
|
- load_gs_index(0); \
|
|
- asm volatile("movl %0,%%fs"::"r"(0)); \
|
|
-} while (0)
|
|
-
|
|
-#endif /* _ASM_X86_MMU_CONTEXT_64_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pci.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pci.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -41,7 +41,6 @@ static inline int pci_proc_domain(struct
|
|
return pci_domain_nr(bus);
|
|
}
|
|
|
|
-extern void pci_iommu_alloc(void);
|
|
|
|
/* Can be used to override the logic in pci_scan_bus for skipping
|
|
already-configured bus numbers - to be used for buggy BIOSes
|
|
@@ -92,12 +91,44 @@ static inline void early_quirks(void) {
|
|
|
|
extern void pci_iommu_alloc(void);
|
|
|
|
-#endif /* __KERNEL__ */
|
|
+/* MSI arch hooks */
|
|
+#define arch_setup_msi_irqs arch_setup_msi_irqs
|
|
+#define arch_teardown_msi_irqs arch_teardown_msi_irqs
|
|
+
|
|
+#define PCI_DMA_BUS_IS_PHYS 0
|
|
+
|
|
+#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG) || defined(CONFIG_SWIOTLB)
|
|
+
|
|
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \
|
|
+ dma_addr_t ADDR_NAME;
|
|
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \
|
|
+ __u32 LEN_NAME;
|
|
+#define pci_unmap_addr(PTR, ADDR_NAME) \
|
|
+ ((PTR)->ADDR_NAME)
|
|
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
|
|
+ (((PTR)->ADDR_NAME) = (VAL))
|
|
+#define pci_unmap_len(PTR, LEN_NAME) \
|
|
+ ((PTR)->LEN_NAME)
|
|
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
|
|
+ (((PTR)->LEN_NAME) = (VAL))
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-# include "pci_32.h"
|
|
#else
|
|
-# include "../../asm/pci_64.h"
|
|
+
|
|
+#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME[0];
|
|
+#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
|
|
+#define pci_unmap_addr(PTR, ADDR_NAME) sizeof((PTR)->ADDR_NAME)
|
|
+#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
|
|
+ do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
|
|
+#define pci_unmap_len(PTR, LEN_NAME) sizeof((PTR)->LEN_NAME)
|
|
+#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
|
|
+ do { break; } while (pci_unmap_len(PTR, LEN_NAME))
|
|
+
|
|
+#endif
|
|
+
|
|
+#endif /* __KERNEL__ */
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+#include "../../asm/pci_64.h"
|
|
#endif
|
|
|
|
/* implement the pci_ DMA API in terms of the generic device dma_ one */
|
|
@@ -115,11 +146,6 @@ static inline int __pcibus_to_node(const
|
|
return sd->node;
|
|
}
|
|
|
|
-static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
|
|
-{
|
|
- return node_to_cpumask(__pcibus_to_node(bus));
|
|
-}
|
|
-
|
|
static inline const struct cpumask *
|
|
cpumask_of_pcibus(const struct pci_bus *bus)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,178 +1,9 @@
|
|
#ifndef _ASM_X86_PGTABLE_H
|
|
#define _ASM_X86_PGTABLE_H
|
|
|
|
-#define FIRST_USER_ADDRESS 0
|
|
+#include <asm/page.h>
|
|
|
|
-#define _PAGE_BIT_PRESENT 0 /* is present */
|
|
-#define _PAGE_BIT_RW 1 /* writeable */
|
|
-#define _PAGE_BIT_USER 2 /* userspace addressable */
|
|
-#define _PAGE_BIT_PWT 3 /* page write through */
|
|
-#define _PAGE_BIT_PCD 4 /* page cache disabled */
|
|
-#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
|
|
-#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
|
|
-#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
|
|
-#define _PAGE_BIT_PAT 7 /* on 4KB pages */
|
|
-#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
|
|
-#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
|
|
-#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
|
|
-#define _PAGE_BIT_UNUSED3 11
|
|
-#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
|
|
-#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
|
|
-#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
|
|
-#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
|
|
-
|
|
-/* If _PAGE_BIT_PRESENT is clear, we use these: */
|
|
-/* - if the user mapped it with PROT_NONE; pte_present gives true */
|
|
-#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
|
|
-/* - set: nonlinear file mapping, saved PTE; unset:swap */
|
|
-#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
|
|
-
|
|
-#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
|
|
-#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
|
|
-#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
|
|
-#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
|
|
-#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
|
|
-#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
|
|
-#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
|
|
-#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
|
|
-#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
|
|
-#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
|
|
-#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
|
|
-#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
|
|
-#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
|
|
-#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
|
|
-#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
|
|
-#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
|
|
-#define __HAVE_ARCH_PTE_SPECIAL
|
|
-
|
|
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
|
-#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
|
|
-#else
|
|
-#define _PAGE_NX (_AT(pteval_t, 0))
|
|
-#endif
|
|
-
|
|
-#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
|
|
-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
|
|
-
|
|
-#ifndef __ASSEMBLY__
|
|
-#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
|
|
-extern unsigned int __kernel_page_user;
|
|
-#else
|
|
-#define __kernel_page_user 0
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
|
|
- _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
-#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
|
|
- _PAGE_DIRTY | __kernel_page_user)
|
|
-
|
|
-/* Set of bits not changed in pte_modify */
|
|
-#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \
|
|
- _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
-
|
|
-/*
|
|
- * PAT settings are part of the hypervisor interface, which sets the
|
|
- * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
|
|
- */
|
|
-#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
|
|
-#define _PAGE_CACHE_WB (0)
|
|
-#define _PAGE_CACHE_WT (_PAGE_PWT)
|
|
-#define _PAGE_CACHE_WC (_PAGE_PAT)
|
|
-#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
|
|
-#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
|
|
-#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
|
|
-
|
|
-#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
|
|
-#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
|
|
- _PAGE_ACCESSED | _PAGE_NX)
|
|
-
|
|
-#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
|
|
- _PAGE_USER | _PAGE_ACCESSED)
|
|
-#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
- _PAGE_ACCESSED | _PAGE_NX)
|
|
-#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
- _PAGE_ACCESSED)
|
|
-#define PAGE_COPY PAGE_COPY_NOEXEC
|
|
-#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
- _PAGE_ACCESSED | _PAGE_NX)
|
|
-#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
- _PAGE_ACCESSED)
|
|
-
|
|
-#define __PAGE_KERNEL_EXEC \
|
|
- (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user)
|
|
-#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
|
|
-
|
|
-#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
|
|
-#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
|
|
-#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
|
|
-#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
|
|
-#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
|
|
-#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
|
|
-#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
|
|
-#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
|
|
-#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
|
|
-#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
|
|
-#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
|
-
|
|
-#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
|
|
-#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
|
|
-#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
|
|
-#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
|
|
-
|
|
-#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
|
|
-#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
|
|
-#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
|
|
-#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
|
|
-#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
|
|
-#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
|
|
-#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
|
|
-#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
|
|
-#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
|
|
-#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
|
|
-#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
|
|
-#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
-#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
-
-#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
-#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
-#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
-#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
-
-/* xwr */
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_EXEC
-#define __P101 PAGE_READONLY_EXEC
-#define __P110 PAGE_COPY_EXEC
-#define __P111 PAGE_COPY_EXEC
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_EXEC
-#define __S101 PAGE_READONLY_EXEC
-#define __S110 PAGE_SHARED_EXEC
-#define __S111 PAGE_SHARED_EXEC
-
-/*
- * early identity mapping pte attrib macros.
- */
-#ifdef CONFIG_X86_64
-#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
-#else
-/*
- * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
- * bits are combined, this will alow user to access the high address mapped
- * VDSO in the presence of CONFIG_COMPAT_VDSO
- */
-#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
-#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
-#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
-#endif
+#include <asm/pgtable_types.h>
 
/*
* Macro to mark a page protection value as UC-
@@ -184,9 +15,6 @@ extern unsigned int __kernel_page_user;
 
#ifndef __ASSEMBLY__
 
-#define pgprot_writecombine pgprot_writecombine
-extern pgprot_t pgprot_writecombine(pgprot_t prot);
-
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -197,6 +25,59 @@ extern unsigned long empty_zero_page[PAG
extern spinlock_t pgd_lock;
extern struct list_head pgd_list;
 
+#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
+#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
+
+#define set_pte_atomic(ptep, pte) \
+ xen_set_pte_atomic(ptep, pte)
+
+#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define set_pgd(pgdp, pgd) xen_set_pgd(pgdp, pgd)
+#define pgd_clear(pgd) xen_pgd_clear(pgd)
+#endif
+
+#ifndef set_pud
+# define set_pud(pudp, pud) xen_set_pud(pudp, pud)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pud_clear(pud) xen_pud_clear(pud)
+#endif
+
+#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep)
+#define pmd_clear(pmd) xen_pmd_clear(pmd)
+
+#define pte_update(mm, addr, ptep) do { } while (0)
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
+
+static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
+{
+ xen_pagetable_setup_start(base);
+}
+
+static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
+{
+ xen_pagetable_setup_done(base);
+}
+
+#define pgd_val(x) xen_pgd_val(x)
+#define __pgd(x) xen_make_pgd(x)
+
+#ifndef __PAGETABLE_PUD_FOLDED
+#define pud_val(x) xen_pud_val(x)
+#define __pud(x) xen_make_pud(x)
+#endif
+
+#ifndef __PAGETABLE_PMD_FOLDED
+#define pmd_val(x) xen_pmd_val(x)
+#define __pmd(x) xen_make_pmd(x)
+#endif
+
+#define pte_val(x) xen_pte_val(x)
+#define __pte(x) xen_make_pte(x)
+
/*
* The following only work if pte_present() is true.
* Undefined behaviour if not..
@@ -252,53 +133,67 @@ static inline int pte_special(pte_t pte)
 
static inline int pmd_large(pmd_t pte)
{
- return (__pmd_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
+ return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
(_PAGE_PSE | _PAGE_PRESENT);
}
 
+static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
|
|
+{
|
|
+ pteval_t v = __pte_val(pte);
|
|
+
|
|
+ return __pte_ma(v | set);
|
|
+}
|
|
+
|
|
+static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
|
|
+{
|
|
+ pteval_t v = __pte_val(pte);
|
|
+
|
|
+ return __pte_ma(v & ~clear);
|
|
+}
|
|
+
|
|
static inline pte_t pte_mkclean(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~_PAGE_DIRTY);
|
|
+ return pte_clear_flags(pte, _PAGE_DIRTY);
|
|
}
|
|
|
|
static inline pte_t pte_mkold(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~_PAGE_ACCESSED);
|
|
+ return pte_clear_flags(pte, _PAGE_ACCESSED);
|
|
}
|
|
|
|
static inline pte_t pte_wrprotect(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~_PAGE_RW);
|
|
+ return pte_clear_flags(pte, _PAGE_RW);
|
|
}
|
|
|
|
static inline pte_t pte_mkexec(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~_PAGE_NX);
|
|
+ return pte_clear_flags(pte, _PAGE_NX);
|
|
}
|
|
|
|
static inline pte_t pte_mkdirty(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) | _PAGE_DIRTY);
|
|
+ return pte_set_flags(pte, _PAGE_DIRTY);
|
|
}
|
|
|
|
static inline pte_t pte_mkyoung(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) | _PAGE_ACCESSED);
|
|
+ return pte_set_flags(pte, _PAGE_ACCESSED);
|
|
}
|
|
|
|
static inline pte_t pte_mkwrite(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) | _PAGE_RW);
|
|
+ return pte_set_flags(pte, _PAGE_RW);
|
|
}
|
|
|
|
static inline pte_t pte_mkhuge(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) | _PAGE_PSE);
|
|
+ return pte_set_flags(pte, _PAGE_PSE);
|
|
}
|
|
|
|
static inline pte_t pte_clrhuge(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) & ~_PAGE_PSE);
|
|
+ return pte_clear_flags(pte, _PAGE_PSE);
|
|
}
|
|
|
|
static inline pte_t pte_mkglobal(pte_t pte)
|
|
@@ -313,11 +208,9 @@ static inline pte_t pte_clrglobal(pte_t
|
|
|
|
static inline pte_t pte_mkspecial(pte_t pte)
|
|
{
|
|
- return __pte_ma(__pte_val(pte) | _PAGE_SPECIAL);
|
|
+ return pte_set_flags(pte, _PAGE_SPECIAL);
|
|
}
|
|
|
|
-extern pteval_t __supported_pte_mask;
|
|
-
|
|
/*
|
|
* Mask out unsupported bits in a present pgprot. Non-present pgprots
|
|
* can use those bits for other purposes, so leave them be.
|
|
@@ -391,68 +284,208 @@ static inline int is_new_memtype_allowed
|
|
return 1;
|
|
}
|
|
|
|
-#ifndef __ASSEMBLY__
|
|
-#ifndef CONFIG_XEN
|
|
-/* Indicate that x86 has its own track and untrack pfn vma functions */
|
|
-#define __HAVE_PFNMAP_TRACKING
|
|
-#endif
|
|
+pmd_t *populate_extra_pmd(unsigned long vaddr);
|
|
+pte_t *populate_extra_pte(unsigned long vaddr);
|
|
+#endif /* __ASSEMBLY__ */
|
|
|
|
-#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
-struct file;
|
|
-pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
- unsigned long size, pgprot_t vma_prot);
|
|
-int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
|
|
- unsigned long size, pgprot_t *vma_prot);
|
|
+#ifdef CONFIG_X86_32
|
|
+# include "pgtable_32.h"
|
|
+#else
|
|
+# include "pgtable_64.h"
|
|
#endif
|
|
|
|
-/* Install a pte for a particular vaddr in kernel space. */
|
|
-void set_pte_vaddr(unsigned long vaddr, pte_t pte);
|
|
+#ifndef __ASSEMBLY__
|
|
+#include <linux/mm_types.h>
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-extern void native_pagetable_setup_start(pgd_t *base);
|
|
-extern void native_pagetable_setup_done(pgd_t *base);
|
|
+static inline int pte_none(pte_t pte)
|
|
+{
|
|
+ return !pte.pte;
|
|
+}
|
|
+
|
|
+#define __HAVE_ARCH_PTE_SAME
|
|
+static inline int pte_same(pte_t a, pte_t b)
|
|
+{
|
|
+ return a.pte == b.pte;
|
|
+}
|
|
+
|
|
+static inline int pte_present(pte_t a)
|
|
+{
|
|
+ return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE);
|
|
+}
|
|
+
|
|
+static inline int pmd_present(pmd_t pmd)
|
|
+{
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
+/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
|
|
+ can temporarily clear it. */
|
|
+ return __pmd_val(pmd) != 0;
|
|
#else
|
|
-static inline void xen_pagetable_setup_start(pgd_t *base) {}
|
|
-static inline void xen_pagetable_setup_done(pgd_t *base) {}
|
|
+ return pmd_flags(pmd) & _PAGE_PRESENT;
|
|
#endif
|
|
+}
|
|
|
|
-struct seq_file;
|
|
-extern void arch_report_meminfo(struct seq_file *m);
|
|
+static inline int pmd_none(pmd_t pmd)
|
|
+{
|
|
+ /* Only check low word on 32-bit platforms, since it might be
|
|
+ out of sync with upper half. */
|
|
+ return (unsigned long)__pmd_val(pmd) == 0;
|
|
+}
|
|
|
|
-#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
|
|
-#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
|
|
+static inline unsigned long pmd_page_vaddr(pmd_t pmd)
|
|
+{
|
|
+ return (unsigned long)__va(pmd_val(pmd) & PTE_PFN_MASK);
|
|
+}
|
|
|
|
-#define set_pte_atomic(ptep, pte) \
|
|
- xen_set_pte_atomic(ptep, pte)
|
|
+/*
|
|
+ * Currently stuck as a macro due to indirect forward reference to
|
|
+ * linux/mmzone.h's __section_mem_map_addr() definition:
|
|
+ */
|
|
+#define pmd_page(pmd) pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)
|
|
|
|
-#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd)
|
|
+/*
|
|
+ * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
|
|
+ *
|
|
+ * this macro returns the index of the entry in the pmd page which would
|
|
+ * control the given virtual address
|
|
+ */
|
|
+static inline unsigned pmd_index(unsigned long address)
|
|
+{
|
|
+ return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
|
|
+}
|
|
|
|
-#ifndef __PAGETABLE_PUD_FOLDED
|
|
-#define set_pgd(pgdp, pgd) xen_set_pgd(pgdp, pgd)
|
|
-#define pgd_clear(pgd) xen_pgd_clear(pgd)
|
|
-#endif
|
|
+/*
|
|
+ * Conversion functions: convert a page and protection to a page entry,
|
|
+ * and a page entry and page directory to the page they refer to.
|
|
+ *
|
|
+ * (Currently stuck as a macro because of indirect forward reference
|
|
+ * to linux/mm.h:page_to_nid())
|
|
+ */
|
|
+#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
|
|
-#ifndef set_pud
|
|
-# define set_pud(pudp, pud) xen_set_pud(pudp, pud)
|
|
-#endif
|
|
+/*
|
|
+ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
|
|
+ *
|
|
+ * this function returns the index of the entry in the pte page which would
|
|
+ * control the given virtual address
|
|
+ */
|
|
+static inline unsigned pte_index(unsigned long address)
|
|
+{
|
|
+ return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
|
|
+}
|
|
|
|
-#ifndef __PAGETABLE_PMD_FOLDED
|
|
-#define pud_clear(pud) xen_pud_clear(pud)
|
|
+static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address)
|
|
+{
|
|
+ return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address);
|
|
+}
|
|
+
|
|
+static inline int pmd_bad(pmd_t pmd)
|
|
+{
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
+ return (pmd_flags(pmd) & ~_PAGE_USER & ~_PAGE_PRESENT)
|
|
+ != (_KERNPG_TABLE & ~_PAGE_PRESENT);
|
|
+#else
|
|
+ return (pmd_flags(pmd) & ~_PAGE_USER) != _KERNPG_TABLE;
|
|
#endif
|
|
+}
|
|
|
|
-#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep)
|
|
-#define pmd_clear(pmd) xen_pmd_clear(pmd)
|
|
+static inline unsigned long pages_to_mb(unsigned long npg)
|
|
+{
|
|
+ return npg >> (20 - PAGE_SHIFT);
|
|
+}
|
|
|
|
-#define pte_update(mm, addr, ptep) do { } while (0)
|
|
-#define pte_update_defer(mm, addr, ptep) do { } while (0)
|
|
+#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
|
|
+ direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
|
|
|
|
-#endif /* __ASSEMBLY__ */
|
|
+#if PAGETABLE_LEVELS > 2
|
|
+static inline int pud_none(pud_t pud)
|
|
+{
|
|
+ return __pud_val(pud) == 0;
|
|
+}
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-# include "pgtable_32.h"
|
|
+static inline int pud_present(pud_t pud)
|
|
+{
|
|
+ return pud_flags(pud) & _PAGE_PRESENT;
|
|
+}
|
|
+
|
|
+static inline unsigned long pud_page_vaddr(pud_t pud)
|
|
+{
|
|
+ return (unsigned long)__va((unsigned long)pud_val(pud) & PTE_PFN_MASK);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Currently stuck as a macro due to indirect forward reference to
|
|
+ * linux/mmzone.h's __section_mem_map_addr() definition:
|
|
+ */
|
|
+#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
|
|
+
|
|
+/* Find an entry in the second-level page table.. */
|
|
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
|
|
+{
|
|
+ return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
|
|
+}
|
|
+
|
|
+static inline unsigned long pmd_pfn(pmd_t pmd)
|
|
+{
|
|
+ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
|
|
+}
|
|
+
|
|
+static inline int pud_large(pud_t pud)
|
|
+{
|
|
+ return (__pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
|
|
+ (_PAGE_PSE | _PAGE_PRESENT);
|
|
+}
|
|
+
|
|
+static inline int pud_bad(pud_t pud)
|
|
+{
|
|
+ return (pud_flags(pud) & ~(_KERNPG_TABLE | _PAGE_USER)) != 0;
|
|
+}
|
|
#else
|
|
-# include "pgtable_64.h"
|
|
-#endif
|
|
+static inline int pud_large(pud_t pud)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+#endif /* PAGETABLE_LEVELS > 2 */
|
|
+
|
|
+#if PAGETABLE_LEVELS > 3
|
|
+static inline int pgd_present(pgd_t pgd)
|
|
+{
|
|
+ return pgd_flags(pgd) & _PAGE_PRESENT;
|
|
+}
|
|
+
|
|
+static inline unsigned long pgd_page_vaddr(pgd_t pgd)
|
|
+{
|
|
+ return (unsigned long)__va((unsigned long)pgd_val(pgd) & PTE_PFN_MASK);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Currently stuck as a macro due to indirect forward reference to
|
|
+ * linux/mmzone.h's __section_mem_map_addr() definition:
|
|
+ */
|
|
+#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
|
|
+
|
|
+/* to find an entry in a page-table-directory. */
|
|
+static inline unsigned pud_index(unsigned long address)
|
|
+{
|
|
+ return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
|
|
+}
|
|
+
|
|
+static inline pud_t *pud_offset(pgd_t *pgd, unsigned long address)
|
|
+{
|
|
+ return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(address);
|
|
+}
|
|
+
|
|
+static inline int pgd_bad(pgd_t pgd)
|
|
+{
|
|
+ return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
|
|
+}
|
|
+
|
|
+static inline int pgd_none(pgd_t pgd)
|
|
+{
|
|
+ return !__pgd_val(pgd);
|
|
+}
|
|
+#endif /* PAGETABLE_LEVELS > 3 */
|
|
+
|
|
+#endif /* __ASSEMBLY__ */
|
|
|
|
/*
|
|
* the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD]
|
|
@@ -479,28 +512,6 @@ extern void arch_report_meminfo(struct s
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
-enum {
|
|
- PG_LEVEL_NONE,
|
|
- PG_LEVEL_4K,
|
|
- PG_LEVEL_2M,
|
|
- PG_LEVEL_1G,
|
|
- PG_LEVEL_NUM
|
|
-};
|
|
-
|
|
-#ifdef CONFIG_PROC_FS
|
|
-extern void update_page_count(int level, unsigned long pages);
|
|
-#else
|
|
-static inline void update_page_count(int level, unsigned long pages) { }
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Helper function that returns the kernel pagetable entry controlling
|
|
- * the virtual address 'address'. NULL means no pagetable entry present.
|
|
- * NOTE: the return type is pte_t but if the pmd is PSE then we return it
|
|
- * as a pte too.
|
|
- */
|
|
-extern pte_t *lookup_address(unsigned long address, unsigned int *level);
|
|
-
|
|
/* local pte updates need not use xchg for locking */
|
|
static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
|
|
{
|
|
@@ -633,15 +644,18 @@ static inline void clone_pgd_range(pgd_t
|
|
memcpy(dst, src, count * sizeof(pgd_t));
|
|
}
|
|
|
|
-#define arbitrary_virt_to_machine(va) \
|
|
+#define arbitrary_virt_to_mfn(va) \
|
|
({ \
|
|
unsigned int __lvl; \
|
|
pte_t *__ptep = lookup_address((unsigned long)(va), &__lvl); \
|
|
BUG_ON(!__ptep || __lvl != PG_LEVEL_4K || !pte_present(*__ptep));\
|
|
- (((maddr_t)pte_mfn(*__ptep) << PAGE_SHIFT) \
|
|
- | ((unsigned long)(va) & (PAGE_SIZE - 1))); \
|
|
+ pte_mfn(*__ptep); \
|
|
})
|
|
|
|
+#define arbitrary_virt_to_machine(va) \
|
|
+ (((maddr_t)arbitrary_virt_to_mfn(va) << PAGE_SHIFT) \
|
|
+ | ((unsigned long)(va) & (PAGE_SIZE - 1)))
|
|
+
|
|
#ifdef CONFIG_HIGHPTE
|
|
#include <asm/io.h>
|
|
struct page *kmap_atomic_to_page(void *);
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable-3level.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -20,21 +20,6 @@
|
|
__FILE__, __LINE__, &(e), __pgd_val(e), \
|
|
(pgd_val(e) & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
|
|
-static inline int pud_none(pud_t pud)
|
|
-{
|
|
- return __pud_val(pud) == 0;
|
|
-
|
|
-}
|
|
-static inline int pud_bad(pud_t pud)
|
|
-{
|
|
- return (__pud_val(pud) & ~(PTE_PFN_MASK | _KERNPG_TABLE | _PAGE_USER)) != 0;
|
|
-}
|
|
-
|
|
-static inline int pud_present(pud_t pud)
|
|
-{
|
|
- return __pud_val(pud) & _PAGE_PRESENT;
|
|
-}
|
|
-
|
|
/* Rules for using set_pte: the pte being assigned *must* be
|
|
* either not present or in a state where the hardware will
|
|
* not attempt to update the pte. In places where this is
|
|
@@ -102,15 +87,6 @@ static inline void pud_clear(pud_t *pudp
|
|
xen_tlb_flush();
|
|
}
|
|
|
|
-#define pud_page(pud) pfn_to_page(pud_val(pud) >> PAGE_SHIFT)
|
|
-
|
|
-#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PTE_PFN_MASK))
|
|
-
|
|
-
|
|
-/* Find an entry in the second-level page table.. */
|
|
-#define pmd_offset(pud, address) ((pmd_t *)pud_page_vaddr(*(pud)) + \
|
|
- pmd_index(address))
|
|
-
|
|
#ifdef CONFIG_SMP
|
|
static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
|
|
{
|
|
@@ -127,17 +103,6 @@ static inline pte_t xen_ptep_get_and_cle
|
|
#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte)
|
|
#endif
|
|
|
|
-#define __HAVE_ARCH_PTE_SAME
|
|
-static inline int pte_same(pte_t a, pte_t b)
|
|
-{
|
|
- return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
|
|
-}
|
|
-
|
|
-static inline int pte_none(pte_t pte)
|
|
-{
|
|
- return !(pte.pte_low | pte.pte_high);
|
|
-}
|
|
-
|
|
#define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
|
|
((_pte).pte_high << (32-PAGE_SHIFT)))
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,24 +0,0 @@
|
|
-#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
|
|
-#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
|
|
-
|
|
-#define SHARED_KERNEL_PMD 0
|
|
-
|
|
-/*
|
|
- * PGDIR_SHIFT determines what a top-level page table entry can map
|
|
- */
|
|
-#define PGDIR_SHIFT 30
|
|
-#define PTRS_PER_PGD 4
|
|
-
|
|
-/*
|
|
- * PMD_SHIFT determines the size of the area a middle-level
|
|
- * page table can map
|
|
- */
|
|
-#define PMD_SHIFT 21
|
|
-#define PTRS_PER_PMD 512
|
|
-
|
|
-/*
|
|
- * entries per page directory level
|
|
- */
|
|
-#define PTRS_PER_PTE 512
|
|
-
|
|
-#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable-3level_types.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -0,0 +1,44 @@
|
|
+#ifndef _ASM_X86_PGTABLE_3LEVEL_DEFS_H
|
|
+#define _ASM_X86_PGTABLE_3LEVEL_DEFS_H
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+#include <linux/types.h>
|
|
+
|
|
+typedef u64 pteval_t;
|
|
+typedef u64 pmdval_t;
|
|
+typedef u64 pudval_t;
|
|
+typedef u64 pgdval_t;
|
|
+typedef u64 pgprotval_t;
|
|
+
|
|
+typedef union {
|
|
+ struct {
|
|
+ unsigned long pte_low, pte_high;
|
|
+ };
|
|
+ pteval_t pte;
|
|
+} pte_t;
|
|
+#endif /* !__ASSEMBLY__ */
|
|
+
|
|
+#define SHARED_KERNEL_PMD 0
|
|
+
|
|
+#define PAGETABLE_LEVELS 3
|
|
+
|
|
+/*
|
|
+ * PGDIR_SHIFT determines what a top-level page table entry can map
|
|
+ */
|
|
+#define PGDIR_SHIFT 30
|
|
+#define PTRS_PER_PGD 4
|
|
+
|
|
+/*
|
|
+ * PMD_SHIFT determines the size of the area a middle-level
|
|
+ * page table can map
|
|
+ */
|
|
+#define PMD_SHIFT 21
|
|
+#define PTRS_PER_PMD 512
|
|
+
|
|
+/*
|
|
+ * entries per page directory level
|
|
+ */
|
|
+#define PTRS_PER_PTE 512
|
|
+
|
|
+
|
|
+#endif /* _ASM_X86_PGTABLE_3LEVEL_DEFS_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,6 +1,8 @@
|
|
#ifndef _ASM_X86_PGTABLE_32_H
|
|
#define _ASM_X86_PGTABLE_32_H
|
|
|
|
+#include <asm/pgtable_32_types.h>
|
|
+
|
|
/*
|
|
* The Linux memory management assumes a three-level page table setup. On
|
|
* the i386, we use that, but "fold" the mid level into the top-level page
|
|
@@ -31,47 +33,6 @@ void paging_init(void);
|
|
|
|
extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t);
|
|
|
|
-/*
|
|
- * The Linux x86 paging architecture is 'compile-time dual-mode', it
|
|
- * implements both the traditional 2-level x86 page tables and the
|
|
- * newer 3-level PAE-mode page tables.
|
|
- */
|
|
-#ifdef CONFIG_X86_PAE
|
|
-# include <asm/pgtable-3level-defs.h>
|
|
-# define PMD_SIZE (1UL << PMD_SHIFT)
|
|
-# define PMD_MASK (~(PMD_SIZE - 1))
|
|
-#else
|
|
-# include <asm/pgtable-2level-defs.h>
|
|
-#endif
|
|
-
|
|
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
|
-#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
|
-
|
|
-/* Just any arbitrary offset to the start of the vmalloc VM area: the
|
|
- * current 8MB value just means that there will be a 8MB "hole" after the
|
|
- * physical memory until the kernel virtual memory starts. That means that
|
|
- * any out-of-bounds memory accesses will hopefully be caught.
|
|
- * The vmalloc() routines leaves a hole of 4kB between each vmalloced
|
|
- * area for the same reason. ;)
|
|
- */
|
|
-#define VMALLOC_OFFSET (8 * 1024 * 1024)
|
|
-#define VMALLOC_START ((unsigned long)high_memory + VMALLOC_OFFSET)
|
|
-#ifdef CONFIG_X86_PAE
|
|
-#define LAST_PKMAP 512
|
|
-#else
|
|
-#define LAST_PKMAP 1024
|
|
-#endif
|
|
-
|
|
-#define PKMAP_BASE ((FIXADDR_BOOT_START - PAGE_SIZE * (LAST_PKMAP + 1)) \
|
|
- & PMD_MASK)
|
|
-
|
|
-#ifdef CONFIG_HIGHMEM
|
|
-# define VMALLOC_END (PKMAP_BASE - 2 * PAGE_SIZE)
|
|
-#else
|
|
-# define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE)
|
|
-#endif
|
|
-
|
|
-#define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE)
|
|
|
|
/*
|
|
* Define this if things work differently on an i386 and an i486:
|
|
@@ -80,66 +41,12 @@ extern void set_pmd_pfn(unsigned long, u
|
|
*/
|
|
#undef TEST_ACCESS_OK
|
|
|
|
-/* The boot page tables (all created as a single array) */
|
|
-extern unsigned long pg0[];
|
|
-
|
|
-#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
|
|
-
|
|
-/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
|
|
-#define pmd_none(x) (!(unsigned long)__pmd_val(x))
|
|
-#if CONFIG_XEN_COMPAT <= 0x030002
|
|
-/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
|
|
- can temporarily clear it. */
|
|
-#define pmd_present(x) (__pmd_val(x))
|
|
-#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
|
|
-#else
|
|
-#define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
|
|
-#define pmd_bad(x) ((__pmd_val(x) & (PTE_FLAGS_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
|
|
-#endif
|
|
-
|
|
-
|
|
-#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
|
|
-
|
|
#ifdef CONFIG_X86_PAE
|
|
# include <asm/pgtable-3level.h>
|
|
#else
|
|
# include <asm/pgtable-2level.h>
|
|
#endif
|
|
|
|
-/*
|
|
- * Conversion functions: convert a page and protection to a page entry,
|
|
- * and a page entry and page directory to the page they refer to.
|
|
- */
|
|
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
|
|
-
|
|
-
|
|
-static inline int pud_large(pud_t pud) { return 0; }
|
|
-
|
|
-/*
|
|
- * the pmd page can be thought of an array like this: pmd_t[PTRS_PER_PMD]
|
|
- *
|
|
- * this macro returns the index of the entry in the pmd page which would
|
|
- * control the given virtual address
|
|
- */
|
|
-#define pmd_index(address) \
|
|
- (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
|
|
-
|
|
-/*
|
|
- * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE]
|
|
- *
|
|
- * this macro returns the index of the entry in the pte page which would
|
|
- * control the given virtual address
|
|
- */
|
|
-#define pte_index(address) \
|
|
- (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
|
|
-#define pte_offset_kernel(dir, address) \
|
|
- ((pte_t *)pmd_page_vaddr(*(dir)) + pte_index((address)))
|
|
-
|
|
-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
|
|
-
|
|
-#define pmd_page_vaddr(pmd) \
|
|
- ((unsigned long)__va(pmd_val((pmd)) & PTE_PFN_MASK))
|
|
-
|
|
#if defined(CONFIG_HIGHPTE)
|
|
#define pte_offset_map(dir, address) \
|
|
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \
|
|
@@ -185,7 +92,4 @@ void make_lowmem_page_writable(void *va,
|
|
#define kern_addr_valid(kaddr) (0)
|
|
#endif
|
|
|
|
-#define io_remap_pfn_range(vma, from, pfn, size, prot) \
|
|
- direct_remap_pfn_range(vma, from, pfn, size, prot, DOMID_IO)
|
|
-
|
|
#endif /* _ASM_X86_PGTABLE_32_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -2,6 +2,8 @@
|
|
#define _ASM_X86_PGTABLE_64_H
|
|
|
|
#include <linux/const.h>
|
|
+#include <asm/pgtable_64_types.h>
|
|
+
|
|
#ifndef __ASSEMBLY__
|
|
|
|
/*
|
|
@@ -12,12 +14,12 @@
|
|
#include <linux/bitops.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/sched.h>
|
|
-#include <asm/pda.h>
|
|
|
|
#ifdef CONFIG_XEN
|
|
extern pud_t level3_user_pgt[512];
|
|
|
|
extern void xen_init_pt(void);
|
|
+extern void xen_switch_pt(void);
|
|
#endif
|
|
|
|
extern pud_t level3_kernel_pgt[512];
|
|
@@ -33,39 +35,13 @@ extern void paging_init(void);
|
|
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
-#define SHARED_KERNEL_PMD 0
|
|
-
|
|
-/*
|
|
- * PGDIR_SHIFT determines what a top-level page table entry can map
|
|
- */
|
|
-#define PGDIR_SHIFT 39
|
|
-#define PTRS_PER_PGD 512
|
|
-
|
|
-/*
|
|
- * 3rd level page
|
|
- */
|
|
-#define PUD_SHIFT 30
|
|
-#define PTRS_PER_PUD 512
|
|
-
|
|
-/*
|
|
- * PMD_SHIFT determines the size of the area a middle-level
|
|
- * page table can map
|
|
- */
|
|
-#define PMD_SHIFT 21
|
|
-#define PTRS_PER_PMD 512
|
|
-
|
|
-/*
|
|
- * entries per page directory level
|
|
- */
|
|
-#define PTRS_PER_PTE 512
|
|
-
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#define pte_ERROR(e) \
|
|
printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pte_val(e), pte_pfn(e))
|
|
#define pmd_ERROR(e) \
|
|
- printk("%s:%d: bad pmd %p(%016lx pfn %010Lx).\n", \
|
|
+ printk("%s:%d: bad pmd %p(%016lx pfn %010lx).\n", \
|
|
__FILE__, __LINE__, &(e), __pmd_val(e), pmd_pfn(e))
|
|
#define pud_ERROR(e) \
|
|
printk("%s:%d: bad pud %p(%016lx pfn %010Lx).\n", \
|
|
@@ -76,9 +52,6 @@ extern void paging_init(void);
|
|
__FILE__, __LINE__, &(e), __pgd_val(e), \
|
|
(pgd_val(e) & __PHYSICAL_MASK) >> PAGE_SHIFT)
|
|
|
|
-#define pgd_none(x) (!__pgd_val(x))
|
|
-#define pud_none(x) (!__pud_val(x))
|
|
-
|
|
struct mm_struct;
|
|
|
|
void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte);
|
|
@@ -138,48 +111,6 @@ static inline void xen_pgd_clear(pgd_t *
|
|
xen_set_pgd(__user_pgd(pgd), xen_make_pgd(0));
|
|
}
|
|
|
|
-#define pte_same(a, b) ((a).pte == (b).pte)
|
|
-
|
|
-#endif /* !__ASSEMBLY__ */
|
|
-
|
|
-#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
|
|
-#define PMD_MASK (~(PMD_SIZE - 1))
|
|
-#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
|
|
-#define PUD_MASK (~(PUD_SIZE - 1))
|
|
-#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
|
|
-#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
|
-
|
|
-#define MAX_PHYSMEM_BITS 43
|
|
-#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
|
-#define VMALLOC_START _AC(0xffffc20000000000, UL)
|
|
-#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
|
|
-#define VMEMMAP_START _AC(0xffffe20000000000, UL)
|
|
-#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
|
|
-#define MODULES_END _AC(0xffffffffff000000, UL)
|
|
-#define MODULES_LEN (MODULES_END - MODULES_VADDR)
|
|
-
|
|
-#ifndef __ASSEMBLY__
|
|
-
|
|
-static inline int pgd_bad(pgd_t pgd)
|
|
-{
|
|
- return (__pgd_val(pgd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
-}
|
|
-
|
|
-static inline int pud_bad(pud_t pud)
|
|
-{
|
|
- return (__pud_val(pud) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
-}
|
|
-
|
|
-static inline int pmd_bad(pmd_t pmd)
|
|
-{
|
|
- return (__pmd_val(pmd) & ~(PTE_PFN_MASK | _PAGE_USER)) != _KERNPG_TABLE;
|
|
-}
|
|
-
|
|
-#define pte_none(x) (!(x).pte)
|
|
-#define pte_present(x) ((x).pte & (_PAGE_PRESENT | _PAGE_PROTNONE))
|
|
-
|
|
-#define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */
|
|
-
|
|
#define __pte_mfn(_pte) (((_pte).pte & PTE_PFN_MASK) >> PAGE_SHIFT)
|
|
|
|
/*
|
|
@@ -190,47 +121,12 @@ static inline int pmd_bad(pmd_t pmd)
|
|
/*
|
|
* Level 4 access.
|
|
*/
|
|
-#define pgd_page_vaddr(pgd) \
|
|
- ((unsigned long)__va((unsigned long)pgd_val((pgd)) & PTE_PFN_MASK))
|
|
-#define pgd_page(pgd) (pfn_to_page(pgd_val((pgd)) >> PAGE_SHIFT))
|
|
-#define pgd_present(pgd) (__pgd_val(pgd) & _PAGE_PRESENT)
|
|
static inline int pgd_large(pgd_t pgd) { return 0; }
|
|
#define mk_kernel_pgd(address) __pgd((address) | _KERNPG_TABLE)
|
|
|
|
/* PUD - Level3 access */
|
|
-/* to find an entry in a page-table-directory. */
|
|
-#define pud_page_vaddr(pud) \
|
|
- ((unsigned long)__va(pud_val((pud)) & PHYSICAL_PAGE_MASK))
|
|
-#define pud_page(pud) (pfn_to_page(pud_val((pud)) >> PAGE_SHIFT))
|
|
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
|
|
-#define pud_offset(pgd, address) \
|
|
- ((pud_t *)pgd_page_vaddr(*(pgd)) + pud_index((address)))
|
|
-#define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
|
|
-
|
|
-static inline int pud_large(pud_t pte)
|
|
-{
|
|
- return (__pud_val(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
|
|
- (_PAGE_PSE | _PAGE_PRESENT);
|
|
-}
|
|
|
|
/* PMD - Level 2 access */
|
|
-#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val((pmd)) & PTE_PFN_MASK))
|
|
-#define pmd_page(pmd) (pfn_to_page(pmd_val((pmd)) >> PAGE_SHIFT))
|
|
-
|
|
-#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
|
|
-#define pmd_offset(dir, address) ((pmd_t *)pud_page_vaddr(*(dir)) + \
|
|
- pmd_index(address))
|
|
-#define pmd_none(x) (!__pmd_val(x))
|
|
-#if CONFIG_XEN_COMPAT <= 0x030002
|
|
-/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
|
|
- can temporarily clear it. */
|
|
-#define pmd_present(x) (__pmd_val(x))
|
|
-#else
|
|
-#define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
|
|
-#endif
|
|
-#define pfn_pmd(nr, prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val((prot))))
|
|
-#define pmd_pfn(x) ((pmd_val((x)) & __PHYSICAL_MASK) >> PAGE_SHIFT)
|
|
-
|
|
#define pte_to_pgoff(pte) ((__pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
|
|
#define pgoff_to_pte(off) ((pte_t) { .pte = ((off) << PAGE_SHIFT) | \
|
|
_PAGE_FILE })
|
|
@@ -238,13 +134,6 @@ static inline int pud_large(pud_t pte)
|
|
|
|
/* PTE - Level 1 access. */
|
|
|
|
-/* page, protection -> pte */
|
|
-#define mk_pte(page, pgprot) pfn_pte(page_to_pfn((page)), (pgprot))
|
|
-
|
|
-#define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
|
|
-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
|
|
- pte_index((address)))
|
|
-
|
|
/* x86-64 always has all page tables mapped. */
|
|
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
|
|
#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
|
|
@@ -278,9 +167,6 @@ static inline int pud_large(pud_t pte)
|
|
extern int kern_addr_valid(unsigned long addr);
|
|
extern void cleanup_highmap(void);
|
|
|
|
-#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
|
|
- direct_remap_pfn_range(vma, vaddr, pfn, size, prot, DOMID_IO)
|
|
-
|
|
#define HAVE_ARCH_UNMAPPED_AREA
|
|
#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
|
|
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64_types.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -0,0 +1,63 @@
|
|
+#ifndef _ASM_X86_PGTABLE_64_DEFS_H
|
|
+#define _ASM_X86_PGTABLE_64_DEFS_H
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+#include <linux/types.h>
|
|
+
|
|
+/*
|
|
+ * These are used to make use of C type-checking..
|
|
+ */
|
|
+typedef unsigned long pteval_t;
|
|
+typedef unsigned long pmdval_t;
|
|
+typedef unsigned long pudval_t;
|
|
+typedef unsigned long pgdval_t;
|
|
+typedef unsigned long pgprotval_t;
|
|
+
|
|
+typedef union { pteval_t pte; unsigned int pte_low; } pte_t;
|
|
+
|
|
+#endif /* !__ASSEMBLY__ */
|
|
+
|
|
+#define SHARED_KERNEL_PMD 0
|
|
+#define PAGETABLE_LEVELS 4
|
|
+
|
|
+/*
|
|
+ * PGDIR_SHIFT determines what a top-level page table entry can map
|
|
+ */
|
|
+#define PGDIR_SHIFT 39
|
|
+#define PTRS_PER_PGD 512
|
|
+
|
|
+/*
|
|
+ * 3rd level page
|
|
+ */
|
|
+#define PUD_SHIFT 30
|
|
+#define PTRS_PER_PUD 512
|
|
+
|
|
+/*
|
|
+ * PMD_SHIFT determines the size of the area a middle-level
|
|
+ * page table can map
|
|
+ */
|
|
+#define PMD_SHIFT 21
|
|
+#define PTRS_PER_PMD 512
|
|
+
|
|
+/*
|
|
+ * entries per page directory level
|
|
+ */
|
|
+#define PTRS_PER_PTE 512
|
|
+
|
|
+#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT)
|
|
+#define PMD_MASK (~(PMD_SIZE - 1))
|
|
+#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT)
|
|
+#define PUD_MASK (~(PUD_SIZE - 1))
|
|
+#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT)
|
|
+#define PGDIR_MASK (~(PGDIR_SIZE - 1))
|
|
+
|
|
+#define MAX_PHYSMEM_BITS 43
|
|
+#define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL)
|
|
+#define VMALLOC_START _AC(0xffffc20000000000, UL)
|
|
+#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
|
|
+#define VMEMMAP_START _AC(0xffffe20000000000, UL)
|
|
+#define MODULES_VADDR _AC(0xffffffffa0000000, UL)
|
|
+#define MODULES_END _AC(0xffffffffff000000, UL)
|
|
+#define MODULES_LEN (MODULES_END - MODULES_VADDR)
|
|
+
|
|
+#endif /* _ASM_X86_PGTABLE_64_DEFS_H */
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_types.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -0,0 +1,388 @@
|
|
+#ifndef _ASM_X86_PGTABLE_DEFS_H
|
|
+#define _ASM_X86_PGTABLE_DEFS_H
|
|
+
|
|
+#include <linux/const.h>
|
|
+#include <asm/page_types.h>
|
|
+
|
|
+#define FIRST_USER_ADDRESS 0
|
|
+
|
|
+#define _PAGE_BIT_PRESENT 0 /* is present */
|
|
+#define _PAGE_BIT_RW 1 /* writeable */
|
|
+#define _PAGE_BIT_USER 2 /* userspace addressable */
|
|
+#define _PAGE_BIT_PWT 3 /* page write through */
|
|
+#define _PAGE_BIT_PCD 4 /* page cache disabled */
|
|
+#define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */
|
|
+#define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */
|
|
+#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
|
|
+#define _PAGE_BIT_PAT 7 /* on 4KB pages */
|
|
+#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
|
|
+#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
|
|
+#define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */
|
|
+#define _PAGE_BIT_UNUSED3 11
|
|
+#define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */
|
|
+#define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1
|
|
+#define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1
|
|
+#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
|
|
+
|
|
+/* If _PAGE_BIT_PRESENT is clear, we use these: */
|
|
+/* - if the user mapped it with PROT_NONE; pte_present gives true */
|
|
+#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL
|
|
+/* - set: nonlinear file mapping, saved PTE; unset:swap */
|
|
+#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY
|
|
+
|
|
+#define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT)
|
|
+#define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW)
|
|
+#define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER)
|
|
+#define _PAGE_PWT (_AT(pteval_t, 1) << _PAGE_BIT_PWT)
|
|
+#define _PAGE_PCD (_AT(pteval_t, 1) << _PAGE_BIT_PCD)
|
|
+#define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
|
|
+#define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
|
|
+#define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
|
|
+#define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
|
|
+#define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1)
|
|
+#define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
|
|
+#define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3)
|
|
+#define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
|
|
+#define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE)
|
|
+#define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL)
|
|
+#define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST)
|
|
+#define __HAVE_ARCH_PTE_SPECIAL
|
|
+
|
|
+#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
|
|
+#define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX)
|
|
+#else
|
|
+#define _PAGE_NX (_AT(pteval_t, 0))
|
|
+#endif
|
|
+
|
|
+#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
|
|
+#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+#if defined(CONFIG_X86_64) && CONFIG_XEN_COMPAT <= 0x030002
|
|
+extern unsigned int __kernel_page_user;
|
|
+#else
|
|
+#define __kernel_page_user 0
|
|
+#endif
|
|
+#endif
|
|
+
|
|
+#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
|
|
+ _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
+#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | \
|
|
+ _PAGE_DIRTY | __kernel_page_user)
|
|
+
|
|
+/* Set of bits not changed in pte_modify */
|
|
+#define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_CACHE_MASK | _PAGE_IOMAP | \
|
|
+ _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
+
|
|
+/*
|
|
+ * PAT settings are part of the hypervisor interface, which sets the
|
|
+ * MSR to 0x050100070406 (i.e. WB, WT, UC-, UC, WC, WP [, UC, UC]).
|
|
+ */
|
|
+#define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT | _PAGE_PAT)
|
|
+#define _PAGE_CACHE_WB (0)
|
|
+#define _PAGE_CACHE_WT (_PAGE_PWT)
|
|
+#define _PAGE_CACHE_WC (_PAGE_PAT)
|
|
+#define _PAGE_CACHE_WP (_PAGE_PAT | _PAGE_PWT)
|
|
+#define _PAGE_CACHE_UC_MINUS (_PAGE_PCD)
|
|
+#define _PAGE_CACHE_UC (_PAGE_PCD | _PAGE_PWT)
|
|
+
|
|
+#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_ACCESSED)
|
|
+#define PAGE_SHARED __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
|
|
+ _PAGE_ACCESSED | _PAGE_NX)
|
|
+
|
|
+#define PAGE_SHARED_EXEC __pgprot(_PAGE_PRESENT | _PAGE_RW | \
|
|
+ _PAGE_USER | _PAGE_ACCESSED)
|
|
+#define PAGE_COPY_NOEXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
+ _PAGE_ACCESSED | _PAGE_NX)
|
|
+#define PAGE_COPY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
+ _PAGE_ACCESSED)
|
|
+#define PAGE_COPY PAGE_COPY_NOEXEC
|
|
+#define PAGE_READONLY __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
+ _PAGE_ACCESSED | _PAGE_NX)
|
|
+#define PAGE_READONLY_EXEC __pgprot(_PAGE_PRESENT | _PAGE_USER | \
|
|
+ _PAGE_ACCESSED)
|
|
+
|
|
+#define __PAGE_KERNEL_EXEC \
|
|
+ (_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | __kernel_page_user)
|
|
+#define __PAGE_KERNEL (__PAGE_KERNEL_EXEC | _PAGE_NX)
|
|
+
|
|
+#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
|
|
+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
|
|
+#define __PAGE_KERNEL_EXEC_NOCACHE (__PAGE_KERNEL_EXEC | _PAGE_PCD | _PAGE_PWT)
|
|
+#define __PAGE_KERNEL_WC (__PAGE_KERNEL | _PAGE_CACHE_WC)
|
|
+#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD | _PAGE_PWT)
|
|
+#define __PAGE_KERNEL_UC_MINUS (__PAGE_KERNEL | _PAGE_PCD)
|
|
+#define __PAGE_KERNEL_VSYSCALL (__PAGE_KERNEL_RX | _PAGE_USER)
|
|
+#define __PAGE_KERNEL_VSYSCALL_NOCACHE (__PAGE_KERNEL_VSYSCALL | _PAGE_PCD | _PAGE_PWT)
|
|
+#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
|
|
+#define __PAGE_KERNEL_LARGE_NOCACHE (__PAGE_KERNEL | _PAGE_CACHE_UC | _PAGE_PSE)
|
|
+#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
|
+
|
|
+#define __PAGE_KERNEL_IO (__PAGE_KERNEL | _PAGE_IOMAP)
|
|
+#define __PAGE_KERNEL_IO_NOCACHE (__PAGE_KERNEL_NOCACHE | _PAGE_IOMAP)
|
|
+#define __PAGE_KERNEL_IO_UC_MINUS (__PAGE_KERNEL_UC_MINUS | _PAGE_IOMAP)
|
|
+#define __PAGE_KERNEL_IO_WC (__PAGE_KERNEL_WC | _PAGE_IOMAP)
|
|
+
|
|
+#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
|
|
+#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
|
|
+#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
|
|
+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
|
|
+#define PAGE_KERNEL_WC __pgprot(__PAGE_KERNEL_WC)
|
|
+#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
|
|
+#define PAGE_KERNEL_UC_MINUS __pgprot(__PAGE_KERNEL_UC_MINUS)
|
|
+#define PAGE_KERNEL_EXEC_NOCACHE __pgprot(__PAGE_KERNEL_EXEC_NOCACHE)
|
|
+#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
|
|
+#define PAGE_KERNEL_LARGE_NOCACHE __pgprot(__PAGE_KERNEL_LARGE_NOCACHE)
|
|
+#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
|
|
+#define PAGE_KERNEL_VSYSCALL __pgprot(__PAGE_KERNEL_VSYSCALL)
|
|
+#define PAGE_KERNEL_VSYSCALL_NOCACHE __pgprot(__PAGE_KERNEL_VSYSCALL_NOCACHE)
|
|
+
|
|
+#define PAGE_KERNEL_IO __pgprot(__PAGE_KERNEL_IO)
|
|
+#define PAGE_KERNEL_IO_NOCACHE __pgprot(__PAGE_KERNEL_IO_NOCACHE)
|
|
+#define PAGE_KERNEL_IO_UC_MINUS __pgprot(__PAGE_KERNEL_IO_UC_MINUS)
|
|
+#define PAGE_KERNEL_IO_WC __pgprot(__PAGE_KERNEL_IO_WC)
|
|
+
|
|
+/* xwr */
|
|
+#define __P000 PAGE_NONE
|
|
+#define __P001 PAGE_READONLY
|
|
+#define __P010 PAGE_COPY
|
|
+#define __P011 PAGE_COPY
|
|
+#define __P100 PAGE_READONLY_EXEC
|
|
+#define __P101 PAGE_READONLY_EXEC
|
|
+#define __P110 PAGE_COPY_EXEC
|
|
+#define __P111 PAGE_COPY_EXEC
|
|
+
|
|
+#define __S000 PAGE_NONE
|
|
+#define __S001 PAGE_READONLY
|
|
+#define __S010 PAGE_SHARED
|
|
+#define __S011 PAGE_SHARED
|
|
+#define __S100 PAGE_READONLY_EXEC
|
|
+#define __S101 PAGE_READONLY_EXEC
|
|
+#define __S110 PAGE_SHARED_EXEC
|
|
+#define __S111 PAGE_SHARED_EXEC
|
|
+
|
|
+/*
|
|
+ * early identity mapping pte attrib macros.
|
|
+ */
|
|
+#ifdef CONFIG_X86_64
|
|
+#define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC
|
|
+#else
|
|
+/*
|
|
+ * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection
|
|
+ * bits are combined, this will alow user to access the high address mapped
|
|
+ * VDSO in the presence of CONFIG_COMPAT_VDSO
|
|
+ */
|
|
+#define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */
|
|
+#define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */
|
|
+#define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+# include <asm/pgtable_32_types.h>
|
|
+#else
|
|
+# include "pgtable_64_types.h"
|
|
+#endif
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+
|
|
+#include <linux/types.h>
|
|
+
|
|
+/* PTE_PFN_MASK extracts the PFN from a (pte|pmd|pud|pgd)val_t */
|
|
+#define PTE_PFN_MASK ((pteval_t)PHYSICAL_PAGE_MASK)
|
|
+
|
|
+/* PTE_FLAGS_MASK extracts the flags from a (pte|pmd|pud|pgd)val_t */
|
|
+#define PTE_FLAGS_MASK (~PTE_PFN_MASK)
|
|
+
|
|
+typedef struct pgprot { pgprotval_t pgprot; } pgprot_t;
|
|
+
|
|
+#include <asm/maddr.h>
|
|
+
|
|
+typedef struct { pgdval_t pgd; } pgd_t;
|
|
+
|
|
+#define __pgd_ma(x) ((pgd_t) { (x) } )
|
|
+static inline pgd_t xen_make_pgd(pgdval_t val)
|
|
+{
|
|
+ if (val & _PAGE_PRESENT)
|
|
+ val = pte_phys_to_machine(val);
|
|
+ return (pgd_t) { val };
|
|
+}
|
|
+
|
|
+#define __pgd_val(x) ((x).pgd)
|
|
+static inline pgdval_t xen_pgd_val(pgd_t pgd)
|
|
+{
|
|
+ pgdval_t ret = __pgd_val(pgd);
|
|
+#if PAGETABLE_LEVELS == 2 && CONFIG_XEN_COMPAT <= 0x030002
|
|
+ if (ret)
|
|
+ ret = machine_to_phys(ret) | _PAGE_PRESENT;
|
|
+#else
|
|
+ if (ret & _PAGE_PRESENT)
|
|
+ ret = pte_machine_to_phys(ret);
|
|
+#endif
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static inline pgdval_t pgd_flags(pgd_t pgd)
|
|
+{
|
|
+ return __pgd_val(pgd) & PTE_FLAGS_MASK;
|
|
+}
|
|
+
|
|
+#if PAGETABLE_LEVELS > 3
|
|
+typedef struct { pudval_t pud; } pud_t;
|
|
+
|
|
+#define __pud_ma(x) ((pud_t) { (x) } )
|
|
+static inline pud_t xen_make_pud(pudval_t val)
|
|
+{
|
|
+ if (val & _PAGE_PRESENT)
|
|
+ val = pte_phys_to_machine(val);
|
|
+ return (pud_t) { val };
|
|
+}
|
|
+
|
|
+#define __pud_val(x) ((x).pud)
|
|
+static inline pudval_t xen_pud_val(pud_t pud)
|
|
+{
|
|
+ pudval_t ret = __pud_val(pud);
|
|
+ if (ret & _PAGE_PRESENT)
|
|
+ ret = pte_machine_to_phys(ret);
|
|
+ return ret;
|
|
+}
|
|
+#else
|
|
+#include <asm-generic/pgtable-nopud.h>
|
|
+
|
|
+#define __pud_val(x) __pgd_val((x).pgd)
|
|
+static inline pudval_t xen_pud_val(pud_t pud)
|
|
+{
|
|
+ return xen_pgd_val(pud.pgd);
|
|
+}
|
|
+#endif
|
|
+
|
|
+#if PAGETABLE_LEVELS > 2
|
|
+typedef struct { pmdval_t pmd; } pmd_t;
|
|
+
|
|
+#define __pmd_ma(x) ((pmd_t) { (x) } )
|
|
+static inline pmd_t xen_make_pmd(pmdval_t val)
|
|
+{
|
|
+ if (val & _PAGE_PRESENT)
|
|
+ val = pte_phys_to_machine(val);
|
|
+ return (pmd_t) { val };
|
|
+}
|
|
+
|
|
+#define __pmd_val(x) ((x).pmd)
|
|
+static inline pmdval_t xen_pmd_val(pmd_t pmd)
|
|
+{
|
|
+ pmdval_t ret = __pmd_val(pmd);
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
+ if (ret)
|
|
+ ret = pte_machine_to_phys(ret) | _PAGE_PRESENT;
|
|
+#else
|
|
+ if (ret & _PAGE_PRESENT)
|
|
+ ret = pte_machine_to_phys(ret);
|
|
+#endif
|
|
+ return ret;
|
|
+}
|
|
+#else
|
|
+#include <asm-generic/pgtable-nopmd.h>
|
|
+
|
|
+#define __pmd_ma(x) ((pmd_t) { .pud.pgd = __pgd_ma(x) } )
|
|
+#define __pmd_val(x) __pgd_val((x).pud.pgd)
|
|
+static inline pmdval_t xen_pmd_val(pmd_t pmd)
|
|
+{
|
|
+ return xen_pgd_val(pmd.pud.pgd);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static inline pudval_t pud_flags(pud_t pud)
|
|
+{
|
|
+ return __pud_val(pud) & PTE_FLAGS_MASK;
|
|
+}
|
|
+
|
|
+static inline pmdval_t pmd_flags(pmd_t pmd)
|
|
+{
|
|
+ return __pmd_val(pmd) & PTE_FLAGS_MASK;
|
|
+}
|
|
+
|
|
+#define __pte_ma(x) ((pte_t) { .pte = (x) } )
|
|
+static inline pte_t xen_make_pte(pteval_t val)
|
|
+{
|
|
+ if ((val & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT)
|
|
+ val = pte_phys_to_machine(val);
|
|
+ return (pte_t) { .pte = val };
|
|
+}
|
|
+
|
|
+#define __pte_val(x) ((x).pte)
|
|
+static inline pteval_t xen_pte_val(pte_t pte)
|
|
+{
|
|
+ pteval_t ret = __pte_val(pte);
|
|
+ if ((pte.pte_low & (_PAGE_PRESENT|_PAGE_IOMAP)) == _PAGE_PRESENT)
|
|
+ ret = pte_machine_to_phys(ret);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static inline pteval_t pte_flags(pte_t pte)
|
|
+{
|
|
+ return __pte_val(pte) & PTE_FLAGS_MASK;
|
|
+}
|
|
+
|
|
+#define pgprot_val(x) ((x).pgprot)
|
|
+#define __pgprot(x) ((pgprot_t) { (x) } )
|
|
+
|
|
+
|
|
+typedef struct page *pgtable_t;
|
|
+
|
|
+extern pteval_t __supported_pte_mask;
|
|
+extern int nx_enabled;
|
|
+extern void set_nx(void);
|
|
+
|
|
+#define pgprot_writecombine pgprot_writecombine
|
|
+extern pgprot_t pgprot_writecombine(pgprot_t prot);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/* Indicate that x86 has its own track and untrack pfn vma functions */
|
|
+#define __HAVE_PFNMAP_TRACKING
|
|
+#endif
|
|
+
|
|
+#define __HAVE_PHYS_MEM_ACCESS_PROT
|
|
+struct file;
|
|
+pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
|
|
+ unsigned long size, pgprot_t vma_prot);
|
|
+int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
|
|
+ unsigned long size, pgprot_t *vma_prot);
|
|
+
|
|
+/* Install a pte for a particular vaddr in kernel space. */
|
|
+void set_pte_vaddr(unsigned long vaddr, pte_t pte);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+extern void native_pagetable_setup_start(pgd_t *base);
|
|
+extern void native_pagetable_setup_done(pgd_t *base);
|
|
+#else
|
|
+static inline void xen_pagetable_setup_start(pgd_t *base) {}
|
|
+static inline void xen_pagetable_setup_done(pgd_t *base) {}
|
|
+#endif
|
|
+
|
|
+struct seq_file;
|
|
+extern void arch_report_meminfo(struct seq_file *m);
|
|
+
|
|
+enum {
|
|
+ PG_LEVEL_NONE,
|
|
+ PG_LEVEL_4K,
|
|
+ PG_LEVEL_2M,
|
|
+ PG_LEVEL_1G,
|
|
+ PG_LEVEL_NUM
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_PROC_FS
|
|
+extern void update_page_count(int level, unsigned long pages);
|
|
+#else
|
|
+static inline void update_page_count(int level, unsigned long pages) { }
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Helper function that returns the kernel pagetable entry controlling
|
|
+ * the virtual address 'address'. NULL means no pagetable entry present.
|
|
+ * NOTE: the return type is pte_t but if the pmd is PSE then we return it
|
|
+ * as a pte too.
|
|
+ */
|
|
+extern pte_t *lookup_address(unsigned long address, unsigned int *level);
|
|
+
|
|
+#endif /* !__ASSEMBLY__ */
|
|
+
|
|
+#endif /* _ASM_X86_PGTABLE_DEFS_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -16,6 +16,7 @@ struct mm_struct;
|
|
#include <asm/cpufeature.h>
|
|
#include <asm/system.h>
|
|
#include <asm/page.h>
|
|
+#include <asm/pgtable_types.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/msr.h>
|
|
#include <asm/desc_defs.h>
|
|
@@ -74,10 +75,10 @@ struct cpuinfo_x86 {
|
|
char pad0;
|
|
#else
|
|
/* Number of 4K pages in DTLB/ITLB combined(in pages): */
|
|
- int x86_tlbsize;
|
|
+ int x86_tlbsize;
|
|
+#endif
|
|
__u8 x86_virt_bits;
|
|
__u8 x86_phys_bits;
|
|
-#endif
|
|
/* CPUID returned core id bits: */
|
|
__u8 x86_coreid_bits;
|
|
/* Max extended CPUID function supported: */
|
|
@@ -92,9 +93,9 @@ struct cpuinfo_x86 {
|
|
int x86_cache_alignment; /* In bytes */
|
|
int x86_power;
|
|
unsigned long loops_per_jiffy;
|
|
-#ifdef CONFIG_SMP
|
|
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
|
|
/* cpus sharing the last level cache: */
|
|
- cpumask_t llc_shared_map;
|
|
+ cpumask_var_t llc_shared_map;
|
|
#endif
|
|
/* cpuid returned max cores value: */
|
|
u16 x86_max_cores;
|
|
@@ -138,7 +139,7 @@ extern struct cpuinfo_x86 new_cpu_data;
|
|
extern __u32 cleared_cpu_caps[NCAPINTS];
|
|
|
|
#ifdef CONFIG_SMP
|
|
-DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
|
|
+DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info);
|
|
#define cpu_data(cpu) per_cpu(cpu_info, cpu)
|
|
#define current_cpu_data __get_cpu_var(cpu_info)
|
|
#else
|
|
@@ -251,7 +252,6 @@ struct x86_hw_tss {
|
|
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
|
|
#define IO_BITMAP_OFFSET offsetof(struct tss_struct, io_bitmap)
|
|
#define INVALID_IO_BITMAP_OFFSET 0x8000
|
|
-#define INVALID_IO_BITMAP_OFFSET_LAZY 0x9000
|
|
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
struct tss_struct {
|
|
@@ -267,11 +267,6 @@ struct tss_struct {
|
|
* be within the limit.
|
|
*/
|
|
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
|
|
- /*
|
|
- * Cache the current maximum and the last task that used the bitmap:
|
|
- */
|
|
- unsigned long io_bitmap_max;
|
|
- struct thread_struct *io_bitmap_owner;
|
|
|
|
/*
|
|
* .. and then another 0x100 bytes for the emergency kernel stack:
|
|
@@ -280,7 +275,7 @@ struct tss_struct {
|
|
|
|
} ____cacheline_aligned;
|
|
|
|
-DECLARE_PER_CPU(struct tss_struct, init_tss);
|
|
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
|
|
|
|
/*
|
|
* Save the original ist values for checking stack pointers during debugging
|
|
@@ -363,6 +358,11 @@ struct i387_soft_struct {
|
|
u32 entry_eip;
|
|
};
|
|
|
|
+struct ymmh_struct {
|
|
+ /* 16 * 16 bytes for each YMMH-reg = 256 bytes */
|
|
+ u32 ymmh_space[64];
|
|
+};
|
|
+
|
|
struct xsave_hdr_struct {
|
|
u64 xstate_bv;
|
|
u64 reserved1[2];
|
|
@@ -372,6 +372,7 @@ struct xsave_hdr_struct {
|
|
struct xsave_struct {
|
|
struct i387_fxsave_struct i387;
|
|
struct xsave_hdr_struct xsave_hdr;
|
|
+ struct ymmh_struct ymmh;
|
|
/* new processor state extensions will go here */
|
|
} __attribute__ ((packed, aligned (64)));
|
|
|
|
@@ -382,11 +383,37 @@ union thread_xstate {
|
|
struct xsave_struct xsave;
|
|
};
|
|
|
|
-#if defined(CONFIG_X86_64) && !defined(CONFIG_X86_NO_TSS)
|
|
+#ifdef CONFIG_X86_64
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
|
#endif
|
|
|
|
-extern void print_cpu_info(struct cpuinfo_x86 *);
|
|
+union irq_stack_union {
|
|
+ char irq_stack[IRQ_STACK_SIZE];
|
|
+ /*
|
|
+ * GCC hardcodes the stack canary as %gs:40. Since the
|
|
+ * irq_stack is the object at %gs:0, we reserve the bottom
|
|
+ * 48 bytes of the irq stack for the canary.
|
|
+ */
|
|
+ struct {
|
|
+ char gs_base[40];
|
|
+ unsigned long stack_canary;
|
|
+ };
|
|
+};
|
|
+
|
|
+DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union);
|
|
+DECLARE_INIT_PER_CPU(irq_stack_union);
|
|
+
|
|
+DECLARE_PER_CPU(char *, irq_stack_ptr);
|
|
+DECLARE_PER_CPU(unsigned int, irq_count);
|
|
+extern unsigned long kernel_eflags;
|
|
+extern asmlinkage void ignore_sysret(void);
|
|
+#else /* X86_64 */
|
|
+#ifdef CONFIG_CC_STACKPROTECTOR
|
|
+DECLARE_PER_CPU(unsigned long, stack_canary);
|
|
+#endif
|
|
+#endif /* X86_64 */
|
|
+
|
|
extern unsigned int xstate_size;
|
|
extern void free_thread_xstate(struct task_struct *);
|
|
extern struct kmem_cache *task_xstate_cachep;
|
|
@@ -659,6 +686,7 @@ static inline void __sti_mwait(unsigned
|
|
extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
|
|
|
|
extern void select_idle_routine(const struct cpuinfo_x86 *c);
|
|
+extern void init_c1e_mask(void);
|
|
|
|
extern unsigned long boot_option_idle_override;
|
|
extern unsigned long idle_halt;
|
|
@@ -696,9 +724,9 @@ extern int sysenter_setup(void);
|
|
extern struct desc_ptr early_gdt_descr;
|
|
|
|
extern void cpu_set_gdt(int);
|
|
-extern void switch_to_new_gdt(void);
|
|
+extern void switch_to_new_gdt(int);
|
|
+extern void load_percpu_segment(int);
|
|
extern void cpu_init(void);
|
|
-extern void init_gdt(int cpu);
|
|
|
|
static inline unsigned long get_debugctlmsr(void)
|
|
{
|
|
@@ -783,6 +811,7 @@ static inline void spin_lock_prefetch(co
|
|
* User space process size: 3GB (default).
|
|
*/
|
|
#define TASK_SIZE PAGE_OFFSET
|
|
+#define TASK_SIZE_MAX TASK_SIZE
|
|
#define STACK_TOP TASK_SIZE
|
|
#define STACK_TOP_MAX STACK_TOP
|
|
|
|
@@ -840,7 +869,7 @@ extern unsigned long thread_saved_pc(str
|
|
/*
|
|
* User space process size. 47bits minus one guard page.
|
|
*/
|
|
-#define TASK_SIZE64 ((1UL << 47) - PAGE_SIZE)
|
|
+#define TASK_SIZE_MAX ((1UL << 47) - PAGE_SIZE)
|
|
|
|
/* This decides where the kernel will search for a free chunk of vm
|
|
* space during mmap's.
|
|
@@ -849,12 +878,12 @@ extern unsigned long thread_saved_pc(str
|
|
0xc0000000 : 0xFFFFe000)
|
|
|
|
#define TASK_SIZE (test_thread_flag(TIF_IA32) ? \
|
|
- IA32_PAGE_OFFSET : TASK_SIZE64)
|
|
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
|
|
#define TASK_SIZE_OF(child) ((test_tsk_thread_flag(child, TIF_IA32)) ? \
|
|
- IA32_PAGE_OFFSET : TASK_SIZE64)
|
|
+ IA32_PAGE_OFFSET : TASK_SIZE_MAX)
|
|
|
|
#define STACK_TOP TASK_SIZE
|
|
-#define STACK_TOP_MAX TASK_SIZE64
|
|
+#define STACK_TOP_MAX TASK_SIZE_MAX
|
|
|
|
#define INIT_THREAD { \
|
|
.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -15,53 +15,25 @@
|
|
# include <asm/io_apic.h>
|
|
# endif
|
|
#endif
|
|
-#include <asm/pda.h>
|
|
#include <asm/thread_info.h>
|
|
-
|
|
-#ifdef CONFIG_X86_64
|
|
-
|
|
-#define cpu_callin_mask cpu_possible_mask
|
|
-#define cpu_callout_mask cpu_possible_mask
|
|
-extern cpumask_var_t cpu_initialized_mask;
|
|
-extern cpumask_var_t cpu_sibling_setup_mask;
|
|
-
|
|
-#else /* CONFIG_X86_32 */
|
|
-
|
|
-#define cpu_callin_map cpu_possible_map
|
|
-#define cpu_callout_map cpu_possible_map
|
|
-extern cpumask_t cpu_initialized;
|
|
-extern cpumask_t cpu_sibling_setup_map;
|
|
-
|
|
-#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
|
|
-#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
|
|
-#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
|
|
-#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
|
|
-
|
|
-#endif /* CONFIG_X86_32 */
|
|
-
|
|
-extern void (*mtrr_hook)(void);
|
|
-extern void zap_low_mappings(void);
|
|
-
|
|
-extern int __cpuinit get_local_pda(int cpu);
|
|
+#include <asm/cpumask.h>
|
|
|
|
extern int smp_num_siblings;
|
|
extern unsigned int num_processors;
|
|
|
|
-DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
|
|
-DECLARE_PER_CPU(cpumask_t, cpu_core_map);
|
|
+DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
|
|
+DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
|
|
DECLARE_PER_CPU(u16, cpu_llc_id);
|
|
-#ifdef CONFIG_X86_32
|
|
DECLARE_PER_CPU(int, cpu_number);
|
|
-#endif
|
|
|
|
static inline struct cpumask *cpu_sibling_mask(int cpu)
|
|
{
|
|
- return &per_cpu(cpu_sibling_map, cpu);
|
|
+ return per_cpu(cpu_sibling_map, cpu);
|
|
}
|
|
|
|
static inline struct cpumask *cpu_core_mask(int cpu)
|
|
{
|
|
- return &per_cpu(cpu_core_map, cpu);
|
|
+ return per_cpu(cpu_core_map, cpu);
|
|
}
|
|
|
|
DECLARE_PER_CPU(u16, x86_cpu_to_apicid);
|
|
@@ -149,9 +121,10 @@ static inline void arch_send_call_functi
|
|
smp_ops.send_call_func_single_ipi(cpu);
|
|
}
|
|
|
|
-static inline void arch_send_call_function_ipi(cpumask_t mask)
|
|
+#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
|
|
+static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
|
|
{
|
|
- smp_ops.send_call_func_ipi(&mask);
|
|
+ smp_ops.send_call_func_ipi(mask);
|
|
}
|
|
|
|
void cpu_disable_common(void);
|
|
@@ -176,14 +149,12 @@ void xen_send_call_func_single_ipi(int c
|
|
#define smp_send_stop xen_smp_send_stop
|
|
#define smp_send_reschedule xen_smp_send_reschedule
|
|
#define arch_send_call_function_single_ipi xen_send_call_func_single_ipi
|
|
-#define arch_send_call_function_ipi(m) xen_send_call_func_ipi(&(m))
|
|
+#define arch_send_call_function_ipi_mask xen_send_call_func_ipi
|
|
|
|
void play_dead(void);
|
|
|
|
#endif /* CONFIG_XEN */
|
|
|
|
-extern void prefill_possible_map(void);
|
|
-
|
|
void smp_store_cpu_info(int id);
|
|
#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
|
|
|
|
@@ -192,10 +163,6 @@ static inline int num_booting_cpus(void)
|
|
{
|
|
return cpumask_weight(cpu_callout_mask);
|
|
}
|
|
-#else
|
|
-static inline void prefill_possible_map(void)
|
|
-{
|
|
-}
|
|
#endif /* CONFIG_SMP */
|
|
|
|
extern unsigned disabled_cpus __cpuinitdata;
|
|
@@ -206,11 +173,11 @@ extern unsigned disabled_cpus __cpuinitd
|
|
* from the initial startup. We map APIC_BASE very early in page_setup(),
|
|
* so this is correct in the x86 case.
|
|
*/
|
|
-#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
|
|
+#define raw_smp_processor_id() (percpu_read(cpu_number))
|
|
#define safe_smp_processor_id() smp_processor_id()
|
|
|
|
#elif defined(CONFIG_X86_64_SMP)
|
|
-#define raw_smp_processor_id() read_pda(cpunumber)
|
|
+#define raw_smp_processor_id() (percpu_read(cpu_number))
|
|
|
|
#define stack_smp_processor_id() \
|
|
({ \
|
|
@@ -220,10 +187,6 @@ extern unsigned disabled_cpus __cpuinitd
|
|
})
|
|
#define safe_smp_processor_id() smp_processor_id()
|
|
|
|
-#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
|
|
-#define cpu_physical_id(cpu) boot_cpu_physical_apicid
|
|
-#define safe_smp_processor_id() 0
|
|
-#define stack_smp_processor_id() 0
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
@@ -235,28 +198,9 @@ static inline int logical_smp_processor_
|
|
return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
|
|
}
|
|
|
|
-#include <mach_apicdef.h>
|
|
-static inline unsigned int read_apic_id(void)
|
|
-{
|
|
- unsigned int reg;
|
|
-
|
|
- reg = *(u32 *)(APIC_BASE + APIC_ID);
|
|
-
|
|
- return GET_APIC_ID(reg);
|
|
-}
|
|
#endif
|
|
|
|
-
|
|
-# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
|
|
extern int hard_smp_processor_id(void);
|
|
-# else
|
|
-#include <mach_apicdef.h>
|
|
-static inline int hard_smp_processor_id(void)
|
|
-{
|
|
- /* we don't want to mark this access volatile - bad code generation */
|
|
- return read_apic_id();
|
|
-}
|
|
-# endif /* APIC_DEFINITION */
|
|
|
|
#else /* CONFIG_X86_LOCAL_APIC */
|
|
|
|
@@ -266,11 +210,5 @@ static inline int hard_smp_processor_id(
|
|
|
|
#endif /* CONFIG_X86_LOCAL_APIC */
|
|
|
|
-#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
|
|
-extern unsigned char boot_cpu_id;
|
|
-#else
|
|
-#define boot_cpu_id 0
|
|
-#endif
|
|
-
|
|
#endif /* __ASSEMBLY__ */
|
|
#endif /* _ASM_X86_SMP_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/spinlock.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/spinlock.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -255,40 +255,18 @@ static __always_inline void __ticket_spi
|
|
static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
|
|
static inline void xen_spinlock_cleanup(unsigned int cpu) {}
|
|
|
|
-/*
|
|
- * Define virtualization-friendly old-style lock byte lock, for use in
|
|
- * pv_lock_ops if desired.
|
|
- *
|
|
- * This differs from the pre-2.6.24 spinlock by always using xchgb
|
|
- * rather than decb to take the lock; this allows it to use a
|
|
- * zero-initialized lock structure. It also maintains a 1-byte
|
|
- * contention counter, so that we can implement
|
|
- * __byte_spin_is_contended.
|
|
- */
|
|
-struct __byte_spinlock {
|
|
- s8 lock;
|
|
-#if NR_CPUS < 256
|
|
- s8 spinners;
|
|
-#else
|
|
-#error NR_CPUS >= 256 support not implemented
|
|
-#endif
|
|
-};
|
|
-
|
|
static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
|
|
{
|
|
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
|
|
- return bl->lock != 0;
|
|
+ return lock->lock != 0;
|
|
}
|
|
|
|
static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
|
|
{
|
|
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
|
|
- return bl->spinners != 0;
|
|
+ return lock->spinners != 0;
|
|
}
|
|
|
|
static inline void __byte_spin_lock(raw_spinlock_t *lock)
|
|
{
|
|
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
|
|
s8 val = 1;
|
|
|
|
asm("1: xchgb %1, %0\n"
|
|
@@ -301,27 +279,25 @@ static inline void __byte_spin_lock(raw_
|
|
" " LOCK_PREFIX "decb %2\n"
|
|
" jmp 1b\n"
|
|
"3:"
|
|
- : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory");
|
|
+ : "+m" (lock->lock), "+q" (val), "+m" (lock->spinners): : "memory");
|
|
}
|
|
|
|
#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
|
|
|
|
static inline int __byte_spin_trylock(raw_spinlock_t *lock)
|
|
{
|
|
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
|
|
u8 old = 1;
|
|
|
|
asm("xchgb %1,%0"
|
|
- : "+m" (bl->lock), "+q" (old) : : "memory");
|
|
+ : "+m" (lock->lock), "+q" (old) : : "memory");
|
|
|
|
return old == 0;
|
|
}
|
|
|
|
static inline void __byte_spin_unlock(raw_spinlock_t *lock)
|
|
{
|
|
- struct __byte_spinlock *bl = (struct __byte_spinlock *)lock;
|
|
smp_wmb();
|
|
- bl->lock = 0;
|
|
+ lock->lock = 0;
|
|
}
|
|
|
|
#define __raw_spin(n) __byte_spin_##n
|
|
@@ -422,8 +398,7 @@ static inline int __raw_read_trylock(raw
|
|
{
|
|
atomic_t *count = (atomic_t *)lock;
|
|
|
|
- atomic_dec(count);
|
|
- if (atomic_read(count) >= 0)
|
|
+ if (atomic_dec_return(count) >= 0)
|
|
return 1;
|
|
atomic_inc(count);
|
|
return 0;
|
|
@@ -450,6 +425,9 @@ static inline void __raw_write_unlock(ra
|
|
: "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
|
|
}
|
|
|
|
+#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
|
|
+#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
|
|
+
|
|
#define _raw_spin_relax(lock) cpu_relax()
|
|
#define _raw_read_relax(lock) cpu_relax()
|
|
#define _raw_write_relax(lock) cpu_relax()
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/spinlock_types.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/spinlock_types.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -26,6 +26,20 @@ typedef union {
|
|
# define TICKET_SHIFT 16
|
|
u16 cur, seq;
|
|
#endif
|
|
+#else
|
|
+/*
|
|
+ * This differs from the pre-2.6.24 spinlock by always using xchgb
|
|
+ * rather than decb to take the lock; this allows it to use a
|
|
+ * zero-initialized lock structure. It also maintains a 1-byte
|
|
+ * contention counter, so that we can implement
|
|
+ * __byte_spin_is_contended.
|
|
+ */
|
|
+ u8 lock;
|
|
+#if CONFIG_NR_CPUS < 256
|
|
+ u8 spinners;
|
|
+#else
|
|
+# error NR_CPUS >= 256 not implemented
|
|
+#endif
|
|
#endif
|
|
};
|
|
} raw_spinlock_t;
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system.h 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -21,9 +21,24 @@
|
|
struct task_struct; /* one of the stranger aspects of C forward declarations */
|
|
struct task_struct *__switch_to(struct task_struct *prev,
|
|
struct task_struct *next);
|
|
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
+#ifdef CONFIG_CC_STACKPROTECTOR
|
|
+#define __switch_canary \
|
|
+ "movl %P[task_canary](%[next]), %%ebx\n\t" \
|
|
+ "movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
|
|
+#define __switch_canary_oparam \
|
|
+ , [stack_canary] "=m" (per_cpu_var(stack_canary))
|
|
+#define __switch_canary_iparam \
|
|
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
|
|
+#else /* CC_STACKPROTECTOR */
|
|
+#define __switch_canary
|
|
+#define __switch_canary_oparam
|
|
+#define __switch_canary_iparam
|
|
+#endif /* CC_STACKPROTECTOR */
|
|
+
|
|
/*
|
|
* Saving eflags is important. It switches not only IOPL between tasks,
|
|
* it also protects other tasks from NT leaking through sysenter etc.
|
|
@@ -45,6 +60,7 @@ do { \
|
|
"movl %[next_sp],%%esp\n\t" /* restore ESP */ \
|
|
"movl $1f,%[prev_ip]\n\t" /* save EIP */ \
|
|
"pushl %[next_ip]\n\t" /* restore EIP */ \
|
|
+ __switch_canary \
|
|
"jmp __switch_to\n" /* regparm call */ \
|
|
"1:\t" \
|
|
"popl %%ebp\n\t" /* restore EBP */ \
|
|
@@ -59,6 +75,8 @@ do { \
|
|
"=b" (ebx), "=c" (ecx), "=d" (edx), \
|
|
"=S" (esi), "=D" (edi) \
|
|
\
|
|
+ __switch_canary_oparam \
|
|
+ \
|
|
/* input parameters: */ \
|
|
: [next_sp] "m" (next->thread.sp), \
|
|
[next_ip] "m" (next->thread.ip), \
|
|
@@ -67,6 +85,8 @@ do { \
|
|
[prev] "a" (prev), \
|
|
[next] "d" (next) \
|
|
\
|
|
+ __switch_canary_iparam \
|
|
+ \
|
|
: /* reloaded segment registers */ \
|
|
"memory"); \
|
|
} while (0)
|
|
@@ -87,27 +107,44 @@ do { \
|
|
, "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
|
|
"r12", "r13", "r14", "r15"
|
|
|
|
+#ifdef CONFIG_CC_STACKPROTECTOR
|
|
+#define __switch_canary \
|
|
+ "movq %P[task_canary](%%rsi),%%r8\n\t" \
|
|
+ "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
|
|
+#define __switch_canary_oparam \
|
|
+ , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
|
|
+#define __switch_canary_iparam \
|
|
+ , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
|
|
+#else /* CC_STACKPROTECTOR */
|
|
+#define __switch_canary
|
|
+#define __switch_canary_oparam
|
|
+#define __switch_canary_iparam
|
|
+#endif /* CC_STACKPROTECTOR */
|
|
+
|
|
/* Save restore flags to clear handle leaking NT */
|
|
#define switch_to(prev, next, last) \
|
|
- asm volatile(SAVE_CONTEXT \
|
|
+ asm volatile(SAVE_CONTEXT \
|
|
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
|
|
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
|
|
"call __switch_to\n\t" \
|
|
".globl thread_return\n" \
|
|
"thread_return:\n\t" \
|
|
- "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \
|
|
+ "movq "__percpu_arg([current_task])",%%rsi\n\t" \
|
|
+ __switch_canary \
|
|
"movq %P[thread_info](%%rsi),%%r8\n\t" \
|
|
- LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
|
|
"movq %%rax,%%rdi\n\t" \
|
|
- "jc ret_from_fork\n\t" \
|
|
+ "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
|
|
+ "jnz ret_from_fork\n\t" \
|
|
RESTORE_CONTEXT \
|
|
: "=a" (last) \
|
|
+ __switch_canary_oparam \
|
|
: [next] "S" (next), [prev] "D" (prev), \
|
|
[threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
|
|
[ti_flags] "i" (offsetof(struct thread_info, flags)), \
|
|
- [tif_fork] "i" (TIF_FORK), \
|
|
+ [_tif_fork] "i" (_TIF_FORK), \
|
|
[thread_info] "i" (offsetof(struct task_struct, stack)), \
|
|
- [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
|
|
+ [current_task] "m" (per_cpu_var(current_task)) \
|
|
+ __switch_canary_iparam \
|
|
: "memory", "cc" __EXTRA_CLOBBER)
|
|
#endif
|
|
|
|
@@ -166,6 +203,25 @@ extern void xen_load_gs_index(unsigned);
|
|
#define savesegment(seg, value) \
|
|
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
|
|
|
|
+/*
|
|
+ * x86_32 user gs accessors.
|
|
+ */
|
|
+#ifdef CONFIG_X86_32
|
|
+#ifdef CONFIG_X86_32_LAZY_GS
|
|
+#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
|
|
+#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
|
|
+#define task_user_gs(tsk) ((tsk)->thread.gs)
|
|
+#define lazy_save_gs(v) savesegment(gs, (v))
|
|
+#define lazy_load_gs(v) loadsegment(gs, (v))
|
|
+#else /* X86_32_LAZY_GS */
|
|
+#define get_user_gs(regs) (u16)((regs)->gs)
|
|
+#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
|
|
+#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
|
|
+#define lazy_save_gs(v) do { } while (0)
|
|
+#define lazy_load_gs(v) do { } while (0)
|
|
+#endif /* X86_32_LAZY_GS */
|
|
+#endif /* X86_32 */
|
|
+
|
|
static inline unsigned long get_limit(unsigned long segment)
|
|
{
|
|
unsigned long __limit;
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush.h 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -86,21 +86,20 @@ static inline void flush_tlb_range(struc
|
|
flush_tlb_mm(vma->vm_mm);
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
#define TLBSTATE_OK 1
|
|
#define TLBSTATE_LAZY 2
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
struct tlb_state {
|
|
struct mm_struct *active_mm;
|
|
int state;
|
|
- char __cacheline_padding[L1_CACHE_BYTES-8];
|
|
};
|
|
-DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
|
|
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate);
|
|
|
|
-void reset_lazy_tlbstate(void);
|
|
-#else
|
|
static inline void reset_lazy_tlbstate(void)
|
|
{
|
|
+ percpu_write(cpu_tlbstate.state, 0);
|
|
+ percpu_write(cpu_tlbstate.active_mm, &init_mm);
|
|
}
|
|
#endif
|
|
|
|
@@ -112,4 +111,6 @@ static inline void flush_tlb_kernel_rang
|
|
flush_tlb_all();
|
|
}
|
|
|
|
+extern void zap_low_mappings(void);
|
|
+
|
|
#endif /* _ASM_X86_TLBFLUSH_H */
|
|
--- head-2010-05-25.orig/arch/x86/kernel/Makefile 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/Makefile 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -122,7 +122,6 @@ obj-$(CONFIG_X86_XEN) += fixup.o
|
|
###
|
|
# 64 bit specific files
|
|
ifeq ($(CONFIG_X86_64),y)
|
|
- obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
|
|
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
|
|
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
|
|
obj-$(CONFIG_AUDIT) += audit_64.o
|
|
@@ -134,11 +133,10 @@ ifeq ($(CONFIG_X86_64),y)
|
|
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
|
|
obj-y += vsmp_64.o
|
|
|
|
- obj-$(CONFIG_XEN) += nmi.o
|
|
time_64-$(CONFIG_XEN) += time_32.o
|
|
endif
|
|
|
|
-disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o genx2apic_%.o \
|
|
- hpet.o i8253.o i8259.o irqinit_$(BITS).o pci-swiotlb_64.o reboot.o \
|
|
- smpboot.o tlb_$(BITS).o tsc.o tsc_sync.o uv_%.o vsmp_64.o
|
|
+disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
|
|
+ i8259.o irqinit_$(BITS).o pci-swiotlb.o reboot.o smpboot.o tsc.o \
|
|
+ tsc_sync.o uv_%.o vsmp_64.o
|
|
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += probe_roms_32.o
|
|
--- head-2010-05-25.orig/arch/x86/kernel/acpi/boot.c 2010-04-15 10:05:36.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/acpi/boot.c 2010-04-15 10:07:05.000000000 +0200
|
|
@@ -115,11 +115,6 @@ char *__init __acpi_map_table(unsigned l
|
|
if (!phys || !size)
|
|
return NULL;
|
|
|
|
-#ifdef CONFIG_XEN
|
|
- if (phys + size <= (NR_FIX_ISAMAPS << PAGE_SHIFT))
|
|
- return isa_bus_to_virt(phys);
|
|
-#endif
|
|
-
|
|
return early_ioremap(phys, size);
|
|
}
|
|
void __init __acpi_unmap_table(char *map, unsigned long size)
|
|
@@ -151,8 +146,10 @@ static int __init acpi_parse_madt(struct
|
|
madt->address);
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
default_acpi_madt_oem_check(madt->header.oem_id,
|
|
madt->header.oem_table_id);
|
|
+#endif
|
|
|
|
return 0;
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/acpi/sleep-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/acpi/sleep-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -104,6 +104,7 @@ int acpi_save_state_mem(void)
|
|
stack_start.sp = temp_stack + sizeof(temp_stack);
|
|
early_gdt_descr.address =
|
|
(unsigned long)get_cpu_gdt_table(smp_processor_id());
|
|
+ initial_gs = per_cpu_offset(smp_processor_id());
|
|
#endif
|
|
initial_code = (unsigned long)wakeup_long64;
|
|
saved_magic = 0x123456789abcdef0;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/apic/Makefile 2010-05-25 09:12:09.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/apic/Makefile 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -17,3 +17,10 @@ obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
|
|
obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
|
|
obj-$(CONFIG_X86_ES7000) += es7000_32.o
|
|
obj-$(CONFIG_X86_SUMMIT) += summit_32.o
|
|
+
|
|
+obj-$(CONFIG_XEN) += nmi.o
|
|
+
|
|
+probe_64-$(CONFIG_XEN) := probe_32.o
|
|
+
|
|
+disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o
|
|
+disabled-obj-$(filter-out $(CONFIG_SMP),$(CONFIG_XEN)) += ipi.o
|
|
--- head-2010-05-25.orig/arch/x86/kernel/apic/apic-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/apic/apic-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -4,11 +4,20 @@
|
|
|
|
#include <linux/init.h>
|
|
#include <linux/interrupt.h>
|
|
+#include <linux/module.h>
|
|
|
|
#include <asm/smp.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/apic.h>
|
|
|
|
+unsigned int num_processors;
|
|
+
|
|
+/*
|
|
+ * Map cpu index to physical APIC ID
|
|
+ */
|
|
+DEFINE_PER_CPU(u16, x86_cpu_to_apicid) = BAD_APICID;
|
|
+EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
|
|
+
|
|
/*
|
|
* Debug level, exported for io_apic.c
|
|
*/
|
|
--- head-2010-05-25.orig/arch/x86/kernel/apic/io_apic-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/apic/io_apic-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,7 +1,7 @@
|
|
/*
|
|
* Intel IO-APIC support for multi-Pentium hosts.
|
|
*
|
|
- * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo
|
|
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
|
|
*
|
|
* Many thanks to Stig Venaas for trying out countless experimental
|
|
* patches and reporting/debugging problems patiently!
|
|
@@ -46,6 +46,7 @@
|
|
#include <asm/idle.h>
|
|
#include <asm/io.h>
|
|
#include <asm/smp.h>
|
|
+#include <asm/cpu.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/acpi.h>
|
|
@@ -61,9 +62,7 @@
|
|
#include <asm/uv/uv_hub.h>
|
|
#include <asm/uv/uv_irq.h>
|
|
|
|
-#include <mach_ipi.h>
|
|
-#include <mach_apic.h>
|
|
-#include <mach_apicdef.h>
|
|
+#include <asm/apic.h>
|
|
|
|
#ifdef CONFIG_XEN
|
|
#include <xen/interface/xen.h>
|
|
@@ -97,11 +96,11 @@ static DEFINE_SPINLOCK(vector_lock);
|
|
int nr_ioapic_registers[MAX_IO_APICS];
|
|
|
|
/* I/O APIC entries */
|
|
-struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
|
|
+struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
|
|
int nr_ioapics;
|
|
|
|
/* MP IRQ source entries */
|
|
-struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
+struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
|
|
|
|
/* # of MP IRQ source entries */
|
|
int mp_irq_entries;
|
|
@@ -114,10 +113,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BU
|
|
|
|
int skip_ioapic_setup;
|
|
|
|
+static void __init _arch_disable_smp_support(void)
|
|
+{
|
|
+#ifdef CONFIG_PCI
|
|
+ noioapicquirk = 1;
|
|
+ noioapicreroute = -1;
|
|
+#endif
|
|
+ skip_ioapic_setup = 1;
|
|
+}
|
|
+
|
|
static int __init parse_noapic(char *str)
|
|
{
|
|
/* disable IO-APIC */
|
|
- disable_ioapic_setup();
|
|
+ _arch_disable_smp_support();
|
|
return 0;
|
|
}
|
|
early_param("noapic", parse_noapic);
|
|
@@ -372,7 +380,7 @@ set_extra_move_desc(struct irq_desc *des
|
|
|
|
if (!cfg->move_in_progress) {
|
|
/* it means that domain is not changed */
|
|
- if (!cpumask_intersects(&desc->affinity, mask))
|
|
+ if (!cpumask_intersects(desc->affinity, mask))
|
|
cfg->move_desc_pending = 1;
|
|
}
|
|
}
|
|
@@ -397,12 +405,20 @@ struct io_apic {
|
|
unsigned int index;
|
|
unsigned int unused[3];
|
|
unsigned int data;
|
|
+ unsigned int unused2[11];
|
|
+ unsigned int eoi;
|
|
};
|
|
|
|
static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
{
|
|
return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
- + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK);
|
|
+ + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
|
|
+}
|
|
+
|
|
+static inline void io_apic_eoi(unsigned int apic, unsigned int vector)
|
|
+{
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(vector, &io_apic->eoi);
|
|
}
|
|
#endif /* CONFIG_XEN */
|
|
|
|
@@ -416,7 +432,7 @@ static inline unsigned int io_apic_read(
|
|
struct physdev_apic apic_op;
|
|
int ret;
|
|
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].apicaddr;
|
|
apic_op.reg = reg;
|
|
ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
if (ret)
|
|
@@ -434,7 +450,7 @@ static inline void io_apic_write(unsigne
|
|
#else
|
|
struct physdev_apic apic_op;
|
|
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mp_apicaddr;
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].apicaddr;
|
|
apic_op.reg = reg;
|
|
apic_op.value = value;
|
|
WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
@@ -522,7 +538,7 @@ __ioapic_write_entry(int apic, int pin,
|
|
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
}
|
|
|
|
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
{
|
|
unsigned long flags;
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
@@ -558,11 +574,11 @@ static void send_cleanup_vector(struct i
|
|
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
|
|
cfg->move_cleanup_count++;
|
|
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
|
|
- send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
|
|
} else {
|
|
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
|
|
cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
|
|
- send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
free_cpumask_var(cleanup_mask);
|
|
}
|
|
cfg->move_in_progress = 0;
|
|
@@ -583,16 +599,12 @@ static void __target_IO_APIC_irq(unsigne
|
|
|
|
apic = entry->apic;
|
|
pin = entry->pin;
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
/*
|
|
* With interrupt-remapping, destination information comes
|
|
* from interrupt-remapping table entry.
|
|
*/
|
|
if (!irq_remapped(irq))
|
|
io_apic_write(apic, 0x11 + pin*2, dest);
|
|
-#else
|
|
- io_apic_write(apic, 0x11 + pin*2, dest);
|
|
-#endif
|
|
reg = io_apic_read(apic, 0x10 + pin*2);
|
|
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
|
|
reg |= vector;
|
|
@@ -607,8 +619,9 @@ static int
|
|
assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
|
|
|
|
/*
|
|
- * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid
|
|
- * of that, or returns BAD_APICID and leaves desc->affinity untouched.
|
|
+ * Either sets desc->affinity to a valid value, and returns
|
|
+ * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
|
|
+ * leaves desc->affinity untouched.
|
|
*/
|
|
static unsigned int
|
|
set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
|
|
@@ -624,9 +637,12 @@ set_desc_affinity(struct irq_desc *desc,
|
|
if (assign_irq_vector(irq, cfg, mask))
|
|
return BAD_APICID;
|
|
|
|
- cpumask_and(&desc->affinity, cfg->domain, mask);
|
|
+	/* check this before desc->affinity gets updated */
|
|
set_extra_move_desc(desc, mask);
|
|
- return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask);
|
|
+
|
|
+ cpumask_copy(desc->affinity, mask);
|
|
+
|
|
+ return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
|
|
}
|
|
|
|
static void
|
|
@@ -840,23 +856,6 @@ static void clear_IO_APIC (void)
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
clear_IO_APIC_pin(apic, pin);
|
|
}
|
|
-
|
|
-#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
|
|
-void send_IPI_self(int vector)
|
|
-{
|
|
- unsigned int cfg;
|
|
-
|
|
- /*
|
|
- * Wait for idle.
|
|
- */
|
|
- apic_wait_icr_idle();
|
|
- cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
|
|
- /*
|
|
- * Send the IPI. The write to APIC_ICR fires this off.
|
|
- */
|
|
- apic_write(APIC_ICR, cfg);
|
|
-}
|
|
-#endif /* !CONFIG_SMP && CONFIG_X86_32*/
|
|
#else
|
|
#define add_pin_to_irq_cpu(cfg, cpu, apic, pin)
|
|
#endif /* CONFIG_XEN */
|
|
@@ -868,8 +867,9 @@ void send_IPI_self(int vector)
|
|
*/
|
|
|
|
#define MAX_PIRQS 8
|
|
-static int pirq_entries [MAX_PIRQS];
|
|
-static int pirqs_enabled;
|
|
+static int pirq_entries[MAX_PIRQS] = {
|
|
+ [0 ... MAX_PIRQS - 1] = -1
|
|
+};
|
|
|
|
static int __init ioapic_pirq_setup(char *str)
|
|
{
|
|
@@ -878,10 +878,6 @@ static int __init ioapic_pirq_setup(char
|
|
|
|
get_options(str, ARRAY_SIZE(ints), ints);
|
|
|
|
- for (i = 0; i < MAX_PIRQS; i++)
|
|
- pirq_entries[i] = -1;
|
|
-
|
|
- pirqs_enabled = 1;
|
|
apic_printk(APIC_VERBOSE, KERN_INFO
|
|
"PIRQ redirection, working around broken MP-BIOS.\n");
|
|
max = MAX_PIRQS;
|
|
@@ -903,75 +899,106 @@ __setup("pirq=", ioapic_pirq_setup);
|
|
#endif /* CONFIG_X86_32 */
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
-/* I/O APIC RTE contents at the OS boot up */
|
|
-static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
|
|
+struct IO_APIC_route_entry **alloc_ioapic_entries(void)
|
|
+{
|
|
+ int apic;
|
|
+ struct IO_APIC_route_entry **ioapic_entries;
|
|
+
|
|
+ ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics,
|
|
+ GFP_ATOMIC);
|
|
+ if (!ioapic_entries)
|
|
+ return 0;
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ ioapic_entries[apic] =
|
|
+ kzalloc(sizeof(struct IO_APIC_route_entry) *
|
|
+ nr_ioapic_registers[apic], GFP_ATOMIC);
|
|
+ if (!ioapic_entries[apic])
|
|
+ goto nomem;
|
|
+ }
|
|
+
|
|
+ return ioapic_entries;
|
|
+
|
|
+nomem:
|
|
+ while (--apic >= 0)
|
|
+ kfree(ioapic_entries[apic]);
|
|
+ kfree(ioapic_entries);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
|
|
/*
|
|
- * Saves and masks all the unmasked IO-APIC RTE's
|
|
+ * Saves all the IO-APIC RTE's
|
|
*/
|
|
-int save_mask_IO_APIC_setup(void)
|
|
+int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
|
|
{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- unsigned long flags;
|
|
int apic, pin;
|
|
|
|
- /*
|
|
- * The number of IO-APIC IRQ registers (== #pins):
|
|
- */
|
|
+ if (!ioapic_entries)
|
|
+ return -ENOMEM;
|
|
+
|
|
for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(apic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- nr_ioapic_registers[apic] = reg_01.bits.entries+1;
|
|
+ if (!ioapic_entries[apic])
|
|
+ return -ENOMEM;
|
|
+
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
+ ioapic_entries[apic][pin] =
|
|
+ ioapic_read_entry(apic, pin);
|
|
}
|
|
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Mask all IO APIC entries.
|
|
+ */
|
|
+void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
|
|
+{
|
|
+ int apic, pin;
|
|
+
|
|
+ if (!ioapic_entries)
|
|
+ return;
|
|
+
|
|
for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- early_ioapic_entries[apic] =
|
|
- kzalloc(sizeof(struct IO_APIC_route_entry) *
|
|
- nr_ioapic_registers[apic], GFP_KERNEL);
|
|
- if (!early_ioapic_entries[apic])
|
|
- goto nomem;
|
|
- }
|
|
+ if (!ioapic_entries[apic])
|
|
+ break;
|
|
|
|
- for (apic = 0; apic < nr_ioapics; apic++)
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
- entry = early_ioapic_entries[apic][pin] =
|
|
- ioapic_read_entry(apic, pin);
|
|
+ entry = ioapic_entries[apic][pin];
|
|
if (!entry.mask) {
|
|
entry.mask = 1;
|
|
ioapic_write_entry(apic, pin, entry);
|
|
}
|
|
}
|
|
-
|
|
- return 0;
|
|
-
|
|
-nomem:
|
|
- while (apic >= 0)
|
|
- kfree(early_ioapic_entries[apic--]);
|
|
- memset(early_ioapic_entries, 0,
|
|
- ARRAY_SIZE(early_ioapic_entries));
|
|
-
|
|
- return -ENOMEM;
|
|
+ }
|
|
}
|
|
|
|
-void restore_IO_APIC_setup(void)
|
|
+/*
|
|
+ * Restore IO APIC entries which was saved in ioapic_entries.
|
|
+ */
|
|
+int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries)
|
|
{
|
|
int apic, pin;
|
|
|
|
+ if (!ioapic_entries)
|
|
+ return -ENOMEM;
|
|
+
|
|
for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- if (!early_ioapic_entries[apic])
|
|
- break;
|
|
+ if (!ioapic_entries[apic])
|
|
+ return -ENOMEM;
|
|
+
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
|
|
ioapic_write_entry(apic, pin,
|
|
- early_ioapic_entries[apic][pin]);
|
|
- kfree(early_ioapic_entries[apic]);
|
|
- early_ioapic_entries[apic] = NULL;
|
|
+ ioapic_entries[apic][pin]);
|
|
}
|
|
+ return 0;
|
|
}
|
|
|
|
-void reinit_intr_remapped_IO_APIC(int intr_remapping)
|
|
+void reinit_intr_remapped_IO_APIC(int intr_remapping,
|
|
+ struct IO_APIC_route_entry **ioapic_entries)
|
|
+
|
|
{
|
|
/*
|
|
* for now plain restore of previous settings.
|
|
@@ -980,7 +1007,17 @@ void reinit_intr_remapped_IO_APIC(int in
|
|
* table entries. for now, do a plain restore, and wait for
|
|
* the setup_IO_APIC_irqs() to do proper initialization.
|
|
*/
|
|
- restore_IO_APIC_setup();
|
|
+ restore_IO_APIC_setup(ioapic_entries);
|
|
+}
|
|
+
|
|
+void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries)
|
|
+{
|
|
+ int apic;
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++)
|
|
+ kfree(ioapic_entries[apic]);
|
|
+
|
|
+ kfree(ioapic_entries);
|
|
}
|
|
#endif
|
|
|
|
@@ -992,10 +1029,10 @@ static int find_irq_entry(int apic, int
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_irqtype == type &&
|
|
- (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid ||
|
|
- mp_irqs[i].mp_dstapic == MP_APIC_ALL) &&
|
|
- mp_irqs[i].mp_dstirq == pin)
|
|
+ if (mp_irqs[i].irqtype == type &&
|
|
+ (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
|
|
+ mp_irqs[i].dstapic == MP_APIC_ALL) &&
|
|
+ mp_irqs[i].dstirq == pin)
|
|
return i;
|
|
|
|
return -1;
|
|
@@ -1010,13 +1047,13 @@ static int __init find_isa_irq_pin(int i
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
+ int lbus = mp_irqs[i].srcbus;
|
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mp_irqtype == type) &&
|
|
- (mp_irqs[i].mp_srcbusirq == irq))
|
|
+ (mp_irqs[i].irqtype == type) &&
|
|
+ (mp_irqs[i].srcbusirq == irq))
|
|
|
|
- return mp_irqs[i].mp_dstirq;
|
|
+ return mp_irqs[i].dstirq;
|
|
}
|
|
return -1;
|
|
}
|
|
@@ -1026,17 +1063,17 @@ static int __init find_isa_irq_apic(int
|
|
int i;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
+ int lbus = mp_irqs[i].srcbus;
|
|
|
|
if (test_bit(lbus, mp_bus_not_pci) &&
|
|
- (mp_irqs[i].mp_irqtype == type) &&
|
|
- (mp_irqs[i].mp_srcbusirq == irq))
|
|
+ (mp_irqs[i].irqtype == type) &&
|
|
+ (mp_irqs[i].srcbusirq == irq))
|
|
break;
|
|
}
|
|
if (i < mp_irq_entries) {
|
|
int apic;
|
|
for(apic = 0; apic < nr_ioapics; apic++) {
|
|
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic)
|
|
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
|
|
return apic;
|
|
}
|
|
}
|
|
@@ -1062,23 +1099,23 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
return -1;
|
|
}
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- int lbus = mp_irqs[i].mp_srcbus;
|
|
+ int lbus = mp_irqs[i].srcbus;
|
|
|
|
for (apic = 0; apic < nr_ioapics; apic++)
|
|
- if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic ||
|
|
- mp_irqs[i].mp_dstapic == MP_APIC_ALL)
|
|
+ if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
|
|
+ mp_irqs[i].dstapic == MP_APIC_ALL)
|
|
break;
|
|
|
|
if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
- !mp_irqs[i].mp_irqtype &&
|
|
+ !mp_irqs[i].irqtype &&
|
|
(bus == lbus) &&
|
|
- (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) {
|
|
- int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq);
|
|
+ (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
|
|
+ int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
|
|
|
|
if (!(apic || IO_APIC_IRQ(irq)))
|
|
continue;
|
|
|
|
- if (pin == (mp_irqs[i].mp_srcbusirq & 3))
|
|
+ if (pin == (mp_irqs[i].srcbusirq & 3))
|
|
return irq;
|
|
/*
|
|
* Use the first all-but-pin matching entry as a
|
|
@@ -1121,7 +1158,7 @@ static int EISA_ELCR(unsigned int irq)
|
|
* EISA conforming in the MP table, that means its trigger type must
|
|
* be read in from the ELCR */
|
|
|
|
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq))
|
|
+#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
|
|
#define default_EISA_polarity(idx) default_ISA_polarity(idx)
|
|
|
|
/* PCI interrupts are always polarity one level triggered,
|
|
@@ -1138,13 +1175,13 @@ static int EISA_ELCR(unsigned int irq)
|
|
|
|
static int MPBIOS_polarity(int idx)
|
|
{
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
+ int bus = mp_irqs[idx].srcbus;
|
|
int polarity;
|
|
|
|
/*
|
|
* Determine IRQ line polarity (high active or low active):
|
|
*/
|
|
- switch (mp_irqs[idx].mp_irqflag & 3)
|
|
+ switch (mp_irqs[idx].irqflag & 3)
|
|
{
|
|
case 0: /* conforms, ie. bus-type dependent polarity */
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
@@ -1180,13 +1217,13 @@ static int MPBIOS_polarity(int idx)
|
|
|
|
static int MPBIOS_trigger(int idx)
|
|
{
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
+ int bus = mp_irqs[idx].srcbus;
|
|
int trigger;
|
|
|
|
/*
|
|
* Determine IRQ trigger mode (edge or level sensitive):
|
|
*/
|
|
- switch ((mp_irqs[idx].mp_irqflag>>2) & 3)
|
|
+ switch ((mp_irqs[idx].irqflag>>2) & 3)
|
|
{
|
|
case 0: /* conforms, ie. bus-type dependent */
|
|
if (test_bit(bus, mp_bus_not_pci))
|
|
@@ -1264,16 +1301,16 @@ int (*ioapic_renumber_irq)(int ioapic, i
|
|
static int pin_2_irq(int idx, int apic, int pin)
|
|
{
|
|
int irq, i;
|
|
- int bus = mp_irqs[idx].mp_srcbus;
|
|
+ int bus = mp_irqs[idx].srcbus;
|
|
|
|
/*
|
|
* Debugging check, we are in big trouble if this message pops up!
|
|
*/
|
|
- if (mp_irqs[idx].mp_dstirq != pin)
|
|
+ if (mp_irqs[idx].dstirq != pin)
|
|
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
|
|
if (test_bit(bus, mp_bus_not_pci)) {
|
|
- irq = mp_irqs[idx].mp_srcbusirq;
|
|
+ irq = mp_irqs[idx].srcbusirq;
|
|
} else {
|
|
/*
|
|
* PCI IRQs are mapped in order
|
|
@@ -1366,7 +1403,7 @@ __assign_irq_vector(int irq, struct irq_
|
|
int new_cpu;
|
|
int vector, offset;
|
|
|
|
- vector_allocation_domain(cpu, tmp_mask);
|
|
+ apic->vector_allocation_domain(cpu, tmp_mask);
|
|
|
|
vector = current_vector;
|
|
offset = current_offset;
|
|
@@ -1476,9 +1513,7 @@ void __setup_vector_irq(int cpu)
|
|
}
|
|
|
|
static struct irq_chip ioapic_chip;
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
static struct irq_chip ir_ioapic_chip;
|
|
-#endif
|
|
|
|
#define IOAPIC_AUTO -1
|
|
#define IOAPIC_EDGE 0
|
|
@@ -1517,7 +1552,6 @@ static void ioapic_register_intr(int irq
|
|
else
|
|
desc->status &= ~IRQ_LEVEL;
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
if (irq_remapped(irq)) {
|
|
desc->status |= IRQ_MOVE_PCNTXT;
|
|
if (trigger)
|
|
@@ -1529,7 +1563,7 @@ static void ioapic_register_intr(int irq
|
|
handle_edge_irq, "edge");
|
|
return;
|
|
}
|
|
-#endif
|
|
+
|
|
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
trigger == IOAPIC_LEVEL)
|
|
set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
@@ -1544,37 +1578,44 @@ static void ioapic_register_intr(int irq
|
|
#define ioapic_register_intr(irq, desc, trigger) evtchn_register_pirq(irq)
|
|
#endif
|
|
|
|
-static int setup_ioapic_entry(int apic, int irq,
|
|
- struct IO_APIC_route_entry *entry,
|
|
- unsigned int destination, int trigger,
|
|
- int polarity, int vector)
|
|
+int setup_ioapic_entry(int apic_id, int irq,
|
|
+ struct IO_APIC_route_entry *entry,
|
|
+ unsigned int destination, int trigger,
|
|
+ int polarity, int vector, int pin)
|
|
{
|
|
/*
|
|
* add it to the IO-APIC irq-routing table:
|
|
*/
|
|
memset(entry,0,sizeof(*entry));
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
if (intr_remapping_enabled) {
|
|
- struct intel_iommu *iommu = map_ioapic_to_ir(apic);
|
|
+#ifndef CONFIG_XEN
|
|
+ struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
|
|
struct irte irte;
|
|
struct IR_IO_APIC_route_entry *ir_entry =
|
|
(struct IR_IO_APIC_route_entry *) entry;
|
|
int index;
|
|
|
|
if (!iommu)
|
|
- panic("No mapping iommu for ioapic %d\n", apic);
|
|
+ panic("No mapping iommu for ioapic %d\n", apic_id);
|
|
|
|
index = alloc_irte(iommu, irq, 1);
|
|
if (index < 0)
|
|
- panic("Failed to allocate IRTE for ioapic %d\n", apic);
|
|
+ panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
|
|
|
|
memset(&irte, 0, sizeof(irte));
|
|
|
|
irte.present = 1;
|
|
- irte.dst_mode = INT_DEST_MODE;
|
|
- irte.trigger_mode = trigger;
|
|
- irte.dlvry_mode = INT_DELIVERY_MODE;
|
|
+ irte.dst_mode = apic->irq_dest_mode;
|
|
+ /*
|
|
+ * Trigger mode in the IRTE will always be edge, and the
|
|
+ * actual level or edge trigger will be setup in the IO-APIC
|
|
+ * RTE. This will help simplify level triggered irq migration.
|
|
+ * For more details, see the comments above explainig IO-APIC
|
|
+ * irq migration in the presence of interrupt-remapping.
|
|
+ */
|
|
+ irte.trigger_mode = 0;
|
|
+ irte.dlvry_mode = apic->irq_delivery_mode;
|
|
irte.vector = vector;
|
|
irte.dest_id = IRTE_DEST(destination);
|
|
|
|
@@ -1584,18 +1625,22 @@ static int setup_ioapic_entry(int apic,
|
|
ir_entry->zero = 0;
|
|
ir_entry->format = 1;
|
|
ir_entry->index = (index & 0x7fff);
|
|
- } else
|
|
+ /*
|
|
+ * IO-APIC RTE will be configured with virtual vector.
|
|
+ * irq handler will do the explicit EOI to the io-apic.
|
|
+ */
|
|
+ ir_entry->vector = pin;
|
|
#endif
|
|
- {
|
|
- entry->delivery_mode = INT_DELIVERY_MODE;
|
|
- entry->dest_mode = INT_DEST_MODE;
|
|
+ } else {
|
|
+ entry->delivery_mode = apic->irq_delivery_mode;
|
|
+ entry->dest_mode = apic->irq_dest_mode;
|
|
entry->dest = destination;
|
|
+ entry->vector = vector;
|
|
}
|
|
|
|
entry->mask = 0; /* enable IRQ */
|
|
entry->trigger = trigger;
|
|
entry->polarity = polarity;
|
|
- entry->vector = vector;
|
|
|
|
/* Mask level triggered irqs.
|
|
* Use IRQ_DELAYED_DISABLE for edge triggered irqs.
|
|
@@ -1605,7 +1650,7 @@ static int setup_ioapic_entry(int apic,
|
|
return 0;
|
|
}
|
|
|
|
-static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc,
|
|
+static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
|
|
int trigger, int polarity)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
@@ -1617,26 +1662,26 @@ static void setup_IO_APIC_irq(int apic,
|
|
|
|
cfg = desc->chip_data;
|
|
|
|
- if (assign_irq_vector(irq, cfg, TARGET_CPUS))
|
|
+ if (assign_irq_vector(irq, cfg, apic->target_cpus()))
|
|
return;
|
|
|
|
#ifndef CONFIG_XEN
|
|
- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
|
|
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
|
|
#else
|
|
- dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
+ dest = 0; /* meaningless */
|
|
#endif
|
|
|
|
apic_printk(APIC_VERBOSE,KERN_DEBUG
|
|
"IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
|
|
"IRQ %d Mode:%i Active:%i)\n",
|
|
- apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
|
|
+ apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
|
|
irq, trigger, polarity);
|
|
|
|
|
|
- if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
|
|
- dest, trigger, polarity, cfg->vector)) {
|
|
+ if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
|
|
+ dest, trigger, polarity, cfg->vector, pin)) {
|
|
printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
|
|
- mp_ioapics[apic].mp_apicid, pin);
|
|
+ mp_ioapics[apic_id].apicid, pin);
|
|
__clear_irq_vector(irq, cfg);
|
|
return;
|
|
}
|
|
@@ -1645,12 +1690,12 @@ static void setup_IO_APIC_irq(int apic,
|
|
if (irq < NR_IRQS_LEGACY)
|
|
disable_8259A_irq(irq);
|
|
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
+ ioapic_write_entry(apic_id, pin, entry);
|
|
}
|
|
|
|
static void __init setup_IO_APIC_irqs(void)
|
|
{
|
|
- int apic, pin, idx, irq;
|
|
+ int apic_id, pin, idx, irq;
|
|
int notcon = 0;
|
|
struct irq_desc *desc;
|
|
struct irq_cfg *cfg;
|
|
@@ -1658,21 +1703,19 @@ static void __init setup_IO_APIC_irqs(vo
|
|
|
|
apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
|
|
|
|
- idx = find_irq_entry(apic, pin, mp_INT);
|
|
+ idx = find_irq_entry(apic_id, pin, mp_INT);
|
|
if (idx == -1) {
|
|
if (!notcon) {
|
|
notcon = 1;
|
|
apic_printk(APIC_VERBOSE,
|
|
KERN_DEBUG " %d-%d",
|
|
- mp_ioapics[apic].mp_apicid,
|
|
- pin);
|
|
+ mp_ioapics[apic_id].apicid, pin);
|
|
} else
|
|
apic_printk(APIC_VERBOSE, " %d-%d",
|
|
- mp_ioapics[apic].mp_apicid,
|
|
- pin);
|
|
+ mp_ioapics[apic_id].apicid, pin);
|
|
continue;
|
|
}
|
|
if (notcon) {
|
|
@@ -1681,23 +1724,30 @@ static void __init setup_IO_APIC_irqs(vo
|
|
notcon = 0;
|
|
}
|
|
|
|
- irq = pin_2_irq(idx, apic, pin);
|
|
-#if defined(CONFIG_XEN)
|
|
- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS)
|
|
+ irq = pin_2_irq(idx, apic_id, pin);
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
|
|
continue;
|
|
-#elif defined(CONFIG_X86_32)
|
|
- if (multi_timer_check(apic, irq))
|
|
+#else
|
|
+ /*
|
|
+ * Skip the timer IRQ if there's a quirk handler
|
|
+ * installed and if it returns 1:
|
|
+ */
|
|
+ if (apic->multi_timer_check &&
|
|
+ apic->multi_timer_check(apic_id, irq))
|
|
continue;
|
|
#endif
|
|
+
|
|
desc = irq_to_desc_alloc_cpu(irq, cpu);
|
|
if (!desc) {
|
|
printk(KERN_INFO "can not get irq_desc for %d\n", irq);
|
|
continue;
|
|
}
|
|
cfg = desc->chip_data;
|
|
- add_pin_to_irq_cpu(cfg, cpu, apic, pin);
|
|
+ add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
|
|
|
|
- setup_IO_APIC_irq(apic, pin, irq, desc,
|
|
+ setup_IO_APIC_irq(apic_id, pin, irq, desc,
|
|
irq_trigger(idx), irq_polarity(idx));
|
|
}
|
|
}
|
|
@@ -1711,15 +1761,13 @@ static void __init setup_IO_APIC_irqs(vo
|
|
/*
|
|
* Set up the timer pin, possibly with the 8259A-master behind.
|
|
*/
|
|
-static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
|
|
+static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
|
|
int vector)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
if (intr_remapping_enabled)
|
|
return;
|
|
-#endif
|
|
|
|
memset(&entry, 0, sizeof(entry));
|
|
|
|
@@ -1727,10 +1775,10 @@ static void __init setup_timer_IRQ0_pin(
|
|
* We use logical delivery to get the timer IRQ
|
|
* to the first CPU.
|
|
*/
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 1; /* mask IRQ now */
|
|
- entry.dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
+ entry.dest_mode = apic->irq_dest_mode;
|
|
+ entry.mask = 0; /* don't mask IRQ for edge */
|
|
+ entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
|
|
+ entry.delivery_mode = apic->irq_delivery_mode;
|
|
entry.polarity = 0;
|
|
entry.trigger = 0;
|
|
entry.vector = vector;
|
|
@@ -1744,7 +1792,7 @@ static void __init setup_timer_IRQ0_pin(
|
|
/*
|
|
* Add it to the IO-APIC irq-routing table:
|
|
*/
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
+ ioapic_write_entry(apic_id, pin, entry);
|
|
}
|
|
|
|
|
|
@@ -1766,7 +1814,7 @@ __apicdebuginit(void) print_IO_APIC(void
|
|
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
for (i = 0; i < nr_ioapics; i++)
|
|
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
- mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]);
|
|
+ mp_ioapics[i].apicid, nr_ioapic_registers[i]);
|
|
|
|
/*
|
|
* We are a bit conservative about what we expect. We have to
|
|
@@ -1786,7 +1834,7 @@ __apicdebuginit(void) print_IO_APIC(void
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
printk("\n");
|
|
- printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid);
|
|
+ printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
|
|
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
|
|
@@ -2050,13 +2098,6 @@ void __init enable_IO_APIC(void)
|
|
int apic;
|
|
unsigned long flags;
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- int i;
|
|
- if (!pirqs_enabled)
|
|
- for (i = 0; i < MAX_PIRQS; i++)
|
|
- pirq_entries[i] = -1;
|
|
-#endif
|
|
-
|
|
/*
|
|
* The number of IO-APIC IRQ registers (== #pins):
|
|
*/
|
|
@@ -2129,8 +2170,13 @@ void disable_IO_APIC(void)
|
|
* If the i8259 is routed through an IOAPIC
|
|
* Put that IOAPIC in virtual wire mode
|
|
* so legacy interrupts can be delivered.
|
|
+ *
|
|
+ * With interrupt-remapping, for now we will use virtual wire A mode,
|
|
+ * as virtual wire B is little complex (need to configure both
|
|
+ * IOAPIC RTE aswell as interrupt-remapping table entry).
|
|
+ * As this gets called during crash dump, keep this simple for now.
|
|
*/
|
|
- if (ioapic_i8259.pin != -1) {
|
|
+ if (ioapic_i8259.pin != -1 && !intr_remapping_enabled) {
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
memset(&entry, 0, sizeof(entry));
|
|
@@ -2150,7 +2196,10 @@ void disable_IO_APIC(void)
|
|
ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
|
}
|
|
|
|
- disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
|
+ /*
|
|
+ * Use virtual wire A mode when interrupt remapping is enabled.
|
|
+ */
|
|
+ disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1);
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
@@ -2165,7 +2214,7 @@ static void __init setup_ioapic_ids_from
|
|
{
|
|
union IO_APIC_reg_00 reg_00;
|
|
physid_mask_t phys_id_present_map;
|
|
- int apic;
|
|
+ int apic_id;
|
|
int i;
|
|
unsigned char old_id;
|
|
unsigned long flags;
|
|
@@ -2184,26 +2233,26 @@ static void __init setup_ioapic_ids_from
|
|
* This is broken; anything with a real cpu count has to
|
|
* circumvent this idiocy regardless.
|
|
*/
|
|
- phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map);
|
|
+ phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
|
|
|
|
/*
|
|
* Set the IOAPIC ID to the value stored in the MPC table.
|
|
*/
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
|
|
|
|
/* Read the register 0 value */
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
+ reg_00.raw = io_apic_read(apic_id, 0);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
- old_id = mp_ioapics[apic].mp_apicid;
|
|
+ old_id = mp_ioapics[apic_id].apicid;
|
|
|
|
- if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) {
|
|
+ if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
|
|
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
|
|
- apic, mp_ioapics[apic].mp_apicid);
|
|
+ apic_id, mp_ioapics[apic_id].apicid);
|
|
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
reg_00.bits.ID);
|
|
- mp_ioapics[apic].mp_apicid = reg_00.bits.ID;
|
|
+ mp_ioapics[apic_id].apicid = reg_00.bits.ID;
|
|
}
|
|
|
|
/*
|
|
@@ -2211,10 +2260,10 @@ static void __init setup_ioapic_ids_from
|
|
* system must have a unique ID or we get lots of nice
|
|
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
*/
|
|
- if (check_apicid_used(phys_id_present_map,
|
|
- mp_ioapics[apic].mp_apicid)) {
|
|
+ if (apic->check_apicid_used(phys_id_present_map,
|
|
+ mp_ioapics[apic_id].apicid)) {
|
|
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
|
|
- apic, mp_ioapics[apic].mp_apicid);
|
|
+ apic_id, mp_ioapics[apic_id].apicid);
|
|
for (i = 0; i < get_physical_broadcast(); i++)
|
|
if (!physid_isset(i, phys_id_present_map))
|
|
break;
|
|
@@ -2223,13 +2272,13 @@ static void __init setup_ioapic_ids_from
|
|
printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
|
|
i);
|
|
physid_set(i, phys_id_present_map);
|
|
- mp_ioapics[apic].mp_apicid = i;
|
|
+ mp_ioapics[apic_id].apicid = i;
|
|
} else {
|
|
physid_mask_t tmp;
|
|
- tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid);
|
|
+ tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
|
|
apic_printk(APIC_VERBOSE, "Setting %d in the "
|
|
"phys_id_present_map\n",
|
|
- mp_ioapics[apic].mp_apicid);
|
|
+ mp_ioapics[apic_id].apicid);
|
|
physids_or(phys_id_present_map, phys_id_present_map, tmp);
|
|
}
|
|
|
|
@@ -2238,11 +2287,11 @@ static void __init setup_ioapic_ids_from
|
|
* We need to adjust the IRQ routing table
|
|
* if the ID changed.
|
|
*/
|
|
- if (old_id != mp_ioapics[apic].mp_apicid)
|
|
+ if (old_id != mp_ioapics[apic_id].apicid)
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_dstapic == old_id)
|
|
- mp_irqs[i].mp_dstapic
|
|
- = mp_ioapics[apic].mp_apicid;
|
|
+ if (mp_irqs[i].dstapic == old_id)
|
|
+ mp_irqs[i].dstapic
|
|
+ = mp_ioapics[apic_id].apicid;
|
|
|
|
/*
|
|
* Read the right value from the MPC table and
|
|
@@ -2250,20 +2299,20 @@ static void __init setup_ioapic_ids_from
|
|
*/
|
|
apic_printk(APIC_VERBOSE, KERN_INFO
|
|
"...changing IO-APIC physical APIC ID to %d ...",
|
|
- mp_ioapics[apic].mp_apicid);
|
|
+ mp_ioapics[apic_id].apicid);
|
|
|
|
- reg_00.bits.ID = mp_ioapics[apic].mp_apicid;
|
|
+ reg_00.bits.ID = mp_ioapics[apic_id].apicid;
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0, reg_00.raw);
|
|
+ io_apic_write(apic_id, 0, reg_00.raw);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
/*
|
|
* Sanity check
|
|
*/
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
+ reg_00.raw = io_apic_read(apic_id, 0);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid)
|
|
+ if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
|
|
printk("could not set ID!\n");
|
|
else
|
|
apic_printk(APIC_VERBOSE, " ok.\n");
|
|
@@ -2366,7 +2415,7 @@ static int ioapic_retrigger_irq(unsigned
|
|
unsigned long flags;
|
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
- send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
|
|
+ apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
return 1;
|
|
@@ -2374,7 +2423,7 @@ static int ioapic_retrigger_irq(unsigned
|
|
#else
|
|
static int ioapic_retrigger_irq(unsigned int irq)
|
|
{
|
|
- send_IPI_self(irq_cfg(irq)->vector);
|
|
+ apic->send_IPI_self(irq_cfg(irq)->vector);
|
|
|
|
return 1;
|
|
}
|
|
@@ -2392,37 +2441,24 @@ static int ioapic_retrigger_irq(unsigned
|
|
#ifdef CONFIG_SMP
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
-static void ir_irq_migration(struct work_struct *work);
|
|
-
|
|
-static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
|
|
|
|
/*
|
|
* Migrate the IO-APIC irq in the presence of intr-remapping.
|
|
*
|
|
- * For edge triggered, irq migration is a simple atomic update(of vector
|
|
- * and cpu destination) of IRTE and flush the hardware cache.
|
|
- *
|
|
- * For level triggered, we need to modify the io-apic RTE aswell with the update
|
|
- * vector information, along with modifying IRTE with vector and destination.
|
|
- * So irq migration for level triggered is little bit more complex compared to
|
|
- * edge triggered migration. But the good news is, we use the same algorithm
|
|
- * for level triggered migration as we have today, only difference being,
|
|
- * we now initiate the irq migration from process context instead of the
|
|
- * interrupt context.
|
|
+ * For both level and edge triggered, irq migration is a simple atomic
|
|
+ * update(of vector and cpu destination) of IRTE and flush the hardware cache.
|
|
*
|
|
- * In future, when we do a directed EOI (combined with cpu EOI broadcast
|
|
- * suppression) to the IO-APIC, level triggered irq migration will also be
|
|
- * as simple as edge triggered migration and we can do the irq migration
|
|
- * with a simple atomic update to IO-APIC RTE.
|
|
+ * For level triggered, we eliminate the io-apic RTE modification (with the
|
|
+ * updated vector information), by using a virtual vector (io-apic pin number).
|
|
+ * Real vector that is used for interrupting cpu will be coming from
|
|
+ * the interrupt-remapping table entry.
|
|
*/
|
|
static void
|
|
migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
struct irte irte;
|
|
- int modify_ioapic_rte;
|
|
unsigned int dest;
|
|
- unsigned long flags;
|
|
unsigned int irq;
|
|
|
|
if (!cpumask_intersects(mask, cpu_online_mask))
|
|
@@ -2438,14 +2474,7 @@ migrate_ioapic_irq_desc(struct irq_desc
|
|
|
|
set_extra_move_desc(desc, mask);
|
|
|
|
- dest = cpu_mask_to_apicid_and(cfg->domain, mask);
|
|
-
|
|
- modify_ioapic_rte = desc->status & IRQ_LEVEL;
|
|
- if (modify_ioapic_rte) {
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __target_IO_APIC_irq(irq, dest, cfg);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- }
|
|
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
|
|
|
|
irte.vector = cfg->vector;
|
|
irte.dest_id = IRTE_DEST(dest);
|
|
@@ -2458,61 +2487,7 @@ migrate_ioapic_irq_desc(struct irq_desc
|
|
if (cfg->move_in_progress)
|
|
send_cleanup_vector(cfg);
|
|
|
|
- cpumask_copy(&desc->affinity, mask);
|
|
-}
|
|
-
|
|
-static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
|
|
-{
|
|
- int ret = -1;
|
|
- struct irq_cfg *cfg = desc->chip_data;
|
|
-
|
|
- mask_IO_APIC_irq_desc(desc);
|
|
-
|
|
- if (io_apic_level_ack_pending(cfg)) {
|
|
- /*
|
|
- * Interrupt in progress. Migrating irq now will change the
|
|
- * vector information in the IO-APIC RTE and that will confuse
|
|
- * the EOI broadcast performed by cpu.
|
|
- * So, delay the irq migration to the next instance.
|
|
- */
|
|
- schedule_delayed_work(&ir_migration_work, 1);
|
|
- goto unmask;
|
|
- }
|
|
-
|
|
- /* everthing is clear. we have right of way */
|
|
- migrate_ioapic_irq_desc(desc, &desc->pending_mask);
|
|
-
|
|
- ret = 0;
|
|
- desc->status &= ~IRQ_MOVE_PENDING;
|
|
- cpumask_clear(&desc->pending_mask);
|
|
-
|
|
-unmask:
|
|
- unmask_IO_APIC_irq_desc(desc);
|
|
-
|
|
- return ret;
|
|
-}
|
|
-
|
|
-static void ir_irq_migration(struct work_struct *work)
|
|
-{
|
|
- unsigned int irq;
|
|
- struct irq_desc *desc;
|
|
-
|
|
- for_each_irq_desc(irq, desc) {
|
|
- if (desc->status & IRQ_MOVE_PENDING) {
|
|
- unsigned long flags;
|
|
-
|
|
- spin_lock_irqsave(&desc->lock, flags);
|
|
- if (!desc->chip->set_affinity ||
|
|
- !(desc->status & IRQ_MOVE_PENDING)) {
|
|
- desc->status &= ~IRQ_MOVE_PENDING;
|
|
- spin_unlock_irqrestore(&desc->lock, flags);
|
|
- continue;
|
|
- }
|
|
-
|
|
- desc->chip->set_affinity(irq, &desc->pending_mask);
|
|
- spin_unlock_irqrestore(&desc->lock, flags);
|
|
- }
|
|
- }
|
|
+ cpumask_copy(desc->affinity, mask);
|
|
}
|
|
|
|
/*
|
|
@@ -2521,13 +2496,6 @@ static void ir_irq_migration(struct work
|
|
static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
|
|
const struct cpumask *mask)
|
|
{
|
|
- if (desc->status & IRQ_LEVEL) {
|
|
- desc->status |= IRQ_MOVE_PENDING;
|
|
- cpumask_copy(&desc->pending_mask, mask);
|
|
- migrate_irq_remapped_level_desc(desc);
|
|
- return;
|
|
- }
|
|
-
|
|
migrate_ioapic_irq_desc(desc, mask);
|
|
}
|
|
static void set_ir_ioapic_affinity_irq(unsigned int irq,
|
|
@@ -2537,6 +2505,11 @@ static void set_ir_ioapic_affinity_irq(u
|
|
|
|
set_ir_ioapic_affinity_irq_desc(desc, mask);
|
|
}
|
|
+#else
|
|
+static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
|
|
+ const struct cpumask *mask)
|
|
+{
|
|
+}
|
|
#endif
|
|
|
|
asmlinkage void smp_irq_move_cleanup_interrupt(void)
|
|
@@ -2550,6 +2523,7 @@ asmlinkage void smp_irq_move_cleanup_int
|
|
me = smp_processor_id();
|
|
for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
|
|
unsigned int irq;
|
|
+ unsigned int irr;
|
|
struct irq_desc *desc;
|
|
struct irq_cfg *cfg;
|
|
irq = __get_cpu_var(vector_irq)[vector];
|
|
@@ -2569,6 +2543,18 @@ asmlinkage void smp_irq_move_cleanup_int
|
|
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
|
|
goto unlock;
|
|
|
|
+ irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
|
|
+ /*
|
|
+ * Check if the vector that needs to be cleanedup is
|
|
+ * registered at the cpu's IRR. If so, then this is not
|
|
+ * the best time to clean it up. Lets clean it up in the
|
|
+ * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
|
|
+ * to myself.
|
|
+ */
|
|
+ if (irr & (1 << (vector % 32))) {
|
|
+ apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
|
|
+ goto unlock;
|
|
+ }
|
|
__get_cpu_var(vector_irq)[vector] = -1;
|
|
cfg->move_cleanup_count--;
|
|
unlock:
|
|
@@ -2591,7 +2577,7 @@ static void irq_complete_move(struct irq
|
|
|
|
/* domain has not changed, but affinity did */
|
|
me = smp_processor_id();
|
|
- if (cpu_isset(me, desc->affinity)) {
|
|
+ if (cpumask_test_cpu(me, desc->affinity)) {
|
|
*descp = desc = move_irq_desc(desc, me);
|
|
/* get the new one */
|
|
cfg = desc->chip_data;
|
|
@@ -2617,17 +2603,51 @@ static void irq_complete_move(struct irq
|
|
static inline void irq_complete_move(struct irq_desc **descp) {}
|
|
#endif
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+	int apic, pin;
+	struct irq_pin_list *entry;
+
+	entry = cfg->irq_2_pin;
+	for (;;) {
+
+		if (!entry)
+			break;
+
+		apic = entry->apic;
+		pin = entry->pin;
+		io_apic_eoi(apic, pin);
+		entry = entry->next;
+	}
+}
+
+static void
+eoi_ioapic_irq(struct irq_desc *desc)
+{
+	struct irq_cfg *cfg;
+	unsigned long flags;
+	unsigned int irq;
+
+	irq = desc->irq;
+	cfg = desc->chip_data;
+
+	spin_lock_irqsave(&ioapic_lock, flags);
+	__eoi_ioapic_irq(irq, cfg);
+	spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
+#ifdef CONFIG_X86_X2APIC
 static void ack_x2apic_level(unsigned int irq)
 {
+	struct irq_desc *desc = irq_to_desc(irq);
 	ack_x2APIC_irq();
+	eoi_ioapic_irq(desc);
 }
 
 static void ack_x2apic_edge(unsigned int irq)
 {
 	ack_x2APIC_irq();
 }
-
 #endif
 
 static void ack_apic_edge(unsigned int irq)
@@ -2693,6 +2713,9 @@ static void ack_apic_level(unsigned int
 	 */
 	ack_APIC_irq();
 
+	if (irq_remapped(irq))
+		eoi_ioapic_irq(desc);
+
 	/* Now we can move and renable the irq */
 	if (unlikely(do_unmask_irq)) {
 		/* Only migrate the irq if the ack has been received.
@@ -2738,6 +2761,26 @@ static void ack_apic_level(unsigned int
 #endif
 }
 
+#ifdef CONFIG_INTR_REMAP
+static void ir_ack_apic_edge(unsigned int irq)
+{
+#ifdef CONFIG_X86_X2APIC
+	if (x2apic_enabled())
+		return ack_x2apic_edge(irq);
+#endif
+	return ack_apic_edge(irq);
+}
+
+static void ir_ack_apic_level(unsigned int irq)
+{
+#ifdef CONFIG_X86_X2APIC
+	if (x2apic_enabled())
+		return ack_x2apic_level(irq);
+#endif
+	return ack_apic_level(irq);
+}
+#endif /* CONFIG_INTR_REMAP */
+
 static struct irq_chip ioapic_chip __read_mostly = {
 	.name = "IO-APIC",
 	.startup = startup_ioapic_irq,
@@ -2751,20 +2794,20 @@ static struct irq_chip ioapic_chip __rea
 	.retrigger = ioapic_retrigger_irq,
 };
 
-#ifdef CONFIG_INTR_REMAP
 static struct irq_chip ir_ioapic_chip __read_mostly = {
 	.name = "IR-IO-APIC",
 	.startup = startup_ioapic_irq,
 	.mask = mask_IO_APIC_irq,
 	.unmask = unmask_IO_APIC_irq,
-	.ack = ack_x2apic_edge,
-	.eoi = ack_x2apic_level,
+#ifdef CONFIG_INTR_REMAP
+	.ack = ir_ack_apic_edge,
+	.eoi = ir_ack_apic_level,
 #ifdef CONFIG_SMP
 	.set_affinity = set_ir_ioapic_affinity_irq,
 #endif
+#endif
 	.retrigger = ioapic_retrigger_irq,
 };
-#endif
 #endif /* CONFIG_XEN */
 
 static inline void init_IO_APIC_traps(void)
@@ -2786,7 +2829,7 @@ static inline void init_IO_APIC_traps(vo
 	 */
 	for_each_irq_desc(irq, desc) {
 #ifdef CONFIG_XEN
-		if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS)
+		if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs)
 			continue;
 #endif
 		cfg = desc->chip_data;
@@ -2948,19 +2991,15 @@ static inline void __init check_timer(vo
 	int cpu = boot_cpu_id;
 	int apic1, pin1, apic2, pin2;
 	unsigned long flags;
-	unsigned int ver;
 	int no_pin1 = 0;
 
 	local_irq_save(flags);
 
-	ver = apic_read(APIC_LVR);
-	ver = GET_APIC_VERSION(ver);
-
 	/*
 	 * get/set the timer IRQ vector:
 	 */
 	disable_8259A_irq(0);
-	assign_irq_vector(0, cfg, TARGET_CPUS);
+	assign_irq_vector(0, cfg, apic->target_cpus());
 
 	/*
 	 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2974,7 +3013,13 @@ static inline void __init check_timer(vo
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
 	init_8259A(1);
 #ifdef CONFIG_X86_32
-	timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+	{
+		unsigned int ver;
+
+		ver = apic_read(APIC_LVR);
+		ver = GET_APIC_VERSION(ver);
+		timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
+	}
 #endif
 
 	pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2994,10 +3039,8 @@ static inline void __init check_timer(vo
 	 * 8259A.
 	 */
 	if (pin1 == -1) {
-#ifdef CONFIG_INTR_REMAP
 		if (intr_remapping_enabled)
 			panic("BIOS bug: timer not connected to IO-APIC");
-#endif
 		pin1 = pin2;
 		apic1 = apic2;
 		no_pin1 = 1;
@@ -3013,8 +3056,17 @@ static inline void __init check_timer(vo
 	if (no_pin1) {
 		add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
 		setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
+	} else {
+		/* for edge trigger, setup_IO_APIC_irq already
+		 * leave it unmasked.
+		 * so only need to unmask if it is level-trigger
+		 * do we really have level trigger timer?
+		 */
+		int idx;
+		idx = find_irq_entry(apic1, pin1, mp_INT);
+		if (idx != -1 && irq_trigger(idx))
+			unmask_IO_APIC_irq_desc(desc);
 	}
-	unmask_IO_APIC_irq_desc(desc);
 	if (timer_irq_works()) {
 		if (nmi_watchdog == NMI_IO_APIC) {
 			setup_nmi();
@@ -3024,10 +3076,9 @@ static inline void __init check_timer(vo
 		clear_IO_APIC_pin(0, pin1);
 		goto out;
 	}
-#ifdef CONFIG_INTR_REMAP
 	if (intr_remapping_enabled)
 		panic("timer doesn't work through Interrupt-remapped IO-APIC");
-#endif
+	local_irq_disable();
 	clear_IO_APIC_pin(apic1, pin1);
 	if (!no_pin1)
 		apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -3042,7 +3093,6 @@ static inline void __init check_timer(vo
 	 */
 	replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
 	setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
-	unmask_IO_APIC_irq_desc(desc);
 	enable_8259A_irq(0);
 	if (timer_irq_works()) {
 		apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -3057,6 +3107,7 @@ static inline void __init check_timer(vo
 	/*
 	 * Cleanup, just in case ...
 	 */
+	local_irq_disable();
 	disable_8259A_irq(0);
 	clear_IO_APIC_pin(apic2, pin2);
 	apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -3082,6 +3133,7 @@ static inline void __init check_timer(vo
 		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
 		goto out;
 	}
+	local_irq_disable();
 	disable_8259A_irq(0);
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3099,6 +3151,7 @@ static inline void __init check_timer(vo
 		apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
 		goto out;
 	}
+	local_irq_disable();
 	apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
 	panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
	      "report. Then try booting with the 'noapic' option.\n");
@@ -3131,7 +3184,7 @@ out:
|
|
void __init setup_IO_APIC(void)
|
|
{
|
|
|
|
-#if defined(CONFIG_X86_32) || defined(CONFIG_XEN)
|
|
+#ifdef CONFIG_XEN
|
|
enable_IO_APIC();
|
|
#else
|
|
/*
|
|
@@ -3213,8 +3266,8 @@ static int ioapic_resume(struct sys_devi
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
reg_00.raw = io_apic_read(dev->id, 0);
|
|
- if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) {
|
|
- reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid;
|
|
+ if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
|
|
+ reg_00.bits.ID = mp_ioapics[dev->id].apicid;
|
|
io_apic_write(dev->id, 0, reg_00.raw);
|
|
}
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
@@ -3264,6 +3317,7 @@ static int __init ioapic_init_sysfs(void
|
|
|
|
device_initcall(ioapic_init_sysfs);
|
|
|
|
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
|
|
/*
|
|
* Dynamic irq allocate and deallocation
|
|
*/
|
|
@@ -3278,11 +3332,11 @@ unsigned int create_irq_nr(unsigned int
|
|
struct irq_desc *desc_new = NULL;
|
|
|
|
irq = 0;
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- for (new = irq_want; new < NR_IRQS; new++) {
|
|
- if (platform_legacy_irq(new))
|
|
- continue;
|
|
+ if (irq_want < nr_irqs_gsi)
|
|
+ irq_want = nr_irqs_gsi;
|
|
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ for (new = irq_want; new < nr_irqs; new++) {
|
|
desc_new = irq_to_desc_alloc_cpu(new, cpu);
|
|
if (!desc_new) {
|
|
printk(KERN_INFO "can not get irq_desc for %d\n", new);
|
|
@@ -3292,7 +3346,7 @@ unsigned int create_irq_nr(unsigned int
|
|
|
|
if (cfg_new->vector != 0)
|
|
continue;
|
|
- if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0)
|
|
+ if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
|
|
irq = new;
|
|
break;
|
|
}
|
|
@@ -3307,7 +3361,6 @@ unsigned int create_irq_nr(unsigned int
|
|
return irq;
|
|
}
|
|
|
|
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
|
|
int create_irq(void)
|
|
{
|
|
unsigned int irq_want;
|
|
@@ -3336,9 +3389,7 @@ void destroy_irq(unsigned int irq)
|
|
if (desc)
|
|
desc->chip_data = cfg;
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
free_irte(irq);
|
|
-#endif
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
__clear_irq_vector(irq, cfg);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
@@ -3355,14 +3406,16 @@ static int msi_compose_msg(struct pci_de
|
|
int err;
|
|
unsigned dest;
|
|
|
|
+ if (disable_apic)
|
|
+ return -ENXIO;
|
|
+
|
|
cfg = irq_cfg(irq);
|
|
- err = assign_irq_vector(irq, cfg, TARGET_CPUS);
|
|
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
|
|
if (err)
|
|
return err;
|
|
|
|
- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
|
|
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
if (irq_remapped(irq)) {
|
|
struct irte irte;
|
|
int ir_index;
|
|
@@ -3374,9 +3427,9 @@ static int msi_compose_msg(struct pci_de
|
|
memset (&irte, 0, sizeof(irte));
|
|
|
|
irte.present = 1;
|
|
- irte.dst_mode = INT_DEST_MODE;
|
|
+ irte.dst_mode = apic->irq_dest_mode;
|
|
irte.trigger_mode = 0; /* edge */
|
|
- irte.dlvry_mode = INT_DELIVERY_MODE;
|
|
+ irte.dlvry_mode = apic->irq_delivery_mode;
|
|
irte.vector = cfg->vector;
|
|
irte.dest_id = IRTE_DEST(dest);
|
|
|
|
@@ -3388,16 +3441,19 @@ static int msi_compose_msg(struct pci_de
|
|
MSI_ADDR_IR_SHV |
|
|
MSI_ADDR_IR_INDEX1(ir_index) |
|
|
MSI_ADDR_IR_INDEX2(ir_index);
|
|
- } else
|
|
-#endif
|
|
- {
|
|
- msg->address_hi = MSI_ADDR_BASE_HI;
|
|
+ } else {
|
|
+ if (x2apic_enabled())
|
|
+ msg->address_hi = MSI_ADDR_BASE_HI |
|
|
+ MSI_ADDR_EXT_DEST_ID(dest);
|
|
+ else
|
|
+ msg->address_hi = MSI_ADDR_BASE_HI;
|
|
+
|
|
msg->address_lo =
|
|
MSI_ADDR_BASE_LO |
|
|
- ((INT_DEST_MODE == 0) ?
|
|
+ ((apic->irq_dest_mode == 0) ?
|
|
MSI_ADDR_DEST_MODE_PHYSICAL:
|
|
MSI_ADDR_DEST_MODE_LOGICAL) |
|
|
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
|
|
MSI_ADDR_REDIRECTION_CPU:
|
|
MSI_ADDR_REDIRECTION_LOWPRI) |
|
|
MSI_ADDR_DEST_ID(dest);
|
|
@@ -3405,7 +3461,7 @@ static int msi_compose_msg(struct pci_de
|
|
msg->data =
|
|
MSI_DATA_TRIGGER_EDGE |
|
|
MSI_DATA_LEVEL_ASSERT |
|
|
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
|
|
MSI_DATA_DELIVERY_FIXED:
|
|
MSI_DATA_DELIVERY_LOWPRI) |
|
|
MSI_DATA_VECTOR(cfg->vector);
|
|
@@ -3491,15 +3547,16 @@ static struct irq_chip msi_chip = {
|
|
.retrigger = ioapic_retrigger_irq,
|
|
};
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
static struct irq_chip msi_ir_chip = {
|
|
.name = "IR-PCI-MSI",
|
|
.unmask = unmask_msi_irq,
|
|
.mask = mask_msi_irq,
|
|
- .ack = ack_x2apic_edge,
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ .ack = ir_ack_apic_edge,
|
|
#ifdef CONFIG_SMP
|
|
.set_affinity = ir_set_msi_irq_affinity,
|
|
#endif
|
|
+#endif
|
|
.retrigger = ioapic_retrigger_irq,
|
|
};
|
|
|
|
@@ -3529,7 +3586,6 @@ static int msi_alloc_irte(struct pci_dev
|
|
}
|
|
return index;
|
|
}
|
|
-#endif
|
|
|
|
static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
|
|
{
|
|
@@ -3543,7 +3599,6 @@ static int setup_msi_irq(struct pci_dev
|
|
set_irq_msi(irq, msidesc);
|
|
write_msi_msg(irq, &msg);
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
if (irq_remapped(irq)) {
|
|
struct irq_desc *desc = irq_to_desc(irq);
|
|
/*
|
|
@@ -3552,7 +3607,6 @@ static int setup_msi_irq(struct pci_dev
|
|
desc->status |= IRQ_MOVE_PCNTXT;
|
|
set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
|
|
} else
|
|
-#endif
|
|
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
|
|
|
|
dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
|
|
@@ -3560,60 +3614,26 @@ static int setup_msi_irq(struct pci_dev
|
|
return 0;
|
|
}
|
|
|
|
-int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
|
|
-{
|
|
- unsigned int irq;
|
|
- int ret;
|
|
- unsigned int irq_want;
|
|
-
|
|
- irq_want = nr_irqs_gsi;
|
|
- irq = create_irq_nr(irq_want);
|
|
- if (irq == 0)
|
|
- return -1;
|
|
-
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
- if (!intr_remapping_enabled)
|
|
- goto no_ir;
|
|
-
|
|
- ret = msi_alloc_irte(dev, irq, 1);
|
|
- if (ret < 0)
|
|
- goto error;
|
|
-no_ir:
|
|
-#endif
|
|
- ret = setup_msi_irq(dev, msidesc, irq);
|
|
- if (ret < 0) {
|
|
- destroy_irq(irq);
|
|
- return ret;
|
|
- }
|
|
- return 0;
|
|
-
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
-error:
|
|
- destroy_irq(irq);
|
|
- return ret;
|
|
-#endif
|
|
-}
|
|
-
|
|
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
{
|
|
unsigned int irq;
|
|
int ret, sub_handle;
|
|
struct msi_desc *msidesc;
|
|
unsigned int irq_want;
|
|
-
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
- struct intel_iommu *iommu = 0;
|
|
+ struct intel_iommu *iommu = NULL;
|
|
int index = 0;
|
|
-#endif
|
|
+
|
|
+ /* x86 doesn't support multiple MSI yet */
|
|
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
|
|
+ return 1;
|
|
|
|
irq_want = nr_irqs_gsi;
|
|
sub_handle = 0;
|
|
list_for_each_entry(msidesc, &dev->msi_list, list) {
|
|
irq = create_irq_nr(irq_want);
|
|
- irq_want++;
|
|
if (irq == 0)
|
|
return -1;
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
+ irq_want = irq + 1;
|
|
if (!intr_remapping_enabled)
|
|
goto no_ir;
|
|
|
|
@@ -3641,7 +3661,6 @@ int arch_setup_msi_irqs(struct pci_dev *
|
|
set_irte_irq(irq, iommu, index, sub_handle);
|
|
}
|
|
no_ir:
|
|
-#endif
|
|
ret = setup_msi_irq(dev, msidesc, irq);
|
|
if (ret < 0)
|
|
goto error;
|
|
@@ -3659,7 +3678,7 @@ void arch_teardown_msi_irq(unsigned int
|
|
destroy_irq(irq);
|
|
}
|
|
|
|
-#ifdef CONFIG_DMAR
|
|
+#if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP)
|
|
#ifdef CONFIG_SMP
|
|
static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask)
|
|
{
|
|
@@ -3740,7 +3759,7 @@ static void hpet_msi_set_affinity(unsign
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
-struct irq_chip hpet_msi_type = {
|
|
+static struct irq_chip hpet_msi_type = {
|
|
.name = "HPET_MSI",
|
|
.unmask = hpet_msi_unmask,
|
|
.mask = hpet_msi_mask,
|
|
@@ -3755,12 +3774,14 @@ int arch_setup_hpet_msi(unsigned int irq
|
|
{
|
|
int ret;
|
|
struct msi_msg msg;
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
|
|
ret = msi_compose_msg(NULL, irq, &msg);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
hpet_msi_write(irq, &msg);
|
|
+ desc->status |= IRQ_MOVE_PCNTXT;
|
|
set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
|
|
"edge");
|
|
|
|
@@ -3823,13 +3844,17 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
struct irq_cfg *cfg;
|
|
int err;
|
|
|
|
+ if (disable_apic)
|
|
+ return -ENXIO;
|
|
+
|
|
cfg = irq_cfg(irq);
|
|
- err = assign_irq_vector(irq, cfg, TARGET_CPUS);
|
|
+ err = assign_irq_vector(irq, cfg, apic->target_cpus());
|
|
if (!err) {
|
|
struct ht_irq_msg msg;
|
|
unsigned dest;
|
|
|
|
- dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS);
|
|
+ dest = apic->cpu_mask_to_apicid_and(cfg->domain,
|
|
+ apic->target_cpus());
|
|
|
|
msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
|
|
|
@@ -3837,11 +3862,11 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
HT_IRQ_LOW_BASE |
|
|
HT_IRQ_LOW_DEST_ID(dest) |
|
|
HT_IRQ_LOW_VECTOR(cfg->vector) |
|
|
- ((INT_DEST_MODE == 0) ?
|
|
+ ((apic->irq_dest_mode == 0) ?
|
|
HT_IRQ_LOW_DM_PHYSICAL :
|
|
HT_IRQ_LOW_DM_LOGICAL) |
|
|
HT_IRQ_LOW_RQEOI_EDGE |
|
|
- ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ ((apic->irq_delivery_mode != dest_LowestPrio) ?
|
|
HT_IRQ_LOW_MT_FIXED :
|
|
HT_IRQ_LOW_MT_ARBITRATED) |
|
|
HT_IRQ_LOW_IRQ_MASKED;
|
|
@@ -3857,7 +3882,7 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
}
|
|
#endif /* CONFIG_HT_IRQ */
|
|
|
|
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+#ifdef CONFIG_X86_UV
|
|
/*
|
|
* Re-target the irq to the specified CPU and enable the specified MMR located
|
|
* on the specified blade to allow the sending of MSIs to the specified CPU.
|
|
@@ -3889,12 +3914,12 @@ int arch_enable_uv_irq(char *irq_name, u
|
|
BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
|
|
|
|
entry->vector = cfg->vector;
|
|
- entry->delivery_mode = INT_DELIVERY_MODE;
|
|
- entry->dest_mode = INT_DEST_MODE;
|
|
+ entry->delivery_mode = apic->irq_delivery_mode;
|
|
+ entry->dest_mode = apic->irq_dest_mode;
|
|
entry->polarity = 0;
|
|
entry->trigger = 0;
|
|
entry->mask = 0;
|
|
- entry->dest = cpu_mask_to_apicid(eligible_cpu);
|
|
+ entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
|
|
|
|
mmr_pnode = uv_blade_to_pnode(mmr_blade);
|
|
uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
|
|
@@ -3957,7 +3982,29 @@ void __init probe_nr_irqs_gsi(void)
|
|
|
|
printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
|
|
}
|
|
+
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+int __init arch_probe_nr_irqs(void)
|
|
+{
|
|
+ int nr;
|
|
+
|
|
+ if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
|
|
+ nr_irqs = NR_VECTORS * nr_cpu_ids;
|
|
+
|
|
+ nr = nr_irqs_gsi + 8 * nr_cpu_ids;
|
|
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
|
|
+ /*
|
|
+ * for MSI and HT dyn irq
|
|
+ */
|
|
+ nr += nr_irqs_gsi * 16;
|
|
#endif
|
|
+ if (nr < nr_irqs)
|
|
+ nr_irqs = nr;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+#endif /* CONFIG_XEN */
|
|
|
|
/* --------------------------------------------------------------------------
|
|
ACPI-based IOAPIC Configuration
|
|
@@ -3985,7 +4032,7 @@ int __init io_apic_get_unique_id(int ioa
|
|
*/
|
|
|
|
if (physids_empty(apic_id_map))
|
|
- apic_id_map = ioapic_phys_id_map(phys_cpu_present_map);
|
|
+ apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
reg_00.raw = io_apic_read(ioapic, 0);
|
|
@@ -4001,10 +4048,10 @@ int __init io_apic_get_unique_id(int ioa
|
|
* Every APIC in a system must have a unique ID or we get lots of nice
|
|
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
*/
|
|
- if (check_apicid_used(apic_id_map, apic_id)) {
|
|
+ if (apic->check_apicid_used(apic_id_map, apic_id)) {
|
|
|
|
for (i = 0; i < get_physical_broadcast(); i++) {
|
|
- if (!check_apicid_used(apic_id_map, i))
|
|
+ if (!apic->check_apicid_used(apic_id_map, i))
|
|
break;
|
|
}
|
|
|
|
@@ -4017,7 +4064,7 @@ int __init io_apic_get_unique_id(int ioa
|
|
apic_id = i;
|
|
}
|
|
|
|
- tmp = apicid_to_cpu_present(apic_id);
|
|
+ tmp = apic->apicid_to_cpu_present(apic_id);
|
|
physids_or(apic_id_map, apic_id_map, tmp);
|
|
|
|
if (reg_00.bits.ID != apic_id) {
|
|
@@ -4062,7 +4109,7 @@ int io_apic_set_pci_routing (int ioapic,
|
|
int cpu = boot_cpu_id;
|
|
|
|
#ifdef CONFIG_XEN
|
|
- if (irq < PIRQ_BASE || irq >= PIRQ_BASE + NR_PIRQS) {
|
|
+ if (irq < PIRQ_BASE || irq >= PIRQ_BASE + nr_pirqs) {
|
|
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ %d\n",
|
|
ioapic, irq);
|
|
return -EINVAL;
|
|
@@ -4103,8 +4150,8 @@ int acpi_get_override_irq(int bus_irq, i
|
|
return -1;
|
|
|
|
for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mp_irqtype == mp_INT &&
|
|
- mp_irqs[i].mp_srcbusirq == bus_irq)
|
|
+ if (mp_irqs[i].irqtype == mp_INT &&
|
|
+ mp_irqs[i].srcbusirq == bus_irq)
|
|
break;
|
|
if (i >= mp_irq_entries)
|
|
return -1;
|
|
@@ -4120,7 +4167,7 @@ int acpi_get_override_irq(int bus_irq, i
|
|
/*
|
|
* This function currently is only a helper for the i386 smp boot process where
|
|
* we need to reprogram the ioredtbls to cater for the cpus which have come online
|
|
- * so mask in all cases should simply be TARGET_CPUS
|
|
+ * so mask in all cases should simply be apic->target_cpus()
|
|
*/
|
|
#ifdef CONFIG_SMP
|
|
void __init setup_ioapic_dest(void)
|
|
@@ -4159,15 +4206,13 @@ void __init setup_ioapic_dest(void)
|
|
*/
|
|
if (desc->status &
|
|
(IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
|
|
- mask = &desc->affinity;
|
|
+ mask = desc->affinity;
|
|
else
|
|
- mask = TARGET_CPUS;
|
|
+ mask = apic->target_cpus();
|
|
|
|
-#ifdef CONFIG_INTR_REMAP
|
|
if (intr_remapping_enabled)
|
|
set_ir_ioapic_affinity_irq_desc(desc, mask);
|
|
else
|
|
-#endif
|
|
set_ioapic_affinity_irq_desc(desc, mask);
|
|
}
|
|
|
|
@@ -4220,7 +4265,7 @@ void __init ioapic_init_mappings(void)
|
|
ioapic_res = ioapic_setup_resources();
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
if (smp_found_config) {
|
|
- ioapic_phys = mp_ioapics[i].mp_apicaddr;
|
|
+ ioapic_phys = mp_ioapics[i].apicaddr;
|
|
#ifdef CONFIG_X86_32
|
|
if (!ioapic_phys) {
|
|
printk(KERN_ERR
|
|
@@ -4260,9 +4305,12 @@ static int __init ioapic_insert_resource
|
|
struct resource *r = ioapic_resources;
|
|
|
|
if (!r) {
|
|
- printk(KERN_ERR
|
|
- "IO APIC resources could be not be allocated.\n");
|
|
- return -1;
|
|
+ if (nr_ioapics > 0) {
|
|
+ printk(KERN_ERR
|
|
+ "IO APIC resources couldn't be allocated.\n");
|
|
+ return -1;
|
|
+ }
|
|
+ return 0;
|
|
}
|
|
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
--- head-2010-05-25.orig/arch/x86/kernel/apic/ipi-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/apic/ipi-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -17,38 +17,8 @@
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/apic.h>
|
|
#include <asm/proto.h>
|
|
+#include <asm/ipi.h>
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-#ifndef CONFIG_XEN
|
|
-#include <mach_apic.h>
|
|
-#include <mach_ipi.h>
|
|
-
|
|
-/*
|
|
- * the following functions deal with sending IPIs between CPUs.
|
|
- *
|
|
- * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
|
|
- */
|
|
-
|
|
-static inline int __prepare_ICR(unsigned int shortcut, int vector)
|
|
-{
|
|
- unsigned int icr = shortcut | APIC_DEST_LOGICAL;
|
|
-
|
|
- switch (vector) {
|
|
- default:
|
|
- icr |= APIC_DM_FIXED | vector;
|
|
- break;
|
|
- case NMI_VECTOR:
|
|
- icr |= APIC_DM_NMI;
|
|
- break;
|
|
- }
|
|
- return icr;
|
|
-}
|
|
-
|
|
-static inline int __prepare_ICR2(unsigned int mask)
|
|
-{
|
|
- return SET_APIC_DEST_FIELD(mask);
|
|
-}
|
|
-#else
|
|
#include <xen/evtchn.h>
|
|
|
|
DECLARE_PER_CPU(int, ipi_to_irq[NR_IPIS]);
|
|
@@ -59,36 +29,10 @@ static inline void __send_IPI_one(unsign
|
|
BUG_ON(irq < 0);
|
|
notify_remote_via_irq(irq);
|
|
}
|
|
-#endif
|
|
|
|
-void __send_IPI_shortcut(unsigned int shortcut, int vector)
|
|
+static void __send_IPI_shortcut(unsigned int shortcut, int vector)
|
|
{
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * Subtle. In the case of the 'never do double writes' workaround
|
|
- * we have to lock out interrupts to be safe. As we don't care
|
|
- * of the value read we use an atomic rmw access to avoid costly
|
|
- * cli/sti. Otherwise we use an even cheaper single atomic write
|
|
- * to the APIC.
|
|
- */
|
|
- unsigned int cfg;
|
|
-
|
|
- /*
|
|
- * Wait for idle.
|
|
- */
|
|
- apic_wait_icr_idle();
|
|
-
|
|
- /*
|
|
- * No need to touch the target chip field
|
|
- */
|
|
- cfg = __prepare_ICR(shortcut, vector);
|
|
-
|
|
- /*
|
|
- * Send the IPI. The write to APIC_ICR fires this off.
|
|
- */
|
|
- apic_write(APIC_ICR, cfg);
|
|
-#else
|
|
- int cpu;
|
|
+ unsigned int cpu;
|
|
|
|
switch (shortcut) {
|
|
case APIC_DEST_SELF:
|
|
@@ -99,149 +43,53 @@ void __send_IPI_shortcut(unsigned int sh
|
|
if (cpu != smp_processor_id())
|
|
__send_IPI_one(cpu, vector);
|
|
break;
|
|
+ case APIC_DEST_ALLINC:
|
|
+ for_each_online_cpu(cpu)
|
|
+ __send_IPI_one(cpu, vector);
|
|
+ break;
|
|
default:
|
|
printk("XXXXXX __send_IPI_shortcut %08x vector %d\n", shortcut,
|
|
vector);
|
|
break;
|
|
}
|
|
-#endif
|
|
}
|
|
|
|
-void send_IPI_self(int vector)
|
|
+void xen_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
|
|
{
|
|
- __send_IPI_shortcut(APIC_DEST_SELF, vector);
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-/*
|
|
- * This is used to send an IPI with no shorthand notation (the destination is
|
|
- * specified in bits 56 to 63 of the ICR).
|
|
- */
|
|
-static inline void __send_IPI_dest_field(unsigned long mask, int vector)
|
|
-{
|
|
- unsigned long cfg;
|
|
-
|
|
- /*
|
|
- * Wait for idle.
|
|
- */
|
|
- if (unlikely(vector == NMI_VECTOR))
|
|
- safe_apic_wait_icr_idle();
|
|
- else
|
|
- apic_wait_icr_idle();
|
|
-
|
|
- /*
|
|
- * prepare target chip field
|
|
- */
|
|
- cfg = __prepare_ICR2(mask);
|
|
- apic_write(APIC_ICR2, cfg);
|
|
-
|
|
- /*
|
|
- * program the ICR
|
|
- */
|
|
- cfg = __prepare_ICR(0, vector);
|
|
-
|
|
- /*
|
|
- * Send the IPI. The write to APIC_ICR fires this off.
|
|
- */
|
|
- apic_write(APIC_ICR, cfg);
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * This is only used on smaller machines.
|
|
- */
|
|
-void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- unsigned long mask = cpumask_bits(cpumask)[0];
|
|
-#else
|
|
unsigned int cpu;
|
|
-#endif
|
|
unsigned long flags;
|
|
|
|
local_irq_save(flags);
|
|
-#ifndef CONFIG_XEN
|
|
- WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
|
|
- __send_IPI_dest_field(mask, vector);
|
|
-#else
|
|
WARN_ON(!cpumask_subset(cpumask, cpu_online_mask));
|
|
for_each_cpu_and(cpu, cpumask, cpu_online_mask)
|
|
- __send_IPI_one(cpu, vector);
|
|
-#endif
|
|
+ if (cpu != smp_processor_id())
|
|
+ __send_IPI_one(cpu, vector);
|
|
local_irq_restore(flags);
|
|
}
|
|
|
|
-void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
|
|
+void xen_send_IPI_mask(const struct cpumask *cpumask, int vector)
|
|
{
|
|
-#ifndef CONFIG_XEN
|
|
+ unsigned int cpu;
|
|
unsigned long flags;
|
|
- unsigned int query_cpu;
|
|
-
|
|
- /*
|
|
- * Hack. The clustered APIC addressing mode doesn't allow us to send
|
|
- * to an arbitrary mask, so I do a unicasts to each CPU instead. This
|
|
- * should be modified to do 1 message per cluster ID - mbligh
|
|
- */
|
|
|
|
local_irq_save(flags);
|
|
- for_each_cpu(query_cpu, mask)
|
|
- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
|
|
+ WARN_ON(!cpumask_subset(cpumask, cpu_online_mask));
|
|
+ for_each_cpu_and(cpu, cpumask, cpu_online_mask)
|
|
+ __send_IPI_one(cpu, vector);
|
|
local_irq_restore(flags);
|
|
-#else
|
|
- send_IPI_mask_bitmask(mask, vector);
|
|
-#endif
|
|
}
|
|
|
|
-void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
|
|
+void xen_send_IPI_allbutself(int vector)
|
|
{
|
|
- unsigned long flags;
|
|
- unsigned int query_cpu;
|
|
- unsigned int this_cpu = smp_processor_id();
|
|
-
|
|
- /* See Hack comment above */
|
|
-
|
|
- local_irq_save(flags);
|
|
-#ifndef CONFIG_XEN
|
|
- for_each_cpu(query_cpu, mask)
|
|
- if (query_cpu != this_cpu)
|
|
- __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
|
|
- vector);
|
|
-#else
|
|
- WARN_ON(!cpumask_subset(mask, cpu_online_mask));
|
|
- for_each_cpu_and(query_cpu, mask, cpu_online_mask)
|
|
- if (query_cpu != this_cpu)
|
|
- __send_IPI_one(query_cpu, vector);
|
|
-#endif
|
|
- local_irq_restore(flags);
|
|
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector);
|
|
}
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-/* must come after the send_IPI functions above for inlining */
|
|
-static int convert_apicid_to_cpu(int apic_id)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for_each_possible_cpu(i) {
|
|
- if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
|
|
- return i;
|
|
- }
|
|
- return -1;
|
|
+void xen_send_IPI_all(int vector)
|
|
+{
|
|
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector);
|
|
}
|
|
|
|
-int safe_smp_processor_id(void)
|
|
+void xen_send_IPI_self(int vector)
|
|
{
|
|
- int apicid, cpuid;
|
|
-
|
|
- if (!boot_cpu_has(X86_FEATURE_APIC))
|
|
- return 0;
|
|
-
|
|
- apicid = hard_smp_processor_id();
|
|
- if (apicid == BAD_APICID)
|
|
- return 0;
|
|
-
|
|
- cpuid = convert_apicid_to_cpu(apicid);
|
|
-
|
|
- return cpuid >= 0 ? cpuid : 0;
|
|
+ __send_IPI_shortcut(APIC_DEST_SELF, vector);
|
|
}
|
|
-#endif
|
|
-#endif
|
|
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/kernel/apic/probe_32-xen.c	2010-03-24 15:25:06.000000000 +0100
@@ -0,0 +1,69 @@
+/*
+ * Default generic APIC driver. This handles up to 8 CPUs.
+ *
+ * Copyright 2003 Andi Kleen, SuSE Labs.
+ * Subject to the GNU Public License, v.2
+ *
+ * Generic x86 APIC driver probe layer.
+ */
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <asm/fixmap.h>
+#include <asm/mpspec.h>
+#include <asm/apicdef.h>
+#include <asm/apic.h>
+#include <asm/setup.h>
+
+#include <linux/threads.h>
+#include <linux/cpumask.h>
+#include <asm/mpspec.h>
+#include <asm/fixmap.h>
+#include <asm/apicdef.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <asm/ipi.h>
+
+#include <linux/smp.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <asm/acpi.h>
+#include <asm/e820.h>
+#include <asm/setup.h>
+
+static int xen_phys_pkg_id(int cpuid_apic, int index_msb)
+{
+	return cpuid_apic;
+}
+
+static struct apic apic_xen = {
+
+	.name = "default",
+
+	.irq_delivery_mode = dest_LowestPrio,
+	/* logical delivery broadcast to all CPUs: */
+	.irq_dest_mode = 1,
+
+	.target_cpus = default_target_cpus,
+
+	.phys_pkg_id = xen_phys_pkg_id,
+	.mps_oem_check = NULL,
+
+#ifdef CONFIG_SMP
+	.send_IPI_mask = xen_send_IPI_mask,
+	.send_IPI_mask_allbutself = xen_send_IPI_mask_allbutself,
+	.send_IPI_allbutself = xen_send_IPI_allbutself,
+	.send_IPI_all = xen_send_IPI_all,
+	.send_IPI_self = xen_send_IPI_self,
+#endif
+};
+
+struct apic *apic = &apic_xen;
+EXPORT_SYMBOL_GPL(apic);
--- head-2010-05-25.orig/arch/x86/kernel/asm-offsets_32.c 2010-03-24 15:10:37.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/asm-offsets_32.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -115,6 +115,11 @@ void foo(void)
|
|
|
|
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+ BLANK();
|
|
+ OFFSET(XEN_START_mfn_list, start_info, mfn_list);
|
|
+#endif
|
|
+
|
|
#ifdef CONFIG_PARAVIRT
|
|
BLANK();
|
|
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,101 +1,94 @@
|
|
-#include <linux/init.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/string.h>
|
|
#include <linux/bootmem.h>
|
|
+#include <linux/linkage.h>
|
|
#include <linux/bitops.h>
|
|
+#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
-#include <linux/kgdb.h>
|
|
-#include <linux/topology.h>
|
|
+#include <linux/percpu.h>
|
|
+#include <linux/string.h>
|
|
#include <linux/delay.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/kgdb.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/percpu.h>
|
|
-#include <asm/i387.h>
|
|
-#include <asm/msr.h>
|
|
-#include <asm/io.h>
|
|
-#include <asm/linkage.h>
|
|
+#include <linux/io.h>
|
|
+
|
|
+#include <asm/stackprotector.h>
|
|
#include <asm/mmu_context.h>
|
|
+#include <asm/hypervisor.h>
|
|
+#include <asm/processor.h>
|
|
+#include <asm/sections.h>
|
|
+#include <asm/topology.h>
|
|
+#include <asm/cpumask.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/atomic.h>
|
|
+#include <asm/proto.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/apic.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/i387.h>
|
|
#include <asm/mtrr.h>
|
|
+#include <asm/numa.h>
|
|
+#include <asm/asm.h>
|
|
+#include <asm/cpu.h>
|
|
#include <asm/mce.h>
|
|
+#include <asm/msr.h>
|
|
#include <asm/pat.h>
|
|
-#include <asm/asm.h>
|
|
-#include <asm/numa.h>
|
|
#include <asm/smp.h>
|
|
+
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
-#include <asm/mpspec.h>
|
|
-#include <asm/apic.h>
|
|
-#include <mach_apic.h>
|
|
-#include <asm/genapic.h>
|
|
-#elif defined(CONFIG_X86_64_XEN)
|
|
-#include <mach_apic.h>
|
|
+#include <asm/uv/uv.h>
|
|
#endif
|
|
|
|
-#include <asm/pda.h>
|
|
-#include <asm/pgtable.h>
|
|
-#include <asm/processor.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/atomic.h>
|
|
-#include <asm/proto.h>
|
|
-#include <asm/sections.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/hypervisor.h>
|
|
-
|
|
#ifdef CONFIG_XEN
|
|
-#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_LOCAL_APIC)
|
|
-#define phys_pkg_id(a,b) a
|
|
-#endif
|
|
#include <xen/interface/callback.h>
|
|
#endif
|
|
|
|
#include "cpu.h"
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
-
|
|
/* all of these masks are initialized in setup_cpu_local_masks() */
|
|
+cpumask_var_t cpu_initialized_mask;
|
|
#ifndef CONFIG_XEN
|
|
-cpumask_var_t cpu_callin_mask;
|
|
cpumask_var_t cpu_callout_mask;
|
|
+cpumask_var_t cpu_callin_mask;
|
|
#endif
|
|
-cpumask_var_t cpu_initialized_mask;
|
|
|
|
/* representing cpus for which sibling maps can be computed */
|
|
cpumask_var_t cpu_sibling_setup_mask;
|
|
|
|
-#else /* CONFIG_X86_32 */
|
|
-
|
|
+/* correctly size the local cpu masks */
|
|
+void __init setup_cpu_local_masks(void)
|
|
+{
|
|
+ alloc_bootmem_cpumask_var(&cpu_initialized_mask);
|
|
#ifndef CONFIG_XEN
|
|
-cpumask_t cpu_callin_map;
|
|
-cpumask_t cpu_callout_map;
|
|
+ alloc_bootmem_cpumask_var(&cpu_callin_mask);
|
|
+ alloc_bootmem_cpumask_var(&cpu_callout_mask);
|
|
#endif
|
|
-cpumask_t cpu_initialized;
|
|
-cpumask_t cpu_sibling_setup_map;
|
|
-
|
|
-#endif /* CONFIG_X86_32 */
|
|
-
|
|
+ alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
|
|
+}
|
|
|
|
-static struct cpu_dev *this_cpu __cpuinitdata;
|
|
+static const struct cpu_dev *this_cpu __cpuinitdata;
|
|
|
|
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
|
|
#ifdef CONFIG_X86_64
|
|
-/* We need valid kernel segments for data and code in long mode too
|
|
- * IRET will check the segment types kkeil 2000/10/28
|
|
- * Also sysret mandates a special GDT layout
|
|
- */
|
|
-/* The TLS descriptors are currently at a different place compared to i386.
|
|
- Hopefully nobody expects them at a fixed place (Wine?) */
|
|
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
|
- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
|
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
|
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
|
-} };
|
|
+ /*
|
|
+ * We need valid kernel segments for data and code in long mode too
|
|
+ * IRET will check the segment types kkeil 2000/10/28
|
|
+ * Also sysret mandates a special GDT layout
|
|
+ *
|
|
+ * TLS descriptors are currently at a different place compared to i386.
|
|
+ * Hopefully nobody expects them at a fixed place (Wine?)
|
|
+ */
|
|
+ [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
|
|
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
|
|
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
|
|
#else
|
|
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
|
|
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
|
|
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
|
|
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
|
|
+ [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
|
|
+ [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
|
|
+ [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
* Segments used for calling PnP BIOS have byte granularity.
|
|
@@ -103,33 +96,41 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
|
|
* the transfer segment sizes are set at run time.
|
|
*/
|
|
/* 32-bit code */
|
|
- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
|
|
+ [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
|
|
/* 16-bit code */
|
|
- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
|
|
+ [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
|
|
/* 16-bit data */
|
|
- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
|
|
+ [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
|
|
/* 16-bit data */
|
|
- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
|
|
+ [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
|
|
/* 16-bit data */
|
|
- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
|
|
+ [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
|
|
/*
|
|
* The APM segments have byte granularity and their bases
|
|
* are set at run time. All have 64k limits.
|
|
*/
|
|
/* 32-bit code */
|
|
- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
|
|
+ [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
|
|
/* 16-bit code */
|
|
- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
|
|
+ [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
|
|
/* data */
|
|
- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
|
|
+ [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
|
|
|
|
- [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
|
|
+ [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
|
|
#endif
|
|
- [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
|
|
-} };
|
|
+ [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
|
|
+ GDT_STACK_CANARY_INIT
|
|
#endif
|
|
+} };
|
|
EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
|
|
|
+static int __init x86_xsave_setup(char *s)
|
|
+{
|
|
+ setup_clear_cpu_cap(X86_FEATURE_XSAVE);
|
|
+ return 1;
|
|
+}
|
|
+__setup("noxsave", x86_xsave_setup);
|
|
+
|
|
#ifdef CONFIG_X86_32
|
|
static int cachesize_override __cpuinitdata = -1;
|
|
static int disable_x86_serial_nr __cpuinitdata = 1;
|
|
@@ -168,16 +169,17 @@ static inline int flag_is_changeable_p(u
|
|
* the CPUID. Add "volatile" to not allow gcc to
|
|
* optimize the subsequent calls to this function.
|
|
*/
|
|
- asm volatile ("pushfl\n\t"
|
|
- "pushfl\n\t"
|
|
- "popl %0\n\t"
|
|
- "movl %0,%1\n\t"
|
|
- "xorl %2,%0\n\t"
|
|
- "pushl %0\n\t"
|
|
- "popfl\n\t"
|
|
- "pushfl\n\t"
|
|
- "popl %0\n\t"
|
|
- "popfl\n\t"
|
|
+ asm volatile ("pushfl \n\t"
|
|
+ "pushfl \n\t"
|
|
+ "popl %0 \n\t"
|
|
+ "movl %0, %1 \n\t"
|
|
+ "xorl %2, %0 \n\t"
|
|
+ "pushl %0 \n\t"
|
|
+ "popfl \n\t"
|
|
+ "pushfl \n\t"
|
|
+ "popl %0 \n\t"
|
|
+ "popfl \n\t"
|
|
+
|
|
: "=&r" (f1), "=&r" (f2)
|
|
: "ir" (flag));
|
|
|
|
@@ -192,18 +194,22 @@ static int __cpuinit have_cpuid_p(void)
|
|
|
|
static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
|
|
{
|
|
- if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
|
|
- /* Disable processor serial number */
|
|
- unsigned long lo, hi;
|
|
- rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
- lo |= 0x200000;
|
|
- wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
- printk(KERN_NOTICE "CPU serial number disabled.\n");
|
|
- clear_cpu_cap(c, X86_FEATURE_PN);
|
|
+ unsigned long lo, hi;
|
|
|
|
- /* Disabling the serial number may affect the cpuid level */
|
|
- c->cpuid_level = cpuid_eax(0);
|
|
- }
|
|
+ if (!cpu_has(c, X86_FEATURE_PN) || !disable_x86_serial_nr)
|
|
+ return;
|
|
+
|
|
+ /* Disable processor serial number: */
|
|
+
|
|
+ rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
+ lo |= 0x200000;
|
|
+ wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
|
|
+
|
|
+ printk(KERN_NOTICE "CPU serial number disabled.\n");
|
|
+ clear_cpu_cap(c, X86_FEATURE_PN);
|
|
+
|
|
+ /* Disabling the serial number may affect the cpuid level */
|
|
+ c->cpuid_level = cpuid_eax(0);
|
|
}
|
|
|
|
static int __init x86_serial_nr_setup(char *s)
|
|
@@ -228,16 +234,64 @@ static inline void squash_the_stupid_ser
|
|
#endif
|
|
|
|
/*
|
|
+ * Some CPU features depend on higher CPUID levels, which may not always
|
|
+ * be available due to CPUID level capping or broken virtualization
|
|
+ * software. Add those features to this table to auto-disable them.
|
|
+ */
|
|
+struct cpuid_dependent_feature {
|
|
+ u32 feature;
|
|
+ u32 level;
|
|
+};
|
|
+
|
|
+static const struct cpuid_dependent_feature __cpuinitconst
|
|
+cpuid_dependent_features[] = {
|
|
+ { X86_FEATURE_MWAIT, 0x00000005 },
|
|
+ { X86_FEATURE_DCA, 0x00000009 },
|
|
+ { X86_FEATURE_XSAVE, 0x0000000d },
|
|
+ { 0, 0 }
|
|
+};
|
|
+
|
|
+static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
|
|
+{
|
|
+ const struct cpuid_dependent_feature *df;
|
|
+
|
|
+ for (df = cpuid_dependent_features; df->feature; df++) {
|
|
+
|
|
+ if (!cpu_has(c, df->feature))
|
|
+ continue;
|
|
+ /*
|
|
+ * Note: cpuid_level is set to -1 if unavailable, but
|
|
+ * extended_extended_level is set to 0 if unavailable
|
|
+ * and the legitimate extended levels are all negative
|
|
+ * when signed; hence the weird messing around with
|
|
+ * signs here...
|
|
+ */
|
|
+ if (!((s32)df->level < 0 ?
|
|
+ (u32)df->level > (u32)c->extended_cpuid_level :
|
|
+ (s32)df->level > (s32)c->cpuid_level))
|
|
+ continue;
|
|
+
|
|
+ clear_cpu_cap(c, df->feature);
|
|
+ if (!warn)
|
|
+ continue;
|
|
+
|
|
+ printk(KERN_WARNING
|
|
+ "CPU: CPU feature %s disabled, no CPUID level 0x%x\n",
|
|
+ x86_cap_flags[df->feature], df->level);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
* Naming convention should be: <Name> [(<Codename>)]
|
|
* This table only is used unless init_<vendor>() below doesn't set it;
|
|
- * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
|
|
- *
|
|
+ * in particular, if CPUID levels 0x80000002..4 are supported, this
|
|
+ * isn't used
|
|
*/
|
|
|
|
/* Look up CPU names by table lookup. */
|
|
-static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
|
|
+static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c)
|
|
{
|
|
- struct cpu_model_info *info;
|
|
+ const struct cpu_model_info *info;
|
|
|
|
if (c->x86_model >= 16)
|
|
return NULL; /* Range check */
|
|
@@ -257,32 +311,52 @@ static char __cpuinit *table_lookup_mode
|
|
|
|
__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
|
|
|
|
-/* Current gdt points %fs at the "master" per-cpu area: after this,
|
|
- * it's on the real one. */
|
|
-void switch_to_new_gdt(void)
|
|
+void load_percpu_segment(int cpu)
|
|
+{
|
|
+#ifdef CONFIG_X86_32
|
|
+ loadsegment(fs, __KERNEL_PERCPU);
|
|
+#else
|
|
+ loadsegment(gs, 0);
|
|
+#ifndef CONFIG_XEN
|
|
+ wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
|
|
+#else
|
|
+ if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
|
|
+ (unsigned long)per_cpu(irq_stack_union.gs_base, cpu)))
|
|
+ BUG();
|
|
+#endif
|
|
+#endif
|
|
+ load_stack_canary_segment();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Current gdt points %fs at the "master" per-cpu area: after this,
|
|
+ * it's on the real one.
|
|
+ */
|
|
+void switch_to_new_gdt(int cpu)
|
|
{
|
|
struct desc_ptr gdt_descr;
|
|
unsigned long va, frames[16];
|
|
int f;
|
|
|
|
- gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
|
+ gdt_descr.address = (long)get_cpu_gdt_table(cpu);
|
|
gdt_descr.size = GDT_SIZE - 1;
|
|
|
|
for (va = gdt_descr.address, f = 0;
|
|
va < gdt_descr.address + gdt_descr.size;
|
|
va += PAGE_SIZE, f++) {
|
|
- frames[f] = virt_to_mfn(va);
|
|
- make_lowmem_page_readonly(
|
|
- (void *)va, XENFEAT_writable_descriptor_tables);
|
|
+ frames[f] = arbitrary_virt_to_mfn(va);
|
|
+ make_page_readonly((void *)va,
|
|
+ XENFEAT_writable_descriptor_tables);
|
|
}
|
|
if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8))
|
|
BUG();
|
|
-#ifdef CONFIG_X86_32
|
|
- asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
|
|
-#endif
|
|
+
|
|
+ /* Reload the per-cpu base */
|
|
+
|
|
+ load_percpu_segment(cpu);
|
|
}
|
|
|
|
-static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
|
|
+static const struct cpu_dev *__cpuinitdata cpu_devs[X86_VENDOR_NUM] = {};
|
|
|
|
static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
|
{
|
|
@@ -301,7 +375,7 @@ static void __cpuinit default_init(struc
|
|
#endif
|
|
}
|
|
|
|
-static struct cpu_dev __cpuinitdata default_cpu = {
|
|
+static const struct cpu_dev __cpuinitconst default_cpu = {
|
|
.c_init = default_init,
|
|
.c_vendor = "Unknown",
|
|
.c_x86_vendor = X86_VENDOR_UNKNOWN,
|
|
@@ -315,22 +389,24 @@ static void __cpuinit get_model_name(str
|
|
if (c->extended_cpuid_level < 0x80000004)
|
|
return;
|
|
|
|
- v = (unsigned int *) c->x86_model_id;
|
|
+ v = (unsigned int *)c->x86_model_id;
|
|
cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
|
|
cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
|
|
cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
|
|
c->x86_model_id[48] = 0;
|
|
|
|
- /* Intel chips right-justify this string for some dumb reason;
|
|
- undo that brain damage */
|
|
+ /*
|
|
+ * Intel chips right-justify this string for some dumb reason;
|
|
+ * undo that brain damage:
|
|
+ */
|
|
p = q = &c->x86_model_id[0];
|
|
while (*p == ' ')
|
|
- p++;
|
|
+ p++;
|
|
if (p != q) {
|
|
- while (*p)
|
|
- *q++ = *p++;
|
|
- while (q <= &c->x86_model_id[48])
|
|
- *q++ = '\0'; /* Zero-pad the rest */
|
|
+ while (*p)
|
|
+ *q++ = *p++;
|
|
+ while (q <= &c->x86_model_id[48])
|
|
+ *q++ = '\0'; /* Zero-pad the rest */
|
|
}
|
|
}
|
|
|
|
@@ -399,36 +475,30 @@ void __cpuinit detect_ht(struct cpuinfo_
|
|
|
|
if (smp_num_siblings == 1) {
|
|
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
- } else if (smp_num_siblings > 1) {
|
|
+ goto out;
|
|
+ }
|
|
|
|
- if (smp_num_siblings > nr_cpu_ids) {
|
|
- printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
|
|
- smp_num_siblings);
|
|
- smp_num_siblings = 1;
|
|
- return;
|
|
- }
|
|
+ if (smp_num_siblings <= 1)
|
|
+ goto out;
|
|
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
-#ifdef CONFIG_X86_64
|
|
- c->phys_proc_id = phys_pkg_id(index_msb);
|
|
-#else
|
|
- c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
|
|
-#endif
|
|
+ if (smp_num_siblings > nr_cpu_ids) {
|
|
+ pr_warning("CPU: Unsupported number of siblings %d",
|
|
+ smp_num_siblings);
|
|
+ smp_num_siblings = 1;
|
|
+ return;
|
|
+ }
|
|
|
|
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
+ index_msb = get_count_order(smp_num_siblings);
|
|
+ c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
|
|
|
|
- index_msb = get_count_order(smp_num_siblings);
|
|
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
|
|
|
|
- core_bits = get_count_order(c->x86_max_cores);
|
|
+ index_msb = get_count_order(smp_num_siblings);
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
- c->cpu_core_id = phys_pkg_id(index_msb) &
|
|
- ((1 << core_bits) - 1);
|
|
-#else
|
|
- c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
|
|
- ((1 << core_bits) - 1);
|
|
-#endif
|
|
- }
|
|
+ core_bits = get_count_order(c->x86_max_cores);
|
|
+
|
|
+ c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
|
|
+ ((1 << core_bits) - 1);
|
|
|
|
out:
|
|
if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
|
@@ -443,8 +513,8 @@ out:
|
|
static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
|
|
{
|
|
char *v = c->x86_vendor_id;
|
|
- int i;
|
|
static int printed;
|
|
+ int i;
|
|
|
|
for (i = 0; i < X86_VENDOR_NUM; i++) {
|
|
if (!cpu_devs[i])
|
|
@@ -453,6 +523,7 @@ static void __cpuinit get_cpu_vendor(str
|
|
if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
|
|
(cpu_devs[i]->c_ident[1] &&
|
|
!strcmp(v, cpu_devs[i]->c_ident[1]))) {
|
|
+
|
|
this_cpu = cpu_devs[i];
|
|
c->x86_vendor = this_cpu->c_x86_vendor;
|
|
return;
|
|
@@ -461,7 +532,9 @@ static void __cpuinit get_cpu_vendor(str
|
|
|
|
if (!printed) {
|
|
printed++;
|
|
- printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
|
|
+ printk(KERN_ERR
|
|
+ "CPU: vendor_id '%s' unknown, using generic init.\n", v);
|
|
+
|
|
printk(KERN_ERR "CPU: Your system may be unstable.\n");
|
|
}
|
|
|
|
@@ -481,14 +554,17 @@ void __cpuinit cpu_detect(struct cpuinfo
|
|
/* Intel-defined flags: level 0x00000001 */
|
|
if (c->cpuid_level >= 0x00000001) {
|
|
u32 junk, tfms, cap0, misc;
|
|
+
|
|
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
|
|
c->x86 = (tfms >> 8) & 0xf;
|
|
c->x86_model = (tfms >> 4) & 0xf;
|
|
c->x86_mask = tfms & 0xf;
|
|
+
|
|
if (c->x86 == 0xf)
|
|
c->x86 += (tfms >> 20) & 0xff;
|
|
if (c->x86 >= 0x6)
|
|
c->x86_model += ((tfms >> 16) & 0xf) << 4;
|
|
+
|
|
if (cap0 & (1<<19)) {
|
|
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
|
|
c->x86_cache_alignment = c->x86_clflush_size;
|
|
@@ -504,6 +580,7 @@ static void __cpuinit get_cpu_cap(struct
|
|
/* Intel-defined flags: level 0x00000001 */
|
|
if (c->cpuid_level >= 0x00000001) {
|
|
u32 capability, excap;
|
|
+
|
|
cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
|
|
c->x86_capability[0] = capability;
|
|
c->x86_capability[4] = excap;
|
|
@@ -512,6 +589,7 @@ static void __cpuinit get_cpu_cap(struct
|
|
/* AMD-defined flags: level 0x80000001 */
|
|
xlvl = cpuid_eax(0x80000000);
|
|
c->extended_cpuid_level = xlvl;
|
|
+
|
|
if ((xlvl & 0xffff0000) == 0x80000000) {
|
|
if (xlvl >= 0x80000001) {
|
|
c->x86_capability[1] = cpuid_edx(0x80000001);
|
|
@@ -519,13 +597,15 @@ static void __cpuinit get_cpu_cap(struct
|
|
}
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
if (c->extended_cpuid_level >= 0x80000008) {
|
|
u32 eax = cpuid_eax(0x80000008);
|
|
|
|
c->x86_virt_bits = (eax >> 8) & 0xff;
|
|
c->x86_phys_bits = eax & 0xff;
|
|
}
|
|
+#ifdef CONFIG_X86_32
|
|
+ else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36))
|
|
+ c->x86_phys_bits = 36;
|
|
#endif
|
|
|
|
if (c->extended_cpuid_level >= 0x80000007)
|
|
@@ -572,8 +652,12 @@ static void __init early_identify_cpu(st
|
|
{
|
|
#ifdef CONFIG_X86_64
|
|
c->x86_clflush_size = 64;
|
|
+ c->x86_phys_bits = 36;
|
|
+ c->x86_virt_bits = 48;
|
|
#else
|
|
c->x86_clflush_size = 32;
|
|
+ c->x86_phys_bits = 32;
|
|
+ c->x86_virt_bits = 32;
|
|
#endif
|
|
c->x86_cache_alignment = c->x86_clflush_size;
|
|
|
|
@@ -596,21 +680,20 @@ static void __init early_identify_cpu(st
|
|
if (this_cpu->c_early_init)
|
|
this_cpu->c_early_init(c);
|
|
|
|
- validate_pat_support(c);
|
|
-
|
|
#ifdef CONFIG_SMP
|
|
c->cpu_index = boot_cpu_id;
|
|
#endif
|
|
+ filter_cpuid_features(c, false);
|
|
}
|
|
|
|
void __init early_cpu_init(void)
|
|
{
|
|
- struct cpu_dev **cdev;
|
|
+ const struct cpu_dev *const *cdev;
|
|
int count = 0;
|
|
|
|
- printk("KERNEL supported cpus:\n");
|
|
+ printk(KERN_INFO "KERNEL supported cpus:\n");
|
|
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
|
|
- struct cpu_dev *cpudev = *cdev;
|
|
+ const struct cpu_dev *cpudev = *cdev;
|
|
unsigned int j;
|
|
|
|
if (count >= X86_VENDOR_NUM)
|
|
@@ -621,7 +704,7 @@ void __init early_cpu_init(void)
|
|
for (j = 0; j < 2; j++) {
|
|
if (!cpudev->c_ident[j])
|
|
continue;
|
|
- printk(" %s %s\n", cpudev->c_vendor,
|
|
+ printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
|
|
cpudev->c_ident[j]);
|
|
}
|
|
}
|
|
@@ -663,7 +746,7 @@ static void __cpuinit generic_identify(s
|
|
c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
|
|
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
# ifdef CONFIG_X86_HT
|
|
- c->apicid = phys_pkg_id(c->initial_apicid, 0);
|
|
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
|
|
# else
|
|
c->apicid = c->initial_apicid;
|
|
# endif
|
|
@@ -697,9 +780,13 @@ static void __cpuinit identify_cpu(struc
|
|
c->x86_coreid_bits = 0;
|
|
#ifdef CONFIG_X86_64
|
|
c->x86_clflush_size = 64;
|
|
+ c->x86_phys_bits = 36;
|
|
+ c->x86_virt_bits = 48;
|
|
#else
|
|
c->cpuid_level = -1; /* CPUID not detected */
|
|
c->x86_clflush_size = 32;
|
|
+ c->x86_phys_bits = 32;
|
|
+ c->x86_virt_bits = 32;
|
|
#endif
|
|
c->x86_cache_alignment = c->x86_clflush_size;
|
|
memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
@@ -712,7 +799,7 @@ static void __cpuinit identify_cpu(struc
|
|
this_cpu->c_identify(c);
|
|
|
|
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
- c->apicid = phys_pkg_id(0);
|
|
+ c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
|
|
#endif
|
|
|
|
/*
|
|
@@ -732,13 +819,16 @@ static void __cpuinit identify_cpu(struc
|
|
squash_the_stupid_serial_number(c);
|
|
|
|
/*
|
|
- * The vendor-specific functions might have changed features. Now
|
|
- * we do "generic changes."
|
|
+ * The vendor-specific functions might have changed features.
|
|
+ * Now we do "generic changes."
|
|
*/
|
|
|
|
+ /* Filter out anything that depends on CPUID levels we don't have */
|
|
+ filter_cpuid_features(c, true);
|
|
+
|
|
/* If the model name is still unset, do table lookup. */
|
|
if (!c->x86_model_id[0]) {
|
|
- char *p;
|
|
+ const char *p;
|
|
p = table_lookup_model(c);
|
|
if (p)
|
|
strcpy(c->x86_model_id, p);
|
|
@@ -794,6 +884,7 @@ static void vgetcpu_set_mode(void)
|
|
void __init identify_boot_cpu(void)
|
|
{
|
|
identify_cpu(&boot_cpu_data);
|
|
+ init_c1e_mask();
|
|
#ifdef CONFIG_X86_32
|
|
sysenter_setup();
|
|
enable_sep_cpu();
|
|
@@ -813,11 +904,11 @@ void __cpuinit identify_secondary_cpu(st
|
|
}
|
|
|
|
struct msr_range {
|
|
- unsigned min;
|
|
- unsigned max;
|
|
+ unsigned min;
|
|
+ unsigned max;
|
|
};
|
|
|
|
-static struct msr_range msr_range_array[] __cpuinitdata = {
|
|
+static const struct msr_range msr_range_array[] __cpuinitconst = {
|
|
{ 0x00000000, 0x00000418},
|
|
{ 0xc0000000, 0xc000040b},
|
|
{ 0xc0010000, 0xc0010142},
|
|
@@ -826,14 +917,15 @@ static struct msr_range msr_range_array[
|
|
|
|
static void __cpuinit print_cpu_msr(void)
|
|
{
|
|
+ unsigned index_min, index_max;
|
|
unsigned index;
|
|
u64 val;
|
|
int i;
|
|
- unsigned index_min, index_max;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
|
|
index_min = msr_range_array[i].min;
|
|
index_max = msr_range_array[i].max;
|
|
+
|
|
for (index = index_min; index < index_max; index++) {
|
|
if (rdmsrl_amd_safe(index, &val))
|
|
continue;
|
|
@@ -843,6 +935,7 @@ static void __cpuinit print_cpu_msr(void
|
|
}
|
|
|
|
static int show_msr __cpuinitdata;
|
|
+
|
|
static __init int setup_show_msr(char *arg)
|
|
{
|
|
int num;
|
|
@@ -864,12 +957,14 @@ __setup("noclflush", setup_noclflush);
|
|
|
|
void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
|
|
{
|
|
- char *vendor = NULL;
|
|
+ const char *vendor = NULL;
|
|
|
|
- if (c->x86_vendor < X86_VENDOR_NUM)
|
|
+ if (c->x86_vendor < X86_VENDOR_NUM) {
|
|
vendor = this_cpu->c_vendor;
|
|
- else if (c->cpuid_level >= 0)
|
|
- vendor = c->x86_vendor_id;
|
|
+ } else {
|
|
+ if (c->cpuid_level >= 0)
|
|
+ vendor = c->x86_vendor_id;
|
|
+ }
|
|
|
|
if (vendor && !strstr(c->x86_model_id, vendor))
|
|
printk(KERN_CONT "%s ", vendor);
|
|
@@ -896,87 +991,57 @@ void __cpuinit print_cpu_info(struct cpu
|
|
static __init int setup_disablecpuid(char *arg)
|
|
{
|
|
int bit;
|
|
+
|
|
if (get_option(&arg, &bit) && bit < NCAPINTS*32)
|
|
setup_clear_cpu_cap(bit);
|
|
else
|
|
return 0;
|
|
+
|
|
return 1;
|
|
}
|
|
__setup("clearcpuid=", setup_disablecpuid);
|
|
|
|
#ifdef CONFIG_X86_64
|
|
-struct x8664_pda **_cpu_pda __read_mostly;
|
|
-EXPORT_SYMBOL(_cpu_pda);
|
|
-
|
|
#ifndef CONFIG_X86_NO_IDT
|
|
struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
|
|
#endif
|
|
|
|
-static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
|
|
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
|
+ irq_stack_union) __aligned(PAGE_SIZE);
|
|
|
|
-static void __ref switch_pt(int cpu)
|
|
+void xen_switch_pt(void)
|
|
{
|
|
#ifdef CONFIG_XEN
|
|
- if (cpu == 0)
|
|
- xen_init_pt();
|
|
xen_pt_switch(__pa_symbol(init_level4_pgt));
xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
#endif
}
-void __cpuinit pda_init(int cpu)
-{
- struct x8664_pda *pda = cpu_pda(cpu);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
- /* Setup up data that may be needed in __get_free_pages early */
- loadsegment(fs, 0);
- loadsegment(gs, 0);
-#ifndef CONFIG_XEN
- /* Memory clobbers used to order PDA accessed */
- mb();
- wrmsrl(MSR_GS_BASE, pda);
- mb();
-#else
- if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
- (unsigned long)pda))
- BUG();
-#endif
-
- pda->cpunumber = cpu;
- pda->irqcount = -1;
- pda->kernelstack = (unsigned long)stack_thread_info() -
- PDA_STACKOFFSET + THREAD_SIZE;
- pda->active_mm = &init_mm;
- pda->mmu_state = 0;
-
- if (cpu == 0) {
- /* others are initialized in smpboot.c */
- pda->pcurrent = &init_task;
- pda->irqstackptr = boot_cpu_stack;
- pda->irqstackptr += IRQSTACKSIZE - 64;
- } else {
- if (!pda->irqstackptr) {
- pda->irqstackptr = (char *)
- __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
- if (!pda->irqstackptr)
- panic("cannot allocate irqstack for cpu %d",
- cpu);
- pda->irqstackptr += IRQSTACKSIZE - 64;
- }
+DEFINE_PER_CPU(unsigned long, kernel_stack) =
+ (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(kernel_stack);
- if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
- pda->nodenumber = cpu_to_node(cpu);
- }
-
- switch_pt(cpu);
-}
+DEFINE_PER_CPU(unsigned int, irq_count) = -1;
#ifndef CONFIG_X86_NO_TSS
-static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
- DEBUG_STKSZ] __page_aligned_bss;
-#endif
+/*
+ * Special IST stacks which the CPU switches to when it calls
+ * an IST-marked descriptor entry. Up to 7 stacks (hardware
+ * limit), all of them are 4K, except the debug stack which
+ * is 8K.
+ */
+static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+};
-extern asmlinkage void ignore_sysret(void);
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
+ __aligned(PAGE_SIZE);
+#endif
void __cpuinit syscall_init(void)
{
@@ -1020,16 +1085,38 @@ unsigned long kernel_eflags;
DEFINE_PER_CPU(struct orig_ist, orig_ist);
#endif
-#else
+#else /* CONFIG_X86_64 */
-/* Make sure %fs is initialized properly in idle threads */
+#ifdef CONFIG_CC_STACKPROTECTOR
+DEFINE_PER_CPU(unsigned long, stack_canary);
+#endif
+
+/* Make sure %fs and %gs are initialized properly in idle threads */
struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
{
memset(regs, 0, sizeof(struct pt_regs));
regs->fs = __KERNEL_PERCPU;
+ regs->gs = __KERNEL_STACK_CANARY;
+
return regs;
}
-#endif
+#endif /* CONFIG_X86_64 */
+
+/*
+ * Clear all 6 debug registers:
+ */
+static void clear_all_debug_regs(void)
+{
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ /* Ignore db4, db5 */
+ if ((i == 4) || (i == 5))
+ continue;
+
+ set_debugreg(0, i);
+ }
+}
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -1039,24 +1126,31 @@ struct pt_regs * __cpuinit idle_regs(str
* A lot of state is already set up in PDA init for 64 bit
*/
#ifdef CONFIG_X86_64
+
void __cpuinit cpu_init(void)
{
- int cpu = stack_smp_processor_id();
#ifndef CONFIG_X86_NO_TSS
- struct tss_struct *t = &per_cpu(init_tss, cpu);
- struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
+ struct orig_ist *orig_ist;
+ struct tss_struct *t;
unsigned long v;
- char *estacks = NULL;
int i;
#endif
struct task_struct *me;
+ int cpu;
+ cpu = stack_smp_processor_id();
/* CPU 0 is initialised in head64.c */
if (cpu != 0)
- pda_init(cpu);
+ xen_switch_pt();
#ifndef CONFIG_X86_NO_TSS
- else
- estacks = boot_exception_stacks;
+ t = &per_cpu(init_tss, cpu);
+ orig_ist = &per_cpu(orig_ist, cpu);
+#endif
+
+#ifdef CONFIG_NUMA
+ if (cpu != 0 && percpu_read(node_number) == 0 &&
+ cpu_to_node(cpu) != NUMA_NO_NODE)
+ percpu_write(node_number, cpu_to_node(cpu));
#endif
me = current;
@@ -1073,7 +1167,9 @@ void __cpuinit cpu_init(void)
* and set up the GDT descriptor:
*/
- switch_to_new_gdt();
+ switch_to_new_gdt(cpu);
+ loadsegment(fs, 0);
+
#ifndef CONFIG_X86_NO_IDT
load_idt((const struct desc_ptr *)&idt_descr);
#endif
@@ -1086,8 +1182,8 @@ void __cpuinit cpu_init(void)
barrier();
check_efer();
-#ifndef CONFIG_XEN
- if (cpu != 0 && x2apic)
+#ifdef CONFIG_X86_LOCAL_APIC
+ if (cpu != 0)
enable_x2apic();
#endif
@@ -1096,24 +1192,17 @@ void __cpuinit cpu_init(void)
* set up and load the per-CPU TSS
*/
if (!orig_ist->ist[0]) {
- static const unsigned int order[N_EXCEPTION_STACKS] = {
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
- };
+ char *estacks = per_cpu(exception_stacks, cpu);
+
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
- if (cpu) {
- estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
- if (!estacks)
- panic("Cannot allocate exception "
- "stack %ld %d\n", v, cpu);
- }
- estacks += PAGE_SIZE << order[v];
+ estacks += exception_stack_sizes[v];
orig_ist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
}
t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+
/*
* <= is required because the CPU will access up to
* 8 bits beyond the end of the IO permission bitmap.
@@ -1124,8 +1213,7 @@ void __cpuinit cpu_init(void)
atomic_inc(&init_mm.mm_count);
me->active_mm = &init_mm;
- if (me->mm)
- BUG();
+ BUG_ON(me->mm);
enter_lazy_tlb(&init_mm, me);
load_sp0(t, &current->thread);
@@ -1144,22 +1232,9 @@ void __cpuinit cpu_init(void)
*/
if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
arch_kgdb_ops.correct_hw_break();
- else {
-#endif
- /*
- * Clear all 6 debug registers:
- */
-
- set_debugreg(0UL, 0);
- set_debugreg(0UL, 1);
- set_debugreg(0UL, 2);
- set_debugreg(0UL, 3);
- set_debugreg(0UL, 6);
- set_debugreg(0UL, 7);
-#ifdef CONFIG_KGDB
- /* If the kgdb is connected no debug regs should be altered. */
- }
+ else
#endif
+ clear_all_debug_regs();
fpu_init();
@@ -1171,8 +1246,10 @@ void __cpuinit cpu_init(void)
kernel_eflags &= ~X86_EFLAGS_IF;
#endif
+#ifdef CONFIG_X86_LOCAL_APIC
if (is_uv_system())
uv_cpu_init();
+#endif
}
#else
@@ -1188,7 +1265,8 @@ void __cpuinit cpu_init(void)
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
- for (;;) local_irq_enable();
+ for (;;)
+ local_irq_enable();
}
printk(KERN_INFO "Initializing CPU#%d\n", cpu);
@@ -1196,36 +1274,30 @@ void __cpuinit cpu_init(void)
if (cpu_has_vme || cpu_has_de)
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- switch_to_new_gdt();
+ switch_to_new_gdt(cpu);
/*
* Set up and load the per-CPU TSS and LDT
*/
atomic_inc(&init_mm.mm_count);
curr->active_mm = &init_mm;
- if (curr->mm)
- BUG();
+ BUG_ON(curr->mm);
enter_lazy_tlb(&init_mm, curr);
load_sp0(t, thread);
load_LDT(&init_mm.context);
+#ifndef CONFIG_X86_NO_TSS
+ t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
+#endif
+
#ifdef CONFIG_DOUBLEFAULT
/* Set up doublefault TSS pointer in the GDT */
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
- /* Clear %gs. */
- asm volatile ("mov %0, %%gs" : : "r" (0));
-
- /* Clear all 6 debug registers: */
- set_debugreg(0, 0);
- set_debugreg(0, 1);
- set_debugreg(0, 2);
- set_debugreg(0, 3);
- set_debugreg(0, 6);
- set_debugreg(0, 7);
+ clear_all_debug_regs();
/*
* Force FPU initialization:
@@ -1245,6 +1317,4 @@ void __cpuinit cpu_init(void)
xsave_init();
}
-
-
#endif
--- head-2010-05-25.orig/arch/x86/kernel/cpu/intel.c 2010-05-25 09:12:08.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/intel.c 2010-05-25 09:24:45.000000000 +0200
|
|
@@ -91,8 +91,10 @@ static void __cpuinit early_init_intel(s
|
|
if (c->x86_power & (1 << 8)) {
|
|
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
|
|
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
|
|
+#ifndef CONFIG_XEN
|
|
if (!check_tsc_unstable())
|
|
sched_clock_stable = 1;
|
|
+#endif
|
|
}
|
|
|
|
/*
|
|
--- head-2010-05-25.orig/arch/x86/kernel/e820-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/e820-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -129,19 +129,50 @@ int __init e820_all_mapped(u64 start, u6
|
|
/*
|
|
* Add a memory region to the kernel e820 map.
|
|
*/
|
|
-void __init e820_add_region(u64 start, u64 size, int type)
|
|
+static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
|
|
+ int type)
|
|
{
|
|
- int x = e820.nr_map;
|
|
+ int x = e820x->nr_map;
|
|
|
|
- if (x == ARRAY_SIZE(e820.map)) {
|
|
+ if (x == ARRAY_SIZE(e820x->map)) {
|
|
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
|
return;
|
|
}
|
|
|
|
- e820.map[x].addr = start;
|
|
- e820.map[x].size = size;
|
|
- e820.map[x].type = type;
|
|
- e820.nr_map++;
|
|
+ e820x->map[x].addr = start;
|
|
+ e820x->map[x].size = size;
|
|
+ e820x->map[x].type = type;
|
|
+ e820x->nr_map++;
|
|
+}
|
|
+
|
|
+void __init e820_add_region(u64 start, u64 size, int type)
|
|
+{
|
|
+ __e820_add_region(&e820, start, size, type);
|
|
+}
|
|
+
|
|
+static void __init e820_print_type(u32 type)
|
|
+{
|
|
+ switch (type) {
|
|
+ case E820_RAM:
|
|
+ case E820_RESERVED_KERN:
|
|
+ printk(KERN_CONT "(usable)");
|
|
+ break;
|
|
+ case E820_RESERVED:
|
|
+ printk(KERN_CONT "(reserved)");
|
|
+ break;
|
|
+ case E820_ACPI:
|
|
+ printk(KERN_CONT "(ACPI data)");
|
|
+ break;
|
|
+ case E820_NVS:
|
|
+ printk(KERN_CONT "(ACPI NVS)");
|
|
+ break;
|
|
+ case E820_UNUSABLE:
|
|
+ printk(KERN_CONT "(unusable)");
|
|
+ break;
|
|
+ default:
|
|
+ printk(KERN_CONT "type %u", type);
|
|
+ break;
|
|
+ }
|
|
}
|
|
|
|
static void __init _e820_print_map(const struct e820map *e820, const char *who)
|
|
@@ -153,27 +184,8 @@ static void __init _e820_print_map(const
|
|
(unsigned long long) e820->map[i].addr,
|
|
(unsigned long long)
|
|
(e820->map[i].addr + e820->map[i].size));
|
|
- switch (e820->map[i].type) {
|
|
- case E820_RAM:
|
|
- case E820_RESERVED_KERN:
|
|
- printk(KERN_CONT "(usable)\n");
|
|
- break;
|
|
- case E820_RESERVED:
|
|
- printk(KERN_CONT "(reserved)\n");
|
|
- break;
|
|
- case E820_ACPI:
|
|
- printk(KERN_CONT "(ACPI data)\n");
|
|
- break;
|
|
- case E820_NVS:
|
|
- printk(KERN_CONT "(ACPI NVS)\n");
|
|
- break;
|
|
- case E820_UNUSABLE:
|
|
- printk("(unusable)\n");
|
|
- break;
|
|
- default:
|
|
- printk(KERN_CONT "type %u\n", e820->map[i].type);
|
|
- break;
|
|
- }
|
|
+ e820_print_type(e820->map[i].type);
|
|
+ printk(KERN_CONT "\n");
|
|
}
|
|
}
|
|
|
|
@@ -240,7 +252,7 @@ static void __init _e820_print_map(const
|
|
*/
|
|
|
|
int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
|
|
- int *pnr_map)
|
|
+ u32 *pnr_map)
|
|
{
|
|
struct change_member {
|
|
struct e820entry *pbios; /* pointer to original bios entry */
|
|
@@ -444,11 +456,12 @@ static int __init append_e820_map(struct
|
|
return __append_e820_map(biosmap, nr_map);
|
|
}
|
|
|
|
-static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
|
|
+static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
|
|
u64 size, unsigned old_type,
|
|
unsigned new_type)
|
|
{
|
|
- unsigned int i, x;
|
|
+ u64 end;
|
|
+ unsigned int i;
|
|
u64 real_updated_size = 0;
|
|
|
|
BUG_ON(old_type == new_type);
|
|
@@ -456,40 +469,59 @@ static u64 __init e820_update_range_map(
|
|
if (size > (ULLONG_MAX - start))
|
|
size = ULLONG_MAX - start;
|
|
|
|
+ end = start + size;
|
|
+ printk(KERN_DEBUG "e820 update range: %016Lx - %016Lx ",
|
|
+ (unsigned long long) start,
|
|
+ (unsigned long long) end);
|
|
+ e820_print_type(old_type);
|
|
+ printk(KERN_CONT " ==> ");
|
|
+ e820_print_type(new_type);
|
|
+ printk(KERN_CONT "\n");
|
|
+
|
|
for (i = 0; i < e820x->nr_map; i++) {
|
|
struct e820entry *ei = &e820x->map[i];
|
|
u64 final_start, final_end;
|
|
+ u64 ei_end;
|
|
+
|
|
if (ei->type != old_type)
|
|
continue;
|
|
- /* totally covered? */
|
|
- if (ei->addr >= start &&
|
|
- (ei->addr + ei->size) <= (start + size)) {
|
|
+
|
|
+ ei_end = ei->addr + ei->size;
|
|
+ /* totally covered by new range? */
|
|
+ if (ei->addr >= start && ei_end <= end) {
|
|
ei->type = new_type;
|
|
real_updated_size += ei->size;
|
|
continue;
|
|
}
|
|
+
|
|
+ /* new range is totally covered? */
|
|
+ if (ei->addr < start && ei_end > end) {
|
|
+ __e820_add_region(e820x, start, size, new_type);
|
|
+ __e820_add_region(e820x, end, ei_end - end, ei->type);
|
|
+ ei->size = start - ei->addr;
|
|
+ real_updated_size += size;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
/* partially covered */
|
|
final_start = max(start, ei->addr);
|
|
- final_end = min(start + size, ei->addr + ei->size);
|
|
+ final_end = min(end, ei_end);
|
|
if (final_start >= final_end)
|
|
continue;
|
|
|
|
- x = e820x->nr_map;
|
|
- if (x == ARRAY_SIZE(e820x->map)) {
|
|
- printk(KERN_ERR "Too many memory map entries!\n");
|
|
- break;
|
|
- }
|
|
- e820x->map[x].addr = final_start;
|
|
- e820x->map[x].size = final_end - final_start;
|
|
- e820x->map[x].type = new_type;
|
|
- e820x->nr_map++;
|
|
+ __e820_add_region(e820x, final_start, final_end - final_start,
|
|
+ new_type);
|
|
|
|
real_updated_size += final_end - final_start;
|
|
|
|
+ /*
|
|
+ * left range could be head or tail, so need to update
|
|
+ * size at first.
|
|
+ */
|
|
+ ei->size -= final_end - final_start;
|
|
if (ei->addr < final_start)
|
|
continue;
|
|
ei->addr = final_end;
|
|
- ei->size -= final_end - final_start;
|
|
}
|
|
return real_updated_size;
|
|
}
|
|
@@ -497,7 +529,7 @@ static u64 __init e820_update_range_map(
|
|
u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
|
|
unsigned new_type)
|
|
{
|
|
- return e820_update_range_map(&e820, start, size, old_type, new_type);
|
|
+ return __e820_update_range(&e820, start, size, old_type, new_type);
|
|
}
|
|
|
|
static u64 __init e820_update_range_saved(u64 start, u64 size,
|
|
@@ -505,11 +537,11 @@ static u64 __init e820_update_range_save
|
|
{
|
|
#ifdef CONFIG_XEN
|
|
if (is_initial_xendomain())
|
|
- return e820_update_range_map(&machine_e820,
|
|
- phys_to_machine(start), size,
|
|
- old_type, new_type);
|
|
+ return __e820_update_range(&machine_e820,
|
|
+ phys_to_machine(start), size,
|
|
+ old_type, new_type);
|
|
#endif
|
|
- return e820_update_range_map(&e820_saved, start, size, old_type,
|
|
+ return __e820_update_range(&e820_saved, start, size, old_type,
|
|
new_type);
|
|
}
|
|
|
|
@@ -553,7 +585,7 @@ u64 __init e820_remove_range(u64 start,
|
|
|
|
void __init update_e820(void)
|
|
{
|
|
- int nr_map;
|
|
+ u32 nr_map;
|
|
|
|
nr_map = e820.nr_map;
|
|
if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr_map))
|
|
@@ -564,7 +596,7 @@ void __init update_e820(void)
|
|
}
|
|
static void __init update_e820_saved(void)
|
|
{
|
|
- int nr_map;
|
|
+ u32 nr_map;
|
|
|
|
nr_map = e820_saved.nr_map;
|
|
if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
|
|
@@ -916,6 +948,9 @@ void __init reserve_early_overlap_ok(u64
|
|
*/
|
|
void __init reserve_early(u64 start, u64 end, char *name)
|
|
{
|
|
+ if (start >= end)
|
|
+ return;
|
|
+
|
|
drop_overlaps_that_are_ok(start, end);
|
|
__reserve_early(start, end, name, 0);
|
|
}
|
|
@@ -1389,7 +1424,7 @@ early_param("memmap", parse_memmap_opt);
|
|
void __init finish_e820_parsing(void)
|
|
{
|
|
if (userdef) {
|
|
- int nr = e820.nr_map;
|
|
+ u32 nr = e820.nr_map;
|
|
|
|
if (sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &nr) < 0)
|
|
early_panic("Invalid user supplied memory map");
|
|
@@ -1479,7 +1514,7 @@ void __init e820_reserve_resources_late(
|
|
char *__init default_machine_specific_memory_setup(void)
|
|
{
|
|
char *who = "BIOS-e820";
|
|
- int new_nr;
|
|
+ u32 new_nr;
|
|
/*
|
|
* Try to copy the BIOS-supplied E820-map.
|
|
*
|
|
--- head-2010-05-25.orig/arch/x86/kernel/early_printk-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/early_printk-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -12,8 +12,8 @@
|
|
#include <asm/fcntl.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/pci-direct.h>
|
|
-#include <asm/pgtable.h>
|
|
#include <asm/fixmap.h>
|
|
+#include <asm/pgtable.h>
|
|
#include <linux/usb/ehci_def.h>
|
|
|
|
#ifndef CONFIG_XEN
|
|
@@ -279,7 +279,7 @@ static int dbgp_wait_until_complete(void
|
|
return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
|
|
}
|
|
|
|
-static void dbgp_mdelay(int ms)
|
|
+static void __init dbgp_mdelay(int ms)
|
|
{
|
|
int i;
|
|
|
|
@@ -340,7 +340,7 @@ static void dbgp_set_data(const void *bu
|
|
writel(hi, &ehci_debug->data47);
|
|
}
|
|
|
|
-static void dbgp_get_data(void *buf, int size)
|
|
+static void __init dbgp_get_data(void *buf, int size)
|
|
{
|
|
unsigned char *bytes = buf;
|
|
u32 lo, hi;
|
|
@@ -384,7 +384,7 @@ static int dbgp_bulk_write(unsigned devn
|
|
return ret;
|
|
}
|
|
|
|
-static int dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
|
|
+static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
|
|
int size)
|
|
{
|
|
u32 pids, addr, ctrl;
|
|
@@ -415,8 +415,8 @@ static int dbgp_bulk_read(unsigned devnu
|
|
return ret;
|
|
}
|
|
|
|
-static int dbgp_control_msg(unsigned devnum, int requesttype, int request,
|
|
- int value, int index, void *data, int size)
|
|
+static int __init dbgp_control_msg(unsigned devnum, int requesttype,
|
|
+ int request, int value, int index, void *data, int size)
|
|
{
|
|
u32 pids, addr, ctrl;
|
|
struct usb_ctrlrequest req;
|
|
@@ -518,7 +518,7 @@ static u32 __init find_dbgp(int ehci_num
|
|
return 0;
|
|
}
|
|
|
|
-static int ehci_reset_port(int port)
|
|
+static int __init ehci_reset_port(int port)
|
|
{
|
|
u32 portsc;
|
|
u32 delay_time, delay;
|
|
@@ -561,7 +561,7 @@ static int ehci_reset_port(int port)
|
|
return -EBUSY;
|
|
}
|
|
|
|
-static int ehci_wait_for_port(int port)
|
|
+static int __init ehci_wait_for_port(int port)
|
|
{
|
|
u32 status;
|
|
int ret, reps;
|
|
@@ -586,13 +586,13 @@ static inline void dbgp_printk(const cha
|
|
|
|
typedef void (*set_debug_port_t)(int port);
|
|
|
|
-static void default_set_debug_port(int port)
|
|
+static void __init default_set_debug_port(int port)
|
|
{
|
|
}
|
|
|
|
-static set_debug_port_t set_debug_port = default_set_debug_port;
|
|
+static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
|
|
|
|
-static void nvidia_set_debug_port(int port)
|
|
+static void __init nvidia_set_debug_port(int port)
|
|
{
|
|
u32 dword;
|
|
dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
|
|
--- head-2010-05-25.orig/arch/x86/kernel/entry_32-xen.S 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/entry_32-xen.S 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -30,12 +30,13 @@
|
|
* 1C(%esp) - %ds
|
|
* 20(%esp) - %es
|
|
* 24(%esp) - %fs
|
|
- * 28(%esp) - orig_eax
|
|
- * 2C(%esp) - %eip
|
|
- * 30(%esp) - %cs
|
|
- * 34(%esp) - %eflags
|
|
- * 38(%esp) - %oldesp
|
|
- * 3C(%esp) - %oldss
|
|
+ * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
|
|
+ * 2C(%esp) - orig_eax
|
|
+ * 30(%esp) - %eip
|
|
+ * 34(%esp) - %cs
|
|
+ * 38(%esp) - %eflags
|
|
+ * 3C(%esp) - %oldesp
|
|
+ * 40(%esp) - %oldss
|
|
*
|
|
* "current" is in register %ebx during any slow entries.
|
|
*/
|
|
@@ -46,7 +47,7 @@
|
|
#include <asm/errno.h>
|
|
#include <asm/segment.h>
|
|
#include <asm/smp.h>
|
|
-#include <asm/page.h>
|
|
+#include <asm/page_types.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/percpu.h>
|
|
#include <asm/dwarf2.h>
|
|
@@ -105,121 +106,221 @@ NMI_MASK = 0x80000000
|
|
#define resume_userspace_sig resume_userspace
|
|
#endif
|
|
|
|
-#define SAVE_ALL \
|
|
- cld; \
|
|
- pushl %fs; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- /*CFI_REL_OFFSET fs, 0;*/\
|
|
- pushl %es; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- /*CFI_REL_OFFSET es, 0;*/\
|
|
- pushl %ds; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- /*CFI_REL_OFFSET ds, 0;*/\
|
|
- pushl %eax; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET eax, 0;\
|
|
- pushl %ebp; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET ebp, 0;\
|
|
- pushl %edi; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET edi, 0;\
|
|
- pushl %esi; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET esi, 0;\
|
|
- pushl %edx; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET edx, 0;\
|
|
- pushl %ecx; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET ecx, 0;\
|
|
- pushl %ebx; \
|
|
- CFI_ADJUST_CFA_OFFSET 4;\
|
|
- CFI_REL_OFFSET ebx, 0;\
|
|
- movl $(__USER_DS), %edx; \
|
|
- movl %edx, %ds; \
|
|
- movl %edx, %es; \
|
|
- movl $(__KERNEL_PERCPU), %edx; \
|
|
+/*
|
|
+ * User gs save/restore
|
|
+ *
|
|
+ * %gs is used for userland TLS and kernel only uses it for stack
|
|
+ * canary which is required to be at %gs:20 by gcc. Read the comment
|
|
+ * at the top of stackprotector.h for more info.
|
|
+ *
|
|
+ * Local labels 98 and 99 are used.
|
|
+ */
|
|
+#ifdef CONFIG_X86_32_LAZY_GS
|
|
+
|
|
+ /* unfortunately push/pop can't be no-op */
|
|
+.macro PUSH_GS
|
|
+ pushl $0
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+.endm
|
|
+.macro POP_GS pop=0
|
|
+ addl $(4 + \pop), %esp
|
|
+ CFI_ADJUST_CFA_OFFSET -(4 + \pop)
|
|
+.endm
|
|
+.macro POP_GS_EX
|
|
+.endm
|
|
+
|
|
+ /* all the rest are no-op */
|
|
+.macro PTGS_TO_GS
|
|
+.endm
|
|
+.macro PTGS_TO_GS_EX
|
|
+.endm
|
|
+.macro GS_TO_REG reg
|
|
+.endm
|
|
+.macro REG_TO_PTGS reg
|
|
+.endm
|
|
+.macro SET_KERNEL_GS reg
|
|
+.endm
|
|
+
|
|
+#else /* CONFIG_X86_32_LAZY_GS */
|
|
+
|
|
+.macro PUSH_GS
|
|
+ pushl %gs
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET gs, 0*/
|
|
+.endm
|
|
+
|
|
+.macro POP_GS pop=0
|
|
+98: popl %gs
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ /*CFI_RESTORE gs*/
|
|
+ .if \pop <> 0
|
|
+ add $\pop, %esp
|
|
+ CFI_ADJUST_CFA_OFFSET -\pop
|
|
+ .endif
|
|
+.endm
|
|
+.macro POP_GS_EX
|
|
+.pushsection .fixup, "ax"
|
|
+99: movl $0, (%esp)
|
|
+ jmp 98b
|
|
+.section __ex_table, "a"
|
|
+ .align 4
|
|
+ .long 98b, 99b
|
|
+.popsection
|
|
+.endm
|
|
+
|
|
+.macro PTGS_TO_GS
|
|
+98: mov PT_GS(%esp), %gs
|
|
+.endm
|
|
+.macro PTGS_TO_GS_EX
|
|
+.pushsection .fixup, "ax"
|
|
+99: movl $0, PT_GS(%esp)
|
|
+ jmp 98b
|
|
+.section __ex_table, "a"
|
|
+ .align 4
|
|
+ .long 98b, 99b
|
|
+.popsection
|
|
+.endm
|
|
+
|
|
+.macro GS_TO_REG reg
|
|
+ movl %gs, \reg
|
|
+ /*CFI_REGISTER gs, \reg*/
|
|
+.endm
|
|
+.macro REG_TO_PTGS reg
|
|
+ movl \reg, PT_GS(%esp)
|
|
+ /*CFI_REL_OFFSET gs, PT_GS*/
|
|
+.endm
|
|
+.macro SET_KERNEL_GS reg
|
|
+ movl $(__KERNEL_STACK_CANARY), \reg
|
|
+ movl \reg, %gs
|
|
+.endm
|
|
+
|
|
+#endif /* CONFIG_X86_32_LAZY_GS */
|
|
+
|
|
+.macro SAVE_ALL
|
|
+ cld
|
|
+ PUSH_GS
|
|
+ pushl %fs
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET fs, 0;*/
|
|
+ pushl %es
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET es, 0;*/
|
|
+ pushl %ds
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET ds, 0;*/
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET eax, 0
|
|
+ pushl %ebp
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET ebp, 0
|
|
+ pushl %edi
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET edi, 0
|
|
+ pushl %esi
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET esi, 0
|
|
+ pushl %edx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET edx, 0
|
|
+ pushl %ecx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET ecx, 0
|
|
+ pushl %ebx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ CFI_REL_OFFSET ebx, 0
|
|
+ movl $(__USER_DS), %edx
|
|
+ movl %edx, %ds
|
|
+ movl %edx, %es
|
|
+ movl $(__KERNEL_PERCPU), %edx
|
|
movl %edx, %fs
|
|
+ SET_KERNEL_GS %edx
|
|
+.endm
|
|
|
|
-#define RESTORE_INT_REGS \
|
|
- popl %ebx; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- CFI_RESTORE ebx;\
|
|
- popl %ecx; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- CFI_RESTORE ecx;\
|
|
- popl %edx; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- CFI_RESTORE edx;\
|
|
- popl %esi; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- CFI_RESTORE esi;\
|
|
- popl %edi; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- CFI_RESTORE edi;\
|
|
- popl %ebp; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- CFI_RESTORE ebp;\
|
|
- popl %eax; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
+.macro RESTORE_INT_REGS
|
|
+ popl %ebx
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ CFI_RESTORE ebx
|
|
+ popl %ecx
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ CFI_RESTORE ecx
|
|
+ popl %edx
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ CFI_RESTORE edx
|
|
+ popl %esi
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ CFI_RESTORE esi
|
|
+ popl %edi
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ CFI_RESTORE edi
|
|
+ popl %ebp
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ CFI_RESTORE ebp
|
|
+ popl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
CFI_RESTORE eax
|
|
+.endm
|
|
|
|
-#define RESTORE_REGS \
|
|
- RESTORE_INT_REGS; \
|
|
-1: popl %ds; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- /*CFI_RESTORE ds;*/\
|
|
-2: popl %es; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- /*CFI_RESTORE es;*/\
|
|
-3: popl %fs; \
|
|
- CFI_ADJUST_CFA_OFFSET -4;\
|
|
- /*CFI_RESTORE fs;*/\
|
|
-.pushsection .fixup,"ax"; \
|
|
-4: movl $0,(%esp); \
|
|
- jmp 1b; \
|
|
-5: movl $0,(%esp); \
|
|
- jmp 2b; \
|
|
-6: movl $0,(%esp); \
|
|
- jmp 3b; \
|
|
-.section __ex_table,"a";\
|
|
- .align 4; \
|
|
- .long 1b,4b; \
|
|
- .long 2b,5b; \
|
|
- .long 3b,6b; \
|
|
+.macro RESTORE_REGS pop=0
|
|
+ RESTORE_INT_REGS
|
|
+1: popl %ds
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ /*CFI_RESTORE ds;*/
|
|
+2: popl %es
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ /*CFI_RESTORE es;*/
|
|
+3: popl %fs
|
|
+ CFI_ADJUST_CFA_OFFSET -4
|
|
+ /*CFI_RESTORE fs;*/
|
|
+ POP_GS \pop
|
|
+.pushsection .fixup, "ax"
|
|
+4: movl $0, (%esp)
|
|
+ jmp 1b
|
|
+5: movl $0, (%esp)
|
|
+ jmp 2b
|
|
+6: movl $0, (%esp)
|
|
+ jmp 3b
|
|
+.section __ex_table, "a"
|
|
+ .align 4
|
|
+ .long 1b, 4b
|
|
+ .long 2b, 5b
|
|
+ .long 3b, 6b
|
|
.popsection
|
|
+ POP_GS_EX
|
|
+.endm
|
|
|
|
-#define RING0_INT_FRAME \
|
|
- CFI_STARTPROC simple;\
|
|
- CFI_SIGNAL_FRAME;\
|
|
- CFI_DEF_CFA esp, 3*4;\
|
|
- /*CFI_OFFSET cs, -2*4;*/\
|
|
+.macro RING0_INT_FRAME
|
|
+ CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
+ CFI_DEF_CFA esp, 3*4
|
|
+ /*CFI_OFFSET cs, -2*4;*/
|
|
CFI_OFFSET eip, -3*4
|
|
+.endm
|
|
|
|
-#define RING0_EC_FRAME \
|
|
- CFI_STARTPROC simple;\
|
|
- CFI_SIGNAL_FRAME;\
|
|
- CFI_DEF_CFA esp, 4*4;\
|
|
- /*CFI_OFFSET cs, -2*4;*/\
|
|
+.macro RING0_EC_FRAME
|
|
+ CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
+ CFI_DEF_CFA esp, 4*4
|
|
+ /*CFI_OFFSET cs, -2*4;*/
|
|
CFI_OFFSET eip, -3*4
|
|
+.endm
|
|
|
|
-#define RING0_PTREGS_FRAME \
|
|
- CFI_STARTPROC simple;\
|
|
- CFI_SIGNAL_FRAME;\
|
|
- CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
|
|
- /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
|
|
- CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
|
|
- /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
|
|
- /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
|
|
- CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
|
|
- CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
|
|
- CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
|
|
- CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
|
|
- CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
|
|
- CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
|
|
+.macro RING0_PTREGS_FRAME
|
|
+ CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
+ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
|
|
+ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
|
|
+ CFI_OFFSET eip, PT_EIP-PT_OLDESP
|
|
+ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
|
|
+ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
|
|
+ CFI_OFFSET eax, PT_EAX-PT_OLDESP
|
|
+ CFI_OFFSET ebp, PT_EBP-PT_OLDESP
|
|
+ CFI_OFFSET edi, PT_EDI-PT_OLDESP
|
|
+ CFI_OFFSET esi, PT_ESI-PT_OLDESP
|
|
+ CFI_OFFSET edx, PT_EDX-PT_OLDESP
|
|
+ CFI_OFFSET ecx, PT_ECX-PT_OLDESP
|
|
CFI_OFFSET ebx, PT_EBX-PT_OLDESP
|
|
+.endm
|
|
|
|
ENTRY(ret_from_fork)
|
|
CFI_STARTPROC
|
|
@@ -344,7 +445,8 @@ sysenter_past_esp:
|
|
.previous
|
|
|
|
GET_THREAD_INFO(%ebp)
|
|
- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
+
|
|
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
jnz sysenter_audit
|
|
sysenter_do_call:
|
|
cmpl $(nr_syscalls), %eax
|
|
@@ -355,7 +457,7 @@ sysenter_do_call:
|
|
DISABLE_INTERRUPTS(CLBR_ANY)
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%ebp), %ecx
|
|
- testw $_TIF_ALLWORK_MASK, %cx
|
|
+ testl $_TIF_ALLWORK_MASK, %ecx
|
|
jne sysexit_audit
|
|
sysenter_exit:
|
|
/* if something modifies registers it must also disable sysexit */
|
|
@@ -364,11 +466,12 @@ sysenter_exit:
|
|
xorl %ebp,%ebp
|
|
TRACE_IRQS_ON
|
|
1: mov PT_FS(%esp), %fs
|
|
+ PTGS_TO_GS
|
|
ENABLE_INTERRUPTS_SYSEXIT
|
|
|
|
#ifdef CONFIG_AUDITSYSCALL
|
|
sysenter_audit:
|
|
- testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
|
|
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
|
|
jnz syscall_trace_entry
|
|
addl $4,%esp
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
@@ -385,7 +488,7 @@ sysenter_audit:
|
|
jmp sysenter_do_call
|
|
|
|
sysexit_audit:
|
|
- testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
|
|
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
|
|
jne syscall_exit_work
|
|
TRACE_IRQS_ON
|
|
ENABLE_INTERRUPTS(CLBR_ANY)
|
|
@@ -398,7 +501,7 @@ sysexit_audit:
|
|
DISABLE_INTERRUPTS(CLBR_ANY)
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%ebp), %ecx
|
|
- testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx
|
|
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
|
|
jne syscall_exit_work
|
|
movl PT_EAX(%esp),%eax /* reload syscall return value */
|
|
jmp sysenter_exit
|
|
@@ -412,6 +515,7 @@ sysexit_audit:
|
|
.align 4
|
|
.long 1b,2b
|
|
.popsection
|
|
+ PTGS_TO_GS_EX
|
|
ENDPROC(ia32_sysenter_target)
|
|
|
|
# pv sysenter call handler stub
|
|
@@ -447,7 +551,7 @@ ENTRY(system_call)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
jnz syscall_trace_entry
|
|
cmpl $(nr_syscalls), %eax
|
|
jae syscall_badsys
|
|
@@ -461,7 +565,7 @@ syscall_exit:
|
|
# between sampling and the iret
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%ebp), %ecx
|
|
- testw $_TIF_ALLWORK_MASK, %cx # current->work
|
|
+ testl $_TIF_ALLWORK_MASK, %ecx # current->work
|
|
jne syscall_exit_work
|
|
|
|
restore_all:
|
|
@@ -492,8 +596,7 @@ restore_nocheck:
|
|
#endif
|
|
TRACE_IRQS_IRET
|
|
restore_nocheck_notrace:
|
|
- RESTORE_REGS
|
|
- addl $4, %esp # skip orig_eax/error_code
|
|
+ RESTORE_REGS 4 # skip orig_eax/error_code
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
irq_return:
|
|
INTERRUPT_RETURN
|
|
@@ -555,9 +658,7 @@ restore_all_enable_events:
|
|
scrit: /**** START OF CRITICAL REGION ****/
|
|
__TEST_PENDING
|
|
jnz 14f # process more events if necessary...
|
|
- RESTORE_REGS
|
|
- addl $4, %esp
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
+ RESTORE_REGS 4
|
|
1: INTERRUPT_RETURN
|
|
.section __ex_table,"a"
|
|
.align 4
|
|
@@ -571,9 +672,7 @@ ecrit: /**** END OF CRITICAL REGION ***
|
|
CFI_RESTORE_STATE
|
|
hypervisor_iret:
|
|
andl $~NMI_MASK, PT_EFLAGS(%esp)
|
|
- RESTORE_REGS
|
|
- addl $4, %esp
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
+ RESTORE_REGS 4
|
|
jmp hypercall_page + (__HYPERVISOR_iret * 32)
|
|
#endif
|
|
CFI_ENDPROC
|
|
@@ -641,7 +740,7 @@ END(syscall_trace_entry)
|
|
# perform syscall exit tracing
|
|
ALIGN
|
|
syscall_exit_work:
|
|
- testb $_TIF_WORK_SYSCALL_EXIT, %cl
|
|
+ testl $_TIF_WORK_SYSCALL_EXIT, %ecx
|
|
jz work_pending
|
|
TRACE_IRQS_ON
|
|
ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
|
|
@@ -665,29 +764,51 @@ syscall_badsys:
|
|
END(syscall_badsys)
|
|
CFI_ENDPROC
|
|
|
|
+/*
|
|
+ * System calls that need a pt_regs pointer.
|
|
+ */
|
|
+#define PTREGSCALL(name) \
|
|
+ ALIGN; \
|
|
+ptregs_##name: \
|
|
+ leal 4(%esp),%eax; \
|
|
+ jmp sys_##name;
|
|
+
|
|
+PTREGSCALL(iopl)
|
|
+PTREGSCALL(fork)
|
|
+PTREGSCALL(clone)
|
|
+PTREGSCALL(vfork)
|
|
+PTREGSCALL(execve)
|
|
+PTREGSCALL(sigaltstack)
|
|
+PTREGSCALL(sigreturn)
|
|
+PTREGSCALL(rt_sigreturn)
|
|
+PTREGSCALL(vm86)
|
|
+PTREGSCALL(vm86old)
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
-#define FIXUP_ESPFIX_STACK \
|
|
- /* since we are on a wrong stack, we cant make it a C code :( */ \
|
|
- PER_CPU(gdt_page, %ebx); \
|
|
- GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
|
|
- addl %esp, %eax; \
|
|
- pushl $__KERNEL_DS; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- pushl %eax; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- lss (%esp), %esp; \
|
|
- CFI_ADJUST_CFA_OFFSET -8;
|
|
-#define UNWIND_ESPFIX_STACK \
|
|
- movl %ss, %eax; \
|
|
- /* see if on espfix stack */ \
|
|
- cmpw $__ESPFIX_SS, %ax; \
|
|
- jne 27f; \
|
|
- movl $__KERNEL_DS, %eax; \
|
|
- movl %eax, %ds; \
|
|
- movl %eax, %es; \
|
|
- /* switch to normal stack */ \
|
|
- FIXUP_ESPFIX_STACK; \
|
|
-27:;
|
|
+.macro FIXUP_ESPFIX_STACK
|
|
+ /* since we are on a wrong stack, we cant make it a C code :( */
|
|
+ PER_CPU(gdt_page, %ebx)
|
|
+ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
|
|
+ addl %esp, %eax
|
|
+ pushl $__KERNEL_DS
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ lss (%esp), %esp
|
|
+ CFI_ADJUST_CFA_OFFSET -8
|
|
+.endm
|
|
+.macro UNWIND_ESPFIX_STACK
|
|
+ movl %ss, %eax
|
|
+ /* see if on espfix stack */
|
|
+ cmpw $__ESPFIX_SS, %ax
|
|
+ jne 27f
|
|
+ movl $__KERNEL_DS, %eax
|
|
+ movl %eax, %ds
|
|
+ movl %eax, %es
|
|
+ /* switch to normal stack */
|
|
+ FIXUP_ESPFIX_STACK
|
|
+27:
|
|
+.endm
|
|
|
|
/*
|
|
* Build the entry stubs and pointer table with some assembler magic.
|
|
@@ -743,7 +864,7 @@ common_interrupt:
|
|
ENDPROC(common_interrupt)
|
|
CFI_ENDPROC
|
|
|
|
-#define BUILD_INTERRUPT(name, nr) \
|
|
+#define BUILD_INTERRUPT3(name, nr, fn) \
|
|
ENTRY(name) \
|
|
RING0_INT_FRAME; \
|
|
pushl $~(nr); \
|
|
@@ -751,13 +872,15 @@ ENTRY(name) \
|
|
SAVE_ALL; \
|
|
TRACE_IRQS_OFF \
|
|
movl %esp,%eax; \
|
|
- call smp_##name; \
|
|
+ call fn; \
|
|
jmp ret_from_intr; \
|
|
CFI_ENDPROC; \
|
|
ENDPROC(name)
|
|
|
|
+#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
|
|
+
|
|
/* The include is where all of the SMP etc. interrupts come from */
|
|
-#include "entry_arch.h"
|
|
+#include <asm/entry_arch.h>
|
|
|
|
#else
|
|
#define UNWIND_ESPFIX_STACK
|
|
@@ -844,8 +967,13 @@ critical_fixup_table:
|
|
.byte 7 # pop %ds
|
|
.byte 8 # pop %es
|
|
.byte 9,9 # pop %fs
|
|
- .byte 10,10,10 # add $4,%esp
|
|
- .byte 11 # iret
|
|
+#ifndef CONFIG_X86_32_LAZY_GS
|
|
+ .byte 10,10 # pop %gs
|
|
+ .byte 11,11,11 # add $4,%esp
|
|
+#else
|
|
+ .byte 10,10,10 # add $8,%esp
|
|
+#endif
|
|
+ .byte 12 # iret
|
|
.byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS
|
|
.previous
|
|
|
|
@@ -1203,7 +1331,7 @@ ENTRY(ia32pv_cstar_target)
|
|
.previous
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp)
|
|
jnz cstar_trace_entry
|
|
cmpl $nr_syscalls,%eax
|
|
jae cstar_badsys
|
|
@@ -1323,7 +1451,10 @@ ENTRY(page_fault)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
ALIGN
|
|
error_code:
|
|
- /* the function address is in %fs's slot on the stack */
|
|
+ /* the function address is in %gs's slot on the stack */
|
|
+ pushl %fs
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET fs, 0*/
|
|
pushl %es
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET es, 0*/
|
|
@@ -1352,20 +1483,15 @@ error_code:
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET ebx, 0
|
|
cld
|
|
- pushl %fs
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- /*CFI_REL_OFFSET fs, 0*/
|
|
movl $(__KERNEL_PERCPU), %ecx
|
|
movl %ecx, %fs
|
|
UNWIND_ESPFIX_STACK
|
|
- popl %ecx
|
|
- CFI_ADJUST_CFA_OFFSET -4
|
|
- /*CFI_REGISTER es, ecx*/
|
|
- movl PT_FS(%esp), %edi # get the function address
|
|
+ GS_TO_REG %ecx
|
|
+ movl PT_GS(%esp), %edi # get the function address
|
|
movl PT_ORIG_EAX(%esp), %edx # get the error code
|
|
movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
|
|
- mov %ecx, PT_FS(%esp)
|
|
- /*CFI_REL_OFFSET fs, ES*/
|
|
+ REG_TO_PTGS %ecx
|
|
+ SET_KERNEL_GS %ecx
|
|
movl $(__USER_DS), %ecx
|
|
movl %ecx, %ds
|
|
movl %ecx, %es
|
|
@@ -1390,20 +1516,21 @@ END(page_fault)
|
|
* by hand onto the new stack - while updating the return eip past
|
|
* the instruction that would have done it for sysenter.
|
|
*/
|
|
-#define FIX_STACK(offset, ok, label) \
|
|
- cmpw $__KERNEL_CS,4(%esp); \
|
|
- jne ok; \
|
|
-label: \
|
|
- movl TSS_sysenter_sp0+offset(%esp),%esp; \
|
|
- CFI_DEF_CFA esp, 0; \
|
|
- CFI_UNDEFINED eip; \
|
|
- pushfl; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- pushl $__KERNEL_CS; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
- pushl $sysenter_past_esp; \
|
|
- CFI_ADJUST_CFA_OFFSET 4; \
|
|
+.macro FIX_STACK offset ok label
|
|
+ cmpw $__KERNEL_CS, 4(%esp)
|
|
+ jne \ok
|
|
+\label:
|
|
+ movl TSS_sysenter_sp0 + \offset(%esp), %esp
|
|
+ CFI_DEF_CFA esp, 0
|
|
+ CFI_UNDEFINED eip
|
|
+ pushfl
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ pushl $__KERNEL_CS
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ pushl $sysenter_past_esp
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET eip, 0
|
|
+.endm
|
|
#endif /* CONFIG_XEN */
|
|
|
|
ENTRY(debug)
|
|
@@ -1411,7 +1538,7 @@ ENTRY(debug)
|
|
#ifndef CONFIG_XEN
|
|
cmpl $ia32_sysenter_target,(%esp)
|
|
jne debug_stack_correct
|
|
- FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn)
|
|
+ FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
|
|
debug_stack_correct:
|
|
#endif /* !CONFIG_XEN */
|
|
pushl $-1 # mark this as an int
|
|
@@ -1471,7 +1598,7 @@ nmi_stack_correct:
|
|
|
|
nmi_stack_fixup:
|
|
RING0_INT_FRAME
|
|
- FIX_STACK(12,nmi_stack_correct, 1)
|
|
+ FIX_STACK 12, nmi_stack_correct, 1
|
|
jmp nmi_stack_correct
|
|
|
|
nmi_debug_stack_check:
|
|
@@ -1482,7 +1609,7 @@ nmi_debug_stack_check:
|
|
jb nmi_stack_correct
|
|
cmpl $debug_esp_fix_insn,(%esp)
|
|
ja nmi_stack_correct
|
|
- FIX_STACK(24,nmi_stack_correct, 1)
|
|
+ FIX_STACK 24, nmi_stack_correct, 1
|
|
jmp nmi_stack_correct
|
|
|
|
nmi_espfix_stack:
|
|
@@ -1494,7 +1621,7 @@ nmi_espfix_stack:
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
pushl %esp
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
- addw $4, (%esp)
|
|
+ addl $4, (%esp)
|
|
/* copy the iret frame of 12 bytes */
|
|
.rept 3
|
|
pushl 16(%esp)
|
|
--- head-2010-05-25.orig/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -51,10 +51,10 @@
|
|
#include <asm/unistd.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/hw_irq.h>
|
|
-#include <asm/page.h>
|
|
+#include <asm/page_types.h>
|
|
#include <asm/irqflags.h>
|
|
#include <asm/ftrace.h>
|
|
-#include <asm/errno.h>
|
|
+#include <asm/percpu.h>
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/features.h>
|
|
|
|
@@ -81,20 +81,17 @@ ENTRY(ftrace_caller)
|
|
movq 8(%rbp), %rsi
|
|
subq $MCOUNT_INSN_SIZE, %rdi
|
|
|
|
-.globl ftrace_call
|
|
-ftrace_call:
|
|
+GLOBAL(ftrace_call)
|
|
call ftrace_stub
|
|
|
|
MCOUNT_RESTORE_FRAME
|
|
|
|
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
|
|
-.globl ftrace_graph_call
|
|
-ftrace_graph_call:
|
|
+GLOBAL(ftrace_graph_call)
|
|
jmp ftrace_stub
|
|
#endif
|
|
|
|
-.globl ftrace_stub
|
|
-ftrace_stub:
|
|
+GLOBAL(ftrace_stub)
|
|
retq
|
|
END(ftrace_caller)
|
|
|
|
@@ -114,8 +111,7 @@ ENTRY(mcount)
|
|
jnz ftrace_graph_caller
|
|
#endif
|
|
|
|
-.globl ftrace_stub
|
|
-ftrace_stub:
|
|
+GLOBAL(ftrace_stub)
|
|
retq
|
|
|
|
trace:
|
|
@@ -152,9 +148,7 @@ ENTRY(ftrace_graph_caller)
|
|
retq
|
|
END(ftrace_graph_caller)
|
|
|
|
-
|
|
-.globl return_to_handler
|
|
-return_to_handler:
|
|
+GLOBAL(return_to_handler)
|
|
subq $80, %rsp
|
|
|
|
movq %rax, (%rsp)
|
|
@@ -369,15 +363,15 @@ ENTRY(save_args)
|
|
je 1f
|
|
SWAPGS
|
|
/*
|
|
- * irqcount is used to check if a CPU is already on an interrupt stack
|
|
+ * irq_count is used to check if a CPU is already on an interrupt stack
|
|
* or not. While this is essentially redundant with preempt_count it is
|
|
* a little cheaper to use a separate counter in the PDA (short of
|
|
* moving irq_enter into assembly, which would be too much work)
|
|
*/
|
|
-1: incl %gs:pda_irqcount
|
|
+1: incl PER_CPU_VAR(irq_count)
|
|
jne 2f
|
|
popq_cfi %rax /* move return address... */
|
|
- mov %gs:pda_irqstackptr,%rsp
|
|
+ mov PER_CPU_VAR(irq_stack_ptr),%rsp
|
|
EMPTY_FRAME 0
|
|
pushq_cfi %rbp /* backlink for unwinder */
|
|
pushq_cfi %rax /* ... to the new stack */
|
|
@@ -407,6 +401,7 @@ END(save_rest)
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* save complete stack frame */
|
|
+ .pushsection .kprobes.text, "ax"
|
|
ENTRY(save_paranoid)
|
|
XCPT_FRAME 1 RDI+8
|
|
cld
|
|
@@ -435,6 +430,7 @@ ENTRY(save_paranoid)
|
|
1: ret
|
|
CFI_ENDPROC
|
|
END(save_paranoid)
|
|
+ .popsection
|
|
#endif
|
|
|
|
/*
|
|
@@ -445,6 +441,8 @@ END(save_paranoid)
|
|
ENTRY(ret_from_fork)
|
|
DEFAULT_FRAME
|
|
|
|
+ LOCK ; btr $TIF_FORK,TI_flags(%r8)
|
|
+
|
|
push kernel_eflags(%rip)
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
popf # reset kernel eflags
|
|
@@ -454,7 +452,6 @@ ENTRY(ret_from_fork)
|
|
|
|
GET_THREAD_INFO(%rcx)
|
|
|
|
- CFI_REMEMBER_STATE
|
|
RESTORE_REST
|
|
|
|
testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
|
|
@@ -466,7 +463,6 @@ ENTRY(ret_from_fork)
|
|
RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
|
|
jmp ret_from_sys_call # go to the SYSRET fastpath
|
|
|
|
- CFI_RESTORE_STATE
|
|
CFI_ENDPROC
|
|
END(ret_from_fork)
|
|
|
|
@@ -642,9 +638,7 @@ tracesys:
|
|
* Syscall return path ending with IRET.
|
|
* Has correct top of stack, but partial stack frame.
|
|
*/
|
|
- .globl int_ret_from_sys_call
|
|
- .globl int_with_check
|
|
-int_ret_from_sys_call:
|
|
+GLOBAL(int_ret_from_sys_call)
|
|
DISABLE_INTERRUPTS(CLBR_NONE)
|
|
TRACE_IRQS_OFF
|
|
testb $3,CS-ARGOFFSET(%rsp)
|
|
@@ -655,7 +649,7 @@ int_ret_from_sys_call:
|
|
1:
|
|
movl $_TIF_ALLWORK_MASK,%edi
|
|
/* edi: mask to check */
|
|
-int_with_check:
|
|
+GLOBAL(int_with_check)
|
|
LOCKDEP_SYS_EXIT_IRQ
|
|
GET_THREAD_INFO(%rcx)
|
|
movl TI_flags(%rcx),%edx
|
|
@@ -877,10 +871,14 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
|
|
irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
|
|
#endif
|
|
|
|
+#ifdef CONFIG_X86_UV
|
|
apicinterrupt UV_BAU_MESSAGE \
|
|
uv_bau_message_intr1 uv_bau_message_interrupt
|
|
+#endif
|
|
apicinterrupt LOCAL_TIMER_VECTOR \
|
|
apic_timer_interrupt smp_apic_timer_interrupt
|
|
+apicinterrupt GENERIC_INTERRUPT_VECTOR \
|
|
+ generic_interrupt smp_generic_interrupt
|
|
|
|
#ifdef CONFIG_SMP
|
|
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
|
|
@@ -998,15 +996,15 @@ ENTRY(do_hypervisor_callback) # do_hyp
|
|
movq %rdi, %rsp # we don't return, adjust the stack frame
|
|
CFI_ENDPROC
|
|
DEFAULT_FRAME
|
|
-11: incl %gs:pda_irqcount
|
|
+11: incl PER_CPU_VAR(irq_count)
|
|
movq %rsp,%rbp
|
|
CFI_DEF_CFA_REGISTER rbp
|
|
- cmovzq %gs:pda_irqstackptr,%rsp
|
|
+ cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
|
|
pushq %rbp # backlink for old unwinder
|
|
call evtchn_do_upcall
|
|
popq %rsp
|
|
CFI_DEF_CFA_REGISTER rsp
|
|
- decl %gs:pda_irqcount
|
|
+ decl PER_CPU_VAR(irq_count)
|
|
jmp error_exit
|
|
CFI_ENDPROC
|
|
END(do_hypervisor_callback)
|
|
@@ -1197,14 +1195,14 @@ ENTRY(call_softirq)
|
|
CFI_REL_OFFSET rbp,0
|
|
mov %rsp,%rbp
|
|
CFI_DEF_CFA_REGISTER rbp
|
|
- incl %gs:pda_irqcount
|
|
- cmove %gs:pda_irqstackptr,%rsp
|
|
+ incl PER_CPU_VAR(irq_count)
|
|
+ cmove PER_CPU_VAR(irq_stack_ptr),%rsp
|
|
push %rbp # backlink for old unwinder
|
|
call __do_softirq
|
|
leaveq
|
|
CFI_DEF_CFA_REGISTER rsp
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
- decl %gs:pda_irqcount
|
|
+ decl PER_CPU_VAR(irq_count)
|
|
ret
|
|
CFI_ENDPROC
|
|
END(call_softirq)
|
|
@@ -1250,7 +1248,10 @@ ENTRY(paranoid_exit)
|
|
paranoid_swapgs:
|
|
TRACE_IRQS_IRETQ 0
|
|
SWAPGS_UNSAFE_STACK
|
|
+ RESTORE_ALL 8
|
|
+ jmp irq_return
|
|
paranoid_restore:
|
|
+ TRACE_IRQS_IRETQ 0
|
|
RESTORE_ALL 8
|
|
jmp irq_return
|
|
paranoid_userspace:
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head-xen.c 2010-04-28 17:07:13.000000000 +0200
|
|
@@ -2,6 +2,7 @@
|
|
#include <linux/init.h>
|
|
|
|
#include <asm/setup.h>
|
|
+#ifndef CONFIG_XEN
|
|
#include <asm/bios_ebda.h>
|
|
|
|
#define BIOS_LOWMEM_KILOBYTES 0x413
|
|
@@ -18,7 +19,6 @@
|
|
*/
|
|
void __init reserve_ebda_region(void)
|
|
{
|
|
-#ifndef CONFIG_XEN
|
|
unsigned int lowmem, ebda_addr;
|
|
|
|
/* To determine the position of the EBDA and the */
|
|
@@ -53,5 +53,174 @@ void __init reserve_ebda_region(void)
|
|
|
|
/* reserve all memory between lowmem and the 1MB mark */
|
|
reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved");
|
|
+}
|
|
+#else /* CONFIG_XEN */
|
|
+#include <linux/module.h>
|
|
+#include <asm/fixmap.h>
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/sections.h>
|
|
+#include <asm/setup_arch.h>
|
|
+#include <xen/interface/callback.h>
|
|
+#include <xen/interface/memory.h>
|
|
+
|
|
+extern void hypervisor_callback(void);
|
|
+extern void failsafe_callback(void);
|
|
+extern void nmi(void);
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+#include <asm/proto.h>
|
|
+#define CALLBACK_ADDR(fn) ((unsigned long)(fn))
|
|
+#else
|
|
+#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) }
|
|
+#endif
|
|
+
|
|
+unsigned long *__read_mostly machine_to_phys_mapping =
|
|
+ (void *)MACH2PHYS_VIRT_START;
|
|
+EXPORT_SYMBOL(machine_to_phys_mapping);
|
|
+unsigned int __read_mostly machine_to_phys_order;
|
|
+EXPORT_SYMBOL(machine_to_phys_order);
|
|
+
|
|
+void __init xen_start_kernel(void)
|
|
+{
|
|
+ unsigned int i;
|
|
+ struct xen_machphys_mapping mapping;
|
|
+ unsigned long machine_to_phys_nr_ents;
|
|
+#ifdef CONFIG_X86_32
|
|
+ struct xen_platform_parameters pp;
|
|
+ extern pte_t swapper_pg_fixmap[PTRS_PER_PTE];
|
|
+ unsigned long addr;
|
|
+#endif
|
|
+
|
|
+ xen_setup_features();
|
|
+
|
|
+ if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
|
|
+ machine_to_phys_mapping = (unsigned long *)mapping.v_start;
|
|
+ machine_to_phys_nr_ents = mapping.max_mfn + 1;
|
|
+ } else
|
|
+ machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
|
|
+ while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
|
|
+ machine_to_phys_order++;
|
|
+
|
|
+ if (!xen_feature(XENFEAT_auto_translated_physmap))
|
|
+ phys_to_machine_mapping =
|
|
+ (unsigned long *)xen_start_info->mfn_list;
|
|
+
|
|
+ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
+ VMASST_TYPE_writable_pagetables));
|
|
+
|
|
+ reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
|
|
+ __pa(xen_start_info->pt_base)
|
|
+ + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
|
|
+ "Xen provided");
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
+ VMASST_TYPE_4gb_segments));
|
|
+
|
|
+ init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base;
|
|
+
|
|
+ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
|
|
+ hypervisor_virt_start = pp.virt_start;
|
|
+ reserve_top_address(0UL - pp.virt_start);
|
|
+ }
|
|
+
|
|
+ BUG_ON(pte_index(hypervisor_virt_start));
|
|
+
|
|
+ /* Do an early initialization of the fixmap area */
|
|
+ make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables);
|
|
+ addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
|
|
+ set_pmd(pmd_offset(pud_offset(swapper_pg_dir + pgd_index(addr),
|
|
+ addr),
|
|
+ addr),
|
|
+ __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
|
|
+#else
|
|
+ check_efer();
|
|
+ xen_init_pt();
|
|
+#endif
|
|
+
|
|
+#define __FIXADDR_TOP (-PAGE_SIZE)
|
|
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
|
|
+#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \
|
|
+ != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE)))
|
|
+ FIX_BUG_ON(SHARED_INFO);
|
|
+ FIX_BUG_ON(ISAMAP_BEGIN);
|
|
+ FIX_BUG_ON(ISAMAP_END);
|
|
+#undef pmd_index
|
|
+#undef __FIXADDR_TOP
|
|
+
|
|
+ /* Switch to the real shared_info page, and clear the dummy page. */
|
|
+ set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
|
|
+ HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
|
|
+ clear_page(empty_zero_page);
|
|
+
|
|
+ /* Set up mapping of lowest 1MB of physical memory. */
|
|
+ for (i = 0; i < NR_FIX_ISAMAPS; i++)
|
|
+ if (is_initial_xendomain())
|
|
+ set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
|
|
+ else
|
|
+ __set_fixmap(FIX_ISAMAP_BEGIN - i,
|
|
+ virt_to_machine(empty_zero_page),
|
|
+ PAGE_KERNEL_RO);
|
|
+
|
|
+}
|
|
+
|
|
+void __init machine_specific_arch_setup(void)
|
|
+{
|
|
+ int ret;
|
|
+ static const struct callback_register __initconst event = {
|
|
+ .type = CALLBACKTYPE_event,
|
|
+ .address = CALLBACK_ADDR(hypervisor_callback)
|
|
+ };
|
|
+ static const struct callback_register __initconst failsafe = {
|
|
+ .type = CALLBACKTYPE_failsafe,
|
|
+ .address = CALLBACK_ADDR(failsafe_callback)
|
|
+ };
|
|
+#ifdef CONFIG_X86_64
|
|
+ static const struct callback_register __initconst syscall = {
|
|
+ .type = CALLBACKTYPE_syscall,
|
|
+ .address = CALLBACK_ADDR(system_call)
|
|
+ };
|
|
+#endif
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32)
|
|
+ static const struct callback_register __initconst nmi_cb = {
|
|
+ .type = CALLBACKTYPE_nmi,
|
|
+ .address = CALLBACK_ADDR(nmi)
|
|
+ };
|
|
+#endif
|
|
+
|
|
+ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
|
|
+ if (ret == 0)
|
|
+ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (ret == 0)
|
|
+ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
|
|
+#endif
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (ret == -ENOSYS)
|
|
+ ret = HYPERVISOR_set_callbacks(
|
|
+ event.address.cs, event.address.eip,
|
|
+ failsafe.address.cs, failsafe.address.eip);
|
|
+#else
|
|
+ ret = HYPERVISOR_set_callbacks(
|
|
+ event.address,
|
|
+ failsafe.address,
|
|
+ syscall.address);
|
|
+#endif
|
|
+#endif
|
|
+ BUG_ON(ret);
|
|
+
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32)
|
|
+ ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
+ if (ret == -ENOSYS) {
|
|
+ static struct xennmi_callback __initdata cb = {
|
|
+ .handler_address = (unsigned long)nmi
|
|
+ };
|
|
+
|
|
+ HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
|
|
+ }
|
|
+#endif
|
|
#endif
|
|
}
|
|
+#endif /* CONFIG_XEN */
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head32-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head32-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -9,6 +9,7 @@
|
|
#include <linux/start_kernel.h>
|
|
|
|
#include <asm/setup.h>
|
|
+#include <asm/setup_arch.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/bios_ebda.h>
|
|
@@ -18,7 +19,7 @@ void __init i386_start_kernel(void)
|
|
{
|
|
reserve_trampoline_memory();
|
|
|
|
- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
|
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
|
|
|
|
#ifndef CONFIG_XEN
|
|
#ifdef CONFIG_BLK_DEV_INITRD
|
|
@@ -30,14 +31,8 @@ void __init i386_start_kernel(void)
|
|
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
|
|
}
|
|
#endif
|
|
- reserve_early(init_pg_tables_start, init_pg_tables_end,
|
|
- "INIT_PG_TABLE");
|
|
+ reserve_ebda_region();
|
|
#else
|
|
- reserve_early(ALIGN(__pa_symbol(&_end), PAGE_SIZE),
|
|
- __pa(xen_start_info->pt_base)
|
|
- + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
|
|
- "Xen provided");
|
|
-
|
|
{
|
|
int max_cmdline;
|
|
|
|
@@ -46,9 +41,9 @@ void __init i386_start_kernel(void)
|
|
memcpy(boot_command_line, xen_start_info->cmd_line, max_cmdline);
|
|
boot_command_line[max_cmdline-1] = '\0';
|
|
}
|
|
-#endif
|
|
|
|
- reserve_ebda_region();
|
|
+ xen_start_kernel();
|
|
+#endif
|
|
|
|
/*
|
|
* At this point everything still needed from the boot loader
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head64-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head64-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -7,9 +7,6 @@
|
|
* Modified for Xen.
|
|
*/
|
|
|
|
-/* PDA is not ready to be used until the end of x86_64_start_kernel(). */
|
|
-#define arch_use_lazy_mmu_mode() false
|
|
-
|
|
#include <linux/init.h>
|
|
#include <linux/linkage.h>
|
|
#include <linux/types.h>
|
|
@@ -18,12 +15,12 @@
|
|
#include <linux/percpu.h>
|
|
#include <linux/start_kernel.h>
|
|
#include <linux/io.h>
|
|
-#include <linux/module.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/setup.h>
|
|
+#include <asm/setup_arch.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/tlbflush.h>
|
|
@@ -33,27 +30,6 @@
|
|
#include <asm/bios_ebda.h>
|
|
#include <asm/trampoline.h>
|
|
|
|
-/* boot cpu pda */
|
|
-static struct x8664_pda _boot_cpu_pda;
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-/*
|
|
- * We install an empty cpu_pda pointer table to indicate to early users
|
|
- * (numa_set_node) that the cpu_pda pointer table for cpus other than
|
|
- * the boot cpu is not yet setup.
|
|
- */
|
|
-static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
|
|
-#else
|
|
-static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
|
|
-#endif
|
|
-
|
|
-void __init x86_64_init_pda(void)
|
|
-{
|
|
- _cpu_pda = __cpu_pda;
|
|
- cpu_pda(0) = &_boot_cpu_pda;
|
|
- pda_init(0);
|
|
-}
|
|
-
|
|
#ifndef CONFIG_XEN
|
|
static void __init zap_identity_mappings(void)
|
|
{
|
|
@@ -92,16 +68,9 @@ static void __init copy_bootdata(char *r
|
|
}
|
|
|
|
#include <xen/interface/memory.h>
|
|
-unsigned long *machine_to_phys_mapping;
|
|
-EXPORT_SYMBOL(machine_to_phys_mapping);
|
|
-unsigned int machine_to_phys_order;
|
|
-EXPORT_SYMBOL(machine_to_phys_order);
|
|
|
|
void __init x86_64_start_kernel(char * real_mode_data)
|
|
{
|
|
- struct xen_machphys_mapping mapping;
|
|
- unsigned long machine_to_phys_nr_ents;
|
|
-
|
|
/*
|
|
* Build-time sanity checks on the kernel image and module
|
|
* area mappings. (these are purely build-time and produce no code)
|
|
@@ -116,21 +85,8 @@ void __init x86_64_start_kernel(char * r
|
|
(__START_KERNEL & PGDIR_MASK)));
|
|
BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END);
|
|
|
|
- xen_setup_features();
|
|
-
|
|
xen_start_info = (struct start_info *)real_mode_data;
|
|
- if (!xen_feature(XENFEAT_auto_translated_physmap))
|
|
- phys_to_machine_mapping =
|
|
- (unsigned long *)xen_start_info->mfn_list;
|
|
-
|
|
- machine_to_phys_mapping = (unsigned long *)MACH2PHYS_VIRT_START;
|
|
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
|
|
- if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
|
|
- machine_to_phys_mapping = (unsigned long *)mapping.v_start;
|
|
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
|
|
- }
|
|
- while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
|
|
- machine_to_phys_order++;
|
|
+ xen_start_kernel();
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* clear bss before set_intr_gate with early_idt_handler */
|
|
@@ -155,7 +111,7 @@ void __init x86_64_start_kernel(char * r
|
|
if (console_loglevel == 10)
|
|
early_printk("Kernel alive\n");
|
|
|
|
- x86_64_init_pda();
|
|
+ xen_switch_pt();
|
|
|
|
x86_64_start_reservations(real_mode_data);
|
|
}
|
|
@@ -166,12 +122,7 @@ void __init x86_64_start_reservations(ch
|
|
|
|
reserve_trampoline_memory();
|
|
|
|
- reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS");
|
|
-
|
|
- reserve_early(round_up(__pa_symbol(&_end), PAGE_SIZE),
|
|
- __pa(xen_start_info->pt_base)
|
|
- + (xen_start_info->nr_pt_frames << PAGE_SHIFT),
|
|
- "Xen provided");
|
|
+ reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
|
|
|
|
/*
|
|
* At this point everything still needed from the boot loader
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head_32-xen.S 2010-03-24 15:12:36.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head_32-xen.S 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -6,12 +6,14 @@
|
|
#include <linux/init.h>
|
|
#include <linux/linkage.h>
|
|
#include <asm/segment.h>
|
|
-#include <asm/page.h>
|
|
+#include <asm/page_types.h>
|
|
+#include <asm/pgtable_types.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/asm-offsets.h>
|
|
#include <asm/boot.h>
|
|
#include <asm/dwarf2.h>
|
|
+#include <asm/percpu.h>
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/elfnote.h>
|
|
|
|
@@ -38,9 +40,6 @@ ENTRY(startup_32)
|
|
/* Set up the stack pointer */
|
|
movl $(init_thread_union+THREAD_SIZE),%esp
|
|
|
|
- movl %ss,%eax
|
|
- movl %eax,%fs # gets reset once there's real percpu
|
|
-
|
|
/* get vendor info */
|
|
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
|
|
XEN_CPUID
|
|
@@ -63,7 +62,49 @@ ENTRY(startup_32)
|
|
|
|
movb $1,X86_HARD_MATH
|
|
|
|
- xorl %eax,%eax # Clear GS
|
|
+#ifdef CONFIG_CC_STACKPROTECTOR
|
|
+ /*
|
|
+ * The linker can't handle this by relocation. Manually set
|
|
+ * base address in stack canary segment descriptor.
|
|
+ */
|
|
+ movl $per_cpu__gdt_page,%eax
|
|
+ movl $per_cpu__stack_canary,%ecx
|
|
+ subl $20, %ecx
|
|
+ movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
|
|
+ shrl $16, %ecx
|
|
+ movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
|
|
+ movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
|
|
+#endif
|
|
+
|
|
+ # %esi still points to start_info, and no registers
|
|
+ # need to be preserved.
|
|
+
|
|
+ movl XEN_START_mfn_list(%esi), %ebx
|
|
+ movl $(per_cpu__gdt_page - __PAGE_OFFSET), %eax
|
|
+ shrl $PAGE_SHIFT, %eax
|
|
+ movl (%ebx,%eax,4), %ecx
|
|
+ pushl %ecx # frame number for set_gdt below
|
|
+
|
|
+ xorl %esi, %esi
|
|
+ xorl %edx, %edx
|
|
+ shldl $PAGE_SHIFT, %ecx, %edx
|
|
+ shll $PAGE_SHIFT, %ecx
|
|
+ orl $_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY, %ecx
|
|
+ movl $per_cpu__gdt_page, %ebx
|
|
+ movl $__HYPERVISOR_update_va_mapping, %eax
|
|
+ int $0x82
|
|
+
|
|
+ movl $(PAGE_SIZE_asm / 8), %ecx
|
|
+ movl %esp, %ebx
|
|
+ movl $__HYPERVISOR_set_gdt, %eax
|
|
+ int $0x82
|
|
+
|
|
+ popl %ecx
|
|
+
|
|
+ movl $(__KERNEL_PERCPU), %eax
|
|
+ movl %eax,%fs # set this cpu's percpu
|
|
+
|
|
+ movl $(__KERNEL_STACK_CANARY),%eax
|
|
movl %eax,%gs
|
|
|
|
cld # gcc2 wants the direction flag cleared at all times
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head_64-xen.S 2010-03-24 15:12:46.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head_64-xen.S 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -21,6 +21,7 @@
|
|
#include <asm/msr.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/dwarf2.h>
|
|
+#include <asm/percpu.h>
|
|
#include <xen/interface/elfnote.h>
|
|
|
|
.section .text.head, "ax", @progbits
|
|
@@ -32,11 +33,23 @@ startup_64:
|
|
/* rsi is pointer to startup info structure.
|
|
pass it to C */
|
|
movq %rsi,%rdi
|
|
+
|
|
+ /* Set up %gs.
|
|
+ *
|
|
+ * The base of %gs always points to the bottom of the irqstack
|
|
+ * union. If the stack protector canary is enabled, it is
|
|
+ * located at %gs:40. Note that, on SMP, the boot cpu uses
|
|
+ * init data section till per cpu areas are set up.
|
|
+ */
|
|
+ movl $MSR_GS_BASE,%ecx
|
|
+ movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
|
|
+ movq %rax,%rdx
|
|
+ shrq $32,%rdx
|
|
+ wrmsr
|
|
+
|
|
pushq $0 # fake return address
|
|
jmp x86_64_start_kernel
|
|
|
|
-.balign PAGE_SIZE
|
|
-
|
|
#define NEXT_PAGE(name) \
|
|
.balign PAGE_SIZE; \
|
|
phys_##name = . - .text.head; \
|
|
--- head-2010-05-25.orig/arch/x86/kernel/ioport-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/ioport-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -91,9 +91,8 @@ static int do_iopl(unsigned int level, s
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
-asmlinkage long sys_iopl(unsigned long regsp)
|
|
+long sys_iopl(struct pt_regs *regs)
|
|
{
|
|
- struct pt_regs *regs = (struct pt_regs *)&regsp;
|
|
unsigned int level = regs->bx;
|
|
#else
|
|
asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
|
|
--- head-2010-05-25.orig/arch/x86/kernel/irq-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/irq-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -6,13 +6,20 @@
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/seq_file.h>
|
|
#include <linux/smp.h>
|
|
+#include <linux/ftrace.h>
|
|
|
|
#include <asm/apic.h>
|
|
#include <asm/io_apic.h>
|
|
#include <asm/irq.h>
|
|
+#include <asm/idle.h>
|
|
|
|
atomic_t irq_err_count;
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+/* Function pointer for generic interrupt vector handling */
|
|
+void (*generic_interrupt_extension)(void) = NULL;
|
|
+#endif
|
|
+
|
|
/*
|
|
* 'what should we do if we get a hw irq event on an illegal vector'.
|
|
* each architecture has to answer this themselves.
|
|
@@ -36,11 +43,7 @@ void ack_bad_irq(unsigned int irq)
|
|
#endif
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-# define irq_stats(x) (&per_cpu(irq_stat, x))
|
|
-#else
|
|
-# define irq_stats(x) cpu_pda(x)
|
|
-#endif
|
|
+#define irq_stats(x) (&per_cpu(irq_stat, x))
|
|
/*
|
|
* /proc/interrupts printing:
|
|
*/
|
|
@@ -57,6 +60,19 @@ static int show_other_interrupts(struct
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", irq_stats(j)->apic_timer_irqs);
|
|
seq_printf(p, " Local timer interrupts\n");
|
|
+
|
|
+ seq_printf(p, "%*s: ", prec, "SPU");
|
|
+ for_each_online_cpu(j)
|
|
+ seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
|
|
+ seq_printf(p, " Spurious interrupts\n");
|
|
+#endif
|
|
+#ifndef CONFIG_XEN
|
|
+ if (generic_interrupt_extension) {
|
|
+ seq_printf(p, "%*s: ", prec, "PLT");
|
|
+ for_each_online_cpu(j)
|
|
+ seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
|
|
+ seq_printf(p, " Platform interrupts\n");
|
|
+ }
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
seq_printf(p, "%*s: ", prec, "RES");
|
|
@@ -86,12 +102,6 @@ static int show_other_interrupts(struct
|
|
seq_printf(p, " Threshold APIC interrupts\n");
|
|
# endif
|
|
#endif
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- seq_printf(p, "%*s: ", prec, "SPU");
|
|
- for_each_online_cpu(j)
|
|
- seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
|
|
- seq_printf(p, " Spurious interrupts\n");
|
|
-#endif
|
|
seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count));
|
|
#if defined(CONFIG_X86_IO_APIC)
|
|
seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count));
|
|
@@ -128,23 +138,15 @@ int show_interrupts(struct seq_file *p,
|
|
return 0;
|
|
|
|
spin_lock_irqsave(&desc->lock, flags);
|
|
-#ifndef CONFIG_SMP
|
|
- any_count = kstat_irqs(i);
|
|
-#else
|
|
for_each_online_cpu(j)
|
|
any_count |= kstat_irqs_cpu(i, j);
|
|
-#endif
|
|
action = desc->action;
|
|
if (!action && !any_count)
|
|
goto out;
|
|
|
|
seq_printf(p, "%*d: ", prec, i);
|
|
-#ifndef CONFIG_SMP
|
|
- seq_printf(p, "%10u ", kstat_irqs(i));
|
|
-#else
|
|
for_each_online_cpu(j)
|
|
seq_printf(p, "%10u ", kstat_irqs_cpu(i, j));
|
|
-#endif
|
|
seq_printf(p, " %8s", desc->chip->name);
|
|
seq_printf(p, "-%-8s", desc->name);
|
|
|
|
@@ -169,6 +171,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
sum += irq_stats(cpu)->apic_timer_irqs;
|
|
+ sum += irq_stats(cpu)->irq_spurious_count;
|
|
+#endif
|
|
+#ifndef CONFIG_XEN
|
|
+ if (generic_interrupt_extension)
|
|
+ sum += irq_stats(cpu)->generic_irqs;
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
sum += irq_stats(cpu)->irq_resched_count;
|
|
@@ -183,9 +190,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
|
|
sum += irq_stats(cpu)->irq_threshold_count;
|
|
#endif
|
|
#endif
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- sum += irq_stats(cpu)->irq_spurious_count;
|
|
-#endif
|
|
return sum;
|
|
}
|
|
|
|
@@ -198,3 +202,64 @@ u64 arch_irq_stat(void)
|
|
#endif
|
|
return sum;
|
|
}
|
|
+
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * do_IRQ handles all normal device IRQ's (the special
|
|
+ * SMP cross-CPU interrupts have their own specific
|
|
+ * handlers).
|
|
+ */
|
|
+unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
|
|
+{
|
|
+ struct pt_regs *old_regs = set_irq_regs(regs);
|
|
+
|
|
+ /* high bit used in ret_from_ code */
|
|
+ unsigned vector = ~regs->orig_ax;
|
|
+ unsigned irq;
|
|
+
|
|
+ exit_idle();
|
|
+ irq_enter();
|
|
+
|
|
+ irq = __get_cpu_var(vector_irq)[vector];
|
|
+
|
|
+ if (!handle_irq(irq, regs)) {
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (!disable_apic)
|
|
+ ack_APIC_irq();
|
|
+#endif
|
|
+
|
|
+ if (printk_ratelimit())
|
|
+ printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
|
|
+ __func__, smp_processor_id(), vector, irq);
|
|
+ }
|
|
+
|
|
+ irq_exit();
|
|
+
|
|
+ set_irq_regs(old_regs);
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Handler for GENERIC_INTERRUPT_VECTOR.
|
|
+ */
|
|
+void smp_generic_interrupt(struct pt_regs *regs)
|
|
+{
|
|
+ struct pt_regs *old_regs = set_irq_regs(regs);
|
|
+
|
|
+ ack_APIC_irq();
|
|
+
|
|
+ exit_idle();
|
|
+
|
|
+ irq_enter();
|
|
+
|
|
+ inc_irq_stat(generic_irqs);
|
|
+
|
|
+ if (generic_interrupt_extension)
|
|
+ generic_interrupt_extension();
|
|
+
|
|
+ irq_exit();
|
|
+
|
|
+ set_irq_regs(old_regs);
|
|
+}
|
|
+#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/machine_kexec_64.c 2010-04-15 10:03:05.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/machine_kexec_64.c 2010-04-15 10:07:08.000000000 +0200
|
|
@@ -92,13 +92,8 @@ void machine_kexec_setup_load_arg(xen_ke
|
|
xki->page_list[PA_CONTROL_PAGE] = __ma(control_page);
|
|
xki->page_list[PA_TABLE_PAGE] = __ma(table_page);
|
|
|
|
- xki->page_list[PA_PGD] = __ma(kexec_pgd);
|
|
- xki->page_list[PA_PUD_0] = __ma(kexec_pud0);
|
|
- xki->page_list[PA_PUD_1] = __ma(kexec_pud1);
|
|
- xki->page_list[PA_PMD_0] = __ma(kexec_pmd0);
|
|
- xki->page_list[PA_PMD_1] = __ma(kexec_pmd1);
|
|
- xki->page_list[PA_PTE_0] = __ma(kexec_pte0);
|
|
- xki->page_list[PA_PTE_1] = __ma(kexec_pte1);
|
|
+ if (image->type == KEXEC_TYPE_DEFAULT)
|
|
+ xki->page_list[PA_SWAP_PAGE] = page_to_phys(image->swap_page);
|
|
}
|
|
|
|
int __init machine_kexec_setup_resources(struct resource *hypervisor,
|
|
@@ -161,7 +156,7 @@ static int init_one_level2_page(struct k
|
|
}
|
|
pmd = pmd_offset(pud, addr);
|
|
if (!pmd_present(*pmd))
|
|
- set_pmd(pmd, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
|
|
+ x_set_pmd(pmd, x__pmd(addr | X__PAGE_KERNEL_LARGE_EXEC));
|
|
result = 0;
|
|
out:
|
|
return result;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/microcode_core-xen.c 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/microcode_core-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -21,28 +21,28 @@
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
+#include <linux/platform_device.h>
|
|
#include <linux/capability.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/sched.h>
|
|
+#include <linux/miscdevice.h>
|
|
+#include <linux/firmware.h>
|
|
#include <linux/smp_lock.h>
|
|
+#include <linux/spinlock.h>
|
|
#include <linux/cpumask.h>
|
|
-#include <linux/module.h>
|
|
-#include <linux/slab.h>
|
|
+#include <linux/uaccess.h>
|
|
#include <linux/vmalloc.h>
|
|
-#include <linux/miscdevice.h>
|
|
-#include <linux/spinlock.h>
|
|
-#include <linux/mm.h>
|
|
-#include <linux/fs.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/module.h>
|
|
#include <linux/mutex.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/slab.h>
|
|
#include <linux/cpu.h>
|
|
-#include <linux/firmware.h>
|
|
-#include <linux/platform_device.h>
|
|
+#include <linux/fs.h>
|
|
+#include <linux/mm.h>
|
|
|
|
-#include <asm/msr.h>
|
|
-#include <asm/uaccess.h>
|
|
-#include <asm/processor.h>
|
|
#include <asm/microcode.h>
|
|
+#include <asm/processor.h>
|
|
+#include <asm/msr.h>
|
|
|
|
MODULE_DESCRIPTION("Microcode Update Driver");
|
|
MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
|
|
@@ -51,7 +51,7 @@ MODULE_LICENSE("GPL");
|
|
static int verbose;
|
|
module_param(verbose, int, 0644);
|
|
|
|
-#define MICROCODE_VERSION "2.00-xen"
|
|
+#define MICROCODE_VERSION "2.00-xen"
|
|
|
|
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
|
|
static DEFINE_MUTEX(microcode_mutex);
|
|
@@ -143,12 +143,12 @@ static void microcode_dev_exit(void)
|
|
|
|
MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
|
|
#else
|
|
-#define microcode_dev_init() 0
|
|
-#define microcode_dev_exit() do { } while (0)
|
|
+#define microcode_dev_init() 0
|
|
+#define microcode_dev_exit() do { } while (0)
|
|
#endif
|
|
|
|
/* fake device for request_firmware */
|
|
-static struct platform_device *microcode_pdev;
|
|
+static struct platform_device *microcode_pdev;
|
|
|
|
static int request_microcode(const char *name)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/kernel/mpparse-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/mpparse-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -3,7 +3,7 @@
|
|
* compliant MP-table parsing routines.
|
|
*
|
|
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
|
|
- * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com>
|
|
+ * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
|
|
* (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
|
|
*/
|
|
|
|
@@ -29,11 +29,7 @@
|
|
#include <asm/setup.h>
|
|
#include <asm/smp.h>
|
|
|
|
-#include <mach_apic.h>
|
|
-#ifdef CONFIG_X86_32
|
|
-#include <mach_apicdef.h>
|
|
-#include <mach_mpparse.h>
|
|
-#endif
|
|
+#include <asm/apic.h>
|
|
|
|
static void *_bus_to_virt(unsigned long ma)
|
|
{
|
|
@@ -123,9 +119,6 @@ static void __init MP_bus_info(struct mp
|
|
} else
|
|
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
|
|
}
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
|
|
static int bad_ioapic(unsigned long address)
|
|
{
|
|
@@ -153,11 +146,11 @@ static void __init MP_ioapic_info(struct
|
|
if (bad_ioapic(m->apicaddr))
|
|
return;
|
|
|
|
- mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr;
|
|
- mp_ioapics[nr_ioapics].mp_apicid = m->apicid;
|
|
- mp_ioapics[nr_ioapics].mp_type = m->type;
|
|
- mp_ioapics[nr_ioapics].mp_apicver = m->apicver;
|
|
- mp_ioapics[nr_ioapics].mp_flags = m->flags;
|
|
+ mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
|
|
+ mp_ioapics[nr_ioapics].apicid = m->apicid;
|
|
+ mp_ioapics[nr_ioapics].type = m->type;
|
|
+ mp_ioapics[nr_ioapics].apicver = m->apicver;
|
|
+ mp_ioapics[nr_ioapics].flags = m->flags;
|
|
nr_ioapics++;
|
|
}
|
|
|
|
@@ -169,55 +162,55 @@ static void print_MP_intsrc_info(struct
|
|
m->srcbusirq, m->dstapic, m->dstirq);
|
|
}
|
|
|
|
-static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
|
|
+static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
|
|
{
|
|
apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
|
|
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
|
|
- mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3,
|
|
- (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus,
|
|
- mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq);
|
|
+ mp_irq->irqtype, mp_irq->irqflag & 3,
|
|
+ (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
|
|
+ mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
|
|
}
|
|
|
|
static void __init assign_to_mp_irq(struct mpc_intsrc *m,
|
|
- struct mp_config_intsrc *mp_irq)
|
|
+ struct mpc_intsrc *mp_irq)
|
|
{
|
|
- mp_irq->mp_dstapic = m->dstapic;
|
|
- mp_irq->mp_type = m->type;
|
|
- mp_irq->mp_irqtype = m->irqtype;
|
|
- mp_irq->mp_irqflag = m->irqflag;
|
|
- mp_irq->mp_srcbus = m->srcbus;
|
|
- mp_irq->mp_srcbusirq = m->srcbusirq;
|
|
- mp_irq->mp_dstirq = m->dstirq;
|
|
+ mp_irq->dstapic = m->dstapic;
|
|
+ mp_irq->type = m->type;
|
|
+ mp_irq->irqtype = m->irqtype;
|
|
+ mp_irq->irqflag = m->irqflag;
|
|
+ mp_irq->srcbus = m->srcbus;
|
|
+ mp_irq->srcbusirq = m->srcbusirq;
|
|
+ mp_irq->dstirq = m->dstirq;
|
|
}
|
|
|
|
-static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq,
|
|
+static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
|
|
struct mpc_intsrc *m)
|
|
{
|
|
- m->dstapic = mp_irq->mp_dstapic;
|
|
- m->type = mp_irq->mp_type;
|
|
- m->irqtype = mp_irq->mp_irqtype;
|
|
- m->irqflag = mp_irq->mp_irqflag;
|
|
- m->srcbus = mp_irq->mp_srcbus;
|
|
- m->srcbusirq = mp_irq->mp_srcbusirq;
|
|
- m->dstirq = mp_irq->mp_dstirq;
|
|
+ m->dstapic = mp_irq->dstapic;
|
|
+ m->type = mp_irq->type;
|
|
+ m->irqtype = mp_irq->irqtype;
|
|
+ m->irqflag = mp_irq->irqflag;
|
|
+ m->srcbus = mp_irq->srcbus;
|
|
+ m->srcbusirq = mp_irq->srcbusirq;
|
|
+ m->dstirq = mp_irq->dstirq;
|
|
}
|
|
|
|
-static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq,
|
|
+static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
|
|
struct mpc_intsrc *m)
|
|
{
|
|
- if (mp_irq->mp_dstapic != m->dstapic)
|
|
+ if (mp_irq->dstapic != m->dstapic)
|
|
return 1;
|
|
- if (mp_irq->mp_type != m->type)
|
|
+ if (mp_irq->type != m->type)
|
|
return 2;
|
|
- if (mp_irq->mp_irqtype != m->irqtype)
|
|
+ if (mp_irq->irqtype != m->irqtype)
|
|
return 3;
|
|
- if (mp_irq->mp_irqflag != m->irqflag)
|
|
+ if (mp_irq->irqflag != m->irqflag)
|
|
return 4;
|
|
- if (mp_irq->mp_srcbus != m->srcbus)
|
|
+ if (mp_irq->srcbus != m->srcbus)
|
|
return 5;
|
|
- if (mp_irq->mp_srcbusirq != m->srcbusirq)
|
|
+ if (mp_irq->srcbusirq != m->srcbusirq)
|
|
return 6;
|
|
- if (mp_irq->mp_dstirq != m->dstirq)
|
|
+ if (mp_irq->dstirq != m->dstirq)
|
|
return 7;
|
|
|
|
return 0;
|
|
@@ -238,8 +231,12 @@ static void __init MP_intsrc_info(struct
|
|
if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
|
panic("Max # of irq sources exceeded!!\n");
|
|
}
|
|
+#else /* CONFIG_X86_IO_APIC */
|
|
+static inline void __init MP_bus_info(struct mpc_bus *m) {}
|
|
+static inline void __init MP_ioapic_info(struct mpc_ioapic *m) {}
|
|
+static inline void __init MP_intsrc_info(struct mpc_intsrc *m) {}
|
|
+#endif /* CONFIG_X86_IO_APIC */
|
|
|
|
-#endif
|
|
|
|
static void __init MP_lintsrc_info(struct mpc_lintsrc *m)
|
|
{
|
|
@@ -289,6 +286,20 @@ static int __init smp_check_mpc(struct m
|
|
return 1;
|
|
}
|
|
|
|
+static void skip_entry(unsigned char **ptr, int *count, int size)
|
|
+{
|
|
+ *ptr += size;
|
|
+ *count += size;
|
|
+}
|
|
+
|
|
+static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt)
|
|
+{
|
|
+ printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n"
|
|
+ "type %x\n", *mpt);
|
|
+ print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
|
|
+ 1, mpc, mpc->length, 1);
|
|
+}
|
|
+
|
|
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
|
|
{
|
|
char str[16];
|
|
@@ -300,17 +311,8 @@ static int __init smp_read_mpc(struct mp
|
|
if (!smp_check_mpc(mpc, oem, str))
|
|
return 0;
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- /*
|
|
- * need to make sure summit and es7000's mps_oem_check is safe to be
|
|
- * called early via genericarch 's mps_oem_check
|
|
- */
|
|
- if (early) {
|
|
-#ifdef CONFIG_X86_NUMAQ
|
|
- numaq_mps_oem_check(mpc, oem, str);
|
|
-#endif
|
|
- } else
|
|
- mps_oem_check(mpc, oem, str);
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
+ generic_mps_oem_check(mpc, oem, str);
|
|
#endif
|
|
/* save the local APIC address, it might be non-default */
|
|
if (!acpi_lapic)
|
|
@@ -333,61 +335,30 @@ static int __init smp_read_mpc(struct mp
|
|
while (count < mpc->length) {
|
|
switch (*mpt) {
|
|
case MP_PROCESSOR:
|
|
- {
|
|
- struct mpc_cpu *m = (struct mpc_cpu *)mpt;
|
|
- /* ACPI may have already provided this data */
|
|
- if (!acpi_lapic)
|
|
- MP_processor_info(m);
|
|
- mpt += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- break;
|
|
- }
|
|
+ /* ACPI may have already provided this data */
|
|
+ if (!acpi_lapic)
|
|
+ MP_processor_info((struct mpc_cpu *)mpt);
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
|
|
+ break;
|
|
case MP_BUS:
|
|
- {
|
|
- struct mpc_bus *m = (struct mpc_bus *)mpt;
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- MP_bus_info(m);
|
|
-#endif
|
|
- mpt += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- break;
|
|
- }
|
|
+ MP_bus_info((struct mpc_bus *)mpt);
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_bus));
|
|
+ break;
|
|
case MP_IOAPIC:
|
|
- {
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- struct mpc_ioapic *m = (struct mpc_ioapic *)mpt;
|
|
- MP_ioapic_info(m);
|
|
-#endif
|
|
- mpt += sizeof(struct mpc_ioapic);
|
|
- count += sizeof(struct mpc_ioapic);
|
|
- break;
|
|
- }
|
|
+ MP_ioapic_info((struct mpc_ioapic *)mpt);
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
|
|
+ break;
|
|
case MP_INTSRC:
|
|
- {
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
|
|
-
|
|
- MP_intsrc_info(m);
|
|
-#endif
|
|
- mpt += sizeof(struct mpc_intsrc);
|
|
- count += sizeof(struct mpc_intsrc);
|
|
- break;
|
|
- }
|
|
+ MP_intsrc_info((struct mpc_intsrc *)mpt);
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
|
|
+ break;
|
|
case MP_LINTSRC:
|
|
- {
|
|
- struct mpc_lintsrc *m =
|
|
- (struct mpc_lintsrc *)mpt;
|
|
- MP_lintsrc_info(m);
|
|
- mpt += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- break;
|
|
- }
|
|
+ MP_lintsrc_info((struct mpc_lintsrc *)mpt);
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
|
|
+ break;
|
|
default:
|
|
/* wrong mptable */
|
|
- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
|
|
- printk(KERN_ERR "type %x\n", *mpt);
|
|
- print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
|
|
- 1, mpc, mpc->length, 1);
|
|
+ smp_dump_mptable(mpc, mpt);
|
|
count = mpc->length;
|
|
break;
|
|
}
|
|
@@ -395,13 +366,13 @@ static int __init smp_read_mpc(struct mp
|
|
(*x86_quirks->mpc_record)++;
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_GENERICARCH
|
|
- generic_bigsmp_probe();
|
|
+#ifdef CONFIG_X86_BIGSMP
|
|
+ generic_bigsmp_probe();
|
|
#endif
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- setup_apic_routing();
|
|
-#endif
|
|
+ if (apic->setup_apic_routing)
|
|
+ apic->setup_apic_routing();
|
|
+
|
|
if (!num_processors)
|
|
printk(KERN_ERR "MPTABLE: no processors registered!\n");
|
|
return num_processors;
|
|
@@ -426,7 +397,7 @@ static void __init construct_default_ioi
|
|
intsrc.type = MP_INTSRC;
|
|
intsrc.irqflag = 0; /* conforming */
|
|
intsrc.srcbus = 0;
|
|
- intsrc.dstapic = mp_ioapics[0].mp_apicid;
|
|
+ intsrc.dstapic = mp_ioapics[0].apicid;
|
|
|
|
intsrc.irqtype = mp_INT;
|
|
|
|
@@ -579,14 +550,76 @@ static inline void __init construct_defa
|
|
}
|
|
}
|
|
|
|
-static struct intel_mp_floating *mpf_found;
|
|
+static struct mpf_intel *mpf_found;
|
|
+
|
|
+static unsigned long __init get_mpc_size(unsigned long physptr)
|
|
+{
|
|
+ struct mpc_table *mpc;
|
|
+ unsigned long size;
|
|
+
|
|
+ mpc = early_ioremap(physptr, PAGE_SIZE);
|
|
+ size = mpc->length;
|
|
+ early_iounmap(mpc, PAGE_SIZE);
|
|
+ apic_printk(APIC_VERBOSE, " mpc: %lx-%lx\n", physptr, physptr + size);
|
|
+
|
|
+ return size;
|
|
+}
|
|
+
|
|
+static int __init check_physptr(struct mpf_intel *mpf, unsigned int early)
|
|
+{
|
|
+ struct mpc_table *mpc;
|
|
+ unsigned long size;
|
|
+
|
|
+ size = get_mpc_size(mpf->physptr);
|
|
+ mpc = early_ioremap(mpf->physptr, size);
|
|
+ /*
|
|
+ * Read the physical hardware table. Anything here will
|
|
+ * override the defaults.
|
|
+ */
|
|
+ if (!smp_read_mpc(mpc, early)) {
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
+ smp_found_config = 0;
|
|
+#endif
|
|
+ printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"
|
|
+ "... disabling SMP support. (tell your hw vendor)\n");
|
|
+ early_iounmap(mpc, size);
|
|
+ return -1;
|
|
+ }
|
|
+ early_iounmap(mpc, size);
|
|
+
|
|
+ if (early)
|
|
+ return -1;
|
|
+
|
|
+#ifdef CONFIG_X86_IO_APIC
|
|
+ /*
|
|
+ * If there are no explicit MP IRQ entries, then we are
|
|
+ * broken. We set up most of the low 16 IO-APIC pins to
|
|
+ * ISA defaults and hope it will work.
|
|
+ */
|
|
+ if (!mp_irq_entries) {
|
|
+ struct mpc_bus bus;
|
|
+
|
|
+ printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
|
|
+ "using default mptable. (tell your hw vendor)\n");
|
|
+
|
|
+ bus.type = MP_BUS;
|
|
+ bus.busid = 0;
|
|
+ memcpy(bus.bustype, "ISA ", 6);
|
|
+ MP_bus_info(&bus);
|
|
+
|
|
+ construct_default_ioirq_mptable(0);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ return 0;
|
|
+}
|
|
|
|
/*
|
|
* Scan the memory blocks for an SMP configuration block.
|
|
*/
|
|
static void __init __get_smp_config(unsigned int early)
|
|
{
|
|
- struct intel_mp_floating *mpf = mpf_found;
|
|
+ struct mpf_intel *mpf = mpf_found;
|
|
|
|
if (!mpf)
|
|
return;
|
|
@@ -607,9 +640,9 @@ static void __init __get_smp_config(unsi
|
|
}
|
|
|
|
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
|
|
- mpf->mpf_specification);
|
|
+ mpf->specification);
|
|
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
- if (mpf->mpf_feature2 & (1 << 7)) {
|
|
+ if (mpf->feature2 & (1 << 7)) {
|
|
printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
|
|
pic_mode = 1;
|
|
} else {
|
|
@@ -620,7 +653,7 @@ static void __init __get_smp_config(unsi
|
|
/*
|
|
* Now see if we need to read further.
|
|
*/
|
|
- if (mpf->mpf_feature1 != 0) {
|
|
+ if (mpf->feature1 != 0) {
|
|
if (early) {
|
|
/*
|
|
* local APIC has default address
|
|
@@ -630,49 +663,12 @@ static void __init __get_smp_config(unsi
|
|
}
|
|
|
|
printk(KERN_INFO "Default MP configuration #%d\n",
|
|
- mpf->mpf_feature1);
|
|
- construct_default_ISA_mptable(mpf->mpf_feature1);
|
|
-
|
|
- } else if (mpf->mpf_physptr) {
|
|
-
|
|
- /*
|
|
- * Read the physical hardware table. Anything here will
|
|
- * override the defaults.
|
|
- */
|
|
- if (!smp_read_mpc(_bus_to_virt(mpf->mpf_physptr), early)) {
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- smp_found_config = 0;
|
|
-#endif
|
|
- printk(KERN_ERR
|
|
- "BIOS bug, MP table errors detected!...\n");
|
|
- printk(KERN_ERR "... disabling SMP support. "
|
|
- "(tell your hw vendor)\n");
|
|
- return;
|
|
- }
|
|
+ mpf->feature1);
|
|
+ construct_default_ISA_mptable(mpf->feature1);
|
|
|
|
- if (early)
|
|
+ } else if (mpf->physptr) {
|
|
+ if (check_physptr(mpf, early))
|
|
return;
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- /*
|
|
- * If there are no explicit MP IRQ entries, then we are
|
|
- * broken. We set up most of the low 16 IO-APIC pins to
|
|
- * ISA defaults and hope it will work.
|
|
- */
|
|
- if (!mp_irq_entries) {
|
|
- struct mpc_bus bus;
|
|
-
|
|
- printk(KERN_ERR "BIOS bug, no explicit IRQ entries, "
|
|
- "using default mptable. "
|
|
- "(tell your hw vendor)\n");
|
|
-
|
|
- bus.type = MP_BUS;
|
|
- bus.busid = 0;
|
|
- memcpy(bus.bustype, "ISA ", 6);
|
|
- MP_bus_info(&bus);
|
|
-
|
|
- construct_default_ioirq_mptable(0);
|
|
- }
|
|
-#endif
|
|
} else
|
|
BUG();
|
|
|
|
@@ -693,58 +689,68 @@ void __init get_smp_config(void)
|
|
__get_smp_config(0);
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
|
|
+{
|
|
+ unsigned long size = get_mpc_size(mpf->physptr);
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * We cannot access to MPC table to compute table size yet,
|
|
+ * as only few megabytes from the bottom is mapped now.
|
|
+ * PC-9800's MPC table places on the very last of physical
|
|
+ * memory; so that simply reserving PAGE_SIZE from mpf->physptr
|
|
+ * yields BUG() in reserve_bootmem.
|
|
+ * also need to make sure physptr is below than max_low_pfn
|
|
+ * we don't need reserve the area above max_low_pfn
|
|
+ */
|
|
+ unsigned long end = max_low_pfn * PAGE_SIZE;
|
|
+
|
|
+ if (mpf->physptr < end) {
|
|
+ if (mpf->physptr + size > end)
|
|
+ size = end - mpf->physptr;
|
|
+ reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
|
|
+ }
|
|
+#else
|
|
+ reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
|
|
+#endif
|
|
+}
|
|
+#endif
|
|
+
|
|
static int __init smp_scan_config(unsigned long base, unsigned long length,
|
|
unsigned reserve)
|
|
{
|
|
unsigned int *bp = _bus_to_virt(base);
|
|
- struct intel_mp_floating *mpf;
|
|
+ struct mpf_intel *mpf;
|
|
|
|
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
|
|
bp, length);
|
|
BUILD_BUG_ON(sizeof(*mpf) != 16);
|
|
|
|
while (length > 0) {
|
|
- mpf = (struct intel_mp_floating *)bp;
|
|
+ mpf = (struct mpf_intel *)bp;
|
|
if ((*bp == SMP_MAGIC_IDENT) &&
|
|
- (mpf->mpf_length == 1) &&
|
|
+ (mpf->length == 1) &&
|
|
!mpf_checksum((unsigned char *)bp, 16) &&
|
|
- ((mpf->mpf_specification == 1)
|
|
- || (mpf->mpf_specification == 4))) {
|
|
+ ((mpf->specification == 1)
|
|
+ || (mpf->specification == 4))) {
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
smp_found_config = 1;
|
|
#endif
|
|
mpf_found = mpf;
|
|
|
|
#ifndef CONFIG_XEN
|
|
- printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
|
|
- mpf, virt_to_phys(mpf));
|
|
+ printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
|
|
+ mpf, (u64)virt_to_phys(mpf));
|
|
|
|
if (!reserve)
|
|
return 1;
|
|
- reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
|
|
- BOOTMEM_DEFAULT);
|
|
- if (mpf->mpf_physptr) {
|
|
- unsigned long size = PAGE_SIZE;
|
|
-#ifdef CONFIG_X86_32
|
|
- /*
|
|
- * We cannot access to MPC table to compute
|
|
- * table size yet, as only few megabytes from
|
|
- * the bottom is mapped now.
|
|
- * PC-9800's MPC table places on the very last
|
|
- * of physical memory; so that simply reserving
|
|
- * PAGE_SIZE from mpg->mpf_physptr yields BUG()
|
|
- * in reserve_bootmem.
|
|
- */
|
|
- unsigned long end = max_low_pfn * PAGE_SIZE;
|
|
- if (mpf->mpf_physptr + size > end)
|
|
- size = end - mpf->mpf_physptr;
|
|
-#endif
|
|
- reserve_bootmem_generic(mpf->mpf_physptr, size,
|
|
+ reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
|
|
BOOTMEM_DEFAULT);
|
|
- }
|
|
+ if (mpf->physptr)
|
|
+ smp_reserve_bootmem(mpf);
|
|
#else
|
|
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
|
|
- mpf, ((void *)bp - _bus_to_virt(base)) + base);
|
|
+ mpf, ((void *)bp - _bus_to_virt(base)) + base);
|
|
#endif
|
|
return 1;
|
|
}
|
|
@@ -826,15 +832,15 @@ static int __init get_MP_intsrc_index(s
|
|
/* not legacy */
|
|
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
- if (mp_irqs[i].mp_irqtype != mp_INT)
|
|
+ if (mp_irqs[i].irqtype != mp_INT)
|
|
continue;
|
|
|
|
- if (mp_irqs[i].mp_irqflag != 0x0f)
|
|
+ if (mp_irqs[i].irqflag != 0x0f)
|
|
continue;
|
|
|
|
- if (mp_irqs[i].mp_srcbus != m->srcbus)
|
|
+ if (mp_irqs[i].srcbus != m->srcbus)
|
|
continue;
|
|
- if (mp_irqs[i].mp_srcbusirq != m->srcbusirq)
|
|
+ if (mp_irqs[i].srcbusirq != m->srcbusirq)
|
|
continue;
|
|
if (irq_used[i]) {
|
|
/* already claimed */
|
|
@@ -851,7 +857,58 @@ static int __init get_MP_intsrc_index(s
|
|
#define SPARE_SLOT_NUM 20
|
|
|
|
static struct mpc_intsrc __initdata *m_spare[SPARE_SLOT_NUM];
|
|
-#endif
|
|
+
|
|
+static void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ apic_printk(APIC_VERBOSE, "OLD ");
|
|
+ print_MP_intsrc_info(m);
|
|
+
|
|
+ i = get_MP_intsrc_index(m);
|
|
+ if (i > 0) {
|
|
+ assign_to_mpc_intsrc(&mp_irqs[i], m);
|
|
+ apic_printk(APIC_VERBOSE, "NEW ");
|
|
+ print_mp_irq_info(&mp_irqs[i]);
|
|
+ return;
|
|
+ }
|
|
+ if (!i) {
|
|
+ /* legacy, do nothing */
|
|
+ return;
|
|
+ }
|
|
+ if (*nr_m_spare < SPARE_SLOT_NUM) {
|
|
+ /*
|
|
+ * not found (-1), or duplicated (-2) are invalid entries,
|
|
+ * we need to use the slot later
|
|
+ */
|
|
+ m_spare[*nr_m_spare] = m;
|
|
+ *nr_m_spare += 1;
|
|
+ }
|
|
+}
|
|
+#else /* CONFIG_X86_IO_APIC */
|
|
+static
|
|
+inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {}
|
|
+#endif /* CONFIG_X86_IO_APIC */
|
|
+
|
|
+static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length,
|
|
+ int count)
|
|
+{
|
|
+ if (!mpc_new_phys) {
|
|
+ pr_info("No spare slots, try to append...take your risk, "
|
|
+ "new mpc_length %x\n", count);
|
|
+ } else {
|
|
+ if (count <= mpc_new_length)
|
|
+ pr_info("No spare slots, try to append..., "
|
|
+ "new mpc_length %x\n", count);
|
|
+ else {
|
|
+ pr_err("mpc_new_length %lx is too small\n",
|
|
+ mpc_new_length);
|
|
+ return -1;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
|
|
static int __init replace_intsrc_all(struct mpc_table *mpc,
|
|
unsigned long mpc_new_phys,
|
|
@@ -859,77 +916,33 @@ static int __init replace_intsrc_all(st
|
|
{
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
int i;
|
|
- int nr_m_spare = 0;
|
|
#endif
|
|
-
|
|
int count = sizeof(*mpc);
|
|
+ int nr_m_spare = 0;
|
|
unsigned char *mpt = ((unsigned char *)mpc) + count;
|
|
|
|
printk(KERN_INFO "mpc_length %x\n", mpc->length);
|
|
while (count < mpc->length) {
|
|
switch (*mpt) {
|
|
case MP_PROCESSOR:
|
|
- {
|
|
- struct mpc_cpu *m = (struct mpc_cpu *)mpt;
|
|
- mpt += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- break;
|
|
- }
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_cpu));
|
|
+ break;
|
|
case MP_BUS:
|
|
- {
|
|
- struct mpc_bus *m = (struct mpc_bus *)mpt;
|
|
- mpt += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- break;
|
|
- }
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_bus));
|
|
+ break;
|
|
case MP_IOAPIC:
|
|
- {
|
|
- mpt += sizeof(struct mpc_ioapic);
|
|
- count += sizeof(struct mpc_ioapic);
|
|
- break;
|
|
- }
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_ioapic));
|
|
+ break;
|
|
case MP_INTSRC:
|
|
- {
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
|
|
-
|
|
- apic_printk(APIC_VERBOSE, "OLD ");
|
|
- print_MP_intsrc_info(m);
|
|
- i = get_MP_intsrc_index(m);
|
|
- if (i > 0) {
|
|
- assign_to_mpc_intsrc(&mp_irqs[i], m);
|
|
- apic_printk(APIC_VERBOSE, "NEW ");
|
|
- print_mp_irq_info(&mp_irqs[i]);
|
|
- } else if (!i) {
|
|
- /* legacy, do nothing */
|
|
- } else if (nr_m_spare < SPARE_SLOT_NUM) {
|
|
- /*
|
|
- * not found (-1), or duplicated (-2)
|
|
- * are invalid entries,
|
|
- * we need to use the slot later
|
|
- */
|
|
- m_spare[nr_m_spare] = m;
|
|
- nr_m_spare++;
|
|
- }
|
|
-#endif
|
|
- mpt += sizeof(struct mpc_intsrc);
|
|
- count += sizeof(struct mpc_intsrc);
|
|
- break;
|
|
- }
|
|
+ check_irq_src((struct mpc_intsrc *)mpt, &nr_m_spare);
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_intsrc));
|
|
+ break;
|
|
case MP_LINTSRC:
|
|
- {
|
|
- struct mpc_lintsrc *m =
|
|
- (struct mpc_lintsrc *)mpt;
|
|
- mpt += sizeof(*m);
|
|
- count += sizeof(*m);
|
|
- break;
|
|
- }
|
|
+ skip_entry(&mpt, &count, sizeof(struct mpc_lintsrc));
|
|
+ break;
|
|
default:
|
|
/* wrong mptable */
|
|
- printk(KERN_ERR "Your mptable is wrong, contact your HW vendor!\n");
|
|
- printk(KERN_ERR "type %x\n", *mpt);
|
|
- print_hex_dump(KERN_ERR, " ", DUMP_PREFIX_ADDRESS, 16,
|
|
- 1, mpc, mpc->length, 1);
|
|
+ smp_dump_mptable(mpc, mpt);
|
|
goto out;
|
|
}
|
|
}
|
|
@@ -939,10 +952,10 @@ static int __init replace_intsrc_all(st
|
|
if (irq_used[i])
|
|
continue;
|
|
|
|
- if (mp_irqs[i].mp_irqtype != mp_INT)
|
|
+ if (mp_irqs[i].irqtype != mp_INT)
|
|
continue;
|
|
|
|
- if (mp_irqs[i].mp_irqflag != 0x0f)
|
|
+ if (mp_irqs[i].irqflag != 0x0f)
|
|
continue;
|
|
|
|
if (nr_m_spare > 0) {
|
|
@@ -953,16 +966,8 @@ static int __init replace_intsrc_all(st
|
|
} else {
|
|
struct mpc_intsrc *m = (struct mpc_intsrc *)mpt;
|
|
count += sizeof(struct mpc_intsrc);
|
|
- if (!mpc_new_phys) {
|
|
- printk(KERN_INFO "No spare slots, try to append...take your risk, new mpc_length %x\n", count);
|
|
- } else {
|
|
- if (count <= mpc_new_length)
|
|
- printk(KERN_INFO "No spare slots, try to append..., new mpc_length %x\n", count);
|
|
- else {
|
|
- printk(KERN_ERR "mpc_new_length %lx is too small\n", mpc_new_length);
|
|
- goto out;
|
|
- }
|
|
- }
|
|
+ if (!check_slot(mpc_new_phys, mpc_new_length, count))
|
|
+ goto out;
|
|
assign_to_mpc_intsrc(&mp_irqs[i], m);
|
|
mpc->length = count;
|
|
mpt += sizeof(struct mpc_intsrc);
|
|
@@ -1018,7 +1023,7 @@ static int __init update_mp_table(void)
|
|
{
|
|
char str[16];
|
|
char oem[10];
|
|
- struct intel_mp_floating *mpf;
|
|
+ struct mpf_intel *mpf;
|
|
struct mpc_table *mpc, *mpc_new;
|
|
|
|
if (!enable_update_mptable)
|
|
@@ -1031,19 +1036,19 @@ static int __init update_mp_table(void)
|
|
/*
|
|
* Now see if we need to go further.
|
|
*/
|
|
- if (mpf->mpf_feature1 != 0)
|
|
+ if (mpf->feature1 != 0)
|
|
return 0;
|
|
|
|
- if (!mpf->mpf_physptr)
|
|
+ if (!mpf->physptr)
|
|
return 0;
|
|
|
|
- mpc = _bus_to_virt(mpf->mpf_physptr);
|
|
+ mpc = _bus_to_virt(mpf->physptr);
|
|
|
|
if (!smp_check_mpc(mpc, oem, str))
|
|
return 0;
|
|
|
|
- printk(KERN_INFO "mpf: %lx\n", (long)arbitrary_virt_to_machine(mpf));
|
|
- printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr);
|
|
+ printk(KERN_INFO "mpf: %llx\n", (u64)arbitrary_virt_to_machine(mpf));
|
|
+ printk(KERN_INFO "physptr: %x\n", mpf->physptr);
|
|
|
|
if (mpc_new_phys && mpc->length > mpc_new_length) {
|
|
mpc_new_phys = 0;
|
|
@@ -1067,23 +1072,23 @@ static int __init update_mp_table(void)
|
|
maddr_t mpc_new_bus;
|
|
|
|
mpc_new_bus = phys_to_machine(mpc_new_phys);
|
|
- mpf->mpf_physptr = mpc_new_bus;
|
|
+ mpf->physptr = mpc_new_bus;
|
|
mpc_new = phys_to_virt(mpc_new_phys);
|
|
memcpy(mpc_new, mpc, mpc->length);
|
|
mpc = mpc_new;
|
|
/* check if we can modify that */
|
|
- if (mpc_new_bus - mpf->mpf_physptr) {
|
|
- struct intel_mp_floating *mpf_new;
|
|
+ if (mpc_new_bus - mpf->physptr) {
|
|
+ struct mpf_intel *mpf_new;
|
|
/* steal 16 bytes from [0, 1k) */
|
|
printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
|
|
mpf_new = isa_bus_to_virt(0x400 - 16);
|
|
memcpy(mpf_new, mpf, 16);
|
|
mpf = mpf_new;
|
|
- mpf->mpf_physptr = mpc_new_bus;
|
|
+ mpf->physptr = mpc_new_bus;
|
|
}
|
|
- mpf->mpf_checksum = 0;
|
|
- mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16);
|
|
- printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr);
|
|
+ mpf->checksum = 0;
|
|
+ mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
|
|
+ printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
|
|
}
|
|
|
|
/*
|
|
--- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,4 +1,5 @@
|
|
#include <linux/dma-mapping.h>
|
|
+#include <linux/dma-debug.h>
|
|
#include <linux/dmar.h>
|
|
#include <linux/bootmem.h>
|
|
#include <linux/pci.h>
|
|
@@ -12,7 +13,7 @@
|
|
|
|
static int forbid_dac __read_mostly;
|
|
|
|
-struct dma_mapping_ops *dma_ops;
|
|
+struct dma_map_ops *dma_ops;
|
|
EXPORT_SYMBOL(dma_ops);
|
|
|
|
static int iommu_sac_force __read_mostly;
|
|
@@ -39,11 +40,14 @@ EXPORT_SYMBOL(bad_dma_address);
|
|
to older i386. */
|
|
struct device x86_dma_fallback_dev = {
|
|
.init_name = "fallback device",
|
|
- .coherent_dma_mask = DMA_32BIT_MASK,
|
|
+ .coherent_dma_mask = DMA_BIT_MASK(32),
|
|
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
|
|
};
|
|
EXPORT_SYMBOL(x86_dma_fallback_dev);
|
|
|
|
+/* Number of entries preallocated for DMA-API debugging */
|
|
+#define PREALLOC_DMA_DEBUG_ENTRIES 32768
|
|
+
|
|
int dma_set_mask(struct device *dev, u64 mask)
|
|
{
|
|
if (!dev->dma_mask || !dma_supported(dev, mask))
|
|
@@ -103,20 +107,20 @@ static void __init dma32_free_bootmem(vo
|
|
}
|
|
#endif
|
|
|
|
-static struct dma_mapping_ops swiotlb_dma_ops = {
|
|
+static struct dma_map_ops swiotlb_dma_ops = {
|
|
.alloc_coherent = dma_generic_alloc_coherent,
|
|
.free_coherent = dma_generic_free_coherent,
|
|
.mapping_error = swiotlb_dma_mapping_error,
|
|
- .map_single = swiotlb_map_single_phys,
|
|
- .unmap_single = swiotlb_unmap_single,
|
|
+ .map_page = swiotlb_map_page,
|
|
+ .unmap_page = swiotlb_unmap_page,
|
|
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
|
|
.sync_single_for_device = swiotlb_sync_single_for_device,
|
|
.sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu,
|
|
.sync_single_range_for_device = swiotlb_sync_single_range_for_device,
|
|
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
|
|
.sync_sg_for_device = swiotlb_sync_sg_for_device,
|
|
- .map_sg = swiotlb_map_sg,
|
|
- .unmap_sg = swiotlb_unmap_sg,
|
|
+ .map_sg = swiotlb_map_sg_attrs,
|
|
+ .unmap_sg = swiotlb_unmap_sg_attrs,
|
|
.dma_supported = swiotlb_dma_supported
|
|
};
|
|
|
|
@@ -175,7 +179,7 @@ again:
|
|
if (!is_buffer_dma_capable(dma_mask, addr, size)) {
|
|
__free_pages(page, order);
|
|
|
|
- if (dma_mask < DMA_32BIT_MASK && !(flag & GFP_DMA)) {
|
|
+ if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
|
|
flag = (flag & ~GFP_DMA32) | GFP_DMA;
|
|
goto again;
|
|
}
|
|
@@ -305,7 +309,7 @@ int range_straddles_page_boundary(paddr_
|
|
|
|
int dma_supported(struct device *dev, u64 mask)
|
|
{
|
|
- struct dma_mapping_ops *ops = get_dma_ops(dev);
|
|
+ struct dma_map_ops *ops = get_dma_ops(dev);
|
|
|
|
#ifdef CONFIG_PCI
|
|
if (mask > 0xffffffff && forbid_dac > 0) {
|
|
@@ -320,7 +324,7 @@ int dma_supported(struct device *dev, u6
|
|
/* Copied from i386. Doesn't make much sense, because it will
|
|
only work for pci_alloc_coherent.
|
|
The caller just has to use GFP_DMA in this case. */
|
|
- if (mask < DMA_24BIT_MASK)
|
|
+ if (mask < DMA_BIT_MASK(24))
|
|
return 0;
|
|
|
|
/* Tell the device to use SAC when IOMMU force is on. This
|
|
@@ -335,7 +339,7 @@ int dma_supported(struct device *dev, u6
|
|
SAC for these. Assume all masks <= 40 bits are of this
|
|
type. Normally this doesn't make any difference, but gives
|
|
more gentle handling of IOMMU overflow. */
|
|
- if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) {
|
|
+ if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) {
|
|
dev_info(dev, "Force SAC with mask %Lx\n", mask);
|
|
return 0;
|
|
}
|
|
@@ -346,6 +350,12 @@ EXPORT_SYMBOL(dma_supported);
|
|
|
|
static int __init pci_iommu_init(void)
|
|
{
|
|
+ dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES);
|
|
+
|
|
+#ifdef CONFIG_PCI
|
|
+ dma_debug_add_bus(&pci_bus_type);
|
|
+#endif
|
|
+
|
|
calgary_iommu_init();
|
|
|
|
intel_iommu_init();
|
|
@@ -371,8 +381,7 @@ fs_initcall(pci_iommu_init);
|
|
static __devinit void via_no_dac(struct pci_dev *dev)
|
|
{
|
|
if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
|
|
- printk(KERN_INFO
|
|
- "PCI: VIA PCI bridge detected. Disabling DAC.\n");
|
|
+ dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n");
|
|
forbid_dac = 1;
|
|
}
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/pci-nommu-xen.c 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/pci-nommu-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -24,7 +24,7 @@ do { \
|
|
|
|
static int
|
|
gnttab_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
|
|
- int direction)
|
|
+ enum dma_data_direction dir, struct dma_attrs *attrs)
|
|
{
|
|
unsigned int i;
|
|
struct scatterlist *sg;
|
|
@@ -48,7 +48,7 @@ gnttab_map_sg(struct device *hwdev, stru
|
|
|
|
static void
|
|
gnttab_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
|
|
- int direction)
|
|
+ enum dma_data_direction dir, struct dma_attrs *attrs)
|
|
{
|
|
unsigned int i;
|
|
struct scatterlist *sg;
|
|
@@ -58,24 +58,25 @@ gnttab_unmap_sg(struct device *hwdev, st
|
|
}
|
|
|
|
static dma_addr_t
|
|
-gnttab_map_single(struct device *dev, phys_addr_t paddr, size_t size,
|
|
- int direction)
|
|
+gnttab_map_page(struct device *dev, struct page *page, unsigned long offset,
|
|
+ size_t size, enum dma_data_direction dir,
|
|
+ struct dma_attrs *attrs)
|
|
{
|
|
dma_addr_t dma;
|
|
|
|
WARN_ON(size == 0);
|
|
|
|
- dma = gnttab_dma_map_page(pfn_to_page(paddr >> PAGE_SHIFT)) +
|
|
- offset_in_page(paddr);
|
|
- IOMMU_BUG_ON(range_straddles_page_boundary(paddr, size));
|
|
+ dma = gnttab_dma_map_page(page) + offset;
|
|
+ IOMMU_BUG_ON(range_straddles_page_boundary(page_to_pseudophys(page) +
|
|
+ offset, size));
|
|
IOMMU_BUG_ON(address_needs_mapping(dev, dma, size));
|
|
|
|
return dma;
|
|
}
|
|
|
|
static void
|
|
-gnttab_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
|
|
- int direction)
|
|
+gnttab_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size,
|
|
+ enum dma_data_direction dir, struct dma_attrs *attrs)
|
|
{
|
|
gnttab_dma_unmap_page(dma_addr);
|
|
}
|
|
@@ -85,14 +86,14 @@ static int nommu_dma_supported(struct de
|
|
return 1;
|
|
}
|
|
|
|
-struct dma_mapping_ops nommu_dma_ops = {
|
|
- .alloc_coherent = dma_generic_alloc_coherent,
|
|
- .free_coherent = dma_generic_free_coherent,
|
|
- .map_single = gnttab_map_single,
|
|
- .unmap_single = gnttab_unmap_single,
|
|
- .map_sg = gnttab_map_sg,
|
|
- .unmap_sg = gnttab_unmap_sg,
|
|
- .dma_supported = nommu_dma_supported,
|
|
+struct dma_map_ops nommu_dma_ops = {
|
|
+ .alloc_coherent = dma_generic_alloc_coherent,
|
|
+ .free_coherent = dma_generic_free_coherent,
|
|
+ .map_page = gnttab_map_page,
|
|
+ .unmap_page = gnttab_unmap_page,
|
|
+ .map_sg = gnttab_map_sg,
|
|
+ .unmap_sg = gnttab_unmap_sg,
|
|
+ .dma_supported = nommu_dma_supported,
|
|
};
|
|
|
|
void __init no_iommu_init(void)
|
|
--- head-2010-05-25.orig/arch/x86/kernel/process-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/process-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,16 +1,19 @@
|
|
#include <linux/errno.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/mm.h>
|
|
-#include <asm/idle.h>
|
|
#include <linux/smp.h>
|
|
+#include <linux/prctl.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/module.h>
|
|
#include <linux/pm.h>
|
|
#include <linux/clockchips.h>
|
|
-#include <linux/ftrace.h>
|
|
+#include <trace/power.h>
|
|
#include <asm/system.h>
|
|
#include <asm/apic.h>
|
|
+#include <asm/idle.h>
|
|
+#include <asm/uaccess.h>
|
|
+#include <asm/i387.h>
|
|
#include <xen/evtchn.h>
|
|
|
|
unsigned long idle_halt;
|
|
@@ -20,6 +23,9 @@ EXPORT_SYMBOL(idle_nomwait);
|
|
|
|
struct kmem_cache *task_xstate_cachep;
|
|
|
|
+DEFINE_TRACE(power_start);
|
|
+DEFINE_TRACE(power_end);
|
|
+
|
|
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
|
|
{
|
|
*dst = *src;
|
|
@@ -57,6 +63,179 @@ void arch_task_cache_init(void)
|
|
}
|
|
|
|
/*
|
|
+ * Free current thread data structures etc..
|
|
+ */
|
|
+void exit_thread(void)
|
|
+{
|
|
+ struct task_struct *me = current;
|
|
+ struct thread_struct *t = &me->thread;
|
|
+ unsigned long *bp = t->io_bitmap_ptr;
|
|
+
|
|
+ if (bp) {
|
|
+ struct physdev_set_iobitmap set_iobitmap;
|
|
+
|
|
+ t->io_bitmap_ptr = NULL;
|
|
+ clear_thread_flag(TIF_IO_BITMAP);
|
|
+ /*
|
|
+ * Careful, clear this in the TSS too:
|
|
+ */
|
|
+ memset(&set_iobitmap, 0, sizeof(set_iobitmap));
|
|
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
|
|
+ &set_iobitmap));
|
|
+ t->io_bitmap_max = 0;
|
|
+ kfree(bp);
|
|
+ }
|
|
+
|
|
+ ds_exit_thread(current);
|
|
+}
|
|
+
|
|
+void flush_thread(void)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
|
|
+ clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
|
|
+ if (test_tsk_thread_flag(tsk, TIF_IA32)) {
|
|
+ clear_tsk_thread_flag(tsk, TIF_IA32);
|
|
+ } else {
|
|
+ set_tsk_thread_flag(tsk, TIF_IA32);
|
|
+ current_thread_info()->status |= TS_COMPAT;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ clear_tsk_thread_flag(tsk, TIF_DEBUG);
|
|
+
|
|
+ tsk->thread.debugreg0 = 0;
|
|
+ tsk->thread.debugreg1 = 0;
|
|
+ tsk->thread.debugreg2 = 0;
|
|
+ tsk->thread.debugreg3 = 0;
|
|
+ tsk->thread.debugreg6 = 0;
|
|
+ tsk->thread.debugreg7 = 0;
|
|
+ memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
+ /*
|
|
+ * Forget coprocessor state..
|
|
+ */
|
|
+ tsk->fpu_counter = 0;
|
|
+ clear_fpu(tsk);
|
|
+ clear_used_math();
|
|
+}
|
|
+
|
|
+static void hard_disable_TSC(void)
|
|
+{
|
|
+ write_cr4(read_cr4() | X86_CR4_TSD);
|
|
+}
|
|
+
|
|
+void disable_TSC(void)
|
|
+{
|
|
+ preempt_disable();
|
|
+ if (!test_and_set_thread_flag(TIF_NOTSC))
|
|
+ /*
|
|
+ * Must flip the CPU state synchronously with
|
|
+ * TIF_NOTSC in the current running context.
|
|
+ */
|
|
+ hard_disable_TSC();
|
|
+ preempt_enable();
|
|
+}
|
|
+
|
|
+static void hard_enable_TSC(void)
|
|
+{
|
|
+ write_cr4(read_cr4() & ~X86_CR4_TSD);
|
|
+}
|
|
+
|
|
+static void enable_TSC(void)
|
|
+{
|
|
+ preempt_disable();
|
|
+ if (test_and_clear_thread_flag(TIF_NOTSC))
|
|
+ /*
|
|
+ * Must flip the CPU state synchronously with
|
|
+ * TIF_NOTSC in the current running context.
|
|
+ */
|
|
+ hard_enable_TSC();
|
|
+ preempt_enable();
|
|
+}
|
|
+
|
|
+int get_tsc_mode(unsigned long adr)
|
|
+{
|
|
+ unsigned int val;
|
|
+
|
|
+ if (test_thread_flag(TIF_NOTSC))
|
|
+ val = PR_TSC_SIGSEGV;
|
|
+ else
|
|
+ val = PR_TSC_ENABLE;
|
|
+
|
|
+ return put_user(val, (unsigned int __user *)adr);
|
|
+}
|
|
+
|
|
+int set_tsc_mode(unsigned int val)
|
|
+{
|
|
+ if (val == PR_TSC_SIGSEGV)
|
|
+ disable_TSC();
|
|
+ else if (val == PR_TSC_ENABLE)
|
|
+ enable_TSC();
|
|
+ else
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
|
|
+{
|
|
+ struct thread_struct *prev, *next;
|
|
+
|
|
+ prev = &prev_p->thread;
|
|
+ next = &next_p->thread;
|
|
+
|
|
+ if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
|
|
+ test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
|
|
+ ds_switch_to(prev_p, next_p);
|
|
+ else if (next->debugctlmsr != prev->debugctlmsr)
|
|
+ update_debugctlmsr(next->debugctlmsr);
|
|
+
|
|
+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
+ set_debugreg(next->debugreg0, 0);
|
|
+ set_debugreg(next->debugreg1, 1);
|
|
+ set_debugreg(next->debugreg2, 2);
|
|
+ set_debugreg(next->debugreg3, 3);
|
|
+ /* no 4 and 5 */
|
|
+ set_debugreg(next->debugreg6, 6);
|
|
+ set_debugreg(next->debugreg7, 7);
|
|
+ }
|
|
+
|
|
+ if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
|
|
+ test_tsk_thread_flag(next_p, TIF_NOTSC)) {
|
|
+ /* prev and next are different */
|
|
+ if (test_tsk_thread_flag(next_p, TIF_NOTSC))
|
|
+ hard_disable_TSC();
|
|
+ else
|
|
+ hard_enable_TSC();
|
|
+ }
|
|
+}
|
|
+
|
|
+int sys_fork(struct pt_regs *regs)
|
|
+{
|
|
+ return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This is trivial, and on the face of it looks like it
|
|
+ * could equally well be done in user mode.
|
|
+ *
|
|
+ * Not so, for quite unobvious reasons - register pressure.
|
|
+ * In user mode vfork() cannot have a stack frame, and if
|
|
+ * done by calling the "clone()" system call directly, you
|
|
+ * do not have enough call-clobbered registers to hold all
|
|
+ * the information you need.
|
|
+ */
|
|
+int sys_vfork(struct pt_regs *regs)
|
|
+{
|
|
+ return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
|
|
+ NULL, NULL);
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
* Idle related variables and functions
|
|
*/
|
|
unsigned long boot_option_idle_override = 0;
|
|
@@ -130,7 +309,7 @@ void stop_this_cpu(void *dummy)
|
|
/*
|
|
* Remove this CPU:
|
|
*/
|
|
- cpu_clear(smp_processor_id(), cpu_online_map);
|
|
+ set_cpu_online(smp_processor_id(), false);
|
|
disable_all_local_evtchn();
|
|
|
|
for (;;) {
|
|
@@ -283,12 +462,13 @@ static int __cpuinit check_c1e_idle(cons
|
|
return 1;
|
|
}
|
|
|
|
-static cpumask_t c1e_mask = CPU_MASK_NONE;
|
|
+static cpumask_var_t c1e_mask;
|
|
static int c1e_detected;
|
|
|
|
void c1e_remove_cpu(int cpu)
|
|
{
|
|
- cpu_clear(cpu, c1e_mask);
|
|
+ if (c1e_mask != NULL)
|
|
+ cpumask_clear_cpu(cpu, c1e_mask);
|
|
}
|
|
|
|
/*
|
|
@@ -317,8 +497,8 @@ static void c1e_idle(void)
|
|
if (c1e_detected) {
|
|
int cpu = smp_processor_id();
|
|
|
|
- if (!cpu_isset(cpu, c1e_mask)) {
|
|
- cpu_set(cpu, c1e_mask);
|
|
+ if (!cpumask_test_cpu(cpu, c1e_mask)) {
|
|
+ cpumask_set_cpu(cpu, c1e_mask);
|
|
/*
|
|
* Force broadcast so ACPI can not interfere. Needs
|
|
* to run with interrupts enabled as it uses
|
|
@@ -350,7 +530,7 @@ static void c1e_idle(void)
|
|
void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
-#ifdef CONFIG_X86_SMP
|
|
+#ifdef CONFIG_SMP
|
|
if (pm_idle == poll_idle && smp_num_siblings > 1) {
|
|
printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
|
|
" performance may degrade.\n");
|
|
@@ -373,6 +553,17 @@ void __cpuinit select_idle_routine(const
|
|
#endif
|
|
}
|
|
|
|
+void __init init_c1e_mask(void)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ /* If we're using c1e_idle, we need to allocate c1e_mask. */
|
|
+ if (pm_idle == c1e_idle) {
|
|
+ alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
|
|
+ cpumask_clear(c1e_mask);
|
|
+ }
|
|
+#endif
|
|
+}
|
|
+
|
|
static int __init idle_setup(char *str)
|
|
{
|
|
if (!str)
|
|
--- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/process_32-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -11,6 +11,7 @@
|
|
|
|
#include <stdarg.h>
|
|
|
|
+#include <linux/stackprotector.h>
|
|
#include <linux/cpu.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/sched.h>
|
|
@@ -71,9 +72,6 @@ asmlinkage void cstar_ret_from_fork(void
|
|
DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
|
EXPORT_PER_CPU_SYMBOL(current_task);
|
|
|
|
-DEFINE_PER_CPU(int, cpu_number);
|
|
-EXPORT_PER_CPU_SYMBOL(cpu_number);
|
|
-
|
|
/*
|
|
* Return saved PC of a blocked thread.
|
|
*/
|
|
@@ -99,6 +97,15 @@ void cpu_idle(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
|
|
+ /*
|
|
+ * If we're the non-boot CPU, nothing set the stack canary up
|
|
+ * for us. CPU0 already has it initialized but no harm in
|
|
+ * doing it again. This is a good place for updating it, as
|
|
+ * we wont ever return from this function (so the invalid
|
|
+ * canaries already on the stack wont ever trigger).
|
|
+ */
|
|
+ boot_init_stack_canary();
|
|
+
|
|
current_thread_info()->status |= TS_POLLING;
|
|
|
|
/* endless idle loop with no priority at all */
|
|
@@ -113,7 +120,6 @@ void cpu_idle(void)
|
|
play_dead();
|
|
|
|
local_irq_disable();
|
|
- __get_cpu_var(irq_stat).idle_timestamp = jiffies;
|
|
/* Don't trace irqs off for idle */
|
|
stop_critical_timings();
|
|
xen_idle();
|
|
@@ -137,7 +143,7 @@ void __show_regs(struct pt_regs *regs, i
|
|
if (user_mode_vm(regs)) {
|
|
sp = regs->sp;
|
|
ss = regs->ss & 0xffff;
|
|
- savesegment(gs, gs);
|
|
+ gs = get_user_gs(regs);
|
|
} else {
|
|
sp = (unsigned long) (&regs->sp);
|
|
savesegment(ss, ss);
|
|
@@ -218,6 +224,7 @@ int kernel_thread(int (*fn)(void *), voi
|
|
regs.ds = __USER_DS;
|
|
regs.es = __USER_DS;
|
|
regs.fs = __KERNEL_PERCPU;
|
|
+ regs.gs = __KERNEL_STACK_CANARY;
|
|
regs.orig_ax = -1;
|
|
regs.ip = (unsigned long) kernel_thread_helper;
|
|
regs.cs = __KERNEL_CS | get_kernel_rpl();
|
|
@@ -228,47 +235,6 @@ int kernel_thread(int (*fn)(void *), voi
|
|
}
|
|
EXPORT_SYMBOL(kernel_thread);
|
|
|
|
-/*
|
|
- * Free current thread data structures etc..
|
|
- */
|
|
-void exit_thread(void)
|
|
-{
|
|
- /* The process may have allocated an io port bitmap... nuke it. */
|
|
- if (unlikely(test_thread_flag(TIF_IO_BITMAP))) {
|
|
- struct task_struct *tsk = current;
|
|
- struct thread_struct *t = &tsk->thread;
|
|
- struct physdev_set_iobitmap set_iobitmap;
|
|
- memset(&set_iobitmap, 0, sizeof(set_iobitmap));
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
|
|
- &set_iobitmap));
|
|
- kfree(t->io_bitmap_ptr);
|
|
- t->io_bitmap_ptr = NULL;
|
|
- clear_thread_flag(TIF_IO_BITMAP);
|
|
- }
|
|
-
|
|
- ds_exit_thread(current);
|
|
-}
|
|
-
|
|
-void flush_thread(void)
|
|
-{
|
|
- struct task_struct *tsk = current;
|
|
-
|
|
- tsk->thread.debugreg0 = 0;
|
|
- tsk->thread.debugreg1 = 0;
|
|
- tsk->thread.debugreg2 = 0;
|
|
- tsk->thread.debugreg3 = 0;
|
|
- tsk->thread.debugreg6 = 0;
|
|
- tsk->thread.debugreg7 = 0;
|
|
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
|
|
- /*
|
|
- * Forget coprocessor state..
|
|
- */
|
|
- tsk->fpu_counter = 0;
|
|
- clear_fpu(tsk);
|
|
- clear_used_math();
|
|
-}
|
|
-
|
|
void release_thread(struct task_struct *dead_task)
|
|
{
|
|
BUG_ON(dead_task->mm);
|
|
@@ -284,7 +250,7 @@ void prepare_to_copy(struct task_struct
|
|
unlazy_fpu(tsk);
|
|
}
|
|
|
|
-int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
+int copy_thread(unsigned long clone_flags, unsigned long sp,
|
|
unsigned long unused,
|
|
struct task_struct *p, struct pt_regs *regs)
|
|
{
|
|
@@ -302,7 +268,7 @@ int copy_thread(int nr, unsigned long cl
|
|
|
|
p->thread.ip = (unsigned long) ret_from_fork;
|
|
|
|
- savesegment(gs, p->thread.gs);
|
|
+ task_user_gs(p) = get_user_gs(regs);
|
|
|
|
tsk = current;
|
|
if (test_tsk_thread_flag(tsk, TIF_CSTAR))
|
|
@@ -344,7 +310,7 @@ int copy_thread(int nr, unsigned long cl
|
|
void
|
|
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
{
|
|
- __asm__("movl %0, %%gs" : : "r"(0));
|
|
+ set_user_gs(regs, 0);
|
|
regs->fs = 0;
|
|
set_fs(USER_DS);
|
|
regs->ds = __USER_DS;
|
|
@@ -360,98 +326,6 @@ start_thread(struct pt_regs *regs, unsig
|
|
}
|
|
EXPORT_SYMBOL_GPL(start_thread);
|
|
|
|
-static void hard_disable_TSC(void)
-{
-	write_cr4(read_cr4() | X86_CR4_TSD);
-}
-
-void disable_TSC(void)
-{
-	preempt_disable();
-	if (!test_and_set_thread_flag(TIF_NOTSC))
-		/*
-		 * Must flip the CPU state synchronously with
-		 * TIF_NOTSC in the current running context.
-		 */
-		hard_disable_TSC();
-	preempt_enable();
-}
-
-static void hard_enable_TSC(void)
-{
-	write_cr4(read_cr4() & ~X86_CR4_TSD);
-}
-
-static void enable_TSC(void)
-{
-	preempt_disable();
-	if (test_and_clear_thread_flag(TIF_NOTSC))
-		/*
-		 * Must flip the CPU state synchronously with
-		 * TIF_NOTSC in the current running context.
-		 */
-		hard_enable_TSC();
-	preempt_enable();
-}
-
-int get_tsc_mode(unsigned long adr)
-{
-	unsigned int val;
-
-	if (test_thread_flag(TIF_NOTSC))
-		val = PR_TSC_SIGSEGV;
-	else
-		val = PR_TSC_ENABLE;
-
-	return put_user(val, (unsigned int __user *)adr);
-}
-
-int set_tsc_mode(unsigned int val)
-{
-	if (val == PR_TSC_SIGSEGV)
-		disable_TSC();
-	else if (val == PR_TSC_ENABLE)
-		enable_TSC();
-	else
-		return -EINVAL;
-
-	return 0;
-}
-
-static noinline void
-__switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
-{
-	struct thread_struct *prev, *next;
-
-	prev = &prev_p->thread;
-	next = &next_p->thread;
-
-	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
-	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
-		ds_switch_to(prev_p, next_p);
-	else if (next->debugctlmsr != prev->debugctlmsr)
-		update_debugctlmsr(next->debugctlmsr);
-
-	if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
-		set_debugreg(next->debugreg0, 0);
-		set_debugreg(next->debugreg1, 1);
-		set_debugreg(next->debugreg2, 2);
-		set_debugreg(next->debugreg3, 3);
-		/* no 4 and 5 */
-		set_debugreg(next->debugreg6, 6);
-		set_debugreg(next->debugreg7, 7);
-	}
-
-	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
-	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
-		/* prev and next are different */
-		if (test_tsk_thread_flag(next_p, TIF_NOTSC))
-			hard_disable_TSC();
-		else
-			hard_enable_TSC();
-	}
-}
-
 /*
  * switch_to(x,yn) should switch tasks from x to y.
  *
@@ -532,7 +406,7 @@ __switch_to(struct task_struct *prev_p,
 		if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
 			     next->tls_array[i].b != prev->tls_array[i].b)) { \
 			mcl->op = __HYPERVISOR_update_descriptor; \
-			*(u64 *)&mcl->args[0] = virt_to_machine( \
+			*(u64 *)&mcl->args[0] = arbitrary_virt_to_machine( \
 				&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
 			*(u64 *)&mcl->args[2] = *(u64 *)&next->tls_array[i]; \
 			mcl++; \
@@ -612,64 +486,44 @@ __switch_to(struct task_struct *prev_p,
 	 * Restore %gs if needed (which is common)
 	 */
 	if (prev->gs | next->gs)
-		loadsegment(gs, next->gs);
+		lazy_load_gs(next->gs);
 
-	x86_write_percpu(current_task, next_p);
+	percpu_write(current_task, next_p);
 
 	return prev_p;
 }
 
-asmlinkage int sys_fork(struct pt_regs regs)
-{
-	return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
-}
-
-asmlinkage int sys_clone(struct pt_regs regs)
+int sys_clone(struct pt_regs *regs)
 {
 	unsigned long clone_flags;
 	unsigned long newsp;
 	int __user *parent_tidptr, *child_tidptr;
 
-	clone_flags = regs.bx;
-	newsp = regs.cx;
-	parent_tidptr = (int __user *)regs.dx;
-	child_tidptr = (int __user *)regs.di;
+	clone_flags = regs->bx;
+	newsp = regs->cx;
+	parent_tidptr = (int __user *)regs->dx;
+	child_tidptr = (int __user *)regs->di;
 	if (!newsp)
-		newsp = regs.sp;
-	return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr);
-}
-
-/*
- * This is trivial, and on the face of it looks like it
- * could equally well be done in user mode.
- *
- * Not so, for quite unobvious reasons - register pressure.
- * In user mode vfork() cannot have a stack frame, and if
- * done by calling the "clone()" system call directly, you
- * do not have enough call-clobbered registers to hold all
- * the information you need.
- */
-asmlinkage int sys_vfork(struct pt_regs regs)
-{
-	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL);
+		newsp = regs->sp;
+	return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
 }
 
 /*
  * sys_execve() executes a new program.
  */
-asmlinkage int sys_execve(struct pt_regs regs)
+int sys_execve(struct pt_regs *regs)
 {
 	int error;
 	char *filename;
 
-	filename = getname((char __user *) regs.bx);
+	filename = getname((char __user *) regs->bx);
 	error = PTR_ERR(filename);
 	if (IS_ERR(filename))
 		goto out;
 	error = do_execve(filename,
-			(char __user * __user *) regs.cx,
-			(char __user * __user *) regs.dx,
-			&regs);
+			(char __user * __user *) regs->cx,
+			(char __user * __user *) regs->dx,
+			regs);
 	if (error == 0) {
 		/* Make sure we don't return using sysenter.. */
 		set_thread_flag(TIF_IRET);
--- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c	2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_64-xen.c	2010-03-24 15:25:06.000000000 +0100
@@ -19,6 +19,7 @@
 
 #include <stdarg.h>
 
+#include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -50,7 +51,6 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
-#include <asm/pda.h>
 #include <asm/prctl.h>
 #include <xen/interface/platform.h>
 #include <xen/interface/physdev.h>
@@ -67,6 +67,11 @@
 
 asmlinkage extern void ret_from_fork(void);
 
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
+static DEFINE_PER_CPU(unsigned char, is_idle);
+
 unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
 
 static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -85,13 +90,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregist
 
 void enter_idle(void)
 {
-	write_pda(isidle, 1);
+	percpu_write(is_idle, 1);
 	atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
 }
 
 static void __exit_idle(void)
 {
-	if (test_and_clear_bit_pda(0, isidle) == 0)
+	if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
 		return;
 	atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
 }
@@ -121,6 +126,16 @@ static inline void play_dead(void)
 void cpu_idle(void)
 {
 	current_thread_info()->status |= TS_POLLING;
+
+	/*
+	 * If we're the non-boot CPU, nothing set the stack canary up
+	 * for us. CPU0 already has it initialized but no harm in
+	 * doing it again. This is a good place for updating it, as
+	 * we wont ever return from this function (so the invalid
+	 * canaries already on the stack wont ever trigger).
+	 */
+	boot_init_stack_canary();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		tick_nohz_stop_sched_tick(1);
@@ -230,78 +245,11 @@ void show_regs(struct pt_regs *regs)
|
|
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
|
|
}
|
|
|
|
-/*
|
|
- * Free current thread data structures etc..
|
|
- */
|
|
-void exit_thread(void)
|
|
-{
|
|
- struct task_struct *me = current;
|
|
- struct thread_struct *t = &me->thread;
|
|
-
|
|
- if (me->thread.io_bitmap_ptr) {
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
|
|
-#endif
|
|
-#ifdef CONFIG_XEN
|
|
- struct physdev_set_iobitmap iobmp_op;
|
|
- memset(&iobmp_op, 0, sizeof(iobmp_op));
|
|
-#endif
|
|
-
|
|
- kfree(t->io_bitmap_ptr);
|
|
- t->io_bitmap_ptr = NULL;
|
|
- clear_thread_flag(TIF_IO_BITMAP);
|
|
- /*
|
|
- * Careful, clear this in the TSS too:
|
|
- */
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
|
|
- put_cpu();
|
|
-#endif
|
|
-#ifdef CONFIG_XEN
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iobitmap,
|
|
- &iobmp_op));
|
|
-#endif
|
|
- t->io_bitmap_max = 0;
|
|
- }
|
|
-
|
|
- ds_exit_thread(current);
|
|
-}
|
|
-
|
|
void xen_load_gs_index(unsigned gs)
|
|
{
|
|
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
|
|
}
|
|
|
|
-void flush_thread(void)
|
|
-{
|
|
- struct task_struct *tsk = current;
|
|
-
|
|
- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
|
|
- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
|
|
- if (test_tsk_thread_flag(tsk, TIF_IA32)) {
|
|
- clear_tsk_thread_flag(tsk, TIF_IA32);
|
|
- } else {
|
|
- set_tsk_thread_flag(tsk, TIF_IA32);
|
|
- current_thread_info()->status |= TS_COMPAT;
|
|
- }
|
|
- }
|
|
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
|
|
-
|
|
- tsk->thread.debugreg0 = 0;
|
|
- tsk->thread.debugreg1 = 0;
|
|
- tsk->thread.debugreg2 = 0;
|
|
- tsk->thread.debugreg3 = 0;
|
|
- tsk->thread.debugreg6 = 0;
|
|
- tsk->thread.debugreg7 = 0;
|
|
- memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
- /*
|
|
- * Forget coprocessor state..
|
|
- */
|
|
- tsk->fpu_counter = 0;
|
|
- clear_fpu(tsk);
|
|
- clear_used_math();
|
|
-}
|
|
-
|
|
void release_thread(struct task_struct *dead_task)
|
|
{
|
|
if (dead_task->mm) {
|
|
@@ -343,7 +291,7 @@ void prepare_to_copy(struct task_struct
|
|
unlazy_fpu(tsk);
|
|
}
|
|
|
|
-int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
|
|
+int copy_thread(unsigned long clone_flags, unsigned long sp,
|
|
unsigned long unused,
|
|
struct task_struct *p, struct pt_regs *regs)
|
|
{
|
|
@@ -434,103 +382,6 @@ start_thread(struct pt_regs *regs, unsig
|
|
}
|
|
EXPORT_SYMBOL_GPL(start_thread);
|
|
|
|
-static void hard_disable_TSC(void)
|
|
-{
|
|
- write_cr4(read_cr4() | X86_CR4_TSD);
|
|
-}
|
|
-
|
|
-void disable_TSC(void)
|
|
-{
|
|
- preempt_disable();
|
|
- if (!test_and_set_thread_flag(TIF_NOTSC))
|
|
- /*
|
|
- * Must flip the CPU state synchronously with
|
|
- * TIF_NOTSC in the current running context.
|
|
- */
|
|
- hard_disable_TSC();
|
|
- preempt_enable();
|
|
-}
|
|
-
|
|
-static void hard_enable_TSC(void)
|
|
-{
|
|
- write_cr4(read_cr4() & ~X86_CR4_TSD);
|
|
-}
|
|
-
|
|
-static void enable_TSC(void)
|
|
-{
|
|
- preempt_disable();
|
|
- if (test_and_clear_thread_flag(TIF_NOTSC))
|
|
- /*
|
|
- * Must flip the CPU state synchronously with
|
|
- * TIF_NOTSC in the current running context.
|
|
- */
|
|
- hard_enable_TSC();
|
|
- preempt_enable();
|
|
-}
|
|
-
|
|
-int get_tsc_mode(unsigned long adr)
|
|
-{
|
|
- unsigned int val;
|
|
-
|
|
- if (test_thread_flag(TIF_NOTSC))
|
|
- val = PR_TSC_SIGSEGV;
|
|
- else
|
|
- val = PR_TSC_ENABLE;
|
|
-
|
|
- return put_user(val, (unsigned int __user *)adr);
|
|
-}
|
|
-
|
|
-int set_tsc_mode(unsigned int val)
|
|
-{
|
|
- if (val == PR_TSC_SIGSEGV)
|
|
- disable_TSC();
|
|
- else if (val == PR_TSC_ENABLE)
|
|
- enable_TSC();
|
|
- else
|
|
- return -EINVAL;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * This special macro can be used to load a debugging register
|
|
- */
|
|
-#define loaddebug(thread, r) set_debugreg(thread->debugreg ## r, r)
|
|
-
|
|
-static inline void __switch_to_xtra(struct task_struct *prev_p,
|
|
- struct task_struct *next_p)
|
|
-{
|
|
- struct thread_struct *prev, *next;
|
|
-
|
|
- prev = &prev_p->thread,
|
|
- next = &next_p->thread;
|
|
-
|
|
- if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
|
|
- test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
|
|
- ds_switch_to(prev_p, next_p);
|
|
- else if (next->debugctlmsr != prev->debugctlmsr)
|
|
- update_debugctlmsr(next->debugctlmsr);
|
|
-
|
|
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
- loaddebug(next, 0);
|
|
- loaddebug(next, 1);
|
|
- loaddebug(next, 2);
|
|
- loaddebug(next, 3);
|
|
- /* no 4 and 5 */
|
|
- loaddebug(next, 6);
|
|
- loaddebug(next, 7);
|
|
- }
|
|
-
|
|
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
|
|
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
|
|
- /* prev and next are different */
|
|
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
|
|
- hard_disable_TSC();
|
|
- else
|
|
- hard_enable_TSC();
|
|
- }
|
|
-}
|
|
-
|
|
/*
|
|
* switch_to(x,y) should switch tasks from x to y.
|
|
*
|
|
@@ -596,7 +447,7 @@ __switch_to(struct task_struct *prev_p,
|
|
if (unlikely(next->tls_array[i].a != prev->tls_array[i].a || \
|
|
next->tls_array[i].b != prev->tls_array[i].b)) { \
|
|
mcl->op = __HYPERVISOR_update_descriptor; \
|
|
- mcl->args[0] = virt_to_machine( \
|
|
+ mcl->args[0] = arbitrary_virt_to_machine( \
|
|
&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]);\
|
|
mcl->args[1] = *(u64 *)&next->tls_array[i]; \
|
|
mcl++; \
|
|
@@ -683,19 +534,11 @@ __switch_to(struct task_struct *prev_p,
|
|
/*
|
|
* Switch the PDA context.
|
|
*/
|
|
- write_pda(pcurrent, next_p);
|
|
- write_pda(kernelstack,
|
|
- (unsigned long)task_stack_page(next_p) +
|
|
- THREAD_SIZE - PDA_STACKOFFSET);
|
|
-#ifdef CONFIG_CC_STACKPROTECTOR
|
|
- write_pda(stack_canary, next_p->stack_canary);
|
|
+ percpu_write(current_task, next_p);
|
|
|
|
- /*
|
|
- * Build time only check to make sure the stack_canary is at
|
|
- * offset 40 in the pda; this is a gcc ABI requirement
|
|
- */
|
|
- BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
|
|
-#endif
|
|
+ percpu_write(kernel_stack,
|
|
+ (unsigned long)task_stack_page(next_p) +
|
|
+ THREAD_SIZE - KERNEL_STACK_OFFSET);
|
|
|
|
/*
|
|
* Now maybe reload the debug registers
|
|
@@ -749,11 +592,6 @@ void set_personality_64bit(void)
|
|
current->personality &= ~READ_IMPLIES_EXEC;
|
|
}
|
|
|
|
-asmlinkage long sys_fork(struct pt_regs *regs)
|
|
-{
|
|
- return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
|
|
-}
|
|
-
|
|
asmlinkage long
|
|
sys_clone(unsigned long clone_flags, unsigned long newsp,
|
|
void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
|
|
@@ -763,22 +601,6 @@ sys_clone(unsigned long clone_flags, uns
|
|
return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
|
|
}
|
|
|
|
-/*
|
|
- * This is trivial, and on the face of it looks like it
|
|
- * could equally well be done in user mode.
|
|
- *
|
|
- * Not so, for quite unobvious reasons - register pressure.
|
|
- * In user mode vfork() cannot have a stack frame, and if
|
|
- * done by calling the "clone()" system call directly, you
|
|
- * do not have enough call-clobbered registers to hold all
|
|
- * the information you need.
|
|
- */
|
|
-asmlinkage long sys_vfork(struct pt_regs *regs)
|
|
-{
|
|
- return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0,
|
|
- NULL, NULL);
|
|
-}
|
|
-
|
|
unsigned long get_wchan(struct task_struct *p)
|
|
{
|
|
unsigned long stack;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/quirks-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/quirks-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -75,8 +75,7 @@ static void ich_force_hpet_resume(void)
|
|
if (!force_hpet_address)
|
|
return;
|
|
|
|
- if (rcba_base == NULL)
|
|
- BUG();
|
|
+ BUG_ON(rcba_base == NULL);
|
|
|
|
/* read the Function Disable register, dword mode only */
|
|
val = readl(rcba_base + 0x3404);
|
|
@@ -173,7 +172,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_I
|
|
ich_force_enable_hpet);
|
|
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7,
|
|
ich_force_enable_hpet);
|
|
-
|
|
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x3a16, /* ICH10 */
|
|
+ ich_force_enable_hpet);
|
|
|
|
static struct pci_dev *cached_dev;
|
|
|
|
@@ -262,8 +262,6 @@ static void old_ich_force_enable_hpet_us
|
|
{
|
|
if (hpet_force_user)
|
|
old_ich_force_enable_hpet(dev);
|
|
- else
|
|
- hpet_print_force_info();
|
|
}
|
|
|
|
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/setup-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -74,14 +74,15 @@
|
|
#include <asm/e820.h>
|
|
#include <asm/mpspec.h>
|
|
#include <asm/setup.h>
|
|
-#include <asm/arch_hooks.h>
|
|
#include <asm/efi.h>
|
|
+#include <asm/timer.h>
|
|
+#include <asm/i8259.h>
|
|
#include <asm/sections.h>
|
|
#include <asm/dmi.h>
|
|
#include <asm/io_apic.h>
|
|
#include <asm/ist.h>
|
|
#include <asm/vmi.h>
|
|
-#include <setup_arch.h>
|
|
+#include <asm/setup_arch.h>
|
|
#include <asm/bios_ebda.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/processor.h>
|
|
@@ -89,7 +90,7 @@
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/vsyscall.h>
|
|
-#include <asm/smp.h>
|
|
+#include <asm/cpu.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/dma.h>
|
|
#include <asm/iommu.h>
|
|
@@ -97,7 +98,6 @@
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/proto.h>
|
|
|
|
-#include <mach_apic.h>
|
|
#include <asm/paravirt.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
@@ -118,9 +118,6 @@
|
|
#include <xen/firmware.h>
|
|
#include <xen/xencons.h>
|
|
|
|
-shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
-EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
-
|
|
static int xen_panic_event(struct notifier_block *, unsigned long, void *);
|
|
static struct notifier_block xen_panic_block = {
|
|
xen_panic_event, NULL, 0 /* try to go last */
|
|
@@ -145,7 +142,26 @@ EXPORT_SYMBOL(xen_start_info);
|
|
#define ARCH_SETUP
|
|
#endif
|
|
|
|
+RESERVE_BRK(dmi_alloc, 65536);
|
|
+
|
|
+unsigned int boot_cpu_id __read_mostly;
|
|
+
|
|
+static __initdata unsigned long _brk_start = (unsigned long)__brk_base;
|
|
+unsigned long _brk_end = (unsigned long)__brk_base;
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
+#ifdef CONFIG_X86_64
|
|
+int default_cpu_present_to_apicid(int mps_cpu)
|
|
+{
|
|
+ return __default_cpu_present_to_apicid(mps_cpu);
|
|
+}
|
|
+
|
|
+int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
|
|
+{
|
|
+ return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
|
|
+}
|
|
+#endif
|
|
+
|
|
#ifndef CONFIG_DEBUG_BOOT_PARAMS
|
|
struct boot_params __initdata boot_params;
|
|
#else
|
|
@@ -179,14 +195,6 @@ static struct resource bss_resource = {
|
|
|
|
|
|
#ifdef CONFIG_X86_32
|
|
-#ifndef CONFIG_XEN
|
|
-/* This value is set up by the early boot code to point to the value
|
|
- immediately after the boot time page tables. It contains a *physical*
|
|
- address, and must not be in the .bss segment! */
|
|
-unsigned long init_pg_tables_start __initdata = ~0UL;
|
|
-unsigned long init_pg_tables_end __initdata = ~0UL;
|
|
-#endif
|
|
-
|
|
static struct resource video_ram_resource = {
|
|
.name = "Video RAM area",
|
|
.start = 0xa0000,
|
|
@@ -226,7 +234,9 @@ struct ist_info ist_info;
|
|
#endif
|
|
|
|
#else
|
|
-struct cpuinfo_x86 boot_cpu_data __read_mostly;
|
|
+struct cpuinfo_x86 boot_cpu_data __read_mostly = {
|
|
+ .x86_phys_bits = MAX_PHYSMEM_BITS,
|
|
+};
|
|
EXPORT_SYMBOL(boot_cpu_data);
|
|
#endif
|
|
|
|
@@ -241,12 +251,6 @@ unsigned long mmu_cr4_features = X86_CR4
|
|
int bootloader_type;
|
|
|
|
/*
|
|
- * Early DMI memory
|
|
- */
|
|
-int dmi_alloc_index;
|
|
-char dmi_alloc_data[DMI_MAX_DATA];
|
|
-
|
|
-/*
|
|
* Setup options
|
|
*/
|
|
struct screen_info screen_info;
|
|
@@ -293,6 +297,35 @@ static inline void copy_edd(void)
|
|
}
|
|
#endif
|
|
|
|
+void * __init extend_brk(size_t size, size_t align)
|
|
+{
|
|
+ size_t mask = align - 1;
|
|
+ void *ret;
|
|
+
|
|
+ BUG_ON(_brk_start == 0);
|
|
+ BUG_ON(align & mask);
|
|
+
|
|
+ _brk_end = (_brk_end + mask) & ~mask;
|
|
+ BUG_ON((char *)(_brk_end + size) > __brk_limit);
|
|
+
|
|
+ ret = (void *)_brk_end;
|
|
+ _brk_end += size;
|
|
+
|
|
+ memset(ret, 0, size);
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static void __init reserve_brk(void)
|
|
+{
|
|
+ if (_brk_end > _brk_start)
|
|
+ reserve_early(__pa(_brk_start), __pa(_brk_end), "BRK");
|
|
+
|
|
+ /* Mark brk area as locked down and no longer taking any
|
|
+ new allocations */
|
|
+ _brk_start = 0;
|
|
+}
|
|
+
|
|
#ifdef CONFIG_BLK_DEV_INITRD
|
|
|
|
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
@@ -653,24 +686,7 @@ static int __init setup_elfcorehdr(char
|
|
early_param("elfcorehdr", setup_elfcorehdr);
|
|
#endif
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-static int __init default_update_genapic(void)
|
|
-{
|
|
-#ifdef CONFIG_X86_SMP
|
|
-# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64)
|
|
- genapic->wakeup_cpu = wakeup_secondary_cpu_via_init;
|
|
-# endif
|
|
-#endif
|
|
-
|
|
- return 0;
|
|
-}
|
|
-#else
|
|
-#define default_update_genapic NULL
|
|
-#endif
|
|
-
|
|
-static struct x86_quirks default_x86_quirks __initdata = {
|
|
- .update_genapic = default_update_genapic,
|
|
-};
|
|
+static struct x86_quirks default_x86_quirks __initdata;
|
|
|
|
struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
|
|
|
|
@@ -738,19 +754,11 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
/* Register a call for panic conditions. */
|
|
atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
|
|
-
|
|
- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
- VMASST_TYPE_writable_pagetables));
|
|
-#ifdef CONFIG_X86_32
|
|
- WARN_ON(HYPERVISOR_vm_assist(VMASST_CMD_enable,
|
|
- VMASST_TYPE_4gb_segments));
|
|
-#endif
|
|
#endif /* CONFIG_XEN */
|
|
|
|
#ifdef CONFIG_X86_32
|
|
memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
|
|
visws_early_detect();
|
|
- pre_setup_arch_hook();
|
|
#else
|
|
printk(KERN_INFO "Command line: %s\n", boot_command_line);
|
|
#endif
|
|
@@ -834,16 +842,7 @@ void __init setup_arch(char **cmdline_p)
|
|
init_mm.start_code = (unsigned long) _text;
|
|
init_mm.end_code = (unsigned long) _etext;
|
|
init_mm.end_data = (unsigned long) _edata;
|
|
-#ifdef CONFIG_X86_32
|
|
-#ifndef CONFIG_XEN
|
|
- init_mm.brk = init_pg_tables_end + PAGE_OFFSET;
|
|
-#else
|
|
- init_mm.brk = (PFN_UP(__pa(xen_start_info->pt_base)) +
|
|
- xen_start_info->nr_pt_frames) << PAGE_SHIFT;
|
|
-#endif
|
|
-#else
|
|
- init_mm.brk = (unsigned long) &_end;
|
|
-#endif
|
|
+ init_mm.brk = _brk_end;
|
|
|
|
code_resource.start = virt_to_phys(_text);
|
|
code_resource.end = virt_to_phys(_etext)-1;
|
|
@@ -956,9 +955,8 @@ void __init setup_arch(char **cmdline_p)
|
|
num_physpages = max_pfn;
|
|
max_mapnr = max_pfn;
|
|
|
|
-#ifndef CONFIG_XEN
|
|
- if (cpu_has_x2apic)
|
|
- check_x2apic();
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
+ check_x2apic();
|
|
#endif
|
|
|
|
/* How many end-of-memory variables you have, grandma! */
|
|
@@ -975,6 +973,8 @@ void __init setup_arch(char **cmdline_p)
|
|
setup_bios_corruption_check();
|
|
#endif
|
|
|
|
+ reserve_brk();
|
|
+
|
|
/* max_pfn_mapped is updated here */
|
|
max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
|
|
max_pfn_mapped = max_low_pfn_mapped;
|
|
@@ -999,7 +999,7 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
reserve_initrd();
|
|
|
|
-#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+#ifndef CONFIG_XEN
|
|
vsmp_init();
|
|
#endif
|
|
|
|
@@ -1034,12 +1034,11 @@ void __init setup_arch(char **cmdline_p)
|
|
*/
|
|
acpi_reserve_bootmem();
|
|
#endif
|
|
-#ifdef CONFIG_X86_FIND_SMP_CONFIG
|
|
/*
|
|
* Find and reserve possible boot-time SMP configuration:
|
|
*/
|
|
find_smp_config();
|
|
-#endif
|
|
+
|
|
reserve_crashkernel();
|
|
|
|
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
@@ -1140,13 +1139,9 @@ void __init setup_arch(char **cmdline_p)
|
|
for (i = 0; i < MAX_DMA_CHANNELS; ++i)
|
|
if (i != 4 && request_dma(i, "xen") != 0)
|
|
BUG();
|
|
-#endif /* CONFIG_XEN */
|
|
-
|
|
-#ifdef CONFIG_X86_GENERICARCH
|
|
+#else /* CONFIG_XEN */
|
|
generic_apic_probe();
|
|
-#endif
|
|
|
|
-#ifndef CONFIG_XEN
|
|
early_quirks();
|
|
#endif
|
|
|
|
@@ -1221,6 +1216,98 @@ void __init setup_arch(char **cmdline_p)
|
|
#endif /* CONFIG_XEN */
|
|
}
|
|
|
|
+#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
+
|
|
+/**
|
|
+ * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
|
|
+ *
|
|
+ * Description:
|
|
+ * Perform any necessary interrupt initialisation prior to setting up
|
|
+ * the "ordinary" interrupt call gates. For legacy reasons, the ISA
|
|
+ * interrupts should be initialised here if the machine emulates a PC
|
|
+ * in any way.
|
|
+ **/
|
|
+void __init x86_quirk_pre_intr_init(void)
|
|
+{
|
|
+ if (x86_quirks->arch_pre_intr_init) {
|
|
+ if (x86_quirks->arch_pre_intr_init())
|
|
+ return;
|
|
+ }
|
|
+ init_ISA_irqs();
|
|
+}
|
|
+
|
|
+/**
|
|
+ * x86_quirk_intr_init - post gate setup interrupt initialisation
|
|
+ *
|
|
+ * Description:
|
|
+ * Fill in any interrupts that may have been left out by the general
|
|
+ * init_IRQ() routine. interrupts having to do with the machine rather
|
|
+ * than the devices on the I/O bus (like APIC interrupts in intel MP
|
|
+ * systems) are started here.
|
|
+ **/
|
|
+void __init x86_quirk_intr_init(void)
|
|
+{
|
|
+ if (x86_quirks->arch_intr_init) {
|
|
+ if (x86_quirks->arch_intr_init())
|
|
+ return;
|
|
+ }
|
|
+}
|
|
+
|
|
+/**
|
|
+ * x86_quirk_trap_init - initialise system specific traps
|
|
+ *
|
|
+ * Description:
|
|
+ * Called as the final act of trap_init(). Used in VISWS to initialise
|
|
+ * the various board specific APIC traps.
|
|
+ **/
|
|
+void __init x86_quirk_trap_init(void)
|
|
+{
|
|
+ if (x86_quirks->arch_trap_init) {
|
|
+ if (x86_quirks->arch_trap_init())
|
|
+ return;
|
|
+ }
|
|
+}
|
|
+
|
|
+static struct irqaction irq0 = {
|
|
+ .handler = timer_interrupt,
|
|
+ .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
|
|
+ .name = "timer"
|
|
+};
|
|
+
|
|
+/**
|
|
+ * x86_quirk_pre_time_init - do any specific initialisations before.
|
|
+ *
|
|
+ **/
|
|
+void __init x86_quirk_pre_time_init(void)
|
|
+{
|
|
+ if (x86_quirks->arch_pre_time_init)
|
|
+ x86_quirks->arch_pre_time_init();
|
|
+}
|
|
+
|
|
+/**
|
|
+ * x86_quirk_time_init - do any specific initialisations for the system timer.
|
|
+ *
|
|
+ * Description:
|
|
+ * Must plug the system timer interrupt source at HZ into the IRQ listed
|
|
+ * in irq_vectors.h:TIMER_IRQ
|
|
+ **/
|
|
+void __init x86_quirk_time_init(void)
|
|
+{
|
|
+ if (x86_quirks->arch_time_init) {
|
|
+ /*
|
|
+ * A nonzero return code does not mean failure, it means
|
|
+ * that the architecture quirk does not want any
|
|
+ * generic (timer) setup to be performed after this:
|
|
+ */
|
|
+ if (x86_quirks->arch_time_init())
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ irq0.mask = cpumask_of_cpu(0);
|
|
+ setup_irq(0, &irq0);
|
|
+}
|
|
+#endif /* CONFIG_X86_32 */
|
|
+
|
|
#ifdef CONFIG_XEN
|
|
static int
|
|
xen_panic_event(struct notifier_block *this, unsigned long event, void *ptr)
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup_percpu.c 2010-05-25 09:12:09.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/setup_percpu.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -231,7 +231,7 @@ void __init setup_per_cpu_areas(void)
|
|
* are zeroed indicating that the static arrays are
|
|
* gone.
|
|
*/
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
|
|
per_cpu(x86_cpu_to_apicid, cpu) =
|
|
early_per_cpu_map(x86_cpu_to_apicid, cpu);
|
|
per_cpu(x86_bios_cpu_apicid, cpu) =
|
|
@@ -255,7 +255,7 @@ void __init setup_per_cpu_areas(void)
|
|
}
|
|
|
|
/* indicate the early static arrays will soon be gone */
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
|
|
early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
|
|
early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/smp-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/smp-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -2,7 +2,7 @@
|
|
* Intel SMP support routines.
|
|
*
|
|
* (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
|
|
- * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com>
|
|
+ * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
|
|
* (c) 2002,2003 Andi Kleen, SuSE Labs.
|
|
*
|
|
* i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
|
|
@@ -26,7 +26,7 @@
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/proto.h>
|
|
-#include <mach_ipi.h>
|
|
+#include <asm/ipi.h>
|
|
#include <xen/evtchn.h>
|
|
/*
|
|
* Some notes on x86 processor bugs affecting SMP operation:
|
|
@@ -118,17 +118,17 @@ void xen_smp_send_reschedule(int cpu)
|
|
WARN_ON(1);
|
|
return;
|
|
}
|
|
- send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
|
|
+ xen_send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
|
|
}
|
|
|
|
void xen_send_call_func_single_ipi(int cpu)
|
|
{
|
|
- send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_VECTOR);
|
|
+ xen_send_IPI_mask(cpumask_of(cpu), CALL_FUNC_SINGLE_VECTOR);
|
|
}
|
|
|
|
void xen_send_call_func_ipi(const struct cpumask *mask)
|
|
{
|
|
- send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
|
|
+ xen_send_IPI_mask_allbutself(mask, CALL_FUNCTION_VECTOR);
|
|
}
|
|
|
|
/*
|
|
--- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-05-11 17:14:09.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-05-12 09:02:08.000000000 +0200
|
|
@@ -554,7 +554,8 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
ct = jiffies_to_cputime(delta_cpu);
|
|
if (user_mode_vm(get_irq_regs()))
|
|
account_user_time(current, ct, cputime_to_scaled(ct));
|
|
- else if (current != idle_task(cpu))
|
|
+ else if (current != idle_task(cpu)
|
|
+ || irq_count() != HARDIRQ_OFFSET)
|
|
account_system_time(current, HARDIRQ_OFFSET,
|
|
ct, cputime_to_scaled(ct));
|
|
else
|
|
@@ -587,7 +588,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable);
|
|
|
|
static cycle_t cs_last;
|
|
|
|
-static cycle_t xen_clocksource_read(void)
|
|
+static cycle_t xen_clocksource_read(struct clocksource *cs)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
cycle_t last = get64(&cs_last);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/traps-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/traps-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -54,15 +54,14 @@
|
|
#include <asm/desc.h>
|
|
#include <asm/i387.h>
|
|
|
|
-#include <mach_traps.h>
|
|
+#include <asm/mach_traps.h>
|
|
|
|
#ifdef CONFIG_X86_64
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/proto.h>
|
|
-#include <asm/pda.h>
|
|
#else
|
|
#include <asm/processor-flags.h>
|
|
-#include <asm/arch_hooks.h>
|
|
+#include <asm/setup.h>
|
|
#include <asm/traps.h>
|
|
|
|
#include "cpu/mcheck/mce.h"
|
|
@@ -123,49 +122,6 @@ die_if_kernel(const char *str, struct pt
|
|
if (!user_mode_vm(regs))
|
|
die(str, regs, err);
|
|
}
|
|
-
|
|
-/*
|
|
- * Perform the lazy TSS's I/O bitmap copy. If the TSS has an
|
|
- * invalid offset set (the LAZY one) and the faulting thread has
|
|
- * a valid I/O bitmap pointer, we copy the I/O bitmap in the TSS,
|
|
- * we set the offset field correctly and return 1.
|
|
- */
|
|
-static int lazy_iobitmap_copy(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- struct thread_struct *thread;
|
|
- struct tss_struct *tss;
|
|
- int cpu;
|
|
-
|
|
- cpu = get_cpu();
|
|
- tss = &per_cpu(init_tss, cpu);
|
|
-	thread = &current->thread;
|
|
-
|
|
- if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY &&
|
|
- thread->io_bitmap_ptr) {
|
|
- memcpy(tss->io_bitmap, thread->io_bitmap_ptr,
|
|
- thread->io_bitmap_max);
|
|
- /*
|
|
- * If the previously set map was extending to higher ports
|
|
- * than the current one, pad extra space with 0xff (no access).
|
|
- */
|
|
- if (thread->io_bitmap_max < tss->io_bitmap_max) {
|
|
- memset((char *) tss->io_bitmap +
|
|
- thread->io_bitmap_max, 0xff,
|
|
- tss->io_bitmap_max - thread->io_bitmap_max);
|
|
- }
|
|
- tss->io_bitmap_max = thread->io_bitmap_max;
|
|
- tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
|
- tss->io_bitmap_owner = thread;
|
|
- put_cpu();
|
|
-
|
|
- return 1;
|
|
- }
|
|
- put_cpu();
|
|
-#endif
|
|
-
|
|
- return 0;
|
|
-}
|
|
#endif
|
|
|
|
static void __kprobes
|
|
@@ -316,11 +272,6 @@ do_general_protection(struct pt_regs *re
|
|
conditional_sti(regs);
|
|
|
|
#ifdef CONFIG_X86_32
|
|
- if (lazy_iobitmap_copy()) {
|
|
- /* restart the faulting instruction */
|
|
- return;
|
|
- }
|
|
-
|
|
if (regs->flags & X86_VM_MASK)
|
|
goto gp_in_vm86;
|
|
#endif
|
|
@@ -911,19 +862,20 @@ void math_emulate(struct math_emu_info *
|
|
}
|
|
#endif /* CONFIG_MATH_EMULATION */
|
|
|
|
-dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs)
|
|
+dotraplinkage void __kprobes
|
|
+do_device_not_available(struct pt_regs *regs, long error_code)
|
|
{
|
|
#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
|
|
if (read_cr0() & X86_CR0_EM) {
|
|
struct math_emu_info info = { };
|
|
|
|
-		conditional_sti(&regs);
|
|
+ conditional_sti(regs);
|
|
|
|
-		info.regs = &regs;
|
|
+ info.regs = regs;
|
|
math_emulate(&info);
|
|
} else {
|
|
math_state_restore(); /* interrupts still off */
|
|
-		conditional_sti(&regs);
|
|
+ conditional_sti(regs);
|
|
}
|
|
#else
|
|
math_state_restore();
|
|
@@ -939,7 +891,7 @@ dotraplinkage void do_iret_error(struct
|
|
info.si_signo = SIGILL;
|
|
info.si_errno = 0;
|
|
info.si_code = ILL_BADSTK;
|
|
- info.si_addr = 0;
|
|
+ info.si_addr = NULL;
|
|
if (notify_die(DIE_TRAP, "iret exception",
|
|
regs, error_code, 32, SIGILL) == NOTIFY_STOP)
|
|
return;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/vmlinux.lds.S 2010-03-24 15:08:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/vmlinux.lds.S 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -16,8 +16,10 @@
|
|
|
|
#ifdef CONFIG_X86_32
|
|
#define LOAD_OFFSET __PAGE_OFFSET
|
|
-#else
|
|
+#elif !defined(CONFIG_XEN) || CONFIG_XEN_COMPAT > 0x030002
|
|
#define LOAD_OFFSET __START_KERNEL_map
|
|
+#else
|
|
+#define LOAD_OFFSET 0
|
|
#endif
|
|
|
|
#include <asm-generic/vmlinux.lds.h>
|
|
--- head-2010-05-25.orig/arch/x86/mach-xen/Makefile 2007-06-12 13:12:48.000000000 +0200
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,5 +0,0 @@
|
|
-#
|
|
-# Makefile for the linux kernel.
|
|
-#
|
|
-
|
|
-obj-y := setup.o
|
|
--- head-2010-05-25.orig/arch/x86/mach-xen/setup.c 2010-03-24 15:12:46.000000000 +0100
|
|
+++ /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
@@ -1,190 +0,0 @@
|
|
-/*
|
|
- * Machine specific setup for generic
|
|
- */
|
|
-
|
|
-#include <linux/mm.h>
|
|
-#include <linux/smp.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/interrupt.h>
|
|
-#include <linux/module.h>
|
|
-#include <asm/acpi.h>
|
|
-#include <asm/arch_hooks.h>
|
|
-#include <asm/e820.h>
|
|
-#include <asm/setup.h>
|
|
-#include <asm/fixmap.h>
|
|
-#include <asm/pgtable.h>
|
|
-
|
|
-#include <xen/interface/callback.h>
|
|
-#include <xen/interface/memory.h>
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
-
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
-#define DEFAULT_SEND_IPI (1)
|
|
-#else
|
|
-#define DEFAULT_SEND_IPI (0)
|
|
-#endif
|
|
-
|
|
-int no_broadcast=DEFAULT_SEND_IPI;
|
|
-
|
|
-static __init int no_ipi_broadcast(char *str)
|
|
-{
|
|
- get_option(&str, &no_broadcast);
|
|
- printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
|
|
- "IPI Broadcast");
|
|
- return 1;
|
|
-}
|
|
-
|
|
-__setup("no_ipi_broadcast", no_ipi_broadcast);
|
|
-
|
|
-static int __init print_ipi_mode(void)
|
|
-{
|
|
- printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
|
|
- "Shortcut");
|
|
- return 0;
|
|
-}
|
|
-
|
|
-late_initcall(print_ipi_mode);
|
|
-
|
|
-unsigned long *machine_to_phys_mapping = (void *)MACH2PHYS_VIRT_START;
|
|
-EXPORT_SYMBOL(machine_to_phys_mapping);
|
|
-unsigned int machine_to_phys_order;
|
|
-EXPORT_SYMBOL(machine_to_phys_order);
|
|
-
|
|
-void __init pre_setup_arch_hook(void)
|
|
-{
|
|
- struct xen_machphys_mapping mapping;
|
|
- unsigned long machine_to_phys_nr_ents;
|
|
- struct xen_platform_parameters pp;
|
|
-
|
|
- init_mm.pgd = swapper_pg_dir = (pgd_t *)xen_start_info->pt_base;
|
|
-
|
|
- xen_setup_features();
|
|
-
|
|
- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
|
|
- hypervisor_virt_start = pp.virt_start;
|
|
- reserve_top_address(0UL - pp.virt_start);
|
|
- }
|
|
-
|
|
- if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
|
|
- machine_to_phys_mapping = (unsigned long *)mapping.v_start;
|
|
- machine_to_phys_nr_ents = mapping.max_mfn + 1;
|
|
- } else
|
|
- machine_to_phys_nr_ents = MACH2PHYS_NR_ENTRIES;
|
|
- machine_to_phys_order = fls(machine_to_phys_nr_ents - 1);
|
|
-
|
|
- if (!xen_feature(XENFEAT_auto_translated_physmap))
|
|
- phys_to_machine_mapping =
|
|
- (unsigned long *)xen_start_info->mfn_list;
|
|
-}
|
|
-
|
|
-#endif /* CONFIG_X86_32 */
|
|
-
|
|
-extern void hypervisor_callback(void);
|
|
-extern void failsafe_callback(void);
|
|
-extern void nmi(void);
|
|
-
|
|
-#ifdef CONFIG_X86_64
|
|
-#include <asm/proto.h>
|
|
-#define CALLBACK_ADDR(fn) ((unsigned long)(fn))
|
|
-#else
|
|
-#define CALLBACK_ADDR(fn) { __KERNEL_CS, (unsigned long)(fn) }
|
|
-#endif
|
|
-
|
|
-void __init machine_specific_arch_setup(void)
|
|
-{
|
|
- int ret;
|
|
- static struct callback_register __initdata event = {
|
|
- .type = CALLBACKTYPE_event,
|
|
- .address = CALLBACK_ADDR(hypervisor_callback)
|
|
- };
|
|
- static struct callback_register __initdata failsafe = {
|
|
- .type = CALLBACKTYPE_failsafe,
|
|
- .address = CALLBACK_ADDR(failsafe_callback)
|
|
- };
|
|
-#ifdef CONFIG_X86_64
|
|
- static struct callback_register __initdata syscall = {
|
|
- .type = CALLBACKTYPE_syscall,
|
|
- .address = CALLBACK_ADDR(system_call)
|
|
- };
|
|
-#endif
|
|
-#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32)
|
|
- static struct callback_register __initdata nmi_cb = {
|
|
- .type = CALLBACKTYPE_nmi,
|
|
- .address = CALLBACK_ADDR(nmi)
|
|
- };
|
|
-#endif
|
|
-
|
|
- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &event);
|
|
- if (ret == 0)
|
|
- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe);
|
|
-#ifdef CONFIG_X86_64
|
|
- if (ret == 0)
|
|
- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &syscall);
|
|
-#endif
|
|
-#if CONFIG_XEN_COMPAT <= 0x030002
|
|
-#ifdef CONFIG_X86_32
|
|
- if (ret == -ENOSYS)
|
|
- ret = HYPERVISOR_set_callbacks(
|
|
- event.address.cs, event.address.eip,
|
|
- failsafe.address.cs, failsafe.address.eip);
|
|
-#else
|
|
- ret = HYPERVISOR_set_callbacks(
|
|
- event.address,
|
|
- failsafe.address,
|
|
- syscall.address);
|
|
-#endif
|
|
-#endif
|
|
- BUG_ON(ret);
|
|
-
|
|
-#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_X86_32)
|
|
- ret = HYPERVISOR_callback_op(CALLBACKOP_register, &nmi_cb);
|
|
-#if CONFIG_XEN_COMPAT <= 0x030002
|
|
- if (ret == -ENOSYS) {
|
|
- static struct xennmi_callback __initdata cb = {
|
|
- .handler_address = (unsigned long)nmi
|
|
- };
|
|
-
|
|
- HYPERVISOR_nmi_op(XENNMI_register_callback, &cb);
|
|
- }
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
- /* Do an early initialization of the fixmap area */
|
|
- {
|
|
- extern pte_t swapper_pg_fixmap[PTRS_PER_PTE];
|
|
- unsigned long addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
|
|
- pud_t *pud = pud_offset(swapper_pg_dir + pgd_index(addr), addr);
|
|
- pmd_t *pmd = pmd_offset(pud, addr);
|
|
- unsigned int i;
|
|
-
|
|
- make_lowmem_page_readonly(swapper_pg_fixmap, XENFEAT_writable_page_tables);
|
|
- set_pmd(pmd, __pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
|
|
-
|
|
-#define __FIXADDR_TOP (-PAGE_SIZE)
|
|
-#define FIX_BUG_ON(fix) BUILD_BUG_ON(pmd_index(__fix_to_virt(FIX_##fix)) \
|
|
- != pmd_index(__fix_to_virt(FIX_EARLYCON_MEM_BASE)))
|
|
- FIX_BUG_ON(SHARED_INFO);
|
|
- FIX_BUG_ON(ISAMAP_BEGIN);
|
|
- FIX_BUG_ON(ISAMAP_END);
|
|
-#undef __FIXADDR_TOP
|
|
- BUG_ON(pte_index(hypervisor_virt_start));
|
|
-
|
|
- /* Switch to the real shared_info page, and clear the
|
|
- * dummy page. */
|
|
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
|
|
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
|
|
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
|
|
-
|
|
- /* Setup mapping of lower 1st MB */
|
|
- for (i = 0; i < NR_FIX_ISAMAPS; i++)
|
|
- if (is_initial_xendomain())
|
|
- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
|
|
- else
|
|
- __set_fixmap(FIX_ISAMAP_BEGIN - i,
|
|
- virt_to_machine(empty_zero_page),
|
|
- PAGE_KERNEL_RO);
|
|
- }
|
|
-#endif
|
|
-}
|
|
--- head-2010-05-25.orig/arch/x86/mm/Makefile 2010-03-24 15:01:37.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/Makefile 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -26,5 +26,6 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.
|
|
obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o
|
|
|
|
obj-$(CONFIG_XEN) += hypervisor.o
|
|
+disabled-obj-$(CONFIG_XEN) := tlb.o
|
|
|
|
obj-$(CONFIG_MEMTEST) += memtest.o
|
|
--- head-2010-05-25.orig/arch/x86/mm/fault-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/fault-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,73 +1,79 @@
|
|
/*
|
|
* Copyright (C) 1995 Linus Torvalds
|
|
- * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs.
|
|
+ * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
|
|
+ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
|
|
*/
|
|
-
|
|
-#include <linux/signal.h>
|
|
-#include <linux/sched.h>
|
|
-#include <linux/kernel.h>
|
|
-#include <linux/errno.h>
|
|
-#include <linux/string.h>
|
|
-#include <linux/types.h>
|
|
-#include <linux/ptrace.h>
|
|
-#include <linux/mmiotrace.h>
|
|
-#include <linux/mman.h>
|
|
-#include <linux/mm.h>
|
|
-#include <linux/smp.h>
|
|
#include <linux/interrupt.h>
|
|
-#include <linux/init.h>
|
|
-#include <linux/tty.h>
|
|
-#include <linux/vt_kern.h> /* For unblank_screen() */
|
|
+#include <linux/mmiotrace.h>
|
|
+#include <linux/bootmem.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/highmem.h>
|
|
-#include <linux/bootmem.h> /* for max_low_pfn */
|
|
-#include <linux/vmalloc.h>
|
|
-#include <linux/module.h>
|
|
#include <linux/kprobes.h>
|
|
#include <linux/uaccess.h>
|
|
+#include <linux/vmalloc.h>
|
|
+#include <linux/vt_kern.h>
|
|
+#include <linux/signal.h>
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/ptrace.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/module.h>
|
|
#include <linux/kdebug.h>
|
|
+#include <linux/errno.h>
|
|
+#include <linux/magic.h>
|
|
+#include <linux/sched.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/mman.h>
|
|
+#include <linux/tty.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/mm.h>
|
|
+
|
|
+#include <asm-generic/sections.h>
|
|
|
|
-#include <asm/system.h>
|
|
-#include <asm/desc.h>
|
|
-#include <asm/segment.h>
|
|
-#include <asm/pgalloc.h>
|
|
-#include <asm/smp.h>
|
|
#include <asm/tlbflush.h>
|
|
+#include <asm/pgalloc.h>
|
|
+#include <asm/segment.h>
|
|
+#include <asm/system.h>
|
|
#include <asm/proto.h>
|
|
-#include <asm-generic/sections.h>
|
|
#include <asm/traps.h>
|
|
+#include <asm/desc.h>
|
|
|
|
/*
|
|
- * Page fault error code bits
|
|
- * bit 0 == 0 means no page found, 1 means protection fault
|
|
- * bit 1 == 0 means read, 1 means write
|
|
- * bit 2 == 0 means kernel, 1 means user-mode
|
|
- * bit 3 == 1 means use of reserved bit detected
|
|
- * bit 4 == 1 means fault was an instruction fetch
|
|
- */
|
|
-#define PF_PROT (1<<0)
|
|
-#define PF_WRITE (1<<1)
|
|
-#define PF_USER (1<<2)
|
|
-#define PF_RSVD (1<<3)
|
|
-#define PF_INSTR (1<<4)
|
|
+ * Page fault error code bits:
|
|
+ *
|
|
+ * bit 0 == 0: no page found 1: protection fault
|
|
+ * bit 1 == 0: read access 1: write access
|
|
+ * bit 2 == 0: kernel-mode access 1: user-mode access
|
|
+ * bit 3 == 1: use of reserved bit detected
|
|
+ * bit 4 == 1: fault was an instruction fetch
|
|
+ */
|
|
+enum x86_pf_error_code {
|
|
|
|
+ PF_PROT = 1 << 0,
|
|
+ PF_WRITE = 1 << 1,
|
|
+ PF_USER = 1 << 2,
|
|
+ PF_RSVD = 1 << 3,
|
|
+ PF_INSTR = 1 << 4,
|
|
+};
|
|
+
|
|
+/*
|
|
+ * Returns 0 if mmiotrace is disabled, or if the fault is not
|
|
+ * handled by mmiotrace:
|
|
+ */
|
|
static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
|
|
{
|
|
-#ifdef CONFIG_MMIOTRACE
|
|
if (unlikely(is_kmmio_active()))
|
|
if (kmmio_handler(regs, addr) == 1)
|
|
return -1;
|
|
-#endif
|
|
return 0;
|
|
}
|
|
|
|
static inline int notify_page_fault(struct pt_regs *regs)
|
|
{
|
|
-#ifdef CONFIG_KPROBES
|
|
int ret = 0;
|
|
|
|
/* kprobe_running() needs smp_processor_id() */
|
|
- if (!user_mode_vm(regs)) {
|
|
+ if (kprobes_built_in() && !user_mode_vm(regs)) {
|
|
preempt_disable();
|
|
if (kprobe_running() && kprobe_fault_handler(regs, 14))
|
|
ret = 1;
|
|
@@ -75,29 +81,76 @@ static inline int notify_page_fault(stru
|
|
}
|
|
|
|
return ret;
|
|
-#else
|
|
- return 0;
|
|
-#endif
|
|
}
|
|
|
|
/*
|
|
- * X86_32
|
|
- * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
|
|
- * Check that here and ignore it.
|
|
- *
|
|
- * X86_64
|
|
- * Sometimes the CPU reports invalid exceptions on prefetch.
|
|
- * Check that here and ignore it.
|
|
+ * Prefetch quirks:
|
|
+ *
|
|
+ * 32-bit mode:
|
|
*
|
|
- * Opcode checker based on code by Richard Brunner
|
|
+ * Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
|
|
+ * Check that here and ignore it.
|
|
+ *
|
|
+ * 64-bit mode:
|
|
+ *
|
|
+ * Sometimes the CPU reports invalid exceptions on prefetch.
|
|
+ * Check that here and ignore it.
|
|
+ *
|
|
+ * Opcode checker based on code by Richard Brunner.
|
|
*/
|
|
-static int is_prefetch(struct pt_regs *regs, unsigned long addr,
|
|
- unsigned long error_code)
|
|
+static inline int
|
|
+check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
|
|
+ unsigned char opcode, int *prefetch)
|
|
{
|
|
+ unsigned char instr_hi = opcode & 0xf0;
|
|
+ unsigned char instr_lo = opcode & 0x0f;
|
|
+
|
|
+ switch (instr_hi) {
|
|
+ case 0x20:
|
|
+ case 0x30:
|
|
+ /*
|
|
+ * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
|
|
+ * In X86_64 long mode, the CPU will signal invalid
|
|
+ * opcode if some of these prefixes are present so
|
|
+ * X86_64 will never get here anyway
|
|
+ */
|
|
+ return ((instr_lo & 7) == 0x6);
|
|
+#ifdef CONFIG_X86_64
|
|
+ case 0x40:
|
|
+ /*
|
|
+ * In AMD64 long mode 0x40..0x4F are valid REX prefixes
|
|
+ * Need to figure out under what instruction mode the
|
|
+ * instruction was issued. Could check the LDT for lm,
|
|
+ * but for now it's good enough to assume that long
|
|
+ * mode only uses well known segments or kernel.
|
|
+ */
|
|
+ return (!user_mode(regs)) || (regs->cs == __USER_CS);
|
|
+#endif
|
|
+ case 0x60:
|
|
+ /* 0x64 thru 0x67 are valid prefixes in all modes. */
|
|
+ return (instr_lo & 0xC) == 0x4;
|
|
+ case 0xF0:
|
|
+ /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
|
|
+ return !instr_lo || (instr_lo>>1) == 1;
|
|
+ case 0x00:
|
|
+ /* Prefetch instruction is 0x0F0D or 0x0F18 */
|
|
+ if (probe_kernel_address(instr, opcode))
|
|
+ return 0;
|
|
+
|
|
+ *prefetch = (instr_lo == 0xF) &&
|
|
+ (opcode == 0x0D || opcode == 0x18);
|
|
+ return 0;
|
|
+ default:
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+static int
|
|
+is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
|
|
+{
|
|
+ unsigned char *max_instr;
|
|
unsigned char *instr;
|
|
- int scan_more = 1;
|
|
int prefetch = 0;
|
|
- unsigned char *max_instr;
|
|
|
|
/*
|
|
* If it was a exec (instruction fetch) fault on NX page, then
|
|
@@ -106,99 +159,174 @@ static int is_prefetch(struct pt_regs *r
|
|
if (error_code & PF_INSTR)
|
|
return 0;
|
|
|
|
- instr = (unsigned char *)convert_ip_to_linear(current, regs);
|
|
+ instr = (void *)convert_ip_to_linear(current, regs);
|
|
max_instr = instr + 15;
|
|
|
|
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
|
|
return 0;
|
|
|
|
- while (scan_more && instr < max_instr) {
|
|
+ while (instr < max_instr) {
|
|
unsigned char opcode;
|
|
- unsigned char instr_hi;
|
|
- unsigned char instr_lo;
|
|
|
|
if (probe_kernel_address(instr, opcode))
|
|
break;
|
|
|
|
- instr_hi = opcode & 0xf0;
|
|
- instr_lo = opcode & 0x0f;
|
|
instr++;
|
|
|
|
- switch (instr_hi) {
|
|
- case 0x20:
|
|
- case 0x30:
|
|
- /*
|
|
- * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
|
|
- * In X86_64 long mode, the CPU will signal invalid
|
|
- * opcode if some of these prefixes are present so
|
|
- * X86_64 will never get here anyway
|
|
- */
|
|
- scan_more = ((instr_lo & 7) == 0x6);
|
|
- break;
|
|
-#ifdef CONFIG_X86_64
|
|
- case 0x40:
|
|
- /*
|
|
- * In AMD64 long mode 0x40..0x4F are valid REX prefixes
|
|
- * Need to figure out under what instruction mode the
|
|
- * instruction was issued. Could check the LDT for lm,
|
|
- * but for now it's good enough to assume that long
|
|
- * mode only uses well known segments or kernel.
|
|
- */
|
|
- scan_more = (!user_mode(regs)) || (regs->cs == __USER_CS);
|
|
+ if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
|
|
break;
|
|
+ }
|
|
+ return prefetch;
|
|
+}
|
|
+
|
|
+static void
|
|
+force_sig_info_fault(int si_signo, int si_code, unsigned long address,
|
|
+ struct task_struct *tsk)
|
|
+{
|
|
+ siginfo_t info;
|
|
+
|
|
+ info.si_signo = si_signo;
|
|
+ info.si_errno = 0;
|
|
+ info.si_code = si_code;
|
|
+ info.si_addr = (void __user *)address;
|
|
+
|
|
+ force_sig_info(si_signo, &info, tsk);
|
|
+}
|
|
+
|
|
+DEFINE_SPINLOCK(pgd_lock);
|
|
+LIST_HEAD(pgd_list);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
|
|
+{
|
|
+ unsigned index = pgd_index(address);
|
|
+ pgd_t *pgd_k;
|
|
+ pud_t *pud, *pud_k;
|
|
+ pmd_t *pmd, *pmd_k;
|
|
+
|
|
+ pgd += index;
|
|
+ pgd_k = init_mm.pgd + index;
|
|
+
|
|
+ if (!pgd_present(*pgd_k))
|
|
+ return NULL;
|
|
+
|
|
+ /*
|
|
+ * set_pgd(pgd, *pgd_k); here would be useless on PAE
|
|
+ * and redundant with the set_pmd() on non-PAE. As would
|
|
+ * set_pud.
|
|
+ */
|
|
+ pud = pud_offset(pgd, address);
|
|
+ pud_k = pud_offset(pgd_k, address);
|
|
+ if (!pud_present(*pud_k))
|
|
+ return NULL;
|
|
+
|
|
+ pmd = pmd_offset(pud, address);
|
|
+ pmd_k = pmd_offset(pud_k, address);
|
|
+ if (!pmd_present(*pmd_k))
|
|
+ return NULL;
|
|
+
|
|
+ if (!pmd_present(*pmd)) {
|
|
+ bool lazy = percpu_read(xen_lazy_mmu);
|
|
+
|
|
+ percpu_write(xen_lazy_mmu, false);
|
|
+#if CONFIG_XEN_COMPAT > 0x030002
|
|
+ set_pmd(pmd, *pmd_k);
|
|
+#else
|
|
+ /*
|
|
+ * When running on older Xen we must launder *pmd_k through
|
|
+ * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
|
|
+ */
|
|
+ set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
|
|
#endif
|
|
- case 0x60:
|
|
- /* 0x64 thru 0x67 are valid prefixes in all modes. */
|
|
- scan_more = (instr_lo & 0xC) == 0x4;
|
|
- break;
|
|
- case 0xF0:
|
|
- /* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
|
|
- scan_more = !instr_lo || (instr_lo>>1) == 1;
|
|
- break;
|
|
- case 0x00:
|
|
- /* Prefetch instruction is 0x0F0D or 0x0F18 */
|
|
- scan_more = 0;
|
|
+ percpu_write(xen_lazy_mmu, lazy);
|
|
+ } else {
|
|
+ BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
|
|
+ }
|
|
+
|
|
+ return pmd_k;
|
|
+}
|
|
+
|
|
+void vmalloc_sync_all(void)
|
|
+{
|
|
+ unsigned long address;
|
|
+
|
|
+ if (SHARED_KERNEL_PMD)
|
|
+ return;
|
|
+
|
|
+ for (address = VMALLOC_START & PMD_MASK;
|
|
+ address >= TASK_SIZE && address < FIXADDR_TOP;
|
|
+ address += PMD_SIZE) {
|
|
+
|
|
+ unsigned long flags;
|
|
+ struct page *page;
|
|
|
|
- if (probe_kernel_address(instr, opcode))
|
|
+ spin_lock_irqsave(&pgd_lock, flags);
|
|
+ list_for_each_entry(page, &pgd_list, lru) {
|
|
+ if (!vmalloc_sync_one(page_address(page), address))
|
|
break;
|
|
- prefetch = (instr_lo == 0xF) &&
|
|
- (opcode == 0x0D || opcode == 0x18);
|
|
- break;
|
|
- default:
|
|
- scan_more = 0;
|
|
- break;
|
|
}
|
|
+ spin_unlock_irqrestore(&pgd_lock, flags);
|
|
}
|
|
- return prefetch;
|
|
}
|
|
|
|
-static void force_sig_info_fault(int si_signo, int si_code,
|
|
- unsigned long address, struct task_struct *tsk)
|
|
+/*
|
|
+ * 32-bit:
|
|
+ *
|
|
+ * Handle a fault on the vmalloc or module mapping area
|
|
+ */
|
|
+static noinline int vmalloc_fault(unsigned long address)
|
|
{
|
|
- siginfo_t info;
|
|
+ unsigned long pgd_paddr;
|
|
+ pmd_t *pmd_k;
|
|
+ pte_t *pte_k;
|
|
|
|
- info.si_signo = si_signo;
|
|
- info.si_errno = 0;
|
|
- info.si_code = si_code;
|
|
- info.si_addr = (void __user *)address;
|
|
- force_sig_info(si_signo, &info, tsk);
|
|
+ /* Make sure we are in vmalloc area: */
|
|
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
|
|
+ return -1;
|
|
+
|
|
+ /*
|
|
+ * Synchronize this task's top level page-table
|
|
+ * with the 'reference' page table.
|
|
+ *
|
|
+ * Do _not_ use "current" here. We might be inside
|
|
+ * an interrupt in the middle of a task switch..
|
|
+ */
|
|
+ pgd_paddr = read_cr3();
|
|
+ pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
|
|
+ if (!pmd_k)
|
|
+ return -1;
|
|
+
|
|
+ pte_k = pte_offset_kernel(pmd_k, address);
|
|
+ if (!pte_present(*pte_k))
|
|
+ return -1;
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
-static int bad_address(void *p)
|
|
+/*
|
|
+ * Did it hit the DOS screen memory VA from vm86 mode?
|
|
+ */
|
|
+static inline void
|
|
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
|
|
+ struct task_struct *tsk)
|
|
{
|
|
- unsigned long dummy;
|
|
- return probe_kernel_address((unsigned long *)p, dummy);
|
|
+ unsigned long bit;
|
|
+
|
|
+ if (!v8086_mode(regs))
|
|
+ return;
|
|
+
|
|
+ bit = (address - 0xA0000) >> PAGE_SHIFT;
|
|
+ if (bit < 32)
|
|
+ tsk->thread.screen_bitmap |= 1 << bit;
|
|
}
|
|
-#endif
|
|
|
|
static void dump_pagetable(unsigned long address)
|
|
{
|
|
-#ifdef CONFIG_X86_32
|
|
__typeof__(pte_val(__pte(0))) page;
|
|
|
|
page = read_cr3();
|
|
page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
|
|
+
|
|
#ifdef CONFIG_X86_PAE
|
|
printk("*pdpt = %016Lx ", page);
|
|
if ((page & _PAGE_PRESENT)
|
|
@@ -206,7 +334,7 @@ static void dump_pagetable(unsigned long
|
|
page = mfn_to_pfn(page >> PAGE_SHIFT);
|
|
page <<= PAGE_SHIFT;
|
|
page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
|
|
- & (PTRS_PER_PMD - 1)];
|
|
+ & (PTRS_PER_PMD - 1)];
|
|
printk(KERN_CONT "*pde = %016Lx ", page);
|
|
page &= ~_PAGE_NX;
|
|
}
|
|
@@ -218,20 +346,146 @@ static void dump_pagetable(unsigned long
|
|
* We must not directly access the pte in the highpte
|
|
* case if the page table is located in highmem.
|
|
* And let's rather not kmap-atomic the pte, just in case
|
|
- * it's allocated already.
|
|
+ * it's allocated already:
|
|
*/
|
|
if ((page & _PAGE_PRESENT)
|
|
&& mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn
|
|
&& !(page & _PAGE_PSE)) {
|
|
+
|
|
page = mfn_to_pfn(page >> PAGE_SHIFT);
|
|
page <<= PAGE_SHIFT;
|
|
page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
|
|
- & (PTRS_PER_PTE - 1)];
|
|
+ & (PTRS_PER_PTE - 1)];
|
|
printk(KERN_CONT "*pte = %0*Lx ", sizeof(page)*2, (u64)page);
|
|
}
|
|
|
|
printk(KERN_CONT "\n");
|
|
-#else /* CONFIG_X86_64 */
|
|
+}
|
|
+
|
|
+#else /* CONFIG_X86_64: */
|
|
+
|
|
+void vmalloc_sync_all(void)
|
|
+{
|
|
+ unsigned long address;
|
|
+
|
|
+ for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
|
|
+ address += PGDIR_SIZE) {
|
|
+
|
|
+ const pgd_t *pgd_ref = pgd_offset_k(address);
|
|
+ unsigned long flags;
|
|
+ struct page *page;
|
|
+
|
|
+ if (pgd_none(*pgd_ref))
|
|
+ continue;
|
|
+
|
|
+ spin_lock_irqsave(&pgd_lock, flags);
|
|
+ list_for_each_entry(page, &pgd_list, lru) {
|
|
+ pgd_t *pgd;
|
|
+ pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
|
+ if (pgd_none(*pgd))
|
|
+ set_pgd(pgd, *pgd_ref);
|
|
+ else
|
|
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
|
+ }
|
|
+ spin_unlock_irqrestore(&pgd_lock, flags);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * 64-bit:
|
|
+ *
|
|
+ * Handle a fault on the vmalloc area
|
|
+ *
|
|
+ * This assumes no large pages in there.
|
|
+ */
|
|
+static noinline int vmalloc_fault(unsigned long address)
|
|
+{
|
|
+ pgd_t *pgd, *pgd_ref;
|
|
+ pud_t *pud, *pud_ref;
|
|
+ pmd_t *pmd, *pmd_ref;
|
|
+ pte_t *pte, *pte_ref;
|
|
+
|
|
+ /* Make sure we are in vmalloc area: */
|
|
+ if (!(address >= VMALLOC_START && address < VMALLOC_END))
|
|
+ return -1;
|
|
+
|
|
+ /*
|
|
+ * Copy kernel mappings over when needed. This can also
|
|
+ * happen within a race in page table update. In the later
|
|
+ * case just flush:
|
|
+ */
|
|
+ pgd = pgd_offset(current->active_mm, address);
|
|
+ pgd_ref = pgd_offset_k(address);
|
|
+ if (pgd_none(*pgd_ref))
|
|
+ return -1;
|
|
+
|
|
+ if (pgd_none(*pgd))
|
|
+ set_pgd(pgd, *pgd_ref);
|
|
+ else
|
|
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
|
+
|
|
+ /*
|
|
+ * Below here mismatches are bugs because these lower tables
|
|
+ * are shared:
|
|
+ */
|
|
+
|
|
+ pud = pud_offset(pgd, address);
|
|
+ pud_ref = pud_offset(pgd_ref, address);
|
|
+ if (pud_none(*pud_ref))
|
|
+ return -1;
|
|
+
|
|
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
+ BUG();
+
+ pmd = pmd_offset(pud, address);
+ pmd_ref = pmd_offset(pud_ref, address);
+ if (pmd_none(*pmd_ref))
+ return -1;
+
+ if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
+ BUG();
+
+ pte_ref = pte_offset_kernel(pmd_ref, address);
+ if (!pte_present(*pte_ref))
+ return -1;
+
+ pte = pte_offset_kernel(pmd, address);
+
+ /*
+ * Don't use pte_page here, because the mappings can point
+ * outside mem_map, and the NUMA hash lookup cannot handle
+ * that:
+ */
+ if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
+ BUG();
+
+ return 0;
+}
+
|
|
+static const char errata93_warning[] =
|
|
+KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
|
|
+KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
|
|
+KERN_ERR "******* Please consider a BIOS update.\n"
|
|
+KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
|
|
+
|
|
+/*
|
|
+ * No vm86 mode in 64-bit mode:
|
|
+ */
|
|
+static inline void
|
|
+check_v8086_mode(struct pt_regs *regs, unsigned long address,
|
|
+ struct task_struct *tsk)
|
|
+{
|
|
+}
|
|
+
|
|
+static int bad_address(void *p)
|
|
+{
|
|
+ unsigned long dummy;
|
|
+
|
|
+ return probe_kernel_address((unsigned long *)p, dummy);
|
|
+}
|
|
+
|
|
+static void dump_pagetable(unsigned long address)
|
|
+{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
@@ -240,113 +494,77 @@ static void dump_pagetable(unsigned long
|
|
pgd = (pgd_t *)read_cr3();
|
|
|
|
pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
|
|
+
|
|
pgd += pgd_index(address);
|
|
- if (bad_address(pgd)) goto bad;
|
|
+ if (bad_address(pgd))
|
|
+ goto bad;
|
|
+
|
|
printk("PGD %lx ", pgd_val(*pgd));
|
|
- if (!pgd_present(*pgd)) goto ret;
|
|
+
|
|
+ if (!pgd_present(*pgd))
|
|
+ goto out;
|
|
|
|
pud = pud_offset(pgd, address);
|
|
- if (bad_address(pud)) goto bad;
|
|
+ if (bad_address(pud))
|
|
+ goto bad;
|
|
+
|
|
printk(KERN_CONT "PUD %lx ", pud_val(*pud));
|
|
if (!pud_present(*pud) || pud_large(*pud))
|
|
- goto ret;
|
|
+ goto out;
|
|
|
|
pmd = pmd_offset(pud, address);
|
|
- if (bad_address(pmd)) goto bad;
|
|
+ if (bad_address(pmd))
|
|
+ goto bad;
|
|
+
|
|
printk(KERN_CONT "PMD %lx ", pmd_val(*pmd));
|
|
- if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret;
|
|
+ if (!pmd_present(*pmd) || pmd_large(*pmd))
|
|
+ goto out;
|
|
|
|
pte = pte_offset_kernel(pmd, address);
|
|
- if (bad_address(pte)) goto bad;
|
|
+ if (bad_address(pte))
|
|
+ goto bad;
|
|
+
|
|
printk(KERN_CONT "PTE %lx", pte_val(*pte));
|
|
-ret:
|
|
+out:
|
|
printk(KERN_CONT "\n");
|
|
return;
|
|
bad:
|
|
printk("BAD\n");
|
|
-#endif
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
-static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
|
|
-{
|
|
- unsigned index = pgd_index(address);
|
|
- pgd_t *pgd_k;
|
|
- pud_t *pud, *pud_k;
|
|
- pmd_t *pmd, *pmd_k;
|
|
-
|
|
- pgd += index;
|
|
- pgd_k = init_mm.pgd + index;
|
|
-
|
|
- if (!pgd_present(*pgd_k))
|
|
- return NULL;
|
|
-
|
|
- /*
|
|
- * set_pgd(pgd, *pgd_k); here would be useless on PAE
|
|
- * and redundant with the set_pmd() on non-PAE. As would
|
|
- * set_pud.
|
|
- */
|
|
-
|
|
- pud = pud_offset(pgd, address);
|
|
- pud_k = pud_offset(pgd_k, address);
|
|
- if (!pud_present(*pud_k))
|
|
- return NULL;
|
|
-
|
|
- pmd = pmd_offset(pud, address);
|
|
- pmd_k = pmd_offset(pud_k, address);
|
|
- if (!pmd_present(*pmd_k))
|
|
- return NULL;
|
|
- if (!pmd_present(*pmd)) {
|
|
- bool lazy = x86_read_percpu(xen_lazy_mmu);
|
|
-
|
|
- x86_write_percpu(xen_lazy_mmu, false);
|
|
-#if CONFIG_XEN_COMPAT > 0x030002
|
|
- set_pmd(pmd, *pmd_k);
|
|
-#else
|
|
- /*
|
|
- * When running on older Xen we must launder *pmd_k through
|
|
- * pmd_val() to ensure that _PAGE_PRESENT is correctly set.
|
|
- */
|
|
- set_pmd(pmd, __pmd(pmd_val(*pmd_k)));
|
|
-#endif
|
|
- x86_write_percpu(xen_lazy_mmu, lazy);
|
|
- } else
|
|
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
|
|
- return pmd_k;
|
|
}
|
|
-#endif
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
-static const char errata93_warning[] =
|
|
-KERN_ERR "******* Your BIOS seems to not contain a fix for K8 errata #93\n"
|
|
-KERN_ERR "******* Working around it, but it may cause SEGVs or burn power.\n"
|
|
-KERN_ERR "******* Please consider a BIOS update.\n"
|
|
-KERN_ERR "******* Disabling USB legacy in the BIOS may also help.\n";
|
|
-#endif
|
|
+#endif /* CONFIG_X86_64 */
|
|
|
|
-/* Workaround for K8 erratum #93 & buggy BIOS.
|
|
- BIOS SMM functions are required to use a specific workaround
|
|
- to avoid corruption of the 64bit RIP register on C stepping K8.
|
|
- A lot of BIOS that didn't get tested properly miss this.
|
|
- The OS sees this as a page fault with the upper 32bits of RIP cleared.
|
|
- Try to work around it here.
|
|
- Note we only handle faults in kernel here.
|
|
- Does nothing for X86_32
|
|
+/*
|
|
+ * Workaround for K8 erratum #93 & buggy BIOS.
|
|
+ *
|
|
+ * BIOS SMM functions are required to use a specific workaround
|
|
+ * to avoid corruption of the 64bit RIP register on C stepping K8.
|
|
+ *
|
|
+ * A lot of BIOS that didn't get tested properly miss this.
|
|
+ *
|
|
+ * The OS sees this as a page fault with the upper 32bits of RIP cleared.
|
|
+ * Try to work around it here.
|
|
+ *
|
|
+ * Note we only handle faults in kernel here.
|
|
+ * Does nothing on 32-bit.
|
|
*/
|
|
static int is_errata93(struct pt_regs *regs, unsigned long address)
|
|
{
|
|
#ifdef CONFIG_X86_64
|
|
- static int warned;
|
|
+ static int once;
|
|
+
|
|
if (address != regs->ip)
|
|
return 0;
|
|
+
|
|
if ((address >> 32) != 0)
|
|
return 0;
|
|
+
|
|
address |= 0xffffffffUL << 32;
|
|
if ((address >= (u64)_stext && address <= (u64)_etext) ||
|
|
(address >= MODULES_VADDR && address <= MODULES_END)) {
|
|
- if (!warned) {
|
|
+ if (!once) {
|
|
printk(errata93_warning);
|
|
- warned = 1;
|
|
+ once = 1;
|
|
}
|
|
regs->ip = address;
|
|
return 1;
|
|
@@ -356,16 +574,17 @@ static int is_errata93(struct pt_regs *r
|
|
}
|
|
|
|
/*
|
|
- * Work around K8 erratum #100 K8 in compat mode occasionally jumps to illegal
|
|
- * addresses >4GB. We catch this in the page fault handler because these
|
|
- * addresses are not reachable. Just detect this case and return. Any code
|
|
+ * Work around K8 erratum #100 K8 in compat mode occasionally jumps
|
|
+ * to illegal addresses >4GB.
|
|
+ *
|
|
+ * We catch this in the page fault handler because these addresses
|
|
+ * are not reachable. Just detect this case and return. Any code
|
|
* segment in LDT is compatibility mode.
|
|
*/
|
|
static int is_errata100(struct pt_regs *regs, unsigned long address)
|
|
{
|
|
#ifdef CONFIG_X86_64
|
|
- if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) &&
|
|
- (address >> 32))
|
|
+ if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32))
|
|
return 1;
|
|
#endif
|
|
return 0;
|
|
@@ -375,8 +594,9 @@ static int is_f00f_bug(struct pt_regs *r
|
|
{
|
|
#ifdef CONFIG_X86_F00F_BUG
|
|
unsigned long nr;
|
|
+
|
|
/*
|
|
- * Pentium F0 0F C7 C8 bug workaround.
|
|
+ * Pentium F0 0F C7 C8 bug workaround:
|
|
*/
|
|
if (boot_cpu_data.f00f_bug) {
|
|
nr = (address - idt_descr.address) >> 3;
|
|
@@ -390,62 +610,277 @@ static int is_f00f_bug(struct pt_regs *r
|
|
return 0;
|
|
}
|
|
|
|
-static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
|
|
- unsigned long address)
|
|
+static const char nx_warning[] = KERN_CRIT
|
|
+"kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n";
|
|
+
|
|
+static void
|
|
+show_fault_oops(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address)
|
|
{
|
|
-#ifdef CONFIG_X86_32
|
|
if (!oops_may_print())
|
|
return;
|
|
-#endif
|
|
|
|
-#ifdef CONFIG_X86_PAE
|
|
if (error_code & PF_INSTR) {
|
|
unsigned int level;
|
|
+
|
|
pte_t *pte = lookup_address(address, &level);
|
|
|
|
if (pte && pte_present(*pte) && !pte_exec(*pte))
|
|
- printk(KERN_CRIT "kernel tried to execute "
|
|
- "NX-protected page - exploit attempt? "
|
|
- "(uid: %d)\n", current_uid());
|
|
+ printk(nx_warning, current_uid());
|
|
}
|
|
-#endif
|
|
|
|
printk(KERN_ALERT "BUG: unable to handle kernel ");
|
|
if (address < PAGE_SIZE)
|
|
printk(KERN_CONT "NULL pointer dereference");
|
|
else
|
|
printk(KERN_CONT "paging request");
|
|
+
|
|
printk(KERN_CONT " at %p\n", (void *) address);
|
|
printk(KERN_ALERT "IP:");
|
|
printk_address(regs->ip, 1);
|
|
+
|
|
dump_pagetable(address);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
-static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
|
|
- unsigned long error_code)
|
|
+static noinline void
|
|
+pgtable_bad(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address)
|
|
{
|
|
- unsigned long flags = oops_begin();
|
|
- int sig = SIGKILL;
|
|
struct task_struct *tsk;
|
|
+ unsigned long flags;
|
|
+ int sig;
|
|
+
|
|
+ flags = oops_begin();
|
|
+ tsk = current;
|
|
+ sig = SIGKILL;
|
|
|
|
printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
|
|
- current->comm, address);
|
|
+ tsk->comm, address);
|
|
dump_pagetable(address);
|
|
- tsk = current;
|
|
- tsk->thread.cr2 = address;
|
|
- tsk->thread.trap_no = 14;
|
|
- tsk->thread.error_code = error_code;
|
|
+
|
|
+ tsk->thread.cr2 = address;
|
|
+ tsk->thread.trap_no = 14;
|
|
+ tsk->thread.error_code = error_code;
|
|
+
|
|
if (__die("Bad pagetable", regs, error_code))
|
|
sig = 0;
|
|
+
|
|
oops_end(flags, regs, sig);
|
|
}
|
|
-#endif
|
|
+
|
|
+static noinline void
|
|
+no_context(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
+ unsigned long *stackend;
|
|
+ unsigned long flags;
|
|
+ int sig;
|
|
+
|
|
+ /* Are we prepared to handle this kernel fault? */
|
|
+ if (fixup_exception(regs))
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * 32-bit:
|
|
+ *
|
|
+ * Valid to do another page fault here, because if this fault
|
|
+ * had been triggered by is_prefetch fixup_exception would have
|
|
+ * handled it.
|
|
+ *
|
|
+ * 64-bit:
|
|
+ *
|
|
+ * Hall of shame of CPU/BIOS bugs.
|
|
+ */
|
|
+ if (is_prefetch(regs, error_code, address))
|
|
+ return;
|
|
+
|
|
+ if (is_errata93(regs, address))
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * Oops. The kernel tried to access some bad page. We'll have to
|
|
+ * terminate things with extreme prejudice:
|
|
+ */
|
|
+ flags = oops_begin();
|
|
+
|
|
+ show_fault_oops(regs, error_code, address);
|
|
+
|
|
+ stackend = end_of_stack(tsk);
|
|
+ if (*stackend != STACK_END_MAGIC)
|
|
+ printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
|
|
+
|
|
+ tsk->thread.cr2 = address;
|
|
+ tsk->thread.trap_no = 14;
|
|
+ tsk->thread.error_code = error_code;
|
|
+
|
|
+ sig = SIGKILL;
|
|
+ if (__die("Oops", regs, error_code))
|
|
+ sig = 0;
|
|
+
|
|
+ /* Executive summary in case the body of the oops scrolled away */
|
|
+ printk(KERN_EMERG "CR2: %016lx\n", address);
|
|
+
|
|
+ oops_end(flags, regs, sig);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Print out info about fatal segfaults, if the show_unhandled_signals
|
|
+ * sysctl is set:
|
|
+ */
|
|
+static inline void
|
|
+show_signal_msg(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address, struct task_struct *tsk)
|
|
+{
|
|
+ if (!unhandled_signal(tsk, SIGSEGV))
|
|
+ return;
|
|
+
|
|
+ if (!printk_ratelimit())
|
|
+ return;
|
|
+
|
|
+ printk(KERN_CONT "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
|
|
+ task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
|
+ tsk->comm, task_pid_nr(tsk), address,
|
|
+ (void *)regs->ip, (void *)regs->sp, error_code);
|
|
+
|
|
+ print_vma_addr(KERN_CONT " in ", regs->ip);
|
|
+
|
|
+ printk(KERN_CONT "\n");
|
|
+}
|
|
+
|
|
+static void
|
|
+__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address, int si_code)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
+
|
|
+ /* User mode accesses just cause a SIGSEGV */
|
|
+ if (error_code & PF_USER) {
|
|
+ /*
|
|
+ * It's possible to have interrupts off here:
|
|
+ */
|
|
+ local_irq_enable();
|
|
+
|
|
+ /*
|
|
+ * Valid to do another page fault here because this one came
|
|
+ * from user space:
|
|
+ */
|
|
+ if (is_prefetch(regs, error_code, address))
|
|
+ return;
|
|
+
|
|
+ if (is_errata100(regs, address))
|
|
+ return;
|
|
+
|
|
+ if (unlikely(show_unhandled_signals))
|
|
+ show_signal_msg(regs, error_code, address, tsk);
|
|
+
|
|
+ /* Kernel addresses are always protection faults: */
|
|
+ tsk->thread.cr2 = address;
|
|
+ tsk->thread.error_code = error_code | (address >= TASK_SIZE);
|
|
+ tsk->thread.trap_no = 14;
|
|
+
|
|
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk);
|
|
+
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ if (is_f00f_bug(regs, address))
|
|
+ return;
|
|
+
|
|
+ no_context(regs, error_code, address);
|
|
+}
|
|
+
|
|
+static noinline void
|
|
+bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address)
|
|
+{
|
|
+ __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
|
|
+}
|
|
+
|
|
+static void
|
|
+__bad_area(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address, int si_code)
|
|
+{
|
|
+ struct mm_struct *mm = current->mm;
|
|
+
|
|
+ /*
|
|
+ * Something tried to access memory that isn't in our memory map..
|
|
+ * Fix it, but check if it's kernel or user first..
|
|
+ */
|
|
+ up_read(&mm->mmap_sem);
|
|
+
|
|
+ __bad_area_nosemaphore(regs, error_code, address, si_code);
|
|
+}
|
|
+
|
|
+static noinline void
|
|
+bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
|
|
+{
|
|
+ __bad_area(regs, error_code, address, SEGV_MAPERR);
|
|
+}
|
|
+
|
|
+static noinline void
|
|
+bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address)
|
|
+{
|
|
+ __bad_area(regs, error_code, address, SEGV_ACCERR);
|
|
+}
|
|
+
|
|
+/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
|
|
+static void
|
|
+out_of_memory(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address)
|
|
+{
|
|
+ /*
|
|
+ * We ran out of memory, call the OOM killer, and return the userspace
|
|
+ * (which will retry the fault, or kill us if we got oom-killed):
|
|
+ */
|
|
+ up_read(&current->mm->mmap_sem);
|
|
+
|
|
+ pagefault_out_of_memory();
|
|
+}
|
|
+
|
|
+static void
|
|
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
+ struct mm_struct *mm = tsk->mm;
|
|
+
|
|
+ up_read(&mm->mmap_sem);
|
|
+
|
|
+ /* Kernel mode? Handle exceptions or die: */
|
|
+ if (!(error_code & PF_USER))
|
|
+ no_context(regs, error_code, address);
|
|
+
|
|
+ /* User-space => ok to do another page fault: */
|
|
+ if (is_prefetch(regs, error_code, address))
|
|
+ return;
|
|
+
|
|
+ tsk->thread.cr2 = address;
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 14;
|
|
+
|
|
+ force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
|
|
+}
|
|
+
|
|
+static noinline void
|
|
+mm_fault_error(struct pt_regs *regs, unsigned long error_code,
|
|
+ unsigned long address, unsigned int fault)
|
|
+{
|
|
+ if (fault & VM_FAULT_OOM) {
|
|
+ out_of_memory(regs, error_code, address);
|
|
+ } else {
|
|
+ if (fault & VM_FAULT_SIGBUS)
|
|
+ do_sigbus(regs, error_code, address);
|
|
+ else
|
|
+ BUG();
|
|
+ }
|
|
+}
|
|
|
|
static int spurious_fault_check(unsigned long error_code, pte_t *pte)
|
|
{
|
|
if ((error_code & PF_WRITE) && !pte_write(*pte))
|
|
return 0;
|
|
+
|
|
if ((error_code & PF_INSTR) && !pte_exec(*pte))
|
|
return 0;
|
|
|
|
@@ -453,21 +888,25 @@ static int spurious_fault_check(unsigned
|
|
}
|
|
|
|
/*
|
|
- * Handle a spurious fault caused by a stale TLB entry. This allows
|
|
- * us to lazily refresh the TLB when increasing the permissions of a
|
|
- * kernel page (RO -> RW or NX -> X). Doing it eagerly is very
|
|
- * expensive since that implies doing a full cross-processor TLB
|
|
- * flush, even if no stale TLB entries exist on other processors.
|
|
+ * Handle a spurious fault caused by a stale TLB entry.
|
|
+ *
|
|
+ * This allows us to lazily refresh the TLB when increasing the
|
|
+ * permissions of a kernel page (RO -> RW or NX -> X). Doing it
|
|
+ * eagerly is very expensive since that implies doing a full
|
|
+ * cross-processor TLB flush, even if no stale TLB entries exist
|
|
+ * on other processors.
|
|
+ *
|
|
* There are no security implications to leaving a stale TLB when
|
|
* increasing the permissions on a page.
|
|
*/
|
|
-static int spurious_fault(unsigned long address,
|
|
- unsigned long error_code)
|
|
+static noinline int
|
|
+spurious_fault(unsigned long error_code, unsigned long address)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud;
|
|
pmd_t *pmd;
|
|
pte_t *pte;
|
|
+ int ret;
|
|
|
|
/* Reserved-bit violation or user access to kernel space? */
|
|
if (error_code & (PF_USER | PF_RSVD))
|
|
@@ -495,117 +934,62 @@ static int spurious_fault(unsigned long
|
|
if (!pte_present(*pte))
|
|
return 0;
|
|
|
|
- return spurious_fault_check(error_code, pte);
|
|
-}
|
|
-
|
|
-/*
|
|
- * X86_32
|
|
- * Handle a fault on the vmalloc or module mapping area
|
|
- *
|
|
- * X86_64
|
|
- * Handle a fault on the vmalloc area
|
|
- *
|
|
- * This assumes no large pages in there.
|
|
- */
|
|
-static int vmalloc_fault(unsigned long address)
|
|
-{
|
|
-#ifdef CONFIG_X86_32
|
|
- unsigned long pgd_paddr;
|
|
- pmd_t *pmd_k;
|
|
- pte_t *pte_k;
|
|
-
|
|
- /* Make sure we are in vmalloc area */
|
|
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
|
|
- return -1;
|
|
+ ret = spurious_fault_check(error_code, pte);
|
|
+ if (!ret)
|
|
+ return 0;
|
|
|
|
/*
|
|
- * Synchronize this task's top level page-table
|
|
- * with the 'reference' page table.
|
|
- *
|
|
- * Do _not_ use "current" here. We might be inside
|
|
- * an interrupt in the middle of a task switch..
|
|
+ * Make sure we have permissions in PMD.
|
|
+ * If not, then there's a bug in the page tables:
|
|
*/
|
|
- pgd_paddr = read_cr3();
|
|
- pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
|
|
- if (!pmd_k)
|
|
- return -1;
|
|
- pte_k = pte_offset_kernel(pmd_k, address);
|
|
- if (!pte_present(*pte_k))
|
|
- return -1;
|
|
- return 0;
|
|
-#else
|
|
- pgd_t *pgd, *pgd_ref;
|
|
- pud_t *pud, *pud_ref;
|
|
- pmd_t *pmd, *pmd_ref;
|
|
- pte_t *pte, *pte_ref;
|
|
+ ret = spurious_fault_check(error_code, (pte_t *) pmd);
|
|
+ WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
|
|
|
|
- /* Make sure we are in vmalloc area */
|
|
- if (!(address >= VMALLOC_START && address < VMALLOC_END))
|
|
- return -1;
|
|
+ return ret;
|
|
+}
|
|
|
|
- /* Copy kernel mappings over when needed. This can also
|
|
- happen within a race in page table update. In the later
|
|
- case just flush. */
|
|
+int show_unhandled_signals = 1;
|
|
|
|
- pgd = pgd_offset(current->active_mm, address);
|
|
- pgd_ref = pgd_offset_k(address);
|
|
- if (pgd_none(*pgd_ref))
|
|
- return -1;
|
|
- if (pgd_none(*pgd))
|
|
- set_pgd(pgd, *pgd_ref);
|
|
- else
|
|
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
|
+static inline int
|
|
+access_error(unsigned long error_code, int write, struct vm_area_struct *vma)
|
|
+{
|
|
+ if (write) {
|
|
+ /* write, present and write, not present: */
|
|
+ if (unlikely(!(vma->vm_flags & VM_WRITE)))
|
|
+ return 1;
|
|
+ return 0;
|
|
+ }
|
|
|
|
- /* Below here mismatches are bugs because these lower tables
|
|
- are shared */
|
|
+ /* read, present: */
|
|
+ if (unlikely(error_code & PF_PROT))
|
|
+ return 1;
|
|
+
|
|
+ /* read, not present: */
|
|
+ if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
|
|
+ return 1;
|
|
|
|
- pud = pud_offset(pgd, address);
|
|
- pud_ref = pud_offset(pgd_ref, address);
|
|
- if (pud_none(*pud_ref))
|
|
- return -1;
|
|
- if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
|
|
- BUG();
|
|
- pmd = pmd_offset(pud, address);
|
|
- pmd_ref = pmd_offset(pud_ref, address);
|
|
- if (pmd_none(*pmd_ref))
|
|
- return -1;
|
|
- if (pmd_none(*pmd) || pmd_page(*pmd) != pmd_page(*pmd_ref))
|
|
- BUG();
|
|
- pte_ref = pte_offset_kernel(pmd_ref, address);
|
|
- if (!pte_present(*pte_ref))
|
|
- return -1;
|
|
- pte = pte_offset_kernel(pmd, address);
|
|
- /* Don't use pte_page here, because the mappings can point
|
|
- outside mem_map, and the NUMA hash lookup cannot handle
|
|
- that. */
|
|
- if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref))
|
|
- BUG();
|
|
return 0;
|
|
-#endif
|
|
}
|
|
|
|
-int show_unhandled_signals = 1;
|
|
+static int fault_in_kernel_space(unsigned long address)
|
|
+{
|
|
+ return address >= TASK_SIZE_MAX;
|
|
+}
|
|
|
|
/*
|
|
* This routine handles page faults. It determines the address,
|
|
* and the problem, and then passes it off to one of the appropriate
|
|
* routines.
|
|
*/
|
|
-#ifdef CONFIG_X86_64
|
|
-asmlinkage
|
|
-#endif
|
|
-void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
|
+dotraplinkage void __kprobes
|
|
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
|
|
{
|
|
- struct task_struct *tsk;
|
|
- struct mm_struct *mm;
|
|
struct vm_area_struct *vma;
|
|
+ struct task_struct *tsk;
|
|
unsigned long address;
|
|
- int write, si_code;
|
|
+ struct mm_struct *mm;
|
|
+ int write;
|
|
int fault;
|
|
-#ifdef CONFIG_X86_64
|
|
- unsigned long flags;
|
|
- int sig;
|
|
-#endif
|
|
|
|
/* Set the "privileged fault" bit to something sane. */
|
|
if (user_mode_vm(regs))
|
|
@@ -615,13 +999,12 @@ void __kprobes do_page_fault(struct pt_r
|
|
|
|
tsk = current;
|
|
mm = tsk->mm;
|
|
+
|
|
prefetchw(&mm->mmap_sem);
|
|
|
|
- /* get the address */
|
|
+ /* Get the faulting address: */
|
|
address = read_cr2();
|
|
|
|
- si_code = SEGV_MAPERR;
|
|
-
|
|
if (unlikely(kmmio_fault(regs, address)))
|
|
return;
|
|
|
|
@@ -638,328 +1021,158 @@ void __kprobes do_page_fault(struct pt_r
|
|
* (error_code & 4) == 0, and that the fault was not a
|
|
* protection error (error_code & 9) == 0.
|
|
*/
|
|
-#ifdef CONFIG_X86_32
|
|
- if (unlikely(address >= TASK_SIZE)) {
|
|
-#else
|
|
- if (unlikely(address >= TASK_SIZE64)) {
|
|
-#endif
|
|
+ if (unlikely(fault_in_kernel_space(address))) {
|
|
/* Faults in hypervisor area can never be patched up. */
|
|
#if defined(CONFIG_X86_XEN)
|
|
- if (address >= hypervisor_virt_start)
|
|
- goto bad_area_nosemaphore;
|
|
+ if (address >= hypervisor_virt_start) {
|
|
#elif defined(CONFIG_X86_64_XEN)
|
|
if (address >= HYPERVISOR_VIRT_START
|
|
- && address < HYPERVISOR_VIRT_END)
|
|
- goto bad_area_nosemaphore;
|
|
+ && address < HYPERVISOR_VIRT_END) {
|
|
#endif
|
|
+ bad_area_nosemaphore(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
+
|
|
if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
|
|
vmalloc_fault(address) >= 0)
|
|
return;
|
|
|
|
- /* Can handle a stale RO->RW TLB */
|
|
- if (spurious_fault(address, error_code))
|
|
+ /* Can handle a stale RO->RW TLB: */
|
|
+ if (spurious_fault(error_code, address))
|
|
return;
|
|
|
|
- /* kprobes don't want to hook the spurious faults. */
|
|
+ /* kprobes don't want to hook the spurious faults: */
|
|
if (notify_page_fault(regs))
|
|
return;
|
|
/*
|
|
* Don't take the mm semaphore here. If we fixup a prefetch
|
|
- * fault we could otherwise deadlock.
|
|
+ * fault we could otherwise deadlock:
|
|
*/
|
|
- goto bad_area_nosemaphore;
|
|
- }
|
|
+ bad_area_nosemaphore(regs, error_code, address);
|
|
|
|
- /* kprobes don't want to hook the spurious faults. */
|
|
- if (notify_page_fault(regs))
|
|
return;
|
|
+ }
|
|
|
|
+ /* kprobes don't want to hook the spurious faults: */
|
|
+ if (unlikely(notify_page_fault(regs)))
|
|
+ return;
|
|
/*
|
|
* It's safe to allow irq's after cr2 has been saved and the
|
|
* vmalloc fault has been handled.
|
|
*
|
|
* User-mode registers count as a user access even for any
|
|
- * potential system fault or CPU buglet.
|
|
+ * potential system fault or CPU buglet:
|
|
*/
|
|
if (user_mode_vm(regs)) {
|
|
local_irq_enable();
|
|
error_code |= PF_USER;
|
|
- } else if (regs->flags & X86_EFLAGS_IF)
|
|
- local_irq_enable();
|
|
+ } else {
|
|
+ if (regs->flags & X86_EFLAGS_IF)
|
|
+ local_irq_enable();
|
|
+ }
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
if (unlikely(error_code & PF_RSVD))
|
|
- pgtable_bad(address, regs, error_code);
|
|
-#endif
|
|
+ pgtable_bad(regs, error_code, address);
|
|
|
|
/*
|
|
- * If we're in an interrupt, have no user context or are running in an
|
|
- * atomic region then we must not take the fault.
|
|
+ * If we're in an interrupt, have no user context or are running
|
|
+ * in an atomic region then we must not take the fault:
|
|
*/
|
|
- if (unlikely(in_atomic() || !mm))
|
|
- goto bad_area_nosemaphore;
|
|
+ if (unlikely(in_atomic() || !mm)) {
|
|
+ bad_area_nosemaphore(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
|
|
/*
|
|
* When running in the kernel we expect faults to occur only to
|
|
- * addresses in user space. All other faults represent errors in the
|
|
- * kernel and should generate an OOPS. Unfortunately, in the case of an
|
|
- * erroneous fault occurring in a code path which already holds mmap_sem
|
|
- * we will deadlock attempting to validate the fault against the
|
|
- * address space. Luckily the kernel only validly references user
|
|
- * space from well defined areas of code, which are listed in the
|
|
- * exceptions table.
|
|
+ * addresses in user space. All other faults represent errors in
|
|
+ * the kernel and should generate an OOPS. Unfortunately, in the
|
|
+ * case of an erroneous fault occurring in a code path which already
|
|
+ * holds mmap_sem we will deadlock attempting to validate the fault
|
|
+ * against the address space. Luckily the kernel only validly
|
|
+ * references user space from well defined areas of code, which are
|
|
+ * listed in the exceptions table.
|
|
*
|
|
* As the vast majority of faults will be valid we will only perform
|
|
- * the source reference check when there is a possibility of a deadlock.
|
|
- * Attempt to lock the address space, if we cannot we then validate the
|
|
- * source. If this is invalid we can skip the address space check,
|
|
- * thus avoiding the deadlock.
|
|
+ * the source reference check when there is a possibility of a
|
|
+ * deadlock. Attempt to lock the address space, if we cannot we then
|
|
+ * validate the source. If this is invalid we can skip the address
|
|
+ * space check, thus avoiding the deadlock:
|
|
*/
|
|
- if (!down_read_trylock(&mm->mmap_sem)) {
|
|
+ if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
|
|
if ((error_code & PF_USER) == 0 &&
|
|
- !search_exception_tables(regs->ip))
|
|
- goto bad_area_nosemaphore;
|
|
+ !search_exception_tables(regs->ip)) {
|
|
+ bad_area_nosemaphore(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
down_read(&mm->mmap_sem);
|
|
+ } else {
|
|
+ /*
|
|
+ * The above down_read_trylock() might have succeeded in
|
|
+ * which case we'll have missed the might_sleep() from
|
|
+ * down_read():
|
|
+ */
|
|
+ might_sleep();
|
|
}
|
|
|
|
vma = find_vma(mm, address);
|
|
- if (!vma)
|
|
- goto bad_area;
|
|
- if (vma->vm_start <= address)
|
|
+ if (unlikely(!vma)) {
|
|
+ bad_area(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
+ if (likely(vma->vm_start <= address))
|
|
goto good_area;
|
|
- if (!(vma->vm_flags & VM_GROWSDOWN))
|
|
- goto bad_area;
|
|
+ if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
|
|
+ bad_area(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
if (error_code & PF_USER) {
|
|
/*
|
|
* Accessing the stack below %sp is always a bug.
|
|
* The large cushion allows instructions like enter
|
|
- * and pusha to work. ("enter $65535,$31" pushes
|
|
+ * and pusha to work. ("enter $65535, $31" pushes
|
|
* 32 pointers and then decrements %sp by 65535.)
|
|
*/
|
|
- if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp)
|
|
- goto bad_area;
|
|
+ if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
|
|
+ bad_area(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
}
|
|
- if (expand_stack(vma, address))
|
|
- goto bad_area;
|
|
-/*
|
|
- * Ok, we have a good vm_area for this memory access, so
|
|
- * we can handle it..
|
|
- */
|
|
+ if (unlikely(expand_stack(vma, address))) {
|
|
+ bad_area(regs, error_code, address);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /*
|
|
+ * Ok, we have a good vm_area for this memory access, so
|
|
+ * we can handle it..
|
|
+ */
|
|
good_area:
|
|
- si_code = SEGV_ACCERR;
|
|
- write = 0;
|
|
- switch (error_code & (PF_PROT|PF_WRITE)) {
|
|
- default: /* 3: write, present */
|
|
- /* fall through */
|
|
- case PF_WRITE: /* write, not present */
|
|
- if (!(vma->vm_flags & VM_WRITE))
|
|
- goto bad_area;
|
|
- write++;
|
|
- break;
|
|
- case PF_PROT: /* read, present */
|
|
- goto bad_area;
|
|
- case 0: /* read, not present */
|
|
- if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
|
|
- goto bad_area;
|
|
+ write = error_code & PF_WRITE;
|
|
+
|
|
+ if (unlikely(access_error(error_code, write, vma))) {
|
|
+ bad_area_access_error(regs, error_code, address);
|
|
+ return;
|
|
}
|
|
|
|
/*
|
|
* If for any reason at all we couldn't handle the fault,
|
|
* make sure we exit gracefully rather than endlessly redo
|
|
- * the fault.
|
|
+ * the fault:
|
|
*/
|
|
fault = handle_mm_fault(mm, vma, address, write);
|
|
+
|
|
if (unlikely(fault & VM_FAULT_ERROR)) {
|
|
- if (fault & VM_FAULT_OOM)
|
|
- goto out_of_memory;
|
|
- else if (fault & VM_FAULT_SIGBUS)
|
|
- goto do_sigbus;
|
|
- BUG();
|
|
+ mm_fault_error(regs, error_code, address, fault);
|
|
+ return;
|
|
}
|
|
+
|
|
if (fault & VM_FAULT_MAJOR)
|
|
tsk->maj_flt++;
|
|
else
|
|
tsk->min_flt++;
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
- /*
|
|
- * Did it hit the DOS screen memory VA from vm86 mode?
|
|
- */
|
|
- if (v8086_mode(regs)) {
|
|
- unsigned long bit = (address - 0xA0000) >> PAGE_SHIFT;
|
|
- if (bit < 32)
|
|
- tsk->thread.screen_bitmap |= 1 << bit;
|
|
- }
|
|
-#endif
|
|
- up_read(&mm->mmap_sem);
|
|
- return;
|
|
-
|
|
-/*
|
|
- * Something tried to access memory that isn't in our memory map..
|
|
- * Fix it, but check if it's kernel or user first..
|
|
- */
|
|
-bad_area:
|
|
- up_read(&mm->mmap_sem);
|
|
-
|
|
-bad_area_nosemaphore:
|
|
- /* User mode accesses just cause a SIGSEGV */
|
|
- if (error_code & PF_USER) {
|
|
- /*
|
|
- * It's possible to have interrupts off here.
|
|
- */
|
|
- local_irq_enable();
|
|
-
|
|
- /*
|
|
- * Valid to do another page fault here because this one came
|
|
- * from user space.
|
|
- */
|
|
- if (is_prefetch(regs, address, error_code))
|
|
- return;
|
|
-
|
|
- if (is_errata100(regs, address))
|
|
- return;
|
|
-
|
|
- if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
|
|
- printk_ratelimit()) {
|
|
- printk(
|
|
- "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
|
|
- task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
|
|
- tsk->comm, task_pid_nr(tsk), address,
|
|
- (void *) regs->ip, (void *) regs->sp, error_code);
|
|
- print_vma_addr(" in ", regs->ip);
|
|
- printk("\n");
|
|
- }
|
|
-
|
|
- tsk->thread.cr2 = address;
|
|
- /* Kernel addresses are always protection faults */
|
|
- tsk->thread.error_code = error_code | (address >= TASK_SIZE);
|
|
- tsk->thread.trap_no = 14;
|
|
- force_sig_info_fault(SIGSEGV, si_code, address, tsk);
|
|
- return;
|
|
- }
|
|
-
|
|
- if (is_f00f_bug(regs, address))
|
|
- return;
|
|
-
|
|
-no_context:
|
|
- /* Are we prepared to handle this kernel fault? */
|
|
- if (fixup_exception(regs))
|
|
- return;
|
|
-
|
|
- /*
|
|
- * X86_32
|
|
- * Valid to do another page fault here, because if this fault
|
|
- * had been triggered by is_prefetch fixup_exception would have
|
|
- * handled it.
|
|
- *
|
|
- * X86_64
|
|
- * Hall of shame of CPU/BIOS bugs.
|
|
- */
|
|
- if (is_prefetch(regs, address, error_code))
|
|
- return;
|
|
-
|
|
- if (is_errata93(regs, address))
|
|
- return;
|
|
-
|
|
-/*
|
|
- * Oops. The kernel tried to access some bad page. We'll have to
|
|
- * terminate things with extreme prejudice.
|
|
- */
|
|
-#ifdef CONFIG_X86_32
|
|
- bust_spinlocks(1);
|
|
-#else
|
|
- flags = oops_begin();
|
|
-#endif
|
|
-
|
|
- show_fault_oops(regs, error_code, address);
|
|
-
|
|
- tsk->thread.cr2 = address;
|
|
- tsk->thread.trap_no = 14;
|
|
- tsk->thread.error_code = error_code;
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
- die("Oops", regs, error_code);
|
|
- bust_spinlocks(0);
|
|
- do_exit(SIGKILL);
|
|
-#else
|
|
- sig = SIGKILL;
|
|
- if (__die("Oops", regs, error_code))
|
|
- sig = 0;
|
|
- /* Executive summary in case the body of the oops scrolled away */
|
|
- printk(KERN_EMERG "CR2: %016lx\n", address);
|
|
- oops_end(flags, regs, sig);
|
|
-#endif
|
|
-
|
|
-out_of_memory:
|
|
- /*
|
|
- * We ran out of memory, call the OOM killer, and return the userspace
|
|
- * (which will retry the fault, or kill us if we got oom-killed).
|
|
- */
|
|
- up_read(&mm->mmap_sem);
|
|
- pagefault_out_of_memory();
|
|
- return;
|
|
+ check_v8086_mode(regs, address, tsk);
|
|
|
|
-do_sigbus:
|
|
up_read(&mm->mmap_sem);
|
|
-
|
|
- /* Kernel mode? Handle exceptions or die */
|
|
- if (!(error_code & PF_USER))
|
|
- goto no_context;
|
|
-#ifdef CONFIG_X86_32
|
|
- /* User space => ok to do another page fault */
|
|
- if (is_prefetch(regs, address, error_code))
|
|
- return;
|
|
-#endif
|
|
- tsk->thread.cr2 = address;
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 14;
|
|
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
|
|
-}
|
|
-
|
|
-DEFINE_SPINLOCK(pgd_lock);
|
|
-LIST_HEAD(pgd_list);
|
|
-
|
|
-void vmalloc_sync_all(void)
|
|
-{
|
|
- unsigned long address;
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
- if (SHARED_KERNEL_PMD)
|
|
- return;
|
|
-
|
|
- for (address = VMALLOC_START & PMD_MASK;
|
|
- address >= TASK_SIZE && address < FIXADDR_TOP;
|
|
- address += PMD_SIZE) {
|
|
- unsigned long flags;
|
|
- struct page *page;
|
|
-
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
- list_for_each_entry(page, &pgd_list, lru) {
|
|
- if (!vmalloc_sync_one(page_address(page),
|
|
- address))
|
|
- break;
|
|
- }
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
- }
|
|
-#else /* CONFIG_X86_64 */
|
|
- for (address = VMALLOC_START & PGDIR_MASK; address <= VMALLOC_END;
|
|
- address += PGDIR_SIZE) {
|
|
- const pgd_t *pgd_ref = pgd_offset_k(address);
|
|
- unsigned long flags;
|
|
- struct page *page;
|
|
-
|
|
- if (pgd_none(*pgd_ref))
|
|
- continue;
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
- list_for_each_entry(page, &pgd_list, lru) {
|
|
- pgd_t *pgd;
|
|
- pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
|
- if (pgd_none(*pgd))
|
|
- set_pgd(pgd, *pgd_ref);
|
|
- else
|
|
- BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
|
- }
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
- }
|
|
-#endif
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -1,5 +1,6 @@
|
|
#include <linux/highmem.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/swap.h> /* for totalram_pages */
|
|
|
|
void *kmap(struct page *page)
|
|
{
|
|
@@ -18,49 +19,6 @@ void kunmap(struct page *page)
|
|
kunmap_high(page);
|
|
}
|
|
|
|
-static void debug_kmap_atomic_prot(enum km_type type)
|
|
-{
|
|
-#ifdef CONFIG_DEBUG_HIGHMEM
|
|
- static unsigned warn_count = 10;
|
|
-
|
|
- if (unlikely(warn_count == 0))
|
|
- return;
|
|
-
|
|
- if (unlikely(in_interrupt())) {
|
|
- if (in_irq()) {
|
|
- if (type != KM_IRQ0 && type != KM_IRQ1 &&
|
|
- type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
|
|
- type != KM_BOUNCE_READ) {
|
|
- WARN_ON(1);
|
|
- warn_count--;
|
|
- }
|
|
- } else if (!irqs_disabled()) { /* softirq */
|
|
- if (type != KM_IRQ0 && type != KM_IRQ1 &&
|
|
- type != KM_SOFTIRQ0 && type != KM_SOFTIRQ1 &&
|
|
- type != KM_SKB_SUNRPC_DATA &&
|
|
- type != KM_SKB_DATA_SOFTIRQ &&
|
|
- type != KM_BOUNCE_READ) {
|
|
- WARN_ON(1);
|
|
- warn_count--;
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
|
|
- type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
|
|
- if (!irqs_disabled()) {
|
|
- WARN_ON(1);
|
|
- warn_count--;
|
|
- }
|
|
- } else if (type == KM_SOFTIRQ0 || type == KM_SOFTIRQ1) {
|
|
- if (irq_count() == 0 && !irqs_disabled()) {
|
|
- WARN_ON(1);
|
|
- warn_count--;
|
|
- }
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
/*
|
|
* kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
|
|
* no global lock is needed and because the kmap code must perform a global TLB
|
|
@@ -80,7 +38,7 @@ void *kmap_atomic_prot(struct page *page
|
|
if (!PageHighMem(page))
|
|
return page_address(page);
|
|
|
|
- debug_kmap_atomic_prot(type);
|
|
+ debug_kmap_atomic(type);
|
|
|
|
idx = type + KM_TYPE_NR*smp_processor_id();
|
|
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
@@ -120,22 +78,13 @@ void kunmap_atomic(void *kvaddr, enum km
|
|
pagefault_enable();
|
|
}
|
|
|
|
-/* This is the same as kmap_atomic() but can map memory that doesn't
|
|
+/*
|
|
+ * This is the same as kmap_atomic() but can map memory that doesn't
|
|
* have a struct page associated with it.
|
|
*/
|
|
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type)
|
|
{
|
|
- enum fixed_addresses idx;
|
|
- unsigned long vaddr;
|
|
-
|
|
- pagefault_disable();
|
|
-
|
|
- idx = type + KM_TYPE_NR*smp_processor_id();
|
|
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
- set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
|
|
- /*arch_flush_lazy_mmu_mode();*/
|
|
-
|
|
- return (void*) vaddr;
|
|
+ return kmap_atomic_prot_pfn(pfn, type, kmap_prot);
|
|
}
|
|
EXPORT_SYMBOL_GPL(kmap_atomic_pfn); /* temporarily in use by i915 GEM until vmap */
|
|
|
|
@@ -206,3 +155,35 @@ EXPORT_SYMBOL(kmap_atomic_to_page);
|
|
#endif
|
|
EXPORT_SYMBOL(clear_highpage);
|
|
EXPORT_SYMBOL(copy_highpage);
|
|
+
|
|
+void __init set_highmem_pages_init(void)
|
|
+{
|
|
+ struct zone *zone;
|
|
+ int nid;
|
|
+
|
|
+ for_each_zone(zone) {
|
|
+ unsigned long zone_start_pfn, zone_end_pfn;
|
|
+
|
|
+ if (!is_highmem(zone))
|
|
+ continue;
|
|
+
|
|
+ zone_start_pfn = zone->zone_start_pfn;
|
|
+ zone_end_pfn = zone_start_pfn + zone->spanned_pages;
|
|
+
|
|
+ nid = zone_to_nid(zone);
|
|
+ printk(KERN_INFO "Initializing %s for node %d (%08lx:%08lx)\n",
|
|
+ zone->name, nid, zone_start_pfn, zone_end_pfn);
|
|
+
|
|
+ add_highpages_with_active_regions(nid, zone_start_pfn,
|
|
+ zone_end_pfn);
|
|
+
|
|
+ /* XEN: init high-mem pages outside initial allocation. */
|
|
+ if (zone_start_pfn < xen_start_info->nr_pages)
|
|
+ zone_start_pfn = xen_start_info->nr_pages;
|
|
+ for (; zone_start_pfn < zone_end_pfn; zone_start_pfn++) {
|
|
+ ClearPageReserved(pfn_to_page(zone_start_pfn));
|
|
+ init_page_count(pfn_to_page(zone_start_pfn));
|
|
+ }
|
|
+ }
|
|
+ totalram_pages += totalhigh_pages;
|
|
+}
|
|
--- head-2010-05-25.orig/arch/x86/mm/hypervisor.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/hypervisor.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -36,6 +36,7 @@
|
|
#include <linux/vmalloc.h>
|
|
#include <asm/page.h>
|
|
#include <asm/pgtable.h>
|
|
+#include <asm/setup.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <xen/balloon.h>
|
|
#include <xen/features.h>
|
|
@@ -47,6 +48,9 @@
|
|
|
|
EXPORT_SYMBOL(hypercall_page);
|
|
|
|
+shared_info_t *__read_mostly HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
+EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
+
|
|
#define NR_MC BITS_PER_LONG
|
|
#define NR_MMU BITS_PER_LONG
|
|
#define NR_MMUEXT (BITS_PER_LONG / 4)
|
|
@@ -538,7 +542,7 @@ int xen_create_contiguous_region(
|
|
unsigned int level;
|
|
|
|
if (vstart < __START_KERNEL_map
|
|
- || vstart + (PAGE_SIZE << order) > (unsigned long)_end)
|
|
+ || vstart + (PAGE_SIZE << order) > _brk_end)
|
|
return -EINVAL;
|
|
ptep = lookup_address((unsigned long)__va(__pa(vstart)),
|
|
&level);
|
|
@@ -953,6 +957,6 @@ int write_ldt_entry(struct desc_struct *
|
|
int write_gdt_entry(struct desc_struct *gdt, int entry, const void *desc,
|
|
int type)
|
|
{
|
|
- maddr_t mach_gp = virt_to_machine(gdt + entry);
|
|
+ maddr_t mach_gp = arbitrary_virt_to_machine(gdt + entry);
|
|
return HYPERVISOR_update_descriptor(mach_gp, *(const u64*)desc);
|
|
}
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2010-05-25/arch/x86/mm/init-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -0,0 +1,459 @@
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/swap.h>
|
|
+#include <linux/bootmem.h>
|
|
+
|
|
+#include <asm/cacheflush.h>
|
|
+#include <asm/e820.h>
|
|
+#include <asm/init.h>
|
|
+#include <asm/page.h>
|
|
+#include <asm/page_types.h>
|
|
+#include <asm/sections.h>
|
|
+#include <asm/setup.h>
|
|
+#include <asm/system.h>
|
|
+#include <asm/tlbflush.h>
|
|
+
|
|
+unsigned long __meminitdata e820_table_start;
|
|
+unsigned long __meminitdata e820_table_end;
|
|
+unsigned long __meminitdata e820_table_top;
|
|
+
|
|
+int after_bootmem;
|
|
+
|
|
+#if !defined(CONFIG_XEN)
|
|
+int direct_gbpages
|
|
+#ifdef CONFIG_DIRECT_GBPAGES
|
|
+ = 1
|
|
+#endif
|
|
+;
|
|
+#elif defined(CONFIG_X86_32)
|
|
+#define direct_gbpages 0
|
|
+extern unsigned long extend_init_mapping(unsigned long tables_space);
|
|
+#else
|
|
+extern void xen_finish_init_mapping(void);
|
|
+#endif
|
|
+
|
|
+static void __init find_early_table_space(unsigned long end, int use_pse,
|
|
+ int use_gbpages)
|
|
+{
|
|
+ unsigned long puds, pmds, ptes, tables;
|
|
+
|
|
+ puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
|
+ tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
|
|
+
|
|
+ if (use_gbpages) {
|
|
+ unsigned long extra;
|
|
+
|
|
+ extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
|
|
+ pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
+ } else
|
|
+ pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
+
|
|
+ tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);
|
|
+
|
|
+ if (use_pse) {
|
|
+ unsigned long extra;
|
|
+
|
|
+ extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
|
|
+#ifdef CONFIG_X86_32
|
|
+ extra += PMD_SIZE;
|
|
+#endif
|
|
+ ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+ } else
|
|
+ ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
+
|
|
+ tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ /* for fixmap */
|
|
+ tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
|
|
+#endif
|
|
+
|
|
+ /*
|
|
+ * RED-PEN putting page tables only on node 0 could
|
|
+ * cause a hotspot and fill up ZONE_DMA. The page tables
|
|
+ * need roughly 0.5KB per GB.
|
|
+ */
|
|
+#ifdef CONFIG_X86_32
|
|
+ e820_table_start = extend_init_mapping(tables);
|
|
+ e820_table_end = e820_table_start;
|
|
+#else /* CONFIG_X86_64 */
|
|
+ if (!e820_table_top) {
|
|
+ e820_table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
|
|
+ xen_start_info->nr_pt_frames;
|
|
+ e820_table_end = e820_table_start;
|
|
+ } else {
|
|
+ /*
|
|
+ * [table_start, table_top) gets passed to reserve_early(),
|
|
+ * so we must not use table_end here, despite continuing
|
|
+ * to allocate from there. table_end possibly being below
|
|
+ * table_start is otoh not a problem.
|
|
+ */
|
|
+ e820_table_start = e820_table_top;
|
|
+ }
|
|
+#endif
|
|
+ if (e820_table_start == -1UL)
|
|
+ panic("Cannot find space for the kernel page tables");
|
|
+
|
|
+ e820_table_top = e820_table_start + (tables >> PAGE_SHIFT);
|
|
+
|
|
+ printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
|
|
+ end, e820_table_start << PAGE_SHIFT, e820_table_top << PAGE_SHIFT);
|
|
+}
|
|
+
|
|
+struct map_range {
|
|
+ unsigned long start;
|
|
+ unsigned long end;
|
|
+ unsigned page_size_mask;
|
|
+};
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+#define NR_RANGE_MR 3
|
|
+#else /* CONFIG_X86_64 */
|
|
+#define NR_RANGE_MR 5
|
|
+#endif
|
|
+
|
|
+static int __meminit save_mr(struct map_range *mr, int nr_range,
|
|
+ unsigned long start_pfn, unsigned long end_pfn,
|
|
+ unsigned long page_size_mask)
|
|
+{
|
|
+ if (start_pfn < end_pfn) {
|
|
+ if (nr_range >= NR_RANGE_MR)
|
|
+ panic("run out of range for init_memory_mapping\n");
|
|
+ mr[nr_range].start = start_pfn<<PAGE_SHIFT;
|
|
+ mr[nr_range].end = end_pfn<<PAGE_SHIFT;
|
|
+ mr[nr_range].page_size_mask = page_size_mask;
|
|
+ nr_range++;
|
|
+ }
|
|
+
|
|
+ return nr_range;
|
|
+}
|
|
+
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
+static void __init init_gbpages(void)
|
|
+{
|
|
+ if (direct_gbpages && cpu_has_gbpages)
|
|
+ printk(KERN_INFO "Using GB pages for direct mapping\n");
|
|
+ else
|
|
+ direct_gbpages = 0;
|
|
+}
|
|
+#else
|
|
+static inline void init_gbpages(void)
|
|
+{
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Setup the direct mapping of the physical memory at PAGE_OFFSET.
|
|
+ * This runs before bootmem is initialized and gets pages directly from
|
|
+ * the physical memory. To access them they are temporarily mapped.
|
|
+ */
|
|
+unsigned long __init_refok init_memory_mapping(unsigned long start,
|
|
+ unsigned long end)
|
|
+{
|
|
+ unsigned long page_size_mask = 0;
|
|
+ unsigned long start_pfn, end_pfn;
|
|
+ unsigned long ret = 0;
|
|
+ unsigned long pos;
|
|
+
|
|
+ struct map_range mr[NR_RANGE_MR];
|
|
+ int nr_range, i;
|
|
+ int use_pse, use_gbpages;
|
|
+
|
|
+ printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
|
|
+
|
|
+ if (!after_bootmem)
|
|
+ init_gbpages();
|
|
+
|
|
+#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
+ /*
|
|
+ * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
|
+ * This will simplify cpa(), which otherwise needs to support splitting
|
|
+ * large pages into small in interrupt context, etc.
|
|
+ */
|
|
+ use_pse = use_gbpages = 0;
|
|
+#else
|
|
+ use_pse = cpu_has_pse;
|
|
+ use_gbpages = direct_gbpages;
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ set_nx();
|
|
+ if (nx_enabled)
|
|
+ printk(KERN_INFO "NX (Execute Disable) protection: active\n");
|
|
+#endif
|
|
+
|
|
+ /* Enable PSE if available */
|
|
+ if (cpu_has_pse)
|
|
+ set_in_cr4(X86_CR4_PSE);
|
|
+
|
|
+ /* Enable PGE if available */
|
|
+ if (cpu_has_pge) {
|
|
+ set_in_cr4(X86_CR4_PGE);
|
|
+ __supported_pte_mask |= _PAGE_GLOBAL;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ if (use_gbpages)
|
|
+ page_size_mask |= 1 << PG_LEVEL_1G;
|
|
+ if (use_pse)
|
|
+ page_size_mask |= 1 << PG_LEVEL_2M;
|
|
+
|
|
+ memset(mr, 0, sizeof(mr));
|
|
+ nr_range = 0;
|
|
+
|
|
+ /* head if not big page alignment ? */
|
|
+ start_pfn = start >> PAGE_SHIFT;
|
|
+ pos = start_pfn << PAGE_SHIFT;
|
|
+#ifdef CONFIG_X86_32
|
|
+ /*
|
|
+ * Don't use a large page for the first 2/4MB of memory
|
|
+ * because there are often fixed size MTRRs in there
|
|
+ * and overlapping MTRRs into large pages can cause
|
|
+ * slowdowns.
|
|
+ */
|
|
+ if (pos == 0)
|
|
+ end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
|
|
+ else
|
|
+ end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+#else /* CONFIG_X86_64 */
|
|
+ end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+#endif
|
|
+ if (end_pfn > (end >> PAGE_SHIFT))
|
|
+ end_pfn = end >> PAGE_SHIFT;
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
+
|
|
+ /* big page (2M) range */
|
|
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+#ifdef CONFIG_X86_32
|
|
+ end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
+#else /* CONFIG_X86_64 */
|
|
+ end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
+ << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
|
|
+ end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
|
|
+#endif
|
|
+
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask & (1<<PG_LEVEL_2M));
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ /* big page (1G) range */
|
|
+ start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
+ << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask &
|
|
+ ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
+
|
|
+ /* tail is not big page (1G) alignment */
|
|
+ start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
+ << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
+ if (start_pfn < end_pfn) {
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
+ page_size_mask & (1<<PG_LEVEL_2M));
|
|
+ pos = end_pfn << PAGE_SHIFT;
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ /* tail is not big page (2M) alignment */
|
|
+ start_pfn = pos>>PAGE_SHIFT;
|
|
+ end_pfn = end>>PAGE_SHIFT;
|
|
+ nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
+
|
|
+ /* try to merge same page size and continuous */
|
|
+ for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
|
|
+ unsigned long old_start;
|
|
+ if (mr[i].end != mr[i+1].start ||
|
|
+ mr[i].page_size_mask != mr[i+1].page_size_mask)
|
|
+ continue;
|
|
+ /* move it */
|
|
+ old_start = mr[i].start;
|
|
+ memmove(&mr[i], &mr[i+1],
|
|
+ (nr_range - 1 - i) * sizeof(struct map_range));
|
|
+ mr[i--].start = old_start;
|
|
+ nr_range--;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < nr_range; i++)
|
|
+ printk(KERN_DEBUG " %010lx - %010lx page %s\n",
|
|
+ mr[i].start, mr[i].end,
|
|
+ (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
|
|
+ (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
|
|
+
|
|
+ /*
|
|
+ * Find space for the kernel direct mapping tables.
|
|
+ *
|
|
+ * Later we should allocate these tables in the local node of the
|
|
+ * memory mapped. Unfortunately this is done currently before the
|
|
+ * nodes are discovered.
|
|
+ */
|
|
+ if (!after_bootmem)
|
|
+ find_early_table_space(end, use_pse, use_gbpages);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ for (i = 0; i < nr_range; i++)
|
|
+ kernel_physical_mapping_init(mr[i].start, mr[i].end,
|
|
+ mr[i].page_size_mask);
|
|
+ ret = end;
|
|
+#else /* CONFIG_X86_64 */
|
|
+#define addr_to_page(addr) \
|
|
+ ((unsigned long *) \
|
|
+ ((mfn_to_pfn(((addr) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT) \
|
|
+ << PAGE_SHIFT) + __START_KERNEL_map))
|
|
+
|
|
+ if (!start) {
|
|
+ unsigned long addr, va = __START_KERNEL_map;
|
|
+ unsigned long *page = (unsigned long *)init_level4_pgt;
|
|
+
|
|
+ /* Kill mapping of memory below _text. */
|
|
+ while (va < (unsigned long)&_text) {
|
|
+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
+ BUG();
|
|
+ va += PAGE_SIZE;
|
|
+ }
|
|
+
|
|
+ /* Blow away any spurious initial mappings. */
|
|
+ va = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
|
|
+
|
|
+ addr = page[pgd_index(va)];
|
|
+ page = addr_to_page(addr);
|
|
+ addr = page[pud_index(va)];
|
|
+ page = addr_to_page(addr);
|
|
+ while (pmd_index(va) | pte_index(va)) {
|
|
+ if (pmd_none(*(pmd_t *)&page[pmd_index(va)]))
|
|
+ break;
|
|
+ if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
+ BUG();
|
|
+ va += PAGE_SIZE;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < nr_range; i++)
|
|
+ ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
|
|
+ mr[i].page_size_mask);
|
|
+#undef addr_to_page
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ early_ioremap_page_table_range_init();
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_X86_64
|
|
+ BUG_ON(e820_table_end > e820_table_top);
|
|
+ if (!start)
|
|
+ xen_finish_init_mapping();
|
|
+ else
|
|
+#endif
|
|
+ if (e820_table_end < e820_table_top)
|
|
+ /* Disable the 'table_end' allocator. */
|
|
+ e820_table_top = e820_table_end;
|
|
+
|
|
+ __flush_tlb_all();
|
|
+
|
|
+ if (!after_bootmem && e820_table_top > e820_table_start)
|
|
+ reserve_early(e820_table_start << PAGE_SHIFT,
|
|
+ e820_table_top << PAGE_SHIFT, "PGTABLE");
|
|
+
|
|
+ if (!after_bootmem)
|
|
+ early_memtest(start, end);
|
|
+
|
|
+ return ret >> PAGE_SHIFT;
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * devmem_is_allowed() checks to see if /dev/mem access to a certain address
|
|
+ * is valid. The argument is a physical page number.
|
|
+ *
|
|
+ *
|
|
+ * On x86, access has to be given to the first megabyte of ram because that area
|
|
+ * contains bios code and data regions used by X and dosemu and similar apps.
|
|
+ * Access has to be given to non-kernel-ram areas as well, these contain the PCI
|
|
+ * mmio resources as well as potential bios/acpi data regions.
|
|
+ */
|
|
+int devmem_is_allowed(unsigned long pagenr)
|
|
+{
|
|
+ if (pagenr <= 256)
|
|
+ return 1;
|
|
+ if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
|
|
+ return 0;
|
|
+ if (mfn_to_local_pfn(pagenr) >= max_pfn)
|
|
+ return 1;
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void free_init_pages(char *what, unsigned long begin, unsigned long end)
|
|
+{
|
|
+ unsigned long addr = begin;
|
|
+
|
|
+ if (addr >= end)
|
|
+ return;
|
|
+
|
|
+ /*
|
|
+ * If debugging page accesses then do not free this memory but
|
|
+ * mark them not present - any buggy init-section access will
|
|
+ * create a kernel page fault:
|
|
+ */
|
|
+#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
+ printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
|
|
+ begin, PAGE_ALIGN(end));
|
|
+ set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
|
|
+#else
|
|
+ /*
|
|
+ * We just marked the kernel text read only above, now that
|
|
+ * we are going to free part of that, we need to make that
|
|
+ * writeable first.
|
|
+ */
|
|
+ set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
|
|
+
|
|
+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
|
|
+
|
|
+ for (; addr < end; addr += PAGE_SIZE) {
|
|
+ ClearPageReserved(virt_to_page(addr));
|
|
+ init_page_count(virt_to_page(addr));
|
|
+ memset((void *)(addr & ~(PAGE_SIZE-1)),
|
|
+ POISON_FREE_INITMEM, PAGE_SIZE);
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (addr >= __START_KERNEL_map) {
|
|
+ /* make_readonly() reports all kernel addresses. */
|
|
+ if (HYPERVISOR_update_va_mapping((unsigned long)__va(__pa(addr)),
|
|
+ pfn_pte(__pa(addr) >> PAGE_SHIFT,
|
|
+ PAGE_KERNEL),
|
|
+ 0))
|
|
+ BUG();
|
|
+ if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0))
|
|
+ BUG();
|
|
+ }
|
|
+#endif
|
|
+ free_page(addr);
|
|
+ totalram_pages++;
|
|
+ }
|
|
+#endif
|
|
+}
|
|
+
|
|
+void free_initmem(void)
|
|
+{
|
|
+ free_init_pages("unused kernel memory",
|
|
+ (unsigned long)(&__init_begin),
|
|
+ (unsigned long)(&__init_end));
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_BLK_DEV_INITRD
|
|
+void free_initrd_mem(unsigned long start, unsigned long end)
|
|
+{
|
|
+ free_init_pages("initrd memory", start, end);
|
|
+}
|
|
+#endif
|
|
--- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -52,9 +52,7 @@
|
|
#include <asm/swiotlb.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/cacheflush.h>
|
|
-#include <asm/smp.h>
|
|
-
|
|
-unsigned int __VMALLOC_RESERVE = 128 << 20;
|
|
+#include <asm/init.h>
|
|
|
|
unsigned long max_low_pfn_mapped;
|
|
unsigned long max_pfn_mapped;
|
|
@@ -64,19 +62,14 @@ unsigned long highstart_pfn, highend_pfn
|
|
|
|
static noinline int do_test_wp_bit(void);
|
|
|
|
-
|
|
-static unsigned long __initdata table_start;
|
|
-static unsigned long __initdata table_end;
|
|
-static unsigned long __initdata table_top;
|
|
-
|
|
-static int __initdata after_init_bootmem;
|
|
+bool __read_mostly __vmalloc_start_set = false;
|
|
|
|
static __init void *alloc_low_page(void)
|
|
{
|
|
- unsigned long pfn = table_end++;
|
|
+ unsigned long pfn = e820_table_end++;
|
|
void *adr;
|
|
|
|
- if (pfn >= table_top)
|
|
+ if (pfn >= e820_table_top)
|
|
panic("alloc_low_page: ran out of memory");
|
|
|
|
adr = __va(pfn * PAGE_SIZE);
|
|
@@ -96,7 +89,7 @@ static pmd_t * __init one_md_table_init(
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
|
|
- if (after_init_bootmem)
|
|
+ if (after_bootmem)
|
|
pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
|
|
else
|
|
pmd_table = (pmd_t *)alloc_low_page();
|
|
@@ -128,7 +121,7 @@ static pte_t * __init one_page_table_ini
|
|
#endif
|
|
pte_t *page_table = NULL;
|
|
|
|
- if (after_init_bootmem) {
|
|
+ if (after_bootmem) {
|
|
#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
|
|
#endif
|
|
@@ -148,6 +141,23 @@ static pte_t * __init one_page_table_ini
|
|
return pte_offset_kernel(pmd, 0);
|
|
}
|
|
|
|
+pmd_t * __init populate_extra_pmd(unsigned long vaddr)
|
|
+{
|
|
+ int pgd_idx = pgd_index(vaddr);
|
|
+ int pmd_idx = pmd_index(vaddr);
|
|
+
|
|
+ return one_md_table_init(swapper_pg_dir + pgd_idx) + pmd_idx;
|
|
+}
|
|
+
|
|
+pte_t * __init populate_extra_pte(unsigned long vaddr)
|
|
+{
|
|
+ int pte_idx = pte_index(vaddr);
|
|
+ pmd_t *pmd;
|
|
+
|
|
+ pmd = populate_extra_pmd(vaddr);
|
|
+ return one_page_table_init(pmd) + pte_idx;
|
|
+}
|
|
+
|
|
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
|
|
unsigned long vaddr, pte_t *lastpte)
|
|
{
|
|
@@ -164,12 +174,12 @@ static pte_t *__init page_table_kmap_che
|
|
if (pmd_idx_kmap_begin != pmd_idx_kmap_end
|
|
&& (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
|
|
&& (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
|
|
- && ((__pa(pte) >> PAGE_SHIFT) < table_start
|
|
- || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
|
|
+ && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
|
|
+ || (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
|
|
pte_t *newpte;
|
|
int i;
|
|
|
|
- BUG_ON(after_init_bootmem);
|
|
+ BUG_ON(after_bootmem);
|
|
newpte = alloc_low_page();
|
|
for (i = 0; i < PTRS_PER_PTE; i++)
|
|
set_pte(newpte + i, pte[i]);
|
|
@@ -244,11 +254,14 @@ static inline int is_kernel_text(unsigne
|
|
* of max_low_pfn pages, by creating page tables starting from address
|
|
* PAGE_OFFSET:
|
|
*/
|
|
-static void __init kernel_physical_mapping_init(pgd_t *pgd_base,
|
|
- unsigned long start_pfn,
|
|
- unsigned long end_pfn,
|
|
- int use_pse)
|
|
+unsigned long __init
|
|
+kernel_physical_mapping_init(unsigned long start,
|
|
+ unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
{
|
|
+ int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
|
|
+ unsigned long start_pfn, end_pfn;
|
|
+ pgd_t *pgd_base = swapper_pg_dir;
|
|
int pgd_idx, pmd_idx, pte_ofs;
|
|
unsigned long pfn;
|
|
pgd_t *pgd;
|
|
@@ -257,6 +270,9 @@ static void __init kernel_physical_mappi
|
|
unsigned pages_2m, pages_4k;
|
|
int mapping_iter;
|
|
|
|
+ start_pfn = start >> PAGE_SHIFT;
|
|
+ end_pfn = end >> PAGE_SHIFT;
|
|
+
|
|
/*
|
|
* First iteration will setup identity mapping using large/small pages
|
|
* based on use_pse, with other attributes same as set by
|
|
@@ -391,26 +407,6 @@ repeat:
|
|
mapping_iter = 2;
|
|
goto repeat;
|
|
}
|
|
-}
|
|
-
|
|
-/*
|
|
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
|
|
- * is valid. The argument is a physical page number.
|
|
- *
|
|
- *
|
|
- * On x86, access has to be given to the first megabyte of ram because that area
|
|
- * contains bios code and data regions used by X and dosemu and similar apps.
|
|
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
|
|
- * mmio resources as well as potential bios/acpi data regions.
|
|
- */
|
|
-int devmem_is_allowed(unsigned long pagenr)
|
|
-{
|
|
- if (pagenr <= 256)
|
|
- return 1;
|
|
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
|
|
- return 0;
|
|
- if (mfn_to_local_pfn(pagenr) >= max_pfn)
|
|
- return 1;
|
|
return 0;
|
|
}
|
|
|
|
@@ -506,30 +502,10 @@ void __init add_highpages_with_active_re
|
|
work_with_active_regions(nid, add_highpages_work_fn, &data);
|
|
}
|
|
|
|
-#ifndef CONFIG_NUMA
|
|
-static void __init set_highmem_pages_init(void)
|
|
-{
|
|
- int pfn;
|
|
-
|
|
- add_highpages_with_active_regions(0, highstart_pfn, highend_pfn);
|
|
-
|
|
- /* XEN: init high-mem pages outside initial allocation. */
|
|
- for (pfn = xen_start_info->nr_pages; pfn < highend_pfn; pfn++) {
|
|
- ClearPageReserved(pfn_to_page(pfn));
|
|
- init_page_count(pfn_to_page(pfn));
|
|
- }
|
|
-
|
|
- totalram_pages += totalhigh_pages;
|
|
-}
|
|
-#endif /* !CONFIG_NUMA */
|
|
-
|
|
#else
|
|
static inline void permanent_kmaps_init(pgd_t *pgd_base)
|
|
{
|
|
}
|
|
-static inline void set_highmem_pages_init(void)
|
|
-{
|
|
-}
|
|
#endif /* CONFIG_HIGHMEM */
|
|
|
|
pgd_t *swapper_pg_dir;
|
|
@@ -553,8 +529,9 @@ pgd_t *swapper_pg_dir;
|
|
* be partially populated, and so it avoids stomping on any existing
|
|
* mappings.
|
|
*/
|
|
-static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
|
|
+void __init early_ioremap_page_table_range_init(void)
|
|
{
|
|
+ pgd_t *pgd_base = swapper_pg_dir;
|
|
unsigned long vaddr, end;
|
|
|
|
/*
|
|
@@ -649,7 +626,7 @@ static int __init noexec_setup(char *str
|
|
}
|
|
early_param("noexec", noexec_setup);
|
|
|
|
-static void __init set_nx(void)
|
|
+void __init set_nx(void)
|
|
{
|
|
unsigned int v[4], l, h;
|
|
|
|
@@ -685,75 +662,97 @@ static int __init parse_highmem(char *ar
|
|
}
|
|
early_param("highmem", parse_highmem);
|
|
|
|
+#define MSG_HIGHMEM_TOO_BIG \
|
|
+ "highmem size (%luMB) is bigger than pages available (%luMB)!\n"
|
|
+
|
|
+#define MSG_LOWMEM_TOO_SMALL \
|
|
+ "highmem size (%luMB) results in <64MB lowmem, ignoring it!\n"
|
|
/*
|
|
- * Determine low and high memory ranges:
|
|
+ * All of RAM fits into lowmem - but if user wants highmem
|
|
+ * artificially via the highmem=x boot parameter then create
|
|
+ * it:
|
|
*/
|
|
-void __init find_low_pfn_range(void)
|
|
+void __init lowmem_pfn_init(void)
|
|
{
|
|
- /* it could update max_pfn */
|
|
-
|
|
/* max_low_pfn is 0, we already have early_res support */
|
|
-
|
|
max_low_pfn = max_pfn;
|
|
- if (max_low_pfn > MAXMEM_PFN) {
|
|
- if (highmem_pages == -1)
|
|
- highmem_pages = max_pfn - MAXMEM_PFN;
|
|
- if (highmem_pages + MAXMEM_PFN < max_pfn)
|
|
- max_pfn = MAXMEM_PFN + highmem_pages;
|
|
- if (highmem_pages + MAXMEM_PFN > max_pfn) {
|
|
- printk(KERN_WARNING "only %luMB highmem pages "
|
|
- "available, ignoring highmem size of %uMB.\n",
|
|
- pages_to_mb(max_pfn - MAXMEM_PFN),
|
|
+
|
|
+ if (highmem_pages == -1)
|
|
+ highmem_pages = 0;
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ if (highmem_pages >= max_pfn) {
|
|
+ printk(KERN_ERR MSG_HIGHMEM_TOO_BIG,
|
|
+ pages_to_mb(highmem_pages), pages_to_mb(max_pfn));
|
|
+ highmem_pages = 0;
|
|
+ }
|
|
+ if (highmem_pages) {
|
|
+ if (max_low_pfn - highmem_pages < 64*1024*1024/PAGE_SIZE) {
|
|
+ printk(KERN_ERR MSG_LOWMEM_TOO_SMALL,
|
|
pages_to_mb(highmem_pages));
|
|
highmem_pages = 0;
|
|
}
|
|
- max_low_pfn = MAXMEM_PFN;
|
|
+ max_low_pfn -= highmem_pages;
|
|
+ }
|
|
+#else
|
|
+ if (highmem_pages)
|
|
+ printk(KERN_ERR "ignoring highmem size on non-highmem kernel!\n");
|
|
+#endif
|
|
+}
|
|
+
|
|
+#define MSG_HIGHMEM_TOO_SMALL \
|
|
+ "only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
|
|
+
|
|
+#define MSG_HIGHMEM_TRIMMED \
|
|
+ "Warning: only 4GB will be used. Use a HIGHMEM64G enabled kernel!\n"
|
|
+/*
|
|
+ * We have more RAM than fits into lowmem - we try to put it into
|
|
+ * highmem, also taking the highmem=x boot parameter into account:
|
|
+ */
|
|
+void __init highmem_pfn_init(void)
|
|
+{
|
|
+ max_low_pfn = MAXMEM_PFN;
|
|
+
|
|
+ if (highmem_pages == -1)
|
|
+ highmem_pages = max_pfn - MAXMEM_PFN;
|
|
+
|
|
+ if (highmem_pages + MAXMEM_PFN < max_pfn)
|
|
+ max_pfn = MAXMEM_PFN + highmem_pages;
|
|
+
|
|
+ if (highmem_pages + MAXMEM_PFN > max_pfn) {
|
|
+ printk(KERN_WARNING MSG_HIGHMEM_TOO_SMALL,
|
|
+ pages_to_mb(max_pfn - MAXMEM_PFN),
|
|
+ pages_to_mb(highmem_pages));
|
|
+ highmem_pages = 0;
|
|
+ }
|
|
#ifndef CONFIG_HIGHMEM
|
|
- /* Maximum memory usable is what is directly addressable */
|
|
- printk(KERN_WARNING "Warning only %ldMB will be used.\n",
|
|
- MAXMEM>>20);
|
|
- if (max_pfn > MAX_NONPAE_PFN)
|
|
- printk(KERN_WARNING
|
|
- "Use a HIGHMEM64G enabled kernel.\n");
|
|
- else
|
|
- printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
|
|
- max_pfn = MAXMEM_PFN;
|
|
+ /* Maximum memory usable is what is directly addressable */
|
|
+ printk(KERN_WARNING "Warning only %ldMB will be used.\n", MAXMEM>>20);
|
|
+ if (max_pfn > MAX_NONPAE_PFN)
|
|
+ printk(KERN_WARNING "Use a HIGHMEM64G enabled kernel.\n");
|
|
+ else
|
|
+ printk(KERN_WARNING "Use a HIGHMEM enabled kernel.\n");
|
|
+ max_pfn = MAXMEM_PFN;
|
|
#else /* !CONFIG_HIGHMEM */
|
|
#ifndef CONFIG_HIGHMEM64G
|
|
- if (max_pfn > MAX_NONPAE_PFN) {
|
|
- max_pfn = MAX_NONPAE_PFN;
|
|
- printk(KERN_WARNING "Warning only 4GB will be used."
|
|
- "Use a HIGHMEM64G enabled kernel.\n");
|
|
- }
|
|
+ if (max_pfn > MAX_NONPAE_PFN) {
|
|
+ max_pfn = MAX_NONPAE_PFN;
|
|
+ printk(KERN_WARNING MSG_HIGHMEM_TRIMMED);
|
|
+ }
|
|
#endif /* !CONFIG_HIGHMEM64G */
|
|
#endif /* !CONFIG_HIGHMEM */
|
|
- } else {
|
|
- if (highmem_pages == -1)
|
|
- highmem_pages = 0;
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- if (highmem_pages >= max_pfn) {
|
|
- printk(KERN_ERR "highmem size specified (%uMB) is "
|
|
- "bigger than pages available (%luMB)!.\n",
|
|
- pages_to_mb(highmem_pages),
|
|
- pages_to_mb(max_pfn));
|
|
- highmem_pages = 0;
|
|
- }
|
|
- if (highmem_pages) {
|
|
- if (max_low_pfn - highmem_pages <
|
|
- 64*1024*1024/PAGE_SIZE){
|
|
- printk(KERN_ERR "highmem size %uMB results in "
|
|
- "smaller than 64MB lowmem, ignoring it.\n"
|
|
- , pages_to_mb(highmem_pages));
|
|
- highmem_pages = 0;
|
|
- }
|
|
- max_low_pfn -= highmem_pages;
|
|
- }
|
|
-#else
|
|
- if (highmem_pages)
|
|
- printk(KERN_ERR "ignoring highmem size on non-highmem"
|
|
- " kernel!\n");
|
|
-#endif
|
|
- }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Determine low and high memory ranges:
|
|
+ */
|
|
+void __init find_low_pfn_range(void)
|
|
+{
|
|
+ /* it could update max_pfn */
|
|
+
|
|
+ if (max_pfn <= MAXMEM_PFN)
|
|
+ lowmem_pfn_init();
|
|
+ else
|
|
+ highmem_pfn_init();
|
|
}
|
|
|
|
#ifndef CONFIG_NEED_MULTIPLE_NODES
|
|
@@ -779,6 +778,8 @@ void __init initmem_init(unsigned long s
|
|
#ifdef CONFIG_FLATMEM
|
|
max_mapnr = num_physpages;
|
|
#endif
|
|
+ __vmalloc_start_set = true;
|
|
+
|
|
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
|
|
pages_to_mb(max_low_pfn));
|
|
|
|
@@ -800,40 +801,70 @@ static void __init zone_sizes_init(void)
|
|
free_area_init_nodes(max_zone_pfns);
|
|
}
|
|
|
|
+static unsigned long __init setup_node_bootmem(int nodeid,
|
|
+ unsigned long start_pfn,
|
|
+ unsigned long end_pfn,
|
|
+ unsigned long bootmap)
|
|
+{
|
|
+ unsigned long bootmap_size;
|
|
+
|
|
+ /* don't touch min_low_pfn */
|
|
+ bootmap_size = init_bootmem_node(NODE_DATA(nodeid),
|
|
+ bootmap >> PAGE_SHIFT,
|
|
+ start_pfn, end_pfn);
|
|
+ printk(KERN_INFO " node %d low ram: %08lx - %08lx\n",
|
|
+ nodeid, start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
|
|
+ printk(KERN_INFO " node %d bootmap %08lx - %08lx\n",
|
|
+ nodeid, bootmap, bootmap + bootmap_size);
|
|
+ free_bootmem_with_active_regions(nodeid, end_pfn);
|
|
+ early_res_to_bootmem(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
|
|
+
|
|
+ return bootmap + bootmap_size;
|
|
+}
|
|
+
|
|
void __init setup_bootmem_allocator(void)
|
|
{
|
|
- int i;
|
|
+ int nodeid;
|
|
unsigned long bootmap_size, bootmap;
|
|
- unsigned long end_pfn = min(max_low_pfn, xen_start_info->nr_pages);
|
|
+ unsigned long end_xen_pfn = min(max_low_pfn, xen_start_info->nr_pages);
|
|
|
|
/*
|
|
* Initialize the boot-time allocator (with low memory only):
|
|
*/
|
|
- bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
|
|
- bootmap = find_e820_area(min_low_pfn<<PAGE_SHIFT,
|
|
- min(max_pfn_mapped, xen_start_info->nr_pages)<<PAGE_SHIFT,
|
|
+ bootmap_size = bootmem_bootmap_pages(end_xen_pfn)<<PAGE_SHIFT;
|
|
+ bootmap = find_e820_area(0, min(max_pfn_mapped,
|
|
+ xen_start_info->nr_pages)<<PAGE_SHIFT,
|
|
bootmap_size, PAGE_SIZE);
|
|
if (bootmap == -1L)
|
|
panic("Cannot find bootmem map of size %ld\n", bootmap_size);
|
|
reserve_early(bootmap, bootmap + bootmap_size, "BOOTMAP");
|
|
|
|
- /* don't touch min_low_pfn */
|
|
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap >> PAGE_SHIFT,
|
|
- min_low_pfn, end_pfn);
|
|
printk(KERN_INFO " mapped low ram: 0 - %08lx\n",
|
|
max_pfn_mapped<<PAGE_SHIFT);
|
|
- printk(KERN_INFO " low ram: %08lx - %08lx\n",
|
|
- min_low_pfn<<PAGE_SHIFT, max_low_pfn<<PAGE_SHIFT);
|
|
- printk(KERN_INFO " bootmap %08lx - %08lx\n",
|
|
- bootmap, bootmap + bootmap_size);
|
|
- for_each_online_node(i)
|
|
- free_bootmem_with_active_regions(i, end_pfn);
|
|
- early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT);
|
|
+ printk(KERN_INFO " low ram: 0 - %08lx\n", max_low_pfn<<PAGE_SHIFT);
|
|
|
|
- after_init_bootmem = 1;
|
|
+ for_each_online_node(nodeid) {
|
|
+ unsigned long start_pfn, end_pfn;
|
|
+
|
|
+#ifdef CONFIG_NEED_MULTIPLE_NODES
|
|
+ start_pfn = node_start_pfn[nodeid];
|
|
+ end_pfn = node_end_pfn[nodeid];
|
|
+ if (start_pfn > end_xen_pfn)
|
|
+ continue;
|
|
+ if (end_pfn > end_xen_pfn)
|
|
+ end_pfn = end_xen_pfn;
|
|
+#else
|
|
+ start_pfn = 0;
|
|
+ end_pfn = end_xen_pfn;
|
|
+#endif
|
|
+ bootmap = setup_node_bootmem(nodeid, start_pfn, end_pfn,
|
|
+ bootmap);
|
|
+ }
|
|
+
|
|
+ after_bootmem = 1;
|
|
}
|
|
|
|
-static unsigned long __init extend_init_mapping(unsigned long tables_space)
|
|
+unsigned long __init extend_init_mapping(unsigned long tables_space)
|
|
{
|
|
unsigned long start_pfn = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT)
|
|
+ xen_start_info->nr_pt_frames;
|
|
@@ -885,133 +916,6 @@ static unsigned long __init extend_init_
|
|
return start_pfn;
|
|
}
|
|
|
|
-static void __init find_early_table_space(unsigned long end, int use_pse)
|
|
-{
|
|
- unsigned long puds, pmds, ptes, tables;
|
|
-
|
|
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
|
- tables = PAGE_ALIGN(puds * sizeof(pud_t));
|
|
-
|
|
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
- tables += PAGE_ALIGN(pmds * sizeof(pmd_t));
|
|
-
|
|
- if (use_pse) {
|
|
- unsigned long extra;
|
|
-
|
|
- extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
|
|
- extra += PMD_SIZE;
|
|
- ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
- } else
|
|
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
-
|
|
- tables += PAGE_ALIGN(ptes * sizeof(pte_t));
|
|
-
|
|
- /* for fixmap */
|
|
- tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
|
|
-
|
|
- table_start = extend_init_mapping(tables);
|
|
-
|
|
- table_end = table_start;
|
|
- table_top = table_start + (tables>>PAGE_SHIFT);
|
|
-
|
|
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
|
|
- end, table_start << PAGE_SHIFT,
|
|
- (table_start << PAGE_SHIFT) + tables);
|
|
-}
|
|
-
|
|
-unsigned long __init_refok init_memory_mapping(unsigned long start,
|
|
- unsigned long end)
|
|
-{
|
|
- pgd_t *pgd_base = swapper_pg_dir;
|
|
- unsigned long start_pfn, end_pfn;
|
|
- unsigned long big_page_start;
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- /*
|
|
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
|
- * This will simplify cpa(), which otherwise needs to support splitting
|
|
- * large pages into small in interrupt context, etc.
|
|
- */
|
|
- int use_pse = 0;
|
|
-#else
|
|
- int use_pse = cpu_has_pse;
|
|
-#endif
|
|
-
|
|
- /*
|
|
- * Find space for the kernel direct mapping tables.
|
|
- */
|
|
- if (!after_init_bootmem)
|
|
- find_early_table_space(end, use_pse);
|
|
-
|
|
-#ifdef CONFIG_X86_PAE
|
|
- set_nx();
|
|
- if (nx_enabled)
|
|
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
|
|
-#endif
|
|
-
|
|
- /* Enable PSE if available */
|
|
- if (cpu_has_pse)
|
|
- set_in_cr4(X86_CR4_PSE);
|
|
-
|
|
- /* Enable PGE if available */
|
|
- if (cpu_has_pge) {
|
|
- set_in_cr4(X86_CR4_PGE);
|
|
- __supported_pte_mask |= _PAGE_GLOBAL;
|
|
- }
|
|
-
|
|
- /*
|
|
- * Don't use a large page for the first 2/4MB of memory
|
|
- * because there are often fixed size MTRRs in there
|
|
- * and overlapping MTRRs into large pages can cause
|
|
- * slowdowns.
|
|
- */
|
|
- big_page_start = PMD_SIZE;
|
|
-
|
|
- if (start < big_page_start) {
|
|
- start_pfn = start >> PAGE_SHIFT;
|
|
- end_pfn = min(big_page_start>>PAGE_SHIFT, end>>PAGE_SHIFT);
|
|
- } else {
|
|
- /* head is not big page alignment ? */
|
|
- start_pfn = start >> PAGE_SHIFT;
|
|
- end_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
- << (PMD_SHIFT - PAGE_SHIFT);
|
|
- }
|
|
- if (start_pfn < end_pfn)
|
|
- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn, 0);
|
|
-
|
|
- /* big page range */
|
|
- start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
- << (PMD_SHIFT - PAGE_SHIFT);
|
|
- if (start_pfn < (big_page_start >> PAGE_SHIFT))
|
|
- start_pfn = big_page_start >> PAGE_SHIFT;
|
|
- end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
- if (start_pfn < end_pfn)
|
|
- kernel_physical_mapping_init(pgd_base, start_pfn, end_pfn,
|
|
- use_pse);
|
|
-
|
|
- /* tail is not big page alignment ? */
|
|
- start_pfn = end_pfn;
|
|
- if (start_pfn > (big_page_start>>PAGE_SHIFT)) {
|
|
- end_pfn = end >> PAGE_SHIFT;
|
|
- if (start_pfn < end_pfn)
|
|
- kernel_physical_mapping_init(pgd_base, start_pfn,
|
|
- end_pfn, 0);
|
|
- }
|
|
-
|
|
- early_ioremap_page_table_range_init(pgd_base);
|
|
-
|
|
- __flush_tlb_all();
|
|
-
|
|
- if (!after_init_bootmem)
|
|
- reserve_early(table_start << PAGE_SHIFT,
|
|
- table_end << PAGE_SHIFT, "PGTABLE");
|
|
-
|
|
- if (!after_init_bootmem)
|
|
- early_memtest(start, end);
|
|
-
|
|
- return end >> PAGE_SHIFT;
|
|
-}
|
|
-
|
|
-
|
|
/*
|
|
* paging_init() sets up the page tables - note that the first 8MB are
|
|
* already mapped by head.S.
|
|
@@ -1215,17 +1119,47 @@ static noinline int do_test_wp_bit(void)
|
|
const int rodata_test_data = 0xC3;
|
|
EXPORT_SYMBOL_GPL(rodata_test_data);
|
|
|
|
+static int kernel_set_to_readonly;
|
|
+
|
|
+void set_kernel_text_rw(void)
|
|
+{
|
|
+ unsigned long start = PFN_ALIGN(_text);
|
|
+ unsigned long size = PFN_ALIGN(_etext) - start;
|
|
+
|
|
+ if (!kernel_set_to_readonly)
|
|
+ return;
|
|
+
|
|
+ pr_debug("Set kernel text: %lx - %lx for read write\n",
|
|
+ start, start+size);
|
|
+
|
|
+ set_pages_rw(virt_to_page(start), size >> PAGE_SHIFT);
|
|
+}
|
|
+
|
|
+void set_kernel_text_ro(void)
|
|
+{
|
|
+ unsigned long start = PFN_ALIGN(_text);
|
|
+ unsigned long size = PFN_ALIGN(_etext) - start;
|
|
+
|
|
+ if (!kernel_set_to_readonly)
|
|
+ return;
|
|
+
|
|
+ pr_debug("Set kernel text: %lx - %lx for read only\n",
|
|
+ start, start+size);
|
|
+
|
|
+ set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
|
|
+}
|
|
+
|
|
void mark_rodata_ro(void)
|
|
{
|
|
unsigned long start = PFN_ALIGN(_text);
|
|
unsigned long size = PFN_ALIGN(_etext) - start;
|
|
|
|
-#ifndef CONFIG_DYNAMIC_FTRACE
|
|
- /* Dynamic tracing modifies the kernel text section */
|
|
set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT);
|
|
printk(KERN_INFO "Write protecting the kernel text: %luk\n",
|
|
size >> 10);
|
|
|
|
+ kernel_set_to_readonly = 1;
|
|
+
|
|
#ifdef CONFIG_CPA_DEBUG
|
|
printk(KERN_INFO "Testing CPA: Reverting %lx-%lx\n",
|
|
start, start+size);
|
|
@@ -1234,7 +1168,6 @@ void mark_rodata_ro(void)
|
|
printk(KERN_INFO "Testing CPA: write protecting again\n");
|
|
set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT);
|
|
#endif
|
|
-#endif /* CONFIG_DYNAMIC_FTRACE */
|
|
|
|
start += size;
|
|
size = (unsigned long)__end_rodata - start;
|
|
@@ -1253,52 +1186,6 @@ void mark_rodata_ro(void)
|
|
}
|
|
#endif
|
|
|
|
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
|
|
-{
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- /*
|
|
- * If debugging page accesses then do not free this memory but
|
|
- * mark them not present - any buggy init-section access will
|
|
- * create a kernel page fault:
|
|
- */
|
|
- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
|
|
- begin, PAGE_ALIGN(end));
|
|
- set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
|
|
-#else
|
|
- unsigned long addr;
|
|
-
|
|
- /*
|
|
- * We just marked the kernel text read only above, now that
|
|
- * we are going to free part of that, we need to make that
|
|
- * writeable first.
|
|
- */
|
|
- set_memory_rw(begin, (end - begin) >> PAGE_SHIFT);
|
|
-
|
|
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
|
|
- ClearPageReserved(virt_to_page(addr));
|
|
- init_page_count(virt_to_page(addr));
|
|
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
|
|
- free_page(addr);
|
|
- totalram_pages++;
|
|
- }
|
|
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
|
|
-#endif
|
|
-}
|
|
-
|
|
-void free_initmem(void)
|
|
-{
|
|
- free_init_pages("unused kernel memory",
|
|
- (unsigned long)(&__init_begin),
|
|
- (unsigned long)(&__init_end));
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_BLK_DEV_INITRD
|
|
-void free_initrd_mem(unsigned long start, unsigned long end)
|
|
-{
|
|
- free_init_pages("initrd memory", start, end);
|
|
-}
|
|
-#endif
|
|
-
|
|
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
|
|
int flags)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/mm/init_64-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/init_64-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -51,6 +51,8 @@
|
|
#include <asm/kdebug.h>
|
|
#include <asm/numa.h>
|
|
#include <asm/cacheflush.h>
|
|
+#include <asm/init.h>
|
|
+#include <asm/setup.h>
|
|
|
|
#include <xen/features.h>
|
|
|
|
@@ -67,8 +69,6 @@ unsigned int __kernel_page_user;
|
|
EXPORT_SYMBOL(__kernel_page_user);
|
|
#endif
|
|
|
|
-int after_bootmem;
|
|
-
|
|
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
|
|
extern pmd_t level2_fixmap_pgt[PTRS_PER_PMD];
|
|
@@ -127,12 +127,6 @@ void __meminit early_make_page_readonly(
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-int direct_gbpages
|
|
-#ifdef CONFIG_DIRECT_GBPAGES
|
|
- = 1
|
|
-#endif
|
|
-;
|
|
-
|
|
static int __init parse_direct_gbpages_off(char *arg)
|
|
{
|
|
direct_gbpages = 0;
|
|
@@ -154,14 +148,10 @@ early_param("gbpages", parse_direct_gbpa
|
|
* around without checking the pgd every time.
|
|
*/
|
|
|
|
-static unsigned long __meminitdata table_start;
|
|
-static unsigned long __meminitdata table_cur;
|
|
-static unsigned long __meminitdata table_top;
|
|
-
|
|
pteval_t __supported_pte_mask __read_mostly = ~0UL;
|
|
EXPORT_SYMBOL_GPL(__supported_pte_mask);
|
|
|
|
-static int do_not_nx __cpuinitdata;
|
|
+static int disable_nx __cpuinitdata;
|
|
|
|
/*
|
|
* noexec=on|off
|
|
@@ -176,9 +166,9 @@ static int __init nonx_setup(char *str)
|
|
return -EINVAL;
|
|
if (!strncmp(str, "on", 2)) {
|
|
__supported_pte_mask |= _PAGE_NX;
|
|
- do_not_nx = 0;
|
|
+ disable_nx = 0;
|
|
} else if (!strncmp(str, "off", 3)) {
|
|
- do_not_nx = 1;
|
|
+ disable_nx = 1;
|
|
__supported_pte_mask &= ~_PAGE_NX;
|
|
}
|
|
return 0;
|
|
@@ -190,7 +180,7 @@ void __cpuinit check_efer(void)
|
|
unsigned long efer;
|
|
|
|
rdmsrl(MSR_EFER, efer);
|
|
- if (!(efer & EFER_NX) || do_not_nx)
|
|
+ if (!(efer & EFER_NX) || disable_nx)
|
|
__supported_pte_mask &= ~_PAGE_NX;
|
|
}
|
|
|
|
@@ -224,9 +214,9 @@ static __ref void *spp_getpage(void)
|
|
|
|
if (after_bootmem)
|
|
ptr = (void *) get_zeroed_page(GFP_ATOMIC);
|
|
- else if (table_cur < table_top) {
|
|
- ptr = __va(table_cur << PAGE_SHIFT);
|
|
- table_cur++;
|
|
+ else if (e820_table_end < e820_table_top) {
|
|
+ ptr = __va(e820_table_end << PAGE_SHIFT);
|
|
+ e820_table_end++;
|
|
memset(ptr, 0, PAGE_SIZE);
|
|
} else
|
|
ptr = alloc_bootmem_pages(PAGE_SIZE);
|
|
@@ -241,36 +231,54 @@ static __ref void *spp_getpage(void)
|
|
return ptr;
|
|
}
|
|
|
|
-void
|
|
-set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
|
|
+static pud_t *fill_pud(pgd_t *pgd, unsigned long vaddr)
|
|
{
|
|
- pud_t *pud;
|
|
- pmd_t *pmd;
|
|
- pte_t *pte;
|
|
+ if (pgd_none(*pgd)) {
|
|
+ pud_t *pud = (pud_t *)spp_getpage();
|
|
+ make_page_readonly(pud, XENFEAT_writable_page_tables);
|
|
+ pgd_populate(&init_mm, pgd, pud);
|
|
+ if (pud != pud_offset(pgd, 0))
|
|
+ printk(KERN_ERR "PAGETABLE BUG #00! %p <-> %p\n",
|
|
+ pud, pud_offset(pgd, 0));
|
|
+ }
|
|
+ return pud_offset(pgd, vaddr);
|
|
+}
|
|
|
|
- pud = pud_page + pud_index(vaddr);
|
|
+static pmd_t *fill_pmd(pud_t *pud, unsigned long vaddr)
|
|
+{
|
|
if (pud_none(*pud)) {
|
|
- pmd = (pmd_t *) spp_getpage();
|
|
+ pmd_t *pmd = (pmd_t *) spp_getpage();
|
|
make_page_readonly(pmd, XENFEAT_writable_page_tables);
|
|
pud_populate(&init_mm, pud, pmd);
|
|
- if (pmd != pmd_offset(pud, 0)) {
|
|
+ if (pmd != pmd_offset(pud, 0))
|
|
printk(KERN_ERR "PAGETABLE BUG #01! %p <-> %p\n",
|
|
- pmd, pmd_offset(pud, 0));
|
|
- return;
|
|
- }
|
|
+ pmd, pmd_offset(pud, 0));
|
|
}
|
|
- pmd = pmd_offset(pud, vaddr);
|
|
+ return pmd_offset(pud, vaddr);
|
|
+}
|
|
+
|
|
+static pte_t *fill_pte(pmd_t *pmd, unsigned long vaddr)
|
|
+{
|
|
if (pmd_none(*pmd)) {
|
|
- pte = (pte_t *) spp_getpage();
|
|
+ pte_t *pte = (pte_t *) spp_getpage();
|
|
make_page_readonly(pte, XENFEAT_writable_page_tables);
|
|
pmd_populate_kernel(&init_mm, pmd, pte);
|
|
- if (pte != pte_offset_kernel(pmd, 0)) {
|
|
+ if (pte != pte_offset_kernel(pmd, 0))
|
|
printk(KERN_ERR "PAGETABLE BUG #02!\n");
|
|
- return;
|
|
- }
|
|
}
|
|
+ return pte_offset_kernel(pmd, vaddr);
|
|
+}
|
|
+
|
|
+void set_pte_vaddr_pud(pud_t *pud_page, unsigned long vaddr, pte_t new_pte)
|
|
+{
|
|
+ pud_t *pud;
|
|
+ pmd_t *pmd;
|
|
+ pte_t *pte;
|
|
+
|
|
+ pud = pud_page + pud_index(vaddr);
|
|
+ pmd = fill_pmd(pud, vaddr);
|
|
+ pte = fill_pte(pmd, vaddr);
|
|
|
|
- pte = pte_offset_kernel(pmd, vaddr);
|
|
set_pte(pte, new_pte);
|
|
|
|
/*
|
|
@@ -280,8 +288,7 @@ set_pte_vaddr_pud(pud_t *pud_page, unsig
|
|
__flush_tlb_one(vaddr);
|
|
}
|
|
|
|
-void
|
|
-set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
|
+void set_pte_vaddr(unsigned long vaddr, pte_t pteval)
|
|
{
|
|
pgd_t *pgd;
|
|
pud_t *pud_page;
|
|
@@ -298,6 +305,24 @@ set_pte_vaddr(unsigned long vaddr, pte_t
|
|
set_pte_vaddr_pud(pud_page, vaddr, pteval);
|
|
}
|
|
|
|
+pmd_t * __init populate_extra_pmd(unsigned long vaddr)
|
|
+{
|
|
+ pgd_t *pgd;
|
|
+ pud_t *pud;
|
|
+
|
|
+ pgd = pgd_offset_k(vaddr);
|
|
+ pud = fill_pud(pgd, vaddr);
|
|
+ return fill_pmd(pud, vaddr);
|
|
+}
|
|
+
|
|
+pte_t * __init populate_extra_pte(unsigned long vaddr)
|
|
+{
|
|
+ pmd_t *pmd;
|
|
+
|
|
+ pmd = populate_extra_pmd(vaddr);
|
|
+ return fill_pte(pmd, vaddr);
|
|
+}
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
* Create large page table mappings for a range of physical addresses.
|
|
@@ -380,9 +405,9 @@ static __ref void *alloc_low_page(unsign
|
|
return adr;
|
|
}
|
|
|
|
- BUG_ON(!table_cur);
|
|
- pfn = table_cur++;
|
|
- if (pfn >= table_top)
|
|
+ BUG_ON(!e820_table_end);
|
|
+ pfn = e820_table_end++;
|
|
+ if (pfn >= e820_table_top)
|
|
panic("alloc_low_page: ran out of memory");
|
|
|
|
adr = early_memremap(pfn * PAGE_SIZE, PAGE_SIZE);
|
|
@@ -407,13 +432,13 @@ static inline int __meminit make_readonl
|
|
/* Make new page tables read-only on the first pass. */
|
|
if (!xen_feature(XENFEAT_writable_page_tables)
|
|
&& !max_pfn_mapped
|
|
- && (paddr >= (table_start << PAGE_SHIFT))
|
|
- && (paddr < (table_top << PAGE_SHIFT)))
|
|
+ && (paddr >= (e820_table_start << PAGE_SHIFT))
|
|
+ && (paddr < (e820_table_top << PAGE_SHIFT)))
|
|
readonly = 1;
|
|
/* Make old page tables read-only. */
|
|
if (!xen_feature(XENFEAT_writable_page_tables)
|
|
&& (paddr >= (xen_start_info->pt_base - __START_KERNEL_map))
|
|
- && (paddr < (table_cur << PAGE_SHIFT)))
|
|
+ && (paddr < (e820_table_end << PAGE_SHIFT)))
|
|
readonly = 1;
|
|
|
|
/*
|
|
@@ -422,7 +447,7 @@ static inline int __meminit make_readonl
|
|
* mappings. Exclude the vsyscall area here, allowing alternative
|
|
* instruction patching to work.
|
|
*/
|
|
- if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa_symbol(&_end))
|
|
+ if ((paddr >= __pa_symbol(&_text)) && (paddr < __pa(_brk_end))
|
|
&& !(paddr >= __pa_symbol(&__vsyscall_0)
|
|
&& paddr < __pa_symbol(&__vsyscall_0) + PAGE_SIZE))
|
|
readonly = 1;
|
|
@@ -747,43 +772,9 @@ void __init xen_init_pt(void)
|
|
}
|
|
}
|
|
|
|
-static void __init find_early_table_space(unsigned long end, int use_pse,
|
|
- int use_gbpages)
|
|
-{
|
|
- unsigned long puds, pmds, ptes, tables;
|
|
-
|
|
- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
|
|
- tables = round_up(puds * sizeof(pud_t), PAGE_SIZE);
|
|
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
|
|
- tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE);
|
|
-
|
|
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
|
- tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE);
|
|
-
|
|
- if (!table_top) {
|
|
- table_start = (__pa(xen_start_info->pt_base) >> PAGE_SHIFT) +
|
|
- xen_start_info->nr_pt_frames;
|
|
- table_cur = table_start;
|
|
- } else {
|
|
- /*
|
|
- * [table_start, table_top) gets passed to reserve_early(),
|
|
- * so we must not use table_cur here, despite continuing
|
|
- * to allocate from there. table_cur possibly being below
|
|
- * table_start is otoh not a problem.
|
|
- */
|
|
- table_start = table_top;
|
|
- }
|
|
- __flush_tlb_all();
|
|
-
|
|
- table_top = table_cur + (tables >> PAGE_SHIFT);
|
|
-
|
|
- printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
|
|
- end, table_cur << PAGE_SHIFT, table_top << PAGE_SHIFT);
|
|
-}
|
|
-
|
|
-static void __init xen_finish_init_mapping(void)
|
|
+void __init xen_finish_init_mapping(void)
|
|
{
|
|
- unsigned long i, start, end;
|
|
+ unsigned long start, end;
|
|
|
|
/* Re-vector virtual addresses pointing into the initial
|
|
mapping to the just-established permanent ones. */
|
|
@@ -801,49 +792,22 @@ static void __init xen_finish_init_mappi
|
|
__va(__pa(xen_start_info->mod_start));
|
|
|
|
/* Destroy the Xen-created mappings beyond the kernel image. */
|
|
- start = PAGE_ALIGN((unsigned long)_end);
|
|
- end = __START_KERNEL_map + (table_start << PAGE_SHIFT);
|
|
+ start = PAGE_ALIGN(_brk_end);
|
|
+ end = __START_KERNEL_map + (e820_table_start << PAGE_SHIFT);
|
|
for (; start < end; start += PAGE_SIZE)
|
|
if (HYPERVISOR_update_va_mapping(start, __pte_ma(0), 0))
|
|
BUG();
|
|
|
|
- /* Allocate pte's for initial fixmaps from 'table_cur' allocator. */
|
|
- start = table_top;
|
|
- WARN(table_cur != start, "start=%lx cur=%lx top=%lx\n",
|
|
- table_start, table_cur, start);
|
|
- table_top = ~0UL;
|
|
-
|
|
- /* Switch to the real shared_info page, and clear the dummy page. */
|
|
- set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
|
|
- HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
|
|
- memset(empty_zero_page, 0, sizeof(empty_zero_page));
|
|
-
|
|
- /* Set up mapping of lowest 1MB of physical memory. */
|
|
- for (i = 0; i < NR_FIX_ISAMAPS; i++)
|
|
- if (is_initial_xendomain())
|
|
- set_fixmap(FIX_ISAMAP_BEGIN - i, i * PAGE_SIZE);
|
|
- else
|
|
- __set_fixmap(FIX_ISAMAP_BEGIN - i,
|
|
- virt_to_mfn(empty_zero_page)
|
|
- << PAGE_SHIFT,
|
|
- PAGE_KERNEL_RO);
|
|
-
|
|
- table_top = max(table_cur, start);
|
|
+ WARN(e820_table_end != e820_table_top, "start=%lx cur=%lx top=%lx\n",
|
|
+ e820_table_start, e820_table_end, e820_table_top);
|
|
+ if (e820_table_end > e820_table_top)
|
|
+ e820_table_top = e820_table_end;
|
|
}
|
|
|
|
-static void __init init_gbpages(void)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- if (direct_gbpages && cpu_has_gbpages)
|
|
- printk(KERN_INFO "Using GB pages for direct mapping\n");
|
|
- else
|
|
- direct_gbpages = 0;
|
|
-#endif
|
|
-}
|
|
-
|
|
-static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
|
|
- unsigned long end,
|
|
- unsigned long page_size_mask)
|
|
+unsigned long __init
|
|
+kernel_physical_mapping_init(unsigned long start,
|
|
+ unsigned long end,
|
|
+ unsigned long page_size_mask)
|
|
{
|
|
|
|
unsigned long next, last_map_addr = end;
|
|
@@ -887,207 +851,6 @@ static unsigned long __meminit kernel_ph
|
|
return last_map_addr;
|
|
}
|
|
|
|
-struct map_range {
|
|
- unsigned long start;
|
|
- unsigned long end;
|
|
- unsigned page_size_mask;
|
|
-};
|
|
-
|
|
-#define NR_RANGE_MR 5
|
|
-
|
|
-static int save_mr(struct map_range *mr, int nr_range,
|
|
- unsigned long start_pfn, unsigned long end_pfn,
|
|
- unsigned long page_size_mask)
|
|
-{
|
|
-
|
|
- if (start_pfn < end_pfn) {
|
|
- if (nr_range >= NR_RANGE_MR)
|
|
- panic("run out of range for init_memory_mapping\n");
|
|
- mr[nr_range].start = start_pfn<<PAGE_SHIFT;
|
|
- mr[nr_range].end = end_pfn<<PAGE_SHIFT;
|
|
- mr[nr_range].page_size_mask = page_size_mask;
|
|
- nr_range++;
|
|
- }
|
|
-
|
|
- return nr_range;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Setup the direct mapping of the physical memory at PAGE_OFFSET.
|
|
- * This runs before bootmem is initialized and gets pages directly from
|
|
- * the physical memory. To access them they are temporarily mapped.
|
|
- */
|
|
-unsigned long __init_refok init_memory_mapping(unsigned long start,
|
|
- unsigned long end)
|
|
-{
|
|
- unsigned long last_map_addr = 0;
|
|
- unsigned long page_size_mask = 0;
|
|
- unsigned long start_pfn, end_pfn;
|
|
- unsigned long pos;
|
|
-
|
|
- struct map_range mr[NR_RANGE_MR];
|
|
- int nr_range, i;
|
|
- int use_pse, use_gbpages;
|
|
-
|
|
- printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);
|
|
-
|
|
- /*
|
|
- * Find space for the kernel direct mapping tables.
|
|
- *
|
|
- * Later we should allocate these tables in the local node of the
|
|
- * memory mapped. Unfortunately this is done currently before the
|
|
- * nodes are discovered.
|
|
- */
|
|
- if (!after_bootmem)
|
|
- init_gbpages();
|
|
-
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- /*
|
|
- * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
|
|
- * This will simplify cpa(), which otherwise needs to support splitting
|
|
- * large pages into small in interrupt context, etc.
|
|
- */
|
|
- use_pse = use_gbpages = 0;
|
|
-#else
|
|
- use_pse = cpu_has_pse;
|
|
- use_gbpages = direct_gbpages;
|
|
-#endif
|
|
-
|
|
- if (use_gbpages)
|
|
- page_size_mask |= 1 << PG_LEVEL_1G;
|
|
- if (use_pse)
|
|
- page_size_mask |= 1 << PG_LEVEL_2M;
|
|
-
|
|
- memset(mr, 0, sizeof(mr));
|
|
- nr_range = 0;
|
|
-
|
|
- /* head if not big page alignment ?*/
|
|
- start_pfn = start >> PAGE_SHIFT;
|
|
- pos = start_pfn << PAGE_SHIFT;
|
|
- end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
|
|
- << (PMD_SHIFT - PAGE_SHIFT);
|
|
- if (end_pfn > (end >> PAGE_SHIFT))
|
|
- end_pfn = end >> PAGE_SHIFT;
|
|
- if (start_pfn < end_pfn) {
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
- pos = end_pfn << PAGE_SHIFT;
|
|
- }
|
|
-
|
|
- /* big page (2M) range*/
|
|
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
- << (PMD_SHIFT - PAGE_SHIFT);
|
|
- end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
- << (PUD_SHIFT - PAGE_SHIFT);
|
|
- if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
|
|
- end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
|
|
- if (start_pfn < end_pfn) {
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
- page_size_mask & (1<<PG_LEVEL_2M));
|
|
- pos = end_pfn << PAGE_SHIFT;
|
|
- }
|
|
-
|
|
- /* big page (1G) range */
|
|
- start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
|
|
- << (PUD_SHIFT - PAGE_SHIFT);
|
|
- end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
|
|
- if (start_pfn < end_pfn) {
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
- page_size_mask &
|
|
- ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
|
|
- pos = end_pfn << PAGE_SHIFT;
|
|
- }
|
|
-
|
|
- /* tail is not big page (1G) alignment */
|
|
- start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
|
|
- << (PMD_SHIFT - PAGE_SHIFT);
|
|
- end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
|
|
- if (start_pfn < end_pfn) {
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
|
|
- page_size_mask & (1<<PG_LEVEL_2M));
|
|
- pos = end_pfn << PAGE_SHIFT;
|
|
- }
|
|
-
|
|
- /* tail is not big page (2M) alignment */
|
|
- start_pfn = pos>>PAGE_SHIFT;
|
|
- end_pfn = end>>PAGE_SHIFT;
|
|
- nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
|
|
-
|
|
- /* try to merge same page size and continuous */
|
|
- for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
|
|
- unsigned long old_start;
|
|
- if (mr[i].end != mr[i+1].start ||
|
|
- mr[i].page_size_mask != mr[i+1].page_size_mask)
|
|
- continue;
|
|
- /* move it */
|
|
- old_start = mr[i].start;
|
|
- memmove(&mr[i], &mr[i+1],
|
|
- (nr_range - 1 - i) * sizeof (struct map_range));
|
|
- mr[i--].start = old_start;
|
|
- nr_range--;
|
|
- }
|
|
-
|
|
- for (i = 0; i < nr_range; i++)
|
|
- printk(KERN_DEBUG " %010lx - %010lx page %s\n",
|
|
- mr[i].start, mr[i].end,
|
|
- (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
|
|
- (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));
|
|
-
|
|
- if (!after_bootmem)
|
|
- find_early_table_space(end, use_pse, use_gbpages);
|
|
-
|
|
- if (!start) {
|
|
- unsigned long addr, va = __START_KERNEL_map;
|
|
- unsigned long *page = (unsigned long *)init_level4_pgt;
|
|
-
|
|
- /* Kill mapping of memory below _text. */
|
|
- while (va < (unsigned long)&_text) {
|
|
- if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
- BUG();
|
|
- va += PAGE_SIZE;
|
|
- }
|
|
-
|
|
- /* Blow away any spurious initial mappings. */
|
|
- va = __START_KERNEL_map + (table_start << PAGE_SHIFT);
|
|
- addr = page[pgd_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- addr = page[pud_index(va)];
|
|
- addr_to_page(addr, page);
|
|
- while (pmd_index(va) | pte_index(va)) {
|
|
- if (pmd_none(*(pmd_t *)&page[pmd_index(va)]))
|
|
- break;
|
|
- if (HYPERVISOR_update_va_mapping(va, __pte_ma(0), 0))
|
|
- BUG();
|
|
- va += PAGE_SIZE;
|
|
- }
|
|
- }
|
|
-
|
|
- for (i = 0; i < nr_range; i++)
|
|
- last_map_addr = kernel_physical_mapping_init(
|
|
- mr[i].start, mr[i].end,
|
|
- mr[i].page_size_mask);
|
|
-
|
|
- BUG_ON(table_cur > table_top);
|
|
- if (!start)
|
|
- xen_finish_init_mapping();
|
|
- else if (table_cur < table_top)
|
|
- /* Disable the 'table_cur' allocator. */
|
|
- table_top = table_cur;
|
|
-
|
|
- __flush_tlb_all();
|
|
-
|
|
- if (!after_bootmem && table_top > table_start)
|
|
- reserve_early(table_start << PAGE_SHIFT,
|
|
- table_top << PAGE_SHIFT, "PGTABLE");
|
|
-
|
|
- printk(KERN_INFO "last_map_addr: %lx end: %lx\n",
|
|
- last_map_addr, end);
|
|
-
|
|
- if (!after_bootmem)
|
|
- early_memtest(start, end);
|
|
-
|
|
- return last_map_addr >> PAGE_SHIFT;
|
|
-}
|
|
-
|
|
#ifndef CONFIG_NUMA
|
|
void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
|
|
{
|
|
@@ -1165,28 +928,6 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
|
|
|
|
#endif /* CONFIG_MEMORY_HOTPLUG */
|
|
|
|
-/*
|
|
- * devmem_is_allowed() checks to see if /dev/mem access to a certain address
|
|
- * is valid. The argument is a physical page number.
|
|
- *
|
|
- *
|
|
- * On x86, access has to be given to the first megabyte of ram because that area
|
|
- * contains bios code and data regions used by X and dosemu and similar apps.
|
|
- * Access has to be given to non-kernel-ram areas as well, these contain the PCI
|
|
- * mmio resources as well as potential bios/acpi data regions.
|
|
- */
|
|
-int devmem_is_allowed(unsigned long pagenr)
|
|
-{
|
|
- if (pagenr <= 256)
|
|
- return 1;
|
|
- if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
|
|
- return 0;
|
|
- if (mfn_to_local_pfn(pagenr) >= max_pfn)
|
|
- return 1;
|
|
- return 0;
|
|
-}
|
|
-
|
|
-
|
|
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
|
|
kcore_modules, kcore_vsyscall;
|
|
|
|
@@ -1243,56 +984,39 @@ void __init mem_init(void)
|
|
initsize >> 10);
|
|
}
|
|
|
|
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
|
|
+#ifdef CONFIG_DEBUG_RODATA
|
|
+const int rodata_test_data = 0xC3;
|
|
+EXPORT_SYMBOL_GPL(rodata_test_data);
|
|
+
|
|
+static int kernel_set_to_readonly;
|
|
+
|
|
+void set_kernel_text_rw(void)
|
|
{
|
|
- unsigned long addr = begin;
|
|
+ unsigned long start = PFN_ALIGN(_stext);
|
|
+ unsigned long end = PFN_ALIGN(__start_rodata);
|
|
|
|
- if (addr >= end)
|
|
+ if (!kernel_set_to_readonly)
|
|
return;
|
|
|
|
- /*
|
|
- * If debugging page accesses then do not free this memory but
|
|
- * mark them not present - any buggy init-section access will
|
|
- * create a kernel page fault:
|
|
- */
|
|
-#ifdef CONFIG_DEBUG_PAGEALLOC
|
|
- printk(KERN_INFO "debug: unmapping init memory %08lx..%08lx\n",
|
|
- begin, PAGE_ALIGN(end));
|
|
- set_memory_np(begin, (end - begin) >> PAGE_SHIFT);
|
|
-#else
|
|
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
|
|
+ pr_debug("Set kernel text: %lx - %lx for read write\n",
|
|
+ start, end);
|
|
|
|
- for (; addr < end; addr += PAGE_SIZE) {
|
|
- ClearPageReserved(virt_to_page(addr));
|
|
- init_page_count(virt_to_page(addr));
|
|
- memset((void *)(addr & ~(PAGE_SIZE-1)),
|
|
- POISON_FREE_INITMEM, PAGE_SIZE);
|
|
- if (addr >= __START_KERNEL_map) {
|
|
- /* make_readonly() reports all kernel addresses. */
|
|
- if (HYPERVISOR_update_va_mapping((unsigned long)__va(__pa(addr)),
|
|
- pfn_pte(__pa(addr) >> PAGE_SHIFT,
|
|
- PAGE_KERNEL),
|
|
- 0))
|
|
- BUG();
|
|
- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0))
|
|
- BUG();
|
|
- }
|
|
- free_page(addr);
|
|
- totalram_pages++;
|
|
- }
|
|
-#endif
|
|
+ set_memory_rw(start, (end - start) >> PAGE_SHIFT);
|
|
}
|
|
|
|
-void free_initmem(void)
|
|
+void set_kernel_text_ro(void)
|
|
{
|
|
- free_init_pages("unused kernel memory",
|
|
- (unsigned long)(&__init_begin),
|
|
- (unsigned long)(&__init_end));
|
|
-}
|
|
+ unsigned long start = PFN_ALIGN(_stext);
|
|
+ unsigned long end = PFN_ALIGN(__start_rodata);
|
|
|
|
-#ifdef CONFIG_DEBUG_RODATA
|
|
-const int rodata_test_data = 0xC3;
|
|
-EXPORT_SYMBOL_GPL(rodata_test_data);
|
|
+ if (!kernel_set_to_readonly)
|
|
+ return;
|
|
+
|
|
+ pr_debug("Set kernel text: %lx - %lx for read only\n",
|
|
+ start, end);
|
|
+
|
|
+ set_memory_ro(start, (end - start) >> PAGE_SHIFT);
|
|
+}
|
|
|
|
void mark_rodata_ro(void)
|
|
{
|
|
@@ -1300,15 +1024,12 @@ void mark_rodata_ro(void)
|
|
unsigned long rodata_start =
|
|
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
|
|
|
|
-#ifdef CONFIG_DYNAMIC_FTRACE
|
|
- /* Dynamic tracing modifies the kernel text section */
|
|
- start = rodata_start;
|
|
-#endif
|
|
-
|
|
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
|
|
(end - start) >> 10);
|
|
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
|
|
|
|
+ kernel_set_to_readonly = 1;
|
|
+
|
|
/*
|
|
* The rodata section (but not the kernel text!) should also be
|
|
* not-executable.
|
|
@@ -1328,13 +1049,6 @@ void mark_rodata_ro(void)
|
|
|
|
#endif
|
|
|
|
-#ifdef CONFIG_BLK_DEV_INITRD
|
|
-void free_initrd_mem(unsigned long start, unsigned long end)
|
|
-{
|
|
- free_init_pages("initrd memory", start, end);
|
|
-}
|
|
-#endif
|
|
-
|
|
int __init reserve_bootmem_generic(unsigned long phys, unsigned long len,
|
|
int flags)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/mm/iomap_32-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/iomap_32-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -20,10 +20,11 @@
|
|
#include <asm/pat.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/highmem.h>
|
|
|
|
int is_io_mapping_possible(resource_size_t base, unsigned long size)
|
|
{
|
|
-#ifndef CONFIG_X86_PAE
|
|
+#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
|
|
/* There is no way to map greater than 1 << 32 address without PAE */
|
|
if (base + size > 0x100000000ULL)
|
|
return 0;
|
|
@@ -32,16 +33,28 @@ int is_io_mapping_possible(resource_size
|
|
}
|
|
EXPORT_SYMBOL_GPL(is_io_mapping_possible);
|
|
|
|
-/* Map 'mfn' using fixed map 'type' and protections 'prot'
|
|
- */
|
|
-void *
|
|
-iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
|
|
+void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
|
|
{
|
|
enum fixed_addresses idx;
|
|
unsigned long vaddr;
|
|
|
|
pagefault_disable();
|
|
|
|
+ debug_kmap_atomic(type);
|
|
+ idx = type + KM_TYPE_NR * smp_processor_id();
|
|
+ vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
+ set_pte_at(&init_mm, vaddr, kmap_pte - idx, pfn_pte(pfn, prot));
|
|
+ /*arch_flush_lazy_mmu_mode();*/
|
|
+
|
|
+ return (void *)vaddr;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Map 'mfn' using fixed map 'type' and protections 'prot'
|
|
+ */
|
|
+void *
|
|
+iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
|
|
+{
|
|
/*
|
|
* For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
|
|
* PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
|
|
@@ -51,13 +64,8 @@ iomap_atomic_prot_pfn(unsigned long mfn,
|
|
if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
|
|
prot = PAGE_KERNEL_UC_MINUS;
|
|
|
|
- idx = type + KM_TYPE_NR*smp_processor_id();
|
|
- vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
pgprot_val(prot) |= _PAGE_IOMAP;
|
|
- set_pte_at(&init_mm, vaddr, kmap_pte-idx, pfn_pte_ma(mfn, prot));
|
|
- /*arch_flush_lazy_mmu_mode()*/;
|
|
-
|
|
- return (void*) vaddr;
|
|
+ return kmap_atomic_prot_pfn(mfn, type, prot);
|
|
}
|
|
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
|
|
|
|
--- head-2010-05-25.orig/arch/x86/mm/ioremap-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/ioremap-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -23,13 +23,17 @@
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/pat.h>
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
-
|
|
-static inline int phys_addr_valid(unsigned long addr)
|
|
+static inline int phys_addr_valid(resource_size_t addr)
|
|
{
|
|
- return addr < (1UL << boot_cpu_data.x86_phys_bits);
|
|
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
|
|
+ return !(addr >> boot_cpu_data.x86_phys_bits);
|
|
+#else
|
|
+ return 1;
|
|
+#endif
|
|
}
|
|
|
|
+#ifdef CONFIG_X86_64
|
|
+
|
|
#define phys_base 0
|
|
|
|
unsigned long __phys_addr(unsigned long x)
|
|
@@ -41,8 +45,7 @@ unsigned long __phys_addr(unsigned long
|
|
} else {
|
|
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
|
|
x -= PAGE_OFFSET;
|
|
- VIRTUAL_BUG_ON(system_state == SYSTEM_BOOTING ? x > MAXMEM :
|
|
- !phys_addr_valid(x));
|
|
+ VIRTUAL_BUG_ON(!phys_addr_valid(x));
|
|
}
|
|
return x;
|
|
}
|
|
@@ -59,10 +62,8 @@ bool __virt_addr_valid(unsigned long x)
|
|
if (x < PAGE_OFFSET)
|
|
return false;
|
|
x -= PAGE_OFFSET;
|
|
- if (system_state == SYSTEM_BOOTING ?
|
|
- x > MAXMEM : !phys_addr_valid(x)) {
|
|
+ if (!phys_addr_valid(x))
|
|
return false;
|
|
- }
|
|
}
|
|
|
|
return pfn_valid(x >> PAGE_SHIFT);
|
|
@@ -73,18 +74,12 @@ EXPORT_SYMBOL(__virt_addr_valid);
|
|
|
|
#else
|
|
|
|
-static inline int phys_addr_valid(unsigned long addr)
|
|
-{
|
|
- return 1;
|
|
-}
|
|
-
|
|
#ifdef CONFIG_DEBUG_VIRTUAL
|
|
unsigned long __phys_addr(unsigned long x)
|
|
{
|
|
- /* VMALLOC_* aren't constants; not available at the boot time */
|
|
+ /* VMALLOC_* aren't constants */
|
|
VIRTUAL_BUG_ON(x < PAGE_OFFSET);
|
|
- VIRTUAL_BUG_ON(system_state != SYSTEM_BOOTING &&
|
|
- is_vmalloc_addr((void *) x));
|
|
+ VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
|
|
return x - PAGE_OFFSET;
|
|
}
|
|
EXPORT_SYMBOL(__phys_addr);
|
|
@@ -94,7 +89,9 @@ bool __virt_addr_valid(unsigned long x)
|
|
{
|
|
if (x < PAGE_OFFSET)
|
|
return false;
|
|
- if (system_state != SYSTEM_BOOTING && is_vmalloc_addr((void *) x))
|
|
+ if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
|
|
+ return false;
|
|
+ if (x >= FIXADDR_START)
|
|
return false;
|
|
return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
|
|
}
|
|
@@ -462,16 +459,17 @@ static void __iomem *__ioremap_caller(re
|
|
return NULL;
|
|
area->phys_addr = phys_addr;
|
|
vaddr = (unsigned long) area->addr;
|
|
- if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
|
|
- size, prot, domid)) {
|
|
+
|
|
+ if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
|
|
free_memtype(phys_addr, phys_addr + size);
|
|
free_vm_area(area);
|
|
return NULL;
|
|
}
|
|
|
|
- if (ioremap_change_attr(vaddr, size, prot_val) < 0) {
|
|
+ if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
|
|
+ size, prot, domid)) {
|
|
free_memtype(phys_addr, phys_addr + size);
|
|
- vunmap(area->addr);
|
|
+ free_vm_area(area);
|
|
return NULL;
|
|
}
|
|
|
|
@@ -528,7 +526,7 @@ EXPORT_SYMBOL(ioremap_nocache);
|
|
*
|
|
* Must be freed with iounmap.
|
|
*/
|
|
-void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size)
|
|
+void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
|
|
{
|
|
if (pat_enabled)
|
|
return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
|
|
@@ -558,7 +556,8 @@ static void __iomem *ioremap_default(res
|
|
* - UC_MINUS for non-WB-able memory with no other conflicting mappings
|
|
* - Inherit from confliting mappings otherwise
|
|
*/
|
|
- err = reserve_memtype(phys_addr, phys_addr + size, -1, &flags);
|
|
+ err = reserve_memtype(phys_addr, phys_addr + size,
|
|
+ _PAGE_CACHE_WB, &flags);
|
|
if (err < 0)
|
|
return NULL;
|
|
|
|
@@ -697,13 +696,19 @@ static inline pte_t * __init early_iorem
|
|
return &bm_pte[pte_index(addr)];
|
|
}
|
|
|
|
+static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata;
|
|
+
|
|
void __init early_ioremap_init(void)
|
|
{
|
|
pmd_t *pmd;
|
|
+ int i;
|
|
|
|
if (early_ioremap_debug)
|
|
printk(KERN_INFO "early_ioremap_init()\n");
|
|
|
|
+ for (i = 0; i < FIX_BTMAPS_SLOTS; i++)
|
|
+ slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i);
|
|
+
|
|
pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
|
|
memset(bm_pte, 0, sizeof(bm_pte));
|
|
make_lowmem_page_readonly(bm_pte, XENFEAT_writable_page_tables);
|
|
@@ -734,7 +739,7 @@ void __init early_ioremap_reset(void)
|
|
}
|
|
|
|
static void __init __early_set_fixmap(enum fixed_addresses idx,
|
|
- unsigned long phys, pgprot_t flags)
|
|
+ phys_addr_t phys, pgprot_t flags)
|
|
{
|
|
unsigned long addr = __fix_to_virt(idx);
|
|
pte_t *pte;
|
|
@@ -753,7 +758,7 @@ static void __init __early_set_fixmap(en
|
|
}
|
|
|
|
static inline void __init early_set_fixmap(enum fixed_addresses idx,
|
|
- unsigned long phys, pgprot_t prot)
|
|
+ phys_addr_t phys, pgprot_t prot)
|
|
{
|
|
if (after_paging_init)
|
|
__set_fixmap(idx, phys, prot);
|
|
@@ -771,6 +776,7 @@ static inline void __init early_clear_fi
|
|
|
|
static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata;
|
|
static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata;
|
|
+
|
|
static int __init check_early_ioremap_leak(void)
|
|
{
|
|
int count = 0;
|
|
@@ -792,9 +798,11 @@ static int __init check_early_ioremap_le
|
|
}
|
|
late_initcall(check_early_ioremap_leak);
|
|
|
|
-static void __init __iomem *__early_ioremap(unsigned long phys_addr, unsigned long size, pgprot_t prot)
|
|
+static void __init __iomem *
|
|
+__early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot)
|
|
{
|
|
- unsigned long offset, last_addr;
|
|
+ unsigned long offset;
|
|
+ resource_size_t last_addr;
|
|
unsigned int nrpages;
|
|
enum fixed_addresses idx0, idx;
|
|
int i, slot;
|
|
@@ -810,15 +818,15 @@ static void __init __iomem *__early_iore
|
|
}
|
|
|
|
if (slot < 0) {
|
|
- printk(KERN_INFO "early_iomap(%08lx, %08lx) not found slot\n",
|
|
- phys_addr, size);
|
|
+ printk(KERN_INFO "early_iomap(%08llx, %08lx) not found slot\n",
|
|
+ (u64)phys_addr, size);
|
|
WARN_ON(1);
|
|
return NULL;
|
|
}
|
|
|
|
if (early_ioremap_debug) {
|
|
- printk(KERN_INFO "early_ioremap(%08lx, %08lx) [%d] => ",
|
|
- phys_addr, size, slot);
|
|
+ printk(KERN_INFO "early_ioremap(%08llx, %08lx) [%d] => ",
|
|
+ (u64)phys_addr, size, slot);
|
|
dump_stack();
|
|
}
|
|
|
|
@@ -858,20 +866,28 @@ static void __init __iomem *__early_iore
|
|
--nrpages;
|
|
}
|
|
if (early_ioremap_debug)
|
|
- printk(KERN_CONT "%08lx + %08lx\n", offset, fix_to_virt(idx0));
|
|
+ printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]);
|
|
|
|
- prev_map[slot] = (void __iomem *)(offset + fix_to_virt(idx0));
|
|
+ prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]);
|
|
return prev_map[slot];
|
|
}
|
|
|
|
/* Remap an IO device */
|
|
-void __init __iomem *early_ioremap(unsigned long phys_addr, unsigned long size)
|
|
+void __init __iomem *
|
|
+early_ioremap(resource_size_t phys_addr, unsigned long size)
|
|
{
|
|
+ /*
|
|
+ * Don't remap the low PCI/ISA area, it's always mapped.
|
|
+ */
|
|
+ if (is_initial_xendomain() && is_ISA_range(phys_addr, phys_addr + size - 1))
|
|
+ return (__force void __iomem *)isa_bus_to_virt((unsigned long)phys_addr);
|
|
+
|
|
return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO);
|
|
}
|
|
|
|
/* Remap memory */
|
|
-void __init __iomem *early_memremap(unsigned long phys_addr, unsigned long size)
|
|
+void __init __iomem *
|
|
+early_memremap(resource_size_t phys_addr, unsigned long size)
|
|
{
|
|
return __early_ioremap(phys_to_machine(phys_addr), size, PAGE_KERNEL);
|
|
}
|
|
@@ -884,6 +900,15 @@ void __init early_iounmap(void __iomem *
|
|
enum fixed_addresses idx;
|
|
int i, slot;
|
|
|
|
+ /*
|
|
+ * early_ioremap special-cases the PCI/ISA range by not instantiating a
|
|
+ * vm_area and by simply returning an address into the kernel mapping
|
|
+ * of ISA space. So handle that here.
|
|
+ */
|
|
+ if ((unsigned long)addr >= fix_to_virt(FIX_ISAMAP_BEGIN)
|
|
+ && (unsigned long)addr < fix_to_virt(FIX_ISAMAP_END - 1))
|
|
+ return;
|
|
+
|
|
slot = -1;
|
|
for (i = 0; i < FIX_BTMAPS_SLOTS; i++) {
|
|
if (prev_map[i] == addr) {
|
|
@@ -928,8 +953,3 @@ void __init early_iounmap(void __iomem *
|
|
}
|
|
prev_map[slot] = NULL;
|
|
}
|
|
-
|
|
-void __this_fixmap_does_not_exist(void)
|
|
-{
|
|
- WARN_ON(1);
|
|
-}
|
|
--- head-2010-05-25.orig/arch/x86/mm/pageattr-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pageattr-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -16,6 +16,7 @@
|
|
#include <asm/processor.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/sections.h>
|
|
+#include <asm/setup.h>
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/proto.h>
|
|
@@ -33,6 +34,7 @@ struct cpa_data {
|
|
unsigned long pfn;
|
|
unsigned force_split : 1;
|
|
int curpage;
|
|
+ struct page **pages;
|
|
};
|
|
|
|
/*
|
|
@@ -45,6 +47,7 @@ static DEFINE_SPINLOCK(cpa_lock);
|
|
|
|
#define CPA_FLUSHTLB 1
|
|
#define CPA_ARRAY 2
|
|
+#define CPA_PAGES_ARRAY 4
|
|
|
|
#ifdef CONFIG_PROC_FS
|
|
static unsigned long direct_pages_count[PG_LEVEL_NUM];
|
|
@@ -95,7 +98,7 @@ static inline unsigned long highmap_star
|
|
|
|
static inline unsigned long highmap_end_pfn(void)
|
|
{
|
|
- return __pa(roundup((unsigned long)_end, PMD_SIZE)) >> PAGE_SHIFT;
|
|
+ return __pa(roundup(_brk_end, PMD_SIZE)) >> PAGE_SHIFT;
|
|
}
|
|
|
|
#endif
|
|
@@ -150,7 +153,7 @@ static void __cpa_flush_all(void *arg)
|
|
*/
|
|
__flush_tlb_all();
|
|
|
|
- if (cache && boot_cpu_data.x86_model >= 4)
|
|
+ if (cache && boot_cpu_data.x86 >= 4)
|
|
wbinvd();
|
|
}
|
|
|
|
@@ -201,38 +204,41 @@ static void cpa_flush_range(unsigned lon
|
|
}
|
|
}
|
|
|
|
-static void cpa_flush_array(unsigned long *start, int numpages, int cache)
|
|
+static void cpa_flush_array(unsigned long *start, int numpages, int cache,
|
|
+ int in_flags, struct page **pages)
|
|
{
|
|
unsigned int i, level;
|
|
- unsigned long *addr;
|
|
+ unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */
|
|
|
|
BUG_ON(irqs_disabled());
|
|
|
|
- on_each_cpu(__cpa_flush_range, NULL, 1);
|
|
+ on_each_cpu(__cpa_flush_all, (void *) do_wbinvd, 1);
|
|
|
|
- if (!cache)
|
|
+ if (!cache || do_wbinvd)
|
|
return;
|
|
|
|
- /* 4M threshold */
|
|
- if (numpages >= 1024) {
|
|
- if (boot_cpu_data.x86_model >= 4)
|
|
- wbinvd();
|
|
- return;
|
|
- }
|
|
/*
|
|
* We only need to flush on one CPU,
|
|
* clflush is a MESI-coherent instruction that
|
|
* will cause all other CPUs to flush the same
|
|
* cachelines:
|
|
*/
|
|
- for (i = 0, addr = start; i < numpages; i++, addr++) {
|
|
- pte_t *pte = lookup_address(*addr, &level);
|
|
+ for (i = 0; i < numpages; i++) {
|
|
+ unsigned long addr;
|
|
+ pte_t *pte;
|
|
+
|
|
+ if (in_flags & CPA_PAGES_ARRAY)
|
|
+ addr = (unsigned long)page_address(pages[i]);
|
|
+ else
|
|
+ addr = start[i];
|
|
+
|
|
+ pte = lookup_address(addr, &level);
|
|
|
|
/*
|
|
* Only flush present addresses:
|
|
*/
|
|
if (pte && (__pte_val(*pte) & _PAGE_PRESENT))
|
|
- clflush_cache_range((void *) *addr, PAGE_SIZE);
|
|
+ clflush_cache_range((void *)addr, PAGE_SIZE);
|
|
}
|
|
}
|
|
|
|
@@ -498,6 +504,13 @@ static int split_large_page(pte_t *kpte,
pbase = (pte_t *)page_address(base);
paravirt_alloc_pte(&init_mm, page_to_pfn(base));
ref_prot = pte_pgprot(pte_clrhuge(*kpte));
+ /*
+ * If we ever want to utilize the PAT bit, we need to
+ * update this function to make sure it's converted from
+ * bit 12 to bit 7 when we cross from the 2MB level to
+ * the 4K level:
+ */
+ WARN_ON_ONCE(pgprot_val(ref_prot) & _PAGE_PAT_LARGE);

#ifdef CONFIG_X86_64
if (level == PG_LEVEL_1G) {
@@ -597,7 +610,9 @@ static int __change_page_attr(struct cpa
unsigned int level;
pte_t *kpte, old_pte;

- if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY)
+ address = (unsigned long)page_address(cpa->pages[cpa->curpage]);
+ else if (cpa->flags & CPA_ARRAY)
address = cpa->vaddr[cpa->curpage];
else
address = *cpa->vaddr;
@@ -701,7 +716,9 @@ static int cpa_process_alias(struct cpa_
* No need to redo, when the primary call touched the direct
* mapping already:
*/
- if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & CPA_PAGES_ARRAY)
+ vaddr = (unsigned long)page_address(cpa->pages[cpa->curpage]);
+ else if (cpa->flags & CPA_ARRAY)
vaddr = cpa->vaddr[cpa->curpage];
else
vaddr = *cpa->vaddr;
@@ -712,7 +729,7 @@ static int cpa_process_alias(struct cpa_
alias_cpa = *cpa;
temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
alias_cpa.vaddr = &temp_cpa_vaddr;
- alias_cpa.flags &= ~CPA_ARRAY;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);


ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -725,7 +742,7 @@ static int cpa_process_alias(struct cpa_
* No need to redo, when the primary call touched the high
* mapping already:
*/
- if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
+ if (within(vaddr, (unsigned long) _text, _brk_end))
return 0;

/*
@@ -738,7 +755,7 @@ static int cpa_process_alias(struct cpa_
alias_cpa = *cpa;
temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map;
alias_cpa.vaddr = &temp_cpa_vaddr;
- alias_cpa.flags &= ~CPA_ARRAY;
+ alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);

/*
* The high mapping range is imprecise, so ignore the return value.
@@ -759,7 +776,7 @@ static int __change_page_attr_set_clr(st
*/
cpa->numpages = numpages;
/* for array changes, we can't use large page */
- if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & (CPA_ARRAY | CPA_PAGES_ARRAY))
cpa->numpages = 1;

if (!debug_pagealloc)
@@ -783,7 +800,7 @@ static int __change_page_attr_set_clr(st
*/
BUG_ON(cpa->numpages > numpages);
numpages -= cpa->numpages;
- if (cpa->flags & CPA_ARRAY)
+ if (cpa->flags & (CPA_PAGES_ARRAY | CPA_ARRAY))
cpa->curpage++;
else
*cpa->vaddr += cpa->numpages * PAGE_SIZE;
@@ -800,7 +817,8 @@ static inline int cache_attr(pgprot_t at

static int change_page_attr_set_clr(unsigned long *addr, int numpages,
pgprot_t mask_set, pgprot_t mask_clr,
- int force_split, int array)
+ int force_split, int in_flag,
+ struct page **pages)
{
struct cpa_data cpa;
int ret, cache, checkalias;
@@ -815,15 +833,7 @@ static int change_page_attr_set_clr(unsi
return 0;

/* Ensure we are PAGE_SIZE aligned */
- if (!array) {
- if (*addr & ~PAGE_MASK) {
- *addr &= PAGE_MASK;
- /*
- * People should not be passing in unaligned addresses:
- */
- WARN_ON_ONCE(1);
- }
- } else {
+ if (in_flag & CPA_ARRAY) {
int i;
for (i = 0; i < numpages; i++) {
if (addr[i] & ~PAGE_MASK) {
@@ -831,6 +841,18 @@ static int change_page_attr_set_clr(unsi
WARN_ON_ONCE(1);
}
}
+ } else if (!(in_flag & CPA_PAGES_ARRAY)) {
+ /*
+ * in_flag of CPA_PAGES_ARRAY implies it is aligned.
+ * No need to cehck in that case
+ */
+ if (*addr & ~PAGE_MASK) {
+ *addr &= PAGE_MASK;
+ /*
+ * People should not be passing in unaligned addresses:
+ */
+ WARN_ON_ONCE(1);
+ }
}

/* Must avoid aliasing mappings in the highmem code */
@@ -848,6 +870,7 @@ static int change_page_attr_set_clr(unsi
xen_multicall_flush(true);

cpa.vaddr = addr;
+ cpa.pages = pages;
cpa.numpages = numpages;
cpa.mask_set = mask_set;
cpa.mask_clr = mask_clr;
@@ -855,8 +878,8 @@ static int change_page_attr_set_clr(unsi
cpa.curpage = 0;
cpa.force_split = force_split;

- if (array)
- cpa.flags |= CPA_ARRAY;
+ if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
+ cpa.flags |= in_flag;

/* No alias checking for _NX bit modifications */
checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
@@ -882,9 +905,10 @@ static int change_page_attr_set_clr(unsi
* wbindv):
*/
if (!ret && cpu_has_clflush) {
- if (cpa.flags & CPA_ARRAY)
- cpa_flush_array(addr, numpages, cache);
- else
+ if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) {
+ cpa_flush_array(addr, numpages, cache,
+ cpa.flags, pages);
+ } else
cpa_flush_range(*addr, numpages, cache);
} else
cpa_flush_all(cache);
@@ -905,14 +929,28 @@ static inline int change_page_attr_set(u
pgprot_t mask, int array)
{
return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
- array);
+ (array ? CPA_ARRAY : 0), NULL);
}

static inline int change_page_attr_clear(unsigned long *addr, int numpages,
pgprot_t mask, int array)
{
return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
- array);
+ (array ? CPA_ARRAY : 0), NULL);
+}
+
+static inline int cpa_set_pages_array(struct page **pages, int numpages,
+ pgprot_t mask)
+{
+ return change_page_attr_set_clr(NULL, numpages, mask, __pgprot(0), 0,
+ CPA_PAGES_ARRAY, pages);
+}
+
+static inline int cpa_clear_pages_array(struct page **pages, int numpages,
+ pgprot_t mask)
+{
+ return change_page_attr_set_clr(NULL, numpages, __pgprot(0), mask, 0,
+ CPA_PAGES_ARRAY, pages);
}

#ifdef CONFIG_XEN
@@ -971,71 +1009,94 @@ int _set_memory_uc(unsigned long addr, i

int set_memory_uc(unsigned long addr, int numpages)
{
+ int ret;
+
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
- if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
- _PAGE_CACHE_UC_MINUS, NULL))
- return -EINVAL;
+ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ _PAGE_CACHE_UC_MINUS, NULL);
+ if (ret)
+ goto out_err;
+
+ ret = _set_memory_uc(addr, numpages);
+ if (ret)
+ goto out_free;

- return _set_memory_uc(addr, numpages);
+ return 0;
+
+out_free:
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+ return ret;
}
EXPORT_SYMBOL(set_memory_uc);

int set_memory_array_uc(unsigned long *addr, int addrinarray)
{
- unsigned long start;
- unsigned long end;
- int i;
+ int i, j;
+ int ret;
+
/*
* for now UC MINUS. see comments in ioremap_nocache()
*/
for (i = 0; i < addrinarray; i++) {
- start = __pa(addr[i]);
- for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
- if (end != __pa(addr[i + 1]))
- break;
- i++;
- }
- if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
- goto out;
+ ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE,
+ _PAGE_CACHE_UC_MINUS, NULL);
+ if (ret)
+ goto out_free;
}

- return change_page_attr_set(addr, addrinarray,
+ ret = change_page_attr_set(addr, addrinarray,
__pgprot(_PAGE_CACHE_UC_MINUS), 1);
-out:
- for (i = 0; i < addrinarray; i++) {
- unsigned long tmp = __pa(addr[i]);
+ if (ret)
+ goto out_free;

- if (tmp == start)
- break;
- for (end = tmp + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
- if (end != __pa(addr[i + 1]))
- break;
- i++;
- }
- free_memtype(tmp, end);
- }
- return -EINVAL;
+ return 0;
+
+out_free:
+ for (j = 0; j < i; j++)
+ free_memtype(__pa(addr[j]), __pa(addr[j]) + PAGE_SIZE);
+
+ return ret;
}
EXPORT_SYMBOL(set_memory_array_uc);

int _set_memory_wc(unsigned long addr, int numpages)
{
- return change_page_attr_set(&addr, numpages,
+ int ret;
+ ret = change_page_attr_set(&addr, numpages,
+ __pgprot(_PAGE_CACHE_UC_MINUS), 0);
+
+ if (!ret) {
+ ret = change_page_attr_set(&addr, numpages,
__pgprot(_PAGE_CACHE_WC), 0);
+ }
+ return ret;
}

int set_memory_wc(unsigned long addr, int numpages)
{
+ int ret;
+
if (!pat_enabled)
return set_memory_uc(addr, numpages);

- if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
- _PAGE_CACHE_WC, NULL))
- return -EINVAL;
+ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE,
+ _PAGE_CACHE_WC, NULL);
+ if (ret)
+ goto out_err;

- return _set_memory_wc(addr, numpages);
+ ret = _set_memory_wc(addr, numpages);
+ if (ret)
+ goto out_free;
+
+ return 0;
+
+out_free:
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+out_err:
+ return ret;
}
EXPORT_SYMBOL(set_memory_wc);

@@ -1047,29 +1108,31 @@ int _set_memory_wb(unsigned long addr, i

int set_memory_wb(unsigned long addr, int numpages)
{
- free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ int ret;

- return _set_memory_wb(addr, numpages);
+ ret = _set_memory_wb(addr, numpages);
+ if (ret)
+ return ret;
+
+ free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE);
+ return 0;
}
EXPORT_SYMBOL(set_memory_wb);

int set_memory_array_wb(unsigned long *addr, int addrinarray)
{
int i;
+ int ret;

- for (i = 0; i < addrinarray; i++) {
- unsigned long start = __pa(addr[i]);
- unsigned long end;
-
- for (end = start + PAGE_SIZE; i < addrinarray - 1; end += PAGE_SIZE) {
- if (end != __pa(addr[i + 1]))
- break;
- i++;
- }
- free_memtype(start, end);
- }
- return change_page_attr_clear(addr, addrinarray,
+ ret = change_page_attr_clear(addr, addrinarray,
__pgprot(_PAGE_CACHE_MASK), 1);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < addrinarray; i++)
+ free_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE);
+
+ return 0;
}
EXPORT_SYMBOL(set_memory_array_wb);

@@ -1105,7 +1168,7 @@ int set_memory_np(unsigned long addr, in
int set_memory_4k(unsigned long addr, int numpages)
{
return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
- __pgprot(0), 1, 0);
+ __pgprot(0), 1, 0, NULL);
}

int set_pages_uc(struct page *page, int numpages)
@@ -1116,6 +1179,35 @@ int set_pages_uc(struct page *page, int
}
EXPORT_SYMBOL(set_pages_uc);

+int set_pages_array_uc(struct page **pages, int addrinarray)
+{
+ unsigned long start;
+ unsigned long end;
+ int i;
+ int free_idx;
+
+ for (i = 0; i < addrinarray; i++) {
+ start = (unsigned long)page_address(pages[i]);
+ end = start + PAGE_SIZE;
+ if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL))
+ goto err_out;
+ }
+
+ if (cpa_set_pages_array(pages, addrinarray,
+ __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) {
+ return 0; /* Success */
+ }
+err_out:
+ free_idx = i;
+ for (i = 0; i < free_idx; i++) {
+ start = (unsigned long)page_address(pages[i]);
+ end = start + PAGE_SIZE;
+ free_memtype(start, end);
+ }
+ return -EINVAL;
+}
+EXPORT_SYMBOL(set_pages_array_uc);
+
int set_pages_wb(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
@@ -1124,6 +1216,28 @@ int set_pages_wb(struct page *page, int
}
EXPORT_SYMBOL(set_pages_wb);

+int set_pages_array_wb(struct page **pages, int addrinarray)
+{
+ int retval;
+ unsigned long start;
+ unsigned long end;
+ int i;
+
+ retval = cpa_clear_pages_array(pages, addrinarray,
+ __pgprot(_PAGE_CACHE_MASK));
+ if (retval)
+ return retval;
+
+ for (i = 0; i < addrinarray; i++) {
+ start = (unsigned long)page_address(pages[i]);
+ end = start + PAGE_SIZE;
+ free_memtype(start, end);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(set_pages_array_wb);
+
int set_pages_x(struct page *page, int numpages)
{
unsigned long addr = (unsigned long)page_address(page);
--- head-2010-05-25.orig/arch/x86/mm/pat-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pat-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -31,7 +31,7 @@
|
|
#ifdef CONFIG_X86_PAT
|
|
int __read_mostly pat_enabled = 1;
|
|
|
|
-void __cpuinit pat_disable(char *reason)
|
|
+static inline void pat_disable(const char *reason)
|
|
{
|
|
pat_enabled = 0;
|
|
printk(KERN_INFO "%s\n", reason);
|
|
@@ -43,6 +43,11 @@ static int __init nopat(char *str)
|
|
return 0;
|
|
}
|
|
early_param("nopat", nopat);
|
|
+#else
|
|
+static inline void pat_disable(const char *reason)
|
|
+{
|
|
+ (void)reason;
|
|
+}
|
|
#endif
|
|
|
|
|
|
@@ -79,16 +84,20 @@ void pat_init(void)
|
|
if (!pat_enabled)
|
|
return;
|
|
|
|
- /* Paranoia check. */
|
|
- if (!cpu_has_pat && boot_pat_state) {
|
|
- /*
|
|
- * If this happens we are on a secondary CPU, but
|
|
- * switched to PAT on the boot CPU. We have no way to
|
|
- * undo PAT.
|
|
- */
|
|
- printk(KERN_ERR "PAT enabled, "
|
|
- "but not supported by secondary CPU\n");
|
|
- BUG();
|
|
+ if (!cpu_has_pat) {
|
|
+ if (!boot_pat_state) {
|
|
+ pat_disable("PAT not supported by CPU.");
|
|
+ return;
|
|
+ } else {
|
|
+ /*
|
|
+ * If this happens we are on a secondary CPU, but
|
|
+ * switched to PAT on the boot CPU. We have no way to
|
|
+ * undo PAT.
|
|
+ */
|
|
+ printk(KERN_ERR "PAT enabled, "
|
|
+ "but not supported by secondary CPU\n");
|
|
+ BUG();
|
|
+ }
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
@@ -195,10 +204,10 @@ static unsigned long pat_x_mtrr_type(u64
|
|
u8 mtrr_type;
|
|
|
|
mtrr_type = mtrr_type_lookup(start, end);
|
|
- if (mtrr_type == MTRR_TYPE_UNCACHABLE)
|
|
- return _PAGE_CACHE_UC;
|
|
- if (mtrr_type == MTRR_TYPE_WRCOMB)
|
|
- return _PAGE_CACHE_WC;
|
|
+ if (mtrr_type != MTRR_TYPE_WRBACK)
|
|
+ return _PAGE_CACHE_UC_MINUS;
|
|
+
|
|
+ return _PAGE_CACHE_WB;
|
|
}
|
|
|
|
return req_type;
|
|
@@ -371,23 +380,13 @@ int reserve_memtype(u64 start, u64 end,
|
|
return 0;
|
|
}
|
|
|
|
- if (req_type == -1) {
|
|
- /*
|
|
- * Call mtrr_lookup to get the type hint. This is an
|
|
- * optimization for /dev/mem mmap'ers into WB memory (BIOS
|
|
- * tools and ACPI tools). Use WB request for WB memory and use
|
|
- * UC_MINUS otherwise.
|
|
- */
|
|
- u8 mtrr_type = mtrr_type_lookup(start, end);
|
|
-
|
|
- if (mtrr_type == MTRR_TYPE_WRBACK)
|
|
- actual_type = _PAGE_CACHE_WB;
|
|
- else
|
|
- actual_type = _PAGE_CACHE_UC_MINUS;
|
|
- } else {
|
|
- actual_type = pat_x_mtrr_type(start, end,
|
|
- req_type & _PAGE_CACHE_MASK);
|
|
- }
|
|
+ /*
|
|
+ * Call mtrr_lookup to get the type hint. This is an
|
|
+ * optimization for /dev/mem mmap'ers into WB memory (BIOS
|
|
+ * tools and ACPI tools). Use WB request for WB memory and use
|
|
+ * UC_MINUS otherwise.
|
|
+ */
|
|
+ actual_type = pat_x_mtrr_type(start, end, req_type & _PAGE_CACHE_MASK);
|
|
|
|
if (new_type)
|
|
*new_type = actual_type;
|
|
@@ -565,9 +564,7 @@ static inline int range_is_allowed(unsig
|
|
int phys_mem_access_prot_allowed(struct file *file, unsigned long mfn,
|
|
unsigned long size, pgprot_t *vma_prot)
|
|
{
|
|
- u64 addr = (u64)mfn << PAGE_SHIFT;
|
|
- unsigned long flags = -1;
|
|
- int retval;
|
|
+ unsigned long flags = _PAGE_CACHE_WB;
|
|
|
|
if (!range_is_allowed(mfn, size))
|
|
return 0;
|
|
@@ -597,60 +594,21 @@ int phys_mem_access_prot_allowed(struct
|
|
#endif
|
|
#endif
|
|
|
|
- /*
|
|
- * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot.
|
|
- *
|
|
- * Without O_SYNC, we want to get
|
|
- * - WB for WB-able memory and no other conflicting mappings
|
|
- * - UC_MINUS for non-WB-able memory with no other conflicting mappings
|
|
- * - Inherit from confliting mappings otherwise
|
|
- */
|
|
- if (flags != -1) {
|
|
- retval = reserve_memtype(addr, addr + size, flags, NULL);
|
|
- } else {
|
|
- retval = reserve_memtype(addr, addr + size, -1, &flags);
|
|
- }
|
|
-
|
|
- if (retval < 0)
|
|
- return 0;
|
|
-
|
|
- if (ioremap_check_change_attr(mfn, size, flags) < 0) {
|
|
- free_memtype(addr, addr + size);
|
|
- printk(KERN_INFO
|
|
- "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
|
|
- current->comm, current->pid,
|
|
- cattr_name(flags),
|
|
- addr, addr + size);
|
|
- return 0;
|
|
- }
|
|
-
|
|
*vma_prot = __pgprot((pgprot_val(*vma_prot) & ~_PAGE_CACHE_MASK) |
|
|
flags);
|
|
return 1;
|
|
}
|
|
|
|
-void map_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
|
|
-{
|
|
- unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK);
|
|
- u64 addr = (u64)mfn << PAGE_SHIFT;
|
|
- unsigned long flags;
|
|
-
|
|
- reserve_memtype(addr, addr + size, want_flags, &flags);
|
|
- if (flags != want_flags) {
|
|
- printk(KERN_INFO
|
|
- "%s:%d /dev/mem expected mapping type %s for %Lx-%Lx, got %s\n",
|
|
- current->comm, current->pid,
|
|
- cattr_name(want_flags),
|
|
- addr, (unsigned long long)(addr + size),
|
|
- cattr_name(flags));
|
|
- }
|
|
-}
|
|
-
|
|
-void unmap_devmem(unsigned long mfn, unsigned long size, pgprot_t vma_prot)
|
|
+/*
|
|
+ * Change the memory type for the physial address range in kernel identity
|
|
+ * mapping space if that range is a part of identity map.
|
|
+ */
|
|
+int kernel_map_sync_memtype(u64 ma, unsigned long size, unsigned long flags)
|
|
{
|
|
- u64 addr = (u64)mfn << PAGE_SHIFT;
|
|
+ if (!pat_enabled)
|
|
+ return 0;
|
|
|
|
- free_memtype(addr, addr + size);
|
|
+ return ioremap_check_change_attr(ma >> PAGE_SHIFT, size, flags);
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
@@ -663,17 +621,18 @@ static int reserve_pfn_range(u64 paddr,
|
|
int strict_prot)
|
|
{
|
|
int is_ram = 0;
|
|
- int id_sz, ret;
|
|
- unsigned long flags;
|
|
+ int ret;
|
|
unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
|
|
+ unsigned long flags = want_flags;
|
|
|
|
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
|
|
|
|
/*
|
|
- * reserve_pfn_range() doesn't support RAM pages.
|
|
+ * reserve_pfn_range() doesn't support RAM pages. Maintain the current
|
|
+ * behavior with RAM pages by returning success.
|
|
*/
|
|
if (is_ram != 0)
|
|
- return -EINVAL;
|
|
+ return 0;
|
|
|
|
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
|
|
if (ret)
|
|
@@ -700,23 +659,8 @@ static int reserve_pfn_range(u64 paddr,
|
|
flags);
|
|
}
|
|
|
|
- /* Need to keep identity mapping in sync */
|
|
- if (paddr >= __pa(high_memory))
|
|
- return 0;
|
|
-
|
|
- id_sz = (__pa(high_memory) < paddr + size) ?
|
|
- __pa(high_memory) - paddr :
|
|
- size;
|
|
-
|
|
- if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
|
|
+ if (kernel_map_sync_memtype(paddr, size, flags) < 0) {
|
|
free_memtype(paddr, paddr + size);
|
|
- printk(KERN_ERR
|
|
- "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
|
|
- "for %Lx-%Lx\n",
|
|
- current->comm, current->pid,
|
|
- cattr_name(flags),
|
|
- (unsigned long long)paddr,
|
|
- (unsigned long long)(paddr + size));
|
|
return -EINVAL;
|
|
}
|
|
return 0;
|
|
@@ -741,29 +685,28 @@ static void free_pfn_range(u64 paddr, un
|
|
*
|
|
* If the vma has a linear pfn mapping for the entire range, we get the prot
|
|
* from pte and reserve the entire vma range with single reserve_pfn_range call.
|
|
- * Otherwise, we reserve the entire vma range, my ging through the PTEs page
|
|
- * by page to get physical address and protection.
|
|
*/
|
|
int track_pfn_vma_copy(struct vm_area_struct *vma)
|
|
{
|
|
- int retval = 0;
|
|
- unsigned long i, j;
|
|
resource_size_t paddr;
|
|
unsigned long prot;
|
|
- unsigned long vma_start = vma->vm_start;
|
|
- unsigned long vma_end = vma->vm_end;
|
|
- unsigned long vma_size = vma_end - vma_start;
|
|
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
|
|
pgprot_t pgprot;
|
|
|
|
if (!pat_enabled)
|
|
return 0;
|
|
|
|
+ /*
|
|
+ * For now, only handle remap_pfn_range() vmas where
|
|
+ * is_linear_pfn_mapping() == TRUE. Handling of
|
|
+ * vm_insert_pfn() is TBD.
|
|
+ */
|
|
if (is_linear_pfn_mapping(vma)) {
|
|
/*
|
|
* reserve the whole chunk covered by vma. We need the
|
|
* starting address and protection from pte.
|
|
*/
|
|
- if (follow_phys(vma, vma_start, 0, &prot, &paddr)) {
|
|
+ if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
|
|
WARN_ON_ONCE(1);
|
|
return -EINVAL;
|
|
}
|
|
@@ -771,28 +714,7 @@ int track_pfn_vma_copy(struct vm_area_st
|
|
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
|
|
}
|
|
|
|
- /* reserve entire vma page by page, using pfn and prot from pte */
|
|
- for (i = 0; i < vma_size; i += PAGE_SIZE) {
|
|
- if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
|
|
- continue;
|
|
-
|
|
- pgprot = __pgprot(prot);
|
|
- retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
|
|
- if (retval)
|
|
- goto cleanup_ret;
|
|
- }
|
|
return 0;
|
|
-
|
|
-cleanup_ret:
|
|
- /* Reserve error: Cleanup partial reservation and return error */
|
|
- for (j = 0; j < i; j += PAGE_SIZE) {
|
|
- if (follow_phys(vma, vma_start + j, 0, &prot, &paddr))
|
|
- continue;
|
|
-
|
|
- free_pfn_range(paddr, PAGE_SIZE);
|
|
- }
|
|
-
|
|
- return retval;
|
|
}
|
|
|
|
/*
|
|
@@ -802,50 +724,28 @@ cleanup_ret:
|
|
* prot is passed in as a parameter for the new mapping. If the vma has a
|
|
* linear pfn mapping for the entire range reserve the entire vma range with
|
|
* single reserve_pfn_range call.
|
|
- * Otherwise, we look t the pfn and size and reserve only the specified range
|
|
- * page by page.
|
|
- *
|
|
- * Note that this function can be called with caller trying to map only a
|
|
- * subrange/page inside the vma.
|
|
*/
|
|
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
|
|
unsigned long pfn, unsigned long size)
|
|
{
|
|
- int retval = 0;
|
|
- unsigned long i, j;
|
|
- resource_size_t base_paddr;
|
|
resource_size_t paddr;
|
|
- unsigned long vma_start = vma->vm_start;
|
|
- unsigned long vma_end = vma->vm_end;
|
|
- unsigned long vma_size = vma_end - vma_start;
|
|
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
|
|
|
|
if (!pat_enabled)
|
|
return 0;
|
|
|
|
+ /*
|
|
+ * For now, only handle remap_pfn_range() vmas where
|
|
+ * is_linear_pfn_mapping() == TRUE. Handling of
|
|
+ * vm_insert_pfn() is TBD.
|
|
+ */
|
|
if (is_linear_pfn_mapping(vma)) {
|
|
/* reserve the whole chunk starting from vm_pgoff */
|
|
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
|
|
return reserve_pfn_range(paddr, vma_size, prot, 0);
|
|
}
|
|
|
|
- /* reserve page by page using pfn and size */
|
|
- base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
|
|
- for (i = 0; i < size; i += PAGE_SIZE) {
|
|
- paddr = base_paddr + i;
|
|
- retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
|
|
- if (retval)
|
|
- goto cleanup_ret;
|
|
- }
|
|
return 0;
|
|
-
|
|
-cleanup_ret:
|
|
- /* Reserve error: Cleanup partial reservation and return error */
|
|
- for (j = 0; j < i; j += PAGE_SIZE) {
|
|
- paddr = base_paddr + j;
|
|
- free_pfn_range(paddr, PAGE_SIZE);
|
|
- }
|
|
-
|
|
- return retval;
|
|
}
|
|
|
|
/*
|
|
@@ -856,39 +756,23 @@ cleanup_ret:
|
|
void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
|
|
unsigned long size)
|
|
{
|
|
- unsigned long i;
|
|
resource_size_t paddr;
|
|
- unsigned long prot;
|
|
- unsigned long vma_start = vma->vm_start;
|
|
- unsigned long vma_end = vma->vm_end;
|
|
- unsigned long vma_size = vma_end - vma_start;
|
|
+ unsigned long vma_size = vma->vm_end - vma->vm_start;
|
|
|
|
if (!pat_enabled)
|
|
return;
|
|
|
|
+ /*
|
|
+ * For now, only handle remap_pfn_range() vmas where
|
|
+ * is_linear_pfn_mapping() == TRUE. Handling of
|
|
+ * vm_insert_pfn() is TBD.
|
|
+ */
|
|
if (is_linear_pfn_mapping(vma)) {
|
|
/* free the whole chunk starting from vm_pgoff */
|
|
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
|
|
free_pfn_range(paddr, vma_size);
|
|
return;
|
|
}
|
|
-
|
|
- if (size != 0 && size != vma_size) {
|
|
- /* free page by page, using pfn and size */
|
|
- paddr = (resource_size_t)pfn << PAGE_SHIFT;
|
|
- for (i = 0; i < size; i += PAGE_SIZE) {
|
|
- paddr = paddr + i;
|
|
- free_pfn_range(paddr, PAGE_SIZE);
|
|
- }
|
|
- } else {
|
|
- /* free entire vma, page by page, using the pfn from pte */
|
|
- for (i = 0; i < vma_size; i += PAGE_SIZE) {
|
|
- if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
|
|
- continue;
|
|
-
|
|
- free_pfn_range(paddr, PAGE_SIZE);
|
|
- }
|
|
- }
|
|
}
|
|
#endif /* CONFIG_XEN */
|
|
|
|
--- head-2010-05-25.orig/arch/x86/mm/pgtable-xen.c 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pgtable-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -122,10 +122,6 @@ void __pud_free_tlb(struct mmu_gather *t
|
|
#endif /* PAGETABLE_LEVELS > 3 */
|
|
#endif /* PAGETABLE_LEVELS > 2 */
|
|
|
|
-#ifndef CONFIG_X86_64
|
|
-#define TASK_SIZE64 TASK_SIZE
|
|
-#endif
|
|
-
|
|
static void _pin_lock(struct mm_struct *mm, int lock) {
|
|
if (lock)
|
|
spin_lock(&mm->page_table_lock);
|
|
@@ -149,7 +145,7 @@ static void _pin_lock(struct mm_struct *
|
|
pgd_t *pgd = mm->pgd;
|
|
unsigned g;
|
|
|
|
- for (g = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
|
|
+ for (g = 0; g <= ((TASK_SIZE_MAX-1) / PGDIR_SIZE); g++, pgd++) {
|
|
pud_t *pud;
|
|
unsigned u;
|
|
|
|
@@ -230,10 +226,10 @@ static void pgd_walk(pgd_t *pgd_base, pg
|
|
* Cannot iterate up to USER_PTRS_PER_PGD on x86-64 as these pagetables
|
|
* may not be the 'current' task's pagetables (e.g., current may be
|
|
* 32-bit, but the pagetables may be for a 64-bit task).
|
|
- * Subtracting 1 from TASK_SIZE64 means the loop limit is correct
|
|
- * regardless of whether TASK_SIZE64 is a multiple of PGDIR_SIZE.
|
|
+ * Subtracting 1 from TASK_SIZE_MAX means the loop limit is correct
|
|
+ * regardless of whether TASK_SIZE_MAX is a multiple of PGDIR_SIZE.
|
|
*/
|
|
- for (g = 0, seq = 0; g <= ((TASK_SIZE64-1) / PGDIR_SIZE); g++, pgd++) {
|
|
+ for (g = 0, seq = 0; g <= ((TASK_SIZE_MAX-1) / PGDIR_SIZE); g++, pgd++) {
|
|
if (pgd_none(*pgd))
|
|
continue;
|
|
pud = pud_offset(pgd, 0);
|
|
@@ -736,9 +732,26 @@ int ptep_clear_flush_young(struct vm_are
|
|
return young;
|
|
}
|
|
|
|
+/**
|
|
+ * reserve_top_address - reserves a hole in the top of kernel address space
|
|
+ * @reserve - size of hole to reserve
|
|
+ *
|
|
+ * Can be used to relocate the fixmap area and poke a hole in the top
|
|
+ * of kernel address space to make room for a hypervisor.
|
|
+ */
|
|
+void __init reserve_top_address(unsigned long reserve)
|
|
+{
|
|
+#ifdef CONFIG_X86_32
|
|
+ BUG_ON(fixmaps_set > 0);
|
|
+ printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
|
|
+ (int)-reserve);
|
|
+ __FIXADDR_TOP = -reserve - PAGE_SIZE;
|
|
+#endif
|
|
+}
|
|
+
|
|
int fixmaps_set;
|
|
|
|
-void xen_set_fixmap(enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
|
|
+void xen_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags)
|
|
{
|
|
unsigned long address = __fix_to_virt(idx);
|
|
pte_t pte;
|
|
@@ -757,6 +770,8 @@ void xen_set_fixmap(enum fixed_addresses
|
|
set_pte_vaddr_pud(level3_user_pgt, address, pte);
|
|
break;
|
|
case FIX_EARLYCON_MEM_BASE:
|
|
+ case FIX_SHARED_INFO:
|
|
+ case FIX_ISAMAP_END ... FIX_ISAMAP_BEGIN:
|
|
xen_l1_entry_update(level1_fixmap_pgt + pte_index(address),
|
|
pfn_pte_ma(phys >> PAGE_SHIFT, flags));
|
|
fixmaps_set++;
|
|
--- head-2010-05-25.orig/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -25,6 +25,8 @@
|
|
#include <xen/features.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
+unsigned int __VMALLOC_RESERVE = 128 << 20;
|
|
+
|
|
/*
|
|
* Associate a virtual page frame with a given physical page frame
|
|
* and protection flags for that frame.
|
|
@@ -54,7 +56,7 @@ void set_pte_vaddr(unsigned long vaddr,
|
|
}
|
|
pte = pte_offset_kernel(pmd, vaddr);
|
|
if (pte_val(pteval))
|
|
- set_pte_present(&init_mm, vaddr, pte, pteval);
|
|
+ set_pte_at(&init_mm, vaddr, pte, pteval);
|
|
else
|
|
pte_clear(&init_mm, vaddr, pte);
|
|
|
|
@@ -109,21 +111,6 @@ unsigned long hypervisor_virt_start = HY
|
|
unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
|
|
EXPORT_SYMBOL(__FIXADDR_TOP);
|
|
|
|
-/**
|
|
- * reserve_top_address - reserves a hole in the top of kernel address space
|
|
- * @reserve - size of hole to reserve
|
|
- *
|
|
- * Can be used to relocate the fixmap area and poke a hole in the top
|
|
- * of kernel address space to make room for a hypervisor.
|
|
- */
|
|
-void __init reserve_top_address(unsigned long reserve)
|
|
-{
|
|
- BUG_ON(fixmaps_set > 0);
|
|
- printk(KERN_INFO "Reserving virtual address space above 0x%08x\n",
|
|
- (int)-reserve);
|
|
- __FIXADDR_TOP = -reserve - PAGE_SIZE;
|
|
-}
|
|
-
|
|
/*
|
|
* vmalloc=size forces the vmalloc area to be exactly 'size'
|
|
* bytes. This can be used to increase (or decrease) the
|
|
--- head-2010-05-25.orig/drivers/acpi/Makefile 2010-03-24 14:53:41.000000000 +0100
|
|
+++ head-2010-05-25/drivers/acpi/Makefile 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -64,8 +64,6 @@ obj-$(CONFIG_ACPI_POWER_METER) += power_
|
|
processor-y := processor_driver.o processor_throttling.o
|
|
processor-y += processor_idle.o processor_thermal.o
|
|
processor-$(CONFIG_CPU_FREQ) += processor_perflib.o
|
|
-ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
-processor-objs += processor_perflib.o processor_extcntl.o
|
|
-endif
|
|
+processor-$(CONFIG_PROCESSOR_EXTERNAL_CONTROL) += processor_perflib.o processor_extcntl.o
|
|
|
|
obj-$(CONFIG_ACPI_PROCESSOR_AGGREGATOR) += acpi_pad.o
|
|
--- head-2010-05-25.orig/drivers/acpi/acpica/hwsleep.c 2010-03-24 15:14:47.000000000 +0100
|
|
+++ head-2010-05-25/drivers/acpi/acpica/hwsleep.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -394,7 +394,7 @@ acpi_status asmlinkage acpi_enter_sleep_
|
|
#else
|
|
/* PV ACPI just need check hypercall return value */
|
|
err = acpi_notify_hypervisor_state(sleep_state,
|
|
- PM1Acontrol, PM1Bcontrol);
|
|
+ pm1a_control, pm1b_control);
|
|
if (err) {
|
|
printk(KERN_ERR "ACPI: Hypervisor failure [%d]\n", err);
|
|
return_ACPI_STATUS(AE_ERROR);
|
|
--- head-2010-05-25.orig/drivers/acpi/processor_idle.c 2010-04-15 10:06:51.000000000 +0200
|
|
+++ head-2010-05-25/drivers/acpi/processor_idle.c 2010-04-15 10:06:59.000000000 +0200
|
|
@@ -606,7 +606,7 @@ static void acpi_processor_power_verify_
|
|
#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
cx->latency_ticks = cx->latency;
|
|
#else
|
|
- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
|
|
+ cx->latency_ticks = us_to_pm_timer_ticks(cx->latency);
|
|
#endif
|
|
/*
|
|
* On older chipsets, BM_RLD needs to be set
|
|
@@ -643,7 +643,7 @@ static int acpi_processor_power_verify(s
|
|
#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
cx->latency_ticks = cx->latency; /* Normalize latency */
|
|
#else
|
|
- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
|
|
+ cx->latency_ticks = us_to_pm_timer_ticks(cx->latency);
|
|
#endif
|
|
break;
|
|
|
|
--- head-2010-05-25.orig/drivers/oprofile/oprofile_files.c 2010-03-24 15:02:17.000000000 +0100
|
|
+++ head-2010-05-25/drivers/oprofile/oprofile_files.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -172,6 +172,7 @@ static const struct file_operations dump
|
|
};
|
|
|
|
#ifdef CONFIG_XEN
|
|
+#include <linux/slab.h>
|
|
|
|
#define TMPBUFSIZE 512
|
|
|
|
--- head-2010-05-25.orig/drivers/pci/msi-xen.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/drivers/pci/msi-xen.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -47,47 +47,50 @@ struct msi_pirq_entry {
|
|
|
|
/* Arch hooks */
|
|
|
|
-int __attribute__ ((weak))
|
|
-arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
|
|
-{
|
|
- return 0;
|
|
-}
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-int __attribute__ ((weak))
|
|
-arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *entry)
|
|
+#ifndef arch_msi_check_device
|
|
+int arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
|
|
{
|
|
return 0;
|
|
}
|
|
+#endif
|
|
|
|
-int __attribute__ ((weak))
|
|
-arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
+#ifndef arch_setup_msi_irqs
|
|
+int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
|
|
{
|
|
struct msi_desc *entry;
|
|
int ret;
|
|
|
|
+ /*
|
|
+ * If an architecture wants to support multiple MSI, it needs to
|
|
+ * override arch_setup_msi_irqs()
|
|
+ */
|
|
+ if (type == PCI_CAP_ID_MSI && nvec > 1)
|
|
+ return 1;
|
|
+
|
|
list_for_each_entry(entry, &dev->msi_list, list) {
|
|
ret = arch_setup_msi_irq(dev, entry);
|
|
- if (ret)
|
|
+ if (ret < 0)
|
|
return ret;
|
|
+ if (ret > 0)
|
|
+ return -ENOSPC;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
+#endif
|
|
|
|
-void __attribute__ ((weak)) arch_teardown_msi_irq(unsigned int irq)
|
|
-{
|
|
- return;
|
|
-}
|
|
-
|
|
-void __attribute__ ((weak))
|
|
-arch_teardown_msi_irqs(struct pci_dev *dev)
|
|
+#ifndef arch_teardown_msi_irqs
|
|
+void arch_teardown_msi_irqs(struct pci_dev *dev)
|
|
{
|
|
struct msi_desc *entry;
|
|
|
|
list_for_each_entry(entry, &dev->msi_list, list) {
|
|
- if (entry->irq != 0)
|
|
- arch_teardown_msi_irq(entry->irq);
|
|
+ int i, nvec;
|
|
+ if (entry->irq == 0)
|
|
+ continue;
|
|
+ nvec = 1 << entry->msi_attrib.multiple;
|
|
+ for (i = 0; i < nvec; i++)
|
|
+ arch_teardown_msi_irq(entry->irq + i);
|
|
}
|
|
}
|
|
#endif
|
|
@@ -347,13 +350,15 @@ EXPORT_SYMBOL_GPL(pci_restore_msi_state)
|
|
/**
|
|
* msi_capability_init - configure device's MSI capability structure
|
|
* @dev: pointer to the pci_dev data structure of MSI device function
|
|
+ * @nvec: number of interrupts to allocate
|
|
*
|
|
- * Setup the MSI capability structure of device function with a single
|
|
- * MSI irq, regardless of device function is capable of handling
|
|
- * multiple messages. A return of zero indicates the successful setup
|
|
- * of an entry zero with the new MSI irq or non-zero for otherwise.
|
|
- **/
|
|
-static int msi_capability_init(struct pci_dev *dev)
|
|
+ * Setup the MSI capability structure of the device with the requested
|
|
+ * number of interrupts. A return value of zero indicates the successful
|
|
+ * setup of an entry with the new MSI irq. A negative return value indicates
|
|
+ * an error, and a positive return value indicates the number of interrupts
|
|
+ * which could have been allocated.
|
|
+ */
|
|
+static int msi_capability_init(struct pci_dev *dev, int nvec)
|
|
{
|
|
int pos, pirq;
|
|
u16 control;
|
|
@@ -363,6 +368,7 @@ static int msi_capability_init(struct pc
|
|
pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
|
|
pci_read_config_word(dev, msi_control_reg(pos), &control);
|
|
|
|
+ WARN_ON(nvec > 1); /* XXX */
|
|
pirq = msi_map_vector(dev, 0, 0);
|
|
if (pirq < 0)
|
|
return -EBUSY;
|
|
@@ -496,22 +502,34 @@ static int pci_msi_check_device(struct p
|
|
}
|
|
|
|
/**
|
|
- * pci_enable_msi - configure device's MSI capability structure
|
|
- * @dev: pointer to the pci_dev data structure of MSI device function
|
|
+ * pci_enable_msi_block - configure device's MSI capability structure
|
|
+ * @dev: device to configure
|
|
+ * @nvec: number of interrupts to configure
|
|
*
|
|
- * Setup the MSI capability structure of device function with
|
|
- * a single MSI irq upon its software driver call to request for
|
|
- * MSI mode enabled on its hardware device function. A return of zero
|
|
- * indicates the successful setup of an entry zero with the new MSI
|
|
- * vector or non-zero for otherwise.
|
|
- **/
|
|
+ * Allocate IRQs for a device with the MSI capability.
|
|
+ * This function returns a negative errno if an error occurs. If it
|
|
+ * is unable to allocate the number of interrupts requested, it returns
|
|
+ * the number of interrupts it might be able to allocate. If it successfully
|
|
+ * allocates at least the number of interrupts requested, it returns 0 and
|
|
+ * updates the @dev's irq member to the lowest new interrupt number; the
|
|
+ * other interrupt numbers allocated to this device are consecutive.
|
|
+ */
|
|
extern int pci_frontend_enable_msi(struct pci_dev *dev);
|
|
-int pci_enable_msi(struct pci_dev* dev)
|
|
+int pci_enable_msi_block(struct pci_dev *dev, unsigned int nvec)
|
|
{
|
|
- int temp, status;
|
|
+ int temp, status, pos, maxvec;
|
|
+ u16 msgctl;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
- status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
|
|
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
|
|
+ if (!pos)
|
|
+ return -EINVAL;
|
|
+ pci_read_config_word(dev, pos + PCI_MSI_FLAGS, &msgctl);
|
|
+ maxvec = 1 << ((msgctl & PCI_MSI_FLAGS_QMASK) >> 1);
|
|
+ if (nvec > maxvec)
|
|
+ return maxvec;
|
|
+
|
|
+ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSI);
|
|
if (status)
|
|
return status;
|
|
|
|
@@ -521,6 +539,7 @@ int pci_enable_msi(struct pci_dev* dev)
|
|
int ret;
|
|
|
|
temp = dev->irq;
|
|
+ WARN_ON(nvec > 1); /* XXX */
|
|
ret = pci_frontend_enable_msi(dev);
|
|
if (ret)
|
|
return ret;
|
|
@@ -535,23 +554,23 @@ int pci_enable_msi(struct pci_dev* dev)
|
|
|
|
temp = dev->irq;
|
|
|
|
- /* Check whether driver already requested for MSI-X irqs */
|
|
+ /* Check whether driver already requested MSI-X irqs */
|
|
if (dev->msix_enabled) {
|
|
dev_info(&dev->dev, "can't enable MSI "
|
|
"(MSI-X already enabled)\n");
|
|
return -EINVAL;
|
|
}
|
|
|
|
- status = msi_capability_init(dev);
|
|
+ status = msi_capability_init(dev, nvec);
|
|
if ( !status )
|
|
msi_dev_entry->default_irq = temp;
|
|
|
|
return status;
|
|
}
|
|
-EXPORT_SYMBOL(pci_enable_msi);
|
|
+EXPORT_SYMBOL(pci_enable_msi_block);
|
|
|
|
extern void pci_frontend_disable_msi(struct pci_dev* dev);
|
|
-void pci_msi_shutdown(struct pci_dev* dev)
|
|
+void pci_msi_shutdown(struct pci_dev *dev)
|
|
{
|
|
int pirq;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
@@ -579,6 +598,7 @@ void pci_msi_shutdown(struct pci_dev* de
|
|
pci_intx_for_msi(dev, 1);
|
|
dev->msi_enabled = 0;
|
|
}
|
|
+
|
|
void pci_disable_msi(struct pci_dev* dev)
|
|
{
|
|
pci_msi_shutdown(dev);
|
|
@@ -586,6 +606,23 @@ void pci_disable_msi(struct pci_dev* dev
|
|
EXPORT_SYMBOL(pci_disable_msi);
|
|
|
|
/**
|
|
+ * pci_msix_table_size - return the number of device's MSI-X table entries
|
|
+ * @dev: pointer to the pci_dev data structure of MSI-X device function
|
|
+ */
|
|
+int pci_msix_table_size(struct pci_dev *dev)
|
|
+{
|
|
+ int pos;
|
|
+ u16 control;
|
|
+
|
|
+ pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
|
|
+ if (!pos)
|
|
+ return 0;
|
|
+
|
|
+ pci_read_config_word(dev, msi_control_reg(pos), &control);
|
|
+ return multi_msix_capable(control);
|
|
+}
|
|
+
|
|
+/**
|
|
* pci_enable_msix - configure device's MSI-X capability structure
|
|
* @dev: pointer to the pci_dev data structure of MSI-X device function
|
|
* @entries: pointer to an array of MSI-X entries
|
|
@@ -604,9 +641,8 @@ extern int pci_frontend_enable_msix(stru
|
|
struct msix_entry *entries, int nvec);
|
|
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
|
|
{
|
|
- int status, pos, nr_entries;
|
|
+ int status, nr_entries;
|
|
int i, j, temp;
|
|
- u16 control;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
if (!entries)
|
|
@@ -653,9 +689,7 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
if (status)
|
|
return status;
|
|
|
|
- pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
|
|
- pci_read_config_word(dev, msi_control_reg(pos), &control);
|
|
- nr_entries = multi_msix_capable(control);
|
|
+ nr_entries = pci_msix_table_size(dev);
|
|
if (nvec > nr_entries)
|
|
return -EINVAL;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/Kconfig 2010-03-24 15:18:46.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/Kconfig 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -14,7 +14,6 @@ menu "XEN"
|
|
|
|
config XEN_PRIVILEGED_GUEST
|
|
bool "Privileged Guest (domain 0)"
|
|
- select PCI_REASSIGN if PCI
|
|
help
|
|
Support for privileged operation (domain 0)
|
|
|
|
@@ -333,10 +332,6 @@ endmenu
|
|
config HAVE_IRQ_IGNORE_UNHANDLED
|
|
def_bool y
|
|
|
|
-config GENERIC_HARDIRQS_NO__DO_IRQ
|
|
- def_bool y
|
|
- depends on X86
|
|
-
|
|
config NO_IDLE_HZ
|
|
def_bool y
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/char/mem.c 2010-03-24 15:12:46.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/char/mem.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -158,21 +158,7 @@ static ssize_t write_mem(struct file * f
|
|
}
|
|
|
|
#ifndef ARCH_HAS_DEV_MEM_MMAP_MEM
|
|
-static void mmap_mem_open(struct vm_area_struct *vma)
|
|
-{
|
|
- map_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
|
|
- vma->vm_page_prot);
|
|
-}
|
|
-
|
|
-static void mmap_mem_close(struct vm_area_struct *vma)
|
|
-{
|
|
- unmap_devmem(vma->vm_pgoff, vma->vm_end - vma->vm_start,
|
|
- vma->vm_page_prot);
|
|
-}
|
|
-
|
|
static struct vm_operations_struct mmap_mem_ops = {
|
|
- .open = mmap_mem_open,
|
|
- .close = mmap_mem_close,
|
|
#ifdef CONFIG_HAVE_IOREMAP_PROT
|
|
.access = generic_access_phys
|
|
#endif
|
|
--- head-2010-05-25.orig/drivers/xen/core/Makefile 2010-04-19 14:50:32.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/core/Makefile 2010-04-19 14:52:49.000000000 +0200
|
|
@@ -10,5 +10,5 @@ obj-$(CONFIG_SYS_HYPERVISOR) += hypervis
|
|
obj-$(CONFIG_HOTPLUG_CPU) += cpu_hotplug.o
|
|
obj-$(CONFIG_XEN_SYSFS) += xen_sysfs.o
|
|
obj-$(CONFIG_XEN_SMPBOOT) += smpboot.o
|
|
-obj-$(CONFIG_X86_SMP) += spinlock.o
|
|
+obj-$(CONFIG_SMP) += spinlock.o
|
|
obj-$(CONFIG_KEXEC) += machine_kexec.o
|
|
--- head-2010-05-25.orig/drivers/xen/core/evtchn.c 2010-04-23 15:19:25.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/core/evtchn.c 2010-04-23 15:19:37.000000000 +0200
|
|
@@ -150,13 +150,15 @@ DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS])
|
|
#ifdef CONFIG_SMP
|
|
|
|
static u8 cpu_evtchn[NR_EVENT_CHANNELS];
|
|
-static unsigned long cpu_evtchn_mask[NR_CPUS][NR_EVENT_CHANNELS/BITS_PER_LONG];
|
|
+static DEFINE_PER_CPU(unsigned long[BITS_TO_LONGS(NR_EVENT_CHANNELS)],
|
|
+ cpu_evtchn_mask);
|
|
|
|
-static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
|
|
- unsigned int idx)
|
|
+static inline unsigned long active_evtchns(unsigned int idx)
|
|
{
|
|
+ shared_info_t *sh = HYPERVISOR_shared_info;
|
|
+
|
|
return (sh->evtchn_pending[idx] &
|
|
- cpu_evtchn_mask[cpu][idx] &
|
|
+ percpu_read(cpu_evtchn_mask[idx]) &
|
|
~sh->evtchn_mask[idx]);
|
|
}
|
|
|
|
@@ -168,10 +170,10 @@ static void bind_evtchn_to_cpu(unsigned
|
|
BUG_ON(!test_bit(chn, s->evtchn_mask));
|
|
|
|
if (irq != -1)
|
|
- irq_to_desc(irq)->affinity = cpumask_of_cpu(cpu);
|
|
+ cpumask_copy(irq_to_desc(irq)->affinity, cpumask_of(cpu));
|
|
|
|
- clear_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu_evtchn[chn]]);
|
|
- set_bit(chn, (unsigned long *)cpu_evtchn_mask[cpu]);
|
|
+ clear_bit(chn, per_cpu(cpu_evtchn_mask, cpu_evtchn[chn]));
|
|
+ set_bit(chn, per_cpu(cpu_evtchn_mask, cpu));
|
|
cpu_evtchn[chn] = cpu;
|
|
}
|
|
|
|
@@ -184,11 +186,11 @@ static void init_evtchn_cpu_bindings(voi
|
|
struct irq_desc *desc = irq_to_desc(i);
|
|
|
|
if (desc)
|
|
- desc->affinity = cpumask_of_cpu(0);
|
|
+ cpumask_copy(desc->affinity, cpumask_of(0));
|
|
}
|
|
|
|
memset(cpu_evtchn, 0, sizeof(cpu_evtchn));
|
|
- memset(cpu_evtchn_mask[0], ~0, sizeof(cpu_evtchn_mask[0]));
|
|
+ memset(per_cpu(cpu_evtchn_mask, 0), ~0, sizeof(per_cpu(cpu_evtchn_mask, 0)));
|
|
}
|
|
|
|
static inline unsigned int cpu_from_evtchn(unsigned int evtchn)
|
|
@@ -198,9 +200,10 @@ static inline unsigned int cpu_from_evtc
|
|
|
|
#else
|
|
|
|
-static inline unsigned long active_evtchns(unsigned int cpu, shared_info_t *sh,
|
|
- unsigned int idx)
|
|
+static inline unsigned long active_evtchns(unsigned int idx)
|
|
{
|
|
+ shared_info_t *sh = HYPERVISOR_shared_info;
|
|
+
|
|
return (sh->evtchn_pending[idx] & ~sh->evtchn_mask[idx]);
|
|
}
|
|
|
|
@@ -219,25 +222,15 @@ static inline unsigned int cpu_from_evtc
|
|
|
|
#endif
|
|
|
|
-/* Upcall to generic IRQ layer. */
|
|
#ifdef CONFIG_X86
|
|
-extern unsigned int do_IRQ(struct pt_regs *regs);
|
|
void __init xen_init_IRQ(void);
|
|
void __init init_IRQ(void)
|
|
{
|
|
irq_ctx_init(0);
|
|
xen_init_IRQ();
|
|
}
|
|
-#if defined (__i386__)
|
|
-static inline void exit_idle(void) {}
|
|
-#elif defined (__x86_64__)
|
|
#include <asm/idle.h>
|
|
#endif
|
|
-#define do_IRQ(irq, regs) do { \
|
|
- (regs)->orig_ax = ~(irq); \
|
|
- do_IRQ((regs)); \
|
|
-} while (0)
|
|
-#endif
|
|
|
|
/* Xen will never allocate port zero for any purpose. */
|
|
#define VALID_EVTCHN(chn) ((chn) != 0)
|
|
@@ -261,13 +254,12 @@ static DEFINE_PER_CPU(unsigned int, curr
|
|
/* NB. Interrupts are disabled on entry. */
|
|
asmlinkage void __irq_entry evtchn_do_upcall(struct pt_regs *regs)
|
|
{
|
|
+ struct pt_regs *old_regs = set_irq_regs(regs);
|
|
unsigned long l1, l2;
|
|
unsigned long masked_l1, masked_l2;
|
|
unsigned int l1i, l2i, start_l1i, start_l2i, port, count, i;
|
|
int irq;
|
|
- unsigned int cpu = smp_processor_id();
|
|
- shared_info_t *s = HYPERVISOR_shared_info;
|
|
- vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
|
|
+ vcpu_info_t *vcpu_info = current_vcpu_info();
|
|
|
|
exit_idle();
|
|
irq_enter();
|
|
@@ -277,7 +269,8 @@ asmlinkage void __irq_entry evtchn_do_up
|
|
vcpu_info->evtchn_upcall_pending = 0;
|
|
|
|
/* Nested invocations bail immediately. */
|
|
- if (unlikely(per_cpu(upcall_count, cpu)++))
|
|
+ percpu_add(upcall_count, 1);
|
|
+ if (unlikely(percpu_read(upcall_count) != 1))
|
|
break;
|
|
|
|
#ifndef CONFIG_X86 /* No need for a barrier -- XCHG is a barrier on x86. */
|
|
@@ -286,8 +279,8 @@ asmlinkage void __irq_entry evtchn_do_up
|
|
#endif
|
|
l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
|
|
|
|
- start_l1i = l1i = per_cpu(current_l1i, cpu);
|
|
- start_l2i = per_cpu(current_l2i, cpu);
|
|
+ start_l1i = l1i = percpu_read(current_l1i);
|
|
+ start_l2i = percpu_read(current_l2i);
|
|
|
|
for (i = 0; l1 != 0; i++) {
|
|
masked_l1 = l1 & ((~0UL) << l1i);
|
|
@@ -298,7 +291,7 @@ asmlinkage void __irq_entry evtchn_do_up
|
|
}
|
|
l1i = __ffs(masked_l1);
|
|
|
|
- l2 = active_evtchns(cpu, s, l1i);
|
|
+ l2 = active_evtchns(l1i);
|
|
l2i = 0; /* usually scan entire word from start */
|
|
if (l1i == start_l1i) {
|
|
/* We scan the starting word in two parts. */
|
|
@@ -318,17 +311,18 @@ asmlinkage void __irq_entry evtchn_do_up
|
|
|
|
/* process port */
|
|
port = (l1i * BITS_PER_LONG) + l2i;
|
|
- if ((irq = evtchn_to_irq[port]) != -1)
|
|
- do_IRQ(irq, regs);
|
|
- else
|
|
+ if (unlikely((irq = evtchn_to_irq[port]) == -1))
|
|
evtchn_device_upcall(port);
|
|
+ else if (!handle_irq(irq, regs) && printk_ratelimit())
|
|
+ printk(KERN_EMERG "%s(%d): No handler for irq %d\n",
|
|
+ __func__, smp_processor_id(), irq);
|
|
|
|
l2i = (l2i + 1) % BITS_PER_LONG;
|
|
|
|
/* Next caller starts at last processed + 1 */
|
|
- per_cpu(current_l1i, cpu) =
|
|
- l2i ? l1i : (l1i + 1) % BITS_PER_LONG;
|
|
- per_cpu(current_l2i, cpu) = l2i;
|
|
+ percpu_write(current_l1i,
|
|
+ l2i ? l1i : (l1i + 1) % BITS_PER_LONG);
|
|
+ percpu_write(current_l2i, l2i);
|
|
|
|
} while (l2i != 0);
|
|
|
|
@@ -340,11 +334,12 @@ asmlinkage void __irq_entry evtchn_do_up
|
|
}
|
|
|
|
/* If there were nested callbacks then we have more to do. */
|
|
- count = per_cpu(upcall_count, cpu);
|
|
- per_cpu(upcall_count, cpu) = 0;
|
|
+ count = percpu_read(upcall_count);
|
|
+ percpu_write(upcall_count, 0);
|
|
} while (unlikely(count != 1));
|
|
|
|
irq_exit();
|
|
+ set_irq_regs(old_regs);
|
|
}
|
|
|
|
static struct irq_chip dynirq_chip;
|
|
@@ -551,7 +546,7 @@ static void unbind_from_irq(unsigned int
|
|
|
|
/* Zap stats across IRQ changes of use. */
|
|
for_each_possible_cpu(cpu)
|
|
-#ifdef CONFIG_SPARSE_IRQ
|
|
+#ifdef CONFIG_GENERIC_HARDIRQS
|
|
irq_to_desc(irq)->kstat_irqs[cpu] = 0;
|
|
#else
|
|
kstat_cpu(cpu).irqs[irq] = 0;
|
|
@@ -669,7 +664,8 @@ int bind_ipi_to_irqhandler(
|
|
if (irq < 0)
|
|
return irq;
|
|
|
|
- retval = request_irq(irq, handler, irqflags, devname, dev_id);
|
|
+ retval = request_irq(irq, handler, irqflags | IRQF_NO_SUSPEND,
|
|
+ devname, dev_id);
|
|
if (retval != 0) {
|
|
unbind_from_irq(irq);
|
|
return retval;
|
|
@@ -1134,7 +1130,7 @@ void irq_resume(void)
|
|
mask_evtchn(evtchn);
|
|
|
|
/* Check that no PIRQs are still bound. */
|
|
- for (irq = PIRQ_BASE; irq < (PIRQ_BASE + NR_PIRQS); irq++) {
|
|
+ for (irq = PIRQ_BASE; irq < (PIRQ_BASE + nr_pirqs); irq++) {
|
|
cfg = irq_cfg(irq);
|
|
BUG_ON(cfg && cfg->info != IRQ_UNBOUND);
|
|
}
|
|
@@ -1171,7 +1167,7 @@ int arch_init_chip_data(struct irq_desc
|
|
{
|
|
if (!desc->chip_data) {
|
|
/* By default all event channels notify CPU#0. */
|
|
- desc->affinity = cpumask_of_cpu(0);
|
|
+ cpumask_copy(desc->affinity, cpumask_of(0));
|
|
|
|
desc->chip_data = kzalloc(sizeof(struct irq_cfg), GFP_ATOMIC);
|
|
}
|
|
@@ -1185,11 +1181,44 @@ int arch_init_chip_data(struct irq_desc
|
|
#endif
|
|
|
|
#if defined(CONFIG_X86_IO_APIC)
|
|
+#ifdef CONFIG_SPARSE_IRQ
|
|
+int nr_pirqs = NR_PIRQS;
|
|
+EXPORT_SYMBOL_GPL(nr_pirqs);
|
|
+
|
|
+int __init arch_probe_nr_irqs(void)
|
|
+{
|
|
+ int nr_irqs_gsi, nr = acpi_probe_gsi();
|
|
+
|
|
+ if (nr <= NR_IRQS_LEGACY) {
|
|
+ /* for acpi=off or acpi not compiled in */
|
|
+ int idx;
|
|
+
|
|
+ for (nr = idx = 0; idx < nr_ioapics; idx++)
|
|
+ nr += io_apic_get_redir_entries(idx) + 1;
|
|
+ }
|
|
+ nr_irqs_gsi = max(nr, NR_IRQS_LEGACY);
|
|
+
|
|
+ nr = nr_irqs_gsi + 8 * nr_cpu_ids;
|
|
+#ifdef CONFIG_PCI_MSI
|
|
+ nr += nr_irqs_gsi * 16;
|
|
+#endif
|
|
+ if (nr_pirqs > nr) {
|
|
+ nr_pirqs = nr;
|
|
+ nr_irqs = nr + NR_DYNIRQS;
|
|
+ }
|
|
+
|
|
+ printk(KERN_DEBUG "nr_irqs_gsi=%d nr_pirqs=%d\n",
|
|
+ nr_irqs_gsi, nr_pirqs);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+#endif
|
|
+
|
|
int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
|
|
{
|
|
struct physdev_irq irq_op;
|
|
|
|
- if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
+ if (irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs)
|
|
return -EINVAL;
|
|
|
|
if (cfg->vector)
|
|
@@ -1212,7 +1241,7 @@ int assign_irq_vector(int irq, struct ir
|
|
|
|
void evtchn_register_pirq(int irq)
|
|
{
|
|
- BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS);
|
|
+ BUG_ON(irq < PIRQ_BASE || irq - PIRQ_BASE >= nr_pirqs);
|
|
if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
|
|
return;
|
|
irq_cfg(irq)->info = mk_irq_info(IRQT_PIRQ, irq, 0);
|
|
@@ -1225,7 +1254,7 @@ int evtchn_map_pirq(int irq, int xen_pir
|
|
if (irq < 0) {
|
|
static DEFINE_SPINLOCK(irq_alloc_lock);
|
|
|
|
- irq = PIRQ_BASE + NR_PIRQS - 1;
|
|
+ irq = PIRQ_BASE + nr_pirqs - 1;
|
|
spin_lock(&irq_alloc_lock);
|
|
do {
|
|
struct irq_desc *desc;
|
|
@@ -1285,7 +1314,7 @@ void __init xen_init_IRQ(void)
|
|
init_evtchn_cpu_bindings();
|
|
|
|
pirq_needs_eoi = alloc_bootmem_pages(sizeof(unsigned long)
|
|
- * BITS_TO_LONGS(ALIGN(NR_PIRQS, PAGE_SIZE * 8)));
|
|
+ * BITS_TO_LONGS(ALIGN(nr_pirqs, PAGE_SIZE * 8)));
|
|
eoi_gmfn.gmfn = virt_to_machine(pirq_needs_eoi) >> PAGE_SHIFT;
|
|
if (HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &eoi_gmfn) == 0)
|
|
pirq_eoi_does_unmask = true;
|
|
@@ -1301,7 +1330,7 @@ void __init xen_init_IRQ(void)
|
|
handle_level_irq, "level");
|
|
}
|
|
|
|
- for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_PIRQS); i++) {
|
|
+ for (i = PIRQ_BASE; i < (PIRQ_BASE + nr_pirqs); i++) {
|
|
#else
|
|
for (i = PIRQ_BASE; i < (PIRQ_BASE + NR_IRQS_LEGACY); i++) {
|
|
#endif
|
|
--- head-2010-05-25.orig/drivers/xen/core/smpboot.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/smpboot.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -18,7 +18,6 @@
|
|
#include <linux/cpu.h>
|
|
#include <linux/percpu.h>
|
|
#include <asm/desc.h>
|
|
-#include <asm/arch_hooks.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <xen/evtchn.h>
|
|
#include <xen/interface/vcpu.h>
|
|
@@ -54,8 +53,8 @@ static char call1func_name[NR_CPUS][15];
|
|
#define set_cpu_to_apicid(cpu, apicid)
|
|
#endif
|
|
|
|
-DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
|
|
-DEFINE_PER_CPU(cpumask_t, cpu_core_map);
|
|
+DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
|
|
+DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
|
|
|
|
void __init prefill_possible_map(void)
|
|
{
|
|
@@ -88,8 +87,8 @@ set_cpu_sibling_map(unsigned int cpu)
|
|
cpu_data(cpu).phys_proc_id = cpu;
|
|
cpu_data(cpu).cpu_core_id = 0;
|
|
|
|
- per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu);
|
|
- per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu);
|
|
+ cpumask_copy(cpu_sibling_mask(cpu), cpumask_of(cpu));
|
|
+ cpumask_copy(cpu_core_mask(cpu), cpumask_of(cpu));
|
|
|
|
cpu_data(cpu).booted_cores = 1;
|
|
}
|
|
@@ -100,8 +99,8 @@ remove_siblinginfo(unsigned int cpu)
|
|
cpu_data(cpu).phys_proc_id = BAD_APICID;
|
|
cpu_data(cpu).cpu_core_id = BAD_APICID;
|
|
|
|
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
|
|
- cpus_clear(per_cpu(cpu_core_map, cpu));
|
|
+ cpumask_clear(cpu_sibling_mask(cpu));
|
|
+ cpumask_clear(cpu_core_mask(cpu));
|
|
|
|
cpu_data(cpu).booted_cores = 0;
|
|
}
|
|
@@ -224,7 +223,7 @@ static void __cpuinit cpu_initialize_con
|
|
smp_trap_init(ctxt.trap_ctxt);
|
|
|
|
ctxt.ldt_ents = 0;
|
|
- ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
|
|
+ ctxt.gdt_frames[0] = arbitrary_virt_to_mfn(get_cpu_gdt_table(cpu));
|
|
ctxt.gdt_ents = GDT_SIZE / 8;
|
|
|
|
ctxt.user_regs.cs = __KERNEL_CS;
|
|
@@ -242,12 +241,13 @@ static void __cpuinit cpu_initialize_con
|
|
ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
|
|
|
|
ctxt.user_regs.fs = __KERNEL_PERCPU;
|
|
+ ctxt.user_regs.gs = __KERNEL_STACK_CANARY;
|
|
#else /* __x86_64__ */
|
|
ctxt.syscall_callback_eip = (unsigned long)system_call;
|
|
|
|
ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
|
|
|
|
- ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
|
|
+ ctxt.gs_base_kernel = per_cpu_offset(cpu);
|
|
#endif
|
|
|
|
if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
|
|
@@ -275,8 +275,10 @@ void __init smp_prepare_cpus(unsigned in
|
|
current_thread_info()->cpu = 0;
|
|
|
|
for_each_possible_cpu (cpu) {
|
|
- cpus_clear(per_cpu(cpu_sibling_map, cpu));
|
|
- cpus_clear(per_cpu(cpu_core_map, cpu));
|
|
+ alloc_cpumask_var(&per_cpu(cpu_sibling_map, cpu), GFP_KERNEL);
|
|
+ alloc_cpumask_var(&per_cpu(cpu_core_map, cpu), GFP_KERNEL);
|
|
+ cpumask_clear(cpu_sibling_mask(cpu));
|
|
+ cpumask_clear(cpu_core_mask(cpu));
|
|
}
|
|
|
|
set_cpu_sibling_map(0);
|
|
@@ -303,9 +305,6 @@ void __init smp_prepare_cpus(unsigned in
|
|
if (IS_ERR(idle))
|
|
panic("failed fork for CPU %d", cpu);
|
|
|
|
-#ifdef __i386__
|
|
- init_gdt(cpu);
|
|
-#endif
|
|
gdt_addr = get_cpu_gdt_table(cpu);
|
|
make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
|
|
|
|
@@ -319,12 +318,12 @@ void __init smp_prepare_cpus(unsigned in
|
|
set_cpu_to_apicid(cpu, apicid);
|
|
|
|
#ifdef __x86_64__
|
|
- cpu_pda(cpu)->pcurrent = idle;
|
|
- cpu_pda(cpu)->cpunumber = cpu;
|
|
clear_tsk_thread_flag(idle, TIF_FORK);
|
|
-#else
|
|
- per_cpu(current_task, cpu) = idle;
|
|
+ per_cpu(kernel_stack, cpu) =
|
|
+ (unsigned long)task_stack_page(idle) -
|
|
+ KERNEL_STACK_OFFSET + THREAD_SIZE;
|
|
#endif
|
|
+ per_cpu(current_task, cpu) = idle;
|
|
|
|
irq_ctx_init(cpu);
|
|
|
|
@@ -348,10 +347,7 @@ void __init smp_prepare_cpus(unsigned in
|
|
|
|
void __init smp_prepare_boot_cpu(void)
|
|
{
|
|
-#ifdef __i386__
|
|
- init_gdt(smp_processor_id());
|
|
-#endif
|
|
- switch_to_new_gdt();
|
|
+ switch_to_new_gdt(smp_processor_id());
|
|
prefill_possible_map();
|
|
}
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/core/spinlock.c 2010-03-24 15:17:58.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/spinlock.c 2010-03-24 15:25:06.000000000 +0100
|
|
@@ -78,13 +78,13 @@ static unsigned int spin_adjust(struct s
|
|
|
|
unsigned int xen_spin_adjust(const raw_spinlock_t *lock, unsigned int token)
|
|
{
|
|
- return spin_adjust(x86_read_percpu(spinning), lock, token);
|
|
+ return spin_adjust(percpu_read(spinning), lock, token);
|
|
}
|
|
|
|
bool xen_spin_wait(raw_spinlock_t *lock, unsigned int *ptok,
|
|
unsigned int flags)
|
|
{
|
|
- int irq = x86_read_percpu(spinlock_irq);
|
|
+ int irq = percpu_read(spinlock_irq);
|
|
bool rc;
|
|
typeof(vcpu_info(0)->evtchn_upcall_mask) upcall_mask;
|
|
raw_rwlock_t *rm_lock;
|
|
@@ -97,9 +97,9 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
/* announce we're spinning */
|
|
spinning.ticket = *ptok >> TICKET_SHIFT;
|
|
spinning.lock = lock;
|
|
- spinning.prev = x86_read_percpu(spinning);
|
|
+ spinning.prev = percpu_read(spinning);
|
|
smp_wmb();
|
|
- x86_write_percpu(spinning, &spinning);
|
|
+ percpu_write(spinning, &spinning);
|
|
upcall_mask = current_vcpu_info()->evtchn_upcall_mask;
|
|
|
|
do {
|
|
@@ -184,7 +184,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
|
|
/* announce we're done */
|
|
other = spinning.prev;
|
|
- x86_write_percpu(spinning, other);
|
|
+ percpu_write(spinning, other);
|
|
rm_lock = &__get_cpu_var(spinning_rm_lock);
|
|
raw_local_irq_disable();
|
|
__raw_write_lock(rm_lock);
|
|
--- head-2010-05-25.orig/drivers/xen/netback/interface.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/netback/interface.c 2010-03-24 15:25:06.000000000 +0100
@@ -121,7 +121,7 @@ static void netbk_get_drvinfo(struct net
struct ethtool_drvinfo *info)
{
strcpy(info->driver, "netbk");
- strcpy(info->bus_info, dev->dev.parent->bus_id);
+ strcpy(info->bus_info, dev_name(dev->dev.parent));
}

static const struct netif_stat {
--- head-2010-05-25.orig/drivers/xen/netback/netback.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/netback/netback.c 2010-03-24 15:25:06.000000000 +0100
@@ -333,7 +333,7 @@ int netif_be_start_xmit(struct sk_buff *
*/
netif->tx_queue_timeout.data = (unsigned long)netif;
netif->tx_queue_timeout.function = tx_queue_callback;
- __mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
+ mod_timer(&netif->tx_queue_timeout, jiffies + HZ/2);
}
}

@@ -354,7 +354,7 @@ static void xen_network_done_notify(void
static struct net_device *eth0_dev = NULL;
if (unlikely(eth0_dev == NULL))
eth0_dev = __dev_get_by_name(&init_net, "eth0");
- netif_rx_schedule(???);
+ napi_schedule(???);
}
/*
* Add following to poll() function in NAPI driver (Tigon3 is example):
@@ -1308,8 +1308,7 @@ static void net_tx_action(unsigned long
(unsigned long)netif;
netif->credit_timeout.function =
tx_credit_callback;
- __mod_timer(&netif->credit_timeout,
- next_credit);
+ mod_timer(&netif->credit_timeout, next_credit);
netif_put(netif);
continue;
}
--- head-2010-05-25.orig/drivers/xen/netfront/netfront.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/netfront/netfront.c 2010-03-24 15:25:06.000000000 +0100
@@ -102,7 +102,7 @@ static const int MODPARM_rx_flip = 0;
static inline void dev_disable_gso_features(struct net_device *dev)
{
/* Turn off all GSO bits except ROBUST. */
- dev->features &= (1 << NETIF_F_GSO_SHIFT) - 1;
+ dev->features &= ~NETIF_F_GSO_MASK;
dev->features |= NETIF_F_GSO_ROBUST;
}
#elif defined(NETIF_F_TSO)
@@ -635,7 +635,7 @@ static int network_open(struct net_devic
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){
netfront_accelerator_call_stop_napi_irq(np, dev);

- netif_rx_schedule(&np->napi);
+ napi_schedule(&np->napi);
}
}
spin_unlock_bh(&np->rx_lock);
@@ -707,7 +707,7 @@ static void rx_refill_timeout(unsigned l

netfront_accelerator_call_stop_napi_irq(np, dev);

- netif_rx_schedule(&np->napi);
+ napi_schedule(&np->napi);
}

static void network_alloc_rx_buffers(struct net_device *dev)
@@ -1064,7 +1064,7 @@ static irqreturn_t netif_int(int irq, vo
if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) {
netfront_accelerator_call_stop_napi_irq(np, dev);

- netif_rx_schedule(&np->napi);
+ napi_schedule(&np->napi);
}
}

@@ -1521,7 +1521,7 @@ err:
}

if (!more_to_do && !accel_more_to_do)
- __netif_rx_complete(napi);
+ __napi_complete(napi);

local_irq_restore(flags);
}
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:25:06.000000000 +0100
@@ -47,7 +47,7 @@ static void vnic_start_interrupts(netfro
netfront_accel_disable_net_interrupts(vnic);
vnic->irq_enabled = 0;
NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++);
- netif_rx_schedule(&np->napi);
+ napi_schedule(&np->napi);
} else {
/*
* Nothing yet, make sure we get interrupts through
@@ -532,7 +532,7 @@ irqreturn_t netfront_accel_net_channel_i
vnic->stats.event_count_since_irq;
vnic->stats.event_count_since_irq = 0;
#endif
- netif_rx_schedule(&np->napi);
+ napi_schedule(&np->napi);
}
else {
spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags);
--- head-2010-05-25.orig/drivers/xen/usbback/usbstub.c 2010-03-24 15:06:12.000000000 +0100
+++ head-2010-05-25/drivers/xen/usbback/usbstub.c 2010-03-24 15:25:06.000000000 +0100
@@ -188,7 +188,7 @@ static int usbstub_probe(struct usb_inte
const struct usb_device_id *id)
{
struct usb_device *udev = interface_to_usbdev(intf);
- char *busid = intf->dev.parent->bus_id;
+ const char *busid = dev_name(intf->dev.parent);
struct vusb_port_id *portid = NULL;
struct usbstub *stub = NULL;
usbif_t *usbif = NULL;
--- head-2010-05-25.orig/drivers/xen/usbfront/usbfront-dbg.c 2010-03-24 15:10:37.000000000 +0100
+++ head-2010-05-25/drivers/xen/usbfront/usbfront-dbg.c 2010-03-24 15:25:06.000000000 +0100
@@ -64,7 +64,7 @@ static ssize_t show_statistics(struct de
"%s\n"
"xenhcd, hcd state %d\n",
hcd->self.controller->bus->name,
- hcd->self.controller->bus_id,
+ dev_name(hcd->self.controller),
hcd->product_desc,
hcd->state);
size -= temp;
--- head-2010-05-25.orig/drivers/xen/usbfront/xenbus.c 2010-04-15 09:53:49.000000000 +0200
+++ head-2010-05-25/drivers/xen/usbfront/xenbus.c 2010-03-24 15:25:06.000000000 +0100
@@ -252,10 +252,10 @@ static struct usb_hcd *create_hcd(struct
}
switch (usb_ver) {
case USB_VER_USB11:
- hcd = usb_create_hcd(&xen_usb11_hc_driver, &dev->dev, dev->dev.bus_id);
+ hcd = usb_create_hcd(&xen_usb11_hc_driver, &dev->dev, dev_name(&dev->dev));
break;
case USB_VER_USB20:
- hcd = usb_create_hcd(&xen_usb20_hc_driver, &dev->dev, dev->dev.bus_id);
+ hcd = usb_create_hcd(&xen_usb20_hc_driver, &dev->dev, dev_name(&dev->dev));
break;
default:
xenbus_dev_fatal(dev, err, "invalid usb-ver");
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:25:06.000000000 +0100
@@ -230,7 +230,7 @@ static struct xen_bus_type xenbus_fronte
},
#if defined(CONFIG_XEN) || defined(MODULE)
.dev = {
- .bus_id = "xen",
+ .init_name = "xen",
},
#endif
};
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe_backend.c 2010-03-24 15:25:06.000000000 +0100
@@ -129,7 +129,7 @@ static struct xen_bus_type xenbus_backen
.dev_attrs = xenbus_backend_attrs,
},
.dev = {
- .bus_id = "xen-backend",
+ .init_name = "xen-backend",
},
};

--- head-2010-05-25.orig/include/linux/interrupt.h 2010-03-24 14:53:41.000000000 +0100
+++ head-2010-05-25/include/linux/interrupt.h 2010-03-24 15:25:06.000000000 +0100
@@ -52,6 +52,7 @@
* IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
* Used by threaded interrupts which need to keep the
* irq line disabled until the threaded handler has been run.
+ * IRQF_NO_SUSPEND - Prevent this interrupt from being disabled during suspend.
*/
#define IRQF_DISABLED 0x00000020
#define IRQF_SAMPLE_RANDOM 0x00000040
@@ -62,6 +63,7 @@
#define IRQF_NOBALANCING 0x00000800
#define IRQF_IRQPOLL 0x00001000
#define IRQF_ONESHOT 0x00002000
+#define IRQF_NO_SUSPEND 0x00008000

/*
* Bits used by threaded handlers:
--- head-2010-05-25.orig/kernel/irq/manage.c 2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/kernel/irq/manage.c 2010-03-24 15:25:06.000000000 +0100
@@ -200,7 +200,8 @@ static inline int setup_affinity(unsigne
void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
{
if (suspend) {
- if (!desc->action || (desc->action->flags & IRQF_TIMER))
+ if (!desc->action ||
+ (desc->action->flags & (IRQF_TIMER | IRQF_NO_SUSPEND)))
return;
desc->status |= IRQ_SUSPENDED;
}
--- head-2010-05-25.orig/lib/swiotlb-xen.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/lib/swiotlb-xen.c 2010-03-24 15:25:06.000000000 +0100
@@ -175,7 +175,7 @@ static void *swiotlb_bus_to_virt(dma_add
return phys_to_virt(swiotlb_bus_to_phys(address));
}

-int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size)
+int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
{
return 0;
}
@@ -523,13 +523,13 @@ swiotlb_full(struct device *dev, size_t
* Once the device is given the dma address, the device owns this memory until
* either swiotlb_unmap_single or swiotlb_dma_sync_single is performed.
*/
-static dma_addr_t
-_swiotlb_map_single(struct device *hwdev, phys_addr_t paddr, size_t size,
- int dir, struct dma_attrs *attrs)
-{
- struct page *page = pfn_to_page(paddr >> PAGE_SHIFT);
- dma_addr_t dev_addr = gnttab_dma_map_page(page) +
- offset_in_page(paddr);
+dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ struct dma_attrs *attrs)
+{
+ phys_addr_t phys = page_to_pseudophys(page) + offset;
+ dma_addr_t dev_addr = gnttab_dma_map_page(page) + offset;
void *map;

BUG_ON(dir == DMA_NONE);
@@ -539,44 +539,24 @@ _swiotlb_map_single(struct device *hwdev
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (!address_needs_mapping(hwdev, dev_addr, size) &&
- !range_needs_mapping(paddr, size))
+ if (!address_needs_mapping(dev, dev_addr, size) &&
+ !range_needs_mapping(phys, size))
return dev_addr;

/*
* Oh well, have to allocate and map a bounce buffer.
*/
gnttab_dma_unmap_page(dev_addr);
- map = map_single(hwdev, paddr, size, dir);
+ map = map_single(dev, phys, size, dir);
if (!map) {
- swiotlb_full(hwdev, size, dir, 1);
+ swiotlb_full(dev, size, dir, 1);
map = io_tlb_overflow_buffer;
}

- dev_addr = swiotlb_virt_to_bus(hwdev, map);
+ dev_addr = swiotlb_virt_to_bus(dev, map);
return dev_addr;
}
-
-dma_addr_t
-swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
- int dir, struct dma_attrs *attrs)
-{
- return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, attrs);
-}
-EXPORT_SYMBOL(swiotlb_map_single_attrs);
-
-dma_addr_t
-swiotlb_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
-{
- return _swiotlb_map_single(hwdev, virt_to_phys(ptr), size, dir, NULL);
-}
-EXPORT_SYMBOL(swiotlb_map_single);
-
-dma_addr_t
-swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, int dir)
-{
- return _swiotlb_map_single(hwdev, paddr, size, dir, NULL);
-}
+EXPORT_SYMBOL_GPL(swiotlb_map_page);

/*
* Unmap a single streaming mode DMA translation. The dma_addr and size must
@@ -586,9 +566,9 @@ swiotlb_map_single_phys(struct device *h
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
-void
-swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, struct dma_attrs *attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ struct dma_attrs *attrs)
{
char *dma_addr = swiotlb_bus_to_virt(dev_addr);

@@ -598,15 +578,7 @@ swiotlb_unmap_single_attrs(struct device
else
gnttab_dma_unmap_page(dev_addr);
}
-EXPORT_SYMBOL(swiotlb_unmap_single_attrs);
-
-void
-swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, size_t size,
- int dir)
-{
- return swiotlb_unmap_single_attrs(hwdev, dev_addr, size, dir, NULL);
-}
-EXPORT_SYMBOL(swiotlb_unmap_single);
+EXPORT_SYMBOL_GPL(swiotlb_unmap_page);

/*
* Make physical memory consistent for a single streaming mode DMA translation
@@ -620,7 +592,7 @@ EXPORT_SYMBOL(swiotlb_unmap_single);
*/
void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
char *dma_addr = swiotlb_bus_to_virt(dev_addr);

@@ -632,7 +604,7 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_cp

void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
char *dma_addr = swiotlb_bus_to_virt(dev_addr);

@@ -644,7 +616,8 @@ EXPORT_SYMBOL(swiotlb_sync_single_for_de

void
swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir)
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
char *dma_addr = swiotlb_bus_to_virt(dev_addr);

@@ -656,7 +629,8 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra

void
swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr,
- unsigned long offset, size_t size, int dir)
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir)
{
char *dma_addr = swiotlb_bus_to_virt(dev_addr);

@@ -684,7 +658,7 @@ EXPORT_SYMBOL_GPL(swiotlb_sync_single_ra
*/
int
swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir, struct dma_attrs *attrs)
+ enum dma_data_direction dir, struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -736,7 +710,7 @@ EXPORT_SYMBOL(swiotlb_map_sg);
*/
void
swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir, struct dma_attrs *attrs)
+ int nelems, enum dma_data_direction dir, struct dma_attrs *attrs)
{
struct scatterlist *sg;
int i;
@@ -770,7 +744,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
*/
void
swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
@@ -787,7 +761,7 @@ EXPORT_SYMBOL(swiotlb_sync_sg_for_cpu);

void
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir)
+ int nelems, enum dma_data_direction dir)
{
struct scatterlist *sg;
int i;
--- head-2010-05-25.orig/mm/page_alloc.c 2010-03-24 15:06:12.000000000 +0100
+++ head-2010-05-25/mm/page_alloc.c 2010-03-24 15:25:06.000000000 +0100
@@ -4685,11 +4685,9 @@ static void __setup_per_zone_wmarks(void
}

#ifdef CONFIG_XEN
- for_each_zone(zone) {
+ for_each_populated_zone(zone) {
unsigned int cpu;

- if (!populated_zone(zone))
- continue;
for_each_online_cpu(cpu) {
unsigned long high;