7155 lines
208 KiB
Plaintext
7155 lines
208 KiB
Plaintext
From: www.kernel.org
|
|
Subject: Linux 2.6.20
|
|
Patch-mainline: 2.6.20
|
|
|
|
Automatically created from "patches.kernel.org/patch-2.6.20" by xen-port-patches.py
|
|
|
|
Acked-by: jbeulich@novell.com
|
|
|
|
--- head-2010-05-25.orig/arch/x86/Kconfig 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/Kconfig 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -1707,6 +1707,7 @@ config PHYSICAL_START
|
|
|
|
config RELOCATABLE
|
|
bool "Build a relocatable kernel"
|
|
+ depends on !X86_XEN
|
|
default y
|
|
---help---
|
|
This builds a kernel image that retains relocation information
|
|
@@ -1729,7 +1730,8 @@ config X86_NEED_RELOCS
|
|
|
|
config PHYSICAL_ALIGN
|
|
hex
|
|
- prompt "Alignment value to which kernel should be aligned" if X86_32
|
|
+ prompt "Alignment value to which kernel should be aligned" if X86_32 && !XEN
|
|
+ default 0x2000 if XEN
|
|
default "0x1000000"
|
|
range 0x2000 0x1000000
|
|
---help---
|
|
--- head-2010-05-25.orig/arch/x86/kernel/asm-offsets_32.c 2010-01-19 16:00:16.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/asm-offsets_32.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -55,6 +55,7 @@ void foo(void)
|
|
OFFSET(TI_exec_domain, thread_info, exec_domain);
|
|
OFFSET(TI_flags, thread_info, flags);
|
|
OFFSET(TI_status, thread_info, status);
|
|
+ OFFSET(TI_cpu, thread_info, cpu);
|
|
OFFSET(TI_preempt_count, thread_info, preempt_count);
|
|
OFFSET(TI_addr_limit, thread_info, addr_limit);
|
|
OFFSET(TI_restart_block, thread_info, restart_block);
|
|
@@ -110,6 +111,11 @@ void foo(void)
|
|
|
|
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+ BLANK();
|
|
+ OFFSET(XEN_START_mfn_list, start_info, mfn_list);
|
|
+#endif
|
|
+
|
|
#ifdef CONFIG_PARAVIRT
|
|
BLANK();
|
|
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -22,6 +22,7 @@
|
|
#define phys_pkg_id(a,b) a
|
|
#endif
|
|
#endif
|
|
+#include <asm/pda.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
#include "cpu.h"
|
|
@@ -29,10 +30,8 @@
|
|
DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
|
|
EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
|
|
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
|
|
-#endif
|
|
+struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
|
|
+EXPORT_SYMBOL(_cpu_pda);
|
|
|
|
static int cachesize_override __cpuinitdata = -1;
|
|
static int disable_x86_fxsr __cpuinitdata;
|
|
@@ -60,7 +59,7 @@ static struct cpu_dev __cpuinitdata defa
|
|
.c_init = default_init,
|
|
.c_vendor = "Unknown",
|
|
};
|
|
-static struct cpu_dev * this_cpu = &default_cpu;
|
|
+static struct cpu_dev * this_cpu __cpuinitdata = &default_cpu;
|
|
|
|
static int __init cachesize_setup(char *str)
|
|
{
|
|
@@ -242,29 +241,14 @@ static int __cpuinit have_cpuid_p(void)
|
|
return flag_is_changeable_p(X86_EFLAGS_ID);
|
|
}
|
|
|
|
-/* Do minimum CPU detection early.
|
|
- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
|
|
- The others are not touched to avoid unwanted side effects.
|
|
-
|
|
- WARNING: this function is only called on the BP. Don't add code here
|
|
- that is supposed to run on all CPUs. */
|
|
-static void __init early_cpu_detect(void)
|
|
+void __init cpu_detect(struct cpuinfo_x86 *c)
|
|
{
|
|
- struct cpuinfo_x86 *c = &boot_cpu_data;
|
|
-
|
|
- c->x86_cache_alignment = 32;
|
|
-
|
|
- if (!have_cpuid_p())
|
|
- return;
|
|
-
|
|
/* Get vendor name */
|
|
cpuid(0x00000000, &c->cpuid_level,
|
|
(int *)&c->x86_vendor_id[0],
|
|
(int *)&c->x86_vendor_id[8],
|
|
(int *)&c->x86_vendor_id[4]);
|
|
|
|
- get_cpu_vendor(c, 1);
|
|
-
|
|
c->x86 = 4;
|
|
if (c->cpuid_level >= 0x00000001) {
|
|
u32 junk, tfms, cap0, misc;
|
|
@@ -281,6 +265,26 @@ static void __init early_cpu_detect(void
|
|
}
|
|
}
|
|
|
|
+/* Do minimum CPU detection early.
|
|
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
|
|
+ The others are not touched to avoid unwanted side effects.
|
|
+
|
|
+ WARNING: this function is only called on the BP. Don't add code here
|
|
+ that is supposed to run on all CPUs. */
|
|
+static void __init early_cpu_detect(void)
|
|
+{
|
|
+ struct cpuinfo_x86 *c = &boot_cpu_data;
|
|
+
|
|
+ c->x86_cache_alignment = 32;
|
|
+
|
|
+ if (!have_cpuid_p())
|
|
+ return;
|
|
+
|
|
+ cpu_detect(c);
|
|
+
|
|
+ get_cpu_vendor(c, 1);
|
|
+}
|
|
+
|
|
static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
|
|
{
|
|
u32 tfms, xlvl;
|
|
@@ -317,6 +321,8 @@ static void __cpuinit generic_identify(s
|
|
c->apicid = (ebx >> 24) & 0xFF;
|
|
#endif
|
|
#endif
|
|
+ if (c->x86_capability[0] & (1<<19))
|
|
+ c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
|
|
} else {
|
|
/* Have CPUID level 0 only - unheard of */
|
|
c->x86 = 4;
|
|
@@ -381,6 +387,7 @@ void __cpuinit identify_cpu(struct cpuin
|
|
c->x86_vendor_id[0] = '\0'; /* Unset */
|
|
c->x86_model_id[0] = '\0'; /* Unset */
|
|
c->x86_max_cores = 1;
|
|
+ c->x86_clflush_size = 32;
|
|
memset(&c->x86_capability, 0, sizeof c->x86_capability);
|
|
|
|
if (!have_cpuid_p()) {
|
|
@@ -601,61 +608,23 @@ void __init early_cpu_init(void)
|
|
#endif
|
|
}
|
|
|
|
-static void __cpuinit cpu_gdt_init(const struct Xgt_desc_struct *gdt_descr)
|
|
+/* Make sure %gs is initialized properly in idle threads */
|
|
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
|
|
{
|
|
- unsigned long frames[16];
|
|
- unsigned long va;
|
|
- int f;
|
|
-
|
|
- for (va = gdt_descr->address, f = 0;
|
|
- va < gdt_descr->address + gdt_descr->size;
|
|
- va += PAGE_SIZE, f++) {
|
|
- frames[f] = virt_to_mfn(va);
|
|
- make_lowmem_page_readonly(
|
|
- (void *)va, XENFEAT_writable_descriptor_tables);
|
|
- }
|
|
- if (HYPERVISOR_set_gdt(frames, (gdt_descr->size + 1) / 8))
|
|
- BUG();
|
|
+ memset(regs, 0, sizeof(struct pt_regs));
|
|
+ regs->xgs = __KERNEL_PDA;
|
|
+ return regs;
|
|
}
|
|
|
|
-/*
|
|
- * cpu_init() initializes state that is per-CPU. Some data is already
|
|
- * initialized (naturally) in the bootstrap process, such as the GDT
|
|
- * and IDT. We reload them nevertheless, this function acts as a
|
|
- * 'CPU state barrier', nothing should get across.
|
|
- */
|
|
-void __cpuinit cpu_init(void)
|
|
+static __cpuinit int alloc_gdt(int cpu)
|
|
{
|
|
- int cpu = smp_processor_id();
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
- struct tss_struct * t = &per_cpu(init_tss, cpu);
|
|
-#endif
|
|
- struct thread_struct *thread = &current->thread;
|
|
- struct desc_struct *gdt;
|
|
struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+ struct desc_struct *gdt;
|
|
+ struct i386_pda *pda;
|
|
|
|
- if (cpu_test_and_set(cpu, cpu_initialized)) {
|
|
- printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
|
|
- for (;;) local_irq_enable();
|
|
- }
|
|
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
-
|
|
- if (cpu_has_vme || cpu_has_de)
|
|
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
- if (tsc_disable && cpu_has_tsc) {
|
|
- printk(KERN_NOTICE "Disabling TSC...\n");
|
|
- /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
|
|
- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
|
|
- set_in_cr4(X86_CR4_TSD);
|
|
- }
|
|
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
+ pda = cpu_pda(cpu);
|
|
|
|
-#ifndef CONFIG_XEN
|
|
- /* The CPU hotplug case */
|
|
- if (cpu_gdt_descr->address) {
|
|
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
- memset(gdt, 0, PAGE_SIZE);
|
|
- goto old_gdt;
|
|
- }
|
|
/*
|
|
* This is a horrible hack to allocate the GDT. The problem
|
|
* is that cpu_init() is called really early for the boot CPU
|
|
@@ -663,54 +632,141 @@ void __cpuinit cpu_init(void)
|
|
* CPUs, when bootmem will have gone away
|
|
*/
|
|
if (NODE_DATA(0)->bdata->node_bootmem_map) {
|
|
- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
|
|
- /* alloc_bootmem_pages panics on failure, so no check */
|
|
+ BUG_ON(gdt != NULL || pda != NULL);
|
|
+
|
|
+ gdt = alloc_bootmem_pages(PAGE_SIZE);
|
|
+ pda = alloc_bootmem(sizeof(*pda));
|
|
+ /* alloc_bootmem(_pages) panics on failure, so no check */
|
|
+
|
|
memset(gdt, 0, PAGE_SIZE);
|
|
+ memset(pda, 0, sizeof(*pda));
|
|
} else {
|
|
- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
|
|
- if (unlikely(!gdt)) {
|
|
- printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
|
|
- for (;;)
|
|
- local_irq_enable();
|
|
+ /* GDT and PDA might already have been allocated if
|
|
+ this is a CPU hotplug re-insertion. */
|
|
+ if (gdt == NULL)
|
|
+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
|
|
+
|
|
+ if (pda == NULL)
|
|
+ pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
|
|
+
|
|
+ if (unlikely(!gdt || !pda)) {
|
|
+ free_pages((unsigned long)gdt, 0);
|
|
+ kfree(pda);
|
|
+ return 0;
|
|
}
|
|
}
|
|
-old_gdt:
|
|
+
|
|
+ cpu_gdt_descr->address = (unsigned long)gdt;
|
|
+ cpu_pda(cpu) = pda;
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+
|
|
+/* Initial PDA used by boot CPU */
|
|
+struct i386_pda boot_pda = {
|
|
+ ._pda = &boot_pda,
|
|
+ .cpu_number = 0,
|
|
+ .pcurrent = &init_task,
|
|
+};
|
|
+
|
|
+static inline void set_kernel_gs(void)
|
|
+{
|
|
+ /* Set %gs for this CPU's PDA. Memory clobber is to create a
|
|
+ barrier with respect to any PDA operations, so the compiler
|
|
+ doesn't move any before here. */
|
|
+ asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
|
|
+}
|
|
+
|
|
+/* Initialize the CPU's GDT and PDA. The boot CPU does this for
|
|
+ itself, but secondaries find this done for them. */
|
|
+__cpuinit int init_gdt(int cpu, struct task_struct *idle)
|
|
+{
|
|
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+ struct desc_struct *gdt;
|
|
+ struct i386_pda *pda;
|
|
+
|
|
+ /* For non-boot CPUs, the GDT and PDA should already have been
|
|
+ allocated. */
|
|
+ if (!alloc_gdt(cpu)) {
|
|
+ printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
+ pda = cpu_pda(cpu);
|
|
+
|
|
+ BUG_ON(gdt == NULL || pda == NULL);
|
|
+
|
|
/*
|
|
* Initialize the per-CPU GDT with the boot GDT,
|
|
* and set up the GDT descriptor:
|
|
*/
|
|
memcpy(gdt, cpu_gdt_table, GDT_SIZE);
|
|
+ cpu_gdt_descr->size = GDT_SIZE - 1;
|
|
|
|
- /* Set up GDT entry for 16bit stack */
|
|
- *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
|
|
- ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
|
|
- ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
|
|
- (CPU_16BIT_STACK_SIZE - 1);
|
|
+ pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
|
|
+ (u32 *)&gdt[GDT_ENTRY_PDA].b,
|
|
+ (unsigned long)pda, sizeof(*pda) - 1,
|
|
+ 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
|
|
+
|
|
+ memset(pda, 0, sizeof(*pda));
|
|
+ pda->_pda = pda;
|
|
+ pda->cpu_number = cpu;
|
|
+ pda->pcurrent = idle;
|
|
|
|
- cpu_gdt_descr->size = GDT_SIZE - 1;
|
|
- cpu_gdt_descr->address = (unsigned long)gdt;
|
|
-#else
|
|
- if (cpu == 0 && cpu_gdt_descr->address == 0) {
|
|
- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
|
|
- /* alloc_bootmem_pages panics on failure, so no check */
|
|
- memset(gdt, 0, PAGE_SIZE);
|
|
+ return 1;
|
|
+}
|
|
|
|
- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
|
|
-
|
|
- cpu_gdt_descr->size = GDT_SIZE;
|
|
- cpu_gdt_descr->address = (unsigned long)gdt;
|
|
+void __cpuinit cpu_set_gdt(int cpu)
|
|
+{
|
|
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+ unsigned long va, frames[16];
|
|
+ int f;
|
|
+
|
|
+ for (va = cpu_gdt_descr->address, f = 0;
|
|
+ va < cpu_gdt_descr->address + cpu_gdt_descr->size;
|
|
+ va += PAGE_SIZE, f++) {
|
|
+ frames[f] = virt_to_mfn(va);
|
|
+ make_lowmem_page_readonly(
|
|
+ (void *)va, XENFEAT_writable_descriptor_tables);
|
|
}
|
|
+ BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
|
|
+
|
|
+ set_kernel_gs();
|
|
+}
|
|
+
|
|
+/* Common CPU init for both boot and secondary CPUs */
|
|
+static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
|
|
+{
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+ struct tss_struct * t = &per_cpu(init_tss, cpu);
|
|
#endif
|
|
+ struct thread_struct *thread = &curr->thread;
|
|
+
|
|
+ if (cpu_test_and_set(cpu, cpu_initialized)) {
|
|
+ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
|
|
+ for (;;) local_irq_enable();
|
|
+ }
|
|
|
|
- cpu_gdt_init(cpu_gdt_descr);
|
|
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
+
|
|
+ if (cpu_has_vme || cpu_has_de)
|
|
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
+ if (tsc_disable && cpu_has_tsc) {
|
|
+ printk(KERN_NOTICE "Disabling TSC...\n");
|
|
+ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
|
|
+ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
|
|
+ set_in_cr4(X86_CR4_TSD);
|
|
+ }
|
|
|
|
/*
|
|
* Set up and load the per-CPU TSS and LDT
|
|
*/
|
|
atomic_inc(&init_mm.mm_count);
|
|
- current->active_mm = &init_mm;
|
|
- BUG_ON(current->mm);
|
|
- enter_lazy_tlb(&init_mm, current);
|
|
+ curr->active_mm = &init_mm;
|
|
+ if (curr->mm)
|
|
+ BUG();
|
|
+ enter_lazy_tlb(&init_mm, curr);
|
|
|
|
load_esp0(t, thread);
|
|
|
|
@@ -721,8 +777,8 @@ old_gdt:
|
|
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
|
|
#endif
|
|
|
|
- /* Clear %fs and %gs. */
|
|
- asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
|
|
+ /* Clear %fs. */
|
|
+ asm volatile ("mov %0, %%fs" : : "r" (0));
|
|
|
|
/* Clear all 6 debug registers: */
|
|
set_debugreg(0, 0);
|
|
@@ -740,6 +796,38 @@ old_gdt:
|
|
mxcsr_feature_mask_init();
|
|
}
|
|
|
|
+/* Entrypoint to initialize secondary CPU */
|
|
+void __cpuinit secondary_cpu_init(void)
|
|
+{
|
|
+ int cpu = smp_processor_id();
|
|
+ struct task_struct *curr = current;
|
|
+
|
|
+ _cpu_init(cpu, curr);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * cpu_init() initializes state that is per-CPU. Some data is already
|
|
+ * initialized (naturally) in the bootstrap process, such as the GDT
|
|
+ * and IDT. We reload them nevertheless, this function acts as a
|
|
+ * 'CPU state barrier', nothing should get across.
|
|
+ */
|
|
+void __cpuinit cpu_init(void)
|
|
+{
|
|
+ int cpu = smp_processor_id();
|
|
+ struct task_struct *curr = current;
|
|
+
|
|
+ /* Set up the real GDT and PDA, so we can transition from the
|
|
+ boot versions. */
|
|
+ if (!init_gdt(cpu, curr)) {
|
|
+ /* failed to allocate something; not much we can do... */
|
|
+ for (;;)
|
|
+ local_irq_enable();
|
|
+ }
|
|
+
|
|
+ cpu_set_gdt(cpu);
|
|
+ _cpu_init(cpu, curr);
|
|
+}
|
|
+
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
void __cpuinit cpu_uninit(void)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2008-01-28 12:24:18.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -12,7 +12,7 @@
|
|
static DEFINE_MUTEX(mtrr_mutex);
|
|
|
|
void generic_get_mtrr(unsigned int reg, unsigned long *base,
|
|
- unsigned int *size, mtrr_type * type)
|
|
+ unsigned long *size, mtrr_type * type)
|
|
{
|
|
struct xen_platform_op op;
|
|
|
|
@@ -115,8 +115,7 @@ int mtrr_del_page(int reg, unsigned long
|
|
{
|
|
unsigned i;
|
|
mtrr_type ltype;
|
|
- unsigned long lbase;
|
|
- unsigned int lsize;
|
|
+ unsigned long lbase, lsize;
|
|
int error = -EINVAL;
|
|
struct xen_platform_op op;
|
|
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2010-05-25/arch/x86/kernel/e820_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -0,0 +1,1002 @@
|
|
+#include <linux/kernel.h>
|
|
+#include <linux/types.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/bootmem.h>
|
|
+#include <linux/ioport.h>
|
|
+#include <linux/string.h>
|
|
+#include <linux/kexec.h>
|
|
+#include <linux/module.h>
|
|
+#include <linux/mm.h>
|
|
+#include <linux/efi.h>
|
|
+#include <linux/pfn.h>
|
|
+#include <linux/uaccess.h>
|
|
+
|
|
+#include <asm/pgtable.h>
|
|
+#include <asm/page.h>
|
|
+#include <asm/e820.h>
|
|
+#include <xen/interface/memory.h>
|
|
+
|
|
+#ifdef CONFIG_EFI
|
|
+int efi_enabled = 0;
|
|
+EXPORT_SYMBOL(efi_enabled);
|
|
+#endif
|
|
+
|
|
+struct e820map e820;
|
|
+struct change_member {
|
|
+ struct e820entry *pbios; /* pointer to original bios entry */
|
|
+ unsigned long long addr; /* address for this change point */
|
|
+};
|
|
+static struct change_member change_point_list[2*E820MAX] __initdata;
|
|
+static struct change_member *change_point[2*E820MAX] __initdata;
|
|
+static struct e820entry *overlap_list[E820MAX] __initdata;
|
|
+static struct e820entry new_bios[E820MAX] __initdata;
|
|
+/* For PCI or other memory-mapped resources */
|
|
+unsigned long pci_mem_start = 0x10000000;
|
|
+#ifdef CONFIG_PCI
|
|
+EXPORT_SYMBOL(pci_mem_start);
|
|
+#endif
|
|
+extern int user_defined_memmap;
|
|
+struct resource data_resource = {
|
|
+ .name = "Kernel data",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+struct resource code_resource = {
|
|
+ .name = "Kernel code",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource system_rom_resource = {
|
|
+ .name = "System ROM",
|
|
+ .start = 0xf0000,
|
|
+ .end = 0xfffff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource extension_rom_resource = {
|
|
+ .name = "Extension ROM",
|
|
+ .start = 0xe0000,
|
|
+ .end = 0xeffff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource adapter_rom_resources[] = { {
|
|
+ .name = "Adapter ROM",
|
|
+ .start = 0xc8000,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+}, {
|
|
+ .name = "Adapter ROM",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+}, {
|
|
+ .name = "Adapter ROM",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+}, {
|
|
+ .name = "Adapter ROM",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+}, {
|
|
+ .name = "Adapter ROM",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+}, {
|
|
+ .name = "Adapter ROM",
|
|
+ .start = 0,
|
|
+ .end = 0,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+} };
|
|
+
|
|
+static struct resource video_rom_resource = {
|
|
+ .name = "Video ROM",
|
|
+ .start = 0xc0000,
|
|
+ .end = 0xc7fff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource video_ram_resource = {
|
|
+ .name = "Video RAM area",
|
|
+ .start = 0xa0000,
|
|
+ .end = 0xbffff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
+};
|
|
+
|
|
+static struct resource standard_io_resources[] = { {
|
|
+ .name = "dma1",
|
|
+ .start = 0x0000,
|
|
+ .end = 0x001f,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "pic1",
|
|
+ .start = 0x0020,
|
|
+ .end = 0x0021,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "timer0",
|
|
+ .start = 0x0040,
|
|
+ .end = 0x0043,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "timer1",
|
|
+ .start = 0x0050,
|
|
+ .end = 0x0053,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "keyboard",
|
|
+ .start = 0x0060,
|
|
+ .end = 0x006f,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "dma page reg",
|
|
+ .start = 0x0080,
|
|
+ .end = 0x008f,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "pic2",
|
|
+ .start = 0x00a0,
|
|
+ .end = 0x00a1,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "dma2",
|
|
+ .start = 0x00c0,
|
|
+ .end = 0x00df,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+}, {
|
|
+ .name = "fpu",
|
|
+ .start = 0x00f0,
|
|
+ .end = 0x00ff,
|
|
+ .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
+} };
|
|
+
|
|
+static int romsignature(const unsigned char *x)
|
|
+{
|
|
+ unsigned short sig;
|
|
+ int ret = 0;
|
|
+ if (probe_kernel_address((const unsigned short *)x, sig) == 0)
|
|
+ ret = (sig == 0xaa55);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static int __init romchecksum(unsigned char *rom, unsigned long length)
|
|
+{
|
|
+ unsigned char *p, sum = 0;
|
|
+
|
|
+ for (p = rom; p < rom + length; p++)
|
|
+ sum += *p;
|
|
+ return sum == 0;
|
|
+}
|
|
+
|
|
+static void __init probe_roms(void)
|
|
+{
|
|
+ unsigned long start, length, upper;
|
|
+ unsigned char *rom;
|
|
+ int i;
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ /* Nothing to do if not running in dom0. */
|
|
+ if (!is_initial_xendomain())
|
|
+ return;
|
|
+#endif
|
|
+
|
|
+ /* video rom */
|
|
+ upper = adapter_rom_resources[0].start;
|
|
+ for (start = video_rom_resource.start; start < upper; start += 2048) {
|
|
+ rom = isa_bus_to_virt(start);
|
|
+ if (!romsignature(rom))
|
|
+ continue;
|
|
+
|
|
+ video_rom_resource.start = start;
|
|
+
|
|
+ /* 0 < length <= 0x7f * 512, historically */
|
|
+ length = rom[2] * 512;
|
|
+
|
|
+ /* if checksum okay, trust length byte */
|
|
+ if (length && romchecksum(rom, length))
|
|
+ video_rom_resource.end = start + length - 1;
|
|
+
|
|
+ request_resource(&iomem_resource, &video_rom_resource);
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
|
|
+ if (start < upper)
|
|
+ start = upper;
|
|
+
|
|
+ /* system rom */
|
|
+ request_resource(&iomem_resource, &system_rom_resource);
|
|
+ upper = system_rom_resource.start;
|
|
+
|
|
+ /* check for extension rom (ignore length byte!) */
|
|
+ rom = isa_bus_to_virt((unsigned long)extension_rom_resource.start);
|
|
+ if (romsignature(rom)) {
|
|
+ length = extension_rom_resource.end - extension_rom_resource.start + 1;
|
|
+ if (romchecksum(rom, length)) {
|
|
+ request_resource(&iomem_resource, &extension_rom_resource);
|
|
+ upper = extension_rom_resource.start;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* check for adapter roms on 2k boundaries */
|
|
+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
|
|
+ rom = isa_bus_to_virt(start);
|
|
+ if (!romsignature(rom))
|
|
+ continue;
|
|
+
|
|
+ /* 0 < length <= 0x7f * 512, historically */
|
|
+ length = rom[2] * 512;
|
|
+
|
|
+ /* but accept any length that fits if checksum okay */
|
|
+ if (!length || start + length > upper || !romchecksum(rom, length))
|
|
+ continue;
|
|
+
|
|
+ adapter_rom_resources[i].start = start;
|
|
+ adapter_rom_resources[i].end = start + length - 1;
|
|
+ request_resource(&iomem_resource, &adapter_rom_resources[i]);
|
|
+
|
|
+ start = adapter_rom_resources[i++].end & ~2047UL;
|
|
+ }
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+static struct e820map machine_e820 __initdata;
|
|
+#define e820 machine_e820
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * Request address space for all standard RAM and ROM resources
|
|
+ * and also for regions reported as reserved by the e820.
|
|
+ */
|
|
+static void __init
|
|
+legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ probe_roms();
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct resource *res;
|
|
+#ifndef CONFIG_RESOURCES_64BIT
|
|
+ if (e820.map[i].addr + e820.map[i].size > 0x100000000ULL)
|
|
+ continue;
|
|
+#endif
|
|
+ res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
|
|
+ switch (e820.map[i].type) {
|
|
+ case E820_RAM: res->name = "System RAM"; break;
|
|
+ case E820_ACPI: res->name = "ACPI Tables"; break;
|
|
+ case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
|
|
+ default: res->name = "reserved";
|
|
+ }
|
|
+ res->start = e820.map[i].addr;
|
|
+ res->end = res->start + e820.map[i].size - 1;
|
|
+ res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
+ if (request_resource(&iomem_resource, res)) {
|
|
+ kfree(res);
|
|
+ continue;
|
|
+ }
|
|
+ if (e820.map[i].type == E820_RAM) {
|
|
+ /*
|
|
+ * We don't know which RAM region contains kernel data,
|
|
+ * so we try it repeatedly and let the resource manager
|
|
+ * test it.
|
|
+ */
|
|
+#ifndef CONFIG_XEN
|
|
+ request_resource(res, code_resource);
|
|
+ request_resource(res, data_resource);
|
|
+#endif
|
|
+#ifdef CONFIG_KEXEC
|
|
+ request_resource(res, &crashk_res);
|
|
+#ifdef CONFIG_XEN
|
|
+ xen_machine_kexec_register_resources(res);
|
|
+#endif
|
|
+#endif
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+#undef e820
|
|
+
|
|
+/*
|
|
+ * Request address space for all standard resources
|
|
+ *
|
|
+ * This is called just before pcibios_init(), which is also a
|
|
+ * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
|
|
+ */
|
|
+static int __init request_standard_resources(void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ /* Nothing to do if not running in dom0. */
|
|
+ if (!is_initial_xendomain())
|
|
+ return 0;
|
|
+
|
|
+ printk("Setting up standard PCI resources\n");
|
|
+ if (efi_enabled)
|
|
+ efi_initialize_iomem_resources(&code_resource, &data_resource);
|
|
+ else
|
|
+ legacy_init_iomem_resources(&code_resource, &data_resource);
|
|
+
|
|
+ /* EFI systems may still have VGA */
|
|
+ request_resource(&iomem_resource, &video_ram_resource);
|
|
+
|
|
+ /* request I/O space for devices used on all i[345]86 PCs */
|
|
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
+ request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+subsys_initcall(request_standard_resources);
|
|
+
|
|
+void __init add_memory_region(unsigned long long start,
|
|
+ unsigned long long size, int type)
|
|
+{
|
|
+ int x;
|
|
+
|
|
+ if (!efi_enabled) {
|
|
+ x = e820.nr_map;
|
|
+
|
|
+ if (x == E820MAX) {
|
|
+ printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ e820.map[x].addr = start;
|
|
+ e820.map[x].size = size;
|
|
+ e820.map[x].type = type;
|
|
+ e820.nr_map++;
|
|
+ }
|
|
+} /* add_memory_region */
|
|
+
|
|
+/*
|
|
+ * Sanitize the BIOS e820 map.
|
|
+ *
|
|
+ * Some e820 responses include overlapping entries. The following
|
|
+ * replaces the original e820 map with a new one, removing overlaps.
|
|
+ *
|
|
+ */
|
|
+int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
|
+{
|
|
+ struct change_member *change_tmp;
|
|
+ unsigned long current_type, last_type;
|
|
+ unsigned long long last_addr;
|
|
+ int chgidx, still_changing;
|
|
+ int overlap_entries;
|
|
+ int new_bios_entry;
|
|
+ int old_nr, new_nr, chg_nr;
|
|
+ int i;
|
|
+
|
|
+ /*
|
|
+ Visually we're performing the following (1,2,3,4 = memory types)...
|
|
+
|
|
+ Sample memory map (w/overlaps):
|
|
+ ____22__________________
|
|
+ ______________________4_
|
|
+ ____1111________________
|
|
+ _44_____________________
|
|
+ 11111111________________
|
|
+ ____________________33__
|
|
+ ___________44___________
|
|
+ __________33333_________
|
|
+ ______________22________
|
|
+ ___________________2222_
|
|
+ _________111111111______
|
|
+ _____________________11_
|
|
+ _________________4______
|
|
+
|
|
+ Sanitized equivalent (no overlap):
|
|
+ 1_______________________
|
|
+ _44_____________________
|
|
+ ___1____________________
|
|
+ ____22__________________
|
|
+ ______11________________
|
|
+ _________1______________
|
|
+ __________3_____________
|
|
+ ___________44___________
|
|
+ _____________33_________
|
|
+ _______________2________
|
|
+ ________________1_______
|
|
+ _________________4______
|
|
+ ___________________2____
|
|
+ ____________________33__
|
|
+ ______________________4_
|
|
+ */
|
|
+ printk("sanitize start\n");
|
|
+ /* if there's only one memory region, don't bother */
|
|
+ if (*pnr_map < 2) {
|
|
+ printk("sanitize bail 0\n");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ old_nr = *pnr_map;
|
|
+
|
|
+ /* bail out if we find any unreasonable addresses in bios map */
|
|
+ for (i=0; i<old_nr; i++)
|
|
+ if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
|
|
+ printk("sanitize bail 1\n");
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ /* create pointers for initial change-point information (for sorting) */
|
|
+ for (i=0; i < 2*old_nr; i++)
|
|
+ change_point[i] = &change_point_list[i];
|
|
+
|
|
+ /* record all known change-points (starting and ending addresses),
|
|
+ omitting those that are for empty memory regions */
|
|
+ chgidx = 0;
|
|
+ for (i=0; i < old_nr; i++) {
|
|
+ if (biosmap[i].size != 0) {
|
|
+ change_point[chgidx]->addr = biosmap[i].addr;
|
|
+ change_point[chgidx++]->pbios = &biosmap[i];
|
|
+ change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
|
|
+ change_point[chgidx++]->pbios = &biosmap[i];
|
|
+ }
|
|
+ }
|
|
+ chg_nr = chgidx; /* true number of change-points */
|
|
+
|
|
+ /* sort change-point list by memory addresses (low -> high) */
|
|
+ still_changing = 1;
|
|
+ while (still_changing) {
|
|
+ still_changing = 0;
|
|
+ for (i=1; i < chg_nr; i++) {
|
|
+ /* if <current_addr> < <last_addr>, swap */
|
|
+ /* or, if current=<start_addr> & last=<end_addr>, swap */
|
|
+ if ((change_point[i]->addr < change_point[i-1]->addr) ||
|
|
+ ((change_point[i]->addr == change_point[i-1]->addr) &&
|
|
+ (change_point[i]->addr == change_point[i]->pbios->addr) &&
|
|
+ (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
|
|
+ )
|
|
+ {
|
|
+ change_tmp = change_point[i];
|
|
+ change_point[i] = change_point[i-1];
|
|
+ change_point[i-1] = change_tmp;
|
|
+ still_changing=1;
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* create a new bios memory map, removing overlaps */
|
|
+ overlap_entries=0; /* number of entries in the overlap table */
|
|
+ new_bios_entry=0; /* index for creating new bios map entries */
|
|
+ last_type = 0; /* start with undefined memory type */
|
|
+ last_addr = 0; /* start with 0 as last starting address */
|
|
+ /* loop through change-points, determining effect on the new bios map */
|
|
+ for (chgidx=0; chgidx < chg_nr; chgidx++)
|
|
+ {
|
|
+ /* keep track of all overlapping bios entries */
|
|
+ if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
|
|
+ {
|
|
+ /* add map entry to overlap list (> 1 entry implies an overlap) */
|
|
+ overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
|
|
+ }
|
|
+ else
|
|
+ {
|
|
+ /* remove entry from list (order independent, so swap with last) */
|
|
+ for (i=0; i<overlap_entries; i++)
|
|
+ {
|
|
+ if (overlap_list[i] == change_point[chgidx]->pbios)
|
|
+ overlap_list[i] = overlap_list[overlap_entries-1];
|
|
+ }
|
|
+ overlap_entries--;
|
|
+ }
|
|
+ /* if there are overlapping entries, decide which "type" to use */
|
|
+ /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
|
|
+ current_type = 0;
|
|
+ for (i=0; i<overlap_entries; i++)
|
|
+ if (overlap_list[i]->type > current_type)
|
|
+ current_type = overlap_list[i]->type;
|
|
+ /* continue building up new bios map based on this information */
|
|
+ if (current_type != last_type) {
|
|
+ if (last_type != 0) {
|
|
+ new_bios[new_bios_entry].size =
|
|
+ change_point[chgidx]->addr - last_addr;
|
|
+ /* move forward only if the new size was non-zero */
|
|
+ if (new_bios[new_bios_entry].size != 0)
|
|
+ if (++new_bios_entry >= E820MAX)
|
|
+ break; /* no more space left for new bios entries */
|
|
+ }
|
|
+ if (current_type != 0) {
|
|
+ new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
|
|
+ new_bios[new_bios_entry].type = current_type;
|
|
+ last_addr=change_point[chgidx]->addr;
|
|
+ }
|
|
+ last_type = current_type;
|
|
+ }
|
|
+ }
|
|
+ new_nr = new_bios_entry; /* retain count for new bios entries */
|
|
+
|
|
+ /* copy new bios mapping into original location */
|
|
+ memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
|
|
+ *pnr_map = new_nr;
|
|
+
|
|
+ printk("sanitize end\n");
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Copy the BIOS e820 map into a safe place.
|
|
+ *
|
|
+ * Sanity-check it while we're at it..
|
|
+ *
|
|
+ * If we're lucky and live on a modern system, the setup code
|
|
+ * will have given us a memory map that we can use to properly
|
|
+ * set up memory. If we aren't, we'll fake a memory map.
|
|
+ *
|
|
+ * We check to see that the memory map contains at least 2 elements
|
|
+ * before we'll use it, because the detection code in setup.S may
|
|
+ * not be perfect and most every PC known to man has two memory
|
|
+ * regions: one from 0 to 640k, and one from 1mb up. (The IBM
|
|
+ * thinkpad 560x, for example, does not cooperate with the memory
|
|
+ * detection code.)
|
|
+ */
|
|
+int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ /* Only one memory region (or negative)? Ignore it */
|
|
+ if (nr_map < 2)
|
|
+ return -1;
|
|
+#else
|
|
+ BUG_ON(nr_map < 1);
|
|
+#endif
|
|
+
|
|
+ do {
|
|
+ unsigned long long start = biosmap->addr;
|
|
+ unsigned long long size = biosmap->size;
|
|
+ unsigned long long end = start + size;
|
|
+ unsigned long type = biosmap->type;
|
|
+ printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
|
|
+
|
|
+ /* Overflow in 64 bits? Ignore the memory map. */
|
|
+ if (start > end)
|
|
+ return -1;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ /*
|
|
+ * Some BIOSes claim RAM in the 640k - 1M region.
|
|
+ * Not right. Fix it up.
|
|
+ */
|
|
+ if (type == E820_RAM) {
|
|
+ printk("copy_e820_map() type is E820_RAM\n");
|
|
+ if (start < 0x100000ULL && end > 0xA0000ULL) {
|
|
+ printk("copy_e820_map() lies in range...\n");
|
|
+ if (start < 0xA0000ULL) {
|
|
+ printk("copy_e820_map() start < 0xA0000ULL\n");
|
|
+ add_memory_region(start, 0xA0000ULL-start, type);
|
|
+ }
|
|
+ if (end <= 0x100000ULL) {
|
|
+ printk("copy_e820_map() end <= 0x100000ULL\n");
|
|
+ continue;
|
|
+ }
|
|
+ start = 0x100000ULL;
|
|
+ size = end - start;
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+ add_memory_region(start, size, type);
|
|
+ } while (biosmap++,--nr_map);
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ if (is_initial_xendomain()) {
|
|
+ struct xen_memory_map memmap;
|
|
+
|
|
+ memmap.nr_entries = E820MAX;
|
|
+ set_xen_guest_handle(memmap.buffer, machine_e820.map);
|
|
+
|
|
+ if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
|
|
+ BUG();
|
|
+ machine_e820.nr_map = memmap.nr_entries;
|
|
+ } else
|
|
+ machine_e820 = e820;
|
|
+#endif
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Callback for efi_memmap_walk.
|
|
+ */
|
|
+static int __init
|
|
+efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
|
|
+{
|
|
+ unsigned long *max_pfn = arg, pfn;
|
|
+
|
|
+ if (start < end) {
|
|
+ pfn = PFN_UP(end -1);
|
|
+ if (pfn > *max_pfn)
|
|
+ *max_pfn = pfn;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int __init
|
|
+efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
|
|
+{
|
|
+ memory_present(0, PFN_UP(start), PFN_DOWN(end));
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Find the highest page frame number we have available
|
|
+ */
|
|
+void __init find_max_pfn(void)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ max_pfn = 0;
|
|
+ if (efi_enabled) {
|
|
+ efi_memmap_walk(efi_find_max_pfn, &max_pfn);
|
|
+ efi_memmap_walk(efi_memory_present_wrapper, NULL);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ unsigned long start, end;
|
|
+ /* RAM? */
|
|
+ if (e820.map[i].type != E820_RAM)
|
|
+ continue;
|
|
+ start = PFN_UP(e820.map[i].addr);
|
|
+ end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
|
+ if (start >= end)
|
|
+ continue;
|
|
+ if (end > max_pfn)
|
|
+ max_pfn = end;
|
|
+ memory_present(0, start, end);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Free all available memory for boot time allocation. Used
|
|
+ * as a callback function by efi_memmap_walk()
|
|
+ */
|
|
+
|
|
+static int __init
|
|
+free_available_memory(unsigned long start, unsigned long end, void *arg)
|
|
+{
|
|
+ /* check max_low_pfn */
|
|
+ if (start >= (max_low_pfn << PAGE_SHIFT))
|
|
+ return 0;
|
|
+ if (end >= (max_low_pfn << PAGE_SHIFT))
|
|
+ end = max_low_pfn << PAGE_SHIFT;
|
|
+ if (start < end)
|
|
+ free_bootmem(start, end - start);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+/*
|
|
+ * Register fully available low RAM pages with the bootmem allocator.
|
|
+ */
|
|
+void __init register_bootmem_low_pages(unsigned long max_low_pfn)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ if (efi_enabled) {
|
|
+ efi_memmap_walk(free_available_memory, NULL);
|
|
+ return;
|
|
+ }
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ unsigned long curr_pfn, last_pfn, size;
|
|
+ /*
|
|
+ * Reserve usable low memory
|
|
+ */
|
|
+ if (e820.map[i].type != E820_RAM)
|
|
+ continue;
|
|
+ /*
|
|
+ * We are rounding up the start address of usable memory:
|
|
+ */
|
|
+ curr_pfn = PFN_UP(e820.map[i].addr);
|
|
+ if (curr_pfn >= max_low_pfn)
|
|
+ continue;
|
|
+ /*
|
|
+ * ... and at the end of the usable range downwards:
|
|
+ */
|
|
+ last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ /*
|
|
+ * Truncate to the number of actual pages currently
|
|
+ * present.
|
|
+ */
|
|
+ if (last_pfn > xen_start_info->nr_pages)
|
|
+ last_pfn = xen_start_info->nr_pages;
|
|
+#endif
|
|
+
|
|
+ if (last_pfn > max_low_pfn)
|
|
+ last_pfn = max_low_pfn;
|
|
+
|
|
+ /*
|
|
+ * .. finally, did all the rounding and playing
|
|
+ * around just make the area go away?
|
|
+ */
|
|
+ if (last_pfn <= curr_pfn)
|
|
+ continue;
|
|
+
|
|
+ size = last_pfn - curr_pfn;
|
|
+ free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
|
|
+ }
|
|
+}
|
|
+
|
|
+void __init e820_register_memory(void)
|
|
+{
|
|
+ unsigned long gapstart, gapsize, round;
|
|
+ unsigned long long last;
|
|
+ int i;
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+#define e820 machine_e820
|
|
+#endif
|
|
+ /*
|
|
+	 * Search for the biggest gap in the low 32 bits of the e820
|
|
+ * memory space.
|
|
+ */
|
|
+ last = 0x100000000ull;
|
|
+ gapstart = 0x10000000;
|
|
+ gapsize = 0x400000;
|
|
+ i = e820.nr_map;
|
|
+ while (--i >= 0) {
|
|
+ unsigned long long start = e820.map[i].addr;
|
|
+ unsigned long long end = start + e820.map[i].size;
|
|
+
|
|
+ /*
|
|
+ * Since "last" is at most 4GB, we know we'll
|
|
+ * fit in 32 bits if this condition is true
|
|
+ */
|
|
+ if (last > end) {
|
|
+ unsigned long gap = last - end;
|
|
+
|
|
+ if (gap > gapsize) {
|
|
+ gapsize = gap;
|
|
+ gapstart = end;
|
|
+ }
|
|
+ }
|
|
+ if (start < last)
|
|
+ last = start;
|
|
+ }
|
|
+#undef e820
|
|
+
|
|
+ /*
|
|
+ * See how much we want to round up: start off with
|
|
+ * rounding to the next 1MB area.
|
|
+ */
|
|
+ round = 0x100000;
|
|
+ while ((gapsize >> 4) > round)
|
|
+ round += round;
|
|
+ /* Fun with two's complement */
|
|
+ pci_mem_start = (gapstart + round) & -round;
|
|
+
|
|
+ printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
|
|
+ pci_mem_start, gapstart, gapsize);
|
|
+}
|
|
+
|
|
+void __init print_memory_map(char *who)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ printk(" %s: %016Lx - %016Lx ", who,
|
|
+ e820.map[i].addr,
|
|
+ e820.map[i].addr + e820.map[i].size);
|
|
+ switch (e820.map[i].type) {
|
|
+ case E820_RAM: printk("(usable)\n");
|
|
+ break;
|
|
+ case E820_RESERVED:
|
|
+ printk("(reserved)\n");
|
|
+ break;
|
|
+ case E820_ACPI:
|
|
+ printk("(ACPI data)\n");
|
|
+ break;
|
|
+ case E820_NVS:
|
|
+ printk("(ACPI NVS)\n");
|
|
+ break;
|
|
+ default: printk("type %lu\n", e820.map[i].type);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static __init __always_inline void efi_limit_regions(unsigned long long size)
|
|
+{
|
|
+ unsigned long long current_addr = 0;
|
|
+ efi_memory_desc_t *md, *next_md;
|
|
+ void *p, *p1;
|
|
+ int i, j;
|
|
+
|
|
+ j = 0;
|
|
+ p1 = memmap.map;
|
|
+ for (p = p1, i = 0; p < memmap.map_end; p += memmap.desc_size, i++) {
|
|
+ md = p;
|
|
+ next_md = p1;
|
|
+ current_addr = md->phys_addr +
|
|
+ PFN_PHYS(md->num_pages);
|
|
+ if (is_available_memory(md)) {
|
|
+ if (md->phys_addr >= size) continue;
|
|
+ memcpy(next_md, md, memmap.desc_size);
|
|
+ if (current_addr >= size) {
|
|
+ next_md->num_pages -=
|
|
+ PFN_UP(current_addr-size);
|
|
+ }
|
|
+ p1 += memmap.desc_size;
|
|
+ next_md = p1;
|
|
+ j++;
|
|
+ } else if ((md->attribute & EFI_MEMORY_RUNTIME) ==
|
|
+ EFI_MEMORY_RUNTIME) {
|
|
+ /* In order to make runtime services
|
|
+ * available we have to include runtime
|
|
+ * memory regions in memory map */
|
|
+ memcpy(next_md, md, memmap.desc_size);
|
|
+ p1 += memmap.desc_size;
|
|
+ next_md = p1;
|
|
+ j++;
|
|
+ }
|
|
+ }
|
|
+ memmap.nr_map = j;
|
|
+ memmap.map_end = memmap.map +
|
|
+ (memmap.nr_map * memmap.desc_size);
|
|
+}
|
|
+
|
|
+void __init limit_regions(unsigned long long size)
|
|
+{
|
|
+ unsigned long long current_addr = 0;
|
|
+ int i;
|
|
+
|
|
+ print_memory_map("limit_regions start");
|
|
+ if (efi_enabled) {
|
|
+ efi_limit_regions(size);
|
|
+ return;
|
|
+ }
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ current_addr = e820.map[i].addr + e820.map[i].size;
|
|
+ if (current_addr < size)
|
|
+ continue;
|
|
+
|
|
+ if (e820.map[i].type != E820_RAM)
|
|
+ continue;
|
|
+
|
|
+ if (e820.map[i].addr >= size) {
|
|
+ /*
|
|
+ * This region starts past the end of the
|
|
+ * requested size, skip it completely.
|
|
+ */
|
|
+ e820.nr_map = i;
|
|
+ } else {
|
|
+ e820.nr_map = i + 1;
|
|
+ e820.map[i].size -= current_addr - size;
|
|
+ }
|
|
+ print_memory_map("limit_regions endfor");
|
|
+ return;
|
|
+ }
|
|
+#ifdef CONFIG_XEN
|
|
+ if (current_addr < size) {
|
|
+ /*
|
|
+ * The e820 map finished before our requested size so
|
|
+ * extend the final entry to the requested address.
|
|
+ */
|
|
+ --i;
|
|
+ if (e820.map[i].type == E820_RAM)
|
|
+ e820.map[i].size -= current_addr - size;
|
|
+ else
|
|
+ add_memory_region(current_addr, size - current_addr, E820_RAM);
|
|
+ }
|
|
+#endif
|
|
+ print_memory_map("limit_regions endfunc");
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This function checks if any part of the range <start,end> is mapped
|
|
+ * with type.
|
|
+ */
|
|
+int
|
|
+e820_any_mapped(u64 start, u64 end, unsigned type)
|
|
+{
|
|
+ int i;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ const struct e820entry *ei = &e820.map[i];
|
|
+#else
|
|
+ if (!is_initial_xendomain())
|
|
+ return 0;
|
|
+ for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
+ const struct e820entry *ei = &machine_e820.map[i];
|
|
+#endif
|
|
+
|
|
+ if (type && ei->type != type)
|
|
+ continue;
|
|
+ if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
+ continue;
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(e820_any_mapped);
|
|
+
|
|
+ /*
|
|
+ * This function checks if the entire range <start,end> is mapped with type.
|
|
+ *
|
|
+  * Note: this function only works correctly if the e820 table is sorted and
|
|
+ * not-overlapping, which is the case
|
|
+ */
|
|
+int __init
|
|
+e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
|
|
+{
|
|
+ u64 start = s;
|
|
+ u64 end = e;
|
|
+ int i;
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+#else
|
|
+ if (!is_initial_xendomain())
|
|
+ return 0;
|
|
+ for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
+ const struct e820entry *ei = &machine_e820.map[i];
|
|
+#endif
|
|
+
|
|
+ if (type && ei->type != type)
|
|
+ continue;
|
|
+ /* is the region (part) in overlap with the current region ?*/
|
|
+ if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
+ continue;
|
|
+ /* if the region is at the beginning of <start,end> we move
|
|
+ * start to the end of the region since it's ok until there
|
|
+ */
|
|
+ if (ei->addr <= start)
|
|
+ start = ei->addr + ei->size;
|
|
+ /* if start is now at or beyond end, we're done, full
|
|
+ * coverage */
|
|
+ if (start >= end)
|
|
+ return 1; /* we're done */
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static int __init parse_memmap(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
+
|
|
+ if (strcmp(arg, "exactmap") == 0) {
|
|
+#ifdef CONFIG_CRASH_DUMP
|
|
+ /* If we are doing a crash dump, we
|
|
+ * still need to know the real mem
|
|
+ * size before original memory map is
|
|
+ * reset.
|
|
+ */
|
|
+ find_max_pfn();
|
|
+ saved_max_pfn = max_pfn;
|
|
+#endif
|
|
+ e820.nr_map = 0;
|
|
+ user_defined_memmap = 1;
|
|
+ } else {
|
|
+ /* If the user specifies memory size, we
|
|
+ * limit the BIOS-provided memory map to
|
|
+ * that size. exactmap can be used to specify
|
|
+ * the exact map. mem=number can be used to
|
|
+ * trim the existing memory map.
|
|
+ */
|
|
+ unsigned long long start_at, mem_size;
|
|
+
|
|
+ mem_size = memparse(arg, &arg);
|
|
+ if (*arg == '@') {
|
|
+ start_at = memparse(arg+1, &arg);
|
|
+ add_memory_region(start_at, mem_size, E820_RAM);
|
|
+ } else if (*arg == '#') {
|
|
+ start_at = memparse(arg+1, &arg);
|
|
+ add_memory_region(start_at, mem_size, E820_ACPI);
|
|
+ } else if (*arg == '$') {
|
|
+ start_at = memparse(arg+1, &arg);
|
|
+ add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
+ } else {
|
|
+ limit_regions(mem_size);
|
|
+ user_defined_memmap = 1;
|
|
+ }
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+early_param("memmap", parse_memmap);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/entry_32-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/entry_32-xen.S 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -30,12 +30,13 @@
|
|
* 18(%esp) - %eax
|
|
* 1C(%esp) - %ds
|
|
* 20(%esp) - %es
|
|
- * 24(%esp) - orig_eax
|
|
- * 28(%esp) - %eip
|
|
- * 2C(%esp) - %cs
|
|
- * 30(%esp) - %eflags
|
|
- * 34(%esp) - %oldesp
|
|
- * 38(%esp) - %oldss
|
|
+ * 24(%esp) - %gs
|
|
+ * 28(%esp) - orig_eax
|
|
+ * 2C(%esp) - %eip
|
|
+ * 30(%esp) - %cs
|
|
+ * 34(%esp) - %eflags
|
|
+ * 38(%esp) - %oldesp
|
|
+ * 3C(%esp) - %oldss
|
|
*
|
|
* "current" is in register %ebx during any slow entries.
|
|
*/
|
|
@@ -48,27 +49,25 @@
|
|
#include <asm/smp.h>
|
|
#include <asm/page.h>
|
|
#include <asm/desc.h>
|
|
+#include <asm/percpu.h>
|
|
#include <asm/dwarf2.h>
|
|
#include "irq_vectors.h"
|
|
#include <xen/interface/xen.h>
|
|
|
|
-#define nr_syscalls ((syscall_table_size)/4)
|
|
+/*
|
|
+ * We use macros for low-level operations which need to be overridden
|
|
+ * for paravirtualization. The following will never clobber any registers:
|
|
+ * INTERRUPT_RETURN (aka. "iret")
|
|
+ * GET_CR0_INTO_EAX (aka. "movl %cr0, %eax")
|
|
+ * ENABLE_INTERRUPTS_SYSEXIT (aka "sti; sysexit").
|
|
+ *
|
|
+ * For DISABLE_INTERRUPTS/ENABLE_INTERRUPTS (aka "cli"/"sti"), you must
|
|
+ * specify what registers can be overwritten (CLBR_NONE, CLBR_EAX/EDX/ECX/ANY).
|
|
+ * Allowing a register to be clobbered can shrink the paravirt replacement
|
|
+ * enough to patch inline, increasing performance.
|
|
+ */
|
|
|
|
-EBX = 0x00
|
|
-ECX = 0x04
|
|
-EDX = 0x08
|
|
-ESI = 0x0C
|
|
-EDI = 0x10
|
|
-EBP = 0x14
|
|
-EAX = 0x18
|
|
-DS = 0x1C
|
|
-ES = 0x20
|
|
-ORIG_EAX = 0x24
|
|
-EIP = 0x28
|
|
-CS = 0x2C
|
|
-EFLAGS = 0x30
|
|
-OLDESP = 0x34
|
|
-OLDSS = 0x38
|
|
+#define nr_syscalls ((syscall_table_size)/4)
|
|
|
|
CF_MASK = 0x00000001
|
|
TF_MASK = 0x00000100
|
|
@@ -79,61 +78,16 @@ VM_MASK = 0x00020000
|
|
/* Pseudo-eflags. */
|
|
NMI_MASK = 0x80000000
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-/* These are replaces for paravirtualization */
|
|
-#define DISABLE_INTERRUPTS cli
|
|
-#define ENABLE_INTERRUPTS sti
|
|
-#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
|
|
-#define INTERRUPT_RETURN iret
|
|
-#define GET_CR0_INTO_EAX movl %cr0, %eax
|
|
-#else
|
|
-/* Offsets into shared_info_t. */
|
|
-#define evtchn_upcall_pending /* 0 */
|
|
-#define evtchn_upcall_mask 1
|
|
-
|
|
-#define sizeof_vcpu_shift 6
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
|
|
- shl $sizeof_vcpu_shift,%esi ; \
|
|
- addl HYPERVISOR_shared_info,%esi
|
|
-#else
|
|
-#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
|
|
-#endif
|
|
-
|
|
-#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
|
|
-#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
|
|
-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
|
|
-#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
|
|
- __DISABLE_INTERRUPTS
|
|
-#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
|
|
- __ENABLE_INTERRUPTS
|
|
-#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
|
|
-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
- __TEST_PENDING ; \
|
|
- jnz 14f # process more events if necessary... ; \
|
|
- movl ESI(%esp), %esi ; \
|
|
- sysexit ; \
|
|
-14: __DISABLE_INTERRUPTS ; \
|
|
- TRACE_IRQS_OFF ; \
|
|
-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
- push %esp ; \
|
|
- call evtchn_do_upcall ; \
|
|
- add $4,%esp ; \
|
|
- jmp ret_from_intr
|
|
-#define INTERRUPT_RETURN iret
|
|
-#endif
|
|
-
|
|
#ifdef CONFIG_PREEMPT
|
|
-#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
|
|
+#define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF
|
|
#else
|
|
-#define preempt_stop
|
|
+#define preempt_stop(clobbers)
|
|
#define resume_kernel restore_nocheck
|
|
#endif
|
|
|
|
.macro TRACE_IRQS_IRET
|
|
#ifdef CONFIG_TRACE_IRQFLAGS
|
|
- testl $IF_MASK,EFLAGS(%esp) # interrupts off?
|
|
+ testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off?
|
|
jz 1f
|
|
TRACE_IRQS_ON
|
|
1:
|
|
@@ -148,6 +102,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
|
|
|
|
#define SAVE_ALL \
|
|
cld; \
|
|
+ pushl %gs; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4;\
|
|
+ /*CFI_REL_OFFSET gs, 0;*/\
|
|
pushl %es; \
|
|
CFI_ADJUST_CFA_OFFSET 4;\
|
|
/*CFI_REL_OFFSET es, 0;*/\
|
|
@@ -177,7 +134,9 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
|
|
CFI_REL_OFFSET ebx, 0;\
|
|
movl $(__USER_DS), %edx; \
|
|
movl %edx, %ds; \
|
|
- movl %edx, %es;
|
|
+ movl %edx, %es; \
|
|
+ movl $(__KERNEL_PDA), %edx; \
|
|
+ movl %edx, %gs
|
|
|
|
#define RESTORE_INT_REGS \
|
|
popl %ebx; \
|
|
@@ -210,17 +169,22 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
|
|
2: popl %es; \
|
|
CFI_ADJUST_CFA_OFFSET -4;\
|
|
/*CFI_RESTORE es;*/\
|
|
-.section .fixup,"ax"; \
|
|
-3: movl $0,(%esp); \
|
|
- jmp 1b; \
|
|
+3: popl %gs; \
|
|
+ CFI_ADJUST_CFA_OFFSET -4;\
|
|
+ /*CFI_RESTORE gs;*/\
|
|
+.pushsection .fixup,"ax"; \
|
|
4: movl $0,(%esp); \
|
|
+ jmp 1b; \
|
|
+5: movl $0,(%esp); \
|
|
jmp 2b; \
|
|
-.previous; \
|
|
+6: movl $0,(%esp); \
|
|
+ jmp 3b; \
|
|
.section __ex_table,"a";\
|
|
.align 4; \
|
|
- .long 1b,3b; \
|
|
- .long 2b,4b; \
|
|
-.previous
|
|
+ .long 1b,4b; \
|
|
+ .long 2b,5b; \
|
|
+ .long 3b,6b; \
|
|
+.popsection
|
|
|
|
#define RING0_INT_FRAME \
|
|
CFI_STARTPROC simple;\
|
|
@@ -239,18 +203,18 @@ sysexit_ecrit: /**** END OF SYSEXIT CRIT
|
|
#define RING0_PTREGS_FRAME \
|
|
CFI_STARTPROC simple;\
|
|
CFI_SIGNAL_FRAME;\
|
|
- CFI_DEF_CFA esp, OLDESP-EBX;\
|
|
- /*CFI_OFFSET cs, CS-OLDESP;*/\
|
|
- CFI_OFFSET eip, EIP-OLDESP;\
|
|
- /*CFI_OFFSET es, ES-OLDESP;*/\
|
|
- /*CFI_OFFSET ds, DS-OLDESP;*/\
|
|
- CFI_OFFSET eax, EAX-OLDESP;\
|
|
- CFI_OFFSET ebp, EBP-OLDESP;\
|
|
- CFI_OFFSET edi, EDI-OLDESP;\
|
|
- CFI_OFFSET esi, ESI-OLDESP;\
|
|
- CFI_OFFSET edx, EDX-OLDESP;\
|
|
- CFI_OFFSET ecx, ECX-OLDESP;\
|
|
- CFI_OFFSET ebx, EBX-OLDESP
|
|
+ CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\
|
|
+ /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\
|
|
+ CFI_OFFSET eip, PT_EIP-PT_OLDESP;\
|
|
+ /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\
|
|
+ /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\
|
|
+ CFI_OFFSET eax, PT_EAX-PT_OLDESP;\
|
|
+ CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\
|
|
+ CFI_OFFSET edi, PT_EDI-PT_OLDESP;\
|
|
+ CFI_OFFSET esi, PT_ESI-PT_OLDESP;\
|
|
+ CFI_OFFSET edx, PT_EDX-PT_OLDESP;\
|
|
+ CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\
|
|
+ CFI_OFFSET ebx, PT_EBX-PT_OLDESP
|
|
|
|
ENTRY(ret_from_fork)
|
|
CFI_STARTPROC
|
|
@@ -278,17 +242,18 @@ ENTRY(ret_from_fork)
|
|
ALIGN
|
|
RING0_PTREGS_FRAME
|
|
ret_from_exception:
|
|
- preempt_stop
|
|
+ preempt_stop(CLBR_ANY)
|
|
ret_from_intr:
|
|
GET_THREAD_INFO(%ebp)
|
|
check_userspace:
|
|
- movl EFLAGS(%esp), %eax # mix EFLAGS and CS
|
|
- movb CS(%esp), %al
|
|
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS and CS
|
|
+ movb PT_CS(%esp), %al
|
|
andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
|
|
cmpl $USER_RPL, %eax
|
|
jb resume_kernel # not returning to v8086 or userspace
|
|
+
|
|
ENTRY(resume_userspace)
|
|
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
|
|
# setting need_resched or sigpending
|
|
# between sampling and the iret
|
|
movl TI_flags(%ebp), %ecx
|
|
@@ -299,14 +264,14 @@ ENTRY(resume_userspace)
|
|
|
|
#ifdef CONFIG_PREEMPT
|
|
ENTRY(resume_kernel)
|
|
- DISABLE_INTERRUPTS
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY)
|
|
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
|
|
jnz restore_nocheck
|
|
need_resched:
|
|
movl TI_flags(%ebp), %ecx # need_resched set ?
|
|
testb $_TIF_NEED_RESCHED, %cl
|
|
jz restore_all
|
|
- testl $IF_MASK,EFLAGS(%esp) # interrupts off (exception path) ?
|
|
+ testl $IF_MASK,PT_EFLAGS(%esp) # interrupts off (exception path) ?
|
|
jz restore_all
|
|
call preempt_schedule_irq
|
|
jmp need_resched
|
|
@@ -328,7 +293,7 @@ sysenter_past_esp:
|
|
* No need to follow this irqs on/off section: the syscall
|
|
* disabled irqs and here we enable it straight after entry:
|
|
*/
|
|
- ENABLE_INTERRUPTS
|
|
+ ENABLE_INTERRUPTS(CLBR_NONE)
|
|
pushl $(__USER_DS)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET ss, 0*/
|
|
@@ -340,12 +305,16 @@ sysenter_past_esp:
|
|
pushl $(__USER_CS)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET cs, 0*/
|
|
+#ifndef CONFIG_COMPAT_VDSO
|
|
/*
|
|
* Push current_thread_info()->sysenter_return to the stack.
|
|
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
|
|
* pushed above; +8 corresponds to copy_thread's esp0 setting.
|
|
*/
|
|
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
|
|
+#else
|
|
+ pushl $SYSENTER_RETURN
|
|
+#endif
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET eip, 0
|
|
|
|
@@ -372,19 +341,27 @@ sysenter_past_esp:
|
|
cmpl $(nr_syscalls), %eax
|
|
jae syscall_badsys
|
|
call *sys_call_table(,%eax,4)
|
|
- movl %eax,EAX(%esp)
|
|
- DISABLE_INTERRUPTS
|
|
+ movl %eax,PT_EAX(%esp)
|
|
+ DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%ebp), %ecx
|
|
testw $_TIF_ALLWORK_MASK, %cx
|
|
jne syscall_exit_work
|
|
/* if something modifies registers it must also disable sysexit */
|
|
- movl EIP(%esp), %edx
|
|
- movl OLDESP(%esp), %ecx
|
|
+ movl PT_EIP(%esp), %edx
|
|
+ movl PT_OLDESP(%esp), %ecx
|
|
xorl %ebp,%ebp
|
|
TRACE_IRQS_ON
|
|
+1: mov PT_GS(%esp), %gs
|
|
ENABLE_INTERRUPTS_SYSEXIT
|
|
CFI_ENDPROC
|
|
+.pushsection .fixup,"ax"
|
|
+2: movl $0,PT_GS(%esp)
|
|
+ jmp 1b
|
|
+.section __ex_table,"a"
|
|
+ .align 4
|
|
+ .long 1b,2b
|
|
+.popsection
|
|
|
|
# pv sysenter call handler stub
|
|
ENTRY(sysenter_entry_pv)
|
|
@@ -419,7 +396,7 @@ ENTRY(system_call)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- testl $TF_MASK,EFLAGS(%esp)
|
|
+ testl $TF_MASK,PT_EFLAGS(%esp)
|
|
jz no_singlestep
|
|
orl $_TIF_SINGLESTEP,TI_flags(%ebp)
|
|
no_singlestep:
|
|
@@ -431,9 +408,9 @@ no_singlestep:
|
|
jae syscall_badsys
|
|
syscall_call:
|
|
call *sys_call_table(,%eax,4)
|
|
- movl %eax,EAX(%esp) # store the return value
|
|
+ movl %eax,PT_EAX(%esp) # store the return value
|
|
syscall_exit:
|
|
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
|
|
# setting need_resched or sigpending
|
|
# between sampling and the iret
|
|
TRACE_IRQS_OFF
|
|
@@ -443,12 +420,12 @@ syscall_exit:
|
|
|
|
restore_all:
|
|
#ifndef CONFIG_XEN
|
|
- movl EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
|
|
- # Warning: OLDSS(%esp) contains the wrong/random values if we
|
|
+ movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS
|
|
+ # Warning: PT_OLDSS(%esp) contains the wrong/random values if we
|
|
# are returning to the kernel.
|
|
# See comments in process.c:copy_thread() for details.
|
|
- movb OLDSS(%esp), %ah
|
|
- movb CS(%esp), %al
|
|
+ movb PT_OLDSS(%esp), %ah
|
|
+ movb PT_CS(%esp), %al
|
|
andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
|
|
cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
|
|
CFI_REMEMBER_STATE
|
|
@@ -456,7 +433,7 @@ restore_all:
|
|
restore_nocheck:
|
|
#else
|
|
restore_nocheck:
|
|
- movl EFLAGS(%esp), %eax
|
|
+ movl PT_EFLAGS(%esp), %eax
|
|
testl $(VM_MASK|NMI_MASK), %eax
|
|
CFI_REMEMBER_STATE
|
|
jnz hypervisor_iret
|
|
@@ -470,13 +447,13 @@ restore_nocheck:
|
|
TRACE_IRQS_IRET
|
|
restore_nocheck_notrace:
|
|
RESTORE_REGS
|
|
- addl $4, %esp
|
|
+ addl $4, %esp # skip orig_eax/error_code
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
1: INTERRUPT_RETURN
|
|
.section .fixup,"ax"
|
|
iret_exc:
|
|
#ifndef CONFIG_XEN
|
|
- ENABLE_INTERRUPTS
|
|
+ ENABLE_INTERRUPTS(CLBR_NONE)
|
|
#endif
|
|
pushl $0 # no error code
|
|
pushl $do_iret_error
|
|
@@ -490,33 +467,42 @@ iret_exc:
|
|
CFI_RESTORE_STATE
|
|
#ifndef CONFIG_XEN
|
|
ldt_ss:
|
|
- larl OLDSS(%esp), %eax
|
|
+ larl PT_OLDSS(%esp), %eax
|
|
jnz restore_nocheck
|
|
testl $0x00400000, %eax # returning to 32bit stack?
|
|
jnz restore_nocheck # allright, normal return
|
|
+
|
|
+#ifdef CONFIG_PARAVIRT
|
|
+ /*
|
|
+ * The kernel can't run on a non-flat stack if paravirt mode
|
|
+ * is active. Rather than try to fixup the high bits of
|
|
+ * ESP, bypass this code entirely. This may break DOSemu
|
|
+ * and/or Wine support in a paravirt VM, although the option
|
|
+ * is still available to implement the setting of the high
|
|
+ * 16-bits in the INTERRUPT_RETURN paravirt-op.
|
|
+ */
|
|
+ cmpl $0, paravirt_ops+PARAVIRT_enabled
|
|
+ jne restore_nocheck
|
|
+#endif
|
|
+
|
|
/* If returning to userspace with 16bit stack,
|
|
* try to fix the higher word of ESP, as the CPU
|
|
* won't restore it.
|
|
* This is an "official" bug of all the x86-compatible
|
|
* CPUs, which we can try to work around to make
|
|
* dosemu and wine happy. */
|
|
- subl $8, %esp # reserve space for switch16 pointer
|
|
- CFI_ADJUST_CFA_OFFSET 8
|
|
- DISABLE_INTERRUPTS
|
|
+ movl PT_OLDESP(%esp), %eax
|
|
+ movl %esp, %edx
|
|
+ call patch_espfix_desc
|
|
+ pushl $__ESPFIX_SS
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ pushl %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ DISABLE_INTERRUPTS(CLBR_EAX)
|
|
TRACE_IRQS_OFF
|
|
- movl %esp, %eax
|
|
- /* Set up the 16bit stack frame with switch32 pointer on top,
|
|
- * and a switch16 pointer on top of the current frame. */
|
|
- call setup_x86_bogus_stack
|
|
- CFI_ADJUST_CFA_OFFSET -8 # frame has moved
|
|
- TRACE_IRQS_IRET
|
|
- RESTORE_REGS
|
|
- lss 20+4(%esp), %esp # switch to 16bit stack
|
|
-1: INTERRUPT_RETURN
|
|
-.section __ex_table,"a"
|
|
- .align 4
|
|
- .long 1b,iret_exc
|
|
-.previous
|
|
+ lss (%esp), %esp
|
|
+ CFI_ADJUST_CFA_OFFSET -8
|
|
+ jmp restore_nocheck
|
|
#else
|
|
ALIGN
|
|
restore_all_enable_events:
|
|
@@ -540,7 +526,7 @@ ecrit: /**** END OF CRITICAL REGION ***
|
|
|
|
CFI_RESTORE_STATE
|
|
hypervisor_iret:
|
|
- andl $~NMI_MASK, EFLAGS(%esp)
|
|
+ andl $~NMI_MASK, PT_EFLAGS(%esp)
|
|
RESTORE_REGS
|
|
addl $4, %esp
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
@@ -556,7 +542,7 @@ work_pending:
|
|
jz work_notifysig
|
|
work_resched:
|
|
call schedule
|
|
- DISABLE_INTERRUPTS # make sure we don't miss an interrupt
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
|
|
# setting need_resched or sigpending
|
|
# between sampling and the iret
|
|
TRACE_IRQS_OFF
|
|
@@ -569,7 +555,8 @@ work_resched:
|
|
|
|
work_notifysig: # deal with pending signals and
|
|
# notify-resume requests
|
|
- testl $VM_MASK, EFLAGS(%esp)
|
|
+#ifdef CONFIG_VM86
|
|
+ testl $VM_MASK, PT_EFLAGS(%esp)
|
|
movl %esp, %eax
|
|
jne work_notifysig_v86 # returning to kernel-space or
|
|
# vm86-space
|
|
@@ -579,29 +566,30 @@ work_notifysig: # deal with pending s
|
|
|
|
ALIGN
|
|
work_notifysig_v86:
|
|
-#ifdef CONFIG_VM86
|
|
pushl %ecx # save ti_flags for do_notify_resume
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
call save_v86_state # %eax contains pt_regs pointer
|
|
popl %ecx
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
movl %eax, %esp
|
|
+#else
|
|
+ movl %esp, %eax
|
|
+#endif
|
|
xorl %edx, %edx
|
|
call do_notify_resume
|
|
jmp resume_userspace_sig
|
|
-#endif
|
|
|
|
# perform syscall exit tracing
|
|
ALIGN
|
|
syscall_trace_entry:
|
|
- movl $-ENOSYS,EAX(%esp)
|
|
+ movl $-ENOSYS,PT_EAX(%esp)
|
|
movl %esp, %eax
|
|
xorl %edx,%edx
|
|
call do_syscall_trace
|
|
cmpl $0, %eax
|
|
jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU,
|
|
# so must skip actual syscall
|
|
- movl ORIG_EAX(%esp), %eax
|
|
+ movl PT_ORIG_EAX(%esp), %eax
|
|
cmpl $(nr_syscalls), %eax
|
|
jnae syscall_call
|
|
jmp syscall_exit
|
|
@@ -612,7 +600,7 @@ syscall_exit_work:
|
|
testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
|
|
jz work_pending
|
|
TRACE_IRQS_ON
|
|
- ENABLE_INTERRUPTS # could let do_syscall_trace() call
|
|
+ ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call
|
|
# schedule() instead
|
|
movl %esp, %eax
|
|
movl $1, %edx
|
|
@@ -626,40 +614,39 @@ syscall_fault:
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- movl $-EFAULT,EAX(%esp)
|
|
+ movl $-EFAULT,PT_EAX(%esp)
|
|
jmp resume_userspace
|
|
|
|
syscall_badsys:
|
|
- movl $-ENOSYS,EAX(%esp)
|
|
+ movl $-ENOSYS,PT_EAX(%esp)
|
|
jmp resume_userspace
|
|
CFI_ENDPROC
|
|
|
|
#ifndef CONFIG_XEN
|
|
#define FIXUP_ESPFIX_STACK \
|
|
- movl %esp, %eax; \
|
|
- /* switch to 32bit stack using the pointer on top of 16bit stack */ \
|
|
- lss %ss:CPU_16BIT_STACK_SIZE-8, %esp; \
|
|
- /* copy data from 16bit stack to 32bit stack */ \
|
|
- call fixup_x86_bogus_stack; \
|
|
- /* put ESP to the proper location */ \
|
|
- movl %eax, %esp;
|
|
-#define UNWIND_ESPFIX_STACK \
|
|
+	/* since we are on a wrong stack, we cannot make it C code :( */ \
|
|
+ movl %gs:PDA_cpu, %ebx; \
|
|
+ PER_CPU(cpu_gdt_descr, %ebx); \
|
|
+ movl GDS_address(%ebx), %ebx; \
|
|
+ GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
|
|
+ addl %esp, %eax; \
|
|
+ pushl $__KERNEL_DS; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
pushl %eax; \
|
|
CFI_ADJUST_CFA_OFFSET 4; \
|
|
+ lss (%esp), %esp; \
|
|
+ CFI_ADJUST_CFA_OFFSET -8;
|
|
+#define UNWIND_ESPFIX_STACK \
|
|
movl %ss, %eax; \
|
|
- /* see if on 16bit stack */ \
|
|
+ /* see if on espfix stack */ \
|
|
cmpw $__ESPFIX_SS, %ax; \
|
|
- je 28f; \
|
|
-27: popl %eax; \
|
|
- CFI_ADJUST_CFA_OFFSET -4; \
|
|
-.section .fixup,"ax"; \
|
|
-28: movl $__KERNEL_DS, %eax; \
|
|
+ jne 27f; \
|
|
+ movl $__KERNEL_DS, %eax; \
|
|
movl %eax, %ds; \
|
|
movl %eax, %es; \
|
|
- /* switch to 32bit stack */ \
|
|
+ /* switch to normal stack */ \
|
|
FIXUP_ESPFIX_STACK; \
|
|
- jmp 27b; \
|
|
-.previous
|
|
+27:;
|
|
|
|
/*
|
|
* Build the entry stubs and pointer table with
|
|
@@ -723,13 +710,16 @@ KPROBE_ENTRY(page_fault)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
ALIGN
|
|
error_code:
|
|
+ /* the function address is in %gs's slot on the stack */
|
|
+ pushl %es
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ /*CFI_REL_OFFSET es, 0*/
|
|
pushl %ds
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET ds, 0*/
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET eax, 0
|
|
- xorl %eax, %eax
|
|
pushl %ebp
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET ebp, 0
|
|
@@ -742,7 +732,6 @@ error_code:
|
|
pushl %edx
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET edx, 0
|
|
- decl %eax # eax = -1
|
|
pushl %ecx
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET ecx, 0
|
|
@@ -750,18 +739,20 @@ error_code:
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET ebx, 0
|
|
cld
|
|
- pushl %es
|
|
+ pushl %gs
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
- /*CFI_REL_OFFSET es, 0*/
|
|
+ /*CFI_REL_OFFSET gs, 0*/
|
|
+ movl $(__KERNEL_PDA), %ecx
|
|
+ movl %ecx, %gs
|
|
UNWIND_ESPFIX_STACK
|
|
popl %ecx
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
/*CFI_REGISTER es, ecx*/
|
|
- movl ES(%esp), %edi # get the function address
|
|
- movl ORIG_EAX(%esp), %edx # get the error code
|
|
- movl %eax, ORIG_EAX(%esp)
|
|
- movl %ecx, ES(%esp)
|
|
- /*CFI_REL_OFFSET es, ES*/
|
|
+ movl PT_GS(%esp), %edi # get the function address
|
|
+ movl PT_ORIG_EAX(%esp), %edx # get the error code
|
|
+ movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
|
|
+ mov %ecx, PT_GS(%esp)
|
|
+ /*CFI_REL_OFFSET gs, PT_GS*/
|
|
movl $(__USER_DS), %ecx
|
|
movl %ecx, %ds
|
|
movl %ecx, %es
|
|
@@ -793,8 +784,8 @@ ENTRY(hypervisor_callback)
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
- movl CS(%esp),%ecx
|
|
- movl EIP(%esp),%eax
|
|
+ movl PT_CS(%esp),%ecx
|
|
+ movl PT_EIP(%esp),%eax
|
|
andl $SEGMENT_RPL_MASK,%ecx
|
|
cmpl $USER_RPL,%ecx
|
|
jae .Ldo_upcall
|
|
@@ -808,7 +799,7 @@ ENTRY(hypervisor_callback)
|
|
jb .Ldo_upcall
|
|
cmpl $sysexit_ecrit,%eax
|
|
ja .Ldo_upcall
|
|
- addl $OLDESP,%esp # Remove eflags...ebx from stack frame.
|
|
+ addl $PT_OLDESP,%esp # Remove eflags...ebx from stack frame.
|
|
#endif
|
|
.Ldo_upcall:
|
|
push %esp
|
|
@@ -830,7 +821,7 @@ critical_region_fixup:
|
|
movsbl critical_fixup_table-scrit(%eax),%ecx # %ecx contains num slots popped
|
|
testl %ecx,%ecx
|
|
leal (%esp,%ecx,4),%esi # %esi points at end of src region
|
|
- leal OLDESP(%esp),%edi # %edi points at end of dst region
|
|
+ leal PT_OLDESP(%esp),%edi # %edi points at end of dst region
|
|
jle 17f # skip loop if nothing to copy
|
|
16: subl $4,%esi # pre-decrementing copy loop
|
|
subl $4,%edi
|
|
@@ -853,8 +844,9 @@ critical_fixup_table:
|
|
.byte 6 # pop %eax
|
|
.byte 7 # pop %ds
|
|
.byte 8 # pop %es
|
|
- .byte 9,9,9 # add $4,%esp
|
|
- .byte 10 # iret
|
|
+ .byte 9,9 # pop %gs
|
|
+ .byte 10,10,10 # add $4,%esp
|
|
+ .byte 11 # iret
|
|
.byte -1,-1,-1,-1 # movb $1,1(%esi) = __DISABLE_INTERRUPTS
|
|
.previous
|
|
|
|
@@ -944,7 +936,7 @@ ENTRY(device_not_available)
|
|
jmp ret_from_exception
|
|
device_available_emulate:
|
|
#endif
|
|
- preempt_stop
|
|
+ preempt_stop(CLBR_ANY)
|
|
call math_state_restore
|
|
jmp ret_from_exception
|
|
CFI_ENDPROC
|
|
@@ -1014,7 +1006,7 @@ KPROBE_ENTRY(nmi)
|
|
cmpw $__ESPFIX_SS, %ax
|
|
popl %eax
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
- je nmi_16bit_stack
|
|
+ je nmi_espfix_stack
|
|
cmpl $sysenter_entry,(%esp)
|
|
je nmi_stack_fixup
|
|
pushl %eax
|
|
@@ -1057,7 +1049,7 @@ nmi_debug_stack_check:
|
|
FIX_STACK(24,nmi_stack_correct, 1)
|
|
jmp nmi_stack_correct
|
|
|
|
-nmi_16bit_stack:
|
|
+nmi_espfix_stack:
|
|
/* We have a RING0_INT_FRAME here.
|
|
*
|
|
* create the pointer to lss back
|
|
@@ -1066,7 +1058,6 @@ nmi_16bit_stack:
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
pushl %esp
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
- movzwl %sp, %esp
|
|
addw $4, (%esp)
|
|
/* copy the iret frame of 12 bytes */
|
|
.rept 3
|
|
@@ -1077,11 +1068,11 @@ nmi_16bit_stack:
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
FIXUP_ESPFIX_STACK # %eax == %esp
|
|
- CFI_ADJUST_CFA_OFFSET -20 # the frame has now moved
|
|
xorl %edx,%edx # zero error code
|
|
call do_nmi
|
|
RESTORE_REGS
|
|
- lss 12+4(%esp), %esp # back to 16bit stack
|
|
+ lss 12+4(%esp), %esp # back to espfix stack
|
|
+ CFI_ADJUST_CFA_OFFSET -24
|
|
1: INTERRUPT_RETURN
|
|
CFI_ENDPROC
|
|
.section __ex_table,"a"
|
|
@@ -1097,12 +1088,25 @@ KPROBE_ENTRY(nmi)
|
|
xorl %edx,%edx # zero error code
|
|
movl %esp,%eax # pt_regs pointer
|
|
call do_nmi
|
|
- orl $NMI_MASK, EFLAGS(%esp)
|
|
+ orl $NMI_MASK, PT_EFLAGS(%esp)
|
|
jmp restore_all
|
|
CFI_ENDPROC
|
|
#endif
|
|
KPROBE_END(nmi)
|
|
|
|
+#ifdef CONFIG_PARAVIRT
|
|
+ENTRY(native_iret)
|
|
+1: iret
|
|
+.section __ex_table,"a"
|
|
+ .align 4
|
|
+ .long 1b,iret_exc
|
|
+.previous
|
|
+
|
|
+ENTRY(native_irq_enable_sysexit)
|
|
+ sti
|
|
+ sysexit
|
|
+#endif
|
|
+
|
|
KPROBE_ENTRY(int3)
|
|
RING0_INT_FRAME
|
|
pushl $-1 # mark this as an int
|
|
@@ -1218,37 +1222,6 @@ ENTRY(spurious_interrupt_bug)
|
|
CFI_ENDPROC
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
-#ifdef CONFIG_STACK_UNWIND
|
|
-ENTRY(arch_unwind_init_running)
|
|
- CFI_STARTPROC
|
|
- movl 4(%esp), %edx
|
|
- movl (%esp), %ecx
|
|
- leal 4(%esp), %eax
|
|
- movl %ebx, EBX(%edx)
|
|
- xorl %ebx, %ebx
|
|
- movl %ebx, ECX(%edx)
|
|
- movl %ebx, EDX(%edx)
|
|
- movl %esi, ESI(%edx)
|
|
- movl %edi, EDI(%edx)
|
|
- movl %ebp, EBP(%edx)
|
|
- movl %ebx, EAX(%edx)
|
|
- movl $__USER_DS, DS(%edx)
|
|
- movl $__USER_DS, ES(%edx)
|
|
- movl %ebx, ORIG_EAX(%edx)
|
|
- movl %ecx, EIP(%edx)
|
|
- movl 12(%esp), %ecx
|
|
- movl $__KERNEL_CS, CS(%edx)
|
|
- movl %ebx, EFLAGS(%edx)
|
|
- movl %eax, OLDESP(%edx)
|
|
- movl 8(%esp), %eax
|
|
- movl %ecx, 8(%esp)
|
|
- movl EBX(%edx), %ebx
|
|
- movl $__KERNEL_DS, OLDSS(%edx)
|
|
- jmpl *%eax
|
|
- CFI_ENDPROC
|
|
-ENDPROC(arch_unwind_init_running)
|
|
-#endif
|
|
-
|
|
ENTRY(fixup_4gb_segment)
|
|
RING0_EC_FRAME
|
|
pushl $do_fixup_4gb_segment
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head_32-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head_32-xen.S 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -9,6 +9,7 @@
|
|
#include <asm/cache.h>
|
|
#include <asm/thread_info.h>
|
|
#include <asm/asm-offsets.h>
|
|
+#include <asm/boot.h>
|
|
#include <asm/dwarf2.h>
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/elfnote.h>
|
|
@@ -35,6 +36,8 @@ ENTRY(startup_32)
|
|
/* Set up the stack pointer */
|
|
movl $(init_thread_union+THREAD_SIZE),%esp
|
|
|
|
+ call setup_pda
|
|
+
|
|
/* get vendor info */
|
|
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
|
|
XEN_CPUID
|
|
@@ -57,14 +60,58 @@ ENTRY(startup_32)
|
|
|
|
movb $1,X86_HARD_MATH
|
|
|
|
- xorl %eax,%eax # Clear FS/GS and LDT
|
|
+ xorl %eax,%eax # Clear FS
|
|
movl %eax,%fs
|
|
- movl %eax,%gs
|
|
+
|
|
+ movl $(__KERNEL_PDA),%eax
|
|
+ mov %eax,%gs
|
|
+
|
|
cld # gcc2 wants the direction flag cleared at all times
|
|
|
|
pushl $0 # fake return address for unwinder
|
|
jmp start_kernel
|
|
|
|
+/*
|
|
+ * Point the GDT at this CPU's PDA. This will be
|
|
+ * cpu_gdt_table and boot_pda.
|
|
+ */
|
|
+setup_pda:
|
|
+ /* get the PDA pointer */
|
|
+ movl $boot_pda, %eax
|
|
+
|
|
+ /* slot the PDA address into the GDT */
|
|
+ mov $cpu_gdt_table, %ecx
|
|
+ mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
|
|
+ shr $16, %eax
|
|
+ mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
|
|
+ mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
|
|
+
|
|
+ # %esi still points to start_info, and no registers
|
|
+ # need to be preserved.
|
|
+
|
|
+ movl XEN_START_mfn_list(%esi), %ebx
|
|
+ movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
|
|
+ shrl $PAGE_SHIFT, %eax
|
|
+ movl (%ebx,%eax,4), %ecx
|
|
+ pushl %ecx # frame number for set_gdt below
|
|
+
|
|
+ xorl %esi, %esi
|
|
+ xorl %edx, %edx
|
|
+ shldl $PAGE_SHIFT, %ecx, %edx
|
|
+ shll $PAGE_SHIFT, %ecx
|
|
+ orl $0x61, %ecx
|
|
+ movl $cpu_gdt_table, %ebx
|
|
+ movl $__HYPERVISOR_update_va_mapping, %eax
|
|
+ int $0x82
|
|
+
|
|
+ movl $(PAGE_SIZE_asm / 8), %ecx
|
|
+ movl %esp, %ebx
|
|
+ movl $__HYPERVISOR_set_gdt, %eax
|
|
+ int $0x82
|
|
+
|
|
+ popl %ecx
|
|
+ ret
|
|
+
|
|
#define HYPERCALL_PAGE_OFFSET 0x1000
|
|
.org HYPERCALL_PAGE_OFFSET
|
|
ENTRY(hypercall_page)
|
|
@@ -93,7 +140,8 @@ ENTRY(empty_zero_page)
|
|
/*
|
|
* The Global Descriptor Table contains 28 quadwords, per-CPU.
|
|
*/
|
|
- .align L1_CACHE_BYTES
|
|
+ .section .data.page_aligned, "aw"
|
|
+ .align PAGE_SIZE_asm
|
|
ENTRY(cpu_gdt_table)
|
|
.quad 0x0000000000000000 /* NULL descriptor */
|
|
.quad 0x0000000000000000 /* 0x0b reserved */
|
|
@@ -135,12 +183,13 @@ ENTRY(cpu_gdt_table)
|
|
.quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
|
|
.quad 0x0000000000000000 /* 0xc8 APM DS data */
|
|
|
|
- .quad 0x0000000000000000 /* 0xd0 - ESPFIX 16-bit SS */
|
|
- .quad 0x0000000000000000 /* 0xd8 - unused */
|
|
+ .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
|
|
+ .quad 0x00cf92000000ffff /* 0xd8 - PDA */
|
|
.quad 0x0000000000000000 /* 0xe0 - unused */
|
|
.quad 0x0000000000000000 /* 0xe8 - unused */
|
|
.quad 0x0000000000000000 /* 0xf0 - unused */
|
|
.quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
|
|
+ .align PAGE_SIZE_asm
|
|
|
|
#if CONFIG_XEN_COMPAT <= 0x030002
|
|
/*
|
|
@@ -165,9 +214,9 @@ ENTRY(cpu_gdt_table)
|
|
.ascii ",ELF_PADDR_OFFSET=0x"
|
|
utoa __PAGE_OFFSET
|
|
.ascii ",VIRT_ENTRY=0x"
|
|
- utoa (__PAGE_OFFSET + __PHYSICAL_START + VIRT_ENTRY_OFFSET)
|
|
+ utoa (__PAGE_OFFSET + LOAD_PHYSICAL_ADDR + VIRT_ENTRY_OFFSET)
|
|
.ascii ",HYPERCALL_PAGE=0x"
|
|
- utoa ((__PHYSICAL_START+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
|
|
+ utoa ((LOAD_PHYSICAL_ADDR+HYPERCALL_PAGE_OFFSET)>>PAGE_SHIFT)
|
|
.ascii ",FEATURES=writable_page_tables"
|
|
.ascii "|writable_descriptor_tables"
|
|
.ascii "|auto_translated_physmap"
|
|
--- head-2010-05-25.orig/arch/x86/kernel/io_apic_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/io_apic_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -34,6 +34,7 @@
|
|
#include <linux/pci.h>
|
|
#include <linux/msi.h>
|
|
#include <linux/htirq.h>
|
|
+#include <linux/freezer.h>
|
|
|
|
#include <asm/io.h>
|
|
#include <asm/smp.h>
|
|
@@ -199,14 +200,20 @@ static struct IO_APIC_route_entry ioapic
|
|
* the interrupt, and we need to make sure the entry is fully populated
|
|
* before that happens.
|
|
*/
|
|
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+static void
|
|
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
{
|
|
- unsigned long flags;
|
|
union entry_union eu;
|
|
eu.entry = e;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+}
|
|
+
|
|
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __ioapic_write_entry(apic, pin, e);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
@@ -889,8 +896,7 @@ static int __init find_isa_irq_pin(int i
|
|
|
|
if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
|
|
mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
|
|
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA
|
|
) &&
|
|
(mp_irqs[i].mpc_irqtype == type) &&
|
|
(mp_irqs[i].mpc_srcbusirq == irq))
|
|
@@ -909,8 +915,7 @@ static int __init find_isa_irq_apic(int
|
|
|
|
if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
|
|
mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_NEC98
|
|
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA
|
|
) &&
|
|
(mp_irqs[i].mpc_irqtype == type) &&
|
|
(mp_irqs[i].mpc_srcbusirq == irq))
|
|
@@ -1043,12 +1048,6 @@ static int EISA_ELCR(unsigned int irq)
|
|
#define default_MCA_trigger(idx) (1)
|
|
#define default_MCA_polarity(idx) (0)
|
|
|
|
-/* NEC98 interrupts are always polarity zero edge triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_NEC98_trigger(idx) (0)
|
|
-#define default_NEC98_polarity(idx) (0)
|
|
-
|
|
static int __init MPBIOS_polarity(int idx)
|
|
{
|
|
int bus = mp_irqs[idx].mpc_srcbus;
|
|
@@ -1083,11 +1082,6 @@ static int __init MPBIOS_polarity(int id
|
|
polarity = default_MCA_polarity(idx);
|
|
break;
|
|
}
|
|
- case MP_BUS_NEC98: /* NEC 98 pin */
|
|
- {
|
|
- polarity = default_NEC98_polarity(idx);
|
|
- break;
|
|
- }
|
|
default:
|
|
{
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
@@ -1157,11 +1151,6 @@ static int MPBIOS_trigger(int idx)
|
|
trigger = default_MCA_trigger(idx);
|
|
break;
|
|
}
|
|
- case MP_BUS_NEC98: /* NEC 98 pin */
|
|
- {
|
|
- trigger = default_NEC98_trigger(idx);
|
|
- break;
|
|
- }
|
|
default:
|
|
{
|
|
printk(KERN_WARNING "broken BIOS!!\n");
|
|
@@ -1223,7 +1212,6 @@ static int pin_2_irq(int idx, int apic,
|
|
case MP_BUS_ISA: /* ISA pin */
|
|
case MP_BUS_EISA:
|
|
case MP_BUS_MCA:
|
|
- case MP_BUS_NEC98:
|
|
{
|
|
irq = mp_irqs[idx].mpc_srcbusirq;
|
|
break;
|
|
@@ -1291,7 +1279,7 @@ static inline int IO_APIC_irq_trigger(in
|
|
}
|
|
|
|
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
|
|
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
|
|
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
|
|
|
|
static int __assign_irq_vector(int irq)
|
|
{
|
|
@@ -1417,8 +1405,8 @@ static void __init setup_IO_APIC_irqs(vo
|
|
if (!apic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
}
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __ioapic_write_entry(apic, pin, entry);
|
|
set_native_irq_info(irq, TARGET_CPUS);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
@@ -1988,6 +1976,15 @@ static void __init setup_ioapic_ids_from
|
|
#endif
|
|
|
|
#ifndef CONFIG_XEN
|
|
+static int no_timer_check __initdata;
|
|
+
|
|
+static int __init notimercheck(char *s)
|
|
+{
|
|
+ no_timer_check = 1;
|
|
+ return 1;
|
|
+}
|
|
+__setup("no_timer_check", notimercheck);
|
|
+
|
|
/*
|
|
* There is a nasty bug in some older SMP boards, their mptable lies
|
|
* about the timer IRQ. We do the following to work around the situation:
|
|
@@ -1996,10 +1993,13 @@ static void __init setup_ioapic_ids_from
|
|
* - if this function detects that timer IRQs are defunct, then we fall
|
|
* back to ISA timer IRQs
|
|
*/
|
|
-static int __init timer_irq_works(void)
|
|
+int __init timer_irq_works(void)
|
|
{
|
|
unsigned long t1 = jiffies;
|
|
|
|
+ if (no_timer_check)
|
|
+ return 1;
|
|
+
|
|
local_irq_enable();
|
|
/* Let ten ticks pass... */
|
|
mdelay((10 * 1000) / HZ);
|
|
@@ -2226,9 +2226,15 @@ static inline void unlock_ExtINT_logic(v
|
|
unsigned char save_control, save_freq_select;
|
|
|
|
pin = find_isa_irq_pin(8, mp_INT);
|
|
+ if (pin == -1) {
|
|
+ WARN_ON_ONCE(1);
|
|
+ return;
|
|
+ }
|
|
apic = find_isa_irq_apic(8, mp_INT);
|
|
- if (pin == -1)
|
|
+ if (apic == -1) {
|
|
+ WARN_ON_ONCE(1);
|
|
return;
|
|
+ }
|
|
|
|
entry0 = ioapic_read_entry(apic, pin);
|
|
clear_IO_APIC_pin(apic, pin);
|
|
@@ -2273,7 +2279,7 @@ int timer_uses_ioapic_pin_0;
|
|
* is so screwy. Thanks to Brian Perkins for testing/hacking this beast
|
|
* fanatically on his truly buggy board.
|
|
*/
|
|
-static inline void check_timer(void)
|
|
+static inline void __init check_timer(void)
|
|
{
|
|
int apic1, pin1, apic2, pin2;
|
|
int vector;
|
|
@@ -2558,7 +2564,7 @@ device_initcall(ioapic_init_sysfs);
|
|
int create_irq(void)
|
|
{
|
|
/* Allocate an unused irq */
|
|
- int irq, new, vector;
|
|
+ int irq, new, vector = 0;
|
|
unsigned long flags;
|
|
|
|
irq = -ENOSPC;
|
|
@@ -2939,8 +2945,8 @@ int io_apic_set_pci_routing (int ioapic,
|
|
if (!ioapic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
|
|
- ioapic_write_entry(ioapic, pin, entry);
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __ioapic_write_entry(ioapic, pin, entry);
|
|
set_native_irq_info(irq, TARGET_CPUS);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/ldt_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/ldt_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -177,16 +177,14 @@ static int read_default_ldt(void __user
|
|
{
|
|
int err;
|
|
unsigned long size;
|
|
- void *address;
|
|
|
|
err = 0;
|
|
- address = &default_ldt[0];
|
|
size = 5*sizeof(struct desc_struct);
|
|
if (size > bytecount)
|
|
size = bytecount;
|
|
|
|
err = size;
|
|
- if (copy_to_user(ptr, address, size))
|
|
+ if (clear_user(ptr, size))
|
|
err = -EFAULT;
|
|
|
|
return err;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/microcode-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/microcode-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -1,7 +1,7 @@
|
|
/*
|
|
* Intel CPU Microcode Update Driver for Linux
|
|
*
|
|
- * Copyright (C) 2000-2004 Tigran Aivazian
|
|
+ * Copyright (C) 2000-2006 Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
|
|
* 2006 Shaohua Li <shaohua.li@intel.com>
|
|
*
|
|
* This driver allows to upgrade microcode on Intel processors
|
|
@@ -43,7 +43,7 @@
|
|
#include <asm/processor.h>
|
|
|
|
MODULE_DESCRIPTION("Intel CPU (IA-32) Microcode Update Driver");
|
|
-MODULE_AUTHOR("Tigran Aivazian <tigran@veritas.com>");
|
|
+MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>");
|
|
MODULE_LICENSE("GPL");
|
|
|
|
static int verbose;
|
|
@@ -195,7 +195,7 @@ static int __init microcode_init (void)
|
|
request_microcode();
|
|
|
|
printk(KERN_INFO
|
|
- "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
|
|
+ "IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@aivazian.fsnet.co.uk>\n");
|
|
return 0;
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/mpparse_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/mpparse_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -36,7 +36,7 @@
|
|
|
|
/* Have we found an MP table */
|
|
int smp_found_config;
|
|
-unsigned int __initdata maxcpus = NR_CPUS;
|
|
+unsigned int __cpuinitdata maxcpus = NR_CPUS;
|
|
|
|
/*
|
|
* Various Linux-internal data structures created from the
|
|
@@ -102,10 +102,10 @@ static int __init mpf_checksum(unsigned
|
|
*/
|
|
|
|
static int mpc_record;
|
|
-static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __initdata;
|
|
+static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] __cpuinitdata;
|
|
|
|
#ifndef CONFIG_XEN
|
|
-static void __devinit MP_processor_info (struct mpc_config_processor *m)
|
|
+static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
|
|
{
|
|
int ver, apicid;
|
|
physid_mask_t phys_cpu;
|
|
@@ -221,7 +221,7 @@ static void __devinit MP_processor_info
|
|
bios_cpu_apicid[num_processors - 1] = m->mpc_apicid;
|
|
}
|
|
#else
|
|
-void __init MP_processor_info (struct mpc_config_processor *m)
|
|
+static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
|
|
{
|
|
num_processors++;
|
|
}
|
|
@@ -256,8 +256,6 @@ static void __init MP_bus_info (struct m
|
|
mp_current_pci_id++;
|
|
} else if (strncmp(str, BUSTYPE_MCA, sizeof(BUSTYPE_MCA)-1) == 0) {
|
|
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
|
|
- } else if (strncmp(str, BUSTYPE_NEC98, sizeof(BUSTYPE_NEC98)-1) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_NEC98;
|
|
} else {
|
|
printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str);
|
|
}
|
|
@@ -842,7 +840,7 @@ void __init mp_register_lapic_address(u6
|
|
#endif
|
|
}
|
|
|
|
-void __devinit mp_register_lapic (u8 id, u8 enabled)
|
|
+void __cpuinit mp_register_lapic (u8 id, u8 enabled)
|
|
{
|
|
struct mpc_config_processor processor;
|
|
int boot_cpu = 0;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -273,7 +273,7 @@ EXPORT_SYMBOL(dma_free_coherent);
|
|
int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr,
|
|
dma_addr_t device_addr, size_t size, int flags)
|
|
{
|
|
- void __iomem *mem_base;
|
|
+ void __iomem *mem_base = NULL;
|
|
int pages = size >> PAGE_SHIFT;
|
|
int bitmap_size = (pages + 31)/32;
|
|
|
|
@@ -290,14 +290,12 @@ int dma_declare_coherent_memory(struct d
|
|
if (!mem_base)
|
|
goto out;
|
|
|
|
- dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
|
|
+ dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
|
|
if (!dev->dma_mem)
|
|
goto out;
|
|
- memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem));
|
|
- dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL);
|
|
+ dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
|
|
if (!dev->dma_mem->bitmap)
|
|
goto free1_out;
|
|
- memset(dev->dma_mem->bitmap, 0, bitmap_size);
|
|
|
|
dev->dma_mem->virt_base = mem_base;
|
|
dev->dma_mem->device_base = device_addr;
|
|
@@ -312,6 +310,8 @@ int dma_declare_coherent_memory(struct d
|
|
free1_out:
|
|
kfree(dev->dma_mem->bitmap);
|
|
out:
|
|
+ if (mem_base)
|
|
+ iounmap(mem_base);
|
|
return 0;
|
|
}
|
|
EXPORT_SYMBOL(dma_declare_coherent_memory);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/process_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -60,6 +60,7 @@
|
|
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/cpu.h>
|
|
+#include <asm/pda.h>
|
|
|
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
|
|
@@ -104,28 +105,24 @@ EXPORT_SYMBOL(enable_hlt);
|
|
*/
|
|
static void poll_idle (void)
|
|
{
|
|
- local_irq_enable();
|
|
-
|
|
- asm volatile(
|
|
- "2:"
|
|
- "testl %0, %1;"
|
|
- "rep; nop;"
|
|
- "je 2b;"
|
|
- : : "i"(_TIF_NEED_RESCHED), "m" (current_thread_info()->flags));
|
|
+ cpu_relax();
|
|
}
|
|
|
|
static void xen_idle(void)
|
|
{
|
|
- local_irq_disable();
|
|
+ current_thread_info()->status &= ~TS_POLLING;
|
|
+ /*
|
|
+ * TS_POLLING-cleared state must be visible before we
|
|
+ * test NEED_RESCHED:
|
|
+ */
|
|
+ smp_mb();
|
|
|
|
- if (need_resched())
|
|
+ local_irq_disable();
|
|
+ if (!need_resched())
|
|
+ safe_halt(); /* enables interrupts racelessly */
|
|
+ else
|
|
local_irq_enable();
|
|
- else {
|
|
- current_thread_info()->status &= ~TS_POLLING;
|
|
- smp_mb__after_clear_bit();
|
|
- safe_halt();
|
|
- current_thread_info()->status |= TS_POLLING;
|
|
- }
|
|
+ current_thread_info()->status |= TS_POLLING;
|
|
}
|
|
#ifdef CONFIG_APM_MODULE
|
|
EXPORT_SYMBOL(default_idle);
|
|
@@ -250,8 +247,8 @@ void show_regs(struct pt_regs * regs)
|
|
regs->eax,regs->ebx,regs->ecx,regs->edx);
|
|
printk("ESI: %08lx EDI: %08lx EBP: %08lx",
|
|
regs->esi, regs->edi, regs->ebp);
|
|
- printk(" DS: %04x ES: %04x\n",
|
|
- 0xffff & regs->xds,0xffff & regs->xes);
|
|
+ printk(" DS: %04x ES: %04x GS: %04x\n",
|
|
+ 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xgs);
|
|
|
|
cr0 = read_cr0();
|
|
cr2 = read_cr2();
|
|
@@ -282,6 +279,7 @@ int kernel_thread(int (*fn)(void *), voi
|
|
|
|
regs.xds = __USER_DS;
|
|
regs.xes = __USER_DS;
|
|
+ regs.xgs = __KERNEL_PDA;
|
|
regs.orig_eax = -1;
|
|
regs.eip = (unsigned long) kernel_thread_helper;
|
|
regs.xcs = __KERNEL_CS | get_kernel_rpl();
|
|
@@ -359,7 +357,6 @@ int copy_thread(int nr, unsigned long cl
|
|
p->thread.eip = (unsigned long) ret_from_fork;
|
|
|
|
savesegment(fs,p->thread.fs);
|
|
- savesegment(gs,p->thread.gs);
|
|
|
|
tsk = current;
|
|
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
|
|
@@ -438,7 +435,7 @@ void dump_thread(struct pt_regs * regs,
|
|
dump->regs.ds = regs->xds;
|
|
dump->regs.es = regs->xes;
|
|
savesegment(fs,dump->regs.fs);
|
|
- savesegment(gs,dump->regs.gs);
|
|
+ dump->regs.gs = regs->xgs;
|
|
dump->regs.orig_eax = regs->orig_eax;
|
|
dump->regs.eip = regs->eip;
|
|
dump->regs.cs = regs->xcs;
|
|
@@ -635,17 +632,19 @@ struct task_struct fastcall * __switch_t
|
|
if (unlikely(HYPERVISOR_multicall_check(_mcl, mcl - _mcl, NULL)))
|
|
BUG();
|
|
|
|
+ /* we're going to use this soon, after a few expensive things */
|
|
+ if (next_p->fpu_counter > 5)
|
|
+ prefetch(&next->i387.fxsave);
|
|
+
|
|
/*
|
|
- * Restore %fs and %gs if needed.
|
|
+ * Restore %fs if needed.
|
|
*
|
|
- * Glibc normally makes %fs be zero, and %gs is one of
|
|
- * the TLS segments.
|
|
+ * Glibc normally makes %fs be zero.
|
|
*/
|
|
if (unlikely(next->fs))
|
|
loadsegment(fs, next->fs);
|
|
|
|
- if (next->gs)
|
|
- loadsegment(gs, next->gs);
|
|
+ write_pda(pcurrent, next_p);
|
|
|
|
/*
|
|
* Now maybe handle debug registers
|
|
@@ -655,6 +654,13 @@ struct task_struct fastcall * __switch_t
|
|
|
|
disable_tsc(prev_p, next_p);
|
|
|
|
+ /* If the task has used fpu the last 5 timeslices, just do a full
|
|
+ * restore of the math state immediately to avoid the trap; the
|
|
+ * chances of needing FPU soon are obviously high now
|
|
+ */
|
|
+ if (next_p->fpu_counter > 5)
|
|
+ math_state_restore();
|
|
+
|
|
return prev_p;
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/quirks-xen.c 2008-01-28 12:24:19.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/quirks-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -3,10 +3,12 @@
|
|
*/
|
|
#include <linux/pci.h>
|
|
#include <linux/irq.h>
|
|
+#include <asm/pci-direct.h>
|
|
+#include <asm/genapic.h>
|
|
+#include <asm/cpu.h>
|
|
|
|
#if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI)
|
|
-
|
|
-static void __devinit quirk_intel_irqbalance(struct pci_dev *dev)
|
|
+static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev)
|
|
{
|
|
u8 config, rev;
|
|
u32 word;
|
|
@@ -14,14 +16,12 @@ static void __devinit quirk_intel_irqbal
|
|
/* BIOS may enable hardware IRQ balancing for
|
|
* E7520/E7320/E7525(revision ID 0x9 and below)
|
|
* based platforms.
|
|
- * Disable SW irqbalance/affinity on those platforms.
|
|
+ * For those platforms, make sure that the genapic is set to 'flat'
|
|
*/
|
|
pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
|
|
if (rev > 0x9)
|
|
return;
|
|
|
|
- printk(KERN_INFO "Intel E7520/7320/7525 detected.");
|
|
-
|
|
/* enable access to config space*/
|
|
pci_read_config_byte(dev, 0xf4, &config);
|
|
pci_write_config_byte(dev, 0xf4, config|0x2);
|
|
@@ -30,6 +30,46 @@ static void __devinit quirk_intel_irqbal
|
|
raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word);
|
|
|
|
if (!(word & (1 << 13))) {
|
|
+#ifndef CONFIG_XEN
|
|
+#ifdef CONFIG_X86_64
|
|
+ if (genapic != &apic_flat)
|
|
+ panic("APIC mode must be flat on this system\n");
|
|
+#elif defined(CONFIG_X86_GENERICARCH)
|
|
+ if (genapic != &apic_default)
|
|
+ panic("APIC mode must be default(flat) on this system. Use apic=default\n");
|
|
+#endif
|
|
+#endif
|
|
+ }
|
|
+
|
|
+ /* put back the original value for config space */
|
|
+ if (!(config & 0x2))
|
|
+ pci_write_config_byte(dev, 0xf4, config);
|
|
+}
|
|
+
|
|
+void __init quirk_intel_irqbalance(void)
|
|
+{
|
|
+ u8 config, rev;
|
|
+ u32 word;
|
|
+
|
|
+ /* BIOS may enable hardware IRQ balancing for
|
|
+ * E7520/E7320/E7525(revision ID 0x9 and below)
|
|
+ * based platforms.
|
|
+ * Disable SW irqbalance/affinity on those platforms.
|
|
+ */
|
|
+ rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION);
|
|
+ if (rev > 0x9)
|
|
+ return;
|
|
+
|
|
+ printk(KERN_INFO "Intel E7520/7320/7525 detected.");
|
|
+
|
|
+ /* enable access to config space */
|
|
+ config = read_pci_config_byte(0, 0, 0, 0xf4);
|
|
+ write_pci_config_byte(0, 0, 0, 0xf4, config|0x2);
|
|
+
|
|
+ /* read xTPR register */
|
|
+ word = read_pci_config_16(0, 0, 0x40, 0x4c);
|
|
+
|
|
+ if (!(word & (1 << 13))) {
|
|
struct xen_platform_op op;
|
|
printk(KERN_INFO "Disabling irq balancing and affinity\n");
|
|
op.cmd = XENPF_platform_quirk;
|
|
@@ -37,11 +77,12 @@ static void __devinit quirk_intel_irqbal
|
|
WARN_ON(HYPERVISOR_platform_op(&op));
|
|
}
|
|
|
|
- /* put back the original value for config space*/
|
|
+ /* put back the original value for config space */
|
|
if (!(config & 0x2))
|
|
- pci_write_config_byte(dev, 0xf4, config);
|
|
+ write_pci_config_byte(0, 0, 0, 0xf4, config);
|
|
}
|
|
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
|
|
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
|
|
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
|
|
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance);
|
|
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance);
|
|
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance);
|
|
+
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/setup_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -76,9 +76,6 @@
|
|
#include <xen/interface/kexec.h>
|
|
#endif
|
|
|
|
-/* Forward Declaration. */
|
|
-void __init find_max_pfn(void);
|
|
-
|
|
static int xen_panic_event(struct notifier_block *, unsigned long, void *);
|
|
static struct notifier_block xen_panic_block = {
|
|
xen_panic_event, NULL, 0 /* try to go last */
|
|
@@ -89,14 +86,11 @@ int disable_pse __devinitdata = 0;
|
|
/*
|
|
* Machine setup..
|
|
*/
|
|
-
|
|
-#ifdef CONFIG_EFI
|
|
-int efi_enabled = 0;
|
|
-EXPORT_SYMBOL(efi_enabled);
|
|
-#endif
|
|
+extern struct resource code_resource;
|
|
+extern struct resource data_resource;
|
|
|
|
/* cpu data as detected by the assembly code in head.S */
|
|
-struct cpuinfo_x86 new_cpu_data __initdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
|
|
+struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
|
|
/* common cpu data for all cpus */
|
|
struct cpuinfo_x86 boot_cpu_data __read_mostly = { 0, 0, 0, 0, -1, 1, 0, 0, -1 };
|
|
EXPORT_SYMBOL(boot_cpu_data);
|
|
@@ -112,12 +106,6 @@ unsigned int machine_submodel_id;
|
|
unsigned int BIOS_revision;
|
|
unsigned int mca_pentium_flag;
|
|
|
|
-/* For PCI or other memory-mapped resources */
|
|
-unsigned long pci_mem_start = 0x10000000;
|
|
-#ifdef CONFIG_PCI
|
|
-EXPORT_SYMBOL(pci_mem_start);
|
|
-#endif
|
|
-
|
|
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
|
|
int bootloader_type;
|
|
|
|
@@ -150,10 +138,6 @@ struct ist_info ist_info;
|
|
defined(CONFIG_X86_SPEEDSTEP_SMI_MODULE)
|
|
EXPORT_SYMBOL(ist_info);
|
|
#endif
|
|
-struct e820map e820;
|
|
-#ifdef CONFIG_XEN
|
|
-struct e820map machine_e820;
|
|
-#endif
|
|
|
|
extern void early_cpu_init(void);
|
|
extern int root_mountflags;
|
|
@@ -168,209 +152,6 @@ static char command_line[COMMAND_LINE_SI
|
|
|
|
unsigned char __initdata boot_params[PARAM_SIZE];
|
|
|
|
-static struct resource data_resource = {
|
|
- .name = "Kernel data",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource code_resource = {
|
|
- .name = "Kernel code",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource system_rom_resource = {
|
|
- .name = "System ROM",
|
|
- .start = 0xf0000,
|
|
- .end = 0xfffff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource extension_rom_resource = {
|
|
- .name = "Extension ROM",
|
|
- .start = 0xe0000,
|
|
- .end = 0xeffff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource adapter_rom_resources[] = { {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0xc8000,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-}, {
|
|
- .name = "Adapter ROM",
|
|
- .start = 0,
|
|
- .end = 0,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-} };
|
|
-
|
|
-static struct resource video_rom_resource = {
|
|
- .name = "Video ROM",
|
|
- .start = 0xc0000,
|
|
- .end = 0xc7fff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource video_ram_resource = {
|
|
- .name = "Video RAM area",
|
|
- .start = 0xa0000,
|
|
- .end = 0xbffff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
|
|
-};
|
|
-
|
|
-static struct resource standard_io_resources[] = { {
|
|
- .name = "dma1",
|
|
- .start = 0x0000,
|
|
- .end = 0x001f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "pic1",
|
|
- .start = 0x0020,
|
|
- .end = 0x0021,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "timer0",
|
|
- .start = 0x0040,
|
|
- .end = 0x0043,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "timer1",
|
|
- .start = 0x0050,
|
|
- .end = 0x0053,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "keyboard",
|
|
- .start = 0x0060,
|
|
- .end = 0x006f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "dma page reg",
|
|
- .start = 0x0080,
|
|
- .end = 0x008f,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "pic2",
|
|
- .start = 0x00a0,
|
|
- .end = 0x00a1,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "dma2",
|
|
- .start = 0x00c0,
|
|
- .end = 0x00df,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-}, {
|
|
- .name = "fpu",
|
|
- .start = 0x00f0,
|
|
- .end = 0x00ff,
|
|
- .flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
-} };
|
|
-
|
|
-#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
|
|
-
|
|
-static int __init romchecksum(unsigned char *rom, unsigned long length)
|
|
-{
|
|
- unsigned char *p, sum = 0;
|
|
-
|
|
- for (p = rom; p < rom + length; p++)
|
|
- sum += *p;
|
|
- return sum == 0;
|
|
-}
|
|
-
|
|
-static void __init probe_roms(void)
|
|
-{
|
|
- unsigned long start, length, upper;
|
|
- unsigned char *rom;
|
|
- int i;
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- /* Nothing to do if not running in dom0. */
|
|
- if (!is_initial_xendomain())
|
|
- return;
|
|
-#endif
|
|
-
|
|
- /* video rom */
|
|
- upper = adapter_rom_resources[0].start;
|
|
- for (start = video_rom_resource.start; start < upper; start += 2048) {
|
|
- rom = isa_bus_to_virt(start);
|
|
- if (!romsignature(rom))
|
|
- continue;
|
|
-
|
|
- video_rom_resource.start = start;
|
|
-
|
|
- /* 0 < length <= 0x7f * 512, historically */
|
|
- length = rom[2] * 512;
|
|
-
|
|
- /* if checksum okay, trust length byte */
|
|
- if (length && romchecksum(rom, length))
|
|
- video_rom_resource.end = start + length - 1;
|
|
-
|
|
- request_resource(&iomem_resource, &video_rom_resource);
|
|
- break;
|
|
- }
|
|
-
|
|
- start = (video_rom_resource.end + 1 + 2047) & ~2047UL;
|
|
- if (start < upper)
|
|
- start = upper;
|
|
-
|
|
- /* system rom */
|
|
- request_resource(&iomem_resource, &system_rom_resource);
|
|
- upper = system_rom_resource.start;
|
|
-
|
|
- /* check for extension rom (ignore length byte!) */
|
|
- rom = isa_bus_to_virt(extension_rom_resource.start);
|
|
- if (romsignature(rom)) {
|
|
- length = extension_rom_resource.end - extension_rom_resource.start + 1;
|
|
- if (romchecksum(rom, length)) {
|
|
- request_resource(&iomem_resource, &extension_rom_resource);
|
|
- upper = extension_rom_resource.start;
|
|
- }
|
|
- }
|
|
-
|
|
- /* check for adapter roms on 2k boundaries */
|
|
- for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
|
|
- rom = isa_bus_to_virt(start);
|
|
- if (!romsignature(rom))
|
|
- continue;
|
|
-
|
|
- /* 0 < length <= 0x7f * 512, historically */
|
|
- length = rom[2] * 512;
|
|
-
|
|
- /* but accept any length that fits if checksum okay */
|
|
- if (!length || start + length > upper || !romchecksum(rom, length))
|
|
- continue;
|
|
-
|
|
- adapter_rom_resources[i].start = start;
|
|
- adapter_rom_resources[i].end = start + length - 1;
|
|
- request_resource(&iomem_resource, &adapter_rom_resources[i]);
|
|
-
|
|
- start = adapter_rom_resources[i++].end & ~2047UL;
|
|
- }
|
|
-}
|
|
-
|
|
/*
|
|
* Point at the empty zero page to start with. We map the real shared_info
|
|
* page as soon as fixmap is up and running.
|
|
@@ -386,353 +167,6 @@ EXPORT_SYMBOL(phys_to_machine_mapping);
|
|
start_info_t *xen_start_info;
|
|
EXPORT_SYMBOL(xen_start_info);
|
|
|
|
-void __init add_memory_region(unsigned long long start,
|
|
- unsigned long long size, int type)
|
|
-{
|
|
- int x;
|
|
-
|
|
- if (!efi_enabled) {
|
|
- x = e820.nr_map;
|
|
-
|
|
- if (x == E820MAX) {
|
|
- printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
|
|
- return;
|
|
- }
|
|
-
|
|
- e820.map[x].addr = start;
|
|
- e820.map[x].size = size;
|
|
- e820.map[x].type = type;
|
|
- e820.nr_map++;
|
|
- }
|
|
-} /* add_memory_region */
|
|
-
|
|
-static void __init limit_regions(unsigned long long size)
|
|
-{
|
|
- unsigned long long current_addr = 0;
|
|
- int i;
|
|
-
|
|
- if (efi_enabled) {
|
|
- efi_memory_desc_t *md;
|
|
- void *p;
|
|
-
|
|
- for (p = memmap.map, i = 0; p < memmap.map_end;
|
|
- p += memmap.desc_size, i++) {
|
|
- md = p;
|
|
- current_addr = md->phys_addr + (md->num_pages << 12);
|
|
- if (md->type == EFI_CONVENTIONAL_MEMORY) {
|
|
- if (current_addr >= size) {
|
|
- md->num_pages -=
|
|
- (((current_addr-size) + PAGE_SIZE-1) >> PAGE_SHIFT);
|
|
- memmap.nr_map = i + 1;
|
|
- return;
|
|
- }
|
|
- }
|
|
- }
|
|
- }
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- current_addr = e820.map[i].addr + e820.map[i].size;
|
|
- if (current_addr < size)
|
|
- continue;
|
|
-
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
-
|
|
- if (e820.map[i].addr >= size) {
|
|
- /*
|
|
- * This region starts past the end of the
|
|
- * requested size, skip it completely.
|
|
- */
|
|
- e820.nr_map = i;
|
|
- } else {
|
|
- e820.nr_map = i + 1;
|
|
- e820.map[i].size -= current_addr - size;
|
|
- }
|
|
- return;
|
|
- }
|
|
-#ifdef CONFIG_XEN
|
|
- if (i==e820.nr_map && current_addr < size) {
|
|
- /*
|
|
- * The e820 map finished before our requested size so
|
|
- * extend the final entry to the requested address.
|
|
- */
|
|
- --i;
|
|
- if (e820.map[i].type == E820_RAM)
|
|
- e820.map[i].size -= current_addr - size;
|
|
- else
|
|
- add_memory_region(current_addr, size - current_addr, E820_RAM);
|
|
- }
|
|
-#endif
|
|
-}
|
|
-
|
|
-#define E820_DEBUG 1
|
|
-
|
|
-static void __init print_memory_map(char *who)
|
|
-{
|
|
- int i;
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- printk(" %s: %016Lx - %016Lx ", who,
|
|
- e820.map[i].addr,
|
|
- e820.map[i].addr + e820.map[i].size);
|
|
- switch (e820.map[i].type) {
|
|
- case E820_RAM: printk("(usable)\n");
|
|
- break;
|
|
- case E820_RESERVED:
|
|
- printk("(reserved)\n");
|
|
- break;
|
|
- case E820_ACPI:
|
|
- printk("(ACPI data)\n");
|
|
- break;
|
|
- case E820_NVS:
|
|
- printk("(ACPI NVS)\n");
|
|
- break;
|
|
- default: printk("type %lu\n", e820.map[i].type);
|
|
- break;
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
- * Sanitize the BIOS e820 map.
|
|
- *
|
|
- * Some e820 responses include overlapping entries. The following
|
|
- * replaces the original e820 map with a new one, removing overlaps.
|
|
- *
|
|
- */
|
|
-struct change_member {
|
|
- struct e820entry *pbios; /* pointer to original bios entry */
|
|
- unsigned long long addr; /* address for this change point */
|
|
-};
|
|
-static struct change_member change_point_list[2*E820MAX] __initdata;
|
|
-static struct change_member *change_point[2*E820MAX] __initdata;
|
|
-static struct e820entry *overlap_list[E820MAX] __initdata;
|
|
-static struct e820entry new_bios[E820MAX] __initdata;
|
|
-
|
|
-int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map)
|
|
-{
|
|
- struct change_member *change_tmp;
|
|
- unsigned long current_type, last_type;
|
|
- unsigned long long last_addr;
|
|
- int chgidx, still_changing;
|
|
- int overlap_entries;
|
|
- int new_bios_entry;
|
|
- int old_nr, new_nr, chg_nr;
|
|
- int i;
|
|
-
|
|
- /*
|
|
- Visually we're performing the following (1,2,3,4 = memory types)...
|
|
-
|
|
- Sample memory map (w/overlaps):
|
|
- ____22__________________
|
|
- ______________________4_
|
|
- ____1111________________
|
|
- _44_____________________
|
|
- 11111111________________
|
|
- ____________________33__
|
|
- ___________44___________
|
|
- __________33333_________
|
|
- ______________22________
|
|
- ___________________2222_
|
|
- _________111111111______
|
|
- _____________________11_
|
|
- _________________4______
|
|
-
|
|
- Sanitized equivalent (no overlap):
|
|
- 1_______________________
|
|
- _44_____________________
|
|
- ___1____________________
|
|
- ____22__________________
|
|
- ______11________________
|
|
- _________1______________
|
|
- __________3_____________
|
|
- ___________44___________
|
|
- _____________33_________
|
|
- _______________2________
|
|
- ________________1_______
|
|
- _________________4______
|
|
- ___________________2____
|
|
- ____________________33__
|
|
- ______________________4_
|
|
- */
|
|
-
|
|
- /* if there's only one memory region, don't bother */
|
|
- if (*pnr_map < 2)
|
|
- return -1;
|
|
-
|
|
- old_nr = *pnr_map;
|
|
-
|
|
- /* bail out if we find any unreasonable addresses in bios map */
|
|
- for (i=0; i<old_nr; i++)
|
|
- if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr)
|
|
- return -1;
|
|
-
|
|
- /* create pointers for initial change-point information (for sorting) */
|
|
- for (i=0; i < 2*old_nr; i++)
|
|
- change_point[i] = &change_point_list[i];
|
|
-
|
|
- /* record all known change-points (starting and ending addresses),
|
|
- omitting those that are for empty memory regions */
|
|
- chgidx = 0;
|
|
- for (i=0; i < old_nr; i++) {
|
|
- if (biosmap[i].size != 0) {
|
|
- change_point[chgidx]->addr = biosmap[i].addr;
|
|
- change_point[chgidx++]->pbios = &biosmap[i];
|
|
- change_point[chgidx]->addr = biosmap[i].addr + biosmap[i].size;
|
|
- change_point[chgidx++]->pbios = &biosmap[i];
|
|
- }
|
|
- }
|
|
- chg_nr = chgidx; /* true number of change-points */
|
|
-
|
|
- /* sort change-point list by memory addresses (low -> high) */
|
|
- still_changing = 1;
|
|
- while (still_changing) {
|
|
- still_changing = 0;
|
|
- for (i=1; i < chg_nr; i++) {
|
|
- /* if <current_addr> > <last_addr>, swap */
|
|
- /* or, if current=<start_addr> & last=<end_addr>, swap */
|
|
- if ((change_point[i]->addr < change_point[i-1]->addr) ||
|
|
- ((change_point[i]->addr == change_point[i-1]->addr) &&
|
|
- (change_point[i]->addr == change_point[i]->pbios->addr) &&
|
|
- (change_point[i-1]->addr != change_point[i-1]->pbios->addr))
|
|
- )
|
|
- {
|
|
- change_tmp = change_point[i];
|
|
- change_point[i] = change_point[i-1];
|
|
- change_point[i-1] = change_tmp;
|
|
- still_changing=1;
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- /* create a new bios memory map, removing overlaps */
|
|
- overlap_entries=0; /* number of entries in the overlap table */
|
|
- new_bios_entry=0; /* index for creating new bios map entries */
|
|
- last_type = 0; /* start with undefined memory type */
|
|
- last_addr = 0; /* start with 0 as last starting address */
|
|
- /* loop through change-points, determining affect on the new bios map */
|
|
- for (chgidx=0; chgidx < chg_nr; chgidx++)
|
|
- {
|
|
- /* keep track of all overlapping bios entries */
|
|
- if (change_point[chgidx]->addr == change_point[chgidx]->pbios->addr)
|
|
- {
|
|
- /* add map entry to overlap list (> 1 entry implies an overlap) */
|
|
- overlap_list[overlap_entries++]=change_point[chgidx]->pbios;
|
|
- }
|
|
- else
|
|
- {
|
|
- /* remove entry from list (order independent, so swap with last) */
|
|
- for (i=0; i<overlap_entries; i++)
|
|
- {
|
|
- if (overlap_list[i] == change_point[chgidx]->pbios)
|
|
- overlap_list[i] = overlap_list[overlap_entries-1];
|
|
- }
|
|
- overlap_entries--;
|
|
- }
|
|
- /* if there are overlapping entries, decide which "type" to use */
|
|
- /* (larger value takes precedence -- 1=usable, 2,3,4,4+=unusable) */
|
|
- current_type = 0;
|
|
- for (i=0; i<overlap_entries; i++)
|
|
- if (overlap_list[i]->type > current_type)
|
|
- current_type = overlap_list[i]->type;
|
|
- /* continue building up new bios map based on this information */
|
|
- if (current_type != last_type) {
|
|
- if (last_type != 0) {
|
|
- new_bios[new_bios_entry].size =
|
|
- change_point[chgidx]->addr - last_addr;
|
|
- /* move forward only if the new size was non-zero */
|
|
- if (new_bios[new_bios_entry].size != 0)
|
|
- if (++new_bios_entry >= E820MAX)
|
|
- break; /* no more space left for new bios entries */
|
|
- }
|
|
- if (current_type != 0) {
|
|
- new_bios[new_bios_entry].addr = change_point[chgidx]->addr;
|
|
- new_bios[new_bios_entry].type = current_type;
|
|
- last_addr=change_point[chgidx]->addr;
|
|
- }
|
|
- last_type = current_type;
|
|
- }
|
|
- }
|
|
- new_nr = new_bios_entry; /* retain count for new bios entries */
|
|
-
|
|
- /* copy new bios mapping into original location */
|
|
- memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
|
|
- *pnr_map = new_nr;
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Copy the BIOS e820 map into a safe place.
|
|
- *
|
|
- * Sanity-check it while we're at it..
|
|
- *
|
|
- * If we're lucky and live on a modern system, the setup code
|
|
- * will have given us a memory map that we can use to properly
|
|
- * set up memory. If we aren't, we'll fake a memory map.
|
|
- *
|
|
- * We check to see that the memory map contains at least 2 elements
|
|
- * before we'll use it, because the detection code in setup.S may
|
|
- * not be perfect and most every PC known to man has two memory
|
|
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
|
|
- * thinkpad 560x, for example, does not cooperate with the memory
|
|
- * detection code.)
|
|
- */
|
|
-int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
|
|
-{
|
|
-#ifndef CONFIG_XEN
|
|
- /* Only one memory region (or negative)? Ignore it */
|
|
- if (nr_map < 2)
|
|
- return -1;
|
|
-#else
|
|
- BUG_ON(nr_map < 1);
|
|
-#endif
|
|
-
|
|
- do {
|
|
- unsigned long long start = biosmap->addr;
|
|
- unsigned long long size = biosmap->size;
|
|
- unsigned long long end = start + size;
|
|
- unsigned long type = biosmap->type;
|
|
-
|
|
- /* Overflow in 64 bits? Ignore the memory map. */
|
|
- if (start > end)
|
|
- return -1;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * Some BIOSes claim RAM in the 640k - 1M region.
|
|
- * Not right. Fix it up.
|
|
- */
|
|
- if (type == E820_RAM) {
|
|
- if (start < 0x100000ULL && end > 0xA0000ULL) {
|
|
- if (start < 0xA0000ULL)
|
|
- add_memory_region(start, 0xA0000ULL-start, type);
|
|
- if (end <= 0x100000ULL)
|
|
- continue;
|
|
- start = 0x100000ULL;
|
|
- size = end - start;
|
|
- }
|
|
- }
|
|
-#endif
|
|
- add_memory_region(start, size, type);
|
|
- } while (biosmap++,--nr_map);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- if (is_initial_xendomain()) {
|
|
- struct xen_memory_map memmap;
|
|
-
|
|
- memmap.nr_entries = E820MAX;
|
|
- set_xen_guest_handle(memmap.buffer, machine_e820.map);
|
|
-
|
|
- if (HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap))
|
|
- BUG();
|
|
- machine_e820.nr_map = memmap.nr_entries;
|
|
- } else
|
|
- machine_e820 = e820;
|
|
-#endif
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
#if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE)
|
|
struct edd edd;
|
|
#ifdef CONFIG_EDD_MODULE
|
|
@@ -758,7 +192,7 @@ static inline void copy_edd(void)
|
|
}
|
|
#endif
|
|
|
|
-static int __initdata user_defined_memmap = 0;
|
|
+int __initdata user_defined_memmap = 0;
|
|
|
|
/*
|
|
* "mem=nopentium" disables the 4MB page tables.
|
|
@@ -795,51 +229,6 @@ static int __init parse_mem(char *arg)
|
|
}
|
|
early_param("mem", parse_mem);
|
|
|
|
-static int __init parse_memmap(char *arg)
|
|
-{
|
|
- if (!arg)
|
|
- return -EINVAL;
|
|
-
|
|
- if (strcmp(arg, "exactmap") == 0) {
|
|
-#ifdef CONFIG_CRASH_DUMP
|
|
- /* If we are doing a crash dump, we
|
|
- * still need to know the real mem
|
|
- * size before original memory map is
|
|
- * reset.
|
|
- */
|
|
- find_max_pfn();
|
|
- saved_max_pfn = max_pfn;
|
|
-#endif
|
|
- e820.nr_map = 0;
|
|
- user_defined_memmap = 1;
|
|
- } else {
|
|
- /* If the user specifies memory size, we
|
|
- * limit the BIOS-provided memory map to
|
|
- * that size. exactmap can be used to specify
|
|
- * the exact map. mem=number can be used to
|
|
- * trim the existing memory map.
|
|
- */
|
|
- unsigned long long start_at, mem_size;
|
|
-
|
|
- mem_size = memparse(arg, &arg);
|
|
- if (*arg == '@') {
|
|
- start_at = memparse(arg+1, &arg);
|
|
- add_memory_region(start_at, mem_size, E820_RAM);
|
|
- } else if (*arg == '#') {
|
|
- start_at = memparse(arg+1, &arg);
|
|
- add_memory_region(start_at, mem_size, E820_ACPI);
|
|
- } else if (*arg == '$') {
|
|
- start_at = memparse(arg+1, &arg);
|
|
- add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
- } else {
|
|
- limit_regions(mem_size);
|
|
- user_defined_memmap = 1;
|
|
- }
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-early_param("memmap", parse_memmap);
|
|
-
|
|
#ifdef CONFIG_PROC_VMCORE
|
|
/* elfcorehdr= specifies the location of elf core header
|
|
* stored by the crashed kernel.
|
|
@@ -906,127 +295,6 @@ early_param("reservetop", parse_reservet
|
|
#endif
|
|
|
|
/*
|
|
- * Callback for efi_memory_walk.
|
|
- */
|
|
-static int __init
|
|
-efi_find_max_pfn(unsigned long start, unsigned long end, void *arg)
|
|
-{
|
|
- unsigned long *max_pfn = arg, pfn;
|
|
-
|
|
- if (start < end) {
|
|
- pfn = PFN_UP(end -1);
|
|
- if (pfn > *max_pfn)
|
|
- *max_pfn = pfn;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static int __init
|
|
-efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
|
|
-{
|
|
- memory_present(0, PFN_UP(start), PFN_DOWN(end));
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * This function checks if any part of the range <start,end> is mapped
|
|
- * with type.
|
|
- */
|
|
-int
|
|
-e820_any_mapped(u64 start, u64 end, unsigned type)
|
|
-{
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- const struct e820entry *ei = &e820.map[i];
|
|
-#else
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
- for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
- const struct e820entry *ei = &machine_e820.map[i];
|
|
-#endif
|
|
-
|
|
- if (type && ei->type != type)
|
|
- continue;
|
|
- if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
- continue;
|
|
- return 1;
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(e820_any_mapped);
|
|
-
|
|
- /*
|
|
- * This function checks if the entire range <start,end> is mapped with type.
|
|
- *
|
|
- * Note: this function only works correct if the e820 table is sorted and
|
|
- * not-overlapping, which is the case
|
|
- */
|
|
-int __init
|
|
-e820_all_mapped(unsigned long s, unsigned long e, unsigned type)
|
|
-{
|
|
- u64 start = s;
|
|
- u64 end = e;
|
|
- int i;
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
-#else
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
- for (i = 0; i < machine_e820.nr_map; ++i) {
|
|
- const struct e820entry *ei = &machine_e820.map[i];
|
|
-#endif
|
|
- if (type && ei->type != type)
|
|
- continue;
|
|
- /* is the region (part) in overlap with the current region ?*/
|
|
- if (ei->addr >= end || ei->addr + ei->size <= start)
|
|
- continue;
|
|
- /* if the region is at the beginning of <start,end> we move
|
|
- * start to the end of the region since it's ok until there
|
|
- */
|
|
- if (ei->addr <= start)
|
|
- start = ei->addr + ei->size;
|
|
- /* if start is now at or beyond end, we're done, full
|
|
- * coverage */
|
|
- if (start >= end)
|
|
- return 1; /* we're done */
|
|
- }
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Find the highest page frame number we have available
|
|
- */
|
|
-void __init find_max_pfn(void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- max_pfn = 0;
|
|
- if (efi_enabled) {
|
|
- efi_memmap_walk(efi_find_max_pfn, &max_pfn);
|
|
- efi_memmap_walk(efi_memory_present_wrapper, NULL);
|
|
- return;
|
|
- }
|
|
-
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- unsigned long start, end;
|
|
- /* RAM? */
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
- start = PFN_UP(e820.map[i].addr);
|
|
- end = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
|
- if (start >= end)
|
|
- continue;
|
|
- if (end > max_pfn)
|
|
- max_pfn = end;
|
|
- memory_present(0, start, end);
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
* Determine low and high memory ranges:
|
|
*/
|
|
unsigned long __init find_max_low_pfn(void)
|
|
@@ -1085,77 +353,6 @@ unsigned long __init find_max_low_pfn(vo
|
|
return max_low_pfn;
|
|
}
|
|
|
|
-/*
|
|
- * Free all available memory for boot time allocation. Used
|
|
- * as a callback function by efi_memory_walk()
|
|
- */
|
|
-
|
|
-static int __init
|
|
-free_available_memory(unsigned long start, unsigned long end, void *arg)
|
|
-{
|
|
- /* check max_low_pfn */
|
|
- if (start >= (max_low_pfn << PAGE_SHIFT))
|
|
- return 0;
|
|
- if (end >= (max_low_pfn << PAGE_SHIFT))
|
|
- end = max_low_pfn << PAGE_SHIFT;
|
|
- if (start < end)
|
|
- free_bootmem(start, end - start);
|
|
-
|
|
- return 0;
|
|
-}
|
|
-/*
|
|
- * Register fully available low RAM pages with the bootmem allocator.
|
|
- */
|
|
-static void __init register_bootmem_low_pages(unsigned long max_low_pfn)
|
|
-{
|
|
- int i;
|
|
-
|
|
- if (efi_enabled) {
|
|
- efi_memmap_walk(free_available_memory, NULL);
|
|
- return;
|
|
- }
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- unsigned long curr_pfn, last_pfn, size;
|
|
- /*
|
|
- * Reserve usable low memory
|
|
- */
|
|
- if (e820.map[i].type != E820_RAM)
|
|
- continue;
|
|
- /*
|
|
- * We are rounding up the start address of usable memory:
|
|
- */
|
|
- curr_pfn = PFN_UP(e820.map[i].addr);
|
|
- if (curr_pfn >= max_low_pfn)
|
|
- continue;
|
|
- /*
|
|
- * ... and at the end of the usable range downwards:
|
|
- */
|
|
- last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
- /*
|
|
- * Truncate to the number of actual pages currently
|
|
- * present.
|
|
- */
|
|
- if (last_pfn > xen_start_info->nr_pages)
|
|
- last_pfn = xen_start_info->nr_pages;
|
|
-#endif
|
|
-
|
|
- if (last_pfn > max_low_pfn)
|
|
- last_pfn = max_low_pfn;
|
|
-
|
|
- /*
|
|
- * .. finally, did all the rounding and playing
|
|
- * around just make the area go away?
|
|
- */
|
|
- if (last_pfn <= curr_pfn)
|
|
- continue;
|
|
-
|
|
- size = last_pfn - curr_pfn;
|
|
- free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
|
|
- }
|
|
-}
|
|
-
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
* workaround for Dell systems that neglect to reserve EBDA
|
|
@@ -1247,8 +444,8 @@ void __init setup_bootmem_allocator(void
|
|
* the (very unlikely) case of us accidentally initializing the
|
|
* bootmem allocator with an invalid RAM area.
|
|
*/
|
|
- reserve_bootmem(__PHYSICAL_START, (PFN_PHYS(min_low_pfn) +
|
|
- bootmap_size + PAGE_SIZE-1) - (__PHYSICAL_START));
|
|
+ reserve_bootmem(__pa_symbol(_text), (PFN_PHYS(min_low_pfn) +
|
|
+ bootmap_size + PAGE_SIZE-1) - __pa_symbol(_text));
|
|
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
@@ -1330,160 +527,6 @@ void __init remapped_pgdat_init(void)
|
|
}
|
|
}
|
|
|
|
-/*
|
|
- * Request address space for all standard RAM and ROM resources
|
|
- * and also for regions reported as reserved by the e820.
|
|
- */
|
|
-static void __init
|
|
-legacy_init_iomem_resources(struct e820entry *e820, int nr_map,
|
|
- struct resource *code_resource,
|
|
- struct resource *data_resource)
|
|
-{
|
|
- int i;
|
|
-
|
|
- probe_roms();
|
|
-
|
|
- for (i = 0; i < nr_map; i++) {
|
|
- struct resource *res;
|
|
-#ifndef CONFIG_RESOURCES_64BIT
|
|
- if (e820[i].addr + e820[i].size > 0x100000000ULL)
|
|
- continue;
|
|
-#endif
|
|
- res = kzalloc(sizeof(struct resource), GFP_ATOMIC);
|
|
- switch (e820[i].type) {
|
|
- case E820_RAM: res->name = "System RAM"; break;
|
|
- case E820_ACPI: res->name = "ACPI Tables"; break;
|
|
- case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
|
|
- default: res->name = "reserved";
|
|
- }
|
|
- res->start = e820[i].addr;
|
|
- res->end = res->start + e820[i].size - 1;
|
|
- res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
- if (request_resource(&iomem_resource, res)) {
|
|
- kfree(res);
|
|
- continue;
|
|
- }
|
|
- if (e820[i].type == E820_RAM) {
|
|
- /*
|
|
- * We don't know which RAM region contains kernel data,
|
|
- * so we try it repeatedly and let the resource manager
|
|
- * test it.
|
|
- */
|
|
-#ifndef CONFIG_XEN
|
|
- request_resource(res, code_resource);
|
|
- request_resource(res, data_resource);
|
|
-#endif
|
|
-#ifdef CONFIG_KEXEC
|
|
- if (crashk_res.start != crashk_res.end)
|
|
- request_resource(res, &crashk_res);
|
|
-#ifdef CONFIG_XEN
|
|
- xen_machine_kexec_register_resources(res);
|
|
-#endif
|
|
-#endif
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-/*
|
|
- * Locate a unused range of the physical address space below 4G which
|
|
- * can be used for PCI mappings.
|
|
- */
|
|
-static void __init
|
|
-e820_setup_gap(struct e820entry *e820, int nr_map)
|
|
-{
|
|
- unsigned long gapstart, gapsize, round;
|
|
- unsigned long long last;
|
|
- int i;
|
|
-
|
|
- /*
|
|
- * Search for the bigest gap in the low 32 bits of the e820
|
|
- * memory space.
|
|
- */
|
|
- last = 0x100000000ull;
|
|
- gapstart = 0x10000000;
|
|
- gapsize = 0x400000;
|
|
- i = nr_map;
|
|
- while (--i >= 0) {
|
|
- unsigned long long start = e820[i].addr;
|
|
- unsigned long long end = start + e820[i].size;
|
|
-
|
|
- /*
|
|
- * Since "last" is at most 4GB, we know we'll
|
|
- * fit in 32 bits if this condition is true
|
|
- */
|
|
- if (last > end) {
|
|
- unsigned long gap = last - end;
|
|
-
|
|
- if (gap > gapsize) {
|
|
- gapsize = gap;
|
|
- gapstart = end;
|
|
- }
|
|
- }
|
|
- if (start < last)
|
|
- last = start;
|
|
- }
|
|
-
|
|
- /*
|
|
- * See how much we want to round up: start off with
|
|
- * rounding to the next 1MB area.
|
|
- */
|
|
- round = 0x100000;
|
|
- while ((gapsize >> 4) > round)
|
|
- round += round;
|
|
- /* Fun with two's complement */
|
|
- pci_mem_start = (gapstart + round) & -round;
|
|
-
|
|
- printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n",
|
|
- pci_mem_start, gapstart, gapsize);
|
|
-}
|
|
-
|
|
-/*
|
|
- * Request address space for all standard resources
|
|
- *
|
|
- * This is called just before pcibios_init(), which is also a
|
|
- * subsys_initcall, but is linked in later (in arch/i386/pci/common.c).
|
|
- */
|
|
-static int __init request_standard_resources(void)
|
|
-{
|
|
- int i;
|
|
-
|
|
- /* Nothing to do if not running in dom0. */
|
|
- if (!is_initial_xendomain())
|
|
- return 0;
|
|
-
|
|
- printk("Setting up standard PCI resources\n");
|
|
-#ifdef CONFIG_XEN
|
|
- legacy_init_iomem_resources(machine_e820.map, machine_e820.nr_map,
|
|
- &code_resource, &data_resource);
|
|
-#else
|
|
- if (efi_enabled)
|
|
- efi_initialize_iomem_resources(&code_resource, &data_resource);
|
|
- else
|
|
- legacy_init_iomem_resources(e820.map, e820.nr_map,
|
|
- &code_resource, &data_resource);
|
|
-#endif
|
|
-
|
|
- /* EFI systems may still have VGA */
|
|
- request_resource(&iomem_resource, &video_ram_resource);
|
|
-
|
|
- /* request I/O space for devices used on all i[345]86 PCs */
|
|
- for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
- request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-subsys_initcall(request_standard_resources);
|
|
-
|
|
-static void __init register_memory(void)
|
|
-{
|
|
-#ifdef CONFIG_XEN
|
|
- if (is_initial_xendomain())
|
|
- e820_setup_gap(machine_e820.map, machine_e820.nr_map);
|
|
- else
|
|
-#endif
|
|
- e820_setup_gap(e820.map, e820.nr_map);
|
|
-}
|
|
-
|
|
#ifdef CONFIG_MCA
|
|
static void set_mca_bus(int x)
|
|
{
|
|
@@ -1493,6 +536,12 @@ static void set_mca_bus(int x)
|
|
static void set_mca_bus(int x) { }
|
|
#endif
|
|
|
|
+/* Overridden in paravirt.c if CONFIG_PARAVIRT */
|
|
+char * __init __attribute__((weak)) memory_setup(void)
|
|
+{
|
|
+ return machine_specific_memory_setup();
|
|
+}
|
|
+
|
|
/*
|
|
* Determine if we were loaded by an EFI loader. If so, then we have also been
|
|
* passed the efi memmap, systab, etc., so we should use these data structures
|
|
@@ -1580,7 +629,7 @@ void __init setup_arch(char **cmdline_p)
|
|
efi_init();
|
|
else {
|
|
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
|
- print_memory_map(machine_specific_memory_setup());
|
|
+ print_memory_map(memory_setup());
|
|
}
|
|
|
|
copy_edd();
|
|
@@ -1759,7 +808,7 @@ void __init setup_arch(char **cmdline_p)
|
|
get_smp_config();
|
|
#endif
|
|
|
|
- register_memory();
|
|
+ e820_register_memory();
|
|
|
|
if (is_initial_xendomain()) {
|
|
#ifdef CONFIG_VT
|
|
--- head-2010-05-25.orig/arch/x86/kernel/smp_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/smp_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -659,6 +659,10 @@ int smp_call_function_single(int cpu, vo
|
|
put_cpu();
|
|
return -EBUSY;
|
|
}
|
|
+
|
|
+ /* Can deadlock when called with interrupts disabled */
|
|
+ WARN_ON(irqs_disabled());
|
|
+
|
|
spin_lock_bh(&call_lock);
|
|
__smp_call_function_single(cpu, func, info, nonatomic, wait);
|
|
spin_unlock_bh(&call_lock);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -60,6 +60,7 @@
|
|
#include <asm/uaccess.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/timer.h>
|
|
+#include <asm/time.h>
|
|
#include <asm/sections.h>
|
|
|
|
#include "mach_time.h"
|
|
@@ -128,11 +129,11 @@ static DEFINE_PER_CPU(struct vcpu_runsta
|
|
/* Must be signed, as it's compared with s64 quantities which can be -ve. */
|
|
#define NS_PER_TICK (1000000000LL/HZ)
|
|
|
|
-static void __clock_was_set(void *unused)
|
|
+static void __clock_was_set(struct work_struct *unused)
|
|
{
|
|
clock_was_set();
|
|
}
|
|
-static DECLARE_WORK(clock_was_set_work, __clock_was_set, NULL);
|
|
+static DECLARE_WORK(clock_was_set_work, __clock_was_set);
|
|
|
|
/*
|
|
* GCC 4.3 can turn loops over an induction variable into division. We do
|
|
@@ -527,10 +528,7 @@ static int set_rtc_mmss(unsigned long no
|
|
/* gets recalled with irq locally disabled */
|
|
/* XXX - does irqsave resolve this? -johnstul */
|
|
spin_lock_irqsave(&rtc_lock, flags);
|
|
- if (efi_enabled)
|
|
- retval = efi_set_rtc_mmss(nowtime);
|
|
- else
|
|
- retval = mach_set_rtc_mmss(nowtime);
|
|
+ retval = set_wallclock(nowtime);
|
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
|
|
|
return retval;
|
|
@@ -858,10 +856,7 @@ unsigned long get_cmos_time(void)
|
|
|
|
spin_lock_irqsave(&rtc_lock, flags);
|
|
|
|
- if (efi_enabled)
|
|
- retval = efi_get_time();
|
|
- else
|
|
- retval = mach_get_cmos_time();
|
|
+ retval = get_wallclock();
|
|
|
|
spin_unlock_irqrestore(&rtc_lock, flags);
|
|
|
|
@@ -963,7 +958,7 @@ static void __init hpet_time_init(void)
|
|
printk("Using HPET for base-timer\n");
|
|
}
|
|
|
|
- time_init_hook();
|
|
+ do_time_init();
|
|
}
|
|
#endif
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/traps_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/traps_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -29,6 +29,8 @@
|
|
#include <linux/kexec.h>
|
|
#include <linux/unwind.h>
|
|
#include <linux/uaccess.h>
|
|
+#include <linux/nmi.h>
|
|
+#include <linux/bug.h>
|
|
|
|
#ifdef CONFIG_EISA
|
|
#include <linux/ioport.h>
|
|
@@ -61,9 +63,6 @@ int panic_on_unrecovered_nmi;
|
|
|
|
asmlinkage int system_call(void);
|
|
|
|
-struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
|
|
- { 0, 0 }, { 0, 0 } };
|
|
-
|
|
/* Do we ignore FPU interrupts ? */
|
|
char ignore_fpu_irq = 0;
|
|
|
|
@@ -100,12 +99,7 @@ asmlinkage void fixup_4gb_segment(void);
|
|
#endif
|
|
asmlinkage void machine_check(void);
|
|
|
|
-static int kstack_depth_to_print = 24;
|
|
-#ifdef CONFIG_STACK_UNWIND
|
|
-static int call_trace = 1;
|
|
-#else
|
|
-#define call_trace (-1)
|
|
-#endif
|
|
+int kstack_depth_to_print = 24;
|
|
ATOMIC_NOTIFIER_HEAD(i386die_chain);
|
|
|
|
int register_die_notifier(struct notifier_block *nb)
|
|
@@ -159,25 +153,7 @@ static inline unsigned long print_contex
|
|
return ebp;
|
|
}
|
|
|
|
-struct ops_and_data {
|
|
- struct stacktrace_ops *ops;
|
|
- void *data;
|
|
-};
|
|
-
|
|
-static asmlinkage int
|
|
-dump_trace_unwind(struct unwind_frame_info *info, void *data)
|
|
-{
|
|
- struct ops_and_data *oad = (struct ops_and_data *)data;
|
|
- int n = 0;
|
|
-
|
|
- while (unwind(info) == 0 && UNW_PC(info)) {
|
|
- n++;
|
|
- oad->ops->address(oad->data, UNW_PC(info));
|
|
- if (arch_unw_user_mode(info))
|
|
- break;
|
|
- }
|
|
- return n;
|
|
-}
|
|
+#define MSG(msg) ops->warning(data, msg)
|
|
|
|
void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
|
unsigned long *stack,
|
|
@@ -188,39 +164,6 @@ void dump_trace(struct task_struct *task
|
|
if (!task)
|
|
task = current;
|
|
|
|
- if (call_trace >= 0) {
|
|
- int unw_ret = 0;
|
|
- struct unwind_frame_info info;
|
|
- struct ops_and_data oad = { .ops = ops, .data = data };
|
|
-
|
|
- if (regs) {
|
|
- if (unwind_init_frame_info(&info, task, regs) == 0)
|
|
- unw_ret = dump_trace_unwind(&info, &oad);
|
|
- } else if (task == current)
|
|
- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
|
|
- else {
|
|
- if (unwind_init_blocked(&info, task) == 0)
|
|
- unw_ret = dump_trace_unwind(&info, &oad);
|
|
- }
|
|
- if (unw_ret > 0) {
|
|
- if (call_trace == 1 && !arch_unw_user_mode(&info)) {
|
|
- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
|
|
- UNW_PC(&info));
|
|
- if (UNW_SP(&info) >= PAGE_OFFSET) {
|
|
- ops->warning(data, "Leftover inexact backtrace:\n");
|
|
- stack = (void *)UNW_SP(&info);
|
|
- if (!stack)
|
|
- return;
|
|
- ebp = UNW_FP(&info);
|
|
- } else
|
|
- ops->warning(data, "Full inexact backtrace again:\n");
|
|
- } else if (call_trace >= 1)
|
|
- return;
|
|
- else
|
|
- ops->warning(data, "Full inexact backtrace again:\n");
|
|
- } else
|
|
- ops->warning(data, "Inexact backtrace:\n");
|
|
- }
|
|
if (!stack) {
|
|
unsigned long dummy;
|
|
stack = &dummy;
|
|
@@ -253,6 +196,7 @@ void dump_trace(struct task_struct *task
|
|
stack = (unsigned long*)context->previous_esp;
|
|
if (!stack)
|
|
break;
|
|
+ touch_nmi_watchdog();
|
|
}
|
|
}
|
|
EXPORT_SYMBOL(dump_trace);
|
|
@@ -385,7 +329,7 @@ void show_registers(struct pt_regs *regs
|
|
* time of the fault..
|
|
*/
|
|
if (in_kernel) {
|
|
- u8 __user *eip;
|
|
+ u8 *eip;
|
|
int code_bytes = 64;
|
|
unsigned char c;
|
|
|
|
@@ -394,18 +338,20 @@ void show_registers(struct pt_regs *regs
|
|
|
|
printk(KERN_EMERG "Code: ");
|
|
|
|
- eip = (u8 __user *)regs->eip - 43;
|
|
- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
|
|
+ eip = (u8 *)regs->eip - 43;
|
|
+ if (eip < (u8 *)PAGE_OFFSET ||
|
|
+ probe_kernel_address(eip, c)) {
|
|
/* try starting at EIP */
|
|
- eip = (u8 __user *)regs->eip;
|
|
+ eip = (u8 *)regs->eip;
|
|
code_bytes = 32;
|
|
}
|
|
for (i = 0; i < code_bytes; i++, eip++) {
|
|
- if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
|
|
+ if (eip < (u8 *)PAGE_OFFSET ||
|
|
+ probe_kernel_address(eip, c)) {
|
|
printk(" Bad EIP value.");
|
|
break;
|
|
}
|
|
- if (eip == (u8 __user *)regs->eip)
|
|
+ if (eip == (u8 *)regs->eip)
|
|
printk("<%02x> ", c);
|
|
else
|
|
printk("%02x ", c);
|
|
@@ -414,43 +360,22 @@ void show_registers(struct pt_regs *regs
|
|
printk("\n");
|
|
}
|
|
|
|
-static void handle_BUG(struct pt_regs *regs)
|
|
+int is_valid_bugaddr(unsigned long eip)
|
|
{
|
|
- unsigned long eip = regs->eip;
|
|
unsigned short ud2;
|
|
|
|
if (eip < PAGE_OFFSET)
|
|
- return;
|
|
- if (probe_kernel_address((unsigned short __user *)eip, ud2))
|
|
- return;
|
|
- if (ud2 != 0x0b0f)
|
|
- return;
|
|
+ return 0;
|
|
+ if (probe_kernel_address((unsigned short *)eip, ud2))
|
|
+ return 0;
|
|
|
|
- printk(KERN_EMERG "------------[ cut here ]------------\n");
|
|
-
|
|
-#ifdef CONFIG_DEBUG_BUGVERBOSE
|
|
- do {
|
|
- unsigned short line;
|
|
- char *file;
|
|
- char c;
|
|
-
|
|
- if (probe_kernel_address((unsigned short __user *)(eip + 2),
|
|
- line))
|
|
- break;
|
|
- if (__get_user(file, (char * __user *)(eip + 4)) ||
|
|
- (unsigned long)file < PAGE_OFFSET || __get_user(c, file))
|
|
- file = "<bad filename>";
|
|
-
|
|
- printk(KERN_EMERG "kernel BUG at %s:%d!\n", file, line);
|
|
- return;
|
|
- } while (0);
|
|
-#endif
|
|
- printk(KERN_EMERG "Kernel BUG at [verbose debug info unavailable]\n");
|
|
+ return ud2 == 0x0b0f;
|
|
}
|
|
|
|
-/* This is gone through when something in the kernel
|
|
- * has done something bad and is about to be terminated.
|
|
-*/
|
|
+/*
|
|
+ * This is gone through when something in the kernel has done something bad and
|
|
+ * is about to be terminated.
|
|
+ */
|
|
void die(const char * str, struct pt_regs * regs, long err)
|
|
{
|
|
static struct {
|
|
@@ -458,7 +383,7 @@ void die(const char * str, struct pt_reg
|
|
u32 lock_owner;
|
|
int lock_owner_depth;
|
|
} die = {
|
|
- .lock = SPIN_LOCK_UNLOCKED,
|
|
+ .lock = __SPIN_LOCK_UNLOCKED(die.lock),
|
|
.lock_owner = -1,
|
|
.lock_owner_depth = 0
|
|
};
|
|
@@ -482,7 +407,8 @@ void die(const char * str, struct pt_reg
|
|
unsigned long esp;
|
|
unsigned short ss;
|
|
|
|
- handle_BUG(regs);
|
|
+ report_bug(regs->eip);
|
|
+
|
|
printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter);
|
|
#ifdef CONFIG_PREEMPT
|
|
printk(KERN_EMERG "PREEMPT ");
|
|
@@ -682,8 +608,7 @@ mem_parity_error(unsigned char reason, s
|
|
{
|
|
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
|
|
"CPU %d.\n", reason, smp_processor_id());
|
|
- printk(KERN_EMERG "You probably have a hardware problem with your RAM "
|
|
- "chips\n");
|
|
+ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
|
|
if (panic_on_unrecovered_nmi)
|
|
panic("NMI: Not continuing");
|
|
|
|
@@ -741,7 +666,6 @@ void __kprobes die_nmi(struct pt_regs *r
|
|
printk(" on CPU%d, eip %08lx, registers:\n",
|
|
smp_processor_id(), regs->eip);
|
|
show_registers(regs);
|
|
- printk(KERN_EMERG "console shuts up ...\n");
|
|
console_silent();
|
|
spin_unlock(&nmi_print_lock);
|
|
bust_spinlocks(0);
|
|
@@ -1057,49 +981,24 @@ fastcall void do_spurious_interrupt_bug(
|
|
#endif
|
|
}
|
|
|
|
-fastcall void setup_x86_bogus_stack(unsigned char * stk)
|
|
+fastcall unsigned long patch_espfix_desc(unsigned long uesp,
|
|
+ unsigned long kesp)
|
|
{
|
|
- unsigned long *switch16_ptr, *switch32_ptr;
|
|
- struct pt_regs *regs;
|
|
- unsigned long stack_top, stack_bot;
|
|
- unsigned short iret_frame16_off;
|
|
- int cpu = smp_processor_id();
|
|
- /* reserve the space on 32bit stack for the magic switch16 pointer */
|
|
- memmove(stk, stk + 8, sizeof(struct pt_regs));
|
|
- switch16_ptr = (unsigned long *)(stk + sizeof(struct pt_regs));
|
|
- regs = (struct pt_regs *)stk;
|
|
- /* now the switch32 on 16bit stack */
|
|
- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
|
|
- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
|
|
- switch32_ptr = (unsigned long *)(stack_top - 8);
|
|
- iret_frame16_off = CPU_16BIT_STACK_SIZE - 8 - 20;
|
|
- /* copy iret frame on 16bit stack */
|
|
- memcpy((void *)(stack_bot + iret_frame16_off), ®s->eip, 20);
|
|
- /* fill in the switch pointers */
|
|
- switch16_ptr[0] = (regs->esp & 0xffff0000) | iret_frame16_off;
|
|
- switch16_ptr[1] = __ESPFIX_SS;
|
|
- switch32_ptr[0] = (unsigned long)stk + sizeof(struct pt_regs) +
|
|
- 8 - CPU_16BIT_STACK_SIZE;
|
|
- switch32_ptr[1] = __KERNEL_DS;
|
|
-}
|
|
-
|
|
-fastcall unsigned char * fixup_x86_bogus_stack(unsigned short sp)
|
|
-{
|
|
- unsigned long *switch32_ptr;
|
|
- unsigned char *stack16, *stack32;
|
|
- unsigned long stack_top, stack_bot;
|
|
- int len;
|
|
int cpu = smp_processor_id();
|
|
- stack_bot = (unsigned long)&per_cpu(cpu_16bit_stack, cpu);
|
|
- stack_top = stack_bot + CPU_16BIT_STACK_SIZE;
|
|
- switch32_ptr = (unsigned long *)(stack_top - 8);
|
|
- /* copy the data from 16bit stack to 32bit stack */
|
|
- len = CPU_16BIT_STACK_SIZE - 8 - sp;
|
|
- stack16 = (unsigned char *)(stack_bot + sp);
|
|
- stack32 = (unsigned char *)
|
|
- (switch32_ptr[0] + CPU_16BIT_STACK_SIZE - 8 - len);
|
|
- memcpy(stack32, stack16, len);
|
|
- return stack32;
|
|
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+ struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
+ unsigned long base = (kesp - uesp) & -THREAD_SIZE;
|
|
+ unsigned long new_kesp = kesp - base;
|
|
+ unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
|
|
+ __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
|
|
+ /* Set up base for espfix segment */
|
|
+ desc &= 0x00f0ff0000000000ULL;
|
|
+ desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
|
|
+ ((((__u64)base) << 32) & 0xff00000000000000ULL) |
|
|
+ ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
|
|
+ (lim_pages & 0xffff);
|
|
+ *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
|
|
+ return new_kesp;
|
|
}
|
|
#endif
|
|
|
|
@@ -1113,7 +1012,7 @@ fastcall unsigned char * fixup_x86_bogus
|
|
* Must be called with kernel preemption disabled (in this case,
|
|
* local interrupts are disabled at the call-site in entry.S).
|
|
*/
|
|
-asmlinkage void math_state_restore(struct pt_regs regs)
|
|
+asmlinkage void math_state_restore(void)
|
|
{
|
|
struct thread_info *thread = current_thread_info();
|
|
struct task_struct *tsk = thread->task;
|
|
@@ -1123,6 +1022,7 @@ asmlinkage void math_state_restore(struc
|
|
init_fpu(tsk);
|
|
restore_fpu(tsk);
|
|
thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
|
|
+ tsk->fpu_counter++;
|
|
}
|
|
|
|
#ifndef CONFIG_MATH_EMULATION
|
|
@@ -1234,19 +1134,3 @@ static int __init kstack_setup(char *s)
|
|
return 1;
|
|
}
|
|
__setup("kstack=", kstack_setup);
|
|
-
|
|
-#ifdef CONFIG_STACK_UNWIND
|
|
-static int __init call_trace_setup(char *s)
|
|
-{
|
|
- if (strcmp(s, "old") == 0)
|
|
- call_trace = -1;
|
|
- else if (strcmp(s, "both") == 0)
|
|
- call_trace = 0;
|
|
- else if (strcmp(s, "newfallback") == 0)
|
|
- call_trace = 1;
|
|
- else if (strcmp(s, "new") == 2)
|
|
- call_trace = 2;
|
|
- return 1;
|
|
-}
|
|
-__setup("call_trace=", call_trace_setup);
|
|
-#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/vmlinux.lds.S 2010-05-25 09:12:09.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/vmlinux.lds.S 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -84,6 +84,10 @@ SECTIONS
|
|
{
|
|
#ifdef CONFIG_X86_32
|
|
. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
|
|
+#if defined(CONFIG_XEN) && CONFIG_XEN_COMPAT <= 0x030002
|
|
+#undef LOAD_OFFSET
|
|
+#define LOAD_OFFSET 0
|
|
+#endif
|
|
phys_startup_32 = startup_32 - LOAD_OFFSET;
|
|
#else
|
|
. = __START_KERNEL;
|
|
--- head-2010-05-25.orig/arch/x86/kvm/Kconfig 2010-05-25 09:12:09.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kvm/Kconfig 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -7,6 +7,7 @@ source "virt/kvm/Kconfig"
|
|
menuconfig VIRTUALIZATION
|
|
bool "Virtualization"
|
|
depends on HAVE_KVM || X86
|
|
+ depends on !XEN
|
|
default y
|
|
---help---
|
|
Say Y here to get to see options for using your Linux host to run other
|
|
--- head-2010-05-25.orig/arch/x86/mm/fault_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/fault_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -22,9 +22,9 @@
|
|
#include <linux/highmem.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kprobes.h>
|
|
+#include <linux/uaccess.h>
|
|
|
|
#include <asm/system.h>
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/kdebug.h>
|
|
#include <asm/segment.h>
|
|
@@ -167,7 +167,7 @@ static inline unsigned long get_segment_
|
|
static int __is_prefetch(struct pt_regs *regs, unsigned long addr)
|
|
{
|
|
unsigned long limit;
|
|
- unsigned long instr = get_segment_eip (regs, &limit);
|
|
+ unsigned char *instr = (unsigned char *)get_segment_eip (regs, &limit);
|
|
int scan_more = 1;
|
|
int prefetch = 0;
|
|
int i;
|
|
@@ -177,9 +177,9 @@ static int __is_prefetch(struct pt_regs
|
|
unsigned char instr_hi;
|
|
unsigned char instr_lo;
|
|
|
|
- if (instr > limit)
|
|
+ if (instr > (unsigned char *)limit)
|
|
break;
|
|
- if (__get_user(opcode, (unsigned char __user *) instr))
|
|
+ if (probe_kernel_address(instr, opcode))
|
|
break;
|
|
|
|
instr_hi = opcode & 0xf0;
|
|
@@ -204,9 +204,9 @@ static int __is_prefetch(struct pt_regs
|
|
case 0x00:
|
|
/* Prefetch instruction is 0x0F0D or 0x0F18 */
|
|
scan_more = 0;
|
|
- if (instr > limit)
|
|
+ if (instr > (unsigned char *)limit)
|
|
break;
|
|
- if (__get_user(opcode, (unsigned char __user *) instr))
|
|
+ if (probe_kernel_address(instr, opcode))
|
|
break;
|
|
prefetch = (instr_lo == 0xF) &&
|
|
(opcode == 0x0D || opcode == 0x18);
|
|
--- head-2010-05-25.orig/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -32,7 +32,7 @@ static void *__kmap_atomic(struct page *
|
|
unsigned long vaddr;
|
|
|
|
/* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */
|
|
- inc_preempt_count();
|
|
+ pagefault_disable();
|
|
if (!PageHighMem(page))
|
|
return page_address(page);
|
|
|
|
@@ -63,26 +63,22 @@ void kunmap_atomic(void *kvaddr, enum km
|
|
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
|
|
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
|
|
|
|
-#ifdef CONFIG_DEBUG_HIGHMEM
|
|
- if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
|
|
- dec_preempt_count();
|
|
- preempt_check_resched();
|
|
- return;
|
|
- }
|
|
-
|
|
- if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
|
|
- BUG();
|
|
-#endif
|
|
/*
|
|
* Force other mappings to Oops if they'll try to access this pte
|
|
* without first remap it. Keeping stale mappings around is a bad idea
|
|
* also, in case the page changes cacheability attributes or becomes
|
|
* a protected page in a hypervisor.
|
|
*/
|
|
- kpte_clear_flush(kmap_pte-idx, vaddr);
|
|
+ if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx))
|
|
+ kpte_clear_flush(kmap_pte-idx, vaddr);
|
|
+ else {
|
|
+#ifdef CONFIG_DEBUG_HIGHMEM
|
|
+ BUG_ON(vaddr < PAGE_OFFSET);
|
|
+ BUG_ON(vaddr >= (unsigned long)high_memory);
|
|
+#endif
|
|
+ }
|
|
|
|
- dec_preempt_count();
|
|
- preempt_check_resched();
|
|
+ pagefault_enable();
|
|
}
|
|
|
|
/* This is the same as kmap_atomic() but can map memory that doesn't
|
|
@@ -93,7 +89,7 @@ void *kmap_atomic_pfn(unsigned long pfn,
|
|
enum fixed_addresses idx;
|
|
unsigned long vaddr;
|
|
|
|
- inc_preempt_count();
|
|
+ pagefault_disable();
|
|
|
|
idx = type + KM_TYPE_NR*smp_processor_id();
|
|
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
--- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -233,8 +233,6 @@ static inline int page_kills_ppro(unsign
|
|
|
|
#endif
|
|
|
|
-extern int is_available_memory(efi_memory_desc_t *);
|
|
-
|
|
int page_is_ram(unsigned long pagenr)
|
|
{
|
|
int i;
|
|
@@ -326,7 +324,7 @@ void __init add_one_highpage_init(struct
|
|
SetPageReserved(page);
|
|
}
|
|
|
|
-static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
|
|
+static int __meminit add_one_highpage_hotplug(struct page *page, unsigned long pfn)
|
|
{
|
|
free_new_highpage(page, pfn);
|
|
totalram_pages++;
|
|
@@ -343,7 +341,7 @@ static int add_one_highpage_hotplug(stru
|
|
* has been added dynamically that would be
|
|
* onlined here is in HIGHMEM
|
|
*/
|
|
-void online_page(struct page *page)
|
|
+void __meminit online_page(struct page *page)
|
|
{
|
|
ClearPageReserved(page);
|
|
add_one_highpage_hotplug(page, page_to_pfn(page));
|
|
@@ -738,16 +736,10 @@ void __init mem_init(void)
|
|
set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
|
|
}
|
|
|
|
-/*
|
|
- * this is for the non-NUMA, single node SMP system case.
|
|
- * Specifically, in the case of x86, we will always add
|
|
- * memory to the highmem for now.
|
|
- */
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
-#ifndef CONFIG_NEED_MULTIPLE_NODES
|
|
int arch_add_memory(int nid, u64 start, u64 size)
|
|
{
|
|
- struct pglist_data *pgdata = &contig_page_data;
|
|
+ struct pglist_data *pgdata = NODE_DATA(nid);
|
|
struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
|
|
unsigned long start_pfn = start >> PAGE_SHIFT;
|
|
unsigned long nr_pages = size >> PAGE_SHIFT;
|
|
@@ -759,11 +751,11 @@ int remove_memory(u64 start, u64 size)
|
|
{
|
|
return -EINVAL;
|
|
}
|
|
-#endif
|
|
+EXPORT_SYMBOL_GPL(remove_memory);
|
|
#endif
|
|
|
|
-kmem_cache_t *pgd_cache;
|
|
-kmem_cache_t *pmd_cache;
|
|
+struct kmem_cache *pgd_cache;
|
|
+struct kmem_cache *pmd_cache;
|
|
|
|
void __init pgtable_cache_init(void)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -203,7 +203,7 @@ void pte_free(struct page *pte)
|
|
__free_page(pte);
|
|
}
|
|
|
|
-void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags)
|
|
+void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags)
|
|
{
|
|
memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
|
|
}
|
|
@@ -243,7 +243,7 @@ static inline void pgd_list_del(pgd_t *p
|
|
set_page_private(next, (unsigned long)pprev);
|
|
}
|
|
|
|
-void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
|
|
+void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
|
|
{
|
|
unsigned long flags;
|
|
|
|
@@ -264,7 +264,7 @@ void pgd_ctor(void *pgd, kmem_cache_t *c
|
|
}
|
|
|
|
/* never called when PTRS_PER_PMD > 1 */
|
|
-void pgd_dtor(void *pgd, kmem_cache_t *cache, unsigned long unused)
|
|
+void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
|
|
{
|
|
unsigned long flags; /* can be called from interrupt context */
|
|
|
|
--- head-2010-05-25.orig/arch/x86/pci/irq-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/pci/irq-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -768,7 +768,7 @@ static void __init pirq_find_router(stru
|
|
DBG(KERN_DEBUG "PCI: Attempting to find IRQ router for %04x:%04x\n",
|
|
rt->rtr_vendor, rt->rtr_device);
|
|
|
|
- pirq_router_dev = pci_find_slot(rt->rtr_bus, rt->rtr_devfn);
|
|
+ pirq_router_dev = pci_get_bus_and_slot(rt->rtr_bus, rt->rtr_devfn);
|
|
if (!pirq_router_dev) {
|
|
DBG(KERN_DEBUG "PCI: Interrupt router not found at "
|
|
"%02x:%02x\n", rt->rtr_bus, rt->rtr_devfn);
|
|
@@ -788,6 +788,8 @@ static void __init pirq_find_router(stru
|
|
pirq_router_dev->vendor,
|
|
pirq_router_dev->device,
|
|
pci_name(pirq_router_dev));
|
|
+
|
|
+ /* The device remains referenced for the kernel lifetime */
|
|
}
|
|
|
|
static struct irq_info *pirq_get_info(struct pci_dev *dev)
|
|
--- head-2010-05-25.orig/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -261,7 +261,6 @@ ENTRY(system_call)
|
|
movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
|
|
GET_THREAD_INFO(%rcx)
|
|
testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx)
|
|
- CFI_REMEMBER_STATE
|
|
jnz tracesys
|
|
cmpq $__NR_syscall_max,%rax
|
|
ja badsys
|
|
@@ -272,7 +271,6 @@ ENTRY(system_call)
|
|
* Syscall return path ending with SYSRET (fast path)
|
|
* Has incomplete stack frame and undefined top of stack.
|
|
*/
|
|
- .globl ret_from_sys_call
|
|
ret_from_sys_call:
|
|
movl $_TIF_ALLWORK_MASK,%edi
|
|
/* edi: flagmask */
|
|
@@ -282,8 +280,8 @@ sysret_check:
|
|
TRACE_IRQS_OFF
|
|
movl threadinfo_flags(%rcx),%edx
|
|
andl %edi,%edx
|
|
- CFI_REMEMBER_STATE
|
|
jnz sysret_careful
|
|
+ CFI_REMEMBER_STATE
|
|
/*
|
|
* sysretq will re-enable interrupts:
|
|
*/
|
|
@@ -292,10 +290,10 @@ sysret_check:
|
|
RESTORE_ARGS 0,8,0
|
|
HYPERVISOR_IRET VGCF_IN_SYSCALL
|
|
|
|
+ CFI_RESTORE_STATE
|
|
/* Handle reschedules */
|
|
/* edx: work, edi: workmask */
|
|
sysret_careful:
|
|
- CFI_RESTORE_STATE
|
|
bt $TIF_NEED_RESCHED,%edx
|
|
jnc sysret_signal
|
|
TRACE_IRQS_ON
|
|
@@ -334,7 +332,6 @@ badsys:
|
|
|
|
/* Do syscall tracing */
|
|
tracesys:
|
|
- CFI_RESTORE_STATE
|
|
SAVE_REST
|
|
movq $-ENOSYS,RAX(%rsp)
|
|
FIXUP_TOP_OF_STACK %rdi
|
|
@@ -350,32 +347,13 @@ tracesys:
|
|
call *sys_call_table(,%rax,8)
|
|
1: movq %rax,RAX-ARGOFFSET(%rsp)
|
|
/* Use IRET because user could have changed frame */
|
|
- jmp int_ret_from_sys_call
|
|
- CFI_ENDPROC
|
|
-END(system_call)
|
|
|
|
/*
|
|
* Syscall return path ending with IRET.
|
|
* Has correct top of stack, but partial stack frame.
|
|
- */
|
|
-ENTRY(int_ret_from_sys_call)
|
|
- CFI_STARTPROC simple
|
|
- CFI_SIGNAL_FRAME
|
|
- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
|
|
- /*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
|
|
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
|
|
- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
|
|
- /*CFI_REL_OFFSET cs,CS-ARGOFFSET*/
|
|
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
|
|
- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
|
|
- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
|
|
- CFI_REL_OFFSET rax,RAX-ARGOFFSET
|
|
- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
|
|
- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
|
|
- CFI_REL_OFFSET r8,R8-ARGOFFSET
|
|
- CFI_REL_OFFSET r9,R9-ARGOFFSET
|
|
- CFI_REL_OFFSET r10,R10-ARGOFFSET
|
|
- CFI_REL_OFFSET r11,R11-ARGOFFSET
|
|
+ */
|
|
+ .globl int_ret_from_sys_call
|
|
+int_ret_from_sys_call:
|
|
XEN_BLOCK_EVENTS(%rsi)
|
|
TRACE_IRQS_OFF
|
|
testb $3,CS-ARGOFFSET(%rsp)
|
|
@@ -428,8 +406,6 @@ int_very_careful:
|
|
popq %rdi
|
|
CFI_ADJUST_CFA_OFFSET -8
|
|
andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi
|
|
- XEN_BLOCK_EVENTS(%rsi)
|
|
- TRACE_IRQS_OFF
|
|
jmp int_restore_rest
|
|
|
|
int_signal:
|
|
@@ -445,7 +421,7 @@ int_restore_rest:
|
|
TRACE_IRQS_OFF
|
|
jmp int_with_check
|
|
CFI_ENDPROC
|
|
-END(int_ret_from_sys_call)
|
|
+END(system_call)
|
|
|
|
/*
|
|
* Certain special system calls that need to save a complete full stack frame.
|
|
@@ -1270,36 +1246,3 @@ ENTRY(call_softirq)
|
|
ret
|
|
CFI_ENDPROC
|
|
ENDPROC(call_softirq)
|
|
-
|
|
-#ifdef CONFIG_STACK_UNWIND
|
|
-ENTRY(arch_unwind_init_running)
|
|
- CFI_STARTPROC
|
|
- movq %r15, R15(%rdi)
|
|
- movq %r14, R14(%rdi)
|
|
- xchgq %rsi, %rdx
|
|
- movq %r13, R13(%rdi)
|
|
- movq %r12, R12(%rdi)
|
|
- xorl %eax, %eax
|
|
- movq %rbp, RBP(%rdi)
|
|
- movq %rbx, RBX(%rdi)
|
|
- movq (%rsp), %rcx
|
|
- movq %rax, R11(%rdi)
|
|
- movq %rax, R10(%rdi)
|
|
- movq %rax, R9(%rdi)
|
|
- movq %rax, R8(%rdi)
|
|
- movq %rax, RAX(%rdi)
|
|
- movq %rax, RCX(%rdi)
|
|
- movq %rax, RDX(%rdi)
|
|
- movq %rax, RSI(%rdi)
|
|
- movq %rax, RDI(%rdi)
|
|
- movq %rax, ORIG_RAX(%rdi)
|
|
- movq %rcx, RIP(%rdi)
|
|
- leaq 8(%rsp), %rcx
|
|
- movq $__KERNEL_CS, CS(%rdi)
|
|
- movq %rax, EFLAGS(%rdi)
|
|
- movq %rcx, RSP(%rdi)
|
|
- movq $__KERNEL_DS, SS(%rdi)
|
|
- jmpq *%rdx
|
|
- CFI_ENDPROC
|
|
-ENDPROC(arch_unwind_init_running)
|
|
-#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/head64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -104,7 +104,10 @@ void __init x86_64_start_kernel(char * r
|
|
machine_to_phys_order++;
|
|
|
|
#if 0
|
|
- for (i = 0; i < 256; i++)
|
|
+ /* clear bss before set_intr_gate with early_idt_handler */
|
|
+ clear_bss();
|
|
+
|
|
+ for (i = 0; i < IDT_ENTRIES; i++)
|
|
set_intr_gate(i, early_idt_handler);
|
|
asm volatile("lidt %0" :: "m" (idt_descr));
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/io_apic_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/io_apic_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -202,14 +202,20 @@ static struct IO_APIC_route_entry ioapic
|
|
* the interrupt, and we need to make sure the entry is fully populated
|
|
* before that happens.
|
|
*/
|
|
-static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+static void
|
|
+__ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
{
|
|
- unsigned long flags;
|
|
union entry_union eu;
|
|
eu.entry = e;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+}
|
|
+
|
|
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __ioapic_write_entry(apic, pin, e);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
@@ -720,6 +726,22 @@ static int assign_irq_vector(int irq, cp
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
+static void __clear_irq_vector(int irq)
|
|
+{
|
|
+ cpumask_t mask;
|
|
+ int cpu, vector;
|
|
+
|
|
+ BUG_ON(!irq_vector[irq]);
|
|
+
|
|
+ vector = irq_vector[irq];
|
|
+ cpus_and(mask, irq_domain[irq], cpu_online_map);
|
|
+ for_each_cpu_mask(cpu, mask)
|
|
+ per_cpu(vector_irq, cpu)[vector] = -1;
|
|
+
|
|
+ irq_vector[irq] = 0;
|
|
+ irq_domain[irq] = CPU_MASK_NONE;
|
|
+}
|
|
+
|
|
void __setup_vector_irq(int cpu)
|
|
{
|
|
/* Initialize vector_irq on a new cpu */
|
|
@@ -767,26 +789,65 @@ static void ioapic_register_intr(int irq
|
|
#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
-static void __init setup_IO_APIC_irqs(void)
|
|
+static void __init setup_IO_APIC_irq(int apic, int pin, int idx, int irq)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
- int apic, pin, idx, irq, first_notcon = 1, vector;
|
|
+ int vector;
|
|
unsigned long flags;
|
|
|
|
- apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
- for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
+ /*
|
|
+ * add it to the IO-APIC irq-routing table:
|
|
+ */
|
|
+ memset(&entry,0,sizeof(entry));
|
|
|
|
- /*
|
|
- * add it to the IO-APIC irq-routing table:
|
|
- */
|
|
- memset(&entry,0,sizeof(entry));
|
|
+ entry.delivery_mode = INT_DELIVERY_MODE;
|
|
+ entry.dest_mode = INT_DEST_MODE;
|
|
+ entry.mask = 0; /* enable IRQ */
|
|
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
|
|
- entry.delivery_mode = INT_DELIVERY_MODE;
|
|
- entry.dest_mode = INT_DEST_MODE;
|
|
- entry.mask = 0; /* enable IRQ */
|
|
+ entry.trigger = irq_trigger(idx);
|
|
+ entry.polarity = irq_polarity(idx);
|
|
+
|
|
+ if (irq_trigger(idx)) {
|
|
+ entry.trigger = 1;
|
|
+ entry.mask = 1;
|
|
entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
+ }
|
|
+
|
|
+ if (/* !apic && */ !IO_APIC_IRQ(irq))
|
|
+ return;
|
|
+
|
|
+ if (IO_APIC_IRQ(irq)) {
|
|
+ cpumask_t mask;
|
|
+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
|
|
+ if (vector < 0)
|
|
+ return;
|
|
+
|
|
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
|
|
+ entry.vector = vector;
|
|
+
|
|
+ ioapic_register_intr(irq, vector, IOAPIC_AUTO);
|
|
+ if (!apic && (irq < 16))
|
|
+ disable_8259A_irq(irq);
|
|
+ }
|
|
+
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ set_native_irq_info(irq, TARGET_CPUS);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+
|
|
+}
|
|
+
|
|
+static void __init setup_IO_APIC_irqs(void)
|
|
+{
|
|
+ int apic, pin, idx, irq, first_notcon = 1;
|
|
+
|
|
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
|
|
+
|
|
+ for (apic = 0; apic < nr_ioapics; apic++) {
|
|
+ for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
|
|
idx = find_irq_entry(apic,pin,mp_INT);
|
|
if (idx == -1) {
|
|
@@ -798,39 +859,11 @@ static void __init setup_IO_APIC_irqs(vo
|
|
continue;
|
|
}
|
|
|
|
- entry.trigger = irq_trigger(idx);
|
|
- entry.polarity = irq_polarity(idx);
|
|
-
|
|
- if (irq_trigger(idx)) {
|
|
- entry.trigger = 1;
|
|
- entry.mask = 1;
|
|
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
- }
|
|
-
|
|
irq = pin_2_irq(idx, apic, pin);
|
|
add_pin_to_irq(irq, apic, pin);
|
|
|
|
- if (/* !apic && */ !IO_APIC_IRQ(irq))
|
|
- continue;
|
|
-
|
|
- if (IO_APIC_IRQ(irq)) {
|
|
- cpumask_t mask;
|
|
- vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
|
|
- if (vector < 0)
|
|
- continue;
|
|
-
|
|
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
|
|
- entry.vector = vector;
|
|
+ setup_IO_APIC_irq(apic, pin, idx, irq);
|
|
|
|
- ioapic_register_intr(irq, vector, IOAPIC_AUTO);
|
|
- if (!apic && (irq < 16))
|
|
- disable_8259A_irq(irq);
|
|
- }
|
|
- ioapic_write_entry(apic, pin, entry);
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- set_native_irq_info(irq, TARGET_CPUS);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
}
|
|
|
|
@@ -1826,7 +1859,7 @@ void destroy_irq(unsigned int irq)
|
|
dynamic_irq_cleanup(irq);
|
|
|
|
spin_lock_irqsave(&vector_lock, flags);
|
|
- irq_vector[irq] = 0;
|
|
+ __clear_irq_vector(irq);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
}
|
|
|
|
@@ -2131,7 +2164,15 @@ void __init setup_ioapic_dest(void)
|
|
if (irq_entry == -1)
|
|
continue;
|
|
irq = pin_2_irq(irq_entry, ioapic, pin);
|
|
- set_ioapic_affinity_irq(irq, TARGET_CPUS);
|
|
+
|
|
+ /* setup_IO_APIC_irqs could fail to get vector for some device
|
|
+ * when you have too many devices, because at that time only boot
|
|
+ * cpu is online.
|
|
+ */
|
|
+ if(!irq_vector[irq])
|
|
+ setup_IO_APIC_irq(ioapic, pin, irq_entry, irq);
|
|
+ else
|
|
+ set_ioapic_affinity_irq(irq, TARGET_CPUS);
|
|
}
|
|
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/mpparse_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/mpparse_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -35,8 +35,6 @@
|
|
int smp_found_config;
|
|
unsigned int __initdata maxcpus = NR_CPUS;
|
|
|
|
-int acpi_found_madt;
|
|
-
|
|
/*
|
|
* Various Linux-internal data structures created from the
|
|
* MP-table.
|
|
--- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/process_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -119,29 +119,23 @@ void exit_idle(void)
|
|
static void poll_idle (void)
|
|
{
|
|
local_irq_enable();
|
|
-
|
|
- asm volatile(
|
|
- "2:"
|
|
- "testl %0,%1;"
|
|
- "rep; nop;"
|
|
- "je 2b;"
|
|
- : :
|
|
- "i" (_TIF_NEED_RESCHED),
|
|
- "m" (current_thread_info()->flags));
|
|
+ cpu_relax();
|
|
}
|
|
|
|
static void xen_idle(void)
|
|
{
|
|
+ current_thread_info()->status &= ~TS_POLLING;
|
|
+ /*
|
|
+ * TS_POLLING-cleared state must be visible before we
|
|
+ * test NEED_RESCHED:
|
|
+ */
|
|
+ smp_mb();
|
|
local_irq_disable();
|
|
-
|
|
- if (need_resched())
|
|
- local_irq_enable();
|
|
- else {
|
|
- current_thread_info()->status &= ~TS_POLLING;
|
|
- smp_mb__after_clear_bit();
|
|
+ if (!need_resched())
|
|
safe_halt();
|
|
- current_thread_info()->status |= TS_POLLING;
|
|
- }
|
|
+ else
|
|
+ local_irq_enable();
|
|
+ current_thread_info()->status |= TS_POLLING;
|
|
}
|
|
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
@@ -181,6 +175,12 @@ void cpu_idle (void)
|
|
idle = xen_idle; /* no alternatives */
|
|
if (cpu_is_offline(smp_processor_id()))
|
|
play_dead();
|
|
+ /*
|
|
+ * Idle routines should keep interrupts disabled
|
|
+ * from here on, until they go to idle.
|
|
+ * Otherwise, idle callbacks can misfire.
|
|
+ */
|
|
+ local_irq_disable();
|
|
enter_idle();
|
|
idle();
|
|
/* In many cases the interrupt that ended idle
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/setup_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -574,8 +574,7 @@ void __init setup_arch(char **cmdline_p)
|
|
if (LOADER_TYPE && INITRD_START) {
|
|
if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) {
|
|
reserve_bootmem_generic(INITRD_START, INITRD_SIZE);
|
|
- initrd_start =
|
|
- INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
|
|
+ initrd_start = INITRD_START + PAGE_OFFSET;
|
|
initrd_end = initrd_start+INITRD_SIZE;
|
|
}
|
|
else {
|
|
@@ -991,11 +990,8 @@ static void __cpuinit init_amd(struct cp
|
|
/* Fix cpuid4 emulation for more */
|
|
num_cache_leaves = 3;
|
|
|
|
- /* When there is only one core no need to synchronize RDTSC */
|
|
- if (num_possible_cpus() == 1)
|
|
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
- else
|
|
- clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
+ /* RDTSC can be speculated around */
|
|
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
}
|
|
|
|
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
@@ -1094,6 +1090,15 @@ static void __cpuinit init_intel(struct
|
|
set_bit(X86_FEATURE_ARCH_PERFMON, &c->x86_capability);
|
|
}
|
|
|
|
+ if (cpu_has_ds) {
|
|
+ unsigned int l1, l2;
|
|
+ rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
|
|
+ if (!(l1 & (1<<11)))
|
|
+ set_bit(X86_FEATURE_BTS, c->x86_capability);
|
|
+ if (!(l1 & (1<<12)))
|
|
+ set_bit(X86_FEATURE_PEBS, c->x86_capability);
|
|
+ }
|
|
+
|
|
n = c->extended_cpuid_level;
|
|
if (n >= 0x80000008) {
|
|
unsigned eax = cpuid_eax(0x80000008);
|
|
@@ -1113,7 +1118,10 @@ static void __cpuinit init_intel(struct
|
|
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
|
|
if (c->x86 == 6)
|
|
set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
|
|
- set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
+ if (c->x86 == 15)
|
|
+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
+ else
|
|
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
c->x86_max_cores = intel_num_cpu_cores(c);
|
|
|
|
srat_detect_node();
|
|
--- head-2010-05-25.orig/arch/x86/kernel/smp_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/smp_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -384,12 +384,17 @@ int smp_call_function_single (int cpu, v
|
|
put_cpu();
|
|
return 0;
|
|
}
|
|
+
|
|
+ /* Can deadlock when called with interrupts disabled */
|
|
+ WARN_ON(irqs_disabled());
|
|
+
|
|
spin_lock_bh(&call_lock);
|
|
__smp_call_function_single(cpu, func, info, nonatomic, wait);
|
|
spin_unlock_bh(&call_lock);
|
|
put_cpu();
|
|
return 0;
|
|
}
|
|
+EXPORT_SYMBOL(smp_call_function_single);
|
|
|
|
/*
|
|
* this function sends a 'generic call function' IPI to all other CPUs
|
|
--- head-2010-05-25.orig/arch/x86/kernel/traps_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/traps_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -30,9 +30,10 @@
|
|
#include <linux/kprobes.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/unwind.h>
|
|
+#include <linux/uaccess.h>
|
|
+#include <linux/bug.h>
|
|
|
|
#include <asm/system.h>
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/io.h>
|
|
#include <asm/atomic.h>
|
|
#include <asm/debugreg.h>
|
|
@@ -108,12 +109,7 @@ static inline void preempt_conditional_c
|
|
preempt_enable_no_resched();
|
|
}
|
|
|
|
-static int kstack_depth_to_print = 12;
|
|
-#ifdef CONFIG_STACK_UNWIND
|
|
-static int call_trace = 1;
|
|
-#else
|
|
-#define call_trace (-1)
|
|
-#endif
|
|
+int kstack_depth_to_print = 12;
|
|
|
|
#ifdef CONFIG_KALLSYMS
|
|
void printk_address(unsigned long address)
|
|
@@ -218,24 +214,7 @@ static unsigned long *in_exception_stack
|
|
return NULL;
|
|
}
|
|
|
|
-struct ops_and_data {
|
|
- struct stacktrace_ops *ops;
|
|
- void *data;
|
|
-};
|
|
-
|
|
-static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
|
|
-{
|
|
- struct ops_and_data *oad = (struct ops_and_data *)context;
|
|
- int n = 0;
|
|
-
|
|
- while (unwind(info) == 0 && UNW_PC(info)) {
|
|
- n++;
|
|
- oad->ops->address(oad->data, UNW_PC(info));
|
|
- if (arch_unw_user_mode(info))
|
|
- break;
|
|
- }
|
|
- return n;
|
|
-}
|
|
+#define MSG(txt) ops->warning(data, txt)
|
|
|
|
/*
|
|
* x86-64 can have upto three kernel stacks:
|
|
@@ -250,61 +229,24 @@ static inline int valid_stack_ptr(struct
|
|
return p > t && p < t + THREAD_SIZE - 3;
|
|
}
|
|
|
|
-void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
|
|
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
|
|
+ unsigned long *stack,
|
|
struct stacktrace_ops *ops, void *data)
|
|
{
|
|
- const unsigned cpu = smp_processor_id();
|
|
- unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
|
|
+ const unsigned cpu = get_cpu();
|
|
+ unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr;
|
|
unsigned used = 0;
|
|
struct thread_info *tinfo;
|
|
|
|
if (!tsk)
|
|
tsk = current;
|
|
|
|
- if (call_trace >= 0) {
|
|
- int unw_ret = 0;
|
|
- struct unwind_frame_info info;
|
|
- struct ops_and_data oad = { .ops = ops, .data = data };
|
|
-
|
|
- if (regs) {
|
|
- if (unwind_init_frame_info(&info, tsk, regs) == 0)
|
|
- unw_ret = dump_trace_unwind(&info, &oad);
|
|
- } else if (tsk == current)
|
|
- unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
|
|
- else {
|
|
- if (unwind_init_blocked(&info, tsk) == 0)
|
|
- unw_ret = dump_trace_unwind(&info, &oad);
|
|
- }
|
|
- if (unw_ret > 0) {
|
|
- if (call_trace == 1 && !arch_unw_user_mode(&info)) {
|
|
- ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
|
|
- UNW_PC(&info));
|
|
- if ((long)UNW_SP(&info) < 0) {
|
|
- ops->warning(data, "Leftover inexact backtrace:\n");
|
|
- stack = (unsigned long *)UNW_SP(&info);
|
|
- if (!stack)
|
|
- return;
|
|
- } else
|
|
- ops->warning(data, "Full inexact backtrace again:\n");
|
|
- } else if (call_trace >= 1)
|
|
- return;
|
|
- else
|
|
- ops->warning(data, "Full inexact backtrace again:\n");
|
|
- } else
|
|
- ops->warning(data, "Inexact backtrace:\n");
|
|
- }
|
|
if (!stack) {
|
|
unsigned long dummy;
|
|
stack = &dummy;
|
|
if (tsk && tsk != current)
|
|
stack = (unsigned long *)tsk->thread.rsp;
|
|
}
|
|
- /*
|
|
- * Align the stack pointer on word boundary, later loops
|
|
- * rely on that (and corruption / debug info bugs can cause
|
|
- * unaligned values here):
|
|
- */
|
|
- stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
|
|
|
|
/*
|
|
* Print function call entries within a stack. 'cond' is the
|
|
@@ -314,9 +256,9 @@ void dump_trace(struct task_struct *tsk,
|
|
#define HANDLE_STACK(cond) \
|
|
do while (cond) { \
|
|
unsigned long addr = *stack++; \
|
|
- if (oops_in_progress ? \
|
|
- __kernel_text_address(addr) : \
|
|
- kernel_text_address(addr)) { \
|
|
+ /* Use unlocked access here because except for NMIs \
|
|
+ we should be already protected against module unloads */ \
|
|
+ if (__kernel_text_address(addr)) { \
|
|
/* \
|
|
* If the address is either in the text segment of the \
|
|
* kernel, or in the region which contains vmalloc'ed \
|
|
@@ -379,9 +321,10 @@ void dump_trace(struct task_struct *tsk,
|
|
/*
|
|
* This handles the process stack:
|
|
*/
|
|
- tinfo = current_thread_info();
|
|
+ tinfo = task_thread_info(tsk);
|
|
HANDLE_STACK (valid_stack_ptr(tinfo, stack));
|
|
#undef HANDLE_STACK
|
|
+ put_cpu();
|
|
}
|
|
EXPORT_SYMBOL(dump_trace);
|
|
|
|
@@ -518,30 +461,15 @@ bad:
|
|
printk("\n");
|
|
}
|
|
|
|
-void handle_BUG(struct pt_regs *regs)
|
|
-{
|
|
- struct bug_frame f;
|
|
- long len;
|
|
- const char *prefix = "";
|
|
+int is_valid_bugaddr(unsigned long rip)
|
|
+{
|
|
+ unsigned short ud2;
|
|
|
|
- if (user_mode(regs))
|
|
- return;
|
|
- if (__copy_from_user(&f, (const void __user *) regs->rip,
|
|
- sizeof(struct bug_frame)))
|
|
- return;
|
|
- if (f.filename >= 0 ||
|
|
- f.ud2[0] != 0x0f || f.ud2[1] != 0x0b)
|
|
- return;
|
|
- len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1;
|
|
- if (len < 0 || len >= PATH_MAX)
|
|
- f.filename = (int)(long)"unmapped filename";
|
|
- else if (len > 50) {
|
|
- f.filename += len - 50;
|
|
- prefix = "...";
|
|
- }
|
|
- printk("----------- [cut here ] --------- [please bite here ] ---------\n");
|
|
- printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line);
|
|
-}
|
|
+ if (__copy_from_user(&ud2, (const void __user *) rip, sizeof(ud2)))
|
|
+ return 0;
|
|
+
|
|
+ return ud2 == 0x0b0f;
|
|
+}
|
|
|
|
#ifdef CONFIG_BUG
|
|
void out_of_line_bug(void)
|
|
@@ -621,7 +549,9 @@ void die(const char * str, struct pt_reg
|
|
{
|
|
unsigned long flags = oops_begin();
|
|
|
|
- handle_BUG(regs);
|
|
+ if (!user_mode(regs))
|
|
+ report_bug(regs->rip);
|
|
+
|
|
__die(str, regs, err);
|
|
oops_end(flags);
|
|
do_exit(SIGSEGV);
|
|
@@ -790,8 +720,7 @@ mem_parity_error(unsigned char reason, s
|
|
{
|
|
printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
|
|
reason);
|
|
- printk(KERN_EMERG "You probably have a hardware problem with your "
|
|
- "RAM chips\n");
|
|
+ printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
|
|
|
|
if (panic_on_unrecovered_nmi)
|
|
panic("NMI: Not continuing");
|
|
@@ -1227,21 +1156,3 @@ static int __init kstack_setup(char *s)
|
|
return 0;
|
|
}
|
|
early_param("kstack", kstack_setup);
|
|
-
|
|
-#ifdef CONFIG_STACK_UNWIND
|
|
-static int __init call_trace_setup(char *s)
|
|
-{
|
|
- if (!s)
|
|
- return -EINVAL;
|
|
- if (strcmp(s, "old") == 0)
|
|
- call_trace = -1;
|
|
- else if (strcmp(s, "both") == 0)
|
|
- call_trace = 0;
|
|
- else if (strcmp(s, "newfallback") == 0)
|
|
- call_trace = 1;
|
|
- else if (strcmp(s, "new") == 0)
|
|
- call_trace = 2;
|
|
- return 0;
|
|
-}
|
|
-early_param("call_trace", call_trace_setup);
|
|
-#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -42,6 +42,7 @@
|
|
#include <asm/topology.h>
|
|
|
|
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
|
|
+#define __syscall_clobber "r11","rcx","memory"
|
|
|
|
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
|
|
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
|
|
@@ -224,8 +225,7 @@ out:
|
|
|
|
static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen,
|
|
void __user *oldval, size_t __user *oldlenp,
|
|
- void __user *newval, size_t newlen,
|
|
- void **context)
|
|
+ void __user *newval, size_t newlen)
|
|
{
|
|
return -ENOSYS;
|
|
}
|
|
@@ -277,7 +277,6 @@ static void __cpuinit cpu_vsyscall_init(
|
|
vsyscall_set_cpu(raw_smp_processor_id());
|
|
}
|
|
|
|
-#ifdef CONFIG_HOTPLUG_CPU
|
|
static int __cpuinit
|
|
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
|
{
|
|
@@ -286,13 +285,13 @@ cpu_vsyscall_notifier(struct notifier_bl
|
|
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
|
|
return NOTIFY_DONE;
|
|
}
|
|
-#endif
|
|
|
|
static void __init map_vsyscall(void)
|
|
{
|
|
extern char __vsyscall_0;
|
|
unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
|
|
|
|
+ /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
|
|
__set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/mm/fault_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/fault_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -23,9 +23,9 @@
|
|
#include <linux/compiler.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kprobes.h>
|
|
+#include <linux/uaccess.h>
|
|
|
|
#include <asm/system.h>
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/tlbflush.h>
|
|
@@ -96,7 +96,7 @@ void bust_spinlocks(int yes)
|
|
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
|
|
unsigned long error_code)
|
|
{
|
|
- unsigned char __user *instr;
|
|
+ unsigned char *instr;
|
|
int scan_more = 1;
|
|
int prefetch = 0;
|
|
unsigned char *max_instr;
|
|
@@ -116,7 +116,7 @@ static noinline int is_prefetch(struct p
|
|
unsigned char instr_hi;
|
|
unsigned char instr_lo;
|
|
|
|
- if (__get_user(opcode, (char __user *)instr))
|
|
+ if (probe_kernel_address(instr, opcode))
|
|
break;
|
|
|
|
instr_hi = opcode & 0xf0;
|
|
@@ -154,7 +154,7 @@ static noinline int is_prefetch(struct p
|
|
case 0x00:
|
|
/* Prefetch instruction is 0x0F0D or 0x0F18 */
|
|
scan_more = 0;
|
|
- if (__get_user(opcode, (char __user *)instr))
|
|
+ if (probe_kernel_address(instr, opcode))
|
|
break;
|
|
prefetch = (instr_lo == 0xF) &&
|
|
(opcode == 0x0D || opcode == 0x18);
|
|
@@ -170,7 +170,7 @@ static noinline int is_prefetch(struct p
|
|
static int bad_address(void *p)
|
|
{
|
|
unsigned long dummy;
|
|
- return __get_user(dummy, (unsigned long __user *)p);
|
|
+ return probe_kernel_address((unsigned long *)p, dummy);
|
|
}
|
|
|
|
void dump_pagetable(unsigned long address)
|
|
--- head-2010-05-25.orig/arch/x86/mm/init_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/init_64-xen.c 2010-04-29 09:46:52.000000000 +0200
|
|
@@ -1164,14 +1164,15 @@ static __init int x8664_sysctl_init(void
|
|
__initcall(x8664_sysctl_init);
|
|
#endif
|
|
|
|
-/* A pseudo VMAs to allow ptrace access for the vsyscall page. This only
|
|
+/* A pseudo VMA to allow ptrace access for the vsyscall page. This only
|
|
covers the 64bit vsyscall page now. 32bit has a real VMA now and does
|
|
not need special handling anymore. */
|
|
|
|
static struct vm_area_struct gate_vma = {
|
|
.vm_start = VSYSCALL_START,
|
|
- .vm_end = VSYSCALL_END,
|
|
- .vm_page_prot = PAGE_READONLY
|
|
+ .vm_end = VSYSCALL_START + (VSYSCALL_MAPPED_PAGES << PAGE_SHIFT),
|
|
+ .vm_page_prot = PAGE_READONLY_EXEC,
|
|
+ .vm_flags = VM_READ | VM_EXEC
|
|
};
|
|
|
|
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
|
|
--- head-2010-05-25.orig/arch/x86/mm/pageattr_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pageattr_64-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -330,34 +330,40 @@ static struct page *split_large_page(uns
|
|
return base;
|
|
}
|
|
|
|
-
|
|
-static void flush_kernel_map(void *address)
|
|
+static void cache_flush_page(void *adr)
|
|
{
|
|
- if (0 && address && cpu_has_clflush) {
|
|
- /* is this worth it? */
|
|
- int i;
|
|
- for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
|
|
- asm volatile("clflush (%0)" :: "r" (address + i));
|
|
- } else
|
|
- asm volatile("wbinvd":::"memory");
|
|
- if (address)
|
|
- __flush_tlb_one(address);
|
|
- else
|
|
- __flush_tlb_all();
|
|
+ int i;
|
|
+ for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size)
|
|
+ asm volatile("clflush (%0)" :: "r" (adr + i));
|
|
}
|
|
|
|
+static void flush_kernel_map(void *arg)
|
|
+{
|
|
+ struct list_head *l = (struct list_head *)arg;
|
|
+ struct page *pg;
|
|
|
|
-static inline void flush_map(unsigned long address)
|
|
+ /* When clflush is available always use it because it is
|
|
+ much cheaper than WBINVD */
|
|
+ if (!cpu_has_clflush)
|
|
+ asm volatile("wbinvd" ::: "memory");
|
|
+ list_for_each_entry(pg, l, lru) {
|
|
+ void *adr = page_address(pg);
|
|
+ if (cpu_has_clflush)
|
|
+ cache_flush_page(adr);
|
|
+ __flush_tlb_one(adr);
|
|
+ }
|
|
+}
|
|
+
|
|
+static inline void flush_map(struct list_head *l)
|
|
{
|
|
- on_each_cpu(flush_kernel_map, (void *)address, 1, 1);
|
|
+ on_each_cpu(flush_kernel_map, l, 1, 1);
|
|
}
|
|
|
|
-static struct page *deferred_pages; /* protected by init_mm.mmap_sem */
|
|
+static LIST_HEAD(deferred_pages); /* protected by init_mm.mmap_sem */
|
|
|
|
static inline void save_page(struct page *fpage)
|
|
{
|
|
- fpage->lru.next = (struct list_head *)deferred_pages;
|
|
- deferred_pages = fpage;
|
|
+ list_add(&fpage->lru, &deferred_pages);
|
|
}
|
|
|
|
/*
|
|
@@ -487,18 +493,18 @@ int change_page_attr(struct page *page,
|
|
|
|
void global_flush_tlb(void)
|
|
{
|
|
- struct page *dpage;
|
|
+ struct page *pg, *next;
|
|
+ struct list_head l;
|
|
|
|
down_read(&init_mm.mmap_sem);
|
|
- dpage = xchg(&deferred_pages, NULL);
|
|
+ list_replace_init(&deferred_pages, &l);
|
|
up_read(&init_mm.mmap_sem);
|
|
|
|
- flush_map((dpage && !dpage->lru.next) ? (unsigned long)page_address(dpage) : 0);
|
|
- while (dpage) {
|
|
- struct page *tmp = dpage;
|
|
- dpage = (struct page *)dpage->lru.next;
|
|
- ClearPagePrivate(tmp);
|
|
- __free_page(tmp);
|
|
+ flush_map(&l);
|
|
+
|
|
+ list_for_each_entry_safe(pg, next, &l, lru) {
|
|
+ ClearPagePrivate(pg);
|
|
+ __free_page(pg);
|
|
}
|
|
}
|
|
|
|
--- head-2010-05-25.orig/drivers/pci/msi-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/pci/msi-xen.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -273,10 +273,8 @@ void disable_msi_mode(struct pci_dev *de
|
|
pci_write_config_word(dev, msi_control_reg(pos), control);
|
|
dev->msix_enabled = 0;
|
|
}
|
|
- if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
|
|
- /* PCI Express Endpoint device detected */
|
|
- pci_intx(dev, 1); /* enable intx */
|
|
- }
|
|
+
|
|
+ pci_intx(dev, 1); /* enable intx */
|
|
}
|
|
|
|
static void enable_msi_mode(struct pci_dev *dev, int pos, int type)
|
|
@@ -294,10 +292,8 @@ static void enable_msi_mode(struct pci_d
|
|
pci_write_config_word(dev, msi_control_reg(pos), control);
|
|
dev->msix_enabled = 1;
|
|
}
|
|
- if (pci_find_capability(dev, PCI_CAP_ID_EXP)) {
|
|
- /* PCI Express Endpoint device detected */
|
|
- pci_intx(dev, 0); /* disable intx */
|
|
- }
|
|
+
|
|
+ pci_intx(dev, 0); /* disable intx */
|
|
}
|
|
|
|
#ifdef CONFIG_PM
|
|
--- head-2010-05-25.orig/drivers/xen/balloon/balloon.c 2010-04-15 09:52:32.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/balloon/balloon.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -106,8 +106,8 @@ static unsigned long __read_mostly total
|
|
static LIST_HEAD(ballooned_pages);
|
|
|
|
/* Main work function, always executed in process context. */
|
|
-static void balloon_process(void *unused);
|
|
-static DECLARE_WORK(balloon_worker, balloon_process, NULL);
|
|
+static void balloon_process(struct work_struct *unused);
|
|
+static DECLARE_WORK(balloon_worker, balloon_process);
|
|
static struct timer_list balloon_timer;
|
|
|
|
/* When ballooning out (allocating memory to return to Xen) we don't really
|
|
@@ -414,7 +414,7 @@ static int decrease_reservation(unsigned
|
|
* by the balloon lock), or with changes to the Xen hard limit, but we will
|
|
* recover from these in time.
|
|
*/
|
|
-static void balloon_process(void *unused)
|
|
+static void balloon_process(struct work_struct *unused)
|
|
{
|
|
int need_sleep = 0;
|
|
long credit;
|
|
--- head-2010-05-25.orig/drivers/xen/blkback/blkback.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blkback/blkback.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -37,6 +37,7 @@
|
|
|
|
#include <linux/spinlock.h>
|
|
#include <linux/kthread.h>
|
|
+#include <linux/freezer.h>
|
|
#include <linux/list.h>
|
|
#include <linux/delay.h>
|
|
#include <xen/balloon.h>
|
|
--- head-2010-05-25.orig/drivers/xen/blkback/interface.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blkback/interface.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -35,7 +35,7 @@
|
|
#include <linux/kthread.h>
|
|
#include <linux/delay.h>
|
|
|
|
-static kmem_cache_t *blkif_cachep;
|
|
+static struct kmem_cache *blkif_cachep;
|
|
|
|
blkif_t *blkif_alloc(domid_t domid)
|
|
{
|
|
--- head-2010-05-25.orig/drivers/xen/blkfront/blkfront.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blkfront/blkfront.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -71,7 +71,7 @@ static int setup_blkring(struct xenbus_d
|
|
static void kick_pending_request_queues(struct blkfront_info *);
|
|
|
|
static irqreturn_t blkif_int(int irq, void *dev_id);
|
|
-static void blkif_restart_queue(void *arg);
|
|
+static void blkif_restart_queue(struct work_struct *arg);
|
|
static void blkif_recover(struct blkfront_info *);
|
|
static void blkif_completion(struct blk_shadow *);
|
|
static void blkif_free(struct blkfront_info *, int);
|
|
@@ -111,7 +111,7 @@ static int blkfront_probe(struct xenbus_
|
|
info->xbdev = dev;
|
|
info->vdevice = vdevice;
|
|
info->connected = BLKIF_STATE_DISCONNECTED;
|
|
- INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
|
|
+ INIT_WORK(&info->work, blkif_restart_queue);
|
|
|
|
for (i = 0; i < BLK_RING_SIZE; i++)
|
|
info->shadow[i].req.id = i+1;
|
|
@@ -482,9 +482,9 @@ static void kick_pending_request_queues(
|
|
}
|
|
}
|
|
|
|
-static void blkif_restart_queue(void *arg)
|
|
+static void blkif_restart_queue(struct work_struct *arg)
|
|
{
|
|
- struct blkfront_info *info = (struct blkfront_info *)arg;
|
|
+ struct blkfront_info *info = container_of(arg, struct blkfront_info, work);
|
|
spin_lock_irq(&blkif_io_lock);
|
|
if (info->connected == BLKIF_STATE_CONNECTED)
|
|
kick_pending_request_queues(info);
|
|
--- head-2010-05-25.orig/drivers/xen/blktap/blktap.c 2010-04-29 09:43:21.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/blktap/blktap.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -40,6 +40,7 @@
|
|
|
|
#include <linux/spinlock.h>
|
|
#include <linux/kthread.h>
|
|
+#include <linux/freezer.h>
|
|
#include <linux/list.h>
|
|
#include <asm/hypervisor.h>
|
|
#include "common.h"
|
|
--- head-2010-05-25.orig/drivers/xen/blktap/interface.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blktap/interface.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -35,7 +35,7 @@
|
|
#include <xen/evtchn.h>
|
|
#include <linux/delay.h>
|
|
|
|
-static kmem_cache_t *blkif_cachep;
|
|
+static struct kmem_cache *blkif_cachep;
|
|
|
|
blkif_t *tap_alloc_blkif(domid_t domid)
|
|
{
|
|
--- head-2010-05-25.orig/drivers/xen/char/mem.c 2007-08-06 15:10:49.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/char/mem.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -157,7 +157,7 @@ static loff_t memory_lseek(struct file *
|
|
{
|
|
loff_t ret;
|
|
|
|
- mutex_lock(&file->f_dentry->d_inode->i_mutex);
|
|
+ mutex_lock(&file->f_path.dentry->d_inode->i_mutex);
|
|
switch (orig) {
|
|
case 0:
|
|
file->f_pos = offset;
|
|
@@ -172,7 +172,7 @@ static loff_t memory_lseek(struct file *
|
|
default:
|
|
ret = -EINVAL;
|
|
}
|
|
- mutex_unlock(&file->f_dentry->d_inode->i_mutex);
|
|
+ mutex_unlock(&file->f_path.dentry->d_inode->i_mutex);
|
|
return ret;
|
|
}
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/console/console.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/console/console.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -85,11 +85,6 @@ static int xc_num = -1;
|
|
#define XEN_HVC_MAJOR 229
|
|
#define XEN_HVC_MINOR 0
|
|
|
|
-#ifdef CONFIG_MAGIC_SYSRQ
|
|
-static unsigned long sysrq_requested;
|
|
-extern int sysrq_enabled;
|
|
-#endif
|
|
-
|
|
static int __init xencons_setup(char *str)
|
|
{
|
|
char *q;
|
|
@@ -354,8 +349,8 @@ void __init dom0_init_screen_info(const
|
|
#define DUMMY_TTY(_tty) ((xc_mode == XC_TTY) && \
|
|
((_tty)->index != (xc_num - 1)))
|
|
|
|
-static struct termios *xencons_termios[MAX_NR_CONSOLES];
|
|
-static struct termios *xencons_termios_locked[MAX_NR_CONSOLES];
|
|
+static struct ktermios *xencons_termios[MAX_NR_CONSOLES];
|
|
+static struct ktermios *xencons_termios_locked[MAX_NR_CONSOLES];
|
|
static struct tty_struct *xencons_tty;
|
|
static int xencons_priv_irq;
|
|
static char x_char;
|
|
@@ -371,7 +366,9 @@ void xencons_rx(char *buf, unsigned len)
|
|
|
|
for (i = 0; i < len; i++) {
|
|
#ifdef CONFIG_MAGIC_SYSRQ
|
|
- if (sysrq_enabled) {
|
|
+ if (sysrq_on()) {
|
|
+ static unsigned long sysrq_requested;
|
|
+
|
|
if (buf[i] == '\x0f') { /* ^O */
|
|
if (!sysrq_requested) {
|
|
sysrq_requested = jiffies;
|
|
--- head-2010-05-25.orig/drivers/xen/core/reboot.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/reboot.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -34,8 +34,8 @@ static int suspend_cancelled;
|
|
/* Can we leave APs online when we suspend? */
|
|
static int fast_suspend;
|
|
|
|
-static void __shutdown_handler(void *unused);
|
|
-static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL);
|
|
+static void __shutdown_handler(struct work_struct *unused);
|
|
+static DECLARE_DELAYED_WORK(shutdown_work, __shutdown_handler);
|
|
|
|
static int setup_suspend_evtchn(void);
|
|
|
|
@@ -105,7 +105,7 @@ static int xen_suspend(void *__unused)
|
|
case SHUTDOWN_RESUMING:
|
|
break;
|
|
default:
|
|
- schedule_work(&shutdown_work);
|
|
+ schedule_delayed_work(&shutdown_work, 0);
|
|
break;
|
|
}
|
|
|
|
@@ -137,12 +137,12 @@ static void switch_shutdown_state(int ne
|
|
|
|
/* Either we kick off the work, or we leave it to xen_suspend(). */
|
|
if (old_state == SHUTDOWN_INVALID)
|
|
- schedule_work(&shutdown_work);
|
|
+ schedule_delayed_work(&shutdown_work, 0);
|
|
else
|
|
BUG_ON(old_state != SHUTDOWN_RESUMING);
|
|
}
|
|
|
|
-static void __shutdown_handler(void *unused)
|
|
+static void __shutdown_handler(struct work_struct *unused)
|
|
{
|
|
int err;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/core/smpboot.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/smpboot.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -160,7 +160,12 @@ static void xen_smp_intr_exit(unsigned i
|
|
|
|
void __cpuinit cpu_bringup(void)
|
|
{
|
|
+#ifdef __i386__
|
|
+ cpu_set_gdt(current_thread_info()->cpu);
|
|
+ secondary_cpu_init();
|
|
+#else
|
|
cpu_init();
|
|
+#endif
|
|
identify_cpu(cpu_data + smp_processor_id());
|
|
touch_softlockup_watchdog();
|
|
preempt_disable();
|
|
@@ -299,11 +304,12 @@ void __init smp_prepare_cpus(unsigned in
|
|
if (cpu == 0)
|
|
continue;
|
|
|
|
+ idle = fork_idle(cpu);
|
|
+ if (IS_ERR(idle))
|
|
+ panic("failed fork for CPU %d", cpu);
|
|
+
|
|
#ifdef __x86_64__
|
|
gdt_descr = &cpu_gdt_descr[cpu];
|
|
-#else
|
|
- gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
-#endif
|
|
gdt_descr->address = get_zeroed_page(GFP_KERNEL);
|
|
if (unlikely(!gdt_descr->address)) {
|
|
printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
|
|
@@ -312,6 +318,11 @@ void __init smp_prepare_cpus(unsigned in
|
|
}
|
|
gdt_descr->size = GDT_SIZE;
|
|
memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
|
|
+#else
|
|
+ if (unlikely(!init_gdt(cpu, idle)))
|
|
+ continue;
|
|
+ gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+#endif
|
|
make_page_readonly(
|
|
(void *)gdt_descr->address,
|
|
XENFEAT_writable_descriptor_tables);
|
|
@@ -331,10 +342,6 @@ void __init smp_prepare_cpus(unsigned in
|
|
cpu_2_logical_apicid[cpu] = apicid;
|
|
x86_cpu_to_apicid[cpu] = apicid;
|
|
|
|
- idle = fork_idle(cpu);
|
|
- if (IS_ERR(idle))
|
|
- panic("failed fork for CPU %d", cpu);
|
|
-
|
|
#ifdef __x86_64__
|
|
cpu_pda(cpu)->pcurrent = idle;
|
|
cpu_pda(cpu)->cpunumber = cpu;
|
|
--- head-2010-05-25.orig/drivers/xen/fbfront/xenfb.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/fbfront/xenfb.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -25,6 +25,7 @@
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/mutex.h>
|
|
+#include <linux/freezer.h>
|
|
#include <asm/hypervisor.h>
|
|
#include <xen/evtchn.h>
|
|
#include <xen/interface/io/fbif.h>
|
|
--- head-2010-05-25.orig/drivers/xen/netback/loopback.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/netback/loopback.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -54,6 +54,7 @@
|
|
#include <net/dst.h>
|
|
#include <net/xfrm.h> /* secpath_reset() */
|
|
#include <asm/hypervisor.h> /* is_initial_xendomain() */
|
|
+#include <../net/core/kmap_skb.h> /* k{,un}map_skb_frag() */
|
|
|
|
static int nloopbacks = -1;
|
|
module_param(nloopbacks, int, 0);
|
|
--- head-2010-05-25.orig/drivers/xen/pciback/conf_space_header.c 2010-03-02 09:56:10.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pciback/conf_space_header.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -24,7 +24,7 @@ static int command_read(struct pci_dev *
|
|
int ret;
|
|
|
|
ret = pciback_read_config_word(dev, offset, value, data);
|
|
- if (!dev->is_enabled)
|
|
+ if (!atomic_read(&dev->enable_cnt))
|
|
return ret;
|
|
|
|
for (i = 0; i < PCI_ROM_RESOURCE; i++) {
|
|
@@ -41,14 +41,14 @@ static int command_write(struct pci_dev
|
|
{
|
|
int err;
|
|
|
|
- if (!dev->is_enabled && is_enable_cmd(value)) {
|
|
+ if (!atomic_read(&dev->enable_cnt) && is_enable_cmd(value)) {
|
|
if (unlikely(verbose_request))
|
|
printk(KERN_DEBUG "pciback: %s: enable\n",
|
|
pci_name(dev));
|
|
err = pci_enable_device(dev);
|
|
if (err)
|
|
return err;
|
|
- } else if (dev->is_enabled && !is_enable_cmd(value)) {
|
|
+ } else if (atomic_read(&dev->enable_cnt) && !is_enable_cmd(value)) {
|
|
if (unlikely(verbose_request))
|
|
printk(KERN_DEBUG "pciback: %s: disable\n",
|
|
pci_name(dev));
|
|
--- head-2010-05-25.orig/drivers/xen/pciback/pciback.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pciback/pciback.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -100,7 +100,7 @@ void pciback_release_devices(struct pcib
|
|
|
|
/* Handles events from front-end */
|
|
irqreturn_t pciback_handle_event(int irq, void *dev_id);
|
|
-void pciback_do_op(void *data);
|
|
+void pciback_do_op(struct work_struct *work);
|
|
|
|
int pciback_xenbus_register(void);
|
|
void pciback_xenbus_unregister(void);
|
|
--- head-2010-05-25.orig/drivers/xen/pciback/pciback_ops.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pciback/pciback_ops.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -26,7 +26,7 @@ void pciback_reset_device(struct pci_dev
|
|
|
|
pci_write_config_word(dev, PCI_COMMAND, 0);
|
|
|
|
- dev->is_enabled = 0;
|
|
+ atomic_set(&dev->enable_cnt, 0);
|
|
dev->is_busmaster = 0;
|
|
} else {
|
|
pci_read_config_word(dev, PCI_COMMAND, &cmd);
|
|
@@ -67,9 +67,9 @@ void test_and_schedule_op(struct pciback
|
|
* context because some of the pci_* functions can sleep (mostly due to ACPI
|
|
* use of semaphores). This function is intended to be called from a work
|
|
* queue in process context taking a struct pciback_device as a parameter */
|
|
-void pciback_do_op(void *data)
|
|
+void pciback_do_op(struct work_struct *work)
|
|
{
|
|
- struct pciback_device *pdev = data;
|
|
+ struct pciback_device *pdev = container_of(work, struct pciback_device, op_work);
|
|
struct pci_dev *dev;
|
|
struct xen_pci_op *op = &pdev->sh_info->op;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/pciback/xenbus.c 2009-04-07 13:58:48.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/pciback/xenbus.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -33,7 +33,7 @@ static struct pciback_device *alloc_pdev
|
|
pdev->evtchn_irq = INVALID_EVTCHN_IRQ;
|
|
pdev->be_watching = 0;
|
|
|
|
- INIT_WORK(&pdev->op_work, pciback_do_op, pdev);
|
|
+ INIT_WORK(&pdev->op_work, pciback_do_op);
|
|
|
|
if (pciback_init_devices(pdev)) {
|
|
kfree(pdev);
|
|
--- head-2010-05-25.orig/drivers/xen/pcifront/pci_op.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pcifront/pci_op.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -636,9 +636,9 @@ static pci_ers_result_t pcifront_common_
|
|
}
|
|
|
|
|
|
-void pcifront_do_aer(void *data)
|
|
+void pcifront_do_aer(struct work_struct *data)
|
|
{
|
|
- struct pcifront_device *pdev = data;
|
|
+ struct pcifront_device *pdev = container_of(data, struct pcifront_device, op_work);
|
|
int cmd = pdev->sh_info->aer_op.cmd;
|
|
pci_channel_state_t state =
|
|
(pci_channel_state_t)pdev->sh_info->aer_op.err;
|
|
--- head-2010-05-25.orig/drivers/xen/pcifront/pcifront.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pcifront/pcifront.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -48,7 +48,7 @@ int pcifront_rescan_root(struct pcifront
|
|
unsigned int domain, unsigned int bus);
|
|
void pcifront_free_roots(struct pcifront_device *pdev);
|
|
|
|
-void pcifront_do_aer( void *data);
|
|
+void pcifront_do_aer(struct work_struct *data);
|
|
|
|
irqreturn_t pcifront_handler_aer(int irq, void *dev);
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/pcifront/xenbus.c 2009-04-07 13:58:48.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/pcifront/xenbus.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -49,7 +49,7 @@ static struct pcifront_device *alloc_pde
|
|
pdev->evtchn = INVALID_EVTCHN;
|
|
pdev->gnt_ref = INVALID_GRANT_REF;
|
|
|
|
- INIT_WORK(&pdev->op_work, pcifront_do_aer, pdev);
|
|
+ INIT_WORK(&pdev->op_work, pcifront_do_aer);
|
|
|
|
dev_dbg(&xdev->dev, "Allocated pdev @ 0x%p pdev->sh_info @ 0x%p\n",
|
|
pdev, pdev->sh_info);
|
|
--- head-2010-05-25.orig/drivers/xen/scsiback/interface.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/scsiback/interface.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -40,7 +40,7 @@
|
|
#include <linux/delay.h>
|
|
|
|
|
|
-static kmem_cache_t *scsiback_cachep;
|
|
+static struct kmem_cache *scsiback_cachep;
|
|
|
|
struct vscsibk_info *vscsibk_info_alloc(domid_t domid)
|
|
{
|
|
--- head-2010-05-25.orig/drivers/xen/scsiback/scsiback.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/scsiback/scsiback.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -349,13 +349,11 @@ static int scsiback_merge_bio(struct req
|
|
|
|
if (!rq->bio)
|
|
blk_rq_bio_prep(q, rq, bio);
|
|
- else if (!q->back_merge_fn(q, rq, bio))
|
|
+ else if (!ll_back_merge_fn(q, rq, bio))
|
|
return -EINVAL;
|
|
else {
|
|
rq->biotail->bi_next = bio;
|
|
rq->biotail = bio;
|
|
- rq->hard_nr_sectors += bio_sectors(bio);
|
|
- rq->nr_sectors = rq->hard_nr_sectors;
|
|
}
|
|
|
|
return 0;
|
|
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -465,7 +465,7 @@ netfront_accel_enqueue_skb_multi(netfron
|
|
|
|
if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Set to zero to encourage falcon to work it out for us */
|
|
- *(u16*)(skb->h.raw + skb->csum) = 0;
|
|
+ *(u16*)(skb->h.raw + skb->csum_offset) = 0;
|
|
}
|
|
|
|
if (multi_post_start_new_buffer(vnic, &state)) {
|
|
@@ -584,7 +584,7 @@ netfront_accel_enqueue_skb_single(netfro
|
|
|
|
if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Set to zero to encourage falcon to work it out for us */
|
|
- *(u16*)(skb->h.raw + skb->csum) = 0;
|
|
+ *(u16*)(skb->h.raw + skb->csum_offset) = 0;
|
|
}
|
|
NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
|
|
(skb, idx, frag_data, frag_len, {
|
|
--- head-2010-05-25.orig/drivers/xen/tpmback/interface.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/tpmback/interface.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -16,7 +16,7 @@
|
|
#include <xen/balloon.h>
|
|
#include <xen/gnttab.h>
|
|
|
|
-static kmem_cache_t *tpmif_cachep;
|
|
+static struct kmem_cache *tpmif_cachep;
|
|
int num_frontends = 0;
|
|
|
|
LIST_HEAD(tpmif_list);
|
|
--- head-2010-05-25.orig/drivers/xen/usbback/usbback.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/usbback/usbback.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -540,9 +540,10 @@ struct set_interface_request {
|
|
struct work_struct work;
|
|
};
|
|
|
|
-static void usbbk_set_interface_work(void *data)
|
|
+static void usbbk_set_interface_work(struct work_struct *arg)
|
|
{
|
|
- struct set_interface_request *req = (struct set_interface_request *) data;
|
|
+ struct set_interface_request *req
|
|
+ = container_of(arg, struct set_interface_request, work);
|
|
pending_req_t *pending_req = req->pending_req;
|
|
struct usb_device *udev = req->pending_req->stub->udev;
|
|
|
|
@@ -570,7 +571,7 @@ static int usbbk_set_interface(pending_r
|
|
req->pending_req = pending_req;
|
|
req->interface = interface;
|
|
req->alternate = alternate;
|
|
- INIT_WORK(&req->work, usbbk_set_interface_work, req);
|
|
+ INIT_WORK(&req->work, usbbk_set_interface_work);
|
|
usb_get_dev(udev);
|
|
schedule_work(&req->work);
|
|
return 0;
|
|
@@ -582,9 +583,10 @@ struct clear_halt_request {
|
|
struct work_struct work;
|
|
};
|
|
|
|
-static void usbbk_clear_halt_work(void *data)
|
|
+static void usbbk_clear_halt_work(struct work_struct *arg)
|
|
{
|
|
- struct clear_halt_request *req = (struct clear_halt_request *) data;
|
|
+ struct clear_halt_request *req
|
|
+ = container_of(arg, struct clear_halt_request, work);
|
|
pending_req_t *pending_req = req->pending_req;
|
|
struct usb_device *udev = req->pending_req->stub->udev;
|
|
int ret;
|
|
@@ -610,7 +612,7 @@ static int usbbk_clear_halt(pending_req_
|
|
return -ENOMEM;
|
|
req->pending_req = pending_req;
|
|
req->pipe = pipe;
|
|
- INIT_WORK(&req->work, usbbk_clear_halt_work, req);
|
|
+ INIT_WORK(&req->work, usbbk_clear_halt_work);
|
|
|
|
usb_get_dev(udev);
|
|
schedule_work(&req->work);
|
|
@@ -623,9 +625,10 @@ struct port_reset_request {
|
|
struct work_struct work;
|
|
};
|
|
|
|
-static void usbbk_port_reset_work(void *data)
|
|
+static void usbbk_port_reset_work(struct work_struct *arg)
|
|
{
|
|
- struct port_reset_request *req = (struct port_reset_request *) data;
|
|
+ struct port_reset_request *req
|
|
+ = container_of(arg, struct port_reset_request, work);
|
|
pending_req_t *pending_req = req->pending_req;
|
|
struct usb_device *udev = pending_req->stub->udev;
|
|
int ret, ret_lock;
|
|
@@ -654,7 +657,7 @@ static int usbbk_port_reset(pending_req_
|
|
return -ENOMEM;
|
|
|
|
req->pending_req = pending_req;
|
|
- INIT_WORK(&req->work, usbbk_port_reset_work, req);
|
|
+ INIT_WORK(&req->work, usbbk_port_reset_work);
|
|
|
|
usb_get_dev(udev);
|
|
schedule_work(&req->work);
|
|
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_comms.c 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_comms.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -49,8 +49,8 @@
|
|
|
|
static int xenbus_irq;
|
|
|
|
-extern void xenbus_probe(void *);
|
|
-static DECLARE_WORK(probe_work, xenbus_probe, NULL);
|
|
+extern void xenbus_probe(struct work_struct *);
|
|
+static DECLARE_WORK(probe_work, xenbus_probe);
|
|
|
|
static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe.c 2010-01-26 09:08:16.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -856,7 +856,7 @@ void unregister_xenstore_notifier(struct
|
|
EXPORT_SYMBOL_GPL(unregister_xenstore_notifier);
|
|
|
|
|
|
-void xenbus_probe(void *unused)
|
|
+void xenbus_probe(struct work_struct *unused)
|
|
{
|
|
BUG_ON(!is_xenstored_ready());
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/desc_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -4,8 +4,6 @@
|
|
#include <asm/ldt.h>
|
|
#include <asm/segment.h>
|
|
|
|
-#define CPU_16BIT_STACK_SIZE 1024
|
|
-
|
|
#ifndef __ASSEMBLY__
|
|
|
|
#include <linux/preempt.h>
|
|
@@ -15,8 +13,6 @@
|
|
|
|
extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
|
|
|
|
-DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
|
|
-
|
|
struct Xgt_desc_struct {
|
|
unsigned short size;
|
|
unsigned long address __attribute__((packed));
|
|
@@ -32,11 +28,6 @@ static inline struct desc_struct *get_cp
|
|
return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
|
|
}
|
|
|
|
-/*
|
|
- * This is the ldt that every process will get unless we need
|
|
- * something other than this.
|
|
- */
|
|
-extern struct desc_struct default_ldt[];
|
|
extern struct desc_struct idt_table[];
|
|
extern void set_intr_gate(unsigned int irq, void * addr);
|
|
|
|
@@ -63,8 +54,8 @@ static inline void pack_gate(__u32 *a, _
|
|
#define DESCTYPE_DPL3 0x60 /* DPL-3 */
|
|
#define DESCTYPE_S 0x10 /* !system */
|
|
|
|
+#ifndef CONFIG_XEN
|
|
#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
|
|
-#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
|
|
|
|
#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
|
|
#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
|
|
@@ -75,6 +66,7 @@ static inline void pack_gate(__u32 *a, _
|
|
#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
|
|
#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
|
|
#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
|
|
+#endif
|
|
|
|
#if TLS_SIZE != 24
|
|
# error update this code.
|
|
@@ -90,22 +82,43 @@ static inline void load_TLS(struct threa
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
+#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+
|
|
static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
|
|
{
|
|
__u32 *lp = (__u32 *)((char *)dt + entry*8);
|
|
*lp = entry_a;
|
|
*(lp+1) = entry_b;
|
|
}
|
|
-
|
|
-#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
-#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+#define set_ldt native_set_ldt
|
|
#else
|
|
extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
|
|
extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
|
|
+#define set_ldt xen_set_ldt
|
|
+#endif
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+static inline fastcall void native_set_ldt(const void *addr,
|
|
+ unsigned int entries)
|
|
+{
|
|
+ if (likely(entries == 0))
|
|
+ __asm__ __volatile__("lldt %w0"::"q" (0));
|
|
+ else {
|
|
+ unsigned cpu = smp_processor_id();
|
|
+ __u32 a, b;
|
|
+
|
|
+ pack_descriptor(&a, &b, (unsigned long)addr,
|
|
+ entries * sizeof(struct desc_struct) - 1,
|
|
+ DESCTYPE_LDT, 0);
|
|
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
|
|
+ __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
|
|
+ }
|
|
+}
|
|
#endif
|
|
-#ifndef CONFIG_X86_NO_IDT
|
|
-#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
|
|
{
|
|
__u32 a, b;
|
|
@@ -125,14 +138,6 @@ static inline void __set_tss_desc(unsign
|
|
}
|
|
#endif
|
|
|
|
-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
|
|
-{
|
|
- __u32 a, b;
|
|
- pack_descriptor(&a, &b, (unsigned long)addr,
|
|
- entries * sizeof(struct desc_struct) - 1,
|
|
- DESCTYPE_LDT, 0);
|
|
- write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
|
|
-}
|
|
|
|
#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
|
|
|
|
@@ -163,36 +168,22 @@ static inline void set_ldt_desc(unsigned
|
|
|
|
static inline void clear_LDT(void)
|
|
{
|
|
- int cpu = get_cpu();
|
|
-
|
|
- /*
|
|
- * NB. We load the default_ldt for lcall7/27 handling on demand, as
|
|
- * it slows down context switching. Noone uses it anyway.
|
|
- */
|
|
- cpu = cpu; /* XXX avoid compiler warning */
|
|
- xen_set_ldt(NULL, 0);
|
|
- put_cpu();
|
|
+ set_ldt(NULL, 0);
|
|
}
|
|
|
|
/*
|
|
* load one particular LDT into the current CPU
|
|
*/
|
|
-static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
|
|
+static inline void load_LDT_nolock(mm_context_t *pc)
|
|
{
|
|
- void *segments = pc->ldt;
|
|
- int count = pc->size;
|
|
-
|
|
- if (likely(!count))
|
|
- segments = NULL;
|
|
-
|
|
- xen_set_ldt(segments, count);
|
|
+ set_ldt(pc->ldt, pc->size);
|
|
}
|
|
|
|
static inline void load_LDT(mm_context_t *pc)
|
|
{
|
|
- int cpu = get_cpu();
|
|
- load_LDT_nolock(pc, cpu);
|
|
- put_cpu();
|
|
+ preempt_disable();
|
|
+ load_LDT_nolock(pc);
|
|
+ preempt_enable();
|
|
}
|
|
|
|
static inline unsigned long get_desc_base(unsigned long *desc)
|
|
@@ -204,6 +195,29 @@ static inline unsigned long get_desc_bas
|
|
return base;
|
|
}
|
|
|
|
+#else /* __ASSEMBLY__ */
|
|
+
|
|
+/*
|
|
+ * GET_DESC_BASE reads the descriptor base of the specified segment.
|
|
+ *
|
|
+ * Args:
|
|
+ * idx - descriptor index
|
|
+ * gdt - GDT pointer
|
|
+ * base - 32bit register to which the base will be written
|
|
+ * lo_w - lo word of the "base" register
|
|
+ * lo_b - lo byte of the "base" register
|
|
+ * hi_b - hi byte of the low word of the "base" register
|
|
+ *
|
|
+ * Example:
|
|
+ * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
|
|
+ * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax.
|
|
+ */
|
|
+#define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \
|
|
+ movb idx*8+4(gdt), lo_b; \
|
|
+ movb idx*8+7(gdt), hi_b; \
|
|
+ shll $16, base; \
|
|
+ movw idx*8+2(gdt), lo_w;
|
|
+
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -13,13 +13,16 @@
|
|
#ifndef _ASM_FIXMAP_H
|
|
#define _ASM_FIXMAP_H
|
|
|
|
-
|
|
/* used by vmalloc.c, vsyscall.lds.S.
|
|
*
|
|
* Leave one empty page between vmalloc'ed areas and
|
|
* the start of the fixmap.
|
|
*/
|
|
extern unsigned long __FIXADDR_TOP;
|
|
+#ifdef CONFIG_COMPAT_VDSO
|
|
+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
|
|
+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
|
|
+#endif
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/kernel.h>
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/highmem.h 2008-10-29 09:55:56.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/highmem.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -85,7 +85,7 @@ static inline void clear_user_highpage(s
|
|
|
|
void copy_highpage(struct page *to, struct page *from);
|
|
static inline void copy_user_highpage(struct page *to, struct page *from,
|
|
- unsigned long vaddr)
|
|
+ unsigned long vaddr, struct vm_area_struct *vma)
|
|
{
|
|
copy_highpage(to, from);
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -47,15 +47,6 @@
|
|
#include <asm/percpu.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/page.h>
|
|
-#if defined(__i386__)
|
|
-# ifdef CONFIG_X86_PAE
|
|
-# include <asm-generic/pgtable-nopud.h>
|
|
-# else
|
|
-# include <asm-generic/pgtable-nopmd.h>
|
|
-# endif
|
|
-#elif defined(__x86_64__) && LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11)
|
|
-# include <asm-generic/pgtable-nopud.h>
|
|
-#endif
|
|
|
|
extern shared_info_t *HYPERVISOR_shared_info;
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irqflags_32.h 2007-06-12 13:14:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irqflags_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -22,9 +22,6 @@
|
|
|
|
#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
|
|
|
|
-#define raw_local_save_flags(flags) \
|
|
- do { (flags) = __raw_local_save_flags(); } while (0)
|
|
-
|
|
#define raw_local_irq_restore(x) \
|
|
do { \
|
|
vcpu_info_t *_vcpu; \
|
|
@@ -66,18 +63,6 @@ void raw_safe_halt(void);
|
|
*/
|
|
void halt(void);
|
|
|
|
-static inline int raw_irqs_disabled_flags(unsigned long flags)
|
|
-{
|
|
- return (flags != 0);
|
|
-}
|
|
-
|
|
-#define raw_irqs_disabled() \
|
|
-({ \
|
|
- unsigned long flags = __raw_local_save_flags(); \
|
|
- \
|
|
- raw_irqs_disabled_flags(flags); \
|
|
-})
|
|
-
|
|
/*
|
|
* For spinlocks, etc:
|
|
*/
|
|
@@ -90,9 +75,64 @@ static inline int raw_irqs_disabled_flag
|
|
flags; \
|
|
})
|
|
|
|
+#else
|
|
+/* Offsets into shared_info_t. */
|
|
+#define evtchn_upcall_pending /* 0 */
|
|
+#define evtchn_upcall_mask 1
|
|
+
|
|
+#define sizeof_vcpu_shift 6
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+#define GET_VCPU_INFO movl TI_cpu(%ebp),%esi ; \
|
|
+ shl $sizeof_vcpu_shift,%esi ; \
|
|
+ addl HYPERVISOR_shared_info,%esi
|
|
+#else
|
|
+#define GET_VCPU_INFO movl HYPERVISOR_shared_info,%esi
|
|
+#endif
|
|
+
|
|
+#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
|
|
+#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
|
|
+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
|
|
+#define DISABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
|
|
+ __DISABLE_INTERRUPTS
|
|
+#define ENABLE_INTERRUPTS(clb) GET_VCPU_INFO ; \
|
|
+ __ENABLE_INTERRUPTS
|
|
+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
|
|
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
+ __TEST_PENDING ; \
|
|
+ jnz 14f /* process more events if necessary... */ ; \
|
|
+ movl PT_ESI(%esp), %esi ; \
|
|
+ sysexit ; \
|
|
+14: __DISABLE_INTERRUPTS ; \
|
|
+ TRACE_IRQS_OFF ; \
|
|
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
+ mov $__KERNEL_PDA, %ecx ; \
|
|
+ push %esp ; \
|
|
+ mov %ecx, %gs ; \
|
|
+ call evtchn_do_upcall ; \
|
|
+ add $4,%esp ; \
|
|
+ jmp ret_from_intr
|
|
+#define INTERRUPT_RETURN iret
|
|
+#endif /* __ASSEMBLY__ */
|
|
+
|
|
+#ifndef __ASSEMBLY__
|
|
+#define raw_local_save_flags(flags) \
|
|
+ do { (flags) = __raw_local_save_flags(); } while (0)
|
|
+
|
|
#define raw_local_irq_save(flags) \
|
|
do { (flags) = __raw_local_irq_save(); } while (0)
|
|
|
|
+static inline int raw_irqs_disabled_flags(unsigned long flags)
|
|
+{
|
|
+ return (flags != 0);
|
|
+}
|
|
+
|
|
+#define raw_irqs_disabled() \
|
|
+({ \
|
|
+ unsigned long flags = __raw_local_save_flags(); \
|
|
+ \
|
|
+ raw_irqs_disabled_flags(flags); \
|
|
+})
|
|
#endif /* __ASSEMBLY__ */
|
|
|
|
/*
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2007-06-12 13:14:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/mmu_context_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -27,14 +27,13 @@ static inline void enter_lazy_tlb(struct
|
|
static inline void __prepare_arch_switch(void)
|
|
{
|
|
/*
|
|
- * Save away %fs and %gs. No need to save %es and %ds, as those
|
|
- * are always kernel segments while inside the kernel. Must
|
|
- * happen before reload of cr3/ldt (i.e., not in __switch_to).
|
|
+ * Save away %fs. No need to save %gs, as it was saved on the
|
|
+ * stack on entry. No need to save %es and %ds, as those are
|
|
+ * always kernel segments while inside the kernel.
|
|
*/
|
|
- asm volatile ( "mov %%fs,%0 ; mov %%gs,%1"
|
|
- : "=m" (current->thread.fs),
|
|
- "=m" (current->thread.gs));
|
|
- asm volatile ( "movl %0,%%fs ; movl %0,%%gs"
|
|
+ asm volatile ( "mov %%fs,%0"
|
|
+ : "=m" (current->thread.fs));
|
|
+ asm volatile ( "movl %0,%%fs"
|
|
: : "r" (0) );
|
|
}
|
|
|
|
@@ -89,14 +88,14 @@ static inline void switch_mm(struct mm_s
|
|
* tlb flush IPI delivery. We must reload %cr3.
|
|
*/
|
|
load_cr3(next->pgd);
|
|
- load_LDT_nolock(&next->context, cpu);
|
|
+ load_LDT_nolock(&next->context);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
-#define deactivate_mm(tsk, mm) \
|
|
- asm("movl %0,%%fs ; movl %0,%%gs": :"r" (0))
|
|
+#define deactivate_mm(tsk, mm) \
|
|
+ asm("movl %0,%%fs": :"r" (0));
|
|
|
|
static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable-3level.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -1,8 +1,6 @@
|
|
#ifndef _I386_PGTABLE_3LEVEL_H
|
|
#define _I386_PGTABLE_3LEVEL_H
|
|
|
|
-#include <asm-generic/pgtable-nopud.h>
|
|
-
|
|
/*
|
|
* Intel Physical Address Extension (PAE) Mode - three-level page
|
|
* tables on PPro+ CPUs.
|
|
@@ -75,6 +73,23 @@ static inline void set_pte(pte_t *ptep,
|
|
xen_l3_entry_update((pudptr), (pudval))
|
|
|
|
/*
|
|
+ * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
|
|
+ * entry, so clear the bottom half first and enforce ordering with a compiler
|
|
+ * barrier.
|
|
+ */
|
|
+static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
+{
|
|
+ if ((mm != current->mm && mm != &init_mm)
|
|
+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
|
|
+ ptep->pte_low = 0;
|
|
+ smp_wmb();
|
|
+ ptep->pte_high = 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
|
+
|
|
+/*
|
|
* Pentium-II erratum A13: in PAE mode we explicitly have to flush
|
|
* the TLB via cr3 if the top-level pgd is changed...
|
|
* We do not let the generic code free and clear pgd entries due to
|
|
@@ -93,45 +108,16 @@ static inline void pud_clear (pud_t * pu
|
|
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
|
|
pmd_index(address))
|
|
|
|
-static inline int pte_none(pte_t pte)
|
|
-{
|
|
- return !(pte.pte_low | pte.pte_high);
|
|
-}
|
|
-
|
|
-/*
|
|
- * For PTEs and PDEs, we must clear the P-bit first when clearing a page table
|
|
- * entry, so clear the bottom half first and enforce ordering with a compiler
|
|
- * barrier.
|
|
- */
|
|
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
+static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
|
|
{
|
|
- if ((mm != current->mm && mm != &init_mm)
|
|
- || HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
|
|
- ptep->pte_low = 0;
|
|
- smp_wmb();
|
|
+ uint64_t val = __pte_val(res);
|
|
+ if (__cmpxchg64(ptep, val, 0) != val) {
|
|
+ /* xchg acts as a barrier before the setting of the high bits */
|
|
+ res.pte_low = xchg(&ptep->pte_low, 0);
|
|
+ res.pte_high = ptep->pte_high;
|
|
ptep->pte_high = 0;
|
|
}
|
|
-}
|
|
-
|
|
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
|
-
|
|
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
-static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
-{
|
|
- pte_t pte = *ptep;
|
|
- if (!pte_none(pte)) {
|
|
- if ((mm != &init_mm) ||
|
|
- HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
|
|
- uint64_t val = __pte_val(pte);
|
|
- if (__cmpxchg64(ptep, val, 0) != val) {
|
|
- /* xchg acts as a barrier before the setting of the high bits */
|
|
- pte.pte_low = xchg(&ptep->pte_low, 0);
|
|
- pte.pte_high = ptep->pte_high;
|
|
- ptep->pte_high = 0;
|
|
- }
|
|
- }
|
|
- }
|
|
- return pte;
|
|
+ return res;
|
|
}
|
|
|
|
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
|
|
@@ -160,6 +146,11 @@ static inline int pte_same(pte_t a, pte_
|
|
|
|
#define pte_page(x) pfn_to_page(pte_pfn(x))
|
|
|
|
+static inline int pte_none(pte_t pte)
|
|
+{
|
|
+ return !(pte.pte_low | pte.pte_high);
|
|
+}
|
|
+
|
|
#define __pte_mfn(_pte) (((_pte).pte_low >> PAGE_SHIFT) | \
|
|
((_pte).pte_high << (32-PAGE_SHIFT)))
|
|
#define pte_mfn(_pte) ((_pte).pte_low & _PAGE_PRESENT ? \
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -38,14 +38,14 @@ struct vm_area_struct;
|
|
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
extern unsigned long empty_zero_page[1024];
|
|
extern pgd_t *swapper_pg_dir;
|
|
-extern kmem_cache_t *pgd_cache;
|
|
-extern kmem_cache_t *pmd_cache;
|
|
+extern struct kmem_cache *pgd_cache;
|
|
+extern struct kmem_cache *pmd_cache;
|
|
extern spinlock_t pgd_lock;
|
|
extern struct page *pgd_list;
|
|
|
|
-void pmd_ctor(void *, kmem_cache_t *, unsigned long);
|
|
-void pgd_ctor(void *, kmem_cache_t *, unsigned long);
|
|
-void pgd_dtor(void *, kmem_cache_t *, unsigned long);
|
|
+void pmd_ctor(void *, struct kmem_cache *, unsigned long);
|
|
+void pgd_ctor(void *, struct kmem_cache *, unsigned long);
|
|
+void pgd_dtor(void *, struct kmem_cache *, unsigned long);
|
|
void pgtable_cache_init(void);
|
|
void paging_init(void);
|
|
|
|
@@ -276,7 +276,6 @@ static inline pte_t pte_mkhuge(pte_t pte
|
|
#define pte_update(mm, addr, ptep) do { } while (0)
|
|
#define pte_update_defer(mm, addr, ptep) do { } while (0)
|
|
|
|
-
|
|
/*
|
|
* We only update the dirty/accessed state if we set
|
|
* the dirty bit by hand in the kernel, since the hardware
|
|
@@ -342,6 +341,19 @@ do { \
|
|
__young; \
|
|
})
|
|
|
|
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
+static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
+{
|
|
+ pte_t pte = *ptep;
|
|
+ if (!pte_none(pte)
|
|
+ && (mm != &init_mm
|
|
+ || HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
|
|
+ pte = raw_ptep_get_and_clear(ptep, pte);
|
|
+ pte_update(mm, addr, ptep);
|
|
+ }
|
|
+ return pte;
|
|
+}
|
|
+
|
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
|
|
#define ptep_get_and_clear_full(mm, addr, ptep, full) \
|
|
((full) ? ({ \
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -20,6 +20,7 @@
|
|
#include <linux/threads.h>
|
|
#include <asm/percpu.h>
|
|
#include <linux/cpumask.h>
|
|
+#include <linux/init.h>
|
|
#include <xen/interface/physdev.h>
|
|
|
|
/* flag for disabling the tsc */
|
|
@@ -73,6 +74,7 @@ struct cpuinfo_x86 {
|
|
#endif
|
|
unsigned char x86_max_cores; /* cpuid returned max cores value */
|
|
unsigned char apicid;
|
|
+ unsigned short x86_clflush_size;
|
|
#ifdef CONFIG_SMP
|
|
unsigned char booted_cores; /* number of cores as seen by OS */
|
|
__u8 phys_proc_id; /* Physical processor id. */
|
|
@@ -114,6 +116,8 @@ extern struct cpuinfo_x86 cpu_data[];
|
|
extern int cpu_llc_id[NR_CPUS];
|
|
extern char ignore_fpu_irq;
|
|
|
|
+void __init cpu_detect(struct cpuinfo_x86 *c);
|
|
+
|
|
extern void identify_cpu(struct cpuinfo_x86 *);
|
|
extern void print_cpu_info(struct cpuinfo_x86 *);
|
|
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
|
|
@@ -146,8 +150,8 @@ static inline void detect_ht(struct cpui
|
|
#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
|
|
#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
|
|
|
|
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
|
|
- unsigned int *ecx, unsigned int *edx)
|
|
+static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
|
|
+ unsigned int *ecx, unsigned int *edx)
|
|
{
|
|
/* ecx is often an input as well as an output. */
|
|
__asm__(XEN_CPUID
|
|
@@ -158,59 +162,6 @@ static inline void __cpuid(unsigned int
|
|
: "0" (*eax), "2" (*ecx));
|
|
}
|
|
|
|
-/*
|
|
- * Generic CPUID function
|
|
- * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
|
|
- * resulting in stale register contents being returned.
|
|
- */
|
|
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
|
|
-{
|
|
- *eax = op;
|
|
- *ecx = 0;
|
|
- __cpuid(eax, ebx, ecx, edx);
|
|
-}
|
|
-
|
|
-/* Some CPUID calls want 'count' to be placed in ecx */
|
|
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
|
|
- int *edx)
|
|
-{
|
|
- *eax = op;
|
|
- *ecx = count;
|
|
- __cpuid(eax, ebx, ecx, edx);
|
|
-}
|
|
-
|
|
-/*
|
|
- * CPUID functions returning a single datum
|
|
- */
|
|
-static inline unsigned int cpuid_eax(unsigned int op)
|
|
-{
|
|
- unsigned int eax, ebx, ecx, edx;
|
|
-
|
|
- cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
- return eax;
|
|
-}
|
|
-static inline unsigned int cpuid_ebx(unsigned int op)
|
|
-{
|
|
- unsigned int eax, ebx, ecx, edx;
|
|
-
|
|
- cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
- return ebx;
|
|
-}
|
|
-static inline unsigned int cpuid_ecx(unsigned int op)
|
|
-{
|
|
- unsigned int eax, ebx, ecx, edx;
|
|
-
|
|
- cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
- return ecx;
|
|
-}
|
|
-static inline unsigned int cpuid_edx(unsigned int op)
|
|
-{
|
|
- unsigned int eax, ebx, ecx, edx;
|
|
-
|
|
- cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
- return edx;
|
|
-}
|
|
-
|
|
#define load_cr3(pgdir) write_cr3(__pa(pgdir))
|
|
|
|
/*
|
|
@@ -480,9 +431,9 @@ struct thread_struct {
|
|
.vm86_info = NULL, \
|
|
.sysenter_cs = __KERNEL_CS, \
|
|
.io_bitmap_ptr = NULL, \
|
|
+ .gs = __KERNEL_PDA, \
|
|
}
|
|
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
/*
|
|
* Note that the .io_bitmap member must be extra-big. This is because
|
|
* the CPU will access an additional byte beyond the end of the IO
|
|
@@ -497,26 +448,9 @@ struct thread_struct {
|
|
.io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
|
|
}
|
|
|
|
-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
|
|
-{
|
|
- tss->esp0 = thread->esp0;
|
|
- /* This can only happen when SEP is enabled, no need to test "SEP"arately */
|
|
- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
|
|
- tss->ss1 = thread->sysenter_cs;
|
|
- wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
|
- }
|
|
-}
|
|
-#define load_esp0(tss, thread) \
|
|
- __load_esp0(tss, thread)
|
|
-#else
|
|
-#define load_esp0(tss, thread) do { \
|
|
- if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
|
|
- BUG(); \
|
|
-} while (0)
|
|
-#endif
|
|
-
|
|
#define start_thread(regs, new_eip, new_esp) do { \
|
|
- __asm__("movl %0,%%fs ; movl %0,%%gs": :"r" (0)); \
|
|
+ __asm__("movl %0,%%fs": :"r" (0)); \
|
|
+ regs->xgs = 0; \
|
|
set_fs(USER_DS); \
|
|
regs->xds = __USER_DS; \
|
|
regs->xes = __USER_DS; \
|
|
@@ -526,26 +460,6 @@ static inline void __load_esp0(struct ts
|
|
regs->esp = new_esp; \
|
|
} while (0)
|
|
|
|
-/*
|
|
- * These special macros can be used to get or set a debugging register
|
|
- */
|
|
-#define get_debugreg(var, register) \
|
|
- (var) = HYPERVISOR_get_debugreg((register))
|
|
-#define set_debugreg(value, register) \
|
|
- WARN_ON(HYPERVISOR_set_debugreg((register), (value)))
|
|
-
|
|
-/*
|
|
- * Set IOPL bits in EFLAGS from given mask
|
|
- */
|
|
-static inline void set_iopl_mask(unsigned mask)
|
|
-{
|
|
- struct physdev_set_iopl set_iopl;
|
|
-
|
|
- /* Force the change at ring 0. */
|
|
- set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
|
|
-}
|
|
-
|
|
/* Forward declaration, a strange C thing */
|
|
struct task_struct;
|
|
struct mm_struct;
|
|
@@ -637,6 +551,105 @@ static inline void rep_nop(void)
|
|
|
|
#define cpu_relax() rep_nop()
|
|
|
|
+#define paravirt_enabled() 0
|
|
+#define __cpuid xen_cpuid
|
|
+
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
|
|
+{
|
|
+ tss->esp0 = thread->esp0;
|
|
+ /* This can only happen when SEP is enabled, no need to test "SEP"arately */
|
|
+ if (unlikely(tss->ss1 != thread->sysenter_cs)) {
|
|
+ tss->ss1 = thread->sysenter_cs;
|
|
+ wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
|
+ }
|
|
+}
|
|
+#define load_esp0(tss, thread) \
|
|
+ __load_esp0(tss, thread)
|
|
+#else
|
|
+#define load_esp0(tss, thread) do { \
|
|
+ if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
|
|
+ BUG(); \
|
|
+} while (0)
|
|
+#endif
|
|
+
|
|
+
|
|
+/*
|
|
+ * These special macros can be used to get or set a debugging register
|
|
+ */
|
|
+#define get_debugreg(var, register) \
|
|
+ (var) = HYPERVISOR_get_debugreg(register)
|
|
+#define set_debugreg(value, register) \
|
|
+ WARN_ON(HYPERVISOR_set_debugreg(register, value))
|
|
+
|
|
+#define set_iopl_mask xen_set_iopl_mask
|
|
+
|
|
+/*
|
|
+ * Set IOPL bits in EFLAGS from given mask
|
|
+ */
|
|
+static inline void xen_set_iopl_mask(unsigned mask)
|
|
+{
|
|
+ struct physdev_set_iopl set_iopl;
|
|
+
|
|
+ /* Force the change at ring 0. */
|
|
+ set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
|
|
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl));
|
|
+}
|
|
+
|
|
+
|
|
+/*
|
|
+ * Generic CPUID function
|
|
+ * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
|
|
+ * resulting in stale register contents being returned.
|
|
+ */
|
|
+static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
|
|
+{
|
|
+ *eax = op;
|
|
+ *ecx = 0;
|
|
+ __cpuid(eax, ebx, ecx, edx);
|
|
+}
|
|
+
|
|
+/* Some CPUID calls want 'count' to be placed in ecx */
|
|
+static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
|
|
+ int *edx)
|
|
+{
|
|
+ *eax = op;
|
|
+ *ecx = count;
|
|
+ __cpuid(eax, ebx, ecx, edx);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * CPUID functions returning a single datum
|
|
+ */
|
|
+static inline unsigned int cpuid_eax(unsigned int op)
|
|
+{
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
+
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
+ return eax;
|
|
+}
|
|
+static inline unsigned int cpuid_ebx(unsigned int op)
|
|
+{
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
+
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
+ return ebx;
|
|
+}
|
|
+static inline unsigned int cpuid_ecx(unsigned int op)
|
|
+{
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
+
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
+ return ecx;
|
|
+}
|
|
+static inline unsigned int cpuid_edx(unsigned int op)
|
|
+{
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
+
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
+ return edx;
|
|
+}
|
|
+
|
|
/* generic versions from gas */
|
|
#define GENERIC_NOP1 ".byte 0x90\n"
|
|
#define GENERIC_NOP2 ".byte 0x89,0xf6\n"
|
|
@@ -736,4 +749,8 @@ extern unsigned long boot_option_idle_ov
|
|
extern void enable_sep_cpu(void);
|
|
extern int sysenter_setup(void);
|
|
|
|
+extern int init_gdt(int cpu, struct task_struct *idle);
|
|
+extern void cpu_set_gdt(int);
|
|
+extern void secondary_cpu_init(void);
|
|
+
|
|
#endif /* __ASM_I386_PROCESSOR_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -8,6 +8,7 @@
|
|
#include <linux/kernel.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/cpumask.h>
|
|
+#include <asm/pda.h>
|
|
#endif
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
@@ -56,7 +57,7 @@ extern void cpu_uninit(void);
|
|
* from the initial startup. We map APIC_BASE very early in page_setup(),
|
|
* so this is correct in the x86 case.
|
|
*/
|
|
-#define raw_smp_processor_id() (current_thread_info()->cpu)
|
|
+#define raw_smp_processor_id() (read_pda(cpu_number))
|
|
|
|
extern cpumask_t cpu_possible_map;
|
|
#define cpu_callin_map cpu_possible_map
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system_32.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -139,17 +139,17 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
|
|
#define write_cr4(x) \
|
|
__asm__ __volatile__("movl %0,%%cr4": :"r" (x))
|
|
|
|
-/*
|
|
- * Clear and set 'TS' bit respectively
|
|
- */
|
|
+#define wbinvd() \
|
|
+ __asm__ __volatile__ ("wbinvd": : :"memory")
|
|
+
|
|
+/* Clear the 'TS' bit */
|
|
#define clts() (HYPERVISOR_fpu_taskswitch(0))
|
|
+
|
|
+/* Set the 'TS' bit */
|
|
#define stts() (HYPERVISOR_fpu_taskswitch(1))
|
|
|
|
#endif /* __KERNEL__ */
|
|
|
|
-#define wbinvd() \
|
|
- __asm__ __volatile__ ("wbinvd": : :"memory")
|
|
-
|
|
static inline unsigned long get_limit(unsigned long segment)
|
|
{
|
|
unsigned long __limit;
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/desc_64.h 2008-01-28 12:24:19.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc_64.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -9,62 +9,11 @@
|
|
|
|
#include <linux/string.h>
|
|
#include <linux/smp.h>
|
|
+#include <asm/desc_defs.h>
|
|
|
|
#include <asm/segment.h>
|
|
#include <asm/mmu.h>
|
|
|
|
-// 8 byte segment descriptor
|
|
-struct desc_struct {
|
|
- u16 limit0;
|
|
- u16 base0;
|
|
- unsigned base1 : 8, type : 4, s : 1, dpl : 2, p : 1;
|
|
- unsigned limit : 4, avl : 1, l : 1, d : 1, g : 1, base2 : 8;
|
|
-} __attribute__((packed));
|
|
-
|
|
-struct n_desc_struct {
|
|
- unsigned int a,b;
|
|
-};
|
|
-
|
|
-enum {
|
|
- GATE_INTERRUPT = 0xE,
|
|
- GATE_TRAP = 0xF,
|
|
- GATE_CALL = 0xC,
|
|
-};
|
|
-
|
|
-// 16byte gate
|
|
-struct gate_struct {
|
|
- u16 offset_low;
|
|
- u16 segment;
|
|
- unsigned ist : 3, zero0 : 5, type : 5, dpl : 2, p : 1;
|
|
- u16 offset_middle;
|
|
- u32 offset_high;
|
|
- u32 zero1;
|
|
-} __attribute__((packed));
|
|
-
|
|
-#define PTR_LOW(x) ((unsigned long)(x) & 0xFFFF)
|
|
-#define PTR_MIDDLE(x) (((unsigned long)(x) >> 16) & 0xFFFF)
|
|
-#define PTR_HIGH(x) ((unsigned long)(x) >> 32)
|
|
-
|
|
-enum {
|
|
- DESC_TSS = 0x9,
|
|
- DESC_LDT = 0x2,
|
|
-};
|
|
-
|
|
-// LDT or TSS descriptor in the GDT. 16 bytes.
|
|
-struct ldttss_desc {
|
|
- u16 limit0;
|
|
- u16 base0;
|
|
- unsigned base1 : 8, type : 5, dpl : 2, p : 1;
|
|
- unsigned limit1 : 4, zero0 : 3, g : 1, base2 : 8;
|
|
- u32 base3;
|
|
- u32 zero1;
|
|
-} __attribute__((packed));
|
|
-
|
|
-struct desc_ptr {
|
|
- unsigned short size;
|
|
- unsigned long address;
|
|
-} __attribute__((packed)) ;
|
|
-
|
|
extern struct desc_ptr idt_descr, cpu_gdt_descr[NR_CPUS];
|
|
|
|
extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -237,19 +237,18 @@ extern unsigned int __kernel_page_user;
|
|
|
|
static inline unsigned long pgd_bad(pgd_t pgd)
|
|
{
|
|
- unsigned long val = __pgd_val(pgd);
|
|
- val &= ~PTE_MASK;
|
|
- val &= ~(_PAGE_USER | _PAGE_DIRTY);
|
|
- return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
|
|
+ return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
|
|
}
|
|
|
|
-static inline unsigned long pud_bad(pud_t pud)
|
|
-{
|
|
- unsigned long val = __pud_val(pud);
|
|
- val &= ~PTE_MASK;
|
|
- val &= ~(_PAGE_USER | _PAGE_DIRTY);
|
|
- return val & ~(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED);
|
|
-}
|
|
+static inline unsigned long pud_bad(pud_t pud)
|
|
+{
|
|
+ return __pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
|
|
+}
|
|
+
|
|
+static inline unsigned long pmd_bad(pmd_t pmd)
|
|
+{
|
|
+ return __pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
|
|
+}
|
|
|
|
#define set_pte_at(_mm,addr,ptep,pteval) do { \
|
|
if (((_mm) != current->mm && (_mm) != &init_mm) || \
|
|
@@ -404,8 +403,6 @@ static inline int pmd_large(pmd_t pte) {
|
|
#define pmd_present(x) (__pmd_val(x) & _PAGE_PRESENT)
|
|
#endif
|
|
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
|
-#define pmd_bad(x) ((__pmd_val(x) & ~(PTE_MASK | _PAGE_USER | _PAGE_PRESENT)) \
|
|
- != (_KERNPG_TABLE & ~(_PAGE_USER | _PAGE_PRESENT)))
|
|
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
|
|
#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor_64.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -484,6 +484,14 @@ static inline void __mwait(unsigned long
|
|
: :"a" (eax), "c" (ecx));
|
|
}
|
|
|
|
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
|
|
+{
|
|
+ /* "mwait %eax,%ecx;" */
|
|
+ asm volatile(
|
|
+ "sti; .byte 0x0f,0x01,0xc9;"
|
|
+ : :"a" (eax), "c" (ecx));
|
|
+}
|
|
+
|
|
extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
|
|
|
|
#define stack_current() \
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp_64.h 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -88,11 +88,6 @@ extern u8 x86_cpu_to_log_apicid[NR_CPUS]
|
|
extern u8 bios_cpu_apicid[];
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
-static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
|
|
-{
|
|
- return cpus_addr(cpumask)[0];
|
|
-}
|
|
-
|
|
static inline int cpu_present_to_apicid(int mps_cpu)
|
|
{
|
|
if (mps_cpu < NR_CPUS)
|
|
@@ -127,13 +122,6 @@ static __inline int logical_smp_processo
|
|
#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
|
|
#else
|
|
#define cpu_physical_id(cpu) boot_cpu_id
|
|
-static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
|
|
- void *info, int retry, int wait)
|
|
-{
|
|
- /* Disable interrupts here? */
|
|
- func(info);
|
|
- return 0;
|
|
-}
|
|
#endif /* !CONFIG_SMP */
|
|
#endif
|
|
|
|
--- head-2010-05-25.orig/kernel/kexec.c 2010-05-25 09:20:04.000000000 +0200
|
|
+++ head-2010-05-25/kernel/kexec.c 2010-03-24 15:08:58.000000000 +0100
|
|
@@ -375,7 +375,7 @@ static struct page *kimage_alloc_pages(g
|
|
if (limit == ~0UL)
|
|
address_bits = BITS_PER_LONG;
|
|
else
|
|
- address_bits = long_log2(limit);
|
|
+ address_bits = ilog2(limit);
|
|
|
|
if (xen_limit_pages_to_max_mfn(pages, order, address_bits) < 0) {
|
|
__free_pages(pages, order);
|
|
--- head-2010-05-25.orig/net/core/dev.c 2010-05-25 09:21:41.000000000 +0200
|
|
+++ head-2010-05-25/net/core/dev.c 2010-05-25 09:21:50.000000000 +0200
|
|
@@ -2022,10 +2022,10 @@ inline int skb_checksum_setup(struct sk_
|
|
goto out;
|
|
switch (skb->nh.iph->protocol) {
|
|
case IPPROTO_TCP:
|
|
- skb->csum = offsetof(struct tcphdr, check);
|
|
+ skb->csum_offset = offsetof(struct tcphdr, check);
|
|
break;
|
|
case IPPROTO_UDP:
|
|
- skb->csum = offsetof(struct udphdr, check);
|
|
+ skb->csum_offset = offsetof(struct udphdr, check);
|
|
break;
|
|
default:
|
|
if (net_ratelimit())
|
|
@@ -2034,7 +2034,7 @@ inline int skb_checksum_setup(struct sk_
|
|
" %d packet", skb->nh.iph->protocol);
|
|
goto out;
|
|
}
|
|
- if ((skb->h.raw + skb->csum + 2) > skb->tail)
|
|
+ if ((skb->h.raw + skb->csum_offset + 2) > skb->tail)
|
|
goto out;
|
|
skb->ip_summed = CHECKSUM_PARTIAL;
|
|
skb->proto_csum_blank = 0;
|