8242 lines
247 KiB
Plaintext
8242 lines
247 KiB
Plaintext
From: www.kernel.org
|
|
Subject: Update to 2.6.22
|
|
Patch-mainline: 2.6.22
|
|
|
|
Automatically created from "patches.kernel.org/patch-2.6.22" by xen-port-patches.py
|
|
|
|
Acked-by: jbeulich@novell.com
|
|
|
|
--- head-2011-03-11.orig/arch/x86/Kconfig 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/Kconfig 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -1653,7 +1653,7 @@ config PHYSICAL_START
|
|
|
|
config RELOCATABLE
|
|
bool "Build a relocatable kernel"
|
|
- depends on !X86_XEN
|
|
+ depends on !XEN
|
|
default y
|
|
---help---
|
|
This builds a kernel image that retains relocation information
|
|
@@ -1712,7 +1712,6 @@ config COMPAT_VDSO
|
|
def_bool y
|
|
prompt "Compat VDSO support"
|
|
depends on X86_32 || IA32_EMULATION
|
|
- depends on !X86_XEN
|
|
---help---
|
|
Map the 32-bit VDSO to the predictable old-style address too.
|
|
|
|
@@ -1936,6 +1935,7 @@ config PCI
|
|
bool "PCI support"
|
|
default y
|
|
select ARCH_SUPPORTS_MSI if (X86_LOCAL_APIC && X86_IO_APIC)
|
|
+ select ARCH_SUPPORTS_MSI if (XEN_UNPRIVILEGED_GUEST && XEN_PCIDEV_FRONTEND)
|
|
---help---
|
|
Find out whether you have a PCI motherboard. PCI is the name of a
|
|
bus system, i.e. the way the CPU talks to the other stuff inside
|
|
--- head-2011-03-11.orig/arch/x86/kernel/Makefile 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/Makefile 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -132,4 +132,4 @@ endif
|
|
disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
|
|
smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
|
|
disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
|
|
-%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
|
|
+%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
|
|
--- head-2011-03-11.orig/arch/x86/kernel/apic/apic-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/apic/apic-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -19,7 +19,6 @@
|
|
#include <linux/mm.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/bootmem.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/kernel_stat.h>
|
|
--- head-2011-03-11.orig/arch/x86/kernel/asm-offsets_32.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/asm-offsets_32.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -109,11 +109,6 @@ void foo(void)
|
|
|
|
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
|
|
|
|
-#ifdef CONFIG_XEN
|
|
- BLANK();
|
|
- OFFSET(XEN_START_mfn_list, start_info, mfn_list);
|
|
-#endif
|
|
-
|
|
#ifdef CONFIG_PARAVIRT
|
|
BLANK();
|
|
OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
|
|
--- head-2011-03-11.orig/arch/x86/kernel/cpu/common-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/cpu/common-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -22,16 +22,40 @@
|
|
#define phys_pkg_id(a,b) a
|
|
#endif
|
|
#endif
|
|
-#include <asm/pda.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
#include "cpu.h"
|
|
|
|
-DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
|
|
-EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
|
|
+DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
|
|
+ [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 },
|
|
+ [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 },
|
|
+ [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 },
|
|
+ [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 },
|
|
+#ifndef CONFIG_XEN
|
|
+ /*
|
|
+ * Segments used for calling PnP BIOS have byte granularity.
|
|
+ * The code segments and data segments have fixed 64k limits,
|
|
+ * the transfer segment sizes are set at run time.
|
|
+ */
|
|
+ [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
|
|
+ [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */
|
|
+ [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */
|
|
+ [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */
|
|
+ [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */
|
|
+ /*
|
|
+ * The APM segments have byte granularity and their bases
|
|
+ * are set at run time. All have 64k limits.
|
|
+ */
|
|
+ [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */
|
|
+ /* 16-bit code */
|
|
+ [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 },
|
|
+ [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */
|
|
|
|
-struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
|
|
-EXPORT_SYMBOL(_cpu_pda);
|
|
+ [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 },
|
|
+#endif
|
|
+ [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 },
|
|
+} };
|
|
+EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
|
|
|
|
static int cachesize_override __cpuinitdata = -1;
|
|
static int disable_x86_fxsr __cpuinitdata;
|
|
@@ -375,7 +399,7 @@ __setup("serialnumber", x86_serial_nr_se
|
|
/*
|
|
* This does the hard work of actually picking apart the CPU stuff...
|
|
*/
|
|
-void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
+static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
|
|
{
|
|
int i;
|
|
|
|
@@ -486,15 +510,22 @@ void __cpuinit identify_cpu(struct cpuin
|
|
|
|
/* Init Machine Check Exception if available. */
|
|
mcheck_init(c);
|
|
+}
|
|
|
|
- if (c == &boot_cpu_data)
|
|
- sysenter_setup();
|
|
+void __init identify_boot_cpu(void)
|
|
+{
|
|
+ identify_cpu(&boot_cpu_data);
|
|
+ sysenter_setup();
|
|
enable_sep_cpu();
|
|
+ mtrr_bp_init();
|
|
+}
|
|
|
|
- if (c == &boot_cpu_data)
|
|
- mtrr_bp_init();
|
|
- else
|
|
- mtrr_ap_init();
|
|
+void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
|
+{
|
|
+ BUG_ON(c == &boot_cpu_data);
|
|
+ identify_cpu(c);
|
|
+ enable_sep_cpu();
|
|
+ mtrr_ap_init();
|
|
}
|
|
|
|
#ifdef CONFIG_X86_HT
|
|
@@ -608,136 +639,47 @@ void __init early_cpu_init(void)
|
|
#endif
|
|
}
|
|
|
|
-/* Make sure %gs is initialized properly in idle threads */
|
|
+/* Make sure %fs is initialized properly in idle threads */
|
|
struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
|
|
{
|
|
memset(regs, 0, sizeof(struct pt_regs));
|
|
- regs->xfs = __KERNEL_PDA;
|
|
+ regs->xfs = __KERNEL_PERCPU;
|
|
return regs;
|
|
}
|
|
|
|
-static __cpuinit int alloc_gdt(int cpu)
|
|
-{
|
|
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
- struct desc_struct *gdt;
|
|
- struct i386_pda *pda;
|
|
-
|
|
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
- pda = cpu_pda(cpu);
|
|
-
|
|
- /*
|
|
- * This is a horrible hack to allocate the GDT. The problem
|
|
- * is that cpu_init() is called really early for the boot CPU
|
|
- * (and hence needs bootmem) but much later for the secondary
|
|
- * CPUs, when bootmem will have gone away
|
|
- */
|
|
- if (NODE_DATA(0)->bdata->node_bootmem_map) {
|
|
- BUG_ON(gdt != NULL || pda != NULL);
|
|
-
|
|
- gdt = alloc_bootmem_pages(PAGE_SIZE);
|
|
- pda = alloc_bootmem(sizeof(*pda));
|
|
- /* alloc_bootmem(_pages) panics on failure, so no check */
|
|
-
|
|
- memset(gdt, 0, PAGE_SIZE);
|
|
- memset(pda, 0, sizeof(*pda));
|
|
- } else {
|
|
- /* GDT and PDA might already have been allocated if
|
|
- this is a CPU hotplug re-insertion. */
|
|
- if (gdt == NULL)
|
|
- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
|
|
-
|
|
- if (pda == NULL)
|
|
- pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
|
|
-
|
|
- if (unlikely(!gdt || !pda)) {
|
|
- free_pages((unsigned long)gdt, 0);
|
|
- kfree(pda);
|
|
- return 0;
|
|
- }
|
|
- }
|
|
-
|
|
- cpu_gdt_descr->address = (unsigned long)gdt;
|
|
- cpu_pda(cpu) = pda;
|
|
-
|
|
- return 1;
|
|
-}
|
|
-
|
|
-/* Initial PDA used by boot CPU */
|
|
-struct i386_pda boot_pda = {
|
|
- ._pda = &boot_pda,
|
|
- .cpu_number = 0,
|
|
- .pcurrent = &init_task,
|
|
-};
|
|
-
|
|
-static inline void set_kernel_fs(void)
|
|
-{
|
|
- /* Set %fs for this CPU's PDA. Memory clobber is to create a
|
|
- barrier with respect to any PDA operations, so the compiler
|
|
- doesn't move any before here. */
|
|
- asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory");
|
|
-}
|
|
-
|
|
-/* Initialize the CPU's GDT and PDA. The boot CPU does this for
|
|
- itself, but secondaries find this done for them. */
|
|
-__cpuinit int init_gdt(int cpu, struct task_struct *idle)
|
|
-{
|
|
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
- struct desc_struct *gdt;
|
|
- struct i386_pda *pda;
|
|
-
|
|
- /* For non-boot CPUs, the GDT and PDA should already have been
|
|
- allocated. */
|
|
- if (!alloc_gdt(cpu)) {
|
|
- printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
|
|
- return 0;
|
|
- }
|
|
-
|
|
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
- pda = cpu_pda(cpu);
|
|
-
|
|
- BUG_ON(gdt == NULL || pda == NULL);
|
|
-
|
|
- /*
|
|
- * Initialize the per-CPU GDT with the boot GDT,
|
|
- * and set up the GDT descriptor:
|
|
- */
|
|
- memcpy(gdt, cpu_gdt_table, GDT_SIZE);
|
|
- cpu_gdt_descr->size = GDT_SIZE - 1;
|
|
-
|
|
- pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
|
|
- (u32 *)&gdt[GDT_ENTRY_PDA].b,
|
|
- (unsigned long)pda, sizeof(*pda) - 1,
|
|
- 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
|
|
-
|
|
- memset(pda, 0, sizeof(*pda));
|
|
- pda->_pda = pda;
|
|
- pda->cpu_number = cpu;
|
|
- pda->pcurrent = idle;
|
|
-
|
|
- return 1;
|
|
-}
|
|
-
|
|
-void __cpuinit cpu_set_gdt(int cpu)
|
|
+/* Current gdt points %fs at the "master" per-cpu area: after this,
|
|
+ * it's on the real one. */
|
|
+void switch_to_new_gdt(void)
|
|
{
|
|
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+ struct Xgt_desc_struct gdt_descr;
|
|
unsigned long va, frames[16];
|
|
int f;
|
|
|
|
- for (va = cpu_gdt_descr->address, f = 0;
|
|
- va < cpu_gdt_descr->address + cpu_gdt_descr->size;
|
|
+ gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
|
|
+ gdt_descr.size = GDT_SIZE - 1;
|
|
+
|
|
+ for (va = gdt_descr.address, f = 0;
|
|
+ va < gdt_descr.address + gdt_descr.size;
|
|
va += PAGE_SIZE, f++) {
|
|
frames[f] = virt_to_mfn(va);
|
|
make_lowmem_page_readonly(
|
|
(void *)va, XENFEAT_writable_descriptor_tables);
|
|
}
|
|
- BUG_ON(HYPERVISOR_set_gdt(frames, (cpu_gdt_descr->size + 1) / 8));
|
|
-
|
|
- set_kernel_fs();
|
|
+ if (HYPERVISOR_set_gdt(frames, (gdt_descr.size + 1) / 8))
|
|
+ BUG();
|
|
+ asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
|
|
}
|
|
|
|
-/* Common CPU init for both boot and secondary CPUs */
|
|
-static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
|
|
+/*
|
|
+ * cpu_init() initializes state that is per-CPU. Some data is already
|
|
+ * initialized (naturally) in the bootstrap process, such as the GDT
|
|
+ * and IDT. We reload them nevertheless, this function acts as a
|
|
+ * 'CPU state barrier', nothing should get across.
|
|
+ */
|
|
+void __cpuinit cpu_init(void)
|
|
{
|
|
+ int cpu = smp_processor_id();
|
|
+ struct task_struct *curr = current;
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
struct tss_struct * t = &per_cpu(init_tss, cpu);
|
|
#endif
|
|
@@ -759,6 +701,8 @@ static void __cpuinit _cpu_init(int cpu,
|
|
set_in_cr4(X86_CR4_TSD);
|
|
}
|
|
|
|
+ switch_to_new_gdt();
|
|
+
|
|
/*
|
|
* Set up and load the per-CPU TSS and LDT
|
|
*/
|
|
@@ -796,38 +740,6 @@ static void __cpuinit _cpu_init(int cpu,
|
|
mxcsr_feature_mask_init();
|
|
}
|
|
|
|
-/* Entrypoint to initialize secondary CPU */
|
|
-void __cpuinit secondary_cpu_init(void)
|
|
-{
|
|
- int cpu = smp_processor_id();
|
|
- struct task_struct *curr = current;
|
|
-
|
|
- _cpu_init(cpu, curr);
|
|
-}
|
|
-
|
|
-/*
|
|
- * cpu_init() initializes state that is per-CPU. Some data is already
|
|
- * initialized (naturally) in the bootstrap process, such as the GDT
|
|
- * and IDT. We reload them nevertheless, this function acts as a
|
|
- * 'CPU state barrier', nothing should get across.
|
|
- */
|
|
-void __cpuinit cpu_init(void)
|
|
-{
|
|
- int cpu = smp_processor_id();
|
|
- struct task_struct *curr = current;
|
|
-
|
|
- /* Set up the real GDT and PDA, so we can transition from the
|
|
- boot versions. */
|
|
- if (!init_gdt(cpu, curr)) {
|
|
- /* failed to allocate something; not much we can do... */
|
|
- for (;;)
|
|
- local_irq_enable();
|
|
- }
|
|
-
|
|
- cpu_set_gdt(cpu);
|
|
- _cpu_init(cpu, curr);
|
|
-}
|
|
-
|
|
#ifdef CONFIG_HOTPLUG_CPU
|
|
void __cpuinit cpu_uninit(void)
|
|
{
|
|
--- head-2011-03-11.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/cpu/mtrr/main-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -167,7 +167,7 @@ mtrr_del(int reg, unsigned long base, un
|
|
EXPORT_SYMBOL(mtrr_add);
|
|
EXPORT_SYMBOL(mtrr_del);
|
|
|
|
-void __init mtrr_bp_init(void)
|
|
+__init void mtrr_bp_init(void)
|
|
{
|
|
}
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/e820_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/e820_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -162,26 +162,27 @@ static struct resource standard_io_resou
|
|
|
|
static int __init romsignature(const unsigned char *rom)
|
|
{
|
|
+ const unsigned short * const ptr = (const unsigned short *)rom;
|
|
unsigned short sig;
|
|
|
|
- return probe_kernel_address((const unsigned short *)rom, sig) == 0 &&
|
|
- sig == ROMSIGNATURE;
|
|
+ return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE;
|
|
}
|
|
|
|
-static int __init romchecksum(unsigned char *rom, unsigned long length)
|
|
+static int __init romchecksum(const unsigned char *rom, unsigned long length)
|
|
{
|
|
- unsigned char sum;
|
|
+ unsigned char sum, c;
|
|
|
|
- for (sum = 0; length; length--)
|
|
- sum += *rom++;
|
|
- return sum == 0;
|
|
+ for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--)
|
|
+ sum += c;
|
|
+ return !length && !sum;
|
|
}
|
|
|
|
static void __init probe_roms(void)
|
|
{
|
|
+ const unsigned char *rom;
|
|
unsigned long start, length, upper;
|
|
- unsigned char *rom;
|
|
- int i;
|
|
+ unsigned char c;
|
|
+ int i;
|
|
|
|
#ifdef CONFIG_XEN
|
|
/* Nothing to do if not running in dom0. */
|
|
@@ -198,8 +199,11 @@ static void __init probe_roms(void)
|
|
|
|
video_rom_resource.start = start;
|
|
|
|
+ if (probe_kernel_address(rom + 2, c) != 0)
|
|
+ continue;
|
|
+
|
|
/* 0 < length <= 0x7f * 512, historically */
|
|
- length = rom[2] * 512;
|
|
+ length = c * 512;
|
|
|
|
/* if checksum okay, trust length byte */
|
|
if (length && romchecksum(rom, length))
|
|
@@ -233,8 +237,11 @@ static void __init probe_roms(void)
|
|
if (!romsignature(rom))
|
|
continue;
|
|
|
|
+ if (probe_kernel_address(rom + 2, c) != 0)
|
|
+ continue;
|
|
+
|
|
/* 0 < length <= 0x7f * 512, historically */
|
|
- length = rom[2] * 512;
|
|
+ length = c * 512;
|
|
|
|
/* but accept any length that fits if checksum okay */
|
|
if (!length || start + length > upper || !romchecksum(rom, length))
|
|
@@ -249,7 +256,7 @@ static void __init probe_roms(void)
|
|
}
|
|
|
|
#ifdef CONFIG_XEN
|
|
-static struct e820map machine_e820 __initdata;
|
|
+static struct e820map machine_e820;
|
|
#define e820 machine_e820
|
|
#endif
|
|
|
|
@@ -409,10 +416,8 @@ int __init sanitize_e820_map(struct e820
|
|
____________________33__
|
|
______________________4_
|
|
*/
|
|
- printk("sanitize start\n");
|
|
/* if there's only one memory region, don't bother */
|
|
if (*pnr_map < 2) {
|
|
- printk("sanitize bail 0\n");
|
|
return -1;
|
|
}
|
|
|
|
@@ -421,7 +426,6 @@ int __init sanitize_e820_map(struct e820
|
|
/* bail out if we find any unreasonable addresses in bios map */
|
|
for (i=0; i<old_nr; i++)
|
|
if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) {
|
|
- printk("sanitize bail 1\n");
|
|
return -1;
|
|
}
|
|
|
|
@@ -517,7 +521,6 @@ int __init sanitize_e820_map(struct e820
|
|
memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry));
|
|
*pnr_map = new_nr;
|
|
|
|
- printk("sanitize end\n");
|
|
return 0;
|
|
}
|
|
|
|
@@ -552,7 +555,6 @@ int __init copy_e820_map(struct e820entr
|
|
unsigned long long size = biosmap->size;
|
|
unsigned long long end = start + size;
|
|
unsigned long type = biosmap->type;
|
|
- printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type);
|
|
|
|
/* Overflow in 64 bits? Ignore the memory map. */
|
|
if (start > end)
|
|
@@ -564,17 +566,11 @@ int __init copy_e820_map(struct e820entr
|
|
* Not right. Fix it up.
|
|
*/
|
|
if (type == E820_RAM) {
|
|
- printk("copy_e820_map() type is E820_RAM\n");
|
|
if (start < 0x100000ULL && end > 0xA0000ULL) {
|
|
- printk("copy_e820_map() lies in range...\n");
|
|
- if (start < 0xA0000ULL) {
|
|
- printk("copy_e820_map() start < 0xA0000ULL\n");
|
|
+ if (start < 0xA0000ULL)
|
|
add_memory_region(start, 0xA0000ULL-start, type);
|
|
- }
|
|
- if (end <= 0x100000ULL) {
|
|
- printk("copy_e820_map() end <= 0x100000ULL\n");
|
|
+ if (end <= 0x100000ULL)
|
|
continue;
|
|
- }
|
|
start = 0x100000ULL;
|
|
size = end - start;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/entry_32-xen.S 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/entry_32-xen.S 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -15,7 +15,7 @@
|
|
* I changed all the .align's to 4 (16 byte alignment), as that's faster
|
|
* on a 486.
|
|
*
|
|
- * Stack layout in 'ret_from_system_call':
|
|
+ * Stack layout in 'syscall_exit':
|
|
* ptrace needs to have all regs on the stack.
|
|
* if the order here is changed, it needs to be
|
|
* updated in fork.c:copy_process, signal.c:do_signal,
|
|
@@ -135,7 +135,7 @@ NMI_MASK = 0x80000000
|
|
movl $(__USER_DS), %edx; \
|
|
movl %edx, %ds; \
|
|
movl %edx, %es; \
|
|
- movl $(__KERNEL_PDA), %edx; \
|
|
+ movl $(__KERNEL_PERCPU), %edx; \
|
|
movl %edx, %fs
|
|
|
|
#define RESTORE_INT_REGS \
|
|
@@ -308,16 +308,12 @@ sysenter_past_esp:
|
|
pushl $(__USER_CS)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET cs, 0*/
|
|
-#ifndef CONFIG_COMPAT_VDSO
|
|
/*
|
|
* Push current_thread_info()->sysenter_return to the stack.
|
|
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
|
|
* pushed above; +8 corresponds to copy_thread's esp0 setting.
|
|
*/
|
|
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
|
|
-#else
|
|
- pushl $SYSENTER_RETURN
|
|
-#endif
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
CFI_REL_OFFSET eip, 0
|
|
|
|
@@ -345,7 +341,7 @@ sysenter_past_esp:
|
|
jae syscall_badsys
|
|
call *sys_call_table(,%eax,4)
|
|
movl %eax,PT_EAX(%esp)
|
|
- DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX)
|
|
+ DISABLE_INTERRUPTS(CLBR_ANY)
|
|
TRACE_IRQS_OFF
|
|
movl TI_flags(%ebp), %ecx
|
|
testw $_TIF_ALLWORK_MASK, %cx
|
|
@@ -400,10 +396,6 @@ ENTRY(system_call)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
GET_THREAD_INFO(%ebp)
|
|
- testl $TF_MASK,PT_EFLAGS(%esp)
|
|
- jz no_singlestep
|
|
- orl $_TIF_SINGLESTEP,TI_flags(%ebp)
|
|
-no_singlestep:
|
|
# system call tracing in operation / emulation
|
|
/* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */
|
|
testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp)
|
|
@@ -418,6 +410,10 @@ syscall_exit:
|
|
# setting need_resched or sigpending
|
|
# between sampling and the iret
|
|
TRACE_IRQS_OFF
|
|
+ testl $TF_MASK,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit
|
|
+ jz no_singlestep
|
|
+ orl $_TIF_SINGLESTEP,TI_flags(%ebp)
|
|
+no_singlestep:
|
|
movl TI_flags(%ebp), %ecx
|
|
testw $_TIF_ALLWORK_MASK, %cx # current->work
|
|
jne syscall_exit_work
|
|
@@ -635,9 +631,7 @@ END(syscall_badsys)
|
|
#ifndef CONFIG_XEN
|
|
#define FIXUP_ESPFIX_STACK \
|
|
/* since we are on a wrong stack, we cant make it a C code :( */ \
|
|
- movl %fs:PDA_cpu, %ebx; \
|
|
- PER_CPU(cpu_gdt_descr, %ebx); \
|
|
- movl GDS_address(%ebx), %ebx; \
|
|
+ PER_CPU(gdt_page, %ebx); \
|
|
GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \
|
|
addl %esp, %eax; \
|
|
pushl $__KERNEL_DS; \
|
|
@@ -710,7 +704,7 @@ ENTRY(name) \
|
|
SAVE_ALL; \
|
|
TRACE_IRQS_OFF \
|
|
movl %esp,%eax; \
|
|
- call smp_/**/name; \
|
|
+ call smp_##name; \
|
|
jmp ret_from_intr; \
|
|
CFI_ENDPROC; \
|
|
ENDPROC(name)
|
|
@@ -718,10 +712,6 @@ ENDPROC(name)
|
|
/* The include is where all of the SMP etc. interrupts come from */
|
|
#include "entry_arch.h"
|
|
|
|
-/* This alternate entry is needed because we hijack the apic LVTT */
|
|
-#if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC)
|
|
-BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR)
|
|
-#endif
|
|
#else
|
|
#define UNWIND_ESPFIX_STACK
|
|
#endif
|
|
@@ -764,7 +754,7 @@ error_code:
|
|
pushl %fs
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET fs, 0*/
|
|
- movl $(__KERNEL_PDA), %ecx
|
|
+ movl $(__KERNEL_PERCPU), %ecx
|
|
movl %ecx, %fs
|
|
UNWIND_ESPFIX_STACK
|
|
popl %ecx
|
|
--- head-2011-03-11.orig/arch/x86/kernel/head_32-xen.S 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/head_32-xen.S 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -37,7 +37,8 @@ ENTRY(startup_32)
|
|
/* Set up the stack pointer */
|
|
movl $(init_thread_union+THREAD_SIZE),%esp
|
|
|
|
- call setup_pda
|
|
+ movl %ss,%eax
|
|
+ movl %eax,%fs # gets reset once there's real percpu
|
|
|
|
/* get vendor info */
|
|
xorl %eax,%eax # call CPUID with 0 -> return vendor ID
|
|
@@ -64,55 +65,11 @@ ENTRY(startup_32)
|
|
xorl %eax,%eax # Clear GS
|
|
movl %eax,%gs
|
|
|
|
- movl $(__KERNEL_PDA),%eax
|
|
- mov %eax,%fs
|
|
-
|
|
cld # gcc2 wants the direction flag cleared at all times
|
|
|
|
pushl $0 # fake return address for unwinder
|
|
jmp start_kernel
|
|
|
|
-/*
|
|
- * Point the GDT at this CPU's PDA. This will be
|
|
- * cpu_gdt_table and boot_pda.
|
|
- */
|
|
-ENTRY(setup_pda)
|
|
- /* get the PDA pointer */
|
|
- movl $boot_pda, %eax
|
|
-
|
|
- /* slot the PDA address into the GDT */
|
|
- mov $cpu_gdt_table, %ecx
|
|
- mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */
|
|
- shr $16, %eax
|
|
- mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */
|
|
- mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */
|
|
-
|
|
- # %esi still points to start_info, and no registers
|
|
- # need to be preserved.
|
|
-
|
|
- movl XEN_START_mfn_list(%esi), %ebx
|
|
- movl $(cpu_gdt_table - __PAGE_OFFSET), %eax
|
|
- shrl $PAGE_SHIFT, %eax
|
|
- movl (%ebx,%eax,4), %ecx
|
|
- pushl %ecx # frame number for set_gdt below
|
|
-
|
|
- xorl %esi, %esi
|
|
- xorl %edx, %edx
|
|
- shldl $PAGE_SHIFT, %ecx, %edx
|
|
- shll $PAGE_SHIFT, %ecx
|
|
- orl $0x61, %ecx
|
|
- movl $cpu_gdt_table, %ebx
|
|
- movl $__HYPERVISOR_update_va_mapping, %eax
|
|
- int $0x82
|
|
-
|
|
- movl $(PAGE_SIZE_asm / 8), %ecx
|
|
- movl %esp, %ebx
|
|
- movl $__HYPERVISOR_set_gdt, %eax
|
|
- int $0x82
|
|
-
|
|
- popl %ecx
|
|
- ret
|
|
-
|
|
#define HYPERCALL_PAGE_OFFSET 0x1000
|
|
.org HYPERCALL_PAGE_OFFSET
|
|
ENTRY(hypercall_page)
|
|
@@ -138,60 +95,6 @@ ENTRY(empty_zero_page)
|
|
*/
|
|
.data
|
|
|
|
-/*
|
|
- * The Global Descriptor Table contains 28 quadwords, per-CPU.
|
|
- */
|
|
- .section .data.page_aligned, "aw"
|
|
- .align PAGE_SIZE_asm
|
|
-ENTRY(cpu_gdt_table)
|
|
- .quad 0x0000000000000000 /* NULL descriptor */
|
|
- .quad 0x0000000000000000 /* 0x0b reserved */
|
|
- .quad 0x0000000000000000 /* 0x13 reserved */
|
|
- .quad 0x0000000000000000 /* 0x1b reserved */
|
|
- .quad 0x0000000000000000 /* 0x20 unused */
|
|
- .quad 0x0000000000000000 /* 0x28 unused */
|
|
- .quad 0x0000000000000000 /* 0x33 TLS entry 1 */
|
|
- .quad 0x0000000000000000 /* 0x3b TLS entry 2 */
|
|
- .quad 0x0000000000000000 /* 0x43 TLS entry 3 */
|
|
- .quad 0x0000000000000000 /* 0x4b reserved */
|
|
- .quad 0x0000000000000000 /* 0x53 reserved */
|
|
- .quad 0x0000000000000000 /* 0x5b reserved */
|
|
-
|
|
- .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */
|
|
- .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */
|
|
- .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */
|
|
- .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */
|
|
-
|
|
- .quad 0x0000000000000000 /* 0x80 TSS descriptor */
|
|
- .quad 0x0000000000000000 /* 0x88 LDT descriptor */
|
|
-
|
|
- /*
|
|
- * Segments used for calling PnP BIOS have byte granularity.
|
|
- * They code segments and data segments have fixed 64k limits,
|
|
- * the transfer segment sizes are set at run time.
|
|
- */
|
|
- .quad 0x0000000000000000 /* 0x90 32-bit code */
|
|
- .quad 0x0000000000000000 /* 0x98 16-bit code */
|
|
- .quad 0x0000000000000000 /* 0xa0 16-bit data */
|
|
- .quad 0x0000000000000000 /* 0xa8 16-bit data */
|
|
- .quad 0x0000000000000000 /* 0xb0 16-bit data */
|
|
-
|
|
- /*
|
|
- * The APM segments have byte granularity and their bases
|
|
- * are set at run time. All have 64k limits.
|
|
- */
|
|
- .quad 0x0000000000000000 /* 0xb8 APM CS code */
|
|
- .quad 0x0000000000000000 /* 0xc0 APM CS 16 code (16 bit) */
|
|
- .quad 0x0000000000000000 /* 0xc8 APM DS data */
|
|
-
|
|
- .quad 0x0000000000000000 /* 0xd0 - ESPFIX SS */
|
|
- .quad 0x00cf92000000ffff /* 0xd8 - PDA */
|
|
- .quad 0x0000000000000000 /* 0xe0 - unused */
|
|
- .quad 0x0000000000000000 /* 0xe8 - unused */
|
|
- .quad 0x0000000000000000 /* 0xf0 - unused */
|
|
- .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */
|
|
- .align PAGE_SIZE_asm
|
|
-
|
|
#if CONFIG_XEN_COMPAT <= 0x030002
|
|
/*
|
|
* __xen_guest information
|
|
--- head-2011-03-11.orig/arch/x86/kernel/io_apic_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/io_apic_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -25,7 +25,6 @@
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/sched.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/compiler.h>
|
|
#include <linux/acpi.h>
|
|
@@ -35,6 +34,7 @@
|
|
#include <linux/msi.h>
|
|
#include <linux/htirq.h>
|
|
#include <linux/freezer.h>
|
|
+#include <linux/kthread.h>
|
|
|
|
#include <asm/io.h>
|
|
#include <asm/smp.h>
|
|
@@ -710,8 +710,6 @@ static int balanced_irq(void *unused)
|
|
unsigned long prev_balance_time = jiffies;
|
|
long time_remaining = balanced_irq_interval;
|
|
|
|
- daemonize("kirqd");
|
|
-
|
|
/* push everything to CPU 0 to give us a starting point. */
|
|
for (i = 0 ; i < NR_IRQS ; i++) {
|
|
irq_desc[i].pending_mask = cpumask_of_cpu(0);
|
|
@@ -771,10 +769,9 @@ static int __init balanced_irq_init(void
|
|
}
|
|
|
|
printk(KERN_INFO "Starting balanced_irq\n");
|
|
- if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0)
|
|
+ if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd")))
|
|
return 0;
|
|
- else
|
|
- printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
|
|
+ printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq");
|
|
failed:
|
|
for_each_possible_cpu(i) {
|
|
kfree(irq_cpu_data[i].irq_delta);
|
|
@@ -1455,10 +1452,6 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
enable_8259A_irq(0);
|
|
}
|
|
|
|
-static inline void UNEXPECTED_IO_APIC(void)
|
|
-{
|
|
-}
|
|
-
|
|
void __init print_IO_APIC(void)
|
|
{
|
|
int apic, i;
|
|
@@ -1498,34 +1491,12 @@ void __init print_IO_APIC(void)
|
|
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
|
|
printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS);
|
|
- if (reg_00.bits.ID >= get_physical_broadcast())
|
|
- UNEXPECTED_IO_APIC();
|
|
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
|
|
- UNEXPECTED_IO_APIC();
|
|
|
|
printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw);
|
|
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
|
|
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
|
|
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
|
|
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
|
|
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
|
|
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
|
|
- (reg_01.bits.entries != 0x2E) &&
|
|
- (reg_01.bits.entries != 0x3F)
|
|
- )
|
|
- UNEXPECTED_IO_APIC();
|
|
|
|
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
|
|
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
|
|
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
|
|
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
|
|
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
|
|
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
|
|
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
|
|
- )
|
|
- UNEXPECTED_IO_APIC();
|
|
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
|
|
- UNEXPECTED_IO_APIC();
|
|
|
|
/*
|
|
* Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02,
|
|
@@ -1535,8 +1506,6 @@ void __init print_IO_APIC(void)
|
|
if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) {
|
|
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
|
|
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
|
|
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
|
|
- UNEXPECTED_IO_APIC();
|
|
}
|
|
|
|
/*
|
|
@@ -1548,8 +1517,6 @@ void __init print_IO_APIC(void)
|
|
reg_03.raw != reg_01.raw) {
|
|
printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw);
|
|
printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT);
|
|
- if (reg_03.bits.__reserved_1)
|
|
- UNEXPECTED_IO_APIC();
|
|
}
|
|
|
|
printk(KERN_DEBUG ".... IRQ redirection table:\n");
|
|
@@ -2686,19 +2653,19 @@ int arch_setup_msi_irq(struct pci_dev *d
|
|
if (irq < 0)
|
|
return irq;
|
|
|
|
- set_irq_msi(irq, desc);
|
|
ret = msi_compose_msg(dev, irq, &msg);
|
|
if (ret < 0) {
|
|
destroy_irq(irq);
|
|
return ret;
|
|
}
|
|
|
|
+ set_irq_msi(irq, desc);
|
|
write_msi_msg(irq, &msg);
|
|
|
|
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
|
|
"edge");
|
|
|
|
- return irq;
|
|
+ return 0;
|
|
}
|
|
|
|
void arch_teardown_msi_irq(unsigned int irq)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/ioport_32-xen.c 2008-01-28 12:24:19.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/ioport_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -12,10 +12,10 @@
|
|
#include <linux/types.h>
|
|
#include <linux/ioport.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/thread_info.h>
|
|
+#include <linux/syscalls.h>
|
|
#include <xen/interface/physdev.h>
|
|
|
|
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
|
|
--- head-2011-03-11.orig/arch/x86/kernel/ldt_32-xen.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/ldt_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -10,7 +10,6 @@
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/slab.h>
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/microcode-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/microcode-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -135,7 +135,7 @@ static int __init microcode_dev_init (vo
|
|
return 0;
|
|
}
|
|
|
|
-static void __exit microcode_dev_exit (void)
|
|
+static void microcode_dev_exit (void)
|
|
{
|
|
misc_deregister(&microcode_dev);
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/mpparse_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/mpparse_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -18,7 +18,6 @@
|
|
#include <linux/acpi.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/bootmem.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/bitops.h>
|
|
@@ -484,7 +483,7 @@ static int __init smp_read_mpc(struct mp
|
|
}
|
|
++mpc_record;
|
|
}
|
|
- clustered_apic_check();
|
|
+ setup_apic_routing();
|
|
if (!num_processors)
|
|
printk(KERN_ERR "SMP mptable: no processors registered!\n");
|
|
return num_processors;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/pci-dma-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/pci-dma-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,6 +13,7 @@
|
|
#include <linux/pci.h>
|
|
#include <linux/module.h>
|
|
#include <linux/version.h>
|
|
+#include <linux/pci.h>
|
|
#include <asm/io.h>
|
|
#include <xen/balloon.h>
|
|
#include <xen/gnttab.h>
|
|
@@ -275,7 +276,7 @@ int dma_declare_coherent_memory(struct d
|
|
{
|
|
void __iomem *mem_base = NULL;
|
|
int pages = size >> PAGE_SHIFT;
|
|
- int bitmap_size = (pages + 31)/32;
|
|
+ int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
|
|
|
|
if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0)
|
|
goto out;
|
|
@@ -348,6 +349,32 @@ void *dma_mark_declared_memory_occupied(
|
|
EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
|
|
#endif /* ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY */
|
|
|
|
+#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
|
|
+/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
|
|
+
|
|
+int forbid_dac;
|
|
+EXPORT_SYMBOL(forbid_dac);
|
|
+
|
|
+static __devinit void via_no_dac(struct pci_dev *dev)
|
|
+{
|
|
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) {
|
|
+ printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n");
|
|
+ forbid_dac = 1;
|
|
+ }
|
|
+}
|
|
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac);
|
|
+
|
|
+static int check_iommu(char *s)
|
|
+{
|
|
+ if (!strcmp(s, "usedac")) {
|
|
+ forbid_dac = -1;
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+__setup("iommu=", check_iommu);
|
|
+#endif
|
|
+
|
|
dma_addr_t
|
|
dma_map_single(struct device *dev, void *ptr, size_t size,
|
|
enum dma_data_direction direction)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/process_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/process_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -21,7 +21,6 @@
|
|
#include <linux/mm.h>
|
|
#include <linux/elfcore.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/vmalloc.h>
|
|
@@ -39,6 +38,7 @@
|
|
#include <linux/random.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/tick.h>
|
|
+#include <linux/percpu.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -61,7 +61,6 @@
|
|
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/cpu.h>
|
|
-#include <asm/pda.h>
|
|
|
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
|
|
@@ -70,6 +69,12 @@ static int hlt_counter;
|
|
unsigned long boot_option_idle_override = 0;
|
|
EXPORT_SYMBOL(boot_option_idle_override);
|
|
|
|
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
|
|
+EXPORT_PER_CPU_SYMBOL(current_task);
|
|
+
|
|
+DEFINE_PER_CPU(int, cpu_number);
|
|
+EXPORT_PER_CPU_SYMBOL(cpu_number);
|
|
+
|
|
/*
|
|
* Return saved PC of a blocked thread.
|
|
*/
|
|
@@ -168,6 +173,7 @@ void cpu_idle(void)
|
|
if (__get_cpu_var(cpu_idle_state))
|
|
__get_cpu_var(cpu_idle_state) = 0;
|
|
|
|
+ check_pgt_cache();
|
|
rmb();
|
|
idle = xen_idle; /* no alternatives */
|
|
|
|
@@ -218,18 +224,19 @@ void __devinit select_idle_routine(const
|
|
{
|
|
}
|
|
|
|
-static int __init idle_setup (char *str)
|
|
+static int __init idle_setup(char *str)
|
|
{
|
|
- if (!strncmp(str, "poll", 4)) {
|
|
+ if (!strcmp(str, "poll")) {
|
|
printk("using polling idle threads.\n");
|
|
pm_idle = poll_idle;
|
|
}
|
|
+ else
|
|
+ return -1;
|
|
|
|
boot_option_idle_override = 1;
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
-
|
|
-__setup("idle=", idle_setup);
|
|
+early_param("idle", idle_setup);
|
|
|
|
void show_regs(struct pt_regs * regs)
|
|
{
|
|
@@ -282,7 +289,7 @@ int kernel_thread(int (*fn)(void *), voi
|
|
|
|
regs.xds = __USER_DS;
|
|
regs.xes = __USER_DS;
|
|
- regs.xfs = __KERNEL_PDA;
|
|
+ regs.xfs = __KERNEL_PERCPU;
|
|
regs.orig_eax = -1;
|
|
regs.eip = (unsigned long) kernel_thread_helper;
|
|
regs.xcs = __KERNEL_CS | get_kernel_rpl();
|
|
@@ -562,7 +569,7 @@ struct task_struct fastcall * __switch_t
|
|
* multicall to indicate FPU task switch, rather than
|
|
* synchronously trapping to Xen.
|
|
*/
|
|
- if (prev_p->thread_info->status & TS_USEDFPU) {
|
|
+ if (task_thread_info(prev_p)->status & TS_USEDFPU) {
|
|
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
|
|
mcl->op = __HYPERVISOR_fpu_taskswitch;
|
|
mcl->args[0] = 1;
|
|
@@ -669,7 +676,7 @@ struct task_struct fastcall * __switch_t
|
|
if (prev->gs | next->gs)
|
|
loadsegment(gs, next->gs);
|
|
|
|
- write_pda(pcurrent, next_p);
|
|
+ x86_write_percpu(current_task, next_p);
|
|
|
|
return prev_p;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,7 +13,6 @@
|
|
#include <linux/mm.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/spinlock.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/cache.h>
|
|
@@ -216,7 +215,6 @@ static cpumask_t flush_cpumask;
|
|
static struct mm_struct * flush_mm;
|
|
static unsigned long flush_va;
|
|
static DEFINE_SPINLOCK(tlbstate_lock);
|
|
-#define FLUSH_ALL 0xffffffff
|
|
|
|
/*
|
|
* We cannot call mmdrop() because we are in interrupt context,
|
|
@@ -298,7 +296,7 @@ irqreturn_t smp_invalidate_interrupt(int
|
|
|
|
if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) {
|
|
if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) {
|
|
- if (flush_va == FLUSH_ALL)
|
|
+ if (flush_va == TLB_FLUSH_ALL)
|
|
local_flush_tlb();
|
|
else
|
|
__flush_tlb_one(flush_va);
|
|
@@ -314,9 +312,11 @@ out:
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
-static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
|
|
- unsigned long va)
|
|
+void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
|
|
+ unsigned long va)
|
|
{
|
|
+ cpumask_t cpumask = *cpumaskp;
|
|
+
|
|
/*
|
|
* A couple of (to be removed) sanity checks:
|
|
*
|
|
@@ -327,10 +327,12 @@ static void flush_tlb_others(cpumask_t c
|
|
BUG_ON(cpu_isset(smp_processor_id(), cpumask));
|
|
BUG_ON(!mm);
|
|
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
/* If a CPU which we ran on has gone down, OK. */
|
|
cpus_and(cpumask, cpumask, cpu_online_map);
|
|
- if (cpus_empty(cpumask))
|
|
+ if (unlikely(cpus_empty(cpumask)))
|
|
return;
|
|
+#endif
|
|
|
|
/*
|
|
* i'm not happy about this global shared spinlock in the
|
|
@@ -341,17 +343,7 @@ static void flush_tlb_others(cpumask_t c
|
|
|
|
flush_mm = mm;
|
|
flush_va = va;
|
|
-#if NR_CPUS <= BITS_PER_LONG
|
|
- atomic_set_mask(cpumask, &flush_cpumask);
|
|
-#else
|
|
- {
|
|
- int k;
|
|
- unsigned long *flush_mask = (unsigned long *)&flush_cpumask;
|
|
- unsigned long *cpu_mask = (unsigned long *)&cpumask;
|
|
- for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k)
|
|
- atomic_set_mask(cpu_mask[k], &flush_mask[k]);
|
|
- }
|
|
-#endif
|
|
+ cpus_or(flush_cpumask, cpumask, flush_cpumask);
|
|
/*
|
|
* We have to send the IPI only to
|
|
* CPUs affected.
|
|
@@ -378,7 +370,7 @@ void flush_tlb_current_task(void)
|
|
|
|
local_flush_tlb();
|
|
if (!cpus_empty(cpu_mask))
|
|
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
|
|
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
|
|
preempt_enable();
|
|
}
|
|
|
|
@@ -397,7 +389,7 @@ void flush_tlb_mm (struct mm_struct * mm
|
|
leave_mm(smp_processor_id());
|
|
}
|
|
if (!cpus_empty(cpu_mask))
|
|
- flush_tlb_others(cpu_mask, mm, FLUSH_ALL);
|
|
+ flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
|
|
|
|
preempt_enable();
|
|
}
|
|
@@ -446,7 +438,7 @@ void flush_tlb_all(void)
|
|
* it goes straight through and wastes no time serializing
|
|
* anything. Worst case is that we lose a reschedule ...
|
|
*/
|
|
-void smp_send_reschedule(int cpu)
|
|
+void xen_smp_send_reschedule(int cpu)
|
|
{
|
|
WARN_ON(cpu_is_offline(cpu));
|
|
send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
|
|
@@ -478,36 +470,79 @@ void unlock_ipi_call_lock(void)
|
|
|
|
static struct call_data_struct *call_data;
|
|
|
|
+static void __smp_call_function(void (*func) (void *info), void *info,
|
|
+ int nonatomic, int wait)
|
|
+{
|
|
+ struct call_data_struct data;
|
|
+ int cpus = num_online_cpus() - 1;
|
|
+
|
|
+ if (!cpus)
|
|
+ return;
|
|
+
|
|
+ data.func = func;
|
|
+ data.info = info;
|
|
+ atomic_set(&data.started, 0);
|
|
+ data.wait = wait;
|
|
+ if (wait)
|
|
+ atomic_set(&data.finished, 0);
|
|
+
|
|
+ call_data = &data;
|
|
+ mb();
|
|
+
|
|
+ /* Send a message to all other CPUs and wait for them to respond */
|
|
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
|
+
|
|
+ /* Wait for response */
|
|
+ while (atomic_read(&data.started) != cpus)
|
|
+ cpu_relax();
|
|
+
|
|
+ if (wait)
|
|
+ while (atomic_read(&data.finished) != cpus)
|
|
+ cpu_relax();
|
|
+}
|
|
+
|
|
+
|
|
/**
|
|
- * smp_call_function(): Run a function on all other CPUs.
|
|
+ * smp_call_function_mask(): Run a function on a set of other CPUs.
|
|
+ * @mask: The set of cpus to run on. Must not include the current cpu.
|
|
* @func: The function to run. This must be fast and non-blocking.
|
|
* @info: An arbitrary pointer to pass to the function.
|
|
- * @nonatomic: currently unused.
|
|
* @wait: If true, wait (atomically) until function has completed on other CPUs.
|
|
*
|
|
- * Returns 0 on success, else a negative status code. Does not return until
|
|
- * remote CPUs are nearly ready to execute <<func>> or are or have executed.
|
|
+ * Returns 0 on success, else a negative status code.
|
|
+ *
|
|
+ * If @wait is true, then returns once @func has returned; otherwise
|
|
+ * it returns just before the target cpu calls @func.
|
|
*
|
|
* You must not call this function with disabled interrupts or from a
|
|
* hardware interrupt handler or from a bottom half handler.
|
|
*/
|
|
-int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
|
|
- int wait)
|
|
+int
|
|
+xen_smp_call_function_mask(cpumask_t mask,
|
|
+ void (*func)(void *), void *info,
|
|
+ int wait)
|
|
{
|
|
struct call_data_struct data;
|
|
+ cpumask_t allbutself;
|
|
int cpus;
|
|
|
|
+ /* Can deadlock when called with interrupts disabled */
|
|
+ WARN_ON(irqs_disabled());
|
|
+
|
|
/* Holding any lock stops cpus from going down. */
|
|
spin_lock(&call_lock);
|
|
- cpus = num_online_cpus() - 1;
|
|
+
|
|
+ allbutself = cpu_online_map;
|
|
+ cpu_clear(smp_processor_id(), allbutself);
|
|
+
|
|
+ cpus_and(mask, mask, allbutself);
|
|
+ cpus = cpus_weight(mask);
|
|
+
|
|
if (!cpus) {
|
|
spin_unlock(&call_lock);
|
|
return 0;
|
|
}
|
|
|
|
- /* Can deadlock when called with interrupts disabled */
|
|
- WARN_ON(irqs_disabled());
|
|
-
|
|
data.func = func;
|
|
data.info = info;
|
|
atomic_set(&data.started, 0);
|
|
@@ -517,9 +552,12 @@ int smp_call_function (void (*func) (voi
|
|
|
|
call_data = &data;
|
|
mb();
|
|
-
|
|
- /* Send a message to all other CPUs and wait for them to respond */
|
|
- send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
|
+
|
|
+ /* Send a message to other CPUs */
|
|
+ if (cpus_equal(mask, allbutself))
|
|
+ send_IPI_allbutself(CALL_FUNCTION_VECTOR);
|
|
+ else
|
|
+ send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
|
|
|
|
/* Wait for response */
|
|
while (atomic_read(&data.started) != cpus)
|
|
@@ -532,15 +570,14 @@ int smp_call_function (void (*func) (voi
|
|
|
|
return 0;
|
|
}
|
|
-EXPORT_SYMBOL(smp_call_function);
|
|
|
|
static void stop_this_cpu (void * dummy)
|
|
{
|
|
+ local_irq_disable();
|
|
/*
|
|
* Remove this CPU:
|
|
*/
|
|
cpu_clear(smp_processor_id(), cpu_online_map);
|
|
- local_irq_disable();
|
|
disable_all_local_evtchn();
|
|
if (cpu_data[smp_processor_id()].hlt_works_ok)
|
|
for(;;) halt();
|
|
@@ -551,13 +588,18 @@ static void stop_this_cpu (void * dummy)
|
|
* this function calls the 'stop' function on all other CPUs in the system.
|
|
*/
|
|
|
|
-void smp_send_stop(void)
|
|
+void xen_smp_send_stop(void)
|
|
{
|
|
- smp_call_function(stop_this_cpu, NULL, 1, 0);
|
|
+ /* Don't deadlock on the call lock in panic */
|
|
+ int nolock = !spin_trylock(&call_lock);
|
|
+ unsigned long flags;
|
|
|
|
- local_irq_disable();
|
|
+ local_irq_save(flags);
|
|
+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
|
|
+ if (!nolock)
|
|
+ spin_unlock(&call_lock);
|
|
disable_all_local_evtchn();
|
|
- local_irq_enable();
|
|
+ local_irq_restore(flags);
|
|
}
|
|
|
|
/*
|
|
@@ -598,74 +640,3 @@ irqreturn_t smp_call_function_interrupt(
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
-
|
|
-/*
|
|
- * this function sends a 'generic call function' IPI to one other CPU
|
|
- * in the system.
|
|
- *
|
|
- * cpu is a standard Linux logical CPU number.
|
|
- */
|
|
-static void
|
|
-__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
|
- int nonatomic, int wait)
|
|
-{
|
|
- struct call_data_struct data;
|
|
- int cpus = 1;
|
|
-
|
|
- data.func = func;
|
|
- data.info = info;
|
|
- atomic_set(&data.started, 0);
|
|
- data.wait = wait;
|
|
- if (wait)
|
|
- atomic_set(&data.finished, 0);
|
|
-
|
|
- call_data = &data;
|
|
- wmb();
|
|
- /* Send a message to all other CPUs and wait for them to respond */
|
|
- send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
|
|
-
|
|
- /* Wait for response */
|
|
- while (atomic_read(&data.started) != cpus)
|
|
- cpu_relax();
|
|
-
|
|
- if (!wait)
|
|
- return;
|
|
-
|
|
- while (atomic_read(&data.finished) != cpus)
|
|
- cpu_relax();
|
|
-}
|
|
-
|
|
-/*
|
|
- * smp_call_function_single - Run a function on another CPU
|
|
- * @func: The function to run. This must be fast and non-blocking.
|
|
- * @info: An arbitrary pointer to pass to the function.
|
|
- * @nonatomic: Currently unused.
|
|
- * @wait: If true, wait until function has completed on other CPUs.
|
|
- *
|
|
- * Retrurns 0 on success, else a negative status code.
|
|
- *
|
|
- * Does not return until the remote CPU is nearly ready to execute <func>
|
|
- * or is or has executed.
|
|
- */
|
|
-
|
|
-int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
|
- int nonatomic, int wait)
|
|
-{
|
|
- /* prevent preemption and reschedule on another processor */
|
|
- int me = get_cpu();
|
|
- if (cpu == me) {
|
|
- WARN_ON(1);
|
|
- put_cpu();
|
|
- return -EBUSY;
|
|
- }
|
|
-
|
|
- /* Can deadlock when called with interrupts disabled */
|
|
- WARN_ON(irqs_disabled());
|
|
-
|
|
- spin_lock_bh(&call_lock);
|
|
- __smp_call_function_single(cpu, func, info, nonatomic, wait);
|
|
- spin_unlock_bh(&call_lock);
|
|
- put_cpu();
|
|
- return 0;
|
|
-}
|
|
-EXPORT_SYMBOL(smp_call_function_single);
|
|
--- head-2011-03-11.orig/arch/x86/kernel/time-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/time-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -79,7 +79,6 @@
|
|
#include <asm/i8253.h>
|
|
DEFINE_SPINLOCK(i8253_lock);
|
|
EXPORT_SYMBOL(i8253_lock);
|
|
-int pit_latch_buggy; /* extern */
|
|
#else
|
|
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
|
|
#endif
|
|
@@ -199,6 +198,36 @@ static inline u64 scale_delta(u64 delta,
|
|
return product;
|
|
}
|
|
|
|
+static inline u64 get64(volatile u64 *ptr)
|
|
+{
|
|
+#ifndef CONFIG_64BIT
|
|
+ u64 res;
|
|
+ __asm__("movl %%ebx,%%eax\n"
|
|
+ "movl %%ecx,%%edx\n"
|
|
+ LOCK_PREFIX "cmpxchg8b %1"
|
|
+ : "=&A" (res) : "m" (*ptr));
|
|
+ return res;
|
|
+#else
|
|
+ return *ptr;
|
|
+#define cmpxchg64 cmpxchg
|
|
+#endif
|
|
+}
|
|
+
|
|
+static inline u64 get64_local(volatile u64 *ptr)
|
|
+{
|
|
+#ifndef CONFIG_64BIT
|
|
+ u64 res;
|
|
+ __asm__("movl %%ebx,%%eax\n"
|
|
+ "movl %%ecx,%%edx\n"
|
|
+ "cmpxchg8b %1"
|
|
+ : "=&A" (res) : "m" (*ptr));
|
|
+ return res;
|
|
+#else
|
|
+ return *ptr;
|
|
+#define cmpxchg64_local cmpxchg_local
|
|
+#endif
|
|
+}
|
|
+
|
|
static void init_cpu_khz(void)
|
|
{
|
|
u64 __cpu_khz = 1000000ULL << 32;
|
|
@@ -378,7 +407,7 @@ static int set_rtc_mmss(unsigned long no
|
|
return retval;
|
|
}
|
|
|
|
-unsigned long long sched_clock(void)
|
|
+static unsigned long long local_clock(void)
|
|
{
|
|
unsigned int cpu = get_cpu();
|
|
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
|
|
@@ -399,6 +428,61 @@ unsigned long long sched_clock(void)
|
|
return time;
|
|
}
|
|
|
|
+/*
|
|
+ * Runstate accounting
|
|
+ */
|
|
+static void get_runstate_snapshot(struct vcpu_runstate_info *res)
|
|
+{
|
|
+ u64 state_time;
|
|
+ struct vcpu_runstate_info *state;
|
|
+
|
|
+ BUG_ON(preemptible());
|
|
+
|
|
+ state = &__get_cpu_var(runstate);
|
|
+
|
|
+ do {
|
|
+ state_time = get64_local(&state->state_entry_time);
|
|
+ *res = *state;
|
|
+ } while (get64_local(&state->state_entry_time) != state_time);
|
|
+
|
|
+ WARN_ON_ONCE(res->state != RUNSTATE_running);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Xen sched_clock implementation. Returns the number of unstolen
|
|
+ * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED
|
|
+ * states.
|
|
+ */
|
|
+unsigned long long sched_clock(void)
|
|
+{
|
|
+ struct vcpu_runstate_info runstate;
|
|
+ cycle_t now;
|
|
+ u64 ret;
|
|
+ s64 offset;
|
|
+
|
|
+ /*
|
|
+ * Ideally sched_clock should be called on a per-cpu basis
|
|
+ * anyway, so preempt should already be disabled, but that's
|
|
+ * not current practice at the moment.
|
|
+ */
|
|
+ preempt_disable();
|
|
+
|
|
+ now = local_clock();
|
|
+
|
|
+ get_runstate_snapshot(&runstate);
|
|
+
|
|
+ offset = now - runstate.state_entry_time;
|
|
+ if (offset < 0)
|
|
+ offset = 0;
|
|
+
|
|
+ ret = offset + runstate.time[RUNSTATE_running]
|
|
+ + runstate.time[RUNSTATE_blocked];
|
|
+
|
|
+ preempt_enable();
|
|
+
|
|
+ return ret;
|
|
+}
|
|
+
|
|
unsigned long profile_pc(struct pt_regs *regs)
|
|
{
|
|
unsigned long pc = instruction_pointer(regs);
|
|
@@ -446,11 +530,10 @@ EXPORT_SYMBOL(profile_pc);
|
|
irqreturn_t timer_interrupt(int irq, void *dev_id)
|
|
{
|
|
s64 delta, delta_cpu, stolen, blocked;
|
|
- u64 sched_time;
|
|
unsigned int i, cpu = smp_processor_id();
|
|
int schedule_clock_was_set_work = 0;
|
|
struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu);
|
|
- struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
|
|
+ struct vcpu_runstate_info runstate;
|
|
|
|
/*
|
|
* Here we are in the timer irq handler. We just have irqs locally
|
|
@@ -470,20 +553,7 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
delta -= processed_system_time;
|
|
delta_cpu -= per_cpu(processed_system_time, cpu);
|
|
|
|
- /*
|
|
- * Obtain a consistent snapshot of stolen/blocked cycles. We
|
|
- * can use state_entry_time to detect if we get preempted here.
|
|
- */
|
|
- do {
|
|
- sched_time = runstate->state_entry_time;
|
|
- barrier();
|
|
- stolen = runstate->time[RUNSTATE_runnable] +
|
|
- runstate->time[RUNSTATE_offline] -
|
|
- per_cpu(processed_stolen_time, cpu);
|
|
- blocked = runstate->time[RUNSTATE_blocked] -
|
|
- per_cpu(processed_blocked_time, cpu);
|
|
- barrier();
|
|
- } while (sched_time != runstate->state_entry_time);
|
|
+ get_runstate_snapshot(&runstate);
|
|
} while (!time_values_up_to_date(cpu));
|
|
|
|
if ((unlikely(delta < -(s64)permitted_clock_jitter) ||
|
|
@@ -528,6 +598,9 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
* HACK: Passing NULL to account_steal_time()
|
|
* ensures that the ticks are accounted as stolen.
|
|
*/
|
|
+ stolen = runstate.time[RUNSTATE_runnable]
|
|
+ + runstate.time[RUNSTATE_offline]
|
|
+ - per_cpu(processed_stolen_time, cpu);
|
|
if ((stolen > 0) && (delta_cpu > 0)) {
|
|
delta_cpu -= stolen;
|
|
if (unlikely(delta_cpu < 0))
|
|
@@ -543,6 +616,8 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
* HACK: Passing idle_task to account_steal_time()
|
|
* ensures that the ticks are accounted as idle/wait.
|
|
*/
|
|
+ blocked = runstate.time[RUNSTATE_blocked]
|
|
+ - per_cpu(processed_blocked_time, cpu);
|
|
if ((blocked > 0) && (delta_cpu > 0)) {
|
|
delta_cpu -= blocked;
|
|
if (unlikely(delta_cpu < 0))
|
|
@@ -579,7 +654,7 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
-void mark_tsc_unstable(void)
|
|
+void mark_tsc_unstable(char *reason)
|
|
{
|
|
#ifndef CONFIG_XEN /* XXX Should tell the hypervisor about this fact. */
|
|
tsc_unstable = 1;
|
|
@@ -587,17 +662,13 @@ void mark_tsc_unstable(void)
|
|
}
|
|
EXPORT_SYMBOL_GPL(mark_tsc_unstable);
|
|
|
|
+static cycle_t cs_last;
|
|
+
|
|
static cycle_t xen_clocksource_read(void)
|
|
{
|
|
#ifdef CONFIG_SMP
|
|
- static cycle_t last_ret;
|
|
-#ifndef CONFIG_64BIT
|
|
- cycle_t last = cmpxchg64(&last_ret, 0, 0);
|
|
-#else
|
|
- cycle_t last = last_ret;
|
|
-#define cmpxchg64 cmpxchg
|
|
-#endif
|
|
- cycle_t ret = sched_clock();
|
|
+ cycle_t last = get64(&cs_last);
|
|
+ cycle_t ret = local_clock();
|
|
|
|
if (unlikely((s64)(ret - last) < 0)) {
|
|
if (last - ret > permitted_clock_jitter
|
|
@@ -616,17 +687,25 @@ static cycle_t xen_clocksource_read(void
|
|
}
|
|
|
|
for (;;) {
|
|
- cycle_t cur = cmpxchg64(&last_ret, last, ret);
|
|
+ cycle_t cur = cmpxchg64(&cs_last, last, ret);
|
|
|
|
if (cur == last || (s64)(ret - cur) < 0)
|
|
return ret;
|
|
last = cur;
|
|
}
|
|
#else
|
|
- return sched_clock();
|
|
+ return local_clock();
|
|
#endif
|
|
}
|
|
|
|
+static void xen_clocksource_resume(void)
|
|
+{
|
|
+ extern void time_resume(void);
|
|
+
|
|
+ time_resume();
|
|
+ cs_last = local_clock();
|
|
+}
|
|
+
|
|
static struct clocksource clocksource_xen = {
|
|
.name = "xen",
|
|
.rating = 400,
|
|
@@ -635,19 +714,29 @@ static struct clocksource clocksource_xe
|
|
.mult = 1 << XEN_SHIFT, /* time directly in nanoseconds */
|
|
.shift = XEN_SHIFT,
|
|
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
|
|
+ .resume = xen_clocksource_resume,
|
|
};
|
|
|
|
-static void init_missing_ticks_accounting(unsigned int cpu)
|
|
+struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu)
|
|
{
|
|
struct vcpu_register_runstate_memory_area area;
|
|
struct vcpu_runstate_info *runstate = &per_cpu(runstate, cpu);
|
|
int rc;
|
|
|
|
- memset(runstate, 0, sizeof(*runstate));
|
|
-
|
|
- area.addr.v = runstate;
|
|
+ set_xen_guest_handle(area.addr.h, runstate);
|
|
rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, &area);
|
|
- WARN_ON(rc && rc != -ENOSYS);
|
|
+ if (rc) {
|
|
+ BUILD_BUG_ON(RUNSTATE_running);
|
|
+ memset(runstate, 0, sizeof(*runstate));
|
|
+ WARN_ON(rc != -ENOSYS);
|
|
+ }
|
|
+
|
|
+ return runstate;
|
|
+}
|
|
+
|
|
+static void init_missing_ticks_accounting(unsigned int cpu)
|
|
+{
|
|
+ struct vcpu_runstate_info *runstate = setup_runstate_area(cpu);
|
|
|
|
per_cpu(processed_blocked_time, cpu) =
|
|
runstate->time[RUNSTATE_blocked];
|
|
@@ -723,35 +812,6 @@ void notify_arch_cmos_timer(void)
|
|
mod_timer(&sync_xen_wallclock_timer, jiffies + 1);
|
|
}
|
|
|
|
-static int timer_resume(struct sys_device *dev)
|
|
-{
|
|
- extern void time_resume(void);
|
|
- time_resume();
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static struct sysdev_class timer_sysclass = {
|
|
- .resume = timer_resume,
|
|
- set_kset_name("timer"),
|
|
-};
|
|
-
|
|
-
|
|
-/* XXX this driverfs stuff should probably go elsewhere later -john */
|
|
-static struct sys_device device_timer = {
|
|
- .id = 0,
|
|
- .cls = &timer_sysclass,
|
|
-};
|
|
-
|
|
-static int time_init_device(void)
|
|
-{
|
|
- int error = sysdev_class_register(&timer_sysclass);
|
|
- if (!error)
|
|
- error = sysdev_register(&device_timer);
|
|
- return error;
|
|
-}
|
|
-
|
|
-device_initcall(time_init_device);
|
|
-
|
|
extern void (*late_time_init)(void);
|
|
|
|
/* Dynamically-mapped IRQ. */
|
|
@@ -895,21 +955,21 @@ static void start_hz_timer(void)
|
|
cpu_clear(cpu, nohz_cpu_mask);
|
|
}
|
|
|
|
-void raw_safe_halt(void)
|
|
+void xen_safe_halt(void)
|
|
{
|
|
stop_hz_timer();
|
|
/* Blocking includes an implicit local_irq_enable(). */
|
|
HYPERVISOR_block();
|
|
start_hz_timer();
|
|
}
|
|
-EXPORT_SYMBOL(raw_safe_halt);
|
|
+EXPORT_SYMBOL(xen_safe_halt);
|
|
|
|
-void halt(void)
|
|
+void xen_halt(void)
|
|
{
|
|
if (irqs_disabled())
|
|
VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
|
|
}
|
|
-EXPORT_SYMBOL(halt);
|
|
+EXPORT_SYMBOL(xen_halt);
|
|
|
|
/* No locking required. Interrupts are disabled on all CPUs. */
|
|
void time_resume(void)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/traps_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/traps_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -52,7 +52,7 @@
|
|
#include <asm/unwind.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/arch_hooks.h>
|
|
-#include <asm/kdebug.h>
|
|
+#include <linux/kdebug.h>
|
|
#include <asm/stacktrace.h>
|
|
|
|
#include <linux/module.h>
|
|
@@ -101,20 +101,6 @@ asmlinkage void machine_check(void);
|
|
|
|
int kstack_depth_to_print = 24;
|
|
static unsigned int code_bytes = 64;
|
|
-ATOMIC_NOTIFIER_HEAD(i386die_chain);
|
|
-
|
|
-int register_die_notifier(struct notifier_block *nb)
|
|
-{
|
|
- vmalloc_sync_all();
|
|
- return atomic_notifier_chain_register(&i386die_chain, nb);
|
|
-}
|
|
-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
|
|
-
|
|
-int unregister_die_notifier(struct notifier_block *nb)
|
|
-{
|
|
- return atomic_notifier_chain_unregister(&i386die_chain, nb);
|
|
-}
|
|
-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
|
|
|
|
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
|
|
{
|
|
@@ -325,7 +311,7 @@ void show_registers(struct pt_regs *regs
|
|
regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss);
|
|
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)",
|
|
TASK_COMM_LEN, current->comm, current->pid,
|
|
- current_thread_info(), current, current->thread_info);
|
|
+ current_thread_info(), current, task_thread_info(current));
|
|
/*
|
|
* When in-kernel, we also print out the stack and code at the
|
|
* time of the fault..
|
|
@@ -482,8 +468,6 @@ static void __kprobes do_trap(int trapnr
|
|
siginfo_t *info)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
|
|
if (regs->eflags & VM_MASK) {
|
|
if (vm86)
|
|
@@ -495,6 +479,18 @@ static void __kprobes do_trap(int trapnr
|
|
goto kernel_trap;
|
|
|
|
trap_signal: {
|
|
+ /*
|
|
+ * We want error_code and trap_no set for userspace faults and
|
|
+ * kernelspace faults which result in die(), but not
|
|
+ * kernelspace faults which are fixed up. die() gives the
|
|
+ * process no chance to handle the signal and notice the
|
|
+ * kernel fault information, so that won't result in polluting
|
|
+ * the information about previously queued, but not yet
|
|
+ * delivered, faults. See also do_general_protection below.
|
|
+ */
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
+
|
|
if (info)
|
|
force_sig_info(signr, info, tsk);
|
|
else
|
|
@@ -503,8 +499,11 @@ static void __kprobes do_trap(int trapnr
|
|
}
|
|
|
|
kernel_trap: {
|
|
- if (!fixup_exception(regs))
|
|
+ if (!fixup_exception(regs)) {
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
die(str, regs, error_code);
|
|
+ }
|
|
return;
|
|
}
|
|
|
|
@@ -578,9 +577,6 @@ DO_ERROR_INFO(32, SIGSEGV, "iret excepti
|
|
fastcall void __kprobes do_general_protection(struct pt_regs * regs,
|
|
long error_code)
|
|
{
|
|
- current->thread.error_code = error_code;
|
|
- current->thread.trap_no = 13;
|
|
-
|
|
if (regs->eflags & VM_MASK)
|
|
goto gp_in_vm86;
|
|
|
|
@@ -599,6 +595,8 @@ gp_in_vm86:
|
|
|
|
gp_in_kernel:
|
|
if (!fixup_exception(regs)) {
|
|
+ current->thread.error_code = error_code;
|
|
+ current->thread.trap_no = 13;
|
|
if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
error_code, 13, SIGSEGV) == NOTIFY_STOP)
|
|
return;
|
|
@@ -987,9 +985,7 @@ fastcall void do_spurious_interrupt_bug(
|
|
fastcall unsigned long patch_espfix_desc(unsigned long uesp,
|
|
unsigned long kesp)
|
|
{
|
|
- int cpu = smp_processor_id();
|
|
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
- struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address;
|
|
+ struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt;
|
|
unsigned long base = (kesp - uesp) & -THREAD_SIZE;
|
|
unsigned long new_kesp = kesp - base;
|
|
unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
|
|
--- head-2011-03-11.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/fault_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -14,19 +14,20 @@
|
|
#include <linux/mman.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/init.h>
|
|
#include <linux/tty.h>
|
|
#include <linux/vt_kern.h> /* For unblank_screen() */
|
|
#include <linux/highmem.h>
|
|
+#include <linux/bootmem.h> /* for max_low_pfn */
|
|
+#include <linux/vmalloc.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kprobes.h>
|
|
#include <linux/uaccess.h>
|
|
+#include <linux/kdebug.h>
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/desc.h>
|
|
-#include <asm/kdebug.h>
|
|
#include <asm/segment.h>
|
|
|
|
extern void die(const char *,struct pt_regs *,long);
|
|
@@ -259,25 +260,20 @@ static void dump_fault_path(unsigned lon
|
|
unsigned long page;
|
|
|
|
page = read_cr3();
|
|
- page = ((unsigned long *) __va(page))[address >> 22];
|
|
- if (oops_may_print())
|
|
- printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
|
|
- machine_to_phys(page));
|
|
+ page = ((unsigned long *) __va(page))[address >> PGDIR_SHIFT];
|
|
+ printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
|
|
+ machine_to_phys(page));
|
|
/*
|
|
* We must not directly access the pte in the highpte
|
|
* case if the page table is located in highmem.
|
|
* And lets rather not kmap-atomic the pte, just in case
|
|
* it's allocated already.
|
|
*/
|
|
-#ifdef CONFIG_HIGHPTE
|
|
- if ((page >> PAGE_SHIFT) >= highstart_pfn)
|
|
- return;
|
|
-#endif
|
|
- if ((page & 1) && oops_may_print()) {
|
|
- page &= PAGE_MASK;
|
|
- address &= 0x003ff000;
|
|
- page = machine_to_phys(page);
|
|
- page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT];
|
|
+ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn
|
|
+ && (page & _PAGE_PRESENT)) {
|
|
+ page = machine_to_phys(page & PAGE_MASK);
|
|
+ page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT)
|
|
+ & (PTRS_PER_PTE - 1)];
|
|
printk(KERN_ALERT "*pte = ma %08lx pa %08lx\n", page,
|
|
machine_to_phys(page));
|
|
}
|
|
@@ -581,6 +577,11 @@ bad_area:
|
|
bad_area_nosemaphore:
|
|
/* User mode accesses just cause a SIGSEGV */
|
|
if (error_code & 4) {
|
|
+ /*
|
|
+ * It's possible to have interrupts off here.
|
|
+ */
|
|
+ local_irq_enable();
|
|
+
|
|
/*
|
|
* Valid to do another page fault here because this one came
|
|
* from user space.
|
|
@@ -633,7 +634,7 @@ no_context:
|
|
bust_spinlocks(1);
|
|
|
|
if (oops_may_print()) {
|
|
- #ifdef CONFIG_X86_PAE
|
|
+#ifdef CONFIG_X86_PAE
|
|
if (error_code & 16) {
|
|
pte_t *pte = lookup_address(address);
|
|
|
|
@@ -642,7 +643,7 @@ no_context:
|
|
"NX-protected page - exploit attempt? "
|
|
"(uid: %d)\n", current->uid);
|
|
}
|
|
- #endif
|
|
+#endif
|
|
if (address < PAGE_SIZE)
|
|
printk(KERN_ALERT "BUG: unable to handle kernel NULL "
|
|
"pointer dereference");
|
|
@@ -652,8 +653,8 @@ no_context:
|
|
printk(" at virtual address %08lx\n",address);
|
|
printk(KERN_ALERT " printing eip:\n");
|
|
printk("%08lx\n", regs->eip);
|
|
+ dump_fault_path(address);
|
|
}
|
|
- dump_fault_path(address);
|
|
tsk->thread.cr2 = address;
|
|
tsk->thread.trap_no = 14;
|
|
tsk->thread.error_code = error_code;
|
|
@@ -694,7 +695,6 @@ do_sigbus:
|
|
force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
|
|
}
|
|
|
|
-#if !HAVE_SHARED_KERNEL_PMD
|
|
void vmalloc_sync_all(void)
|
|
{
|
|
/*
|
|
@@ -710,6 +710,9 @@ void vmalloc_sync_all(void)
|
|
static unsigned long start = TASK_SIZE;
|
|
unsigned long address;
|
|
|
|
+ if (SHARED_KERNEL_PMD)
|
|
+ return;
|
|
+
|
|
BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK);
|
|
for (address = start;
|
|
address >= TASK_SIZE && address < hypervisor_virt_start;
|
|
@@ -752,4 +755,3 @@ void vmalloc_sync_all(void)
|
|
start = address + (1UL << PMD_SHIFT);
|
|
}
|
|
}
|
|
-#endif
|
|
--- head-2011-03-11.orig/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/highmem_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -26,7 +26,7 @@ void kunmap(struct page *page)
|
|
* However when holding an atomic kmap is is not legal to sleep, so atomic
|
|
* kmaps are appropriate for short, tight code paths only.
|
|
*/
|
|
-static void *__kmap_atomic(struct page *page, enum km_type type, pgprot_t prot)
|
|
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
|
|
{
|
|
enum fixed_addresses idx;
|
|
unsigned long vaddr;
|
|
@@ -49,15 +49,7 @@ static void *__kmap_atomic(struct page *
|
|
|
|
void *kmap_atomic(struct page *page, enum km_type type)
|
|
{
|
|
- return __kmap_atomic(page, type, kmap_prot);
|
|
-}
|
|
-
|
|
-/* Same as kmap_atomic but with PAGE_KERNEL_RO page protection. */
|
|
-void *kmap_atomic_pte(struct page *page, enum km_type type)
|
|
-{
|
|
- return __kmap_atomic(page, type,
|
|
- test_bit(PG_pinned, &page->flags)
|
|
- ? PAGE_KERNEL_RO : kmap_prot);
|
|
+ return kmap_atomic_prot(page, type, kmap_prot);
|
|
}
|
|
|
|
void kunmap_atomic(void *kvaddr, enum km_type type)
|
|
@@ -80,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km
|
|
#endif
|
|
}
|
|
|
|
+ /*arch_flush_lazy_mmu_mode();*/
|
|
pagefault_enable();
|
|
}
|
|
|
|
@@ -162,7 +155,6 @@ void copy_highpage(struct page *to, stru
|
|
EXPORT_SYMBOL(kmap);
|
|
EXPORT_SYMBOL(kunmap);
|
|
EXPORT_SYMBOL(kmap_atomic);
|
|
-EXPORT_SYMBOL(kmap_atomic_pte);
|
|
EXPORT_SYMBOL(kunmap_atomic);
|
|
EXPORT_SYMBOL(kmap_atomic_to_page);
|
|
EXPORT_SYMBOL(clear_highpage);
|
|
--- head-2011-03-11.orig/arch/x86/mm/hypervisor.c 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/hypervisor.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -419,13 +419,13 @@ void xen_tlb_flush_all(void)
|
|
}
|
|
EXPORT_SYMBOL_GPL(xen_tlb_flush_all);
|
|
|
|
-void xen_tlb_flush_mask(cpumask_t *mask)
|
|
+void xen_tlb_flush_mask(const cpumask_t *mask)
|
|
{
|
|
struct mmuext_op op;
|
|
if ( cpus_empty(*mask) )
|
|
return;
|
|
op.cmd = MMUEXT_TLB_FLUSH_MULTI;
|
|
- set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
|
|
+ set_xen_guest_handle(op.arg2.vcpumask, cpus_addr(*mask));
|
|
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
|
|
}
|
|
EXPORT_SYMBOL_GPL(xen_tlb_flush_mask);
|
|
@@ -439,14 +439,14 @@ void xen_invlpg_all(unsigned long ptr)
|
|
}
|
|
EXPORT_SYMBOL_GPL(xen_invlpg_all);
|
|
|
|
-void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr)
|
|
+void xen_invlpg_mask(const cpumask_t *mask, unsigned long ptr)
|
|
{
|
|
struct mmuext_op op;
|
|
if ( cpus_empty(*mask) )
|
|
return;
|
|
op.cmd = MMUEXT_INVLPG_MULTI;
|
|
op.arg1.linear_addr = ptr & PAGE_MASK;
|
|
- set_xen_guest_handle(op.arg2.vcpumask, mask->bits);
|
|
+ set_xen_guest_handle(op.arg2.vcpumask, cpus_addr(*mask));
|
|
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
|
|
}
|
|
EXPORT_SYMBOL_GPL(xen_invlpg_mask);
|
|
--- head-2011-03-11.orig/arch/x86/mm/init_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/init_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -22,6 +22,7 @@
|
|
#include <linux/init.h>
|
|
#include <linux/highmem.h>
|
|
#include <linux/pagemap.h>
|
|
+#include <linux/pfn.h>
|
|
#include <linux/poison.h>
|
|
#include <linux/bootmem.h>
|
|
#include <linux/slab.h>
|
|
@@ -65,17 +66,19 @@ static pmd_t * __init one_md_table_init(
|
|
pmd_t *pmd_table;
|
|
|
|
#ifdef CONFIG_X86_PAE
|
|
- pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
|
|
- paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
|
|
- make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
|
|
- set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
|
- pud = pud_offset(pgd, 0);
|
|
- if (pmd_table != pmd_offset(pud, 0))
|
|
- BUG();
|
|
-#else
|
|
+ if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
|
|
+ pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE);
|
|
+
|
|
+ paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT);
|
|
+ make_lowmem_page_readonly(pmd_table, XENFEAT_writable_page_tables);
|
|
+ set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
|
+ pud = pud_offset(pgd, 0);
|
|
+ if (pmd_table != pmd_offset(pud, 0))
|
|
+ BUG();
|
|
+ }
|
|
+#endif
|
|
pud = pud_offset(pgd, 0);
|
|
pmd_table = pmd_offset(pud, 0);
|
|
-#endif
|
|
|
|
return pmd_table;
|
|
}
|
|
@@ -86,16 +89,18 @@ static pmd_t * __init one_md_table_init(
|
|
*/
|
|
static pte_t * __init one_page_table_init(pmd_t *pmd)
|
|
{
|
|
+#if CONFIG_XEN_COMPAT <= 0x030002
|
|
if (pmd_none(*pmd)) {
|
|
+#else
|
|
+ if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) {
|
|
+#endif
|
|
pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE);
|
|
+
|
|
paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT);
|
|
make_lowmem_page_readonly(page_table,
|
|
XENFEAT_writable_page_tables);
|
|
set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE));
|
|
- if (page_table != pte_offset_kernel(pmd, 0))
|
|
- BUG();
|
|
-
|
|
- return page_table;
|
|
+ BUG_ON(page_table != pte_offset_kernel(pmd, 0));
|
|
}
|
|
|
|
return pte_offset_kernel(pmd, 0);
|
|
@@ -115,7 +120,6 @@ static pte_t * __init one_page_table_ini
|
|
static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base)
|
|
{
|
|
pgd_t *pgd;
|
|
- pud_t *pud;
|
|
pmd_t *pmd;
|
|
int pgd_idx, pmd_idx;
|
|
unsigned long vaddr;
|
|
@@ -126,12 +130,10 @@ static void __init page_table_range_init
|
|
pgd = pgd_base + pgd_idx;
|
|
|
|
for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
|
|
- if (pgd_none(*pgd))
|
|
- one_md_table_init(pgd);
|
|
- pud = pud_offset(pgd, vaddr);
|
|
- pmd = pmd_offset(pud, vaddr);
|
|
+ pmd = one_md_table_init(pgd);
|
|
+ pmd = pmd + pmd_index(vaddr);
|
|
for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) {
|
|
- if (vaddr < hypervisor_virt_start && pmd_none(*pmd))
|
|
+ if (vaddr < hypervisor_virt_start)
|
|
one_page_table_init(pmd);
|
|
|
|
vaddr += PMD_SIZE;
|
|
@@ -194,24 +196,25 @@ static void __init kernel_physical_mappi
|
|
/* Map with big pages if possible, otherwise create normal page tables. */
|
|
if (cpu_has_pse) {
|
|
unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1;
|
|
-
|
|
if (is_kernel_text(address) || is_kernel_text(address2))
|
|
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC));
|
|
else
|
|
set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE));
|
|
+
|
|
pfn += PTRS_PER_PTE;
|
|
} else {
|
|
pte = one_page_table_init(pmd);
|
|
|
|
- pte += pte_ofs;
|
|
- for (; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) {
|
|
- /* XEN: Only map initial RAM allocation. */
|
|
- if ((pfn >= max_ram_pfn) || pte_present(*pte))
|
|
- continue;
|
|
- if (is_kernel_text(address))
|
|
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
|
|
- else
|
|
- set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
|
|
+ for (pte += pte_ofs;
|
|
+ pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn;
|
|
+ pte++, pfn++, pte_ofs++, address += PAGE_SIZE) {
|
|
+ /* XEN: Only map initial RAM allocation. */
|
|
+ if ((pfn >= max_ram_pfn) || pte_present(*pte))
|
|
+ continue;
|
|
+ if (is_kernel_text(address))
|
|
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC));
|
|
+ else
|
|
+ set_pte(pte, pfn_pte(pfn, PAGE_KERNEL));
|
|
}
|
|
pte_ofs = 0;
|
|
}
|
|
@@ -388,15 +391,44 @@ extern void __init remap_numa_kva(void);
|
|
|
|
pgd_t *swapper_pg_dir;
|
|
|
|
+static void __init xen_pagetable_setup_start(pgd_t *base)
|
|
+{
|
|
+}
|
|
+
|
|
+static void __init xen_pagetable_setup_done(pgd_t *base)
|
|
+{
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Build a proper pagetable for the kernel mappings. Up until this
|
|
+ * point, we've been running on some set of pagetables constructed by
|
|
+ * the boot process.
|
|
+ *
|
|
+ * If we're booting on native hardware, this will be a pagetable
|
|
+ * constructed in arch/i386/kernel/head.S, and not running in PAE mode
|
|
+ * (even if we'll end up running in PAE). The root of the pagetable
|
|
+ * will be swapper_pg_dir.
|
|
+ *
|
|
+ * If we're booting paravirtualized under a hypervisor, then there are
|
|
+ * more options: we may already be running PAE, and the pagetable may
|
|
+ * or may not be based in swapper_pg_dir. In any case,
|
|
+ * paravirt_pagetable_setup_start() will set up swapper_pg_dir
|
|
+ * appropriately for the rest of the initialization to work.
|
|
+ *
|
|
+ * In general, pagetable_init() assumes that the pagetable may already
|
|
+ * be partially populated, and so it avoids stomping on any existing
|
|
+ * mappings.
|
|
+ */
|
|
static void __init pagetable_init (void)
|
|
{
|
|
- unsigned long vaddr;
|
|
+ unsigned long vaddr, end;
|
|
pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
|
|
|
|
+ xen_pagetable_setup_start(pgd_base);
|
|
+
|
|
/* Enable PSE if available */
|
|
- if (cpu_has_pse) {
|
|
+ if (cpu_has_pse)
|
|
set_in_cr4(X86_CR4_PSE);
|
|
- }
|
|
|
|
/* Enable PGE if available */
|
|
if (cpu_has_pge) {
|
|
@@ -413,9 +445,12 @@ static void __init pagetable_init (void)
|
|
* created - mappings will be set by set_fixmap():
|
|
*/
|
|
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
|
|
- page_table_range_init(vaddr, hypervisor_virt_start, pgd_base);
|
|
+ end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
|
|
+ page_table_range_init(vaddr, end, pgd_base);
|
|
|
|
permanent_kmaps_init(pgd_base);
|
|
+
|
|
+ xen_pagetable_setup_done(pgd_base);
|
|
}
|
|
|
|
#if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP)
|
|
@@ -756,34 +791,29 @@ int remove_memory(u64 start, u64 size)
|
|
EXPORT_SYMBOL_GPL(remove_memory);
|
|
#endif
|
|
|
|
-struct kmem_cache *pgd_cache;
|
|
struct kmem_cache *pmd_cache;
|
|
|
|
void __init pgtable_cache_init(void)
|
|
{
|
|
+ size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t);
|
|
+
|
|
if (PTRS_PER_PMD > 1) {
|
|
pmd_cache = kmem_cache_create("pmd",
|
|
PTRS_PER_PMD*sizeof(pmd_t),
|
|
PTRS_PER_PMD*sizeof(pmd_t),
|
|
- 0,
|
|
+ SLAB_PANIC,
|
|
pmd_ctor,
|
|
NULL);
|
|
- if (!pmd_cache)
|
|
- panic("pgtable_cache_init(): cannot create pmd cache");
|
|
+ if (!SHARED_KERNEL_PMD) {
|
|
+ /* If we're in PAE mode and have a non-shared
|
|
+ kernel pmd, then the pgd size must be a
|
|
+ page size. This is because the pgd_list
|
|
+ links through the page structure, so there
|
|
+ can only be one pgd per page for this to
|
|
+ work. */
|
|
+ pgd_size = PAGE_SIZE;
|
|
+ }
|
|
}
|
|
- pgd_cache = kmem_cache_create("pgd",
|
|
-#ifndef CONFIG_XEN
|
|
- PTRS_PER_PGD*sizeof(pgd_t),
|
|
- PTRS_PER_PGD*sizeof(pgd_t),
|
|
-#else
|
|
- PAGE_SIZE,
|
|
- PAGE_SIZE,
|
|
-#endif
|
|
- 0,
|
|
- pgd_ctor,
|
|
- PTRS_PER_PMD == 1 ? pgd_dtor : NULL);
|
|
- if (!pgd_cache)
|
|
- panic("pgtable_cache_init(): Cannot create pgd cache");
|
|
}
|
|
|
|
/*
|
|
@@ -817,13 +847,26 @@ static int noinline do_test_wp_bit(void)
|
|
|
|
void mark_rodata_ro(void)
|
|
{
|
|
- unsigned long addr = (unsigned long)__start_rodata;
|
|
-
|
|
- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
|
|
- change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO);
|
|
+ unsigned long start = PFN_ALIGN(_text);
|
|
+ unsigned long size = PFN_ALIGN(_etext) - start;
|
|
|
|
- printk("Write protecting the kernel read-only data: %uk\n",
|
|
- (__end_rodata - __start_rodata) >> 10);
|
|
+#ifndef CONFIG_KPROBES
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+ /* It must still be possible to apply SMP alternatives. */
|
|
+ if (num_possible_cpus() <= 1)
|
|
+#endif
|
|
+ {
|
|
+ change_page_attr(virt_to_page(start),
|
|
+ size >> PAGE_SHIFT, PAGE_KERNEL_RX);
|
|
+ printk("Write protecting the kernel text: %luk\n", size >> 10);
|
|
+ }
|
|
+#endif
|
|
+ start += size;
|
|
+ size = (unsigned long)__end_rodata - start;
|
|
+ change_page_attr(virt_to_page(start),
|
|
+ size >> PAGE_SHIFT, PAGE_KERNEL_RO);
|
|
+ printk("Write protecting the kernel read-only data: %luk\n",
|
|
+ size >> 10);
|
|
|
|
/*
|
|
* change_page_attr() requires a global_flush_tlb() call after it.
|
|
@@ -846,7 +889,7 @@ void free_init_pages(char *what, unsigne
|
|
free_page(addr);
|
|
totalram_pages++;
|
|
}
|
|
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
|
|
+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
|
|
}
|
|
|
|
void free_initmem(void)
|
|
--- head-2011-03-11.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:37:37.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/ioremap-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,6 +13,7 @@
|
|
#include <linux/slab.h>
|
|
#include <linux/module.h>
|
|
#include <linux/io.h>
|
|
+#include <linux/sched.h>
|
|
#include <asm/fixmap.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
--- head-2011-03-11.orig/arch/x86/mm/pgtable_32-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/pgtable_32-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,6 +13,7 @@
|
|
#include <linux/pagemap.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/quicklist.h>
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -218,8 +219,6 @@ void pmd_ctor(void *pmd, struct kmem_cac
|
|
* against pageattr.c; it is the unique case in which a valid change
|
|
* of kernel pagetables can't be lazily synchronized by vmalloc faults.
|
|
* vmalloc faults work because attached pagetables are never freed.
|
|
- * The locking scheme was chosen on the basis of manfred's
|
|
- * recommendations and having no core impact whatsoever.
|
|
* -- wli
|
|
*/
|
|
DEFINE_SPINLOCK(pgd_lock);
|
|
@@ -246,37 +245,54 @@ static inline void pgd_list_del(pgd_t *p
|
|
page->mapping = NULL;
|
|
}
|
|
|
|
-void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused)
|
|
+
|
|
+
|
|
+#if (PTRS_PER_PMD == 1)
|
|
+/* Non-PAE pgd constructor */
|
|
+void pgd_ctor(void *pgd)
|
|
{
|
|
unsigned long flags;
|
|
|
|
- if (PTRS_PER_PMD > 1) {
|
|
- if (HAVE_SHARED_KERNEL_PMD)
|
|
- clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
|
|
- swapper_pg_dir + USER_PTRS_PER_PGD,
|
|
- KERNEL_PGD_PTRS);
|
|
- } else {
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
+ /* !PAE, no pagetable sharing */
|
|
+ memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
|
|
+
|
|
+ spin_lock_irqsave(&pgd_lock, flags);
|
|
+
|
|
+ /* must happen under lock */
|
|
+ clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
|
|
+ swapper_pg_dir + USER_PTRS_PER_PGD,
|
|
+ KERNEL_PGD_PTRS);
|
|
+
|
|
+ paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
|
|
+ __pa(swapper_pg_dir) >> PAGE_SHIFT,
|
|
+ USER_PTRS_PER_PGD,
|
|
+ KERNEL_PGD_PTRS);
|
|
+ pgd_list_add(pgd);
|
|
+ spin_unlock_irqrestore(&pgd_lock, flags);
|
|
+}
|
|
+#else /* PTRS_PER_PMD > 1 */
|
|
+/* PAE pgd constructor */
|
|
+void pgd_ctor(void *pgd)
|
|
+{
|
|
+ /* PAE, kernel PMD may be shared */
|
|
+
|
|
+ if (SHARED_KERNEL_PMD) {
|
|
clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD,
|
|
swapper_pg_dir + USER_PTRS_PER_PGD,
|
|
KERNEL_PGD_PTRS);
|
|
+ } else {
|
|
memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t));
|
|
-
|
|
- /* must happen under lock */
|
|
- paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT,
|
|
- __pa(swapper_pg_dir) >> PAGE_SHIFT,
|
|
- USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD);
|
|
-
|
|
- pgd_list_add(pgd);
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
}
|
|
}
|
|
+#endif /* PTRS_PER_PMD */
|
|
|
|
-/* never called when PTRS_PER_PMD > 1 */
|
|
-void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused)
|
|
+void pgd_dtor(void *pgd)
|
|
{
|
|
unsigned long flags; /* can be called from interrupt context */
|
|
|
|
+ if (SHARED_KERNEL_PMD)
|
|
+ return;
|
|
+
|
|
paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT);
|
|
spin_lock_irqsave(&pgd_lock, flags);
|
|
pgd_list_del(pgd);
|
|
@@ -285,11 +301,46 @@ void pgd_dtor(void *pgd, struct kmem_cac
|
|
pgd_test_and_unpin(pgd);
|
|
}
|
|
|
|
+#define UNSHARED_PTRS_PER_PGD \
|
|
+ (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD)
|
|
+
|
|
+/* If we allocate a pmd for part of the kernel address space, then
|
|
+   make sure it's initialized with the appropriate kernel mappings.
|
|
+ Otherwise use a cached zeroed pmd. */
|
|
+static pmd_t *pmd_cache_alloc(int idx)
|
|
+{
|
|
+ pmd_t *pmd;
|
|
+
|
|
+ if (idx >= USER_PTRS_PER_PGD) {
|
|
+ pmd = (pmd_t *)__get_free_page(GFP_KERNEL);
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+ if (pmd)
|
|
+ memcpy(pmd,
|
|
+ (void *)pgd_page_vaddr(swapper_pg_dir[idx]),
|
|
+ sizeof(pmd_t) * PTRS_PER_PMD);
|
|
+#endif
|
|
+ } else
|
|
+ pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
|
|
+
|
|
+ return pmd;
|
|
+}
|
|
+
|
|
+static void pmd_cache_free(pmd_t *pmd, int idx)
|
|
+{
|
|
+ if (idx >= USER_PTRS_PER_PGD) {
|
|
+ make_lowmem_page_writable(pmd, XENFEAT_writable_page_tables);
|
|
+ memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
|
|
+ free_page((unsigned long)pmd);
|
|
+ } else
|
|
+ kmem_cache_free(pmd_cache, pmd);
|
|
+}
|
|
+
|
|
pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
{
|
|
int i;
|
|
- pgd_t *pgd = kmem_cache_alloc(pgd_cache, GFP_KERNEL);
|
|
- pmd_t **pmd;
|
|
+ pgd_t *pgd = quicklist_alloc(0, GFP_KERNEL, pgd_ctor);
|
|
+ pmd_t **pmds = NULL;
|
|
unsigned long flags;
|
|
|
|
if (!pgd)
|
|
@@ -303,37 +354,40 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
if (PTRS_PER_PMD == 1)
|
|
return pgd;
|
|
|
|
- if (HAVE_SHARED_KERNEL_PMD) {
|
|
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
|
|
- pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
|
|
- if (!pmd)
|
|
- goto out_oom;
|
|
- paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
|
|
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
|
|
+#ifdef CONFIG_XEN
|
|
+ if (!SHARED_KERNEL_PMD) {
|
|
+ /*
|
|
+ * We can race save/restore (if we sleep during a GFP_KERNEL memory
|
|
+ * allocation). We therefore store virtual addresses of pmds as they
|
|
+ * do not change across save/restore, and poke the machine addresses
|
|
+ * into the pgdir under the pgd_lock.
|
|
+ */
|
|
+ pmds = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
|
|
+ if (!pmds) {
|
|
+ quicklist_free(0, pgd_dtor, pgd);
|
|
+ return NULL;
|
|
}
|
|
- return pgd;
|
|
- }
|
|
-
|
|
- /*
|
|
- * We can race save/restore (if we sleep during a GFP_KERNEL memory
|
|
- * allocation). We therefore store virtual addresses of pmds as they
|
|
- * do not change across save/restore, and poke the machine addresses
|
|
- * into the pgdir under the pgd_lock.
|
|
- */
|
|
- pmd = kmalloc(PTRS_PER_PGD * sizeof(pmd_t *), GFP_KERNEL);
|
|
- if (!pmd) {
|
|
- kmem_cache_free(pgd_cache, pgd);
|
|
- return NULL;
|
|
}
|
|
+#endif
|
|
|
|
/* Allocate pmds, remember virtual addresses. */
|
|
- for (i = 0; i < PTRS_PER_PGD; ++i) {
|
|
- pmd[i] = kmem_cache_alloc(pmd_cache, GFP_KERNEL);
|
|
- if (!pmd[i])
|
|
+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
|
|
+ pmd_t *pmd = pmd_cache_alloc(i);
|
|
+
|
|
+ if (!pmd)
|
|
goto out_oom;
|
|
+
|
|
paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT);
|
|
+ if (pmds)
|
|
+ pmds[i] = pmd;
|
|
+ else
|
|
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmd)));
|
|
}
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+ if (SHARED_KERNEL_PMD)
|
|
+ return pgd;
|
|
+
|
|
spin_lock_irqsave(&pgd_lock, flags);
|
|
|
|
/* Protect against save/restore: move below 4GB under pgd_lock. */
|
|
@@ -348,44 +402,43 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
|
|
/* Copy kernel pmd contents and write-protect the new pmds. */
|
|
for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
|
|
- unsigned long v = (unsigned long)i << PGDIR_SHIFT;
|
|
- pgd_t *kpgd = pgd_offset_k(v);
|
|
- pud_t *kpud = pud_offset(kpgd, v);
|
|
- pmd_t *kpmd = pmd_offset(kpud, v);
|
|
- memcpy(pmd[i], kpmd, PAGE_SIZE);
|
|
+ memcpy(pmds[i],
|
|
+ (void *)pgd_page_vaddr(swapper_pg_dir[i]),
|
|
+ sizeof(pmd_t) * PTRS_PER_PMD);
|
|
make_lowmem_page_readonly(
|
|
- pmd[i], XENFEAT_writable_page_tables);
|
|
+ pmds[i], XENFEAT_writable_page_tables);
|
|
}
|
|
|
|
/* It is safe to poke machine addresses of pmds under the pmd_lock. */
|
|
for (i = 0; i < PTRS_PER_PGD; i++)
|
|
- set_pgd(&pgd[i], __pgd(1 + __pa(pmd[i])));
|
|
+ set_pgd(&pgd[i], __pgd(1 + __pa(pmds[i])));
|
|
|
|
/* Ensure this pgd gets picked up and pinned on save/restore. */
|
|
pgd_list_add(pgd);
|
|
|
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
|
|
|
- kfree(pmd);
|
|
+ kfree(pmds);
|
|
+#endif
|
|
|
|
return pgd;
|
|
|
|
out_oom:
|
|
- if (HAVE_SHARED_KERNEL_PMD) {
|
|
+ if (!pmds) {
|
|
for (i--; i >= 0; i--) {
|
|
pgd_t pgdent = pgd[i];
|
|
void* pmd = (void *)__va(pgd_val(pgdent)-1);
|
|
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
|
|
- kmem_cache_free(pmd_cache, pmd);
|
|
+ pmd_cache_free(pmd, i);
|
|
}
|
|
} else {
|
|
for (i--; i >= 0; i--) {
|
|
- paravirt_release_pd(__pa(pmd[i]) >> PAGE_SHIFT);
|
|
- kmem_cache_free(pmd_cache, pmd[i]);
|
|
+ paravirt_release_pd(__pa(pmds[i]) >> PAGE_SHIFT);
|
|
+ pmd_cache_free(pmds[i], i);
|
|
}
|
|
- kfree(pmd);
|
|
+ kfree(pmds);
|
|
}
|
|
- kmem_cache_free(pgd_cache, pgd);
|
|
+ quicklist_free(0, pgd_dtor, pgd);
|
|
return NULL;
|
|
}
|
|
|
|
@@ -405,35 +458,24 @@ void pgd_free(pgd_t *pgd)
|
|
|
|
/* in the PAE case user pgd entries are overwritten before usage */
|
|
if (PTRS_PER_PMD > 1) {
|
|
- for (i = 0; i < USER_PTRS_PER_PGD; ++i) {
|
|
+ for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) {
|
|
pgd_t pgdent = pgd[i];
|
|
void* pmd = (void *)__va(pgd_val(pgdent)-1);
|
|
paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT);
|
|
- kmem_cache_free(pmd_cache, pmd);
|
|
+ pmd_cache_free(pmd, i);
|
|
}
|
|
|
|
- if (!HAVE_SHARED_KERNEL_PMD) {
|
|
- unsigned long flags;
|
|
- spin_lock_irqsave(&pgd_lock, flags);
|
|
- pgd_list_del(pgd);
|
|
- spin_unlock_irqrestore(&pgd_lock, flags);
|
|
-
|
|
- for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++) {
|
|
- pmd_t *pmd = (void *)__va(pgd_val(pgd[i])-1);
|
|
- make_lowmem_page_writable(
|
|
- pmd, XENFEAT_writable_page_tables);
|
|
- memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t));
|
|
- kmem_cache_free(pmd_cache, pmd);
|
|
- }
|
|
-
|
|
- if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
|
|
- xen_destroy_contiguous_region(
|
|
- (unsigned long)pgd, 0);
|
|
- }
|
|
+ if (!xen_feature(XENFEAT_pae_pgdir_above_4gb))
|
|
+ xen_destroy_contiguous_region((unsigned long)pgd, 0);
|
|
}
|
|
|
|
/* in the non-PAE case, free_pgtables() clears user pgd entries */
|
|
- kmem_cache_free(pgd_cache, pgd);
|
|
+ quicklist_free(0, pgd_dtor, pgd);
|
|
+}
|
|
+
|
|
+void check_pgt_cache(void)
|
|
+{
|
|
+ quicklist_trim(0, pgd_dtor, 25, 16);
|
|
}
|
|
|
|
void make_lowmem_page_readonly(void *va, unsigned int feature)
|
|
@@ -730,13 +772,13 @@ void mm_pin_all(void)
|
|
spin_unlock_irqrestore(&pgd_lock, flags);
|
|
}
|
|
|
|
-void _arch_dup_mmap(struct mm_struct *mm)
|
|
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
|
{
|
|
if (!test_bit(PG_pinned, &virt_to_page(mm->pgd)->flags))
|
|
mm_pin(mm);
|
|
}
|
|
|
|
-void _arch_exit_mmap(struct mm_struct *mm)
|
|
+void arch_exit_mmap(struct mm_struct *mm)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
|
|
--- head-2011-03-11.orig/arch/x86/ia32/ia32entry-xen.S 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/ia32/ia32entry-xen.S 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -431,11 +431,7 @@ ia32_sys_call_table:
|
|
.quad sys_symlink
|
|
.quad sys_lstat
|
|
.quad sys_readlink /* 85 */
|
|
-#ifdef CONFIG_IA32_AOUT
|
|
.quad sys_uselib
|
|
-#else
|
|
- .quad quiet_ni_syscall
|
|
-#endif
|
|
.quad sys_swapon
|
|
.quad sys_reboot
|
|
.quad compat_sys_old_readdir
|
|
@@ -574,7 +570,7 @@ ia32_sys_call_table:
|
|
.quad quiet_ni_syscall /* tux */
|
|
.quad quiet_ni_syscall /* security */
|
|
.quad sys_gettid
|
|
- .quad sys_readahead /* 225 */
|
|
+ .quad sys32_readahead /* 225 */
|
|
.quad sys_setxattr
|
|
.quad sys_lsetxattr
|
|
.quad sys_fsetxattr
|
|
@@ -599,7 +595,7 @@ ia32_sys_call_table:
|
|
.quad compat_sys_io_getevents
|
|
.quad compat_sys_io_submit
|
|
.quad sys_io_cancel
|
|
- .quad sys_fadvise64 /* 250 */
|
|
+ .quad sys32_fadvise64 /* 250 */
|
|
.quad quiet_ni_syscall /* free_huge_pages */
|
|
.quad sys_exit_group
|
|
.quad sys32_lookup_dcookie
|
|
@@ -663,10 +659,14 @@ ia32_sys_call_table:
|
|
.quad compat_sys_set_robust_list
|
|
.quad compat_sys_get_robust_list
|
|
.quad sys_splice
|
|
- .quad sys_sync_file_range
|
|
- .quad sys_tee
|
|
+ .quad sys32_sync_file_range
|
|
+ .quad sys_tee /* 315 */
|
|
.quad compat_sys_vmsplice
|
|
.quad compat_sys_move_pages
|
|
.quad sys_getcpu
|
|
.quad sys_epoll_pwait
|
|
-ia32_syscall_end:
|
|
+ .quad compat_sys_utimensat /* 320 */
|
|
+ .quad compat_sys_signalfd
|
|
+ .quad compat_sys_timerfd
|
|
+ .quad sys_eventfd
|
|
+ia32_syscall_end:
|
|
--- head-2011-03-11.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/acpi/sleep_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -60,19 +60,6 @@ unsigned long acpi_video_flags;
|
|
extern char wakeup_start, wakeup_end;
|
|
|
|
extern unsigned long acpi_copy_wakeup_routine(unsigned long);
|
|
-
|
|
-static pgd_t low_ptr;
|
|
-
|
|
-static void init_low_mapping(void)
|
|
-{
|
|
- pgd_t *slot0 = pgd_offset(current->mm, 0UL);
|
|
- low_ptr = *slot0;
|
|
- /* FIXME: We're playing with the current task's page tables here, which
|
|
- * is potentially dangerous on SMP systems.
|
|
- */
|
|
- set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET));
|
|
- local_flush_tlb();
|
|
-}
|
|
#endif
|
|
|
|
/**
|
|
@@ -84,8 +71,6 @@ static void init_low_mapping(void)
|
|
int acpi_save_state_mem(void)
|
|
{
|
|
#ifndef CONFIG_ACPI_PV_SLEEP
|
|
- init_low_mapping();
|
|
-
|
|
memcpy((void *)acpi_wakeup_address, &wakeup_start,
|
|
&wakeup_end - &wakeup_start);
|
|
acpi_copy_wakeup_routine(acpi_wakeup_address);
|
|
@@ -98,10 +83,6 @@ int acpi_save_state_mem(void)
|
|
*/
|
|
void acpi_restore_state_mem(void)
|
|
{
|
|
-#ifndef CONFIG_ACPI_PV_SLEEP
|
|
- set_pgd(pgd_offset(current->mm, 0UL), low_ptr);
|
|
- local_flush_tlb();
|
|
-#endif
|
|
}
|
|
|
|
/**
|
|
@@ -115,10 +96,11 @@ void acpi_restore_state_mem(void)
|
|
void __init acpi_reserve_bootmem(void)
|
|
{
|
|
#ifndef CONFIG_ACPI_PV_SLEEP
|
|
- acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE);
|
|
- if ((&wakeup_end - &wakeup_start) > PAGE_SIZE)
|
|
+ acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2);
|
|
+ if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2))
|
|
printk(KERN_CRIT
|
|
- "ACPI: Wakeup code way too big, will crash on attempt to suspend\n");
|
|
+ "ACPI: Wakeup code way too big, will crash on attempt"
|
|
+ " to suspend\n");
|
|
#endif
|
|
}
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/e820_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/e820_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -17,6 +17,8 @@
|
|
#include <linux/kexec.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mm.h>
|
|
+#include <linux/suspend.h>
|
|
+#include <linux/pfn.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/page.h>
|
|
@@ -28,7 +30,7 @@
|
|
|
|
struct e820map e820 __initdata;
|
|
#ifdef CONFIG_XEN
|
|
-struct e820map machine_e820 __initdata;
|
|
+struct e820map machine_e820;
|
|
#endif
|
|
|
|
/*
|
|
@@ -293,22 +295,6 @@ void __init e820_reserve_resources(struc
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-/* Mark pages corresponding to given address range as nosave */
|
|
-static void __init
|
|
-e820_mark_nosave_range(unsigned long start, unsigned long end)
|
|
-{
|
|
- unsigned long pfn, max_pfn;
|
|
-
|
|
- if (start >= end)
|
|
- return;
|
|
-
|
|
- printk("Nosave address range: %016lx - %016lx\n", start, end);
|
|
- max_pfn = end >> PAGE_SHIFT;
|
|
- for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
|
|
- if (pfn_valid(pfn))
|
|
- SetPageNosave(pfn_to_page(pfn));
|
|
-}
|
|
-
|
|
/*
|
|
* Find the ranges of physical addresses that do not correspond to
|
|
* e820 RAM areas and mark the corresponding pages as nosave for software
|
|
@@ -327,13 +313,13 @@ void __init e820_mark_nosave_regions(voi
|
|
struct e820entry *ei = &e820.map[i];
|
|
|
|
if (paddr < ei->addr)
|
|
- e820_mark_nosave_range(paddr,
|
|
- round_up(ei->addr, PAGE_SIZE));
|
|
+ register_nosave_region(PFN_DOWN(paddr),
|
|
+ PFN_UP(ei->addr));
|
|
|
|
paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
|
|
if (ei->type != E820_RAM)
|
|
- e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
|
|
- paddr);
|
|
+ register_nosave_region(PFN_UP(ei->addr),
|
|
+ PFN_DOWN(paddr));
|
|
|
|
if (paddr >= (end_pfn << PAGE_SHIFT))
|
|
break;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/early_printk-xen.c 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/early_printk-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -11,13 +11,12 @@
|
|
|
|
#ifdef __i386__
|
|
#include <asm/setup.h>
|
|
-#define VGABASE (__ISA_IO_base + 0xb8000)
|
|
#else
|
|
#include <asm/bootsetup.h>
|
|
-#define VGABASE ((void __iomem *)0xffffffff800b8000UL)
|
|
#endif
|
|
-
|
|
#ifndef CONFIG_XEN
|
|
+#define VGABASE (__ISA_IO_base + 0xb8000)
|
|
+
|
|
static int max_ypos = 25, max_xpos = 80;
|
|
static int current_ypos = 25, current_xpos = 0;
|
|
|
|
@@ -93,9 +92,9 @@ static int early_serial_putc(unsigned ch
|
|
static void early_serial_write(struct console *con, const char *s, unsigned n)
|
|
{
|
|
while (*s && n-- > 0) {
|
|
- early_serial_putc(*s);
|
|
if (*s == '\n')
|
|
early_serial_putc('\r');
|
|
+ early_serial_putc(*s);
|
|
s++;
|
|
}
|
|
}
|
|
@@ -205,7 +204,7 @@ static noinline long simnow(long cmd, lo
|
|
return ret;
|
|
}
|
|
|
|
-void __init simnow_init(char *str)
|
|
+static void __init simnow_init(char *str)
|
|
{
|
|
char *fn = "klog";
|
|
if (*str == '=')
|
|
@@ -277,22 +276,12 @@ static int __init setup_early_printk(cha
|
|
early_console = &simnow_console;
|
|
keep_early = 1;
|
|
}
|
|
+
|
|
+ if (keep_early)
|
|
+ early_console->flags &= ~CON_BOOT;
|
|
+ else
|
|
+ early_console->flags |= CON_BOOT;
|
|
register_console(early_console);
|
|
return 0;
|
|
}
|
|
-
|
|
early_param("earlyprintk", setup_early_printk);
|
|
-
|
|
-void __init disable_early_printk(void)
|
|
-{
|
|
- if (!early_console_initialized || !early_console)
|
|
- return;
|
|
- if (!keep_early) {
|
|
- printk("disabling early console\n");
|
|
- unregister_console(early_console);
|
|
- early_console_initialized = 0;
|
|
- } else {
|
|
- printk("keeping early console\n");
|
|
- }
|
|
-}
|
|
-
|
|
--- head-2011-03-11.orig/arch/x86/kernel/entry_64-xen.S 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/entry_64-xen.S 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -1249,3 +1249,23 @@ ENTRY(call_softirq)
|
|
ret
|
|
CFI_ENDPROC
|
|
ENDPROC(call_softirq)
|
|
+
|
|
+#ifndef CONFIG_IA32_EMULATION
|
|
+KPROBE_ENTRY(ignore_sysret)
|
|
+ CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
+ CFI_DEF_CFA rsp,SS+8-RIP+16
|
|
+/* CFI_REL_OFFSET ss,SS-RIP+16 */
|
|
+ CFI_REL_OFFSET rsp,RSP-RIP+16
|
|
+/* CFI_REL_OFFSET rflags,EFLAGS-RIP+16 */
|
|
+/* CFI_REL_OFFSET cs,CS-RIP+16 */
|
|
+ CFI_REL_OFFSET rip,RIP-RIP+16
|
|
+ popq %rcx
|
|
+ CFI_ADJUST_CFA_OFFSET -8
|
|
+ popq %r11
|
|
+ CFI_ADJUST_CFA_OFFSET -8
|
|
+ mov $-ENOSYS,%eax
|
|
+ HYPERVISOR_IRET 0
|
|
+ CFI_ENDPROC
|
|
+ENDPROC(ignore_sysret)
|
|
+#endif
|
|
--- head-2011-03-11.orig/arch/x86/kernel/head_64-xen.S 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/head_64-xen.S 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -5,6 +5,7 @@
|
|
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
|
|
* Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
|
|
* Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
|
|
+ * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com>
|
|
* Jun Nakajima <jun.nakajima@intel.com>
|
|
* Modified for Xen
|
|
*/
|
|
@@ -34,27 +35,15 @@ startup_64:
|
|
pushq $0 # fake return address
|
|
jmp x86_64_start_kernel
|
|
|
|
-#ifdef CONFIG_ACPI_SLEEP
|
|
-.org 0xf00
|
|
- .globl pGDT32
|
|
-pGDT32:
|
|
- .word gdt_end-cpu_gdt_table-1
|
|
- .long cpu_gdt_table-__START_KERNEL_map
|
|
-#endif
|
|
-ENTRY(stext)
|
|
-ENTRY(_stext)
|
|
+.balign PAGE_SIZE
|
|
|
|
- $page = 0
|
|
#define NEXT_PAGE(name) \
|
|
- $page = $page + 1; \
|
|
- .org $page * 0x1000; \
|
|
- phys_##name = $page * 0x1000 + __PHYSICAL_START; \
|
|
+ .balign PAGE_SIZE; \
|
|
+ phys_##name = . - .bootstrap.text; \
|
|
ENTRY(name)
|
|
|
|
NEXT_PAGE(init_level4_pgt)
|
|
- /* This gets initialized in x86_64_start_kernel */
|
|
.fill 512,8,0
|
|
-NEXT_PAGE(init_level4_user_pgt)
|
|
/*
|
|
* We update two pgd entries to make kernel and user pgd consistent
|
|
* at pgd_populate(). It can be used for kernel modules. So we place
|
|
@@ -98,14 +87,6 @@ NEXT_PAGE(hypercall_page)
|
|
#undef NEXT_PAGE
|
|
|
|
.data
|
|
-/* Just dummy symbol to allow compilation. Not used in sleep path */
|
|
-#ifdef CONFIG_ACPI_SLEEP
|
|
- .align PAGE_SIZE
|
|
-ENTRY(wakeup_level4_pgt)
|
|
- .fill 512,8,0
|
|
-#endif
|
|
-
|
|
- .data
|
|
|
|
.align 16
|
|
.globl cpu_gdt_descr
|
|
@@ -133,13 +114,13 @@ gdt:
|
|
|
|
ENTRY(cpu_gdt_table)
|
|
.quad 0x0000000000000000 /* NULL descriptor */
|
|
+ .quad 0x00cf9b000000ffff /* __KERNEL32_CS */
|
|
+ .quad 0x00af9b000000ffff /* __KERNEL_CS */
|
|
+ .quad 0x00cf93000000ffff /* __KERNEL_DS */
|
|
+ .quad 0x00cffb000000ffff /* __USER32_CS */
|
|
+ .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */
|
|
+ .quad 0x00affb000000ffff /* __USER_CS */
|
|
.quad 0x0 /* unused */
|
|
- .quad 0x00af9a000000ffff /* __KERNEL_CS */
|
|
- .quad 0x00cf92000000ffff /* __KERNEL_DS */
|
|
- .quad 0x00cffa000000ffff /* __USER32_CS */
|
|
- .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */
|
|
- .quad 0x00affa000000ffff /* __USER_CS */
|
|
- .quad 0x00cf9a000000ffff /* __KERNEL32_CS */
|
|
.quad 0,0 /* TSS */
|
|
.quad 0,0 /* LDT */
|
|
.quad 0,0,0 /* three TLS descriptors */
|
|
@@ -162,14 +143,11 @@ ENTRY(empty_zero_page)
|
|
* __xen_guest information
|
|
*/
|
|
.macro utoh value
|
|
- .if (\value) < 0 || (\value) >= 0x10
|
|
- utoh (((\value)>>4)&0x0fffffffffffffff)
|
|
- .endif
|
|
- .if ((\value) & 0xf) < 10
|
|
- .byte '0' + ((\value) & 0xf)
|
|
- .else
|
|
- .byte 'A' + ((\value) & 0xf) - 10
|
|
- .endif
|
|
+ i = 64
|
|
+ .rept 16
|
|
+ i = i - 4
|
|
+ .byte '0' + ((((\value) >> i) & 0xf) > 9) * ('0' - 'A' + 10) + (((\value) >> i) & 0xf)
|
|
+ .endr
|
|
.endm
|
|
|
|
.section __xen_guest
|
|
--- head-2011-03-11.orig/arch/x86/kernel/head64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/head64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -25,13 +25,21 @@
|
|
#include <asm/setup.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/pgtable.h>
|
|
+#include <asm/tlbflush.h>
|
|
#include <asm/sections.h>
|
|
|
|
unsigned long start_pfn;
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+static void __init zap_identity_mappings(void)
|
|
+{
|
|
+ pgd_t *pgd = pgd_offset_k(0UL);
|
|
+ pgd_clear(pgd);
|
|
+ __flush_tlb();
|
|
+}
|
|
+
|
|
/* Don't add a printk in there. printk relies on the PDA which is not initialized
|
|
yet. */
|
|
-#if 0
|
|
static void __init clear_bss(void)
|
|
{
|
|
memset(__bss_start, 0,
|
|
@@ -40,26 +48,25 @@ static void __init clear_bss(void)
|
|
#endif
|
|
|
|
#define NEW_CL_POINTER 0x228 /* Relative to real mode data */
|
|
-#define OLD_CL_MAGIC_ADDR 0x90020
|
|
+#define OLD_CL_MAGIC_ADDR 0x20
|
|
#define OLD_CL_MAGIC 0xA33F
|
|
-#define OLD_CL_BASE_ADDR 0x90000
|
|
-#define OLD_CL_OFFSET 0x90022
|
|
+#define OLD_CL_OFFSET 0x22
|
|
|
|
static void __init copy_bootdata(char *real_mode_data)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
- int new_data;
|
|
+ unsigned long new_data;
|
|
char * command_line;
|
|
|
|
memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
|
|
- new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
|
|
+ new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
|
|
if (!new_data) {
|
|
- if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
|
|
+ if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
|
|
return;
|
|
}
|
|
- new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
|
|
+ new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
|
|
}
|
|
- command_line = (char *) ((u64)(new_data));
|
|
+ command_line = __va(new_data);
|
|
memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
|
|
#else
|
|
int max_cmdline;
|
|
@@ -101,10 +108,13 @@ void __init x86_64_start_kernel(char * r
|
|
while ((1UL << machine_to_phys_order) < machine_to_phys_nr_ents )
|
|
machine_to_phys_order++;
|
|
|
|
-#if 0
|
|
+#ifndef CONFIG_XEN
|
|
/* clear bss before set_intr_gate with early_idt_handler */
|
|
clear_bss();
|
|
|
|
+ /* Make NULL pointers segfault */
|
|
+ zap_identity_mappings();
|
|
+
|
|
for (i = 0; i < IDT_ENTRIES; i++)
|
|
set_intr_gate(i, early_idt_handler);
|
|
asm volatile("lidt %0" :: "m" (idt_descr));
|
|
@@ -116,7 +126,7 @@ void __init x86_64_start_kernel(char * r
|
|
cpu_pda(i) = &boot_cpu_pda[i];
|
|
|
|
pda_init(0);
|
|
- copy_bootdata(real_mode_data);
|
|
+ copy_bootdata(__va(real_mode_data));
|
|
#ifdef CONFIG_SMP
|
|
cpu_set(0, cpu_online_map);
|
|
#endif
|
|
--- head-2011-03-11.orig/arch/x86/kernel/io_apic_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/io_apic_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -25,7 +25,6 @@
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/sched.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/acpi.h>
|
|
@@ -904,10 +903,6 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
enable_8259A_irq(0);
|
|
}
|
|
|
|
-void __init UNEXPECTED_IO_APIC(void)
|
|
-{
|
|
-}
|
|
-
|
|
void __apicdebuginit print_IO_APIC(void)
|
|
{
|
|
int apic, i;
|
|
@@ -943,40 +938,16 @@ void __apicdebuginit print_IO_APIC(void)
|
|
printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid);
|
|
printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
|
|
printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
|
|
- if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2)
|
|
- UNEXPECTED_IO_APIC();
|
|
|
|
printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01);
|
|
printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries);
|
|
- if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */
|
|
- (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */
|
|
- (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */
|
|
- (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */
|
|
- (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */
|
|
- (reg_01.bits.entries != 0x2E) &&
|
|
- (reg_01.bits.entries != 0x3F) &&
|
|
- (reg_01.bits.entries != 0x03)
|
|
- )
|
|
- UNEXPECTED_IO_APIC();
|
|
|
|
printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ);
|
|
printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version);
|
|
- if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */
|
|
- (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */
|
|
- (reg_01.bits.version != 0x10) && /* oldest IO-APICs */
|
|
- (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */
|
|
- (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */
|
|
- (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */
|
|
- )
|
|
- UNEXPECTED_IO_APIC();
|
|
- if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2)
|
|
- UNEXPECTED_IO_APIC();
|
|
|
|
if (reg_01.bits.version >= 0x10) {
|
|
printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw);
|
|
printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration);
|
|
- if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2)
|
|
- UNEXPECTED_IO_APIC();
|
|
}
|
|
|
|
printk(KERN_DEBUG ".... IRQ redirection table:\n");
|
|
@@ -1408,8 +1379,7 @@ static void irq_complete_move(unsigned i
|
|
|
|
vector = ~get_irq_regs()->orig_rax;
|
|
me = smp_processor_id();
|
|
- if ((vector == cfg->vector) &&
|
|
- cpu_isset(smp_processor_id(), cfg->domain)) {
|
|
+ if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) {
|
|
cpumask_t cleanup_mask;
|
|
|
|
cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
|
|
@@ -1444,7 +1414,7 @@ static void ack_apic_level(unsigned int
|
|
|
|
/*
|
|
* We must acknowledge the irq before we move it or the acknowledge will
|
|
- * not propogate properly.
|
|
+ * not propagate properly.
|
|
*/
|
|
ack_APIC_irq();
|
|
|
|
@@ -1527,6 +1497,7 @@ static void ack_lapic_irq (unsigned int
|
|
static void end_lapic_irq (unsigned int i) { /* nothing */ }
|
|
|
|
static struct hw_interrupt_type lapic_irq_type __read_mostly = {
|
|
+ .name = "local-APIC",
|
|
.typename = "local-APIC-edge",
|
|
.startup = NULL, /* startup_irq() not used for IRQ0 */
|
|
.shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
|
|
@@ -1998,18 +1969,18 @@ int arch_setup_msi_irq(struct pci_dev *d
|
|
if (irq < 0)
|
|
return irq;
|
|
|
|
- set_irq_msi(irq, desc);
|
|
ret = msi_compose_msg(dev, irq, &msg);
|
|
if (ret < 0) {
|
|
destroy_irq(irq);
|
|
return ret;
|
|
}
|
|
|
|
+ set_irq_msi(irq, desc);
|
|
write_msi_msg(irq, &msg);
|
|
|
|
set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
|
|
|
|
- return irq;
|
|
+ return 0;
|
|
}
|
|
|
|
void arch_teardown_msi_irq(unsigned int irq)
|
|
--- head-2011-03-11.orig/arch/x86/kernel/ioport_64-xen.c 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/ioport_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,10 +13,10 @@
|
|
#include <linux/ioport.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/stddef.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/thread_info.h>
|
|
+#include <linux/syscalls.h>
|
|
#include <xen/interface/physdev.h>
|
|
|
|
/* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */
|
|
--- head-2011-03-11.orig/arch/x86/kernel/ldt_64-xen.c 2007-06-12 13:13:01.000000000 +0200
|
|
+++ head-2011-03-11/arch/x86/kernel/ldt_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,7 +13,6 @@
|
|
#include <linux/string.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/slab.h>
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/mpparse_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/mpparse_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -17,7 +17,6 @@
|
|
#include <linux/init.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/bootmem.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/acpi.h>
|
|
@@ -307,7 +306,7 @@ static int __init smp_read_mpc(struct mp
|
|
}
|
|
}
|
|
}
|
|
- clustered_apic_check();
|
|
+ setup_apic_routing();
|
|
if (!num_processors)
|
|
printk(KERN_ERR "MPTABLE: no processors registered!\n");
|
|
return num_processors;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:31:05.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/process_64-xen.c 2011-02-02 08:31:39.000000000 +0100
|
|
@@ -39,6 +39,7 @@
|
|
#include <linux/random.h>
|
|
#include <linux/notifier.h>
|
|
#include <linux/kprobes.h>
|
|
+#include <linux/kdebug.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -49,7 +50,6 @@
|
|
#include <asm/mmu_context.h>
|
|
#include <asm/pda.h>
|
|
#include <asm/prctl.h>
|
|
-#include <asm/kdebug.h>
|
|
#include <xen/interface/physdev.h>
|
|
#include <xen/interface/vcpu.h>
|
|
#include <asm/desc.h>
|
|
@@ -231,16 +231,18 @@ void __cpuinit select_idle_routine(const
|
|
|
|
static int __init idle_setup (char *str)
|
|
{
|
|
- if (!strncmp(str, "poll", 4)) {
|
|
+ if (!strcmp(str, "poll")) {
|
|
printk("using polling idle threads.\n");
|
|
pm_idle = poll_idle;
|
|
- }
|
|
+ } else if (!strcmp(str, "mwait"))
|
|
+ force_mwait = 1;
|
|
+ else
|
|
+ return -1;
|
|
|
|
boot_option_idle_override = 1;
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
-
|
|
-__setup("idle=", idle_setup);
|
|
+early_param("idle", idle_setup);
|
|
|
|
/* Prints also some state that isn't saved in the pt_regs */
|
|
void __show_regs(struct pt_regs * regs)
|
|
@@ -545,7 +547,7 @@ __switch_to(struct task_struct *prev_p,
|
|
* The AMD workaround requires it to be after DS reload, or
|
|
* after DS has been cleared, which we do in __prepare_arch_switch.
|
|
*/
|
|
- if (prev_p->thread_info->status & TS_USEDFPU) {
|
|
+ if (task_thread_info(prev_p)->status & TS_USEDFPU) {
|
|
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
|
|
mcl->op = __HYPERVISOR_fpu_taskswitch;
|
|
mcl->args[0] = 1;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/setup_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/setup_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -120,6 +120,8 @@ int bootloader_type;
|
|
|
|
unsigned long saved_video_mode;
|
|
|
|
+int force_mwait __cpuinitdata;
|
|
+
|
|
/*
|
|
* Early DMI memory
|
|
*/
|
|
@@ -254,10 +256,10 @@ static void discover_ebda(void)
|
|
* there is a real-mode segmented pointer pointing to the
|
|
* 4K EBDA area at 0x40E
|
|
*/
|
|
- ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER;
|
|
+ ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER);
|
|
ebda_addr <<= 4;
|
|
|
|
- ebda_size = *(unsigned short *)(unsigned long)ebda_addr;
|
|
+ ebda_size = *(unsigned short *)__va(ebda_addr);
|
|
|
|
/* Round EBDA up to pages */
|
|
if (ebda_size == 0)
|
|
@@ -411,15 +413,8 @@ void __init setup_arch(char **cmdline_p)
|
|
#endif
|
|
|
|
#ifdef CONFIG_SMP
|
|
- /*
|
|
- * But first pinch a few for the stack/trampoline stuff
|
|
- * FIXME: Don't need the extra page at 4K, but need to fix
|
|
- * trampoline before removing it. (see the GDT stuff)
|
|
- */
|
|
- reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE);
|
|
-
|
|
/* Reserve SMP trampoline */
|
|
- reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE);
|
|
+ reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE);
|
|
#endif
|
|
#endif
|
|
|
|
@@ -571,8 +566,6 @@ void __init setup_arch(char **cmdline_p)
|
|
early_quirks();
|
|
#endif
|
|
|
|
- zap_low_mappings(0);
|
|
-
|
|
/*
|
|
* set this early, so we dont allocate cpu0
|
|
* if MADT list doesnt list BSP first
|
|
@@ -865,6 +858,10 @@ static void __cpuinit init_amd(struct cp
|
|
|
|
/* RDTSC can be speculated around */
|
|
clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
+
|
|
+ /* Family 10 doesn't support C states in MWAIT so don't use it */
|
|
+ if (c->x86 == 0x10 && !force_mwait)
|
|
+ clear_bit(X86_FEATURE_MWAIT, &c->x86_capability);
|
|
}
|
|
|
|
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
@@ -1149,9 +1146,7 @@ void __cpuinit identify_cpu(struct cpuin
|
|
#ifdef CONFIG_X86_MCE
|
|
mcheck_init(c);
|
|
#endif
|
|
- if (c == &boot_cpu_data)
|
|
- mtrr_bp_init();
|
|
- else
|
|
+ if (c != &boot_cpu_data)
|
|
mtrr_ap_init();
|
|
#ifdef CONFIG_NUMA
|
|
numa_add_cpu(smp_processor_id());
|
|
@@ -1242,9 +1237,8 @@ static int show_cpuinfo(struct seq_file
|
|
"stc",
|
|
"100mhzsteps",
|
|
"hwpstate",
|
|
- NULL, /* tsc invariant mapped to constant_tsc */
|
|
- NULL,
|
|
- /* nothing */ /* constant_tsc - moved to flags */
|
|
+ "", /* tsc invariant mapped to constant_tsc */
|
|
+ /* nothing */
|
|
};
|
|
|
|
|
|
--- head-2011-03-11.orig/arch/x86/kernel/setup64-xen.c 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/setup64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -113,9 +113,9 @@ void __init setup_per_cpu_areas(void)
|
|
if (!NODE_DATA(cpu_to_node(i))) {
|
|
printk("cpu with no node %d, num_online_nodes %d\n",
|
|
i, num_online_nodes());
|
|
- ptr = alloc_bootmem(size);
|
|
+ ptr = alloc_bootmem_pages(size);
|
|
} else {
|
|
- ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size);
|
|
+ ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size);
|
|
}
|
|
if (!ptr)
|
|
panic("Cannot allocate cpu data for CPU %d\n", i);
|
|
@@ -208,6 +208,8 @@ char boot_exception_stacks[(N_EXCEPTION_
|
|
__attribute__((section(".bss.page_aligned")));
|
|
#endif
|
|
|
|
+extern asmlinkage void ignore_sysret(void);
|
|
+
|
|
/* May not be marked __init: used by software suspend */
|
|
void syscall_init(void)
|
|
{
|
|
@@ -219,12 +221,22 @@ void syscall_init(void)
|
|
*/
|
|
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
|
|
wrmsrl(MSR_LSTAR, system_call);
|
|
+ wrmsrl(MSR_CSTAR, ignore_sysret);
|
|
|
|
/* Flags to clear on syscall */
|
|
wrmsrl(MSR_SYSCALL_MASK, EF_TF|EF_DF|EF_IE|0x3000);
|
|
#endif
|
|
#ifdef CONFIG_IA32_EMULATION
|
|
syscall32_cpu_init ();
|
|
+#else
|
|
+ {
|
|
+ static const struct callback_register cstar = {
|
|
+ .type = CALLBACKTYPE_syscall32,
|
|
+ .address = (unsigned long)ignore_sysret
|
|
+ };
|
|
+ if (HYPERVISOR_callback_op(CALLBACKOP_register, &cstar))
|
|
+ printk(KERN_WARNING "Unable to register CSTAR callback\n");
|
|
+ }
|
|
#endif
|
|
}
|
|
|
|
@@ -262,7 +274,6 @@ void __cpuinit cpu_init (void)
|
|
/* CPU 0 is initialised in head64.c */
|
|
if (cpu != 0) {
|
|
pda_init(cpu);
|
|
- zap_low_mappings(cpu);
|
|
}
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
else
|
|
--- head-2011-03-11.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -14,7 +14,6 @@
|
|
#include <linux/mm.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/spinlock.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/smp.h>
|
|
#include <linux/kernel_stat.h>
|
|
#include <linux/mc146818rtc.h>
|
|
@@ -457,44 +456,36 @@ int smp_call_function (void (*func) (voi
|
|
}
|
|
EXPORT_SYMBOL(smp_call_function);
|
|
|
|
-void smp_stop_cpu(void)
|
|
+static void stop_this_cpu(void *dummy)
|
|
{
|
|
- unsigned long flags;
|
|
+ local_irq_disable();
|
|
/*
|
|
* Remove this CPU:
|
|
*/
|
|
cpu_clear(smp_processor_id(), cpu_online_map);
|
|
- local_irq_save(flags);
|
|
disable_all_local_evtchn();
|
|
- local_irq_restore(flags);
|
|
-}
|
|
-
|
|
-static void smp_really_stop_cpu(void *dummy)
|
|
-{
|
|
- smp_stop_cpu();
|
|
for (;;)
|
|
halt();
|
|
}
|
|
|
|
void smp_send_stop(void)
|
|
{
|
|
- int nolock = 0;
|
|
+ int nolock;
|
|
+ unsigned long flags;
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
if (reboot_force)
|
|
return;
|
|
#endif
|
|
+
|
|
/* Don't deadlock on the call lock in panic */
|
|
- if (!spin_trylock(&call_lock)) {
|
|
- /* ignore locking because we have panicked anyways */
|
|
- nolock = 1;
|
|
- }
|
|
- __smp_call_function(smp_really_stop_cpu, NULL, 0, 0);
|
|
+ nolock = !spin_trylock(&call_lock);
|
|
+ local_irq_save(flags);
|
|
+ __smp_call_function(stop_this_cpu, NULL, 0, 0);
|
|
if (!nolock)
|
|
spin_unlock(&call_lock);
|
|
-
|
|
- local_irq_disable();
|
|
disable_all_local_evtchn();
|
|
- local_irq_enable();
|
|
+ local_irq_restore(flags);
|
|
}
|
|
|
|
/*
|
|
--- head-2011-03-11.orig/arch/x86/kernel/traps_64-xen.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/traps_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -32,6 +32,7 @@
|
|
#include <linux/unwind.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/bug.h>
|
|
+#include <linux/kdebug.h>
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/io.h>
|
|
@@ -39,7 +40,6 @@
|
|
#include <asm/debugreg.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/i387.h>
|
|
-#include <asm/kdebug.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/unwind.h>
|
|
#include <asm/smp.h>
|
|
@@ -71,22 +71,6 @@ asmlinkage void alignment_check(void);
|
|
asmlinkage void machine_check(void);
|
|
asmlinkage void spurious_interrupt_bug(void);
|
|
|
|
-ATOMIC_NOTIFIER_HEAD(die_chain);
|
|
-EXPORT_SYMBOL(die_chain);
|
|
-
|
|
-int register_die_notifier(struct notifier_block *nb)
|
|
-{
|
|
- vmalloc_sync_all();
|
|
- return atomic_notifier_chain_register(&die_chain, nb);
|
|
-}
|
|
-EXPORT_SYMBOL(register_die_notifier); /* used modular by kdb */
|
|
-
|
|
-int unregister_die_notifier(struct notifier_block *nb)
|
|
-{
|
|
- return atomic_notifier_chain_unregister(&die_chain, nb);
|
|
-}
|
|
-EXPORT_SYMBOL(unregister_die_notifier); /* used modular by kdb */
|
|
-
|
|
static inline void conditional_sti(struct pt_regs *regs)
|
|
{
|
|
if (regs->eflags & X86_EFLAGS_IF)
|
|
@@ -428,8 +412,7 @@ void show_registers(struct pt_regs *regs
|
|
const int cpu = smp_processor_id();
|
|
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
|
|
|
|
- rsp = regs->rsp;
|
|
-
|
|
+ rsp = regs->rsp;
|
|
printk("CPU %d ", cpu);
|
|
__show_regs(regs);
|
|
printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
|
|
@@ -440,7 +423,6 @@ void show_registers(struct pt_regs *regs
|
|
* time of the fault..
|
|
*/
|
|
if (in_kernel) {
|
|
-
|
|
printk("Stack: ");
|
|
_show_stack(NULL, regs, (unsigned long*)rsp);
|
|
|
|
@@ -485,13 +467,14 @@ static unsigned int die_nest_count;
|
|
|
|
unsigned __kprobes long oops_begin(void)
|
|
{
|
|
- int cpu = smp_processor_id();
|
|
+ int cpu;
|
|
unsigned long flags;
|
|
|
|
oops_enter();
|
|
|
|
/* racy, but better than risking deadlock. */
|
|
local_irq_save(flags);
|
|
+ cpu = smp_processor_id();
|
|
if (!spin_trylock(&die_lock)) {
|
|
if (cpu == die_owner)
|
|
/* nested oops. should stop eventually */;
|
|
@@ -585,10 +568,20 @@ static void __kprobes do_trap(int trapnr
|
|
{
|
|
struct task_struct *tsk = current;
|
|
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = trapnr;
|
|
-
|
|
if (user_mode(regs)) {
|
|
+ /*
|
|
+ * We want error_code and trap_no set for userspace
|
|
+ * faults and kernelspace faults which result in
|
|
+ * die(), but not kernelspace faults which are fixed
|
|
+ * up. die() gives the process no chance to handle
|
|
+ * the signal and notice the kernel fault information,
|
|
+ * so that won't result in polluting the information
|
|
+ * about previously queued, but not yet delivered,
|
|
+ * faults. See also do_general_protection below.
|
|
+ */
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
+
|
|
if (exception_trace && unhandled_signal(tsk, signr))
|
|
printk(KERN_INFO
|
|
"%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n",
|
|
@@ -609,8 +602,11 @@ static void __kprobes do_trap(int trapnr
|
|
fixup = search_exception_tables(regs->rip);
|
|
if (fixup)
|
|
regs->rip = fixup->fixup;
|
|
- else
|
|
+ else {
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = trapnr;
|
|
die(str, regs, error_code);
|
|
+ }
|
|
return;
|
|
}
|
|
}
|
|
@@ -686,10 +682,10 @@ asmlinkage void __kprobes do_general_pro
|
|
|
|
conditional_sti(regs);
|
|
|
|
- tsk->thread.error_code = error_code;
|
|
- tsk->thread.trap_no = 13;
|
|
-
|
|
if (user_mode(regs)) {
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
+
|
|
if (exception_trace && unhandled_signal(tsk, SIGSEGV))
|
|
printk(KERN_INFO
|
|
"%s[%d] general protection rip:%lx rsp:%lx error:%lx\n",
|
|
@@ -708,6 +704,9 @@ asmlinkage void __kprobes do_general_pro
|
|
regs->rip = fixup->fixup;
|
|
return;
|
|
}
|
|
+
|
|
+ tsk->thread.error_code = error_code;
|
|
+ tsk->thread.trap_no = 13;
|
|
if (notify_die(DIE_GPF, "general protection fault", regs,
|
|
error_code, 13, SIGSEGV) == NOTIFY_STOP)
|
|
return;
|
|
--- head-2011-03-11.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/kernel/vsyscall_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -45,14 +45,34 @@
|
|
|
|
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
|
|
#define __syscall_clobber "r11","rcx","memory"
|
|
+#define __pa_vsymbol(x) \
|
|
+ ({unsigned long v; \
|
|
+ extern char __vsyscall_0; \
|
|
+ asm("" : "=r" (v) : "0" (x)); \
|
|
+ ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); })
|
|
|
|
+/*
|
|
+ * vsyscall_gtod_data contains data that is :
|
|
+ * - readonly from vsyscalls
|
|
+ * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64)
|
|
+ * Try to keep this structure as small as possible to avoid cache line ping pongs
|
|
+ */
|
|
struct vsyscall_gtod_data_t {
|
|
- seqlock_t lock;
|
|
- int sysctl_enabled;
|
|
- struct timeval wall_time_tv;
|
|
+ seqlock_t lock;
|
|
+
|
|
+ /* open coded 'struct timespec' */
|
|
+ time_t wall_time_sec;
|
|
+ u32 wall_time_nsec;
|
|
+
|
|
+ int sysctl_enabled;
|
|
struct timezone sys_tz;
|
|
- cycle_t offset_base;
|
|
- struct clocksource clock;
|
|
+ struct { /* extract of a clocksource struct */
|
|
+ cycle_t (*vread)(void);
|
|
+ cycle_t cycle_last;
|
|
+ cycle_t mask;
|
|
+ u32 mult;
|
|
+ u32 shift;
|
|
+ } clock;
|
|
};
|
|
int __vgetcpu_mode __section_vgetcpu_mode;
|
|
|
|
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wa
|
|
|
|
write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
|
|
/* copy vsyscall data */
|
|
- vsyscall_gtod_data.clock = *clock;
|
|
- vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec;
|
|
- vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000;
|
|
+ vsyscall_gtod_data.clock.vread = clock->vread;
|
|
+ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
|
|
+ vsyscall_gtod_data.clock.mask = clock->mask;
|
|
+ vsyscall_gtod_data.clock.mult = clock->mult;
|
|
+ vsyscall_gtod_data.clock.shift = clock->shift;
|
|
+ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
|
|
+ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
|
|
vsyscall_gtod_data.sys_tz = sys_tz;
|
|
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
|
|
}
|
|
@@ -105,7 +129,8 @@ static __always_inline long time_syscall
|
|
static __always_inline void do_vgettimeofday(struct timeval * tv)
|
|
{
|
|
cycle_t now, base, mask, cycle_delta;
|
|
- unsigned long seq, mult, shift, nsec_delta;
|
|
+ unsigned seq;
|
|
+ unsigned long mult, shift, nsec;
|
|
cycle_t (*vread)(void);
|
|
do {
|
|
seq = read_seqbegin(&__vsyscall_gtod_data.lock);
|
|
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeo
|
|
mult = __vsyscall_gtod_data.clock.mult;
|
|
shift = __vsyscall_gtod_data.clock.shift;
|
|
|
|
- *tv = __vsyscall_gtod_data.wall_time_tv;
|
|
-
|
|
+ tv->tv_sec = __vsyscall_gtod_data.wall_time_sec;
|
|
+ nsec = __vsyscall_gtod_data.wall_time_nsec;
|
|
} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
|
|
|
|
/* calculate interval: */
|
|
cycle_delta = (now - base) & mask;
|
|
/* convert to nsecs: */
|
|
- nsec_delta = (cycle_delta * mult) >> shift;
|
|
+ nsec += (cycle_delta * mult) >> shift;
|
|
|
|
- /* convert to usecs and add to timespec: */
|
|
- tv->tv_usec += nsec_delta / NSEC_PER_USEC;
|
|
- while (tv->tv_usec > USEC_PER_SEC) {
|
|
+ while (nsec >= NSEC_PER_SEC) {
|
|
tv->tv_sec += 1;
|
|
- tv->tv_usec -= USEC_PER_SEC;
|
|
+ nsec -= NSEC_PER_SEC;
|
|
}
|
|
+ tv->tv_usec = nsec / NSEC_PER_USEC;
|
|
}
|
|
|
|
int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz)
|
|
@@ -151,11 +175,16 @@ int __vsyscall(0) vgettimeofday(struct t
|
|
* unlikely */
|
|
time_t __vsyscall(1) vtime(time_t *t)
|
|
{
|
|
+ struct timeval tv;
|
|
+ time_t result;
|
|
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
|
|
return time_syscall(t);
|
|
- else if (t)
|
|
- *t = __vsyscall_gtod_data.wall_time_tv.tv_sec;
|
|
- return __vsyscall_gtod_data.wall_time_tv.tv_sec;
|
|
+
|
|
+ vgettimeofday(&tv, 0);
|
|
+ result = tv.tv_sec;
|
|
+ if (t)
|
|
+ *t = result;
|
|
+ return result;
|
|
}
|
|
|
|
/* Fast way to get current CPU and node.
|
|
@@ -224,10 +253,10 @@ static int vsyscall_sysctl_change(ctl_ta
|
|
return ret;
|
|
/* gcc has some trouble with __va(__pa()), so just do it this
|
|
way. */
|
|
- map1 = ioremap(__pa_symbol(&vsysc1), 2);
|
|
+ map1 = ioremap(__pa_vsymbol(&vsysc1), 2);
|
|
if (!map1)
|
|
return -ENOMEM;
|
|
- map2 = ioremap(__pa_symbol(&vsysc2), 2);
|
|
+ map2 = ioremap(__pa_vsymbol(&vsysc2), 2);
|
|
if (!map2) {
|
|
ret = -ENOMEM;
|
|
goto out;
|
|
@@ -304,7 +333,7 @@ static int __cpuinit
|
|
cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
|
{
|
|
long cpu = (long)arg;
|
|
- if (action == CPU_ONLINE)
|
|
+ if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
|
|
smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
|
|
return NOTIFY_DONE;
|
|
}
|
|
--- head-2011-03-11.orig/arch/x86/mm/fault_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/fault_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -15,22 +15,22 @@
|
|
#include <linux/mman.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/smp.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/interrupt.h>
|
|
#include <linux/init.h>
|
|
#include <linux/tty.h>
|
|
#include <linux/vt_kern.h> /* For unblank_screen() */
|
|
#include <linux/compiler.h>
|
|
+#include <linux/vmalloc.h>
|
|
#include <linux/module.h>
|
|
#include <linux/kprobes.h>
|
|
#include <linux/uaccess.h>
|
|
+#include <linux/kdebug.h>
|
|
|
|
#include <asm/system.h>
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/smp.h>
|
|
#include <asm/tlbflush.h>
|
|
#include <asm/proto.h>
|
|
-#include <asm/kdebug.h>
|
|
#include <asm-generic/sections.h>
|
|
|
|
/* Page fault error code bits */
|
|
@@ -537,6 +537,12 @@ bad_area:
|
|
bad_area_nosemaphore:
|
|
/* User mode accesses just cause a SIGSEGV */
|
|
if (error_code & PF_USER) {
|
|
+
|
|
+ /*
|
|
+ * It's possible to have interrupts off here.
|
|
+ */
|
|
+ local_irq_enable();
|
|
+
|
|
if (is_prefetch(regs, address, error_code))
|
|
return;
|
|
|
|
@@ -646,10 +652,10 @@ do_sigbus:
|
|
}
|
|
|
|
DEFINE_SPINLOCK(pgd_lock);
|
|
-struct page *pgd_list;
|
|
+LIST_HEAD(pgd_list);
|
|
|
|
#define pgd_page_table(what, pg) \
|
|
- spin_##what(&((struct mm_struct *)(pg)->mapping)->page_table_lock)
|
|
+ spin_##what(&((struct mm_struct *)(pg)->private)->page_table_lock)
|
|
|
|
void vmalloc_sync_all(void)
|
|
{
|
|
@@ -669,8 +675,7 @@ void vmalloc_sync_all(void)
|
|
if (pgd_none(*pgd_ref))
|
|
continue;
|
|
spin_lock(&pgd_lock);
|
|
- for (page = pgd_list; page;
|
|
- page = (struct page *)page->index) {
|
|
+ list_for_each_entry(page, &pgd_list, lru) {
|
|
pgd_t *pgd;
|
|
pgd = (pgd_t *)page_address(page) + pgd_index(address);
|
|
|
|
--- head-2011-03-11.orig/arch/x86/mm/init_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/init_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -25,10 +25,12 @@
|
|
#include <linux/bootmem.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/pci.h>
|
|
+#include <linux/pfn.h>
|
|
#include <linux/poison.h>
|
|
#include <linux/dma-mapping.h>
|
|
#include <linux/module.h>
|
|
#include <linux/memory_hotplug.h>
|
|
+#include <linux/nmi.h>
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/system.h>
|
|
@@ -51,7 +53,7 @@
|
|
#define Dprintk(x...)
|
|
#endif
|
|
|
|
-struct dma_mapping_ops* dma_ops;
|
|
+const struct dma_mapping_ops* dma_ops;
|
|
EXPORT_SYMBOL(dma_ops);
|
|
|
|
#if CONFIG_XEN_COMPAT <= 0x030002
|
|
@@ -191,6 +193,13 @@ void show_mem(void)
|
|
|
|
for_each_online_pgdat(pgdat) {
|
|
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
|
|
+ /* this loop can take a while with 256 GB and 4k pages
|
|
+ so update the NMI watchdog */
|
|
+ if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) {
|
|
+ touch_nmi_watchdog();
|
|
+ }
|
|
+ if (!pfn_valid(pgdat->node_start_pfn + i))
|
|
+ continue;
|
|
page = pfn_to_page(pgdat->node_start_pfn + i);
|
|
total++;
|
|
if (PageReserved(page))
|
|
@@ -359,7 +368,7 @@ __set_fixmap (enum fixed_addresses idx,
|
|
}
|
|
}
|
|
|
|
-unsigned long __initdata table_start, table_end;
|
|
+unsigned long __meminitdata table_start, table_end;
|
|
|
|
static __meminit void *alloc_static_page(unsigned long *phys)
|
|
{
|
|
@@ -376,7 +385,7 @@ static __meminit void *alloc_static_page
|
|
start_pfn++;
|
|
memset((void *)va, 0, PAGE_SIZE);
|
|
return (void *)va;
|
|
-}
|
|
+}
|
|
|
|
#define PTE_SIZE PAGE_SIZE
|
|
|
|
@@ -412,28 +421,46 @@ static inline int make_readonly(unsigned
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* Must run before zap_low_mappings */
|
|
-__init void *early_ioremap(unsigned long addr, unsigned long size)
|
|
+__meminit void *early_ioremap(unsigned long addr, unsigned long size)
|
|
{
|
|
- unsigned long map = round_down(addr, LARGE_PAGE_SIZE);
|
|
-
|
|
- /* actually usually some more */
|
|
- if (size >= LARGE_PAGE_SIZE) {
|
|
- return NULL;
|
|
+ unsigned long vaddr;
|
|
+ pmd_t *pmd, *last_pmd;
|
|
+ int i, pmds;
|
|
+
|
|
+ pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
|
|
+ vaddr = __START_KERNEL_map;
|
|
+ pmd = level2_kernel_pgt;
|
|
+ last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1;
|
|
+ for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) {
|
|
+ for (i = 0; i < pmds; i++) {
|
|
+ if (pmd_present(pmd[i]))
|
|
+ goto next;
|
|
+ }
|
|
+ vaddr += addr & ~PMD_MASK;
|
|
+ addr &= PMD_MASK;
|
|
+ for (i = 0; i < pmds; i++, addr += PMD_SIZE)
|
|
+ set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE));
|
|
+ __flush_tlb();
|
|
+ return (void *)vaddr;
|
|
+ next:
|
|
+ ;
|
|
}
|
|
- set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
|
|
- map += LARGE_PAGE_SIZE;
|
|
- set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
|
|
- __flush_tlb();
|
|
- return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1));
|
|
+ printk("early_ioremap(0x%lx, %lu) failed\n", addr, size);
|
|
+ return NULL;
|
|
}
|
|
|
|
/* To avoid virtual aliases later */
|
|
-__init void early_iounmap(void *addr, unsigned long size)
|
|
+__meminit void early_iounmap(void *addr, unsigned long size)
|
|
{
|
|
- if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address)
|
|
- printk("early_iounmap: bad address %p\n", addr);
|
|
- set_pmd(temp_mappings[0].pmd, __pmd(0));
|
|
- set_pmd(temp_mappings[1].pmd, __pmd(0));
|
|
+ unsigned long vaddr;
|
|
+ pmd_t *pmd;
|
|
+ int i, pmds;
|
|
+
|
|
+ vaddr = (unsigned long)addr;
|
|
+ pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE;
|
|
+ pmd = level2_kernel_pgt + pmd_index(vaddr);
|
|
+ for (i = 0; i < pmds; i++)
|
|
+ pmd_clear(pmd + i);
|
|
__flush_tlb();
|
|
}
|
|
#endif
|
|
@@ -792,14 +819,6 @@ void __meminit init_memory_mapping(unsig
|
|
__flush_tlb_all();
|
|
}
|
|
|
|
-void __cpuinit zap_low_mappings(int cpu)
|
|
-{
|
|
- /* this is not required for Xen */
|
|
-#if 0
|
|
- swap_low_mappings();
|
|
-#endif
|
|
-}
|
|
-
|
|
#ifndef CONFIG_NUMA
|
|
void __init paging_init(void)
|
|
{
|
|
@@ -984,17 +1003,6 @@ void __init mem_init(void)
|
|
reservedpages << (PAGE_SHIFT-10),
|
|
datasize >> 10,
|
|
initsize >> 10);
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-#ifdef CONFIG_SMP
|
|
- /*
|
|
- * Sync boot_level4_pgt mappings with the init_level4_pgt
|
|
- * except for the low identity mappings which are already zapped
|
|
- * in init_level4_pgt. This sync-up is essential for AP's bringup
|
|
- */
|
|
- memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
|
|
-#endif
|
|
-#endif
|
|
}
|
|
|
|
void free_init_pages(char *what, unsigned long begin, unsigned long end)
|
|
@@ -1004,7 +1012,7 @@ void free_init_pages(char *what, unsigne
|
|
if (begin >= end)
|
|
return;
|
|
|
|
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
|
|
+ printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
|
|
for (addr = begin; addr < end; addr += PAGE_SIZE) {
|
|
ClearPageReserved(virt_to_page(addr));
|
|
init_page_count(virt_to_page(addr));
|
|
@@ -1013,24 +1021,17 @@ void free_init_pages(char *what, unsigne
|
|
if (addr >= __START_KERNEL_map) {
|
|
/* make_readonly() reports all kernel addresses. */
|
|
__make_page_writable(__va(__pa(addr)));
|
|
- if (HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
|
|
- pgd_t *pgd = pgd_offset_k(addr);
|
|
- pud_t *pud = pud_offset(pgd, addr);
|
|
- pmd_t *pmd = pmd_offset(pud, addr);
|
|
- pte_t *pte = pte_offset_kernel(pmd, addr);
|
|
-
|
|
- xen_l1_entry_update(pte, __pte(0)); /* fallback */
|
|
- }
|
|
+ change_page_attr_addr(addr, 1, __pgprot(0));
|
|
}
|
|
free_page(addr);
|
|
totalram_pages++;
|
|
}
|
|
+ if (addr > __START_KERNEL_map)
|
|
+ global_flush_tlb();
|
|
}
|
|
|
|
void free_initmem(void)
|
|
{
|
|
- memset(__initdata_begin, POISON_FREE_INITDATA,
|
|
- __initdata_end - __initdata_begin);
|
|
free_init_pages("unused kernel memory",
|
|
(unsigned long)(&__init_begin),
|
|
(unsigned long)(&__init_end));
|
|
@@ -1040,13 +1041,28 @@ void free_initmem(void)
|
|
|
|
void mark_rodata_ro(void)
|
|
{
|
|
- unsigned long addr = (unsigned long)__start_rodata;
|
|
+ unsigned long start = (unsigned long)_stext, end;
|
|
+
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+ /* It must still be possible to apply SMP alternatives. */
|
|
+ if (num_possible_cpus() > 1)
|
|
+ start = (unsigned long)_etext;
|
|
+#endif
|
|
+
|
|
+#ifdef CONFIG_KPROBES
|
|
+ start = (unsigned long)__start_rodata;
|
|
+#endif
|
|
+
|
|
+ end = (unsigned long)__end_rodata;
|
|
+ start = (start + PAGE_SIZE - 1) & PAGE_MASK;
|
|
+ end &= PAGE_MASK;
|
|
+ if (end <= start)
|
|
+ return;
|
|
|
|
- for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE)
|
|
- change_page_attr_addr(addr, 1, PAGE_KERNEL_RO);
|
|
+ change_page_attr_addr(start, (end - start) >> PAGE_SHIFT, PAGE_KERNEL_RO);
|
|
|
|
- printk ("Write protecting the kernel read-only data: %luk\n",
|
|
- (__end_rodata - __start_rodata) >> 10);
|
|
+ printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
|
|
+ (end - start) >> 10);
|
|
|
|
/*
|
|
* change_page_attr_addr() requires a global_flush_tlb() call after it.
|
|
@@ -1210,3 +1226,11 @@ int in_gate_area_no_task(unsigned long a
|
|
{
|
|
return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END);
|
|
}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+void *alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size)
|
|
+{
|
|
+ return __alloc_bootmem_core(pgdat->bdata, size,
|
|
+ SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0);
|
|
+}
|
|
+#endif
|
|
--- head-2011-03-11.orig/arch/x86/mm/pageattr_64-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/mm/pageattr_64-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -215,13 +215,13 @@ void mm_pin_all(void)
|
|
preempt_enable();
|
|
}
|
|
|
|
-void _arch_dup_mmap(struct mm_struct *mm)
|
|
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
|
|
{
|
|
if (!mm->context.pinned)
|
|
mm_pin(mm);
|
|
}
|
|
|
|
-void _arch_exit_mmap(struct mm_struct *mm)
|
|
+void arch_exit_mmap(struct mm_struct *mm)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
|
|
@@ -343,10 +343,11 @@ static void flush_kernel_map(void *arg)
|
|
struct page *pg;
|
|
|
|
/* When clflush is available always use it because it is
|
|
- much cheaper than WBINVD */
|
|
- if (!cpu_has_clflush)
|
|
+ much cheaper than WBINVD. Disable clflush for now because
|
|
+ the high level code is not ready yet */
|
|
+ if (1 || !cpu_has_clflush)
|
|
asm volatile("wbinvd" ::: "memory");
|
|
- list_for_each_entry(pg, l, lru) {
|
|
+ else list_for_each_entry(pg, l, lru) {
|
|
void *adr = page_address(pg);
|
|
if (cpu_has_clflush)
|
|
cache_flush_page(adr);
|
|
@@ -460,16 +461,24 @@ __change_page_attr(unsigned long address
|
|
*/
|
|
int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot)
|
|
{
|
|
- int err = 0;
|
|
+ int err = 0, kernel_map = 0;
|
|
int i;
|
|
|
|
+ if (address >= __START_KERNEL_map
|
|
+ && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) {
|
|
+ address = (unsigned long)__va(__pa(address));
|
|
+ kernel_map = 1;
|
|
+ }
|
|
+
|
|
down_write(&init_mm.mmap_sem);
|
|
for (i = 0; i < numpages; i++, address += PAGE_SIZE) {
|
|
unsigned long pfn = __pa(address) >> PAGE_SHIFT;
|
|
|
|
- err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
|
|
- if (err)
|
|
- break;
|
|
+ if (!kernel_map || pte_present(pfn_pte(0, prot))) {
|
|
+ err = __change_page_attr(address, pfn, prot, PAGE_KERNEL);
|
|
+ if (err)
|
|
+ break;
|
|
+ }
|
|
/* Handle kernel mapping too which aliases part of the
|
|
* lowmem */
|
|
if (__pa(address) < KERNEL_TEXT_SIZE) {
|
|
--- head-2011-03-11.orig/drivers/char/tpm/tpm_xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/char/tpm/tpm_xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -463,7 +463,7 @@ static int tpmif_connect(struct xenbus_d
|
|
tp->backend_id = domid;
|
|
|
|
err = bind_listening_port_to_irqhandler(
|
|
- domid, tpmif_int, SA_SAMPLE_RANDOM, "tpmif", tp);
|
|
+ domid, tpmif_int, IRQF_SAMPLE_RANDOM, "tpmif", tp);
|
|
if (err <= 0) {
|
|
WPRINTK("bind_listening_port_to_irqhandler failed "
|
|
"(err=%d)\n", err);
|
|
--- head-2011-03-11.orig/drivers/hwmon/Kconfig 2011-03-11 10:41:54.000000000 +0100
|
|
+++ head-2011-03-11/drivers/hwmon/Kconfig 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -392,7 +392,7 @@ config SENSORS_GPIO_FAN
|
|
|
|
config SENSORS_CORETEMP
|
|
tristate "Intel Core/Core2/Atom temperature sensor"
|
|
- depends on X86 && PCI && EXPERIMENTAL
|
|
+ depends on X86 && PCI && !XEN && EXPERIMENTAL
|
|
help
|
|
If you say yes here you get support for the temperature
|
|
sensor inside your CPU. Most of the family 6 CPUs
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-11/drivers/hwmon/coretemp-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -0,0 +1,449 @@
|
|
+/*
|
|
+ * coretemp.c - Linux kernel module for hardware monitoring
|
|
+ *
|
|
+ * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
|
|
+ *
|
|
+ * Inspired from many hwmon drivers
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; version 2 of the License.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ * 02110-1301 USA.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/jiffies.h>
|
|
+#include <linux/hwmon.h>
|
|
+#include <linux/sysfs.h>
|
|
+#include <linux/hwmon-sysfs.h>
|
|
+#include <linux/err.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/platform_device.h>
|
|
+#include <asm/msr.h>
|
|
+#include <xen/pcpu.h>
|
|
+#include "../xen/core/domctl.h"
|
|
+
|
|
+#define DRVNAME "coretemp"
|
|
+#define coretemp_data pdev_entry
|
|
+
|
|
+typedef enum { SHOW_TEMP, SHOW_TJMAX, SHOW_LABEL, SHOW_NAME } SHOW;
|
|
+
|
|
+/*
|
|
+ * Functions declaration
|
|
+ */
|
|
+
|
|
+static struct coretemp_data *coretemp_update_device(struct device *dev);
|
|
+
|
|
+struct pdev_entry {
|
|
+ struct list_head list;
|
|
+ struct platform_device *pdev;
|
|
+ struct class_device *class_dev;
|
|
+ struct mutex update_lock;
|
|
+ const char *name;
|
|
+ u8 x86_model, x86_mask;
|
|
+ u32 ucode_rev;
|
|
+ char valid; /* zero until following fields are valid */
|
|
+ unsigned long last_updated; /* in jiffies */
|
|
+ int temp;
|
|
+ int tjmax;
|
|
+ u8 alarm;
|
|
+};
|
|
+
|
|
+static struct coretemp_data *coretemp_update_device(struct device *dev);
|
|
+
|
|
+/*
|
|
+ * Sysfs stuff
|
|
+ */
|
|
+
|
|
+static ssize_t show_name(struct device *dev, struct device_attribute
|
|
+ *devattr, char *buf)
|
|
+{
|
|
+ int ret;
|
|
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
|
|
+ struct coretemp_data *data = dev_get_drvdata(dev);
|
|
+
|
|
+ if (attr->index == SHOW_NAME)
|
|
+ ret = sprintf(buf, "%s\n", data->name);
|
|
+ else /* show label */
|
|
+ ret = sprintf(buf, "Core %d\n", data->pdev->id);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static ssize_t show_alarm(struct device *dev, struct device_attribute
|
|
+ *devattr, char *buf)
|
|
+{
|
|
+ struct coretemp_data *data = coretemp_update_device(dev);
|
|
+ /* read the Out-of-spec log, never clear */
|
|
+ return sprintf(buf, "%d\n", data->alarm);
|
|
+}
|
|
+
|
|
+static ssize_t show_temp(struct device *dev,
|
|
+ struct device_attribute *devattr, char *buf)
|
|
+{
|
|
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
|
|
+ struct coretemp_data *data = coretemp_update_device(dev);
|
|
+ int err;
|
|
+
|
|
+ if (attr->index == SHOW_TEMP)
|
|
+ err = data->valid ? sprintf(buf, "%d\n", data->temp) : -EAGAIN;
|
|
+ else
|
|
+ err = sprintf(buf, "%d\n", data->tjmax);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
|
|
+ SHOW_TEMP);
|
|
+static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, NULL,
|
|
+ SHOW_TJMAX);
|
|
+static DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL);
|
|
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
|
|
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
|
|
+
|
|
+static struct attribute *coretemp_attributes[] = {
|
|
+ &sensor_dev_attr_name.dev_attr.attr,
|
|
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
|
|
+ &dev_attr_temp1_crit_alarm.attr,
|
|
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
|
|
+ &sensor_dev_attr_temp1_crit.dev_attr.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static const struct attribute_group coretemp_group = {
|
|
+ .attrs = coretemp_attributes,
|
|
+};
|
|
+
|
|
+static struct coretemp_data *coretemp_update_device(struct device *dev)
|
|
+{
|
|
+ struct coretemp_data *data = dev_get_drvdata(dev);
|
|
+
|
|
+ mutex_lock(&data->update_lock);
|
|
+
|
|
+ if (!data->valid || time_after(jiffies, data->last_updated + HZ)) {
|
|
+ u32 eax, edx;
|
|
+
|
|
+ data->valid = 0;
|
|
+ if (rdmsr_safe_on_pcpu(data->pdev->id, MSR_IA32_THERM_STATUS,
|
|
+ &eax, &edx) < 0)
|
|
+ eax = ~0;
|
|
+ data->alarm = (eax >> 5) & 1;
|
|
+ /* update only if data has been valid */
|
|
+ if (eax & 0x80000000) {
|
|
+ data->temp = data->tjmax - (((eax >> 16)
|
|
+ & 0x7f) * 1000);
|
|
+ data->valid = 1;
|
|
+ } else {
|
|
+ dev_dbg(dev, "Temperature data invalid (0x%x)\n", eax);
|
|
+ }
|
|
+ data->last_updated = jiffies;
|
|
+ }
|
|
+
|
|
+ mutex_unlock(&data->update_lock);
|
|
+ return data;
|
|
+}
|
|
+
|
|
+static int coretemp_probe(struct platform_device *pdev)
|
|
+{
|
|
+ struct coretemp_data *data = platform_get_drvdata(pdev);
|
|
+ int err;
|
|
+ u32 eax, edx;
|
|
+
|
|
+ data->name = "coretemp";
|
|
+ mutex_init(&data->update_lock);
|
|
+ /* Tjmax default is 100 degrees C */
|
|
+ data->tjmax = 100000;
|
|
+
|
|
+ /* test if we can access the THERM_STATUS MSR */
|
|
+ err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_THERM_STATUS, &eax, &edx);
|
|
+ if (err < 0) {
|
|
+ dev_err(&pdev->dev,
|
|
+ "Unable to access THERM_STATUS MSR, giving up\n");
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ /* Check if we have problem with errata AE18 of Core processors:
|
|
+ Readings might stop updating once the processor has entered too deep a sleep state,
|
|
+ fixed for stepping D0 (6EC).
|
|
+ */
|
|
+
|
|
+ if ((data->x86_model == 0xe) && (data->x86_mask < 0xc)) {
|
|
+ /* check for microcode update */
|
|
+ if (!(data->ucode_rev + 1))
|
|
+ dev_warn(&pdev->dev,
|
|
+ "Cannot read microcode revision of CPU\n");
|
|
+ else if (data->ucode_rev < 0x39) {
|
|
+ err = -ENODEV;
|
|
+ dev_err(&pdev->dev,
|
|
+ "Errata AE18 not fixed, update BIOS or "
|
|
+ "microcode of the CPU!\n");
|
|
+ return err;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Some processors have a Tjmax of 85; the following magic should detect it.
|
|
+ Intel won't disclose the information without signed NDA, but
|
|
+ individuals cannot sign it. Catch(ed) 22.
|
|
+ */
|
|
+
|
|
+ if (((data->x86_model == 0xf) && (data->x86_mask > 3)) ||
|
|
+ (data->x86_model == 0xe)) {
|
|
+ err = rdmsr_safe_on_pcpu(data->pdev->id, 0xee, &eax, &edx);
|
|
+ if (err < 0) {
|
|
+ dev_warn(&pdev->dev,
|
|
+ "Unable to access MSR 0xEE, Tjmax left at %d "
|
|
+ "degrees C\n", data->tjmax/1000);
|
|
+ } else if (eax & 0x40000000) {
|
|
+ data->tjmax = 85000;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /* Intel says that above should not work for desktop Core2 processors,
|
|
+ but it seems to work. There is no other way to get the absolute
|
|
+ readings. Warn the user about this. First check if we are on a desktop part:
|
|
+ bit 50 of MSR_IA32_PLATFORM_ID should be 0.
|
|
+ */
|
|
+
|
|
+ rdmsr_safe_on_pcpu(data->pdev->id, MSR_IA32_PLATFORM_ID, &eax, &edx);
|
|
+
|
|
+ if ((data->x86_model == 0xf) && (!(edx & 0x00040000))) {
|
|
+ dev_warn(&pdev->dev, "Using undocumented features, absolute "
|
|
+ "temperature might be wrong!\n");
|
|
+ }
|
|
+
|
|
+ if ((err = sysfs_create_group(&pdev->dev.kobj, &coretemp_group)))
|
|
+ return err;
|
|
+
|
|
+ data->class_dev = hwmon_device_register(&pdev->dev);
|
|
+ if (IS_ERR(data->class_dev)) {
|
|
+ err = PTR_ERR(data->class_dev);
|
|
+ dev_err(&pdev->dev, "Class registration failed (%d)\n",
|
|
+ err);
|
|
+ goto exit_class;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+exit_class:
|
|
+ sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int coretemp_remove(struct platform_device *pdev)
|
|
+{
|
|
+ struct coretemp_data *data = platform_get_drvdata(pdev);
|
|
+
|
|
+ hwmon_device_unregister(data->class_dev);
|
|
+ sysfs_remove_group(&pdev->dev.kobj, &coretemp_group);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct platform_driver coretemp_driver = {
|
|
+ .driver = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .name = DRVNAME,
|
|
+ },
|
|
+ .probe = coretemp_probe,
|
|
+ .remove = coretemp_remove,
|
|
+};
|
|
+
|
|
+static LIST_HEAD(pdev_list);
|
|
+static DEFINE_MUTEX(pdev_list_mutex);
|
|
+
|
|
+struct cpu_info {
|
|
+ struct pdev_entry *pdev_entry;
|
|
+ u8 x86;
|
|
+ u32 cpuid_6_eax;
|
|
+};
|
|
+
|
|
+static void get_cpuid_info(void *arg)
|
|
+{
|
|
+ struct cpu_info *info = arg;
|
|
+ struct pdev_entry *pdev_entry = info->pdev_entry;
|
|
+ u32 val = cpuid_eax(1);
|
|
+
|
|
+ info->x86 = ((val >> 8) & 0xf) + ((val >> 20) & 0xff);
|
|
+ pdev_entry->x86_model = ((val >> 4) & 0xf) | ((val >> 12) & 0xf0);
|
|
+ pdev_entry->x86_mask = val & 0xf;
|
|
+
|
|
+ if (info->x86 != 6 || !pdev_entry->x86_model
|
|
+ || wrmsr_safe(MSR_IA32_UCODE_REV, 0, 0) < 0
|
|
+ || (sync_core(), rdmsr_safe(MSR_IA32_UCODE_REV,
|
|
+ &val, &pdev_entry->ucode_rev)) < 0)
|
|
+ pdev_entry->ucode_rev = ~0;
|
|
+
|
|
+ info->cpuid_6_eax = cpuid_eax(0) >= 6 ? cpuid_eax(6) : 0;
|
|
+}
|
|
+
|
|
+static int coretemp_device_add(unsigned int cpu)
|
|
+{
|
|
+ int err;
|
|
+ struct cpu_info info;
|
|
+ struct platform_device *pdev;
|
|
+ struct pdev_entry *pdev_entry;
|
|
+
|
|
+ pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL);
|
|
+ if (!pdev_entry) /* test the allocation itself; info is not set up yet */
|
|
+ return -ENOMEM;
|
|
+
|
|
+ info.pdev_entry = pdev_entry;
|
|
+ err = xen_set_physical_cpu_affinity(cpu);
|
|
+ if (!err) {
|
|
+ get_cpuid_info(&info);
|
|
+ WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
|
|
+ } else if (err > 0) {
|
|
+ static bool warned;
|
|
+
|
|
+ if (!warned) {
|
|
+ warned = true;
|
|
+ printk(KERN_WARNING DRVNAME
|
|
+ ": Cannot set physical CPU affinity"
|
|
+ " (assuming use of dom0_vcpus_pin)\n");
|
|
+ }
|
|
+ err = smp_call_function_single(cpu, get_cpuid_info, &info, 1);
|
|
+ }
|
|
+ if (err)
|
|
+ goto exit_entry_free;
|
|
+
|
|
+ /* check if family 6, models e, f */
|
|
+ if (info.x86 != 0x6 ||
|
|
+ !((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf))) {
|
|
+
|
|
+ /* supported CPU not found, but report the unknown
|
|
+ family 6 CPU (err is still 0 here, so this is not an error) */
|
|
+ if ((info.x86 == 0x6) && (pdev_entry->x86_model > 0xf))
|
|
+ printk(KERN_WARNING DRVNAME ": Unknown CPU "
|
|
+ "model 0x%x\n", pdev_entry->x86_model);
|
|
+ goto exit_entry_free;
|
|
+ }
|
|
+
|
|
+ pdev = platform_device_alloc(DRVNAME, cpu);
|
|
+ if (!pdev) {
|
|
+ err = -ENOMEM;
|
|
+ printk(KERN_ERR DRVNAME ": Device allocation failed\n");
|
|
+ goto exit_entry_free;
|
|
+ }
|
|
+
|
|
+ platform_set_drvdata(pdev, pdev_entry);
|
|
+ pdev_entry->pdev = pdev;
|
|
+
|
|
+ err = platform_device_add(pdev);
|
|
+ if (err) {
|
|
+ printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
|
|
+ err);
|
|
+ goto exit_device_put;
|
|
+ }
|
|
+
|
|
+ mutex_lock(&pdev_list_mutex);
|
|
+ list_add_tail(&pdev_entry->list, &pdev_list);
|
|
+ mutex_unlock(&pdev_list_mutex);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+exit_device_put:
|
|
+ platform_device_put(pdev);
|
|
+exit_entry_free:
|
|
+ kfree(pdev_entry);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void coretemp_device_remove(unsigned int cpu)
|
|
+{
|
|
+ struct pdev_entry *p, *n; /* n: next entry, saved because p is freed in the loop */
|
|
+
|
|
+ mutex_lock(&pdev_list_mutex);
|
|
+ list_for_each_entry_safe(p, n, &pdev_list, list) {
|
|
+ if (p->pdev->id == cpu) {
|
|
+ platform_device_unregister(p->pdev);
|
|
+ list_del(&p->list);
|
|
+ kfree(p);
|
|
+ }
|
|
+ }
|
|
+ mutex_unlock(&pdev_list_mutex);
|
|
+}
|
|
+
|
|
+static int coretemp_cpu_callback(struct notifier_block *nfb,
|
|
+ unsigned long action, void *hcpu)
|
|
+{
|
|
+ unsigned int cpu = (unsigned long) hcpu;
|
|
+
|
|
+ switch (action) {
|
|
+ case CPU_ONLINE:
|
|
+ coretemp_device_add(cpu);
|
|
+ break;
|
|
+ case CPU_DEAD:
|
|
+ coretemp_device_remove(cpu);
|
|
+ break;
|
|
+ }
|
|
+ return NOTIFY_OK;
|
|
+}
|
|
+
|
|
+static struct notifier_block coretemp_cpu_notifier = {
|
|
+ .notifier_call = coretemp_cpu_callback,
|
|
+};
|
|
+
|
|
+static int __init coretemp_init(void)
|
|
+{
|
|
+ int err = -ENODEV;
|
|
+
|
|
+ if (!is_initial_xendomain())
|
|
+ goto exit;
|
|
+
|
|
+ /* quick check if we run Intel */
|
|
+ if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL)
|
|
+ goto exit;
|
|
+
|
|
+ err = platform_driver_register(&coretemp_driver);
|
|
+ if (err)
|
|
+ goto exit;
|
|
+
|
|
+ err = register_pcpu_notifier(&coretemp_cpu_notifier);
|
|
+ if (err)
|
|
+ goto exit_driver_unreg;
|
|
+
|
|
+ if (list_empty(&pdev_list)) {
|
|
+ err = -ENODEV;
|
|
+ goto exit_notifier_unreg;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+exit_notifier_unreg:
|
|
+ unregister_pcpu_notifier(&coretemp_cpu_notifier);
|
|
+exit_driver_unreg:
|
|
+ platform_driver_unregister(&coretemp_driver);
|
|
+exit:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void __exit coretemp_exit(void)
|
|
+{
|
|
+ struct pdev_entry *p, *n;
|
|
+
|
|
+ unregister_pcpu_notifier(&coretemp_cpu_notifier);
|
|
+ mutex_lock(&pdev_list_mutex);
|
|
+ list_for_each_entry_safe(p, n, &pdev_list, list) {
|
|
+ platform_device_unregister(p->pdev);
|
|
+ list_del(&p->list);
|
|
+ kfree(p);
|
|
+ }
|
|
+ mutex_unlock(&pdev_list_mutex);
|
|
+ platform_driver_unregister(&coretemp_driver);
|
|
+}
|
|
+
|
|
+MODULE_AUTHOR("Rudolf Marek <r.marek@assembler.cz>");
|
|
+MODULE_DESCRIPTION("Intel Core temperature monitor");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+module_init(coretemp_init)
|
|
+module_exit(coretemp_exit)
|
|
--- head-2011-03-11.orig/drivers/pci/msi-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/pci/msi-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -12,16 +12,15 @@
|
|
#include <linux/interrupt.h>
|
|
#include <linux/init.h>
|
|
#include <linux/ioport.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/proc_fs.h>
|
|
#include <linux/msi.h>
|
|
+#include <linux/smp.h>
|
|
|
|
#include <xen/evtchn.h>
|
|
|
|
#include <asm/errno.h>
|
|
#include <asm/io.h>
|
|
-#include <asm/smp.h>
|
|
|
|
#include "pci.h"
|
|
#include "msi.h"
|
|
@@ -156,6 +155,7 @@ int register_msi_get_owner(int (*func)(s
|
|
get_owner = func;
|
|
return 0;
|
|
}
|
|
+EXPORT_SYMBOL(register_msi_get_owner);
|
|
|
|
int unregister_msi_get_owner(int (*func)(struct pci_dev *dev))
|
|
{
|
|
@@ -164,6 +164,7 @@ int unregister_msi_get_owner(int (*func)
|
|
get_owner = NULL;
|
|
return 0;
|
|
}
|
|
+EXPORT_SYMBOL(unregister_msi_get_owner);
|
|
|
|
static int msi_get_dev_owner(struct pci_dev *dev)
|
|
{
|
|
@@ -259,11 +260,6 @@ static int msi_map_vector(struct pci_dev
|
|
map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq));
|
|
}
|
|
|
|
-static int msi_init(void)
|
|
-{
|
|
- return 0;
|
|
-}
|
|
-
|
|
#ifdef CONFIG_PM
|
|
void pci_restore_msi_state(struct pci_dev *dev)
|
|
{
|
|
@@ -392,21 +388,32 @@ static int msix_capability_init(struct p
|
|
}
|
|
|
|
/**
|
|
- * pci_msi_supported - check whether MSI may be enabled on device
|
|
+ * pci_msi_check_device - check whether MSI may be enabled on a device
|
|
* @dev: pointer to the pci_dev data structure of MSI device function
|
|
+ * @nvec: how many MSIs have been requested ?
|
|
+ * @type: are we checking for MSI or MSI-X ?
|
|
*
|
|
* Look at global flags, the device itself, and its parent busses
|
|
- * to return 0 if MSI are supported for the device.
|
|
+ * to determine if MSI/-X are supported for the device. If MSI/-X is
|
|
+ * supported return 0, else return an error code.
|
|
**/
|
|
-static
|
|
-int pci_msi_supported(struct pci_dev * dev)
|
|
+static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
|
|
{
|
|
struct pci_bus *bus;
|
|
+ int ret;
|
|
|
|
/* MSI must be globally enabled and supported by the device */
|
|
if (!pci_msi_enable || !dev || dev->no_msi)
|
|
return -EINVAL;
|
|
|
|
+ /*
|
|
+ * You can't ask to have 0 or less MSIs configured.
|
|
+ * a) it's stupid ..
|
|
+ * b) the list manipulation code assumes nvec >= 1.
|
|
+ */
|
|
+ if (nvec < 1)
|
|
+ return -ERANGE;
|
|
+
|
|
/* Any bridge which does NOT route MSI transactions from it's
|
|
* secondary bus to it's primary bus must set NO_MSI flag on
|
|
* the secondary pci_bus.
|
|
@@ -417,6 +424,13 @@ int pci_msi_supported(struct pci_dev * d
|
|
if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
|
|
return -EINVAL;
|
|
|
|
+ ret = arch_msi_check_device(dev, nvec, type);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+
|
|
+ if (!pci_find_capability(dev, type))
|
|
+ return -EINVAL;
|
|
+
|
|
return 0;
|
|
}
|
|
|
|
@@ -433,15 +447,12 @@ int pci_msi_supported(struct pci_dev * d
|
|
extern int pci_frontend_enable_msi(struct pci_dev *dev);
|
|
int pci_enable_msi(struct pci_dev* dev)
|
|
{
|
|
- int pos, temp, status;
|
|
+ int temp, status;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
- if (pci_msi_supported(dev) < 0)
|
|
- return -EINVAL;
|
|
-
|
|
- status = msi_init();
|
|
- if (status < 0)
|
|
- return status;
|
|
+ status = pci_msi_check_device(dev, 1, PCI_CAP_ID_MSI);
|
|
+ if (status)
|
|
+ return status;
|
|
|
|
#ifdef CONFIG_XEN_PCIDEV_FRONTEND
|
|
if (!is_initial_xendomain())
|
|
@@ -463,10 +474,6 @@ int pci_enable_msi(struct pci_dev* dev)
|
|
|
|
temp = dev->irq;
|
|
|
|
- pos = pci_find_capability(dev, PCI_CAP_ID_MSI);
|
|
- if (!pos)
|
|
- return -EINVAL;
|
|
-
|
|
/* Check whether driver already requested for MSI-X irqs */
|
|
if (dev->msix_enabled) {
|
|
printk(KERN_INFO "PCI: %s: Can't enable MSI. "
|
|
@@ -481,6 +488,7 @@ int pci_enable_msi(struct pci_dev* dev)
|
|
|
|
return status;
|
|
}
|
|
+EXPORT_SYMBOL(pci_enable_msi);
|
|
|
|
extern void pci_frontend_disable_msi(struct pci_dev* dev);
|
|
void pci_disable_msi(struct pci_dev* dev)
|
|
@@ -488,12 +496,7 @@ void pci_disable_msi(struct pci_dev* dev
|
|
int pirq;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
- if (!pci_msi_enable)
|
|
- return;
|
|
- if (!dev)
|
|
- return;
|
|
-
|
|
- if (!dev->msi_enabled)
|
|
+ if (!pci_msi_enable || !dev || !dev->msi_enabled)
|
|
return;
|
|
|
|
#ifdef CONFIG_XEN_PCIDEV_FRONTEND
|
|
@@ -516,6 +519,7 @@ void pci_disable_msi(struct pci_dev* dev
|
|
pci_intx(dev, 1); /* enable intx */
|
|
dev->msi_enabled = 0;
|
|
}
|
|
+EXPORT_SYMBOL(pci_disable_msi);
|
|
|
|
/**
|
|
* pci_enable_msix - configure device's MSI-X capability structure
|
|
@@ -541,7 +545,7 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
u16 control;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
- if (!entries || pci_msi_supported(dev) < 0)
|
|
+ if (!entries)
|
|
return -EINVAL;
|
|
|
|
#ifdef CONFIG_XEN_PCIDEV_FRONTEND
|
|
@@ -580,14 +584,11 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
}
|
|
#endif
|
|
|
|
- status = msi_init();
|
|
- if (status < 0)
|
|
+ status = pci_msi_check_device(dev, nvec, PCI_CAP_ID_MSIX);
|
|
+ if (status)
|
|
return status;
|
|
|
|
pos = pci_find_capability(dev, PCI_CAP_ID_MSIX);
|
|
- if (!pos)
|
|
- return -EINVAL;
|
|
-
|
|
pci_read_config_word(dev, msi_control_reg(pos), &control);
|
|
nr_entries = multi_msix_capable(control);
|
|
if (nvec > nr_entries)
|
|
@@ -619,16 +620,12 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
|
|
return status;
|
|
}
|
|
+EXPORT_SYMBOL(pci_enable_msix);
|
|
|
|
extern void pci_frontend_disable_msix(struct pci_dev* dev);
|
|
void pci_disable_msix(struct pci_dev* dev)
|
|
{
|
|
- if (!pci_msi_enable)
|
|
- return;
|
|
- if (!dev)
|
|
- return;
|
|
-
|
|
- if (!dev->msix_enabled)
|
|
+ if (!pci_msi_enable || !dev || !dev->msix_enabled)
|
|
return;
|
|
|
|
#ifdef CONFIG_XEN_PCIDEV_FRONTEND
|
|
@@ -659,6 +656,7 @@ void pci_disable_msix(struct pci_dev* de
|
|
pci_intx(dev, 1); /* enable intx */
|
|
dev->msix_enabled = 0;
|
|
}
|
|
+EXPORT_SYMBOL(pci_disable_msix);
|
|
|
|
/**
|
|
* msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
|
|
@@ -697,12 +695,18 @@ void pci_no_msi(void)
|
|
pci_msi_enable = 0;
|
|
}
|
|
|
|
-EXPORT_SYMBOL(pci_enable_msi);
|
|
-EXPORT_SYMBOL(pci_disable_msi);
|
|
-EXPORT_SYMBOL(pci_enable_msix);
|
|
-EXPORT_SYMBOL(pci_disable_msix);
|
|
-#ifdef CONFIG_XEN
|
|
-EXPORT_SYMBOL(register_msi_get_owner);
|
|
-EXPORT_SYMBOL(unregister_msi_get_owner);
|
|
+void pci_msi_init_pci_dev(struct pci_dev *dev)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ INIT_LIST_HEAD(&dev->msi_list);
|
|
#endif
|
|
+}
|
|
|
|
+
|
|
+/* Arch hooks */
|
|
+
|
|
+int __attribute__ ((weak))
|
|
+arch_msi_check_device(struct pci_dev* dev, int nvec, int type)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
--- head-2011-03-11.orig/drivers/xen/blkfront/blkfront.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/blkfront/blkfront.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -244,7 +244,7 @@ static int setup_blkring(struct xenbus_d
|
|
info->ring_ref = err;
|
|
|
|
err = bind_listening_port_to_irqhandler(
|
|
- dev->otherend_id, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
|
|
+ dev->otherend_id, blkif_int, IRQF_SAMPLE_RANDOM, "blkif", info);
|
|
if (err <= 0) {
|
|
xenbus_dev_fatal(dev, err,
|
|
"bind_listening_port_to_irqhandler");
|
|
--- head-2011-03-11.orig/drivers/xen/char/mem.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/char/mem.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -18,7 +18,6 @@
|
|
#include <linux/raw.h>
|
|
#include <linux/tty.h>
|
|
#include <linux/capability.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/device.h>
|
|
#include <asm/pgalloc.h>
|
|
--- head-2011-03-11.orig/drivers/xen/core/hypervisor_sysfs.c 2007-07-10 09:42:30.000000000 +0200
|
|
+++ head-2011-03-11/drivers/xen/core/hypervisor_sysfs.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -50,7 +50,7 @@ static int __init hypervisor_subsys_init
|
|
if (!is_running_on_xen())
|
|
return -ENODEV;
|
|
|
|
- hypervisor_subsys.kset.kobj.ktype = &hyp_sysfs_kobj_type;
|
|
+ hypervisor_subsys.kobj.ktype = &hyp_sysfs_kobj_type;
|
|
return 0;
|
|
}
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/core/machine_reboot.c 2011-01-24 12:06:05.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/machine_reboot.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -80,6 +80,8 @@ static void post_suspend(int suspend_can
|
|
#ifdef CONFIG_SMP
|
|
cpu_initialized_map = cpu_online_map;
|
|
#endif
|
|
+ for_each_possible_cpu(i)
|
|
+ setup_runstate_area(i);
|
|
}
|
|
|
|
shinfo_mfn = xen_start_info->shared_info >> PAGE_SHIFT;
|
|
--- head-2011-03-11.orig/drivers/xen/core/smpboot.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/core/smpboot.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -158,13 +158,12 @@ static void xen_smp_intr_exit(unsigned i
|
|
|
|
void __cpuinit cpu_bringup(void)
|
|
{
|
|
+ cpu_init();
|
|
#ifdef __i386__
|
|
- cpu_set_gdt(current_thread_info()->cpu);
|
|
- secondary_cpu_init();
|
|
+ identify_secondary_cpu(cpu_data + smp_processor_id());
|
|
#else
|
|
- cpu_init();
|
|
-#endif
|
|
identify_cpu(cpu_data + smp_processor_id());
|
|
+#endif
|
|
touch_softlockup_watchdog();
|
|
preempt_disable();
|
|
local_irq_enable();
|
|
@@ -184,11 +183,6 @@ static void __cpuinit cpu_initialize_con
|
|
static DEFINE_SPINLOCK(ctxt_lock);
|
|
|
|
struct task_struct *idle = idle_task(cpu);
|
|
-#ifdef __x86_64__
|
|
- struct desc_ptr *gdt_descr = &cpu_gdt_descr[cpu];
|
|
-#else
|
|
- struct Xgt_desc_struct *gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
-#endif
|
|
|
|
if (cpu_test_and_set(cpu, cpu_initialized_map))
|
|
return;
|
|
@@ -211,11 +205,11 @@ static void __cpuinit cpu_initialize_con
|
|
smp_trap_init(ctxt.trap_ctxt);
|
|
|
|
ctxt.ldt_ents = 0;
|
|
-
|
|
- ctxt.gdt_frames[0] = virt_to_mfn(gdt_descr->address);
|
|
- ctxt.gdt_ents = gdt_descr->size / 8;
|
|
+ ctxt.gdt_ents = GDT_SIZE / 8;
|
|
|
|
#ifdef __i386__
|
|
+ ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
|
|
+
|
|
ctxt.user_regs.cs = __KERNEL_CS;
|
|
ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
|
|
|
|
@@ -228,7 +222,11 @@ static void __cpuinit cpu_initialize_con
|
|
ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
|
|
|
|
ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
|
|
+
|
|
+ ctxt.user_regs.fs = __KERNEL_PERCPU;
|
|
#else /* __x86_64__ */
|
|
+ ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address);
|
|
+
|
|
ctxt.user_regs.cs = __KERNEL_CS;
|
|
ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
|
|
|
|
@@ -258,9 +256,8 @@ void __init smp_prepare_cpus(unsigned in
|
|
struct vcpu_get_physid cpu_id;
|
|
#ifdef __x86_64__
|
|
struct desc_ptr *gdt_descr;
|
|
-#else
|
|
- struct Xgt_desc_struct *gdt_descr;
|
|
#endif
|
|
+ void *gdt_addr;
|
|
|
|
apicid = 0;
|
|
if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
|
|
@@ -309,14 +306,12 @@ void __init smp_prepare_cpus(unsigned in
|
|
}
|
|
gdt_descr->size = GDT_SIZE;
|
|
memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
|
|
+ gdt_addr = (void *)gdt_descr->address;
|
|
#else
|
|
- if (unlikely(!init_gdt(cpu, idle)))
|
|
- continue;
|
|
- gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
|
|
+ init_gdt(cpu);
|
|
+ gdt_addr = get_cpu_gdt_table(cpu);
|
|
#endif
|
|
- make_page_readonly(
|
|
- (void *)gdt_descr->address,
|
|
- XENFEAT_writable_descriptor_tables);
|
|
+ make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
|
|
|
|
apicid = cpu;
|
|
if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
|
|
@@ -330,6 +325,8 @@ void __init smp_prepare_cpus(unsigned in
|
|
cpu_pda(cpu)->pcurrent = idle;
|
|
cpu_pda(cpu)->cpunumber = cpu;
|
|
clear_tsk_thread_flag(idle, TIF_FORK);
|
|
+#else
|
|
+ per_cpu(current_task, cpu) = idle;
|
|
#endif
|
|
|
|
irq_ctx_init(cpu);
|
|
@@ -354,8 +351,12 @@ void __init smp_prepare_cpus(unsigned in
|
|
#endif
|
|
}
|
|
|
|
-void __devinit smp_prepare_boot_cpu(void)
|
|
+void __init smp_prepare_boot_cpu(void)
|
|
{
|
|
+#ifdef __i386__
|
|
+ init_gdt(smp_processor_id());
|
|
+ switch_to_new_gdt();
|
|
+#endif
|
|
prefill_possible_map();
|
|
}
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/core/xen_sysfs.c 2009-05-29 10:25:53.000000000 +0200
|
|
+++ head-2011-03-11/drivers/xen/core/xen_sysfs.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -30,12 +30,12 @@ HYPERVISOR_ATTR_RO(type);
|
|
|
|
static int __init xen_sysfs_type_init(void)
|
|
{
|
|
- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
|
|
+ return sysfs_create_file(&hypervisor_subsys.kobj, &type_attr.attr);
|
|
}
|
|
|
|
static void xen_sysfs_type_destroy(void)
|
|
{
|
|
- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &type_attr.attr);
|
|
+ sysfs_remove_file(&hypervisor_subsys.kobj, &type_attr.attr);
|
|
}
|
|
|
|
/* xen version attributes */
|
|
@@ -91,13 +91,13 @@ static struct attribute_group version_gr
|
|
|
|
static int __init xen_sysfs_version_init(void)
|
|
{
|
|
- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
|
|
+ return sysfs_create_group(&hypervisor_subsys.kobj,
|
|
&version_group);
|
|
}
|
|
|
|
static void xen_sysfs_version_destroy(void)
|
|
{
|
|
- sysfs_remove_group(&hypervisor_subsys.kset.kobj, &version_group);
|
|
+ sysfs_remove_group(&hypervisor_subsys.kobj, &version_group);
|
|
}
|
|
|
|
/* UUID */
|
|
@@ -126,12 +126,12 @@ HYPERVISOR_ATTR_RO(uuid);
|
|
|
|
static int __init xen_sysfs_uuid_init(void)
|
|
{
|
|
- return sysfs_create_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
|
|
+ return sysfs_create_file(&hypervisor_subsys.kobj, &uuid_attr.attr);
|
|
}
|
|
|
|
static void xen_sysfs_uuid_destroy(void)
|
|
{
|
|
- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &uuid_attr.attr);
|
|
+ sysfs_remove_file(&hypervisor_subsys.kobj, &uuid_attr.attr);
|
|
}
|
|
|
|
/* xen compilation attributes */
|
|
@@ -204,13 +204,13 @@ static struct attribute_group xen_compil
|
|
|
|
int __init static xen_compilation_init(void)
|
|
{
|
|
- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
|
|
+ return sysfs_create_group(&hypervisor_subsys.kobj,
|
|
&xen_compilation_group);
|
|
}
|
|
|
|
static void xen_compilation_destroy(void)
|
|
{
|
|
- sysfs_remove_group(&hypervisor_subsys.kset.kobj,
|
|
+ sysfs_remove_group(&hypervisor_subsys.kobj,
|
|
&xen_compilation_group);
|
|
}
|
|
|
|
@@ -325,13 +325,13 @@ static struct attribute_group xen_proper
|
|
|
|
static int __init xen_properties_init(void)
|
|
{
|
|
- return sysfs_create_group(&hypervisor_subsys.kset.kobj,
|
|
+ return sysfs_create_group(&hypervisor_subsys.kobj,
|
|
&xen_properties_group);
|
|
}
|
|
|
|
static void xen_properties_destroy(void)
|
|
{
|
|
- sysfs_remove_group(&hypervisor_subsys.kset.kobj,
|
|
+ sysfs_remove_group(&hypervisor_subsys.kobj,
|
|
&xen_properties_group);
|
|
}
|
|
|
|
@@ -350,13 +350,13 @@ HYPERVISOR_ATTR_RO(vmcoreinfo);
|
|
|
|
static int __init xen_sysfs_vmcoreinfo_init(void)
|
|
{
|
|
- return sysfs_create_file(&hypervisor_subsys.kset.kobj,
|
|
+ return sysfs_create_file(&hypervisor_subsys.kobj,
|
|
&vmcoreinfo_attr.attr);
|
|
}
|
|
|
|
static void xen_sysfs_vmcoreinfo_destroy(void)
|
|
{
|
|
- sysfs_remove_file(&hypervisor_subsys.kset.kobj, &vmcoreinfo_attr.attr);
|
|
+ sysfs_remove_file(&hypervisor_subsys.kobj, &vmcoreinfo_attr.attr);
|
|
}
|
|
|
|
#endif
|
|
--- head-2011-03-11.orig/drivers/xen/netback/common.h 2011-02-17 10:07:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netback/common.h 2011-02-17 10:09:57.000000000 +0100
|
|
@@ -107,7 +107,6 @@ typedef struct netif_st {
|
|
struct list_head list; /* scheduling list */
|
|
atomic_t refcnt;
|
|
struct net_device *dev;
|
|
- struct net_device_stats stats;
|
|
|
|
unsigned int carrier;
|
|
|
|
@@ -209,7 +208,6 @@ void netif_schedule_work(netif_t *netif)
|
|
void netif_deschedule_work(netif_t *netif);
|
|
|
|
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
|
|
-struct net_device_stats *netif_be_get_stats(struct net_device *dev);
|
|
irqreturn_t netif_be_int(int irq, void *dev_id);
|
|
|
|
static inline int netbk_can_queue(struct net_device *dev)
|
|
--- head-2011-03-11.orig/drivers/xen/netback/interface.c 2011-02-17 10:08:03.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netback/interface.c 2011-02-17 10:10:00.000000000 +0100
|
|
@@ -255,7 +255,6 @@ netif_t *netif_alloc(struct device *pare
|
|
init_timer(&netif->tx_queue_timeout);
|
|
|
|
dev->hard_start_xmit = netif_be_start_xmit;
|
|
- dev->get_stats = netif_be_get_stats;
|
|
dev->open = net_open;
|
|
dev->stop = net_close;
|
|
dev->change_mtu = netbk_change_mtu;
|
|
--- head-2011-03-11.orig/drivers/xen/netback/loopback.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netback/loopback.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -62,14 +62,17 @@ MODULE_PARM_DESC(nloopbacks, "Number of
|
|
|
|
struct net_private {
|
|
struct net_device *loopback_dev;
|
|
- struct net_device_stats stats;
|
|
int loop_idx;
|
|
};
|
|
|
|
+static inline struct net_private *loopback_priv(struct net_device *dev)
|
|
+{
|
|
+ return netdev_priv(dev);
|
|
+}
|
|
+
|
|
static int loopback_open(struct net_device *dev)
|
|
{
|
|
- struct net_private *np = netdev_priv(dev);
|
|
- memset(&np->stats, 0, sizeof(np->stats));
|
|
+ memset(&dev->stats, 0, sizeof(dev->stats));
|
|
netif_start_queue(dev);
|
|
return 0;
|
|
}
|
|
@@ -130,10 +133,8 @@ static int skb_remove_foreign_references
|
|
|
|
static int loopback_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
|
{
|
|
- struct net_private *np = netdev_priv(dev);
|
|
-
|
|
if (!skb_remove_foreign_references(skb)) {
|
|
- np->stats.tx_dropped++;
|
|
+ dev->stats.tx_dropped++;
|
|
dev_kfree_skb(skb);
|
|
return 0;
|
|
}
|
|
@@ -143,19 +144,17 @@ static int loopback_start_xmit(struct sk
|
|
|
|
skb_orphan(skb);
|
|
|
|
- np->stats.tx_bytes += skb->len;
|
|
- np->stats.tx_packets++;
|
|
+ dev->stats.tx_bytes += skb->len;
|
|
+ dev->stats.tx_packets++;
|
|
|
|
/* Switch to loopback context. */
|
|
- dev = np->loopback_dev;
|
|
- np = netdev_priv(dev);
|
|
+ dev = loopback_priv(dev)->loopback_dev;
|
|
|
|
- np->stats.rx_bytes += skb->len;
|
|
- np->stats.rx_packets++;
|
|
+ dev->stats.rx_bytes += skb->len;
|
|
+ dev->stats.rx_packets++;
|
|
|
|
skb->pkt_type = PACKET_HOST; /* overridden by eth_type_trans() */
|
|
skb->protocol = eth_type_trans(skb, dev);
|
|
- skb->dev = dev;
|
|
dev->last_rx = jiffies;
|
|
|
|
/* Flush netfilter context: rx'ed skbuffs not expected to have any. */
|
|
@@ -167,17 +166,11 @@ static int loopback_start_xmit(struct sk
|
|
return 0;
|
|
}
|
|
|
|
-static struct net_device_stats *loopback_get_stats(struct net_device *dev)
|
|
-{
|
|
- struct net_private *np = netdev_priv(dev);
|
|
- return &np->stats;
|
|
-}
|
|
-
|
|
static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
|
|
{
|
|
strcpy(info->driver, "netloop");
|
|
snprintf(info->bus_info, ETHTOOL_BUSINFO_LEN, "vif-0-%d",
|
|
- ((struct net_private *)netdev_priv(dev))->loop_idx);
|
|
+ loopback_priv(dev)->loop_idx);
|
|
}
|
|
|
|
static struct ethtool_ops network_ethtool_ops =
|
|
@@ -204,7 +197,7 @@ static void loopback_set_multicast_list(
|
|
static void loopback_construct(struct net_device *dev, struct net_device *lo,
|
|
int loop_idx)
|
|
{
|
|
- struct net_private *np = netdev_priv(dev);
|
|
+ struct net_private *np = loopback_priv(dev);
|
|
|
|
np->loopback_dev = lo;
|
|
np->loop_idx = loop_idx;
|
|
@@ -212,7 +205,6 @@ static void loopback_construct(struct ne
|
|
dev->open = loopback_open;
|
|
dev->stop = loopback_close;
|
|
dev->hard_start_xmit = loopback_start_xmit;
|
|
- dev->get_stats = loopback_get_stats;
|
|
dev->set_multicast_list = loopback_set_multicast_list;
|
|
dev->change_mtu = NULL; /* allow arbitrary mtu */
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/netback/netback.c 2011-02-09 15:35:10.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netback/netback.c 2011-02-09 15:54:33.000000000 +0100
|
|
@@ -208,7 +208,7 @@ static struct sk_buff *netbk_copy_skb(st
|
|
goto err;
|
|
|
|
skb_reserve(nskb, 16 + NET_IP_ALIGN);
|
|
- headlen = nskb->end - nskb->data;
|
|
+ headlen = skb_end_pointer(nskb) - nskb->data;
|
|
if (headlen > skb_headlen(skb))
|
|
headlen = skb_headlen(skb);
|
|
ret = skb_copy_bits(skb, 0, __skb_put(nskb, headlen), headlen);
|
|
@@ -254,11 +254,15 @@ static struct sk_buff *netbk_copy_skb(st
|
|
len -= copy;
|
|
}
|
|
|
|
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
|
|
+ offset = 0;
|
|
+#else
|
|
offset = nskb->data - skb->data;
|
|
+#endif
|
|
|
|
- nskb->h.raw = skb->h.raw + offset;
|
|
- nskb->nh.raw = skb->nh.raw + offset;
|
|
- nskb->mac.raw = skb->mac.raw + offset;
|
|
+ nskb->transport_header = skb->transport_header + offset;
|
|
+ nskb->network_header = skb->network_header + offset;
|
|
+ nskb->mac_header = skb->mac_header + offset;
|
|
|
|
return nskb;
|
|
|
|
@@ -348,7 +352,7 @@ int netif_be_start_xmit(struct sk_buff *
|
|
return 0;
|
|
|
|
drop:
|
|
- netif->stats.tx_dropped++;
|
|
+ dev->stats.tx_dropped++;
|
|
dev_kfree_skb(skb);
|
|
return 0;
|
|
}
|
|
@@ -700,8 +704,8 @@ static void net_rx_action(unsigned long
|
|
netbk_free_pages(nr_frags, meta + npo.meta_cons + 1);
|
|
}
|
|
|
|
- netif->stats.tx_bytes += skb->len;
|
|
- netif->stats.tx_packets++;
|
|
+ skb->dev->stats.tx_bytes += skb->len;
|
|
+ skb->dev->stats.tx_packets++;
|
|
|
|
id = meta[npo.meta_cons].id;
|
|
flags = nr_frags ? NETRXF_more_data : 0;
|
|
@@ -786,12 +790,6 @@ static void netbk_tx_pending_timeout(uns
|
|
tasklet_schedule(&net_tx_tasklet);
|
|
}
|
|
|
|
-struct net_device_stats *netif_be_get_stats(struct net_device *dev)
|
|
-{
|
|
- netif_t *netif = netdev_priv(dev);
|
|
- return &netif->stats;
|
|
-}
|
|
-
|
|
static int __on_net_schedule_list(netif_t *netif)
|
|
{
|
|
return netif->list.next != NULL;
|
|
@@ -1428,10 +1426,12 @@ static void net_tx_action(unsigned long
|
|
|
|
mop = tx_map_ops;
|
|
while ((skb = __skb_dequeue(&tx_queue)) != NULL) {
|
|
+ struct net_device *dev;
|
|
netif_tx_request_t *txp;
|
|
|
|
pending_idx = *((u16 *)skb->data);
|
|
netif = pending_tx_info[pending_idx].netif;
|
|
+ dev = netif->dev;
|
|
txp = &pending_tx_info[pending_idx].req;
|
|
|
|
/* Check the remap error code. */
|
|
@@ -1439,6 +1439,7 @@ static void net_tx_action(unsigned long
|
|
DPRINTK("netback grant failed.\n");
|
|
skb_shinfo(skb)->nr_frags = 0;
|
|
kfree_skb(skb);
|
|
+ dev->stats.rx_dropped++;
|
|
continue;
|
|
}
|
|
|
|
@@ -1474,8 +1475,7 @@ static void net_tx_action(unsigned long
|
|
__pskb_pull_tail(skb, target - skb_headlen(skb));
|
|
}
|
|
|
|
- skb->dev = netif->dev;
|
|
- skb->protocol = eth_type_trans(skb, skb->dev);
|
|
+ skb->protocol = eth_type_trans(skb, dev);
|
|
|
|
if (skb_checksum_setup(skb, &netif->rx_gso_csum_fixups)) {
|
|
DPRINTK("Can't setup checksum in net_tx_action\n");
|
|
@@ -1483,18 +1483,19 @@ static void net_tx_action(unsigned long
|
|
continue;
|
|
}
|
|
|
|
- netif->stats.rx_bytes += skb->len;
|
|
- netif->stats.rx_packets++;
|
|
-
|
|
if (unlikely(netbk_copy_skb_mode == NETBK_ALWAYS_COPY_SKB) &&
|
|
unlikely(skb_linearize(skb))) {
|
|
DPRINTK("Can't linearize skb in net_tx_action.\n");
|
|
kfree_skb(skb);
|
|
+ dev->stats.rx_errors++;
|
|
continue;
|
|
}
|
|
|
|
+ dev->stats.rx_bytes += skb->len;
|
|
+ dev->stats.rx_packets++;
|
|
+
|
|
netif_rx(skb);
|
|
- netif->dev->last_rx = jiffies;
|
|
+ dev->last_rx = jiffies;
|
|
}
|
|
|
|
out:
|
|
@@ -1691,7 +1692,7 @@ static int __init netback_init(void)
|
|
(void)bind_virq_to_irqhandler(VIRQ_DEBUG,
|
|
0,
|
|
netif_be_dbg,
|
|
- SA_SHIRQ,
|
|
+ IRQF_SHARED,
|
|
"net-be-dbg",
|
|
&netif_be_dbg);
|
|
#endif
|
|
--- head-2011-03-11.orig/drivers/xen/netback/xenbus.c 2011-02-17 10:08:09.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netback/xenbus.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -19,6 +19,7 @@
|
|
|
|
#include <stdarg.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/rwsem.h>
|
|
#include <xen/xenbus.h>
|
|
#include "common.h"
|
|
|
|
@@ -28,11 +29,12 @@
|
|
printk("netback/xenbus (%s:%d) " fmt ".\n", __FUNCTION__, __LINE__, ##args)
|
|
#endif
|
|
|
|
+static DECLARE_RWSEM(teardown_sem);
|
|
|
|
static int connect_rings(struct backend_info *);
|
|
static void connect(struct backend_info *);
|
|
static void backend_create_netif(struct backend_info *be);
|
|
-static void netback_disconnect(struct device *);
|
|
+static void netback_disconnect(struct device *, int);
|
|
|
|
static int netback_remove(struct xenbus_device *dev)
|
|
{
|
|
@@ -40,21 +42,26 @@ static int netback_remove(struct xenbus_
|
|
|
|
netback_remove_accelerators(be, dev);
|
|
|
|
- netback_disconnect(&dev->dev);
|
|
+ netback_disconnect(&dev->dev, 1);
|
|
kfree(be);
|
|
- dev->dev.driver_data = NULL;
|
|
return 0;
|
|
}
|
|
|
|
-static void netback_disconnect(struct device *xbdev_dev)
|
|
+static void netback_disconnect(struct device *xbdev_dev, int clear)
|
|
{
|
|
struct backend_info *be = xbdev_dev->driver_data;
|
|
|
|
- if (be->netif) {
|
|
+ if (be->netif)
|
|
kobject_uevent(&xbdev_dev->kobj, KOBJ_OFFLINE);
|
|
+
|
|
+ down_write(&teardown_sem);
|
|
+ if (be->netif) {
|
|
netif_disconnect(be->netif);
|
|
be->netif = NULL;
|
|
}
|
|
+ if (clear)
|
|
+ xbdev_dev->driver_data = NULL;
|
|
+ up_write(&teardown_sem);
|
|
}
|
|
|
|
/**
|
|
@@ -159,8 +166,7 @@ fail:
|
|
static int netback_uevent(struct xenbus_device *xdev, char **envp,
|
|
int num_envp, char *buffer, int buffer_size)
|
|
{
|
|
- struct backend_info *be = xdev->dev.driver_data;
|
|
- netif_t *netif = be->netif;
|
|
+ struct backend_info *be;
|
|
int i = 0, length = 0;
|
|
char *val;
|
|
|
|
@@ -178,8 +184,12 @@ static int netback_uevent(struct xenbus_
|
|
kfree(val);
|
|
}
|
|
|
|
- add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
|
|
- "vif=%s", netif->dev->name);
|
|
+ down_read(&teardown_sem);
|
|
+ be = xdev->dev.driver_data;
|
|
+ if (be && be->netif)
|
|
+ add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
|
|
+ &length, "vif=%s", be->netif->dev->name);
|
|
+ up_read(&teardown_sem);
|
|
|
|
envp[i] = NULL;
|
|
|
|
@@ -192,6 +202,7 @@ static void backend_create_netif(struct
|
|
int err;
|
|
long handle;
|
|
struct xenbus_device *dev = be->dev;
|
|
+ netif_t *netif;
|
|
|
|
if (be->netif != NULL)
|
|
return;
|
|
@@ -202,13 +213,13 @@ static void backend_create_netif(struct
|
|
return;
|
|
}
|
|
|
|
- be->netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
|
|
- if (IS_ERR(be->netif)) {
|
|
- err = PTR_ERR(be->netif);
|
|
- be->netif = NULL;
|
|
+ netif = netif_alloc(&dev->dev, dev->otherend_id, handle);
|
|
+ if (IS_ERR(netif)) {
|
|
+ err = PTR_ERR(netif);
|
|
xenbus_dev_fatal(dev, err, "creating interface");
|
|
return;
|
|
}
|
|
+ be->netif = netif;
|
|
|
|
kobject_uevent(&dev->dev.kobj, KOBJ_ONLINE);
|
|
}
|
|
@@ -249,7 +260,7 @@ static void frontend_changed(struct xenb
|
|
break;
|
|
|
|
case XenbusStateClosing:
|
|
- netback_disconnect(&dev->dev);
|
|
+ netback_disconnect(&dev->dev, 0);
|
|
xenbus_switch_state(dev, XenbusStateClosing);
|
|
break;
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/netfront/accel.c 2009-05-04 10:01:03.000000000 +0200
|
|
+++ head-2011-03-11/drivers/xen/netfront/accel.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -548,7 +548,7 @@ static void accelerator_remove_hooks(str
|
|
|
|
/* Last chance to get statistics from the accelerator */
|
|
vif_state->hooks->get_stats(vif_state->np->netdev,
|
|
- &vif_state->np->stats);
|
|
+ &vif_state->np->netdev->stats);
|
|
|
|
spin_unlock_irqrestore(&accelerator->vif_states_lock,
|
|
flags);
|
|
@@ -604,7 +604,8 @@ static int do_remove(struct netfront_inf
|
|
spin_lock_irqsave(&accelerator->vif_states_lock, flags);
|
|
|
|
/* Last chance to get statistics from the accelerator */
|
|
- np->accel_vif_state.hooks->get_stats(np->netdev, &np->stats);
|
|
+ np->accel_vif_state.hooks->get_stats(np->netdev,
|
|
+ &np->netdev->stats);
|
|
|
|
spin_unlock_irqrestore(&accelerator->vif_states_lock,
|
|
flags);
|
|
@@ -804,9 +805,9 @@ void netfront_accelerator_call_stop_napi
|
|
/*
|
|
* No lock pre-requisites. Takes the vif_states_lock spinlock
|
|
*/
|
|
-int netfront_accelerator_call_get_stats(struct netfront_info *np,
|
|
- struct net_device *dev)
|
|
+int netfront_accelerator_call_get_stats(struct net_device *dev)
|
|
{
|
|
+ struct netfront_info *np = netdev_priv(dev);
|
|
struct netfront_accelerator *accelerator;
|
|
unsigned long flags;
|
|
int rc = 0;
|
|
@@ -819,7 +820,7 @@ int netfront_accelerator_call_get_stats(
|
|
if (np->accel_vif_state.hooks &&
|
|
np->accelerator == accelerator)
|
|
rc = np->accel_vif_state.hooks->get_stats(dev,
|
|
- &np->stats);
|
|
+ &dev->stats);
|
|
spin_unlock_irqrestore(&accelerator->vif_states_lock, flags);
|
|
}
|
|
return rc;
|
|
--- head-2011-03-11.orig/drivers/xen/netfront/netfront.c 2011-02-09 15:38:24.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netfront/netfront.c 2011-02-09 15:54:17.000000000 +0100
|
|
@@ -514,7 +514,7 @@ static int setup_device(struct xenbus_de
|
|
memcpy(netdev->dev_addr, info->mac, ETH_ALEN);
|
|
|
|
err = bind_listening_port_to_irqhandler(
|
|
- dev->otherend_id, netif_int, SA_SAMPLE_RANDOM, netdev->name,
|
|
+ dev->otherend_id, netif_int, IRQF_SAMPLE_RANDOM, netdev->name,
|
|
netdev);
|
|
if (err < 0)
|
|
goto fail;
|
|
@@ -626,8 +626,6 @@ static int network_open(struct net_devic
|
|
{
|
|
struct netfront_info *np = netdev_priv(dev);
|
|
|
|
- memset(&np->stats, 0, sizeof(np->stats));
|
|
-
|
|
spin_lock_bh(&np->rx_lock);
|
|
if (netfront_carrier_ok(np)) {
|
|
network_alloc_rx_buffers(dev);
|
|
@@ -1028,8 +1026,8 @@ static int network_start_xmit(struct sk_
|
|
if (notify)
|
|
notify_remote_via_irq(np->irq);
|
|
|
|
- np->stats.tx_bytes += skb->len;
|
|
- np->stats.tx_packets++;
|
|
+ dev->stats.tx_bytes += skb->len;
|
|
+ dev->stats.tx_packets++;
|
|
dev->trans_start = jiffies;
|
|
|
|
/* Note: It is not safe to access skb after network_tx_buf_gc()! */
|
|
@@ -1043,7 +1041,7 @@ static int network_start_xmit(struct sk_
|
|
return 0;
|
|
|
|
drop:
|
|
- np->stats.tx_dropped++;
|
|
+ dev->stats.tx_dropped++;
|
|
dev_kfree_skb(skb);
|
|
return 0;
|
|
}
|
|
@@ -1362,7 +1360,7 @@ static int netif_poll(struct net_device
|
|
err:
|
|
while ((skb = __skb_dequeue(&tmpq)))
|
|
__skb_queue_tail(&errq, skb);
|
|
- np->stats.rx_errors++;
|
|
+ dev->stats.rx_errors++;
|
|
i = np->rx.rsp_cons;
|
|
continue;
|
|
}
|
|
@@ -1430,8 +1428,8 @@ err:
|
|
else
|
|
skb->ip_summed = CHECKSUM_NONE;
|
|
|
|
- np->stats.rx_packets++;
|
|
- np->stats.rx_bytes += skb->len;
|
|
+ dev->stats.rx_packets++;
|
|
+ dev->stats.rx_bytes += skb->len;
|
|
|
|
__skb_queue_tail(&rxq, skb);
|
|
|
|
@@ -1686,10 +1684,8 @@ static int network_close(struct net_devi
|
|
|
|
static struct net_device_stats *network_get_stats(struct net_device *dev)
|
|
{
|
|
- struct netfront_info *np = netdev_priv(dev);
|
|
-
|
|
- netfront_accelerator_call_get_stats(np, dev);
|
|
- return &np->stats;
|
|
+ netfront_accelerator_call_get_stats(dev);
|
|
+ return &dev->stats;
|
|
}
|
|
|
|
static int xennet_set_mac_address(struct net_device *dev, void *p)
|
|
--- head-2011-03-11.orig/drivers/xen/netfront/netfront.h 2011-02-09 15:35:17.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/netfront/netfront.h 2011-02-09 15:54:19.000000000 +0100
|
|
@@ -149,9 +149,6 @@ struct netfront_info {
|
|
struct list_head list;
|
|
struct net_device *netdev;
|
|
|
|
- struct net_device_stats stats;
|
|
- unsigned long rx_gso_csum_fixups;
|
|
-
|
|
struct netif_tx_front_ring tx;
|
|
struct netif_rx_front_ring rx;
|
|
|
|
@@ -193,6 +190,9 @@ struct netfront_info {
|
|
struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
|
|
struct mmu_update rx_mmu[NET_RX_RING_SIZE];
|
|
|
|
+ /* Statistics */
|
|
+ unsigned long rx_gso_csum_fixups;
|
|
+
|
|
/* Private pointer to state internal to accelerator module */
|
|
void *accel_priv;
|
|
/* The accelerator used by this netfront device */
|
|
@@ -259,8 +259,7 @@ extern
|
|
void netfront_accelerator_call_stop_napi_irq(struct netfront_info *np,
|
|
struct net_device *dev);
|
|
extern
|
|
-int netfront_accelerator_call_get_stats(struct netfront_info *np,
|
|
- struct net_device *dev);
|
|
+int netfront_accelerator_call_get_stats(struct net_device *dev);
|
|
extern
|
|
void netfront_accelerator_add_watch(struct netfront_info *np);
|
|
|
|
--- head-2011-03-11.orig/drivers/xen/pciback/xenbus.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/pciback/xenbus.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -100,7 +100,7 @@ static int pciback_do_attach(struct pcib
|
|
|
|
err = bind_interdomain_evtchn_to_irqhandler(
|
|
pdev->xdev->otherend_id, remote_evtchn, pciback_handle_event,
|
|
- SA_SAMPLE_RANDOM, "pciback", pdev);
|
|
+ IRQF_SAMPLE_RANDOM, "pciback", pdev);
|
|
if (err < 0) {
|
|
xenbus_dev_fatal(pdev->xdev, err,
|
|
"Error binding event channel to IRQ");
|
|
--- head-2011-03-11.orig/drivers/xen/pcifront/xenbus.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/pcifront/xenbus.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -11,10 +11,6 @@
|
|
#include <xen/gnttab.h>
|
|
#include "pcifront.h"
|
|
|
|
-#ifndef __init_refok
|
|
-#define __init_refok
|
|
-#endif
|
|
-
|
|
#define INVALID_GRANT_REF (0)
|
|
#define INVALID_EVTCHN (-1)
|
|
|
|
@@ -101,7 +97,7 @@ static int pcifront_publish_info(struct
|
|
|
|
err = bind_caller_port_to_irqhandler(pdev->evtchn,
|
|
pcifront_handler_aer,
|
|
- SA_SAMPLE_RANDOM,
|
|
+ IRQF_SAMPLE_RANDOM,
|
|
"pcifront", pdev);
|
|
if (err < 0) {
|
|
xenbus_dev_fatal(pdev->xdev, err,
|
|
--- head-2011-03-11.orig/drivers/xen/scsifront/xenbus.c 2011-02-08 10:03:34.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/scsifront/xenbus.c 2011-02-08 10:03:46.000000000 +0100
|
|
@@ -100,7 +100,7 @@ static int scsifront_alloc_ring(struct v
|
|
|
|
err = bind_listening_port_to_irqhandler(
|
|
dev->otherend_id, scsifront_intr,
|
|
- SA_SAMPLE_RANDOM, "scsifront", info);
|
|
+ IRQF_SAMPLE_RANDOM, "scsifront", info);
|
|
|
|
if (err <= 0) {
|
|
xenbus_dev_fatal(dev, err, "bind_listening_port_to_irqhandler");
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netback/accel_fwd.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2011-03-11/drivers/xen/sfc_netback/accel_fwd.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -308,7 +308,7 @@ static struct netback_accel *for_a_vnic(
|
|
static inline int packet_is_arp_reply(struct sk_buff *skb)
|
|
{
|
|
return skb->protocol == ntohs(ETH_P_ARP)
|
|
- && skb->nh.arph->ar_op == ntohs(ARPOP_REPLY);
|
|
+ && arp_hdr(skb)->ar_op == ntohs(ARPOP_REPLY);
|
|
}
|
|
|
|
|
|
@@ -392,12 +392,13 @@ void netback_accel_tx_packet(struct sk_b
|
|
|
|
BUG_ON(fwd_priv == NULL);
|
|
|
|
- if (is_broadcast_ether_addr(skb->mac.raw) && packet_is_arp_reply(skb)) {
|
|
+ if (is_broadcast_ether_addr(skb_mac_header(skb))
|
|
+ && packet_is_arp_reply(skb)) {
|
|
/*
|
|
* update our fast path forwarding to reflect this
|
|
* gratuitous ARP
|
|
*/
|
|
- mac = skb->mac.raw+ETH_ALEN;
|
|
+ mac = skb_mac_header(skb)+ETH_ALEN;
|
|
|
|
DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n",
|
|
__FUNCTION__, MAC_ARG(mac));
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netback/accel_solarflare.c 2010-01-18 15:23:12.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/sfc_netback/accel_solarflare.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -113,7 +113,7 @@ bend_dl_tx_packet(struct efx_dl_device *
|
|
BUG_ON(port == NULL);
|
|
|
|
NETBACK_ACCEL_STATS_OP(global_stats.dl_tx_packets++);
|
|
- if (skb->mac.raw != NULL)
|
|
+ if (skb_mac_header_was_set(skb))
|
|
netback_accel_tx_packet(skb, port->fwd_priv);
|
|
else {
|
|
DPRINTK("Ignoring packet with missing mac address\n");
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netfront/accel_tso.c 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/sfc_netfront/accel_tso.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -33,10 +33,9 @@
|
|
|
|
#include "accel_tso.h"
|
|
|
|
-#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2))
|
|
-#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data)
|
|
-#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data)
|
|
-#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data)
|
|
+#define ETH_HDR_LEN(skb) skb_network_offset(skb)
|
|
+#define SKB_TCP_OFF(skb) skb_transport_offset(skb)
|
|
+#define SKB_IP_OFF(skb) skb_network_offset(skb)
|
|
|
|
/*
|
|
* Set a maximum number of buffers in each output packet to make life
|
|
@@ -114,9 +113,8 @@ struct netfront_accel_tso_state {
|
|
static inline void tso_check_safe(struct sk_buff *skb) {
|
|
EPRINTK_ON(skb->protocol != htons (ETH_P_IP));
|
|
EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP));
|
|
- EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP);
|
|
- EPRINTK_ON((SKB_TCP_OFF(skb)
|
|
- + (skb->h.th->doff << 2u)) > skb_headlen(skb));
|
|
+ EPRINTK_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
|
|
+ EPRINTK_ON((SKB_TCP_OFF(skb) + tcp_hdrlen(skb)) > skb_headlen(skb));
|
|
}
|
|
|
|
|
|
@@ -129,17 +127,17 @@ static inline void tso_start(struct netf
|
|
* All ethernet/IP/TCP headers combined size is TCP header size
|
|
* plus offset of TCP header relative to start of packet.
|
|
*/
|
|
- st->p.header_length = (skb->h.th->doff << 2u) + SKB_TCP_OFF(skb);
|
|
+ st->p.header_length = tcp_hdrlen(skb) + SKB_TCP_OFF(skb);
|
|
st->p.full_packet_size = (st->p.header_length
|
|
+ skb_shinfo(skb)->gso_size);
|
|
st->p.gso_size = skb_shinfo(skb)->gso_size;
|
|
|
|
- st->p.ip_id = htons(skb->nh.iph->id);
|
|
- st->seqnum = ntohl(skb->h.th->seq);
|
|
+ st->p.ip_id = htons(ip_hdr(skb)->id);
|
|
+ st->seqnum = ntohl(tcp_hdr(skb)->seq);
|
|
|
|
- EPRINTK_ON(skb->h.th->urg);
|
|
- EPRINTK_ON(skb->h.th->syn);
|
|
- EPRINTK_ON(skb->h.th->rst);
|
|
+ EPRINTK_ON(tcp_hdr(skb)->urg);
|
|
+ EPRINTK_ON(tcp_hdr(skb)->syn);
|
|
+ EPRINTK_ON(tcp_hdr(skb)->rst);
|
|
|
|
st->remaining_len = skb->len - st->p.header_length;
|
|
|
|
@@ -258,8 +256,8 @@ int tso_start_new_packet(netfront_accel_
|
|
/* This packet will be the last in the TSO burst. */
|
|
ip_length = (st->p.header_length - ETH_HDR_LEN(skb)
|
|
+ st->remaining_len);
|
|
- tsoh_th->fin = skb->h.th->fin;
|
|
- tsoh_th->psh = skb->h.th->psh;
|
|
+ tsoh_th->fin = tcp_hdr(skb)->fin;
|
|
+ tsoh_th->psh = tcp_hdr(skb)->psh;
|
|
}
|
|
|
|
tsoh_iph->tot_len = htons(ip_length);
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netfront/accel_vi.c 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/sfc_netfront/accel_vi.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -465,7 +465,7 @@ netfront_accel_enqueue_skb_multi(netfron
|
|
|
|
if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Set to zero to encourage falcon to work it out for us */
|
|
- *(u16*)(skb->h.raw + skb->csum_offset) = 0;
|
|
+ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
|
|
}
|
|
|
|
if (multi_post_start_new_buffer(vnic, &state)) {
|
|
@@ -584,7 +584,7 @@ netfront_accel_enqueue_skb_single(netfro
|
|
|
|
if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Set to zero to encourage falcon to work it out for us */
|
|
- *(u16*)(skb->h.raw + skb->csum_offset) = 0;
|
|
+ *(u16*)(skb->head + skb->csum_start + skb->csum_offset) = 0;
|
|
}
|
|
NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT
|
|
(skb, idx, frag_data, frag_len, {
|
|
@@ -791,7 +791,6 @@ static void netfront_accel_vi_rx_comple
|
|
}
|
|
|
|
net_dev = vnic->net_dev;
|
|
- skb->dev = net_dev;
|
|
skb->protocol = eth_type_trans(skb, net_dev);
|
|
/* CHECKSUM_UNNECESSARY as hardware has done it already */
|
|
skb->ip_summed = CHECKSUM_UNNECESSARY;
|
|
--- head-2011-03-11.orig/drivers/xen/sfc_netfront/accel_xenbus.c 2008-02-20 09:32:49.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/sfc_netfront/accel_xenbus.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -356,7 +356,7 @@ static int vnic_setup_domU_shared_state(
|
|
/* Create xenbus msg event channel */
|
|
err = bind_listening_port_to_irqhandler
|
|
(dev->otherend_id, netfront_accel_msg_channel_irq_from_bend,
|
|
- SA_SAMPLE_RANDOM, "vnicctrl", vnic);
|
|
+ IRQF_SAMPLE_RANDOM, "vnicctrl", vnic);
|
|
if (err < 0) {
|
|
EPRINTK("Couldn't bind msg event channel\n");
|
|
goto fail_msg_irq;
|
|
@@ -367,7 +367,7 @@ static int vnic_setup_domU_shared_state(
|
|
/* Create xenbus net event channel */
|
|
err = bind_listening_port_to_irqhandler
|
|
(dev->otherend_id, netfront_accel_net_channel_irq_from_bend,
|
|
- SA_SAMPLE_RANDOM, "vnicfront", vnic);
|
|
+ IRQF_SAMPLE_RANDOM, "vnicfront", vnic);
|
|
if (err < 0) {
|
|
EPRINTK("Couldn't bind net event channel\n");
|
|
goto fail_net_irq;
|
|
--- head-2011-03-11.orig/drivers/xen/usbfront/xenbus.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/usbfront/xenbus.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -111,7 +111,7 @@ static int setup_rings(struct xenbus_dev
|
|
info->conn_ring_ref = err;
|
|
|
|
err = bind_listening_port_to_irqhandler(
|
|
- dev->otherend_id, xenhcd_int, SA_SAMPLE_RANDOM, "usbif", info);
|
|
+ dev->otherend_id, xenhcd_int, IRQF_SAMPLE_RANDOM, "usbif", info);
|
|
if (err <= 0) {
|
|
xenbus_dev_fatal(dev, err,
|
|
"bind_listening_port_to_irqhandler");
|
|
--- head-2011-03-11.orig/drivers/xen/xenbus/xenbus_xs.c 2011-01-31 15:14:12.000000000 +0100
|
|
+++ head-2011-03-11/drivers/xen/xenbus/xenbus_xs.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -539,18 +539,15 @@ int xenbus_printf(struct xenbus_transact
|
|
{
|
|
va_list ap;
|
|
int ret;
|
|
-#define PRINTF_BUFFER_SIZE 4096
|
|
char *printf_buffer;
|
|
|
|
- printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_NOIO | __GFP_HIGH);
|
|
- if (printf_buffer == NULL)
|
|
- return -ENOMEM;
|
|
-
|
|
va_start(ap, fmt);
|
|
- ret = vsnprintf(printf_buffer, PRINTF_BUFFER_SIZE, fmt, ap);
|
|
+ printf_buffer = kvasprintf(GFP_NOIO | __GFP_HIGH, fmt, ap);
|
|
va_end(ap);
|
|
|
|
- BUG_ON(ret > PRINTF_BUFFER_SIZE-1);
|
|
+ if (!printf_buffer)
|
|
+ return -ENOMEM;
|
|
+
|
|
ret = xenbus_write(t, dir, node, printf_buffer);
|
|
|
|
kfree(printf_buffer);
|
|
--- head-2011-03-11.orig/fs/aio.c 2011-03-11 10:52:11.000000000 +0100
|
|
+++ head-2011-03-11/fs/aio.c 2011-03-11 10:55:30.000000000 +0100
|
|
@@ -43,7 +43,7 @@
|
|
|
|
#ifdef CONFIG_EPOLL
|
|
#include <linux/poll.h>
|
|
-#include <linux/eventpoll.h>
|
|
+#include <linux/anon_inodes.h>
|
|
#endif
|
|
|
|
#if DEBUG > 1
|
|
@@ -1297,7 +1297,7 @@ static const struct file_operations aioq
|
|
|
|
/* make_aio_fd:
|
|
* Create a file descriptor that can be used to poll the event queue.
|
|
- * Based and piggybacked on the excellent epoll code.
|
|
+ * Based on the excellent epoll code.
|
|
*/
|
|
|
|
static int make_aio_fd(struct kioctx *ioctx)
|
|
@@ -1306,7 +1306,8 @@ static int make_aio_fd(struct kioctx *io
|
|
struct inode *inode;
|
|
struct file *file;
|
|
|
|
- error = ep_getfd(&fd, &inode, &file, NULL, &aioq_fops);
|
|
+ error = anon_inode_getfd(&fd, &inode, &file, "[aioq]",
|
|
+ &aioq_fops, ioctx);
|
|
if (error)
|
|
return error;
|
|
|
|
--- head-2011-03-11.orig/arch/x86/include/asm/boot.h 2011-03-11 10:41:54.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/asm/boot.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -16,7 +16,7 @@
|
|
& ~(CONFIG_PHYSICAL_ALIGN - 1))
|
|
|
|
/* Minimum kernel alignment, as a power of two */
|
|
-#ifdef CONFIG_X86_64
|
|
+#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
#define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT
|
|
#else
|
|
#define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT + THREAD_ORDER)
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/desc_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/desc_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -11,23 +11,24 @@
|
|
|
|
#include <asm/mmu.h>
|
|
|
|
-extern struct desc_struct cpu_gdt_table[GDT_ENTRIES];
|
|
-
|
|
struct Xgt_desc_struct {
|
|
unsigned short size;
|
|
unsigned long address __attribute__((packed));
|
|
unsigned short pad;
|
|
} __attribute__ ((packed));
|
|
|
|
-extern struct Xgt_desc_struct idt_descr;
|
|
-DECLARE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
|
|
-extern struct Xgt_desc_struct early_gdt_descr;
|
|
+struct gdt_page
|
|
+{
|
|
+ struct desc_struct gdt[GDT_ENTRIES];
|
|
+} __attribute__((aligned(PAGE_SIZE)));
|
|
+DECLARE_PER_CPU(struct gdt_page, gdt_page);
|
|
|
|
static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
|
|
{
|
|
- return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
|
|
+ return per_cpu(gdt_page, cpu).gdt;
|
|
}
|
|
|
|
+extern struct Xgt_desc_struct idt_descr;
|
|
extern struct desc_struct idt_table[];
|
|
extern void set_intr_gate(unsigned int irq, void * addr);
|
|
|
|
@@ -55,53 +56,32 @@ static inline void pack_gate(__u32 *a, _
|
|
#define DESCTYPE_S 0x10 /* !system */
|
|
|
|
#ifndef CONFIG_XEN
|
|
-#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
|
|
-
|
|
-#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
|
|
-#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
|
|
+#define load_TR_desc() native_load_tr_desc()
|
|
+#define load_gdt(dtr) native_load_gdt(dtr)
|
|
+#define load_idt(dtr) native_load_idt(dtr)
|
|
#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
|
|
#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
|
|
|
|
-#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
|
|
-#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
|
|
-#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
|
|
+#define store_gdt(dtr) native_store_gdt(dtr)
|
|
+#define store_idt(dtr) native_store_idt(dtr)
|
|
+#define store_tr(tr) (tr = native_store_tr())
|
|
#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
|
|
-#endif
|
|
|
|
-#if TLS_SIZE != 24
|
|
-# error update this code.
|
|
-#endif
|
|
-
|
|
-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
|
|
-{
|
|
-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
|
|
- *(u64 *)&t->tls_array[i]) \
|
|
- BUG()
|
|
- C(0); C(1); C(2);
|
|
-#undef C
|
|
-}
|
|
+#define load_TLS(t, cpu) native_load_tls(t, cpu)
|
|
+#define set_ldt native_set_ldt
|
|
|
|
-#ifndef CONFIG_XEN
|
|
#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
|
|
-static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
|
|
+static inline void write_dt_entry(struct desc_struct *dt,
|
|
+ int entry, u32 entry_low, u32 entry_high)
|
|
{
|
|
- __u32 *lp = (__u32 *)((char *)dt + entry*8);
|
|
- *lp = entry_a;
|
|
- *(lp+1) = entry_b;
|
|
+ dt[entry].a = entry_low;
|
|
+ dt[entry].b = entry_high;
|
|
}
|
|
-#define set_ldt native_set_ldt
|
|
-#else
|
|
-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
|
|
-extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
|
|
-#define set_ldt xen_set_ldt
|
|
-#endif
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-static inline fastcall void native_set_ldt(const void *addr,
|
|
- unsigned int entries)
|
|
+static inline void native_set_ldt(const void *addr, unsigned int entries)
|
|
{
|
|
if (likely(entries == 0))
|
|
__asm__ __volatile__("lldt %w0"::"q" (0));
|
|
@@ -116,6 +96,65 @@ static inline fastcall void native_set_l
|
|
__asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8));
|
|
}
|
|
}
|
|
+
|
|
+
|
|
+static inline void native_load_tr_desc(void)
|
|
+{
|
|
+ asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
|
|
+}
|
|
+
|
|
+static inline void native_load_gdt(const struct Xgt_desc_struct *dtr)
|
|
+{
|
|
+ asm volatile("lgdt %0"::"m" (*dtr));
|
|
+}
|
|
+
|
|
+static inline void native_load_idt(const struct Xgt_desc_struct *dtr)
|
|
+{
|
|
+ asm volatile("lidt %0"::"m" (*dtr));
|
|
+}
|
|
+
|
|
+static inline void native_store_gdt(struct Xgt_desc_struct *dtr)
|
|
+{
|
|
+ asm ("sgdt %0":"=m" (*dtr));
|
|
+}
|
|
+
|
|
+static inline void native_store_idt(struct Xgt_desc_struct *dtr)
|
|
+{
|
|
+ asm ("sidt %0":"=m" (*dtr));
|
|
+}
|
|
+
|
|
+static inline unsigned long native_store_tr(void)
|
|
+{
|
|
+ unsigned long tr;
|
|
+ asm ("str %0":"=r" (tr));
|
|
+ return tr;
|
|
+}
|
|
+
|
|
+static inline void native_load_tls(struct thread_struct *t, unsigned int cpu)
|
|
+{
|
|
+ unsigned int i;
|
|
+ struct desc_struct *gdt = get_cpu_gdt_table(cpu);
|
|
+
|
|
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
|
+ gdt[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i];
|
|
+}
|
|
+#else
|
|
+#define load_TLS(t, cpu) xen_load_tls(t, cpu)
|
|
+#define set_ldt xen_set_ldt
|
|
+
|
|
+extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
|
|
+extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
|
|
+
|
|
+static inline void xen_load_tls(struct thread_struct *t, unsigned int cpu)
|
|
+{
|
|
+ unsigned int i;
|
|
+ struct desc_struct *gdt = get_cpu_gdt_table(cpu) + GDT_ENTRY_TLS_MIN;
|
|
+
|
|
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
|
+ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
|
|
+ *(u64 *)&t->tls_array[i]))
|
|
+ BUG();
|
|
+}
|
|
#endif
|
|
|
|
#ifndef CONFIG_X86_NO_IDT
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/fixmap_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -19,10 +19,8 @@
|
|
* the start of the fixmap.
|
|
*/
|
|
extern unsigned long __FIXADDR_TOP;
|
|
-#ifdef CONFIG_COMPAT_VDSO
|
|
-#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
|
|
-#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
|
|
-#endif
|
|
+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
|
|
+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
|
|
|
|
#ifndef __ASSEMBLY__
|
|
#include <linux/kernel.h>
|
|
@@ -85,6 +83,9 @@ enum fixed_addresses {
|
|
#ifdef CONFIG_PCI_MMCONFIG
|
|
FIX_PCIE_MCFG,
|
|
#endif
|
|
+#ifdef CONFIG_PARAVIRT
|
|
+ FIX_PARAVIRT_BOOTMAP,
|
|
+#endif
|
|
FIX_SHARED_INFO,
|
|
#define NR_FIX_ISAMAPS 256
|
|
FIX_ISAMAP_END,
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/highmem.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -67,12 +67,17 @@ extern void FASTCALL(kunmap_high(struct
|
|
|
|
void *kmap(struct page *page);
|
|
void kunmap(struct page *page);
|
|
+void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
|
|
void *kmap_atomic(struct page *page, enum km_type type);
|
|
-void *kmap_atomic_pte(struct page *page, enum km_type type);
|
|
void kunmap_atomic(void *kvaddr, enum km_type type);
|
|
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
|
|
struct page *kmap_atomic_to_page(void *ptr);
|
|
|
|
+#define kmap_atomic_pte(page, type) \
|
|
+ kmap_atomic_prot(page, type, \
|
|
+ test_bit(PG_pinned, &(page)->flags) \
|
|
+ ? PAGE_KERNEL_RO : kmap_prot)
|
|
+
|
|
#define flush_cache_kmaps() do { } while (0)
|
|
|
|
void clear_highpage(struct page *);
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/hypervisor.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -69,6 +69,8 @@ extern start_info_t *xen_start_info;
|
|
#define is_initial_xendomain() 0
|
|
#endif
|
|
|
|
+struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
|
|
+
|
|
/* arch/xen/kernel/evtchn.c */
|
|
/* Force a proper event-channel callback from Xen. */
|
|
void force_evtchn_callback(void);
|
|
@@ -104,8 +106,8 @@ void xen_set_ldt(const void *ptr, unsign
|
|
#include <linux/cpumask.h>
|
|
void xen_tlb_flush_all(void);
|
|
void xen_invlpg_all(unsigned long ptr);
|
|
-void xen_tlb_flush_mask(cpumask_t *mask);
|
|
-void xen_invlpg_mask(cpumask_t *mask, unsigned long ptr);
|
|
+void xen_tlb_flush_mask(const cpumask_t *mask);
|
|
+void xen_invlpg_mask(const cpumask_t *mask, unsigned long ptr);
|
|
#else
|
|
#define xen_tlb_flush_all xen_tlb_flush
|
|
#define xen_invlpg_all xen_invlpg
|
|
@@ -156,7 +158,9 @@ static inline void arch_leave_lazy_mmu_m
|
|
xen_multicall_flush(false);
|
|
}
|
|
|
|
-#ifndef arch_use_lazy_mmu_mode
|
|
+#if defined(CONFIG_X86_32)
|
|
+#define arch_use_lazy_mmu_mode() unlikely(x86_read_percpu(xen_lazy_mmu))
|
|
+#elif !defined(arch_use_lazy_mmu_mode)
|
|
#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
|
|
#endif
|
|
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/irqflags_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/irqflags_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -11,6 +11,40 @@
|
|
#define _ASM_IRQFLAGS_H
|
|
|
|
#ifndef __ASSEMBLY__
|
|
+#define xen_save_fl(void) (current_vcpu_info()->evtchn_upcall_mask)
|
|
+
|
|
+#define xen_restore_fl(f) \
|
|
+do { \
|
|
+ vcpu_info_t *_vcpu; \
|
|
+ barrier(); \
|
|
+ _vcpu = current_vcpu_info(); \
|
|
+ if ((_vcpu->evtchn_upcall_mask = (f)) == 0) { \
|
|
+ barrier(); /* unmask then check (avoid races) */\
|
|
+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
|
|
+ force_evtchn_callback(); \
|
|
+ } \
|
|
+} while (0)
|
|
+
|
|
+#define xen_irq_disable() \
|
|
+do { \
|
|
+ current_vcpu_info()->evtchn_upcall_mask = 1; \
|
|
+ barrier(); \
|
|
+} while (0)
|
|
+
|
|
+#define xen_irq_enable() \
|
|
+do { \
|
|
+ vcpu_info_t *_vcpu; \
|
|
+ barrier(); \
|
|
+ _vcpu = current_vcpu_info(); \
|
|
+ _vcpu->evtchn_upcall_mask = 0; \
|
|
+ barrier(); /* unmask then check (avoid races) */ \
|
|
+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
|
|
+ force_evtchn_callback(); \
|
|
+} while (0)
|
|
+
|
|
+void xen_safe_halt(void);
|
|
+
|
|
+void xen_halt(void);
|
|
|
|
/*
|
|
* The use of 'barrier' in the following reflects their use as local-lock
|
|
@@ -20,48 +54,31 @@
|
|
* includes these barriers, for example.
|
|
*/
|
|
|
|
-#define __raw_local_save_flags() (current_vcpu_info()->evtchn_upcall_mask)
|
|
+#define __raw_local_save_flags() xen_save_fl()
|
|
|
|
-#define raw_local_irq_restore(x) \
|
|
-do { \
|
|
- vcpu_info_t *_vcpu; \
|
|
- barrier(); \
|
|
- _vcpu = current_vcpu_info(); \
|
|
- if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
|
|
- barrier(); /* unmask then check (avoid races) */ \
|
|
- if (unlikely(_vcpu->evtchn_upcall_pending)) \
|
|
- force_evtchn_callback(); \
|
|
- } \
|
|
-} while (0)
|
|
+#define raw_local_irq_restore(flags) xen_restore_fl(flags)
|
|
|
|
-#define raw_local_irq_disable() \
|
|
-do { \
|
|
- current_vcpu_info()->evtchn_upcall_mask = 1; \
|
|
- barrier(); \
|
|
-} while (0)
|
|
+#define raw_local_irq_disable() xen_irq_disable()
|
|
|
|
-#define raw_local_irq_enable() \
|
|
-do { \
|
|
- vcpu_info_t *_vcpu; \
|
|
- barrier(); \
|
|
- _vcpu = current_vcpu_info(); \
|
|
- _vcpu->evtchn_upcall_mask = 0; \
|
|
- barrier(); /* unmask then check (avoid races) */ \
|
|
- if (unlikely(_vcpu->evtchn_upcall_pending)) \
|
|
- force_evtchn_callback(); \
|
|
-} while (0)
|
|
+#define raw_local_irq_enable() xen_irq_enable()
|
|
|
|
/*
|
|
* Used in the idle loop; sti takes one instruction cycle
|
|
* to complete:
|
|
*/
|
|
-void raw_safe_halt(void);
|
|
+static inline void raw_safe_halt(void)
|
|
+{
|
|
+ xen_safe_halt();
|
|
+}
|
|
|
|
/*
|
|
* Used when interrupts are already enabled or to
|
|
* shutdown the processor:
|
|
*/
|
|
-void halt(void);
|
|
+static inline void halt(void)
|
|
+{
|
|
+ xen_halt();
|
|
+}
|
|
|
|
/*
|
|
* For spinlocks, etc:
|
|
@@ -106,7 +123,7 @@ sysexit_scrit: /**** START OF SYSEXIT CR
|
|
14: __DISABLE_INTERRUPTS ; \
|
|
TRACE_IRQS_OFF ; \
|
|
sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
- mov $__KERNEL_PDA, %ecx ; \
|
|
+ mov $__KERNEL_PERCPU, %ecx ; \
|
|
push %esp ; \
|
|
mov %ecx, %fs ; \
|
|
call evtchn_do_upcall ; \
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/mmu_context_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -6,6 +6,20 @@
|
|
#include <asm/pgalloc.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
+void arch_exit_mmap(struct mm_struct *mm);
|
|
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
|
|
+
|
|
+void mm_pin(struct mm_struct *mm);
|
|
+void mm_unpin(struct mm_struct *mm);
|
|
+void mm_pin_all(void);
|
|
+
|
|
+static inline void xen_activate_mm(struct mm_struct *prev,
|
|
+ struct mm_struct *next)
|
|
+{
|
|
+ if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
|
|
+ mm_pin(next);
|
|
+}
|
|
+
|
|
/*
|
|
* Used for LDT copy/destruction.
|
|
*/
|
|
@@ -37,10 +51,6 @@ static inline void __prepare_arch_switch
|
|
: : "r" (0) );
|
|
}
|
|
|
|
-extern void mm_pin(struct mm_struct *mm);
|
|
-extern void mm_unpin(struct mm_struct *mm);
|
|
-void mm_pin_all(void);
|
|
-
|
|
static inline void switch_mm(struct mm_struct *prev,
|
|
struct mm_struct *next,
|
|
struct task_struct *tsk)
|
|
@@ -97,11 +107,10 @@ static inline void switch_mm(struct mm_s
|
|
#define deactivate_mm(tsk, mm) \
|
|
asm("movl %0,%%gs": :"r" (0));
|
|
|
|
-static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next)
|
|
-{
|
|
- if (!test_bit(PG_pinned, &virt_to_page(next->pgd)->flags))
|
|
- mm_pin(next);
|
|
- switch_mm(prev, next, NULL);
|
|
-}
|
|
+#define activate_mm(prev, next) \
|
|
+ do { \
|
|
+ xen_activate_mm(prev, next); \
|
|
+ switch_mm((prev),(next),NULL); \
|
|
+ } while(0)
|
|
|
|
#endif
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgalloc_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgalloc_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -1,7 +1,6 @@
|
|
#ifndef _I386_PGALLOC_H
|
|
#define _I386_PGALLOC_H
|
|
|
|
-#include <asm/fixmap.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/mm.h> /* for struct page */
|
|
#include <asm/io.h> /* for phys_to_virt and page_to_pseudophys */
|
|
@@ -69,6 +68,4 @@ do { \
|
|
#define pud_populate(mm, pmd, pte) BUG()
|
|
#endif
|
|
|
|
-#define check_pgt_cache() do { } while (0)
|
|
-
|
|
#endif /* _I386_PGALLOC_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -52,32 +52,40 @@ static inline int pte_exec_kernel(pte_t
|
|
* value and then use set_pte to update it. -ben
|
|
*/
|
|
|
|
-static inline void set_pte(pte_t *ptep, pte_t pte)
|
|
+static inline void xen_set_pte(pte_t *ptep, pte_t pte)
|
|
{
|
|
ptep->pte_high = pte.pte_high;
|
|
smp_wmb();
|
|
ptep->pte_low = pte.pte_low;
|
|
}
|
|
-#define set_pte_atomic(pteptr,pteval) \
|
|
- set_64bit((unsigned long long *)(pteptr),__pte_val(pteval))
|
|
|
|
-#define set_pte_at(_mm,addr,ptep,pteval) do { \
|
|
- if (((_mm) != current->mm && (_mm) != &init_mm) || \
|
|
- HYPERVISOR_update_va_mapping((addr), (pteval), 0)) \
|
|
- set_pte((ptep), (pteval)); \
|
|
-} while (0)
|
|
-
|
|
-#define set_pmd(pmdptr,pmdval) \
|
|
- xen_l2_entry_update((pmdptr), (pmdval))
|
|
-#define set_pud(pudptr,pudval) \
|
|
- xen_l3_entry_update((pudptr), (pudval))
|
|
+static inline void xen_set_pte_at(struct mm_struct *mm, unsigned long addr,
|
|
+ pte_t *ptep , pte_t pte)
|
|
+{
|
|
+ if ((mm != current->mm && mm != &init_mm) ||
|
|
+ HYPERVISOR_update_va_mapping(addr, pte, 0))
|
|
+ xen_set_pte(ptep, pte);
|
|
+}
|
|
+
|
|
+static inline void xen_set_pte_atomic(pte_t *ptep, pte_t pte)
|
|
+{
|
|
+ set_64bit((unsigned long long *)(ptep),__pte_val(pte));
|
|
+}
|
|
+static inline void xen_set_pmd(pmd_t *pmdp, pmd_t pmd)
|
|
+{
|
|
+ xen_l2_entry_update(pmdp, pmd);
|
|
+}
|
|
+static inline void xen_set_pud(pud_t *pudp, pud_t pud)
|
|
+{
|
|
+ xen_l3_entry_update(pudp, pud);
|
|
+}
|
|
|
|
/*
|
|
* For PTEs and PDEs, we must clear the P-bit first when clearing a page table
|
|
* entry, so clear the bottom half first and enforce ordering with a compiler
|
|
* barrier.
|
|
*/
|
|
-static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
+static inline void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
{
|
|
if ((mm != current->mm && mm != &init_mm)
|
|
|| HYPERVISOR_update_va_mapping(addr, __pte(0), 0)) {
|
|
@@ -87,7 +95,18 @@ static inline void pte_clear(struct mm_s
|
|
}
|
|
}
|
|
|
|
-#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
|
+static inline void xen_pmd_clear(pmd_t *pmd)
|
|
+{
|
|
+ xen_l2_entry_update(pmd, __pmd(0));
|
|
+}
|
|
+
|
|
+#define set_pte(ptep, pte) xen_set_pte(ptep, pte)
|
|
+#define set_pte_at(mm, addr, ptep, pte) xen_set_pte_at(mm, addr, ptep, pte)
|
|
+#define set_pte_atomic(ptep, pte) xen_set_pte_atomic(ptep, pte)
|
|
+#define set_pmd(pmdp, pmd) xen_set_pmd(pmdp, pmd)
|
|
+#define set_pud(pudp, pud) xen_set_pud(pudp, pud)
|
|
+#define pte_clear(mm, addr, ptep) xen_pte_clear(mm, addr, ptep)
|
|
+#define pmd_clear(pmd) xen_pmd_clear(pmd)
|
|
|
|
/*
|
|
* Pentium-II erratum A13: in PAE mode we explicitly have to flush
|
|
@@ -108,7 +127,8 @@ static inline void pud_clear (pud_t * pu
|
|
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
|
|
pmd_index(address))
|
|
|
|
-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep, pte_t res)
|
|
+#ifdef CONFIG_SMP
|
|
+static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
|
|
{
|
|
uint64_t val = __pte_val(res);
|
|
if (__cmpxchg64(ptep, val, 0) != val) {
|
|
@@ -119,6 +139,9 @@ static inline pte_t raw_ptep_get_and_cle
|
|
}
|
|
return res;
|
|
}
|
|
+#else
|
|
+#define xen_ptep_get_and_clear(xp, pte) xen_local_ptep_get_and_clear(xp, pte)
|
|
+#endif
|
|
|
|
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
|
|
#define ptep_clear_flush(vma, addr, ptep) \
|
|
@@ -165,13 +188,13 @@ extern unsigned long long __supported_pt
|
|
static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
|
|
{
|
|
return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
|
|
- pgprot_val(pgprot)) & __supported_pte_mask);
|
|
+ pgprot_val(pgprot)) & __supported_pte_mask);
|
|
}
|
|
|
|
static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
|
|
{
|
|
return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
|
|
- pgprot_val(pgprot)) & __supported_pte_mask);
|
|
+ pgprot_val(pgprot)) & __supported_pte_mask);
|
|
}
|
|
|
|
/*
|
|
@@ -191,6 +214,4 @@ static inline pmd_t pfn_pmd(unsigned lon
|
|
|
|
#define __pmd_free_tlb(tlb, x) do { } while (0)
|
|
|
|
-void vmalloc_sync_all(void);
|
|
-
|
|
#endif /* _I386_PGTABLE_3LEVEL_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2007-06-12 13:14:02.000000000 +0200
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable-3level-defs.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -1,7 +1,7 @@
|
|
#ifndef _I386_PGTABLE_3LEVEL_DEFS_H
|
|
#define _I386_PGTABLE_3LEVEL_DEFS_H
|
|
|
|
-#define HAVE_SHARED_KERNEL_PMD 0
|
|
+#define SHARED_KERNEL_PMD 0
|
|
|
|
/*
|
|
* PGDIR_SHIFT determines what a top-level page table entry can map
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-07 15:38:07.000000000 +0100
|
|
@@ -24,11 +24,11 @@
|
|
#include <linux/slab.h>
|
|
#include <linux/list.h>
|
|
#include <linux/spinlock.h>
|
|
+#include <linux/sched.h>
|
|
|
|
/* Is this pagetable pinned? */
|
|
#define PG_pinned PG_arch_1
|
|
|
|
-struct mm_struct;
|
|
struct vm_area_struct;
|
|
|
|
/*
|
|
@@ -38,17 +38,16 @@ struct vm_area_struct;
|
|
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
extern unsigned long empty_zero_page[1024];
|
|
extern pgd_t *swapper_pg_dir;
|
|
-extern struct kmem_cache *pgd_cache;
|
|
extern struct kmem_cache *pmd_cache;
|
|
extern spinlock_t pgd_lock;
|
|
extern struct page *pgd_list;
|
|
+void check_pgt_cache(void);
|
|
|
|
void pmd_ctor(void *, struct kmem_cache *, unsigned long);
|
|
-void pgd_ctor(void *, struct kmem_cache *, unsigned long);
|
|
-void pgd_dtor(void *, struct kmem_cache *, unsigned long);
|
|
void pgtable_cache_init(void);
|
|
void paging_init(void);
|
|
|
|
+
|
|
/*
|
|
* The Linux x86 paging architecture is 'compile-time dual-mode', it
|
|
* implements both the traditional 2-level x86 page tables and the
|
|
@@ -165,6 +164,7 @@ void paging_init(void);
|
|
|
|
extern unsigned long long __PAGE_KERNEL, __PAGE_KERNEL_EXEC;
|
|
#define __PAGE_KERNEL_RO (__PAGE_KERNEL & ~_PAGE_RW)
|
|
+#define __PAGE_KERNEL_RX (__PAGE_KERNEL_EXEC & ~_PAGE_RW)
|
|
#define __PAGE_KERNEL_NOCACHE (__PAGE_KERNEL | _PAGE_PCD)
|
|
#define __PAGE_KERNEL_LARGE (__PAGE_KERNEL | _PAGE_PSE)
|
|
#define __PAGE_KERNEL_LARGE_EXEC (__PAGE_KERNEL_EXEC | _PAGE_PSE)
|
|
@@ -172,6 +172,7 @@ extern unsigned long long __PAGE_KERNEL,
|
|
#define PAGE_KERNEL __pgprot(__PAGE_KERNEL)
|
|
#define PAGE_KERNEL_RO __pgprot(__PAGE_KERNEL_RO)
|
|
#define PAGE_KERNEL_EXEC __pgprot(__PAGE_KERNEL_EXEC)
|
|
+#define PAGE_KERNEL_RX __pgprot(__PAGE_KERNEL_RX)
|
|
#define PAGE_KERNEL_NOCACHE __pgprot(__PAGE_KERNEL_NOCACHE)
|
|
#define PAGE_KERNEL_LARGE __pgprot(__PAGE_KERNEL_LARGE)
|
|
#define PAGE_KERNEL_LARGE_EXEC __pgprot(__PAGE_KERNEL_LARGE_EXEC)
|
|
@@ -275,7 +276,13 @@ static inline pte_t pte_mkhuge(pte_t pte
|
|
*/
|
|
#define pte_update(mm, addr, ptep) do { } while (0)
|
|
#define pte_update_defer(mm, addr, ptep) do { } while (0)
|
|
-#define paravirt_map_pt_hook(slot, va, pfn) do { } while (0)
|
|
+
|
|
+/* local pte updates need not use xchg for locking */
|
|
+static inline pte_t xen_local_ptep_get_and_clear(pte_t *ptep, pte_t res)
|
|
+{
|
|
+ xen_set_pte(ptep, __pte(0));
|
|
+ return res;
|
|
+}
|
|
|
|
/*
|
|
* We only update the dirty/accessed state if we set
|
|
@@ -286,17 +293,34 @@ static inline pte_t pte_mkhuge(pte_t pte
|
|
*/
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
|
|
-do { \
|
|
- if (dirty) \
|
|
+({ \
|
|
+ int __changed = !pte_same(*(ptep), entry); \
|
|
+ if (__changed && (dirty)) \
|
|
ptep_establish(vma, address, ptep, entry); \
|
|
-} while (0)
|
|
+ __changed; \
|
|
+})
|
|
|
|
-/*
|
|
- * We don't actually have these, but we want to advertise them so that
|
|
- * we can encompass the flush here.
|
|
- */
|
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
+#define ptep_test_and_clear_dirty(vma, addr, ptep) ({ \
|
|
+ int __ret = 0; \
|
|
+ if (pte_dirty(*(ptep))) \
|
|
+ __ret = test_and_clear_bit(_PAGE_BIT_DIRTY, \
|
|
+ &(ptep)->pte_low); \
|
|
+ if (__ret) \
|
|
+ pte_update((vma)->vm_mm, addr, ptep); \
|
|
+ __ret; \
|
|
+})
|
|
+
|
|
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
+#define ptep_test_and_clear_young(vma, addr, ptep) ({ \
|
|
+ int __ret = 0; \
|
|
+ if (pte_young(*(ptep))) \
|
|
+ __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, \
|
|
+ &(ptep)->pte_low); \
|
|
+ if (__ret) \
|
|
+ pte_update((vma)->vm_mm, addr, ptep); \
|
|
+ __ret; \
|
|
+})
|
|
|
|
/*
|
|
* Rules for using ptep_establish: the pte MUST be a user pte, and
|
|
@@ -323,7 +347,7 @@ do { \
|
|
int __dirty = pte_dirty(__pte); \
|
|
__pte = pte_mkclean(__pte); \
|
|
if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
|
|
- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
|
|
+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
|
|
else if (__dirty) \
|
|
(ptep)->pte_low = __pte.pte_low; \
|
|
__dirty; \
|
|
@@ -336,7 +360,7 @@ do { \
|
|
int __young = pte_young(__pte); \
|
|
__pte = pte_mkold(__pte); \
|
|
if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
|
|
- ptep_set_access_flags(vma, address, ptep, __pte, __young); \
|
|
+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
|
|
else if (__young) \
|
|
(ptep)->pte_low = __pte.pte_low; \
|
|
__young; \
|
|
@@ -349,7 +373,7 @@ static inline pte_t ptep_get_and_clear(s
|
|
if (!pte_none(pte)
|
|
&& (mm != &init_mm
|
|
|| HYPERVISOR_update_va_mapping(addr, __pte(0), 0))) {
|
|
- pte = raw_ptep_get_and_clear(ptep, pte);
|
|
+ pte = xen_ptep_get_and_clear(ptep, pte);
|
|
pte_update(mm, addr, ptep);
|
|
}
|
|
return pte;
|
|
@@ -491,24 +515,10 @@ extern pte_t *lookup_address(unsigned lo
|
|
#endif
|
|
|
|
#if defined(CONFIG_HIGHPTE)
|
|
-#define pte_offset_map(dir, address) \
|
|
-({ \
|
|
- pte_t *__ptep; \
|
|
- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
|
|
- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE0); \
|
|
- paravirt_map_pt_hook(KM_PTE0,__ptep, pfn); \
|
|
- __ptep = __ptep + pte_index(address); \
|
|
- __ptep; \
|
|
-})
|
|
-#define pte_offset_map_nested(dir, address) \
|
|
-({ \
|
|
- pte_t *__ptep; \
|
|
- unsigned pfn = pmd_val(*(dir)) >> PAGE_SHIFT; \
|
|
- __ptep = (pte_t *)kmap_atomic_pte(pfn_to_page(pfn),KM_PTE1); \
|
|
- paravirt_map_pt_hook(KM_PTE1,__ptep, pfn); \
|
|
- __ptep = __ptep + pte_index(address); \
|
|
- __ptep; \
|
|
-})
|
|
+#define pte_offset_map(dir, address) \
|
|
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + pte_index(address))
|
|
+#define pte_offset_map_nested(dir, address) \
|
|
+ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE1) + pte_index(address))
|
|
#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
|
|
#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
|
|
#else
|
|
@@ -594,10 +604,6 @@ int xen_change_pte_range(struct mm_struc
|
|
#define io_remap_pfn_range(vma,from,pfn,size,prot) \
|
|
direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
|
|
|
|
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
|
|
-#define GET_IOSPACE(pfn) 0
|
|
-#define GET_PFN(pfn) (pfn)
|
|
-
|
|
#include <asm-generic/pgtable.h>
|
|
|
|
#endif /* _I386_PGTABLE_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/processor_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/processor_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -21,6 +21,7 @@
|
|
#include <asm/percpu.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/init.h>
|
|
+#include <asm/processor-flags.h>
|
|
#include <xen/interface/physdev.h>
|
|
|
|
/* flag for disabling the tsc */
|
|
@@ -118,7 +119,8 @@ extern char ignore_fpu_irq;
|
|
|
|
void __init cpu_detect(struct cpuinfo_x86 *c);
|
|
|
|
-extern void identify_cpu(struct cpuinfo_x86 *);
|
|
+extern void identify_boot_cpu(void);
|
|
+extern void identify_secondary_cpu(struct cpuinfo_x86 *);
|
|
extern void print_cpu_info(struct cpuinfo_x86 *);
|
|
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
|
|
extern unsigned short num_cache_leaves;
|
|
@@ -129,29 +131,8 @@ extern void detect_ht(struct cpuinfo_x86
|
|
static inline void detect_ht(struct cpuinfo_x86 *c) {}
|
|
#endif
|
|
|
|
-/*
|
|
- * EFLAGS bits
|
|
- */
|
|
-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
|
|
-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
|
|
-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
|
|
-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
|
|
-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
|
|
-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
|
|
-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
|
|
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
|
|
-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
|
|
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
|
|
-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
|
|
-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
|
|
-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
|
|
-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
|
|
-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
|
|
-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
|
|
-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
|
|
-
|
|
-static inline fastcall void xen_cpuid(unsigned int *eax, unsigned int *ebx,
|
|
- unsigned int *ecx, unsigned int *edx)
|
|
+static inline void xen_cpuid(unsigned int *eax, unsigned int *ebx,
|
|
+ unsigned int *ecx, unsigned int *edx)
|
|
{
|
|
/* ecx is often an input as well as an output. */
|
|
__asm__(XEN_CPUID
|
|
@@ -165,21 +146,6 @@ static inline fastcall void xen_cpuid(un
|
|
#define load_cr3(pgdir) write_cr3(__pa(pgdir))
|
|
|
|
/*
|
|
- * Intel CPU features in CR4
|
|
- */
|
|
-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
|
|
-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
|
|
-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
|
|
-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
|
|
-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
|
|
-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
|
|
-#define X86_CR4_MCE 0x0040 /* Machine check enable */
|
|
-#define X86_CR4_PGE 0x0080 /* enable global pages */
|
|
-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
|
|
-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
|
|
-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
|
|
-
|
|
-/*
|
|
* Save the cr4 feature set we're using (ie
|
|
* Pentium 4MB enable and PPro Global page
|
|
* enable), so that any CPU's that boot up
|
|
@@ -206,26 +172,6 @@ static inline void clear_in_cr4 (unsigne
|
|
}
|
|
|
|
/*
|
|
- * NSC/Cyrix CPU configuration register indexes
|
|
- */
|
|
-
|
|
-#define CX86_PCR0 0x20
|
|
-#define CX86_GCR 0xb8
|
|
-#define CX86_CCR0 0xc0
|
|
-#define CX86_CCR1 0xc1
|
|
-#define CX86_CCR2 0xc2
|
|
-#define CX86_CCR3 0xc3
|
|
-#define CX86_CCR4 0xe8
|
|
-#define CX86_CCR5 0xe9
|
|
-#define CX86_CCR6 0xea
|
|
-#define CX86_CCR7 0xeb
|
|
-#define CX86_PCR1 0xf0
|
|
-#define CX86_DIR0 0xfe
|
|
-#define CX86_DIR1 0xff
|
|
-#define CX86_ARR_BASE 0xc4
|
|
-#define CX86_RCR_BASE 0xdc
|
|
-
|
|
-/*
|
|
* NSC/Cyrix CPU indexed register access macros
|
|
*/
|
|
|
|
@@ -351,7 +297,8 @@ typedef struct {
|
|
struct thread_struct;
|
|
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
-struct tss_struct {
|
|
+/* This is the TSS defined by the hardware. */
|
|
+struct i386_hw_tss {
|
|
unsigned short back_link,__blh;
|
|
unsigned long esp0;
|
|
unsigned short ss0,__ss0h;
|
|
@@ -375,6 +322,11 @@ struct tss_struct {
|
|
unsigned short gs, __gsh;
|
|
unsigned short ldt, __ldth;
|
|
unsigned short trace, io_bitmap_base;
|
|
+} __attribute__((packed));
|
|
+
|
|
+struct tss_struct {
|
|
+ struct i386_hw_tss x86_tss;
|
|
+
|
|
/*
|
|
* The extra 1 is there because the CPU will access an
|
|
* additional byte beyond the end of the IO permission
|
|
@@ -428,10 +380,11 @@ struct thread_struct {
|
|
};
|
|
|
|
#define INIT_THREAD { \
|
|
+ .esp0 = sizeof(init_stack) + (long)&init_stack, \
|
|
.vm86_info = NULL, \
|
|
.sysenter_cs = __KERNEL_CS, \
|
|
.io_bitmap_ptr = NULL, \
|
|
- .fs = __KERNEL_PDA, \
|
|
+ .fs = __KERNEL_PERCPU, \
|
|
}
|
|
|
|
/*
|
|
@@ -441,10 +394,12 @@ struct thread_struct {
|
|
* be within the limit.
|
|
*/
|
|
#define INIT_TSS { \
|
|
- .esp0 = sizeof(init_stack) + (long)&init_stack, \
|
|
- .ss0 = __KERNEL_DS, \
|
|
- .ss1 = __KERNEL_CS, \
|
|
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
|
|
+ .x86_tss = { \
|
|
+ .esp0 = sizeof(init_stack) + (long)&init_stack, \
|
|
+ .ss0 = __KERNEL_DS, \
|
|
+ .ss1 = __KERNEL_CS, \
|
|
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
|
|
+ }, \
|
|
.io_bitmap = { [ 0 ... IO_BITMAP_LONGS] = ~0 }, \
|
|
}
|
|
|
|
@@ -551,38 +506,33 @@ static inline void rep_nop(void)
|
|
|
|
#define cpu_relax() rep_nop()
|
|
|
|
-#define paravirt_enabled() 1
|
|
-#define __cpuid xen_cpuid
|
|
-
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
-static inline void __load_esp0(struct tss_struct *tss, struct thread_struct *thread)
|
|
+static inline void native_load_esp0(struct tss_struct *tss, struct thread_struct *thread)
|
|
{
|
|
- tss->esp0 = thread->esp0;
|
|
+ tss->x86_tss.esp0 = thread->esp0;
|
|
/* This can only happen when SEP is enabled, no need to test "SEP"arately */
|
|
- if (unlikely(tss->ss1 != thread->sysenter_cs)) {
|
|
- tss->ss1 = thread->sysenter_cs;
|
|
+ if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) {
|
|
+ tss->x86_tss.ss1 = thread->sysenter_cs;
|
|
wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
|
|
}
|
|
}
|
|
-#define load_esp0(tss, thread) \
|
|
- __load_esp0(tss, thread)
|
|
#else
|
|
-#define load_esp0(tss, thread) do { \
|
|
+#define xen_load_esp0(tss, thread) do { \
|
|
if (HYPERVISOR_stack_switch(__KERNEL_DS, (thread)->esp0)) \
|
|
BUG(); \
|
|
} while (0)
|
|
#endif
|
|
|
|
|
|
-/*
|
|
- * These special macros can be used to get or set a debugging register
|
|
- */
|
|
-#define get_debugreg(var, register) \
|
|
- (var) = HYPERVISOR_get_debugreg(register)
|
|
-#define set_debugreg(value, register) \
|
|
- WARN_ON(HYPERVISOR_set_debugreg(register, value))
|
|
+static inline unsigned long xen_get_debugreg(int regno)
|
|
+{
|
|
+ return HYPERVISOR_get_debugreg(regno);
|
|
+}
|
|
|
|
-#define set_iopl_mask xen_set_iopl_mask
|
|
+static inline void xen_set_debugreg(int regno, unsigned long value)
|
|
+{
|
|
+ WARN_ON(HYPERVISOR_set_debugreg(regno, value));
|
|
+}
|
|
|
|
/*
|
|
* Set IOPL bits in EFLAGS from given mask
|
|
@@ -597,6 +547,21 @@ static inline void xen_set_iopl_mask(uns
|
|
}
|
|
|
|
|
|
+#define paravirt_enabled() 1
|
|
+#define __cpuid xen_cpuid
|
|
+
|
|
+#define load_esp0 xen_load_esp0
|
|
+
|
|
+/*
|
|
+ * These special macros can be used to get or set a debugging register
|
|
+ */
|
|
+#define get_debugreg(var, register) \
|
|
+ (var) = xen_get_debugreg(register)
|
|
+#define set_debugreg(value, register) \
|
|
+ xen_set_debugreg(register, value)
|
|
+
|
|
+#define set_iopl_mask xen_set_iopl_mask
|
|
+
|
|
/*
|
|
* Generic CPUID function
|
|
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
|
|
@@ -749,8 +714,14 @@ extern unsigned long boot_option_idle_ov
|
|
extern void enable_sep_cpu(void);
|
|
extern int sysenter_setup(void);
|
|
|
|
-extern int init_gdt(int cpu, struct task_struct *idle);
|
|
+/* Defined in head.S */
|
|
+extern struct Xgt_desc_struct early_gdt_descr;
|
|
+
|
|
extern void cpu_set_gdt(int);
|
|
-extern void secondary_cpu_init(void);
|
|
+extern void switch_to_new_gdt(void);
|
|
+extern void cpu_init(void);
|
|
+extern void init_gdt(int cpu);
|
|
+
|
|
+extern int force_mwait;
|
|
|
|
#endif /* __ASM_I386_PROCESSOR_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/smp_32.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/smp_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -8,19 +8,15 @@
|
|
#include <linux/kernel.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/cpumask.h>
|
|
-#include <asm/pda.h>
|
|
#endif
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-#ifndef __ASSEMBLY__
|
|
-#include <asm/fixmap.h>
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
|
|
#include <asm/bitops.h>
|
|
#include <asm/mpspec.h>
|
|
+#include <asm/apic.h>
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
#include <asm/io_apic.h>
|
|
#endif
|
|
-#include <asm/apic.h>
|
|
-#endif
|
|
#endif
|
|
|
|
#define BAD_APICID 0xFFu
|
|
@@ -52,9 +48,76 @@ extern void cpu_exit_clear(void);
|
|
extern void cpu_uninit(void);
|
|
#endif
|
|
|
|
-#ifndef CONFIG_PARAVIRT
|
|
+#ifndef CONFIG_XEN
|
|
+struct smp_ops
|
|
+{
|
|
+ void (*smp_prepare_boot_cpu)(void);
|
|
+ void (*smp_prepare_cpus)(unsigned max_cpus);
|
|
+ int (*cpu_up)(unsigned cpu);
|
|
+ void (*smp_cpus_done)(unsigned max_cpus);
|
|
+
|
|
+ void (*smp_send_stop)(void);
|
|
+ void (*smp_send_reschedule)(int cpu);
|
|
+ int (*smp_call_function_mask)(cpumask_t mask,
|
|
+ void (*func)(void *info), void *info,
|
|
+ int wait);
|
|
+};
|
|
+
|
|
+extern struct smp_ops smp_ops;
|
|
+
|
|
+static inline void smp_prepare_boot_cpu(void)
|
|
+{
|
|
+ smp_ops.smp_prepare_boot_cpu();
|
|
+}
|
|
+static inline void smp_prepare_cpus(unsigned int max_cpus)
|
|
+{
|
|
+ smp_ops.smp_prepare_cpus(max_cpus);
|
|
+}
|
|
+static inline int __cpu_up(unsigned int cpu)
|
|
+{
|
|
+ return smp_ops.cpu_up(cpu);
|
|
+}
|
|
+static inline void smp_cpus_done(unsigned int max_cpus)
|
|
+{
|
|
+ smp_ops.smp_cpus_done(max_cpus);
|
|
+}
|
|
+
|
|
+static inline void smp_send_stop(void)
|
|
+{
|
|
+ smp_ops.smp_send_stop();
|
|
+}
|
|
+static inline void smp_send_reschedule(int cpu)
|
|
+{
|
|
+ smp_ops.smp_send_reschedule(cpu);
|
|
+}
|
|
+static inline int smp_call_function_mask(cpumask_t mask,
|
|
+ void (*func) (void *info), void *info,
|
|
+ int wait)
|
|
+{
|
|
+ return smp_ops.smp_call_function_mask(mask, func, info, wait);
|
|
+}
|
|
+
|
|
+void native_smp_prepare_boot_cpu(void);
|
|
+void native_smp_prepare_cpus(unsigned int max_cpus);
|
|
+int native_cpu_up(unsigned int cpunum);
|
|
+void native_smp_cpus_done(unsigned int max_cpus);
|
|
+
|
|
#define startup_ipi_hook(phys_apicid, start_eip, start_esp) \
|
|
do { } while (0)
|
|
+
|
|
+#else
|
|
+
|
|
+
|
|
+void xen_smp_send_stop(void);
|
|
+void xen_smp_send_reschedule(int cpu);
|
|
+int xen_smp_call_function_mask(cpumask_t mask,
|
|
+ void (*func) (void *info), void *info,
|
|
+ int wait);
|
|
+
|
|
+#define smp_send_stop xen_smp_send_stop
|
|
+#define smp_send_reschedule xen_smp_send_reschedule
|
|
+#define smp_call_function_mask xen_smp_call_function_mask
|
|
+
|
|
#endif
|
|
|
|
/*
|
|
@@ -62,7 +125,8 @@ do { } while (0)
|
|
* from the initial startup. We map APIC_BASE very early in page_setup(),
|
|
* so this is correct in the x86 case.
|
|
*/
|
|
-#define raw_smp_processor_id() (read_pda(cpu_number))
|
|
+DECLARE_PER_CPU(int, cpu_number);
|
|
+#define raw_smp_processor_id() (x86_read_percpu(cpu_number))
|
|
|
|
extern cpumask_t cpu_possible_map;
|
|
#define cpu_callin_map cpu_possible_map
|
|
@@ -73,20 +137,6 @@ static inline int num_booting_cpus(void)
|
|
return cpus_weight(cpu_possible_map);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-
|
|
-#ifdef APIC_DEFINITION
|
|
-extern int hard_smp_processor_id(void);
|
|
-#else
|
|
-#include <mach_apicdef.h>
|
|
-static inline int hard_smp_processor_id(void)
|
|
-{
|
|
- /* we don't want to mark this access volatile - bad code generation */
|
|
- return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
|
|
-}
|
|
-#endif
|
|
-#endif
|
|
-
|
|
#define safe_smp_processor_id() smp_processor_id()
|
|
extern int __cpu_disable(void);
|
|
extern void __cpu_die(unsigned int cpu);
|
|
@@ -102,10 +152,31 @@ extern unsigned int num_processors;
|
|
|
|
#define NO_PROC_ID 0xFF /* No processor magic marker */
|
|
|
|
-#endif
|
|
+#endif /* CONFIG_SMP */
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
+#ifdef CONFIG_X86_LOCAL_APIC
|
|
+
|
|
+#ifdef APIC_DEFINITION
|
|
+extern int hard_smp_processor_id(void);
|
|
+#else
|
|
+#include <mach_apicdef.h>
|
|
+static inline int hard_smp_processor_id(void)
|
|
+{
|
|
+ /* we don't want to mark this access volatile - bad code generation */
|
|
+ return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
|
|
+}
|
|
+#endif /* APIC_DEFINITION */
|
|
+
|
|
+#else /* CONFIG_X86_LOCAL_APIC */
|
|
+
|
|
+#ifndef CONFIG_SMP
|
|
+#define hard_smp_processor_id() 0
|
|
+#endif
|
|
+
|
|
+#endif /* CONFIG_X86_LOCAL_APIC */
|
|
+
|
|
extern u8 apicid_2_node[];
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/system_32.h 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/system_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -4,7 +4,7 @@
|
|
#include <linux/kernel.h>
|
|
#include <asm/segment.h>
|
|
#include <asm/cpufeature.h>
|
|
-#include <linux/bitops.h> /* for LOCK_PREFIX */
|
|
+#include <asm/cmpxchg.h>
|
|
#include <asm/synch_bitops.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
@@ -90,308 +90,102 @@ __asm__ __volatile__ ("movw %%dx,%1\n\t"
|
|
#define savesegment(seg, value) \
|
|
asm volatile("mov %%" #seg ",%0":"=rm" (value))
|
|
|
|
-#define read_cr0() ({ \
|
|
- unsigned int __dummy; \
|
|
- __asm__ __volatile__( \
|
|
- "movl %%cr0,%0\n\t" \
|
|
- :"=r" (__dummy)); \
|
|
- __dummy; \
|
|
-})
|
|
-#define write_cr0(x) \
|
|
- __asm__ __volatile__("movl %0,%%cr0": :"r" (x))
|
|
-
|
|
-#define read_cr2() (current_vcpu_info()->arch.cr2)
|
|
-#define write_cr2(x) \
|
|
- __asm__ __volatile__("movl %0,%%cr2": :"r" (x))
|
|
-
|
|
-#define read_cr3() ({ \
|
|
- unsigned int __dummy; \
|
|
- __asm__ ( \
|
|
- "movl %%cr3,%0\n\t" \
|
|
- :"=r" (__dummy)); \
|
|
- __dummy = xen_cr3_to_pfn(__dummy); \
|
|
- mfn_to_pfn(__dummy) << PAGE_SHIFT; \
|
|
-})
|
|
-#define write_cr3(x) ({ \
|
|
- unsigned int __dummy = pfn_to_mfn((x) >> PAGE_SHIFT); \
|
|
- __dummy = xen_pfn_to_cr3(__dummy); \
|
|
- __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \
|
|
-})
|
|
-#define read_cr4() ({ \
|
|
- unsigned int __dummy; \
|
|
- __asm__( \
|
|
- "movl %%cr4,%0\n\t" \
|
|
- :"=r" (__dummy)); \
|
|
- __dummy; \
|
|
-})
|
|
-#define read_cr4_safe() ({ \
|
|
- unsigned int __dummy; \
|
|
- /* This could fault if %cr4 does not exist */ \
|
|
- __asm__("1: movl %%cr4, %0 \n" \
|
|
- "2: \n" \
|
|
- ".section __ex_table,\"a\" \n" \
|
|
- ".long 1b,2b \n" \
|
|
- ".previous \n" \
|
|
- : "=r" (__dummy): "0" (0)); \
|
|
- __dummy; \
|
|
-})
|
|
-
|
|
-#define write_cr4(x) \
|
|
- __asm__ __volatile__("movl %0,%%cr4": :"r" (x))
|
|
-
|
|
-#define wbinvd() \
|
|
- __asm__ __volatile__ ("wbinvd": : :"memory")
|
|
-
|
|
-/* Clear the 'TS' bit */
|
|
-#define clts() (HYPERVISOR_fpu_taskswitch(0))
|
|
-
|
|
-/* Set the 'TS' bit */
|
|
-#define stts() (HYPERVISOR_fpu_taskswitch(1))
|
|
-
|
|
-#endif /* __KERNEL__ */
|
|
-
|
|
-static inline unsigned long get_limit(unsigned long segment)
|
|
+static inline void xen_clts(void)
|
|
{
|
|
- unsigned long __limit;
|
|
- __asm__("lsll %1,%0"
|
|
- :"=r" (__limit):"r" (segment));
|
|
- return __limit+1;
|
|
+ HYPERVISOR_fpu_taskswitch(0);
|
|
}
|
|
|
|
-#define nop() __asm__ __volatile__ ("nop")
|
|
-
|
|
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
|
|
-
|
|
-#define tas(ptr) (xchg((ptr),1))
|
|
-
|
|
-struct __xchg_dummy { unsigned long a[100]; };
|
|
-#define __xg(x) ((struct __xchg_dummy *)(x))
|
|
+static inline unsigned long xen_read_cr0(void)
|
|
+{
|
|
+ unsigned long val;
|
|
+ asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
|
|
+ return val;
|
|
+}
|
|
|
|
+static inline void xen_write_cr0(unsigned long val)
|
|
+{
|
|
+ asm volatile("movl %0,%%cr0": :"r" (val));
|
|
+}
|
|
|
|
-#ifdef CONFIG_X86_CMPXCHG64
|
|
+#define xen_read_cr2() (current_vcpu_info()->arch.cr2)
|
|
|
|
-/*
|
|
- * The semantics of XCHGCMP8B are a bit strange, this is why
|
|
- * there is a loop and the loading of %%eax and %%edx has to
|
|
- * be inside. This inlines well in most cases, the cached
|
|
- * cost is around ~38 cycles. (in the future we might want
|
|
- * to do an SIMD/3DNOW!/MMX/FPU 64-bit store here, but that
|
|
- * might have an implicit FPU-save as a cost, so it's not
|
|
- * clear which path to go.)
|
|
- *
|
|
- * cmpxchg8b must be used with the lock prefix here to allow
|
|
- * the instruction to be executed atomically, see page 3-102
|
|
- * of the instruction set reference 24319102.pdf. We need
|
|
- * the reader side to see the coherent 64bit value.
|
|
- */
|
|
-static inline void __set_64bit (unsigned long long * ptr,
|
|
- unsigned int low, unsigned int high)
|
|
+static inline void xen_write_cr2(unsigned long val)
|
|
{
|
|
- __asm__ __volatile__ (
|
|
- "\n1:\t"
|
|
- "movl (%0), %%eax\n\t"
|
|
- "movl 4(%0), %%edx\n\t"
|
|
- "lock cmpxchg8b (%0)\n\t"
|
|
- "jnz 1b"
|
|
- : /* no outputs */
|
|
- : "D"(ptr),
|
|
- "b"(low),
|
|
- "c"(high)
|
|
- : "ax","dx","memory");
|
|
+ asm volatile("movl %0,%%cr2": :"r" (val));
|
|
}
|
|
|
|
-static inline void __set_64bit_constant (unsigned long long *ptr,
|
|
- unsigned long long value)
|
|
+static inline unsigned long xen_read_cr3(void)
|
|
{
|
|
- __set_64bit(ptr,(unsigned int)(value), (unsigned int)((value)>>32ULL));
|
|
+ unsigned long val;
|
|
+ asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
|
|
+ return mfn_to_pfn(xen_cr3_to_pfn(val)) << PAGE_SHIFT;
|
|
}
|
|
-#define ll_low(x) *(((unsigned int*)&(x))+0)
|
|
-#define ll_high(x) *(((unsigned int*)&(x))+1)
|
|
|
|
-static inline void __set_64bit_var (unsigned long long *ptr,
|
|
- unsigned long long value)
|
|
+static inline void xen_write_cr3(unsigned long val)
|
|
{
|
|
- __set_64bit(ptr,ll_low(value), ll_high(value));
|
|
+ val = xen_pfn_to_cr3(pfn_to_mfn(val >> PAGE_SHIFT));
|
|
+ asm volatile("movl %0,%%cr3": :"r" (val));
|
|
}
|
|
|
|
-#define set_64bit(ptr,value) \
|
|
-(__builtin_constant_p(value) ? \
|
|
- __set_64bit_constant(ptr, value) : \
|
|
- __set_64bit_var(ptr, value) )
|
|
-
|
|
-#define _set_64bit(ptr,value) \
|
|
-(__builtin_constant_p(value) ? \
|
|
- __set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
|
|
- __set_64bit(ptr, ll_low(value), ll_high(value)) )
|
|
-
|
|
-#endif
|
|
-
|
|
-/*
|
|
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
|
|
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
|
|
- * but generally the primitive is invalid, *ptr is output argument. --ANK
|
|
- */
|
|
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
|
|
+static inline unsigned long xen_read_cr4(void)
|
|
{
|
|
- switch (size) {
|
|
- case 1:
|
|
- __asm__ __volatile__("xchgb %b0,%1"
|
|
- :"=q" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- case 2:
|
|
- __asm__ __volatile__("xchgw %w0,%1"
|
|
- :"=r" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- case 4:
|
|
- __asm__ __volatile__("xchgl %0,%1"
|
|
- :"=r" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- }
|
|
- return x;
|
|
+ unsigned long val;
|
|
+ asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
|
|
+ return val;
|
|
}
|
|
|
|
-/*
|
|
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
|
|
- * store NEW in MEM. Return the initial value in MEM. Success is
|
|
- * indicated by comparing RETURN with OLD.
|
|
- */
|
|
-
|
|
-#ifdef CONFIG_X86_CMPXCHG
|
|
-#define __HAVE_ARCH_CMPXCHG 1
|
|
-#define cmpxchg(ptr,o,n)\
|
|
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
|
|
- (unsigned long)(n),sizeof(*(ptr))))
|
|
-#define sync_cmpxchg(ptr,o,n)\
|
|
- ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
|
|
- (unsigned long)(n),sizeof(*(ptr))))
|
|
-#endif
|
|
+static inline unsigned long xen_read_cr4_safe(void)
|
|
+{
|
|
+ unsigned long val;
|
|
+ /* This could fault if %cr4 does not exist */
|
|
+ asm("1: movl %%cr4, %0 \n"
|
|
+ "2: \n"
|
|
+ ".section __ex_table,\"a\" \n"
|
|
+ ".long 1b,2b \n"
|
|
+ ".previous \n"
|
|
+ : "=r" (val): "0" (0));
|
|
+ return val;
|
|
+}
|
|
|
|
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
|
|
- unsigned long new, int size)
|
|
+static inline void xen_write_cr4(unsigned long val)
|
|
{
|
|
- unsigned long prev;
|
|
- switch (size) {
|
|
- case 1:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
|
|
- : "=a"(prev)
|
|
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 2:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 4:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- }
|
|
- return old;
|
|
+ asm volatile("movl %0,%%cr4": :"r" (val));
|
|
}
|
|
|
|
-/*
|
|
- * Always use locked operations when touching memory shared with a
|
|
- * hypervisor, since the system may be SMP even if the guest kernel
|
|
- * isn't.
|
|
- */
|
|
-static inline unsigned long __sync_cmpxchg(volatile void *ptr,
|
|
- unsigned long old,
|
|
- unsigned long new, int size)
|
|
-{
|
|
- unsigned long prev;
|
|
- switch (size) {
|
|
- case 1:
|
|
- __asm__ __volatile__("lock; cmpxchgb %b1,%2"
|
|
- : "=a"(prev)
|
|
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 2:
|
|
- __asm__ __volatile__("lock; cmpxchgw %w1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 4:
|
|
- __asm__ __volatile__("lock; cmpxchgl %1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- }
|
|
- return old;
|
|
+static inline void xen_wbinvd(void)
|
|
+{
|
|
+ asm volatile("wbinvd": : :"memory");
|
|
}
|
|
|
|
-#ifndef CONFIG_X86_CMPXCHG
|
|
-/*
|
|
- * Building a kernel capable running on 80386. It may be necessary to
|
|
- * simulate the cmpxchg on the 80386 CPU. For that purpose we define
|
|
- * a function for each of the sizes we support.
|
|
- */
|
|
+#define read_cr0() (xen_read_cr0())
|
|
+#define write_cr0(x) (xen_write_cr0(x))
|
|
+#define read_cr2() (xen_read_cr2())
|
|
+#define write_cr2(x) (xen_write_cr2(x))
|
|
+#define read_cr3() (xen_read_cr3())
|
|
+#define write_cr3(x) (xen_write_cr3(x))
|
|
+#define read_cr4() (xen_read_cr4())
|
|
+#define read_cr4_safe() (xen_read_cr4_safe())
|
|
+#define write_cr4(x) (xen_write_cr4(x))
|
|
+#define wbinvd() (xen_wbinvd())
|
|
|
|
-extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
|
|
-extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
|
|
-extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
|
|
-
|
|
-static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
|
|
- unsigned long new, int size)
|
|
-{
|
|
- switch (size) {
|
|
- case 1:
|
|
- return cmpxchg_386_u8(ptr, old, new);
|
|
- case 2:
|
|
- return cmpxchg_386_u16(ptr, old, new);
|
|
- case 4:
|
|
- return cmpxchg_386_u32(ptr, old, new);
|
|
- }
|
|
- return old;
|
|
-}
|
|
-
|
|
-#define cmpxchg(ptr,o,n) \
|
|
-({ \
|
|
- __typeof__(*(ptr)) __ret; \
|
|
- if (likely(boot_cpu_data.x86 > 3)) \
|
|
- __ret = __cmpxchg((ptr), (unsigned long)(o), \
|
|
- (unsigned long)(n), sizeof(*(ptr))); \
|
|
- else \
|
|
- __ret = cmpxchg_386((ptr), (unsigned long)(o), \
|
|
- (unsigned long)(n), sizeof(*(ptr))); \
|
|
- __ret; \
|
|
-})
|
|
-#endif
|
|
+/* Clear the 'TS' bit */
|
|
+#define clts() (xen_clts())
|
|
|
|
-#ifdef CONFIG_X86_CMPXCHG64
|
|
+/* Set the 'TS' bit */
|
|
+#define stts() (HYPERVISOR_fpu_taskswitch(1))
|
|
|
|
-static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
|
|
- unsigned long long new)
|
|
+#endif /* __KERNEL__ */
|
|
+
|
|
+static inline unsigned long get_limit(unsigned long segment)
|
|
{
|
|
- unsigned long long prev;
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchg8b %3"
|
|
- : "=A"(prev)
|
|
- : "b"((unsigned long)new),
|
|
- "c"((unsigned long)(new >> 32)),
|
|
- "m"(*__xg(ptr)),
|
|
- "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
-}
|
|
-
|
|
-#define cmpxchg64(ptr,o,n)\
|
|
- ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
|
|
- (unsigned long long)(n)))
|
|
+ unsigned long __limit;
|
|
+ __asm__("lsll %1,%0"
|
|
+ :"=r" (__limit):"r" (segment));
|
|
+ return __limit+1;
|
|
+}
|
|
+
|
|
+#define nop() __asm__ __volatile__ ("nop")
|
|
|
|
-#endif
|
|
-
|
|
/*
|
|
* Force strict CPU ordering.
|
|
* And yes, this is required on UP too when we're talking
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/tlbflush_32.h 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/tlbflush_32.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -29,8 +29,13 @@
|
|
* and page-granular flushes are available only on i486 and up.
|
|
*/
|
|
|
|
+#define TLB_FLUSH_ALL 0xffffffff
|
|
+
|
|
+
|
|
#ifndef CONFIG_SMP
|
|
|
|
+#include <linux/sched.h>
|
|
+
|
|
#define flush_tlb() __flush_tlb()
|
|
#define flush_tlb_all() __flush_tlb_all()
|
|
#define local_flush_tlb() __flush_tlb()
|
|
@@ -55,7 +60,7 @@ static inline void flush_tlb_range(struc
|
|
__flush_tlb();
|
|
}
|
|
|
|
-#else
|
|
+#else /* SMP */
|
|
|
|
#include <asm/smp.h>
|
|
|
|
@@ -84,9 +89,7 @@ struct tlb_state
|
|
char __cacheline_padding[L1_CACHE_BYTES-8];
|
|
};
|
|
DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
|
|
-
|
|
-
|
|
-#endif
|
|
+#endif /* SMP */
|
|
|
|
#define flush_tlb_kernel_range(start, end) flush_tlb_all()
|
|
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/desc_64.h 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/desc_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -127,16 +127,6 @@ static inline void set_ldt_desc(unsigned
|
|
DESC_LDT, size * 8 - 1);
|
|
}
|
|
|
|
-static inline void set_seg_base(unsigned cpu, int entry, void *base)
|
|
-{
|
|
- struct desc_struct *d = &cpu_gdt(cpu)[entry];
|
|
- u32 addr = (u32)(u64)base;
|
|
- BUG_ON((u64)base >> 32);
|
|
- d->base0 = addr & 0xffff;
|
|
- d->base1 = (addr >> 16) & 0xff;
|
|
- d->base2 = (addr >> 24) & 0xff;
|
|
-}
|
|
-
|
|
#define LDT_entry_a(info) \
|
|
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
|
|
/* Don't allow setting of the lm bit. It is useless anyways because
|
|
@@ -165,25 +155,15 @@ static inline void set_seg_base(unsigned
|
|
(info)->useable == 0 && \
|
|
(info)->lm == 0)
|
|
|
|
-#if TLS_SIZE != 24
|
|
-# error update this code.
|
|
-#endif
|
|
-
|
|
static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
|
|
{
|
|
-#if 0
|
|
+ unsigned int i;
|
|
u64 *gdt = (u64 *)(cpu_gdt(cpu) + GDT_ENTRY_TLS_MIN);
|
|
- gdt[0] = t->tls_array[0];
|
|
- gdt[1] = t->tls_array[1];
|
|
- gdt[2] = t->tls_array[2];
|
|
-#endif
|
|
-#define C(i) \
|
|
- if (HYPERVISOR_update_descriptor(virt_to_machine(&cpu_gdt(cpu)[GDT_ENTRY_TLS_MIN + i]), \
|
|
- t->tls_array[i])) \
|
|
- BUG();
|
|
|
|
- C(0); C(1); C(2);
|
|
-#undef C
|
|
+ for (i = 0; i < GDT_ENTRY_TLS_ENTRIES; i++)
|
|
+ if (HYPERVISOR_update_descriptor(virt_to_machine(&gdt[i]),
|
|
+ t->tls_array[i]))
|
|
+ BUG();
|
|
}
|
|
|
|
/*
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/fixmap_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -15,7 +15,6 @@
|
|
#include <asm/apicdef.h>
|
|
#include <asm/page.h>
|
|
#include <asm/vsyscall.h>
|
|
-#include <asm/vsyscall32.h>
|
|
#include <asm/acpi.h>
|
|
|
|
/*
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/irqflags_64.h 2007-06-12 13:14:13.000000000 +0200
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/irqflags_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -9,6 +9,7 @@
|
|
*/
|
|
#ifndef _ASM_IRQFLAGS_H
|
|
#define _ASM_IRQFLAGS_H
|
|
+#include <asm/processor-flags.h>
|
|
|
|
#ifndef __ASSEMBLY__
|
|
/*
|
|
@@ -50,19 +51,19 @@ static inline void raw_local_irq_disable
|
|
{
|
|
unsigned long flags = __raw_local_save_flags();
|
|
|
|
- raw_local_irq_restore((flags & ~(1 << 9)) | (1 << 18));
|
|
+ raw_local_irq_restore((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
|
|
}
|
|
|
|
static inline void raw_local_irq_enable(void)
|
|
{
|
|
unsigned long flags = __raw_local_save_flags();
|
|
|
|
- raw_local_irq_restore((flags | (1 << 9)) & ~(1 << 18));
|
|
+ raw_local_irq_restore((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
|
|
}
|
|
|
|
static inline int raw_irqs_disabled_flags(unsigned long flags)
|
|
{
|
|
- return !(flags & (1<<9)) || (flags & (1 << 18));
|
|
+ return !(flags & X86_EFLAGS_IF) || (flags & X86_EFLAGS_AC);
|
|
}
|
|
|
|
#else /* CONFIG_X86_VSMP */
|
|
@@ -118,13 +119,21 @@ static inline int raw_irqs_disabled_flag
|
|
* Used in the idle loop; sti takes one instruction cycle
|
|
* to complete:
|
|
*/
|
|
-void raw_safe_halt(void);
|
|
+void xen_safe_halt(void);
|
|
+static inline void raw_safe_halt(void)
|
|
+{
|
|
+ xen_safe_halt();
|
|
+}
|
|
|
|
/*
|
|
* Used when interrupts are already enabled or to
|
|
* shutdown the processor:
|
|
*/
|
|
-void halt(void);
|
|
+void xen_halt(void);
|
|
+static inline void halt(void)
|
|
+{
|
|
+ xen_halt();
|
|
+}
|
|
|
|
#else /* __ASSEMBLY__: */
|
|
# ifdef CONFIG_TRACE_IRQFLAGS
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/mmu_context_64.h 2007-06-12 13:14:13.000000000 +0200
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/mmu_context_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -9,6 +9,9 @@
|
|
#include <asm/pgtable.h>
|
|
#include <asm/tlbflush.h>
|
|
|
|
+void arch_exit_mmap(struct mm_struct *mm);
|
|
+void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm);
|
|
+
|
|
/*
|
|
* possibly do the LDT unload here?
|
|
*/
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgalloc_64.h 2010-09-23 15:39:04.000000000 +0200
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgalloc_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -1,7 +1,6 @@
|
|
#ifndef _X86_64_PGALLOC_H
|
|
#define _X86_64_PGALLOC_H
|
|
|
|
-#include <asm/fixmap.h>
|
|
#include <asm/pda.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/mm.h>
|
|
@@ -95,35 +94,25 @@ static inline void pud_free(pud_t *pud)
|
|
pte_free(virt_to_page(pud));
|
|
}
|
|
|
|
-static inline void pgd_list_add(pgd_t *pgd, void *mm)
|
|
+static inline void pgd_list_add(pgd_t *pgd, struct mm_struct *mm)
|
|
{
|
|
struct page *page = virt_to_page(pgd);
|
|
|
|
/* Store a back link for vmalloc_sync_all(). */
|
|
- page->mapping = mm;
|
|
+ set_page_private(page, (unsigned long)mm);
|
|
|
|
spin_lock(&pgd_lock);
|
|
- page->index = (pgoff_t)pgd_list;
|
|
- if (pgd_list)
|
|
- pgd_list->private = (unsigned long)&page->index;
|
|
- pgd_list = page;
|
|
- page->private = (unsigned long)&pgd_list;
|
|
+ list_add(&page->lru, &pgd_list);
|
|
spin_unlock(&pgd_lock);
|
|
}
|
|
|
|
static inline void pgd_list_del(pgd_t *pgd)
|
|
{
|
|
- struct page *next, **pprev, *page = virt_to_page(pgd);
|
|
+ struct page *page = virt_to_page(pgd);
|
|
|
|
spin_lock(&pgd_lock);
|
|
- next = (struct page *)page->index;
|
|
- pprev = (struct page **)page->private;
|
|
- *pprev = next;
|
|
- if (next)
|
|
- next->private = (unsigned long)pprev;
|
|
+ list_del(&page->lru);
|
|
spin_unlock(&pgd_lock);
|
|
-
|
|
- page->mapping = NULL;
|
|
}
|
|
|
|
static inline pgd_t *pgd_alloc(struct mm_struct *mm)
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-07 15:38:08.000000000 +0100
|
|
@@ -1,12 +1,14 @@
|
|
#ifndef _X86_64_PGTABLE_H
|
|
#define _X86_64_PGTABLE_H
|
|
|
|
+#include <linux/const.h>
|
|
+#ifndef __ASSEMBLY__
|
|
+
|
|
/*
|
|
* This file contains the functions and defines necessary to modify and use
|
|
* the x86-64 page table tree.
|
|
*/
|
|
#include <asm/processor.h>
|
|
-#include <asm/fixmap.h>
|
|
#include <asm/bitops.h>
|
|
#include <linux/threads.h>
|
|
#include <linux/sched.h>
|
|
@@ -35,11 +37,9 @@ extern pte_t *lookup_address(unsigned lo
|
|
#endif
|
|
|
|
extern pud_t level3_kernel_pgt[512];
|
|
-extern pud_t level3_physmem_pgt[512];
|
|
extern pud_t level3_ident_pgt[512];
|
|
extern pmd_t level2_kernel_pgt[512];
|
|
extern pgd_t init_level4_pgt[];
|
|
-extern pgd_t boot_level4_pgt[];
|
|
extern unsigned long __supported_pte_mask;
|
|
|
|
#define swapper_pg_dir init_level4_pgt
|
|
@@ -54,6 +54,8 @@ extern void clear_kernel_mapping(unsigne
|
|
extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)];
|
|
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
|
|
|
|
+#endif /* !__ASSEMBLY__ */
|
|
+
|
|
/*
|
|
* PGDIR_SHIFT determines what a top-level page table entry can map
|
|
*/
|
|
@@ -78,6 +80,8 @@ extern unsigned long empty_zero_page[PAG
|
|
*/
|
|
#define PTRS_PER_PTE 512
|
|
|
|
+#ifndef __ASSEMBLY__
|
|
+
|
|
#define pte_ERROR(e) \
|
|
printk("%s:%d: bad pte %p(%016lx pfn %010lx).\n", __FILE__, __LINE__, \
|
|
&(e), __pte_val(e), pte_pfn(e))
|
|
@@ -120,22 +124,23 @@ static inline void pgd_clear (pgd_t * pg
|
|
|
|
#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
|
|
|
|
-#define PMD_SIZE (1UL << PMD_SHIFT)
|
|
+#endif /* !__ASSEMBLY__ */
|
|
+
|
|
+#define PMD_SIZE (_AC(1,UL) << PMD_SHIFT)
|
|
#define PMD_MASK (~(PMD_SIZE-1))
|
|
-#define PUD_SIZE (1UL << PUD_SHIFT)
|
|
+#define PUD_SIZE (_AC(1,UL) << PUD_SHIFT)
|
|
#define PUD_MASK (~(PUD_SIZE-1))
|
|
-#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
|
|
+#define PGDIR_SIZE (_AC(1,UL) << PGDIR_SHIFT)
|
|
#define PGDIR_MASK (~(PGDIR_SIZE-1))
|
|
|
|
#define USER_PTRS_PER_PGD ((TASK_SIZE-1)/PGDIR_SIZE+1)
|
|
#define FIRST_USER_ADDRESS 0
|
|
|
|
-#ifndef __ASSEMBLY__
|
|
-#define MAXMEM 0x6fffffffffUL
|
|
-#define VMALLOC_START 0xffffc20000000000UL
|
|
-#define VMALLOC_END 0xffffe1ffffffffffUL
|
|
-#define MODULES_VADDR 0xffffffff88000000UL
|
|
-#define MODULES_END 0xffffffffff000000UL
|
|
+#define MAXMEM _AC(0x6fffffffff, UL)
|
|
+#define VMALLOC_START _AC(0xffffc20000000000, UL)
|
|
+#define VMALLOC_END _AC(0xffffe1ffffffffff, UL)
|
|
+#define MODULES_VADDR _AC(0xffffffff88000000, UL)
|
|
+#define MODULES_END _AC(0xffffffffff000000, UL)
|
|
#define MODULES_LEN (MODULES_END - MODULES_VADDR)
|
|
|
|
#define _PAGE_BIT_PRESENT 0
|
|
@@ -161,16 +166,18 @@ static inline void pgd_clear (pgd_t * pg
|
|
#define _PAGE_GLOBAL 0x100 /* Global TLB entry */
|
|
|
|
#define _PAGE_PROTNONE 0x080 /* If not present */
|
|
-#define _PAGE_NX (1UL<<_PAGE_BIT_NX)
|
|
+#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
|
|
|
|
/* Mapped page is I/O or foreign and has no associated page struct. */
|
|
#define _PAGE_IO 0x200
|
|
|
|
+#ifndef __ASSEMBLY__ /* the extern declaration below is C-only */
|
|
#if CONFIG_XEN_COMPAT <= 0x030002
|
|
extern unsigned int __kernel_page_user;
|
|
#else
|
|
#define __kernel_page_user 0
|
|
#endif
|
|
+#endif /* !__ASSEMBLY__ */
|
|
|
|
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
|
|
#define _KERNPG_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | __kernel_page_user)
|
|
@@ -235,6 +242,8 @@ extern unsigned int __kernel_page_user;
|
|
#define __S110 PAGE_SHARED_EXEC
|
|
#define __S111 PAGE_SHARED_EXEC
|
|
|
|
+#ifndef __ASSEMBLY__
|
|
+
|
|
static inline unsigned long pgd_bad(pgd_t pgd)
|
|
{
|
|
return __pgd_val(pgd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
|
|
@@ -346,6 +355,20 @@ static inline pte_t pte_mkwrite(pte_t pt
|
|
static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
|
|
static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
|
|
|
|
+static inline int ptep_test_and_clear_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
|
|
+{ /* vma and addr are not used by this implementation */
|
|
+ if (!pte_dirty(*ptep)) /* dirty bit not set: skip the bit operation */
|
|
+ return 0;
|
|
+ return test_and_clear_bit(_PAGE_BIT_DIRTY, &ptep->pte); /* result of the bit op is the return value */
|
|
+}
|
|
+
|
|
+static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
|
|
+{ /* vma and addr are not used by this implementation */
|
|
+ if (!pte_young(*ptep)) /* accessed bit not set: skip the bit operation */
|
|
+ return 0;
|
|
+ return test_and_clear_bit(_PAGE_BIT_ACCESSED, &ptep->pte); /* result of the bit op is the return value */
|
|
+}
|
|
+
|
|
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
{
|
|
pte_t pte = *ptep;
|
|
@@ -470,18 +493,12 @@ static inline pte_t pte_modify(pte_t pte
|
|
* bit at the same time. */
|
|
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
|
|
- do { \
|
|
- if (dirty) \
|
|
- ptep_establish(vma, address, ptep, entry); \
|
|
- } while (0)
|
|
-
|
|
-
|
|
-/*
|
|
- * i386 says: We don't actually have these, but we want to advertise
|
|
- * them so that we can encompass the flush here.
|
|
- */
|
|
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
+({ \
|
|
+ int __changed = !pte_same(*(ptep), entry); \
|
|
+ if (__changed && (dirty)) \
|
|
+ ptep_establish(vma, address, ptep, entry); \
|
|
+ __changed; \
|
|
+})
|
|
|
|
#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
|
|
#define ptep_clear_flush_dirty(vma, address, ptep) \
|
|
@@ -490,7 +507,7 @@ static inline pte_t pte_modify(pte_t pte
|
|
int __dirty = pte_dirty(__pte); \
|
|
__pte = pte_mkclean(__pte); \
|
|
if ((vma)->vm_mm->context.pinned) \
|
|
- ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
|
|
+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
|
|
else if (__dirty) \
|
|
set_pte(ptep, __pte); \
|
|
__dirty; \
|
|
@@ -503,7 +520,7 @@ static inline pte_t pte_modify(pte_t pte
|
|
int __young = pte_young(__pte); \
|
|
__pte = pte_mkold(__pte); \
|
|
if ((vma)->vm_mm->context.pinned) \
|
|
- ptep_set_access_flags(vma, address, ptep, __pte, __young); \
|
|
+ (void)ptep_set_access_flags(vma, address, ptep, __pte, __young); \
|
|
else if (__young) \
|
|
set_pte(ptep, __pte); \
|
|
__young; \
|
|
@@ -517,10 +534,7 @@ static inline pte_t pte_modify(pte_t pte
|
|
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
|
|
|
|
extern spinlock_t pgd_lock;
|
|
-extern struct page *pgd_list;
|
|
-void vmalloc_sync_all(void);
|
|
-
|
|
-#endif /* !__ASSEMBLY__ */
|
|
+extern struct list_head pgd_list;
|
|
|
|
extern int kern_addr_valid(unsigned long addr);
|
|
|
|
@@ -555,10 +569,6 @@ int xen_change_pte_range(struct mm_struc
|
|
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
|
|
direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
|
|
|
|
-#define MK_IOSPACE_PFN(space, pfn) (pfn)
|
|
-#define GET_IOSPACE(pfn) 0
|
|
-#define GET_PFN(pfn) (pfn)
|
|
-
|
|
#define HAVE_ARCH_UNMAPPED_AREA
|
|
|
|
#define pgtable_cache_init() do { } while (0)
|
|
@@ -572,11 +582,14 @@ int xen_change_pte_range(struct mm_struc
|
|
#define kc_offset_to_vaddr(o) \
|
|
(((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
|
|
|
|
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
|
|
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
|
|
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
|
|
#define __HAVE_ARCH_PTE_SAME
|
|
#include <asm-generic/pgtable.h>
|
|
+#endif /* !__ASSEMBLY__ */
|
|
|
|
#endif /* _X86_64_PGTABLE_H */
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/processor_64.h 2011-01-31 17:32:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/processor_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -20,6 +20,7 @@
|
|
#include <asm/percpu.h>
|
|
#include <linux/personality.h>
|
|
#include <linux/cpumask.h>
|
|
+#include <asm/processor-flags.h>
|
|
|
|
#define TF_MASK 0x00000100
|
|
#define IF_MASK 0x00000200
|
|
@@ -103,42 +104,6 @@ extern unsigned int init_intel_cacheinfo
|
|
extern unsigned short num_cache_leaves;
|
|
|
|
/*
|
|
- * EFLAGS bits
|
|
- */
|
|
-#define X86_EFLAGS_CF 0x00000001 /* Carry Flag */
|
|
-#define X86_EFLAGS_PF 0x00000004 /* Parity Flag */
|
|
-#define X86_EFLAGS_AF 0x00000010 /* Auxillary carry Flag */
|
|
-#define X86_EFLAGS_ZF 0x00000040 /* Zero Flag */
|
|
-#define X86_EFLAGS_SF 0x00000080 /* Sign Flag */
|
|
-#define X86_EFLAGS_TF 0x00000100 /* Trap Flag */
|
|
-#define X86_EFLAGS_IF 0x00000200 /* Interrupt Flag */
|
|
-#define X86_EFLAGS_DF 0x00000400 /* Direction Flag */
|
|
-#define X86_EFLAGS_OF 0x00000800 /* Overflow Flag */
|
|
-#define X86_EFLAGS_IOPL 0x00003000 /* IOPL mask */
|
|
-#define X86_EFLAGS_NT 0x00004000 /* Nested Task */
|
|
-#define X86_EFLAGS_RF 0x00010000 /* Resume Flag */
|
|
-#define X86_EFLAGS_VM 0x00020000 /* Virtual Mode */
|
|
-#define X86_EFLAGS_AC 0x00040000 /* Alignment Check */
|
|
-#define X86_EFLAGS_VIF 0x00080000 /* Virtual Interrupt Flag */
|
|
-#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
|
|
-#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
|
|
-
|
|
-/*
|
|
- * Intel CPU features in CR4
|
|
- */
|
|
-#define X86_CR4_VME 0x0001 /* enable vm86 extensions */
|
|
-#define X86_CR4_PVI 0x0002 /* virtual interrupts flag enable */
|
|
-#define X86_CR4_TSD 0x0004 /* disable time stamp at ipl 3 */
|
|
-#define X86_CR4_DE 0x0008 /* enable debugging extensions */
|
|
-#define X86_CR4_PSE 0x0010 /* enable page size extensions */
|
|
-#define X86_CR4_PAE 0x0020 /* enable physical address extensions */
|
|
-#define X86_CR4_MCE 0x0040 /* Machine check enable */
|
|
-#define X86_CR4_PGE 0x0080 /* enable global pages */
|
|
-#define X86_CR4_PCE 0x0100 /* enable performance counters at ipl 3 */
|
|
-#define X86_CR4_OSFXSR 0x0200 /* enable fast FPU save and restore */
|
|
-#define X86_CR4_OSXMMEXCPT 0x0400 /* enable unmasked SSE exceptions */
|
|
-
|
|
-/*
|
|
* Save the cr4 feature set we're using (ie
|
|
* Pentium 4MB enable and PPro Global page
|
|
* enable), so that any CPU's that boot up
|
|
@@ -203,7 +168,7 @@ struct i387_fxsave_struct {
|
|
u32 mxcsr;
|
|
u32 mxcsr_mask;
|
|
u32 st_space[32]; /* 8*16 bytes for each FP-reg = 128 bytes */
|
|
- u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 128 bytes */
|
|
+ u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg = 256 bytes */
|
|
u32 padding[24];
|
|
} __attribute__ ((aligned (16)));
|
|
|
|
@@ -436,22 +401,6 @@ static inline void prefetchw(void *x)
|
|
#define cpu_relax() rep_nop()
|
|
|
|
/*
|
|
- * NSC/Cyrix CPU configuration register indexes
|
|
- */
|
|
-#define CX86_CCR0 0xc0
|
|
-#define CX86_CCR1 0xc1
|
|
-#define CX86_CCR2 0xc2
|
|
-#define CX86_CCR3 0xc3
|
|
-#define CX86_CCR4 0xe8
|
|
-#define CX86_CCR5 0xe9
|
|
-#define CX86_CCR6 0xea
|
|
-#define CX86_CCR7 0xeb
|
|
-#define CX86_DIR0 0xfe
|
|
-#define CX86_DIR1 0xff
|
|
-#define CX86_ARR_BASE 0xc4
|
|
-#define CX86_RCR_BASE 0xdc
|
|
-
|
|
-/*
|
|
* NSC/Cyrix CPU indexed register access macros
|
|
*/
|
|
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/smp_64.h 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/smp_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -11,12 +11,11 @@
|
|
extern int disable_apic;
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
-#include <asm/fixmap.h>
|
|
#include <asm/mpspec.h>
|
|
+#include <asm/apic.h>
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
#include <asm/io_apic.h>
|
|
#endif
|
|
-#include <asm/apic.h>
|
|
#include <asm/thread_info.h>
|
|
#endif
|
|
|
|
@@ -41,7 +40,6 @@ extern void lock_ipi_call_lock(void);
|
|
extern void unlock_ipi_call_lock(void);
|
|
extern int smp_num_siblings;
|
|
extern void smp_send_reschedule(int cpu);
|
|
-void smp_stop_cpu(void);
|
|
|
|
extern cpumask_t cpu_sibling_map[NR_CPUS];
|
|
extern cpumask_t cpu_core_map[NR_CPUS];
|
|
@@ -62,14 +60,6 @@ static inline int num_booting_cpus(void)
|
|
|
|
#define raw_smp_processor_id() read_pda(cpunumber)
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-static inline int hard_smp_processor_id(void)
|
|
-{
|
|
- /* we don't want to mark this access volatile - bad code generation */
|
|
- return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID));
|
|
-}
|
|
-#endif
|
|
-
|
|
extern int __cpu_disable(void);
|
|
extern void __cpu_die(unsigned int cpu);
|
|
extern void prefill_possible_map(void);
|
|
@@ -78,6 +68,14 @@ extern unsigned __cpuinitdata disabled_c
|
|
|
|
#define NO_PROC_ID 0xFF /* No processor magic marker */
|
|
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+#ifdef CONFIG_X86_LOCAL_APIC /* definition now sits after the CONFIG_SMP #endif added just above */
|
|
+static inline int hard_smp_processor_id(void)
|
|
+{
|
|
+ /* we don't want to mark this access volatile - bad code generation */
|
|
+ return GET_APIC_ID(*(unsigned int *)(APIC_BASE+APIC_ID)); /* plain 32-bit load at APIC_BASE+APIC_ID, decoded by GET_APIC_ID() */
|
|
+}
|
|
#endif
|
|
|
|
/*
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/system_64.h 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/system_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -3,7 +3,7 @@
|
|
|
|
#include <linux/kernel.h>
|
|
#include <asm/segment.h>
|
|
-#include <asm/alternative.h>
|
|
+#include <asm/cmpxchg.h>
|
|
|
|
#include <asm/synch_bitops.h>
|
|
#include <asm/hypervisor.h>
|
|
@@ -43,7 +43,7 @@
|
|
[threadrsp] "i" (offsetof(struct task_struct, thread.rsp)), \
|
|
[ti_flags] "i" (offsetof(struct thread_info, flags)),\
|
|
[tif_fork] "i" (TIF_FORK), \
|
|
- [thread_info] "i" (offsetof(struct task_struct, thread_info)), \
|
|
+ [thread_info] "i" (offsetof(struct task_struct, stack)), \
|
|
[pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \
|
|
: "memory", "cc" __EXTRA_CLOBBER)
|
|
|
|
@@ -92,6 +92,12 @@ static inline void write_cr0(unsigned lo
|
|
machine_to_phys(__dummy); \
|
|
})
|
|
|
|
+static inline void write_cr3(unsigned long val)
|
|
+{
|
|
+ val = phys_to_machine(val); /* translate the caller's value before it is loaded */
|
|
+ asm volatile("movq %0,%%cr3" :: "r" (val) : "memory"); /* "memory" clobber: compiler may not cache or reorder memory accesses across the load */
|
|
+}
|
|
+
|
|
static inline unsigned long read_cr4(void)
|
|
{
|
|
unsigned long cr4;
|
|
@@ -101,7 +107,7 @@ static inline unsigned long read_cr4(voi
|
|
|
|
static inline void write_cr4(unsigned long val)
|
|
{
|
|
- asm volatile("movq %0,%%cr4" :: "r" (val));
|
|
+ asm volatile("movq %0,%%cr4" :: "r" (val) : "memory");
|
|
}
|
|
|
|
#define stts() (HYPERVISOR_fpu_taskswitch(1))
|
|
@@ -122,100 +128,6 @@ static inline void sched_cacheflush(void
|
|
|
|
#define nop() __asm__ __volatile__ ("nop")
|
|
|
|
-#define xchg(ptr,v) ((__typeof__(*(ptr)))__xchg((unsigned long)(v),(ptr),sizeof(*(ptr))))
|
|
-
|
|
-#define tas(ptr) (xchg((ptr),1))
|
|
-
|
|
-#define __xg(x) ((volatile long *)(x))
|
|
-
|
|
-static inline void set_64bit(volatile unsigned long *ptr, unsigned long val)
|
|
-{
|
|
- *ptr = val;
|
|
-}
|
|
-
|
|
-#define _set_64bit set_64bit
|
|
-
|
|
-/*
|
|
- * Note: no "lock" prefix even on SMP: xchg always implies lock anyway
|
|
- * Note 2: xchg has side effect, so that attribute volatile is necessary,
|
|
- * but generally the primitive is invalid, *ptr is output argument. --ANK
|
|
- */
|
|
-static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int size)
|
|
-{
|
|
- switch (size) {
|
|
- case 1:
|
|
- __asm__ __volatile__("xchgb %b0,%1"
|
|
- :"=q" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- case 2:
|
|
- __asm__ __volatile__("xchgw %w0,%1"
|
|
- :"=r" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- case 4:
|
|
- __asm__ __volatile__("xchgl %k0,%1"
|
|
- :"=r" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- case 8:
|
|
- __asm__ __volatile__("xchgq %0,%1"
|
|
- :"=r" (x)
|
|
- :"m" (*__xg(ptr)), "0" (x)
|
|
- :"memory");
|
|
- break;
|
|
- }
|
|
- return x;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Atomic compare and exchange. Compare OLD with MEM, if identical,
|
|
- * store NEW in MEM. Return the initial value in MEM. Success is
|
|
- * indicated by comparing RETURN with OLD.
|
|
- */
|
|
-
|
|
-#define __HAVE_ARCH_CMPXCHG 1
|
|
-
|
|
-static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
|
|
- unsigned long new, int size)
|
|
-{
|
|
- unsigned long prev;
|
|
- switch (size) {
|
|
- case 1:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgb %b1,%2"
|
|
- : "=a"(prev)
|
|
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 2:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgw %w1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 4:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgl %k1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- case 8:
|
|
- __asm__ __volatile__(LOCK_PREFIX "cmpxchgq %1,%2"
|
|
- : "=a"(prev)
|
|
- : "r"(new), "m"(*__xg(ptr)), "0"(old)
|
|
- : "memory");
|
|
- return prev;
|
|
- }
|
|
- return old;
|
|
-}
|
|
-
|
|
-#define cmpxchg(ptr,o,n)\
|
|
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
|
|
- (unsigned long)(n),sizeof(*(ptr))))
|
|
-
|
|
#ifdef CONFIG_SMP
|
|
#define smp_mb() mb()
|
|
#define smp_rmb() rmb()
|
|
--- head-2011-03-11.orig/arch/x86/include/mach-xen/asm/tlbflush_64.h 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-11/arch/x86/include/mach-xen/asm/tlbflush_64.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -2,7 +2,9 @@
|
|
#define _X8664_TLBFLUSH_H
|
|
|
|
#include <linux/mm.h>
|
|
+#include <linux/sched.h>
|
|
#include <asm/processor.h>
|
|
+#include <asm/system.h>
|
|
|
|
#define __flush_tlb() xen_tlb_flush()
|
|
|
|
--- head-2011-03-11.orig/include/linux/pci.h 2011-01-31 14:53:38.000000000 +0100
|
|
+++ head-2011-03-11/include/linux/pci.h 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -325,7 +325,7 @@ struct pci_dev {
|
|
int rom_attr_enabled; /* has display of the rom attribute been enabled? */
|
|
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
|
|
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
|
|
-#ifdef CONFIG_PCI_MSI
|
|
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
|
|
struct list_head msi_list;
|
|
#endif
|
|
struct pci_vpd *vpd;
|
|
--- head-2011-03-11.orig/include/xen/net-util.h 2011-02-09 15:50:19.000000000 +0100
|
|
+++ head-2011-03-11/include/xen/net-util.h 2011-02-09 15:53:07.000000000 +0100
|
|
@@ -11,6 +11,7 @@ static inline int skb_checksum_setup(str
|
|
unsigned long *fixup_counter)
|
|
{
|
|
struct iphdr *iph = (void *)skb->data;
|
|
+ unsigned char *th;
|
|
__be16 *csum = NULL;
|
|
int err = -EPROTO;
|
|
|
|
@@ -32,31 +33,31 @@ static inline int skb_checksum_setup(str
|
|
if (skb->protocol != htons(ETH_P_IP))
|
|
goto out;
|
|
|
|
- skb->nh.iph = iph;
|
|
- skb->h.raw = skb->nh.raw + 4 * iph->ihl;
|
|
- if (skb->h.raw >= skb->tail)
|
|
+ th = skb->data + 4 * iph->ihl;
|
|
+ if (th >= skb_tail_pointer(skb))
|
|
goto out;
|
|
|
|
+ skb->csum_start = th - skb->head;
|
|
switch (iph->protocol) {
|
|
case IPPROTO_TCP:
|
|
skb->csum_offset = offsetof(struct tcphdr, check);
|
|
if (csum)
|
|
- csum = &skb->h.th->check;
|
|
+ csum = &((struct tcphdr *)th)->check;
|
|
break;
|
|
case IPPROTO_UDP:
|
|
skb->csum_offset = offsetof(struct udphdr, check);
|
|
if (csum)
|
|
- csum = &skb->h.uh->check;
|
|
+ csum = &((struct udphdr *)th)->check;
|
|
break;
|
|
default:
|
|
if (net_ratelimit())
|
|
printk(KERN_ERR "Attempting to checksum a non-"
|
|
"TCP/UDP packet, dropping a protocol"
|
|
- " %d packet\n", skb->nh.iph->protocol);
|
|
+ " %d packet\n", iph->protocol);
|
|
goto out;
|
|
}
|
|
|
|
- if ((skb->h.raw + skb->csum_offset + sizeof(*csum)) > skb->tail)
|
|
+ if ((th + skb->csum_offset + sizeof(*csum)) > skb_tail_pointer(skb))
|
|
goto out;
|
|
|
|
if (csum) {
|
|
--- head-2011-03-11.orig/lib/swiotlb-xen.c 2011-01-31 17:32:22.000000000 +0100
|
|
+++ head-2011-03-11/lib/swiotlb-xen.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -751,7 +751,6 @@ swiotlb_dma_supported (struct device *hw
|
|
return (mask >= ((1UL << dma_bits) - 1));
|
|
}
|
|
|
|
-EXPORT_SYMBOL(swiotlb_init);
|
|
EXPORT_SYMBOL(swiotlb_map_single);
|
|
EXPORT_SYMBOL(swiotlb_unmap_single);
|
|
EXPORT_SYMBOL(swiotlb_map_sg);
|
|
--- head-2011-03-11.orig/mm/vmalloc.c 2011-03-11 10:41:54.000000000 +0100
|
|
+++ head-2011-03-11/mm/vmalloc.c 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -1485,6 +1485,13 @@ static void *__vmalloc_area_node(struct
|
|
struct page **pages;
|
|
unsigned int nr_pages, array_size, i;
|
|
gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
|
|
+#ifdef CONFIG_XEN
|
|
+ gfp_t dma_mask = gfp_mask & (__GFP_DMA | __GFP_DMA32); /* zone bits requested by the caller */
|
|
+
|
|
+ BUILD_BUG_ON((__GFP_DMA | __GFP_DMA32) != (__GFP_DMA + __GFP_DMA32)); /* OR equals sum only if the two flags share no bits */
|
|
+ if (dma_mask == (__GFP_DMA | __GFP_DMA32)) /* both zone bits set at once */
|
|
+ gfp_mask &= ~(__GFP_DMA | __GFP_DMA32); /* strip them from gfp_mask; dma_mask still remembers the request */
|
|
+#endif /* CONFIG_XEN */
|
|
|
|
nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
|
|
array_size = (nr_pages * sizeof(struct page *));
|
|
@@ -1520,6 +1527,16 @@ static void *__vmalloc_area_node(struct
|
|
goto fail;
|
|
}
|
|
area->pages[i] = page;
|
|
+#ifdef CONFIG_XEN
|
|
+ if (dma_mask) { /* caller asked for __GFP_DMA and/or __GFP_DMA32 */
|
|
+ if (xen_limit_pages_to_max_mfn(page, 0, 32)) { /* nonzero is treated as failure; NOTE(review): args presumably order 0, 32 address bits - confirm */
|
|
+ area->nr_pages = i + 1; /* count includes the page just stored at index i */
|
|
+ goto fail;
|
|
+ }
|
|
+ if (gfp_mask & __GFP_ZERO) /* zeroing was requested */
|
|
+ clear_highpage(page); /* NOTE(review): presumably contents are not guaranteed zero after the call above - confirm */
|
|
+ }
|
|
+#endif /* CONFIG_XEN */
|
|
}
|
|
|
|
if (map_vm_area(area, prot, &pages))
|
|
@@ -1729,6 +1746,8 @@ void *vmalloc_exec(unsigned long size)
|
|
#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
|
|
#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
|
|
#define GFP_VMALLOC32 GFP_DMA | GFP_KERNEL
|
|
+#elif defined(CONFIG_XEN)
|
|
+#define GFP_VMALLOC32 __GFP_DMA | __GFP_DMA32 | GFP_KERNEL
|
|
#else
|
|
#define GFP_VMALLOC32 GFP_KERNEL
|
|
#endif
|
|
--- head-2011-03-11.orig/scripts/Makefile.xen.awk 2007-08-06 15:10:49.000000000 +0200
|
|
+++ head-2011-03-11/scripts/Makefile.xen.awk 2011-01-31 17:32:29.000000000 +0100
|
|
@@ -13,7 +13,7 @@ BEGIN {
|
|
next
|
|
}
|
|
|
|
-/:[[:space:]]*%\.[cS][[:space:]]/ {
|
|
+/:[[:space:]]*\$\(src\)\/%\.[cS][[:space:]]/ {
|
|
line = gensub(/%.([cS])/, "%-xen.\\1", "g", $0)
|
|
line = gensub(/(single-used-m)/, "xen-\\1", "g", line)
|
|
print line
|