12600 lines
370 KiB
Plaintext
12600 lines
370 KiB
Plaintext
From: www.kernel.org
|
|
Subject: Linux 2.6.19
|
|
Patch-mainline: 2.6.19
|
|
|
|
Automatically created from "patches.kernel.org/patch-2.6.19" by xen-port-patches.py
|
|
|
|
Acked-by: jbeulich@novell.com
|
|
|
|
--- head-2010-05-25.orig/arch/x86/Kconfig 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/Kconfig 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -528,6 +528,7 @@ config SCHED_OMIT_FRAME_POINTER
|
|
|
|
menuconfig PARAVIRT_GUEST
|
|
bool "Paravirtualized guest support"
|
|
+ depends on !XEN
|
|
---help---
|
|
Say Y here to get to see options related to running Linux under
|
|
various hypervisors. This option alone does not add any kernel code.
|
|
--- head-2010-05-25.orig/arch/x86/kernel/acpi/boot.c 2010-05-25 09:12:09.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/acpi/boot.c 2010-04-15 09:52:23.000000000 +0200
|
|
@@ -71,8 +71,12 @@ int acpi_strict;
|
|
|
|
u8 acpi_sci_flags __initdata;
|
|
int acpi_sci_override_gsi __initdata;
|
|
+#ifndef CONFIG_XEN
|
|
int acpi_skip_timer_override __initdata;
|
|
int acpi_use_timer_override __initdata;
|
|
+#else
|
|
+#define acpi_skip_timer_override 0
|
|
+#endif
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
|
|
@@ -493,6 +497,7 @@ int acpi_register_gsi(struct device *dev
|
|
#ifdef CONFIG_ACPI_HOTPLUG_CPU
|
|
#include <acpi/processor.h>
|
|
|
|
+#ifndef CONFIG_XEN
|
|
static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid)
|
|
{
|
|
#ifdef CONFIG_ACPI_NUMA
|
|
@@ -584,6 +589,9 @@ free_tmp_map:
|
|
out:
|
|
return retval;
|
|
}
|
|
+#else
|
|
+#define _acpi_map_lsapic(h, p) (-EINVAL)
|
|
+#endif
|
|
|
|
/* wrapper to silence section mismatch warning */
|
|
int __ref acpi_map_lsapic(acpi_handle handle, int *pcpu)
|
|
@@ -594,9 +602,11 @@ EXPORT_SYMBOL(acpi_map_lsapic);
|
|
|
|
int acpi_unmap_lsapic(int cpu)
|
|
{
|
|
+#ifndef CONFIG_XEN
|
|
per_cpu(x86_cpu_to_apicid, cpu) = -1;
|
|
set_cpu_present(cpu, false);
|
|
num_processors--;
|
|
+#endif
|
|
|
|
return (0);
|
|
}
|
|
@@ -1660,7 +1670,7 @@ int __init acpi_mps_check(void)
|
|
return 0;
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
|
|
static int __init parse_acpi_skip_timer_override(char *arg)
|
|
{
|
|
acpi_skip_timer_override = 1;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/apic/apic-xen.c 2007-06-12 13:12:48.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/apic/apic-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -54,7 +54,6 @@ static cpumask_t timer_bcast_ipi;
|
|
/*
|
|
* Knob to control our willingness to enable the local APIC.
|
|
*/
|
|
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
|
|
|
|
/*
|
|
* Debug level
|
|
@@ -102,7 +101,7 @@ int get_physical_broadcast(void)
|
|
|
|
#ifndef CONFIG_XEN
|
|
#ifndef CONFIG_SMP
|
|
-static void up_apic_timer_interrupt_call(struct pt_regs *regs)
|
|
+static void up_apic_timer_interrupt_call(void)
|
|
{
|
|
int cpu = smp_processor_id();
|
|
|
|
@@ -111,11 +110,11 @@ static void up_apic_timer_interrupt_call
|
|
*/
|
|
per_cpu(irq_stat, cpu).apic_timer_irqs++;
|
|
|
|
- smp_local_timer_interrupt(regs);
|
|
+ smp_local_timer_interrupt();
|
|
}
|
|
#endif
|
|
|
|
-void smp_send_timer_broadcast_ipi(struct pt_regs *regs)
|
|
+void smp_send_timer_broadcast_ipi(void)
|
|
{
|
|
cpumask_t mask;
|
|
|
|
@@ -128,7 +127,7 @@ void smp_send_timer_broadcast_ipi(struct
|
|
* We can directly call the apic timer interrupt handler
|
|
* in UP case. Minus all irq related functions
|
|
*/
|
|
- up_apic_timer_interrupt_call(regs);
|
|
+ up_apic_timer_interrupt_call();
|
|
#endif
|
|
}
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/cpu/common-xen.c 2009-05-19 09:16:41.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -43,7 +43,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM
|
|
|
|
extern int disable_pse;
|
|
|
|
-static void default_init(struct cpuinfo_x86 * c)
|
|
+static void __cpuinit default_init(struct cpuinfo_x86 * c)
|
|
{
|
|
/* Not much we can do here... */
|
|
/* Check if at least it has cpuid */
|
|
@@ -56,7 +56,7 @@ static void default_init(struct cpuinfo_
|
|
}
|
|
}
|
|
|
|
-static struct cpu_dev default_cpu = {
|
|
+static struct cpu_dev __cpuinitdata default_cpu = {
|
|
.c_init = default_init,
|
|
.c_vendor = "Unknown",
|
|
};
|
|
@@ -191,7 +191,16 @@ static void __cpuinit get_cpu_vendor(str
|
|
|
|
static int __init x86_fxsr_setup(char * s)
|
|
{
|
|
+ /* Tell all the other CPUs to not use it... */
|
|
disable_x86_fxsr = 1;
|
|
+
|
|
+ /*
|
|
+ * ... and clear the bits early in the boot_cpu_data
|
|
+ * so that the bootup process doesn't try to do this
|
|
+ * either.
|
|
+ */
|
|
+ clear_bit(X86_FEATURE_FXSR, boot_cpu_data.x86_capability);
|
|
+ clear_bit(X86_FEATURE_XMM, boot_cpu_data.x86_capability);
|
|
return 1;
|
|
}
|
|
__setup("nofxsr", x86_fxsr_setup);
|
|
@@ -272,7 +281,7 @@ static void __init early_cpu_detect(void
|
|
}
|
|
}
|
|
|
|
-void __cpuinit generic_identify(struct cpuinfo_x86 * c)
|
|
+static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
|
|
{
|
|
u32 tfms, xlvl;
|
|
int ebx;
|
|
@@ -700,8 +709,7 @@ old_gdt:
|
|
*/
|
|
atomic_inc(&init_mm.mm_count);
|
|
current->active_mm = &init_mm;
|
|
- if (current->mm)
|
|
- BUG();
|
|
+ BUG_ON(current->mm);
|
|
enter_lazy_tlb(&init_mm, current);
|
|
|
|
load_esp0(t, thread);
|
|
@@ -714,7 +722,7 @@ old_gdt:
|
|
#endif
|
|
|
|
/* Clear %fs and %gs. */
|
|
- asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs");
|
|
+ asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
|
|
|
|
/* Clear all 6 debug registers: */
|
|
set_debugreg(0, 0);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mcheck/Makefile 2010-03-24 15:01:37.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/mcheck/Makefile 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -9,3 +9,5 @@ obj-$(CONFIG_X86_MCE_THRESHOLD) += thres
|
|
obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
|
|
|
|
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
|
|
+
|
|
+disabled-obj-$(CONFIG_XEN) := therm_throt.o
|
|
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2009-10-01 11:00:47.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -53,8 +53,7 @@ static struct mc_info *g_mi;
|
|
|
|
/*dom0 mce virq handler, logging physical mce error info*/
|
|
|
|
-static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id,
|
|
- struct pt_regs *regs)
|
|
+static irqreturn_t mce_dom0_interrupt(int irq, void *dev_id)
|
|
{
|
|
xen_mc_t mc_op;
|
|
int result = 0;
|
|
@@ -129,6 +128,6 @@ void bind_virq_for_mce(void)
|
|
printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n");
|
|
|
|
/* Log the machine checks left over from the previous reset. */
|
|
- mce_dom0_interrupt(VIRQ_MCA, NULL, NULL);
|
|
+ mce_dom0_interrupt(VIRQ_MCA, NULL);
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/entry_32-xen.S 2009-05-19 09:16:41.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/entry_32-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -80,8 +80,12 @@ VM_MASK = 0x00020000
|
|
NMI_MASK = 0x80000000
|
|
|
|
#ifndef CONFIG_XEN
|
|
-#define DISABLE_INTERRUPTS cli
|
|
-#define ENABLE_INTERRUPTS sti
|
|
+/* These are replaced for paravirtualization */
|
|
+#define DISABLE_INTERRUPTS cli
|
|
+#define ENABLE_INTERRUPTS sti
|
|
+#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
|
|
+#define INTERRUPT_RETURN iret
|
|
+#define GET_CR0_INTO_EAX movl %cr0, %eax
|
|
#else
|
|
/* Offsets into shared_info_t. */
|
|
#define evtchn_upcall_pending /* 0 */
|
|
@@ -99,15 +103,29 @@ NMI_MASK = 0x80000000
|
|
|
|
#define __DISABLE_INTERRUPTS movb $1,evtchn_upcall_mask(%esi)
|
|
#define __ENABLE_INTERRUPTS movb $0,evtchn_upcall_mask(%esi)
|
|
+#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
|
|
#define DISABLE_INTERRUPTS GET_VCPU_INFO ; \
|
|
__DISABLE_INTERRUPTS
|
|
#define ENABLE_INTERRUPTS GET_VCPU_INFO ; \
|
|
__ENABLE_INTERRUPTS
|
|
-#define __TEST_PENDING testb $0xFF,evtchn_upcall_pending(%esi)
|
|
+#define ENABLE_INTERRUPTS_SYSEXIT __ENABLE_INTERRUPTS ; \
|
|
+sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
+ __TEST_PENDING ; \
|
|
+ jnz 14f # process more events if necessary... ; \
|
|
+ movl ESI(%esp), %esi ; \
|
|
+ sysexit ; \
|
|
+14: __DISABLE_INTERRUPTS ; \
|
|
+ TRACE_IRQS_OFF ; \
|
|
+sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/ ; \
|
|
+ push %esp ; \
|
|
+ call evtchn_do_upcall ; \
|
|
+ add $4,%esp ; \
|
|
+ jmp ret_from_intr
|
|
+#define INTERRUPT_RETURN iret
|
|
#endif
|
|
|
|
#ifdef CONFIG_PREEMPT
|
|
-#define preempt_stop cli; TRACE_IRQS_OFF
|
|
+#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
|
|
#else
|
|
#define preempt_stop
|
|
#define resume_kernel restore_nocheck
|
|
@@ -206,18 +224,21 @@ NMI_MASK = 0x80000000
|
|
|
|
#define RING0_INT_FRAME \
|
|
CFI_STARTPROC simple;\
|
|
+ CFI_SIGNAL_FRAME;\
|
|
CFI_DEF_CFA esp, 3*4;\
|
|
/*CFI_OFFSET cs, -2*4;*/\
|
|
CFI_OFFSET eip, -3*4
|
|
|
|
#define RING0_EC_FRAME \
|
|
CFI_STARTPROC simple;\
|
|
+ CFI_SIGNAL_FRAME;\
|
|
CFI_DEF_CFA esp, 4*4;\
|
|
/*CFI_OFFSET cs, -2*4;*/\
|
|
CFI_OFFSET eip, -3*4
|
|
|
|
#define RING0_PTREGS_FRAME \
|
|
CFI_STARTPROC simple;\
|
|
+ CFI_SIGNAL_FRAME;\
|
|
CFI_DEF_CFA esp, OLDESP-EBX;\
|
|
/*CFI_OFFSET cs, CS-OLDESP;*/\
|
|
CFI_OFFSET eip, EIP-OLDESP;\
|
|
@@ -263,8 +284,9 @@ ret_from_intr:
|
|
check_userspace:
|
|
movl EFLAGS(%esp), %eax # mix EFLAGS and CS
|
|
movb CS(%esp), %al
|
|
- testl $(VM_MASK | 2), %eax
|
|
- jz resume_kernel
|
|
+ andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
|
|
+ cmpl $USER_RPL, %eax
|
|
+ jb resume_kernel # not returning to v8086 or userspace
|
|
ENTRY(resume_userspace)
|
|
DISABLE_INTERRUPTS # make sure we don't miss an interrupt
|
|
# setting need_resched or sigpending
|
|
@@ -277,7 +299,7 @@ ENTRY(resume_userspace)
|
|
|
|
#ifdef CONFIG_PREEMPT
|
|
ENTRY(resume_kernel)
|
|
- cli
|
|
+ DISABLE_INTERRUPTS
|
|
cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
|
|
jnz restore_nocheck
|
|
need_resched:
|
|
@@ -297,6 +319,7 @@ need_resched:
|
|
# sysenter call handler stub
|
|
ENTRY(sysenter_entry)
|
|
CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA esp, 0
|
|
CFI_REGISTER esp, ebp
|
|
movl SYSENTER_stack_esp0(%esp),%esp
|
|
@@ -305,7 +328,7 @@ sysenter_past_esp:
|
|
* No need to follow this irqs on/off section: the syscall
|
|
* disabled irqs and here we enable it straight after entry:
|
|
*/
|
|
- sti
|
|
+ ENABLE_INTERRUPTS
|
|
pushl $(__USER_DS)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
/*CFI_REL_OFFSET ss, 0*/
|
|
@@ -359,26 +382,8 @@ sysenter_past_esp:
|
|
movl EIP(%esp), %edx
|
|
movl OLDESP(%esp), %ecx
|
|
xorl %ebp,%ebp
|
|
-#ifdef CONFIG_XEN
|
|
TRACE_IRQS_ON
|
|
- __ENABLE_INTERRUPTS
|
|
-sysexit_scrit: /**** START OF SYSEXIT CRITICAL REGION ****/
|
|
- __TEST_PENDING
|
|
- jnz 14f # process more events if necessary...
|
|
- movl ESI(%esp), %esi
|
|
- sysexit
|
|
-14: __DISABLE_INTERRUPTS
|
|
- TRACE_IRQS_OFF
|
|
-sysexit_ecrit: /**** END OF SYSEXIT CRITICAL REGION ****/
|
|
- push %esp
|
|
- call evtchn_do_upcall
|
|
- add $4,%esp
|
|
- jmp ret_from_intr
|
|
-#else
|
|
- TRACE_IRQS_ON
|
|
- sti
|
|
- sysexit
|
|
-#endif /* !CONFIG_XEN */
|
|
+ ENABLE_INTERRUPTS_SYSEXIT
|
|
CFI_ENDPROC
|
|
|
|
# pv sysenter call handler stub
|
|
@@ -444,8 +449,8 @@ restore_all:
|
|
# See comments in process.c:copy_thread() for details.
|
|
movb OLDSS(%esp), %ah
|
|
movb CS(%esp), %al
|
|
- andl $(VM_MASK | (4 << 8) | 3), %eax
|
|
- cmpl $((4 << 8) | 3), %eax
|
|
+ andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
|
|
+ cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
|
|
CFI_REMEMBER_STATE
|
|
je ldt_ss # returning to user-space with LDT SS
|
|
restore_nocheck:
|
|
@@ -467,12 +472,11 @@ restore_nocheck_notrace:
|
|
RESTORE_REGS
|
|
addl $4, %esp
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
-1: iret
|
|
+1: INTERRUPT_RETURN
|
|
.section .fixup,"ax"
|
|
iret_exc:
|
|
#ifndef CONFIG_XEN
|
|
- TRACE_IRQS_ON
|
|
- sti
|
|
+ ENABLE_INTERRUPTS
|
|
#endif
|
|
pushl $0 # no error code
|
|
pushl $do_iret_error
|
|
@@ -498,7 +502,7 @@ ldt_ss:
|
|
* dosemu and wine happy. */
|
|
subl $8, %esp # reserve space for switch16 pointer
|
|
CFI_ADJUST_CFA_OFFSET 8
|
|
- cli
|
|
+ DISABLE_INTERRUPTS
|
|
TRACE_IRQS_OFF
|
|
movl %esp, %eax
|
|
/* Set up the 16bit stack frame with switch32 pointer on top,
|
|
@@ -508,7 +512,7 @@ ldt_ss:
|
|
TRACE_IRQS_IRET
|
|
RESTORE_REGS
|
|
lss 20+4(%esp), %esp # switch to 16bit stack
|
|
-1: iret
|
|
+1: INTERRUPT_RETURN
|
|
.section __ex_table,"a"
|
|
.align 4
|
|
.long 1b,iret_exc
|
|
@@ -524,7 +528,7 @@ scrit: /**** START OF CRITICAL REGION **
|
|
RESTORE_REGS
|
|
addl $4, %esp
|
|
CFI_ADJUST_CFA_OFFSET -4
|
|
-1: iret
|
|
+1: INTERRUPT_RETURN
|
|
.section __ex_table,"a"
|
|
.align 4
|
|
.long 1b,iret_exc
|
|
@@ -713,11 +717,9 @@ ENTRY(name) \
|
|
#define UNWIND_ESPFIX_STACK
|
|
#endif
|
|
|
|
-ENTRY(divide_error)
|
|
- RING0_INT_FRAME
|
|
- pushl $0 # no error code
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- pushl $do_divide_error
|
|
+KPROBE_ENTRY(page_fault)
|
|
+ RING0_EC_FRAME
|
|
+ pushl $do_page_fault
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
ALIGN
|
|
error_code:
|
|
@@ -767,6 +769,7 @@ error_code:
|
|
call *%edi
|
|
jmp ret_from_exception
|
|
CFI_ENDPROC
|
|
+KPROBE_END(page_fault)
|
|
|
|
#ifdef CONFIG_XEN
|
|
# A note on the "critical region" in our callback handler.
|
|
@@ -790,9 +793,11 @@ ENTRY(hypervisor_callback)
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
- testb $2,CS(%esp)
|
|
+ movl CS(%esp),%ecx
|
|
movl EIP(%esp),%eax
|
|
- jnz .Ldo_upcall
|
|
+ andl $SEGMENT_RPL_MASK,%ecx
|
|
+ cmpl $USER_RPL,%ecx
|
|
+ jae .Ldo_upcall
|
|
cmpl $scrit,%eax
|
|
jb 0f
|
|
cmpl $ecrit,%eax
|
|
@@ -928,7 +933,7 @@ ENTRY(device_not_available)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
#ifndef CONFIG_XEN
|
|
- movl %cr0, %eax
|
|
+ GET_CR0_INTO_EAX
|
|
testl $0x4, %eax # EM (math emulation bit)
|
|
je device_available_emulate
|
|
pushl $0 # temporary storage for ORIG_EIP
|
|
@@ -963,9 +968,15 @@ device_available_emulate:
|
|
jne ok; \
|
|
label: \
|
|
movl SYSENTER_stack_esp0+offset(%esp),%esp; \
|
|
+ CFI_DEF_CFA esp, 0; \
|
|
+ CFI_UNDEFINED eip; \
|
|
pushfl; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
pushl $__KERNEL_CS; \
|
|
- pushl $sysenter_past_esp
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
+ pushl $sysenter_past_esp; \
|
|
+ CFI_ADJUST_CFA_OFFSET 4; \
|
|
+ CFI_REL_OFFSET eip, 0
|
|
#endif /* CONFIG_XEN */
|
|
|
|
KPROBE_ENTRY(debug)
|
|
@@ -984,7 +995,8 @@ debug_stack_correct:
|
|
call do_debug
|
|
jmp ret_from_exception
|
|
CFI_ENDPROC
|
|
- .previous .text
|
|
+KPROBE_END(debug)
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
/*
|
|
* NMI is doubly nasty. It can happen _while_ we're handling
|
|
@@ -994,7 +1006,7 @@ debug_stack_correct:
|
|
* check whether we got an NMI on the debug path where the debug
|
|
* fault happened on the sysenter path.
|
|
*/
|
|
-ENTRY(nmi)
|
|
+KPROBE_ENTRY(nmi)
|
|
RING0_INT_FRAME
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
@@ -1019,6 +1031,7 @@ ENTRY(nmi)
|
|
cmpl $sysenter_entry,12(%esp)
|
|
je nmi_debug_stack_check
|
|
nmi_stack_correct:
|
|
+ /* We have a RING0_INT_FRAME here */
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
SAVE_ALL
|
|
@@ -1029,9 +1042,12 @@ nmi_stack_correct:
|
|
CFI_ENDPROC
|
|
|
|
nmi_stack_fixup:
|
|
+ RING0_INT_FRAME
|
|
FIX_STACK(12,nmi_stack_correct, 1)
|
|
jmp nmi_stack_correct
|
|
+
|
|
nmi_debug_stack_check:
|
|
+ /* We have a RING0_INT_FRAME here */
|
|
cmpw $__KERNEL_CS,16(%esp)
|
|
jne nmi_stack_correct
|
|
cmpl $debug,(%esp)
|
|
@@ -1042,8 +1058,10 @@ nmi_debug_stack_check:
|
|
jmp nmi_stack_correct
|
|
|
|
nmi_16bit_stack:
|
|
- RING0_INT_FRAME
|
|
- /* create the pointer to lss back */
|
|
+ /* We have a RING0_INT_FRAME here.
|
|
+ *
|
|
+ * create the pointer to lss back
|
|
+ */
|
|
pushl %ss
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
pushl %esp
|
|
@@ -1064,14 +1082,14 @@ nmi_16bit_stack:
|
|
call do_nmi
|
|
RESTORE_REGS
|
|
lss 12+4(%esp), %esp # back to 16bit stack
|
|
-1: iret
|
|
+1: INTERRUPT_RETURN
|
|
CFI_ENDPROC
|
|
.section __ex_table,"a"
|
|
.align 4
|
|
.long 1b,iret_exc
|
|
.previous
|
|
#else
|
|
-ENTRY(nmi)
|
|
+KPROBE_ENTRY(nmi)
|
|
RING0_INT_FRAME
|
|
pushl %eax
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
@@ -1083,6 +1101,7 @@ ENTRY(nmi)
|
|
jmp restore_all
|
|
CFI_ENDPROC
|
|
#endif
|
|
+KPROBE_END(nmi)
|
|
|
|
KPROBE_ENTRY(int3)
|
|
RING0_INT_FRAME
|
|
@@ -1094,7 +1113,7 @@ KPROBE_ENTRY(int3)
|
|
call do_int3
|
|
jmp ret_from_exception
|
|
CFI_ENDPROC
|
|
- .previous .text
|
|
+KPROBE_END(int3)
|
|
|
|
ENTRY(overflow)
|
|
RING0_INT_FRAME
|
|
@@ -1159,7 +1178,7 @@ KPROBE_ENTRY(general_protection)
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
jmp error_code
|
|
CFI_ENDPROC
|
|
- .previous .text
|
|
+KPROBE_END(general_protection)
|
|
|
|
ENTRY(alignment_check)
|
|
RING0_EC_FRAME
|
|
@@ -1168,13 +1187,14 @@ ENTRY(alignment_check)
|
|
jmp error_code
|
|
CFI_ENDPROC
|
|
|
|
-KPROBE_ENTRY(page_fault)
|
|
- RING0_EC_FRAME
|
|
- pushl $do_page_fault
|
|
+ENTRY(divide_error)
|
|
+ RING0_INT_FRAME
|
|
+ pushl $0 # no error code
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ pushl $do_divide_error
|
|
CFI_ADJUST_CFA_OFFSET 4
|
|
jmp error_code
|
|
CFI_ENDPROC
|
|
- .previous .text
|
|
|
|
#ifdef CONFIG_X86_MCE
|
|
ENTRY(machine_check)
|
|
@@ -1236,6 +1256,19 @@ ENTRY(fixup_4gb_segment)
|
|
jmp error_code
|
|
CFI_ENDPROC
|
|
|
|
+ENTRY(kernel_thread_helper)
|
|
+ pushl $0 # fake return address for unwinder
|
|
+ CFI_STARTPROC
|
|
+ movl %edx,%eax
|
|
+ push %edx
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ call *%ebx
|
|
+ push %eax
|
|
+ CFI_ADJUST_CFA_OFFSET 4
|
|
+ call do_exit
|
|
+ CFI_ENDPROC
|
|
+ENDPROC(kernel_thread_helper)
|
|
+
|
|
.section .rodata,"a"
|
|
#include "syscall_table.S"
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head_32-xen.S 2007-06-12 13:12:48.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/head_32-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -62,7 +62,7 @@ ENTRY(startup_32)
|
|
movl %eax,%gs
|
|
cld # gcc2 wants the direction flag cleared at all times
|
|
|
|
- pushl %eax # fake return address
|
|
+ pushl $0 # fake return address for unwinder
|
|
jmp start_kernel
|
|
|
|
#define HYPERCALL_PAGE_OFFSET 0x1000
|
|
--- head-2010-05-25.orig/arch/x86/kernel/io_apic_32-xen.c 2009-03-18 10:39:31.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/io_apic_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -31,6 +31,9 @@
|
|
#include <linux/acpi.h>
|
|
#include <linux/module.h>
|
|
#include <linux/sysdev.h>
|
|
+#include <linux/pci.h>
|
|
+#include <linux/msi.h>
|
|
+#include <linux/htirq.h>
|
|
|
|
#include <asm/io.h>
|
|
#include <asm/smp.h>
|
|
@@ -38,13 +41,15 @@
|
|
#include <asm/timer.h>
|
|
#include <asm/i8259.h>
|
|
#include <asm/nmi.h>
|
|
+#include <asm/msidef.h>
|
|
+#include <asm/hypertransport.h>
|
|
|
|
#include <mach_apic.h>
|
|
+#include <mach_apicdef.h>
|
|
|
|
#include "io_ports.h"
|
|
|
|
#ifdef CONFIG_XEN
|
|
-
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/physdev.h>
|
|
#include <xen/evtchn.h>
|
|
@@ -56,32 +61,7 @@
|
|
|
|
unsigned long io_apic_irqs;
|
|
|
|
-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
|
|
-{
|
|
- struct physdev_apic apic_op;
|
|
- int ret;
|
|
-
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
- apic_op.reg = reg;
|
|
- ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
- if (ret)
|
|
- return ret;
|
|
- return apic_op.value;
|
|
-}
|
|
-
|
|
-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
-{
|
|
- struct physdev_apic apic_op;
|
|
-
|
|
- apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
- apic_op.reg = reg;
|
|
- apic_op.value = value;
|
|
- WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
-}
|
|
-
|
|
-#define io_apic_read(a,r) xen_io_apic_read(a,r)
|
|
-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
|
|
-
|
|
+#define clear_IO_APIC() ((void)0)
|
|
#endif /* CONFIG_XEN */
|
|
|
|
int (*ioapic_renumber_irq)(int ioapic, int irq);
|
|
@@ -108,7 +88,7 @@ int sis_apic_bug = -1;
|
|
*/
|
|
int nr_ioapic_registers[MAX_IO_APICS];
|
|
|
|
-int disable_timer_pin_1 __initdata;
|
|
+static int disable_timer_pin_1 __initdata;
|
|
|
|
/*
|
|
* Rough estimation of how many shared IRQs there are, can
|
|
@@ -128,12 +108,124 @@ static struct irq_pin_list {
|
|
int apic, pin, next;
|
|
} irq_2_pin[PIN_MAP_SIZE];
|
|
|
|
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
|
|
-#ifdef CONFIG_PCI_MSI
|
|
-#define vector_to_irq(vector) \
|
|
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
|
|
+#ifndef CONFIG_XEN
|
|
+struct io_apic {
|
|
+ unsigned int index;
|
|
+ unsigned int unused[3];
|
|
+ unsigned int data;
|
|
+};
|
|
+
|
|
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
+{
|
|
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(reg, &io_apic->index);
|
|
+ return readl(&io_apic->data);
|
|
+#else
|
|
+ struct physdev_apic apic_op;
|
|
+ int ret;
|
|
+
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
+ apic_op.reg = reg;
|
|
+ ret = HYPERVISOR_physdev_op(PHYSDEVOP_apic_read, &apic_op);
|
|
+ if (ret)
|
|
+ return ret;
|
|
+ return apic_op.value;
|
|
+#endif
|
|
+}
|
|
+
|
|
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(reg, &io_apic->index);
|
|
+ writel(value, &io_apic->data);
|
|
+#else
|
|
+ struct physdev_apic apic_op;
|
|
+
|
|
+ apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
+ apic_op.reg = reg;
|
|
+ apic_op.value = value;
|
|
+ WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Re-write @value into register @reg of IO-APIC @apic: to be used for
|
|
+ * read-modify-write cycles where the read already set up the index register.
|
|
+ *
|
|
+ * Older SiS APICs (sis_apic_bug) require that we rewrite the index register.
|
|
+ */
|
|
+static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value)
|
|
+{
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic); /* MMIO: keep __iomem */
|
|
+ if (sis_apic_bug)
|
|
+ writel(reg, &io_apic->index);
|
|
+ writel(value, &io_apic->data);
|
|
+}
|
|
#else
|
|
-#define vector_to_irq(vector) (vector)
|
|
+#define io_apic_modify io_apic_write
|
|
+#endif
|
|
+
|
|
+union entry_union {
|
|
+ struct { u32 w1, w2; };
|
|
+ struct IO_APIC_route_entry entry;
|
|
+};
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
|
|
+{
|
|
+ union entry_union eu;
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
|
|
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ return eu.entry;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * When we write a new IO APIC routing entry, we need to write the high
|
|
+ * word first! If the mask bit in the low word is clear, we will enable
|
|
+ * the interrupt, and we need to make sure the entry is fully populated
|
|
+ * before that happens.
|
|
+ */
|
|
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ union entry_union eu;
|
|
+ eu.entry = e;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * When we mask an IO APIC routing entry, we need to write the low
|
|
+ * word first, in order to set the mask bit before we change the
|
|
+ * high bits!
|
|
+ */
|
|
+static void ioapic_mask_entry(int apic, int pin)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ union entry_union eu = { .entry.mask = 1 };
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
#endif
|
|
|
|
/*
|
|
@@ -159,9 +251,7 @@ static void add_pin_to_irq(unsigned int
|
|
entry->pin = pin;
|
|
}
|
|
|
|
-#ifdef CONFIG_XEN
|
|
-#define clear_IO_APIC() ((void)0)
|
|
-#else
|
|
+#ifndef CONFIG_XEN
|
|
/*
|
|
* Reroute an IRQ to a different pin.
|
|
*/
|
|
@@ -246,25 +336,16 @@ static void unmask_IO_APIC_irq (unsigned
|
|
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
- unsigned long flags;
|
|
|
|
/* Check delivery_mode to be sure we're not clearing an SMI pin */
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ entry = ioapic_read_entry(apic, pin);
|
|
if (entry.delivery_mode == dest_SMI)
|
|
return;
|
|
|
|
/*
|
|
* Disable it in the IO-APIC irq-routing table:
|
|
*/
|
|
- memset(&entry, 0, sizeof(entry));
|
|
- entry.mask = 1;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
|
|
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_mask_entry(apic, pin);
|
|
}
|
|
|
|
static void clear_IO_APIC (void)
|
|
@@ -304,7 +385,7 @@ static void set_ioapic_affinity_irq(unsi
|
|
break;
|
|
entry = irq_2_pin + entry->next;
|
|
}
|
|
- set_irq_info(irq, cpumask);
|
|
+ set_native_irq_info(irq, cpumask);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
@@ -1212,43 +1293,43 @@ static inline int IO_APIC_irq_trigger(in
|
|
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
|
|
u8 irq_vector[NR_IRQ_VECTORS] __read_mostly; /* = { FIRST_DEVICE_VECTOR , 0 }; */
|
|
|
|
-int assign_irq_vector(int irq)
|
|
+static int __assign_irq_vector(int irq)
|
|
{
|
|
- unsigned long flags;
|
|
int vector;
|
|
struct physdev_irq irq_op;
|
|
|
|
- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
|
|
+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
|
|
|
|
if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
return -EINVAL;
|
|
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
-
|
|
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
- return IO_APIC_VECTOR(irq);
|
|
- }
|
|
+ if (irq_vector[irq] > 0)
|
|
+ return irq_vector[irq];
|
|
|
|
irq_op.irq = irq;
|
|
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
return -ENOSPC;
|
|
- }
|
|
|
|
vector = irq_op.vector;
|
|
- vector_irq[vector] = irq;
|
|
- if (irq != AUTO_ASSIGN)
|
|
- IO_APIC_VECTOR(irq) = vector;
|
|
+ irq_vector[irq] = vector;
|
|
+
|
|
+ return vector;
|
|
+}
|
|
+
|
|
+static int assign_irq_vector(int irq)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ int vector;
|
|
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ vector = __assign_irq_vector(irq);
|
|
spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
return vector;
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-static struct hw_interrupt_type ioapic_level_type;
|
|
-static struct hw_interrupt_type ioapic_edge_type;
|
|
+static struct irq_chip ioapic_chip;
|
|
|
|
#define IOAPIC_AUTO -1
|
|
#define IOAPIC_EDGE 0
|
|
@@ -1256,16 +1337,16 @@ static struct hw_interrupt_type ioapic_e
|
|
|
|
static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
|
|
{
|
|
- unsigned idx;
|
|
-
|
|
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
|
|
-
|
|
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
trigger == IOAPIC_LEVEL)
|
|
- irq_desc[idx].chip = &ioapic_level_type;
|
|
- else
|
|
- irq_desc[idx].chip = &ioapic_edge_type;
|
|
- set_intr_gate(vector, interrupt[idx]);
|
|
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
+ handle_fasteoi_irq, "fasteoi");
|
|
+ else {
|
|
+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
|
|
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+ }
|
|
+ set_intr_gate(vector, interrupt[irq]);
|
|
}
|
|
#else
|
|
#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
|
|
@@ -1336,9 +1417,8 @@ static void __init setup_IO_APIC_irqs(vo
|
|
if (!apic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
}
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
|
|
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
|
|
set_native_irq_info(irq, TARGET_CPUS);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
@@ -1355,7 +1435,6 @@ static void __init setup_IO_APIC_irqs(vo
|
|
static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
- unsigned long flags;
|
|
|
|
memset(&entry,0,sizeof(entry));
|
|
|
|
@@ -1380,15 +1459,13 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
* The timer IRQ doesn't have to know that behind the
|
|
* scene we have a 8259A-master in AEOI mode ...
|
|
*/
|
|
- irq_desc[0].chip = &ioapic_edge_type;
|
|
+ irq_desc[0].chip = &ioapic_chip;
|
|
+ set_irq_handler(0, handle_edge_irq);
|
|
|
|
/*
|
|
* Add it to the IO-APIC irq-routing table:
|
|
*/
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
|
|
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
|
|
enable_8259A_irq(0);
|
|
}
|
|
@@ -1498,10 +1575,7 @@ void __init print_IO_APIC(void)
|
|
for (i = 0; i <= reg_01.bits.entries; i++) {
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
|
|
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ entry = ioapic_read_entry(apic, i);
|
|
|
|
printk(KERN_DEBUG " %02x %03X %02X ",
|
|
i,
|
|
@@ -1521,17 +1595,12 @@ void __init print_IO_APIC(void)
|
|
);
|
|
}
|
|
}
|
|
- if (use_pci_vector())
|
|
- printk(KERN_INFO "Using vector-based indexing\n");
|
|
printk(KERN_DEBUG "IRQ to pin mappings:\n");
|
|
for (i = 0; i < NR_IRQS; i++) {
|
|
struct irq_pin_list *entry = irq_2_pin + i;
|
|
if (entry->pin < 0)
|
|
continue;
|
|
- if (use_pci_vector() && !platform_legacy_irq(i))
|
|
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
|
|
- else
|
|
- printk(KERN_DEBUG "IRQ%d ", i);
|
|
+ printk(KERN_DEBUG "IRQ%d ", i);
|
|
for (;;) {
|
|
printk("-> %d:%d", entry->apic, entry->pin);
|
|
if (!entry->next)
|
|
@@ -1720,10 +1789,7 @@ static void __init enable_IO_APIC(void)
|
|
/* See if any of the pins is in ExtINT mode */
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
struct IO_APIC_route_entry entry;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ entry = ioapic_read_entry(apic, pin);
|
|
|
|
|
|
/* If the interrupt line is enabled and in ExtInt mode
|
|
@@ -1782,7 +1848,6 @@ void disable_IO_APIC(void)
|
|
*/
|
|
if (ioapic_i8259.pin != -1) {
|
|
struct IO_APIC_route_entry entry;
|
|
- unsigned long flags;
|
|
|
|
memset(&entry, 0, sizeof(entry));
|
|
entry.mask = 0; /* Enabled */
|
|
@@ -1799,12 +1864,7 @@ void disable_IO_APIC(void)
|
|
/*
|
|
* Add it to the IO-APIC irq-routing table:
|
|
*/
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
|
|
- *(((int *)&entry)+1));
|
|
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
|
|
- *(((int *)&entry)+0));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
|
}
|
|
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
|
#endif
|
|
@@ -1971,6 +2031,8 @@ static int __init timer_irq_works(void)
|
|
*/
|
|
|
|
/*
|
|
+ * Startup quirk:
|
|
+ *
|
|
* Starting up a edge-triggered IO-APIC interrupt is
|
|
* nasty - we need to make sure that we get the edge.
|
|
* If it is already asserted for some reason, we need
|
|
@@ -1978,8 +2040,10 @@ static int __init timer_irq_works(void)
|
|
*
|
|
* This is not complete - we should be able to fake
|
|
* an edge even if it isn't on the 8259A...
|
|
+ *
|
|
+ * (We do this for level-triggered IRQs too - it cannot hurt.)
|
|
*/
|
|
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
|
|
+static unsigned int startup_ioapic_irq(unsigned int irq)
|
|
{
|
|
int was_pending = 0;
|
|
unsigned long flags;
|
|
@@ -1996,47 +2060,18 @@ static unsigned int startup_edge_ioapic_
|
|
return was_pending;
|
|
}
|
|
|
|
-/*
|
|
- * Once we have recorded IRQ_PENDING already, we can mask the
|
|
- * interrupt for real. This prevents IRQ storms from unhandled
|
|
- * devices.
|
|
- */
|
|
-static void ack_edge_ioapic_irq(unsigned int irq)
|
|
-{
|
|
- move_irq(irq);
|
|
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
|
|
- == (IRQ_PENDING | IRQ_DISABLED))
|
|
- mask_IO_APIC_irq(irq);
|
|
- ack_APIC_irq();
|
|
-}
|
|
-
|
|
-/*
|
|
- * Level triggered interrupts can just be masked,
|
|
- * and shutting down and starting up the interrupt
|
|
- * is the same as enabling and disabling them -- except
|
|
- * with a startup need to return a "was pending" value.
|
|
- *
|
|
- * Level triggered interrupts are special because we
|
|
- * do not touch any IO-APIC register while handling
|
|
- * them. We ack the APIC in the end-IRQ handler, not
|
|
- * in the start-IRQ-handler. Protection against reentrance
|
|
- * from the same interrupt is still provided, both by the
|
|
- * generic IRQ layer and by the fact that an unacked local
|
|
- * APIC does not accept IRQs.
|
|
- */
|
|
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
|
|
+static void ack_ioapic_irq(unsigned int irq)
|
|
{
|
|
- unmask_IO_APIC_irq(irq);
|
|
-
|
|
- return 0; /* don't check for pending */
|
|
+ move_native_irq(irq);
|
|
+ ack_APIC_irq();
|
|
}
|
|
|
|
-static void end_level_ioapic_irq (unsigned int irq)
|
|
+static void ack_ioapic_quirk_irq(unsigned int irq)
|
|
{
|
|
unsigned long v;
|
|
int i;
|
|
|
|
- move_irq(irq);
|
|
+ move_native_irq(irq);
|
|
/*
|
|
* It appears there is an erratum which affects at least version 0x11
|
|
* of I/O APIC (that's the 82093AA and cores integrated into various
|
|
@@ -2056,7 +2091,7 @@ static void end_level_ioapic_irq (unsign
|
|
* operation to prevent an edge-triggered interrupt escaping meanwhile.
|
|
* The idea is from Manfred Spraul. --macro
|
|
*/
|
|
- i = IO_APIC_VECTOR(irq);
|
|
+ i = irq_vector[irq];
|
|
|
|
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
|
|
|
|
@@ -2071,104 +2106,24 @@ static void end_level_ioapic_irq (unsign
|
|
}
|
|
}
|
|
|
|
-#ifdef CONFIG_PCI_MSI
|
|
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- return startup_edge_ioapic_irq(irq);
|
|
-}
|
|
-
|
|
-static void ack_edge_ioapic_vector(unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- move_native_irq(vector);
|
|
- ack_edge_ioapic_irq(irq);
|
|
-}
|
|
-
|
|
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- return startup_level_ioapic_irq (irq);
|
|
-}
|
|
-
|
|
-static void end_level_ioapic_vector (unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- move_native_irq(vector);
|
|
- end_level_ioapic_irq(irq);
|
|
-}
|
|
-
|
|
-static void mask_IO_APIC_vector (unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- mask_IO_APIC_irq(irq);
|
|
-}
|
|
-
|
|
-static void unmask_IO_APIC_vector (unsigned int vector)
|
|
+static int ioapic_retrigger_irq(unsigned int irq)
|
|
{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- unmask_IO_APIC_irq(irq);
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-static void set_ioapic_affinity_vector (unsigned int vector,
|
|
- cpumask_t cpu_mask)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- set_native_irq_info(vector, cpu_mask);
|
|
- set_ioapic_affinity_irq(irq, cpu_mask);
|
|
-}
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-static int ioapic_retrigger(unsigned int irq)
|
|
-{
|
|
- send_IPI_self(IO_APIC_VECTOR(irq));
|
|
+ send_IPI_self(irq_vector[irq]);
|
|
|
|
return 1;
|
|
}
|
|
|
|
-/*
|
|
- * Level and edge triggered IO-APIC interrupts need different handling,
|
|
- * so we use two separate IRQ descriptors. Edge triggered IRQs can be
|
|
- * handled with the level-triggered descriptor, but that one has slightly
|
|
- * more overhead. Level-triggered interrupts cannot be handled with the
|
|
- * edge-triggered handler, without risking IRQ storms and other ugly
|
|
- * races.
|
|
- */
|
|
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
|
|
- .typename = "IO-APIC-edge",
|
|
- .startup = startup_edge_ioapic,
|
|
- .shutdown = shutdown_edge_ioapic,
|
|
- .enable = enable_edge_ioapic,
|
|
- .disable = disable_edge_ioapic,
|
|
- .ack = ack_edge_ioapic,
|
|
- .end = end_edge_ioapic,
|
|
-#ifdef CONFIG_SMP
|
|
- .set_affinity = set_ioapic_affinity,
|
|
-#endif
|
|
- .retrigger = ioapic_retrigger,
|
|
-};
|
|
-
|
|
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
|
|
- .typename = "IO-APIC-level",
|
|
- .startup = startup_level_ioapic,
|
|
- .shutdown = shutdown_level_ioapic,
|
|
- .enable = enable_level_ioapic,
|
|
- .disable = disable_level_ioapic,
|
|
- .ack = mask_and_ack_level_ioapic,
|
|
- .end = end_level_ioapic,
|
|
+static struct irq_chip ioapic_chip __read_mostly = {
|
|
+ .name = "IO-APIC",
|
|
+ .startup = startup_ioapic_irq,
|
|
+ .mask = mask_IO_APIC_irq,
|
|
+ .unmask = unmask_IO_APIC_irq,
|
|
+ .ack = ack_ioapic_irq,
|
|
+ .eoi = ack_ioapic_quirk_irq,
|
|
#ifdef CONFIG_SMP
|
|
- .set_affinity = set_ioapic_affinity,
|
|
+ .set_affinity = set_ioapic_affinity_irq,
|
|
#endif
|
|
- .retrigger = ioapic_retrigger,
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
};
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
@@ -2189,12 +2144,7 @@ static inline void init_IO_APIC_traps(vo
|
|
*/
|
|
for (irq = 0; irq < NR_IRQS ; irq++) {
|
|
int tmp = irq;
|
|
- if (use_pci_vector()) {
|
|
- if (!platform_legacy_irq(tmp))
|
|
- if ((tmp = vector_to_irq(tmp)) == -1)
|
|
- continue;
|
|
- }
|
|
- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
|
|
+ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
|
|
/*
|
|
* Hmm.. We don't have an entry for this,
|
|
* so default to an old-fashioned 8259
|
|
@@ -2205,22 +2155,23 @@ static inline void init_IO_APIC_traps(vo
|
|
#ifndef CONFIG_XEN
|
|
else
|
|
/* Strange. Oh, well.. */
|
|
- irq_desc[irq].chip = &no_irq_type;
|
|
+ irq_desc[irq].chip = &no_irq_chip;
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-static void enable_lapic_irq (unsigned int irq)
|
|
-{
|
|
- unsigned long v;
|
|
+/*
|
|
+ * The local APIC irq-chip implementation:
|
|
+ */
|
|
|
|
- v = apic_read(APIC_LVT0);
|
|
- apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
+static void ack_apic(unsigned int irq)
|
|
+{
|
|
+ ack_APIC_irq();
|
|
}
|
|
|
|
-static void disable_lapic_irq (unsigned int irq)
|
|
+static void mask_lapic_irq (unsigned int irq)
|
|
{
|
|
unsigned long v;
|
|
|
|
@@ -2228,21 +2179,19 @@ static void disable_lapic_irq (unsigned
|
|
apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED);
|
|
}
|
|
|
|
-static void ack_lapic_irq (unsigned int irq)
|
|
+static void unmask_lapic_irq (unsigned int irq)
|
|
{
|
|
- ack_APIC_irq();
|
|
-}
|
|
+ unsigned long v;
|
|
|
|
-static void end_lapic_irq (unsigned int i) { /* nothing */ }
|
|
+ v = apic_read(APIC_LVT0);
|
|
+ apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED);
|
|
+}
|
|
|
|
-static struct hw_interrupt_type lapic_irq_type __read_mostly = {
|
|
- .typename = "local-APIC-edge",
|
|
- .startup = NULL, /* startup_irq() not used for IRQ0 */
|
|
- .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
|
|
- .enable = enable_lapic_irq,
|
|
- .disable = disable_lapic_irq,
|
|
- .ack = ack_lapic_irq,
|
|
- .end = end_lapic_irq
|
|
+static struct irq_chip lapic_chip __read_mostly = {
|
|
+ .name = "local-APIC-edge",
|
|
+ .mask = mask_lapic_irq,
|
|
+ .unmask = unmask_lapic_irq,
|
|
+ .eoi = ack_apic,
|
|
};
|
|
|
|
static void setup_nmi (void)
|
|
@@ -2275,17 +2224,13 @@ static inline void unlock_ExtINT_logic(v
|
|
int apic, pin, i;
|
|
struct IO_APIC_route_entry entry0, entry1;
|
|
unsigned char save_control, save_freq_select;
|
|
- unsigned long flags;
|
|
|
|
pin = find_isa_irq_pin(8, mp_INT);
|
|
apic = find_isa_irq_apic(8, mp_INT);
|
|
if (pin == -1)
|
|
return;
|
|
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ entry0 = ioapic_read_entry(apic, pin);
|
|
clear_IO_APIC_pin(apic, pin);
|
|
|
|
memset(&entry1, 0, sizeof(entry1));
|
|
@@ -2298,10 +2243,7 @@ static inline void unlock_ExtINT_logic(v
|
|
entry1.trigger = 0;
|
|
entry1.vector = 0;
|
|
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
|
|
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_write_entry(apic, pin, entry1);
|
|
|
|
save_control = CMOS_READ(RTC_CONTROL);
|
|
save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
|
|
@@ -2320,10 +2262,7 @@ static inline void unlock_ExtINT_logic(v
|
|
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
|
|
clear_IO_APIC_pin(apic, pin);
|
|
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
|
|
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_write_entry(apic, pin, entry0);
|
|
}
|
|
|
|
int timer_uses_ioapic_pin_0;
|
|
@@ -2423,7 +2362,8 @@ static inline void check_timer(void)
|
|
printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
|
|
|
|
disable_8259A_irq(0);
|
|
- irq_desc[0].chip = &lapic_irq_type;
|
|
+ set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq,
|
|
+ "fasteio");
|
|
apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
|
|
enable_8259A_irq(0);
|
|
|
|
@@ -2537,17 +2477,12 @@ static int ioapic_suspend(struct sys_dev
|
|
{
|
|
struct IO_APIC_route_entry *entry;
|
|
struct sysfs_ioapic_data *data;
|
|
- unsigned long flags;
|
|
int i;
|
|
|
|
data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
entry = data->entry;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
|
|
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
|
|
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
|
|
- }
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
|
|
+ entry[i] = ioapic_read_entry(dev->id, i);
|
|
|
|
return 0;
|
|
}
|
|
@@ -2569,11 +2504,9 @@ static int ioapic_resume(struct sys_devi
|
|
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
|
|
io_apic_write(dev->id, 0, reg_00.raw);
|
|
}
|
|
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
|
|
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
|
|
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
|
|
- }
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
|
|
+ ioapic_write_entry(dev->id, i, entry[i]);
|
|
|
|
return 0;
|
|
}
|
|
@@ -2619,8 +2552,240 @@ static int __init ioapic_init_sysfs(void
|
|
|
|
device_initcall(ioapic_init_sysfs);
|
|
|
|
+/*
|
|
+ * Dynamic irq allocation and deallocation
|
|
+ */
|
|
+int create_irq(void)
|
|
+{
|
|
+ /* Allocate an unused irq */
|
|
+ int irq, new, vector;
|
|
+ unsigned long flags;
|
|
+
|
|
+ irq = -ENOSPC;
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ for (new = (NR_IRQS - 1); new >= 0; new--) {
|
|
+ if (platform_legacy_irq(new))
|
|
+ continue;
|
|
+ if (irq_vector[new] != 0)
|
|
+ continue;
|
|
+ vector = __assign_irq_vector(new);
|
|
+ if (likely(vector > 0))
|
|
+ irq = new;
|
|
+ break;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+
|
|
+ if (irq >= 0) {
|
|
+ set_intr_gate(vector, interrupt[irq]);
|
|
+ dynamic_irq_init(irq);
|
|
+ }
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+void destroy_irq(unsigned int irq)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ dynamic_irq_cleanup(irq);
|
|
+
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ irq_vector[irq] = 0;
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+}
|
|
+
|
|
#endif /* CONFIG_XEN */
|
|
|
|
+/*
|
|
+ * MSI message composition
|
|
+ */
|
|
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
|
|
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
|
|
+{
|
|
+ int vector;
|
|
+ unsigned dest;
|
|
+
|
|
+ vector = assign_irq_vector(irq);
|
|
+ if (vector >= 0) {
|
|
+ dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
+
|
|
+ msg->address_hi = MSI_ADDR_BASE_HI;
|
|
+ msg->address_lo =
|
|
+ MSI_ADDR_BASE_LO |
|
|
+ ((INT_DEST_MODE == 0) ?
|
|
+ MSI_ADDR_DEST_MODE_PHYSICAL:
|
|
+ MSI_ADDR_DEST_MODE_LOGICAL) |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ MSI_ADDR_REDIRECTION_CPU:
|
|
+ MSI_ADDR_REDIRECTION_LOWPRI) |
|
|
+ MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ msg->data =
|
|
+ MSI_DATA_TRIGGER_EDGE |
|
|
+ MSI_DATA_LEVEL_ASSERT |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ MSI_DATA_DELIVERY_FIXED:
|
|
+ MSI_DATA_DELIVERY_LOWPRI) |
|
|
+ MSI_DATA_VECTOR(vector);
|
|
+ }
|
|
+ return vector;
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct msi_msg msg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ int vector;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ tmp = TARGET_CPUS;
|
|
+
|
|
+ vector = assign_irq_vector(irq);
|
|
+ if (vector < 0)
|
|
+ return;
|
|
+
|
|
+ dest = cpu_mask_to_apicid(mask);
|
|
+
|
|
+ read_msi_msg(irq, &msg);
|
|
+
|
|
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
+ msg.data |= MSI_DATA_VECTOR(vector);
|
|
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ write_msi_msg(irq, &msg);
|
|
+ set_native_irq_info(irq, mask);
|
|
+}
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+/*
|
|
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
|
|
+ * which implement the MSI or MSI-X Capability Structure.
|
|
+ */
|
|
+static struct irq_chip msi_chip = {
|
|
+ .name = "PCI-MSI",
|
|
+ .unmask = unmask_msi_irq,
|
|
+ .mask = mask_msi_irq,
|
|
+ .ack = ack_ioapic_irq,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_msi_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
|
|
+{
|
|
+ struct msi_msg msg;
|
|
+ int ret;
|
|
+ ret = msi_compose_msg(dev, irq, &msg);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
+
|
|
+ write_msi_msg(irq, &msg);
|
|
+
|
|
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq,
|
|
+ "edge");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void arch_teardown_msi_irq(unsigned int irq)
|
|
+{
|
|
+ return;
|
|
+}
|
|
+
|
|
+#endif /* CONFIG_PCI_MSI */
|
|
+
|
|
+/*
|
|
+ * Hypertransport interrupt support
|
|
+ */
|
|
+#ifdef CONFIG_HT_IRQ
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+
|
|
+static void target_ht_irq(unsigned int irq, unsigned int dest)
|
|
+{
|
|
+ struct ht_irq_msg msg;
|
|
+ fetch_ht_irq_msg(irq, &msg);
|
|
+
|
|
+ msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK);
|
|
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
|
|
+
|
|
+ msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest);
|
|
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
|
|
+
|
|
+ write_ht_irq_msg(irq, &msg);
|
|
+}
|
|
+
|
|
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ tmp = TARGET_CPUS;
|
|
+
|
|
+ cpus_and(mask, tmp, CPU_MASK_ALL);
|
|
+
|
|
+ dest = cpu_mask_to_apicid(mask);
|
|
+
|
|
+ target_ht_irq(irq, dest);
|
|
+ set_native_irq_info(irq, mask);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static struct irq_chip ht_irq_chip = {
|
|
+ .name = "PCI-HT",
|
|
+ .mask = mask_ht_irq,
|
|
+ .unmask = unmask_ht_irq,
|
|
+ .ack = ack_ioapic_irq,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_ht_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
|
|
+{
|
|
+ int vector;
|
|
+
|
|
+ vector = assign_irq_vector(irq);
|
|
+ if (vector >= 0) {
|
|
+ struct ht_irq_msg msg;
|
|
+ unsigned dest;
|
|
+ cpumask_t tmp;
|
|
+
|
|
+ cpus_clear(tmp);
|
|
+ cpu_set(vector >> 8, tmp);
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
|
+
|
|
+ msg.address_lo =
|
|
+ HT_IRQ_LOW_BASE |
|
|
+ HT_IRQ_LOW_DEST_ID(dest) |
|
|
+ HT_IRQ_LOW_VECTOR(vector) |
|
|
+ ((INT_DEST_MODE == 0) ?
|
|
+ HT_IRQ_LOW_DM_PHYSICAL :
|
|
+ HT_IRQ_LOW_DM_LOGICAL) |
|
|
+ HT_IRQ_LOW_RQEOI_EDGE |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ HT_IRQ_LOW_MT_FIXED :
|
|
+ HT_IRQ_LOW_MT_ARBITRATED) |
|
|
+ HT_IRQ_LOW_IRQ_MASKED;
|
|
+
|
|
+ write_ht_irq_msg(irq, &msg);
|
|
+
|
|
+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+ }
|
|
+ return vector;
|
|
+}
|
|
+#endif /* CONFIG_HT_IRQ */
|
|
+
|
|
/* --------------------------------------------------------------------------
|
|
ACPI-based IOAPIC Configuration
|
|
-------------------------------------------------------------------------- */
|
|
@@ -2774,13 +2939,34 @@ int io_apic_set_pci_routing (int ioapic,
|
|
if (!ioapic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
|
|
+ ioapic_write_entry(ioapic, pin, entry);
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
|
|
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
|
|
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
|
|
+ set_native_irq_info(irq, TARGET_CPUS);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
return 0;
|
|
}
|
|
|
|
#endif /* CONFIG_ACPI */
|
|
+
|
|
+static int __init parse_disable_timer_pin_1(char *arg)
|
|
+{
|
|
+ disable_timer_pin_1 = 1;
|
|
+ return 0;
|
|
+}
|
|
+early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
|
|
+
|
|
+static int __init parse_enable_timer_pin_1(char *arg)
|
|
+{
|
|
+ disable_timer_pin_1 = -1;
|
|
+ return 0;
|
|
+}
|
|
+early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
|
|
+
|
|
+static int __init parse_noapic(char *arg)
|
|
+{
|
|
+ /* disable IO-APIC */
|
|
+ disable_ioapic_setup();
|
|
+ return 0;
|
|
+}
|
|
+early_param("noapic", parse_noapic);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/ldt_32-xen.c 2007-06-12 13:12:48.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/ldt_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -1,5 +1,5 @@
|
|
/*
|
|
- * linux/kernel/ldt.c
|
|
+ * linux/arch/i386/kernel/ldt.c
|
|
*
|
|
* Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
|
|
* Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
|
|
--- head-2010-05-25.orig/arch/x86/kernel/microcode-xen.c 2007-06-12 13:12:48.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/microcode-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -2,6 +2,7 @@
|
|
* Intel CPU Microcode Update Driver for Linux
|
|
*
|
|
* Copyright (C) 2000-2004 Tigran Aivazian
|
|
+ * 2006 Shaohua Li <shaohua.li@intel.com>
|
|
*
|
|
* This driver allows to upgrade microcode on Intel processors
|
|
* belonging to IA-32 family - PentiumPro, Pentium II,
|
|
@@ -33,7 +34,9 @@
|
|
#include <linux/spinlock.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/mutex.h>
|
|
-#include <linux/syscalls.h>
|
|
+#include <linux/cpu.h>
|
|
+#include <linux/firmware.h>
|
|
+#include <linux/platform_device.h>
|
|
|
|
#include <asm/msr.h>
|
|
#include <asm/uaccess.h>
|
|
@@ -55,12 +58,7 @@ module_param(verbose, int, 0644);
|
|
/* no concurrent ->write()s are allowed on /dev/cpu/microcode */
|
|
static DEFINE_MUTEX(microcode_mutex);
|
|
|
|
-static int microcode_open (struct inode *unused1, struct file *unused2)
|
|
-{
|
|
- return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
|
|
-}
|
|
-
|
|
-
|
|
+#ifdef CONFIG_MICROCODE_OLD_INTERFACE
|
|
static int do_microcode_update (const void __user *ubuf, size_t len)
|
|
{
|
|
int err;
|
|
@@ -85,6 +83,11 @@ static int do_microcode_update (const vo
|
|
return err;
|
|
}
|
|
|
|
+static int microcode_open (struct inode *unused1, struct file *unused2)
|
|
+{
|
|
+ return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
|
|
+}
|
|
+
|
|
static ssize_t microcode_write (struct file *file, const char __user *buf, size_t len, loff_t *ppos)
|
|
{
|
|
ssize_t ret;
|
|
@@ -117,7 +120,7 @@ static struct miscdevice microcode_dev =
|
|
.fops = &microcode_fops,
|
|
};
|
|
|
|
-static int __init microcode_init (void)
|
|
+static int __init microcode_dev_init (void)
|
|
{
|
|
int error;
|
|
|
|
@@ -129,6 +132,68 @@ static int __init microcode_init (void)
|
|
return error;
|
|
}
|
|
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void __exit microcode_dev_exit (void)
|
|
+{
|
|
+ misc_deregister(&microcode_dev);
|
|
+}
|
|
+
|
|
+MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
|
|
+#else
|
|
+#define microcode_dev_init() 0
|
|
+#define microcode_dev_exit() do { } while(0)
|
|
+#endif
|
|
+
|
|
+/* fake device for request_firmware */
|
|
+static struct platform_device *microcode_pdev;
|
|
+
|
|
+static int request_microcode(void)
|
|
+{
|
|
+ char name[30];
|
|
+ const struct cpuinfo_x86 *c = &boot_cpu_data;
|
|
+ const struct firmware *firmware;
|
|
+ int error;
|
|
+ struct xen_platform_op op;
|
|
+
|
|
+ sprintf(name,"intel-ucode/%02x-%02x-%02x",
|
|
+ c->x86, c->x86_model, c->x86_mask);
|
|
+ error = request_firmware(&firmware, name, &microcode_pdev->dev);
|
|
+ if (error) {
|
|
+ pr_debug("ucode data file %s load failed\n", name);
|
|
+ return error;
|
|
+ }
|
|
+
|
|
+ op.cmd = XENPF_microcode_update;
|
|
+ set_xen_guest_handle(op.u.microcode.data, (void *)firmware->data);
|
|
+ op.u.microcode.length = firmware->size;
|
|
+ error = HYPERVISOR_platform_op(&op);
|
|
+
|
|
+ release_firmware(firmware);
|
|
+
|
|
+ if (error)
|
|
+ pr_debug("ucode load failed\n");
|
|
+
|
|
+ return error;
|
|
+}
|
|
+
|
|
+static int __init microcode_init (void)
|
|
+{
|
|
+ int error;
|
|
+
|
|
+ error = microcode_dev_init();
|
|
+ if (error)
|
|
+ return error;
|
|
+ microcode_pdev = platform_device_register_simple("microcode", -1,
|
|
+ NULL, 0);
|
|
+ if (IS_ERR(microcode_pdev)) {
|
|
+ microcode_dev_exit();
|
|
+ return PTR_ERR(microcode_pdev);
|
|
+ }
|
|
+
|
|
+ request_microcode();
|
|
+
|
|
printk(KERN_INFO
|
|
"IA-32 Microcode Update Driver: v" MICROCODE_VERSION " <tigran@veritas.com>\n");
|
|
return 0;
|
|
@@ -136,9 +201,9 @@ static int __init microcode_init (void)
|
|
|
|
static void __exit microcode_exit (void)
|
|
{
|
|
- misc_deregister(&microcode_dev);
|
|
+ microcode_dev_exit();
|
|
+ platform_device_unregister(microcode_pdev);
|
|
}
|
|
|
|
module_init(microcode_init)
|
|
module_exit(microcode_exit)
|
|
-MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/mpparse_32-xen.c 2007-06-12 13:12:48.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/mpparse_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -30,6 +30,7 @@
|
|
#include <asm/io_apic.h>
|
|
|
|
#include <mach_apic.h>
|
|
+#include <mach_apicdef.h>
|
|
#include <mach_mpparse.h>
|
|
#include <bios_ebda.h>
|
|
|
|
@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
|
|
/* Processor that is doing the boot up */
|
|
unsigned int boot_cpu_physical_apicid = -1U;
|
|
/* Internal processor count */
|
|
-static unsigned int __devinitdata num_processors;
|
|
+unsigned int __cpuinitdata num_processors;
|
|
|
|
/* Bitmask of physically existing CPUs */
|
|
physid_mask_t phys_cpu_present_map;
|
|
@@ -235,12 +236,14 @@ static void __init MP_bus_info (struct m
|
|
|
|
mpc_oem_bus_info(m, str, translation_table[mpc_record]);
|
|
|
|
+#if MAX_MP_BUSSES < 256
|
|
if (m->mpc_busid >= MAX_MP_BUSSES) {
|
|
printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
|
|
" is too large, max. supported is %d\n",
|
|
m->mpc_busid, str, MAX_MP_BUSSES - 1);
|
|
return;
|
|
}
|
|
+#endif
|
|
|
|
if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
|
|
mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
|
|
@@ -300,19 +303,6 @@ static void __init MP_lintsrc_info (stru
|
|
m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
|
|
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
|
|
- /*
|
|
- * Well it seems all SMP boards in existence
|
|
- * use ExtINT/LVT1 == LINT0 and
|
|
- * NMI/LVT2 == LINT1 - the following check
|
|
- * will show us if this assumptions is false.
|
|
- * Until then we do not have to add baggage.
|
|
- */
|
|
- if ((m->mpc_irqtype == mp_ExtINT) &&
|
|
- (m->mpc_destapiclint != 0))
|
|
- BUG();
|
|
- if ((m->mpc_irqtype == mp_NMI) &&
|
|
- (m->mpc_destapiclint != 1))
|
|
- BUG();
|
|
}
|
|
|
|
#ifdef CONFIG_X86_NUMAQ
|
|
@@ -838,8 +828,7 @@ int es7000_plat;
|
|
|
|
#ifdef CONFIG_ACPI
|
|
|
|
-void __init mp_register_lapic_address (
|
|
- u64 address)
|
|
+void __init mp_register_lapic_address(u64 address)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
mp_lapic_addr = (unsigned long) address;
|
|
@@ -853,13 +842,10 @@ void __init mp_register_lapic_address (
|
|
#endif
|
|
}
|
|
|
|
-
|
|
-void __devinit mp_register_lapic (
|
|
- u8 id,
|
|
- u8 enabled)
|
|
+void __devinit mp_register_lapic (u8 id, u8 enabled)
|
|
{
|
|
struct mpc_config_processor processor;
|
|
- int boot_cpu = 0;
|
|
+ int boot_cpu = 0;
|
|
|
|
if (MAX_APICS - id <= 0) {
|
|
printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
|
|
@@ -898,11 +884,9 @@ static struct mp_ioapic_routing {
|
|
u32 pin_programmed[4];
|
|
} mp_ioapic_routing[MAX_IO_APICS];
|
|
|
|
-
|
|
-static int mp_find_ioapic (
|
|
- int gsi)
|
|
+static int mp_find_ioapic (int gsi)
|
|
{
|
|
- int i = 0;
|
|
+ int i = 0;
|
|
|
|
/* Find the IOAPIC that manages this GSI. */
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
@@ -915,15 +899,11 @@ static int mp_find_ioapic (
|
|
|
|
return -1;
|
|
}
|
|
-
|
|
|
|
-void __init mp_register_ioapic (
|
|
- u8 id,
|
|
- u32 address,
|
|
- u32 gsi_base)
|
|
+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
|
|
{
|
|
- int idx = 0;
|
|
- int tmpid;
|
|
+ int idx = 0;
|
|
+ int tmpid;
|
|
|
|
if (nr_ioapics >= MAX_IO_APICS) {
|
|
printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
|
|
@@ -971,16 +951,10 @@ void __init mp_register_ioapic (
|
|
mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
|
|
mp_ioapic_routing[idx].gsi_base,
|
|
mp_ioapic_routing[idx].gsi_end);
|
|
-
|
|
- return;
|
|
}
|
|
|
|
-
|
|
-void __init mp_override_legacy_irq (
|
|
- u8 bus_irq,
|
|
- u8 polarity,
|
|
- u8 trigger,
|
|
- u32 gsi)
|
|
+void __init
|
|
+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
|
|
{
|
|
struct mpc_config_intsrc intsrc;
|
|
int ioapic = -1;
|
|
@@ -1018,15 +992,13 @@ void __init mp_override_legacy_irq (
|
|
mp_irqs[mp_irq_entries] = intsrc;
|
|
if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
|
panic("Max # of irq sources exceeded!\n");
|
|
-
|
|
- return;
|
|
}
|
|
|
|
void __init mp_config_acpi_legacy_irqs (void)
|
|
{
|
|
struct mpc_config_intsrc intsrc;
|
|
- int i = 0;
|
|
- int ioapic = -1;
|
|
+ int i = 0;
|
|
+ int ioapic = -1;
|
|
|
|
/*
|
|
* Fabricate the legacy ISA bus (bus #31).
|
|
@@ -1095,12 +1067,12 @@ void __init mp_config_acpi_legacy_irqs (
|
|
|
|
#define MAX_GSI_NUM 4096
|
|
|
|
-int mp_register_gsi (u32 gsi, int triggering, int polarity)
|
|
+int mp_register_gsi(u32 gsi, int triggering, int polarity)
|
|
{
|
|
- int ioapic = -1;
|
|
- int ioapic_pin = 0;
|
|
- int idx, bit = 0;
|
|
- static int pci_irq = 16;
|
|
+ int ioapic = -1;
|
|
+ int ioapic_pin = 0;
|
|
+ int idx, bit = 0;
|
|
+ static int pci_irq = 16;
|
|
/*
|
|
* Mapping between Global System Interrups, which
|
|
* represent all possible interrupts, and IRQs
|
|
--- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c 2009-11-06 10:23:23.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -110,8 +110,7 @@ dma_map_sg(struct device *hwdev, struct
|
|
{
|
|
int i, rc;
|
|
|
|
- if (direction == DMA_NONE)
|
|
- BUG();
|
|
+ BUG_ON(!valid_dma_direction(direction));
|
|
WARN_ON(nents == 0 || sg[0].length == 0);
|
|
|
|
if (swiotlb) {
|
|
@@ -142,7 +141,7 @@ dma_unmap_sg(struct device *hwdev, struc
|
|
{
|
|
int i;
|
|
|
|
- BUG_ON(direction == DMA_NONE);
|
|
+ BUG_ON(!valid_dma_direction(direction));
|
|
if (swiotlb)
|
|
swiotlb_unmap_sg(hwdev, sg, nents, direction);
|
|
else {
|
|
@@ -159,8 +158,7 @@ dma_map_page(struct device *dev, struct
|
|
{
|
|
dma_addr_t dma_addr;
|
|
|
|
- BUG_ON(direction == DMA_NONE);
|
|
-
|
|
+ BUG_ON(!valid_dma_direction(direction));
|
|
if (swiotlb) {
|
|
dma_addr = swiotlb_map_page(
|
|
dev, page, offset, size, direction);
|
|
@@ -177,7 +175,7 @@ void
|
|
dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
|
|
enum dma_data_direction direction)
|
|
{
|
|
- BUG_ON(direction == DMA_NONE);
|
|
+ BUG_ON(!valid_dma_direction(direction));
|
|
if (swiotlb)
|
|
swiotlb_unmap_page(dev, dma_address, size, direction);
|
|
else
|
|
@@ -356,8 +354,7 @@ dma_map_single(struct device *dev, void
|
|
{
|
|
dma_addr_t dma;
|
|
|
|
- if (direction == DMA_NONE)
|
|
- BUG();
|
|
+ BUG_ON(!valid_dma_direction(direction));
|
|
WARN_ON(size == 0);
|
|
|
|
if (swiotlb) {
|
|
@@ -378,8 +375,7 @@ void
|
|
dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
|
|
enum dma_data_direction direction)
|
|
{
|
|
- if (direction == DMA_NONE)
|
|
- BUG();
|
|
+ BUG_ON(!valid_dma_direction(direction));
|
|
if (swiotlb)
|
|
swiotlb_unmap_single(dev, dma_addr, size, direction);
|
|
else
|
|
--- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c 2008-07-21 11:00:32.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/process_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -37,6 +37,7 @@
|
|
#include <linux/kallsyms.h>
|
|
#include <linux/ptrace.h>
|
|
#include <linux/random.h>
|
|
+#include <linux/personality.h>
|
|
|
|
#include <asm/uaccess.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -186,7 +187,7 @@ void cpu_idle(void)
|
|
void cpu_idle_wait(void)
|
|
{
|
|
unsigned int cpu, this_cpu = get_cpu();
|
|
- cpumask_t map;
|
|
+ cpumask_t map, tmp = current->cpus_allowed;
|
|
|
|
set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
|
|
put_cpu();
|
|
@@ -208,6 +209,8 @@ void cpu_idle_wait(void)
|
|
}
|
|
cpus_and(map, map, cpu_online_map);
|
|
} while (!cpus_empty(map));
|
|
+
|
|
+ set_cpus_allowed(current, tmp);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_idle_wait);
|
|
|
|
@@ -240,9 +243,9 @@ void show_regs(struct pt_regs * regs)
|
|
if (user_mode_vm(regs))
|
|
printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
|
|
printk(" EFLAGS: %08lx %s (%s %.*s)\n",
|
|
- regs->eflags, print_tainted(), system_utsname.release,
|
|
- (int)strcspn(system_utsname.version, " "),
|
|
- system_utsname.version);
|
|
+ regs->eflags, print_tainted(), init_utsname()->release,
|
|
+ (int)strcspn(init_utsname()->version, " "),
|
|
+ init_utsname()->version);
|
|
printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
|
|
regs->eax,regs->ebx,regs->ecx,regs->edx);
|
|
printk("ESI: %08lx EDI: %08lx EBP: %08lx",
|
|
@@ -264,15 +267,6 @@ void show_regs(struct pt_regs * regs)
|
|
* the "args".
|
|
*/
|
|
extern void kernel_thread_helper(void);
|
|
-__asm__(".section .text\n"
|
|
- ".align 4\n"
|
|
- "kernel_thread_helper:\n\t"
|
|
- "movl %edx,%eax\n\t"
|
|
- "pushl %edx\n\t"
|
|
- "call *%ebx\n\t"
|
|
- "pushl %eax\n\t"
|
|
- "call do_exit\n"
|
|
- ".previous");
|
|
|
|
/*
|
|
* Create a kernel thread
|
|
@@ -290,7 +284,7 @@ int kernel_thread(int (*fn)(void *), voi
|
|
regs.xes = __USER_DS;
|
|
regs.orig_eax = -1;
|
|
regs.eip = (unsigned long) kernel_thread_helper;
|
|
- regs.xcs = GET_KERNEL_CS();
|
|
+ regs.xcs = __KERNEL_CS | get_kernel_rpl();
|
|
regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
|
|
|
|
/* Ok, create the new process.. */
|
|
@@ -369,13 +363,12 @@ int copy_thread(int nr, unsigned long cl
|
|
|
|
tsk = current;
|
|
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
|
|
- p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
|
|
+ p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
|
|
+ IO_BITMAP_BYTES, GFP_KERNEL);
|
|
if (!p->thread.io_bitmap_ptr) {
|
|
p->thread.io_bitmap_max = 0;
|
|
return -ENOMEM;
|
|
}
|
|
- memcpy(p->thread.io_bitmap_ptr, tsk->thread.io_bitmap_ptr,
|
|
- IO_BITMAP_BYTES);
|
|
set_tsk_thread_flag(p, TIF_IO_BITMAP);
|
|
}
|
|
|
|
@@ -871,7 +864,7 @@ asmlinkage int sys_get_thread_area(struc
|
|
|
|
unsigned long arch_align_stack(unsigned long sp)
|
|
{
|
|
- if (randomize_va_space)
|
|
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
|
|
sp -= get_random_int() % 8192;
|
|
return sp & ~0xf;
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup_32-xen.c 2008-04-22 15:41:51.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/setup_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -56,6 +56,7 @@
|
|
#include <asm/apic.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/mpspec.h>
|
|
+#include <asm/mmzone.h>
|
|
#include <asm/setup.h>
|
|
#include <asm/arch_hooks.h>
|
|
#include <asm/sections.h>
|
|
@@ -83,9 +84,6 @@ static struct notifier_block xen_panic_b
|
|
xen_panic_event, NULL, 0 /* try to go last */
|
|
};
|
|
|
|
-extern char hypercall_page[PAGE_SIZE];
|
|
-EXPORT_SYMBOL(hypercall_page);
|
|
-
|
|
int disable_pse __devinitdata = 0;
|
|
|
|
/*
|
|
@@ -105,18 +103,6 @@ EXPORT_SYMBOL(boot_cpu_data);
|
|
|
|
unsigned long mmu_cr4_features;
|
|
|
|
-#ifdef CONFIG_ACPI
|
|
- int acpi_disabled = 0;
|
|
-#else
|
|
- int acpi_disabled = 1;
|
|
-#endif
|
|
-EXPORT_SYMBOL(acpi_disabled);
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
-int __initdata acpi_force = 0;
|
|
-extern acpi_interrupt_flags acpi_sci_flags;
|
|
-#endif
|
|
-
|
|
/* for MCA, but anyone else can use it if they want */
|
|
unsigned int machine_id;
|
|
#ifdef CONFIG_MCA
|
|
@@ -170,7 +156,6 @@ struct e820map machine_e820;
|
|
#endif
|
|
|
|
extern void early_cpu_init(void);
|
|
-extern void generic_apic_probe(char *);
|
|
extern int root_mountflags;
|
|
|
|
unsigned long saved_videomode;
|
|
@@ -243,9 +228,6 @@ static struct resource adapter_rom_resou
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_READONLY | IORESOURCE_MEM
|
|
} };
|
|
|
|
-#define ADAPTER_ROM_RESOURCES \
|
|
- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
|
|
-
|
|
static struct resource video_rom_resource = {
|
|
.name = "Video ROM",
|
|
.start = 0xc0000,
|
|
@@ -307,9 +289,6 @@ static struct resource standard_io_resou
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO
|
|
} };
|
|
|
|
-#define STANDARD_IO_RESOURCES \
|
|
- (sizeof standard_io_resources / sizeof standard_io_resources[0])
|
|
-
|
|
#define romsignature(x) (*(unsigned short *)(x) == 0xaa55)
|
|
|
|
static int __init romchecksum(unsigned char *rom, unsigned long length)
|
|
@@ -372,7 +351,7 @@ static void __init probe_roms(void)
|
|
}
|
|
|
|
/* check for adapter roms on 2k boundaries */
|
|
- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
|
|
+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper; start += 2048) {
|
|
rom = isa_bus_to_virt(start);
|
|
if (!romsignature(rom))
|
|
continue;
|
|
@@ -779,246 +758,152 @@ static inline void copy_edd(void)
|
|
}
|
|
#endif
|
|
|
|
-static void __init parse_cmdline_early (char ** cmdline_p)
|
|
+static int __initdata user_defined_memmap = 0;
|
|
+
|
|
+/*
|
|
+ * "mem=nopentium" disables the 4MB page tables.
|
|
+ * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
|
|
+ * to <mem>, overriding the bios size.
|
|
+ * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
|
|
+ * <start> to <start>+<mem>, overriding the bios size.
|
|
+ *
|
|
+ * HPA tells me bootloaders need to parse mem=, so no new
|
|
+ * option should be mem= [also see Documentation/i386/boot.txt]
|
|
+ */
|
|
+static int __init parse_mem(char *arg)
|
|
{
|
|
- char c = ' ', *to = command_line, *from = saved_command_line;
|
|
- int len = 0, max_cmdline;
|
|
- int userdef = 0;
|
|
-
|
|
- if ((max_cmdline = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
|
|
- max_cmdline = COMMAND_LINE_SIZE;
|
|
- memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
|
|
- /* Save unparsed command line copy for /proc/cmdline */
|
|
- saved_command_line[max_cmdline-1] = '\0';
|
|
-
|
|
- for (;;) {
|
|
- if (c != ' ')
|
|
- goto next_char;
|
|
- /*
|
|
- * "mem=nopentium" disables the 4MB page tables.
|
|
- * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
|
|
- * to <mem>, overriding the bios size.
|
|
- * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
|
|
- * <start> to <start>+<mem>, overriding the bios size.
|
|
- *
|
|
- * HPA tells me bootloaders need to parse mem=, so no new
|
|
- * option should be mem= [also see Documentation/i386/boot.txt]
|
|
- */
|
|
- if (!memcmp(from, "mem=", 4)) {
|
|
- if (to != command_line)
|
|
- to--;
|
|
- if (!memcmp(from+4, "nopentium", 9)) {
|
|
- from += 9+4;
|
|
- clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
|
|
- disable_pse = 1;
|
|
- } else {
|
|
- /* If the user specifies memory size, we
|
|
- * limit the BIOS-provided memory map to
|
|
- * that size. exactmap can be used to specify
|
|
- * the exact map. mem=number can be used to
|
|
- * trim the existing memory map.
|
|
- */
|
|
- unsigned long long mem_size;
|
|
-
|
|
- mem_size = memparse(from+4, &from);
|
|
- limit_regions(mem_size);
|
|
- userdef=1;
|
|
- }
|
|
- }
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
|
|
- else if (!memcmp(from, "memmap=", 7)) {
|
|
- if (to != command_line)
|
|
- to--;
|
|
- if (!memcmp(from+7, "exactmap", 8)) {
|
|
-#ifdef CONFIG_CRASH_DUMP
|
|
- /* If we are doing a crash dump, we
|
|
- * still need to know the real mem
|
|
- * size before original memory map is
|
|
- * reset.
|
|
- */
|
|
- find_max_pfn();
|
|
- saved_max_pfn = max_pfn;
|
|
-#endif
|
|
- from += 8+7;
|
|
- e820.nr_map = 0;
|
|
- userdef = 1;
|
|
- } else {
|
|
- /* If the user specifies memory size, we
|
|
- * limit the BIOS-provided memory map to
|
|
- * that size. exactmap can be used to specify
|
|
- * the exact map. mem=number can be used to
|
|
- * trim the existing memory map.
|
|
- */
|
|
- unsigned long long start_at, mem_size;
|
|
+ if (strcmp(arg, "nopentium") == 0) {
|
|
+ clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
|
|
+ disable_pse = 1;
|
|
+ } else {
|
|
+ /* If the user specifies memory size, we
|
|
+ * limit the BIOS-provided memory map to
|
|
+ * that size. exactmap can be used to specify
|
|
+ * the exact map. mem=number can be used to
|
|
+ * trim the existing memory map.
|
|
+ */
|
|
+ unsigned long long mem_size;
|
|
|
|
- mem_size = memparse(from+7, &from);
|
|
- if (*from == '@') {
|
|
- start_at = memparse(from+1, &from);
|
|
- add_memory_region(start_at, mem_size, E820_RAM);
|
|
- } else if (*from == '#') {
|
|
- start_at = memparse(from+1, &from);
|
|
- add_memory_region(start_at, mem_size, E820_ACPI);
|
|
- } else if (*from == '$') {
|
|
- start_at = memparse(from+1, &from);
|
|
- add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
- } else {
|
|
- limit_regions(mem_size);
|
|
- userdef=1;
|
|
- }
|
|
- }
|
|
- }
|
|
-
|
|
- else if (!memcmp(from, "noexec=", 7))
|
|
- noexec_setup(from + 7);
|
|
+ mem_size = memparse(arg, &arg);
|
|
+ limit_regions(mem_size);
|
|
+ user_defined_memmap = 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+early_param("mem", parse_mem);
|
|
|
|
+static int __init parse_memmap(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
|
|
-#ifdef CONFIG_X86_MPPARSE
|
|
- /*
|
|
- * If the BIOS enumerates physical processors before logical,
|
|
- * maxcpus=N at enumeration-time can be used to disable HT.
|
|
+ if (strcmp(arg, "exactmap") == 0) {
|
|
+#ifdef CONFIG_CRASH_DUMP
|
|
+ /* If we are doing a crash dump, we
|
|
+ * still need to know the real mem
|
|
+ * size before original memory map is
|
|
+ * reset.
|
|
*/
|
|
- else if (!memcmp(from, "maxcpus=", 8)) {
|
|
- extern unsigned int maxcpus;
|
|
-
|
|
- maxcpus = simple_strtoul(from + 8, NULL, 0);
|
|
- }
|
|
+ find_max_pfn();
|
|
+ saved_max_pfn = max_pfn;
|
|
#endif
|
|
+ e820.nr_map = 0;
|
|
+ user_defined_memmap = 1;
|
|
+ } else {
|
|
+ /* If the user specifies memory size, we
|
|
+ * limit the BIOS-provided memory map to
|
|
+ * that size. exactmap can be used to specify
|
|
+ * the exact map. mem=number can be used to
|
|
+ * trim the existing memory map.
|
|
+ */
|
|
+ unsigned long long start_at, mem_size;
|
|
|
|
-#ifdef CONFIG_ACPI
|
|
- /* "acpi=off" disables both ACPI table parsing and interpreter */
|
|
- else if (!memcmp(from, "acpi=off", 8)) {
|
|
- disable_acpi();
|
|
- }
|
|
-
|
|
- /* acpi=force to over-ride black-list */
|
|
- else if (!memcmp(from, "acpi=force", 10)) {
|
|
- acpi_force = 1;
|
|
- acpi_ht = 1;
|
|
- acpi_disabled = 0;
|
|
- }
|
|
-
|
|
- /* acpi=strict disables out-of-spec workarounds */
|
|
- else if (!memcmp(from, "acpi=strict", 11)) {
|
|
- acpi_strict = 1;
|
|
- }
|
|
-
|
|
- /* Limit ACPI just to boot-time to enable HT */
|
|
- else if (!memcmp(from, "acpi=ht", 7)) {
|
|
- if (!acpi_force)
|
|
- disable_acpi();
|
|
- acpi_ht = 1;
|
|
- }
|
|
-
|
|
- /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
|
|
- else if (!memcmp(from, "pci=noacpi", 10)) {
|
|
- acpi_disable_pci();
|
|
- }
|
|
- /* "acpi=noirq" disables ACPI interrupt routing */
|
|
- else if (!memcmp(from, "acpi=noirq", 10)) {
|
|
- acpi_noirq_set();
|
|
+ mem_size = memparse(arg, &arg);
|
|
+ if (*arg == '@') {
|
|
+ start_at = memparse(arg+1, &arg);
|
|
+ add_memory_region(start_at, mem_size, E820_RAM);
|
|
+ } else if (*arg == '#') {
|
|
+ start_at = memparse(arg+1, &arg);
|
|
+ add_memory_region(start_at, mem_size, E820_ACPI);
|
|
+ } else if (*arg == '$') {
|
|
+ start_at = memparse(arg+1, &arg);
|
|
+ add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
+ } else {
|
|
+ limit_regions(mem_size);
|
|
+ user_defined_memmap = 1;
|
|
}
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+early_param("memmap", parse_memmap);
|
|
|
|
- else if (!memcmp(from, "acpi_sci=edge", 13))
|
|
- acpi_sci_flags.trigger = 1;
|
|
-
|
|
- else if (!memcmp(from, "acpi_sci=level", 14))
|
|
- acpi_sci_flags.trigger = 3;
|
|
+#ifdef CONFIG_PROC_VMCORE
|
|
+/* elfcorehdr= specifies the location of elf core header
|
|
+ * stored by the crashed kernel.
|
|
+ */
|
|
+static int __init parse_elfcorehdr(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
|
|
- else if (!memcmp(from, "acpi_sci=high", 13))
|
|
- acpi_sci_flags.polarity = 1;
|
|
+ elfcorehdr_addr = memparse(arg, &arg);
|
|
+ return 0;
|
|
+}
|
|
+early_param("elfcorehdr", parse_elfcorehdr);
|
|
+#endif /* CONFIG_PROC_VMCORE */
|
|
|
|
- else if (!memcmp(from, "acpi_sci=low", 12))
|
|
- acpi_sci_flags.polarity = 3;
|
|
+/*
|
|
+ * highmem=size forces highmem to be exactly 'size' bytes.
|
|
+ * This works even on boxes that have no highmem otherwise.
|
|
+ * This also works to reduce highmem size on bigger boxes.
|
|
+ */
|
|
+static int __init parse_highmem(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- else if (!memcmp(from, "acpi_skip_timer_override", 24))
|
|
- acpi_skip_timer_override = 1;
|
|
+ highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
|
|
+ return 0;
|
|
+}
|
|
+early_param("highmem", parse_highmem);
|
|
|
|
- if (!memcmp(from, "disable_timer_pin_1", 19))
|
|
- disable_timer_pin_1 = 1;
|
|
- if (!memcmp(from, "enable_timer_pin_1", 18))
|
|
- disable_timer_pin_1 = -1;
|
|
-
|
|
- /* disable IO-APIC */
|
|
- else if (!memcmp(from, "noapic", 6))
|
|
- disable_ioapic_setup();
|
|
-#endif /* CONFIG_X86_IO_APIC */
|
|
-#endif /* CONFIG_ACPI */
|
|
+/*
|
|
+ * vmalloc=size forces the vmalloc area to be exactly 'size'
|
|
+ * bytes. This can be used to increase (or decrease) the
|
|
+ * vmalloc area - the default is 128m.
|
|
+ */
|
|
+static int __init parse_vmalloc(char *arg)
|
|
+{
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- /* enable local APIC */
|
|
- else if (!memcmp(from, "lapic", 5))
|
|
- lapic_enable();
|
|
-
|
|
- /* disable local APIC */
|
|
- else if (!memcmp(from, "nolapic", 6))
|
|
- lapic_disable();
|
|
-#endif /* CONFIG_X86_LOCAL_APIC */
|
|
+ __VMALLOC_RESERVE = memparse(arg, &arg);
|
|
+ return 0;
|
|
+}
|
|
+early_param("vmalloc", parse_vmalloc);
|
|
|
|
-#ifdef CONFIG_KEXEC
|
|
- /* crashkernel=size@addr specifies the location to reserve for
|
|
- * a crash kernel. By reserving this memory we guarantee
|
|
- * that linux never set's it up as a DMA target.
|
|
- * Useful for holding code to do something appropriate
|
|
- * after a kernel panic.
|
|
- */
|
|
- else if (!memcmp(from, "crashkernel=", 12)) {
|
|
#ifndef CONFIG_XEN
|
|
- unsigned long size, base;
|
|
- size = memparse(from+12, &from);
|
|
- if (*from == '@') {
|
|
- base = memparse(from+1, &from);
|
|
- /* FIXME: Do I want a sanity check
|
|
- * to validate the memory range?
|
|
- */
|
|
- crashk_res.start = base;
|
|
- crashk_res.end = base + size - 1;
|
|
- }
|
|
-#else
|
|
- printk("Ignoring crashkernel command line, "
|
|
- "parameter will be supplied by xen\n");
|
|
-#endif
|
|
- }
|
|
-#endif
|
|
-#ifdef CONFIG_PROC_VMCORE
|
|
- /* elfcorehdr= specifies the location of elf core header
|
|
- * stored by the crashed kernel.
|
|
- */
|
|
- else if (!memcmp(from, "elfcorehdr=", 11))
|
|
- elfcorehdr_addr = memparse(from+11, &from);
|
|
-#endif
|
|
+/*
|
|
+ * reservetop=size reserves a hole at the top of the kernel address space which
|
|
+ * a hypervisor can load into later. Needed for dynamically loaded hypervisors,
|
|
+ * so relocating the fixmap can be done before paging initialization.
|
|
+ */
|
|
+static int __init parse_reservetop(char *arg)
|
|
+{
|
|
+ unsigned long address;
|
|
|
|
- /*
|
|
- * highmem=size forces highmem to be exactly 'size' bytes.
|
|
- * This works even on boxes that have no highmem otherwise.
|
|
- * This also works to reduce highmem size on bigger boxes.
|
|
- */
|
|
- else if (!memcmp(from, "highmem=", 8))
|
|
- highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
|
|
-
|
|
- /*
|
|
- * vmalloc=size forces the vmalloc area to be exactly 'size'
|
|
- * bytes. This can be used to increase (or decrease) the
|
|
- * vmalloc area - the default is 128m.
|
|
- */
|
|
- else if (!memcmp(from, "vmalloc=", 8))
|
|
- __VMALLOC_RESERVE = memparse(from+8, &from);
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
|
|
- next_char:
|
|
- c = *(from++);
|
|
- if (!c)
|
|
- break;
|
|
- if (COMMAND_LINE_SIZE <= ++len)
|
|
- break;
|
|
- *(to++) = c;
|
|
- }
|
|
- *to = '\0';
|
|
- *cmdline_p = command_line;
|
|
- if (userdef) {
|
|
- printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
- print_memory_map("user");
|
|
- }
|
|
+ address = memparse(arg, &arg);
|
|
+ reserve_top_address(address);
|
|
+ return 0;
|
|
}
|
|
+early_param("reservetop", parse_reservetop);
|
|
+#endif
|
|
|
|
/*
|
|
* Callback for efi_memory_walk.
|
|
@@ -1039,7 +924,7 @@ efi_find_max_pfn(unsigned long start, un
|
|
static int __init
|
|
efi_memory_present_wrapper(unsigned long start, unsigned long end, void *arg)
|
|
{
|
|
- memory_present(0, start, end);
|
|
+ memory_present(0, PFN_UP(start), PFN_DOWN(end));
|
|
return 0;
|
|
}
|
|
|
|
@@ -1306,6 +1191,14 @@ static unsigned long __init setup_memory
|
|
}
|
|
printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
|
|
pages_to_mb(highend_pfn - highstart_pfn));
|
|
+ num_physpages = highend_pfn;
|
|
+ high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
|
|
+#else
|
|
+ num_physpages = max_low_pfn;
|
|
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
|
|
+#endif
|
|
+#ifdef CONFIG_FLATMEM
|
|
+ max_mapnr = num_physpages;
|
|
#endif
|
|
printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
|
|
pages_to_mb(max_low_pfn));
|
|
@@ -1317,22 +1210,21 @@ static unsigned long __init setup_memory
|
|
|
|
void __init zone_sizes_init(void)
|
|
{
|
|
- unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
|
|
- unsigned int max_dma, low;
|
|
-
|
|
- max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
|
|
- low = max_low_pfn;
|
|
-
|
|
- if (low < max_dma)
|
|
- zones_size[ZONE_DMA] = low;
|
|
- else {
|
|
- zones_size[ZONE_DMA] = max_dma;
|
|
- zones_size[ZONE_NORMAL] = low - max_dma;
|
|
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
|
|
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
|
|
+ max_zone_pfns[ZONE_DMA] =
|
|
+ virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
|
|
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
|
|
#ifdef CONFIG_HIGHMEM
|
|
- zones_size[ZONE_HIGHMEM] = highend_pfn - low;
|
|
+ max_zone_pfns[ZONE_HIGHMEM] = highend_pfn;
|
|
+ add_active_range(0, 0, min(xen_start_info->nr_pages, highend_pfn));
|
|
+ add_active_range(0, highend_pfn, highend_pfn);
|
|
+#else
|
|
+ add_active_range(0, 0, min(xen_start_info->nr_pages, max_low_pfn));
|
|
+ add_active_range(0, max_low_pfn, max_low_pfn);
|
|
#endif
|
|
- }
|
|
- free_area_init(zones_size);
|
|
+
|
|
+ free_area_init_nodes(max_zone_pfns);
|
|
}
|
|
#else
|
|
extern unsigned long __init setup_memory(void);
|
|
@@ -1389,6 +1281,7 @@ void __init setup_bootmem_allocator(void
|
|
*/
|
|
acpi_reserve_bootmem();
|
|
#endif
|
|
+ numa_kva_reserve();
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
#ifdef CONFIG_BLK_DEV_INITRD
|
|
@@ -1574,7 +1467,7 @@ static int __init request_standard_resou
|
|
request_resource(&iomem_resource, &video_ram_resource);
|
|
|
|
/* request I/O space for devices used on all i[345]86 PCs */
|
|
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
|
|
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
return 0;
|
|
}
|
|
@@ -1705,17 +1598,19 @@ void __init setup_arch(char **cmdline_p)
|
|
data_resource.start = virt_to_phys(_etext);
|
|
data_resource.end = virt_to_phys(_edata)-1;
|
|
|
|
- parse_cmdline_early(cmdline_p);
|
|
+ if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE)
|
|
+ i = COMMAND_LINE_SIZE;
|
|
+ memcpy(saved_command_line, xen_start_info->cmd_line, i);
|
|
+ saved_command_line[i - 1] = '\0';
|
|
+ parse_early_param();
|
|
|
|
-#ifdef CONFIG_EARLY_PRINTK
|
|
- {
|
|
- char *s = strstr(*cmdline_p, "earlyprintk=");
|
|
- if (s) {
|
|
- setup_early_printk(strchr(s, '=') + 1);
|
|
- printk("early console enabled\n");
|
|
- }
|
|
+ if (user_defined_memmap) {
|
|
+ printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
+ print_memory_map("user");
|
|
}
|
|
-#endif
|
|
+
|
|
+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
|
|
+ *cmdline_p = command_line;
|
|
|
|
max_low_pfn = setup_memory();
|
|
|
|
@@ -1822,7 +1717,7 @@ void __init setup_arch(char **cmdline_p)
|
|
dmi_scan_machine();
|
|
|
|
#ifdef CONFIG_X86_GENERICARCH
|
|
- generic_apic_probe(*cmdline_p);
|
|
+ generic_apic_probe();
|
|
#endif
|
|
if (efi_enabled)
|
|
efi_map_memmap();
|
|
@@ -1843,9 +1738,11 @@ void __init setup_arch(char **cmdline_p)
|
|
acpi_boot_table_init();
|
|
#endif
|
|
|
|
+#ifdef CONFIG_PCI
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
check_acpi_pci(); /* Checks more than just ACPI actually */
|
|
#endif
|
|
+#endif
|
|
|
|
#ifdef CONFIG_ACPI
|
|
acpi_boot_init();
|
|
--- head-2010-05-25.orig/arch/x86/kernel/smp_32-xen.c 2007-12-10 08:47:31.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/smp_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -279,8 +279,7 @@ static inline void leave_mm (unsigned lo
|
|
* 2) Leave the mm if we are in the lazy tlb mode.
|
|
*/
|
|
|
|
-irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id,
|
|
- struct pt_regs *regs)
|
|
+irqreturn_t smp_invalidate_interrupt(int irq, void *dev_id)
|
|
{
|
|
unsigned long cpu;
|
|
|
|
@@ -567,16 +566,14 @@ void smp_send_stop(void)
|
|
* all the work is done automatically when
|
|
* we return from the interrupt.
|
|
*/
|
|
-irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id,
|
|
- struct pt_regs *regs)
|
|
+irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id)
|
|
{
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
#include <linux/kallsyms.h>
|
|
-irqreturn_t smp_call_function_interrupt(int irq, void *dev_id,
|
|
- struct pt_regs *regs)
|
|
+irqreturn_t smp_call_function_interrupt(int irq, void *dev_id)
|
|
{
|
|
void (*func) (void *info) = call_data->func;
|
|
void *info = call_data->info;
|
|
@@ -603,3 +600,69 @@ irqreturn_t smp_call_function_interrupt(
|
|
return IRQ_HANDLED;
|
|
}
|
|
|
|
+/*
|
|
+ * this function sends a 'generic call function' IPI to one other CPU
|
|
+ * in the system.
|
|
+ *
|
|
+ * cpu is a standard Linux logical CPU number.
|
|
+ */
|
|
+static void
|
|
+__smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
|
+ int nonatomic, int wait)
|
|
+{
|
|
+ struct call_data_struct data;
|
|
+ int cpus = 1;
|
|
+
|
|
+ data.func = func;
|
|
+ data.info = info;
|
|
+ atomic_set(&data.started, 0);
|
|
+ data.wait = wait;
|
|
+ if (wait)
|
|
+ atomic_set(&data.finished, 0);
|
|
+
|
|
+ call_data = &data;
|
|
+ wmb();
|
|
+ /* Send a message to the target CPU and wait for it to respond */
|
|
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
|
|
+
|
|
+ /* Wait for response */
|
|
+ while (atomic_read(&data.started) != cpus)
|
|
+ cpu_relax();
|
|
+
|
|
+ if (!wait)
|
|
+ return;
|
|
+
|
|
+ while (atomic_read(&data.finished) != cpus)
|
|
+ cpu_relax();
|
|
+}
|
|
+
|
|
+/*
|
|
+ * smp_call_function_single - Run a function on another CPU
|
|
+ * @func: The function to run. This must be fast and non-blocking.
|
|
+ * @info: An arbitrary pointer to pass to the function.
|
|
+ * @nonatomic: Currently unused.
|
|
+ * @wait: If true, wait until function has completed on the other CPU.
|
|
+ *
|
|
+ * Returns 0 on success, else a negative status code.
|
|
+ *
|
|
+ * Does not return until the remote CPU is nearly ready to execute <func>
|
|
+ * or is or has executed.
|
|
+ */
|
|
+
|
|
+int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
|
|
+ int nonatomic, int wait)
|
|
+{
|
|
+ /* prevent preemption and reschedule on another processor */
|
|
+ int me = get_cpu();
|
|
+ if (cpu == me) {
|
|
+ WARN_ON(1);
|
|
+ put_cpu();
|
|
+ return -EBUSY;
|
|
+ }
|
|
+ spin_lock_bh(&call_lock);
|
|
+ __smp_call_function_single(cpu, func, info, nonatomic, wait);
|
|
+ spin_unlock_bh(&call_lock);
|
|
+ put_cpu();
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL(smp_call_function_single);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -88,7 +88,6 @@ int pit_latch_buggy; /* ext
|
|
unsigned long vxtime_hz = PIT_TICK_RATE;
|
|
struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
|
|
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
|
|
-unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES;
|
|
struct timespec __xtime __section_xtime;
|
|
struct timezone __sys_tz __section_sys_tz;
|
|
#endif
|
|
@@ -96,8 +95,6 @@ struct timezone __sys_tz __section_sys_t
|
|
unsigned int cpu_khz; /* Detected as we calibrate the TSC */
|
|
EXPORT_SYMBOL(cpu_khz);
|
|
|
|
-extern unsigned long wall_jiffies;
|
|
-
|
|
DEFINE_SPINLOCK(rtc_lock);
|
|
EXPORT_SYMBOL(rtc_lock);
|
|
|
|
@@ -246,11 +243,10 @@ static void __update_wallclock(time_t se
|
|
time_t wtm_sec, xtime_sec;
|
|
u64 tmp, wc_nsec;
|
|
|
|
- /* Adjust wall-clock time base based on wall_jiffies ticks. */
|
|
+ /* Adjust wall-clock time base. */
|
|
wc_nsec = processed_system_time;
|
|
wc_nsec += sec * (u64)NSEC_PER_SEC;
|
|
wc_nsec += nsec;
|
|
- wc_nsec -= (jiffies - wall_jiffies) * (u64)NS_PER_TICK;
|
|
|
|
/* Split wallclock base into seconds and nanoseconds. */
|
|
tmp = wc_nsec;
|
|
@@ -373,16 +369,10 @@ void do_gettimeofday(struct timeval *tv)
|
|
shadow = &per_cpu(shadow_time, cpu);
|
|
|
|
do {
|
|
- unsigned long lost;
|
|
-
|
|
local_time_version = shadow->version;
|
|
seq = read_seqbegin(&xtime_lock);
|
|
|
|
usec = get_usec_offset(shadow);
|
|
- lost = jiffies - wall_jiffies;
|
|
-
|
|
- if (unlikely(lost))
|
|
- usec += lost * (USEC_PER_SEC / HZ);
|
|
|
|
sec = xtime.tv_sec;
|
|
usec += (xtime.tv_nsec / NSEC_PER_USEC);
|
|
@@ -509,7 +499,7 @@ static void sync_xen_wallclock(unsigned
|
|
write_seqlock_irq(&xtime_lock);
|
|
|
|
sec = xtime.tv_sec;
|
|
- nsec = xtime.tv_nsec + ((jiffies - wall_jiffies) * (u64)NS_PER_TICK);
|
|
+ nsec = xtime.tv_nsec;
|
|
__normalize_time(&sec, &nsec);
|
|
|
|
op.cmd = XENPF_settime;
|
|
@@ -583,42 +573,49 @@ unsigned long long sched_clock(void)
|
|
}
|
|
#endif
|
|
|
|
-#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
|
|
unsigned long profile_pc(struct pt_regs *regs)
|
|
{
|
|
unsigned long pc = instruction_pointer(regs);
|
|
|
|
-#ifdef __x86_64__
|
|
- /* Assume the lock function has either no stack frame or only a single word.
|
|
- This checks if the address on the stack looks like a kernel text address.
|
|
- There is a small window for false hits, but in that case the tick
|
|
- is just accounted to the spinlock function.
|
|
- Better would be to write these functions in assembler again
|
|
- and check exactly. */
|
|
+#if defined(CONFIG_SMP) || defined(__x86_64__)
|
|
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
|
|
- char *v = *(char **)regs->rsp;
|
|
- if ((v >= _stext && v <= _etext) ||
|
|
- (v >= _sinittext && v <= _einittext) ||
|
|
- (v >= (char *)MODULES_VADDR && v <= (char *)MODULES_END))
|
|
- return (unsigned long)v;
|
|
- return ((unsigned long *)regs->rsp)[1];
|
|
+# ifdef CONFIG_FRAME_POINTER
|
|
+# ifdef __i386__
|
|
+ return ((unsigned long *)regs->ebp)[1];
|
|
+# else
|
|
+ return ((unsigned long *)regs->rbp)[1];
|
|
+# endif
|
|
+# else
|
|
+# ifdef __i386__
|
|
+ unsigned long *sp;
|
|
+ if ((regs->xcs & 2) == 0)
|
|
+ sp = (unsigned long *)&regs->esp;
|
|
+ else
|
|
+ sp = (unsigned long *)regs->esp;
|
|
+# else
|
|
+ unsigned long *sp = (unsigned long *)regs->rsp;
|
|
+# endif
|
|
+ /* Return address is either directly at stack pointer
|
|
+ or above a saved eflags. Eflags has bits 22-31 zero,
|
|
+ kernel addresses don't. */
|
|
+ if (sp[0] >> 22)
|
|
+ return sp[0];
|
|
+ if (sp[1] >> 22)
|
|
+ return sp[1];
|
|
+# endif
|
|
}
|
|
-#else
|
|
- if (!user_mode_vm(regs) && in_lock_functions(pc))
|
|
- return *(unsigned long *)(regs->ebp + 4);
|
|
#endif
|
|
|
|
return pc;
|
|
}
|
|
EXPORT_SYMBOL(profile_pc);
|
|
-#endif
|
|
|
|
/*
|
|
* This is the same as the above, except we _also_ save the current
|
|
* Time Stamp Counter value at the time of the timer interrupt, so that
|
|
* we later on can estimate the time of day more exactly.
|
|
*/
|
|
-irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t timer_interrupt(int irq, void *dev_id)
|
|
{
|
|
s64 delta, delta_cpu, stolen, blocked;
|
|
u64 sched_time;
|
|
@@ -676,10 +673,15 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
}
|
|
|
|
/* System-wide jiffy work. */
|
|
- while (delta >= NS_PER_TICK) {
|
|
- delta -= NS_PER_TICK;
|
|
- processed_system_time += NS_PER_TICK;
|
|
- do_timer(regs);
|
|
+ if (delta >= NS_PER_TICK) {
|
|
+ do_div(delta, NS_PER_TICK);
|
|
+ processed_system_time += delta * NS_PER_TICK;
|
|
+ while (delta > HZ) {
|
|
+ clobber_induction_variable(delta);
|
|
+ do_timer(HZ);
|
|
+ delta -= HZ;
|
|
+ }
|
|
+ do_timer(delta);
|
|
}
|
|
|
|
if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) {
|
|
@@ -724,7 +726,7 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
if (delta_cpu > 0) {
|
|
do_div(delta_cpu, NS_PER_TICK);
|
|
per_cpu(processed_system_time, cpu) += delta_cpu * NS_PER_TICK;
|
|
- if (user_mode_vm(regs))
|
|
+ if (user_mode_vm(get_irq_regs()))
|
|
account_user_time(current, (cputime_t)delta_cpu);
|
|
else
|
|
account_system_time(current, HARDIRQ_OFFSET,
|
|
@@ -738,10 +740,10 @@ irqreturn_t timer_interrupt(int irq, voi
|
|
/* Local timer processing (see update_process_times()). */
|
|
run_local_timers();
|
|
if (rcu_pending(cpu))
|
|
- rcu_check_callbacks(cpu, user_mode_vm(regs));
|
|
+ rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
|
|
scheduler_tick();
|
|
run_posix_cpu_timers(current);
|
|
- profile_tick(CPU_PROFILING, regs);
|
|
+ profile_tick(CPU_PROFILING);
|
|
|
|
return IRQ_HANDLED;
|
|
}
|
|
@@ -951,10 +953,11 @@ extern void (*late_time_init)(void);
|
|
/* Duplicate of time_init() below, with hpet_enable part added */
|
|
static void __init hpet_time_init(void)
|
|
{
|
|
- xtime.tv_sec = get_cmos_time();
|
|
- xtime.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
|
|
- set_normalized_timespec(&wall_to_monotonic,
|
|
- -xtime.tv_sec, -xtime.tv_nsec);
|
|
+ struct timespec ts;
|
|
+ ts.tv_sec = get_cmos_time();
|
|
+ ts.tv_nsec = (INITIAL_JIFFIES % HZ) * (NSEC_PER_SEC / HZ);
|
|
+
|
|
+ do_settimeofday(&ts);
|
|
|
|
if ((hpet_enable() >= 0) && hpet_use_timer) {
|
|
printk("Using HPET for base-timer\n");
|
|
--- head-2010-05-25.orig/arch/x86/kernel/traps_32-xen.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/traps_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -28,6 +28,7 @@
|
|
#include <linux/kprobes.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/unwind.h>
|
|
+#include <linux/uaccess.h>
|
|
|
|
#ifdef CONFIG_EISA
|
|
#include <linux/ioport.h>
|
|
@@ -40,7 +41,6 @@
|
|
|
|
#include <asm/processor.h>
|
|
#include <asm/system.h>
|
|
-#include <asm/uaccess.h>
|
|
#include <asm/io.h>
|
|
#include <asm/atomic.h>
|
|
#include <asm/debugreg.h>
|
|
@@ -51,11 +51,14 @@
|
|
#include <asm/smp.h>
|
|
#include <asm/arch_hooks.h>
|
|
#include <asm/kdebug.h>
|
|
+#include <asm/stacktrace.h>
|
|
|
|
#include <linux/module.h>
|
|
|
|
#include "mach_traps.h"
|
|
|
|
+int panic_on_unrecovered_nmi;
|
|
+
|
|
asmlinkage int system_call(void);
|
|
|
|
struct desc_struct default_ldt[] = { { 0, 0 }, { 0, 0 }, { 0, 0 },
|
|
@@ -124,62 +127,63 @@ static inline int valid_stack_ptr(struct
|
|
p < (void *)tinfo + THREAD_SIZE - 3;
|
|
}
|
|
|
|
-/*
|
|
- * Print one address/symbol entries per line.
|
|
- */
|
|
-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
|
|
-{
|
|
- printk(" [<%08lx>] ", addr);
|
|
-
|
|
- print_symbol("%s\n", addr);
|
|
-}
|
|
-
|
|
static inline unsigned long print_context_stack(struct thread_info *tinfo,
|
|
unsigned long *stack, unsigned long ebp,
|
|
- char *log_lvl)
|
|
+ struct stacktrace_ops *ops, void *data)
|
|
{
|
|
unsigned long addr;
|
|
|
|
#ifdef CONFIG_FRAME_POINTER
|
|
while (valid_stack_ptr(tinfo, (void *)ebp)) {
|
|
+ unsigned long new_ebp;
|
|
addr = *(unsigned long *)(ebp + 4);
|
|
- print_addr_and_symbol(addr, log_lvl);
|
|
+ ops->address(data, addr);
|
|
/*
|
|
* break out of recursive entries (such as
|
|
- * end_of_stack_stop_unwind_function):
|
|
+ * end_of_stack_stop_unwind_function). Also,
|
|
+ * we can never allow a frame pointer to
|
|
+ * move downwards!
|
|
*/
|
|
- if (ebp == *(unsigned long *)ebp)
|
|
+ new_ebp = *(unsigned long *)ebp;
|
|
+ if (new_ebp <= ebp)
|
|
break;
|
|
- ebp = *(unsigned long *)ebp;
|
|
+ ebp = new_ebp;
|
|
}
|
|
#else
|
|
while (valid_stack_ptr(tinfo, stack)) {
|
|
addr = *stack++;
|
|
if (__kernel_text_address(addr))
|
|
- print_addr_and_symbol(addr, log_lvl);
|
|
+ ops->address(data, addr);
|
|
}
|
|
#endif
|
|
return ebp;
|
|
}
|
|
|
|
+struct ops_and_data {
|
|
+ struct stacktrace_ops *ops;
|
|
+ void *data;
|
|
+};
|
|
+
|
|
static asmlinkage int
|
|
-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
|
|
+dump_trace_unwind(struct unwind_frame_info *info, void *data)
|
|
{
|
|
+ struct ops_and_data *oad = (struct ops_and_data *)data;
|
|
int n = 0;
|
|
|
|
while (unwind(info) == 0 && UNW_PC(info)) {
|
|
n++;
|
|
- print_addr_and_symbol(UNW_PC(info), log_lvl);
|
|
+ oad->ops->address(oad->data, UNW_PC(info));
|
|
if (arch_unw_user_mode(info))
|
|
break;
|
|
}
|
|
return n;
|
|
}
|
|
|
|
-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
- unsigned long *stack, char *log_lvl)
|
|
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
|
|
+ unsigned long *stack,
|
|
+ struct stacktrace_ops *ops, void *data)
|
|
{
|
|
- unsigned long ebp;
|
|
+ unsigned long ebp = 0;
|
|
|
|
if (!task)
|
|
task = current;
|
|
@@ -187,54 +191,116 @@ static void show_trace_log_lvl(struct ta
|
|
if (call_trace >= 0) {
|
|
int unw_ret = 0;
|
|
struct unwind_frame_info info;
|
|
+ struct ops_and_data oad = { .ops = ops, .data = data };
|
|
|
|
if (regs) {
|
|
if (unwind_init_frame_info(&info, task, regs) == 0)
|
|
- unw_ret = show_trace_unwind(&info, log_lvl);
|
|
+ unw_ret = dump_trace_unwind(&info, &oad);
|
|
} else if (task == current)
|
|
- unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
|
|
+ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
|
|
else {
|
|
if (unwind_init_blocked(&info, task) == 0)
|
|
- unw_ret = show_trace_unwind(&info, log_lvl);
|
|
+ unw_ret = dump_trace_unwind(&info, &oad);
|
|
}
|
|
if (unw_ret > 0) {
|
|
if (call_trace == 1 && !arch_unw_user_mode(&info)) {
|
|
- print_symbol("DWARF2 unwinder stuck at %s\n",
|
|
+ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
|
|
UNW_PC(&info));
|
|
if (UNW_SP(&info) >= PAGE_OFFSET) {
|
|
- printk("Leftover inexact backtrace:\n");
|
|
+ ops->warning(data, "Leftover inexact backtrace:\n");
|
|
stack = (void *)UNW_SP(&info);
|
|
+ if (!stack)
|
|
+ return;
|
|
+ ebp = UNW_FP(&info);
|
|
} else
|
|
- printk("Full inexact backtrace again:\n");
|
|
+ ops->warning(data, "Full inexact backtrace again:\n");
|
|
} else if (call_trace >= 1)
|
|
return;
|
|
else
|
|
- printk("Full inexact backtrace again:\n");
|
|
+ ops->warning(data, "Full inexact backtrace again:\n");
|
|
} else
|
|
- printk("Inexact backtrace:\n");
|
|
+ ops->warning(data, "Inexact backtrace:\n");
|
|
}
|
|
-
|
|
- if (task == current) {
|
|
- /* Grab ebp right from our regs */
|
|
- asm ("movl %%ebp, %0" : "=r" (ebp) : );
|
|
- } else {
|
|
- /* ebp is the last reg pushed by switch_to */
|
|
- ebp = *(unsigned long *) task->thread.esp;
|
|
+ if (!stack) {
|
|
+ unsigned long dummy;
|
|
+ stack = &dummy;
|
|
+ if (task && task != current)
|
|
+ stack = (unsigned long *)task->thread.esp;
|
|
+ }
|
|
+
|
|
+#ifdef CONFIG_FRAME_POINTER
|
|
+ if (!ebp) {
|
|
+ if (task == current) {
|
|
+ /* Grab ebp right from our regs */
|
|
+ asm ("movl %%ebp, %0" : "=r" (ebp) : );
|
|
+ } else {
|
|
+ /* ebp is the last reg pushed by switch_to */
|
|
+ ebp = *(unsigned long *) task->thread.esp;
|
|
+ }
|
|
}
|
|
+#endif
|
|
|
|
while (1) {
|
|
struct thread_info *context;
|
|
context = (struct thread_info *)
|
|
((unsigned long)stack & (~(THREAD_SIZE - 1)));
|
|
- ebp = print_context_stack(context, stack, ebp, log_lvl);
|
|
+ ebp = print_context_stack(context, stack, ebp, ops, data);
|
|
+ /* Should be after the line below, but somewhere
|
|
+ in early boot context comes out corrupted and we
|
|
+ can't reference it -AK */
|
|
+ if (ops->stack(data, "IRQ") < 0)
|
|
+ break;
|
|
stack = (unsigned long*)context->previous_esp;
|
|
if (!stack)
|
|
break;
|
|
- printk("%s =======================\n", log_lvl);
|
|
}
|
|
}
|
|
+EXPORT_SYMBOL(dump_trace);
|
|
+
|
|
+static void
|
|
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
|
|
+{
|
|
+ printk(data);
|
|
+ print_symbol(msg, symbol);
|
|
+ printk("\n");
|
|
+}
|
|
+
|
|
+static void print_trace_warning(void *data, char *msg)
|
|
+{
|
|
+ printk("%s%s\n", (char *)data, msg);
|
|
+}
|
|
|
|
-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
|
|
+static int print_trace_stack(void *data, char *name)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Print one address/symbol entries per line.
|
|
+ */
|
|
+static void print_trace_address(void *data, unsigned long addr)
|
|
+{
|
|
+ printk("%s [<%08lx>] ", (char *)data, addr);
|
|
+ print_symbol("%s\n", addr);
|
|
+}
|
|
+
|
|
+static struct stacktrace_ops print_trace_ops = {
|
|
+ .warning = print_trace_warning,
|
|
+ .warning_symbol = print_trace_warning_symbol,
|
|
+ .stack = print_trace_stack,
|
|
+ .address = print_trace_address,
|
|
+};
|
|
+
|
|
+static void
|
|
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
|
|
+ unsigned long * stack, char *log_lvl)
|
|
+{
|
|
+ dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
|
|
+ printk("%s =======================\n", log_lvl);
|
|
+}
|
|
+
|
|
+void show_trace(struct task_struct *task, struct pt_regs *regs,
|
|
+ unsigned long * stack)
|
|
{
|
|
show_trace_log_lvl(task, regs, stack, "");
|
|
}
|
|
@@ -297,12 +363,13 @@ void show_registers(struct pt_regs *regs
|
|
ss = regs->xss & 0xffff;
|
|
}
|
|
print_modules();
|
|
- printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
|
|
- "EFLAGS: %08lx (%s %.*s) \n",
|
|
+ printk(KERN_EMERG "CPU: %d\n"
|
|
+ KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
|
|
+ KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
|
|
smp_processor_id(), 0xffff & regs->xcs, regs->eip,
|
|
- print_tainted(), regs->eflags, system_utsname.release,
|
|
- (int)strcspn(system_utsname.version, " "),
|
|
- system_utsname.version);
|
|
+ print_tainted(), regs->eflags, init_utsname()->release,
|
|
+ (int)strcspn(init_utsname()->version, " "),
|
|
+ init_utsname()->version);
|
|
print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip);
|
|
printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
|
|
regs->eax, regs->ebx, regs->ecx, regs->edx);
|
|
@@ -319,6 +386,8 @@ void show_registers(struct pt_regs *regs
|
|
*/
|
|
if (in_kernel) {
|
|
u8 __user *eip;
|
|
+ int code_bytes = 64;
|
|
+ unsigned char c;
|
|
|
|
printk("\n" KERN_EMERG "Stack: ");
|
|
show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG);
|
|
@@ -326,9 +395,12 @@ void show_registers(struct pt_regs *regs
|
|
printk(KERN_EMERG "Code: ");
|
|
|
|
eip = (u8 __user *)regs->eip - 43;
|
|
- for (i = 0; i < 64; i++, eip++) {
|
|
- unsigned char c;
|
|
-
|
|
+ if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
|
|
+ /* try starting at EIP */
|
|
+ eip = (u8 __user *)regs->eip;
|
|
+ code_bytes = 32;
|
|
+ }
|
|
+ for (i = 0; i < code_bytes; i++, eip++) {
|
|
if (eip < (u8 __user *)PAGE_OFFSET || __get_user(c, eip)) {
|
|
printk(" Bad EIP value.");
|
|
break;
|
|
@@ -349,7 +421,7 @@ static void handle_BUG(struct pt_regs *r
|
|
|
|
if (eip < PAGE_OFFSET)
|
|
return;
|
|
- if (__get_user(ud2, (unsigned short __user *)eip))
|
|
+ if (probe_kernel_address((unsigned short __user *)eip, ud2))
|
|
return;
|
|
if (ud2 != 0x0b0f)
|
|
return;
|
|
@@ -362,7 +434,8 @@ static void handle_BUG(struct pt_regs *r
|
|
char *file;
|
|
char c;
|
|
|
|
- if (__get_user(line, (unsigned short __user *)(eip + 2)))
|
|
+ if (probe_kernel_address((unsigned short __user *)(eip + 2),
|
|
+ line))
|
|
break;
|
|
if (__get_user(file, (char * __user *)(eip + 4)) ||
|
|
(unsigned long)file < PAGE_OFFSET || __get_user(c, file))
|
|
@@ -604,18 +677,24 @@ gp_in_kernel:
|
|
}
|
|
}
|
|
|
|
-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
|
|
+static __kprobes void
|
|
+mem_parity_error(unsigned char reason, struct pt_regs * regs)
|
|
{
|
|
- printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
|
|
- "to continue\n");
|
|
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
|
|
+ "CPU %d.\n", reason, smp_processor_id());
|
|
printk(KERN_EMERG "You probably have a hardware problem with your RAM "
|
|
"chips\n");
|
|
+ if (panic_on_unrecovered_nmi)
|
|
+ panic("NMI: Not continuing");
|
|
+
|
|
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
|
|
/* Clear and disable the memory parity error line. */
|
|
clear_mem_error(reason);
|
|
}
|
|
|
|
-static void io_check_error(unsigned char reason, struct pt_regs * regs)
|
|
+static __kprobes void
|
|
+io_check_error(unsigned char reason, struct pt_regs * regs)
|
|
{
|
|
printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
|
|
show_registers(regs);
|
|
@@ -624,7 +703,8 @@ static void io_check_error(unsigned char
|
|
clear_io_check_error(reason);
|
|
}
|
|
|
|
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
|
|
+static __kprobes void
|
|
+unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
|
|
{
|
|
#ifdef CONFIG_MCA
|
|
/* Might actually be able to figure out what the guilty party
|
|
@@ -634,15 +714,18 @@ static void unknown_nmi_error(unsigned c
|
|
return;
|
|
}
|
|
#endif
|
|
- printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
|
|
- reason, smp_processor_id());
|
|
- printk("Dazed and confused, but trying to continue\n");
|
|
- printk("Do you have a strange power saving mode enabled?\n");
|
|
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
|
|
+ "CPU %d.\n", reason, smp_processor_id());
|
|
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
|
|
+ if (panic_on_unrecovered_nmi)
|
|
+ panic("NMI: Not continuing");
|
|
+
|
|
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
}
|
|
|
|
static DEFINE_SPINLOCK(nmi_print_lock);
|
|
|
|
-void die_nmi (struct pt_regs *regs, const char *msg)
|
|
+void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
|
|
{
|
|
if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
|
|
NOTIFY_STOP)
|
|
@@ -674,7 +757,7 @@ void die_nmi (struct pt_regs *regs, cons
|
|
do_exit(SIGSEGV);
|
|
}
|
|
|
|
-static void default_do_nmi(struct pt_regs * regs)
|
|
+static __kprobes void default_do_nmi(struct pt_regs * regs)
|
|
{
|
|
unsigned char reason = 0;
|
|
|
|
@@ -691,12 +774,12 @@ static void default_do_nmi(struct pt_reg
|
|
* Ok, so this is none of the documented NMI sources,
|
|
* so it must be the NMI watchdog.
|
|
*/
|
|
- if (nmi_watchdog) {
|
|
- nmi_watchdog_tick(regs);
|
|
+ if (nmi_watchdog_tick(regs, reason))
|
|
return;
|
|
- }
|
|
+ if (!do_nmi_callback(regs, smp_processor_id()))
|
|
#endif
|
|
- unknown_nmi_error(reason, regs);
|
|
+ unknown_nmi_error(reason, regs);
|
|
+
|
|
return;
|
|
}
|
|
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
@@ -712,14 +795,7 @@ static void default_do_nmi(struct pt_reg
|
|
reassert_nmi();
|
|
}
|
|
|
|
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
|
|
-{
|
|
- return 0;
|
|
-}
|
|
-
|
|
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
|
|
-
|
|
-fastcall void do_nmi(struct pt_regs * regs, long error_code)
|
|
+fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
|
|
{
|
|
int cpu;
|
|
|
|
@@ -729,25 +805,11 @@ fastcall void do_nmi(struct pt_regs * re
|
|
|
|
++nmi_count(cpu);
|
|
|
|
- if (!rcu_dereference(nmi_callback)(regs, cpu))
|
|
- default_do_nmi(regs);
|
|
+ default_do_nmi(regs);
|
|
|
|
nmi_exit();
|
|
}
|
|
|
|
-void set_nmi_callback(nmi_callback_t callback)
|
|
-{
|
|
- vmalloc_sync_all();
|
|
- rcu_assign_pointer(nmi_callback, callback);
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(set_nmi_callback);
|
|
-
|
|
-void unset_nmi_callback(void)
|
|
-{
|
|
- nmi_callback = dummy_nmi_callback;
|
|
-}
|
|
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
|
|
-
|
|
#ifdef CONFIG_KPROBES
|
|
fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
|
|
{
|
|
--- head-2010-05-25.orig/arch/x86/mach-xen/setup.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/mach-xen/setup.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -103,8 +103,10 @@ void __init pre_setup_arch_hook(void)
|
|
|
|
setup_xen_features();
|
|
|
|
- if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0)
|
|
- set_fixaddr_top(pp.virt_start);
|
|
+ if (HYPERVISOR_xen_version(XENVER_platform_parameters, &pp) == 0) {
|
|
+ hypervisor_virt_start = pp.virt_start;
|
|
+ reserve_top_address(0UL - pp.virt_start);
|
|
+ }
|
|
|
|
if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
|
|
machine_to_phys_mapping = (unsigned long *)mapping.v_start;
|
|
--- head-2010-05-25.orig/arch/x86/mm/fault_32-xen.c 2007-12-10 08:47:31.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/fault_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -27,21 +27,24 @@
|
|
#include <asm/uaccess.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/kdebug.h>
|
|
+#include <asm/segment.h>
|
|
|
|
extern void die(const char *,struct pt_regs *,long);
|
|
|
|
-#ifdef CONFIG_KPROBES
|
|
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
|
|
+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
|
|
+
|
|
int register_page_fault_notifier(struct notifier_block *nb)
|
|
{
|
|
vmalloc_sync_all();
|
|
return atomic_notifier_chain_register(&notify_page_fault_chain, nb);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
|
|
|
|
int unregister_page_fault_notifier(struct notifier_block *nb)
|
|
{
|
|
return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
|
|
|
|
static inline int notify_page_fault(enum die_val val, const char *str,
|
|
struct pt_regs *regs, long err, int trap, int sig)
|
|
@@ -55,14 +58,6 @@ static inline int notify_page_fault(enum
|
|
};
|
|
return atomic_notifier_call_chain(&notify_page_fault_chain, val, &args);
|
|
}
|
|
-#else
|
|
-static inline int notify_page_fault(enum die_val val, const char *str,
|
|
- struct pt_regs *regs, long err, int trap, int sig)
|
|
-{
|
|
- return NOTIFY_DONE;
|
|
-}
|
|
-#endif
|
|
-
|
|
|
|
/*
|
|
* Unlock any spinlocks which will prevent us from getting the
|
|
@@ -119,10 +114,10 @@ static inline unsigned long get_segment_
|
|
}
|
|
|
|
/* The standard kernel/user address space limit. */
|
|
- *eip_limit = (seg & 2) ? USER_DS.seg : KERNEL_DS.seg;
|
|
+ *eip_limit = user_mode(regs) ? USER_DS.seg : KERNEL_DS.seg;
|
|
|
|
/* By far the most common cases. */
|
|
- if (likely(seg == __USER_CS || seg == GET_KERNEL_CS()))
|
|
+ if (likely(SEGMENT_IS_FLAT_CODE(seg)))
|
|
return eip;
|
|
|
|
/* Check the segment exists, is within the current LDT/GDT size,
|
|
@@ -559,11 +554,7 @@ good_area:
|
|
write = 0;
|
|
switch (error_code & 3) {
|
|
default: /* 3: write, present */
|
|
-#ifdef TEST_VERIFY_AREA
|
|
- if (regs->cs == GET_KERNEL_CS())
|
|
- printk("WP fault at %08lx\n", regs->eip);
|
|
-#endif
|
|
- /* fall through */
|
|
+ /* fall through */
|
|
case 2: /* write, not present */
|
|
if (!(vma->vm_flags & VM_WRITE))
|
|
goto bad_area;
|
|
@@ -572,7 +563,7 @@ good_area:
|
|
case 1: /* read, present */
|
|
goto bad_area;
|
|
case 0: /* read, not present */
|
|
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
|
|
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
|
|
goto bad_area;
|
|
}
|
|
|
|
@@ -704,7 +695,7 @@ no_context:
|
|
*/
|
|
out_of_memory:
|
|
up_read(&mm->mmap_sem);
|
|
- if (tsk->pid == 1) {
|
|
+ if (is_init(tsk)) {
|
|
yield();
|
|
down_read(&mm->mmap_sem);
|
|
goto survive;
|
|
--- head-2010-05-25.orig/arch/x86/mm/highmem_32-xen.c 2008-10-29 09:55:56.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -38,11 +38,9 @@ static void *__kmap_atomic(struct page *
|
|
|
|
idx = type + KM_TYPE_NR*smp_processor_id();
|
|
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
-#ifdef CONFIG_DEBUG_HIGHMEM
|
|
if (!pte_none(*(kmap_pte-idx)))
|
|
BUG();
|
|
-#endif
|
|
- set_pte_at_sync(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
|
|
+ set_pte_at(&init_mm, vaddr, kmap_pte-idx, mk_pte(page, prot));
|
|
|
|
return (void*) vaddr;
|
|
}
|
|
@@ -62,36 +60,26 @@ void *kmap_atomic_pte(struct page *page,
|
|
|
|
void kunmap_atomic(void *kvaddr, enum km_type type)
|
|
{
|
|
-#if defined(CONFIG_DEBUG_HIGHMEM) || defined(CONFIG_XEN)
|
|
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
|
|
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
|
|
|
|
- if (vaddr < FIXADDR_START) { // FIXME
|
|
+#ifdef CONFIG_DEBUG_HIGHMEM
|
|
+ if (vaddr >= PAGE_OFFSET && vaddr < (unsigned long)high_memory) {
|
|
dec_preempt_count();
|
|
preempt_check_resched();
|
|
return;
|
|
}
|
|
-#endif
|
|
|
|
-#if defined(CONFIG_DEBUG_HIGHMEM)
|
|
if (vaddr != __fix_to_virt(FIX_KMAP_BEGIN+idx))
|
|
BUG();
|
|
-
|
|
- /*
|
|
- * force other mappings to Oops if they'll try to access
|
|
- * this pte without first remap it
|
|
- */
|
|
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
|
|
- __flush_tlb_one(vaddr);
|
|
-#elif defined(CONFIG_XEN)
|
|
+#endif
|
|
/*
|
|
- * We must ensure there are no dangling pagetable references when
|
|
- * returning memory to Xen (decrease_reservation).
|
|
- * XXX TODO: We could make this faster by only zapping when
|
|
- * kmap_flush_unused is called but that is trickier and more invasive.
|
|
+ * Force other mappings to Oops if they'll try to access this pte
|
|
+ * without first remap it. Keeping stale mappings around is a bad idea
|
|
+ * also, in case the page changes cacheability attributes or becomes
|
|
+ * a protected page in a hypervisor.
|
|
*/
|
|
- pte_clear(&init_mm, vaddr, kmap_pte-idx);
|
|
-#endif
|
|
+ kpte_clear_flush(kmap_pte-idx, vaddr);
|
|
|
|
dec_preempt_count();
|
|
preempt_check_resched();
|
|
@@ -110,7 +98,6 @@ void *kmap_atomic_pfn(unsigned long pfn,
|
|
idx = type + KM_TYPE_NR*smp_processor_id();
|
|
vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
|
|
set_pte(kmap_pte-idx, pfn_pte(pfn, kmap_prot));
|
|
- __flush_tlb_one(vaddr);
|
|
|
|
return (void*) vaddr;
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/mm/hypervisor.c 2009-06-09 15:01:37.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/mm/hypervisor.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -31,6 +31,7 @@
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
+#include <linux/hardirq.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/vmalloc.h>
|
|
#include <asm/page.h>
|
|
@@ -44,6 +45,302 @@
|
|
#include <asm/tlbflush.h>
|
|
#include <linux/highmem.h>
|
|
|
|
+EXPORT_SYMBOL(hypercall_page);
|
|
+
|
|
+#define NR_MC BITS_PER_LONG
|
|
+#define NR_MMU BITS_PER_LONG
|
|
+#define NR_MMUEXT (BITS_PER_LONG / 4)
|
|
+
|
|
+DEFINE_PER_CPU(bool, xen_lazy_mmu);
|
|
+EXPORT_PER_CPU_SYMBOL(xen_lazy_mmu);
|
|
+struct lazy_mmu {
|
|
+ unsigned int nr_mc, nr_mmu, nr_mmuext;
|
|
+ multicall_entry_t mc[NR_MC];
|
|
+ mmu_update_t mmu[NR_MMU];
|
|
+ struct mmuext_op mmuext[NR_MMUEXT];
|
|
+};
|
|
+static DEFINE_PER_CPU(struct lazy_mmu, lazy_mmu);
|
|
+
|
|
+static inline bool use_lazy_mmu_mode(void)
|
|
+{
|
|
+#ifdef CONFIG_PREEMPT
|
|
+ if (!preempt_count())
|
|
+ return false;
|
|
+#endif
|
|
+ return !irq_count();
|
|
+}
|
|
+
|
|
+static void multicall_failed(const multicall_entry_t *mc, int rc)
|
|
+{
|
|
+ printk(KERN_EMERG "hypercall#%lu(%lx, %lx, %lx, %lx)"
|
|
+ " failed: %d (caller %lx)\n",
|
|
+ mc->op, mc->args[0], mc->args[1], mc->args[2], mc->args[3],
|
|
+ rc, mc->args[5]);
|
|
+ BUG();
|
|
+}
|
|
+
|
|
+int xen_multicall_flush(bool ret_last) {
|
|
+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
|
|
+ multicall_entry_t *mc = lazy->mc;
|
|
+ unsigned int count = lazy->nr_mc;
|
|
+
|
|
+ if (!count || !use_lazy_mmu_mode())
|
|
+ return 0;
|
|
+
|
|
+ lazy->nr_mc = 0;
|
|
+ lazy->nr_mmu = 0;
|
|
+ lazy->nr_mmuext = 0;
|
|
+
|
|
+ if (count == 1) {
|
|
+ int rc = _hypercall(int, mc->op, mc->args[0], mc->args[1],
|
|
+ mc->args[2], mc->args[3], mc->args[4]);
|
|
+
|
|
+ if (unlikely(rc)) {
|
|
+ if (ret_last)
|
|
+ return rc;
|
|
+ multicall_failed(mc, rc);
|
|
+ }
|
|
+ } else {
|
|
+ if (HYPERVISOR_multicall(mc, count))
|
|
+ BUG();
|
|
+ while (count-- > ret_last)
|
|
+ if (unlikely(mc++->result))
|
|
+ multicall_failed(mc - 1, mc[-1].result);
|
|
+ if (ret_last)
|
|
+ return mc->result;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL(xen_multicall_flush);
|
|
+
|
|
+int xen_multi_update_va_mapping(unsigned long va, pte_t pte,
|
|
+ unsigned long uvmf)
|
|
+{
|
|
+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
|
|
+ multicall_entry_t *mc;
|
|
+
|
|
+ if (unlikely(!use_lazy_mmu_mode()))
|
|
+#ifdef CONFIG_X86_PAE
|
|
+ return _hypercall4(int, update_va_mapping, va,
|
|
+ pte.pte_low, pte.pte_high, uvmf);
|
|
+#else
|
|
+ return _hypercall3(int, update_va_mapping, va,
|
|
+ pte.pte, uvmf);
|
|
+#endif
|
|
+
|
|
+ if (unlikely(lazy->nr_mc == NR_MC))
|
|
+ xen_multicall_flush(false);
|
|
+
|
|
+ mc = lazy->mc + lazy->nr_mc++;
|
|
+ mc->op = __HYPERVISOR_update_va_mapping;
|
|
+ mc->args[0] = va;
|
|
+#ifndef CONFIG_X86_PAE
|
|
+ mc->args[1] = pte.pte;
|
|
+#else
|
|
+ mc->args[1] = pte.pte_low;
|
|
+ mc->args[2] = pte.pte_high;
|
|
+#endif
|
|
+ mc->args[MULTI_UVMFLAGS_INDEX] = uvmf;
|
|
+ mc->args[5] = (long)__builtin_return_address(0);
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static inline bool mmu_may_merge(const multicall_entry_t *mc,
|
|
+ unsigned int op, domid_t domid)
|
|
+{
|
|
+ return mc->op == op && !mc->args[2] && mc->args[3] == domid;
|
|
+}
|
|
+
|
|
+int xen_multi_mmu_update(mmu_update_t *src, unsigned int count,
|
|
+ unsigned int *success_count, domid_t domid)
|
|
+{
|
|
+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
|
|
+ multicall_entry_t *mc = lazy->mc + lazy->nr_mc;
|
|
+ mmu_update_t *dst;
|
|
+ bool commit, merge;
|
|
+
|
|
+ if (unlikely(!use_lazy_mmu_mode()))
|
|
+ return _hypercall4(int, mmu_update, src, count,
|
|
+ success_count, domid);
|
|
+
|
|
+ commit = (lazy->nr_mmu + count) > NR_MMU || success_count;
|
|
+ merge = lazy->nr_mc && !commit
|
|
+ && mmu_may_merge(mc - 1, __HYPERVISOR_mmu_update, domid);
|
|
+ if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
|
|
+ xen_multicall_flush(false);
|
|
+ mc = lazy->mc;
|
|
+ commit = count > NR_MMU || success_count;
|
|
+ }
|
|
+
|
|
+ if (!lazy->nr_mc && unlikely(commit))
|
|
+ return _hypercall4(int, mmu_update, src, count,
|
|
+ success_count, domid);
|
|
+
|
|
+ dst = lazy->mmu + lazy->nr_mmu;
|
|
+ lazy->nr_mmu += count;
|
|
+ if (merge) {
|
|
+ mc[-1].args[1] += count;
|
|
+ memcpy(dst, src, count * sizeof(*src));
|
|
+ } else {
|
|
+ ++lazy->nr_mc;
|
|
+ mc->op = __HYPERVISOR_mmu_update;
|
|
+ if (!commit) {
|
|
+ mc->args[0] = (unsigned long)dst;
|
|
+ memcpy(dst, src, count * sizeof(*src));
|
|
+ } else
|
|
+ mc->args[0] = (unsigned long)src;
|
|
+ mc->args[1] = count;
|
|
+ mc->args[2] = (unsigned long)success_count;
|
|
+ mc->args[3] = domid;
|
|
+ mc->args[5] = (long)__builtin_return_address(0);
|
|
+ }
|
|
+
|
|
+ while (!commit && count--)
|
|
+ switch (src++->ptr & (sizeof(pteval_t) - 1)) {
|
|
+ case MMU_NORMAL_PT_UPDATE:
|
|
+ case MMU_PT_UPDATE_PRESERVE_AD:
|
|
+ break;
|
|
+ default:
|
|
+ commit = true;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return commit ? xen_multicall_flush(true) : 0;
|
|
+}
|
|
+
|
|
+int xen_multi_mmuext_op(struct mmuext_op *src, unsigned int count,
|
|
+ unsigned int *success_count, domid_t domid)
|
|
+{
|
|
+ struct lazy_mmu *lazy = &__get_cpu_var(lazy_mmu);
|
|
+ multicall_entry_t *mc;
|
|
+ struct mmuext_op *dst;
|
|
+ bool commit, merge;
|
|
+
|
|
+ if (unlikely(!use_lazy_mmu_mode()))
|
|
+ return _hypercall4(int, mmuext_op, src, count,
|
|
+ success_count, domid);
|
|
+
|
|
+ /*
|
|
+ * While it could be useful in theory, I've never seen the body of
|
|
+ * this conditional to be reached, hence it seems more reasonable
|
|
+ * to disable it for the time being.
|
|
+ */
|
|
+ if (0 && likely(count)
|
|
+ && likely(!success_count)
|
|
+ && likely(domid == DOMID_SELF)
|
|
+ && likely(lazy->nr_mc)
|
|
+ && lazy->mc[lazy->nr_mc - 1].op == __HYPERVISOR_update_va_mapping) {
|
|
+ unsigned long oldf, newf = UVMF_NONE;
|
|
+
|
|
+ switch (src->cmd) {
|
|
+ case MMUEXT_TLB_FLUSH_ALL:
|
|
+ newf = UVMF_TLB_FLUSH | UVMF_ALL;
|
|
+ break;
|
|
+ case MMUEXT_INVLPG_ALL:
|
|
+ newf = UVMF_INVLPG | UVMF_ALL;
|
|
+ break;
|
|
+ case MMUEXT_TLB_FLUSH_MULTI:
|
|
+ newf = UVMF_TLB_FLUSH | UVMF_MULTI
|
|
+ | (unsigned long)src->arg2.vcpumask.p;
|
|
+ break;
|
|
+ case MMUEXT_INVLPG_MULTI:
|
|
+ newf = UVMF_INVLPG | UVMF_MULTI
|
|
+ | (unsigned long)src->arg2.vcpumask.p;
|
|
+ break;
|
|
+ case MMUEXT_TLB_FLUSH_LOCAL:
|
|
+ newf = UVMF_TLB_FLUSH | UVMF_LOCAL;
|
|
+ break;
|
|
+ case MMUEXT_INVLPG_LOCAL:
|
|
+ newf = UVMF_INVLPG | UVMF_LOCAL;
|
|
+ break;
|
|
+ }
|
|
+ mc = lazy->mc + lazy->nr_mc - 1;
|
|
+ oldf = mc->args[MULTI_UVMFLAGS_INDEX];
|
|
+ if (newf == UVMF_NONE || oldf == UVMF_NONE
|
|
+ || newf == (UVMF_TLB_FLUSH | UVMF_ALL))
|
|
+ ;
|
|
+ else if (oldf == (UVMF_TLB_FLUSH | UVMF_ALL))
|
|
+ newf = UVMF_TLB_FLUSH | UVMF_ALL;
|
|
+ else if ((newf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
|
|
+ && (oldf & UVMF_FLUSHTYPE_MASK) == UVMF_INVLPG
|
|
+ && ((src->arg1.linear_addr ^ mc->args[0])
|
|
+ >> PAGE_SHIFT))
|
|
+ newf = UVMF_NONE;
|
|
+ else if (((oldf | newf) & UVMF_ALL)
|
|
+ && !((oldf ^ newf) & UVMF_FLUSHTYPE_MASK))
|
|
+ newf |= UVMF_ALL;
|
|
+ else if ((oldf ^ newf) & ~UVMF_FLUSHTYPE_MASK)
|
|
+ newf = UVMF_NONE;
|
|
+ else if ((oldf & UVMF_FLUSHTYPE_MASK) == UVMF_TLB_FLUSH)
|
|
+ newf = (newf & ~UVMF_FLUSHTYPE_MASK) | UVMF_TLB_FLUSH;
|
|
+ else if ((newf & UVMF_FLUSHTYPE_MASK) != UVMF_TLB_FLUSH
|
|
+ && ((newf ^ oldf) & UVMF_FLUSHTYPE_MASK))
|
|
+ newf = UVMF_NONE;
|
|
+ if (newf != UVMF_NONE) {
|
|
+ mc->args[MULTI_UVMFLAGS_INDEX] = newf;
|
|
+ ++src;
|
|
+ if (!--count)
|
|
+ return 0;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ mc = lazy->mc + lazy->nr_mc;
|
|
+ commit = (lazy->nr_mmuext + count) > NR_MMUEXT || success_count;
|
|
+ merge = lazy->nr_mc && !commit
|
|
+ && mmu_may_merge(mc - 1, __HYPERVISOR_mmuext_op, domid);
|
|
+ if (unlikely(lazy->nr_mc == NR_MC) && !merge) {
|
|
+ xen_multicall_flush(false);
|
|
+ mc = lazy->mc;
|
|
+ commit = count > NR_MMUEXT || success_count;
|
|
+ }
|
|
+
|
|
+ if (!lazy->nr_mc && unlikely(commit))
|
|
+ return _hypercall4(int, mmuext_op, src, count,
|
|
+ success_count, domid);
|
|
+
|
|
+ dst = lazy->mmuext + lazy->nr_mmuext;
|
|
+ lazy->nr_mmuext += count;
|
|
+ if (merge) {
|
|
+ mc[-1].args[1] += count;
|
|
+ memcpy(dst, src, count * sizeof(*src));
|
|
+ } else {
|
|
+ ++lazy->nr_mc;
|
|
+ mc->op = __HYPERVISOR_mmuext_op;
|
|
+ if (!commit) {
|
|
+ mc->args[0] = (unsigned long)dst;
|
|
+ memcpy(dst, src, count * sizeof(*src));
|
|
+ } else
|
|
+ mc->args[0] = (unsigned long)src;
|
|
+ mc->args[1] = count;
|
|
+ mc->args[2] = (unsigned long)success_count;
|
|
+ mc->args[3] = domid;
|
|
+ mc->args[5] = (long)__builtin_return_address(0);
|
|
+ }
|
|
+
|
|
+ while (!commit && count--)
|
|
+ switch (src++->cmd) {
|
|
+ case MMUEXT_PIN_L1_TABLE:
|
|
+ case MMUEXT_PIN_L2_TABLE:
|
|
+ case MMUEXT_PIN_L3_TABLE:
|
|
+ case MMUEXT_PIN_L4_TABLE:
|
|
+ case MMUEXT_UNPIN_TABLE:
|
|
+ case MMUEXT_TLB_FLUSH_LOCAL:
|
|
+ case MMUEXT_INVLPG_LOCAL:
|
|
+ case MMUEXT_TLB_FLUSH_MULTI:
|
|
+ case MMUEXT_INVLPG_MULTI:
|
|
+ case MMUEXT_TLB_FLUSH_ALL:
|
|
+ case MMUEXT_INVLPG_ALL:
|
|
+ break;
|
|
+ default:
|
|
+ commit = true;
|
|
+ break;
|
|
+ }
|
|
+
|
|
+ return commit ? xen_multicall_flush(true) : 0;
|
|
+}
|
|
+
|
|
void xen_l1_entry_update(pte_t *ptr, pte_t val)
|
|
{
|
|
mmu_update_t u;
|
|
@@ -546,7 +843,8 @@ int write_ldt_entry(void *ldt, int entry
|
|
#define MAX_BATCHED_FULL_PTES 32
|
|
|
|
int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|
- unsigned long addr, unsigned long end, pgprot_t newprot)
|
|
+ unsigned long addr, unsigned long end, pgprot_t newprot,
|
|
+ int dirty_accountable)
|
|
{
|
|
int rc = 0, i = 0;
|
|
mmu_update_t u[MAX_BATCHED_FULL_PTES];
|
|
@@ -559,10 +857,14 @@ int xen_change_pte_range(struct mm_struc
|
|
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
|
|
do {
|
|
if (pte_present(*pte)) {
|
|
+ pte_t ptent = pte_modify(*pte, newprot);
|
|
+
|
|
+ if (dirty_accountable && pte_dirty(ptent))
|
|
+ ptent = pte_mkwrite(ptent);
|
|
u[i].ptr = (__pmd_val(*pmd) & PHYSICAL_PAGE_MASK)
|
|
| ((unsigned long)pte & ~PAGE_MASK)
|
|
| MMU_PT_UPDATE_PRESERVE_AD;
|
|
- u[i].val = __pte_val(pte_modify(*pte, newprot));
|
|
+ u[i].val = __pte_val(ptent);
|
|
if (++i == MAX_BATCHED_FULL_PTES) {
|
|
if ((rc = HYPERVISOR_mmu_update(
|
|
&u[0], i, NULL, DOMID_SELF)) != 0)
|
|
--- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2008-10-29 09:55:56.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -313,8 +313,7 @@ static void __init permanent_kmaps_init(
|
|
static void __meminit free_new_highpage(struct page *page, int pfn)
|
|
{
|
|
init_page_count(page);
|
|
- if (pfn < xen_start_info->nr_pages)
|
|
- __free_page(page);
|
|
+ __free_page(page);
|
|
totalhigh_pages++;
|
|
}
|
|
|
|
@@ -357,8 +356,16 @@ extern void set_highmem_pages_init(int);
|
|
static void __init set_highmem_pages_init(int bad_ppro)
|
|
{
|
|
int pfn;
|
|
- for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
|
|
+ for (pfn = highstart_pfn; pfn < highend_pfn
|
|
+ && pfn < xen_start_info->nr_pages; pfn++)
|
|
add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
|
|
+
|
|
+ /* XEN: init high-mem pages outside initial allocation. */
|
|
+ for (; pfn < highend_pfn; pfn++) {
|
|
+ ClearPageReserved(pfn_to_page(pfn));
|
|
+ init_page_count(pfn_to_page(pfn));
|
|
+ }
|
|
+
|
|
totalram_pages += totalhigh_pages;
|
|
}
|
|
#endif /* CONFIG_FLATMEM */
|
|
@@ -462,16 +469,22 @@ EXPORT_SYMBOL(__supported_pte_mask);
|
|
* on Enable
|
|
* off Disable
|
|
*/
|
|
-void __init noexec_setup(const char *str)
|
|
+static int __init noexec_setup(char *str)
|
|
{
|
|
- if (!strncmp(str, "on",2) && cpu_has_nx) {
|
|
- __supported_pte_mask |= _PAGE_NX;
|
|
- disable_nx = 0;
|
|
- } else if (!strncmp(str,"off",3)) {
|
|
+ if (!str || !strcmp(str, "on")) {
|
|
+ if (cpu_has_nx) {
|
|
+ __supported_pte_mask |= _PAGE_NX;
|
|
+ disable_nx = 0;
|
|
+ }
|
|
+ } else if (!strcmp(str,"off")) {
|
|
disable_nx = 1;
|
|
__supported_pte_mask &= ~_PAGE_NX;
|
|
- }
|
|
+ } else
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
}
|
|
+early_param("noexec", noexec_setup);
|
|
|
|
int nx_enabled = 0;
|
|
#ifdef CONFIG_X86_PAE
|
|
@@ -514,6 +527,7 @@ int __init set_kernel_exec(unsigned long
|
|
pte->pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
|
|
else
|
|
pte->pte_high |= 1 << (_PAGE_BIT_NX - 32);
|
|
+ pte_update_defer(&init_mm, vaddr, pte);
|
|
__flush_tlb_all();
|
|
out:
|
|
return ret;
|
|
@@ -596,18 +610,6 @@ static void __init test_wp_bit(void)
|
|
}
|
|
}
|
|
|
|
-static void __init set_max_mapnr_init(void)
|
|
-{
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- num_physpages = highend_pfn;
|
|
-#else
|
|
- num_physpages = max_low_pfn;
|
|
-#endif
|
|
-#ifdef CONFIG_FLATMEM
|
|
- max_mapnr = num_physpages;
|
|
-#endif
|
|
-}
|
|
-
|
|
static struct kcore_list kcore_mem, kcore_vmalloc;
|
|
|
|
void __init mem_init(void)
|
|
@@ -623,8 +625,7 @@ void __init mem_init(void)
|
|
#endif
|
|
|
|
#ifdef CONFIG_FLATMEM
|
|
- if (!mem_map)
|
|
- BUG();
|
|
+ BUG_ON(!mem_map);
|
|
#endif
|
|
|
|
bad_ppro = ppro_with_ram_bug();
|
|
@@ -639,24 +640,12 @@ void __init mem_init(void)
|
|
}
|
|
#endif
|
|
|
|
- set_max_mapnr_init();
|
|
-
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
|
|
-#else
|
|
- high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
|
|
-#endif
|
|
- printk("vmalloc area: %lx-%lx, maxmem %lx\n",
|
|
- VMALLOC_START,VMALLOC_END,MAXMEM);
|
|
- BUG_ON(VMALLOC_START > VMALLOC_END);
|
|
-
|
|
/* this will put all low memory onto the freelists */
|
|
totalram_pages += free_all_bootmem();
|
|
- /* XEN: init and count low-mem pages outside initial allocation. */
|
|
+ /* XEN: init low-mem pages outside initial allocation. */
|
|
for (pfn = xen_start_info->nr_pages; pfn < max_low_pfn; pfn++) {
|
|
ClearPageReserved(pfn_to_page(pfn));
|
|
init_page_count(pfn_to_page(pfn));
|
|
- totalram_pages++;
|
|
}
|
|
|
|
reservedpages = 0;
|
|
@@ -687,6 +676,48 @@ void __init mem_init(void)
|
|
(unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
|
|
);
|
|
|
|
+#if 1 /* double-sanity-check paranoia */
|
|
+ printk("virtual kernel memory layout:\n"
|
|
+ " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ " pkmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
|
+#endif
|
|
+ " vmalloc : 0x%08lx - 0x%08lx (%4ld MB)\n"
|
|
+ " lowmem : 0x%08lx - 0x%08lx (%4ld MB)\n"
|
|
+ " .init : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
|
+ " .data : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
|
+ " .text : 0x%08lx - 0x%08lx (%4ld kB)\n",
|
|
+ FIXADDR_START, FIXADDR_TOP,
|
|
+ (FIXADDR_TOP - FIXADDR_START) >> 10,
|
|
+
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ PKMAP_BASE, PKMAP_BASE+LAST_PKMAP*PAGE_SIZE,
|
|
+ (LAST_PKMAP*PAGE_SIZE) >> 10,
|
|
+#endif
|
|
+
|
|
+ VMALLOC_START, VMALLOC_END,
|
|
+ (VMALLOC_END - VMALLOC_START) >> 20,
|
|
+
|
|
+ (unsigned long)__va(0), (unsigned long)high_memory,
|
|
+ ((unsigned long)high_memory - (unsigned long)__va(0)) >> 20,
|
|
+
|
|
+ (unsigned long)&__init_begin, (unsigned long)&__init_end,
|
|
+ ((unsigned long)&__init_end - (unsigned long)&__init_begin) >> 10,
|
|
+
|
|
+ (unsigned long)&_etext, (unsigned long)&_edata,
|
|
+ ((unsigned long)&_edata - (unsigned long)&_etext) >> 10,
|
|
+
|
|
+ (unsigned long)&_text, (unsigned long)&_etext,
|
|
+ ((unsigned long)&_etext - (unsigned long)&_text) >> 10);
|
|
+
|
|
+#ifdef CONFIG_HIGHMEM
|
|
+ BUG_ON(PKMAP_BASE+LAST_PKMAP*PAGE_SIZE > FIXADDR_START);
|
|
+ BUG_ON(VMALLOC_END > PKMAP_BASE);
|
|
+#endif
|
|
+ BUG_ON(VMALLOC_START > VMALLOC_END);
|
|
+ BUG_ON((unsigned long)high_memory > VMALLOC_START);
|
|
+#endif /* double-sanity-check paranoia */
|
|
+
|
|
#ifdef CONFIG_X86_PAE
|
|
if (!cpu_has_pae)
|
|
panic("cannot execute a PAE-enabled kernel on a PAE-less CPU!");
|
|
@@ -717,7 +748,7 @@ void __init mem_init(void)
|
|
int arch_add_memory(int nid, u64 start, u64 size)
|
|
{
|
|
struct pglist_data *pgdata = &contig_page_data;
|
|
- struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
|
|
+ struct zone *zone = pgdata->node_zones + ZONE_HIGHMEM;
|
|
unsigned long start_pfn = start >> PAGE_SHIFT;
|
|
unsigned long nr_pages = size >> PAGE_SHIFT;
|
|
|
|
--- head-2010-05-25.orig/arch/x86/mm/ioremap_32-xen.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/mm/ioremap_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -12,7 +12,7 @@
|
|
#include <linux/init.h>
|
|
#include <linux/slab.h>
|
|
#include <linux/module.h>
|
|
-#include <asm/io.h>
|
|
+#include <linux/io.h>
|
|
#include <asm/fixmap.h>
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/tlbflush.h>
|
|
@@ -118,7 +118,7 @@ int direct_remap_pfn_range(struct vm_are
|
|
if (domid == DOMID_SELF)
|
|
return -EINVAL;
|
|
|
|
- vma->vm_flags |= VM_IO | VM_RESERVED;
|
|
+ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
|
|
|
|
vma->vm_mm->context.has_foreign_mappings = 1;
|
|
|
|
@@ -203,6 +203,7 @@ void __iomem * __ioremap(unsigned long p
|
|
void __iomem * addr;
|
|
struct vm_struct * area;
|
|
unsigned long offset, last_addr;
|
|
+ pgprot_t prot;
|
|
domid_t domid = DOMID_IO;
|
|
|
|
/* Don't allow wraparound or zero size */
|
|
@@ -234,6 +235,8 @@ void __iomem * __ioremap(unsigned long p
|
|
domid = DOMID_SELF;
|
|
}
|
|
|
|
+ prot = __pgprot(_KERNPG_TABLE | flags);
|
|
+
|
|
/*
|
|
* Mappings have to be page-aligned
|
|
*/
|
|
@@ -249,10 +252,9 @@ void __iomem * __ioremap(unsigned long p
|
|
return NULL;
|
|
area->phys_addr = phys_addr;
|
|
addr = (void __iomem *) area->addr;
|
|
- flags |= _KERNPG_TABLE;
|
|
if (__direct_remap_pfn_range(&init_mm, (unsigned long)addr,
|
|
phys_addr>>PAGE_SHIFT,
|
|
- size, __pgprot(flags), domid)) {
|
|
+ size, prot, domid)) {
|
|
vunmap((void __force *) addr);
|
|
return NULL;
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/mm/pgtable_32-xen.c 2009-03-18 10:39:31.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -68,7 +68,9 @@ void show_mem(void)
|
|
printk(KERN_INFO "%lu pages writeback\n",
|
|
global_page_state(NR_WRITEBACK));
|
|
printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED));
|
|
- printk(KERN_INFO "%lu pages slab\n", global_page_state(NR_SLAB));
|
|
+ printk(KERN_INFO "%lu pages slab\n",
|
|
+ global_page_state(NR_SLAB_RECLAIMABLE) +
|
|
+ global_page_state(NR_SLAB_UNRECLAIMABLE));
|
|
printk(KERN_INFO "%lu pages pagetables\n",
|
|
global_page_state(NR_PAGETABLE));
|
|
}
|
|
@@ -108,18 +110,11 @@ void set_pmd_pfn(unsigned long vaddr, un
|
|
__flush_tlb_one(vaddr);
|
|
}
|
|
|
|
-static int nr_fixmaps = 0;
|
|
+static int fixmaps;
|
|
unsigned long hypervisor_virt_start = HYPERVISOR_VIRT_START;
|
|
-unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - 2 * PAGE_SIZE);
|
|
+unsigned long __FIXADDR_TOP = (HYPERVISOR_VIRT_START - PAGE_SIZE);
|
|
EXPORT_SYMBOL(__FIXADDR_TOP);
|
|
|
|
-void __init set_fixaddr_top(unsigned long top)
|
|
-{
|
|
- BUG_ON(nr_fixmaps > 0);
|
|
- hypervisor_virt_start = top;
|
|
- __FIXADDR_TOP = hypervisor_virt_start - 2 * PAGE_SIZE;
|
|
-}
|
|
-
|
|
void __set_fixmap (enum fixed_addresses idx, maddr_t phys, pgprot_t flags)
|
|
{
|
|
unsigned long address = __fix_to_virt(idx);
|
|
@@ -141,7 +136,21 @@ void __set_fixmap (enum fixed_addresses
|
|
if (HYPERVISOR_update_va_mapping(address, pte,
|
|
UVMF_INVLPG|UVMF_ALL))
|
|
BUG();
|
|
- nr_fixmaps++;
|
|
+ fixmaps++;
|
|
+}
|
|
+
|
|
+/**
|
|
+ * reserve_top_address - reserves a hole in the top of kernel address space
|
|
+ * @reserve - size of hole to reserve
|
|
+ *
|
|
+ * Can be used to relocate the fixmap area and poke a hole in the top
|
|
+ * of kernel address space to make room for a hypervisor.
|
|
+ */
|
|
+void __init reserve_top_address(unsigned long reserve)
|
|
+{
|
|
+ BUG_ON(fixmaps > 0);
|
|
+ __FIXADDR_TOP = -reserve - PAGE_SIZE;
|
|
+ __VMALLOC_RESERVE += reserve;
|
|
}
|
|
|
|
pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
|
|
--- head-2010-05-25.orig/arch/x86/pci/irq-xen.c 2008-03-06 08:54:32.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/pci/irq-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -991,10 +991,6 @@ static void __init pcibios_fixup_irqs(vo
|
|
pci_name(bridge), 'A' + pin, irq);
|
|
}
|
|
if (irq >= 0) {
|
|
- if (use_pci_vector() &&
|
|
- !platform_legacy_irq(irq))
|
|
- irq = IO_APIC_VECTOR(irq);
|
|
-
|
|
printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
|
|
pci_name(dev), 'A' + pin, irq);
|
|
dev->irq = irq;
|
|
@@ -1155,10 +1151,6 @@ static int pirq_enable_irq(struct pci_de
|
|
}
|
|
dev = temp_dev;
|
|
if (irq >= 0) {
|
|
-#ifdef CONFIG_PCI_MSI
|
|
- if (!platform_legacy_irq(irq))
|
|
- irq = IO_APIC_VECTOR(irq);
|
|
-#endif
|
|
printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n",
|
|
pci_name(dev), 'A' + pin, irq);
|
|
dev->irq = irq;
|
|
@@ -1179,33 +1171,3 @@ static int pirq_enable_irq(struct pci_de
|
|
}
|
|
return 0;
|
|
}
|
|
-
|
|
-int pci_vector_resources(int last, int nr_released)
|
|
-{
|
|
- int count = nr_released;
|
|
-
|
|
- int next = last;
|
|
- int offset = (last % 8);
|
|
-
|
|
- while (next < FIRST_SYSTEM_VECTOR) {
|
|
- next += 8;
|
|
-#ifdef CONFIG_X86_64
|
|
- if (next == IA32_SYSCALL_VECTOR)
|
|
- continue;
|
|
-#else
|
|
- if (next == SYSCALL_VECTOR)
|
|
- continue;
|
|
-#endif
|
|
- count++;
|
|
- if (next >= FIRST_SYSTEM_VECTOR) {
|
|
- if (offset%8) {
|
|
- next = FIRST_DEVICE_VECTOR + offset;
|
|
- offset++;
|
|
- continue;
|
|
- }
|
|
- count--;
|
|
- }
|
|
- }
|
|
-
|
|
- return count;
|
|
-}
|
|
--- head-2010-05-25.orig/arch/x86/ia32/ia32entry-xen.S 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/ia32/ia32entry-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -83,6 +83,7 @@
|
|
*/
|
|
ENTRY(ia32_sysenter_target)
|
|
CFI_STARTPROC32 simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8-RIP+16
|
|
/*CFI_REL_OFFSET ss,SS-RIP+16*/
|
|
CFI_REL_OFFSET rsp,RSP-RIP+16
|
|
@@ -164,6 +165,7 @@ ENDPROC(ia32_sysenter_target)
|
|
*/
|
|
ENTRY(ia32_cstar_target)
|
|
CFI_STARTPROC32 simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8-RIP+16
|
|
/*CFI_REL_OFFSET ss,SS-RIP+16*/
|
|
CFI_REL_OFFSET rsp,RSP-RIP+16
|
|
@@ -243,6 +245,7 @@ ia32_badarg:
|
|
|
|
ENTRY(ia32_syscall)
|
|
CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8-RIP+16
|
|
/*CFI_REL_OFFSET ss,SS-RIP+16*/
|
|
CFI_REL_OFFSET rsp,RSP-RIP+16
|
|
@@ -320,6 +323,7 @@ ENTRY(ia32_ptregs_common)
|
|
popq %r11
|
|
CFI_ENDPROC
|
|
CFI_STARTPROC32 simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
|
|
CFI_REL_OFFSET rax,RAX-ARGOFFSET
|
|
CFI_REL_OFFSET rcx,RCX-ARGOFFSET
|
|
@@ -653,8 +657,8 @@ ia32_sys_call_table:
|
|
.quad sys_readlinkat /* 305 */
|
|
.quad sys_fchmodat
|
|
.quad sys_faccessat
|
|
- .quad quiet_ni_syscall /* pselect6 for now */
|
|
- .quad quiet_ni_syscall /* ppoll for now */
|
|
+ .quad compat_sys_pselect6
|
|
+ .quad compat_sys_ppoll
|
|
.quad sys_unshare /* 310 */
|
|
.quad compat_sys_set_robust_list
|
|
.quad compat_sys_get_robust_list
|
|
@@ -663,4 +667,5 @@ ia32_sys_call_table:
|
|
.quad sys_tee
|
|
.quad compat_sys_vmsplice
|
|
.quad compat_sys_move_pages
|
|
+ .quad sys_getcpu
|
|
ia32_syscall_end:
|
|
--- head-2010-05-25.orig/arch/x86/kernel/Makefile 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/Makefile 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -122,7 +122,7 @@ obj-$(CONFIG_X86_XEN) += fixup.o
|
|
###
|
|
# 64 bit specific files
|
|
ifeq ($(CONFIG_X86_64),y)
|
|
- obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_xen_64.o
|
|
+ obj-$(CONFIG_X86_XEN_GENAPIC) += genapic_64.o genapic_xen_64.o
|
|
obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o uv_time.o
|
|
obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
|
|
obj-$(CONFIG_AUDIT) += audit_64.o
|
|
@@ -138,5 +138,7 @@ ifeq ($(CONFIG_X86_64),y)
|
|
pci-dma_64-$(CONFIG_XEN) += pci-dma_32.o
|
|
endif
|
|
|
|
-disabled-obj-$(CONFIG_XEN) := i8253.o i8259_$(BITS).o reboot.o smpboot_$(BITS).o tsc_$(BITS).o
|
|
+disabled-obj-$(CONFIG_XEN) := early-quirks.o i8253.o i8259_$(BITS).o reboot.o \
|
|
+ smpboot_$(BITS).o tsc_$(BITS).o
|
|
+disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
|
|
%/head_$(BITS).o %/head_$(BITS).s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
|
|
--- head-2010-05-25.orig/arch/x86/kernel/e820_64-xen.c 2009-12-04 08:45:56.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/e820_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -16,6 +16,7 @@
|
|
#include <linux/string.h>
|
|
#include <linux/kexec.h>
|
|
#include <linux/module.h>
|
|
+#include <linux/mm.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/page.h>
|
|
@@ -25,6 +26,11 @@
|
|
#include <asm/sections.h>
|
|
#include <xen/interface/memory.h>
|
|
|
|
+struct e820map e820 __initdata;
|
|
+#ifdef CONFIG_XEN
|
|
+struct e820map machine_e820 __initdata;
|
|
+#endif
|
|
+
|
|
/*
|
|
* PFN of last memory page.
|
|
*/
|
|
@@ -43,14 +49,10 @@ unsigned long end_pfn_map;
|
|
/*
|
|
* Last pfn which the user wants to use.
|
|
*/
|
|
-unsigned long end_user_pfn = MAXMEM>>PAGE_SHIFT;
|
|
+static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
|
|
|
|
extern struct resource code_resource, data_resource;
|
|
|
|
-#ifdef CONFIG_XEN
|
|
-extern struct e820map machine_e820;
|
|
-#endif
|
|
-
|
|
/* Check for some hardcoded bad areas that early boot is not allowed to touch */
|
|
static inline int bad_addr(unsigned long *addrp, unsigned long size)
|
|
{
|
|
@@ -59,13 +61,13 @@ static inline int bad_addr(unsigned long
|
|
#ifndef CONFIG_XEN
|
|
/* various gunk below that needed for SMP startup */
|
|
if (addr < 0x8000) {
|
|
- *addrp = 0x8000;
|
|
+ *addrp = PAGE_ALIGN(0x8000);
|
|
return 1;
|
|
}
|
|
|
|
/* direct mapping tables of the kernel */
|
|
if (last >= table_start<<PAGE_SHIFT && addr < table_end<<PAGE_SHIFT) {
|
|
- *addrp = table_end << PAGE_SHIFT;
|
|
+ *addrp = PAGE_ALIGN(table_end << PAGE_SHIFT);
|
|
return 1;
|
|
}
|
|
|
|
@@ -73,23 +75,18 @@ static inline int bad_addr(unsigned long
|
|
#ifdef CONFIG_BLK_DEV_INITRD
|
|
if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
|
|
addr < INITRD_START+INITRD_SIZE) {
|
|
- *addrp = INITRD_START + INITRD_SIZE;
|
|
+ *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
|
|
return 1;
|
|
}
|
|
#endif
|
|
- /* kernel code + 640k memory hole (later should not be needed, but
|
|
- be paranoid for now) */
|
|
- if (last >= 640*1024 && addr < 1024*1024) {
|
|
- *addrp = 1024*1024;
|
|
- return 1;
|
|
- }
|
|
- if (last >= __pa_symbol(&_text) && last < __pa_symbol(&_end)) {
|
|
- *addrp = __pa_symbol(&_end);
|
|
+ /* kernel code */
|
|
+ if (last >= __pa_symbol(&_text) && addr < __pa_symbol(&_end)) {
|
|
+ *addrp = PAGE_ALIGN(__pa_symbol(&_end));
|
|
return 1;
|
|
}
|
|
|
|
if (last >= ebda_addr && addr < ebda_addr + ebda_size) {
|
|
- *addrp = ebda_addr + ebda_size;
|
|
+ *addrp = PAGE_ALIGN(ebda_addr + ebda_size);
|
|
return 1;
|
|
}
|
|
|
|
@@ -186,7 +183,7 @@ unsigned long __init find_e820_area(unsi
|
|
continue;
|
|
while (bad_addr(&addr, size) && addr+size <= ei->addr+ei->size)
|
|
;
|
|
- last = addr + size;
|
|
+ last = PAGE_ALIGN(addr) + size;
|
|
if (last > ei->addr + ei->size)
|
|
continue;
|
|
if (last > end)
|
|
@@ -196,59 +193,14 @@ unsigned long __init find_e820_area(unsi
|
|
return -1UL;
|
|
}
|
|
|
|
-/*
|
|
- * Free bootmem based on the e820 table for a node.
|
|
- */
|
|
-void __init e820_bootmem_free(pg_data_t *pgdat, unsigned long start,unsigned long end)
|
|
-{
|
|
- int i;
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- unsigned long last, addr;
|
|
-
|
|
- if (ei->type != E820_RAM ||
|
|
- ei->addr+ei->size <= start ||
|
|
- ei->addr >= end)
|
|
- continue;
|
|
-
|
|
- addr = round_up(ei->addr, PAGE_SIZE);
|
|
- if (addr < start)
|
|
- addr = start;
|
|
-
|
|
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
|
|
- if (last >= end)
|
|
- last = end;
|
|
-
|
|
- if (last > addr && last-addr >= PAGE_SIZE)
|
|
- free_bootmem_node(pgdat, addr, last-addr);
|
|
- }
|
|
-}
|
|
-
|
|
/*
|
|
* Find the highest page frame number we have available
|
|
*/
|
|
unsigned long __init e820_end_of_ram(void)
|
|
{
|
|
- int i;
|
|
unsigned long end_pfn = 0;
|
|
+ end_pfn = find_max_pfn_with_active_regions();
|
|
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- unsigned long start, end;
|
|
-
|
|
- start = round_up(ei->addr, PAGE_SIZE);
|
|
- end = round_down(ei->addr + ei->size, PAGE_SIZE);
|
|
- if (start >= end)
|
|
- continue;
|
|
- if (ei->type == E820_RAM) {
|
|
- if (end > end_pfn<<PAGE_SHIFT)
|
|
- end_pfn = end>>PAGE_SHIFT;
|
|
- } else {
|
|
- if (end > end_pfn_map<<PAGE_SHIFT)
|
|
- end_pfn_map = end>>PAGE_SHIFT;
|
|
- }
|
|
- }
|
|
-
|
|
if (end_pfn > end_pfn_map)
|
|
end_pfn_map = end_pfn;
|
|
if (end_pfn_map > MAXMEM>>PAGE_SHIFT)
|
|
@@ -258,43 +210,10 @@ unsigned long __init e820_end_of_ram(voi
|
|
if (end_pfn > end_pfn_map)
|
|
end_pfn = end_pfn_map;
|
|
|
|
+ printk("end_pfn_map = %lu\n", end_pfn_map);
|
|
return end_pfn;
|
|
}
|
|
|
|
-/*
|
|
- * Compute how much memory is missing in a range.
|
|
- * Unlike the other functions in this file the arguments are in page numbers.
|
|
- */
|
|
-unsigned long __init
|
|
-e820_hole_size(unsigned long start_pfn, unsigned long end_pfn)
|
|
-{
|
|
- unsigned long ram = 0;
|
|
- unsigned long start = start_pfn << PAGE_SHIFT;
|
|
- unsigned long end = end_pfn << PAGE_SHIFT;
|
|
- int i;
|
|
- for (i = 0; i < e820.nr_map; i++) {
|
|
- struct e820entry *ei = &e820.map[i];
|
|
- unsigned long last, addr;
|
|
-
|
|
- if (ei->type != E820_RAM ||
|
|
- ei->addr+ei->size <= start ||
|
|
- ei->addr >= end)
|
|
- continue;
|
|
-
|
|
- addr = round_up(ei->addr, PAGE_SIZE);
|
|
- if (addr < start)
|
|
- addr = start;
|
|
-
|
|
- last = round_down(ei->addr + ei->size, PAGE_SIZE);
|
|
- if (last >= end)
|
|
- last = end;
|
|
-
|
|
- if (last > addr)
|
|
- ram += last - addr;
|
|
- }
|
|
- return ((end - start) - ram) >> PAGE_SHIFT;
|
|
-}
|
|
-
|
|
/*
|
|
* Mark e820 reserved areas as busy for the resource manager.
|
|
*/
|
|
@@ -335,6 +254,109 @@ void __init e820_reserve_resources(struc
|
|
}
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
+/* Mark pages corresponding to given address range as nosave */
|
|
+static void __init
|
|
+e820_mark_nosave_range(unsigned long start, unsigned long end)
|
|
+{
|
|
+ unsigned long pfn, max_pfn;
|
|
+
|
|
+ if (start >= end)
|
|
+ return;
|
|
+
|
|
+ printk("Nosave address range: %016lx - %016lx\n", start, end);
|
|
+ max_pfn = end >> PAGE_SHIFT;
|
|
+ for (pfn = start >> PAGE_SHIFT; pfn < max_pfn; pfn++)
|
|
+ if (pfn_valid(pfn))
|
|
+ SetPageNosave(pfn_to_page(pfn));
|
|
+}
|
|
+
|
|
+/*
|
|
+ * Find the ranges of physical addresses that do not correspond to
|
|
+ * e820 RAM areas and mark the corresponding pages as nosave for software
|
|
+ * suspend and suspend to RAM.
|
|
+ *
|
|
+ * This function requires the e820 map to be sorted and without any
|
|
+ * overlapping entries and assumes the first e820 area to be RAM.
|
|
+ */
|
|
+void __init e820_mark_nosave_regions(void)
|
|
+{
|
|
+ int i;
|
|
+ unsigned long paddr;
|
|
+
|
|
+ paddr = round_down(e820.map[0].addr + e820.map[0].size, PAGE_SIZE);
|
|
+ for (i = 1; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+
|
|
+ if (paddr < ei->addr)
|
|
+ e820_mark_nosave_range(paddr,
|
|
+ round_up(ei->addr, PAGE_SIZE));
|
|
+
|
|
+ paddr = round_down(ei->addr + ei->size, PAGE_SIZE);
|
|
+ if (ei->type != E820_RAM)
|
|
+ e820_mark_nosave_range(round_up(ei->addr, PAGE_SIZE),
|
|
+ paddr);
|
|
+
|
|
+ if (paddr >= (end_pfn << PAGE_SHIFT))
|
|
+ break;
|
|
+ }
|
|
+}
|
|
+#endif
|
|
+
|
|
+/* Walk the e820 map and register active regions within a node */
|
|
+void __init
|
|
+e820_register_active_regions(int nid, unsigned long start_pfn,
|
|
+ unsigned long end_pfn)
|
|
+{
|
|
+ int i;
|
|
+ unsigned long ei_startpfn, ei_endpfn;
|
|
+ for (i = 0; i < e820.nr_map; i++) {
|
|
+ struct e820entry *ei = &e820.map[i];
|
|
+ ei_startpfn = round_up(ei->addr, PAGE_SIZE) >> PAGE_SHIFT;
|
|
+ ei_endpfn = round_down(ei->addr + ei->size, PAGE_SIZE)
|
|
+ >> PAGE_SHIFT;
|
|
+
|
|
+ /* Skip map entries smaller than a page */
|
|
+ if (ei_startpfn >= ei_endpfn)
|
|
+ continue;
|
|
+
|
|
+ /* Check if end_pfn_map should be updated */
|
|
+ if (ei->type != E820_RAM && ei_endpfn > end_pfn_map)
|
|
+ end_pfn_map = ei_endpfn;
|
|
+
|
|
+ /* Skip if map is outside the node */
|
|
+ if (ei->type != E820_RAM ||
|
|
+ ei_endpfn <= start_pfn ||
|
|
+ ei_startpfn >= end_pfn)
|
|
+ continue;
|
|
+
|
|
+ /* Check for overlaps */
|
|
+ if (ei_startpfn < start_pfn)
|
|
+ ei_startpfn = start_pfn;
|
|
+ if (ei_endpfn > end_pfn)
|
|
+ ei_endpfn = end_pfn;
|
|
+
|
|
+ /* Obey end_user_pfn to save on memmap */
|
|
+ if (ei_startpfn >= end_user_pfn)
|
|
+ continue;
|
|
+ if (ei_endpfn > end_user_pfn)
|
|
+ ei_endpfn = end_user_pfn;
|
|
+
|
|
+#ifdef CONFIG_XEN
|
|
+ if (ei_startpfn >= xen_start_info->nr_pages)
|
|
+ continue;
|
|
+ if (ei_endpfn > xen_start_info->nr_pages)
|
|
+ ei_endpfn = xen_start_info->nr_pages;
|
|
+#endif
|
|
+
|
|
+ add_active_range(nid, ei_startpfn, ei_endpfn);
|
|
+ }
|
|
+#ifdef CONFIG_XEN
|
|
+ BUG_ON(nid);
|
|
+ add_active_range(nid, end_pfn, end_pfn);
|
|
+#endif
|
|
+}
|
|
+
|
|
/*
|
|
* Add a memory region to the kernel e820 map.
|
|
*/
|
|
@@ -555,13 +577,6 @@ static int __init sanitize_e820_map(stru
|
|
* If we're lucky and live on a modern system, the setup code
|
|
* will have given us a memory map that we can use to properly
|
|
* set up memory. If we aren't, we'll fake a memory map.
|
|
- *
|
|
- * We check to see that the memory map contains at least 2 elements
|
|
- * before we'll use it, because the detection code in setup.S may
|
|
- * not be perfect and most every PC known to man has two memory
|
|
- * regions: one from 0 to 640k, and one from 1mb up. (The IBM
|
|
- * thinkpad 560x, for example, does not cooperate with the memory
|
|
- * detection code.)
|
|
*/
|
|
static int __init copy_e820_map(struct e820entry * biosmap, int nr_map)
|
|
{
|
|
@@ -583,27 +598,6 @@ static int __init copy_e820_map(struct e
|
|
if (start > end)
|
|
return -1;
|
|
|
|
-#ifndef CONFIG_XEN
|
|
- /*
|
|
- * Some BIOSes claim RAM in the 640k - 1M region.
|
|
- * Not right. Fix it up.
|
|
- *
|
|
- * This should be removed on Hammer which is supposed to not
|
|
- * have non e820 covered ISA mappings there, but I had some strange
|
|
- * problems so it stays for now. -AK
|
|
- */
|
|
- if (type == E820_RAM) {
|
|
- if (start < 0x100000ULL && end > 0xA0000ULL) {
|
|
- if (start < 0xA0000ULL)
|
|
- add_memory_region(start, 0xA0000ULL-start, type);
|
|
- if (end <= 0x100000ULL)
|
|
- continue;
|
|
- start = 0x100000ULL;
|
|
- size = end - start;
|
|
- }
|
|
- }
|
|
-#endif
|
|
-
|
|
add_memory_region(start, size, type);
|
|
} while (biosmap++,--nr_map);
|
|
|
|
@@ -624,11 +618,15 @@ static int __init copy_e820_map(struct e
|
|
return 0;
|
|
}
|
|
|
|
+void early_panic(char *msg)
|
|
+{
|
|
+ early_printk(msg);
|
|
+ panic(msg);
|
|
+}
|
|
+
|
|
#ifndef CONFIG_XEN
|
|
void __init setup_memory_region(void)
|
|
{
|
|
- char *who = "BIOS-e820";
|
|
-
|
|
/*
|
|
* Try to copy the BIOS-supplied E820-map.
|
|
*
|
|
@@ -636,24 +634,10 @@ void __init setup_memory_region(void)
|
|
* the next section from 1mb->appropriate_mem_k
|
|
*/
|
|
sanitize_e820_map(E820_MAP, &E820_MAP_NR);
|
|
- if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) {
|
|
- unsigned long mem_size;
|
|
-
|
|
- /* compare results from other methods and take the greater */
|
|
- if (ALT_MEM_K < EXT_MEM_K) {
|
|
- mem_size = EXT_MEM_K;
|
|
- who = "BIOS-88";
|
|
- } else {
|
|
- mem_size = ALT_MEM_K;
|
|
- who = "BIOS-e801";
|
|
- }
|
|
-
|
|
- e820.nr_map = 0;
|
|
- add_memory_region(0, LOWMEMSIZE(), E820_RAM);
|
|
- add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
|
|
- }
|
|
+ if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
|
|
+ early_panic("Cannot find a valid memory map");
|
|
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
|
- e820_print_map(who);
|
|
+ e820_print_map("BIOS-e820");
|
|
}
|
|
|
|
#else /* CONFIG_XEN */
|
|
@@ -685,20 +669,23 @@ void __init setup_memory_region(void)
|
|
|
|
sanitize_e820_map(map, (char *)&memmap.nr_entries);
|
|
|
|
- BUG_ON(copy_e820_map(map, (char)memmap.nr_entries) < 0);
|
|
+ if (copy_e820_map(map, (char)memmap.nr_entries) < 0)
|
|
+ early_panic("Cannot find a valid memory map");
|
|
|
|
printk(KERN_INFO "BIOS-provided physical RAM map:\n");
|
|
e820_print_map("Xen");
|
|
}
|
|
#endif
|
|
|
|
-void __init parse_memopt(char *p, char **from)
|
|
-{
|
|
+static int __init parse_memopt(char *p)
|
|
+{
|
|
int i;
|
|
unsigned long current_end;
|
|
unsigned long end;
|
|
|
|
- end_user_pfn = memparse(p, from);
|
|
+ if (!p)
|
|
+ return -EINVAL;
|
|
+ end_user_pfn = memparse(p, &p);
|
|
end_user_pfn >>= PAGE_SHIFT;
|
|
|
|
end = end_user_pfn<<PAGE_SHIFT;
|
|
@@ -715,27 +702,61 @@ void __init parse_memopt(char *p, char *
|
|
else
|
|
add_memory_region(current_end, end - current_end, E820_RAM);
|
|
}
|
|
+
|
|
+ return 0;
|
|
}
|
|
+early_param("mem", parse_memopt);
|
|
|
|
-void __init parse_memmapopt(char *p, char **from)
|
|
+static int userdef __initdata;
|
|
+
|
|
+static int __init parse_memmap_opt(char *p)
|
|
{
|
|
+ char *oldp;
|
|
unsigned long long start_at, mem_size;
|
|
|
|
- mem_size = memparse(p, from);
|
|
- p = *from;
|
|
+ if (!strcmp(p, "exactmap")) {
|
|
+#ifdef CONFIG_CRASH_DUMP
|
|
+ /* If we are doing a crash dump, we
|
|
+ * still need to know the real mem
|
|
+ * size before original memory map is
|
|
+ * reset.
|
|
+ */
|
|
+ e820_register_active_regions(0, 0, -1UL);
|
|
+ saved_max_pfn = e820_end_of_ram();
|
|
+ remove_all_active_ranges();
|
|
+#endif
|
|
+ end_pfn_map = 0;
|
|
+ e820.nr_map = 0;
|
|
+ userdef = 1;
|
|
+ return 0;
|
|
+ }
|
|
+
|
|
+ oldp = p;
|
|
+ mem_size = memparse(p, &p);
|
|
+ if (p == oldp)
|
|
+ return -EINVAL;
|
|
if (*p == '@') {
|
|
- start_at = memparse(p+1, from);
|
|
+ start_at = memparse(p+1, &p);
|
|
add_memory_region(start_at, mem_size, E820_RAM);
|
|
} else if (*p == '#') {
|
|
- start_at = memparse(p+1, from);
|
|
+ start_at = memparse(p+1, &p);
|
|
add_memory_region(start_at, mem_size, E820_ACPI);
|
|
} else if (*p == '$') {
|
|
- start_at = memparse(p+1, from);
|
|
+ start_at = memparse(p+1, &p);
|
|
add_memory_region(start_at, mem_size, E820_RESERVED);
|
|
} else {
|
|
end_user_pfn = (mem_size >> PAGE_SHIFT);
|
|
}
|
|
- p = *from;
|
|
+ return *p == '\0' ? 0 : -EINVAL;
|
|
+}
|
|
+early_param("memmap", parse_memmap_opt);
|
|
+
|
|
+void finish_e820_parsing(void)
|
|
+{
|
|
+ if (userdef) {
|
|
+ printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
+ e820_print_map("user");
|
|
+ }
|
|
}
|
|
|
|
unsigned long pci_mem_start = 0xaeedbabe;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/early_printk-xen.c 2007-06-12 13:13:01.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/early_printk-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -244,20 +244,16 @@ void early_printk(const char *fmt, ...)
|
|
|
|
static int __initdata keep_early;
|
|
|
|
-int __init setup_early_printk(char *opt)
|
|
+static int __init setup_early_printk(char *buf)
|
|
{
|
|
- char *space;
|
|
- char buf[256];
|
|
+ if (!buf)
|
|
+ return 0;
|
|
|
|
if (early_console_initialized)
|
|
- return 1;
|
|
-
|
|
- strlcpy(buf,opt,sizeof(buf));
|
|
- space = strchr(buf, ' ');
|
|
- if (space)
|
|
- *space = 0;
|
|
+ return 0;
|
|
+ early_console_initialized = 1;
|
|
|
|
- if (strstr(buf,"keep"))
|
|
+ if (strstr(buf, "keep"))
|
|
keep_early = 1;
|
|
|
|
if (!strncmp(buf, "serial", 6)) {
|
|
@@ -281,11 +277,12 @@ int __init setup_early_printk(char *opt)
|
|
early_console = &simnow_console;
|
|
keep_early = 1;
|
|
}
|
|
- early_console_initialized = 1;
|
|
register_console(early_console);
|
|
return 0;
|
|
}
|
|
|
|
+early_param("earlyprintk", setup_early_printk);
|
|
+
|
|
void __init disable_early_printk(void)
|
|
{
|
|
if (!early_console_initialized || !early_console)
|
|
@@ -299,4 +296,3 @@ void __init disable_early_printk(void)
|
|
}
|
|
}
|
|
|
|
-__setup("earlyprintk=", setup_early_printk);
|
|
--- head-2010-05-25.orig/arch/x86/kernel/entry_64-xen.S 2009-06-23 09:28:21.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -4,9 +4,6 @@
|
|
* Copyright (C) 1991, 1992 Linus Torvalds
|
|
* Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs
|
|
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
|
|
- *
|
|
- * $Id$
|
|
- *
|
|
* Jun Nakajima <jun.nakajima@intel.com>
|
|
* Asit Mallick <asit.k.mallick@intel.com>
|
|
* Modified for Xen
|
|
@@ -26,15 +23,25 @@
|
|
* at the top of the kernel process stack.
|
|
* - partial stack frame: partially saved registers upto R11.
|
|
* - full stack frame: Like partial stack frame, but all register saved.
|
|
- *
|
|
- * TODO:
|
|
- * - schedule it carefully for the final hardware.
|
|
+ *
|
|
+ * Some macro usage:
|
|
+ * - CFI macros are used to generate dwarf2 unwind information for better
|
|
+ * backtraces. They don't change any code.
|
|
+ * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
|
|
+ * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
|
|
+ * There are unfortunately lots of special cases where some registers
|
|
+ * not touched. The macro is a big mess that should be cleaned up.
|
|
+ * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
|
|
+ * Gives a full stack frame.
|
|
+ * - ENTRY/END Define functions in the symbol table.
|
|
+ * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
|
|
+ * frame that is otherwise undefined after a SYSCALL
|
|
+ * - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
|
|
+ * - errorentry/paranoidentry/zeroentry - Define exception entry points.
|
|
*/
|
|
|
|
-#define ASSEMBLY 1
|
|
#include <linux/linkage.h>
|
|
#include <asm/segment.h>
|
|
-#include <asm/smp.h>
|
|
#include <asm/cache.h>
|
|
#include <asm/errno.h>
|
|
#include <asm/dwarf2.h>
|
|
@@ -117,6 +124,7 @@ NMI_MASK = 0x80000000
|
|
.macro CFI_DEFAULT_STACK start=1,adj=0
|
|
.if \start
|
|
CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8 - \adj*ARGOFFSET
|
|
.else
|
|
CFI_DEF_CFA_OFFSET SS+8 - \adj*ARGOFFSET
|
|
@@ -207,6 +215,7 @@ END(ret_from_fork)
|
|
*/
|
|
.macro _frame ref
|
|
CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8-\ref
|
|
/*CFI_REL_OFFSET ss,SS-\ref*/
|
|
CFI_REL_OFFSET rsp,RSP-\ref
|
|
@@ -334,6 +343,8 @@ tracesys:
|
|
LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */
|
|
RESTORE_REST
|
|
cmpq $__NR_syscall_max,%rax
|
|
+ movq $-ENOSYS,%rcx
|
|
+ cmova %rcx,%rax
|
|
ja 1f
|
|
movq %r10,%rcx /* fixup for C */
|
|
call *sys_call_table(,%rax,8)
|
|
@@ -349,6 +360,7 @@ END(system_call)
|
|
*/
|
|
ENTRY(int_ret_from_sys_call)
|
|
CFI_STARTPROC simple
|
|
+ CFI_SIGNAL_FRAME
|
|
CFI_DEF_CFA rsp,SS+8-ARGOFFSET
|
|
/*CFI_REL_OFFSET ss,SS-ARGOFFSET*/
|
|
CFI_REL_OFFSET rsp,RSP-ARGOFFSET
|
|
@@ -583,8 +595,7 @@ retint_signal:
|
|
#ifdef CONFIG_PREEMPT
|
|
/* Returning to kernel space. Check if we need preemption */
|
|
/* rcx: threadinfo. interrupts off. */
|
|
- .p2align
|
|
-retint_kernel:
|
|
+ENTRY(retint_kernel)
|
|
cmpl $0,threadinfo_preempt_count(%rcx)
|
|
jnz retint_restore_args
|
|
bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx)
|
|
@@ -644,7 +655,6 @@ ENTRY(call_function_interrupt)
|
|
END(call_function_interrupt)
|
|
#endif
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
ENTRY(apic_timer_interrupt)
|
|
apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt
|
|
END(apic_timer_interrupt)
|
|
@@ -656,7 +666,6 @@ END(error_interrupt)
|
|
ENTRY(spurious_interrupt)
|
|
apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt
|
|
END(spurious_interrupt)
|
|
-#endif
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
/*
|
|
@@ -755,7 +764,9 @@ paranoid_exit\trace:
|
|
testl $3,CS(%rsp)
|
|
jnz paranoid_userspace\trace
|
|
paranoid_swapgs\trace:
|
|
+ .if \trace
|
|
TRACE_IRQS_IRETQ 0
|
|
+ .endif
|
|
swapgs
|
|
paranoid_restore\trace:
|
|
RESTORE_ALL 8
|
|
@@ -802,7 +813,7 @@ paranoid_schedule\trace:
|
|
* Exception entry point. This expects an error code/orig_rax on the stack
|
|
* and the exception handler in %rax.
|
|
*/
|
|
-ENTRY(error_entry)
|
|
+KPROBE_ENTRY(error_entry)
|
|
_frame RDI
|
|
CFI_REL_OFFSET rax,0
|
|
/* rdi slot contains rax, oldrax contains error code */
|
|
@@ -896,7 +907,7 @@ error_kernelspace:
|
|
jmp error_sti
|
|
#endif
|
|
CFI_ENDPROC
|
|
-END(error_entry)
|
|
+KPROBE_END(error_entry)
|
|
|
|
ENTRY(hypervisor_callback)
|
|
zeroentry do_hypervisor_callback
|
|
@@ -936,26 +947,6 @@ ENTRY(do_hypervisor_callback) # do_hyp
|
|
CFI_ENDPROC
|
|
END(do_hypervisor_callback)
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-KPROBE_ENTRY(nmi)
|
|
- zeroentry do_nmi_callback
|
|
-ENTRY(do_nmi_callback)
|
|
- CFI_STARTPROC
|
|
- addq $8, %rsp
|
|
- CFI_ENDPROC
|
|
- CFI_DEFAULT_STACK
|
|
- call do_nmi
|
|
- orl $NMI_MASK,EFLAGS(%rsp)
|
|
- RESTORE_REST
|
|
- XEN_BLOCK_EVENTS(%rsi)
|
|
- TRACE_IRQS_OFF
|
|
- GET_THREAD_INFO(%rcx)
|
|
- jmp retint_restore_args
|
|
- CFI_ENDPROC
|
|
- .previous .text
|
|
-END(nmi)
|
|
-#endif
|
|
-
|
|
ALIGN
|
|
restore_all_enable_events:
|
|
CFI_DEFAULT_STACK adj=1
|
|
@@ -1121,7 +1112,7 @@ ENDPROC(child_rip)
|
|
* do_sys_execve asm fallback arguments:
|
|
* rdi: name, rsi: argv, rdx: envp, fake frame on the stack
|
|
*/
|
|
-ENTRY(execve)
|
|
+ENTRY(kernel_execve)
|
|
CFI_STARTPROC
|
|
FAKE_STACK_FRAME $0
|
|
SAVE_ALL
|
|
@@ -1135,12 +1126,11 @@ ENTRY(execve)
|
|
UNFAKE_STACK_FRAME
|
|
ret
|
|
CFI_ENDPROC
|
|
-ENDPROC(execve)
|
|
+ENDPROC(kernel_execve)
|
|
|
|
KPROBE_ENTRY(page_fault)
|
|
errorentry do_page_fault
|
|
-END(page_fault)
|
|
- .previous .text
|
|
+KPROBE_END(page_fault)
|
|
|
|
ENTRY(coprocessor_error)
|
|
zeroentry do_coprocessor_error
|
|
@@ -1162,25 +1152,25 @@ KPROBE_ENTRY(debug)
|
|
zeroentry do_debug
|
|
/* paranoidexit
|
|
CFI_ENDPROC */
|
|
-END(debug)
|
|
- .previous .text
|
|
+KPROBE_END(debug)
|
|
|
|
-#if 0
|
|
- /* runs on exception stack */
|
|
KPROBE_ENTRY(nmi)
|
|
- INTR_FRAME
|
|
- pushq $-1
|
|
- CFI_ADJUST_CFA_OFFSET 8
|
|
- paranoidentry do_nmi, 0, 0
|
|
-#ifdef CONFIG_TRACE_IRQFLAGS
|
|
- paranoidexit 0
|
|
-#else
|
|
- jmp paranoid_exit1
|
|
- CFI_ENDPROC
|
|
-#endif
|
|
-END(nmi)
|
|
- .previous .text
|
|
-#endif
|
|
+ zeroentry do_nmi_callback
|
|
+KPROBE_END(nmi)
|
|
+do_nmi_callback:
|
|
+ CFI_STARTPROC
|
|
+ addq $8, %rsp
|
|
+ CFI_ENDPROC
|
|
+ CFI_DEFAULT_STACK
|
|
+ call do_nmi
|
|
+ orl $NMI_MASK,EFLAGS(%rsp)
|
|
+ RESTORE_REST
|
|
+ XEN_BLOCK_EVENTS(%rsi)
|
|
+ TRACE_IRQS_OFF
|
|
+ GET_THREAD_INFO(%rcx)
|
|
+ jmp retint_restore_args
|
|
+ CFI_ENDPROC
|
|
+END(do_nmi_callback)
|
|
|
|
KPROBE_ENTRY(int3)
|
|
/* INTR_FRAME
|
|
@@ -1189,8 +1179,7 @@ KPROBE_ENTRY(int3)
|
|
zeroentry do_int3
|
|
/* jmp paranoid_exit1
|
|
CFI_ENDPROC */
|
|
-END(int3)
|
|
- .previous .text
|
|
+KPROBE_END(int3)
|
|
|
|
ENTRY(overflow)
|
|
zeroentry do_overflow
|
|
@@ -1241,8 +1230,7 @@ END(stack_segment)
|
|
|
|
KPROBE_ENTRY(general_protection)
|
|
errorentry do_general_protection
|
|
-END(general_protection)
|
|
- .previous .text
|
|
+KPROBE_END(general_protection)
|
|
|
|
ENTRY(alignment_check)
|
|
errorentry do_alignment_check
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head_64-xen.S 2009-06-23 09:28:21.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/head_64-xen.S 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -5,9 +5,6 @@
|
|
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
|
|
* Copyright (C) 2000 Karsten Keil <kkeil@suse.de>
|
|
* Copyright (C) 2001,2002 Andi Kleen <ak@suse.de>
|
|
- *
|
|
- * $Id: head.S,v 1.49 2002/03/19 17:39:25 ak Exp $
|
|
- *
|
|
* Jun Nakajima <jun.nakajima@intel.com>
|
|
* Modified for Xen
|
|
*/
|
|
@@ -146,7 +143,7 @@ ENTRY(cpu_gdt_table)
|
|
.quad 0,0 /* TSS */
|
|
.quad 0,0 /* LDT */
|
|
.quad 0,0,0 /* three TLS descriptors */
|
|
- .quad 0 /* unused */
|
|
+ .quad 0x0000f40000000000 /* node/CPU stored in limit */
|
|
gdt_end:
|
|
/* asm/segment.h:GDT_ENTRIES must match this */
|
|
/* This should be a multiple of the cache line size */
|
|
--- head-2010-05-25.orig/arch/x86/kernel/head64-xen.c 2007-06-12 13:13:01.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/head64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -7,6 +7,9 @@
|
|
* Modified for Xen.
|
|
*/
|
|
|
|
+/* PDA is not ready to be used until the end of x86_64_start_kernel(). */
|
|
+#define arch_use_lazy_mmu_mode() false
|
|
+
|
|
#include <linux/init.h>
|
|
#include <linux/linkage.h>
|
|
#include <linux/types.h>
|
|
@@ -54,11 +57,9 @@ static void __init copy_bootdata(char *r
|
|
new_data = *(int *) (x86_boot_params + NEW_CL_POINTER);
|
|
if (!new_data) {
|
|
if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) {
|
|
- printk("so old bootloader that it does not support commandline?!\n");
|
|
return;
|
|
}
|
|
new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET;
|
|
- printk("old bootloader convention, maybe loadlin?\n");
|
|
}
|
|
command_line = (char *) ((u64)(new_data));
|
|
memcpy(saved_command_line, command_line, COMMAND_LINE_SIZE);
|
|
@@ -70,25 +71,6 @@ static void __init copy_bootdata(char *r
|
|
memcpy(saved_command_line, xen_start_info->cmd_line, max_cmdline);
|
|
saved_command_line[max_cmdline-1] = '\0';
|
|
#endif
|
|
- printk("Bootdata ok (command line is %s)\n", saved_command_line);
|
|
-}
|
|
-
|
|
-static void __init setup_boot_cpu_data(void)
|
|
-{
|
|
- unsigned int dummy, eax;
|
|
-
|
|
- /* get vendor info */
|
|
- cpuid(0, (unsigned int *)&boot_cpu_data.cpuid_level,
|
|
- (unsigned int *)&boot_cpu_data.x86_vendor_id[0],
|
|
- (unsigned int *)&boot_cpu_data.x86_vendor_id[8],
|
|
- (unsigned int *)&boot_cpu_data.x86_vendor_id[4]);
|
|
-
|
|
- /* get cpu type */
|
|
- cpuid(1, &eax, &dummy, &dummy,
|
|
- (unsigned int *) &boot_cpu_data.x86_capability);
|
|
- boot_cpu_data.x86 = (eax >> 8) & 0xf;
|
|
- boot_cpu_data.x86_model = (eax >> 4) & 0xf;
|
|
- boot_cpu_data.x86_mask = eax & 0xf;
|
|
}
|
|
|
|
#include <xen/interface/memory.h>
|
|
@@ -101,7 +83,6 @@ void __init x86_64_start_kernel(char * r
|
|
{
|
|
struct xen_machphys_mapping mapping;
|
|
unsigned long machine_to_phys_nr_ents;
|
|
- char *s;
|
|
int i;
|
|
|
|
setup_xen_features();
|
|
@@ -128,10 +109,7 @@ void __init x86_64_start_kernel(char * r
|
|
asm volatile("lidt %0" :: "m" (idt_descr));
|
|
#endif
|
|
|
|
- /*
|
|
- * This must be called really, really early:
|
|
- */
|
|
- lockdep_init();
|
|
+ early_printk("Kernel alive\n");
|
|
|
|
for (i = 0; i < NR_CPUS; i++)
|
|
cpu_pda(i) = &boot_cpu_pda[i];
|
|
@@ -141,22 +119,5 @@ void __init x86_64_start_kernel(char * r
|
|
#ifdef CONFIG_SMP
|
|
cpu_set(0, cpu_online_map);
|
|
#endif
|
|
- s = strstr(saved_command_line, "earlyprintk=");
|
|
- if (s != NULL)
|
|
- setup_early_printk(strchr(s, '=') + 1);
|
|
-#ifdef CONFIG_NUMA
|
|
- s = strstr(saved_command_line, "numa=");
|
|
- if (s != NULL)
|
|
- numa_setup(s+5);
|
|
-#endif
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- if (strstr(saved_command_line, "disableapic"))
|
|
- disable_apic = 1;
|
|
-#endif
|
|
- /* You need early console to see that */
|
|
- if (__pa_symbol(&_end) >= KERNEL_TEXT_SIZE)
|
|
- panic("Kernel too big for kernel mapping\n");
|
|
-
|
|
- setup_boot_cpu_data();
|
|
start_kernel();
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/io_apic_64-xen.c 2009-03-18 10:39:31.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/io_apic_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -26,9 +26,12 @@
|
|
#include <linux/delay.h>
|
|
#include <linux/sched.h>
|
|
#include <linux/smp_lock.h>
|
|
+#include <linux/pci.h>
|
|
#include <linux/mc146818rtc.h>
|
|
#include <linux/acpi.h>
|
|
#include <linux/sysdev.h>
|
|
+#include <linux/msi.h>
|
|
+#include <linux/htirq.h>
|
|
#ifdef CONFIG_ACPI
|
|
#include <acpi/acpi_bus.h>
|
|
#endif
|
|
@@ -41,6 +44,10 @@
|
|
#include <asm/acpi.h>
|
|
#include <asm/dma.h>
|
|
#include <asm/nmi.h>
|
|
+#include <asm/msidef.h>
|
|
+#include <asm/hypertransport.h>
|
|
+
|
|
+static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result);
|
|
|
|
#define __apicdebuginit __init
|
|
|
|
@@ -48,17 +55,30 @@ int sis_apic_bug; /* not actually suppor
|
|
|
|
static int no_timer_check;
|
|
|
|
-int disable_timer_pin_1 __initdata;
|
|
+static int disable_timer_pin_1 __initdata;
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-int timer_over_8254 __initdata = 0;
|
|
+#ifdef CONFIG_XEN
|
|
+#include <xen/interface/xen.h>
|
|
+#include <xen/interface/physdev.h>
|
|
+#include <xen/evtchn.h>
|
|
+
|
|
+/* Fake i8259 */
|
|
+#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
|
|
+#define disable_8259A_irq(_irq) ((void)0)
|
|
+#define i8259A_irq_pending(_irq) (0)
|
|
+
|
|
+unsigned long io_apic_irqs;
|
|
+
|
|
+#define clear_IO_APIC() ((void)0)
|
|
+#else
|
|
+int timer_over_8254 __initdata = 1;
|
|
|
|
/* Where if anywhere is the i8259 connect in external int mode */
|
|
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
|
|
#endif
|
|
|
|
static DEFINE_SPINLOCK(ioapic_lock);
|
|
-static DEFINE_SPINLOCK(vector_lock);
|
|
+DEFINE_SPINLOCK(vector_lock);
|
|
|
|
/*
|
|
* # of IRQ routing registers
|
|
@@ -83,29 +103,27 @@ static struct irq_pin_list {
|
|
short apic, pin, next;
|
|
} irq_2_pin[PIN_MAP_SIZE];
|
|
|
|
-int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
|
|
-#ifdef CONFIG_PCI_MSI
|
|
-#define vector_to_irq(vector) \
|
|
- (platform_legacy_irq(vector) ? vector : vector_irq[vector])
|
|
-#else
|
|
-#define vector_to_irq(vector) (vector)
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_XEN
|
|
-
|
|
-#include <xen/interface/xen.h>
|
|
-#include <xen/interface/physdev.h>
|
|
-#include <xen/evtchn.h>
|
|
-
|
|
-/* Fake i8259 */
|
|
-#define make_8259A_irq(_irq) (io_apic_irqs &= ~(1UL<<(_irq)))
|
|
-#define disable_8259A_irq(_irq) ((void)0)
|
|
-#define i8259A_irq_pending(_irq) (0)
|
|
+#ifndef CONFIG_XEN
|
|
+struct io_apic {
|
|
+ unsigned int index;
|
|
+ unsigned int unused[3];
|
|
+ unsigned int data;
|
|
+};
|
|
|
|
-unsigned long io_apic_irqs;
|
|
+static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
|
|
+{
|
|
+ return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
|
|
+ + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK);
|
|
+}
|
|
+#endif
|
|
|
|
-static inline unsigned int xen_io_apic_read(unsigned int apic, unsigned int reg)
|
|
+static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
|
|
{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(reg, &io_apic->index);
|
|
+ return readl(&io_apic->data);
|
|
+#else
|
|
struct physdev_apic apic_op;
|
|
int ret;
|
|
|
|
@@ -115,31 +133,133 @@ static inline unsigned int xen_io_apic_r
|
|
if (ret)
|
|
return ret;
|
|
return apic_op.value;
|
|
+#endif
|
|
}
|
|
|
|
-static inline void xen_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
+static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value)
|
|
{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(reg, &io_apic->index);
|
|
+ writel(value, &io_apic->data);
|
|
+#else
|
|
struct physdev_apic apic_op;
|
|
|
|
apic_op.apic_physbase = mp_ioapics[apic].mpc_apicaddr;
|
|
apic_op.reg = reg;
|
|
apic_op.value = value;
|
|
WARN_ON(HYPERVISOR_physdev_op(PHYSDEVOP_apic_write, &apic_op));
|
|
+#endif
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * Re-write a value: to be used for read-modify-write
|
|
+ * cycles where the read already set up the index register.
|
|
+ */
|
|
+static inline void io_apic_modify(unsigned int apic, unsigned int value)
|
|
+{
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ writel(value, &io_apic->data);
|
|
}
|
|
+#else
|
|
+#define io_apic_modify io_apic_write
|
|
+#endif
|
|
|
|
-#define io_apic_read(a,r) xen_io_apic_read(a,r)
|
|
-#define io_apic_write(a,r,v) xen_io_apic_write(a,r,v)
|
|
+/*
|
|
+ * Synchronize the IO-APIC and the CPU by doing
|
|
+ * a dummy read from the IO-APIC
|
|
+ */
|
|
+static inline void io_apic_sync(unsigned int apic)
|
|
+{
|
|
+#ifndef CONFIG_XEN
|
|
+ struct io_apic __iomem *io_apic = io_apic_base(apic);
|
|
+ readl(&io_apic->data);
|
|
+#endif
|
|
+}
|
|
|
|
-#define clear_IO_APIC() ((void)0)
|
|
+union entry_union {
|
|
+ struct { u32 w1, w2; };
|
|
+ struct IO_APIC_route_entry entry;
|
|
+};
|
|
|
|
-#else
|
|
+#ifndef CONFIG_XEN
|
|
+static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
|
|
+{
|
|
+ union entry_union eu;
|
|
+ unsigned long flags;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
|
|
+ eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ return eu.entry;
|
|
+}
|
|
+#endif
|
|
+
|
|
+/*
|
|
+ * When we write a new IO APIC routing entry, we need to write the high
|
|
+ * word first! If the mask bit in the low word is clear, we will enable
|
|
+ * the interrupt, and we need to make sure the entry is fully populated
|
|
+ * before that happens.
|
|
+ */
|
|
+static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ union entry_union eu;
|
|
+ eu.entry = e;
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
+#ifndef CONFIG_XEN
|
|
+/*
|
|
+ * When we mask an IO APIC routing entry, we need to write the low
|
|
+ * word first, in order to set the mask bit before we change the
|
|
+ * high bits!
|
|
+ */
|
|
+static void ioapic_mask_entry(int apic, int pin)
|
|
+{
|
|
+ unsigned long flags;
|
|
+ union entry_union eu = { .entry.mask = 1 };
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ io_apic_write(apic, 0x10 + 2*pin, eu.w1);
|
|
+ io_apic_write(apic, 0x11 + 2*pin, eu.w2);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
|
|
#ifdef CONFIG_SMP
|
|
+static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
+{
|
|
+ int apic, pin;
|
|
+ struct irq_pin_list *entry = irq_2_pin + irq;
|
|
+
|
|
+ BUG_ON(irq >= NR_IRQS);
|
|
+ for (;;) {
|
|
+ unsigned int reg;
|
|
+ apic = entry->apic;
|
|
+ pin = entry->pin;
|
|
+ if (pin == -1)
|
|
+ break;
|
|
+ io_apic_write(apic, 0x11 + pin*2, dest);
|
|
+ reg = io_apic_read(apic, 0x10 + pin*2);
|
|
+ reg &= ~0x000000ff;
|
|
+ reg |= vector;
|
|
+ io_apic_modify(apic, reg);
|
|
+ if (!entry->next)
|
|
+ break;
|
|
+ entry = irq_2_pin + entry->next;
|
|
+ }
|
|
+}
|
|
+
|
|
static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
|
|
{
|
|
unsigned long flags;
|
|
unsigned int dest;
|
|
cpumask_t tmp;
|
|
+ int vector;
|
|
|
|
cpus_and(tmp, mask, cpu_online_map);
|
|
if (cpus_empty(tmp))
|
|
@@ -147,7 +267,11 @@ static void set_ioapic_affinity_irq(unsi
|
|
|
|
cpus_and(mask, tmp, CPU_MASK_ALL);
|
|
|
|
- dest = cpu_mask_to_apicid(mask);
|
|
+ vector = assign_irq_vector(irq, mask, &tmp);
|
|
+ if (vector < 0)
|
|
+ return;
|
|
+
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
|
|
/*
|
|
* Only the high 8 bits are valid.
|
|
@@ -155,13 +279,12 @@ static void set_ioapic_affinity_irq(unsi
|
|
dest = SET_APIC_LOGICAL_ID(dest);
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __DO_ACTION(1, = dest, )
|
|
- set_irq_info(irq, mask);
|
|
+ __target_IO_APIC_irq(irq, dest, vector);
|
|
+ set_native_irq_info(irq, mask);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
#endif
|
|
-
|
|
-#endif /* !CONFIG_XEN */
|
|
+#endif
|
|
|
|
/*
|
|
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
|
|
@@ -241,24 +364,15 @@ static void unmask_IO_APIC_irq (unsigned
|
|
static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
- unsigned long flags;
|
|
|
|
/* Check delivery_mode to be sure we're not clearing an SMI pin */
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ entry = ioapic_read_entry(apic, pin);
|
|
if (entry.delivery_mode == dest_SMI)
|
|
return;
|
|
/*
|
|
* Disable it in the IO-APIC irq-routing table:
|
|
*/
|
|
- memset(&entry, 0, sizeof(entry));
|
|
- entry.mask = 1;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
|
|
- io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_mask_entry(apic, pin);
|
|
}
|
|
|
|
static void clear_IO_APIC (void)
|
|
@@ -272,16 +386,6 @@ static void clear_IO_APIC (void)
|
|
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
-static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
|
|
-
|
|
-/*
|
|
- * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
|
|
- * specific CPU-side IRQs.
|
|
- */
|
|
-
|
|
-#define MAX_PIRQS 8
|
|
-static int pirq_entries [MAX_PIRQS];
|
|
-static int pirqs_enabled;
|
|
int skip_ioapic_setup;
|
|
int ioapic_force;
|
|
|
|
@@ -290,18 +394,17 @@ int ioapic_force;
|
|
static int __init disable_ioapic_setup(char *str)
|
|
{
|
|
skip_ioapic_setup = 1;
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
+early_param("noapic", disable_ioapic_setup);
|
|
|
|
-static int __init enable_ioapic_setup(char *str)
|
|
+/* Actually the next is obsolete, but keep it for paranoid reasons -AK */
|
|
+static int __init disable_timer_pin_setup(char *arg)
|
|
{
|
|
- ioapic_force = 1;
|
|
- skip_ioapic_setup = 0;
|
|
+ disable_timer_pin_1 = 1;
|
|
return 1;
|
|
}
|
|
-
|
|
-__setup("noapic", disable_ioapic_setup);
|
|
-__setup("apic", enable_ioapic_setup);
|
|
+__setup("disable_timer_pin_1", disable_timer_pin_setup);
|
|
|
|
#ifndef CONFIG_XEN
|
|
static int __init setup_disable_8254_timer(char *s)
|
|
@@ -319,137 +422,6 @@ __setup("disable_8254_timer", setup_disa
|
|
__setup("enable_8254_timer", setup_enable_8254_timer);
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
-#include <asm/pci-direct.h>
|
|
-#include <linux/pci_ids.h>
|
|
-#include <linux/pci.h>
|
|
-
|
|
-
|
|
-#ifdef CONFIG_ACPI
|
|
-
|
|
-static int nvidia_hpet_detected __initdata;
|
|
-
|
|
-static int __init nvidia_hpet_check(unsigned long phys, unsigned long size)
|
|
-{
|
|
- nvidia_hpet_detected = 1;
|
|
- return 0;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/* Temporary Hack. Nvidia and VIA boards currently only work with IO-APIC
|
|
- off. Check for an Nvidia or VIA PCI bridge and turn it off.
|
|
- Use pci direct infrastructure because this runs before the PCI subsystem.
|
|
-
|
|
- Can be overwritten with "apic"
|
|
-
|
|
- And another hack to disable the IOMMU on VIA chipsets.
|
|
-
|
|
- ... and others. Really should move this somewhere else.
|
|
-
|
|
- Kludge-O-Rama. */
|
|
-void __init check_ioapic(void)
|
|
-{
|
|
- int num,slot,func;
|
|
- /* Poor man's PCI discovery */
|
|
- for (num = 0; num < 32; num++) {
|
|
- for (slot = 0; slot < 32; slot++) {
|
|
- for (func = 0; func < 8; func++) {
|
|
- u32 class;
|
|
- u32 vendor;
|
|
- u8 type;
|
|
- class = read_pci_config(num,slot,func,
|
|
- PCI_CLASS_REVISION);
|
|
- if (class == 0xffffffff)
|
|
- break;
|
|
-
|
|
- if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
|
|
- continue;
|
|
-
|
|
- vendor = read_pci_config(num, slot, func,
|
|
- PCI_VENDOR_ID);
|
|
- vendor &= 0xffff;
|
|
- switch (vendor) {
|
|
- case PCI_VENDOR_ID_VIA:
|
|
-#ifdef CONFIG_IOMMU
|
|
- if ((end_pfn > MAX_DMA32_PFN ||
|
|
- force_iommu) &&
|
|
- !iommu_aperture_allowed) {
|
|
- printk(KERN_INFO
|
|
- "Looks like a VIA chipset. Disabling IOMMU. Override with \"iommu=allowed\"\n");
|
|
- iommu_aperture_disabled = 1;
|
|
- }
|
|
-#endif
|
|
- return;
|
|
- case PCI_VENDOR_ID_NVIDIA:
|
|
-#ifdef CONFIG_ACPI
|
|
- /*
|
|
- * All timer overrides on Nvidia are
|
|
- * wrong unless HPET is enabled.
|
|
- */
|
|
- nvidia_hpet_detected = 0;
|
|
- acpi_table_parse(ACPI_HPET,
|
|
- nvidia_hpet_check);
|
|
- if (nvidia_hpet_detected == 0) {
|
|
- acpi_skip_timer_override = 1;
|
|
- printk(KERN_INFO "Nvidia board "
|
|
- "detected. Ignoring ACPI "
|
|
- "timer override.\n");
|
|
- }
|
|
-#endif
|
|
- /* RED-PEN skip them on mptables too? */
|
|
- return;
|
|
- case PCI_VENDOR_ID_ATI:
|
|
-
|
|
- /* This should be actually default, but
|
|
- for 2.6.16 let's do it for ATI only where
|
|
- it's really needed. */
|
|
-#ifndef CONFIG_XEN
|
|
- if (timer_over_8254 == 1) {
|
|
- timer_over_8254 = 0;
|
|
- printk(KERN_INFO
|
|
- "ATI board detected. Disabling timer routing over 8254.\n");
|
|
- }
|
|
-#endif
|
|
- return;
|
|
- }
|
|
-
|
|
-
|
|
- /* No multi-function device? */
|
|
- type = read_pci_config_byte(num,slot,func,
|
|
- PCI_HEADER_TYPE);
|
|
- if (!(type & 0x80))
|
|
- break;
|
|
- }
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
-static int __init ioapic_pirq_setup(char *str)
|
|
-{
|
|
- int i, max;
|
|
- int ints[MAX_PIRQS+1];
|
|
-
|
|
- get_options(str, ARRAY_SIZE(ints), ints);
|
|
-
|
|
- for (i = 0; i < MAX_PIRQS; i++)
|
|
- pirq_entries[i] = -1;
|
|
-
|
|
- pirqs_enabled = 1;
|
|
- apic_printk(APIC_VERBOSE, "PIRQ redirection, working around broken MP-BIOS.\n");
|
|
- max = MAX_PIRQS;
|
|
- if (ints[0] < MAX_PIRQS)
|
|
- max = ints[0];
|
|
-
|
|
- for (i = 0; i < max; i++) {
|
|
- apic_printk(APIC_VERBOSE, "... PIRQ%d -> IRQ %d\n", i, ints[i+1]);
|
|
- /*
|
|
- * PIRQs are mapped upside down, usually.
|
|
- */
|
|
- pirq_entries[MAX_PIRQS-i-1] = ints[i+1];
|
|
- }
|
|
- return 1;
|
|
-}
|
|
-
|
|
-__setup("pirq=", ioapic_pirq_setup);
|
|
|
|
/*
|
|
* Find the IRQ entry number of a certain pin.
|
|
@@ -479,9 +451,7 @@ static int __init find_isa_irq_pin(int i
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
int lbus = mp_irqs[i].mpc_srcbus;
|
|
|
|
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
|
|
+ if (test_bit(lbus, mp_bus_not_pci) &&
|
|
(mp_irqs[i].mpc_irqtype == type) &&
|
|
(mp_irqs[i].mpc_srcbusirq == irq))
|
|
|
|
@@ -497,9 +467,7 @@ static int __init find_isa_irq_apic(int
|
|
for (i = 0; i < mp_irq_entries; i++) {
|
|
int lbus = mp_irqs[i].mpc_srcbus;
|
|
|
|
- if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
|
|
- mp_bus_id_to_type[lbus] == MP_BUS_MCA) &&
|
|
+ if (test_bit(lbus, mp_bus_not_pci) &&
|
|
(mp_irqs[i].mpc_irqtype == type) &&
|
|
(mp_irqs[i].mpc_srcbusirq == irq))
|
|
break;
|
|
@@ -540,7 +508,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
mp_irqs[i].mpc_dstapic == MP_APIC_ALL)
|
|
break;
|
|
|
|
- if ((mp_bus_id_to_type[lbus] == MP_BUS_PCI) &&
|
|
+ if (!test_bit(lbus, mp_bus_not_pci) &&
|
|
!mp_irqs[i].mpc_irqtype &&
|
|
(bus == lbus) &&
|
|
(slot == ((mp_irqs[i].mpc_srcbusirq >> 2) & 0x1f))) {
|
|
@@ -563,27 +531,6 @@ int IO_APIC_get_PCI_irq_vector(int bus,
|
|
return best_guess;
|
|
}
|
|
|
|
-/*
|
|
- * EISA Edge/Level control register, ELCR
|
|
- */
|
|
-static int EISA_ELCR(unsigned int irq)
|
|
-{
|
|
- if (irq < 16) {
|
|
- unsigned int port = 0x4d0 + (irq >> 3);
|
|
- return (inb(port) >> (irq & 7)) & 1;
|
|
- }
|
|
- apic_printk(APIC_VERBOSE, "Broken MPtable reports ISA irq %d\n", irq);
|
|
- return 0;
|
|
-}
|
|
-
|
|
-/* EISA interrupts are always polarity zero and can be edge or level
|
|
- * trigger depending on the ELCR value. If an interrupt is listed as
|
|
- * EISA conforming in the MP table, that means its trigger type must
|
|
- * be read in from the ELCR */
|
|
-
|
|
-#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mpc_srcbusirq))
|
|
-#define default_EISA_polarity(idx) (0)
|
|
-
|
|
/* ISA interrupts are always polarity zero edge triggered,
|
|
* when listed as conforming in the MP table. */
|
|
|
|
@@ -596,12 +543,6 @@ static int EISA_ELCR(unsigned int irq)
|
|
#define default_PCI_trigger(idx) (1)
|
|
#define default_PCI_polarity(idx) (1)
|
|
|
|
-/* MCA interrupts are always polarity zero level triggered,
|
|
- * when listed as conforming in the MP table. */
|
|
-
|
|
-#define default_MCA_trigger(idx) (1)
|
|
-#define default_MCA_polarity(idx) (0)
|
|
-
|
|
static int __init MPBIOS_polarity(int idx)
|
|
{
|
|
int bus = mp_irqs[idx].mpc_srcbus;
|
|
@@ -613,38 +554,11 @@ static int __init MPBIOS_polarity(int id
|
|
switch (mp_irqs[idx].mpc_irqflag & 3)
|
|
{
|
|
case 0: /* conforms, ie. bus-type dependent polarity */
|
|
- {
|
|
- switch (mp_bus_id_to_type[bus])
|
|
- {
|
|
- case MP_BUS_ISA: /* ISA pin */
|
|
- {
|
|
- polarity = default_ISA_polarity(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_EISA: /* EISA pin */
|
|
- {
|
|
- polarity = default_EISA_polarity(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_PCI: /* PCI pin */
|
|
- {
|
|
- polarity = default_PCI_polarity(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_MCA: /* MCA pin */
|
|
- {
|
|
- polarity = default_MCA_polarity(idx);
|
|
- break;
|
|
- }
|
|
- default:
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- polarity = 1;
|
|
- break;
|
|
- }
|
|
- }
|
|
+ if (test_bit(bus, mp_bus_not_pci))
|
|
+ polarity = default_ISA_polarity(idx);
|
|
+ else
|
|
+ polarity = default_PCI_polarity(idx);
|
|
break;
|
|
- }
|
|
case 1: /* high active */
|
|
{
|
|
polarity = 0;
|
|
@@ -682,38 +596,11 @@ static int MPBIOS_trigger(int idx)
|
|
switch ((mp_irqs[idx].mpc_irqflag>>2) & 3)
|
|
{
|
|
case 0: /* conforms, ie. bus-type dependent */
|
|
- {
|
|
- switch (mp_bus_id_to_type[bus])
|
|
- {
|
|
- case MP_BUS_ISA: /* ISA pin */
|
|
- {
|
|
- trigger = default_ISA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_EISA: /* EISA pin */
|
|
- {
|
|
- trigger = default_EISA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_PCI: /* PCI pin */
|
|
- {
|
|
- trigger = default_PCI_trigger(idx);
|
|
- break;
|
|
- }
|
|
- case MP_BUS_MCA: /* MCA pin */
|
|
- {
|
|
- trigger = default_MCA_trigger(idx);
|
|
- break;
|
|
- }
|
|
- default:
|
|
- {
|
|
- printk(KERN_WARNING "broken BIOS!!\n");
|
|
- trigger = 1;
|
|
- break;
|
|
- }
|
|
- }
|
|
+ if (test_bit(bus, mp_bus_not_pci))
|
|
+ trigger = default_ISA_trigger(idx);
|
|
+ else
|
|
+ trigger = default_PCI_trigger(idx);
|
|
break;
|
|
- }
|
|
case 1: /* edge */
|
|
{
|
|
trigger = 0;
|
|
@@ -750,64 +637,6 @@ static inline int irq_trigger(int idx)
|
|
return MPBIOS_trigger(idx);
|
|
}
|
|
|
|
-static int next_irq = 16;
|
|
-
|
|
-/*
|
|
- * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
|
|
- * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
|
|
- * from ACPI, which can reach 800 in large boxen.
|
|
- *
|
|
- * Compact the sparse GSI space into a sequential IRQ series and reuse
|
|
- * vectors if possible.
|
|
- */
|
|
-int gsi_irq_sharing(int gsi)
|
|
-{
|
|
- int i, tries, vector;
|
|
-
|
|
- BUG_ON(gsi >= NR_IRQ_VECTORS);
|
|
-
|
|
- if (platform_legacy_irq(gsi))
|
|
- return gsi;
|
|
-
|
|
- if (gsi_2_irq[gsi] != 0xFF)
|
|
- return (int)gsi_2_irq[gsi];
|
|
-
|
|
- tries = NR_IRQS;
|
|
- try_again:
|
|
- vector = assign_irq_vector(gsi);
|
|
-
|
|
- /*
|
|
- * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
|
|
- * use of vector and if found, return that IRQ. However, we never want
|
|
- * to share legacy IRQs, which usually have a different trigger mode
|
|
- * than PCI.
|
|
- */
|
|
- for (i = 0; i < NR_IRQS; i++)
|
|
- if (IO_APIC_VECTOR(i) == vector)
|
|
- break;
|
|
- if (platform_legacy_irq(i)) {
|
|
- if (--tries >= 0) {
|
|
- IO_APIC_VECTOR(i) = 0;
|
|
- goto try_again;
|
|
- }
|
|
- panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
|
|
- }
|
|
- if (i < NR_IRQS) {
|
|
- gsi_2_irq[gsi] = i;
|
|
- printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
|
|
- gsi, vector, i);
|
|
- return i;
|
|
- }
|
|
-
|
|
- i = next_irq++;
|
|
- BUG_ON(i >= NR_IRQS);
|
|
- gsi_2_irq[gsi] = i;
|
|
- IO_APIC_VECTOR(i) = vector;
|
|
- printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
|
|
- gsi, vector, i);
|
|
- return i;
|
|
-}
|
|
-
|
|
static int pin_2_irq(int idx, int apic, int pin)
|
|
{
|
|
int irq, i;
|
|
@@ -819,49 +648,16 @@ static int pin_2_irq(int idx, int apic,
|
|
if (mp_irqs[idx].mpc_dstirq != pin)
|
|
printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
|
|
|
|
- switch (mp_bus_id_to_type[bus])
|
|
- {
|
|
- case MP_BUS_ISA: /* ISA pin */
|
|
- case MP_BUS_EISA:
|
|
- case MP_BUS_MCA:
|
|
- {
|
|
- irq = mp_irqs[idx].mpc_srcbusirq;
|
|
- break;
|
|
- }
|
|
- case MP_BUS_PCI: /* PCI pin */
|
|
- {
|
|
- /*
|
|
- * PCI IRQs are mapped in order
|
|
- */
|
|
- i = irq = 0;
|
|
- while (i < apic)
|
|
- irq += nr_ioapic_registers[i++];
|
|
- irq += pin;
|
|
- irq = gsi_irq_sharing(irq);
|
|
- break;
|
|
- }
|
|
- default:
|
|
- {
|
|
- printk(KERN_ERR "unknown bus type %d.\n",bus);
|
|
- irq = 0;
|
|
- break;
|
|
- }
|
|
- }
|
|
- BUG_ON(irq >= NR_IRQS);
|
|
-
|
|
- /*
|
|
- * PCI IRQ command line redirection. Yes, limits are hardcoded.
|
|
- */
|
|
- if ((pin >= 16) && (pin <= 23)) {
|
|
- if (pirq_entries[pin-16] != -1) {
|
|
- if (!pirq_entries[pin-16]) {
|
|
- apic_printk(APIC_VERBOSE, "disabling PIRQ%d\n", pin-16);
|
|
- } else {
|
|
- irq = pirq_entries[pin-16];
|
|
- apic_printk(APIC_VERBOSE, "using PIRQ%d -> IRQ %d\n",
|
|
- pin-16, irq);
|
|
- }
|
|
- }
|
|
+ if (test_bit(bus, mp_bus_not_pci)) {
|
|
+ irq = mp_irqs[idx].mpc_srcbusirq;
|
|
+ } else {
|
|
+ /*
|
|
+ * PCI IRQs are mapped in order
|
|
+ */
|
|
+ i = irq = 0;
|
|
+ while (i < apic)
|
|
+ irq += nr_ioapic_registers[i++];
|
|
+ irq += pin;
|
|
}
|
|
BUG_ON(irq >= NR_IRQS);
|
|
return irq;
|
|
@@ -885,46 +681,71 @@ static inline int IO_APIC_irq_trigger(in
|
|
}
|
|
|
|
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
|
|
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
|
|
+static u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
|
|
|
|
-int assign_irq_vector(int irq)
|
|
+static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
|
|
{
|
|
- unsigned long flags;
|
|
int vector;
|
|
struct physdev_irq irq_op;
|
|
|
|
- BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
|
|
+ BUG_ON((unsigned)irq >= NR_IRQ_VECTORS);
|
|
|
|
if (irq < PIRQ_BASE || irq - PIRQ_BASE >= NR_PIRQS)
|
|
return -EINVAL;
|
|
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
+ cpus_and(*result, mask, cpu_online_map);
|
|
|
|
- if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) {
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
- return IO_APIC_VECTOR(irq);
|
|
- }
|
|
+ if (irq_vector[irq] > 0)
|
|
+ return irq_vector[irq];
|
|
|
|
irq_op.irq = irq;
|
|
- if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op)) {
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
+ if (HYPERVISOR_physdev_op(PHYSDEVOP_alloc_irq_vector, &irq_op))
|
|
return -ENOSPC;
|
|
- }
|
|
|
|
vector = irq_op.vector;
|
|
- vector_irq[vector] = irq;
|
|
- if (irq != AUTO_ASSIGN)
|
|
- IO_APIC_VECTOR(irq) = vector;
|
|
+ irq_vector[irq] = vector;
|
|
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
+ return vector;
|
|
+}
|
|
+
|
|
+static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result)
|
|
+{
|
|
+ int vector;
|
|
+ unsigned long flags;
|
|
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ vector = __assign_irq_vector(irq, mask, result);
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
return vector;
|
|
}
|
|
|
|
-extern void (*interrupt[NR_IRQS])(void);
|
|
#ifndef CONFIG_XEN
|
|
-static struct hw_interrupt_type ioapic_level_type;
|
|
-static struct hw_interrupt_type ioapic_edge_type;
|
|
+void __setup_vector_irq(int cpu)
|
|
+{
|
|
+ /* Initialize vector_irq on a new cpu */
|
|
+ /* This function must be called with vector_lock held */
|
|
+ int irq, vector;
|
|
+
|
|
+ /* Mark the inuse vectors */
|
|
+ for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) {
|
|
+ if (!cpu_isset(cpu, irq_domain[irq]))
|
|
+ continue;
|
|
+ vector = irq_vector[irq];
|
|
+ per_cpu(vector_irq, cpu)[vector] = irq;
|
|
+ }
|
|
+ /* Mark the free vectors */
|
|
+ for (vector = 0; vector < NR_VECTORS; ++vector) {
|
|
+ irq = per_cpu(vector_irq, cpu)[vector];
|
|
+ if (irq < 0)
|
|
+ continue;
|
|
+ if (!cpu_isset(cpu, irq_domain[irq]))
|
|
+ per_cpu(vector_irq, cpu)[vector] = -1;
|
|
+ }
|
|
+}
|
|
+
|
|
+extern void (*interrupt[NR_IRQS])(void);
|
|
+
|
|
+static struct irq_chip ioapic_chip;
|
|
|
|
#define IOAPIC_AUTO -1
|
|
#define IOAPIC_EDGE 0
|
|
@@ -932,16 +753,15 @@ static struct hw_interrupt_type ioapic_e
|
|
|
|
static void ioapic_register_intr(int irq, int vector, unsigned long trigger)
|
|
{
|
|
- unsigned idx;
|
|
-
|
|
- idx = use_pci_vector() && !platform_legacy_irq(irq) ? vector : irq;
|
|
-
|
|
if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) ||
|
|
trigger == IOAPIC_LEVEL)
|
|
- irq_desc[idx].chip = &ioapic_level_type;
|
|
- else
|
|
- irq_desc[idx].chip = &ioapic_edge_type;
|
|
- set_intr_gate(vector, interrupt[idx]);
|
|
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
+ handle_fasteoi_irq, "fasteoi");
|
|
+ else {
|
|
+ irq_desc[irq].status |= IRQ_DELAYED_DISABLE;
|
|
+ set_irq_chip_and_handler_name(irq, &ioapic_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+ }
|
|
}
|
|
#else
|
|
#define ioapic_register_intr(irq, vector, trigger) evtchn_register_pirq(irq)
|
|
@@ -994,16 +814,21 @@ static void __init setup_IO_APIC_irqs(vo
|
|
continue;
|
|
|
|
if (IO_APIC_IRQ(irq)) {
|
|
- vector = assign_irq_vector(irq);
|
|
+ cpumask_t mask;
|
|
+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
|
|
+ if (vector < 0)
|
|
+ continue;
|
|
+
|
|
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
|
|
entry.vector = vector;
|
|
|
|
ioapic_register_intr(irq, vector, IOAPIC_AUTO);
|
|
if (!apic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
}
|
|
+ ioapic_write_entry(apic, pin, entry);
|
|
+
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
|
|
- io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
|
|
set_native_irq_info(irq, TARGET_CPUS);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
@@ -1046,7 +871,7 @@ static void __init setup_ExtINT_IRQ0_pin
|
|
* The timer IRQ doesn't have to know that behind the
|
|
* scene we have a 8259A-master in AEOI mode ...
|
|
*/
|
|
- irq_desc[0].chip = &ioapic_edge_type;
|
|
+ set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge");
|
|
|
|
/*
|
|
* Add it to the IO-APIC irq-routing table:
|
|
@@ -1142,10 +967,7 @@ void __apicdebuginit print_IO_APIC(void)
|
|
for (i = 0; i <= reg_01.bits.entries; i++) {
|
|
struct IO_APIC_route_entry entry;
|
|
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
|
|
- *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ entry = ioapic_read_entry(apic, i);
|
|
|
|
printk(KERN_DEBUG " %02x %03X %02X ",
|
|
i,
|
|
@@ -1165,17 +987,12 @@ void __apicdebuginit print_IO_APIC(void)
|
|
);
|
|
}
|
|
}
|
|
- if (use_pci_vector())
|
|
- printk(KERN_INFO "Using vector-based indexing\n");
|
|
printk(KERN_DEBUG "IRQ to pin mappings:\n");
|
|
for (i = 0; i < NR_IRQS; i++) {
|
|
struct irq_pin_list *entry = irq_2_pin + i;
|
|
if (entry->pin < 0)
|
|
continue;
|
|
- if (use_pci_vector() && !platform_legacy_irq(i))
|
|
- printk(KERN_DEBUG "IRQ%d ", IO_APIC_VECTOR(i));
|
|
- else
|
|
- printk(KERN_DEBUG "IRQ%d ", i);
|
|
+ printk(KERN_DEBUG "IRQ%d ", i);
|
|
for (;;) {
|
|
printk("-> %d:%d", entry->apic, entry->pin);
|
|
if (!entry->next)
|
|
@@ -1339,9 +1156,6 @@ static void __init enable_IO_APIC(void)
|
|
irq_2_pin[i].pin = -1;
|
|
irq_2_pin[i].next = 0;
|
|
}
|
|
- if (!pirqs_enabled)
|
|
- for (i = 0; i < MAX_PIRQS; i++)
|
|
- pirq_entries[i] = -1;
|
|
|
|
/*
|
|
* The number of IO-APIC IRQ registers (== #pins):
|
|
@@ -1358,11 +1172,7 @@ static void __init enable_IO_APIC(void)
|
|
/* See if any of the pins is in ExtINT mode */
|
|
for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
|
|
struct IO_APIC_route_entry entry;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
|
|
- *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
+ entry = ioapic_read_entry(apic, pin);
|
|
|
|
/* If the interrupt line is enabled and in ExtInt mode
|
|
* I have found the pin where the i8259 is connected.
|
|
@@ -1416,7 +1226,6 @@ void disable_IO_APIC(void)
|
|
*/
|
|
if (ioapic_i8259.pin != -1) {
|
|
struct IO_APIC_route_entry entry;
|
|
- unsigned long flags;
|
|
|
|
memset(&entry, 0, sizeof(entry));
|
|
entry.mask = 0; /* Enabled */
|
|
@@ -1433,12 +1242,7 @@ void disable_IO_APIC(void)
|
|
/*
|
|
* Add it to the IO-APIC irq-routing table:
|
|
*/
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
|
|
- *(((int *)&entry)+1));
|
|
- io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
|
|
- *(((int *)&entry)+0));
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
|
|
}
|
|
|
|
disconnect_bsp_APIC(ioapic_i8259.pin != -1);
|
|
@@ -1446,76 +1250,6 @@ void disable_IO_APIC(void)
|
|
}
|
|
|
|
/*
|
|
- * function to set the IO-APIC physical IDs based on the
|
|
- * values stored in the MPC table.
|
|
- *
|
|
- * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
|
|
- */
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
-static void __init setup_ioapic_ids_from_mpc (void)
|
|
-{
|
|
- union IO_APIC_reg_00 reg_00;
|
|
- int apic;
|
|
- int i;
|
|
- unsigned char old_id;
|
|
- unsigned long flags;
|
|
-
|
|
- /*
|
|
- * Set the IOAPIC ID to the value stored in the MPC table.
|
|
- */
|
|
- for (apic = 0; apic < nr_ioapics; apic++) {
|
|
-
|
|
- /* Read the register 0 value */
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- old_id = mp_ioapics[apic].mpc_apicid;
|
|
-
|
|
-
|
|
- printk(KERN_INFO "Using IO-APIC %d\n", mp_ioapics[apic].mpc_apicid);
|
|
-
|
|
-
|
|
- /*
|
|
- * We need to adjust the IRQ routing table
|
|
- * if the ID changed.
|
|
- */
|
|
- if (old_id != mp_ioapics[apic].mpc_apicid)
|
|
- for (i = 0; i < mp_irq_entries; i++)
|
|
- if (mp_irqs[i].mpc_dstapic == old_id)
|
|
- mp_irqs[i].mpc_dstapic
|
|
- = mp_ioapics[apic].mpc_apicid;
|
|
-
|
|
- /*
|
|
- * Read the right value from the MPC table and
|
|
- * write it into the ID register.
|
|
- */
|
|
- apic_printk(APIC_VERBOSE,KERN_INFO "...changing IO-APIC physical APIC ID to %d ...",
|
|
- mp_ioapics[apic].mpc_apicid);
|
|
-
|
|
- reg_00.bits.ID = mp_ioapics[apic].mpc_apicid;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(apic, 0, reg_00.raw);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-
|
|
- /*
|
|
- * Sanity check
|
|
- */
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_00.raw = io_apic_read(apic, 0);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
- if (reg_00.bits.ID != mp_ioapics[apic].mpc_apicid)
|
|
- printk("could not set ID!\n");
|
|
- else
|
|
- apic_printk(APIC_VERBOSE," ok.\n");
|
|
- }
|
|
-}
|
|
-#else
|
|
-static void __init setup_ioapic_ids_from_mpc(void) { }
|
|
-#endif
|
|
-
|
|
-/*
|
|
* There is a nasty bug in some older SMP boards, their mptable lies
|
|
* about the timer IRQ. We do the following to work around the situation:
|
|
*
|
|
@@ -1569,7 +1303,7 @@ static int __init timer_irq_works(void)
|
|
* an edge even if it isn't on the 8259A...
|
|
*/
|
|
|
|
-static unsigned int startup_edge_ioapic_irq(unsigned int irq)
|
|
+static unsigned int startup_ioapic_irq(unsigned int irq)
|
|
{
|
|
int was_pending = 0;
|
|
unsigned long flags;
|
|
@@ -1586,107 +1320,19 @@ static unsigned int startup_edge_ioapic_
|
|
return was_pending;
|
|
}
|
|
|
|
-/*
|
|
- * Once we have recorded IRQ_PENDING already, we can mask the
|
|
- * interrupt for real. This prevents IRQ storms from unhandled
|
|
- * devices.
|
|
- */
|
|
-static void ack_edge_ioapic_irq(unsigned int irq)
|
|
-{
|
|
- move_irq(irq);
|
|
- if ((irq_desc[irq].status & (IRQ_PENDING | IRQ_DISABLED))
|
|
- == (IRQ_PENDING | IRQ_DISABLED))
|
|
- mask_IO_APIC_irq(irq);
|
|
- ack_APIC_irq();
|
|
-}
|
|
-
|
|
-/*
|
|
- * Level triggered interrupts can just be masked,
|
|
- * and shutting down and starting up the interrupt
|
|
- * is the same as enabling and disabling them -- except
|
|
- * with a startup need to return a "was pending" value.
|
|
- *
|
|
- * Level triggered interrupts are special because we
|
|
- * do not touch any IO-APIC register while handling
|
|
- * them. We ack the APIC in the end-IRQ handler, not
|
|
- * in the start-IRQ-handler. Protection against reentrance
|
|
- * from the same interrupt is still provided, both by the
|
|
- * generic IRQ layer and by the fact that an unacked local
|
|
- * APIC does not accept IRQs.
|
|
- */
|
|
-static unsigned int startup_level_ioapic_irq (unsigned int irq)
|
|
-{
|
|
- unmask_IO_APIC_irq(irq);
|
|
-
|
|
- return 0; /* don't check for pending */
|
|
-}
|
|
-
|
|
-static void end_level_ioapic_irq (unsigned int irq)
|
|
-{
|
|
- move_irq(irq);
|
|
- ack_APIC_irq();
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_PCI_MSI
|
|
-static unsigned int startup_edge_ioapic_vector(unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- return startup_edge_ioapic_irq(irq);
|
|
-}
|
|
-
|
|
-static void ack_edge_ioapic_vector(unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- move_native_irq(vector);
|
|
- ack_edge_ioapic_irq(irq);
|
|
-}
|
|
-
|
|
-static unsigned int startup_level_ioapic_vector (unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- return startup_level_ioapic_irq (irq);
|
|
-}
|
|
-
|
|
-static void end_level_ioapic_vector (unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- move_native_irq(vector);
|
|
- end_level_ioapic_irq(irq);
|
|
-}
|
|
-
|
|
-static void mask_IO_APIC_vector (unsigned int vector)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- mask_IO_APIC_irq(irq);
|
|
-}
|
|
-
|
|
-static void unmask_IO_APIC_vector (unsigned int vector)
|
|
+static int ioapic_retrigger_irq(unsigned int irq)
|
|
{
|
|
- int irq = vector_to_irq(vector);
|
|
-
|
|
- unmask_IO_APIC_irq(irq);
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
-static void set_ioapic_affinity_vector (unsigned int vector,
|
|
- cpumask_t cpu_mask)
|
|
-{
|
|
- int irq = vector_to_irq(vector);
|
|
+ cpumask_t mask;
|
|
+ unsigned vector;
|
|
+ unsigned long flags;
|
|
|
|
- set_native_irq_info(vector, cpu_mask);
|
|
- set_ioapic_affinity_irq(irq, cpu_mask);
|
|
-}
|
|
-#endif // CONFIG_SMP
|
|
-#endif // CONFIG_PCI_MSI
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ vector = irq_vector[irq];
|
|
+ cpus_clear(mask);
|
|
+ cpu_set(first_cpu(irq_domain[irq]), mask);
|
|
|
|
-static int ioapic_retrigger(unsigned int irq)
|
|
-{
|
|
- send_IPI_self(IO_APIC_VECTOR(irq));
|
|
+ send_IPI_mask(mask, vector);
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
|
|
return 1;
|
|
}
|
|
@@ -1700,32 +1346,47 @@ static int ioapic_retrigger(unsigned int
|
|
* races.
|
|
*/
|
|
|
|
-static struct hw_interrupt_type ioapic_edge_type __read_mostly = {
|
|
- .typename = "IO-APIC-edge",
|
|
- .startup = startup_edge_ioapic,
|
|
- .shutdown = shutdown_edge_ioapic,
|
|
- .enable = enable_edge_ioapic,
|
|
- .disable = disable_edge_ioapic,
|
|
- .ack = ack_edge_ioapic,
|
|
- .end = end_edge_ioapic,
|
|
-#ifdef CONFIG_SMP
|
|
- .set_affinity = set_ioapic_affinity,
|
|
+static void ack_apic_edge(unsigned int irq)
|
|
+{
|
|
+ move_native_irq(irq);
|
|
+ ack_APIC_irq();
|
|
+}
|
|
+
|
|
+static void ack_apic_level(unsigned int irq)
|
|
+{
|
|
+ int do_unmask_irq = 0;
|
|
+
|
|
+#if defined(CONFIG_GENERIC_PENDING_IRQ) || defined(CONFIG_IRQBALANCE)
|
|
+ /* If we are moving the irq we need to mask it */
|
|
+ if (unlikely(irq_desc[irq].status & IRQ_MOVE_PENDING)) {
|
|
+ do_unmask_irq = 1;
|
|
+ mask_IO_APIC_irq(irq);
|
|
+ }
|
|
#endif
|
|
- .retrigger = ioapic_retrigger,
|
|
-};
|
|
|
|
-static struct hw_interrupt_type ioapic_level_type __read_mostly = {
|
|
- .typename = "IO-APIC-level",
|
|
- .startup = startup_level_ioapic,
|
|
- .shutdown = shutdown_level_ioapic,
|
|
- .enable = enable_level_ioapic,
|
|
- .disable = disable_level_ioapic,
|
|
- .ack = mask_and_ack_level_ioapic,
|
|
- .end = end_level_ioapic,
|
|
+ /*
|
|
+ * We must acknowledge the irq before we move it or the acknowledge will
|
|
+ * not propogate properly.
|
|
+ */
|
|
+ ack_APIC_irq();
|
|
+
|
|
+ /* Now we can move and renable the irq */
|
|
+ move_masked_irq(irq);
|
|
+ if (unlikely(do_unmask_irq))
|
|
+ unmask_IO_APIC_irq(irq);
|
|
+}
|
|
+
|
|
+static struct irq_chip ioapic_chip __read_mostly = {
|
|
+ .name = "IO-APIC",
|
|
+ .startup = startup_ioapic_irq,
|
|
+ .mask = mask_IO_APIC_irq,
|
|
+ .unmask = unmask_IO_APIC_irq,
|
|
+ .ack = ack_apic_edge,
|
|
+ .eoi = ack_apic_level,
|
|
#ifdef CONFIG_SMP
|
|
- .set_affinity = set_ioapic_affinity,
|
|
+ .set_affinity = set_ioapic_affinity_irq,
|
|
#endif
|
|
- .retrigger = ioapic_retrigger,
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
};
|
|
#endif /* !CONFIG_XEN */
|
|
|
|
@@ -1746,12 +1407,7 @@ static inline void init_IO_APIC_traps(vo
|
|
*/
|
|
for (irq = 0; irq < NR_IRQS ; irq++) {
|
|
int tmp = irq;
|
|
- if (use_pci_vector()) {
|
|
- if (!platform_legacy_irq(tmp))
|
|
- if ((tmp = vector_to_irq(tmp)) == -1)
|
|
- continue;
|
|
- }
|
|
- if (IO_APIC_IRQ(tmp) && !IO_APIC_VECTOR(tmp)) {
|
|
+ if (IO_APIC_IRQ(tmp) && !irq_vector[tmp]) {
|
|
/*
|
|
* Hmm.. We don't have an entry for this,
|
|
* so default to an old-fashioned 8259
|
|
@@ -1762,7 +1418,7 @@ static inline void init_IO_APIC_traps(vo
|
|
#ifndef CONFIG_XEN
|
|
else
|
|
/* Strange. Oh, well.. */
|
|
- irq_desc[irq].chip = &no_irq_type;
|
|
+ irq_desc[irq].chip = &no_irq_chip;
|
|
#endif
|
|
}
|
|
}
|
|
@@ -1883,8 +1539,6 @@ static inline void unlock_ExtINT_logic(v
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
}
|
|
|
|
-int timer_uses_ioapic_pin_0;
|
|
-
|
|
/*
|
|
* This code may look a bit paranoid, but it's supposed to cooperate with
|
|
* a wide range of boards and BIOS bugs. Fortunately only the timer IRQ
|
|
@@ -1897,13 +1551,13 @@ static inline void check_timer(void)
|
|
{
|
|
int apic1, pin1, apic2, pin2;
|
|
int vector;
|
|
+ cpumask_t mask;
|
|
|
|
/*
|
|
* get/set the timer IRQ vector:
|
|
*/
|
|
disable_8259A_irq(0);
|
|
- vector = assign_irq_vector(0);
|
|
- set_intr_gate(vector, interrupt[0]);
|
|
+ vector = assign_irq_vector(0, TARGET_CPUS, &mask);
|
|
|
|
/*
|
|
* Subtle, code in do_timer_interrupt() expects an AEOI
|
|
@@ -1922,9 +1576,6 @@ static inline void check_timer(void)
|
|
pin2 = ioapic_i8259.pin;
|
|
apic2 = ioapic_i8259.apic;
|
|
|
|
- if (pin1 == 0)
|
|
- timer_uses_ioapic_pin_0 = 1;
|
|
-
|
|
apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
|
|
vector, apic1, pin1, apic2, pin2);
|
|
|
|
@@ -2039,11 +1690,6 @@ void __init setup_IO_APIC(void)
|
|
|
|
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
|
|
|
|
- /*
|
|
- * Set up the IO-APIC IRQ routing table.
|
|
- */
|
|
- if (!acpi_ioapic)
|
|
- setup_ioapic_ids_from_mpc();
|
|
#ifndef CONFIG_XEN
|
|
sync_Arb_IDs();
|
|
#endif /* !CONFIG_XEN */
|
|
@@ -2066,17 +1712,12 @@ static int ioapic_suspend(struct sys_dev
|
|
{
|
|
struct IO_APIC_route_entry *entry;
|
|
struct sysfs_ioapic_data *data;
|
|
- unsigned long flags;
|
|
int i;
|
|
|
|
data = container_of(dev, struct sysfs_ioapic_data, dev);
|
|
entry = data->entry;
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
|
|
- *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
|
|
- *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
|
|
- }
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ )
|
|
+ *entry = ioapic_read_entry(dev->id, i);
|
|
|
|
return 0;
|
|
}
|
|
@@ -2098,11 +1739,9 @@ static int ioapic_resume(struct sys_devi
|
|
reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
|
|
io_apic_write(dev->id, 0, reg_00.raw);
|
|
}
|
|
- for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
|
|
- io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
|
|
- io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
|
|
- }
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ for (i = 0; i < nr_ioapic_registers[dev->id]; i++)
|
|
+ ioapic_write_entry(dev->id, i, entry[i]);
|
|
|
|
return 0;
|
|
}
|
|
@@ -2148,28 +1787,254 @@ static int __init ioapic_init_sysfs(void
|
|
|
|
device_initcall(ioapic_init_sysfs);
|
|
|
|
+/*
|
|
+ * Dynamic irq allocate and deallocation
|
|
+ */
|
|
+int create_irq(void)
|
|
+{
|
|
+ /* Allocate an unused irq */
|
|
+ int irq;
|
|
+ int new;
|
|
+ int vector = 0;
|
|
+ unsigned long flags;
|
|
+ cpumask_t mask;
|
|
+
|
|
+ irq = -ENOSPC;
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ for (new = (NR_IRQS - 1); new >= 0; new--) {
|
|
+ if (platform_legacy_irq(new))
|
|
+ continue;
|
|
+ if (irq_vector[new] != 0)
|
|
+ continue;
|
|
+ vector = __assign_irq_vector(new, TARGET_CPUS, &mask);
|
|
+ if (likely(vector > 0))
|
|
+ irq = new;
|
|
+ break;
|
|
+ }
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+
|
|
+ if (irq >= 0) {
|
|
+ dynamic_irq_init(irq);
|
|
+ }
|
|
+ return irq;
|
|
+}
|
|
+
|
|
+void destroy_irq(unsigned int irq)
|
|
+{
|
|
+ unsigned long flags;
|
|
+
|
|
+ dynamic_irq_cleanup(irq);
|
|
+
|
|
+ spin_lock_irqsave(&vector_lock, flags);
|
|
+ irq_vector[irq] = 0;
|
|
+ spin_unlock_irqrestore(&vector_lock, flags);
|
|
+}
|
|
+
|
|
#endif /* CONFIG_XEN */
|
|
|
|
-/* --------------------------------------------------------------------------
|
|
- ACPI-based IOAPIC Configuration
|
|
- -------------------------------------------------------------------------- */
|
|
+/*
|
|
+ * MSI mesage composition
|
|
+ */
|
|
+#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
|
|
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
|
|
+{
|
|
+ int vector;
|
|
+ unsigned dest;
|
|
+ cpumask_t tmp;
|
|
|
|
-#ifdef CONFIG_ACPI
|
|
+ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
|
|
+ if (vector >= 0) {
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ msg->address_hi = MSI_ADDR_BASE_HI;
|
|
+ msg->address_lo =
|
|
+ MSI_ADDR_BASE_LO |
|
|
+ ((INT_DEST_MODE == 0) ?
|
|
+ MSI_ADDR_DEST_MODE_PHYSICAL:
|
|
+ MSI_ADDR_DEST_MODE_LOGICAL) |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ MSI_ADDR_REDIRECTION_CPU:
|
|
+ MSI_ADDR_REDIRECTION_LOWPRI) |
|
|
+ MSI_ADDR_DEST_ID(dest);
|
|
+
|
|
+ msg->data =
|
|
+ MSI_DATA_TRIGGER_EDGE |
|
|
+ MSI_DATA_LEVEL_ASSERT |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ MSI_DATA_DELIVERY_FIXED:
|
|
+ MSI_DATA_DELIVERY_LOWPRI) |
|
|
+ MSI_DATA_VECTOR(vector);
|
|
+ }
|
|
+ return vector;
|
|
+}
|
|
|
|
-#define IO_APIC_MAX_ID 0xFE
|
|
+#ifdef CONFIG_SMP
|
|
+static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ struct msi_msg msg;
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ int vector;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ tmp = TARGET_CPUS;
|
|
+
|
|
+ cpus_and(mask, tmp, CPU_MASK_ALL);
|
|
+
|
|
+ vector = assign_irq_vector(irq, mask, &tmp);
|
|
+ if (vector < 0)
|
|
+ return;
|
|
+
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ read_msi_msg(irq, &msg);
|
|
+
|
|
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
|
|
+ msg.data |= MSI_DATA_VECTOR(vector);
|
|
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
|
|
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
|
|
|
|
-int __init io_apic_get_version (int ioapic)
|
|
+ write_msi_msg(irq, &msg);
|
|
+ set_native_irq_info(irq, mask);
|
|
+}
|
|
+#endif /* CONFIG_SMP */
|
|
+
|
|
+/*
|
|
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
|
|
+ * which implement the MSI or MSI-X Capability Structure.
|
|
+ */
|
|
+static struct irq_chip msi_chip = {
|
|
+ .name = "PCI-MSI",
|
|
+ .unmask = unmask_msi_irq,
|
|
+ .mask = mask_msi_irq,
|
|
+ .ack = ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_msi_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev)
|
|
{
|
|
- union IO_APIC_reg_01 reg_01;
|
|
- unsigned long flags;
|
|
+ struct msi_msg msg;
|
|
+ int ret;
|
|
+ ret = msi_compose_msg(dev, irq, &msg);
|
|
+ if (ret < 0)
|
|
+ return ret;
|
|
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- reg_01.raw = io_apic_read(ioapic, 1);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+ write_msi_msg(irq, &msg);
|
|
|
|
- return reg_01.bits.version;
|
|
+ set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+void arch_teardown_msi_irq(unsigned int irq)
|
|
+{
|
|
+ return;
|
|
}
|
|
|
|
+#endif /* CONFIG_PCI_MSI */
|
|
+
|
|
+/*
|
|
+ * Hypertransport interrupt support
|
|
+ */
|
|
+#ifdef CONFIG_HT_IRQ
|
|
+
|
|
+#ifdef CONFIG_SMP
|
|
+
|
|
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
|
|
+{
|
|
+ struct ht_irq_msg msg;
|
|
+ fetch_ht_irq_msg(irq, &msg);
|
|
+
|
|
+ msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
|
|
+ msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
|
|
+
|
|
+ msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
|
|
+ msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
|
|
+
|
|
+ write_ht_irq_msg(irq, &msg);
|
|
+}
|
|
+
|
|
+static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask)
|
|
+{
|
|
+ unsigned int dest;
|
|
+ cpumask_t tmp;
|
|
+ int vector;
|
|
+
|
|
+ cpus_and(tmp, mask, cpu_online_map);
|
|
+ if (cpus_empty(tmp))
|
|
+ tmp = TARGET_CPUS;
|
|
+
|
|
+ cpus_and(mask, tmp, CPU_MASK_ALL);
|
|
+
|
|
+ vector = assign_irq_vector(irq, mask, &tmp);
|
|
+ if (vector < 0)
|
|
+ return;
|
|
+
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ target_ht_irq(irq, dest, vector);
|
|
+ set_native_irq_info(irq, mask);
|
|
+}
|
|
+#endif
|
|
+
|
|
+static struct irq_chip ht_irq_chip = {
|
|
+ .name = "PCI-HT",
|
|
+ .mask = mask_ht_irq,
|
|
+ .unmask = unmask_ht_irq,
|
|
+ .ack = ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = set_ht_irq_affinity,
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
|
|
+{
|
|
+ int vector;
|
|
+ cpumask_t tmp;
|
|
+
|
|
+ vector = assign_irq_vector(irq, TARGET_CPUS, &tmp);
|
|
+ if (vector >= 0) {
|
|
+ struct ht_irq_msg msg;
|
|
+ unsigned dest;
|
|
+
|
|
+ dest = cpu_mask_to_apicid(tmp);
|
|
+
|
|
+ msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
|
|
+
|
|
+ msg.address_lo =
|
|
+ HT_IRQ_LOW_BASE |
|
|
+ HT_IRQ_LOW_DEST_ID(dest) |
|
|
+ HT_IRQ_LOW_VECTOR(vector) |
|
|
+ ((INT_DEST_MODE == 0) ?
|
|
+ HT_IRQ_LOW_DM_PHYSICAL :
|
|
+ HT_IRQ_LOW_DM_LOGICAL) |
|
|
+ HT_IRQ_LOW_RQEOI_EDGE |
|
|
+ ((INT_DELIVERY_MODE != dest_LowestPrio) ?
|
|
+ HT_IRQ_LOW_MT_FIXED :
|
|
+ HT_IRQ_LOW_MT_ARBITRATED) |
|
|
+ HT_IRQ_LOW_IRQ_MASKED;
|
|
+
|
|
+ write_ht_irq_msg(irq, &msg);
|
|
+
|
|
+ set_irq_chip_and_handler_name(irq, &ht_irq_chip,
|
|
+ handle_edge_irq, "edge");
|
|
+ }
|
|
+ return vector;
|
|
+}
|
|
+#endif /* CONFIG_HT_IRQ */
|
|
+
|
|
+/* --------------------------------------------------------------------------
|
|
+ ACPI-based IOAPIC Configuration
|
|
+ -------------------------------------------------------------------------- */
|
|
+
|
|
+#ifdef CONFIG_ACPI
|
|
+
|
|
+#define IO_APIC_MAX_ID 0xFE
|
|
|
|
int __init io_apic_get_redir_entries (int ioapic)
|
|
{
|
|
@@ -2188,6 +2053,8 @@ int io_apic_set_pci_routing (int ioapic,
|
|
{
|
|
struct IO_APIC_route_entry entry;
|
|
unsigned long flags;
|
|
+ int vector;
|
|
+ cpumask_t mask;
|
|
|
|
if (!IO_APIC_IRQ(irq)) {
|
|
apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n",
|
|
@@ -2196,6 +2063,17 @@ int io_apic_set_pci_routing (int ioapic,
|
|
}
|
|
|
|
/*
|
|
+ * IRQs < 16 are already in the irq_2_pin[] map
|
|
+ */
|
|
+ if (irq >= 16)
|
|
+ add_pin_to_irq(irq, ioapic, pin);
|
|
+
|
|
+
|
|
+ vector = assign_irq_vector(irq, TARGET_CPUS, &mask);
|
|
+ if (vector < 0)
|
|
+ return vector;
|
|
+
|
|
+ /*
|
|
* Generate a PCI IRQ routing entry and program the IOAPIC accordingly.
|
|
* Note that we mask (disable) IRQs now -- these get enabled when the
|
|
* corresponding device driver registers for this IRQ.
|
|
@@ -2205,19 +2083,11 @@ int io_apic_set_pci_routing (int ioapic,
|
|
|
|
entry.delivery_mode = INT_DELIVERY_MODE;
|
|
entry.dest_mode = INT_DEST_MODE;
|
|
- entry.dest.logical.logical_dest = cpu_mask_to_apicid(TARGET_CPUS);
|
|
+ entry.dest.logical.logical_dest = cpu_mask_to_apicid(mask);
|
|
entry.trigger = edge_level;
|
|
entry.polarity = active_high_low;
|
|
entry.mask = 1; /* Disabled (masked) */
|
|
-
|
|
- irq = gsi_irq_sharing(irq);
|
|
- /*
|
|
- * IRQs < 16 are already in the irq_2_pin[] map
|
|
- */
|
|
- if (irq >= 16)
|
|
- add_pin_to_irq(irq, ioapic, pin);
|
|
-
|
|
- entry.vector = assign_irq_vector(irq);
|
|
+ entry.vector = vector & 0xff;
|
|
|
|
apic_printk(APIC_VERBOSE,KERN_DEBUG "IOAPIC[%d]: Set PCI routing entry (%d-%d -> 0x%x -> "
|
|
"IRQ %d Mode:%i Active:%i)\n", ioapic,
|
|
@@ -2229,10 +2099,10 @@ int io_apic_set_pci_routing (int ioapic,
|
|
if (!ioapic && (irq < 16))
|
|
disable_8259A_irq(irq);
|
|
|
|
+ ioapic_write_entry(ioapic, pin, entry);
|
|
+
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
|
|
- io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
|
|
- set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
|
|
+ set_native_irq_info(irq, TARGET_CPUS);
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
return 0;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/ioport_64-xen.c 2008-01-28 12:24:19.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/ioport_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -58,6 +58,7 @@ asmlinkage long sys_ioperm(unsigned long
|
|
|
|
memset(bitmap, 0xff, IO_BITMAP_BYTES);
|
|
t->io_bitmap_ptr = bitmap;
|
|
+ set_thread_flag(TIF_IO_BITMAP);
|
|
|
|
set_xen_guest_handle(set_iobitmap.bitmap, (char *)bitmap);
|
|
set_iobitmap.nr_ports = IO_BITMAP_BITS;
|
|
--- head-2010-05-25.orig/arch/x86/kernel/mpparse_64-xen.c 2007-06-12 13:13:01.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/mpparse_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -41,8 +41,7 @@ int acpi_found_madt;
|
|
* Various Linux-internal data structures created from the
|
|
* MP-table.
|
|
*/
|
|
-unsigned char apic_version [MAX_APICS];
|
|
-unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
|
|
+DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
|
|
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
|
|
|
|
static int mp_current_pci_id = 0;
|
|
@@ -56,7 +55,6 @@ struct mpc_config_intsrc mp_irqs[MAX_IRQ
|
|
int mp_irq_entries;
|
|
|
|
int nr_ioapics;
|
|
-int pic_mode;
|
|
unsigned long mp_lapic_addr = 0;
|
|
|
|
|
|
@@ -71,19 +69,6 @@ unsigned disabled_cpus __initdata;
|
|
/* Bitmask of physically existing CPUs */
|
|
physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
|
|
|
|
-/* ACPI MADT entry parsing functions */
|
|
-#ifdef CONFIG_ACPI
|
|
-extern struct acpi_boot_flags acpi_boot;
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-extern int acpi_parse_lapic (acpi_table_entry_header *header);
|
|
-extern int acpi_parse_lapic_addr_ovr (acpi_table_entry_header *header);
|
|
-extern int acpi_parse_lapic_nmi (acpi_table_entry_header *header);
|
|
-#endif /*CONFIG_X86_LOCAL_APIC*/
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
-extern int acpi_parse_ioapic (acpi_table_entry_header *header);
|
|
-#endif /*CONFIG_X86_IO_APIC*/
|
|
-#endif /*CONFIG_ACPI*/
|
|
-
|
|
u8 bios_cpu_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
|
|
|
|
|
|
@@ -109,24 +94,20 @@ static int __init mpf_checksum(unsigned
|
|
static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
|
|
{
|
|
int cpu;
|
|
- unsigned char ver;
|
|
cpumask_t tmp_map;
|
|
+ char *bootup_cpu = "";
|
|
|
|
if (!(m->mpc_cpuflag & CPU_ENABLED)) {
|
|
disabled_cpus++;
|
|
return;
|
|
}
|
|
-
|
|
- printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
|
|
- m->mpc_apicid,
|
|
- (m->mpc_cpufeature & CPU_FAMILY_MASK)>>8,
|
|
- (m->mpc_cpufeature & CPU_MODEL_MASK)>>4,
|
|
- m->mpc_apicver);
|
|
-
|
|
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
|
|
- Dprintk(" Bootup CPU\n");
|
|
+ bootup_cpu = " (Bootup-CPU)";
|
|
boot_cpu_id = m->mpc_apicid;
|
|
}
|
|
+
|
|
+ printk(KERN_INFO "Processor #%d%s\n", m->mpc_apicid, bootup_cpu);
|
|
+
|
|
if (num_processors >= NR_CPUS) {
|
|
printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
|
|
" Processor ignored.\n", NR_CPUS);
|
|
@@ -137,24 +118,7 @@ static void __cpuinit MP_processor_info
|
|
cpus_complement(tmp_map, cpu_present_map);
|
|
cpu = first_cpu(tmp_map);
|
|
|
|
-#if MAX_APICS < 255
|
|
- if ((int)m->mpc_apicid > MAX_APICS) {
|
|
- printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
|
|
- m->mpc_apicid, MAX_APICS);
|
|
- return;
|
|
- }
|
|
-#endif
|
|
- ver = m->mpc_apicver;
|
|
-
|
|
physid_set(m->mpc_apicid, phys_cpu_present_map);
|
|
- /*
|
|
- * Validate version
|
|
- */
|
|
- if (ver == 0x0) {
|
|
- printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! fixing up to 0x10. (tell your hw vendor)\n", m->mpc_apicid);
|
|
- ver = 0x10;
|
|
- }
|
|
- apic_version[m->mpc_apicid] = ver;
|
|
if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) {
|
|
/*
|
|
* bios_cpu_apicid is required to have processors listed
|
|
@@ -185,37 +149,42 @@ static void __init MP_bus_info (struct m
|
|
Dprintk("Bus #%d is %s\n", m->mpc_busid, str);
|
|
|
|
if (strncmp(str, "ISA", 3) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
|
|
- } else if (strncmp(str, "EISA", 4) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA;
|
|
+ set_bit(m->mpc_busid, mp_bus_not_pci);
|
|
} else if (strncmp(str, "PCI", 3) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI;
|
|
+ clear_bit(m->mpc_busid, mp_bus_not_pci);
|
|
mp_bus_id_to_pci_bus[m->mpc_busid] = mp_current_pci_id;
|
|
mp_current_pci_id++;
|
|
- } else if (strncmp(str, "MCA", 3) == 0) {
|
|
- mp_bus_id_to_type[m->mpc_busid] = MP_BUS_MCA;
|
|
} else {
|
|
printk(KERN_ERR "Unknown bustype %s\n", str);
|
|
}
|
|
}
|
|
|
|
+static int bad_ioapic(unsigned long address)
|
|
+{
|
|
+ if (nr_ioapics >= MAX_IO_APICS) {
|
|
+ printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
|
|
+ "(found %d)\n", MAX_IO_APICS, nr_ioapics);
|
|
+ panic("Recompile kernel with bigger MAX_IO_APICS!\n");
|
|
+ }
|
|
+ if (!address) {
|
|
+ printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
|
|
+ " found in table, skipping!\n");
|
|
+ return 1;
|
|
+ }
|
|
+ return 0;
|
|
+}
|
|
+
|
|
static void __init MP_ioapic_info (struct mpc_config_ioapic *m)
|
|
{
|
|
if (!(m->mpc_flags & MPC_APIC_USABLE))
|
|
return;
|
|
|
|
- printk("I/O APIC #%d Version %d at 0x%X.\n",
|
|
- m->mpc_apicid, m->mpc_apicver, m->mpc_apicaddr);
|
|
- if (nr_ioapics >= MAX_IO_APICS) {
|
|
- printk(KERN_ERR "Max # of I/O APICs (%d) exceeded (found %d).\n",
|
|
- MAX_IO_APICS, nr_ioapics);
|
|
- panic("Recompile kernel with bigger MAX_IO_APICS!.\n");
|
|
- }
|
|
- if (!m->mpc_apicaddr) {
|
|
- printk(KERN_ERR "WARNING: bogus zero I/O APIC address"
|
|
- " found in MP table, skipping!\n");
|
|
+ printk("I/O APIC #%d at 0x%X.\n",
|
|
+ m->mpc_apicid, m->mpc_apicaddr);
|
|
+
|
|
+ if (bad_ioapic(m->mpc_apicaddr))
|
|
return;
|
|
- }
|
|
+
|
|
mp_ioapics[nr_ioapics] = *m;
|
|
nr_ioapics++;
|
|
}
|
|
@@ -239,19 +208,6 @@ static void __init MP_lintsrc_info (stru
|
|
m->mpc_irqtype, m->mpc_irqflag & 3,
|
|
(m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
|
|
m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
|
|
- /*
|
|
- * Well it seems all SMP boards in existence
|
|
- * use ExtINT/LVT1 == LINT0 and
|
|
- * NMI/LVT2 == LINT1 - the following check
|
|
- * will show us if this assumptions is false.
|
|
- * Until then we do not have to add baggage.
|
|
- */
|
|
- if ((m->mpc_irqtype == mp_ExtINT) &&
|
|
- (m->mpc_destapiclint != 0))
|
|
- BUG();
|
|
- if ((m->mpc_irqtype == mp_NMI) &&
|
|
- (m->mpc_destapiclint != 1))
|
|
- BUG();
|
|
}
|
|
|
|
/*
|
|
@@ -265,7 +221,7 @@ static int __init smp_read_mpc(struct mp
|
|
unsigned char *mpt=((unsigned char *)mpc)+count;
|
|
|
|
if (memcmp(mpc->mpc_signature,MPC_SIGNATURE,4)) {
|
|
- printk("SMP mptable: bad signature [%c%c%c%c]!\n",
|
|
+ printk("MPTABLE: bad signature [%c%c%c%c]!\n",
|
|
mpc->mpc_signature[0],
|
|
mpc->mpc_signature[1],
|
|
mpc->mpc_signature[2],
|
|
@@ -273,31 +229,31 @@ static int __init smp_read_mpc(struct mp
|
|
return 0;
|
|
}
|
|
if (mpf_checksum((unsigned char *)mpc,mpc->mpc_length)) {
|
|
- printk("SMP mptable: checksum error!\n");
|
|
+ printk("MPTABLE: checksum error!\n");
|
|
return 0;
|
|
}
|
|
if (mpc->mpc_spec!=0x01 && mpc->mpc_spec!=0x04) {
|
|
- printk(KERN_ERR "SMP mptable: bad table version (%d)!!\n",
|
|
+ printk(KERN_ERR "MPTABLE: bad table version (%d)!!\n",
|
|
mpc->mpc_spec);
|
|
return 0;
|
|
}
|
|
if (!mpc->mpc_lapic) {
|
|
- printk(KERN_ERR "SMP mptable: null local APIC address!\n");
|
|
+ printk(KERN_ERR "MPTABLE: null local APIC address!\n");
|
|
return 0;
|
|
}
|
|
memcpy(str,mpc->mpc_oem,8);
|
|
- str[8]=0;
|
|
- printk(KERN_INFO "OEM ID: %s ",str);
|
|
+ str[8] = 0;
|
|
+ printk(KERN_INFO "MPTABLE: OEM ID: %s ",str);
|
|
|
|
memcpy(str,mpc->mpc_productid,12);
|
|
- str[12]=0;
|
|
- printk("Product ID: %s ",str);
|
|
+ str[12] = 0;
|
|
+ printk("MPTABLE: Product ID: %s ",str);
|
|
|
|
- printk("APIC at: 0x%X\n",mpc->mpc_lapic);
|
|
+ printk("MPTABLE: APIC at: 0x%X\n",mpc->mpc_lapic);
|
|
|
|
/* save the local APIC address, it might be non-default */
|
|
if (!acpi_lapic)
|
|
- mp_lapic_addr = mpc->mpc_lapic;
|
|
+ mp_lapic_addr = mpc->mpc_lapic;
|
|
|
|
/*
|
|
* Now process the configuration blocks.
|
|
@@ -309,7 +265,7 @@ static int __init smp_read_mpc(struct mp
|
|
struct mpc_config_processor *m=
|
|
(struct mpc_config_processor *)mpt;
|
|
if (!acpi_lapic)
|
|
- MP_processor_info(m);
|
|
+ MP_processor_info(m);
|
|
mpt += sizeof(*m);
|
|
count += sizeof(*m);
|
|
break;
|
|
@@ -328,8 +284,8 @@ static int __init smp_read_mpc(struct mp
|
|
struct mpc_config_ioapic *m=
|
|
(struct mpc_config_ioapic *)mpt;
|
|
MP_ioapic_info(m);
|
|
- mpt+=sizeof(*m);
|
|
- count+=sizeof(*m);
|
|
+ mpt += sizeof(*m);
|
|
+ count += sizeof(*m);
|
|
break;
|
|
}
|
|
case MP_INTSRC:
|
|
@@ -338,8 +294,8 @@ static int __init smp_read_mpc(struct mp
|
|
(struct mpc_config_intsrc *)mpt;
|
|
|
|
MP_intsrc_info(m);
|
|
- mpt+=sizeof(*m);
|
|
- count+=sizeof(*m);
|
|
+ mpt += sizeof(*m);
|
|
+ count += sizeof(*m);
|
|
break;
|
|
}
|
|
case MP_LINTSRC:
|
|
@@ -347,15 +303,15 @@ static int __init smp_read_mpc(struct mp
|
|
struct mpc_config_lintsrc *m=
|
|
(struct mpc_config_lintsrc *)mpt;
|
|
MP_lintsrc_info(m);
|
|
- mpt+=sizeof(*m);
|
|
- count+=sizeof(*m);
|
|
+ mpt += sizeof(*m);
|
|
+ count += sizeof(*m);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
clustered_apic_check();
|
|
if (!num_processors)
|
|
- printk(KERN_ERR "SMP mptable: no processors registered!\n");
|
|
+ printk(KERN_ERR "MPTABLE: no processors registered!\n");
|
|
return num_processors;
|
|
}
|
|
|
|
@@ -451,13 +407,10 @@ static inline void __init construct_defa
|
|
* 2 CPUs, numbered 0 & 1.
|
|
*/
|
|
processor.mpc_type = MP_PROCESSOR;
|
|
- /* Either an integrated APIC or a discrete 82489DX. */
|
|
- processor.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
+ processor.mpc_apicver = 0;
|
|
processor.mpc_cpuflag = CPU_ENABLED;
|
|
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
|
|
- (boot_cpu_data.x86_model << 4) |
|
|
- boot_cpu_data.x86_mask;
|
|
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
|
|
+ processor.mpc_cpufeature = 0;
|
|
+ processor.mpc_featureflag = 0;
|
|
processor.mpc_reserved[0] = 0;
|
|
processor.mpc_reserved[1] = 0;
|
|
for (i = 0; i < 2; i++) {
|
|
@@ -476,14 +429,6 @@ static inline void __init construct_defa
|
|
case 5:
|
|
memcpy(bus.mpc_bustype, "ISA ", 6);
|
|
break;
|
|
- case 2:
|
|
- case 6:
|
|
- case 3:
|
|
- memcpy(bus.mpc_bustype, "EISA ", 6);
|
|
- break;
|
|
- case 4:
|
|
- case 7:
|
|
- memcpy(bus.mpc_bustype, "MCA ", 6);
|
|
}
|
|
MP_bus_info(&bus);
|
|
if (mpc_default_type > 4) {
|
|
@@ -494,7 +439,7 @@ static inline void __init construct_defa
|
|
|
|
ioapic.mpc_type = MP_IOAPIC;
|
|
ioapic.mpc_apicid = 2;
|
|
- ioapic.mpc_apicver = mpc_default_type > 4 ? 0x10 : 0x01;
|
|
+ ioapic.mpc_apicver = 0;
|
|
ioapic.mpc_flags = MPC_APIC_USABLE;
|
|
ioapic.mpc_apicaddr = 0xFEC00000;
|
|
MP_ioapic_info(&ioapic);
|
|
@@ -537,13 +482,6 @@ void __init get_smp_config (void)
|
|
printk(KERN_INFO "Using ACPI for processor (LAPIC) configuration information\n");
|
|
|
|
printk("Intel MultiProcessor Specification v1.%d\n", mpf->mpf_specification);
|
|
- if (mpf->mpf_feature2 & (1<<7)) {
|
|
- printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
|
|
- pic_mode = 1;
|
|
- } else {
|
|
- printk(KERN_INFO " Virtual Wire compatibility mode.\n");
|
|
- pic_mode = 0;
|
|
- }
|
|
|
|
/*
|
|
* Now see if we need to read further.
|
|
@@ -620,7 +558,7 @@ static int __init smp_scan_config (unsig
|
|
return 0;
|
|
}
|
|
|
|
-void __init find_intel_smp (void)
|
|
+void __init find_smp_config(void)
|
|
{
|
|
unsigned int address;
|
|
|
|
@@ -637,9 +575,7 @@ void __init find_intel_smp (void)
|
|
smp_scan_config(0xF0000,0x10000))
|
|
return;
|
|
/*
|
|
- * If it is an SMP machine we should know now, unless the
|
|
- * configuration is in an EISA/MCA bus machine with an
|
|
- * extended bios data area.
|
|
+ * If it is an SMP machine we should know now.
|
|
*
|
|
* there is a real-mode segmented pointer pointing to the
|
|
* 4K EBDA area at 0x40E, calculate and scan it here.
|
|
@@ -660,64 +596,38 @@ void __init find_intel_smp (void)
|
|
printk(KERN_INFO "No mptable found.\n");
|
|
}
|
|
|
|
-/*
|
|
- * - Intel MP Configuration Table
|
|
- */
|
|
-void __init find_smp_config (void)
|
|
-{
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
- find_intel_smp();
|
|
-#endif
|
|
-}
|
|
-
|
|
-
|
|
/* --------------------------------------------------------------------------
|
|
ACPI-based MP Configuration
|
|
-------------------------------------------------------------------------- */
|
|
|
|
#ifdef CONFIG_ACPI
|
|
|
|
-void __init mp_register_lapic_address (
|
|
- u64 address)
|
|
+void __init mp_register_lapic_address(u64 address)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
mp_lapic_addr = (unsigned long) address;
|
|
-
|
|
set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
|
|
-
|
|
if (boot_cpu_id == -1U)
|
|
boot_cpu_id = GET_APIC_ID(apic_read(APIC_ID));
|
|
-
|
|
- Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
|
|
#endif
|
|
}
|
|
|
|
-
|
|
-void __cpuinit mp_register_lapic (
|
|
- u8 id,
|
|
- u8 enabled)
|
|
+void __cpuinit mp_register_lapic (u8 id, u8 enabled)
|
|
{
|
|
struct mpc_config_processor processor;
|
|
int boot_cpu = 0;
|
|
|
|
- if (id >= MAX_APICS) {
|
|
- printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
|
|
- id, MAX_APICS);
|
|
- return;
|
|
- }
|
|
-
|
|
- if (id == boot_cpu_physical_apicid)
|
|
+ if (id == boot_cpu_id)
|
|
boot_cpu = 1;
|
|
|
|
#ifndef CONFIG_XEN
|
|
processor.mpc_type = MP_PROCESSOR;
|
|
processor.mpc_apicid = id;
|
|
- processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR));
|
|
+ processor.mpc_apicver = 0;
|
|
processor.mpc_cpuflag = (enabled ? CPU_ENABLED : 0);
|
|
processor.mpc_cpuflag |= (boot_cpu ? CPU_BOOTPROCESSOR : 0);
|
|
- processor.mpc_cpufeature = (boot_cpu_data.x86 << 8) |
|
|
- (boot_cpu_data.x86_model << 4) | boot_cpu_data.x86_mask;
|
|
- processor.mpc_featureflag = boot_cpu_data.x86_capability[0];
|
|
+ processor.mpc_cpufeature = 0;
|
|
+ processor.mpc_featureflag = 0;
|
|
processor.mpc_reserved[0] = 0;
|
|
processor.mpc_reserved[1] = 0;
|
|
#endif
|
|
@@ -725,8 +635,6 @@ void __cpuinit mp_register_lapic (
|
|
MP_processor_info(&processor);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
-
|
|
#define MP_ISA_BUS 0
|
|
#define MP_MAX_IOAPIC_PIN 127
|
|
|
|
@@ -737,11 +645,9 @@ static struct mp_ioapic_routing {
|
|
u32 pin_programmed[4];
|
|
} mp_ioapic_routing[MAX_IO_APICS];
|
|
|
|
-
|
|
-static int mp_find_ioapic (
|
|
- int gsi)
|
|
+static int mp_find_ioapic(int gsi)
|
|
{
|
|
- int i = 0;
|
|
+ int i = 0;
|
|
|
|
/* Find the IOAPIC that manages this GSI. */
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
@@ -751,28 +657,15 @@ static int mp_find_ioapic (
|
|
}
|
|
|
|
printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
|
|
-
|
|
return -1;
|
|
}
|
|
-
|
|
|
|
-void __init mp_register_ioapic (
|
|
- u8 id,
|
|
- u32 address,
|
|
- u32 gsi_base)
|
|
+void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
|
|
{
|
|
- int idx = 0;
|
|
+ int idx = 0;
|
|
|
|
- if (nr_ioapics >= MAX_IO_APICS) {
|
|
- printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
|
|
- "(found %d)\n", MAX_IO_APICS, nr_ioapics);
|
|
- panic("Recompile kernel with bigger MAX_IO_APICS!\n");
|
|
- }
|
|
- if (!address) {
|
|
- printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
|
|
- " found in MADT table, skipping!\n");
|
|
+ if (bad_ioapic(address))
|
|
return;
|
|
- }
|
|
|
|
idx = nr_ioapics++;
|
|
|
|
@@ -784,7 +677,7 @@ void __init mp_register_ioapic (
|
|
set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
|
|
#endif
|
|
mp_ioapics[idx].mpc_apicid = id;
|
|
- mp_ioapics[idx].mpc_apicver = io_apic_get_version(idx);
|
|
+ mp_ioapics[idx].mpc_apicver = 0;
|
|
|
|
/*
|
|
* Build basic IRQ lookup table to facilitate gsi->io_apic lookups
|
|
@@ -795,21 +688,15 @@ void __init mp_register_ioapic (
|
|
mp_ioapic_routing[idx].gsi_end = gsi_base +
|
|
io_apic_get_redir_entries(idx);
|
|
|
|
- printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
|
|
+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, address 0x%x, "
|
|
"GSI %d-%d\n", idx, mp_ioapics[idx].mpc_apicid,
|
|
- mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
|
|
+ mp_ioapics[idx].mpc_apicaddr,
|
|
mp_ioapic_routing[idx].gsi_start,
|
|
mp_ioapic_routing[idx].gsi_end);
|
|
-
|
|
- return;
|
|
}
|
|
|
|
-
|
|
-void __init mp_override_legacy_irq (
|
|
- u8 bus_irq,
|
|
- u8 polarity,
|
|
- u8 trigger,
|
|
- u32 gsi)
|
|
+void __init
|
|
+mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
|
|
{
|
|
struct mpc_config_intsrc intsrc;
|
|
int ioapic = -1;
|
|
@@ -847,22 +734,18 @@ void __init mp_override_legacy_irq (
|
|
mp_irqs[mp_irq_entries] = intsrc;
|
|
if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
|
panic("Max # of irq sources exceeded!\n");
|
|
-
|
|
- return;
|
|
}
|
|
|
|
-
|
|
-void __init mp_config_acpi_legacy_irqs (void)
|
|
+void __init mp_config_acpi_legacy_irqs(void)
|
|
{
|
|
struct mpc_config_intsrc intsrc;
|
|
- int i = 0;
|
|
- int ioapic = -1;
|
|
+ int i = 0;
|
|
+ int ioapic = -1;
|
|
|
|
/*
|
|
* Fabricate the legacy ISA bus (bus #31).
|
|
*/
|
|
- mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA;
|
|
- Dprintk("Bus #%d is ISA\n", MP_ISA_BUS);
|
|
+ set_bit(MP_ISA_BUS, mp_bus_not_pci);
|
|
|
|
/*
|
|
* Locate the IOAPIC that manages the ISA IRQs (0-15).
|
|
@@ -915,24 +798,13 @@ void __init mp_config_acpi_legacy_irqs (
|
|
if (++mp_irq_entries == MAX_IRQ_SOURCES)
|
|
panic("Max # of irq sources exceeded!\n");
|
|
}
|
|
-
|
|
- return;
|
|
}
|
|
|
|
-#define MAX_GSI_NUM 4096
|
|
-
|
|
int mp_register_gsi(u32 gsi, int triggering, int polarity)
|
|
{
|
|
- int ioapic = -1;
|
|
- int ioapic_pin = 0;
|
|
- int idx, bit = 0;
|
|
- static int pci_irq = 16;
|
|
- /*
|
|
- * Mapping between Global System Interrupts, which
|
|
- * represent all possible interrupts, to the IRQs
|
|
- * assigned to actual devices.
|
|
- */
|
|
- static int gsi_to_irq[MAX_GSI_NUM];
|
|
+ int ioapic = -1;
|
|
+ int ioapic_pin = 0;
|
|
+ int idx, bit = 0;
|
|
|
|
if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
|
|
return gsi;
|
|
@@ -965,47 +837,14 @@ int mp_register_gsi(u32 gsi, int trigger
|
|
if ((1<<bit) & mp_ioapic_routing[ioapic].pin_programmed[idx]) {
|
|
Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n",
|
|
mp_ioapic_routing[ioapic].apic_id, ioapic_pin);
|
|
- return gsi_to_irq[gsi];
|
|
+ return gsi;
|
|
}
|
|
|
|
mp_ioapic_routing[ioapic].pin_programmed[idx] |= (1<<bit);
|
|
|
|
- if (triggering == ACPI_LEVEL_SENSITIVE) {
|
|
- /*
|
|
- * For PCI devices assign IRQs in order, avoiding gaps
|
|
- * due to unused I/O APIC pins.
|
|
- */
|
|
- int irq = gsi;
|
|
- if (gsi < MAX_GSI_NUM) {
|
|
- /*
|
|
- * Retain the VIA chipset work-around (gsi > 15), but
|
|
- * avoid a problem where the 8254 timer (IRQ0) is setup
|
|
- * via an override (so it's not on pin 0 of the ioapic),
|
|
- * and at the same time, the pin 0 interrupt is a PCI
|
|
- * type. The gsi > 15 test could cause these two pins
|
|
- * to be shared as IRQ0, and they are not shareable.
|
|
- * So test for this condition, and if necessary, avoid
|
|
- * the pin collision.
|
|
- */
|
|
- if (gsi > 15 || (gsi == 0 && !timer_uses_ioapic_pin_0))
|
|
- gsi = pci_irq++;
|
|
- /*
|
|
- * Don't assign IRQ used by ACPI SCI
|
|
- */
|
|
- if (gsi == acpi_fadt.sci_int)
|
|
- gsi = pci_irq++;
|
|
- gsi_to_irq[irq] = gsi;
|
|
- } else {
|
|
- printk(KERN_ERR "GSI %u is too high\n", gsi);
|
|
- return gsi;
|
|
- }
|
|
- }
|
|
-
|
|
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
|
|
triggering == ACPI_EDGE_SENSITIVE ? 0 : 1,
|
|
polarity == ACPI_ACTIVE_HIGH ? 0 : 1);
|
|
return gsi;
|
|
}
|
|
-
|
|
-#endif /*CONFIG_X86_IO_APIC*/
|
|
#endif /*CONFIG_ACPI*/
|
|
--- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c 2008-08-07 12:44:36.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/process_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -89,25 +89,24 @@ void idle_notifier_unregister(struct not
|
|
}
|
|
EXPORT_SYMBOL(idle_notifier_unregister);
|
|
|
|
-enum idle_state { CPU_IDLE, CPU_NOT_IDLE };
|
|
-static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE;
|
|
-
|
|
void enter_idle(void)
|
|
{
|
|
- __get_cpu_var(idle_state) = CPU_IDLE;
|
|
+ write_pda(isidle, 1);
|
|
atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
|
|
}
|
|
|
|
static void __exit_idle(void)
|
|
{
|
|
- __get_cpu_var(idle_state) = CPU_NOT_IDLE;
|
|
+ if (test_and_clear_bit_pda(0, isidle) == 0)
|
|
+ return;
|
|
atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
|
|
}
|
|
|
|
/* Called from interrupts to signify idle end */
|
|
void exit_idle(void)
|
|
{
|
|
- if (current->pid | read_pda(irqcount))
|
|
+ /* idle loop has pid 0 */
|
|
+ if (current->pid)
|
|
return;
|
|
__exit_idle();
|
|
}
|
|
@@ -184,6 +183,9 @@ void cpu_idle (void)
|
|
play_dead();
|
|
enter_idle();
|
|
idle();
|
|
+ /* In many cases the interrupt that ended idle
|
|
+ has already called exit_idle. But some idle
|
|
+ loops can be woken up without interrupt. */
|
|
__exit_idle();
|
|
}
|
|
|
|
@@ -196,7 +198,7 @@ void cpu_idle (void)
|
|
void cpu_idle_wait(void)
|
|
{
|
|
unsigned int cpu, this_cpu = get_cpu();
|
|
- cpumask_t map;
|
|
+ cpumask_t map, tmp = current->cpus_allowed;
|
|
|
|
set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
|
|
put_cpu();
|
|
@@ -219,6 +221,8 @@ void cpu_idle_wait(void)
|
|
}
|
|
cpus_and(map, map, cpu_online_map);
|
|
} while (!cpus_empty(map));
|
|
+
|
|
+ set_cpus_allowed(current, tmp);
|
|
}
|
|
EXPORT_SYMBOL_GPL(cpu_idle_wait);
|
|
|
|
@@ -250,9 +254,9 @@ void __show_regs(struct pt_regs * regs)
|
|
print_modules();
|
|
printk("Pid: %d, comm: %.20s %s %s %.*s\n",
|
|
current->pid, current->comm, print_tainted(),
|
|
- system_utsname.release,
|
|
- (int)strcspn(system_utsname.version, " "),
|
|
- system_utsname.version);
|
|
+ init_utsname()->release,
|
|
+ (int)strcspn(init_utsname()->version, " "),
|
|
+ init_utsname()->version);
|
|
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
|
|
printk_address(regs->rip);
|
|
printk("RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
|
|
@@ -310,6 +314,7 @@ void exit_thread(void)
|
|
|
|
kfree(t->io_bitmap_ptr);
|
|
t->io_bitmap_ptr = NULL;
|
|
+ clear_thread_flag(TIF_IO_BITMAP);
|
|
/*
|
|
* Careful, clear this in the TSS too:
|
|
*/
|
|
@@ -340,6 +345,7 @@ void flush_thread(void)
|
|
if (t->flags & _TIF_IA32)
|
|
current_thread_info()->status |= TS_COMPAT;
|
|
}
|
|
+ t->flags &= ~_TIF_DEBUG;
|
|
|
|
tsk->thread.debugreg0 = 0;
|
|
tsk->thread.debugreg1 = 0;
|
|
@@ -432,7 +438,7 @@ int copy_thread(int nr, unsigned long cl
|
|
asm("mov %%es,%0" : "=m" (p->thread.es));
|
|
asm("mov %%ds,%0" : "=m" (p->thread.ds));
|
|
|
|
- if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
|
|
+ if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
|
|
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
|
|
if (!p->thread.io_bitmap_ptr) {
|
|
p->thread.io_bitmap_max = 0;
|
|
@@ -440,6 +446,7 @@ int copy_thread(int nr, unsigned long cl
|
|
}
|
|
memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
|
|
IO_BITMAP_BYTES);
|
|
+ set_tsk_thread_flag(p, TIF_IO_BITMAP);
|
|
}
|
|
|
|
/*
|
|
@@ -474,6 +481,30 @@ static inline void __save_init_fpu( stru
|
|
}
|
|
|
|
/*
|
|
+ * This special macro can be used to load a debugging register
|
|
+ */
|
|
+#define loaddebug(thread,r) set_debugreg(thread->debugreg ## r, r)
|
|
+
|
|
+static inline void __switch_to_xtra(struct task_struct *prev_p,
|
|
+ struct task_struct *next_p)
|
|
+{
|
|
+ struct thread_struct *prev, *next;
|
|
+
|
|
+ prev = &prev_p->thread,
|
|
+ next = &next_p->thread;
|
|
+
|
|
+ if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
+ loaddebug(next, 0);
|
|
+ loaddebug(next, 1);
|
|
+ loaddebug(next, 2);
|
|
+ loaddebug(next, 3);
|
|
+ /* no 4 and 5 */
|
|
+ loaddebug(next, 6);
|
|
+ loaddebug(next, 7);
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
* switch_to(x,y) should switch tasks from x to y.
|
|
*
|
|
* This could still be optimized:
|
|
@@ -501,6 +532,10 @@ __switch_to(struct task_struct *prev_p,
|
|
#endif
|
|
multicall_entry_t _mcl[8], *mcl = _mcl;
|
|
|
|
+ /* we're going to use this soon, after a few expensive things */
|
|
+ if (next_p->fpu_counter>5)
|
|
+ prefetch(&next->i387.fxsave);
|
|
+
|
|
/*
|
|
* This is basically '__unlazy_fpu', except that we queue a
|
|
* multicall to indicate FPU task switch, rather than
|
|
@@ -513,7 +548,8 @@ __switch_to(struct task_struct *prev_p,
|
|
mcl->op = __HYPERVISOR_fpu_taskswitch;
|
|
mcl->args[0] = 1;
|
|
mcl++;
|
|
- }
|
|
+ } else
|
|
+ prev_p->fpu_counter = 0;
|
|
|
|
/*
|
|
* Reload esp0, LDT and the page table pointer:
|
|
@@ -608,21 +644,29 @@ __switch_to(struct task_struct *prev_p,
|
|
write_pda(oldrsp, next->userrsp);
|
|
write_pda(pcurrent, next_p);
|
|
write_pda(kernelstack,
|
|
- task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
|
|
+ (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET);
|
|
+#ifdef CONFIG_CC_STACKPROTECTOR
|
|
+ write_pda(stack_canary, next_p->stack_canary);
|
|
+
|
|
+ /*
|
|
+ * Build time only check to make sure the stack_canary is at
|
|
+ * offset 40 in the pda; this is a gcc ABI requirement
|
|
+ */
|
|
+ BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
|
|
+#endif
|
|
|
|
/*
|
|
* Now maybe reload the debug registers
|
|
*/
|
|
- if (unlikely(next->debugreg7)) {
|
|
- set_debugreg(next->debugreg0, 0);
|
|
- set_debugreg(next->debugreg1, 1);
|
|
- set_debugreg(next->debugreg2, 2);
|
|
- set_debugreg(next->debugreg3, 3);
|
|
- /* no 4 and 5 */
|
|
- set_debugreg(next->debugreg6, 6);
|
|
- set_debugreg(next->debugreg7, 7);
|
|
- }
|
|
+ if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW))
|
|
+ __switch_to_xtra(prev_p, next_p);
|
|
|
|
+ /* If the task has used fpu the last 5 timeslices, just do a full
|
|
+ * restore of the math state immediately to avoid the trap; the
|
|
+ * chances of needing FPU soon are obviously high now
|
|
+ */
|
|
+ if (next_p->fpu_counter>5)
|
|
+ math_state_restore();
|
|
return prev_p;
|
|
}
|
|
|
|
@@ -842,7 +886,7 @@ int dump_task_regs(struct task_struct *t
|
|
|
|
unsigned long arch_align_stack(unsigned long sp)
|
|
{
|
|
- if (randomize_va_space)
|
|
+ if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
|
|
sp -= get_random_int() % 8192;
|
|
return sp & ~0xf;
|
|
}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup_64-xen.c 2009-06-23 09:28:21.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/setup_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -88,9 +88,6 @@ extern struct edid_info edid_info;
|
|
shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page;
|
|
EXPORT_SYMBOL(HYPERVISOR_shared_info);
|
|
|
|
-extern char hypercall_page[PAGE_SIZE];
|
|
-EXPORT_SYMBOL(hypercall_page);
|
|
-
|
|
static int xen_panic_event(struct notifier_block *, unsigned long, void *);
|
|
static struct notifier_block xen_panic_block = {
|
|
xen_panic_event, NULL, 0 /* try to go last */
|
|
@@ -118,16 +115,6 @@ EXPORT_SYMBOL(boot_cpu_data);
|
|
|
|
unsigned long mmu_cr4_features;
|
|
|
|
-int acpi_disabled;
|
|
-EXPORT_SYMBOL(acpi_disabled);
|
|
-#ifdef CONFIG_ACPI
|
|
-extern int __initdata acpi_ht;
|
|
-extern acpi_interrupt_flags acpi_sci_flags;
|
|
-int __initdata acpi_force = 0;
|
|
-#endif
|
|
-
|
|
-int acpi_numa __initdata;
|
|
-
|
|
/* Boot loader ID as an integer, for the benefit of proc_dointvec */
|
|
int bootloader_type;
|
|
|
|
@@ -151,10 +138,6 @@ struct sys_desc_table_struct {
|
|
|
|
struct edid_info edid_info;
|
|
EXPORT_SYMBOL_GPL(edid_info);
|
|
-struct e820map e820;
|
|
-#ifdef CONFIG_XEN
|
|
-struct e820map machine_e820;
|
|
-#endif
|
|
|
|
extern int root_mountflags;
|
|
|
|
@@ -181,9 +164,6 @@ struct resource standard_io_resources[]
|
|
.flags = IORESOURCE_BUSY | IORESOURCE_IO }
|
|
};
|
|
|
|
-#define STANDARD_IO_RESOURCES \
|
|
- (sizeof standard_io_resources / sizeof standard_io_resources[0])
|
|
-
|
|
#define IORESOURCE_RAM (IORESOURCE_BUSY | IORESOURCE_MEM)
|
|
|
|
struct resource data_resource = {
|
|
@@ -230,9 +210,6 @@ static struct resource adapter_rom_resou
|
|
.flags = IORESOURCE_ROM }
|
|
};
|
|
|
|
-#define ADAPTER_ROM_RESOURCES \
|
|
- (sizeof adapter_rom_resources / sizeof adapter_rom_resources[0])
|
|
-
|
|
static struct resource video_rom_resource = {
|
|
.name = "Video ROM",
|
|
.start = 0xc0000,
|
|
@@ -309,7 +286,8 @@ static void __init probe_roms(void)
|
|
}
|
|
|
|
/* check for adapter roms on 2k boundaries */
|
|
- for (i = 0; i < ADAPTER_ROM_RESOURCES && start < upper; start += 2048) {
|
|
+ for (i = 0; i < ARRAY_SIZE(adapter_rom_resources) && start < upper;
|
|
+ start += 2048) {
|
|
rom = isa_bus_to_virt(start);
|
|
if (!romsignature(rom))
|
|
continue;
|
|
@@ -329,187 +307,22 @@ static void __init probe_roms(void)
|
|
}
|
|
}
|
|
|
|
-/* Check for full argument with no trailing characters */
|
|
-static int fullarg(char *p, char *arg)
|
|
+#ifdef CONFIG_PROC_VMCORE
|
|
+/* elfcorehdr= specifies the location of elf core header
|
|
+ * stored by the crashed kernel. This option will be passed
|
|
+ * by kexec loader to the capture kernel.
|
|
+ */
|
|
+static int __init setup_elfcorehdr(char *arg)
|
|
{
|
|
- int l = strlen(arg);
|
|
- return !memcmp(p, arg, l) && (p[l] == 0 || isspace(p[l]));
|
|
+ char *end;
|
|
+ if (!arg)
|
|
+ return -EINVAL;
|
|
+ elfcorehdr_addr = memparse(arg, &end);
|
|
+ return end > arg ? 0 : -EINVAL;
|
|
}
|
|
-
|
|
-static __init void parse_cmdline_early (char ** cmdline_p)
|
|
-{
|
|
- char c = ' ', *to = command_line, *from = COMMAND_LINE;
|
|
- int len = 0;
|
|
- int userdef = 0;
|
|
-
|
|
- for (;;) {
|
|
- if (c != ' ')
|
|
- goto next_char;
|
|
-
|
|
-#ifdef CONFIG_SMP
|
|
- /*
|
|
- * If the BIOS enumerates physical processors before logical,
|
|
- * maxcpus=N at enumeration-time can be used to disable HT.
|
|
- */
|
|
- else if (!memcmp(from, "maxcpus=", 8)) {
|
|
- extern unsigned int maxcpus;
|
|
-
|
|
- maxcpus = simple_strtoul(from + 8, NULL, 0);
|
|
- }
|
|
-#endif
|
|
-#ifdef CONFIG_ACPI
|
|
- /* "acpi=off" disables both ACPI table parsing and interpreter init */
|
|
- if (fullarg(from,"acpi=off"))
|
|
- disable_acpi();
|
|
-
|
|
- if (fullarg(from, "acpi=force")) {
|
|
- /* add later when we do DMI horrors: */
|
|
- acpi_force = 1;
|
|
- acpi_disabled = 0;
|
|
- }
|
|
-
|
|
- /* acpi=ht just means: do ACPI MADT parsing
|
|
- at bootup, but don't enable the full ACPI interpreter */
|
|
- if (fullarg(from, "acpi=ht")) {
|
|
- if (!acpi_force)
|
|
- disable_acpi();
|
|
- acpi_ht = 1;
|
|
- }
|
|
- else if (fullarg(from, "pci=noacpi"))
|
|
- acpi_disable_pci();
|
|
- else if (fullarg(from, "acpi=noirq"))
|
|
- acpi_noirq_set();
|
|
-
|
|
- else if (fullarg(from, "acpi_sci=edge"))
|
|
- acpi_sci_flags.trigger = 1;
|
|
- else if (fullarg(from, "acpi_sci=level"))
|
|
- acpi_sci_flags.trigger = 3;
|
|
- else if (fullarg(from, "acpi_sci=high"))
|
|
- acpi_sci_flags.polarity = 1;
|
|
- else if (fullarg(from, "acpi_sci=low"))
|
|
- acpi_sci_flags.polarity = 3;
|
|
-
|
|
- /* acpi=strict disables out-of-spec workarounds */
|
|
- else if (fullarg(from, "acpi=strict")) {
|
|
- acpi_strict = 1;
|
|
- }
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
- else if (fullarg(from, "acpi_skip_timer_override"))
|
|
- acpi_skip_timer_override = 1;
|
|
-#endif
|
|
-#endif
|
|
-
|
|
-#ifndef CONFIG_XEN
|
|
- if (fullarg(from, "nolapic") || fullarg(from, "disableapic")) {
|
|
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
|
|
- disable_apic = 1;
|
|
- }
|
|
-
|
|
- if (fullarg(from, "noapic"))
|
|
- skip_ioapic_setup = 1;
|
|
-
|
|
- if (fullarg(from,"apic")) {
|
|
- skip_ioapic_setup = 0;
|
|
- ioapic_force = 1;
|
|
- }
|
|
-#endif
|
|
-
|
|
- if (!memcmp(from, "mem=", 4))
|
|
- parse_memopt(from+4, &from);
|
|
-
|
|
- if (!memcmp(from, "memmap=", 7)) {
|
|
- /* exactmap option is for used defined memory */
|
|
- if (!memcmp(from+7, "exactmap", 8)) {
|
|
-#ifdef CONFIG_CRASH_DUMP
|
|
- /* If we are doing a crash dump, we
|
|
- * still need to know the real mem
|
|
- * size before original memory map is
|
|
- * reset.
|
|
- */
|
|
- saved_max_pfn = e820_end_of_ram();
|
|
-#endif
|
|
- from += 8+7;
|
|
- end_pfn_map = 0;
|
|
- e820.nr_map = 0;
|
|
- userdef = 1;
|
|
- }
|
|
- else {
|
|
- parse_memmapopt(from+7, &from);
|
|
- userdef = 1;
|
|
- }
|
|
- }
|
|
-
|
|
-#ifdef CONFIG_NUMA
|
|
- if (!memcmp(from, "numa=", 5))
|
|
- numa_setup(from+5);
|
|
+early_param("elfcorehdr", setup_elfcorehdr);
|
|
#endif
|
|
|
|
- if (!memcmp(from,"iommu=",6)) {
|
|
- iommu_setup(from+6);
|
|
- }
|
|
-
|
|
- if (fullarg(from,"oops=panic"))
|
|
- panic_on_oops = 1;
|
|
-
|
|
- if (!memcmp(from, "noexec=", 7))
|
|
- nonx_setup(from + 7);
|
|
-
|
|
-#ifdef CONFIG_KEXEC
|
|
- /* crashkernel=size@addr specifies the location to reserve for
|
|
- * a crash kernel. By reserving this memory we guarantee
|
|
- * that linux never set's it up as a DMA target.
|
|
- * Useful for holding code to do something appropriate
|
|
- * after a kernel panic.
|
|
- */
|
|
- else if (!memcmp(from, "crashkernel=", 12)) {
|
|
-#ifndef CONFIG_XEN
|
|
- unsigned long size, base;
|
|
- size = memparse(from+12, &from);
|
|
- if (*from == '@') {
|
|
- base = memparse(from+1, &from);
|
|
- /* FIXME: Do I want a sanity check
|
|
- * to validate the memory range?
|
|
- */
|
|
- crashk_res.start = base;
|
|
- crashk_res.end = base + size - 1;
|
|
- }
|
|
-#else
|
|
- printk("Ignoring crashkernel command line, "
|
|
- "parameter will be supplied by xen\n");
|
|
-#endif
|
|
- }
|
|
-#endif
|
|
-
|
|
-#ifdef CONFIG_PROC_VMCORE
|
|
- /* elfcorehdr= specifies the location of elf core header
|
|
- * stored by the crashed kernel. This option will be passed
|
|
- * by kexec loader to the capture kernel.
|
|
- */
|
|
- else if(!memcmp(from, "elfcorehdr=", 11))
|
|
- elfcorehdr_addr = memparse(from+11, &from);
|
|
-#endif
|
|
-
|
|
-#if defined(CONFIG_HOTPLUG_CPU) && !defined(CONFIG_XEN)
|
|
- else if (!memcmp(from, "additional_cpus=", 16))
|
|
- setup_additional_cpus(from+16);
|
|
-#endif
|
|
-
|
|
- next_char:
|
|
- c = *(from++);
|
|
- if (!c)
|
|
- break;
|
|
- if (COMMAND_LINE_SIZE <= ++len)
|
|
- break;
|
|
- *(to++) = c;
|
|
- }
|
|
- if (userdef) {
|
|
- printk(KERN_INFO "user-defined physical RAM map:\n");
|
|
- e820_print_map("user");
|
|
- }
|
|
- *to = '\0';
|
|
- *cmdline_p = command_line;
|
|
-}
|
|
-
|
|
#ifndef CONFIG_NUMA
|
|
static void __init
|
|
contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
|
|
@@ -521,13 +334,13 @@ contig_initmem_init(unsigned long start_
|
|
if (bootmap == -1L)
|
|
panic("Cannot find bootmem map of size %ld\n",bootmap_size);
|
|
bootmap_size = init_bootmem(bootmap >> PAGE_SHIFT, end_pfn);
|
|
+ e820_register_active_regions(0, start_pfn, end_pfn);
|
|
#ifdef CONFIG_XEN
|
|
if (xen_start_info->nr_pages < end_pfn)
|
|
- e820_bootmem_free(NODE_DATA(0), 0,
|
|
- xen_start_info->nr_pages<<PAGE_SHIFT);
|
|
+ free_bootmem_with_active_regions(0, xen_start_info->nr_pages);
|
|
else
|
|
#endif
|
|
- e820_bootmem_free(NODE_DATA(0), 0, end_pfn << PAGE_SHIFT);
|
|
+ free_bootmem_with_active_regions(0, end_pfn);
|
|
reserve_bootmem(bootmap, bootmap_size);
|
|
}
|
|
#endif
|
|
@@ -589,6 +402,10 @@ static void discover_ebda(void)
|
|
void __init setup_arch(char **cmdline_p)
|
|
{
|
|
#ifdef CONFIG_XEN
|
|
+ extern struct e820map machine_e820;
|
|
+
|
|
+ printk(KERN_INFO "Command line: %s\n", saved_command_line);
|
|
+
|
|
/* Register a call for panic conditions. */
|
|
atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block);
|
|
|
|
@@ -614,6 +431,8 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
ARCH_SETUP
|
|
#else
|
|
+ printk(KERN_INFO "Command line: %s\n", saved_command_line);
|
|
+
|
|
ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV);
|
|
screen_info = SCREEN_INFO;
|
|
edid_info = EDID_INFO;
|
|
@@ -641,16 +460,22 @@ void __init setup_arch(char **cmdline_p)
|
|
data_resource.start = virt_to_phys(&_etext);
|
|
data_resource.end = virt_to_phys(&_edata)-1;
|
|
|
|
- parse_cmdline_early(cmdline_p);
|
|
-
|
|
early_identify_cpu(&boot_cpu_data);
|
|
|
|
+ strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
|
|
+ *cmdline_p = command_line;
|
|
+
|
|
+ parse_early_param();
|
|
+
|
|
+ finish_e820_parsing();
|
|
+
|
|
+ e820_register_active_regions(0, 0, -1UL);
|
|
/*
|
|
* partially used pages are not usable - thus
|
|
* we are rounding upwards:
|
|
*/
|
|
end_pfn = e820_end_of_ram();
|
|
- num_physpages = end_pfn; /* for pfn_valid */
|
|
+ num_physpages = end_pfn;
|
|
|
|
check_efer();
|
|
|
|
@@ -661,6 +486,14 @@ void __init setup_arch(char **cmdline_p)
|
|
if (is_initial_xendomain())
|
|
dmi_scan_machine();
|
|
|
|
+ /* How many end-of-memory variables you have, grandma! */
|
|
+ max_low_pfn = end_pfn;
|
|
+ max_pfn = end_pfn;
|
|
+ high_memory = (void *)__va(end_pfn * PAGE_SIZE - 1) + 1;
|
|
+
|
|
+ /* Remove active ranges so rediscovery with NUMA-awareness happens */
|
|
+ remove_all_active_ranges();
|
|
+
|
|
#ifdef CONFIG_ACPI_NUMA
|
|
/*
|
|
* Parse SRAT to discover nodes.
|
|
@@ -850,16 +683,16 @@ void __init setup_arch(char **cmdline_p)
|
|
BUG();
|
|
}
|
|
|
|
+#ifdef CONFIG_ACPI
|
|
if (!is_initial_xendomain()) {
|
|
acpi_disabled = 1;
|
|
-#ifdef CONFIG_ACPI
|
|
acpi_ht = 0;
|
|
-#endif
|
|
}
|
|
#endif
|
|
+#endif
|
|
|
|
-#ifndef CONFIG_XEN
|
|
- check_ioapic();
|
|
+#if defined(CONFIG_PCI) && !defined(CONFIG_XEN)
|
|
+ early_quirks();
|
|
#endif
|
|
|
|
zap_low_mappings(0);
|
|
@@ -909,6 +742,7 @@ void __init setup_arch(char **cmdline_p)
|
|
e820_reserve_resources(machine_e820.map, machine_e820.nr_map);
|
|
#else
|
|
e820_reserve_resources(e820.map, e820.nr_map);
|
|
+ e820_mark_nosave_regions();
|
|
#endif
|
|
|
|
request_resource(&iomem_resource, &video_ram_resource);
|
|
@@ -916,7 +750,7 @@ void __init setup_arch(char **cmdline_p)
|
|
{
|
|
unsigned i;
|
|
/* request I/O space for devices used on all i[345]86 PCs */
|
|
- for (i = 0; i < STANDARD_IO_RESOURCES; i++)
|
|
+ for (i = 0; i < ARRAY_SIZE(standard_io_resources); i++)
|
|
request_resource(&ioport_resource, &standard_io_resources[i]);
|
|
}
|
|
|
|
@@ -1100,7 +934,7 @@ static void __init amd_detect_cmp(struct
|
|
#endif
|
|
}
|
|
|
|
-static void __init init_amd(struct cpuinfo_x86 *c)
|
|
+static void __cpuinit init_amd(struct cpuinfo_x86 *c)
|
|
{
|
|
unsigned level;
|
|
|
|
@@ -1156,6 +990,12 @@ static void __init init_amd(struct cpuin
|
|
|
|
/* Fix cpuid4 emulation for more */
|
|
num_cache_leaves = 3;
|
|
+
|
|
+ /* When there is only one core no need to synchronize RDTSC */
|
|
+ if (num_possible_cpus() == 1)
|
|
+ set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
+ else
|
|
+ clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
}
|
|
|
|
static void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
@@ -1237,8 +1077,7 @@ static void srat_detect_node(void)
|
|
node = first_node(node_online_map);
|
|
numa_set_node(cpu, node);
|
|
|
|
- if (acpi_numa > 0)
|
|
- printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
|
|
+ printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
|
|
#endif
|
|
}
|
|
|
|
@@ -1272,6 +1111,8 @@ static void __cpuinit init_intel(struct
|
|
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
|
|
(c->x86 == 0x6 && c->x86_model >= 0x0e))
|
|
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
|
|
+ if (c->x86 == 6)
|
|
+ set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability);
|
|
set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability);
|
|
c->x86_max_cores = intel_num_cpu_cores(c);
|
|
|
|
@@ -1492,8 +1333,8 @@ static int show_cpuinfo(struct seq_file
|
|
|
|
/* Intel-defined (#2) */
|
|
"pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
|
|
- "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
|
|
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
+ "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
|
|
+ NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
|
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
|
|
|
/* VIA/Cyrix/Centaur-defined */
|
|
--- head-2010-05-25.orig/arch/x86/kernel/setup64-xen.c 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/kernel/setup64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -31,7 +31,7 @@
|
|
#include <asm/hypervisor.h>
|
|
#endif
|
|
|
|
-char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
|
|
+char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
|
|
|
|
cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
|
|
|
|
@@ -55,8 +55,10 @@ Control non executable mappings for 64bi
|
|
on Enable(default)
|
|
off Disable
|
|
*/
|
|
-int __init nonx_setup(char *str)
|
|
+static int __init nonx_setup(char *str)
|
|
{
|
|
+ if (!str)
|
|
+ return -EINVAL;
|
|
if (!strncmp(str, "on", 2)) {
|
|
__supported_pte_mask |= _PAGE_NX;
|
|
do_not_nx = 0;
|
|
@@ -64,9 +66,9 @@ int __init nonx_setup(char *str)
|
|
do_not_nx = 1;
|
|
__supported_pte_mask &= ~_PAGE_NX;
|
|
}
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
-__setup("noexec=", nonx_setup); /* parsed early actually */
|
|
+early_param("noexec", nonx_setup);
|
|
|
|
int force_personality32 = 0;
|
|
|
|
@@ -102,12 +104,9 @@ void __init setup_per_cpu_areas(void)
|
|
#endif
|
|
|
|
/* Copy section for each CPU (we discard the original) */
|
|
- size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES);
|
|
-#ifdef CONFIG_MODULES
|
|
- if (size < PERCPU_ENOUGH_ROOM)
|
|
- size = PERCPU_ENOUGH_ROOM;
|
|
-#endif
|
|
+ size = PERCPU_ENOUGH_ROOM;
|
|
|
|
+ printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n", size);
|
|
for_each_cpu_mask (i, cpu_possible_map) {
|
|
char *ptr;
|
|
|
|
@@ -169,7 +168,10 @@ void pda_init(int cpu)
|
|
/* Setup up data that may be needed in __get_free_pages early */
|
|
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
|
|
#ifndef CONFIG_XEN
|
|
+ /* Memory clobbers used to order PDA accessed */
|
|
+ mb();
|
|
wrmsrl(MSR_GS_BASE, pda);
|
|
+ mb();
|
|
#else
|
|
if (HYPERVISOR_set_segment_base(SEGBASE_GS_KERNEL,
|
|
(unsigned long)pda))
|
|
@@ -302,28 +304,17 @@ void __cpuinit cpu_init (void)
|
|
* set up and load the per-CPU TSS
|
|
*/
|
|
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
|
+ static const unsigned int order[N_EXCEPTION_STACKS] = {
|
|
+ [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
|
+ [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
|
+ };
|
|
if (cpu) {
|
|
- static const unsigned int order[N_EXCEPTION_STACKS] = {
|
|
- [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
|
|
- [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
|
|
- };
|
|
-
|
|
estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
|
|
if (!estacks)
|
|
panic("Cannot allocate exception stack %ld %d\n",
|
|
v, cpu);
|
|
}
|
|
- switch (v + 1) {
|
|
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
|
|
- case DEBUG_STACK:
|
|
- cpu_pda(cpu)->debugstack = (unsigned long)estacks;
|
|
- estacks += DEBUG_STKSZ;
|
|
- break;
|
|
-#endif
|
|
- default:
|
|
- estacks += EXCEPTION_STKSZ;
|
|
- break;
|
|
- }
|
|
+ estacks += PAGE_SIZE << order[v];
|
|
orig_ist->ist[v] = t->ist[v] = (unsigned long)estacks;
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/kernel/smp_64-xen.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/smp_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -381,9 +381,8 @@ int smp_call_function_single (int cpu, v
|
|
/* prevent preemption and reschedule on another processor */
|
|
int me = get_cpu();
|
|
if (cpu == me) {
|
|
- WARN_ON(1);
|
|
put_cpu();
|
|
- return -EBUSY;
|
|
+ return 0;
|
|
}
|
|
spin_lock_bh(&call_lock);
|
|
__smp_call_function_single(cpu, func, info, nonatomic, wait);
|
|
@@ -501,7 +500,7 @@ void smp_send_stop(void)
|
|
#ifndef CONFIG_XEN
|
|
asmlinkage void smp_reschedule_interrupt(void)
|
|
#else
|
|
-asmlinkage irqreturn_t smp_reschedule_interrupt(void)
|
|
+asmlinkage irqreturn_t smp_reschedule_interrupt(int irq, void *ctx)
|
|
#endif
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
@@ -514,7 +513,7 @@ asmlinkage irqreturn_t smp_reschedule_in
|
|
#ifndef CONFIG_XEN
|
|
asmlinkage void smp_call_function_interrupt(void)
|
|
#else
|
|
-asmlinkage irqreturn_t smp_call_function_interrupt(void)
|
|
+asmlinkage irqreturn_t smp_call_function_interrupt(int irq, void *ctx)
|
|
#endif
|
|
{
|
|
void (*func) (void *info) = call_data->func;
|
|
@@ -545,31 +544,3 @@ asmlinkage irqreturn_t smp_call_function
|
|
return IRQ_HANDLED;
|
|
#endif
|
|
}
|
|
-
|
|
-int safe_smp_processor_id(void)
|
|
-{
|
|
-#ifdef CONFIG_XEN
|
|
- return smp_processor_id();
|
|
-#else
|
|
- unsigned apicid, i;
|
|
-
|
|
- if (disable_apic)
|
|
- return 0;
|
|
-
|
|
- apicid = hard_smp_processor_id();
|
|
- if (apicid < NR_CPUS && x86_cpu_to_apicid[apicid] == apicid)
|
|
- return apicid;
|
|
-
|
|
- for (i = 0; i < NR_CPUS; ++i) {
|
|
- if (x86_cpu_to_apicid[i] == apicid)
|
|
- return i;
|
|
- }
|
|
-
|
|
- /* No entries in x86_cpu_to_apicid? Either no MPS|ACPI,
|
|
- * or called too early. Either way, we must be CPU 0. */
|
|
- if (x86_cpu_to_apicid[0] == BAD_APICID)
|
|
- return 0;
|
|
-
|
|
- return 0; /* Should not happen */
|
|
-#endif
|
|
-}
|
|
--- head-2010-05-25.orig/arch/x86/kernel/traps_64-xen.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/traps_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -23,6 +23,7 @@
|
|
#include <linux/delay.h>
|
|
#include <linux/spinlock.h>
|
|
#include <linux/interrupt.h>
|
|
+#include <linux/kallsyms.h>
|
|
#include <linux/module.h>
|
|
#include <linux/moduleparam.h>
|
|
#include <linux/nmi.h>
|
|
@@ -45,6 +46,7 @@
|
|
#include <asm/pda.h>
|
|
#include <asm/proto.h>
|
|
#include <asm/nmi.h>
|
|
+#include <asm/stacktrace.h>
|
|
|
|
asmlinkage void divide_error(void);
|
|
asmlinkage void debug(void);
|
|
@@ -114,7 +116,6 @@ static int call_trace = 1;
|
|
#endif
|
|
|
|
#ifdef CONFIG_KALLSYMS
|
|
-# include <linux/kallsyms.h>
|
|
void printk_address(unsigned long address)
|
|
{
|
|
unsigned long offset = 0, symsize;
|
|
@@ -142,7 +143,7 @@ void printk_address(unsigned long addres
|
|
#endif
|
|
|
|
static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
|
|
- unsigned *usedp, const char **idp)
|
|
+ unsigned *usedp, char **idp)
|
|
{
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
static char ids[][8] = {
|
|
@@ -162,26 +163,7 @@ static unsigned long *in_exception_stack
|
|
* 'stack' is in one of them:
|
|
*/
|
|
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
|
|
- unsigned long end;
|
|
-
|
|
- /*
|
|
- * set 'end' to the end of the exception stack.
|
|
- */
|
|
- switch (k + 1) {
|
|
- /*
|
|
- * TODO: this block is not needed i think, because
|
|
- * setup64.c:cpu_init() sets up t->ist[DEBUG_STACK]
|
|
- * properly too.
|
|
- */
|
|
-#if DEBUG_STKSZ > EXCEPTION_STKSZ
|
|
- case DEBUG_STACK:
|
|
- end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ;
|
|
- break;
|
|
-#endif
|
|
- default:
|
|
- end = per_cpu(orig_ist, cpu).ist[k];
|
|
- break;
|
|
- }
|
|
+ unsigned long end = per_cpu(orig_ist, cpu).ist[k];
|
|
/*
|
|
* Is 'stack' above this exception frame's end?
|
|
* If yes then skip to the next frame.
|
|
@@ -236,13 +218,19 @@ static unsigned long *in_exception_stack
|
|
return NULL;
|
|
}
|
|
|
|
-static int show_trace_unwind(struct unwind_frame_info *info, void *context)
|
|
+struct ops_and_data {
|
|
+ struct stacktrace_ops *ops;
|
|
+ void *data;
|
|
+};
|
|
+
|
|
+static int dump_trace_unwind(struct unwind_frame_info *info, void *context)
|
|
{
|
|
+ struct ops_and_data *oad = (struct ops_and_data *)context;
|
|
int n = 0;
|
|
|
|
while (unwind(info) == 0 && UNW_PC(info)) {
|
|
n++;
|
|
- printk_address(UNW_PC(info));
|
|
+ oad->ops->address(oad->data, UNW_PC(info));
|
|
if (arch_unw_user_mode(info))
|
|
break;
|
|
}
|
|
@@ -256,13 +244,19 @@ static int show_trace_unwind(struct unwi
|
|
* severe exception (double fault, nmi, stack fault, debug, mce) hardware stack
|
|
*/
|
|
|
|
-void show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack)
|
|
+static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
|
|
{
|
|
- const unsigned cpu = safe_smp_processor_id();
|
|
+ void *t = (void *)tinfo;
|
|
+ return p > t && p < t + THREAD_SIZE - 3;
|
|
+}
|
|
+
|
|
+void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long * stack,
|
|
+ struct stacktrace_ops *ops, void *data)
|
|
+{
|
|
+ const unsigned cpu = smp_processor_id();
|
|
unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr;
|
|
unsigned used = 0;
|
|
-
|
|
- printk("\nCall Trace:\n");
|
|
+ struct thread_info *tinfo;
|
|
|
|
if (!tsk)
|
|
tsk = current;
|
|
@@ -270,32 +264,47 @@ void show_trace(struct task_struct *tsk,
|
|
if (call_trace >= 0) {
|
|
int unw_ret = 0;
|
|
struct unwind_frame_info info;
|
|
+ struct ops_and_data oad = { .ops = ops, .data = data };
|
|
|
|
if (regs) {
|
|
if (unwind_init_frame_info(&info, tsk, regs) == 0)
|
|
- unw_ret = show_trace_unwind(&info, NULL);
|
|
+ unw_ret = dump_trace_unwind(&info, &oad);
|
|
} else if (tsk == current)
|
|
- unw_ret = unwind_init_running(&info, show_trace_unwind, NULL);
|
|
+ unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
|
|
else {
|
|
if (unwind_init_blocked(&info, tsk) == 0)
|
|
- unw_ret = show_trace_unwind(&info, NULL);
|
|
+ unw_ret = dump_trace_unwind(&info, &oad);
|
|
}
|
|
if (unw_ret > 0) {
|
|
if (call_trace == 1 && !arch_unw_user_mode(&info)) {
|
|
- print_symbol("DWARF2 unwinder stuck at %s\n",
|
|
+ ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
|
|
UNW_PC(&info));
|
|
if ((long)UNW_SP(&info) < 0) {
|
|
- printk("Leftover inexact backtrace:\n");
|
|
+ ops->warning(data, "Leftover inexact backtrace:\n");
|
|
stack = (unsigned long *)UNW_SP(&info);
|
|
+ if (!stack)
|
|
+ return;
|
|
} else
|
|
- printk("Full inexact backtrace again:\n");
|
|
+ ops->warning(data, "Full inexact backtrace again:\n");
|
|
} else if (call_trace >= 1)
|
|
return;
|
|
else
|
|
- printk("Full inexact backtrace again:\n");
|
|
+ ops->warning(data, "Full inexact backtrace again:\n");
|
|
} else
|
|
- printk("Inexact backtrace:\n");
|
|
+ ops->warning(data, "Inexact backtrace:\n");
|
|
}
|
|
+ if (!stack) {
|
|
+ unsigned long dummy;
|
|
+ stack = &dummy;
|
|
+ if (tsk && tsk != current)
|
|
+ stack = (unsigned long *)tsk->thread.rsp;
|
|
+ }
|
|
+ /*
|
|
+ * Align the stack pointer on word boundary, later loops
|
|
+ * rely on that (and corruption / debug info bugs can cause
|
|
+ * unaligned values here):
|
|
+ */
|
|
+ stack = (unsigned long *)((unsigned long)stack & ~(sizeof(long)-1));
|
|
|
|
/*
|
|
* Print function call entries within a stack. 'cond' is the
|
|
@@ -305,7 +314,9 @@ void show_trace(struct task_struct *tsk,
|
|
#define HANDLE_STACK(cond) \
|
|
do while (cond) { \
|
|
unsigned long addr = *stack++; \
|
|
- if (kernel_text_address(addr)) { \
|
|
+ if (oops_in_progress ? \
|
|
+ __kernel_text_address(addr) : \
|
|
+ kernel_text_address(addr)) { \
|
|
/* \
|
|
* If the address is either in the text segment of the \
|
|
* kernel, or in the region which contains vmalloc'ed \
|
|
@@ -314,7 +325,7 @@ void show_trace(struct task_struct *tsk,
|
|
* down the cause of the crash will be able to figure \
|
|
* out the call path that was taken. \
|
|
*/ \
|
|
- printk_address(addr); \
|
|
+ ops->address(data, addr); \
|
|
} \
|
|
} while (0)
|
|
|
|
@@ -323,16 +334,17 @@ void show_trace(struct task_struct *tsk,
|
|
* current stack address. If the stacks consist of nested
|
|
* exceptions
|
|
*/
|
|
- for ( ; ; ) {
|
|
- const char *id;
|
|
+ for (;;) {
|
|
+ char *id;
|
|
unsigned long *estack_end;
|
|
estack_end = in_exception_stack(cpu, (unsigned long)stack,
|
|
&used, &id);
|
|
|
|
if (estack_end) {
|
|
- printk(" <%s>", id);
|
|
+ if (ops->stack(data, id) < 0)
|
|
+ break;
|
|
HANDLE_STACK (stack < estack_end);
|
|
- printk(" <EOE>");
|
|
+ ops->stack(data, "<EOE>");
|
|
/*
|
|
* We link to the next stack via the
|
|
* second-to-last pointer (index -2 to end) in the
|
|
@@ -347,7 +359,8 @@ void show_trace(struct task_struct *tsk,
|
|
(IRQSTACKSIZE - 64) / sizeof(*irqstack);
|
|
|
|
if (stack >= irqstack && stack < irqstack_end) {
|
|
- printk(" <IRQ>");
|
|
+ if (ops->stack(data, "IRQ") < 0)
|
|
+ break;
|
|
HANDLE_STACK (stack < irqstack_end);
|
|
/*
|
|
* We link to the next stack (which would be
|
|
@@ -356,7 +369,7 @@ void show_trace(struct task_struct *tsk,
|
|
*/
|
|
stack = (unsigned long *) (irqstack_end[-1]);
|
|
irqstack_end = NULL;
|
|
- printk(" <EOI>");
|
|
+ ops->stack(data, "EOI");
|
|
continue;
|
|
}
|
|
}
|
|
@@ -364,19 +377,58 @@ void show_trace(struct task_struct *tsk,
|
|
}
|
|
|
|
/*
|
|
- * This prints the process stack:
|
|
+ * This handles the process stack:
|
|
*/
|
|
- HANDLE_STACK (((long) stack & (THREAD_SIZE-1)) != 0);
|
|
+ tinfo = current_thread_info();
|
|
+ HANDLE_STACK (valid_stack_ptr(tinfo, stack));
|
|
#undef HANDLE_STACK
|
|
+}
|
|
+EXPORT_SYMBOL(dump_trace);
|
|
|
|
+static void
|
|
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
|
|
+{
|
|
+ print_symbol(msg, symbol);
|
|
printk("\n");
|
|
}
|
|
|
|
-static void _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long * rsp)
|
|
+static void print_trace_warning(void *data, char *msg)
|
|
+{
|
|
+ printk("%s\n", msg);
|
|
+}
|
|
+
|
|
+static int print_trace_stack(void *data, char *name)
|
|
+{
|
|
+ printk(" <%s> ", name);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static void print_trace_address(void *data, unsigned long addr)
|
|
+{
|
|
+ printk_address(addr);
|
|
+}
|
|
+
|
|
+static struct stacktrace_ops print_trace_ops = {
|
|
+ .warning = print_trace_warning,
|
|
+ .warning_symbol = print_trace_warning_symbol,
|
|
+ .stack = print_trace_stack,
|
|
+ .address = print_trace_address,
|
|
+};
|
|
+
|
|
+void
|
|
+show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack)
|
|
+{
|
|
+ printk("\nCall Trace:\n");
|
|
+ dump_trace(tsk, regs, stack, &print_trace_ops, NULL);
|
|
+ printk("\n");
|
|
+}
|
|
+
|
|
+static void
|
|
+_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *rsp)
|
|
{
|
|
unsigned long *stack;
|
|
int i;
|
|
- const int cpu = safe_smp_processor_id();
|
|
+ const int cpu = smp_processor_id();
|
|
unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr);
|
|
unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE);
|
|
|
|
@@ -430,7 +482,7 @@ void show_registers(struct pt_regs *regs
|
|
int i;
|
|
int in_kernel = !user_mode(regs);
|
|
unsigned long rsp;
|
|
- const int cpu = safe_smp_processor_id();
|
|
+ const int cpu = smp_processor_id();
|
|
struct task_struct *cur = cpu_pda(cpu)->pcurrent;
|
|
|
|
rsp = regs->rsp;
|
|
@@ -505,9 +557,11 @@ static unsigned int die_nest_count;
|
|
|
|
unsigned __kprobes long oops_begin(void)
|
|
{
|
|
- int cpu = safe_smp_processor_id();
|
|
+ int cpu = smp_processor_id();
|
|
unsigned long flags;
|
|
|
|
+ oops_enter();
|
|
+
|
|
/* racy, but better than risking deadlock. */
|
|
local_irq_save(flags);
|
|
if (!spin_trylock(&die_lock)) {
|
|
@@ -536,6 +590,7 @@ void __kprobes oops_end(unsigned long fl
|
|
spin_unlock_irqrestore(&die_lock, flags);
|
|
if (panic_on_oops)
|
|
panic("Fatal exception");
|
|
+ oops_exit();
|
|
}
|
|
|
|
void __kprobes __die(const char * str, struct pt_regs * regs, long err)
|
|
@@ -572,8 +627,8 @@ void die(const char * str, struct pt_reg
|
|
do_exit(SIGSEGV);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_LOCAL_APIC
|
|
-void __kprobes die_nmi(char *str, struct pt_regs *regs)
|
|
+#if defined(CONFIG_X86_LOCAL_APIC) || defined(CONFIG_SYSCTL)
|
|
+void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
|
|
{
|
|
unsigned long flags = oops_begin();
|
|
|
|
@@ -581,13 +636,12 @@ void __kprobes die_nmi(char *str, struct
|
|
* We are in trouble anyway, lets at least try
|
|
* to get a message out.
|
|
*/
|
|
- printk(str, safe_smp_processor_id());
|
|
+ printk(str, smp_processor_id());
|
|
show_registers(regs);
|
|
if (kexec_should_crash(current))
|
|
crash_kexec(regs);
|
|
- if (panic_on_timeout || panic_on_oops)
|
|
- panic("nmi watchdog");
|
|
- printk("console shuts up ...\n");
|
|
+ if (do_panic || panic_on_oops)
|
|
+ panic("Non maskable interrupt");
|
|
oops_end(flags);
|
|
nmi_exit();
|
|
local_irq_enable();
|
|
@@ -734,8 +788,15 @@ asmlinkage void __kprobes do_general_pro
|
|
static __kprobes void
|
|
mem_parity_error(unsigned char reason, struct pt_regs * regs)
|
|
{
|
|
- printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
|
|
- printk("You probably have a hardware problem with your RAM chips\n");
|
|
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
|
|
+ reason);
|
|
+ printk(KERN_EMERG "You probably have a hardware problem with your "
|
|
+ "RAM chips\n");
|
|
+
|
|
+ if (panic_on_unrecovered_nmi)
|
|
+ panic("NMI: Not continuing");
|
|
+
|
|
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
|
|
#if 0 /* XEN */
|
|
/* Clear and disable the memory parity error line. */
|
|
@@ -762,9 +823,15 @@ io_check_error(unsigned char reason, str
|
|
|
|
static __kprobes void
|
|
unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
|
|
-{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
|
|
- printk("Dazed and confused, but trying to continue\n");
|
|
- printk("Do you have a strange power saving mode enabled?\n");
|
|
+{
|
|
+ printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
|
|
+ reason);
|
|
+ printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
|
|
+
|
|
+ if (panic_on_unrecovered_nmi)
|
|
+ panic("NMI: Not continuing");
|
|
+
|
|
+ printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
|
|
}
|
|
|
|
/* Runs on IST stack. This code must keep interrupts off all the time.
|
|
@@ -789,12 +856,12 @@ asmlinkage __kprobes void default_do_nmi
|
|
* Ok, so this is none of the documented NMI sources,
|
|
* so it must be the NMI watchdog.
|
|
*/
|
|
- if (nmi_watchdog > 0) {
|
|
- nmi_watchdog_tick(regs,reason);
|
|
+ if (nmi_watchdog_tick(regs,reason))
|
|
return;
|
|
- }
|
|
#endif
|
|
- unknown_nmi_error(reason, regs);
|
|
+ if (!do_nmi_callback(regs,cpu))
|
|
+ unknown_nmi_error(reason, regs);
|
|
+
|
|
return;
|
|
}
|
|
if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
|
|
@@ -1081,6 +1148,7 @@ asmlinkage void math_state_restore(void)
|
|
init_fpu(me);
|
|
restore_fpu_checking(&me->thread.i387.fxsave);
|
|
task_thread_info(me)->status |= TS_USEDFPU;
|
|
+ me->fpu_counter++;
|
|
}
|
|
|
|
|
|
@@ -1141,24 +1209,30 @@ void __cpuinit smp_trap_init(trap_info_t
|
|
}
|
|
|
|
|
|
-/* Actual parsing is done early in setup.c. */
|
|
-static int __init oops_dummy(char *s)
|
|
+static int __init oops_setup(char *s)
|
|
{
|
|
- panic_on_oops = 1;
|
|
- return 1;
|
|
+ if (!s)
|
|
+ return -EINVAL;
|
|
+ if (!strcmp(s, "panic"))
|
|
+ panic_on_oops = 1;
|
|
+ return 0;
|
|
}
|
|
-__setup("oops=", oops_dummy);
|
|
+early_param("oops", oops_setup);
|
|
|
|
static int __init kstack_setup(char *s)
|
|
{
|
|
+ if (!s)
|
|
+ return -EINVAL;
|
|
kstack_depth_to_print = simple_strtoul(s,NULL,0);
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
-__setup("kstack=", kstack_setup);
|
|
+early_param("kstack", kstack_setup);
|
|
|
|
#ifdef CONFIG_STACK_UNWIND
|
|
static int __init call_trace_setup(char *s)
|
|
{
|
|
+ if (!s)
|
|
+ return -EINVAL;
|
|
if (strcmp(s, "old") == 0)
|
|
call_trace = -1;
|
|
else if (strcmp(s, "both") == 0)
|
|
@@ -1167,7 +1241,7 @@ static int __init call_trace_setup(char
|
|
call_trace = 1;
|
|
else if (strcmp(s, "new") == 0)
|
|
call_trace = 2;
|
|
- return 1;
|
|
+ return 0;
|
|
}
|
|
-__setup("call_trace=", call_trace_setup);
|
|
+early_param("call_trace", call_trace_setup);
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/kernel/vsyscall_64-xen.c 2007-06-18 08:38:13.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -26,6 +26,10 @@
|
|
#include <linux/seqlock.h>
|
|
#include <linux/jiffies.h>
|
|
#include <linux/sysctl.h>
|
|
+#include <linux/getcpu.h>
|
|
+#include <linux/cpu.h>
|
|
+#include <linux/smp.h>
|
|
+#include <linux/notifier.h>
|
|
|
|
#include <asm/vsyscall.h>
|
|
#include <asm/pgtable.h>
|
|
@@ -33,11 +37,15 @@
|
|
#include <asm/fixmap.h>
|
|
#include <asm/errno.h>
|
|
#include <asm/io.h>
|
|
+#include <asm/segment.h>
|
|
+#include <asm/desc.h>
|
|
+#include <asm/topology.h>
|
|
|
|
#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
|
|
|
|
int __sysctl_vsyscall __section_sysctl_vsyscall = 1;
|
|
seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED;
|
|
+int __vgetcpu_mode __section_vgetcpu_mode;
|
|
|
|
#include <asm/unistd.h>
|
|
|
|
@@ -61,8 +69,7 @@ static __always_inline void do_vgettimeo
|
|
sequence = read_seqbegin(&__xtime_lock);
|
|
|
|
sec = __xtime.tv_sec;
|
|
- usec = (__xtime.tv_nsec / 1000) +
|
|
- (__jiffies - __wall_jiffies) * (1000000 / HZ);
|
|
+ usec = __xtime.tv_nsec / 1000;
|
|
|
|
if (__vxtime.mode != VXTIME_HPET) {
|
|
t = get_cycles_sync();
|
|
@@ -72,7 +79,8 @@ static __always_inline void do_vgettimeo
|
|
__vxtime.tsc_quot) >> 32;
|
|
/* See comment in x86_64 do_gettimeofday. */
|
|
} else {
|
|
- usec += ((readl((void *)fix_to_virt(VSYSCALL_HPET) + 0xf0) -
|
|
+ usec += ((readl((void __iomem *)
|
|
+ fix_to_virt(VSYSCALL_HPET) + 0xf0) -
|
|
__vxtime.last) * __vxtime.quot) >> 32;
|
|
}
|
|
} while (read_seqretry(&__xtime_lock, sequence));
|
|
@@ -127,9 +135,46 @@ time_t __vsyscall(1) vtime(time_t *t)
|
|
return __xtime.tv_sec;
|
|
}
|
|
|
|
-long __vsyscall(2) venosys_0(void)
|
|
-{
|
|
- return -ENOSYS;
|
|
+/* Fast way to get current CPU and node.
|
|
+ This helps to do per node and per CPU caches in user space.
|
|
+ The result is not guaranteed without CPU affinity, but usually
|
|
+ works out because the scheduler tries to keep a thread on the same
|
|
+ CPU.
|
|
+
|
|
+ tcache must point to a two element sized long array.
|
|
+ All arguments can be NULL. */
|
|
+long __vsyscall(2)
|
|
+vgetcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *tcache)
|
|
+{
|
|
+ unsigned int dummy, p;
|
|
+ unsigned long j = 0;
|
|
+
|
|
+ /* Fast cache - only recompute value once per jiffies and avoid
|
|
+ relatively costly rdtscp/cpuid otherwise.
|
|
+ This works because the scheduler usually keeps the process
|
|
+ on the same CPU and this syscall doesn't guarantee its
|
|
+ results anyways.
|
|
+ We do this here because otherwise user space would do it on
|
|
+ its own in a likely inferior way (no access to jiffies).
|
|
+ If you don't like it pass NULL. */
|
|
+ if (tcache && tcache->blob[0] == (j = __jiffies)) {
|
|
+ p = tcache->blob[1];
|
|
+ } else if (__vgetcpu_mode == VGETCPU_RDTSCP) {
|
|
+ /* Load per CPU data from RDTSCP */
|
|
+ rdtscp(dummy, dummy, p);
|
|
+ } else {
|
|
+ /* Load per CPU data from GDT */
|
|
+ asm("lsl %1,%0" : "=r" (p) : "r" (__PER_CPU_SEG));
|
|
+ }
|
|
+ if (tcache) {
|
|
+ tcache->blob[0] = j;
|
|
+ tcache->blob[1] = p;
|
|
+ }
|
|
+ if (cpu)
|
|
+ *cpu = p & 0xfff;
|
|
+ if (node)
|
|
+ *node = p >> 12;
|
|
+ return 0;
|
|
}
|
|
|
|
long __vsyscall(3) venosys_1(void)
|
|
@@ -149,7 +194,8 @@ static int vsyscall_sysctl_change(ctl_ta
|
|
void __user *buffer, size_t *lenp, loff_t *ppos)
|
|
{
|
|
extern u16 vsysc1, vsysc2;
|
|
- u16 *map1, *map2;
|
|
+ u16 __iomem *map1;
|
|
+ u16 __iomem *map2;
|
|
int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
|
|
if (!write)
|
|
return ret;
|
|
@@ -164,11 +210,11 @@ static int vsyscall_sysctl_change(ctl_ta
|
|
goto out;
|
|
}
|
|
if (!sysctl_vsyscall) {
|
|
- *map1 = SYSCALL;
|
|
- *map2 = SYSCALL;
|
|
+ writew(SYSCALL, map1);
|
|
+ writew(SYSCALL, map2);
|
|
} else {
|
|
- *map1 = NOP2;
|
|
- *map2 = NOP2;
|
|
+ writew(NOP2, map1);
|
|
+ writew(NOP2, map2);
|
|
}
|
|
iounmap(map2);
|
|
out:
|
|
@@ -200,6 +246,48 @@ static ctl_table kernel_root_table2[] =
|
|
|
|
#endif
|
|
|
|
+/* Assume __initcall executes before all user space. Hopefully kmod
|
|
+ doesn't violate that. We'll find out if it does. */
|
|
+static void __cpuinit vsyscall_set_cpu(int cpu)
|
|
+{
|
|
+ unsigned long d;
|
|
+ unsigned long node = 0;
|
|
+#ifdef CONFIG_NUMA
|
|
+ node = cpu_to_node[cpu];
|
|
+#endif
|
|
+ if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP))
|
|
+ write_rdtscp_aux((node << 12) | cpu);
|
|
+
|
|
+ /* Store cpu number in limit so that it can be loaded quickly
|
|
+ in user space in vgetcpu.
|
|
+ 12 bits for the CPU and 8 bits for the node. */
|
|
+ d = 0x0f40000000000ULL;
|
|
+ d |= cpu;
|
|
+ d |= (node & 0xf) << 12;
|
|
+ d |= (node >> 4) << 48;
|
|
+ if (HYPERVISOR_update_descriptor(virt_to_machine(cpu_gdt(cpu)
|
|
+ + GDT_ENTRY_PER_CPU),
|
|
+ d))
|
|
+ BUG();
|
|
+}
|
|
+
|
|
+static void __cpuinit cpu_vsyscall_init(void *arg)
|
|
+{
|
|
+ /* preemption should be already off */
|
|
+ vsyscall_set_cpu(raw_smp_processor_id());
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+static int __cpuinit
|
|
+cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
|
|
+{
|
|
+ long cpu = (long)arg;
|
|
+ if (action == CPU_ONLINE)
|
|
+ smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
|
|
+ return NOTIFY_DONE;
|
|
+}
|
|
+#endif
|
|
+
|
|
static void __init map_vsyscall(void)
|
|
{
|
|
extern char __vsyscall_0;
|
|
@@ -214,13 +302,20 @@ static int __init vsyscall_init(void)
|
|
VSYSCALL_ADDR(__NR_vgettimeofday)));
|
|
BUG_ON((unsigned long) &vtime != VSYSCALL_ADDR(__NR_vtime));
|
|
BUG_ON((VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)));
|
|
+ BUG_ON((unsigned long) &vgetcpu != VSYSCALL_ADDR(__NR_vgetcpu));
|
|
map_vsyscall();
|
|
#ifdef CONFIG_XEN
|
|
sysctl_vsyscall = 0; /* disable vgettimeofay() */
|
|
+ if (boot_cpu_has(X86_FEATURE_RDTSCP))
|
|
+ vgetcpu_mode = VGETCPU_RDTSCP;
|
|
+ else
|
|
+ vgetcpu_mode = VGETCPU_LSL;
|
|
#endif
|
|
#ifdef CONFIG_SYSCTL
|
|
register_sysctl_table(kernel_root_table2, 0);
|
|
#endif
|
|
+ on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
|
|
+ hotcpu_notifier(cpu_vsyscall_notifier, 0);
|
|
return 0;
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/mm/fault_64-xen.c 2007-11-02 17:34:23.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/fault_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -40,8 +40,7 @@
|
|
#define PF_RSVD (1<<3)
|
|
#define PF_INSTR (1<<4)
|
|
|
|
-#ifdef CONFIG_KPROBES
|
|
-ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
|
|
+static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain);
|
|
|
|
/* Hook to register for page fault notifications */
|
|
int register_page_fault_notifier(struct notifier_block *nb)
|
|
@@ -49,11 +48,13 @@ int register_page_fault_notifier(struct
|
|
vmalloc_sync_all();
|
|
return atomic_notifier_chain_register(¬ify_page_fault_chain, nb);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(register_page_fault_notifier);
|
|
|
|
int unregister_page_fault_notifier(struct notifier_block *nb)
|
|
{
|
|
return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb);
|
|
}
|
|
+EXPORT_SYMBOL_GPL(unregister_page_fault_notifier);
|
|
|
|
static inline int notify_page_fault(enum die_val val, const char *str,
|
|
struct pt_regs *regs, long err, int trap, int sig)
|
|
@@ -67,13 +68,6 @@ static inline int notify_page_fault(enum
|
|
};
|
|
return atomic_notifier_call_chain(¬ify_page_fault_chain, val, &args);
|
|
}
|
|
-#else
|
|
-static inline int notify_page_fault(enum die_val val, const char *str,
|
|
- struct pt_regs *regs, long err, int trap, int sig)
|
|
-{
|
|
- return NOTIFY_DONE;
|
|
-}
|
|
-#endif
|
|
|
|
void bust_spinlocks(int yes)
|
|
{
|
|
@@ -102,7 +96,7 @@ void bust_spinlocks(int yes)
|
|
static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr,
|
|
unsigned long error_code)
|
|
{
|
|
- unsigned char *instr;
|
|
+ unsigned char __user *instr;
|
|
int scan_more = 1;
|
|
int prefetch = 0;
|
|
unsigned char *max_instr;
|
|
@@ -111,7 +105,7 @@ static noinline int is_prefetch(struct p
|
|
if (error_code & PF_INSTR)
|
|
return 0;
|
|
|
|
- instr = (unsigned char *)convert_rip_to_linear(current, regs);
|
|
+ instr = (unsigned char __user *)convert_rip_to_linear(current, regs);
|
|
max_instr = instr + 15;
|
|
|
|
if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
|
|
@@ -122,7 +116,7 @@ static noinline int is_prefetch(struct p
|
|
unsigned char instr_hi;
|
|
unsigned char instr_lo;
|
|
|
|
- if (__get_user(opcode, instr))
|
|
+ if (__get_user(opcode, (char __user *)instr))
|
|
break;
|
|
|
|
instr_hi = opcode & 0xf0;
|
|
@@ -160,7 +154,7 @@ static noinline int is_prefetch(struct p
|
|
case 0x00:
|
|
/* Prefetch instruction is 0x0F0D or 0x0F18 */
|
|
scan_more = 0;
|
|
- if (__get_user(opcode, instr))
|
|
+ if (__get_user(opcode, (char __user *)instr))
|
|
break;
|
|
prefetch = (instr_lo == 0xF) &&
|
|
(opcode == 0x0D || opcode == 0x18);
|
|
@@ -176,7 +170,7 @@ static noinline int is_prefetch(struct p
|
|
static int bad_address(void *p)
|
|
{
|
|
unsigned long dummy;
|
|
- return __get_user(dummy, (unsigned long *)p);
|
|
+ return __get_user(dummy, (unsigned long __user *)p);
|
|
}
|
|
|
|
void dump_pagetable(unsigned long address)
|
|
@@ -248,7 +242,7 @@ static int is_errata93(struct pt_regs *r
|
|
|
|
int unhandled_signal(struct task_struct *tsk, int sig)
|
|
{
|
|
- if (tsk->pid == 1)
|
|
+ if (is_init(tsk))
|
|
return 1;
|
|
if (tsk->ptrace & PT_PTRACED)
|
|
return 0;
|
|
@@ -300,7 +294,7 @@ static int vmalloc_fault(unsigned long a
|
|
if (pgd_none(*pgd))
|
|
set_pgd(pgd, *pgd_ref);
|
|
else
|
|
- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
|
|
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
|
|
|
/* Below here mismatches are bugs because these lower tables
|
|
are shared */
|
|
@@ -309,7 +303,7 @@ static int vmalloc_fault(unsigned long a
|
|
pud_ref = pud_offset(pgd_ref, address);
|
|
if (pud_none(*pud_ref))
|
|
return -1;
|
|
- if (pud_none(*pud) || pud_page(*pud) != pud_page(*pud_ref))
|
|
+ if (pud_none(*pud) || pud_page_vaddr(*pud) != pud_page_vaddr(*pud_ref))
|
|
BUG();
|
|
pmd = pmd_offset(pud, address);
|
|
pmd_ref = pmd_offset(pud_ref, address);
|
|
@@ -531,7 +525,7 @@ good_area:
|
|
case PF_PROT: /* read, present */
|
|
goto bad_area;
|
|
case 0: /* read, not present */
|
|
- if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
|
|
+ if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
|
|
goto bad_area;
|
|
}
|
|
|
|
@@ -647,7 +641,7 @@ no_context:
|
|
*/
|
|
out_of_memory:
|
|
up_read(&mm->mmap_sem);
|
|
- if (current->pid == 1) {
|
|
+ if (is_init(current)) {
|
|
yield();
|
|
goto again;
|
|
}
|
|
@@ -702,7 +696,7 @@ void vmalloc_sync_all(void)
|
|
if (pgd_none(*pgd))
|
|
set_pgd(pgd, *pgd_ref);
|
|
else
|
|
- BUG_ON(pgd_page(*pgd) != pgd_page(*pgd_ref));
|
|
+ BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref));
|
|
}
|
|
spin_unlock(&pgd_lock);
|
|
set_bit(pgd_index(address), insync);
|
|
--- head-2010-05-25.orig/arch/x86/mm/init_64-xen.c 2010-04-29 09:34:47.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/mm/init_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -61,8 +61,6 @@ EXPORT_SYMBOL(__kernel_page_user);
|
|
|
|
int after_bootmem;
|
|
|
|
-static unsigned long dma_reserve __initdata;
|
|
-
|
|
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
|
|
extern unsigned long start_pfn;
|
|
|
|
@@ -420,7 +418,6 @@ __init void *early_ioremap(unsigned long
|
|
|
|
/* actually usually some more */
|
|
if (size >= LARGE_PAGE_SIZE) {
|
|
- printk("SMBIOS area too long %lu\n", size);
|
|
return NULL;
|
|
}
|
|
set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE));
|
|
@@ -442,16 +439,24 @@ __init void early_iounmap(void *addr, un
|
|
#endif
|
|
|
|
static void __meminit
|
|
-phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end)
|
|
+phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end)
|
|
{
|
|
- int i, k;
|
|
+ int i = pmd_index(address);
|
|
|
|
- for (i = 0; i < PTRS_PER_PMD; pmd++, i++) {
|
|
+ for (; i < PTRS_PER_PMD; i++) {
|
|
unsigned long pte_phys;
|
|
+ pmd_t *pmd = pmd_page + i;
|
|
pte_t *pte, *pte_save;
|
|
+ int k;
|
|
|
|
if (address >= end)
|
|
break;
|
|
+
|
|
+ if (__pmd_val(*pmd)) {
|
|
+ address += PMD_SIZE;
|
|
+ continue;
|
|
+ }
|
|
+
|
|
pte = alloc_static_page(&pte_phys);
|
|
pte_save = pte;
|
|
for (k = 0; k < PTRS_PER_PTE; pte++, k++, address += PTE_SIZE) {
|
|
@@ -478,40 +483,35 @@ phys_pmd_init(pmd_t *pmd, unsigned long
|
|
static void __meminit
|
|
phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end)
|
|
{
|
|
- pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address));
|
|
-
|
|
- if (pmd_none(*pmd)) {
|
|
- spin_lock(&init_mm.page_table_lock);
|
|
- phys_pmd_init(pmd, address, end);
|
|
- spin_unlock(&init_mm.page_table_lock);
|
|
- __flush_tlb_all();
|
|
- }
|
|
+ pmd_t *pmd = pmd_offset(pud,0);
|
|
+ spin_lock(&init_mm.page_table_lock);
|
|
+ phys_pmd_init(pmd, address, end);
|
|
+ spin_unlock(&init_mm.page_table_lock);
|
|
+ __flush_tlb_all();
|
|
}
|
|
|
|
-static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end)
|
|
+static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end)
|
|
{
|
|
- long i = pud_index(address);
|
|
-
|
|
- pud = pud + i;
|
|
-
|
|
- if (after_bootmem && pud_val(*pud)) {
|
|
- phys_pmd_update(pud, address, end);
|
|
- return;
|
|
- }
|
|
+ int i = pud_index(addr);
|
|
|
|
- for (; i < PTRS_PER_PUD; pud++, i++) {
|
|
- unsigned long paddr, pmd_phys;
|
|
+ for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) {
|
|
+ unsigned long pmd_phys;
|
|
+ pud_t *pud = pud_page + pud_index(addr);
|
|
pmd_t *pmd;
|
|
|
|
- paddr = (address & PGDIR_MASK) + i*PUD_SIZE;
|
|
- if (paddr >= end)
|
|
+ if (addr >= end)
|
|
break;
|
|
|
|
+ if (__pud_val(*pud)) {
|
|
+ phys_pmd_update(pud, addr, end);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
pmd = alloc_static_page(&pmd_phys);
|
|
|
|
spin_lock(&init_mm.page_table_lock);
|
|
*pud = __pud(pmd_phys | _KERNPG_TABLE);
|
|
- phys_pmd_init(pmd, paddr, end);
|
|
+ phys_pmd_init(pmd, addr, end);
|
|
spin_unlock(&init_mm.page_table_lock);
|
|
|
|
early_make_page_readonly(pmd, XENFEAT_writable_page_tables);
|
|
@@ -800,69 +800,18 @@ void __cpuinit zap_low_mappings(int cpu)
|
|
#endif
|
|
}
|
|
|
|
-/* Compute zone sizes for the DMA and DMA32 zones in a node. */
|
|
-__init void
|
|
-size_zones(unsigned long *z, unsigned long *h,
|
|
- unsigned long start_pfn, unsigned long end_pfn)
|
|
-{
|
|
- int i;
|
|
- unsigned long w;
|
|
-
|
|
- for (i = 0; i < MAX_NR_ZONES; i++)
|
|
- z[i] = 0;
|
|
-
|
|
- if (start_pfn < MAX_DMA_PFN)
|
|
- z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
|
|
- if (start_pfn < MAX_DMA32_PFN) {
|
|
- unsigned long dma32_pfn = MAX_DMA32_PFN;
|
|
- if (dma32_pfn > end_pfn)
|
|
- dma32_pfn = end_pfn;
|
|
- z[ZONE_DMA32] = dma32_pfn - start_pfn;
|
|
- }
|
|
- z[ZONE_NORMAL] = end_pfn - start_pfn;
|
|
-
|
|
- /* Remove lower zones from higher ones. */
|
|
- w = 0;
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
- if (z[i])
|
|
- z[i] -= w;
|
|
- w += z[i];
|
|
- }
|
|
-
|
|
- /* Compute holes */
|
|
- w = start_pfn;
|
|
- for (i = 0; i < MAX_NR_ZONES; i++) {
|
|
- unsigned long s = w;
|
|
- w += z[i];
|
|
- h[i] = e820_hole_size(s, w);
|
|
- }
|
|
-
|
|
- /* Add the space pace needed for mem_map to the holes too. */
|
|
- for (i = 0; i < MAX_NR_ZONES; i++)
|
|
- h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
|
|
-
|
|
- /* The 16MB DMA zone has the kernel and other misc mappings.
|
|
- Account them too */
|
|
- if (h[ZONE_DMA]) {
|
|
- h[ZONE_DMA] += dma_reserve;
|
|
- if (h[ZONE_DMA] >= z[ZONE_DMA]) {
|
|
- printk(KERN_WARNING
|
|
- "Kernel too large and filling up ZONE_DMA?\n");
|
|
- h[ZONE_DMA] = z[ZONE_DMA];
|
|
- }
|
|
- }
|
|
-}
|
|
-
|
|
#ifndef CONFIG_NUMA
|
|
void __init paging_init(void)
|
|
{
|
|
- unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
|
|
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
|
|
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
|
|
+ max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN;
|
|
+ max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN;
|
|
+ max_zone_pfns[ZONE_NORMAL] = end_pfn;
|
|
|
|
memory_present(0, 0, end_pfn);
|
|
sparse_init();
|
|
- size_zones(zones, holes, 0, end_pfn);
|
|
- free_area_init_node(0, NODE_DATA(0), zones,
|
|
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
|
|
+ free_area_init_nodes(max_zone_pfns);
|
|
|
|
init_mm.context.pinned = 1;
|
|
}
|
|
@@ -916,36 +865,23 @@ void online_page(struct page *page)
|
|
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
/*
|
|
- * XXX: memory_add_physaddr_to_nid() is to find node id from physical address
|
|
- * via probe interface of sysfs. If acpi notifies hot-add event, then it
|
|
- * can tell node id by searching dsdt. But, probe interface doesn't have
|
|
- * node id. So, return 0 as node id at this time.
|
|
- */
|
|
-#ifdef CONFIG_NUMA
|
|
-int memory_add_physaddr_to_nid(u64 start)
|
|
-{
|
|
- return 0;
|
|
-}
|
|
-#endif
|
|
-
|
|
-/*
|
|
* Memory is added always to NORMAL zone. This means you will never get
|
|
* additional DMA/DMA32 memory.
|
|
*/
|
|
int arch_add_memory(int nid, u64 start, u64 size)
|
|
{
|
|
struct pglist_data *pgdat = NODE_DATA(nid);
|
|
- struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2;
|
|
+ struct zone *zone = pgdat->node_zones + ZONE_NORMAL;
|
|
unsigned long start_pfn = start >> PAGE_SHIFT;
|
|
unsigned long nr_pages = size >> PAGE_SHIFT;
|
|
int ret;
|
|
|
|
+ init_memory_mapping(start, (start + size -1));
|
|
+
|
|
ret = __add_pages(zone, start_pfn, nr_pages);
|
|
if (ret)
|
|
goto error;
|
|
|
|
- init_memory_mapping(start, (start + size -1));
|
|
-
|
|
return ret;
|
|
error:
|
|
printk("%s: Problem encountered in __add_pages!\n", __func__);
|
|
@@ -959,7 +895,17 @@ int remove_memory(u64 start, u64 size)
|
|
}
|
|
EXPORT_SYMBOL_GPL(remove_memory);
|
|
|
|
-#else /* CONFIG_MEMORY_HOTPLUG */
|
|
+#if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA)
|
|
+int memory_add_physaddr_to_nid(u64 start)
|
|
+{
|
|
+ return 0;
|
|
+}
|
|
+EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
|
|
+#endif
|
|
+
|
|
+#endif /* CONFIG_MEMORY_HOTPLUG */
|
|
+
|
|
+#ifdef CONFIG_MEMORY_HOTPLUG_RESERVE
|
|
/*
|
|
* Memory Hotadd without sparsemem. The mem_maps have been allocated in advance,
|
|
* just online the pages.
|
|
@@ -985,7 +931,7 @@ int __add_pages(struct zone *z, unsigned
|
|
}
|
|
return err;
|
|
}
|
|
-#endif /* CONFIG_MEMORY_HOTPLUG */
|
|
+#endif
|
|
|
|
static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules,
|
|
kcore_vsyscall;
|
|
@@ -997,12 +943,6 @@ void __init mem_init(void)
|
|
|
|
pci_iommu_alloc();
|
|
|
|
- /* How many end-of-memory variables you have, grandma! */
|
|
- max_low_pfn = end_pfn;
|
|
- max_pfn = end_pfn;
|
|
- num_physpages = end_pfn;
|
|
- high_memory = (void *) __va(end_pfn * PAGE_SIZE);
|
|
-
|
|
/* clear the zero-page */
|
|
memset(empty_zero_page, 0, PAGE_SIZE);
|
|
|
|
@@ -1014,13 +954,13 @@ void __init mem_init(void)
|
|
#else
|
|
totalram_pages = free_all_bootmem();
|
|
#endif
|
|
- /* XEN: init and count pages outside initial allocation. */
|
|
+ /* XEN: init pages outside initial allocation. */
|
|
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
|
|
ClearPageReserved(pfn_to_page(pfn));
|
|
init_page_count(pfn_to_page(pfn));
|
|
- totalram_pages++;
|
|
}
|
|
- reservedpages = end_pfn - totalram_pages - e820_hole_size(0, end_pfn);
|
|
+ reservedpages = end_pfn - totalram_pages -
|
|
+ absent_pages_in_range(0, end_pfn);
|
|
|
|
after_bootmem = 1;
|
|
|
|
@@ -1127,15 +1067,32 @@ void free_initrd_mem(unsigned long start
|
|
|
|
void __init reserve_bootmem_generic(unsigned long phys, unsigned len)
|
|
{
|
|
- /* Should check here against the e820 map to avoid double free */
|
|
#ifdef CONFIG_NUMA
|
|
int nid = phys_to_nid(phys);
|
|
+#endif
|
|
+ unsigned long pfn = phys >> PAGE_SHIFT;
|
|
+ if (pfn >= end_pfn) {
|
|
+ /* This can happen with kdump kernels when accessing firmware
|
|
+ tables. */
|
|
+ if (pfn < end_pfn_map)
|
|
+ return;
|
|
+ printk(KERN_ERR "reserve_bootmem: illegal reserve %lx %u\n",
|
|
+ phys, len);
|
|
+ return;
|
|
+ }
|
|
+
|
|
+ /* Should check here against the e820 map to avoid double free */
|
|
+#ifdef CONFIG_NUMA
|
|
reserve_bootmem_node(NODE_DATA(nid), phys, len);
|
|
#else
|
|
reserve_bootmem(phys, len);
|
|
#endif
|
|
- if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
|
|
+#ifndef CONFIG_XEN
|
|
+ if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) {
|
|
dma_reserve += len / PAGE_SIZE;
|
|
+ set_dma_reserve(dma_reserve);
|
|
+ }
|
|
+#endif
|
|
}
|
|
|
|
int kern_addr_valid(unsigned long addr)
|
|
--- head-2010-05-25.orig/arch/x86/mm/pageattr_64-xen.c 2009-03-18 10:39:31.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/mm/pageattr_64-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -377,8 +377,8 @@ static void revert_page(unsigned long ad
|
|
BUG_ON(pud_none(*pud));
|
|
pmd = pmd_offset(pud, address);
|
|
BUG_ON(__pmd_val(*pmd) & _PAGE_PSE);
|
|
- pgprot_val(ref_prot) |= _PAGE_PSE;
|
|
large_pte = mk_pte_phys(__pa(address) & LARGE_PAGE_MASK, ref_prot);
|
|
+ large_pte = pte_mkhuge(large_pte);
|
|
set_pte((pte_t *)pmd, large_pte);
|
|
}
|
|
|
|
@@ -388,32 +388,28 @@ __change_page_attr(unsigned long address
|
|
{
|
|
pte_t *kpte;
|
|
struct page *kpte_page;
|
|
- unsigned kpte_flags;
|
|
pgprot_t ref_prot2;
|
|
kpte = lookup_address(address);
|
|
if (!kpte) return 0;
|
|
kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK);
|
|
- kpte_flags = pte_val(*kpte);
|
|
if (pgprot_val(prot) != pgprot_val(ref_prot)) {
|
|
- if ((kpte_flags & _PAGE_PSE) == 0) {
|
|
+ if (!pte_huge(*kpte)) {
|
|
set_pte(kpte, pfn_pte(pfn, prot));
|
|
} else {
|
|
/*
|
|
* split_large_page will take the reference for this
|
|
* change_page_attr on the split page.
|
|
*/
|
|
-
|
|
struct page *split;
|
|
- ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE));
|
|
-
|
|
+ ref_prot2 = pte_pgprot(pte_clrhuge(*kpte));
|
|
split = split_large_page(address, prot, ref_prot2);
|
|
if (!split)
|
|
return -ENOMEM;
|
|
- set_pte(kpte,mk_pte(split, ref_prot2));
|
|
+ set_pte(kpte, mk_pte(split, ref_prot2));
|
|
kpte_page = split;
|
|
- }
|
|
+ }
|
|
page_private(kpte_page)++;
|
|
- } else if ((kpte_flags & _PAGE_PSE) == 0) {
|
|
+ } else if (!pte_huge(*kpte)) {
|
|
set_pte(kpte, pfn_pte(pfn, ref_prot));
|
|
BUG_ON(page_private(kpte_page) == 0);
|
|
page_private(kpte_page)--;
|
|
@@ -470,10 +466,12 @@ int change_page_attr_addr(unsigned long
|
|
* lowmem */
|
|
if (__pa(address) < KERNEL_TEXT_SIZE) {
|
|
unsigned long addr2;
|
|
- pgprot_t prot2 = prot;
|
|
+ pgprot_t prot2;
|
|
addr2 = __START_KERNEL_map + __pa(address);
|
|
- pgprot_val(prot2) &= ~_PAGE_NX;
|
|
- err = __change_page_attr(addr2, pfn, prot2, PAGE_KERNEL_EXEC);
|
|
+ /* Make sure the kernel mappings stay executable */
|
|
+ prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot)));
|
|
+ err = __change_page_attr(addr2, pfn, prot2,
|
|
+ PAGE_KERNEL_EXEC);
|
|
}
|
|
}
|
|
up_write(&init_mm.mmap_sem);
|
|
--- head-2010-05-25.orig/drivers/char/tpm/tpm_xen.c 2010-03-24 14:53:41.000000000 +0100
|
|
+++ head-2010-05-25/drivers/char/tpm/tpm_xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -85,8 +85,7 @@ static struct tpm_private *my_priv;
|
|
|
|
/* local function prototypes */
|
|
static irqreturn_t tpmif_int(int irq,
|
|
- void *tpm_priv,
|
|
- struct pt_regs *ptregs);
|
|
+ void *tpm_priv);
|
|
static void tpmif_rx_action(unsigned long unused);
|
|
static int tpmif_connect(struct xenbus_device *dev,
|
|
struct tpm_private *tp,
|
|
@@ -559,7 +558,7 @@ static void tpmif_rx_action(unsigned lon
|
|
}
|
|
|
|
|
|
-static irqreturn_t tpmif_int(int irq, void *tpm_priv, struct pt_regs *ptregs)
|
|
+static irqreturn_t tpmif_int(int irq, void *tpm_priv)
|
|
{
|
|
struct tpm_private *tp = tpm_priv;
|
|
unsigned long flags;
|
|
--- head-2010-05-25.orig/drivers/pci/Kconfig 2010-03-24 14:00:05.000000000 +0100
|
|
+++ head-2010-05-25/drivers/pci/Kconfig 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -64,7 +64,7 @@ config PCI_STUB
|
|
config HT_IRQ
|
|
bool "Interrupts on hypertransport devices"
|
|
default y
|
|
- depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
|
|
+ depends on PCI && X86_LOCAL_APIC && X86_IO_APIC && !XEN
|
|
help
|
|
This allows native hypertransport devices to use interrupts.
|
|
|
|
--- head-2010-05-25.orig/drivers/pci/msi-xen.c 2009-12-04 08:45:56.000000000 +0100
|
|
+++ head-2010-05-25/drivers/pci/msi-xen.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -6,6 +6,7 @@
|
|
* Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
|
|
*/
|
|
|
|
+#include <linux/err.h>
|
|
#include <linux/mm.h>
|
|
#include <linux/irq.h>
|
|
#include <linux/interrupt.h>
|
|
@@ -14,6 +15,7 @@
|
|
#include <linux/smp_lock.h>
|
|
#include <linux/pci.h>
|
|
#include <linux/proc_fs.h>
|
|
+#include <linux/msi.h>
|
|
|
|
#include <xen/evtchn.h>
|
|
|
|
@@ -26,14 +28,6 @@
|
|
|
|
static int pci_msi_enable = 1;
|
|
|
|
-static struct msi_ops *msi_ops;
|
|
-
|
|
-int msi_register(struct msi_ops *ops)
|
|
-{
|
|
- msi_ops = ops;
|
|
- return 0;
|
|
-}
|
|
-
|
|
static LIST_HEAD(msi_dev_head);
|
|
DEFINE_SPINLOCK(msi_dev_lock);
|
|
|
|
@@ -481,9 +475,9 @@ void pci_restore_msix_state(struct pci_d
|
|
* @dev: pointer to the pci_dev data structure of MSI device function
|
|
*
|
|
* Setup the MSI capability structure of device function with a single
|
|
- * MSI vector, regardless of device function is capable of handling
|
|
+ * MSI irq, regardless of device function is capable of handling
|
|
* multiple messages. A return of zero indicates the successful setup
|
|
- * of an entry zero with the new MSI vector or non-zero for otherwise.
|
|
+ * of an entry zero with the new MSI irq or non-zero for otherwise.
|
|
**/
|
|
static int msi_capability_init(struct pci_dev *dev)
|
|
{
|
|
@@ -497,11 +491,11 @@ static int msi_capability_init(struct pc
|
|
if (pirq < 0)
|
|
return -EBUSY;
|
|
|
|
- dev->irq = pirq;
|
|
/* Set MSI enabled bits */
|
|
enable_msi_mode(dev, pos, PCI_CAP_ID_MSI);
|
|
dev->msi_enabled = 1;
|
|
|
|
+ dev->irq = pirq;
|
|
return 0;
|
|
}
|
|
|
|
@@ -512,8 +506,8 @@ static int msi_capability_init(struct pc
|
|
* @nvec: number of @entries
|
|
*
|
|
* Setup the MSI-X capability structure of device function with a
|
|
- * single MSI-X vector. A return of zero indicates the successful setup of
|
|
- * requested MSI-X entries with allocated vectors or non-zero for otherwise.
|
|
+ * single MSI-X irq. A return of zero indicates the successful setup of
|
|
+ * requested MSI-X entries with allocated irqs or non-zero for otherwise.
|
|
**/
|
|
static int msix_capability_init(struct pci_dev *dev,
|
|
struct msix_entry *entries, int nvec)
|
|
@@ -562,12 +556,18 @@ static int msix_capability_init(struct p
|
|
}
|
|
|
|
if (i != nvec) {
|
|
+ int avail = i - 1;
|
|
for (j = --i; j >= 0; j--) {
|
|
msi_unmap_pirq(dev, entries[j].vector);
|
|
detach_pirq_entry(entries[j].entry, msi_dev_entry);
|
|
entries[j].vector = 0;
|
|
}
|
|
- return -EBUSY;
|
|
+ /* If we had some success report the number of irqs
|
|
+ * we succeeded in setting up.
|
|
+ */
|
|
+ if (avail <= 0)
|
|
+ avail = -EBUSY;
|
|
+ return avail;
|
|
}
|
|
|
|
enable_msi_mode(dev, pos, PCI_CAP_ID_MSIX);
|
|
@@ -577,11 +577,40 @@ static int msix_capability_init(struct p
|
|
}
|
|
|
|
/**
|
|
+ * pci_msi_supported - check whether MSI may be enabled on device
|
|
+ * @dev: pointer to the pci_dev data structure of MSI device function
|
|
+ *
|
|
+ * Look at global flags, the device itself, and its parent busses
|
|
+ * to return 0 if MSI are supported for the device.
|
|
+ **/
|
|
+static
|
|
+int pci_msi_supported(struct pci_dev * dev)
|
|
+{
|
|
+ struct pci_bus *bus;
|
|
+
|
|
+ /* MSI must be globally enabled and supported by the device */
|
|
+ if (!pci_msi_enable || !dev || dev->no_msi)
|
|
+ return -EINVAL;
|
|
+
|
|
+ /* Any bridge which does NOT route MSI transactions from its
|
|
+ * secondary bus to its primary bus must set NO_MSI flag on
|
|
+ * the secondary pci_bus.
|
|
+ * We expect only arch-specific PCI host bus controller driver
|
|
+ * or quirks for specific PCI bridges to be setting NO_MSI.
|
|
+ */
|
|
+ for (bus = dev->bus; bus; bus = bus->parent)
|
|
+ if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
|
|
+ return -EINVAL;
|
|
+
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+/**
|
|
* pci_enable_msi - configure device's MSI capability structure
|
|
* @dev: pointer to the pci_dev data structure of MSI device function
|
|
*
|
|
* Setup the MSI capability structure of device function with
|
|
- * a single MSI vector upon its software driver call to request for
|
|
+ * a single MSI irq upon its software driver call to request for
|
|
* MSI mode enabled on its hardware device function. A return of zero
|
|
* indicates the successful setup of an entry zero with the new MSI
|
|
* vector or non-zero for otherwise.
|
|
@@ -589,19 +618,11 @@ static int msix_capability_init(struct p
|
|
extern int pci_frontend_enable_msi(struct pci_dev *dev);
|
|
int pci_enable_msi(struct pci_dev* dev)
|
|
{
|
|
- struct pci_bus *bus;
|
|
- int pos, temp, status = -EINVAL;
|
|
+ int pos, temp, status;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
- if (!pci_msi_enable || !dev)
|
|
- return status;
|
|
-
|
|
- if (dev->no_msi)
|
|
- return status;
|
|
-
|
|
- for (bus = dev->bus; bus; bus = bus->parent)
|
|
- if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
|
|
- return -EINVAL;
|
|
+ if (pci_msi_supported(dev) < 0)
|
|
+ return -EINVAL;
|
|
|
|
status = msi_init();
|
|
if (status < 0)
|
|
@@ -631,10 +652,10 @@ int pci_enable_msi(struct pci_dev* dev)
|
|
if (!pos)
|
|
return -EINVAL;
|
|
|
|
- /* Check whether driver already requested for MSI-X vectors */
|
|
+ /* Check whether driver already requested for MSI-X irqs */
|
|
if (dev->msix_enabled) {
|
|
printk(KERN_INFO "PCI: %s: Can't enable MSI. "
|
|
- "Device already has MSI-X vectors assigned\n",
|
|
+ "Device already has MSI-X irq assigned\n",
|
|
pci_name(dev));
|
|
dev->irq = temp;
|
|
return -EINVAL;
|
|
@@ -699,37 +720,29 @@ void pci_disable_msi(struct pci_dev* dev
|
|
* pci_enable_msix - configure device's MSI-X capability structure
|
|
* @dev: pointer to the pci_dev data structure of MSI-X device function
|
|
* @entries: pointer to an array of MSI-X entries
|
|
- * @nvec: number of MSI-X vectors requested for allocation by device driver
|
|
+ * @nvec: number of MSI-X irqs requested for allocation by device driver
|
|
*
|
|
* Setup the MSI-X capability structure of device function with the number
|
|
- * of requested vectors upon its software driver call to request for
|
|
+ * of requested irqs upon its software driver call to request for
|
|
* MSI-X mode enabled on its hardware device function. A return of zero
|
|
* indicates the successful configuration of MSI-X capability structure
|
|
- * with new allocated MSI-X vectors. A return of < 0 indicates a failure.
|
|
+ * with new allocated MSI-X irqs. A return of < 0 indicates a failure.
|
|
* Or a return of > 0 indicates that driver request is exceeding the number
|
|
- * of vectors available. Driver should use the returned value to re-send
|
|
+ * of irqs available. Driver should use the returned value to re-send
|
|
* its request.
|
|
**/
|
|
extern int pci_frontend_enable_msix(struct pci_dev *dev,
|
|
struct msix_entry *entries, int nvec);
|
|
int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
|
|
{
|
|
- struct pci_bus *bus;
|
|
int status, pos, nr_entries;
|
|
int i, j, temp;
|
|
u16 control;
|
|
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
|
|
|
|
- if (!pci_msi_enable || !dev || !entries)
|
|
+ if (!entries || pci_msi_supported(dev) < 0)
|
|
return -EINVAL;
|
|
|
|
- if (dev->no_msi)
|
|
- return -EINVAL;
|
|
-
|
|
- for (bus = dev->bus; bus; bus = bus->parent)
|
|
- if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
|
|
- return -EINVAL;
|
|
-
|
|
#ifdef CONFIG_XEN_PCIDEV_FRONTEND
|
|
if (!is_initial_xendomain()) {
|
|
struct msi_pirq_entry *pirq_entry;
|
|
@@ -793,7 +806,7 @@ int pci_enable_msix(struct pci_dev* dev,
|
|
/* Check whether driver already requested for MSI vector */
|
|
if (dev->msi_enabled) {
|
|
printk(KERN_INFO "PCI: %s: Can't enable MSI-X. "
|
|
- "Device already has an MSI vector assigned\n",
|
|
+ "Device already has an MSI irq assigned\n",
|
|
pci_name(dev));
|
|
dev->irq = temp;
|
|
return -EINVAL;
|
|
@@ -861,11 +874,11 @@ void pci_disable_msix(struct pci_dev* de
|
|
}
|
|
|
|
/**
|
|
- * msi_remove_pci_irq_vectors - reclaim MSI(X) vectors to unused state
|
|
+ * msi_remove_pci_irq_vectors - reclaim MSI(X) irqs to unused state
|
|
* @dev: pointer to the pci_dev data structure of MSI(X) device function
|
|
*
|
|
* Being called during hotplug remove, from which the device function
|
|
- * is hot-removed. All previous assigned MSI/MSI-X vectors, if
|
|
+ * is hot-removed. All previous assigned MSI/MSI-X irqs, if
|
|
* allocated for this device function, are reclaimed to unused state,
|
|
* which may be used later on.
|
|
**/
|
|
--- head-2010-05-25.orig/drivers/xen/Kconfig 2010-03-24 15:02:14.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/Kconfig 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -332,6 +332,10 @@ endmenu
|
|
config HAVE_IRQ_IGNORE_UNHANDLED
|
|
def_bool y
|
|
|
|
+config GENERIC_HARDIRQS_NO__DO_IRQ
|
|
+ def_bool y
|
|
+ depends on X86
|
|
+
|
|
config NO_IDLE_HZ
|
|
def_bool y
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/balloon/balloon.c 2010-03-31 09:56:02.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/balloon/balloon.c 2010-04-15 09:52:32.000000000 +0200
|
|
@@ -37,6 +37,7 @@
|
|
#include <linux/sched.h>
|
|
#include <linux/errno.h>
|
|
#include <linux/mm.h>
|
|
+#include <linux/swap.h>
|
|
#include <linux/mman.h>
|
|
#include <linux/smp_lock.h>
|
|
#include <linux/pagemap.h>
|
|
@@ -81,11 +82,7 @@ struct balloon_stats balloon_stats;
|
|
/* We increase/decrease in batches which fit in a page */
|
|
static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
|
|
|
|
-/* VM /proc information for memory */
|
|
-extern unsigned long totalram_pages;
|
|
-
|
|
-#ifndef MODULE
|
|
-extern unsigned long totalhigh_pages;
|
|
+#if !defined(MODULE) && defined(CONFIG_HIGHMEM)
|
|
#define inc_totalhigh_pages() (totalhigh_pages++)
|
|
#define dec_totalhigh_pages() (totalhigh_pages--)
|
|
#else
|
|
@@ -133,29 +130,44 @@ static struct timer_list balloon_timer;
|
|
printk(KERN_WARNING "xen_mem: " fmt, ##args)
|
|
|
|
/* balloon_append: add the given page to the balloon. */
|
|
-static void balloon_append(struct page *page)
|
|
+static void balloon_append(struct page *page, int account)
|
|
{
|
|
+ unsigned long pfn;
|
|
+
|
|
/* Lowmem is re-populated first, so highmem pages go at list tail. */
|
|
if (PageHighMem(page)) {
|
|
list_add_tail(PAGE_TO_LIST(page), &ballooned_pages);
|
|
bs.balloon_high++;
|
|
- dec_totalhigh_pages();
|
|
+ if (account)
|
|
+ dec_totalhigh_pages();
|
|
} else {
|
|
list_add(PAGE_TO_LIST(page), &ballooned_pages);
|
|
bs.balloon_low++;
|
|
}
|
|
+
|
|
+ pfn = page_to_pfn(page);
|
|
+ if (account) {
|
|
+ SetPageReserved(page);
|
|
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
|
+ page_zone(page)->present_pages--;
|
|
+ } else {
|
|
+ BUG_ON(!PageReserved(page));
|
|
+ WARN_ON_ONCE(phys_to_machine_mapping_valid(pfn));
|
|
+ }
|
|
}
|
|
|
|
/* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
|
|
-static struct page *balloon_retrieve(void)
|
|
+static struct page *balloon_retrieve(int *was_empty)
|
|
{
|
|
struct page *page;
|
|
+ struct zone *zone;
|
|
|
|
if (list_empty(&ballooned_pages))
|
|
return NULL;
|
|
|
|
page = LIST_TO_PAGE(ballooned_pages.next);
|
|
UNLIST_PAGE(page);
|
|
+ BUG_ON(!PageReserved(page));
|
|
|
|
if (PageHighMem(page)) {
|
|
bs.balloon_high--;
|
|
@@ -163,6 +175,9 @@ static struct page *balloon_retrieve(voi
|
|
}
|
|
else
|
|
bs.balloon_low--;
|
|
+ zone = page_zone(page);
|
|
+ *was_empty |= !populated_zone(zone);
|
|
+ zone->present_pages++;
|
|
|
|
return page;
|
|
}
|
|
@@ -248,6 +263,7 @@ static int increase_reservation(unsigned
|
|
unsigned long pfn, i, flags;
|
|
struct page *page;
|
|
long rc;
|
|
+ int need_zonelists_rebuild = 0;
|
|
struct xen_memory_reservation reservation = {
|
|
.address_bits = 0,
|
|
.extent_order = 0,
|
|
@@ -273,7 +289,7 @@ static int increase_reservation(unsigned
|
|
goto out;
|
|
|
|
for (i = 0; i < rc; i++) {
|
|
- page = balloon_retrieve();
|
|
+ page = balloon_retrieve(&need_zonelists_rebuild);
|
|
BUG_ON(page == NULL);
|
|
|
|
pfn = page_to_pfn(page);
|
|
@@ -306,6 +322,18 @@ static int increase_reservation(unsigned
|
|
out:
|
|
balloon_unlock(flags);
|
|
|
|
+#ifndef MODULE
|
|
+ setup_per_zone_pages_min();
|
|
+# if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_ACPI_HOTPLUG_MEMORY) \
|
|
+ || defined(CONFIG_ACPI_HOTPLUG_MEMORY_MODULE)
|
|
+ /* build_all_zonelists() is __meminit */
|
|
+ if (need_zonelists_rebuild)
|
|
+ build_all_zonelists();
|
|
+ else
|
|
+# endif
|
|
+ vm_total_pages = nr_free_pagecache_pages();
|
|
+#endif
|
|
+
|
|
return rc < 0 ? rc : rc != nr_pages;
|
|
}
|
|
|
|
@@ -364,8 +392,7 @@ static int decrease_reservation(unsigned
|
|
/* No more mappings: invalidate P2M and add to balloon. */
|
|
for (i = 0; i < nr_pages; i++) {
|
|
pfn = mfn_to_pfn(frame_list[i]);
|
|
- set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
|
- balloon_append(pfn_to_page(pfn));
|
|
+ balloon_append(pfn_to_page(pfn), 1);
|
|
}
|
|
|
|
set_xen_guest_handle(reservation.extent_start, frame_list);
|
|
@@ -582,8 +609,11 @@ static int __init balloon_init(void)
|
|
/* Initialise the balloon with excess memory space. */
|
|
for (pfn = xen_start_info->nr_pages; pfn < max_pfn; pfn++) {
|
|
page = pfn_to_page(pfn);
|
|
- if (!PageReserved(page))
|
|
- balloon_append(page);
|
|
+ if (!PageReserved(page)) {
|
|
+ SetPageReserved(page);
|
|
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
|
+ balloon_append(page, 0);
|
|
+ }
|
|
}
|
|
#endif
|
|
|
|
@@ -618,7 +648,7 @@ void balloon_update_driver_allowance(lon
|
|
static int dealloc_pte_fn(
|
|
pte_t *pte, struct page *pmd_page, unsigned long addr, void *data)
|
|
{
|
|
- unsigned long mfn = pte_mfn(*pte);
|
|
+ unsigned long pfn, mfn = pte_mfn(*pte);
|
|
int ret;
|
|
struct xen_memory_reservation reservation = {
|
|
.nr_extents = 1,
|
|
@@ -627,7 +657,9 @@ static int dealloc_pte_fn(
|
|
};
|
|
set_xen_guest_handle(reservation.extent_start, &mfn);
|
|
set_pte_at(&init_mm, addr, pte, __pte_ma(0));
|
|
- set_phys_to_machine(__pa(addr) >> PAGE_SHIFT, INVALID_P2M_ENTRY);
|
|
+ pfn = __pa(addr) >> PAGE_SHIFT;
|
|
+ set_phys_to_machine(pfn, INVALID_P2M_ENTRY);
|
|
+ SetPageReserved(pfn_to_page(pfn));
|
|
ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
|
|
BUG_ON(ret != 1);
|
|
return 0;
|
|
@@ -696,6 +728,9 @@ struct page **alloc_empty_pages_and_page
|
|
}
|
|
|
|
totalram_pages = --bs.current_pages - totalram_bias;
|
|
+ if (PageHighMem(page))
|
|
+ dec_totalhigh_pages();
|
|
+ page_zone(page)->present_pages--;
|
|
|
|
balloon_unlock(flags);
|
|
}
|
|
@@ -710,7 +745,7 @@ struct page **alloc_empty_pages_and_page
|
|
err:
|
|
balloon_lock(flags);
|
|
while (--i >= 0)
|
|
- balloon_append(pagevec[i]);
|
|
+ balloon_append(pagevec[i], 0);
|
|
balloon_unlock(flags);
|
|
kfree(pagevec);
|
|
pagevec = NULL;
|
|
@@ -728,7 +763,7 @@ void free_empty_pages_and_pagevec(struct
|
|
balloon_lock(flags);
|
|
for (i = 0; i < nr_pages; i++) {
|
|
BUG_ON(page_count(pagevec[i]) != 1);
|
|
- balloon_append(pagevec[i]);
|
|
+ balloon_append(pagevec[i], 0);
|
|
}
|
|
balloon_unlock(flags);
|
|
|
|
@@ -742,7 +777,8 @@ void balloon_release_driver_page(struct
|
|
unsigned long flags;
|
|
|
|
balloon_lock(flags);
|
|
- balloon_append(page);
|
|
+ balloon_append(page, 1);
|
|
+ totalram_pages = --bs.current_pages - totalram_bias;
|
|
bs.driver_pages--;
|
|
balloon_unlock(flags);
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/blkback/blkback.c 2010-03-22 12:00:53.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blkback/blkback.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -294,7 +294,7 @@ static void blkif_notify_work(blkif_t *b
|
|
wake_up(&blkif->wq);
|
|
}
|
|
|
|
-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t blkif_be_int(int irq, void *dev_id)
|
|
{
|
|
blkif_notify_work(dev_id);
|
|
return IRQ_HANDLED;
|
|
--- head-2010-05-25.orig/drivers/xen/blkback/common.h 2010-03-22 12:00:53.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blkback/common.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -146,7 +146,7 @@ void blkif_interface_init(void);
|
|
|
|
void blkif_xenbus_init(void);
|
|
|
|
-irqreturn_t blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+irqreturn_t blkif_be_int(int irq, void *dev_id);
|
|
int blkif_schedule(void *arg);
|
|
|
|
int blkback_barrier(struct xenbus_transaction xbt,
|
|
--- head-2010-05-25.orig/drivers/xen/blkfront/blkfront.c 2010-03-22 12:00:53.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blkfront/blkfront.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -70,7 +70,7 @@ static int setup_blkring(struct xenbus_d
|
|
|
|
static void kick_pending_request_queues(struct blkfront_info *);
|
|
|
|
-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+static irqreturn_t blkif_int(int irq, void *dev_id);
|
|
static void blkif_restart_queue(void *arg);
|
|
static void blkif_recover(struct blkfront_info *);
|
|
static void blkif_completion(struct blk_shadow *);
|
|
@@ -733,7 +733,7 @@ void do_blkif_request(request_queue_t *r
|
|
}
|
|
|
|
|
|
-static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+static irqreturn_t blkif_int(int irq, void *dev_id)
|
|
{
|
|
struct request *req;
|
|
blkif_response_t *bret;
|
|
--- head-2010-05-25.orig/drivers/xen/blktap/blktap.c 2010-04-29 09:34:47.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/blktap/blktap.c 2010-04-29 09:43:21.000000000 +0200
|
|
@@ -1288,7 +1288,7 @@ static void blkif_notify_work(blkif_t *b
|
|
wake_up(&blkif->wq);
|
|
}
|
|
|
|
-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t tap_blkif_be_int(int irq, void *dev_id)
|
|
{
|
|
blkif_notify_work(dev_id);
|
|
return IRQ_HANDLED;
|
|
--- head-2010-05-25.orig/drivers/xen/blktap/common.h 2008-09-15 13:40:15.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/blktap/common.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -113,7 +113,7 @@ void tap_blkif_interface_init(void);
|
|
|
|
void tap_blkif_xenbus_init(void);
|
|
|
|
-irqreturn_t tap_blkif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+irqreturn_t tap_blkif_be_int(int irq, void *dev_id);
|
|
int tap_blkif_schedule(void *arg);
|
|
|
|
int dom_to_devid(domid_t domid, int xenbus_id, blkif_t *blkif);
|
|
--- head-2010-05-25.orig/drivers/xen/blktap2/sysfs.c 2009-12-16 11:43:21.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/blktap2/sysfs.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -150,7 +150,7 @@ blktap_sysfs_pause_device(struct class_d
|
|
err = blktap_device_pause(tap);
|
|
if (!err) {
|
|
class_device_remove_file(dev, &class_device_attr_pause);
|
|
- class_device_create_file(dev, &class_device_attr_resume);
|
|
+ err = class_device_create_file(dev, &class_device_attr_resume);
|
|
}
|
|
|
|
out:
|
|
@@ -182,7 +182,7 @@ blktap_sysfs_resume_device(struct class_
|
|
err = blktap_device_resume(tap);
|
|
if (!err) {
|
|
class_device_remove_file(dev, &class_device_attr_resume);
|
|
- class_device_create_file(dev, &class_device_attr_pause);
|
|
+ err = class_device_create_file(dev, &class_device_attr_pause);
|
|
}
|
|
|
|
out:
|
|
@@ -292,6 +292,7 @@ blktap_sysfs_create(struct blktap *tap)
|
|
{
|
|
struct blktap_ring *ring;
|
|
struct class_device *dev;
|
|
+ int err, state = 0;
|
|
|
|
if (!class)
|
|
return -ENODEV;
|
|
@@ -310,12 +311,27 @@ blktap_sysfs_create(struct blktap *tap)
|
|
atomic_set(&ring->sysfs_refcnt, 0);
|
|
set_bit(BLKTAP_SYSFS, &tap->dev_inuse);
|
|
|
|
- class_device_create_file(dev, &class_device_attr_name);
|
|
- class_device_create_file(dev, &class_device_attr_remove);
|
|
- class_device_create_file(dev, &class_device_attr_pause);
|
|
- class_device_create_file(dev, &class_device_attr_debug);
|
|
+ err = class_device_create_file(dev, &class_device_attr_name);
|
|
+ if (!err) {
|
|
+ ++state;
|
|
+ err = class_device_create_file(dev, &class_device_attr_remove);
|
|
+ }
|
|
+ if (!err) {
|
|
+ ++state;
|
|
+ err = class_device_create_file(dev, &class_device_attr_pause);
|
|
+ }
|
|
+ if (!err) {
|
|
+ ++state;
|
|
+ err = class_device_create_file(dev, &class_device_attr_debug);
|
|
+ }
|
|
|
|
- return 0;
|
|
+ switch (state * !!err) {
|
|
+ case 3: class_device_remove_file(dev, &class_device_attr_pause);
|
|
+ case 2: class_device_remove_file(dev, &class_device_attr_remove);
|
|
+ case 1: class_device_remove_file(dev, &class_device_attr_name);
|
|
+ }
|
|
+
|
|
+ return err;
|
|
}
|
|
|
|
int
|
|
@@ -409,6 +425,7 @@ int __init
|
|
blktap_sysfs_init(void)
|
|
{
|
|
struct class *cls;
|
|
+ int err;
|
|
|
|
if (class)
|
|
return -EEXIST;
|
|
@@ -417,9 +434,16 @@ blktap_sysfs_init(void)
|
|
if (IS_ERR(cls))
|
|
return PTR_ERR(cls);
|
|
|
|
- class_create_file(cls, &class_attr_verbosity);
|
|
- class_create_file(cls, &class_attr_devices);
|
|
+ err = class_create_file(cls, &class_attr_verbosity);
|
|
+ if (!err) {
|
|
+ err = class_create_file(cls, &class_attr_devices);
|
|
+ if (err)
|
|
+ class_remove_file(cls, &class_attr_verbosity);
|
|
+ }
|
|
+ if (!err)
|
|
+ class = cls;
|
|
+ else
|
|
+ class_destroy(cls);
|
|
|
|
- class = cls;
|
|
- return 0;
|
|
+ return err;
|
|
}
|
|
--- head-2010-05-25.orig/drivers/xen/console/console.c 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/console/console.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -360,7 +360,7 @@ static struct tty_struct *xencons_tty;
|
|
static int xencons_priv_irq;
|
|
static char x_char;
|
|
|
|
-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs)
|
|
+void xencons_rx(char *buf, unsigned len)
|
|
{
|
|
int i;
|
|
unsigned long flags;
|
|
@@ -385,8 +385,7 @@ void xencons_rx(char *buf, unsigned len,
|
|
if (time_before(jiffies, sysrq_timeout)) {
|
|
spin_unlock_irqrestore(
|
|
&xencons_lock, flags);
|
|
- handle_sysrq(
|
|
- buf[i], regs, xencons_tty);
|
|
+ handle_sysrq(buf[i], xencons_tty);
|
|
spin_lock_irqsave(
|
|
&xencons_lock, flags);
|
|
continue;
|
|
@@ -451,14 +450,13 @@ void xencons_tx(void)
|
|
}
|
|
|
|
/* Privileged receive callback and transmit kicker. */
|
|
-static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id,
|
|
- struct pt_regs *regs)
|
|
+static irqreturn_t xencons_priv_interrupt(int irq, void *dev_id)
|
|
{
|
|
static char rbuf[16];
|
|
int l;
|
|
|
|
while ((l = HYPERVISOR_console_io(CONSOLEIO_read, 16, rbuf)) > 0)
|
|
- xencons_rx(rbuf, l, regs);
|
|
+ xencons_rx(rbuf, l);
|
|
|
|
xencons_tx();
|
|
|
|
@@ -646,7 +644,7 @@ static void xencons_close(struct tty_str
|
|
spin_unlock_irqrestore(&xencons_lock, flags);
|
|
}
|
|
|
|
-static struct tty_operations xencons_ops = {
|
|
+static const struct tty_operations xencons_ops = {
|
|
.open = xencons_open,
|
|
.close = xencons_close,
|
|
.write = xencons_write,
|
|
--- head-2010-05-25.orig/drivers/xen/console/xencons_ring.c 2007-06-12 13:13:44.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/console/xencons_ring.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -83,7 +83,7 @@ int xencons_ring_send(const char *data,
|
|
return sent;
|
|
}
|
|
|
|
-static irqreturn_t handle_input(int irq, void *unused, struct pt_regs *regs)
|
|
+static irqreturn_t handle_input(int irq, void *unused)
|
|
{
|
|
struct xencons_interface *intf = xencons_interface();
|
|
XENCONS_RING_IDX cons, prod;
|
|
@@ -94,7 +94,7 @@ static irqreturn_t handle_input(int irq,
|
|
BUG_ON((prod - cons) > sizeof(intf->in));
|
|
|
|
while (cons != prod) {
|
|
- xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1, regs);
|
|
+ xencons_rx(intf->in+MASK_XENCONS_IDX(cons,intf->in), 1);
|
|
cons++;
|
|
}
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/core/evtchn.c 2010-02-24 11:50:47.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/evtchn.c 2010-04-23 14:11:32.000000000 +0200
|
|
@@ -522,7 +522,7 @@ static void unbind_from_irq(unsigned int
|
|
|
|
int bind_caller_port_to_irqhandler(
|
|
unsigned int caller_port,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id)
|
|
@@ -545,7 +545,7 @@ EXPORT_SYMBOL_GPL(bind_caller_port_to_ir
|
|
|
|
int bind_listening_port_to_irqhandler(
|
|
unsigned int remote_domain,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id)
|
|
@@ -569,7 +569,7 @@ EXPORT_SYMBOL_GPL(bind_listening_port_to
|
|
int bind_interdomain_evtchn_to_irqhandler(
|
|
unsigned int remote_domain,
|
|
unsigned int remote_port,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id)
|
|
@@ -593,7 +593,7 @@ EXPORT_SYMBOL_GPL(bind_interdomain_evtch
|
|
int bind_virq_to_irqhandler(
|
|
unsigned int virq,
|
|
unsigned int cpu,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id)
|
|
@@ -617,7 +617,7 @@ EXPORT_SYMBOL_GPL(bind_virq_to_irqhandle
|
|
int bind_ipi_to_irqhandler(
|
|
unsigned int ipi,
|
|
unsigned int cpu,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id)
|
|
@@ -693,16 +693,15 @@ int resend_irq_on_evtchn(unsigned int ir
|
|
* Interface to generic handling in irq.c
|
|
*/
|
|
|
|
-static unsigned int startup_dynirq(unsigned int irq)
|
|
+static void unmask_dynirq(unsigned int irq)
|
|
{
|
|
int evtchn = evtchn_from_irq(irq);
|
|
|
|
if (VALID_EVTCHN(evtchn))
|
|
unmask_evtchn(evtchn);
|
|
- return 0;
|
|
}
|
|
|
|
-static void shutdown_dynirq(unsigned int irq)
|
|
+static void mask_dynirq(unsigned int irq)
|
|
{
|
|
int evtchn = evtchn_from_irq(irq);
|
|
|
|
@@ -710,21 +709,13 @@ static void shutdown_dynirq(unsigned int
|
|
mask_evtchn(evtchn);
|
|
}
|
|
|
|
-static void enable_dynirq(unsigned int irq)
|
|
+static unsigned int startup_dynirq(unsigned int irq)
|
|
{
|
|
- int evtchn = evtchn_from_irq(irq);
|
|
-
|
|
- if (VALID_EVTCHN(evtchn))
|
|
- unmask_evtchn(evtchn);
|
|
+ unmask_dynirq(irq);
|
|
+ return 0;
|
|
}
|
|
|
|
-static void disable_dynirq(unsigned int irq)
|
|
-{
|
|
- int evtchn = evtchn_from_irq(irq);
|
|
-
|
|
- if (VALID_EVTCHN(evtchn))
|
|
- mask_evtchn(evtchn);
|
|
-}
|
|
+#define shutdown_dynirq mask_dynirq
|
|
|
|
static void ack_dynirq(unsigned int irq)
|
|
{
|
|
@@ -740,20 +731,22 @@ static void ack_dynirq(unsigned int irq)
|
|
|
|
static void end_dynirq(unsigned int irq)
|
|
{
|
|
- int evtchn = evtchn_from_irq(irq);
|
|
-
|
|
- if (VALID_EVTCHN(evtchn) && !(irq_desc[irq].status & IRQ_DISABLED))
|
|
- unmask_evtchn(evtchn);
|
|
+ if (!(irq_desc[irq].status & IRQ_DISABLED))
|
|
+ unmask_dynirq(irq);
|
|
}
|
|
|
|
-static struct hw_interrupt_type dynirq_type = {
|
|
- .typename = "Dynamic-irq",
|
|
+static struct irq_chip dynirq_chip = {
|
|
+ .name = "Dynamic",
|
|
.startup = startup_dynirq,
|
|
.shutdown = shutdown_dynirq,
|
|
- .enable = enable_dynirq,
|
|
- .disable = disable_dynirq,
|
|
+ .enable = unmask_dynirq,
|
|
+ .disable = mask_dynirq,
|
|
+ .mask = mask_dynirq,
|
|
+ .unmask = unmask_dynirq,
|
|
+ .mask_ack = ack_dynirq,
|
|
.ack = ack_dynirq,
|
|
.end = end_dynirq,
|
|
+ .eoi = end_dynirq,
|
|
#ifdef CONFIG_SMP
|
|
.set_affinity = set_affinity_irq,
|
|
#endif
|
|
@@ -815,7 +808,7 @@ static inline void pirq_query_unmask(int
|
|
*/
|
|
#define probing_irq(_irq) (irq_desc[(_irq)].action == NULL)
|
|
|
|
-static unsigned int startup_pirq(unsigned int irq)
|
|
+static void enable_pirq(unsigned int irq)
|
|
{
|
|
struct evtchn_bind_pirq bind_pirq;
|
|
int evtchn = evtchn_from_irq(irq);
|
|
@@ -830,7 +823,7 @@ static unsigned int startup_pirq(unsigne
|
|
if (!probing_irq(irq))
|
|
printk(KERN_INFO "Failed to obtain physical IRQ %d\n",
|
|
irq);
|
|
- return 0;
|
|
+ return;
|
|
}
|
|
evtchn = bind_pirq.port;
|
|
|
|
@@ -842,11 +835,9 @@ static unsigned int startup_pirq(unsigne
|
|
|
|
out:
|
|
pirq_unmask_and_notify(evtchn, irq);
|
|
-
|
|
- return 0;
|
|
}
|
|
|
|
-static void shutdown_pirq(unsigned int irq)
|
|
+static void disable_pirq(unsigned int irq)
|
|
{
|
|
struct evtchn_close close;
|
|
int evtchn = evtchn_from_irq(irq);
|
|
@@ -865,46 +856,46 @@ static void shutdown_pirq(unsigned int i
|
|
irq_info[irq] = mk_irq_info(IRQT_PIRQ, index_from_irq(irq), 0);
|
|
}
|
|
|
|
-static void enable_pirq(unsigned int irq)
|
|
+static unsigned int startup_pirq(unsigned int irq)
|
|
{
|
|
- startup_pirq(irq);
|
|
+ enable_pirq(irq);
|
|
+ return 0;
|
|
}
|
|
|
|
-static void disable_pirq(unsigned int irq)
|
|
-{
|
|
-}
|
|
+#define shutdown_pirq disable_pirq
|
|
|
|
-static void ack_pirq(unsigned int irq)
|
|
+static void unmask_pirq(unsigned int irq)
|
|
{
|
|
int evtchn = evtchn_from_irq(irq);
|
|
|
|
- move_native_irq(irq);
|
|
-
|
|
- if (VALID_EVTCHN(evtchn)) {
|
|
- mask_evtchn(evtchn);
|
|
- clear_evtchn(evtchn);
|
|
- }
|
|
+ if (VALID_EVTCHN(evtchn))
|
|
+ pirq_unmask_and_notify(evtchn, irq);
|
|
}
|
|
|
|
+#define mask_pirq mask_dynirq
|
|
+#define ack_pirq ack_dynirq
|
|
+
|
|
static void end_pirq(unsigned int irq)
|
|
{
|
|
- int evtchn = evtchn_from_irq(irq);
|
|
-
|
|
if ((irq_desc[irq].status & (IRQ_DISABLED|IRQ_PENDING)) ==
|
|
- (IRQ_DISABLED|IRQ_PENDING)) {
|
|
+ (IRQ_DISABLED|IRQ_PENDING))
|
|
shutdown_pirq(irq);
|
|
- } else if (VALID_EVTCHN(evtchn))
|
|
- pirq_unmask_and_notify(evtchn, irq);
|
|
+ else
|
|
+ unmask_pirq(irq);
|
|
}
|
|
|
|
-static struct hw_interrupt_type pirq_type = {
|
|
- .typename = "Phys-irq",
|
|
+static struct irq_chip pirq_chip = {
|
|
+ .name = "Phys",
|
|
.startup = startup_pirq,
|
|
.shutdown = shutdown_pirq,
|
|
.enable = enable_pirq,
|
|
.disable = disable_pirq,
|
|
+ .mask = mask_pirq,
|
|
+ .unmask = unmask_pirq,
|
|
+ .mask_ack = ack_pirq,
|
|
.ack = ack_pirq,
|
|
.end = end_pirq,
|
|
+ .eoi = end_pirq,
|
|
#ifdef CONFIG_SMP
|
|
.set_affinity = set_affinity_irq,
|
|
#endif
|
|
@@ -1087,7 +1078,8 @@ void evtchn_register_pirq(int irq)
|
|
if (identity_mapped_irq(irq) || type_from_irq(irq) != IRQT_UNBOUND)
|
|
return;
|
|
irq_info[irq] = mk_irq_info(IRQT_PIRQ, irq, 0);
|
|
- irq_desc[irq].chip = &pirq_type;
|
|
+ set_irq_chip_and_handler_name(irq, &pirq_chip, handle_level_irq,
|
|
+ "level");
|
|
}
|
|
|
|
int evtchn_map_pirq(int irq, int xen_pirq)
|
|
@@ -1110,11 +1102,18 @@ int evtchn_map_pirq(int irq, int xen_pir
|
|
spin_unlock(&irq_alloc_lock);
|
|
if (irq < PIRQ_BASE)
|
|
return -ENOSPC;
|
|
- irq_desc[irq].chip = &pirq_type;
|
|
+ set_irq_chip_and_handler_name(irq, &pirq_chip,
|
|
+ handle_level_irq, "level");
|
|
} else if (!xen_pirq) {
|
|
if (unlikely(type_from_irq(irq) != IRQT_PIRQ))
|
|
return -EINVAL;
|
|
- irq_desc[irq].chip = &no_irq_type;
|
|
+ /*
|
|
+ * dynamic_irq_cleanup(irq) would seem to be the correct thing
|
|
+ * here, but cannot be used as we get here also during shutdown
|
|
+ * when a driver didn't free_irq() its MSI(-X) IRQ(s), which
|
|
+ * then causes a warning in dynamic_irq_cleanup().
|
|
+ */
|
|
+ set_irq_chip_and_handler(irq, NULL, NULL);
|
|
irq_info[irq] = IRQ_UNBOUND;
|
|
return 0;
|
|
} else if (type_from_irq(irq) != IRQT_PIRQ
|
|
@@ -1160,10 +1159,9 @@ void __init xen_init_IRQ(void)
|
|
for (i = DYNIRQ_BASE; i < (DYNIRQ_BASE + NR_DYNIRQS); i++) {
|
|
irq_bindcount[i] = 0;
|
|
|
|
- irq_desc[i].status = IRQ_DISABLED|IRQ_NOPROBE;
|
|
- irq_desc[i].action = NULL;
|
|
- irq_desc[i].depth = 1;
|
|
- irq_desc[i].chip = &dynirq_type;
|
|
+ irq_desc[i].status |= IRQ_NOPROBE;
|
|
+ set_irq_chip_and_handler_name(i, &dynirq_chip,
|
|
+ handle_level_irq, "level");
|
|
}
|
|
|
|
/* Phys IRQ space is statically bound (1:1 mapping). Nail refcnts. */
|
|
@@ -1179,9 +1177,7 @@ void __init xen_init_IRQ(void)
|
|
continue;
|
|
#endif
|
|
|
|
- irq_desc[i].status = IRQ_DISABLED;
|
|
- irq_desc[i].action = NULL;
|
|
- irq_desc[i].depth = 1;
|
|
- irq_desc[i].chip = &pirq_type;
|
|
+ set_irq_chip_and_handler_name(i, &pirq_chip,
|
|
+ handle_level_irq, "level");
|
|
}
|
|
}
|
|
--- head-2010-05-25.orig/drivers/xen/core/gnttab.c 2009-03-18 10:39:31.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/gnttab.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -510,6 +510,7 @@ static void gnttab_page_free(struct page
|
|
BUG_ON(order);
|
|
ClearPageForeign(page);
|
|
gnttab_reset_grant_page(page);
|
|
+ ClearPageReserved(page);
|
|
put_page(page);
|
|
}
|
|
|
|
@@ -587,6 +588,8 @@ int gnttab_copy_grant_page(grant_ref_t r
|
|
new_page->mapping = page->mapping;
|
|
new_page->index = page->index;
|
|
set_bit(PG_foreign, &new_page->flags);
|
|
+ if (PageReserved(page))
|
|
+ SetPageReserved(new_page);
|
|
*pagep = new_page;
|
|
|
|
SetPageForeign(page, gnttab_page_free);
|
|
--- head-2010-05-25.orig/drivers/xen/core/reboot.c 2008-08-07 12:44:36.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/core/reboot.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -14,6 +14,7 @@
|
|
|
|
#ifdef HAVE_XEN_PLATFORM_COMPAT_H
|
|
#include <xen/platform-compat.h>
|
|
+#undef handle_sysrq
|
|
#endif
|
|
|
|
MODULE_LICENSE("Dual BSD/GPL");
|
|
@@ -231,7 +232,7 @@ static void sysrq_handler(struct xenbus_
|
|
|
|
#ifdef CONFIG_MAGIC_SYSRQ
|
|
if (sysrq_key != '\0')
|
|
- handle_sysrq(sysrq_key, NULL, NULL);
|
|
+ handle_sysrq(sysrq_key, NULL);
|
|
#endif
|
|
}
|
|
|
|
@@ -245,7 +246,7 @@ static struct xenbus_watch sysrq_watch =
|
|
.callback = sysrq_handler
|
|
};
|
|
|
|
-static irqreturn_t suspend_int(int irq, void* dev_id, struct pt_regs *ptregs)
|
|
+static irqreturn_t suspend_int(int irq, void* dev_id)
|
|
{
|
|
switch_shutdown_state(SHUTDOWN_SUSPEND);
|
|
return IRQ_HANDLED;
|
|
--- head-2010-05-25.orig/drivers/xen/core/smpboot.c 2010-03-24 15:02:17.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/core/smpboot.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -25,8 +25,8 @@
|
|
#include <xen/cpu_hotplug.h>
|
|
#include <xen/xenbus.h>
|
|
|
|
-extern irqreturn_t smp_reschedule_interrupt(int, void *, struct pt_regs *);
|
|
-extern irqreturn_t smp_call_function_interrupt(int, void *, struct pt_regs *);
|
|
+extern irqreturn_t smp_reschedule_interrupt(int, void *);
|
|
+extern irqreturn_t smp_call_function_interrupt(int, void *);
|
|
|
|
extern int local_setup_timer(unsigned int cpu);
|
|
extern void local_teardown_timer(unsigned int cpu);
|
|
@@ -61,8 +61,6 @@ cpumask_t cpu_core_map[NR_CPUS] __cachel
|
|
#if defined(__i386__)
|
|
u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff };
|
|
EXPORT_SYMBOL(x86_cpu_to_apicid);
|
|
-#elif !defined(CONFIG_X86_IO_APIC)
|
|
-unsigned int maxcpus = NR_CPUS;
|
|
#endif
|
|
|
|
void __init prefill_possible_map(void)
|
|
--- head-2010-05-25.orig/drivers/xen/fbfront/xenfb.c 2009-12-04 08:45:56.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/fbfront/xenfb.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -524,8 +524,7 @@ static struct fb_ops xenfb_fb_ops = {
|
|
.fb_set_par = xenfb_set_par,
|
|
};
|
|
|
|
-static irqreturn_t xenfb_event_handler(int rq, void *dev_id,
|
|
- struct pt_regs *regs)
|
|
+static irqreturn_t xenfb_event_handler(int rq, void *dev_id)
|
|
{
|
|
/*
|
|
* No in events recognized, simply ignore them all.
|
|
--- head-2010-05-25.orig/drivers/xen/fbfront/xenkbd.c 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/fbfront/xenkbd.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -46,7 +46,7 @@ static void xenkbd_disconnect_backend(st
|
|
* to do that.
|
|
*/
|
|
|
|
-static irqreturn_t input_handler(int rq, void *dev_id, struct pt_regs *regs)
|
|
+static irqreturn_t input_handler(int rq, void *dev_id)
|
|
{
|
|
struct xenkbd_info *info = dev_id;
|
|
struct xenkbd_page *page = info->page;
|
|
--- head-2010-05-25.orig/drivers/xen/gntdev/gntdev.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/gntdev/gntdev.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -761,9 +761,6 @@ static pte_t gntdev_clear_pte(struct vm_
|
|
BUG();
|
|
}
|
|
|
|
- /* Copy the existing value of the PTE for returning. */
|
|
- copy = *ptep;
|
|
-
|
|
/* Calculate the grant relating to this PTE. */
|
|
slot_index = vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT);
|
|
|
|
@@ -778,6 +775,10 @@ static pte_t gntdev_clear_pte(struct vm_
|
|
GNTDEV_INVALID_HANDLE &&
|
|
!xen_feature(XENFEAT_auto_translated_physmap)) {
|
|
/* NOT USING SHADOW PAGE TABLES. */
|
|
+
|
|
+ /* Copy the existing value of the PTE for returning. */
|
|
+ copy = *ptep;
|
|
+
|
|
gnttab_set_unmap_op(&op, ptep_to_machine(ptep),
|
|
GNTMAP_contains_pte,
|
|
private_data->grants[slot_index]
|
|
@@ -790,7 +791,7 @@ static pte_t gntdev_clear_pte(struct vm_
|
|
op.status);
|
|
} else {
|
|
/* USING SHADOW PAGE TABLES. */
|
|
- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
|
|
+ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
|
|
}
|
|
|
|
/* Finally, we unmap the grant from kernel space. */
|
|
@@ -818,7 +819,7 @@ static pte_t gntdev_clear_pte(struct vm_
|
|
INVALID_P2M_ENTRY);
|
|
|
|
} else {
|
|
- pte_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
|
|
+ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm);
|
|
}
|
|
|
|
return copy;
|
|
--- head-2010-05-25.orig/drivers/xen/netback/accel.c 2008-01-07 13:19:18.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/netback/accel.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -65,7 +65,7 @@ static int match_accelerator(struct xenb
|
|
|
|
if (IS_ERR(eth_name)) {
|
|
/* Probably means not present */
|
|
- DPRINTK("%s: no match due to xenbus_read accel error %d\n",
|
|
+ DPRINTK("%s: no match due to xenbus_read accel error %ld\n",
|
|
__FUNCTION__, PTR_ERR(eth_name));
|
|
return 0;
|
|
} else {
|
|
--- head-2010-05-25.orig/drivers/xen/netback/common.h 2010-02-24 13:13:46.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/netback/common.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -203,7 +203,7 @@ void netif_deschedule_work(netif_t *neti
|
|
|
|
int netif_be_start_xmit(struct sk_buff *skb, struct net_device *dev);
|
|
struct net_device_stats *netif_be_get_stats(struct net_device *dev);
|
|
-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+irqreturn_t netif_be_int(int irq, void *dev_id);
|
|
|
|
static inline int netbk_can_queue(struct net_device *dev)
|
|
{
|
|
--- head-2010-05-25.orig/drivers/xen/netback/loopback.c 2007-08-06 15:10:49.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/netback/loopback.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -151,7 +151,7 @@ static int loopback_start_xmit(struct sk
|
|
np->stats.rx_bytes += skb->len;
|
|
np->stats.rx_packets++;
|
|
|
|
- if (skb->ip_summed == CHECKSUM_HW) {
|
|
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Defer checksum calculation. */
|
|
skb->proto_csum_blank = 1;
|
|
/* Must be a local packet: assert its integrity. */
|
|
--- head-2010-05-25.orig/drivers/xen/netback/netback.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/netback/netback.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -711,7 +711,7 @@ static void net_rx_action(unsigned long
|
|
id = meta[npo.meta_cons].id;
|
|
flags = nr_frags ? NETRXF_more_data : 0;
|
|
|
|
- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
|
|
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
|
|
flags |= NETRXF_csum_blank | NETRXF_data_validated;
|
|
else if (skb->proto_data_valid) /* remote but checksummed? */
|
|
flags |= NETRXF_data_validated;
|
|
@@ -1518,7 +1518,7 @@ static void netif_page_release(struct pa
|
|
netif_idx_release(idx);
|
|
}
|
|
|
|
-irqreturn_t netif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t netif_be_int(int irq, void *dev_id)
|
|
{
|
|
netif_t *netif = dev_id;
|
|
|
|
@@ -1585,7 +1585,7 @@ static netif_rx_response_t *make_rx_resp
|
|
}
|
|
|
|
#ifdef NETBE_DEBUG_INTERRUPT
|
|
-static irqreturn_t netif_be_dbg(int irq, void *dev_id, struct pt_regs *regs)
|
|
+static irqreturn_t netif_be_dbg(int irq, void *dev_id)
|
|
{
|
|
struct list_head *ent;
|
|
netif_t *netif;
|
|
--- head-2010-05-25.orig/drivers/xen/netfront/netfront.c 2009-04-07 13:58:48.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/netfront/netfront.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -136,7 +136,7 @@ static inline int netif_needs_gso(struct
|
|
{
|
|
return skb_is_gso(skb) &&
|
|
(!skb_gso_ok(skb, dev->features) ||
|
|
- unlikely(skb->ip_summed != CHECKSUM_HW));
|
|
+ unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
|
|
}
|
|
#else
|
|
#define HAVE_GSO 0
|
|
@@ -222,7 +222,7 @@ static void network_tx_buf_gc(struct net
|
|
static void network_alloc_rx_buffers(struct net_device *);
|
|
static void send_fake_arp(struct net_device *);
|
|
|
|
-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+static irqreturn_t netif_int(int irq, void *dev_id);
|
|
|
|
#ifdef CONFIG_SYSFS
|
|
static int xennet_sysfs_addif(struct net_device *netdev);
|
|
@@ -992,7 +992,7 @@ static int network_start_xmit(struct sk_
|
|
tx->flags = 0;
|
|
extra = NULL;
|
|
|
|
- if (skb->ip_summed == CHECKSUM_HW) /* local packet? */
|
|
+ if (skb->ip_summed == CHECKSUM_PARTIAL) /* local packet? */
|
|
tx->flags |= NETTXF_csum_blank | NETTXF_data_validated;
|
|
#ifdef CONFIG_XEN
|
|
if (skb->proto_data_valid) /* remote but checksummed? */
|
|
@@ -1049,7 +1049,7 @@ static int network_start_xmit(struct sk_
|
|
return 0;
|
|
}
|
|
|
|
-static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+static irqreturn_t netif_int(int irq, void *dev_id)
|
|
{
|
|
struct net_device *dev = dev_id;
|
|
struct netfront_info *np = netdev_priv(dev);
|
|
--- head-2010-05-25.orig/drivers/xen/pciback/pciback.h 2009-03-18 10:39:32.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pciback/pciback.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -99,7 +99,7 @@ int pciback_publish_pci_roots(struct pci
|
|
void pciback_release_devices(struct pciback_device *pdev);
|
|
|
|
/* Handles events from front-end */
|
|
-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs);
|
|
+irqreturn_t pciback_handle_event(int irq, void *dev_id);
|
|
void pciback_do_op(void *data);
|
|
|
|
int pciback_xenbus_register(void);
|
|
--- head-2010-05-25.orig/drivers/xen/pciback/pciback_ops.c 2009-03-18 10:39:32.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pciback/pciback_ops.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -124,7 +124,7 @@ void pciback_do_op(void *data)
|
|
test_and_schedule_op(pdev);
|
|
}
|
|
|
|
-irqreturn_t pciback_handle_event(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t pciback_handle_event(int irq, void *dev_id)
|
|
{
|
|
struct pciback_device *pdev = dev_id;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/pcifront/pcifront.h 2009-03-18 10:39:32.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pcifront/pcifront.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -50,6 +50,6 @@ void pcifront_free_roots(struct pcifront
|
|
|
|
void pcifront_do_aer( void *data);
|
|
|
|
-irqreturn_t pcifront_handler_aer(int irq, void *dev, struct pt_regs *regs);
|
|
+irqreturn_t pcifront_handler_aer(int irq, void *dev);
|
|
|
|
#endif /* __XEN_PCIFRONT_H__ */
|
|
--- head-2010-05-25.orig/drivers/xen/pcifront/pci_op.c 2009-03-18 10:39:32.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/pcifront/pci_op.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -530,10 +530,16 @@ int __devinit pcifront_rescan_root(struc
|
|
|
|
d = pci_scan_single_device(b, devfn);
|
|
if (d) {
|
|
+ int err;
|
|
+
|
|
dev_info(&pdev->xdev->dev, "New device on "
|
|
"%04x:%02x:%02x.%02x found.\n", domain, bus,
|
|
PCI_SLOT(devfn), PCI_FUNC(devfn));
|
|
- pci_bus_add_device(d);
|
|
+ err = pci_bus_add_device(d);
|
|
+ if (err)
|
|
+ dev_err(&pdev->xdev->dev,
|
|
+ "error %d adding device, continuing.\n",
|
|
+ err);
|
|
}
|
|
}
|
|
|
|
@@ -658,7 +664,7 @@ void pcifront_do_aer(void *data)
|
|
|
|
}
|
|
|
|
-irqreturn_t pcifront_handler_aer(int irq, void *dev, struct pt_regs *regs)
|
|
+irqreturn_t pcifront_handler_aer(int irq, void *dev)
|
|
{
|
|
struct pcifront_device *pdev = dev;
|
|
schedule_pcifront_aer_op(pdev);
|
|
--- head-2010-05-25.orig/drivers/xen/privcmd/compat_privcmd.c 2010-01-27 14:01:48.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/privcmd/compat_privcmd.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -18,7 +18,6 @@
|
|
* Authors: Jimi Xenidis <jimix@watson.ibm.com>
|
|
*/
|
|
|
|
-#include <linux/config.h>
|
|
#include <linux/compat.h>
|
|
#include <linux/ioctl.h>
|
|
#include <linux/syscalls.h>
|
|
--- head-2010-05-25.orig/drivers/xen/privcmd/privcmd.c 2010-01-27 14:01:48.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/privcmd/privcmd.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -71,43 +71,16 @@ static long privcmd_ioctl(struct file *f
|
|
if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
|
|
return -EFAULT;
|
|
|
|
+#ifdef CONFIG_X86
|
|
ret = -ENOSYS;
|
|
-#if defined(__i386__)
|
|
if (hypercall.op >= (PAGE_SIZE >> 5))
|
|
break;
|
|
- __asm__ __volatile__ (
|
|
- "pushl %%ebx; pushl %%ecx; pushl %%edx; "
|
|
- "pushl %%esi; pushl %%edi; "
|
|
- "movl 8(%%eax),%%ebx ;"
|
|
- "movl 16(%%eax),%%ecx ;"
|
|
- "movl 24(%%eax),%%edx ;"
|
|
- "movl 32(%%eax),%%esi ;"
|
|
- "movl 40(%%eax),%%edi ;"
|
|
- "movl (%%eax),%%eax ;"
|
|
- "shll $5,%%eax ;"
|
|
- "addl $hypercall_page,%%eax ;"
|
|
- "call *%%eax ;"
|
|
- "popl %%edi; popl %%esi; popl %%edx; "
|
|
- "popl %%ecx; popl %%ebx"
|
|
- : "=a" (ret) : "0" (&hypercall) : "memory" );
|
|
-#elif defined (__x86_64__)
|
|
- if (hypercall.op < (PAGE_SIZE >> 5)) {
|
|
- long ign1, ign2, ign3;
|
|
- __asm__ __volatile__ (
|
|
- "movq %8,%%r10; movq %9,%%r8;"
|
|
- "shll $5,%%eax ;"
|
|
- "addq $hypercall_page,%%rax ;"
|
|
- "call *%%rax"
|
|
- : "=a" (ret), "=D" (ign1),
|
|
- "=S" (ign2), "=d" (ign3)
|
|
- : "0" ((unsigned int)hypercall.op),
|
|
- "1" (hypercall.arg[0]),
|
|
- "2" (hypercall.arg[1]),
|
|
- "3" (hypercall.arg[2]),
|
|
- "g" (hypercall.arg[3]),
|
|
- "g" (hypercall.arg[4])
|
|
- : "r8", "r10", "memory" );
|
|
- }
|
|
+ ret = _hypercall(long, (unsigned int)hypercall.op,
|
|
+ (unsigned long)hypercall.arg[0],
|
|
+ (unsigned long)hypercall.arg[1],
|
|
+ (unsigned long)hypercall.arg[2],
|
|
+ (unsigned long)hypercall.arg[3],
|
|
+ (unsigned long)hypercall.arg[4]);
|
|
#else
|
|
ret = privcmd_hypercall(&hypercall);
|
|
#endif
|
|
@@ -446,7 +419,7 @@ static int privcmd_mmap(struct file * fi
|
|
return -ENOSYS;
|
|
|
|
/* DONTCOPY is essential for Xen as copy_page_range is broken. */
|
|
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
|
|
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTCOPY;
|
|
vma->vm_ops = &privcmd_vm_ops;
|
|
vma->vm_private_data = NULL;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/scsiback/common.h 2009-03-18 10:39:32.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/scsiback/common.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -147,7 +147,7 @@ typedef struct {
|
|
|
|
#define VSCSI_TYPE_HOST 1
|
|
|
|
-irqreturn_t scsiback_intr(int, void *, struct pt_regs *);
|
|
+irqreturn_t scsiback_intr(int, void *);
|
|
int scsiback_init_sring(struct vscsibk_info *info,
|
|
unsigned long ring_ref, unsigned int evtchn);
|
|
int scsiback_schedule(void *data);
|
|
--- head-2010-05-25.orig/drivers/xen/scsiback/scsiback.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/scsiback/scsiback.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -467,7 +467,7 @@ void scsiback_cmd_exec(pending_req_t *pe
|
|
write = (data_dir == DMA_TO_DEVICE);
|
|
rq = blk_get_request(pending_req->sdev->request_queue, write, GFP_KERNEL);
|
|
|
|
- rq->flags |= REQ_BLOCK_PC;
|
|
+ rq->cmd_type = REQ_TYPE_BLOCK_PC;
|
|
rq->cmd_len = cmd_len;
|
|
memcpy(rq->cmd, pending_req->cmnd, cmd_len);
|
|
|
|
@@ -511,7 +511,7 @@ static void scsiback_device_reset_exec(p
|
|
}
|
|
|
|
|
|
-irqreturn_t scsiback_intr(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t scsiback_intr(int irq, void *dev_id)
|
|
{
|
|
scsiback_notify_work((struct vscsibk_info *)dev_id);
|
|
return IRQ_HANDLED;
|
|
--- head-2010-05-25.orig/drivers/xen/scsifront/common.h 2010-02-24 13:13:46.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/scsifront/common.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -128,7 +128,7 @@ struct vscsifrnt_info {
|
|
int scsifront_xenbus_init(void);
|
|
void scsifront_xenbus_unregister(void);
|
|
int scsifront_schedule(void *data);
|
|
-irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+irqreturn_t scsifront_intr(int irq, void *dev_id);
|
|
int scsifront_cmd_done(struct vscsifrnt_info *info);
|
|
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/scsifront/scsifront.c 2008-07-21 11:00:33.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/scsifront/scsifront.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -100,7 +100,7 @@ static void scsifront_do_request(struct
|
|
notify_remote_via_irq(irq);
|
|
}
|
|
|
|
-irqreturn_t scsifront_intr(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+irqreturn_t scsifront_intr(int irq, void *dev_id)
|
|
{
|
|
scsifront_notify_work((struct vscsifrnt_info *)dev_id);
|
|
return IRQ_HANDLED;
|
|
--- head-2010-05-25.orig/drivers/xen/sfc_netback/accel_xenbus.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/sfc_netback/accel_xenbus.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -69,8 +69,7 @@ static void unlink_bend(struct netback_a
|
|
|
|
|
|
/* Demultiplex a message IRQ from the frontend driver. */
|
|
-static irqreturn_t msgirq_from_frontend(int irq, void *context,
|
|
- struct pt_regs *unused)
|
|
+static irqreturn_t msgirq_from_frontend(int irq, void *context)
|
|
{
|
|
struct xenbus_device *dev = context;
|
|
struct netback_accel *bend = NETBACK_ACCEL_FROM_XENBUS_DEVICE(dev);
|
|
@@ -85,8 +84,7 @@ static irqreturn_t msgirq_from_frontend(
|
|
* functionally, but we need it to pass to the bind function, and may
|
|
* get called spuriously
|
|
*/
|
|
-static irqreturn_t netirq_from_frontend(int irq, void *context,
|
|
- struct pt_regs *unused)
|
|
+static irqreturn_t netirq_from_frontend(int irq, void *context)
|
|
{
|
|
VPRINTK("netirq %d from device %s\n", irq,
|
|
((struct xenbus_device *)context)->nodename);
|
|
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel.h 2009-04-07 13:58:48.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -467,10 +467,8 @@ void netfront_accel_msg_tx_fastpath(netf
|
|
u32 ip, u16 port, u8 protocol);
|
|
|
|
/* Process an IRQ received from back end driver */
|
|
-irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
|
|
- struct pt_regs *unused);
|
|
-irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
|
|
- struct pt_regs *unused);
|
|
+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context);
|
|
+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context);
|
|
|
|
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20)
|
|
extern void netfront_accel_msg_from_bend(struct work_struct *context);
|
|
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_msg.c 2009-04-07 13:58:48.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -488,8 +488,7 @@ void netfront_accel_msg_from_bend(void *
|
|
}
|
|
|
|
|
|
-irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context,
|
|
- struct pt_regs *unused)
|
|
+irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context)
|
|
{
|
|
netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
|
|
VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename);
|
|
@@ -500,8 +499,7 @@ irqreturn_t netfront_accel_msg_channel_i
|
|
}
|
|
|
|
/* Process an interrupt received from the NIC via backend */
|
|
-irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context,
|
|
- struct pt_regs *unused)
|
|
+irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context)
|
|
{
|
|
netfront_accel_vnic *vnic = (netfront_accel_vnic *)context;
|
|
struct net_device *net_dev = vnic->net_dev;
|
|
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_tso.c 2008-02-26 10:54:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_tso.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -363,7 +363,7 @@ int netfront_accel_enqueue_skb_tso(netfr
|
|
|
|
tso_check_safe(skb);
|
|
|
|
- if (skb->ip_summed != CHECKSUM_HW)
|
|
+ if (skb->ip_summed != CHECKSUM_PARTIAL)
|
|
EPRINTK("Trying to TSO send a packet without HW checksum\n");
|
|
|
|
tso_start(&state, skb);
|
|
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_vi.c 2010-01-18 15:23:12.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -463,7 +463,7 @@ netfront_accel_enqueue_skb_multi(netfron
|
|
|
|
frag_i = -1;
|
|
|
|
- if (skb->ip_summed == CHECKSUM_HW) {
|
|
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Set to zero to encourage falcon to work it out for us */
|
|
*(u16*)(skb->h.raw + skb->csum) = 0;
|
|
}
|
|
@@ -582,7 +582,7 @@ netfront_accel_enqueue_skb_single(netfro
|
|
|
|
kva = buf->pkt_kva;
|
|
|
|
- if (skb->ip_summed == CHECKSUM_HW) {
|
|
+ if (skb->ip_summed == CHECKSUM_PARTIAL) {
|
|
/* Set to zero to encourage falcon to work it out for us */
|
|
*(u16*)(skb->h.raw + skb->csum) = 0;
|
|
}
|
|
--- head-2010-05-25.orig/drivers/xen/tpmback/common.h 2007-06-12 13:13:45.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/tpmback/common.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -61,7 +61,7 @@ void tpmif_deschedule_work(tpmif_t * tpm
|
|
void tpmif_xenbus_init(void);
|
|
void tpmif_xenbus_exit(void);
|
|
int tpmif_map(tpmif_t *tpmif, unsigned long shared_page, unsigned int evtchn);
|
|
-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+irqreturn_t tpmif_be_int(int irq, void *dev_id);
|
|
|
|
long int tpmback_get_instance(struct backend_info *bi);
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/tpmback/tpmback.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/tpmback/tpmback.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -507,7 +507,7 @@ static ssize_t vtpm_op_read(struct file
|
|
list_del(&pak->next);
|
|
write_unlock_irqrestore(&dataex.pak_lock, flags);
|
|
|
|
- DPRINTK("size given by app: %d, available: %d\n", size, left);
|
|
+ DPRINTK("size given by app: %zu, available: %u\n", size, left);
|
|
|
|
ret_size = min_t(size_t, size, left);
|
|
|
|
@@ -904,7 +904,7 @@ static void tpm_tx_action(unsigned long
|
|
}
|
|
}
|
|
|
|
-irqreturn_t tpmif_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t tpmif_be_int(int irq, void *dev_id)
|
|
{
|
|
tpmif_t *tpmif = (tpmif_t *) dev_id;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/usbback/usbback.c 2010-01-04 11:56:34.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/usbback/usbback.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -288,7 +288,7 @@ static void usbbk_notify_work(usbif_t *u
|
|
wake_up(&usbif->wq);
|
|
}
|
|
|
|
-irqreturn_t usbbk_be_int(int irq, void *dev_id, struct pt_regs *regs)
|
|
+irqreturn_t usbbk_be_int(int irq, void *dev_id)
|
|
{
|
|
usbbk_notify_work(dev_id);
|
|
return IRQ_HANDLED;
|
|
@@ -318,7 +318,7 @@ static void usbbk_do_response(pending_re
|
|
notify_remote_via_irq(usbif->irq);
|
|
}
|
|
|
|
-static void usbbk_urb_complete(struct urb *urb, struct pt_regs *regs)
|
|
+static void usbbk_urb_complete(struct urb *urb)
|
|
{
|
|
pending_req_t *pending_req = (pending_req_t *)urb->context;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/usbback/usbback.h 2009-11-06 10:23:23.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/usbback/usbback.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -158,7 +158,7 @@ int portid_add(const char *busid,
|
|
int portid_remove(const domid_t domid,
|
|
const unsigned int handle,
|
|
const int portnum);
|
|
-irqreturn_t usbbk_be_int(int irq, void *dev_id, struct pt_regs *regs);
|
|
+irqreturn_t usbbk_be_int(int irq, void *dev_id);
|
|
int usbbk_schedule(void *arg);
|
|
struct usbstub *find_attached_device(usbif_t *usbif, int port);
|
|
void usbbk_attach_device(usbif_t *usbif, struct usbstub *stub);
|
|
--- head-2010-05-25.orig/drivers/xen/usbback/usbstub.c 2009-11-06 10:23:23.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/usbback/usbstub.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -284,7 +284,7 @@ static ssize_t usbstub_show_portids(stru
|
|
DRIVER_ATTR(port_ids, S_IRUSR, usbstub_show_portids, NULL);
|
|
|
|
/* table of devices that matches any usbdevice */
|
|
-static struct usb_device_id usbstub_table[] = {
|
|
+static const struct usb_device_id usbstub_table[] = {
|
|
{ .driver_info = 1 }, /* wildcard, see usb_match_id() */
|
|
{ } /* Terminating entry */
|
|
};
|
|
@@ -308,7 +308,7 @@ int __init usbstub_init(void)
|
|
goto out;
|
|
}
|
|
|
|
- err = driver_create_file(&usbback_usb_driver.driver,
|
|
+ err = driver_create_file(&usbback_usb_driver.drvwrap.driver,
|
|
&driver_attr_port_ids);
|
|
if (err)
|
|
usb_deregister(&usbback_usb_driver);
|
|
@@ -319,7 +319,7 @@ out:
|
|
|
|
void usbstub_exit(void)
|
|
{
|
|
- driver_remove_file(&usbback_usb_driver.driver,
|
|
+ driver_remove_file(&usbback_usb_driver.drvwrap.driver,
|
|
&driver_attr_port_ids);
|
|
usb_deregister(&usbback_usb_driver);
|
|
}
|
|
--- head-2010-05-25.orig/drivers/xen/usbfront/usbfront.h 2009-10-15 11:45:41.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/usbfront/usbfront.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -195,7 +195,7 @@ timer_action(struct usbfront_info *info,
|
|
extern struct kmem_cache *xenhcd_urbp_cachep;
|
|
extern struct hc_driver xen_usb20_hc_driver;
|
|
extern struct hc_driver xen_usb11_hc_driver;
|
|
-irqreturn_t xenhcd_int(int irq, void *dev_id, struct pt_regs *ptregs);
|
|
+irqreturn_t xenhcd_int(int irq, void *dev_id);
|
|
void xenhcd_rhport_state_change(struct usbfront_info *info,
|
|
int port, enum usb_device_speed speed);
|
|
int xenhcd_schedule(void *arg);
|
|
--- head-2010-05-25.orig/drivers/xen/usbfront/usbfront-dbg.c 2009-10-15 11:45:41.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/usbfront/usbfront-dbg.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -90,7 +90,9 @@ static CLASS_DEVICE_ATTR(statistics, S_I
|
|
static inline void create_debug_file(struct usbfront_info *info)
|
|
{
|
|
struct class_device *cldev = info_to_hcd(info)->self.class_dev;
|
|
- class_device_create_file(cldev, &class_device_attr_statistics);
|
|
+ if (class_device_create_file(cldev, &class_device_attr_statistics))
|
|
+ printk(KERN_WARNING "statistics file not created for %s\n",
|
|
+ info_to_hcd(info)->self.bus_name);
|
|
}
|
|
|
|
static inline void remove_debug_file(struct usbfront_info *info)
|
|
--- head-2010-05-25.orig/drivers/xen/usbfront/usbfront-q.c 2009-10-15 11:45:41.000000000 +0200
|
|
+++ head-2010-05-25/drivers/xen/usbfront/usbfront-q.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -236,7 +236,7 @@ __acquires(info->lock)
|
|
COUNT(info->stats.complete);
|
|
}
|
|
spin_unlock(&info->lock);
|
|
- usb_hcd_giveback_urb(info_to_hcd(info), urb, NULL);
|
|
+ usb_hcd_giveback_urb(info_to_hcd(info), urb);
|
|
spin_lock(&info->lock);
|
|
}
|
|
|
|
@@ -534,7 +534,7 @@ static void xenhcd_notify_work(struct us
|
|
wake_up(&info->wq);
|
|
}
|
|
|
|
-irqreturn_t xenhcd_int(int irq, void *dev_id, struct pt_regs *ptregs)
|
|
+irqreturn_t xenhcd_int(int irq, void *dev_id)
|
|
{
|
|
xenhcd_notify_work((struct usbfront_info *) dev_id);
|
|
return IRQ_HANDLED;
|
|
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_comms.c 2010-01-19 16:01:03.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_comms.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -54,7 +54,7 @@ static DECLARE_WORK(probe_work, xenbus_p
|
|
|
|
static DECLARE_WAIT_QUEUE_HEAD(xb_waitq);
|
|
|
|
-static irqreturn_t wake_waiting(int irq, void *unused, struct pt_regs *regs)
|
|
+static irqreturn_t wake_waiting(int irq, void *unused)
|
|
{
|
|
int old, new;
|
|
|
|
--- head-2010-05-25.orig/drivers/xen/xenoprof/xenoprofile.c 2010-01-07 09:38:29.000000000 +0100
|
|
+++ head-2010-05-25/drivers/xen/xenoprof/xenoprofile.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -194,8 +194,7 @@ done:
|
|
oprofile_add_domain_switch(COORDINATOR_DOMAIN);
|
|
}
|
|
|
|
-static irqreturn_t
|
|
-xenoprof_ovf_interrupt(int irq, void * dev_id, struct pt_regs * regs)
|
|
+static irqreturn_t xenoprof_ovf_interrupt(int irq, void *dev_id)
|
|
{
|
|
struct xenoprof_buf * buf;
|
|
static unsigned long flag;
|
|
--- head-2010-05-25.orig/include/asm-generic/pgtable.h 2010-03-24 14:53:41.000000000 +0100
|
|
+++ head-2010-05-25/include/asm-generic/pgtable.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -100,7 +100,7 @@ static inline void ptep_set_wrprotect(st
|
|
#endif
|
|
|
|
#ifndef arch_change_pte_range
|
|
-#define arch_change_pte_range(mm, pmd, addr, end, newprot) 0
|
|
+#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) 0
|
|
#endif
|
|
|
|
#ifndef __HAVE_ARCH_PTE_SAME
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/desc_32.h 2008-01-28 12:24:19.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -32,52 +32,110 @@ static inline struct desc_struct *get_cp
|
|
return (struct desc_struct *)per_cpu(cpu_gdt_descr, cpu).address;
|
|
}
|
|
|
|
+/*
|
|
+ * This is the ldt that every process will get unless we need
|
|
+ * something other than this.
|
|
+ */
|
|
+extern struct desc_struct default_ldt[];
|
|
+extern struct desc_struct idt_table[];
|
|
+extern void set_intr_gate(unsigned int irq, void * addr);
|
|
+
|
|
+static inline void pack_descriptor(__u32 *a, __u32 *b,
|
|
+ unsigned long base, unsigned long limit, unsigned char type, unsigned char flags)
|
|
+{
|
|
+ *a = ((base & 0xffff) << 16) | (limit & 0xffff);
|
|
+ *b = (base & 0xff000000) | ((base & 0xff0000) >> 16) |
|
|
+ (limit & 0x000f0000) | ((type & 0xff) << 8) | ((flags & 0xf) << 20);
|
|
+}
|
|
+
|
|
+static inline void pack_gate(__u32 *a, __u32 *b,
|
|
+ unsigned long base, unsigned short seg, unsigned char type, unsigned char flags)
|
|
+{
|
|
+ *a = (seg << 16) | (base & 0xffff);
|
|
+ *b = (base & 0xffff0000) | ((type & 0xff) << 8) | (flags & 0xff);
|
|
+}
|
|
+
|
|
+#define DESCTYPE_LDT 0x82 /* present, system, DPL-0, LDT */
|
|
+#define DESCTYPE_TSS 0x89 /* present, system, DPL-0, 32-bit TSS */
|
|
+#define DESCTYPE_TASK 0x85 /* present, system, DPL-0, task gate */
|
|
+#define DESCTYPE_INT 0x8e /* present, system, DPL-0, interrupt gate */
|
|
+#define DESCTYPE_TRAP 0x8f /* present, system, DPL-0, trap gate */
|
|
+#define DESCTYPE_DPL3 0x60 /* DPL-3 */
|
|
+#define DESCTYPE_S 0x10 /* !system */
|
|
+
|
|
#define load_TR_desc() __asm__ __volatile__("ltr %w0"::"q" (GDT_ENTRY_TSS*8))
|
|
#define load_LDT_desc() __asm__ __volatile__("lldt %w0"::"q" (GDT_ENTRY_LDT*8))
|
|
|
|
#define load_gdt(dtr) __asm__ __volatile("lgdt %0"::"m" (*dtr))
|
|
#define load_idt(dtr) __asm__ __volatile("lidt %0"::"m" (*dtr))
|
|
-#define load_tr(tr) __asm__ __volatile("ltr %0"::"mr" (tr))
|
|
-#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"mr" (ldt))
|
|
+#define load_tr(tr) __asm__ __volatile("ltr %0"::"m" (tr))
|
|
+#define load_ldt(ldt) __asm__ __volatile("lldt %0"::"m" (ldt))
|
|
|
|
#define store_gdt(dtr) __asm__ ("sgdt %0":"=m" (*dtr))
|
|
#define store_idt(dtr) __asm__ ("sidt %0":"=m" (*dtr))
|
|
-#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
|
|
-#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
|
|
+#define store_tr(tr) __asm__ ("str %0":"=m" (tr))
|
|
+#define store_ldt(ldt) __asm__ ("sldt %0":"=m" (ldt))
|
|
|
|
-/*
|
|
- * This is the ldt that every process will get unless we need
|
|
- * something other than this.
|
|
- */
|
|
-extern struct desc_struct default_ldt[];
|
|
-extern void set_intr_gate(unsigned int irq, void * addr);
|
|
+#if TLS_SIZE != 24
|
|
+# error update this code.
|
|
+#endif
|
|
+
|
|
+static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
|
|
+{
|
|
+#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
|
|
+ *(u64 *)&t->tls_array[i])) \
|
|
+ BUG()
|
|
+ C(0); C(1); C(2);
|
|
+#undef C
|
|
+}
|
|
|
|
-#define _set_tssldt_desc(n,addr,limit,type) \
|
|
-__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
|
|
- "movw %w1,2(%2)\n\t" \
|
|
- "rorl $16,%1\n\t" \
|
|
- "movb %b1,4(%2)\n\t" \
|
|
- "movb %4,5(%2)\n\t" \
|
|
- "movb $0,6(%2)\n\t" \
|
|
- "movb %h1,7(%2)\n\t" \
|
|
- "rorl $16,%1" \
|
|
- : "=m"(*(n)) : "q" (addr), "r"(n), "ir"(limit), "i"(type))
|
|
+#ifndef CONFIG_XEN
|
|
+static inline void write_dt_entry(void *dt, int entry, __u32 entry_a, __u32 entry_b)
|
|
+{
|
|
+ __u32 *lp = (__u32 *)((char *)dt + entry*8);
|
|
+ *lp = entry_a;
|
|
+ *(lp+1) = entry_b;
|
|
+}
|
|
|
|
-#ifndef CONFIG_X86_NO_TSS
|
|
-static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, void *addr)
|
|
+#define write_ldt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+#define write_gdt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+#else
|
|
+extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
|
|
+extern int write_gdt_entry(void *gdt, int entry, __u32 entry_a, __u32 entry_b);
|
|
+#endif
|
|
+#ifndef CONFIG_X86_NO_IDT
|
|
+#define write_idt_entry(dt, entry, a, b) write_dt_entry(dt, entry, a, b)
|
|
+
|
|
+static inline void _set_gate(int gate, unsigned int type, void *addr, unsigned short seg)
|
|
{
|
|
- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[entry], (int)addr,
|
|
- offsetof(struct tss_struct, __cacheline_filler) - 1, 0x89);
|
|
+ __u32 a, b;
|
|
+ pack_gate(&a, &b, (unsigned long)addr, seg, type, 0);
|
|
+ write_idt_entry(idt_table, gate, a, b);
|
|
}
|
|
+#endif
|
|
|
|
-#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
|
|
+#ifndef CONFIG_X86_NO_TSS
|
|
+static inline void __set_tss_desc(unsigned int cpu, unsigned int entry, const void *addr)
|
|
+{
|
|
+ __u32 a, b;
|
|
+ pack_descriptor(&a, &b, (unsigned long)addr,
|
|
+ offsetof(struct tss_struct, __cacheline_filler) - 1,
|
|
+ DESCTYPE_TSS, 0);
|
|
+ write_gdt_entry(get_cpu_gdt_table(cpu), entry, a, b);
|
|
+}
|
|
#endif
|
|
|
|
-static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
|
|
+static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int entries)
|
|
{
|
|
- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
|
|
+ __u32 a, b;
|
|
+ pack_descriptor(&a, &b, (unsigned long)addr,
|
|
+ entries * sizeof(struct desc_struct) - 1,
|
|
+ DESCTYPE_LDT, 0);
|
|
+ write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_LDT, a, b);
|
|
}
|
|
|
|
+#define set_tss_desc(cpu,addr) __set_tss_desc(cpu, GDT_ENTRY_TSS, addr)
|
|
+
|
|
#define LDT_entry_a(info) \
|
|
((((info)->base_addr & 0x0000ffff) << 16) | ((info)->limit & 0x0ffff))
|
|
|
|
@@ -103,21 +161,6 @@ static inline void set_ldt_desc(unsigned
|
|
(info)->seg_not_present == 1 && \
|
|
(info)->useable == 0 )
|
|
|
|
-extern int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b);
|
|
-
|
|
-#if TLS_SIZE != 24
|
|
-# error update this code.
|
|
-#endif
|
|
-
|
|
-static inline void load_TLS(struct thread_struct *t, unsigned int cpu)
|
|
-{
|
|
-#define C(i) if (HYPERVISOR_update_descriptor(virt_to_machine(&get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i]), \
|
|
- *(u64 *)&t->tls_array[i])) \
|
|
- BUG();
|
|
- C(0); C(1); C(2);
|
|
-#undef C
|
|
-}
|
|
-
|
|
static inline void clear_LDT(void)
|
|
{
|
|
int cpu = get_cpu();
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap_32.h 2007-06-12 13:14:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -55,7 +55,7 @@ enum fixed_addresses {
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
|
|
#endif
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
+#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_XEN)
|
|
FIX_IO_APIC_BASE_0,
|
|
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
|
|
#endif
|
|
@@ -95,10 +95,9 @@ enum fixed_addresses {
|
|
__end_of_fixed_addresses
|
|
};
|
|
|
|
-extern void set_fixaddr_top(unsigned long top);
|
|
-
|
|
extern void __set_fixmap(enum fixed_addresses idx,
|
|
maddr_t phys, pgprot_t flags);
|
|
+extern void reserve_top_address(unsigned long reserve);
|
|
|
|
#define set_fixmap(idx, phys) \
|
|
__set_fixmap(idx, phys, PAGE_KERNEL)
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypercall_32.h 2009-06-23 09:28:21.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypercall_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -128,6 +128,23 @@
|
|
__res; \
|
|
})
|
|
|
|
+#define _hypercall(type, op, a1, a2, a3, a4, a5) \
|
|
+({ \
|
|
+ type __res; \
|
|
+ register typeof((a1)+0) __arg1 asm("ebx") = (a1); \
|
|
+ register typeof((a2)+0) __arg2 asm("ecx") = (a2); \
|
|
+ register typeof((a3)+0) __arg3 asm("edx") = (a3); \
|
|
+ register typeof((a4)+0) __arg4 asm("esi") = (a4); \
|
|
+ register typeof((a5)+0) __arg5 asm("edi") = (a5); \
|
|
+ asm volatile ( \
|
|
+ "call *%6" \
|
|
+ : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
|
|
+ "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
|
|
+ : "0" (hypercall_page + (op) * 32) \
|
|
+ : "memory" ); \
|
|
+ __res; \
|
|
+})
|
|
+
|
|
static inline int __must_check
|
|
HYPERVISOR_set_trap_table(
|
|
const trap_info_t *table)
|
|
@@ -140,6 +157,8 @@ HYPERVISOR_mmu_update(
|
|
mmu_update_t *req, unsigned int count, unsigned int *success_count,
|
|
domid_t domid)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ return xen_multi_mmu_update(req, count, success_count, domid);
|
|
return _hypercall4(int, mmu_update, req, count, success_count, domid);
|
|
}
|
|
|
|
@@ -148,6 +167,8 @@ HYPERVISOR_mmuext_op(
|
|
struct mmuext_op *op, unsigned int count, unsigned int *success_count,
|
|
domid_t domid)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ return xen_multi_mmuext_op(op, count, success_count, domid);
|
|
return _hypercall4(int, mmuext_op, op, count, success_count, domid);
|
|
}
|
|
|
|
@@ -238,6 +259,8 @@ static inline int __must_check
|
|
HYPERVISOR_memory_op(
|
|
unsigned int cmd, void *arg)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ xen_multicall_flush(false);
|
|
return _hypercall2(int, memory_op, cmd, arg);
|
|
}
|
|
|
|
@@ -253,6 +276,9 @@ HYPERVISOR_update_va_mapping(
|
|
unsigned long va, pte_t new_val, unsigned long flags)
|
|
{
|
|
unsigned long pte_hi = 0;
|
|
+
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ return xen_multi_update_va_mapping(va, new_val, flags);
|
|
#ifdef CONFIG_X86_PAE
|
|
pte_hi = new_val.pte_high;
|
|
#endif
|
|
@@ -316,6 +342,8 @@ static inline int __must_check
|
|
HYPERVISOR_grant_table_op(
|
|
unsigned int cmd, void *uop, unsigned int count)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ xen_multicall_flush(false);
|
|
return _hypercall3(int, grant_table_op, cmd, uop, count);
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypercall_64.h 2009-06-23 09:28:21.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypercall_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -135,6 +135,23 @@
|
|
__res; \
|
|
})
|
|
|
|
+#define _hypercall(type, op, a1, a2, a3, a4, a5) \
|
|
+({ \
|
|
+ type __res; \
|
|
+ register typeof((a1)+0) __arg1 asm("rdi") = (a1); \
|
|
+ register typeof((a2)+0) __arg2 asm("rsi") = (a2); \
|
|
+ register typeof((a3)+0) __arg3 asm("rdx") = (a3); \
|
|
+ register typeof((a4)+0) __arg4 asm("r10") = (a4); \
|
|
+ register typeof((a5)+0) __arg5 asm("r8") = (a5); \
|
|
+ asm volatile ( \
|
|
+ "call *%6" \
|
|
+ : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
|
|
+ "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
|
|
+ : "0" (hypercall_page + (op) * 32) \
|
|
+ : "memory" ); \
|
|
+ __res; \
|
|
+})
|
|
+
|
|
static inline int __must_check
|
|
HYPERVISOR_set_trap_table(
|
|
const trap_info_t *table)
|
|
@@ -147,6 +164,8 @@ HYPERVISOR_mmu_update(
|
|
mmu_update_t *req, unsigned int count, unsigned int *success_count,
|
|
domid_t domid)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ return xen_multi_mmu_update(req, count, success_count, domid);
|
|
return _hypercall4(int, mmu_update, req, count, success_count, domid);
|
|
}
|
|
|
|
@@ -155,6 +174,8 @@ HYPERVISOR_mmuext_op(
|
|
struct mmuext_op *op, unsigned int count, unsigned int *success_count,
|
|
domid_t domid)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ return xen_multi_mmuext_op(op, count, success_count, domid);
|
|
return _hypercall4(int, mmuext_op, op, count, success_count, domid);
|
|
}
|
|
|
|
@@ -248,6 +269,8 @@ static inline int __must_check
|
|
HYPERVISOR_memory_op(
|
|
unsigned int cmd, void *arg)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ xen_multicall_flush(false);
|
|
return _hypercall2(int, memory_op, cmd, arg);
|
|
}
|
|
|
|
@@ -262,6 +285,8 @@ static inline int __must_check
|
|
HYPERVISOR_update_va_mapping(
|
|
unsigned long va, pte_t new_val, unsigned long flags)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ return xen_multi_update_va_mapping(va, new_val, flags);
|
|
return _hypercall3(int, update_va_mapping, va, new_val.pte, flags);
|
|
}
|
|
|
|
@@ -321,6 +346,8 @@ static inline int __must_check
|
|
HYPERVISOR_grant_table_op(
|
|
unsigned int cmd, void *uop, unsigned int count)
|
|
{
|
|
+ if (arch_use_lazy_mmu_mode())
|
|
+ xen_multicall_flush(false);
|
|
return _hypercall3(int, grant_table_op, cmd, uop, count);
|
|
}
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2009-07-13 14:25:35.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -44,6 +44,7 @@
|
|
#include <xen/interface/sched.h>
|
|
#include <xen/interface/nmi.h>
|
|
#include <xen/interface/tmem.h>
|
|
+#include <asm/percpu.h>
|
|
#include <asm/ptrace.h>
|
|
#include <asm/page.h>
|
|
#if defined(__i386__)
|
|
@@ -139,7 +140,44 @@ void scrub_pages(void *, unsigned int);
|
|
#define scrub_pages(_p,_n) ((void)0)
|
|
#endif
|
|
|
|
-#include <xen/hypercall.h>
|
|
+#if defined(CONFIG_XEN) && !defined(MODULE)
|
|
+
|
|
+DECLARE_PER_CPU(bool, xen_lazy_mmu);
|
|
+
|
|
+int xen_multicall_flush(bool);
|
|
+
|
|
+int __must_check xen_multi_update_va_mapping(unsigned long va, pte_t,
|
|
+ unsigned long flags);
|
|
+int __must_check xen_multi_mmu_update(mmu_update_t *, unsigned int count,
|
|
+ unsigned int *success_count, domid_t);
|
|
+int __must_check xen_multi_mmuext_op(struct mmuext_op *, unsigned int count,
|
|
+ unsigned int *success_count, domid_t);
|
|
+
|
|
+#define __HAVE_ARCH_ENTER_LAZY_MMU_MODE
|
|
+static inline void arch_enter_lazy_mmu_mode(void)
|
|
+{
|
|
+ __get_cpu_var(xen_lazy_mmu) = true;
|
|
+}
|
|
+
|
|
+static inline void arch_leave_lazy_mmu_mode(void)
|
|
+{
|
|
+ __get_cpu_var(xen_lazy_mmu) = false;
|
|
+ xen_multicall_flush(false);
|
|
+}
|
|
+
|
|
+#ifndef arch_use_lazy_mmu_mode
|
|
+#define arch_use_lazy_mmu_mode() unlikely(__get_cpu_var(xen_lazy_mmu))
|
|
+#endif
|
|
+
|
|
+#else /* !CONFIG_XEN || MODULE */
|
|
+
|
|
+static inline void xen_multicall_flush(bool ignore) {}
|
|
+#define arch_use_lazy_mmu_mode() false
|
|
+#define xen_multi_update_va_mapping(...) ({ BUG(); -ENOSYS; })
|
|
+#define xen_multi_mmu_update(...) ({ BUG(); -ENOSYS; })
|
|
+#define xen_multi_mmuext_op(...) ({ BUG(); -ENOSYS; })
|
|
+
|
|
+#endif /* CONFIG_XEN && !MODULE */
|
|
|
|
#if defined(CONFIG_X86_64)
|
|
#define MULTI_UVMFLAGS_INDEX 2
|
|
@@ -151,11 +189,15 @@ void scrub_pages(void *, unsigned int);
|
|
|
|
#ifdef CONFIG_XEN
|
|
#define is_running_on_xen() 1
|
|
+extern char hypercall_page[PAGE_SIZE];
|
|
#else
|
|
extern char *hypercall_stubs;
|
|
+#define hypercall_page hypercall_stubs
|
|
#define is_running_on_xen() (!!hypercall_stubs)
|
|
#endif
|
|
|
|
+#include <xen/hypercall.h>
|
|
+
|
|
static inline int
|
|
HYPERVISOR_yield(
|
|
void)
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2008-04-02 12:34:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable-3level.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -53,7 +53,6 @@ static inline int pte_exec_kernel(pte_t
|
|
* not possible, use pte_get_and_clear to obtain the old pte
|
|
* value and then use set_pte to update it. -ben
|
|
*/
|
|
-#define __HAVE_ARCH_SET_PTE_ATOMIC
|
|
|
|
static inline void set_pte(pte_t *ptep, pte_t pte)
|
|
{
|
|
@@ -70,14 +69,6 @@ static inline void set_pte(pte_t *ptep,
|
|
set_pte((ptep), (pteval)); \
|
|
} while (0)
|
|
|
|
-#define set_pte_at_sync(_mm,addr,ptep,pteval) do { \
|
|
- if (((_mm) != current->mm && (_mm) != &init_mm) || \
|
|
- HYPERVISOR_update_va_mapping((addr), (pteval), UVMF_INVLPG)) { \
|
|
- set_pte((ptep), (pteval)); \
|
|
- xen_invlpg((addr)); \
|
|
- } \
|
|
-} while (0)
|
|
-
|
|
#define set_pmd(pmdptr,pmdval) \
|
|
xen_l2_entry_update((pmdptr), (pmdval))
|
|
#define set_pud(pudptr,pudval) \
|
|
@@ -94,7 +85,7 @@ static inline void pud_clear (pud_t * pu
|
|
#define pud_page(pud) \
|
|
((struct page *) __va(pud_val(pud) & PAGE_MASK))
|
|
|
|
-#define pud_page_kernel(pud) \
|
|
+#define pud_page_vaddr(pud) \
|
|
((unsigned long) __va(pud_val(pud) & PAGE_MASK))
|
|
|
|
|
|
@@ -124,6 +115,7 @@ static inline void pte_clear(struct mm_s
|
|
|
|
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
|
|
|
|
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
{
|
|
pte_t pte = *ptep;
|
|
@@ -142,6 +134,7 @@ static inline pte_t ptep_get_and_clear(s
|
|
return pte;
|
|
}
|
|
|
|
+#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
|
|
#define ptep_clear_flush(vma, addr, ptep) \
|
|
({ \
|
|
pte_t *__ptep = (ptep); \
|
|
@@ -159,6 +152,7 @@ static inline pte_t ptep_get_and_clear(s
|
|
__res; \
|
|
})
|
|
|
|
+#define __HAVE_ARCH_PTE_SAME
|
|
static inline int pte_same(pte_t a, pte_t b)
|
|
{
|
|
return a.pte_low == b.pte_low && a.pte_high == b.pte_high;
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2009-03-18 10:39:32.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -260,31 +260,89 @@ static inline pte_t pte_mkhuge(pte_t pte
|
|
# include <asm/pgtable-2level.h>
|
|
#endif
|
|
|
|
-#define ptep_test_and_clear_dirty(vma, addr, ptep) \
|
|
+/*
|
|
+ * Rules for using pte_update - it must be called after any PTE update which
|
|
+ * has not been done using the set_pte / clear_pte interfaces. It is used by
|
|
+ * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
|
|
+ * updates should either be sets, clears, or set_pte_atomic for P->P
|
|
+ * transitions, which means this hook should only be called for user PTEs.
|
|
+ * This hook implies a P->P protection or access change has taken place, which
|
|
+ * requires a subsequent TLB flush. The notification can optionally be delayed
|
|
+ * until the TLB flush event by using the pte_update_defer form of the
|
|
+ * interface, but care must be taken to assure that the flush happens while
|
|
+ * still holding the same page table lock so that the shadow and primary pages
|
|
+ * do not become out of sync on SMP.
|
|
+ */
|
|
+#define pte_update(mm, addr, ptep) do { } while (0)
|
|
+#define pte_update_defer(mm, addr, ptep) do { } while (0)
|
|
+
|
|
+
|
|
+/*
|
|
+ * We only update the dirty/accessed state if we set
|
|
+ * the dirty bit by hand in the kernel, since the hardware
|
|
+ * will do the accessed bit for us, and we don't want to
|
|
+ * race with other CPU's that might be updating the dirty
|
|
+ * bit at the same time.
|
|
+ */
|
|
+#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
+#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
|
|
+do { \
|
|
+ if (dirty) \
|
|
+ ptep_establish(vma, address, ptep, entry); \
|
|
+} while (0)
|
|
+
|
|
+/*
|
|
+ * We don't actually have these, but we want to advertise them so that
|
|
+ * we can encompass the flush here.
|
|
+ */
|
|
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
+
|
|
+/*
|
|
+ * Rules for using ptep_establish: the pte MUST be a user pte, and
|
|
+ * must be a present->present transition.
|
|
+ */
|
|
+#define __HAVE_ARCH_PTEP_ESTABLISH
|
|
+#define ptep_establish(vma, address, ptep, pteval) \
|
|
+do { \
|
|
+ if ( likely((vma)->vm_mm == current->mm) ) { \
|
|
+ BUG_ON(HYPERVISOR_update_va_mapping(address, \
|
|
+ pteval, \
|
|
+ (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
|
|
+ UVMF_INVLPG|UVMF_MULTI)); \
|
|
+ } else { \
|
|
+ xen_l1_entry_update(ptep, pteval); \
|
|
+ flush_tlb_page(vma, address); \
|
|
+ } \
|
|
+} while (0)
|
|
+
|
|
+#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
|
|
+#define ptep_clear_flush_dirty(vma, address, ptep) \
|
|
({ \
|
|
pte_t __pte = *(ptep); \
|
|
- int __ret = pte_dirty(__pte); \
|
|
- if (__ret) { \
|
|
- __pte = pte_mkclean(__pte); \
|
|
- if ((vma)->vm_mm != current->mm || \
|
|
- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
|
|
- (ptep)->pte_low = __pte.pte_low; \
|
|
- } \
|
|
- __ret; \
|
|
+ int __dirty = pte_dirty(__pte); \
|
|
+ __pte = pte_mkclean(__pte); \
|
|
+ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
|
|
+ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
|
|
+ else if (__dirty) \
|
|
+ (ptep)->pte_low = __pte.pte_low; \
|
|
+ __dirty; \
|
|
})
|
|
|
|
-#define ptep_test_and_clear_young(vma, addr, ptep) \
|
|
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
|
+#define ptep_clear_flush_young(vma, address, ptep) \
|
|
({ \
|
|
pte_t __pte = *(ptep); \
|
|
- int __ret = pte_young(__pte); \
|
|
- if (__ret) \
|
|
- __pte = pte_mkold(__pte); \
|
|
- if ((vma)->vm_mm != current->mm || \
|
|
- HYPERVISOR_update_va_mapping(addr, __pte, 0)) \
|
|
- (ptep)->pte_low = __pte.pte_low; \
|
|
- __ret; \
|
|
+ int __young = pte_young(__pte); \
|
|
+ __pte = pte_mkold(__pte); \
|
|
+ if (test_bit(PG_pinned, &virt_to_page((vma)->vm_mm->pgd)->flags)) \
|
|
+ ptep_set_access_flags(vma, address, ptep, __pte, __young); \
|
|
+ else if (__young) \
|
|
+ (ptep)->pte_low = __pte.pte_low; \
|
|
+ __young; \
|
|
})
|
|
|
|
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
|
|
#define ptep_get_and_clear_full(mm, addr, ptep, full) \
|
|
((full) ? ({ \
|
|
pte_t __res = *(ptep); \
|
|
@@ -296,6 +354,7 @@ static inline pte_t pte_mkhuge(pte_t pte
|
|
}) : \
|
|
ptep_get_and_clear(mm, addr, ptep))
|
|
|
|
+#define __HAVE_ARCH_PTEP_SET_WRPROTECT
|
|
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
{
|
|
pte_t pte = *ptep;
|
|
@@ -391,11 +450,11 @@ static inline pte_t pte_modify(pte_t pte
|
|
#define pte_index(address) \
|
|
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
|
|
#define pte_offset_kernel(dir, address) \
|
|
- ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
|
|
+ ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))
|
|
|
|
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
|
|
|
|
-#define pmd_page_kernel(pmd) \
|
|
+#define pmd_page_vaddr(pmd) \
|
|
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
|
|
|
|
/*
|
|
@@ -418,8 +477,6 @@ extern pte_t *lookup_address(unsigned lo
|
|
static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;}
|
|
#endif
|
|
|
|
-extern void noexec_setup(const char *str);
|
|
-
|
|
#if defined(CONFIG_HIGHPTE)
|
|
#define pte_offset_map(dir, address) \
|
|
((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \
|
|
@@ -437,37 +494,17 @@ extern void noexec_setup(const char *str
|
|
#define pte_unmap_nested(pte) do { } while (0)
|
|
#endif
|
|
|
|
-#define __HAVE_ARCH_PTEP_ESTABLISH
|
|
-#define ptep_establish(vma, address, ptep, pteval) \
|
|
- do { \
|
|
- if ( likely((vma)->vm_mm == current->mm) ) { \
|
|
- BUG_ON(HYPERVISOR_update_va_mapping(address, \
|
|
- pteval, \
|
|
- (unsigned long)(vma)->vm_mm->cpu_vm_mask.bits| \
|
|
- UVMF_INVLPG|UVMF_MULTI)); \
|
|
- } else { \
|
|
- xen_l1_entry_update(ptep, pteval); \
|
|
- flush_tlb_page(vma, address); \
|
|
- } \
|
|
- } while (0)
|
|
+/* Clear the kernel PTE mapping vaddr and flush it from the TLB (ptep is unused); BUG()s on a nonzero result */
|

+#define kpte_clear_flush(ptep, vaddr) do { \
|

+	if (HYPERVISOR_update_va_mapping(vaddr, __pte(0), UVMF_INVLPG)) \
|

+		BUG(); \
|

+} while (0)
|
|
|
|
/*
|
|
* The i386 doesn't have any external MMU info: the kernel page
|
|
* tables contain all the necessary information.
|
|
- *
|
|
- * Also, we only update the dirty/accessed state if we set
|
|
- * the dirty bit by hand in the kernel, since the hardware
|
|
- * will do the accessed bit for us, and we don't want to
|
|
- * race with other CPU's that might be updating the dirty
|
|
- * bit at the same time.
|
|
*/
|
|
#define update_mmu_cache(vma,address,pte) do { } while (0)
|
|
-#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
|
|
-#define ptep_set_access_flags(vma, address, ptep, entry, dirty) \
|
|
- do { \
|
|
- if (dirty) \
|
|
- ptep_establish(vma, address, ptep, entry); \
|
|
- } while (0)
|
|
|
|
#include <xen/features.h>
|
|
void make_lowmem_page_readonly(void *va, unsigned int feature);
|
|
@@ -526,10 +563,11 @@ int touch_pte_range(struct mm_struct *mm
|
|
unsigned long size);
|
|
|
|
int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|
- unsigned long addr, unsigned long end, pgprot_t newprot);
|
|
+ unsigned long addr, unsigned long end, pgprot_t newprot,
|
|
+ int dirty_accountable);
|
|
|
|
-#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
|
|
- xen_change_pte_range(mm, pmd, addr, end, newprot)
|
|
+#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
|
|
+ xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
|
|
|
|
#define io_remap_pfn_range(vma,from,pfn,size,prot) \
|
|
direct_remap_pfn_range(vma,from,pfn,size,prot,DOMID_IO)
|
|
@@ -538,13 +576,6 @@ direct_remap_pfn_range(vma,from,pfn,size
|
|
#define GET_IOSPACE(pfn) 0
|
|
#define GET_PFN(pfn) (pfn)
|
|
|
|
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
-#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
|
|
-#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
|
|
-#define __HAVE_ARCH_PTEP_SET_WRPROTECT
|
|
-#define __HAVE_ARCH_PTE_SAME
|
|
#include <asm-generic/pgtable.h>
|
|
|
|
#endif /* _I386_PGTABLE_H */
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor_32.h 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -146,6 +146,18 @@ static inline void detect_ht(struct cpui
|
|
#define X86_EFLAGS_VIP 0x00100000 /* Virtual Interrupt Pending */
|
|
#define X86_EFLAGS_ID 0x00200000 /* CPUID detection flag */
|
|
|
|
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
|

+			 unsigned int *ecx, unsigned int *edx)
|

+{
|

+	/* Issue XEN_CPUID: *eax and *ecx are inputs, all four are outputs. */
|

+	__asm__(XEN_CPUID
|

+		: "=a" (*eax),
|

+		  "=b" (*ebx),
|

+		  "=c" (*ecx),
|

+		  "=d" (*edx)
|

+		: "0" (*eax), "2" (*ecx));	/* tied to operands 0 and 2 */
|

+}
|
|
+
|
|
/*
|
|
* Generic CPUID function
|
|
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
|
|
@@ -153,24 +165,18 @@ static inline void detect_ht(struct cpui
|
|
*/
|
|
static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
|
|
{
|
|
- __asm__(XEN_CPUID
|
|
- : "=a" (*eax),
|
|
- "=b" (*ebx),
|
|
- "=c" (*ecx),
|
|
- "=d" (*edx)
|
|
- : "0" (op), "c"(0));
|
|
+ *eax = op;
|
|
+ *ecx = 0;
|
|
+ __cpuid(eax, ebx, ecx, edx);
|
|
}
|
|
|
|
/* Some CPUID calls want 'count' to be placed in ecx */
|
|
static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
|
|
- int *edx)
|
|
+ int *edx)
|
|
{
|
|
- __asm__(XEN_CPUID
|
|
- : "=a" (*eax),
|
|
- "=b" (*ebx),
|
|
- "=c" (*ecx),
|
|
- "=d" (*edx)
|
|
- : "0" (op), "c" (count));
|
|
+ *eax = op;
|
|
+ *ecx = count;
|
|
+ __cpuid(eax, ebx, ecx, edx);
|
|
}
|
|
|
|
/*
|
|
@@ -178,42 +184,30 @@ static inline void cpuid_count(int op, i
|
|
*/
|
|
static inline unsigned int cpuid_eax(unsigned int op)
|
|
{
|
|
- unsigned int eax;
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
|
|
- __asm__(XEN_CPUID
|
|
- : "=a" (eax)
|
|
- : "0" (op)
|
|
- : "bx", "cx", "dx");
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
return eax;
|
|
}
|
|
static inline unsigned int cpuid_ebx(unsigned int op)
|
|
{
|
|
- unsigned int eax, ebx;
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
|
|
- __asm__(XEN_CPUID
|
|
- : "=a" (eax), "=b" (ebx)
|
|
- : "0" (op)
|
|
- : "cx", "dx" );
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
return ebx;
|
|
}
|
|
static inline unsigned int cpuid_ecx(unsigned int op)
|
|
{
|
|
- unsigned int eax, ecx;
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
|
|
- __asm__(XEN_CPUID
|
|
- : "=a" (eax), "=c" (ecx)
|
|
- : "0" (op)
|
|
- : "bx", "dx" );
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
return ecx;
|
|
}
|
|
static inline unsigned int cpuid_edx(unsigned int op)
|
|
{
|
|
- unsigned int eax, edx;
|
|
+ unsigned int eax, ebx, ecx, edx;
|
|
|
|
- __asm__(XEN_CPUID
|
|
- : "=a" (eax), "=d" (edx)
|
|
- : "0" (op)
|
|
- : "bx", "cx");
|
|
+ cpuid(op, &eax, &ebx, &ecx, &edx);
|
|
return edx;
|
|
}
|
|
|
|
@@ -315,6 +309,8 @@ static inline void __mwait(unsigned long
|
|
: :"a" (eax), "c" (ecx));
|
|
}
|
|
|
|
+extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
|
|
+
|
|
/* from system description table in BIOS. Mostly for MCA use, but
|
|
others may find it useful. */
|
|
extern unsigned int machine_id;
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp_32.h 2007-06-12 13:14:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -79,25 +79,36 @@ static inline int hard_smp_processor_id(
|
|
return GET_APIC_ID(*(unsigned long *)(APIC_BASE+APIC_ID));
|
|
}
|
|
#endif
|
|
-
|
|
-static __inline int logical_smp_processor_id(void)
|
|
-{
|
|
- /* we don't want to mark this access volatile - bad code generation */
|
|
- return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
|
|
-}
|
|
-
|
|
#endif
|
|
|
|
+#define safe_smp_processor_id() smp_processor_id()
|
|
extern int __cpu_disable(void);
|
|
extern void __cpu_die(unsigned int cpu);
|
|
extern void prefill_possible_map(void);
|
|
+extern unsigned int num_processors;
|
|
+
|
|
#endif /* !__ASSEMBLY__ */
|
|
|
|
#else /* CONFIG_SMP */
|
|
|
|
+#define safe_smp_processor_id() 0
|
|
#define cpu_physical_id(cpu) boot_cpu_physical_apicid
|
|
|
|
#define NO_PROC_ID 0xFF /* No processor magic marker */
|
|
|
|
#endif
|
|
+
|
|
+#ifndef __ASSEMBLY__
|

+
|

+extern u8 apicid_2_node[];
|

+
|

+#ifdef CONFIG_X86_LOCAL_APIC
|

+static __inline int logical_smp_processor_id(void)
|

+{
|

+	/* plain read of the APIC_LDR word: we don't want to mark this access volatile - bad code generation */
|

+	return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
|

+}
|

+#endif /* CONFIG_X86_LOCAL_APIC */
|

+#endif /* !__ASSEMBLY__ */
|
|
+
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system_32.h 2007-06-12 13:14:02.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -267,6 +267,9 @@ static inline unsigned long __xchg(unsig
|
|
#define cmpxchg(ptr,o,n)\
|
|
((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
|
|
(unsigned long)(n),sizeof(*(ptr))))
|
|
+#define sync_cmpxchg(ptr,o,n)\
|
|
+ ((__typeof__(*(ptr)))__sync_cmpxchg((ptr),(unsigned long)(o),\
|
|
+ (unsigned long)(n),sizeof(*(ptr))))
|
|
#endif
|
|
|
|
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
|
|
@@ -296,6 +299,39 @@ static inline unsigned long __cmpxchg(vo
|
|
return old;
|
|
}
|
|
|
|
+/*
|

+ * Always use locked operations when touching memory shared with a
|

+ * hypervisor, since the system may be SMP even if the guest kernel
|

+ * isn't.  Returns the prior *ptr value; sizes other than 1/2/4 return old.
|

+ */
|

+static inline unsigned long __sync_cmpxchg(volatile void *ptr,
|

+					    unsigned long old,
|

+					    unsigned long new, int size)
|

+{
|

+	unsigned long prev;
|

+	switch (size) {
|

+	case 1:	/* byte operand; "q" keeps new in a byte-addressable register */
|

+		__asm__ __volatile__("lock; cmpxchgb %b1,%2"
|

+				     : "=a"(prev)
|

+				     : "q"(new), "m"(*__xg(ptr)), "0"(old)
|

+				     : "memory");
|

+		return prev;
|

+	case 2:	/* 16-bit operand */
|

+		__asm__ __volatile__("lock; cmpxchgw %w1,%2"
|

+				     : "=a"(prev)
|

+				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
|

+				     : "memory");
|

+		return prev;
|

+	case 4:	/* 32-bit operand */
|

+		__asm__ __volatile__("lock; cmpxchgl %1,%2"
|

+				     : "=a"(prev)
|

+				     : "r"(new), "m"(*__xg(ptr)), "0"(old)
|

+				     : "memory");
|

+		return prev;
|

+	}
|

+	return old;	/* unsupported size: no exchange attempted */
|

+}
|
|
+
|
|
#ifndef CONFIG_X86_CMPXCHG
|
|
/*
|
|
* Building a kernel capable running on 80386. It may be necessary to
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/tlbflush_32.h 2007-11-26 16:59:25.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush_32.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -8,8 +8,6 @@
|
|
#define __flush_tlb_global() xen_tlb_flush()
|
|
#define __flush_tlb_all() xen_tlb_flush()
|
|
|
|
-extern unsigned long pgkern_mask;
|
|
-
|
|
#define cpu_has_invlpg (boot_cpu_data.x86 > 3)
|
|
|
|
#define __flush_tlb_single(addr) xen_invlpg(addr)
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap_64.h 2007-06-12 13:14:13.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -41,7 +41,7 @@ enum fixed_addresses {
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
FIX_APIC_BASE, /* local (CPU) APIC) -- required for SMP or not */
|
|
#endif
|
|
-#ifdef CONFIG_X86_IO_APIC
|
|
+#ifndef CONFIG_XEN
|
|
FIX_IO_APIC_BASE_0,
|
|
FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS-1,
|
|
#endif
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:06:08.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -44,12 +44,9 @@ extern unsigned long __supported_pte_mas
|
|
|
|
#define swapper_pg_dir init_level4_pgt
|
|
|
|
-extern int nonx_setup(char *str);
|
|
extern void paging_init(void);
|
|
extern void clear_kernel_mapping(unsigned long addr, unsigned long size);
|
|
|
|
-extern unsigned long pgkern_mask;
|
|
-
|
|
/*
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
* for zero-mapped memory areas etc..
|
|
@@ -119,9 +116,6 @@ static inline void pgd_clear (pgd_t * pg
|
|
set_pgd(__user_pgd(pgd), __pgd(0));
|
|
}
|
|
|
|
-#define pud_page(pud) \
|
|
- ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
|
|
-
|
|
#define pte_same(a, b) ((a).pte == (b).pte)
|
|
|
|
#define pte_pgprot(a) (__pgprot((a).pte & ~PHYSICAL_PAGE_MASK))
|
|
@@ -333,7 +327,7 @@ static inline pte_t ptep_get_and_clear_f
|
|
#define __LARGE_PTE (_PAGE_PSE|_PAGE_PRESENT)
|
|
static inline int pte_user(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
|
|
static inline int pte_read(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
|
|
-static inline int pte_exec(pte_t pte) { return __pte_val(pte) & _PAGE_USER; }
|
|
+static inline int pte_exec(pte_t pte) { return !(__pte_val(pte) & _PAGE_NX); }
|
|
static inline int pte_dirty(pte_t pte) { return __pte_val(pte) & _PAGE_DIRTY; }
|
|
static inline int pte_young(pte_t pte) { return __pte_val(pte) & _PAGE_ACCESSED; }
|
|
static inline int pte_write(pte_t pte) { return __pte_val(pte) & _PAGE_RW; }
|
|
@@ -346,29 +340,12 @@ static inline pte_t pte_mkclean(pte_t pt
|
|
static inline pte_t pte_mkold(pte_t pte) { __pte_val(pte) &= ~_PAGE_ACCESSED; return pte; }
|
|
static inline pte_t pte_wrprotect(pte_t pte) { __pte_val(pte) &= ~_PAGE_RW; return pte; }
|
|
static inline pte_t pte_mkread(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
|
|
-static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) |= _PAGE_USER; return pte; }
|
|
+static inline pte_t pte_mkexec(pte_t pte) { __pte_val(pte) &= ~_PAGE_NX; return pte; }
|
|
static inline pte_t pte_mkdirty(pte_t pte) { __pte_val(pte) |= _PAGE_DIRTY; return pte; }
|
|
static inline pte_t pte_mkyoung(pte_t pte) { __pte_val(pte) |= _PAGE_ACCESSED; return pte; }
|
|
static inline pte_t pte_mkwrite(pte_t pte) { __pte_val(pte) |= _PAGE_RW; return pte; }
|
|
static inline pte_t pte_mkhuge(pte_t pte) { __pte_val(pte) |= _PAGE_PSE; return pte; }
|
|
-
|
|
-#define ptep_test_and_clear_dirty(vma, addr, ptep) \
|
|
-({ \
|
|
- pte_t __pte = *(ptep); \
|
|
- int __ret = pte_dirty(__pte); \
|
|
- if (__ret) \
|
|
- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkclean(__pte)); \
|
|
- __ret; \
|
|
-})
|
|
-
|
|
-#define ptep_test_and_clear_young(vma, addr, ptep) \
|
|
-({ \
|
|
- pte_t __pte = *(ptep); \
|
|
- int __ret = pte_young(__pte); \
|
|
- if (__ret) \
|
|
- set_pte_at((vma)->vm_mm, addr, ptep, pte_mkold(__pte)); \
|
|
- __ret; \
|
|
-})
|
|
+static inline pte_t pte_clrhuge(pte_t pte) { __pte_val(pte) &= ~_PAGE_PSE; return pte; }
|
|
|
|
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
|
|
{
|
|
@@ -395,7 +372,8 @@ static inline int pmd_large(pmd_t pte) {
|
|
/*
|
|
* Level 4 access.
|
|
*/
|
|
-#define pgd_page(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
|
|
+#define pgd_page_vaddr(pgd) ((unsigned long) __va(pgd_val(pgd) & PTE_MASK))
|
|
+#define pgd_page(pgd) (pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT))
|
|
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
|
|
#define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr))
|
|
#define pgd_offset_k(address) (init_level4_pgt + pgd_index(address))
|
|
@@ -404,16 +382,18 @@ static inline int pmd_large(pmd_t pte) {
|
|
|
|
/* PUD - Level3 access */
|
|
/* to find an entry in a page-table-directory. */
|
|
+#define pud_page_vaddr(pud) ((unsigned long) __va(pud_val(pud) & PHYSICAL_PAGE_MASK))
|
|
+#define pud_page(pud) (pfn_to_page(pud_val(pud) >> PAGE_SHIFT))
|
|
#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
|
|
-#define pud_offset(pgd, address) ((pud_t *) pgd_page(*(pgd)) + pud_index(address))
|
|
+#define pud_offset(pgd, address) ((pud_t *) pgd_page_vaddr(*(pgd)) + pud_index(address))
|
|
#define pud_present(pud) (__pud_val(pud) & _PAGE_PRESENT)
|
|
|
|
/* PMD - Level 2 access */
|
|
-#define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
|
|
+#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & PTE_MASK))
|
|
#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
|
|
|
|
#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
|
|
-#define pmd_offset(dir, address) ((pmd_t *) pud_page(*(dir)) + \
|
|
+#define pmd_offset(dir, address) ((pmd_t *) pud_page_vaddr(*(dir)) + \
|
|
pmd_index(address))
|
|
#define pmd_none(x) (!__pmd_val(x))
|
|
#if CONFIG_XEN_COMPAT <= 0x030002
|
|
@@ -444,6 +424,7 @@ static inline pte_t mk_pte_phys(unsigned
|
|
{
|
|
unsigned long pteval;
|
|
pteval = physpage | pgprot_val(pgprot);
|
|
+ pteval &= __supported_pte_mask;
|
|
return __pte(pteval);
|
|
}
|
|
|
|
@@ -465,7 +446,7 @@ static inline pte_t pte_modify(pte_t pte
|
|
|
|
#define pte_index(address) \
|
|
(((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
|
|
-#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_kernel(*(dir)) + \
|
|
+#define pte_offset_kernel(dir, address) ((pte_t *) pmd_page_vaddr(*(dir)) + \
|
|
pte_index(address))
|
|
|
|
/* x86-64 always has all page tables mapped. */
|
|
@@ -506,6 +487,40 @@ static inline pte_t pte_modify(pte_t pte
|
|
ptep_establish(vma, address, ptep, entry); \
|
|
} while (0)
|
|
|
|
+
|
|
+/*
|
|
+ * i386 says: We don't actually have these, but we want to advertise
|
|
+ * them so that we can encompass the flush here.
|
|
+ */
|
|
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
+#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
+
|
|
+#define __HAVE_ARCH_PTEP_CLEAR_DIRTY_FLUSH
|
|
+#define ptep_clear_flush_dirty(vma, address, ptep) \
|
|
+({ \
|
|
+ pte_t __pte = *(ptep); \
|
|
+ int __dirty = pte_dirty(__pte); \
|
|
+ __pte = pte_mkclean(__pte); \
|
|
+ if ((vma)->vm_mm->context.pinned) \
|
|
+ ptep_set_access_flags(vma, address, ptep, __pte, __dirty); \
|
|
+ else if (__dirty) \
|
|
+ set_pte(ptep, __pte); \
|
|
+ __dirty; \
|
|
+})
|
|
+
|
|
+#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
|
|
+#define ptep_clear_flush_young(vma, address, ptep) \
|
|
+({ \
|
|
+ pte_t __pte = *(ptep); \
|
|
+ int __young = pte_young(__pte); \
|
|
+ __pte = pte_mkold(__pte); \
|
|
+ if ((vma)->vm_mm->context.pinned) \
|
|
+ ptep_set_access_flags(vma, address, ptep, __pte, __young); \
|
|
+ else if (__young) \
|
|
+ set_pte(ptep, __pte); \
|
|
+ __young; \
|
|
+})
|
|
+
|
|
/* Encode and de-code a swap entry */
|
|
#define __swp_type(x) (((x).val >> 1) & 0x3f)
|
|
#define __swp_offset(x) ((x).val >> 8)
|
|
@@ -547,10 +562,11 @@ int touch_pte_range(struct mm_struct *mm
|
|
unsigned long size);
|
|
|
|
int xen_change_pte_range(struct mm_struct *mm, pmd_t *pmd,
|
|
- unsigned long addr, unsigned long end, pgprot_t newprot);
|
|
+ unsigned long addr, unsigned long end, pgprot_t newprot,
|
|
+ int dirty_accountable);
|
|
|
|
-#define arch_change_pte_range(mm, pmd, addr, end, newprot) \
|
|
- xen_change_pte_range(mm, pmd, addr, end, newprot)
|
|
+#define arch_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable) \
|
|
+ xen_change_pte_range(mm, pmd, addr, end, newprot, dirty_accountable)
|
|
|
|
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
|
|
direct_remap_pfn_range(vma,vaddr,pfn,size,prot,DOMID_IO)
|
|
@@ -572,8 +588,6 @@ int xen_change_pte_range(struct mm_struc
|
|
#define kc_offset_to_vaddr(o) \
|
|
(((o) & (1UL << (__VIRTUAL_MASK_SHIFT-1))) ? ((o) | (~__VIRTUAL_MASK)) : (o))
|
|
|
|
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
|
|
-#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_DIRTY
|
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
|
|
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
|
|
#define __HAVE_ARCH_PTEP_CLEAR_FLUSH
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor_64.h 2008-03-06 08:54:32.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -484,6 +484,8 @@ static inline void __mwait(unsigned long
|
|
: :"a" (eax), "c" (ecx));
|
|
}
|
|
|
|
+extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx);
|
|
+
|
|
#define stack_current() \
|
|
({ \
|
|
struct thread_info *ti; \
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp_64.h 2007-06-12 13:14:13.000000000 +0200
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -4,15 +4,12 @@
|
|
/*
|
|
* We need the APIC definitions automatically as part of 'smp.h'
|
|
*/
|
|
-#ifndef __ASSEMBLY__
|
|
#include <linux/threads.h>
|
|
#include <linux/cpumask.h>
|
|
#include <linux/bitops.h>
|
|
extern int disable_apic;
|
|
-#endif
|
|
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
-#ifndef __ASSEMBLY__
|
|
#include <asm/fixmap.h>
|
|
#include <asm/mpspec.h>
|
|
#ifdef CONFIG_X86_IO_APIC
|
|
@@ -21,10 +18,8 @@ extern int disable_apic;
|
|
#include <asm/apic.h>
|
|
#include <asm/thread_info.h>
|
|
#endif
|
|
-#endif
|
|
|
|
#ifdef CONFIG_SMP
|
|
-#ifndef ASSEMBLY
|
|
|
|
#include <asm/pda.h>
|
|
|
|
@@ -41,14 +36,11 @@ extern cpumask_t cpu_initialized;
|
|
|
|
extern void smp_alloc_memory(void);
|
|
extern volatile unsigned long smp_invalidate_needed;
|
|
-extern int pic_mode;
|
|
extern void lock_ipi_call_lock(void);
|
|
extern void unlock_ipi_call_lock(void);
|
|
extern int smp_num_siblings;
|
|
extern void smp_send_reschedule(int cpu);
|
|
void smp_stop_cpu(void);
|
|
-extern int smp_call_function_single(int cpuid, void (*func) (void *info),
|
|
- void *info, int retry, int wait);
|
|
|
|
extern cpumask_t cpu_sibling_map[NR_CPUS];
|
|
extern cpumask_t cpu_core_map[NR_CPUS];
|
|
@@ -77,20 +69,16 @@ static inline int hard_smp_processor_id(
|
|
}
|
|
#endif
|
|
|
|
-extern int safe_smp_processor_id(void);
|
|
extern int __cpu_disable(void);
|
|
extern void __cpu_die(unsigned int cpu);
|
|
extern void prefill_possible_map(void);
|
|
extern unsigned num_processors;
|
|
extern unsigned disabled_cpus;
|
|
|
|
-#endif /* !ASSEMBLY */
|
|
-
|
|
#define NO_PROC_ID 0xFF /* No processor magic marker */
|
|
|
|
#endif
|
|
|
|
-#ifndef ASSEMBLY
|
|
/*
|
|
* Some lowlevel functions might want to know about
|
|
* the real APIC ID <-> CPU # mapping.
|
|
@@ -114,11 +102,8 @@ static inline int cpu_present_to_apicid(
|
|
}
|
|
#endif
|
|
|
|
-#endif /* !ASSEMBLY */
|
|
-
|
|
#ifndef CONFIG_SMP
|
|
#define stack_smp_processor_id() 0
|
|
-#define safe_smp_processor_id() 0
|
|
#define cpu_logical_map(x) (x)
|
|
#else
|
|
#include <asm/thread_info.h>
|
|
@@ -130,7 +115,6 @@ static inline int cpu_present_to_apicid(
|
|
})
|
|
#endif
|
|
|
|
-#ifndef __ASSEMBLY__
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
static __inline int logical_smp_processor_id(void)
|
|
{
|
|
@@ -138,13 +122,18 @@ static __inline int logical_smp_processo
|
|
return GET_APIC_LOGICAL_ID(*(unsigned long *)(APIC_BASE+APIC_LDR));
|
|
}
|
|
#endif
|
|
-#endif
|
|
|
|
#ifdef CONFIG_SMP
|
|
#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
|
|
#else
|
|
#define cpu_physical_id(cpu) boot_cpu_id
|
|
-#endif
|
|
-
|
|
+static inline int smp_call_function_single(int cpuid, void (*func) (void *info),
|

+				void *info, int retry, int wait)
|

+{
|

+	/* !SMP stub: cpuid/retry/wait are ignored.  Disable interrupts here? */
|

+	func(info);
|

+	return 0;
|

+}
|
|
+#endif /* !CONFIG_SMP */
|
|
#endif
|
|
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system_64.h 2007-11-26 16:59:25.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -24,6 +24,7 @@
|
|
#define __EXTRA_CLOBBER \
|
|
,"rcx","rbx","rdx","r8","r9","r10","r11","r12","r13","r14","r15"
|
|
|
|
+/* Save and restore flags to handle (clear) a leaking NT flag */
|
|
#define switch_to(prev,next,last) \
|
|
asm volatile(SAVE_CONTEXT \
|
|
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
|
|
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/tlbflush_64.h 2007-11-26 16:59:25.000000000 +0100
|
|
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush_64.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -12,9 +12,6 @@
|
|
*/
|
|
#define __flush_tlb_global() xen_tlb_flush()
|
|
|
|
-
|
|
-extern unsigned long pgkern_mask;
|
|
-
|
|
#define __flush_tlb_all() __flush_tlb_global()
|
|
|
|
#define __flush_tlb_one(addr) xen_invlpg((unsigned long)addr)
|
|
--- head-2010-05-25.orig/include/linux/skbuff.h 2010-04-15 09:43:55.000000000 +0200
|
|
+++ head-2010-05-25/include/linux/skbuff.h 2010-04-15 09:52:44.000000000 +0200
|
|
@@ -2109,5 +2109,12 @@ static inline void skb_forward_csum(stru
|
|
}
|
|
|
|
bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
|
|
+
|
|
+#ifdef CONFIG_XEN
|

+int skb_checksum_setup(struct sk_buff *skb);
|

+#else /* !CONFIG_XEN: stub that always returns 0 */
|

+static inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
|

+#endif /* CONFIG_XEN */
|
|
+
|
|
#endif /* __KERNEL__ */
|
|
#endif /* _LINUX_SKBUFF_H */
|
|
--- head-2010-05-25.orig/include/xen/evtchn.h 2010-01-19 16:01:04.000000000 +0100
|
|
+++ head-2010-05-25/include/xen/evtchn.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -57,34 +57,34 @@
|
|
*/
|
|
int bind_caller_port_to_irqhandler(
|
|
unsigned int caller_port,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id);
|
|
int bind_listening_port_to_irqhandler(
|
|
unsigned int remote_domain,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id);
|
|
int bind_interdomain_evtchn_to_irqhandler(
|
|
unsigned int remote_domain,
|
|
unsigned int remote_port,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id);
|
|
int bind_virq_to_irqhandler(
|
|
unsigned int virq,
|
|
unsigned int cpu,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id);
|
|
int bind_ipi_to_irqhandler(
|
|
unsigned int ipi,
|
|
unsigned int cpu,
|
|
- irqreturn_t (*handler)(int, void *, struct pt_regs *),
|
|
+ irq_handler_t handler,
|
|
unsigned long irqflags,
|
|
const char *devname,
|
|
void *dev_id);
|
|
--- head-2010-05-25.orig/include/xen/xencons.h 2007-10-15 09:39:38.000000000 +0200
|
|
+++ head-2010-05-25/include/xen/xencons.h 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -8,7 +8,7 @@ void xencons_force_flush(void);
|
|
void xencons_resume(void);
|
|
|
|
/* Interrupt work hooks. Receive data, or kick data out. */
|
|
-void xencons_rx(char *buf, unsigned len, struct pt_regs *regs);
|
|
+void xencons_rx(char *buf, unsigned len);
|
|
void xencons_tx(void);
|
|
|
|
int xencons_ring_init(void);
|
|
--- head-2010-05-25.orig/mm/mprotect.c 2010-04-15 09:44:14.000000000 +0200
|
|
+++ head-2010-05-25/mm/mprotect.c 2010-04-15 09:52:51.000000000 +0200
|
|
@@ -90,7 +90,7 @@ static inline void change_pmd_range(stru
|
|
next = pmd_addr_end(addr, end);
|
|
if (pmd_none_or_clear_bad(pmd))
|
|
continue;
|
|
- if (arch_change_pte_range(mm, pmd, addr, next, newprot))
|
|
+ if (arch_change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable))
|
|
continue;
|
|
change_pte_range(mm, pmd, addr, next, newprot, dirty_accountable);
|
|
} while (pmd++, addr = next, addr != end);
|
|
--- head-2010-05-25.orig/mm/page_alloc.c 2010-03-24 14:59:37.000000000 +0100
|
|
+++ head-2010-05-25/mm/page_alloc.c 2010-03-24 15:06:12.000000000 +0100
|
|
@@ -4684,6 +4684,23 @@ static void __setup_per_zone_wmarks(void
|
|
spin_unlock_irqrestore(&zone->lock, flags);
|
|
}
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+ for_each_zone(zone) {
|
|
+ unsigned int cpu;
|
|
+
|
|
+ if (!populated_zone(zone))
|
|
+ continue;
|
|
+ for_each_online_cpu(cpu) {
|
|
+ unsigned long high;
|
|
+
|
|
+ high = percpu_pagelist_fraction
|
|
+ ? zone->present_pages / percpu_pagelist_fraction
|
|
+ : 5 * zone_batchsize(zone);
|
|
+ setup_pagelist_highmark(zone_pcp(zone, cpu), high);
|
|
+ }
|
|
+ }
|
|
+#endif
|
|
+
|
|
/* update totalreserve_pages */
|
|
calculate_totalreserve_pages();
|
|
}
|
|
--- head-2010-05-25.orig/net/core/dev.c 2010-05-25 09:19:25.000000000 +0200
|
|
+++ head-2010-05-25/net/core/dev.c 2010-05-25 09:21:41.000000000 +0200
|
|
@@ -2036,17 +2036,15 @@ inline int skb_checksum_setup(struct sk_
|
|
}
|
|
if ((skb->h.raw + skb->csum + 2) > skb->tail)
|
|
goto out;
|
|
- skb->ip_summed = CHECKSUM_HW;
|
|
+ skb->ip_summed = CHECKSUM_PARTIAL;
|
|
skb->proto_csum_blank = 0;
|
|
}
|
|
return 0;
|
|
out:
|
|
return -EPROTO;
|
|
}
|
|
-#else
|
|
-inline int skb_checksum_setup(struct sk_buff *skb) { return 0; }
|
|
-#endif
|
|
EXPORT_SYMBOL(skb_checksum_setup);
|
|
+#endif
|
|
|
|
static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
|
|
struct net_device *dev,
|
|
@@ -2628,7 +2626,7 @@ int netif_receive_skb(struct sk_buff *sk
|
|
case CHECKSUM_UNNECESSARY:
|
|
skb->proto_data_valid = 1;
|
|
break;
|
|
- case CHECKSUM_HW:
|
|
+ case CHECKSUM_PARTIAL:
|
|
/* XXX Implement me. */
|
|
default:
|
|
skb->proto_data_valid = 0;
|