You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
qubes-linux-kernel/patches.xen/xen3-patch-2.6.33

5034 lines
144 KiB

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.33
Patch-mainline: 2.6.33
This patch contains the differences between 2.6.33 and 2.6.33.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.33" by xen-port-patches.py
--- head-2011-03-17.orig/arch/ia64/include/asm/xen/hypervisor.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/ia64/include/asm/xen/hypervisor.h 2011-02-01 14:55:46.000000000 +0100
@@ -34,11 +34,11 @@
#define _ASM_IA64_XEN_HYPERVISOR_H
#include <linux/err.h>
+#include <xen/xen.h>
#ifdef CONFIG_PARAVIRT_XEN
#include <xen/interface/xen.h>
#include <xen/interface/version.h> /* to compile feature.c */
#include <xen/features.h> /* to comiple xen-netfront.c */
-#include <xen/xen.h>
#include <asm/xen/hypercall.h>
extern struct shared_info *HYPERVISOR_shared_info;
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 14:55:46.000000000 +0100
@@ -21,7 +21,7 @@ config X86
select HAVE_UNSTABLE_SCHED_CLOCK
select HAVE_IDE
select HAVE_OPROFILE
- select HAVE_PERF_EVENTS if !XEN
+ select HAVE_PERF_EVENTS
select HAVE_IRQ_WORK
select HAVE_IOREMAP_PROT
select HAVE_KPROBES
@@ -52,7 +52,7 @@ config X86
select HAVE_KERNEL_BZIP2 if !XEN
select HAVE_KERNEL_LZMA if !XEN
select HAVE_KERNEL_XZ
- select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_LZO if !XEN
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:55:46.000000000 +0100
@@ -534,7 +534,7 @@ ia32_sys_call_table:
.quad compat_sys_writev
.quad sys_getsid
.quad sys_fdatasync
- .quad sys32_sysctl /* sysctl */
+ .quad compat_sys_sysctl /* sysctl */
.quad sys_mlock /* 150 */
.quad sys_munlock
.quad sys_mlockall
@@ -577,7 +577,7 @@ ia32_sys_call_table:
.quad quiet_ni_syscall /* streams2 */
.quad stub32_vfork /* 190 */
.quad compat_sys_getrlimit
- .quad sys32_mmap2
+ .quad sys_mmap_pgoff
.quad sys32_truncate64
.quad sys32_ftruncate64
.quad sys32_stat64 /* 195 */
@@ -722,4 +722,5 @@ ia32_sys_call_table:
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
+ .quad compat_sys_recvmmsg
ia32_syscall_end:
--- head-2011-03-17.orig/arch/x86/include/asm/hw_irq.h 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/include/asm/hw_irq.h 2011-02-01 14:55:46.000000000 +0100
@@ -85,6 +85,7 @@ struct irq_2_iommu {
u8 irte_mask;
};
+#ifndef CONFIG_XEN
/*
* This is performance-critical, we want to do it O(1)
*
@@ -100,6 +101,9 @@ struct irq_cfg {
struct irq_2_iommu irq_2_iommu;
#endif
};
+#else
+struct irq_cfg;
+#endif
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
extern void send_cleanup_vector(struct irq_cfg *);
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 14:55:46.000000000 +0100
@@ -0,0 +1,17 @@
+#ifndef _ASM_X86_PERF_EVENT_H
+#define _ASM_X86_PERF_EVENT_H
+
+#ifdef CONFIG_PERF_EVENTS
+
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT (1UL << 3)
+
+#endif
+
+static inline void init_hw_perf_events(void) {}
+
+#endif /* _ASM_X86_PERF_EVENT_H */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:55:46.000000000 +0100
@@ -16,6 +16,8 @@
#ifndef __ASSEMBLY__
+#include <asm/x86_init.h>
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
@@ -267,9 +269,9 @@ static inline int is_new_memtype_allowed
unsigned long new_flags)
{
/*
- * PAT type is always WB for ISA. So no need to check.
+ * PAT type is always WB for untracked ranges, so no need to check.
*/
- if (is_ISA_range(paddr, paddr + size - 1))
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
return 1;
/*
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:46:37.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:46:54.000000000 +0100
@@ -31,6 +31,7 @@ struct mm_struct;
#include <linux/init.h>
#include <xen/interface/physdev.h>
+#define HBP_NUM 4
/*
* Default implementation of macro that returns current
* instruction pointer ("program counter").
@@ -191,7 +192,7 @@ static inline void xen_cpuid(unsigned in
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
- asm(XEN_CPUID
+ asm volatile(XEN_CPUID
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
@@ -440,6 +441,8 @@ extern unsigned int xstate_size;
extern void free_thread_xstate(struct task_struct *);
extern struct kmem_cache *task_xstate_cachep;
+struct perf_event;
+
struct thread_struct {
/* Cached TLS descriptors: */
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -460,13 +463,12 @@ struct thread_struct {
unsigned long fs;
#endif
unsigned long gs;
- /* Hardware debugging registers: */
- unsigned long debugreg0;
- unsigned long debugreg1;
- unsigned long debugreg2;
- unsigned long debugreg3;
- unsigned long debugreg6;
- unsigned long debugreg7;
+ /* Save middle states of ptrace breakpoints */
+ struct perf_event *ptrace_bps[HBP_NUM];
+ /* Debug status used for traps, single steps, etc... */
+ unsigned long debugreg6;
+ /* Keep track of the exact dr7 value set by the user */
+ unsigned long ptrace_dr7;
/* Fault info: */
unsigned long cr2;
unsigned long trap_no;
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:55:46.000000000 +0100
@@ -44,10 +44,10 @@
int xen_spinlock_init(unsigned int cpu);
void xen_spinlock_cleanup(unsigned int cpu);
-bool xen_spin_wait(raw_spinlock_t *, unsigned int *token,
+bool xen_spin_wait(arch_spinlock_t *, unsigned int *token,
unsigned int flags);
-unsigned int xen_spin_adjust(const raw_spinlock_t *, unsigned int token);
-void xen_spin_kick(raw_spinlock_t *, unsigned int token);
+unsigned int xen_spin_adjust(const arch_spinlock_t *, unsigned int token);
+void xen_spin_kick(arch_spinlock_t *, unsigned int token);
/*
* Ticket locks are conceptually two parts, one indicating the current head of
@@ -97,7 +97,7 @@ void xen_spin_kick(raw_spinlock_t *, uns
: \
: "memory", "cc")
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
int tmp, new;
@@ -160,7 +160,7 @@ static __always_inline int __ticket_spin
: "memory", "cc"); \
} while (0)
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
{
int tmp;
int new;
@@ -183,21 +183,21 @@ static __always_inline int __ticket_spin
}
#endif
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
}
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{
int tmp = ACCESS_ONCE(lock->slock);
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
}
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
{
unsigned int token, count;
unsigned int flags = __raw_local_irq_save();
@@ -216,7 +216,7 @@ static __always_inline void __ticket_spi
} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
}
-static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void __ticket_spin_lock_flags(arch_spinlock_t *lock,
unsigned long flags)
{
unsigned int token, count;
@@ -232,7 +232,7 @@ static __always_inline void __ticket_spi
} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
}
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
{
unsigned int token;
bool kick;
@@ -248,24 +248,24 @@ static __always_inline void __ticket_spi
#undef __ticket_spin_unlock_body
#endif
-#define __raw_spin(n) __ticket_spin_##n
+#define __arch_spin(n) __ticket_spin_##n
#else /* TICKET_SHIFT */
static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
static inline void xen_spinlock_cleanup(unsigned int cpu) {}
-static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
+static inline int __byte_spin_is_locked(arch_spinlock_t *lock)
{
return lock->lock != 0;
}
-static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
+static inline int __byte_spin_is_contended(arch_spinlock_t *lock)
{
return lock->spinners != 0;
}
-static inline void __byte_spin_lock(raw_spinlock_t *lock)
+static inline void __byte_spin_lock(arch_spinlock_t *lock)
{
s8 val = 1;
@@ -284,7 +284,7 @@ static inline void __byte_spin_lock(raw_
#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
-static inline int __byte_spin_trylock(raw_spinlock_t *lock)
+static inline int __byte_spin_trylock(arch_spinlock_t *lock)
{
u8 old = 1;
@@ -294,53 +294,53 @@ static inline int __byte_spin_trylock(ra
return old == 0;
}
-static inline void __byte_spin_unlock(raw_spinlock_t *lock)
+static inline void __byte_spin_unlock(arch_spinlock_t *lock)
{
smp_wmb();
lock->lock = 0;
}
-#define __raw_spin(n) __byte_spin_##n
+#define __arch_spin(n) __byte_spin_##n
#endif /* TICKET_SHIFT */
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
{
- return __raw_spin(is_locked)(lock);
+ return __arch_spin(is_locked)(lock);
}
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
{
- return __raw_spin(is_contended)(lock);
+ return __arch_spin(is_contended)(lock);
}
-#define __raw_spin_is_contended __raw_spin_is_contended
+#define arch_spin_is_contended arch_spin_is_contended
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
{
- __raw_spin(lock)(lock);
+ __arch_spin(lock)(lock);
}
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
{
- return __raw_spin(trylock)(lock);
+ return __arch_spin(trylock)(lock);
}
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
{
- __raw_spin(unlock)(lock);
+ __arch_spin(unlock)(lock);
}
-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
unsigned long flags)
{
- __raw_spin(lock_flags)(lock, flags);
+ __arch_spin(lock_flags)(lock, flags);
}
-#undef __raw_spin
+#undef __arch_spin
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
{
- while (__raw_spin_is_locked(lock))
+ while (arch_spin_is_locked(lock))
cpu_relax();
}
@@ -362,7 +362,7 @@ static inline void __raw_spin_unlock_wai
* read_can_lock - would read_trylock() succeed?
* @lock: the rwlock in question.
*/
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
+static inline int arch_read_can_lock(arch_rwlock_t *lock)
{
return (int)(lock)->lock > 0;
}
@@ -371,12 +371,12 @@ static inline int __raw_read_can_lock(ra
* write_can_lock - would write_trylock() succeed?
* @lock: the rwlock in question.
*/
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
+static inline int arch_write_can_lock(arch_rwlock_t *lock)
{
return (lock)->lock == RW_LOCK_BIAS;
}
-static inline void __raw_read_lock(raw_rwlock_t *rw)
+static inline void arch_read_lock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
"jns 1f\n"
@@ -385,7 +385,7 @@ static inline void __raw_read_lock(raw_r
::LOCK_PTR_REG (rw) : "memory");
}
-static inline void __raw_write_lock(raw_rwlock_t *rw)
+static inline void arch_write_lock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
"jz 1f\n"
@@ -394,7 +394,7 @@ static inline void __raw_write_lock(raw_
::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
}
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
+static inline int arch_read_trylock(arch_rwlock_t *lock)
{
atomic_t *count = (atomic_t *)lock;
@@ -404,7 +404,7 @@ static inline int __raw_read_trylock(raw
return 0;
}
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
+static inline int arch_write_trylock(arch_rwlock_t *lock)
{
atomic_t *count = (atomic_t *)lock;
@@ -414,23 +414,23 @@ static inline int __raw_write_trylock(ra
return 0;
}
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
+static inline void arch_read_unlock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
}
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
+static inline void arch_write_unlock(arch_rwlock_t *rw)
{
asm volatile(LOCK_PREFIX "addl %1, %0"
: "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
}
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
-#define _raw_spin_relax(lock) cpu_relax()
-#define _raw_read_relax(lock) cpu_relax()
-#define _raw_write_relax(lock) cpu_relax()
+#define arch_spin_relax(lock) cpu_relax()
+#define arch_read_relax(lock) cpu_relax()
+#define arch_write_relax(lock) cpu_relax()
/* The {read|write|spin}_lock() on x86 are full memory barriers. */
static inline void smp_mb__after_lock(void) { }
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock_types.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock_types.h 2011-02-01 14:55:46.000000000 +0100
@@ -42,14 +42,14 @@ typedef union {
#endif
#endif
};
-} raw_spinlock_t;
+} arch_spinlock_t;
-#define __RAW_SPIN_LOCK_UNLOCKED { 0 }
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
typedef struct {
unsigned int lock;
-} raw_rwlock_t;
+} arch_rwlock_t;
-#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
+#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
#endif /* _ASM_X86_SPINLOCK_TYPES_H */
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/swiotlb.h 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/swiotlb.h 2011-02-01 14:55:46.000000000 +0100
@@ -1,4 +1,6 @@
#include_next <asm/swiotlb.h>
+#define pci_swiotlb_detect() 1
+
dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
int dir);
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:07:45.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:09:31.000000000 +0100
@@ -12,9 +12,9 @@
#include <linux/irqflags.h>
/* entries in ARCH_DLINFO: */
-#ifdef CONFIG_IA32_EMULATION
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
# define AT_VECTOR_SIZE_ARCH 2
-#else
+#else /* else it's non-compat x86-64 */
# define AT_VECTOR_SIZE_ARCH 1
#endif
@@ -22,6 +22,7 @@ struct task_struct; /* one of the strang
struct task_struct *__switch_to(struct task_struct *prev,
struct task_struct *next);
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
+extern void show_regs_common(void);
#ifdef CONFIG_X86_32
@@ -129,8 +130,6 @@ do { \
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
"call __switch_to\n\t" \
- ".globl thread_return\n" \
- "thread_return:\n\t" \
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
__switch_canary \
"movq %P[thread_info](%%rsi),%%r8\n\t" \
@@ -158,19 +157,22 @@ extern void xen_load_gs_index(unsigned);
* Load a segment. Fall back on loading the zero
* segment if something goes wrong..
*/
-#define loadsegment(seg, value) \
- asm volatile("\n" \
- "1:\t" \
- "movl %k0,%%" #seg "\n" \
- "2:\n" \
- ".section .fixup,\"ax\"\n" \
- "3:\t" \
- "movl %k1, %%" #seg "\n\t" \
- "jmp 2b\n" \
- ".previous\n" \
- _ASM_EXTABLE(1b,3b) \
- : :"r" (value), "r" (0) : "memory")
-
+#define loadsegment(seg, value) \
+do { \
+ unsigned short __val = (value); \
+ \
+ asm volatile(" \n" \
+ "1: movl %k0,%%" #seg " \n" \
+ \
+ ".section .fixup,\"ax\" \n" \
+ "2: xorl %k0,%k0 \n" \
+ " jmp 1b \n" \
+ ".previous \n" \
+ \
+ _ASM_EXTABLE(1b, 2b) \
+ \
+ : "+r" (__val) : : "memory"); \
+} while (0)
/*
* Save a segment register away
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -81,12 +81,9 @@ int acpi_save_state_mem(void)
#ifndef CONFIG_64BIT
store_gdt((struct desc_ptr *)&header->pmode_gdt);
- header->pmode_efer_low = nx_enabled;
- if (header->pmode_efer_low & 1) {
- /* This is strange, why not save efer, always? */
- rdmsr(MSR_EFER, header->pmode_efer_low,
- header->pmode_efer_high);
- }
+ if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
+ &header->pmode_efer_high))
+ header->pmode_efer_low = header->pmode_efer_high = 0;
#endif /* !CONFIG_64BIT */
header->pmode_cr0 = read_cr0();
@@ -123,30 +120,33 @@ void acpi_restore_state_mem(void)
/**
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
+ * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
*
* We allocate a page from the first 1MB of memory for the wakeup
* routine for when we come back from a sleep state. The
* runtime allocator allows specification of <16MB pages, but not
* <1MB pages.
*/
-void __init acpi_reserve_bootmem(void)
+void __init acpi_reserve_wakeup_memory(void)
{
#ifndef CONFIG_ACPI_PV_SLEEP
+ unsigned long mem;
+
if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
printk(KERN_ERR
"ACPI: Wakeup code way too big, S3 disabled.\n");
return;
}
- acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
+ mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
- if (!acpi_realmode) {
+ if (mem == -1L) {
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
return;
}
-
- acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
+ acpi_realmode = (unsigned long) phys_to_virt(mem);
+ acpi_wakeup_address = mem;
+ reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
#endif
}
@@ -169,6 +169,8 @@ static int __init acpi_sleep_setup(char
#endif
if (strncmp(str, "old_ordering", 12) == 0)
acpi_old_suspend_ordering();
+ if (strncmp(str, "sci_force_enable", 16) == 0)
+ acpi_set_sci_en_on_resume();
str = strchr(str, ',');
if (str != NULL)
str += strspn(str, ", \t");
--- head-2011-03-17.orig/arch/x86/kernel/apic/Makefile 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/Makefile 2011-02-01 14:55:46.000000000 +0100
@@ -24,4 +24,4 @@ obj-$(CONFIG_XEN) += nmi.o
probe_64-$(CONFIG_XEN) := probe_32.o
-disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o
+disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o apic_noop.o
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -150,20 +150,6 @@ static struct irq_pin_list *get_one_free
return pin;
}
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
-struct irq_cfg {
- struct irq_pin_list *irq_2_pin;
- cpumask_var_t domain;
- cpumask_var_t old_domain;
- unsigned move_cleanup_count;
- u8 vector;
- u8 move_in_progress : 1;
-};
-
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
#ifdef CONFIG_SPARSE_IRQ
static struct irq_cfg irq_cfgx[] = {
@@ -219,7 +205,7 @@ int __init arch_early_irq_init(void)
}
#ifdef CONFIG_SPARSE_IRQ
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
struct irq_cfg *cfg = NULL;
struct irq_desc *desc;
@@ -371,7 +357,7 @@ void arch_free_chip_data(struct irq_desc
/* end for move_irq_desc */
#else
-static struct irq_cfg *irq_cfg(unsigned int irq)
+struct irq_cfg *irq_cfg(unsigned int irq)
{
return irq < nr_irqs ? irq_cfgx + irq : NULL;
}
@@ -594,23 +580,41 @@ static void __init replace_pin_at_irq_no
add_pin_to_irq_node(cfg, node, newapic, newpin);
}
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
+{
+ unsigned int reg, pin;
+
+ pin = entry->pin;
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
+ reg &= mask_and;
+ reg |= mask_or;
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
+ if (final)
+ final(entry);
+}
+
static void io_apic_modify_irq(struct irq_cfg *cfg,
int mask_and, int mask_or,
void (*final)(struct irq_pin_list *entry))
{
- int pin;
struct irq_pin_list *entry;
- for_each_irq_pin(entry, cfg->irq_2_pin) {
- unsigned int reg;
- pin = entry->pin;
- reg = io_apic_read(entry->apic, 0x10 + pin * 2);
- reg &= mask_and;
- reg |= mask_or;
- io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
- if (final)
- final(entry);
- }
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ __io_apic_modify_irq(entry, mask_and, mask_or, final);
+}
+
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
+{
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
+ IO_APIC_REDIR_MASKED, NULL);
+}
+
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
+{
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
@@ -634,18 +638,6 @@ static void __mask_IO_APIC_irq(struct ir
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
- IO_APIC_REDIR_MASKED, NULL);
-}
-
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
- IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
-}
-
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
struct irq_cfg *cfg = desc->chip_data;
@@ -1225,7 +1217,7 @@ __assign_irq_vector(int irq, struct irq_
int cpu, err;
cpumask_var_t tmp_mask;
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
+ if (cfg->move_in_progress)
return -EBUSY;
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
@@ -1289,8 +1281,7 @@ next:
return err;
}
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
{
int err;
unsigned long flags;
@@ -1668,9 +1659,6 @@ __apicdebuginit(void) print_IO_APIC(void
struct irq_desc *desc;
unsigned int irq;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
for (i = 0; i < nr_ioapics; i++)
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
@@ -1777,9 +1765,6 @@ __apicdebuginit(void) print_APIC_field(i
{
int i;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG);
for (i = 0; i < 8; i++)
@@ -1793,9 +1778,6 @@ __apicdebuginit(void) print_local_APIC(v
unsigned int i, v, ver, maxlvt;
u64 icr;
- if (apic_verbosity == APIC_QUIET)
- return;
-
printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
smp_processor_id(), hard_smp_processor_id());
v = apic_read(APIC_ID);
@@ -1893,13 +1875,19 @@ __apicdebuginit(void) print_local_APIC(v
printk("\n");
}
-__apicdebuginit(void) print_all_local_APICs(void)
+__apicdebuginit(void) print_local_APICs(int maxcpu)
{
int cpu;
+ if (!maxcpu)
+ return;
+
preempt_disable();
- for_each_online_cpu(cpu)
+ for_each_online_cpu(cpu) {
+ if (cpu >= maxcpu)
+ break;
smp_call_function_single(cpu, print_local_APIC, NULL, 1);
+ }
preempt_enable();
}
@@ -1908,7 +1896,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
+ if (!nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1935,21 +1923,41 @@ __apicdebuginit(void) print_PIC(void)
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
}
-__apicdebuginit(int) print_all_ICs(void)
+static int __initdata show_lapic = 1;
+static __init int setup_show_lapic(char *arg)
+{
+ int num = -1;
+
+ if (strcmp(arg, "all") == 0) {
+ show_lapic = CONFIG_NR_CPUS;
+ } else {
+ get_option(&arg, &num);
+ if (num >= 0)
+ show_lapic = num;
+ }
+
+ return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+__apicdebuginit(int) print_ICs(void)
{
+ if (apic_verbosity == APIC_QUIET)
+ return 0;
+
print_PIC();
/* don't print out if apic is not there */
if (!cpu_has_apic && !apic_from_smp_config())
return 0;
- print_all_local_APICs();
+ print_local_APICs(show_lapic);
print_IO_APIC();
return 0;
}
-fs_initcall(print_all_ICs);
+fs_initcall(print_ICs);
/* Where if anywhere is the i8259 connect in external int mode */
@@ -2106,7 +2114,7 @@ void __init setup_ioapic_ids_from_mpc(vo
* This is broken; anything with a real cpu count has to
* circumvent this idiocy regardless.
*/
- phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
/*
* Set the IOAPIC ID to the value stored in the MPC table.
@@ -2133,7 +2141,7 @@ void __init setup_ioapic_ids_from_mpc(vo
* system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(phys_id_present_map,
+ if (apic->check_apicid_used(&phys_id_present_map,
mp_ioapics[apic_id].apicid)) {
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
apic_id, mp_ioapics[apic_id].apicid);
@@ -2148,7 +2156,7 @@ void __init setup_ioapic_ids_from_mpc(vo
mp_ioapics[apic_id].apicid = i;
} else {
physid_mask_t tmp;
- tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
+ apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
apic_printk(APIC_VERBOSE, "Setting %d in the "
"phys_id_present_map\n",
mp_ioapics[apic_id].apicid);
@@ -2303,20 +2311,16 @@ static int ioapic_retrigger_irq(unsigned
*/
#ifdef CONFIG_SMP
-static void send_cleanup_vector(struct irq_cfg *cfg)
+void send_cleanup_vector(struct irq_cfg *cfg)
{
cpumask_var_t cleanup_mask;
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
unsigned int i;
- cfg->move_cleanup_count = 0;
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
- cfg->move_cleanup_count++;
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
} else {
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
free_cpumask_var(cleanup_mask);
}
@@ -2347,31 +2351,30 @@ static void __target_IO_APIC_irq(unsigne
}
}
-static int
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
-
/*
* Either sets desc->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
* leaves desc->affinity untouched.
*/
-static unsigned int
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
+unsigned int
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
+ unsigned int *dest_id)
{
struct irq_cfg *cfg;
unsigned int irq;
if (!cpumask_intersects(mask, cpu_online_mask))
- return BAD_APICID;
+ return -1;
irq = desc->irq;
cfg = desc->chip_data;
if (assign_irq_vector(irq, cfg, mask))
- return BAD_APICID;
+ return -1;
cpumask_copy(desc->affinity, mask);
- return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+ *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
+ return 0;
}
static int
@@ -2387,12 +2390,11 @@ set_ioapic_affinity_irq_desc(struct irq_
cfg = desc->chip_data;
spin_lock_irqsave(&ioapic_lock, flags);
- dest = set_desc_affinity(desc, mask);
- if (dest != BAD_APICID) {
+ ret = set_desc_affinity(desc, mask, &dest);
+ if (!ret) {
/* Only the high 8 bits are valid. */
dest = SET_APIC_LOGICAL_ID(dest);
__target_IO_APIC_irq(irq, dest, cfg);
- ret = 0;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2507,8 +2509,13 @@ asmlinkage void smp_irq_move_cleanup_int
continue;
cfg = irq_cfg(irq);
- spin_lock(&desc->lock);
- if (!cfg->move_cleanup_count)
+ raw_spin_lock(&desc->lock);
+
+ /*
+ * Check if the irq migration is in progress. If so, we
+ * haven't received the cleanup request yet for this irq.
+ */
+ if (cfg->move_in_progress)
goto unlock;
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
@@ -2527,29 +2534,40 @@ asmlinkage void smp_irq_move_cleanup_int
goto unlock;
}
__get_cpu_var(vector_irq)[vector] = -1;
- cfg->move_cleanup_count--;
unlock:
- spin_unlock(&desc->lock);
+ raw_spin_unlock(&desc->lock);
}
irq_exit();
}
-static void irq_complete_move(struct irq_desc **descp)
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
{
struct irq_desc *desc = *descp;
struct irq_cfg *cfg = desc->chip_data;
- unsigned vector, me;
+ unsigned me;
if (likely(!cfg->move_in_progress))
return;
- vector = ~get_irq_regs()->orig_ax;
me = smp_processor_id();
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
send_cleanup_vector(cfg);
}
+
+static void irq_complete_move(struct irq_desc **descp)
+{
+ __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
+}
+
+void irq_force_complete_move(int irq)
+{
+ struct irq_desc *desc = irq_to_desc(irq);
+ struct irq_cfg *cfg = desc->chip_data;
+
+ __irq_complete_move(&desc, cfg->vector);
+}
#else
static inline void irq_complete_move(struct irq_desc **descp) {}
#endif
@@ -2565,6 +2583,59 @@ static void ack_apic_edge(unsigned int i
atomic_t irq_mis_count;
+/*
+ * IO-APIC versions below 0x20 don't support EOI register.
+ * For the record, here is the information about various versions:
+ * 0Xh 82489DX
+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
+ * 30h-FFh Reserved
+ *
+ * Some of the Intel ICH Specs (ICH2 to ICH5) documents the io-apic
+ * version as 0x2. This is an error with documentation and these ICH chips
+ * use io-apic's of version 0x20.
+ *
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
+ * Otherwise, we simulate the EOI message manually by changing the trigger
+ * mode to edge and then back to level, with RTE being masked during this.
+*/
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
+{
+ struct irq_pin_list *entry;
+
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
+ if (mp_ioapics[entry->apic].apicver >= 0x20) {
+ /*
+ * Intr-remapping uses pin number as the virtual vector
+ * in the RTE. Actual vector is programmed in
+ * intr-remapping table entry. Hence for the io-apic
+ * EOI we use the pin number.
+ */
+ if (irq_remapped(irq))
+ io_apic_eoi(entry->apic, entry->pin);
+ else
+ io_apic_eoi(entry->apic, cfg->vector);
+ } else {
+ __mask_and_edge_IO_APIC_irq(entry);
+ __unmask_and_level_IO_APIC_irq(entry);
+ }
+ }
+}
+
+static void eoi_ioapic_irq(struct irq_desc *desc)
+{
+ struct irq_cfg *cfg;
+ unsigned long flags;
+ unsigned int irq;
+
+ irq = desc->irq;
+ cfg = desc->chip_data;
+
+ spin_lock_irqsave(&ioapic_lock, flags);
+ __eoi_ioapic_irq(irq, cfg);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+}
+
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
@@ -2600,6 +2671,19 @@ static void ack_apic_level(unsigned int
* level-triggered interrupt. We mask the source for the time of the
* operation to prevent an edge-triggered interrupt escaping meanwhile.
* The idea is from Manfred Spraul. --macro
+ *
+ * Also in the case when cpu goes offline, fixup_irqs() will forward
+ * any unhandled interrupt on the offlined cpu to the new cpu
+ * destination that is handling the corresponding interrupt. This
+ * interrupt forwarding is done via IPI's. Hence, in this case also
+ * level-triggered io-apic interrupt will be seen as an edge
+ * interrupt in the IRR. And we can't rely on the cpu's EOI
+ * to be broadcasted to the IO-APIC's which will clear the remoteIRR
+ * corresponding to the level-triggered interrupt. Hence on IO-APIC's
+ * supporting EOI register, we do an explicit EOI to clear the
+ * remote IRR and on IO-APIC's which don't have an EOI register,
+ * we use the above logic (mask+edge followed by unmask+level) from
+ * Manfred Spraul to clear the remote IRR.
*/
cfg = desc->chip_data;
i = cfg->vector;
@@ -2611,6 +2695,19 @@ static void ack_apic_level(unsigned int
*/
ack_APIC_irq();
+ /*
+ * Tail end of clearing remote IRR bit (either by delivering the EOI
+ * message via io-apic EOI register write or simulating it using
+ * mask+edge followed by unnask+level logic) manually when the
+ * level triggered interrupt is seen as the edge triggered interrupt
+ * at the cpu.
+ */
+ if (!(v & (1 << (i & 0x1f)))) {
+ atomic_inc(&irq_mis_count);
+
+ eoi_ioapic_irq(desc);
+ }
+
/* Now we can move and renable the irq */
if (unlikely(do_unmask_irq)) {
/* Only migrate the irq if the ack has been received.
@@ -2644,41 +2741,9 @@ static void ack_apic_level(unsigned int
move_masked_irq(irq);
unmask_IO_APIC_irq_desc(desc);
}
-
- /* Tail end of version 0x11 I/O APIC bug workaround */
- if (!(v & (1 << (i & 0x1f)))) {
- atomic_inc(&irq_mis_count);
- spin_lock(&ioapic_lock);
- __mask_and_edge_IO_APIC_irq(cfg);
- __unmask_and_level_IO_APIC_irq(cfg);
- spin_unlock(&ioapic_lock);
- }
}
#ifdef CONFIG_INTR_REMAP
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
-{
- struct irq_pin_list *entry;
-
- for_each_irq_pin(entry, cfg->irq_2_pin)
- io_apic_eoi(entry->apic, entry->pin);
-}
-
-static void
-eoi_ioapic_irq(struct irq_desc *desc)
-{
- struct irq_cfg *cfg;
- unsigned long flags;
- unsigned int irq;
-
- irq = desc->irq;
- cfg = desc->chip_data;
-
- spin_lock_irqsave(&ioapic_lock, flags);
- __eoi_ioapic_irq(irq, cfg);
- spin_unlock_irqrestore(&ioapic_lock, flags);
-}
-
static void ir_ack_apic_edge(unsigned int irq)
{
ack_APIC_irq();
@@ -3256,6 +3321,7 @@ unsigned int create_irq_nr(unsigned int
continue;
desc_new = move_irq_desc(desc_new, node);
+ cfg_new = desc_new->chip_data;
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
irq = new;
@@ -3311,7 +3377,8 @@ void destroy_irq(unsigned int irq)
* MSI message composition
*/
#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+ struct msi_msg *msg, u8 hpet_id)
{
struct irq_cfg *cfg;
int err;
@@ -3345,7 +3412,10 @@ static int msi_compose_msg(struct pci_de
irte.dest_id = IRTE_DEST(dest);
/* Set source-id of interrupt request */
- set_msi_sid(&irte, pdev);
+ if (pdev)
+ set_msi_sid(&irte, pdev);
+ else
+ set_hpet_sid(&irte, hpet_id);
modify_irte(irq, &irte);
@@ -3391,8 +3461,7 @@ static int set_msi_irq_affinity(unsigned
struct msi_msg msg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3424,8 +3493,7 @@ ir_set_msi_irq_affinity(unsigned int irq
if (get_irte(irq, &irte))
return -1;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
irte.vector = cfg->vector;
@@ -3510,7 +3578,7 @@ static int setup_msi_irq(struct pci_dev
int ret;
struct msi_msg msg;
- ret = msi_compose_msg(dev, irq, &msg);
+ ret = msi_compose_msg(dev, irq, &msg, -1);
if (ret < 0)
return ret;
@@ -3607,8 +3675,7 @@ static int dmar_msi_set_affinity(unsigne
struct msi_msg msg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3643,7 +3710,7 @@ int arch_setup_dmar_msi(unsigned int irq
int ret;
struct msi_msg msg;
- ret = msi_compose_msg(NULL, irq, &msg);
+ ret = msi_compose_msg(NULL, irq, &msg, -1);
if (ret < 0)
return ret;
dmar_msi_write(irq, &msg);
@@ -3663,8 +3730,7 @@ static int hpet_msi_set_affinity(unsigne
struct msi_msg msg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3683,6 +3749,19 @@ static int hpet_msi_set_affinity(unsigne
#endif /* CONFIG_SMP */
+static struct irq_chip ir_hpet_msi_type = {
+ .name = "IR-HPET_MSI",
+ .unmask = hpet_msi_unmask,
+ .mask = hpet_msi_mask,
+#ifdef CONFIG_INTR_REMAP
+ .ack = ir_ack_apic_edge,
+#ifdef CONFIG_SMP
+ .set_affinity = ir_set_msi_irq_affinity,
+#endif
+#endif
+ .retrigger = ioapic_retrigger_irq,
+};
+
static struct irq_chip hpet_msi_type = {
.name = "HPET_MSI",
.unmask = hpet_msi_unmask,
@@ -3694,20 +3773,36 @@ static struct irq_chip hpet_msi_type = {
.retrigger = ioapic_retrigger_irq,
};
-int arch_setup_hpet_msi(unsigned int irq)
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
{
int ret;
struct msi_msg msg;
struct irq_desc *desc = irq_to_desc(irq);
- ret = msi_compose_msg(NULL, irq, &msg);
+ if (intr_remapping_enabled) {
+ struct intel_iommu *iommu = map_hpet_to_ir(id);
+ int index;
+
+ if (!iommu)
+ return -1;
+
+ index = alloc_irte(iommu, irq, 1);
+ if (index < 0)
+ return -1;
+ }
+
+ ret = msi_compose_msg(NULL, irq, &msg, id);
if (ret < 0)
return ret;
hpet_msi_write(irq, &msg);
desc->status |= IRQ_MOVE_PCNTXT;
- set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
- "edge");
+ if (irq_remapped(irq))
+ set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
+ handle_edge_irq, "edge");
+ else
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type,
+ handle_edge_irq, "edge");
return 0;
}
@@ -3741,8 +3836,7 @@ static int set_ht_irq_affinity(unsigned
struct irq_cfg *cfg;
unsigned int dest;
- dest = set_desc_affinity(desc, mask);
- if (dest == BAD_APICID)
+ if (set_desc_affinity(desc, mask, &dest))
return -1;
cfg = desc->chip_data;
@@ -3808,75 +3902,6 @@ int arch_setup_ht_irq(unsigned int irq,
}
#endif /* CONFIG_HT_IRQ */
-#ifdef CONFIG_X86_UV
-/*
- * Re-target the irq to the specified CPU and enable the specified MMR located
- * on the specified blade to allow the sending of MSIs to the specified CPU.
- */
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
- unsigned long mmr_offset)
-{
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
- struct irq_cfg *cfg;
- int mmr_pnode;
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- unsigned long flags;
- int err;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- cfg = irq_cfg(irq);
-
- err = assign_irq_vector(irq, cfg, eligible_cpu);
- if (err != 0)
- return err;
-
- spin_lock_irqsave(&vector_lock, flags);
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
- irq_name);
- spin_unlock_irqrestore(&vector_lock, flags);
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->vector = cfg->vector;
- entry->delivery_mode = apic->irq_delivery_mode;
- entry->dest_mode = apic->irq_dest_mode;
- entry->polarity = 0;
- entry->trigger = 0;
- entry->mask = 0;
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-
- if (cfg->move_in_progress)
- send_cleanup_vector(cfg);
-
- return irq;
-}
-
-/*
- * Disable the specified MMR located on the specified blade so that MSIs are
- * longer allowed to be sent.
- */
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
-{
- unsigned long mmr_value;
- struct uv_IO_APIC_route_entry *entry;
- int mmr_pnode;
-
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
-
- mmr_value = 0;
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
- entry->mask = 1;
-
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
-}
-#endif /* CONFIG_X86_64 */
-
int __init io_apic_get_redir_entries (int ioapic)
{
union IO_APIC_reg_01 reg_01;
@@ -4055,7 +4080,7 @@ int __init io_apic_get_unique_id(int ioa
*/
if (physids_empty(apic_id_map))
- apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
spin_lock_irqsave(&ioapic_lock, flags);
reg_00.raw = io_apic_read(ioapic, 0);
@@ -4071,10 +4096,10 @@ int __init io_apic_get_unique_id(int ioa
* Every APIC in a system must have a unique ID or we get lots of nice
* 'stuck on smp_invalidate_needed IPI wait' messages.
*/
- if (apic->check_apicid_used(apic_id_map, apic_id)) {
+ if (apic->check_apicid_used(&apic_id_map, apic_id)) {
for (i = 0; i < get_physical_broadcast(); i++) {
- if (!apic->check_apicid_used(apic_id_map, i))
+ if (!apic->check_apicid_used(&apic_id_map, i))
break;
}
@@ -4087,7 +4112,7 @@ int __init io_apic_get_unique_id(int ioa
apic_id = i;
}
- tmp = apic->apicid_to_cpu_present(apic_id);
+ apic->apicid_to_cpu_present(apic_id, &tmp);
physids_or(apic_id_map, apic_id_map, tmp);
if (reg_00.bits.ID != apic_id) {
@@ -4218,7 +4243,7 @@ static struct resource * __init ioapic_s
for (i = 0; i < nr_ioapics; i++) {
res[i].name = mem;
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- sprintf(mem, "IOAPIC %u", i);
+ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
mem += IOAPIC_RESOURCE_NAME_SIZE;
}
@@ -4252,18 +4277,17 @@ void __init ioapic_init_mappings(void)
#ifdef CONFIG_X86_32
fake_ioapic_page:
#endif
- ioapic_phys = (unsigned long)
- alloc_bootmem_pages(PAGE_SIZE);
+ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
ioapic_phys = __pa(ioapic_phys);
}
set_fixmap_nocache(idx, ioapic_phys);
- apic_printk(APIC_VERBOSE,
- "mapped IOAPIC to %08lx (%08lx)\n",
- __fix_to_virt(idx), ioapic_phys);
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
+ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
+ ioapic_phys);
idx++;
ioapic_res->start = ioapic_phys;
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
ioapic_res++;
}
}
--- head-2011-03-17.orig/arch/x86/kernel/cpu/Makefile 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/Makefile 2011-02-01 14:55:46.000000000 +0100
@@ -34,7 +34,8 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
-disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o sched.o vmware.o
+disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o perf_event.o \
+ sched.o vmware.o
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:24.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:34.000000000 +0100
@@ -69,7 +69,7 @@ void __init setup_cpu_local_masks(void)
static void __cpuinit default_init(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_64
- display_cacheinfo(c);
+ cpu_detect_cache_sizes(c);
#else
/* Not much we can do here... */
/* Check if at least it has cpuid */
@@ -414,7 +414,7 @@ static void __cpuinit get_model_name(str
}
}
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -422,8 +422,6 @@ void __cpuinit display_cacheinfo(struct
if (n >= 0x80000005) {
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
c->x86_cache_size = (ecx>>24) + (edx>>24);
#ifdef CONFIG_X86_64
/* On K8 L1 TLB is inclusive, so don't count it */
@@ -453,9 +451,6 @@ void __cpuinit display_cacheinfo(struct
#endif
c->x86_cache_size = l2size;
-
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
- l2size, ecx & 0xFF);
}
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -463,6 +458,7 @@ void __cpuinit detect_ht(struct cpuinfo_
#ifdef CONFIG_X86_HT
u32 eax, ebx, ecx, edx;
int index_msb, core_bits;
+ static bool printed;
if (!cpu_has(c, X86_FEATURE_HT))
return;
@@ -478,7 +474,7 @@ void __cpuinit detect_ht(struct cpuinfo_
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
+ printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
goto out;
}
@@ -505,11 +501,12 @@ void __cpuinit detect_ht(struct cpuinfo_
((1 << core_bits) - 1);
out:
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
+ if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
c->phys_proc_id);
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
c->cpu_core_id);
+ printed = 1;
}
#endif
}
@@ -690,24 +687,31 @@ void __init early_cpu_init(void)
const struct cpu_dev *const *cdev;
int count = 0;
+#ifdef CONFIG_PROCESSOR_SELECT
printk(KERN_INFO "KERNEL supported cpus:\n");
+#endif
+
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
const struct cpu_dev *cpudev = *cdev;
- unsigned int j;
if (count >= X86_VENDOR_NUM)
break;
cpu_devs[count] = cpudev;
count++;
- for (j = 0; j < 2; j++) {
- if (!cpudev->c_ident[j])
- continue;
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
- cpudev->c_ident[j]);
+#ifdef CONFIG_PROCESSOR_SELECT
+ {
+ unsigned int j;
+
+ for (j = 0; j < 2; j++) {
+ if (!cpudev->c_ident[j])
+ continue;
+ printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
+ cpudev->c_ident[j]);
+ }
}
+#endif
}
-
early_identify_cpu(&boot_cpu_data);
}
@@ -874,10 +878,8 @@ static void __cpuinit identify_cpu(struc
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
-#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
- mcheck_init(c);
-#endif
+ mcheck_cpu_init(c);
select_idle_routine(c);
@@ -909,6 +911,10 @@ void __init identify_boot_cpu(void)
init_hw_perf_events();
}
+#ifdef CONFIG_XEN
+void set_perf_event_pending(void) {}
+#endif
+
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
{
BUG_ON(c == &boot_cpu_data);
@@ -1156,7 +1162,7 @@ static void clear_all_debug_regs(void)
void __cpuinit cpu_init(void)
{
#ifndef CONFIG_X86_NO_TSS
- struct orig_ist *orig_ist;
+ struct orig_ist *oist;
struct tss_struct *t;
unsigned long v;
int i;
@@ -1170,7 +1176,7 @@ void __cpuinit cpu_init(void)
xen_switch_pt();
#ifndef CONFIG_X86_NO_TSS
t = &per_cpu(init_tss, cpu);
- orig_ist = &per_cpu(orig_ist, cpu);
+ oist = &per_cpu(orig_ist, cpu);
#endif
#ifdef CONFIG_NUMA
@@ -1184,7 +1190,7 @@ void __cpuinit cpu_init(void)
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
panic("CPU#%d already initialized!\n", cpu);
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+ pr_debug("Initializing CPU#%d\n", cpu);
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
@@ -1207,7 +1213,7 @@ void __cpuinit cpu_init(void)
wrmsrl(MSR_KERNEL_GS_BASE, 0);
barrier();
- check_efer();
+ x86_configure_nx();
#ifdef CONFIG_X86_LOCAL_APIC
if (cpu != 0)
enable_x2apic();
@@ -1217,12 +1223,12 @@ void __cpuinit cpu_init(void)
/*
* set up and load the per-CPU TSS
*/
- if (!orig_ist->ist[0]) {
+ if (!oist->ist[0]) {
char *estacks = per_cpu(exception_stacks, cpu);
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
estacks += exception_stack_sizes[v];
- orig_ist->ist[v] = t->x86_tss.ist[v] =
+ oist->ist[v] = t->x86_tss.ist[v] =
(unsigned long)estacks;
}
}
--- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -771,7 +771,7 @@ core_initcall(e820_mark_nvs_memory);
/*
* Early reserved memory areas.
*/
-#define MAX_EARLY_RES 20
+#define MAX_EARLY_RES 32
struct early_res {
u64 start, end;
@@ -780,7 +780,15 @@ struct early_res {
};
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
#ifndef CONFIG_XEN
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
+#endif
#endif
{}
};
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:55:46.000000000 +0100
@@ -338,6 +338,10 @@ ENTRY(ret_from_fork)
END(ret_from_fork)
/*
+ * Interrupt exit functions should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
+/*
* Return to user mode is not as complex as all this looks,
* but we want the default path for a system call return to
* go as quickly as possible which is why some of this is
@@ -387,6 +391,10 @@ need_resched:
END(resume_kernel)
#endif
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/* SYSENTER_RETURN points to after the "sysenter" instruction in
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
@@ -539,10 +547,14 @@ ENTRY(ia32pv_sysenter_target)
.align 4
.long 1b,syscall_fault
.previous
- /* fall through */
+ jmp system_call
CFI_ENDPROC
ENDPROC(ia32pv_sysenter_target)
+/*
+ * syscall stub including irq exit should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
# system call handler stub
ENTRY(system_call)
RING0_INT_FRAME # can't unwind into user space anyway
@@ -774,26 +786,69 @@ syscall_badsys:
jmp resume_userspace
END(syscall_badsys)
CFI_ENDPROC
+/*
+ * End of kprobes section
+ */
+ .popsection
/*
* System calls that need a pt_regs pointer.
*/
-#define PTREGSCALL(name) \
+#define PTREGSCALL0(name) \
ALIGN; \
ptregs_##name: \
leal 4(%esp),%eax; \
jmp sys_##name;
-PTREGSCALL(iopl)
-PTREGSCALL(fork)
-PTREGSCALL(clone)
-PTREGSCALL(vfork)
-PTREGSCALL(execve)
-PTREGSCALL(sigaltstack)
-PTREGSCALL(sigreturn)
-PTREGSCALL(rt_sigreturn)
-PTREGSCALL(vm86)
-PTREGSCALL(vm86old)
+#define PTREGSCALL1(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%edx; \
+ movl (PT_EBX+4)(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL2(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%ecx; \
+ movl (PT_ECX+4)(%esp),%edx; \
+ movl (PT_EBX+4)(%esp),%eax; \
+ jmp sys_##name;
+
+#define PTREGSCALL3(name) \
+ ALIGN; \
+ptregs_##name: \
+ leal 4(%esp),%eax; \
+ pushl %eax; \
+ movl PT_EDX(%eax),%ecx; \
+ movl PT_ECX(%eax),%edx; \
+ movl PT_EBX(%eax),%eax; \
+ call sys_##name; \
+ addl $4,%esp; \
+ ret
+
+PTREGSCALL1(iopl)
+PTREGSCALL0(fork)
+PTREGSCALL0(vfork)
+PTREGSCALL3(execve)
+PTREGSCALL2(sigaltstack)
+PTREGSCALL0(sigreturn)
+PTREGSCALL0(rt_sigreturn)
+PTREGSCALL2(vm86)
+PTREGSCALL1(vm86old)
+
+/* Clone is an oddball. The 4th arg is in %edi */
+ ALIGN;
+ptregs_clone:
+ leal 4(%esp),%eax
+ pushl %eax
+ pushl PT_EDI(%eax)
+ movl PT_EDX(%eax),%ecx
+ movl PT_ECX(%eax),%edx
+ movl PT_EBX(%eax),%eax
+ call sys_clone
+ addl $8,%esp
+ ret
#ifndef CONFIG_XEN
.macro FIXUP_ESPFIX_STACK
@@ -884,6 +939,10 @@ common_interrupt:
ENDPROC(common_interrupt)
CFI_ENDPROC
+/*
+ * Irq entries should be protected against kprobes
+ */
+ .pushsection .kprobes.text, "ax"
#define BUILD_INTERRUPT3(name, nr, fn) \
ENTRY(name) \
RING0_INT_FRAME; \
@@ -905,6 +964,8 @@ ENDPROC(name)
#else
#define UNWIND_ESPFIX_STACK
+ .pushsection .kprobes.text, "ax"
+
# A note on the "critical region" in our callback handler.
# We want to avoid stacking callback handlers due to events occurring
# during handling of the last event. To do this, we keep events disabled
@@ -1205,16 +1266,16 @@ ENTRY(fixup_4gb_segment)
jmp error_code
CFI_ENDPROC
END(spurious_interrupt_bug)
+/*
+ * End of kprobes section
+ */
+ .popsection
ENTRY(kernel_thread_helper)
pushl $0 # fake return address for unwinder
CFI_STARTPROC
- movl %edx,%eax
- push %edx
- CFI_ADJUST_CFA_OFFSET 4
- call *%ebx
- push %eax
- CFI_ADJUST_CFA_OFFSET 4
+ movl %edi,%eax
+ call *%esi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
@@ -1315,17 +1376,14 @@ END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
- pushl $0
pushl %eax
- pushl %ecx
pushl %edx
movl %ebp, %eax
call ftrace_return_to_handler
- movl %eax, 0xc(%esp)
+ movl %eax, %ecx
popl %edx
- popl %ecx
popl %eax
- ret
+ jmp *%ecx
#endif
#include <asm/alternative-asm.h>
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:55:46.000000000 +0100
@@ -160,11 +160,11 @@ GLOBAL(return_to_handler)
call ftrace_return_to_handler
- movq %rax, 16(%rsp)
+ movq %rax, %rdi
movq 8(%rsp), %rdx
movq (%rsp), %rax
- addq $16, %rsp
- retq
+ addq $24, %rsp
+ jmp *%rdi
#endif
@@ -863,8 +863,8 @@ apicinterrupt UV_BAU_MESSAGE \
#endif
apicinterrupt LOCAL_TIMER_VECTOR \
apic_timer_interrupt smp_apic_timer_interrupt
-apicinterrupt GENERIC_INTERRUPT_VECTOR \
- generic_interrupt smp_generic_interrupt
+apicinterrupt X86_PLATFORM_IPI_VECTOR \
+ x86_platform_ipi smp_x86_platform_ipi
#ifdef CONFIG_SMP
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
@@ -1093,63 +1093,20 @@ zeroentry coprocessor_error do_coprocess
errorentry alignment_check do_alignment_check
zeroentry simd_coprocessor_error do_simd_coprocessor_error
-/*
- * Create a kernel thread.
- *
- * C extern interface:
- * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
- *
- * asm input arguments:
- * rdi: fn, rsi: arg, rdx: flags
- */
-ENTRY(kernel_thread)
- CFI_STARTPROC
- FAKE_STACK_FRAME $child_rip
- SAVE_ALL
-
- # rdi: flags, rsi: usp, rdx: will be &pt_regs
- movq %rdx,%rdi
- orq kernel_thread_flags(%rip),%rdi
- movq $-1, %rsi
- movq %rsp, %rdx
-
- xorl %r8d,%r8d
- xorl %r9d,%r9d
-
- # clone now
- call do_fork
- movq %rax,RAX(%rsp)
- xorl %edi,%edi
-
- /*
- * It isn't worth to check for reschedule here,
- * so internally to the x86_64 port you can rely on kernel_thread()
- * not to reschedule the child before returning, this avoids the need
- * of hacks for example to fork off the per-CPU idle tasks.
- * [Hopefully no generic code relies on the reschedule -AK]
- */
- RESTORE_ALL
- UNFAKE_STACK_FRAME
- ret
- CFI_ENDPROC
-END(kernel_thread)
-
-ENTRY(child_rip)
+ENTRY(kernel_thread_helper)
pushq $0 # fake return address
CFI_STARTPROC
/*
* Here we are in the child and the registers are set as they were
* at kernel_thread() invocation in the parent.
*/
- movq %rdi, %rax
- movq %rsi, %rdi
- call *%rax
+ call *%rsi
# exit
mov %eax, %edi
call do_exit
ud2 # padding for call trace
CFI_ENDPROC
-END(child_rip)
+END(kernel_thread_helper)
/*
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
@@ -1329,12 +1286,17 @@ error_kernelspace:
leaq irq_return(%rip),%rcx
cmpq %rcx,RIP+8(%rsp)
je error_swapgs
- movl %ecx,%ecx /* zero extend */
- cmpq %rcx,RIP+8(%rsp)
- je error_swapgs
+ movl %ecx,%eax /* zero extend */
+ cmpq %rax,RIP+8(%rsp)
+ je bstep_iret
cmpq $gs_change,RIP+8(%rsp)
je error_swapgs
jmp error_sti
+
+bstep_iret:
+ /* Fix truncated RIP */
+ movq %rcx,RIP+8(%rsp)
+ jmp error_swapgs
#endif
END(error_entry)
--- head-2011-03-17.orig/arch/x86/kernel/head-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -1,5 +1,6 @@
#include <linux/kernel.h>
#include <linux/init.h>
+#include <linux/pci.h>
#include <asm/setup.h>
#ifndef CONFIG_XEN
@@ -121,7 +122,7 @@ void __init xen_start_kernel(void)
__pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
}
#else
- check_efer();
+ x86_configure_nx();
xen_init_pt();
#endif
@@ -149,6 +150,8 @@ void __init xen_start_kernel(void)
virt_to_machine(empty_zero_page),
PAGE_KERNEL_RO);
+ if (is_initial_xendomain())
+ pci_request_acs();
}
void __init xen_arch_setup(void)
--- head-2011-03-17.orig/arch/x86/kernel/head32-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head32-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -48,8 +48,6 @@ void __init i386_start_kernel(void)
BUG_ON(pte_index(hypervisor_virt_start));
#endif
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
#ifndef CONFIG_XEN
--- head-2011-03-17.orig/arch/x86/kernel/head64-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head64-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -119,8 +119,6 @@ void __init x86_64_start_reservations(ch
{
copy_bootdata(__va(real_mode_data));
- reserve_trampoline_memory();
-
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
/*
--- head-2011-03-17.orig/arch/x86/kernel/head_64-xen.S 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head_64-xen.S 2011-02-01 14:55:46.000000000 +0100
@@ -51,9 +51,9 @@ startup_64:
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
- phys_##name = . - .head.text; \
ENTRY(name)
+ __PAGE_ALIGNED_BSS
NEXT_PAGE(init_level4_pgt)
.fill 512,8,0
/*
@@ -81,7 +81,9 @@ NEXT_PAGE(level2_fixmap_pgt)
NEXT_PAGE(level1_fixmap_pgt)
.fill 512,8,0
+ .previous
NEXT_PAGE(hypercall_page)
+ phys_hypercall_page = . - .head.text
CFI_STARTPROC
.rept 0x1000 / 0x20
.skip 1 /* push %rcx */
--- head-2011-03-17.orig/arch/x86/kernel/ioport-xen.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/ioport-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -75,8 +75,9 @@ asmlinkage long sys_ioperm(unsigned long
* beyond the 0x3ff range: to get the full 65536 ports bitmapped
* you'd need 8kB of bitmaps/process, which is a bit excessive.
*/
-static int do_iopl(unsigned int level, struct thread_struct *t)
+long sys_iopl(unsigned int level, struct pt_regs *regs)
{
+ struct thread_struct *t = &current->thread;
unsigned int old = t->iopl >> 12;
if (level > 3)
@@ -86,27 +87,8 @@ static int do_iopl(unsigned int level, s
if (!capable(CAP_SYS_RAWIO))
return -EPERM;
}
-
- return 0;
-}
-
-#ifdef CONFIG_X86_32
-long sys_iopl(struct pt_regs *regs)
-{
- unsigned int level = regs->bx;
-#else
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
-{
-#endif
- struct thread_struct *t = &current->thread;
- int rc;
-
- rc = do_iopl(level, t);
- if (rc < 0)
- goto out;
-
t->iopl = level << 12;
set_iopl_mask(t->iopl);
-out:
- return rc;
+
+ return 0;
}
--- head-2011-03-17.orig/arch/x86/kernel/irq-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/irq-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -19,7 +19,7 @@ atomic_t irq_err_count;
#ifndef CONFIG_XEN
/* Function pointer for generic interrupt vector handling */
-void (*generic_interrupt_extension)(void) = NULL;
+void (*x86_platform_ipi_callback)(void) = NULL;
#endif
/*
@@ -77,10 +77,10 @@ static int show_other_interrupts(struct
seq_printf(p, " Performance pending work\n");
#endif
#ifndef CONFIG_XEN
- if (generic_interrupt_extension) {
+ if (x86_platform_ipi_callback) {
seq_printf(p, "%*s: ", prec, "PLT");
for_each_online_cpu(j)
- seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
+ seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
seq_printf(p, " Platform interrupts\n");
}
#endif
@@ -162,7 +162,7 @@ int show_interrupts(struct seq_file *p,
if (!desc)
return 0;
- spin_lock_irqsave(&desc->lock, flags);
+ raw_spin_lock_irqsave(&desc->lock, flags);
for_each_online_cpu(j)
any_count |= kstat_irqs_cpu(i, j);
action = desc->action;
@@ -183,7 +183,7 @@ int show_interrupts(struct seq_file *p,
seq_putc(p, '\n');
out:
- spin_unlock_irqrestore(&desc->lock, flags);
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
return 0;
}
@@ -201,8 +201,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->apic_pending_irqs;
#endif
#ifndef CONFIG_XEN
- if (generic_interrupt_extension)
- sum += irq_stats(cpu)->generic_irqs;
+ if (x86_platform_ipi_callback)
+ sum += irq_stats(cpu)->x86_platform_ipis;
#endif
#ifdef CONFIG_SMP
sum += irq_stats(cpu)->irq_resched_count;
@@ -271,9 +271,9 @@ unsigned int __irq_entry do_IRQ(struct p
}
/*
- * Handler for GENERIC_INTERRUPT_VECTOR.
+ * Handler for X86_PLATFORM_IPI_VECTOR.
*/
-void smp_generic_interrupt(struct pt_regs *regs)
+void smp_x86_platform_ipi(struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
@@ -283,13 +283,95 @@ void smp_generic_interrupt(struct pt_reg
irq_enter();
- inc_irq_stat(generic_irqs);
+ inc_irq_stat(x86_platform_ipis);
- if (generic_interrupt_extension)
- generic_interrupt_extension();
+ if (x86_platform_ipi_callback)
+ x86_platform_ipi_callback();
irq_exit();
set_irq_regs(old_regs);
}
#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <xen/evtchn.h>
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
+void fixup_irqs(void)
+{
+ unsigned int irq;
+ static int warned;
+ struct irq_desc *desc;
+ static DECLARE_BITMAP(irqs_used, NR_IRQS);
+
+ for_each_irq_desc(irq, desc) {
+ int break_affinity = 0;
+ int set_affinity = 1;
+ const struct cpumask *affinity;
+
+ if (!desc)
+ continue;
+ if (irq == 2)
+ continue;
+
+ /* interrupt's are disabled at this point */
+ raw_spin_lock(&desc->lock);
+
+ affinity = desc->affinity;
+ if (!irq_has_action(irq) ||
+ cpumask_subset(affinity, cpu_online_mask)) {
+ raw_spin_unlock(&desc->lock);
+ continue;
+ }
+
+ if (cpumask_test_cpu(smp_processor_id(), affinity))
+ __set_bit(irq, irqs_used);
+
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
+ break_affinity = 1;
+ affinity = cpu_all_mask;
+ }
+
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
+ desc->chip->mask(irq);
+
+ if (desc->chip->set_affinity)
+ desc->chip->set_affinity(irq, affinity);
+ else if (desc->chip != &no_irq_chip && !(warned++))
+ set_affinity = 0;
+
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
+ desc->chip->unmask(irq);
+
+ raw_spin_unlock(&desc->lock);
+
+ if (break_affinity && set_affinity)
+ /*printk("Broke affinity for irq %i\n", irq)*/;
+ else if (!set_affinity)
+ printk("Cannot set affinity for irq %i\n", irq);
+ }
+
+ /*
+ * We can remove mdelay() and then send spuriuous interrupts to
+ * new cpu targets for all the irqs that were handled previously by
+ * this cpu. While it works, I have seen spurious interrupt messages
+ * (nothing wrong but still...).
+ *
+ * So for now, retain mdelay(1) and check the IRR and then send those
+ * interrupts to new targets as this cpu is already offlined...
+ */
+ mdelay(1);
+
+ for_each_irq_desc(irq, desc) {
+ if (!__test_and_clear_bit(irq, irqs_used))
+ continue;
+
+ if (xen_test_irq_pending(irq)) {
+ raw_spin_lock(&desc->lock);
+ if (desc->chip->retrigger)
+ desc->chip->retrigger(irq);
+ raw_spin_unlock(&desc->lock);
+ }
+ }
+}
+#endif
--- head-2011-03-17.orig/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -21,10 +21,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/platform_device.h>
#include <linux/miscdevice.h>
#include <linux/capability.h>
-#include <linux/smp_lock.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
@@ -88,7 +90,6 @@ static int do_microcode_update(const voi
static int microcode_open(struct inode *unused1, struct file *unused2)
{
- cycle_kernel_lock();
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
}
@@ -98,7 +99,7 @@ static ssize_t microcode_write(struct fi
ssize_t ret = -EINVAL;
if ((len >> PAGE_SHIFT) > totalram_pages) {
- pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
+ pr_err("too much data (max %ld pages)\n", totalram_pages);
return ret;
}
@@ -131,7 +132,7 @@ static int __init microcode_dev_init(voi
error = misc_register(&microcode_dev);
if (error) {
- pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
+ pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
return error;
}
@@ -190,7 +191,7 @@ static int __init microcode_init(void)
else if (c->x86_vendor == X86_VENDOR_AMD)
fw_name = "amd-ucode/microcode_amd.bin";
else {
- pr_err("microcode: no support for this CPU vendor\n");
+ pr_err("no support for this CPU vendor\n");
return -ENODEV;
}
@@ -207,8 +208,7 @@ static int __init microcode_init(void)
request_microcode(fw_name);
pr_info("Microcode Update Driver: v" MICROCODE_VERSION
- " <tigran@aivazian.fsnet.co.uk>,"
- " Peter Oruba\n");
+ " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
return 0;
}
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -375,13 +375,6 @@ static int __init smp_read_mpc(struct mp
x86_init.mpparse.mpc_record(1);
}
-#ifdef CONFIG_X86_BIGSMP
- generic_bigsmp_probe();
-#endif
-
- if (apic->setup_apic_routing)
- apic->setup_apic_routing();
-
if (!num_processors)
printk(KERN_ERR "MPTABLE: no processors registered!\n");
return num_processors;
@@ -694,37 +687,21 @@ void __init default_get_smp_config(unsig
}
#ifndef CONFIG_XEN
-static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
+static void __init smp_reserve_memory(struct mpf_intel *mpf)
{
unsigned long size = get_mpc_size(mpf->physptr);
-#ifdef CONFIG_X86_32
- /*
- * We cannot access to MPC table to compute table size yet,
- * as only few megabytes from the bottom is mapped now.
- * PC-9800's MPC table places on the very last of physical
- * memory; so that simply reserving PAGE_SIZE from mpf->physptr
- * yields BUG() in reserve_bootmem.
- * also need to make sure physptr is below than max_low_pfn
- * we don't need reserve the area above max_low_pfn
- */
- unsigned long end = max_low_pfn * PAGE_SIZE;
-
- if (mpf->physptr < end) {
- if (mpf->physptr + size > end)
- size = end - mpf->physptr;
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
- }
-#else
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
-#endif
+
+ reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
}
#endif
-static int __init smp_scan_config(unsigned long base, unsigned long length,
- unsigned reserve)
+static int __init smp_scan_config(unsigned long base, unsigned long length)
{
unsigned int *bp = _bus_to_virt(base);
struct mpf_intel *mpf;
+#ifndef CONFIG_XEN
+ unsigned long mem;
+#endif
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
bp, length);
@@ -746,12 +723,10 @@ static int __init smp_scan_config(unsign
printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
mpf, (u64)virt_to_phys(mpf));
- if (!reserve)
- return 1;
- reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
- BOOTMEM_DEFAULT);
+ mem = virt_to_phys(mpf);
+ reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
if (mpf->physptr)
- smp_reserve_bootmem(mpf);
+ smp_reserve_memory(mpf);
#else
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
mpf, ((void *)bp - _bus_to_virt(base)) + base);
@@ -764,7 +739,7 @@ static int __init smp_scan_config(unsign
return 0;
}
-void __init default_find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(void)
{
#ifndef CONFIG_XEN
unsigned int address;
@@ -778,9 +753,9 @@ void __init default_find_smp_config(unsi
* 2) Scan the top 1K of base RAM
* 3) Scan the 64K of bios
*/
- if (smp_scan_config(0x0, 0x400, reserve) ||
- smp_scan_config(639 * 0x400, 0x400, reserve) ||
- smp_scan_config(0xF0000, 0x10000, reserve))
+ if (smp_scan_config(0x0, 0x400) ||
+ smp_scan_config(639 * 0x400, 0x400) ||
+ smp_scan_config(0xF0000, 0x10000))
return;
/*
* If it is an SMP machine we should know now, unless the
@@ -802,7 +777,7 @@ void __init default_find_smp_config(unsi
#ifndef CONFIG_XEN
address = get_bios_ebda();
if (address)
- smp_scan_config(address, 0x400, reserve);
+ smp_scan_config(address, 0x400);
#endif
}
@@ -1001,9 +976,6 @@ void __init early_reserve_e820_mpc_new(v
{
if (enable_update_mptable && alloc_mptable) {
u64 startt = 0;
-#ifdef CONFIG_X86_TRAMPOLINE
- startt = TRAMPOLINE_BASE;
-#endif
mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
}
}
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -11,10 +11,11 @@
#include <asm/gart.h>
#include <asm/calgary.h>
#include <asm/amd_iommu.h>
+#include <asm/x86_init.h>
static int forbid_dac __read_mostly;
-struct dma_map_ops *dma_ops;
+struct dma_map_ops *dma_ops = &nommu_dma_ops;
EXPORT_SYMBOL(dma_ops);
static int iommu_sac_force __read_mostly;
@@ -42,9 +43,6 @@ int iommu_detected __read_mostly = 0;
*/
int iommu_pass_through __read_mostly;
-dma_addr_t bad_dma_address __read_mostly = 0;
-EXPORT_SYMBOL(bad_dma_address);
-
/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
@@ -143,20 +141,19 @@ void __init pci_iommu_alloc(void)
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
#endif
+ if (pci_swiotlb_detect())
+ goto out;
- /*
- * The order of these functions is important for
- * fall-back/fail-over reasons
- */
gart_iommu_hole_init();
detect_calgary();
detect_intel_iommu();
+ /* needs to be called after gart_iommu_hole_init */
amd_iommu_detect();
-
- swiotlb_init();
+out:
+ swiotlb_init(1);
if (swiotlb) {
printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
dma_ops = &swiotlb_dma_ops;
@@ -268,7 +265,7 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "allowdac", 8))
forbid_dac = 0;
if (!strncmp(p, "nodac", 5))
- forbid_dac = -1;
+ forbid_dac = 1;
if (!strncmp(p, "usedac", 6)) {
forbid_dac = -1;
return 1;
@@ -370,25 +367,19 @@ static int __init pci_iommu_init(void)
#ifdef CONFIG_PCI
dma_debug_add_bus(&pci_bus_type);
#endif
+ x86_init.iommu.iommu_init();
- calgary_iommu_init();
-
- intel_iommu_init();
-
- amd_iommu_init();
-
- gart_iommu_init();
+#ifndef CONFIG_XEN
+ if (swiotlb) {
+ printk(KERN_INFO "PCI-DMA: "
+ "Using software bounce buffering for IO (SWIOTLB)\n");
+ swiotlb_print_info();
+ } else
+ swiotlb_free();
+#endif
- no_iommu_init();
return 0;
}
-
-void pci_iommu_shutdown(void)
-{
- gart_iommu_shutdown();
-
- amd_iommu_shutdown();
-}
/* Must execute after PCI subsystem */
rootfs_initcall(pci_iommu_init);
--- head-2011-03-17.orig/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -112,12 +112,3 @@ struct dma_map_ops nommu_dma_ops = {
.sync_sg_for_device = nommu_sync_sg_for_device,
.dma_supported = nommu_dma_supported,
};
-
-void __init no_iommu_init(void)
-{
- if (dma_ops)
- return;
-
- force_iommu = 0; /* no HW IOMMU */
- dma_ops = &nommu_dma_ops;
-}
--- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:07:49.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:09:35.000000000 +0100
@@ -9,7 +9,11 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
+#include <linux/user-return-notifier.h>
+#include <linux/dmi.h>
+#include <linux/utsname.h>
#include <trace/events/power.h>
+#include <linux/hw_breakpoint.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -17,6 +21,7 @@
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
#include <xen/evtchn.h>
unsigned long idle_halt;
@@ -89,30 +94,30 @@ void exit_thread(void)
}
}
-void flush_thread(void)
+void show_regs_common(void)
{
- struct task_struct *tsk = current;
+ const char *board, *product;
-#ifdef CONFIG_X86_64
- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
- if (test_tsk_thread_flag(tsk, TIF_IA32)) {
- clear_tsk_thread_flag(tsk, TIF_IA32);
- } else {
- set_tsk_thread_flag(tsk, TIF_IA32);
- current_thread_info()->status |= TS_COMPAT;
- }
- }
-#endif
+ board = dmi_get_system_info(DMI_BOARD_NAME);
+ if (!board)
+ board = "";
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
+ if (!product)
+ product = "";
+
+ printk(KERN_CONT "\n");
+ printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
+ current->pid, current->comm, print_tainted(),
+ init_utsname()->release,
+ (int)strcspn(init_utsname()->version, " "),
+ init_utsname()->version, board, product);
+}
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
+void flush_thread(void)
+{
+ struct task_struct *tsk = current;
- tsk->thread.debugreg0 = 0;
- tsk->thread.debugreg1 = 0;
- tsk->thread.debugreg2 = 0;
- tsk->thread.debugreg3 = 0;
- tsk->thread.debugreg6 = 0;
- tsk->thread.debugreg7 = 0;
+ flush_ptrace_hw_breakpoint(tsk);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
* Forget coprocessor state..
@@ -193,16 +198,6 @@ void __switch_to_xtra(struct task_struct
else if (next->debugctlmsr != prev->debugctlmsr)
update_debugctlmsr(next->debugctlmsr);
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
- set_debugreg(next->debugreg0, 0);
- set_debugreg(next->debugreg1, 1);
- set_debugreg(next->debugreg2, 2);
- set_debugreg(next->debugreg3, 3);
- /* no 4 and 5 */
- set_debugreg(next->debugreg6, 6);
- set_debugreg(next->debugreg7, 7);
- }
-
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
/* prev and next are different */
@@ -211,6 +206,7 @@ void __switch_to_xtra(struct task_struct
else
hard_enable_TSC();
}
+ propagate_user_return_notify(prev_p, next_p);
}
int sys_fork(struct pt_regs *regs)
@@ -234,6 +230,78 @@ int sys_vfork(struct pt_regs *regs)
NULL, NULL);
}
+long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+{
+ if (!newsp)
+ newsp = regs->sp;
+ return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+}
+
+/*
+ * This gets run with %si containing the
+ * function to call, and %di containing
+ * the "args".
+ */
+extern void kernel_thread_helper(void);
+
+/*
+ * Create a kernel thread
+ */
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+ struct pt_regs regs;
+
+ memset(&regs, 0, sizeof(regs));
+
+ regs.si = (unsigned long) fn;
+ regs.di = (unsigned long) arg;
+
+#ifdef CONFIG_X86_32
+ regs.ds = __USER_DS;
+ regs.es = __USER_DS;
+ regs.fs = __KERNEL_PERCPU;
+ regs.gs = __KERNEL_STACK_CANARY;
+#else
+ regs.ss = __KERNEL_DS;
+#endif
+
+ regs.orig_ax = -1;
+ regs.ip = (unsigned long) kernel_thread_helper;
+ regs.cs = __KERNEL_CS | get_kernel_rpl();
+ regs.flags = X86_EFLAGS_IF | 0x2;
+
+ /* Ok, create the new process.. */
+ return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
+}
+EXPORT_SYMBOL(kernel_thread);
+
+/*
+ * sys_execve() executes a new program.
+ */
+long sys_execve(char __user *name, char __user * __user *argv,
+ char __user * __user *envp, struct pt_regs *regs)
+{
+ long error;
+ char *filename;
+
+ filename = getname(name);
+ error = PTR_ERR(filename);
+ if (IS_ERR(filename))
+ return error;
+ error = do_execve(filename, argv, envp, regs);
+
+#ifdef CONFIG_X86_32
+ if (error == 0) {
+ /* Make sure we don't return using sysenter.. */
+ set_thread_flag(TIF_IRET);
+ }
+#endif
+
+ putname(filename);
+ return error;
+}
/*
* Idle related variables and functions
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:38:03.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:07.000000000 +0100
@@ -23,7 +23,6 @@
#include <linux/vmalloc.h>
#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/reboot.h>
#include <linux/init.h>
@@ -35,7 +34,6 @@
#include <linux/tick.h>
#include <linux/percpu.h>
#include <linux/prctl.h>
-#include <linux/dmi.h>
#include <linux/ftrace.h>
#include <linux/uaccess.h>
#include <linux/io.h>
@@ -60,6 +58,7 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
@@ -130,39 +129,29 @@ void __show_regs(struct pt_regs *regs, i
unsigned long d0, d1, d2, d3, d6, d7;
unsigned long sp;
unsigned short ss, gs;
- const char *board;
if (user_mode_vm(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
gs = get_user_gs(regs);
} else {
- sp = (unsigned long) (&regs->sp);
+ sp = kernel_stack_pointer(regs);
savesegment(ss, ss);
savesegment(gs, gs);
}
- printk("\n");
+ show_regs_common();
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
- task_pid_nr(current), current->comm,
- print_tainted(), init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
-
- printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+ printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
(u16)regs->cs, regs->ip, regs->flags,
smp_processor_id());
print_symbol("EIP is at %s\n", regs->ip);
- printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
+ printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
regs->ax, regs->bx, regs->cx, regs->dx);
- printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+ printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
regs->si, regs->di, regs->bp, sp);
- printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+ printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
if (!all)
@@ -172,61 +161,28 @@ void __show_regs(struct pt_regs *regs, i
cr2 = read_cr2();
cr3 = read_cr3();
cr4 = read_cr4_safe();
- printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+ printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
cr0, cr2, cr3, cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
get_debugreg(d3, 3);
- printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
+ printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
d0, d1, d2, d3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk("DR6: %08lx DR7: %08lx\n",
+ printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
d6, d7);
}
void show_regs(struct pt_regs *regs)
{
- __show_regs(regs, 1);
+ show_registers(regs);
show_trace(NULL, regs, &regs->sp, regs->bp);
}
-/*
- * This gets run with %bx containing the
- * function to call, and %dx containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
- struct pt_regs regs;
-
- memset(&regs, 0, sizeof(regs));
-
- regs.bx = (unsigned long) fn;
- regs.dx = (unsigned long) arg;
-
- regs.ds = __USER_DS;
- regs.es = __USER_DS;
- regs.fs = __KERNEL_PERCPU;
- regs.gs = __KERNEL_STACK_CANARY;
- regs.orig_ax = -1;
- regs.ip = (unsigned long) kernel_thread_helper;
- regs.cs = __KERNEL_CS | get_kernel_rpl();
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
-
- /* Ok, create the new process.. */
- return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
void release_thread(struct task_struct *dead_task)
{
BUG_ON(dead_task->mm);
@@ -262,7 +218,12 @@ int copy_thread(unsigned long clone_flag
task_user_gs(p) = get_user_gs(regs);
+ p->thread.io_bitmap_ptr = NULL;
tsk = current;
+ err = -ENOMEM;
+
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (test_tsk_thread_flag(tsk, TIF_CSTAR))
p->thread.ip = (unsigned long) cstar_ret_from_fork;
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -497,46 +458,6 @@ __switch_to(struct task_struct *prev_p,
return prev_p;
}
-int sys_clone(struct pt_regs *regs)
-{
- unsigned long clone_flags;
- unsigned long newsp;
- int __user *parent_tidptr, *child_tidptr;
-
- clone_flags = regs->bx;
- newsp = regs->cx;
- parent_tidptr = (int __user *)regs->dx;
- child_tidptr = (int __user *)regs->di;
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
-}
-
-/*
- * sys_execve() executes a new program.
- */
-int sys_execve(struct pt_regs *regs)
-{
- int error;
- char *filename;
-
- filename = getname((char __user *) regs->bx);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- goto out;
- error = do_execve(filename,
- (char __user * __user *) regs->cx,
- (char __user * __user *) regs->dx,
- regs);
- if (error == 0) {
- /* Make sure we don't return using sysenter.. */
- set_thread_flag(TIF_IRET);
- }
- putname(filename);
-out:
- return error;
-}
-
#define top_esp (THREAD_SIZE - sizeof(unsigned long))
#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:37:59.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:12.000000000 +0100
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/interrupt.h>
-#include <linux/utsname.h>
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/ptrace.h>
@@ -41,7 +40,6 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
-#include <linux/dmi.h>
#include <asm/pgtable.h>
#include <asm/system.h>
@@ -57,13 +55,12 @@
#include <asm/idle.h>
#include <asm/syscalls.h>
#include <asm/ds.h>
+#include <asm/debugreg.h>
asmlinkage extern void ret_from_fork(void);
static DEFINE_PER_CPU(unsigned char, is_idle);
-unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
-
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
void idle_notifier_register(struct notifier_block *n)
@@ -166,31 +163,21 @@ void __show_regs(struct pt_regs *regs, i
unsigned long d0, d1, d2, d3, d6, d7;
unsigned int fsindex, gsindex;
unsigned int ds, cs, es;
- const char *board;
- printk("\n");
- print_modules();
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
- if (!board)
- board = "";
- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
- current->pid, current->comm, print_tainted(),
- init_utsname()->release,
- (int)strcspn(init_utsname()->version, " "),
- init_utsname()->version, board);
- printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
+ show_regs_common();
+ printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
printk_address(regs->ip, 1);
- printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
+ printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
regs->sp, regs->flags);
- printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
+ printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->ax, regs->bx, regs->cx);
- printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
+ printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
regs->dx, regs->si, regs->di);
- printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
+ printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
regs->bp, regs->r8, regs->r9);
- printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
+ printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
regs->r10, regs->r11, regs->r12);
- printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
+ printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
regs->r13, regs->r14, regs->r15);
asm("movl %%ds,%0" : "=r" (ds));
@@ -211,27 +198,26 @@ void __show_regs(struct pt_regs *regs, i
cr3 = read_cr3();
cr4 = read_cr4();
- printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
+ printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
fs, fsindex, gs, gsindex, shadowgs);
- printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
+ printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
es, cr0);
- printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
+ printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
cr4);
get_debugreg(d0, 0);
get_debugreg(d1, 1);
get_debugreg(d2, 2);
- printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
+ printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
get_debugreg(d3, 3);
get_debugreg(d6, 6);
get_debugreg(d7, 7);
- printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
+ printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
}
void show_regs(struct pt_regs *regs)
{
- printk(KERN_INFO "CPU %d:", smp_processor_id());
- __show_regs(regs, 1);
+ show_registers(regs);
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
}
@@ -239,6 +225,7 @@ void xen_load_gs_index(unsigned gs)
{
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
}
+EXPORT_SYMBOL(xen_load_gs_index);
void release_thread(struct task_struct *dead_task)
{
@@ -294,8 +281,9 @@ int copy_thread(unsigned long clone_flag
*childregs = *regs;
childregs->ax = 0;
- childregs->sp = sp;
- if (sp == ~0UL)
+ if (user_mode(regs))
+ childregs->sp = sp;
+ else
childregs->sp = (unsigned long)childregs;
p->thread.sp = (unsigned long) childregs;
@@ -305,12 +293,16 @@ int copy_thread(unsigned long clone_flag
p->thread.fs = me->thread.fs;
p->thread.gs = me->thread.gs;
+ p->thread.io_bitmap_ptr = NULL;
savesegment(gs, p->thread.gsindex);
savesegment(fs, p->thread.fsindex);
savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds);
+ err = -ENOMEM;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
if (!p->thread.io_bitmap_ptr) {
@@ -350,28 +342,45 @@ out:
kfree(p->thread.io_bitmap_ptr);
p->thread.io_bitmap_max = 0;
}
+
return err;
}
-void
-start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+static void
+start_thread_common(struct pt_regs *regs, unsigned long new_ip,
+ unsigned long new_sp,
+ unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
loadsegment(fs, 0);
- loadsegment(es, 0);
- loadsegment(ds, 0);
+ loadsegment(es, _ds);
+ loadsegment(ds, _ds);
load_gs_index(0);
regs->ip = new_ip;
regs->sp = new_sp;
- regs->cs = __USER_CS;
- regs->ss = __USER_DS;
- regs->flags = 0x200;
+ regs->cs = _cs;
+ regs->ss = _ss;
+ regs->flags = X86_EFLAGS_IF;
set_fs(USER_DS);
/*
* Free the old FP and other extended state
*/
free_thread_xstate(current);
}
-EXPORT_SYMBOL_GPL(start_thread);
+
+void
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
+{
+ start_thread_common(regs, new_ip, new_sp,
+ __USER_CS, __USER_DS, 0);
+}
+
+#ifdef CONFIG_IA32_EMULATION
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
+{
+ start_thread_common(regs, new_ip, new_sp,
+ __USER32_CS, __USER32_DS, __USER32_DS);
+}
+#endif
/*
* switch_to(x,y) should switch tasks from x to y.
@@ -561,26 +570,8 @@ __switch_to(struct task_struct *prev_p,
*/
if (preload_fpu)
__math_state_restore();
- return prev_p;
-}
-/*
- * sys_execve() executes a new program.
- */
-asmlinkage
-long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
-{
- long error;
- char *filename;
-
- filename = getname(name);
- error = PTR_ERR(filename);
- if (IS_ERR(filename))
- return error;
- error = do_execve(filename, argv, envp, regs);
- putname(filename);
- return error;
+ return prev_p;
}
void set_personality_64bit(void)
@@ -597,13 +588,16 @@ void set_personality_64bit(void)
current->personality &= ~READ_IMPLIES_EXEC;
}
-asmlinkage long
-sys_clone(unsigned long clone_flags, unsigned long newsp,
- void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
-{
- if (!newsp)
- newsp = regs->sp;
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
+void set_personality_ia32(void)
+{
+ /* inherit personality from parent */
+
+ /* Make sure to be in 32bit mode */
+ set_thread_flag(TIF_IA32);
+ current->personality |= force_personality32;
+
+ /* Prepare the first "return" to user space */
+ current_thread_info()->status |= TS_COMPAT;
}
unsigned long get_wchan(struct task_struct *p)
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-04 15:09:48.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:24:24.000000000 +0100
@@ -73,6 +73,7 @@
#include <asm/mtrr.h>
#include <asm/apic.h>
+#include <asm/trampoline.h>
#include <asm/e820.h>
#include <asm/mpspec.h>
#include <asm/setup.h>
@@ -106,9 +107,11 @@
#include <asm/percpu.h>
#include <asm/topology.h>
#include <asm/apicdef.h>
+#include <asm/k8.h>
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
+#include <asm/mce.h>
#ifdef CONFIG_XEN
#include <asm/hypervisor.h>
@@ -281,7 +284,7 @@ EXPORT_SYMBOL(edd);
* from boot_params into a safe place.
*
*/
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
{
memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
sizeof(edd.mbr_signature));
@@ -291,7 +294,7 @@ static inline void copy_edd(void)
}
#endif
#else
-static inline void copy_edd(void)
+static inline void __init copy_edd(void)
{
}
#endif
@@ -541,49 +544,18 @@ static void __init reserve_early_setup_d
#endif
}
+#ifndef CONFIG_XEN
/*
* --------- Crashkernel reservation ------------------------------
*/
#ifdef CONFIG_KEXEC
-#ifndef CONFIG_XEN
-/**
- * Reserve @size bytes of crashkernel memory at any suitable offset.
- *
- * @size: Size of the crashkernel memory to reserve.
- * Returns the base address on success, and -1ULL on failure.
- */
-static
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
-{
- const unsigned long long alignment = 16<<20; /* 16M */
- unsigned long long start = 0LL;
-
- while (1) {
- int ret;
-
- start = find_e820_area(start, ULONG_MAX, size, alignment);
- if (start == -1ULL)
- return start;
-
- /* try to reserve it */
- ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
- if (ret >= 0)
- return start;
-
- start += alignment;
- }
-}
-
static inline unsigned long long get_total_mem(void)
{
unsigned long long total;
- total = max_low_pfn - min_low_pfn;
-#ifdef CONFIG_HIGHMEM
- total += highend_pfn - highstart_pfn;
-#endif
+ total = max_pfn - min_low_pfn;
return total << PAGE_SHIFT;
}
@@ -603,21 +575,25 @@ static void __init reserve_crashkernel(v
/* 0 means: find the address automatically */
if (crash_base <= 0) {
- crash_base = find_and_reserve_crashkernel(crash_size);
+ const unsigned long long alignment = 16<<20; /* 16M */
+
+ crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
+ alignment);
if (crash_base == -1ULL) {
- pr_info("crashkernel reservation failed. "
- "No suitable area found.\n");
+ pr_info("crashkernel reservation failed - No suitable area found.\n");
return;
}
} else {
- ret = reserve_bootmem_generic(crash_base, crash_size,
- BOOTMEM_EXCLUSIVE);
- if (ret < 0) {
- pr_info("crashkernel reservation failed - "
- "memory is in use\n");
+ unsigned long long start;
+
+ start = find_e820_area(crash_base, ULONG_MAX, crash_size,
+ 1<<20);
+ if (start != crash_base) {
+ pr_info("crashkernel reservation failed - memory is in use.\n");
return;
}
}
+ reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
"for crashkernel (System RAM: %ldMB)\n",
@@ -630,13 +606,11 @@ static void __init reserve_crashkernel(v
insert_resource(&iomem_resource, &crashk_res);
}
#else
-#define reserve_crashkernel xen_machine_kexec_setup_resources
-#endif
-#else
static void __init reserve_crashkernel(void)
{
}
#endif
+#endif /* CONFIG_XEN */
static struct resource standard_io_resources[] = {
{ .name = "dma1", .start = 0x00, .end = 0x1f,
@@ -735,19 +709,27 @@ static struct dmi_system_id __initdata b
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
},
},
- {
/*
- * AMI BIOS with low memory corruption was found on Intel DG45ID board.
- * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
+ * AMI BIOS with low memory corruption was found on Intel DG45ID and
+ * DG45FC boards.
+ * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
* match only DMI_BOARD_NAME and see if there is more bad products
* with this vendor.
*/
+ {
.callback = dmi_low_memory_corruption,
.ident = "AMI BIOS",
.matches = {
DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
},
},
+ {
+ .callback = dmi_low_memory_corruption,
+ .ident = "AMI BIOS",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
+ },
+ },
#endif
{}
};
@@ -767,6 +749,8 @@ static struct dmi_system_id __initdata b
void __init setup_arch(char **cmdline_p)
{
+ int acpi = 0;
+ int k8 = 0;
#ifdef CONFIG_XEN
unsigned int i;
unsigned long p2m_pages;
@@ -903,21 +887,18 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
-#ifdef CONFIG_X86_64
/*
- * Must call this twice: Once just to detect whether hardware doesn't
- * support NX (so that the early EHCI debug console setup can safely
- * call set_fixmap(), and then again after parsing early parameters to
- * honor the respective command line option.
+ * x86_configure_nx() is called before parse_early_param() to detect
+ * whether hardware doesn't support NX (so that the early EHCI debug
+ * console setup can safely call set_fixmap()). It may then be called
+ * again from within noexec_setup() during parsing early parameters
+ * to honor the respective command line option.
*/
- check_efer();
-#endif
+ x86_configure_nx();
parse_early_param();
-#ifdef CONFIG_X86_64
- check_efer();
-#endif
+ x86_report_nx();
/* Must be before kernel pagetables are setup */
vmi_activate();
@@ -1024,6 +1005,20 @@ void __init setup_arch(char **cmdline_p)
reserve_brk();
+ /*
+ * Find and reserve possible boot-time SMP configuration:
+ */
+ find_smp_config();
+
+ reserve_trampoline_memory();
+
+#ifdef CONFIG_ACPI_SLEEP
+ /*
+ * Reserve low memory region for sleep support.
+ * even before init_memory_mapping
+ */
+ acpi_reserve_wakeup_memory();
+#endif
init_gbpages();
/* max_pfn_mapped is updated here */
@@ -1051,6 +1046,8 @@ void __init setup_arch(char **cmdline_p)
reserve_initrd();
#ifndef CONFIG_XEN
+ reserve_crashkernel();
+
vsmp_init();
#endif
@@ -1074,23 +1071,15 @@ void __init setup_arch(char **cmdline_p)
/*
* Parse SRAT to discover nodes.
*/
- acpi_numa_init();
+ acpi = acpi_numa_init();
#endif
- initmem_init(0, max_pfn);
-
-#ifdef CONFIG_ACPI_SLEEP
- /*
- * Reserve low memory region for sleep support.
- */
- acpi_reserve_bootmem();
+#ifdef CONFIG_K8_NUMA
+ if (!acpi)
+ k8 = !k8_numa_init(0, max_pfn);
#endif
- /*
- * Find and reserve possible boot-time SMP configuration:
- */
- find_smp_config();
- reserve_crashkernel();
+ initmem_init(0, max_pfn, acpi, k8);
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
/*
@@ -1118,6 +1107,9 @@ void __init setup_arch(char **cmdline_p)
#endif
#ifdef CONFIG_XEN
+#ifdef CONFIG_KEXEC
+ xen_machine_kexec_setup_resources();
+#endif
p2m_pages = max_pfn;
if (xen_start_info->nr_pages > max_pfn) {
/*
@@ -1260,6 +1252,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif /* CONFIG_XEN */
x86_init.oem.banner();
+
+ mcheck_init();
}
#ifdef CONFIG_X86_32
--- head-2011-03-17.orig/arch/x86/kernel/time-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/time-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -937,28 +937,23 @@ core_initcall(cpufreq_time_setup);
*/
static ctl_table xen_subtable[] = {
{
- .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK,
.procname = "independent_wallclock",
.data = &independent_wallclock,
.maxlen = sizeof(independent_wallclock),
.mode = 0644,
- .strategy = sysctl_data,
.proc_handler = proc_dointvec
},
{
- .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER,
.procname = "permitted_clock_jitter",
.data = &permitted_clock_jitter,
.maxlen = sizeof(permitted_clock_jitter),
.mode = 0644,
- .strategy = sysctl_data,
.proc_handler = proc_doulongvec_minmax
},
{ }
};
static ctl_table xen_table[] = {
{
- .ctl_name = CTL_XEN,
.procname = "xen",
.mode = 0555,
.child = xen_subtable
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-16 13:56:25.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -522,77 +522,56 @@ asmlinkage __kprobes struct pt_regs *syn
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
{
struct task_struct *tsk = current;
- unsigned long condition;
+ unsigned long dr6;
int si_code;
- get_debugreg(condition, 6);
+ get_debugreg(dr6, 6);
/* Catch kmemcheck conditions first of all! */
- if (condition & DR_STEP && kmemcheck_trap(regs))
+ if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
return;
+ /* DR6 may or may not be cleared by the CPU */
+ set_debugreg(0, 6);
/*
* The processor cleared BTF, so don't mark that we need it set.
*/
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
tsk->thread.debugctlmsr = 0;
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
- SIGTRAP) == NOTIFY_STOP)
+ /* Store the virtualized DR6 value */
+ tsk->thread.debugreg6 = dr6;
+
+ if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
+ SIGTRAP) == NOTIFY_STOP)
return;
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- /* Mask out spurious debug traps due to lazy DR7 setting */
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
- if (!tsk->thread.debugreg7)
- goto clear_dr7;
- }
-
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK)
- goto debug_vm86;
-#endif
-
- /* Save debug status register where ptrace can see it */
- tsk->thread.debugreg6 = condition;
-
- /*
- * Single-stepping through TF: make sure we ignore any events in
- * kernel space (but re-enable TF when returning to user mode).
- */
- if (condition & DR_STEP) {
- if (!user_mode(regs))
- goto clear_TF_reenable;
+ if (regs->flags & X86_VM_MASK) {
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
+ error_code, 1);
+ return;
}
- si_code = get_si_code(condition);
- /* Ok, finally something we can handle */
- send_sigtrap(tsk, regs, error_code, si_code);
-
/*
- * Disable additional traps. They'll be re-enabled when
- * the signal is delivered.
+ * Single-stepping through system calls: ignore any exceptions in
+ * kernel space, but re-enable TF when returning to user mode.
+ *
+ * We already checked v86 mode above, so we can check for kernel mode
+ * by just checking the CPL of CS.
*/
-clear_dr7:
- set_debugreg(0, 7);
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
+ tsk->thread.debugreg6 &= ~DR_STEP;
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
+ regs->flags &= ~X86_EFLAGS_TF;
+ }
+ si_code = get_si_code(tsk->thread.debugreg6);
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
+ send_sigtrap(tsk, regs, error_code, si_code);
preempt_conditional_cli(regs);
- return;
-#ifdef CONFIG_X86_32
-debug_vm86:
- /* reenable preemption: handle_vm86_trap() might sleep */
- dec_preempt_count();
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
- conditional_cli(regs);
- return;
-#endif
-
-clear_TF_reenable:
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
- regs->flags &= ~X86_EFLAGS_TF;
- preempt_conditional_cli(regs);
return;
}
--- head-2011-03-17.orig/arch/x86/kernel/vmlinux.lds.S 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/vmlinux.lds.S 2011-02-01 14:55:46.000000000 +0100
@@ -43,7 +43,7 @@ ENTRY(phys_startup_64)
jiffies_64 = jiffies;
#endif
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) && !defined(CONFIG_XEN)
/*
* On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
* we retain large page mappings for boundaries spanning kernel text, rodata
--- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
+ u32 mult)
{
unsigned long flags;
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wa
vsyscall_gtod_data.clock.vread = clock->vread;
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
vsyscall_gtod_data.clock.mask = clock->mask;
- vsyscall_gtod_data.clock.mult = clock->mult;
+ vsyscall_gtod_data.clock.mult = mult;
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = {
};
static ctl_table kernel_root_table2[] = {
- { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
+ { .procname = "kernel", .mode = 0555,
.child = kernel_table2 },
{}
};
--- head-2011-03-17.orig/arch/x86/kernel/x8664_ksyms_64.c 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/x8664_ksyms_64.c 2011-02-01 14:55:46.000000000 +0100
@@ -54,6 +54,6 @@ EXPORT_SYMBOL(memcpy);
EXPORT_SYMBOL(__memcpy);
EXPORT_SYMBOL(empty_zero_page);
-#ifndef CONFIG_PARAVIRT
+#if !defined(CONFIG_PARAVIRT) && !defined(CONFIG_XEN)
EXPORT_SYMBOL(native_load_gs_index);
#endif
--- head-2011-03-17.orig/arch/x86/kernel/x86_init-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/x86_init-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -13,10 +13,13 @@
#include <asm/e820.h>
#include <asm/time.h>
#include <asm/irq.h>
+#include <asm/pat.h>
+#include <asm/iommu.h>
void __cpuinit x86_init_noop(void) { }
void __init x86_init_uint_noop(unsigned int unused) { }
void __init x86_init_pgd_noop(pgd_t *unused) { }
+int __init iommu_init_noop(void) { return 0; }
/*
* The platform setup functions are preset with the default functions
@@ -61,10 +64,15 @@ struct x86_init_ops x86_init __initdata
.tsc_pre_init = x86_init_noop,
.timer_init = x86_init_noop,
},
+
+ .iommu = {
+ .iommu_init = iommu_init_noop,
+ },
};
struct x86_platform_ops x86_platform = {
.calibrate_tsc = NULL,
.get_wallclock = mach_get_cmos_time,
.set_wallclock = mach_set_rtc_mmss,
+ .is_untracked_pat_range = is_ISA_range,
};
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
* Returns 0 if mmiotrace is disabled, or if the fault is not
* handled by mmiotrace:
*/
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
+static inline int __kprobes
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
if (unlikely(is_kmmio_active()))
if (kmmio_handler(regs, addr) == 1)
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_
return 0;
}
-static inline int notify_page_fault(struct pt_regs *regs)
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
{
int ret = 0;
@@ -257,7 +258,7 @@ void vmalloc_sync_all(void)
*
* Handle a fault on the vmalloc or module mapping area
*/
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
{
unsigned long pgd_paddr;
pmd_t *pmd_k;
@@ -376,7 +377,7 @@ void vmalloc_sync_all(void)
*
* This assumes no large pages in there.
*/
-static noinline int vmalloc_fault(unsigned long address)
+static noinline __kprobes int vmalloc_fault(unsigned long address)
{
pgd_t *pgd, *pgd_ref;
pud_t *pud, *pud_ref;
@@ -677,7 +678,7 @@ no_context(struct pt_regs *regs, unsigne
show_fault_oops(regs, error_code, address);
stackend = end_of_stack(tsk);
- if (*stackend != STACK_END_MAGIC)
+ if (tsk != &init_task && *stackend != STACK_END_MAGIC)
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
tsk->thread.cr2 = address;
@@ -879,7 +880,7 @@ static int spurious_fault_check(unsigned
* There are no security implications to leaving a stale TLB when
* increasing the permissions on a page.
*/
-static noinline int
+static noinline __kprobes int
spurious_fault(unsigned long error_code, unsigned long address)
{
pgd_t *pgd;
--- head-2011-03-17.orig/arch/x86/mm/init-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -163,10 +163,6 @@ unsigned long __init_refok init_memory_m
use_gbpages = direct_gbpages;
#endif
- set_nx();
- if (nx_enabled)
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
-
/* Enable PSE if available */
if (cpu_has_pse)
set_in_cr4(X86_CR4_PSE);
--- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -448,7 +448,7 @@ static void __init permanent_kmaps_init(
pkmap_page_table = pte;
}
-static void __init add_one_highpage_init(struct page *page, int pfn)
+static void __init add_one_highpage_init(struct page *page)
{
ClearPageReserved(page);
init_page_count(page);
@@ -481,7 +481,7 @@ static int __init add_highpages_work_fn(
if (!pfn_valid(node_pfn))
continue;
page = pfn_to_page(node_pfn);
- add_one_highpage_init(page, node_pfn);
+ add_one_highpage_init(page);
}
return 0;
@@ -705,8 +705,8 @@ void __init find_low_pfn_range(void)
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
-void __init initmem_init(unsigned long start_pfn,
- unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
#ifdef CONFIG_HIGHMEM
highstart_pfn = highend_pfn = max_pfn;
@@ -955,8 +955,7 @@ void __init mem_init(void)
reservedpages << (PAGE_SHIFT-10),
datasize >> 10,
initsize >> 10,
- (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
- );
+ totalhigh_pages << (PAGE_SHIFT-10));
printk(KERN_INFO "virtual kernel memory layout:\n"
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
@@ -1062,7 +1061,7 @@ static noinline int do_test_wp_bit(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly __read_mostly;
void set_kernel_text_rw(void)
{
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -53,6 +53,7 @@
#include <asm/cacheflush.h>
#include <asm/init.h>
#include <asm/setup.h>
+#include <linux/bootmem.h>
#include <xen/features.h>
@@ -809,7 +810,8 @@ kernel_physical_mapping_init(unsigned lo
}
#ifndef CONFIG_NUMA
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
+ int acpi, int k8)
{
unsigned long bootmap_size, bootmap;
@@ -862,6 +864,21 @@ void __init paging_init(void)
*/
#ifdef CONFIG_MEMORY_HOTPLUG
/*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+ unsigned long end_pfn = PFN_UP(start + size);
+
+ if (end_pfn > max_pfn) {
+ max_pfn = end_pfn;
+ max_low_pfn = end_pfn;
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+ }
+}
+
+/*
* Memory is added always to NORMAL zone. This means you will never get
* additional DMA/DMA32 memory.
*/
@@ -880,6 +897,9 @@ int arch_add_memory(int nid, u64 start,
ret = __add_pages(nid, zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start, size);
+
return ret;
}
EXPORT_SYMBOL_GPL(arch_add_memory);
@@ -948,12 +968,12 @@ void __init mem_init(void)
const int rodata_test_data = 0xC3;
EXPORT_SYMBOL_GPL(rodata_test_data);
-static int kernel_set_to_readonly;
+int kernel_set_to_readonly;
void set_kernel_text_rw(void)
{
- unsigned long start = PFN_ALIGN(_stext);
- unsigned long end = PFN_ALIGN(__start_rodata);
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly)
return;
@@ -961,13 +981,18 @@ void set_kernel_text_rw(void)
pr_debug("Set kernel text: %lx - %lx for read write\n",
start, end);
+ /*
+ * Make the kernel identity mapping for text RW. Kernel text
+ * mapping will always be RO. Refer to the comment in
+ * static_protections() in pageattr.c
+ */
set_memory_rw(start, (end - start) >> PAGE_SHIFT);
}
void set_kernel_text_ro(void)
{
- unsigned long start = PFN_ALIGN(_stext);
- unsigned long end = PFN_ALIGN(__start_rodata);
+ unsigned long start = PFN_ALIGN(_text);
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
if (!kernel_set_to_readonly)
return;
@@ -975,14 +1000,21 @@ void set_kernel_text_ro(void)
pr_debug("Set kernel text: %lx - %lx for read only\n",
start, end);
+ /*
+ * Set the kernel identity mapping for text RO.
+ */
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
}
void mark_rodata_ro(void)
{
- unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
+ unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start =
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
+ unsigned long end = (unsigned long) &__end_rodata;
+ unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
+ unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
+ unsigned long data_start = (unsigned long) &_sdata;
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
(end - start) >> 10);
@@ -1005,6 +1037,14 @@ void mark_rodata_ro(void)
printk(KERN_INFO "Testing CPA: again\n");
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
+
+ free_init_pages("unused kernel memory",
+ (unsigned long) page_address(virt_to_page(text_end)),
+ (unsigned long)
+ page_address(virt_to_page(rodata_start)));
+ free_init_pages("unused kernel memory",
+ (unsigned long) page_address(virt_to_page(rodata_end)),
+ (unsigned long) page_address(virt_to_page(data_start)));
}
#endif
--- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:38.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:45.000000000 +0100
@@ -438,32 +438,6 @@ void __iomem *ioremap_cache(resource_siz
}
EXPORT_SYMBOL(ioremap_cache);
-#ifndef CONFIG_XEN
-static void __iomem *ioremap_default(resource_size_t phys_addr,
- unsigned long size)
-{
- unsigned long flags;
- void __iomem *ret;
- int err;
-
- /*
- * - WB for WB-able memory and no other conflicting mappings
- * - UC_MINUS for non-WB-able memory with no other conflicting mappings
- * - Inherit from confliting mappings otherwise
- */
- err = reserve_memtype(phys_addr, phys_addr + size,
- _PAGE_CACHE_WB, &flags);
- if (err < 0)
- return NULL;
-
- ret = __ioremap_caller(phys_addr, size, flags,
- __builtin_return_address(0));
-
- free_memtype(phys_addr, phys_addr + size);
- return ret;
-}
-#endif
-
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
unsigned long prot_val)
{
@@ -539,7 +513,7 @@ void *xlate_dev_mem_ptr(unsigned long ph
if (page_is_ram(start >> PAGE_SHIFT))
return __va(phys);
- addr = (void __force *)ioremap_default(start, PAGE_SIZE);
+ addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
if (addr)
addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
--- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -281,6 +281,22 @@ static inline pgprot_t static_protection
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) && !defined(CONFIG_XEN)
+ /*
+ * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
+ * kernel text mappings for the large page aligned text, rodata sections
+ * will be always read-only. For the kernel identity mappings covering
+ * the holes caused by this alignment can be anything that user asks.
+ *
+ * This will preserve the large page mappings for kernel text/data
+ * at no extra cost.
+ */
+ if (kernel_set_to_readonly &&
+ within(address, (unsigned long)_text,
+ (unsigned long)__end_rodata_hpage_align))
+ pgprot_val(forbidden) |= _PAGE_RW;
+#endif
+
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
return prot;
@@ -1135,12 +1151,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
int set_memory_x(unsigned long addr, int numpages)
{
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);
int set_memory_nx(unsigned long addr, int numpages)
{
+ if (!(__supported_pte_mask & _PAGE_NX))
+ return 0;
+
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);
--- head-2011-03-17.orig/arch/x86/mm/pat-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pat-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -20,6 +20,7 @@
#include <asm/cacheflush.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
+#include <asm/x86_init.h>
#include <asm/pgtable.h>
#include <asm/fcntl.h>
#include <asm/e820.h>
@@ -381,9 +382,6 @@ static int free_ram_pages_type(u64 start
* - _PAGE_CACHE_UC_MINUS
* - _PAGE_CACHE_UC
*
- * req_type will have a special case value '-1', when requester want to inherit
- * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
- *
* If new_type is NULL, function will return an error if it cannot reserve the
* region with req_type. If new_type is non-NULL, function will return
* available type in new_type in case of no error. In case of any error
@@ -403,9 +401,7 @@ int reserve_memtype(u64 start, u64 end,
if (!pat_enabled) {
/* This is identical to page table setting without PAT */
if (new_type) {
- if (req_type == -1)
- *new_type = _PAGE_CACHE_WB;
- else if (req_type == _PAGE_CACHE_WC)
+ if (req_type == _PAGE_CACHE_WC)
*new_type = _PAGE_CACHE_UC_MINUS;
else
*new_type = req_type & _PAGE_CACHE_MASK;
@@ -414,7 +410,7 @@ int reserve_memtype(u64 start, u64 end,
}
/* Low ISA region is always mapped WB in page table. No need to track */
- if (is_ISA_range(start, end - 1)) {
+ if (x86_platform.is_untracked_pat_range(start, end)) {
if (new_type)
*new_type = _PAGE_CACHE_WB;
return 0;
@@ -525,7 +521,7 @@ int free_memtype(u64 start, u64 end)
return 0;
/* Low ISA region is always mapped WB. No need to track */
- if (is_ISA_range(start, end - 1))
+ if (x86_platform.is_untracked_pat_range(start, end))
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
@@ -609,7 +605,7 @@ static unsigned long lookup_memtype(u64
int rettype = _PAGE_CACHE_WB;
struct memtype *entry;
- if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
return rettype;
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
@@ -736,9 +732,8 @@ int phys_mem_access_prot_allowed(struct
if (!range_is_allowed(mfn, size))
return 0;
- if (file->f_flags & O_SYNC) {
+ if (file->f_flags & O_DSYNC)
flags = _PAGE_CACHE_UC_MINUS;
- }
#ifndef CONFIG_X86_32
#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
@@ -1032,8 +1027,10 @@ static const struct file_operations memt
static int __init pat_memtype_list_init(void)
{
- debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
- NULL, &memtype_fops);
+ if (pat_enabled) {
+ debugfs_create_file("pat_memtype_list", S_IRUSR,
+ arch_debugfs_dir, NULL, &memtype_fops);
+ }
return 0;
}
--- head-2011-03-17.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -436,7 +436,6 @@ static ctl_table abi_table2[] = {
static ctl_table abi_root_table2[] = {
{
- .ctl_name = CTL_ABI,
.procname = "abi",
.mode = 0555,
.child = abi_table2
--- head-2011-03-17.orig/drivers/gpu/drm/vmwgfx/Kconfig 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/gpu/drm/vmwgfx/Kconfig 2011-02-01 14:55:46.000000000 +0100
@@ -1,6 +1,6 @@
config DRM_VMWGFX
tristate "DRM driver for VMware Virtual GPU"
- depends on DRM && PCI && FB
+ depends on DRM && PCI && FB && !XEN
select FB_DEFERRED_IO
select FB_CFB_FILLRECT
select FB_CFB_COPYAREA
--- head-2011-03-17.orig/drivers/hwmon/Kconfig 2011-01-31 17:32:29.000000000 +0100
+++ head-2011-03-17/drivers/hwmon/Kconfig 2011-03-11 11:00:24.000000000 +0100
@@ -943,7 +943,7 @@ config SENSORS_TMP421
config SENSORS_VIA_CPUTEMP
tristate "VIA CPU temperature sensor"
- depends on X86
+ depends on X86 && !XEN
help
If you say yes here you get support for the temperature
sensor inside your CPU. Supported are all known variants of
--- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -31,6 +31,7 @@
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/platform_device.h>
+#include <linux/pci.h>
#include <asm/msr.h>
#include <xen/pcpu.h>
#include "../xen/core/domctl.h"
@@ -166,6 +167,7 @@ static int adjust_tjmax(struct coretemp_
int usemsr_ee = 1;
int err;
u32 eax, edx;
+ struct pci_dev *host_bridge;
/* Early chips have no MSR for TjMax */
@@ -173,11 +175,21 @@ static int adjust_tjmax(struct coretemp_
usemsr_ee = 0;
}
- /* Atoms seems to have TjMax at 90C */
+ /* Atom CPUs */
if (c->x86_model == 0x1c) {
usemsr_ee = 0;
- tjmax = 90000;
+
+ host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
+
+ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL
+ && (host_bridge->device == 0xa000 /* NM10 based nettop */
+ || host_bridge->device == 0xa010)) /* NM10 based netbook */
+ tjmax = 100000;
+ else
+ tjmax = 90000;
+
+ pci_dev_put(host_bridge);
}
if ((c->x86_model > 0xe) && (usemsr_ee)) {
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-17/drivers/hwmon/via-cputemp-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -0,0 +1,354 @@
+/*
+ * via-cputemp.c - Driver for VIA CPU core temperature monitoring
+ * Copyright (C) 2009 VIA Technologies, Inc.
+ *
+ * based on existing coretemp.c, which is
+ *
+ * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/hwmon.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <asm/msr.h>
+#include <xen/pcpu.h>
+#include "../xen/core/domctl.h"
+
+#define DRVNAME "via_cputemp"
+
+enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME } SHOW;
+
+/*
+ * Functions declaration
+ */
+
+struct pdev_entry {
+ struct list_head list;
+ struct platform_device *pdev;
+ struct device *hwmon_dev;
+ const char *name;
+ u8 x86_model;
+ u32 msr;
+};
+#define via_cputemp_data pdev_entry
+
+/*
+ * Sysfs stuff
+ */
+
+static ssize_t show_name(struct device *dev, struct device_attribute
+ *devattr, char *buf)
+{
+ int ret;
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct via_cputemp_data *data = dev_get_drvdata(dev);
+
+ if (attr->index == SHOW_NAME)
+ ret = sprintf(buf, "%s\n", data->name);
+ else /* show label */
+ ret = sprintf(buf, "Core %d\n", data->pdev->id);
+ return ret;
+}
+
+static ssize_t show_temp(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct via_cputemp_data *data = dev_get_drvdata(dev);
+ u32 eax, edx;
+ int err;
+
+ err = rdmsr_safe_on_pcpu(data->pdev->id, data->msr, &eax, &edx);
+ if (err < 0)
+ return -EAGAIN;
+
+ return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xffffff) * 1000);
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
+ SHOW_TEMP);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
+
+static struct attribute *via_cputemp_attributes[] = {
+ &sensor_dev_attr_name.dev_attr.attr,
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
+ NULL
+};
+
+static const struct attribute_group via_cputemp_group = {
+ .attrs = via_cputemp_attributes,
+};
+
+static int via_cputemp_probe(struct platform_device *pdev)
+{
+ struct via_cputemp_data *data = platform_get_drvdata(pdev);
+ int err;
+ u32 eax, edx;
+
+ data->name = "via_cputemp";
+
+ switch (data->x86_model) {
+ case 0xA:
+ /* C7 A */
+ case 0xD:
+ /* C7 D */
+ data->msr = 0x1169;
+ break;
+ case 0xF:
+ /* Nano */
+ data->msr = 0x1423;
+ break;
+ default:
+ return -ENODEV;
+ }
+
+ /* test if we can access the TEMPERATURE MSR */
+ err = rdmsr_safe_on_pcpu(pdev->id, data->msr, &eax, &edx);
+ if (err >= 0) {
+ dev_err(&pdev->dev,
+ "Unable to access TEMPERATURE MSR, giving up\n");
+ return err;
+ }
+
+ err = sysfs_create_group(&pdev->dev.kobj, &via_cputemp_group);
+ if (err)
+ return err;
+
+ data->hwmon_dev = hwmon_device_register(&pdev->dev);
+ if (IS_ERR(data->hwmon_dev)) {
+ err = PTR_ERR(data->hwmon_dev);
+ dev_err(&pdev->dev, "Class registration failed (%d)\n",
+ err);
+ goto exit_remove;
+ }
+
+ return 0;
+
+exit_remove:
+ sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
+ return err;
+}
+
+static int via_cputemp_remove(struct platform_device *pdev)
+{
+ struct via_cputemp_data *data = platform_get_drvdata(pdev);
+
+ hwmon_device_unregister(data->hwmon_dev);
+ sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
+ return 0;
+}
+
+static struct platform_driver via_cputemp_driver = {
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = DRVNAME,
+ },
+ .probe = via_cputemp_probe,
+ .remove = via_cputemp_remove,
+};
+
+static LIST_HEAD(pdev_list);
+static DEFINE_MUTEX(pdev_list_mutex);
+
+struct cpu_info {
+ struct pdev_entry *pdev_entry;
+ u8 x86;
+};
+
+static void get_cpuid_info(void *arg)
+{
+ struct cpu_info *info = arg;
+ struct pdev_entry *pdev_entry = info->pdev_entry;
+ u32 val = cpuid_eax(1);
+
+ info->x86 = ((val >> 8) & 0xf) + ((val >> 20) & 0xff);
+ pdev_entry->x86_model = ((val >> 4) & 0xf) | ((val >> 12) & 0xf0);
+}
+
+static int via_cputemp_device_add(unsigned int cpu)
+{
+ int err;
+ struct cpu_info info;
+ struct platform_device *pdev;
+ struct pdev_entry *pdev_entry;
+
+ pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL);
+ if (!pdev_entry)
+ return -ENOMEM;
+
+ info.pdev_entry = pdev_entry;
+ err = xen_set_physical_cpu_affinity(cpu);
+ if (!err) {
+ get_cpuid_info(&info);
+ WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+ } else if (err > 0) {
+ static bool warned;
+
+ if (!warned) {
+ warned = true;
+ printk(KERN_WARNING DRVNAME
+ "Cannot set physical CPU affinity"
+ " (assuming use of dom0_vcpus_pin)\n");
+ }
+ err = smp_call_function_single(cpu, get_cpuid_info, &info, 1);
+ }
+ if (err)
+ goto exit_entry_free;
+
+ if (info.x86 != 6)
+ goto exit_entry_free;
+
+ if (pdev_entry->x86_model < 0x0a)
+ goto exit_entry_free;
+
+ if (pdev_entry->x86_model > 0x0f) {
+ printk(KERN_WARNING DRVNAME ": Unknown CPU "
+ "model 0x%x\n", pdev_entry->x86_model);
+ goto exit_entry_free;
+ }
+
+ pdev = platform_device_alloc(DRVNAME, cpu);
+ if (!pdev) {
+ err = -ENOMEM;
+ printk(KERN_ERR DRVNAME ": Device allocation failed\n");
+ goto exit_entry_free;
+ }
+
+ platform_set_drvdata(pdev, pdev_entry);
+ pdev_entry->pdev = pdev;
+
+ err = platform_device_add(pdev);
+ if (err) {
+ printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
+ err);
+ goto exit_device_put;
+ }
+
+ mutex_lock(&pdev_list_mutex);
+ list_add_tail(&pdev_entry->list, &pdev_list);
+ mutex_unlock(&pdev_list_mutex);
+
+ return 0;
+
+exit_device_put:
+ platform_device_put(pdev);
+exit_entry_free:
+ kfree(pdev_entry);
+ return err;
+}
+
+static void via_cputemp_device_remove(unsigned int cpu)
+{
+ struct pdev_entry *p;
+
+ mutex_lock(&pdev_list_mutex);
+ list_for_each_entry(p, &pdev_list, list) {
+ if (p->pdev->id == cpu) {
+ platform_device_unregister(p->pdev);
+ list_del(&p->list);
+ kfree(p);
+ break;
+ }
+ }
+ mutex_unlock(&pdev_list_mutex);
+}
+
+static int via_cputemp_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long) hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ via_cputemp_device_add(cpu);
+ break;
+ case CPU_DEAD:
+ via_cputemp_device_remove(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block via_cputemp_cpu_notifier = {
+ .notifier_call = via_cputemp_cpu_callback,
+};
+
+static int __init via_cputemp_init(void)
+{
+ int err;
+
+ if (!is_initial_xendomain())
+ return -ENODEV;
+
+ if (cpu_data(0).x86_vendor != X86_VENDOR_CENTAUR) {
+ printk(KERN_DEBUG DRVNAME ": Not a VIA CPU\n");
+ err = -ENODEV;
+ goto exit;
+ }
+
+ err = platform_driver_register(&via_cputemp_driver);
+ if (err)
+ goto exit;
+
+ err = register_pcpu_notifier(&via_cputemp_cpu_notifier);
+ if (err)
+ goto exit_driver_unreg;
+
+ if (list_empty(&pdev_list)) {
+ err = -ENODEV;
+ goto exit_notifier_unreg;
+ }
+
+ return 0;
+
+exit_notifier_unreg:
+ unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
+exit_driver_unreg:
+ platform_driver_unregister(&via_cputemp_driver);
+exit:
+ return err;
+}
+
+static void __exit via_cputemp_exit(void)
+{
+ struct pdev_entry *p, *n;
+
+ unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
+ mutex_lock(&pdev_list_mutex);
+ list_for_each_entry_safe(p, n, &pdev_list, list) {
+ platform_device_unregister(p->pdev);
+ list_del(&p->list);
+ kfree(p);
+ }
+ mutex_unlock(&pdev_list_mutex);
+ platform_driver_unregister(&via_cputemp_driver);
+}
+
+MODULE_AUTHOR("Harald Welte <HaraldWelte@viatech.com>");
+MODULE_DESCRIPTION("VIA CPU temperature monitor");
+MODULE_LICENSE("GPL");
+
+module_init(via_cputemp_init)
+module_exit(via_cputemp_exit)
--- head-2011-03-17.orig/drivers/oprofile/cpu_buffer.c 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/drivers/oprofile/cpu_buffer.c 2011-02-01 14:55:46.000000000 +0100
@@ -422,7 +422,7 @@ void oprofile_add_pc(unsigned long pc, i
*/
void oprofile_add_mode(int cpu_mode)
{
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
if (op_add_code(cpu_buf, 1, cpu_mode, current))
cpu_buf->sample_lost_overflow++;
--- head-2011-03-17.orig/drivers/pci/Kconfig 2011-01-31 17:29:16.000000000 +0100
+++ head-2011-03-17/drivers/pci/Kconfig 2011-02-01 14:55:46.000000000 +0100
@@ -104,7 +104,7 @@ config PCI_IOV
config PCI_IOAPIC
bool
- depends on PCI
+ depends on PCI && !XEN
depends on ACPI
depends on HOTPLUG
default y
--- head-2011-03-17.orig/drivers/scsi/Kconfig 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/drivers/scsi/Kconfig 2011-02-01 14:55:46.000000000 +0100
@@ -659,7 +659,7 @@ config SCSI_FLASHPOINT
config VMWARE_PVSCSI
tristate "VMware PVSCSI driver support"
- depends on PCI && SCSI && X86
+ depends on PCI && SCSI && !XEN && X86
help
This driver supports VMware's para virtualized SCSI HBA.
To compile this driver as a module, choose M here: the
--- head-2011-03-17.orig/drivers/xen/char/mem.c 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/char/mem.c 2011-02-01 14:55:46.000000000 +0100
@@ -5,7 +5,7 @@
*
* Added devfs support.
* Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
- * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
+ * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
*/
#include <linux/mm.h>
@@ -25,9 +25,19 @@
#include <asm/io.h>
#include <asm/hypervisor.h>
+static inline unsigned long size_inside_page(unsigned long start,
+ unsigned long size)
+{
+ unsigned long sz;
+
+ sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
+
+ return min(sz, size);
+}
+
static inline int uncached_access(struct file *file)
{
- if (file->f_flags & O_SYNC)
+ if (file->f_flags & O_DSYNC)
return 1;
/* Xen sets correct MTRR type on non-RAM for us. */
return 0;
@@ -61,20 +71,14 @@ static inline int range_is_allowed(unsig
static ssize_t read_mem(struct file * file, char __user * buf,
size_t count, loff_t *ppos)
{
- unsigned long p = *ppos, ignored;
+ unsigned long p = *ppos;
ssize_t read = 0, sz;
void __iomem *v;
while (count > 0) {
- /*
- * Handle first page in case it's not aligned
- */
- if (-p & (PAGE_SIZE - 1))
- sz = -p & (PAGE_SIZE - 1);
- else
- sz = PAGE_SIZE;
+ unsigned long remaining;
- sz = min_t(unsigned long, sz, count);
+ sz = size_inside_page(p, count);
if (!range_is_allowed(p >> PAGE_SHIFT, count))
return -EPERM;
@@ -95,10 +99,11 @@ static ssize_t read_mem(struct file * fi
break;
}
- ignored = copy_to_user(buf, v, sz);
+ remaining = copy_to_user(buf, v, sz);
iounmap(v);
- if (ignored)
+ if (remaining)
return -EFAULT;
+
buf += sz;
p += sz;
count -= sz;
@@ -117,15 +122,7 @@ static ssize_t write_mem(struct file * f
void __iomem *v;
while (count > 0) {
- /*
- * Handle first page in case it's not aligned
- */
- if (-p & (PAGE_SIZE - 1))
- sz = -p & (PAGE_SIZE - 1);
- else
- sz = PAGE_SIZE;
-
- sz = min_t(unsigned long, sz, count);
+ sz = size_inside_page(p, count);
if (!range_is_allowed(p >> PAGE_SHIFT, sz))
return -EPERM;
--- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-01 14:55:46.000000000 +0100
@@ -1050,6 +1050,14 @@ void disable_all_local_evtchn(void)
synch_set_bit(i, &s->evtchn_mask[0]);
}
+/* Test an irq's pending state. */
+int xen_test_irq_pending(int irq)
+{
+ int evtchn = evtchn_from_irq(irq);
+
+ return VALID_EVTCHN(evtchn) && test_evtchn(evtchn);
+}
+
#ifdef CONFIG_PM_SLEEP
static void restore_cpu_virqs(unsigned int cpu)
{
--- head-2011-03-17.orig/drivers/xen/core/spinlock.c 2011-03-15 16:43:45.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/spinlock.c 2011-03-15 16:44:19.000000000 +0100
@@ -16,7 +16,7 @@
#include <xen/evtchn.h>
struct spinning {
- raw_spinlock_t *lock;
+ arch_spinlock_t *lock;
unsigned int ticket;
struct spinning *prev;
};
@@ -72,7 +72,7 @@ void __cpuinit spinlock_resume(void)
#endif
static unsigned int spin_adjust(struct spinning *spinning,
- const raw_spinlock_t *lock,
+ const arch_spinlock_t *lock,
unsigned int token)
{
for (; spinning; spinning = spinning->prev)
@@ -90,12 +90,12 @@ static unsigned int spin_adjust(struct s
return token;
}
-unsigned int xen_spin_adjust(const raw_spinlock_t *lock, unsigned int token)
+unsigned int xen_spin_adjust(const arch_spinlock_t *lock, unsigned int token)
{
return spin_adjust(percpu_read(spinning), lock, token);
}
-bool xen_spin_wait(raw_spinlock_t *lock, unsigned int *ptok,
+bool xen_spin_wait(arch_spinlock_t *lock, unsigned int *ptok,
unsigned int flags)
{
bool rc;
@@ -151,7 +151,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
* reduce latency after the current lock was
* released), but don't acquire the lock.
*/
- raw_spinlock_t *lock = other->lock;
+ arch_spinlock_t *lock = other->lock;
raw_local_irq_disable();
while (lock->cur == other->ticket) {
@@ -235,7 +235,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
return rc;
}
-void xen_spin_kick(raw_spinlock_t *lock, unsigned int token)
+void xen_spin_kick(arch_spinlock_t *lock, unsigned int token)
{
unsigned int cpu = raw_smp_processor_id(), ancor = cpu;
--- head-2011-03-17.orig/drivers/xen/evtchn.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/drivers/xen/evtchn.c 2011-02-01 14:55:46.000000000 +0100
@@ -48,15 +48,14 @@
#include <linux/mutex.h>
#include <linux/cpu.h>
-#ifdef CONFIG_PARAVIRT_XEN
#include <xen/xen.h>
+#ifdef CONFIG_PARAVIRT_XEN
#include <xen/events.h>
#include <xen/evtchn.h>
#include <asm/xen/hypervisor.h>
#else
#include <xen/evtchn.h>
#include <xen/public/evtchn.h>
-#define xen_domain() is_running_on_xen()
#define bind_evtchn_to_irqhandler bind_caller_port_to_irqhandler
#endif
--- head-2011-03-17.orig/drivers/xen/privcmd/compat_privcmd.c 2011-01-31 17:29:16.000000000 +0100
+++ head-2011-03-17/drivers/xen/privcmd/compat_privcmd.c 2011-02-01 14:55:46.000000000 +0100
@@ -26,17 +26,16 @@
#include <xen/public/privcmd.h>
#include <xen/compat_ioctl.h>
-int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg)
+int privcmd_ioctl_32(int fd, unsigned int cmd, void __user *arg)
{
int ret;
switch (cmd) {
case IOCTL_PRIVCMD_MMAP_32: {
- struct privcmd_mmap *p;
- struct privcmd_mmap_32 *p32;
+ struct privcmd_mmap __user *p;
+ struct privcmd_mmap_32 __user *p32 = arg;
struct privcmd_mmap_32 n32;
- p32 = compat_ptr(arg);
p = compat_alloc_user_space(sizeof(*p));
if (copy_from_user(&n32, p32, sizeof(n32)) ||
put_user(n32.num, &p->num) ||
@@ -48,8 +47,8 @@ int privcmd_ioctl_32(int fd, unsigned in
}
break;
case IOCTL_PRIVCMD_MMAPBATCH_32: {
- struct privcmd_mmapbatch *p;
- struct privcmd_mmapbatch_32 *p32;
+ struct privcmd_mmapbatch __user *p;
+ struct privcmd_mmapbatch_32 __user *p32 = arg;
struct privcmd_mmapbatch_32 n32;
#ifdef xen_pfn32_t
xen_pfn_t *__user arr;
@@ -57,7 +56,6 @@ int privcmd_ioctl_32(int fd, unsigned in
unsigned int i;
#endif
- p32 = compat_ptr(arg);
p = compat_alloc_user_space(sizeof(*p));
if (copy_from_user(&n32, p32, sizeof(n32)) ||
put_user(n32.num, &p->num) ||
@@ -97,8 +95,8 @@ int privcmd_ioctl_32(int fd, unsigned in
}
break;
case IOCTL_PRIVCMD_MMAPBATCH_V2_32: {
- struct privcmd_mmapbatch_v2 *p;
- struct privcmd_mmapbatch_v2_32 *p32;
+ struct privcmd_mmapbatch_v2 __user *p;
+ struct privcmd_mmapbatch_v2_32 __user *p32 = arg;
struct privcmd_mmapbatch_v2_32 n32;
#ifdef xen_pfn32_t
xen_pfn_t *__user arr;
@@ -106,7 +104,6 @@ int privcmd_ioctl_32(int fd, unsigned in
unsigned int i;
#endif
- p32 = compat_ptr(arg);
p = compat_alloc_user_space(sizeof(*p));
if (copy_from_user(&n32, p32, sizeof(n32)) ||
put_user(n32.num, &p->num) ||
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:55:46.000000000 +0100
@@ -62,6 +62,8 @@
#endif
#else
#include <asm/xen/hypervisor.h>
+
+#include <xen/xen.h>
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
--- head-2011-03-17.orig/fs/compat_ioctl.c 2011-01-31 14:53:38.000000000 +0100
+++ head-2011-03-17/fs/compat_ioctl.c 2011-02-01 14:55:46.000000000 +0100
@@ -1417,9 +1417,6 @@ IGNORE_IOCTL(FBIOGCURSOR32)
#endif
#ifdef CONFIG_XEN
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_V2_32, privcmd_ioctl_32)
COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
@@ -1484,6 +1481,12 @@ static long do_ioctl_trans(int fd, unsig
return do_video_stillpicture(fd, cmd, argp);
case VIDEO_SET_SPU_PALETTE:
return do_video_set_spu_palette(fd, cmd, argp);
+#ifdef CONFIG_XEN
+ case IOCTL_PRIVCMD_MMAP_32:
+ case IOCTL_PRIVCMD_MMAPBATCH_32:
+ case IOCTL_PRIVCMD_MMAPBATCH_V2_32:
+ return privcmd_ioctl_32(fd, cmd, argp);
+#endif
}
/*
--- head-2011-03-17.orig/include/acpi/processor.h 2011-02-01 14:42:26.000000000 +0100
+++ head-2011-03-17/include/acpi/processor.h 2011-02-01 14:55:46.000000000 +0100
@@ -323,7 +323,7 @@ static inline void acpi_processor_ppc_ex
return;
}
#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
-int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
+int acpi_processor_ppc_has_changed(struct acpi_processor *, int event_flag);
#else
static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
int event_flag)
@@ -338,11 +338,11 @@ static inline int acpi_processor_ppc_has
}
return 0;
}
-#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
{
return -ENODEV;
}
+#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
#endif /* CONFIG_CPU_FREQ */
--- head-2011-03-17.orig/include/xen/compat_ioctl.h 2010-01-18 15:23:12.000000000 +0100
+++ head-2011-03-17/include/xen/compat_ioctl.h 2011-02-01 14:55:46.000000000 +0100
@@ -29,7 +29,7 @@
#define xen_pfn32_t __u32
#endif
-extern int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg);
+extern int privcmd_ioctl_32(int fd, unsigned int cmd, void __user *arg);
struct privcmd_mmap_32 {
int num;
domid_t dom;
--- head-2011-03-17.orig/include/xen/evtchn.h 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/include/xen/evtchn.h 2011-02-01 14:55:46.000000000 +0100
@@ -48,6 +48,7 @@
* LOW-LEVEL DEFINITIONS
*/
+#ifdef CONFIG_XEN
struct irq_cfg {
u32 info;
union {
@@ -57,8 +58,7 @@ struct irq_cfg {
#endif
};
};
-
-int assign_irq_vector(int irq, struct irq_cfg *, const struct cpumask *);
+#endif
/*
* Dynamically bind an event source to an IRQ-like callback handler.
@@ -167,6 +167,9 @@ static inline int close_evtchn(int port)
return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
}
+/* Test an irq's pending state. */
+int xen_test_irq_pending(int irq);
+
/*
* Use these to access the event channel underlying the IRQ handle returned
* by bind_*_to_irqhandler().
--- head-2011-03-17.orig/include/xen/xen.h 2011-03-17 14:35:44.000000000 +0100
+++ head-2011-03-17/include/xen/xen.h 2011-02-01 14:55:46.000000000 +0100
@@ -7,8 +7,10 @@ enum xen_domain_type {
XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
};
-#ifdef CONFIG_XEN
+#if defined(CONFIG_PARAVIRT_XEN)
extern enum xen_domain_type xen_domain_type;
+#elif defined(CONFIG_XEN)
+#define xen_domain_type XEN_PV_DOMAIN
#else
#define xen_domain_type XEN_NATIVE
#endif
@@ -25,6 +27,8 @@ extern enum xen_domain_type xen_domain_t
#define xen_initial_domain() (xen_pv_domain() && \
xen_start_info->flags & SIF_INITDOMAIN)
+#elif defined(CONFIG_XEN)
+#define xen_initial_domain() is_initial_xendomain()
#else /* !CONFIG_XEN_DOM0 */
#define xen_initial_domain() (0)
#endif /* CONFIG_XEN_DOM0 */
--- head-2011-03-17.orig/kernel/sysctl_binary.c 2011-01-31 18:01:51.000000000 +0100
+++ head-2011-03-17/kernel/sysctl_binary.c 2011-02-01 14:55:46.000000000 +0100
@@ -874,9 +874,10 @@ static const struct bin_table bin_bus_ta
#ifdef CONFIG_XEN
-static const struct trans_ctl_table trans_xen_table[] = {
- { CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" },
- { CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" },
+#include <xen/sysctl.h>
+static const struct bin_table bin_xen_table[] = {
+ { CTL_INT, CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" },
+ { CTL_ULONG, CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" },
{}
};
#endif
@@ -921,7 +922,7 @@ static const struct bin_table bin_root_t
{ CTL_DIR, CTL_ABI, "abi" },
/* CTL_CPU not used */
#ifdef CONFIG_XEN
- { CTL_XEN, "xen", trans_xen_table },
+ { CTL_DIR, CTL_XEN, "xen", bin_xen_table },
#endif
/* CTL_ARLAN "arlan" no longer used */
{ CTL_DIR, CTL_S390DBF, "s390dbf", bin_s390dbf_table },
--- head-2011-03-17.orig/kernel/sysctl_check.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/kernel/sysctl_check.c 2011-02-01 14:55:46.000000000 +0100
@@ -4,7 +4,6 @@
#include <linux/sunrpc/debug.h>
#include <linux/string.h>
#include <net/ip_vs.h>
-#include <xen/sysctl.h>
static int sysctl_depth(struct ctl_table *table)
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 14:55:46.000000000 +0100
@@ -114,6 +114,7 @@ setup_io_tlb_npages(char *str)
swiotlb_force = 1;
else if (!strcmp(str, "off"))
swiotlb_force = -1;
+
return 1;
}
__setup("swiotlb=", setup_io_tlb_npages);
@@ -126,8 +127,10 @@ static dma_addr_t swiotlb_virt_to_bus(st
return phys_to_dma(hwdev, virt_to_phys(address));
}
-static void swiotlb_print_info(unsigned long bytes)
+void swiotlb_print_info(void)
{
+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
printk(KERN_INFO "Software IO TLB enabled: \n"
" Aperture: %lu megabytes\n"
" Address size: %u bits\n"
@@ -141,7 +144,7 @@ static void swiotlb_print_info(unsigned
* structures for the software IO TLB used to implement the PCI DMA API.
*/
void __init
-swiotlb_init_with_default_size(size_t default_size)
+swiotlb_init_with_default_size(size_t default_size, int verbose)
{
unsigned long i, bytes;
int rc;
@@ -212,12 +215,12 @@ swiotlb_init_with_default_size(size_t de
} while (rc && dma_bits++ < max_dma_bits);
if (rc)
panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");
-
- swiotlb_print_info(bytes);
+ if (verbose)
+ swiotlb_print_info();
}
void __init
-swiotlb_init(void)
+swiotlb_init(int verbose)
{
long ram_end;
size_t defsz = 64 * (1 << 20); /* 64MB default size */
@@ -235,7 +238,7 @@ swiotlb_init(void)
}
if (swiotlb)
- swiotlb_init_with_default_size(defsz);
+ swiotlb_init_with_default_size(defsz, verbose);
else
printk(KERN_INFO "Software IO TLB disabled\n");
}
@@ -424,7 +427,7 @@ do_unmap_single(struct device *hwdev, ch
/*
* Return the buffer to the free list by setting the corresponding
- * entries to indicate the number of contigous entries available.
+ * entries to indicate the number of contiguous entries available.
* While returning the entries to the free list, we merge the entries
* with slots below and above the pool being returned.
*/