5034 lines
144 KiB
Plaintext
5034 lines
144 KiB
Plaintext
From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
|
|
Subject: Linux: 2.6.33
|
|
Patch-mainline: 2.6.33
|
|
|
|
This patch contains the differences between 2.6.32 and 2.6.33.
|
|
|
|
Acked-by: Jeff Mahoney <jeffm@suse.com>
|
|
Automatically created from "patches.kernel.org/patch-2.6.33" by xen-port-patches.py
|
|
|
|
--- head-2011-03-17.orig/arch/ia64/include/asm/xen/hypervisor.h 2011-02-01 14:44:12.000000000 +0100
|
|
+++ head-2011-03-17/arch/ia64/include/asm/xen/hypervisor.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -34,11 +34,11 @@
|
|
#define _ASM_IA64_XEN_HYPERVISOR_H
|
|
|
|
#include <linux/err.h>
|
|
+#include <xen/xen.h>
|
|
#ifdef CONFIG_PARAVIRT_XEN
|
|
#include <xen/interface/xen.h>
|
|
#include <xen/interface/version.h> /* to compile feature.c */
|
|
#include <xen/features.h> /* to comiple xen-netfront.c */
|
|
-#include <xen/xen.h>
|
|
#include <asm/xen/hypercall.h>
|
|
|
|
extern struct shared_info *HYPERVISOR_shared_info;
|
|
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -21,7 +21,7 @@ config X86
|
|
select HAVE_UNSTABLE_SCHED_CLOCK
|
|
select HAVE_IDE
|
|
select HAVE_OPROFILE
|
|
- select HAVE_PERF_EVENTS if !XEN
|
|
+ select HAVE_PERF_EVENTS
|
|
select HAVE_IRQ_WORK
|
|
select HAVE_IOREMAP_PROT
|
|
select HAVE_KPROBES
|
|
@@ -52,7 +52,7 @@ config X86
|
|
select HAVE_KERNEL_BZIP2 if !XEN
|
|
select HAVE_KERNEL_LZMA if !XEN
|
|
select HAVE_KERNEL_XZ
|
|
- select HAVE_KERNEL_LZO
|
|
+ select HAVE_KERNEL_LZO if !XEN
|
|
select HAVE_HW_BREAKPOINT
|
|
select HAVE_MIXED_BREAKPOINTS_REGS
|
|
select PERF_EVENTS
|
|
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -534,7 +534,7 @@ ia32_sys_call_table:
|
|
.quad compat_sys_writev
|
|
.quad sys_getsid
|
|
.quad sys_fdatasync
|
|
- .quad sys32_sysctl /* sysctl */
|
|
+ .quad compat_sys_sysctl /* sysctl */
|
|
.quad sys_mlock /* 150 */
|
|
.quad sys_munlock
|
|
.quad sys_mlockall
|
|
@@ -577,7 +577,7 @@ ia32_sys_call_table:
|
|
.quad quiet_ni_syscall /* streams2 */
|
|
.quad stub32_vfork /* 190 */
|
|
.quad compat_sys_getrlimit
|
|
- .quad sys32_mmap2
|
|
+ .quad sys_mmap_pgoff
|
|
.quad sys32_truncate64
|
|
.quad sys32_ftruncate64
|
|
.quad sys32_stat64 /* 195 */
|
|
@@ -722,4 +722,5 @@ ia32_sys_call_table:
|
|
.quad compat_sys_pwritev
|
|
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
|
|
.quad sys_perf_event_open
|
|
+ .quad compat_sys_recvmmsg
|
|
ia32_syscall_end:
|
|
--- head-2011-03-17.orig/arch/x86/include/asm/hw_irq.h 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/asm/hw_irq.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -85,6 +85,7 @@ struct irq_2_iommu {
|
|
u8 irte_mask;
|
|
};
|
|
|
|
+#ifndef CONFIG_XEN
|
|
/*
|
|
* This is performance-critical, we want to do it O(1)
|
|
*
|
|
@@ -100,6 +101,9 @@ struct irq_cfg {
|
|
struct irq_2_iommu irq_2_iommu;
|
|
#endif
|
|
};
|
|
+#else
|
|
+struct irq_cfg;
|
|
+#endif
|
|
|
|
extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
|
|
extern void send_cleanup_vector(struct irq_cfg *);
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -0,0 +1,17 @@
|
|
+#ifndef _ASM_X86_PERF_EVENT_H
|
|
+#define _ASM_X86_PERF_EVENT_H
|
|
+
|
|
+#ifdef CONFIG_PERF_EVENTS
|
|
+
|
|
+/*
|
|
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
|
|
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
|
|
+ * care what we do with it.
|
|
+ */
|
|
+#define PERF_EFLAGS_EXACT (1UL << 3)
|
|
+
|
|
+#endif
|
|
+
|
|
+static inline void init_hw_perf_events(void) {}
|
|
+
|
|
+#endif /* _ASM_X86_PERF_EVENT_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -16,6 +16,8 @@
|
|
|
|
#ifndef __ASSEMBLY__
|
|
|
|
+#include <asm/x86_init.h>
|
|
+
|
|
/*
|
|
* ZERO_PAGE is a global shared page that is always zero: used
|
|
* for zero-mapped memory areas etc..
|
|
@@ -267,9 +269,9 @@ static inline int is_new_memtype_allowed
|
|
unsigned long new_flags)
|
|
{
|
|
/*
|
|
- * PAT type is always WB for ISA. So no need to check.
|
|
+ * PAT type is always WB for untracked ranges, so no need to check.
|
|
*/
|
|
- if (is_ISA_range(paddr, paddr + size - 1))
|
|
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
|
|
return 1;
|
|
|
|
/*
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:46:37.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:46:54.000000000 +0100
|
|
@@ -31,6 +31,7 @@ struct mm_struct;
|
|
#include <linux/init.h>
|
|
#include <xen/interface/physdev.h>
|
|
|
|
+#define HBP_NUM 4
|
|
/*
|
|
* Default implementation of macro that returns current
|
|
* instruction pointer ("program counter").
|
|
@@ -191,7 +192,7 @@ static inline void xen_cpuid(unsigned in
|
|
unsigned int *ecx, unsigned int *edx)
|
|
{
|
|
/* ecx is often an input as well as an output. */
|
|
- asm(XEN_CPUID
|
|
+ asm volatile(XEN_CPUID
|
|
: "=a" (*eax),
|
|
"=b" (*ebx),
|
|
"=c" (*ecx),
|
|
@@ -440,6 +441,8 @@ extern unsigned int xstate_size;
|
|
extern void free_thread_xstate(struct task_struct *);
|
|
extern struct kmem_cache *task_xstate_cachep;
|
|
|
|
+struct perf_event;
|
|
+
|
|
struct thread_struct {
|
|
/* Cached TLS descriptors: */
|
|
struct desc_struct tls_array[GDT_ENTRY_TLS_ENTRIES];
|
|
@@ -460,13 +463,12 @@ struct thread_struct {
|
|
unsigned long fs;
|
|
#endif
|
|
unsigned long gs;
|
|
- /* Hardware debugging registers: */
|
|
- unsigned long debugreg0;
|
|
- unsigned long debugreg1;
|
|
- unsigned long debugreg2;
|
|
- unsigned long debugreg3;
|
|
- unsigned long debugreg6;
|
|
- unsigned long debugreg7;
|
|
+ /* Save middle states of ptrace breakpoints */
|
|
+ struct perf_event *ptrace_bps[HBP_NUM];
|
|
+ /* Debug status used for traps, single steps, etc... */
|
|
+ unsigned long debugreg6;
|
|
+ /* Keep track of the exact dr7 value set by the user */
|
|
+ unsigned long ptrace_dr7;
|
|
/* Fault info: */
|
|
unsigned long cr2;
|
|
unsigned long trap_no;
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -44,10 +44,10 @@
|
|
|
|
int xen_spinlock_init(unsigned int cpu);
|
|
void xen_spinlock_cleanup(unsigned int cpu);
|
|
-bool xen_spin_wait(raw_spinlock_t *, unsigned int *token,
|
|
+bool xen_spin_wait(arch_spinlock_t *, unsigned int *token,
|
|
unsigned int flags);
|
|
-unsigned int xen_spin_adjust(const raw_spinlock_t *, unsigned int token);
|
|
-void xen_spin_kick(raw_spinlock_t *, unsigned int token);
|
|
+unsigned int xen_spin_adjust(const arch_spinlock_t *, unsigned int token);
|
|
+void xen_spin_kick(arch_spinlock_t *, unsigned int token);
|
|
|
|
/*
|
|
* Ticket locks are conceptually two parts, one indicating the current head of
|
|
@@ -97,7 +97,7 @@ void xen_spin_kick(raw_spinlock_t *, uns
|
|
: \
|
|
: "memory", "cc")
|
|
|
|
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
|
|
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
|
|
{
|
|
int tmp, new;
|
|
|
|
@@ -160,7 +160,7 @@ static __always_inline int __ticket_spin
|
|
: "memory", "cc"); \
|
|
} while (0)
|
|
|
|
-static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock)
|
|
+static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
|
|
{
|
|
int tmp;
|
|
int new;
|
|
@@ -183,21 +183,21 @@ static __always_inline int __ticket_spin
|
|
}
|
|
#endif
|
|
|
|
-static inline int __ticket_spin_is_locked(raw_spinlock_t *lock)
|
|
+static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
|
|
{
|
|
int tmp = ACCESS_ONCE(lock->slock);
|
|
|
|
return !!(((tmp >> TICKET_SHIFT) ^ tmp) & ((1 << TICKET_SHIFT) - 1));
|
|
}
|
|
|
|
-static inline int __ticket_spin_is_contended(raw_spinlock_t *lock)
|
|
+static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
|
|
{
|
|
int tmp = ACCESS_ONCE(lock->slock);
|
|
|
|
return (((tmp >> TICKET_SHIFT) - tmp) & ((1 << TICKET_SHIFT) - 1)) > 1;
|
|
}
|
|
|
|
-static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock)
|
|
+static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
|
|
{
|
|
unsigned int token, count;
|
|
unsigned int flags = __raw_local_irq_save();
|
|
@@ -216,7 +216,7 @@ static __always_inline void __ticket_spi
|
|
} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
|
|
}
|
|
|
|
-static __always_inline void __ticket_spin_lock_flags(raw_spinlock_t *lock,
|
|
+static __always_inline void __ticket_spin_lock_flags(arch_spinlock_t *lock,
|
|
unsigned long flags)
|
|
{
|
|
unsigned int token, count;
|
|
@@ -232,7 +232,7 @@ static __always_inline void __ticket_spi
|
|
} while (unlikely(!count) && !xen_spin_wait(lock, &token, flags));
|
|
}
|
|
|
|
-static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock)
|
|
+static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
|
|
{
|
|
unsigned int token;
|
|
bool kick;
|
|
@@ -248,24 +248,24 @@ static __always_inline void __ticket_spi
|
|
#undef __ticket_spin_unlock_body
|
|
#endif
|
|
|
|
-#define __raw_spin(n) __ticket_spin_##n
|
|
+#define __arch_spin(n) __ticket_spin_##n
|
|
|
|
#else /* TICKET_SHIFT */
|
|
|
|
static inline int xen_spinlock_init(unsigned int cpu) { return 0; }
|
|
static inline void xen_spinlock_cleanup(unsigned int cpu) {}
|
|
|
|
-static inline int __byte_spin_is_locked(raw_spinlock_t *lock)
|
|
+static inline int __byte_spin_is_locked(arch_spinlock_t *lock)
|
|
{
|
|
return lock->lock != 0;
|
|
}
|
|
|
|
-static inline int __byte_spin_is_contended(raw_spinlock_t *lock)
|
|
+static inline int __byte_spin_is_contended(arch_spinlock_t *lock)
|
|
{
|
|
return lock->spinners != 0;
|
|
}
|
|
|
|
-static inline void __byte_spin_lock(raw_spinlock_t *lock)
|
|
+static inline void __byte_spin_lock(arch_spinlock_t *lock)
|
|
{
|
|
s8 val = 1;
|
|
|
|
@@ -284,7 +284,7 @@ static inline void __byte_spin_lock(raw_
|
|
|
|
#define __byte_spin_lock_flags(lock, flags) __byte_spin_lock(lock)
|
|
|
|
-static inline int __byte_spin_trylock(raw_spinlock_t *lock)
|
|
+static inline int __byte_spin_trylock(arch_spinlock_t *lock)
|
|
{
|
|
u8 old = 1;
|
|
|
|
@@ -294,53 +294,53 @@ static inline int __byte_spin_trylock(ra
|
|
return old == 0;
|
|
}
|
|
|
|
-static inline void __byte_spin_unlock(raw_spinlock_t *lock)
|
|
+static inline void __byte_spin_unlock(arch_spinlock_t *lock)
|
|
{
|
|
smp_wmb();
|
|
lock->lock = 0;
|
|
}
|
|
|
|
-#define __raw_spin(n) __byte_spin_##n
|
|
+#define __arch_spin(n) __byte_spin_##n
|
|
|
|
#endif /* TICKET_SHIFT */
|
|
|
|
-static inline int __raw_spin_is_locked(raw_spinlock_t *lock)
|
|
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
|
|
{
|
|
- return __raw_spin(is_locked)(lock);
|
|
+ return __arch_spin(is_locked)(lock);
|
|
}
|
|
|
|
-static inline int __raw_spin_is_contended(raw_spinlock_t *lock)
|
|
+static inline int arch_spin_is_contended(arch_spinlock_t *lock)
|
|
{
|
|
- return __raw_spin(is_contended)(lock);
|
|
+ return __arch_spin(is_contended)(lock);
|
|
}
|
|
-#define __raw_spin_is_contended __raw_spin_is_contended
|
|
+#define arch_spin_is_contended arch_spin_is_contended
|
|
|
|
-static __always_inline void __raw_spin_lock(raw_spinlock_t *lock)
|
|
+static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
|
|
{
|
|
- __raw_spin(lock)(lock);
|
|
+ __arch_spin(lock)(lock);
|
|
}
|
|
|
|
-static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock)
|
|
+static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
|
|
{
|
|
- return __raw_spin(trylock)(lock);
|
|
+ return __arch_spin(trylock)(lock);
|
|
}
|
|
|
|
-static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock)
|
|
+static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
|
|
{
|
|
- __raw_spin(unlock)(lock);
|
|
+ __arch_spin(unlock)(lock);
|
|
}
|
|
|
|
-static __always_inline void __raw_spin_lock_flags(raw_spinlock_t *lock,
|
|
+static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
|
|
unsigned long flags)
|
|
{
|
|
- __raw_spin(lock_flags)(lock, flags);
|
|
+ __arch_spin(lock_flags)(lock, flags);
|
|
}
|
|
|
|
-#undef __raw_spin
|
|
+#undef __arch_spin
|
|
|
|
-static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock)
|
|
+static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
|
|
{
|
|
- while (__raw_spin_is_locked(lock))
|
|
+ while (arch_spin_is_locked(lock))
|
|
cpu_relax();
|
|
}
|
|
|
|
@@ -362,7 +362,7 @@ static inline void __raw_spin_unlock_wai
|
|
* read_can_lock - would read_trylock() succeed?
|
|
* @lock: the rwlock in question.
|
|
*/
|
|
-static inline int __raw_read_can_lock(raw_rwlock_t *lock)
|
|
+static inline int arch_read_can_lock(arch_rwlock_t *lock)
|
|
{
|
|
return (int)(lock)->lock > 0;
|
|
}
|
|
@@ -371,12 +371,12 @@ static inline int __raw_read_can_lock(ra
|
|
* write_can_lock - would write_trylock() succeed?
|
|
* @lock: the rwlock in question.
|
|
*/
|
|
-static inline int __raw_write_can_lock(raw_rwlock_t *lock)
|
|
+static inline int arch_write_can_lock(arch_rwlock_t *lock)
|
|
{
|
|
return (lock)->lock == RW_LOCK_BIAS;
|
|
}
|
|
|
|
-static inline void __raw_read_lock(raw_rwlock_t *rw)
|
|
+static inline void arch_read_lock(arch_rwlock_t *rw)
|
|
{
|
|
asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t"
|
|
"jns 1f\n"
|
|
@@ -385,7 +385,7 @@ static inline void __raw_read_lock(raw_r
|
|
::LOCK_PTR_REG (rw) : "memory");
|
|
}
|
|
|
|
-static inline void __raw_write_lock(raw_rwlock_t *rw)
|
|
+static inline void arch_write_lock(arch_rwlock_t *rw)
|
|
{
|
|
asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t"
|
|
"jz 1f\n"
|
|
@@ -394,7 +394,7 @@ static inline void __raw_write_lock(raw_
|
|
::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory");
|
|
}
|
|
|
|
-static inline int __raw_read_trylock(raw_rwlock_t *lock)
|
|
+static inline int arch_read_trylock(arch_rwlock_t *lock)
|
|
{
|
|
atomic_t *count = (atomic_t *)lock;
|
|
|
|
@@ -404,7 +404,7 @@ static inline int __raw_read_trylock(raw
|
|
return 0;
|
|
}
|
|
|
|
-static inline int __raw_write_trylock(raw_rwlock_t *lock)
|
|
+static inline int arch_write_trylock(arch_rwlock_t *lock)
|
|
{
|
|
atomic_t *count = (atomic_t *)lock;
|
|
|
|
@@ -414,23 +414,23 @@ static inline int __raw_write_trylock(ra
|
|
return 0;
|
|
}
|
|
|
|
-static inline void __raw_read_unlock(raw_rwlock_t *rw)
|
|
+static inline void arch_read_unlock(arch_rwlock_t *rw)
|
|
{
|
|
asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory");
|
|
}
|
|
|
|
-static inline void __raw_write_unlock(raw_rwlock_t *rw)
|
|
+static inline void arch_write_unlock(arch_rwlock_t *rw)
|
|
{
|
|
asm volatile(LOCK_PREFIX "addl %1, %0"
|
|
: "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory");
|
|
}
|
|
|
|
-#define __raw_read_lock_flags(lock, flags) __raw_read_lock(lock)
|
|
-#define __raw_write_lock_flags(lock, flags) __raw_write_lock(lock)
|
|
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
|
|
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
|
|
|
|
-#define _raw_spin_relax(lock) cpu_relax()
|
|
-#define _raw_read_relax(lock) cpu_relax()
|
|
-#define _raw_write_relax(lock) cpu_relax()
|
|
+#define arch_spin_relax(lock) cpu_relax()
|
|
+#define arch_read_relax(lock) cpu_relax()
|
|
+#define arch_write_relax(lock) cpu_relax()
|
|
|
|
/* The {read|write|spin}_lock() on x86 are full memory barriers. */
|
|
static inline void smp_mb__after_lock(void) { }
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/spinlock_types.h 2011-02-01 14:44:12.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/spinlock_types.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -42,14 +42,14 @@ typedef union {
|
|
#endif
|
|
#endif
|
|
};
|
|
-} raw_spinlock_t;
|
|
+} arch_spinlock_t;
|
|
|
|
-#define __RAW_SPIN_LOCK_UNLOCKED { 0 }
|
|
+#define __ARCH_SPIN_LOCK_UNLOCKED { 0 }
|
|
|
|
typedef struct {
|
|
unsigned int lock;
|
|
-} raw_rwlock_t;
|
|
+} arch_rwlock_t;
|
|
|
|
-#define __RAW_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
|
|
+#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS }
|
|
|
|
#endif /* _ASM_X86_SPINLOCK_TYPES_H */
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/swiotlb.h 2011-01-31 18:07:35.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/swiotlb.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -1,4 +1,6 @@
|
|
#include_next <asm/swiotlb.h>
|
|
|
|
+#define pci_swiotlb_detect() 1
|
|
+
|
|
dma_addr_t swiotlb_map_single_phys(struct device *, phys_addr_t, size_t size,
|
|
int dir);
|
|
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:07:45.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:09:31.000000000 +0100
|
|
@@ -12,9 +12,9 @@
|
|
#include <linux/irqflags.h>
|
|
|
|
/* entries in ARCH_DLINFO: */
|
|
-#ifdef CONFIG_IA32_EMULATION
|
|
+#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64)
|
|
# define AT_VECTOR_SIZE_ARCH 2
|
|
-#else
|
|
+#else /* else it's non-compat x86-64 */
|
|
# define AT_VECTOR_SIZE_ARCH 1
|
|
#endif
|
|
|
|
@@ -22,6 +22,7 @@ struct task_struct; /* one of the strang
|
|
struct task_struct *__switch_to(struct task_struct *prev,
|
|
struct task_struct *next);
|
|
void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p);
|
|
+extern void show_regs_common(void);
|
|
|
|
#ifdef CONFIG_X86_32
|
|
|
|
@@ -129,8 +130,6 @@ do { \
|
|
"movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
|
|
"movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
|
|
"call __switch_to\n\t" \
|
|
- ".globl thread_return\n" \
|
|
- "thread_return:\n\t" \
|
|
"movq "__percpu_arg([current_task])",%%rsi\n\t" \
|
|
__switch_canary \
|
|
"movq %P[thread_info](%%rsi),%%r8\n\t" \
|
|
@@ -158,19 +157,22 @@ extern void xen_load_gs_index(unsigned);
|
|
* Load a segment. Fall back on loading the zero
|
|
* segment if something goes wrong..
|
|
*/
|
|
-#define loadsegment(seg, value) \
|
|
- asm volatile("\n" \
|
|
- "1:\t" \
|
|
- "movl %k0,%%" #seg "\n" \
|
|
- "2:\n" \
|
|
- ".section .fixup,\"ax\"\n" \
|
|
- "3:\t" \
|
|
- "movl %k1, %%" #seg "\n\t" \
|
|
- "jmp 2b\n" \
|
|
- ".previous\n" \
|
|
- _ASM_EXTABLE(1b,3b) \
|
|
- : :"r" (value), "r" (0) : "memory")
|
|
-
|
|
+#define loadsegment(seg, value) \
|
|
+do { \
|
|
+ unsigned short __val = (value); \
|
|
+ \
|
|
+ asm volatile(" \n" \
|
|
+ "1: movl %k0,%%" #seg " \n" \
|
|
+ \
|
|
+ ".section .fixup,\"ax\" \n" \
|
|
+ "2: xorl %k0,%k0 \n" \
|
|
+ " jmp 1b \n" \
|
|
+ ".previous \n" \
|
|
+ \
|
|
+ _ASM_EXTABLE(1b, 2b) \
|
|
+ \
|
|
+ : "+r" (__val) : : "memory"); \
|
|
+} while (0)
|
|
|
|
/*
|
|
* Save a segment register away
|
|
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -81,12 +81,9 @@ int acpi_save_state_mem(void)
|
|
#ifndef CONFIG_64BIT
|
|
store_gdt((struct desc_ptr *)&header->pmode_gdt);
|
|
|
|
- header->pmode_efer_low = nx_enabled;
|
|
- if (header->pmode_efer_low & 1) {
|
|
- /* This is strange, why not save efer, always? */
|
|
- rdmsr(MSR_EFER, header->pmode_efer_low,
|
|
- header->pmode_efer_high);
|
|
- }
|
|
+ if (rdmsr_safe(MSR_EFER, &header->pmode_efer_low,
|
|
+ &header->pmode_efer_high))
|
|
+ header->pmode_efer_low = header->pmode_efer_high = 0;
|
|
#endif /* !CONFIG_64BIT */
|
|
|
|
header->pmode_cr0 = read_cr0();
|
|
@@ -123,30 +120,33 @@ void acpi_restore_state_mem(void)
|
|
|
|
|
|
/**
|
|
- * acpi_reserve_bootmem - do _very_ early ACPI initialisation
|
|
+ * acpi_reserve_wakeup_memory - do _very_ early ACPI initialisation
|
|
*
|
|
* We allocate a page from the first 1MB of memory for the wakeup
|
|
* routine for when we come back from a sleep state. The
|
|
* runtime allocator allows specification of <16MB pages, but not
|
|
* <1MB pages.
|
|
*/
|
|
-void __init acpi_reserve_bootmem(void)
|
|
+void __init acpi_reserve_wakeup_memory(void)
|
|
{
|
|
#ifndef CONFIG_ACPI_PV_SLEEP
|
|
+ unsigned long mem;
|
|
+
|
|
if ((&wakeup_code_end - &wakeup_code_start) > WAKEUP_SIZE) {
|
|
printk(KERN_ERR
|
|
"ACPI: Wakeup code way too big, S3 disabled.\n");
|
|
return;
|
|
}
|
|
|
|
- acpi_realmode = (unsigned long)alloc_bootmem_low(WAKEUP_SIZE);
|
|
+ mem = find_e820_area(0, 1<<20, WAKEUP_SIZE, PAGE_SIZE);
|
|
|
|
- if (!acpi_realmode) {
|
|
+ if (mem == -1L) {
|
|
printk(KERN_ERR "ACPI: Cannot allocate lowmem, S3 disabled.\n");
|
|
return;
|
|
}
|
|
-
|
|
- acpi_wakeup_address = virt_to_phys((void *)acpi_realmode);
|
|
+ acpi_realmode = (unsigned long) phys_to_virt(mem);
|
|
+ acpi_wakeup_address = mem;
|
|
+ reserve_early(mem, mem + WAKEUP_SIZE, "ACPI WAKEUP");
|
|
#endif
|
|
}
|
|
|
|
@@ -169,6 +169,8 @@ static int __init acpi_sleep_setup(char
|
|
#endif
|
|
if (strncmp(str, "old_ordering", 12) == 0)
|
|
acpi_old_suspend_ordering();
|
|
+ if (strncmp(str, "sci_force_enable", 16) == 0)
|
|
+ acpi_set_sci_en_on_resume();
|
|
str = strchr(str, ',');
|
|
if (str != NULL)
|
|
str += strspn(str, ", \t");
|
|
--- head-2011-03-17.orig/arch/x86/kernel/apic/Makefile 2011-02-01 14:44:12.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/Makefile 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -24,4 +24,4 @@ obj-$(CONFIG_XEN) += nmi.o
|
|
|
|
probe_64-$(CONFIG_XEN) := probe_32.o
|
|
|
|
-disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o
|
|
+disabled-obj-$(CONFIG_XEN) := apic_flat_$(BITS).o apic_noop.o
|
|
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -150,20 +150,6 @@ static struct irq_pin_list *get_one_free
|
|
return pin;
|
|
}
|
|
|
|
-/*
|
|
- * This is performance-critical, we want to do it O(1)
|
|
- *
|
|
- * Most irqs are mapped 1:1 with pins.
|
|
- */
|
|
-struct irq_cfg {
|
|
- struct irq_pin_list *irq_2_pin;
|
|
- cpumask_var_t domain;
|
|
- cpumask_var_t old_domain;
|
|
- unsigned move_cleanup_count;
|
|
- u8 vector;
|
|
- u8 move_in_progress : 1;
|
|
-};
|
|
-
|
|
/* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */
|
|
#ifdef CONFIG_SPARSE_IRQ
|
|
static struct irq_cfg irq_cfgx[] = {
|
|
@@ -219,7 +205,7 @@ int __init arch_early_irq_init(void)
|
|
}
|
|
|
|
#ifdef CONFIG_SPARSE_IRQ
|
|
-static struct irq_cfg *irq_cfg(unsigned int irq)
|
|
+struct irq_cfg *irq_cfg(unsigned int irq)
|
|
{
|
|
struct irq_cfg *cfg = NULL;
|
|
struct irq_desc *desc;
|
|
@@ -371,7 +357,7 @@ void arch_free_chip_data(struct irq_desc
|
|
/* end for move_irq_desc */
|
|
|
|
#else
|
|
-static struct irq_cfg *irq_cfg(unsigned int irq)
|
|
+struct irq_cfg *irq_cfg(unsigned int irq)
|
|
{
|
|
return irq < nr_irqs ? irq_cfgx + irq : NULL;
|
|
}
|
|
@@ -594,23 +580,41 @@ static void __init replace_pin_at_irq_no
|
|
add_pin_to_irq_node(cfg, node, newapic, newpin);
|
|
}
|
|
|
|
+static void __io_apic_modify_irq(struct irq_pin_list *entry,
|
|
+ int mask_and, int mask_or,
|
|
+ void (*final)(struct irq_pin_list *entry))
|
|
+{
|
|
+ unsigned int reg, pin;
|
|
+
|
|
+ pin = entry->pin;
|
|
+ reg = io_apic_read(entry->apic, 0x10 + pin * 2);
|
|
+ reg &= mask_and;
|
|
+ reg |= mask_or;
|
|
+ io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
|
|
+ if (final)
|
|
+ final(entry);
|
|
+}
|
|
+
|
|
static void io_apic_modify_irq(struct irq_cfg *cfg,
|
|
int mask_and, int mask_or,
|
|
void (*final)(struct irq_pin_list *entry))
|
|
{
|
|
- int pin;
|
|
struct irq_pin_list *entry;
|
|
|
|
- for_each_irq_pin(entry, cfg->irq_2_pin) {
|
|
- unsigned int reg;
|
|
- pin = entry->pin;
|
|
- reg = io_apic_read(entry->apic, 0x10 + pin * 2);
|
|
- reg &= mask_and;
|
|
- reg |= mask_or;
|
|
- io_apic_modify(entry->apic, 0x10 + pin * 2, reg);
|
|
- if (final)
|
|
- final(entry);
|
|
- }
|
|
+ for_each_irq_pin(entry, cfg->irq_2_pin)
|
|
+ __io_apic_modify_irq(entry, mask_and, mask_or, final);
|
|
+}
|
|
+
|
|
+static void __mask_and_edge_IO_APIC_irq(struct irq_pin_list *entry)
|
|
+{
|
|
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
+ IO_APIC_REDIR_MASKED, NULL);
|
|
+}
|
|
+
|
|
+static void __unmask_and_level_IO_APIC_irq(struct irq_pin_list *entry)
|
|
+{
|
|
+ __io_apic_modify_irq(entry, ~IO_APIC_REDIR_MASKED,
|
|
+ IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
|
|
}
|
|
|
|
static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
|
|
@@ -634,18 +638,6 @@ static void __mask_IO_APIC_irq(struct ir
|
|
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
|
|
}
|
|
|
|
-static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
|
|
-{
|
|
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER,
|
|
- IO_APIC_REDIR_MASKED, NULL);
|
|
-}
|
|
-
|
|
-static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
|
|
-{
|
|
- io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
|
|
- IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
|
|
-}
|
|
-
|
|
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
|
|
{
|
|
struct irq_cfg *cfg = desc->chip_data;
|
|
@@ -1225,7 +1217,7 @@ __assign_irq_vector(int irq, struct irq_
|
|
int cpu, err;
|
|
cpumask_var_t tmp_mask;
|
|
|
|
- if ((cfg->move_in_progress) || cfg->move_cleanup_count)
|
|
+ if (cfg->move_in_progress)
|
|
return -EBUSY;
|
|
|
|
if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
|
|
@@ -1289,8 +1281,7 @@ next:
|
|
return err;
|
|
}
|
|
|
|
-static int
|
|
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
|
|
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
|
|
{
|
|
int err;
|
|
unsigned long flags;
|
|
@@ -1668,9 +1659,6 @@ __apicdebuginit(void) print_IO_APIC(void
|
|
struct irq_desc *desc;
|
|
unsigned int irq;
|
|
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
|
|
for (i = 0; i < nr_ioapics; i++)
|
|
printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
|
|
@@ -1777,9 +1765,6 @@ __apicdebuginit(void) print_APIC_field(i
|
|
{
|
|
int i;
|
|
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
printk(KERN_DEBUG);
|
|
|
|
for (i = 0; i < 8; i++)
|
|
@@ -1793,9 +1778,6 @@ __apicdebuginit(void) print_local_APIC(v
|
|
unsigned int i, v, ver, maxlvt;
|
|
u64 icr;
|
|
|
|
- if (apic_verbosity == APIC_QUIET)
|
|
- return;
|
|
-
|
|
printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
|
|
smp_processor_id(), hard_smp_processor_id());
|
|
v = apic_read(APIC_ID);
|
|
@@ -1893,13 +1875,19 @@ __apicdebuginit(void) print_local_APIC(v
|
|
printk("\n");
|
|
}
|
|
|
|
-__apicdebuginit(void) print_all_local_APICs(void)
|
|
+__apicdebuginit(void) print_local_APICs(int maxcpu)
|
|
{
|
|
int cpu;
|
|
|
|
+ if (!maxcpu)
|
|
+ return;
|
|
+
|
|
preempt_disable();
|
|
- for_each_online_cpu(cpu)
|
|
+ for_each_online_cpu(cpu) {
|
|
+ if (cpu >= maxcpu)
|
|
+ break;
|
|
smp_call_function_single(cpu, print_local_APIC, NULL, 1);
|
|
+ }
|
|
preempt_enable();
|
|
}
|
|
|
|
@@ -1908,7 +1896,7 @@ __apicdebuginit(void) print_PIC(void)
|
|
unsigned int v;
|
|
unsigned long flags;
|
|
|
|
- if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
|
|
+ if (!nr_legacy_irqs)
|
|
return;
|
|
|
|
printk(KERN_DEBUG "\nprinting PIC contents\n");
|
|
@@ -1935,21 +1923,41 @@ __apicdebuginit(void) print_PIC(void)
|
|
printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
|
|
}
|
|
|
|
-__apicdebuginit(int) print_all_ICs(void)
|
|
+static int __initdata show_lapic = 1;
|
|
+static __init int setup_show_lapic(char *arg)
|
|
+{
|
|
+ int num = -1;
|
|
+
|
|
+ if (strcmp(arg, "all") == 0) {
|
|
+ show_lapic = CONFIG_NR_CPUS;
|
|
+ } else {
|
|
+ get_option(&arg, &num);
|
|
+ if (num >= 0)
|
|
+ show_lapic = num;
|
|
+ }
|
|
+
|
|
+ return 1;
|
|
+}
|
|
+__setup("show_lapic=", setup_show_lapic);
|
|
+
|
|
+__apicdebuginit(int) print_ICs(void)
|
|
{
|
|
+ if (apic_verbosity == APIC_QUIET)
|
|
+ return 0;
|
|
+
|
|
print_PIC();
|
|
|
|
/* don't print out if apic is not there */
|
|
if (!cpu_has_apic && !apic_from_smp_config())
|
|
return 0;
|
|
|
|
- print_all_local_APICs();
|
|
+ print_local_APICs(show_lapic);
|
|
print_IO_APIC();
|
|
|
|
return 0;
|
|
}
|
|
|
|
-fs_initcall(print_all_ICs);
|
|
+fs_initcall(print_ICs);
|
|
|
|
|
|
/* Where if anywhere is the i8259 connect in external int mode */
|
|
@@ -2106,7 +2114,7 @@ void __init setup_ioapic_ids_from_mpc(vo
|
|
* This is broken; anything with a real cpu count has to
|
|
* circumvent this idiocy regardless.
|
|
*/
|
|
- phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
|
|
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &phys_id_present_map);
|
|
|
|
/*
|
|
* Set the IOAPIC ID to the value stored in the MPC table.
|
|
@@ -2133,7 +2141,7 @@ void __init setup_ioapic_ids_from_mpc(vo
|
|
* system must have a unique ID or we get lots of nice
|
|
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
*/
|
|
- if (apic->check_apicid_used(phys_id_present_map,
|
|
+ if (apic->check_apicid_used(&phys_id_present_map,
|
|
mp_ioapics[apic_id].apicid)) {
|
|
printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
|
|
apic_id, mp_ioapics[apic_id].apicid);
|
|
@@ -2148,7 +2156,7 @@ void __init setup_ioapic_ids_from_mpc(vo
|
|
mp_ioapics[apic_id].apicid = i;
|
|
} else {
|
|
physid_mask_t tmp;
|
|
- tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
|
|
+ apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid, &tmp);
|
|
apic_printk(APIC_VERBOSE, "Setting %d in the "
|
|
"phys_id_present_map\n",
|
|
mp_ioapics[apic_id].apicid);
|
|
@@ -2303,20 +2311,16 @@ static int ioapic_retrigger_irq(unsigned
|
|
*/
|
|
|
|
#ifdef CONFIG_SMP
|
|
-static void send_cleanup_vector(struct irq_cfg *cfg)
|
|
+void send_cleanup_vector(struct irq_cfg *cfg)
|
|
{
|
|
cpumask_var_t cleanup_mask;
|
|
|
|
if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
|
|
unsigned int i;
|
|
- cfg->move_cleanup_count = 0;
|
|
- for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
|
|
- cfg->move_cleanup_count++;
|
|
for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
|
|
apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
|
|
} else {
|
|
cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
|
|
- cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
|
|
apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
|
|
free_cpumask_var(cleanup_mask);
|
|
}
|
|
@@ -2347,31 +2351,30 @@ static void __target_IO_APIC_irq(unsigne
|
|
}
|
|
}
|
|
|
|
-static int
|
|
-assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
|
|
-
|
|
/*
|
|
* Either sets desc->affinity to a valid value, and returns
|
|
- * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
|
|
+ * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
|
|
* leaves desc->affinity untouched.
|
|
*/
|
|
-static unsigned int
|
|
-set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
|
|
+unsigned int
|
|
+set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask,
|
|
+ unsigned int *dest_id)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
unsigned int irq;
|
|
|
|
if (!cpumask_intersects(mask, cpu_online_mask))
|
|
- return BAD_APICID;
|
|
+ return -1;
|
|
|
|
irq = desc->irq;
|
|
cfg = desc->chip_data;
|
|
if (assign_irq_vector(irq, cfg, mask))
|
|
- return BAD_APICID;
|
|
+ return -1;
|
|
|
|
cpumask_copy(desc->affinity, mask);
|
|
|
|
- return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
|
|
+ *dest_id = apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain);
|
|
+ return 0;
|
|
}
|
|
|
|
static int
|
|
@@ -2387,12 +2390,11 @@ set_ioapic_affinity_irq_desc(struct irq_
|
|
cfg = desc->chip_data;
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
- dest = set_desc_affinity(desc, mask);
|
|
- if (dest != BAD_APICID) {
|
|
+ ret = set_desc_affinity(desc, mask, &dest);
|
|
+ if (!ret) {
|
|
/* Only the high 8 bits are valid. */
|
|
dest = SET_APIC_LOGICAL_ID(dest);
|
|
__target_IO_APIC_irq(irq, dest, cfg);
|
|
- ret = 0;
|
|
}
|
|
spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
|
|
@@ -2507,8 +2509,13 @@ asmlinkage void smp_irq_move_cleanup_int
|
|
continue;
|
|
|
|
cfg = irq_cfg(irq);
|
|
- spin_lock(&desc->lock);
|
|
- if (!cfg->move_cleanup_count)
|
|
+ raw_spin_lock(&desc->lock);
|
|
+
|
|
+ /*
|
|
+ * Check if the irq migration is in progress. If so, we
|
|
+ * haven't received the cleanup request yet for this irq.
|
|
+ */
|
|
+ if (cfg->move_in_progress)
|
|
goto unlock;
|
|
|
|
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
|
|
@@ -2527,29 +2534,40 @@ asmlinkage void smp_irq_move_cleanup_int
|
|
goto unlock;
|
|
}
|
|
__get_cpu_var(vector_irq)[vector] = -1;
|
|
- cfg->move_cleanup_count--;
|
|
unlock:
|
|
- spin_unlock(&desc->lock);
|
|
+ raw_spin_unlock(&desc->lock);
|
|
}
|
|
|
|
irq_exit();
|
|
}
|
|
|
|
-static void irq_complete_move(struct irq_desc **descp)
|
|
+static void __irq_complete_move(struct irq_desc **descp, unsigned vector)
|
|
{
|
|
struct irq_desc *desc = *descp;
|
|
struct irq_cfg *cfg = desc->chip_data;
|
|
- unsigned vector, me;
|
|
+ unsigned me;
|
|
|
|
if (likely(!cfg->move_in_progress))
|
|
return;
|
|
|
|
- vector = ~get_irq_regs()->orig_ax;
|
|
me = smp_processor_id();
|
|
|
|
if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
|
|
send_cleanup_vector(cfg);
|
|
}
|
|
+
|
|
+static void irq_complete_move(struct irq_desc **descp)
|
|
+{
|
|
+ __irq_complete_move(descp, ~get_irq_regs()->orig_ax);
|
|
+}
|
|
+
|
|
+void irq_force_complete_move(int irq)
|
|
+{
|
|
+ struct irq_desc *desc = irq_to_desc(irq);
|
|
+ struct irq_cfg *cfg = desc->chip_data;
|
|
+
|
|
+ __irq_complete_move(&desc, cfg->vector);
|
|
+}
|
|
#else
|
|
static inline void irq_complete_move(struct irq_desc **descp) {}
|
|
#endif
|
|
@@ -2565,6 +2583,59 @@ static void ack_apic_edge(unsigned int i
|
|
|
|
atomic_t irq_mis_count;
|
|
|
|
+/*
|
|
+ * IO-APIC versions below 0x20 don't support EOI register.
|
|
+ * For the record, here is the information about various versions:
|
|
+ * 0Xh 82489DX
|
|
+ * 1Xh I/OAPIC or I/O(x)APIC which are not PCI 2.2 Compliant
|
|
+ * 2Xh I/O(x)APIC which is PCI 2.2 Compliant
|
|
+ * 30h-FFh Reserved
|
|
+ *
|
|
+ * Some of the Intel ICH Specs (ICH2 to ICH5) document the io-apic
|
|
+ * version as 0x2. This is an error with documentation and these ICH chips
|
|
+ * use io-apic's of version 0x20.
|
|
+ *
|
|
+ * For IO-APIC's with EOI register, we use that to do an explicit EOI.
|
|
+ * Otherwise, we simulate the EOI message manually by changing the trigger
|
|
+ * mode to edge and then back to level, with RTE being masked during this.
|
|
+*/
|
|
+static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
|
|
+{
|
|
+ struct irq_pin_list *entry;
|
|
+
|
|
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
|
|
+ if (mp_ioapics[entry->apic].apicver >= 0x20) {
|
|
+ /*
|
|
+ * Intr-remapping uses pin number as the virtual vector
|
|
+ * in the RTE. Actual vector is programmed in
|
|
+ * intr-remapping table entry. Hence for the io-apic
|
|
+ * EOI we use the pin number.
|
|
+ */
|
|
+ if (irq_remapped(irq))
|
|
+ io_apic_eoi(entry->apic, entry->pin);
|
|
+ else
|
|
+ io_apic_eoi(entry->apic, cfg->vector);
|
|
+ } else {
|
|
+ __mask_and_edge_IO_APIC_irq(entry);
|
|
+ __unmask_and_level_IO_APIC_irq(entry);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+
|
|
+static void eoi_ioapic_irq(struct irq_desc *desc)
|
|
+{
|
|
+ struct irq_cfg *cfg;
|
|
+ unsigned long flags;
|
|
+ unsigned int irq;
|
|
+
|
|
+ irq = desc->irq;
|
|
+ cfg = desc->chip_data;
|
|
+
|
|
+ spin_lock_irqsave(&ioapic_lock, flags);
|
|
+ __eoi_ioapic_irq(irq, cfg);
|
|
+ spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
+}
|
|
+
|
|
static void ack_apic_level(unsigned int irq)
|
|
{
|
|
struct irq_desc *desc = irq_to_desc(irq);
|
|
@@ -2600,6 +2671,19 @@ static void ack_apic_level(unsigned int
|
|
* level-triggered interrupt. We mask the source for the time of the
|
|
* operation to prevent an edge-triggered interrupt escaping meanwhile.
|
|
* The idea is from Manfred Spraul. --macro
|
|
+ *
|
|
+ * Also in the case when cpu goes offline, fixup_irqs() will forward
|
|
+ * any unhandled interrupt on the offlined cpu to the new cpu
|
|
+ * destination that is handling the corresponding interrupt. This
|
|
+ * interrupt forwarding is done via IPI's. Hence, in this case also
|
|
+ * level-triggered io-apic interrupt will be seen as an edge
|
|
+ * interrupt in the IRR. And we can't rely on the cpu's EOI
|
|
+ * to be broadcasted to the IO-APIC's which will clear the remoteIRR
|
|
+ * corresponding to the level-triggered interrupt. Hence on IO-APIC's
|
|
+ * supporting EOI register, we do an explicit EOI to clear the
|
|
+ * remote IRR and on IO-APIC's which don't have an EOI register,
|
|
+ * we use the above logic (mask+edge followed by unmask+level) from
|
|
+ * Manfred Spraul to clear the remote IRR.
|
|
*/
|
|
cfg = desc->chip_data;
|
|
i = cfg->vector;
|
|
@@ -2611,6 +2695,19 @@ static void ack_apic_level(unsigned int
|
|
*/
|
|
ack_APIC_irq();
|
|
|
|
+ /*
|
|
+ * Tail end of clearing remote IRR bit (either by delivering the EOI
|
|
+ * message via io-apic EOI register write or simulating it using
|
|
+	 * mask+edge followed by unmask+level logic) manually when the
|
|
+ * level triggered interrupt is seen as the edge triggered interrupt
|
|
+ * at the cpu.
|
|
+ */
|
|
+ if (!(v & (1 << (i & 0x1f)))) {
|
|
+ atomic_inc(&irq_mis_count);
|
|
+
|
|
+ eoi_ioapic_irq(desc);
|
|
+ }
|
|
+
|
|
/* Now we can move and renable the irq */
|
|
if (unlikely(do_unmask_irq)) {
|
|
/* Only migrate the irq if the ack has been received.
|
|
@@ -2644,41 +2741,9 @@ static void ack_apic_level(unsigned int
|
|
move_masked_irq(irq);
|
|
unmask_IO_APIC_irq_desc(desc);
|
|
}
|
|
-
|
|
- /* Tail end of version 0x11 I/O APIC bug workaround */
|
|
- if (!(v & (1 << (i & 0x1f)))) {
|
|
- atomic_inc(&irq_mis_count);
|
|
- spin_lock(&ioapic_lock);
|
|
- __mask_and_edge_IO_APIC_irq(cfg);
|
|
- __unmask_and_level_IO_APIC_irq(cfg);
|
|
- spin_unlock(&ioapic_lock);
|
|
- }
|
|
}
|
|
|
|
#ifdef CONFIG_INTR_REMAP
|
|
-static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
|
|
-{
|
|
- struct irq_pin_list *entry;
|
|
-
|
|
- for_each_irq_pin(entry, cfg->irq_2_pin)
|
|
- io_apic_eoi(entry->apic, entry->pin);
|
|
-}
|
|
-
|
|
-static void
|
|
-eoi_ioapic_irq(struct irq_desc *desc)
|
|
-{
|
|
- struct irq_cfg *cfg;
|
|
- unsigned long flags;
|
|
- unsigned int irq;
|
|
-
|
|
- irq = desc->irq;
|
|
- cfg = desc->chip_data;
|
|
-
|
|
- spin_lock_irqsave(&ioapic_lock, flags);
|
|
- __eoi_ioapic_irq(irq, cfg);
|
|
- spin_unlock_irqrestore(&ioapic_lock, flags);
|
|
-}
|
|
-
|
|
static void ir_ack_apic_edge(unsigned int irq)
|
|
{
|
|
ack_APIC_irq();
|
|
@@ -3256,6 +3321,7 @@ unsigned int create_irq_nr(unsigned int
|
|
continue;
|
|
|
|
desc_new = move_irq_desc(desc_new, node);
|
|
+ cfg_new = desc_new->chip_data;
|
|
|
|
if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
|
|
irq = new;
|
|
@@ -3311,7 +3377,8 @@ void destroy_irq(unsigned int irq)
|
|
* MSI message composition
|
|
*/
|
|
#if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
|
|
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
|
|
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
|
|
+ struct msi_msg *msg, u8 hpet_id)
|
|
{
|
|
struct irq_cfg *cfg;
|
|
int err;
|
|
@@ -3345,7 +3412,10 @@ static int msi_compose_msg(struct pci_de
|
|
irte.dest_id = IRTE_DEST(dest);
|
|
|
|
/* Set source-id of interrupt request */
|
|
- set_msi_sid(&irte, pdev);
|
|
+ if (pdev)
|
|
+ set_msi_sid(&irte, pdev);
|
|
+ else
|
|
+ set_hpet_sid(&irte, hpet_id);
|
|
|
|
modify_irte(irq, &irte);
|
|
|
|
@@ -3391,8 +3461,7 @@ static int set_msi_irq_affinity(unsigned
|
|
struct msi_msg msg;
|
|
unsigned int dest;
|
|
|
|
- dest = set_desc_affinity(desc, mask);
|
|
- if (dest == BAD_APICID)
|
|
+ if (set_desc_affinity(desc, mask, &dest))
|
|
return -1;
|
|
|
|
cfg = desc->chip_data;
|
|
@@ -3424,8 +3493,7 @@ ir_set_msi_irq_affinity(unsigned int irq
|
|
if (get_irte(irq, &irte))
|
|
return -1;
|
|
|
|
- dest = set_desc_affinity(desc, mask);
|
|
- if (dest == BAD_APICID)
|
|
+ if (set_desc_affinity(desc, mask, &dest))
|
|
return -1;
|
|
|
|
irte.vector = cfg->vector;
|
|
@@ -3510,7 +3578,7 @@ static int setup_msi_irq(struct pci_dev
|
|
int ret;
|
|
struct msi_msg msg;
|
|
|
|
- ret = msi_compose_msg(dev, irq, &msg);
|
|
+ ret = msi_compose_msg(dev, irq, &msg, -1);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
@@ -3607,8 +3675,7 @@ static int dmar_msi_set_affinity(unsigne
|
|
struct msi_msg msg;
|
|
unsigned int dest;
|
|
|
|
- dest = set_desc_affinity(desc, mask);
|
|
- if (dest == BAD_APICID)
|
|
+ if (set_desc_affinity(desc, mask, &dest))
|
|
return -1;
|
|
|
|
cfg = desc->chip_data;
|
|
@@ -3643,7 +3710,7 @@ int arch_setup_dmar_msi(unsigned int irq
|
|
int ret;
|
|
struct msi_msg msg;
|
|
|
|
- ret = msi_compose_msg(NULL, irq, &msg);
|
|
+ ret = msi_compose_msg(NULL, irq, &msg, -1);
|
|
if (ret < 0)
|
|
return ret;
|
|
dmar_msi_write(irq, &msg);
|
|
@@ -3663,8 +3730,7 @@ static int hpet_msi_set_affinity(unsigne
|
|
struct msi_msg msg;
|
|
unsigned int dest;
|
|
|
|
- dest = set_desc_affinity(desc, mask);
|
|
- if (dest == BAD_APICID)
|
|
+ if (set_desc_affinity(desc, mask, &dest))
|
|
return -1;
|
|
|
|
cfg = desc->chip_data;
|
|
@@ -3683,6 +3749,19 @@ static int hpet_msi_set_affinity(unsigne
|
|
|
|
#endif /* CONFIG_SMP */
|
|
|
|
+static struct irq_chip ir_hpet_msi_type = {
|
|
+ .name = "IR-HPET_MSI",
|
|
+ .unmask = hpet_msi_unmask,
|
|
+ .mask = hpet_msi_mask,
|
|
+#ifdef CONFIG_INTR_REMAP
|
|
+ .ack = ir_ack_apic_edge,
|
|
+#ifdef CONFIG_SMP
|
|
+ .set_affinity = ir_set_msi_irq_affinity,
|
|
+#endif
|
|
+#endif
|
|
+ .retrigger = ioapic_retrigger_irq,
|
|
+};
|
|
+
|
|
static struct irq_chip hpet_msi_type = {
|
|
.name = "HPET_MSI",
|
|
.unmask = hpet_msi_unmask,
|
|
@@ -3694,20 +3773,36 @@ static struct irq_chip hpet_msi_type = {
|
|
.retrigger = ioapic_retrigger_irq,
|
|
};
|
|
|
|
-int arch_setup_hpet_msi(unsigned int irq)
|
|
+int arch_setup_hpet_msi(unsigned int irq, unsigned int id)
|
|
{
|
|
int ret;
|
|
struct msi_msg msg;
|
|
struct irq_desc *desc = irq_to_desc(irq);
|
|
|
|
- ret = msi_compose_msg(NULL, irq, &msg);
|
|
+ if (intr_remapping_enabled) {
|
|
+ struct intel_iommu *iommu = map_hpet_to_ir(id);
|
|
+ int index;
|
|
+
|
|
+ if (!iommu)
|
|
+ return -1;
|
|
+
|
|
+ index = alloc_irte(iommu, irq, 1);
|
|
+ if (index < 0)
|
|
+ return -1;
|
|
+ }
|
|
+
|
|
+ ret = msi_compose_msg(NULL, irq, &msg, id);
|
|
if (ret < 0)
|
|
return ret;
|
|
|
|
hpet_msi_write(irq, &msg);
|
|
desc->status |= IRQ_MOVE_PCNTXT;
|
|
- set_irq_chip_and_handler_name(irq, &hpet_msi_type, handle_edge_irq,
|
|
- "edge");
|
|
+ if (irq_remapped(irq))
|
|
+ set_irq_chip_and_handler_name(irq, &ir_hpet_msi_type,
|
|
+ handle_edge_irq, "edge");
|
|
+ else
|
|
+ set_irq_chip_and_handler_name(irq, &hpet_msi_type,
|
|
+ handle_edge_irq, "edge");
|
|
|
|
return 0;
|
|
}
|
|
@@ -3741,8 +3836,7 @@ static int set_ht_irq_affinity(unsigned
|
|
struct irq_cfg *cfg;
|
|
unsigned int dest;
|
|
|
|
- dest = set_desc_affinity(desc, mask);
|
|
- if (dest == BAD_APICID)
|
|
+ if (set_desc_affinity(desc, mask, &dest))
|
|
return -1;
|
|
|
|
cfg = desc->chip_data;
|
|
@@ -3808,75 +3902,6 @@ int arch_setup_ht_irq(unsigned int irq,
|
|
}
|
|
#endif /* CONFIG_HT_IRQ */
|
|
|
|
-#ifdef CONFIG_X86_UV
|
|
-/*
|
|
- * Re-target the irq to the specified CPU and enable the specified MMR located
|
|
- * on the specified blade to allow the sending of MSIs to the specified CPU.
|
|
- */
|
|
-int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
|
|
- unsigned long mmr_offset)
|
|
-{
|
|
- const struct cpumask *eligible_cpu = cpumask_of(cpu);
|
|
- struct irq_cfg *cfg;
|
|
- int mmr_pnode;
|
|
- unsigned long mmr_value;
|
|
- struct uv_IO_APIC_route_entry *entry;
|
|
- unsigned long flags;
|
|
- int err;
|
|
-
|
|
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
|
|
-
|
|
- cfg = irq_cfg(irq);
|
|
-
|
|
- err = assign_irq_vector(irq, cfg, eligible_cpu);
|
|
- if (err != 0)
|
|
- return err;
|
|
-
|
|
- spin_lock_irqsave(&vector_lock, flags);
|
|
- set_irq_chip_and_handler_name(irq, &uv_irq_chip, handle_percpu_irq,
|
|
- irq_name);
|
|
- spin_unlock_irqrestore(&vector_lock, flags);
|
|
-
|
|
- mmr_value = 0;
|
|
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
|
|
- entry->vector = cfg->vector;
|
|
- entry->delivery_mode = apic->irq_delivery_mode;
|
|
- entry->dest_mode = apic->irq_dest_mode;
|
|
- entry->polarity = 0;
|
|
- entry->trigger = 0;
|
|
- entry->mask = 0;
|
|
- entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
|
|
-
|
|
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
|
|
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
|
|
-
|
|
- if (cfg->move_in_progress)
|
|
- send_cleanup_vector(cfg);
|
|
-
|
|
- return irq;
|
|
-}
|
|
-
|
|
-/*
|
|
- * Disable the specified MMR located on the specified blade so that MSIs are
|
|
- * longer allowed to be sent.
|
|
- */
|
|
-void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset)
|
|
-{
|
|
- unsigned long mmr_value;
|
|
- struct uv_IO_APIC_route_entry *entry;
|
|
- int mmr_pnode;
|
|
-
|
|
- BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
|
|
-
|
|
- mmr_value = 0;
|
|
- entry = (struct uv_IO_APIC_route_entry *)&mmr_value;
|
|
- entry->mask = 1;
|
|
-
|
|
- mmr_pnode = uv_blade_to_pnode(mmr_blade);
|
|
- uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
|
|
-}
|
|
-#endif /* CONFIG_X86_64 */
|
|
-
|
|
int __init io_apic_get_redir_entries (int ioapic)
|
|
{
|
|
union IO_APIC_reg_01 reg_01;
|
|
@@ -4055,7 +4080,7 @@ int __init io_apic_get_unique_id(int ioa
|
|
*/
|
|
|
|
if (physids_empty(apic_id_map))
|
|
- apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
|
|
+ apic->ioapic_phys_id_map(&phys_cpu_present_map, &apic_id_map);
|
|
|
|
spin_lock_irqsave(&ioapic_lock, flags);
|
|
reg_00.raw = io_apic_read(ioapic, 0);
|
|
@@ -4071,10 +4096,10 @@ int __init io_apic_get_unique_id(int ioa
|
|
* Every APIC in a system must have a unique ID or we get lots of nice
|
|
* 'stuck on smp_invalidate_needed IPI wait' messages.
|
|
*/
|
|
- if (apic->check_apicid_used(apic_id_map, apic_id)) {
|
|
+ if (apic->check_apicid_used(&apic_id_map, apic_id)) {
|
|
|
|
for (i = 0; i < get_physical_broadcast(); i++) {
|
|
- if (!apic->check_apicid_used(apic_id_map, i))
|
|
+ if (!apic->check_apicid_used(&apic_id_map, i))
|
|
break;
|
|
}
|
|
|
|
@@ -4087,7 +4112,7 @@ int __init io_apic_get_unique_id(int ioa
|
|
apic_id = i;
|
|
}
|
|
|
|
- tmp = apic->apicid_to_cpu_present(apic_id);
|
|
+ apic->apicid_to_cpu_present(apic_id, &tmp);
|
|
physids_or(apic_id_map, apic_id_map, tmp);
|
|
|
|
if (reg_00.bits.ID != apic_id) {
|
|
@@ -4218,7 +4243,7 @@ static struct resource * __init ioapic_s
|
|
for (i = 0; i < nr_ioapics; i++) {
|
|
res[i].name = mem;
|
|
res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
|
|
- sprintf(mem, "IOAPIC %u", i);
|
|
+ snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
|
|
mem += IOAPIC_RESOURCE_NAME_SIZE;
|
|
}
|
|
|
|
@@ -4252,18 +4277,17 @@ void __init ioapic_init_mappings(void)
|
|
#ifdef CONFIG_X86_32
|
|
fake_ioapic_page:
|
|
#endif
|
|
- ioapic_phys = (unsigned long)
|
|
- alloc_bootmem_pages(PAGE_SIZE);
|
|
+ ioapic_phys = (unsigned long)alloc_bootmem_pages(PAGE_SIZE);
|
|
ioapic_phys = __pa(ioapic_phys);
|
|
}
|
|
set_fixmap_nocache(idx, ioapic_phys);
|
|
- apic_printk(APIC_VERBOSE,
|
|
- "mapped IOAPIC to %08lx (%08lx)\n",
|
|
- __fix_to_virt(idx), ioapic_phys);
|
|
+ apic_printk(APIC_VERBOSE, "mapped IOAPIC to %08lx (%08lx)\n",
|
|
+ __fix_to_virt(idx) + (ioapic_phys & ~PAGE_MASK),
|
|
+ ioapic_phys);
|
|
idx++;
|
|
|
|
ioapic_res->start = ioapic_phys;
|
|
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
|
|
+ ioapic_res->end = ioapic_phys + IO_APIC_SLOT_SIZE - 1;
|
|
ioapic_res++;
|
|
}
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/Makefile 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/Makefile 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -34,7 +34,8 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
|
|
|
|
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
|
|
|
|
-disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o sched.o vmware.o
|
|
+disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o perf_event.o \
|
|
+ sched.o vmware.o
|
|
|
|
quiet_cmd_mkcapflags = MKCAP $@
|
|
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
|
|
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:24.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:34.000000000 +0100
|
|
@@ -69,7 +69,7 @@ void __init setup_cpu_local_masks(void)
|
|
static void __cpuinit default_init(struct cpuinfo_x86 *c)
|
|
{
|
|
#ifdef CONFIG_X86_64
|
|
- display_cacheinfo(c);
|
|
+ cpu_detect_cache_sizes(c);
|
|
#else
|
|
/* Not much we can do here... */
|
|
/* Check if at least it has cpuid */
|
|
@@ -414,7 +414,7 @@ static void __cpuinit get_model_name(str
|
|
}
|
|
}
|
|
|
|
-void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
|
|
+void __cpuinit cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
|
|
{
|
|
unsigned int n, dummy, ebx, ecx, edx, l2size;
|
|
|
|
@@ -422,8 +422,6 @@ void __cpuinit display_cacheinfo(struct
|
|
|
|
if (n >= 0x80000005) {
|
|
cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
|
|
- printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
|
|
- edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
|
|
c->x86_cache_size = (ecx>>24) + (edx>>24);
|
|
#ifdef CONFIG_X86_64
|
|
/* On K8 L1 TLB is inclusive, so don't count it */
|
|
@@ -453,9 +451,6 @@ void __cpuinit display_cacheinfo(struct
|
|
#endif
|
|
|
|
c->x86_cache_size = l2size;
|
|
-
|
|
- printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
|
|
- l2size, ecx & 0xFF);
|
|
}
|
|
|
|
void __cpuinit detect_ht(struct cpuinfo_x86 *c)
|
|
@@ -463,6 +458,7 @@ void __cpuinit detect_ht(struct cpuinfo_
|
|
#ifdef CONFIG_X86_HT
|
|
u32 eax, ebx, ecx, edx;
|
|
int index_msb, core_bits;
|
|
+ static bool printed;
|
|
|
|
if (!cpu_has(c, X86_FEATURE_HT))
|
|
return;
|
|
@@ -478,7 +474,7 @@ void __cpuinit detect_ht(struct cpuinfo_
|
|
smp_num_siblings = (ebx & 0xff0000) >> 16;
|
|
|
|
if (smp_num_siblings == 1) {
|
|
- printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
|
|
+ printk_once(KERN_INFO "CPU0: Hyper-Threading is disabled\n");
|
|
goto out;
|
|
}
|
|
|
|
@@ -505,11 +501,12 @@ void __cpuinit detect_ht(struct cpuinfo_
|
|
((1 << core_bits) - 1);
|
|
|
|
out:
|
|
- if ((c->x86_max_cores * smp_num_siblings) > 1) {
|
|
+ if (!printed && (c->x86_max_cores * smp_num_siblings) > 1) {
|
|
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
|
|
c->phys_proc_id);
|
|
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
|
|
c->cpu_core_id);
|
|
+ printed = 1;
|
|
}
|
|
#endif
|
|
}
|
|
@@ -690,24 +687,31 @@ void __init early_cpu_init(void)
|
|
const struct cpu_dev *const *cdev;
|
|
int count = 0;
|
|
|
|
+#ifdef CONFIG_PROCESSOR_SELECT
|
|
printk(KERN_INFO "KERNEL supported cpus:\n");
|
|
+#endif
|
|
+
|
|
for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
|
|
const struct cpu_dev *cpudev = *cdev;
|
|
- unsigned int j;
|
|
|
|
if (count >= X86_VENDOR_NUM)
|
|
break;
|
|
cpu_devs[count] = cpudev;
|
|
count++;
|
|
|
|
- for (j = 0; j < 2; j++) {
|
|
- if (!cpudev->c_ident[j])
|
|
- continue;
|
|
- printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
|
|
- cpudev->c_ident[j]);
|
|
+#ifdef CONFIG_PROCESSOR_SELECT
|
|
+ {
|
|
+ unsigned int j;
|
|
+
|
|
+ for (j = 0; j < 2; j++) {
|
|
+ if (!cpudev->c_ident[j])
|
|
+ continue;
|
|
+ printk(KERN_INFO " %s %s\n", cpudev->c_vendor,
|
|
+ cpudev->c_ident[j]);
|
|
+ }
|
|
}
|
|
+#endif
|
|
}
|
|
-
|
|
early_identify_cpu(&boot_cpu_data);
|
|
}
|
|
|
|
@@ -874,10 +878,8 @@ static void __cpuinit identify_cpu(struc
|
|
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_MCE
|
|
/* Init Machine Check Exception if available. */
|
|
- mcheck_init(c);
|
|
-#endif
|
|
+ mcheck_cpu_init(c);
|
|
|
|
select_idle_routine(c);
|
|
|
|
@@ -909,6 +911,10 @@ void __init identify_boot_cpu(void)
|
|
init_hw_perf_events();
|
|
}
|
|
|
|
+#ifdef CONFIG_XEN
|
|
+void set_perf_event_pending(void) {}
|
|
+#endif
|
|
+
|
|
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
|
|
{
|
|
BUG_ON(c == &boot_cpu_data);
|
|
@@ -1156,7 +1162,7 @@ static void clear_all_debug_regs(void)
|
|
void __cpuinit cpu_init(void)
|
|
{
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
- struct orig_ist *orig_ist;
|
|
+ struct orig_ist *oist;
|
|
struct tss_struct *t;
|
|
unsigned long v;
|
|
int i;
|
|
@@ -1170,7 +1176,7 @@ void __cpuinit cpu_init(void)
|
|
xen_switch_pt();
|
|
#ifndef CONFIG_X86_NO_TSS
|
|
t = &per_cpu(init_tss, cpu);
|
|
- orig_ist = &per_cpu(orig_ist, cpu);
|
|
+ oist = &per_cpu(orig_ist, cpu);
|
|
#endif
|
|
|
|
#ifdef CONFIG_NUMA
|
|
@@ -1184,7 +1190,7 @@ void __cpuinit cpu_init(void)
|
|
if (cpumask_test_and_set_cpu(cpu, cpu_initialized_mask))
|
|
panic("CPU#%d already initialized!\n", cpu);
|
|
|
|
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
|
|
+ pr_debug("Initializing CPU#%d\n", cpu);
|
|
|
|
clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
|
|
|
|
@@ -1207,7 +1213,7 @@ void __cpuinit cpu_init(void)
|
|
wrmsrl(MSR_KERNEL_GS_BASE, 0);
|
|
barrier();
|
|
|
|
- check_efer();
|
|
+ x86_configure_nx();
|
|
#ifdef CONFIG_X86_LOCAL_APIC
|
|
if (cpu != 0)
|
|
enable_x2apic();
|
|
@@ -1217,12 +1223,12 @@ void __cpuinit cpu_init(void)
|
|
/*
|
|
* set up and load the per-CPU TSS
|
|
*/
|
|
- if (!orig_ist->ist[0]) {
|
|
+ if (!oist->ist[0]) {
|
|
char *estacks = per_cpu(exception_stacks, cpu);
|
|
|
|
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
|
estacks += exception_stack_sizes[v];
|
|
- orig_ist->ist[v] = t->x86_tss.ist[v] =
|
|
+ oist->ist[v] = t->x86_tss.ist[v] =
|
|
(unsigned long)estacks;
|
|
}
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -771,7 +771,7 @@ core_initcall(e820_mark_nvs_memory);
|
|
/*
|
|
* Early reserved memory areas.
|
|
*/
|
|
-#define MAX_EARLY_RES 20
|
|
+#define MAX_EARLY_RES 32
|
|
|
|
struct early_res {
|
|
u64 start, end;
|
|
@@ -780,7 +780,15 @@ struct early_res {
|
|
};
|
|
static struct early_res early_res[MAX_EARLY_RES] __initdata = {
|
|
#ifndef CONFIG_XEN
|
|
- { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */
|
|
+ { 0, PAGE_SIZE, "BIOS data page", 1 }, /* BIOS data page */
|
|
+#if defined(CONFIG_X86_32) && defined(CONFIG_X86_TRAMPOLINE)
|
|
+ /*
|
|
+ * But first pinch a few for the stack/trampoline stuff
|
|
+ * FIXME: Don't need the extra page at 4K, but need to fix
|
|
+ * trampoline before removing it. (see the GDT stuff)
|
|
+ */
|
|
+ { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE", 1 },
|
|
+#endif
|
|
#endif
|
|
{}
|
|
};
|
|
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -338,6 +338,10 @@ ENTRY(ret_from_fork)
|
|
END(ret_from_fork)
|
|
|
|
/*
|
|
+ * Interrupt exit functions should be protected against kprobes
|
|
+ */
|
|
+ .pushsection .kprobes.text, "ax"
|
|
+/*
|
|
* Return to user mode is not as complex as all this looks,
|
|
* but we want the default path for a system call return to
|
|
* go as quickly as possible which is why some of this is
|
|
@@ -387,6 +391,10 @@ need_resched:
|
|
END(resume_kernel)
|
|
#endif
|
|
CFI_ENDPROC
|
|
+/*
|
|
+ * End of kprobes section
|
|
+ */
|
|
+ .popsection
|
|
|
|
/* SYSENTER_RETURN points to after the "sysenter" instruction in
|
|
the vsyscall page. See vsyscall-sysentry.S, which defines the symbol. */
|
|
@@ -539,10 +547,14 @@ ENTRY(ia32pv_sysenter_target)
|
|
.align 4
|
|
.long 1b,syscall_fault
|
|
.previous
|
|
- /* fall through */
|
|
+ jmp system_call
|
|
CFI_ENDPROC
|
|
ENDPROC(ia32pv_sysenter_target)
|
|
|
|
+/*
|
|
+ * syscall stub including irq exit should be protected against kprobes
|
|
+ */
|
|
+ .pushsection .kprobes.text, "ax"
|
|
# system call handler stub
|
|
ENTRY(system_call)
|
|
RING0_INT_FRAME # can't unwind into user space anyway
|
|
@@ -774,26 +786,69 @@ syscall_badsys:
|
|
jmp resume_userspace
|
|
END(syscall_badsys)
|
|
CFI_ENDPROC
|
|
+/*
|
|
+ * End of kprobes section
|
|
+ */
|
|
+ .popsection
|
|
|
|
/*
|
|
* System calls that need a pt_regs pointer.
|
|
*/
|
|
-#define PTREGSCALL(name) \
|
|
+#define PTREGSCALL0(name) \
|
|
ALIGN; \
|
|
ptregs_##name: \
|
|
leal 4(%esp),%eax; \
|
|
jmp sys_##name;
|
|
|
|
-PTREGSCALL(iopl)
|
|
-PTREGSCALL(fork)
|
|
-PTREGSCALL(clone)
|
|
-PTREGSCALL(vfork)
|
|
-PTREGSCALL(execve)
|
|
-PTREGSCALL(sigaltstack)
|
|
-PTREGSCALL(sigreturn)
|
|
-PTREGSCALL(rt_sigreturn)
|
|
-PTREGSCALL(vm86)
|
|
-PTREGSCALL(vm86old)
|
|
+#define PTREGSCALL1(name) \
|
|
+ ALIGN; \
|
|
+ptregs_##name: \
|
|
+ leal 4(%esp),%edx; \
|
|
+ movl (PT_EBX+4)(%esp),%eax; \
|
|
+ jmp sys_##name;
|
|
+
|
|
+#define PTREGSCALL2(name) \
|
|
+ ALIGN; \
|
|
+ptregs_##name: \
|
|
+ leal 4(%esp),%ecx; \
|
|
+ movl (PT_ECX+4)(%esp),%edx; \
|
|
+ movl (PT_EBX+4)(%esp),%eax; \
|
|
+ jmp sys_##name;
|
|
+
|
|
+#define PTREGSCALL3(name) \
|
|
+ ALIGN; \
|
|
+ptregs_##name: \
|
|
+ leal 4(%esp),%eax; \
|
|
+ pushl %eax; \
|
|
+ movl PT_EDX(%eax),%ecx; \
|
|
+ movl PT_ECX(%eax),%edx; \
|
|
+ movl PT_EBX(%eax),%eax; \
|
|
+ call sys_##name; \
|
|
+ addl $4,%esp; \
|
|
+ ret
|
|
+
|
|
+PTREGSCALL1(iopl)
|
|
+PTREGSCALL0(fork)
|
|
+PTREGSCALL0(vfork)
|
|
+PTREGSCALL3(execve)
|
|
+PTREGSCALL2(sigaltstack)
|
|
+PTREGSCALL0(sigreturn)
|
|
+PTREGSCALL0(rt_sigreturn)
|
|
+PTREGSCALL2(vm86)
|
|
+PTREGSCALL1(vm86old)
|
|
+
|
|
+/* Clone is an oddball. The 4th arg is in %edi */
|
|
+ ALIGN;
|
|
+ptregs_clone:
|
|
+ leal 4(%esp),%eax
|
|
+ pushl %eax
|
|
+ pushl PT_EDI(%eax)
|
|
+ movl PT_EDX(%eax),%ecx
|
|
+ movl PT_ECX(%eax),%edx
|
|
+ movl PT_EBX(%eax),%eax
|
|
+ call sys_clone
|
|
+ addl $8,%esp
|
|
+ ret
|
|
|
|
#ifndef CONFIG_XEN
|
|
.macro FIXUP_ESPFIX_STACK
|
|
@@ -884,6 +939,10 @@ common_interrupt:
|
|
ENDPROC(common_interrupt)
|
|
CFI_ENDPROC
|
|
|
|
+/*
|
|
+ * Irq entries should be protected against kprobes
|
|
+ */
|
|
+ .pushsection .kprobes.text, "ax"
|
|
#define BUILD_INTERRUPT3(name, nr, fn) \
|
|
ENTRY(name) \
|
|
RING0_INT_FRAME; \
|
|
@@ -905,6 +964,8 @@ ENDPROC(name)
|
|
#else
|
|
#define UNWIND_ESPFIX_STACK
|
|
|
|
+ .pushsection .kprobes.text, "ax"
|
|
+
|
|
# A note on the "critical region" in our callback handler.
|
|
# We want to avoid stacking callback handlers due to events occurring
|
|
# during handling of the last event. To do this, we keep events disabled
|
|
@@ -1205,16 +1266,16 @@ ENTRY(fixup_4gb_segment)
|
|
jmp error_code
|
|
CFI_ENDPROC
|
|
END(spurious_interrupt_bug)
|
|
+/*
|
|
+ * End of kprobes section
|
|
+ */
|
|
+ .popsection
|
|
|
|
ENTRY(kernel_thread_helper)
|
|
pushl $0 # fake return address for unwinder
|
|
CFI_STARTPROC
|
|
- movl %edx,%eax
|
|
- push %edx
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
- call *%ebx
|
|
- push %eax
|
|
- CFI_ADJUST_CFA_OFFSET 4
|
|
+ movl %edi,%eax
|
|
+ call *%esi
|
|
call do_exit
|
|
ud2 # padding for call trace
|
|
CFI_ENDPROC
|
|
@@ -1315,17 +1376,14 @@ END(ftrace_graph_caller)
|
|
|
|
.globl return_to_handler
|
|
return_to_handler:
|
|
- pushl $0
|
|
pushl %eax
|
|
- pushl %ecx
|
|
pushl %edx
|
|
movl %ebp, %eax
|
|
call ftrace_return_to_handler
|
|
- movl %eax, 0xc(%esp)
|
|
+ movl %eax, %ecx
|
|
popl %edx
|
|
- popl %ecx
|
|
popl %eax
|
|
- ret
|
|
+ jmp *%ecx
|
|
#endif
|
|
|
|
#include <asm/alternative-asm.h>
|
|
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -160,11 +160,11 @@ GLOBAL(return_to_handler)
|
|
|
|
call ftrace_return_to_handler
|
|
|
|
- movq %rax, 16(%rsp)
|
|
+ movq %rax, %rdi
|
|
movq 8(%rsp), %rdx
|
|
movq (%rsp), %rax
|
|
- addq $16, %rsp
|
|
- retq
|
|
+ addq $24, %rsp
|
|
+ jmp *%rdi
|
|
#endif
|
|
|
|
|
|
@@ -863,8 +863,8 @@ apicinterrupt UV_BAU_MESSAGE \
|
|
#endif
|
|
apicinterrupt LOCAL_TIMER_VECTOR \
|
|
apic_timer_interrupt smp_apic_timer_interrupt
|
|
-apicinterrupt GENERIC_INTERRUPT_VECTOR \
|
|
- generic_interrupt smp_generic_interrupt
|
|
+apicinterrupt X86_PLATFORM_IPI_VECTOR \
|
|
+ x86_platform_ipi smp_x86_platform_ipi
|
|
|
|
#ifdef CONFIG_SMP
|
|
apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \
|
|
@@ -1093,63 +1093,20 @@ zeroentry coprocessor_error do_coprocess
|
|
errorentry alignment_check do_alignment_check
|
|
zeroentry simd_coprocessor_error do_simd_coprocessor_error
|
|
|
|
-/*
|
|
- * Create a kernel thread.
|
|
- *
|
|
- * C extern interface:
|
|
- * extern long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
|
|
- *
|
|
- * asm input arguments:
|
|
- * rdi: fn, rsi: arg, rdx: flags
|
|
- */
|
|
-ENTRY(kernel_thread)
|
|
- CFI_STARTPROC
|
|
- FAKE_STACK_FRAME $child_rip
|
|
- SAVE_ALL
|
|
-
|
|
- # rdi: flags, rsi: usp, rdx: will be &pt_regs
|
|
- movq %rdx,%rdi
|
|
- orq kernel_thread_flags(%rip),%rdi
|
|
- movq $-1, %rsi
|
|
- movq %rsp, %rdx
|
|
-
|
|
- xorl %r8d,%r8d
|
|
- xorl %r9d,%r9d
|
|
-
|
|
- # clone now
|
|
- call do_fork
|
|
- movq %rax,RAX(%rsp)
|
|
- xorl %edi,%edi
|
|
-
|
|
- /*
|
|
- * It isn't worth to check for reschedule here,
|
|
- * so internally to the x86_64 port you can rely on kernel_thread()
|
|
- * not to reschedule the child before returning, this avoids the need
|
|
- * of hacks for example to fork off the per-CPU idle tasks.
|
|
- * [Hopefully no generic code relies on the reschedule -AK]
|
|
- */
|
|
- RESTORE_ALL
|
|
- UNFAKE_STACK_FRAME
|
|
- ret
|
|
- CFI_ENDPROC
|
|
-END(kernel_thread)
|
|
-
|
|
-ENTRY(child_rip)
|
|
+ENTRY(kernel_thread_helper)
|
|
pushq $0 # fake return address
|
|
CFI_STARTPROC
|
|
/*
|
|
* Here we are in the child and the registers are set as they were
|
|
* at kernel_thread() invocation in the parent.
|
|
*/
|
|
- movq %rdi, %rax
|
|
- movq %rsi, %rdi
|
|
- call *%rax
|
|
+ call *%rsi
|
|
# exit
|
|
mov %eax, %edi
|
|
call do_exit
|
|
ud2 # padding for call trace
|
|
CFI_ENDPROC
|
|
-END(child_rip)
|
|
+END(kernel_thread_helper)
|
|
|
|
/*
|
|
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
|
|
@@ -1329,12 +1286,17 @@ error_kernelspace:
|
|
leaq irq_return(%rip),%rcx
|
|
cmpq %rcx,RIP+8(%rsp)
|
|
je error_swapgs
|
|
- movl %ecx,%ecx /* zero extend */
|
|
- cmpq %rcx,RIP+8(%rsp)
|
|
- je error_swapgs
|
|
+ movl %ecx,%eax /* zero extend */
|
|
+ cmpq %rax,RIP+8(%rsp)
|
|
+ je bstep_iret
|
|
cmpq $gs_change,RIP+8(%rsp)
|
|
je error_swapgs
|
|
jmp error_sti
|
|
+
|
|
+bstep_iret:
|
|
+ /* Fix truncated RIP */
|
|
+ movq %rcx,RIP+8(%rsp)
|
|
+ jmp error_swapgs
|
|
#endif
|
|
END(error_entry)
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -1,5 +1,6 @@
|
|
#include <linux/kernel.h>
|
|
#include <linux/init.h>
|
|
+#include <linux/pci.h>
|
|
|
|
#include <asm/setup.h>
|
|
#ifndef CONFIG_XEN
|
|
@@ -121,7 +122,7 @@ void __init xen_start_kernel(void)
|
|
__pmd(__pa_symbol(swapper_pg_fixmap) | _PAGE_TABLE));
|
|
}
|
|
#else
|
|
- check_efer();
|
|
+ x86_configure_nx();
|
|
xen_init_pt();
|
|
#endif
|
|
|
|
@@ -149,6 +150,8 @@ void __init xen_start_kernel(void)
|
|
virt_to_machine(empty_zero_page),
|
|
PAGE_KERNEL_RO);
|
|
|
|
+ if (is_initial_xendomain())
|
|
+ pci_request_acs();
|
|
}
|
|
|
|
void __init xen_arch_setup(void)
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head32-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head32-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -48,8 +48,6 @@ void __init i386_start_kernel(void)
|
|
BUG_ON(pte_index(hypervisor_virt_start));
|
|
#endif
|
|
|
|
- reserve_trampoline_memory();
|
|
-
|
|
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
|
|
|
|
#ifndef CONFIG_XEN
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head64-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head64-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -119,8 +119,6 @@ void __init x86_64_start_reservations(ch
|
|
{
|
|
copy_bootdata(__va(real_mode_data));
|
|
|
|
- reserve_trampoline_memory();
|
|
-
|
|
reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS");
|
|
|
|
/*
|
|
--- head-2011-03-17.orig/arch/x86/kernel/head_64-xen.S 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/head_64-xen.S 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -51,9 +51,9 @@ startup_64:
|
|
|
|
#define NEXT_PAGE(name) \
|
|
.balign PAGE_SIZE; \
|
|
- phys_##name = . - .head.text; \
|
|
ENTRY(name)
|
|
|
|
+ __PAGE_ALIGNED_BSS
|
|
NEXT_PAGE(init_level4_pgt)
|
|
.fill 512,8,0
|
|
/*
|
|
@@ -81,7 +81,9 @@ NEXT_PAGE(level2_fixmap_pgt)
|
|
NEXT_PAGE(level1_fixmap_pgt)
|
|
.fill 512,8,0
|
|
|
|
+ .previous
|
|
NEXT_PAGE(hypercall_page)
|
|
+ phys_hypercall_page = . - .head.text
|
|
CFI_STARTPROC
|
|
.rept 0x1000 / 0x20
|
|
.skip 1 /* push %rcx */
|
|
--- head-2011-03-17.orig/arch/x86/kernel/ioport-xen.c 2011-02-01 14:44:12.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/ioport-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -75,8 +75,9 @@ asmlinkage long sys_ioperm(unsigned long
|
|
* beyond the 0x3ff range: to get the full 65536 ports bitmapped
|
|
* you'd need 8kB of bitmaps/process, which is a bit excessive.
|
|
*/
|
|
-static int do_iopl(unsigned int level, struct thread_struct *t)
|
|
+long sys_iopl(unsigned int level, struct pt_regs *regs)
|
|
{
|
|
+	struct thread_struct *t = &current->thread;
|
|
unsigned int old = t->iopl >> 12;
|
|
|
|
if (level > 3)
|
|
@@ -86,27 +87,8 @@ static int do_iopl(unsigned int level, s
|
|
if (!capable(CAP_SYS_RAWIO))
|
|
return -EPERM;
|
|
}
|
|
-
|
|
- return 0;
|
|
-}
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
-long sys_iopl(struct pt_regs *regs)
|
|
-{
|
|
- unsigned int level = regs->bx;
|
|
-#else
|
|
-asmlinkage long sys_iopl(unsigned int level, struct pt_regs *regs)
|
|
-{
|
|
-#endif
|
|
-	struct thread_struct *t = &current->thread;
|
|
- int rc;
|
|
-
|
|
- rc = do_iopl(level, t);
|
|
- if (rc < 0)
|
|
- goto out;
|
|
-
|
|
t->iopl = level << 12;
|
|
set_iopl_mask(t->iopl);
|
|
-out:
|
|
- return rc;
|
|
+
|
|
+ return 0;
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/irq-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/irq-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -19,7 +19,7 @@ atomic_t irq_err_count;
|
|
|
|
#ifndef CONFIG_XEN
|
|
/* Function pointer for generic interrupt vector handling */
|
|
-void (*generic_interrupt_extension)(void) = NULL;
|
|
+void (*x86_platform_ipi_callback)(void) = NULL;
|
|
#endif
|
|
|
|
/*
|
|
@@ -77,10 +77,10 @@ static int show_other_interrupts(struct
|
|
seq_printf(p, " Performance pending work\n");
|
|
#endif
|
|
#ifndef CONFIG_XEN
|
|
- if (generic_interrupt_extension) {
|
|
+ if (x86_platform_ipi_callback) {
|
|
seq_printf(p, "%*s: ", prec, "PLT");
|
|
for_each_online_cpu(j)
|
|
- seq_printf(p, "%10u ", irq_stats(j)->generic_irqs);
|
|
+ seq_printf(p, "%10u ", irq_stats(j)->x86_platform_ipis);
|
|
seq_printf(p, " Platform interrupts\n");
|
|
}
|
|
#endif
|
|
@@ -162,7 +162,7 @@ int show_interrupts(struct seq_file *p,
|
|
if (!desc)
|
|
return 0;
|
|
|
|
- spin_lock_irqsave(&desc->lock, flags);
|
|
+ raw_spin_lock_irqsave(&desc->lock, flags);
|
|
for_each_online_cpu(j)
|
|
any_count |= kstat_irqs_cpu(i, j);
|
|
action = desc->action;
|
|
@@ -183,7 +183,7 @@ int show_interrupts(struct seq_file *p,
|
|
|
|
seq_putc(p, '\n');
|
|
out:
|
|
- spin_unlock_irqrestore(&desc->lock, flags);
|
|
+ raw_spin_unlock_irqrestore(&desc->lock, flags);
|
|
return 0;
|
|
}
|
|
|
|
@@ -201,8 +201,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
|
|
sum += irq_stats(cpu)->apic_pending_irqs;
|
|
#endif
|
|
#ifndef CONFIG_XEN
|
|
- if (generic_interrupt_extension)
|
|
- sum += irq_stats(cpu)->generic_irqs;
|
|
+ if (x86_platform_ipi_callback)
|
|
+ sum += irq_stats(cpu)->x86_platform_ipis;
|
|
#endif
|
|
#ifdef CONFIG_SMP
|
|
sum += irq_stats(cpu)->irq_resched_count;
|
|
@@ -271,9 +271,9 @@ unsigned int __irq_entry do_IRQ(struct p
|
|
}
|
|
|
|
/*
|
|
- * Handler for GENERIC_INTERRUPT_VECTOR.
|
|
+ * Handler for X86_PLATFORM_IPI_VECTOR.
|
|
*/
|
|
-void smp_generic_interrupt(struct pt_regs *regs)
|
|
+void smp_x86_platform_ipi(struct pt_regs *regs)
|
|
{
|
|
struct pt_regs *old_regs = set_irq_regs(regs);
|
|
|
|
@@ -283,13 +283,95 @@ void smp_generic_interrupt(struct pt_reg
|
|
|
|
irq_enter();
|
|
|
|
- inc_irq_stat(generic_irqs);
|
|
+ inc_irq_stat(x86_platform_ipis);
|
|
|
|
- if (generic_interrupt_extension)
|
|
- generic_interrupt_extension();
|
|
+ if (x86_platform_ipi_callback)
|
|
+ x86_platform_ipi_callback();
|
|
|
|
irq_exit();
|
|
|
|
set_irq_regs(old_regs);
|
|
}
|
|
#endif
|
|
+
|
|
+#ifdef CONFIG_HOTPLUG_CPU
|
|
+#include <xen/evtchn.h>
|
|
+/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
|
|
+void fixup_irqs(void)
|
|
+{
|
|
+ unsigned int irq;
|
|
+ static int warned;
|
|
+ struct irq_desc *desc;
|
|
+ static DECLARE_BITMAP(irqs_used, NR_IRQS);
|
|
+
|
|
+ for_each_irq_desc(irq, desc) {
|
|
+ int break_affinity = 0;
|
|
+ int set_affinity = 1;
|
|
+ const struct cpumask *affinity;
|
|
+
|
|
+ if (!desc)
|
|
+ continue;
|
|
+ if (irq == 2)
|
|
+ continue;
|
|
+
|
|
+		/* Interrupts are disabled at this point */
|
|
+ raw_spin_lock(&desc->lock);
|
|
+
|
|
+ affinity = desc->affinity;
|
|
+ if (!irq_has_action(irq) ||
|
|
+ cpumask_subset(affinity, cpu_online_mask)) {
|
|
+ raw_spin_unlock(&desc->lock);
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ if (cpumask_test_cpu(smp_processor_id(), affinity))
|
|
+ __set_bit(irq, irqs_used);
|
|
+
|
|
+ if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
|
|
+ break_affinity = 1;
|
|
+ affinity = cpu_all_mask;
|
|
+ }
|
|
+
|
|
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->mask)
|
|
+ desc->chip->mask(irq);
|
|
+
|
|
+ if (desc->chip->set_affinity)
|
|
+ desc->chip->set_affinity(irq, affinity);
|
|
+ else if (desc->chip != &no_irq_chip && !(warned++))
|
|
+ set_affinity = 0;
|
|
+
|
|
+ if (!(desc->status & IRQ_MOVE_PCNTXT) && desc->chip->unmask)
|
|
+ desc->chip->unmask(irq);
|
|
+
|
|
+ raw_spin_unlock(&desc->lock);
|
|
+
|
|
+ if (break_affinity && set_affinity)
|
|
+ /*printk("Broke affinity for irq %i\n", irq)*/;
|
|
+ else if (!set_affinity)
|
|
+ printk("Cannot set affinity for irq %i\n", irq);
|
|
+ }
|
|
+
|
|
+ /*
|
|
+	 * We can remove mdelay() and then send spurious interrupts to
|
|
+ * new cpu targets for all the irqs that were handled previously by
|
|
+ * this cpu. While it works, I have seen spurious interrupt messages
|
|
+ * (nothing wrong but still...).
|
|
+ *
|
|
+ * So for now, retain mdelay(1) and check the IRR and then send those
|
|
+ * interrupts to new targets as this cpu is already offlined...
|
|
+ */
|
|
+ mdelay(1);
|
|
+
|
|
+ for_each_irq_desc(irq, desc) {
|
|
+ if (!__test_and_clear_bit(irq, irqs_used))
|
|
+ continue;
|
|
+
|
|
+ if (xen_test_irq_pending(irq)) {
|
|
+ raw_spin_lock(&desc->lock);
|
|
+ if (desc->chip->retrigger)
|
|
+ desc->chip->retrigger(irq);
|
|
+ raw_spin_unlock(&desc->lock);
|
|
+ }
|
|
+ }
|
|
+}
|
|
+#endif
|
|
--- head-2011-03-17.orig/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -21,10 +21,12 @@
|
|
* as published by the Free Software Foundation; either version
|
|
* 2 of the License, or (at your option) any later version.
|
|
*/
|
|
+
|
|
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
|
+
|
|
#include <linux/platform_device.h>
|
|
#include <linux/miscdevice.h>
|
|
#include <linux/capability.h>
|
|
-#include <linux/smp_lock.h>
|
|
#include <linux/kernel.h>
|
|
#include <linux/module.h>
|
|
#include <linux/mutex.h>
|
|
@@ -88,7 +90,6 @@ static int do_microcode_update(const voi
|
|
|
|
static int microcode_open(struct inode *unused1, struct file *unused2)
|
|
{
|
|
- cycle_kernel_lock();
|
|
return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
|
|
}
|
|
|
|
@@ -98,7 +99,7 @@ static ssize_t microcode_write(struct fi
|
|
ssize_t ret = -EINVAL;
|
|
|
|
if ((len >> PAGE_SHIFT) > totalram_pages) {
|
|
- pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
|
|
+ pr_err("too much data (max %ld pages)\n", totalram_pages);
|
|
return ret;
|
|
}
|
|
|
|
@@ -131,7 +132,7 @@ static int __init microcode_dev_init(voi
|
|
|
|
 	error = misc_register(&microcode_dev);
|
|
if (error) {
|
|
- pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR);
|
|
+ pr_err("can't misc_register on minor=%d\n", MICROCODE_MINOR);
|
|
return error;
|
|
}
|
|
|
|
@@ -190,7 +191,7 @@ static int __init microcode_init(void)
|
|
else if (c->x86_vendor == X86_VENDOR_AMD)
|
|
fw_name = "amd-ucode/microcode_amd.bin";
|
|
else {
|
|
- pr_err("microcode: no support for this CPU vendor\n");
|
|
+ pr_err("no support for this CPU vendor\n");
|
|
return -ENODEV;
|
|
}
|
|
|
|
@@ -207,8 +208,7 @@ static int __init microcode_init(void)
|
|
request_microcode(fw_name);
|
|
|
|
pr_info("Microcode Update Driver: v" MICROCODE_VERSION
|
|
- " <tigran@aivazian.fsnet.co.uk>,"
|
|
- " Peter Oruba\n");
|
|
+ " <tigran@aivazian.fsnet.co.uk>, Peter Oruba\n");
|
|
|
|
return 0;
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -375,13 +375,6 @@ static int __init smp_read_mpc(struct mp
|
|
x86_init.mpparse.mpc_record(1);
|
|
}
|
|
|
|
-#ifdef CONFIG_X86_BIGSMP
|
|
- generic_bigsmp_probe();
|
|
-#endif
|
|
-
|
|
- if (apic->setup_apic_routing)
|
|
- apic->setup_apic_routing();
|
|
-
|
|
if (!num_processors)
|
|
printk(KERN_ERR "MPTABLE: no processors registered!\n");
|
|
return num_processors;
|
|
@@ -694,37 +687,21 @@ void __init default_get_smp_config(unsig
|
|
}
|
|
|
|
#ifndef CONFIG_XEN
|
|
-static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
|
|
+static void __init smp_reserve_memory(struct mpf_intel *mpf)
|
|
{
|
|
unsigned long size = get_mpc_size(mpf->physptr);
|
|
-#ifdef CONFIG_X86_32
|
|
- /*
|
|
- * We cannot access to MPC table to compute table size yet,
|
|
- * as only few megabytes from the bottom is mapped now.
|
|
- * PC-9800's MPC table places on the very last of physical
|
|
- * memory; so that simply reserving PAGE_SIZE from mpf->physptr
|
|
- * yields BUG() in reserve_bootmem.
|
|
- * also need to make sure physptr is below than max_low_pfn
|
|
- * we don't need reserve the area above max_low_pfn
|
|
- */
|
|
- unsigned long end = max_low_pfn * PAGE_SIZE;
|
|
-
|
|
- if (mpf->physptr < end) {
|
|
- if (mpf->physptr + size > end)
|
|
- size = end - mpf->physptr;
|
|
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
|
|
- }
|
|
-#else
|
|
- reserve_bootmem_generic(mpf->physptr, size, BOOTMEM_DEFAULT);
|
|
-#endif
|
|
+
|
|
+ reserve_early(mpf->physptr, mpf->physptr+size, "MP-table mpc");
|
|
}
|
|
#endif
|
|
|
|
-static int __init smp_scan_config(unsigned long base, unsigned long length,
|
|
- unsigned reserve)
|
|
+static int __init smp_scan_config(unsigned long base, unsigned long length)
|
|
{
|
|
unsigned int *bp = _bus_to_virt(base);
|
|
struct mpf_intel *mpf;
|
|
+#ifndef CONFIG_XEN
|
|
+ unsigned long mem;
|
|
+#endif
|
|
|
|
apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
|
|
bp, length);
|
|
@@ -746,12 +723,10 @@ static int __init smp_scan_config(unsign
|
|
printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
|
|
mpf, (u64)virt_to_phys(mpf));
|
|
|
|
- if (!reserve)
|
|
- return 1;
|
|
- reserve_bootmem_generic(virt_to_phys(mpf), sizeof(*mpf),
|
|
- BOOTMEM_DEFAULT);
|
|
+ mem = virt_to_phys(mpf);
|
|
+ reserve_early(mem, mem + sizeof(*mpf), "MP-table mpf");
|
|
if (mpf->physptr)
|
|
- smp_reserve_bootmem(mpf);
|
|
+ smp_reserve_memory(mpf);
|
|
#else
|
|
printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n",
|
|
mpf, ((void *)bp - _bus_to_virt(base)) + base);
|
|
@@ -764,7 +739,7 @@ static int __init smp_scan_config(unsign
|
|
return 0;
|
|
}
|
|
|
|
-void __init default_find_smp_config(unsigned int reserve)
|
|
+void __init default_find_smp_config(void)
|
|
{
|
|
#ifndef CONFIG_XEN
|
|
unsigned int address;
|
|
@@ -778,9 +753,9 @@ void __init default_find_smp_config(unsi
|
|
* 2) Scan the top 1K of base RAM
|
|
* 3) Scan the 64K of bios
|
|
*/
|
|
- if (smp_scan_config(0x0, 0x400, reserve) ||
|
|
- smp_scan_config(639 * 0x400, 0x400, reserve) ||
|
|
- smp_scan_config(0xF0000, 0x10000, reserve))
|
|
+ if (smp_scan_config(0x0, 0x400) ||
|
|
+ smp_scan_config(639 * 0x400, 0x400) ||
|
|
+ smp_scan_config(0xF0000, 0x10000))
|
|
return;
|
|
/*
|
|
* If it is an SMP machine we should know now, unless the
|
|
@@ -802,7 +777,7 @@ void __init default_find_smp_config(unsi
|
|
#ifndef CONFIG_XEN
|
|
address = get_bios_ebda();
|
|
if (address)
|
|
- smp_scan_config(address, 0x400, reserve);
|
|
+ smp_scan_config(address, 0x400);
|
|
#endif
|
|
}
|
|
|
|
@@ -1001,9 +976,6 @@ void __init early_reserve_e820_mpc_new(v
|
|
{
|
|
if (enable_update_mptable && alloc_mptable) {
|
|
u64 startt = 0;
|
|
-#ifdef CONFIG_X86_TRAMPOLINE
|
|
- startt = TRAMPOLINE_BASE;
|
|
-#endif
|
|
mpc_new_phys = early_reserve_e820(startt, mpc_new_length, 4);
|
|
}
|
|
}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -11,10 +11,11 @@
|
|
#include <asm/gart.h>
|
|
#include <asm/calgary.h>
|
|
#include <asm/amd_iommu.h>
|
|
+#include <asm/x86_init.h>
|
|
|
|
static int forbid_dac __read_mostly;
|
|
|
|
-struct dma_map_ops *dma_ops;
|
|
+struct dma_map_ops *dma_ops = &nommu_dma_ops;
|
|
EXPORT_SYMBOL(dma_ops);
|
|
|
|
static int iommu_sac_force __read_mostly;
|
|
@@ -42,9 +43,6 @@ int iommu_detected __read_mostly = 0;
|
|
*/
|
|
int iommu_pass_through __read_mostly;
|
|
|
|
-dma_addr_t bad_dma_address __read_mostly = 0;
|
|
-EXPORT_SYMBOL(bad_dma_address);
|
|
-
|
|
/* Dummy device used for NULL arguments (normally ISA). */
|
|
struct device x86_dma_fallback_dev = {
|
|
.init_name = "fallback device",
|
|
@@ -143,20 +141,19 @@ void __init pci_iommu_alloc(void)
|
|
/* free the range so iommu could get some range less than 4G */
|
|
dma32_free_bootmem();
|
|
#endif
|
|
+ if (pci_swiotlb_detect())
|
|
+ goto out;
|
|
|
|
- /*
|
|
- * The order of these functions is important for
|
|
- * fall-back/fail-over reasons
|
|
- */
|
|
gart_iommu_hole_init();
|
|
|
|
detect_calgary();
|
|
|
|
detect_intel_iommu();
|
|
|
|
+ /* needs to be called after gart_iommu_hole_init */
|
|
amd_iommu_detect();
|
|
-
|
|
- swiotlb_init();
|
|
+out:
|
|
+ swiotlb_init(1);
|
|
if (swiotlb) {
|
|
printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
|
|
dma_ops = &swiotlb_dma_ops;
|
|
@@ -268,7 +265,7 @@ static __init int iommu_setup(char *p)
|
|
if (!strncmp(p, "allowdac", 8))
|
|
forbid_dac = 0;
|
|
if (!strncmp(p, "nodac", 5))
|
|
- forbid_dac = -1;
|
|
+ forbid_dac = 1;
|
|
if (!strncmp(p, "usedac", 6)) {
|
|
forbid_dac = -1;
|
|
return 1;
|
|
@@ -370,25 +367,19 @@ static int __init pci_iommu_init(void)
|
|
#ifdef CONFIG_PCI
|
|
dma_debug_add_bus(&pci_bus_type);
|
|
#endif
|
|
+ x86_init.iommu.iommu_init();
|
|
|
|
- calgary_iommu_init();
|
|
-
|
|
- intel_iommu_init();
|
|
-
|
|
- amd_iommu_init();
|
|
-
|
|
- gart_iommu_init();
|
|
+#ifndef CONFIG_XEN
|
|
+ if (swiotlb) {
|
|
+ printk(KERN_INFO "PCI-DMA: "
|
|
+ "Using software bounce buffering for IO (SWIOTLB)\n");
|
|
+ swiotlb_print_info();
|
|
+ } else
|
|
+ swiotlb_free();
|
|
+#endif
|
|
|
|
- no_iommu_init();
|
|
return 0;
|
|
}
|
|
-
|
|
-void pci_iommu_shutdown(void)
|
|
-{
|
|
- gart_iommu_shutdown();
|
|
-
|
|
- amd_iommu_shutdown();
|
|
-}
|
|
/* Must execute after PCI subsystem */
|
|
rootfs_initcall(pci_iommu_init);
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/pci-nommu-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -112,12 +112,3 @@ struct dma_map_ops nommu_dma_ops = {
|
|
.sync_sg_for_device = nommu_sync_sg_for_device,
|
|
.dma_supported = nommu_dma_supported,
|
|
};
|
|
-
|
|
-void __init no_iommu_init(void)
|
|
-{
|
|
- if (dma_ops)
|
|
- return;
|
|
-
|
|
- force_iommu = 0; /* no HW IOMMU */
|
|
- dma_ops = &nommu_dma_ops;
|
|
-}
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:07:49.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:09:35.000000000 +0100
|
|
@@ -9,7 +9,11 @@
|
|
#include <linux/pm.h>
|
|
#include <linux/clockchips.h>
|
|
#include <linux/random.h>
|
|
+#include <linux/user-return-notifier.h>
|
|
+#include <linux/dmi.h>
|
|
+#include <linux/utsname.h>
|
|
#include <trace/events/power.h>
|
|
+#include <linux/hw_breakpoint.h>
|
|
#include <asm/system.h>
|
|
#include <asm/apic.h>
|
|
#include <asm/syscalls.h>
|
|
@@ -17,6 +21,7 @@
|
|
#include <asm/uaccess.h>
|
|
#include <asm/i387.h>
|
|
#include <asm/ds.h>
|
|
+#include <asm/debugreg.h>
|
|
#include <xen/evtchn.h>
|
|
|
|
unsigned long idle_halt;
|
|
@@ -89,30 +94,30 @@ void exit_thread(void)
|
|
}
|
|
}
|
|
|
|
-void flush_thread(void)
|
|
+void show_regs_common(void)
|
|
{
|
|
- struct task_struct *tsk = current;
|
|
+ const char *board, *product;
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
- if (test_tsk_thread_flag(tsk, TIF_ABI_PENDING)) {
|
|
- clear_tsk_thread_flag(tsk, TIF_ABI_PENDING);
|
|
- if (test_tsk_thread_flag(tsk, TIF_IA32)) {
|
|
- clear_tsk_thread_flag(tsk, TIF_IA32);
|
|
- } else {
|
|
- set_tsk_thread_flag(tsk, TIF_IA32);
|
|
- current_thread_info()->status |= TS_COMPAT;
|
|
- }
|
|
- }
|
|
-#endif
|
|
+ board = dmi_get_system_info(DMI_BOARD_NAME);
|
|
+ if (!board)
|
|
+ board = "";
|
|
+ product = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
+ if (!product)
|
|
+ product = "";
|
|
+
|
|
+ printk(KERN_CONT "\n");
|
|
+ printk(KERN_DEFAULT "Pid: %d, comm: %.20s %s %s %.*s %s/%s\n",
|
|
+ current->pid, current->comm, print_tainted(),
|
|
+ init_utsname()->release,
|
|
+ (int)strcspn(init_utsname()->version, " "),
|
|
+ init_utsname()->version, board, product);
|
|
+}
|
|
|
|
- clear_tsk_thread_flag(tsk, TIF_DEBUG);
|
|
+void flush_thread(void)
|
|
+{
|
|
+ struct task_struct *tsk = current;
|
|
|
|
- tsk->thread.debugreg0 = 0;
|
|
- tsk->thread.debugreg1 = 0;
|
|
- tsk->thread.debugreg2 = 0;
|
|
- tsk->thread.debugreg3 = 0;
|
|
- tsk->thread.debugreg6 = 0;
|
|
- tsk->thread.debugreg7 = 0;
|
|
+ flush_ptrace_hw_breakpoint(tsk);
|
|
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
|
|
/*
|
|
* Forget coprocessor state..
|
|
@@ -193,16 +198,6 @@ void __switch_to_xtra(struct task_struct
|
|
else if (next->debugctlmsr != prev->debugctlmsr)
|
|
update_debugctlmsr(next->debugctlmsr);
|
|
|
|
- if (test_tsk_thread_flag(next_p, TIF_DEBUG)) {
|
|
- set_debugreg(next->debugreg0, 0);
|
|
- set_debugreg(next->debugreg1, 1);
|
|
- set_debugreg(next->debugreg2, 2);
|
|
- set_debugreg(next->debugreg3, 3);
|
|
- /* no 4 and 5 */
|
|
- set_debugreg(next->debugreg6, 6);
|
|
- set_debugreg(next->debugreg7, 7);
|
|
- }
|
|
-
|
|
if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
|
|
test_tsk_thread_flag(next_p, TIF_NOTSC)) {
|
|
/* prev and next are different */
|
|
@@ -211,6 +206,7 @@ void __switch_to_xtra(struct task_struct
|
|
else
|
|
hard_enable_TSC();
|
|
}
|
|
+ propagate_user_return_notify(prev_p, next_p);
|
|
}
|
|
|
|
int sys_fork(struct pt_regs *regs)
|
|
@@ -234,6 +230,78 @@ int sys_vfork(struct pt_regs *regs)
|
|
NULL, NULL);
|
|
}
|
|
|
|
+long
|
|
+sys_clone(unsigned long clone_flags, unsigned long newsp,
|
|
+ void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
|
|
+{
|
|
+ if (!newsp)
|
|
+ newsp = regs->sp;
|
|
+ return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
|
|
+}
|
|
+
|
|
+/*
|
|
+ * This gets run with %si containing the
|
|
+ * function to call, and %di containing
|
|
+ * the "args".
|
|
+ */
|
|
+extern void kernel_thread_helper(void);
|
|
+
|
|
+/*
|
|
+ * Create a kernel thread
|
|
+ */
|
|
+int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
|
|
+{
|
|
+ struct pt_regs regs;
|
|
+
|
|
+	memset(&regs, 0, sizeof(regs));
|
|
+
|
|
+ regs.si = (unsigned long) fn;
|
|
+ regs.di = (unsigned long) arg;
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ regs.ds = __USER_DS;
|
|
+ regs.es = __USER_DS;
|
|
+ regs.fs = __KERNEL_PERCPU;
|
|
+ regs.gs = __KERNEL_STACK_CANARY;
|
|
+#else
|
|
+ regs.ss = __KERNEL_DS;
|
|
+#endif
|
|
+
|
|
+ regs.orig_ax = -1;
|
|
+ regs.ip = (unsigned long) kernel_thread_helper;
|
|
+ regs.cs = __KERNEL_CS | get_kernel_rpl();
|
|
+ regs.flags = X86_EFLAGS_IF | 0x2;
|
|
+
|
|
+ /* Ok, create the new process.. */
|
|
+	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
|
|
+}
|
|
+EXPORT_SYMBOL(kernel_thread);
|
|
+
|
|
+/*
|
|
+ * sys_execve() executes a new program.
|
|
+ */
|
|
+long sys_execve(char __user *name, char __user * __user *argv,
|
|
+ char __user * __user *envp, struct pt_regs *regs)
|
|
+{
|
|
+ long error;
|
|
+ char *filename;
|
|
+
|
|
+ filename = getname(name);
|
|
+ error = PTR_ERR(filename);
|
|
+ if (IS_ERR(filename))
|
|
+ return error;
|
|
+ error = do_execve(filename, argv, envp, regs);
|
|
+
|
|
+#ifdef CONFIG_X86_32
|
|
+ if (error == 0) {
|
|
+ /* Make sure we don't return using sysenter.. */
|
|
+ set_thread_flag(TIF_IRET);
|
|
+ }
|
|
+#endif
|
|
+
|
|
+ putname(filename);
|
|
+ return error;
|
|
+}
|
|
|
|
/*
|
|
* Idle related variables and functions
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:38:03.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:07.000000000 +0100
|
|
@@ -23,7 +23,6 @@
|
|
#include <linux/vmalloc.h>
|
|
#include <linux/user.h>
|
|
#include <linux/interrupt.h>
|
|
-#include <linux/utsname.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/reboot.h>
|
|
#include <linux/init.h>
|
|
@@ -35,7 +34,6 @@
|
|
#include <linux/tick.h>
|
|
#include <linux/percpu.h>
|
|
#include <linux/prctl.h>
|
|
-#include <linux/dmi.h>
|
|
#include <linux/ftrace.h>
|
|
#include <linux/uaccess.h>
|
|
#include <linux/io.h>
|
|
@@ -60,6 +58,7 @@
|
|
#include <asm/idle.h>
|
|
#include <asm/syscalls.h>
|
|
#include <asm/ds.h>
|
|
+#include <asm/debugreg.h>
|
|
|
|
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
|
|
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
|
|
@@ -130,39 +129,29 @@ void __show_regs(struct pt_regs *regs, i
|
|
unsigned long d0, d1, d2, d3, d6, d7;
|
|
unsigned long sp;
|
|
unsigned short ss, gs;
|
|
- const char *board;
|
|
|
|
if (user_mode_vm(regs)) {
|
|
sp = regs->sp;
|
|
ss = regs->ss & 0xffff;
|
|
gs = get_user_gs(regs);
|
|
} else {
|
|
-		sp = (unsigned long) (&regs->sp);
|
|
+ sp = kernel_stack_pointer(regs);
|
|
savesegment(ss, ss);
|
|
savesegment(gs, gs);
|
|
}
|
|
|
|
- printk("\n");
|
|
+ show_regs_common();
|
|
|
|
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
- if (!board)
|
|
- board = "";
|
|
- printk("Pid: %d, comm: %s %s (%s %.*s) %s\n",
|
|
- task_pid_nr(current), current->comm,
|
|
- print_tainted(), init_utsname()->release,
|
|
- (int)strcspn(init_utsname()->version, " "),
|
|
- init_utsname()->version, board);
|
|
-
|
|
- printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
|
|
+ printk(KERN_DEFAULT "EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
|
|
(u16)regs->cs, regs->ip, regs->flags,
|
|
smp_processor_id());
|
|
print_symbol("EIP is at %s\n", regs->ip);
|
|
|
|
- printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
|
|
+ printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
|
|
regs->ax, regs->bx, regs->cx, regs->dx);
|
|
- printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
|
|
+ printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
|
|
regs->si, regs->di, regs->bp, sp);
|
|
- printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
|
|
+ printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
|
|
(u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss);
|
|
|
|
if (!all)
|
|
@@ -172,61 +161,28 @@ void __show_regs(struct pt_regs *regs, i
|
|
cr2 = read_cr2();
|
|
cr3 = read_cr3();
|
|
cr4 = read_cr4_safe();
|
|
- printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
|
|
+ printk(KERN_DEFAULT "CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
|
|
cr0, cr2, cr3, cr4);
|
|
|
|
get_debugreg(d0, 0);
|
|
get_debugreg(d1, 1);
|
|
get_debugreg(d2, 2);
|
|
get_debugreg(d3, 3);
|
|
- printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
|
|
+ printk(KERN_DEFAULT "DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
|
|
d0, d1, d2, d3);
|
|
|
|
get_debugreg(d6, 6);
|
|
get_debugreg(d7, 7);
|
|
- printk("DR6: %08lx DR7: %08lx\n",
|
|
+ printk(KERN_DEFAULT "DR6: %08lx DR7: %08lx\n",
|
|
d6, d7);
|
|
}
|
|
|
|
void show_regs(struct pt_regs *regs)
|
|
{
|
|
- __show_regs(regs, 1);
|
|
+ show_registers(regs);
|
|
 	show_trace(NULL, regs, &regs->sp, regs->bp);
|
|
}
|
|
|
|
-/*
|
|
- * This gets run with %bx containing the
|
|
- * function to call, and %dx containing
|
|
- * the "args".
|
|
- */
|
|
-extern void kernel_thread_helper(void);
|
|
-
|
|
-/*
|
|
- * Create a kernel thread
|
|
- */
|
|
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
|
|
-{
|
|
- struct pt_regs regs;
|
|
-
|
|
-	memset(&regs, 0, sizeof(regs));
|
|
-
|
|
- regs.bx = (unsigned long) fn;
|
|
- regs.dx = (unsigned long) arg;
|
|
-
|
|
- regs.ds = __USER_DS;
|
|
- regs.es = __USER_DS;
|
|
- regs.fs = __KERNEL_PERCPU;
|
|
- regs.gs = __KERNEL_STACK_CANARY;
|
|
- regs.orig_ax = -1;
|
|
- regs.ip = (unsigned long) kernel_thread_helper;
|
|
- regs.cs = __KERNEL_CS | get_kernel_rpl();
|
|
- regs.flags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
|
|
-
|
|
- /* Ok, create the new process.. */
|
|
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
|
|
-}
|
|
-EXPORT_SYMBOL(kernel_thread);
|
|
-
|
|
void release_thread(struct task_struct *dead_task)
|
|
{
|
|
BUG_ON(dead_task->mm);
|
|
@@ -262,7 +218,12 @@ int copy_thread(unsigned long clone_flag
|
|
|
|
task_user_gs(p) = get_user_gs(regs);
|
|
|
|
+ p->thread.io_bitmap_ptr = NULL;
|
|
tsk = current;
|
|
+ err = -ENOMEM;
|
|
+
|
|
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
|
+
|
|
if (test_tsk_thread_flag(tsk, TIF_CSTAR))
|
|
p->thread.ip = (unsigned long) cstar_ret_from_fork;
|
|
if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
|
|
@@ -497,46 +458,6 @@ __switch_to(struct task_struct *prev_p,
|
|
return prev_p;
|
|
}
|
|
|
|
-int sys_clone(struct pt_regs *regs)
|
|
-{
|
|
- unsigned long clone_flags;
|
|
- unsigned long newsp;
|
|
- int __user *parent_tidptr, *child_tidptr;
|
|
-
|
|
- clone_flags = regs->bx;
|
|
- newsp = regs->cx;
|
|
- parent_tidptr = (int __user *)regs->dx;
|
|
- child_tidptr = (int __user *)regs->di;
|
|
- if (!newsp)
|
|
- newsp = regs->sp;
|
|
- return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
|
|
-}
|
|
-
|
|
-/*
|
|
- * sys_execve() executes a new program.
|
|
- */
|
|
-int sys_execve(struct pt_regs *regs)
|
|
-{
|
|
- int error;
|
|
- char *filename;
|
|
-
|
|
- filename = getname((char __user *) regs->bx);
|
|
- error = PTR_ERR(filename);
|
|
- if (IS_ERR(filename))
|
|
- goto out;
|
|
- error = do_execve(filename,
|
|
- (char __user * __user *) regs->cx,
|
|
- (char __user * __user *) regs->dx,
|
|
- regs);
|
|
- if (error == 0) {
|
|
- /* Make sure we don't return using sysenter.. */
|
|
- set_thread_flag(TIF_IRET);
|
|
- }
|
|
- putname(filename);
|
|
-out:
|
|
- return error;
|
|
-}
|
|
-
|
|
#define top_esp (THREAD_SIZE - sizeof(unsigned long))
|
|
#define top_ebp (THREAD_SIZE - 2*sizeof(unsigned long))
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:37:59.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:12.000000000 +0100
|
|
@@ -29,7 +29,6 @@
|
|
#include <linux/slab.h>
|
|
#include <linux/user.h>
|
|
#include <linux/interrupt.h>
|
|
-#include <linux/utsname.h>
|
|
#include <linux/delay.h>
|
|
#include <linux/module.h>
|
|
#include <linux/ptrace.h>
|
|
@@ -41,7 +40,6 @@
|
|
#include <linux/uaccess.h>
|
|
#include <linux/io.h>
|
|
#include <linux/ftrace.h>
|
|
-#include <linux/dmi.h>
|
|
|
|
#include <asm/pgtable.h>
|
|
#include <asm/system.h>
|
|
@@ -57,13 +55,12 @@
|
|
#include <asm/idle.h>
|
|
#include <asm/syscalls.h>
|
|
#include <asm/ds.h>
|
|
+#include <asm/debugreg.h>
|
|
|
|
asmlinkage extern void ret_from_fork(void);
|
|
|
|
static DEFINE_PER_CPU(unsigned char, is_idle);
|
|
|
|
-unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
|
|
-
|
|
static ATOMIC_NOTIFIER_HEAD(idle_notifier);
|
|
|
|
void idle_notifier_register(struct notifier_block *n)
|
|
@@ -166,31 +163,21 @@ void __show_regs(struct pt_regs *regs, i
|
|
unsigned long d0, d1, d2, d3, d6, d7;
|
|
unsigned int fsindex, gsindex;
|
|
unsigned int ds, cs, es;
|
|
- const char *board;
|
|
|
|
- printk("\n");
|
|
- print_modules();
|
|
- board = dmi_get_system_info(DMI_PRODUCT_NAME);
|
|
- if (!board)
|
|
- board = "";
|
|
- printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
|
|
- current->pid, current->comm, print_tainted(),
|
|
- init_utsname()->release,
|
|
- (int)strcspn(init_utsname()->version, " "),
|
|
- init_utsname()->version, board);
|
|
- printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
|
|
+ show_regs_common();
|
|
+ printk(KERN_DEFAULT "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
|
|
printk_address(regs->ip, 1);
|
|
- printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
|
|
+ printk(KERN_DEFAULT "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
|
|
regs->sp, regs->flags);
|
|
- printk(KERN_INFO "RAX: %016lx RBX: %016lx RCX: %016lx\n",
|
|
+ printk(KERN_DEFAULT "RAX: %016lx RBX: %016lx RCX: %016lx\n",
|
|
regs->ax, regs->bx, regs->cx);
|
|
- printk(KERN_INFO "RDX: %016lx RSI: %016lx RDI: %016lx\n",
|
|
+ printk(KERN_DEFAULT "RDX: %016lx RSI: %016lx RDI: %016lx\n",
|
|
regs->dx, regs->si, regs->di);
|
|
- printk(KERN_INFO "RBP: %016lx R08: %016lx R09: %016lx\n",
|
|
+ printk(KERN_DEFAULT "RBP: %016lx R08: %016lx R09: %016lx\n",
|
|
regs->bp, regs->r8, regs->r9);
|
|
- printk(KERN_INFO "R10: %016lx R11: %016lx R12: %016lx\n",
|
|
+ printk(KERN_DEFAULT "R10: %016lx R11: %016lx R12: %016lx\n",
|
|
regs->r10, regs->r11, regs->r12);
|
|
- printk(KERN_INFO "R13: %016lx R14: %016lx R15: %016lx\n",
|
|
+ printk(KERN_DEFAULT "R13: %016lx R14: %016lx R15: %016lx\n",
|
|
regs->r13, regs->r14, regs->r15);
|
|
|
|
asm("movl %%ds,%0" : "=r" (ds));
|
|
@@ -211,27 +198,26 @@ void __show_regs(struct pt_regs *regs, i
|
|
cr3 = read_cr3();
|
|
cr4 = read_cr4();
|
|
|
|
- printk(KERN_INFO "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
|
|
+ printk(KERN_DEFAULT "FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
|
|
fs, fsindex, gs, gsindex, shadowgs);
|
|
- printk(KERN_INFO "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
|
|
+ printk(KERN_DEFAULT "CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds,
|
|
es, cr0);
|
|
- printk(KERN_INFO "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
|
|
+ printk(KERN_DEFAULT "CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3,
|
|
cr4);
|
|
|
|
get_debugreg(d0, 0);
|
|
get_debugreg(d1, 1);
|
|
get_debugreg(d2, 2);
|
|
- printk(KERN_INFO "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
|
|
+ printk(KERN_DEFAULT "DR0: %016lx DR1: %016lx DR2: %016lx\n", d0, d1, d2);
|
|
get_debugreg(d3, 3);
|
|
get_debugreg(d6, 6);
|
|
get_debugreg(d7, 7);
|
|
- printk(KERN_INFO "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
|
|
+ printk(KERN_DEFAULT "DR3: %016lx DR6: %016lx DR7: %016lx\n", d3, d6, d7);
|
|
}
|
|
|
|
void show_regs(struct pt_regs *regs)
|
|
{
|
|
- printk(KERN_INFO "CPU %d:", smp_processor_id());
|
|
- __show_regs(regs, 1);
|
|
+ show_registers(regs);
|
|
show_trace(NULL, regs, (void *)(regs + 1), regs->bp);
|
|
}
|
|
|
|
@@ -239,6 +225,7 @@ void xen_load_gs_index(unsigned gs)
|
|
{
|
|
WARN_ON(HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gs));
|
|
}
|
|
+EXPORT_SYMBOL(xen_load_gs_index);
|
|
|
|
void release_thread(struct task_struct *dead_task)
|
|
{
|
|
@@ -294,8 +281,9 @@ int copy_thread(unsigned long clone_flag
|
|
*childregs = *regs;
|
|
|
|
childregs->ax = 0;
|
|
- childregs->sp = sp;
|
|
- if (sp == ~0UL)
|
|
+ if (user_mode(regs))
|
|
+ childregs->sp = sp;
|
|
+ else
|
|
childregs->sp = (unsigned long)childregs;
|
|
|
|
p->thread.sp = (unsigned long) childregs;
|
|
@@ -305,12 +293,16 @@ int copy_thread(unsigned long clone_flag
|
|
|
|
p->thread.fs = me->thread.fs;
|
|
p->thread.gs = me->thread.gs;
|
|
+ p->thread.io_bitmap_ptr = NULL;
|
|
|
|
savesegment(gs, p->thread.gsindex);
|
|
savesegment(fs, p->thread.fsindex);
|
|
savesegment(es, p->thread.es);
|
|
savesegment(ds, p->thread.ds);
|
|
|
|
+ err = -ENOMEM;
|
|
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
|
|
+
|
|
if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
|
|
p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
|
|
if (!p->thread.io_bitmap_ptr) {
|
|
@@ -350,28 +342,45 @@ out:
|
|
kfree(p->thread.io_bitmap_ptr);
|
|
p->thread.io_bitmap_max = 0;
|
|
}
|
|
+
|
|
return err;
|
|
}
|
|
|
|
-void
|
|
-start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
+static void
|
|
+start_thread_common(struct pt_regs *regs, unsigned long new_ip,
|
|
+ unsigned long new_sp,
|
|
+ unsigned int _cs, unsigned int _ss, unsigned int _ds)
|
|
{
|
|
loadsegment(fs, 0);
|
|
- loadsegment(es, 0);
|
|
- loadsegment(ds, 0);
|
|
+ loadsegment(es, _ds);
|
|
+ loadsegment(ds, _ds);
|
|
load_gs_index(0);
|
|
regs->ip = new_ip;
|
|
regs->sp = new_sp;
|
|
- regs->cs = __USER_CS;
|
|
- regs->ss = __USER_DS;
|
|
- regs->flags = 0x200;
|
|
+ regs->cs = _cs;
|
|
+ regs->ss = _ss;
|
|
+ regs->flags = X86_EFLAGS_IF;
|
|
set_fs(USER_DS);
|
|
/*
|
|
* Free the old FP and other extended state
|
|
*/
|
|
free_thread_xstate(current);
|
|
}
|
|
-EXPORT_SYMBOL_GPL(start_thread);
|
|
+
|
|
+void
|
|
+start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
|
|
+{
|
|
+ start_thread_common(regs, new_ip, new_sp,
|
|
+ __USER_CS, __USER_DS, 0);
|
|
+}
|
|
+
|
|
+#ifdef CONFIG_IA32_EMULATION
|
|
+void start_thread_ia32(struct pt_regs *regs, u32 new_ip, u32 new_sp)
|
|
+{
|
|
+ start_thread_common(regs, new_ip, new_sp,
|
|
+ __USER32_CS, __USER32_DS, __USER32_DS);
|
|
+}
|
|
+#endif
|
|
|
|
/*
|
|
* switch_to(x,y) should switch tasks from x to y.
|
|
@@ -561,26 +570,8 @@ __switch_to(struct task_struct *prev_p,
|
|
*/
|
|
if (preload_fpu)
|
|
__math_state_restore();
|
|
- return prev_p;
|
|
-}
|
|
|
|
-/*
|
|
- * sys_execve() executes a new program.
|
|
- */
|
|
-asmlinkage
|
|
-long sys_execve(char __user *name, char __user * __user *argv,
|
|
- char __user * __user *envp, struct pt_regs *regs)
|
|
-{
|
|
- long error;
|
|
- char *filename;
|
|
-
|
|
- filename = getname(name);
|
|
- error = PTR_ERR(filename);
|
|
- if (IS_ERR(filename))
|
|
- return error;
|
|
- error = do_execve(filename, argv, envp, regs);
|
|
- putname(filename);
|
|
- return error;
|
|
+ return prev_p;
|
|
}
|
|
|
|
void set_personality_64bit(void)
|
|
@@ -597,13 +588,16 @@ void set_personality_64bit(void)
|
|
current->personality &= ~READ_IMPLIES_EXEC;
|
|
}
|
|
|
|
-asmlinkage long
|
|
-sys_clone(unsigned long clone_flags, unsigned long newsp,
|
|
- void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
|
|
-{
|
|
- if (!newsp)
|
|
- newsp = regs->sp;
|
|
- return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
|
|
+void set_personality_ia32(void)
|
|
+{
|
|
+ /* inherit personality from parent */
|
|
+
|
|
+ /* Make sure to be in 32bit mode */
|
|
+ set_thread_flag(TIF_IA32);
|
|
+ current->personality |= force_personality32;
|
|
+
|
|
+ /* Prepare the first "return" to user space */
|
|
+ current_thread_info()->status |= TS_COMPAT;
|
|
}
|
|
|
|
unsigned long get_wchan(struct task_struct *p)
|
|
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-04 15:09:48.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:24:24.000000000 +0100
|
|
@@ -73,6 +73,7 @@
|
|
|
|
#include <asm/mtrr.h>
|
|
#include <asm/apic.h>
|
|
+#include <asm/trampoline.h>
|
|
#include <asm/e820.h>
|
|
#include <asm/mpspec.h>
|
|
#include <asm/setup.h>
|
|
@@ -106,9 +107,11 @@
|
|
#include <asm/percpu.h>
|
|
#include <asm/topology.h>
|
|
#include <asm/apicdef.h>
|
|
+#include <asm/k8.h>
|
|
#ifdef CONFIG_X86_64
|
|
#include <asm/numa_64.h>
|
|
#endif
|
|
+#include <asm/mce.h>
|
|
|
|
#ifdef CONFIG_XEN
|
|
#include <asm/hypervisor.h>
|
|
@@ -281,7 +284,7 @@ EXPORT_SYMBOL(edd);
|
|
* from boot_params into a safe place.
|
|
*
|
|
*/
|
|
-static inline void copy_edd(void)
|
|
+static inline void __init copy_edd(void)
|
|
{
|
|
memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer,
|
|
sizeof(edd.mbr_signature));
|
|
@@ -291,7 +294,7 @@ static inline void copy_edd(void)
|
|
}
|
|
#endif
|
|
#else
|
|
-static inline void copy_edd(void)
|
|
+static inline void __init copy_edd(void)
|
|
{
|
|
}
|
|
#endif
|
|
@@ -541,49 +544,18 @@ static void __init reserve_early_setup_d
|
|
#endif
|
|
}
|
|
|
|
+#ifndef CONFIG_XEN
|
|
/*
|
|
* --------- Crashkernel reservation ------------------------------
|
|
*/
|
|
|
|
#ifdef CONFIG_KEXEC
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-/**
|
|
- * Reserve @size bytes of crashkernel memory at any suitable offset.
|
|
- *
|
|
- * @size: Size of the crashkernel memory to reserve.
|
|
- * Returns the base address on success, and -1ULL on failure.
|
|
- */
|
|
-static
|
|
-unsigned long long __init find_and_reserve_crashkernel(unsigned long long size)
|
|
-{
|
|
- const unsigned long long alignment = 16<<20; /* 16M */
|
|
- unsigned long long start = 0LL;
|
|
-
|
|
- while (1) {
|
|
- int ret;
|
|
-
|
|
- start = find_e820_area(start, ULONG_MAX, size, alignment);
|
|
- if (start == -1ULL)
|
|
- return start;
|
|
-
|
|
- /* try to reserve it */
|
|
- ret = reserve_bootmem_generic(start, size, BOOTMEM_EXCLUSIVE);
|
|
- if (ret >= 0)
|
|
- return start;
|
|
-
|
|
- start += alignment;
|
|
- }
|
|
-}
|
|
-
|
|
static inline unsigned long long get_total_mem(void)
|
|
{
|
|
unsigned long long total;
|
|
|
|
- total = max_low_pfn - min_low_pfn;
|
|
-#ifdef CONFIG_HIGHMEM
|
|
- total += highend_pfn - highstart_pfn;
|
|
-#endif
|
|
+ total = max_pfn - min_low_pfn;
|
|
|
|
return total << PAGE_SHIFT;
|
|
}
|
|
@@ -603,21 +575,25 @@ static void __init reserve_crashkernel(v
|
|
|
|
/* 0 means: find the address automatically */
|
|
if (crash_base <= 0) {
|
|
- crash_base = find_and_reserve_crashkernel(crash_size);
|
|
+ const unsigned long long alignment = 16<<20; /* 16M */
|
|
+
|
|
+ crash_base = find_e820_area(alignment, ULONG_MAX, crash_size,
|
|
+ alignment);
|
|
if (crash_base == -1ULL) {
|
|
- pr_info("crashkernel reservation failed. "
|
|
- "No suitable area found.\n");
|
|
+ pr_info("crashkernel reservation failed - No suitable area found.\n");
|
|
return;
|
|
}
|
|
} else {
|
|
- ret = reserve_bootmem_generic(crash_base, crash_size,
|
|
- BOOTMEM_EXCLUSIVE);
|
|
- if (ret < 0) {
|
|
- pr_info("crashkernel reservation failed - "
|
|
- "memory is in use\n");
|
|
+ unsigned long long start;
|
|
+
|
|
+ start = find_e820_area(crash_base, ULONG_MAX, crash_size,
|
|
+ 1<<20);
|
|
+ if (start != crash_base) {
|
|
+ pr_info("crashkernel reservation failed - memory is in use.\n");
|
|
return;
|
|
}
|
|
}
|
|
+ reserve_early(crash_base, crash_base + crash_size, "CRASH KERNEL");
|
|
|
|
printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
|
|
"for crashkernel (System RAM: %ldMB)\n",
|
|
@@ -630,13 +606,11 @@ static void __init reserve_crashkernel(v
|
|
insert_resource(&iomem_resource, &crashk_res);
|
|
}
|
|
#else
|
|
-#define reserve_crashkernel xen_machine_kexec_setup_resources
|
|
-#endif
|
|
-#else
|
|
static void __init reserve_crashkernel(void)
|
|
{
|
|
}
|
|
#endif
|
|
+#endif /* CONFIG_XEN */
|
|
|
|
static struct resource standard_io_resources[] = {
|
|
{ .name = "dma1", .start = 0x00, .end = 0x1f,
|
|
@@ -735,19 +709,27 @@ static struct dmi_system_id __initdata b
|
|
DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
|
|
},
|
|
},
|
|
- {
|
|
/*
|
|
- * AMI BIOS with low memory corruption was found on Intel DG45ID board.
|
|
- * It hase different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
|
|
+ * AMI BIOS with low memory corruption was found on Intel DG45ID and
|
|
+ * DG45FC boards.
|
|
+ * It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
|
|
* match only DMI_BOARD_NAME and see if there is more bad products
|
|
* with this vendor.
|
|
*/
|
|
+ {
|
|
.callback = dmi_low_memory_corruption,
|
|
.ident = "AMI BIOS",
|
|
.matches = {
|
|
DMI_MATCH(DMI_BOARD_NAME, "DG45ID"),
|
|
},
|
|
},
|
|
+ {
|
|
+ .callback = dmi_low_memory_corruption,
|
|
+ .ident = "AMI BIOS",
|
|
+ .matches = {
|
|
+ DMI_MATCH(DMI_BOARD_NAME, "DG45FC"),
|
|
+ },
|
|
+ },
|
|
#endif
|
|
{}
|
|
};
|
|
@@ -767,6 +749,8 @@ static struct dmi_system_id __initdata b
|
|
|
|
void __init setup_arch(char **cmdline_p)
|
|
{
|
|
+ int acpi = 0;
|
|
+ int k8 = 0;
|
|
#ifdef CONFIG_XEN
|
|
unsigned int i;
|
|
unsigned long p2m_pages;
|
|
@@ -903,21 +887,18 @@ void __init setup_arch(char **cmdline_p)
|
|
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
|
|
*cmdline_p = command_line;
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
/*
|
|
- * Must call this twice: Once just to detect whether hardware doesn't
|
|
- * support NX (so that the early EHCI debug console setup can safely
|
|
- * call set_fixmap(), and then again after parsing early parameters to
|
|
- * honor the respective command line option.
|
|
+ * x86_configure_nx() is called before parse_early_param() to detect
|
|
+ * whether hardware doesn't support NX (so that the early EHCI debug
|
|
+ * console setup can safely call set_fixmap()). It may then be called
|
|
+ * again from within noexec_setup() during parsing early parameters
|
|
+ * to honor the respective command line option.
|
|
*/
|
|
- check_efer();
|
|
-#endif
|
|
+ x86_configure_nx();
|
|
|
|
parse_early_param();
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
- check_efer();
|
|
-#endif
|
|
+ x86_report_nx();
|
|
|
|
/* Must be before kernel pagetables are setup */
|
|
vmi_activate();
|
|
@@ -1024,6 +1005,20 @@ void __init setup_arch(char **cmdline_p)
|
|
|
|
reserve_brk();
|
|
|
|
+ /*
|
|
+ * Find and reserve possible boot-time SMP configuration:
|
|
+ */
|
|
+ find_smp_config();
|
|
+
|
|
+ reserve_trampoline_memory();
|
|
+
|
|
+#ifdef CONFIG_ACPI_SLEEP
|
|
+ /*
|
|
+ * Reserve low memory region for sleep support,
|
|
+ * even before init_memory_mapping().
|
|
+ */
|
|
+ acpi_reserve_wakeup_memory();
|
|
+#endif
|
|
init_gbpages();
|
|
|
|
/* max_pfn_mapped is updated here */
|
|
@@ -1051,6 +1046,8 @@ void __init setup_arch(char **cmdline_p)
|
|
reserve_initrd();
|
|
|
|
#ifndef CONFIG_XEN
|
|
+ reserve_crashkernel();
|
|
+
|
|
vsmp_init();
|
|
#endif
|
|
|
|
@@ -1074,23 +1071,15 @@ void __init setup_arch(char **cmdline_p)
|
|
/*
|
|
* Parse SRAT to discover nodes.
|
|
*/
|
|
- acpi_numa_init();
|
|
+ acpi = acpi_numa_init();
|
|
#endif
|
|
|
|
- initmem_init(0, max_pfn);
|
|
-
|
|
-#ifdef CONFIG_ACPI_SLEEP
|
|
- /*
|
|
- * Reserve low memory region for sleep support.
|
|
- */
|
|
- acpi_reserve_bootmem();
|
|
+#ifdef CONFIG_K8_NUMA
|
|
+ if (!acpi)
|
|
+ k8 = !k8_numa_init(0, max_pfn);
|
|
#endif
|
|
- /*
|
|
- * Find and reserve possible boot-time SMP configuration:
|
|
- */
|
|
- find_smp_config();
|
|
|
|
- reserve_crashkernel();
|
|
+ initmem_init(0, max_pfn, acpi, k8);
|
|
|
|
#if defined(CONFIG_X86_64) && !defined(CONFIG_XEN)
|
|
/*
|
|
@@ -1118,6 +1107,9 @@ void __init setup_arch(char **cmdline_p)
|
|
#endif
|
|
|
|
#ifdef CONFIG_XEN
|
|
+#ifdef CONFIG_KEXEC
|
|
+ xen_machine_kexec_setup_resources();
|
|
+#endif
|
|
p2m_pages = max_pfn;
|
|
if (xen_start_info->nr_pages > max_pfn) {
|
|
/*
|
|
@@ -1260,6 +1252,8 @@ void __init setup_arch(char **cmdline_p)
|
|
#endif
|
|
#endif /* CONFIG_XEN */
|
|
x86_init.oem.banner();
|
|
+
|
|
+ mcheck_init();
|
|
}
|
|
|
|
#ifdef CONFIG_X86_32
|
|
--- head-2011-03-17.orig/arch/x86/kernel/time-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/time-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -937,28 +937,23 @@ core_initcall(cpufreq_time_setup);
|
|
*/
|
|
static ctl_table xen_subtable[] = {
|
|
{
|
|
- .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK,
|
|
.procname = "independent_wallclock",
|
|
.data = &independent_wallclock,
|
|
.maxlen = sizeof(independent_wallclock),
|
|
.mode = 0644,
|
|
- .strategy = sysctl_data,
|
|
.proc_handler = proc_dointvec
|
|
},
|
|
{
|
|
- .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER,
|
|
.procname = "permitted_clock_jitter",
|
|
.data = &permitted_clock_jitter,
|
|
.maxlen = sizeof(permitted_clock_jitter),
|
|
.mode = 0644,
|
|
- .strategy = sysctl_data,
|
|
.proc_handler = proc_doulongvec_minmax
|
|
},
|
|
{ }
|
|
};
|
|
static ctl_table xen_table[] = {
|
|
{
|
|
- .ctl_name = CTL_XEN,
|
|
.procname = "xen",
|
|
.mode = 0555,
|
|
.child = xen_subtable
|
|
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-16 13:56:25.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -522,77 +522,56 @@ asmlinkage __kprobes struct pt_regs *syn
|
|
dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
|
|
{
|
|
struct task_struct *tsk = current;
|
|
- unsigned long condition;
|
|
+ unsigned long dr6;
|
|
int si_code;
|
|
|
|
- get_debugreg(condition, 6);
|
|
+ get_debugreg(dr6, 6);
|
|
|
|
/* Catch kmemcheck conditions first of all! */
|
|
- if (condition & DR_STEP && kmemcheck_trap(regs))
|
|
+ if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
|
|
return;
|
|
|
|
+ /* DR6 may or may not be cleared by the CPU */
|
|
+ set_debugreg(0, 6);
|
|
/*
|
|
* The processor cleared BTF, so don't mark that we need it set.
|
|
*/
|
|
clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
|
|
tsk->thread.debugctlmsr = 0;
|
|
|
|
- if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
|
|
- SIGTRAP) == NOTIFY_STOP)
|
|
+ /* Store the virtualized DR6 value */
|
|
+ tsk->thread.debugreg6 = dr6;
|
|
+
|
|
+ if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
|
|
+ SIGTRAP) == NOTIFY_STOP)
|
|
return;
|
|
|
|
/* It's safe to allow irq's after DR6 has been saved */
|
|
preempt_conditional_sti(regs);
|
|
|
|
- /* Mask out spurious debug traps due to lazy DR7 setting */
|
|
- if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
|
|
- if (!tsk->thread.debugreg7)
|
|
- goto clear_dr7;
|
|
- }
|
|
-
|
|
-#ifdef CONFIG_X86_32
|
|
- if (regs->flags & X86_VM_MASK)
|
|
- goto debug_vm86;
|
|
-#endif
|
|
-
|
|
- /* Save debug status register where ptrace can see it */
|
|
- tsk->thread.debugreg6 = condition;
|
|
-
|
|
- /*
|
|
- * Single-stepping through TF: make sure we ignore any events in
|
|
- * kernel space (but re-enable TF when returning to user mode).
|
|
- */
|
|
- if (condition & DR_STEP) {
|
|
- if (!user_mode(regs))
|
|
- goto clear_TF_reenable;
|
|
+ if (regs->flags & X86_VM_MASK) {
|
|
+ handle_vm86_trap((struct kernel_vm86_regs *) regs,
|
|
+ error_code, 1);
|
|
+ return;
|
|
}
|
|
|
|
- si_code = get_si_code(condition);
|
|
- /* Ok, finally something we can handle */
|
|
- send_sigtrap(tsk, regs, error_code, si_code);
|
|
-
|
|
/*
|
|
- * Disable additional traps. They'll be re-enabled when
|
|
- * the signal is delivered.
|
|
+ * Single-stepping through system calls: ignore any exceptions in
|
|
+ * kernel space, but re-enable TF when returning to user mode.
|
|
+ *
|
|
+ * We already checked v86 mode above, so we can check for kernel mode
|
|
+ * by just checking the CPL of CS.
|
|
*/
|
|
-clear_dr7:
|
|
- set_debugreg(0, 7);
|
|
+ if ((dr6 & DR_STEP) && !user_mode(regs)) {
|
|
+ tsk->thread.debugreg6 &= ~DR_STEP;
|
|
+ set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
|
+ regs->flags &= ~X86_EFLAGS_TF;
|
|
+ }
|
|
+ si_code = get_si_code(tsk->thread.debugreg6);
|
|
+ if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS))
|
|
+ send_sigtrap(tsk, regs, error_code, si_code);
|
|
preempt_conditional_cli(regs);
|
|
- return;
|
|
|
|
-#ifdef CONFIG_X86_32
|
|
-debug_vm86:
|
|
- /* reenable preemption: handle_vm86_trap() might sleep */
|
|
- dec_preempt_count();
|
|
- handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, 1);
|
|
- conditional_cli(regs);
|
|
- return;
|
|
-#endif
|
|
-
|
|
-clear_TF_reenable:
|
|
- set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
|
|
- regs->flags &= ~X86_EFLAGS_TF;
|
|
- preempt_conditional_cli(regs);
|
|
return;
|
|
}
|
|
|
|
--- head-2011-03-17.orig/arch/x86/kernel/vmlinux.lds.S 2011-02-01 14:44:12.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/vmlinux.lds.S 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -43,7 +43,7 @@ ENTRY(phys_startup_64)
|
|
jiffies_64 = jiffies;
|
|
#endif
|
|
|
|
-#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)
|
|
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) && !defined(CONFIG_XEN)
|
|
/*
|
|
* On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA
|
|
* we retain large page mappings for boundaries spanning kernel text, rodata
|
|
--- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
|
|
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
|
|
}
|
|
|
|
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
|
|
+void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
|
|
+ u32 mult)
|
|
{
|
|
unsigned long flags;
|
|
|
|
@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wa
|
|
vsyscall_gtod_data.clock.vread = clock->vread;
|
|
vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
|
|
vsyscall_gtod_data.clock.mask = clock->mask;
|
|
- vsyscall_gtod_data.clock.mult = clock->mult;
|
|
+ vsyscall_gtod_data.clock.mult = mult;
|
|
vsyscall_gtod_data.clock.shift = clock->shift;
|
|
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
|
|
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
|
|
@@ -237,7 +238,7 @@ static ctl_table kernel_table2[] = {
|
|
};
|
|
|
|
static ctl_table kernel_root_table2[] = {
|
|
- { .ctl_name = CTL_KERN, .procname = "kernel", .mode = 0555,
|
|
+ { .procname = "kernel", .mode = 0555,
|
|
.child = kernel_table2 },
|
|
{}
|
|
};
|
|
--- head-2011-03-17.orig/arch/x86/kernel/x8664_ksyms_64.c 2011-03-17 14:35:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/x8664_ksyms_64.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -54,6 +54,6 @@ EXPORT_SYMBOL(memcpy);
|
|
EXPORT_SYMBOL(__memcpy);
|
|
|
|
EXPORT_SYMBOL(empty_zero_page);
|
|
-#ifndef CONFIG_PARAVIRT
|
|
+#if !defined(CONFIG_PARAVIRT) && !defined(CONFIG_XEN)
|
|
EXPORT_SYMBOL(native_load_gs_index);
|
|
#endif
|
|
--- head-2011-03-17.orig/arch/x86/kernel/x86_init-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/kernel/x86_init-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -13,10 +13,13 @@
|
|
#include <asm/e820.h>
|
|
#include <asm/time.h>
|
|
#include <asm/irq.h>
|
|
+#include <asm/pat.h>
|
|
+#include <asm/iommu.h>
|
|
|
|
void __cpuinit x86_init_noop(void) { }
|
|
void __init x86_init_uint_noop(unsigned int unused) { }
|
|
void __init x86_init_pgd_noop(pgd_t *unused) { }
|
|
+int __init iommu_init_noop(void) { return 0; }
|
|
|
|
/*
|
|
* The platform setup functions are preset with the default functions
|
|
@@ -61,10 +64,15 @@ struct x86_init_ops x86_init __initdata
|
|
.tsc_pre_init = x86_init_noop,
|
|
.timer_init = x86_init_noop,
|
|
},
|
|
+
|
|
+ .iommu = {
|
|
+ .iommu_init = iommu_init_noop,
|
|
+ },
|
|
};
|
|
|
|
struct x86_platform_ops x86_platform = {
|
|
.calibrate_tsc = NULL,
|
|
.get_wallclock = mach_get_cmos_time,
|
|
.set_wallclock = mach_set_rtc_mmss,
|
|
+ .is_untracked_pat_range = is_ISA_range,
|
|
};
|
|
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -38,7 +38,8 @@ enum x86_pf_error_code {
|
|
* Returns 0 if mmiotrace is disabled, or if the fault is not
|
|
* handled by mmiotrace:
|
|
*/
|
|
-static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr)
|
|
+static inline int __kprobes
|
|
+kmmio_fault(struct pt_regs *regs, unsigned long addr)
|
|
{
|
|
if (unlikely(is_kmmio_active()))
|
|
if (kmmio_handler(regs, addr) == 1)
|
|
@@ -46,7 +47,7 @@ static inline int kmmio_fault(struct pt_
|
|
return 0;
|
|
}
|
|
|
|
-static inline int notify_page_fault(struct pt_regs *regs)
|
|
+static inline int __kprobes notify_page_fault(struct pt_regs *regs)
|
|
{
|
|
int ret = 0;
|
|
|
|
@@ -257,7 +258,7 @@ void vmalloc_sync_all(void)
|
|
*
|
|
* Handle a fault on the vmalloc or module mapping area
|
|
*/
|
|
-static noinline int vmalloc_fault(unsigned long address)
|
|
+static noinline __kprobes int vmalloc_fault(unsigned long address)
|
|
{
|
|
unsigned long pgd_paddr;
|
|
pmd_t *pmd_k;
|
|
@@ -376,7 +377,7 @@ void vmalloc_sync_all(void)
|
|
*
|
|
* This assumes no large pages in there.
|
|
*/
|
|
-static noinline int vmalloc_fault(unsigned long address)
|
|
+static noinline __kprobes int vmalloc_fault(unsigned long address)
|
|
{
|
|
pgd_t *pgd, *pgd_ref;
|
|
pud_t *pud, *pud_ref;
|
|
@@ -677,7 +678,7 @@ no_context(struct pt_regs *regs, unsigne
|
|
show_fault_oops(regs, error_code, address);
|
|
|
|
stackend = end_of_stack(tsk);
|
|
- if (*stackend != STACK_END_MAGIC)
|
|
+ if (tsk != &init_task && *stackend != STACK_END_MAGIC)
|
|
printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
|
|
|
|
tsk->thread.cr2 = address;
|
|
@@ -879,7 +880,7 @@ static int spurious_fault_check(unsigned
|
|
* There are no security implications to leaving a stale TLB when
|
|
* increasing the permissions on a page.
|
|
*/
|
|
-static noinline int
|
|
+static noinline __kprobes int
|
|
spurious_fault(unsigned long error_code, unsigned long address)
|
|
{
|
|
pgd_t *pgd;
|
|
--- head-2011-03-17.orig/arch/x86/mm/init-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/init-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -163,10 +163,6 @@ unsigned long __init_refok init_memory_m
|
|
use_gbpages = direct_gbpages;
|
|
#endif
|
|
|
|
- set_nx();
|
|
- if (nx_enabled)
|
|
- printk(KERN_INFO "NX (Execute Disable) protection: active\n");
|
|
-
|
|
/* Enable PSE if available */
|
|
if (cpu_has_pse)
|
|
set_in_cr4(X86_CR4_PSE);
|
|
--- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -448,7 +448,7 @@ static void __init permanent_kmaps_init(
|
|
pkmap_page_table = pte;
|
|
}
|
|
|
|
-static void __init add_one_highpage_init(struct page *page, int pfn)
|
|
+static void __init add_one_highpage_init(struct page *page)
|
|
{
|
|
ClearPageReserved(page);
|
|
init_page_count(page);
|
|
@@ -481,7 +481,7 @@ static int __init add_highpages_work_fn(
|
|
if (!pfn_valid(node_pfn))
|
|
continue;
|
|
page = pfn_to_page(node_pfn);
|
|
- add_one_highpage_init(page, node_pfn);
|
|
+ add_one_highpage_init(page);
|
|
}
|
|
|
|
return 0;
|
|
@@ -705,8 +705,8 @@ void __init find_low_pfn_range(void)
|
|
}
|
|
|
|
#ifndef CONFIG_NEED_MULTIPLE_NODES
|
|
-void __init initmem_init(unsigned long start_pfn,
|
|
- unsigned long end_pfn)
|
|
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
|
|
+ int acpi, int k8)
|
|
{
|
|
#ifdef CONFIG_HIGHMEM
|
|
highstart_pfn = highend_pfn = max_pfn;
|
|
@@ -955,8 +955,7 @@ void __init mem_init(void)
|
|
reservedpages << (PAGE_SHIFT-10),
|
|
datasize >> 10,
|
|
initsize >> 10,
|
|
- (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
|
|
- );
|
|
+ totalhigh_pages << (PAGE_SHIFT-10));
|
|
|
|
printk(KERN_INFO "virtual kernel memory layout:\n"
|
|
" fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n"
|
|
@@ -1062,7 +1061,7 @@ static noinline int do_test_wp_bit(void)
|
|
const int rodata_test_data = 0xC3;
|
|
EXPORT_SYMBOL_GPL(rodata_test_data);
|
|
|
|
-static int kernel_set_to_readonly;
|
|
+int kernel_set_to_readonly __read_mostly;
|
|
|
|
void set_kernel_text_rw(void)
|
|
{
|
|
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -53,6 +53,7 @@
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/init.h>
|
|
#include <asm/setup.h>
|
|
+#include <linux/bootmem.h>
|
|
|
|
#include <xen/features.h>
|
|
|
|
@@ -809,7 +810,8 @@ kernel_physical_mapping_init(unsigned lo
|
|
}
|
|
|
|
#ifndef CONFIG_NUMA
|
|
-void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn)
|
|
+void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn,
|
|
+ int acpi, int k8)
|
|
{
|
|
unsigned long bootmap_size, bootmap;
|
|
|
|
@@ -862,6 +864,21 @@ void __init paging_init(void)
|
|
*/
|
|
#ifdef CONFIG_MEMORY_HOTPLUG
|
|
/*
|
|
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
|
|
+ * updating.
|
|
+ */
|
|
+static void update_end_of_memory_vars(u64 start, u64 size)
|
|
+{
|
|
+ unsigned long end_pfn = PFN_UP(start + size);
|
|
+
|
|
+ if (end_pfn > max_pfn) {
|
|
+ max_pfn = end_pfn;
|
|
+ max_low_pfn = end_pfn;
|
|
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
|
|
+ }
|
|
+}
|
|
+
|
|
+/*
|
|
* Memory is added always to NORMAL zone. This means you will never get
|
|
* additional DMA/DMA32 memory.
|
|
*/
|
|
@@ -880,6 +897,9 @@ int arch_add_memory(int nid, u64 start,
|
|
ret = __add_pages(nid, zone, start_pfn, nr_pages);
|
|
WARN_ON_ONCE(ret);
|
|
|
|
+ /* update max_pfn, max_low_pfn and high_memory */
|
|
+ update_end_of_memory_vars(start, size);
|
|
+
|
|
return ret;
|
|
}
|
|
EXPORT_SYMBOL_GPL(arch_add_memory);
|
|
@@ -948,12 +968,12 @@ void __init mem_init(void)
|
|
const int rodata_test_data = 0xC3;
|
|
EXPORT_SYMBOL_GPL(rodata_test_data);
|
|
|
|
-static int kernel_set_to_readonly;
|
|
+int kernel_set_to_readonly;
|
|
|
|
void set_kernel_text_rw(void)
|
|
{
|
|
- unsigned long start = PFN_ALIGN(_stext);
|
|
- unsigned long end = PFN_ALIGN(__start_rodata);
|
|
+ unsigned long start = PFN_ALIGN(_text);
|
|
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
|
|
|
|
if (!kernel_set_to_readonly)
|
|
return;
|
|
@@ -961,13 +981,18 @@ void set_kernel_text_rw(void)
|
|
pr_debug("Set kernel text: %lx - %lx for read write\n",
|
|
start, end);
|
|
|
|
+ /*
|
|
+ * Make the kernel identity mapping for text RW. Kernel text
|
|
+ * mapping will always be RO. Refer to the comment in
|
|
+ * static_protections() in pageattr.c
|
|
+ */
|
|
set_memory_rw(start, (end - start) >> PAGE_SHIFT);
|
|
}
|
|
|
|
void set_kernel_text_ro(void)
|
|
{
|
|
- unsigned long start = PFN_ALIGN(_stext);
|
|
- unsigned long end = PFN_ALIGN(__start_rodata);
|
|
+ unsigned long start = PFN_ALIGN(_text);
|
|
+ unsigned long end = PFN_ALIGN(__stop___ex_table);
|
|
|
|
if (!kernel_set_to_readonly)
|
|
return;
|
|
@@ -975,14 +1000,21 @@ void set_kernel_text_ro(void)
|
|
pr_debug("Set kernel text: %lx - %lx for read only\n",
|
|
start, end);
|
|
|
|
+ /*
|
|
+ * Set the kernel identity mapping for text RO.
|
|
+ */
|
|
set_memory_ro(start, (end - start) >> PAGE_SHIFT);
|
|
}
|
|
|
|
void mark_rodata_ro(void)
|
|
{
|
|
- unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata);
|
|
+ unsigned long start = PFN_ALIGN(_text);
|
|
unsigned long rodata_start =
|
|
((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK;
|
|
+ unsigned long end = (unsigned long) &__end_rodata;
|
|
+ unsigned long text_end = PAGE_ALIGN((unsigned long) &__stop___ex_table);
|
|
+ unsigned long rodata_end = PAGE_ALIGN((unsigned long) &__end_rodata);
|
|
+ unsigned long data_start = (unsigned long) &_sdata;
|
|
|
|
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
|
|
(end - start) >> 10);
|
|
@@ -1005,6 +1037,14 @@ void mark_rodata_ro(void)
|
|
printk(KERN_INFO "Testing CPA: again\n");
|
|
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
|
|
#endif
|
|
+
|
|
+ free_init_pages("unused kernel memory",
|
|
+ (unsigned long) page_address(virt_to_page(text_end)),
|
|
+ (unsigned long)
|
|
+ page_address(virt_to_page(rodata_start)));
|
|
+ free_init_pages("unused kernel memory",
|
|
+ (unsigned long) page_address(virt_to_page(rodata_end)),
|
|
+ (unsigned long) page_address(virt_to_page(data_start)));
|
|
}
|
|
|
|
#endif
|
|
--- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:38.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:45.000000000 +0100
|
|
@@ -438,32 +438,6 @@ void __iomem *ioremap_cache(resource_siz
|
|
}
|
|
EXPORT_SYMBOL(ioremap_cache);
|
|
|
|
-#ifndef CONFIG_XEN
|
|
-static void __iomem *ioremap_default(resource_size_t phys_addr,
|
|
- unsigned long size)
|
|
-{
|
|
- unsigned long flags;
|
|
- void __iomem *ret;
|
|
- int err;
|
|
-
|
|
- /*
|
|
- * - WB for WB-able memory and no other conflicting mappings
|
|
- * - UC_MINUS for non-WB-able memory with no other conflicting mappings
|
|
- * - Inherit from confliting mappings otherwise
|
|
- */
|
|
- err = reserve_memtype(phys_addr, phys_addr + size,
|
|
- _PAGE_CACHE_WB, &flags);
|
|
- if (err < 0)
|
|
- return NULL;
|
|
-
|
|
- ret = __ioremap_caller(phys_addr, size, flags,
|
|
- __builtin_return_address(0));
|
|
-
|
|
- free_memtype(phys_addr, phys_addr + size);
|
|
- return ret;
|
|
-}
|
|
-#endif
|
|
-
|
|
void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size,
|
|
unsigned long prot_val)
|
|
{
|
|
@@ -539,7 +513,7 @@ void *xlate_dev_mem_ptr(unsigned long ph
|
|
if (page_is_ram(start >> PAGE_SHIFT))
|
|
return __va(phys);
|
|
|
|
- addr = (void __force *)ioremap_default(start, PAGE_SIZE);
|
|
+ addr = (void __force *)ioremap_cache(start, PAGE_SIZE);
|
|
if (addr)
|
|
addr = (void *)((unsigned long)addr | (phys & ~PAGE_MASK));
|
|
|
|
--- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -281,6 +281,22 @@ static inline pgprot_t static_protection
|
|
__pa((unsigned long)__end_rodata) >> PAGE_SHIFT))
|
|
pgprot_val(forbidden) |= _PAGE_RW;
|
|
|
|
+#if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA) && !defined(CONFIG_XEN)
|
|
+ /*
|
|
+ * Once the kernel maps the text as RO (kernel_set_to_readonly is set),
|
|
+ * kernel text mappings for the large page aligned text, rodata sections
|
|
+ * will be always read-only. For the kernel identity mappings covering
|
|
+ * the holes caused by this alignment can be anything that user asks.
|
|
+ *
|
|
+ * This will preserve the large page mappings for kernel text/data
|
|
+ * at no extra cost.
|
|
+ */
|
|
+ if (kernel_set_to_readonly &&
|
|
+ within(address, (unsigned long)_text,
|
|
+ (unsigned long)__end_rodata_hpage_align))
|
|
+ pgprot_val(forbidden) |= _PAGE_RW;
|
|
+#endif
|
|
+
|
|
prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));
|
|
|
|
return prot;
|
|
@@ -1135,12 +1151,18 @@ EXPORT_SYMBOL(set_memory_array_wb);
|
|
|
|
int set_memory_x(unsigned long addr, int numpages)
|
|
{
|
|
+ if (!(__supported_pte_mask & _PAGE_NX))
|
|
+ return 0;
|
|
+
|
|
return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
|
|
}
|
|
EXPORT_SYMBOL(set_memory_x);
|
|
|
|
int set_memory_nx(unsigned long addr, int numpages)
|
|
{
|
|
+ if (!(__supported_pte_mask & _PAGE_NX))
|
|
+ return 0;
|
|
+
|
|
return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
|
|
}
|
|
EXPORT_SYMBOL(set_memory_nx);
|
|
--- head-2011-03-17.orig/arch/x86/mm/pat-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/mm/pat-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -20,6 +20,7 @@
|
|
#include <asm/cacheflush.h>
|
|
#include <asm/processor.h>
|
|
#include <asm/tlbflush.h>
|
|
+#include <asm/x86_init.h>
|
|
#include <asm/pgtable.h>
|
|
#include <asm/fcntl.h>
|
|
#include <asm/e820.h>
|
|
@@ -381,9 +382,6 @@ static int free_ram_pages_type(u64 start
|
|
* - _PAGE_CACHE_UC_MINUS
|
|
* - _PAGE_CACHE_UC
|
|
*
|
|
- * req_type will have a special case value '-1', when requester want to inherit
|
|
- * the memory type from mtrr (if WB), existing PAT, defaulting to UC_MINUS.
|
|
- *
|
|
* If new_type is NULL, function will return an error if it cannot reserve the
|
|
* region with req_type. If new_type is non-NULL, function will return
|
|
* available type in new_type in case of no error. In case of any error
|
|
@@ -403,9 +401,7 @@ int reserve_memtype(u64 start, u64 end,
|
|
if (!pat_enabled) {
|
|
/* This is identical to page table setting without PAT */
|
|
if (new_type) {
|
|
- if (req_type == -1)
|
|
- *new_type = _PAGE_CACHE_WB;
|
|
- else if (req_type == _PAGE_CACHE_WC)
|
|
+ if (req_type == _PAGE_CACHE_WC)
|
|
*new_type = _PAGE_CACHE_UC_MINUS;
|
|
else
|
|
*new_type = req_type & _PAGE_CACHE_MASK;
|
|
@@ -414,7 +410,7 @@ int reserve_memtype(u64 start, u64 end,
|
|
}
|
|
|
|
/* Low ISA region is always mapped WB in page table. No need to track */
|
|
- if (is_ISA_range(start, end - 1)) {
|
|
+ if (x86_platform.is_untracked_pat_range(start, end)) {
|
|
if (new_type)
|
|
*new_type = _PAGE_CACHE_WB;
|
|
return 0;
|
|
@@ -525,7 +521,7 @@ int free_memtype(u64 start, u64 end)
|
|
return 0;
|
|
|
|
/* Low ISA region is always mapped WB. No need to track */
|
|
- if (is_ISA_range(start, end - 1))
|
|
+ if (x86_platform.is_untracked_pat_range(start, end))
|
|
return 0;
|
|
|
|
is_range_ram = pat_pagerange_is_ram(start, end);
|
|
@@ -609,7 +605,7 @@ static unsigned long lookup_memtype(u64
|
|
int rettype = _PAGE_CACHE_WB;
|
|
struct memtype *entry;
|
|
|
|
- if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
|
|
+ if (x86_platform.is_untracked_pat_range(paddr, paddr + PAGE_SIZE))
|
|
return rettype;
|
|
|
|
if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
|
|
@@ -736,9 +732,8 @@ int phys_mem_access_prot_allowed(struct
|
|
if (!range_is_allowed(mfn, size))
|
|
return 0;
|
|
|
|
- if (file->f_flags & O_SYNC) {
|
|
+ if (file->f_flags & O_DSYNC)
|
|
flags = _PAGE_CACHE_UC_MINUS;
|
|
- }
|
|
|
|
#ifndef CONFIG_X86_32
|
|
#ifndef CONFIG_XEN /* Xen sets correct MTRR type on non-RAM for us. */
|
|
@@ -1032,8 +1027,10 @@ static const struct file_operations memt
|
|
|
|
static int __init pat_memtype_list_init(void)
|
|
{
|
|
- debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir,
|
|
- NULL, &memtype_fops);
|
|
+ if (pat_enabled) {
|
|
+ debugfs_create_file("pat_memtype_list", S_IRUSR,
|
|
+ arch_debugfs_dir, NULL, &memtype_fops);
|
|
+ }
|
|
return 0;
|
|
}
|
|
|
|
--- head-2011-03-17.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -436,7 +436,6 @@ static ctl_table abi_table2[] = {
|
|
|
|
static ctl_table abi_root_table2[] = {
|
|
{
|
|
- .ctl_name = CTL_ABI,
|
|
.procname = "abi",
|
|
.mode = 0555,
|
|
.child = abi_table2
|
|
--- head-2011-03-17.orig/drivers/gpu/drm/vmwgfx/Kconfig 2011-03-17 14:35:44.000000000 +0100
|
|
+++ head-2011-03-17/drivers/gpu/drm/vmwgfx/Kconfig 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -1,6 +1,6 @@
|
|
config DRM_VMWGFX
|
|
tristate "DRM driver for VMware Virtual GPU"
|
|
- depends on DRM && PCI && FB
|
|
+ depends on DRM && PCI && FB && !XEN
|
|
select FB_DEFERRED_IO
|
|
select FB_CFB_FILLRECT
|
|
select FB_CFB_COPYAREA
|
|
--- head-2011-03-17.orig/drivers/hwmon/Kconfig 2011-01-31 17:32:29.000000000 +0100
|
|
+++ head-2011-03-17/drivers/hwmon/Kconfig 2011-03-11 11:00:24.000000000 +0100
|
|
@@ -943,7 +943,7 @@ config SENSORS_TMP421
|
|
|
|
config SENSORS_VIA_CPUTEMP
|
|
tristate "VIA CPU temperature sensor"
|
|
- depends on X86
|
|
+ depends on X86 && !XEN
|
|
help
|
|
If you say yes here you get support for the temperature
|
|
sensor inside your CPU. Supported are all known variants of
|
|
--- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -31,6 +31,7 @@
|
|
#include <linux/mutex.h>
|
|
#include <linux/list.h>
|
|
#include <linux/platform_device.h>
|
|
+#include <linux/pci.h>
|
|
#include <asm/msr.h>
|
|
#include <xen/pcpu.h>
|
|
#include "../xen/core/domctl.h"
|
|
@@ -166,6 +167,7 @@ static int adjust_tjmax(struct coretemp_
|
|
int usemsr_ee = 1;
|
|
int err;
|
|
u32 eax, edx;
|
|
+ struct pci_dev *host_bridge;
|
|
|
|
/* Early chips have no MSR for TjMax */
|
|
|
|
@@ -173,11 +175,21 @@ static int adjust_tjmax(struct coretemp_
|
|
usemsr_ee = 0;
|
|
}
|
|
|
|
- /* Atoms seems to have TjMax at 90C */
|
|
+ /* Atom CPUs */
|
|
|
|
if (c->x86_model == 0x1c) {
|
|
usemsr_ee = 0;
|
|
- tjmax = 90000;
|
|
+
|
|
+ host_bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0, 0));
|
|
+
|
|
+ if (host_bridge && host_bridge->vendor == PCI_VENDOR_ID_INTEL
|
|
+ && (host_bridge->device == 0xa000 /* NM10 based nettop */
|
|
+ || host_bridge->device == 0xa010)) /* NM10 based netbook */
|
|
+ tjmax = 100000;
|
|
+ else
|
|
+ tjmax = 90000;
|
|
+
|
|
+ pci_dev_put(host_bridge);
|
|
}
|
|
|
|
if ((c->x86_model > 0xe) && (usemsr_ee)) {
|
|
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
|
|
+++ head-2011-03-17/drivers/hwmon/via-cputemp-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -0,0 +1,354 @@
|
|
+/*
|
|
+ * via-cputemp.c - Driver for VIA CPU core temperature monitoring
|
|
+ * Copyright (C) 2009 VIA Technologies, Inc.
|
|
+ *
|
|
+ * based on existing coretemp.c, which is
|
|
+ *
|
|
+ * Copyright (C) 2007 Rudolf Marek <r.marek@assembler.cz>
|
|
+ *
|
|
+ * This program is free software; you can redistribute it and/or modify
|
|
+ * it under the terms of the GNU General Public License as published by
|
|
+ * the Free Software Foundation; version 2 of the License.
|
|
+ *
|
|
+ * This program is distributed in the hope that it will be useful,
|
|
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
+ * GNU General Public License for more details.
|
|
+ *
|
|
+ * You should have received a copy of the GNU General Public License
|
|
+ * along with this program; if not, write to the Free Software
|
|
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
+ * 02110-1301 USA.
|
|
+ */
|
|
+
|
|
+#include <linux/module.h>
|
|
+#include <linux/init.h>
|
|
+#include <linux/slab.h>
|
|
+#include <linux/hwmon.h>
|
|
+#include <linux/sysfs.h>
|
|
+#include <linux/hwmon-sysfs.h>
|
|
+#include <linux/err.h>
|
|
+#include <linux/mutex.h>
|
|
+#include <linux/list.h>
|
|
+#include <linux/platform_device.h>
|
|
+#include <asm/msr.h>
|
|
+#include <xen/pcpu.h>
|
|
+#include "../xen/core/domctl.h"
|
|
+
|
|
+#define DRVNAME "via_cputemp"
|
|
+
|
|
+enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME } SHOW;
|
|
+
|
|
+/*
|
|
+ * Functions declaration
|
|
+ */
|
|
+
|
|
+struct pdev_entry {
|
|
+ struct list_head list;
|
|
+ struct platform_device *pdev;
|
|
+ struct device *hwmon_dev;
|
|
+ const char *name;
|
|
+ u8 x86_model;
|
|
+ u32 msr;
|
|
+};
|
|
+#define via_cputemp_data pdev_entry
|
|
+
|
|
+/*
|
|
+ * Sysfs stuff
|
|
+ */
|
|
+
|
|
+static ssize_t show_name(struct device *dev, struct device_attribute
|
|
+ *devattr, char *buf)
|
|
+{
|
|
+ int ret;
|
|
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
|
|
+ struct via_cputemp_data *data = dev_get_drvdata(dev);
|
|
+
|
|
+ if (attr->index == SHOW_NAME)
|
|
+ ret = sprintf(buf, "%s\n", data->name);
|
|
+ else /* show label */
|
|
+ ret = sprintf(buf, "Core %d\n", data->pdev->id);
|
|
+ return ret;
|
|
+}
|
|
+
|
|
+static ssize_t show_temp(struct device *dev,
|
|
+ struct device_attribute *devattr, char *buf)
|
|
+{
|
|
+ struct via_cputemp_data *data = dev_get_drvdata(dev);
|
|
+ u32 eax, edx;
|
|
+ int err;
|
|
+
|
|
+ err = rdmsr_safe_on_pcpu(data->pdev->id, data->msr, &eax, &edx);
|
|
+ if (err < 0)
|
|
+ return -EAGAIN;
|
|
+
|
|
+ return sprintf(buf, "%lu\n", ((unsigned long)eax & 0xffffff) * 1000);
|
|
+}
|
|
+
|
|
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL,
|
|
+ SHOW_TEMP);
|
|
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
|
|
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
|
|
+
|
|
+static struct attribute *via_cputemp_attributes[] = {
|
|
+ &sensor_dev_attr_name.dev_attr.attr,
|
|
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
|
|
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
|
|
+ NULL
|
|
+};
|
|
+
|
|
+static const struct attribute_group via_cputemp_group = {
|
|
+ .attrs = via_cputemp_attributes,
|
|
+};
|
|
+
|
|
+static int via_cputemp_probe(struct platform_device *pdev)
|
|
+{
|
|
+ struct via_cputemp_data *data = platform_get_drvdata(pdev);
|
|
+ int err;
|
|
+ u32 eax, edx;
|
|
+
|
|
+ data->name = "via_cputemp";
|
|
+
|
|
+ switch (data->x86_model) {
|
|
+ case 0xA:
|
|
+ /* C7 A */
|
|
+ case 0xD:
|
|
+ /* C7 D */
|
|
+ data->msr = 0x1169;
|
|
+ break;
|
|
+ case 0xF:
|
|
+ /* Nano */
|
|
+ data->msr = 0x1423;
|
|
+ break;
|
|
+ default:
|
|
+ return -ENODEV;
|
|
+ }
|
|
+
|
|
+ /* fail the probe if the TEMPERATURE MSR cannot be read (err < 0) */
|
|
+ err = rdmsr_safe_on_pcpu(pdev->id, data->msr, &eax, &edx);
|
|
+ if (err < 0) {
|
|
+ dev_err(&pdev->dev,
|
|
+ "Unable to access TEMPERATURE MSR, giving up\n");
|
|
+ return err;
|
|
+ }
|
|
+
|
|
+ err = sysfs_create_group(&pdev->dev.kobj, &via_cputemp_group);
|
|
+ if (err)
|
|
+ return err;
|
|
+
|
|
+ data->hwmon_dev = hwmon_device_register(&pdev->dev);
|
|
+ if (IS_ERR(data->hwmon_dev)) {
|
|
+ err = PTR_ERR(data->hwmon_dev);
|
|
+ dev_err(&pdev->dev, "Class registration failed (%d)\n",
|
|
+ err);
|
|
+ goto exit_remove;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+exit_remove:
|
|
+ sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static int via_cputemp_remove(struct platform_device *pdev)
|
|
+{
|
|
+ struct via_cputemp_data *data = platform_get_drvdata(pdev);
|
|
+
|
|
+ hwmon_device_unregister(data->hwmon_dev);
|
|
+ sysfs_remove_group(&pdev->dev.kobj, &via_cputemp_group);
|
|
+ return 0;
|
|
+}
|
|
+
|
|
+static struct platform_driver via_cputemp_driver = {
|
|
+ .driver = {
|
|
+ .owner = THIS_MODULE,
|
|
+ .name = DRVNAME,
|
|
+ },
|
|
+ .probe = via_cputemp_probe,
|
|
+ .remove = via_cputemp_remove,
|
|
+};
|
|
+
|
|
+static LIST_HEAD(pdev_list);
|
|
+static DEFINE_MUTEX(pdev_list_mutex);
|
|
+
|
|
+struct cpu_info {
|
|
+ struct pdev_entry *pdev_entry;
|
|
+ u8 x86;
|
|
+};
|
|
+
|
|
+static void get_cpuid_info(void *arg)
|
|
+{
|
|
+ struct cpu_info *info = arg;
|
|
+ struct pdev_entry *pdev_entry = info->pdev_entry;
|
|
+ u32 val = cpuid_eax(1);
|
|
+
|
|
+ info->x86 = ((val >> 8) & 0xf) + ((val >> 20) & 0xff);
|
|
+ pdev_entry->x86_model = ((val >> 4) & 0xf) | ((val >> 12) & 0xf0);
|
|
+}
|
|
+
|
|
+static int via_cputemp_device_add(unsigned int cpu)
|
|
+{
|
|
+ int err;
|
|
+ struct cpu_info info;
|
|
+ struct platform_device *pdev;
|
|
+ struct pdev_entry *pdev_entry;
|
|
+
|
|
+ pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL);
|
|
+ if (!pdev_entry)
|
|
+ return -ENOMEM;
|
|
+
|
|
+ info.pdev_entry = pdev_entry;
|
|
+ err = xen_set_physical_cpu_affinity(cpu);
|
|
+ if (!err) {
|
|
+ get_cpuid_info(&info);
|
|
+ WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
|
|
+ } else if (err > 0) {
|
|
+ static bool warned;
|
|
+
|
|
+ if (!warned) {
|
|
+ warned = true;
|
|
+ printk(KERN_WARNING DRVNAME
|
|
+ ": Cannot set physical CPU affinity"
|
|
+ " (assuming use of dom0_vcpus_pin)\n");
|
|
+ }
|
|
+ err = smp_call_function_single(cpu, get_cpuid_info, &info, 1);
|
|
+ }
|
|
+ if (err)
|
|
+ goto exit_entry_free;
|
|
+
|
|
+ if (info.x86 != 6)
|
|
+ goto exit_entry_free;
|
|
+
|
|
+ if (pdev_entry->x86_model < 0x0a)
|
|
+ goto exit_entry_free;
|
|
+
|
|
+ if (pdev_entry->x86_model > 0x0f) {
|
|
+ printk(KERN_WARNING DRVNAME ": Unknown CPU "
|
|
+ "model 0x%x\n", pdev_entry->x86_model);
|
|
+ goto exit_entry_free;
|
|
+ }
|
|
+
|
|
+ pdev = platform_device_alloc(DRVNAME, cpu);
|
|
+ if (!pdev) {
|
|
+ err = -ENOMEM;
|
|
+ printk(KERN_ERR DRVNAME ": Device allocation failed\n");
|
|
+ goto exit_entry_free;
|
|
+ }
|
|
+
|
|
+ platform_set_drvdata(pdev, pdev_entry);
|
|
+ pdev_entry->pdev = pdev;
|
|
+
|
|
+ err = platform_device_add(pdev);
|
|
+ if (err) {
|
|
+ printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
|
|
+ err);
|
|
+ goto exit_device_put;
|
|
+ }
|
|
+
|
|
+ mutex_lock(&pdev_list_mutex);
|
|
+ list_add_tail(&pdev_entry->list, &pdev_list);
|
|
+ mutex_unlock(&pdev_list_mutex);
|
|
+
|
|
+ return 0;
|
|
+
|
|
+exit_device_put:
|
|
+ platform_device_put(pdev);
|
|
+exit_entry_free:
|
|
+ kfree(pdev_entry);
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void via_cputemp_device_remove(unsigned int cpu)
|
|
+{
|
|
+ struct pdev_entry *p;
|
|
+
|
|
+ mutex_lock(&pdev_list_mutex);
|
|
+ list_for_each_entry(p, &pdev_list, list) {
|
|
+ if (p->pdev->id == cpu) {
|
|
+ platform_device_unregister(p->pdev);
|
|
+ list_del(&p->list);
|
|
+ kfree(p);
|
|
+ break;
|
|
+ }
|
|
+ }
|
|
+ mutex_unlock(&pdev_list_mutex);
|
|
+}
|
|
+
|
|
+static int via_cputemp_cpu_callback(struct notifier_block *nfb,
|
|
+ unsigned long action, void *hcpu)
|
|
+{
|
|
+ unsigned int cpu = (unsigned long) hcpu;
|
|
+
|
|
+ switch (action) {
|
|
+ case CPU_ONLINE:
|
|
+ via_cputemp_device_add(cpu);
|
|
+ break;
|
|
+ case CPU_DEAD:
|
|
+ via_cputemp_device_remove(cpu);
|
|
+ break;
|
|
+ }
|
|
+ return NOTIFY_OK;
|
|
+}
|
|
+
|
|
+static struct notifier_block via_cputemp_cpu_notifier = {
|
|
+ .notifier_call = via_cputemp_cpu_callback,
|
|
+};
|
|
+
|
|
+static int __init via_cputemp_init(void)
|
|
+{
|
|
+ int err;
|
|
+
|
|
+ if (!is_initial_xendomain())
|
|
+ return -ENODEV;
|
|
+
|
|
+ if (cpu_data(0).x86_vendor != X86_VENDOR_CENTAUR) {
|
|
+ printk(KERN_DEBUG DRVNAME ": Not a VIA CPU\n");
|
|
+ err = -ENODEV;
|
|
+ goto exit;
|
|
+ }
|
|
+
|
|
+ err = platform_driver_register(&via_cputemp_driver);
|
|
+ if (err)
|
|
+ goto exit;
|
|
+
|
|
+ err = register_pcpu_notifier(&via_cputemp_cpu_notifier);
|
|
+ if (err)
|
|
+ goto exit_driver_unreg;
|
|
+
|
|
+ if (list_empty(&pdev_list)) {
|
|
+ err = -ENODEV;
|
|
+ goto exit_notifier_unreg;
|
|
+ }
|
|
+
|
|
+ return 0;
|
|
+
|
|
+exit_notifier_unreg:
|
|
+ unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
|
|
+exit_driver_unreg:
|
|
+ platform_driver_unregister(&via_cputemp_driver);
|
|
+exit:
|
|
+ return err;
|
|
+}
|
|
+
|
|
+static void __exit via_cputemp_exit(void)
|
|
+{
|
|
+ struct pdev_entry *p, *n;
|
|
+
|
|
+ unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
|
|
+ mutex_lock(&pdev_list_mutex);
|
|
+ list_for_each_entry_safe(p, n, &pdev_list, list) {
|
|
+ platform_device_unregister(p->pdev);
|
|
+ list_del(&p->list);
|
|
+ kfree(p);
|
|
+ }
|
|
+ mutex_unlock(&pdev_list_mutex);
|
|
+ platform_driver_unregister(&via_cputemp_driver);
|
|
+}
|
|
+
|
|
+MODULE_AUTHOR("Harald Welte <HaraldWelte@viatech.com>");
|
|
+MODULE_DESCRIPTION("VIA CPU temperature monitor");
|
|
+MODULE_LICENSE("GPL");
|
|
+
|
|
+module_init(via_cputemp_init)
|
|
+module_exit(via_cputemp_exit)
|
|
--- head-2011-03-17.orig/drivers/oprofile/cpu_buffer.c 2011-02-01 14:42:26.000000000 +0100
|
|
+++ head-2011-03-17/drivers/oprofile/cpu_buffer.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -422,7 +422,7 @@ void oprofile_add_pc(unsigned long pc, i
|
|
*/
|
|
void oprofile_add_mode(int cpu_mode)
|
|
{
|
|
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
|
|
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
|
|
|
|
if (op_add_code(cpu_buf, 1, cpu_mode, current))
|
|
cpu_buf->sample_lost_overflow++;
|
|
--- head-2011-03-17.orig/drivers/pci/Kconfig 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-17/drivers/pci/Kconfig 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -104,7 +104,7 @@ config PCI_IOV
|
|
|
|
config PCI_IOAPIC
|
|
bool
|
|
- depends on PCI
|
|
+ depends on PCI && !XEN
|
|
depends on ACPI
|
|
depends on HOTPLUG
|
|
default y
|
|
--- head-2011-03-17.orig/drivers/scsi/Kconfig 2011-03-17 14:35:44.000000000 +0100
|
|
+++ head-2011-03-17/drivers/scsi/Kconfig 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -659,7 +659,7 @@ config SCSI_FLASHPOINT
|
|
|
|
config VMWARE_PVSCSI
|
|
tristate "VMware PVSCSI driver support"
|
|
- depends on PCI && SCSI && X86
|
|
+ depends on PCI && SCSI && !XEN && X86
|
|
help
|
|
This driver supports VMware's para virtualized SCSI HBA.
|
|
To compile this driver as a module, choose M here: the
|
|
--- head-2011-03-17.orig/drivers/xen/char/mem.c 2011-02-01 14:44:12.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/char/mem.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -5,7 +5,7 @@
|
|
*
|
|
* Added devfs support.
|
|
* Jan-11-1998, C. Scott Ananian <cananian@alumni.princeton.edu>
|
|
- * Shared /dev/zero mmaping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
|
|
+ * Shared /dev/zero mmapping support, Feb 2000, Kanoj Sarcar <kanoj@sgi.com>
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
@@ -25,9 +25,19 @@
|
|
#include <asm/io.h>
|
|
#include <asm/hypervisor.h>
|
|
|
|
+static inline unsigned long size_inside_page(unsigned long start,
|
|
+ unsigned long size)
|
|
+{
|
|
+ unsigned long sz;
|
|
+
|
|
+ sz = PAGE_SIZE - (start & (PAGE_SIZE - 1));
|
|
+
|
|
+ return min(sz, size);
|
|
+}
|
|
+
|
|
static inline int uncached_access(struct file *file)
|
|
{
|
|
- if (file->f_flags & O_SYNC)
|
|
+ if (file->f_flags & O_DSYNC)
|
|
return 1;
|
|
/* Xen sets correct MTRR type on non-RAM for us. */
|
|
return 0;
|
|
@@ -61,20 +71,14 @@ static inline int range_is_allowed(unsig
|
|
static ssize_t read_mem(struct file * file, char __user * buf,
|
|
size_t count, loff_t *ppos)
|
|
{
|
|
- unsigned long p = *ppos, ignored;
|
|
+ unsigned long p = *ppos;
|
|
ssize_t read = 0, sz;
|
|
void __iomem *v;
|
|
|
|
while (count > 0) {
|
|
- /*
|
|
- * Handle first page in case it's not aligned
|
|
- */
|
|
- if (-p & (PAGE_SIZE - 1))
|
|
- sz = -p & (PAGE_SIZE - 1);
|
|
- else
|
|
- sz = PAGE_SIZE;
|
|
+ unsigned long remaining;
|
|
|
|
- sz = min_t(unsigned long, sz, count);
|
|
+ sz = size_inside_page(p, count);
|
|
|
|
if (!range_is_allowed(p >> PAGE_SHIFT, count))
|
|
return -EPERM;
|
|
@@ -95,10 +99,11 @@ static ssize_t read_mem(struct file * fi
|
|
break;
|
|
}
|
|
|
|
- ignored = copy_to_user(buf, v, sz);
|
|
+ remaining = copy_to_user(buf, v, sz);
|
|
iounmap(v);
|
|
- if (ignored)
|
|
+ if (remaining)
|
|
return -EFAULT;
|
|
+
|
|
buf += sz;
|
|
p += sz;
|
|
count -= sz;
|
|
@@ -117,15 +122,7 @@ static ssize_t write_mem(struct file * f
|
|
void __iomem *v;
|
|
|
|
while (count > 0) {
|
|
- /*
|
|
- * Handle first page in case it's not aligned
|
|
- */
|
|
- if (-p & (PAGE_SIZE - 1))
|
|
- sz = -p & (PAGE_SIZE - 1);
|
|
- else
|
|
- sz = PAGE_SIZE;
|
|
-
|
|
- sz = min_t(unsigned long, sz, count);
|
|
+ sz = size_inside_page(p, count);
|
|
|
|
if (!range_is_allowed(p >> PAGE_SHIFT, sz))
|
|
return -EPERM;
|
|
--- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -1050,6 +1050,14 @@ void disable_all_local_evtchn(void)
|
|
synch_set_bit(i, &s->evtchn_mask[0]);
|
|
}
|
|
|
|
+/* Test an irq's pending state. */
|
|
+int xen_test_irq_pending(int irq)
|
|
+{
|
|
+ int evtchn = evtchn_from_irq(irq);
|
|
+
|
|
+ return VALID_EVTCHN(evtchn) && test_evtchn(evtchn);
|
|
+}
|
|
+
|
|
#ifdef CONFIG_PM_SLEEP
|
|
static void restore_cpu_virqs(unsigned int cpu)
|
|
{
|
|
--- head-2011-03-17.orig/drivers/xen/core/spinlock.c 2011-03-15 16:43:45.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/core/spinlock.c 2011-03-15 16:44:19.000000000 +0100
|
|
@@ -16,7 +16,7 @@
|
|
#include <xen/evtchn.h>
|
|
|
|
struct spinning {
|
|
- raw_spinlock_t *lock;
|
|
+ arch_spinlock_t *lock;
|
|
unsigned int ticket;
|
|
struct spinning *prev;
|
|
};
|
|
@@ -72,7 +72,7 @@ void __cpuinit spinlock_resume(void)
|
|
#endif
|
|
|
|
static unsigned int spin_adjust(struct spinning *spinning,
|
|
- const raw_spinlock_t *lock,
|
|
+ const arch_spinlock_t *lock,
|
|
unsigned int token)
|
|
{
|
|
for (; spinning; spinning = spinning->prev)
|
|
@@ -90,12 +90,12 @@ static unsigned int spin_adjust(struct s
|
|
return token;
|
|
}
|
|
|
|
-unsigned int xen_spin_adjust(const raw_spinlock_t *lock, unsigned int token)
|
|
+unsigned int xen_spin_adjust(const arch_spinlock_t *lock, unsigned int token)
|
|
{
|
|
return spin_adjust(percpu_read(spinning), lock, token);
|
|
}
|
|
|
|
-bool xen_spin_wait(raw_spinlock_t *lock, unsigned int *ptok,
|
|
+bool xen_spin_wait(arch_spinlock_t *lock, unsigned int *ptok,
|
|
unsigned int flags)
|
|
{
|
|
bool rc;
|
|
@@ -151,7 +151,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
* reduce latency after the current lock was
|
|
* released), but don't acquire the lock.
|
|
*/
|
|
- raw_spinlock_t *lock = other->lock;
|
|
+ arch_spinlock_t *lock = other->lock;
|
|
|
|
raw_local_irq_disable();
|
|
while (lock->cur == other->ticket) {
|
|
@@ -235,7 +235,7 @@ bool xen_spin_wait(raw_spinlock_t *lock,
|
|
return rc;
|
|
}
|
|
|
|
-void xen_spin_kick(raw_spinlock_t *lock, unsigned int token)
|
|
+void xen_spin_kick(arch_spinlock_t *lock, unsigned int token)
|
|
{
|
|
unsigned int cpu = raw_smp_processor_id(), ancor = cpu;
|
|
|
|
--- head-2011-03-17.orig/drivers/xen/evtchn.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/evtchn.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -48,15 +48,14 @@
|
|
#include <linux/mutex.h>
|
|
#include <linux/cpu.h>
|
|
|
|
-#ifdef CONFIG_PARAVIRT_XEN
|
|
#include <xen/xen.h>
|
|
+#ifdef CONFIG_PARAVIRT_XEN
|
|
#include <xen/events.h>
|
|
#include <xen/evtchn.h>
|
|
#include <asm/xen/hypervisor.h>
|
|
#else
|
|
#include <xen/evtchn.h>
|
|
#include <xen/public/evtchn.h>
|
|
-#define xen_domain() is_running_on_xen()
|
|
#define bind_evtchn_to_irqhandler bind_caller_port_to_irqhandler
|
|
#endif
|
|
|
|
--- head-2011-03-17.orig/drivers/xen/privcmd/compat_privcmd.c 2011-01-31 17:29:16.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/privcmd/compat_privcmd.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -26,17 +26,16 @@
|
|
#include <xen/public/privcmd.h>
|
|
#include <xen/compat_ioctl.h>
|
|
|
|
-int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg)
|
|
+int privcmd_ioctl_32(int fd, unsigned int cmd, void __user *arg)
|
|
{
|
|
int ret;
|
|
|
|
switch (cmd) {
|
|
case IOCTL_PRIVCMD_MMAP_32: {
|
|
- struct privcmd_mmap *p;
|
|
- struct privcmd_mmap_32 *p32;
|
|
+ struct privcmd_mmap __user *p;
|
|
+ struct privcmd_mmap_32 __user *p32 = arg;
|
|
struct privcmd_mmap_32 n32;
|
|
|
|
- p32 = compat_ptr(arg);
|
|
p = compat_alloc_user_space(sizeof(*p));
|
|
if (copy_from_user(&n32, p32, sizeof(n32)) ||
|
|
put_user(n32.num, &p->num) ||
|
|
@@ -48,8 +47,8 @@ int privcmd_ioctl_32(int fd, unsigned in
|
|
}
|
|
break;
|
|
case IOCTL_PRIVCMD_MMAPBATCH_32: {
|
|
- struct privcmd_mmapbatch *p;
|
|
- struct privcmd_mmapbatch_32 *p32;
|
|
+ struct privcmd_mmapbatch __user *p;
|
|
+ struct privcmd_mmapbatch_32 __user *p32 = arg;
|
|
struct privcmd_mmapbatch_32 n32;
|
|
#ifdef xen_pfn32_t
|
|
xen_pfn_t *__user arr;
|
|
@@ -57,7 +56,6 @@ int privcmd_ioctl_32(int fd, unsigned in
|
|
unsigned int i;
|
|
#endif
|
|
|
|
- p32 = compat_ptr(arg);
|
|
p = compat_alloc_user_space(sizeof(*p));
|
|
if (copy_from_user(&n32, p32, sizeof(n32)) ||
|
|
put_user(n32.num, &p->num) ||
|
|
@@ -97,8 +95,8 @@ int privcmd_ioctl_32(int fd, unsigned in
|
|
}
|
|
break;
|
|
case IOCTL_PRIVCMD_MMAPBATCH_V2_32: {
|
|
- struct privcmd_mmapbatch_v2 *p;
|
|
- struct privcmd_mmapbatch_v2_32 *p32;
|
|
+ struct privcmd_mmapbatch_v2 __user *p;
|
|
+ struct privcmd_mmapbatch_v2_32 __user *p32 = arg;
|
|
struct privcmd_mmapbatch_v2_32 n32;
|
|
#ifdef xen_pfn32_t
|
|
xen_pfn_t *__user arr;
|
|
@@ -106,7 +104,6 @@ int privcmd_ioctl_32(int fd, unsigned in
|
|
unsigned int i;
|
|
#endif
|
|
|
|
- p32 = compat_ptr(arg);
|
|
p = compat_alloc_user_space(sizeof(*p));
|
|
if (copy_from_user(&n32, p32, sizeof(n32)) ||
|
|
put_user(n32.num, &p->num) ||
|
|
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -62,6 +62,8 @@
|
|
#endif
|
|
#else
|
|
#include <asm/xen/hypervisor.h>
|
|
+
|
|
+#include <xen/xen.h>
|
|
#include <xen/xenbus.h>
|
|
#include <xen/events.h>
|
|
#include <xen/page.h>
|
|
--- head-2011-03-17.orig/fs/compat_ioctl.c 2011-01-31 14:53:38.000000000 +0100
|
|
+++ head-2011-03-17/fs/compat_ioctl.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -1417,9 +1417,6 @@ IGNORE_IOCTL(FBIOGCURSOR32)
|
|
#endif
|
|
|
|
#ifdef CONFIG_XEN
|
|
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAP_32, privcmd_ioctl_32)
|
|
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_32, privcmd_ioctl_32)
|
|
-HANDLE_IOCTL(IOCTL_PRIVCMD_MMAPBATCH_V2_32, privcmd_ioctl_32)
|
|
COMPATIBLE_IOCTL(IOCTL_PRIVCMD_HYPERCALL)
|
|
COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_VIRQ)
|
|
COMPATIBLE_IOCTL(IOCTL_EVTCHN_BIND_INTERDOMAIN)
|
|
@@ -1484,6 +1481,12 @@ static long do_ioctl_trans(int fd, unsig
|
|
return do_video_stillpicture(fd, cmd, argp);
|
|
case VIDEO_SET_SPU_PALETTE:
|
|
return do_video_set_spu_palette(fd, cmd, argp);
|
|
+#ifdef CONFIG_XEN
|
|
+ case IOCTL_PRIVCMD_MMAP_32:
|
|
+ case IOCTL_PRIVCMD_MMAPBATCH_32:
|
|
+ case IOCTL_PRIVCMD_MMAPBATCH_V2_32:
|
|
+ return privcmd_ioctl_32(fd, cmd, argp);
|
|
+#endif
|
|
}
|
|
|
|
/*
|
|
--- head-2011-03-17.orig/include/acpi/processor.h 2011-02-01 14:42:26.000000000 +0100
|
|
+++ head-2011-03-17/include/acpi/processor.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -323,7 +323,7 @@ static inline void acpi_processor_ppc_ex
|
|
return;
|
|
}
|
|
#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL
|
|
-int acpi_processor_ppc_has_changed(struct acpi_processor *pr);
|
|
+int acpi_processor_ppc_has_changed(struct acpi_processor *, int event_flag);
|
|
#else
|
|
static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
|
|
int event_flag)
|
|
@@ -338,11 +338,11 @@ static inline int acpi_processor_ppc_has
|
|
}
|
|
return 0;
|
|
}
|
|
-#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
|
|
static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
|
|
{
|
|
return -ENODEV;
|
|
}
|
|
+#endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */
|
|
|
|
#endif /* CONFIG_CPU_FREQ */
|
|
|
|
--- head-2011-03-17.orig/include/xen/compat_ioctl.h 2010-01-18 15:23:12.000000000 +0100
|
|
+++ head-2011-03-17/include/xen/compat_ioctl.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -29,7 +29,7 @@
|
|
#define xen_pfn32_t __u32
|
|
#endif
|
|
|
|
-extern int privcmd_ioctl_32(int fd, unsigned int cmd, unsigned long arg);
|
|
+extern int privcmd_ioctl_32(int fd, unsigned int cmd, void __user *arg);
|
|
struct privcmd_mmap_32 {
|
|
int num;
|
|
domid_t dom;
|
|
--- head-2011-03-17.orig/include/xen/evtchn.h 2011-02-01 14:50:44.000000000 +0100
|
|
+++ head-2011-03-17/include/xen/evtchn.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -48,6 +48,7 @@
|
|
* LOW-LEVEL DEFINITIONS
|
|
*/
|
|
|
|
+#ifdef CONFIG_XEN
|
|
struct irq_cfg {
|
|
u32 info;
|
|
union {
|
|
@@ -57,8 +58,7 @@ struct irq_cfg {
|
|
#endif
|
|
};
|
|
};
|
|
-
|
|
-int assign_irq_vector(int irq, struct irq_cfg *, const struct cpumask *);
|
|
+#endif
|
|
|
|
/*
|
|
* Dynamically bind an event source to an IRQ-like callback handler.
|
|
@@ -167,6 +167,9 @@ static inline int close_evtchn(int port)
|
|
return HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
|
|
}
|
|
|
|
+/* Test an irq's pending state. */
|
|
+int xen_test_irq_pending(int irq);
|
|
+
|
|
/*
|
|
* Use these to access the event channel underlying the IRQ handle returned
|
|
* by bind_*_to_irqhandler().
|
|
--- head-2011-03-17.orig/include/xen/xen.h 2011-03-17 14:35:44.000000000 +0100
|
|
+++ head-2011-03-17/include/xen/xen.h 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -7,8 +7,10 @@ enum xen_domain_type {
|
|
XEN_HVM_DOMAIN, /* running in a Xen hvm domain */
|
|
};
|
|
|
|
-#ifdef CONFIG_XEN
|
|
+#if defined(CONFIG_PARAVIRT_XEN)
|
|
extern enum xen_domain_type xen_domain_type;
|
|
+#elif defined(CONFIG_XEN)
|
|
+#define xen_domain_type XEN_PV_DOMAIN
|
|
#else
|
|
#define xen_domain_type XEN_NATIVE
|
|
#endif
|
|
@@ -25,6 +27,8 @@ extern enum xen_domain_type xen_domain_t
|
|
|
|
#define xen_initial_domain() (xen_pv_domain() && \
|
|
xen_start_info->flags & SIF_INITDOMAIN)
|
|
+#elif defined(CONFIG_XEN)
|
|
+#define xen_initial_domain() is_initial_xendomain()
|
|
#else /* !CONFIG_XEN_DOM0 */
|
|
#define xen_initial_domain() (0)
|
|
#endif /* CONFIG_XEN_DOM0 */
|
|
--- head-2011-03-17.orig/kernel/sysctl_binary.c 2011-01-31 18:01:51.000000000 +0100
|
|
+++ head-2011-03-17/kernel/sysctl_binary.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -874,9 +874,10 @@ static const struct bin_table bin_bus_ta
|
|
|
|
|
|
#ifdef CONFIG_XEN
|
|
-static const struct trans_ctl_table trans_xen_table[] = {
|
|
- { CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" },
|
|
- { CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" },
|
|
+#include <xen/sysctl.h>
|
|
+static const struct bin_table bin_xen_table[] = {
|
|
+ { CTL_INT, CTL_XEN_INDEPENDENT_WALLCLOCK, "independent_wallclock" },
|
|
+ { CTL_ULONG, CTL_XEN_PERMITTED_CLOCK_JITTER, "permitted_clock_jitter" },
|
|
{}
|
|
};
|
|
#endif
|
|
@@ -921,7 +922,7 @@ static const struct bin_table bin_root_t
|
|
{ CTL_DIR, CTL_ABI, "abi" },
|
|
/* CTL_CPU not used */
|
|
#ifdef CONFIG_XEN
|
|
- { CTL_XEN, "xen", trans_xen_table },
|
|
+ { CTL_DIR, CTL_XEN, "xen", bin_xen_table },
|
|
#endif
|
|
/* CTL_ARLAN "arlan" no longer used */
|
|
{ CTL_DIR, CTL_S390DBF, "s390dbf", bin_s390dbf_table },
|
|
--- head-2011-03-17.orig/kernel/sysctl_check.c 2011-01-31 17:56:27.000000000 +0100
|
|
+++ head-2011-03-17/kernel/sysctl_check.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -4,7 +4,6 @@
|
|
#include <linux/sunrpc/debug.h>
|
|
#include <linux/string.h>
|
|
#include <net/ip_vs.h>
|
|
-#include <xen/sysctl.h>
|
|
|
|
|
|
static int sysctl_depth(struct ctl_table *table)
|
|
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 14:54:13.000000000 +0100
|
|
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 14:55:46.000000000 +0100
|
|
@@ -114,6 +114,7 @@ setup_io_tlb_npages(char *str)
|
|
swiotlb_force = 1;
|
|
else if (!strcmp(str, "off"))
|
|
swiotlb_force = -1;
|
|
+
|
|
return 1;
|
|
}
|
|
__setup("swiotlb=", setup_io_tlb_npages);
|
|
@@ -126,8 +127,10 @@ static dma_addr_t swiotlb_virt_to_bus(st
|
|
return phys_to_dma(hwdev, virt_to_phys(address));
|
|
}
|
|
|
|
-static void swiotlb_print_info(unsigned long bytes)
|
|
+void swiotlb_print_info(void)
|
|
{
|
|
+ unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
|
|
+
|
|
printk(KERN_INFO "Software IO TLB enabled: \n"
|
|
" Aperture: %lu megabytes\n"
|
|
" Address size: %u bits\n"
|
|
@@ -141,7 +144,7 @@ static void swiotlb_print_info(unsigned
|
|
* structures for the software IO TLB used to implement the PCI DMA API.
|
|
*/
|
|
void __init
|
|
-swiotlb_init_with_default_size(size_t default_size)
|
|
+swiotlb_init_with_default_size(size_t default_size, int verbose)
|
|
{
|
|
unsigned long i, bytes;
|
|
int rc;
|
|
@@ -212,12 +215,12 @@ swiotlb_init_with_default_size(size_t de
|
|
} while (rc && dma_bits++ < max_dma_bits);
|
|
if (rc)
|
|
panic("No suitable physical memory available for SWIOTLB overflow buffer!\n");
|
|
-
|
|
- swiotlb_print_info(bytes);
|
|
+ if (verbose)
|
|
+ swiotlb_print_info();
|
|
}
|
|
|
|
void __init
|
|
-swiotlb_init(void)
|
|
+swiotlb_init(int verbose)
|
|
{
|
|
long ram_end;
|
|
size_t defsz = 64 * (1 << 20); /* 64MB default size */
|
|
@@ -235,7 +238,7 @@ swiotlb_init(void)
|
|
}
|
|
|
|
if (swiotlb)
|
|
- swiotlb_init_with_default_size(defsz);
|
|
+ swiotlb_init_with_default_size(defsz, verbose);
|
|
else
|
|
printk(KERN_INFO "Software IO TLB disabled\n");
|
|
}
|
|
@@ -424,7 +427,7 @@ do_unmap_single(struct device *hwdev, ch
|
|
|
|
/*
|
|
* Return the buffer to the free list by setting the corresponding
|
|
- * entries to indicate the number of contigous entries available.
|
|
+ * entries to indicate the number of contiguous entries available.
|
|
* While returning the entries to the free list, we merge the entries
|
|
* with slots below and above the pool being returned.
|
|
*/
|