qubes-linux-kernel/patches.xen/xen3-patch-2.6.36

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.36
Patch-mainline: 2.6.36
This patch contains the differences between 2.6.35 and 2.6.36.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.36" by xen-port-patches.py
--- head-2011-03-17.orig/arch/x86/Kconfig 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig 2011-02-17 13:43:12.000000000 +0100
@@ -56,7 +56,7 @@ config X86
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
- select HAVE_PERF_EVENTS_NMI
+ select HAVE_PERF_EVENTS_NMI if !XEN
select ANON_INODES
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
@@ -248,7 +248,7 @@ config KTIME_SCALAR
config ARCH_CPU_PROBE_RELEASE
def_bool y
- depends on HOTPLUG_CPU
+ depends on HOTPLUG_CPU && !XEN
source "init/Kconfig"
source "kernel/Kconfig.freezer"
@@ -1064,7 +1064,7 @@ config X86_CPUID
choice
prompt "High Memory Support"
- default HIGHMEM64G if X86_NUMAQ
+ default HIGHMEM64G if X86_NUMAQ || XEN
default HIGHMEM4G
depends on X86_32
@@ -1107,7 +1107,7 @@ config NOHIGHMEM
config HIGHMEM4G
bool "4GB"
- depends on !X86_NUMAQ
+ depends on !X86_NUMAQ && !XEN
---help---
Select this if you have a 32-bit processor and between 1 and 4
gigabytes of physical RAM.
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-02-01 15:04:27.000000000 +0100
@@ -47,7 +47,12 @@
/*
* Reload arg registers from stack in case ptrace changed them.
* We don't reload %eax because syscall_trace_enter() returned
- * the value it wants us to use in the table lookup.
+ * the %rax value we should see. Instead, we just truncate that
+ * value to 32 bits again as we did on entry from user mode.
+ * If it's a new value set by user_regset during entry tracing,
+ * this matches the normal truncation of the user-mode value.
+ * If it's -1 to make us punt the syscall, then (u32)-1 is still
+ * an appropriately invalid value.
*/
.macro LOAD_ARGS32 offset, _r9=0
.if \_r9
@@ -57,6 +62,7 @@
movl \offset+48(%rsp),%edx
movl \offset+56(%rsp),%esi
movl \offset+64(%rsp),%edi
+ movl %eax,%eax /* zero extension */
.endm
.macro CFI_STARTPROC32 simple
@@ -151,7 +157,7 @@ ENTRY(ia32_sysenter_target)
movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
call audit_syscall_entry
movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
@@ -216,7 +222,7 @@ ENTRY(ia32_cstar_target)
orl $TS_COMPAT,TI_status(%r10)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz cstar_tracesys
- cmpl $IA32_NR_syscalls-1,%eax
+ cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
@@ -243,7 +249,7 @@ cstar_tracesys:
LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
RESTORE_REST
xchgl %ebp,%r9d
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
jmp cstar_do_call
END(ia32_cstar_target)
@@ -301,7 +307,7 @@ ENTRY(ia32_syscall)
testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
jnz ia32_tracesys
.Lia32_check_call:
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
@@ -325,7 +331,7 @@ ia32_tracesys:
call syscall_trace_enter
LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
RESTORE_REST
- cmpl $(IA32_NR_syscalls-1),%eax
+ cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
@@ -723,4 +729,7 @@ ia32_sys_call_table:
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
.quad sys_perf_event_open
.quad compat_sys_recvmmsg
+ .quad sys_fanotify_init
+ .quad sys32_fanotify_mark
+ .quad sys_prlimit64 /* 340 */
ia32_syscall_end:
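
Note: the cmpl -> cmpq changes, together with the new "movl %eax,%eax"
zero-extension in LOAD_ARGS32, close a hole where only the low 32 bits of
the syscall number were bounds-checked while the full 64-bit %rax later
indexed the call table. A minimal user-space analogue of the flaw (the
function and parameter names here are hypothetical, not kernel code):

    #include <errno.h>

    /* Comparing only the truncated low 32 bits lets a 64-bit index such
     * as 0x100000000 pass the check and then index out of bounds. */
    long dispatch(unsigned long rax, long (*table[])(void), unsigned long nr)
    {
            if ((unsigned int)rax > nr - 1)    /* old cmpl: truncating check */
                    return -ENOSYS;
            return table[rax]();               /* full 64-bit index used here */
    }

Comparing the full register (cmpq) and re-truncating %eax after ptrace may
have rewritten it removes both halves of the problem.
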
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/highmem.h 2011-02-01 15:04:27.000000000 +0100
@@ -60,7 +60,7 @@ void *kmap(struct page *page);
void kunmap(struct page *page);
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
void *kmap_atomic(struct page *page, enum km_type type);
-void kunmap_atomic(void *kvaddr, enum km_type type);
+void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
struct page *kmap_atomic_to_page(void *ptr);
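
Note: the rename makes room for a generic type-checking wrapper. A sketch
of the 2.6.36 generic wrapper that motivates it (modulo details of the
real linux/highmem.h): it rejects a struct page * argument at compile
time, since kunmap_atomic() takes the mapped virtual address, not the
page — a classic kmap/kunmap mix-up.

    #define kunmap_atomic(addr, idx) do { \
                    BUILD_BUG_ON(__same_type((addr), struct page *)); \
                    kunmap_atomic_notypecheck((addr), (idx)); \
            } while (0)
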
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-02-01 15:04:27.000000000 +0100
@@ -30,6 +30,9 @@ extern struct pci_bus *pci_scan_bus_on_n
int node);
extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+#ifdef CONFIG_PCI
+
+#ifdef CONFIG_PCI_DOMAINS
static inline int pci_domain_nr(struct pci_bus *bus)
{
struct pci_sysdata *sd = bus->sysdata;
@@ -40,13 +43,12 @@ static inline int pci_proc_domain(struct
{
return pci_domain_nr(bus);
}
-
+#endif
/* Can be used to override the logic in pci_scan_bus for skipping
already-configured bus numbers - to be used for buggy BIOSes
or architectures with incomplete PCI setup by the loader */
-#ifdef CONFIG_PCI
extern unsigned int pcibios_assign_all_busses(void);
extern int pci_legacy_init(void);
# ifdef CONFIG_ACPI
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 15:04:27.000000000 +0100
@@ -19,6 +19,19 @@
_r_->flags & PERF_EFLAGS_EXACT ? _f_ | PERF_RECORD_MISC_EXACT_IP : _f_; \
})
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip) { \
+ (regs)->ip = (__ip); \
+ (regs)->bp = caller_frame_pointer(); \
+ (regs)->cs = __KERNEL_CS; \
+ regs->flags = 0; \
+}
+
#endif
static inline void init_hw_perf_events(void) {}
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable-3level.h 2011-02-01 15:04:27.000000000 +0100
@@ -91,7 +91,7 @@ static inline void pud_clear(pud_t *pudp
static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
{
uint64_t val = __pte_val(res);
- if (__cmpxchg64(ptep, val, 0) != val) {
+ if (__cmpxchg64(&ptep->pte, val, 0) != val) {
/* xchg acts as a barrier before the setting of the high bits */
res.pte_low = xchg(&ptep->pte_low, 0);
res.pte_high = ptep->pte_high;
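
Note: the logic is unchanged — try a 64-bit compare-exchange of the
expected PTE value against zero, and fall back to a piecewise exchange if
another CPU modified the entry meanwhile; passing &ptep->pte merely
satisfies the stricter typing of the 2.6.36 cmpxchg64. A user-space
analogue of the clear-with-fallback pattern (helper name hypothetical;
the kernel's fallback splits low/high halves instead of a single
64-bit exchange):

    #include <stdint.h>
    #include <stdatomic.h>

    uint64_t clear_u64(_Atomic uint64_t *p, uint64_t expected)
    {
            if (atomic_compare_exchange_strong(p, &expected, 0))
                    return expected;          /* fast path: value unchanged */
            return atomic_exchange(p, 0);     /* raced: take current value */
    }
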
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-01 15:04:27.000000000 +0100
@@ -25,6 +25,7 @@
struct vm_area_struct;
extern pgd_t *swapper_pg_dir;
+extern pgd_t trampoline_pg_dir[1024];
static inline void pgtable_cache_init(void) { }
static inline void check_pgt_cache(void) { }
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-01 15:04:27.000000000 +0100
@@ -133,8 +133,8 @@ static inline int pgd_large(pgd_t pgd) {
/* x86-64 always has all page tables mapped. */
#define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
#define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
-#define pte_unmap(pte) /* NOP */
-#define pte_unmap_nested(pte) /* NOP */
+#define pte_unmap(pte) ((void)(pte))/* NOP */
+#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */
#define update_mmu_cache(vma, address, ptep) do { } while (0)
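
Note: the ((void)(pte)) body is the standard cast-to-void idiom — the
macro now evaluates its argument, so callers that declare a pte only for
the map/unmap pair no longer trip unused-variable warnings on x86-64,
where unmapping is a no-op. A standalone demo (hypothetical macro name):

    #define consume(x) ((void)(x))   /* "uses" x, compiles to nothing */

    int main(void)
    {
            int only_touched_by_macro = 42;
            consume(only_touched_by_macro);  /* keeps -Wunused quiet */
            return 0;
    }
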
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:17.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:27.000000000 +0100
@@ -716,6 +716,7 @@ extern void init_c1e_mask(void);
extern unsigned long boot_option_idle_override;
extern unsigned long idle_halt;
extern unsigned long idle_nomwait;
+extern bool c1e_detected;
#ifndef CONFIG_XEN
/*
@@ -979,4 +980,24 @@ unsigned long calc_aperfmperf_ratio(stru
return ratio;
}
+/*
+ * AMD errata checking
+ */
+#ifdef CONFIG_CPU_SUP_AMD
+extern const int amd_erratum_383[];
+extern const int amd_erratum_400[];
+extern bool cpu_has_amd_erratum(const int *);
+
+#define AMD_LEGACY_ERRATUM(...) { -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...) { osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+ ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range) (((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range) (((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range) ((range) & 0xfff)
+
+#else
+#define cpu_has_amd_erratum(x) (false)
+#endif /* CONFIG_CPU_SUP_AMD */
+
#endif /* _ASM_X86_PROCESSOR_H */
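
Note: a worked example of the packed range encoding above (values are
illustrative). Family sits in bits 31..24; the start and end of the range
each pack model and stepping as (model << 4 | stepping) into 12 bits:

    #include <stdio.h>

    #define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
            ((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))

    int main(void)
    {
            /* Illustrative: family 0x10, model/stepping 0x02/0x1..0xff/0xf */
            unsigned int r = AMD_MODEL_RANGE(0x10, 0x02, 0x1, 0xff, 0xf);

            printf("range=%#x family=%#x start=%#x end=%#x\n",
                   r, (r >> 24) & 0xff, (r >> 12) & 0xfff, r & 0xfff);
            /* prints: range=0x10021fff family=0x10 start=0x21 end=0xfff */
            return 0;
    }

A CPU matches when its family equals the range family and its own
(model << 4 | stepping) falls within [start, end].
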
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:10:31.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:11:05.000000000 +0100
@@ -441,4 +441,11 @@ static __always_inline void rdtsc_barrie
alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
}
+/*
+ * We handle most unaligned accesses in hardware. On the other hand
+ * unaligned DMA can be quite expensive on some Nehalem processors.
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ */
+#define NET_IP_ALIGN 0
#endif /* _ASM_X86_SYSTEM_H */
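
Note: how network drivers consume NET_IP_ALIGN (sketch; 'dev' and 'len'
are placeholders, not names from this patch). The pad shifts the 14-byte
Ethernet header so the IP header lands 4-byte aligned; with NET_IP_ALIGN
forced to 0 here, the reserve becomes a no-op and RX DMA buffers stay
aligned instead:

    skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
    if (skb)
            skb_reserve(skb, NET_IP_ALIGN);
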
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -2,7 +2,7 @@
* sleep.c - x86-specific ACPI sleep support.
*
* Copyright (C) 2001-2003 Patrick Mochel
- * Copyright (C) 2001-2003 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2001-2003 Pavel Machek <pavel@ucw.cz>
*/
#include <linux/acpi.h>
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -319,14 +319,19 @@ void arch_init_copy_chip_data(struct irq
old_cfg = old_desc->chip_data;
- memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+ cfg->vector = old_cfg->vector;
+ cfg->move_in_progress = old_cfg->move_in_progress;
+ cpumask_copy(cfg->domain, old_cfg->domain);
+ cpumask_copy(cfg->old_domain, old_cfg->old_domain);
init_copy_irq_2_pin(old_cfg, cfg, node);
}
-static void free_irq_cfg(struct irq_cfg *old_cfg)
+static void free_irq_cfg(struct irq_cfg *cfg)
{
- kfree(old_cfg);
+ free_cpumask_var(cfg->domain);
+ free_cpumask_var(cfg->old_domain);
+ kfree(cfg);
}
void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -1808,6 +1813,8 @@ __apicdebuginit(void) print_IO_APIC(void
struct irq_pin_list *entry;
cfg = desc->chip_data;
+ if (!cfg)
+ continue;
entry = cfg->irq_2_pin;
if (!entry)
continue;
@@ -3498,7 +3505,7 @@ static int set_msi_irq_affinity(unsigned
cfg = desc->chip_data;
- read_msi_msg_desc(desc, &msg);
+ get_cached_msi_msg_desc(desc, &msg);
msg.data &= ~MSI_DATA_VECTOR_MASK;
msg.data |= MSI_DATA_VECTOR(cfg->vector);
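
Note: the memcpy() of struct irq_cfg had to go because, with
CONFIG_CPUMASK_OFFSTACK, cpumask_var_t is a pointer to a separately
allocated mask: a struct memcpy() would overwrite the destination's
pointers with the source's, aliasing the source's masks and leaking the
destination's — hence the field-by-field copy with cpumask_copy(), and
the matching free_cpumask_var() calls in free_irq_cfg(). A user-space
analogue (hypothetical struct):

    #include <string.h>

    struct cfg {
            unsigned long *mask;    /* like cpumask_var_t off-stack */
            unsigned int vector;
    };

    static void copy_cfg(struct cfg *dst, const struct cfg *src, size_t words)
    {
            dst->vector = src->vector;                           /* plain field */
            memcpy(dst->mask, src->mask, words * sizeof(long));  /* deep copy */
    }
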
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:47.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:43:00.000000000 +0100
@@ -150,10 +150,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
static int __init x86_xsave_setup(char *s)
{
setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
return 1;
}
__setup("noxsave", x86_xsave_setup);
+static int __init x86_xsaveopt_setup(char *s)
+{
+ setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+ return 1;
+}
+__setup("noxsaveopt", x86_xsaveopt_setup);
+
#ifdef CONFIG_X86_32
static int cachesize_override __cpuinitdata = -1;
@@ -568,7 +576,7 @@ void __cpuinit cpu_detect(struct cpuinfo
}
}
-static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
+void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
{
u32 tfms, xlvl;
u32 ebx;
@@ -582,6 +590,16 @@ static void __cpuinit get_cpu_cap(struct
c->x86_capability[4] = excap;
}
+ /* Additional Intel-defined flags: level 0x00000007 */
+ if (c->cpuid_level >= 0x00000007) {
+ u32 eax, ebx, ecx, edx;
+
+ cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx);
+
+ if (eax > 0)
+ c->x86_capability[9] = ebx;
+ }
+
/* AMD-defined flags: level 0x80000001 */
xlvl = cpuid_eax(0x80000000);
c->extended_cpuid_level = xlvl;
@@ -607,6 +625,7 @@ static void __cpuinit get_cpu_cap(struct
if (c->extended_cpuid_level >= 0x80000007)
c->x86_power = cpuid_edx(0x80000007);
+ init_scattered_cpuid_features(c);
}
static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
@@ -764,7 +783,6 @@ static void __cpuinit generic_identify(s
get_model_name(c); /* Default name */
- init_scattered_cpuid_features(c);
detect_nopl(c);
}
@@ -1273,6 +1291,7 @@ void __cpuinit cpu_init(void)
dbg_restore_debug_regs();
fpu_init();
+ xsave_init();
#ifndef CONFIG_XEN
raw_local_save_flags(kernel_eflags);
@@ -1343,12 +1362,7 @@ void __cpuinit cpu_init(void)
clear_used_math();
mxcsr_feature_mask_init();
- /*
- * Boot processor to setup the FP and extended state context info.
- */
- if (smp_processor_id() == boot_cpu_id)
- init_thread_xstate();
-
+ fpu_init();
xsave_init();
}
#endif
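
Note: the shape of the __setup() boot-parameter hook used by the new
"noxsaveopt" handler above (option and flag names below are hypothetical):
the handler runs during early boot when the option appears on the kernel
command line, and returns 1 to mark the option as consumed.

    static int __init nofoo_setup(char *s)
    {
            foo_disabled = 1;       /* hypothetical flag */
            return 1;
    }
    __setup("nofoo", nofoo_setup);
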
--- head-2011-03-17.orig/arch/x86/kernel/cpu/intel.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/intel.c 2011-02-01 15:04:27.000000000 +0100
@@ -288,6 +288,7 @@ static void __cpuinit intel_workarounds(
}
#endif
+#ifndef CONFIG_XEN
static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
{
#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
@@ -306,7 +307,6 @@ static void __cpuinit srat_detect_node(s
#endif
}
-#ifndef CONFIG_XEN
/*
* find out the number of processor cores on the die
*/
@@ -324,7 +324,6 @@ static int __cpuinit intel_num_cpu_cores
else
return 1;
}
-#endif
static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
@@ -363,6 +362,7 @@ static void __cpuinit detect_vmx_virtcap
set_cpu_cap(c, X86_FEATURE_VPID);
}
}
+#endif
static void __cpuinit init_intel(struct cpuinfo_x86 *c)
{
@@ -459,13 +459,13 @@ static void __cpuinit init_intel(struct
detect_ht(c);
#endif
}
-#endif
/* Work around errata */
srat_detect_node(c);
if (cpu_has(c, X86_FEATURE_VMX))
detect_vmx_virtcap(c);
+#endif
}
#ifdef CONFIG_X86_32
--- head-2011-03-17.orig/arch/x86/kernel/cpu/scattered.c 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/scattered.c 2011-02-01 15:04:27.000000000 +0100
@@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_feat
{ X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 },
{ X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 },
+#ifndef CONFIG_XEN
{ X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 },
{ X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 },
{ X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 },
@@ -50,6 +51,7 @@ void __cpuinit init_scattered_cpuid_feat
{ X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 },
{ X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 },
{ X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 },
+#endif
{ 0, 0, 0, 0, 0 }
};
--- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:04:27.000000000 +0100
@@ -655,14 +655,14 @@ ldt_ss:
* compensating for the offset by changing to the ESPFIX segment with
* a base address that matches for the difference.
*/
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
mov %esp, %edx /* load kernel esp */
mov PT_OLDESP(%esp), %eax /* load userspace esp */
mov %dx, %ax /* eax: new kernel esp */
sub %eax, %edx /* offset (low word is 0) */
- PER_CPU(gdt_page, %ebx)
shr $16, %edx
- mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */
- mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */
+ mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */
+ mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */
pushl $__ESPFIX_SS
CFI_ADJUST_CFA_OFFSET 4
push %eax /* new kernel esp */
@@ -861,9 +861,8 @@ ptregs_clone:
* normal stack and adjusts ESP with the matching offset.
*/
/* fixup the stack */
- PER_CPU(gdt_page, %ebx)
- mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */
- mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */
+ mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
+ mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
shl $16, %eax
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
@@ -1132,7 +1131,7 @@ ENTRY(simd_coprocessor_error)
.balign 4
.long 661b
.long 663f
- .byte X86_FEATURE_XMM
+ .word X86_FEATURE_XMM
.byte 662b-661b
.byte 664f-663f
.previous
--- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:04:27.000000000 +0100
@@ -1112,13 +1112,13 @@ END(kernel_thread_helper)
* execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
*
* C extern interface:
- * extern long execve(char *name, char **argv, char **envp)
+ * extern long execve(const char *name, char **argv, char **envp)
*
* asm input arguments:
* rdi: name, rsi: argv, rdx: envp
*
* We want to fallback into:
- * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs)
+ * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
*
* do_sys_execve asm fallback arguments:
* rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
--- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -288,6 +288,20 @@ static void __init smp_dump_mptable(stru
void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
+static void __init smp_register_lapic_address(unsigned long address)
+{
+#ifndef CONFIG_XEN
+ mp_lapic_addr = address;
+
+ set_fixmap_nocache(FIX_APIC_BASE, address);
+ if (boot_cpu_physical_apicid == -1U) {
+ boot_cpu_physical_apicid = read_apic_id();
+ apic_version[boot_cpu_physical_apicid] =
+ GET_APIC_VERSION(apic_read(APIC_LVR));
+ }
+#endif
+}
+
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -311,6 +325,10 @@ static int __init smp_read_mpc(struct mp
if (early)
return 1;
+ /* Initialize the lapic mapping */
+ if (!acpi_lapic)
+ smp_register_lapic_address(mpc->lapic);
+
if (mpc->oemptr)
x86_init.mpparse.smp_read_mpc_oem(mpc);
--- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -142,12 +142,23 @@ static struct dma_map_ops swiotlb_dma_op
.dma_supported = swiotlb_dma_supported
};
+#define pci_xen_swiotlb_detect() 1
+
+static void __init pci_xen_swiotlb_init(void)
+{
+ swiotlb_init(1);
+ if (swiotlb) {
+ printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
+ dma_ops = &swiotlb_dma_ops;
+ }
+}
+
void __init pci_iommu_alloc(void)
{
/* free the range so iommu could get some range less than 4G */
dma32_free_bootmem();
- if (pci_swiotlb_detect())
+ if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
goto out;
gart_iommu_hole_init();
@@ -159,11 +170,7 @@ void __init pci_iommu_alloc(void)
/* needs to be called after gart_iommu_hole_init */
amd_iommu_detect();
out:
- swiotlb_init(1);
- if (swiotlb) {
- printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n");
- dma_ops = &swiotlb_dma_ops;
- }
+ pci_xen_swiotlb_init();
}
void *dma_generic_alloc_coherent(struct device *dev, size_t size,
@@ -376,7 +383,7 @@ static int __init pci_iommu_init(void)
x86_init.iommu.iommu_init();
#ifndef CONFIG_XEN
- if (swiotlb) {
+ if (swiotlb || xen_swiotlb) {
printk(KERN_INFO "PCI-DMA: "
"Using software bounce buffering for IO (SWIOTLB)\n");
swiotlb_print_info();
--- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:10:40.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:11:01.000000000 +0100
@@ -29,6 +29,7 @@ unsigned long idle_nomwait;
EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
+EXPORT_SYMBOL_GPL(task_xstate_cachep);
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
@@ -287,8 +288,9 @@ EXPORT_SYMBOL(kernel_thread);
/*
* sys_execve() executes a new program.
*/
-long sys_execve(char __user *name, char __user * __user *argv,
- char __user * __user *envp, struct pt_regs *regs)
+long sys_execve(const char __user *name,
+ const char __user *const __user *argv,
+ const char __user *const __user *envp, struct pt_regs *regs)
{
long error;
char *filename;
@@ -328,7 +330,7 @@ EXPORT_SYMBOL(pm_idle);
*/
void xen_idle(void)
{
- trace_power_start(POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1, smp_processor_id());
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -394,7 +396,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
- trace_power_start(POWER_CSTATE, (ax>>4)+1);
+ trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id());
if (!need_resched()) {
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -410,7 +412,7 @@ void mwait_idle_with_hints(unsigned long
static void mwait_idle(void)
{
if (!need_resched()) {
- trace_power_start(POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1, smp_processor_id());
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -432,7 +434,7 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- trace_power_start(POWER_CSTATE, 0);
+ trace_power_start(POWER_CSTATE, 0, smp_processor_id());
local_irq_enable();
while (!need_resched())
cpu_relax();
@@ -480,44 +482,10 @@ static int __cpuinit mwait_usable(const
return (edx & MWAIT_EDX_C1);
}
-/*
- * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e.
- * For more information see
- * - Erratum #400 for NPT family 0xf and family 0x10 CPUs
- * - Erratum #365 for family 0x11 (not affected because C1e not in use)
- */
-static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
-{
- u64 val;
- if (c->x86_vendor != X86_VENDOR_AMD)
- goto no_c1e_idle;
-
- /* Family 0x0f models < rev F do not have C1E */
- if (c->x86 == 0x0F && c->x86_model >= 0x40)
- return 1;
-
- if (c->x86 == 0x10) {
- /*
- * check OSVW bit for CPUs that are not affected
- * by erratum #400
- */
- if (cpu_has(c, X86_FEATURE_OSVW)) {
- rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
- if (val >= 2) {
- rdmsrl(MSR_AMD64_OSVW_STATUS, val);
- if (!(val & BIT(1)))
- goto no_c1e_idle;
- }
- }
- return 1;
- }
-
-no_c1e_idle:
- return 0;
-}
+bool c1e_detected;
+EXPORT_SYMBOL(c1e_detected);
static cpumask_var_t c1e_mask;
-static int c1e_detected;
void c1e_remove_cpu(int cpu)
{
@@ -539,12 +507,12 @@ static void c1e_idle(void)
u32 lo, hi;
rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+
if (lo & K8_INTP_C1E_ACTIVE_MASK) {
- c1e_detected = 1;
+ c1e_detected = true;
if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
mark_tsc_unstable("TSC halt in AMD C1E");
printk(KERN_INFO "System has AMD C1E enabled\n");
- set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
}
}
@@ -595,7 +563,8 @@ void __cpuinit select_idle_routine(const
*/
printk(KERN_INFO "using mwait in idle threads.\n");
pm_idle = mwait_idle;
- } else if (check_c1e_idle(c)) {
+ } else if (cpu_has_amd_erratum(amd_erratum_400)) {
+ /* E400: APIC timer interrupt does not wake up CPU from C1e */
printk(KERN_INFO "using C1E aware idle routine\n");
pm_idle = c1e_idle;
} else
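
Note: cpu_has_amd_erratum() centralizes the OSVW-register and
family/model/stepping checks that check_c1e_idle() open-coded above. A
sketch of the table it walks, built with the processor.h macros (values
illustrative, not copied from mainline): an OSVW id first, then the
ranges consulted when OSVW is unavailable.

    const int amd_erratum_400[] =
            AMD_OSVW_ERRATUM(1, AMD_MODEL_RANGE(0x0f, 0x41, 0x2, 0xff, 0xf),
                                AMD_MODEL_RANGE(0x10, 0x02, 0x1, 0xff, 0xf));
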
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:43.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:59.000000000 +0100
@@ -59,6 +59,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
@@ -114,6 +116,8 @@ void cpu_idle(void)
stop_critical_timings();
xen_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:47.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:56.000000000 +0100
@@ -56,6 +56,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage extern void ret_from_fork(void);
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -142,6 +144,9 @@ void cpu_idle(void)
stop_critical_timings();
xen_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
+
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-03 16:24:49.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:25:01.000000000 +0100
@@ -102,6 +102,7 @@
#include <asm/paravirt.h>
#include <asm/hypervisor.h>
+#include <asm/olpc_ofw.h>
#include <asm/percpu.h>
#include <asm/topology.h>
@@ -826,10 +827,15 @@ void __init setup_arch(char **cmdline_p)
/* VMI may relocate the fixmap; do this before touching ioremap area */
vmi_init();
+ /* OFW also may relocate the fixmap */
+ olpc_ofw_detect();
+
early_trap_init();
early_cpu_init();
early_ioremap_init();
+ setup_olpc_ofw_pgd();
+
#ifndef CONFIG_XEN
ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
screen_info = boot_params.screen_info;
@@ -1143,6 +1149,8 @@ void __init setup_arch(char **cmdline_p)
paging_init();
x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+ setup_trampoline_page_table();
+
tboot_probe();
#ifdef CONFIG_X86_64
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -385,7 +385,13 @@ static notrace __kprobes void default_do
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
+
#ifdef CONFIG_X86_LOCAL_APIC
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP)
+ return;
+
+#ifndef CONFIG_LOCKUP_DETECTOR
/*
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
@@ -393,6 +399,7 @@ static notrace __kprobes void default_do
if (nmi_watchdog_tick(regs, reason))
return;
if (!do_nmi_callback(regs, cpu))
+#endif /* !CONFIG_LOCKUP_DETECTOR */
unknown_nmi_error(reason, regs);
#else
unknown_nmi_error(reason, regs);
--- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -73,8 +73,8 @@ void update_vsyscall_tz(void)
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
- u32 mult)
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+ struct clocksource *clock, u32 mult)
{
unsigned long flags;
@@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wa
vsyscall_gtod_data.clock.shift = clock->shift;
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
- vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+ vsyscall_gtod_data.wall_to_monotonic = *wtm;
vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
@@ -169,13 +169,18 @@ int __vsyscall(0) vgettimeofday(struct t
* unlikely */
time_t __vsyscall(1) vtime(time_t *t)
{
- struct timeval tv;
+ unsigned seq;
time_t result;
if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
return time_syscall(t);
- vgettimeofday(&tv, NULL);
- result = tv.tv_sec;
+ do {
+ seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+
+ result = __vsyscall_gtod_data.wall_time_sec;
+
+ } while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+
if (t)
*t = result;
return result;
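
Note: vtime() now uses the seqlock reader pattern directly instead of
paying for a full vgettimeofday() call. A minimal user-space analogue of
the loop (helper names hypothetical): spin while a writer holds the lock
(odd count), read the shared data, and retry if the count moved.

    #include <stdatomic.h>

    struct seqcount { _Atomic unsigned int seq; };

    static unsigned int read_begin(struct seqcount *s)
    {
            unsigned int seq;
            while ((seq = atomic_load(&s->seq)) & 1)
                    ;                               /* writer in progress */
            return seq;
    }

    static int read_retry(struct seqcount *s, unsigned int seq)
    {
            return atomic_load(&s->seq) != seq;     /* raced with a writer */
    }

    /* Reader, mirroring vtime():
     *     do {
     *             seq = read_begin(&sc);
     *             result = shared_wall_time_sec;
     *     } while (read_retry(&sc, seq));
     */
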
--- head-2011-03-17.orig/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/dump_pagetables-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -40,6 +40,29 @@ struct addr_marker {
const char *name;
};
+/* indices for address_markers; keep sync'd w/ address_markers below */
+enum address_markers_idx {
+ USER_SPACE_NR = 0,
+#ifdef CONFIG_X86_64
+ XEN_SPACE_NR,
+ LOW_KERNEL_NR,
+ VMALLOC_START_NR,
+ VMEMMAP_START_NR,
+ HIGH_KERNEL_NR,
+ MODULES_VADDR_NR,
+ MODULES_END_NR,
+#else
+ KERNEL_SPACE_NR,
+ VMALLOC_START_NR,
+ VMALLOC_END_NR,
+# ifdef CONFIG_HIGHMEM
+ PKMAP_BASE_NR,
+# endif
+ FIXADDR_START_NR,
+ XEN_SPACE_NR,
+#endif
+};
+
/* Address space markers hints */
static struct addr_marker address_markers[] = {
{ 0, "User Space" },
@@ -346,16 +369,13 @@ static int __init pt_dump_init(void)
#ifdef CONFIG_X86_32
/* Not a compile-time constant on x86-32 */
- address_markers[2].start_address = VMALLOC_START;
- address_markers[3].start_address = VMALLOC_END;
+ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
+ address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
# ifdef CONFIG_HIGHMEM
- address_markers[4].start_address = PKMAP_BASE;
- address_markers[5].start_address = FIXADDR_START;
- address_markers[6].start_address = hypervisor_virt_start;
-# else
- address_markers[4].start_address = FIXADDR_START;
- address_markers[5].start_address = hypervisor_virt_start;
+ address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE;
# endif
+ address_markers[FIXADDR_START_NR].start_address = FIXADDR_START;
+ address_markers[XEN_SPACE_NR].start_address = hypervisor_virt_start;
#endif
pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL,
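
Note: the enum replaces brittle magic indices into address_markers[] with
names, so the boot-time fixups can't silently rot when an entry is added.
The pattern in miniature (hypothetical names) — designated initializers
keep the enum and the array in lockstep:

    enum marker_idx { USER_NR, VMALLOC_NR, FIXADDR_NR, NR_MARKERS };

    static struct { unsigned long start; const char *name; } markers[] = {
            [USER_NR]    = { 0, "User Space" },
            [VMALLOC_NR] = { 0, "vmalloc() Area" },  /* patched at boot */
            [FIXADDR_NR] = { 0, "Fixmap Area" },
    };
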
--- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -821,8 +821,10 @@ do_sigbus(struct pt_regs *regs, unsigned
up_read(&mm->mmap_sem);
/* Kernel mode? Handle exceptions or die: */
- if (!(error_code & PF_USER))
+ if (!(error_code & PF_USER)) {
no_context(regs, error_code, address);
+ return;
+ }
/* User-space => ok to do another page fault: */
if (is_prefetch(regs, error_code, address))
--- head-2011-03-17.orig/arch/x86/mm/highmem_32-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/highmem_32-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page, enu
return kmap_atomic_prot(page, type, kmap_prot);
}
-void kunmap_atomic(void *kvaddr, enum km_type type)
+void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
@@ -147,7 +147,7 @@ void copy_highpage(struct page *to, stru
EXPORT_SYMBOL(kmap);
EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
-EXPORT_SYMBOL(kunmap_atomic);
+EXPORT_SYMBOL(kunmap_atomic_notypecheck);
EXPORT_SYMBOL(kmap_atomic_prot);
EXPORT_SYMBOL(kmap_atomic_to_page);
EXPORT_SYMBOL(clear_highpage);
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -2,7 +2,7 @@
* linux/arch/x86_64/mm/init.c
*
* Copyright (C) 1995 Linus Torvalds
- * Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2000 Pavel Machek <pavel@ucw.cz>
* Copyright (C) 2002,2003 Andi Kleen <ak@suse.de>
*
* Jun Nakajima <jun.nakajima@intel.com>
--- head-2011-03-17.orig/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/iomap_32-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -75,7 +75,7 @@ void *kmap_atomic_prot_pfn(unsigned long
/*
* Map 'mfn' using fixed map 'type' and protections 'prot'
*/
-void *
+void __iomem *
iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot)
{
/*
@@ -88,12 +88,12 @@ iomap_atomic_prot_pfn(unsigned long mfn,
prot = PAGE_KERNEL_UC_MINUS;
pgprot_val(prot) |= _PAGE_IOMAP;
- return kmap_atomic_prot_pfn(mfn, type, prot);
+ return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, type, prot);
}
EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn);
void
-iounmap_atomic(void *kvaddr, enum km_type type)
+iounmap_atomic(void __iomem *kvaddr, enum km_type type)
{
unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK;
enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id();
--- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:54.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:42:02.000000000 +0100
@@ -221,7 +221,7 @@ static void __iomem *__ioremap_caller(re
unsigned long size, unsigned long prot_val, void *caller)
{
unsigned long offset, vaddr;
- phys_addr_t mfn, last_addr;
+ phys_addr_t mfn, last_mfn, last_addr;
const resource_size_t unaligned_phys_addr = phys_addr;
const unsigned long unaligned_size = size;
struct vm_struct *area;
@@ -259,7 +259,8 @@ static void __iomem *__ioremap_caller(re
/*
* Don't allow anybody to remap normal RAM that we're using..
*/
- for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) {
+ last_mfn = PFN_DOWN(last_addr);
+ for (mfn = PFN_DOWN(phys_addr); mfn <= last_mfn; mfn++) {
unsigned long pfn = mfn_to_local_pfn(mfn);
if (pfn_valid(pfn)) {
@@ -274,7 +275,7 @@ static void __iomem *__ioremap_caller(re
* Mappings have to be page-aligned
*/
offset = phys_addr & ~PAGE_MASK;
- phys_addr &= PAGE_MASK;
+ phys_addr &= PHYSICAL_PAGE_MASK;
size = PAGE_ALIGN(last_addr+1) - phys_addr;
retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
@@ -798,7 +799,7 @@ void __init early_iounmap(void __iomem *
return;
}
offset = virt_addr & ~PAGE_MASK;
- nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT;
+ nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT;
idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot;
while (nrpages > 0) {
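
Note: both hunks above fix off-by-ones. Worked example of the
early_iounmap() rounding fix, assuming a 4096-byte page and offset = 0:

    /* size = 4097 spans two pages, but the old expression
     *     PAGE_ALIGN(0 + 4097 - 1) >> PAGE_SHIFT = 4096 >> 12 = 1
     * unmapped only one; the new expression
     *     PAGE_ALIGN(0 + 4097) >> PAGE_SHIFT = 8192 >> 12 = 2
     * unmaps both.  The mfn loop fix is the same bug in the other
     * direction: "mfn < PFN_UP(last_addr)" skipped the final frame
     * whenever last_addr was page-aligned, while
     * "mfn <= PFN_DOWN(last_addr)" always includes it. */
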
--- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -995,7 +995,7 @@ static int pcibios_lookup_irq(struct pci
dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq);
/* Update IRQ for all devices with the same pirq value */
- while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) {
+ for_each_pci_dev(dev2) {
pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin);
if (!pin)
continue;
@@ -1034,7 +1034,7 @@ void __init pcibios_fixup_irqs(void)
u8 pin;
DBG(KERN_DEBUG "PCI: IRQ fixup\n");
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ for_each_pci_dev(dev) {
/*
* If the BIOS has set an out of range IRQ number, just
* ignore it. Also keep track of which IRQ's are
@@ -1058,7 +1058,7 @@ void __init pcibios_fixup_irqs(void)
return;
dev = NULL;
- while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
+ for_each_pci_dev(dev) {
pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin);
if (!pin)
continue;
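
Note: the for_each_pci_dev() conversion is purely cosmetic — as of
2.6.36 the helper is a thin macro over the old loop (modulo formatting):

    /* #define for_each_pci_dev(d) \
     *         while ((d = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, d)) != NULL)
     */

so the device pointer must still start out NULL, exactly as before.
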
--- head-2011-03-17.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -413,11 +413,7 @@ int arch_setup_additional_pages(struct l
#ifdef CONFIG_X86_64
-/*
- * This must be done early in case we have an initrd containing 32-bit
- * binaries (e.g., hotplug). This could be pushed upstream.
- */
-core_initcall(sysenter_setup);
+subsys_initcall(sysenter_setup);
#ifdef CONFIG_SYSCTL
/* Register vsyscall32 into the ABI table */
--- head-2011-03-17.orig/arch/x86/xen/Kconfig 2011-02-01 14:39:24.000000000 +0100
+++ head-2011-03-17/arch/x86/xen/Kconfig 2011-02-01 15:04:27.000000000 +0100
@@ -25,7 +25,7 @@ config XEN_PRIVILEGED_GUEST
config XEN_PVHVM
def_bool y
- depends on XEN
+ depends on PARAVIRT_XEN
depends on X86_LOCAL_APIC
config XEN_MAX_DOMAIN_MEMORY
--- head-2011-03-17.orig/arch/x86/xen/enlighten.c 2011-03-17 14:35:43.000000000 +0100
+++ head-2011-03-17/arch/x86/xen/enlighten.c 2011-02-01 15:04:27.000000000 +0100
@@ -115,8 +115,8 @@ static int have_vcpu_info_placement = 1;
static void clamp_max_cpus(void)
{
#ifdef CONFIG_SMP
- if (setup_max_cpus > MAX_VIRT_CPUS)
- setup_max_cpus = MAX_VIRT_CPUS;
+ if (setup_max_cpus > XEN_LEGACY_MAX_VCPUS)
+ setup_max_cpus = XEN_LEGACY_MAX_VCPUS;
#endif
}
@@ -128,11 +128,11 @@ static void xen_vcpu_setup(int cpu)
BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info);
- if (cpu < MAX_VIRT_CPUS)
+ if (cpu < XEN_LEGACY_MAX_VCPUS)
per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu];
if (!have_vcpu_info_placement) {
- if (cpu >= MAX_VIRT_CPUS)
+ if (cpu >= XEN_LEGACY_MAX_VCPUS)
clamp_max_cpus();
return;
}
--- head-2011-03-17.orig/drivers/hwmon/Kconfig 2011-03-11 11:00:24.000000000 +0100
+++ head-2011-03-17/drivers/hwmon/Kconfig 2011-02-01 15:04:27.000000000 +0100
@@ -400,7 +400,7 @@ config SENSORS_CORETEMP
config SENSORS_PKGTEMP
tristate "Intel processor package temperature sensor"
- depends on X86 && EXPERIMENTAL
+ depends on X86 && !XEN && EXPERIMENTAL
help
If you say yes here you get support for the package level temperature
sensor inside your CPU. Check documentation/driver for details.
--- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -583,15 +583,16 @@ static int __init coretemp_init(void)
if (err)
goto exit_driver_unreg;
+#ifndef CONFIG_ACPI_HOTPLUG_CPU
if (list_empty(&pdev_list)) {
+ unregister_pcpu_notifier(&coretemp_cpu_notifier);
err = -ENODEV;
- goto exit_notifier_unreg;
+ goto exit_driver_unreg;
}
+#endif
return 0;
-exit_notifier_unreg:
- unregister_pcpu_notifier(&coretemp_cpu_notifier);
exit_driver_unreg:
platform_driver_unregister(&coretemp_driver);
exit:
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2011-03-17/drivers/hwmon/pkgtemp-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -0,0 +1,452 @@
+/*
+ * pkgtemp.c - Linux kernel module for processor package hardware monitoring
+ *
+ * Copyright (C) 2010 Fenghua Yu <fenghua.yu@intel.com>
+ *
+ * Inspired from many hwmon drivers especially coretemp.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301 USA.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/jiffies.h>
+#include <linux/hwmon.h>
+#include <linux/sysfs.h>
+#include <linux/hwmon-sysfs.h>
+#include <linux/err.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/platform_device.h>
+#include <asm/msr.h>
+#include <xen/pcpu.h>
+#include "../xen/core/domctl.h"
+
+#define DRVNAME "pkgtemp"
+#define pkgtemp_data pdev_entry
+
+enum { SHOW_TEMP, SHOW_TJMAX, SHOW_TTARGET, SHOW_LABEL, SHOW_NAME };
+
+/*
+ * Functions declaration
+ */
+
+static struct pkgtemp_data *pkgtemp_update_device(struct device *dev);
+
+struct pdev_entry {
+ struct list_head list;
+ struct platform_device *pdev;
+ struct device *hwmon_dev;
+ struct mutex update_lock;
+ const char *name;
+ u32 phys_proc_id;
+ char valid; /* zero until following fields are valid */
+ unsigned long last_updated; /* in jiffies */
+ int temp;
+ int tjmax;
+ int ttarget;
+ u8 alarm;
+};
+
+/*
+ * Sysfs stuff
+ */
+
+static ssize_t show_name(struct device *dev, struct device_attribute
+ *devattr, char *buf)
+{
+ int ret;
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct pkgtemp_data *data = dev_get_drvdata(dev);
+
+ if (attr->index == SHOW_NAME)
+ ret = sprintf(buf, "%s\n", data->name);
+ else /* show label */
+ ret = sprintf(buf, "physical id %d\n",
+ data->phys_proc_id);
+ return ret;
+}
+
+static ssize_t show_alarm(struct device *dev, struct device_attribute
+ *devattr, char *buf)
+{
+ struct pkgtemp_data *data = pkgtemp_update_device(dev);
+ /* read the Out-of-spec log, never clear */
+ return sprintf(buf, "%d\n", data->alarm);
+}
+
+static ssize_t show_temp(struct device *dev,
+ struct device_attribute *devattr, char *buf)
+{
+ struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr);
+ struct pkgtemp_data *data = pkgtemp_update_device(dev);
+ int err = 0;
+
+ if (attr->index == SHOW_TEMP)
+ err = data->valid ? sprintf(buf, "%d\n", data->temp) : -EAGAIN;
+ else if (attr->index == SHOW_TJMAX)
+ err = sprintf(buf, "%d\n", data->tjmax);
+ else
+ err = sprintf(buf, "%d\n", data->ttarget);
+ return err;
+}
+
+static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, SHOW_TEMP);
+static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, NULL, SHOW_TJMAX);
+static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, NULL, SHOW_TTARGET);
+static DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL);
+static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL);
+static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME);
+
+static struct attribute *pkgtemp_attributes[] = {
+ &sensor_dev_attr_name.dev_attr.attr,
+ &sensor_dev_attr_temp1_label.dev_attr.attr,
+ &dev_attr_temp1_crit_alarm.attr,
+ &sensor_dev_attr_temp1_input.dev_attr.attr,
+ &sensor_dev_attr_temp1_crit.dev_attr.attr,
+ NULL
+};
+
+static const struct attribute_group pkgtemp_group = {
+ .attrs = pkgtemp_attributes,
+};
+
+static struct pkgtemp_data *pkgtemp_update_device(struct device *dev)
+{
+ struct pkgtemp_data *data = dev_get_drvdata(dev);
+ int err;
+
+ mutex_lock(&data->update_lock);
+
+ if (!data->valid || time_after(jiffies, data->last_updated + HZ)) {
+ u32 eax, edx;
+
+ data->valid = 0;
+ err = rdmsr_safe_on_pcpu(data->pdev->id,
+ MSR_IA32_PACKAGE_THERM_STATUS,
+ &eax, &edx);
+ if (err >= 0) {
+ data->alarm = (eax >> 5) & 1;
+ data->temp = data->tjmax - (((eax >> 16)
+ & 0x7f) * 1000);
+ data->valid = 1;
+ } else
+ dev_dbg(dev, "Temperature data invalid (0x%x)\n", eax);
+
+ data->last_updated = jiffies;
+ }
+
+ mutex_unlock(&data->update_lock);
+ return data;
+}
+
+static int get_tjmax(int cpu, struct device *dev)
+{
+ int default_tjmax = 100000;
+ int err;
+ u32 eax, edx;
+ u32 val;
+
+ /* IA32_TEMPERATURE_TARGET contains the TjMax value */
+ err = rdmsr_safe_on_pcpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx);
+ if (err >= 0) {
+ val = (eax >> 16) & 0xff;
+ if ((val > 80) && (val < 120)) {
+ dev_info(dev, "TjMax is %d C.\n", val);
+ return val * 1000;
+ }
+ }
+ dev_warn(dev, "Unable to read TjMax from CPU.\n");
+ return default_tjmax;
+}
+
+static int pkgtemp_probe(struct platform_device *pdev)
+{
+ struct pkgtemp_data *data = platform_get_drvdata(pdev);
+ int err;
+ u32 eax, edx;
+
+ data->name = "pkgtemp";
+ mutex_init(&data->update_lock);
+
+ /* test if we can access the THERM_STATUS MSR */
+ err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_PACKAGE_THERM_STATUS,
+ &eax, &edx);
+ if (err < 0) {
+ dev_err(&pdev->dev,
+ "Unable to access THERM_STATUS MSR, giving up\n");
+ return err;
+ }
+
+ data->tjmax = get_tjmax(pdev->id, &pdev->dev);
+
+ err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_TEMPERATURE_TARGET,
+ &eax, &edx);
+ if (err < 0) {
+ dev_warn(&pdev->dev, "Unable to read"
+ " IA32_TEMPERATURE_TARGET MSR\n");
+ } else {
+ data->ttarget = data->tjmax - (((eax >> 8) & 0xff) * 1000);
+ err = device_create_file(&pdev->dev,
+ &sensor_dev_attr_temp1_max.dev_attr);
+ if (err)
+ return err;
+ }
+
+ err = sysfs_create_group(&pdev->dev.kobj, &pkgtemp_group);
+ if (err)
+ goto exit_dev;
+
+ data->hwmon_dev = hwmon_device_register(&pdev->dev);
+ if (IS_ERR(data->hwmon_dev)) {
+ err = PTR_ERR(data->hwmon_dev);
+ dev_err(&pdev->dev, "Class registration failed (%d)\n",
+ err);
+ goto exit_class;
+ }
+
+ return 0;
+
+exit_class:
+ sysfs_remove_group(&pdev->dev.kobj, &pkgtemp_group);
+exit_dev:
+ device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr);
+ return err;
+}
+
+static int pkgtemp_remove(struct platform_device *pdev)
+{
+ struct pkgtemp_data *data = platform_get_drvdata(pdev);
+
+ hwmon_device_unregister(data->hwmon_dev);
+ sysfs_remove_group(&pdev->dev.kobj, &pkgtemp_group);
+ device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr);
+ return 0;
+}
+
+static struct platform_driver pkgtemp_driver = {
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = DRVNAME,
+ },
+ .probe = pkgtemp_probe,
+ .remove = pkgtemp_remove,
+};
+
+static LIST_HEAD(pdev_list);
+static DEFINE_MUTEX(pdev_list_mutex);
+
+struct cpu_info {
+ u32 cpuid_6_eax;
+};
+
+static void get_cpuid_info(void *arg)
+{
+ struct cpu_info *info = arg;
+
+ info->cpuid_6_eax = cpuid_eax(0) >= 6 ? cpuid_eax(6) : 0;
+}
+
+static int pkgtemp_device_add(unsigned int cpu)
+{
+ int err;
+ struct cpu_info info;
+ struct platform_device *pdev;
+ struct pdev_entry *pdev_entry, *entry;
+
+ err = xen_set_physical_cpu_affinity(cpu);
+ if (!err) {
+ get_cpuid_info(&info);
+ WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1));
+ } else if (err > 0) {
+ static bool warned;
+
+ if (!warned) {
+ warned = true;
+ printk(KERN_WARNING DRVNAME
+ "Cannot set physical CPU affinity"
+ " (assuming use of dom0_vcpus_pin)\n");
+ }
+ err = smp_call_function_single(cpu, get_cpuid_info, &info, 1);
+ }
+ if (err)
+ return err;
+
+ if (!(info.cpuid_6_eax & 0x40))
+ return 0;
+
+ pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL);
+ if (!pdev_entry)
+ return -ENOMEM;
+
+ err = xen_get_topology_info(cpu, NULL,
+ &pdev_entry->phys_proc_id, NULL);
+ if (err)
+ goto exit_entry_free;
+
+ mutex_lock(&pdev_list_mutex);
+
+ /* Only keep the first entry in each package */
+ list_for_each_entry(entry, &pdev_list, list) {
+ if (entry->phys_proc_id == pdev_entry->phys_proc_id) {
+ err = 0; /* Not an error */
+ goto exit;
+ }
+ }
+
+ pdev = platform_device_alloc(DRVNAME, cpu);
+ if (!pdev) {
+ err = -ENOMEM;
+ printk(KERN_ERR DRVNAME ": Device allocation failed\n");
+ goto exit;
+ }
+
+ platform_set_drvdata(pdev, pdev_entry);
+ pdev_entry->pdev = pdev;
+
+ err = platform_device_add(pdev);
+ if (err) {
+ printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n",
+ err);
+ goto exit_device_put;
+ }
+
+ list_add_tail(&pdev_entry->list, &pdev_list);
+ mutex_unlock(&pdev_list_mutex);
+
+ return 0;
+
+exit_device_put:
+ platform_device_put(pdev);
+exit:
+ mutex_unlock(&pdev_list_mutex);
+exit_entry_free:
+ kfree(pdev_entry);
+ return err;
+}
+
+static void pkgtemp_device_remove(unsigned int cpu)
+{
+ struct pdev_entry *p;
+ unsigned int i;
+
+ mutex_lock(&pdev_list_mutex);
+ list_for_each_entry(p, &pdev_list, list) {
+ if (p->pdev->id != cpu)
+ continue;
+
+ platform_device_unregister(p->pdev);
+ list_del(&p->list);
+ mutex_unlock(&pdev_list_mutex);
+ for (i = 0; ; ++i) {
+ u32 phys_proc_id;
+ int err;
+
+ if (i == cpu)
+ continue;
+ err = xen_get_topology_info(i, NULL, &phys_proc_id,
+ NULL);
+ if (err == -ENOENT)
+ continue;
+ if (err)
+ break;
+ if (phys_proc_id != p->phys_proc_id)
+ continue;
+ if (!pkgtemp_device_add(i))
+ break;
+ }
+ kfree(p);
+ return;
+ }
+ mutex_unlock(&pdev_list_mutex);
+}
+
+static int pkgtemp_cpu_callback(struct notifier_block *nfb,
+ unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long) hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ pkgtemp_device_add(cpu);
+ break;
+ case CPU_DEAD:
+ pkgtemp_device_remove(cpu);
+ break;
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block pkgtemp_cpu_notifier = {
+ .notifier_call = pkgtemp_cpu_callback,
+};
+
+static int __init pkgtemp_init(void)
+{
+ int err = -ENODEV;
+
+ if (!is_initial_xendomain())
+ goto exit;
+
+ /* quick check if we run Intel */
+ if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL)
+ goto exit;
+
+ err = platform_driver_register(&pkgtemp_driver);
+ if (err)
+ goto exit;
+
+ err = register_pcpu_notifier(&pkgtemp_cpu_notifier);
+ if (err)
+ goto exit_driver_unreg;
+
+#ifndef CONFIG_ACPI_HOTPLUG_CPU
+ if (list_empty(&pdev_list)) {
+ unregister_pcpu_notifier(&pkgtemp_cpu_notifier);
+ err = -ENODEV;
+ goto exit_driver_unreg;
+ }
+#endif
+
+ return 0;
+
+exit_driver_unreg:
+ platform_driver_unregister(&pkgtemp_driver);
+exit:
+ return err;
+}
+
+static void __exit pkgtemp_exit(void)
+{
+ struct pdev_entry *p, *n;
+
+ unregister_pcpu_notifier(&pkgtemp_cpu_notifier);
+ mutex_lock(&pdev_list_mutex);
+ list_for_each_entry_safe(p, n, &pdev_list, list) {
+ platform_device_unregister(p->pdev);
+ list_del(&p->list);
+ kfree(p);
+ }
+ mutex_unlock(&pdev_list_mutex);
+ platform_driver_unregister(&pkgtemp_driver);
+}
+
+MODULE_AUTHOR("Fenghua Yu <fenghua.yu@intel.com>");
+MODULE_DESCRIPTION("Intel processor package temperature monitor");
+MODULE_LICENSE("GPL");
+
+module_init(pkgtemp_init)
+module_exit(pkgtemp_exit)
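
Note: a worked decode of the MSR_IA32_PACKAGE_THERM_STATUS read in
pkgtemp_update_device() above — bits 22..16 hold the distance below
TjMax in degrees C, and bit 5 is the sticky out-of-spec log exported as
temp1_crit_alarm:

    /* With tjmax = 100000 (millidegrees) and ((eax >> 16) & 0x7f) == 36:
     *     temp = 100000 - 36 * 1000 = 64000  ->  64 C */
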
--- head-2011-03-17.orig/drivers/hwmon/via-cputemp-xen.c 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/drivers/hwmon/via-cputemp-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -37,7 +37,7 @@
#define DRVNAME "via_cputemp"
-enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME } SHOW;
+enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME };
/*
* Functions declaration
@@ -316,15 +316,16 @@ static int __init via_cputemp_init(void)
if (err)
goto exit_driver_unreg;
+#ifndef CONFIG_ACPI_HOTPLUG_CPU
if (list_empty(&pdev_list)) {
+ unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
err = -ENODEV;
- goto exit_notifier_unreg;
+ goto exit_driver_unreg;
}
+#endif
return 0;
-exit_notifier_unreg:
- unregister_pcpu_notifier(&via_cputemp_cpu_notifier);
exit_driver_unreg:
platform_driver_unregister(&via_cputemp_driver);
exit:
--- head-2011-03-17.orig/drivers/xen/Kconfig 2011-02-02 15:37:42.000000000 +0100
+++ head-2011-03-17/drivers/xen/Kconfig 2011-02-02 15:37:53.000000000 +0100
@@ -448,7 +448,7 @@ config XEN_PLATFORM_PCI
config SWIOTLB_XEN
def_bool y
- depends on PCI
+ depends on PARAVIRT_XEN && PCI
select SWIOTLB
config XEN_XENCOMM
--- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-01 14:54:13.000000000 +0100
+++ head-2011-03-17/drivers/xen/Makefile 2011-02-24 15:05:06.000000000 +0100
@@ -22,6 +22,8 @@ obj-$(CONFIG_XEN_BALLOON) += $(xen-ball
obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o
obj-$(CONFIG_XENFS) += xenfs/
obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o
+obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
+obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o
obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ blktap2-new/
--- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-02-01 15:04:27.000000000 +0100
@@ -328,7 +328,7 @@ static void connect(struct blkfront_info
unsigned long long sectors;
unsigned long sector_size;
unsigned int binfo;
- int err;
+ int err, barrier;
switch (info->connected) {
case BLKIF_STATE_CONNECTED:
@@ -364,10 +364,25 @@ static void connect(struct blkfront_info
}
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
- "feature-barrier", "%lu", &info->feature_barrier,
+ "feature-barrier", "%lu", &barrier,
NULL);
+ /*
+ * If there's no "feature-barrier" defined, then it means
+ * we're dealing with a very old backend which writes
+ * synchronously; draining will do what needs to get done.
+ *
+ * If there are barriers, then we can do full queued writes
+ * with tagged barriers.
+ *
+ * If barriers are not supported, then there's not much we can
+ * do, so just set ordering to NONE.
+ */
if (err)
- info->feature_barrier = 0;
+ info->feature_barrier = QUEUE_ORDERED_DRAIN;
+ else if (barrier)
+ info->feature_barrier = QUEUE_ORDERED_TAG;
+ else
+ info->feature_barrier = QUEUE_ORDERED_NONE;
err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
if (err) {
@@ -687,7 +702,7 @@ static int blkif_queue_request(struct re
ring_req->operation = rq_data_dir(req) ?
BLKIF_OP_WRITE : BLKIF_OP_READ;
- if (blk_barrier_rq(req))
+ if (req->cmd_flags & REQ_HARDBARRIER)
ring_req->operation = BLKIF_OP_WRITE_BARRIER;
ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -746,7 +761,7 @@ void do_blkif_request(struct request_que
blk_start_request(req);
- if (!blk_fs_request(req)) {
+ if (req->cmd_type != REQ_TYPE_FS) {
__blk_end_request_all(req, -EIO);
continue;
}
@@ -812,7 +827,7 @@ static irqreturn_t blkif_int(int irq, vo
" write barrier op failed\n",
info->gd->disk_name);
ret = -EOPNOTSUPP;
- info->feature_barrier = 0;
+ info->feature_barrier = QUEUE_ORDERED_NONE;
xlvbd_barrier(info);
}
/* fall through */
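
Note: the request-inspection changes track 2.6.36's removal of the
blk_*_rq() predicate macros; the replacements are direct field tests
with the same meaning:

    /* blk_fs_request(req)  ->  req->cmd_type == REQ_TYPE_FS
     * blk_barrier_rq(req)  ->  req->cmd_flags & REQ_HARDBARRIER */
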
--- head-2011-03-17.orig/drivers/xen/blkfront/vbd.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/vbd.c 2011-02-01 15:04:27.000000000 +0100
@@ -422,8 +422,7 @@ xlvbd_add(blkif_sector_t capacity, int v
info->rq = gd->queue;
info->gd = gd;
- if (info->feature_barrier)
- xlvbd_barrier(info);
+ xlvbd_barrier(info);
if (vdisk_info & VDISK_READONLY)
set_disk_ro(gd, 1);
@@ -474,21 +473,28 @@ int
xlvbd_barrier(struct blkfront_info *info)
{
int err;
+ const char *barrier;
+
+ switch (info->feature_barrier) {
+ case QUEUE_ORDERED_DRAIN: barrier = "enabled (drain)"; break;
+ case QUEUE_ORDERED_TAG: barrier = "enabled (tag)"; break;
+ case QUEUE_ORDERED_NONE: barrier = "disabled"; break;
+ default: return -EINVAL;
+ }
- err = blk_queue_ordered(info->rq,
- info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
+ err = blk_queue_ordered(info->rq, info->feature_barrier);
if (err)
return err;
pr_info("blkfront: %s: barriers %s\n",
- info->gd->disk_name,
- info->feature_barrier ? "enabled" : "disabled");
+ info->gd->disk_name, barrier);
return 0;
}
#else
int
xlvbd_barrier(struct blkfront_info *info)
{
- pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
+ if (info->feature_barrier)
+ pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
return -ENOSYS;
}
#endif
--- head-2011-03-17.orig/drivers/xen/blktap/blktap.c 2011-02-17 10:19:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap/blktap.c 2011-02-17 10:19:19.000000000 +0100
@@ -431,14 +431,14 @@ static tap_blkif_t *get_next_free_dev(vo
static int blktap_open(struct inode *inode, struct file *filp);
static int blktap_release(struct inode *inode, struct file *filp);
static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
-static int blktap_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg);
+static long blktap_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg);
static unsigned int blktap_poll(struct file *file, poll_table *wait);
static const struct file_operations blktap_fops = {
.owner = THIS_MODULE,
.poll = blktap_poll,
- .ioctl = blktap_ioctl,
+ .unlocked_ioctl = blktap_ioctl,
.open = blktap_open,
.release = blktap_release,
.mmap = blktap_mmap,
@@ -757,8 +757,8 @@ static int blktap_mmap(struct file *filp
}
-static int blktap_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long blktap_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long arg)
{
tap_blkif_t *info = filp->private_data;
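
Note: 2.6.36 removed the locked file_operations.ioctl entry point, so
this and the following blktap2 conversions all move to .unlocked_ioctl.
Conversion shape (sketch; the lock and helper names are hypothetical):
the inode argument is gone — recover it from filp if needed — and the
BKL is no longer taken on the handler's behalf, so a handler that relied
on it must supply its own lock.

    static DEFINE_MUTEX(foo_ioctl_mutex);

    static long foo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
    {
            struct inode *inode = filp->f_path.dentry->d_inode;
            long ret;

            mutex_lock(&foo_ioctl_mutex);           /* replaces the BKL */
            ret = foo_do_ioctl(inode, filp, cmd, arg);
            mutex_unlock(&foo_ioctl_mutex);
            return ret;
    }
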
--- head-2011-03-17.orig/drivers/xen/blktap2/control.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/control.c 2011-02-24 15:17:25.000000000 +0100
@@ -103,9 +103,8 @@ found:
return tap;
}
-static int
-blktap_control_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long
+blktap_control_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
unsigned long dev;
struct blktap *tap;
@@ -148,7 +147,7 @@ blktap_control_ioctl(struct inode *inode
static const struct file_operations blktap_control_file_operations = {
.owner = THIS_MODULE,
- .ioctl = blktap_control_ioctl,
+ .unlocked_ioctl = blktap_control_ioctl,
};
static struct miscdevice blktap_misc = {
--- head-2011-03-17.orig/drivers/xen/blktap2/device.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/device.c 2011-02-01 15:04:27.000000000 +0100
@@ -838,13 +838,13 @@ blktap_device_run_queue(struct blktap *t
BTDBG("running queue for %d\n", tap->minor);
while ((req = blk_peek_request(rq)) != NULL) {
- if (!blk_fs_request(req)) {
+ if (req->cmd_type != REQ_TYPE_FS) {
blk_start_request(req);
__blk_end_request_all(req, -EIO);
continue;
}
- if (blk_barrier_rq(req)) {
+ if (req->cmd_flags & REQ_HARDBARRIER) {
blk_start_request(req);
__blk_end_request_all(req, -EOPNOTSUPP);
continue;
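
The two tests rewritten above reflect the removal of the request-flag wrapper macros in 2.6.36; callers now inspect struct request fields directly. The equivalences, for reference:

        /* blk_fs_request(req)  ->  req->cmd_type == REQ_TYPE_FS     */
        /* blk_barrier_rq(req)  ->  req->cmd_flags & REQ_HARDBARRIER */
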
--- head-2011-03-17.orig/drivers/xen/blktap2/ring.c 2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/ring.c 2011-02-01 15:04:27.000000000 +0100
@@ -363,9 +363,8 @@ blktap_ring_set_message(struct blktap *t
up_read(&tap->tap_sem);
}
-static int
-blktap_ring_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long
+blktap_ring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct blktap_params params;
struct blktap *tap = filp->private_data;
@@ -482,7 +481,7 @@ static const struct file_operations blkt
.owner = THIS_MODULE,
.open = blktap_ring_open,
.release = blktap_ring_release,
- .ioctl = blktap_ring_ioctl,
+ .unlocked_ioctl = blktap_ring_ioctl,
.mmap = blktap_ring_mmap,
.poll = blktap_ring_poll,
};
--- head-2011-03-17.orig/drivers/xen/blktap2-new/control.c 2011-02-24 15:03:58.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2-new/control.c 2011-02-24 15:17:28.000000000 +0100
@@ -120,9 +120,8 @@ blktap_control_destroy_tap(struct blktap
return 0;
}
-static int
-blktap_control_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long
+blktap_control_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct blktap *tap;
@@ -166,7 +165,7 @@ blktap_control_ioctl(struct inode *inode
static const struct file_operations blktap_control_file_operations = {
.owner = THIS_MODULE,
- .ioctl = blktap_control_ioctl,
+ .unlocked_ioctl = blktap_control_ioctl,
};
static struct miscdevice blktap_control = {
--- head-2011-03-17.orig/drivers/xen/blktap2-new/device.c 2011-02-24 15:01:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2-new/device.c 2011-02-24 16:23:08.000000000 +0100
@@ -240,7 +240,7 @@ blktap_device_run_queue(struct blktap *t
if (!rq)
break;
- if (!blk_fs_request(rq)) {
+ if (rq->cmd_type != REQ_TYPE_FS) {
__blktap_end_queued_rq(rq, -EOPNOTSUPP);
continue;
}
@@ -303,7 +303,7 @@ blktap_device_configure(struct blktap *t
blk_queue_dma_alignment(rq, 511);
/* We are reordering, but cacheless. */
- blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL);
+ blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN);
spin_unlock_irq(&dev->lock);
}
--- head-2011-03-17.orig/drivers/xen/blktap2-new/ring.c 2011-02-24 14:19:13.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2-new/ring.c 2011-02-24 15:10:15.000000000 +0100
@@ -370,9 +370,8 @@ fail:
return err;
}
-static int
-blktap_ring_ioctl(struct inode *inode, struct file *filp,
- unsigned int cmd, unsigned long arg)
+static long
+blktap_ring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct blktap *tap = filp->private_data;
struct blktap_ring *ring = &tap->ring;
@@ -438,7 +437,7 @@ static const struct file_operations blkt
.owner = THIS_MODULE,
.open = blktap_ring_open,
.release = blktap_ring_release,
- .ioctl = blktap_ring_ioctl,
+ .unlocked_ioctl = blktap_ring_ioctl,
.mmap = blktap_ring_mmap,
.poll = blktap_ring_poll,
};
--- head-2011-03-17.orig/drivers/xen/console/console.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/xen/console/console.c 2011-02-01 15:04:27.000000000 +0100
@@ -379,7 +379,7 @@ void xencons_rx(char *buf, unsigned len)
sysrq_requested = 0;
if (time_before(jiffies, sysrq_timeout)) {
spin_unlock_irqrestore(&xencons_lock, flags);
- handle_sysrq(buf[i], xencons_tty);
+ handle_sysrq(buf[i]);
spin_lock_irqsave(&xencons_lock, flags);
continue;
}
--- head-2011-03-17.orig/drivers/xen/core/reboot.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/reboot.c 2011-02-01 15:04:27.000000000 +0100
@@ -240,7 +240,7 @@ static void sysrq_handler(struct xenbus_
#ifdef CONFIG_MAGIC_SYSRQ
if (sysrq_key != '\0')
- handle_sysrq(sysrq_key, NULL);
+ handle_sysrq(sysrq_key);
#endif
}
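
Both sysrq call sites above follow the 2.6.36 signature change in which handle_sysrq() dropped its struct tty_struct * argument; the key character alone is now sufficient:

        /* before 2.6.36 */
        handle_sysrq(key, tty);
        /* 2.6.36 onward */
        handle_sysrq(key);
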
--- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-09 16:05:04.000000000 +0100
+++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-09 16:05:34.000000000 +0100
@@ -50,7 +50,6 @@
#include <linux/moduleparam.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
-#include <net/arp.h>
#include <net/route.h>
#include <asm/uaccess.h>
#include <xen/evtchn.h>
@@ -219,7 +218,6 @@ static void netif_disconnect_backend(str
static int network_connect(struct net_device *);
static void network_tx_buf_gc(struct net_device *);
static void network_alloc_rx_buffers(struct net_device *);
-static void send_fake_arp(struct net_device *);
static irqreturn_t netif_int(int irq, void *dev_id);
@@ -236,6 +234,25 @@ static inline int xennet_can_sg(struct n
return dev->features & NETIF_F_SG;
}
+/*
+ * Work around net.ipv4.conf.*.arp_notify not being enabled by default.

+ */
+static void __devinit netfront_enable_arp_notify(struct netfront_info *info)
+{
+#ifdef CONFIG_INET
+ struct in_device *in_dev;
+
+ rtnl_lock();
+ in_dev = __in_dev_get_rtnl(info->netdev);
+ if (in_dev && !IN_DEV_CONF_GET(in_dev, ARP_NOTIFY))
+ IN_DEV_CONF_SET(in_dev, ARP_NOTIFY, 1);
+ rtnl_unlock();
+ if (!in_dev)
+ printk(KERN_WARNING "Cannot enable ARP notification on %s\n",
+ info->xbdev->nodename);
+#endif
+}
+
/**
* Entry point to this code when a new device is created. Allocate the basic
* structures and the ring buffers for communication with the backend, and
@@ -265,6 +282,8 @@ static int __devinit netfront_probe(stru
goto fail;
}
+ netfront_enable_arp_notify(info);
+
err = xennet_sysfs_addif(info->netdev);
if (err) {
unregister_netdev(info->netdev);
@@ -551,7 +570,7 @@ static void backend_changed(struct xenbu
if (network_connect(netdev) != 0)
break;
xenbus_switch_state(dev, XenbusStateConnected);
- send_fake_arp(netdev);
+ netif_notify_peers(netdev);
break;
case XenbusStateClosing:
@@ -560,36 +579,6 @@ static void backend_changed(struct xenbu
}
}
-/** Send a packet on a net device to encourage switches to learn the
- * MAC. We send a fake ARP request.
- *
- * @param dev device
- * @return 0 on success, error code otherwise
- */
-static void send_fake_arp(struct net_device *dev)
-{
-#ifdef CONFIG_INET
- struct sk_buff *skb;
- u32 src_ip, dst_ip;
-
- dst_ip = INADDR_BROADCAST;
- src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
-
- /* No IP? Then nothing to do. */
- if (src_ip == 0)
- return;
-
- skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
- dst_ip, dev, src_ip,
- /*dst_hw*/ NULL, /*src_hw*/ NULL,
- /*target_hw*/ dev->dev_addr);
- if (skb == NULL)
- return;
-
- dev_queue_xmit(skb);
-#endif
-}
-
static inline int netfront_tx_slot_available(struct netfront_info *np)
{
return ((np->tx.req_prod_pvt - np->tx.rsp_cons) <
@@ -2154,32 +2143,6 @@ static struct net_device * __devinit cre
return ERR_PTR(err);
}
-#ifdef CONFIG_INET
-/*
- * We use this notifier to send out a fake ARP reply to reset switches and
- * router ARP caches when an IP interface is brought up on a VIF.
- */
-static int
-inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr)
-{
- struct in_ifaddr *ifa = (struct in_ifaddr *)ptr;
- struct net_device *dev = ifa->ifa_dev->dev;
-
- /* UP event and is it one of our devices? */
- if (event == NETDEV_UP && dev->netdev_ops->ndo_open == network_open)
- send_fake_arp(dev);
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block notifier_inetdev = {
- .notifier_call = inetdev_notify,
- .next = NULL,
- .priority = 0
-};
-#endif
-
-
static void netif_disconnect_backend(struct netfront_info *info)
{
/* Stop old i/f to prevent errors whilst we rebuild the state. */
@@ -2233,8 +2196,6 @@ static struct xenbus_driver netfront_dri
static int __init netif_init(void)
{
- int err;
-
if (!is_running_on_xen())
return -ENODEV;
@@ -2252,26 +2213,13 @@ static int __init netif_init(void)
IPRINTK("Initialising virtual ethernet driver.\n");
-#ifdef CONFIG_INET
- (void)register_inetaddr_notifier(&notifier_inetdev);
-#endif
-
- err = xenbus_register_frontend(&netfront_driver);
- if (err) {
-#ifdef CONFIG_INET
- unregister_inetaddr_notifier(&notifier_inetdev);
-#endif
- }
- return err;
+ return xenbus_register_frontend(&netfront_driver);
}
module_init(netif_init);
static void __exit netif_exit(void)
{
-#ifdef CONFIG_INET
- unregister_inetaddr_notifier(&notifier_inetdev);
-#endif
xenbus_unregister_driver(&netfront_driver);
netif_exit_accel();
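
Taken together, the netfront hunks above replace a hand-rolled gratuitous-ARP scheme (send_fake_arp() plus a global inetaddr notifier) with the generic one: the driver calls netif_notify_peers() when the backend reports the connection up, and netfront_enable_arp_notify() force-enables the per-device arp_notify flag so the IPv4 stack itself emits the gratuitous ARP when an address is configured. The resulting driver-side idiom is a single call; the placement below is illustrative:

        /* backend switched to XenbusStateConnected */
        netif_notify_peers(netdev);     /* raises NETDEV_NOTIFY_PEERS; with
                                         * arp_notify enabled, the inet code
                                         * sends the gratuitous ARP for us */

This is also what lets netif_init()/netif_exit() shed their notifier registration boilerplate in the last two hunks.
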
--- head-2011-03-17.orig/drivers/xen/scsiback/scsiback.c 2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/drivers/xen/scsiback/scsiback.c 2011-02-01 15:04:27.000000000 +0100
@@ -386,7 +386,7 @@ static struct bio *request_map_sg(pendin
if (bio->bi_vcnt >= nr_vecs) {
bio->bi_flags &= ~(1 << BIO_SEG_VALID);
if (pending_req->sc_data_direction == WRITE)
- bio->bi_rw |= (1 << BIO_RW);
+ bio->bi_rw |= REQ_WRITE;
bio = NULL;
}
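
The scsiback hunk is part of the same 2.6.36 flag unification seen in the blktap2 request handling: bio and request flags now share one REQ_* namespace, so the bio-private spelling goes away:

        bio->bi_rw |= (1 << BIO_RW);    /* 2.6.35 and earlier */
        bio->bi_rw |= REQ_WRITE;        /* 2.6.36 */
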
--- head-2011-03-17.orig/drivers/xen/usbfront/usbfront-hcd.c 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/usbfront/usbfront-hcd.c 2011-02-01 15:04:27.000000000 +0100
@@ -86,7 +86,7 @@ static int xenhcd_setup(struct usb_hcd *
static int xenhcd_run(struct usb_hcd *hcd)
{
hcd->uses_new_polling = 1;
- hcd->poll_rh = 0;
+ clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
hcd->state = HC_STATE_RUNNING;
create_debug_file(hcd_to_info(hcd));
return 0;
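
In 2.6.36 struct usb_hcd no longer has a poll_rh bitfield; root-hub polling is a bit in the atomic hcd->flags word, so the assignment becomes a bit operation:

        clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);   /* was: hcd->poll_rh = 0 */
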
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_client.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_client.c 2011-02-01 15:04:27.000000000 +0100
@@ -165,17 +165,12 @@ int xenbus_watch_pathfmt(struct xenbus_d
EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt);
#endif
+static void xenbus_switch_fatal(struct xenbus_device *, int, int,
+ const char *, ...);
-/**
- * xenbus_switch_state
- * @dev: xenbus device
- * @state: new state
- *
- * Advertise in the store a change of the given driver to the given new_state.
- * Return 0 on success, or -errno on error. On error, the device will switch
- * to XenbusStateClosing, and the error will be saved in the store.
- */
-int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+static int
+__xenbus_switch_state(struct xenbus_device *dev,
+ enum xenbus_state state, int depth)
{
/* We check whether the state is currently set to the given value, and
if not, then the state is set. We don't want to unconditionally
@@ -190,29 +185,58 @@ int xenbus_switch_state(struct xenbus_de
would not get reset if the transaction was aborted.
*/
+ struct xenbus_transaction xbt;
int current_state;
- int err;
+ int err, abort;
if (state == dev->state)
return 0;
- err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d",
- &current_state);
- if (err != 1)
+again:
+ abort = 1;
+
+ err = xenbus_transaction_start(&xbt);
+ if (err) {
+ xenbus_switch_fatal(dev, depth, err, "starting transaction");
return 0;
+ }
+
+ err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state);
+ if (err != 1)
+ goto abort;
- err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state);
+ err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
if (err) {
- if (state != XenbusStateClosing) /* Avoid looping */
- xenbus_dev_fatal(dev, err, "writing new state");
- return err;
+ xenbus_switch_fatal(dev, depth, err, "writing new state");
+ goto abort;
}
- dev->state = state;
+ abort = 0;
+abort:
+ err = xenbus_transaction_end(xbt, abort);
+ if (err) {
+ if (err == -EAGAIN && !abort)
+ goto again;
+ xenbus_switch_fatal(dev, depth, err, "ending transaction");
+ } else
+ dev->state = state;
return 0;
}
+/**
+ * xenbus_switch_state
+ * @dev: xenbus device
+ * @state: new state
+ *
+ * Advertise in the store a change of the given driver to the given new_state.
+ * Return 0 on success, or -errno on error. On error, the device will switch
+ * to XenbusStateClosing, and the error will be saved in the store.
+ */
+int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state)
+{
+ return __xenbus_switch_state(dev, state, 0);
+}
EXPORT_SYMBOL_GPL(xenbus_switch_state);
int xenbus_frontend_closed(struct xenbus_device *dev)
@@ -234,41 +258,22 @@ static char *error_path(struct xenbus_de
static void _dev_error(struct xenbus_device *dev, int err,
- const char *fmt, va_list ap)
+ const char *fmt, va_list *ap)
{
- int ret;
- unsigned int len;
- char *printf_buffer = NULL, *path_buffer = NULL;
-
-#define PRINTF_BUFFER_SIZE 4096
- printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL);
- if (printf_buffer == NULL)
- goto fail;
+ char *printf_buffer, *path_buffer;
+ struct va_format vaf = { .fmt = fmt, .va = ap };
- len = sprintf(printf_buffer, "%i ", -err);
- ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap);
-
- BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1);
-
- dev_err(&dev->dev, "%s\n", printf_buffer);
+ printf_buffer = kasprintf(GFP_KERNEL, "%i %pV", -err, &vaf);
+ if (printf_buffer)
+ dev_err(&dev->dev, "%s\n", printf_buffer);
path_buffer = error_path(dev);
-
- if (path_buffer == NULL) {
+ if (!printf_buffer || !path_buffer
+ || xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer))
dev_err(&dev->dev,
"xenbus: failed to write error node for %s (%s)\n",
dev->nodename, printf_buffer);
- goto fail;
- }
- if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) {
- dev_err(&dev->dev,
- "xenbus: failed to write error node for %s (%s)\n",
- dev->nodename, printf_buffer);
- goto fail;
- }
-
-fail:
kfree(printf_buffer);
kfree(path_buffer);
}
@@ -288,7 +293,7 @@ void xenbus_dev_error(struct xenbus_devi
va_list ap;
va_start(ap, fmt);
- _dev_error(dev, err, fmt, ap);
+ _dev_error(dev, err, fmt, &ap);
va_end(ap);
}
EXPORT_SYMBOL_GPL(xenbus_dev_error);
@@ -309,13 +314,29 @@ void xenbus_dev_fatal(struct xenbus_devi
va_list ap;
va_start(ap, fmt);
- _dev_error(dev, err, fmt, ap);
+ _dev_error(dev, err, fmt, &ap);
va_end(ap);
xenbus_switch_state(dev, XenbusStateClosing);
}
EXPORT_SYMBOL_GPL(xenbus_dev_fatal);
+/**
+ * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps
+ * avoid recursion within xenbus_switch_state.
+ */
+static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err,
+ const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ _dev_error(dev, err, fmt, &ap);
+ va_end(ap);
+
+ if (!depth)
+ __xenbus_switch_state(dev, XenbusStateClosing, 1);
+}
/**
* xenbus_grant_ring
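
The _dev_error() rewrite above relies on the %pV printk extension: a struct va_format couples a format string with a pointer to its va_list, so one varargs function can hand its arguments to another without sizing an intermediate buffer (hence the switch from a fixed 4096-byte kmalloc to kasprintf()). A minimal sketch of the idiom, with log_prefixed as a made-up example name:

        static void log_prefixed(struct device *dev, const char *fmt, ...)
        {
                struct va_format vaf;
                va_list args;

                va_start(args, fmt);
                vaf.fmt = fmt;
                vaf.va = &args;         /* by reference, matching the new
                                         * va_list * parameter above */
                dev_err(dev, "xenbus: %pV\n", &vaf);
                va_end(args);
        }

Needing a va_list * for vaf.va is also why _dev_error() and its callers change from passing va_list to va_list * in this patch.
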
--- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:04:27.000000000 +0100
@@ -58,9 +58,6 @@
#include <xen/evtchn.h>
#include <xen/features.h>
#include <xen/gnttab.h>
-#ifdef MODULE
-#include <xen/hvm.h>
-#endif
#else
#include <asm/xen/hypervisor.h>
@@ -68,6 +65,12 @@
#include <xen/xenbus.h>
#include <xen/events.h>
#include <xen/page.h>
+
+#include <xen/platform_pci.h>
+#endif
+
+#ifndef CONFIG_XEN
+#include <xen/hvm.h>
#endif
#include "xenbus_comms.h"
@@ -962,7 +965,23 @@ void xenbus_probe(struct work_struct *un
/* Notify others that xenstore is up */
blocking_notifier_call_chain(&xenstore_chain, 0, NULL);
}
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+EXPORT_SYMBOL_GPL(xenbus_probe);
+static int __init xenbus_probe_initcall(void)
+{
+ if (!xen_domain())
+ return -ENODEV;
+
+ if (xen_initial_domain() || xen_hvm_domain())
+ return 0;
+
+ xenbus_probe(NULL);
+ return 0;
+}
+
+device_initcall(xenbus_probe_initcall);
+#endif
#if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST)
static struct file_operations xsd_kva_fops;
@@ -1077,9 +1096,9 @@ fail0:
#endif
#ifndef MODULE
-static int __init xenbus_probe_init(void)
+static int __init xenbus_init(void)
#else
-static int __devinit xenbus_probe_init(void)
+int __devinit xenbus_init(void)
#endif
{
int err = 0;
@@ -1147,17 +1166,36 @@ static int __devinit xenbus_probe_init(v
#endif
xen_store_interface = mfn_to_virt(xen_store_mfn);
} else {
- atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY);
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ if (xen_hvm_domain()) {
+#endif
+#ifndef CONFIG_XEN
+ uint64_t v = 0;
+
+ err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
+ if (err)
+ goto err;
+ xen_store_evtchn = (int)v;
+ err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v);
+ if (err)
+ goto err;
+ xen_store_mfn = (unsigned long)v;
+ xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
+ PAGE_SIZE);
+#endif
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ } else {
+#endif
#ifndef MODULE
- xen_store_evtchn = xen_start_info->store_evtchn;
- xen_store_mfn = xen_start_info->store_mfn;
- xen_store_interface = mfn_to_virt(xen_store_mfn);
-#else
- xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN);
- xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN);
- xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT,
- PAGE_SIZE);
+ xen_store_evtchn = xen_start_info->store_evtchn;
+ xen_store_mfn = xen_start_info->store_mfn;
+ xen_store_interface = mfn_to_virt(xen_store_mfn);
#endif
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ }
+#endif
+ atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY);
+
/* Initialize the shared memory rings to talk to xenstored */
err = xb_init_comms();
if (err)
@@ -1189,8 +1227,10 @@ static int __devinit xenbus_probe_init(v
#endif
xenbus_backend_device_register();
+#if defined(CONFIG_XEN) || defined(MODULE)
if (!is_initial_xendomain())
xenbus_probe(NULL);
+#endif
#if defined(CONFIG_XEN_COMPAT_XENFS) && !defined(MODULE)
/*
@@ -1217,17 +1257,12 @@ static int __devinit xenbus_probe_init(v
}
#ifndef MODULE
-postcore_initcall(xenbus_probe_init);
+postcore_initcall(xenbus_init);
#ifdef CONFIG_XEN
MODULE_LICENSE("Dual BSD/GPL");
#else
MODULE_LICENSE("GPL");
#endif
-#else
-int __devinit xenbus_init(void)
-{
- return xenbus_probe_init();
-}
#endif
static int is_device_connecting(struct device *dev, void *data)
@@ -1345,6 +1380,11 @@ static void wait_for_devices(struct xenb
#ifndef MODULE
static int __init boot_wait_for_devices(void)
{
+#if !defined(CONFIG_XEN) && !defined(MODULE)
+ if (xen_hvm_domain() && !xen_platform_pci_unplug)
+ return -ENODEV;
+#endif
+
if (!xenbus_frontend.error) {
ready_to_wait_for_devices = 1;
wait_for_devices(NULL);
--- head-2011-03-17.orig/include/xen/hvm.h 2011-01-31 17:56:27.000000000 +0100
+++ head-2011-03-17/include/xen/hvm.h 2011-02-01 15:04:27.000000000 +0100
@@ -3,8 +3,11 @@
#define XEN_HVM_H__
#include <xen/interface/hvm/params.h>
+#ifndef HAVE_XEN_PLATFORM_COMPAT_H
+#include <asm/xen/hypercall.h>
+#endif
-static inline unsigned long hvm_get_parameter(int idx)
+static inline int hvm_get_parameter(int idx, uint64_t *value)
{
struct xen_hvm_param xhv;
int r;
@@ -14,9 +17,15 @@ static inline unsigned long hvm_get_para
r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv);
if (r < 0) {
pr_err("Cannot get hvm parameter %d: %d!\n", idx, r);
- return 0;
+ return r;
}
- return xhv.value;
+ *value = xhv.value;
+ return r;
}
+#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2
+#define HVM_CALLBACK_VIA_TYPE_SHIFT 56
+#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\
+ HVM_CALLBACK_VIA_TYPE_SHIFT | (x))
+
#endif /* XEN_HVM_H__ */
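
hvm_get_parameter() changes calling convention here: instead of returning the value and folding failures into a bogus 0, it returns an error code and writes the value through a pointer, which is what the xenbus_probe.c hunks earlier consume. A typical caller now reads:

        uint64_t v = 0;
        int err;

        err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);
        if (err)
                return err;             /* hypercall failure is visible now */
        xen_store_evtchn = (int)v;

The HVM_CALLBACK_VECTOR() macro added alongside packs a vector number into the callback-via format the hypervisor expects: the type (0x2, vector) in the top byte, the vector itself in the low bits.
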
--- head-2011-03-17.orig/include/xen/interface/hvm/hvm_op.h 2011-03-17 13:50:24.000000000 +0100
+++ head-2011-03-17/include/xen/interface/hvm/hvm_op.h 2011-03-17 14:14:21.000000000 +0100
@@ -33,6 +33,7 @@ struct xen_hvm_param {
uint32_t index; /* IN */
uint64_t value; /* IN/OUT */
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param);
typedef struct xen_hvm_param xen_hvm_param_t;
DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t);
@@ -140,6 +141,7 @@ struct xen_hvm_pagetable_dying {
/* guest physical address of the toplevel pagetable dying */
uint64_t gpa;
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying);
typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t);
--- head-2011-03-17.orig/include/xen/interface/memory.h 2011-02-01 14:38:38.000000000 +0100
+++ head-2011-03-17/include/xen/interface/memory.h 2011-02-01 15:04:27.000000000 +0100
@@ -132,6 +132,7 @@ struct xen_memory_exchange {
*/
xen_ulong_t nr_exchanged;
};
+DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
typedef struct xen_memory_exchange xen_memory_exchange_t;
DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t);
@@ -294,4 +295,14 @@ typedef struct xen_pod_target xen_pod_ta
*/
#define XENMEM_get_sharing_freed_pages 18
+#ifndef CONFIG_XEN
+#include <linux/spinlock.h>
+
+/*
+ * Prevent the balloon driver from changing the memory reservation
+ * during a driver critical region.
+ */
+extern spinlock_t xen_reservation_lock;
+#endif
+
#endif /* __XEN_PUBLIC_MEMORY_H__ */
--- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 15:04:27.000000000 +0100
@@ -37,20 +37,12 @@
#define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1)))
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
- SYNC_FOR_CPU = 0,
- SYNC_FOR_DEVICE = 1,
-};
-
int swiotlb;
int swiotlb_force;
/*
- * Used to do a quick range check in unmap_single and
- * sync_single_*, to see if the memory was in fact allocated by this
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
* API.
*/
static char *io_tlb_start, *io_tlb_end;
@@ -141,44 +133,30 @@ void swiotlb_print_info(void)
io_tlb_start, io_tlb_end);
}
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the PCI DMA API.
- */
-void __init
-swiotlb_init_with_default_size(size_t default_size, int verbose)
+void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
unsigned long i, bytes;
int rc;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
+ bytes = nslabs << IO_TLB_SHIFT;
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
- /*
- * Get IO TLB memory from the low pages
- */
- io_tlb_start = alloc_bootmem_pages(bytes);
- if (!io_tlb_start)
- panic("Cannot allocate SWIOTLB buffer!\n");
+ io_tlb_nslabs = nslabs;
+ io_tlb_start = tlb;
dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
- for (i = 0; i < io_tlb_nslabs; i += IO_TLB_SEGSIZE) {
+ for (nslabs = 0; nslabs < io_tlb_nslabs; nslabs += IO_TLB_SEGSIZE) {
do {
rc = xen_create_contiguous_region(
- (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT),
+ (unsigned long)io_tlb_start + (nslabs << IO_TLB_SHIFT),
get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
dma_bits);
} while (rc && dma_bits++ < max_dma_bits);
if (rc) {
- if (i == 0)
+ if (nslabs == 0)
panic("No suitable physical memory available for SWIOTLB buffer!\n"
"Use dom0_mem Xen boot parameter to reserve\n"
"some DMA memory (e.g., dom0_mem=-128M).\n");
- io_tlb_nslabs = i;
- i <<= IO_TLB_SHIFT;
+ io_tlb_nslabs = nslabs;
+ i = nslabs << IO_TLB_SHIFT;
free_bootmem(__pa(io_tlb_start + i), bytes - i);
bytes = i;
for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
@@ -221,6 +199,32 @@ swiotlb_init_with_default_size(size_t de
swiotlb_print_info();
}
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void __init
+swiotlb_init_with_default_size(size_t default_size, int verbose)
+{
+ unsigned long bytes;
+
+ if (!io_tlb_nslabs) {
+ io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+ io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ }
+
+ bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+ /*
+ * Get IO TLB memory from the low pages
+ */
+ io_tlb_start = alloc_bootmem_pages(bytes);
+ if (!io_tlb_start)
+ panic("Cannot allocate SWIOTLB buffer");
+
+ swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose);
+}
+
void __init
swiotlb_init(int verbose)
{
@@ -267,8 +271,8 @@ static int is_swiotlb_buffer(dma_addr_t
* drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an
* unnecessary copy from the aperture to the host buffer, and a page fault.
*/
-static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
- enum dma_data_direction dir)
+void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long pfn = PFN_DOWN(phys);
@@ -306,12 +310,11 @@ static void swiotlb_bounce(phys_addr_t p
/* inaccessible */;
}
}
+EXPORT_SYMBOL_GPL(swiotlb_bounce);
-/*
- * Allocates bounce buffer and returns its kernel virtual address.
- */
-static void *
-map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir)
+void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr,
+ phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
char *dma_addr;
@@ -409,12 +412,27 @@ found:
return dma_addr;
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single);
+
+/*
+ * Allocates bounce buffer and returns its kernel virtual address.
+ */
+
+static void *
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+ enum dma_data_direction dir)
+{
+ dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start);
+
+ return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir);
+}
/*
* dma_addr is the kernel virtual address of the bounce buffer to unmap.
*/
-static void
-do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir)
+void
+swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir)
{
unsigned long flags;
int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
@@ -455,10 +473,12 @@ do_unmap_single(struct device *hwdev, ch
}
spin_unlock_irqrestore(&io_tlb_lock, flags);
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single);
-static void
-sync_single(struct device *hwdev, char *dma_addr, size_t size,
- int dir, int target)
+void
+swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size,
+ enum dma_data_direction dir,
+ enum dma_sync_target target)
{
int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT;
phys_addr_t phys = io_tlb_orig_addr[index];
@@ -482,9 +502,11 @@ sync_single(struct device *hwdev, char *
BUG();
}
}
+EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single);
static void
-swiotlb_full(struct device *dev, size_t size, int dir, int do_panic)
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+ int do_panic)
{
/*
* Ran out of IOMMU space for this operation. This is very bad.
@@ -558,14 +580,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
* whatever the device wrote there.
*/
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir)
+ size_t size, enum dma_data_direction dir)
{
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dev_addr)) {
- do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
+ swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
return;
}
@@ -592,14 +614,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page);
*/
static void
swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, int dir, int target)
+ size_t size, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dev_addr))
- sync_single(hwdev, phys_to_virt(paddr), size, dir, target);
+ swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir,
+ target);
}
void
@@ -676,7 +700,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs);
int
swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
@@ -703,7 +727,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs);
void
swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
- int dir)
+ enum dma_data_direction dir)
{
return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL);
}
@@ -718,7 +742,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg);
*/
static void
swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
- int nelems, int dir, int target)
+ int nelems, enum dma_data_direction dir,
+ enum dma_sync_target target)
{
struct scatterlist *sg;
int i;