From: Linux Kernel Mailing List
Subject: Linux: 2.6.36
Patch-mainline: 2.6.36

 This patch contains the differences between 2.6.35 and 2.6.36.

Acked-by: Jeff Mahoney
Automatically created from "patches.kernel.org/patch-2.6.36" by xen-port-patches.py

--- head-2011-03-17.orig/arch/x86/Kconfig	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/Kconfig	2011-02-17 13:43:12.000000000 +0100
@@ -56,7 +56,7 @@ config X86
 	select HAVE_HW_BREAKPOINT
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
-	select HAVE_PERF_EVENTS_NMI
+	select HAVE_PERF_EVENTS_NMI if !XEN
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
 	select HAVE_USER_RETURN_NOTIFIER
@@ -248,7 +248,7 @@ config KTIME_SCALAR
 config ARCH_CPU_PROBE_RELEASE
 	def_bool y
-	depends on HOTPLUG_CPU
+	depends on HOTPLUG_CPU && !XEN
 
 source "init/Kconfig"
 source "kernel/Kconfig.freezer"
@@ -1064,7 +1064,7 @@ config X86_CPUID
 choice
 	prompt "High Memory Support"
-	default HIGHMEM64G if X86_NUMAQ
+	default HIGHMEM64G if X86_NUMAQ || XEN
 	default HIGHMEM4G
 	depends on X86_32
 
@@ -1107,7 +1107,7 @@ config NOHIGHMEM
 config HIGHMEM4G
 	bool "4GB"
-	depends on !X86_NUMAQ
+	depends on !X86_NUMAQ && !XEN
 	---help---
 	  Select this if you have a 32-bit processor and between 1 and 4
 	  gigabytes of physical RAM.
--- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S	2011-02-01 15:04:27.000000000 +0100
@@ -47,7 +47,12 @@
 	/*
 	 * Reload arg registers from stack in case ptrace changed them.
 	 * We don't reload %eax because syscall_trace_enter() returned
-	 * the value it wants us to use in the table lookup.
+	 * the %rax value we should see.  Instead, we just truncate that
+	 * value to 32 bits again as we did on entry from user mode.
+	 * If it's a new value set by user_regset during entry tracing,
+	 * this matches the normal truncation of the user-mode value.
+	 * If it's -1 to make us punt the syscall, then (u32)-1 is still
+	 * an appropriately invalid value.
 	 */
 	.macro LOAD_ARGS32 offset, _r9=0
 	.if \_r9
@@ -57,6 +62,7 @@
 	movl \offset+48(%rsp),%edx
 	movl \offset+56(%rsp),%esi
 	movl \offset+64(%rsp),%edi
+	movl %eax,%eax			/* zero extension */
 	.endm
 
 	.macro CFI_STARTPROC32 simple
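The zero extension added above pairs with the cmpl-to-cmpq conversions in the hunks that follow: the bounds check and the table index must see the same value, even if a tracer rewrote %rax between entry and dispatch. A standalone C sketch of the difference (the IA32_NR_SYSCALLS value is assumed here purely for illustration):

#include <stdint.h>
#include <stdio.h>

#define IA32_NR_SYSCALLS 341	/* assumed table size, for illustration */

/* Broken pattern: cmpl checks only the low 32 bits of %rax, while the
 * table lookup that follows indexes with the full 64-bit register. */
static int passes_cmpl(uint64_t rax)
{
	return (uint32_t)rax <= IA32_NR_SYSCALLS - 1;
}

/* Fixed pattern: movl %eax,%eax zero-extends first, then cmpq compares
 * the whole register, so the checked and the indexed value agree. */
static int passes_cmpq(uint64_t *rax)
{
	*rax = (uint32_t)*rax;		/* zero extension */
	return *rax <= IA32_NR_SYSCALLS - 1;
}

int main(void)
{
	uint64_t rax = 0x100000001ull;	/* low 32 bits look like syscall 1 */

	printf("cmpl passes: %d (then indexes table[0x100000001])\n",
	       passes_cmpl(rax));
	printf("cmpq passes: %d (then indexes table[%llu])\n",
	       passes_cmpq(&rax), (unsigned long long)rax);

	rax = (uint32_t)-1;		/* tracer's "punt this syscall" value */
	printf("(u32)-1 still rejected: %d\n", !passes_cmpq(&rax));
	return 0;
}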
@@ -151,7 +157,7 @@ ENTRY(ia32_sysenter_target)
 	movl $AUDIT_ARCH_I386,%edi	/* 1st arg: audit arch */
 	call audit_syscall_entry
 	movl RAX-ARGOFFSET(%rsp),%eax	/* reload syscall number */
-	cmpl $(IA32_NR_syscalls-1),%eax
+	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 	movl %ebx,%edi			/* reload 1st syscall arg */
 	movl RCX-ARGOFFSET(%rsp),%esi	/* reload 2nd syscall arg */
@@ -216,7 +222,7 @@ ENTRY(ia32_cstar_target)
 	orl $TS_COMPAT,TI_status(%r10)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz cstar_tracesys
-	cmpl $IA32_NR_syscalls-1,%eax
+	cmpq $IA32_NR_syscalls-1,%rax
 	ja ia32_badsys
 cstar_do_call:
 	IA32_ARG_FIXUP 1
@@ -243,7 +249,7 @@ cstar_tracesys:
 	LOAD_ARGS32 ARGOFFSET, 1	/* reload args from stack in case ptrace changed it */
 	RESTORE_REST
 	xchgl %ebp,%r9d
-	cmpl $(IA32_NR_syscalls-1),%eax
+	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call	/* cstar_tracesys has set RAX(%rsp) */
 	jmp cstar_do_call
 END(ia32_cstar_target)
@@ -301,7 +307,7 @@ ENTRY(ia32_syscall)
 	testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
 	jnz ia32_tracesys
 .Lia32_check_call:
-	cmpl $(IA32_NR_syscalls-1),%eax
+	cmpq $(IA32_NR_syscalls-1),%rax
 	ja ia32_badsys
 ia32_do_call:
 	IA32_ARG_FIXUP
@@ -325,7 +331,7 @@ ia32_tracesys:
 	call syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET	/* reload args from stack in case ptrace changed it */
 	RESTORE_REST
-	cmpl $(IA32_NR_syscalls-1),%eax
+	cmpq $(IA32_NR_syscalls-1),%rax
 	ja int_ret_from_sys_call	/* ia32_tracesys has set RAX(%rsp) */
 	jmp ia32_do_call
 END(ia32_syscall)
@@ -723,4 +729,7 @@ ia32_sys_call_table:
 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
 	.quad sys_perf_event_open
 	.quad compat_sys_recvmmsg
+	.quad sys_fanotify_init
+	.quad sys32_fanotify_mark
+	.quad sys_prlimit64		/* 340 */
ia32_syscall_end:
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/highmem.h	2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/highmem.h	2011-02-01 15:04:27.000000000 +0100
@@ -60,7 +60,7 @@ void *kmap(struct page *page);
 void kunmap(struct page *page);
 void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot);
 void *kmap_atomic(struct page *page, enum km_type type);
-void kunmap_atomic(void *kvaddr, enum km_type type);
+void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type);
 void *kmap_atomic_pfn(unsigned long pfn, enum km_type type);
 void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot);
 struct page *kmap_atomic_to_page(void *ptr);
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h	2011-02-01 15:04:27.000000000 +0100
@@ -30,6 +30,9 @@ extern struct pci_bus *pci_scan_bus_on_n
 						    int node);
 extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
 
+#ifdef CONFIG_PCI
+
+#ifdef CONFIG_PCI_DOMAINS
 static inline int pci_domain_nr(struct pci_bus *bus)
 {
 	struct pci_sysdata *sd = bus->sysdata;
@@ -40,13 +43,12 @@ static inline int pci_proc_domain(struct
 {
 	return pci_domain_nr(bus);
 }
-
+#endif
 
 /* Can be used to override the logic in pci_scan_bus for skipping
    already-configured bus numbers - to be used for buggy BIOSes
    or architectures with incomplete PCI setup by the loader */
 
-#ifdef CONFIG_PCI
 extern unsigned int pcibios_assign_all_busses(void);
 extern int pci_legacy_init(void);
 # ifdef CONFIG_ACPI
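The kunmap_atomic_notypecheck() rename in the highmem.h hunk above tracks the 2.6.36 change that turned kunmap_atomic() into a type-checking wrapper, so a caller that mistakenly passes the struct page * instead of the mapped address fails at build time. From memory, the upstream wrapper in include/linux/highmem.h is roughly:

/* kunmap_atomic() becomes a macro that rejects a struct page * argument
 * at compile time; the old symbol lives on as the _notypecheck variant. */
#define kunmap_atomic(addr, idx) do { \
		BUILD_BUG_ON(__same_type((addr), struct page *)); \
		kunmap_atomic_notypecheck((addr), (idx)); \
	} while (0)

The Xen headers only need to declare and export the _notypecheck symbol; the wrapper itself comes from the generic header.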
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/perf_event.h	2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/perf_event.h	2011-02-01 15:04:27.000000000 +0100
@@ -19,6 +19,19 @@
 	_r_->flags & PERF_EFLAGS_EXACT ? _f_ | PERF_RECORD_MISC_EXACT_IP : _f_; \
 })
 
+#include <asm/stacktrace.h>
+
+/*
+ * We abuse bit 3 from flags to pass exact information, see perf_misc_flags
+ * and the comment with PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip)	{	\
+	(regs)->ip = (__ip);				\
+	(regs)->bp = caller_frame_pointer();		\
+	(regs)->cs = __KERNEL_CS;			\
+	regs->flags = 0;				\
+}
+
 #endif
 
 static inline void init_hw_perf_events(void)	{}
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-02-01 14:44:12.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable-3level.h	2011-02-01 15:04:27.000000000 +0100
@@ -91,7 +91,7 @@ static inline void pud_clear(pud_t *pudp
 static inline pte_t xen_ptep_get_and_clear(pte_t *ptep, pte_t res)
 {
 	uint64_t val = __pte_val(res);
-	if (__cmpxchg64(ptep, val, 0) != val) {
+	if (__cmpxchg64(&ptep->pte, val, 0) != val) {
 		/* xchg acts as a barrier before the setting of the high bits */
 		res.pte_low = xchg(&ptep->pte_low, 0);
 		res.pte_high = ptep->pte_high;
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h	2011-02-01 15:04:27.000000000 +0100
@@ -25,6 +25,7 @@ struct vm_area_struct;
 
 extern pgd_t *swapper_pg_dir;
+extern pgd_t trampoline_pg_dir[1024];
 
 static inline void pgtable_cache_init(void) { }
 static inline void check_pgt_cache(void) { }
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h	2011-02-01 15:04:27.000000000 +0100
@@ -133,8 +133,8 @@ static inline int pgd_large(pgd_t pgd) {
 /* x86-64 always has all page tables mapped. */
 #define pte_offset_map(dir, address) pte_offset_kernel((dir), (address))
 #define pte_offset_map_nested(dir, address) pte_offset_kernel((dir), (address))
-#define pte_unmap(pte) /* NOP */
-#define pte_unmap_nested(pte) /* NOP */
+#define pte_unmap(pte) ((void)(pte))/* NOP */
+#define pte_unmap_nested(pte) ((void)(pte)) /* NOP */
 
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:47:17.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h	2011-03-03 16:47:27.000000000 +0100
@@ -716,6 +716,7 @@ extern void init_c1e_mask(void);
 extern unsigned long		boot_option_idle_override;
 extern unsigned long		idle_halt;
 extern unsigned long		idle_nomwait;
+extern bool			c1e_detected;
 
 #ifndef CONFIG_XEN
 /*
@@ -979,4 +980,24 @@ unsigned long calc_aperfmperf_ratio(stru
 	return ratio;
 }
 
+/*
+ * AMD errata checking
+ */
+#ifdef CONFIG_CPU_SUP_AMD
+extern const int amd_erratum_383[];
+extern const int amd_erratum_400[];
+extern bool cpu_has_amd_erratum(const int *);
+
+#define AMD_LEGACY_ERRATUM(...)		{ -1, __VA_ARGS__, 0 }
+#define AMD_OSVW_ERRATUM(osvw_id, ...)	{ osvw_id, __VA_ARGS__, 0 }
+#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
+	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
+#define AMD_MODEL_RANGE_FAMILY(range)	(((range) >> 24) & 0xff)
+#define AMD_MODEL_RANGE_START(range)	(((range) >> 12) & 0xfff)
+#define AMD_MODEL_RANGE_END(range)	((range) & 0xfff)
+
+#else
+#define cpu_has_amd_erratum(x)	(false)
+#endif /* CONFIG_CPU_SUP_AMD */
+
 #endif /* _ASM_X86_PROCESSOR_H */
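The AMD_MODEL_RANGE macros above pack family/model/stepping bounds into a single int, and an erratum table is a zero-terminated list of such ranges with an OSVW id in slot 0. A standalone sketch of how cpu_has_amd_erratum() can walk such a table (field names taken from the declarations above; the real kernel also consults the OSVW MSRs first, and its erratum-400 table uses AMD_OSVW_ERRATUM(1, ...) rather than the legacy form used here):

#include <stdbool.h>
#include <stdio.h>

#define AMD_LEGACY_ERRATUM(...)		{ -1, __VA_ARGS__, 0 }
#define AMD_MODEL_RANGE(f, m_start, s_start, m_end, s_end) \
	((f << 24) | (m_start << 16) | (s_start << 12) | (m_end << 4) | (s_end))
#define AMD_MODEL_RANGE_FAMILY(r)	(((r) >> 24) & 0xff)
#define AMD_MODEL_RANGE_START(r)	(((r) >> 12) & 0xfff)
#define AMD_MODEL_RANGE_END(r)		((r) & 0xfff)

/* stand-ins for the cpuinfo_x86 fields the check needs */
struct cpu { unsigned x86, x86_model, x86_mask; };

static bool cpu_has_amd_erratum(const struct cpu *c, const int *erratum)
{
	/* model and stepping share one key, matching the packed ranges */
	unsigned ms = (c->x86_model << 4) | c->x86_mask;
	int range;

	++erratum;			/* skip the OSVW id slot */
	while ((range = *erratum++) != 0)
		if (c->x86 == AMD_MODEL_RANGE_FAMILY(range) &&
		    ms >= AMD_MODEL_RANGE_START(range) &&
		    ms <= AMD_MODEL_RANGE_END(range))
			return true;
	return false;
}

/* erratum 400 covers family 0xf from model 0x41 and family 0x10 */
static const int amd_erratum_400[] =
	AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x0f, 0x41, 0x2, 0xff, 0xf),
			   AMD_MODEL_RANGE(0x10, 0x2, 0x1, 0xff, 0xf));

int main(void)
{
	struct cpu c = { .x86 = 0x10, .x86_model = 0x4, .x86_mask = 0x2 };
	printf("%d\n", cpu_has_amd_erratum(&c, amd_erratum_400)); /* 1 */
	return 0;
}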
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h	2011-03-03 16:10:31.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h	2011-03-03 16:11:05.000000000 +0100
@@ -441,4 +441,11 @@ static __always_inline void rdtsc_barrie
 	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
 }
 
+/*
+ * We handle most unaligned accesses in hardware. On the other hand
+ * unaligned DMA can be quite expensive on some Nehalem processors.
+ *
+ * Based on this we disable the IP header alignment in network drivers.
+ */
+#define NET_IP_ALIGN	0
 #endif /* _ASM_X86_SYSTEM_H */
--- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c	2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c	2011-02-01 15:04:27.000000000 +0100
@@ -2,7 +2,7 @@
  * sleep.c - x86-specific ACPI sleep support.
  *
  *  Copyright (C) 2001-2003 Patrick Mochel
- *  Copyright (C) 2001-2003 Pavel Machek
+ *  Copyright (C) 2001-2003 Pavel Machek
  */
 
 #include
--- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c	2011-02-01 15:04:27.000000000 +0100
@@ -319,14 +319,19 @@ void arch_init_copy_chip_data(struct irq
 
 	old_cfg = old_desc->chip_data;
 
-	memcpy(cfg, old_cfg, sizeof(struct irq_cfg));
+	cfg->vector = old_cfg->vector;
+	cfg->move_in_progress = old_cfg->move_in_progress;
+	cpumask_copy(cfg->domain, old_cfg->domain);
+	cpumask_copy(cfg->old_domain, old_cfg->old_domain);
 
 	init_copy_irq_2_pin(old_cfg, cfg, node);
 }
 
-static void free_irq_cfg(struct irq_cfg *old_cfg)
+static void free_irq_cfg(struct irq_cfg *cfg)
 {
-	kfree(old_cfg);
+	free_cpumask_var(cfg->domain);
+	free_cpumask_var(cfg->old_domain);
+	kfree(cfg);
 }
 
 void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc)
@@ -1808,6 +1813,8 @@ __apicdebuginit(void) print_IO_APIC(void
 		struct irq_pin_list *entry;
 
 		cfg = desc->chip_data;
+		if (!cfg)
+			continue;
 		entry = cfg->irq_2_pin;
 		if (!entry)
 			continue;
@@ -3498,7 +3505,7 @@ static int set_msi_irq_affinity(unsigned
 
 	cfg = desc->chip_data;
 
-	read_msi_msg_desc(desc, &msg);
+	get_cached_msi_msg_desc(desc, &msg);
 
 	msg.data &= ~MSI_DATA_VECTOR_MASK;
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c	2011-03-17 14:42:47.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c	2011-03-17 14:43:00.000000000 +0100
@@ -150,10 +150,18 @@ EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 static int __init x86_xsave_setup(char *s)
 {
 	setup_clear_cpu_cap(X86_FEATURE_XSAVE);
+	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
 	return 1;
 }
 __setup("noxsave", x86_xsave_setup);
 
+static int __init x86_xsaveopt_setup(char *s)
+{
+	setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
+	return 1;
+}
+__setup("noxsaveopt", x86_xsaveopt_setup);
+
 #ifdef CONFIG_X86_32
 static int cachesize_override __cpuinitdata = -1;
 
@@ -568,7 +576,7 @@ void __cpuinit cpu_detect(struct cpuinfo
 	}
 }
 
-static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
+void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
 {
 	u32 tfms, xlvl;
 	u32 ebx;
@@
-582,6 +590,16 @@ static void __cpuinit get_cpu_cap(struct c->x86_capability[4] = excap; } + /* Additional Intel-defined flags: level 0x00000007 */ + if (c->cpuid_level >= 0x00000007) { + u32 eax, ebx, ecx, edx; + + cpuid_count(0x00000007, 0, &eax, &ebx, &ecx, &edx); + + if (eax > 0) + c->x86_capability[9] = ebx; + } + /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); c->extended_cpuid_level = xlvl; @@ -607,6 +625,7 @@ static void __cpuinit get_cpu_cap(struct if (c->extended_cpuid_level >= 0x80000007) c->x86_power = cpuid_edx(0x80000007); + init_scattered_cpuid_features(c); } static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c) @@ -764,7 +783,6 @@ static void __cpuinit generic_identify(s get_model_name(c); /* Default name */ - init_scattered_cpuid_features(c); detect_nopl(c); } @@ -1273,6 +1291,7 @@ void __cpuinit cpu_init(void) dbg_restore_debug_regs(); fpu_init(); + xsave_init(); #ifndef CONFIG_XEN raw_local_save_flags(kernel_eflags); @@ -1343,12 +1362,7 @@ void __cpuinit cpu_init(void) clear_used_math(); mxcsr_feature_mask_init(); - /* - * Boot processor to setup the FP and extended state context info. - */ - if (smp_processor_id() == boot_cpu_id) - init_thread_xstate(); - + fpu_init(); xsave_init(); } #endif --- head-2011-03-17.orig/arch/x86/kernel/cpu/intel.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/intel.c 2011-02-01 15:04:27.000000000 +0100 @@ -288,6 +288,7 @@ static void __cpuinit intel_workarounds( } #endif +#ifndef CONFIG_XEN static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) { #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) @@ -306,7 +307,6 @@ static void __cpuinit srat_detect_node(s #endif } -#ifndef CONFIG_XEN /* * find out the number of processor cores on the die */ @@ -324,7 +324,6 @@ static int __cpuinit intel_num_cpu_cores else return 1; } -#endif static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) { @@ -363,6 +362,7 @@ static void __cpuinit detect_vmx_virtcap set_cpu_cap(c, X86_FEATURE_VPID); } } +#endif static void __cpuinit init_intel(struct cpuinfo_x86 *c) { @@ -459,13 +459,13 @@ static void __cpuinit init_intel(struct detect_ht(c); #endif } -#endif /* Work around errata */ srat_detect_node(c); if (cpu_has(c, X86_FEATURE_VMX)) detect_vmx_virtcap(c); +#endif } #ifdef CONFIG_X86_32 --- head-2011-03-17.orig/arch/x86/kernel/cpu/scattered.c 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/scattered.c 2011-02-01 15:04:27.000000000 +0100 @@ -40,6 +40,7 @@ void __cpuinit init_scattered_cpuid_feat { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_XSAVEOPT, CR_EAX, 0, 0x0000000d, 1 }, { X86_FEATURE_CPB, CR_EDX, 9, 0x80000007, 0 }, +#ifndef CONFIG_XEN { X86_FEATURE_NPT, CR_EDX, 0, 0x8000000a, 0 }, { X86_FEATURE_LBRV, CR_EDX, 1, 0x8000000a, 0 }, { X86_FEATURE_SVML, CR_EDX, 2, 0x8000000a, 0 }, @@ -50,6 +51,7 @@ void __cpuinit init_scattered_cpuid_feat { X86_FEATURE_DECODEASSISTS, CR_EDX, 7, 0x8000000a, 0 }, { X86_FEATURE_PAUSEFILTER, CR_EDX,10, 0x8000000a, 0 }, { X86_FEATURE_PFTHRESHOLD, CR_EDX,12, 0x8000000a, 0 }, +#endif { 0, 0, 0, 0, 0 } }; --- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:04:27.000000000 +0100 @@ -655,14 +655,14 @@ ldt_ss: * compensating for the offset by changing to the ESPFIX segment with * a base address that matches for the difference. 
*/ +#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8) mov %esp, %edx /* load kernel esp */ mov PT_OLDESP(%esp), %eax /* load userspace esp */ mov %dx, %ax /* eax: new kernel esp */ sub %eax, %edx /* offset (low word is 0) */ - PER_CPU(gdt_page, %ebx) shr $16, %edx - mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ - mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ + mov %dl, GDT_ESPFIX_SS + 4 /* bits 16..23 */ + mov %dh, GDT_ESPFIX_SS + 7 /* bits 24..31 */ pushl $__ESPFIX_SS CFI_ADJUST_CFA_OFFSET 4 push %eax /* new kernel esp */ @@ -861,9 +861,8 @@ ptregs_clone: * normal stack and adjusts ESP with the matching offset. */ /* fixup the stack */ - PER_CPU(gdt_page, %ebx) - mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ - mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ + mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */ + mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */ shl $16, %eax addl %esp, %eax /* the adjusted stack pointer */ pushl $__KERNEL_DS @@ -1132,7 +1131,7 @@ ENTRY(simd_coprocessor_error) .balign 4 .long 661b .long 663f - .byte X86_FEATURE_XMM + .word X86_FEATURE_XMM .byte 662b-661b .byte 664f-663f .previous --- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:04:27.000000000 +0100 @@ -1112,13 +1112,13 @@ END(kernel_thread_helper) * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. * * C extern interface: - * extern long execve(char *name, char **argv, char **envp) + * extern long execve(const char *name, char **argv, char **envp) * * asm input arguments: * rdi: name, rsi: argv, rdx: envp * * We want to fallback into: - * extern long sys_execve(char *name, char **argv,char **envp, struct pt_regs *regs) + * extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs) * * do_sys_execve asm fallback arguments: * rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack --- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -288,6 +288,20 @@ static void __init smp_dump_mptable(stru void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } +static void __init smp_register_lapic_address(unsigned long address) +{ +#ifndef CONFIG_XEN + mp_lapic_addr = address; + + set_fixmap_nocache(FIX_APIC_BASE, address); + if (boot_cpu_physical_apicid == -1U) { + boot_cpu_physical_apicid = read_apic_id(); + apic_version[boot_cpu_physical_apicid] = + GET_APIC_VERSION(apic_read(APIC_LVR)); + } +#endif +} + static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) { char str[16]; @@ -311,6 +325,10 @@ static int __init smp_read_mpc(struct mp if (early) return 1; + /* Initialize the lapic mapping */ + if (!acpi_lapic) + smp_register_lapic_address(mpc->lapic); + if (mpc->oemptr) x86_init.mpparse.smp_read_mpc_oem(mpc); --- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -142,12 +142,23 @@ static struct dma_map_ops swiotlb_dma_op .dma_supported = swiotlb_dma_supported }; +#define pci_xen_swiotlb_detect() 1 + +static void __init pci_xen_swiotlb_init(void) +{ + swiotlb_init(1); + if (swiotlb) { + printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); + dma_ops = &swiotlb_dma_ops; + } +} + 
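pci_xen_swiotlb_detect() is hard-wired to 1 above because in this tree a Xen dom0 kernel always needs the machine-contiguous bounce buffers that swiotlb provides for device DMA. The resulting backend selection in pci_iommu_alloc() (next hunk) can be condensed as below; the detector and init names are the ones from these hunks, with bodies stubbed purely for illustration:

/* condensed view of the backend selection order after this change */
static int pci_xen_swiotlb_detect(void) { return 1; }	/* always bounce */
static int pci_swiotlb_detect(void)     { return 0; }	/* native heuristics */

static void pci_iommu_alloc_sketch(void)
{
	if (pci_xen_swiotlb_detect() || pci_swiotlb_detect())
		goto out;		/* always taken under Xen */

	/* native-only probing, unreachable in this configuration:
	 * gart_iommu_hole_init(), amd_iommu_detect(), ... */
out:
	/* pci_xen_swiotlb_init(): calls swiotlb_init(1) and, if the
	 * bounce pool came up, installs dma_ops = &swiotlb_dma_ops */
	;
}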
void __init pci_iommu_alloc(void) { /* free the range so iommu could get some range less than 4G */ dma32_free_bootmem(); - if (pci_swiotlb_detect()) + if (pci_xen_swiotlb_detect() || pci_swiotlb_detect()) goto out; gart_iommu_hole_init(); @@ -159,11 +170,7 @@ void __init pci_iommu_alloc(void) /* needs to be called after gart_iommu_hole_init */ amd_iommu_detect(); out: - swiotlb_init(1); - if (swiotlb) { - printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); - dma_ops = &swiotlb_dma_ops; - } + pci_xen_swiotlb_init(); } void *dma_generic_alloc_coherent(struct device *dev, size_t size, @@ -376,7 +383,7 @@ static int __init pci_iommu_init(void) x86_init.iommu.iommu_init(); #ifndef CONFIG_XEN - if (swiotlb) { + if (swiotlb || xen_swiotlb) { printk(KERN_INFO "PCI-DMA: " "Using software bounce buffering for IO (SWIOTLB)\n"); swiotlb_print_info(); --- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:10:40.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:11:01.000000000 +0100 @@ -29,6 +29,7 @@ unsigned long idle_nomwait; EXPORT_SYMBOL(idle_nomwait); struct kmem_cache *task_xstate_cachep; +EXPORT_SYMBOL_GPL(task_xstate_cachep); int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { @@ -287,8 +288,9 @@ EXPORT_SYMBOL(kernel_thread); /* * sys_execve() executes a new program. */ -long sys_execve(char __user *name, char __user * __user *argv, - char __user * __user *envp, struct pt_regs *regs) +long sys_execve(const char __user *name, + const char __user *const __user *argv, + const char __user *const __user *envp, struct pt_regs *regs) { long error; char *filename; @@ -328,7 +330,7 @@ EXPORT_SYMBOL(pm_idle); */ void xen_idle(void) { - trace_power_start(POWER_CSTATE, 1); + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); current_thread_info()->status &= ~TS_POLLING; /* * TS_POLLING-cleared state must be visible before we @@ -394,7 +396,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); */ void mwait_idle_with_hints(unsigned long ax, unsigned long cx) { - trace_power_start(POWER_CSTATE, (ax>>4)+1); + trace_power_start(POWER_CSTATE, (ax>>4)+1, smp_processor_id()); if (!need_resched()) { if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -410,7 +412,7 @@ void mwait_idle_with_hints(unsigned long static void mwait_idle(void) { if (!need_resched()) { - trace_power_start(POWER_CSTATE, 1); + trace_power_start(POWER_CSTATE, 1, smp_processor_id()); if (cpu_has(¤t_cpu_data, X86_FEATURE_CLFLUSH_MONITOR)) clflush((void *)¤t_thread_info()->flags); @@ -432,7 +434,7 @@ static void mwait_idle(void) */ static void poll_idle(void) { - trace_power_start(POWER_CSTATE, 0); + trace_power_start(POWER_CSTATE, 0, smp_processor_id()); local_irq_enable(); while (!need_resched()) cpu_relax(); @@ -480,44 +482,10 @@ static int __cpuinit mwait_usable(const return (edx & MWAIT_EDX_C1); } -/* - * Check for AMD CPUs, where APIC timer interrupt does not wake up CPU from C1e. 
- * For more information see
- * - Erratum #400 for NPT family 0xf and family 0x10 CPUs
- * - Erratum #365 for family 0x11 (not affected because C1e not in use)
- */
-static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
-{
-	u64 val;
-	if (c->x86_vendor != X86_VENDOR_AMD)
-		goto no_c1e_idle;
-
-	/* Family 0x0f models < rev F do not have C1E */
-	if (c->x86 == 0x0F && c->x86_model >= 0x40)
-		return 1;
-
-	if (c->x86 == 0x10) {
-		/*
-		 * check OSVW bit for CPUs that are not affected
-		 * by erratum #400
-		 */
-		if (cpu_has(c, X86_FEATURE_OSVW)) {
-			rdmsrl(MSR_AMD64_OSVW_ID_LENGTH, val);
-			if (val >= 2) {
-				rdmsrl(MSR_AMD64_OSVW_STATUS, val);
-				if (!(val & BIT(1)))
-					goto no_c1e_idle;
-			}
-		}
-		return 1;
-	}
-
-no_c1e_idle:
-	return 0;
-}
+bool c1e_detected;
+EXPORT_SYMBOL(c1e_detected);
 
 static cpumask_var_t c1e_mask;
-static int c1e_detected;
 
 void c1e_remove_cpu(int cpu)
 {
@@ -539,12 +507,12 @@ static void c1e_idle(void)
 		u32 lo, hi;
 
 		rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
+
 		if (lo & K8_INTP_C1E_ACTIVE_MASK) {
-			c1e_detected = 1;
+			c1e_detected = true;
 			if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC))
 				mark_tsc_unstable("TSC halt in AMD C1E");
 			printk(KERN_INFO "System has AMD C1E enabled\n");
-			set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E);
 		}
 	}
 
@@ -595,7 +563,8 @@ void __cpuinit select_idle_routine(const
 		 */
 		printk(KERN_INFO "using mwait in idle threads.\n");
 		pm_idle = mwait_idle;
-	} else if (check_c1e_idle(c)) {
+	} else if (cpu_has_amd_erratum(amd_erratum_400)) {
+		/* E400: APIC timer interrupt does not wake up CPU from C1e */
 		printk(KERN_INFO "using C1E aware idle routine\n");
 		pm_idle = c1e_idle;
 	} else
--- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c	2011-02-02 08:47:43.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_32-xen.c	2011-02-02 08:47:59.000000000 +0100
@@ -59,6 +59,8 @@
 #include
 #include
 
+#include <trace/events/power.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
 
@@ -114,6 +116,8 @@ void cpu_idle(void)
 			stop_critical_timings();
 			xen_idle();
 			start_critical_timings();
+
+			trace_power_end(smp_processor_id());
 		}
 		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
--- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c	2011-02-02 08:47:47.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/process_64-xen.c	2011-02-02 08:47:56.000000000 +0100
@@ -56,6 +56,8 @@
 #include
 #include
 
+#include <trace/events/power.h>
+
 asmlinkage extern void ret_from_fork(void);
 
 static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -142,6 +144,9 @@ void cpu_idle(void)
 			stop_critical_timings();
 			xen_idle();
 			start_critical_timings();
+
+			trace_power_end(smp_processor_id());
+
 			/* In many cases the interrupt that ended idle
 			   has already called exit_idle. But some idle
 			   loops can be woken up without interrupt.
 			*/
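The deleted check_c1e_idle() above shows the OSVW protocol that the generic cpu_has_amd_erratum() now implements: if the OS Visible Workaround MSRs cover the erratum's id, their status bit is authoritative; otherwise the family/model table decides. A hedged standalone sketch of that decision, with the MSR read stubbed out (MSR numbers as in the deleted code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_AMD64_OSVW_ID_LENGTH	0xc0010140
#define MSR_AMD64_OSVW_STATUS		0xc0010141

/* stub standing in for rdmsrl(); a kernel build reads the real MSR */
static uint64_t rdmsr_stub(uint32_t msr)
{
	return msr == MSR_AMD64_OSVW_ID_LENGTH ? 2 : 0;	/* id 1 valid, bit clear */
}

/* Erratum 400 is OSVW id 1: its status bit is authoritative only when
 * OSVW_ID_LENGTH says ids 0..1 are actually implemented (length > 1). */
static bool erratum_400_present(bool cpu_has_osvw)
{
	const unsigned int osvw_id = 1;

	if (cpu_has_osvw && rdmsr_stub(MSR_AMD64_OSVW_ID_LENGTH) > osvw_id)
		return rdmsr_stub(MSR_AMD64_OSVW_STATUS) & (1ull << osvw_id);

	/* No usable OSVW data: fall back to the family/model range table. */
	return true;	/* e.g. plain family 0x10 parts are affected */
}

int main(void)
{
	printf("%d\n", erratum_400_present(true));	/* 0: firmware fixed it */
	printf("%d\n", erratum_400_present(false));	/* 1: assume affected */
	return 0;
}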
--- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c	2011-03-03 16:24:49.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/setup-xen.c	2011-03-03 16:25:01.000000000 +0100
@@ -102,6 +102,7 @@
 #include
 #include
+#include <asm/olpc_ofw.h>
 #include
 #include
 
@@ -826,10 +827,15 @@ void __init setup_arch(char **cmdline_p)
 	/* VMI may relocate the fixmap; do this before touching ioremap area */
 	vmi_init();
 
+	/* OFW also may relocate the fixmap */
+	olpc_ofw_detect();
+
 	early_trap_init();
 	early_cpu_init();
 	early_ioremap_init();
 
+	setup_olpc_ofw_pgd();
+
 #ifndef CONFIG_XEN
 	ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev);
 	screen_info = boot_params.screen_info;
@@ -1143,6 +1149,8 @@ void __init setup_arch(char **cmdline_p)
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
+	setup_trampoline_page_table();
+
 	tboot_probe();
 
 #ifdef CONFIG_X86_64
--- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c	2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/traps-xen.c	2011-02-01 15:04:27.000000000 +0100
@@ -385,7 +385,13 @@ static notrace __kprobes void default_do
 	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 							== NOTIFY_STOP)
 		return;
+
 #ifdef CONFIG_X86_LOCAL_APIC
+	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+							== NOTIFY_STOP)
+		return;
+
+#ifndef CONFIG_LOCKUP_DETECTOR
 	/*
 	 * Ok, so this is none of the documented NMI sources,
 	 * so it must be the NMI watchdog.
@@ -393,6 +399,7 @@ static notrace __kprobes void default_do
 	if (nmi_watchdog_tick(regs, reason))
 		return;
 	if (!do_nmi_callback(regs, cpu))
+#endif /* !CONFIG_LOCKUP_DETECTOR */
 		unknown_nmi_error(reason, regs);
 #else
 	unknown_nmi_error(reason, regs);
--- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c	2011-02-01 15:04:27.000000000 +0100
@@ -73,8 +73,8 @@ void update_vsyscall_tz(void)
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
 
-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
-		     u32 mult)
+void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+		     struct clocksource *clock, u32 mult)
 {
 	unsigned long flags;
 
@@ -87,7 +87,7 @@ void update_vsyscall(struct timespec *wa
 	vsyscall_gtod_data.clock.shift = clock->shift;
 	vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
 	vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
-	vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+	vsyscall_gtod_data.wall_to_monotonic = *wtm;
 	vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
 	write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
 }
@@ -169,13 +169,18 @@ int __vsyscall(0) vgettimeofday(struct t
  * unlikely */
 time_t __vsyscall(1) vtime(time_t *t)
 {
-	struct timeval tv;
+	unsigned seq;
 	time_t result;
 	if (unlikely(!__vsyscall_gtod_data.sysctl_enabled))
 		return time_syscall(t);
 
-	vgettimeofday(&tv, NULL);
-	result = tv.tv_sec;
+	do {
+		seq = read_seqbegin(&__vsyscall_gtod_data.lock);
+
+		result = __vsyscall_gtod_data.wall_time_sec;
+
+	} while (read_seqretry(&__vsyscall_gtod_data.lock, seq));
+
 	if (t)
 		*t = result;
 	return result;
--- head-2011-03-17.orig/arch/x86/mm/dump_pagetables-xen.c	2011-02-01 14:50:44.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/dump_pagetables-xen.c	2011-02-01 15:04:27.000000000 +0100
@@ -40,6 +40,29 @@ struct addr_marker {
 	const char *name;
 };
 
+/* indices for address_markers; keep sync'd w/ address_markers below */
+enum address_markers_idx {
+	USER_SPACE_NR = 0,
+#ifdef CONFIG_X86_64
+	XEN_SPACE_NR,
+
LOW_KERNEL_NR, + VMALLOC_START_NR, + VMEMMAP_START_NR, + HIGH_KERNEL_NR, + MODULES_VADDR_NR, + MODULES_END_NR, +#else + KERNEL_SPACE_NR, + VMALLOC_START_NR, + VMALLOC_END_NR, +# ifdef CONFIG_HIGHMEM + PKMAP_BASE_NR, +# endif + FIXADDR_START_NR, + XEN_SPACE_NR, +#endif +}; + /* Address space markers hints */ static struct addr_marker address_markers[] = { { 0, "User Space" }, @@ -346,16 +369,13 @@ static int __init pt_dump_init(void) #ifdef CONFIG_X86_32 /* Not a compile-time constant on x86-32 */ - address_markers[2].start_address = VMALLOC_START; - address_markers[3].start_address = VMALLOC_END; + address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; + address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; # ifdef CONFIG_HIGHMEM - address_markers[4].start_address = PKMAP_BASE; - address_markers[5].start_address = FIXADDR_START; - address_markers[6].start_address = hypervisor_virt_start; -# else - address_markers[4].start_address = FIXADDR_START; - address_markers[5].start_address = hypervisor_virt_start; + address_markers[PKMAP_BASE_NR].start_address = PKMAP_BASE; # endif + address_markers[FIXADDR_START_NR].start_address = FIXADDR_START; + address_markers[XEN_SPACE_NR].start_address = hypervisor_virt_start; #endif pe = debugfs_create_file("kernel_page_tables", 0600, NULL, NULL, --- head-2011-03-17.orig/arch/x86/mm/fault-xen.c 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/fault-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -821,8 +821,10 @@ do_sigbus(struct pt_regs *regs, unsigned up_read(&mm->mmap_sem); /* Kernel mode? Handle exceptions or die: */ - if (!(error_code & PF_USER)) + if (!(error_code & PF_USER)) { no_context(regs, error_code, address); + return; + } /* User-space => ok to do another page fault: */ if (is_prefetch(regs, error_code, address)) --- head-2011-03-17.orig/arch/x86/mm/highmem_32-xen.c 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/highmem_32-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -53,7 +53,7 @@ void *kmap_atomic(struct page *page, enu return kmap_atomic_prot(page, type, kmap_prot); } -void kunmap_atomic(void *kvaddr, enum km_type type) +void kunmap_atomic_notypecheck(void *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); @@ -147,7 +147,7 @@ void copy_highpage(struct page *to, stru EXPORT_SYMBOL(kmap); EXPORT_SYMBOL(kunmap); EXPORT_SYMBOL(kmap_atomic); -EXPORT_SYMBOL(kunmap_atomic); +EXPORT_SYMBOL(kunmap_atomic_notypecheck); EXPORT_SYMBOL(kmap_atomic_prot); EXPORT_SYMBOL(kmap_atomic_to_page); EXPORT_SYMBOL(clear_highpage); --- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -2,7 +2,7 @@ * linux/arch/x86_64/mm/init.c * * Copyright (C) 1995 Linus Torvalds - * Copyright (C) 2000 Pavel Machek + * Copyright (C) 2000 Pavel Machek * Copyright (C) 2002,2003 Andi Kleen * * Jun Nakajima --- head-2011-03-17.orig/arch/x86/mm/iomap_32-xen.c 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/iomap_32-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -75,7 +75,7 @@ void *kmap_atomic_prot_pfn(unsigned long /* * Map 'mfn' using fixed map 'type' and protections 'prot' */ -void * +void __iomem * iomap_atomic_prot_pfn(unsigned long mfn, enum km_type type, pgprot_t prot) { /* @@ -88,12 +88,12 @@ iomap_atomic_prot_pfn(unsigned long mfn, prot = PAGE_KERNEL_UC_MINUS; pgprot_val(prot) |= 
_PAGE_IOMAP; - return kmap_atomic_prot_pfn(mfn, type, prot); + return (void __force __iomem *) kmap_atomic_prot_pfn(mfn, type, prot); } EXPORT_SYMBOL_GPL(iomap_atomic_prot_pfn); void -iounmap_atomic(void *kvaddr, enum km_type type) +iounmap_atomic(void __iomem *kvaddr, enum km_type type) { unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; enum fixed_addresses idx = type + KM_TYPE_NR*smp_processor_id(); --- head-2011-03-17.orig/arch/x86/mm/ioremap-xen.c 2011-02-07 15:41:54.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/ioremap-xen.c 2011-02-07 15:42:02.000000000 +0100 @@ -221,7 +221,7 @@ static void __iomem *__ioremap_caller(re unsigned long size, unsigned long prot_val, void *caller) { unsigned long offset, vaddr; - phys_addr_t mfn, last_addr; + phys_addr_t mfn, last_mfn, last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; struct vm_struct *area; @@ -259,7 +259,8 @@ static void __iomem *__ioremap_caller(re /* * Don't allow anybody to remap normal RAM that we're using.. */ - for (mfn = PFN_DOWN(phys_addr); mfn < PFN_UP(last_addr); mfn++) { + last_mfn = PFN_DOWN(last_addr); + for (mfn = PFN_DOWN(phys_addr); mfn <= last_mfn; mfn++) { unsigned long pfn = mfn_to_local_pfn(mfn); if (pfn_valid(pfn)) { @@ -274,7 +275,7 @@ static void __iomem *__ioremap_caller(re * Mappings have to be page-aligned */ offset = phys_addr & ~PAGE_MASK; - phys_addr &= PAGE_MASK; + phys_addr &= PHYSICAL_PAGE_MASK; size = PAGE_ALIGN(last_addr+1) - phys_addr; retval = reserve_memtype(phys_addr, (u64)phys_addr + size, @@ -798,7 +799,7 @@ void __init early_iounmap(void __iomem * return; } offset = virt_addr & ~PAGE_MASK; - nrpages = PAGE_ALIGN(offset + size - 1) >> PAGE_SHIFT; + nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; while (nrpages > 0) { --- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -995,7 +995,7 @@ static int pcibios_lookup_irq(struct pci dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); /* Update IRQ for all devices with the same pirq value */ - while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { + for_each_pci_dev(dev2) { pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); if (!pin) continue; @@ -1034,7 +1034,7 @@ void __init pcibios_fixup_irqs(void) u8 pin; DBG(KERN_DEBUG "PCI: IRQ fixup\n"); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { /* * If the BIOS has set an out of range IRQ number, just * ignore it. Also keep track of which IRQ's are @@ -1058,7 +1058,7 @@ void __init pcibios_fixup_irqs(void) return; dev = NULL; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) continue; --- head-2011-03-17.orig/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/vdso/vdso32-setup-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -413,11 +413,7 @@ int arch_setup_additional_pages(struct l #ifdef CONFIG_X86_64 -/* - * This must be done early in case we have an initrd containing 32-bit - * binaries (e.g., hotplug). This could be pushed upstream. 
- */ -core_initcall(sysenter_setup); +subsys_initcall(sysenter_setup); #ifdef CONFIG_SYSCTL /* Register vsyscall32 into the ABI table */ --- head-2011-03-17.orig/arch/x86/xen/Kconfig 2011-02-01 14:39:24.000000000 +0100 +++ head-2011-03-17/arch/x86/xen/Kconfig 2011-02-01 15:04:27.000000000 +0100 @@ -25,7 +25,7 @@ config XEN_PRIVILEGED_GUEST config XEN_PVHVM def_bool y - depends on XEN + depends on PARAVIRT_XEN depends on X86_LOCAL_APIC config XEN_MAX_DOMAIN_MEMORY --- head-2011-03-17.orig/arch/x86/xen/enlighten.c 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/arch/x86/xen/enlighten.c 2011-02-01 15:04:27.000000000 +0100 @@ -115,8 +115,8 @@ static int have_vcpu_info_placement = 1; static void clamp_max_cpus(void) { #ifdef CONFIG_SMP - if (setup_max_cpus > MAX_VIRT_CPUS) - setup_max_cpus = MAX_VIRT_CPUS; + if (setup_max_cpus > XEN_LEGACY_MAX_VCPUS) + setup_max_cpus = XEN_LEGACY_MAX_VCPUS; #endif } @@ -128,11 +128,11 @@ static void xen_vcpu_setup(int cpu) BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); - if (cpu < MAX_VIRT_CPUS) + if (cpu < XEN_LEGACY_MAX_VCPUS) per_cpu(xen_vcpu,cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; if (!have_vcpu_info_placement) { - if (cpu >= MAX_VIRT_CPUS) + if (cpu >= XEN_LEGACY_MAX_VCPUS) clamp_max_cpus(); return; } --- head-2011-03-17.orig/drivers/hwmon/Kconfig 2011-03-11 11:00:24.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/Kconfig 2011-02-01 15:04:27.000000000 +0100 @@ -400,7 +400,7 @@ config SENSORS_CORETEMP config SENSORS_PKGTEMP tristate "Intel processor package temperature sensor" - depends on X86 && EXPERIMENTAL + depends on X86 && !XEN && EXPERIMENTAL help If you say yes here you get support for the package level temperature sensor inside your CPU. Check documentation/driver for details. --- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -583,15 +583,16 @@ static int __init coretemp_init(void) if (err) goto exit_driver_unreg; +#ifndef CONFIG_ACPI_HOTPLUG_CPU if (list_empty(&pdev_list)) { + unregister_pcpu_notifier(&coretemp_cpu_notifier); err = -ENODEV; - goto exit_notifier_unreg; + goto exit_driver_unreg; } +#endif return 0; -exit_notifier_unreg: - unregister_pcpu_notifier(&coretemp_cpu_notifier); exit_driver_unreg: platform_driver_unregister(&coretemp_driver); exit: --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/drivers/hwmon/pkgtemp-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -0,0 +1,452 @@ +/* + * pkgtemp.c - Linux kernel module for processor package hardware monitoring + * + * Copyright (C) 2010 Fenghua Yu + * + * Inspired from many hwmon drivers especially coretemp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../xen/core/domctl.h" + +#define DRVNAME "pkgtemp" +#define pkgtemp_data pdev_entry + +enum { SHOW_TEMP, SHOW_TJMAX, SHOW_TTARGET, SHOW_LABEL, SHOW_NAME }; + +/* + * Functions declaration + */ + +static struct pkgtemp_data *pkgtemp_update_device(struct device *dev); + +struct pdev_entry { + struct list_head list; + struct platform_device *pdev; + struct device *hwmon_dev; + struct mutex update_lock; + const char *name; + u32 phys_proc_id; + char valid; /* zero until following fields are valid */ + unsigned long last_updated; /* in jiffies */ + int temp; + int tjmax; + int ttarget; + u8 alarm; +}; + +/* + * Sysfs stuff + */ + +static ssize_t show_name(struct device *dev, struct device_attribute + *devattr, char *buf) +{ + int ret; + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); + struct pkgtemp_data *data = dev_get_drvdata(dev); + + if (attr->index == SHOW_NAME) + ret = sprintf(buf, "%s\n", data->name); + else /* show label */ + ret = sprintf(buf, "physical id %d\n", + data->phys_proc_id); + return ret; +} + +static ssize_t show_alarm(struct device *dev, struct device_attribute + *devattr, char *buf) +{ + struct pkgtemp_data *data = pkgtemp_update_device(dev); + /* read the Out-of-spec log, never clear */ + return sprintf(buf, "%d\n", data->alarm); +} + +static ssize_t show_temp(struct device *dev, + struct device_attribute *devattr, char *buf) +{ + struct sensor_device_attribute *attr = to_sensor_dev_attr(devattr); + struct pkgtemp_data *data = pkgtemp_update_device(dev); + int err = 0; + + if (attr->index == SHOW_TEMP) + err = data->valid ? sprintf(buf, "%d\n", data->temp) : -EAGAIN; + else if (attr->index == SHOW_TJMAX) + err = sprintf(buf, "%d\n", data->tjmax); + else + err = sprintf(buf, "%d\n", data->ttarget); + return err; +} + +static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO, show_temp, NULL, SHOW_TEMP); +static SENSOR_DEVICE_ATTR(temp1_crit, S_IRUGO, show_temp, NULL, SHOW_TJMAX); +static SENSOR_DEVICE_ATTR(temp1_max, S_IRUGO, show_temp, NULL, SHOW_TTARGET); +static DEVICE_ATTR(temp1_crit_alarm, S_IRUGO, show_alarm, NULL); +static SENSOR_DEVICE_ATTR(temp1_label, S_IRUGO, show_name, NULL, SHOW_LABEL); +static SENSOR_DEVICE_ATTR(name, S_IRUGO, show_name, NULL, SHOW_NAME); + +static struct attribute *pkgtemp_attributes[] = { + &sensor_dev_attr_name.dev_attr.attr, + &sensor_dev_attr_temp1_label.dev_attr.attr, + &dev_attr_temp1_crit_alarm.attr, + &sensor_dev_attr_temp1_input.dev_attr.attr, + &sensor_dev_attr_temp1_crit.dev_attr.attr, + NULL +}; + +static const struct attribute_group pkgtemp_group = { + .attrs = pkgtemp_attributes, +}; + +static struct pkgtemp_data *pkgtemp_update_device(struct device *dev) +{ + struct pkgtemp_data *data = dev_get_drvdata(dev); + int err; + + mutex_lock(&data->update_lock); + + if (!data->valid || time_after(jiffies, data->last_updated + HZ)) { + u32 eax, edx; + + data->valid = 0; + err = rdmsr_safe_on_pcpu(data->pdev->id, + MSR_IA32_PACKAGE_THERM_STATUS, + &eax, &edx); + if (err >= 0) { + data->alarm = (eax >> 5) & 1; + data->temp = data->tjmax - (((eax >> 16) + & 0x7f) * 1000); + data->valid = 1; + } else + dev_dbg(dev, "Temperature data invalid (0x%x)\n", eax); + + data->last_updated = jiffies; + } + + mutex_unlock(&data->update_lock); + return data; +} + +static int get_tjmax(int cpu, struct device *dev) +{ + int default_tjmax = 100000; + int err; + u32 eax, edx; + u32 val; + + /* 
IA32_TEMPERATURE_TARGET contains the TjMax value */ + err = rdmsr_safe_on_pcpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); + if (err >= 0) { + val = (eax >> 16) & 0xff; + if ((val > 80) && (val < 120)) { + dev_info(dev, "TjMax is %d C.\n", val); + return val * 1000; + } + } + dev_warn(dev, "Unable to read TjMax from CPU.\n"); + return default_tjmax; +} + +static int pkgtemp_probe(struct platform_device *pdev) +{ + struct pkgtemp_data *data = platform_get_drvdata(pdev); + int err; + u32 eax, edx; + + data->name = "pkgtemp"; + mutex_init(&data->update_lock); + + /* test if we can access the THERM_STATUS MSR */ + err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_PACKAGE_THERM_STATUS, + &eax, &edx); + if (err < 0) { + dev_err(&pdev->dev, + "Unable to access THERM_STATUS MSR, giving up\n"); + return err; + } + + data->tjmax = get_tjmax(pdev->id, &pdev->dev); + + err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_TEMPERATURE_TARGET, + &eax, &edx); + if (err < 0) { + dev_warn(&pdev->dev, "Unable to read" + " IA32_TEMPERATURE_TARGET MSR\n"); + } else { + data->ttarget = data->tjmax - (((eax >> 8) & 0xff) * 1000); + err = device_create_file(&pdev->dev, + &sensor_dev_attr_temp1_max.dev_attr); + if (err) + return err; + } + + err = sysfs_create_group(&pdev->dev.kobj, &pkgtemp_group); + if (err) + goto exit_dev; + + data->hwmon_dev = hwmon_device_register(&pdev->dev); + if (IS_ERR(data->hwmon_dev)) { + err = PTR_ERR(data->hwmon_dev); + dev_err(&pdev->dev, "Class registration failed (%d)\n", + err); + goto exit_class; + } + + return 0; + +exit_class: + sysfs_remove_group(&pdev->dev.kobj, &pkgtemp_group); +exit_dev: + device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr); + return err; +} + +static int pkgtemp_remove(struct platform_device *pdev) +{ + struct pkgtemp_data *data = platform_get_drvdata(pdev); + + hwmon_device_unregister(data->hwmon_dev); + sysfs_remove_group(&pdev->dev.kobj, &pkgtemp_group); + device_remove_file(&pdev->dev, &sensor_dev_attr_temp1_max.dev_attr); + return 0; +} + +static struct platform_driver pkgtemp_driver = { + .driver = { + .owner = THIS_MODULE, + .name = DRVNAME, + }, + .probe = pkgtemp_probe, + .remove = pkgtemp_remove, +}; + +static LIST_HEAD(pdev_list); +static DEFINE_MUTEX(pdev_list_mutex); + +struct cpu_info { + u32 cpuid_6_eax; +}; + +static void get_cpuid_info(void *arg) +{ + struct cpu_info *info = arg; + + info->cpuid_6_eax = cpuid_eax(0) >= 6 ? 
cpuid_eax(6) : 0; +} + +static int pkgtemp_device_add(unsigned int cpu) +{ + int err; + struct cpu_info info; + struct platform_device *pdev; + struct pdev_entry *pdev_entry, *entry; + + err = xen_set_physical_cpu_affinity(cpu); + if (!err) { + get_cpuid_info(&info); + WARN_ON_ONCE(xen_set_physical_cpu_affinity(-1)); + } else if (err > 0) { + static bool warned; + + if (!warned) { + warned = true; + printk(KERN_WARNING DRVNAME + "Cannot set physical CPU affinity" + " (assuming use of dom0_vcpus_pin)\n"); + } + err = smp_call_function_single(cpu, get_cpuid_info, &info, 1); + } + if (err) + return err; + + if (!(info.cpuid_6_eax & 0x40)) + return 0; + + pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL); + if (!pdev_entry) + return -ENOMEM; + + err = xen_get_topology_info(cpu, NULL, + &pdev_entry->phys_proc_id, NULL); + if (err) + goto exit_entry_free; + + mutex_lock(&pdev_list_mutex); + + /* Only keep the first entry in each package */ + list_for_each_entry(entry, &pdev_list, list) { + if (entry->phys_proc_id == pdev_entry->phys_proc_id) { + err = 0; /* Not an error */ + goto exit; + } + } + + pdev = platform_device_alloc(DRVNAME, cpu); + if (!pdev) { + err = -ENOMEM; + printk(KERN_ERR DRVNAME ": Device allocation failed\n"); + goto exit; + } + + platform_set_drvdata(pdev, pdev_entry); + pdev_entry->pdev = pdev; + + err = platform_device_add(pdev); + if (err) { + printk(KERN_ERR DRVNAME ": Device addition failed (%d)\n", + err); + goto exit_device_put; + } + + list_add_tail(&pdev_entry->list, &pdev_list); + mutex_unlock(&pdev_list_mutex); + + return 0; + +exit_device_put: + platform_device_put(pdev); +exit: + mutex_unlock(&pdev_list_mutex); +exit_entry_free: + kfree(pdev_entry); + return err; +} + +static void pkgtemp_device_remove(unsigned int cpu) +{ + struct pdev_entry *p; + unsigned int i; + + mutex_lock(&pdev_list_mutex); + list_for_each_entry(p, &pdev_list, list) { + if (p->pdev->id != cpu) + continue; + + platform_device_unregister(p->pdev); + list_del(&p->list); + mutex_unlock(&pdev_list_mutex); + for (i = 0; ; ++i) { + u32 phys_proc_id; + int err; + + if (i == cpu) + continue; + err = xen_get_topology_info(i, NULL, &phys_proc_id, + NULL); + if (err == -ENOENT) + continue; + if (err) + break; + if (phys_proc_id != p->phys_proc_id) + continue; + if (!pkgtemp_device_add(i)) + break; + } + kfree(p); + return; + } + mutex_unlock(&pdev_list_mutex); +} + +static int pkgtemp_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long) hcpu; + + switch (action) { + case CPU_ONLINE: + pkgtemp_device_add(cpu); + break; + case CPU_DEAD: + pkgtemp_device_remove(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block pkgtemp_cpu_notifier = { + .notifier_call = pkgtemp_cpu_callback, +}; + +static int __init pkgtemp_init(void) +{ + int err = -ENODEV; + + if (!is_initial_xendomain()) + goto exit; + + /* quick check if we run Intel */ + if (cpu_data(0).x86_vendor != X86_VENDOR_INTEL) + goto exit; + + err = platform_driver_register(&pkgtemp_driver); + if (err) + goto exit; + + err = register_pcpu_notifier(&pkgtemp_cpu_notifier); + if (err) + goto exit_driver_unreg; + +#ifndef CONFIG_ACPI_HOTPLUG_CPU + if (list_empty(&pdev_list)) { + unregister_pcpu_notifier(&pkgtemp_cpu_notifier); + err = -ENODEV; + goto exit_driver_unreg; + } +#endif + + return 0; + +exit_driver_unreg: + platform_driver_unregister(&pkgtemp_driver); +exit: + return err; +} + +static void __exit pkgtemp_exit(void) +{ + struct pdev_entry *p, *n; + + 
unregister_pcpu_notifier(&pkgtemp_cpu_notifier); + mutex_lock(&pdev_list_mutex); + list_for_each_entry_safe(p, n, &pdev_list, list) { + platform_device_unregister(p->pdev); + list_del(&p->list); + kfree(p); + } + mutex_unlock(&pdev_list_mutex); + platform_driver_unregister(&pkgtemp_driver); +} + +MODULE_AUTHOR("Fenghua Yu "); +MODULE_DESCRIPTION("Intel processor package temperature monitor"); +MODULE_LICENSE("GPL"); + +module_init(pkgtemp_init) +module_exit(pkgtemp_exit) --- head-2011-03-17.orig/drivers/hwmon/via-cputemp-xen.c 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/via-cputemp-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -37,7 +37,7 @@ #define DRVNAME "via_cputemp" -enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME } SHOW; +enum { SHOW_TEMP, SHOW_LABEL, SHOW_NAME }; /* * Functions declaration @@ -316,15 +316,16 @@ static int __init via_cputemp_init(void) if (err) goto exit_driver_unreg; +#ifndef CONFIG_ACPI_HOTPLUG_CPU if (list_empty(&pdev_list)) { + unregister_pcpu_notifier(&via_cputemp_cpu_notifier); err = -ENODEV; - goto exit_notifier_unreg; + goto exit_driver_unreg; } +#endif return 0; -exit_notifier_unreg: - unregister_pcpu_notifier(&via_cputemp_cpu_notifier); exit_driver_unreg: platform_driver_unregister(&via_cputemp_driver); exit: --- head-2011-03-17.orig/drivers/xen/Kconfig 2011-02-02 15:37:42.000000000 +0100 +++ head-2011-03-17/drivers/xen/Kconfig 2011-02-02 15:37:53.000000000 +0100 @@ -448,7 +448,7 @@ config XEN_PLATFORM_PCI config SWIOTLB_XEN def_bool y - depends on PCI + depends on PARAVIRT_XEN && PCI select SWIOTLB config XEN_XENCOMM --- head-2011-03-17.orig/drivers/xen/Makefile 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/drivers/xen/Makefile 2011-02-24 15:05:06.000000000 +0100 @@ -22,6 +22,8 @@ obj-$(CONFIG_XEN_BALLOON) += $(xen-ball obj-$(CONFIG_XEN_DEV_EVTCHN) += evtchn.o obj-$(CONFIG_XENFS) += xenfs/ obj-$(CONFIG_XEN_SYS_HYPERVISOR) += sys-hypervisor.o +obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o +obj-$(CONFIG_SWIOTLB_XEN) += swiotlb-xen.o obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/ obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/ obj-$(CONFIG_XEN_BLKDEV_TAP2) += blktap2/ blktap2-new/ --- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-02-01 14:50:44.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-02-01 15:04:27.000000000 +0100 @@ -328,7 +328,7 @@ static void connect(struct blkfront_info unsigned long long sectors; unsigned long sector_size; unsigned int binfo; - int err; + int err, barrier; switch (info->connected) { case BLKIF_STATE_CONNECTED: @@ -364,10 +364,25 @@ static void connect(struct blkfront_info } err = xenbus_gather(XBT_NIL, info->xbdev->otherend, - "feature-barrier", "%lu", &info->feature_barrier, + "feature-barrier", "%lu", &barrier, NULL); + /* + * If there's no "feature-barrier" defined, then it means + * we're dealing with a very old backend which writes + * synchronously; draining will do what needs to get done. + * + * If there are barriers, then we can do full queued writes + * with tagged barriers. + * + * If barriers are not supported, then there's no much we can + * do, so just set ordering to NONE. + */ if (err) - info->feature_barrier = 0; + info->feature_barrier = QUEUE_ORDERED_DRAIN; + else if (barrier) + info->feature_barrier = QUEUE_ORDERED_TAG; + else + info->feature_barrier = QUEUE_ORDERED_NONE; err = xlvbd_add(sectors, info->vdevice, binfo, sector_size, info); if (err) { @@ -687,7 +702,7 @@ static int blkif_queue_request(struct re ring_req->operation = rq_data_dir(req) ? 
							BLKIF_OP_WRITE : BLKIF_OP_READ;
-	if (blk_barrier_rq(req))
+	if (req->cmd_flags & REQ_HARDBARRIER)
 		ring_req->operation = BLKIF_OP_WRITE_BARRIER;
 
 	ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg);
@@ -746,7 +761,7 @@ void do_blkif_request(struct request_que
 
 		blk_start_request(req);
 
-		if (!blk_fs_request(req)) {
+		if (req->cmd_type != REQ_TYPE_FS) {
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
@@ -812,7 +827,7 @@ static irqreturn_t blkif_int(int irq, vo
 					" write barrier op failed\n",
 					info->gd->disk_name);
 				ret = -EOPNOTSUPP;
-				info->feature_barrier = 0;
+				info->feature_barrier = QUEUE_ORDERED_NONE;
 				xlvbd_barrier(info);
 			}
 			/* fall through */
--- head-2011-03-17.orig/drivers/xen/blkfront/vbd.c	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/blkfront/vbd.c	2011-02-01 15:04:27.000000000 +0100
@@ -422,8 +422,7 @@ xlvbd_add(blkif_sector_t capacity, int v
 	info->rq = gd->queue;
 	info->gd = gd;
 
-	if (info->feature_barrier)
-		xlvbd_barrier(info);
+	xlvbd_barrier(info);
 
 	if (vdisk_info & VDISK_READONLY)
 		set_disk_ro(gd, 1);
@@ -474,21 +473,28 @@ int
 xlvbd_barrier(struct blkfront_info *info)
 {
 	int err;
+	const char *barrier;
+
+	switch (info->feature_barrier) {
+	case QUEUE_ORDERED_DRAIN:	barrier = "enabled (drain)"; break;
+	case QUEUE_ORDERED_TAG:		barrier = "enabled (tag)"; break;
+	case QUEUE_ORDERED_NONE:	barrier = "disabled"; break;
+	default:			return -EINVAL;
+	}
 
-	err = blk_queue_ordered(info->rq,
-		info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL);
+	err = blk_queue_ordered(info->rq, info->feature_barrier);
 
 	if (err)
 		return err;
 
 	pr_info("blkfront: %s: barriers %s\n",
-		info->gd->disk_name,
-		info->feature_barrier ? "enabled" : "disabled");
+		info->gd->disk_name, barrier);
 	return 0;
 }
 #else
 int
 xlvbd_barrier(struct blkfront_info *info)
 {
-	pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
+	if (info->feature_barrier)
+		pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name);
 	return -ENOSYS;
 }
 #endif
--- head-2011-03-17.orig/drivers/xen/blktap/blktap.c	2011-02-17 10:19:12.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap/blktap.c	2011-02-17 10:19:19.000000000 +0100
@@ -431,14 +431,14 @@ static tap_blkif_t *get_next_free_dev(vo
 static int blktap_open(struct inode *inode, struct file *filp);
 static int blktap_release(struct inode *inode, struct file *filp);
 static int blktap_mmap(struct file *filp, struct vm_area_struct *vma);
-static int blktap_ioctl(struct inode *inode, struct file *filp,
-			unsigned int cmd, unsigned long arg);
+static long blktap_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg);
 static unsigned int blktap_poll(struct file *file, poll_table *wait);
 
 static const struct file_operations blktap_fops = {
 	.owner = THIS_MODULE,
 	.poll = blktap_poll,
-	.ioctl = blktap_ioctl,
+	.unlocked_ioctl = blktap_ioctl,
 	.open = blktap_open,
 	.release = blktap_release,
 	.mmap = blktap_mmap,
@@ -757,8 +757,8 @@ static int blktap_mmap(struct file *filp
 }
 
 
-static int blktap_ioctl(struct inode *inode, struct file *filp,
-			unsigned int cmd, unsigned long arg)
+static long blktap_ioctl(struct file *filp, unsigned int cmd,
+			 unsigned long arg)
 {
 	tap_blkif_t *info = filp->private_data;
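The .ioctl to .unlocked_ioctl conversions here and in the blktap2 diffs below follow the 2.6.36 removal of the BKL-locked ioctl file operation: handlers lose the inode argument, return long, and are no longer serialized by the big kernel lock, so any locking they need must be explicit. A minimal kernel-style sketch of the converted shape (the foo_* names are placeholders, not from this patch):

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(foo_ioctl_mutex);	/* explicit, replaces the implicit BKL */

static long foo_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	long ret = -ENOTTY;

	mutex_lock(&foo_ioctl_mutex);
	/* ... dispatch on cmd, using filp->private_data ... */
	mutex_unlock(&foo_ioctl_mutex);
	return ret;
}

static const struct file_operations foo_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= foo_ioctl,	/* was: .ioctl = foo_ioctl */
};

The blktap handlers here take no new lock because they already serialize on their own mutexes and semaphores.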
--- head-2011-03-17.orig/drivers/xen/blktap2/control.c	2011-02-01 15:03:10.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/control.c	2011-02-24 15:17:25.000000000 +0100
@@ -103,9 +103,8 @@ found:
 	return tap;
 }
 
-static int
-blktap_control_ioctl(struct inode *inode, struct file *filp,
-		     unsigned int cmd, unsigned long arg)
+static long
+blktap_control_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	unsigned long dev;
 	struct blktap *tap;
@@ -148,7 +147,7 @@ blktap_control_ioctl(struct inode *inode
 
 static const struct file_operations blktap_control_file_operations = {
 	.owner = THIS_MODULE,
-	.ioctl = blktap_control_ioctl,
+	.unlocked_ioctl = blktap_control_ioctl,
 };
 
 static struct miscdevice blktap_misc = {
--- head-2011-03-17.orig/drivers/xen/blktap2/device.c	2011-02-01 15:03:03.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/device.c	2011-02-01 15:04:27.000000000 +0100
@@ -838,13 +838,13 @@ blktap_device_run_queue(struct blktap *t
 	BTDBG("running queue for %d\n", tap->minor);
 
 	while ((req = blk_peek_request(rq)) != NULL) {
-		if (!blk_fs_request(req)) {
+		if (req->cmd_type != REQ_TYPE_FS) {
 			blk_start_request(req);
 			__blk_end_request_all(req, -EIO);
 			continue;
 		}
 
-		if (blk_barrier_rq(req)) {
+		if (req->cmd_flags & REQ_HARDBARRIER) {
 			blk_start_request(req);
 			__blk_end_request_all(req, -EOPNOTSUPP);
 			continue;
--- head-2011-03-17.orig/drivers/xen/blktap2/ring.c	2011-01-31 18:07:35.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2/ring.c	2011-02-01 15:04:27.000000000 +0100
@@ -363,9 +363,8 @@ blktap_ring_set_message(struct blktap *t
 	up_read(&tap->tap_sem);
 }
 
-static int
-blktap_ring_ioctl(struct inode *inode, struct file *filp,
-		  unsigned int cmd, unsigned long arg)
+static long
+blktap_ring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct blktap_params params;
 	struct blktap *tap = filp->private_data;
@@ -482,7 +481,7 @@ static const struct file_operations blkt
 	.owner = THIS_MODULE,
 	.open = blktap_ring_open,
 	.release = blktap_ring_release,
-	.ioctl = blktap_ring_ioctl,
+	.unlocked_ioctl = blktap_ring_ioctl,
 	.mmap = blktap_ring_mmap,
 	.poll = blktap_ring_poll,
 };
--- head-2011-03-17.orig/drivers/xen/blktap2-new/control.c	2011-02-24 15:03:58.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2-new/control.c	2011-02-24 15:17:28.000000000 +0100
@@ -120,9 +120,8 @@ blktap_control_destroy_tap(struct blktap
 	return 0;
 }
 
-static int
-blktap_control_ioctl(struct inode *inode, struct file *filp,
-		     unsigned int cmd, unsigned long arg)
+static long
+blktap_control_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
 	struct blktap *tap;
 
@@ -166,7 +165,7 @@ blktap_control_ioctl(struct inode *inode
 
 static const struct file_operations blktap_control_file_operations = {
 	.owner = THIS_MODULE,
-	.ioctl = blktap_control_ioctl,
+	.unlocked_ioctl = blktap_control_ioctl,
 };
 
 static struct miscdevice blktap_control = {
--- head-2011-03-17.orig/drivers/xen/blktap2-new/device.c	2011-02-24 15:01:27.000000000 +0100
+++ head-2011-03-17/drivers/xen/blktap2-new/device.c	2011-02-24 16:23:08.000000000 +0100
@@ -240,7 +240,7 @@ blktap_device_run_queue(struct blktap *t
 		if (!rq)
 			break;
 
-		if (!blk_fs_request(rq)) {
+		if (rq->cmd_type != REQ_TYPE_FS) {
 			__blktap_end_queued_rq(rq, -EOPNOTSUPP);
 			continue;
 		}
@@ -303,7 +303,7 @@ blktap_device_configure(struct blktap *t
 	blk_queue_dma_alignment(rq, 511);
 
 	/* We are reordering, but cacheless.
*/ - blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN, NULL); + blk_queue_ordered(rq, QUEUE_ORDERED_DRAIN); spin_unlock_irq(&dev->lock); } --- head-2011-03-17.orig/drivers/xen/blktap2-new/ring.c 2011-02-24 14:19:13.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap2-new/ring.c 2011-02-24 15:10:15.000000000 +0100 @@ -370,9 +370,8 @@ fail: return err; } -static int -blktap_ring_ioctl(struct inode *inode, struct file *filp, - unsigned int cmd, unsigned long arg) +static long +blktap_ring_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { struct blktap *tap = filp->private_data; struct blktap_ring *ring = &tap->ring; @@ -438,7 +437,7 @@ static const struct file_operations blkt .owner = THIS_MODULE, .open = blktap_ring_open, .release = blktap_ring_release, - .ioctl = blktap_ring_ioctl, + .unlocked_ioctl = blktap_ring_ioctl, .mmap = blktap_ring_mmap, .poll = blktap_ring_poll, }; --- head-2011-03-17.orig/drivers/xen/console/console.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/drivers/xen/console/console.c 2011-02-01 15:04:27.000000000 +0100 @@ -379,7 +379,7 @@ void xencons_rx(char *buf, unsigned len) sysrq_requested = 0; if (time_before(jiffies, sysrq_timeout)) { spin_unlock_irqrestore(&xencons_lock, flags); - handle_sysrq(buf[i], xencons_tty); + handle_sysrq(buf[i]); spin_lock_irqsave(&xencons_lock, flags); continue; } --- head-2011-03-17.orig/drivers/xen/core/reboot.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/reboot.c 2011-02-01 15:04:27.000000000 +0100 @@ -240,7 +240,7 @@ static void sysrq_handler(struct xenbus_ #ifdef CONFIG_MAGIC_SYSRQ if (sysrq_key != '\0') - handle_sysrq(sysrq_key, NULL); + handle_sysrq(sysrq_key); #endif } --- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-09 16:05:04.000000000 +0100 +++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-09 16:05:34.000000000 +0100 @@ -50,7 +50,6 @@ #include #include #include -#include #include #include #include @@ -219,7 +218,6 @@ static void netif_disconnect_backend(str static int network_connect(struct net_device *); static void network_tx_buf_gc(struct net_device *); static void network_alloc_rx_buffers(struct net_device *); -static void send_fake_arp(struct net_device *); static irqreturn_t netif_int(int irq, void *dev_id); @@ -236,6 +234,25 @@ static inline int xennet_can_sg(struct n return dev->features & NETIF_F_SG; } +/* + * Work around net.ipv4.conf.*.arp_notify not being enabled by default. + */ +static void __devinit netfront_enable_arp_notify(struct netfront_info *info) +{ +#ifdef CONFIG_INET + struct in_device *in_dev; + + rtnl_lock(); + in_dev = __in_dev_get_rtnl(info->netdev); + if (in_dev && !IN_DEV_CONF_GET(in_dev, ARP_NOTIFY)) + IN_DEV_CONF_SET(in_dev, ARP_NOTIFY, 1); + rtnl_unlock(); + if (!in_dev) + printk(KERN_WARNING "Cannot enable ARP notification on %s\n", + info->xbdev->nodename); +#endif +} + /** * Entry point to this code when a new device is created. 
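All of the blktap ioctl conversions above follow one mechanical pattern: .ioctl (called with the BKL held, and with an inode argument) becomes .unlocked_ioctl returning long. Any serialization the BKL used to provide must now be explicit; this patch adds none, on the premise that the handlers are already safe without it. A sketch of the pattern, with all example_* names hypothetical:

#include <linux/fs.h>
#include <linux/module.h>
#include <linux/mutex.h>

static DEFINE_MUTEX(example_ioctl_mutex);

static long example_ioctl(struct file *filp, unsigned int cmd,
			  unsigned long arg)
{
	long ret = 0;

	/* explicit lock standing in for the BKL the old .ioctl held */
	mutex_lock(&example_ioctl_mutex);
	/* the inode, if still wanted: filp->f_path.dentry->d_inode */
	mutex_unlock(&example_ioctl_mutex);
	return ret;
}

static const struct file_operations example_fops = {
	.owner		= THIS_MODULE,
	.unlocked_ioctl	= example_ioctl,	/* was: .ioctl = ... */
};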
Allocate the basic * structures and the ring buffers for communication with the backend, and @@ -265,6 +282,8 @@ static int __devinit netfront_probe(stru goto fail; } + netfront_enable_arp_notify(info); + err = xennet_sysfs_addif(info->netdev); if (err) { unregister_netdev(info->netdev); @@ -551,7 +570,7 @@ static void backend_changed(struct xenbu if (network_connect(netdev) != 0) break; xenbus_switch_state(dev, XenbusStateConnected); - send_fake_arp(netdev); + netif_notify_peers(netdev); break; case XenbusStateClosing: @@ -560,36 +579,6 @@ static void backend_changed(struct xenbu } } -/** Send a packet on a net device to encourage switches to learn the - * MAC. We send a fake ARP request. - * - * @param dev device - * @return 0 on success, error code otherwise - */ -static void send_fake_arp(struct net_device *dev) -{ -#ifdef CONFIG_INET - struct sk_buff *skb; - u32 src_ip, dst_ip; - - dst_ip = INADDR_BROADCAST; - src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK); - - /* No IP? Then nothing to do. */ - if (src_ip == 0) - return; - - skb = arp_create(ARPOP_REPLY, ETH_P_ARP, - dst_ip, dev, src_ip, - /*dst_hw*/ NULL, /*src_hw*/ NULL, - /*target_hw*/ dev->dev_addr); - if (skb == NULL) - return; - - dev_queue_xmit(skb); -#endif -} - static inline int netfront_tx_slot_available(struct netfront_info *np) { return ((np->tx.req_prod_pvt - np->tx.rsp_cons) < @@ -2154,32 +2143,6 @@ static struct net_device * __devinit cre return ERR_PTR(err); } -#ifdef CONFIG_INET -/* - * We use this notifier to send out a fake ARP reply to reset switches and - * router ARP caches when an IP interface is brought up on a VIF. - */ -static int -inetdev_notify(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct net_device *dev = ifa->ifa_dev->dev; - - /* UP event and is it one of our devices? */ - if (event == NETDEV_UP && dev->netdev_ops->ndo_open == network_open) - send_fake_arp(dev); - - return NOTIFY_DONE; -} - -static struct notifier_block notifier_inetdev = { - .notifier_call = inetdev_notify, - .next = NULL, - .priority = 0 -}; -#endif - - static void netif_disconnect_backend(struct netfront_info *info) { /* Stop old i/f to prevent errors whilst we rebuild the state. 
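Between netfront_enable_arp_notify() and the netif_notify_peers() call above, the driver stops crafting gratuitous ARP frames by hand: with arp_notify forced on per device, raising a NETDEV_NOTIFY_PEERS event makes the inet stack send the announcement itself, which is what lets the send_fake_arp() machinery below go away. A sketch of the replacement call site; example_connected is hypothetical:

#include <linux/netdevice.h>

static void example_connected(struct net_device *netdev)
{
	/* was: send_fake_arp(netdev), built via arp_create()/dev_queue_xmit() */
	netif_notify_peers(netdev);
}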
*/ @@ -2233,8 +2196,6 @@ static struct xenbus_driver netfront_dri static int __init netif_init(void) { - int err; - if (!is_running_on_xen()) return -ENODEV; @@ -2252,26 +2213,13 @@ static int __init netif_init(void) IPRINTK("Initialising virtual ethernet driver.\n"); -#ifdef CONFIG_INET - (void)register_inetaddr_notifier(&notifier_inetdev); -#endif - - err = xenbus_register_frontend(&netfront_driver); - if (err) { -#ifdef CONFIG_INET - unregister_inetaddr_notifier(&notifier_inetdev); -#endif - } - return err; + return xenbus_register_frontend(&netfront_driver); } module_init(netif_init); static void __exit netif_exit(void) { -#ifdef CONFIG_INET - unregister_inetaddr_notifier(&notifier_inetdev); -#endif xenbus_unregister_driver(&netfront_driver); netif_exit_accel(); --- head-2011-03-17.orig/drivers/xen/scsiback/scsiback.c 2011-02-01 14:50:44.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsiback/scsiback.c 2011-02-01 15:04:27.000000000 +0100 @@ -386,7 +386,7 @@ static struct bio *request_map_sg(pendin if (bio->bi_vcnt >= nr_vecs) { bio->bi_flags &= ~(1 << BIO_SEG_VALID); if (pending_req->sc_data_direction == WRITE) - bio->bi_rw |= (1 << BIO_RW); + bio->bi_rw |= REQ_WRITE; bio = NULL; } --- head-2011-03-17.orig/drivers/xen/usbfront/usbfront-hcd.c 2011-01-31 17:56:27.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbfront/usbfront-hcd.c 2011-02-01 15:04:27.000000000 +0100 @@ -86,7 +86,7 @@ static int xenhcd_setup(struct usb_hcd * static int xenhcd_run(struct usb_hcd *hcd) { hcd->uses_new_polling = 1; - hcd->poll_rh = 0; + clear_bit(HCD_FLAG_POLL_RH, &hcd->flags); hcd->state = HC_STATE_RUNNING; create_debug_file(hcd_to_info(hcd)); return 0; } --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_client.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_client.c 2011-02-01 15:04:27.000000000 +0100 @@ -165,17 +165,12 @@ int xenbus_watch_pathfmt(struct xenbus_d EXPORT_SYMBOL_GPL(xenbus_watch_pathfmt); #endif +static void xenbus_switch_fatal(struct xenbus_device *, int, int, + const char *, ...); -/** - * xenbus_switch_state - * @dev: xenbus device - * @state: new state - * - * Advertise in the store a change of the given driver to the given new_state. - * Return 0 on success, or -errno on error. On error, the device will switch - * to XenbusStateClosing, and the error will be saved in the store. - */ -int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +static int +__xenbus_switch_state(struct xenbus_device *dev, + enum xenbus_state state, int depth) { /* We check whether the state is currently set to the given value, and if not, then the state is set. We don't want to unconditionally @@ -190,29 +185,58 @@ int xenbus_switch_de would not get reset if the transaction was aborted. 
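The scsiback hunk above is the 2.6.36 flag unification at work: the BIO_RW_* bit numbers merge into the shared REQ_* namespace, so the write bit is set by name rather than by shifting. A one-line sketch (example_mark_write is illustrative only):

#include <linux/bio.h>
#include <linux/blk_types.h>

static void example_mark_write(struct bio *bio)
{
	/* was: bio->bi_rw |= (1 << BIO_RW); */
	bio->bi_rw |= REQ_WRITE;
}

The usbfront hunk nearby is a similar API catch-up: the poll_rh field becomes the HCD_FLAG_POLL_RH bit in hcd->flags.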
*/ + struct xenbus_transaction xbt; int current_state; - int err; + int err, abort; if (state == dev->state) return 0; - err = xenbus_scanf(XBT_NIL, dev->nodename, "state", "%d", - &current_state); - if (err != 1) +again: + abort = 1; + + err = xenbus_transaction_start(&xbt); + if (err) { + xenbus_switch_fatal(dev, depth, err, "starting transaction"); return 0; + } + + err = xenbus_scanf(xbt, dev->nodename, "state", "%d", &current_state); + if (err != 1) + goto abort; - err = xenbus_printf(XBT_NIL, dev->nodename, "state", "%d", state); + err = xenbus_printf(xbt, dev->nodename, "state", "%d", state); if (err) { - if (state != XenbusStateClosing) /* Avoid looping */ - xenbus_dev_fatal(dev, err, "writing new state"); - return err; + xenbus_switch_fatal(dev, depth, err, "writing new state"); + goto abort; } - dev->state = state; + abort = 0; +abort: + err = xenbus_transaction_end(xbt, abort); + if (err) { + if (err == -EAGAIN && !abort) + goto again; + xenbus_switch_fatal(dev, depth, err, "ending transaction"); + } else + dev->state = state; return 0; } +/** + * xenbus_switch_state + * @dev: xenbus device + * @state: new state + * + * Advertise in the store a change of the given driver to the given new_state. + * Return 0 on success, or -errno on error. On error, the device will switch + * to XenbusStateClosing, and the error will be saved in the store. + */ +int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state state) +{ + return __xenbus_switch_state(dev, state, 0); +} EXPORT_SYMBOL_GPL(xenbus_switch_state); int xenbus_frontend_closed(struct xenbus_device *dev) @@ -234,41 +258,22 @@ static char *error_path(struct xenbus_de static void _dev_error(struct xenbus_device *dev, int err, - const char *fmt, va_list ap) + const char *fmt, va_list *ap) { - int ret; - unsigned int len; - char *printf_buffer = NULL, *path_buffer = NULL; - -#define PRINTF_BUFFER_SIZE 4096 - printf_buffer = kmalloc(PRINTF_BUFFER_SIZE, GFP_KERNEL); - if (printf_buffer == NULL) - goto fail; + char *printf_buffer, *path_buffer; + struct va_format vaf = { .fmt = fmt, .va = ap }; - len = sprintf(printf_buffer, "%i ", -err); - ret = vsnprintf(printf_buffer+len, PRINTF_BUFFER_SIZE-len, fmt, ap); - - BUG_ON(len + ret > PRINTF_BUFFER_SIZE-1); - - dev_err(&dev->dev, "%s\n", printf_buffer); + printf_buffer = kasprintf(GFP_KERNEL, "%i %pV", -err, &vaf); + if (printf_buffer) + dev_err(&dev->dev, "%s\n", printf_buffer); path_buffer = error_path(dev); - - if (path_buffer == NULL) { + if (!printf_buffer || !path_buffer + || xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer)) dev_err(&dev->dev, "xenbus: failed to write error node for %s (%s)\n", dev->nodename, printf_buffer); - goto fail; - } - if (xenbus_write(XBT_NIL, path_buffer, "error", printf_buffer) != 0) { - dev_err(&dev->dev, - "xenbus: failed to write error node for %s (%s)\n", - dev->nodename, printf_buffer); - goto fail; - } - -fail: kfree(printf_buffer); kfree(path_buffer); } @@ -288,7 +293,7 @@ void xenbus_dev_error(struct xenbus_devi va_list ap; va_start(ap, fmt); - _dev_error(dev, err, fmt, ap); + _dev_error(dev, err, fmt, &ap); va_end(ap); } EXPORT_SYMBOL_GPL(xenbus_dev_error); @@ -309,13 +314,29 @@ void xenbus_dev_fatal(struct xenbus_devi va_list ap; va_start(ap, fmt); - _dev_error(dev, err, fmt, ap); + _dev_error(dev, err, fmt, &ap); va_end(ap); xenbus_switch_state(dev, XenbusStateClosing); } EXPORT_SYMBOL_GPL(xenbus_dev_fatal); +/** + * Equivalent to xenbus_dev_fatal(dev, err, fmt, args), but helps + * avoid recursion within xenbus_switch_state. 
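The reworked __xenbus_switch_state() above reads and writes the state node inside one xenstore transaction and restarts when the commit loses a race. The same pattern reduced to a minimal sketch; example_write_state is hypothetical:

#include <linux/errno.h>
#include <xen/xenbus.h>

static int example_write_state(struct xenbus_device *dev, int state)
{
	struct xenbus_transaction xbt;
	int err;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return err;

	err = xenbus_printf(xbt, dev->nodename, "state", "%d", state);
	if (err) {
		xenbus_transaction_end(xbt, 1);	/* abort */
		return err;
	}

	err = xenbus_transaction_end(xbt, 0);	/* commit */
	if (err == -EAGAIN)			/* lost a race: retry */
		goto again;
	return err;
}

The _dev_error() rewrite in the same hunk is independent of this: it trades the fixed 4 KiB buffer and hand-rolled vsnprintf() for kasprintf() with the %pV va_format specifier new in 2.6.36.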
+ */ +static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _dev_error(dev, err, fmt, &ap); + va_end(ap); + + if (!depth) + __xenbus_switch_state(dev, XenbusStateClosing, 1); +} /** * xenbus_grant_ring --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-02-01 15:04:27.000000000 +0100 @@ -58,9 +58,6 @@ #include #include #include -#ifdef MODULE -#include -#endif #else #include @@ -68,6 +65,12 @@ #include #include #include + +#include +#endif + +#ifndef CONFIG_XEN +#include #endif #include "xenbus_comms.h" @@ -962,7 +965,23 @@ void xenbus_probe(struct work_struct *un /* Notify others that xenstore is up */ blocking_notifier_call_chain(&xenstore_chain, 0, NULL); } +#if !defined(CONFIG_XEN) && !defined(MODULE) +EXPORT_SYMBOL_GPL(xenbus_probe); +static int __init xenbus_probe_initcall(void) +{ + if (!xen_domain()) + return -ENODEV; + + if (xen_initial_domain() || xen_hvm_domain()) + return 0; + + xenbus_probe(NULL); + return 0; +} + +device_initcall(xenbus_probe_initcall); +#endif #if defined(CONFIG_PROC_FS) && defined(CONFIG_XEN_PRIVILEGED_GUEST) static struct file_operations xsd_kva_fops; @@ -1077,9 +1096,9 @@ fail0: #endif #ifndef MODULE -static int __init xenbus_probe_init(void) +static int __init xenbus_init(void) #else -static int __devinit xenbus_probe_init(void) +int __devinit xenbus_init(void) #endif { int err = 0; @@ -1147,17 +1166,36 @@ static int __devinit xenbus_probe_init(v #endif xen_store_interface = mfn_to_virt(xen_store_mfn); } else { - atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY); +#if !defined(CONFIG_XEN) && !defined(MODULE) + if (xen_hvm_domain()) { +#endif +#ifndef CONFIG_XEN + uint64_t v = 0; + + err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v); + if (err) + goto err; + xen_store_evtchn = (int)v; + err = hvm_get_parameter(HVM_PARAM_STORE_PFN, &v); + if (err) + goto err; + xen_store_mfn = (unsigned long)v; + xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, + PAGE_SIZE); +#endif +#if !defined(CONFIG_XEN) && !defined(MODULE) + } else { +#endif #ifndef MODULE - xen_store_evtchn = xen_start_info->store_evtchn; - xen_store_mfn = xen_start_info->store_mfn; - xen_store_interface = mfn_to_virt(xen_store_mfn); -#else - xen_store_evtchn = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN); - xen_store_mfn = hvm_get_parameter(HVM_PARAM_STORE_PFN); - xen_store_interface = ioremap(xen_store_mfn << PAGE_SHIFT, - PAGE_SIZE); + xen_store_evtchn = xen_start_info->store_evtchn; + xen_store_mfn = xen_start_info->store_mfn; + xen_store_interface = mfn_to_virt(xen_store_mfn); #endif +#if !defined(CONFIG_XEN) && !defined(MODULE) + } +#endif + atomic_set(&xenbus_xsd_state, XENBUS_XSD_FOREIGN_READY); + /* Initialize the shared memory rings to talk to xenstored */ err = xb_init_comms(); if (err) @@ -1189,8 +1227,10 @@ static int __devinit xenbus_probe_init(v #endif xenbus_backend_device_register(); +#if defined(CONFIG_XEN) || defined(MODULE) if (!is_initial_xendomain()) xenbus_probe(NULL); +#endif #if defined(CONFIG_XEN_COMPAT_XENFS) && !defined(MODULE) /* @@ -1217,17 +1257,12 @@ static int __devinit xenbus_probe_init(v } #ifndef MODULE -postcore_initcall(xenbus_probe_init); +postcore_initcall(xenbus_init); #ifdef CONFIG_XEN MODULE_LICENSE("Dual BSD/GPL"); #else MODULE_LICENSE("GPL"); #endif -#else -int __devinit xenbus_init(void) -{ - return xenbus_probe_init(); -} #endif static int 
is_device_connecting(struct device *dev, void *data) @@ -1345,6 +1380,11 @@ static void wait_for_devices(struct xenb #ifndef MODULE static int __init boot_wait_for_devices(void) { +#if !defined(CONFIG_XEN) && !defined(MODULE) + if (xen_hvm_domain() && !xen_platform_pci_unplug) + return -ENODEV; +#endif + if (!xenbus_frontend.error) { ready_to_wait_for_devices = 1; wait_for_devices(NULL); --- head-2011-03-17.orig/include/xen/hvm.h 2011-01-31 17:56:27.000000000 +0100 +++ head-2011-03-17/include/xen/hvm.h 2011-02-01 15:04:27.000000000 +0100 @@ -3,8 +3,11 @@ #define XEN_HVM_H__ #include +#ifndef HAVE_XEN_PLATFORM_COMPAT_H +#include +#endif -static inline unsigned long hvm_get_parameter(int idx) +static inline int hvm_get_parameter(int idx, uint64_t *value) { struct xen_hvm_param xhv; int r; @@ -14,9 +17,15 @@ static inline unsigned long hvm_get_para r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (r < 0) { pr_err("Cannot get hvm parameter %d: %d!\n", idx, r); - return 0; + return r; } - return xhv.value; + *value = xhv.value; + return r; } +#define HVM_CALLBACK_VIA_TYPE_VECTOR 0x2 +#define HVM_CALLBACK_VIA_TYPE_SHIFT 56 +#define HVM_CALLBACK_VECTOR(x) (((uint64_t)HVM_CALLBACK_VIA_TYPE_VECTOR)<<\ + HVM_CALLBACK_VIA_TYPE_SHIFT | (x)) + #endif /* XEN_HVM_H__ */ --- head-2011-03-17.orig/include/xen/interface/hvm/hvm_op.h 2011-03-17 13:50:24.000000000 +0100 +++ head-2011-03-17/include/xen/interface/hvm/hvm_op.h 2011-03-17 14:14:21.000000000 +0100 @@ -33,6 +33,7 @@ struct xen_hvm_param { uint32_t index; /* IN */ uint64_t value; /* IN/OUT */ }; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_param); typedef struct xen_hvm_param xen_hvm_param_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_param_t); @@ -140,6 +141,7 @@ struct xen_hvm_pagetable_dying { /* guest physical address of the toplevel pagetable dying */ uint64_t gpa; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying); typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t; DEFINE_XEN_GUEST_HANDLE(xen_hvm_pagetable_dying_t); --- head-2011-03-17.orig/include/xen/interface/memory.h 2011-02-01 14:38:38.000000000 +0100 +++ head-2011-03-17/include/xen/interface/memory.h 2011-02-01 15:04:27.000000000 +0100 @@ -132,6 +132,7 @@ struct xen_memory_exchange { */ xen_ulong_t nr_exchanged; }; +DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange); typedef struct xen_memory_exchange xen_memory_exchange_t; DEFINE_XEN_GUEST_HANDLE(xen_memory_exchange_t); @@ -294,4 +295,14 @@ typedef struct xen_pod_target xen_pod_ta */ #define XENMEM_get_sharing_freed_pages 18 +#ifndef CONFIG_XEN +#include + +/* + * Prevent the balloon driver from changing the memory reservation + * during a driver critical region. + */ +extern spinlock_t xen_reservation_lock; +#endif + #endif /* __XEN_PUBLIC_MEMORY_H__ */ --- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 15:03:10.000000000 +0100 +++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 15:04:27.000000000 +0100 @@ -37,20 +37,12 @@ #define OFFSET(val,align) ((unsigned long)((val) & ( (align) - 1))) -/* - * Enumeration for sync targets - */ -enum dma_sync_target { - SYNC_FOR_CPU = 0, - SYNC_FOR_DEVICE = 1, -}; - int swiotlb; int swiotlb_force; /* - * Used to do a quick range check in unmap_single and - * sync_single_*, to see if the memory was in fact allocated by this + * Used to do a quick range check in swiotlb_tbl_unmap_single and + * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this * API. 
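With hvm_get_parameter() now returning an error code instead of silently yielding 0, callers receive the value through an out-parameter and can propagate failures, as the reworked xenbus_init() above does. A minimal caller sketch (example_store_evtchn is hypothetical; header paths follow this tree):

#include <linux/types.h>
#include <xen/hvm.h>
#include <xen/interface/hvm/params.h>

static int example_store_evtchn(int *evtchn)
{
	uint64_t v = 0;
	int err = hvm_get_parameter(HVM_PARAM_STORE_EVTCHN, &v);

	if (err)
		return err;	/* previously indistinguishable from v == 0 */
	*evtchn = (int)v;
	return 0;
}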
*/ static char *io_tlb_start, *io_tlb_end; @@ -141,44 +133,30 @@ void swiotlb_print_info(void) io_tlb_start, io_tlb_end); } -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the PCI DMA API. - */ -void __init -swiotlb_init_with_default_size(size_t default_size, int verbose) +void __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { unsigned long i, bytes; int rc; - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } + bytes = nslabs << IO_TLB_SHIFT; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - /* - * Get IO TLB memory from the low pages - */ - io_tlb_start = alloc_bootmem_pages(bytes); - if (!io_tlb_start) - panic("Cannot allocate SWIOTLB buffer!\n"); + io_tlb_nslabs = nslabs; + io_tlb_start = tlb; dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT; - for (i = 0; i < io_tlb_nslabs; i += IO_TLB_SEGSIZE) { + for (nslabs = 0; nslabs < io_tlb_nslabs; nslabs += IO_TLB_SEGSIZE) { do { rc = xen_create_contiguous_region( - (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT), + (unsigned long)io_tlb_start + (nslabs << IO_TLB_SHIFT), get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT), dma_bits); } while (rc && dma_bits++ < max_dma_bits); if (rc) { - if (i == 0) + if (nslabs == 0) panic("No suitable physical memory available for SWIOTLB buffer!\n" "Use dom0_mem Xen boot parameter to reserve\n" "some DMA memory (e.g., dom0_mem=-128M).\n"); - io_tlb_nslabs = i; - i <<= IO_TLB_SHIFT; + io_tlb_nslabs = nslabs; + i = nslabs << IO_TLB_SHIFT; free_bootmem(__pa(io_tlb_start + i), bytes - i); bytes = i; for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) { @@ -221,6 +199,32 @@ swiotlb_init_with_default_size(size_t de swiotlb_print_info(); } +/* + * Statically reserve bounce buffer space and initialize bounce buffer data + * structures for the software IO TLB used to implement the DMA API. + */ +void __init +swiotlb_init_with_default_size(size_t default_size, int verbose) +{ + unsigned long bytes; + + if (!io_tlb_nslabs) { + io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); + io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); + } + + bytes = io_tlb_nslabs << IO_TLB_SHIFT; + + /* + * Get IO TLB memory from the low pages + */ + io_tlb_start = alloc_bootmem_pages(bytes); + if (!io_tlb_start) + panic("Cannot allocate SWIOTLB buffer"); + + swiotlb_init_with_tbl(io_tlb_start, io_tlb_nslabs, verbose); +} + void __init swiotlb_init(int verbose) { @@ -267,8 +271,8 @@ static int is_swiotlb_buffer(dma_addr_t * drivers map the buffer for DMA_BIDIRECTIONAL access. This causes an * unnecessary copy from the aperture to the host buffer, and a page fault. */ -static void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, - enum dma_data_direction dir) +void swiotlb_bounce(phys_addr_t phys, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long pfn = PFN_DOWN(phys); @@ -306,12 +310,11 @@ static void swiotlb_bounce(phys_addr_t p /* inaccessible */; } } +EXPORT_SYMBOL_GPL(swiotlb_bounce); -/* - * Allocates bounce buffer and returns its kernel virtual address. 
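Factoring swiotlb_init_with_tbl() out of swiotlb_init_with_default_size() below lets a caller that obtains bounce memory by other means hand the region in; the Xen SWIOTLB support merged upstream in 2.6.36 is the intended user. A boot-time sketch, assuming bootmem is still available (example_swiotlb_setup is hypothetical):

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/bootmem.h>
#include <linux/swiotlb.h>

static void __init example_swiotlb_setup(unsigned long nslabs)
{
	/* any allocator works if the region is usable for DMA bouncing */
	char *tlb = alloc_bootmem_pages(nslabs << IO_TLB_SHIFT);

	if (!tlb)
		panic("Cannot allocate SWIOTLB buffer");
	swiotlb_init_with_tbl(tlb, nslabs, 1 /* verbose */);
}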
- */ -static void * -map_single(struct device *hwdev, phys_addr_t phys, size_t size, int dir) +void *swiotlb_tbl_map_single(struct device *hwdev, dma_addr_t tbl_dma_addr, + phys_addr_t phys, size_t size, + enum dma_data_direction dir) { unsigned long flags; char *dma_addr; @@ -409,12 +412,27 @@ found: return dma_addr; } +EXPORT_SYMBOL_GPL(swiotlb_tbl_map_single); + +/* + * Allocates bounce buffer and returns its kernel virtual address. + */ + +static void * +map_single(struct device *hwdev, phys_addr_t phys, size_t size, + enum dma_data_direction dir) +{ + dma_addr_t start_dma_addr = swiotlb_virt_to_bus(hwdev, io_tlb_start); + + return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir); +} /* * dma_addr is the kernel virtual address of the bounce buffer to unmap. */ -static void -do_unmap_single(struct device *hwdev, char *dma_addr, size_t size, int dir) +void +swiotlb_tbl_unmap_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir) { unsigned long flags; int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; @@ -455,10 +473,12 @@ do_unmap_single(struct device *hwdev, ch } spin_unlock_irqrestore(&io_tlb_lock, flags); } +EXPORT_SYMBOL_GPL(swiotlb_tbl_unmap_single); -static void -sync_single(struct device *hwdev, char *dma_addr, size_t size, - int dir, int target) +void +swiotlb_tbl_sync_single(struct device *hwdev, char *dma_addr, size_t size, + enum dma_data_direction dir, + enum dma_sync_target target) { int index = (dma_addr - io_tlb_start) >> IO_TLB_SHIFT; phys_addr_t phys = io_tlb_orig_addr[index]; @@ -482,9 +502,11 @@ sync_single(struct device *hwdev, char * BUG(); } } +EXPORT_SYMBOL_GPL(swiotlb_tbl_sync_single); static void -swiotlb_full(struct device *dev, size_t size, int dir, int do_panic) +swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, + int do_panic) { /* * Ran out of IOMMU space for this operation. This is very bad. @@ -558,14 +580,14 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page); * whatever the device wrote there. 
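Exporting swiotlb_tbl_map_single() and swiotlb_tbl_unmap_single() (together with swiotlb_bounce() earlier) is what lets an out-of-file DMA backend borrow the bounce pool. A hedged sketch of such a user, assuming pool_dma_addr has been computed for the pool base the way map_single() above does; example_bounce_roundtrip is hypothetical:

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/swiotlb.h>

static int example_bounce_roundtrip(struct device *hwdev,
				    dma_addr_t pool_dma_addr,
				    phys_addr_t phys, size_t size)
{
	/* claim a bounce slot from the shared pool */
	void *bounce = swiotlb_tbl_map_single(hwdev, pool_dma_addr, phys,
					      size, DMA_TO_DEVICE);
	if (!bounce)
		return -ENOMEM;

	/* ... program the device with the slot's bus address ... */

	/* release the slot; DMA_FROM_DEVICE mappings copy back here */
	swiotlb_tbl_unmap_single(hwdev, bounce, size, DMA_TO_DEVICE);
	return 0;
}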
*/ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir) + size_t size, enum dma_data_direction dir) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(dev_addr)) { - do_unmap_single(hwdev, phys_to_virt(paddr), size, dir); + swiotlb_tbl_unmap_single(hwdev, phys_to_virt(paddr), size, dir); return; } @@ -592,14 +614,16 @@ EXPORT_SYMBOL_GPL(swiotlb_unmap_page); */ static void swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, int dir, int target) + size_t size, enum dma_data_direction dir, + enum dma_sync_target target) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); BUG_ON(dir == DMA_NONE); if (is_swiotlb_buffer(dev_addr)) - sync_single(hwdev, phys_to_virt(paddr), size, dir, target); + swiotlb_tbl_sync_single(hwdev, phys_to_virt(paddr), size, dir, + target); } void @@ -676,7 +700,7 @@ EXPORT_SYMBOL(swiotlb_map_sg_attrs); int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_map_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -703,7 +727,7 @@ EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, - int dir) + enum dma_data_direction dir) { return swiotlb_unmap_sg_attrs(hwdev, sgl, nelems, dir, NULL); } @@ -718,7 +742,8 @@ EXPORT_SYMBOL(swiotlb_unmap_sg); */ static void swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, int dir, int target) + int nelems, enum dma_data_direction dir, + enum dma_sync_target target) { struct scatterlist *sg; int i;
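The remaining hunks in this file are all one change: the loosely typed int dir and int target parameters become enum dma_data_direction and enum dma_sync_target (the local enum definition was deleted near the top of the file because 2.6.36 moves it into <linux/swiotlb.h>). A fully typed sync call now reads as in this sketch; example_sync_for_cpu is hypothetical:

#include <linux/dma-mapping.h>
#include <linux/swiotlb.h>

static void example_sync_for_cpu(struct device *hwdev, char *bounce,
				 size_t size)
{
	/* both the direction and the target are now real enums */
	swiotlb_tbl_sync_single(hwdev, bounce, size, DMA_FROM_DEVICE,
				SYNC_FOR_CPU);
}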