From: www.kernel.org Subject: Update to 2.6.24 Patch-mainline: 2.6.24 Automatically created from "patches.kernel.org/patch-2.6.24" by xen-port-patches.py Acked-by: jbeulich@novell.com --- head-2011-03-17.orig/arch/x86/Kconfig 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/Kconfig 2011-01-31 17:56:27.000000000 +0100 @@ -86,15 +86,16 @@ config GENERIC_CMOS_UPDATE config CLOCKSOURCE_WATCHDOG def_bool y - depends on !X86_XEN + depends on !XEN config GENERIC_CLOCKEVENTS def_bool y - depends on !X86_XEN + depends on !XEN config GENERIC_CLOCKEVENTS_BROADCAST def_bool y - depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN) + depends on X86_64 || (X86_32 && X86_LOCAL_APIC) + depends on !XEN config LOCKDEP_SUPPORT def_bool y @@ -227,12 +228,12 @@ config X86_TRAMPOLINE config X86_NO_TSS bool - depends on X86_XEN || X86_64_XEN + depends on XEN default y config X86_NO_IDT bool - depends on X86_XEN || X86_64_XEN + depends on XEN default y config X86_32_LAZY_GS @@ -307,6 +308,7 @@ config X86_MPPARSE config X86_XEN bool "Xen-compatible" + depends on X86_32 select XEN select X86_PAE select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST @@ -347,6 +349,7 @@ endif config X86_64_XEN bool "Enable Xen compatible kernel" + depends on X86_64 select XEN select SWIOTLB help @@ -630,7 +633,7 @@ source "arch/x86/Kconfig.cpu" config HPET_TIMER def_bool X86_64 prompt "HPET Timer Support" if X86_32 - depends on !X86_XEN && !X86_64_XEN + depends on !XEN ---help--- Use the IA-PC HPET (High Precision Event Timer) to manage time in preference to the PIT and RTC, if a HPET is @@ -991,7 +994,7 @@ config I8K config X86_REBOOTFIXUPS bool "Enable X86 board specific fixups for reboot" - depends on X86_32 && !X86_XEN + depends on X86_32 && !XEN ---help--- This enables chipset and/or board specific fixups to be done in order to get reboot to work correctly. 
This is only needed on @@ -1401,7 +1404,7 @@ config X86_RESERVE_LOW config MATH_EMULATION bool prompt "Math emulation" if X86_32 - depends on !X86_XEN + depends on !XEN ---help--- Linux can emulate a math coprocessor (used for floating point operations) if you don't have one. 486DX and Pentium processors have @@ -1769,6 +1772,7 @@ endmenu config ARCH_ENABLE_MEMORY_HOTPLUG def_bool y depends on X86_64 || (X86_32 && HIGHMEM) + depends on !XEN config ARCH_ENABLE_MEMORY_HOTREMOVE def_bool y @@ -1963,7 +1967,7 @@ choice config PCI_GOBIOS bool "BIOS" - depends on !X86_XEN + depends on !XEN config PCI_GOMMCONFIG bool "MMConfig" @@ -2033,7 +2037,7 @@ config PCI_CNB20LE_QUIRK config XEN_PCIDEV_FRONTEND bool "Xen PCI Frontend" if X86_64 - depends on PCI && ((X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)) || X86_64_XEN) + depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64) select HOTPLUG default y help @@ -2049,7 +2053,7 @@ config XEN_PCIDEV_FE_DEBUG config DMAR bool "Support for DMA Remapping Devices (EXPERIMENTAL)" - depends on PCI_MSI && ACPI && EXPERIMENTAL + depends on PCI_MSI && ACPI && !XEN && EXPERIMENTAL help DMA remapping (DMAR) devices support enables independent address translations for Direct Memory Access (DMA) from devices. 
--- head-2011-03-17.orig/arch/x86/Makefile 2011-02-01 14:10:15.000000000 +0100 +++ head-2011-03-17/arch/x86/Makefile 2011-02-01 14:11:04.000000000 +0100 @@ -158,8 +158,8 @@ BOOT_TARGETS = bzlilo bzdisk fdimage fdi PHONY += bzImage vmlinuz $(BOOT_TARGETS) ifdef CONFIG_XEN -CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ - -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS) +KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \ + -Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(KBUILD_CPPFLAGS) ifdef CONFIG_X86_64 LDFLAGS_vmlinux := -e startup_64 @@ -173,6 +173,8 @@ KBUILD_IMAGE := $(boot)/vmlinuz vmlinuz: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE) + $(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot + $(Q)ln -fsn ../../x86/boot/$@ $(objtree)/arch/$(UTS_MACHINE)/boot/$@ else # Default kernel to build all: bzImage --- head-2011-03-17.orig/arch/x86/ia32/ia32entry-xen.S 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/ia32/ia32entry-xen.S 2011-01-31 17:56:27.000000000 +0100 @@ -125,20 +125,16 @@ sysenter_do_call: jmp int_ret_from_sys_call sysenter_tracesys: + xchgl %r9d,%ebp SAVE_REST CLEAR_RREGS + movq %r9,R9(%rsp) movq $-ENOSYS,RAX(%rsp) /* really needed? */ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST - movl %ebp, %ebp - /* no need to do an access_ok check here because rbp has been - 32bit zero extended */ -1: movl (%rbp),%r9d - .section __ex_table,"a" - .quad 1b,ia32_badarg - .previous + xchgl %ebp,%r9d jmp sysenter_do_call CFI_ENDPROC ENDPROC(ia32_sysenter_target) @@ -200,20 +196,17 @@ cstar_do_call: jmp int_ret_from_sys_call cstar_tracesys: + xchgl %r9d,%ebp SAVE_REST CLEAR_RREGS + movq %r9,R9(%rsp) movq $-ENOSYS,RAX(%rsp) /* really needed? 
*/ movq %rsp,%rdi /* &pt_regs -> arg1 */ call syscall_trace_enter LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */ RESTORE_REST + xchgl %ebp,%r9d movl RSP-ARGOFFSET(%rsp), %r8d - /* no need to do an access_ok check here because r8 has been - 32bit zero extended */ -1: movl (%r8),%r9d - .section __ex_table,"a" - .quad 1b,ia32_badarg - .previous jmp cstar_do_call END(ia32_cstar_target) --- head-2011-03-17.orig/arch/x86/include/asm/acpi.h 2011-03-11 10:52:46.000000000 +0100 +++ head-2011-03-17/arch/x86/include/asm/acpi.h 2011-03-11 10:56:05.000000000 +0100 @@ -152,6 +152,7 @@ static inline int acpi_notify_hypervisor */ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) { +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL /* * Early models (<=5) of AMD Opterons are not supposed to go into * C2 state. @@ -166,6 +167,7 @@ static inline unsigned int acpi_processo else if (c1e_detected) return 1; else +#endif return max_cstate; } --- head-2011-03-17.orig/arch/x86/include/asm/mmu.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/arch/x86/include/asm/mmu.h 2011-01-31 17:56:27.000000000 +0100 @@ -11,6 +11,9 @@ typedef struct { void *ldt; int size; +#ifdef CONFIG_XEN + unsigned has_foreign_mappings:1; +#endif struct mutex lock; void *vdso; } mm_context_t; --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/agp.h 2007-06-22 09:08:06.000000000 +0200 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/agp.h 2011-01-31 17:56:27.000000000 +0100 @@ -1,20 +1,22 @@ -#ifndef AGP_H -#define AGP_H 1 +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H #include #include #include -/* - * Functions to keep the agpgart mappings coherent with the MMU. - * The GART gives the CPU a physical alias of pages in memory. The alias region is - * mapped uncacheable. Make sure there are no conflicting mappings - * with different cachability attributes for the same page. This avoids - * data corruption on some CPUs. 
+/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cachability attributes for the same + * page. This avoids data corruption on some CPUs. */ -/* Caller's responsibility to call global_flush_tlb() for - * performance reasons */ +/* + * Caller's responsibility to call global_flush_tlb() for performance + * reasons + */ #define map_page_into_agp(page) ( \ xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \ ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)) @@ -24,9 +26,11 @@ change_page_attr(page, 1, PAGE_KERNEL)) #define flush_agp_mappings() global_flush_tlb() -/* Could use CLFLUSH here if the cpu supports it. But then it would - need to be called for each cacheline of the whole page so it may not be - worth it. Would need a page for it. */ +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ #define flush_agp_cache() wbinvd() /* Convert a physical address to an address suitable for the GART. 
*/ --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/desc.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "desc_32.h" +#else +# include "desc_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/desc_64.h 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/desc_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -34,6 +34,18 @@ static inline void clear_LDT(void) put_cpu(); } +#ifndef CONFIG_X86_NO_TSS +static inline unsigned long __store_tr(void) +{ + unsigned long tr; + + asm volatile ("str %w0":"=r" (tr)); + return tr; +} + +#define store_tr(tr) (tr) = __store_tr() +#endif + /* * This is the ldt that every process will get unless we need * something other than this. @@ -47,6 +59,18 @@ extern struct desc_ptr cpu_gdt_descr[]; /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) +#ifndef CONFIG_XEN +static inline void load_gdt(const struct desc_ptr *ptr) +{ + asm volatile("lgdt %w0"::"m" (*ptr)); +} + +static inline void store_gdt(struct desc_ptr *ptr) +{ + asm("sgdt %w0":"=m" (*ptr)); +} +#endif + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -87,6 +111,16 @@ static inline void set_system_gate_ist(i { _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } + +static inline void load_idt(const struct desc_ptr *ptr) +{ + asm volatile("lidt %w0"::"m" (*ptr)); +} + +static inline void store_idt(struct desc_ptr *dtr) +{ + asm("sidt %w0":"=m" (*dtr)); +} #endif static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/dma-mapping.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "dma-mapping_32.h" +#else +# include 
"dma-mapping_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/fixmap.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "fixmap_32.h" +#else +# include "fixmap_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypercall.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,420 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef __HYPERCALL_H__ +#define __HYPERCALL_H__ + +#ifndef __HYPERVISOR_H__ +# error "please don't include this file directly" +#endif + +#if CONFIG_XEN_COMPAT <= 0x030002 +# include <linux/string.h> /* memcpy() */ +#endif + +#ifdef CONFIG_XEN +#define HYPERCALL_ASM_OPERAND "%c" +#define HYPERCALL_LOCATION(op) (hypercall_page + (op) * 32) +#define HYPERCALL_C_OPERAND(name) "i" (HYPERCALL_LOCATION(__HYPERVISOR_##name)) +#else +#define HYPERCALL_ASM_OPERAND "*%" +#define HYPERCALL_LOCATION(op) (hypercall_stubs + (op) * 32) +#define HYPERCALL_C_OPERAND(name) "g" (HYPERCALL_LOCATION(__HYPERVISOR_##name)) +#endif + +#define HYPERCALL_ARG(arg, n) \ + register typeof((arg)+0) __arg##n asm(HYPERCALL_arg##n) = (arg) + +#define _hypercall0(type, name) \ +({ \ + type __res; \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "1" \ + : "=a" (__res) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall1(type, name, arg) \ +({ \ + type __res; \ + HYPERCALL_ARG(arg, 1); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "2" \ + : "=a" (__res), "+r" (__arg1) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "3" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "4" \ + : "=a" (__res), "+r" (__arg1), \ + "+r" (__arg2), "+r" (__arg3) \ + : 
HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "5" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + HYPERCALL_ARG(a5, 5); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "6" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall(type, op, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + HYPERCALL_ARG(a5, 5); \ + asm volatile ( \ + "call *%6" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ + : "g" (HYPERCALL_LOCATION(op)) \ + : "memory" ); \ + __res; \ +}) + +#ifdef CONFIG_X86_32 +# include "hypercall_32.h" +#else +# include "hypercall_64.h" +#endif + +static inline int __must_check +HYPERVISOR_set_trap_table( + const trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int __must_check +HYPERVISOR_mmu_update( + mmu_update_t *req, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + if (arch_use_lazy_mmu_mode()) + return xen_multi_mmu_update(req, count, success_count, domid); + return _hypercall4(int, mmu_update, req, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_mmuext_op( + struct mmuext_op *op, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + if 
(arch_use_lazy_mmu_mode()) + return xen_multi_mmuext_op(op, count, success_count, domid); + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_set_gdt( + unsigned long *frame_list, unsigned int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int __must_check +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int __must_check +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} +#endif + +static inline int __must_check +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_platform_op( + struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, platform_op); +} + +struct xen_mc; +static inline int __must_check +HYPERVISOR_mca( + struct xen_mc *mc_op) +{ + mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; + return _hypercall1(int, mca, mc_op); +} + +static inline int __must_check +HYPERVISOR_set_debugreg( + unsigned int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long __must_check +HYPERVISOR_get_debugreg( + unsigned int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int __must_check +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + if (arch_use_lazy_mmu_mode()) + xen_multicall_flush(false); + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_multicall( + multicall_entry_t *call_list, unsigned int nr_calls) +{ + return _hypercall2(int, 
multicall, call_list, nr_calls); +} + +static inline int __must_check +HYPERVISOR_event_channel_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_console_io( + int cmd, unsigned int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int __must_check +HYPERVISOR_physdev_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + if (arch_use_lazy_mmu_mode()) + xen_multicall_flush(false); + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static inline int __must_check +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int __must_check +HYPERVISOR_vcpu_op( + int cmd, unsigned int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int __must_check +HYPERVISOR_suspend( + unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == 
-ENOSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +#endif + + return rc; +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int +HYPERVISOR_nmi_op( + unsigned long op, void *arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} +#endif + +#ifndef CONFIG_XEN +static inline unsigned long __must_check +HYPERVISOR_hvm_op( + int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} +#endif + +static inline int __must_check +HYPERVISOR_callback_op( + int cmd, const void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_xenoprof_op( + int op, void *arg) +{ + return _hypercall2(int, xenoprof_op, op, arg); +} + +static inline int __must_check +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + +static inline int __must_check +HYPERVISOR_tmem_op( + struct tmem_op *op) +{ + return _hypercall1(int, tmem_op, op); +} + +#endif /* __HYPERCALL_H__ */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypercall_32.h 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypercall_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -1,191 +1,10 @@ -/****************************************************************************** - * hypercall.h - * - * Linux-specific hypervisor handling. 
- * - * Copyright (c) 2002-2004, K A Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ - -#include <linux/string.h> /* memcpy() */ -#include <linux/stringify.h> - -#ifndef __HYPERVISOR_H__ -# error "please don't include this file directly" -#endif - -#ifdef CONFIG_XEN -#define HYPERCALL_STR(name) \ - "call hypercall_page + ("__stringify(__HYPERVISOR_##name)" * 32)" -#else -#define HYPERCALL_STR(name) \ - "mov hypercall_stubs,%%eax; " \ - "add $("__stringify(__HYPERVISOR_##name)" * 32),%%eax; "\ - "call *%%eax" -#endif - -#define _hypercall0(type, name) \ -({ \ - type __res; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res) \ - : \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall1(type, name, a1) \ -({ \ - type __res; \ - long __ign1; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1) \ - : "1" ((long)(a1)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall2(type, name, a1, a2) \ -({ \ - type __res; \ - long __ign1, __ign2; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2) \ - : "1" ((long)(a1)), "2" ((long)(a2)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall3(type, name, a1, a2, a3) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall4(type, name, a1, a2, a3, a4) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3, __ign4; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" (__ign4) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), "4" ((long)(a4)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3, __ign4, __ign5; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=b" (__ign1), "=c" (__ign2), \ - "=d" (__ign3), "=S" 
(__ign4), "=D" (__ign5) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)), "4" ((long)(a4)), \ - "5" ((long)(a5)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall(type, op, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - register typeof((a1)+0) __arg1 asm("ebx") = (a1); \ - register typeof((a2)+0) __arg2 asm("ecx") = (a2); \ - register typeof((a3)+0) __arg3 asm("edx") = (a3); \ - register typeof((a4)+0) __arg4 asm("esi") = (a4); \ - register typeof((a5)+0) __arg5 asm("edi") = (a5); \ - asm volatile ( \ - "call *%6" \ - : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ - "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ - : "0" (hypercall_page + (op) * 32) \ - : "memory" ); \ - __res; \ -}) - -static inline int __must_check -HYPERVISOR_set_trap_table( - const trap_info_t *table) -{ - return _hypercall1(int, set_trap_table, table); -} - -static inline int __must_check -HYPERVISOR_mmu_update( - mmu_update_t *req, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmu_update(req, count, success_count, domid); - return _hypercall4(int, mmu_update, req, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_mmuext_op( - struct mmuext_op *op, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmuext_op(op, count, success_count, domid); - return _hypercall4(int, mmuext_op, op, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_set_gdt( - unsigned long *frame_list, unsigned int entries) -{ - return _hypercall2(int, set_gdt, frame_list, entries); -} - -static inline int __must_check -HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - return _hypercall2(int, stack_switch, ss, esp); -} +#define HYPERCALL_arg1 "ebx" +#define HYPERCALL_arg2 "ecx" +#define HYPERCALL_arg3 "edx" +#define HYPERCALL_arg4 "esi" +#define HYPERCALL_arg5 "edi" +#if CONFIG_XEN_COMPAT <= 
0x030002 static inline int __must_check HYPERVISOR_set_callbacks( unsigned long event_selector, unsigned long event_address, @@ -195,80 +14,24 @@ HYPERVISOR_set_callbacks( event_selector, event_address, failsafe_selector, failsafe_address); } - -static inline int -HYPERVISOR_fpu_taskswitch( - int set) -{ - return _hypercall1(int, fpu_taskswitch, set); -} - -static inline int __must_check -HYPERVISOR_sched_op_compat( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op_compat, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_sched_op( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op, cmd, arg); -} +#endif static inline long __must_check HYPERVISOR_set_timer_op( u64 timeout) { - unsigned long timeout_hi = (unsigned long)(timeout>>32); - unsigned long timeout_lo = (unsigned long)timeout; - return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi); -} - -static inline int __must_check -HYPERVISOR_platform_op( - struct xen_platform_op *platform_op) -{ - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, platform_op, platform_op); -} - -static inline int __must_check -HYPERVISOR_set_debugreg( - unsigned int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long __must_check -HYPERVISOR_get_debugreg( - unsigned int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); + return _hypercall2(long, set_timer_op, + (unsigned long)timeout, + (unsigned long)(timeout>>32)); } static inline int __must_check HYPERVISOR_update_descriptor( u64 ma, u64 desc) { - return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32); -} - -static inline int __must_check -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall2(int, memory_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_multicall( - multicall_entry_t *call_list, unsigned int 
nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); + return _hypercall4(int, update_descriptor, + (unsigned long)ma, (unsigned long)(ma>>32), + (unsigned long)desc, (unsigned long)(desc>>32)); } static inline int __must_check @@ -287,67 +50,6 @@ HYPERVISOR_update_va_mapping( } static inline int __must_check -HYPERVISOR_event_channel_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, event_channel_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct evtchn_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_console_io( - int cmd, unsigned int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int __must_check -HYPERVISOR_physdev_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, physdev_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct physdev_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall3(int, grant_table_op, cmd, uop, count); -} - -static inline int __must_check HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) { @@ -358,86 +60,3 @@ HYPERVISOR_update_va_mapping_otherdomain return _hypercall5(int, update_va_mapping_otherdomain, va, new_val.pte_low, pte_hi, flags, domid); } - -static inline int __must_check 
-HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - return _hypercall2(int, vm_assist, cmd, type); -} - -static inline int __must_check -HYPERVISOR_vcpu_op( - int cmd, unsigned int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -static inline int __must_check -HYPERVISOR_suspend( - unsigned long srec) -{ - struct sched_shutdown sched_shutdown = { - .reason = SHUTDOWN_suspend - }; - - int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, - &sched_shutdown, srec); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (rc == -ENOSYS) - rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); -#endif - - return rc; -} - -#if CONFIG_XEN_COMPAT <= 0x030002 -static inline int -HYPERVISOR_nmi_op( - unsigned long op, void *arg) -{ - return _hypercall2(int, nmi_op, op, arg); -} -#endif - -#ifndef CONFIG_XEN -static inline unsigned long __must_check -HYPERVISOR_hvm_op( - int op, void *arg) -{ - return _hypercall2(unsigned long, hvm_op, op, arg); -} -#endif - -static inline int __must_check -HYPERVISOR_callback_op( - int cmd, const void *arg) -{ - return _hypercall2(int, callback_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_xenoprof_op( - int op, void *arg) -{ - return _hypercall2(int, xenoprof_op, op, arg); -} - -static inline int __must_check -HYPERVISOR_kexec_op( - unsigned long op, void *args) -{ - return _hypercall2(int, kexec_op, op, args); -} - -static inline int __must_check -HYPERVISOR_tmem_op( - struct tmem_op *op) -{ - return _hypercall1(int, tmem_op, op); -} - - -#endif /* __HYPERCALL_H__ */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypercall_64.h 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypercall_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -1,198 +1,10 @@ -/****************************************************************************** - * hypercall.h - * - * Linux-specific hypervisor handling. 
- * - * Copyright (c) 2002-2004, K A Fraser - * - * 64-bit updates: - * Benjamin Liu - * Jun Nakajima - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- */ - -#ifndef __HYPERCALL_H__ -#define __HYPERCALL_H__ - -#include /* memcpy() */ -#include -#include - -#ifndef __HYPERVISOR_H__ -# error "please don't include this file directly" -#endif - -#ifdef CONFIG_XEN -#define HYPERCALL_STR(name) \ - "call hypercall_page + ("__stringify(__HYPERVISOR_##name)" * 32)" -#else -#define HYPERCALL_STR(name) \ - "mov $("__stringify(__HYPERVISOR_##name)" * 32),%%eax; "\ - "add hypercall_stubs(%%rip),%%rax; " \ - "call *%%rax" -#endif - -#define _hypercall0(type, name) \ -({ \ - type __res; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res) \ - : \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall1(type, name, a1) \ -({ \ - type __res; \ - long __ign1; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1) \ - : "1" ((long)(a1)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall2(type, name, a1, a2) \ -({ \ - type __res; \ - long __ign1, __ign2; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2) \ - : "1" ((long)(a1)), "2" ((long)(a2)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall3(type, name, a1, a2, a3) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ - "=d" (__ign3) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall4(type, name, a1, a2, a3, a4) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - register long __arg4 asm("r10") = (long)(a4); \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ - "=d" (__ign3), "+r" (__arg4) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - long __ign1, __ign2, __ign3; \ - register long __arg4 asm("r10") = (long)(a4); \ - register long __arg5 asm("r8") = 
(long)(a5); \ - asm volatile ( \ - HYPERCALL_STR(name) \ - : "=a" (__res), "=D" (__ign1), "=S" (__ign2), \ - "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \ - : "1" ((long)(a1)), "2" ((long)(a2)), \ - "3" ((long)(a3)) \ - : "memory" ); \ - __res; \ -}) - -#define _hypercall(type, op, a1, a2, a3, a4, a5) \ -({ \ - type __res; \ - register typeof((a1)+0) __arg1 asm("rdi") = (a1); \ - register typeof((a2)+0) __arg2 asm("rsi") = (a2); \ - register typeof((a3)+0) __arg3 asm("rdx") = (a3); \ - register typeof((a4)+0) __arg4 asm("r10") = (a4); \ - register typeof((a5)+0) __arg5 asm("r8") = (a5); \ - asm volatile ( \ - "call *%6" \ - : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ - "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ - : "0" (hypercall_page + (op) * 32) \ - : "memory" ); \ - __res; \ -}) - -static inline int __must_check -HYPERVISOR_set_trap_table( - const trap_info_t *table) -{ - return _hypercall1(int, set_trap_table, table); -} - -static inline int __must_check -HYPERVISOR_mmu_update( - mmu_update_t *req, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmu_update(req, count, success_count, domid); - return _hypercall4(int, mmu_update, req, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_mmuext_op( - struct mmuext_op *op, unsigned int count, unsigned int *success_count, - domid_t domid) -{ - if (arch_use_lazy_mmu_mode()) - return xen_multi_mmuext_op(op, count, success_count, domid); - return _hypercall4(int, mmuext_op, op, count, success_count, domid); -} - -static inline int __must_check -HYPERVISOR_set_gdt( - unsigned long *frame_list, unsigned int entries) -{ - return _hypercall2(int, set_gdt, frame_list, entries); -} - -static inline int __must_check -HYPERVISOR_stack_switch( - unsigned long ss, unsigned long esp) -{ - return _hypercall2(int, stack_switch, ss, esp); -} +#define HYPERCALL_arg1 "rdi" +#define HYPERCALL_arg2 "rsi" +#define HYPERCALL_arg3 "rdx" 
+#define HYPERCALL_arg4 "r10" +#define HYPERCALL_arg5 "r8" +#if CONFIG_XEN_COMPAT <= 0x030002 static inline int __must_check HYPERVISOR_set_callbacks( unsigned long event_address, unsigned long failsafe_address, @@ -201,27 +13,7 @@ HYPERVISOR_set_callbacks( return _hypercall3(int, set_callbacks, event_address, failsafe_address, syscall_address); } - -static inline int -HYPERVISOR_fpu_taskswitch( - int set) -{ - return _hypercall1(int, fpu_taskswitch, set); -} - -static inline int __must_check -HYPERVISOR_sched_op_compat( - int cmd, unsigned long arg) -{ - return _hypercall2(int, sched_op_compat, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_sched_op( - int cmd, void *arg) -{ - return _hypercall2(int, sched_op, cmd, arg); -} +#endif static inline long __must_check HYPERVISOR_set_timer_op( @@ -231,34 +23,6 @@ HYPERVISOR_set_timer_op( } static inline int __must_check -HYPERVISOR_platform_op( - struct xen_platform_op *platform_op) -{ - platform_op->interface_version = XENPF_INTERFACE_VERSION; - return _hypercall1(int, platform_op, platform_op); -} -static inline int __must_check -HYPERVISOR_mca( - struct xen_mc *mc_op) -{ - mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; - return _hypercall1(int, mca, mc_op); -} -static inline int __must_check -HYPERVISOR_set_debugreg( - unsigned int reg, unsigned long value) -{ - return _hypercall2(int, set_debugreg, reg, value); -} - -static inline unsigned long __must_check -HYPERVISOR_get_debugreg( - unsigned int reg) -{ - return _hypercall1(unsigned long, get_debugreg, reg); -} - -static inline int __must_check HYPERVISOR_update_descriptor( unsigned long ma, unsigned long word) { @@ -266,22 +30,6 @@ HYPERVISOR_update_descriptor( } static inline int __must_check -HYPERVISOR_memory_op( - unsigned int cmd, void *arg) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall2(int, memory_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_multicall( - multicall_entry_t 
*call_list, unsigned int nr_calls) -{ - return _hypercall2(int, multicall, call_list, nr_calls); -} - -static inline int __must_check HYPERVISOR_update_va_mapping( unsigned long va, pte_t new_val, unsigned long flags) { @@ -291,67 +39,6 @@ HYPERVISOR_update_va_mapping( } static inline int __must_check -HYPERVISOR_event_channel_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, event_channel_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct evtchn_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, event_channel_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_xen_version( - int cmd, void *arg) -{ - return _hypercall2(int, xen_version, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_console_io( - int cmd, unsigned int count, char *str) -{ - return _hypercall3(int, console_io, cmd, count, str); -} - -static inline int __must_check -HYPERVISOR_physdev_op( - int cmd, void *arg) -{ - int rc = _hypercall2(int, physdev_op, cmd, arg); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (unlikely(rc == -ENOSYS)) { - struct physdev_op op; - op.cmd = cmd; - memcpy(&op.u, arg, sizeof(op.u)); - rc = _hypercall1(int, physdev_op_compat, &op); - memcpy(arg, &op.u, sizeof(op.u)); - } -#endif - - return rc; -} - -static inline int __must_check -HYPERVISOR_grant_table_op( - unsigned int cmd, void *uop, unsigned int count) -{ - if (arch_use_lazy_mmu_mode()) - xen_multicall_flush(false); - return _hypercall3(int, grant_table_op, cmd, uop, count); -} - -static inline int __must_check HYPERVISOR_update_va_mapping_otherdomain( unsigned long va, pte_t new_val, unsigned long flags, domid_t domid) { @@ -360,90 +47,8 @@ HYPERVISOR_update_va_mapping_otherdomain } static inline int __must_check -HYPERVISOR_vm_assist( - unsigned int cmd, unsigned int type) -{ - return _hypercall2(int, vm_assist, cmd, type); -} - -static inline int 
__must_check -HYPERVISOR_vcpu_op( - int cmd, unsigned int vcpuid, void *extra_args) -{ - return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); -} - -static inline int __must_check HYPERVISOR_set_segment_base( int reg, unsigned long value) { return _hypercall2(int, set_segment_base, reg, value); } - -static inline int __must_check -HYPERVISOR_suspend( - unsigned long srec) -{ - struct sched_shutdown sched_shutdown = { - .reason = SHUTDOWN_suspend - }; - - int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, - &sched_shutdown, srec); - -#if CONFIG_XEN_COMPAT <= 0x030002 - if (rc == -ENOSYS) - rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, - SHUTDOWN_suspend, srec); -#endif - - return rc; -} - -#if CONFIG_XEN_COMPAT <= 0x030002 -static inline int -HYPERVISOR_nmi_op( - unsigned long op, void *arg) -{ - return _hypercall2(int, nmi_op, op, arg); -} -#endif - -#ifndef CONFIG_XEN -static inline unsigned long __must_check -HYPERVISOR_hvm_op( - int op, void *arg) -{ - return _hypercall2(unsigned long, hvm_op, op, arg); -} -#endif - -static inline int __must_check -HYPERVISOR_callback_op( - int cmd, const void *arg) -{ - return _hypercall2(int, callback_op, cmd, arg); -} - -static inline int __must_check -HYPERVISOR_xenoprof_op( - int op, void *arg) -{ - return _hypercall2(int, xenoprof_op, op, arg); -} - -static inline int __must_check -HYPERVISOR_kexec_op( - unsigned long op, void *args) -{ - return _hypercall2(int, kexec_op, op, args); -} - -static inline int __must_check -HYPERVISOR_tmem_op( - struct tmem_op *op) -{ - return _hypercall1(int, tmem_op, op); -} - -#endif /* __HYPERCALL_H__ */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypervisor.h 2011-01-31 17:56:27.000000000 +0100 @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -200,7 +201,6 @@ static inline void xen_multicall_flush(b extern char 
hypercall_page[PAGE_SIZE]; #else extern char *hypercall_stubs; -#define hypercall_page hypercall_stubs #define is_running_on_xen() (!!hypercall_stubs) #endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/io.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "io_32.h" +#else +# include "io_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/irqflags.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "irqflags_32.h" +#else +# include "irqflags_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irqflags_32.h 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/irqflags_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -150,6 +150,23 @@ static inline int raw_irqs_disabled_flag \ raw_irqs_disabled_flags(flags); \ }) + +/* + * makes the traced hardirq state match with the machine state + * + * should be a rarely used function, only in places where its + * otherwise impossible to know the irq state, like in traps. 
+ */ +static inline void trace_hardirqs_fixup_flags(unsigned long flags) +{ + if (raw_irqs_disabled_flags(flags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +} + +#define trace_hardirqs_fixup() \ + trace_hardirqs_fixup_flags(__raw_local_save_flags()) #endif /* __ASSEMBLY__ */ /* @@ -181,4 +198,17 @@ static inline int raw_irqs_disabled_flag # define TRACE_IRQS_OFF #endif +#ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT \ + pushl %eax; \ + pushl %ecx; \ + pushl %edx; \ + call lockdep_sys_exit; \ + popl %edx; \ + popl %ecx; \ + popl %eax; +#else +# define LOCKDEP_SYS_EXIT +#endif + #endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/irqflags_64.h 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/irqflags_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -116,6 +116,22 @@ static inline int raw_irqs_disabled_flag }) /* + * makes the traced hardirq state match with the machine state + * + * should be a rarely used function, only in places where its + * otherwise impossible to know the irq state, like in traps. 
+ */ +static inline void trace_hardirqs_fixup_flags(unsigned long flags) +{ + if (raw_irqs_disabled_flags(flags)) + trace_hardirqs_off(); + else + trace_hardirqs_on(); +} + +#define trace_hardirqs_fixup() \ + trace_hardirqs_fixup_flags(__raw_local_save_flags()) +/* * Used in the idle loop; sti takes one instruction cycle * to complete: */ @@ -143,6 +159,20 @@ static inline void halt(void) # define TRACE_IRQS_ON # define TRACE_IRQS_OFF # endif +# ifdef CONFIG_DEBUG_LOCK_ALLOC +# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk +# define LOCKDEP_SYS_EXIT_IRQ \ + TRACE_IRQS_ON; \ + sti; \ + SAVE_REST; \ + LOCKDEP_SYS_EXIT; \ + RESTORE_REST; \ + cli; \ + TRACE_IRQS_OFF; +# else +# define LOCKDEP_SYS_EXIT +# define LOCKDEP_SYS_EXIT_IRQ +# endif #endif #endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/maddr.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "maddr_32.h" +#else +# include "maddr_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "mmu_context_32.h" +#else +# include "mmu_context_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pci.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,100 @@ +#ifndef __x86_PCI_H +#define __x86_PCI_H + +#include /* for struct page */ +#include +#include +#include +#include +#include + + +#ifdef __KERNEL__ + +struct pci_sysdata { + int domain; /* PCI domain */ + int node; /* NUMA node */ +#ifdef CONFIG_X86_64 + void* iommu; /* IOMMU private data */ +#endif +#ifdef CONFIG_XEN_PCIDEV_FRONTEND + struct pcifront_device *pdev; +#endif +}; + +/* scan a bus after allocating a pci_sysdata for it */ +extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); + +static inline int pci_domain_nr(struct pci_bus *bus) +{ + 
struct pci_sysdata *sd = bus->sysdata; + return sd->domain; +} + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return pci_domain_nr(bus); +} + + +/* Can be used to override the logic in pci_scan_bus for skipping + already-configured bus numbers - to be used for buggy BIOSes + or architectures with incomplete PCI setup by the loader */ + +#ifdef CONFIG_PCI +extern unsigned int pcibios_assign_all_busses(void); +#else +#define pcibios_assign_all_busses() 0 +#endif + +#include +#define pcibios_scan_all_fns(a, b) (!is_initial_xendomain()) + +extern unsigned long pci_mem_start; +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM (pci_mem_start) + +#define PCIBIOS_MIN_CARDBUS_IO 0x4000 + +void pcibios_config_init(void); +struct pci_bus * pcibios_scan_root(int bus); + +void pcibios_set_master(struct pci_dev *dev); +void pcibios_penalize_isa_irq(int irq, int active); +struct irq_routing_table *pcibios_get_irq_routing_table(void); +int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq); + + +#define HAVE_PCI_MMAP +extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + + +#ifdef CONFIG_PCI +static inline void pci_dma_burst_advice(struct pci_dev *pdev, + enum pci_dma_burst_strategy *strat, + unsigned long *strategy_parameter) +{ + *strat = PCI_DMA_BURST_INFINITY; + *strategy_parameter = ~0UL; +} +#endif + + +#endif /* __KERNEL__ */ + +#ifdef CONFIG_X86_32 +# include "pci_32.h" +#else +# include "pci_64.h" +#endif + +/* implement the pci_ DMA API in terms of the generic device dma_ one */ +#include + +/* generic pci stuff */ +#include + + + +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "pgalloc_32.h" +#else +# include "pgalloc_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgalloc_64.h 
2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -115,6 +115,8 @@ static inline void pgd_list_del(pgd_t *p spin_unlock(&pgd_lock); } +extern void pgd_test_and_unpin(pgd_t *); + static inline pgd_t *pgd_alloc(struct mm_struct *mm) { /* @@ -126,6 +128,7 @@ static inline pgd_t *pgd_alloc(struct mm pgd = (pgd_t *)__get_free_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 1); if (!pgd) return NULL; + pgd_test_and_unpin(pgd); pgd_list_add(pgd, mm); /* * Copy kernel pointers in from init. @@ -147,29 +150,8 @@ static inline pgd_t *pgd_alloc(struct mm static inline void pgd_free(pgd_t *pgd) { - pte_t *ptep = virt_to_ptep(pgd); - pgd_list_del(pgd); - - if (!pte_write(*ptep)) { - xen_pgd_unpin(__pa(pgd)); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)pgd, - pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL), - 0)); - } - - ptep = virt_to_ptep(__user_pgd(pgd)); - - if (!pte_write(*ptep)) { - xen_pgd_unpin(__pa(__user_pgd(pgd))); - BUG_ON(HYPERVISOR_update_va_mapping( - (unsigned long)__user_pgd(pgd), - pfn_pte(virt_to_phys(__user_pgd(pgd))>>PAGE_SHIFT, - PAGE_KERNEL), - 0)); - } - + pgd_test_and_unpin(pgd); free_pages((unsigned long)pgd, 1); } --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "pgtable_32.h" +#else +# include "pgtable_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-02-07 15:38:23.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -17,10 +17,7 @@ #include #include -#ifndef _I386_BITOPS_H -#include -#endif - +#include #include #include #include @@ -40,7 +37,7 @@ extern spinlock_t pgd_lock; extern struct page *pgd_list; void check_pgt_cache(void); -void pmd_ctor(void *, struct kmem_cache *, unsigned long); +void 
pmd_ctor(struct kmem_cache *, void *); void pgtable_cache_init(void); void paging_init(void); --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-07 15:38:18.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -9,7 +9,7 @@ * the x86-64 page table tree. */ #include -#include +#include #include #include #include @@ -139,6 +139,7 @@ static inline void pgd_clear (pgd_t * pg #define MAXMEM _AC(0x6fffffffff, UL) #define VMALLOC_START _AC(0xffffc20000000000, UL) #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) +#define VMEMMAP_START _AC(0xffffe20000000000, UL) #define MODULES_VADDR _AC(0xffffffff88000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_LEN (MODULES_END - MODULES_VADDR) --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "processor_32.h" +#else +# include "processor_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor_32.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -70,17 +70,22 @@ struct cpuinfo_x86 { char pad0; int x86_power; unsigned long loops_per_jiffy; +#ifndef CONFIG_XEN #ifdef CONFIG_SMP cpumask_t llc_shared_map; /* cpus sharing the last level cache */ #endif unsigned char x86_max_cores; /* cpuid returned max cores value */ unsigned char apicid; +#endif unsigned short x86_clflush_size; -#ifdef CONFIG_SMP +#ifdef CONFIG_X86_HT unsigned char booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical processor id. 
*/ __u8 cpu_core_id; /* Core id */ #endif +#ifdef CONFIG_SMP + __u8 cpu_index; /* index into per_cpu list */ +#endif } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -106,14 +111,19 @@ DECLARE_PER_CPU(struct tss_struct, init_ #endif #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif -extern int cpu_llc_id[NR_CPUS]; +/* + * the following now lives in the per cpu area: + * extern int cpu_llc_id[NR_CPUS]; + */ +DECLARE_PER_CPU(u8, cpu_llc_id); extern char ignore_fpu_irq; void __init cpu_detect(struct cpuinfo_x86 *c); @@ -560,7 +570,9 @@ static inline void xen_set_iopl_mask(uns * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx * resulting in stale register contents being returned. 
*/ -static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) +static inline void cpuid(unsigned int op, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = 0; @@ -568,8 +580,9 @@ static inline void cpuid(unsigned int op } /* Some CPUID calls want 'count' to be placed in ecx */ -static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, - int *edx) +static inline void cpuid_count(unsigned int op, int count, + unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) { *eax = op; *ecx = count; @@ -639,6 +652,17 @@ static inline unsigned int cpuid_edx(uns #define K7_NOP7 ".byte 0x8D,0x04,0x05,0,0,0,0\n" #define K7_NOP8 K7_NOP7 ASM_NOP1 +/* P6 nops */ +/* uses eax dependencies (Intel-recommended choice) */ +#define P6_NOP1 GENERIC_NOP1 +#define P6_NOP2 ".byte 0x66,0x90\n" +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" + #ifdef CONFIG_MK8 #define ASM_NOP1 K8_NOP1 #define ASM_NOP2 K8_NOP2 @@ -657,6 +681,17 @@ static inline unsigned int cpuid_edx(uns #define ASM_NOP6 K7_NOP6 #define ASM_NOP7 K7_NOP7 #define ASM_NOP8 K7_NOP8 +#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \ + defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \ + defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4) +#define ASM_NOP1 P6_NOP1 +#define ASM_NOP2 P6_NOP2 +#define ASM_NOP3 P6_NOP3 +#define ASM_NOP4 P6_NOP4 +#define ASM_NOP5 P6_NOP5 +#define ASM_NOP6 P6_NOP6 +#define ASM_NOP7 P6_NOP7 +#define ASM_NOP8 P6_NOP8 #else #define ASM_NOP1 GENERIC_NOP1 #define ASM_NOP2 GENERIC_NOP2 --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor_64.h 2011-01-31 17:49:31.000000000 +0100 +++ 
head-2011-03-17/arch/x86/include/mach-xen/asm/processor_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -62,19 +62,26 @@ struct cpuinfo_x86 { int x86_cache_alignment; int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/ __u8 x86_virt_bits, x86_phys_bits; +#ifndef CONFIG_XEN __u8 x86_max_cores; /* cpuid returned max cores value */ +#endif __u32 x86_power; __u32 extended_cpuid_level; /* Max extended CPUID function supported */ unsigned long loops_per_jiffy; -#ifdef CONFIG_SMP +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) cpumask_t llc_shared_map; /* cpus sharing the last level cache */ #endif +#ifndef CONFIG_XEN __u8 apicid; -#ifdef CONFIG_SMP +#endif +#ifdef CONFIG_X86_HT __u8 booted_cores; /* number of cores as seen by OS */ __u8 phys_proc_id; /* Physical Processor id. */ __u8 cpu_core_id; /* Core id. */ #endif +#ifdef CONFIG_SMP + __u8 cpu_index; /* index into per_cpu list */ +#endif } ____cacheline_aligned; #define X86_VENDOR_INTEL 0 @@ -88,11 +95,12 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff #ifdef CONFIG_SMP -extern struct cpuinfo_x86 cpu_data[]; -#define current_cpu_data cpu_data[smp_processor_id()] +DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info); +#define cpu_data(cpu) per_cpu(cpu_info, cpu) +#define current_cpu_data cpu_data(smp_processor_id()) #else -#define cpu_data (&boot_cpu_data) -#define current_cpu_data boot_cpu_data +#define cpu_data(cpu) boot_cpu_data +#define current_cpu_data boot_cpu_data #endif extern char ignore_irq13; @@ -343,6 +351,16 @@ struct extended_sigtable { }; +#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2) +#define ASM_NOP1 P6_NOP1 +#define ASM_NOP2 P6_NOP2 +#define ASM_NOP3 P6_NOP3 +#define ASM_NOP4 P6_NOP4 +#define ASM_NOP5 P6_NOP5 +#define ASM_NOP6 P6_NOP6 +#define ASM_NOP7 P6_NOP7 +#define ASM_NOP8 P6_NOP8 +#else #define ASM_NOP1 K8_NOP1 #define ASM_NOP2 K8_NOP2 #define ASM_NOP3 K8_NOP3 @@ -351,6 +369,7 @@ struct extended_sigtable { #define ASM_NOP6 K8_NOP6 #define ASM_NOP7 K8_NOP7 #define 
ASM_NOP8 K8_NOP8 +#endif /* Opteron nops */ #define K8_NOP1 ".byte 0x90\n" @@ -362,6 +381,17 @@ struct extended_sigtable { #define K8_NOP7 K8_NOP4 K8_NOP3 #define K8_NOP8 K8_NOP4 K8_NOP4 +/* P6 nops */ +/* uses eax dependencies (Intel-recommended choice) */ +#define P6_NOP1 ".byte 0x90\n" +#define P6_NOP2 ".byte 0x66,0x90\n" +#define P6_NOP3 ".byte 0x0f,0x1f,0x00\n" +#define P6_NOP4 ".byte 0x0f,0x1f,0x40,0\n" +#define P6_NOP5 ".byte 0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP6 ".byte 0x66,0x0f,0x1f,0x44,0x00,0\n" +#define P6_NOP7 ".byte 0x0f,0x1f,0x80,0,0,0,0\n" +#define P6_NOP8 ".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n" + #define ASM_NOP_MAX 8 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */ @@ -377,12 +407,6 @@ static inline void sync_core(void) asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory"); } -#define ARCH_HAS_PREFETCH -static inline void prefetch(void *x) -{ - asm volatile("prefetcht0 (%0)" :: "r" (x)); -} - #define ARCH_HAS_PREFETCHW 1 static inline void prefetchw(void *x) { @@ -398,11 +422,6 @@ static inline void prefetchw(void *x) #define cpu_relax() rep_nop() -static inline void serialize_cpu(void) -{ - __asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx"); -} - static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "smp_32.h" +#else +# include "smp_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp_32.h 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -11,7 +11,7 @@ #endif #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__) -#include +#include #include #include #ifdef CONFIG_X86_IO_APIC @@ -29,19 +29,20 @@ extern void smp_alloc_memory(void); extern int pic_mode; -extern int 
smp_num_siblings; -extern cpumask_t cpu_sibling_map[]; -extern cpumask_t cpu_core_map[]; extern void (*mtrr_hook) (void); extern void zap_low_mappings (void); extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); +#ifndef CONFIG_XEN #define MAX_APICID 256 -extern u8 x86_cpu_to_apicid[]; +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); +#endif -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) (cpu) #ifdef CONFIG_HOTPLUG_CPU extern void cpu_exit_clear(void); @@ -148,7 +149,7 @@ extern unsigned int num_processors; #else /* CONFIG_SMP */ #define safe_smp_processor_id() 0 -#define cpu_physical_id(cpu) boot_cpu_physical_apicid +#define cpu_physical_id(cpu) 0 #define NO_PROC_ID 0xFF /* No processor magic marker */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/smp_64.h 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/smp_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -38,12 +38,11 @@ extern void smp_alloc_memory(void); extern volatile unsigned long smp_invalidate_needed; extern void lock_ipi_call_lock(void); extern void unlock_ipi_call_lock(void); -extern int smp_num_siblings; extern void smp_send_reschedule(int cpu); +extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *), + void *info, int wait); -extern cpumask_t cpu_sibling_map[NR_CPUS]; -extern cpumask_t cpu_core_map[NR_CPUS]; -extern u8 cpu_llc_id[NR_CPUS]; +DECLARE_PER_CPU(u8, cpu_llc_id); #define SMP_TRAMPOLINE_BASE 0x6000 @@ -70,6 +69,9 @@ extern unsigned __cpuinitdata disabled_c #endif /* CONFIG_SMP */ +#define safe_smp_processor_id() smp_processor_id() + +#ifndef CONFIG_XEN #ifdef CONFIG_X86_LOCAL_APIC static inline int hard_smp_processor_id(void) { @@ -82,8 +84,9 @@ static inline int hard_smp_processor_id( * Some lowlevel functions might want to know about * the real APIC ID <-> CPU # mapping. 
*/ -extern u8 x86_cpu_to_apicid[NR_CPUS]; /* physical ID */ -extern u8 x86_cpu_to_log_apicid[NR_CPUS]; +extern u8 __initdata x86_cpu_to_apicid_init[]; +extern void *x86_cpu_to_apicid_ptr; +DECLARE_PER_CPU(u8, x86_cpu_to_apicid); /* physical ID */ extern u8 bios_cpu_apicid[]; #ifdef CONFIG_X86_LOCAL_APIC @@ -95,6 +98,7 @@ static inline int cpu_present_to_apicid( return BAD_APICID; } #endif +#endif /* CONFIG_XEN */ #ifndef CONFIG_SMP #define stack_smp_processor_id() 0 @@ -118,9 +122,9 @@ static __inline int logical_smp_processo #endif #ifdef CONFIG_SMP -#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu] +#define cpu_physical_id(cpu) (cpu) #else -#define cpu_physical_id(cpu) boot_cpu_id +#define cpu_physical_id(cpu) 0 #endif /* !CONFIG_SMP */ #endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/swiotlb.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "swiotlb_32.h" +#else +# include_next +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "system_32.h" +#else +# include "system_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system_32.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/system_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -9,6 +9,7 @@ #include #ifdef __KERNEL__ +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ struct task_struct; /* one of the stranger aspects of C forward declarations.. 
*/ extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next)); @@ -138,7 +139,7 @@ static inline unsigned long xen_read_cr4 { unsigned long val; /* This could fault if %cr4 does not exist */ - asm("1: movl %%cr4, %0 \n" + asm volatile("1: movl %%cr4, %0 \n" "2: \n" ".section __ex_table,\"a\" \n" ".long 1b,2b \n" @@ -157,6 +158,11 @@ static inline void xen_wbinvd(void) asm volatile("wbinvd": : :"memory"); } +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(char __force *)__p)); +} + #define read_cr0() (xen_read_cr0()) #define write_cr0(x) (xen_write_cr0(x)) #define read_cr2() (xen_read_cr2()) @@ -207,6 +213,7 @@ static inline unsigned long get_limit(un #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2) #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2) +#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) /** * read_barrier_depends - Flush all pending reads that subsequents reads @@ -262,18 +269,18 @@ static inline unsigned long get_limit(un #define read_barrier_depends() do { } while(0) +#ifdef CONFIG_SMP +#define smp_mb() mb() +#ifdef CONFIG_X86_PPRO_FENCE +# define smp_rmb() rmb() +#else +# define smp_rmb() barrier() +#endif #ifdef CONFIG_X86_OOSTORE -/* Actually there are no OOO store capable CPUs for now that do SSE, - but make it already an possibility. 
*/ -#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM) +# define smp_wmb() wmb() #else -#define wmb() __asm__ __volatile__ ("": : :"memory") +# define smp_wmb() barrier() #endif - -#ifdef CONFIG_SMP -#define smp_mb() mb() -#define smp_rmb() rmb() -#define smp_wmb() wmb() #define smp_read_barrier_depends() read_barrier_depends() #define set_mb(var, value) do { (void) xchg(&var, value); } while (0) #else @@ -300,5 +307,6 @@ extern unsigned long arch_align_stack(un extern void free_init_pages(char *what, unsigned long begin, unsigned long end); void default_idle(void); +void __show_registers(struct pt_regs *, int all); #endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system_64.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/system_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -11,8 +11,12 @@ #ifdef __KERNEL__ -#define __STR(x) #x -#define STR(x) __STR(x) +/* entries in ARCH_DLINFO: */ +#ifdef CONFIG_IA32_EMULATION +# define AT_VECTOR_SIZE_ARCH 2 +#else +# define AT_VECTOR_SIZE_ARCH 1 +#endif #define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t" #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t" @@ -92,7 +96,7 @@ static inline void write_cr0(unsigned lo #define read_cr3() ({ \ unsigned long __dummy; \ - asm("movq %%cr3,%0" : "=r" (__dummy)); \ + asm volatile("movq %%cr3,%0" : "=r" (__dummy)); \ machine_to_phys(__dummy); \ }) @@ -105,7 +109,7 @@ static inline void write_cr3(unsigned lo static inline unsigned long read_cr4(void) { unsigned long cr4; - asm("movq %%cr4,%0" : "=r" (cr4)); + asm volatile("movq %%cr4,%0" : "=r" (cr4)); return cr4; } @@ -131,12 +135,17 @@ static inline void write_cr8(unsigned lo #endif /* __KERNEL__ */ +static inline void clflush(volatile void *__p) +{ + asm volatile("clflush %0" : "+m" (*(char __force *)__p)); +} + #define nop() __asm__ __volatile__ ("nop") #ifdef CONFIG_SMP #define smp_mb() mb() -#define smp_rmb() 
rmb() -#define smp_wmb() wmb() +#define smp_rmb() barrier() +#define smp_wmb() barrier() #define smp_read_barrier_depends() do {} while(0) #else #define smp_mb() barrier() @@ -153,12 +162,8 @@ static inline void write_cr8(unsigned lo */ #define mb() asm volatile("mfence":::"memory") #define rmb() asm volatile("lfence":::"memory") - -#ifdef CONFIG_UNORDERED_IO #define wmb() asm volatile("sfence" ::: "memory") -#else -#define wmb() asm volatile("" ::: "memory") -#endif + #define read_barrier_depends() do {} while(0) #define set_mb(var, value) do { (void) xchg(&var, value); } while (0) --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/tlbflush.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "tlbflush_32.h" +#else +# include "tlbflush_64.h" +#endif --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/tlbflush_32.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/tlbflush_32.h 2011-01-31 17:56:27.000000000 +0100 @@ -23,7 +23,6 @@ * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables * * ..but the i386 has somewhat limited tlb flushing capabilities, * and page-granular flushes are available only on i486 and up. 
@@ -97,10 +96,4 @@ static inline void flush_tlb_kernel_rang flush_tlb_all(); } -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* i386 does not keep any page table caches in TLB */ -} - #endif /* _I386_TLBFLUSH_H */ --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/tlbflush_64.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/tlbflush_64.h 2011-01-31 17:56:27.000000000 +0100 @@ -28,7 +28,6 @@ * - flush_tlb_page(vma, vmaddr) flushes one page * - flush_tlb_range(vma, start, end) flushes a range of pages * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages - * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables * * x86-64 can only flush individual pages or full VMs. For a range flush * we always do the full VM. Might be worth trying if for a small @@ -95,12 +94,4 @@ static inline void flush_tlb_kernel_rang flush_tlb_all(); } -static inline void flush_tlb_pgtables(struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - /* x86_64 does not keep any page table caches in a software TLB. - The CPUs do in their hardware TLBs, but they are handled - by the normal TLB flushing algorithms. 
*/ -} - #endif /* _X8664_TLBFLUSH_H */ --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/xor.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "../../xor_32.h" +#else +# include "xor_64.h" +#endif --- head-2011-03-17.orig/arch/x86/kernel/Makefile 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/Makefile 2011-01-31 17:56:27.000000000 +0100 @@ -132,4 +132,4 @@ endif disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \ smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o -%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) := +%/head_64.o %/head_64.s: asflags-$(CONFIG_XEN) := --- head-2011-03-17.orig/arch/x86/kernel/acpi/processor_extcntl_xen.c 2010-03-22 12:00:53.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/acpi/processor_extcntl_xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -68,7 +68,7 @@ static int xen_cx_notifier(struct acpi_p /* Get dependency relationships */ if (cx->csd_count) { - printk("Wow! _CSD is found. Not support for now!\n"); + pr_warning("_CSD found: Not supported for now!\n"); kfree(buf); return -EINVAL; } else { @@ -81,7 +81,7 @@ static int xen_cx_notifier(struct acpi_p } if (!count) { - printk("No available Cx info for cpu %d\n", pr->acpi_id); + pr_info("No available Cx info for cpu %d\n", pr->acpi_id); kfree(buf); return -EINVAL; } --- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/acpi/sleep_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -90,7 +90,7 @@ __setup("acpi_sleep=", acpi_sleep_setup) /* Ouch, we want to delete this. 
We already have better version in userspace, in s2ram from suspend.sf.net project */ -static __init int reset_videomode_after_s3(struct dmi_system_id *d) +static __init int reset_videomode_after_s3(const struct dmi_system_id *d) { acpi_realmode_flags |= 2; return 0; --- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/acpi/sleep_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -123,6 +123,3 @@ static int __init acpi_sleep_setup(char __setup("acpi_sleep=", acpi_sleep_setup); #endif /* CONFIG_ACPI_PV_SLEEP */ -void acpi_pci_link_exit(void) -{ -} --- head-2011-03-17.orig/arch/x86/kernel/cpu/Makefile 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/Makefile 2011-02-03 14:29:46.000000000 +0100 @@ -34,6 +34,8 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o +disabled-obj-$(CONFIG_XEN) := perfctr-watchdog.o + quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ --- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -214,7 +214,7 @@ static void __cpuinit get_cpu_vendor(str static int __init x86_fxsr_setup(char * s) { - /* Tell all the other CPU's to not use it... */ + /* Tell all the other CPUs to not use it... */ disable_x86_fxsr = 1; /* @@ -412,7 +412,9 @@ static void __cpuinit identify_cpu(struc c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ +#ifndef CONFIG_XEN c->x86_max_cores = 1; +#endif c->x86_clflush_size = 32; memset(&c->x86_capability, 0, sizeof c->x86_capability); --- head-2011-03-17.orig/arch/x86/kernel/cpu/intel.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/intel.c 2011-01-31 17:56:27.000000000 +0100 @@ -292,6 +292,7 @@ static void __cpuinit srat_detect_node(s #endif } +#ifndef CONFIG_XEN /* * find out the number of processor cores on the die */ @@ -309,6 +310,7 @@ static int __cpuinit intel_num_cpu_cores else return 1; } +#endif static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c) { @@ -432,6 +434,7 @@ static void __cpuinit init_intel(struct set_cpu_cap(c, X86_FEATURE_P3); #endif +#ifndef CONFIG_XEN if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { /* * let's use the legacy cpuid vector 0x1 and 0x4 for topology @@ -442,6 +445,7 @@ static void __cpuinit init_intel(struct detect_ht(c); #endif } +#endif /* Work around errata */ srat_detect_node(c); --- head-2011-03-17.orig/arch/x86/kernel/cpu/intel_cacheinfo.c 2011-02-08 10:02:51.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/intel_cacheinfo.c 2011-02-08 10:04:17.000000000 +0100 @@ -290,8 +290,9 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_ eax->split.type = types[leaf]; eax->split.level = levels[leaf]; eax->split.num_threads_sharing = 0; +#ifndef CONFIG_XEN eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1; - +#endif if (assoc == 0xffff) eax->split.is_fully_associative = 1; @@ -578,8 +579,8 @@ unsigned int __cpuinit init_intel_cachei unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; #ifdef CONFIG_X86_HT + unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; unsigned 
int cpu = c->cpu_index; #endif @@ -613,16 +614,20 @@ unsigned int __cpuinit init_intel_cachei break; case 2: new_l2 = this_leaf.size/1024; +#ifdef CONFIG_X86_HT num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l2_id = c->apicid >> index_msb; +#endif break; case 3: new_l3 = this_leaf.size/1024; +#ifdef CONFIG_X86_HT num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; index_msb = get_count_order( num_threads_sharing); l3_id = c->apicid >> index_msb; +#endif break; default: break; --- head-2011-03-17.orig/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/mcheck/mce_dom0.c 2011-01-31 17:56:27.000000000 +0100 @@ -17,7 +17,7 @@ static int convert_log(struct mc_info *m x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL); if (mic == NULL) { - printk(KERN_ERR "DOM0_MCE_LOG: global data is NULL\n"); + pr_err("DOM0_MCE_LOG: global data is NULL\n"); return -1; } @@ -75,7 +75,7 @@ urgent: { result = convert_log(g_mi); if (result) { - printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n"); + pr_err("MCE_DOM0_LOG: Log conversion failed\n"); goto end; } /* After fetching the telem from DOM0, we need to dec the telem's @@ -100,7 +100,7 @@ nonurgent: { result = convert_log(g_mi); if (result) { - printk(KERN_ERR "MCE_DOM0_LOG: Log conversion failed\n"); + pr_err("MCE_DOM0_LOG: Log conversion failed\n"); goto end; } /* After fetching the telem from DOM0, we need to dec the telem's @@ -125,7 +125,7 @@ void bind_virq_for_mce(void) g_mi = kmalloc(sizeof(struct mc_info), GFP_KERNEL); if (ret < 0) - printk(KERN_ERR "MCE_DOM0_LOG: bind_virq for DOM0 failed\n"); + pr_err("MCE_DOM0_LOG: bind_virq for DOM0 failed\n"); /* Log the machine checks left over from the previous reset. 
*/ mce_dom0_interrupt(VIRQ_MCA, NULL); --- head-2011-03-17.orig/arch/x86/kernel/e820_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/e820_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -52,6 +52,13 @@ struct resource code_resource = { .flags = IORESOURCE_BUSY | IORESOURCE_MEM }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + static struct resource system_rom_resource = { .name = "System ROM", .start = 0xf0000, @@ -266,7 +273,9 @@ static struct e820map machine_e820; * and also for regions reported as reserved by the e820. */ static void __init -legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource) +legacy_init_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) { int i; @@ -300,9 +309,11 @@ legacy_init_iomem_resources(struct resou #ifndef CONFIG_XEN request_resource(res, code_resource); request_resource(res, data_resource); + request_resource(res, bss_resource); #endif #ifdef CONFIG_KEXEC - request_resource(res, &crashk_res); + if (crashk_res.start != crashk_res.end) + request_resource(res, &crashk_res); #ifdef CONFIG_XEN xen_machine_kexec_register_resources(res); #endif @@ -329,9 +340,11 @@ static int __init request_standard_resou printk("Setting up standard PCI resources\n"); if (efi_enabled) - efi_initialize_iomem_resources(&code_resource, &data_resource); + efi_initialize_iomem_resources(&code_resource, + &data_resource, &bss_resource); else - legacy_init_iomem_resources(&code_resource, &data_resource); + legacy_init_iomem_resources(&code_resource, + &data_resource, &bss_resource); /* EFI systems may still have VGA */ request_resource(&iomem_resource, &video_ram_resource); @@ -761,7 +774,7 @@ void __init e820_register_memory(void) #define e820 machine_e820 #endif /* - * Search for the bigest gap in the low 32 bits of the e820 + * Search for the 
biggest gap in the low 32 bits of the e820 * memory space. */ last = 0x100000000ull; --- head-2011-03-17.orig/arch/x86/kernel/e820_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/e820_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include @@ -53,7 +53,7 @@ unsigned long end_pfn_map; */ static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT; -extern struct resource code_resource, data_resource; +extern struct resource code_resource, data_resource, bss_resource; /* Check for some hardcoded bad areas that early boot is not allowed to touch */ static inline int bad_addr(unsigned long *addrp, unsigned long size) @@ -75,10 +75,15 @@ static inline int bad_addr(unsigned long /* initrd */ #ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START && last >= INITRD_START && - addr < INITRD_START+INITRD_SIZE) { - *addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE); - return 1; + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; + unsigned long ramdisk_end = ramdisk_image+ramdisk_size; + + if (last >= ramdisk_image && addr < ramdisk_end) { + *addrp = PAGE_ALIGN(ramdisk_end); + return 1; + } } #endif /* kernel code */ @@ -251,6 +256,7 @@ void __init e820_reserve_resources(struc #ifndef CONFIG_XEN request_resource(res, &code_resource); request_resource(res, &data_resource); + request_resource(res, &bss_resource); #endif #ifdef CONFIG_KEXEC if (crashk_res.start != crashk_res.end) @@ -661,8 +667,8 @@ void __init setup_memory_region(void) * Otherwise fake a memory map; one section from 0k->640k, * the next section from 1mb->appropriate_mem_k */ - sanitize_e820_map(E820_MAP, &E820_MAP_NR); - if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0) + sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries); + if 
(copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0) early_panic("Cannot find a valid memory map"); printk(KERN_INFO "BIOS-provided physical RAM map:\n"); e820_print_map("BIOS-e820"); @@ -847,3 +853,22 @@ __init void e820_setup_gap(struct e820en printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", pci_mem_start, gapstart, gapsize); } + +int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) +{ + int i; + + if (slot < 0 || slot >= e820.nr_map) + return -1; + for (i = slot; i < e820.nr_map; i++) { + if (e820.map[i].type != E820_RAM) + continue; + break; + } + if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) + return -1; + *addr = e820.map[i].addr; + *size = min_t(u64, e820.map[i].size + e820.map[i].addr, + max_pfn << PAGE_SHIFT) - *addr; + return i + 1; +} --- head-2011-03-17.orig/arch/x86/kernel/early_printk-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/early_printk-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -6,15 +6,10 @@ #include #include #include - -/* Simple VGA output */ - -#ifdef __i386__ #include -#else -#include -#endif + #ifndef CONFIG_XEN +/* Simple VGA output */ #define VGABASE (__ISA_IO_base + 0xb8000) static int max_ypos = 25, max_xpos = 80; @@ -264,10 +259,10 @@ static int __init setup_early_printk(cha early_console = &early_serial_console; } else if (!strncmp(buf, "vga", 3)) { #ifndef CONFIG_XEN - && SCREEN_INFO.orig_video_isVGA == 1) { - max_xpos = SCREEN_INFO.orig_video_cols; - max_ypos = SCREEN_INFO.orig_video_lines; - current_ypos = SCREEN_INFO.orig_y; + && boot_params.screen_info.orig_video_isVGA == 1) { + max_xpos = boot_params.screen_info.orig_video_cols; + max_ypos = boot_params.screen_info.orig_video_lines; + current_ypos = boot_params.screen_info.orig_y; #endif early_console = &early_vga_console; } else if (!strncmp(buf, "simnow", 6)) { --- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-01-31 17:49:31.000000000 +0100 +++ 
head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-01-31 17:56:27.000000000 +0100 @@ -254,6 +254,7 @@ check_userspace: jb resume_kernel # not returning to v8086 or userspace ENTRY(resume_userspace) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -341,6 +342,7 @@ sysenter_past_esp: jae syscall_badsys call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) TRACE_IRQS_OFF movl TI_flags(%ebp), %ecx @@ -406,6 +408,7 @@ syscall_call: call *sys_call_table(,%eax,4) movl %eax,PT_EAX(%esp) # store the return value syscall_exit: + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -478,7 +481,7 @@ ldt_ss: * is still available to implement the setting of the high * 16-bits in the INTERRUPT_RETURN paravirt-op. */ - cmpl $0, paravirt_ops+PARAVIRT_enabled + cmpl $0, pv_info+PARAVIRT_enabled jne restore_nocheck #endif @@ -540,6 +543,7 @@ work_pending: jz work_notifysig work_resched: call schedule + LOCKDEP_SYS_EXIT DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt # setting need_resched or sigpending # between sampling and the iret @@ -1268,6 +1272,6 @@ ENTRY(kernel_thread_helper) ENDPROC(kernel_thread_helper) .section .rodata,"a" -#include "syscall_table.S" +#include "syscall_table_32.S" syscall_table_size=(.-sys_call_table) --- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-01-31 17:56:27.000000000 +0100 @@ -57,7 +57,7 @@ #include #include -#include "xen_entry.S" +#include "xen_entry_64.S" .code64 @@ -275,6 +275,7 @@ ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: flagmask */ sysret_check: + LOCKDEP_SYS_EXIT GET_THREAD_INFO(%rcx) XEN_BLOCK_EVENTS(%rsi) TRACE_IRQS_OFF @@ -365,6 +366,7 @@ 
int_ret_from_sys_call: movl $_TIF_ALLWORK_MASK,%edi /* edi: mask to check */ int_with_check: + LOCKDEP_SYS_EXIT_IRQ GET_THREAD_INFO(%rcx) movl threadinfo_flags(%rcx),%edx andl %edi,%edx @@ -516,11 +518,12 @@ END(stub_rt_sigreturn) retint_check: CFI_DEFAULT_STACK adj=1 + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx andl %edi,%edx CFI_REMEMBER_STATE jnz retint_careful -retint_restore_args: +retint_restore_args: /* return to kernel space */ movl EFLAGS-REST_SKIP(%rsp), %eax shr $9, %eax # EAX[0] == IRET_EFLAGS.IF XEN_GET_VCPU_INFO(%rsi) @@ -841,7 +844,7 @@ error_call_handler: movq ORIG_RAX(%rsp),%rsi # get error code movq $-1,ORIG_RAX(%rsp) call *%rax -error_exit: +error_exit: RESTORE_REST /* cli */ XEN_BLOCK_EVENTS(%rsi) @@ -849,14 +852,11 @@ error_exit: GET_THREAD_INFO(%rcx) testb $3,CS-ARGOFFSET(%rsp) jz retint_kernel + LOCKDEP_SYS_EXIT_IRQ movl threadinfo_flags(%rcx),%edx movl $_TIF_WORK_MASK,%edi andl %edi,%edx jnz retint_careful - /* - * The iret might restore flags: - */ - TRACE_IRQS_IRETQ jmp retint_restore_args #if 0 @@ -1071,7 +1071,7 @@ child_rip: movq %rsi, %rdi call *%rax # exit - xorl %edi, %edi + mov %eax, %edi call do_exit CFI_ENDPROC ENDPROC(child_rip) --- head-2011-03-17.orig/arch/x86/kernel/fixup.c 2008-01-28 12:24:18.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/fixup.c 2011-01-31 17:56:27.000000000 +0100 @@ -34,7 +34,7 @@ #include #include -#define DP(_f, _args...) printk(KERN_ALERT " " _f "\n" , ## _args ) +#define DP(_f, _args...) 
pr_alert(" " _f "\n" , ## _args ) fastcall void do_fixup_4gb_segment(struct pt_regs *regs, long error_code) { --- head-2011-03-17.orig/arch/x86/kernel/head64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/head64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,5 +1,5 @@ /* - * linux/arch/x86_64/kernel/head64.c -- prepare to run common code + * prepare to run common code * * Copyright (C) 2000 Andrea Arcangeli SuSE * @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -47,27 +46,16 @@ static void __init clear_bss(void) } #endif -#define NEW_CL_POINTER 0x228 /* Relative to real mode data */ -#define OLD_CL_MAGIC_ADDR 0x20 -#define OLD_CL_MAGIC 0xA33F -#define OLD_CL_OFFSET 0x22 - static void __init copy_bootdata(char *real_mode_data) { #ifndef CONFIG_XEN - unsigned long new_data; char * command_line; - memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); - new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); - if (!new_data) { - if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { - return; - } - new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); + memcpy(&boot_params, real_mode_data, sizeof boot_params); + if (boot_params.hdr.cmd_line_ptr) { + command_line = __va(boot_params.hdr.cmd_line_ptr); + memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); } - command_line = __va(new_data); - memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); #else int max_cmdline; @@ -117,7 +105,7 @@ void __init x86_64_start_kernel(char * r for (i = 0; i < IDT_ENTRIES; i++) set_intr_gate(i, early_idt_handler); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_idt((const struct desc_ptr *)&idt_descr); #endif early_printk("Kernel alive\n"); --- head-2011-03-17.orig/arch/x86/kernel/io_apic_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/io_apic_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -427,7 +427,7 @@ static struct 
irq_cpu_info { #define IRQ_ALLOWED(cpu, allowed_mask) cpu_isset(cpu, allowed_mask) -#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i])) +#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i))) static cpumask_t balance_irq_affinity[NR_IRQS] = { [0 ... NR_IRQS-1] = CPU_MASK_ALL @@ -633,7 +633,7 @@ tryanotherirq: imbalance = move_this_load; - /* For physical_balance case, we accumlated both load + /* For physical_balance case, we accumulated both load * values in the one of the siblings cpu_irq[], * to use the same code for physical and logical processors * as much as possible. @@ -647,7 +647,7 @@ tryanotherirq: * (A+B)/2 vs B */ load = CPU_IRQ(min_loaded) >> 1; - for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) { + for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) { if (load > CPU_IRQ(j)) { /* This won't change cpu_sibling_map[min_loaded] */ load = CPU_IRQ(j); @@ -1018,7 +1018,7 @@ static int EISA_ELCR(unsigned int irq) #define default_MCA_trigger(idx) (1) #define default_MCA_polarity(idx) (0) -static int __init MPBIOS_polarity(int idx) +static int MPBIOS_polarity(int idx) { int bus = mp_irqs[idx].mpc_srcbus; int polarity; @@ -1347,6 +1347,11 @@ static void __init setup_IO_APIC_irqs(vo continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } + entry.trigger = irq_trigger(idx); entry.polarity = irq_polarity(idx); @@ -1936,13 +1941,16 @@ __setup("no_timer_check", notimercheck); static int __init timer_irq_works(void) { unsigned long t1 = jiffies; + unsigned long flags; if (no_timer_check) return 1; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... 
*/ mdelay((10 * 1000) / HZ); + local_irq_restore(flags); /* * Expect a few ticks at least, to be sure some possible @@ -2223,6 +2231,9 @@ static inline void __init check_timer(vo { int apic1, pin1, apic2, pin2; int vector; + unsigned long flags; + + local_irq_save(flags); /* * get/set the timer IRQ vector: @@ -2268,7 +2279,7 @@ static inline void __init check_timer(vo } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); - return; + goto out; } clear_IO_APIC_pin(apic1, pin1); printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to " @@ -2291,7 +2302,7 @@ static inline void __init check_timer(vo if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } - return; + goto out; } /* * Cleanup, just in case ... @@ -2315,7 +2326,7 @@ static inline void __init check_timer(vo if (timer_irq_works()) { printk(" works.\n"); - return; + goto out; } apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); printk(" failed.\n"); @@ -2331,11 +2342,13 @@ static inline void __init check_timer(vo if (timer_irq_works()) { printk(" works.\n"); - return; + goto out; } printk(" failed :(.\n"); panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " "report. Then try booting with the 'noapic' option"); +out: + local_irq_restore(flags); } #else int timer_uses_ioapic_pin_0 = 0; @@ -2353,6 +2366,14 @@ int timer_uses_ioapic_pin_0 = 0; void __init setup_IO_APIC(void) { +#ifndef CONFIG_XEN + int i; + + /* Reserve all the system vectors. 
*/ + for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++) + set_bit(i, used_vectors); +#endif + enable_IO_APIC(); if (acpi_ioapic) @@ -2542,7 +2563,7 @@ void destroy_irq(unsigned int irq) #endif /* CONFIG_XEN */ /* - * MSI mesage composition + * MSI message composition */ #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) @@ -2745,9 +2766,9 @@ int arch_setup_ht_irq(unsigned int irq, #ifdef CONFIG_ACPI +#ifndef CONFIG_XEN int __init io_apic_get_unique_id (int ioapic, int apic_id) { -#ifndef CONFIG_XEN union IO_APIC_reg_00 reg_00; static physid_mask_t apic_id_map = PHYSID_MASK_NONE; physid_mask_t tmp; @@ -2816,10 +2837,10 @@ int __init io_apic_get_unique_id (int io apic_printk(APIC_VERBOSE, KERN_INFO "IOAPIC[%d]: Assigned apic_id %d\n", ioapic, apic_id); -#endif /* !CONFIG_XEN */ return apic_id; } +#endif /* !CONFIG_XEN */ int __init io_apic_get_version (int ioapic) @@ -2899,6 +2920,25 @@ int io_apic_set_pci_routing (int ioapic, return 0; } +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +{ + int i; + + if (skip_ioapic_setup) + return -1; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == mp_INT && + mp_irqs[i].mpc_srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; + + *trigger = irq_trigger(i); + *polarity = irq_polarity(i); + return 0; +} + #endif /* CONFIG_ACPI */ static int __init parse_disable_timer_pin_1(char *arg) --- head-2011-03-17.orig/arch/x86/kernel/io_apic_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/io_apic_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef CONFIG_ACPI #include #endif @@ -584,7 +585,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, #define default_PCI_trigger(idx) (1) #define default_PCI_polarity(idx) (1) -static int __init MPBIOS_polarity(int idx) +static int MPBIOS_polarity(int idx) { int bus = 
mp_irqs[idx].mpc_srcbus; int polarity; @@ -871,6 +872,10 @@ static void __init setup_IO_APIC_irqs(vo apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin); continue; } + if (!first_notcon) { + apic_printk(APIC_VERBOSE, " not connected.\n"); + first_notcon = 1; + } irq = pin_2_irq(idx, apic, pin); add_pin_to_irq(irq, apic, pin); @@ -881,7 +886,7 @@ static void __init setup_IO_APIC_irqs(vo } if (!first_notcon) - apic_printk(APIC_VERBOSE," not connected.\n"); + apic_printk(APIC_VERBOSE, " not connected.\n"); } #ifndef CONFIG_XEN @@ -1277,10 +1282,13 @@ void disable_IO_APIC(void) static int __init timer_irq_works(void) { unsigned long t1 = jiffies; + unsigned long flags; + local_save_flags(flags); local_irq_enable(); /* Let ten ticks pass... */ mdelay((10 * 1000) / HZ); + local_irq_restore(flags); /* * Expect a few ticks at least, to be sure some possible @@ -1655,6 +1663,9 @@ static inline void check_timer(void) { struct irq_cfg *cfg = irq_cfg + 0; int apic1, pin1, apic2, pin2; + unsigned long flags; + + local_irq_save(flags); /* * get/set the timer IRQ vector: @@ -1696,7 +1707,7 @@ static inline void check_timer(void) } if (disable_timer_pin_1 > 0) clear_IO_APIC_pin(0, pin1); - return; + goto out; } clear_IO_APIC_pin(apic1, pin1); apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " @@ -1718,7 +1729,7 @@ static inline void check_timer(void) if (nmi_watchdog == NMI_IO_APIC) { setup_nmi(); } - return; + goto out; } /* * Cleanup, just in case ... @@ -1741,7 +1752,7 @@ static inline void check_timer(void) if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); - return; + goto out; } apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); apic_printk(APIC_VERBOSE," failed.\n"); @@ -1756,10 +1767,12 @@ static inline void check_timer(void) if (timer_irq_works()) { apic_printk(APIC_VERBOSE," works.\n"); - return; + goto out; } apic_printk(APIC_VERBOSE," failed :(.\n"); panic("IO-APIC + timer doesn't work! 
Try using the 'noapic' kernel parameter\n"); +out: + local_irq_restore(flags); } #else #define check_timer() ((void)0) @@ -1775,7 +1788,7 @@ __setup("no_timer_check", notimercheck); /* * - * IRQ's that are handled by the PIC in the MPS IOAPIC case. + * IRQs that are handled by the PIC in the MPS IOAPIC case. * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. * Linux doesn't really care, as it's not actually used * for any interrupt handling anyway. @@ -1858,7 +1871,7 @@ static struct sysdev_class ioapic_sysdev static int __init ioapic_init_sysfs(void) { struct sys_device * dev; - int i, size, error = 0; + int i, size, error; error = sysdev_class_register(&ioapic_sysdev_class); if (error) @@ -1867,12 +1880,11 @@ static int __init ioapic_init_sysfs(void for (i = 0; i < nr_ioapics; i++ ) { size = sizeof(struct sys_device) + nr_ioapic_registers[i] * sizeof(struct IO_APIC_route_entry); - mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL); + mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL); if (!mp_ioapic_data[i]) { printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i); continue; } - memset(mp_ioapic_data[i], 0, size); dev = &mp_ioapic_data[i]->dev; dev->id = i; dev->cls = &ioapic_sysdev_class; @@ -1933,7 +1945,7 @@ void destroy_irq(unsigned int irq) #endif /* CONFIG_XEN */ /* - * MSI mesage composition + * MSI message composition */ #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN) static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) @@ -2043,8 +2055,64 @@ void arch_teardown_msi_irq(unsigned int destroy_irq(irq); } -#endif /* CONFIG_PCI_MSI */ +#ifdef CONFIG_DMAR +#ifdef CONFIG_SMP +static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) +{ + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + unsigned int dest; + cpumask_t tmp; + + cpus_and(tmp, mask, cpu_online_map); + if (cpus_empty(tmp)) + return; + + if (assign_irq_vector(irq, mask)) + return; + + cpus_and(tmp, cfg->domain, mask); + dest = 
cpu_mask_to_apicid(tmp); + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID(dest); + + dmar_msi_write(irq, &msg); + irq_desc[irq].affinity = mask; +} +#endif /* CONFIG_SMP */ + +struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .unmask = dmar_msi_unmask, + .mask = dmar_msi_mask, + .ack = ack_apic_edge, +#ifdef CONFIG_SMP + .set_affinity = dmar_msi_set_affinity, +#endif + .retrigger = ioapic_retrigger_irq, +}; + +int arch_setup_dmar_msi(unsigned int irq) +{ + int ret; + struct msi_msg msg; + + ret = msi_compose_msg(NULL, irq, &msg); + if (ret < 0) + return ret; + dmar_msi_write(irq, &msg); + set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq, + "edge"); + return 0; +} +#endif +#endif /* CONFIG_PCI_MSI */ /* * Hypertransport interrupt support */ @@ -2177,8 +2245,27 @@ int io_apic_set_pci_routing (int ioapic, return 0; } -#endif /* CONFIG_ACPI */ +int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +{ + int i; + + if (skip_ioapic_setup) + return -1; + + for (i = 0; i < mp_irq_entries; i++) + if (mp_irqs[i].mpc_irqtype == mp_INT && + mp_irqs[i].mpc_srcbusirq == bus_irq) + break; + if (i >= mp_irq_entries) + return -1; + + *trigger = irq_trigger(i); + *polarity = irq_polarity(i); + return 0; +} + +#endif /* CONFIG_ACPI */ #ifndef CONFIG_XEN /* @@ -2217,3 +2304,4 @@ void __init setup_ioapic_dest(void) } #endif #endif /* !CONFIG_XEN */ + --- head-2011-03-17.orig/arch/x86/kernel/ioport_32-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/ioport_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/ioport.c - * * This contains the io-permission bitmap code - written by obz, with changes * by Linus. 
*/ --- head-2011-03-17.orig/arch/x86/kernel/ioport_64-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/ioport_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/ioport.c - * * This contains the io-permission bitmap code - written by obz, with changes * by Linus. */ --- head-2011-03-17.orig/arch/x86/kernel/ldt_32-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/ldt_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/ldt.c - * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar */ @@ -106,14 +104,14 @@ int init_new_context(struct task_struct struct mm_struct * old_mm; int retval = 0; - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); mm->context.size = 0; mm->context.has_foreign_mappings = 0; old_mm = current->mm; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -149,7 +147,7 @@ static int read_ldt(void __user * ptr, u if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -157,7 +155,7 @@ static int read_ldt(void __user * ptr, u err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -213,7 +211,7 @@ static int write_ldt(void __user * ptr, goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= mm->context.size) { error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -240,7 +238,7 @@ install: entry_1,
entry_2); out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } --- head-2011-03-17.orig/arch/x86/kernel/ldt_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/ldt_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/ldt.c - * * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds * Copyright (C) 1999 Ingo Molnar * Copyright (C) 2002 Andi Kleen @@ -112,19 +110,14 @@ int init_new_context(struct task_struct int retval = 0; memset(&mm->context, 0, sizeof(mm->context)); - init_MUTEX(&mm->context.sem); + mutex_init(&mm->context.lock); old_mm = current->mm; if (old_mm) mm->context.vdso = old_mm->context.vdso; if (old_mm && old_mm->context.size > 0) { - down(&old_mm->context.sem); + mutex_lock(&old_mm->context.lock); retval = copy_ldt(&mm->context, &old_mm->context); - up(&old_mm->context.sem); - } - if (retval == 0) { - spin_lock(&mm_unpinned_lock); - list_add(&mm->context.unpinned, &mm_unpinned); - spin_unlock(&mm_unpinned_lock); + mutex_unlock(&old_mm->context.lock); } return retval; } @@ -148,11 +141,6 @@ void destroy_context(struct mm_struct *m kfree(mm->context.ldt); mm->context.size = 0; } - if (!PagePinned(virt_to_page(mm->pgd))) { - spin_lock(&mm_unpinned_lock); - list_del(&mm->context.unpinned); - spin_unlock(&mm_unpinned_lock); - } } static int read_ldt(void __user * ptr, unsigned long bytecount) @@ -166,7 +154,7 @@ static int read_ldt(void __user * ptr, u if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES) bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES; - down(&mm->context.sem); + mutex_lock(&mm->context.lock); size = mm->context.size*LDT_ENTRY_SIZE; if (size > bytecount) size = bytecount; @@ -174,7 +162,7 @@ static int read_ldt(void __user * ptr, u err = 0; if (copy_to_user(ptr, mm->context.ldt, size)) err = -EFAULT; - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); if (err < 0) goto error_return; if (size != bytecount) { @@ -227,7 +215,7 @@ static 
int write_ldt(void __user * ptr, goto out; } - down(&mm->context.sem); + mutex_lock(&mm->context.lock); if (ldt_info.entry_number >= (unsigned)mm->context.size) { error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1); if (error < 0) @@ -256,7 +244,7 @@ install: error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32))); out_unlock: - up(&mm->context.sem); + mutex_unlock(&mm->context.lock); out: return error; } --- head-2011-03-17.orig/arch/x86/kernel/mpparse_32-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/mpparse_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -65,8 +65,10 @@ unsigned long mp_lapic_addr; unsigned int def_to_bigsmp = 0; +#ifndef CONFIG_XEN /* Processor that is doing the boot up */ unsigned int boot_cpu_physical_apicid = -1U; +#endif /* Internal processor count */ unsigned int __cpuinitdata num_processors; @@ -842,6 +844,7 @@ void __init mp_register_lapic_address(u6 void __cpuinit mp_register_lapic (u8 id, u8 enabled) { struct mpc_config_processor processor; +#ifndef CONFIG_XEN int boot_cpu = 0; if (MAX_APICS - id <= 0) { @@ -853,7 +856,6 @@ void __cpuinit mp_register_lapic (u8 id, if (id == boot_cpu_physical_apicid) boot_cpu = 1; -#ifndef CONFIG_XEN processor.mpc_type = MP_PROCESSOR; processor.mpc_apicid = id; processor.mpc_apicver = GET_APIC_VERSION(apic_read(APIC_LVR)); @@ -921,11 +923,11 @@ void __init mp_register_ioapic(u8 id, u3 #ifndef CONFIG_XEN set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); -#endif if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && !APIC_XAPIC(apic_version[boot_cpu_physical_apicid])) tmpid = io_apic_get_unique_id(idx, id); else +#endif tmpid = id; if (tmpid == -1) { nr_ioapics--; @@ -1023,7 +1025,7 @@ void __init mp_config_acpi_legacy_irqs ( /* * Use the default configuration for the IRQs 0-15. Unless - * overriden by (MADT) interrupt source override entries. + * overridden by (MADT) interrupt source override entries.
*/ for (i = 0; i < 16; i++) { int idx; --- head-2011-03-17.orig/arch/x86/kernel/mpparse_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/mpparse_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -54,9 +54,12 @@ int nr_ioapics; unsigned long mp_lapic_addr = 0; - +#ifndef CONFIG_XEN /* Processor that is doing the boot up */ unsigned int boot_cpu_id = -1U; +EXPORT_SYMBOL(boot_cpu_id); +#endif + /* Internal processor count */ unsigned int num_processors __cpuinitdata = 0; @@ -87,7 +90,7 @@ static int __init mpf_checksum(unsigned } #ifndef CONFIG_XEN -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { int cpu; cpumask_t tmp_map; @@ -124,13 +127,24 @@ static void __cpuinit MP_processor_info cpu = 0; } bios_cpu_apicid[cpu] = m->mpc_apicid; - x86_cpu_to_apicid[cpu] = m->mpc_apicid; + /* + * We get called early in the start_kernel initialization + * process when the per_cpu data area is not yet setup, so we + * use a static array that is removed after the per_cpu data + * area is created.
+ */ + if (x86_cpu_to_apicid_ptr) { + u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr; + x86_cpu_to_apicid[cpu] = m->mpc_apicid; + } else { + per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid; + } cpu_set(cpu, cpu_possible_map); cpu_set(cpu, cpu_present_map); } #else -static void __cpuinit MP_processor_info (struct mpc_config_processor *m) +static void __cpuinit MP_processor_info(struct mpc_config_processor *m) { num_processors++; } @@ -611,12 +625,12 @@ void __init mp_register_lapic_address(u6 void __cpuinit mp_register_lapic (u8 id, u8 enabled) { struct mpc_config_processor processor; +#ifndef CONFIG_XEN int boot_cpu = 0; if (id == boot_cpu_id) boot_cpu = 1; -#ifndef CONFIG_XEN processor.mpc_type = MP_PROCESSOR; processor.mpc_apicid = id; processor.mpc_apicver = 0; --- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -13,14 +13,13 @@ #include #include #include -#include #include #include #include #include #include -#include -#include +#include +#include #include #ifdef __x86_64__ @@ -106,27 +105,29 @@ int range_straddles_page_boundary(paddr_ } int -dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, +dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { int i, rc; BUG_ON(!valid_dma_direction(direction)); - WARN_ON(nents == 0 || sg[0].length == 0); + WARN_ON(nents == 0 || sgl->length == 0); if (swiotlb) { - rc = swiotlb_map_sg(hwdev, sg, nents, direction); + rc = swiotlb_map_sg(hwdev, sgl, nents, direction); } else { - for (i = 0; i < nents; i++ ) { - BUG_ON(!sg[i].page); - sg[i].dma_address = - gnttab_dma_map_page(sg[i].page) + sg[i].offset; - sg[i].dma_length = sg[i].length; + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) { + BUG_ON(!sg_page(sg)); + sg->dma_address = + gnttab_dma_map_page(sg_page(sg)) + sg->offset; + sg->dma_length = sg->length; 
IOMMU_BUG_ON(address_needs_mapping( - hwdev, sg[i].dma_address)); + hwdev, sg->dma_address)); IOMMU_BUG_ON(range_straddles_page_boundary( - page_to_pseudophys(sg[i].page) + sg[i].offset, - sg[i].length)); + page_to_pseudophys(sg_page(sg)) + sg->offset, + sg->length)); } rc = nents; } @@ -137,17 +138,19 @@ dma_map_sg(struct device *hwdev, struct EXPORT_SYMBOL(dma_map_sg); void -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, +dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents, enum dma_data_direction direction) { int i; BUG_ON(!valid_dma_direction(direction)); if (swiotlb) - swiotlb_unmap_sg(hwdev, sg, nents, direction); + swiotlb_unmap_sg(hwdev, sgl, nents, direction); else { - for (i = 0; i < nents; i++ ) - gnttab_dma_unmap_page(sg[i].dma_address); + struct scatterlist *sg; + + for_each_sg(sgl, sg, nents, i) + gnttab_dma_unmap_page(sg->dma_address); } } EXPORT_SYMBOL(dma_unmap_sg); @@ -258,7 +261,8 @@ void dma_free_coherent(struct device *de { struct dma_coherent_mem *mem = dev ? 
dev->dma_mem : NULL; int order = get_order(size); - + + WARN_ON(irqs_disabled()); /* for portability */ if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; --- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/process.c - * * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support @@ -190,6 +188,10 @@ void cpu_idle(void) } } +static void do_nothing(void *unused) +{ +} + void cpu_idle_wait(void) { unsigned int cpu, this_cpu = get_cpu(); @@ -214,13 +216,20 @@ void cpu_idle_wait(void) cpu_clear(cpu, map); } cpus_and(map, map, cpu_online_map); + /* + * We waited 1 sec, if a CPU still did not call idle + * it may be because it is in idle and not waking up + * because it has nothing to do. + * Give all the remaining CPUS a kick. 
+ */ + smp_call_function_mask(map, do_nothing, 0, 0); } while (!cpus_empty(map)); set_cpus_allowed(current, tmp); } EXPORT_SYMBOL_GPL(cpu_idle_wait); -void __devinit select_idle_routine(const struct cpuinfo_x86 *c) +void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { } @@ -238,34 +247,52 @@ static int __init idle_setup(char *str) } early_param("idle", idle_setup); -void show_regs(struct pt_regs * regs) +void __show_registers(struct pt_regs *regs, int all) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; unsigned long d0, d1, d2, d3, d6, d7; + unsigned long esp; + unsigned short ss, gs; + + if (user_mode_vm(regs)) { + esp = regs->esp; + ss = regs->xss & 0xffff; + savesegment(gs, gs); + } else { + esp = (unsigned long) (&regs->esp); + savesegment(ss, ss); + savesegment(gs, gs); + } printk("\n"); - printk("Pid: %d, comm: %20s\n", current->pid, current->comm); - printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id()); + printk("Pid: %d, comm: %s %s (%s %.*s)\n", + task_pid_nr(current), current->comm, + print_tainted(), init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); + + printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n", + 0xffff & regs->xcs, regs->eip, regs->eflags, + smp_processor_id()); print_symbol("EIP is at %s\n", regs->eip); - if (user_mode_vm(regs)) - printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp); - printk(" EFLAGS: %08lx %s (%s %.*s)\n", - regs->eflags, print_tainted(), init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", - regs->eax,regs->ebx,regs->ecx,regs->edx); - printk("ESI: %08lx EDI: %08lx EBP: %08lx", - regs->esi, regs->edi, regs->ebp); - printk(" DS: %04x ES: %04x FS: %04x\n", - 0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs); + regs->eax, regs->ebx, regs->ecx, regs->edx); + printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP:
%08lx\n", + regs->esi, regs->edi, regs->ebp, esp); + printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", + regs->xds & 0xffff, regs->xes & 0xffff, + regs->xfs & 0xffff, gs, ss); + + if (!all) + return; cr0 = read_cr0(); cr2 = read_cr2(); cr3 = read_cr3(); cr4 = read_cr4_safe(); - printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4); + printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", + cr0, cr2, cr3, cr4); get_debugreg(d0, 0); get_debugreg(d1, 1); @@ -273,10 +300,16 @@ void show_regs(struct pt_regs * regs) get_debugreg(d3, 3); printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n", d0, d1, d2, d3); + get_debugreg(d6, 6); get_debugreg(d7, 7); - printk("DR6: %08lx DR7: %08lx\n", d6, d7); + printk("DR6: %08lx DR7: %08lx\n", + d6, d7); +} +void show_regs(struct pt_regs *regs) +{ + __show_registers(regs, 1); show_trace(NULL, regs, &regs->esp); } --- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:31:50.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:32:00.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/kernel/process.c - * * Copyright (C) 1995 Linus Torvalds * * Pentium III FXSR, SSE support @@ -41,6 +39,7 @@ #include #include #include +#include #include #include @@ -171,6 +170,9 @@ void cpu_idle (void) if (__get_cpu_var(cpu_idle_state)) __get_cpu_var(cpu_idle_state) = 0; + + tick_nohz_stop_sched_tick(); + rmb(); idle = xen_idle; /* no alternatives */ if (cpu_is_offline(smp_processor_id())) @@ -189,12 +191,17 @@ void cpu_idle (void) __exit_idle(); } + tick_nohz_restart_sched_tick(); preempt_enable_no_resched(); schedule(); preempt_disable(); } } +static void do_nothing(void *unused) +{ +} + void cpu_idle_wait(void) { unsigned int cpu, this_cpu = get_cpu(); @@ -220,6 +227,13 @@ void cpu_idle_wait(void) cpu_clear(cpu, map); } cpus_and(map, map, cpu_online_map); + /* + * We waited 1 sec, if a CPU still did not call idle + * it may be because it is in idle and not waking up + *
because it has nothing to do. + * Give all the remaining CPUS a kick. + */ + smp_call_function_mask(map, do_nothing, 0, 0); } while (!cpus_empty(map)); set_cpus_allowed(current, tmp); @@ -527,7 +541,7 @@ static inline void __switch_to_xtra(stru * * Kprobes not supported here. Set the probe on schedule instead. */ -__kprobes struct task_struct * +struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev = &prev_p->thread, --- head-2011-03-17.orig/arch/x86/kernel/quirks.c 2011-02-28 15:04:15.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/quirks.c 2011-02-28 15:11:55.000000000 +0100 @@ -4,8 +4,6 @@ #include #include -#include - #if defined(CONFIG_X86_IO_APIC) && (defined(CONFIG_SMP) || defined(CONFIG_XEN)) && defined(CONFIG_PCI) static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) @@ -65,6 +63,8 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IN #endif #if defined(CONFIG_HPET_TIMER) +#include + unsigned long force_hpet_address; static enum { --- head-2011-03-17.orig/arch/x86/kernel/setup64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/setup64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -27,11 +26,12 @@ #include #include #include +#include #ifdef CONFIG_XEN #include #endif -char x86_boot_params[BOOT_PARAM_SIZE] __initdata; +struct boot_params __initdata boot_params; cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; @@ -159,8 +159,8 @@ static void switch_pt(void) static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr) { - asm volatile("lgdt %0" :: "m" (*gdt_descr)); - asm volatile("lidt %0" :: "m" (idt_descr)); + load_gdt(gdt_descr); + load_idt(idt_descr); } #endif @@ -252,6 +252,14 @@ void __cpuinit check_efer(void) unsigned long kernel_eflags; +#ifndef CONFIG_X86_NO_TSS +/* + * Copies of the original ist values from the tss are only accessed during + * debugging, no 
special alignment required. + */ +DEFINE_PER_CPU(struct orig_ist, orig_ist); +#endif + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT --- head-2011-03-17.orig/arch/x86/kernel/setup_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/setup_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds * * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 @@ -70,6 +68,7 @@ #include #include #include +#include #ifdef CONFIG_XEN #include @@ -80,13 +79,14 @@ static struct notifier_block xen_panic_b xen_panic_event, NULL, 0 /* try to go last */ }; -int disable_pse __devinitdata = 0; +int disable_pse __cpuinitdata = 0; /* * Machine setup.. */ extern struct resource code_resource; extern struct resource data_resource; +extern struct resource bss_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -98,9 +98,6 @@ unsigned long mmu_cr4_features; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; @@ -121,7 +118,7 @@ EXPORT_SYMBOL(apm_info); struct edid_info edid_info; EXPORT_SYMBOL_GPL(edid_info); #ifndef CONFIG_XEN -#define copy_edid() (edid_info = EDID_INFO) +#define copy_edid() (edid_info = boot_params.edid_info) #endif struct ist_info ist_info; #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ @@ -170,10 +167,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, 
boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #endif #else @@ -418,6 +416,53 @@ extern unsigned long __init setup_memory extern void zone_sizes_init(void); #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline unsigned long long get_total_mem(void) +{ + unsigned long long total; + + total = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + total += highend_pfn - highstart_pfn; +#endif + + return total << PAGE_SHIFT; +} + +#ifdef CONFIG_KEXEC +#ifndef CONFIG_XEN +static void __init reserve_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = get_total_mem(); + + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size > 0) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +#define reserve_crashkernel xen_machine_kexec_setup_resources +#endif +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + void __init setup_bootmem_allocator(void) { unsigned long bootmap_size; @@ -473,30 +518,25 @@ void __init setup_bootmem_allocator(void #ifdef CONFIG_BLK_DEV_INITRD if (xen_start_info->mod_start) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { - /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/ - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; + unsigned long 
ramdisk_image = __pa(xen_start_info->mod_start); + unsigned long ramdisk_size = xen_start_info->mod_len; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_lowmem) { + /*reserve_bootmem(ramdisk_image, ramdisk_size);*/ + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; initrd_below_start_ok = 1; - } - else { + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_lowmem); initrd_start = 0; } } #endif -#ifdef CONFIG_KEXEC -#ifdef CONFIG_XEN - xen_machine_kexec_setup_resources(); -#else - if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1); -#endif -#endif + reserve_crashkernel(); } /* @@ -574,7 +614,8 @@ void __init setup_arch(char **cmdline_p) * the system table is valid. If not, then initialize normally. */ #ifdef CONFIG_EFI - if ((LOADER_TYPE == 0x50) && EFI_SYSTAB) + if ((boot_params.hdr.type_of_loader == 0x50) && + boot_params.efi_info.efi_systab) efi_enabled = 1; #endif @@ -582,18 +623,18 @@ void __init setup_arch(char **cmdline_p) properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. 
*/ ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); - screen_info = SCREEN_INFO; + screen_info = boot_params.screen_info; copy_edid(); - apm_info.bios = APM_BIOS_INFO; - ist_info = IST_INFO; - saved_videomode = VIDEO_MODE; - if( SYS_DESC_TABLE.length != 0 ) { - set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); - machine_id = SYS_DESC_TABLE.table[0]; - machine_submodel_id = SYS_DESC_TABLE.table[1]; - BIOS_revision = SYS_DESC_TABLE.table[2]; + apm_info.bios = boot_params.apm_bios_info; + ist_info = boot_params.ist_info; + saved_videomode = boot_params.hdr.vid_mode; + if( boot_params.sys_desc_table.length != 0 ) { + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); + machine_id = boot_params.sys_desc_table.table[0]; + machine_submodel_id = boot_params.sys_desc_table.table[1]; + BIOS_revision = boot_params.sys_desc_table.table[2]; } - bootloader_type = LOADER_TYPE; + bootloader_type = boot_params.hdr.type_of_loader; if (is_initial_xendomain()) { const struct dom0_vga_console_info *info = @@ -608,9 +649,9 @@ void __init setup_arch(char **cmdline_p) screen_info.orig_video_isVGA = 0; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif ARCH_SETUP @@ -623,7 +664,7 @@ void __init setup_arch(char **cmdline_p) copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; @@ -635,6 +676,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(_etext)-1; data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = 
virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) i = COMMAND_LINE_SIZE; @@ -663,7 +706,7 @@ void __init setup_arch(char **cmdline_p) /* * NOTE: before this point _nobody_ is allowed to allocate * any memory using the bootmem allocator. Although the - * alloctor is now initialised only the first 8Mb of the kernel + * allocator is now initialised only the first 8Mb of the kernel * virtual address space has been mapped. All allocations before * paging_init() has completed must use the alloc_bootmem_low_pages() * variant (which allocates DMA'able memory) and care must be taken @@ -786,10 +829,8 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif -#ifdef CONFIG_PCI -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) + early_quirks(); #endif #ifdef CONFIG_ACPI --- head-2011-03-17.orig/arch/x86/kernel/setup_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/setup_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,10 +1,5 @@ /* - * linux/arch/x86-64/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds - * - * Nov 2001 Dave Jones - * Forked from i386 setup code. 
*/ /* @@ -57,13 +52,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #ifdef CONFIG_XEN #include #include @@ -180,6 +175,12 @@ struct resource code_resource = { .end = 0, .flags = IORESOURCE_RAM, }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_RAM, +}; #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header @@ -232,10 +233,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #endif #else @@ -244,6 +246,41 @@ static inline void copy_edd(void) } #endif +#ifdef CONFIG_KEXEC +#ifndef CONFIG_XEN +static void __init reserve_crashkernel(void) +{ + unsigned long long free_mem; + unsigned long long crash_size, crash_base; + int ret; + + free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; + + ret = parse_crashkernel(boot_command_line, free_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(free_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +#define reserve_crashkernel xen_machine_kexec_setup_resources 
+#endif +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + #ifndef CONFIG_XEN #define EBDA_ADDR_POINTER 0x40E @@ -284,7 +321,7 @@ void __init setup_arch(char **cmdline_p) atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); - screen_info = SCREEN_INFO; + screen_info = boot_params.screen_info; if (is_initial_xendomain()) { const struct dom0_vga_console_info *info = @@ -307,22 +344,22 @@ void __init setup_arch(char **cmdline_p) #else printk(KERN_INFO "Command line: %s\n", boot_command_line); - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; #endif /* !CONFIG_XEN */ - saved_video_mode = SAVED_VIDEO_MODE; - bootloader_type = LOADER_TYPE; + saved_video_mode = boot_params.hdr.vid_mode; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif setup_memory_region(); copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; @@ -333,6 +370,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(&_etext)-1; data_resource.start = virt_to_phys(&_etext); data_resource.end = virt_to_phys(&_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; early_identify_cpu(&boot_cpu_data); @@ -360,6 +399,11 @@ void 
__init setup_arch(char **cmdline_p) if (is_initial_xendomain()) dmi_scan_machine(); +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) + /* setup to use the static apicid table during kernel startup */ + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; +#endif + /* How many end-of-memory variables you have, grandma! */ max_low_pfn = end_pfn; max_pfn = end_pfn; @@ -424,52 +468,37 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_XEN #ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_XEN if (xen_start_info->mod_start) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/ - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; + unsigned long ramdisk_image = __pa(xen_start_info->mod_start); + unsigned long ramdisk_size = xen_start_info->mod_len; +#else + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; +#endif + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_mem = end_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_mem) { +#ifndef CONFIG_XEN + reserve_bootmem_generic(ramdisk_image, ramdisk_size); +#endif + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; +#ifdef CONFIG_XEN initrd_below_start_ok = 1; - } else { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); - initrd_start = 0; - } - } #endif -#else /* CONFIG_XEN */ -#ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = 
initrd_start+INITRD_SIZE; - } - else { + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_mem); initrd_start = 0; } } #endif -#endif /* !CONFIG_XEN */ -#ifdef CONFIG_KEXEC -#ifdef CONFIG_XEN - xen_machine_kexec_setup_resources(); -#else - if (crashk_res.start != crashk_res.end) { - reserve_bootmem_generic(crashk_res.start, - crashk_res.end - crashk_res.start + 1); - } -#endif -#endif - + reserve_crashkernel(); paging_init(); #ifdef CONFIG_X86_LOCAL_APIC /* @@ -784,7 +813,7 @@ static void __init amd_detect_cmp(struct but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -799,6 +828,39 @@ static void __init amd_detect_cmp(struct #endif } +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +#ifndef CONFIG_XEN +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. 
*/ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} +#endif + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; @@ -828,7 +890,7 @@ static void __cpuinit init_amd(struct cp level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - if (c->x86 == 0x10) + if (c->x86 == 0x10 || c->x86 == 0x11) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); /* Enable workaround for FXSAVE leak */ @@ -870,6 +932,11 @@ static void __cpuinit init_amd(struct cp /* Family 10 doesn't support C states in MWAIT so don't use it */ if (c->x86 == 0x10 && !force_mwait) clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); + +#ifndef CONFIG_XEN + if (amd_apic_timer_broken()) + disable_apic_timer = 1; +#endif } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -919,6 +986,7 @@ out: #endif } +#ifndef CONFIG_XEN /* * find out the number of processor cores on the die */ @@ -936,6 +1004,7 @@ static int __cpuinit intel_num_cpu_cores else return 1; } +#endif static void srat_detect_node(void) { @@ -1000,7 +1069,9 @@ static void __cpuinit init_intel(struct set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); else clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); +#ifndef CONFIG_XEN c->x86_max_cores = intel_num_cpu_cores(c); +#endif srat_detect_node(); } @@ -1038,7 +1109,9 @@ void __cpuinit early_identify_cpu(struct c->x86_model_id[0] = '\0'; /* Unset */ c->x86_clflush_size = 64; c->x86_cache_alignment = c->x86_clflush_size; +#ifndef CONFIG_XEN c->x86_max_cores = 1; +#endif c->extended_cpuid_level = 0; 
memset(&c->x86_capability, 0, sizeof c->x86_capability); @@ -1182,6 +1255,7 @@ void __cpuinit print_cpu_info(struct cpu static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; + int cpu = 0; /* * These flag bits must match the definitions in <asm/cpufeature.h>. @@ -1191,7 +1265,7 @@ static int show_cpuinfo(struct seq_file * applications want to get the raw CPUID data, they should access * /dev/cpu/<cpu_nr>/cpuid instead. */ - static char *x86_cap_flags[] = { + static const char *const x86_cap_flags[] = { /* Intel-defined */ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", @@ -1222,7 +1296,7 @@ static int show_cpuinfo(struct seq_file /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -1232,10 +1306,10 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1245,7 +1319,7 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; - static char *x86_power_flags[] = { + static const char *const x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ @@ -1260,8 +1334,7 @@ static int
show_cpuinfo(struct seq_file #ifdef CONFIG_SMP - if (!cpu_online(c-cpu_data)) - return 0; + cpu = c->cpu_index; #endif seq_printf(m,"processor\t: %u\n" @@ -1269,7 +1342,7 @@ static int show_cpuinfo(struct seq_file "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n", - (unsigned)(c-cpu_data), + (unsigned)cpu, c->x86_vendor_id[0] ? c->x86_vendor_id : "unknown", c->x86, (int)c->x86_model, @@ -1281,7 +1354,7 @@ static int show_cpuinfo(struct seq_file seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + unsigned int freq = cpufreq_quick_get((unsigned)cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", @@ -1294,9 +1367,9 @@ static int show_cpuinfo(struct seq_file #if defined(CONFIG_SMP) && !defined(CONFIG_XEN) if (smp_num_siblings * c->x86_max_cores > 1) { - int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } @@ -1351,12 +1424,16 @@ static int show_cpuinfo(struct seq_file static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_online_map); + if ((*pos) < NR_CPUS && cpu_online(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_online_map); return c_start(m, pos); } --- head-2011-03-17.orig/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/smp_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -72,7 +72,7 @@ * * B stepping CPUs may hang. There are hardware work arounds * for this. 
We warn about it in case your board doesn't have the work - * arounds. Basically thats so I can tell anyone with a B stepping + * arounds. Basically that's so I can tell anyone with a B stepping * CPU and SMP problems "tough". * * Specific items [From Pentium Processor Specification Update] @@ -241,7 +241,7 @@ void leave_mm(unsigned long cpu) * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superflous + * for the wrong mm, and in the worst case we perform a superfluous * tlb flush. * 1a2) set cpu_tlbstate to TLBSTATE_OK * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 @@ -309,6 +309,7 @@ irqreturn_t smp_invalidate_interrupt(int smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + __get_cpu_var(irq_stat).irq_tlb_count++; return IRQ_HANDLED; } @@ -580,7 +581,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_all_local_evtchn(); - if (cpu_data[smp_processor_id()].hlt_works_ok) + if (cpu_data(smp_processor_id()).hlt_works_ok) for(;;) halt(); for (;;); } @@ -610,6 +611,7 @@ void xen_smp_send_stop(void) */ irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { + __get_cpu_var(irq_stat).irq_resched_count++; return IRQ_HANDLED; } @@ -632,6 +634,7 @@ irqreturn_t smp_call_function_interrupt( */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { --- head-2011-03-17.orig/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/smp_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -167,6 +167,7 @@ asmlinkage void smp_invalidate_interrupt out: ack_APIC_irq(); cpu_clear(cpu, f->flush_cpumask); + add_pda(irq_tlb_count, 1); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -326,17 +327,27 @@ void 
unlock_ipi_call_lock(void) } /* - * this function sends a 'generic call function' IPI to one other CPU - * in the system. - * - * cpu is a standard Linux logical CPU number. + * this function sends a 'generic call function' IPI to all other CPU + * of the system defined in the mask. */ -static void -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) + +static int +__smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; - int cpus = 1; + cpumask_t allbutself; + int cpus; + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + return 0; data.func = func; data.info = info; @@ -347,19 +358,55 @@ __smp_call_function_single(int cpu, void call_data = &data; wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); /* Wait for response */ while (atomic_read(&data.started) != cpus) cpu_relax(); if (!wait) - return; + return 0; while (atomic_read(&data.finished) != cpus) cpu_relax(); + + return 0; +} +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. 
+ * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. + */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + int ret; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + spin_lock(&call_lock); + ret = __smp_call_function_mask(mask, func, info, wait); + spin_unlock(&call_lock); + return ret; } +EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU @@ -378,6 +425,7 @@ int smp_call_function_single (int cpu, v int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ + int ret; int me = get_cpu(); /* Can deadlock when called with interrupts disabled */ @@ -391,51 +439,14 @@ int smp_call_function_single (int cpu, v return 0; } - spin_lock(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock(&call_lock); + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + put_cpu(); - return 0; + return ret; } EXPORT_SYMBOL(smp_call_function_single); /* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ -static void __smp_call_function (void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - -/* * smp_call_function - run a function on all other CPUs. * @func: The function to run. 
This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. @@ -453,10 +464,7 @@ static void __smp_call_function (void (* int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - spin_lock(&call_lock); - __smp_call_function(func,info,nonatomic,wait); - spin_unlock(&call_lock); - return 0; + return smp_call_function_mask(cpu_online_map, func, info, wait); } EXPORT_SYMBOL(smp_call_function); @@ -485,7 +493,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); if (!nolock) spin_unlock(&call_lock); disable_all_local_evtchn(); @@ -505,7 +513,9 @@ asmlinkage irqreturn_t smp_reschedule_in { #ifndef CONFIG_XEN ack_APIC_irq(); -#else +#endif + add_pda(irq_resched_count, 1); +#ifdef CONFIG_XEN return IRQ_HANDLED; #endif } @@ -535,6 +545,7 @@ asmlinkage irqreturn_t smp_call_function exit_idle(); irq_enter(); (*func)(info); + add_pda(irq_call_count, 1); irq_exit(); if (wait) { mb(); --- head-2011-03-17.orig/arch/x86/kernel/time-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/time-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/time.c - * * Copyright (C) 1991, 1992, 1995 Linus Torvalds * * This file contains the PC-specific time handling details: @@ -73,6 +71,7 @@ #include #include +#include #include #include @@ -536,6 +535,13 @@ irqreturn_t timer_interrupt(int irq, voi struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); struct vcpu_runstate_info runstate; + /* Keep nmi watchdog up to date */ +#ifdef __i386__ + per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; +#else + add_pda(irq0_irqs, 1); +#endif + /* * Here we are in the timer irq handler. 
We just have irqs locally * disabled but we don't know if the timer_bh is running on the other @@ -1011,7 +1017,7 @@ static int time_cpufreq_notifier(struct struct cpufreq_freqs *freq = data; struct xen_platform_op op; - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; if (val == CPUFREQ_PRECHANGE) @@ -1049,30 +1055,33 @@ core_initcall(cpufreq_time_setup); */ static ctl_table xen_subtable[] = { { - .ctl_name = 1, + .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK, .procname = "independent_wallclock", .data = &independent_wallclock, .maxlen = sizeof(independent_wallclock), .mode = 0644, + .strategy = sysctl_data, .proc_handler = proc_dointvec }, { - .ctl_name = 2, + .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER, .procname = "permitted_clock_jitter", .data = &permitted_clock_jitter, .maxlen = sizeof(permitted_clock_jitter), .mode = 0644, + .strategy = sysctl_data, .proc_handler = proc_doulongvec_minmax }, - { 0 } + { } }; static ctl_table xen_table[] = { { - .ctl_name = 123, + .ctl_name = CTL_XEN, .procname = "xen", .mode = 0555, - .child = xen_subtable}, - { 0 } + .child = xen_subtable + }, + { } }; static int __init xen_sysctl_init(void) { --- head-2011-03-17.orig/arch/x86/kernel/traps_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/traps_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * * Pentium III FXSR, SSE support @@ -65,6 +63,11 @@ int panic_on_unrecovered_nmi; +#ifndef CONFIG_XEN +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); +#endif + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? 
*/ @@ -120,7 +123,7 @@ struct stack_frame { static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { #ifdef CONFIG_FRAME_POINTER struct stack_frame *frame = (struct stack_frame *)ebp; @@ -157,7 +160,7 @@ static inline unsigned long print_contex void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { unsigned long ebp = 0; @@ -229,7 +232,7 @@ static void print_trace_address(void *da touch_nmi_watchdog(); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -288,6 +291,11 @@ void dump_stack(void) { unsigned long stack; + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(current, NULL, &stack); } @@ -296,48 +304,24 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss, gs; - - esp = (unsigned long) (&regs->esp); - savesegment(ss, ss); - savesegment(gs, gs); - if (user_mode_vm(regs)) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } + print_modules(); - printk(KERN_EMERG "CPU: %d\n" - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax,
regs->ebx, regs->ecx, regs->edx); - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + __show_registers(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, current->pid, + TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode_vm(regs)) { u8 *eip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, &regs->esp, KERN_EMERG); printk(KERN_EMERG "Code: "); @@ -382,11 +366,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -397,40 +381,33 @@ void die(const char * str, struct pt_reg if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + raw_local_irq_save(flags); + __raw_spin_lock(&die.lock); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); - } - else - local_save_flags(flags); + } else + raw_local_irq_save(flags); if (++die.lock_owner_depth < 3) { - int nl = 0; unsigned long esp; unsigned short ss; report_bug(regs->eip, regs); - printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, + ++die_counter); #ifdef
CONFIG_PREEMPT - printk(KERN_EMERG "PREEMPT "); - nl = 1; + printk("PREEMPT "); #endif #ifdef CONFIG_SMP - if (!nl) - printk(KERN_EMERG); printk("SMP "); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC - if (!nl) - printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); - nl = 1; #endif - if (nl) - printk("\n"); + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { @@ -454,7 +431,8 @@ void die(const char * str, struct pt_reg bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + __raw_spin_unlock(&die.lock); + raw_local_irq_restore(flags); if (!regs) return; @@ -571,6 +549,7 @@ fastcall void do_##name(struct pt_regs * info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -606,7 +585,7 @@ fastcall void __kprobes do_general_prote printk_ratelimit()) printk(KERN_INFO "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), regs->eip, regs->esp, error_code); force_sig(SIGSEGV, current); @@ -785,6 +764,8 @@ void restart_nmi(void) #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; @@ -822,6 +803,8 @@ fastcall void __kprobes do_debug(struct unsigned int condition; struct task_struct *tsk = current; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, @@ -1084,20 +1067,6 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT 
so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - /* * NB. All these are "trap gates" (i.e. events_mask isn't set) except --- head-2011-03-17.orig/arch/x86/kernel/traps_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/traps_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs * @@ -33,6 +31,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include @@ -205,7 +204,7 @@ static unsigned long *in_exception_stack #define MSG(txt) ops->warning(data, txt) /* - * x86-64 can have upto three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +218,7 @@ static inline int valid_stack_ptr(struct void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; @@ -340,7 +339,7 @@ static void print_trace_address(void *da printk_address(addr); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -404,6 +403,12 @@ void show_stack(struct task_struct *tsk, void dump_stack(void) { unsigned long dummy; + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(NULL, NULL, &dummy); } @@ -466,7 +471,7 @@ void out_of_line_bug(void) 
EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -478,13 +483,13 @@ unsigned __kprobes long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { + if (!__raw_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + __raw_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -498,12 +503,10 @@ void __kprobes oops_end(unsigned long fl die_owner = -1; bust_spinlocks(0); die_nest_count--; - if (die_nest_count) - /* We still own the lock */ - local_irq_restore(flags); - else + if (!die_nest_count) /* Nest count reaches zero, release the lock. */ - spin_unlock_irqrestore(&die_lock, flags); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); if (panic_on_oops) panic("Fatal exception"); oops_exit(); @@ -636,6 +639,7 @@ asmlinkage void do_##name(struct pt_regs info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -741,11 +745,8 @@ mem_parity_error(unsigned char reason, s printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); -#if 0 /* XEN */ /* Clear and disable the memory parity error line. 
*/ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); -#endif /* XEN */ + clear_mem_error(reason); } static __kprobes void @@ -754,14 +755,8 @@ io_check_error(unsigned char reason, str printk("NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); -#if 0 /* XEN */ /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); - mdelay(2000); - reason &= ~8; - outb(reason, 0x61); -#endif /* XEN */ + clear_io_check_error(reason); } static __kprobes void @@ -821,6 +816,8 @@ asmlinkage __kprobes void default_do_nmi /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } @@ -858,6 +855,8 @@ asmlinkage void __kprobes do_debug(struc struct task_struct *tsk = current; siginfo_t info; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, --- head-2011-03-17.orig/arch/x86/kernel/vsyscall_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/vsyscall_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/vsyscall.c - * * Copyright (C) 2001 Andrea Arcangeli SuSE * Copyright 2003 Andi Kleen, SuSE Labs. 
* @@ -50,12 +48,12 @@ ({unsigned long v; \ extern char __vsyscall_0; \ asm("" : "=r" (v) : "0" (x)); \ - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) /* * vsyscall_gtod_data contains data that is : * - readonly from vsyscalls - * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) + * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) * Try to keep this structure as small as possible to avoid cache line ping pongs */ int __vgetcpu_mode __section_vgetcpu_mode; @@ -66,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gto .sysctl_enabled = 1, }; +void update_vsyscall_tz(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* sys_tz has changed */ + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); +} + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; @@ -79,8 +87,6 @@ void update_vsyscall(struct timespec *wa vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.sys_tz = sys_tz; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } @@ -166,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t) if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); - vgettimeofday(&tv, 0); + vgettimeofday(&tv, NULL); result = tv.tv_sec; if (t) *t = result; @@ -260,18 +266,10 @@ out: return ret; } -static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - static ctl_table kernel_table2[] = { - { .ctl_name = 99, .procname = "vsyscall64", + { .procname =
"vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .strategy = vsyscall_sysctl_nostrat, .proc_handler = vsyscall_sysctl_change }, {} }; @@ -291,9 +289,9 @@ static void __cpuinit vsyscall_set_cpu(i unsigned long d; unsigned long node = 0; #ifdef CONFIG_NUMA - node = cpu_to_node[cpu]; + node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly --- head-2011-03-17.orig/arch/x86/mm/fault_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/fault_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -32,33 +33,27 @@ extern void die(const char *,struct pt_regs *,long); -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(&notify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(struct pt_regs *regs, long err) + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(&notify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; }
+#endif /* * Return EIP plus the CS segment base. The segment limit is also @@ -110,7 +105,7 @@ static inline unsigned long get_segment_ LDT and other horrors are only used in user space. */ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ - down(&current->mm->context.sem); + mutex_lock(&current->mm->context.lock); desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); } else { @@ -123,7 +118,7 @@ static inline unsigned long get_segment_ base = get_desc_base((unsigned long *)desc); if (seg & (1<<2)) { - up(&current->mm->context.sem); + mutex_unlock(&current->mm->context.lock); } else put_cpu(); @@ -244,7 +239,7 @@ static void dump_fault_path(unsigned lon if (mfn_to_pfn(mfn) >= highstart_pfn) return; #endif - if (p[0] & _PAGE_PRESENT) { + if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) { page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *) __va(page); address &= 0x001fffff; @@ -270,7 +265,8 @@ static void dump_fault_path(unsigned lon * it's allocated already. */ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { page = machine_to_phys(page & PAGE_MASK); page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; @@ -416,6 +412,11 @@ fastcall void __kprobes do_page_fault(st int write, si_code; int fault; + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + /* get the address */ address = read_cr2(); @@ -453,7 +454,7 @@ fastcall void __kprobes do_page_fault(st /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here.
If we fixup a prefetch @@ -462,7 +463,7 @@ fastcall void __kprobes do_page_fault(st goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* It's safe to allow irq's after cr2 has been saved and the vmalloc @@ -481,7 +482,7 @@ fastcall void __kprobes do_page_fault(st /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -489,7 +490,7 @@ fastcall void __kprobes do_page_fault(st * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -598,8 +599,8 @@ bad_area_nosemaphore: printk_ratelimit()) { printk("%s%s[%d]: segfault at %08lx eip %08lx " "esp %08lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->eip, + task_pid_nr(tsk) > 1 ? 
KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, regs->eip, regs->esp, error_code); } tsk->thread.cr2 = address; @@ -664,8 +665,7 @@ no_context: printk(KERN_ALERT "BUG: unable to handle kernel paging" " request"); printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); + printk(KERN_ALERT "printing eip: %08lx\n", regs->eip); dump_fault_path(address); } tsk->thread.cr2 = address; @@ -681,14 +681,14 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(tsk)) { + if (is_global_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + do_group_exit(SIGKILL); goto no_context; do_sigbus: --- head-2011-03-17.orig/arch/x86/mm/fault_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/fault_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -40,34 +41,27 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(&notify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&notify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(struct pt_regs *regs, long err) + return ret; +} +#else +static inline int 
notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(&notify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* Sometimes the CPU reports invalid exceptions on prefetch. Check that here and ignore. @@ -175,7 +169,7 @@ void dump_pagetable(unsigned long addres pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; @@ -294,7 +288,6 @@ static int vmalloc_fault(unsigned long a return 0; } -static int page_fault_trace; int show_unhandled_signals = 1; @@ -371,6 +364,11 @@ asmlinkage void __kprobes do_page_fault( if (!user_mode(regs)) error_code &= ~PF_USER; /* means kernel */ + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + tsk = current; mm = tsk->mm; prefetchw(&mm->mmap_sem); @@ -408,7 +406,7 @@ asmlinkage void __kprobes do_page_fault( /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. 
If we fixup a prefetch @@ -417,16 +415,12 @@ asmlinkage void __kprobes do_page_fault( goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -447,7 +441,7 @@ asmlinkage void __kprobes do_page_fault( again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -455,7 +449,7 @@ asmlinkage void __kprobes do_page_fault( * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -557,7 +551,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); @@ -623,7 +617,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(current)) { + if (is_global_init(current)) { yield(); goto again; } @@ -696,10 +690,3 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); } - -static int __init enable_pagefaulttrace(char *str) -{ - page_fault_trace = 1; - return 1; -} -__setup("pagefaulttrace", enable_pagefaulttrace); --- head-2011-03-17.orig/arch/x86/mm/hypervisor.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/hypervisor.c 2011-01-31 17:56:27.000000000 +0100 @@ -71,8 +71,8 @@ static inline bool use_lazy_mmu_mode(voi static void multicall_failed(const multicall_entry_t *mc, int rc) { - printk(KERN_EMERG "hypercall#%lu(%lx, %lx, %lx, %lx)" - " failed: %d (caller %lx)\n", + pr_emerg("hypercall#%lu(%lx, %lx, %lx, %lx) failed: %d" + " (caller %lx)\n", mc->op, mc->args[0], mc->args[1], mc->args[2], mc->args[3], rc, mc->args[5]); BUG(); @@ -498,6 +498,9 @@ int xen_create_contiguous_region( unsigned long frame, flags; unsigned int i; int rc, success; +#ifdef CONFIG_64BIT + pte_t *ptep = NULL; +#endif struct xen_memory_exchange exchange = { .in = { .nr_extents = 1UL << order, @@ -523,6 +526,27 @@ int xen_create_contiguous_region( if (unlikely(order > MAX_CONTIG_ORDER)) return -ENOMEM; +#ifdef CONFIG_64BIT + if (unlikely(vstart > PAGE_OFFSET + MAXMEM)) { + unsigned int level; + + if (vstart < __START_KERNEL_map + || vstart + (PAGE_SIZE << order) > (unsigned long)_end) + return -EINVAL; + ptep = lookup_address((unsigned long)__va(__pa(vstart)), + &level); + if (ptep && pte_none(*ptep)) + ptep = NULL; + if (vstart < __START_KERNEL && ptep) + return -EINVAL; + if (order > MAX_CONTIG_ORDER - 1) + return -ENOMEM; + } +#else + if (unlikely(vstart + (PAGE_SIZE << order) > (unsigned long)high_memory)) + return -EINVAL; +#endif + 
set_xen_guest_handle(exchange.in.extent_start, in_frames); set_xen_guest_handle(exchange.out.extent_start, &out_frame); @@ -535,9 +559,19 @@ int xen_create_contiguous_region( in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i); MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE), __pte_ma(0), 0); +#ifdef CONFIG_64BIT + if (ptep) + MULTI_update_va_mapping(cr_mcl + i + (1U << order), + (unsigned long)__va(__pa(vstart)) + (i*PAGE_SIZE), + __pte_ma(0), 0); +#endif set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, INVALID_P2M_ENTRY); } +#ifdef CONFIG_64BIT + if (ptep) + i += i; +#endif if (HYPERVISOR_multicall_check(cr_mcl, i, NULL)) BUG(); @@ -571,9 +605,18 @@ int xen_create_contiguous_region( frame = success ? (out_frame + i) : in_frames[i]; MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE), pfn_pte_ma(frame, PAGE_KERNEL), 0); +#ifdef CONFIG_64BIT + if (ptep) + MULTI_update_va_mapping(cr_mcl + i + (1U << order), + (unsigned long)__va(__pa(vstart)) + (i*PAGE_SIZE), + pfn_pte_ma(frame, PAGE_KERNEL_RO), 0); +#endif set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame); } - +#ifdef CONFIG_64BIT + if (ptep) + i += i; +#endif cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order ? 
UVMF_TLB_FLUSH|UVMF_ALL : UVMF_INVLPG|UVMF_ALL; --- head-2011-03-17.orig/arch/x86/mm/init_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/init_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -94,7 +94,14 @@ static pte_t * __init one_page_table_ini #else if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) { #endif - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte_t *page_table = NULL; + +#ifdef CONFIG_DEBUG_PAGEALLOC + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); +#endif + if (!page_table) + page_table = + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); make_lowmem_page_readonly(page_table, @@ -102,7 +109,7 @@ static pte_t * __init one_page_table_ini set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } - + return pte_offset_kernel(pmd, 0); } @@ -360,8 +367,13 @@ static void __init set_highmem_pages_ini { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn - && pfn < xen_start_info->nr_pages; pfn++) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + && pfn < xen_start_info->nr_pages; pfn++) { + /* + * Holes under sparsemem might not have no mem_map[]: + */ + if (pfn_valid(pfn)) + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + } /* XEN: init high-mem pages outside initial allocation. 
*/ for (; pfn < highend_pfn; pfn++) { @@ -785,35 +797,18 @@ int arch_add_memory(int nid, u64 start, return __add_pages(zone, start_pfn, nr_pages); } -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); #endif struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { - size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); - - if (PTRS_PER_PMD > 1) { + if (PTRS_PER_PMD > 1) pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - PTRS_PER_PMD*sizeof(pmd_t), - SLAB_PANIC, - pmd_ctor); - if (!SHARED_KERNEL_PMD) { - /* If we're in PAE mode and have a non-shared - kernel pmd, then the pgd size must be a - page size. This is because the pgd_list - links through the page structure, so there - can only be one pgd per page for this to - work. */ - pgd_size = PAGE_SIZE; - } - } + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), + SLAB_PANIC, + pmd_ctor); } /* --- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -798,7 +798,7 @@ static void xen_finish_init_mapping(void /* Setup the direct mapping of the physical memory at PAGE_OFFSET. This runs before bootmem is initialized and gets pages directly from the physical memory. To access them they are temporarily mapped. 
*/ -void __meminit init_memory_mapping(unsigned long start, unsigned long end) +void __init_refok init_memory_mapping(unsigned long start, unsigned long end) { unsigned long next; @@ -932,12 +932,6 @@ error: } EXPORT_SYMBOL_GPL(arch_add_memory); -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); - #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) int memory_add_physaddr_to_nid(u64 start) { @@ -1216,14 +1210,6 @@ int in_gate_area_no_task(unsigned long a return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } -#ifndef CONFIG_XEN -void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) -{ - return __alloc_bootmem_core(pgdat->bdata, size, - SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); -} -#endif - const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) @@ -1232,3 +1218,48 @@ const char *arch_vma_name(struct vm_area return "[vsyscall]"; return NULL; } + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* + * Initialise the sparsemem vmemmap using huge-pages at the PMD level. 
+ */ +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t entry; + void *p = vmemmap_alloc_block(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + mk_pte_huge(entry); + set_pmd(pmd, __pmd(pte_val(entry))); + + printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", + addr, addr + PMD_SIZE - 1, p, node); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } + + return 0; +} +#endif --- head-2011-03-17.orig/arch/x86/mm/pageattr_64-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pageattr_64-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -17,9 +17,6 @@ #include #include -LIST_HEAD(mm_unpinned); -DEFINE_SPINLOCK(mm_unpinned_lock); - static void _pin_lock(struct mm_struct *mm, int lock) { if (lock) spin_lock(&mm->page_table_lock); @@ -81,8 +78,8 @@ static void _pin_lock(struct mm_struct * #define PIN_BATCH 8 static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl); -static inline unsigned int mm_walk_set_prot(void *pt, pgprot_t flags, - unsigned int cpu, unsigned int seq) +static inline unsigned int pgd_walk_set_prot(void *pt, pgprot_t flags, + unsigned int cpu, unsigned int seq) { struct page *page = virt_to_page(pt); unsigned long pfn = page_to_pfn(page); @@ -100,9 +97,9 @@ static inline unsigned int mm_walk_set_p return seq; } -static void mm_walk(struct mm_struct *mm, pgprot_t flags) +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags) { - pgd_t *pgd; + pgd_t *pgd = pgd_base; pud_t *pud; 
pmd_t *pmd; pte_t *pte; @@ -110,7 +107,6 @@ static void mm_walk(struct mm_struct *mm unsigned int cpu, seq; multicall_entry_t *mcl; - pgd = mm->pgd; cpu = get_cpu(); /* @@ -125,18 +121,18 @@ static void mm_walk(struct mm_struct *mm continue; pud = pud_offset(pgd, 0); if (PTRS_PER_PUD > 1) /* not folded */ - seq = mm_walk_set_prot(pud,flags,cpu,seq); + seq = pgd_walk_set_prot(pud,flags,cpu,seq); for (u = 0; u < PTRS_PER_PUD; u++, pud++) { if (pud_none(*pud)) continue; pmd = pmd_offset(pud, 0); if (PTRS_PER_PMD > 1) /* not folded */ - seq = mm_walk_set_prot(pmd,flags,cpu,seq); + seq = pgd_walk_set_prot(pmd,flags,cpu,seq); for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { if (pmd_none(*pmd)) continue; pte = pte_offset_kernel(pmd,0); - seq = mm_walk_set_prot(pte,flags,cpu,seq); + seq = pgd_walk_set_prot(pte,flags,cpu,seq); } } } @@ -148,12 +144,12 @@ static void mm_walk(struct mm_struct *mm seq = 0; } MULTI_update_va_mapping(mcl + seq, - (unsigned long)__user_pgd(mm->pgd), - pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, flags), + (unsigned long)__user_pgd(pgd_base), + pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), 0); MULTI_update_va_mapping(mcl + seq + 1, - (unsigned long)mm->pgd, - pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, flags), + (unsigned long)pgd_base, + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), UVMF_TLB_FLUSH); if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL))) BUG(); @@ -161,21 +157,35 @@ static void mm_walk(struct mm_struct *mm put_cpu(); } +static void __pgd_pin(pgd_t *pgd) +{ + pgd_walk(pgd, PAGE_KERNEL_RO); + xen_pgd_pin(__pa(pgd)); /* kernel */ + xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */ + SetPagePinned(virt_to_page(pgd)); +} + +static void __pgd_unpin(pgd_t *pgd) +{ + xen_pgd_unpin(__pa(pgd)); + xen_pgd_unpin(__pa(__user_pgd(pgd))); + pgd_walk(pgd, PAGE_KERNEL); + ClearPagePinned(virt_to_page(pgd)); +} + +void pgd_test_and_unpin(pgd_t *pgd) +{ + if (PagePinned(virt_to_page(pgd))) + __pgd_unpin(pgd); +} + 
void mm_pin(struct mm_struct *mm) { if (xen_feature(XENFEAT_writable_page_tables)) return; pin_lock(mm); - - mm_walk(mm, PAGE_KERNEL_RO); - xen_pgd_pin(__pa(mm->pgd)); /* kernel */ - xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */ - SetPagePinned(virt_to_page(mm->pgd)); - spin_lock(&mm_unpinned_lock); - list_del(&mm->context.unpinned); - spin_unlock(&mm_unpinned_lock); - + __pgd_pin(mm->pgd); pin_unlock(mm); } @@ -185,34 +195,30 @@ void mm_unpin(struct mm_struct *mm) return; pin_lock(mm); - - xen_pgd_unpin(__pa(mm->pgd)); - xen_pgd_unpin(__pa(__user_pgd(mm->pgd))); - mm_walk(mm, PAGE_KERNEL); - ClearPagePinned(virt_to_page(mm->pgd)); - spin_lock(&mm_unpinned_lock); - list_add(&mm->context.unpinned, &mm_unpinned); - spin_unlock(&mm_unpinned_lock); - + __pgd_unpin(mm->pgd); pin_unlock(mm); } void mm_pin_all(void) { + struct page *page; + unsigned long flags; + if (xen_feature(XENFEAT_writable_page_tables)) return; /* - * Allow uninterrupted access to the mm_unpinned list. We don't - * actually take the mm_unpinned_lock as it is taken inside mm_pin(). + * Allow uninterrupted access to the pgd_list. Also protects + * __pgd_pin() by disabling preemption. * All other CPUs must be at a safe point (e.g., in stop_machine * or offlined entirely). 
*/ - preempt_disable(); - while (!list_empty(&mm_unpinned)) - mm_pin(list_entry(mm_unpinned.next, struct mm_struct, - context.unpinned)); - preempt_enable(); + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) + __pgd_pin((pgd_t *)page_address(page)); + } + spin_unlock_irqrestore(&pgd_lock, flags); } void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) @@ -331,11 +337,11 @@ static struct page *split_large_page(uns return base; } -static void cache_flush_page(void *adr) +void clflush_cache_range(void *adr, int size) { int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (adr + i)); + for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) + clflush(adr+i); } static void flush_kernel_map(void *arg) @@ -350,7 +356,7 @@ static void flush_kernel_map(void *arg) asm volatile("wbinvd" ::: "memory"); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); - cache_flush_page(adr); + clflush_cache_range(adr, PAGE_SIZE); } __flush_tlb_all(); } @@ -418,6 +424,7 @@ __change_page_attr(unsigned long address split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; + pgprot_val(ref_prot2) &= ~_PAGE_NX; set_pte(kpte, mk_pte(split, ref_prot2)); kpte_page = split; } @@ -510,9 +517,14 @@ void global_flush_tlb(void) struct page *pg, *next; struct list_head l; - down_read(&init_mm.mmap_sem); + /* + * Write-protect the semaphore, to exclude two contexts + * doing a list_replace_init() call in parallel and to + * exclude new additions to the deferred_pages list: + */ + down_write(&init_mm.mmap_sem); list_replace_init(&deferred_pages, &l); - up_read(&init_mm.mmap_sem); + up_write(&init_mm.mmap_sem); flush_map(&l); --- head-2011-03-17.orig/arch/x86/mm/pgtable_32-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pgtable_32-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -6,6 +6,7 @@ #include #include #include 
+#include #include #include #include @@ -46,6 +47,8 @@ void show_mem(void) for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) + touch_nmi_watchdog(); page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) @@ -206,7 +209,7 @@ void pte_free(struct page *pte) __free_page(pte); } -void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) +void pmd_ctor(struct kmem_cache *cache, void *pmd) { memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } --- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -173,7 +173,7 @@ void eisa_set_level_irq(unsigned int irq } /* - * Common IRQ routing practice: nybbles in config space, + * Common IRQ routing practice: nibbles in config space, * offset by some magic constant. */ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) @@ -496,6 +496,26 @@ static int pirq_amd756_set(struct pci_de return 1; } +/* + * PicoPower PT86C523 + */ +static int pirq_pico_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb(0x10 + ((pirq - 1) >> 1), 0x24); + return ((pirq - 1) & 1) ? (inb(0x26) >> 4) : (inb(0x26) & 0xf); +} + +static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq, + int irq) +{ + unsigned int x; + outb(0x10 + ((pirq - 1) >> 1), 0x24); + x = inb(0x26); + x = ((pirq - 1) & 1) ? 
((x & 0x0f) | (irq << 4)) : ((x & 0xf0) | (irq)); + outb(x, 0x26); + return 1; +} + #ifdef CONFIG_PCI_BIOS static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) @@ -569,7 +589,7 @@ static __init int via_router_probe(struc /* FIXME: We should move some of the quirk fixup stuff here */ /* - * work arounds for some buggy BIOSes + * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { switch(router->device) { @@ -725,6 +745,24 @@ static __init int amd_router_probe(struc return 1; } +static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_PICOPOWER_PT86C523: + r->name = "PicoPower PT86C523"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + + case PCI_DEVICE_ID_PICOPOWER_PT86C523BBP: + r->name = "PicoPower PT86C523 rev. BB+"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + } + return 0; +} + static __initdata struct irq_router_handler pirq_routers[] = { { PCI_VENDOR_ID_INTEL, intel_router_probe }, { PCI_VENDOR_ID_AL, ali_router_probe }, @@ -736,6 +774,7 @@ static __initdata struct irq_router_hand { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, { PCI_VENDOR_ID_AMD, amd_router_probe }, + { PCI_VENDOR_ID_PICOPOWER, pico_router_probe }, /* Someone with docs needs to add the ATI Radeon IGP */ { 0, NULL } }; @@ -1014,7 +1053,7 @@ static void __init pcibios_fixup_irqs(vo * Work around broken HP Pavilion Notebooks which assign USB to * IRQ 9 even though it is actually wired to IRQ 11 */ -static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d) +static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) { if (!broken_hp_bios_irq9) { broken_hp_bios_irq9 = 1; @@ -1027,7 +1066,7 @@ static int __init fix_broken_hp_bios_irq * Work around broken Acer TravelMate 360 Notebooks which assign * Cardbus to IRQ 11 even though it is actually 
wired to IRQ 10 */ -static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d) +static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) { if (!acer_tm360_irqrouting) { acer_tm360_irqrouting = 1; --- head-2011-03-17.orig/arch/x86/pci/pcifront.c 2009-03-18 10:39:31.000000000 +0100 +++ head-2011-03-17/arch/x86/pci/pcifront.c 2011-01-31 17:56:27.000000000 +0100 @@ -31,7 +31,7 @@ static int __init pcifront_x86_stub_init if (raw_pci_ops) return 0; - printk(KERN_INFO "PCI: setting up Xen PCI frontend stub\n"); + pr_info("PCI: setting up Xen PCI frontend stub\n"); /* Copied from arch/i386/pci/common.c */ pci_cache_line_size = 32 >> 2; --- head-2011-03-17.orig/drivers/acpi/processor_core.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/acpi/processor_core.c 2011-01-31 17:56:27.000000000 +0100 @@ -174,10 +174,20 @@ int acpi_get_cpuid(acpi_handle handle, i if (apic_id == -1) return apic_id; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL for_each_possible_cpu(i) { if (cpu_physical_id(i) == apic_id) return i; } +#else + /* + * Use of cpu_physical_id() is bogus here. Rather than defining a + * stub enforcing a 1:1 mapping, we keep it undefined to catch bad + * uses. Return as if there was a 1:1 mapping. 
+ */ + if (apic_id < NR_CPUS && cpu_possible(apic_id)) + return apic_id; +#endif return -1; } EXPORT_SYMBOL_GPL(acpi_get_cpuid); --- head-2011-03-17.orig/drivers/acpi/processor_extcntl.c 2011-01-31 17:32:22.000000000 +0100 +++ head-2011-03-17/drivers/acpi/processor_extcntl.c 2011-01-31 17:56:27.000000000 +0100 @@ -95,7 +95,7 @@ int processor_notify_external(struct acp ret = processor_extcntl_ops->hotplug(pr, type); break; default: - printk(KERN_ERR "Unsupport processor events %d.\n", event); + pr_err("Unsupported processor event %d.\n", event); break; } --- head-2011-03-17.orig/drivers/acpi/processor_idle.c 2011-01-31 14:53:38.000000000 +0100 +++ head-2011-03-17/drivers/acpi/processor_idle.c 2011-01-31 17:56:27.000000000 +0100 @@ -1081,6 +1081,14 @@ int acpi_processor_cst_has_changed(struc if (!pr->flags.power_setup_done) return -ENODEV; + if (processor_pm_external()) { + pr->flags.power = 0; + ret = acpi_processor_get_power_info(pr); + processor_notify_external(pr, + PROCESSOR_PM_CHANGE, PM_TYPE_IDLE); + return ret; + } + cpuidle_pause_and_lock(); cpuidle_disable_device(&pr->power.dev); acpi_processor_get_power_info(pr); --- head-2011-03-17.orig/drivers/char/tpm/tpm_xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/char/tpm/tpm_xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -104,9 +104,9 @@ void __exit tpmif_exit(void); #define DPRINTK(fmt, args...) \ pr_debug("xen_tpm_fr (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args) #define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_tpm_fr: " fmt, ##args) + pr_info("xen_tpm_fr: " fmt, ##args) #define WPRINTK(fmt, args...) 
\ - printk(KERN_WARNING "xen_tpm_fr: " fmt, ##args) + pr_warning("xen_tpm_fr: " fmt, ##args) #define GRANT_INVALID_REF 0 --- head-2011-03-17.orig/drivers/cpuidle/Kconfig 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/cpuidle/Kconfig 2011-01-31 17:56:27.000000000 +0100 @@ -1,6 +1,7 @@ config CPU_IDLE bool "CPU idle PM support" + depends on !PROCESSOR_EXTERNAL_CONTROL default ACPI help CPU idle is a generic framework for supporting software-controlled --- head-2011-03-17.orig/drivers/dma/Kconfig 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/dma/Kconfig 2011-01-31 17:56:27.000000000 +0100 @@ -61,7 +61,7 @@ config INTEL_IOATDMA tristate "Intel I/OAT DMA support" depends on PCI && X86 select DMA_ENGINE - select DCA + select DCA if !XEN select ASYNC_TX_DISABLE_PQ_VAL_DMA select ASYNC_TX_DISABLE_XOR_VAL_DMA help --- head-2011-03-17.orig/drivers/dma/ioat/Makefile 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/dma/ioat/Makefile 2011-01-31 17:56:27.000000000 +0100 @@ -1,2 +1,3 @@ obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o -ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o dca.o +dca-$(CONFIG_DCA) := dca.o +ioatdma-y := pci.o dma.o dma_v2.o dma_v3.o $(dca-y) $(dca-m) --- head-2011-03-17.orig/drivers/dma/ioat/dca.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/dma/ioat/dca.c 2011-01-31 17:56:27.000000000 +0100 @@ -682,3 +682,15 @@ ioat3_dca_init(struct pci_dev *pdev, voi return dca; } + +void ioat_remove_dca_provider(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + + if (!device->dca) + return; + + unregister_dca_provider(device->dca, &pdev->dev); + free_dca_provider(device->dca); + device->dca = NULL; +} --- head-2011-03-17.orig/drivers/dma/ioat/dma.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/dma/ioat/dma.h 2011-01-31 17:56:27.000000000 +0100 @@ -347,4 +347,22 @@ void ioat_kobject_del(struct ioatdma_dev extern const struct sysfs_ops ioat_sysfs_ops; extern 
struct ioat_sysfs_entry ioat_version_attr; extern struct ioat_sysfs_entry ioat_cap_attr; + +#ifndef CONFIG_XEN +void ioat_remove_dca_provider(struct pci_dev *); +#else +static inline void ioat_remove_dca_provider(struct pci_dev *pdev) +{ + struct ioatdma_device *device = pci_get_drvdata(pdev); + BUG_ON(device->dca); +} +static inline struct dca_provider *__devinit +__ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + return NULL; +} +#define ioat_dca_init __ioat_dca_init +#define ioat2_dca_init __ioat_dca_init +#endif + #endif /* IOATDMA_H */ --- head-2011-03-17.orig/drivers/dma/ioat/pci.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/drivers/dma/ioat/pci.c 2011-01-31 17:56:27.000000000 +0100 @@ -29,7 +29,6 @@ #include #include #include -#include #include #include "dma.h" #include "dma_v2.h" @@ -170,11 +169,7 @@ static void __devexit ioat_remove(struct return; dev_err(&pdev->dev, "Removing dma and dca services\n"); - if (device->dca) { - unregister_dca_provider(device->dca, &pdev->dev); - free_dca_provider(device->dca); - device->dca = NULL; - } + ioat_remove_dca_provider(pdev); ioat_dma_remove(device); } --- head-2011-03-17.orig/drivers/firmware/dell_rbu.c 2011-01-31 14:53:38.000000000 +0100 +++ head-2011-03-17/drivers/firmware/dell_rbu.c 2011-01-31 17:56:27.000000000 +0100 @@ -175,9 +175,8 @@ static int create_packet(void *data, siz (unsigned long)packet_data_temp_buf, ordernum, 0)) { free_pages((unsigned long)packet_data_temp_buf, ordernum); - printk(KERN_WARNING - "dell_rbu:%s: failed to adjust new " - "packet\n", __func__); + pr_warning("dell_rbu:%s: failed to adjust new " + "packet\n", __func__); retval = -ENOMEM; spin_lock(&rbu_data.lock); goto out_alloc_packet_array; --- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -49,7 +49,7 @@ static struct coretemp_data *coretemp_up struct pdev_entry { struct list_head 
list; struct platform_device *pdev; - struct class_device *class_dev; + struct device *hwmon_dev; struct mutex update_lock; const char *name; u8 x86_model, x86_mask; @@ -61,8 +61,6 @@ struct pdev_entry { u8 alarm; }; -static struct coretemp_data *coretemp_update_device(struct device *dev); - /* * Sysfs stuff */ @@ -224,9 +222,9 @@ static int coretemp_probe(struct platfor if ((err = sysfs_create_group(&pdev->dev.kobj, &coretemp_group))) return err; - data->class_dev = hwmon_device_register(&pdev->dev); - if (IS_ERR(data->class_dev)) { - err = PTR_ERR(data->class_dev); + data->hwmon_dev = hwmon_device_register(&pdev->dev); + if (IS_ERR(data->hwmon_dev)) { + err = PTR_ERR(data->hwmon_dev); dev_err(&pdev->dev, "Class registration failed (%d)\n", err); goto exit_class; @@ -243,7 +241,7 @@ static int coretemp_remove(struct platfo { struct coretemp_data *data = platform_get_drvdata(pdev); - hwmon_device_unregister(data->class_dev); + hwmon_device_unregister(data->hwmon_dev); sysfs_remove_group(&pdev->dev.kobj, &coretemp_group); return 0; } @@ -315,9 +313,10 @@ static int coretemp_device_add(unsigned if (err) goto exit_entry_free; - /* check if family 6, models e, f */ + /* check if family 6, models e, f, 16 */ if (info.x86 != 0x6 || - !((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf))) { + !((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf) || + (pdev_entry->x86_model == 0x16))) { /* supported CPU not found, but report the unknown family 6 CPU */ --- head-2011-03-17.orig/drivers/oprofile/cpu_buffer.c 2011-01-31 17:01:49.000000000 +0100 +++ head-2011-03-17/drivers/oprofile/cpu_buffer.c 2011-01-31 17:56:27.000000000 +0100 @@ -415,6 +415,39 @@ void oprofile_add_pc(unsigned long pc, i log_sample(cpu_buf, pc, 0, is_kernel, event); } +#ifdef CONFIG_XEN +/* + * This is basically log_sample(b, ESCAPE_CODE, cpu_mode, CPU_TRACE_BEGIN), + * as was previously accessible through oprofile_add_pc(). 
+ */ +void oprofile_add_mode(int cpu_mode) +{ + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct task_struct *task; + + if (nr_available_slots(cpu_buf) < 3) { + cpu_buf->sample_lost_overflow++; + return; + } + + task = current; + + /* notice a switch from user->kernel or vice versa */ + if (cpu_buf->last_cpu_mode != cpu_mode) { + cpu_buf->last_cpu_mode = cpu_mode; + add_code(cpu_buf, cpu_mode); + } + + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } + + add_code(cpu_buf, CPU_TRACE_BEGIN); +} +#endif + void oprofile_add_trace(unsigned long pc) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); --- head-2011-03-17.orig/drivers/pci/msi-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/pci/msi-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -260,6 +260,12 @@ static int msi_map_vector(struct pci_dev map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq)); } +static void pci_intx_for_msi(struct pci_dev *dev, int enable) +{ + if (!(dev->dev_flags & PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) + pci_intx(dev, enable); +} + #ifdef CONFIG_PM void pci_restore_msi_state(struct pci_dev *dev) { @@ -269,7 +275,7 @@ void pci_restore_msi_state(struct pci_de if (!dev->msi_enabled && !dev->msix_enabled) return; - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); if (dev->msi_enabled) msi_set_enable(dev, 0); if (dev->msix_enabled) @@ -306,7 +312,7 @@ static int msi_capability_init(struct pc return -EBUSY; /* Set MSI enabled bits */ - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msi_set_enable(dev, 1); dev->msi_enabled = 1; @@ -380,7 +386,7 @@ static int msix_capability_init(struct p return avail; } - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msix_set_enable(dev, 1); dev->msix_enabled = 1; @@ -516,7 +522,7 @@ void pci_disable_msi(struct pci_dev* dev /* Disable MSI mode */ msi_set_enable(dev, 
0); - pci_intx(dev, 1); /* enable intx */ + pci_intx_for_msi(dev, 1); dev->msi_enabled = 0; } EXPORT_SYMBOL(pci_disable_msi); @@ -653,7 +659,7 @@ void pci_disable_msix(struct pci_dev* de /* Disable MSI mode */ msix_set_enable(dev, 0); - pci_intx(dev, 1); /* enable intx */ + pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; } EXPORT_SYMBOL(pci_disable_msix); --- head-2011-03-17.orig/drivers/xen/balloon/balloon.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/balloon/balloon.c 2011-01-31 17:56:27.000000000 +0100 @@ -124,10 +124,8 @@ static struct timer_list balloon_timer; PAGE_TO_LIST(p)->prev = NULL; \ } while(0) -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_mem: " fmt, ##args) -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "xen_mem: " fmt, ##args) +#define IPRINTK(fmt, args...) pr_info("xen_mem: " fmt, ##args) +#define WPRINTK(fmt, args...) pr_warning("xen_mem: " fmt, ##args) /* balloon_append: add the given page to the balloon. */ static void balloon_append(struct page *page, int account) @@ -324,6 +322,8 @@ static int increase_reservation(unsigned #ifndef MODULE setup_per_zone_pages_min(); + if (rc > 0) + kswapd_run(0); if (need_zonelists_rebuild) build_all_zonelists(); else @@ -477,7 +477,7 @@ static int balloon_init_watcher(struct n err = register_xenbus_watch(&target_watch); if (err) - printk(KERN_ERR "Failed to set balloon watcher\n"); + pr_err("Failed to set balloon watcher\n"); return NOTIFY_DONE; } --- head-2011-03-17.orig/drivers/xen/blkback/blkback.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkback/blkback.c 2011-01-31 17:56:27.000000000 +0100 @@ -275,13 +275,10 @@ static void __end_block_io_op(pending_re } } -static int end_block_io_op(struct bio *bio, unsigned int done, int error) +static void end_block_io_op(struct bio *bio, int error) { - if (bio->bi_size != 0) - return 1; __end_block_io_op(bio->bi_private, error); bio_put(bio); - return error; } @@ -664,7 +661,7 @@ static int __init 
blkif_init(void) kfree(pending_reqs); kfree(pending_grant_handles); free_empty_pages_and_pagevec(pending_pages, mmap_pages); - printk("%s: out of memory\n", __FUNCTION__); + pr_warning("%s: out of memory\n", __FUNCTION__); return -ENOMEM; } --- head-2011-03-17.orig/drivers/xen/blkback/blkback-pagemap.c 2009-06-09 15:01:37.000000000 +0200 +++ head-2011-03-17/drivers/xen/blkback/blkback-pagemap.c 2011-01-31 17:56:27.000000000 +0100 @@ -38,8 +38,8 @@ blkback_pagemap_set(int idx, struct page entry = blkback_pagemap + idx; if (!blkback_pagemap_entry_clear(entry)) { - printk("overwriting pagemap %d: d %u b %u g %u\n", - idx, entry->domid, entry->busid, entry->gref); + pr_emerg("overwriting pagemap %d: d %u b %u g %u\n", + idx, entry->domid, entry->busid, entry->gref); BUG(); } @@ -63,7 +63,7 @@ blkback_pagemap_clear(struct page *page) entry = blkback_pagemap + idx; if (blkback_pagemap_entry_clear(entry)) { - printk("clearing empty pagemap %d\n", idx); + pr_emerg("clearing empty pagemap %d\n", idx); BUG(); } @@ -85,7 +85,7 @@ blkback_pagemap_read(struct page *page) entry = blkback_pagemap + idx; if (blkback_pagemap_entry_clear(entry)) { - printk("reading empty pagemap %d\n", idx); + pr_emerg("reading empty pagemap %d\n", idx); BUG(); } --- head-2011-03-17.orig/drivers/xen/blkback/vbd.c 2010-03-22 12:00:53.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkback/vbd.c 2011-01-31 17:56:27.000000000 +0100 @@ -126,18 +126,18 @@ void vbd_resize(blkif_t *blkif) struct xenbus_device *dev = blkif->be->dev; unsigned long long new_size = vbd_size(vbd); - printk(KERN_INFO "VBD Resize: new size %Lu\n", new_size); + pr_info("VBD Resize: new size %Lu\n", new_size); vbd->size = new_size; again: err = xenbus_transaction_start(&xbt); if (err) { - printk(KERN_WARNING "Error starting transaction"); + pr_warning("Error %d starting transaction", err); return; } err = xenbus_printf(xbt, dev->nodename, "sectors", "%Lu", vbd_size(vbd)); if (err) { - printk(KERN_WARNING "Error writing new size"); 
+ pr_warning("Error %d writing new size", err); goto abort; } /* @@ -147,7 +147,7 @@ again: */ err = xenbus_printf(xbt, dev->nodename, "state", "%d", dev->state); if (err) { - printk(KERN_WARNING "Error writing the state"); + pr_warning("Error %d writing the state", err); goto abort; } @@ -155,7 +155,7 @@ again: if (err == -EAGAIN) goto again; if (err) - printk(KERN_WARNING "Error ending transaction"); + pr_warning("Error %d ending transaction", err); abort: xenbus_transaction_end(xbt, 1); } --- head-2011-03-17.orig/drivers/xen/blkfront/blkfront.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkfront/blkfront.c 2011-01-31 17:56:27.000000000 +0100 @@ -233,7 +233,7 @@ static int setup_blkring(struct xenbus_d SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - memset(info->sg, 0, sizeof(info->sg)); + sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { @@ -336,8 +336,7 @@ static void connect(struct blkfront_info "sectors", "%Lu", &sectors); if (XENBUS_EXIST_ERR(err)) return; - printk(KERN_INFO "Setting capacity to %Lu\n", - sectors); + pr_info("Setting capacity to %Lu\n", sectors); set_capacity(info->gd, sectors); /* fall through */ @@ -591,8 +590,6 @@ int blkif_ioctl(struct inode *inode, str } } - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ return -EINVAL; /* same return as native Linux */ } @@ -667,9 +664,8 @@ static int blkif_queue_request(struct re ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - for (i = 0; i < ring_req->nr_segments; ++i) { - sg = info->sg + i; - buffer_mfn = page_to_phys(sg->page) >> PAGE_SHIFT; + for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + buffer_mfn = page_to_phys(sg_page(sg)) >> PAGE_SHIFT; fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; /* install a grant reference.
*/ @@ -785,8 +781,9 @@ static irqreturn_t blkif_int(int irq, vo switch (bret->operation) { case BLKIF_OP_WRITE_BARRIER: if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { - printk("blkfront: %s: write barrier op failed\n", - info->gd->disk_name); + pr_warning("blkfront: %s:" + " write barrier op failed\n", + info->gd->disk_name); uptodate = -EOPNOTSUPP; info->feature_barrier = 0; xlvbd_barrier(info); --- head-2011-03-17.orig/drivers/xen/blkfront/block.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkfront/block.h 2011-01-31 17:56:27.000000000 +0100 @@ -59,7 +59,7 @@ #define DPRINTK(_f, _a...) pr_debug(_f, ## _a) #if 0 -#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a) +#define DPRINTK_IOCTL(_f, _a...) pr_alert(_f, ## _a) #else #define DPRINTK_IOCTL(_f, _a...) ((void)0) #endif --- head-2011-03-17.orig/drivers/xen/blkfront/vbd.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blkfront/vbd.c 2011-01-31 17:56:27.000000000 +0100 @@ -185,7 +185,8 @@ xlbd_alloc_major_info(int major, int min return NULL; } - printk("xen-vbd: registered block device major %i\n", ptr->major); + pr_info("xen-vbd: registered block device major %i\n", + ptr->major); } ptr->minors = minors; @@ -435,7 +436,8 @@ xlvbd_add(blkif_sector_t capacity, int v if ((vdevice>>EXT_SHIFT) > 1) { /* this is above the extended range; something is wrong */ - printk(KERN_WARNING "blkfront: vdevice 0x%x is above the extended range; ignoring\n", vdevice); + pr_warning("blkfront: vdevice %#x is above the extended" + " range; ignoring\n", vdevice); return -ENODEV; } @@ -494,15 +496,16 @@ xlvbd_barrier(struct blkfront_info *info info->feature_barrier ? QUEUE_ORDERED_DRAIN : QUEUE_ORDERED_NONE, NULL); if (err) return err; - printk(KERN_INFO "blkfront: %s: barriers %s\n", - info->gd->disk_name, info->feature_barrier ? "enabled" : "disabled"); + pr_info("blkfront: %s: barriers %s\n", + info->gd->disk_name, + info->feature_barrier ? 
"enabled" : "disabled"); return 0; } #else int xlvbd_barrier(struct blkfront_info *info) { - printk(KERN_INFO "blkfront: %s: barriers disabled\n", info->gd->disk_name); + pr_info("blkfront: %s: barriers disabled\n", info->gd->disk_name); return -ENOSYS; } #endif --- head-2011-03-17.orig/drivers/xen/blktap/blktap.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap/blktap.c 2011-02-17 10:10:44.000000000 +0100 @@ -776,8 +776,8 @@ static int blktap_ioctl(struct inode *in case BLKTAP_IOCTL_PRINT_IDXS: { if (info) { - printk("User Rings: \n-----------\n"); - printk("UF: rsp_cons: %2d, req_prod_prv: %2d " + pr_info("User Rings: \n-----------\n"); + pr_info("UF: rsp_cons: %2d, req_prod_prv: %2d " "| req_prod: %2d, rsp_prod: %2d\n", info->ufe_ring.rsp_cons, info->ufe_ring.req_prod_pvt, --- head-2011-03-17.orig/drivers/xen/blktap/common.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap/common.h 2011-01-31 17:56:27.000000000 +0100 @@ -44,7 +44,7 @@ #define DPRINTK(_f, _a...) pr_debug("(file=%s, line=%d) " _f, \ __FILE__ , __LINE__ , ## _a ) -#define WPRINTK(fmt, args...) printk(KERN_WARNING "blk_tap: " fmt, ##args) +#define WPRINTK(fmt, args...) 
pr_warning("blktap: " fmt, ##args) struct backend_info; --- head-2011-03-17.orig/drivers/xen/blktap/xenbus.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap/xenbus.c 2011-01-31 17:56:27.000000000 +0100 @@ -359,8 +359,8 @@ static void tap_frontend_changed(struct switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __FUNCTION__, dev->nodename); + pr_info("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; @@ -461,9 +461,8 @@ static int connect_ring(struct backend_i xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); return -1; } - printk(KERN_INFO - "blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n", - ring_ref, evtchn, be->blkif->blk_protocol, protocol); + pr_info("blktap: ring-ref %ld, event-channel %d, protocol %d (%s)\n", + ring_ref, evtchn, be->blkif->blk_protocol, protocol); /* Map the shared frame, irq etc. */ err = tap_blkif_map(be->blkif, ring_ref, evtchn); --- head-2011-03-17.orig/drivers/xen/blktap2/control.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap2/control.c 2011-01-31 17:56:27.000000000 +0100 @@ -18,6 +18,7 @@ blktap_control_initialize_tap(struct blk memset(tap, 0, sizeof(*tap)); set_bit(BLKTAP_CONTROL, &tap->dev_inuse); init_rwsem(&tap->tap_sem); + sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); init_waitqueue_head(&tap->wq); atomic_set(&tap->refcnt, 0); --- head-2011-03-17.orig/drivers/xen/blktap2/device.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap2/device.c 2011-01-31 17:56:27.000000000 +0100 @@ -16,7 +16,7 @@ #include "../blkback/blkback-pagemap.h" #if 0 -#define DPRINTK_IOCTL(_f, _a...) printk(KERN_ALERT _f, ## _a) +#define DPRINTK_IOCTL(_f, _a...) pr_alert(_f, ## _a) #else #define DPRINTK_IOCTL(_f, _a...) 
((void)0) #endif @@ -133,8 +133,6 @@ blktap_device_ioctl(struct inode *inode, return 0; default: - /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n", - command);*/ return -EINVAL; /* same return as native Linux */ } @@ -662,8 +660,7 @@ blktap_device_process_request(struct blk request->nr_pages = 0; blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg); BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - for (i = 0; i < blkif_req.nr_segments; ++i) { - sg = tap->sg + i; + for_each_sg(tap->sg, sg, blkif_req.nr_segments, i) { fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; nr_sects += sg->length >> 9; @@ -674,13 +671,13 @@ blktap_device_process_request(struct blk .first_sect = fsect, .last_sect = lsect }; - if (PageBlkback(sg->page)) { + if (PageBlkback(sg_page(sg))) { /* foreign page -- use xen */ if (blktap_prep_foreign(tap, request, &blkif_req, i, - sg->page, + sg_page(sg), &table)) goto out; } else { @@ -688,7 +685,7 @@ blktap_device_process_request(struct blk if (blktap_map(tap, request, i, - sg->page)) + sg_page(sg))) goto out; } --- head-2011-03-17.orig/drivers/xen/console/console.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/console/console.c 2011-01-31 17:56:27.000000000 +0100 @@ -716,10 +716,10 @@ static int __init xencons_init(void) tty_set_operations(xencons_driver, &xencons_ops); if ((rc = tty_register_driver(DRV(xencons_driver))) != 0) { - printk("WARNING: Failed to register Xen virtual " - "console driver as '%s%d'\n", - DRV(xencons_driver)->name, - DRV(xencons_driver)->name_base); + pr_warning("WARNING: Failed to register Xen virtual " + "console driver as '%s%d'\n", + DRV(xencons_driver)->name, + DRV(xencons_driver)->name_base); put_tty_driver(xencons_driver); xencons_driver = NULL; return rc; @@ -736,8 +736,8 @@ static int __init xencons_init(void) BUG_ON(xencons_priv_irq < 0); } - printk("Xen virtual console successfully installed as %s%d\n", - DRV(xencons_driver)->name, 
xc_num); + pr_info("Xen virtual console successfully installed as %s%d\n", + DRV(xencons_driver)->name, xc_num); return 0; } --- head-2011-03-17.orig/drivers/xen/console/xencons_ring.c 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/console/xencons_ring.c 2011-01-31 17:56:27.000000000 +0100 @@ -125,7 +125,7 @@ int xencons_ring_init(void) xen_start_info->console.domU.evtchn, handle_input, 0, "xencons", NULL); if (irq < 0) { - printk(KERN_ERR "XEN console request irq failed %i\n", irq); + pr_err("XEN console request irq failed %i\n", irq); return irq; } --- head-2011-03-17.orig/drivers/xen/core/cpu_hotplug.c 2011-01-24 12:06:05.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/cpu_hotplug.c 2011-01-31 17:56:27.000000000 +0100 @@ -36,7 +36,7 @@ static void vcpu_hotplug(unsigned int cp sprintf(dir, "cpu/%u", cpu); err = xenbus_scanf(XBT_NIL, dir, "availability", "%s", state); if (err != 1) { - printk(KERN_ERR "XENBUS: Unable to read cpu state\n"); + pr_err("XENBUS: Unable to read cpu state\n"); return; } @@ -49,7 +49,7 @@ static void vcpu_hotplug(unsigned int cp if (!cpu_down(cpu) && dev) kobject_uevent(&dev->kobj, KOBJ_OFFLINE); } else { - printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", + pr_err("XENBUS: unknown state(%s) on CPU%d\n", state, cpu); } } @@ -97,8 +97,7 @@ static int setup_cpu_watcher(struct noti if (!is_initial_xendomain()) { for_each_possible_cpu(i) vcpu_hotplug(i, get_cpu_sysdev(i)); - printk(KERN_INFO "Brought up %ld CPUs\n", - (long)num_online_cpus()); + pr_info("Brought up %ld CPUs\n", (long)num_online_cpus()); } return NOTIFY_DONE; @@ -132,8 +131,7 @@ int smp_suspend(void) continue; err = cpu_down(cpu); if (err) { - printk(KERN_CRIT "Failed to take all CPUs " - "down: %d.\n", err); + pr_crit("Failed to take all CPUs down: %d\n", err); for_each_possible_cpu(cpu) vcpu_hotplug(cpu, NULL); return err; @@ -161,8 +159,8 @@ int cpu_up_check(unsigned int cpu) if (local_cpu_hotplug_request()) { cpu_set(cpu, 
local_allowed_cpumask); if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { - printk("%s: attempt to bring up CPU %u disallowed by " - "remote admin.\n", __FUNCTION__, cpu); + pr_warning("%s: attempt to bring up CPU %u disallowed " + "by remote admin.\n", __FUNCTION__, cpu); rc = -EBUSY; } } else if (!cpu_isset(cpu, local_allowed_cpumask) || --- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-01-31 17:32:22.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-01-31 17:56:27.000000000 +0100 @@ -350,8 +350,8 @@ static int find_unbound_irq(void) if (!warned) { warned = 1; - printk(KERN_WARNING "No available IRQ to bind to: " - "increase NR_DYNIRQS.\n"); + pr_warning("No available IRQ to bind to: " + "increase NR_DYNIRQS.\n"); } return -ENOSPC; @@ -837,8 +837,7 @@ static void enable_pirq(unsigned int irq bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { if (!probing_irq(irq)) - printk(KERN_INFO "Failed to obtain physical IRQ %d\n", - irq); + pr_info("Failed to obtain physical IRQ %d\n", irq); return; } evtchn = bind_pirq.port; @@ -1136,8 +1135,8 @@ int evtchn_map_pirq(int irq, int xen_pir return 0; } else if (type_from_irq(irq) != IRQT_PIRQ || index_from_irq(irq) != xen_pirq) { - printk(KERN_ERR "IRQ#%d is already mapped to %d:%u - " - "cannot map to PIRQ#%u\n", + pr_err("IRQ#%d is already mapped to %d:%u - " + "cannot map to PIRQ#%u\n", irq, type_from_irq(irq), index_from_irq(irq), xen_pirq); return -EINVAL; } --- head-2011-03-17.orig/drivers/xen/core/firmware.c 2007-06-22 09:08:06.000000000 +0200 +++ head-2011-03-17/drivers/xen/core/firmware.c 2011-01-31 17:56:27.000000000 +0100 @@ -1,4 +1,5 @@ #include +#include #include #include #include --- head-2011-03-17.orig/drivers/xen/core/gnttab.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/gnttab.c 2011-01-31 17:56:27.000000000 +0100 @@ -691,7 +691,7 @@ int gnttab_resume(void) resume_frames = 
alloc_xen_mmio(PAGE_SIZE * max_nr_gframes); shared = ioremap(resume_frames, PAGE_SIZE * max_nr_gframes); if (shared == NULL) { - printk("error to ioremap gnttab share frames\n"); + pr_warning("error to ioremap gnttab share frames\n"); return -1; } } --- head-2011-03-17.orig/drivers/xen/core/machine_kexec.c 2009-07-13 14:25:35.000000000 +0200 +++ head-2011-03-17/drivers/xen/core/machine_kexec.c 2011-01-31 17:56:27.000000000 +0100 @@ -29,6 +29,10 @@ void __init xen_machine_kexec_setup_reso int k = 0; int rc; + if (strstr(boot_command_line, "crashkernel=")) + pr_warning("Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); + if (!is_initial_xendomain()) return; @@ -130,6 +134,13 @@ void __init xen_machine_kexec_setup_reso xen_max_nr_phys_cpus)) goto err; +#ifdef CONFIG_X86 + if (xen_create_contiguous_region((unsigned long)&vmcoreinfo_note, + get_order(sizeof(vmcoreinfo_note)), + BITS_PER_LONG)) + goto err; +#endif + return; err: @@ -213,6 +224,13 @@ NORET_TYPE void machine_kexec(struct kim panic("KEXEC_CMD_kexec hypercall should not return\n"); } +#ifdef CONFIG_X86 +unsigned long paddr_vmcoreinfo_note(void) +{ + return virt_to_machine(&vmcoreinfo_note); +} +#endif + void machine_shutdown(void) { /* do nothing */ --- head-2011-03-17.orig/drivers/xen/core/machine_reboot.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/machine_reboot.c 2011-01-31 17:56:27.000000000 +0100 @@ -204,8 +204,7 @@ int __xen_suspend(int fast_suspend, void #if defined(__i386__) || defined(__x86_64__) if (xen_feature(XENFEAT_auto_translated_physmap)) { - printk(KERN_WARNING "Cannot suspend in " - "auto_translated_physmap mode.\n"); + pr_warning("Can't suspend in auto_translated_physmap mode\n"); return -EOPNOTSUPP; } #endif --- head-2011-03-17.orig/drivers/xen/core/reboot.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/reboot.c 2011-01-31 17:56:27.000000000 +0100 @@ -82,14 +82,14 @@ static int xen_suspend(void 
*__unused) daemonize("suspend"); err = set_cpus_allowed(current, cpumask_of_cpu(0)); if (err) { - printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err); + pr_err("Xen suspend can't run on CPU0 (%d)\n", err); goto fail; } do { err = __xen_suspend(fast_suspend, xen_resume_notifier); if (err) { - printk(KERN_ERR "Xen suspend failed (%d)\n", err); + pr_err("Xen suspend failed (%d)\n", err); goto fail; } if (!suspend_cancelled) @@ -151,8 +151,8 @@ static void __shutdown_handler(struct wo NULL, CLONE_FS | CLONE_FILES); if (err < 0) { - printk(KERN_WARNING "Error creating shutdown process (%d): " - "retrying...\n", -err); + pr_warning("Error creating shutdown process (%d): " + "retrying...\n", -err); schedule_delayed_work(&shutdown_work, HZ/2); } } @@ -198,7 +198,7 @@ static void shutdown_handler(struct xenb else if (strcmp(str, "halt") == 0) new_state = SHUTDOWN_HALT; else - printk("Ignoring shutdown request: %s\n", str); + pr_warning("Ignoring shutdown request: %s\n", str); switch_shutdown_state(new_state); @@ -217,8 +217,7 @@ static void sysrq_handler(struct xenbus_ if (err) return; if (!xenbus_scanf(xbt, "control", "sysrq", "%c", &sysrq_key)) { - printk(KERN_ERR "Unable to read sysrq code in " - "control/sysrq\n"); + pr_err("Unable to read sysrq code in control/sysrq\n"); xenbus_transaction_end(xbt, 1); return; } @@ -267,7 +266,7 @@ static int setup_suspend_evtchn(void) return -1; port = irq_to_evtchn_port(irq); - printk(KERN_INFO "suspend: event channel %d\n", port); + pr_info("suspend: event channel %d\n", port); sprintf(portstr, "%d", port); xenbus_write(XBT_NIL, "device/suspend", "event-channel", portstr); @@ -280,7 +279,7 @@ static int setup_shutdown_watcher(void) err = register_xenbus_watch(&sysrq_watch); if (err) { - printk(KERN_ERR "Failed to set sysrq watcher\n"); + pr_err("Failed to set sysrq watcher\n"); return err; } @@ -293,14 +292,14 @@ static int setup_shutdown_watcher(void) err = register_xenbus_watch(&shutdown_watch); if (err) { - 
printk(KERN_ERR "Failed to set shutdown watcher\n"); + pr_err("Failed to set shutdown watcher\n"); return err; } /* suspend event channel */ err = setup_suspend_evtchn(); if (err) { - printk(KERN_ERR "Failed to register suspend event channel\n"); + pr_err("Failed to register suspend event channel\n"); return err; } --- head-2011-03-17.orig/drivers/xen/core/smpboot.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/smpboot.c 2011-01-31 17:56:27.000000000 +0100 @@ -36,31 +36,20 @@ extern void failsafe_callback(void); extern void system_call(void); extern void smp_trap_init(trap_info_t *); -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; - cpumask_t cpu_online_map; EXPORT_SYMBOL(cpu_online_map); cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); cpumask_t cpu_initialized_map; -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); static char resched_name[NR_CPUS][15]; static char callfunc_name[NR_CPUS][15]; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; - -#if defined(__i386__) -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... 
NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); -#endif - void __init prefill_possible_map(void) { int i, rc; @@ -80,30 +69,6 @@ void __init smp_alloc_memory(void) { } -static inline void -set_cpu_sibling_map(unsigned int cpu) -{ - cpu_data[cpu].phys_proc_id = cpu; - cpu_data[cpu].cpu_core_id = 0; - - cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); - cpu_core_map[cpu] = cpumask_of_cpu(cpu); - - cpu_data[cpu].booted_cores = 1; -} - -static void -remove_siblinginfo(unsigned int cpu) -{ - cpu_data[cpu].phys_proc_id = BAD_APICID; - cpu_data[cpu].cpu_core_id = BAD_APICID; - - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - - cpu_data[cpu].booted_cores = 0; -} - static int __cpuinit xen_smp_intr_init(unsigned int cpu) { int rc; @@ -160,9 +125,9 @@ void __cpuinit cpu_bringup(void) { cpu_init(); #ifdef __i386__ - identify_secondary_cpu(cpu_data + smp_processor_id()); + identify_secondary_cpu(&current_cpu_data); #else - identify_cpu(cpu_data + smp_processor_id()); + identify_cpu(&current_cpu_data); #endif touch_softlockup_watchdog(); preempt_disable(); @@ -262,20 +227,9 @@ void __init smp_prepare_cpus(unsigned in apicid = 0; if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - boot_cpu_data.apicid = apicid; - cpu_data[0] = boot_cpu_data; - - x86_cpu_to_apicid[0] = apicid; - + cpu_data(0) = boot_cpu_data; current_thread_info()->cpu = 0; - for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); - } - - set_cpu_sibling_map(0); - if (xen_smp_intr_init(0)) BUG(); @@ -300,8 +254,7 @@ void __init smp_prepare_cpus(unsigned in gdt_descr = &cpu_gdt_descr[cpu]; gdt_descr->address = get_zeroed_page(GFP_KERNEL); if (unlikely(!gdt_descr->address)) { - printk(KERN_CRIT "CPU%d failed to allocate GDT\n", - cpu); + pr_crit("CPU%d failed to allocate GDT\n", cpu); continue; } gdt_descr->size = GDT_SIZE; @@ -316,10 +269,8 @@ void __init smp_prepare_cpus(unsigned in
apicid = cpu; if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - cpu_data[cpu] = boot_cpu_data; - cpu_data[cpu].apicid = apicid; - - x86_cpu_to_apicid[cpu] = apicid; + cpu_data(cpu) = boot_cpu_data; + cpu_data(cpu).cpu_index = cpu; #ifdef __x86_64__ cpu_pda(cpu)->pcurrent = idle; @@ -382,8 +333,6 @@ int __cpu_disable(void) if (cpu == 0) return -EBUSY; - remove_siblinginfo(cpu); - cpu_clear(cpu, map); fixup_irqs(map); cpu_clear(cpu, cpu_online_map); @@ -420,14 +369,11 @@ int __cpuinit __cpu_up(unsigned int cpu) alternatives_smp_switch(1); /* This must be done before setting cpu_online_map */ - set_cpu_sibling_map(cpu); wmb(); rc = xen_smp_intr_init(cpu); - if (rc) { - remove_siblinginfo(cpu); + if (rc) return rc; - } cpu_set(cpu, cpu_online_map); --- head-2011-03-17.orig/drivers/xen/fbfront/xenfb.c 2011-02-17 10:08:20.000000000 +0100 +++ head-2011-03-17/drivers/xen/fbfront/xenfb.c 2011-01-31 17:56:27.000000000 +0100 @@ -240,8 +240,8 @@ static void xenfb_update_screen(struct x mutex_unlock(&info->mm_lock); if (x2 < x1 || y2 < y1) { - printk("xenfb_update_screen bogus rect %d %d %d %d\n", - x1, x2, y1, y2); + pr_warning("xenfb_update_screen bogus rect %d %d %d %d\n", + x1, x2, y1, y2); WARN_ON(1); } xenfb_do_update(info, x1, y1, x2 - x1, y2 - y1); --- head-2011-03-17.orig/drivers/xen/fbfront/xenkbd.c 2011-01-31 17:32:22.000000000 +0100 +++ head-2011-03-17/drivers/xen/fbfront/xenkbd.c 2011-01-31 17:56:27.000000000 +0100 @@ -80,8 +80,8 @@ static irqreturn_t input_handler(int rq, input_report_key(dev, event->key.keycode, event->key.pressed); else - printk("xenkbd: unhandled keycode 0x%x\n", - event->key.keycode); + pr_warning("xenkbd: unhandled keycode 0x%x\n", + event->key.keycode); break; case XENKBD_TYPE_POS: if (event->pos.rel_z) --- head-2011-03-17.orig/drivers/xen/gntdev/gntdev.c 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/gntdev/gntdev.c 2011-01-31 17:56:27.000000000 +0100 
@@ -378,14 +378,14 @@ static int __init gntdev_init(void) struct class_device *device; if (!is_running_on_xen()) { - printk(KERN_ERR "You must be running Xen to use gntdev\n"); + pr_err("You must be running Xen to use gntdev\n"); return -ENODEV; } gntdev_major = register_chrdev(0, GNTDEV_NAME, &gntdev_fops); if (gntdev_major < 0) { - printk(KERN_ERR "Could not register gntdev device\n"); + pr_err("Could not register gntdev device\n"); return -ENOMEM; } @@ -394,18 +394,16 @@ static int __init gntdev_init(void) * created manually using mknod. */ if ((class = get_xen_class()) == NULL) { - printk(KERN_ERR "Error setting up xen_class\n"); - printk(KERN_ERR "gntdev created with major number = %d\n", - gntdev_major); + pr_err("Error setting up xen_class\n"); + pr_err("gntdev created, major number = %d\n", gntdev_major); return 0; } device = class_device_create(class, NULL, MKDEV(gntdev_major, 0), NULL, GNTDEV_NAME); if (IS_ERR(device)) { - printk(KERN_ERR "Error creating gntdev device in xen_class\n"); - printk(KERN_ERR "gntdev created with major number = %d\n", - gntdev_major); + pr_err("Error creating gntdev device in xen_class\n"); + pr_err("gntdev created, major number = %d\n", gntdev_major); return 0; } @@ -491,7 +489,7 @@ static int gntdev_mmap (struct file *fli gntdev_file_private_data_t *private_data = flip->private_data; if (unlikely(!private_data)) { - printk(KERN_ERR "File's private data is NULL.\n"); + pr_err("file's private data is NULL\n"); return -EINVAL; } @@ -499,21 +497,21 @@ static int gntdev_mmap (struct file *fli down_read(&private_data->grants_sem); if (unlikely(!private_data->grants)) { up_read(&private_data->grants_sem); - printk(KERN_ERR "Attempted to mmap before ioctl.\n"); + pr_err("attempted to mmap before ioctl\n"); return -EINVAL; } up_read(&private_data->grants_sem); if (unlikely((size <= 0) || (size + slot_index) > private_data->grants_size)) { - printk(KERN_ERR "Invalid number of pages or offset" - "(num_pages = %d, first_slot = %ld).\n", 
+ pr_err("Invalid number of pages or offset" + "(num_pages = %d, first_slot = %ld)\n", size, slot_index); return -ENXIO; } if ((vma->vm_flags & VM_WRITE) && !(vma->vm_flags & VM_SHARED)) { - printk(KERN_ERR "Writable mappings must be shared.\n"); + pr_err("writable mappings must be shared\n"); return -EINVAL; } @@ -522,8 +520,8 @@ static int gntdev_mmap (struct file *fli for (i = 0; i < size; ++i) { if (private_data->grants[slot_index + i].state != GNTDEV_SLOT_NOT_YET_MAPPED) { - printk(KERN_ERR "Slot (index = %ld) is in the wrong " - "state (%d).\n", slot_index + i, + pr_err("Slot (index = %ld) is in the wrong " + "state (%d)\n", slot_index + i, private_data->grants[slot_index + i].state); up_write(&private_data->grants_sem); return -EINVAL; @@ -538,8 +536,7 @@ static int gntdev_mmap (struct file *fli vma->vm_private_data = kzalloc(size * sizeof(struct page *), GFP_KERNEL); if (vma->vm_private_data == NULL) { - printk(KERN_ERR "Couldn't allocate mapping structure for VM " - "area.\n"); + pr_err("couldn't allocate mapping structure for VM area\n"); return -ENOMEM; } @@ -584,7 +581,7 @@ static int gntdev_mmap (struct file *fli BUG_ON(ret); if (op.status != GNTST_okay) { if (op.status != GNTST_eagain) - printk(KERN_ERR "Error mapping the grant reference " + pr_err("Error mapping the grant reference " "into the kernel (%d). domid = %d; ref = %d\n", op.status, private_data->grants[slot_index+i] @@ -631,8 +628,8 @@ static int gntdev_mmap (struct file *fli + (i << PAGE_SHIFT), &ptep))) { - printk(KERN_ERR "Error obtaining PTE pointer " - "(%d).\n", ret); + pr_err("Error obtaining PTE pointer (%d)\n", + ret); goto undo_map_out; } @@ -663,7 +660,7 @@ static int gntdev_mmap (struct file *fli &op, 1); BUG_ON(ret); if (op.status != GNTST_okay) { - printk(KERN_ERR "Error mapping the grant " + pr_err("Error mapping the grant " "reference into user space (%d). 
domid " "= %d; ref = %d\n", op.status, private_data->grants[slot_index+i].u @@ -770,8 +767,8 @@ static pte_t gntdev_clear_pte(struct vm_ GNTTABOP_unmap_grant_ref, &op, 1); BUG_ON(ret); if (op.status != GNTST_okay) - printk("User unmap grant status = %d\n", - op.status); + pr_warning("User unmap grant status = %d\n", + op.status); } else { /* USING SHADOW PAGE TABLES. */ copy = ptep_get_and_clear_full(vma->vm_mm, addr, ptep, is_fullmm); @@ -787,7 +784,8 @@ static pte_t gntdev_clear_pte(struct vm_ &op, 1); BUG_ON(ret); if (op.status != GNTST_okay) - printk("Kernel unmap grant status = %d\n", op.status); + pr_warning("Kernel unmap grant status = %d\n", + op.status); /* Return slot to the not-yet-mapped state, so that it may be @@ -845,8 +843,7 @@ static long gntdev_ioctl(struct file *fl up_write(&private_data->grants_sem); if (rc) { - printk (KERN_ERR "Initialising gntdev private data " - "failed.\n"); + pr_err("Initialising gntdev private data failed\n"); return rc; } } @@ -888,22 +885,22 @@ private_data_initialised: if (op.count == 1) { if ((rc = add_grant_reference(private_data, op.refs, &op.index)) < 0) { - printk(KERN_ERR "Adding grant reference " - "failed (%d).\n", rc); + pr_err("Adding grant reference failed (%d)\n", + rc); goto map_out; } } else { if ((rc = find_contiguous_free_range(private_data, op.count)) < 0) { - printk(KERN_ERR "Finding contiguous range " - "failed (%d).\n", rc); + pr_err("Finding contiguous range failed" + " (%d)\n", rc); goto map_out; } op.index = rc << PAGE_SHIFT; if ((rc = add_grant_references(private_data, op.count, refs, rc))) { - printk(KERN_ERR "Adding grant references " - "failed (%d).\n", rc); + pr_err("Adding grant references failed (%d)\n", + rc); goto map_out; } compress_free_list(private_data); @@ -942,15 +939,13 @@ private_data_initialised: != GNTDEV_SLOT_NOT_YET_MAPPED)) { if (private_data->grants[start_index + i].state == GNTDEV_SLOT_INVALID) { - printk(KERN_ERR - "Tried to remove an invalid " + pr_err("Tried to remove 
an invalid " "grant at offset 0x%x.", (start_index + i) << PAGE_SHIFT); rc = -EINVAL; } else { - printk(KERN_ERR - "Tried to remove a grant which " + pr_err("Tried to remove a grant which " "is currently mmap()-ed at " "offset 0x%x.", (start_index + i) @@ -998,7 +993,7 @@ private_data_initialised: goto get_offset_out; } if (vma->vm_start != vaddr) { - printk(KERN_ERR "The vaddr specified in an " + pr_err("The vaddr specified in an " "IOCTL_GNTDEV_GET_OFFSET_FOR_VADDR must be at " "the start of the VM area. vma->vm_start = " "%#lx; vaddr = %#lx\n", --- head-2011-03-17.orig/drivers/xen/netback/common.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/netback/common.h 2011-01-31 17:56:27.000000000 +0100 @@ -49,10 +49,8 @@ #define DPRINTK(_f, _a...) \ pr_debug("(file=%s, line=%d) " _f, \ __FILE__ , __LINE__ , ## _a ) -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "xen_net: " fmt, ##args) -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "xen_net: " fmt, ##args) +#define IPRINTK(fmt, args...) pr_info("xen_net: " fmt, ##args) +#define WPRINTK(fmt, args...) pr_warning("xen_net: " fmt, ##args) typedef struct netif_st { /* Unique identifier for this interface. 
*/ --- head-2011-03-17.orig/drivers/xen/netback/interface.c 2011-02-17 10:10:00.000000000 +0100 +++ head-2011-03-17/drivers/xen/netback/interface.c 2011-03-17 14:12:41.000000000 +0100 @@ -173,9 +173,13 @@ static const struct netif_stat { { "rx_gso_csum_fixups", offsetof(netif_t, rx_gso_csum_fixups) / sizeof(long) }, }; -static int netbk_get_stats_count(struct net_device *dev) +static int netbk_get_sset_count(struct net_device *dev, int sset) { - return ARRAY_SIZE(netbk_stats); + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(netbk_stats); + } + return -EOPNOTSUPP; } static void netbk_get_ethtool_stats(struct net_device *dev, @@ -213,7 +217,7 @@ static struct ethtool_ops network_ethtoo .set_tso = netbk_set_tso, .get_link = ethtool_op_get_link, - .get_stats_count = netbk_get_stats_count, + .get_sset_count = netbk_get_sset_count, .get_ethtool_stats = netbk_get_ethtool_stats, .get_strings = netbk_get_strings, }; --- head-2011-03-17.orig/drivers/xen/netback/netback.c 2011-02-09 15:54:33.000000000 +0100 +++ head-2011-03-17/drivers/xen/netback/netback.c 2011-02-09 15:55:20.000000000 +0100 @@ -362,8 +362,8 @@ static void xen_network_done_notify(void { static struct net_device *eth0_dev = NULL; if (unlikely(eth0_dev == NULL)) - eth0_dev = __dev_get_by_name("eth0"); - netif_rx_schedule(eth0_dev); + eth0_dev = __dev_get_by_name(&init_net, "eth0"); + netif_rx_schedule(eth0_dev, ???); } /* * Add following to poll() function in NAPI driver (Tigon3 is example): @@ -1605,28 +1605,30 @@ static irqreturn_t netif_be_dbg(int irq, netif_t *netif; int i = 0; - printk(KERN_ALERT "netif_schedule_list:\n"); + pr_alert("netif_schedule_list:\n"); spin_lock_irq(&net_schedule_list_lock); list_for_each (ent, &net_schedule_list) { netif = list_entry(ent, netif_t, list); - printk(KERN_ALERT " %d: private(rx_req_cons=%08x " - "rx_resp_prod=%08x\n", - i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); - printk(KERN_ALERT " tx_req_cons=%08x tx_resp_prod=%08x)\n", - netif->tx.req_cons, 
netif->tx.rsp_prod_pvt); - printk(KERN_ALERT " shared(rx_req_prod=%08x " - "rx_resp_prod=%08x\n", - netif->rx.sring->req_prod, netif->rx.sring->rsp_prod); - printk(KERN_ALERT " rx_event=%08x tx_req_prod=%08x\n", - netif->rx.sring->rsp_event, netif->tx.sring->req_prod); - printk(KERN_ALERT " tx_resp_prod=%08x, tx_event=%08x)\n", - netif->tx.sring->rsp_prod, netif->tx.sring->rsp_event); + pr_alert(" %d: private(rx_req_cons=%08x " + "rx_resp_prod=%08x\n", + i, netif->rx.req_cons, netif->rx.rsp_prod_pvt); + pr_alert(" tx_req_cons=%08x tx_resp_prod=%08x)\n", + netif->tx.req_cons, netif->tx.rsp_prod_pvt); + pr_alert(" shared(rx_req_prod=%08x " + "rx_resp_prod=%08x\n", + netif->rx.sring->req_prod, netif->rx.sring->rsp_prod); + pr_alert(" rx_event=%08x tx_req_prod=%08x\n", + netif->rx.sring->rsp_event, + netif->tx.sring->req_prod); + pr_alert(" tx_resp_prod=%08x, tx_event=%08x)\n", + netif->tx.sring->rsp_prod, + netif->tx.sring->rsp_event); i++; } spin_unlock_irq(&net_schedule_list_lock); - printk(KERN_ALERT " ** End of netif_schedule_list **\n"); + pr_alert(" ** End of netif_schedule_list **\n"); return IRQ_HANDLED; } @@ -1656,7 +1658,7 @@ static int __init netback_init(void) mmap_pages = alloc_empty_pages_and_pagevec(MAX_PENDING_REQS); if (mmap_pages == NULL) { - printk("%s: out of memory\n", __FUNCTION__); + pr_err("%s: out of memory\n", __FUNCTION__); return -ENOMEM; } --- head-2011-03-17.orig/drivers/xen/netback/xenbus.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/netback/xenbus.c 2011-01-31 17:56:27.000000000 +0100 @@ -163,11 +163,9 @@ fail: * and vif variables to the environment, for the benefit of the vif-* hotplug * scripts. 
*/ -static int netback_uevent(struct xenbus_device *xdev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) { struct backend_info *be; - int i = 0, length = 0; char *val; DPRINTK("netback_uevent"); @@ -178,21 +176,16 @@ static int netback_uevent(struct xenbus_ xenbus_dev_fatal(xdev, err, "reading script"); return err; } - else { - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, - &length, "script=%s", val); - kfree(val); - } + + add_uevent_var(env, "script=%s", val); + kfree(val); down_read(&teardown_sem); be = xdev->dev.driver_data; if (be && be->netif) - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, - &length, "vif=%s", be->netif->dev->name); + add_uevent_var(env, "vif=%s", be->netif->dev->name); up_read(&teardown_sem); - envp[i] = NULL; - return 0; } @@ -240,8 +233,8 @@ static void frontend_changed(struct xenb switch (frontend_state) { case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - printk(KERN_INFO "%s: %s: prepare for reconnect\n", - __FUNCTION__, dev->nodename); + pr_info("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; --- head-2011-03-17.orig/drivers/xen/netfront/accel.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/netfront/accel.c 2011-01-31 17:56:27.000000000 +0100 @@ -40,10 +40,8 @@ #define DPRINTK(fmt, args...) \ pr_debug("netfront/accel (%s:%d) " fmt, \ __FUNCTION__, __LINE__, ##args) -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "netfront/accel: " fmt, ##args) -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "netfront/accel: " fmt, ##args) +#define IPRINTK(fmt, args...) pr_info("netfront/accel: " fmt, ##args) +#define WPRINTK(fmt, args...) 
pr_warning("netfront/accel: " fmt, ##args) static int netfront_remove_accelerator(struct netfront_info *np, struct xenbus_device *dev); @@ -325,7 +323,7 @@ accelerator_set_vif_state_hooks(struct n DPRINTK("%p\n",vif_state); /* Make sure there are no data path operations going on */ - netif_poll_disable(vif_state->np->netdev); + napi_disable(&vif_state->np->napi); netif_tx_lock_bh(vif_state->np->netdev); accelerator = vif_state->np->accelerator; @@ -334,7 +332,7 @@ accelerator_set_vif_state_hooks(struct n spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); netif_tx_unlock_bh(vif_state->np->netdev); - netif_poll_enable(vif_state->np->netdev); + napi_enable(&vif_state->np->napi); } @@ -508,7 +506,7 @@ accelerator_remove_single_hook(struct ne unsigned long flags; /* Make sure there are no data path operations going on */ - netif_poll_disable(vif_state->np->netdev); + napi_disable(&vif_state->np->napi); netif_tx_lock_bh(vif_state->np->netdev); spin_lock_irqsave(&accelerator->vif_states_lock, flags); @@ -524,7 +522,7 @@ accelerator_remove_single_hook(struct ne spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); netif_tx_unlock_bh(vif_state->np->netdev); - netif_poll_enable(vif_state->np->netdev); + napi_enable(&vif_state->np->napi); } --- head-2011-03-17.orig/drivers/xen/netfront/netfront.c 2011-02-09 15:54:17.000000000 +0100 +++ head-2011-03-17/drivers/xen/netfront/netfront.c 2011-02-09 16:00:35.000000000 +0100 @@ -207,10 +207,8 @@ static inline grant_ref_t xennet_get_rx_ #define DPRINTK(fmt, args...) \ pr_debug("netfront (%s:%d) " fmt, \ __FUNCTION__, __LINE__, ##args) -#define IPRINTK(fmt, args...) \ - printk(KERN_INFO "netfront: " fmt, ##args) -#define WPRINTK(fmt, args...) \ - printk(KERN_WARNING "netfront: " fmt, ##args) +#define IPRINTK(fmt, args...) pr_info("netfront: " fmt, ##args) +#define WPRINTK(fmt, args...) 
pr_warning("netfront: " fmt, ##args) static int setup_device(struct xenbus_device *, struct netfront_info *); static struct net_device *create_netdev(struct xenbus_device *); @@ -262,16 +260,16 @@ static int __devinit netfront_probe(stru err = register_netdev(info->netdev); if (err) { - printk(KERN_WARNING "%s: register_netdev err=%d\n", - __FUNCTION__, err); + pr_warning("%s: register_netdev err=%d\n", + __FUNCTION__, err); goto fail; } err = xennet_sysfs_addif(info->netdev); if (err) { unregister_netdev(info->netdev); - printk(KERN_WARNING "%s: add sysfs failed err=%d\n", - __FUNCTION__, err); + pr_warning("%s: add sysfs failed err=%d\n", + __FUNCTION__, err); goto fail; } @@ -621,11 +619,12 @@ int netfront_check_queue_ready(struct ne } EXPORT_SYMBOL(netfront_check_queue_ready); - static int network_open(struct net_device *dev) { struct netfront_info *np = netdev_priv(dev); + napi_enable(&np->napi); + spin_lock_bh(&np->rx_lock); if (netfront_carrier_ok(np)) { network_alloc_rx_buffers(dev); @@ -633,7 +632,7 @@ static int network_open(struct net_devic if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } } spin_unlock_bh(&np->rx_lock); @@ -667,9 +666,8 @@ static void network_tx_buf_gc(struct net skb = np->tx_skbs[id]; if (unlikely(gnttab_query_foreign_access( np->grant_tx_ref[id]) != 0)) { - printk(KERN_ALERT "network_tx_buf_gc: warning " - "-- grant still in use by backend " - "domain.\n"); + pr_alert("network_tx_buf_gc: grant still" + " in use by backend domain\n"); BUG(); } gnttab_end_foreign_access_ref(np->grant_tx_ref[id]); @@ -705,7 +703,7 @@ static void rx_refill_timeout(unsigned l netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } static void network_alloc_rx_buffers(struct net_device *dev) @@ -956,8 +954,7 @@ static int network_start_xmit(struct sk_ frags += (offset + len + PAGE_SIZE - 1) 
/ PAGE_SIZE; if (unlikely(frags > MAX_SKB_FRAGS + 1)) { - printk(KERN_ALERT "xennet: skb rides the rocket: %d frags\n", - frags); + pr_alert("xennet: skb rides the rocket: %d frags\n", frags); dump_stack(); goto drop; } @@ -1060,7 +1057,7 @@ static irqreturn_t netif_int(int irq, vo if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); dev->last_rx = jiffies; } } @@ -1313,16 +1310,17 @@ static int xennet_set_skb_gso(struct sk_ #endif } -static int netif_poll(struct net_device *dev, int *pbudget) +static int netif_poll(struct napi_struct *napi, int budget) { - struct netfront_info *np = netdev_priv(dev); + struct netfront_info *np = container_of(napi, struct netfront_info, napi); + struct net_device *dev = np->netdev; struct sk_buff *skb; struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct multicall_entry *mcl; - int work_done, budget, more_to_do = 1, accel_more_to_do = 1; + int work_done, more_to_do = 1, accel_more_to_do = 1; struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; @@ -1342,8 +1340,6 @@ static int netif_poll(struct net_device skb_queue_head_init(&errq); skb_queue_head_init(&tmpq); - if ((budget = *pbudget) > dev->quota) - budget = dev->quota; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
*/ @@ -1505,9 +1501,6 @@ err: accel_more_to_do = 0; } - *pbudget -= work_done; - dev->quota -= work_done; - if (work_done < budget) { local_irq_save(flags); @@ -1524,14 +1517,14 @@ err: } if (!more_to_do && !accel_more_to_do) - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); local_irq_restore(flags); } spin_unlock(&np->rx_lock); - return more_to_do | accel_more_to_do; + return work_done; } static void netif_release_tx_bufs(struct netfront_info *np) @@ -1678,6 +1671,7 @@ static int network_close(struct net_devi { struct netfront_info *np = netdev_priv(dev); netif_stop_queue(np->netdev); + napi_disable(&np->napi); return 0; } @@ -1777,9 +1771,13 @@ static const struct xennet_stat { }, }; -static int xennet_get_stats_count(struct net_device *dev) +static int xennet_get_sset_count(struct net_device *dev, int sset) { - return ARRAY_SIZE(xennet_stats); + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(xennet_stats); + } + return -EOPNOTSUPP; } static void xennet_get_ethtool_stats(struct net_device *dev, @@ -1931,7 +1929,7 @@ static struct ethtool_ops network_ethtoo #endif .get_link = ethtool_op_get_link, - .get_stats_count = xennet_get_stats_count, + .get_sset_count = xennet_get_sset_count, .get_ethtool_stats = xennet_get_ethtool_stats, .get_strings = xennet_get_strings, }; @@ -2081,8 +2079,7 @@ static struct net_device * __devinit cre netdev = alloc_etherdev(sizeof(struct netfront_info)); if (!netdev) { - printk(KERN_WARNING "%s> alloc_etherdev failed.\n", - __FUNCTION__); + pr_warning("%s: alloc_etherdev failed\n", __FUNCTION__); return ERR_PTR(-ENOMEM); } @@ -2117,14 +2114,14 @@ static struct net_device * __devinit cre /* A grant for every tx ring slot */ if (gnttab_alloc_grant_references(TX_MAX_TARGET, &np->gref_tx_head) < 0) { - printk(KERN_ALERT "#### netfront can't alloc tx grant refs\n"); + pr_alert("#### netfront can't alloc tx grant refs\n"); err = -ENOMEM; goto exit; } /* A grant for every rx ring slot */ if 
(gnttab_alloc_grant_references(RX_MAX_TARGET, &np->gref_rx_head) < 0) { - printk(KERN_ALERT "#### netfront can't alloc rx grant refs\n"); + pr_alert("#### netfront can't alloc rx grant refs\n"); err = -ENOMEM; goto exit_free_tx; } @@ -2133,16 +2130,14 @@ static struct net_device * __devinit cre netdev->hard_start_xmit = network_start_xmit; netdev->stop = network_close; netdev->get_stats = network_get_stats; - netdev->poll = netif_poll; + netif_napi_add(netdev, &np->napi, netif_poll, 64); netdev->set_multicast_list = network_set_multicast_list; netdev->uninit = netif_uninit; netdev->set_mac_address = xennet_set_mac_address; netdev->change_mtu = xennet_change_mtu; - netdev->weight = 64; netdev->features = NETIF_F_IP_CSUM; SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); - SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &dev->dev); np->netdev = netdev; --- head-2011-03-17.orig/drivers/xen/netfront/netfront.h 2011-02-09 15:54:19.000000000 +0100 +++ head-2011-03-17/drivers/xen/netfront/netfront.h 2011-01-31 17:56:27.000000000 +0100 @@ -155,6 +155,8 @@ struct netfront_info { spinlock_t tx_lock; spinlock_t rx_lock; + struct napi_struct napi; + unsigned int irq; unsigned int copying_receiver; unsigned int carrier; --- head-2011-03-17.orig/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200 +++ head-2011-03-17/drivers/xen/pciback/Makefile 2011-01-31 17:56:27.000000000 +0100 @@ -12,6 +12,4 @@ pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o -ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_XEN_PCIDEV_BE_DEBUG) += -DDEBUG --- head-2011-03-17.orig/drivers/xen/pciback/conf_space_capability_msi.c 2008-09-15 13:40:15.000000000 +0200 +++ head-2011-03-17/drivers/xen/pciback/conf_space_capability_msi.c 2011-01-31 17:56:27.000000000 +0100 @@ -17,7 +17,8 @@ int pciback_enable_msi(struct pciback_de status = 
pci_enable_msi(dev); if (status) { - printk("error enable msi for guest %x status %x\n", otherend, status); + pr_err("error enable msi for guest %x status %x\n", + otherend, status); op->value = 0; return XEN_PCI_ERR_op_failed; } --- head-2011-03-17.orig/drivers/xen/pciback/conf_space_header.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/pciback/conf_space_header.c 2011-01-31 17:56:27.000000000 +0100 @@ -69,9 +69,9 @@ static int command_write(struct pci_dev pci_name(dev)); err = pci_set_mwi(dev); if (err) { - printk(KERN_WARNING - "pciback: %s: cannot enable memory-write-invalidate (%d)\n", - pci_name(dev), err); + pr_warning("pciback: %s: cannot enable" + " memory-write-invalidate (%d)\n", + pci_name(dev), err); value &= ~PCI_COMMAND_INVALIDATE; } } @@ -84,8 +84,8 @@ static int rom_write(struct pci_dev *dev struct pci_bar_info *bar = data; if (unlikely(!bar)) { - printk(KERN_WARNING "pciback: driver data not found for %s\n", - pci_name(dev)); + pr_warning("pciback: driver data not found for %s\n", + pci_name(dev)); return XEN_PCI_ERR_op_failed; } @@ -118,8 +118,8 @@ static int bar_write(struct pci_dev *dev struct pci_bar_info *bar = data; if (unlikely(!bar)) { - printk(KERN_WARNING "pciback: driver data not found for %s\n", - pci_name(dev)); + pr_warning("pciback: driver data not found for %s\n", + pci_name(dev)); return XEN_PCI_ERR_op_failed; } @@ -146,8 +146,8 @@ static int bar_read(struct pci_dev *dev, struct pci_bar_info *bar = data; if (unlikely(!bar)) { - printk(KERN_WARNING "pciback: driver data not found for %s\n", - pci_name(dev)); + pr_warning("pciback: driver data not found for %s\n", + pci_name(dev)); return XEN_PCI_ERR_op_failed; } @@ -368,7 +368,7 @@ int pciback_config_header_add_fields(str default: err = -EINVAL; - printk(KERN_ERR "pciback: %s: Unsupported header type %d!\n", + pr_err("pciback: %s: Unsupported header type %d!\n", pci_name(dev), dev->hdr_type); break; } --- head-2011-03-17.orig/drivers/xen/pciback/pci_stub.c 
2011-03-02 12:00:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/pciback/pci_stub.c 2011-03-11 10:55:55.000000000 +0100 @@ -470,15 +470,15 @@ static void pcistub_remove(struct pci_de found_psdev->pdev); if (found_psdev->pdev) { - printk(KERN_WARNING "pciback: ****** removing device " - "%s while still in-use! ******\n", - pci_name(found_psdev->dev)); - printk(KERN_WARNING "pciback: ****** driver domain may " - "still access this device's i/o resources!\n"); - printk(KERN_WARNING "pciback: ****** shutdown driver " - "domain before binding device\n"); - printk(KERN_WARNING "pciback: ****** to other drivers " - "or domains\n"); + pr_warning("pciback: ****** removing device %s" + " while still in-use! ******\n", + pci_name(found_psdev->dev)); + pr_warning("pciback: ****** driver domain may still" + " access this device's i/o resources!\n"); + pr_warning("pciback: ****** shutdown driver " + "domain before binding device\n"); + pr_warning("pciback: ****** to other drivers " + "or domains\n"); pciback_release_pci_dev(found_psdev->pdev, found_psdev->dev); @@ -1261,7 +1261,7 @@ static int __init pcistub_init(void) return err; parse_error: - printk(KERN_ERR "pciback: Error parsing pci_devs_to_hide at \"%s\"\n", + pr_err("pciback: Error parsing pci_devs_to_hide at \"%s\"\n", pci_devs_to_hide + pos); return -EINVAL; } --- head-2011-03-17.orig/drivers/xen/pciback/slot.c 2009-03-18 10:39:32.000000000 +0100 +++ head-2011-03-17/drivers/xen/pciback/slot.c 2011-01-31 17:56:27.000000000 +0100 @@ -64,9 +64,9 @@ int pciback_add_pci_dev(struct pciback_d for (bus = 0; bus < PCI_BUS_NBR; bus++) for (slot = 0; slot < PCI_SLOT_MAX; slot++) { if (slot_dev->slots[bus][slot] == NULL) { - printk(KERN_INFO - "pciback: slot: %s: assign to virtual slot %d, bus %d\n", - pci_name(dev), slot, bus); + pr_info("pciback: slot: %s: assign to" + " virtual slot %d, bus %d\n", + pci_name(dev), slot, bus); slot_dev->slots[bus][slot] = dev; goto unlock; } --- head-2011-03-17.orig/drivers/xen/pciback/vpci.c 
2009-03-18 10:39:32.000000000 +0100 +++ head-2011-03-17/drivers/xen/pciback/vpci.c 2011-01-31 17:56:27.000000000 +0100 @@ -111,9 +111,9 @@ int pciback_add_pci_dev(struct pciback_d /* Assign to a new slot on the virtual PCI bus */ for (slot = 0; slot < PCI_SLOT_MAX; slot++) { if (list_empty(&vpci_dev->dev_list[slot])) { - printk(KERN_INFO - "pciback: vpci: %s: assign to virtual slot %d\n", - pci_name(dev), slot); + pr_info("pciback: vpci: %s:" + " assign to virtual slot %d\n", + pci_name(dev), slot); list_add_tail(&dev_entry->list, &vpci_dev->dev_list[slot]); func = PCI_FUNC(dev->devfn); --- head-2011-03-17.orig/drivers/xen/pciback/xenbus.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/pciback/xenbus.c 2011-01-31 17:56:27.000000000 +0100 @@ -695,8 +695,7 @@ int __init pciback_xenbus_register(void) return -ENODEV; pciback_wq = create_workqueue("pciback_workqueue"); if (!pciback_wq) { - printk(KERN_ERR "pciback_xenbus_register: create" - "pciback_workqueue failed\n"); + pr_err("pciback_xenbus_register: create workqueue failed\n"); return -EFAULT; } return xenbus_register_backend(&xenbus_pciback_driver); --- head-2011-03-17.orig/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200 +++ head-2011-03-17/drivers/xen/pcifront/Makefile 2011-01-31 17:56:27.000000000 +0100 @@ -2,6 +2,4 @@ obj-y += pcifront.o pcifront-y := pci_op.o xenbus.o pci.o -ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_XEN_PCIDEV_FE_DEBUG) += -DDEBUG --- head-2011-03-17.orig/drivers/xen/pcifront/pci_op.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/pcifront/pci_op.c 2011-01-31 17:56:27.000000000 +0100 @@ -67,7 +67,7 @@ static void pcifront_init_sd(struct pcif return; /* No resources, nothing to do */ if (magic != (sizeof(res) * 2) + 1) { - printk(KERN_WARNING "pcifront: resource magic mismatch\n"); + pr_warning("pcifront: resource magic mismatch\n"); return; } @@ -105,9 +105,9 @@ static void 
pcifront_init_sd(struct pcif err = xenbus_scanf(XBT_NIL, pdev->xdev->otherend, str, "%s", buf); if (err != 1) { - printk(KERN_WARNING "pcifront: error reading " - "resource %d on bus %04x:%02x\n", - j, domain, bus); + pr_warning("pcifront: error reading " + "resource %d on bus %04x:%02x\n", + j, domain, bus); continue; } @@ -317,7 +317,7 @@ int pci_frontend_enable_msix(struct pci_ struct pcifront_device *pdev = pcifront_get_pdev(sd); if (nvec > SH_INFO_MAX_VEC) { - printk("too much vector for pci frontend%x\n", nvec); + pr_warning("too many vectors (%#x) for pci frontend\n", nvec); return -EINVAL; } @@ -336,12 +336,12 @@ int pci_frontend_enable_msix(struct pci_ return 0; } else { - printk("enable msix get value %x\n", op.value); + pr_err("enable msix get value %#x\n", op.value); return op.value; } } else { - printk("enable msix get err %x\n", err); + pr_err("enable msix err %#x\n", err); return err; } } @@ -362,7 +362,7 @@ void pci_frontend_disable_msix(struct pc /* What should do for error ? */ if (err) - printk("pci_disable_msix get err %x\n", err); + pr_err("disable msix err %#x\n", err); } int pci_frontend_enable_msi(struct pci_dev *dev) @@ -382,8 +382,8 @@ int pci_frontend_enable_msi(struct pci_d dev->irq = op.value; } else { - printk("pci frontend enable msi failed for dev %x:%x \n", - op.bus, op.devfn); + pr_err("pci frontend enable msi failed for dev %x:%x\n", + op.bus, op.devfn); err = -EINVAL; } return err; @@ -404,14 +404,14 @@ void pci_frontend_disable_msi(struct pci err = do_pci_op(pdev, &op); if (err == XEN_PCI_ERR_dev_not_found) { /* XXX No response from backend, what shall we do? */ - printk("get no response from backend for disable MSI\n"); + pr_err("no response from backend for disable MSI\n"); return; } if (likely(!err)) dev->irq = op.value; else /* how can pciback notify us fail? 
*/ - printk("get fake response frombackend \n"); + pr_err("got bogus response from backend\n"); } #endif /* CONFIG_PCI_MSI */ --- head-2011-03-17.orig/drivers/xen/scsiback/emulate.c 2011-02-02 12:19:11.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsiback/emulate.c 2011-02-08 10:04:09.000000000 +0100 @@ -114,9 +114,10 @@ static void resp_not_supported_cmd(pendi } -static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg, +static int __copy_to_sg(struct scatterlist *sgl, unsigned int nr_sg, void *buf, unsigned int buflen) { + struct scatterlist *sg; void *from = buf; void *to; unsigned int from_rest = buflen; @@ -125,17 +126,17 @@ static int __copy_to_sg(struct scatterli unsigned int i; unsigned long pfn; - for (i = 0; i < nr_sg; i++) { - if (sg->page == NULL) { - printk(KERN_WARNING "%s: inconsistent length field in " - "scatterlist\n", __FUNCTION__); + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { + pr_warning("%s: inconsistent length field in " + "scatterlist\n", __FUNCTION__); return -ENOMEM; } to_capa = sg->length; copy_size = min_t(unsigned int, to_capa, from_rest); - pfn = page_to_pfn(sg->page); + pfn = page_to_pfn(sg_page(sg)); to = pfn_to_kaddr(pfn) + (sg->offset); memcpy(to, from, copy_size); @@ -144,18 +145,17 @@ static int __copy_to_sg(struct scatterli return 0; } - sg++; from += copy_size; } - printk(KERN_WARNING "%s: no space in scatterlist\n", - __FUNCTION__); + pr_warning("%s: no space in scatterlist\n", __FUNCTION__); return -ENOMEM; } -static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg, +static int __copy_from_sg(struct scatterlist *sgl, unsigned int nr_sg, void *buf, unsigned int buflen) { + struct scatterlist *sg; void *from; void *to = buf; unsigned int from_rest; @@ -164,29 +164,26 @@ static int __copy_from_sg(struct scatter unsigned int i; unsigned long pfn; - for (i = 0; i < nr_sg; i++) { - if (sg->page == NULL) { - printk(KERN_WARNING "%s: inconsistent length field in " - "scatterlist\n", 
__FUNCTION__); + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { + pr_warning("%s: inconsistent length field in " + "scatterlist\n", __FUNCTION__); return -ENOMEM; } from_rest = sg->length; if ((from_rest > 0) && (to_capa < from_rest)) { - printk(KERN_WARNING - "%s: no space in destination buffer\n", - __FUNCTION__); + pr_warning("%s: no space in destination buffer\n", + __FUNCTION__); return -ENOMEM; } copy_size = from_rest; - pfn = page_to_pfn(sg->page); + pfn = page_to_pfn(sg_page(sg)); from = pfn_to_kaddr(pfn) + (sg->offset); memcpy(to, from, copy_size); to_capa -= copy_size; - - sg++; to += copy_size; } @@ -247,7 +244,7 @@ static void __report_luns(pending_req_t + VSCSI_REPORT_LUNS_HEADER; retry: if ((buff = kmalloc(alloc_len, GFP_KERNEL)) == NULL) { - printk(KERN_ERR "scsiback:%s kmalloc err\n", __FUNCTION__); + pr_err("scsiback:%s kmalloc err\n", __FUNCTION__); goto fail; } --- head-2011-03-17.orig/drivers/xen/scsiback/interface.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsiback/interface.c 2011-01-31 17:56:27.000000000 +0100 @@ -71,7 +71,8 @@ static int map_frontend_page( struct vsc gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &op); if (op.status != GNTST_okay) { - printk(KERN_ERR "scsiback: Grant table operation failure %d!\n", (int)op.status); + pr_err("scsiback: Grant table operation failure %d!\n", + (int)op.status); ret = -EINVAL; } else { info->shmem_ref = ring_ref; @@ -102,7 +103,7 @@ int scsiback_init_sring(struct vscsibk_i int err; if (info->irq) { - printk(KERN_ERR "scsiback: Already connected through?\n"); + pr_err("scsiback: Already connected through?\n"); return -1; } @@ -168,7 +169,7 @@ int __init scsiback_interface_init(void) scsiback_cachep = kmem_cache_create("vscsiif_cache", sizeof(struct vscsibk_info), 0, 0, NULL); if (!scsiback_cachep) { - printk(KERN_ERR "scsiback: can't init scsi cache\n"); + pr_err("scsiback: can't init scsi cache\n"); return -ENOMEM; } --- 
head-2011-03-17.orig/drivers/xen/scsiback/scsiback.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsiback/scsiback.c 2011-01-31 17:56:27.000000000 +0100 @@ -202,14 +202,14 @@ static void scsiback_print_status(char * { struct scsi_device *sdev = pending_req->sdev; - printk(KERN_ERR "scsiback: %d:%d:%d:%d ",sdev->host->host_no, - sdev->channel, sdev->id, sdev->lun); - printk(KERN_ERR "status = 0x%02x, message = 0x%02x, host = 0x%02x, driver = 0x%02x\n", - status_byte(errors), msg_byte(errors), - host_byte(errors), driver_byte(errors)); + pr_err("scsiback: %d:%d:%d:%d ", + sdev->host->host_no, sdev->channel, sdev->id, sdev->lun); + pr_err("status = 0x%02x, message = 0x%02x, host = 0x%02x," + " driver = 0x%02x\n", + status_byte(errors), msg_byte(errors), + host_byte(errors), driver_byte(errors)); - printk(KERN_ERR "scsiback: cmnd[0]=0x%02X\n", - pending_req->cmnd[0]); + pr_err("scsiback: cmnd[0]=0x%02X\n", pending_req->cmnd[0]); if (CHECK_CONDITION & status_byte(errors)) __scsi_print_sense("scsiback", sense_buffer, SCSI_SENSE_BUFFERSIZE); @@ -260,14 +260,18 @@ static int scsiback_gnttab_data_map(vscs write = (data_dir == DMA_TO_DEVICE); if (nr_segments) { + struct scatterlist *sg; + /* free of (sgl) in fast_flush_area()*/ pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments, GFP_KERNEL); if (!pending_req->sgl) { - printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__); + pr_err("scsiback: %s: kmalloc() error\n", __FUNCTION__); return -ENOMEM; } + sg_init_table(pending_req->sgl, nr_segments); + for (i = 0; i < nr_segments; i++) { flags = GNTMAP_host_map; if (write) @@ -280,14 +284,14 @@ static int scsiback_gnttab_data_map(vscs err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nr_segments); BUG_ON(err); - for (i = 0; i < nr_segments; i++) { + for_each_sg (pending_req->sgl, sg, nr_segments, i) { struct page *pg; /* Retry maps with GNTST_eagain */ if (unlikely(map[i].status == GNTST_eagain)) 
gnttab_check_GNTST_eagain_while(GNTTABOP_map_grant_ref, &map[i]); if (unlikely(map[i].status != GNTST_okay)) { - printk(KERN_ERR "scsiback: invalid buffer -- could not remap it\n"); + pr_err("scsiback: invalid buffer -- could not remap it\n"); map[i].handle = SCSIBACK_INVALID_HANDLE; err |= 1; } @@ -302,15 +306,14 @@ static int scsiback_gnttab_data_map(vscs set_phys_to_machine(page_to_pfn(pg), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); - pending_req->sgl[i].page = pg; - pending_req->sgl[i].offset = ring_req->seg[i].offset; - pending_req->sgl[i].length = ring_req->seg[i].length; - data_len += pending_req->sgl[i].length; + sg_set_page(sg, pg, ring_req->seg[i].length, + ring_req->seg[i].offset); + data_len += sg->length; barrier(); - if (pending_req->sgl[i].offset >= PAGE_SIZE || - pending_req->sgl[i].length > PAGE_SIZE || - pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE) + if (sg->offset >= PAGE_SIZE || + sg->length > PAGE_SIZE || + sg->offset + sg->length > PAGE_SIZE) err |= 1; } @@ -339,27 +342,14 @@ static int scsiback_merge_bio(struct req blk_queue_bounce(q, &bio); - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { - rq->biotail->bi_next = bio; - rq->biotail = bio; - } - - return 0; + return blk_rq_append_bio(q, rq, bio); } /* quoted scsi_lib.c/scsi_bi_endio */ -static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error) +static void scsiback_bi_endio(struct bio *bio, int error) { - if (bio->bi_size) - return 1; - bio_put(bio); - return 0; } @@ -370,16 +360,16 @@ static int request_map_sg(struct request struct request_queue *q = rq->q; int nr_pages; unsigned int nsegs = count; - unsigned int data_len = 0, len, bytes, off; + struct scatterlist *sg; struct page *page; struct bio *bio = NULL; int i, err, nr_vecs = 0; - for (i = 0; i < nsegs; i++) { - page = pending_req->sgl[i].page; - off = (unsigned int)pending_req->sgl[i].offset; - len = (unsigned 
int)pending_req->sgl[i].length; + for_each_sg (pending_req->sgl, sg, nsegs, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; data_len += len; nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -407,7 +397,7 @@ static int request_map_sg(struct request if (bio->bi_vcnt >= nr_vecs) { err = scsiback_merge_bio(rq, bio); if (err) { - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); goto free_bios; } bio = NULL; @@ -430,7 +420,7 @@ free_bios: /* * call endio instead of bio_put incase it was bounced */ - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); } return err; @@ -473,7 +463,7 @@ void scsiback_cmd_exec(pending_req_t *pe if (nr_segments) { if (request_map_sg(rq, pending_req, nr_segments)) { - printk(KERN_ERR "scsiback: SG Request Map Error\n"); + pr_err("scsiback: SG Request Map Error\n"); return; } } @@ -632,7 +622,7 @@ static int scsiback_do_cmd_fn(struct vsc } else if (pending_req->act == VSCSIIF_ACT_SCSI_RESET) { scsiback_device_reset_exec(pending_req); } else { - printk(KERN_ERR "scsiback: invalid parameter for request\n"); + pr_err("scsiback: invalid parameter for request\n"); scsiback_do_resp_with_sense(NULL, (DRIVER_ERROR << 24), 0, pending_req); continue; @@ -719,7 +709,7 @@ out_of_memory: kfree(pending_reqs); kfree(pending_grant_handles); free_empty_pages_and_pagevec(pending_pages, mmap_pages); - printk(KERN_ERR "scsiback: %s: out of memory\n", __FUNCTION__); + pr_err("scsiback: %s: out of memory\n", __FUNCTION__); return -ENOMEM; } --- head-2011-03-17.orig/drivers/xen/scsiback/translate.c 2008-07-21 11:00:33.000000000 +0200 +++ head-2011-03-17/drivers/xen/scsiback/translate.c 2011-01-31 17:56:27.000000000 +0100 @@ -62,8 +62,8 @@ int scsiback_add_translation_entry(struc if ((entry->v.chn == v->chn) && (entry->v.tgt == v->tgt) && (entry->v.lun == v->lun)) { - printk(KERN_WARNING "scsiback: Virtual ID is already used. " - "Assignment was not performed.\n"); + pr_warning("scsiback: Virtual ID is already used. 
" + "Assignment was not performed.\n"); err = -EEXIST; goto out; } @@ -72,7 +72,7 @@ int scsiback_add_translation_entry(struc /* Create a new translation entry and add to the list */ if ((new = kmalloc(sizeof(struct v2p_entry), GFP_ATOMIC)) == NULL) { - printk(KERN_ERR "scsiback: %s: kmalloc() error.\n", __FUNCTION__); + pr_err("scsiback: %s: kmalloc() error\n", __FUNCTION__); err = -ENOMEM; goto out; } --- head-2011-03-17.orig/drivers/xen/scsiback/xenbus.c 2011-01-31 17:32:22.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsiback/xenbus.c 2011-01-31 17:56:27.000000000 +0100 @@ -102,14 +102,13 @@ struct scsi_device *scsiback_get_scsi_de shost = scsi_host_lookup(phy->hst); if (IS_ERR(shost)) { - printk(KERN_ERR "scsiback: host%d doesn't exist.\n", - phy->hst); + pr_err("scsiback: host%d doesn't exist\n", phy->hst); return NULL; } sdev = scsi_device_lookup(shost, phy->chn, phy->tgt, phy->lun); if (!sdev) { - printk(KERN_ERR "scsiback: %d:%d:%d:%d doesn't exist.\n", - phy->hst, phy->chn, phy->tgt, phy->lun); + pr_err("scsiback: %d:%d:%d:%d doesn't exist\n", + phy->hst, phy->chn, phy->tgt, phy->lun); scsi_host_put(shost); return NULL; } @@ -178,7 +177,8 @@ static void scsiback_do_lun_hotplug(stru if (!err) { if (xenbus_printf(XBT_NIL, dev->nodename, state_str, "%d", XenbusStateInitialised)) { - printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); + pr_err("scsiback: xenbus_printf error %s\n", + state_str); scsiback_del_translation_entry(be->info, &vir); } } else { @@ -193,7 +193,8 @@ static void scsiback_do_lun_hotplug(stru if (!scsiback_del_translation_entry(be->info, &vir)) { if (xenbus_printf(XBT_NIL, dev->nodename, state_str, "%d", XenbusStateClosed)) - printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); + pr_err("scsiback: xenbus_printf error %s\n", + state_str); } } break; @@ -203,7 +204,8 @@ static void scsiback_do_lun_hotplug(stru /* modify vscsi-devs/dev-x/state */ if (xenbus_printf(XBT_NIL, dev->nodename, state_str, "%d", 
XenbusStateConnected)) { - printk(KERN_ERR "scsiback: xenbus_printf error %s\n", state_str); + pr_err("scsiback: xenbus_printf error %s\n", + state_str); scsiback_del_translation_entry(be->info, &vir); xenbus_printf(XBT_NIL, dev->nodename, state_str, "%d", XenbusStateClosed); @@ -346,7 +348,7 @@ static int scsiback_probe(struct xenbus_ fail: - printk(KERN_WARNING "scsiback: %s failed\n",__FUNCTION__); + pr_warning("scsiback: %s failed\n",__FUNCTION__); scsiback_remove(dev); return err; --- head-2011-03-17.orig/drivers/xen/scsifront/scsifront.c 2011-02-08 10:03:55.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsifront/scsifront.c 2011-01-31 17:56:27.000000000 +0100 @@ -118,8 +118,8 @@ static void scsifront_gnttab_done(struct for (i = 0; i < s->nr_segments; i++) { if (unlikely(gnttab_query_foreign_access( s->gref[i]) != 0)) { - printk(KERN_ALERT "scsifront: " - "grant still in use by backend.\n"); + pr_alert("scsifront: " + "grant still in use by backend\n"); BUG(); } gnttab_end_foreign_access(s->gref[i], 0UL); @@ -246,42 +246,47 @@ static int map_data_for_request(struct v { grant_ref_t gref_head; struct page *page; - int err, i, ref, ref_cnt = 0; + int err, ref, ref_cnt = 0; int write = (sc->sc_data_direction == DMA_TO_DEVICE); - int nr_pages, off, len, bytes; + unsigned int i, nr_pages, off, len, bytes; unsigned long buffer_pfn; - unsigned int data_len = 0; if (sc->sc_data_direction == DMA_NONE) return 0; err = gnttab_alloc_grant_references(VSCSIIF_SG_TABLESIZE, &gref_head); if (err) { - printk(KERN_ERR "scsifront: gnttab_alloc_grant_references() error\n"); + pr_err("scsifront: gnttab_alloc_grant_references() error\n"); return -ENOMEM; } if (sc->use_sg) { /* quoted scsi_lib.c/scsi_req_map_sg . 
*/ - struct scatterlist *sg = (struct scatterlist *)sc->request_buffer; - nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct scatterlist *sg, *sgl = (struct scatterlist *)sc->request_buffer; + unsigned int data_len = sc->request_bufflen; + nr_pages = (sc->request_bufflen + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT; if (nr_pages > VSCSIIF_SG_TABLESIZE) { - printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n"); + pr_err("scsifront: Unable to map request_buffer for command!\n"); ref_cnt = (-E2BIG); goto big_to_sg; } - for (i = 0; i < sc->use_sg; i++) { - page = sg[i].page; - off = sg[i].offset; - len = sg[i].length; - data_len += len; + for_each_sg (sgl, sg, sc->use_sg, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; - while (len > 0) { + while (len > 0 && data_len > 0) { + /* + * sg sends a scatterlist that is larger than + * the data_len it wants transferred for certain + * IO sizes + */ bytes = min_t(unsigned int, len, PAGE_SIZE - off); + bytes = min(bytes, data_len); ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); @@ -296,6 +301,7 @@ static int map_data_for_request(struct v buffer_pfn++; len -= bytes; + data_len -= bytes; off = 0; ref_cnt++; } --- head-2011-03-17.orig/drivers/xen/scsifront/xenbus.c 2011-02-08 10:03:46.000000000 +0100 +++ head-2011-03-17/drivers/xen/scsifront/xenbus.c 2011-02-08 10:04:06.000000000 +0100 @@ -215,7 +215,7 @@ static int scsifront_probe(struct xenbus if (IS_ERR(info->kthread)) { err = PTR_ERR(info->kthread); info->kthread = NULL; - printk(KERN_ERR "scsifront: kthread start err %d\n", err); + pr_err("scsifront: kthread start err %d\n", err); goto free_sring; } @@ -227,7 +227,7 @@ static int scsifront_probe(struct xenbus err = scsi_add_host(host, &dev->dev); if (err) { - printk(KERN_ERR "scsifront: fail to add scsi host %d\n", err); + pr_err("scsifront: fail to add scsi host %d\n", err); goto 
free_sring; } @@ -318,7 +318,7 @@ static void scsifront_do_lun_hotplug(str if (device_state == XenbusStateInitialised) { sdev = scsi_device_lookup(info->host, chn, tgt, lun); if (sdev) { - printk(KERN_ERR "scsifront: Device already in use.\n"); + pr_err("scsifront: Device already in use.\n"); scsi_device_put(sdev); xenbus_printf(XBT_NIL, dev->nodename, state_str, "%d", XenbusStateClosed); --- head-2011-03-17.orig/drivers/xen/sfc_netback/accel_fwd.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netback/accel_fwd.c 2011-01-31 17:56:27.000000000 +0100 @@ -181,10 +181,11 @@ int netback_accel_fwd_add(const __u8 *ma unsigned long flags; cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); BUG_ON(fwd_priv == NULL); - DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac)); + DPRINTK("Adding mac %s\n", print_mac(buf, mac)); spin_lock_irqsave(&fwd_set->fwd_lock, flags); @@ -199,8 +200,8 @@ int netback_accel_fwd_add(const __u8 *ma if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, (cuckoo_hash_key *)(&key), &rc) != 0) { spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); - EPRINTK("MAC address " MAC_FMT " already accelerated.\n", - MAC_ARG(mac)); + EPRINTK("MAC address %s already accelerated.\n", + print_mac(buf, mac)); return -EEXIST; } @@ -235,8 +236,9 @@ void netback_accel_fwd_remove(const __u8 unsigned long flags; cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); - DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac)); + DPRINTK("Removing mac %s\n", print_mac(buf, mac)); BUG_ON(fwd_priv == NULL); @@ -394,14 +396,16 @@ void netback_accel_tx_packet(struct sk_b if (is_broadcast_ether_addr(skb_mac_header(skb)) && packet_is_arp_reply(skb)) { + DECLARE_MAC_BUF(buf); + /* * update our fast path forwarding to reflect this * gratuitous ARP */ mac = skb_mac_header(skb)+ETH_ALEN; - DPRINTK("%s: found gratuitous 
ARP for " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(mac)); + DPRINTK("%s: found gratuitous ARP for %s\n", + __FUNCTION__, print_mac(buf, mac)); spin_lock_irqsave(&fwd_set->fwd_lock, flags); /* --- head-2011-03-17.orig/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netback/accel_msg.c 2011-01-31 17:56:27.000000000 +0100 @@ -57,11 +57,11 @@ static void netback_accel_msg_tx_localma { unsigned long lock_state; struct net_accel_msg *msg; + DECLARE_MAC_BUF(buf); BUG_ON(bend == NULL || mac == NULL); - VPRINTK("Sending local mac message: " MAC_FMT "\n", - MAC_ARG((const char *)mac)); + VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac)); msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU, &lock_state); --- head-2011-03-17.orig/drivers/xen/sfc_netfront/accel_msg.c 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netfront/accel_msg.c 2011-01-31 17:56:27.000000000 +0100 @@ -41,11 +41,13 @@ static void vnic_start_interrupts(netfro /* Prime our interrupt */ spin_lock_irqsave(&vnic->irq_enabled_lock, flags); if (!netfront_accel_vi_enable_interrupts(vnic)) { + struct netfront_info *np = netdev_priv(vnic->net_dev); + /* Cripes, that was quick, better pass it up */ netfront_accel_disable_net_interrupts(vnic); vnic->irq_enabled = 0; NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); - netif_rx_schedule(vnic->net_dev); + netif_rx_schedule(vnic->net_dev, &np->napi); } else { /* * Nothing yet, make sure we get interrupts through @@ -72,6 +74,7 @@ static void vnic_stop_interrupts(netfron static void vnic_start_fastpath(netfront_accel_vnic *vnic) { struct net_device *net_dev = vnic->net_dev; + struct netfront_info *np = netdev_priv(net_dev); unsigned long flags; DPRINTK("%s\n", __FUNCTION__); @@ -80,9 +83,9 @@ static void vnic_start_fastpath(netfront vnic->tx_enabled = 1; spin_unlock_irqrestore(&vnic->tx_lock, flags); - netif_poll_disable(net_dev); + napi_disable(&np->napi); 
vnic->poll_enabled = 1; - netif_poll_enable(net_dev); + napi_enable(&np->napi); vnic_start_interrupts(vnic); } @@ -114,11 +117,11 @@ void vnic_stop_fastpath(netfront_accel_v spin_unlock_irqrestore(&vnic->tx_lock, flags1); /* Must prevent polls and hold lock to modify poll_enabled */ - netif_poll_disable(net_dev); + napi_disable(&np->napi); spin_lock_irqsave(&vnic->irq_enabled_lock, flags1); vnic->poll_enabled = 0; spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1); - netif_poll_enable(net_dev); + napi_enable(&np->napi); } @@ -324,8 +327,10 @@ static int vnic_process_localmac_msg(net cuckoo_hash_mac_key key; if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) { - DPRINTK("MAC has moved, could be local: " MAC_FMT "\n", - MAC_ARG(msg->u.localmac.mac)); + DECLARE_MAC_BUF(buf); + + DPRINTK("MAC has moved, could be local: %s\n", + print_mac(buf, msg->u.localmac.mac)); key = cuckoo_mac_to_key(msg->u.localmac.mac); spin_lock_irqsave(&vnic->table_lock, flags); /* Try to remove it, not a big deal if not there */ @@ -513,6 +518,8 @@ irqreturn_t netfront_accel_net_channel_i spin_lock_irqsave(&vnic->irq_enabled_lock, flags); if (vnic->irq_enabled) { + struct netfront_info *np = netdev_priv(net_dev); + netfront_accel_disable_net_interrupts(vnic); vnic->irq_enabled = 0; spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); @@ -525,7 +532,7 @@ irqreturn_t netfront_accel_net_channel_i vnic->stats.event_count_since_irq; vnic->stats.event_count_since_irq = 0; #endif - netif_rx_schedule(net_dev); + netif_rx_schedule(net_dev, &np->napi); } else { spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); --- head-2011-03-17.orig/drivers/xen/sfc_netfront/accel_vi.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netfront/accel_vi.c 2011-01-31 17:56:27.000000000 +0100 @@ -643,8 +643,10 @@ netfront_accel_vi_tx_post(netfront_accel (cuckoo_hash_key *)(&key), &value); if (!try_fastpath) { - VPRINTK("try fast path false for mac: " MAC_FMT "\n", - MAC_ARG(skb->data)); + 
DECLARE_MAC_BUF(buf); + + VPRINTK("try fast path false for mac: %s\n", + print_mac(buf, skb->data)); return NETFRONT_ACCEL_STATUS_CANT; } @@ -770,9 +772,10 @@ static void netfront_accel_vi_rx_comple if (compare_ether_addr(skb->data, vnic->mac)) { struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN); u16 port; + DECLARE_MAC_BUF(buf); - DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(skb->data)); + DPRINTK("%s: saw wrong MAC address %s\n", + __FUNCTION__, print_mac(buf, skb->data)); if (ip->protocol == IPPROTO_TCP) { struct tcphdr *tcp = (struct tcphdr *) --- head-2011-03-17.orig/drivers/xen/sfc_netutil/accel_msg_iface.c 2008-02-20 09:32:49.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netutil/accel_msg_iface.c 2011-01-31 17:56:27.000000000 +0100 @@ -36,7 +36,7 @@ #else #define NET_ACCEL_CHECK_MAGIC(_p, _errval) \ if (_p->magic != NET_ACCEL_MSG_MAGIC) { \ - printk(KERN_ERR "%s: passed invalid shared page %p!\n", \ + pr_err("%s: passed invalid shared page %p!\n", \ __FUNCTION__, _p); \ return _errval; \ } --- head-2011-03-17.orig/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100 +++ head-2011-03-17/drivers/xen/sfc_netutil/accel_util.h 2011-01-31 17:56:27.000000000 +0100 @@ -63,9 +63,6 @@ DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \ } while(0) -#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x" -#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5] - #include /*! 
Map a set of pages from another domain --- head-2011-03-17.orig/drivers/xen/tpmback/interface.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/tpmback/interface.c 2011-01-31 17:56:27.000000000 +0100 @@ -48,7 +48,7 @@ static tpmif_t *alloc_tpmif(domid_t domi out_of_memory: if (tpmif != NULL) kmem_cache_free(tpmif_cachep, tpmif); - printk("%s: out of memory\n", __FUNCTION__); + pr_err("%s: out of memory\n", __FUNCTION__); return ERR_PTR(-ENOMEM); } --- head-2011-03-17.orig/drivers/xen/tpmback/tpmback.c 2011-01-31 17:32:22.000000000 +0100 +++ head-2011-03-17/drivers/xen/tpmback/tpmback.c 2011-01-31 17:56:27.000000000 +0100 @@ -908,8 +908,7 @@ static int __init tpmback_init(void) int rc; if ((rc = misc_register(&vtpms_miscdevice)) != 0) { - printk(KERN_ALERT - "Could not register misc device for TPM BE.\n"); + pr_alert("Could not register misc device for TPM BE\n"); return rc; } @@ -929,7 +928,7 @@ static int __init tpmback_init(void) return rc; } - printk(KERN_ALERT "Successfully initialized TPM backend driver.\n"); + pr_alert("Successfully initialized TPM backend driver\n"); return 0; } --- head-2011-03-17.orig/drivers/xen/usbback/interface.c 2010-09-23 15:39:04.000000000 +0200 +++ head-2011-03-17/drivers/xen/usbback/interface.c 2011-01-31 17:56:27.000000000 +0100 @@ -113,7 +113,8 @@ static int map_frontend_pages(usbif_t *u gnttab_check_GNTST_eagain_do_while(GNTTABOP_map_grant_ref, &op); if (op.status != GNTST_okay) { - printk(KERN_ERR "grant table failure mapping urb_ring_ref %d\n", (int)op.status); + pr_err("grant table failure mapping urb_ring_ref %d\n", + (int)op.status); return -EINVAL; } @@ -132,7 +133,8 @@ static int map_frontend_pages(usbif_t *u GNTMAP_host_map, usbif->urb_shmem_handle); VOID(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &unop, 1)); - printk(KERN_ERR "grant table failure mapping conn_ring_ref %d\n", (int)op.status); + pr_err("grant table failure mapping conn_ring_ref %d\n", + (int)op.status); return -EINVAL; } --- 
head-2011-03-17.orig/drivers/xen/usbback/usbback.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbback/usbback.c 2011-01-31 17:56:27.000000000 +0100 @@ -86,6 +86,8 @@ typedef struct { static pending_req_t *pending_reqs; static struct list_head pending_free; static DEFINE_SPINLOCK(pending_free_lock); +static LIST_HEAD(pending_urb_free); +static DEFINE_SPINLOCK(urb_free_lock); static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); #define USBBACK_INVALID_HANDLE (~0) @@ -231,7 +233,7 @@ static int usbbk_alloc_urb(usbif_urb_req else pending_req->urb = usb_alloc_urb(0, GFP_KERNEL); if (!pending_req->urb) { - printk(KERN_ERR "usbback: can't alloc urb\n"); + pr_err("usbback: can't alloc urb\n"); ret = -ENOMEM; goto fail; } @@ -241,7 +243,7 @@ static int usbbk_alloc_urb(usbif_urb_req req->buffer_length, GFP_KERNEL, &pending_req->transfer_dma); if (!pending_req->buffer) { - printk(KERN_ERR "usbback: can't alloc urb buffer\n"); + pr_err("usbback: can't alloc urb buffer\n"); ret = -ENOMEM; goto fail_free_urb; } @@ -252,7 +254,7 @@ static int usbbk_alloc_urb(usbif_urb_req sizeof(struct usb_ctrlrequest), GFP_KERNEL, &pending_req->setup_dma); if (!pending_req->setup) { - printk(KERN_ERR "usbback: can't alloc usb_ctrlrequest\n"); + pr_err("usbback: can't alloc usb_ctrlrequest\n"); ret = -ENOMEM; goto fail_free_buffer; } @@ -272,6 +274,15 @@ fail: static void usbbk_free_urb(struct urb *urb) { + unsigned long flags; + + spin_lock_irqsave(&urb_free_lock, flags); + list_add(&urb->urb_list, &pending_urb_free); + spin_unlock_irqrestore(&urb_free_lock, flags); +} + +static void _usbbk_free_urb(struct urb *urb) +{ if (usb_pipecontrol(urb->pipe)) usb_buffer_free(urb->dev, sizeof(struct usb_ctrlrequest), urb->setup_packet, urb->setup_dma); @@ -282,6 +293,29 @@ static void usbbk_free_urb(struct urb *u usb_free_urb(urb); } +static void usbbk_free_urbs(void) +{ + unsigned long flags; + struct list_head tmp_list; + + if (list_empty(&pending_urb_free)) + return; + + 
INIT_LIST_HEAD(&tmp_list); + + spin_lock_irqsave(&urb_free_lock, flags); + list_splice_init(&pending_urb_free, &tmp_list); + spin_unlock_irqrestore(&urb_free_lock, flags); + + while (!list_empty(&tmp_list)) { + struct urb *next_urb = list_first_entry(&tmp_list, struct urb, + urb_list); + + list_del(&next_urb->urb_list); + _usbbk_free_urb(next_urb); + } +} + static void usbbk_notify_work(usbif_t *usbif) { usbif->waiting_reqs = 1; @@ -356,7 +390,7 @@ static int usbbk_gnttab_map(usbif_t *usb nr_segs = pending_req->nr_buffer_segs + pending_req->nr_extra_segs; if (nr_segs > USBIF_MAX_SEGMENTS_PER_REQUEST) { - printk(KERN_ERR "Bad number of segments in request\n"); + pr_err("Bad number of segments in request\n"); ret = -EINVAL; goto fail; } @@ -399,7 +433,7 @@ static int usbbk_gnttab_map(usbif_t *usb gnttab_check_GNTST_eagain_while(GNTTABOP_map_grant_ref, &map[i]); if (unlikely(map[i].status != GNTST_okay)) { - printk(KERN_ERR "usbback: invalid buffer -- could not remap it\n"); + pr_err("usbback: invalid buffer -- could not remap it\n"); map[i].handle = USBBACK_INVALID_HANDLE; ret |= 1; } @@ -927,7 +961,7 @@ static void dispatch_request_to_pending_ ret = usbbk_gnttab_map(usbif, req, pending_req); if (ret) { - printk(KERN_ERR "usbback: invalid buffer\n"); + pr_err("usbback: invalid buffer\n"); ret = -ESHUTDOWN; goto fail_free_urb; } @@ -950,7 +984,7 @@ static void dispatch_request_to_pending_ ret = usb_submit_urb(pending_req->urb, GFP_KERNEL); if (ret) { - printk(KERN_ERR "usbback: failed submitting urb, error %d\n", ret); + pr_err("usbback: failed submitting urb, error %d\n", ret); ret = -ESHUTDOWN; goto fail_flush_area; } @@ -982,7 +1016,7 @@ static int usbbk_start_submit_urb(usbif_ while (rc != rp) { if (RING_REQUEST_CONS_OVERFLOW(urb_ring, rc)) { - printk(KERN_WARNING "RING_REQUEST_CONS_OVERFLOW\n"); + pr_warning("RING_REQUEST_CONS_OVERFLOW\n"); break; } @@ -1053,8 +1087,11 @@ int usbbk_schedule(void *arg) if (usbbk_start_submit_urb(usbif)) usbif->waiting_reqs = 1; + 
+ usbbk_free_urbs(); } + usbbk_free_urbs(); usbif->xenusbd = NULL; usbif_put(usbif); --- head-2011-03-17.orig/drivers/xen/usbback/usbstub.c 2011-03-11 10:54:35.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbback/usbstub.c 2011-03-11 10:55:46.000000000 +0100 @@ -150,7 +150,7 @@ static struct usbstub *usbstub_alloc(str stub = kzalloc(sizeof(*stub), GFP_KERNEL); if (!stub) { - printk(KERN_ERR "no memory for alloc usbstub\n"); + pr_err("no memory for usbstub\n"); return NULL; } kref_init(&stub->kref); @@ -303,7 +303,7 @@ int __init usbstub_init(void) err = usb_register(&usbback_usb_driver); if (err < 0) { - printk(KERN_ERR "usbback: usb_register failed (error %d)\n", err); + pr_err("usbback: usb_register failed (%d)\n", err); goto out; } --- head-2011-03-17.orig/drivers/xen/usbback/xenbus.c 2011-01-31 17:32:22.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbback/xenbus.c 2011-01-31 17:56:27.000000000 +0100 @@ -242,8 +242,9 @@ static int connect_rings(usbif_t *usbif) return err; } - printk("usbback: urb-ring-ref %ld, conn-ring-ref %ld, event-channel %d\n", - urb_ring_ref, conn_ring_ref, evtchn); + pr_info("usbback: urb-ring-ref %ld, conn-ring-ref %ld," + " event-channel %d\n", + urb_ring_ref, conn_ring_ref, evtchn); err = usbif_map(usbif, urb_ring_ref, conn_ring_ref, evtchn); if (err) { @@ -270,8 +271,8 @@ static void frontend_changed(struct xenb case XenbusStateInitialising: if (dev->state == XenbusStateClosed) { - printk("%s: %s: prepare for reconnect\n", - __FUNCTION__, dev->nodename); + pr_info("%s: %s: prepare for reconnect\n", + __FUNCTION__, dev->nodename); xenbus_switch_state(dev, XenbusStateInitWait); } break; --- head-2011-03-17.orig/drivers/xen/usbfront/usbfront-dbg.c 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbfront/usbfront-dbg.c 2011-01-31 17:56:27.000000000 +0100 @@ -91,8 +91,8 @@ static inline void create_debug_file(str { struct class_device *cldev = info_to_hcd(info)->self.class_dev; if (class_device_create_file(cldev, 
&class_device_attr_statistics)) - printk(KERN_WARNING "statistics file not created for %s\n", - info_to_hcd(info)->self.bus_name); + pr_warning("statistics file not created for %s\n", + info_to_hcd(info)->self.bus_name); } static inline void remove_debug_file(struct usbfront_info *info) --- head-2011-03-17.orig/drivers/xen/usbfront/usbfront-hcd.c 2009-10-15 11:45:41.000000000 +0200 +++ head-2011-03-17/drivers/xen/usbfront/usbfront-hcd.c 2011-01-31 17:56:27.000000000 +0100 @@ -114,7 +114,6 @@ static void xenhcd_stop(struct usb_hcd * * non-error returns are promise to giveback the urb later */ static int xenhcd_urb_enqueue(struct usb_hcd *hcd, - struct usb_host_endpoint *ep, struct urb *urb, gfp_t mem_flags) { @@ -130,6 +129,7 @@ static int xenhcd_urb_enqueue(struct usb ret = -ENOMEM; goto done; } + urbp->status = 1; ret = xenhcd_submit_urb(info, urbp); if (ret != 0) @@ -144,7 +144,7 @@ done: * called as .urb_dequeue() */ static int xenhcd_urb_dequeue(struct usb_hcd *hcd, - struct urb *urb) + struct urb *urb, int status) { struct usbfront_info *info = hcd_to_info(hcd); struct urb_priv *urbp; @@ -157,6 +157,7 @@ static int xenhcd_urb_dequeue(struct usb if (!urbp) goto done; + urbp->status = status; ret = xenhcd_unlink_urb(info, urbp); done: --- head-2011-03-17.orig/drivers/xen/usbfront/usbfront-q.c 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbfront/usbfront-q.c 2011-01-31 17:56:27.000000000 +0100 @@ -151,7 +151,7 @@ static int map_urb_for_request(struct us ret = gnttab_alloc_grant_references(USBIF_MAX_SEGMENTS_PER_REQUEST, &gref_head); if (ret) { - printk(KERN_ERR "usbfront: gnttab_alloc_grant_references() error\n"); + pr_err("usbfront: gnttab_alloc_grant_references() error\n"); return -ENOMEM; } @@ -236,7 +236,8 @@ __acquires(info->lock) COUNT(info->stats.complete); } spin_unlock(&info->lock); - usb_hcd_giveback_urb(info_to_hcd(info), urb); + usb_hcd_giveback_urb(info_to_hcd(info), urb, + urbp->status <= 0 ? 
urbp->status : urb->status); spin_lock(&info->lock); } --- head-2011-03-17.orig/drivers/xen/usbfront/usbfront.h 2011-01-31 17:29:16.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbfront/usbfront.h 2011-01-31 17:56:27.000000000 +0100 @@ -82,6 +82,7 @@ struct urb_priv { struct urb *urb; int req_id; /* RING_REQUEST id for submitting */ int unlink_req_id; /* RING_REQUEST id for unlinking */ + int status; unsigned unlinked:1; /* dequeued marker */ }; --- head-2011-03-17.orig/drivers/xen/usbfront/xenbus.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbfront/xenbus.c 2011-01-31 17:56:27.000000000 +0100 @@ -395,7 +395,7 @@ static int __init usbfront_init(void) xenhcd_urbp_cachep = kmem_cache_create("xenhcd_urb_priv", sizeof(struct urb_priv), 0, 0, NULL); if (!xenhcd_urbp_cachep) { - printk(KERN_ERR "usbfront failed to create kmem cache\n"); + pr_err("usbfront failed to create kmem cache\n"); return -ENOMEM; } --- head-2011-03-17.orig/drivers/xen/util.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/util.c 2011-01-31 17:56:27.000000000 +0100 @@ -11,7 +11,7 @@ struct class *get_xen_class(void) xen_class = class_create(THIS_MODULE, "xen"); if (IS_ERR(xen_class)) { - printk("Failed to create xen sysfs class.\n"); + pr_err("failed to create xen sysfs class\n"); xen_class = NULL; } --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_comms.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_comms.c 2011-01-31 17:56:27.000000000 +0100 @@ -238,13 +238,13 @@ int xb_init_comms(void) int err; if (intf->req_prod != intf->req_cons) - printk(KERN_ERR "XENBUS request ring is not quiescent " + pr_err("XENBUS request ring is not quiescent " "(%08x:%08x)!\n", intf->req_cons, intf->req_prod); if (intf->rsp_prod != intf->rsp_cons) { - printk(KERN_WARNING "XENBUS response ring is not quiescent " - "(%08x:%08x): fixing up\n", - intf->rsp_cons, intf->rsp_prod); + pr_warning("XENBUS response ring is not quiescent" + " 
(%08x:%08x): fixing up\n", + intf->rsp_cons, intf->rsp_prod); intf->rsp_cons = intf->rsp_prod; } @@ -259,7 +259,7 @@ int xb_init_comms(void) xen_store_evtchn, wake_waiting, 0, "xenbus", &xb_waitq); if (err <= 0) { - printk(KERN_ERR "XENBUS request irq failed %i\n", err); + pr_err("XENBUS request irq failed %i\n", err); return err; } --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe.c 2011-01-31 17:56:27.000000000 +0100 @@ -112,13 +112,13 @@ static int frontend_bus_id(char bus_id[X { nodename = strchr(nodename, '/'); if (!nodename || strlen(nodename + 1) >= XEN_BUS_ID_SIZE) { - printk(KERN_WARNING "XENBUS: bad frontend %s\n", nodename); + pr_warning("XENBUS: bad frontend %s\n", nodename); return -EINVAL; } strlcpy(bus_id, nodename + 1, XEN_BUS_ID_SIZE); if (!strchr(bus_id, '/')) { - printk(KERN_WARNING "XENBUS: bus_id %s no slash\n", bus_id); + pr_warning("XENBUS: bus_id %s no slash\n", bus_id); return -EINVAL; } *strchr(bus_id, '/') = '-'; @@ -176,11 +176,9 @@ static int read_backend_details(struct x } #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE)) -static int xenbus_uevent_frontend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env) { struct xenbus_device *xdev; - int length = 0, i = 0; if (dev == NULL) return -ENODEV; @@ -189,12 +187,9 @@ static int xenbus_uevent_frontend(struct return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_PATH=%s", xdev->nodename); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "MODALIAS=xen:%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); + 
add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype); return 0; } @@ -762,8 +757,8 @@ static int suspend_dev(struct device *de if (drv->suspend) err = drv->suspend(xdev); if (err) - printk(KERN_WARNING - "xenbus: suspend %s failed: %i\n", dev->bus_id, err); + pr_warning("xenbus: suspend %s failed: %i\n", + dev->bus_id, err); return 0; } @@ -782,9 +777,8 @@ static int suspend_cancel_dev(struct dev if (drv->suspend_cancel) err = drv->suspend_cancel(xdev); if (err) - printk(KERN_WARNING - "xenbus: suspend_cancel %s failed: %i\n", - dev->bus_id, err); + pr_warning("xenbus: suspend_cancel %s failed: %i\n", + dev->bus_id, err); return 0; } @@ -804,9 +798,8 @@ static int resume_dev(struct device *dev err = talk_to_otherend(xdev); if (err) { - printk(KERN_WARNING - "xenbus: resume (talk_to_otherend) %s failed: %i\n", - dev->bus_id, err); + pr_warning("xenbus: resume (talk_to_otherend) %s failed: %i\n", + dev->bus_id, err); return err; } @@ -815,18 +808,16 @@ static int resume_dev(struct device *dev if (drv->resume) { err = drv->resume(xdev); if (err) { - printk(KERN_WARNING - "xenbus: resume %s failed: %i\n", - dev->bus_id, err); + pr_warning("xenbus: resume %s failed: %i\n", + dev->bus_id, err); return err; } } err = watch_otherend(xdev); if (err) { - printk(KERN_WARNING - "xenbus_probe: resume (watch_otherend) %s failed: " - "%d.\n", dev->bus_id, err); + pr_warning("xenbus_probe: resume (watch_otherend) %s failed:" + " %d\n", dev->bus_id, err); return err; } @@ -1012,9 +1003,8 @@ int xenbus_conn(domid_t remote_dom, unsi fail1: rc2 = xb_free_port(xen_store_evtchn); if (rc2 != 0) - printk(KERN_WARNING - "XENBUS: Error freeing xenstore event channel: %d\n", - rc2); + pr_warning("XENBUS: Error freeing xenstore event channel:" + " %d\n", rc2); fail0: xen_store_evtchn = -1; return rc; @@ -1040,9 +1030,8 @@ static int __devinit xenbus_probe_init(v /* Register ourselves with the kernel bus subsystem */ 
xenbus_frontend.error = bus_register(&xenbus_frontend.bus); if (xenbus_frontend.error) - printk(KERN_WARNING - "XENBUS: Error registering frontend bus: %i\n", - xenbus_frontend.error); + pr_warning("XENBUS: Error registering frontend bus: %i\n", + xenbus_frontend.error); xenbus_backend_bus_register(); /* @@ -1117,8 +1106,8 @@ static int __devinit xenbus_probe_init(v /* Initialize the interface to xenstore. */ err = xs_init(); if (err) { - printk(KERN_WARNING - "XENBUS: Error initializing xenstore comms: %i\n", err); + pr_warning("XENBUS: Error initializing xenstore comms: %i\n", + err); goto err; } @@ -1128,9 +1117,8 @@ static int __devinit xenbus_probe_init(v xenbus_frontend.error = device_register(&xenbus_frontend.dev); if (xenbus_frontend.error) { bus_unregister(&xenbus_frontend.bus); - printk(KERN_WARNING - "XENBUS: Error registering frontend device: %i\n", - xenbus_frontend.error); + pr_warning("XENBUS: Error registering frontend device:" + " %d\n", xenbus_frontend.error); } } #endif @@ -1212,8 +1200,8 @@ static int print_device_status(struct de if (!dev->driver) { /* Information only: is this too noisy? 
*/ - printk(KERN_INFO "XENBUS: Device with no driver: %s\n", - xendev->nodename); + pr_info("XENBUS: Device with no driver: %s\n", + xendev->nodename); return 0; } @@ -1221,15 +1209,15 @@ static int print_device_status(struct de enum xenbus_state rstate = XenbusStateUnknown; if (xendev->otherend) rstate = xenbus_read_driver_state(xendev->otherend); - printk(KERN_WARNING "XENBUS: Timeout connecting " - "to device: %s (local state %d, remote state %d)\n", - xendev->nodename, xendev->state, rstate); + pr_warning("XENBUS: Timeout connecting to device: %s" + " (local state %d, remote state %d)\n", + xendev->nodename, xendev->state, rstate); } xendrv = to_xenbus_driver(dev->driver); if (xendrv->is_ready && !xendrv->is_ready(xendev)) - printk(KERN_WARNING "XENBUS: Device not ready: %s\n", - xendev->nodename); + pr_warning("XENBUS: Device not ready: %s\n", + xendev->nodename); return 0; } @@ -1263,8 +1251,8 @@ static void wait_for_devices(struct xenb while (exists_connecting_device(drv)) { if (time_after(jiffies, start + (seconds_waited+5)*HZ)) { if (!seconds_waited) - printk(KERN_WARNING "XENBUS: Waiting for " - "devices to initialise: "); + pr_warning("XENBUS: Waiting for " + "devices to initialise: "); seconds_waited += 5; printk("%us...", 300 - seconds_waited); if (seconds_waited == 300) --- head-2011-03-17.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_probe_backend.c 2011-01-31 17:56:27.000000000 +0100 @@ -60,8 +60,7 @@ #include #endif -static int xenbus_uevent_backend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size); +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env); static int xenbus_probe_backend(const char *type, const char *domid); extern int read_otherend_details(struct xenbus_device *xendev, @@ -128,13 +127,10 @@ static struct xen_bus_type xenbus_backen }, }; -static int xenbus_uevent_backend(struct device *dev, char 
**envp, - int num_envp, char *buffer, int buffer_size) +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env) { struct xenbus_device *xdev; struct xenbus_driver *drv; - int i = 0; - int length = 0; DPRINTK(""); @@ -146,27 +142,16 @@ static int xenbus_uevent_backend(struct return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_BASE_PATH=%s", xenbus_backend.root); - - /* terminate, set to next free slot, shrink available space */ - envp[i] = NULL; - envp = &envp[i]; - num_envp -= i; - buffer = &buffer[length]; - buffer_size -= length; + add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root); if (dev->driver) { drv = to_xenbus_driver(dev->driver); if (drv && drv->uevent) - return drv->uevent(xdev, envp, num_envp, buffer, - buffer_size); + return drv->uevent(xdev, env); } return 0; @@ -268,9 +253,8 @@ void xenbus_backend_bus_register(void) { xenbus_backend.error = bus_register(&xenbus_backend.bus); if (xenbus_backend.error) - printk(KERN_WARNING - "XENBUS: Error registering backend bus: %i\n", - xenbus_backend.error); + pr_warning("XENBUS: Error registering backend bus: %i\n", + xenbus_backend.error); } void xenbus_backend_device_register(void) @@ -281,9 +265,8 @@ void xenbus_backend_device_register(void xenbus_backend.error = device_register(&xenbus_backend.dev); if (xenbus_backend.error) { bus_unregister(&xenbus_backend.bus); - printk(KERN_WARNING - "XENBUS: Error registering backend device: %i\n", - xenbus_backend.error); + pr_warning("XENBUS: Error registering backend device: %i\n", + xenbus_backend.error); } } --- 
head-2011-03-17.orig/drivers/xen/xenbus/xenbus_xs.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenbus/xenbus_xs.c 2011-01-31 17:56:27.000000000 +0100 @@ -135,9 +135,8 @@ static int get_error(const char *errorst for (i = 0; strcmp(errorstring, xsd_errors[i].errstring) != 0; i++) { if (i == ARRAY_SIZE(xsd_errors) - 1) { - printk(KERN_WARNING - "XENBUS xen store gave: unknown error %s", - errorstring); + pr_warning("XENBUS xen store gave: unknown error %s", + errorstring); return EINVAL; } } @@ -278,9 +277,9 @@ static void *xs_talkv(struct xenbus_tran if (msg.type != type) { if (printk_ratelimit()) - printk(KERN_WARNING - "XENBUS unexpected type [%d], expected [%d]\n", - msg.type, type); + pr_warning("XENBUS unexpected type [%d]," + " expected [%d]\n", + msg.type, type); kfree(ret); return ERR_PTR(-EINVAL); } @@ -677,9 +676,8 @@ void unregister_xenbus_watch(struct xenb err = xs_unwatch(watch->node, token); if (err) - printk(KERN_WARNING - "XENBUS Failed to release watch %s: %i\n", - watch->node, err); + pr_warning("XENBUS Failed to release watch %s: %i\n", + watch->node, err); up_read(&xs_state.watch_mutex); @@ -909,8 +907,8 @@ static int xenbus_thread(void *unused) for (;;) { err = process_msg(); if (err) - printk(KERN_WARNING "XENBUS error %d while reading " - "message\n", err); + pr_warning("XENBUS error %d while reading " + "message\n", err); if (kthread_should_stop()) break; } --- head-2011-03-17.orig/drivers/xen/xenoprof/xenoprofile.c 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/drivers/xen/xenoprof/xenoprofile.c 2011-01-31 17:56:27.000000000 +0100 @@ -28,7 +28,6 @@ #include #include #include -#include "../../../drivers/oprofile/cpu_buffer.h" #include "../../../drivers/oprofile/event_buffer.h" #define MAX_XENOPROF_SAMPLES 16 @@ -141,8 +140,7 @@ static void xenoprof_add_pc(xenoprof_buf if (xenoprof_is_escape(buf, tail) && xenoprof_get_event(buf, tail) == XENOPROF_TRACE_BEGIN) { tracing=1; - oprofile_add_pc(ESCAPE_CODE, 
buf->event_log[tail].mode, - CPU_TRACE_BEGIN); + oprofile_add_mode(buf->event_log[tail].mode); if (!is_passive) oprofile_samples++; else @@ -566,8 +564,8 @@ int __init xenoprofile_init(struct oprof active_defined = 0; } - printk(KERN_INFO "%s: ret %d, events %d, xenoprof_is_primary %d\n", - __func__, ret, init.num_events, xenoprof_is_primary); + pr_info("%s: ret %d, events %d, xenoprof_is_primary %d\n", + __func__, ret, init.num_events, xenoprof_is_primary); return ret; } --- head-2011-03-17.orig/include/linux/kexec.h 2011-01-31 14:53:38.000000000 +0100 +++ head-2011-03-17/include/linux/kexec.h 2011-01-31 17:56:27.000000000 +0100 @@ -205,8 +205,15 @@ extern struct kimage *kexec_crash_image; #define VMCOREINFO_BYTES (4096) #define VMCOREINFO_NOTE_NAME "VMCOREINFO" #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#if !defined(CONFIG_XEN) || !defined(CONFIG_X86) #define VMCOREINFO_NOTE_SIZE (KEXEC_NOTE_HEAD_BYTES*2 + VMCOREINFO_BYTES \ + VMCOREINFO_NOTE_NAME_BYTES) +#else +#define VMCOREINFO_NOTE_SIZE ALIGN(KEXEC_NOTE_HEAD_BYTES*2 \ + + VMCOREINFO_BYTES \ + + VMCOREINFO_NOTE_NAME_BYTES, \ + PAGE_SIZE) +#endif /* Location of a reserved region to hold the crash kernel. */ --- head-2011-03-17.orig/include/linux/oprofile.h 2011-02-17 10:06:04.000000000 +0100 +++ head-2011-03-17/include/linux/oprofile.h 2011-02-17 10:10:35.000000000 +0100 @@ -123,6 +123,8 @@ void oprofile_add_ext_sample(unsigned lo * backtrace. 
*/ void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event); +void oprofile_add_mode(int cpu_mode); + /* add a backtrace entry, to be called from the ->backtrace callback */ void oprofile_add_trace(unsigned long eip); --- head-2011-03-17.orig/include/linux/sysctl.h 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/include/linux/sysctl.h 2011-01-31 17:56:27.000000000 +0100 @@ -59,6 +59,7 @@ enum CTL_BUS=8, /* Busses */ CTL_ABI=9, /* Binary emulation */ CTL_CPU=10, /* CPU stuff (speed scaling, etc) */ + CTL_XEN=123, /* Xen info and control */ CTL_ARLAN=254, /* arlan wireless driver */ CTL_S390DBF=5677, /* s390 debug */ CTL_SUNRPC=7249, /* sunrpc debug */ --- head-2011-03-17.orig/include/xen/cpu_hotplug.h 2007-08-16 18:07:01.000000000 +0200 +++ head-2011-03-17/include/xen/cpu_hotplug.h 2011-01-31 17:56:27.000000000 +0100 @@ -25,8 +25,8 @@ void cpu_bringup(void); static inline int smp_suspend(void) { if (num_online_cpus() > 1) { - printk(KERN_WARNING "Can't suspend SMP guests " - "without CONFIG_HOTPLUG_CPU\n"); + pr_warning("Can't suspend SMP guests without" + " CONFIG_HOTPLUG_CPU\n"); return -EOPNOTSUPP; } return 0; --- head-2011-03-17.orig/include/xen/gnttab.h 2010-09-23 15:39:04.000000000 +0200 +++ head-2011-03-17/include/xen/gnttab.h 2011-01-31 17:56:27.000000000 +0100 @@ -172,11 +172,11 @@ gnttab_set_replace_op(struct gnttab_unma BUG_ON(__ret); \ } \ if (__hc_delay == 0) { \ - printk(KERN_ERR "%s: %s gnt busy\n", __func__, current->comm); \ + pr_err("%s: %s gnt busy\n", __func__, current->comm); \ (__HCarg_p)->status = GNTST_bad_page; \ } \ if ((__HCarg_p)->status != GNTST_okay) \ - printk(KERN_ERR "%s: %s gnt status %x\n", \ + pr_err("%s: %s gnt status %x\n", \ __func__, current->comm, (__HCarg_p)->status); \ } @@ -191,11 +191,11 @@ gnttab_set_replace_op(struct gnttab_unma msleep(__hc_delay++); \ } while ((__HCarg_p)->status == GNTST_eagain && __hc_delay); \ if (__hc_delay == 0) { \ - printk(KERN_ERR "%s: %s gnt busy\n", __func__, 
current->comm); \ + pr_err("%s: %s gnt busy\n", __func__, current->comm); \ (__HCarg_p)->status = GNTST_bad_page; \ } \ if ((__HCarg_p)->status != GNTST_okay) \ - printk(KERN_ERR "%s: %s gnt status %x\n", \ + pr_err("%s: %s gnt status %x\n", \ __func__, current->comm, (__HCarg_p)->status); \ } --- head-2011-03-17.orig/include/xen/hvm.h 2011-01-31 15:14:12.000000000 +0100 +++ head-2011-03-17/include/xen/hvm.h 2011-01-31 17:56:27.000000000 +0100 @@ -13,8 +13,7 @@ static inline unsigned long hvm_get_para xhv.index = idx; r = HYPERVISOR_hvm_op(HVMOP_get_param, &xhv); if (r < 0) { - printk(KERN_ERR "Cannot get hvm parameter %d: %d!\n", - idx, r); + pr_err("Cannot get hvm parameter %d: %d!\n", idx, r); return 0; } return xhv.value; --- head-2011-03-17.orig/include/xen/net-util.h 2011-02-09 15:53:07.000000000 +0100 +++ head-2011-03-17/include/xen/net-util.h 2011-02-09 15:55:10.000000000 +0100 @@ -51,7 +51,7 @@ static inline int skb_checksum_setup(str break; default: if (net_ratelimit()) - printk(KERN_ERR "Attempting to checksum a non-" + pr_err("Attempting to checksum a non-" "TCP/UDP packet, dropping a protocol" " %d packet\n", iph->protocol); goto out; --- head-2011-03-17.orig/include/xen/pcifront.h 2007-06-18 08:38:13.000000000 +0200 +++ head-2011-03-17/include/xen/pcifront.h 2011-01-31 17:56:27.000000000 +0100 @@ -12,13 +12,11 @@ #ifndef __ia64__ +#include + struct pcifront_device; struct pci_bus; - -struct pcifront_sd { - int domain; - struct pcifront_device *pdev; -}; +#define pcifront_sd pci_sysdata static inline struct pcifront_device * pcifront_get_pdev(struct pcifront_sd *sd) @@ -34,18 +32,6 @@ static inline void pcifront_init_sd(stru sd->pdev = pdev; } -#if defined(CONFIG_PCI_DOMAINS) -static inline int pci_domain_nr(struct pci_bus *bus) -{ - struct pcifront_sd *sd = bus->sysdata; - return sd->domain; -} -static inline int pci_proc_domain(struct pci_bus *bus) -{ - return pci_domain_nr(bus); -} -#endif /* CONFIG_PCI_DOMAINS */ - static inline void 
pcifront_setup_root_resources(struct pci_bus *bus, struct pcifront_sd *sd) { --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2011-03-17/include/xen/sysctl.h 2011-01-31 17:56:27.000000000 +0100 @@ -0,0 +1,11 @@ +#ifndef _XEN_SYSCTL_H +#define _XEN_SYSCTL_H + +/* CTL_XEN names: */ +enum +{ + CTL_XEN_INDEPENDENT_WALLCLOCK=1, + CTL_XEN_PERMITTED_CLOCK_JITTER=2, +}; + +#endif /* _XEN_SYSCTL_H */ --- head-2011-03-17.orig/include/xen/xenbus.h 2011-01-31 17:49:31.000000000 +0100 +++ head-2011-03-17/include/xen/xenbus.h 2011-01-31 17:56:27.000000000 +0100 @@ -107,7 +107,7 @@ struct xenbus_driver { int (*suspend)(struct xenbus_device *dev); int (*suspend_cancel)(struct xenbus_device *dev); int (*resume)(struct xenbus_device *dev); - int (*uevent)(struct xenbus_device *, char **, int, char *, int); + int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *); struct device_driver driver; int (*read_otherend_details)(struct xenbus_device *dev); int (*is_ready)(struct xenbus_device *dev); --- head-2011-03-17.orig/kernel/kexec.c 2011-01-31 17:32:16.000000000 +0100 +++ head-2011-03-17/kernel/kexec.c 2011-01-31 17:56:27.000000000 +0100 @@ -47,7 +47,11 @@ note_buf_t __percpu *crash_notes; /* vmcoreinfo stuff */ static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES]; -u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; +u32 +#if defined(CONFIG_XEN) && defined(CONFIG_X86) +__attribute__((__section__(".bss.page_aligned"), __aligned__(PAGE_SIZE))) +#endif +vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; size_t vmcoreinfo_size; size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data); @@ -1260,6 +1264,7 @@ static int __init crash_notes_memory_ini module_init(crash_notes_memory_init) +#ifndef CONFIG_XEN /* * parsing the "crashkernel" commandline * @@ -1422,7 +1427,7 @@ int __init parse_crashkernel(char *cm return 0; } - +#endif void crash_save_vmcoreinfo(void) @@ -1479,7 +1484,18 @@ static int __init crash_save_vmcoreinfo_ VMCOREINFO_SYMBOL(init_uts_ns); VMCOREINFO_SYMBOL(node_online_map); 
+#ifndef CONFIG_X86_XEN VMCOREINFO_SYMBOL(swapper_pg_dir); +#else +/* + * Since for x86-32 Xen swapper_pg_dir is a pointer rather than an array, + * make the value stored consistent with native (i.e. the base address of + * the page directory). + */ +# define swapper_pg_dir *swapper_pg_dir + VMCOREINFO_SYMBOL(swapper_pg_dir); +# undef swapper_pg_dir +#endif VMCOREINFO_SYMBOL(_stext); VMCOREINFO_SYMBOL(vmlist); --- head-2011-03-17.orig/kernel/sysctl_binary.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/kernel/sysctl_binary.c 2011-01-31 17:56:27.000000000 +0100 @@ -873,6 +873,14 @@ static const struct bin_table bin_bus_ta }; +#ifdef CONFIG_XEN +static const struct bin_table bin_xen_table[] = { + { CTL_INT, 1 /* CTL_XEN_INDEPENDENT_WALLCLOCK */, "independent_wallclock" }, + { CTL_ULONG, 2 /* CTL_XEN_PERMITTED_CLOCK_JITTER */, "permitted_clock_jitter" }, + {} +}; +#endif + static const struct bin_table bin_s390dbf_table[] = { { CTL_INT, 5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" }, { CTL_INT, 5679 /* CTL_S390DBF_ACTIVE */, "debug_active" }, @@ -912,6 +920,9 @@ static const struct bin_table bin_root_t { CTL_DIR, CTL_BUS, "bus", bin_bus_table }, { CTL_DIR, CTL_ABI, "abi" }, /* CTL_CPU not used */ +#ifdef CONFIG_XEN + { CTL_DIR, CTL_XEN, "xen", bin_xen_table }, +#endif /* CTL_ARLAN "arlan" no longer used */ { CTL_DIR, CTL_S390DBF, "s390dbf", bin_s390dbf_table }, { CTL_DIR, CTL_SUNRPC, "sunrpc", bin_sunrpc_table }, --- head-2011-03-17.orig/kernel/sysctl_check.c 2011-03-17 13:45:28.000000000 +0100 +++ head-2011-03-17/kernel/sysctl_check.c 2011-01-31 17:56:27.000000000 +0100 @@ -4,6 +4,7 @@ #include #include #include +#include static int sysctl_depth(struct ctl_table *table) --- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-01-31 17:32:29.000000000 +0100 +++ head-2011-03-17/lib/swiotlb-xen.c 2011-01-31 17:56:27.000000000 +0100 @@ -27,7 +27,7 @@ #include #include #include -#include +#include int swiotlb; EXPORT_SYMBOL(swiotlb); @@ -602,9 +602,10 @@ swiotlb_sync_single_for_device(struct de * same
here. */ int -swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems, +swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; struct phys_addr buffer; dma_addr_t dev_addr; char *map; @@ -612,22 +613,22 @@ swiotlb_map_sg(struct device *hwdev, str BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) { - dev_addr = gnttab_dma_map_page(sg->page) + sg->offset; + for_each_sg(sgl, sg, nelems, i) { + dev_addr = gnttab_dma_map_page(sg_page(sg)) + sg->offset; - if (range_straddles_page_boundary(page_to_pseudophys(sg->page) + if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg)) + sg->offset, sg->length) || address_needs_mapping(hwdev, dev_addr)) { gnttab_dma_unmap_page(dev_addr); - buffer.page = sg->page; + buffer.page = sg_page(sg); buffer.offset = sg->offset; map = map_single(hwdev, buffer, sg->length, dir); if (!map) { /* Don't panic here, we expect map_sg users to do proper error handling. */ swiotlb_full(hwdev, sg->length, dir, 0); - swiotlb_unmap_sg(hwdev, sg - i, i, dir); - sg[0].dma_length = 0; + swiotlb_unmap_sg(hwdev, sgl, i, dir); + sgl[0].dma_length = 0; return 0; } sg->dma_address = virt_to_bus(map); @@ -643,19 +644,21 @@ swiotlb_map_sg(struct device *hwdev, str * concerning calls here are the same as for swiotlb_unmap_single() above. */ void -swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems, +swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (in_swiotlb_aperture(sg->dma_address)) unmap_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir); else gnttab_dma_unmap_page(sg->dma_address); + } } /* @@ -666,17 +669,19 @@ swiotlb_unmap_sg(struct device *hwdev, s * and usage. 
*/ static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sg, +swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, int nelems, int dir, int target) { + struct scatterlist *sg; int i; BUG_ON(dir == DMA_NONE); - for (i = 0; i < nelems; i++, sg++) + for_each_sg(sgl, sg, nelems, i) { if (in_swiotlb_aperture(sg->dma_address)) sync_single(hwdev, bus_to_virt(sg->dma_address), sg->dma_length, dir, target); + } } void