From: www.kernel.org
Subject: Update to 2.6.24
Patch-mainline: 2.6.24

Automatically created from "patches.kernel.org/patch-2.6.24" by xen-port-patches.py

Acked-by: jbeulich@novell.com

--- head-2010-05-25.orig/arch/x86/Kconfig	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/Kconfig	2010-03-24 15:10:29.000000000 +0100
@@ -76,15 +76,16 @@ config GENERIC_CMOS_UPDATE
 config CLOCKSOURCE_WATCHDOG
 	def_bool y
-	depends on !X86_XEN
+	depends on !XEN
 
 config GENERIC_CLOCKEVENTS
 	def_bool y
-	depends on !X86_XEN
+	depends on !XEN
 
 config GENERIC_CLOCKEVENTS_BROADCAST
 	def_bool y
-	depends on X86_64 || (X86_32 && X86_LOCAL_APIC && !X86_XEN)
+	depends on X86_64 || (X86_32 && X86_LOCAL_APIC)
+	depends on !XEN
 
 config LOCKDEP_SUPPORT
 	def_bool y
@@ -240,12 +241,12 @@ config X86_TRAMPOLINE
 
 config X86_NO_TSS
 	bool
-	depends on X86_XEN || X86_64_XEN
+	depends on XEN
 	default y
 
 config X86_NO_IDT
 	bool
-	depends on X86_XEN || X86_64_XEN
+	depends on XEN
 	default y
 
 config X86_32_LAZY_GS
@@ -327,6 +328,7 @@ config X86_MPPARSE
 
 config X86_XEN
 	bool "Xen-compatible"
+	depends on X86_32
 	select XEN
 	select X86_PAE
 	select X86_UP_APIC if !SMP && XEN_PRIVILEGED_GUEST
@@ -367,6 +369,7 @@ endif
 
 config X86_64_XEN
 	bool "Enable Xen compatible kernel"
+	depends on X86_64
 	select XEN
 	select SWIOTLB
 	help
@@ -702,7 +705,7 @@ source "arch/x86/Kconfig.cpu"
 config HPET_TIMER
 	def_bool X86_64
 	prompt "HPET Timer Support" if X86_32
-	depends on !X86_XEN && !X86_64_XEN
+	depends on !XEN
 	---help---
 	  Use the IA-PC HPET (High Precision Event Timer) to manage
 	  time in preference to the PIT and RTC, if a HPET is
@@ -1056,7 +1059,7 @@ config I8K
 
 config X86_REBOOTFIXUPS
 	bool "Enable X86 board specific fixups for reboot"
-	depends on X86_32 && !X86_XEN
+	depends on X86_32 && !XEN
 	---help---
 	  This enables chipset and/or board specific fixups to be done
 	  in order to get reboot to work correctly. This is only needed on
@@ -1454,7 +1457,7 @@ config X86_RESERVE_LOW_64K
 config MATH_EMULATION
 	bool
 	prompt "Math emulation" if X86_32
-	depends on !X86_XEN
+	depends on !XEN
 	---help---
 	  Linux can emulate a math coprocessor (used for floating point
 	  operations) if you don't have one. 486DX and Pentium processors have
@@ -1828,6 +1831,7 @@ endmenu
 config ARCH_ENABLE_MEMORY_HOTPLUG
 	def_bool y
 	depends on X86_64 || (X86_32 && HIGHMEM)
+	depends on !XEN
 
 config ARCH_ENABLE_MEMORY_HOTREMOVE
 	def_bool y
@@ -2019,7 +2023,7 @@ choice
 
 config PCI_GOBIOS
 	bool "BIOS"
-	depends on !X86_XEN
+	depends on !XEN
 
 config PCI_GOMMCONFIG
 	bool "MMConfig"
@@ -2070,7 +2074,7 @@ config PCI_MMCONFIG
 
 config XEN_PCIDEV_FRONTEND
 	bool "Xen PCI Frontend" if X86_64
-	depends on PCI && ((X86_XEN && (PCI_GOXEN_FE || PCI_GOANY)) || X86_64_XEN)
+	depends on PCI && XEN && (PCI_GOXEN_FE || PCI_GOANY || X86_64)
 	select HOTPLUG
 	default y
 	help
@@ -2086,7 +2090,7 @@ config XEN_PCIDEV_FE_DEBUG
 
 config DMAR
 	bool "Support for DMA Remapping Devices (EXPERIMENTAL)"
-	depends on PCI_MSI && ACPI && EXPERIMENTAL
+	depends on PCI_MSI && ACPI && !XEN && EXPERIMENTAL
 	help
 	  DMA remapping (DMAR) devices support enables independent address
 	  translations for Direct Memory Access (DMA) from devices.
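A note on the Kconfig hunks above: the per-bitness options X86_XEN (32-bit)
and X86_64_XEN (64-bit) now carry explicit "depends on X86_32"/"depends on
X86_64" lines and both select the common XEN symbol, so every negative
dependency elsewhere can test !XEN instead of enumerating both. As a hedged
illustration only -- this C snippet is not part of the patch, and the
function name is hypothetical -- code that previously had to guard on either
per-bitness symbol can key off the single CONFIG_XEN option:

	#include <linux/init.h>
	#include <linux/kernel.h>

	static int __init xen_guest_report(void)
	{
	#if defined(CONFIG_X86_XEN) || defined(CONFIG_X86_64_XEN)
		/* old style: test both per-bitness symbols */
		printk(KERN_INFO "Xen guest (per-bitness test)\n");
	#endif
	#ifdef CONFIG_XEN
		/* equivalent single test after this consolidation */
		printk(KERN_INFO "Xen guest (unified test)\n");
	#endif
		return 0;
	}

GNU patch treats text between per-file diffs as garbage and skips it, so this
commentary does not affect application of the surrounding hunks.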
--- head-2010-05-25.orig/arch/x86/Makefile	2010-03-24 15:01:37.000000000 +0100
+++ head-2010-05-25/arch/x86/Makefile	2010-03-24 15:10:29.000000000 +0100
@@ -156,8 +156,8 @@ BOOT_TARGETS = bzlilo bzdisk fdimage fdi
 PHONY += bzImage vmlinuz $(BOOT_TARGETS)
 
 ifdef CONFIG_XEN
-CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
-	-Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(CPPFLAGS)
+KBUILD_CPPFLAGS := -D__XEN_INTERFACE_VERSION__=$(CONFIG_XEN_INTERFACE_VERSION) \
+	-Iinclude$(if $(KBUILD_SRC),2)/asm/mach-xen $(KBUILD_CPPFLAGS)
 
 ifdef CONFIG_X86_64
 LDFLAGS_vmlinux := -e startup_64
@@ -171,6 +171,8 @@ KBUILD_IMAGE := $(boot)/vmlinuz
 
 vmlinuz: vmlinux
 	$(Q)$(MAKE) $(build)=$(boot) $(KBUILD_IMAGE)
+	$(Q)mkdir -p $(objtree)/arch/$(UTS_MACHINE)/boot
+	$(Q)ln -fsn ../../x86/boot/$@ $(objtree)/arch/$(UTS_MACHINE)/boot/$@
 else
 # Default kernel to build
 all: bzImage
--- head-2010-05-25.orig/arch/x86/ia32/ia32entry-xen.S	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/ia32/ia32entry-xen.S	2010-03-24 15:10:29.000000000 +0100
@@ -125,20 +125,16 @@ sysenter_do_call:
 	jmp int_ret_from_sys_call
 
 sysenter_tracesys:
+	xchgl	%r9d,%ebp
 	SAVE_REST
 	CLEAR_RREGS
+	movq	%r9,R9(%rsp)
 	movq	$-ENOSYS,RAX(%rsp)	/* really needed? */
 	movq	%rsp,%rdi		/* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET	/* reload args from stack in case ptrace changed it */
 	RESTORE_REST
-	movl	%ebp, %ebp
-	/* no need to do an access_ok check here because rbp has been
-	   32bit zero extended */
-1:	movl	(%rbp),%r9d
-	.section __ex_table,"a"
-	.quad 1b,ia32_badarg
-	.previous
+	xchgl	%ebp,%r9d
 	jmp	sysenter_do_call
 	CFI_ENDPROC
 ENDPROC(ia32_sysenter_target)
@@ -200,20 +196,17 @@ cstar_do_call:
 	jmp int_ret_from_sys_call
 
 cstar_tracesys:
+	xchgl	%r9d,%ebp
 	SAVE_REST
 	CLEAR_RREGS
+	movq	%r9,R9(%rsp)
 	movq	$-ENOSYS,RAX(%rsp)	/* really needed? */
 	movq	%rsp,%rdi		/* &pt_regs -> arg1 */
 	call	syscall_trace_enter
 	LOAD_ARGS32 ARGOFFSET	/* reload args from stack in case ptrace changed it */
 	RESTORE_REST
+	xchgl	%ebp,%r9d
 	movl	RSP-ARGOFFSET(%rsp), %r8d
-	/* no need to do an access_ok check here because r8 has been
-	   32bit zero extended */
-1:	movl	(%r8),%r9d
-	.section __ex_table,"a"
-	.quad 1b,ia32_badarg
-	.previous
 	jmp cstar_do_call
 END(ia32_cstar_target)
--- head-2010-05-25.orig/arch/x86/kernel/Makefile	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/Makefile	2010-03-24 15:10:29.000000000 +0100
@@ -141,4 +141,4 @@ endif
 disabled-obj-$(CONFIG_XEN) := early-quirks.o hpet.o i8253.o i8259_$(BITS).o reboot.o \
 	smpboot_$(BITS).o tsc_$(BITS).o tsc_sync.o
 disabled-obj-$(CONFIG_XEN_UNPRIVILEGED_GUEST) += mpparse_64.o
-%/head_64.o %/head_64.s: $(if $(CONFIG_XEN),EXTRA_AFLAGS,dummy) :=
+%/head_64.o %/head_64.s: asflags-$(CONFIG_XEN) :=
--- head-2010-05-25.orig/arch/x86/kernel/acpi/sleep_32-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/acpi/sleep_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -90,7 +90,7 @@ __setup("acpi_sleep=", acpi_sleep_setup)
 
 /* Ouch, we want to delete this.
    We already have better version in userspace, in
    s2ram from suspend.sf.net project */
-static __init int reset_videomode_after_s3(struct dmi_system_id *d)
+static __init int reset_videomode_after_s3(const struct dmi_system_id *d)
 {
 	acpi_realmode_flags |= 2;
 	return 0;
--- head-2010-05-25.orig/arch/x86/kernel/acpi/sleep_64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/acpi/sleep_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -123,6 +123,3 @@ static int __init acpi_sleep_setup(char
 __setup("acpi_sleep=", acpi_sleep_setup);
 #endif /* CONFIG_ACPI_PV_SLEEP */
 
-void acpi_pci_link_exit(void)
-{
-}
--- head-2010-05-25.orig/arch/x86/kernel/cpu/common-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/common-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -214,7 +214,7 @@ static void __cpuinit get_cpu_vendor(str
 
 static int __init x86_fxsr_setup(char * s)
 {
-	/* Tell all the other CPU's to not use it... */
+	/* Tell all the other CPUs to not use it... */
 	disable_x86_fxsr = 1;
 
 	/*
--- head-2010-05-25.orig/arch/x86/kernel/e820_32-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/e820_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -52,6 +52,13 @@ struct resource code_resource = {
 	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
 };
 
+struct resource bss_resource = {
+	.name	= "Kernel bss",
+	.start	= 0,
+	.end	= 0,
+	.flags	= IORESOURCE_BUSY | IORESOURCE_MEM
+};
+
 static struct resource system_rom_resource = {
 	.name	= "System ROM",
 	.start	= 0xf0000,
@@ -266,7 +273,9 @@ static struct e820map machine_e820;
  * and also for regions reported as reserved by the e820.
  */
 static void __init
-legacy_init_iomem_resources(struct resource *code_resource, struct resource *data_resource)
+legacy_init_iomem_resources(struct resource *code_resource,
+			    struct resource *data_resource,
+			    struct resource *bss_resource)
 {
 	int i;
 
@@ -300,9 +309,11 @@ legacy_init_iomem_resources(struct resou
 #ifndef CONFIG_XEN
 			request_resource(res, code_resource);
 			request_resource(res, data_resource);
+			request_resource(res, bss_resource);
 #endif
 #ifdef CONFIG_KEXEC
-			request_resource(res, &crashk_res);
+			if (crashk_res.start != crashk_res.end)
+				request_resource(res, &crashk_res);
 #ifdef CONFIG_XEN
 			xen_machine_kexec_register_resources(res);
 #endif
@@ -329,9 +340,11 @@ static int __init request_standard_resou
 	printk("Setting up standard PCI resources\n");
 	if (efi_enabled)
-		efi_initialize_iomem_resources(&code_resource, &data_resource);
+		efi_initialize_iomem_resources(&code_resource,
+				&data_resource, &bss_resource);
 	else
-		legacy_init_iomem_resources(&code_resource, &data_resource);
+		legacy_init_iomem_resources(&code_resource,
+				&data_resource, &bss_resource);
 
 	/* EFI systems may still have VGA */
 	request_resource(&iomem_resource, &video_ram_resource);
@@ -761,7 +774,7 @@ void __init e820_register_memory(void)
 #define e820 machine_e820
 #endif
 	/*
-	 * Search for the bigest gap in the low 32 bits of the e820
+	 * Search for the biggest gap in the low 32 bits of the e820
 	 * memory space.
 	 */
 	last = 0x100000000ull;
--- head-2010-05-25.orig/arch/x86/kernel/e820_64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/e820_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -24,7 +24,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
@@ -53,7 +53,7 @@ unsigned long end_pfn_map;
  */
 static unsigned long __initdata end_user_pfn = MAXMEM>>PAGE_SHIFT;
 
-extern struct resource code_resource, data_resource;
+extern struct resource code_resource, data_resource, bss_resource;
 
 /* Check for some hardcoded bad areas that early boot is not allowed to touch */
 static inline int bad_addr(unsigned long *addrp, unsigned long size)
@@ -75,10 +75,15 @@ static inline int bad_addr(unsigned long
 	/* initrd */
 #ifdef CONFIG_BLK_DEV_INITRD
-	if (LOADER_TYPE && INITRD_START && last >= INITRD_START &&
-	    addr < INITRD_START+INITRD_SIZE) {
-		*addrp = PAGE_ALIGN(INITRD_START + INITRD_SIZE);
-		return 1;
+	if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) {
+		unsigned long ramdisk_image = boot_params.hdr.ramdisk_image;
+		unsigned long ramdisk_size = boot_params.hdr.ramdisk_size;
+		unsigned long ramdisk_end = ramdisk_image+ramdisk_size;
+
+		if (last >= ramdisk_image && addr < ramdisk_end) {
+			*addrp = PAGE_ALIGN(ramdisk_end);
+			return 1;
+		}
 	}
 #endif
 	/* kernel code */
@@ -251,6 +256,7 @@ void __init e820_reserve_resources(struc
 #ifndef CONFIG_XEN
 		request_resource(res, &code_resource);
 		request_resource(res, &data_resource);
+		request_resource(res, &bss_resource);
 #endif
 #ifdef CONFIG_KEXEC
 		if (crashk_res.start != crashk_res.end)
@@ -661,8 +667,8 @@ void __init setup_memory_region(void)
 	 * Otherwise fake a memory map; one section from 0k->640k,
 	 * the next section from 1mb->appropriate_mem_k
 	 */
-	sanitize_e820_map(E820_MAP, &E820_MAP_NR);
-	if (copy_e820_map(E820_MAP, E820_MAP_NR) < 0)
+	sanitize_e820_map(boot_params.e820_map, &boot_params.e820_entries);
+	if (copy_e820_map(boot_params.e820_map, boot_params.e820_entries) < 0)
 		early_panic("Cannot find a valid memory map");
 	printk(KERN_INFO "BIOS-provided physical RAM map:\n");
 	e820_print_map("BIOS-e820");
@@ -847,3 +853,22 @@ __init void e820_setup_gap(struct e820en
 	printk(KERN_INFO "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n",
 		pci_mem_start, gapstart, gapsize);
 }
+
+int __init arch_get_ram_range(int slot, u64 *addr, u64 *size)
+{
+	int i;
+
+	if (slot < 0 || slot >= e820.nr_map)
+		return -1;
+	for (i = slot; i < e820.nr_map; i++) {
+		if (e820.map[i].type != E820_RAM)
+			continue;
+		break;
+	}
+	if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT))
+		return -1;
+	*addr = e820.map[i].addr;
+	*size = min_t(u64, e820.map[i].size + e820.map[i].addr,
+		max_pfn << PAGE_SHIFT) - *addr;
+	return i + 1;
+}
--- head-2010-05-25.orig/arch/x86/kernel/early_printk-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/early_printk-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -6,15 +6,10 @@
 #include
 #include
 #include
-
-/* Simple VGA output */
-
-#ifdef __i386__
 #include
-#else
-#include
-#endif
+
 #ifndef CONFIG_XEN
+/* Simple VGA output */
 #define VGABASE		(__ISA_IO_base + 0xb8000)
 
 static int max_ypos = 25, max_xpos = 80;
@@ -264,10 +259,10 @@ static int __init setup_early_printk(cha
 		early_console = &early_serial_console;
 	} else if (!strncmp(buf, "vga", 3)) {
 #ifndef CONFIG_XEN
-		   && SCREEN_INFO.orig_video_isVGA == 1) {
-		max_xpos = SCREEN_INFO.orig_video_cols;
-		max_ypos = SCREEN_INFO.orig_video_lines;
-		current_ypos = SCREEN_INFO.orig_y;
+		   && boot_params.screen_info.orig_video_isVGA == 1) {
+		max_xpos = boot_params.screen_info.orig_video_cols;
+		max_ypos = boot_params.screen_info.orig_video_lines;
+		current_ypos = boot_params.screen_info.orig_y;
 #endif
 		early_console = &early_vga_console;
 	} else if (!strncmp(buf, "simnow", 6)) {
--- head-2010-05-25.orig/arch/x86/kernel/entry_32-xen.S	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/entry_32-xen.S	2010-03-24 15:10:29.000000000 +0100
@@ -254,6 +254,7 @@ check_userspace:
 	jb resume_kernel		# not returning to v8086 or userspace
 
 ENTRY(resume_userspace)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -341,6 +342,7 @@ sysenter_past_esp:
 	jae syscall_badsys
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	movl TI_flags(%ebp), %ecx
@@ -406,6 +408,7 @@ syscall_call:
 	call *sys_call_table(,%eax,4)
 	movl %eax,PT_EAX(%esp)		# store the return value
syscall_exit:
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -478,7 +481,7 @@ ldt_ss:
 	 * is still available to implement the setting of the high
 	 * 16-bits in the INTERRUPT_RETURN paravirt-op.
 	 */
-	cmpl $0, paravirt_ops+PARAVIRT_enabled
+	cmpl $0, pv_info+PARAVIRT_enabled
 	jne restore_nocheck
 #endif
 
@@ -540,6 +543,7 @@ work_pending:
 	jz work_notifysig
work_resched:
 	call schedule
+	LOCKDEP_SYS_EXIT
 	DISABLE_INTERRUPTS(CLBR_ANY)	# make sure we don't miss an interrupt
 					# setting need_resched or sigpending
 					# between sampling and the iret
@@ -1268,6 +1272,6 @@ ENTRY(kernel_thread_helper)
 ENDPROC(kernel_thread_helper)
 
 .section .rodata,"a"
-#include "syscall_table.S"
+#include "syscall_table_32.S"
 
 syscall_table_size=(.-sys_call_table)
--- head-2010-05-25.orig/arch/x86/kernel/entry_64-xen.S	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/entry_64-xen.S	2010-03-24 15:10:29.000000000 +0100
@@ -57,7 +57,7 @@
 #include
 #include
 
-#include "xen_entry.S"
+#include "xen_entry_64.S"
 
 	.code64
 
@@ -275,6 +275,7 @@ ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	flagmask */
sysret_check:
+	LOCKDEP_SYS_EXIT
 	GET_THREAD_INFO(%rcx)
 	XEN_BLOCK_EVENTS(%rsi)
 	TRACE_IRQS_OFF
@@ -365,6 +366,7 @@ int_ret_from_sys_call:
 	movl $_TIF_ALLWORK_MASK,%edi
 	/* edi:	mask to check */
int_with_check:
+	LOCKDEP_SYS_EXIT_IRQ
 	GET_THREAD_INFO(%rcx)
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
@@ -516,11 +518,12 @@ END(stub_rt_sigreturn)
 
retint_check:
 	CFI_DEFAULT_STACK adj=1
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	andl %edi,%edx
 	CFI_REMEMBER_STATE
 	jnz retint_careful
-retint_restore_args:
+retint_restore_args:	/* return to kernel space */
 	movl EFLAGS-REST_SKIP(%rsp), %eax
 	shr $9, %eax			# EAX[0] == IRET_EFLAGS.IF
 	XEN_GET_VCPU_INFO(%rsi)
@@ -841,7 +844,7 @@ error_call_handler:
 	movq ORIG_RAX(%rsp),%rsi	# get error code
 	movq $-1,ORIG_RAX(%rsp)
 	call *%rax
-error_exit:
+error_exit:
 	RESTORE_REST
/*	cli */
 	XEN_BLOCK_EVENTS(%rsi)
@@ -849,14 +852,11 @@ error_exit:
 	GET_THREAD_INFO(%rcx)
 	testb $3,CS-ARGOFFSET(%rsp)
 	jz retint_kernel
+	LOCKDEP_SYS_EXIT_IRQ
 	movl threadinfo_flags(%rcx),%edx
 	movl $_TIF_WORK_MASK,%edi
 	andl %edi,%edx
 	jnz retint_careful
-	/*
-	 * The iret might restore flags:
-	 */
-	TRACE_IRQS_IRETQ
 	jmp retint_restore_args
 
 #if 0
@@ -1071,7 +1071,7 @@ child_rip:
 	movq %rsi, %rdi
 	call *%rax
 	# exit
-	xorl %edi, %edi
+	mov %eax, %edi
 	call do_exit
 	CFI_ENDPROC
ENDPROC(child_rip)
--- head-2010-05-25.orig/arch/x86/kernel/head64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,5 +1,5 @@
 /*
- *  linux/arch/x86_64/kernel/head64.c -- prepare to run common code
+ *  prepare to run common code
  *
  *  Copyright (C) 2000 Andrea Arcangeli SuSE
  *
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -47,27 +46,16 @@ static void __init clear_bss(void)
 }
 #endif
 
-#define NEW_CL_POINTER		0x228	/* Relative to real mode data */
-#define OLD_CL_MAGIC_ADDR	0x20
-#define OLD_CL_MAGIC		0xA33F
-#define OLD_CL_OFFSET		0x22
-
 static void __init copy_bootdata(char *real_mode_data)
 {
 #ifndef CONFIG_XEN
-	unsigned long new_data;
 	char * command_line;
 
-	memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE);
-	new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER);
-	if (!new_data) {
-		if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) {
-			return;
-		}
-		new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET);
+	memcpy(&boot_params, real_mode_data, sizeof boot_params);
+	if (boot_params.hdr.cmd_line_ptr) {
+		command_line = __va(boot_params.hdr.cmd_line_ptr);
+		memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 	}
-	command_line = __va(new_data);
-	memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
 #else
 	int max_cmdline;
 
@@ -117,7 +105,7 @@ void __init x86_64_start_kernel(char * r
 	for (i = 0; i < IDT_ENTRIES; i++)
 		set_intr_gate(i, early_idt_handler);
-	asm volatile("lidt %0" :: "m" (idt_descr));
+	load_idt((const struct desc_ptr *)&idt_descr);
 #endif
 
 	early_printk("Kernel alive\n");
--- head-2010-05-25.orig/arch/x86/kernel/io_apic_32-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/io_apic_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -427,7 +427,7 @@ static struct irq_cpu_info {
 
 #define IRQ_ALLOWED(cpu, allowed_mask)	cpu_isset(cpu, allowed_mask)
 
-#define CPU_TO_PACKAGEINDEX(i) (first_cpu(cpu_sibling_map[i]))
+#define CPU_TO_PACKAGEINDEX(i) (first_cpu(per_cpu(cpu_sibling_map, i)))
 
 static cpumask_t balance_irq_affinity[NR_IRQS] = {
 	[0 ... NR_IRQS-1] = CPU_MASK_ALL
@@ -633,7 +633,7 @@ tryanotherirq:
 
 	imbalance = move_this_load;
 
-	/* For physical_balance case, we accumlated both load
+	/* For physical_balance case, we accumulated both load
 	 * values in the one of the siblings cpu_irq[],
 	 * to use the same code for physical and logical processors
 	 * as much as possible.
@@ -647,7 +647,7 @@ tryanotherirq:
 	 * (A+B)/2 vs B
 	 */
 	load = CPU_IRQ(min_loaded) >> 1;
-	for_each_cpu_mask(j, cpu_sibling_map[min_loaded]) {
+	for_each_cpu_mask(j, per_cpu(cpu_sibling_map, min_loaded)) {
 		if (load > CPU_IRQ(j)) {
 			/* This won't change cpu_sibling_map[min_loaded] */
 			load = CPU_IRQ(j);
@@ -1018,7 +1018,7 @@ static int EISA_ELCR(unsigned int irq)
 #define default_MCA_trigger(idx)	(1)
 #define default_MCA_polarity(idx)	(0)
 
-static int __init MPBIOS_polarity(int idx)
+static int MPBIOS_polarity(int idx)
 {
 	int bus = mp_irqs[idx].mpc_srcbus;
 	int polarity;
@@ -1347,6 +1347,11 @@ static void __init setup_IO_APIC_irqs(vo
 			continue;
 		}
 
+		if (!first_notcon) {
+			apic_printk(APIC_VERBOSE, " not connected.\n");
+			first_notcon = 1;
+		}
+
 		entry.trigger = irq_trigger(idx);
 		entry.polarity = irq_polarity(idx);
@@ -1936,13 +1941,16 @@ __setup("no_timer_check", notimercheck);
 static int __init timer_irq_works(void)
 {
 	unsigned long t1 = jiffies;
+	unsigned long flags;
 
 	if (no_timer_check)
 		return 1;
 
+	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
 	mdelay((10 * 1000) / HZ);
+	local_irq_restore(flags);
 
 	/*
 	 * Expect a few ticks at least, to be sure some possible
@@ -2223,6 +2231,9 @@ static inline void __init check_timer(vo
 {
 	int apic1, pin1, apic2, pin2;
 	int vector;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -2268,7 +2279,7 @@ static inline void __init check_timer(vo
 		}
 		if (disable_timer_pin_1 > 0)
 			clear_IO_APIC_pin(0, pin1);
-		return;
+		goto out;
 	}
 	clear_IO_APIC_pin(apic1, pin1);
 	printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
@@ -2291,7 +2302,7 @@ static inline void __init check_timer(vo
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 			}
-			return;
+			goto out;
 		}
 		/*
 		 * Cleanup, just in case ...
@@ -2315,7 +2326,7 @@ static inline void __init check_timer(vo
 
 	if (timer_irq_works()) {
 		printk(" works.\n");
-		return;
+		goto out;
 	}
 	apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector);
 	printk(" failed.\n");
@@ -2331,11 +2342,13 @@ static inline void __init check_timer(vo
 
 	if (timer_irq_works()) {
 		printk(" works.\n");
-		return;
+		goto out;
 	}
 	printk(" failed :(.\n");
 	panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
 		"report. Then try booting with the 'noapic' option");
+out:
+	local_irq_restore(flags);
 }
 #else
 int timer_uses_ioapic_pin_0 = 0;
@@ -2353,6 +2366,14 @@ int timer_uses_ioapic_pin_0 = 0;
 
 void __init setup_IO_APIC(void)
 {
+#ifndef CONFIG_XEN
+	int i;
+
+	/* Reserve all the system vectors.
+	 */
+	for (i = FIRST_SYSTEM_VECTOR; i < NR_VECTORS; i++)
+		set_bit(i, used_vectors);
+#endif
+
 	enable_IO_APIC();
 
 	if (acpi_ioapic)
@@ -2542,7 +2563,7 @@ void destroy_irq(unsigned int irq)
 #endif /* CONFIG_XEN */
 
 /*
- * MSI mesage composition
+ * MSI message composition
 */
 #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
@@ -2899,6 +2920,25 @@ int io_apic_set_pci_routing (int ioapic,
 	return 0;
 }
 
+int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+{
+	int i;
+
+	if (skip_ioapic_setup)
+		return -1;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].mpc_irqtype == mp_INT &&
+		    mp_irqs[i].mpc_srcbusirq == bus_irq)
+			break;
+	if (i >= mp_irq_entries)
+		return -1;
+
+	*trigger = irq_trigger(i);
+	*polarity = irq_polarity(i);
+	return 0;
+}
+
 #endif /* CONFIG_ACPI */
 
 static int __init parse_disable_timer_pin_1(char *arg)
--- head-2010-05-25.orig/arch/x86/kernel/io_apic_64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/io_apic_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -31,6 +31,7 @@
 #include
 #include
 #include
+#include
 #ifdef CONFIG_ACPI
 #include
 #endif
@@ -584,7 +585,7 @@ int IO_APIC_get_PCI_irq_vector(int bus,
 #define default_PCI_trigger(idx)	(1)
 #define default_PCI_polarity(idx)	(1)
 
-static int __init MPBIOS_polarity(int idx)
+static int MPBIOS_polarity(int idx)
 {
 	int bus = mp_irqs[idx].mpc_srcbus;
 	int polarity;
@@ -871,6 +872,10 @@ static void __init setup_IO_APIC_irqs(vo
 			apic_printk(APIC_VERBOSE, ", %d-%d", mp_ioapics[apic].mpc_apicid, pin);
 			continue;
 		}
+		if (!first_notcon) {
+			apic_printk(APIC_VERBOSE, " not connected.\n");
+			first_notcon = 1;
+		}
 
 		irq = pin_2_irq(idx, apic, pin);
 		add_pin_to_irq(irq, apic, pin);
@@ -881,7 +886,7 @@ static void __init setup_IO_APIC_irqs(vo
 	}
 
 	if (!first_notcon)
-		apic_printk(APIC_VERBOSE," not connected.\n");
+		apic_printk(APIC_VERBOSE, " not connected.\n");
 }
 
 #ifndef CONFIG_XEN
@@ -1277,10 +1282,13 @@ void disable_IO_APIC(void)
 static int __init timer_irq_works(void)
 {
 	unsigned long t1 = jiffies;
+	unsigned long flags;
 
+	local_save_flags(flags);
 	local_irq_enable();
 	/* Let ten ticks pass... */
 	mdelay((10 * 1000) / HZ);
+	local_irq_restore(flags);
 
 	/*
 	 * Expect a few ticks at least, to be sure some possible
@@ -1655,6 +1663,9 @@ static inline void check_timer(void)
 {
 	struct irq_cfg *cfg = irq_cfg + 0;
 	int apic1, pin1, apic2, pin2;
+	unsigned long flags;
+
+	local_irq_save(flags);
 
 	/*
 	 * get/set the timer IRQ vector:
@@ -1696,7 +1707,7 @@ static inline void check_timer(void)
 		}
 		if (disable_timer_pin_1 > 0)
 			clear_IO_APIC_pin(0, pin1);
-		return;
+		goto out;
 	}
 	clear_IO_APIC_pin(apic1, pin1);
 	apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not "
@@ -1718,7 +1729,7 @@ static inline void check_timer(void)
 			if (nmi_watchdog == NMI_IO_APIC) {
 				setup_nmi();
 			}
-			return;
+			goto out;
 		}
 		/*
 		 * Cleanup, just in case ...
@@ -1741,7 +1752,7 @@ static inline void check_timer(void)
 
 	if (timer_irq_works()) {
 		apic_printk(APIC_VERBOSE," works.\n");
-		return;
+		goto out;
 	}
 	apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
 	apic_printk(APIC_VERBOSE," failed.\n");
@@ -1756,10 +1767,12 @@ static inline void check_timer(void)
 
 	if (timer_irq_works()) {
 		apic_printk(APIC_VERBOSE," works.\n");
-		return;
+		goto out;
 	}
 	apic_printk(APIC_VERBOSE," failed :(.\n");
 	panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n");
+out:
+	local_irq_restore(flags);
 }
 #else
 #define check_timer() ((void)0)
@@ -1775,7 +1788,7 @@ __setup("no_timer_check", notimercheck);
 
 /*
  *
- * IRQ's that are handled by the PIC in the MPS IOAPIC case.
+ * IRQs that are handled by the PIC in the MPS IOAPIC case.
  * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ.
  *   Linux doesn't really care, as it's not actually used
  *   for any interrupt handling anyway.
@@ -1858,7 +1871,7 @@ static struct sysdev_class ioapic_sysdev
 static int __init ioapic_init_sysfs(void)
 {
 	struct sys_device * dev;
-	int i, size, error = 0;
+	int i, size, error;
 
 	error = sysdev_class_register(&ioapic_sysdev_class);
 	if (error)
@@ -1867,12 +1880,11 @@ static int __init ioapic_init_sysfs(void
 	for (i = 0; i < nr_ioapics; i++ ) {
 		size = sizeof(struct sys_device) + nr_ioapic_registers[i]
 			* sizeof(struct IO_APIC_route_entry);
-		mp_ioapic_data[i] = kmalloc(size, GFP_KERNEL);
+		mp_ioapic_data[i] = kzalloc(size, GFP_KERNEL);
 		if (!mp_ioapic_data[i]) {
 			printk(KERN_ERR "Can't suspend/resume IOAPIC %d\n", i);
 			continue;
 		}
-		memset(mp_ioapic_data[i], 0, size);
 		dev = &mp_ioapic_data[i]->dev;
 		dev->id = i;
 		dev->cls = &ioapic_sysdev_class;
@@ -1933,7 +1945,7 @@ void destroy_irq(unsigned int irq)
 #endif /* CONFIG_XEN */
 
 /*
- * MSI mesage composition
+ * MSI message composition
 */
 #if defined(CONFIG_PCI_MSI) && !defined(CONFIG_XEN)
 static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg)
@@ -2043,8 +2055,64 @@ void arch_teardown_msi_irq(unsigned int
 	destroy_irq(irq);
 }
 
-#endif /* CONFIG_PCI_MSI */
+#ifdef CONFIG_DMAR
+#ifdef CONFIG_SMP
+static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask)
+{
+	struct irq_cfg *cfg = irq_cfg + irq;
+	struct msi_msg msg;
+	unsigned int dest;
+	cpumask_t tmp;
+
+	cpus_and(tmp, mask, cpu_online_map);
+	if (cpus_empty(tmp))
+		return;
+
+	if (assign_irq_vector(irq, mask))
+		return;
+
+	cpus_and(tmp, cfg->domain, mask);
+	dest = cpu_mask_to_apicid(tmp);
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+	irq_desc[irq].affinity = mask;
+}
+#endif /* CONFIG_SMP */
+
+struct irq_chip dmar_msi_type = {
+	.name = "DMAR_MSI",
+	.unmask = dmar_msi_unmask,
+	.mask = dmar_msi_mask,
+	.ack = ack_apic_edge,
+#ifdef CONFIG_SMP
+	.set_affinity = dmar_msi_set_affinity,
+#endif
+	.retrigger = ioapic_retrigger_irq,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	set_irq_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+		"edge");
+	return 0;
+}
+#endif
 
+#endif /* CONFIG_PCI_MSI */
 /*
  * Hypertransport interrupt support
  */
@@ -2177,8 +2245,27 @@ int io_apic_set_pci_routing (int ioapic,
 	return 0;
 }
 
-#endif /* CONFIG_ACPI */
+int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+{
+	int i;
+
+	if (skip_ioapic_setup)
+		return -1;
+
+	for (i = 0; i < mp_irq_entries; i++)
+		if (mp_irqs[i].mpc_irqtype == mp_INT &&
+		    mp_irqs[i].mpc_srcbusirq == bus_irq)
+			break;
+	if (i >= mp_irq_entries)
+		return -1;
+
+	*trigger = irq_trigger(i);
+	*polarity = irq_polarity(i);
+	return 0;
+}
+
+#endif /* CONFIG_ACPI */
 
 #ifndef CONFIG_XEN
 /*
@@ -2217,3 +2304,4 @@ void __init setup_ioapic_dest(void)
 }
 #endif
 #endif /* !CONFIG_XEN */
+
--- head-2010-05-25.orig/arch/x86/kernel/ioport_32-xen.c	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/ioport_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,6 +1,4 @@
 /*
- * linux/arch/i386/kernel/ioport.c
- *
  * This contains the io-permission bitmap code - written by obz, with changes
  * by Linus.
  */
--- head-2010-05-25.orig/arch/x86/kernel/ioport_64-xen.c	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/ioport_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,6 +1,4 @@
 /*
- * linux/arch/x86_64/kernel/ioport.c
- *
  * This contains the io-permission bitmap code - written by obz, with changes
  * by Linus.
  */
--- head-2010-05-25.orig/arch/x86/kernel/ldt_32-xen.c	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/ldt_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,6 +1,4 @@
 /*
- * linux/arch/i386/kernel/ldt.c
- *
  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
  * Copyright (C) 1999 Ingo Molnar
  */
@@ -106,14 +104,14 @@ int init_new_context(struct task_struct
 	struct mm_struct * old_mm;
 	int retval = 0;
 
-	init_MUTEX(&mm->context.sem);
+	mutex_init(&mm->context.lock);
 	mm->context.size = 0;
 	mm->context.has_foreign_mappings = 0;
 	old_mm = current->mm;
 	if (old_mm && old_mm->context.size > 0) {
-		down(&old_mm->context.sem);
+		mutex_lock(&old_mm->context.lock);
 		retval = copy_ldt(&mm->context, &old_mm->context);
-		up(&old_mm->context.sem);
+		mutex_unlock(&old_mm->context.lock);
 	}
 	return retval;
 }
@@ -149,7 +147,7 @@ static int read_ldt(void __user * ptr, u
 	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
 
-	down(&mm->context.sem);
+	mutex_lock(&mm->context.lock);
 	size = mm->context.size*LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
@@ -157,7 +155,7 @@ static int read_ldt(void __user * ptr, u
 	err = 0;
 	if (copy_to_user(ptr, mm->context.ldt, size))
 		err = -EFAULT;
-	up(&mm->context.sem);
+	mutex_unlock(&mm->context.lock);
 	if (err < 0)
 		goto error_return;
 	if (size != bytecount) {
@@ -213,7 +211,7 @@ static int write_ldt(void __user * ptr,
 		goto out;
 	}
 
-	down(&mm->context.sem);
+	mutex_lock(&mm->context.lock);
 	if (ldt_info.entry_number >= mm->context.size) {
 		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
 		if (error < 0)
@@ -240,7 +238,7 @@ install:
 				entry_1, entry_2);
 
out_unlock:
-	up(&mm->context.sem);
+	mutex_unlock(&mm->context.lock);
out:
 	return error;
 }
--- head-2010-05-25.orig/arch/x86/kernel/ldt_64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/ldt_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,6 +1,4 @@
 /*
- * linux/arch/x86_64/kernel/ldt.c
- *
  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
  * Copyright (C) 1999 Ingo Molnar
  * Copyright (C) 2002 Andi Kleen
 */
@@ -112,19 +110,14 @@ int init_new_context(struct task_struct
 	int retval = 0;
 
 	memset(&mm->context, 0, sizeof(mm->context));
-	init_MUTEX(&mm->context.sem);
+	mutex_init(&mm->context.lock);
 	old_mm = current->mm;
 	if (old_mm)
 		mm->context.vdso = old_mm->context.vdso;
 	if (old_mm && old_mm->context.size > 0) {
-		down(&old_mm->context.sem);
+		mutex_lock(&old_mm->context.lock);
 		retval = copy_ldt(&mm->context, &old_mm->context);
-		up(&old_mm->context.sem);
-	}
-	if (retval == 0) {
-		spin_lock(&mm_unpinned_lock);
-		list_add(&mm->context.unpinned, &mm_unpinned);
-		spin_unlock(&mm_unpinned_lock);
+		mutex_unlock(&old_mm->context.lock);
 	}
 	return retval;
 }
@@ -148,11 +141,6 @@ void destroy_context(struct mm_struct *m
 		kfree(mm->context.ldt);
 		mm->context.size = 0;
 	}
-	if (!PagePinned(virt_to_page(mm->pgd))) {
-		spin_lock(&mm_unpinned_lock);
-		list_del(&mm->context.unpinned);
-		spin_unlock(&mm_unpinned_lock);
-	}
 }
 
 static int read_ldt(void __user * ptr, unsigned long bytecount)
@@ -166,7 +154,7 @@ static int read_ldt(void __user * ptr, u
 	if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
 		bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
 
-	down(&mm->context.sem);
+	mutex_lock(&mm->context.lock);
 	size = mm->context.size*LDT_ENTRY_SIZE;
 	if (size > bytecount)
 		size = bytecount;
@@ -174,7 +162,7 @@ static int read_ldt(void __user * ptr, u
 	err = 0;
 	if (copy_to_user(ptr, mm->context.ldt, size))
 		err = -EFAULT;
-	up(&mm->context.sem);
+	mutex_unlock(&mm->context.lock);
 	if (err < 0)
 		goto error_return;
 	if (size != bytecount) {
@@ -227,7 +215,7 @@ static int write_ldt(void __user * ptr,
 		goto out;
 	}
 
-	down(&mm->context.sem);
+	mutex_lock(&mm->context.lock);
 	if (ldt_info.entry_number >= (unsigned)mm->context.size) {
 		error = alloc_ldt(&current->mm->context, ldt_info.entry_number+1, 1);
 		if (error < 0)
@@ -256,7 +244,7 @@ install:
 	error = HYPERVISOR_update_descriptor(mach_lp, (unsigned long)((entry_1 | (unsigned long) entry_2 << 32)));
 
out_unlock:
-	up(&mm->context.sem);
+	mutex_unlock(&mm->context.lock);
out:
 	return error;
 }
--- head-2010-05-25.orig/arch/x86/kernel/mpparse_32-xen.c	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/mpparse_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1023,7 +1023,7 @@ void __init mp_config_acpi_legacy_irqs (
 
 	/*
 	 * Use the default configuration for the IRQs 0-15.  Unless
-	 * overriden by (MADT) interrupt source override entries.
+	 * overridden by (MADT) interrupt source override entries.
 	 */
 	for (i = 0; i < 16; i++) {
 		int idx;
--- head-2010-05-25.orig/arch/x86/kernel/mpparse_64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/mpparse_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -57,6 +57,8 @@ unsigned long mp_lapic_addr = 0;
 
 /* Processor that is doing the boot up */
 unsigned int boot_cpu_id = -1U;
+EXPORT_SYMBOL(boot_cpu_id);
+
 /* Internal processor count */
 unsigned int num_processors __cpuinitdata = 0;
 
@@ -87,7 +89,7 @@ static int __init mpf_checksum(unsigned
 }
 
 #ifndef CONFIG_XEN
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 {
 	int cpu;
 	cpumask_t tmp_map;
@@ -124,13 +126,24 @@ static void __cpuinit MP_processor_info
 		cpu = 0;
 	}
 	bios_cpu_apicid[cpu] = m->mpc_apicid;
-	x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+	/*
+	 * We get called early in the the start_kernel initialization
+	 * process when the per_cpu data area is not yet setup, so we
+	 * use a static array that is removed after the per_cpu data
+	 * area is created.
+	 */
+	if (x86_cpu_to_apicid_ptr) {
+		u8 *x86_cpu_to_apicid = (u8 *)x86_cpu_to_apicid_ptr;
+		x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+	} else {
+		per_cpu(x86_cpu_to_apicid, cpu) = m->mpc_apicid;
+	}
 	cpu_set(cpu, cpu_possible_map);
 	cpu_set(cpu, cpu_present_map);
 }
 #else
-static void __cpuinit MP_processor_info (struct mpc_config_processor *m)
+static void __cpuinit MP_processor_info(struct mpc_config_processor *m)
 {
 	num_processors++;
 }
--- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -13,14 +13,13 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
 #include
 #include
-#include
-#include
+#include
+#include
 #include
 
 #ifdef __x86_64__
@@ -106,27 +105,29 @@ int range_straddles_page_boundary(paddr_
 }
 
 int
-dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+dma_map_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
 	   enum dma_data_direction direction)
 {
 	int i, rc;
 
 	BUG_ON(!valid_dma_direction(direction));
-	WARN_ON(nents == 0 || sg[0].length == 0);
+	WARN_ON(nents == 0 || sgl->length == 0);
 
 	if (swiotlb) {
-		rc = swiotlb_map_sg(hwdev, sg, nents, direction);
+		rc = swiotlb_map_sg(hwdev, sgl, nents, direction);
 	} else {
-		for (i = 0; i < nents; i++ ) {
-			BUG_ON(!sg[i].page);
-			sg[i].dma_address =
-				gnttab_dma_map_page(sg[i].page) + sg[i].offset;
-			sg[i].dma_length  = sg[i].length;
+		struct scatterlist *sg;
+
+		for_each_sg(sgl, sg, nents, i) {
+			BUG_ON(!sg_page(sg));
+			sg->dma_address =
+				gnttab_dma_map_page(sg_page(sg)) + sg->offset;
+			sg->dma_length = sg->length;
 			IOMMU_BUG_ON(address_needs_mapping(
-				hwdev, sg[i].dma_address));
+				hwdev, sg->dma_address));
 			IOMMU_BUG_ON(range_straddles_page_boundary(
-				page_to_pseudophys(sg[i].page) + sg[i].offset,
-				sg[i].length));
+				page_to_pseudophys(sg_page(sg)) + sg->offset,
+				sg->length));
 		}
 		rc = nents;
 	}
@@ -137,17 +138,19 @@ dma_map_sg(struct device *hwdev, struct
 EXPORT_SYMBOL(dma_map_sg);
 
 void
-dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents,
+dma_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nents,
 	     enum dma_data_direction direction)
 {
 	int i;
 
 	BUG_ON(!valid_dma_direction(direction));
 	if (swiotlb)
-		swiotlb_unmap_sg(hwdev, sg, nents, direction);
+		swiotlb_unmap_sg(hwdev, sgl, nents, direction);
 	else {
-		for (i = 0; i < nents; i++ )
-			gnttab_dma_unmap_page(sg[i].dma_address);
+		struct scatterlist *sg;
+
+		for_each_sg(sgl, sg, nents, i)
+			gnttab_dma_unmap_page(sg->dma_address);
 	}
 }
 EXPORT_SYMBOL(dma_unmap_sg);
@@ -258,7 +261,8 @@ void dma_free_coherent(struct device *de
 {
 	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
-
+
+	WARN_ON(irqs_disabled());	/* for portability */
 	if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) {
 		int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
--- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_32-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/i386/kernel/process.c
- *
  *  Copyright (C) 1995  Linus Torvalds
  *
  *  Pentium III FXSR, SSE support
@@ -190,6 +188,10 @@ void cpu_idle(void)
 	}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
 void cpu_idle_wait(void)
 {
 	unsigned int cpu, this_cpu = get_cpu();
@@ -214,13 +216,20 @@ void cpu_idle_wait(void)
 				cpu_clear(cpu, map);
 		}
 		cpus_and(map, map, cpu_online_map);
+		/*
+		 * We waited 1 sec, if a CPU still did not call idle
+		 * it may be because it is in idle and not waking up
+		 * because it has nothing to do.
+		 * Give all the remaining CPUS a kick.
+		 */
+		smp_call_function_mask(map, do_nothing, 0, 0);
 	} while (!cpus_empty(map));
 
 	set_cpus_allowed(current, tmp);
 }
 EXPORT_SYMBOL_GPL(cpu_idle_wait);
 
-void __devinit select_idle_routine(const struct cpuinfo_x86 *c)
+void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
 {
 }
 
@@ -238,34 +247,52 @@ static int __init idle_setup(char *str)
 }
 early_param("idle", idle_setup);
 
-void show_regs(struct pt_regs * regs)
+void __show_registers(struct pt_regs *regs, int all)
 {
 	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
 	unsigned long d0, d1, d2, d3, d6, d7;
+	unsigned long esp;
+	unsigned short ss, gs;
+
+	if (user_mode_vm(regs)) {
+		esp = regs->esp;
+		ss = regs->xss & 0xffff;
+		savesegment(gs, gs);
+	} else {
+		esp = (unsigned long) (&regs->esp);
+		savesegment(ss, ss);
+		savesegment(gs, gs);
+	}
 
 	printk("\n");
-	printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
-	printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
+	printk("Pid: %d, comm: %s %s (%s %.*s)\n",
+			task_pid_nr(current), current->comm,
+			print_tainted(), init_utsname()->release,
+			(int)strcspn(init_utsname()->version, " "),
+			init_utsname()->version);
+
+	printk("EIP: %04x:[<%08lx>] EFLAGS: %08lx CPU: %d\n",
+			0xffff & regs->xcs, regs->eip, regs->eflags,
+			smp_processor_id());
 	print_symbol("EIP is at %s\n", regs->eip);
 
-	if (user_mode_vm(regs))
-		printk(" ESP: %04x:%08lx",0xffff & regs->xss,regs->esp);
-	printk(" EFLAGS: %08lx    %s  (%s %.*s)\n",
-	       regs->eflags, print_tainted(), init_utsname()->release,
-	       (int)strcspn(init_utsname()->version, " "),
-	       init_utsname()->version);
 	printk("EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n",
-		regs->eax,regs->ebx,regs->ecx,regs->edx);
-	printk("ESI: %08lx EDI: %08lx EBP: %08lx",
-		regs->esi, regs->edi, regs->ebp);
-	printk(" DS: %04x ES: %04x FS: %04x\n",
-	       0xffff & regs->xds,0xffff & regs->xes, 0xffff & regs->xfs);
+		regs->eax, regs->ebx, regs->ecx, regs->edx);
+	printk("ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n",
+		regs->esi, regs->edi, regs->ebp, esp);
+	printk(" DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n",
+	       regs->xds & 0xffff, regs->xes & 0xffff,
+	       regs->xfs & 0xffff, gs, ss);
+
+	if (!all)
+		return;
 
 	cr0 = read_cr0();
 	cr2 = read_cr2();
 	cr3 = read_cr3();
 	cr4 = read_cr4_safe();
-	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
+	printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n",
+			cr0, cr2, cr3, cr4);
 
 	get_debugreg(d0, 0);
 	get_debugreg(d1, 1);
@@ -273,10 +300,16 @@ void show_regs(struct pt_regs * regs)
 	get_debugreg(d3, 3);
 	printk("DR0: %08lx DR1: %08lx DR2: %08lx DR3: %08lx\n",
 			d0, d1, d2, d3);
+
 	get_debugreg(d6, 6);
 	get_debugreg(d7, 7);
-	printk("DR6: %08lx DR7: %08lx\n", d6, d7);
+	printk("DR6: %08lx DR7: %08lx\n",
+			d6, d7);
+}
+
+void show_regs(struct pt_regs *regs)
+{
+	__show_registers(regs, 1);
 	show_trace(NULL, regs, &regs->esp);
 }
--- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -1,6 +1,4 @@
 /*
- *  linux/arch/x86-64/kernel/process.c
- *
  *  Copyright (C) 1995  Linus Torvalds
  *
  *  Pentium III FXSR, SSE support
@@ -41,6 +39,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -172,6 +171,9 @@ void cpu_idle (void)
 			if (__get_cpu_var(cpu_idle_state))
 				__get_cpu_var(cpu_idle_state) = 0;
 
+			tick_nohz_stop_sched_tick();
+
 			rmb();
 			idle = xen_idle; /* no alternatives */
 			if (cpu_is_offline(smp_processor_id()))
@@ -190,12 +192,17 @@ void cpu_idle (void)
 			__exit_idle();
 		}
 
+		tick_nohz_restart_sched_tick();
 		preempt_enable_no_resched();
 		schedule();
 		preempt_disable();
 	}
 }
 
+static void do_nothing(void *unused)
+{
+}
+
 void cpu_idle_wait(void)
 {
 	unsigned int cpu, this_cpu = get_cpu();
@@ -221,6 +228,13 @@ void cpu_idle_wait(void)
 				cpu_clear(cpu, map);
 		}
 		cpus_and(map, map, cpu_online_map);
+		/*
+		 * We waited 1 sec, if a CPU still did not call idle
+		 * it may be because it is in idle and not waking up
+		 * because it has nothing to do.
+		 * Give all the remaining CPUS a kick.
+		 */
+		smp_call_function_mask(map, do_nothing, 0, 0);
 	} while (!cpus_empty(map));
 
 	set_cpus_allowed(current, tmp);
@@ -528,7 +542,7 @@ static inline void __switch_to_xtra(stru
 *
 * Kprobes not supported here. Set the probe on schedule instead.
 */
-__kprobes struct task_struct *
+struct task_struct *
 __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
--- head-2010-05-25.orig/arch/x86/kernel/quirks-xen.c	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/quirks-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -41,7 +41,353 @@ static void __devinit quirk_intel_irqbal
 		if (!(config & 0x2))
 			pci_write_config_byte(dev, 0xf4, config);
 }
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH,
+			quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH,
+			quirk_intel_irqbalance);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH,
+			quirk_intel_irqbalance);
+#endif
+
+#if defined(CONFIG_HPET_TIMER)
+#include
+
+unsigned long force_hpet_address;
+
+static enum {
+	NONE_FORCE_HPET_RESUME,
+	OLD_ICH_FORCE_HPET_RESUME,
+	ICH_FORCE_HPET_RESUME,
+	VT8237_FORCE_HPET_RESUME,
+	NVIDIA_FORCE_HPET_RESUME,
+} force_hpet_resume_type;
+
+static void __iomem *rcba_base;
+
+static void ich_force_hpet_resume(void)
+{
+	u32 val;
+
+	if (!force_hpet_address)
+		return;
+
+	if (rcba_base == NULL)
+		BUG();
+
+	/* read the Function Disable register, dword mode only */
+	val = readl(rcba_base + 0x3404);
+	if (!(val & 0x80)) {
+		/* HPET disabled in HPTC.
+		   Trying to enable */
+		writel(val | 0x80, rcba_base + 0x3404);
+	}
+
+	val = readl(rcba_base + 0x3404);
+	if (!(val & 0x80))
+		BUG();
+	else
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+
+	return;
+}
+
+static void ich_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 val;
+	u32 uninitialized_var(rcba);
+	int err = 0;
+
+	if (hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0xF0, &rcba);
+	rcba &= 0xFFFFC000;
+	if (rcba == 0) {
+		printk(KERN_DEBUG "RCBA disabled. Cannot force enable HPET\n");
+		return;
+	}
+
+	/* use bits 31:14, 16 kB aligned */
+	rcba_base = ioremap_nocache(rcba, 0x4000);
+	if (rcba_base == NULL) {
+		printk(KERN_DEBUG "ioremap failed. Cannot force enable HPET\n");
+		return;
+	}
+
+	/* read the Function Disable register, dword mode only */
+	val = readl(rcba_base + 0x3404);
+
+	if (val & 0x80) {
+		/* HPET is enabled in HPTC. Just not reported by BIOS */
+		val = val & 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		iounmap(rcba_base);
+		return;
+	}
+
+	/* HPET disabled in HPTC. Trying to enable */
+	writel(val | 0x80, rcba_base + 0x3404);
+
+	val = readl(rcba_base + 0x3404);
+	if (!(val & 0x80)) {
+		err = 1;
+	} else {
+		val = val & 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+	}
+
+	if (err) {
+		force_hpet_address = 0;
+		iounmap(rcba_base);
+		printk(KERN_DEBUG "Failed to force enable HPET\n");
+	} else {
+		force_hpet_resume_type = ICH_FORCE_HPET_RESUME;
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+	}
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB2_0,
+			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH6_1,
+			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_0,
+			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_1,
+			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31,
+			 ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1,
+			 ich_force_enable_hpet);
+
+
+static struct pci_dev *cached_dev;
+
+static void old_ich_force_hpet_resume(void)
+{
+	u32 val;
+	u32 uninitialized_var(gen_cntl);
+
+	if (!force_hpet_address || !cached_dev)
+		return;
+
+	pci_read_config_dword(cached_dev, 0xD0, &gen_cntl);
+	gen_cntl &= (~(0x7 << 15));
+	gen_cntl |= (0x4 << 15);
+
+	pci_write_config_dword(cached_dev, 0xD0, gen_cntl);
+	pci_read_config_dword(cached_dev, 0xD0, &gen_cntl);
+	val = gen_cntl >> 15;
+	val &= 0x7;
+	if (val == 0x4)
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+	else
+		BUG();
+}
+
+static void old_ich_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 val;
+	u32 uninitialized_var(gen_cntl);
+
+	if (hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0xD0, &gen_cntl);
+	/*
+	 * Bit 17 is HPET enable bit.
+	 * Bit 16:15 control the HPET base address.
+	 */
+	val = gen_cntl >> 15;
+	val &= 0x7;
+	if (val & 0x4) {
+		val &= 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+		printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		return;
+	}
+
+	/*
+	 * HPET is disabled.
+	 * Trying enabling at FED00000 and check
+	 * whether it sticks
+	 */
+	gen_cntl &= (~(0x7 << 15));
+	gen_cntl |= (0x4 << 15);
+	pci_write_config_dword(dev, 0xD0, gen_cntl);
+
+	pci_read_config_dword(dev, 0xD0, &gen_cntl);
+
+	val = gen_cntl >> 15;
+	val &= 0x7;
+	if (val & 0x4) {
+		/* HPET is enabled in HPTC. Just not reported by BIOS */
+		val &= 0x3;
+		force_hpet_address = 0xFED00000 | (val << 12);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		cached_dev = dev;
+		force_hpet_resume_type = OLD_ICH_FORCE_HPET_RESUME;
+		return;
+	}
+
+	printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+/*
+ * Undocumented chipset features. Make sure that the user enforced
+ * this.
+ */
+static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
+{
+	if (hpet_force_user)
+		old_ich_force_enable_hpet(dev);
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_0,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801DB_12,
+			 old_ich_force_enable_hpet_user);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_0,
+			 old_ich_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801EB_12,
+			 old_ich_force_enable_hpet);
+
+
+static void vt8237_force_hpet_resume(void)
+{
+	u32 val;
+
+	if (!force_hpet_address || !cached_dev)
+		return;
+
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(cached_dev, 0x68, val);
+
+	pci_read_config_dword(cached_dev, 0x68, &val);
+	if (val & 0x80)
+		printk(KERN_DEBUG "Force enabled HPET at resume\n");
+	else
+		BUG();
+}
+
+static void vt8237_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 uninitialized_var(val);
+
+	if (!hpet_force_user || hpet_address || force_hpet_address)
+		return;
+
+	pci_read_config_dword(dev, 0x68, &val);
+	/*
+	 * Bit 7 is HPET enable bit.
+	 * Bit 31:10 is HPET base address (contrary to what datasheet claims)
+	 */
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		return;
+	}
+
+	/*
+	 * HPET is disabled. Trying enabling at FED00000 and check
+	 * whether it sticks
+	 */
+	val = 0xfed00000 | 0x80;
+	pci_write_config_dword(dev, 0x68, val);
+
+	pci_read_config_dword(dev, 0x68, &val);
+	if (val & 0x80) {
+		force_hpet_address = (val & ~0x3ff);
+		printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+			       force_hpet_address);
+		cached_dev = dev;
+		force_hpet_resume_type = VT8237_FORCE_HPET_RESUME;
+		return;
+	}
+
+	printk(KERN_DEBUG "Failed to force enable HPET\n");
+}
+
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8235,
+			 vt8237_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8237,
+			 vt8237_force_enable_hpet);
+
+/*
+ * Undocumented chipset feature taken from LinuxBIOS.
+ */
+static void nvidia_force_hpet_resume(void)
+{
+	pci_write_config_dword(cached_dev, 0x44, 0xfed00001);
+	printk(KERN_DEBUG "Force enabled HPET at resume\n");
+}
+
+static void nvidia_force_enable_hpet(struct pci_dev *dev)
+{
+	u32 uninitialized_var(val);
+
+	if (!hpet_force_user || hpet_address || force_hpet_address)
+		return;
+
+	pci_write_config_dword(dev, 0x44, 0xfed00001);
+	pci_read_config_dword(dev, 0x44, &val);
+	force_hpet_address = val & 0xfffffffe;
+	force_hpet_resume_type = NVIDIA_FORCE_HPET_RESUME;
+	printk(KERN_DEBUG "Force enabled HPET at base address 0x%lx\n",
+		force_hpet_address);
+	cached_dev = dev;
+	return;
+}
+
+/* ISA Bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0050,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0051,
+			 nvidia_force_enable_hpet);
+
+/* LPC bridges */
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0360,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0361,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0362,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0363,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0364,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0365,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0366,
+			 nvidia_force_enable_hpet);
+DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367,
+			 nvidia_force_enable_hpet);
+
+void force_hpet_resume(void)
+{
+	switch (force_hpet_resume_type) {
+	case ICH_FORCE_HPET_RESUME:
+		return ich_force_hpet_resume();
+
+	case OLD_ICH_FORCE_HPET_RESUME:
+		return old_ich_force_hpet_resume();
+
+	case VT8237_FORCE_HPET_RESUME:
+		return vt8237_force_hpet_resume();
+
+	case NVIDIA_FORCE_HPET_RESUME:
+		return nvidia_force_hpet_resume();
+
+	default:
+		break;
+	}
+}
+
 #endif
--- head-2010-05-25.orig/arch/x86/kernel/setup64-xen.c	2010-03-24 15:09:22.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/setup64-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -15,7 +15,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -27,11 +26,12 @@
 #include
 #include
 #include
+#include
 #ifdef CONFIG_XEN
 #include
 #endif
 
-char x86_boot_params[BOOT_PARAM_SIZE] __initdata;
+struct boot_params __initdata boot_params;
 
 cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
@@ -159,8 +159,8 @@ static void switch_pt(void)
 
 static void __cpuinit cpu_gdt_init(const struct desc_ptr *gdt_descr)
 {
-	asm volatile("lgdt %0" :: "m" (*gdt_descr));
-	asm volatile("lidt %0" :: "m" (idt_descr));
+	load_gdt(gdt_descr);
+	load_idt(idt_descr);
 }
 #endif
 
@@ -252,6 +252,14 @@ void __cpuinit check_efer(void)
 
 unsigned long kernel_eflags;
 
+#ifndef CONFIG_X86_NO_TSS
+/*
+ * Copies of the original ist values from the tss are only accessed during
+ * debugging, no special alignment required.
+ */
+DEFINE_PER_CPU(struct orig_ist, orig_ist);
+#endif
+
 /*
 * cpu_init() initializes state that is per-CPU.  Some data is already
Some data is already * initialized (naturally) in the bootstrap process, such as the GDT --- head-2010-05-25.orig/arch/x86/kernel/setup_32-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/setup_32-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds * * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 @@ -70,6 +68,7 @@ #include #include #include +#include #ifdef CONFIG_XEN #include @@ -80,13 +79,14 @@ static struct notifier_block xen_panic_b xen_panic_event, NULL, 0 /* try to go last */ }; -int disable_pse __devinitdata = 0; +int disable_pse __cpuinitdata = 0; /* * Machine setup.. */ extern struct resource code_resource; extern struct resource data_resource; +extern struct resource bss_resource; /* cpu data as detected by the assembly code in head.S */ struct cpuinfo_x86 new_cpu_data __cpuinitdata = { 0, 0, 0, 0, -1, 1, 0, 0, -1 }; @@ -98,9 +98,6 @@ unsigned long mmu_cr4_features; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; -#ifdef CONFIG_MCA -EXPORT_SYMBOL(machine_id); -#endif unsigned int machine_submodel_id; unsigned int BIOS_revision; unsigned int mca_pentium_flag; @@ -121,7 +118,7 @@ EXPORT_SYMBOL(apm_info); struct edid_info edid_info; EXPORT_SYMBOL_GPL(edid_info); #ifndef CONFIG_XEN -#define copy_edid() (edid_info = EDID_INFO) +#define copy_edid() (edid_info = boot_params.edid_info) #endif struct ist_info ist_info; #if defined(CONFIG_X86_SPEEDSTEP_SMI) || \ @@ -170,10 +167,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #endif #else @@ -418,6 +416,53 @@ extern unsigned long __init setup_memory extern void zone_sizes_init(void); #endif /* !CONFIG_NEED_MULTIPLE_NODES */ +static inline unsigned long long get_total_mem(void) +{ + unsigned long long total; + + total = max_low_pfn - min_low_pfn; +#ifdef CONFIG_HIGHMEM + total += highend_pfn - highstart_pfn; +#endif + + return total << PAGE_SHIFT; +} + +#ifdef CONFIG_KEXEC +#ifndef CONFIG_XEN +static void __init reserve_crashkernel(void) +{ + unsigned long long total_mem; + unsigned long long crash_size, crash_base; + int ret; + + total_mem = get_total_mem(); + + ret = parse_crashkernel(boot_command_line, total_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size > 0) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +#define reserve_crashkernel xen_machine_kexec_setup_resources +#endif +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + void __init setup_bootmem_allocator(void) { unsigned long bootmap_size; @@ -473,30 +518,25 @@ void __init 
setup_bootmem_allocator(void #ifdef CONFIG_BLK_DEV_INITRD if (xen_start_info->mod_start) { - if (INITRD_START + INITRD_SIZE <= (max_low_pfn << PAGE_SHIFT)) { - /*reserve_bootmem(INITRD_START, INITRD_SIZE);*/ - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; + unsigned long ramdisk_image = __pa(xen_start_info->mod_start); + unsigned long ramdisk_size = xen_start_info->mod_len; + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_lowmem = max_low_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_lowmem) { + /*reserve_bootmem(ramdisk_image, ramdisk_size);*/ + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; initrd_below_start_ok = 1; - } - else { + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - INITRD_START + INITRD_SIZE, - max_low_pfn << PAGE_SHIFT); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_lowmem); initrd_start = 0; } } #endif -#ifdef CONFIG_KEXEC -#ifdef CONFIG_XEN - xen_machine_kexec_setup_resources(); -#else - if (crashk_res.start != crashk_res.end) - reserve_bootmem(crashk_res.start, - crashk_res.end - crashk_res.start + 1); -#endif -#endif + reserve_crashkernel(); } /* @@ -574,7 +614,8 @@ void __init setup_arch(char **cmdline_p) * the system table is valid. If not, then initialize normally. */ #ifdef CONFIG_EFI - if ((LOADER_TYPE == 0x50) && EFI_SYSTAB) + if ((boot_params.hdr.type_of_loader == 0x50) && + boot_params.efi_info.efi_systab) efi_enabled = 1; #endif @@ -582,18 +623,18 @@ void __init setup_arch(char **cmdline_p) properly. Setting ROOT_DEV to default to /dev/ram0 breaks initrd. */ ROOT_DEV = MKDEV(UNNAMED_MAJOR,0); - screen_info = SCREEN_INFO; + screen_info = boot_params.screen_info; copy_edid(); - apm_info.bios = APM_BIOS_INFO; - ist_info = IST_INFO; - saved_videomode = VIDEO_MODE; - if( SYS_DESC_TABLE.length != 0 ) { - set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); - machine_id = SYS_DESC_TABLE.table[0]; - machine_submodel_id = SYS_DESC_TABLE.table[1]; - BIOS_revision = SYS_DESC_TABLE.table[2]; + apm_info.bios = boot_params.apm_bios_info; + ist_info = boot_params.ist_info; + saved_videomode = boot_params.hdr.vid_mode; + if( boot_params.sys_desc_table.length != 0 ) { + set_mca_bus(boot_params.sys_desc_table.table[3] & 0x2); + machine_id = boot_params.sys_desc_table.table[0]; + machine_submodel_id = boot_params.sys_desc_table.table[1]; + BIOS_revision = boot_params.sys_desc_table.table[2]; } - bootloader_type = LOADER_TYPE; + bootloader_type = boot_params.hdr.type_of_loader; if (is_initial_xendomain()) { const struct dom0_vga_console_info *info = @@ -608,9 +649,9 @@ void __init setup_arch(char **cmdline_p) screen_info.orig_video_isVGA = 0; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif ARCH_SETUP @@ -623,7 +664,7 @@ void __init setup_arch(char **cmdline_p) copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; @@ -635,6 +676,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end 
= virt_to_phys(_etext)-1; data_resource.start = virt_to_phys(_etext); data_resource.end = virt_to_phys(_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; if ((i = MAX_GUEST_CMDLINE) > COMMAND_LINE_SIZE) i = COMMAND_LINE_SIZE; @@ -663,7 +706,7 @@ void __init setup_arch(char **cmdline_p) /* * NOTE: before this point _nobody_ is allowed to allocate * any memory using the bootmem allocator. Although the - * alloctor is now initialised only the first 8Mb of the kernel + * allocator is now initialised only the first 8Mb of the kernel * virtual address space has been mapped. All allocations before * paging_init() has completed must use the alloc_bootmem_low_pages() * variant (which allocates DMA'able memory) and care must be taken @@ -786,10 +829,8 @@ void __init setup_arch(char **cmdline_p) acpi_boot_table_init(); #endif -#ifdef CONFIG_PCI -#ifdef CONFIG_X86_IO_APIC - check_acpi_pci(); /* Checks more than just ACPI actually */ -#endif +#if defined(CONFIG_PCI) && !defined(CONFIG_XEN) + early_quirks(); #endif #ifdef CONFIG_ACPI --- head-2010-05-25.orig/arch/x86/kernel/setup_64-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/setup_64-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,10 +1,5 @@ /* - * linux/arch/x86-64/kernel/setup.c - * * Copyright (C) 1995 Linus Torvalds - * - * Nov 2001 Dave Jones - * Forked from i386 setup code. */ /* @@ -57,13 +52,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #ifdef CONFIG_XEN #include #include @@ -180,6 +175,12 @@ struct resource code_resource = { .end = 0, .flags = IORESOURCE_RAM, }; +struct resource bss_resource = { + .name = "Kernel bss", + .start = 0, + .end = 0, + .flags = IORESOURCE_RAM, +}; #ifdef CONFIG_PROC_VMCORE /* elfcorehdr= specifies the location of elf core header @@ -232,10 +233,11 @@ EXPORT_SYMBOL(edd); */ static inline void copy_edd(void) { - memcpy(edd.mbr_signature, EDD_MBR_SIGNATURE, sizeof(edd.mbr_signature)); - memcpy(edd.edd_info, EDD_BUF, sizeof(edd.edd_info)); - edd.mbr_signature_nr = EDD_MBR_SIG_NR; - edd.edd_info_nr = EDD_NR; + memcpy(edd.mbr_signature, boot_params.edd_mbr_sig_buffer, + sizeof(edd.mbr_signature)); + memcpy(edd.edd_info, boot_params.eddbuf, sizeof(edd.edd_info)); + edd.mbr_signature_nr = boot_params.edd_mbr_sig_buf_entries; + edd.edd_info_nr = boot_params.eddbuf_entries; } #endif #else @@ -244,6 +246,41 @@ static inline void copy_edd(void) } #endif +#ifdef CONFIG_KEXEC +#ifndef CONFIG_XEN +static void __init reserve_crashkernel(void) +{ + unsigned long long free_mem; + unsigned long long crash_size, crash_base; + int ret; + + free_mem = ((unsigned long long)max_low_pfn - min_low_pfn) << PAGE_SHIFT; + + ret = parse_crashkernel(boot_command_line, free_mem, + &crash_size, &crash_base); + if (ret == 0 && crash_size) { + if (crash_base > 0) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(free_mem >> 20)); + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + reserve_bootmem(crash_base, crash_size); + } else + printk(KERN_INFO "crashkernel reservation failed - " + "you have to specify a base address\n"); + } +} +#else +#define reserve_crashkernel xen_machine_kexec_setup_resources +#endif +#else +static inline void __init reserve_crashkernel(void) +{} +#endif + #ifndef CONFIG_XEN #define EBDA_ADDR_POINTER 
0x40E @@ -284,7 +321,7 @@ void __init setup_arch(char **cmdline_p) atomic_notifier_chain_register(&panic_notifier_list, &xen_panic_block); ROOT_DEV = MKDEV(RAMDISK_MAJOR,0); - screen_info = SCREEN_INFO; + screen_info = boot_params.screen_info; if (is_initial_xendomain()) { const struct dom0_vga_console_info *info = @@ -307,22 +344,22 @@ void __init setup_arch(char **cmdline_p) #else printk(KERN_INFO "Command line: %s\n", boot_command_line); - ROOT_DEV = old_decode_dev(ORIG_ROOT_DEV); - screen_info = SCREEN_INFO; - edid_info = EDID_INFO; + ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); + screen_info = boot_params.screen_info; + edid_info = boot_params.edid_info; #endif /* !CONFIG_XEN */ - saved_video_mode = SAVED_VIDEO_MODE; - bootloader_type = LOADER_TYPE; + saved_video_mode = boot_params.hdr.vid_mode; + bootloader_type = boot_params.hdr.type_of_loader; #ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); + rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((boot_params.hdr.ram_size & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((boot_params.hdr.ram_size & RAMDISK_LOAD_FLAG) != 0); #endif setup_memory_region(); copy_edd(); - if (!MOUNT_ROOT_RDONLY) + if (!boot_params.hdr.root_flags) root_mountflags &= ~MS_RDONLY; init_mm.start_code = (unsigned long) &_text; init_mm.end_code = (unsigned long) &_etext; @@ -333,6 +370,8 @@ void __init setup_arch(char **cmdline_p) code_resource.end = virt_to_phys(&_etext)-1; data_resource.start = virt_to_phys(&_etext); data_resource.end = virt_to_phys(&_edata)-1; + bss_resource.start = virt_to_phys(&__bss_start); + bss_resource.end = virt_to_phys(&__bss_stop)-1; early_identify_cpu(&boot_cpu_data); @@ -360,6 +399,11 @@ void __init setup_arch(char **cmdline_p) if (is_initial_xendomain()) dmi_scan_machine(); +#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) + /* setup to use the static apicid table during kernel startup */ + x86_cpu_to_apicid_ptr = (void *)&x86_cpu_to_apicid_init; +#endif + /* How many end-of-memory variables you have, grandma! 
*/ max_low_pfn = end_pfn; max_pfn = end_pfn; @@ -424,52 +468,37 @@ void __init setup_arch(char **cmdline_p) */ acpi_reserve_bootmem(); #endif -#ifdef CONFIG_XEN #ifdef CONFIG_BLK_DEV_INITRD +#ifdef CONFIG_XEN if (xen_start_info->mod_start) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - /*reserve_bootmem_generic(INITRD_START, INITRD_SIZE);*/ - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; + unsigned long ramdisk_image = __pa(xen_start_info->mod_start); + unsigned long ramdisk_size = xen_start_info->mod_len; +#else + if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { + unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; + unsigned long ramdisk_size = boot_params.hdr.ramdisk_size; +#endif + unsigned long ramdisk_end = ramdisk_image + ramdisk_size; + unsigned long end_of_mem = end_pfn << PAGE_SHIFT; + + if (ramdisk_end <= end_of_mem) { +#ifndef CONFIG_XEN + reserve_bootmem_generic(ramdisk_image, ramdisk_size); +#endif + initrd_start = ramdisk_image + PAGE_OFFSET; + initrd_end = initrd_start+ramdisk_size; +#ifdef CONFIG_XEN initrd_below_start_ok = 1; - } else { - printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); - initrd_start = 0; - } - } #endif -#else /* CONFIG_XEN */ -#ifdef CONFIG_BLK_DEV_INITRD - if (LOADER_TYPE && INITRD_START) { - if (INITRD_START + INITRD_SIZE <= (end_pfn << PAGE_SHIFT)) { - reserve_bootmem_generic(INITRD_START, INITRD_SIZE); - initrd_start = INITRD_START + PAGE_OFFSET; - initrd_end = initrd_start+INITRD_SIZE; - } - else { + } else { printk(KERN_ERR "initrd extends beyond end of memory " - "(0x%08lx > 0x%08lx)\ndisabling initrd\n", - (unsigned long)(INITRD_START + INITRD_SIZE), - (unsigned long)(end_pfn << PAGE_SHIFT)); + "(0x%08lx > 0x%08lx)\ndisabling initrd\n", + ramdisk_end, end_of_mem); initrd_start = 0; } } #endif -#endif /* !CONFIG_XEN */ -#ifdef CONFIG_KEXEC -#ifdef CONFIG_XEN - xen_machine_kexec_setup_resources(); -#else - if (crashk_res.start != crashk_res.end) { - reserve_bootmem_generic(crashk_res.start, - crashk_res.end - crashk_res.start + 1); - } -#endif -#endif - + reserve_crashkernel(); paging_init(); #ifdef CONFIG_X86_LOCAL_APIC /* @@ -784,7 +813,7 @@ static void __init amd_detect_cmp(struct but in the same order as the HT nodeids. If that doesn't result in a usable node fall back to the path for the previous case. */ - int ht_nodeid = apicid - (cpu_data[0].phys_proc_id << bits); + int ht_nodeid = apicid - (cpu_data(0).phys_proc_id << bits); if (ht_nodeid >= 0 && apicid_to_node[ht_nodeid] != NUMA_NO_NODE) node = apicid_to_node[ht_nodeid]; @@ -799,6 +828,39 @@ static void __init amd_detect_cmp(struct #endif } +#define ENABLE_C1E_MASK 0x18000000 +#define CPUID_PROCESSOR_SIGNATURE 1 +#define CPUID_XFAM 0x0ff00000 +#define CPUID_XFAM_K8 0x00000000 +#define CPUID_XFAM_10H 0x00100000 +#define CPUID_XFAM_11H 0x00200000 +#define CPUID_XMOD 0x000f0000 +#define CPUID_XMOD_REV_F 0x00040000 + +#ifndef CONFIG_XEN +/* AMD systems with C1E don't have a working lAPIC timer. Check for that. 
*/ +static __cpuinit int amd_apic_timer_broken(void) +{ + u32 lo, hi; + u32 eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); + switch (eax & CPUID_XFAM) { + case CPUID_XFAM_K8: + if ((eax & CPUID_XMOD) < CPUID_XMOD_REV_F) + break; + case CPUID_XFAM_10H: + case CPUID_XFAM_11H: + rdmsr(MSR_K8_ENABLE_C1E, lo, hi); + if (lo & ENABLE_C1E_MASK) + return 1; + break; + default: + /* err on the side of caution */ + return 1; + } + return 0; +} +#endif + static void __cpuinit init_amd(struct cpuinfo_x86 *c) { unsigned level; @@ -828,7 +890,7 @@ static void __cpuinit init_amd(struct cp level = cpuid_eax(1); if (c->x86 == 15 && ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); - if (c->x86 == 0x10) + if (c->x86 == 0x10 || c->x86 == 0x11) set_bit(X86_FEATURE_REP_GOOD, &c->x86_capability); /* Enable workaround for FXSAVE leak */ @@ -870,6 +932,11 @@ static void __cpuinit init_amd(struct cp /* Family 10 doesn't support C states in MWAIT so don't use it */ if (c->x86 == 0x10 && !force_mwait) clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); + +#ifndef CONFIG_XEN + if (amd_apic_timer_broken()) + disable_apic_timer = 1; +#endif } static void __cpuinit detect_ht(struct cpuinfo_x86 *c) @@ -1182,6 +1249,7 @@ void __cpuinit print_cpu_info(struct cpu static int show_cpuinfo(struct seq_file *m, void *v) { struct cpuinfo_x86 *c = v; + int cpu = 0; /* * These flag bits must match the definitions in . @@ -1191,7 +1259,7 @@ static int show_cpuinfo(struct seq_file * applications want to get the raw CPUID data, they should access * /dev/cpu//cpuid instead. */ - static char *x86_cap_flags[] = { + static const char *const x86_cap_flags[] = { /* Intel-defined */ "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov", @@ -1222,7 +1290,7 @@ static int show_cpuinfo(struct seq_file /* Intel-defined (#2) */ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, - NULL, NULL, "dca", NULL, NULL, NULL, NULL, "popcnt", + NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* VIA/Cyrix/Centaur-defined */ @@ -1232,10 +1300,10 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* AMD-defined (#2) */ - "lahf_lm", "cmp_legacy", "svm", "extapic", "cr8_legacy", - "altmovcr8", "abm", "sse4a", - "misalignsse", "3dnowprefetch", - "osvw", "ibs", NULL, NULL, NULL, NULL, + "lahf_lm", "cmp_legacy", "svm", "extapic", + "cr8_legacy", "abm", "sse4a", "misalignsse", + "3dnowprefetch", "osvw", "ibs", "sse5", + "skinit", "wdt", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1245,7 +1313,7 @@ static int show_cpuinfo(struct seq_file NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; - static char *x86_power_flags[] = { + static const char *const x86_power_flags[] = { "ts", /* temperature sensor */ "fid", /* frequency id control */ "vid", /* voltage id control */ @@ -1260,8 +1328,7 @@ static int show_cpuinfo(struct seq_file #ifdef CONFIG_SMP - if (!cpu_online(c-cpu_data)) - return 0; + cpu = c->cpu_index; #endif seq_printf(m,"processor\t: %u\n" @@ -1269,7 +1336,7 @@ static int show_cpuinfo(struct seq_file "cpu family\t: %d\n" "model\t\t: %d\n" "model name\t: %s\n", - (unsigned)(c-cpu_data), + (unsigned)cpu, c->x86_vendor_id[0] ? 
c->x86_vendor_id : "unknown", c->x86, (int)c->x86_model, @@ -1281,7 +1348,7 @@ static int show_cpuinfo(struct seq_file seq_printf(m, "stepping\t: unknown\n"); if (cpu_has(c,X86_FEATURE_TSC)) { - unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); + unsigned int freq = cpufreq_quick_get((unsigned)cpu); if (!freq) freq = cpu_khz; seq_printf(m, "cpu MHz\t\t: %u.%03u\n", @@ -1294,9 +1361,9 @@ static int show_cpuinfo(struct seq_file #ifdef CONFIG_SMP if (smp_num_siblings * c->x86_max_cores > 1) { - int cpu = c - cpu_data; seq_printf(m, "physical id\t: %d\n", c->phys_proc_id); - seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); + seq_printf(m, "siblings\t: %d\n", + cpus_weight(per_cpu(cpu_core_map, cpu))); seq_printf(m, "core id\t\t: %d\n", c->cpu_core_id); seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); } @@ -1351,12 +1418,16 @@ static int show_cpuinfo(struct seq_file static void *c_start(struct seq_file *m, loff_t *pos) { - return *pos < NR_CPUS ? cpu_data + *pos : NULL; + if (*pos == 0) /* just in case, cpu 0 is not the first */ + *pos = first_cpu(cpu_online_map); + if ((*pos) < NR_CPUS && cpu_online(*pos)) + return &cpu_data(*pos); + return NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) { - ++*pos; + *pos = next_cpu(*pos, cpu_online_map); return c_start(m, pos); } --- head-2010-05-25.orig/arch/x86/kernel/smp_32-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/smp_32-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -72,7 +72,7 @@ * * B stepping CPUs may hang. There are hardware work arounds * for this. We warn about it in case your board doesn't have the work - * arounds. Basically thats so I can tell anyone with a B stepping + * arounds. Basically that's so I can tell anyone with a B stepping * CPU and SMP problems "tough". * * Specific items [From Pentium Processor Specification Update] @@ -241,7 +241,7 @@ void leave_mm(unsigned long cpu) * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask); * Stop ipi delivery for the old mm. This is not synchronized with * the other cpus, but smp_invalidate_interrupt ignore flush ipis - * for the wrong mm, and in the worst case we perform a superflous + * for the wrong mm, and in the worst case we perform a superfluous * tlb flush. 
* 1a2) set cpu_tlbstate to TLBSTATE_OK * Now the smp_invalidate_interrupt won't call leave_mm if cpu0 @@ -309,6 +309,7 @@ irqreturn_t smp_invalidate_interrupt(int smp_mb__after_clear_bit(); out: put_cpu_no_resched(); + __get_cpu_var(irq_stat).irq_tlb_count++; return IRQ_HANDLED; } @@ -580,7 +581,7 @@ static void stop_this_cpu (void * dummy) */ cpu_clear(smp_processor_id(), cpu_online_map); disable_all_local_evtchn(); - if (cpu_data[smp_processor_id()].hlt_works_ok) + if (cpu_data(smp_processor_id()).hlt_works_ok) for(;;) halt(); for (;;); } @@ -610,6 +611,7 @@ void xen_smp_send_stop(void) */ irqreturn_t smp_reschedule_interrupt(int irq, void *dev_id) { + __get_cpu_var(irq_stat).irq_resched_count++; return IRQ_HANDLED; } @@ -632,6 +634,7 @@ irqreturn_t smp_call_function_interrupt( */ irq_enter(); (*func)(info); + __get_cpu_var(irq_stat).irq_call_count++; irq_exit(); if (wait) { --- head-2010-05-25.orig/arch/x86/kernel/smp_64-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/smp_64-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -167,6 +167,7 @@ asmlinkage void smp_invalidate_interrupt out: ack_APIC_irq(); cpu_clear(cpu, f->flush_cpumask); + add_pda(irq_tlb_count, 1); } static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, @@ -326,17 +327,27 @@ void unlock_ipi_call_lock(void) } /* - * this function sends a 'generic call function' IPI to one other CPU - * in the system. - * - * cpu is a standard Linux logical CPU number. + * this function sends a 'generic call function' IPI to all other CPU + * of the system defined in the mask. */ -static void -__smp_call_function_single(int cpu, void (*func) (void *info), void *info, - int nonatomic, int wait) + +static int +__smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) { struct call_data_struct data; - int cpus = 1; + cpumask_t allbutself; + int cpus; + + allbutself = cpu_online_map; + cpu_clear(smp_processor_id(), allbutself); + + cpus_and(mask, mask, allbutself); + cpus = cpus_weight(mask); + + if (!cpus) + return 0; data.func = func; data.info = info; @@ -347,19 +358,55 @@ __smp_call_function_single(int cpu, void call_data = &data; wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); + + /* Send a message to other CPUs */ + if (cpus_equal(mask, allbutself)) + send_IPI_allbutself(CALL_FUNCTION_VECTOR); + else + send_IPI_mask(mask, CALL_FUNCTION_VECTOR); /* Wait for response */ while (atomic_read(&data.started) != cpus) cpu_relax(); if (!wait) - return; + return 0; while (atomic_read(&data.finished) != cpus) cpu_relax(); + + return 0; +} +/** + * smp_call_function_mask(): Run a function on a set of other CPUs. + * @mask: The set of cpus to run on. Must not include the current cpu. + * @func: The function to run. This must be fast and non-blocking. + * @info: An arbitrary pointer to pass to the function. + * @wait: If true, wait (atomically) until function has completed on other CPUs. + * + * Returns 0 on success, else a negative status code. + * + * If @wait is true, then returns once @func has returned; otherwise + * it returns just before the target cpu calls @func. + * + * You must not call this function with disabled interrupts or from a + * hardware interrupt handler or from a bottom half handler. 
+ */ +int smp_call_function_mask(cpumask_t mask, + void (*func)(void *), void *info, + int wait) +{ + int ret; + + /* Can deadlock when called with interrupts disabled */ + WARN_ON(irqs_disabled()); + + spin_lock(&call_lock); + ret = __smp_call_function_mask(mask, func, info, wait); + spin_unlock(&call_lock); + return ret; } +EXPORT_SYMBOL(smp_call_function_mask); /* * smp_call_function_single - Run a function on a specific CPU @@ -378,6 +425,7 @@ int smp_call_function_single (int cpu, v int nonatomic, int wait) { /* prevent preemption and reschedule on another processor */ + int ret; int me = get_cpu(); /* Can deadlock when called with interrupts disabled */ @@ -391,51 +439,14 @@ int smp_call_function_single (int cpu, v return 0; } - spin_lock(&call_lock); - __smp_call_function_single(cpu, func, info, nonatomic, wait); - spin_unlock(&call_lock); + ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); + put_cpu(); - return 0; + return ret; } EXPORT_SYMBOL(smp_call_function_single); /* - * this function sends a 'generic call function' IPI to all other CPUs - * in the system. - */ -static void __smp_call_function (void (*func) (void *info), void *info, - int nonatomic, int wait) -{ - struct call_data_struct data; - int cpus = num_online_cpus()-1; - - if (!cpus) - return; - - data.func = func; - data.info = info; - atomic_set(&data.started, 0); - data.wait = wait; - if (wait) - atomic_set(&data.finished, 0); - - call_data = &data; - wmb(); - /* Send a message to all other CPUs and wait for them to respond */ - send_IPI_allbutself(CALL_FUNCTION_VECTOR); - - /* Wait for response */ - while (atomic_read(&data.started) != cpus) - cpu_relax(); - - if (!wait) - return; - - while (atomic_read(&data.finished) != cpus) - cpu_relax(); -} - -/* * smp_call_function - run a function on all other CPUs. * @func: The function to run. This must be fast and non-blocking. * @info: An arbitrary pointer to pass to the function. 
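[Illustrative sketch, not part of the patch: a minimal caller of the
smp_call_function_mask() interface introduced above. count_one() and
count_other_cpus() are made-up names; the cpumask helpers are the
2.6.24-era API already used in this file. Interrupts must be enabled
at the call site, per the WARN_ON(irqs_disabled()) above.]

	#include <linux/smp.h>
	#include <linux/cpumask.h>
	#include <asm/atomic.h>

	/* Runs on each CPU in the mask, in interrupt context: must be fast
	 * and non-blocking, exactly as the kernel-doc above requires. */
	static void count_one(void *info)
	{
		atomic_inc((atomic_t *)info);
	}

	/* Returns the number of other online CPUs that ran count_one(),
	 * or a negative error code. */
	static int count_other_cpus(void)
	{
		atomic_t hits = ATOMIC_INIT(0);
		cpumask_t mask = cpu_online_map;
		int ret;

		cpu_clear(get_cpu(), mask);	/* mask must not include us */
		/* wait == 1: return only after every target CPU has run */
		ret = smp_call_function_mask(mask, count_one, &hits, 1);
		put_cpu();
		return ret ? ret : atomic_read(&hits);
	}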
@@ -453,10 +464,7 @@ static void __smp_call_function (void (* int smp_call_function (void (*func) (void *info), void *info, int nonatomic, int wait) { - spin_lock(&call_lock); - __smp_call_function(func,info,nonatomic,wait); - spin_unlock(&call_lock); - return 0; + return smp_call_function_mask(cpu_online_map, func, info, wait); } EXPORT_SYMBOL(smp_call_function); @@ -485,7 +493,7 @@ void smp_send_stop(void) /* Don't deadlock on the call lock in panic */ nolock = !spin_trylock(&call_lock); local_irq_save(flags); - __smp_call_function(stop_this_cpu, NULL, 0, 0); + __smp_call_function_mask(cpu_online_map, stop_this_cpu, NULL, 0); if (!nolock) spin_unlock(&call_lock); disable_all_local_evtchn(); @@ -505,7 +513,9 @@ asmlinkage irqreturn_t smp_reschedule_in { #ifndef CONFIG_XEN ack_APIC_irq(); -#else +#endif + add_pda(irq_resched_count, 1); +#ifdef CONFIG_XEN return IRQ_HANDLED; #endif } @@ -535,6 +545,7 @@ asmlinkage irqreturn_t smp_call_function exit_idle(); irq_enter(); (*func)(info); + add_pda(irq_call_count, 1); irq_exit(); if (wait) { mb(); --- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/kernel/time.c - * * Copyright (C) 1991, 1992, 1995 Linus Torvalds * * This file contains the PC-specific time handling details: @@ -73,6 +71,7 @@ #include #include +#include #include #include @@ -535,6 +534,13 @@ irqreturn_t timer_interrupt(int irq, voi struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); struct vcpu_runstate_info runstate; + /* Keep nmi watchdog up to date */ +#ifdef __i386__ + per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; +#else + add_pda(irq0_irqs, 1); +#endif + /* * Here we are in the timer irq handler. 
We just have irqs locally * disabled but we don't know if the timer_bh is running on the other @@ -1008,7 +1014,7 @@ static int time_cpufreq_notifier(struct struct cpufreq_freqs *freq = data; struct xen_platform_op op; - if (cpu_has(&cpu_data[freq->cpu], X86_FEATURE_CONSTANT_TSC)) + if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) return 0; if (val == CPUFREQ_PRECHANGE) @@ -1046,30 +1052,33 @@ core_initcall(cpufreq_time_setup); */ static ctl_table xen_subtable[] = { { - .ctl_name = 1, + .ctl_name = CTL_XEN_INDEPENDENT_WALLCLOCK, .procname = "independent_wallclock", .data = &independent_wallclock, .maxlen = sizeof(independent_wallclock), .mode = 0644, + .strategy = sysctl_data, .proc_handler = proc_dointvec }, { - .ctl_name = 2, + .ctl_name = CTL_XEN_PERMITTED_CLOCK_JITTER, .procname = "permitted_clock_jitter", .data = &permitted_clock_jitter, .maxlen = sizeof(permitted_clock_jitter), .mode = 0644, + .strategy = sysctl_data, .proc_handler = proc_doulongvec_minmax }, - { 0 } + { } }; static ctl_table xen_table[] = { { - .ctl_name = 123, + .ctl_name = CTL_XEN, .procname = "xen", .mode = 0555, - .child = xen_subtable}, - { 0 } + .child = xen_subtable + }, + { } }; static int __init xen_sysctl_init(void) { --- head-2010-05-25.orig/arch/x86/kernel/traps_32-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/traps_32-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/i386/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * * Pentium III FXSR, SSE support @@ -65,6 +63,11 @@ int panic_on_unrecovered_nmi; +#ifndef CONFIG_XEN +DECLARE_BITMAP(used_vectors, NR_VECTORS); +EXPORT_SYMBOL_GPL(used_vectors); +#endif + asmlinkage int system_call(void); /* Do we ignore FPU interrupts ? */ @@ -120,7 +123,7 @@ struct stack_frame { static inline unsigned long print_context_stack(struct thread_info *tinfo, unsigned long *stack, unsigned long ebp, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { #ifdef CONFIG_FRAME_POINTER struct stack_frame *frame = (struct stack_frame *)ebp; @@ -157,7 +160,7 @@ static inline unsigned long print_contex void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { unsigned long ebp = 0; @@ -229,7 +232,7 @@ static void print_trace_address(void *da touch_nmi_watchdog(); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -288,6 +291,11 @@ void dump_stack(void) { unsigned long stack; + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(current, NULL, &stack); } @@ -296,48 +304,24 @@ EXPORT_SYMBOL(dump_stack); void show_registers(struct pt_regs *regs) { int i; - int in_kernel = 1; - unsigned long esp; - unsigned short ss, gs; - - esp = (unsigned long) (®s->esp); - savesegment(ss, ss); - savesegment(gs, gs); - if (user_mode_vm(regs)) { - in_kernel = 0; - esp = regs->esp; - ss = regs->xss & 0xffff; - } + print_modules(); - printk(KERN_EMERG "CPU: %d\n" - KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" - KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", - smp_processor_id(), 0xffff & regs->xcs, regs->eip, - print_tainted(), regs->eflags, init_utsname()->release, - 
(int)strcspn(init_utsname()->version, " "), - init_utsname()->version); - print_symbol(KERN_EMERG "EIP is at %s\n", regs->eip); - printk(KERN_EMERG "eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", - regs->eax, regs->ebx, regs->ecx, regs->edx); - printk(KERN_EMERG "esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", - regs->esi, regs->edi, regs->ebp, esp); - printk(KERN_EMERG "ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n", - regs->xds & 0xffff, regs->xes & 0xffff, regs->xfs & 0xffff, gs, ss); + __show_registers(regs, 0); printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)", - TASK_COMM_LEN, current->comm, current->pid, + TASK_COMM_LEN, current->comm, task_pid_nr(current), current_thread_info(), current, task_thread_info(current)); /* * When in-kernel, we also print out the stack and code at the * time of the fault.. */ - if (in_kernel) { + if (!user_mode_vm(regs)) { u8 *eip; unsigned int code_prologue = code_bytes * 43 / 64; unsigned int code_len = code_bytes; unsigned char c; printk("\n" KERN_EMERG "Stack: "); - show_stack_log_lvl(NULL, regs, (unsigned long *)esp, KERN_EMERG); + show_stack_log_lvl(NULL, regs, ®s->esp, KERN_EMERG); printk(KERN_EMERG "Code: "); @@ -382,11 +366,11 @@ int is_valid_bugaddr(unsigned long eip) void die(const char * str, struct pt_regs * regs, long err) { static struct { - spinlock_t lock; + raw_spinlock_t lock; u32 lock_owner; int lock_owner_depth; } die = { - .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock = __RAW_SPIN_LOCK_UNLOCKED, .lock_owner = -1, .lock_owner_depth = 0 }; @@ -397,40 +381,33 @@ void die(const char * str, struct pt_reg if (die.lock_owner != raw_smp_processor_id()) { console_verbose(); - spin_lock_irqsave(&die.lock, flags); + raw_local_irq_save(flags); + __raw_spin_lock(&die.lock); die.lock_owner = smp_processor_id(); die.lock_owner_depth = 0; bust_spinlocks(1); - } - else - local_save_flags(flags); + } else + raw_local_irq_save(flags); if (++die.lock_owner_depth < 3) { - int nl = 0; unsigned long esp; unsigned short ss; report_bug(regs->eip, regs); - printk(KERN_EMERG "%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, + ++die_counter); #ifdef CONFIG_PREEMPT - printk(KERN_EMERG "PREEMPT "); - nl = 1; + printk("PREEMPT "); #endif #ifdef CONFIG_SMP - if (!nl) - printk(KERN_EMERG); printk("SMP "); - nl = 1; #endif #ifdef CONFIG_DEBUG_PAGEALLOC - if (!nl) - printk(KERN_EMERG); printk("DEBUG_PAGEALLOC"); - nl = 1; #endif - if (nl) - printk("\n"); + printk("\n"); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { @@ -454,7 +431,8 @@ void die(const char * str, struct pt_reg bust_spinlocks(0); die.lock_owner = -1; add_taint(TAINT_DIE); - spin_unlock_irqrestore(&die.lock, flags); + __raw_spin_unlock(&die.lock); + raw_local_irq_restore(flags); if (!regs) return; @@ -571,6 +549,7 @@ fastcall void do_##name(struct pt_regs * info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -606,7 +585,7 @@ fastcall void __kprobes do_general_prote printk_ratelimit()) printk(KERN_INFO "%s[%d] general protection eip:%lx esp:%lx error:%lx\n", - current->comm, current->pid, + current->comm, task_pid_nr(current), regs->eip, regs->esp, error_code); force_sig(SIGSEGV, current); @@ -785,6 +764,8 @@ void restart_nmi(void) #ifdef CONFIG_KPROBES fastcall void __kprobes do_int3(struct pt_regs *regs, long 
error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) return; @@ -822,6 +803,8 @@ fastcall void __kprobes do_debug(struct unsigned int condition; struct task_struct *tsk = current; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, @@ -1084,20 +1067,6 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ -#ifdef CONFIG_X86_F00F_BUG -void __init trap_init_f00f_bug(void) -{ - __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); - - /* - * Update the IDT descriptor and reload the IDT so that - * it uses the read-only mapped virtual address. - */ - idt_descr.address = fix_to_virt(FIX_F00F_IDT); - load_idt(&idt_descr); -} -#endif - /* * NB. All these are "trap gates" (i.e. events_mask isn't set) except --- head-2010-05-25.orig/arch/x86/kernel/traps_64-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/traps_64-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86-64/traps.c - * * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs * @@ -33,6 +31,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include @@ -205,7 +204,7 @@ static unsigned long *in_exception_stack #define MSG(txt) ops->warning(data, txt) /* - * x86-64 can have upto three kernel stacks: + * x86-64 can have up to three kernel stacks: * process stack * interrupt stack * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack @@ -219,7 +218,7 @@ static inline int valid_stack_ptr(struct void dump_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, - struct stacktrace_ops *ops, void *data) + const struct stacktrace_ops *ops, void *data) { const unsigned cpu = get_cpu(); unsigned long *irqstack_end = (unsigned long*)cpu_pda(cpu)->irqstackptr; @@ -340,7 +339,7 @@ static void print_trace_address(void *da printk_address(addr); } -static struct stacktrace_ops print_trace_ops = { +static const struct stacktrace_ops print_trace_ops = { .warning = print_trace_warning, .warning_symbol = print_trace_warning_symbol, .stack = print_trace_stack, @@ -404,6 +403,12 @@ void show_stack(struct task_struct *tsk, void dump_stack(void) { unsigned long dummy; + + printk("Pid: %d, comm: %.20s %s %s %.*s\n", + current->pid, current->comm, print_tainted(), + init_utsname()->release, + (int)strcspn(init_utsname()->version, " "), + init_utsname()->version); show_trace(NULL, NULL, &dummy); } @@ -466,7 +471,7 @@ void out_of_line_bug(void) EXPORT_SYMBOL(out_of_line_bug); #endif -static DEFINE_SPINLOCK(die_lock); +static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; static int die_owner = -1; static unsigned int die_nest_count; @@ -478,13 +483,13 @@ unsigned __kprobes long oops_begin(void) oops_enter(); /* racy, but better than risking deadlock. */ - local_irq_save(flags); + raw_local_irq_save(flags); cpu = smp_processor_id(); - if (!spin_trylock(&die_lock)) { + if (!__raw_spin_trylock(&die_lock)) { if (cpu == die_owner) /* nested oops. should stop eventually */; else - spin_lock(&die_lock); + __raw_spin_lock(&die_lock); } die_nest_count++; die_owner = cpu; @@ -498,12 +503,10 @@ void __kprobes oops_end(unsigned long fl die_owner = -1; bust_spinlocks(0); die_nest_count--; - if (die_nest_count) - /* We still own the lock */ - local_irq_restore(flags); - else + if (!die_nest_count) /* Nest count reaches zero, release the lock. 
*/ - spin_unlock_irqrestore(&die_lock, flags); + __raw_spin_unlock(&die_lock); + raw_local_irq_restore(flags); if (panic_on_oops) panic("Fatal exception"); oops_exit(); @@ -636,6 +639,7 @@ asmlinkage void do_##name(struct pt_regs info.si_errno = 0; \ info.si_code = sicode; \ info.si_addr = (void __user *)siaddr; \ + trace_hardirqs_fixup(); \ if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ == NOTIFY_STOP) \ return; \ @@ -741,11 +745,8 @@ mem_parity_error(unsigned char reason, s printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); -#if 0 /* XEN */ /* Clear and disable the memory parity error line. */ - reason = (reason & 0xf) | 4; - outb(reason, 0x61); -#endif /* XEN */ + clear_mem_error(reason); } static __kprobes void @@ -754,14 +755,8 @@ io_check_error(unsigned char reason, str printk("NMI: IOCK error (debug interrupt?)\n"); show_registers(regs); -#if 0 /* XEN */ /* Re-enable the IOCK line, wait for a few seconds */ - reason = (reason & 0xf) | 8; - outb(reason, 0x61); - mdelay(2000); - reason &= ~8; - outb(reason, 0x61); -#endif /* XEN */ + clear_io_check_error(reason); } static __kprobes void @@ -821,6 +816,8 @@ asmlinkage __kprobes void default_do_nmi /* runs on IST stack. */ asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) { + trace_hardirqs_fixup(); + if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { return; } @@ -858,6 +855,8 @@ asmlinkage void __kprobes do_debug(struc struct task_struct *tsk = current; siginfo_t info; + trace_hardirqs_fixup(); + get_debugreg(condition, 6); if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, --- head-2010-05-25.orig/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,6 +1,4 @@ /* - * linux/arch/x86_64/kernel/vsyscall.c - * * Copyright (C) 2001 Andrea Arcangeli SuSE * Copyright 2003 Andi Kleen, SuSE Labs. 
* @@ -50,12 +48,12 @@ ({unsigned long v; \ extern char __vsyscall_0; \ asm("" : "=r" (v) : "0" (x)); \ - ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) + ((v - VSYSCALL_START) + __pa_symbol(&__vsyscall_0)); }) /* * vsyscall_gtod_data contains data that is : * - readonly from vsyscalls - * - writen by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) + * - written by timer interrupt or systcl (/proc/sys/kernel/vsyscall64) * Try to keep this structure as small as possible to avoid cache line ping pongs */ int __vgetcpu_mode __section_vgetcpu_mode; @@ -66,6 +64,16 @@ struct vsyscall_gtod_data __vsyscall_gto .sysctl_enabled = 1, }; +void update_vsyscall_tz(void) +{ + unsigned long flags; + + write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); + /* sys_tz has changed */ + vsyscall_gtod_data.sys_tz = sys_tz; + write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); +} + void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) { unsigned long flags; @@ -79,8 +87,6 @@ void update_vsyscall(struct timespec *wa vsyscall_gtod_data.clock.shift = clock->shift; vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; - vsyscall_gtod_data.sys_tz = sys_tz; - vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); } @@ -166,7 +172,7 @@ time_t __vsyscall(1) vtime(time_t *t) if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) return time_syscall(t); - vgettimeofday(&tv, 0); + vgettimeofday(&tv, NULL); result = tv.tv_sec; if (t) *t = result; @@ -260,18 +266,10 @@ out: return ret; } -static int vsyscall_sysctl_nostrat(ctl_table *t, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen) -{ - return -ENOSYS; -} - static ctl_table kernel_table2[] = { - { .ctl_name = 99, .procname = "vsyscall64", + { .procname = "vsyscall64", .data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int), .mode = 0644, - .strategy = vsyscall_sysctl_nostrat, .proc_handler = vsyscall_sysctl_change }, {} }; @@ -291,9 +289,9 @@ static void __cpuinit vsyscall_set_cpu(i unsigned long d; unsigned long node = 0; #ifdef CONFIG_NUMA - node = cpu_to_node[cpu]; + node = cpu_to_node(cpu); #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP)) write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly --- head-2010-05-25.orig/arch/x86/mm/fault_32-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/fault_32-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -32,33 +33,27 @@ extern void die(const char *,struct pt_regs *,long); -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode_vm(regs)) { + preempt_disable(); + if (kprobe_running() && 
kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(struct pt_regs *regs, long err) + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* * Return EIP plus the CS segment base. The segment limit is also @@ -110,7 +105,7 @@ static inline unsigned long get_segment_ LDT and other horrors are only used in user space. */ if (seg & (1<<2)) { /* Must lock the LDT while reading it. */ - down(¤t->mm->context.sem); + mutex_lock(¤t->mm->context.lock); desc = current->mm->context.ldt; desc = (void *)desc + (seg & ~7); } else { @@ -123,7 +118,7 @@ static inline unsigned long get_segment_ base = get_desc_base((unsigned long *)desc); if (seg & (1<<2)) { - up(¤t->mm->context.sem); + mutex_unlock(¤t->mm->context.lock); } else put_cpu(); @@ -244,7 +239,7 @@ static void dump_fault_path(unsigned lon if (mfn_to_pfn(mfn) >= highstart_pfn) return; #endif - if (p[0] & _PAGE_PRESENT) { + if ((p[0] & _PAGE_PRESENT) && !(p[0] & _PAGE_PSE)) { page = mfn_to_pfn(mfn) << PAGE_SHIFT; p = (unsigned long *) __va(page); address &= 0x001fffff; @@ -270,7 +265,8 @@ static void dump_fault_path(unsigned lon * it's allocated already. */ if ((machine_to_phys(page) >> PAGE_SHIFT) < max_low_pfn - && (page & _PAGE_PRESENT)) { + && (page & _PAGE_PRESENT) + && !(page & _PAGE_PSE)) { page = machine_to_phys(page & PAGE_MASK); page = ((unsigned long *) __va(page))[(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)]; @@ -416,6 +412,11 @@ fastcall void __kprobes do_page_fault(st int write, si_code; int fault; + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + /* get the address */ address = read_cr2(); @@ -453,7 +454,7 @@ fastcall void __kprobes do_page_fault(st /* Can take a spurious fault if mapping changes R/O -> R/W. */ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -462,7 +463,7 @@ fastcall void __kprobes do_page_fault(st goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* It's safe to allow irq's after cr2 has been saved and the vmalloc @@ -481,7 +482,7 @@ fastcall void __kprobes do_page_fault(st /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -489,7 +490,7 @@ fastcall void __kprobes do_page_fault(st * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. 
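[Illustrative sketch, not part of the patch: the deadlock-avoidance
scheme described in the comment above reduces to the following pattern
from the i386 fault handler; bit 2 of error_code is set for faults
taken in user mode.]

	if (!down_read_trylock(&mm->mmap_sem)) {
		/* Kernel-mode fault with no exception-table fixup for the
		 * faulting instruction: blocking on mmap_sem here could
		 * deadlock, so take the oops path immediately. */
		if ((error_code & 4) == 0 &&
		    !search_exception_tables(regs->eip))
			goto bad_area_nosemaphore;
		/* A known user-access site: sleeping for the lock is safe. */
		down_read(&mm->mmap_sem);
	}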
@@ -598,8 +599,8 @@ bad_area_nosemaphore: printk_ratelimit()) { printk("%s%s[%d]: segfault at %08lx eip %08lx " "esp %08lx error %lx\n", - tsk->pid > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, tsk->pid, address, regs->eip, + task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, + tsk->comm, task_pid_nr(tsk), address, regs->eip, regs->esp, error_code); } tsk->thread.cr2 = address; @@ -664,8 +665,7 @@ no_context: printk(KERN_ALERT "BUG: unable to handle kernel paging" " request"); printk(" at virtual address %08lx\n",address); - printk(KERN_ALERT " printing eip:\n"); - printk("%08lx\n", regs->eip); + printk(KERN_ALERT "printing eip: %08lx\n", regs->eip); dump_fault_path(address); } tsk->thread.cr2 = address; @@ -681,14 +681,14 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(tsk)) { + if (is_global_init(tsk)) { yield(); down_read(&mm->mmap_sem); goto survive; } printk("VM: killing process %s\n", tsk->comm); if (error_code & 4) - do_exit(SIGKILL); + do_group_exit(SIGKILL); goto no_context; do_sigbus: --- head-2010-05-25.orig/arch/x86/mm/fault_64-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/fault_64-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -40,34 +41,27 @@ #define PF_RSVD (1<<3) #define PF_INSTR (1<<4) -static ATOMIC_NOTIFIER_HEAD(notify_page_fault_chain); - -/* Hook to register for page fault notifications */ -int register_page_fault_notifier(struct notifier_block *nb) +#ifdef CONFIG_KPROBES +static inline int notify_page_fault(struct pt_regs *regs) { - vmalloc_sync_all(); - return atomic_notifier_chain_register(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(register_page_fault_notifier); + int ret = 0; -int unregister_page_fault_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(¬ify_page_fault_chain, nb); -} -EXPORT_SYMBOL_GPL(unregister_page_fault_notifier); + /* kprobe_running() needs smp_processor_id() */ + if (!user_mode(regs)) { + preempt_disable(); + if (kprobe_running() && kprobe_fault_handler(regs, 14)) + ret = 1; + preempt_enable(); + } -static inline int notify_page_fault(struct pt_regs *regs, long err) + return ret; +} +#else +static inline int notify_page_fault(struct pt_regs *regs) { - struct die_args args = { - .regs = regs, - .str = "page fault", - .err = err, - .trapnr = 14, - .signr = SIGSEGV - }; - return atomic_notifier_call_chain(¬ify_page_fault_chain, - DIE_PAGE_FAULT, &args); + return 0; } +#endif /* Sometimes the CPU reports invalid exceptions on prefetch. Check that here and ignore. @@ -175,7 +169,7 @@ void dump_pagetable(unsigned long addres pmd = pmd_offset(pud, address); if (bad_address(pmd)) goto bad; printk("PMD %lx ", pmd_val(*pmd)); - if (!pmd_present(*pmd)) goto ret; + if (!pmd_present(*pmd) || pmd_large(*pmd)) goto ret; pte = pte_offset_kernel(pmd, address); if (bad_address(pte)) goto bad; @@ -294,7 +288,6 @@ static int vmalloc_fault(unsigned long a return 0; } -static int page_fault_trace; int show_unhandled_signals = 1; @@ -371,6 +364,11 @@ asmlinkage void __kprobes do_page_fault( if (!user_mode(regs)) error_code &= ~PF_USER; /* means kernel */ + /* + * We can fault from pretty much anywhere, with unknown IRQ state. + */ + trace_hardirqs_fixup(); + tsk = current; mm = tsk->mm; prefetchw(&mm->mmap_sem); @@ -408,7 +406,7 @@ asmlinkage void __kprobes do_page_fault( /* Can take a spurious fault if mapping changes R/O -> R/W. 
*/ if (spurious_fault(regs, address, error_code)) return; - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; /* * Don't take the mm semaphore here. If we fixup a prefetch @@ -417,16 +415,12 @@ asmlinkage void __kprobes do_page_fault( goto bad_area_nosemaphore; } - if (notify_page_fault(regs, error_code) == NOTIFY_STOP) + if (notify_page_fault(regs)) return; if (likely(regs->eflags & X86_EFLAGS_IF)) local_irq_enable(); - if (unlikely(page_fault_trace)) - printk("pagefault rip:%lx rsp:%lx cs:%lu ss:%lu address %lx error %lx\n", - regs->rip,regs->rsp,regs->cs,regs->ss,address,error_code); - if (unlikely(error_code & PF_RSVD)) pgtable_bad(address, regs, error_code); @@ -447,7 +441,7 @@ asmlinkage void __kprobes do_page_fault( again: /* When running in the kernel we expect faults to occur only to * addresses in user space. All other faults represent errors in the - * kernel and should generate an OOPS. Unfortunatly, in the case of an + * kernel and should generate an OOPS. Unfortunately, in the case of an * erroneous fault occurring in a code path which already holds mmap_sem * we will deadlock attempting to validate the fault against the * address space. Luckily the kernel only validly references user @@ -455,7 +449,7 @@ asmlinkage void __kprobes do_page_fault( * exceptions table. * * As the vast majority of faults will be valid we will only perform - * the source reference check when there is a possibilty of a deadlock. + * the source reference check when there is a possibility of a deadlock. * Attempt to lock the address space, if we cannot we then validate the * source. If this is invalid we can skip the address space check, * thus avoiding the deadlock. @@ -557,7 +551,7 @@ bad_area_nosemaphore: if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && printk_ratelimit()) { printk( - "%s%s[%d]: segfault at %016lx rip %016lx rsp %016lx error %lx\n", + "%s%s[%d]: segfault at %lx rip %lx rsp %lx error %lx\n", tsk->pid > 1 ? 
KERN_INFO : KERN_EMERG, tsk->comm, tsk->pid, address, regs->rip, regs->rsp, error_code); @@ -623,7 +617,7 @@ no_context: */ out_of_memory: up_read(&mm->mmap_sem); - if (is_init(current)) { + if (is_global_init(current)) { yield(); goto again; } @@ -690,10 +684,3 @@ void vmalloc_sync_all(void) BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == (__START_KERNEL & PGDIR_MASK))); } - -static int __init enable_pagefaulttrace(char *str) -{ - page_fault_trace = 1; - return 1; -} -__setup("pagefaulttrace", enable_pagefaulttrace); --- head-2010-05-25.orig/arch/x86/mm/hypervisor.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/hypervisor.c 2010-03-24 15:10:29.000000000 +0100 @@ -500,6 +500,9 @@ int xen_create_contiguous_region( unsigned long frame, flags; unsigned int i; int rc, success; +#ifdef CONFIG_64BIT + pte_t *ptep = NULL; +#endif struct xen_memory_exchange exchange = { .in = { .nr_extents = 1UL << order, @@ -525,6 +528,27 @@ int xen_create_contiguous_region( if (unlikely(order > MAX_CONTIG_ORDER)) return -ENOMEM; +#ifdef CONFIG_64BIT + if (unlikely(vstart > PAGE_OFFSET + MAXMEM)) { + unsigned int level; + + if (vstart < __START_KERNEL_map + || vstart + (PAGE_SIZE << order) > (unsigned long)_end) + return -EINVAL; + ptep = lookup_address((unsigned long)__va(__pa(vstart)), + &level); + if (ptep && pte_none(*ptep)) + ptep = NULL; + if (vstart < __START_KERNEL && ptep) + return -EINVAL; + if (order > MAX_CONTIG_ORDER - 1) + return -ENOMEM; + } +#else + if (unlikely(vstart + (PAGE_SIZE << order) > (unsigned long)high_memory)) + return -EINVAL; +#endif + set_xen_guest_handle(exchange.in.extent_start, in_frames); set_xen_guest_handle(exchange.out.extent_start, &out_frame); @@ -537,9 +561,19 @@ int xen_create_contiguous_region( in_frames[i] = pfn_to_mfn((__pa(vstart) >> PAGE_SHIFT) + i); MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE), __pte_ma(0), 0); +#ifdef CONFIG_64BIT + if (ptep) + MULTI_update_va_mapping(cr_mcl + i + (1U << order), + (unsigned long)__va(__pa(vstart)) + (i*PAGE_SIZE), + __pte_ma(0), 0); +#endif set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, INVALID_P2M_ENTRY); } +#ifdef CONFIG_64BIT + if (ptep) + i += i; +#endif if (HYPERVISOR_multicall_check(cr_mcl, i, NULL)) BUG(); @@ -573,9 +607,18 @@ int xen_create_contiguous_region( frame = success ? (out_frame + i) : in_frames[i]; MULTI_update_va_mapping(cr_mcl + i, vstart + (i*PAGE_SIZE), pfn_pte_ma(frame, PAGE_KERNEL), 0); +#ifdef CONFIG_64BIT + if (ptep) + MULTI_update_va_mapping(cr_mcl + i + (1U << order), + (unsigned long)__va(__pa(vstart)) + (i*PAGE_SIZE), + pfn_pte_ma(frame, PAGE_KERNEL_RO), 0); +#endif set_phys_to_machine((__pa(vstart)>>PAGE_SHIFT)+i, frame); } - +#ifdef CONFIG_64BIT + if (ptep) + i += i; +#endif cr_mcl[i - 1].args[MULTI_UVMFLAGS_INDEX] = order ? 
UVMF_TLB_FLUSH|UVMF_ALL : UVMF_INVLPG|UVMF_ALL; --- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -94,7 +94,14 @@ static pte_t * __init one_page_table_ini #else if (!(__pmd_val(*pmd) & _PAGE_PRESENT)) { #endif - pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); + pte_t *page_table = NULL; + +#ifdef CONFIG_DEBUG_PAGEALLOC + page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); +#endif + if (!page_table) + page_table = + (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); paravirt_alloc_pt(&init_mm, __pa(page_table) >> PAGE_SHIFT); make_lowmem_page_readonly(page_table, @@ -102,7 +109,7 @@ static pte_t * __init one_page_table_ini set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); BUG_ON(page_table != pte_offset_kernel(pmd, 0)); } - + return pte_offset_kernel(pmd, 0); } @@ -360,8 +367,13 @@ static void __init set_highmem_pages_ini { int pfn; for (pfn = highstart_pfn; pfn < highend_pfn - && pfn < xen_start_info->nr_pages; pfn++) - add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + && pfn < xen_start_info->nr_pages; pfn++) { + /* + * Holes under sparsemem might not have no mem_map[]: + */ + if (pfn_valid(pfn)) + add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro); + } /* XEN: init high-mem pages outside initial allocation. */ for (; pfn < highend_pfn; pfn++) { @@ -785,35 +797,18 @@ int arch_add_memory(int nid, u64 start, return __add_pages(zone, start_pfn, nr_pages); } -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); #endif struct kmem_cache *pmd_cache; void __init pgtable_cache_init(void) { - size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); - - if (PTRS_PER_PMD > 1) { + if (PTRS_PER_PMD > 1) pmd_cache = kmem_cache_create("pmd", - PTRS_PER_PMD*sizeof(pmd_t), - PTRS_PER_PMD*sizeof(pmd_t), - SLAB_PANIC, - pmd_ctor); - if (!SHARED_KERNEL_PMD) { - /* If we're in PAE mode and have a non-shared - kernel pmd, then the pgd size must be a - page size. This is because the pgd_list - links through the page structure, so there - can only be one pgd per page for this to - work. */ - pgd_size = PAGE_SIZE; - } - } + PTRS_PER_PMD*sizeof(pmd_t), + PTRS_PER_PMD*sizeof(pmd_t), + SLAB_PANIC, + pmd_ctor); } /* --- head-2010-05-25.orig/arch/x86/mm/init_64-xen.c 2010-04-29 09:47:49.000000000 +0200 +++ head-2010-05-25/arch/x86/mm/init_64-xen.c 2010-04-29 09:48:00.000000000 +0200 @@ -798,7 +798,7 @@ static void xen_finish_init_mapping(void /* Setup the direct mapping of the physical memory at PAGE_OFFSET. This runs before bootmem is initialized and gets pages directly from the physical memory. To access them they are temporarily mapped. 
*/ -void __meminit init_memory_mapping(unsigned long start, unsigned long end) +void __init_refok init_memory_mapping(unsigned long start, unsigned long end) { unsigned long next; @@ -932,12 +932,6 @@ error: } EXPORT_SYMBOL_GPL(arch_add_memory); -int remove_memory(u64 start, u64 size) -{ - return -EINVAL; -} -EXPORT_SYMBOL_GPL(remove_memory); - #if !defined(CONFIG_ACPI_NUMA) && defined(CONFIG_NUMA) int memory_add_physaddr_to_nid(u64 start) { @@ -1216,14 +1210,6 @@ int in_gate_area_no_task(unsigned long a return (addr >= VSYSCALL_START) && (addr < VSYSCALL_END); } -#ifndef CONFIG_XEN -void * __init alloc_bootmem_high_node(pg_data_t *pgdat, unsigned long size) -{ - return __alloc_bootmem_core(pgdat->bdata, size, - SMP_CACHE_BYTES, (4UL*1024*1024*1024), 0); -} -#endif - const char *arch_vma_name(struct vm_area_struct *vma) { if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) @@ -1232,3 +1218,48 @@ const char *arch_vma_name(struct vm_area return "[vsyscall]"; return NULL; } + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +/* + * Initialise the sparsemem vmemmap using huge-pages at the PMD level. + */ +int __meminit vmemmap_populate(struct page *start_page, + unsigned long size, int node) +{ + unsigned long addr = (unsigned long)start_page; + unsigned long end = (unsigned long)(start_page + size); + unsigned long next; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + + for (; addr < end; addr = next) { + next = pmd_addr_end(addr, end); + + pgd = vmemmap_pgd_populate(addr, node); + if (!pgd) + return -ENOMEM; + pud = vmemmap_pud_populate(pgd, addr, node); + if (!pud) + return -ENOMEM; + + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t entry; + void *p = vmemmap_alloc_block(PMD_SIZE, node); + if (!p) + return -ENOMEM; + + entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL); + mk_pte_huge(entry); + set_pmd(pmd, __pmd(pte_val(entry))); + + printk(KERN_DEBUG " [%lx-%lx] PMD ->%p on node %d\n", + addr, addr + PMD_SIZE - 1, p, node); + } else + vmemmap_verify((pte_t *)pmd, node, addr, next); + } + + return 0; +} +#endif --- head-2010-05-25.orig/arch/x86/mm/pageattr_64-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/pageattr_64-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -17,9 +17,6 @@ #include #include -LIST_HEAD(mm_unpinned); -DEFINE_SPINLOCK(mm_unpinned_lock); - static void _pin_lock(struct mm_struct *mm, int lock) { if (lock) spin_lock(&mm->page_table_lock); @@ -81,8 +78,8 @@ static void _pin_lock(struct mm_struct * #define PIN_BATCH 8 static DEFINE_PER_CPU(multicall_entry_t[PIN_BATCH], pb_mcl); -static inline unsigned int mm_walk_set_prot(void *pt, pgprot_t flags, - unsigned int cpu, unsigned int seq) +static inline unsigned int pgd_walk_set_prot(void *pt, pgprot_t flags, + unsigned int cpu, unsigned int seq) { struct page *page = virt_to_page(pt); unsigned long pfn = page_to_pfn(page); @@ -100,9 +97,9 @@ static inline unsigned int mm_walk_set_p return seq; } -static void mm_walk(struct mm_struct *mm, pgprot_t flags) +static void pgd_walk(pgd_t *pgd_base, pgprot_t flags) { - pgd_t *pgd; + pgd_t *pgd = pgd_base; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -110,7 +107,6 @@ static void mm_walk(struct mm_struct *mm unsigned int cpu, seq; multicall_entry_t *mcl; - pgd = mm->pgd; cpu = get_cpu(); /* @@ -125,18 +121,18 @@ static void mm_walk(struct mm_struct *mm continue; pud = pud_offset(pgd, 0); if (PTRS_PER_PUD > 1) /* not folded */ - seq = mm_walk_set_prot(pud,flags,cpu,seq); + seq = pgd_walk_set_prot(pud,flags,cpu,seq); for (u = 0; u < PTRS_PER_PUD; u++, pud++) { if 
(pud_none(*pud)) continue; pmd = pmd_offset(pud, 0); if (PTRS_PER_PMD > 1) /* not folded */ - seq = mm_walk_set_prot(pmd,flags,cpu,seq); + seq = pgd_walk_set_prot(pmd,flags,cpu,seq); for (m = 0; m < PTRS_PER_PMD; m++, pmd++) { if (pmd_none(*pmd)) continue; pte = pte_offset_kernel(pmd,0); - seq = mm_walk_set_prot(pte,flags,cpu,seq); + seq = pgd_walk_set_prot(pte,flags,cpu,seq); } } } @@ -148,12 +144,12 @@ static void mm_walk(struct mm_struct *mm seq = 0; } MULTI_update_va_mapping(mcl + seq, - (unsigned long)__user_pgd(mm->pgd), - pfn_pte(virt_to_phys(__user_pgd(mm->pgd))>>PAGE_SHIFT, flags), + (unsigned long)__user_pgd(pgd_base), + pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags), 0); MULTI_update_va_mapping(mcl + seq + 1, - (unsigned long)mm->pgd, - pfn_pte(virt_to_phys(mm->pgd)>>PAGE_SHIFT, flags), + (unsigned long)pgd_base, + pfn_pte(virt_to_phys(pgd_base)>>PAGE_SHIFT, flags), UVMF_TLB_FLUSH); if (unlikely(HYPERVISOR_multicall_check(mcl, seq + 2, NULL))) BUG(); @@ -161,21 +157,35 @@ static void mm_walk(struct mm_struct *mm put_cpu(); } +static void __pgd_pin(pgd_t *pgd) +{ + pgd_walk(pgd, PAGE_KERNEL_RO); + xen_pgd_pin(__pa(pgd)); /* kernel */ + xen_pgd_pin(__pa(__user_pgd(pgd))); /* user */ + SetPagePinned(virt_to_page(pgd)); +} + +static void __pgd_unpin(pgd_t *pgd) +{ + xen_pgd_unpin(__pa(pgd)); + xen_pgd_unpin(__pa(__user_pgd(pgd))); + pgd_walk(pgd, PAGE_KERNEL); + ClearPagePinned(virt_to_page(pgd)); +} + +void pgd_test_and_unpin(pgd_t *pgd) +{ + if (PagePinned(virt_to_page(pgd))) + __pgd_unpin(pgd); +} + void mm_pin(struct mm_struct *mm) { if (xen_feature(XENFEAT_writable_page_tables)) return; pin_lock(mm); - - mm_walk(mm, PAGE_KERNEL_RO); - xen_pgd_pin(__pa(mm->pgd)); /* kernel */ - xen_pgd_pin(__pa(__user_pgd(mm->pgd))); /* user */ - SetPagePinned(virt_to_page(mm->pgd)); - spin_lock(&mm_unpinned_lock); - list_del(&mm->context.unpinned); - spin_unlock(&mm_unpinned_lock); - + __pgd_pin(mm->pgd); pin_unlock(mm); } @@ -185,34 +195,30 @@ void mm_unpin(struct mm_struct *mm) return; pin_lock(mm); - - xen_pgd_unpin(__pa(mm->pgd)); - xen_pgd_unpin(__pa(__user_pgd(mm->pgd))); - mm_walk(mm, PAGE_KERNEL); - ClearPagePinned(virt_to_page(mm->pgd)); - spin_lock(&mm_unpinned_lock); - list_add(&mm->context.unpinned, &mm_unpinned); - spin_unlock(&mm_unpinned_lock); - + __pgd_unpin(mm->pgd); pin_unlock(mm); } void mm_pin_all(void) { + struct page *page; + unsigned long flags; + if (xen_feature(XENFEAT_writable_page_tables)) return; /* - * Allow uninterrupted access to the mm_unpinned list. We don't - * actually take the mm_unpinned_lock as it is taken inside mm_pin(). + * Allow uninterrupted access to the pgd_list. Also protects + * __pgd_pin() by disabling preemption. * All other CPUs must be at a safe point (e.g., in stop_machine * or offlined entirely). 
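* (pgd_list, protected by pgd_lock, links every page directory in the system * and is kept up to date as pgds are allocated and freed, so walking it under * the lock reaches every mm whose pgd may still need pinning.)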
*/ - preempt_disable(); - while (!list_empty(&mm_unpinned)) - mm_pin(list_entry(mm_unpinned.next, struct mm_struct, - context.unpinned)); - preempt_enable(); + spin_lock_irqsave(&pgd_lock, flags); + list_for_each_entry(page, &pgd_list, lru) { + if (!PagePinned(page)) + __pgd_pin((pgd_t *)page_address(page)); + } + spin_unlock_irqrestore(&pgd_lock, flags); } void arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) @@ -331,11 +337,11 @@ static struct page *split_large_page(uns return base; } -static void cache_flush_page(void *adr) +void clflush_cache_range(void *adr, int size) { int i; - for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) - asm volatile("clflush (%0)" :: "r" (adr + i)); + for (i = 0; i < size; i += boot_cpu_data.x86_clflush_size) + clflush(adr+i); } static void flush_kernel_map(void *arg) @@ -350,7 +356,7 @@ static void flush_kernel_map(void *arg) asm volatile("wbinvd" ::: "memory"); else list_for_each_entry(pg, l, lru) { void *adr = page_address(pg); - cache_flush_page(adr); + clflush_cache_range(adr, PAGE_SIZE); } __flush_tlb_all(); } @@ -418,6 +424,7 @@ __change_page_attr(unsigned long address split = split_large_page(address, prot, ref_prot2); if (!split) return -ENOMEM; + pgprot_val(ref_prot2) &= ~_PAGE_NX; set_pte(kpte, mk_pte(split, ref_prot2)); kpte_page = split; } @@ -510,9 +517,14 @@ void global_flush_tlb(void) struct page *pg, *next; struct list_head l; - down_read(&init_mm.mmap_sem); + /* + * Write-protect the semaphore, to exclude two contexts + * doing a list_replace_init() call in parallel and to + * exclude new additions to the deferred_pages list: + */ + down_write(&init_mm.mmap_sem); list_replace_init(&deferred_pages, &l); - up_read(&init_mm.mmap_sem); + up_write(&init_mm.mmap_sem); flush_map(&l); --- head-2010-05-25.orig/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/mm/pgtable_32-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -6,6 +6,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/mm.h> +#include <linux/nmi.h> #include <linux/swap.h> #include <linux/smp.h> #include <linux/highmem.h> @@ -46,6 +47,8 @@ void show_mem(void) for_each_online_pgdat(pgdat) { pgdat_resize_lock(pgdat, &flags); for (i = 0; i < pgdat->node_spanned_pages; ++i) { + if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) + touch_nmi_watchdog(); page = pgdat_page_nr(pgdat, i); total++; if (PageHighMem(page)) @@ -206,7 +209,7 @@ void pte_free(struct page *pte) __free_page(pte); } -void pmd_ctor(void *pmd, struct kmem_cache *cache, unsigned long flags) +void pmd_ctor(struct kmem_cache *cache, void *pmd) { memset(pmd, 0, PTRS_PER_PMD*sizeof(pmd_t)); } --- head-2010-05-25.orig/arch/x86/pci/irq-xen.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/arch/x86/pci/irq-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -173,7 +173,7 @@ void eisa_set_level_irq(unsigned int irq } /* - * Common IRQ routing practice: nybbles in config space, + * Common IRQ routing practice: nibbles in config space, * offset by some magic constant. */ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr) @@ -496,6 +496,26 @@ static int pirq_amd756_set(struct pci_de return 1; } +/* + * PicoPower PT86C523 + */ +static int pirq_pico_get(struct pci_dev *router, struct pci_dev *dev, int pirq) +{ + outb(0x10 + ((pirq - 1) >> 1), 0x24); + return ((pirq - 1) & 1) ? (inb(0x26) >> 4) : (inb(0x26) & 0xf); +} + +static int pirq_pico_set(struct pci_dev *router, struct pci_dev *dev, int pirq, + int irq) +{ + unsigned int x; + outb(0x10 + ((pirq - 1) >> 1), 0x24); + x = inb(0x26); + x = ((pirq - 1) & 1) ?
((x & 0x0f) | (irq << 4)) : ((x & 0xf0) | (irq)); + outb(x, 0x26); + return 1; +} + #ifdef CONFIG_PCI_BIOS static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) @@ -569,7 +589,7 @@ static __init int via_router_probe(struc /* FIXME: We should move some of the quirk fixup stuff here */ /* - * work arounds for some buggy BIOSes + * workarounds for some buggy BIOSes */ if (device == PCI_DEVICE_ID_VIA_82C586_0) { switch(router->device) { @@ -725,6 +745,24 @@ static __init int amd_router_probe(struc return 1; } +static __init int pico_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) +{ + switch (device) { + case PCI_DEVICE_ID_PICOPOWER_PT86C523: + r->name = "PicoPower PT86C523"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + + case PCI_DEVICE_ID_PICOPOWER_PT86C523BBP: + r->name = "PicoPower PT86C523 rev. BB+"; + r->get = pirq_pico_get; + r->set = pirq_pico_set; + return 1; + } + return 0; +} + static __initdata struct irq_router_handler pirq_routers[] = { { PCI_VENDOR_ID_INTEL, intel_router_probe }, { PCI_VENDOR_ID_AL, ali_router_probe }, @@ -736,6 +774,7 @@ static __initdata struct irq_router_hand { PCI_VENDOR_ID_VLSI, vlsi_router_probe }, { PCI_VENDOR_ID_SERVERWORKS, serverworks_router_probe }, { PCI_VENDOR_ID_AMD, amd_router_probe }, + { PCI_VENDOR_ID_PICOPOWER, pico_router_probe }, /* Someone with docs needs to add the ATI Radeon IGP */ { 0, NULL } }; @@ -1014,7 +1053,7 @@ static void __init pcibios_fixup_irqs(vo * Work around broken HP Pavilion Notebooks which assign USB to * IRQ 9 even though it is actually wired to IRQ 11 */ -static int __init fix_broken_hp_bios_irq9(struct dmi_system_id *d) +static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) { if (!broken_hp_bios_irq9) { broken_hp_bios_irq9 = 1; @@ -1027,7 +1066,7 @@ static int __init fix_broken_hp_bios_irq * Work around broken Acer TravelMate 360 Notebooks which assign * Cardbus to IRQ 11 even though it is actually wired to IRQ 10 */ -static int __init fix_acer_tm360_irqrouting(struct dmi_system_id *d) +static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) { if (!acer_tm360_irqrouting) { acer_tm360_irqrouting = 1; --- head-2010-05-25.orig/drivers/acpi/processor_idle.c 2010-04-15 09:43:01.000000000 +0200 +++ head-2010-05-25/drivers/acpi/processor_idle.c 2010-04-15 09:55:39.000000000 +0200 @@ -1159,6 +1159,14 @@ int acpi_processor_cst_has_changed(struc if (!pr->flags.power_setup_done) return -ENODEV; + if (processor_pm_external()) { + pr->flags.power = 0; + ret = acpi_processor_get_power_info(pr); + processor_notify_external(pr, + PROCESSOR_PM_CHANGE, PM_TYPE_IDLE); + return ret; + } + cpuidle_pause_and_lock(); cpuidle_disable_device(&pr->power.dev); acpi_processor_get_power_info(pr); --- head-2010-05-25.orig/drivers/cpuidle/Kconfig 2010-05-25 09:12:09.000000000 +0200 +++ head-2010-05-25/drivers/cpuidle/Kconfig 2010-03-24 15:10:29.000000000 +0100 @@ -1,6 +1,7 @@ config CPU_IDLE bool "CPU idle PM support" + depends on !PROCESSOR_EXTERNAL_CONTROL default ACPI help CPU idle is a generic framework for supporting software-controlled --- head-2010-05-25.orig/drivers/pci/msi-xen.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/pci/msi-xen.c 2010-03-24 15:10:29.000000000 +0100 @@ -260,6 +260,12 @@ static int msi_map_vector(struct pci_dev map_irq.pirq : evtchn_map_pirq(-1, map_irq.pirq)); } +static void pci_intx_for_msi(struct pci_dev *dev, int enable) +{ + if (!(dev->dev_flags & 
PCI_DEV_FLAGS_MSI_INTX_DISABLE_BUG)) + pci_intx(dev, enable); +} + #ifdef CONFIG_PM void pci_restore_msi_state(struct pci_dev *dev) { @@ -269,7 +275,7 @@ void pci_restore_msi_state(struct pci_de if (!dev->msi_enabled && !dev->msix_enabled) return; - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); if (dev->msi_enabled) msi_set_enable(dev, 0); if (dev->msix_enabled) @@ -306,7 +312,7 @@ static int msi_capability_init(struct pc return -EBUSY; /* Set MSI enabled bits */ - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msi_set_enable(dev, 1); dev->msi_enabled = 1; @@ -380,7 +386,7 @@ static int msix_capability_init(struct p return avail; } - pci_intx(dev, 0); /* disable intx */ + pci_intx_for_msi(dev, 0); msix_set_enable(dev, 1); dev->msix_enabled = 1; @@ -516,7 +522,7 @@ void pci_disable_msi(struct pci_dev* dev /* Disable MSI mode */ msi_set_enable(dev, 0); - pci_intx(dev, 1); /* enable intx */ + pci_intx_for_msi(dev, 1); dev->msi_enabled = 0; } EXPORT_SYMBOL(pci_disable_msi); @@ -653,7 +659,7 @@ void pci_disable_msix(struct pci_dev* de /* Disable MSI mode */ msix_set_enable(dev, 0); - pci_intx(dev, 1); /* enable intx */ + pci_intx_for_msi(dev, 1); dev->msix_enabled = 0; } EXPORT_SYMBOL(pci_disable_msix); --- head-2010-05-25.orig/drivers/oprofile/cpu_buffer.c 2010-03-24 15:02:17.000000000 +0100 +++ head-2010-05-25/drivers/oprofile/cpu_buffer.c 2010-03-24 15:10:29.000000000 +0100 @@ -442,6 +442,39 @@ void oprofile_add_pc(unsigned long pc, i log_sample(cpu_buf, pc, 0, is_kernel, event); } +#ifdef CONFIG_XEN +/* + * This is basically log_sample(b, ESCAPE_CODE, cpu_mode, CPU_TRACE_BEGIN), + * as was previously accessible through oprofile_add_pc(). + */ +void oprofile_add_mode(int cpu_mode) +{ + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct task_struct *task; + + if (nr_available_slots(cpu_buf) < 3) { + cpu_buf->sample_lost_overflow++; + return; + } + + task = current; + + /* notice a switch from user->kernel or vice versa */ + if (cpu_buf->last_cpu_mode != cpu_mode) { + cpu_buf->last_cpu_mode = cpu_mode; + add_code(cpu_buf, cpu_mode); + } + + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + add_code(cpu_buf, (unsigned long)task); + } + + add_code(cpu_buf, CPU_TRACE_BEGIN); +} +#endif + void oprofile_add_trace(unsigned long pc) { struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); --- head-2010-05-25.orig/drivers/xen/balloon/balloon.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/balloon/balloon.c 2010-03-24 15:10:29.000000000 +0100 @@ -324,6 +324,8 @@ static int increase_reservation(unsigned #ifndef MODULE setup_per_zone_pages_min(); + if (rc > 0) + kswapd_run(0); if (need_zonelists_rebuild) build_all_zonelists(); else --- head-2010-05-25.orig/drivers/xen/blkback/blkback.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/blkback/blkback.c 2010-03-24 15:10:29.000000000 +0100 @@ -275,13 +275,10 @@ static void __end_block_io_op(pending_re } } -static int end_block_io_op(struct bio *bio, unsigned int done, int error) +static void end_block_io_op(struct bio *bio, int error) { - if (bio->bi_size != 0) - return 1; __end_block_io_op(bio->bi_private, error); bio_put(bio); - return error; } --- head-2010-05-25.orig/drivers/xen/blkfront/blkfront.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/blkfront/blkfront.c 2010-03-24 15:10:29.000000000 +0100 @@ -233,7 +233,7 @@ static int setup_blkring(struct xenbus_d 
SHARED_RING_INIT(sring); FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); - memset(info->sg, 0, sizeof(info->sg)); + sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); if (err < 0) { @@ -651,9 +651,8 @@ static int blkif_queue_request(struct re ring_req->nr_segments = blk_rq_map_sg(req->q, req, info->sg); BUG_ON(ring_req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - for (i = 0; i < ring_req->nr_segments; ++i) { - sg = info->sg + i; - buffer_mfn = page_to_phys(sg->page) >> PAGE_SHIFT; + for_each_sg(info->sg, sg, ring_req->nr_segments, i) { + buffer_mfn = page_to_phys(sg_page(sg)) >> PAGE_SHIFT; fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; /* install a grant reference. */ --- head-2010-05-25.orig/drivers/xen/blktap2/control.c 2010-05-19 17:51:54.000000000 +0200 +++ head-2010-05-25/drivers/xen/blktap2/control.c 2010-03-24 15:10:29.000000000 +0100 @@ -18,6 +18,7 @@ blktap_control_initialize_tap(struct blk memset(tap, 0, sizeof(*tap)); set_bit(BLKTAP_CONTROL, &tap->dev_inuse); init_rwsem(&tap->tap_sem); + sg_init_table(tap->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); init_waitqueue_head(&tap->wq); atomic_set(&tap->refcnt, 0); --- head-2010-05-25.orig/drivers/xen/blktap2/device.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/blktap2/device.c 2010-03-24 15:10:29.000000000 +0100 @@ -665,8 +665,7 @@ blktap_device_process_request(struct blk request->nr_pages = 0; blkif_req.nr_segments = blk_rq_map_sg(req->q, req, tap->sg); BUG_ON(blkif_req.nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST); - for (i = 0; i < blkif_req.nr_segments; ++i) { - sg = tap->sg + i; + for_each_sg(tap->sg, sg, blkif_req.nr_segments, i) { fsect = sg->offset >> 9; lsect = fsect + (sg->length >> 9) - 1; nr_sects += sg->length >> 9; @@ -677,13 +676,13 @@ blktap_device_process_request(struct blk .first_sect = fsect, .last_sect = lsect }; - if (PageBlkback(sg->page)) { + if (PageBlkback(sg_page(sg))) { /* foreign page -- use xen */ if (blktap_prep_foreign(tap, request, &blkif_req, i, - sg->page, + sg_page(sg), &table)) goto out; } else { @@ -691,7 +690,7 @@ blktap_device_process_request(struct blk if (blktap_map(tap, request, i, - sg->page)) + sg_page(sg))) goto out; } --- head-2010-05-25.orig/drivers/xen/core/firmware.c 2007-06-22 09:08:06.000000000 +0200 +++ head-2010-05-25/drivers/xen/core/firmware.c 2010-03-24 15:10:29.000000000 +0100 @@ -1,4 +1,5 @@ #include +#include #include #include #include --- head-2010-05-25.orig/drivers/xen/core/machine_kexec.c 2009-07-13 14:25:35.000000000 +0200 +++ head-2010-05-25/drivers/xen/core/machine_kexec.c 2010-03-24 15:10:29.000000000 +0100 @@ -29,6 +29,10 @@ void __init xen_machine_kexec_setup_reso int k = 0; int rc; + if (strstr(boot_command_line, "crashkernel=")) + printk(KERN_WARNING "Ignoring crashkernel command line, " + "parameter will be supplied by xen\n"); + if (!is_initial_xendomain()) return; @@ -130,6 +134,13 @@ void __init xen_machine_kexec_setup_reso xen_max_nr_phys_cpus)) goto err; +#ifdef CONFIG_X86 + if (xen_create_contiguous_region((unsigned long)&vmcoreinfo_note, + get_order(sizeof(vmcoreinfo_note)), + BITS_PER_LONG)) + goto err; +#endif + return; err: @@ -213,6 +224,13 @@ NORET_TYPE void machine_kexec(struct kim panic("KEXEC_CMD_kexec hypercall should not return\n"); } +#ifdef CONFIG_X86 +unsigned long paddr_vmcoreinfo_note(void) +{ + return virt_to_machine(&vmcoreinfo_note); +} +#endif + void machine_shutdown(void) { /* do nothing */ --- 
head-2010-05-25.orig/drivers/xen/core/smpboot.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/xen/core/smpboot.c 2010-03-24 15:10:29.000000000 +0100 @@ -45,8 +45,8 @@ cpumask_t cpu_possible_map; EXPORT_SYMBOL(cpu_possible_map); cpumask_t cpu_initialized_map; -struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; -EXPORT_SYMBOL(cpu_data); +DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info); +EXPORT_PER_CPU_SYMBOL(cpu_info); static DEFINE_PER_CPU(int, resched_irq); static DEFINE_PER_CPU(int, callfunc_irq); @@ -55,12 +55,12 @@ static char callfunc_name[NR_CPUS][15]; u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; -cpumask_t cpu_sibling_map[NR_CPUS] __cacheline_aligned; -cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); +DEFINE_PER_CPU(cpumask_t, cpu_core_map); #if defined(__i386__) -u8 x86_cpu_to_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = 0xff }; -EXPORT_SYMBOL(x86_cpu_to_apicid); +DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID; +EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid); #endif void __init prefill_possible_map(void) @@ -85,25 +85,25 @@ void __init smp_alloc_memory(void) static inline void set_cpu_sibling_map(unsigned int cpu) { - cpu_data[cpu].phys_proc_id = cpu; - cpu_data[cpu].cpu_core_id = 0; + cpu_data(cpu).phys_proc_id = cpu; + cpu_data(cpu).cpu_core_id = 0; - cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); - cpu_core_map[cpu] = cpumask_of_cpu(cpu); + per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu); + per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu); - cpu_data[cpu].booted_cores = 1; + cpu_data(cpu).booted_cores = 1; } static void remove_siblinginfo(unsigned int cpu) { - cpu_data[cpu].phys_proc_id = BAD_APICID; - cpu_data[cpu].cpu_core_id = BAD_APICID; + cpu_data(cpu).phys_proc_id = BAD_APICID; + cpu_data(cpu).cpu_core_id = BAD_APICID; - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); - cpu_data[cpu].booted_cores = 0; + cpu_data(cpu).booted_cores = 0; } static int __cpuinit xen_smp_intr_init(unsigned int cpu) @@ -162,9 +162,9 @@ void __cpuinit cpu_bringup(void) { cpu_init(); #ifdef __i386__ - identify_secondary_cpu(cpu_data + smp_processor_id()); + identify_secondary_cpu(&current_cpu_data); #else - identify_cpu(cpu_data + smp_processor_id()); + identify_cpu(&current_cpu_data); #endif touch_softlockup_watchdog(); preempt_disable(); @@ -265,16 +265,16 @@ void __init smp_prepare_cpus(unsigned in if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); boot_cpu_data.apicid = apicid; - cpu_data[0] = boot_cpu_data; + cpu_data(0) = boot_cpu_data; cpu_2_logical_apicid[0] = apicid; - x86_cpu_to_apicid[0] = apicid; + per_cpu(x86_cpu_to_apicid, 0) = apicid; current_thread_info()->cpu = 0; for (cpu = 0; cpu < NR_CPUS; cpu++) { - cpus_clear(cpu_sibling_map[cpu]); - cpus_clear(cpu_core_map[cpu]); + cpus_clear(per_cpu(cpu_sibling_map, cpu)); + cpus_clear(per_cpu(cpu_core_map, cpu)); } set_cpu_sibling_map(0); @@ -319,11 +319,12 @@ void __init smp_prepare_cpus(unsigned in apicid = cpu; if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0) apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id); - cpu_data[cpu] = boot_cpu_data; - cpu_data[cpu].apicid = apicid; + cpu_data(cpu) = boot_cpu_data; + cpu_data(cpu).cpu_index = cpu; + cpu_data(cpu).apicid = apicid; cpu_2_logical_apicid[cpu] = apicid; - x86_cpu_to_apicid[cpu] = apicid; + per_cpu(x86_cpu_to_apicid, cpu) = 
apicid; #ifdef __x86_64__ cpu_pda(cpu)->pcurrent = idle; --- head-2010-05-25.orig/drivers/xen/netback/loopback.c 2010-03-24 15:08:58.000000000 +0100 +++ head-2010-05-25/drivers/xen/netback/loopback.c 2010-03-24 15:10:29.000000000 +0100 @@ -285,9 +285,9 @@ static void __exit clean_loopback(int i) char dev_name[IFNAMSIZ]; sprintf(dev_name, "vif0.%d", i); - dev1 = dev_get_by_name(dev_name); + dev1 = dev_get_by_name(&init_net, dev_name); sprintf(dev_name, "veth%d", i); - dev2 = dev_get_by_name(dev_name); + dev2 = dev_get_by_name(&init_net, dev_name); if (dev1 && dev2) { unregister_netdev(dev2); unregister_netdev(dev1); --- head-2010-05-25.orig/drivers/xen/netback/netback.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/xen/netback/netback.c 2010-03-24 15:10:29.000000000 +0100 @@ -353,8 +353,8 @@ static void xen_network_done_notify(void { static struct net_device *eth0_dev = NULL; if (unlikely(eth0_dev == NULL)) - eth0_dev = __dev_get_by_name("eth0"); - netif_rx_schedule(eth0_dev); + eth0_dev = __dev_get_by_name(&init_net, "eth0"); + netif_rx_schedule(eth0_dev, ???); } /* * Add following to poll() function in NAPI driver (Tigon3 is example): --- head-2010-05-25.orig/drivers/xen/netback/xenbus.c 2010-03-24 15:09:08.000000000 +0100 +++ head-2010-05-25/drivers/xen/netback/xenbus.c 2010-03-24 15:10:29.000000000 +0100 @@ -149,12 +149,10 @@ fail: * and vif variables to the environment, for the benefit of the vif-* hotplug * scripts. */ -static int netback_uevent(struct xenbus_device *xdev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int netback_uevent(struct xenbus_device *xdev, struct kobj_uevent_env *env) { struct backend_info *be = xdev->dev.driver_data; netif_t *netif = be->netif; - int i = 0, length = 0; char *val; DPRINTK("netback_uevent"); @@ -166,15 +164,11 @@ static int netback_uevent(struct xenbus_ return err; } else { - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, - &length, "script=%s", val); + add_uevent_var(env, "script=%s", val); kfree(val); } - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "vif=%s", netif->dev->name); - - envp[i] = NULL; + add_uevent_var(env, "vif=%s", netif->dev->name); return 0; } --- head-2010-05-25.orig/drivers/xen/netfront/accel.c 2009-05-04 10:01:03.000000000 +0200 +++ head-2010-05-25/drivers/xen/netfront/accel.c 2010-03-24 15:10:29.000000000 +0100 @@ -325,7 +325,7 @@ accelerator_set_vif_state_hooks(struct n DPRINTK("%p\n",vif_state); /* Make sure there are no data path operations going on */ - netif_poll_disable(vif_state->np->netdev); + napi_disable(&vif_state->np->napi); netif_tx_lock_bh(vif_state->np->netdev); accelerator = vif_state->np->accelerator; @@ -334,7 +334,7 @@ accelerator_set_vif_state_hooks(struct n spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); netif_tx_unlock_bh(vif_state->np->netdev); - netif_poll_enable(vif_state->np->netdev); + napi_enable(&vif_state->np->napi); } @@ -508,7 +508,7 @@ accelerator_remove_single_hook(struct ne unsigned long flags; /* Make sure there are no data path operations going on */ - netif_poll_disable(vif_state->np->netdev); + napi_disable(&vif_state->np->napi); netif_tx_lock_bh(vif_state->np->netdev); spin_lock_irqsave(&accelerator->vif_states_lock, flags); @@ -524,7 +524,7 @@ accelerator_remove_single_hook(struct ne spin_unlock_irqrestore(&accelerator->vif_states_lock, flags); netif_tx_unlock_bh(vif_state->np->netdev); - netif_poll_enable(vif_state->np->netdev); + napi_enable(&vif_state->np->napi); } --- 
head-2010-05-25.orig/drivers/xen/netfront/netfront.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/xen/netfront/netfront.c 2010-03-24 15:10:29.000000000 +0100 @@ -626,6 +626,7 @@ static int network_open(struct net_devic struct netfront_info *np = netdev_priv(dev); memset(&np->stats, 0, sizeof(np->stats)); + napi_enable(&np->napi); spin_lock_bh(&np->rx_lock); if (netfront_carrier_ok(np)) { @@ -634,7 +635,7 @@ static int network_open(struct net_devic if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)){ netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } } spin_unlock_bh(&np->rx_lock); @@ -706,7 +707,7 @@ static void rx_refill_timeout(unsigned l netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); } static void network_alloc_rx_buffers(struct net_device *dev) @@ -1063,7 +1064,7 @@ static irqreturn_t netif_int(int irq, vo if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx)) { netfront_accelerator_call_stop_napi_irq(np, dev); - netif_rx_schedule(dev); + netif_rx_schedule(dev, &np->napi); dev->last_rx = jiffies; } } @@ -1316,16 +1317,17 @@ static int xennet_set_skb_gso(struct sk_ #endif } -static int netif_poll(struct net_device *dev, int *pbudget) +static int netif_poll(struct napi_struct *napi, int budget) { - struct netfront_info *np = netdev_priv(dev); + struct netfront_info *np = container_of(napi, struct netfront_info, napi); + struct net_device *dev = np->netdev; struct sk_buff *skb; struct netfront_rx_info rinfo; struct netif_rx_response *rx = &rinfo.rx; struct netif_extra_info *extras = rinfo.extras; RING_IDX i, rp; struct multicall_entry *mcl; - int work_done, budget, more_to_do = 1, accel_more_to_do = 1; + int work_done, more_to_do = 1, accel_more_to_do = 1; struct sk_buff_head rxq; struct sk_buff_head errq; struct sk_buff_head tmpq; @@ -1345,8 +1347,6 @@ static int netif_poll(struct net_device skb_queue_head_init(&errq); skb_queue_head_init(&tmpq); - if ((budget = *pbudget) > dev->quota) - budget = dev->quota; rp = np->rx.sring->rsp_prod; rmb(); /* Ensure we see queued responses up to 'rp'. 
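(This read barrier pairs with the write barrier the backend executes before advancing rsp_prod, so every response slot below 'rp' is fully written by the time it is read.)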
*/ @@ -1508,9 +1508,6 @@ err: accel_more_to_do = 0; } - *pbudget -= work_done; - dev->quota -= work_done; - if (work_done < budget) { local_irq_save(flags); @@ -1527,14 +1524,14 @@ err: } if (!more_to_do && !accel_more_to_do) - __netif_rx_complete(dev); + __netif_rx_complete(dev, napi); local_irq_restore(flags); } spin_unlock(&np->rx_lock); - return more_to_do | accel_more_to_do; + return work_done; } static void netif_release_tx_bufs(struct netfront_info *np) @@ -1681,6 +1678,7 @@ static int network_close(struct net_devi { struct netfront_info *np = netdev_priv(dev); netif_stop_queue(np->netdev); + napi_disable(&np->napi); return 0; } @@ -2088,16 +2086,14 @@ static struct net_device * __devinit cre netdev->hard_start_xmit = network_start_xmit; netdev->stop = network_close; netdev->get_stats = network_get_stats; - netdev->poll = netif_poll; + netif_napi_add(netdev, &np->napi, netif_poll, 64); netdev->set_multicast_list = network_set_multicast_list; netdev->uninit = netif_uninit; netdev->set_mac_address = xennet_set_mac_address; netdev->change_mtu = xennet_change_mtu; - netdev->weight = 64; netdev->features = NETIF_F_IP_CSUM; SET_ETHTOOL_OPS(netdev, &network_ethtool_ops); - SET_MODULE_OWNER(netdev); SET_NETDEV_DEV(netdev, &dev->dev); np->netdev = netdev; --- head-2010-05-25.orig/drivers/xen/netfront/netfront.h 2010-02-24 13:13:46.000000000 +0100 +++ head-2010-05-25/drivers/xen/netfront/netfront.h 2010-03-24 15:10:29.000000000 +0100 @@ -157,6 +157,8 @@ struct netfront_info { spinlock_t tx_lock; spinlock_t rx_lock; + struct napi_struct napi; + unsigned int irq; unsigned int copying_receiver; unsigned int carrier; --- head-2010-05-25.orig/drivers/xen/pciback/Makefile 2008-07-21 11:00:33.000000000 +0200 +++ head-2010-05-25/drivers/xen/pciback/Makefile 2010-03-24 15:10:29.000000000 +0100 @@ -12,6 +12,4 @@ pciback-$(CONFIG_XEN_PCIDEV_BACKEND_SLOT pciback-$(CONFIG_XEN_PCIDEV_BACKEND_PASS) += passthrough.o pciback-$(CONFIG_XEN_PCIDEV_BACKEND_CONTROLLER) += controller.o -ifeq ($(CONFIG_XEN_PCIDEV_BE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_XEN_PCIDEV_BE_DEBUG) += -DDEBUG --- head-2010-05-25.orig/drivers/xen/pcifront/Makefile 2007-06-12 13:13:45.000000000 +0200 +++ head-2010-05-25/drivers/xen/pcifront/Makefile 2010-03-24 15:10:29.000000000 +0100 @@ -2,6 +2,4 @@ obj-y += pcifront.o pcifront-y := pci_op.o xenbus.o pci.o -ifeq ($(CONFIG_XEN_PCIDEV_FE_DEBUG),y) -EXTRA_CFLAGS += -DDEBUG -endif +ccflags-$(CONFIG_XEN_PCIDEV_FE_DEBUG) += -DDEBUG --- head-2010-05-25.orig/drivers/xen/scsiback/emulate.c 2009-03-18 10:39:32.000000000 +0100 +++ head-2010-05-25/drivers/xen/scsiback/emulate.c 2010-03-24 15:10:29.000000000 +0100 @@ -109,9 +109,10 @@ static void resp_not_supported_cmd(pendi } -static int __copy_to_sg(struct scatterlist *sg, unsigned int nr_sg, +static int __copy_to_sg(struct scatterlist *sgl, unsigned int nr_sg, void *buf, unsigned int buflen) { + struct scatterlist *sg; void *from = buf; void *to; unsigned int from_rest = buflen; @@ -120,8 +121,8 @@ static int __copy_to_sg(struct scatterli unsigned int i; unsigned long pfn; - for (i = 0; i < nr_sg; i++) { - if (sg->page == NULL) { + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { printk(KERN_WARNING "%s: inconsistent length field in " "scatterlist\n", __FUNCTION__); return -ENOMEM; @@ -130,7 +131,7 @@ static int __copy_to_sg(struct scatterli to_capa = sg->length; copy_size = min_t(unsigned int, to_capa, from_rest); - pfn = page_to_pfn(sg->page); + pfn = page_to_pfn(sg_page(sg)); to = pfn_to_kaddr(pfn) + 
(sg->offset); memcpy(to, from, copy_size); @@ -139,7 +140,6 @@ static int __copy_to_sg(struct scatterli return 0; } - sg++; from += copy_size; } @@ -148,9 +148,10 @@ static int __copy_to_sg(struct scatterli return -ENOMEM; } -static int __copy_from_sg(struct scatterlist *sg, unsigned int nr_sg, +static int __copy_from_sg(struct scatterlist *sgl, unsigned int nr_sg, void *buf, unsigned int buflen) { + struct scatterlist *sg; void *from; void *to = buf; unsigned int from_rest; @@ -159,8 +160,8 @@ static int __copy_from_sg(struct scatter unsigned int i; unsigned long pfn; - for (i = 0; i < nr_sg; i++) { - if (sg->page == NULL) { + for_each_sg (sgl, sg, nr_sg, i) { + if (sg_page(sg) == NULL) { printk(KERN_WARNING "%s: inconsistent length field in " "scatterlist\n", __FUNCTION__); return -ENOMEM; @@ -175,13 +176,11 @@ static int __copy_from_sg(struct scatter } copy_size = from_rest; - pfn = page_to_pfn(sg->page); + pfn = page_to_pfn(sg_page(sg)); from = pfn_to_kaddr(pfn) + (sg->offset); memcpy(to, from, copy_size); to_capa -= copy_size; - - sg++; to += copy_size; } --- head-2010-05-25.orig/drivers/xen/scsiback/scsiback.c 2010-03-24 15:08:58.000000000 +0100 +++ head-2010-05-25/drivers/xen/scsiback/scsiback.c 2010-03-24 15:10:29.000000000 +0100 @@ -260,6 +260,8 @@ static int scsiback_gnttab_data_map(vscs write = (data_dir == DMA_TO_DEVICE); if (nr_segments) { + struct scatterlist *sg; + /* free of (sgl) in fast_flush_area()*/ pending_req->sgl = kmalloc(sizeof(struct scatterlist) * nr_segments, GFP_KERNEL); @@ -268,6 +270,8 @@ static int scsiback_gnttab_data_map(vscs return -ENOMEM; } + sg_init_table(pending_req->sgl, nr_segments); + for (i = 0; i < nr_segments; i++) { flags = GNTMAP_host_map; if (write) @@ -291,7 +295,7 @@ static int scsiback_gnttab_data_map(vscs } } - for (i = 0; i < nr_segments; i++) { + for_each_sg (pending_req->sgl, sg, nr_segments, i) { struct page *pg; if (unlikely(map[i].status != 0)) { @@ -310,15 +314,14 @@ static int scsiback_gnttab_data_map(vscs set_phys_to_machine(page_to_pfn(pg), FOREIGN_FRAME(map[i].dev_bus_addr >> PAGE_SHIFT)); - pending_req->sgl[i].page = pg; - pending_req->sgl[i].offset = ring_req->seg[i].offset; - pending_req->sgl[i].length = ring_req->seg[i].length; - data_len += pending_req->sgl[i].length; + sg_set_page(sg, pg, ring_req->seg[i].length, + ring_req->seg[i].offset); + data_len += sg->length; barrier(); - if (pending_req->sgl[i].offset >= PAGE_SIZE || - pending_req->sgl[i].length > PAGE_SIZE || - pending_req->sgl[i].offset + pending_req->sgl[i].length > PAGE_SIZE) + if (sg->offset >= PAGE_SIZE || + sg->length > PAGE_SIZE || + sg->offset + sg->length > PAGE_SIZE) err |= 1; } @@ -347,27 +350,14 @@ static int scsiback_merge_bio(struct req blk_queue_bounce(q, &bio); - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!ll_back_merge_fn(q, rq, bio)) - return -EINVAL; - else { - rq->biotail->bi_next = bio; - rq->biotail = bio; - } - - return 0; + return blk_rq_append_bio(q, rq, bio); } /* quoted scsi_lib.c/scsi_bi_endio */ -static int scsiback_bi_endio(struct bio *bio, unsigned int bytes_done, int error) +static void scsiback_bi_endio(struct bio *bio, int error) { - if (bio->bi_size) - return 1; - bio_put(bio); - return 0; } @@ -378,16 +368,16 @@ static int request_map_sg(struct request struct request_queue *q = rq->q; int nr_pages; unsigned int nsegs = count; - unsigned int data_len = 0, len, bytes, off; + struct scatterlist *sg; struct page *page; struct bio *bio = NULL; int i, err, nr_vecs = 0; - for (i = 0; i < nsegs; i++) { - page = 
pending_req->sgl[i].page; - off = (unsigned int)pending_req->sgl[i].offset; - len = (unsigned int)pending_req->sgl[i].length; + for_each_sg (pending_req->sgl, sg, nsegs, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; data_len += len; nr_pages = (len + off + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -415,7 +405,7 @@ static int request_map_sg(struct request if (bio->bi_vcnt >= nr_vecs) { err = scsiback_merge_bio(rq, bio); if (err) { - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); goto free_bios; } bio = NULL; @@ -438,7 +428,7 @@ free_bios: /* * call endio instead of bio_put incase it was bounced */ - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); } return err; --- head-2010-05-25.orig/drivers/xen/scsifront/scsifront.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/scsifront/scsifront.c 2010-03-24 15:10:29.000000000 +0100 @@ -246,11 +246,10 @@ static int map_data_for_request(struct v { grant_ref_t gref_head; struct page *page; - int err, i, ref, ref_cnt = 0; + int err, ref, ref_cnt = 0; int write = (sc->sc_data_direction == DMA_TO_DEVICE); - int nr_pages, off, len, bytes; + unsigned int i, nr_pages, off, len, bytes; unsigned long buffer_pfn; - unsigned int data_len = 0; if (sc->sc_data_direction == DMA_NONE) return 0; @@ -263,25 +262,31 @@ static int map_data_for_request(struct v if (sc->use_sg) { /* quoted scsi_lib.c/scsi_req_map_sg . */ - struct scatterlist *sg = (struct scatterlist *)sc->request_buffer; - nr_pages = (sc->request_bufflen + sg[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + struct scatterlist *sg, *sgl = (struct scatterlist *)sc->request_buffer; + unsigned int data_len = sc->request_bufflen; + nr_pages = (sc->request_bufflen + sgl->offset + PAGE_SIZE - 1) >> PAGE_SHIFT; if (nr_pages > VSCSIIF_SG_TABLESIZE) { printk(KERN_ERR "scsifront: Unable to map request_buffer for command!\n"); ref_cnt = (-E2BIG); goto big_to_sg; } - for (i = 0; i < sc->use_sg; i++) { - page = sg[i].page; - off = sg[i].offset; - len = sg[i].length; - data_len += len; + for_each_sg (sgl, sg, sc->use_sg, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; buffer_pfn = page_to_phys(page) >> PAGE_SHIFT; - while (len > 0) { + while (len > 0 && data_len > 0) { + /* + * sg sends a scatterlist that is larger than + * the data_len it wants transferred for certain + * IO sizes + */ bytes = min_t(unsigned int, len, PAGE_SIZE - off); + bytes = min(bytes, data_len); ref = gnttab_claim_grant_reference(&gref_head); BUG_ON(ref == -ENOSPC); @@ -296,6 +301,7 @@ static int map_data_for_request(struct v buffer_pfn++; len -= bytes; + data_len -= bytes; off = 0; ref_cnt++; } --- head-2010-05-25.orig/drivers/xen/sfc_netback/accel_fwd.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netback/accel_fwd.c 2010-03-24 15:10:29.000000000 +0100 @@ -181,10 +181,11 @@ int netback_accel_fwd_add(const __u8 *ma unsigned long flags; cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); BUG_ON(fwd_priv == NULL); - DPRINTK("Adding mac " MAC_FMT "\n", MAC_ARG(mac)); + DPRINTK("Adding mac %s\n", print_mac(buf, mac)); spin_lock_irqsave(&fwd_set->fwd_lock, flags); @@ -199,8 +200,8 @@ int netback_accel_fwd_add(const __u8 *ma if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table, (cuckoo_hash_key *)(&key), &rc) != 0) { spin_unlock_irqrestore(&fwd_set->fwd_lock, flags); - EPRINTK("MAC address " MAC_FMT " already accelerated.\n", - MAC_ARG(mac)); + EPRINTK("MAC address %s already accelerated.\n", + 
print_mac(buf, mac)); return -EEXIST; } @@ -235,8 +236,9 @@ void netback_accel_fwd_remove(const __u8 unsigned long flags; cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac); struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv; + DECLARE_MAC_BUF(buf); - DPRINTK("Removing mac " MAC_FMT "\n", MAC_ARG(mac)); + DPRINTK("Removing mac %s\n", print_mac(buf, mac)); BUG_ON(fwd_priv == NULL); @@ -394,14 +396,16 @@ void netback_accel_tx_packet(struct sk_b if (is_broadcast_ether_addr(skb_mac_header(skb)) && packet_is_arp_reply(skb)) { + DECLARE_MAC_BUF(buf); + /* * update our fast path forwarding to reflect this * gratuitous ARP */ mac = skb_mac_header(skb)+ETH_ALEN; - DPRINTK("%s: found gratuitous ARP for " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(mac)); + DPRINTK("%s: found gratuitous ARP for %s\n", + __FUNCTION__, print_mac(buf, mac)); spin_lock_irqsave(&fwd_set->fwd_lock, flags); /* --- head-2010-05-25.orig/drivers/xen/sfc_netback/accel_msg.c 2008-02-20 09:32:49.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netback/accel_msg.c 2010-03-24 15:10:29.000000000 +0100 @@ -57,11 +57,11 @@ static void netback_accel_msg_tx_localma { unsigned long lock_state; struct net_accel_msg *msg; + DECLARE_MAC_BUF(buf); BUG_ON(bend == NULL || mac == NULL); - VPRINTK("Sending local mac message: " MAC_FMT "\n", - MAC_ARG((const char *)mac)); + VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac)); msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU, &lock_state); --- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:06:12.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:10:29.000000000 +0100 @@ -41,11 +41,13 @@ static void vnic_start_interrupts(netfro /* Prime our interrupt */ spin_lock_irqsave(&vnic->irq_enabled_lock, flags); if (!netfront_accel_vi_enable_interrupts(vnic)) { + struct netfront_info *np = netdev_priv(vnic->net_dev); + /* Cripes, that was quick, better pass it up */ netfront_accel_disable_net_interrupts(vnic); vnic->irq_enabled = 0; NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); - netif_rx_schedule(vnic->net_dev); + netif_rx_schedule(vnic->net_dev, &np->napi); } else { /* * Nothing yet, make sure we get interrupts through @@ -72,6 +74,7 @@ static void vnic_stop_interrupts(netfron static void vnic_start_fastpath(netfront_accel_vnic *vnic) { struct net_device *net_dev = vnic->net_dev; + struct netfront_info *np = netdev_priv(net_dev); unsigned long flags; DPRINTK("%s\n", __FUNCTION__); @@ -80,9 +83,9 @@ static void vnic_start_fastpath(netfront vnic->tx_enabled = 1; spin_unlock_irqrestore(&vnic->tx_lock, flags); - netif_poll_disable(net_dev); + napi_disable(&np->napi); vnic->poll_enabled = 1; - netif_poll_enable(net_dev); + napi_enable(&np->napi); vnic_start_interrupts(vnic); } @@ -114,11 +117,11 @@ void vnic_stop_fastpath(netfront_accel_v spin_unlock_irqrestore(&vnic->tx_lock, flags1); /* Must prevent polls and hold lock to modify poll_enabled */ - netif_poll_disable(net_dev); + napi_disable(&np->napi); spin_lock_irqsave(&vnic->irq_enabled_lock, flags1); vnic->poll_enabled = 0; spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1); - netif_poll_enable(net_dev); + napi_enable(&np->napi); } @@ -324,8 +327,10 @@ static int vnic_process_localmac_msg(net cuckoo_hash_mac_key key; if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) { - DPRINTK("MAC has moved, could be local: " MAC_FMT "\n", - MAC_ARG(msg->u.localmac.mac)); + DECLARE_MAC_BUF(buf); + + DPRINTK("MAC has moved, could be local: %s\n", + print_mac(buf, 
msg->u.localmac.mac)); key = cuckoo_mac_to_key(msg->u.localmac.mac); spin_lock_irqsave(&vnic->table_lock, flags); /* Try to remove it, not a big deal if not there */ @@ -513,6 +518,8 @@ irqreturn_t netfront_accel_net_channel_i spin_lock_irqsave(&vnic->irq_enabled_lock, flags); if (vnic->irq_enabled) { + struct netfront_info *np = netdev_priv(net_dev); + netfront_accel_disable_net_interrupts(vnic); vnic->irq_enabled = 0; spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); @@ -525,7 +532,7 @@ irqreturn_t netfront_accel_net_channel_i vnic->stats.event_count_since_irq; vnic->stats.event_count_since_irq = 0; #endif - netif_rx_schedule(net_dev); + netif_rx_schedule(net_dev, &np->napi); } else { spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); --- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:10:29.000000000 +0100 @@ -643,8 +643,10 @@ netfront_accel_vi_tx_post(netfront_accel (cuckoo_hash_key *)(&key), &value); if (!try_fastpath) { - VPRINTK("try fast path false for mac: " MAC_FMT "\n", - MAC_ARG(skb->data)); + DECLARE_MAC_BUF(buf); + + VPRINTK("try fast path false for mac: %s\n", + print_mac(buf, skb->data)); return NETFRONT_ACCEL_STATUS_CANT; } @@ -770,9 +772,10 @@ static void netfront_accel_vi_rx_comple if (compare_ether_addr(skb->data, vnic->mac)) { struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN); u16 port; + DECLARE_MAC_BUF(buf); - DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n", - __FUNCTION__, MAC_ARG(skb->data)); + DPRINTK("%s: saw wrong MAC address %s\n", + __FUNCTION__, print_mac(buf, skb->data)); if (ip->protocol == IPPROTO_TCP) { struct tcphdr *tcp = (struct tcphdr *) --- head-2010-05-25.orig/drivers/xen/sfc_netutil/accel_util.h 2008-02-20 09:32:49.000000000 +0100 +++ head-2010-05-25/drivers/xen/sfc_netutil/accel_util.h 2010-03-24 15:10:29.000000000 +0100 @@ -63,9 +63,6 @@ DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \ } while(0) -#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x" -#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5] - #include /*! 
Map a set of pages from another domain --- head-2010-05-25.orig/drivers/xen/usbback/usbback.c 2010-03-24 15:08:58.000000000 +0100 +++ head-2010-05-25/drivers/xen/usbback/usbback.c 2010-04-15 17:36:18.000000000 +0200 @@ -86,6 +86,8 @@ typedef struct { static pending_req_t *pending_reqs; static struct list_head pending_free; static DEFINE_SPINLOCK(pending_free_lock); +static LIST_HEAD(pending_urb_free); +static DEFINE_SPINLOCK(urb_free_lock); static DECLARE_WAIT_QUEUE_HEAD(pending_free_wq); #define USBBACK_INVALID_HANDLE (~0) @@ -272,6 +274,15 @@ fail: static void usbbk_free_urb(struct urb *urb) { + unsigned long flags; + + spin_lock_irqsave(&urb_free_lock, flags); + list_add(&urb->urb_list, &pending_urb_free); + spin_unlock_irqrestore(&urb_free_lock, flags); +} + +static void _usbbk_free_urb(struct urb *urb) +{ if (usb_pipecontrol(urb->pipe)) usb_buffer_free(urb->dev, sizeof(struct usb_ctrlrequest), urb->setup_packet, urb->setup_dma); @@ -282,6 +293,29 @@ static void usbbk_free_urb(struct urb *u usb_free_urb(urb); } +static void usbbk_free_urbs(void) +{ + unsigned long flags; + struct list_head tmp_list; + + if (list_empty(&pending_urb_free)) + return; + + INIT_LIST_HEAD(&tmp_list); + + spin_lock_irqsave(&urb_free_lock, flags); + list_splice_init(&pending_urb_free, &tmp_list); + spin_unlock_irqrestore(&urb_free_lock, flags); + + while (!list_empty(&tmp_list)) { + struct urb *next_urb = list_first_entry(&tmp_list, struct urb, + urb_list); + + list_del(&next_urb->urb_list); + _usbbk_free_urb(next_urb); + } +} + static void usbbk_notify_work(usbif_t *usbif) { usbif->waiting_reqs = 1; @@ -1059,8 +1093,11 @@ int usbbk_schedule(void *arg) if (usbbk_start_submit_urb(usbif)) usbif->waiting_reqs = 1; + + usbbk_free_urbs(); } + usbbk_free_urbs(); usbif->xenusbd = NULL; usbif_put(usbif); --- head-2010-05-25.orig/drivers/xen/usbfront/usbfront.h 2010-03-24 15:06:12.000000000 +0100 +++ head-2010-05-25/drivers/xen/usbfront/usbfront.h 2010-03-24 15:10:29.000000000 +0100 @@ -82,6 +82,7 @@ struct urb_priv { struct urb *urb; int req_id; /* RING_REQUEST id for submitting */ int unlink_req_id; /* RING_REQUEST id for unlinking */ + int status; unsigned unlinked:1; /* dequeued marker */ }; --- head-2010-05-25.orig/drivers/xen/usbfront/usbfront-hcd.c 2009-10-15 11:45:41.000000000 +0200 +++ head-2010-05-25/drivers/xen/usbfront/usbfront-hcd.c 2010-03-24 15:10:29.000000000 +0100 @@ -114,7 +114,6 @@ static void xenhcd_stop(struct usb_hcd * * non-error returns are promise to giveback the urb later */ static int xenhcd_urb_enqueue(struct usb_hcd *hcd, - struct usb_host_endpoint *ep, struct urb *urb, gfp_t mem_flags) { @@ -130,6 +129,7 @@ static int xenhcd_urb_enqueue(struct usb ret = -ENOMEM; goto done; } + urbp->status = 1; ret = xenhcd_submit_urb(info, urbp); if (ret != 0) @@ -144,7 +144,7 @@ done: * called as .urb_dequeue() */ static int xenhcd_urb_dequeue(struct usb_hcd *hcd, - struct urb *urb) + struct urb *urb, int status) { struct usbfront_info *info = hcd_to_info(hcd); struct urb_priv *urbp; @@ -157,6 +157,7 @@ static int xenhcd_urb_dequeue(struct usb if (!urbp) goto done; + urbp->status = status; ret = xenhcd_unlink_urb(info, urbp); done: --- head-2010-05-25.orig/drivers/xen/usbfront/usbfront-q.c 2010-03-24 15:06:12.000000000 +0100 +++ head-2010-05-25/drivers/xen/usbfront/usbfront-q.c 2010-03-24 15:10:29.000000000 +0100 @@ -236,7 +236,8 @@ __acquires(info->lock) COUNT(info->stats.complete); } spin_unlock(&info->lock); - usb_hcd_giveback_urb(info_to_hcd(info), urb); + usb_hcd_giveback_urb(info_to_hcd(info), urb, 
+ urbp->status <= 0 ? urbp->status : urb->status); spin_lock(&info->lock); } --- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:10:29.000000000 +0100 @@ -175,11 +175,9 @@ static int read_backend_details(struct x } #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16) && (defined(CONFIG_XEN) || defined(MODULE)) -static int xenbus_uevent_frontend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int xenbus_uevent_frontend(struct device *dev, struct kobj_uevent_env *env) { struct xenbus_device *xdev; - int length = 0, i = 0; if (dev == NULL) return -ENODEV; @@ -188,12 +186,9 @@ static int xenbus_uevent_frontend(struct return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_PATH=%s", xdev->nodename); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "MODALIAS=xen:%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(env, "MODALIAS=xen:%s", xdev->devicetype); return 0; } --- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe_backend.c 2010-03-24 15:09:22.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe_backend.c 2010-03-24 15:10:29.000000000 +0100 @@ -60,8 +60,7 @@ #include #endif -static int xenbus_uevent_backend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size); +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env); static int xenbus_probe_backend(const char *type, const char *domid); extern int read_otherend_details(struct xenbus_device *xendev, @@ -128,13 +127,10 @@ static struct xen_bus_type xenbus_backen }, }; -static int xenbus_uevent_backend(struct device *dev, char **envp, - int num_envp, char *buffer, int buffer_size) +static int xenbus_uevent_backend(struct device *dev, struct kobj_uevent_env *env) { struct xenbus_device *xdev; struct xenbus_driver *drv; - int i = 0; - int length = 0; DPRINTK(""); @@ -146,27 +142,16 @@ static int xenbus_uevent_backend(struct return -ENODEV; /* stuff we want to pass to /sbin/hotplug */ - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_TYPE=%s", xdev->devicetype); + add_uevent_var(env, "XENBUS_TYPE=%s", xdev->devicetype); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_PATH=%s", xdev->nodename); + add_uevent_var(env, "XENBUS_PATH=%s", xdev->nodename); - add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length, - "XENBUS_BASE_PATH=%s", xenbus_backend.root); - - /* terminate, set to next free slot, shrink available space */ - envp[i] = NULL; - envp = &envp[i]; - num_envp -= i; - buffer = &buffer[length]; - buffer_size -= length; + add_uevent_var(env, "XENBUS_BASE_PATH=%s", xenbus_backend.root); if (dev->driver) { drv = to_xenbus_driver(dev->driver); if (drv && drv->uevent) - return drv->uevent(xdev, envp, num_envp, buffer, - buffer_size); + return drv->uevent(xdev, env); } return 0; --- head-2010-05-25.orig/drivers/xen/xenoprof/xenoprofile.c 2010-03-24 15:09:08.000000000 +0100 +++ head-2010-05-25/drivers/xen/xenoprof/xenoprofile.c 2010-03-24 15:10:29.000000000 +0100 @@ -29,7 +29,6 @@ #include #include #include -#include "../../../drivers/oprofile/cpu_buffer.h" 
#include "../../../drivers/oprofile/event_buffer.h" #define MAX_XENOPROF_SAMPLES 16 @@ -142,8 +141,7 @@ static void xenoprof_add_pc(xenoprof_buf if (xenoprof_is_escape(buf, tail) && xenoprof_get_event(buf, tail) == XENOPROF_TRACE_BEGIN) { tracing=1; - oprofile_add_pc(ESCAPE_CODE, buf->event_log[tail].mode, - CPU_TRACE_BEGIN); + oprofile_add_mode(buf->event_log[tail].mode); if (!is_passive) oprofile_samples++; else --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/agp.h 2007-06-22 09:08:06.000000000 +0200 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/agp.h 2010-03-24 15:10:29.000000000 +0100 @@ -1,20 +1,22 @@ -#ifndef AGP_H -#define AGP_H 1 +#ifndef _ASM_X86_AGP_H +#define _ASM_X86_AGP_H #include #include #include -/* - * Functions to keep the agpgart mappings coherent with the MMU. - * The GART gives the CPU a physical alias of pages in memory. The alias region is - * mapped uncacheable. Make sure there are no conflicting mappings - * with different cachability attributes for the same page. This avoids - * data corruption on some CPUs. +/* + * Functions to keep the agpgart mappings coherent with the MMU. The + * GART gives the CPU a physical alias of pages in memory. The alias + * region is mapped uncacheable. Make sure there are no conflicting + * mappings with different cachability attributes for the same + * page. This avoids data corruption on some CPUs. */ -/* Caller's responsibility to call global_flush_tlb() for - * performance reasons */ +/* + * Caller's responsibility to call global_flush_tlb() for performance + * reasons + */ #define map_page_into_agp(page) ( \ xen_create_contiguous_region((unsigned long)page_address(page), 0, 32) \ ?: change_page_attr(page, 1, PAGE_KERNEL_NOCACHE)) @@ -24,9 +26,11 @@ change_page_attr(page, 1, PAGE_KERNEL)) #define flush_agp_mappings() global_flush_tlb() -/* Could use CLFLUSH here if the cpu supports it. But then it would - need to be called for each cacheline of the whole page so it may not be - worth it. Would need a page for it. */ +/* + * Could use CLFLUSH here if the cpu supports it. But then it would + * need to be called for each cacheline of the whole page so it may + * not be worth it. Would need a page for it. + */ #define flush_agp_cache() wbinvd() /* Convert a physical address to an address suitable for the GART. */ --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc.h 2010-03-24 15:10:29.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "desc_32.h" +#else +# include "desc_64.h" +#endif --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/desc_64.h 2010-03-24 15:09:15.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc_64.h 2010-03-24 15:10:29.000000000 +0100 @@ -34,6 +34,18 @@ static inline void clear_LDT(void) put_cpu(); } +#ifndef CONFIG_X86_NO_TSS +static inline unsigned long __store_tr(void) +{ + unsigned long tr; + + asm volatile ("str %w0":"=r" (tr)); + return tr; +} + +#define store_tr(tr) (tr) = __store_tr() +#endif + /* * This is the ldt that every process will get unless we need * something other than this. 
@@ -47,6 +59,18 @@ extern struct desc_ptr cpu_gdt_descr[]; /* the cpu gdt accessor */ #define cpu_gdt(_cpu) ((struct desc_struct *)cpu_gdt_descr[_cpu].address) +#ifndef CONFIG_XEN +static inline void load_gdt(const struct desc_ptr *ptr) +{ + asm volatile("lgdt %w0"::"m" (*ptr)); +} + +static inline void store_gdt(struct desc_ptr *ptr) +{ + asm("sgdt %w0":"=m" (*ptr)); +} +#endif + static inline void _set_gate(void *adr, unsigned type, unsigned long func, unsigned dpl, unsigned ist) { struct gate_struct s; @@ -87,6 +111,16 @@ static inline void set_system_gate_ist(i { _set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, ist); } + +static inline void load_idt(const struct desc_ptr *ptr) +{ + asm volatile("lidt %w0"::"m" (*ptr)); +} + +static inline void store_idt(struct desc_ptr *dtr) +{ + asm("sidt %w0":"=m" (*dtr)); +} #endif static inline void set_tssldt_descriptor(void *ptr, unsigned long tss, unsigned type, --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/dma-mapping.h 2010-03-24 15:10:29.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "dma-mapping_32.h" +#else +# include "dma-mapping_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:10:29.000000000 +0100 @@ -0,0 +1,5 @@ +#ifdef CONFIG_X86_32 +# include "fixmap_32.h" +#else +# include "fixmap_64.h" +#endif --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypercall.h 2010-03-24 15:10:29.000000000 +0100 @@ -0,0 +1,420 @@ +/****************************************************************************** + * hypercall.h + * + * Linux-specific hypervisor handling. + * + * Copyright (c) 2002-2004, K A Fraser + * + * 64-bit updates: + * Benjamin Liu + * Jun Nakajima + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation; or, when distributed + * separately from the Linux kernel or incorporated into other + * software packages, subject to the following license: + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this source file (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, modify, + * merge, publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#ifndef __HYPERCALL_H__ +#define __HYPERCALL_H__ + +#ifndef __HYPERVISOR_H__ +# error "please don't include this file directly" +#endif + +#if CONFIG_XEN_COMPAT <= 0x030002 +# include <linux/string.h> /* memcpy() */ +#endif + +#ifdef CONFIG_XEN +#define HYPERCALL_ASM_OPERAND "%c" +#define HYPERCALL_LOCATION(op) (hypercall_page + (op) * 32) +#define HYPERCALL_C_OPERAND(name) "i" (HYPERCALL_LOCATION(__HYPERVISOR_##name)) +#else +#define HYPERCALL_ASM_OPERAND "*%" +#define HYPERCALL_LOCATION(op) (hypercall_stubs + (op) * 32) +#define HYPERCALL_C_OPERAND(name) "g" (HYPERCALL_LOCATION(__HYPERVISOR_##name)) +#endif + +#define HYPERCALL_ARG(arg, n) \ + register typeof((arg)+0) __arg##n asm(HYPERCALL_arg##n) = (arg) + +#define _hypercall0(type, name) \ +({ \ + type __res; \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "1" \ + : "=a" (__res) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall1(type, name, arg) \ +({ \ + type __res; \ + HYPERCALL_ARG(arg, 1); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "2" \ + : "=a" (__res), "+r" (__arg1) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall2(type, name, a1, a2) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "3" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall3(type, name, a1, a2, a3) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "4" \ + : "=a" (__res), "+r" (__arg1), \ + "+r" (__arg2), "+r" (__arg3) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall4(type, name, a1, a2, a3, a4) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "5" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall5(type, name, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + HYPERCALL_ARG(a5, 5); \ + asm volatile ( \ + "call " HYPERCALL_ASM_OPERAND "6" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ + : HYPERCALL_C_OPERAND(name) \ + : "memory" ); \ + __res; \ +}) + +#define _hypercall(type, op, a1, a2, a3, a4, a5) \ +({ \ + type __res; \ + HYPERCALL_ARG(a1, 1); \ + HYPERCALL_ARG(a2, 2); \ + HYPERCALL_ARG(a3, 3); \ + HYPERCALL_ARG(a4, 4); \ + HYPERCALL_ARG(a5, 5); \ + asm volatile ( \ + "call *%6" \ + : "=a" (__res), "+r" (__arg1), "+r" (__arg2), \ + "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \ + : "g" (HYPERCALL_LOCATION(op)) \ + : "memory" ); \ + __res; \ +}) + +#ifdef CONFIG_X86_32 +# include "hypercall_32.h" +#else +# include "hypercall_64.h" +#endif + +static inline int __must_check +HYPERVISOR_set_trap_table( + const trap_info_t *table) +{ + return _hypercall1(int, set_trap_table, table); +} + +static inline int __must_check +HYPERVISOR_mmu_update( + mmu_update_t *req, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + if (arch_use_lazy_mmu_mode()) + return xen_multi_mmu_update(req, count, success_count, domid); + return _hypercall4(int, mmu_update, req, count, success_count, 
domid); +} + +static inline int __must_check +HYPERVISOR_mmuext_op( + struct mmuext_op *op, unsigned int count, unsigned int *success_count, + domid_t domid) +{ + if (arch_use_lazy_mmu_mode()) + return xen_multi_mmuext_op(op, count, success_count, domid); + return _hypercall4(int, mmuext_op, op, count, success_count, domid); +} + +static inline int __must_check +HYPERVISOR_set_gdt( + unsigned long *frame_list, unsigned int entries) +{ + return _hypercall2(int, set_gdt, frame_list, entries); +} + +static inline int __must_check +HYPERVISOR_stack_switch( + unsigned long ss, unsigned long esp) +{ + return _hypercall2(int, stack_switch, ss, esp); +} + +static inline int +HYPERVISOR_fpu_taskswitch( + int set) +{ + return _hypercall1(int, fpu_taskswitch, set); +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int __must_check +HYPERVISOR_sched_op_compat( + int cmd, unsigned long arg) +{ + return _hypercall2(int, sched_op_compat, cmd, arg); +} +#endif + +static inline int __must_check +HYPERVISOR_sched_op( + int cmd, void *arg) +{ + return _hypercall2(int, sched_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_platform_op( + struct xen_platform_op *platform_op) +{ + platform_op->interface_version = XENPF_INTERFACE_VERSION; + return _hypercall1(int, platform_op, platform_op); +} + +struct xen_mc; +static inline int __must_check +HYPERVISOR_mca( + struct xen_mc *mc_op) +{ + mc_op->interface_version = XEN_MCA_INTERFACE_VERSION; + return _hypercall1(int, mca, mc_op); +} + +static inline int __must_check +HYPERVISOR_set_debugreg( + unsigned int reg, unsigned long value) +{ + return _hypercall2(int, set_debugreg, reg, value); +} + +static inline unsigned long __must_check +HYPERVISOR_get_debugreg( + unsigned int reg) +{ + return _hypercall1(unsigned long, get_debugreg, reg); +} + +static inline int __must_check +HYPERVISOR_memory_op( + unsigned int cmd, void *arg) +{ + if (arch_use_lazy_mmu_mode()) + xen_multicall_flush(false); + return _hypercall2(int, memory_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_multicall( + multicall_entry_t *call_list, unsigned int nr_calls) +{ + return _hypercall2(int, multicall, call_list, nr_calls); +} + +static inline int __must_check +HYPERVISOR_event_channel_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, event_channel_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOSYS)) { + struct evtchn_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, event_channel_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_xen_version( + int cmd, void *arg) +{ + return _hypercall2(int, xen_version, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_console_io( + int cmd, unsigned int count, char *str) +{ + return _hypercall3(int, console_io, cmd, count, str); +} + +static inline int __must_check +HYPERVISOR_physdev_op( + int cmd, void *arg) +{ + int rc = _hypercall2(int, physdev_op, cmd, arg); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (unlikely(rc == -ENOSYS)) { + struct physdev_op op; + op.cmd = cmd; + memcpy(&op.u, arg, sizeof(op.u)); + rc = _hypercall1(int, physdev_op_compat, &op); + memcpy(arg, &op.u, sizeof(op.u)); + } +#endif + + return rc; +} + +static inline int __must_check +HYPERVISOR_grant_table_op( + unsigned int cmd, void *uop, unsigned int count) +{ + if (arch_use_lazy_mmu_mode()) + xen_multicall_flush(false); + return _hypercall3(int, grant_table_op, cmd, uop, count); +} + +static 
inline int __must_check +HYPERVISOR_vm_assist( + unsigned int cmd, unsigned int type) +{ + return _hypercall2(int, vm_assist, cmd, type); +} + +static inline int __must_check +HYPERVISOR_vcpu_op( + int cmd, unsigned int vcpuid, void *extra_args) +{ + return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args); +} + +static inline int __must_check +HYPERVISOR_suspend( + unsigned long srec) +{ + struct sched_shutdown sched_shutdown = { + .reason = SHUTDOWN_suspend + }; + + int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown, + &sched_shutdown, srec); + +#if CONFIG_XEN_COMPAT <= 0x030002 + if (rc == -ENOSYS) + rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown, + SHUTDOWN_suspend, srec); +#endif + + return rc; +} + +#if CONFIG_XEN_COMPAT <= 0x030002 +static inline int +HYPERVISOR_nmi_op( + unsigned long op, void *arg) +{ + return _hypercall2(int, nmi_op, op, arg); +} +#endif + +#ifndef CONFIG_XEN +static inline unsigned long __must_check +HYPERVISOR_hvm_op( + int op, void *arg) +{ + return _hypercall2(unsigned long, hvm_op, op, arg); +} +#endif + +static inline int __must_check +HYPERVISOR_callback_op( + int cmd, const void *arg) +{ + return _hypercall2(int, callback_op, cmd, arg); +} + +static inline int __must_check +HYPERVISOR_xenoprof_op( + int op, void *arg) +{ + return _hypercall2(int, xenoprof_op, op, arg); +} + +static inline int __must_check +HYPERVISOR_kexec_op( + unsigned long op, void *args) +{ + return _hypercall2(int, kexec_op, op, args); +} + +static inline int __must_check +HYPERVISOR_tmem_op( + struct tmem_op *op) +{ + return _hypercall1(int, tmem_op, op); +} + +#endif /* __HYPERCALL_H__ */ --- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypercall_32.h 2010-03-24 15:06:12.000000000 +0100 +++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypercall_32.h 2010-03-24 15:10:29.000000000 +0100 @@ -1,191 +1,10 @@ -/****************************************************************************** - * hypercall.h - * - * Linux-specific hypervisor handling. - * - * Copyright (c) 2002-2004, K A Fraser - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation; or, when distributed - * separately from the Linux kernel or incorporated into other - * software packages, subject to the following license: - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this source file (the "Software"), to deal in the Software without - * restriction, including without limitation the rights to use, copy, modify, - * merge, publish, distribute, sublicense, and/or sell copies of the Software, - * and to permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
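[Illustrative sketch, not part of the patch: roughly what _hypercall2(int, sched_op, cmd, arg) expands to in a CONFIG_XEN x86-64 build, once hypercall_64.h below has mapped arg1/arg2 to rdi/rsi. The function name is hypothetical; hypercall_page and __HYPERVISOR_sched_op are the symbols the macros above rely on, and "%c3" prints the constant stub address without the usual '$' prefix.]

	static inline int sched_op_expanded(int cmd, void *arg)
	{
		int __res;
		register long __arg1 asm("rdi") = cmd;
		register void *__arg2 asm("rsi") = arg;

		/* one 32-byte stub per hypercall number in hypercall_page */
		asm volatile ("call %c3"
			      : "=a" (__res), "+r" (__arg1), "+r" (__arg2)
			      : "i" (hypercall_page + __HYPERVISOR_sched_op * 32)
			      : "memory");
		return __res;
	}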
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypercall_32.h	2010-03-24 15:06:12.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypercall_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -1,191 +1,10 @@
-/******************************************************************************
- * hypercall.h
- *
- * Linux-specific hypervisor handling.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __HYPERCALL_H__
-#define __HYPERCALL_H__
-
-#include <linux/string.h> /* memcpy() */
-#include
-
-#ifndef __HYPERVISOR_H__
-# error "please don't include this file directly"
-#endif
-
-#ifdef CONFIG_XEN
-#define HYPERCALL_STR(name) \
-	"call hypercall_page + ("__stringify(__HYPERVISOR_##name)" * 32)"
-#else
-#define HYPERCALL_STR(name) \
-	"mov hypercall_stubs,%%eax; " \
-	"add $("__stringify(__HYPERVISOR_##name)" * 32),%%eax; "\
-	"call *%%eax"
-#endif
-
-#define _hypercall0(type, name) \
-({ \
-	type __res; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res) \
-		: \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall1(type, name, a1) \
-({ \
-	type __res; \
-	long __ign1; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=b" (__ign1) \
-		: "1" ((long)(a1)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall2(type, name, a1, a2) \
-({ \
-	type __res; \
-	long __ign1, __ign2; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2) \
-		: "1" ((long)(a1)), "2" ((long)(a2)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall3(type, name, a1, a2, a3) \
-({ \
-	type __res; \
-	long __ign1, __ign2, __ign3; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
-		  "=d" (__ign3) \
-		: "1" ((long)(a1)), "2" ((long)(a2)), \
-		  "3" ((long)(a3)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall4(type, name, a1, a2, a3, a4) \
-({ \
-	type __res; \
-	long __ign1, __ign2, __ign3, __ign4; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
-		  "=d" (__ign3), "=S" (__ign4) \
-		: "1" ((long)(a1)), "2" ((long)(a2)), \
-		  "3" ((long)(a3)), "4" ((long)(a4)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
-({ \
-	type __res; \
-	long __ign1, __ign2, __ign3, __ign4, __ign5; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=b" (__ign1), "=c" (__ign2), \
-		  "=d" (__ign3), "=S" (__ign4), "=D" (__ign5) \
-		: "1" ((long)(a1)), "2" ((long)(a2)), \
-		  "3" ((long)(a3)), "4" ((long)(a4)), \
-		  "5" ((long)(a5)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall(type, op, a1, a2, a3, a4, a5) \
-({ \
-	type __res; \
-	register typeof((a1)+0) __arg1 asm("ebx") = (a1); \
-	register typeof((a2)+0) __arg2 asm("ecx") = (a2); \
-	register typeof((a3)+0) __arg3 asm("edx") = (a3); \
-	register typeof((a4)+0) __arg4 asm("esi") = (a4); \
-	register typeof((a5)+0) __arg5 asm("edi") = (a5); \
-	asm volatile ( \
-		"call *%6" \
-		: "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
-		  "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
-		: "0" (hypercall_page + (op) * 32) \
-		: "memory" ); \
-	__res; \
-})
-
-static inline int __must_check
-HYPERVISOR_set_trap_table(
-	const trap_info_t *table)
-{
-	return _hypercall1(int, set_trap_table, table);
-}
-
-static inline int __must_check
-HYPERVISOR_mmu_update(
-	mmu_update_t *req, unsigned int count, unsigned int *success_count,
-	domid_t domid)
-{
-	if (arch_use_lazy_mmu_mode())
-		return xen_multi_mmu_update(req, count, success_count, domid);
-	return _hypercall4(int, mmu_update, req, count, success_count, domid);
-}
-
-static inline int __must_check
-HYPERVISOR_mmuext_op(
-	struct mmuext_op *op, unsigned int count, unsigned int *success_count,
-	domid_t domid)
-{
-	if (arch_use_lazy_mmu_mode())
-		return xen_multi_mmuext_op(op, count, success_count, domid);
-	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
-}
-
-static inline int __must_check
-HYPERVISOR_set_gdt(
-	unsigned long *frame_list, unsigned int entries)
-{
-	return _hypercall2(int, set_gdt, frame_list, entries);
-}
-
-static inline int __must_check
-HYPERVISOR_stack_switch(
-	unsigned long ss, unsigned long esp)
-{
-	return _hypercall2(int, stack_switch, ss, esp);
-}
+#define HYPERCALL_arg1 "ebx"
+#define HYPERCALL_arg2 "ecx"
+#define HYPERCALL_arg3 "edx"
+#define HYPERCALL_arg4 "esi"
+#define HYPERCALL_arg5 "edi"
 
+#if CONFIG_XEN_COMPAT <= 0x030002
 static inline int __must_check
 HYPERVISOR_set_callbacks(
 	unsigned long event_selector, unsigned long event_address,
@@ -195,80 +14,24 @@ HYPERVISOR_set_callbacks(
 	event_selector, event_address,
 	failsafe_selector, failsafe_address);
 }
-
-static inline int
-HYPERVISOR_fpu_taskswitch(
-	int set)
-{
-	return _hypercall1(int, fpu_taskswitch, set);
-}
-
-static inline int __must_check
-HYPERVISOR_sched_op_compat(
-	int cmd, unsigned long arg)
-{
-	return _hypercall2(int, sched_op_compat, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_sched_op(
-	int cmd, void *arg)
-{
-	return _hypercall2(int, sched_op, cmd, arg);
-}
+#endif
 
 static inline long __must_check
 HYPERVISOR_set_timer_op(
 	u64 timeout)
 {
-	unsigned long timeout_hi = (unsigned long)(timeout>>32);
-	unsigned long timeout_lo = (unsigned long)timeout;
-	return _hypercall2(long, set_timer_op, timeout_lo, timeout_hi);
-}
-
-static inline int __must_check
-HYPERVISOR_platform_op(
-	struct xen_platform_op *platform_op)
-{
-	platform_op->interface_version = XENPF_INTERFACE_VERSION;
-	return _hypercall1(int, platform_op, platform_op);
-}
-
-static inline int __must_check
-HYPERVISOR_set_debugreg(
-	unsigned int reg, unsigned long value)
-{
-	return _hypercall2(int, set_debugreg, reg, value);
-}
-
-static inline unsigned long __must_check
-HYPERVISOR_get_debugreg(
-	unsigned int reg)
-{
-	return _hypercall1(unsigned long, get_debugreg, reg);
+	return _hypercall2(long, set_timer_op,
+			   (unsigned long)timeout,
+			   (unsigned long)(timeout>>32));
 }
 
 static inline int __must_check
 HYPERVISOR_update_descriptor(
 	u64 ma, u64 desc)
 {
-	return _hypercall4(int, update_descriptor, ma, ma>>32, desc, desc>>32);
-}
-
-static inline int __must_check
-HYPERVISOR_memory_op(
-	unsigned int cmd, void *arg)
-{
-	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
-	return _hypercall2(int, memory_op, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_multicall(
-	multicall_entry_t *call_list, unsigned int nr_calls)
-{
-	return _hypercall2(int, multicall, call_list, nr_calls);
+	return _hypercall4(int, update_descriptor,
+			   (unsigned long)ma, (unsigned long)(ma>>32),
+			   (unsigned long)desc, (unsigned long)(desc>>32));
 }
 
 static inline int __must_check
@@ -287,67 +50,6 @@ HYPERVISOR_update_va_mapping(
 }
 
 static inline int __must_check
-HYPERVISOR_event_channel_op(
-	int cmd, void *arg)
-{
-	int rc = _hypercall2(int, event_channel_op, cmd, arg);
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (unlikely(rc == -ENOSYS)) {
-		struct evtchn_op op;
-		op.cmd = cmd;
-		memcpy(&op.u, arg, sizeof(op.u));
-		rc = _hypercall1(int, event_channel_op_compat, &op);
-		memcpy(arg, &op.u, sizeof(op.u));
-	}
-#endif
-
-	return rc;
-}
-
-static inline int __must_check
-HYPERVISOR_xen_version(
-	int cmd, void *arg)
-{
-	return _hypercall2(int, xen_version, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_console_io(
-	int cmd, unsigned int count, char *str)
-{
-	return _hypercall3(int, console_io, cmd, count, str);
-}
-
-static inline int __must_check
-HYPERVISOR_physdev_op(
-	int cmd, void *arg)
-{
-	int rc = _hypercall2(int, physdev_op, cmd, arg);
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (unlikely(rc == -ENOSYS)) {
-		struct physdev_op op;
-		op.cmd = cmd;
-		memcpy(&op.u, arg, sizeof(op.u));
-		rc = _hypercall1(int, physdev_op_compat, &op);
-		memcpy(arg, &op.u, sizeof(op.u));
-	}
-#endif
-
-	return rc;
-}
-
-static inline int __must_check
-HYPERVISOR_grant_table_op(
-	unsigned int cmd, void *uop, unsigned int count)
-{
-	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
-	return _hypercall3(int, grant_table_op, cmd, uop, count);
-}
-
-static inline int __must_check
 HYPERVISOR_update_va_mapping_otherdomain(
 	unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
 {
@@ -358,86 +60,3 @@ HYPERVISOR_update_va_mapping_otherdomain
 	return _hypercall5(int, update_va_mapping_otherdomain, va,
 			   new_val.pte_low, pte_hi, flags, domid);
 }
-
-static inline int __must_check
-HYPERVISOR_vm_assist(
-	unsigned int cmd, unsigned int type)
-{
-	return _hypercall2(int, vm_assist, cmd, type);
-}
-
-static inline int __must_check
-HYPERVISOR_vcpu_op(
-	int cmd, unsigned int vcpuid, void *extra_args)
-{
-	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
-}
-
-static inline int __must_check
-HYPERVISOR_suspend(
-	unsigned long srec)
-{
-	struct sched_shutdown sched_shutdown = {
-		.reason = SHUTDOWN_suspend
-	};
-
-	int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
-			     &sched_shutdown, srec);
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (rc == -ENOSYS)
-		rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
-				 SHUTDOWN_suspend, srec);
-#endif
-
-	return rc;
-}
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-static inline int
-HYPERVISOR_nmi_op(
-	unsigned long op, void *arg)
-{
-	return _hypercall2(int, nmi_op, op, arg);
-}
-#endif
-
-#ifndef CONFIG_XEN
-static inline unsigned long __must_check
-HYPERVISOR_hvm_op(
-	int op, void *arg)
-{
-	return _hypercall2(unsigned long, hvm_op, op, arg);
-}
-#endif
-
-static inline int __must_check
-HYPERVISOR_callback_op(
-	int cmd, const void *arg)
-{
-	return _hypercall2(int, callback_op, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_xenoprof_op(
-	int op, void *arg)
-{
-	return _hypercall2(int, xenoprof_op, op, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_kexec_op(
-	unsigned long op, void *args)
-{
-	return _hypercall2(int, kexec_op, op, args);
-}
-
-static inline int __must_check
-HYPERVISOR_tmem_op(
-	struct tmem_op *op)
-{
-	return _hypercall1(int, tmem_op, op);
-}
-
-
-#endif /* __HYPERCALL_H__ */
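[Illustrative sketch, not part of the patch: the 32-bit flavour passes arguments in ebx/ecx/edx/esi/edi, so a 64-bit quantity must be split into low/high halves, low half first, exactly as the reworked HYPERVISOR_set_timer_op and HYPERVISOR_update_descriptor above do. A stand-alone C analogue of that split (all names hypothetical):]

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t timeout = 0x123456789abcdef0ULL;
		/* mirrors the 32-bit (unsigned long) casts above */
		uint32_t lo = (uint32_t)timeout;          /* -> arg1 (ebx) */
		uint32_t hi = (uint32_t)(timeout >> 32);  /* -> arg2 (ecx) */
		printf("lo=%#x hi=%#x\n", lo, hi);
		return 0;
	}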
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypercall_64.h	2010-03-24 15:06:12.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypercall_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -1,198 +1,10 @@
-/******************************************************************************
- * hypercall.h
- *
- * Linux-specific hypervisor handling.
- *
- * Copyright (c) 2002-2004, K A Fraser
- *
- * 64-bit updates:
- *   Benjamin Liu
- *   Jun Nakajima
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License version 2
- * as published by the Free Software Foundation; or, when distributed
- * separately from the Linux kernel or incorporated into other
- * software packages, subject to the following license:
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this source file (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use, copy, modify,
- * merge, publish, distribute, sublicense, and/or sell copies of the Software,
- * and to permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-#ifndef __HYPERCALL_H__
-#define __HYPERCALL_H__
-
-#include <linux/string.h> /* memcpy() */
-#include
-#include
-
-#ifndef __HYPERVISOR_H__
-# error "please don't include this file directly"
-#endif
-
-#ifdef CONFIG_XEN
-#define HYPERCALL_STR(name) \
-	"call hypercall_page + ("__stringify(__HYPERVISOR_##name)" * 32)"
-#else
-#define HYPERCALL_STR(name) \
-	"mov $("__stringify(__HYPERVISOR_##name)" * 32),%%eax; "\
-	"add hypercall_stubs(%%rip),%%rax; " \
-	"call *%%rax"
-#endif
-
-#define _hypercall0(type, name) \
-({ \
-	type __res; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res) \
-		: \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall1(type, name, a1) \
-({ \
-	type __res; \
-	long __ign1; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=D" (__ign1) \
-		: "1" ((long)(a1)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall2(type, name, a1, a2) \
-({ \
-	type __res; \
-	long __ign1, __ign2; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=D" (__ign1), "=S" (__ign2) \
-		: "1" ((long)(a1)), "2" ((long)(a2)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall3(type, name, a1, a2, a3) \
-({ \
-	type __res; \
-	long __ign1, __ign2, __ign3; \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
-		  "=d" (__ign3) \
-		: "1" ((long)(a1)), "2" ((long)(a2)), \
-		  "3" ((long)(a3)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall4(type, name, a1, a2, a3, a4) \
-({ \
-	type __res; \
-	long __ign1, __ign2, __ign3; \
-	register long __arg4 asm("r10") = (long)(a4); \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
-		  "=d" (__ign3), "+r" (__arg4) \
-		: "1" ((long)(a1)), "2" ((long)(a2)), \
-		  "3" ((long)(a3)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall5(type, name, a1, a2, a3, a4, a5) \
-({ \
-	type __res; \
-	long __ign1, __ign2, __ign3; \
-	register long __arg4 asm("r10") = (long)(a4); \
-	register long __arg5 asm("r8") = (long)(a5); \
-	asm volatile ( \
-		HYPERCALL_STR(name) \
-		: "=a" (__res), "=D" (__ign1), "=S" (__ign2), \
-		  "=d" (__ign3), "+r" (__arg4), "+r" (__arg5) \
-		: "1" ((long)(a1)), "2" ((long)(a2)), \
-		  "3" ((long)(a3)) \
-		: "memory" ); \
-	__res; \
-})
-
-#define _hypercall(type, op, a1, a2, a3, a4, a5) \
-({ \
-	type __res; \
-	register typeof((a1)+0) __arg1 asm("rdi") = (a1); \
-	register typeof((a2)+0) __arg2 asm("rsi") = (a2); \
-	register typeof((a3)+0) __arg3 asm("rdx") = (a3); \
-	register typeof((a4)+0) __arg4 asm("r10") = (a4); \
-	register typeof((a5)+0) __arg5 asm("r8") = (a5); \
-	asm volatile ( \
-		"call *%6" \
-		: "=a" (__res), "+r" (__arg1), "+r" (__arg2), \
-		  "+r" (__arg3), "+r" (__arg4), "+r" (__arg5) \
-		: "0" (hypercall_page + (op) * 32) \
-		: "memory" ); \
-	__res; \
-})
-
-static inline int __must_check
-HYPERVISOR_set_trap_table(
-	const trap_info_t *table)
-{
-	return _hypercall1(int, set_trap_table, table);
-}
-
-static inline int __must_check
-HYPERVISOR_mmu_update(
-	mmu_update_t *req, unsigned int count, unsigned int *success_count,
-	domid_t domid)
-{
-	if (arch_use_lazy_mmu_mode())
-		return xen_multi_mmu_update(req, count, success_count, domid);
-	return _hypercall4(int, mmu_update, req, count, success_count, domid);
-}
-
-static inline int __must_check
-HYPERVISOR_mmuext_op(
-	struct mmuext_op *op, unsigned int count, unsigned int *success_count,
-	domid_t domid)
-{
-	if (arch_use_lazy_mmu_mode())
-		return xen_multi_mmuext_op(op, count, success_count, domid);
-	return _hypercall4(int, mmuext_op, op, count, success_count, domid);
-}
-
-static inline int __must_check
-HYPERVISOR_set_gdt(
-	unsigned long *frame_list, unsigned int entries)
-{
-	return _hypercall2(int, set_gdt, frame_list, entries);
-}
-
-static inline int __must_check
-HYPERVISOR_stack_switch(
-	unsigned long ss, unsigned long esp)
-{
-	return _hypercall2(int, stack_switch, ss, esp);
-}
+#define HYPERCALL_arg1 "rdi"
+#define HYPERCALL_arg2 "rsi"
+#define HYPERCALL_arg3 "rdx"
+#define HYPERCALL_arg4 "r10"
+#define HYPERCALL_arg5 "r8"
 
+#if CONFIG_XEN_COMPAT <= 0x030002
 static inline int __must_check
 HYPERVISOR_set_callbacks(
 	unsigned long event_address, unsigned long failsafe_address,
@@ -201,27 +13,7 @@ HYPERVISOR_set_callbacks(
 	return _hypercall3(int, set_callbacks, event_address,
 			   failsafe_address, syscall_address);
 }
-
-static inline int
-HYPERVISOR_fpu_taskswitch(
-	int set)
-{
-	return _hypercall1(int, fpu_taskswitch, set);
-}
-
-static inline int __must_check
-HYPERVISOR_sched_op_compat(
-	int cmd, unsigned long arg)
-{
-	return _hypercall2(int, sched_op_compat, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_sched_op(
-	int cmd, void *arg)
-{
-	return _hypercall2(int, sched_op, cmd, arg);
-}
+#endif
 
 static inline long __must_check
 HYPERVISOR_set_timer_op(
@@ -231,34 +23,6 @@ HYPERVISOR_set_timer_op(
 }
 
 static inline int __must_check
-HYPERVISOR_platform_op(
-	struct xen_platform_op *platform_op)
-{
-	platform_op->interface_version = XENPF_INTERFACE_VERSION;
-	return _hypercall1(int, platform_op, platform_op);
-}
-static inline int __must_check
-HYPERVISOR_mca(
-	struct xen_mc *mc_op)
-{
-	mc_op->interface_version = XEN_MCA_INTERFACE_VERSION;
-	return _hypercall1(int, mca, mc_op);
-}
-static inline int __must_check
-HYPERVISOR_set_debugreg(
-	unsigned int reg, unsigned long value)
-{
-	return _hypercall2(int, set_debugreg, reg, value);
-}
-
-static inline unsigned long __must_check
-HYPERVISOR_get_debugreg(
-	unsigned int reg)
-{
-	return _hypercall1(unsigned long, get_debugreg, reg);
-}
-
-static inline int __must_check
 HYPERVISOR_update_descriptor(
 	unsigned long ma, unsigned long word)
 {
@@ -266,22 +30,6 @@ HYPERVISOR_update_descriptor(
 }
 
 static inline int __must_check
-HYPERVISOR_memory_op(
-	unsigned int cmd, void *arg)
-{
-	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
-	return _hypercall2(int, memory_op, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_multicall(
-	multicall_entry_t *call_list, unsigned int nr_calls)
-{
-	return _hypercall2(int, multicall, call_list, nr_calls);
-}
-
-static inline int __must_check
 HYPERVISOR_update_va_mapping(
 	unsigned long va, pte_t new_val, unsigned long flags)
 {
@@ -291,67 +39,6 @@ HYPERVISOR_update_va_mapping(
 }
 
 static inline int __must_check
-HYPERVISOR_event_channel_op(
-	int cmd, void *arg)
-{
-	int rc = _hypercall2(int, event_channel_op, cmd, arg);
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (unlikely(rc == -ENOSYS)) {
-		struct evtchn_op op;
-		op.cmd = cmd;
-		memcpy(&op.u, arg, sizeof(op.u));
-		rc = _hypercall1(int, event_channel_op_compat, &op);
-		memcpy(arg, &op.u, sizeof(op.u));
-	}
-#endif
-
-	return rc;
-}
-
-static inline int __must_check
-HYPERVISOR_xen_version(
-	int cmd, void *arg)
-{
-	return _hypercall2(int, xen_version, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_console_io(
-	int cmd, unsigned int count, char *str)
-{
-	return _hypercall3(int, console_io, cmd, count, str);
-}
-
-static inline int __must_check
-HYPERVISOR_physdev_op(
-	int cmd, void *arg)
-{
-	int rc = _hypercall2(int, physdev_op, cmd, arg);
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (unlikely(rc == -ENOSYS)) {
-		struct physdev_op op;
-		op.cmd = cmd;
-		memcpy(&op.u, arg, sizeof(op.u));
-		rc = _hypercall1(int, physdev_op_compat, &op);
-		memcpy(arg, &op.u, sizeof(op.u));
-	}
-#endif
-
-	return rc;
-}
-
-static inline int __must_check
-HYPERVISOR_grant_table_op(
-	unsigned int cmd, void *uop, unsigned int count)
-{
-	if (arch_use_lazy_mmu_mode())
-		xen_multicall_flush(false);
-	return _hypercall3(int, grant_table_op, cmd, uop, count);
-}
-
-static inline int __must_check
 HYPERVISOR_update_va_mapping_otherdomain(
 	unsigned long va, pte_t new_val, unsigned long flags, domid_t domid)
 {
@@ -360,90 +47,8 @@ HYPERVISOR_update_va_mapping_otherdomain
 }
 
 static inline int __must_check
-HYPERVISOR_vm_assist(
-	unsigned int cmd, unsigned int type)
-{
-	return _hypercall2(int, vm_assist, cmd, type);
-}
-
-static inline int __must_check
-HYPERVISOR_vcpu_op(
-	int cmd, unsigned int vcpuid, void *extra_args)
-{
-	return _hypercall3(int, vcpu_op, cmd, vcpuid, extra_args);
-}
-
-static inline int __must_check
 HYPERVISOR_set_segment_base(
 	int reg, unsigned long value)
 {
 	return _hypercall2(int, set_segment_base, reg, value);
 }
-
-static inline int __must_check
-HYPERVISOR_suspend(
-	unsigned long srec)
-{
-	struct sched_shutdown sched_shutdown = {
-		.reason = SHUTDOWN_suspend
-	};
-
-	int rc = _hypercall3(int, sched_op, SCHEDOP_shutdown,
-			     &sched_shutdown, srec);
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-	if (rc == -ENOSYS)
-		rc = _hypercall3(int, sched_op_compat, SCHEDOP_shutdown,
-				 SHUTDOWN_suspend, srec);
-#endif
-
-	return rc;
-}
-
-#if CONFIG_XEN_COMPAT <= 0x030002
-static inline int
-HYPERVISOR_nmi_op(
-	unsigned long op, void *arg)
-{
-	return _hypercall2(int, nmi_op, op, arg);
-}
-#endif
-
-#ifndef CONFIG_XEN
-static inline unsigned long __must_check
-HYPERVISOR_hvm_op(
-	int op, void *arg)
-{
-	return _hypercall2(unsigned long, hvm_op, op, arg);
-}
-#endif
-
-static inline int __must_check
-HYPERVISOR_callback_op(
-	int cmd, const void *arg)
-{
-	return _hypercall2(int, callback_op, cmd, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_xenoprof_op(
-	int op, void *arg)
-{
-	return _hypercall2(int, xenoprof_op, op, arg);
-}
-
-static inline int __must_check
-HYPERVISOR_kexec_op(
-	unsigned long op, void *args)
-{
-	return _hypercall2(int, kexec_op, op, args);
-}
-
-static inline int __must_check
-HYPERVISOR_tmem_op(
-	struct tmem_op *op)
-{
-	return _hypercall1(int, tmem_op, op);
-}
-
-#endif /* __HYPERCALL_H__ */
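[Illustrative sketch, not part of the patch: on x86-64 the argument registers are rdi/rsi/rdx/r10/r8, with r10 standing in for the C ABI's rcx, which a syscall-style entry clobbers. A user-space sketch of how HYPERCALL_ARG pins values to those registers; the asm is a harmless stand-in for the real "call", and the function name is hypothetical:]

	/* compiles with gcc on x86-64; the mov merely stands in for the
	 * hypercall so the register bindings are visible */
	static inline long fake_hypercall5(long a1, long a2, long a3,
					   long a4, long a5)
	{
		register long __a1 asm("rdi") = a1;
		register long __a2 asm("rsi") = a2;
		register long __a3 asm("rdx") = a3;
		register long __a4 asm("r10") = a4;
		register long __a5 asm("r8")  = a5;
		long res;

		asm volatile ("mov %1, %0"	/* stand-in for "call ..." */
			      : "=a" (res), "+r" (__a1), "+r" (__a2),
				"+r" (__a3), "+r" (__a4), "+r" (__a5)
			      : : "memory");
		return res;
	}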
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypervisor.h	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypervisor.h	2010-03-24 15:10:29.000000000 +0100
@@ -44,6 +44,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -200,7 +201,6 @@ static inline void xen_multicall_flush(b
 extern char hypercall_page[PAGE_SIZE];
 #else
 extern char *hypercall_stubs;
-#define hypercall_page hypercall_stubs
 #define is_running_on_xen() (!!hypercall_stubs)
 #endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/io.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "io_32.h"
+#else
+# include "io_64.h"
+#endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irqflags.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "irqflags_32.h"
+#else
+# include "irqflags_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irqflags_32.h	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irqflags_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -150,6 +150,23 @@ static inline int raw_irqs_disabled_flag
 \
 	raw_irqs_disabled_flags(flags); \
 })
+
+/*
+ * makes the traced hardirq state match with the machine state
+ *
+ * should be a rarely used function, only in places where it's
+ * otherwise impossible to know the irq state, like in traps.
+ */
+static inline void trace_hardirqs_fixup_flags(unsigned long flags)
+{
+	if (raw_irqs_disabled_flags(flags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+}
+
+#define trace_hardirqs_fixup() \
+	trace_hardirqs_fixup_flags(__raw_local_save_flags())
 #endif /* __ASSEMBLY__ */
 
 /*
@@ -181,4 +198,17 @@ static inline int raw_irqs_disabled_flag
 # define TRACE_IRQS_OFF
 #endif
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# define LOCKDEP_SYS_EXIT \
+	pushl %eax; \
+	pushl %ecx; \
+	pushl %edx; \
+	call lockdep_sys_exit; \
+	popl %edx; \
+	popl %ecx; \
+	popl %eax;
+#else
+# define LOCKDEP_SYS_EXIT
+#endif
+
 #endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irqflags_64.h	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irqflags_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -116,6 +116,22 @@ static inline int raw_irqs_disabled_flag
 })
 
 /*
+ * makes the traced hardirq state match with the machine state
+ *
+ * should be a rarely used function, only in places where it's
+ * otherwise impossible to know the irq state, like in traps.
+ */
+static inline void trace_hardirqs_fixup_flags(unsigned long flags)
+{
+	if (raw_irqs_disabled_flags(flags))
+		trace_hardirqs_off();
+	else
+		trace_hardirqs_on();
+}
+
+#define trace_hardirqs_fixup() \
+	trace_hardirqs_fixup_flags(__raw_local_save_flags())
+/*
  * Used in the idle loop; sti takes one instruction cycle
  * to complete:
  */
@@ -143,6 +159,20 @@ static inline void halt(void)
 #  define TRACE_IRQS_ON
 #  define TRACE_IRQS_OFF
 # endif
+# ifdef CONFIG_DEBUG_LOCK_ALLOC
+#  define LOCKDEP_SYS_EXIT	call lockdep_sys_exit_thunk
+#  define LOCKDEP_SYS_EXIT_IRQ \
+	TRACE_IRQS_ON; \
+	sti; \
+	SAVE_REST; \
+	LOCKDEP_SYS_EXIT; \
+	RESTORE_REST; \
+	cli; \
+	TRACE_IRQS_OFF;
+# else
+#  define LOCKDEP_SYS_EXIT
+#  define LOCKDEP_SYS_EXIT_IRQ
+# endif
 #endif
 
 #endif
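[Illustrative sketch, not part of the patch: trace_hardirqs_fixup() is intended for entry points such as trap handlers, where lockdep's notion of the IRQ state may be stale. A minimal sketch of the intended call pattern; the handler name is hypothetical:]

	/* hypothetical trap handler: resynchronize the tracer with the
	 * machine's real IRQ state before doing anything lockdep sees */
	void do_example_trap(struct pt_regs *regs, long error_code)
	{
		trace_hardirqs_fixup();
		/* ... actual trap handling ... */
	}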
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/maddr.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "maddr_32.h"
+#else
+# include "maddr_64.h"
+#endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/mmu_context.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "mmu_context_32.h"
+#else
+# include "mmu_context_64.h"
+#endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pci.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,100 @@
+#ifndef __x86_PCI_H
+#define __x86_PCI_H
+
+#include <linux/mm.h> /* for struct page */
+#include
+#include
+#include
+#include
+#include
+
+
+#ifdef __KERNEL__
+
+struct pci_sysdata {
+	int		domain;		/* PCI domain */
+	int		node;		/* NUMA node */
+#ifdef CONFIG_X86_64
+	void*		iommu;		/* IOMMU private data */
+#endif
+#ifdef CONFIG_XEN_PCIDEV_FRONTEND
+	struct pcifront_device *pdev;
+#endif
+};
+
+/* scan a bus after allocating a pci_sysdata for it */
+extern struct pci_bus *pci_scan_bus_with_sysdata(int busno);
+
+static inline int pci_domain_nr(struct pci_bus *bus)
+{
+	struct pci_sysdata *sd = bus->sysdata;
+	return sd->domain;
+}
+
+static inline int pci_proc_domain(struct pci_bus *bus)
+{
+	return pci_domain_nr(bus);
+}
+
+
+/* Can be used to override the logic in pci_scan_bus for skipping
+   already-configured bus numbers - to be used for buggy BIOSes
+   or architectures with incomplete PCI setup by the loader */
+
+#ifdef CONFIG_PCI
+extern unsigned int pcibios_assign_all_busses(void);
+#else
+#define pcibios_assign_all_busses()	0
+#endif
+
+#include
+#define pcibios_scan_all_fns(a, b)	(!is_initial_xendomain())
+
+extern unsigned long pci_mem_start;
+#define PCIBIOS_MIN_IO		0x1000
+#define PCIBIOS_MIN_MEM		(pci_mem_start)
+
+#define PCIBIOS_MIN_CARDBUS_IO	0x4000
+
+void pcibios_config_init(void);
+struct pci_bus * pcibios_scan_root(int bus);
+
+void pcibios_set_master(struct pci_dev *dev);
+void pcibios_penalize_isa_irq(int irq, int active);
+struct irq_routing_table *pcibios_get_irq_routing_table(void);
+int pcibios_set_irq_routing(struct pci_dev *dev, int pin, int irq);
+
+
+#define HAVE_PCI_MMAP
+extern int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
+			       enum pci_mmap_state mmap_state, int write_combine);
+
+
+#ifdef CONFIG_PCI
+static inline void pci_dma_burst_advice(struct pci_dev *pdev,
+					enum pci_dma_burst_strategy *strat,
+					unsigned long *strategy_parameter)
+{
+	*strat = PCI_DMA_BURST_INFINITY;
+	*strategy_parameter = ~0UL;
+}
+#endif
+
+
+#endif  /* __KERNEL__ */
+
+#ifdef CONFIG_X86_32
+# include "pci_32.h"
+#else
+# include "pci_64.h"
+#endif
+
+/* implement the pci_ DMA API in terms of the generic device dma_ one */
+#include <asm-generic/pci-dma-compat.h>
+
+/* generic pci stuff */
+#include <asm-generic/pci.h>
+
+
+
+#endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgalloc.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "pgalloc_32.h"
+#else
+# include "pgalloc_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgalloc_64.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgalloc_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -112,6 +112,8 @@ static inline void pgd_list_del(pgd_t *p
 	spin_unlock(&pgd_lock);
 }
 
+extern void pgd_test_and_unpin(pgd_t *);
+
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
 	/*
@@ -122,6 +124,7 @@ static inline pgd_t *pgd_alloc(struct mm
 	if (!pgd)
 		return NULL;
 	pgd_list_add(pgd);
+	pgd_test_and_unpin(pgd);
 	/*
 	 * Copy kernel pointers in from init.
 	 * Could keep a freelist or slab cache of those because the kernel
@@ -144,27 +147,7 @@ static inline pgd_t *pgd_alloc(struct mm
 
 static inline void pgd_free(pgd_t *pgd)
 {
-	pte_t *ptep = virt_to_ptep(pgd);
-
-	if (!pte_write(*ptep)) {
-		xen_pgd_unpin(__pa(pgd));
-		BUG_ON(HYPERVISOR_update_va_mapping(
-			(unsigned long)pgd,
-			pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, PAGE_KERNEL),
-			0));
-	}
-
-	ptep = virt_to_ptep(__user_pgd(pgd));
-
-	if (!pte_write(*ptep)) {
-		xen_pgd_unpin(__pa(__user_pgd(pgd)));
-		BUG_ON(HYPERVISOR_update_va_mapping(
-			(unsigned long)__user_pgd(pgd),
-			pfn_pte(virt_to_phys(__user_pgd(pgd))>>PAGE_SHIFT,
-				PAGE_KERNEL),
-			0));
-	}
-
+	pgd_test_and_unpin(pgd);
 	pgd_list_del(pgd);
 	free_pages((unsigned long)pgd, 1);
 }
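[Illustrative sketch, not part of the patch: pgd_free() now delegates both the kernel and user halves of the pgd to pgd_test_and_unpin(), which the patch only declares here. A sketch of what such a helper presumably does for one half, modelled directly on the code removed above; the body is conjecture, not from this patch:]

	static void __pgd_test_and_unpin(pgd_t *pgd)
	{
		pte_t *ptep = virt_to_ptep(pgd);

		/* a pinned pgd is mapped read-only; unpin it and restore a
		 * writable kernel mapping before the page is freed */
		if (!pte_write(*ptep)) {
			xen_pgd_unpin(__pa(pgd));
			BUG_ON(HYPERVISOR_update_va_mapping(
				(unsigned long)pgd,
				pfn_pte(virt_to_phys(pgd) >> PAGE_SHIFT,
					PAGE_KERNEL),
				0));
		}
	}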
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "pgtable_32.h"
+#else
+# include "pgtable_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_32.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -17,10 +17,7 @@
 #include
 #include
 
-#ifndef _I386_BITOPS_H
-#include <asm/bitops.h>
-#endif
-
+#include <asm/bitops.h>
 #include
 #include
 #include
@@ -40,7 +37,7 @@ extern spinlock_t pgd_lock;
 extern struct page *pgd_list;
 void check_pgt_cache(void);
 
-void pmd_ctor(void *, struct kmem_cache *, unsigned long);
+void pmd_ctor(struct kmem_cache *, void *);
 void pgtable_cache_init(void);
 void paging_init(void);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_64.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -9,7 +9,7 @@
  * the x86-64 page table tree.
  */
 #include
-#include
+#include
 #include
 #include
 #include
@@ -139,6 +139,7 @@ static inline void pgd_clear (pgd_t * pg
 #define MAXMEM		 _AC(0x6fffffffff, UL)
 #define VMALLOC_START	 _AC(0xffffc20000000000, UL)
 #define VMALLOC_END	 _AC(0xffffe1ffffffffff, UL)
+#define VMEMMAP_START	 _AC(0xffffe20000000000, UL)
 #define MODULES_VADDR	 _AC(0xffffffff88000000, UL)
 #define MODULES_END	 _AC(0xffffffffff000000, UL)
 #define MODULES_LEN	 (MODULES_END - MODULES_VADDR)
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "processor_32.h"
+#else
+# include "processor_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor_32.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -80,6 +80,7 @@ struct cpuinfo_x86 {
 	unsigned char booted_cores;	/* number of cores as seen by OS */
 	__u8 phys_proc_id;		/* Physical processor id. */
 	__u8 cpu_core_id;		/* Core id */
+	__u8 cpu_index;			/* index into per_cpu list */
 #endif
 } __attribute__((__aligned__(SMP_CACHE_BYTES)));
 
@@ -106,14 +107,19 @@ DECLARE_PER_CPU(struct tss_struct, init_
 #endif
 
 #ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
 #else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
 #endif
 
-extern	int cpu_llc_id[NR_CPUS];
+/*
+ * the following now lives in the per cpu area:
+ * extern	int cpu_llc_id[NR_CPUS];
+ */
+DECLARE_PER_CPU(u8, cpu_llc_id);
 extern char ignore_fpu_irq;
 
 void __init cpu_detect(struct cpuinfo_x86 *c);
@@ -560,7 +566,9 @@ static inline void xen_set_iopl_mask(uns
  * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
  * resulting in stale register contents being returned.
  */
-static inline void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
+static inline void cpuid(unsigned int op,
+			 unsigned int *eax, unsigned int *ebx,
+			 unsigned int *ecx, unsigned int *edx)
 {
 	*eax = op;
 	*ecx = 0;
@@ -568,8 +576,9 @@ static inline void cpuid(unsigned int op
 }
 
 /* Some CPUID calls want 'count' to be placed in ecx */
-static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx,
-			       int *edx)
+static inline void cpuid_count(unsigned int op, int count,
+			       unsigned int *eax, unsigned int *ebx,
+			       unsigned int *ecx, unsigned int *edx)
 {
 	*eax = op;
 	*ecx = count;
@@ -639,6 +648,17 @@ static inline unsigned int cpuid_edx(uns
 #define K7_NOP7	".byte 0x8D,0x04,0x05,0,0,0,0\n"
 #define K7_NOP8	K7_NOP7 ASM_NOP1
 
+/* P6 nops */
+/* uses eax dependencies (Intel-recommended choice) */
+#define P6_NOP1	GENERIC_NOP1
+#define P6_NOP2	".byte 0x66,0x90\n"
+#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
+#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
+#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
+#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+
 #ifdef CONFIG_MK8
 #define ASM_NOP1 K8_NOP1
 #define ASM_NOP2 K8_NOP2
@@ -657,6 +677,17 @@ static inline unsigned int cpuid_edx(uns
 #define ASM_NOP6 K7_NOP6
 #define ASM_NOP7 K7_NOP7
 #define ASM_NOP8 K7_NOP8
+#elif defined(CONFIG_M686) || defined(CONFIG_MPENTIUMII) || \
+	defined(CONFIG_MPENTIUMIII) || defined(CONFIG_MPENTIUMM) || \
+	defined(CONFIG_MCORE2) || defined(CONFIG_PENTIUM4)
+#define ASM_NOP1 P6_NOP1
+#define ASM_NOP2 P6_NOP2
+#define ASM_NOP3 P6_NOP3
+#define ASM_NOP4 P6_NOP4
+#define ASM_NOP5 P6_NOP5
+#define ASM_NOP6 P6_NOP6
+#define ASM_NOP7 P6_NOP7
+#define ASM_NOP8 P6_NOP8
 #else
 #define ASM_NOP1 GENERIC_NOP1
 #define ASM_NOP2 GENERIC_NOP2
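[Illustrative sketch, not part of the patch: as the comment above notes, some CPUID leaves take a sub-leaf index in ecx, which is what cpuid_count() exists for. A user-space analogue using GCC's cpuid.h helper, querying leaf 4 (deterministic cache parameters):]

	#include <cpuid.h>	/* GCC helper, analogous to the kernel wrappers */
	#include <stdio.h>

	int main(void)
	{
		unsigned int eax, ebx, ecx, edx;

		/* leaf 4, sub-leaf 0: first cache level description */
		__cpuid_count(4, 0, eax, ebx, ecx, edx);
		printf("cache type field: %u\n", eax & 0x1f);
		return 0;
	}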
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor_64.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -74,6 +74,7 @@ struct cpuinfo_x86 {
 	__u8 booted_cores;	/* number of cores as seen by OS */
 	__u8 phys_proc_id;	/* Physical Processor id. */
 	__u8 cpu_core_id;	/* Core id. */
+	__u8 cpu_index;		/* index into per_cpu list */
 #endif
 } ____cacheline_aligned;
 
@@ -88,11 +89,12 @@ struct cpuinfo_x86 {
 #define X86_VENDOR_UNKNOWN 0xff
 
 #ifdef CONFIG_SMP
-extern struct cpuinfo_x86 cpu_data[];
-#define current_cpu_data cpu_data[smp_processor_id()]
+DECLARE_PER_CPU(struct cpuinfo_x86, cpu_info);
+#define cpu_data(cpu)		per_cpu(cpu_info, cpu)
+#define current_cpu_data	cpu_data(smp_processor_id())
 #else
-#define cpu_data (&boot_cpu_data)
-#define current_cpu_data boot_cpu_data
+#define cpu_data(cpu)		boot_cpu_data
+#define current_cpu_data	boot_cpu_data
 #endif
 
 extern char ignore_irq13;
@@ -343,6 +345,16 @@ struct extended_sigtable {
 };
 
+#if defined(CONFIG_MPSC) || defined(CONFIG_MCORE2)
+#define ASM_NOP1 P6_NOP1
+#define ASM_NOP2 P6_NOP2
+#define ASM_NOP3 P6_NOP3
+#define ASM_NOP4 P6_NOP4
+#define ASM_NOP5 P6_NOP5
+#define ASM_NOP6 P6_NOP6
+#define ASM_NOP7 P6_NOP7
+#define ASM_NOP8 P6_NOP8
+#else
 #define ASM_NOP1 K8_NOP1
 #define ASM_NOP2 K8_NOP2
@@ -351,6 +363,7 @@ struct extended_sigtable {
 #define ASM_NOP6 K8_NOP6
 #define ASM_NOP7 K8_NOP7
 #define ASM_NOP8 K8_NOP8
+#endif
 
 /* Opteron nops */
 #define K8_NOP1 ".byte 0x90\n"
@@ -362,6 +375,17 @@ struct extended_sigtable {
 #define K8_NOP7	K8_NOP4 K8_NOP3
 #define K8_NOP8	K8_NOP4 K8_NOP4
 
+/* P6 nops */
+/* uses eax dependencies (Intel-recommended choice) */
+#define P6_NOP1	".byte 0x90\n"
+#define P6_NOP2	".byte 0x66,0x90\n"
+#define P6_NOP3	".byte 0x0f,0x1f,0x00\n"
+#define P6_NOP4	".byte 0x0f,0x1f,0x40,0\n"
+#define P6_NOP5	".byte 0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP6	".byte 0x66,0x0f,0x1f,0x44,0x00,0\n"
+#define P6_NOP7	".byte 0x0f,0x1f,0x80,0,0,0,0\n"
+#define P6_NOP8	".byte 0x0f,0x1f,0x84,0x00,0,0,0,0\n"
+
 #define ASM_NOP_MAX 8
 
 /* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
@@ -377,12 +401,6 @@ static inline void sync_core(void)
 	asm volatile("cpuid" : "=a" (tmp) : "0" (1) : "ebx","ecx","edx","memory");
 }
 
-#define ARCH_HAS_PREFETCH
-static inline void prefetch(void *x)
-{
-	asm volatile("prefetcht0 (%0)" :: "r" (x));
-}
-
 #define ARCH_HAS_PREFETCHW 1
 static inline void prefetchw(void *x)
 {
@@ -398,11 +416,6 @@ static inline void prefetchw(void *x)
 
 #define cpu_relax()   rep_nop()
 
-static inline void serialize_cpu(void)
-{
-	__asm__ __volatile__ ("cpuid" : : : "ax", "bx", "cx", "dx");
-}
-
 static inline void __monitor(const void *eax, unsigned long ecx,
 		unsigned long edx)
 {
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "smp_32.h"
+#else
+# include "smp_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp_32.h	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -11,7 +11,7 @@
 #endif
 
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(__ASSEMBLY__)
-#include
+#include
 #include
 #include
 #ifdef CONFIG_X86_IO_APIC
@@ -30,8 +30,8 @@ extern void smp_alloc_memory(void);
 extern int pic_mode;
 extern int smp_num_siblings;
 
-extern cpumask_t cpu_sibling_map[];
-extern cpumask_t cpu_core_map[];
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
 
 extern void (*mtrr_hook) (void);
 extern void zap_low_mappings (void);
@@ -39,9 +39,11 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 
 #define MAX_APICID 256
-extern u8 x86_cpu_to_apicid[];
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);
 
-#define cpu_physical_id(cpu)	x86_cpu_to_apicid[cpu]
+#define cpu_physical_id(cpu)	per_cpu(x86_cpu_to_apicid, cpu)
 
 #ifdef CONFIG_HOTPLUG_CPU
 extern void cpu_exit_clear(void);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp_64.h	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -40,10 +40,19 @@ extern void lock_ipi_call_lock(void);
 extern void unlock_ipi_call_lock(void);
 extern int smp_num_siblings;
 extern void smp_send_reschedule(int cpu);
+extern int smp_call_function_mask(cpumask_t mask, void (*func)(void *),
+				  void *info, int wait);
 
-extern cpumask_t cpu_sibling_map[NR_CPUS];
-extern cpumask_t cpu_core_map[NR_CPUS];
-extern u8 cpu_llc_id[NR_CPUS];
+/*
+ * cpu_sibling_map and cpu_core_map now live
+ * in the per cpu area
+ *
+ * extern cpumask_t cpu_sibling_map[NR_CPUS];
+ * extern cpumask_t cpu_core_map[NR_CPUS];
+ */
+DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
+DECLARE_PER_CPU(cpumask_t, cpu_core_map);
+DECLARE_PER_CPU(u8, cpu_llc_id);
 
 #define SMP_TRAMPOLINE_BASE 0x6000
 
@@ -70,6 +79,8 @@ extern unsigned __cpuinitdata disabled_c
 
 #endif	/* CONFIG_SMP */
 
+#define safe_smp_processor_id() smp_processor_id()
+
 #ifdef CONFIG_X86_LOCAL_APIC
 static inline int hard_smp_processor_id(void)
 {
@@ -82,8 +93,9 @@ static inline int hard_smp_processor_id(
  * Some lowlevel functions might want to know about
  * the real APIC ID <-> CPU # mapping.
 */
-extern u8 x86_cpu_to_apicid[NR_CPUS];	/* physical ID */
-extern u8 x86_cpu_to_log_apicid[NR_CPUS];
+extern u8 __initdata x86_cpu_to_apicid_init[];
+extern void *x86_cpu_to_apicid_ptr;
+DECLARE_PER_CPU(u8, x86_cpu_to_apicid);	/* physical ID */
 extern u8 bios_cpu_apicid[];
 
 #ifdef CONFIG_X86_LOCAL_APIC
@@ -118,8 +130,9 @@ static __inline int logical_smp_processo
 #endif
 
 #ifdef CONFIG_SMP
-#define cpu_physical_id(cpu)		x86_cpu_to_apicid[cpu]
+#define cpu_physical_id(cpu)		per_cpu(x86_cpu_to_apicid, cpu)
 #else
+extern unsigned int boot_cpu_id;
 #define cpu_physical_id(cpu)		boot_cpu_id
 #endif /* !CONFIG_SMP */
 #endif
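[Illustrative sketch, not part of the patch: with cpu_data converted from a global array to a per-CPU variable, consumers switch from cpu_data[cpu] to the cpu_data(cpu) macro, and per-CPU companions such as cpu_llc_id are read via per_cpu(). A kernel-context sketch of a typical call site after the conversion; the surrounding code is hypothetical:]

	/* old: struct cpuinfo_x86 *c = &cpu_data[cpu]; */
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	pr_info("CPU%u: llc id %u\n", c->cpu_index,
		per_cpu(cpu_llc_id, cpu));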
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/swiotlb.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "swiotlb_32.h"
+#else
+# include_next
+#endif
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "system_32.h"
+#else
+# include "system_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system_32.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -9,6 +9,7 @@
 #include
 
 #ifdef __KERNEL__
+#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */
 
 struct task_struct;	/* one of the stranger aspects of C forward declarations.. */
 extern struct task_struct * FASTCALL(__switch_to(struct task_struct *prev, struct task_struct *next));
@@ -138,7 +139,7 @@ static inline unsigned long xen_read_cr4
 {
 	unsigned long val;
 	/* This could fault if %cr4 does not exist */
-	asm("1: movl %%cr4, %0		\n"
+	asm volatile("1: movl %%cr4, %0		\n"
 		"2:				\n"
 		".section __ex_table,\"a\"	\n"
 		".long 1b,2b			\n"
@@ -157,6 +158,11 @@ static inline void xen_wbinvd(void)
 	asm volatile("wbinvd": : :"memory");
 }
 
+static inline void clflush(volatile void *__p)
+{
+	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
+
 #define read_cr0()	(xen_read_cr0())
 #define write_cr0(x)	(xen_write_cr0(x))
 #define read_cr2()	(xen_read_cr2())
@@ -207,6 +213,7 @@ static inline unsigned long get_limit(un
 
 #define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
 #define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
+#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
 
 /**
  * read_barrier_depends - Flush all pending reads that subsequents reads
@@ -262,18 +269,18 @@ static inline unsigned long get_limit(un
 
 #define read_barrier_depends()	do { } while(0)
 
+#ifdef CONFIG_SMP
+#define smp_mb()	mb()
+#ifdef CONFIG_X86_PPRO_FENCE
+# define smp_rmb()	rmb()
+#else
+# define smp_rmb()	barrier()
+#endif
 #ifdef CONFIG_X86_OOSTORE
-/* Actually there are no OOO store capable CPUs for now that do SSE,
-   but make it already an possibility. */
-#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
+# define smp_wmb() 	wmb()
 #else
-#define wmb()	__asm__ __volatile__ ("": : :"memory")
+# define smp_wmb()	barrier()
 #endif
-
-#ifdef CONFIG_SMP
-#define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
 #else
@@ -300,5 +307,6 @@ extern unsigned long arch_align_stack(un
 extern void free_init_pages(char *what, unsigned long begin, unsigned long end);
 
 void default_idle(void);
+void __show_registers(struct pt_regs *, int all);
 
 #endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system_64.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -11,8 +11,12 @@
 
 #ifdef __KERNEL__
 
-#define __STR(x) #x
-#define STR(x) __STR(x)
+/* entries in ARCH_DLINFO: */
+#ifdef CONFIG_IA32_EMULATION
+# define AT_VECTOR_SIZE_ARCH 2
+#else
+# define AT_VECTOR_SIZE_ARCH 1
+#endif
 
 #define __SAVE(reg,offset) "movq %%" #reg ",(14-" #offset ")*8(%%rsp)\n\t"
 #define __RESTORE(reg,offset) "movq (14-" #offset ")*8(%%rsp),%%" #reg "\n\t"
@@ -92,7 +96,7 @@ static inline void write_cr0(unsigned lo
 
 #define read_cr3() ({ \
 	unsigned long __dummy; \
-	asm("movq %%cr3,%0" : "=r" (__dummy)); \
+	asm volatile("movq %%cr3,%0" : "=r" (__dummy)); \
 	machine_to_phys(__dummy); \
 })
 
@@ -105,7 +109,7 @@ static inline void write_cr3(unsigned lo
 static inline unsigned long read_cr4(void)
 {
 	unsigned long cr4;
-	asm("movq %%cr4,%0" : "=r" (cr4));
+	asm volatile("movq %%cr4,%0" : "=r" (cr4));
 	return cr4;
 }
 
@@ -131,12 +135,17 @@ static inline void write_cr8(unsigned lo
 
 #endif	/* __KERNEL__ */
 
+static inline void clflush(volatile void *__p)
+{
+	asm volatile("clflush %0" : "+m" (*(char __force *)__p));
+}
+
 #define nop() __asm__ __volatile__ ("nop")
 
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
-#define smp_rmb()	rmb()
-#define smp_wmb()	wmb()
+#define smp_rmb()	barrier()
+#define smp_wmb()	barrier()
 #define smp_read_barrier_depends()	do {} while(0)
 #else
 #define smp_mb()	barrier()
@@ -153,12 +162,8 @@ static inline void write_cr8(unsigned lo
 */
 #define mb()	asm volatile("mfence":::"memory")
 #define rmb()	asm volatile("lfence":::"memory")
-
-#ifdef CONFIG_UNORDERED_IO
 #define wmb()	asm volatile("sfence" ::: "memory")
-#else
-#define wmb()	asm volatile("" ::: "memory")
-#endif
+
 #define read_barrier_depends()	do {} while(0)
 #define set_mb(var, value) do { (void) xchg(&var, value); } while (0)
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "tlbflush_32.h"
+#else
+# include "tlbflush_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/tlbflush_32.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush_32.h	2010-03-24 15:10:29.000000000 +0100
@@ -23,7 +23,6 @@
  * - flush_tlb_page(vma, vmaddr) flushes one page
  * - flush_tlb_range(vma, start, end) flushes a range of pages
  * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
 *
  * ..but the i386 has somewhat limited tlb flushing capabilities,
  * and page-granular flushes are available only on i486 and up.
@@ -97,10 +96,4 @@ static inline void flush_tlb_kernel_rang
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* i386 does not keep any page table caches in TLB */
-}
-
 #endif /* _I386_TLBFLUSH_H */
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/tlbflush_64.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush_64.h	2010-03-24 15:10:29.000000000 +0100
@@ -28,7 +28,6 @@
  * - flush_tlb_page(vma, vmaddr) flushes one page
  * - flush_tlb_range(vma, start, end) flushes a range of pages
  * - flush_tlb_kernel_range(start, end) flushes a range of kernel pages
- * - flush_tlb_pgtables(mm, start, end) flushes a range of page tables
 *
  * x86-64 can only flush individual pages or full VMs. For a range flush
  * we always do the full VM. Might be worth trying if for a small
@@ -95,12 +94,4 @@ static inline void flush_tlb_kernel_rang
 	flush_tlb_all();
 }
 
-static inline void flush_tlb_pgtables(struct mm_struct *mm,
-				      unsigned long start, unsigned long end)
-{
-	/* x86_64 does not keep any page table caches in a software TLB.
-	   The CPUs do in their hardware TLBs, but they are handled
-	   by the normal TLB flushing algorithms. */
-}
-
 #endif /* _X8664_TLBFLUSH_H */
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/xor.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,5 @@
+#ifdef CONFIG_X86_32
+# include "../../xor_32.h"
+#else
+# include "xor_64.h"
+#endif
--- head-2010-05-25.orig/arch/x86/include/asm/mmu.h	2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/arch/x86/include/asm/mmu.h	2010-03-24 15:10:29.000000000 +0100
@@ -11,6 +11,9 @@
 typedef struct {
 	void *ldt;
 	int size;
+#ifdef CONFIG_XEN
+	unsigned has_foreign_mappings:1;
+#endif
 	struct mutex lock;
 	void *vdso;
 } mm_context_t;
--- head-2010-05-25.orig/include/linux/kexec.h	2010-03-24 14:53:41.000000000 +0100
+++ head-2010-05-25/include/linux/kexec.h	2010-03-24 15:10:29.000000000 +0100
@@ -205,8 +205,15 @@ extern struct kimage *kexec_crash_image;
 #define VMCOREINFO_BYTES           (4096)
 #define VMCOREINFO_NOTE_NAME       "VMCOREINFO"
 #define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
+#if !defined(CONFIG_XEN) || !defined(CONFIG_X86)
 #define VMCOREINFO_NOTE_SIZE       (KEXEC_NOTE_HEAD_BYTES*2 + VMCOREINFO_BYTES \
 				    + VMCOREINFO_NOTE_NAME_BYTES)
+#else
+#define VMCOREINFO_NOTE_SIZE       ALIGN(KEXEC_NOTE_HEAD_BYTES*2 \
+					 + VMCOREINFO_BYTES \
+					 + VMCOREINFO_NOTE_NAME_BYTES, \
+					 PAGE_SIZE)
+#endif
 
 /* Location of a reserved region to hold the crash kernel.
 */
--- head-2010-05-25.orig/include/linux/oprofile.h	2010-03-24 15:02:17.000000000 +0100
+++ head-2010-05-25/include/linux/oprofile.h	2010-03-24 15:10:29.000000000 +0100
@@ -120,6 +120,8 @@ void oprofile_add_ext_sample(unsigned lo
 * backtrace.
 */
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event);

+void oprofile_add_mode(int cpu_mode);
+
 /* add a backtrace entry, to be called from the ->backtrace callback */
 void oprofile_add_trace(unsigned long eip);
--- head-2010-05-25.orig/include/linux/sysctl.h	2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/include/linux/sysctl.h	2010-03-24 15:10:29.000000000 +0100
@@ -59,6 +59,7 @@ enum
 	CTL_BUS=8,		/* Busses */
 	CTL_ABI=9,		/* Binary emulation */
 	CTL_CPU=10,		/* CPU stuff (speed scaling, etc) */
+	CTL_XEN=123,		/* Xen info and control */
 	CTL_ARLAN=254,		/* arlan wireless driver */
 	CTL_S390DBF=5677,	/* s390 debug */
 	CTL_SUNRPC=7249,	/* sunrpc debug */
--- head-2010-05-25.orig/include/xen/pcifront.h	2007-06-18 08:38:13.000000000 +0200
+++ head-2010-05-25/include/xen/pcifront.h	2010-03-24 15:10:29.000000000 +0100
@@ -12,13 +12,11 @@
 
 #ifndef __ia64__
 
+#include
+
 struct pcifront_device;
 struct pci_bus;
-
-struct pcifront_sd {
-	int domain;
-	struct pcifront_device *pdev;
-};
+#define pcifront_sd pci_sysdata
 
 static inline struct pcifront_device *
 pcifront_get_pdev(struct pcifront_sd *sd)
@@ -34,18 +32,6 @@ static inline void pcifront_init_sd(stru
 	sd->pdev = pdev;
 }
 
-#if defined(CONFIG_PCI_DOMAINS)
-static inline int pci_domain_nr(struct pci_bus *bus)
-{
-	struct pcifront_sd *sd = bus->sysdata;
-	return sd->domain;
-}
-static inline int pci_proc_domain(struct pci_bus *bus)
-{
-	return pci_domain_nr(bus);
-}
-#endif /* CONFIG_PCI_DOMAINS */
-
 static inline void pcifront_setup_root_resources(struct pci_bus *bus,
 						 struct pcifront_sd *sd)
 {
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/include/xen/sysctl.h	2010-03-24 15:10:29.000000000 +0100
@@ -0,0 +1,11 @@
+#ifndef _XEN_SYSCTL_H
+#define _XEN_SYSCTL_H
+
+/* CTL_XEN names: */
+enum
+{
+	CTL_XEN_INDEPENDENT_WALLCLOCK=1,
+	CTL_XEN_PERMITTED_CLOCK_JITTER=2,
+};
+
+#endif /* _XEN_SYSCTL_H */
--- head-2010-05-25.orig/include/xen/xenbus.h	2010-03-24 15:09:23.000000000 +0100
+++ head-2010-05-25/include/xen/xenbus.h	2010-03-24 15:10:29.000000000 +0100
@@ -107,7 +107,7 @@ struct xenbus_driver {
 	int (*suspend)(struct xenbus_device *dev);
 	int (*suspend_cancel)(struct xenbus_device *dev);
 	int (*resume)(struct xenbus_device *dev);
-	int (*uevent)(struct xenbus_device *, char **, int, char *, int);
+	int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
 	struct device_driver driver;
 	int (*read_otherend_details)(struct xenbus_device *dev);
 	int (*is_ready)(struct xenbus_device *dev);
--- head-2010-05-25.orig/kernel/kexec.c	2010-03-24 15:08:58.000000000 +0100
+++ head-2010-05-25/kernel/kexec.c	2010-05-25 09:22:21.000000000 +0200
@@ -53,7 +53,11 @@ note_buf_t __percpu *crash_notes;
 
 /* vmcoreinfo stuff */
 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+u32
+#if defined(CONFIG_XEN) && defined(CONFIG_X86)
+__attribute__((__section__(".bss.page_aligned"), __aligned__(PAGE_SIZE)))
+#endif
+vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
@@ -1272,6 +1276,7 @@ static int __init crash_notes_memory_ini
 
 module_init(crash_notes_memory_init)
 
+#ifndef CONFIG_XEN
 /*
 * parsing the "crashkernel" commandline
 *
@@ -1434,7 +1439,7 @@ int __init parse_crashkernel(char 	 *cm
 
 	return 0;
 }
-
+#endif
 
 void crash_save_vmcoreinfo(void)
@@ -1491,7 +1496,18 @@ static int __init crash_save_vmcoreinfo_
 	VMCOREINFO_SYMBOL(init_uts_ns);
 	VMCOREINFO_SYMBOL(node_online_map);
+#ifndef CONFIG_X86_XEN
 	VMCOREINFO_SYMBOL(swapper_pg_dir);
+#else
+/*
+ * Since for x86-32 Xen swapper_pg_dir is a pointer rather than an array,
+ * make the value stored consistent with native (i.e. the base address of
+ * the page directory).
+ */
+# define swapper_pg_dir *swapper_pg_dir
+	VMCOREINFO_SYMBOL(swapper_pg_dir);
+# undef swapper_pg_dir
+#endif
 	VMCOREINFO_SYMBOL(_stext);
 	VMCOREINFO_SYMBOL(vmlist);
--- head-2010-05-25.orig/kernel/kexec.c	2010-03-24 15:08:58.000000000 +0100
+++ head-2010-05-25/kernel/kexec.c	2010-05-25 09:22:21.000000000 +0200
@@ -53,7 +53,11 @@ note_buf_t __percpu *crash_notes;
 
 /* vmcoreinfo stuff */
 static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
-u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
+u32
+#if defined(CONFIG_XEN) && defined(CONFIG_X86)
+__attribute__((__section__(".bss.page_aligned"), __aligned__(PAGE_SIZE)))
+#endif
+vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4];
 size_t vmcoreinfo_size;
 size_t vmcoreinfo_max_size = sizeof(vmcoreinfo_data);
 
@@ -1272,6 +1276,7 @@ static int __init crash_notes_memory_ini
 module_init(crash_notes_memory_init)
 
+#ifndef CONFIG_XEN
 /*
  * parsing the "crashkernel" commandline
  *
@@ -1434,7 +1439,7 @@ int __init parse_crashkernel(char *cm
 
 	return 0;
 }
-
+#endif
 
 void crash_save_vmcoreinfo(void)
@@ -1491,7 +1496,18 @@ static int __init crash_save_vmcoreinfo_
 	VMCOREINFO_SYMBOL(init_uts_ns);
 	VMCOREINFO_SYMBOL(node_online_map);
+#ifndef CONFIG_X86_XEN
 	VMCOREINFO_SYMBOL(swapper_pg_dir);
+#else
+/*
+ * Since for x86-32 Xen swapper_pg_dir is a pointer rather than an array,
+ * make the value stored consistent with native (i.e. the base address of
+ * the page directory).
+ */
+# define swapper_pg_dir *swapper_pg_dir
+	VMCOREINFO_SYMBOL(swapper_pg_dir);
+# undef swapper_pg_dir
+#endif
 	VMCOREINFO_SYMBOL(_stext);
 	VMCOREINFO_SYMBOL(vmlist);
--- head-2010-05-25.orig/kernel/sysctl_binary.c	2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/kernel/sysctl_binary.c	2010-04-15 09:55:30.000000000 +0200
@@ -874,6 +874,14 @@ static const struct bin_table bin_bus_ta
 };
 
+#ifdef CONFIG_XEN
+static const struct bin_table trans_xen_table[] = {
+	{ CTL_INT,	CTL_XEN_INDEPENDENT_WALLCLOCK,	"independent_wallclock" },
+	{ CTL_ULONG,	CTL_XEN_PERMITTED_CLOCK_JITTER,	"permitted_clock_jitter" },
+	{}
+};
+#endif
+
 static const struct bin_table bin_s390dbf_table[] = {
 	{ CTL_INT,	5678 /* CTL_S390DBF_STOPPABLE */, "debug_stoppable" },
 	{ CTL_INT,	5679 /* CTL_S390DBF_ACTIVE */,	  "debug_active" },
@@ -913,6 +921,9 @@ static const struct bin_table bin_root_t
 	{ CTL_DIR,	CTL_BUS,	"bus",		bin_bus_table },
 	{ CTL_DIR,	CTL_ABI,	"abi" },
 	/* CTL_CPU not used */
+#ifdef CONFIG_XEN
+	{ CTL_DIR,	CTL_XEN,	"xen",		trans_xen_table },
+#endif
 	/* CTL_ARLAN "arlan" no longer used */
 	{ CTL_DIR,	CTL_S390DBF,	"s390dbf",	bin_s390dbf_table },
 	{ CTL_DIR,	CTL_SUNRPC,	"sunrpc",	bin_sunrpc_table },
--- head-2010-05-25.orig/kernel/sysctl_check.c	2010-05-25 09:12:09.000000000 +0200
+++ head-2010-05-25/kernel/sysctl_check.c	2010-03-24 15:10:29.000000000 +0100
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include <xen/sysctl.h>
 
 static int sysctl_depth(struct ctl_table *table)
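The lib/swiotlb-xen.c diff below mirrors the mainline 2.6.24
scatterlist-chaining conversion: entries may now live in several chained
chunks, so stepping with sg++ and touching sg->page directly are both
invalid; iteration has to go through for_each_sg(), which follows chain
links, and the page is read with sg_page(), which masks out the chain-marker
bits. A hedged sketch of the pattern, with dma_map_one() as a made-up
stand-in for the real per-entry work:

	#include <linux/scatterlist.h>

	/* hypothetical per-entry helper, for illustration only */
	static void dma_map_one(struct page *page, unsigned int offset,
				unsigned int length)
	{
		(void)page; (void)offset; (void)length;
	}

	static void walk_sg(struct scatterlist *sgl, int nelems)
	{
		struct scatterlist *sg;
		int i;

		/* Before 2.6.24 this loop would have read
		 * "for (i = 0; i < nelems; i++, sg++)", which walks off
		 * the end of a chunk when the list is chained. */
		for_each_sg(sgl, sg, nelems, i)
			dma_map_one(sg_page(sg), sg->offset, sg->length);
	}

The same reasoning explains the error-path change in swiotlb_map_sg(): with
chaining, "sg - i" no longer recovers the head of the list, so the original
head pointer sgl is kept and passed to swiotlb_unmap_sg() when unwinding.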
--- head-2010-05-25.orig/lib/swiotlb-xen.c	2010-03-24 15:09:15.000000000 +0100
+++ head-2010-05-25/lib/swiotlb-xen.c	2010-03-24 15:10:29.000000000 +0100
@@ -27,7 +27,7 @@
 #include
 #include
 #include
-#include <asm/scatterlist.h>
+#include <linux/scatterlist.h>
 
 int swiotlb;
 EXPORT_SYMBOL(swiotlb);
@@ -574,9 +574,10 @@ swiotlb_sync_single_for_device(struct de
  * same here.
  */
 int
-swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+swiotlb_map_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	       int dir)
 {
+	struct scatterlist *sg;
 	struct phys_addr buffer;
 	dma_addr_t dev_addr;
 	char *map;
@@ -584,22 +585,22 @@ swiotlb_map_sg(struct device *hwdev, str
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++) {
-		dev_addr = gnttab_dma_map_page(sg->page) + sg->offset;
+	for_each_sg(sgl, sg, nelems, i) {
+		dev_addr = gnttab_dma_map_page(sg_page(sg)) + sg->offset;
 
-		if (range_straddles_page_boundary(page_to_pseudophys(sg->page)
+		if (range_straddles_page_boundary(page_to_pseudophys(sg_page(sg))
 						  + sg->offset, sg->length)
 		    || address_needs_mapping(hwdev, dev_addr)) {
 			gnttab_dma_unmap_page(dev_addr);
-			buffer.page   = sg->page;
+			buffer.page   = sg_page(sg);
 			buffer.offset = sg->offset;
 			map = map_single(hwdev, buffer, sg->length, dir);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
 				   to do proper error handling. */
 				swiotlb_full(hwdev, sg->length, dir, 0);
-				swiotlb_unmap_sg(hwdev, sg - i, i, dir);
-				sg[0].dma_length = 0;
+				swiotlb_unmap_sg(hwdev, sgl, i, dir);
+				sgl[0].dma_length = 0;
 				return 0;
 			}
 			sg->dma_address = virt_to_bus(map);
@@ -615,19 +616,21 @@ swiotlb_map_sg(struct device *hwdev, str
  * concerning calls here are the same as for swiotlb_unmap_single() above.
  */
 void
-swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nelems,
+swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sgl, int nelems,
 		 int dir)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++)
+	for_each_sg(sgl, sg, nelems, i) {
 		if (in_swiotlb_aperture(sg->dma_address))
 			unmap_single(hwdev, bus_to_virt(sg->dma_address),
 				     sg->dma_length, dir);
 		else
 			gnttab_dma_unmap_page(sg->dma_address);
+	}
 }
 
 /*
@@ -638,31 +641,35 @@ swiotlb_unmap_sg(struct device *hwdev, s
 * and usage.
 */
 void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sgl,
			int nelems, int dir)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++)
+	for_each_sg(sgl, sg, nelems, i) {
 		if (in_swiotlb_aperture(sg->dma_address))
 			sync_single(hwdev, bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir);
+	}
 }
 
 void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sgl,
			   int nelems, int dir)
 {
+	struct scatterlist *sg;
 	int i;
 
 	BUG_ON(dir == DMA_NONE);
 
-	for (i = 0; i < nelems; i++, sg++)
+	for_each_sg(sgl, sg, nelems, i) {
 		if (in_swiotlb_aperture(sg->dma_address))
 			sync_single(hwdev, bus_to_virt(sg->dma_address),
 				    sg->dma_length, dir);
+	}
 }
 
 #ifdef CONFIG_HIGHMEM