From: Linux Kernel Mailing List Subject: Linux: 2.6.35 Patch-mainline: 2.6.35 This patch contains the differences between 2.6.34 and 2.6.35. Acked-by: Jeff Mahoney Automatically created from "patches.kernel.org/patch-2.6.35" by xen-port-patches.py --- head-2011-03-17.orig/arch/x86/include/asm/thread_info.h 2011-02-01 14:42:26.000000000 +0100 +++ head-2011-03-17/arch/x86/include/asm/thread_info.h 2011-02-01 15:03:10.000000000 +0100 @@ -153,8 +153,7 @@ struct thread_info { (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) #else -#define _TIF_WORK_CTXSW (_TIF_NOTSC \ - /*todo | _TIF_DEBUGCTLMSR | _TIF_DS_AREA_MSR */) +#define _TIF_WORK_CTXSW (_TIF_NOTSC /*todo | _TIF_BLOCKSTEP */) #endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2011-03-11 11:14:34.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-01 15:03:10.000000000 +0100 @@ -70,7 +70,7 @@ extern start_info_t *xen_start_info; #define is_initial_xendomain() 0 #endif -#define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN)) +#define init_hypervisor(c) ((void)(c)) #define init_hypervisor_platform() init_hypervisor(&boot_cpu_data) DECLARE_PER_CPU(struct vcpu_runstate_info, runstate); --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/perf_event.h 2011-02-01 15:03:10.000000000 +0100 @@ -10,6 +10,15 @@ */ #define PERF_EFLAGS_EXACT (1UL << 3) +#define perf_instruction_pointer(regs) instruction_pointer(regs) + +#define perf_misc_flags(regs) ({ \ + struct pt_regs *_r_ = (regs); \ + unsigned long _f_ = user_mode(_r_) ? PERF_RECORD_MISC_USER \ + : PERF_RECORD_MISC_KERNEL; \ + _r_->flags & PERF_EFLAGS_EXACT ? _f_ | PERF_RECORD_MISC_EXACT_IP : _f_; \ +}) + #endif static inline void init_hw_perf_events(void) {} --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:46:54.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/processor.h 2011-03-03 16:47:17.000000000 +0100 @@ -21,7 +21,6 @@ struct mm_struct; #include #include #include -#include #include #include @@ -29,6 +28,8 @@ struct mm_struct; #include #include #include +#include + #include #define HBP_NUM 4 @@ -124,7 +125,6 @@ struct cpuinfo_x86 { /* Index into per_cpu list: */ u16 cpu_index; #endif - unsigned int x86_hyper_vendor; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -138,10 +138,6 @@ struct cpuinfo_x86 { #define X86_VENDOR_UNKNOWN 0xff -#define X86_HYPER_VENDOR_NONE 0 -#define X86_HYPER_VENDOR_VMWARE 1 -#define X86_HYPER_VENDOR_XEN 'X' - /* * capabilities of CPUs */ @@ -396,6 +392,10 @@ union thread_xstate { struct xsave_struct xsave; }; +struct fpu { + union thread_xstate *state; +}; + #ifdef CONFIG_X86_64 #ifndef CONFIG_X86_NO_TSS DECLARE_PER_CPU(struct orig_ist, orig_ist); @@ -474,7 +474,7 @@ struct thread_struct { unsigned long trap_no; unsigned long error_code; /* floating point and extended processor state */ - union thread_xstate *xstate; + struct fpu fpu; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; @@ -487,10 +487,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; -/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ - unsigned long debugctlmsr; - /* Debug Store context; see asm/ds.h */ - struct ds_context *ds_ctx; }; static inline unsigned long xen_get_debugreg(int regno) @@ -749,6 +745,8 @@ static inline void wbinvd_halt(void) extern void enable_sep_cpu(void); extern int sysenter_setup(void); +extern void early_trap_init(void); + /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; @@ -759,7 +757,7 @@ extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) { - unsigned long debugctlmsr = 0; + unsigned long debugctlmsr = 0; #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) @@ -767,21 +765,6 @@ static inline unsigned long get_debugctl #endif rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); - return debugctlmsr; -} - -static inline unsigned long get_debugctlmsr_on_cpu(int cpu) -{ - u64 debugctlmsr = 0; - u32 val1, val2; - -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return 0; -#endif - rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); - debugctlmsr = val1 | ((u64)val2 << 32); - return debugctlmsr; } @@ -794,18 +777,6 @@ static inline void update_debugctlmsr(un wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } -static inline void update_debugctlmsr_on_cpu(int cpu, - unsigned long debugctlmsr) -{ -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return; -#endif - wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, - (u32)((u64)debugctlmsr), - (u32)((u64)debugctlmsr >> 32)); -} - /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: --- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:10:13.000000000 +0100 +++ head-2011-03-17/arch/x86/include/mach-xen/asm/system.h 2011-03-03 16:10:31.000000000 +0100 @@ -435,7 +435,7 @@ void stop_this_cpu(void *dummy); * * (Could use an alternative three way for this if there was one.) */ -static inline void rdtsc_barrier(void) +static __always_inline void rdtsc_barrier(void) { alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC); alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC); --- head-2011-03-17.orig/arch/x86/kernel/acpi/processor_extcntl_xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/acpi/processor_extcntl_xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -63,7 +63,7 @@ static int xen_cx_notifier(struct acpi_p data->reg.space_id = cx->reg.space_id; data->reg.bit_width = cx->reg.bit_width; data->reg.bit_offset = cx->reg.bit_offset; - data->reg.access_size = cx->reg.reserved; + data->reg.access_size = cx->reg.access_size; data->reg.address = cx->reg.address; /* Get dependency relationships */ --- head-2011-03-17.orig/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/acpi/sleep-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -164,13 +164,16 @@ static int __init acpi_sleep_setup(char #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_nohwsig", 10) == 0) acpi_no_s4_hw_signature(); - if (strncmp(str, "s4_nonvs", 8) == 0) - acpi_s4_no_nvs(); + if (strncmp(str, "s4_nonvs", 8) == 0) { + pr_warning("ACPI: acpi_sleep=s4_nonvs is deprecated, " + "please use acpi_sleep=nonvs instead"); + acpi_nvs_nosave(); + } #endif + if (strncmp(str, "nonvs", 5) == 0) + acpi_nvs_nosave(); if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); - if (strncmp(str, "sci_force_enable", 16) == 0) - acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); --- head-2011-03-17.orig/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/apic/io_apic-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -99,6 +99,9 @@ int nr_ioapics; /* IO APIC gsi routing info */ struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS]; +/* The one past the highest gsi number used */ +u32 gsi_top; + /* MP IRQ source entries */ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; @@ -1063,10 +1066,9 @@ static inline int irq_trigger(int idx) return MPBIOS_trigger(idx); } -int (*ioapic_renumber_irq)(int ioapic, int irq); static int pin_2_irq(int idx, int apic, int pin) { - int irq, i; + int irq; int bus = mp_irqs[idx].srcbus; /* @@ -1078,18 +1080,12 @@ static int pin_2_irq(int idx, int apic, if (test_bit(bus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; } else { - /* - * PCI IRQs are mapped in order - */ - i = irq = 0; - while (i < apic) - irq += nr_ioapic_registers[i++]; - irq += pin; - /* - * For MPS mode, so far only needed by ES7000 platform - */ - if (ioapic_renumber_irq) - irq = ioapic_renumber_irq(apic, irq); + u32 gsi = mp_gsi_routing[apic].gsi_base + pin; + + if (gsi >= NR_IRQS_LEGACY) + irq = gsi; + else + irq = gsi_top + gsi; } #ifdef CONFIG_X86_32 @@ -2027,31 +2023,15 @@ fs_initcall(print_ICs); /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; -#endif /* !CONFIG_XEN */ void __init enable_IO_APIC(void) { - union IO_APIC_reg_01 reg_01; -#ifndef CONFIG_XEN int i8259_apic, i8259_pin; -#endif int apic; - unsigned long flags; - - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - raw_spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - raw_spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } if (!legacy_pic->nr_legacy_irqs) return; -#ifndef CONFIG_XEN for(apic = 0; apic < nr_ioapics; apic++) { int pin; /* See if any of the pins is in ExtINT mode */ @@ -2094,10 +2074,8 @@ void __init enable_IO_APIC(void) * Do not trust the IO-APIC being empty at bootup */ clear_IO_APIC(); -#endif } -#ifndef CONFIG_XEN /* * Not an __init, needed by the reboot code */ @@ -3229,13 +3207,9 @@ out: void __init setup_IO_APIC(void) { -#ifdef CONFIG_XEN - enable_IO_APIC(); -#else /* * calling enable_IO_APIC() is moved to setup_local_APIC for BP */ -#endif io_apic_irqs = legacy_pic->nr_legacy_irqs ? ~PIC_IRQS : ~0UL; apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); @@ -3969,28 +3943,21 @@ int __init io_apic_get_redir_entries (in reg_01.raw = io_apic_read(ioapic, 1); raw_spin_unlock_irqrestore(&ioapic_lock, flags); - return reg_01.bits.entries; + /* The register returns the maximum index redir index + * supported, which is one less than the total number of redir + * entries. + */ + return reg_01.bits.entries + 1; } #ifndef CONFIG_XEN void __init probe_nr_irqs_gsi(void) { - int nr = 0; + int nr; - nr = acpi_probe_gsi(); - if (nr > nr_irqs_gsi) { + nr = gsi_top + NR_IRQS_LEGACY; + if (nr > nr_irqs_gsi) nr_irqs_gsi = nr; - } else { - /* for acpi=off or acpi is not compiled in */ - int idx; - - nr = 0; - for (idx = 0; idx < nr_ioapics; idx++) - nr += io_apic_get_redir_entries(idx) + 1; - - if (nr > nr_irqs_gsi) - nr_irqs_gsi = nr; - } printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); } @@ -4207,22 +4174,27 @@ int __init io_apic_get_version(int ioapi return reg_01.bits.version; } -int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) +int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity) { - int i; + int ioapic, pin, idx; if (skip_ioapic_setup) return -1; - for (i = 0; i < mp_irq_entries; i++) - if (mp_irqs[i].irqtype == mp_INT && - mp_irqs[i].srcbusirq == bus_irq) - break; - if (i >= mp_irq_entries) + ioapic = mp_find_ioapic(gsi); + if (ioapic < 0) + return -1; + + pin = mp_find_ioapic_pin(ioapic, gsi); + if (pin < 0) + return -1; + + idx = find_irq_entry(ioapic, pin, mp_INT); + if (idx < 0) return -1; - *trigger = irq_trigger(i); - *polarity = irq_polarity(i); + *trigger = irq_trigger(idx); + *polarity = irq_polarity(idx); return 0; } @@ -4365,7 +4337,7 @@ void __init ioapic_insert_resources(void } #endif /* !CONFIG_XEN */ -int mp_find_ioapic(int gsi) +int mp_find_ioapic(u32 gsi) { int i = 0; @@ -4380,7 +4352,7 @@ int mp_find_ioapic(int gsi) return -1; } -int mp_find_ioapic_pin(int ioapic, int gsi) +int mp_find_ioapic_pin(int ioapic, u32 gsi) { if (WARN_ON(ioapic == -1)) return -1; @@ -4408,6 +4380,7 @@ static int bad_ioapic(unsigned long addr void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) { int idx = 0; + int entries; if (bad_ioapic(address)) return; @@ -4428,9 +4401,17 @@ void __init mp_register_ioapic(int id, u * Build basic GSI lookup table to facilitate gsi->io_apic lookups * and to prevent reprogramming of IOAPIC pins (PCI GSIs). */ + entries = io_apic_get_redir_entries(idx); mp_gsi_routing[idx].gsi_base = gsi_base; - mp_gsi_routing[idx].gsi_end = gsi_base + - io_apic_get_redir_entries(idx); + mp_gsi_routing[idx].gsi_end = gsi_base + entries - 1; + + /* + * The number of IO-APIC IRQ registers (== #pins): + */ + nr_ioapic_registers[idx] = entries; + + if (mp_gsi_routing[idx].gsi_end >= gsi_top) + gsi_top = mp_gsi_routing[idx].gsi_end + 1; printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, " "GSI %d-%d\n", idx, mp_ioapics[idx].apicid, --- head-2011-03-17.orig/arch/x86/kernel/cpu/Makefile 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/Makefile 2011-02-01 15:03:10.000000000 +0100 @@ -34,8 +34,8 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o -disabled-obj-$(CONFIG_XEN) := hypervisor.o perfctr-watchdog.o perf_event.o \ - sched.o vmware.o +disabled-obj-$(CONFIG_XEN) := hypervisor.o mshyperv.o perfctr-watchdog.o \ + perf_event.o sched.o vmware.o quiet_cmd_mkcapflags = MKCAP $@ cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ --- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:34.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:42:47.000000000 +0100 @@ -1150,6 +1150,20 @@ static void clear_all_debug_regs(void) } } +#ifdef CONFIG_KGDB +/* + * Restore debug regs if using kgdbwait and you have a kernel debugger + * connection established. + */ +static void dbg_restore_debug_regs(void) +{ + if (unlikely(kgdb_connected && arch_kgdb_ops.correct_hw_break)) + arch_kgdb_ops.correct_hw_break(); +} +#else /* ! CONFIG_KGDB */ +#define dbg_restore_debug_regs() +#endif /* ! CONFIG_KGDB */ + /* * cpu_init() initializes state that is per-CPU. Some data is already * initialized (naturally) in the bootstrap process, such as the GDT @@ -1180,9 +1194,9 @@ void __cpuinit cpu_init(void) #endif #ifdef CONFIG_NUMA - if (cpu != 0 && percpu_read(node_number) == 0 && - cpu_to_node(cpu) != NUMA_NO_NODE) - percpu_write(node_number, cpu_to_node(cpu)); + if (cpu != 0 && percpu_read(numa_node) == 0 && + early_cpu_to_node(cpu) != NUMA_NO_NODE) + set_numa_node(early_cpu_to_node(cpu)); #endif me = current; @@ -1255,18 +1269,8 @@ void __cpuinit cpu_init(void) #endif load_LDT(&init_mm.context); -#ifdef CONFIG_KGDB - /* - * If the kgdb is connected no debug regs should be altered. This - * is only applicable when KGDB and a KGDB I/O module are built - * into the kernel and you are using early debugging with - * kgdbwait. KGDB will control the kernel HW breakpoint registers. - */ - if (kgdb_connected && arch_kgdb_ops.correct_hw_break) - arch_kgdb_ops.correct_hw_break(); - else -#endif - clear_all_debug_regs(); + clear_all_debug_regs(); + dbg_restore_debug_regs(); fpu_init(); @@ -1330,14 +1334,12 @@ void __cpuinit cpu_init(void) #endif clear_all_debug_regs(); + dbg_restore_debug_regs(); /* * Force FPU initialization: */ - if (cpu_has_xsave) - current_thread_info()->status = TS_XSAVE; - else - current_thread_info()->status = 0; + current_thread_info()->status = 0; clear_used_math(); mxcsr_feature_mask_init(); --- head-2011-03-17.orig/arch/x86/kernel/e820-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/e820-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -776,7 +776,7 @@ static int __init e820_mark_nvs_memory(v struct e820entry *ei = &e820.map[i]; if (ei->type == E820_NVS) - hibernate_nvs_register(ei->addr, ei->size); + suspend_nvs_register(ei->addr, ei->size); } return 0; --- head-2011-03-17.orig/arch/x86/kernel/early_printk-xen.c 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/early_printk-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -41,6 +41,14 @@ static void early_vga_write(struct conso writew(0x720, VGABASE + 2*(max_xpos*j + i)); current_ypos = max_ypos-1; } +#ifdef CONFIG_KGDB_KDB + if (c == '\b') { + if (current_xpos > 0) + current_xpos--; + } else if (c == '\r') { + current_xpos = 0; + } else +#endif if (c == '\n') { current_xpos = 0; current_ypos++; --- head-2011-03-17.orig/arch/x86/kernel/entry_32-xen.S 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_32-xen.S 2011-02-01 15:03:10.000000000 +0100 @@ -53,6 +53,7 @@ #include #include #include +#include #include /* Avoid __ASSEMBLER__'ifying just for this. */ @@ -1123,7 +1124,25 @@ ENTRY(simd_coprocessor_error) RING0_INT_FRAME pushl $0 CFI_ADJUST_CFA_OFFSET 4 +#ifdef CONFIG_X86_INVD_BUG + /* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */ +661: pushl $do_general_protection +662: +.section .altinstructions,"a" + .balign 4 + .long 661b + .long 663f + .byte X86_FEATURE_XMM + .byte 662b-661b + .byte 664f-663f +.previous +.section .altinstr_replacement,"ax" +663: pushl $do_simd_coprocessor_error +664: +.previous +#else pushl $do_simd_coprocessor_error +#endif CFI_ADJUST_CFA_OFFSET 4 jmp error_code CFI_ENDPROC --- head-2011-03-17.orig/arch/x86/kernel/entry_64-xen.S 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/entry_64-xen.S 2011-02-01 15:03:10.000000000 +0100 @@ -582,8 +582,8 @@ auditsys: * masked off. */ sysret_audit: - movq %rax,%rsi /* second arg, syscall return value */ - cmpq $0,%rax /* is it < 0? */ + movq RAX-ARGOFFSET(%rsp),%rsi /* second arg, syscall return value */ + cmpq $0,%rsi /* is it < 0? */ setl %al /* 1 if so, 0 if not */ movzbl %al,%edi /* zero-extend that into %edi */ inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ --- head-2011-03-17.orig/arch/x86/kernel/microcode_core-xen.c 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/microcode_core-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -88,9 +88,9 @@ static int do_microcode_update(const voi return err; } -static int microcode_open(struct inode *unused1, struct file *unused2) +static int microcode_open(struct inode *inode, struct file *file) { - return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; + return capable(CAP_SYS_RAWIO) ? nonseekable_open(inode, file) : -EPERM; } static ssize_t microcode_write(struct file *file, const char __user *buf, @@ -145,6 +145,7 @@ static void microcode_dev_exit(void) } MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); +MODULE_ALIAS("devname:cpu/microcode"); #else #define microcode_dev_init() 0 #define microcode_dev_exit() do { } while (0) --- head-2011-03-17.orig/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/mpparse-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -127,21 +127,6 @@ static void __init MP_bus_info(struct mp printk(KERN_WARNING "Unknown bustype %s - ignoring\n", str); } -static int bad_ioapic(unsigned long address) -{ - if (nr_ioapics >= MAX_IO_APICS) { - printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " - "(found %d)\n", MAX_IO_APICS, nr_ioapics); - panic("Recompile kernel with bigger MAX_IO_APICS!\n"); - } - if (!address) { - printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address" - " found in table, skipping!\n"); - return 1; - } - return 0; -} - static void __init MP_ioapic_info(struct mpc_ioapic *m) { if (!(m->flags & MPC_APIC_USABLE)) @@ -150,15 +135,7 @@ static void __init MP_ioapic_info(struct printk(KERN_INFO "I/O APIC #%d Version %d at 0x%X.\n", m->apicid, m->apicver, m->apicaddr); - if (bad_ioapic(m->apicaddr)) - return; - - mp_ioapics[nr_ioapics].apicaddr = m->apicaddr; - mp_ioapics[nr_ioapics].apicid = m->apicid; - mp_ioapics[nr_ioapics].type = m->type; - mp_ioapics[nr_ioapics].apicver = m->apicver; - mp_ioapics[nr_ioapics].flags = m->flags; - nr_ioapics++; + mp_register_ioapic(m->apicid, m->apicaddr, gsi_top); } static void print_MP_intsrc_info(struct mpc_intsrc *m) --- head-2011-03-17.orig/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/pci-dma-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -135,8 +135,6 @@ static struct dma_map_ops swiotlb_dma_op .unmap_page = swiotlb_unmap_page, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, - .sync_single_range_for_device = swiotlb_sync_single_range_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, .map_sg = swiotlb_map_sg_attrs, --- head-2011-03-17.orig/arch/x86/kernel/process-xen.c 2011-03-03 16:10:10.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process-xen.c 2011-03-03 16:10:40.000000000 +0100 @@ -20,7 +20,6 @@ #include #include #include -#include #include #include @@ -33,26 +32,22 @@ struct kmem_cache *task_xstate_cachep; int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { + int ret; + *dst = *src; - if (src->thread.xstate) { - dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, - GFP_KERNEL); - if (!dst->thread.xstate) - return -ENOMEM; - WARN_ON((unsigned long)dst->thread.xstate & 15); - memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + if (fpu_allocated(&src->thread.fpu)) { + memset(&dst->thread.fpu, 0, sizeof(dst->thread.fpu)); + ret = fpu_alloc(&dst->thread.fpu); + if (ret) + return ret; + fpu_copy(&dst->thread.fpu, &src->thread.fpu); } return 0; } void free_thread_xstate(struct task_struct *tsk) { - if (tsk->thread.xstate) { - kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); - tsk->thread.xstate = NULL; - } - - WARN(tsk->thread.ds_ctx, "leaking DS context\n"); + fpu_free(&tsk->thread.fpu); } void free_thread_info(struct thread_info *ti) @@ -199,11 +194,16 @@ void __switch_to_xtra(struct task_struct prev = &prev_p->thread; next = &next_p->thread; - if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || - test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) - ds_switch_to(prev_p, next_p); - else if (next->debugctlmsr != prev->debugctlmsr) - update_debugctlmsr(next->debugctlmsr); + if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ + test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) + debugctl |= DEBUGCTLMSR_BTF; + + update_debugctlmsr(debugctl); + } if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { --- head-2011-03-17.orig/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:25.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process_32-xen.c 2011-02-02 08:47:43.000000000 +0100 @@ -57,7 +57,6 @@ #include #include #include -#include #include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -245,13 +244,6 @@ int copy_thread(unsigned long clone_flag kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - - clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); - p->thread.ds_ctx = NULL; - - clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); - p->thread.debugctlmsr = 0; - return err; } @@ -420,7 +412,7 @@ __switch_to(struct task_struct *prev_p, /* we're going to use this soon, after a few expensive things */ if (preload_fpu) - prefetch(next->xstate); + prefetch(next->fpu.state); /* * Now maybe handle debug registers --- head-2011-03-17.orig/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:22.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/process_64-xen.c 2011-02-02 08:47:47.000000000 +0100 @@ -54,7 +54,6 @@ #include #include #include -#include #include asmlinkage extern void ret_from_fork(void); @@ -324,12 +323,6 @@ int copy_thread(unsigned long clone_flag } p->thread.iopl = current->thread.iopl; - clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); - p->thread.ds_ctx = NULL; - - clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); - p->thread.debugctlmsr = 0; - err = 0; out: if (err && p->thread.io_bitmap_ptr) { @@ -415,7 +408,7 @@ __switch_to(struct task_struct *prev_p, /* we're going to use this soon, after a few expensive things */ if (preload_fpu) - prefetch(next->xstate); + prefetch(next->fpu.state); /* * This is basically '__unlazy_fpu', except that we queue a --- head-2011-03-17.orig/arch/x86/kernel/setup-xen.c 2011-03-03 16:24:33.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/setup-xen.c 2011-03-03 16:24:49.000000000 +0100 @@ -746,6 +746,17 @@ static struct dmi_system_id __initdata b DMI_MATCH(DMI_BOARD_NAME, "DG45FC"), }, }, + /* + * The Dell Inspiron Mini 1012 has DMI_BIOS_VENDOR = "Dell Inc.", so + * match on the product name. + */ + { + .callback = dmi_low_memory_corruption, + .ident = "Phoenix BIOS", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 1012"), + }, + }, #endif {} }; @@ -815,6 +826,7 @@ void __init setup_arch(char **cmdline_p) /* VMI may relocate the fixmap; do this before touching ioremap area */ vmi_init(); + early_trap_init(); early_cpu_init(); early_ioremap_init(); --- head-2011-03-17.orig/arch/x86/kernel/time-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/time-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -30,7 +30,7 @@ #include #include -DEFINE_SPINLOCK(i8253_lock); +DEFINE_RAW_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); #ifdef CONFIG_X86_64 @@ -69,12 +69,6 @@ DEFINE_PER_CPU(struct vcpu_runstate_info /* Must be signed, as it's compared with s64 quantities which can be -ve. */ #define NS_PER_TICK (1000000000LL/HZ) -static void __clock_was_set(struct work_struct *unused) -{ - clock_was_set(); -} -static DECLARE_WORK(clock_was_set_work, __clock_was_set); - /* * GCC 4.3 can turn loops over an induction variable into division. We do * not support arbitrary 64-bit division, and so must break the induction. @@ -187,33 +181,13 @@ static u64 get_nsec_offset(struct shadow return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift); } -static void __update_wallclock(time_t sec, long nsec) -{ - long wtm_nsec, xtime_nsec; - time_t wtm_sec, xtime_sec; - u64 tmp, wc_nsec; - - /* Adjust wall-clock time base. */ - wc_nsec = processed_system_time; - wc_nsec += sec * (u64)NSEC_PER_SEC; - wc_nsec += nsec; - - /* Split wallclock base into seconds and nanoseconds. */ - tmp = wc_nsec; - xtime_nsec = do_div(tmp, 1000000000); - xtime_sec = (time_t)tmp; - - wtm_sec = wall_to_monotonic.tv_sec + (xtime.tv_sec - xtime_sec); - wtm_nsec = wall_to_monotonic.tv_nsec + (xtime.tv_nsec - xtime_nsec); - - set_normalized_timespec(&xtime, xtime_sec, xtime_nsec); - set_normalized_timespec(&wall_to_monotonic, wtm_sec, wtm_nsec); -} - static void update_wallclock(void) { + static DEFINE_MUTEX(uwc_mutex); shared_info_t *s = HYPERVISOR_shared_info; + mutex_lock(&uwc_mutex); + do { shadow_tv_version = s->wc_version; rmb(); @@ -222,9 +196,24 @@ static void update_wallclock(void) rmb(); } while ((s->wc_version & 1) | (shadow_tv_version ^ s->wc_version)); - if (!independent_wallclock) - __update_wallclock(shadow_tv.tv_sec, shadow_tv.tv_nsec); + if (!independent_wallclock) { + u64 tmp = processed_system_time; + long nsec = do_div(tmp, NSEC_PER_SEC); + struct timespec tv; + + set_normalized_timespec(&tv, shadow_tv.tv_sec + tmp, + shadow_tv.tv_nsec + nsec); + do_settimeofday(&tv); + } + + mutex_unlock(&uwc_mutex); +} + +static void _update_wallclock(struct work_struct *unused) +{ + update_wallclock(); } +static DECLARE_WORK(update_wallclock_work, _update_wallclock); /* * Reads a consistent set of time-base values from Xen, into a shadow data @@ -275,15 +264,19 @@ static DEFINE_TIMER(sync_xen_wallclock_t static void sync_xen_wallclock(unsigned long dummy) { struct timespec now; + unsigned long seq; struct xen_platform_op op; BUG_ON(!is_initial_xendomain()); if (!ntp_synced() || independent_wallclock) return; - write_seqlock_irq(&xtime_lock); + do { + seq = read_seqbegin(&xtime_lock); + now = __current_kernel_time(); + } while (read_seqretry(&xtime_lock, seq)); - set_normalized_timespec(&now, xtime.tv_sec, xtime.tv_nsec); + set_normalized_timespec(&now, now.tv_sec, now.tv_nsec); op.cmd = XENPF_settime; op.u.settime.secs = now.tv_sec; @@ -293,8 +286,6 @@ static void sync_xen_wallclock(unsigned update_wallclock(); - write_sequnlock_irq(&xtime_lock); - /* Once per minute. */ mod_timer(&sync_xen_wallclock_timer, jiffies + 60*HZ); } @@ -409,7 +400,6 @@ static irqreturn_t timer_interrupt(int i { s64 delta, delta_cpu, stolen, blocked; unsigned int i, cpu = smp_processor_id(); - int schedule_clock_was_set_work = 0; struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); struct vcpu_runstate_info runstate; @@ -464,15 +454,11 @@ static irqreturn_t timer_interrupt(int i do_timer(delta); } - if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { - update_wallclock(); - schedule_clock_was_set_work = 1; - } - write_sequnlock(&xtime_lock); - if (schedule_clock_was_set_work && keventd_up()) - schedule_work(&clock_was_set_work); + if (shadow_tv_version != HYPERVISOR_shared_info->wc_version + && keventd_up()) + schedule_work(&update_wallclock_work); /* * Account stolen ticks. @@ -679,6 +665,12 @@ static struct vcpu_set_periodic_timer xe .period_ns = NS_PER_TICK }; +static void __init _late_time_init(void) +{ + update_wallclock(); + setup_cpu0_timer_irq(); +} + void __init time_init(void) { init_cpu_khz(); @@ -704,12 +696,13 @@ void __init time_init(void) clocksource_register(&clocksource_xen); - update_wallclock(); - use_tsc_delay(); - /* Cannot request_irq() until kmem is initialised. */ - late_time_init = setup_cpu0_timer_irq; + /* + * Cannot request_irq() until kmem is initialised, and cannot + * do_settimeofday() (i.e. clock_was_set()) until interrupts are on. + */ + late_time_init = _late_time_init; } /* Convert jiffies to system time. */ --- head-2011-03-17.orig/arch/x86/kernel/traps-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/traps-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -112,15 +113,6 @@ static inline void preempt_conditional_c dec_preempt_count(); } -#ifdef CONFIG_X86_32 -static inline void -die_if_kernel(const char *str, struct pt_regs *regs, long err) -{ - if (!user_mode_vm(regs)) - die(str, regs, err); -} -#endif - static void __kprobes do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, long error_code, siginfo_t *info) @@ -453,6 +445,11 @@ void restart_nmi(void) /* May run on IST stack. */ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) { +#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP + if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) + == NOTIFY_STOP) + return; +#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */ #ifdef CONFIG_KPROBES if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) @@ -522,6 +519,7 @@ asmlinkage __kprobes struct pt_regs *syn dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) { struct task_struct *tsk = current; + int user_icebp = 0; unsigned long dr6; int si_code; @@ -530,17 +528,25 @@ dotraplinkage void __kprobes do_debug(st /* Filter out all the reserved bits which are preset to 1 */ dr6 &= ~DR6_RESERVED; + /* + * If dr6 has no reason to give us about the origin of this trap, + * then it's very likely the result of an icebp/int01 trap. + * User wants a sigtrap for that. + */ + if (!dr6 && user_mode(regs)) + user_icebp = 1; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); + /* * The processor cleared BTF, so don't mark that we need it set. */ - clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); - tsk->thread.debugctlmsr = 0; + clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; @@ -571,62 +577,74 @@ dotraplinkage void __kprobes do_debug(st regs->flags &= ~X86_EFLAGS_TF; } si_code = get_si_code(tsk->thread.debugreg6); - if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS)) + if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); preempt_conditional_cli(regs); return; } -#ifdef CONFIG_X86_64 -static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) -{ - if (fixup_exception(regs)) - return 1; - - notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); - /* Illegal floating point operation in the kernel */ - current->thread.trap_no = trapnr; - die(str, regs, 0); - return 0; -} -#endif - /* * Note that we play around with the 'TS' bit in an attempt to get * the correct behaviour even in the presence of the asynchronous * IRQ13 behaviour */ -void math_error(void __user *ip) +void math_error(struct pt_regs *regs, int error_code, int trapnr) { - struct task_struct *task; + struct task_struct *task = current; siginfo_t info; - unsigned short cwd, swd, err; + unsigned short err; + char *str = (trapnr == 16) ? "fpu exception" : "simd exception"; + + if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, SIGFPE) == NOTIFY_STOP) + return; + conditional_sti(regs); + + if (!user_mode_vm(regs)) + { + if (!fixup_exception(regs)) { + task->thread.error_code = error_code; + task->thread.trap_no = trapnr; + die(str, regs, error_code); + } + return; + } /* * Save the info for the exception handler and clear the error. */ - task = current; save_init_fpu(task); - task->thread.trap_no = 16; - task->thread.error_code = 0; + task->thread.trap_no = trapnr; + task->thread.error_code = error_code; info.si_signo = SIGFPE; info.si_errno = 0; - info.si_addr = ip; - /* - * (~cwd & swd) will mask out exceptions that are not set to unmasked - * status. 0x3f is the exception bits in these regs, 0x200 is the - * C1 reg you need in case of a stack fault, 0x040 is the stack - * fault bit. We should only be taking one exception at a time, - * so if this combination doesn't produce any single exception, - * then we have a bad program that isn't synchronizing its FPU usage - * and it will suffer the consequences since we won't be able to - * fully reproduce the context of the exception - */ - cwd = get_fpu_cwd(task); - swd = get_fpu_swd(task); + info.si_addr = (void __user *)regs->ip; + if (trapnr == 16) { + unsigned short cwd, swd; + /* + * (~cwd & swd) will mask out exceptions that are not set to unmasked + * status. 0x3f is the exception bits in these regs, 0x200 is the + * C1 reg you need in case of a stack fault, 0x040 is the stack + * fault bit. We should only be taking one exception at a time, + * so if this combination doesn't produce any single exception, + * then we have a bad program that isn't synchronizing its FPU usage + * and it will suffer the consequences since we won't be able to + * fully reproduce the context of the exception + */ + cwd = get_fpu_cwd(task); + swd = get_fpu_swd(task); - err = swd & ~cwd; + err = swd & ~cwd; + } else { + /* + * The SIMD FPU exceptions are handled a little differently, as there + * is only a single status/control register. Thus, to determine which + * unmasked exception was caught we must mask the exception mask bits + * at 0x1f80, and then use these to mask the exception bits at 0x3f. + */ + unsigned short mxcsr = get_fpu_mxcsr(task); + err = ~(mxcsr >> 7) & mxcsr; + } if (err & 0x001) { /* Invalid op */ /* @@ -655,97 +673,17 @@ void math_error(void __user *ip) dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code) { - conditional_sti(regs); - #ifdef CONFIG_X86_32 ignore_fpu_irq = 1; -#else - if (!user_mode(regs) && - kernel_math_error(regs, "kernel x87 math error", 16)) - return; #endif - math_error((void __user *)regs->ip); -} - -static void simd_math_error(void __user *ip) -{ - struct task_struct *task; - siginfo_t info; - unsigned short mxcsr; - - /* - * Save the info for the exception handler and clear the error. - */ - task = current; - save_init_fpu(task); - task->thread.trap_no = 19; - task->thread.error_code = 0; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = __SI_FAULT; - info.si_addr = ip; - /* - * The SIMD FPU exceptions are handled a little differently, as there - * is only a single status/control register. Thus, to determine which - * unmasked exception was caught we must mask the exception mask bits - * at 0x1f80, and then use these to mask the exception bits at 0x3f. - */ - mxcsr = get_fpu_mxcsr(task); - switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { - case 0x000: - default: - break; - case 0x001: /* Invalid Op */ - info.si_code = FPE_FLTINV; - break; - case 0x002: /* Denormalize */ - case 0x010: /* Underflow */ - info.si_code = FPE_FLTUND; - break; - case 0x004: /* Zero Divide */ - info.si_code = FPE_FLTDIV; - break; - case 0x008: /* Overflow */ - info.si_code = FPE_FLTOVF; - break; - case 0x020: /* Precision */ - info.si_code = FPE_FLTRES; - break; - } - force_sig_info(SIGFPE, &info, task); + math_error(regs, error_code, 16); } dotraplinkage void do_simd_coprocessor_error(struct pt_regs *regs, long error_code) { - conditional_sti(regs); - -#ifdef CONFIG_X86_32 - if (cpu_has_xmm) { - /* Handle SIMD FPU exceptions on PIII+ processors. */ - ignore_fpu_irq = 1; - simd_math_error((void __user *)regs->ip); - return; - } - /* - * Handle strange cache flush from user space exception - * in all other cases. This is undocumented behaviour. - */ - if (regs->flags & X86_VM_MASK) { - handle_vm86_fault((struct kernel_vm86_regs *)regs, error_code); - return; - } - current->thread.trap_no = 19; - current->thread.error_code = error_code; - die_if_kernel("cache flush denied", regs, error_code); - force_sig(SIGSEGV, current); -#else - if (!user_mode(regs) && - kernel_math_error(regs, "kernel simd math error", 19)) - return; - simd_math_error((void __user *)regs->ip); -#endif + math_error(regs, error_code, 19); } #ifndef CONFIG_XEN @@ -877,12 +815,18 @@ dotraplinkage void do_iret_error(struct * NB. All these are "trap gates" (i.e. events_mask isn't set) except * for those that specify |4 in the second field. */ -static const trap_info_t __cpuinitconst trap_table[] = { #ifdef CONFIG_X86_32 #define X 0 #else #define X 4 #endif +static const trap_info_t __initconst early_trap_table[] = { + { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, + { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, + { 14, 0|4, __KERNEL_CS, (unsigned long)page_fault }, + { 0, 0, 0, 0 } +}; +static const trap_info_t __cpuinitconst trap_table[] = { { 0, 0|X, __KERNEL_CS, (unsigned long)divide_error }, { 1, 0|4, __KERNEL_CS, (unsigned long)debug }, { 3, 3|4, __KERNEL_CS, (unsigned long)int3 }, @@ -911,6 +855,16 @@ static const trap_info_t __cpuinitconst { 0, 0, 0, 0 } }; +/* Set of traps needed for early debugging. */ +void __init early_trap_init(void) +{ + int ret; + + ret = HYPERVISOR_set_trap_table(early_trap_table); + if (ret) + printk("early set_trap_table failed (%d)\n", ret); +} + void __init trap_init(void) { int ret; --- head-2011-03-17.orig/arch/x86/kernel/x86_init-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/kernel/x86_init-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -80,9 +81,14 @@ struct x86_init_ops x86_init __initdata }, }; +static int default_i8042_detect(void) { return 1; }; + struct x86_platform_ops x86_platform = { .calibrate_tsc = NULL, .get_wallclock = mach_get_cmos_time, .set_wallclock = mach_set_rtc_mmss, .is_untracked_pat_range = is_ISA_range, + .i8042_detect = default_i8042_detect }; + +EXPORT_SYMBOL_GPL(x86_platform); --- head-2011-03-17.orig/arch/x86/mm/pageattr-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pageattr-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -1063,7 +1063,8 @@ out_err: } EXPORT_SYMBOL(set_memory_uc); -int set_memory_array_uc(unsigned long *addr, int addrinarray) +int _set_memory_array(unsigned long *addr, int addrinarray, + unsigned long new_type) { int i, j; int ret; @@ -1073,13 +1074,19 @@ int set_memory_array_uc(unsigned long *a */ for (i = 0; i < addrinarray; i++) { ret = reserve_memtype(__pa(addr[i]), __pa(addr[i]) + PAGE_SIZE, - _PAGE_CACHE_UC_MINUS, NULL); + new_type, NULL); if (ret) goto out_free; } ret = change_page_attr_set(addr, addrinarray, __pgprot(_PAGE_CACHE_UC_MINUS), 1); + + if (!ret && new_type == _PAGE_CACHE_WC) + ret = change_page_attr_set_clr(addr, addrinarray, + __pgprot(_PAGE_CACHE_WC), + __pgprot(_PAGE_CACHE_MASK), + 0, CPA_ARRAY, NULL); if (ret) goto out_free; @@ -1091,8 +1098,19 @@ out_free: return ret; } + +int set_memory_array_uc(unsigned long *addr, int addrinarray) +{ + return _set_memory_array(addr, addrinarray, _PAGE_CACHE_UC_MINUS); +} EXPORT_SYMBOL(set_memory_array_uc); +int set_memory_array_wc(unsigned long *addr, int addrinarray) +{ + return _set_memory_array(addr, addrinarray, _PAGE_CACHE_WC); +} +EXPORT_SYMBOL(set_memory_array_wc); + int _set_memory_wc(unsigned long addr, int numpages) { int ret; @@ -1219,26 +1237,34 @@ int set_pages_uc(struct page *page, int } EXPORT_SYMBOL(set_pages_uc); -int set_pages_array_uc(struct page **pages, int addrinarray) +static int _set_pages_array(struct page **pages, int addrinarray, + unsigned long new_type) { unsigned long start; unsigned long end; int i; int free_idx; + int ret; for (i = 0; i < addrinarray; i++) { if (PageHighMem(pages[i])) continue; start = page_to_pfn(pages[i]) << PAGE_SHIFT; end = start + PAGE_SIZE; - if (reserve_memtype(start, end, _PAGE_CACHE_UC_MINUS, NULL)) + if (reserve_memtype(start, end, new_type, NULL)) goto err_out; } - if (cpa_set_pages_array(pages, addrinarray, - __pgprot(_PAGE_CACHE_UC_MINUS)) == 0) { - return 0; /* Success */ - } + ret = cpa_set_pages_array(pages, addrinarray, + __pgprot(_PAGE_CACHE_UC_MINUS)); + if (!ret && new_type == _PAGE_CACHE_WC) + ret = change_page_attr_set_clr(NULL, addrinarray, + __pgprot(_PAGE_CACHE_WC), + __pgprot(_PAGE_CACHE_MASK), + 0, CPA_PAGES_ARRAY, pages); + if (ret) + goto err_out; + return 0; /* Success */ err_out: free_idx = i; for (i = 0; i < free_idx; i++) { @@ -1250,8 +1276,19 @@ err_out: } return -EINVAL; } + +int set_pages_array_uc(struct page **pages, int addrinarray) +{ + return _set_pages_array(pages, addrinarray, _PAGE_CACHE_UC_MINUS); +} EXPORT_SYMBOL(set_pages_array_uc); +int set_pages_array_wc(struct page **pages, int addrinarray) +{ + return _set_pages_array(pages, addrinarray, _PAGE_CACHE_WC); +} +EXPORT_SYMBOL(set_pages_array_wc); + int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); --- head-2011-03-17.orig/arch/x86/mm/pat-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pat-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -30,6 +30,8 @@ #include #include +#include "pat_internal.h" + #ifdef CONFIG_X86_PAT int __read_mostly pat_enabled = 1; @@ -53,19 +55,15 @@ static inline void pat_disable(const cha #endif -static int debug_enable; +int pat_debug_enable; static int __init pat_debug_setup(char *str) { - debug_enable = 1; + pat_debug_enable = 1; return 0; } __setup("debugpat", pat_debug_setup); -#define dprintk(fmt, arg...) \ - do { if (debug_enable) printk(KERN_INFO fmt, ##arg); } while (0) - - static u64 __read_mostly boot_pat_state; enum { @@ -142,86 +140,7 @@ void pat_init(void) #undef PAT -static char *cattr_name(unsigned long flags) -{ - switch (flags & _PAGE_CACHE_MASK) { - case _PAGE_CACHE_UC: return "uncached"; - case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; - case _PAGE_CACHE_WB: return "write-back"; - case _PAGE_CACHE_WC: return "write-combining"; - case _PAGE_CACHE_WP: return "write-protected"; - case _PAGE_CACHE_WT: return "write-through"; - default: return "broken"; - } -} - -/* - * The global memtype list keeps track of memory type for specific - * physical memory areas. Conflicting memory types in different - * mappings can cause CPU cache corruption. To avoid this we keep track. - * - * The list is sorted based on starting address and can contain multiple - * entries for each address (this allows reference counting for overlapping - * areas). All the aliases have the same cache attributes of course. - * Zero attributes are represented as holes. - * - * The data structure is a list that is also organized as an rbtree - * sorted on the start address of memtype range. - * - * memtype_lock protects both the linear list and rbtree. - */ - -struct memtype { - u64 start; - u64 end; - unsigned long type; - struct list_head nd; - struct rb_node rb; -}; - -static struct rb_root memtype_rbroot = RB_ROOT; -static LIST_HEAD(memtype_list); -static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ - -static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) -{ - struct rb_node *node = root->rb_node; - struct memtype *last_lower = NULL; - - while (node) { - struct memtype *data = container_of(node, struct memtype, rb); - - if (data->start < start) { - last_lower = data; - node = node->rb_right; - } else if (data->start > start) { - node = node->rb_left; - } else - return data; - } - - /* Will return NULL if there is no entry with its start <= start */ - return last_lower; -} - -static void memtype_rb_insert(struct rb_root *root, struct memtype *data) -{ - struct rb_node **new = &(root->rb_node); - struct rb_node *parent = NULL; - - while (*new) { - struct memtype *this = container_of(*new, struct memtype, rb); - - parent = *new; - if (data->start <= this->start) - new = &((*new)->rb_left); - else if (data->start > this->start) - new = &((*new)->rb_right); - } - - rb_link_node(&data->rb, parent, new); - rb_insert_color(&data->rb, root); -} +static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end); static inline u8 _mtrr_type_lookup(u64 start, u64 end) @@ -259,33 +178,6 @@ static unsigned long pat_x_mtrr_type(u64 return req_type; } -static int -chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) -{ - if (new->type != entry->type) { - if (type) { - new->type = entry->type; - *type = entry->type; - } else - goto conflict; - } - - /* check overlaps with more than one entry in the list */ - list_for_each_entry_continue(entry, &memtype_list, nd) { - if (new->end <= entry->start) - break; - else if (new->type != entry->type) - goto conflict; - } - return 0; - - conflict: - printk(KERN_INFO "%s:%d conflicting memory types " - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, - new->end, cattr_name(new->type), cattr_name(entry->type)); - return -EBUSY; -} - static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end) { int ram_page = 0, not_rampage = 0; @@ -318,8 +210,6 @@ static int pat_pagerange_is_ram(resource * Here we do two pass: * - Find the memtype of all the pages in the range, look for any conflicts * - In case of no conflicts, set the new memtype for pages in the range - * - * Caller must hold memtype_lock for atomicity. */ static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type, unsigned long *new_type) @@ -390,9 +280,8 @@ static int free_ram_pages_type(u64 start int reserve_memtype(u64 start, u64 end, unsigned long req_type, unsigned long *new_type) { - struct memtype *new, *entry; + struct memtype *new; unsigned long actual_type; - struct list_head *where; int is_range_ram; int err = 0; @@ -430,16 +319,14 @@ int reserve_memtype(u64 start, u64 end, is_range_ram = pat_pagerange_is_ram(start, end); if (is_range_ram == 1) { - spin_lock(&memtype_lock); err = reserve_ram_pages_type(start, end, req_type, new_type); - spin_unlock(&memtype_lock); return err; } else if (is_range_ram < 0) { return -EINVAL; } - new = kmalloc(sizeof(struct memtype), GFP_KERNEL); + new = kzalloc(sizeof(struct memtype), GFP_KERNEL); if (!new) return -ENOMEM; @@ -449,42 +336,7 @@ int reserve_memtype(u64 start, u64 end, spin_lock(&memtype_lock); - /* Search for existing mapping that overlaps the current range */ - where = NULL; - list_for_each_entry(entry, &memtype_list, nd) { - if (end <= entry->start) { - where = entry->nd.prev; - break; - } else if (start <= entry->start) { /* end > entry->start */ - err = chk_conflict(new, entry, new_type); - if (!err) { - dprintk("Overlap at 0x%Lx-0x%Lx\n", - entry->start, entry->end); - where = entry->nd.prev; - } - break; - } else if (start < entry->end) { /* start > entry->start */ - err = chk_conflict(new, entry, new_type); - if (!err) { - dprintk("Overlap at 0x%Lx-0x%Lx\n", - entry->start, entry->end); - - /* - * Move to right position in the linked - * list to add this new entry - */ - list_for_each_entry_continue(entry, - &memtype_list, nd) { - if (start <= entry->start) { - where = entry->nd.prev; - break; - } - } - } - break; - } - } - + err = rbt_memtype_check_insert(new, new_type); if (err) { printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " "track %s, req %s\n", @@ -495,13 +347,6 @@ int reserve_memtype(u64 start, u64 end, return err; } - if (where) - list_add(&new->nd, where); - else - list_add_tail(&new->nd, &memtype_list); - - memtype_rb_insert(&memtype_rbroot, new); - spin_unlock(&memtype_lock); dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n", @@ -513,9 +358,9 @@ int reserve_memtype(u64 start, u64 end, int free_memtype(u64 start, u64 end) { - struct memtype *entry, *saved_entry; int err = -EINVAL; int is_range_ram; + struct memtype *entry; if (!pat_enabled) return 0; @@ -527,9 +372,7 @@ int free_memtype(u64 start, u64 end) is_range_ram = pat_pagerange_is_ram(start, end); if (is_range_ram == 1) { - spin_lock(&memtype_lock); err = free_ram_pages_type(start, end); - spin_unlock(&memtype_lock); return err; } else if (is_range_ram < 0) { @@ -537,56 +380,20 @@ int free_memtype(u64 start, u64 end) } spin_lock(&memtype_lock); - - entry = memtype_rb_search(&memtype_rbroot, start); - if (unlikely(entry == NULL)) - goto unlock_ret; - - /* - * Saved entry points to an entry with start same or less than what - * we searched for. Now go through the list in both directions to look - * for the entry that matches with both start and end, with list stored - * in sorted start address - */ - saved_entry = entry; - list_for_each_entry_from(entry, &memtype_list, nd) { - if (entry->start == start && entry->end == end) { - rb_erase(&entry->rb, &memtype_rbroot); - list_del(&entry->nd); - kfree(entry); - err = 0; - break; - } else if (entry->start > start) { - break; - } - } - - if (!err) - goto unlock_ret; - - entry = saved_entry; - list_for_each_entry_reverse(entry, &memtype_list, nd) { - if (entry->start == start && entry->end == end) { - rb_erase(&entry->rb, &memtype_rbroot); - list_del(&entry->nd); - kfree(entry); - err = 0; - break; - } else if (entry->start < start) { - break; - } - } -unlock_ret: + entry = rbt_memtype_erase(start, end); spin_unlock(&memtype_lock); - if (err) { + if (!entry) { printk(KERN_INFO "%s:%d freeing invalid memtype %Lx-%Lx\n", current->comm, current->pid, start, end); + return -EINVAL; } + kfree(entry); + dprintk("free_memtype request 0x%Lx-0x%Lx\n", start, end); - return err; + return 0; } @@ -610,10 +417,8 @@ static unsigned long lookup_memtype(u64 if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) { struct page *page; - spin_lock(&memtype_lock); page = pfn_to_page(paddr >> PAGE_SHIFT); rettype = get_page_memtype(page); - spin_unlock(&memtype_lock); /* * -1 from get_page_memtype() implies RAM page is in its * default state and not reserved, and hence of type WB @@ -626,7 +431,7 @@ static unsigned long lookup_memtype(u64 spin_lock(&memtype_lock); - entry = memtype_rb_search(&memtype_rbroot, paddr); + entry = rbt_memtype_lookup(paddr); if (entry != NULL) rettype = entry->type; else @@ -950,29 +755,25 @@ EXPORT_SYMBOL_GPL(pgprot_writecombine); #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) -/* get Nth element of the linked list */ static struct memtype *memtype_get_idx(loff_t pos) { - struct memtype *list_node, *print_entry; - int i = 1; + struct memtype *print_entry; + int ret; - print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); + print_entry = kzalloc(sizeof(struct memtype), GFP_KERNEL); if (!print_entry) return NULL; spin_lock(&memtype_lock); - list_for_each_entry(list_node, &memtype_list, nd) { - if (pos == i) { - *print_entry = *list_node; - spin_unlock(&memtype_lock); - return print_entry; - } - ++i; - } + ret = rbt_memtype_copy_nth_element(print_entry, pos); spin_unlock(&memtype_lock); - kfree(print_entry); - return NULL; + if (!ret) { + return print_entry; + } else { + kfree(print_entry); + return NULL; + } } static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) --- head-2011-03-17.orig/arch/x86/mm/pat_internal.h 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pat_internal.h 2011-02-01 15:03:10.000000000 +0100 @@ -21,6 +21,10 @@ static inline char *cattr_name(unsigned case _PAGE_CACHE_UC_MINUS: return "uncached-minus"; case _PAGE_CACHE_WB: return "write-back"; case _PAGE_CACHE_WC: return "write-combining"; +#ifdef CONFIG_XEN + case _PAGE_CACHE_WP: return "write-protected"; + case _PAGE_CACHE_WT: return "write-through"; +#endif default: return "broken"; } } --- head-2011-03-17.orig/arch/x86/mm/pgtable_32-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/mm/pgtable_32-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -9,7 +9,6 @@ #include #include #include -#include #include #include --- head-2011-03-17.orig/arch/x86/pci/irq-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/arch/x86/pci/irq-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -595,8 +595,6 @@ static __init int intel_router_probe(str case PCI_DEVICE_ID_INTEL_ICH10_1: case PCI_DEVICE_ID_INTEL_ICH10_2: case PCI_DEVICE_ID_INTEL_ICH10_3: - case PCI_DEVICE_ID_INTEL_CPT_LPC1: - case PCI_DEVICE_ID_INTEL_CPT_LPC2: r->name = "PIIX/ICH"; r->get = pirq_piix_get; r->set = pirq_piix_set; @@ -611,6 +609,13 @@ static __init int intel_router_probe(str return 1; } + if ((device >= PCI_DEVICE_ID_INTEL_CPT_LPC_MIN) && + (device <= PCI_DEVICE_ID_INTEL_CPT_LPC_MAX)) { + r->name = "PIIX/ICH"; + r->get = pirq_piix_get; + r->set = pirq_piix_set; + return 1; + } return 0; } --- head-2011-03-17.orig/drivers/acpi/processor_driver.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/acpi/processor_driver.c 2011-02-01 15:03:10.000000000 +0100 @@ -331,6 +331,11 @@ static int acpi_processor_get_info(struc return -ENODEV; } } +#if defined(CONFIG_SMP) && defined(CONFIG_PROCESSOR_EXTERNAL_CONTROL) + if (pr->id >= setup_max_cpus && pr->id > 0) + pr->id = -1; +#endif + /* * On some boxes several processors use the same processor bus id. * But they are located in different scope. For example: @@ -483,8 +488,11 @@ static int __cpuinit acpi_processor_add( } #ifdef CONFIG_SMP - if (pr->id >= setup_max_cpus && pr->id != 0) - return 0; + if (pr->id >= setup_max_cpus && pr->id != 0) { + if (!processor_cntl_external()) + return 0; + WARN_ON(pr->id != -1); + } #endif BUG_ON(!processor_cntl_external() && @@ -541,7 +549,8 @@ static int __cpuinit acpi_processor_add( acpi_processor_get_limit_info(pr); } - if (cpuidle_get_driver() == &acpi_idle_driver) + if (cpuidle_get_driver() == &acpi_idle_driver + || processor_pm_external()) acpi_processor_power_init(pr, device); result = processor_extcntl_prepare(pr); @@ -864,6 +873,7 @@ static int __init acpi_processor_init(vo memset(&errata, 0, sizeof(errata)); +#ifdef CONFIG_CPU_IDLE if (!cpuidle_register_driver(&acpi_idle_driver)) { printk(KERN_DEBUG "ACPI: %s registered with cpuidle\n", acpi_idle_driver.name); @@ -871,6 +881,7 @@ static int __init acpi_processor_init(vo printk(KERN_DEBUG "ACPI: acpi_idle yielding to %s\n", cpuidle_get_driver()->name); } +#endif result = acpi_bus_register_driver(&acpi_processor_driver); if (result < 0) --- head-2011-03-17.orig/drivers/acpi/processor_idle.c 2011-02-01 14:42:26.000000000 +0100 +++ head-2011-03-17/drivers/acpi/processor_idle.c 2011-02-01 15:03:10.000000000 +0100 @@ -1068,12 +1068,6 @@ static int acpi_processor_setup_cpuidle( return 0; } - -#else /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ -static inline int acpi_processor_setup_cpuidle(struct acpi_processor *pr) -{ - return 0; -} #endif /* CONFIG_PROCESSOR_EXTERNAL_CONTROL */ int acpi_processor_cst_has_changed(struct acpi_processor *pr) @@ -1104,10 +1098,12 @@ int acpi_processor_cst_has_changed(struc cpuidle_pause_and_lock(); cpuidle_disable_device(&pr->power.dev); acpi_processor_get_power_info(pr); +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL if (pr->flags.power) { acpi_processor_setup_cpuidle(pr); ret = cpuidle_enable_device(&pr->power.dev); } +#endif cpuidle_resume_and_unlock(); return ret; @@ -1147,6 +1143,7 @@ int __cpuinit acpi_processor_power_init( acpi_processor_get_power_info(pr); pr->flags.power_setup_done = 1; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL /* * Install the idle handler if processor power management is supported. * Note that we use previously set idle handler will be used on @@ -1157,6 +1154,7 @@ int __cpuinit acpi_processor_power_init( if (cpuidle_register_device(&pr->power.dev)) return -EIO; } +#endif if (processor_pm_external()) processor_notify_external(pr, --- head-2011-03-17.orig/drivers/edac/i7core_edac.c 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/drivers/edac/i7core_edac.c 2011-02-01 15:03:10.000000000 +0100 @@ -1842,8 +1842,11 @@ static int i7core_mce_check_error(void * if (mce->bank != 8) return 0; -#ifdef CONFIG_SMP /* Only handle if it is the right mc controller */ +#if defined(CONFIG_XEN) /* Could easily be used for native too. */ + if (mce->socketid != pvt->i7core_dev->socket) + return 0; +#elif defined(CONFIG_SMP) if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket) return 0; #endif --- head-2011-03-17.orig/drivers/gpu/drm/ttm/ttm_page_alloc.c 2011-03-17 14:35:43.000000000 +0100 +++ head-2011-03-17/drivers/gpu/drm/ttm/ttm_page_alloc.c 2011-02-01 15:03:10.000000000 +0100 @@ -514,6 +514,21 @@ static int ttm_alloc_new_pages(struct li goto out; } +#ifdef CONFIG_XEN + if (gfp_flags & __GFP_DMA32) { + r = xen_limit_pages_to_max_mfn(p, 0, 32); + + if (r) { + __free_page(p); + printk(KERN_ERR TTM_PFX + "Cannot restrict page (%d).", r); + break; + } + if (gfp_flags & __GFP_ZERO) + clear_page(page_address(p)); + } +#endif + #ifdef CONFIG_HIGHMEM /* gfp flags of highmem page should never be dma32 so we * we should be fine in such case @@ -689,6 +704,22 @@ int ttm_get_pages(struct list_head *page return -ENOMEM; } +#ifdef CONFIG_XEN + if (flags & TTM_PAGE_FLAG_DMA32) { + int rc = xen_limit_pages_to_max_mfn(p, 0, 32); + + if (rc) { + __free_page(p); + printk(KERN_ERR TTM_PFX + "Unable to restrict page (%d).", + rc); + return rc; + } + if (flags & TTM_PAGE_FLAG_ZERO_ALLOC) + clear_page(page_address(p)); + } +#endif + list_add(&p->lru, pages); } return 0; --- head-2011-03-17.orig/drivers/hwmon/coretemp-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/hwmon/coretemp-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -54,6 +54,7 @@ struct pdev_entry { struct device *hwmon_dev; struct mutex update_lock; const char *name; + u32 cpu_core_id, phys_proc_id; u8 x86_model, x86_mask; u32 ucode_rev; char valid; /* zero until following fields are valid */ @@ -78,7 +79,7 @@ static ssize_t show_name(struct device * if (attr->index == SHOW_NAME) ret = sprintf(buf, "%s\n", data->name); else /* show label */ - ret = sprintf(buf, "Core %d\n", data->pdev->id); + ret = sprintf(buf, "Core %d\n", data->cpu_core_id); return ret; } @@ -246,6 +247,52 @@ static int adjust_tjmax(struct coretemp_ return tjmax; } +static int get_tjmax(struct coretemp_data *c, u32 id, struct device *dev) +{ + /* The 100C is default for both mobile and non mobile CPUs */ + int err; + u32 eax, edx; + u32 val; + + /* A new feature of current Intel(R) processors, the + IA32_TEMPERATURE_TARGET contains the TjMax value */ + err = rdmsr_safe_on_pcpu(id, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); + if (err < 0) { + dev_warn(dev, "Unable to read TjMax from CPU.\n"); + } else { + val = (eax >> 16) & 0xff; + /* + * If the TjMax is not plausible, an assumption + * will be used + */ + if ((val > 80) && (val < 120)) { + dev_info(dev, "TjMax is %d C.\n", val); + return val * 1000; + } + } + + /* + * An assumption is made for early CPUs and unreadable MSR. + * NOTE: the given value may not be correct. + */ + + switch (c->x86_model) { + case 0xe: + case 0xf: + case 0x16: + case 0x1a: + dev_warn(dev, "TjMax is assumed as 100 C!\n"); + return 100000; + case 0x17: + case 0x1c: /* Atom CPUs */ + return adjust_tjmax(c, id, dev); + default: + dev_warn(dev, "CPU (model=0x%x) is not supported yet," + " using default TjMax of 100C.\n", c->x86_model); + return 100000; + } +} + static int coretemp_probe(struct platform_device *pdev) { struct coretemp_data *data = platform_get_drvdata(pdev); @@ -282,13 +329,17 @@ static int coretemp_probe(struct platfor } } - data->tjmax = adjust_tjmax(data, pdev->id, &pdev->dev); + data->tjmax = get_tjmax(data, pdev->id, &pdev->dev); - /* read the still undocumented IA32_TEMPERATURE_TARGET it exists - on older CPUs but not in this register, Atoms don't have it either */ + /* + * read the still undocumented IA32_TEMPERATURE_TARGET. It exists + * on older CPUs but not in this register, + * Atoms don't have it either. + */ if ((data->x86_model > 0xe) && (data->x86_model != 0x1c)) { - err = rdmsr_safe_on_pcpu(pdev->id, 0x1a2, &eax, &edx); + err = rdmsr_safe_on_pcpu(pdev->id, MSR_IA32_TEMPERATURE_TARGET, + &eax, &edx); if (err < 0) { dev_warn(&pdev->dev, "Unable to read" " IA32_TEMPERATURE_TARGET MSR\n"); @@ -346,7 +397,6 @@ static DEFINE_MUTEX(pdev_list_mutex); struct cpu_info { struct pdev_entry *pdev_entry; - u8 x86; u32 cpuid_6_eax; }; @@ -356,11 +406,11 @@ static void get_cpuid_info(void *arg) struct pdev_entry *pdev_entry = info->pdev_entry; u32 val = cpuid_eax(1); - info->x86 = ((val >> 8) & 0xf) + ((val >> 20) & 0xff); pdev_entry->x86_model = ((val >> 4) & 0xf) | ((val >> 12) & 0xf0); pdev_entry->x86_mask = val & 0xf; - if (info->x86 != 6 || !pdev_entry->x86_model + if (((val >> 8) & 0xf) != 6 || ((val >> 20) & 0xff) + || !pdev_entry->x86_model || wrmsr_safe(MSR_IA32_UCODE_REV, 0, 0) < 0 || (sync_core(), rdmsr_safe(MSR_IA32_UCODE_REV, &val, &pdev_entry->ucode_rev)) < 0) @@ -376,11 +426,10 @@ static int coretemp_device_add(unsigned struct platform_device *pdev; struct pdev_entry *pdev_entry; - pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL); + info.pdev_entry = kzalloc(sizeof(*pdev_entry), GFP_KERNEL); if (!info.pdev_entry) return -ENOMEM; - info.pdev_entry = pdev_entry; err = xen_set_physical_cpu_affinity(cpu); if (!err) { get_cpuid_info(&info); @@ -399,31 +448,41 @@ static int coretemp_device_add(unsigned if (err) goto exit_entry_free; - /* check if family 6, models 0xe (Pentium M DC), - 0xf (Core 2 DC 65nm), 0x16 (Core 2 SC 65nm), - 0x17 (Penryn 45nm), 0x1a (Nehalem), 0x1c (Atom), - 0x1e (Lynnfield) */ - if (info.x86 != 0x6 || - !((pdev_entry->x86_model == 0xe) || (pdev_entry->x86_model == 0xf) || - (pdev_entry->x86_model == 0x16) || (pdev_entry->x86_model == 0x17) || - (pdev_entry->x86_model == 0x1a) || (pdev_entry->x86_model == 0x1c) || - (pdev_entry->x86_model == 0x1e))) { - - /* supported CPU not found, but report the unknown - family 6 CPU */ - if ((info.x86 == 0x6) && (pdev_entry->x86_model > 0xf)) - printk(KERN_WARNING DRVNAME ": Unknown CPU " - "model 0x%x\n", pdev_entry->x86_model); + /* + * CPUID.06H.EAX[0] indicates whether the CPU has thermal + * sensors. We check this bit only, all the early CPUs + * without thermal sensors will be filtered out. + */ + if (!(info.cpuid_6_eax & 0x1)) { + printk(KERN_INFO DRVNAME ": CPU (model=0x%x)" + " has no thermal sensor.\n", info.pdev_entry->x86_model); + goto exit_entry_free; + } + + err = xen_get_topology_info(cpu, &info.pdev_entry->cpu_core_id, + &info.pdev_entry->phys_proc_id, NULL); + if (err) goto exit_entry_free; + + mutex_lock(&pdev_list_mutex); + + /* Skip second HT entry of each core */ + list_for_each_entry(pdev_entry, &pdev_list, list) { + if (info.pdev_entry->phys_proc_id == pdev_entry->phys_proc_id && + info.pdev_entry->cpu_core_id == pdev_entry->cpu_core_id) { + err = 0; /* Not an error */ + goto exit; + } } pdev = platform_device_alloc(DRVNAME, cpu); if (!pdev) { err = -ENOMEM; printk(KERN_ERR DRVNAME ": Device allocation failed\n"); - goto exit_entry_free; + goto exit; } + pdev_entry = info.pdev_entry; platform_set_drvdata(pdev, pdev_entry); pdev_entry->pdev = pdev; @@ -434,7 +493,6 @@ static int coretemp_device_add(unsigned goto exit_device_put; } - mutex_lock(&pdev_list_mutex); list_add_tail(&pdev_entry->list, &pdev_list); mutex_unlock(&pdev_list_mutex); @@ -442,6 +500,8 @@ static int coretemp_device_add(unsigned exit_device_put: platform_device_put(pdev); +exit: + mutex_unlock(&pdev_list_mutex); exit_entry_free: kfree(info.pdev_entry); return err; @@ -450,14 +510,36 @@ exit_entry_free: static void coretemp_device_remove(unsigned int cpu) { struct pdev_entry *p; + unsigned int i; mutex_lock(&pdev_list_mutex); list_for_each_entry(p, &pdev_list, list) { - if (p->pdev->id == cpu) { - platform_device_unregister(p->pdev); - list_del(&p->list); - kfree(p); + if (p->pdev->id != cpu) + continue; + + platform_device_unregister(p->pdev); + list_del(&p->list); + mutex_unlock(&pdev_list_mutex); + for (i = 0; ; ++i) { + u32 cpu_core_id, phys_proc_id; + int err; + + if (i == cpu) + continue; + err = xen_get_topology_info(i, &cpu_core_id, + &phys_proc_id, NULL); + if (err == -ENOENT) + continue; + if (err) + break; + if (phys_proc_id != p->phys_proc_id || + cpu_core_id != p->cpu_core_id) + continue; + if (!coretemp_device_add(i)) + break; } + kfree(p); + return; } mutex_unlock(&pdev_list_mutex); } --- head-2011-03-17.orig/drivers/xen/balloon/balloon.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/balloon/balloon.c 2011-02-01 15:03:10.000000000 +0100 @@ -343,7 +343,7 @@ static int increase_reservation(unsigned if (rc > 0) kswapd_run(0); if (need_zonelists_rebuild) - build_all_zonelists(); + build_all_zonelists(NULL); else vm_total_pages = nr_free_pagecache_pages(); #endif --- head-2011-03-17.orig/drivers/xen/blktap/blktap.c 2011-02-17 10:18:48.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap/blktap.c 2011-02-17 10:19:12.000000000 +0100 @@ -1768,3 +1768,4 @@ static int __init blkif_init(void) module_init(blkif_init); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("devname:xen/blktap0"); --- head-2011-03-17.orig/drivers/xen/blktap2/control.c 2011-02-01 14:54:13.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap2/control.c 2011-02-01 15:03:10.000000000 +0100 @@ -283,3 +283,4 @@ fail: module_init(blktap_init); module_exit(blktap_exit); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("devname:" BLKTAP2_DEV_DIR "control"); --- head-2011-03-17.orig/drivers/xen/blktap2-new/control.c 2011-02-24 15:14:41.000000000 +0100 +++ head-2011-03-17/drivers/xen/blktap2-new/control.c 2011-02-24 15:03:58.000000000 +0100 @@ -314,3 +314,4 @@ fail: module_init(blktap_init); module_exit(blktap_exit); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS("devname:" BLKTAP2_DEV_DIR "control"); --- head-2011-03-17.orig/drivers/xen/console/console.c 2011-02-01 14:50:44.000000000 +0100 +++ head-2011-03-17/drivers/xen/console/console.c 2011-02-01 15:03:10.000000000 +0100 @@ -365,27 +365,23 @@ void xencons_rx(char *buf, unsigned len) for (i = 0; i < len; i++) { #ifdef CONFIG_MAGIC_SYSRQ - if (sysrq_on()) { - static unsigned long sysrq_requested; + static unsigned long sysrq_requested; - if (buf[i] == '\x0f') { /* ^O */ - if (!sysrq_requested) { - sysrq_requested = jiffies; - continue; /* don't print sysrq key */ - } - sysrq_requested = 0; - } else if (sysrq_requested) { - unsigned long sysrq_timeout = - sysrq_requested + HZ*2; - sysrq_requested = 0; - if (time_before(jiffies, sysrq_timeout)) { - spin_unlock_irqrestore( - &xencons_lock, flags); - handle_sysrq(buf[i], xencons_tty); - spin_lock_irqsave( - &xencons_lock, flags); - continue; - } + if (buf[i] == '\x0f') { /* ^O */ + if (!sysrq_requested) { + sysrq_requested = jiffies; + continue; /* don't print sysrq key */ + } + sysrq_requested = 0; + } else if (sysrq_requested) { + unsigned long sysrq_timeout = sysrq_requested + HZ*2; + + sysrq_requested = 0; + if (time_before(jiffies, sysrq_timeout)) { + spin_unlock_irqrestore(&xencons_lock, flags); + handle_sysrq(buf[i], xencons_tty); + spin_lock_irqsave(&xencons_lock, flags); + continue; } } #endif --- head-2011-03-17.orig/drivers/xen/core/evtchn.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/evtchn.c 2011-02-09 12:45:24.000000000 +0100 @@ -1201,18 +1201,10 @@ int __init arch_probe_nr_irqs(void) int nr = 256, nr_irqs_gsi; if (is_initial_xendomain()) { - nr_irqs_gsi = acpi_probe_gsi(); + nr_irqs_gsi = NR_IRQS_LEGACY; #ifdef CONFIG_X86_IO_APIC - if (nr_irqs_gsi <= NR_IRQS_LEGACY) { - /* for acpi=off or acpi not compiled in */ - int idx; - - for (nr_irq_gsi = idx = 0; idx < nr_ioapics; idx++) - nr_irqs_gsi += io_apic_get_redir_entries(idx) + 1; - } + nr_irqs_gsi += gsi_top; #endif - if (nr_irqs_gsi < NR_IRQS_LEGACY) - nr_irqs_gsi = NR_IRQS_LEGACY; #ifdef CONFIG_PCI_MSI nr += max(nr_irqs_gsi * 16, nr_cpu_ids * 8); #endif --- head-2011-03-17.orig/drivers/xen/core/machine_reboot.c 2011-02-01 14:42:26.000000000 +0100 +++ head-2011-03-17/drivers/xen/core/machine_reboot.c 2011-02-01 15:03:10.000000000 +0100 @@ -222,12 +222,6 @@ int __xen_suspend(int fast_suspend, void if (num_possible_cpus() == 1) fast_suspend = 0; - if (fast_suspend) { - err = stop_machine_create(); - if (err) - return err; - } - suspend.fast_suspend = fast_suspend; suspend.resume_notifier = resume_notifier; @@ -254,8 +248,6 @@ int __xen_suspend(int fast_suspend, void if (!fast_suspend) smp_resume(); - else - stop_machine_destroy(); return 0; } --- head-2011-03-17.orig/drivers/xen/evtchn.c 2011-02-01 14:55:46.000000000 +0100 +++ head-2011-03-17/drivers/xen/evtchn.c 2011-02-01 15:03:10.000000000 +0100 @@ -569,3 +569,4 @@ module_init(evtchn_init); module_exit(evtchn_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS("devname:xen/evtchn"); --- head-2011-03-17.orig/drivers/xen/gntdev/gntdev.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/drivers/xen/gntdev/gntdev.c 2011-02-01 15:03:10.000000000 +0100 @@ -44,6 +44,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); +#define GNTDEV_NAME "gntdev" +MODULE_ALIAS("devname:xen/" GNTDEV_NAME); + #define MAX_GRANTS_LIMIT 1024 #define DEFAULT_MAX_GRANTS 128 @@ -162,8 +165,6 @@ static struct vm_operations_struct gntde /* The driver major number, for use when unregistering the driver. */ static int gntdev_major; -#define GNTDEV_NAME "gntdev" - /* Memory mapping functions * ------------------------ * --- head-2011-03-17.orig/drivers/xen/usbback/usbback.c 2011-01-31 17:56:27.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbback/usbback.c 2011-02-01 15:03:10.000000000 +0100 @@ -73,7 +73,6 @@ typedef struct { void *buffer; dma_addr_t transfer_dma; struct usb_ctrlrequest *setup; - dma_addr_t setup_dma; /* request segments */ uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */ @@ -239,7 +238,7 @@ static int usbbk_alloc_urb(usbif_urb_req } if (req->buffer_length) { - pending_req->buffer = usb_buffer_alloc(pending_req->stub->udev, + pending_req->buffer = usb_alloc_coherent(pending_req->stub->udev, req->buffer_length, GFP_KERNEL, &pending_req->transfer_dma); if (!pending_req->buffer) { @@ -250,9 +249,8 @@ static int usbbk_alloc_urb(usbif_urb_req } if (usb_pipecontrol(req->pipe)) { - pending_req->setup = usb_buffer_alloc(pending_req->stub->udev, - sizeof(struct usb_ctrlrequest), GFP_KERNEL, - &pending_req->setup_dma); + pending_req->setup = kmalloc(sizeof(struct usb_ctrlrequest), + GFP_KERNEL); if (!pending_req->setup) { pr_err("usbback: can't alloc usb_ctrlrequest\n"); ret = -ENOMEM; @@ -264,8 +262,10 @@ static int usbbk_alloc_urb(usbif_urb_req fail_free_buffer: if (req->buffer_length) - usb_buffer_free(pending_req->stub->udev, req->buffer_length, - pending_req->buffer, pending_req->transfer_dma); + usb_free_coherent(pending_req->stub->udev, + req->buffer_length, + pending_req->buffer, + pending_req->transfer_dma); fail_free_urb: usb_free_urb(pending_req->urb); fail: @@ -284,11 +284,10 @@ static void usbbk_free_urb(struct urb *u static void _usbbk_free_urb(struct urb *urb) { if (usb_pipecontrol(urb->pipe)) - usb_buffer_free(urb->dev, sizeof(struct usb_ctrlrequest), - urb->setup_packet, urb->setup_dma); + kfree(urb->setup_packet); if (urb->transfer_buffer_length) - usb_buffer_free(urb->dev, urb->transfer_buffer_length, - urb->transfer_buffer, urb->transfer_dma); + usb_free_coherent(urb->dev, urb->transfer_buffer_length, + urb->transfer_buffer, urb->transfer_dma); barrier(); usb_free_urb(urb); } @@ -534,9 +533,7 @@ static void usbbk_init_urb(usbif_urb_req pending_req->buffer, req->buffer_length, usbbk_urb_complete, pending_req); memcpy(pending_req->setup, req->u.ctrl, 8); - urb->setup_dma = pending_req->setup_dma; urb->transfer_flags = req->transfer_flags; - urb->transfer_flags |= URB_NO_SETUP_DMA_MAP; break; case PIPE_BULK: --- head-2011-03-17.orig/drivers/xen/usbfront/usbfront.h 2011-01-31 17:56:27.000000000 +0100 +++ head-2011-03-17/drivers/xen/usbfront/usbfront.h 2011-02-01 15:03:10.000000000 +0100 @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -58,14 +59,6 @@ #include #include -/* - * usbfront needs USB HCD headers, - * drivers/usb/core/hcd.h and drivers/usb/core/hub.h, - * but, they are not in public include path. - */ -#include "../../usb/core/hcd.h" -#include "../../usb/core/hub.h" - static inline struct usbfront_info *hcd_to_info(struct usb_hcd *hcd) { return (struct usbfront_info *) (hcd->hcd_priv); --- head-2011-03-17.orig/include/acpi/processor.h 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/include/acpi/processor.h 2011-02-01 15:03:10.000000000 +0100 @@ -80,8 +80,9 @@ struct acpi_processor_cx { u32 power; u32 usage; u64 time; +#ifndef CONFIG_PROCESSOR_EXTERNAL_CONTROL u8 bm_sts_skip; -#ifdef CONFIG_PROCESSOR_EXTERNAL_CONTROL +#else /* Require raw information for external control logic */ struct acpi_power_register reg; u32 csd_count; --- head-2011-03-17.orig/lib/swiotlb-xen.c 2011-02-01 15:03:03.000000000 +0100 +++ head-2011-03-17/lib/swiotlb-xen.c 2011-02-01 15:03:10.000000000 +0100 @@ -619,37 +619,6 @@ swiotlb_sync_single_for_device(struct de EXPORT_SYMBOL(swiotlb_sync_single_for_device); /* - * Same as above, but for a sub-range of the mapping. - */ -static void -swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - int dir, int target) -{ - swiotlb_sync_single(hwdev, dev_addr + offset, size, dir, target); -} - -void -swiotlb_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_CPU); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_cpu); - -void -swiotlb_sync_single_range_for_device(struct device *hwdev, dma_addr_t dev_addr, - unsigned long offset, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_range(hwdev, dev_addr, offset, size, dir, - SYNC_FOR_DEVICE); -} -EXPORT_SYMBOL_GPL(swiotlb_sync_single_range_for_device); - -/* * Map a set of buffers described by scatterlist in streaming mode for DMA. * This is the scatter-gather version of the above swiotlb_map_page * interface. Here the scatter gather list elements are each tagged with the --- head-2011-03-17.orig/mm/page_alloc.c 2011-02-08 10:06:32.000000000 +0100 +++ head-2011-03-17/mm/page_alloc.c 2011-02-08 10:06:44.000000000 +0100 @@ -649,9 +649,8 @@ static bool free_pages_prepare(struct pa #ifdef CONFIG_XEN if (PageForeign(page)) { - WARN_ON(wasMlocked); PageForeignDestructor(page, order); - return; + return false; } #endif @@ -681,6 +680,9 @@ static void __free_pages_ok(struct page unsigned long flags; int wasMlocked = __TestClearPageMlocked(page); +#ifdef CONFIG_XEN + WARN_ON(PageForeign(page) && wasMlocked); +#endif if (!free_pages_prepare(page, order)) return; @@ -1171,6 +1173,9 @@ void free_hot_cold_page(struct page *pag int migratetype; int wasMlocked = __TestClearPageMlocked(page); +#ifdef CONFIG_XEN + WARN_ON(PageForeign(page) && wasMlocked); +#endif if (!free_pages_prepare(page, 0)) return;