qubes-linux-kernel/patches.xen/xen3-patch-2.6.32

From: Linux Kernel Mailing List <linux-kernel@vger.kernel.org>
Subject: Linux: 2.6.32
Patch-mainline: 2.6.32
This patch contains the differences between 2.6.31 and 2.6.32.
Acked-by: Jeff Mahoney <jeffm@suse.com>
Automatically created from "patches.kernel.org/patch-2.6.32" by xen-port-patches.py
--- head-2010-05-25.orig/arch/x86/ia32/ia32entry-xen.S 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/ia32/ia32entry-xen.S 2010-03-24 15:32:27.000000000 +0100
@@ -20,18 +20,15 @@
#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
#define __AUDIT_ARCH_LE 0x40000000
-#ifndef CONFIG_AUDITSYSCALL
-#define sysexit_audit int_ret_from_sys_call
-#define sysretl_audit int_ret_from_sys_call
-#endif
-
#define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8)
.macro IA32_ARG_FIXUP noebp=0
movl %edi,%r8d
.if \noebp
+ jmp ia32_common
.else
movl %ebp,%r9d
+ia32_common:
.endif
xchg %ecx,%esi
movl %ebx,%edi
@@ -39,12 +36,12 @@
.endm
/* clobbers %eax */
- .macro CLEAR_RREGS _r9=rax
+ .macro CLEAR_RREGS offset=0, _r9=rax
xorl %eax,%eax
- movq %rax,R11(%rsp)
- movq %rax,R10(%rsp)
- movq %\_r9,R9(%rsp)
- movq %rax,R8(%rsp)
+ movq %rax,\offset+R11(%rsp)
+ movq %rax,\offset+R10(%rsp)
+ movq %\_r9,\offset+R9(%rsp)
+ movq %rax,\offset+R8(%rsp)
.endm
/*
@@ -144,17 +141,7 @@ ENTRY(ia32_sysenter_target)
jnz sysenter_tracesys
cmpl $(IA32_NR_syscalls-1),%eax
ja ia32_badsys
-sysenter_do_call:
- IA32_ARG_FIXUP
-sysenter_dispatch:
- call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
- GET_THREAD_INFO(%r10)
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
- jnz sysexit_audit
- jmp int_ret_from_sys_call
+ jmp ia32_do_call
#ifdef CONFIG_AUDITSYSCALL
.macro auditsys_entry_common
@@ -175,31 +162,10 @@ sysenter_dispatch:
movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
.endm
- .macro auditsys_exit exit,ebpsave=RBP
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10)
- jnz int_ret_from_sys_call
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- movl %eax,%esi /* second arg, syscall return value */
- cmpl $0,%eax /* is it < 0? */
- setl %al /* 1 if so, 0 if not */
- movzbl %al,%edi /* zero-extend that into %edi */
- inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */
- call audit_syscall_exit
- movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */
- movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- jmp int_with_check
- .endm
-
sysenter_auditsys:
auditsys_entry_common
movl %ebp,%r9d /* reload 6th syscall arg */
- jmp sysenter_dispatch
-
-sysexit_audit:
- auditsys_exit sysexit_from_sys_call
+ jmp ia32_dispatch
#endif
sysenter_tracesys:
@@ -216,7 +182,7 @@ sysenter_tracesys:
RESTORE_REST
cmpl $(IA32_NR_syscalls-1),%eax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
- jmp sysenter_do_call
+ jmp ia32_do_call
CFI_ENDPROC
ENDPROC(ia32_sysenter_target)
@@ -272,24 +238,13 @@ ENTRY(ia32_cstar_target)
ja ia32_badsys
cstar_do_call:
IA32_ARG_FIXUP 1
-cstar_dispatch:
- call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
- GET_THREAD_INFO(%r10)
- DISABLE_INTERRUPTS(CLBR_NONE)
- testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
- jnz sysretl_audit
- jmp int_ret_from_sys_call
#ifdef CONFIG_AUDITSYSCALL
cstar_auditsys:
movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
auditsys_entry_common
movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
- jmp cstar_dispatch
-
-sysretl_audit:
- auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */
+ jmp ia32_dispatch
#endif
cstar_tracesys:
@@ -299,7 +254,7 @@ cstar_tracesys:
#endif
xchgl %r9d,%ebp
SAVE_REST
- CLEAR_RREGS r9
+ CLEAR_RREGS 0, r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
@@ -367,9 +322,11 @@ ENTRY(ia32_syscall)
ja ia32_badsys
ia32_do_call:
IA32_ARG_FIXUP
+ia32_dispatch:
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
movq %rax,RAX-ARGOFFSET(%rsp)
+ CLEAR_RREGS -ARGOFFSET
jmp int_ret_from_sys_call
ia32_tracesys:
@@ -387,8 +344,8 @@ END(ia32_syscall)
ia32_badsys:
movq $0,ORIG_RAX-ARGOFFSET(%rsp)
- movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
- jmp int_ret_from_sys_call
+ movq $-ENOSYS,%rax
+ jmp ia32_sysret
quiet_ni_syscall:
movq $-ENOSYS,%rax
@@ -482,7 +439,7 @@ ia32_sys_call_table:
.quad sys_mkdir
.quad sys_rmdir /* 40 */
.quad sys_dup
- .quad sys32_pipe
+ .quad sys_pipe
.quad compat_sys_times
.quad quiet_ni_syscall /* old prof syscall holder */
.quad sys_brk /* 45 */
@@ -776,5 +733,5 @@ ia32_sys_call_table:
.quad compat_sys_preadv
.quad compat_sys_pwritev
.quad compat_sys_rt_tgsigqueueinfo /* 335 */
- .quad sys_perf_counter_open
+ .quad sys_perf_event_open
ia32_syscall_end:
--- head-2010-05-25.orig/arch/x86/include/asm/time.h 2010-03-24 15:10:37.000000000 +0100
+++ head-2010-05-25/arch/x86/include/asm/time.h 2010-03-24 15:32:27.000000000 +0100
@@ -8,8 +8,9 @@ extern void hpet_time_init(void);
extern void time_init(void);
#ifdef CONFIG_XEN
+struct timespec;
extern int xen_independent_wallclock(void);
-extern unsigned long xen_read_persistent_clock(void);
+extern void xen_read_persistent_clock(struct timespec *);
extern int xen_update_persistent_clock(void);
#endif
--- head-2010-05-25.orig/arch/x86/include/asm/uv/uv_hub.h 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/arch/x86/include/asm/uv/uv_hub.h 2010-03-24 15:32:27.000000000 +0100
@@ -11,7 +11,7 @@
#ifndef _ASM_X86_UV_UV_HUB_H
#define _ASM_X86_UV_UV_HUB_H
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
#include <linux/numa.h>
#include <linux/percpu.h>
#include <linux/timer.h>
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/agp.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/agp.h 2010-03-24 15:32:27.000000000 +0100
@@ -28,10 +28,7 @@
*/
#define flush_agp_cache() wbinvd()
-/* Convert a physical address to an address suitable for the GART. */
-#define phys_to_gart(x) phys_to_machine(x)
-#define gart_to_phys(x) machine_to_phys(x)
-#define page_to_gart(x) phys_to_gart(page_to_pseudophys(x))
+#define virt_to_gart virt_to_machine
/* GATT allocation. Returns/accepts GATT kernel virtual address. */
#define alloc_gatt_pages(order) ({ \
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/desc.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/desc.h 2010-03-24 15:32:27.000000000 +0100
@@ -312,7 +312,14 @@ static inline void load_LDT(mm_context_t
static inline unsigned long get_desc_base(const struct desc_struct *desc)
{
- return desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24);
+ return (unsigned)(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
+}
+
+static inline void set_desc_base(struct desc_struct *desc, unsigned long base)
+{
+ desc->base0 = base & 0xffff;
+ desc->base1 = (base >> 16) & 0xff;
+ desc->base2 = (base >> 24) & 0xff;
}
static inline unsigned long get_desc_limit(const struct desc_struct *desc)
@@ -320,6 +327,12 @@ static inline unsigned long get_desc_lim
return desc->limit0 | (desc->limit << 16);
}
+static inline void set_desc_limit(struct desc_struct *desc, unsigned long limit)
+{
+ desc->limit0 = limit & 0xffff;
+ desc->limit = (limit >> 16) & 0xf;
+}
+
#ifndef CONFIG_X86_NO_IDT
static inline void _set_gate(int gate, unsigned type, void *addr,
unsigned dpl, unsigned ist, unsigned seg)
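
The set_desc_base()/set_desc_limit() helpers added above just split a linear value across the non-contiguous base0/base1/base2 (and limit0/limit) fields of an x86 segment descriptor. A standalone sketch of that packing, using a simplified struct (the field names and widths here are illustrative, not the kernel's full desc_struct):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Just the fields that set_desc_base() touches; the real
     * struct desc_struct also carries type, dpl, flags, etc. */
    struct desc_model {
            uint16_t limit0;
            uint16_t base0;
            uint8_t  base1;
            uint8_t  access;
            uint8_t  limit_hi_flags;
            uint8_t  base2;
    };

    static void set_desc_base(struct desc_model *desc, uint32_t base)
    {
            desc->base0 = base & 0xffff;
            desc->base1 = (base >> 16) & 0xff;
            desc->base2 = (base >> 24) & 0xff;
    }

    static uint32_t get_desc_base(const struct desc_model *desc)
    {
            return desc->base0 | ((uint32_t)desc->base1 << 16)
                               | ((uint32_t)desc->base2 << 24);
    }

    int main(void)
    {
            struct desc_model d = { 0 };

            set_desc_base(&d, 0xdeadbeef);
            assert(get_desc_base(&d) == 0xdeadbeef);
            printf("base 0x%x split as %04x/%02x/%02x\n",
                   get_desc_base(&d), d.base0, d.base1, d.base2);
            return 0;
    }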
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/dma-mapping.h 2010-03-24 15:14:47.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/dma-mapping.h 2010-03-24 15:32:27.000000000 +0100
@@ -1,11 +1,24 @@
#ifndef _ASM_X86_DMA_MAPPING_H_
+#define phys_to_dma _phys_to_dma_
+#define dma_to_phys _dma_to_phys_
+
#include_next <asm/dma-mapping.h>
-void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
+#undef phys_to_dma
+#undef dma_to_phys
+
+static inline dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return phys_to_machine(paddr);
+}
-#define address_needs_mapping(hwdev, addr, size) \
- !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size)
+static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr)
+{
+ return machine_to_phys(daddr);
+}
+
+void dma_generic_free_coherent(struct device *, size_t, void *, dma_addr_t);
extern int range_straddles_page_boundary(paddr_t p, size_t size);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/fixmap.h 2010-03-24 15:32:27.000000000 +0100
@@ -139,6 +139,9 @@ enum fixed_addresses {
#ifdef CONFIG_X86_32
FIX_WP_TEST,
#endif
+#ifdef CONFIG_INTEL_TXT
+ FIX_TBOOT_BASE,
+#endif
__end_of_fixed_addresses
};
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/hypervisor.h 2010-03-24 15:32:27.000000000 +0100
@@ -70,6 +70,7 @@ extern start_info_t *xen_start_info;
#endif
#define init_hypervisor(c) ((void)((c)->x86_hyper_vendor = X86_HYPER_VENDOR_XEN))
+#define init_hypervisor_platform() init_hypervisor(&boot_cpu_data)
struct vcpu_runstate_info *setup_runstate_area(unsigned int cpu);
@@ -351,6 +352,6 @@ MULTI_grant_table_op(multicall_entry_t *
#endif
-#define uvm_multi(cpumask) ((unsigned long)cpus_addr(cpumask) | UVMF_MULTI)
+#define uvm_multi(cpumask) ((unsigned long)cpumask_bits(cpumask) | UVMF_MULTI)
#endif /* __HYPERVISOR_H__ */
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/irqflags.h 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/irqflags.h 2010-03-24 15:32:27.000000000 +0100
@@ -1,7 +1,7 @@
#ifndef _X86_IRQFLAGS_H_
#define _X86_IRQFLAGS_H_
-#include <asm/processor-flags.h>
+#include <asm/smp-processor-id.h>
#ifndef __ASSEMBLY__
/*
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/mmu_context.h 2010-03-24 15:32:27.000000000 +0100
@@ -88,12 +88,12 @@ static inline void switch_mm(struct mm_s
!PagePinned(virt_to_page(next->pgd)));
/* stop flush ipis for the previous mm */
- cpu_clear(cpu, prev->cpu_vm_mask);
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
#if defined(CONFIG_SMP) && !defined(CONFIG_XEN) /* XEN: no lazy tlb */
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
percpu_write(cpu_tlbstate.active_mm, next);
#endif
- cpu_set(cpu, next->cpu_vm_mask);
+ cpumask_set_cpu(cpu, mm_cpumask(next));
/* Re-load page tables: load_cr3(next->pgd) */
op->cmd = MMUEXT_NEW_BASEPTR;
@@ -125,7 +125,7 @@ static inline void switch_mm(struct mm_s
percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
- if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+ if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) {
/* We were in lazy tlb mode and leave_mm disabled
* tlb flush IPI delivery. We must reload CR3
* to make sure to use no freed page tables.
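
The cpu_vm_mask -> mm_cpumask() changes in this and later hunks follow the 2.6.32 cpumask API rework: open-coded bitmap macros (cpu_set, cpu_clear, cpu_test_and_set) become accessor functions that take a struct cpumask pointer. A toy single-word userspace model of the accessor shape (not the kernel implementation):

    #include <stdio.h>

    /* One-word "cpumask"; assumes cpu < bits-per-long. */
    typedef struct { unsigned long bits; } cpumask_t;

    static void cpumask_set_cpu(int cpu, cpumask_t *m)   { m->bits |=  (1UL << cpu); }
    static void cpumask_clear_cpu(int cpu, cpumask_t *m) { m->bits &= ~(1UL << cpu); }

    /* Returns the old value of the bit, like the kernel helper. */
    static int cpumask_test_and_set_cpu(int cpu, cpumask_t *m)
    {
            int was = !!(m->bits & (1UL << cpu));
            m->bits |= 1UL << cpu;
            return was;
    }

    int main(void)
    {
            cpumask_t mask = { 0 };

            cpumask_set_cpu(2, &mask);
            printf("%d\n", cpumask_test_and_set_cpu(2, &mask)); /* 1 */
            cpumask_clear_cpu(2, &mask);
            printf("%d\n", cpumask_test_and_set_cpu(2, &mask)); /* 0 */
            return 0;
    }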
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pci.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pci.h 2010-03-24 15:32:27.000000000 +0100
@@ -151,7 +151,11 @@ static inline int __pcibus_to_node(const
static inline const struct cpumask *
cpumask_of_pcibus(const struct pci_bus *bus)
{
- return cpumask_of_node(__pcibus_to_node(bus));
+ int node;
+
+ node = __pcibus_to_node(bus);
+ return (node == -1) ? cpu_online_mask :
+ cpumask_of_node(node);
}
#endif
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable.h 2010-03-24 15:32:27.000000000 +0100
@@ -53,16 +53,6 @@ extern struct list_head pgd_list;
#define pte_update(mm, addr, ptep) do { } while (0)
#define pte_update_defer(mm, addr, ptep) do { } while (0)
-static inline void __init paravirt_pagetable_setup_start(pgd_t *base)
-{
- xen_pagetable_setup_start(base);
-}
-
-static inline void __init paravirt_pagetable_setup_done(pgd_t *base)
-{
- xen_pagetable_setup_done(base);
-}
-
#define pgd_val(x) xen_pgd_val(x)
#define __pgd(x) xen_make_pgd(x)
@@ -134,6 +124,11 @@ static inline int pte_special(pte_t pte)
#define pte_page(pte) pfn_to_page(pte_pfn(pte))
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
+}
+
static inline int pmd_large(pmd_t pte)
{
return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -363,7 +358,7 @@ static inline unsigned long pmd_page_vad
* this macro returns the index of the entry in the pmd page which would
* control the given virtual address
*/
-static inline unsigned pmd_index(unsigned long address)
+static inline unsigned long pmd_index(unsigned long address)
{
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}
@@ -383,7 +378,7 @@ static inline unsigned pmd_index(unsigne
* this function returns the index of the entry in the pte page which would
* control the given virtual address
*/
-static inline unsigned pte_index(unsigned long address)
+static inline unsigned long pte_index(unsigned long address)
{
return (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
}
@@ -439,11 +434,6 @@ static inline pmd_t *pmd_offset(pud_t *p
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return (pmd_val(pmd) & PTE_PFN_MASK) >> PAGE_SHIFT;
-}
-
static inline int pud_large(pud_t pud)
{
return (__pud_val(pud) & (_PAGE_PSE | _PAGE_PRESENT)) ==
@@ -479,7 +469,7 @@ static inline unsigned long pgd_page_vad
#define pgd_page(pgd) pfn_to_page(pgd_val(pgd) >> PAGE_SHIFT)
/* to find an entry in a page-table-directory. */
-static inline unsigned pud_index(unsigned long address)
+static inline unsigned long pud_index(unsigned long address)
{
return (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1);
}
@@ -600,7 +590,7 @@ extern int ptep_clear_flush_young(struct
if (!pte_none(__res) && \
((vma)->vm_mm != current->mm || \
HYPERVISOR_update_va_mapping(addr, __pte(0), \
- uvm_multi((vma)->vm_mm->cpu_vm_mask) | \
+ uvm_multi(mm_cpumask((vma)->vm_mm)) | \
UVMF_INVLPG))) { \
__xen_pte_clear(__ptep); \
flush_tlb_page(vma, addr); \
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/pgtable_types.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/pgtable_types.h 2010-03-24 15:32:27.000000000 +0100
@@ -334,6 +334,7 @@ static inline pteval_t pte_flags(pte_t p
typedef struct page *pgtable_t;
extern pteval_t __supported_pte_mask;
+extern void set_nx(void);
extern int nx_enabled;
#define pgprot_writecombine pgprot_writecombine
@@ -354,14 +355,6 @@ int phys_mem_access_prot_allowed(struct
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);
-#ifndef CONFIG_XEN
-extern void native_pagetable_setup_start(pgd_t *base);
-extern void native_pagetable_setup_done(pgd_t *base);
-#else
-static inline void xen_pagetable_setup_start(pgd_t *base) {}
-static inline void xen_pagetable_setup_done(pgd_t *base) {}
-#endif
-
struct seq_file;
extern void arch_report_meminfo(struct seq_file *m);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/processor.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/processor.h 2010-03-24 15:32:27.000000000 +0100
@@ -27,6 +27,7 @@ struct mm_struct;
#include <linux/cpumask.h>
#include <linux/cache.h>
#include <linux/threads.h>
+#include <linux/math64.h>
#include <linux/init.h>
#include <xen/interface/physdev.h>
@@ -411,7 +412,17 @@ extern unsigned long kernel_eflags;
extern asmlinkage void ignore_sysret(void);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, stack_canary);
+/*
+ * Make sure stack canary segment base is cache-line aligned:
+ * "For Intel Atom processors, avoid non zero segment base address
+ * that is not aligned to cache line boundary at all cost."
+ * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
+ */
+struct stack_canary {
+ char __pad[20]; /* canary at %gs:20 */
+ unsigned long canary;
+};
+DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
#endif /* X86_64 */
@@ -647,13 +658,23 @@ static inline void cpu_relax(void)
rep_nop();
}
-/* Stop speculative execution: */
+/* Stop speculative execution and prefetching of modified code. */
static inline void sync_core(void)
{
int tmp;
- asm volatile("cpuid" : "=a" (tmp) : "0" (1)
- : "ebx", "ecx", "edx", "memory");
+#if defined(CONFIG_M386) || defined(CONFIG_M486)
+ if (boot_cpu_data.x86 < 5)
+ /* There is no speculative execution.
+ * jmp is a barrier to prefetching. */
+ asm volatile("jmp 1f\n1:\n" ::: "memory");
+ else
+#endif
+ /* cpuid is a barrier to speculative execution.
+ * Prefetched instructions are automatically
+ * invalidated when modified. */
+ asm volatile("cpuid" : "=a" (tmp) : "0" (1)
+ : "ebx", "ecx", "edx", "memory");
}
static inline void __monitor(const void *eax, unsigned long ecx,
@@ -944,4 +965,35 @@ extern void start_thread(struct pt_regs
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+extern int amd_get_nb_id(int cpu);
+
+struct aperfmperf {
+ u64 aperf, mperf;
+};
+
+static inline void get_aperfmperf(struct aperfmperf *am)
+{
+ WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_APERFMPERF));
+
+ rdmsrl(MSR_IA32_APERF, am->aperf);
+ rdmsrl(MSR_IA32_MPERF, am->mperf);
+}
+
+#define APERFMPERF_SHIFT 10
+
+static inline
+unsigned long calc_aperfmperf_ratio(struct aperfmperf *old,
+ struct aperfmperf *new)
+{
+ u64 aperf = new->aperf - old->aperf;
+ u64 mperf = new->mperf - old->mperf;
+ unsigned long ratio = aperf;
+
+ mperf >>= APERFMPERF_SHIFT;
+ if (mperf)
+ ratio = div64_u64(aperf, mperf);
+
+ return ratio;
+}
+
#endif /* _ASM_X86_PROCESSOR_H */
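
calc_aperfmperf_ratio() above returns APERF/MPERF as a fixed-point value scaled by 2^APERFMPERF_SHIFT; shifting mperf down instead of aperf up keeps the dividend from overflowing 64 bits. A minimal userspace restatement of the arithmetic (plain division stands in for div64_u64):

    #include <stdint.h>
    #include <stdio.h>

    #define APERFMPERF_SHIFT 10

    struct aperfmperf { uint64_t aperf, mperf; };

    /* ratio = (aperf << SHIFT) / mperf, computed as
     * aperf / (mperf >> SHIFT) to avoid overflow. */
    static unsigned long calc_aperfmperf_ratio(const struct aperfmperf *old,
                                               const struct aperfmperf *new)
    {
            uint64_t aperf = new->aperf - old->aperf;
            uint64_t mperf = new->mperf - old->mperf;
            unsigned long ratio = aperf;

            mperf >>= APERFMPERF_SHIFT;
            if (mperf)
                    ratio = aperf / mperf;
            return ratio;
    }

    int main(void)
    {
            struct aperfmperf t0 = { 0, 0 };
            struct aperfmperf t1 = { 3 << 20, 1 << 21 }; /* ran at 1.5x ref clock */

            /* 1.5 in 10-bit fixed point is 1536 */
            printf("ratio = %lu (expect 1536)\n",
                   calc_aperfmperf_ratio(&t0, &t1));
            return 0;
    }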
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/setup.h 2010-03-24 15:32:27.000000000 +0100
@@ -0,0 +1,8 @@
+#ifndef __ASSEMBLY__
+
+void xen_start_kernel(void);
+void xen_arch_setup(void);
+
+#endif
+
+#include_next <asm/setup.h>
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp-processor-id.h 2010-03-24 15:32:27.000000000 +0100
@@ -0,0 +1,36 @@
+#ifndef _ASM_X86_SMP_PROCESSOR_ID_H
+#define _ASM_X86_SMP_PROCESSOR_ID_H
+
+#if defined(CONFIG_SMP) && !defined(__ASSEMBLY__)
+
+#include <asm/percpu.h>
+
+DECLARE_PER_CPU(int, cpu_number);
+
+/*
+ * This function is needed by all SMP systems. It must _always_ be valid
+ * from the initial startup. We map APIC_BASE very early in page_setup(),
+ * so this is correct in the x86 case.
+ */
+#define raw_smp_processor_id() percpu_read(cpu_number)
+#define safe_smp_processor_id() smp_processor_id()
+
+#ifdef CONFIG_X86_64_SMP
+#define stack_smp_processor_id() \
+({ \
+ struct thread_info *ti; \
+ __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
+ ti->cpu; \
+})
+#endif
+
+#ifdef CONFIG_DEBUG_PREEMPT
+extern unsigned int debug_smp_processor_id(void);
+# define smp_processor_id() debug_smp_processor_id()
+#else
+# define smp_processor_id() raw_smp_processor_id()
+#endif
+
+#endif /* SMP && !__ASSEMBLY__ */
+
+#endif /* _ASM_X86_SMP_PROCESSOR_ID_H */
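
stack_smp_processor_id() above works because kernel stacks are THREAD_SIZE-aligned: masking %rsp with CURRENT_MASK rounds down to the stack base, where struct thread_info sits. A userspace sketch of the same masking trick (the THREAD_SIZE value and layout here are illustrative):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define THREAD_SIZE  (16 * 1024)                    /* illustrative */
    #define CURRENT_MASK (~(uintptr_t)(THREAD_SIZE - 1))

    struct thread_info { int cpu; };

    int main(void)
    {
            /* A THREAD_SIZE-aligned "stack" with thread_info at its base. */
            void *stack = aligned_alloc(THREAD_SIZE, THREAD_SIZE);
            ((struct thread_info *)stack)->cpu = 3;

            /* Any address inside the stack masks back to the base. */
            uintptr_t rsp = (uintptr_t)stack + 9000;
            struct thread_info *ti = (struct thread_info *)(rsp & CURRENT_MASK);

            printf("cpu = %d (expect 3)\n", ti->cpu);
            free(stack);
            return 0;
    }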
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/smp.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/smp.h 2010-03-24 15:32:27.000000000 +0100
@@ -121,7 +121,6 @@ static inline void arch_send_call_functi
smp_ops.send_call_func_single_ipi(cpu);
}
-#define arch_send_call_function_ipi_mask arch_send_call_function_ipi_mask
static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask)
{
smp_ops.send_call_func_ipi(mask);
@@ -167,27 +166,7 @@ static inline int num_booting_cpus(void)
extern unsigned disabled_cpus __cpuinitdata;
-#ifdef CONFIG_X86_32_SMP
-/*
- * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
- */
-#define raw_smp_processor_id() (percpu_read(cpu_number))
-#define safe_smp_processor_id() smp_processor_id()
-
-#elif defined(CONFIG_X86_64_SMP)
-#define raw_smp_processor_id() (percpu_read(cpu_number))
-
-#define stack_smp_processor_id() \
-({ \
- struct thread_info *ti; \
- __asm__("andq %%rsp,%0; ":"=r" (ti) : "0" (CURRENT_MASK)); \
- ti->cpu; \
-})
-#define safe_smp_processor_id() smp_processor_id()
-
-#endif
+#include <asm/smp-processor-id.h>
#ifdef CONFIG_X86_LOCAL_APIC
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/system.h 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/system.h 2010-03-24 15:32:27.000000000 +0100
@@ -30,7 +30,7 @@ void __switch_to_xtra(struct task_struct
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
- , [stack_canary] "=m" (per_cpu_var(stack_canary))
+ , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */
@@ -149,33 +149,6 @@ do { \
#endif
#ifdef __KERNEL__
-#define _set_base(addr, base) do { unsigned long __pr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %%dl,%2\n\t" \
- "movb %%dh,%3" \
- :"=&d" (__pr) \
- :"m" (*((addr)+2)), \
- "m" (*((addr)+4)), \
- "m" (*((addr)+7)), \
- "0" (base) \
- ); } while (0)
-
-#define _set_limit(addr, limit) do { unsigned long __lr; \
-__asm__ __volatile__ ("movw %%dx,%1\n\t" \
- "rorl $16,%%edx\n\t" \
- "movb %2,%%dh\n\t" \
- "andb $0xf0,%%dh\n\t" \
- "orb %%dh,%%dl\n\t" \
- "movb %%dl,%2" \
- :"=&d" (__lr) \
- :"m" (*(addr)), \
- "m" (*((addr)+6)), \
- "0" (limit) \
- ); } while (0)
-
-#define set_base(ldt, base) _set_base(((char *)&(ldt)) , (base))
-#define set_limit(ldt, limit) _set_limit(((char *)&(ldt)) , ((limit)-1))
extern void xen_load_gs_index(unsigned);
--- head-2010-05-25.orig/arch/x86/include/mach-xen/asm/tlbflush.h 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/include/mach-xen/asm/tlbflush.h 2010-03-24 15:32:27.000000000 +0100
@@ -74,9 +74,9 @@ static inline void reset_lazy_tlbstate(v
#define local_flush_tlb() __flush_tlb()
#define flush_tlb_all xen_tlb_flush_all
-#define flush_tlb_current_task() xen_tlb_flush_mask(&current->mm->cpu_vm_mask)
-#define flush_tlb_mm(mm) xen_tlb_flush_mask(&(mm)->cpu_vm_mask)
-#define flush_tlb_page(vma, va) xen_invlpg_mask(&(vma)->vm_mm->cpu_vm_mask, va)
+#define flush_tlb_current_task() xen_tlb_flush_mask(mm_cpumask(current->mm))
+#define flush_tlb_mm(mm) xen_tlb_flush_mask(mm_cpumask(mm))
+#define flush_tlb_page(vma, va) xen_invlpg_mask(mm_cpumask((vma)->vm_mm), va)
#define flush_tlb() flush_tlb_current_task()
--- head-2010-05-25.orig/arch/x86/kernel/Makefile 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/Makefile 2010-03-24 15:32:27.000000000 +0100
@@ -132,8 +132,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
-
- time_64-$(CONFIG_XEN) += time_32.o
endif
disabled-obj-$(CONFIG_XEN) := %_uv.o crash.o early-quirks.o hpet.o i8253.o \
--- head-2010-05-25.orig/arch/x86/kernel/apic/io_apic-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/apic/io_apic-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -79,6 +79,8 @@ unsigned long io_apic_irqs;
#endif /* CONFIG_XEN */
#define __apicdebuginit(type) static type __init
+#define for_each_irq_pin(entry, head) \
+ for (entry = head; entry; entry = entry->next)
/*
* Is the SiS APIC rmw bug present ?
@@ -100,12 +102,24 @@ int nr_ioapic_registers[MAX_IO_APICS];
struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
int nr_ioapics;
+/* IO APIC gsi routing info */
+struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
+
/* MP IRQ source entries */
struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
/* # of MP IRQ source entries */
int mp_irq_entries;
+#ifndef CONFIG_XEN
+/* Number of legacy interrupts */
+static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY;
+/* GSI interrupts */
+static int nr_irqs_gsi = NR_IRQS_LEGACY;
+#else
+#define nr_legacy_irqs NR_IRQS_LEGACY
+#endif
+
#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
int mp_bus_id_to_type[MAX_MP_BUSSES];
#endif
@@ -132,15 +146,6 @@ static int __init parse_noapic(char *str
early_param("noapic", parse_noapic);
#ifndef CONFIG_XEN
-struct irq_pin_list;
-
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * the indexing order of this array favors 1:1 mappings
- * between pins and IRQs.
- */
-
struct irq_pin_list {
int apic, pin;
struct irq_pin_list *next;
@@ -155,6 +160,11 @@ static struct irq_pin_list *get_one_free
return pin;
}
+/*
+ * This is performance-critical, we want to do it O(1)
+ *
+ * Most irqs are mapped 1:1 with pins.
+ */
struct irq_cfg {
struct irq_pin_list *irq_2_pin;
cpumask_var_t domain;
@@ -188,6 +198,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS]
[15] = { .vector = IRQ15_VECTOR, },
};
+void __init io_apic_disable_legacy(void)
+{
+ nr_legacy_irqs = 0;
+ nr_irqs_gsi = 0;
+}
+
int __init arch_early_irq_init(void)
{
struct irq_cfg *cfg;
@@ -205,7 +221,7 @@ int __init arch_early_irq_init(void)
desc->chip_data = &cfg[i];
zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node);
zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node);
- if (i < NR_IRQS_LEGACY)
+ if (i < nr_legacy_irqs)
cpumask_setall(cfg[i].domain);
}
@@ -231,17 +247,14 @@ static struct irq_cfg *get_one_free_irq_
cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node);
if (cfg) {
- if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
+ if (!zalloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) {
kfree(cfg);
cfg = NULL;
- } else if (!alloc_cpumask_var_node(&cfg->old_domain,
+ } else if (!zalloc_cpumask_var_node(&cfg->old_domain,
GFP_ATOMIC, node)) {
free_cpumask_var(cfg->domain);
kfree(cfg);
cfg = NULL;
- } else {
- cpumask_clear(cfg->domain);
- cpumask_clear(cfg->old_domain);
}
}
@@ -455,13 +468,10 @@ static bool io_apic_level_ack_pending(st
unsigned long flags;
spin_lock_irqsave(&ioapic_lock, flags);
- entry = cfg->irq_2_pin;
- for (;;) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
int pin;
- if (!entry)
- break;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin*2);
/* Is the remote IRR bit set? */
@@ -469,9 +479,6 @@ static bool io_apic_level_ack_pending(st
spin_unlock_irqrestore(&ioapic_lock, flags);
return true;
}
- if (!entry->next)
- break;
- entry = entry->next;
}
spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -543,72 +550,68 @@ static void ioapic_mask_entry(int apic,
* shared ISA-space IRQs, so we have to support them. We are super
* fast in the common case, and fast for shared ISA-space IRQs.
*/
-static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+static int
+add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
{
- struct irq_pin_list *entry;
+ struct irq_pin_list **last, *entry;
- entry = cfg->irq_2_pin;
- if (!entry) {
- entry = get_one_free_irq_2_pin(node);
- if (!entry) {
- printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
- apic, pin);
- return;
- }
- cfg->irq_2_pin = entry;
- entry->apic = apic;
- entry->pin = pin;
- return;
- }
-
- while (entry->next) {
- /* not again, please */
+ /* don't allow duplicates */
+ last = &cfg->irq_2_pin;
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
if (entry->apic == apic && entry->pin == pin)
- return;
-
- entry = entry->next;
+ return 0;
+ last = &entry->next;
}
- entry->next = get_one_free_irq_2_pin(node);
- entry = entry->next;
+ entry = get_one_free_irq_2_pin(node);
+ if (!entry) {
+ printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
+ node, apic, pin);
+ return -ENOMEM;
+ }
entry->apic = apic;
entry->pin = pin;
+
+ *last = entry;
+ return 0;
+}
+
+static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
+{
+ if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
+ panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
}
/*
* Reroute an IRQ to a different pin.
*/
static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
- int oldapic, int oldpin,
- int newapic, int newpin)
+ int oldapic, int oldpin,
+ int newapic, int newpin)
{
- struct irq_pin_list *entry = cfg->irq_2_pin;
- int replaced = 0;
+ struct irq_pin_list *entry;
- while (entry) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
if (entry->apic == oldapic && entry->pin == oldpin) {
entry->apic = newapic;
entry->pin = newpin;
- replaced = 1;
/* every one is different, right? */
- break;
+ return;
}
- entry = entry->next;
}
- /* why? call replace before add? */
- if (!replaced)
- add_pin_to_irq_node(cfg, node, newapic, newpin);
+ /* old apic/pin didn't exist, so just add new ones */
+ add_pin_to_irq_node(cfg, node, newapic, newpin);
}
-static inline void io_apic_modify_irq(struct irq_cfg *cfg,
- int mask_and, int mask_or,
- void (*final)(struct irq_pin_list *entry))
+static void io_apic_modify_irq(struct irq_cfg *cfg,
+ int mask_and, int mask_or,
+ void (*final)(struct irq_pin_list *entry))
{
int pin;
struct irq_pin_list *entry;
- for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
pin = entry->pin;
reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -625,7 +628,6 @@ static void __unmask_IO_APIC_irq(struct
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
}
-#ifdef CONFIG_X86_64
static void io_apic_sync(struct irq_pin_list *entry)
{
/*
@@ -641,11 +643,6 @@ static void __mask_IO_APIC_irq(struct ir
{
io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
}
-#else /* CONFIG_X86_32 */
-static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
-{
- io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
-}
static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
{
@@ -658,7 +655,6 @@ static void __unmask_and_level_IO_APIC_i
io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
}
-#endif /* CONFIG_X86_32 */
static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
{
@@ -719,6 +715,7 @@ static void clear_IO_APIC (void)
}
#else
#define add_pin_to_irq_node(cfg, node, apic, pin)
+#define add_pin_to_irq_node_nopanic(cfg, node, apic, pin) 0
#endif /* CONFIG_XEN */
#ifdef CONFIG_X86_32
@@ -935,7 +932,7 @@ static int __init find_isa_irq_apic(int
*/
static int EISA_ELCR(unsigned int irq)
{
- if (irq < NR_IRQS_LEGACY) {
+ if (irq < nr_legacy_irqs) {
unsigned int port = 0x4d0 + (irq >> 3);
return (inb(port) >> (irq & 7)) & 1;
}
@@ -1547,7 +1544,7 @@ static void setup_IO_APIC_irq(int apic_i
}
ioapic_register_intr(irq, desc, trigger);
- if (irq < NR_IRQS_LEGACY)
+ if (irq < nr_legacy_irqs)
disable_8259A_irq(irq);
ioapic_write_entry(apic_id, pin, entry);
@@ -1775,12 +1772,8 @@ __apicdebuginit(void) print_IO_APIC(void
if (!entry)
continue;
printk(KERN_DEBUG "IRQ%d ", irq);
- for (;;) {
+ for_each_irq_pin(entry, cfg->irq_2_pin)
printk("-> %d:%d", entry->apic, entry->pin);
- if (!entry->next)
- break;
- entry = entry->next;
- }
printk("\n");
}
@@ -1924,7 +1917,7 @@ __apicdebuginit(void) print_PIC(void)
unsigned int v;
unsigned long flags;
- if (apic_verbosity == APIC_QUIET)
+ if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs)
return;
printk(KERN_DEBUG "\nprinting PIC contents\n");
@@ -1956,7 +1949,7 @@ __apicdebuginit(int) print_all_ICs(void)
print_PIC();
/* don't print out if apic is not there */
- if (!cpu_has_apic || disable_apic)
+ if (!cpu_has_apic && !apic_from_smp_config())
return 0;
print_all_local_APICs();
@@ -1990,6 +1983,10 @@ void __init enable_IO_APIC(void)
spin_unlock_irqrestore(&ioapic_lock, flags);
nr_ioapic_registers[apic] = reg_01.bits.entries+1;
}
+
+ if (!nr_legacy_irqs)
+ return;
+
#ifndef CONFIG_XEN
for(apic = 0; apic < nr_ioapics; apic++) {
int pin;
@@ -2049,6 +2046,9 @@ void disable_IO_APIC(void)
*/
clear_IO_APIC();
+ if (!nr_legacy_irqs)
+ return;
+
/*
* If the i8259 is routed through an IOAPIC
* Put that IOAPIC in virtual wire mode
@@ -2082,7 +2082,7 @@ void disable_IO_APIC(void)
/*
* Use virtual wire A mode when interrupt remapping is enabled.
*/
- if (cpu_has_apic)
+ if (cpu_has_apic || apic_from_smp_config())
disconnect_bsp_APIC(!intr_remapping_enabled &&
ioapic_i8259.pin != -1);
}
@@ -2095,7 +2095,7 @@ void disable_IO_APIC(void)
* by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999
*/
-static void __init setup_ioapic_ids_from_mpc(void)
+void __init setup_ioapic_ids_from_mpc(void)
{
union IO_APIC_reg_00 reg_00;
physid_mask_t phys_id_present_map;
@@ -2104,9 +2104,8 @@ static void __init setup_ioapic_ids_from
unsigned char old_id;
unsigned long flags;
- if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
+ if (acpi_ioapic)
return;
-
/*
* Don't check I/O APIC IDs for xAPIC systems. They have
* no meaning without the serial APIC bus.
@@ -2280,7 +2279,7 @@ static unsigned int startup_ioapic_irq(u
struct irq_cfg *cfg;
spin_lock_irqsave(&ioapic_lock, flags);
- if (irq < NR_IRQS_LEGACY) {
+ if (irq < nr_legacy_irqs) {
disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
was_pending = 1;
@@ -2292,7 +2291,6 @@ static unsigned int startup_ioapic_irq(u
return was_pending;
}
-#ifdef CONFIG_X86_64
static int ioapic_retrigger_irq(unsigned int irq)
{
@@ -2305,14 +2303,6 @@ static int ioapic_retrigger_irq(unsigned
return 1;
}
-#else
-static int ioapic_retrigger_irq(unsigned int irq)
-{
- apic->send_IPI_self(irq_cfg(irq)->vector);
-
- return 1;
-}
-#endif
/*
* Level and edge triggered IO-APIC interrupts need different handling,
@@ -2350,13 +2340,9 @@ static void __target_IO_APIC_irq(unsigne
struct irq_pin_list *entry;
u8 vector = cfg->vector;
- entry = cfg->irq_2_pin;
- for (;;) {
+ for_each_irq_pin(entry, cfg->irq_2_pin) {
unsigned int reg;
- if (!entry)
- break;
-
apic = entry->apic;
pin = entry->pin;
/*
@@ -2369,9 +2355,6 @@ static void __target_IO_APIC_irq(unsigne
reg &= ~IO_APIC_REDIR_VECTOR_MASK;
reg |= vector;
io_apic_modify(apic, 0x10 + pin*2, reg);
- if (!entry->next)
- break;
- entry = entry->next;
}
}
@@ -2596,11 +2579,8 @@ atomic_t irq_mis_count;
static void ack_apic_level(unsigned int irq)
{
struct irq_desc *desc = irq_to_desc(irq);
-
-#ifdef CONFIG_X86_32
unsigned long v;
int i;
-#endif
struct irq_cfg *cfg;
int do_unmask_irq = 0;
@@ -2613,31 +2593,28 @@ static void ack_apic_level(unsigned int
}
#endif
-#ifdef CONFIG_X86_32
/*
- * It appears there is an erratum which affects at least version 0x11
- * of I/O APIC (that's the 82093AA and cores integrated into various
- * chipsets). Under certain conditions a level-triggered interrupt is
- * erroneously delivered as edge-triggered one but the respective IRR
- * bit gets set nevertheless. As a result the I/O unit expects an EOI
- * message but it will never arrive and further interrupts are blocked
- * from the source. The exact reason is so far unknown, but the
- * phenomenon was observed when two consecutive interrupt requests
- * from a given source get delivered to the same CPU and the source is
- * temporarily disabled in between.
- *
- * A workaround is to simulate an EOI message manually. We achieve it
- * by setting the trigger mode to edge and then to level when the edge
- * trigger mode gets detected in the TMR of a local APIC for a
- * level-triggered interrupt. We mask the source for the time of the
- * operation to prevent an edge-triggered interrupt escaping meanwhile.
- * The idea is from Manfred Spraul. --macro
- */
+ * It appears there is an erratum which affects at least version 0x11
+ * of I/O APIC (that's the 82093AA and cores integrated into various
+ * chipsets). Under certain conditions a level-triggered interrupt is
+ * erroneously delivered as edge-triggered one but the respective IRR
+ * bit gets set nevertheless. As a result the I/O unit expects an EOI
+ * message but it will never arrive and further interrupts are blocked
+ * from the source. The exact reason is so far unknown, but the
+ * phenomenon was observed when two consecutive interrupt requests
+ * from a given source get delivered to the same CPU and the source is
+ * temporarily disabled in between.
+ *
+ * A workaround is to simulate an EOI message manually. We achieve it
+ * by setting the trigger mode to edge and then to level when the edge
+ * trigger mode gets detected in the TMR of a local APIC for a
+ * level-triggered interrupt. We mask the source for the time of the
+ * operation to prevent an edge-triggered interrupt escaping meanwhile.
+ * The idea is from Manfred Spraul. --macro
+ */
cfg = desc->chip_data;
i = cfg->vector;
-
v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
-#endif
/*
* We must acknowledge the irq before we move it or the acknowledge will
@@ -2679,7 +2656,7 @@ static void ack_apic_level(unsigned int
unmask_IO_APIC_irq_desc(desc);
}
-#ifdef CONFIG_X86_32
+ /* Tail end of version 0x11 I/O APIC bug workaround */
if (!(v & (1 << (i & 0x1f)))) {
atomic_inc(&irq_mis_count);
spin_lock(&ioapic_lock);
@@ -2687,26 +2664,15 @@ static void ack_apic_level(unsigned int
__unmask_and_level_IO_APIC_irq(cfg);
spin_unlock(&ioapic_lock);
}
-#endif
}
#ifdef CONFIG_INTR_REMAP
static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
{
- int apic, pin;
struct irq_pin_list *entry;
- entry = cfg->irq_2_pin;
- for (;;) {
-
- if (!entry)
- break;
-
- apic = entry->apic;
- pin = entry->pin;
- io_apic_eoi(apic, pin);
- entry = entry->next;
- }
+ for_each_irq_pin(entry, cfg->irq_2_pin)
+ io_apic_eoi(entry->apic, entry->pin);
}
static void
@@ -2796,7 +2762,7 @@ static inline void init_IO_APIC_traps(vo
* so default to an old-fashioned 8259
* interrupt if we can..
*/
- if (irq < NR_IRQS_LEGACY)
+ if (irq < nr_legacy_irqs)
make_8259A_irq(irq);
else
/* Strange. Oh, well.. */
@@ -3136,7 +3102,7 @@ out:
* the I/O APIC in all cases now. No actual device should request
* it anyway. --macro
*/
-#define PIC_IRQS (1 << PIC_CASCADE_IR)
+#define PIC_IRQS (1UL << PIC_CASCADE_IR)
void __init setup_IO_APIC(void)
{
@@ -3148,23 +3114,21 @@ void __init setup_IO_APIC(void)
* calling enable_IO_APIC() is moved to setup_local_APIC for BP
*/
#endif
-
- io_apic_irqs = ~PIC_IRQS;
+ io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL;
apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
/*
* Set up IO-APIC IRQ routing.
*/
#ifndef CONFIG_XEN
-#ifdef CONFIG_X86_32
- if (!acpi_ioapic)
- setup_ioapic_ids_from_mpc();
-#endif
+ x86_init.mpparse.setup_ioapic_ids();
+
sync_Arb_IDs();
#endif
setup_IO_APIC_irqs();
init_IO_APIC_traps();
- check_timer();
+ if (nr_legacy_irqs)
+ check_timer();
}
/*
@@ -3274,7 +3238,6 @@ static int __init ioapic_init_sysfs(void
device_initcall(ioapic_init_sysfs);
-static int nr_irqs_gsi = NR_IRQS_LEGACY;
/*
* Dynamic irq allocate and deallocation
*/
@@ -3346,8 +3309,7 @@ void destroy_irq(unsigned int irq)
cfg = desc->chip_data;
dynamic_irq_cleanup(irq);
/* connect back irq_cfg */
- if (desc)
- desc->chip_data = cfg;
+ desc->chip_data = cfg;
free_irte(irq);
spin_lock_irqsave(&vector_lock, flags);
@@ -4025,9 +3987,13 @@ static int __io_apic_set_pci_routing(str
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
- if (irq >= NR_IRQS_LEGACY) {
+ if (irq >= nr_legacy_irqs) {
cfg = desc->chip_data;
- add_pin_to_irq_node(cfg, node, ioapic, pin);
+ if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
+ printk(KERN_INFO "can not add pin %d for irq %d\n",
+ pin, irq);
+ return 0;
+ }
}
setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
@@ -4056,11 +4022,28 @@ int io_apic_set_pci_routing(struct devic
return __io_apic_set_pci_routing(dev, irq, irq_attr);
}
-/* --------------------------------------------------------------------------
- ACPI-based IOAPIC Configuration
- -------------------------------------------------------------------------- */
+u8 __init io_apic_unique_id(u8 id)
+{
+#ifdef CONFIG_X86_32
+ if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
+ !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
+ return io_apic_get_unique_id(nr_ioapics, id);
+ else
+ return id;
+#else
+ int i;
+ DECLARE_BITMAP(used, 256);
-#ifdef CONFIG_ACPI
+ bitmap_zero(used, 256);
+ for (i = 0; i < nr_ioapics; i++) {
+ struct mpc_ioapic *ia = &mp_ioapics[i];
+ __set_bit(ia->apicid, used);
+ }
+ if (!test_bit(id, used))
+ return id;
+ return find_first_zero_bit(used, 256);
+#endif
+}
#ifdef CONFIG_X86_32
int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -4171,8 +4154,6 @@ int acpi_get_override_irq(int bus_irq, i
return 0;
}
-#endif /* CONFIG_ACPI */
-
#ifndef CONFIG_XEN
/*
* This function currently is only a helper for the i386 smp boot process where
@@ -4227,7 +4208,7 @@ void __init setup_ioapic_dest(void)
static struct resource *ioapic_resources;
-static struct resource * __init ioapic_setup_resources(void)
+static struct resource * __init ioapic_setup_resources(int nr_ioapics)
{
unsigned long n;
struct resource *res;
@@ -4243,15 +4224,13 @@ static struct resource * __init ioapic_s
mem = alloc_bootmem(n);
res = (void *)mem;
- if (mem != NULL) {
- mem += sizeof(struct resource) * nr_ioapics;
+ mem += sizeof(struct resource) * nr_ioapics;
- for (i = 0; i < nr_ioapics; i++) {
- res[i].name = mem;
- res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- sprintf(mem, "IOAPIC %u", i);
- mem += IOAPIC_RESOURCE_NAME_SIZE;
- }
+ for (i = 0; i < nr_ioapics; i++) {
+ res[i].name = mem;
+ res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+ sprintf(mem, "IOAPIC %u", i);
+ mem += IOAPIC_RESOURCE_NAME_SIZE;
}
ioapic_resources = res;
@@ -4265,7 +4244,7 @@ void __init ioapic_init_mappings(void)
struct resource *ioapic_res;
int i;
- ioapic_res = ioapic_setup_resources();
+ ioapic_res = ioapic_setup_resources(nr_ioapics);
for (i = 0; i < nr_ioapics; i++) {
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4294,11 +4273,9 @@ fake_ioapic_page:
__fix_to_virt(idx), ioapic_phys);
idx++;
- if (ioapic_res != NULL) {
- ioapic_res->start = ioapic_phys;
- ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
- ioapic_res++;
- }
+ ioapic_res->start = ioapic_phys;
+ ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
+ ioapic_res++;
}
}
@@ -4320,3 +4297,78 @@ void __init ioapic_insert_resources(void
}
}
#endif /* !CONFIG_XEN */
+
+int mp_find_ioapic(int gsi)
+{
+ int i = 0;
+
+ /* Find the IOAPIC that manages this GSI. */
+ for (i = 0; i < nr_ioapics; i++) {
+ if ((gsi >= mp_gsi_routing[i].gsi_base)
+ && (gsi <= mp_gsi_routing[i].gsi_end))
+ return i;
+ }
+
+ printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
+ return -1;
+}
+
+int mp_find_ioapic_pin(int ioapic, int gsi)
+{
+ if (WARN_ON(ioapic == -1))
+ return -1;
+ if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
+ return -1;
+
+ return gsi - mp_gsi_routing[ioapic].gsi_base;
+}
+
+static int bad_ioapic(unsigned long address)
+{
+ if (nr_ioapics >= MAX_IO_APICS) {
+ printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
+ "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
+ return 1;
+ }
+ if (!address) {
+ printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
+ " found in table, skipping!\n");
+ return 1;
+ }
+ return 0;
+}
+
+void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
+{
+ int idx = 0;
+
+ if (bad_ioapic(address))
+ return;
+
+ idx = nr_ioapics;
+
+ mp_ioapics[idx].type = MP_IOAPIC;
+ mp_ioapics[idx].flags = MPC_APIC_USABLE;
+ mp_ioapics[idx].apicaddr = address;
+
+#ifndef CONFIG_XEN
+ set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
+#endif
+ mp_ioapics[idx].apicid = io_apic_unique_id(id);
+ mp_ioapics[idx].apicver = io_apic_get_version(idx);
+
+ /*
+ * Build basic GSI lookup table to facilitate gsi->io_apic lookups
+ * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
+ */
+ mp_gsi_routing[idx].gsi_base = gsi_base;
+ mp_gsi_routing[idx].gsi_end = gsi_base +
+ io_apic_get_redir_entries(idx);
+
+ printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
+ "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
+ mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
+ mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
+
+ nr_ioapics++;
+}
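
Much of the io_apic churn above replaces open-coded irq_2_pin walks with for_each_irq_pin() and rebuilds add_pin_to_irq_node() around a pointer-to-pointer tail walker, so one pass both rejects duplicates and finds the link to append to. A self-contained model of that pattern (simplified: userspace allocation instead of get_one_free_irq_2_pin()):

    #include <stdio.h>
    #include <stdlib.h>

    struct irq_pin_list {
            int apic, pin;
            struct irq_pin_list *next;
    };

    #define for_each_irq_pin(entry, head) \
            for (entry = head; entry; entry = entry->next)

    /* Append (apic, pin) unless already present; 'last' tracks the
     * address of the link to update, so no tail special-casing. */
    static int add_pin(struct irq_pin_list **head, int apic, int pin)
    {
            struct irq_pin_list **last = head, *entry;

            for_each_irq_pin(entry, *head) {
                    if (entry->apic == apic && entry->pin == pin)
                            return 0;
                    last = &entry->next;
            }

            entry = calloc(1, sizeof(*entry));
            if (!entry)
                    return -1;
            entry->apic = apic;
            entry->pin = pin;
            *last = entry;
            return 0;
    }

    int main(void)
    {
            struct irq_pin_list *head = NULL, *e;

            add_pin(&head, 0, 3);
            add_pin(&head, 1, 5);
            add_pin(&head, 0, 3);   /* duplicate, ignored */
            for_each_irq_pin(e, head)
                    printf("-> %d:%d\n", e->apic, e->pin);
            return 0;
    }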
--- head-2010-05-25.orig/arch/x86/kernel/cpu/Makefile 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/Makefile 2010-03-24 15:32:27.000000000 +0100
@@ -34,7 +34,7 @@ obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
-disabled-obj-$(CONFIG_XEN) := hypervisor.o vmware.o
+disabled-obj-$(CONFIG_XEN) := hypervisor.o sched.o vmware.o
quiet_cmd_mkcapflags = MKCAP $@
cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
--- head-2010-05-25.orig/arch/x86/kernel/cpu/amd.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/amd.c 2010-03-24 15:32:27.000000000 +0100
@@ -313,7 +313,7 @@ static void __cpuinit amd_detect_cmp(str
int amd_get_nb_id(int cpu)
{
int id = 0;
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && !defined(CONFIG_XEN)
id = per_cpu(cpu_llc_id, cpu);
#endif
return id;
@@ -469,8 +469,10 @@ static void __cpuinit init_amd(struct cp
if (c->x86 == 0x10 || c->x86 == 0x11)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
+#ifndef CONFIG_XEN
/* get apicid instead of initial apic id from cpuid */
c->apicid = hard_smp_processor_id();
+#endif
#else
/*
--- head-2010-05-25.orig/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/common-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -13,13 +13,13 @@
#include <linux/io.h>
#include <asm/stackprotector.h>
-#include <asm/perf_counter.h>
+#include <asm/perf_event.h>
#include <asm/mmu_context.h>
#include <asm/hypervisor.h>
#include <asm/processor.h>
#include <asm/sections.h>
-#include <asm/topology.h>
-#include <asm/cpumask.h>
+#include <linux/topology.h>
+#include <linux/cpumask.h>
#include <asm/pgtable.h>
#include <asm/atomic.h>
#include <asm/proto.h>
@@ -28,13 +28,12 @@
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mtrr.h>
-#include <asm/numa.h>
+#include <linux/numa.h>
#include <asm/asm.h>
#include <asm/cpu.h>
#include <asm/mce.h>
#include <asm/msr.h>
#include <asm/pat.h>
-#include <asm/smp.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
@@ -102,17 +101,17 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
* TLS descriptors are currently at a different place compared to i386.
* Hopefully nobody expects them at a fixed place (Wine?)
*/
- [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
- [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
+ [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
#else
- [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
- [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
- [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
- [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } },
+ [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
+ [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
+ [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
#ifndef CONFIG_XEN
/*
* Segments used for calling PnP BIOS have byte granularity.
@@ -120,29 +119,29 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_p
* the transfer segment sizes are set at run time.
*/
/* 32-bit code */
- [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } },
+ [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
/* 16-bit code */
- [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } },
+ [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } },
+ [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } },
+ [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
/* 16-bit data */
- [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } },
+ [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
/*
* The APM segments have byte granularity and their bases
* are set at run time. All have 64k limits.
*/
/* 32-bit code */
- [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } },
+ [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
/* 16-bit code */
- [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } },
+ [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
/* data */
- [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
+ [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
- [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } },
+ [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
#endif
- [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
+ [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
GDT_STACK_CANARY_INIT
#endif
} };
@@ -900,7 +899,7 @@ void __init identify_boot_cpu(void)
#else
vgetcpu_set_mode();
#endif
- init_hw_perf_counters();
+ init_hw_perf_events();
}
void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
@@ -1013,7 +1012,7 @@ __setup("clearcpuid=", setup_disablecpui
#ifdef CONFIG_X86_64
#ifndef CONFIG_X86_NO_IDT
-struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
+struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
#endif
DEFINE_PER_CPU_FIRST(union irq_stack_union,
@@ -1027,13 +1026,21 @@ void xen_switch_pt(void)
#endif
}
-DEFINE_PER_CPU(char *, irq_stack_ptr) =
- init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+/*
+ * The following four percpu variables are hot. Align current_task to
+ * cacheline size such that all four fall in the same cacheline.
+ */
+DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
+ &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
(unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
+DEFINE_PER_CPU(char *, irq_stack_ptr) =
+ init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
+
DEFINE_PER_CPU(unsigned int, irq_count) = -1;
#ifndef CONFIG_X86_NO_TSS
@@ -1049,8 +1056,7 @@ static const unsigned int exception_stac
};
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
- [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
- __aligned(PAGE_SIZE);
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
#endif
void __cpuinit syscall_init(void)
@@ -1097,8 +1103,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist
#else /* CONFIG_X86_64 */
+DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
+EXPORT_PER_CPU_SYMBOL(current_task);
+
#ifdef CONFIG_CC_STACKPROTECTOR
-DEFINE_PER_CPU(unsigned long, stack_canary);
+DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
/* Make sure %fs and %gs are initialized properly in idle threads */
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mcheck/mce-inject.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/arch/x86/kernel/cpu/mcheck/mce-inject.c 2010-04-15 10:10:43.000000000 +0200
@@ -144,7 +144,7 @@ static void raise_mce(struct mce *m)
if (context == MCJ_CTX_RANDOM)
return;
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
if (m->inject_flags & MCJ_NMI_BROADCAST) {
unsigned long start;
int cpu;
--- head-2010-05-25.orig/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/cpu/mtrr/main-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -1,10 +1,9 @@
-#include <linux/init.h>
-#include <linux/proc_fs.h>
-#include <linux/ctype.h>
+#define DEBUG
+
+#include <linux/uaccess.h>
#include <linux/module.h>
-#include <linux/seq_file.h>
-#include <asm/uaccess.h>
#include <linux/mutex.h>
+#include <linux/init.h>
#include <asm/mtrr.h>
#include "mtrr.h"
@@ -58,7 +57,7 @@ static void __init init_table(void)
mtrr_usage_table[i] = 0;
}
-int mtrr_add_page(unsigned long base, unsigned long size,
+int mtrr_add_page(unsigned long base, unsigned long size,
unsigned int type, bool increment)
{
int error;
@@ -88,25 +87,23 @@ int mtrr_add_page(unsigned long base, un
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- printk(KERN_WARNING
- "mtrr: size and base must be multiples of 4 kiB\n");
- printk(KERN_DEBUG
- "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
+ pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
return 0;
}
-int
-mtrr_add(unsigned long base, unsigned long size, unsigned int type,
- bool increment)
+int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
+ bool increment)
{
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
increment);
}
+EXPORT_SYMBOL(mtrr_add);
int mtrr_del_page(int reg, unsigned long base, unsigned long size)
{
@@ -128,13 +125,13 @@ int mtrr_del_page(int reg, unsigned long
}
}
if (reg < 0) {
- printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base,
- size);
+ pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+ base, size);
goto out;
}
}
if (mtrr_usage_table[reg] < 1) {
- printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg);
+ pr_warning("mtrr: reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1) {
@@ -153,15 +150,12 @@ int mtrr_del_page(int reg, unsigned long
return error;
}
-int
-mtrr_del(int reg, unsigned long base, unsigned long size)
+int mtrr_del(int reg, unsigned long base, unsigned long size)
{
if (mtrr_check(base, size))
return -EINVAL;
return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
}
-
-EXPORT_SYMBOL(mtrr_add);
EXPORT_SYMBOL(mtrr_del);
/*
--- head-2010-05-25.orig/arch/x86/kernel/e820-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/e820-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -134,7 +134,7 @@ static void __init __e820_add_region(str
{
int x = e820x->nr_map;
- if (x == ARRAY_SIZE(e820x->map)) {
+ if (x >= ARRAY_SIZE(e820x->map)) {
printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
return;
}
@@ -1455,7 +1455,7 @@ void __init e820_reserve_resources(void)
struct resource *res;
u64 end;
- res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
+ res = alloc_bootmem(sizeof(struct resource) * e820.nr_map);
e820_res = res;
for (i = 0; i < e820.nr_map; i++) {
end = e820.map[i].addr + e820.map[i].size - 1;
@@ -1502,8 +1502,8 @@ static unsigned long ram_alignment(resou
if (mb < 16)
return 1024*1024;
- /* To 32MB for anything above that */
- return 32*1024*1024;
+ /* To 64MB for anything above that */
+ return 64*1024*1024;
}
#define MAX_RESOURCE_SIZE ((resource_size_t)-1)
@@ -1543,59 +1543,8 @@ void __init e820_reserve_resources_late(
#undef e820
-#ifndef CONFIG_XEN
char *__init default_machine_specific_memory_setup(void)
{
- char *who = "BIOS-e820";
- u32 new_nr;
- /*
- * Try to copy the BIOS-supplied E820-map.
- *
- * Otherwise fake a memory map; one section from 0k->640k,
- * the next section from 1mb->appropriate_mem_k
- */
- new_nr = boot_params.e820_entries;
- sanitize_e820_map(boot_params.e820_map,
- ARRAY_SIZE(boot_params.e820_map),
- &new_nr);
- boot_params.e820_entries = new_nr;
- if (append_e820_map(boot_params.e820_map, boot_params.e820_entries)
- < 0) {
- u64 mem_size;
-
- /* compare results from other methods and take the greater */
- if (boot_params.alt_mem_k
- < boot_params.screen_info.ext_mem_k) {
- mem_size = boot_params.screen_info.ext_mem_k;
- who = "BIOS-88";
- } else {
- mem_size = boot_params.alt_mem_k;
- who = "BIOS-e801";
- }
-
- e820.nr_map = 0;
- e820_add_region(0, LOWMEMSIZE(), E820_RAM);
- e820_add_region(HIGH_MEMORY, mem_size << 10, E820_RAM);
- }
-
- /* In case someone cares... */
- return who;
-}
-
-char *__init __attribute__((weak)) machine_specific_memory_setup(void)
-{
- if (x86_quirks->arch_memory_setup) {
- char *who = x86_quirks->arch_memory_setup();
-
- if (who)
- return who;
- }
- return default_machine_specific_memory_setup();
-}
-#endif
-
-static char * __init _memory_setup(void)
-{
int rc, nr_map;
struct xen_memory_map memmap;
static struct e820entry __initdata map[E820MAX];
@@ -1639,7 +1588,7 @@ void __init setup_memory_map(void)
{
char *who;
- who = _memory_setup();
+ who = x86_init.resources.memory_setup();
#ifdef CONFIG_XEN
if (is_initial_xendomain()) {
printk(KERN_INFO "Xen-provided machine memory map:\n");
--- head-2010-05-25.orig/arch/x86/kernel/early_printk-xen.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/early_printk-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -178,7 +178,6 @@ static __init void early_serial_init(cha
* mappings. Someone should fix this for domain 0. For now, use fake serial.
*/
#define early_vga_console early_serial_console
-#define xenboot_console early_serial_console
#endif
@@ -189,721 +188,6 @@ static struct console early_serial_conso
.index = -1,
};
-#ifdef CONFIG_EARLY_PRINTK_DBGP
-
-static struct ehci_caps __iomem *ehci_caps;
-static struct ehci_regs __iomem *ehci_regs;
-static struct ehci_dbg_port __iomem *ehci_debug;
-static unsigned int dbgp_endpoint_out;
-
-struct ehci_dev {
- u32 bus;
- u32 slot;
- u32 func;
-};
-
-static struct ehci_dev ehci_dev;
-
-#define USB_DEBUG_DEVNUM 127
-
-#define DBGP_DATA_TOGGLE 0x8800
-
-static inline u32 dbgp_pid_update(u32 x, u32 tok)
-{
- return ((x ^ DBGP_DATA_TOGGLE) & 0xffff00) | (tok & 0xff);
-}
-
-static inline u32 dbgp_len_update(u32 x, u32 len)
-{
- return (x & ~0x0f) | (len & 0x0f);
-}
-
-/*
- * USB Packet IDs (PIDs)
- */
-
-/* token */
-#define USB_PID_OUT 0xe1
-#define USB_PID_IN 0x69
-#define USB_PID_SOF 0xa5
-#define USB_PID_SETUP 0x2d
-/* handshake */
-#define USB_PID_ACK 0xd2
-#define USB_PID_NAK 0x5a
-#define USB_PID_STALL 0x1e
-#define USB_PID_NYET 0x96
-/* data */
-#define USB_PID_DATA0 0xc3
-#define USB_PID_DATA1 0x4b
-#define USB_PID_DATA2 0x87
-#define USB_PID_MDATA 0x0f
-/* Special */
-#define USB_PID_PREAMBLE 0x3c
-#define USB_PID_ERR 0x3c
-#define USB_PID_SPLIT 0x78
-#define USB_PID_PING 0xb4
-#define USB_PID_UNDEF_0 0xf0
-
-#define USB_PID_DATA_TOGGLE 0x88
-#define DBGP_CLAIM (DBGP_OWNER | DBGP_ENABLED | DBGP_INUSE)
-
-#define PCI_CAP_ID_EHCI_DEBUG 0xa
-
-#define HUB_ROOT_RESET_TIME 50 /* times are in msec */
-#define HUB_SHORT_RESET_TIME 10
-#define HUB_LONG_RESET_TIME 200
-#define HUB_RESET_TIMEOUT 500
-
-#define DBGP_MAX_PACKET 8
-
-static int dbgp_wait_until_complete(void)
-{
- u32 ctrl;
- int loop = 0x100000;
-
- do {
- ctrl = readl(&ehci_debug->control);
- /* Stop when the transaction is finished */
- if (ctrl & DBGP_DONE)
- break;
- } while (--loop > 0);
-
- if (!loop)
- return -1;
-
- /*
- * Now that we have observed the completed transaction,
- * clear the done bit.
- */
- writel(ctrl | DBGP_DONE, &ehci_debug->control);
- return (ctrl & DBGP_ERROR) ? -DBGP_ERRCODE(ctrl) : DBGP_LEN(ctrl);
-}
-
-static void __init dbgp_mdelay(int ms)
-{
- int i;
-
- while (ms--) {
- for (i = 0; i < 1000; i++)
- outb(0x1, 0x80);
- }
-}
-
-static void dbgp_breath(void)
-{
- /* Sleep to give the debug port a chance to breathe */
-}
-
-static int dbgp_wait_until_done(unsigned ctrl)
-{
- u32 pids, lpid;
- int ret;
- int loop = 3;
-
-retry:
- writel(ctrl | DBGP_GO, &ehci_debug->control);
- ret = dbgp_wait_until_complete();
- pids = readl(&ehci_debug->pids);
- lpid = DBGP_PID_GET(pids);
-
- if (ret < 0)
- return ret;
-
- /*
- * If the port is getting full or it has dropped data
- * start pacing ourselves, not necessary but it's friendly.
- */
- if ((lpid == USB_PID_NAK) || (lpid == USB_PID_NYET))
- dbgp_breath();
-
- /* If I get a NACK reissue the transmission */
- if (lpid == USB_PID_NAK) {
- if (--loop > 0)
- goto retry;
- }
-
- return ret;
-}
-
-static void dbgp_set_data(const void *buf, int size)
-{
- const unsigned char *bytes = buf;
- u32 lo, hi;
- int i;
-
- lo = hi = 0;
- for (i = 0; i < 4 && i < size; i++)
- lo |= bytes[i] << (8*i);
- for (; i < 8 && i < size; i++)
- hi |= bytes[i] << (8*(i - 4));
- writel(lo, &ehci_debug->data03);
- writel(hi, &ehci_debug->data47);
-}
-
-static void __init dbgp_get_data(void *buf, int size)
-{
- unsigned char *bytes = buf;
- u32 lo, hi;
- int i;
-
- lo = readl(&ehci_debug->data03);
- hi = readl(&ehci_debug->data47);
- for (i = 0; i < 4 && i < size; i++)
- bytes[i] = (lo >> (8*i)) & 0xff;
- for (; i < 8 && i < size; i++)
- bytes[i] = (hi >> (8*(i - 4))) & 0xff;
-}
-
-static int dbgp_bulk_write(unsigned devnum, unsigned endpoint,
- const char *bytes, int size)
-{
- u32 pids, addr, ctrl;
- int ret;
-
- if (size > DBGP_MAX_PACKET)
- return -1;
-
- addr = DBGP_EPADDR(devnum, endpoint);
-
- pids = readl(&ehci_debug->pids);
- pids = dbgp_pid_update(pids, USB_PID_OUT);
-
- ctrl = readl(&ehci_debug->control);
- ctrl = dbgp_len_update(ctrl, size);
- ctrl |= DBGP_OUT;
- ctrl |= DBGP_GO;
-
- dbgp_set_data(bytes, size);
- writel(addr, &ehci_debug->address);
- writel(pids, &ehci_debug->pids);
-
- ret = dbgp_wait_until_done(ctrl);
- if (ret < 0)
- return ret;
-
- return ret;
-}
-
-static int __init dbgp_bulk_read(unsigned devnum, unsigned endpoint, void *data,
- int size)
-{
- u32 pids, addr, ctrl;
- int ret;
-
- if (size > DBGP_MAX_PACKET)
- return -1;
-
- addr = DBGP_EPADDR(devnum, endpoint);
-
- pids = readl(&ehci_debug->pids);
- pids = dbgp_pid_update(pids, USB_PID_IN);
-
- ctrl = readl(&ehci_debug->control);
- ctrl = dbgp_len_update(ctrl, size);
- ctrl &= ~DBGP_OUT;
- ctrl |= DBGP_GO;
-
- writel(addr, &ehci_debug->address);
- writel(pids, &ehci_debug->pids);
- ret = dbgp_wait_until_done(ctrl);
- if (ret < 0)
- return ret;
-
- if (size > ret)
- size = ret;
- dbgp_get_data(data, size);
- return ret;
-}
-
-static int __init dbgp_control_msg(unsigned devnum, int requesttype,
- int request, int value, int index, void *data, int size)
-{
- u32 pids, addr, ctrl;
- struct usb_ctrlrequest req;
- int read;
- int ret;
-
- read = (requesttype & USB_DIR_IN) != 0;
- if (size > (read ? DBGP_MAX_PACKET:0))
- return -1;
-
- /* Compute the control message */
- req.bRequestType = requesttype;
- req.bRequest = request;
- req.wValue = cpu_to_le16(value);
- req.wIndex = cpu_to_le16(index);
- req.wLength = cpu_to_le16(size);
-
- pids = DBGP_PID_SET(USB_PID_DATA0, USB_PID_SETUP);
- addr = DBGP_EPADDR(devnum, 0);
-
- ctrl = readl(&ehci_debug->control);
- ctrl = dbgp_len_update(ctrl, sizeof(req));
- ctrl |= DBGP_OUT;
- ctrl |= DBGP_GO;
-
- /* Send the setup message */
- dbgp_set_data(&req, sizeof(req));
- writel(addr, &ehci_debug->address);
- writel(pids, &ehci_debug->pids);
- ret = dbgp_wait_until_done(ctrl);
- if (ret < 0)
- return ret;
-
- /* Read the result */
- return dbgp_bulk_read(devnum, 0, data, size);
-}
-
-
-/* Find a PCI capability */
-static u32 __init find_cap(u32 num, u32 slot, u32 func, int cap)
-{
- u8 pos;
- int bytes;
-
- if (!(read_pci_config_16(num, slot, func, PCI_STATUS) &
- PCI_STATUS_CAP_LIST))
- return 0;
-
- pos = read_pci_config_byte(num, slot, func, PCI_CAPABILITY_LIST);
- for (bytes = 0; bytes < 48 && pos >= 0x40; bytes++) {
- u8 id;
-
- pos &= ~3;
- id = read_pci_config_byte(num, slot, func, pos+PCI_CAP_LIST_ID);
- if (id == 0xff)
- break;
- if (id == cap)
- return pos;
-
- pos = read_pci_config_byte(num, slot, func,
- pos+PCI_CAP_LIST_NEXT);
- }
- return 0;
-}
-
-static u32 __init __find_dbgp(u32 bus, u32 slot, u32 func)
-{
- u32 class;
-
- class = read_pci_config(bus, slot, func, PCI_CLASS_REVISION);
- if ((class >> 8) != PCI_CLASS_SERIAL_USB_EHCI)
- return 0;
-
- return find_cap(bus, slot, func, PCI_CAP_ID_EHCI_DEBUG);
-}
-
-static u32 __init find_dbgp(int ehci_num, u32 *rbus, u32 *rslot, u32 *rfunc)
-{
- u32 bus, slot, func;
-
- for (bus = 0; bus < 256; bus++) {
- for (slot = 0; slot < 32; slot++) {
- for (func = 0; func < 8; func++) {
- unsigned cap;
-
- cap = __find_dbgp(bus, slot, func);
-
- if (!cap)
- continue;
- if (ehci_num-- != 0)
- continue;
- *rbus = bus;
- *rslot = slot;
- *rfunc = func;
- return cap;
- }
- }
- }
- return 0;
-}
-
-static int __init ehci_reset_port(int port)
-{
- u32 portsc;
- u32 delay_time, delay;
- int loop;
-
- /* Reset the usb debug port */
- portsc = readl(&ehci_regs->port_status[port - 1]);
- portsc &= ~PORT_PE;
- portsc |= PORT_RESET;
- writel(portsc, &ehci_regs->port_status[port - 1]);
-
- delay = HUB_ROOT_RESET_TIME;
- for (delay_time = 0; delay_time < HUB_RESET_TIMEOUT;
- delay_time += delay) {
- dbgp_mdelay(delay);
-
- portsc = readl(&ehci_regs->port_status[port - 1]);
- if (portsc & PORT_RESET) {
- /* force reset to complete */
- loop = 2;
- writel(portsc & ~(PORT_RWC_BITS | PORT_RESET),
- &ehci_regs->port_status[port - 1]);
- do {
- portsc = readl(&ehci_regs->port_status[port-1]);
- } while ((portsc & PORT_RESET) && (--loop > 0));
- }
-
- /* Device went away? */
- if (!(portsc & PORT_CONNECT))
- return -ENOTCONN;
-
- /* bomb out completely if something weird happened */
- if ((portsc & PORT_CSC))
- return -EINVAL;
-
- /* If we've finished resetting, then break out of the loop */
- if (!(portsc & PORT_RESET) && (portsc & PORT_PE))
- return 0;
- }
- return -EBUSY;
-}
-
-static int __init ehci_wait_for_port(int port)
-{
- u32 status;
- int ret, reps;
-
- for (reps = 0; reps < 3; reps++) {
- dbgp_mdelay(100);
- status = readl(&ehci_regs->status);
- if (status & STS_PCD) {
- ret = ehci_reset_port(port);
- if (ret == 0)
- return 0;
- }
- }
- return -ENOTCONN;
-}
-
-#ifdef DBGP_DEBUG
-# define dbgp_printk early_printk
-#else
-static inline void dbgp_printk(const char *fmt, ...) { }
-#endif
-
-typedef void (*set_debug_port_t)(int port);
-
-static void __init default_set_debug_port(int port)
-{
-}
-
-static set_debug_port_t __initdata set_debug_port = default_set_debug_port;
-
-static void __init nvidia_set_debug_port(int port)
-{
- u32 dword;
- dword = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
- 0x74);
- dword &= ~(0x0f<<12);
- dword |= ((port & 0x0f)<<12);
- write_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func, 0x74,
- dword);
- dbgp_printk("set debug port to %d\n", port);
-}
-
-static void __init detect_set_debug_port(void)
-{
- u32 vendorid;
-
- vendorid = read_pci_config(ehci_dev.bus, ehci_dev.slot, ehci_dev.func,
- 0x00);
-
- if ((vendorid & 0xffff) == 0x10de) {
- dbgp_printk("using nvidia set_debug_port\n");
- set_debug_port = nvidia_set_debug_port;
- }
-}
-
-static int __init ehci_setup(void)
-{
- struct usb_debug_descriptor dbgp_desc;
- u32 cmd, ctrl, status, portsc, hcs_params;
- u32 debug_port, new_debug_port = 0, n_ports;
- u32 devnum;
- int ret, i;
- int loop;
- int port_map_tried;
- int playtimes = 3;
-
-try_next_time:
- port_map_tried = 0;
-
-try_next_port:
-
- hcs_params = readl(&ehci_caps->hcs_params);
- debug_port = HCS_DEBUG_PORT(hcs_params);
- n_ports = HCS_N_PORTS(hcs_params);
-
- dbgp_printk("debug_port: %d\n", debug_port);
- dbgp_printk("n_ports: %d\n", n_ports);
-
- for (i = 1; i <= n_ports; i++) {
- portsc = readl(&ehci_regs->port_status[i-1]);
- dbgp_printk("portstatus%d: %08x\n", i, portsc);
- }
-
- if (port_map_tried && (new_debug_port != debug_port)) {
- if (--playtimes) {
- set_debug_port(new_debug_port);
- goto try_next_time;
- }
- return -1;
- }
-
- loop = 10;
- /* Reset the EHCI controller */
- cmd = readl(&ehci_regs->command);
- cmd |= CMD_RESET;
- writel(cmd, &ehci_regs->command);
- do {
- cmd = readl(&ehci_regs->command);
- } while ((cmd & CMD_RESET) && (--loop > 0));
-
- if (!loop) {
- dbgp_printk("can not reset ehci\n");
- return -1;
- }
- dbgp_printk("ehci reset done\n");
-
- /* Claim ownership, but do not enable yet */
- ctrl = readl(&ehci_debug->control);
- ctrl |= DBGP_OWNER;
- ctrl &= ~(DBGP_ENABLED | DBGP_INUSE);
- writel(ctrl, &ehci_debug->control);
-
- /* Start the ehci running */
- cmd = readl(&ehci_regs->command);
- cmd &= ~(CMD_LRESET | CMD_IAAD | CMD_PSE | CMD_ASE | CMD_RESET);
- cmd |= CMD_RUN;
- writel(cmd, &ehci_regs->command);
-
- /* Ensure everything is routed to the EHCI */
- writel(FLAG_CF, &ehci_regs->configured_flag);
-
- /* Wait until the controller is no longer halted */
- loop = 10;
- do {
- status = readl(&ehci_regs->status);
- } while ((status & STS_HALT) && (--loop > 0));
-
- if (!loop) {
- dbgp_printk("ehci can be started\n");
- return -1;
- }
- dbgp_printk("ehci started\n");
-
- /* Wait for a device to show up in the debug port */
- ret = ehci_wait_for_port(debug_port);
- if (ret < 0) {
- dbgp_printk("No device found in debug port\n");
- goto next_debug_port;
- }
- dbgp_printk("ehci wait for port done\n");
-
- /* Enable the debug port */
- ctrl = readl(&ehci_debug->control);
- ctrl |= DBGP_CLAIM;
- writel(ctrl, &ehci_debug->control);
- ctrl = readl(&ehci_debug->control);
- if ((ctrl & DBGP_CLAIM) != DBGP_CLAIM) {
- dbgp_printk("No device in debug port\n");
- writel(ctrl & ~DBGP_CLAIM, &ehci_debug->control);
- goto err;
- }
- dbgp_printk("debug ported enabled\n");
-
- /* Completely transfer the debug device to the debug controller */
- portsc = readl(&ehci_regs->port_status[debug_port - 1]);
- portsc &= ~PORT_PE;
- writel(portsc, &ehci_regs->port_status[debug_port - 1]);
-
- dbgp_mdelay(100);
-
- /* Find the debug device and make it device number 127 */
- for (devnum = 0; devnum <= 127; devnum++) {
- ret = dbgp_control_msg(devnum,
- USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
- USB_REQ_GET_DESCRIPTOR, (USB_DT_DEBUG << 8), 0,
- &dbgp_desc, sizeof(dbgp_desc));
- if (ret > 0)
- break;
- }
- if (devnum > 127) {
- dbgp_printk("Could not find attached debug device\n");
- goto err;
- }
- if (ret < 0) {
- dbgp_printk("Attached device is not a debug device\n");
- goto err;
- }
- dbgp_endpoint_out = dbgp_desc.bDebugOutEndpoint;
-
- /* Move the device to 127 if it isn't already there */
- if (devnum != USB_DEBUG_DEVNUM) {
- ret = dbgp_control_msg(devnum,
- USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
- USB_REQ_SET_ADDRESS, USB_DEBUG_DEVNUM, 0, NULL, 0);
- if (ret < 0) {
- dbgp_printk("Could not move attached device to %d\n",
- USB_DEBUG_DEVNUM);
- goto err;
- }
- devnum = USB_DEBUG_DEVNUM;
- dbgp_printk("debug device renamed to 127\n");
- }
-
- /* Enable the debug interface */
- ret = dbgp_control_msg(USB_DEBUG_DEVNUM,
- USB_DIR_OUT | USB_TYPE_STANDARD | USB_RECIP_DEVICE,
- USB_REQ_SET_FEATURE, USB_DEVICE_DEBUG_MODE, 0, NULL, 0);
- if (ret < 0) {
- dbgp_printk(" Could not enable the debug device\n");
- goto err;
- }
- dbgp_printk("debug interface enabled\n");
-
- /* Perform a small write to get the even/odd data state in sync
- */
- ret = dbgp_bulk_write(USB_DEBUG_DEVNUM, dbgp_endpoint_out, " ", 1);
- if (ret < 0) {
- dbgp_printk("dbgp_bulk_write failed: %d\n", ret);
- goto err;
- }
- dbgp_printk("small write doned\n");
-
- return 0;
-err:
- /* Things didn't work so remove my claim */
- ctrl = readl(&ehci_debug->control);
- ctrl &= ~(DBGP_CLAIM | DBGP_OUT);
- writel(ctrl, &ehci_debug->control);
- return -1;
-
-next_debug_port:
- port_map_tried |= (1<<(debug_port - 1));
- new_debug_port = ((debug_port-1+1)%n_ports) + 1;
- if (port_map_tried != ((1<<n_ports) - 1)) {
- set_debug_port(new_debug_port);
- goto try_next_port;
- }
- if (--playtimes) {
- set_debug_port(new_debug_port);
- goto try_next_time;
- }
-
- return -1;
-}
-
-static int __init early_dbgp_init(char *s)
-{
- u32 debug_port, bar, offset;
- u32 bus, slot, func, cap;
- void __iomem *ehci_bar;
- u32 dbgp_num;
- u32 bar_val;
- char *e;
- int ret;
- u8 byte;
-
- if (!early_pci_allowed())
- return -1;
-
- dbgp_num = 0;
- if (*s)
- dbgp_num = simple_strtoul(s, &e, 10);
- dbgp_printk("dbgp_num: %d\n", dbgp_num);
-
- cap = find_dbgp(dbgp_num, &bus, &slot, &func);
- if (!cap)
- return -1;
-
- dbgp_printk("Found EHCI debug port on %02x:%02x.%1x\n", bus, slot,
- func);
-
- debug_port = read_pci_config(bus, slot, func, cap);
- bar = (debug_port >> 29) & 0x7;
- bar = (bar * 4) + 0xc;
- offset = (debug_port >> 16) & 0xfff;
- dbgp_printk("bar: %02x offset: %03x\n", bar, offset);
- if (bar != PCI_BASE_ADDRESS_0) {
- dbgp_printk("only debug ports on bar 1 handled.\n");
-
- return -1;
- }
-
- bar_val = read_pci_config(bus, slot, func, PCI_BASE_ADDRESS_0);
- dbgp_printk("bar_val: %02x offset: %03x\n", bar_val, offset);
- if (bar_val & ~PCI_BASE_ADDRESS_MEM_MASK) {
- dbgp_printk("only simple 32bit mmio bars supported\n");
-
- return -1;
- }
-
- /* double check if the mem space is enabled */
- byte = read_pci_config_byte(bus, slot, func, 0x04);
- if (!(byte & 0x2)) {
- byte |= 0x02;
- write_pci_config_byte(bus, slot, func, 0x04, byte);
- dbgp_printk("mmio for ehci enabled\n");
- }
-
- /*
- * FIXME I don't have the bar size so just guess PAGE_SIZE is more
- * than enough. 1K is the biggest I have seen.
- */
- set_fixmap_nocache(FIX_DBGP_BASE, bar_val & PAGE_MASK);
- ehci_bar = (void __iomem *)__fix_to_virt(FIX_DBGP_BASE);
- ehci_bar += bar_val & ~PAGE_MASK;
- dbgp_printk("ehci_bar: %p\n", ehci_bar);
-
- ehci_caps = ehci_bar;
- ehci_regs = ehci_bar + HC_LENGTH(readl(&ehci_caps->hc_capbase));
- ehci_debug = ehci_bar + offset;
- ehci_dev.bus = bus;
- ehci_dev.slot = slot;
- ehci_dev.func = func;
-
- detect_set_debug_port();
-
- ret = ehci_setup();
- if (ret < 0) {
- dbgp_printk("ehci_setup failed\n");
- ehci_debug = NULL;
-
- return -1;
- }
-
- return 0;
-}
-
-static void early_dbgp_write(struct console *con, const char *str, u32 n)
-{
- int chunk, ret;
-
- if (!ehci_debug)
- return;
- while (n > 0) {
- chunk = n;
- if (chunk > DBGP_MAX_PACKET)
- chunk = DBGP_MAX_PACKET;
- ret = dbgp_bulk_write(USB_DEBUG_DEVNUM,
- dbgp_endpoint_out, str, chunk);
- str += chunk;
- n -= chunk;
- }
-}
-
-static struct console early_dbgp_console = {
- .name = "earlydbg",
- .write = early_dbgp_write,
- .flags = CON_PRINTBUFFER,
- .index = -1,
-};
-#endif
-
/* Direct interface for emergencies */
static struct console *early_console = &early_vga_console;
static int __initdata early_console_initialized;
@@ -920,10 +204,24 @@ asmlinkage void early_printk(const char
va_end(ap);
}
+static inline void early_console_register(struct console *con, int keep_early)
+{
+ if (early_console->index != -1) {
+ printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
+ con->name);
+ return;
+ }
+ early_console = con;
+ if (keep_early)
+ early_console->flags &= ~CON_BOOT;
+ else
+ early_console->flags |= CON_BOOT;
+ register_console(early_console);
+}
static int __init setup_early_printk(char *buf)
{
- int keep_early;
+ int keep;
if (!buf)
return 0;
@@ -932,44 +230,41 @@ static int __init setup_early_printk(cha
return 0;
early_console_initialized = 1;
- keep_early = (strstr(buf, "keep") != NULL);
+ keep = (strstr(buf, "keep") != NULL);
- if (!strncmp(buf, "serial", 6)) {
- early_serial_init(buf + 6);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "ttyS", 4)) {
- early_serial_init(buf);
- early_console = &early_serial_console;
- } else if (!strncmp(buf, "vga", 3)) {
+ while (*buf != '\0') {
+ if (!strncmp(buf, "serial", 6)) {
+ buf += 6;
+ early_serial_init(buf);
+ early_console_register(&early_serial_console, keep);
+ if (!strncmp(buf, ",ttyS", 5))
+ buf += 5;
+ }
+ if (!strncmp(buf, "ttyS", 4)) {
+ early_serial_init(buf + 4);
+ early_console_register(&early_serial_console, keep);
+ }
#ifndef CONFIG_XEN
- && boot_params.screen_info.orig_video_isVGA == 1) {
- max_xpos = boot_params.screen_info.orig_video_cols;
- max_ypos = boot_params.screen_info.orig_video_lines;
- current_ypos = boot_params.screen_info.orig_y;
+ if (!strncmp(buf, "vga", 3) &&
+ boot_params.screen_info.orig_video_isVGA == 1) {
+ max_xpos = boot_params.screen_info.orig_video_cols;
+ max_ypos = boot_params.screen_info.orig_video_lines;
+ current_ypos = boot_params.screen_info.orig_y;
+#else
+ if (!strncmp(buf, "vga", 3) || !strncmp(buf, "xen", 3)) {
#endif
- early_console = &early_vga_console;
+ early_console_register(&early_vga_console, keep);
+ }
#ifdef CONFIG_EARLY_PRINTK_DBGP
- } else if (!strncmp(buf, "dbgp", 4)) {
- if (early_dbgp_init(buf+4) < 0)
- return 0;
- early_console = &early_dbgp_console;
- /*
- * usb subsys will reset ehci controller, so don't keep
- * that early console
- */
- keep_early = 0;
+ if (!strncmp(buf, "dbgp", 4) && !early_dbgp_init(buf + 4))
+ early_console_register(&early_dbgp_console, keep);
#endif
-#ifdef CONFIG_XEN
- } else if (!strncmp(buf, "xen", 3)) {
- early_console = &xenboot_console;
+#ifdef CONFIG_HVC_XEN
+ if (!strncmp(buf, "xen", 3))
+ early_console_register(&xenboot_console, keep);
#endif
+ buf++;
}
-
- if (keep_early)
- early_console->flags &= ~CON_BOOT;
- else
- early_console->flags |= CON_BOOT;
- register_console(early_console);
return 0;
}
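The rewritten setup_early_printk() walks the whole earlyprintk= string instead
of matching a single keyword, so several early consoles can be registered in
one pass, and early_console_register() now refuses a console that is already
in use. Command lines the new parser accepts look like this (illustrative
values; "keep" may appear anywhere in the string and applies to every console
registered):

	earlyprintk=serial,ttyS0,115200   # early serial console
	earlyprintk=dbgp,keep             # EHCI debug port, kept after boot
	earlyprintk=xen                   # Xen emergency console (CONFIG_HVC_XEN)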
--- head-2010-05-25.orig/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/entry_64-xen.S 2010-03-24 15:32:27.000000000 +0100
@@ -53,6 +53,7 @@
#include <asm/hw_irq.h>
#include <asm/page_types.h>
#include <asm/irqflags.h>
+#include <asm/processor-flags.h>
#include <asm/ftrace.h>
#include <asm/percpu.h>
#include <xen/interface/xen.h>
@@ -150,7 +151,7 @@ ENTRY(ftrace_graph_caller)
END(ftrace_graph_caller)
GLOBAL(return_to_handler)
- subq $80, %rsp
+ subq $24, %rsp
/* Save the return values */
movq %rax, (%rsp)
@@ -159,10 +160,10 @@ GLOBAL(return_to_handler)
call ftrace_return_to_handler
- movq %rax, 72(%rsp)
+ movq %rax, 16(%rsp)
movq 8(%rsp), %rdx
movq (%rsp), %rax
- addq $72, %rsp
+ addq $16, %rsp
retq
#endif
@@ -546,20 +547,13 @@ sysret_signal:
bt $TIF_SYSCALL_AUDIT,%edx
jc sysret_audit
#endif
- /* edx: work flags (arg3) */
- leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
- xorl %esi,%esi # oldset -> arg2
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call do_notify_resume
- RESTORE_TOP_OF_STACK %r11
- RESTORE_REST
- movl $_TIF_WORK_MASK,%edi
- /* Use IRET because user could have changed frame. This
- works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
- DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- jmp int_with_check
+ /*
+ * We have a signal, or exit tracing or single-step.
+ * These all wind up with the iret return path anyway,
+ * so just join that path right now.
+ */
+ FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
+ jmp int_check_syscall_exit_work
badsys:
movq $-ENOSYS,RAX-ARGOFFSET(%rsp)
@@ -668,6 +662,7 @@ int_careful:
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
+int_check_syscall_exit_work:
SAVE_REST
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
@@ -914,7 +909,7 @@ apicinterrupt ERROR_APIC_VECTOR \
apicinterrupt SPURIOUS_APIC_VECTOR \
spurious_interrupt smp_spurious_interrupt
-#ifdef CONFIG_PERF_COUNTERS
+#ifdef CONFIG_PERF_EVENTS
apicinterrupt LOCAL_PENDING_VECTOR \
perf_pending_interrupt smp_perf_pending_interrupt
#endif
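The return_to_handler change is pure frame accounting: the ftrace return
trampoline needs only three 8-byte slots, so the 80-byte scratch area shrinks
to 24 bytes. The layout implied by the code above:

	  (%rsp)	saved %rax  \  return value of the traced function
	 8(%rsp)	saved %rdx  /
	16(%rsp)	original return address, written back after the call to
			ftrace_return_to_handler()

The final addq $16 discards the two register slots, leaving the recovered
return address on top of the stack so that retq jumps straight back to the
real caller. The sysret_signal hunk is a related deduplication: instead of
open-coding the do_notify_resume() sequence it joins the existing iret return
path at the new int_check_syscall_exit_work label.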
--- head-2010-05-25.orig/arch/x86/kernel/head-xen.c 2010-04-28 17:07:13.000000000 +0200
+++ head-2010-05-25/arch/x86/kernel/head-xen.c 2010-04-15 10:10:51.000000000 +0200
@@ -59,7 +59,6 @@ void __init reserve_ebda_region(void)
#include <asm/fixmap.h>
#include <asm/pgtable.h>
#include <asm/sections.h>
-#include <asm/setup_arch.h>
#include <xen/interface/callback.h>
#include <xen/interface/memory.h>
@@ -164,7 +163,7 @@ void __init xen_start_kernel(void)
}
-void __init machine_specific_arch_setup(void)
+void __init xen_arch_setup(void)
{
int ret;
static const struct callback_register __initconst event = {
--- head-2010-05-25.orig/arch/x86/kernel/head32-xen.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head32-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -9,11 +9,26 @@
#include <linux/start_kernel.h>
#include <asm/setup.h>
-#include <asm/setup_arch.h>
#include <asm/sections.h>
#include <asm/e820.h>
-#include <asm/bios_ebda.h>
+#include <asm/page.h>
#include <asm/trampoline.h>
+#include <asm/apic.h>
+#include <asm/io_apic.h>
+#include <asm/bios_ebda.h>
+
+static void __init i386_default_early_setup(void)
+{
+ /* Initialize 32bit specific setup functions */
+ if (is_initial_xendomain())
+ x86_init.resources.probe_roms = probe_roms;
+ x86_init.resources.reserve_resources = i386_reserve_resources;
+#ifndef CONFIG_XEN
+ x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc;
+
+ reserve_ebda_region();
+#endif
+}
void __init i386_start_kernel(void)
{
@@ -31,7 +46,16 @@ void __init i386_start_kernel(void)
reserve_early(ramdisk_image, ramdisk_end, "RAMDISK");
}
#endif
- reserve_ebda_region();
+
+ /* Call the subarch specific early setup function */
+ switch (boot_params.hdr.hardware_subarch) {
+ case X86_SUBARCH_MRST:
+ x86_mrst_early_setup();
+ break;
+ default:
+ i386_default_early_setup();
+ break;
+ }
#else
{
int max_cmdline;
@@ -42,6 +66,7 @@ void __init i386_start_kernel(void)
boot_command_line[max_cmdline-1] = '\0';
}
+ i386_default_early_setup();
xen_start_kernel();
#endif
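i386_start_kernel() now dispatches on the boot protocol's hardware_subarch
field, so Moorestown (MRST) gets its own early hook while everything else
falls through to i386_default_early_setup(); the Xen build calls the default
unconditionally. For orientation, the subarch IDs in 2.6.32's
<asm/bootparam.h> are approximately (sketch from memory, verify in the tree):

	enum x86_hardware_subarch {
		X86_SUBARCH_PC = 0,
		X86_SUBARCH_LGUEST,
		X86_SUBARCH_XEN,
		X86_SUBARCH_MRST,
	};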
--- head-2010-05-25.orig/arch/x86/kernel/head64-xen.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head64-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -20,15 +20,14 @@
#include <asm/proto.h>
#include <asm/smp.h>
#include <asm/setup.h>
-#include <asm/setup_arch.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/kdebug.h>
#include <asm/e820.h>
-#include <asm/bios_ebda.h>
#include <asm/trampoline.h>
+#include <asm/bios_ebda.h>
#ifndef CONFIG_XEN
static void __init zap_identity_mappings(void)
--- head-2010-05-25.orig/arch/x86/kernel/head_32-xen.S 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head_32-xen.S 2010-03-24 15:32:27.000000000 +0100
@@ -30,7 +30,7 @@
#define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability
#define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id
-.section .text.head,"ax",@progbits
+__HEAD
#define VIRT_ENTRY_OFFSET 0x0
.org VIRT_ENTRY_OFFSET
ENTRY(startup_32)
@@ -69,7 +69,6 @@ ENTRY(startup_32)
*/
movl $per_cpu__gdt_page,%eax
movl $per_cpu__stack_canary,%ecx
- subl $20, %ecx
movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
shrl $16, %ecx
movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
@@ -122,7 +121,7 @@ ENTRY(hypercall_page)
/*
* BSS section
*/
-.section ".bss.page_aligned","wa"
+__PAGE_ALIGNED_BSS
.align PAGE_SIZE_asm
ENTRY(swapper_pg_fixmap)
.fill 1024,4,0
--- head-2010-05-25.orig/arch/x86/kernel/head_64-xen.S 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/head_64-xen.S 2010-03-24 15:32:27.000000000 +0100
@@ -23,7 +23,7 @@
#include <asm/percpu.h>
#include <xen/interface/elfnote.h>
- .section .text.head, "ax", @progbits
+ __HEAD
.code64
.globl startup_64
startup_64:
@@ -51,7 +51,7 @@ startup_64:
#define NEXT_PAGE(name) \
.balign PAGE_SIZE; \
- phys_##name = . - .text.head; \
+ phys_##name = . - .head.text; \
ENTRY(name)
NEXT_PAGE(init_level4_pgt)
@@ -104,7 +104,7 @@ NEXT_PAGE(hypercall_page)
#undef NEXT_PAGE
- .section .bss.page_aligned, "aw", @nobits
+ __PAGE_ALIGNED_BSS
.align PAGE_SIZE
ENTRY(empty_zero_page)
.skip PAGE_SIZE
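Both head_*.S hunks replace hand-rolled .section directives with the generic
section macros that 2.6.32 standardized, which is also why phys_##name is now
computed relative to .head.text rather than .text.head. The macros expand
approximately as follows (sketch from 2.6.32's <linux/init.h> and
<linux/linkage.h>; check the headers before relying on the exact flags):

	#define __HEAD			.section ".head.text","ax"
	#define __PAGE_ALIGNED_BSS	.section .bss.page_aligned, "aw"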
--- head-2010-05-25.orig/arch/x86/kernel/irq-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/irq-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -67,10 +67,10 @@ static int show_other_interrupts(struct
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count);
seq_printf(p, " Spurious interrupts\n");
- seq_printf(p, "%*s: ", prec, "CNT");
+ seq_printf(p, "%*s: ", prec, "PMI");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs);
- seq_printf(p, " Performance counter interrupts\n");
+ seq_printf(p, " Performance monitoring interrupts\n");
seq_printf(p, "%*s: ", prec, "PND");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs);
@@ -112,7 +112,7 @@ static int show_other_interrupts(struct
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
#endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
for_each_online_cpu(j)
seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -212,7 +212,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
#endif
-#ifdef CONFIG_X86_NEW_MCE
+#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);
sum += per_cpu(mce_poll_count, cpu);
#endif
--- head-2010-05-25.orig/arch/x86/kernel/ldt-xen.c 2010-03-24 15:17:58.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/ldt-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -70,8 +70,8 @@ static int alloc_ldt(mm_context_t *pc, i
XENFEAT_writable_descriptor_tables);
load_LDT(pc);
#ifdef CONFIG_SMP
- if (!cpus_equal(current->mm->cpu_vm_mask,
- cpumask_of_cpu(smp_processor_id())))
+ if (!cpumask_equal(mm_cpumask(current->mm),
+ cpumask_of(smp_processor_id())))
smp_call_function(flush_ldt, current->mm, 1);
preempt_enable();
#endif
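The alloc_ldt() hunk is part of the tree-wide move from fixed-size cpumask
values to pointer-based accessors, the prerequisite for
CONFIG_CPUMASK_OFFSTACK. Side by side, with the old form taken from the
removed lines:

	/* 2.6.31: compares (and copies) whole cpumask structures by value */
	if (!cpus_equal(current->mm->cpu_vm_mask,
			cpumask_of_cpu(smp_processor_id())))

	/* 2.6.32: compares through const pointers, no copy */
	if (!cpumask_equal(mm_cpumask(current->mm),
			   cpumask_of(smp_processor_id())))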
--- head-2010-05-25.orig/arch/x86/kernel/microcode_core-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/microcode_core-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -97,8 +97,8 @@ static ssize_t microcode_write(struct fi
{
ssize_t ret = -EINVAL;
- if ((len >> PAGE_SHIFT) > num_physpages) {
- pr_err("microcode: too much data (max %ld pages)\n", num_physpages);
+ if ((len >> PAGE_SHIFT) > totalram_pages) {
+ pr_err("microcode: too much data (max %ld pages)\n", totalram_pages);
return ret;
}
@@ -121,7 +121,7 @@ static const struct file_operations micr
static struct miscdevice microcode_dev = {
.minor = MICROCODE_MINOR,
.name = "microcode",
- .devnode = "cpu/microcode",
+ .nodename = "cpu/microcode",
.fops = &microcode_fops,
};
--- head-2010-05-25.orig/arch/x86/kernel/mpparse-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/mpparse-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -51,6 +51,13 @@ static int __init mpf_checksum(unsigned
return sum & 0xFF;
}
+#ifndef CONFIG_XEN
+int __init default_mpc_apic_id(struct mpc_cpu *m)
+{
+ return m->apicid;
+}
+#endif
+
static void __init MP_processor_info(struct mpc_cpu *m)
{
#ifndef CONFIG_XEN
@@ -62,10 +69,7 @@ static void __init MP_processor_info(str
return;
}
- if (x86_quirks->mpc_apic_id)
- apicid = x86_quirks->mpc_apic_id(m);
- else
- apicid = m->apicid;
+ apicid = x86_init.mpparse.mpc_apic_id(m);
if (m->cpuflag & CPU_BOOTPROCESSOR) {
bootup_cpu = " (Bootup-CPU)";
@@ -80,16 +84,18 @@ static void __init MP_processor_info(str
}
#ifdef CONFIG_X86_IO_APIC
-static void __init MP_bus_info(struct mpc_bus *m)
+void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str)
{
- char str[7];
memcpy(str, m->bustype, 6);
str[6] = 0;
+ apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+}
- if (x86_quirks->mpc_oem_bus_info)
- x86_quirks->mpc_oem_bus_info(m, str);
- else
- apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str);
+static void __init MP_bus_info(struct mpc_bus *m)
+{
+ char str[7];
+
+ x86_init.mpparse.mpc_oem_bus_info(m, str);
#if MAX_MP_BUSSES < 256
if (m->busid >= MAX_MP_BUSSES) {
@@ -106,8 +112,8 @@ static void __init MP_bus_info(struct mp
mp_bus_id_to_type[m->busid] = MP_BUS_ISA;
#endif
} else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) {
- if (x86_quirks->mpc_oem_pci_bus)
- x86_quirks->mpc_oem_pci_bus(m);
+ if (x86_init.mpparse.mpc_oem_pci_bus)
+ x86_init.mpparse.mpc_oem_pci_bus(m);
clear_bit(m->busid, mp_bus_not_pci);
#if defined(CONFIG_EISA) || defined(CONFIG_MCA)
@@ -301,6 +307,8 @@ static void __init smp_dump_mptable(stru
1, mpc, mpc->length, 1);
}
+void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { }
+
static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
{
char str[16];
@@ -322,16 +330,13 @@ static int __init smp_read_mpc(struct mp
if (early)
return 1;
- if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) {
- struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr;
- x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize);
- }
+ if (mpc->oemptr)
+ x86_init.mpparse.smp_read_mpc_oem(mpc);
/*
* Now process the configuration blocks.
*/
- if (x86_quirks->mpc_record)
- *x86_quirks->mpc_record = 0;
+ x86_init.mpparse.mpc_record(0);
while (count < mpc->length) {
switch (*mpt) {
@@ -363,8 +368,7 @@ static int __init smp_read_mpc(struct mp
count = mpc->length;
break;
}
- if (x86_quirks->mpc_record)
- (*x86_quirks->mpc_record)++;
+ x86_init.mpparse.mpc_record(1);
}
#ifdef CONFIG_X86_BIGSMP
@@ -492,11 +496,11 @@ static void __init construct_ioapic_tabl
MP_bus_info(&bus);
}
- ioapic.type = MP_IOAPIC;
- ioapic.apicid = 2;
- ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
- ioapic.flags = MPC_APIC_USABLE;
- ioapic.apicaddr = 0xFEC00000;
+ ioapic.type = MP_IOAPIC;
+ ioapic.apicid = 2;
+ ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
+ ioapic.flags = MPC_APIC_USABLE;
+ ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE;
MP_ioapic_info(&ioapic);
/*
@@ -618,7 +622,7 @@ static int __init check_physptr(struct m
/*
* Scan the memory blocks for an SMP configuration block.
*/
-static void __init __get_smp_config(unsigned int early)
+void __init default_get_smp_config(unsigned int early)
{
struct mpf_intel *mpf = mpf_found;
@@ -635,11 +639,6 @@ static void __init __get_smp_config(unsi
if (acpi_lapic && acpi_ioapic)
return;
- if (x86_quirks->mach_get_smp_config) {
- if (x86_quirks->mach_get_smp_config(early))
- return;
- }
-
printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
mpf->specification);
#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
@@ -680,16 +679,6 @@ static void __init __get_smp_config(unsi
*/
}
-void __init early_get_smp_config(void)
-{
- __get_smp_config(1);
-}
-
-void __init get_smp_config(void)
-{
- __get_smp_config(0);
-}
-
#ifndef CONFIG_XEN
static void __init smp_reserve_bootmem(struct mpf_intel *mpf)
{
@@ -761,16 +750,12 @@ static int __init smp_scan_config(unsign
return 0;
}
-static void __init __find_smp_config(unsigned int reserve)
+void __init default_find_smp_config(unsigned int reserve)
{
#ifndef CONFIG_XEN
unsigned int address;
#endif
- if (x86_quirks->mach_find_smp_config) {
- if (x86_quirks->mach_find_smp_config(reserve))
- return;
- }
/*
* FIXME: Linux assumes you have 640K of base ram..
* this continues the error...
@@ -807,16 +792,6 @@ static void __init __find_smp_config(uns
#endif
}
-void __init early_find_smp_config(void)
-{
- __find_smp_config(0);
-}
-
-void __init find_smp_config(void)
-{
- __find_smp_config(1);
-}
-
#ifdef CONFIG_X86_IO_APIC
static u8 __initdata irq_used[MAX_IRQ_SOURCES];
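All mpparse hunks follow one pattern: the optional x86_quirks callbacks, each
guarded by a NULL check at its call site, are replaced by always-valid
function pointers in the new x86_init table, with the default_* functions
defined here as the stock implementations. A platform overrides a slot by
plain assignment during early boot; a hypothetical example in the same style
this patch uses in head32-xen.c:

	/* early platform setup (illustrative names) */
	x86_init.mpparse.mpc_apic_id = my_platform_mpc_apic_id;
	x86_init.mpparse.get_smp_config = my_platform_get_smp_config;

The early_/find_/get_smp_config() wrapper pairs disappear here because their
callers now go through the table slots.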
--- head-2010-05-25.orig/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/pci-dma-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -3,6 +3,7 @@
#include <linux/dmar.h>
#include <linux/bootmem.h>
#include <linux/pci.h>
+#include <linux/kmemleak.h>
#include <asm/proto.h>
#include <asm/dma.h>
@@ -32,17 +33,22 @@ int no_iommu __read_mostly;
/* Set this to 1 if there is a HW IOMMU in the system */
int iommu_detected __read_mostly = 0;
-int iommu_pass_through;
+/*
+ * This variable becomes 1 if iommu=pt is passed on the kernel command line.
+ * If this variable is 1, IOMMU implementations do no DMA translation for
+ * devices and allow every device to access the whole physical memory. This is
+ * useful if a user wants to use an IOMMU only for KVM device assignment to
+ * guests and not for driver DMA translation.
+ */
+int iommu_pass_through __read_mostly;
dma_addr_t bad_dma_address __read_mostly = 0;
EXPORT_SYMBOL(bad_dma_address);
-/* Dummy device used for NULL arguments (normally ISA). Better would
- be probably a smaller DMA mask, but this is bug-to-bug compatible
- to older i386. */
+/* Dummy device used for NULL arguments (normally ISA). */
struct device x86_dma_fallback_dev = {
.init_name = "fallback device",
- .coherent_dma_mask = DMA_BIT_MASK(32),
+ .coherent_dma_mask = ISA_DMA_BIT_MASK,
.dma_mask = &x86_dma_fallback_dev.coherent_dma_mask,
};
EXPORT_SYMBOL(x86_dma_fallback_dev);
@@ -88,6 +94,11 @@ void __init dma32_reserve_bootmem(void)
size = roundup(dma32_bootmem_size, align);
dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align,
512ULL<<20);
+ /*
+ * Kmemleak should not scan this block as it may not be mapped via the
+ * kernel direct mapping.
+ */
+ kmemleak_ignore(dma32_bootmem_ptr);
if (dma32_bootmem_ptr)
dma32_bootmem_size = size;
else
@@ -178,7 +189,7 @@ again:
#ifndef CONFIG_XEN
addr = page_to_phys(page);
- if (!is_buffer_dma_capable(dma_mask, addr, size)) {
+ if (addr + size > dma_mask) {
__free_pages(page, order);
if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) {
@@ -266,10 +277,8 @@ static __init int iommu_setup(char *p)
if (!strncmp(p, "soft", 4))
swiotlb = 1;
#endif
- if (!strncmp(p, "pt", 2)) {
+ if (!strncmp(p, "pt", 2))
iommu_pass_through = 1;
- return 1;
- }
gart_parse_options(p);
@@ -381,7 +390,7 @@ void pci_iommu_shutdown(void)
amd_iommu_shutdown();
}
/* Must execute after PCI subsystem */
-fs_initcall(pci_iommu_init);
+rootfs_initcall(pci_iommu_init);
#ifdef CONFIG_PCI
/* Many VIA bridges seem to corrupt data for DAC. Disable it here */
--- head-2010-05-25.orig/arch/x86/kernel/pci-nommu-xen.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/pci-nommu-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -36,7 +36,7 @@ gnttab_map_sg(struct device *hwdev, stru
sg->dma_address =
gnttab_dma_map_page(sg_page(sg)) + sg->offset;
sg->dma_length = sg->length;
- IOMMU_BUG_ON(address_needs_mapping(
+ IOMMU_BUG_ON(!dma_capable(
hwdev, sg->dma_address, sg->length));
IOMMU_BUG_ON(range_straddles_page_boundary(
page_to_pseudophys(sg_page(sg)) + sg->offset,
@@ -69,7 +69,7 @@ gnttab_map_page(struct device *dev, stru
dma = gnttab_dma_map_page(page) + offset;
IOMMU_BUG_ON(range_straddles_page_boundary(page_to_pseudophys(page) +
offset, size));
- IOMMU_BUG_ON(address_needs_mapping(dev, dma, size));
+ IOMMU_BUG_ON(!dma_capable(dev, dma, size));
return dma;
}
@@ -81,19 +81,36 @@ gnttab_unmap_page(struct device *dev, dm
gnttab_dma_unmap_page(dma_addr);
}
+static void nommu_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
+{
+ flush_write_buffers();
+}
+
+
+static void nommu_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg, int nelems,
+ enum dma_data_direction dir)
+{
+ flush_write_buffers();
+}
+
static int nommu_dma_supported(struct device *hwdev, u64 mask)
{
return 1;
}
struct dma_map_ops nommu_dma_ops = {
- .alloc_coherent = dma_generic_alloc_coherent,
- .free_coherent = dma_generic_free_coherent,
- .map_page = gnttab_map_page,
- .unmap_page = gnttab_unmap_page,
- .map_sg = gnttab_map_sg,
- .unmap_sg = gnttab_unmap_sg,
- .dma_supported = nommu_dma_supported,
+ .alloc_coherent = dma_generic_alloc_coherent,
+ .free_coherent = dma_generic_free_coherent,
+ .map_page = gnttab_map_page,
+ .unmap_page = gnttab_unmap_page,
+ .map_sg = gnttab_map_sg,
+ .unmap_sg = gnttab_unmap_sg,
+ .sync_single_for_device = nommu_sync_single_for_device,
+ .sync_sg_for_device = nommu_sync_sg_for_device,
+ .dma_supported = nommu_dma_supported,
};
void __init no_iommu_init(void)
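Both the pci-dma and pci-nommu hunks replace the old
address_needs_mapping()/is_buffer_dma_capable() helpers with the dma_capable()
predicate introduced in 2.6.32. Its x86 flavor is approximately (sketch from
the 2.6.32 headers):

	static inline bool dma_capable(struct device *dev, dma_addr_t addr,
				       size_t size)
	{
		if (!dev->dma_mask)
			return 0;

		return addr + size <= *dev->dma_mask;
	}

that is, a buffer qualifies only when it ends at or below the device's DMA
mask, the same bound the open-coded "addr + size > dma_mask" test in
pci-dma-xen.c applies against the raw mask. The new nommu sync_*_for_device
hooks only need flush_write_buffers(), since without an IOMMU there is no
bounce buffer to copy back.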
--- head-2010-05-25.orig/arch/x86/kernel/process-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -9,7 +9,7 @@
#include <linux/pm.h>
#include <linux/clockchips.h>
#include <linux/random.h>
-#include <trace/power.h>
+#include <trace/events/power.h>
#include <asm/system.h>
#include <asm/apic.h>
#include <asm/syscalls.h>
@@ -26,9 +26,6 @@ EXPORT_SYMBOL(idle_nomwait);
struct kmem_cache *task_xstate_cachep;
-DEFINE_TRACE(power_start);
-DEFINE_TRACE(power_end);
-
int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
{
*dst = *src;
@@ -285,9 +282,7 @@ static inline int hlt_use_halt(void)
*/
void xen_idle(void)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1);
current_thread_info()->status &= ~TS_POLLING;
/*
* TS_POLLING-cleared state must be visible before we
@@ -300,7 +295,6 @@ void xen_idle(void)
else
local_irq_enable();
current_thread_info()->status |= TS_POLLING;
- trace_power_end(&it);
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
@@ -354,9 +348,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait);
*/
void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
+ trace_power_start(POWER_CSTATE, (ax>>4)+1);
if (!need_resched()) {
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -366,15 +358,13 @@ void mwait_idle_with_hints(unsigned long
if (!need_resched())
__mwait(ax, cx);
}
- trace_power_end(&it);
}
/* Default MONITOR/MWAIT with no hints, used for default C1 state */
static void mwait_idle(void)
{
- struct power_trace it;
if (!need_resched()) {
- trace_power_start(&it, POWER_CSTATE, 1);
+ trace_power_start(POWER_CSTATE, 1);
if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
clflush((void *)&current_thread_info()->flags);
@@ -384,7 +374,6 @@ static void mwait_idle(void)
__sti_mwait(0, 0);
else
local_irq_enable();
- trace_power_end(&it);
} else
local_irq_enable();
}
@@ -397,13 +386,11 @@ static void mwait_idle(void)
*/
static void poll_idle(void)
{
- struct power_trace it;
-
- trace_power_start(&it, POWER_CSTATE, 0);
+ trace_power_start(POWER_CSTATE, 0);
local_irq_enable();
while (!need_resched())
cpu_relax();
- trace_power_end(&it);
+ trace_power_end(0);
}
#ifndef CONFIG_XEN
@@ -556,10 +543,8 @@ void __init init_c1e_mask(void)
{
#ifndef CONFIG_XEN
/* If we're using c1e_idle, we need to allocate c1e_mask. */
- if (pm_idle == c1e_idle) {
- alloc_cpumask_var(&c1e_mask, GFP_KERNEL);
- cpumask_clear(c1e_mask);
- }
+ if (pm_idle == c1e_idle)
+ zalloc_cpumask_var(&c1e_mask, GFP_KERNEL);
#endif
}
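The idle-loop churn tracks the conversion of the power tracer to TRACE_EVENT:
the tracepoints now live in <trace/events/power.h>, carry their own state, and
need neither a caller-allocated struct power_trace nor per-file DEFINE_TRACE()
boilerplate. The calling convention shifts as follows (both forms lifted from
the hunks above):

	/* 2.6.31 */
	struct power_trace it;
	trace_power_start(&it, POWER_CSTATE, 1);
	/* ... idle ... */
	trace_power_end(&it);

	/* 2.6.32 */
	trace_power_start(POWER_CSTATE, 1);
	/* ... idle ... */
	trace_power_end(0);

The init_c1e_mask() hunk is an unrelated cleanup: zalloc_cpumask_var()
performs the allocate-then-clear pair in one call.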
--- head-2010-05-25.orig/arch/x86/kernel/process_32-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_32-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -66,9 +66,6 @@
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
asmlinkage void cstar_ret_from_fork(void) __asm__("cstar_ret_from_fork");
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
/*
* Return saved PC of a blocked thread.
*/
@@ -360,6 +357,7 @@ __switch_to(struct task_struct *prev_p,
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
+ bool preload_fpu;
#if CONFIG_XEN_COMPAT > 0x030002
struct physdev_set_iopl iopl_op;
struct physdev_set_iobitmap iobmp_op;
@@ -373,15 +371,24 @@ __switch_to(struct task_struct *prev_p,
/* XEN NOTE: FS/GS saved in switch_mm(), not here. */
/*
+ * If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
+ /*
* This is basically '__unlazy_fpu', except that we queue a
* multicall to indicate FPU task switch, rather than
* synchronously trapping to Xen.
*/
if (task_thread_info(prev_p)->status & TS_USEDFPU) {
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 1;
- mcl++;
+ if (!preload_fpu) {
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+ mcl++;
+ }
}
#if 0 /* lazy fpu sanity check */
else BUG_ON(!(read_cr0() & 8));
@@ -427,6 +434,14 @@ __switch_to(struct task_struct *prev_p,
mcl++;
}
+ /* If we're going to preload the fpu context, make sure clts
+ is run while we're batching the cpu state updates. */
+ if (preload_fpu) {
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 0;
+ mcl++;
+ }
+
if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
set_xen_guest_handle(iobmp_op.bitmap,
(char *)next->io_bitmap_ptr);
@@ -451,7 +466,7 @@ __switch_to(struct task_struct *prev_p,
BUG();
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (preload_fpu)
prefetch(next->xstate);
/*
@@ -470,15 +485,8 @@ __switch_to(struct task_struct *prev_p,
*/
arch_end_context_switch(next_p);
- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
- */
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();
/*
* Restore %gs if needed (which is common)
--- head-2010-05-25.orig/arch/x86/kernel/process_64-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/process_64-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -64,9 +64,6 @@
asmlinkage extern void ret_from_fork(void);
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
-
static DEFINE_PER_CPU(unsigned char, is_idle);
unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
@@ -399,6 +396,7 @@ __switch_to(struct task_struct *prev_p,
#ifndef CONFIG_X86_NO_TSS
struct tss_struct *tss = &per_cpu(init_tss, cpu);
#endif
+ bool preload_fpu;
#if CONFIG_XEN_COMPAT > 0x030002
struct physdev_set_iopl iopl_op;
struct physdev_set_iobitmap iobmp_op;
@@ -409,8 +407,15 @@ __switch_to(struct task_struct *prev_p,
#endif
multicall_entry_t _mcl[8], *mcl = _mcl;
+ /*
+ * If the task has used fpu the last 5 timeslices, just do a full
+ * restore of the math state immediately to avoid the trap; the
+ * chances of needing FPU soon are obviously high now
+ */
+ preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
+
/* we're going to use this soon, after a few expensive things */
- if (next_p->fpu_counter > 5)
+ if (preload_fpu)
prefetch(next->xstate);
/*
@@ -422,12 +427,21 @@ __switch_to(struct task_struct *prev_p,
*/
if (task_thread_info(prev_p)->status & TS_USEDFPU) {
__save_init_fpu(prev_p); /* _not_ save_init_fpu() */
- mcl->op = __HYPERVISOR_fpu_taskswitch;
- mcl->args[0] = 1;
- mcl++;
+ if (!preload_fpu) {
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 1;
+ mcl++;
+ }
} else
prev_p->fpu_counter = 0;
+ /* Make sure cpu is ready for new context */
+ if (preload_fpu) {
+ mcl->op = __HYPERVISOR_fpu_taskswitch;
+ mcl->args[0] = 0;
+ mcl++;
+ }
+
/*
* Reload sp0.
* This is load_sp0(tss, next) with a multicall.
@@ -545,15 +559,12 @@ __switch_to(struct task_struct *prev_p,
task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
__switch_to_xtra(prev_p, next_p);
- /* If the task has used fpu the last 5 timeslices, just do a full
- * restore of the math state immediately to avoid the trap; the
- * chances of needing FPU soon are obviously high now
- *
- * tsk_used_math() checks prevent calling math_state_restore(),
- * which can sleep in the case of !tsk_used_math()
+ /*
+ * Preload the FPU context, now that we've determined that the
+ * task is likely to be using it.
*/
- if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
- math_state_restore();
+ if (preload_fpu)
+ __math_state_restore();
return prev_p;
}
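The context-switch hunks in process_32-xen.c and process_64-xen.c implement
eager FPU restore: a task whose fpu_counter exceeds 5 used the FPU in each of
its recent timeslices, so its math state is reloaded during the switch rather
than through a later device-not-available trap. Condensed from the hunks
above:

	preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
	...
	if (preload_fpu) {
		/* queue clts (arg 0) in the multicall batch instead of trapping */
		mcl->op = __HYPERVISOR_fpu_taskswitch;
		mcl->args[0] = 0;
		mcl++;
	}
	...
	if (preload_fpu)
		__math_state_restore();	/* raw restore; tsk_used_math() already checked */

Folding the fpu_taskswitch operation into the multicall list that the switch
path batches anyway means the preload costs no additional hypervisor
transition, which is the Xen-specific twist on the upstream change.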
--- head-2010-05-25.orig/arch/x86/kernel/quirks-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/quirks-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -509,7 +509,7 @@ static void __init quirk_amd_nb_node(str
pci_read_config_dword(nb_ht, 0x60, &val);
set_dev_node(&dev->dev, val & 7);
- pci_dev_put(dev);
+ pci_dev_put(nb_ht);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
--- head-2010-05-25.orig/arch/x86/kernel/rtc.c 2010-03-24 15:10:37.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/rtc.c 2010-03-24 15:32:27.000000000 +0100
@@ -189,8 +189,10 @@ void read_persistent_clock(struct timesp
unsigned long retval, flags;
#ifdef CONFIG_XEN
- if (!is_initial_xendomain())
- return xen_read_persistent_clock();
+ if (!is_initial_xendomain()) {
+ xen_read_persistent_clock(ts);
+ return;
+ }
#endif
spin_lock_irqsave(&rtc_lock, flags);
retval = x86_platform.get_wallclock();
--- head-2010-05-25.orig/arch/x86/kernel/setup-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/setup-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -27,6 +27,7 @@
#include <linux/screen_info.h>
#include <linux/ioport.h>
#include <linux/acpi.h>
+#include <linux/sfi.h>
#include <linux/apm_bios.h>
#include <linux/initrd.h>
#include <linux/bootmem.h>
@@ -66,6 +67,7 @@
#include <linux/percpu.h>
#include <linux/crash_dump.h>
+#include <linux/tboot.h>
#include <video/edid.h>
@@ -138,10 +140,6 @@ start_info_t *xen_start_info;
EXPORT_SYMBOL(xen_start_info);
#endif
-#ifndef ARCH_SETUP
-#define ARCH_SETUP
-#endif
-
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
* The direct mapping extends to max_pfn_mapped, so that we can directly access
@@ -164,9 +162,9 @@ int default_cpu_present_to_apicid(int mp
return __default_cpu_present_to_apicid(mps_cpu);
}
-int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
+int default_check_phys_apicid_present(int phys_apicid)
{
- return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
+ return __default_check_phys_apicid_present(phys_apicid);
}
#endif
@@ -203,13 +201,6 @@ static struct resource bss_resource = {
#ifdef CONFIG_X86_32
-static struct resource video_ram_resource = {
- .name = "Video RAM area",
- .start = 0xa0000,
- .end = 0xbffff,
- .flags = IORESOURCE_BUSY | IORESOURCE_MEM
-};
-
/* cpu data as detected by the assembly code in head.S */
struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1};
/* common cpu data for all cpus */
@@ -670,7 +661,7 @@ static struct resource standard_io_resou
.flags = IORESOURCE_BUSY | IORESOURCE_IO }
};
-static void __init reserve_standard_io_resources(void)
+void __init reserve_standard_io_resources(void)
{
int i;
@@ -706,10 +697,6 @@ static int __init setup_elfcorehdr(char
early_param("elfcorehdr", setup_elfcorehdr);
#endif
-static struct x86_quirks default_x86_quirks __initdata;
-
-struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
-
#ifdef CONFIG_X86_RESERVE_LOW_64K
static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
{
@@ -742,6 +729,13 @@ static struct dmi_system_id __initdata b
},
},
{
+ .callback = dmi_low_memory_corruption,
+ .ident = "Phoenix/MSC BIOS",
+ .matches = {
+ DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix/MSC"),
+ },
+ },
+ {
/*
* AMI BIOS with low memory corruption was found on Intel DG45ID board.
* It has a different DMI_BIOS_VENDOR = "Intel Corp.", for now we will
@@ -865,7 +859,7 @@ void __init setup_arch(char **cmdline_p)
copy_edid();
#endif /* CONFIG_XEN */
- ARCH_SETUP
+ x86_init.oem.arch_setup();
setup_memory_map();
parse_setup_data();
@@ -906,6 +900,16 @@ void __init setup_arch(char **cmdline_p)
strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
*cmdline_p = command_line;
+#ifdef CONFIG_X86_64
+ /*
+ * Must call this twice: once just to detect whether the hardware
+ * doesn't support NX (so that the early EHCI debug console setup can
+ * safely call set_fixmap()), and then again after parsing the early
+ * parameters to honor the respective command line option.
+ */
+ check_efer();
+#endif
+
parse_early_param();
#ifdef CONFIG_X86_64
@@ -945,12 +949,9 @@ void __init setup_arch(char **cmdline_p)
* VMware detection requires dmi to be available, so this
* needs to be done after dmi_scan_machine, for the BP.
*/
- init_hypervisor(&boot_cpu_data);
+ init_hypervisor_platform();
-#ifdef CONFIG_X86_32
- if (is_initial_xendomain())
- probe_roms();
-#endif
+ x86_init.resources.probe_roms();
#ifndef CONFIG_XEN
/* after parse_early_param, so could debug it */
@@ -1103,10 +1104,11 @@ void __init setup_arch(char **cmdline_p)
kvmclock_init();
#endif
- xen_pagetable_setup_start(swapper_pg_dir);
+ x86_init.paging.pagetable_setup_start(swapper_pg_dir);
paging_init();
- xen_pagetable_setup_done(swapper_pg_dir);
- paravirt_post_allocator_init();
+ x86_init.paging.pagetable_setup_done(swapper_pg_dir);
+
+ tboot_probe();
#ifdef CONFIG_X86_64
map_vsyscall();
@@ -1197,13 +1199,13 @@ void __init setup_arch(char **cmdline_p)
*/
acpi_boot_init();
-#if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS)
+ sfi_init();
+
/*
* get boot-time SMP configuration:
*/
if (smp_found_config)
get_smp_config();
-#endif
prefill_possible_map();
@@ -1227,11 +1229,7 @@ void __init setup_arch(char **cmdline_p)
e820_reserve_resources();
#endif
-#ifdef CONFIG_X86_32
- if (is_initial_xendomain())
- request_resource(&iomem_resource, &video_ram_resource);
-#endif
- reserve_standard_io_resources();
+ x86_init.resources.reserve_resources();
#ifndef CONFIG_XEN
e820_setup_gap();
@@ -1261,80 +1259,25 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
#endif /* CONFIG_XEN */
+ x86_init.oem.banner();
}
-#if defined(CONFIG_X86_32) && !defined(CONFIG_XEN)
-
-/**
- * x86_quirk_intr_init - post gate setup interrupt initialisation
- *
- * Description:
- * Fill in any interrupts that may have been left out by the general
- * init_IRQ() routine. interrupts having to do with the machine rather
- * than the devices on the I/O bus (like APIC interrupts in intel MP
- * systems) are started here.
- **/
-void __init x86_quirk_intr_init(void)
-{
- if (x86_quirks->arch_intr_init) {
- if (x86_quirks->arch_intr_init())
- return;
- }
-}
-
-/**
- * x86_quirk_trap_init - initialise system specific traps
- *
- * Description:
- * Called as the final act of trap_init(). Used in VISWS to initialise
- * the various board specific APIC traps.
- **/
-void __init x86_quirk_trap_init(void)
-{
- if (x86_quirks->arch_trap_init) {
- if (x86_quirks->arch_trap_init())
- return;
- }
-}
+#ifdef CONFIG_X86_32
-static struct irqaction irq0 = {
- .handler = timer_interrupt,
- .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
- .name = "timer"
+static struct resource video_ram_resource = {
+ .name = "Video RAM area",
+ .start = 0xa0000,
+ .end = 0xbffff,
+ .flags = IORESOURCE_BUSY | IORESOURCE_MEM
};
-/**
- * x86_quirk_pre_time_init - do any specific initialisations before.
- *
- **/
-void __init x86_quirk_pre_time_init(void)
+void __init i386_reserve_resources(void)
{
- if (x86_quirks->arch_pre_time_init)
- x86_quirks->arch_pre_time_init();
+ if (is_initial_xendomain())
+ request_resource(&iomem_resource, &video_ram_resource);
+ reserve_standard_io_resources();
}
-/**
- * x86_quirk_time_init - do any specific initialisations for the system timer.
- *
- * Description:
- * Must plug the system timer interrupt source at HZ into the IRQ listed
- * in irq_vectors.h:TIMER_IRQ
- **/
-void __init x86_quirk_time_init(void)
-{
- if (x86_quirks->arch_time_init) {
- /*
- * A nonzero return code does not mean failure, it means
- * that the architecture quirk does not want any
- * generic (timer) setup to be performed after this:
- */
- if (x86_quirks->arch_time_init())
- return;
- }
-
- irq0.mask = cpumask_of_cpu(0);
- setup_irq(0, &irq0);
-}
#endif /* CONFIG_X86_32 */
#ifdef CONFIG_XEN
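setup_arch() now drives every platform-specific step through the x86_init
operations table (oem.arch_setup, resources.probe_roms,
resources.reserve_resources, the paging setup hooks, oem.banner), and the
VISWS-era x86_quirk_* wrappers disappear; the Xen dom0 video-RAM reservation
survives inside the new i386_reserve_resources(). The shape of the table, as
an abridged sketch of 2.6.32's arch/x86/kernel/x86_init.c (not part of this
patch):

	struct x86_init_ops x86_init __initdata = {
		.resources = {
			.probe_roms		= x86_init_noop,
			.reserve_resources	= reserve_standard_io_resources,
			.memory_setup		= default_machine_specific_memory_setup,
		},
		.oem = {
			.arch_setup		= x86_init_noop,
			.banner			= default_banner,
		},
		/* ... mpparse, irqs, paging, timers ... */
	};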
--- head-2010-05-25.orig/arch/x86/kernel/sfi.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/arch/x86/kernel/sfi.c 2010-03-24 15:32:27.000000000 +0100
@@ -31,7 +31,7 @@
#include <asm/setup.h>
#include <asm/apic.h>
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
void __init mp_sfi_register_lapic_address(unsigned long address)
@@ -99,9 +99,12 @@ static int __init sfi_parse_ioapic(struc
pentry++;
}
+#ifndef CONFIG_XEN
WARN(pic_mode, KERN_WARNING
"SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
pic_mode = 0;
+#endif
+
return 0;
}
#endif /* CONFIG_X86_IO_APIC */
@@ -111,7 +114,7 @@ static int __init sfi_parse_ioapic(struc
*/
int __init sfi_platform_init(void)
{
-#ifdef CONFIG_X86_LOCAL_APIC
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_XEN)
mp_sfi_register_lapic_address(sfi_lapic_addr);
sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
#endif
--- head-2010-05-25.orig/arch/x86/kernel/time-xen.c 2010-05-12 09:02:08.000000000 +0200
+++ head-2010-05-25/arch/x86/kernel/time-xen.c 2010-05-12 09:02:39.000000000 +0200
@@ -1,31 +1,12 @@
/*
- * Copyright (C) 1991, 1992, 1995 Linus Torvalds
+ * Copyright (c) 1991,1992,1995 Linus Torvalds
+ * Copyright (c) 1994 Alan Modra
+ * Copyright (c) 1995 Markus Kuhn
+ * Copyright (c) 1996 Ingo Molnar
+ * Copyright (c) 1998 Andrea Arcangeli
+ * Copyright (c) 2002,2006 Vojtech Pavlik
+ * Copyright (c) 2003 Andi Kleen
*
- * This file contains the PC-specific time handling details:
- * reading the RTC at bootup, etc..
- * 1994-07-02 Alan Modra
- * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime
- * 1995-03-26 Markus Kuhn
- * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887
- * precision CMOS clock update
- * 1996-05-03 Ingo Molnar
- * fixed time warps in do_[slow|fast]_gettimeoffset()
- * 1997-09-10 Updated NTP code according to technical memorandum Jan '96
- * "A Kernel Model for Precision Timekeeping" by Dave Mills
- * 1998-09-05 (Various)
- * More robust do_fast_gettimeoffset() algorithm implemented
- * (works with APM, Cyrix 6x86MX and Centaur C6),
- * monotonic gettimeofday() with fast_get_timeoffset(),
- * drift-proof precision TSC calibration on boot
- * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D.
- * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>;
- * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>).
- * 1998-12-16 Andrea Arcangeli
- * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy
- * because was not accounting lost_ticks.
- * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli
- * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
- * serialize accesses to xtime/lost_ticks).
*/
#include <linux/init.h>
@@ -39,6 +20,7 @@
#include <linux/clocksource.h>
#include <linux/sysdev.h>
+#include <asm/vsyscall.h>
#include <asm/delay.h>
#include <asm/time.h>
#include <asm/timer.h>
@@ -52,7 +34,6 @@ DEFINE_SPINLOCK(i8253_lock);
EXPORT_SYMBOL(i8253_lock);
#ifdef CONFIG_X86_64
-#include <asm/vsyscall.h>
volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES;
#endif
@@ -415,38 +396,33 @@ unsigned long profile_pc(struct pt_regs
{
unsigned long pc = instruction_pointer(regs);
-#if defined(CONFIG_SMP) || defined(__x86_64__)
if (!user_mode_vm(regs) && in_lock_functions(pc)) {
-# ifdef CONFIG_FRAME_POINTER
+#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
-# else
-# ifdef __i386__
- unsigned long *sp = (unsigned long *)&regs->sp;
-# else
- unsigned long *sp = (unsigned long *)regs->sp;
-# endif
-
- /* Return address is either directly at stack pointer
- or above a saved flags. Eflags has bits 22-31 zero,
- kernel addresses don't. */
+#else
+ unsigned long *sp =
+ (unsigned long *)kernel_stack_pointer(regs);
+
+ /*
+ * Return address is either directly at stack pointer
+ * or above a saved flags. Eflags has bits 22-31 zero,
+ * kernel addresses don't.
+ */
if (sp[0] >> 22)
return sp[0];
if (sp[1] >> 22)
return sp[1];
-# endif
- }
#endif
+ }
return pc;
}
EXPORT_SYMBOL(profile_pc);
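/*
 * Editor's sketch, not part of the patch: on x86-32 kernel text sits at
 * or above PAGE_OFFSET (0xC0000000 with the default 3G/1G split), so
 * bits 22-31 of a kernel return address are never all zero, while
 * EFLAGS architecturally keeps bits 22-31 zero (ID, bit 21, is the
 * highest defined flag). Hence the "sp[N] >> 22" test in the hunk
 * above. The helper name below is hypothetical, for illustration only.
 */
static inline int looks_like_kernel_address(unsigned long word)
{
	return (word >> 22) != 0;	/* 0 for a saved EFLAGS word */
}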
/*
- * This is the same as the above, except we _also_ save the current
- * Time Stamp Counter value at the time of the timer interrupt, so that
- * we later on can estimate the time of day more exactly.
+ * Default timer interrupt handler
*/
-irqreturn_t timer_interrupt(int irq, void *dev_id)
+static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
s64 delta, delta_cpu, stolen, blocked;
unsigned int i, cpu = smp_processor_id();
@@ -568,8 +544,7 @@ irqreturn_t timer_interrupt(int irq, voi
/* Local timer processing (see update_process_times()). */
run_local_timers();
- if (rcu_pending(cpu))
- rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
+ rcu_check_callbacks(cpu, user_mode_vm(get_irq_regs()));
printk_tick();
scheduler_tick();
run_posix_cpu_timers(current);
@@ -669,7 +644,7 @@ static void init_missing_ticks_accountin
runstate->time[RUNSTATE_offline];
}
-unsigned long xen_read_persistent_clock(void)
+void xen_read_persistent_clock(struct timespec *ts)
{
const shared_info_t *s = HYPERVISOR_shared_info;
u32 version, sec, nsec;
@@ -686,7 +661,8 @@ unsigned long xen_read_persistent_clock(
delta = local_clock() + (u64)sec * NSEC_PER_SEC + nsec;
do_div(delta, NSEC_PER_SEC);
- return delta;
+ ts->tv_sec = delta;
+ ts->tv_nsec = 0;
}
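/*
 * Editor's sketch, not part of the patch: do_div(n, base) divides the
 * u64 in place, leaving the quotient in n and returning the 32-bit
 * remainder. The hunk above discards the remainder and stores 0 in
 * tv_nsec; keeping it would look like this (hypothetical helper name):
 */
static void wallclock_to_timespec(u64 delta, struct timespec *ts)
{
	u32 rem = do_div(delta, NSEC_PER_SEC);	/* delta now holds seconds */

	ts->tv_sec = delta;
	ts->tv_nsec = rem;
}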
int xen_update_persistent_clock(void)
--- head-2010-05-25.orig/arch/x86/kernel/traps-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/traps-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -14,7 +14,6 @@
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/uaccess.h>
-#include <linux/utsname.h>
#include <linux/kdebug.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -59,12 +58,12 @@
#include <asm/mach_traps.h>
#ifdef CONFIG_X86_64
+#include <asm/x86_init.h>
#include <asm/pgalloc.h>
#include <asm/proto.h>
#else
#include <asm/processor-flags.h>
#include <asm/setup.h>
-#include <asm/traps.h>
asmlinkage int system_call(void);
@@ -74,11 +73,9 @@ char ignore_fpu_irq;
#ifndef CONFIG_X86_NO_IDT
/*
* The IDT has to be page-aligned to simplify the Pentium
- * F0 0F bug workaround.. We have a special link segment
- * for this.
+ * F0 0F bug workaround.
*/
-gate_desc idt_table[256]
- __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
+gate_desc idt_table[NR_VECTORS] __page_aligned_data = { { { { 0, 0 } } }, };
#endif
#endif
@@ -780,27 +777,6 @@ do_spurious_interrupt_bug(struct pt_regs
#endif
}
-#ifdef CONFIG_X86_32
-unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
-{
- struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
- unsigned long base = (kesp - uesp) & -THREAD_SIZE;
- unsigned long new_kesp = kesp - base;
- unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
- __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
-
- /* Set up base for espfix segment */
- desc &= 0x00f0ff0000000000ULL;
- desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
- ((((__u64)base) << 32) & 0xff00000000000000ULL) |
- ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
- (lim_pages & 0xffff);
- *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
-
- return new_kesp;
-}
-#endif
-
asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
{
}
@@ -811,6 +787,28 @@ asmlinkage void __attribute__((weak)) sm
#endif /* CONFIG_XEN */
/*
+ * __math_state_restore assumes that cr0.TS is already clear and the
+ * fpu state is all ready for use. Used during context switch.
+ */
+void __math_state_restore(void)
+{
+ struct thread_info *thread = current_thread_info();
+ struct task_struct *tsk = thread->task;
+
+ /*
+ * Paranoid restore. send a SIGSEGV if we fail to restore the state.
+ */
+ if (unlikely(restore_fpu_checking(tsk))) {
+ stts();
+ force_sig(SIGSEGV, tsk);
+ return;
+ }
+
+ thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
+ tsk->fpu_counter++;
+}
+
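+/*
+ * Editor's note, an assumption about the caller rather than part of
+ * this patch: the split lets the context-switch code of this era call
+ * the raw restore directly once CR0.TS is already clear, e.g.:
+ *
+ *	if (preload_fpu)
+ *		__math_state_restore();
+ *
+ * while the #NM trap path still enters through math_state_restore(),
+ * which handles TS and lazy state allocation first.
+ */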
+/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
*
@@ -841,17 +839,7 @@ asmlinkage void math_state_restore(void)
}
/* NB. 'clts' is done for us by Xen during virtual trap. */
- /*
- * Paranoid restore. send a SIGSEGV if we fail to restore the state.
- */
- if (unlikely(restore_fpu_checking(tsk))) {
- stts();
- force_sig(SIGSEGV, tsk);
- return;
- }
-
- thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
- tsk->fpu_counter++;
+ __math_state_restore();
}
EXPORT_SYMBOL_GPL(math_state_restore);
@@ -967,6 +955,8 @@ void __init trap_init(void)
* Should be a barrier for any external CPU state:
*/
cpu_init();
+
+ x86_init.irqs.trap_init();
}
void __cpuinit smp_trap_init(trap_info_t *trap_ctxt)
--- head-2010-05-25.orig/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/kernel/vsyscall_64-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wa
vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic;
+ vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
}
@@ -227,19 +228,11 @@ static long __vsyscall(3) venosys_1(void
}
#ifdef CONFIG_SYSCTL
-
-static int
-vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- return proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
-}
-
static ctl_table kernel_table2[] = {
{ .procname = "vsyscall64",
.data = &vsyscall_gtod_data.sysctl_enabled, .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = vsyscall_sysctl_change },
+ .proc_handler = proc_dointvec },
{}
};
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ head-2010-05-25/arch/x86/kernel/x86_init-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de>
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#include <linux/bitmap.h>
+#include <linux/init.h>
+#include <linux/threads.h>
+
+#include <asm/mpspec.h>
+#include <asm/setup.h>
+#include <asm/apic.h>
+#include <asm/e820.h>
+#include <asm/time.h>
+#include <asm/irq.h>
+
+void __cpuinit x86_init_noop(void) { }
+void __init x86_init_uint_noop(unsigned int unused) { }
+void __init x86_init_pgd_noop(pgd_t *unused) { }
+
+/*
+ * The platform setup functions are preset with the default functions
+ * for standard PC hardware.
+ */
+struct x86_init_ops x86_init __initdata = {
+
+ .resources = {
+ .probe_roms = x86_init_noop,
+ .reserve_resources = reserve_standard_io_resources,
+ .memory_setup = default_machine_specific_memory_setup,
+ },
+
+ .mpparse = {
+ .mpc_record = x86_init_uint_noop,
+ .setup_ioapic_ids = x86_init_noop,
+ .mpc_apic_id = NULL,
+ .smp_read_mpc_oem = default_smp_read_mpc_oem,
+ .mpc_oem_bus_info = default_mpc_oem_bus_info,
+ .find_smp_config = default_find_smp_config,
+ .get_smp_config = default_get_smp_config,
+ },
+
+ .irqs = {
+ .pre_vector_init = NULL,
+ .intr_init = NULL,
+ .trap_init = x86_init_noop,
+ },
+
+ .oem = {
+ .arch_setup = xen_arch_setup,
+ .banner = x86_init_noop,
+ },
+
+ .paging = {
+ .pagetable_setup_start = x86_init_pgd_noop,
+ .pagetable_setup_done = x86_init_pgd_noop,
+ },
+
+ .timers = {
+ .setup_percpu_clockev = NULL,
+ .tsc_pre_init = x86_init_noop,
+ .timer_init = x86_init_noop,
+ },
+};
+
+struct x86_platform_ops x86_platform = {
+ .calibrate_tsc = NULL,
+ .get_wallclock = mach_get_cmos_time,
+ .set_wallclock = mach_set_rtc_mmss,
+};
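/*
 * Editor's note, hedged: this mirrors mainline's
 * arch/x86/kernel/x86_init.c with Xen-specific defaults substituted.
 * Hooks preset to x86_init_noop are invoked unconditionally through
 * the ops table, which is what the call added to trap_init() earlier
 * in this patch relies on:
 *
 *	x86_init.irqs.trap_init();
 */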
--- head-2010-05-25.orig/arch/x86/mm/fault-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/fault-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -10,7 +10,7 @@
#include <linux/bootmem.h> /* max_low_pfn */
#include <linux/kprobes.h> /* __kprobes, ... */
#include <linux/mmiotrace.h> /* kmmio_handler, ... */
-#include <linux/perf_counter.h> /* perf_swcounter_event */
+#include <linux/perf_event.h> /* perf_sw_event */
#include <asm/traps.h> /* dotraplinkage, ... */
#include <asm/pgalloc.h> /* pgd_*(), ... */
@@ -167,6 +167,7 @@ force_sig_info_fault(int si_signo, int s
info.si_errno = 0;
info.si_code = si_code;
info.si_addr = (void __user *)address;
+ info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
force_sig_info(si_signo, &info, tsk);
}
@@ -293,27 +294,25 @@ check_v8086_mode(struct pt_regs *regs, u
tsk->thread.screen_bitmap |= 1 << bit;
}
-static void dump_pagetable(unsigned long address)
+static bool low_pfn(unsigned long pfn)
{
- __typeof__(pte_val(__pte(0))) page;
+ return pfn < max_low_pfn;
+}
- page = read_cr3();
- page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT];
+static void dump_pagetable(unsigned long address)
+{
+ pgd_t *base = __va(read_cr3());
+ pgd_t *pgd = &base[pgd_index(address)];
+ pmd_t *pmd;
+ pte_t *pte;
#ifdef CONFIG_X86_PAE
- printk("*pdpt = %016Lx ", page);
- if ((page & _PAGE_PRESENT)
- && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn) {
- page = mfn_to_pfn(page >> PAGE_SHIFT);
- page <<= PAGE_SHIFT;
- page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT)
- & (PTRS_PER_PMD - 1)];
- printk(KERN_CONT "*pde = %016Lx ", page);
- page &= ~_PAGE_NX;
- }
-#else
- printk("*pde = %08lx ", page);
+ printk("*pdpt = %016Lx ", __pgd_val(*pgd));
+ if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
+ goto out;
#endif
+ pmd = pmd_offset(pud_offset(pgd, address), address);
+ printk(KERN_CONT "*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)__pmd_val(*pmd));
/*
* We must not directly access the pte in the highpte
@@ -321,17 +320,12 @@ static void dump_pagetable(unsigned long
* And let's rather not kmap-atomic the pte, just in case
* it's allocated already:
*/
- if ((page & _PAGE_PRESENT)
- && mfn_to_local_pfn(page >> PAGE_SHIFT) < max_low_pfn
- && !(page & _PAGE_PSE)) {
-
- page = mfn_to_pfn(page >> PAGE_SHIFT);
- page <<= PAGE_SHIFT;
- page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT)
- & (PTRS_PER_PTE - 1)];
- printk(KERN_CONT "*pte = %0*Lx ", sizeof(page)*2, (u64)page);
- }
+ if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
+ goto out;
+ pte = pte_offset_kernel(pmd, address);
+ printk(KERN_CONT "*pte = %0*Lx ", sizeof(*pte) * 2, (u64)__pte_val(*pte));
+out:
printk(KERN_CONT "\n");
}
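/*
 * Editor's sketch of the walk the rewritten 32-bit dump_pagetable()
 * performs, illustrative only. Each level is printed, then checked
 * before descending; the walk stops when an entry is absent, not
 * backed by a low enough pfn, or maps a large page:
 *
 *	pgd_t *pgd = &base[pgd_index(address)];	(base = __va(read_cr3()))
 *	pmd_t *pmd = pmd_offset(pud_offset(pgd, address), address);
 *	pte_t *pte = pte_offset_kernel(pmd, address);
 */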
@@ -460,16 +454,12 @@ static int bad_address(void *p)
static void dump_pagetable(unsigned long address)
{
- pgd_t *pgd;
+ pgd_t *base = __va(read_cr3() & PHYSICAL_PAGE_MASK);
+ pgd_t *pgd = base + pgd_index(address);
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- pgd = (pgd_t *)read_cr3();
-
- pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
-
- pgd += pgd_index(address);
if (bad_address(pgd))
goto bad;
@@ -809,10 +799,12 @@ out_of_memory(struct pt_regs *regs, unsi
}
static void
-do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address)
+do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
+ unsigned int fault)
{
struct task_struct *tsk = current;
struct mm_struct *mm = tsk->mm;
+ int code = BUS_ADRERR;
up_read(&mm->mmap_sem);
@@ -828,7 +820,15 @@ do_sigbus(struct pt_regs *regs, unsigned
tsk->thread.error_code = error_code;
tsk->thread.trap_no = 14;
- force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
+#ifdef CONFIG_MEMORY_FAILURE
+ if (fault & VM_FAULT_HWPOISON) {
+ printk(KERN_ERR
+ "MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
+ tsk->comm, tsk->pid, address);
+ code = BUS_MCEERR_AR;
+ }
+#endif
+ force_sig_info_fault(SIGBUS, code, address, tsk);
}
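/*
 * Editor's note, illustrative: this pairs with the si_addr_lsb hunk
 * earlier in this file. For BUS_MCEERR_AR the signal also reports the
 * granularity of the poisoned region, a whole page here:
 *
 *	info.si_addr_lsb = si_code == BUS_MCEERR_AR ? PAGE_SHIFT : 0;
 */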
static noinline void
@@ -838,8 +838,8 @@ mm_fault_error(struct pt_regs *regs, uns
if (fault & VM_FAULT_OOM) {
out_of_memory(regs, error_code, address);
} else {
- if (fault & VM_FAULT_SIGBUS)
- do_sigbus(regs, error_code, address);
+ if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON))
+ do_sigbus(regs, error_code, address, fault);
else
BUG();
}
@@ -1053,7 +1053,7 @@ do_page_fault(struct pt_regs *regs, unsi
if (unlikely(error_code & PF_RSVD))
pgtable_bad(regs, error_code, address);
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);
/*
* If we're in an interrupt, have no user context or are running
@@ -1150,11 +1150,11 @@ good_area:
if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
regs, address);
} else {
tsk->min_flt++;
- perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
regs, address);
}
--- head-2010-05-25.orig/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/highmem_32-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -24,7 +24,7 @@ void kunmap(struct page *page)
* no global lock is needed and because the kmap code must perform a global TLB
* invalidation when the kmap pool wraps.
*
- * However when holding an atomic kmap is is not legal to sleep, so atomic
+ * However when holding an atomic kmap it is not legal to sleep, so atomic
* kmaps are appropriate for short, tight code paths only.
*/
void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot)
@@ -149,9 +149,7 @@ EXPORT_SYMBOL(kunmap);
EXPORT_SYMBOL(kmap_atomic);
EXPORT_SYMBOL(kunmap_atomic);
EXPORT_SYMBOL(kmap_atomic_prot);
-#ifdef CONFIG_HIGHPTE
EXPORT_SYMBOL(kmap_atomic_to_page);
-#endif
EXPORT_SYMBOL(clear_highpage);
EXPORT_SYMBOL(copy_highpage);
--- head-2010-05-25.orig/arch/x86/mm/init-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/init-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -36,69 +36,6 @@ extern unsigned long extend_init_mapping
extern void xen_finish_init_mapping(void);
#endif
-int nx_enabled;
-
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-static int disable_nx __cpuinitdata;
-
-/*
- * noexec = on|off
- *
- * Control non-executable mappings for processes.
- *
- * on Enable
- * off Disable
- */
-static int __init noexec_setup(char *str)
-{
- if (!str)
- return -EINVAL;
- if (!strncmp(str, "on", 2)) {
- __supported_pte_mask |= _PAGE_NX;
- disable_nx = 0;
- } else if (!strncmp(str, "off", 3)) {
- disable_nx = 1;
- __supported_pte_mask &= ~_PAGE_NX;
- }
- return 0;
-}
-early_param("noexec", noexec_setup);
-#endif
-
-#ifdef CONFIG_X86_PAE
-static void __init set_nx(void)
-{
- unsigned int v[4], l, h;
-
- if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) {
- cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]);
-
- if ((v[3] & (1 << 20)) && !disable_nx) {
- rdmsr(MSR_EFER, l, h);
- l |= EFER_NX;
- wrmsr(MSR_EFER, l, h);
- nx_enabled = 1;
- __supported_pte_mask |= _PAGE_NX;
- }
- }
-}
-#else
-static inline void set_nx(void)
-{
-}
-#endif
-
-#ifdef CONFIG_X86_64
-void __cpuinit check_efer(void)
-{
- unsigned long efer;
-
- rdmsrl(MSR_EFER, efer);
- if (!(efer & EFER_NX) || disable_nx)
- __supported_pte_mask &= ~_PAGE_NX;
-}
-#endif
-
static void __init find_early_table_space(unsigned long end, int use_pse,
int use_gbpages)
{
--- head-2010-05-25.orig/arch/x86/mm/init_32-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/init_32-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -87,7 +87,7 @@ static pmd_t * __init one_md_table_init(
#ifdef CONFIG_X86_PAE
if (!(__pgd_val(*pgd) & _PAGE_PRESENT)) {
if (after_bootmem)
- pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ pmd_table = (pmd_t *)alloc_bootmem_pages(PAGE_SIZE);
else
pmd_table = (pmd_t *)alloc_low_page();
paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT);
@@ -124,7 +124,7 @@ static pte_t * __init one_page_table_ini
#endif
if (!page_table)
page_table =
- (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE);
+ (pte_t *)alloc_bootmem_pages(PAGE_SIZE);
} else
page_table = (pte_t *)alloc_low_page();
@@ -914,8 +914,6 @@ static void __init test_wp_bit(void)
}
}
-static struct kcore_list kcore_mem, kcore_vmalloc;
-
void __init mem_init(void)
{
int codesize, reservedpages, datasize, initsize;
@@ -949,13 +947,9 @@ void __init mem_init(void)
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
-
printk(KERN_INFO "Memory: %luk/%luk available (%dk kernel code, "
"%dk reserved, %dk data, %dk init, %ldk highmem)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ nr_free_pages() << (PAGE_SHIFT-10),
num_physpages << (PAGE_SHIFT-10),
codesize >> 10,
reservedpages << (PAGE_SHIFT-10),
--- head-2010-05-25.orig/arch/x86/mm/init_64-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/init_64-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -894,8 +894,7 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to
#endif /* CONFIG_MEMORY_HOTPLUG */
-static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel,
- kcore_modules, kcore_vsyscall;
+static struct kcore_list kcore_vsyscall;
void __init mem_init(void)
{
@@ -931,17 +930,12 @@ void __init mem_init(void)
initsize = (unsigned long) &__init_end - (unsigned long) &__init_begin;
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_mem, __va(0), max_low_pfn << PAGE_SHIFT);
- kclist_add(&kcore_vmalloc, (void *)VMALLOC_START,
- VMALLOC_END-VMALLOC_START);
- kclist_add(&kcore_kernel, &_stext, _end - _stext);
- kclist_add(&kcore_modules, (void *)MODULES_VADDR, MODULES_LEN);
kclist_add(&kcore_vsyscall, (void *)VSYSCALL_START,
- VSYSCALL_END - VSYSCALL_START);
+ VSYSCALL_END - VSYSCALL_START, KCORE_OTHER);
printk(KERN_INFO "Memory: %luk/%luk available (%ldk kernel code, "
"%ldk absent, %ldk reserved, %ldk data, %ldk init)\n",
- (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+ nr_free_pages() << (PAGE_SHIFT-10),
max_pfn << (PAGE_SHIFT-10),
codesize >> 10,
absent_pages << (PAGE_SHIFT-10),
--- head-2010-05-25.orig/arch/x86/mm/iomap_32-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/iomap_32-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -22,7 +22,7 @@
#include <linux/module.h>
#include <linux/highmem.h>
-int is_io_mapping_possible(resource_size_t base, unsigned long size)
+static int is_io_mapping_possible(resource_size_t base, unsigned long size)
{
#if !defined(CONFIG_X86_PAE) && defined(CONFIG_PHYS_ADDR_T_64BIT)
/* There is no way to map greater than 1 << 32 address without PAE */
@@ -31,7 +31,30 @@ int is_io_mapping_possible(resource_size
#endif
return 1;
}
-EXPORT_SYMBOL_GPL(is_io_mapping_possible);
+
+int iomap_create_wc(resource_size_t base, unsigned long size, pgprot_t *prot)
+{
+ unsigned long flag = _PAGE_CACHE_WC;
+ int ret;
+
+ if (!is_io_mapping_possible(base, size))
+ return -EINVAL;
+
+ ret = io_reserve_memtype(base, base + size, &flag);
+ if (ret)
+ return ret;
+
+ *prot = __pgprot(__PAGE_KERNEL | flag);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(iomap_create_wc);
+
+void
+iomap_free(resource_size_t base, unsigned long size)
+{
+ io_free_memtype(base, base + size);
+}
+EXPORT_SYMBOL_GPL(iomap_free);
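/*
 * Editor's sketch of the intended pairing of the two new exports;
 * hypothetical caller and function name, not part of the patch:
 */
static int example_touch_wc(resource_size_t base, unsigned long size)
{
	pgprot_t prot;
	void *va;

	if (iomap_create_wc(base, size, &prot))
		return -EINVAL;

	va = kmap_atomic_prot_pfn(base >> PAGE_SHIFT, KM_USER0, prot);
	/* ... write to the write-combined mapping ... */
	kunmap_atomic(va, KM_USER0);

	iomap_free(base, size);
	return 0;
}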
void *kmap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
{
--- head-2010-05-25.orig/arch/x86/mm/ioremap-xen.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/ioremap-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -23,81 +23,7 @@
#include <asm/pgalloc.h>
#include <asm/pat.h>
-static inline int phys_addr_valid(resource_size_t addr)
-{
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
- return !(addr >> boot_cpu_data.x86_phys_bits);
-#else
- return 1;
-#endif
-}
-
-#ifdef CONFIG_X86_64
-
-#define phys_base 0
-
-unsigned long __phys_addr(unsigned long x)
-{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- VIRTUAL_BUG_ON(x >= KERNEL_IMAGE_SIZE);
- x += phys_base;
- } else {
- VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- x -= PAGE_OFFSET;
- VIRTUAL_BUG_ON(!phys_addr_valid(x));
- }
- return x;
-}
-EXPORT_SYMBOL(__phys_addr);
-
-bool __virt_addr_valid(unsigned long x)
-{
- if (x >= __START_KERNEL_map) {
- x -= __START_KERNEL_map;
- if (x >= KERNEL_IMAGE_SIZE)
- return false;
- x += phys_base;
- } else {
- if (x < PAGE_OFFSET)
- return false;
- x -= PAGE_OFFSET;
- if (!phys_addr_valid(x))
- return false;
- }
-
- return pfn_valid(x >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#undef phys_base
-
-#else
-
-#ifdef CONFIG_DEBUG_VIRTUAL
-unsigned long __phys_addr(unsigned long x)
-{
- /* VMALLOC_* aren't constants */
- VIRTUAL_BUG_ON(x < PAGE_OFFSET);
- VIRTUAL_BUG_ON(__vmalloc_start_set && is_vmalloc_addr((void *) x));
- return x - PAGE_OFFSET;
-}
-EXPORT_SYMBOL(__phys_addr);
-#endif
-
-bool __virt_addr_valid(unsigned long x)
-{
- if (x < PAGE_OFFSET)
- return false;
- if (__vmalloc_start_set && is_vmalloc_addr((void *) x))
- return false;
- if (x >= FIXADDR_START)
- return false;
- return pfn_valid((x - PAGE_OFFSET) >> PAGE_SHIFT);
-}
-EXPORT_SYMBOL(__virt_addr_valid);
-
-#endif
+#include "physaddr.h"
static int direct_remap_area_pte_fn(pte_t *pte,
struct page *pmd_page,
@@ -407,30 +333,19 @@ static void __iomem *__ioremap_caller(re
retval = reserve_memtype(phys_addr, (u64)phys_addr + size,
prot_val, &new_prot_val);
if (retval) {
- pr_debug("Warning: reserve_memtype returned %d\n", retval);
+ printk(KERN_ERR "ioremap reserve_memtype failed %d\n", retval);
return NULL;
}
if (prot_val != new_prot_val) {
- /*
- * Do not fallback to certain memory types with certain
- * requested type:
- * - request is uc-, return cannot be write-back
- * - request is uc-, return cannot be write-combine
- * - request is write-combine, return cannot be write-back
- */
- if ((prot_val == _PAGE_CACHE_UC_MINUS &&
- (new_prot_val == _PAGE_CACHE_WB ||
- new_prot_val == _PAGE_CACHE_WC)) ||
- (prot_val == _PAGE_CACHE_WC &&
- new_prot_val == _PAGE_CACHE_WB)) {
- pr_debug(
+ if (!is_new_memtype_allowed(phys_addr, size,
+ prot_val, new_prot_val)) {
+ printk(KERN_ERR
"ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n",
(unsigned long long)phys_addr,
(unsigned long long)(phys_addr + size),
prot_val, new_prot_val);
- free_memtype(phys_addr, phys_addr + size);
- return NULL;
+ goto err_free_memtype;
}
prot_val = new_prot_val;
}
@@ -456,27 +371,26 @@ static void __iomem *__ioremap_caller(re
*/
area = get_vm_area_caller(size, VM_IOREMAP, caller);
if (!area)
- return NULL;
+ goto err_free_memtype;
area->phys_addr = phys_addr;
vaddr = (unsigned long) area->addr;
- if (kernel_map_sync_memtype(phys_addr, size, prot_val)) {
- free_memtype(phys_addr, phys_addr + size);
- free_vm_area(area);
- return NULL;
- }
+ if (kernel_map_sync_memtype(phys_addr, size, prot_val))
+ goto err_free_area;
if (__direct_remap_pfn_range(&init_mm, vaddr, PFN_DOWN(phys_addr),
- size, prot, domid)) {
- free_memtype(phys_addr, phys_addr + size);
- free_vm_area(area);
- return NULL;
- }
+ size, prot, domid))
+ goto err_free_area;
ret_addr = (void __iomem *) (vaddr + offset);
mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr);
return ret_addr;
+err_free_area:
+ free_vm_area(area);
+err_free_memtype:
+ free_memtype(phys_addr, phys_addr + size);
+ return NULL;
}
/**
--- head-2010-05-25.orig/arch/x86/mm/pageattr-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pageattr-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -12,6 +12,7 @@
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/pfn.h>
+#include <linux/percpu.h>
#include <asm/e820.h>
#include <asm/processor.h>
@@ -143,6 +144,7 @@ void clflush_cache_range(void *vaddr, un
mb();
}
+EXPORT_SYMBOL_GPL(clflush_cache_range);
static void __cpa_flush_all(void *arg)
{
@@ -707,7 +709,7 @@ static int cpa_process_alias(struct cpa_
{
struct cpa_data alias_cpa;
unsigned long laddr = (unsigned long)__va(cpa->pfn << PAGE_SHIFT);
- unsigned long vaddr, remapped;
+ unsigned long vaddr;
int ret;
if (cpa->pfn >= max_pfn_mapped)
@@ -765,24 +767,6 @@ static int cpa_process_alias(struct cpa_
}
#endif
- /*
- * If the PMD page was partially used for per-cpu remapping,
- * the recycled area needs to be split and modified. Because
- * the area is always proper subset of a PMD page
- * cpa->numpages is guaranteed to be 1 for these areas, so
- * there's no need to loop over and check for further remaps.
- */
- remapped = (unsigned long)pcpu_lpage_remapped((void *)laddr);
- if (remapped) {
- WARN_ON(cpa->numpages > 1);
- alias_cpa = *cpa;
- alias_cpa.vaddr = &remapped;
- alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
- ret = __change_page_attr_set_clr(&alias_cpa, 0);
- if (ret)
- return ret;
- }
-
return 0;
}
@@ -843,6 +827,7 @@ static int change_page_attr_set_clr(unsi
{
struct cpa_data cpa;
int ret, cache, checkalias;
+ unsigned long baddr = 0;
/*
* Check, if we are requested to change a not supported
@@ -874,6 +859,11 @@ static int change_page_attr_set_clr(unsi
*/
WARN_ON_ONCE(1);
}
+ /*
+ * Save address for cache flush. *addr is modified in the call
+ * to __change_page_attr_set_clr() below.
+ */
+ baddr = *addr;
}
/* Must avoid aliasing mappings in the highmem code */
@@ -921,7 +911,7 @@ static int change_page_attr_set_clr(unsi
cpa_flush_array(addr, numpages, cache,
cpa.flags, pages);
} else
- cpa_flush_range(*addr, numpages, cache);
+ cpa_flush_range(baddr, numpages, cache);
} else
cpa_flush_all(cache);
--- head-2010-05-25.orig/arch/x86/mm/pat-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pat-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -15,6 +15,7 @@
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/rbtree.h>
#include <asm/cacheflush.h>
#include <asm/processor.h>
@@ -80,6 +81,7 @@ enum {
void pat_init(void)
{
u64 pat;
+ bool boot_cpu = !boot_pat_state;
if (!pat_enabled)
return;
@@ -131,8 +133,10 @@ void pat_init(void)
if (!boot_pat_state)
boot_pat_state = pat;
#endif
- printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
- smp_processor_id(), boot_pat_state, pat);
+
+ if (boot_cpu)
+ printk(KERN_INFO "x86 PAT enabled: cpu %d, old 0x%Lx, new 0x%Lx\n",
+ smp_processor_id(), boot_pat_state, pat);
}
#undef PAT
@@ -160,11 +164,10 @@ static char *cattr_name(unsigned long fl
* areas). All the aliases have the same cache attributes of course.
* Zero attributes are represented as holes.
*
- * Currently the data structure is a list because the number of mappings
- * are expected to be relatively small. If this should be a problem
- * it could be changed to a rbtree or similar.
+ * The data structure is a list that is also organized as an rbtree
+ * sorted on the start address of memtype range.
*
- * memtype_lock protects the whole list.
+ * memtype_lock protects both the linear list and rbtree.
*/
struct memtype {
@@ -172,11 +175,53 @@ struct memtype {
u64 end;
unsigned long type;
struct list_head nd;
+ struct rb_node rb;
};
+static struct rb_root memtype_rbroot = RB_ROOT;
static LIST_HEAD(memtype_list);
static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */
+static struct memtype *memtype_rb_search(struct rb_root *root, u64 start)
+{
+ struct rb_node *node = root->rb_node;
+ struct memtype *last_lower = NULL;
+
+ while (node) {
+ struct memtype *data = container_of(node, struct memtype, rb);
+
+ if (data->start < start) {
+ last_lower = data;
+ node = node->rb_right;
+ } else if (data->start > start) {
+ node = node->rb_left;
+ } else
+ return data;
+ }
+
+ /* Will return NULL if there is no entry with its start <= start */
+ return last_lower;
+}
+
+static void memtype_rb_insert(struct rb_root *root, struct memtype *data)
+{
+ struct rb_node **new = &(root->rb_node);
+ struct rb_node *parent = NULL;
+
+ while (*new) {
+ struct memtype *this = container_of(*new, struct memtype, rb);
+
+ parent = *new;
+ if (data->start <= this->start)
+ new = &((*new)->rb_left);
+ else if (data->start > this->start)
+ new = &((*new)->rb_right);
+ }
+
+ rb_link_node(&data->rb, parent, new);
+ rb_insert_color(&data->rb, root);
+}
+
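+/*
+ * Editor's illustration with hypothetical values: memtype_rb_search()
+ * is a floor search on the start address.
+ *
+ *	entries by start:  0x1000  0x4000  0x9000
+ *	search(0x4000) -> the 0x4000 entry (exact match)
+ *	search(0x5000) -> the 0x4000 entry (greatest start <= key)
+ *	search(0x0800) -> NULL (every entry starts above the key)
+ */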
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end);
static inline u8 _mtrr_type_lookup(u64 start, u64 end)
{
@@ -240,9 +285,6 @@ chk_conflict(struct memtype *new, struct
return -EBUSY;
}
-static struct memtype *cached_entry;
-static u64 cached_start;
-
static int pat_pagerange_is_ram(resource_size_t start, resource_size_t end)
{
int ram_page = 0, not_rampage = 0;
@@ -271,69 +313,65 @@ static int pat_pagerange_is_ram(resource
}
/*
- * For RAM pages, mark the pages as non WB memory type using
- * PageNonWB (PG_arch_1). We allow only one set_memory_uc() or
- * set_memory_wc() on a RAM page at a time before marking it as WB again.
- * This is ok, because only one driver will be owning the page and
- * doing set_memory_*() calls.
- *
- * For now, we use PageNonWB to track that the RAM page is being mapped
- * as non WB. In future, we will have to use one more flag
- * (or some other mechanism in page_struct) to distinguish between
- * UC and WC mapping.
+ * For RAM pages, we use page flags to mark the pages with appropriate type.
+ * Here we do two passes:
+ * - Find the memtype of all the pages in the range, look for any conflicts
+ * - In case of no conflicts, set the new memtype for pages in the range
+ *
+ * Caller must hold memtype_lock for atomicity.
*/
static int reserve_ram_pages_type(u64 start, u64 end, unsigned long req_type,
unsigned long *new_type)
{
struct page *page;
- unsigned long mfn, end_mfn;
+ unsigned long mfn;
+
+ if (req_type == _PAGE_CACHE_UC) {
+ /* We do not support strong UC */
+ WARN_ON_ONCE(1);
+ req_type = _PAGE_CACHE_UC_MINUS;
+ }
for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
- unsigned long pfn = mfn_to_local_pfn(mfn);
+ unsigned long type, pfn = mfn_to_local_pfn(mfn);
BUG_ON(!pfn_valid(pfn));
page = pfn_to_page(pfn);
- if (page_mapped(page) || PageNonWB(page))
- goto out;
+ type = get_page_memtype(page);
+ if (type != -1) {
+ printk(KERN_INFO "reserve_ram_pages_type failed "
+ "0x%Lx-0x%Lx, track 0x%lx, req 0x%lx\n",
+ start, end, type, req_type);
+ if (new_type)
+ *new_type = type;
- SetPageNonWB(page);
+ return -EBUSY;
+ }
}
- return 0;
-out:
- end_mfn = mfn;
- for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
+ if (new_type)
+ *new_type = req_type;
+
+ for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
page = pfn_to_page(mfn_to_local_pfn(mfn));
- ClearPageNonWB(page);
+ set_page_memtype(page, req_type);
}
-
- return -EINVAL;
+ return 0;
}
static int free_ram_pages_type(u64 start, u64 end)
{
struct page *page;
- unsigned long mfn, end_mfn;
+ unsigned long mfn;
for (mfn = (start >> PAGE_SHIFT); mfn < (end >> PAGE_SHIFT); ++mfn) {
unsigned long pfn = mfn_to_local_pfn(mfn);
BUG_ON(!pfn_valid(pfn));
page = pfn_to_page(pfn);
- if (page_mapped(page) || !PageNonWB(page))
- goto out;
-
- ClearPageNonWB(page);
+ set_page_memtype(page, -1);
}
return 0;
-
-out:
- end_mfn = mfn;
- for (mfn = (start >> PAGE_SHIFT); mfn < end_mfn; ++mfn) {
- page = pfn_to_page(mfn_to_local_pfn(mfn));
- SetPageNonWB(page);
- }
- return -EINVAL;
}
/*
@@ -367,6 +405,8 @@ int reserve_memtype(u64 start, u64 end,
if (new_type) {
if (req_type == -1)
*new_type = _PAGE_CACHE_WB;
+ else if (req_type == _PAGE_CACHE_WC)
+ *new_type = _PAGE_CACHE_UC_MINUS;
else
*new_type = req_type & _PAGE_CACHE_MASK;
}
@@ -392,11 +432,16 @@ int reserve_memtype(u64 start, u64 end,
*new_type = actual_type;
is_range_ram = pat_pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return reserve_ram_pages_type(start, end, req_type,
- new_type);
- else if (is_range_ram < 0)
+ if (is_range_ram == 1) {
+
+ spin_lock(&memtype_lock);
+ err = reserve_ram_pages_type(start, end, req_type, new_type);
+ spin_unlock(&memtype_lock);
+
+ return err;
+ } else if (is_range_ram < 0) {
return -EINVAL;
+ }
new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
if (!new)
@@ -408,17 +453,11 @@ int reserve_memtype(u64 start, u64 end,
spin_lock(&memtype_lock);
- if (cached_entry && start >= cached_start)
- entry = cached_entry;
- else
- entry = list_entry(&memtype_list, struct memtype, nd);
-
/* Search for existing mapping that overlaps the current range */
where = NULL;
- list_for_each_entry_continue(entry, &memtype_list, nd) {
+ list_for_each_entry(entry, &memtype_list, nd) {
if (end <= entry->start) {
where = entry->nd.prev;
- cached_entry = list_entry(where, struct memtype, nd);
break;
} else if (start <= entry->start) { /* end > entry->start */
err = chk_conflict(new, entry, new_type);
@@ -426,8 +465,6 @@ int reserve_memtype(u64 start, u64 end,
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
where = entry->nd.prev;
- cached_entry = list_entry(where,
- struct memtype, nd);
}
break;
} else if (start < entry->end) { /* start > entry->start */
@@ -435,8 +472,6 @@ int reserve_memtype(u64 start, u64 end,
if (!err) {
dprintk("Overlap at 0x%Lx-0x%Lx\n",
entry->start, entry->end);
- cached_entry = list_entry(entry->nd.prev,
- struct memtype, nd);
/*
* Move to right position in the linked
@@ -464,13 +499,13 @@ int reserve_memtype(u64 start, u64 end,
return err;
}
- cached_start = start;
-
if (where)
list_add(&new->nd, where);
else
list_add_tail(&new->nd, &memtype_list);
+ memtype_rb_insert(&memtype_rbroot, new);
+
spin_unlock(&memtype_lock);
dprintk("reserve_memtype added 0x%Lx-0x%Lx, track %s, req %s, ret %s\n",
@@ -482,7 +517,7 @@ int reserve_memtype(u64 start, u64 end,
int free_memtype(u64 start, u64 end)
{
- struct memtype *entry;
+ struct memtype *entry, *saved_entry;
int err = -EINVAL;
int is_range_ram;
@@ -494,23 +529,58 @@ int free_memtype(u64 start, u64 end)
return 0;
is_range_ram = pat_pagerange_is_ram(start, end);
- if (is_range_ram == 1)
- return free_ram_pages_type(start, end);
- else if (is_range_ram < 0)
+ if (is_range_ram == 1) {
+
+ spin_lock(&memtype_lock);
+ err = free_ram_pages_type(start, end);
+ spin_unlock(&memtype_lock);
+
+ return err;
+ } else if (is_range_ram < 0) {
return -EINVAL;
+ }
spin_lock(&memtype_lock);
- list_for_each_entry(entry, &memtype_list, nd) {
+
+ entry = memtype_rb_search(&memtype_rbroot, start);
+ if (unlikely(entry == NULL))
+ goto unlock_ret;
+
+ /*
+	 * The saved entry has a start address no greater than the one we
+	 * searched for. Since the list is kept sorted by start address, walk
+	 * it in both directions from there to find the entry that matches
+	 * both start and end.
+ */
+ saved_entry = entry;
+ list_for_each_entry_from(entry, &memtype_list, nd) {
if (entry->start == start && entry->end == end) {
- if (cached_entry == entry || cached_start == start)
- cached_entry = NULL;
+ rb_erase(&entry->rb, &memtype_rbroot);
+ list_del(&entry->nd);
+ kfree(entry);
+ err = 0;
+ break;
+ } else if (entry->start > start) {
+ break;
+ }
+ }
+
+ if (!err)
+ goto unlock_ret;
+ entry = saved_entry;
+ list_for_each_entry_reverse(entry, &memtype_list, nd) {
+ if (entry->start == start && entry->end == end) {
+ rb_erase(&entry->rb, &memtype_rbroot);
list_del(&entry->nd);
kfree(entry);
err = 0;
break;
+ } else if (entry->start < start) {
+ break;
}
}
+unlock_ret:
spin_unlock(&memtype_lock);
if (err) {
@@ -524,6 +594,103 @@ int free_memtype(u64 start, u64 end)
}
+#ifndef CONFIG_XEN
+/**
+ * lookup_memtype - Looks up the memory type for a physical address
+ * @paddr: physical address of which memory type needs to be looked up
+ *
+ * Only to be called when PAT is enabled
+ *
+ * Returns _PAGE_CACHE_WB, _PAGE_CACHE_WC, _PAGE_CACHE_UC_MINUS or
+ * _PAGE_CACHE_UC
+ */
+static unsigned long lookup_memtype(u64 paddr)
+{
+ int rettype = _PAGE_CACHE_WB;
+ struct memtype *entry;
+
+ if (is_ISA_range(paddr, paddr + PAGE_SIZE - 1))
+ return rettype;
+
+ if (pat_pagerange_is_ram(paddr, paddr + PAGE_SIZE)) {
+ struct page *page;
+ spin_lock(&memtype_lock);
+ page = pfn_to_page(paddr >> PAGE_SHIFT);
+ rettype = get_page_memtype(page);
+ spin_unlock(&memtype_lock);
+ /*
+ * -1 from get_page_memtype() implies RAM page is in its
+ * default state and not reserved, and hence of type WB
+ */
+ if (rettype == -1)
+ rettype = _PAGE_CACHE_WB;
+
+ return rettype;
+ }
+
+ spin_lock(&memtype_lock);
+
+ entry = memtype_rb_search(&memtype_rbroot, paddr);
+ if (entry != NULL)
+ rettype = entry->type;
+ else
+ rettype = _PAGE_CACHE_UC_MINUS;
+
+ spin_unlock(&memtype_lock);
+ return rettype;
+}
+#endif
+
+/**
+ * io_reserve_memtype - Request a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ * @type: A pointer to memtype, with requested type. On success, requested
+ * or any other compatible type that was available for the region is returned
+ *
+ * On success, returns 0
+ * On failure, returns non-zero
+ */
+int io_reserve_memtype(resource_size_t start, resource_size_t end,
+ unsigned long *type)
+{
+ resource_size_t size = end - start;
+ unsigned long req_type = *type;
+ unsigned long new_type;
+ int ret;
+
+ WARN_ON_ONCE(iomem_map_sanity_check(start, size));
+
+ ret = reserve_memtype(start, end, req_type, &new_type);
+ if (ret)
+ goto out_err;
+
+ if (!is_new_memtype_allowed(start, size, req_type, new_type))
+ goto out_free;
+
+ if (kernel_map_sync_memtype(start, size, new_type) < 0)
+ goto out_free;
+
+ *type = new_type;
+ return 0;
+
+out_free:
+ free_memtype(start, end);
+ ret = -EBUSY;
+out_err:
+ return ret;
+}
+
+/**
+ * io_free_memtype - Release a memory type mapping for a region of memory
+ * @start: start (physical address) of the region
+ * @end: end (physical address) of the region
+ */
+void io_free_memtype(resource_size_t start, resource_size_t end)
+{
+ free_memtype(start, end);
+}
+
pgprot_t phys_mem_access_prot(struct file *file, unsigned long mfn,
unsigned long size, pgprot_t vma_prot)
{
@@ -605,9 +772,6 @@ int phys_mem_access_prot_allowed(struct
*/
int kernel_map_sync_memtype(u64 ma, unsigned long size, unsigned long flags)
{
- if (!pat_enabled)
- return 0;
-
return ioremap_check_change_attr(ma >> PAGE_SHIFT, size, flags);
}
@@ -628,11 +792,29 @@ static int reserve_pfn_range(u64 paddr,
is_ram = pat_pagerange_is_ram(paddr, paddr + size);
/*
- * reserve_pfn_range() doesn't support RAM pages. Maintain the current
- * behavior with RAM pages by returning success.
+ * reserve_pfn_range() for RAM pages. We do not refcount to keep
+ * track of number of mappings of RAM pages. We can assert that
+ * the type requested matches the type of first page in the range.
*/
- if (is_ram != 0)
+ if (is_ram) {
+ if (!pat_enabled)
+ return 0;
+
+ flags = lookup_memtype(paddr);
+ if (want_flags != flags) {
+ printk(KERN_WARNING
+ "%s:%d map pfn RAM range req %s for %Lx-%Lx, got %s\n",
+ current->comm, current->pid,
+ cattr_name(want_flags),
+ (unsigned long long)paddr,
+ (unsigned long long)(paddr + size),
+ cattr_name(flags));
+ *vma_prot = __pgprot((pgprot_val(*vma_prot) &
+ (~_PAGE_CACHE_MASK)) |
+ flags);
+ }
return 0;
+ }
ret = reserve_memtype(paddr, paddr + size, want_flags, &flags);
if (ret)
@@ -694,14 +876,6 @@ int track_pfn_vma_copy(struct vm_area_st
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
- if (!pat_enabled)
- return 0;
-
- /*
- * For now, only handle remap_pfn_range() vmas where
- * is_linear_pfn_mapping() == TRUE. Handling of
- * vm_insert_pfn() is TBD.
- */
if (is_linear_pfn_mapping(vma)) {
/*
* reserve the whole chunk covered by vma. We need the
@@ -729,23 +903,24 @@ int track_pfn_vma_copy(struct vm_area_st
int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
unsigned long pfn, unsigned long size)
{
+ unsigned long flags;
resource_size_t paddr;
unsigned long vma_size = vma->vm_end - vma->vm_start;
- if (!pat_enabled)
- return 0;
-
- /*
- * For now, only handle remap_pfn_range() vmas where
- * is_linear_pfn_mapping() == TRUE. Handling of
- * vm_insert_pfn() is TBD.
- */
if (is_linear_pfn_mapping(vma)) {
/* reserve the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
return reserve_pfn_range(paddr, vma_size, prot, 0);
}
+ if (!pat_enabled)
+ return 0;
+
+ /* for vm_insert_pfn and friends, we set prot based on lookup */
+ flags = lookup_memtype(pfn << PAGE_SHIFT);
+ *prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+ flags);
+
return 0;
}
@@ -760,14 +935,6 @@ void untrack_pfn_vma(struct vm_area_stru
resource_size_t paddr;
unsigned long vma_size = vma->vm_end - vma->vm_start;
- if (!pat_enabled)
- return;
-
- /*
- * For now, only handle remap_pfn_range() vmas where
- * is_linear_pfn_mapping() == TRUE. Handling of
- * vm_insert_pfn() is TBD.
- */
if (is_linear_pfn_mapping(vma)) {
/* free the whole chunk starting from vm_pgoff */
paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
@@ -844,7 +1011,7 @@ static int memtype_seq_show(struct seq_f
return 0;
}
-static struct seq_operations memtype_seq_ops = {
+static const struct seq_operations memtype_seq_ops = {
.start = memtype_seq_start,
.next = memtype_seq_next,
.stop = memtype_seq_stop,
--- head-2010-05-25.orig/arch/x86/mm/pgtable-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/arch/x86/mm/pgtable-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -692,8 +692,7 @@ int ptep_set_access_flags(struct vm_area
if (likely(vma->vm_mm == current->mm)) {
if (HYPERVISOR_update_va_mapping(address,
entry,
- uvm_multi(vma->vm_mm->cpu_vm_mask) |
- UVMF_INVLPG))
+ uvm_multi(mm_cpumask(vma->vm_mm))|UVMF_INVLPG))
BUG();
} else {
xen_l1_entry_update(ptep, entry);
--- head-2010-05-25.orig/arch/x86/mm/physaddr.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/arch/x86/mm/physaddr.c 2010-03-24 15:32:27.000000000 +0100
@@ -8,6 +8,10 @@
#ifdef CONFIG_X86_64
+#ifdef CONFIG_XEN
+#define phys_base 0
+#endif
+
unsigned long __phys_addr(unsigned long x)
{
if (x >= __START_KERNEL_map) {
--- head-2010-05-25.orig/drivers/acpi/processor_driver.c 2010-04-15 10:07:40.000000000 +0200
+++ head-2010-05-25/drivers/acpi/processor_driver.c 2010-05-25 09:25:03.000000000 +0200
@@ -663,7 +663,7 @@ static int __cpuinit acpi_processor_add(
result = processor_extcntl_prepare(pr);
if (result)
- goto end;
+ goto err_power_exit;
pr->cdev = thermal_cooling_device_register("Processor", device,
&processor_cooling_ops);
--- head-2010-05-25.orig/drivers/char/agp/agp.h 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/agp.h 2010-03-24 15:32:27.000000000 +0100
@@ -31,6 +31,10 @@
#include <asm/agp.h> /* for flush_agp_cache() */
+#ifndef virt_to_gart
+#define virt_to_gart virt_to_phys
+#endif
+
#define PFX "agpgart: "
//#define AGP_DEBUG 1
--- head-2010-05-25.orig/drivers/char/agp/amd-k7-agp.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/amd-k7-agp.c 2010-03-24 15:32:27.000000000 +0100
@@ -44,7 +44,7 @@ static int amd_create_page_map(struct am
#ifndef CONFIG_X86
SetPageReserved(virt_to_page(page_map->real));
global_cache_flush();
- page_map->remapped = ioremap_nocache(virt_to_phys(page_map->real),
+ page_map->remapped = ioremap_nocache(virt_to_gart(page_map->real),
PAGE_SIZE);
if (page_map->remapped == NULL) {
ClearPageReserved(virt_to_page(page_map->real));
@@ -160,7 +160,7 @@ static int amd_create_gatt_table(struct
agp_bridge->gatt_table_real = (u32 *)page_dir.real;
agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
- agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+ agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
/* Get the address for the gart region.
* This is a bus address even on the alpha, b/c its
@@ -173,7 +173,7 @@ static int amd_create_gatt_table(struct
/* Calculate the agp offset */
for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
- writel(virt_to_phys(amd_irongate_private.gatt_pages[i]->real) | 1,
+ writel(virt_to_gart(amd_irongate_private.gatt_pages[i]->real) | 1,
page_dir.remapped+GET_PAGE_DIR_OFF(addr));
readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. */
}
--- head-2010-05-25.orig/drivers/char/agp/amd64-agp.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/amd64-agp.c 2010-03-24 15:32:27.000000000 +0100
@@ -178,7 +178,7 @@ static const struct aper_size_info_32 am
static int amd_8151_configure(void)
{
- unsigned long gatt_bus = virt_to_phys(agp_bridge->gatt_table_real);
+ unsigned long gatt_bus = virt_to_gart(agp_bridge->gatt_table_real);
int i;
/* Configure AGP regs in each x86-64 host bridge. */
@@ -558,7 +558,7 @@ static void __devexit agp_amd64_remove(s
{
struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
- release_mem_region(virt_to_phys(bridge->gatt_table_real),
+ release_mem_region(virt_to_gart(bridge->gatt_table_real),
amd64_aperture_sizes[bridge->aperture_size_idx].size);
agp_remove_bridge(bridge);
agp_put_bridge(bridge);
--- head-2010-05-25.orig/drivers/char/agp/ati-agp.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/ati-agp.c 2010-03-24 15:32:27.000000000 +0100
@@ -360,7 +360,7 @@ static int ati_create_gatt_table(struct
agp_bridge->gatt_table_real = (u32 *)page_dir.real;
agp_bridge->gatt_table = (u32 __iomem *) page_dir.remapped;
- agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+ agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
/* Write out the size register */
current_size = A_SIZE_LVL2(agp_bridge->current_size);
@@ -390,7 +390,7 @@ static int ati_create_gatt_table(struct
/* Calculate the agp offset */
for (i = 0; i < value->num_entries / 1024; i++, addr += 0x00400000) {
- writel(virt_to_phys(ati_generic_private.gatt_pages[i]->real) | 1,
+ writel(virt_to_gart(ati_generic_private.gatt_pages[i]->real) | 1,
page_dir.remapped+GET_PAGE_DIR_OFF(addr));
readl(page_dir.remapped+GET_PAGE_DIR_OFF(addr)); /* PCI Posting. */
}
--- head-2010-05-25.orig/drivers/char/agp/efficeon-agp.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/efficeon-agp.c 2010-03-24 15:32:27.000000000 +0100
@@ -226,7 +226,7 @@ static int efficeon_create_gatt_table(st
efficeon_private.l1_table[index] = page;
- value = virt_to_phys((unsigned long *)page) | pati | present | index;
+ value = virt_to_gart((unsigned long *)page) | pati | present | index;
pci_write_config_dword(agp_bridge->dev,
EFFICEON_ATTPAGE, value);
--- head-2010-05-25.orig/drivers/char/agp/generic.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/generic.c 2010-04-15 10:11:24.000000000 +0200
@@ -989,7 +989,7 @@ int agp_generic_create_gatt_table(struct
set_memory_uc((unsigned long)table, 1 << page_order);
bridge->gatt_table = (void *)table;
#else
- bridge->gatt_table = ioremap_nocache(virt_to_phys(table),
+ bridge->gatt_table = ioremap_nocache(virt_to_gart(table),
(PAGE_SIZE * (1 << page_order)));
bridge->driver->cache_flush();
#endif
@@ -1002,7 +1002,7 @@ int agp_generic_create_gatt_table(struct
return -ENOMEM;
}
- bridge->gatt_bus_addr = virt_to_phys(bridge->gatt_table_real);
+ bridge->gatt_bus_addr = virt_to_gart(bridge->gatt_table_real);
/* AK: bogus, should encode addresses > 4GB */
for (i = 0; i < num_entries; i++) {
--- head-2010-05-25.orig/drivers/char/agp/sworks-agp.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/char/agp/sworks-agp.c 2010-03-24 15:32:27.000000000 +0100
@@ -155,7 +155,7 @@ static int serverworks_create_gatt_table
/* Create a fake scratch directory */
for (i = 0; i < 1024; i++) {
writel(agp_bridge->scratch_page, serverworks_private.scratch_dir.remapped+i);
- writel(virt_to_phys(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
+ writel(virt_to_gart(serverworks_private.scratch_dir.real) | 1, page_dir.remapped+i);
}
retval = serverworks_create_gatt_pages(value->num_entries / 1024);
@@ -167,7 +167,7 @@ static int serverworks_create_gatt_table
agp_bridge->gatt_table_real = (u32 *)page_dir.real;
agp_bridge->gatt_table = (u32 __iomem *)page_dir.remapped;
- agp_bridge->gatt_bus_addr = virt_to_phys(page_dir.real);
+ agp_bridge->gatt_bus_addr = virt_to_gart(page_dir.real);
/* Get the address for the gart region.
* This is a bus address even on the alpha, b/c its
@@ -179,7 +179,7 @@ static int serverworks_create_gatt_table
/* Calculate the agp offset */
for (i = 0; i < value->num_entries / 1024; i++)
- writel(virt_to_phys(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
+ writel(virt_to_gart(serverworks_private.gatt_pages[i]->real)|1, page_dir.remapped+i);
return 0;
}
--- head-2010-05-25.orig/drivers/gpu/drm/radeon/radeon_device.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/gpu/drm/radeon/radeon_device.c 2010-05-07 11:25:36.000000000 +0200
@@ -345,6 +345,18 @@ int radeon_dummy_page_init(struct radeon
rdev->dummy_page.page = alloc_page(GFP_DMA32 | GFP_KERNEL | __GFP_ZERO);
if (rdev->dummy_page.page == NULL)
return -ENOMEM;
+#ifdef CONFIG_XEN
+ {
+ int ret = xen_limit_pages_to_max_mfn(rdev->dummy_page.page,
+ 0, 32);
+
+ if (!ret)
+ clear_page(page_address(rdev->dummy_page.page));
+ else
+ dev_warn(rdev->dev,
+ "Error restricting dummy page: %d\n", ret);
+ }
+#endif
rdev->dummy_page.addr = pci_map_page(rdev->pdev, rdev->dummy_page.page,
0, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
if (!rdev->dummy_page.addr) {
--- head-2010-05-25.orig/drivers/net/Kconfig 2010-04-15 09:54:18.000000000 +0200
+++ head-2010-05-25/drivers/net/Kconfig 2010-04-15 10:11:31.000000000 +0200
@@ -3313,7 +3313,7 @@ config VIRTIO_NET
config VMXNET3
tristate "VMware VMXNET3 ethernet driver"
- depends on PCI && INET
+ depends on PCI && INET && !XEN
help
This driver supports VMware's vmxnet3 virtual ethernet NIC.
To compile this driver as a module, choose M here: the
--- head-2010-05-25.orig/drivers/pci/msi-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/pci/msi-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -16,12 +16,11 @@
#include <linux/proc_fs.h>
#include <linux/msi.h>
#include <linux/smp.h>
+#include <linux/errno.h>
+#include <linux/io.h>
#include <xen/evtchn.h>
-#include <asm/errno.h>
-#include <asm/io.h>
-
#include "pci.h"
#include "msi.h"
@@ -479,7 +478,7 @@ static int msix_capability_init(struct p
* to determine if MSI/-X are supported for the device. If MSI/-X is
* supported return 0, else return an error code.
**/
-static int pci_msi_check_device(struct pci_dev* dev, int nvec, int type)
+static int pci_msi_check_device(struct pci_dev *dev, int nvec, int type)
{
struct pci_bus *bus;
int ret;
@@ -496,8 +495,9 @@ static int pci_msi_check_device(struct p
if (nvec < 1)
return -ERANGE;
- /* Any bridge which does NOT route MSI transactions from it's
- * secondary bus to it's primary bus must set NO_MSI flag on
+ /*
+ * Any bridge which does NOT route MSI transactions from its
+ * secondary bus to its primary bus must set NO_MSI flag on
* the secondary pci_bus.
* We expect only arch-specific PCI host bus controller driver
* or quirks for specific PCI bridges to be setting NO_MSI.
@@ -615,7 +615,7 @@ void pci_msi_shutdown(struct pci_dev *de
dev->msi_enabled = 0;
}
-void pci_disable_msi(struct pci_dev* dev)
+void pci_disable_msi(struct pci_dev *dev)
{
pci_msi_shutdown(dev);
}
@@ -655,14 +655,14 @@ int pci_msix_table_size(struct pci_dev *
**/
extern int pci_frontend_enable_msix(struct pci_dev *dev,
struct msix_entry *entries, int nvec);
-int pci_enable_msix(struct pci_dev* dev, struct msix_entry *entries, int nvec)
+int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec)
{
int status, nr_entries;
int i, j, temp;
struct msi_dev_list *msi_dev_entry = get_msi_dev_pirq_list(dev);
if (!entries)
- return -EINVAL;
+ return -EINVAL;
#ifdef CONFIG_XEN_PCIDEV_FRONTEND
if (!is_initial_xendomain()) {
@@ -737,7 +737,7 @@ int pci_enable_msix(struct pci_dev* dev,
EXPORT_SYMBOL(pci_enable_msix);
extern void pci_frontend_disable_msix(struct pci_dev* dev);
-void pci_msix_shutdown(struct pci_dev* dev)
+void pci_msix_shutdown(struct pci_dev *dev)
{
if (!pci_msi_enable || !dev || !dev->msix_enabled)
return;
@@ -770,7 +770,8 @@ void pci_msix_shutdown(struct pci_dev* d
pci_intx_for_msi(dev, 1);
dev->msix_enabled = 0;
}
-void pci_disable_msix(struct pci_dev* dev)
+
+void pci_disable_msix(struct pci_dev *dev)
{
pci_msix_shutdown(dev);
}
@@ -785,14 +786,14 @@ EXPORT_SYMBOL(pci_disable_msix);
* allocated for this device function, are reclaimed to unused state,
* which may be used later on.
**/
-void msi_remove_pci_irq_vectors(struct pci_dev* dev)
+void msi_remove_pci_irq_vectors(struct pci_dev *dev)
{
unsigned long flags;
struct msi_dev_list *msi_dev_entry;
struct msi_pirq_entry *pirq_entry, *tmp;
if (!pci_msi_enable || !dev)
- return;
+ return;
msi_dev_entry = get_msi_dev_pirq_list(dev);
--- head-2010-05-25.orig/drivers/pci/probe.c 2010-04-29 09:52:00.000000000 +0200
+++ head-2010-05-25/drivers/pci/probe.c 2010-04-29 09:53:13.000000000 +0200
@@ -1338,13 +1338,20 @@ int pci_scan_slot(struct pci_bus *bus, i
return 0; /* Already scanned the entire slot */
dev = pci_scan_single_device(bus, devfn);
- if (!dev)
+ if (!dev) {
+#ifdef pcibios_scan_all_fns
+ if (!pcibios_scan_all_fns(bus, devfn))
+#endif
return 0;
- if (!dev->is_added)
+ } else if (!dev->is_added)
nr++;
if (pci_ari_enabled(bus))
next_fn = next_ari_fn;
+#ifdef pcibios_scan_all_fns
+ else if (pcibios_scan_all_fns(bus, devfn))
+ next_fn = next_trad_fn;
+#endif
else if (dev->multifunction)
next_fn = next_trad_fn;
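/*
 * Editor's note, assumed rationale: with Xen PCI passthrough a guest
 * can be handed a single function other than function 0, so the scan
 * must not stop when function 0 is absent; pcibios_scan_all_fns()
 * lets the platform force probing of the remaining function numbers.
 */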
--- head-2010-05-25.orig/drivers/sfi/sfi_core.c 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/sfi/sfi_core.c 2010-03-24 15:32:27.000000000 +0100
@@ -387,6 +387,11 @@ void __init sfi_init(void)
if (!acpi_disabled)
disable_sfi();
+#ifdef CONFIG_XEN
+ if (!is_initial_xendomain())
+ disable_sfi();
+#endif
+
if (sfi_disabled)
return;
--- head-2010-05-25.orig/drivers/staging/hv/Kconfig 2010-05-25 09:12:08.000000000 +0200
+++ head-2010-05-25/drivers/staging/hv/Kconfig 2010-03-24 15:32:27.000000000 +0100
@@ -1,6 +1,6 @@
config HYPERV
tristate "Microsoft Hyper-V client drivers"
- depends on X86 && m
+ depends on X86 && !XEN && m
default n
help
Select this option to run Linux as a Hyper-V client operating
--- head-2010-05-25.orig/drivers/xen/Kconfig 2010-03-31 13:35:09.000000000 +0200
+++ head-2010-05-25/drivers/xen/Kconfig 2010-03-31 14:01:28.000000000 +0200
@@ -22,6 +22,7 @@ config XEN_UNPRIVILEGED_GUEST
select PM
select PM_SLEEP
select PM_SLEEP_SMP if SMP
+ select PM_RUNTIME if PCI
select SUSPEND
config XEN_PRIVCMD
--- head-2010-05-25.orig/drivers/xen/Makefile 2010-04-19 14:53:25.000000000 +0200
+++ head-2010-05-25/drivers/xen/Makefile 2010-04-19 14:53:46.000000000 +0200
@@ -8,6 +8,11 @@ obj-$(CONFIG_XEN) += console/
obj-y += xenbus/
obj-$(CONFIG_XEN) += char/
+nostackp := $(call cc-option, -fno-stack-protector)
+ifeq ($(CONFIG_PARAVIRT_XEN),y)
+CFLAGS_features.o := $(nostackp)
+endif
+
obj-$(CONFIG_XEN) += features.o util.o
obj-$(CONFIG_HOTPLUG_CPU) += $(xen-hotplug-y)
obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
--- head-2010-05-25.orig/drivers/xen/balloon/balloon.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/balloon/balloon.c 2010-04-15 10:11:45.000000000 +0200
@@ -77,6 +77,11 @@ static DEFINE_MUTEX(balloon_mutex);
*/
DEFINE_SPINLOCK(balloon_lock);
+#ifndef MODULE
+#include <linux/pagevec.h>
+static struct pagevec free_pagevec;
+#endif
+
struct balloon_stats balloon_stats;
/* We increase/decrease in batches which fit in a page */
@@ -200,14 +205,27 @@ static struct page *balloon_next_page(st
static inline void balloon_free_page(struct page *page)
{
#ifndef MODULE
- if (put_page_testzero(page))
- free_cold_page(page);
+ if (put_page_testzero(page) && !pagevec_add(&free_pagevec, page)) {
+ __pagevec_free(&free_pagevec);
+ pagevec_reinit(&free_pagevec);
+ }
#else
- /* free_cold_page() is not being exported. */
+	/* The pagevec interface is not exported to modules. */
__free_page(page);
#endif
}
+static inline void balloon_free_and_unlock(unsigned long flags)
+{
+#ifndef MODULE
+ if (pagevec_count(&free_pagevec)) {
+ __pagevec_free(&free_pagevec);
+ pagevec_reinit(&free_pagevec);
+ }
+#endif
+ balloon_unlock(flags);
+}
+
static void balloon_alarm(unsigned long unused)
{
schedule_work(&balloon_worker);
@@ -320,7 +338,7 @@ static int increase_reservation(unsigned
totalram_pages = bs.current_pages - totalram_bias;
out:
- balloon_unlock(flags);
+ balloon_free_and_unlock(flags);
#ifndef MODULE
setup_per_zone_wmarks();
@@ -559,6 +577,7 @@ static int __init balloon_init(void)
IPRINTK("Initialising balloon driver.\n");
#ifdef CONFIG_XEN
+ pagevec_init(&free_pagevec, true);
bs.current_pages = min(xen_start_info->nr_pages, max_pfn);
totalram_pages = bs.current_pages;
#else
@@ -720,8 +739,8 @@ struct page **alloc_empty_pages_and_page
}
if (ret != 0) {
- balloon_unlock(flags);
balloon_free_page(page);
+ balloon_free_and_unlock(flags);
goto err;
}
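
The balloon.c changes replace the no-longer-exported free_cold_page() with pagevec batching: freed pages accumulate in a static struct pagevec and go back to the allocator a batch at a time, and any partial batch is drained before the balloon lock is released. A minimal sketch of the pattern on a 2.6.32-era kernel; the batched_free* names are illustrative, and pagevec_add() returns the space left, so 0 means the vector just filled:

#include <linux/mm.h>
#include <linux/pagevec.h>

static struct pagevec free_pagevec;

static void batched_free_init(void)
{
        pagevec_init(&free_pagevec, 1); /* 1 = treat pages as cache-cold */
}

/* Queue one page; flush the whole vector once it fills up. */
static void batched_free(struct page *page)
{
        if (put_page_testzero(page) &&
            !pagevec_add(&free_pagevec, page)) {
                __pagevec_free(&free_pagevec);
                pagevec_reinit(&free_pagevec);
        }
}

/* Drain any partial batch, e.g. just before dropping a lock. */
static void batched_free_flush(void)
{
        if (pagevec_count(&free_pagevec)) {
                __pagevec_free(&free_pagevec);
                pagevec_reinit(&free_pagevec);
        }
}
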
--- head-2010-05-25.orig/drivers/xen/blkfront/vbd.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/blkfront/vbd.c 2010-03-24 15:32:27.000000000 +0100
@@ -105,7 +105,7 @@ static struct xlbd_major_info *major_inf
#define XLBD_MAJOR_VBD_ALT(idx) ((idx) ^ XLBD_MAJOR_VBD_START ^ (XLBD_MAJOR_VBD_START + 1))
-static struct block_device_operations xlvbd_block_fops =
+static const struct block_device_operations xlvbd_block_fops =
{
.owner = THIS_MODULE,
.open = blkif_open,
--- head-2010-05-25.orig/drivers/xen/blktap2/device.c 2010-04-19 14:53:31.000000000 +0200
+++ head-2010-05-25/drivers/xen/blktap2/device.c 2010-03-24 15:32:27.000000000 +0100
@@ -141,7 +141,7 @@ blktap_device_ioctl(struct block_device
return 0;
}
-static struct block_device_operations blktap_device_file_operations = {
+static const struct block_device_operations blktap_device_file_operations = {
.owner = THIS_MODULE,
.open = blktap_device_open,
.release = blktap_device_release,
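
This hunk and the vbd.c one above (plus the ethtool_ops hunks further down) constify method tables, which 2.6.32's APIs accept as const pointers; a const table can be placed in read-only data and cannot be silently rewritten at run time. The idiom, with example_fops as an illustrative name:

/* All members are compile-time function pointers, so the table
 * can live in .rodata. */
static const struct block_device_operations example_fops = {
        .owner   = THIS_MODULE,
        .open    = blktap_device_open,
        .release = blktap_device_release,
};

/* struct gendisk's .fops is a const pointer in 2.6.32, so the
 * assignment gd->fops = &example_fops still type-checks. */
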
--- head-2010-05-25.orig/drivers/xen/core/evtchn.c 2010-04-23 15:19:43.000000000 +0200
+++ head-2010-05-25/drivers/xen/core/evtchn.c 2010-03-31 14:37:57.000000000 +0200
@@ -144,13 +144,13 @@ unsigned int irq_from_evtchn(unsigned in
EXPORT_SYMBOL_GPL(irq_from_evtchn);
/* IRQ <-> VIRQ mapping. */
-DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1};
+DEFINE_PER_CPU(int[NR_VIRQS], virq_to_irq) = {[0 ... NR_VIRQS-1] = -1};
/* IRQ <-> IPI mapping. */
#ifndef NR_IPIS
#define NR_IPIS 1
#endif
-DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1};
+DEFINE_PER_CPU(int[NR_IPIS], ipi_to_irq) = {[0 ... NR_IPIS-1] = -1};
#ifdef CONFIG_SMP
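
These two declarations adopt the 2.6.32 per-CPU interface, which moves the array bound out of the variable name and into the type argument of DEFINE_PER_CPU(); readers via per_cpu() are unchanged. A sketch with an illustrative bound and accessor:

#include <linux/percpu.h>

#define EXAMPLE_NR_VIRQS 24     /* illustrative value only */

/* New style: the element type, here an int array, comes first. */
static DEFINE_PER_CPU(int[EXAMPLE_NR_VIRQS], example_virq_to_irq) = {
        [0 ... EXAMPLE_NR_VIRQS - 1] = -1
};

static int example_lookup(unsigned int virq, unsigned int cpu)
{
        return per_cpu(example_virq_to_irq, cpu)[virq];
}
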
--- head-2010-05-25.orig/drivers/xen/core/reboot.c 2010-03-24 15:12:46.000000000 +0100
+++ head-2010-05-25/drivers/xen/core/reboot.c 2010-03-24 15:32:27.000000000 +0100
@@ -83,7 +83,7 @@ static int xen_suspend(void *__unused)
int err, old_state;
daemonize("suspend");
- err = set_cpus_allowed(current, cpumask_of_cpu(0));
+ err = set_cpus_allowed_ptr(current, cpumask_of(0));
if (err) {
printk(KERN_ERR "Xen suspend can't run on CPU0 (%d)\n", err);
goto fail;
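
set_cpus_allowed() passed a cpumask by value, which 2.6.32 removed in favour of the pointer-taking set_cpus_allowed_ptr(); cpumask_of(0) yields a constant mask containing only CPU 0. A minimal sketch of pinning the current task; pin_to_cpu0 is an illustrative name:

#include <linux/cpumask.h>
#include <linux/sched.h>

static int pin_to_cpu0(void)
{
        /* cpumask_of(0) returns a const struct cpumask * for CPU 0. */
        int err = set_cpus_allowed_ptr(current, cpumask_of(0));

        if (err)
                printk(KERN_ERR "cannot pin task to CPU0 (%d)\n", err);
        return err;
}
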
--- head-2010-05-25.orig/drivers/xen/netback/interface.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/drivers/xen/netback/interface.c 2010-03-24 15:32:27.000000000 +0100
@@ -159,7 +159,7 @@ static void netbk_get_strings(struct net
}
}
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
{
.get_drvinfo = netbk_get_drvinfo,
--- head-2010-05-25.orig/drivers/xen/netback/loopback.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/netback/loopback.c 2010-03-24 15:32:27.000000000 +0100
@@ -134,7 +134,7 @@ static int loopback_start_xmit(struct sk
if (!skb_remove_foreign_references(skb)) {
np->stats.tx_dropped++;
dev_kfree_skb(skb);
- return 0;
+ return NETDEV_TX_OK;
}
dst_release(skb_dst(skb));
@@ -173,7 +173,7 @@ static int loopback_start_xmit(struct sk
netif_rx(skb);
- return 0;
+ return NETDEV_TX_OK;
}
static struct net_device_stats *loopback_get_stats(struct net_device *dev)
@@ -182,7 +182,7 @@ static struct net_device_stats *loopback
return &np->stats;
}
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
{
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
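
The bare 0 returns changed here, and in netback.c and netfront.c below, become NETDEV_TX_OK to match the netdev_tx_t convention 2.6.32 introduced for ndo_start_xmit(). The contract, in a hedged sketch; example_start_xmit and its drop test are illustrative:

#include <linux/netdevice.h>

/* Return NETDEV_TX_OK once the skb has been consumed, even when it
 * was dropped; NETDEV_TX_BUSY asks the core to requeue the skb. */
static netdev_tx_t example_start_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
{
        if (!netif_carrier_ok(dev)) {   /* arbitrary drop condition */
                dev->stats.tx_dropped++;
                dev_kfree_skb(skb);
                return NETDEV_TX_OK;    /* consumed, nothing to retry */
        }
        /* ... hand the packet to the hardware or backend ... */
        return NETDEV_TX_OK;
}
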
--- head-2010-05-25.orig/drivers/xen/netback/netback.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/drivers/xen/netback/netback.c 2010-03-24 15:32:27.000000000 +0100
@@ -340,12 +340,12 @@ int netif_be_start_xmit(struct sk_buff *
skb_queue_tail(&rx_queue, skb);
tasklet_schedule(&net_rx_tasklet);
- return 0;
+ return NETDEV_TX_OK;
drop:
netif->stats.tx_dropped++;
dev_kfree_skb(skb);
- return 0;
+ return NETDEV_TX_OK;
}
#if 0
--- head-2010-05-25.orig/drivers/xen/netfront/netfront.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/netfront/netfront.c 2010-03-24 15:32:27.000000000 +0100
@@ -953,7 +953,7 @@ static int network_start_xmit(struct sk_
if (np->accel_vif_state.hooks &&
np->accel_vif_state.hooks->start_xmit(skb, dev)) {
/* Fast path has sent this packet */
- return 0;
+ return NETDEV_TX_OK;
}
frags += DIV_ROUND_UP(offset + len, PAGE_SIZE);
@@ -1042,12 +1042,12 @@ static int network_start_xmit(struct sk_
spin_unlock_irq(&np->tx_lock);
- return 0;
+ return NETDEV_TX_OK;
drop:
np->stats.tx_dropped++;
dev_kfree_skb(skb);
- return 0;
+ return NETDEV_TX_OK;
}
static irqreturn_t netif_int(int irq, void *dev_id)
@@ -1872,7 +1872,7 @@ static void netif_uninit(struct net_devi
gnttab_free_grant_references(np->gref_rx_head);
}
-static struct ethtool_ops network_ethtool_ops =
+static const struct ethtool_ops network_ethtool_ops =
{
.get_tx_csum = ethtool_op_get_tx_csum,
.set_tx_csum = ethtool_op_set_tx_csum,
--- head-2010-05-25.orig/drivers/xen/sfc_netback/accel_fwd.c 2010-03-24 15:10:29.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netback/accel_fwd.c 2010-03-24 15:32:27.000000000 +0100
@@ -181,11 +181,10 @@ int netback_accel_fwd_add(const __u8 *ma
unsigned long flags;
cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
- DECLARE_MAC_BUF(buf);
BUG_ON(fwd_priv == NULL);
- DPRINTK("Adding mac %s\n", print_mac(buf, mac));
+ DPRINTK("Adding mac %pM\n", mac);
spin_lock_irqsave(&fwd_set->fwd_lock, flags);
@@ -200,8 +199,7 @@ int netback_accel_fwd_add(const __u8 *ma
if (cuckoo_hash_lookup(&fwd_set->fwd_hash_table,
(cuckoo_hash_key *)(&key), &rc) != 0) {
spin_unlock_irqrestore(&fwd_set->fwd_lock, flags);
- EPRINTK("MAC address %s already accelerated.\n",
- print_mac(buf, mac));
+ EPRINTK("MAC address %pM already accelerated.\n", mac);
return -EEXIST;
}
@@ -236,9 +234,8 @@ void netback_accel_fwd_remove(const __u8
unsigned long flags;
cuckoo_hash_mac_key key = cuckoo_mac_to_key(mac);
struct port_fwd *fwd_set = (struct port_fwd *)fwd_priv;
- DECLARE_MAC_BUF(buf);
- DPRINTK("Removing mac %s\n", print_mac(buf, mac));
+ DPRINTK("Removing mac %pM\n", mac);
BUG_ON(fwd_priv == NULL);
@@ -396,16 +393,14 @@ void netback_accel_tx_packet(struct sk_b
if (is_broadcast_ether_addr(skb_mac_header(skb))
&& packet_is_arp_reply(skb)) {
- DECLARE_MAC_BUF(buf);
-
/*
* update our fast path forwarding to reflect this
* gratuitous ARP
*/
mac = skb_mac_header(skb)+ETH_ALEN;
- DPRINTK("%s: found gratuitous ARP for %s\n",
- __FUNCTION__, print_mac(buf, mac));
+ DPRINTK("%s: found gratuitous ARP for %pM\n",
+ __FUNCTION__, mac);
spin_lock_irqsave(&fwd_set->fwd_lock, flags);
/*
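
The DECLARE_MAC_BUF()/print_mac() removals throughout these sfc_net* files rely on the %pM printk extension (available since 2.6.28), which formats the six bytes behind a pointer as a colon-separated MAC address with no temporary buffer. Sketch; log_mac is an illustrative name:

#include <linux/kernel.h>

static void log_mac(const u8 *mac)
{
        /* %pM prints aa:bb:cc:dd:ee:ff directly from the pointer. */
        printk(KERN_DEBUG "station %pM seen\n", mac);
}
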
--- head-2010-05-25.orig/drivers/xen/sfc_netback/accel_msg.c 2010-03-24 15:10:29.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netback/accel_msg.c 2010-03-24 15:32:27.000000000 +0100
@@ -57,11 +57,10 @@ static void netback_accel_msg_tx_localma
{
unsigned long lock_state;
struct net_accel_msg *msg;
- DECLARE_MAC_BUF(buf);
BUG_ON(bend == NULL || mac == NULL);
- VPRINTK("Sending local mac message: %s\n", print_mac(buf, mac));
+ VPRINTK("Sending local mac message: %pM\n", mac);
msg = net_accel_msg_start_send(bend->shared_page, &bend->to_domU,
&lock_state);
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:25:06.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_msg.c 2010-03-24 15:32:27.000000000 +0100
@@ -327,10 +327,8 @@ static int vnic_process_localmac_msg(net
cuckoo_hash_mac_key key;
if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) {
- DECLARE_MAC_BUF(buf);
-
- DPRINTK("MAC has moved, could be local: %s\n",
- print_mac(buf, msg->u.localmac.mac));
+ DPRINTK("MAC has moved, could be local: %pM\n",
+ msg->u.localmac.mac);
key = cuckoo_mac_to_key(msg->u.localmac.mac);
spin_lock_irqsave(&vnic->table_lock, flags);
/* Try to remove it, not a big deal if not there */
--- head-2010-05-25.orig/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:10:29.000000000 +0100
+++ head-2010-05-25/drivers/xen/sfc_netfront/accel_vi.c 2010-03-24 15:32:27.000000000 +0100
@@ -643,10 +643,7 @@ netfront_accel_vi_tx_post(netfront_accel
(cuckoo_hash_key *)(&key), &value);
if (!try_fastpath) {
- DECLARE_MAC_BUF(buf);
-
- VPRINTK("try fast path false for mac: %s\n",
- print_mac(buf, skb->data));
+ VPRINTK("try fast path false for mac: %pM\n", skb->data);
return NETFRONT_ACCEL_STATUS_CANT;
}
@@ -772,10 +769,9 @@ static void netfront_accel_vi_rx_comple
if (compare_ether_addr(skb->data, vnic->mac)) {
struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN);
u16 port;
- DECLARE_MAC_BUF(buf);
- DPRINTK("%s: saw wrong MAC address %s\n",
- __FUNCTION__, print_mac(buf, skb->data));
+ DPRINTK("%s: saw wrong MAC address %pM\n",
+ __FUNCTION__, skb->data);
if (ip->protocol == IPPROTO_TCP) {
struct tcphdr *tcp = (struct tcphdr *)
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_dev.c 2009-05-29 10:25:53.000000000 +0200
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_dev.c 2010-03-24 15:32:27.000000000 +0100
@@ -36,6 +36,7 @@
#include <linux/errno.h>
#include <linux/uio.h>
#include <linux/notifier.h>
+#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/fs.h>
#include <linux/poll.h>
--- head-2010-05-25.orig/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/drivers/xen/xenbus/xenbus_probe.c 2010-03-24 15:32:27.000000000 +0100
@@ -42,6 +42,7 @@
#include <linux/ctype.h>
#include <linux/fcntl.h>
#include <linux/mm.h>
+#include <linux/sched.h>
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/mutex.h>
--- head-2010-05-25.orig/lib/swiotlb-xen.c 2010-03-24 15:25:21.000000000 +0100
+++ head-2010-05-25/lib/swiotlb-xen.c 2010-03-24 15:32:27.000000000 +0100
@@ -111,79 +111,11 @@ setup_io_tlb_npages(char *str)
__setup("swiotlb=", setup_io_tlb_npages);
/* make io_tlb_overflow tunable too? */
-void *__init swiotlb_alloc_boot(size_t size, unsigned long nslabs)
-{
- void *start = alloc_bootmem_pages(size);
- unsigned int i;
- int rc;
-
- dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
- for (i = 0; i < nslabs; i += IO_TLB_SEGSIZE) {
- do {
- rc = xen_create_contiguous_region(
- (unsigned long)start + (i << IO_TLB_SHIFT),
- get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
- dma_bits);
- } while (rc && dma_bits++ < max_dma_bits);
- if (rc) {
- if (i == 0)
- panic("No suitable physical memory available for SWIOTLB buffer!\n"
- "Use dom0_mem Xen boot parameter to reserve\n"
- "some DMA memory (e.g., dom0_mem=-128M).\n");
- io_tlb_nslabs = i;
- i <<= IO_TLB_SHIFT;
- free_bootmem(__pa(start + i), size - i);
- size = i;
- for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
- unsigned int bits = fls64(virt_to_bus(start + i - 1));
-
- if (bits > dma_bits)
- dma_bits = bits;
- }
- break;
- }
- }
-
- return start;
-}
-
-#ifndef CONFIG_XEN
-void * __weak swiotlb_alloc(unsigned order, unsigned long nslabs)
-{
- return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order);
-}
-#endif
-
-dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr)
-{
- return phys_to_machine(paddr);
-}
-
-phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr)
-{
- return machine_to_phys(baddr);
-}
-
+/* Note that this doesn't work with highmem pages */
static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
volatile void *address)
{
- return swiotlb_phys_to_bus(hwdev, virt_to_phys(address));
-}
-
-void * __weak swiotlb_bus_to_virt(struct device *hwdev, dma_addr_t address)
-{
- return phys_to_virt(swiotlb_bus_to_phys(hwdev, address));
-}
-
-int __weak swiotlb_arch_address_needs_mapping(struct device *hwdev,
- dma_addr_t addr, size_t size)
-{
- return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
-}
-
-int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size)
-{
- return 0;
+ return phys_to_dma(hwdev, virt_to_phys(address));
}
static void swiotlb_print_info(unsigned long bytes)
@@ -216,10 +148,35 @@ swiotlb_init_with_default_size(size_t de
/*
* Get IO TLB memory from the low pages
*/
- io_tlb_start = swiotlb_alloc_boot(bytes, io_tlb_nslabs);
+ io_tlb_start = alloc_bootmem_pages(bytes);
if (!io_tlb_start)
panic("Cannot allocate SWIOTLB buffer!\n");
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
+ for (i = 0; i < io_tlb_nslabs; i += IO_TLB_SEGSIZE) {
+ do {
+ rc = xen_create_contiguous_region(
+ (unsigned long)io_tlb_start + (i << IO_TLB_SHIFT),
+ get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
+ dma_bits);
+ } while (rc && dma_bits++ < max_dma_bits);
+ if (rc) {
+ if (i == 0)
+ panic("No suitable physical memory available for SWIOTLB buffer!\n"
+ "Use dom0_mem Xen boot parameter to reserve\n"
+ "some DMA memory (e.g., dom0_mem=-128M).\n");
+ io_tlb_nslabs = i;
+ i <<= IO_TLB_SHIFT;
+ free_bootmem(__pa(io_tlb_start + i), bytes - i);
+ bytes = i;
+ for (dma_bits = 0; i > 0; i -= IO_TLB_SEGSIZE << IO_TLB_SHIFT) {
+ unsigned int bits = fls64(virt_to_bus(io_tlb_start + i - 1));
+
+ if (bits > dma_bits)
+ dma_bits = bits;
+ }
+ break;
+ }
+ }
io_tlb_end = io_tlb_start + bytes;
/*
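
With the swiotlb_alloc_boot() hook gone, its Xen-specific body moves straight into swiotlb_init_with_default_size(): each IO_TLB_SEGSIZE run of slabs is exchanged for machine-contiguous memory, the DMA address width is widened on failure, and the table is shrunk to whatever succeeded rather than panicking (unless nothing at all fit). A condensed sketch of the retry loop; the wrapper function and its name are illustrative, while io_tlb_start, io_tlb_nslabs, dma_bits and max_dma_bits are the file-scope variables from the hunk:

/* Make each IO_TLB_SEGSIZE-slab segment contiguous in machine
 * memory, widening the DMA mask whenever Xen cannot satisfy it. */
static void make_tlb_contiguous(void)
{
        unsigned long i;
        int rc;

        dma_bits = get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT) + PAGE_SHIFT;
        for (i = 0; i < io_tlb_nslabs; i += IO_TLB_SEGSIZE) {
                do {
                        rc = xen_create_contiguous_region(
                                (unsigned long)io_tlb_start +
                                        (i << IO_TLB_SHIFT),
                                get_order(IO_TLB_SEGSIZE << IO_TLB_SHIFT),
                                dma_bits);
                } while (rc && dma_bits++ < max_dma_bits);
                if (rc)
                        break;  /* caller shrinks the table to i slabs */
        }
}
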
@@ -283,13 +240,10 @@ static inline int range_needs_mapping(ph
static int is_swiotlb_buffer(dma_addr_t addr)
{
unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
- char *va = pfn_valid(pfn) ? __va(pfn << PAGE_SHIFT) : NULL;
+ phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;
-#ifdef CONFIG_HIGHMEM
- if (pfn >= highstart_pfn)
- return 0;
-#endif
- return va >= io_tlb_start && va < io_tlb_end;
+ return paddr >= virt_to_phys(io_tlb_start) &&
+ paddr < virt_to_phys(io_tlb_end);
}
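
Comparing physical rather than virtual addresses lets is_swiotlb_buffer() drop the CONFIG_HIGHMEM special case: a physical-range check stays valid even for pages without a permanent kernel mapping. The shape of the check; is_bounce_buffer is an illustrative name, and io_tlb_start/io_tlb_end are the pool bounds from this file:

/* dev_addr is a machine (bus) address under Xen; map it back to a
 * local pfn before comparing against the bounce pool's range. */
static int is_bounce_buffer(dma_addr_t addr)
{
        unsigned long pfn = mfn_to_local_pfn(PFN_DOWN(addr));
        phys_addr_t paddr = (phys_addr_t)pfn << PAGE_SHIFT;

        return paddr >= virt_to_phys(io_tlb_start) &&
               paddr <  virt_to_phys(io_tlb_end);
}
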
/*
@@ -514,12 +468,15 @@ swiotlb_full(struct device *dev, size_t
printk(KERN_ERR "PCI-DMA: Out of SW-IOMMU space for %zu bytes at "
"device %s\n", size, dev ? dev_name(dev) : "?");
- if (size > io_tlb_overflow && do_panic) {
- if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic("PCI-DMA: Memory would be corrupted\n");
- if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL)
- panic("PCI-DMA: Random memory would be DMAed\n");
- }
+ if (size <= io_tlb_overflow || !do_panic)
+ return;
+
+ if (dir == DMA_BIDIRECTIONAL)
+ panic("DMA: Random memory could be DMA accessed\n");
+ if (dir == DMA_FROM_DEVICE)
+ panic("DMA: Random memory could be DMA written\n");
+ if (dir == DMA_TO_DEVICE)
+ panic("DMA: Random memory could be DMA read\n");
}
/*
@@ -545,7 +502,7 @@ dma_addr_t swiotlb_map_page(struct devic
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (!address_needs_mapping(dev, dev_addr, size) &&
+ if (dma_capable(dev, dev_addr, size) &&
!range_needs_mapping(phys, size))
return dev_addr;
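
dma_capable() replaces the open-coded address_needs_mapping(): it reports whether the whole [dev_addr, dev_addr + size) range fits under the device's DMA mask, so the fast path with no bounce buffer is taken only when the range is both reachable and machine-contiguous. A sketch of the combined test; needs_bounce is an illustrative name and range_needs_mapping() is this file's local helper:

#include <linux/dma-mapping.h>

static bool needs_bounce(struct device *dev, dma_addr_t dev_addr,
                         phys_addr_t phys, size_t size)
{
        /* Bounce if the device cannot reach the address, or if the
         * physical range is not machine-contiguous under Xen. */
        return !dma_capable(dev, dev_addr, size) ||
               range_needs_mapping(phys, size);
}
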
@@ -575,12 +532,12 @@ EXPORT_SYMBOL_GPL(swiotlb_map_page);
static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
size_t size, int dir)
{
- char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dev_addr)) {
- do_unmap_single(hwdev, dma_addr, size, dir);
+ do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
return;
}
@@ -609,12 +566,12 @@ void
swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir)
{
- char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dev_addr))
- sync_single(hwdev, dma_addr, size, dir);
+ sync_single(hwdev, phys_to_virt(paddr), size, dir);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_cpu);
@@ -622,12 +579,12 @@ void
swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
size_t size, enum dma_data_direction dir)
{
- char *dma_addr = swiotlb_bus_to_virt(hwdev, dev_addr);
+ phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
if (is_swiotlb_buffer(dev_addr))
- sync_single(hwdev, dma_addr, size, dir);
+ sync_single(hwdev, phys_to_virt(paddr), size, dir);
}
EXPORT_SYMBOL(swiotlb_sync_single_for_device);
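
With swiotlb_bus_to_virt() removed, the unmap and sync paths recover the bounce buffer's kernel address in two explicit steps: dma_to_phys(), which is machine-to-physical under Xen, followed by phys_to_virt(). Condensed sketch of the shared shape; example_unmap is illustrative and do_unmap_single() is this file's internal helper:

static void example_unmap(struct device *hwdev, dma_addr_t dev_addr,
                          size_t size, int dir)
{
        /* Two-step translation: bus -> phys -> kernel virtual. */
        phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);

        if (is_swiotlb_buffer(dev_addr))
                do_unmap_single(hwdev, phys_to_virt(paddr), size, dir);
}
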
@@ -680,8 +637,8 @@ swiotlb_map_sg_attrs(struct device *hwde
phys_addr_t paddr = page_to_pseudophys(sg_page(sg))
+ sg->offset;
- if (range_needs_mapping(paddr, sg->length)
- || address_needs_mapping(hwdev, dev_addr, sg->length)) {
+ if (range_needs_mapping(paddr, sg->length) ||
+ !dma_capable(hwdev, dev_addr, sg->length)) {
void *map;
gnttab_dma_unmap_page(dev_addr);