From: jbeulich@novell.com
Subject: don't require order-1 allocations for pgd-s
Patch-mainline: n/a

Instead of allocating an order-1 page pair for each pgd, allocate the kernel
and user mode pgds as separate order-0 pages, with the user mode one linked to
the kernel one through the kernel pgd page's ->private field (__user_pgd() now
looks it up from there). At the same time remove the useless user mode pair of
init_level4_pgt.

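For orientation, a short sketch of the pairing scheme, lifted from the hunks
below (x86_64 only; the #ifdef-ery of the real patch is omitted here):

	/* Allocate the user mode pgd as its own order-0 page and remember
	 * it in the kernel pgd page's ->private field. */
	static inline pgd_t *user_pgd_alloc(pgd_t *pgd)
	{
		if (pgd) {
			pgd_t *upgd = (void *)__get_free_page(PGALLOC_GFP);

			if (upgd)
				set_page_private(virt_to_page(pgd),
						 (unsigned long)upgd);
			else {
				free_page((unsigned long)pgd);
				pgd = NULL;
			}
		}
		return pgd;
	}

	/* Recover the user mode slot pairing a kernel pgd entry; returns
	 * NULL for init_level4_pgt, which no longer has a user mode pair. */
	static inline pgd_t *__user_pgd(pgd_t *pgd)
	{
		if (unlikely(((unsigned long)pgd & PAGE_MASK)
			     == (unsigned long)init_level4_pgt))
			return NULL;
		return (pgd_t *)(virt_to_page(pgd)->private
				 + ((unsigned long)pgd & ~PAGE_MASK));
	}
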
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2010-11-23 16:31:40.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-03 14:42:36.000000000 +0100
@@ -106,8 +106,8 @@ void do_hypervisor_callback(struct pt_re
* be MACHINE addresses.
*/

-void xen_pt_switch(unsigned long ptr);
-void xen_new_user_pt(unsigned long ptr); /* x86_64 only */
+void xen_pt_switch(pgd_t *);
+void xen_new_user_pt(pgd_t *); /* x86_64 only */
void xen_load_gs(unsigned int selector); /* x86_64 only */
void xen_tlb_flush(void);
void xen_invlpg(unsigned long ptr);
@@ -115,7 +115,7 @@ void xen_invlpg(unsigned long ptr);
void xen_l1_entry_update(pte_t *ptr, pte_t val);
void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
void xen_pgd_pin(pgd_t *);
void xen_pgd_unpin(pgd_t *);

--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-08 10:25:49.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-08 10:46:27.000000000 +0100
@@ -82,6 +82,9 @@ static inline void switch_mm(struct mm_s
{
unsigned cpu = smp_processor_id();
struct mmuext_op _op[2 + (sizeof(long) > 4)], *op = _op;
+#ifdef CONFIG_X86_64
+ pgd_t *upgd;
+#endif

if (likely(prev != next)) {
BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
@@ -98,10 +101,11 @@ static inline void switch_mm(struct mm_s
op->arg1.mfn = virt_to_mfn(next->pgd);
op++;

- /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
+ /* xen_new_user_pt(next->pgd) */
#ifdef CONFIG_X86_64
op->cmd = MMUEXT_NEW_USER_BASEPTR;
- op->arg1.mfn = virt_to_mfn(__user_pgd(next->pgd));
+ upgd = __user_pgd(next->pgd);
+ op->arg1.mfn = likely(upgd) ? virt_to_mfn(upgd) : 0;
op++;
#endif

@@ -132,7 +136,7 @@ static inline void switch_mm(struct mm_s
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
- xen_new_user_pt(__pa(__user_pgd(next->pgd)));
+ xen_new_user_pt(next->pgd);
load_LDT_nolock(&next->context);
}
}
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-03 14:41:13.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-03 14:42:36.000000000 +0100
@@ -123,15 +123,13 @@ static inline void pud_populate(struct m
#endif /* CONFIG_X86_PAE */

#if PAGETABLE_LEVELS > 3
-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
-
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud));

paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
if (unlikely(PagePinned(virt_to_page(pgd))))
- xen_l4_entry_update(pgd, 1, ent);
+ xen_l4_entry_update(pgd, ent);
else
*__user_pgd(pgd) = *pgd = ent;
}
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-03 14:42:15.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-03 14:42:36.000000000 +0100
@@ -111,18 +111,25 @@ static inline void xen_set_pud(pud_t *pu
: (void)(*__pudp = xen_make_pud(0)); \
})

-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+static inline pgd_t *__user_pgd(pgd_t *pgd)
+{
+ if (unlikely(((unsigned long)pgd & PAGE_MASK)
+ == (unsigned long)init_level4_pgt))
+ return NULL;
+ return (pgd_t *)(virt_to_page(pgd)->private
+ + ((unsigned long)pgd & ~PAGE_MASK));
+}

static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- xen_l4_entry_update(pgdp, 0, pgd);
+ xen_l4_entry_update(pgdp, pgd);
}

#define xen_pgd_clear(pgd) \
({ \
pgd_t *__pgdp = (pgd); \
PagePinned(virt_to_page(__pgdp)) \
- ? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \
+ ? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \
: (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \
})

--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:44:07.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:44:15.000000000 +0100
@@ -1064,8 +1064,7 @@ DEFINE_PER_CPU_FIRST(union irq_stack_uni
void xen_switch_pt(void)
{
#ifdef CONFIG_XEN
- xen_pt_switch(__pa_symbol(init_level4_pgt));
- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
+ xen_pt_switch(init_level4_pgt);
#endif
}

--- head-2011-03-17.orig/arch/x86/kernel/head_64-xen.S 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head_64-xen.S 2011-02-03 14:42:36.000000000 +0100
@@ -56,14 +56,6 @@ ENTRY(name)
__PAGE_ALIGNED_BSS
NEXT_PAGE(init_level4_pgt)
.fill 512,8,0
- /*
- * We update two pgd entries to make kernel and user pgd consistent
- * at pgd_populate(). It can be used for kernel modules. So we place
- * this page here for those cases to avoid memory corruption.
- * We also use this page to establish the initial mapping for the
- * vsyscall area.
- */
- .fill 512,8,0

NEXT_PAGE(level3_kernel_pgt)
.fill 512,8,0
--- head-2011-03-17.orig/arch/x86/mm/hypervisor.c 2010-12-08 10:45:40.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/hypervisor.c 2011-02-03 14:42:36.000000000 +0100
@@ -521,7 +521,7 @@ void xen_l3_entry_update(pud_t *ptr, pud
#endif

#ifdef CONFIG_X86_64
-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val)
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
mmu_update_t u[2];
struct page *page = NULL;
@@ -534,8 +534,11 @@ void xen_l4_entry_update(pgd_t *ptr, int
}
u[0].ptr = virt_to_machine(ptr);
u[0].val = __pgd_val(val);
- if (user) {
- u[1].ptr = virt_to_machine(__user_pgd(ptr));
+ if (((unsigned long)ptr & ~PAGE_MASK)
+ <= pgd_index(TASK_SIZE_MAX) * sizeof(*ptr)) {
+ ptr = __user_pgd(ptr);
+ BUG_ON(!ptr);
+ u[1].ptr = virt_to_machine(ptr);
u[1].val = __pgd_val(val);
do_lN_entry_update(u, 2, page);
} else
@@ -543,21 +546,25 @@ void xen_l4_entry_update(pgd_t *ptr, int
}
#endif /* CONFIG_X86_64 */

-void xen_pt_switch(unsigned long ptr)
+#ifdef CONFIG_X86_64
+void xen_pt_switch(pgd_t *pgd)
{
struct mmuext_op op;
op.cmd = MMUEXT_NEW_BASEPTR;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ op.arg1.mfn = virt_to_mfn(pgd);
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}

-void xen_new_user_pt(unsigned long ptr)
+void xen_new_user_pt(pgd_t *pgd)
{
struct mmuext_op op;
+
+ pgd = __user_pgd(pgd);
op.cmd = MMUEXT_NEW_USER_BASEPTR;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ op.arg1.mfn = pgd ? virt_to_mfn(pgd) : 0;
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+#endif

void xen_tlb_flush(void)
{
@@ -634,7 +641,14 @@ void xen_pgd_pin(pgd_t *pgd)
op[0].arg1.mfn = virt_to_mfn(pgd);
#ifdef CONFIG_X86_64
op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE;
- op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd));
+ pgd = __user_pgd(pgd);
+ if (pgd)
+ op[1].arg1.mfn = virt_to_mfn(pgd);
+ else {
+ op[1].cmd = MMUEXT_PIN_L3_TABLE;
+ op[1].arg1.mfn = pfn_to_mfn(__pa_symbol(level3_user_pgt)
+ >> PAGE_SHIFT);
+ }
#endif
if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
BUG();
@@ -647,8 +661,10 @@ void xen_pgd_unpin(pgd_t *pgd)
op[0].cmd = MMUEXT_UNPIN_TABLE;
op[0].arg1.mfn = virt_to_mfn(pgd);
#ifdef CONFIG_X86_64
+ pgd = __user_pgd(pgd);
+ BUG_ON(!pgd);
op[1].cmd = MMUEXT_UNPIN_TABLE;
- op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd));
+ op[1].arg1.mfn = virt_to_mfn(pgd);
#endif
if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
BUG();
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2010-11-23 16:31:40.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-03 14:42:36.000000000 +0100
@@ -761,9 +761,6 @@ void __init xen_init_pt(void)
(PTRS_PER_PUD - pud_index(__START_KERNEL_map))
* sizeof(*level3_kernel_pgt));

- __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
- __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
-
/* Do an early initialization of the fixmap area. */
addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
if (pud_present(level3_kernel_pgt[pud_index(addr)])) {
@@ -779,8 +776,6 @@ void __init xen_init_pt(void)

early_make_page_readonly(init_level4_pgt,
XENFEAT_writable_page_tables);
- early_make_page_readonly(__user_pgd(init_level4_pgt),
- XENFEAT_writable_page_tables);
early_make_page_readonly(level3_kernel_pgt,
XENFEAT_writable_page_tables);
early_make_page_readonly(level3_user_pgt,
--- head-2011-03-17.orig/arch/x86/mm/pgtable-xen.c 2010-11-23 16:31:40.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pgtable-xen.c 2011-03-17 14:35:10.000000000 +0100
@@ -291,9 +291,11 @@ static void pgd_walk(pgd_t *pgd_base, pg
BUG();
seq = 0;
}
+ pgd = __user_pgd(pgd_base);
+ BUG_ON(!pgd);
MULTI_update_va_mapping(mcl + seq,
- (unsigned long)__user_pgd(pgd_base),
- pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
+ (unsigned long)pgd,
+ pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags),
0);
MULTI_update_va_mapping(mcl + seq + 1,
(unsigned long)pgd_base,
@@ -689,19 +691,37 @@ static void pgd_prepopulate_pmd(struct m
}
}

+static inline pgd_t *user_pgd_alloc(pgd_t *pgd)
+{
#ifdef CONFIG_X86_64
-/* We allocate two contiguous pages for kernel and user. */
-#define PGD_ORDER 1
-#else
-#define PGD_ORDER 0
+ if (pgd) {
+ pgd_t *upgd = (void *)__get_free_page(PGALLOC_GFP);
+
+ if (upgd)
+ set_page_private(virt_to_page(pgd),
+ (unsigned long)upgd);
+ else {
+ free_page((unsigned long)pgd);
+ pgd = NULL;
+ }
+ }
+#endif
+ return pgd;
+}
+
+static inline void user_pgd_free(pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+ free_page(page_private(virt_to_page(pgd)));
#endif
+}

pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];

- pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER);
+ pgd = user_pgd_alloc((void *)__get_free_page(PGALLOC_GFP));

if (pgd == NULL)
goto out;
@@ -740,7 +760,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
out_free_pmds:
free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb));
out_free_pgd:
- free_pages((unsigned long)pgd, PGD_ORDER);
+ user_pgd_free(pgd);
+ free_page((unsigned long)pgd);
out:
return NULL;
}
@@ -759,7 +780,8 @@ void pgd_free(struct mm_struct *mm, pgd_

pgd_mop_up_pmds(mm, pgd);
paravirt_pgd_free(mm, pgd);
- free_pages((unsigned long)pgd, PGD_ORDER);
+ user_pgd_free(pgd);
+ free_page((unsigned long)pgd);
}

/* blktap and gntdev need this, as otherwise they would implicitly (and
--- head-2011-03-17.orig/drivers/xen/core/machine_reboot.c 2011-02-03 14:42:15.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/machine_reboot.c 2011-02-03 14:42:36.000000000 +0100
@@ -186,8 +186,7 @@ static int take_machine_down(void *_susp
* in fast-suspend mode as that implies a new enough Xen.
*/
if (!suspend->fast_suspend)
- xen_new_user_pt(__pa(__user_pgd(
- current->active_mm->pgd)));
+ xen_new_user_pt(current->active_mm->pgd);
#endif
}