You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
qubes-linux-kernel/patches.xen/xen-x86_64-pgd-alloc-order

338 lines
11 KiB

From: jbeulich@novell.com
Subject: don't require order-1 allocations for pgd-s
Patch-mainline: n/a
At the same time, remove the useless user-mode counterpart of init_level4_pgt.
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/hypervisor.h 2010-11-23 16:31:40.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/hypervisor.h 2011-02-03 14:42:36.000000000 +0100
@@ -106,8 +106,8 @@ void do_hypervisor_callback(struct pt_re
* be MACHINE addresses.
*/
-void xen_pt_switch(unsigned long ptr);
-void xen_new_user_pt(unsigned long ptr); /* x86_64 only */
+void xen_pt_switch(pgd_t *);
+void xen_new_user_pt(pgd_t *); /* x86_64 only */
void xen_load_gs(unsigned int selector); /* x86_64 only */
void xen_tlb_flush(void);
void xen_invlpg(unsigned long ptr);
@@ -115,7 +115,7 @@ void xen_invlpg(unsigned long ptr);
void xen_l1_entry_update(pte_t *ptr, pte_t val);
void xen_l2_entry_update(pmd_t *ptr, pmd_t val);
void xen_l3_entry_update(pud_t *ptr, pud_t val); /* x86_64/PAE */
-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val); /* x86_64 only */
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val); /* x86_64 only */
void xen_pgd_pin(pgd_t *);
void xen_pgd_unpin(pgd_t *);
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-08 10:25:49.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/mmu_context.h 2011-02-08 10:46:27.000000000 +0100
@@ -82,6 +82,9 @@ static inline void switch_mm(struct mm_s
{
unsigned cpu = smp_processor_id();
struct mmuext_op _op[2 + (sizeof(long) > 4)], *op = _op;
+#ifdef CONFIG_X86_64
+ pgd_t *upgd;
+#endif
if (likely(prev != next)) {
BUG_ON(!xen_feature(XENFEAT_writable_page_tables) &&
@@ -98,10 +101,11 @@ static inline void switch_mm(struct mm_s
op->arg1.mfn = virt_to_mfn(next->pgd);
op++;
- /* xen_new_user_pt(__pa(__user_pgd(next->pgd))) */
+ /* xen_new_user_pt(next->pgd) */
#ifdef CONFIG_X86_64
op->cmd = MMUEXT_NEW_USER_BASEPTR;
- op->arg1.mfn = virt_to_mfn(__user_pgd(next->pgd));
+ upgd = __user_pgd(next->pgd);
+ op->arg1.mfn = likely(upgd) ? virt_to_mfn(upgd) : 0;
op++;
#endif
@@ -132,7 +136,7 @@ static inline void switch_mm(struct mm_s
* to make sure to use no freed page tables.
*/
load_cr3(next->pgd);
- xen_new_user_pt(__pa(__user_pgd(next->pgd)));
+ xen_new_user_pt(next->pgd);
load_LDT_nolock(&next->context);
}
}
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-03 14:41:13.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgalloc.h 2011-02-03 14:42:36.000000000 +0100
@@ -123,15 +123,13 @@ static inline void pud_populate(struct m
#endif /* CONFIG_X86_PAE */
#if PAGETABLE_LEVELS > 3
-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
-
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud)
{
pgd_t ent = __pgd(_PAGE_TABLE | __pa(pud));
paravirt_alloc_pud(mm, __pa(pud) >> PAGE_SHIFT);
if (unlikely(PagePinned(virt_to_page(pgd))))
- xen_l4_entry_update(pgd, 1, ent);
+ xen_l4_entry_update(pgd, ent);
else
*__user_pgd(pgd) = *pgd = ent;
}
--- head-2011-03-17.orig/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-03 14:42:15.000000000 +0100
+++ head-2011-03-17/arch/x86/include/mach-xen/asm/pgtable_64.h 2011-02-03 14:42:36.000000000 +0100
@@ -111,18 +111,25 @@ static inline void xen_set_pud(pud_t *pu
: (void)(*__pudp = xen_make_pud(0)); \
})
-#define __user_pgd(pgd) ((pgd) + PTRS_PER_PGD)
+static inline pgd_t *__user_pgd(pgd_t *pgd)
+{
+ if (unlikely(((unsigned long)pgd & PAGE_MASK)
+ == (unsigned long)init_level4_pgt))
+ return NULL;
+ return (pgd_t *)(virt_to_page(pgd)->private
+ + ((unsigned long)pgd & ~PAGE_MASK));
+}
static inline void xen_set_pgd(pgd_t *pgdp, pgd_t pgd)
{
- xen_l4_entry_update(pgdp, 0, pgd);
+ xen_l4_entry_update(pgdp, pgd);
}
#define xen_pgd_clear(pgd) \
({ \
pgd_t *__pgdp = (pgd); \
PagePinned(virt_to_page(__pgdp)) \
- ? xen_l4_entry_update(__pgdp, 1, xen_make_pgd(0)) \
+ ? xen_l4_entry_update(__pgdp, xen_make_pgd(0)) \
: (void)(*__user_pgd(__pgdp) = *__pgdp = xen_make_pgd(0)); \
})
--- head-2011-03-17.orig/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:44:07.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/cpu/common-xen.c 2011-03-17 14:44:15.000000000 +0100
@@ -1064,8 +1064,7 @@ DEFINE_PER_CPU_FIRST(union irq_stack_uni
void xen_switch_pt(void)
{
#ifdef CONFIG_XEN
- xen_pt_switch(__pa_symbol(init_level4_pgt));
- xen_new_user_pt(__pa_symbol(__user_pgd(init_level4_pgt)));
+ xen_pt_switch(init_level4_pgt);
#endif
}
--- head-2011-03-17.orig/arch/x86/kernel/head_64-xen.S 2011-02-01 14:55:46.000000000 +0100
+++ head-2011-03-17/arch/x86/kernel/head_64-xen.S 2011-02-03 14:42:36.000000000 +0100
@@ -56,14 +56,6 @@ ENTRY(name)
__PAGE_ALIGNED_BSS
NEXT_PAGE(init_level4_pgt)
.fill 512,8,0
- /*
- * We update two pgd entries to make kernel and user pgd consistent
- * at pgd_populate(). It can be used for kernel modules. So we place
- * this page here for those cases to avoid memory corruption.
- * We also use this page to establish the initial mapping for the
- * vsyscall area.
- */
- .fill 512,8,0
NEXT_PAGE(level3_kernel_pgt)
.fill 512,8,0
--- head-2011-03-17.orig/arch/x86/mm/hypervisor.c 2010-12-08 10:45:40.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/hypervisor.c 2011-02-03 14:42:36.000000000 +0100
@@ -521,7 +521,7 @@ void xen_l3_entry_update(pud_t *ptr, pud
#endif
#ifdef CONFIG_X86_64
-void xen_l4_entry_update(pgd_t *ptr, int user, pgd_t val)
+void xen_l4_entry_update(pgd_t *ptr, pgd_t val)
{
mmu_update_t u[2];
struct page *page = NULL;
@@ -534,8 +534,11 @@ void xen_l4_entry_update(pgd_t *ptr, int
}
u[0].ptr = virt_to_machine(ptr);
u[0].val = __pgd_val(val);
- if (user) {
- u[1].ptr = virt_to_machine(__user_pgd(ptr));
+ if (((unsigned long)ptr & ~PAGE_MASK)
+ <= pgd_index(TASK_SIZE_MAX) * sizeof(*ptr)) {
+ ptr = __user_pgd(ptr);
+ BUG_ON(!ptr);
+ u[1].ptr = virt_to_machine(ptr);
u[1].val = __pgd_val(val);
do_lN_entry_update(u, 2, page);
} else
@@ -543,21 +546,25 @@ void xen_l4_entry_update(pgd_t *ptr, int
}
#endif /* CONFIG_X86_64 */
-void xen_pt_switch(unsigned long ptr)
+#ifdef CONFIG_X86_64
+void xen_pt_switch(pgd_t *pgd)
{
struct mmuext_op op;
op.cmd = MMUEXT_NEW_BASEPTR;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ op.arg1.mfn = virt_to_mfn(pgd);
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
-void xen_new_user_pt(unsigned long ptr)
+void xen_new_user_pt(pgd_t *pgd)
{
struct mmuext_op op;
+
+ pgd = __user_pgd(pgd);
op.cmd = MMUEXT_NEW_USER_BASEPTR;
- op.arg1.mfn = pfn_to_mfn(ptr >> PAGE_SHIFT);
+ op.arg1.mfn = pgd ? virt_to_mfn(pgd) : 0;
BUG_ON(HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF) < 0);
}
+#endif
void xen_tlb_flush(void)
{
@@ -634,7 +641,14 @@ void xen_pgd_pin(pgd_t *pgd)
op[0].arg1.mfn = virt_to_mfn(pgd);
#ifdef CONFIG_X86_64
op[1].cmd = op[0].cmd = MMUEXT_PIN_L4_TABLE;
- op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd));
+ pgd = __user_pgd(pgd);
+ if (pgd)
+ op[1].arg1.mfn = virt_to_mfn(pgd);
+ else {
+ op[1].cmd = MMUEXT_PIN_L3_TABLE;
+ op[1].arg1.mfn = pfn_to_mfn(__pa_symbol(level3_user_pgt)
+ >> PAGE_SHIFT);
+ }
#endif
if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
BUG();
@@ -647,8 +661,10 @@ void xen_pgd_unpin(pgd_t *pgd)
op[0].cmd = MMUEXT_UNPIN_TABLE;
op[0].arg1.mfn = virt_to_mfn(pgd);
#ifdef CONFIG_X86_64
+ pgd = __user_pgd(pgd);
+ BUG_ON(!pgd);
op[1].cmd = MMUEXT_UNPIN_TABLE;
- op[1].arg1.mfn = virt_to_mfn(__user_pgd(pgd));
+ op[1].arg1.mfn = virt_to_mfn(pgd);
#endif
if (HYPERVISOR_mmuext_op(op, NR_PGD_PIN_OPS, NULL, DOMID_SELF) < 0)
BUG();
--- head-2011-03-17.orig/arch/x86/mm/init_64-xen.c 2010-11-23 16:31:40.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/init_64-xen.c 2011-02-03 14:42:36.000000000 +0100
@@ -761,9 +761,6 @@ void __init xen_init_pt(void)
(PTRS_PER_PUD - pud_index(__START_KERNEL_map))
* sizeof(*level3_kernel_pgt));
- __user_pgd(init_level4_pgt)[pgd_index(VSYSCALL_START)] =
- __pgd(__pa_symbol(level3_user_pgt) | _PAGE_TABLE);
-
/* Do an early initialization of the fixmap area. */
addr = __fix_to_virt(FIX_EARLYCON_MEM_BASE);
if (pud_present(level3_kernel_pgt[pud_index(addr)])) {
@@ -779,8 +776,6 @@ void __init xen_init_pt(void)
early_make_page_readonly(init_level4_pgt,
XENFEAT_writable_page_tables);
- early_make_page_readonly(__user_pgd(init_level4_pgt),
- XENFEAT_writable_page_tables);
early_make_page_readonly(level3_kernel_pgt,
XENFEAT_writable_page_tables);
early_make_page_readonly(level3_user_pgt,
--- head-2011-03-17.orig/arch/x86/mm/pgtable-xen.c 2010-11-23 16:31:40.000000000 +0100
+++ head-2011-03-17/arch/x86/mm/pgtable-xen.c 2011-03-17 14:35:10.000000000 +0100
@@ -291,9 +291,11 @@ static void pgd_walk(pgd_t *pgd_base, pg
BUG();
seq = 0;
}
+ pgd = __user_pgd(pgd_base);
+ BUG_ON(!pgd);
MULTI_update_va_mapping(mcl + seq,
- (unsigned long)__user_pgd(pgd_base),
- pfn_pte(virt_to_phys(__user_pgd(pgd_base))>>PAGE_SHIFT, flags),
+ (unsigned long)pgd,
+ pfn_pte(virt_to_phys(pgd)>>PAGE_SHIFT, flags),
0);
MULTI_update_va_mapping(mcl + seq + 1,
(unsigned long)pgd_base,
@@ -689,19 +691,37 @@ static void pgd_prepopulate_pmd(struct m
}
}
+static inline pgd_t *user_pgd_alloc(pgd_t *pgd)
+{
#ifdef CONFIG_X86_64
-/* We allocate two contiguous pages for kernel and user. */
-#define PGD_ORDER 1
-#else
-#define PGD_ORDER 0
+ if (pgd) {
+ pgd_t *upgd = (void *)__get_free_page(PGALLOC_GFP);
+
+ if (upgd)
+ set_page_private(virt_to_page(pgd),
+ (unsigned long)upgd);
+ else {
+ free_page((unsigned long)pgd);
+ pgd = NULL;
+ }
+ }
+#endif
+ return pgd;
+}
+
+static inline void user_pgd_free(pgd_t *pgd)
+{
+#ifdef CONFIG_X86_64
+ free_page(page_private(virt_to_page(pgd)));
#endif
+}
pgd_t *pgd_alloc(struct mm_struct *mm)
{
pgd_t *pgd;
pmd_t *pmds[PREALLOCATED_PMDS];
- pgd = (pgd_t *)__get_free_pages(PGALLOC_GFP, PGD_ORDER);
+ pgd = user_pgd_alloc((void *)__get_free_page(PGALLOC_GFP));
if (pgd == NULL)
goto out;
@@ -740,7 +760,8 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
out_free_pmds:
free_pmds(pmds, mm, !xen_feature(XENFEAT_pae_pgdir_above_4gb));
out_free_pgd:
- free_pages((unsigned long)pgd, PGD_ORDER);
+ user_pgd_free(pgd);
+ free_page((unsigned long)pgd);
out:
return NULL;
}
@@ -759,7 +780,8 @@ void pgd_free(struct mm_struct *mm, pgd_
pgd_mop_up_pmds(mm, pgd);
paravirt_pgd_free(mm, pgd);
- free_pages((unsigned long)pgd, PGD_ORDER);
+ user_pgd_free(pgd);
+ free_page((unsigned long)pgd);
}
/* blktap and gntdev need this, as otherwise they would implicitly (and
--- head-2011-03-17.orig/drivers/xen/core/machine_reboot.c 2011-02-03 14:42:15.000000000 +0100
+++ head-2011-03-17/drivers/xen/core/machine_reboot.c 2011-02-03 14:42:36.000000000 +0100
@@ -186,8 +186,7 @@ static int take_machine_down(void *_susp
* in fast-suspend mode as that implies a new enough Xen.
*/
if (!suspend->fast_suspend)
- xen_new_user_pt(__pa(__user_pgd(
- current->active_mm->pgd)));
+ xen_new_user_pt(current->active_mm->pgd);
#endif
}