diff --git a/patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch b/patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch new file mode 100644 index 0000000..52a33e2 --- /dev/null +++ b/patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch @@ -0,0 +1,72 @@ +From 433928d3823f561919ead305194e46e5311b573d Mon Sep 17 00:00:00 2001 +From: Marek Marczykowski +Date: Sat, 23 Jun 2012 19:50:44 +0200 +Subject: [PATCH 1/2] Revert "xen/pat: Disable PAT support for now." +Organization: Invisible Things Lab + +This reverts commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1. + +We haven't observed failure which is workarounded by this patch, but it caused +horrible GPU performance. Anyway there is "nopat" option. + +Signed-off-by: Marek Marczykowski +--- + arch/x86/xen/enlighten.c | 2 -- + arch/x86/xen/mmu.c | 8 ++++---- + 2 files changed, 4 insertions(+), 6 deletions(-) + +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index 6c7f1e8..bf3319c 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -1269,9 +1269,7 @@ asmlinkage void __init xen_start_kernel(void) + + /* Prevent unwanted bits from being set in PTEs. */ + __supported_pte_mask &= ~_PAGE_GLOBAL; +-#if 0 + if (!xen_initial_domain()) +-#endif + __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); + + __supported_pte_mask |= _PAGE_IOMAP; +diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c +index 69f5857..a5d252a 100644 +--- a/arch/x86/xen/mmu.c ++++ b/arch/x86/xen/mmu.c +@@ -420,13 +420,13 @@ static pteval_t iomap_pte(pteval_t val) + static pteval_t xen_pte_val(pte_t pte) + { + pteval_t pteval = pte.pte; +-#if 0 ++ + /* If this is a WC pte, convert back from Xen WC to Linux WC */ + if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) { + WARN_ON(!pat_enabled); + pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT; + } +-#endif ++ + if (xen_initial_domain() && (pteval & _PAGE_IOMAP)) + return pteval; + +@@ -468,7 +468,7 @@ void xen_set_pat(u64 pat) + static pte_t xen_make_pte(pteval_t pte) + { + phys_addr_t addr = (pte & PTE_PFN_MASK); +-#if 0 ++ + /* If Linux is trying to set a WC pte, then map to the Xen WC. + * If _PAGE_PAT is set, then it probably means it is really + * _PAGE_PSE, so avoid fiddling with the PAT mapping and hope +@@ -481,7 +481,7 @@ static pte_t xen_make_pte(pteval_t pte) + if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT) + pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT; + } +-#endif ++ + /* + * Unprivileged domains are allowed to do IOMAPpings for + * PCI passthrough, but not map ISA space. The ISA +-- +1.7.4.4 + diff --git a/patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch b/patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch new file mode 100644 index 0000000..52cb5eb --- /dev/null +++ b/patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch @@ -0,0 +1,196 @@ +From f37a97dead89d07bce4d8fedc4c295c9bc700ab5 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Fri, 4 Nov 2011 11:59:34 -0400 +Subject: [PATCH 2/2] x86/cpa: Use pte_attrs instead of pte_flags on + CPA/set_p.._wb/wc operations. + +When using the paravirt interface, most of the page operations are wrapped +in the pvops interface. The one that is not is the pte_flags. The reason +being that for most cases, the "raw" PTE flag values for baremetal and whatever +pvops platform is running (in this case) - share the same bit meaning. + +Except for PAT. Under Linux, the PAT MSR is written to be: + + PAT4 PAT0 ++---+----+----+----+-----+----+----+ + WC | WC | WB | UC | UC- | WC | WB | <= Linux ++---+----+----+----+-----+----+----+ + WC | WT | WB | UC | UC- | WT | WB | <= BIOS ++---+----+----+----+-----+----+----+ + WC | WP | WC | UC | UC- | WT | WB | <= Xen ++---+----+----+----+-----+----+----+ + +The lookup of this index table translates to looking up +Bit 7, Bit 4, and Bit 3 of PTE: + + PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3). + +If all bits are off, then we are using PAT0. If bit 3 turned on, +then we are using PAT1, if bit 3 and bit 4, then PAT2.. + +Back to the PAT MSR table: + +As you can see, the PAT1 translates to PAT4 under Xen. Under Linux +we only use PAT0, PAT1, and PAT2 for the caching as: + + WB = none (so PAT0) + WC = PWT (bit 3 on) + UC = PWT | PCD (bit 3 and 4 are on). + +But to make it work with Xen, we end up doing for WC a translation: + + PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3 + +And to translate back (when the paravirt pte_val is used) we would: + + PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7. + +This works quite well, except if code uses the pte_flags, as pte_flags +reads the raw value and does not go through the paravirt. Which means +that if (when running under Xen): + + 1) we allocate some pages. + 2) call set_pages_array_wc, which ends up calling: + __page_change_att_set_clr(.., __pgprot(__PAGE_WC), /* set */ + , __pgprot(__PAGE_MASK), /* clear */ + which ends up reading the _raw_ PTE flags and _only_ look at the + _PTE_FLAG_MASK contents with __PAGE_MASK cleared (0x18) and + __PAGE_WC (0x8) set. + + read raw *pte -> 0x67 + *pte = 0x67 & ^0x18 | 0x8 + *pte = 0x67 & 0xfffffe7 | 0x8 + *pte = 0x6f + + [now set_pte_atomic is called, and 0x6f is written in, but under + xen_make_pte, the bit 3 is translated to bit 7, so it ends up + writting 0xa7, which is correct] + + 3) do something to them. + 4) call set_pages_array_wb + __page_change_att_set_clr(.., __pgprot(__PAGE_WB), /* set */ + , __pgprot(__PAGE_MASK), /* clear */ + which ends up reading the _raw_ PTE and _only_ look at the + _PTE_FLAG_MASK contents with _PAGE_MASK cleared (0x18) and + __PAGE_WB (0x0) set: + + read raw *pte -> 0xa7 + *pte = 0xa7 & &0x18 | 0 + *pte = 0xa7 & 0xfffffe7 | 0 + *pte = 0xa7 + + [we check whether the old PTE is different from the new one + + if (pte_val(old_pte) != pte_val(new_pte)) { + set_pte_atomic(kpte, new_pte); + ... + + and find out that 0xA7 == 0xA7 so we do not write the new PTE value in] + + End result is that we failed at removing the WC caching bit! + + 5) free them. + [and have pages with PAT4 (bit 7) set, so other subsystems end up using + the pages that have the write combined bit set resulting in crashes. Yikes!]. + +The fix, which this patch proposes, is to wrap the pte_pgprot in the CPA +code with newly introduced pte_attrs which can go through the pvops interface +to get the "emulated" value instead of the raw. Naturally if CONFIG_PARAVIRT is +not set, it would end calling native_pte_val. + +The other way to fix this is by wrapping pte_flags and go through the pvops +interface and it really is the Right Thing to do. The problem is, that past +experience with mprotect stuff demonstrates that it be really expensive in inner +loops, and pte_flags() is used in some very perf-critical areas. + +Example code to run this and see the various mysterious subsystems/applications +crashing + +MODULE_AUTHOR("Konrad Rzeszutek Wilk "); +MODULE_DESCRIPTION("wb_to_wc_and_back"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(WB_TO_WC); + +static int thread(void *arg) +{ + struct page *a[MAX_PAGES]; + unsigned int i, j; + do { + for (j = 0, i = 0;i < MAX_PAGES; i++, j++) { + a[i] = alloc_page(GFP_KERNEL); + if (!a[i]) + break; + } + set_pages_array_wc(a, j); + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout_interruptible(HZ); + for (i = 0; i < j; i++) { + unsigned long *addr = page_address(a[i]); + if (addr) { + memset(addr, 0xc2, PAGE_SIZE); + } + } + set_pages_array_wb(a, j); + for (i = 0; i< MAX_PAGES; i++) { + if (a[i]) + __free_page(a[i]); + a[i] = NULL; + } + } while (!kthread_should_stop()); + return 0; +} +static struct task_struct *t; +static int __init wb_to_wc_init(void) +{ + t = kthread_run(thread, NULL, "wb_to_wc_and_back"); + return 0; +} +static void __exit wb_to_wc_exit(void) +{ + if (t) + kthread_stop(t); +} +module_init(wb_to_wc_init); +module_exit(wb_to_wc_exit); + +This fixes RH BZ #742032, #787403, and #745574 +Signed-off-by: Konrad Rzeszutek Wilk +Tested-by: Tom Goetz +CC: stable@kernel.org +--- + arch/x86/include/asm/pgtable.h | 5 +++++ + arch/x86/mm/pageattr.c | 2 +- + 2 files changed, 6 insertions(+), 1 deletions(-) + +diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h +index 49afb3f..fa7bd2c 100644 +--- a/arch/x86/include/asm/pgtable.h ++++ b/arch/x86/include/asm/pgtable.h +@@ -349,6 +349,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) + return __pgprot(preservebits | addbits); + } + ++static inline pgprot_t pte_attrs(pte_t pte) ++{ ++ return __pgprot(pte_val(pte) & PTE_FLAGS_MASK); ++} ++ + #define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK) + + #define canon_pgprot(p) __pgprot(massage_pgprot(p)) +diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c +index e1ebde3..1ae1b4b 100644 +--- a/arch/x86/mm/pageattr.c ++++ b/arch/x86/mm/pageattr.c +@@ -651,7 +651,7 @@ repeat: + + if (level == PG_LEVEL_4K) { + pte_t new_pte; +- pgprot_t new_prot = pte_pgprot(old_pte); ++ pgprot_t new_prot = pte_attrs(old_pte); + unsigned long pfn = pte_pfn(old_pte); + + pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr); +-- +1.7.4.4 + diff --git a/series-pvops.conf b/series-pvops.conf index ecaa364..6aa9887 100644 --- a/series-pvops.conf +++ b/series-pvops.conf @@ -11,6 +11,10 @@ patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch patches.xen/pvops-3.4-enable-netfront-in-dom0.patch patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch +# fix for GPU performance (revert workaround and apply proper fix), should go in 3.5 +patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch +patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch + # Additional features patches.xen/pvops-3.4-0100-usb-xen-pvusb-driver.patch patches.xen/pvops-blkfront-removable-flag.patch