pvops: fix for GPU performance
Details here: http://groups.google.com/group/qubes-devel/t/33256d29a29fa883
This commit is contained in:
parent
b5606021c9
commit
394e2aaef1
@ -0,0 +1,72 @@
|
||||
From 433928d3823f561919ead305194e46e5311b573d Mon Sep 17 00:00:00 2001
|
||||
From: Marek Marczykowski <marmarek@invisiblethingslab.com>
|
||||
Date: Sat, 23 Jun 2012 19:50:44 +0200
|
||||
Subject: [PATCH 1/2] Revert "xen/pat: Disable PAT support for now."
|
||||
Organization: Invisible Things Lab
|
||||
|
||||
This reverts commit 8eaffa67b43e99ae581622c5133e20b0f48bcef1.
|
||||
|
||||
We haven't observed failure which is workarounded by this patch, but it caused
|
||||
horrible GPU performance. Anyway there is "nopat" option.
|
||||
|
||||
Signed-off-by: Marek Marczykowski <marmarek@invisiblethingslab.com>
|
||||
---
|
||||
arch/x86/xen/enlighten.c | 2 --
|
||||
arch/x86/xen/mmu.c | 8 ++++----
|
||||
2 files changed, 4 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
|
||||
index 6c7f1e8..bf3319c 100644
|
||||
--- a/arch/x86/xen/enlighten.c
|
||||
+++ b/arch/x86/xen/enlighten.c
|
||||
@@ -1269,9 +1269,7 @@ asmlinkage void __init xen_start_kernel(void)
|
||||
|
||||
/* Prevent unwanted bits from being set in PTEs. */
|
||||
__supported_pte_mask &= ~_PAGE_GLOBAL;
|
||||
-#if 0
|
||||
if (!xen_initial_domain())
|
||||
-#endif
|
||||
__supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD);
|
||||
|
||||
__supported_pte_mask |= _PAGE_IOMAP;
|
||||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
|
||||
index 69f5857..a5d252a 100644
|
||||
--- a/arch/x86/xen/mmu.c
|
||||
+++ b/arch/x86/xen/mmu.c
|
||||
@@ -420,13 +420,13 @@ static pteval_t iomap_pte(pteval_t val)
|
||||
static pteval_t xen_pte_val(pte_t pte)
|
||||
{
|
||||
pteval_t pteval = pte.pte;
|
||||
-#if 0
|
||||
+
|
||||
/* If this is a WC pte, convert back from Xen WC to Linux WC */
|
||||
if ((pteval & (_PAGE_PAT | _PAGE_PCD | _PAGE_PWT)) == _PAGE_PAT) {
|
||||
WARN_ON(!pat_enabled);
|
||||
pteval = (pteval & ~_PAGE_PAT) | _PAGE_PWT;
|
||||
}
|
||||
-#endif
|
||||
+
|
||||
if (xen_initial_domain() && (pteval & _PAGE_IOMAP))
|
||||
return pteval;
|
||||
|
||||
@@ -468,7 +468,7 @@ void xen_set_pat(u64 pat)
|
||||
static pte_t xen_make_pte(pteval_t pte)
|
||||
{
|
||||
phys_addr_t addr = (pte & PTE_PFN_MASK);
|
||||
-#if 0
|
||||
+
|
||||
/* If Linux is trying to set a WC pte, then map to the Xen WC.
|
||||
* If _PAGE_PAT is set, then it probably means it is really
|
||||
* _PAGE_PSE, so avoid fiddling with the PAT mapping and hope
|
||||
@@ -481,7 +481,7 @@ static pte_t xen_make_pte(pteval_t pte)
|
||||
if ((pte & (_PAGE_PCD | _PAGE_PWT)) == _PAGE_PWT)
|
||||
pte = (pte & ~(_PAGE_PCD | _PAGE_PWT)) | _PAGE_PAT;
|
||||
}
|
||||
-#endif
|
||||
+
|
||||
/*
|
||||
* Unprivileged domains are allowed to do IOMAPpings for
|
||||
* PCI passthrough, but not map ISA space. The ISA
|
||||
--
|
||||
1.7.4.4
|
||||
|
@ -0,0 +1,196 @@
|
||||
From f37a97dead89d07bce4d8fedc4c295c9bc700ab5 Mon Sep 17 00:00:00 2001
|
||||
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Date: Fri, 4 Nov 2011 11:59:34 -0400
|
||||
Subject: [PATCH 2/2] x86/cpa: Use pte_attrs instead of pte_flags on
|
||||
CPA/set_p.._wb/wc operations.
|
||||
|
||||
When using the paravirt interface, most of the page operations are wrapped
|
||||
in the pvops interface. The one that is not is the pte_flags. The reason
|
||||
being that for most cases, the "raw" PTE flag values for baremetal and whatever
|
||||
pvops platform is running (in this case) - share the same bit meaning.
|
||||
|
||||
Except for PAT. Under Linux, the PAT MSR is written to be:
|
||||
|
||||
PAT4 PAT0
|
||||
+---+----+----+----+-----+----+----+
|
||||
WC | WC | WB | UC | UC- | WC | WB | <= Linux
|
||||
+---+----+----+----+-----+----+----+
|
||||
WC | WT | WB | UC | UC- | WT | WB | <= BIOS
|
||||
+---+----+----+----+-----+----+----+
|
||||
WC | WP | WC | UC | UC- | WT | WB | <= Xen
|
||||
+---+----+----+----+-----+----+----+
|
||||
|
||||
The lookup of this index table translates to looking up
|
||||
Bit 7, Bit 4, and Bit 3 of PTE:
|
||||
|
||||
PAT/PSE (bit 7) ... PCD (bit 4) .. PWT (bit 3).
|
||||
|
||||
If all bits are off, then we are using PAT0. If bit 3 turned on,
|
||||
then we are using PAT1, if bit 3 and bit 4, then PAT2..
|
||||
|
||||
Back to the PAT MSR table:
|
||||
|
||||
As you can see, the PAT1 translates to PAT4 under Xen. Under Linux
|
||||
we only use PAT0, PAT1, and PAT2 for the caching as:
|
||||
|
||||
WB = none (so PAT0)
|
||||
WC = PWT (bit 3 on)
|
||||
UC = PWT | PCD (bit 3 and 4 are on).
|
||||
|
||||
But to make it work with Xen, we end up doing for WC a translation:
|
||||
|
||||
PWT (so bit 3 on) --> PAT (so bit 7 is on) and clear bit 3
|
||||
|
||||
And to translate back (when the paravirt pte_val is used) we would:
|
||||
|
||||
PAT (bit 7 on) --> PWT (bit 3 on) and clear bit 7.
|
||||
|
||||
This works quite well, except if code uses the pte_flags, as pte_flags
|
||||
reads the raw value and does not go through the paravirt. Which means
|
||||
that if (when running under Xen):
|
||||
|
||||
1) we allocate some pages.
|
||||
2) call set_pages_array_wc, which ends up calling:
|
||||
__page_change_att_set_clr(.., __pgprot(__PAGE_WC), /* set */
|
||||
, __pgprot(__PAGE_MASK), /* clear */
|
||||
which ends up reading the _raw_ PTE flags and _only_ look at the
|
||||
_PTE_FLAG_MASK contents with __PAGE_MASK cleared (0x18) and
|
||||
__PAGE_WC (0x8) set.
|
||||
|
||||
read raw *pte -> 0x67
|
||||
*pte = 0x67 & ^0x18 | 0x8
|
||||
*pte = 0x67 & 0xfffffe7 | 0x8
|
||||
*pte = 0x6f
|
||||
|
||||
[now set_pte_atomic is called, and 0x6f is written in, but under
|
||||
xen_make_pte, the bit 3 is translated to bit 7, so it ends up
|
||||
writting 0xa7, which is correct]
|
||||
|
||||
3) do something to them.
|
||||
4) call set_pages_array_wb
|
||||
__page_change_att_set_clr(.., __pgprot(__PAGE_WB), /* set */
|
||||
, __pgprot(__PAGE_MASK), /* clear */
|
||||
which ends up reading the _raw_ PTE and _only_ look at the
|
||||
_PTE_FLAG_MASK contents with _PAGE_MASK cleared (0x18) and
|
||||
__PAGE_WB (0x0) set:
|
||||
|
||||
read raw *pte -> 0xa7
|
||||
*pte = 0xa7 & &0x18 | 0
|
||||
*pte = 0xa7 & 0xfffffe7 | 0
|
||||
*pte = 0xa7
|
||||
|
||||
[we check whether the old PTE is different from the new one
|
||||
|
||||
if (pte_val(old_pte) != pte_val(new_pte)) {
|
||||
set_pte_atomic(kpte, new_pte);
|
||||
...
|
||||
|
||||
and find out that 0xA7 == 0xA7 so we do not write the new PTE value in]
|
||||
|
||||
End result is that we failed at removing the WC caching bit!
|
||||
|
||||
5) free them.
|
||||
[and have pages with PAT4 (bit 7) set, so other subsystems end up using
|
||||
the pages that have the write combined bit set resulting in crashes. Yikes!].
|
||||
|
||||
The fix, which this patch proposes, is to wrap the pte_pgprot in the CPA
|
||||
code with newly introduced pte_attrs which can go through the pvops interface
|
||||
to get the "emulated" value instead of the raw. Naturally if CONFIG_PARAVIRT is
|
||||
not set, it would end calling native_pte_val.
|
||||
|
||||
The other way to fix this is by wrapping pte_flags and go through the pvops
|
||||
interface and it really is the Right Thing to do. The problem is, that past
|
||||
experience with mprotect stuff demonstrates that it be really expensive in inner
|
||||
loops, and pte_flags() is used in some very perf-critical areas.
|
||||
|
||||
Example code to run this and see the various mysterious subsystems/applications
|
||||
crashing
|
||||
|
||||
MODULE_AUTHOR("Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>");
|
||||
MODULE_DESCRIPTION("wb_to_wc_and_back");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_VERSION(WB_TO_WC);
|
||||
|
||||
static int thread(void *arg)
|
||||
{
|
||||
struct page *a[MAX_PAGES];
|
||||
unsigned int i, j;
|
||||
do {
|
||||
for (j = 0, i = 0;i < MAX_PAGES; i++, j++) {
|
||||
a[i] = alloc_page(GFP_KERNEL);
|
||||
if (!a[i])
|
||||
break;
|
||||
}
|
||||
set_pages_array_wc(a, j);
|
||||
set_current_state(TASK_INTERRUPTIBLE);
|
||||
schedule_timeout_interruptible(HZ);
|
||||
for (i = 0; i < j; i++) {
|
||||
unsigned long *addr = page_address(a[i]);
|
||||
if (addr) {
|
||||
memset(addr, 0xc2, PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
set_pages_array_wb(a, j);
|
||||
for (i = 0; i< MAX_PAGES; i++) {
|
||||
if (a[i])
|
||||
__free_page(a[i]);
|
||||
a[i] = NULL;
|
||||
}
|
||||
} while (!kthread_should_stop());
|
||||
return 0;
|
||||
}
|
||||
static struct task_struct *t;
|
||||
static int __init wb_to_wc_init(void)
|
||||
{
|
||||
t = kthread_run(thread, NULL, "wb_to_wc_and_back");
|
||||
return 0;
|
||||
}
|
||||
static void __exit wb_to_wc_exit(void)
|
||||
{
|
||||
if (t)
|
||||
kthread_stop(t);
|
||||
}
|
||||
module_init(wb_to_wc_init);
|
||||
module_exit(wb_to_wc_exit);
|
||||
|
||||
This fixes RH BZ #742032, #787403, and #745574
|
||||
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
|
||||
Tested-by: Tom Goetz <tom.goetz@virtualcomputer.com>
|
||||
CC: stable@kernel.org
|
||||
---
|
||||
arch/x86/include/asm/pgtable.h | 5 +++++
|
||||
arch/x86/mm/pageattr.c | 2 +-
|
||||
2 files changed, 6 insertions(+), 1 deletions(-)
|
||||
|
||||
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
|
||||
index 49afb3f..fa7bd2c 100644
|
||||
--- a/arch/x86/include/asm/pgtable.h
|
||||
+++ b/arch/x86/include/asm/pgtable.h
|
||||
@@ -349,6 +349,11 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
|
||||
return __pgprot(preservebits | addbits);
|
||||
}
|
||||
|
||||
+static inline pgprot_t pte_attrs(pte_t pte)
|
||||
+{
|
||||
+ return __pgprot(pte_val(pte) & PTE_FLAGS_MASK);
|
||||
+}
|
||||
+
|
||||
#define pte_pgprot(x) __pgprot(pte_flags(x) & PTE_FLAGS_MASK)
|
||||
|
||||
#define canon_pgprot(p) __pgprot(massage_pgprot(p))
|
||||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
|
||||
index e1ebde3..1ae1b4b 100644
|
||||
--- a/arch/x86/mm/pageattr.c
|
||||
+++ b/arch/x86/mm/pageattr.c
|
||||
@@ -651,7 +651,7 @@ repeat:
|
||||
|
||||
if (level == PG_LEVEL_4K) {
|
||||
pte_t new_pte;
|
||||
- pgprot_t new_prot = pte_pgprot(old_pte);
|
||||
+ pgprot_t new_prot = pte_attrs(old_pte);
|
||||
unsigned long pfn = pte_pfn(old_pte);
|
||||
|
||||
pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
|
||||
--
|
||||
1.7.4.4
|
||||
|
@ -11,6 +11,10 @@ patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch
|
||||
patches.xen/pvops-3.4-enable-netfront-in-dom0.patch
|
||||
patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch
|
||||
|
||||
# fix for GPU performance (revert workaround and apply proper fix), should go in 3.5
|
||||
patches.xen/pvops-3.4-Revert-xen-pat-Disable-PAT-support-for-now.patch
|
||||
patches.xen/pvops-3.4-x86-cpa-Use-pte_attrs-instead-of-pte_flags-on-CPA-se.patch
|
||||
|
||||
# Additional features
|
||||
patches.xen/pvops-3.4-0100-usb-xen-pvusb-driver.patch
|
||||
patches.xen/pvops-blkfront-removable-flag.patch
|
||||
|
Loading…
Reference in New Issue
Block a user