pvops: add a couple of fixes from 3.5-rc kernels

Especially for block backend.
devel-3.7
Marek Marczykowski 12 years ago
parent 5751db78f9
commit 3541d3d012

@ -0,0 +1,54 @@
From 8c9ce606a60e4a0cb447bdc082ce383b96b227b4 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 25 May 2012 16:11:09 -0400
Subject: [PATCH 2/3] xen/blkback: Copy id field when doing BLKIF_DISCARD.
We weren't copying the id field so when we sent the response
back to the frontend (especially with a 64-bit host and 32-bit
guest), we ended up using a random value. This lead to the
frontend crashing as it would try to pass to __blk_end_request_all
a NULL 'struct request' (b/c it would use the 'id' to find the
proper 'struct request' in its shadow array) and end up crashing:
BUG: unable to handle kernel NULL pointer dereference at 000000e4
IP: [<c0646d4c>] __blk_end_request_all+0xc/0x40
.. snip..
EIP is at __blk_end_request_all+0xc/0x40
.. snip..
[<ed95db72>] blkif_interrupt+0x172/0x330 [xen_blkfront]
This fixes the bug by passing in the proper id for the response.
Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=824641
CC: stable@kernel.org
Tested-by: William Dauchy <wdauchy@gmail.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/block/xen-blkback/common.h | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 773cf27..9ad3b5e 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
@@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst,
break;
case BLKIF_OP_DISCARD:
dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
dst->u.discard.sector_number = src->u.discard.sector_number;
dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
break;
--
1.7.4.4

@ -0,0 +1,141 @@
From 6878c32e5cc0e40980abe51d1f02fb453e27493e Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 25 May 2012 17:34:51 -0400
Subject: [PATCH 3/3] xen/blkfront: Add WARN to deal with misbehaving
backends.
Part of the ring structure is the 'id' field which is under
control of the frontend. The frontend stamps it with "some"
value (this some in this implementation being a value less
than BLK_RING_SIZE), and when it gets a response expects
said value to be in the response structure. We have a check
for the id field when spolling new requests but not when
de-spolling responses.
We also add an extra check in add_id_to_freelist to make
sure that the 'struct request' was not NULL - as we cannot
pass a NULL to __blk_end_request_all, otherwise that crashes
(and all the operations that the response is dealing with
end up with __blk_end_request_all).
Lastly we also print the name of the operation that failed.
[v1: s/BUG/WARN/ suggested by Stefano]
[v2: Add extra check in add_id_to_freelist]
[v3: Redid op_name per Jan's suggestion]
[v4: add const * and add WARN on failure returns]
Acked-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/block/xen-blkfront.c | 58 +++++++++++++++++++++++++++++++++--------
1 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 60eed4b..e4fb337 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -141,14 +141,36 @@ static int get_id_from_freelist(struct blkfront_info *info)
return free;
}
-static void add_id_to_freelist(struct blkfront_info *info,
+static int add_id_to_freelist(struct blkfront_info *info,
unsigned long id)
{
+ if (info->shadow[id].req.u.rw.id != id)
+ return -EINVAL;
+ if (info->shadow[id].request == NULL)
+ return -EINVAL;
info->shadow[id].req.u.rw.id = info->shadow_free;
info->shadow[id].request = NULL;
info->shadow_free = id;
+ return 0;
}
+static const char *op_name(int op)
+{
+ static const char *const names[] = {
+ [BLKIF_OP_READ] = "read",
+ [BLKIF_OP_WRITE] = "write",
+ [BLKIF_OP_WRITE_BARRIER] = "barrier",
+ [BLKIF_OP_FLUSH_DISKCACHE] = "flush",
+ [BLKIF_OP_DISCARD] = "discard" };
+
+ if (op < 0 || op >= ARRAY_SIZE(names))
+ return "unknown";
+
+ if (!names[op])
+ return "reserved";
+
+ return names[op];
+}
static int xlbd_reserve_minors(unsigned int minor, unsigned int nr)
{
unsigned int end = minor + nr;
@@ -746,20 +768,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
bret = RING_GET_RESPONSE(&info->ring, i);
id = bret->id;
+ /*
+ * The backend has messed up and given us an id that we would
+ * never have given to it (we stamp it up to BLK_RING_SIZE -
+ * look in get_id_from_freelist.
+ */
+ if (id >= BLK_RING_SIZE) {
+ WARN(1, "%s: response to %s has incorrect id (%ld)\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ /* We can't safely get the 'struct request' as
+ * the id is busted. */
+ continue;
+ }
req = info->shadow[id].request;
if (bret->operation != BLKIF_OP_DISCARD)
blkif_completion(&info->shadow[id]);
- add_id_to_freelist(info, id);
+ if (add_id_to_freelist(info, id)) {
+ WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n",
+ info->gd->disk_name, op_name(bret->operation), id);
+ continue;
+ }
error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO;
switch (bret->operation) {
case BLKIF_OP_DISCARD:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
struct request_queue *rq = info->rq;
- printk(KERN_WARNING "blkfront: %s: discard op failed\n",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
info->feature_discard = 0;
info->feature_secdiscard = 0;
@@ -771,18 +809,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id)
case BLKIF_OP_FLUSH_DISKCACHE:
case BLKIF_OP_WRITE_BARRIER:
if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) {
- printk(KERN_WARNING "blkfront: %s: write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(bret->status == BLKIF_RSP_ERROR &&
info->shadow[id].req.u.rw.nr_segments == 0)) {
- printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n",
- info->flush_op == BLKIF_OP_WRITE_BARRIER ?
- "barrier" : "flush disk cache",
- info->gd->disk_name);
+ printk(KERN_WARNING "blkfront: %s: empty %s op failed\n",
+ info->gd->disk_name, op_name(bret->operation));
error = -EOPNOTSUPP;
}
if (unlikely(error)) {
--
1.7.4.4

@ -0,0 +1,33 @@
From 8605067fb9b8e34aecf44ec258657c9cc009fc5a Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Thu, 5 Apr 2012 16:04:52 +0100
Subject: [PATCH 1/3] xen-blkfront: module exit handling adjustments
The blkdev major must be released upon exit, or else the module can't
attach to devices using the same majors upon being loaded again. Also
avoid leaking the minor tracking bitmap.
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
drivers/block/xen-blkfront.c | 4 +++-
1 files changed, 3 insertions(+), 1 deletions(-)
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 4f2b460..60eed4b 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1500,7 +1500,9 @@ module_init(xlblk_init);
static void __exit xlblk_exit(void)
{
- return xenbus_unregister_driver(&blkfront_driver);
+ xenbus_unregister_driver(&blkfront_driver);
+ unregister_blkdev(XENVBD_MAJOR, DEV_NAME);
+ kfree(minors);
}
module_exit(xlblk_exit);
--
1.7.4.4

@ -0,0 +1,56 @@
From 5e626254206a709c6e937f3dda69bf26c7344f6f Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@amd.com>
Date: Tue, 29 May 2012 13:07:31 +0200
Subject: [PATCH] xen/setup: filter APERFMPERF cpuid feature out
Xen PV kernels allow access to the APERF/MPERF registers to read the
effective frequency. Access to the MSRs is however redirected to the
currently scheduled physical CPU, making consecutive read and
compares unreliable. In addition each rdmsr traps into the hypervisor.
So to avoid bogus readouts and expensive traps, disable the kernel
internal feature flag for APERF/MPERF if running under Xen.
This will
a) remove the aperfmperf flag from /proc/cpuinfo
b) not mislead the power scheduler (arch/x86/kernel/cpu/sched.c) to
use the feature to improve scheduling (by default disabled)
c) not mislead the cpufreq driver to use the MSRs
This does not cover userland programs which access the MSRs via the
device file interface, but this will be addressed separately.
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Cc: stable@vger.kernel.org # v3.0+
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
arch/x86/xen/enlighten.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index d1f9a04..272ebd0 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -208,6 +208,9 @@ static void __init xen_banner(void)
xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : "");
}
+#define CPUID_THERM_POWER_LEAF 6
+#define APERFMPERF_PRESENT 0
+
static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0;
static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0;
@@ -241,6 +244,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx,
*dx = cpuid_leaf5_edx_val;
return;
+ case CPUID_THERM_POWER_LEAF:
+ /* Disabling APERFMPERF for kernel usage */
+ maskecx = ~(1 << APERFMPERF_PRESENT);
+ break;
+
case 0xb:
/* Suppress extended topology stuff */
maskebx = 0;
--
1.7.4.4

@ -1,7 +1,16 @@
# ACPI S3
patches.xen/pvops-3.4-0001-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch
patches.xen/pvops-3.4-0002-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch
patches.xen/pvops-3.4-0003-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch
# Fixes which should will go in 3.5
patches.xen/pvops-xen-blkfront-module-exit-handling-adjustments.patch
patches.xen/pvops-xen-blkback-Copy-id-field-when-doing-BLKIF_DISCARD.patch
patches.xen/pvops-xen-blkfront-Add-WARN-to-deal-with-misbehaving-backe.patch
patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch
patches.xen/pvops-3.4-enable-netfront-in-dom0.patch
patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch
# Additional features
patches.xen/pvops-3.4-0100-usb-xen-pvusb-driver.patch
patches.xen/pvops-blkfront-removable-flag.patch

Loading…
Cancel
Save