From 3541d3d0126c62cdf10c8f76862d4ae90bbda472 Mon Sep 17 00:00:00 2001 From: Marek Marczykowski Date: Thu, 14 Jun 2012 00:26:45 +0200 Subject: [PATCH] pvops: add a couple of fixes from 3.5-rc kernels Especially for block backend. --- ...py-id-field-when-doing-BLKIF_DISCARD.patch | 54 +++++++ ...-WARN-to-deal-with-misbehaving-backe.patch | 141 ++++++++++++++++++ ...ont-module-exit-handling-adjustments.patch | 33 ++++ ...-filter-APERFMPERF-cpuid-feature-out.patch | 56 +++++++ series-pvops.conf | 9 ++ 5 files changed, 293 insertions(+) create mode 100644 patches.xen/pvops-xen-blkback-Copy-id-field-when-doing-BLKIF_DISCARD.patch create mode 100644 patches.xen/pvops-xen-blkfront-Add-WARN-to-deal-with-misbehaving-backe.patch create mode 100644 patches.xen/pvops-xen-blkfront-module-exit-handling-adjustments.patch create mode 100644 patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch diff --git a/patches.xen/pvops-xen-blkback-Copy-id-field-when-doing-BLKIF_DISCARD.patch b/patches.xen/pvops-xen-blkback-Copy-id-field-when-doing-BLKIF_DISCARD.patch new file mode 100644 index 0000000..edf6398 --- /dev/null +++ b/patches.xen/pvops-xen-blkback-Copy-id-field-when-doing-BLKIF_DISCARD.patch @@ -0,0 +1,54 @@ +From 8c9ce606a60e4a0cb447bdc082ce383b96b227b4 Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Fri, 25 May 2012 16:11:09 -0400 +Subject: [PATCH 2/3] xen/blkback: Copy id field when doing BLKIF_DISCARD. + +We weren't copying the id field so when we sent the response +back to the frontend (especially with a 64-bit host and 32-bit +guest), we ended up using a random value. This lead to the +frontend crashing as it would try to pass to __blk_end_request_all +a NULL 'struct request' (b/c it would use the 'id' to find the +proper 'struct request' in its shadow array) and end up crashing: + +BUG: unable to handle kernel NULL pointer dereference at 000000e4 +IP: [] __blk_end_request_all+0xc/0x40 +.. snip.. +EIP is at __blk_end_request_all+0xc/0x40 +.. snip.. + [] blkif_interrupt+0x172/0x330 [xen_blkfront] + +This fixes the bug by passing in the proper id for the response. + +Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=824641 + +CC: stable@kernel.org +Tested-by: William Dauchy +Acked-by: Stefano Stabellini +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/block/xen-blkback/common.h | 2 ++ + 1 files changed, 2 insertions(+), 0 deletions(-) + +diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h +index 773cf27..9ad3b5e 100644 +--- a/drivers/block/xen-blkback/common.h ++++ b/drivers/block/xen-blkback/common.h +@@ -257,6 +257,7 @@ static inline void blkif_get_x86_32_req(struct blkif_request *dst, + break; + case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; ++ dst->u.discard.id = src->u.discard.id; + dst->u.discard.sector_number = src->u.discard.sector_number; + dst->u.discard.nr_sectors = src->u.discard.nr_sectors; + break; +@@ -287,6 +288,7 @@ static inline void blkif_get_x86_64_req(struct blkif_request *dst, + break; + case BLKIF_OP_DISCARD: + dst->u.discard.flag = src->u.discard.flag; ++ dst->u.discard.id = src->u.discard.id; + dst->u.discard.sector_number = src->u.discard.sector_number; + dst->u.discard.nr_sectors = src->u.discard.nr_sectors; + break; +-- +1.7.4.4 + diff --git a/patches.xen/pvops-xen-blkfront-Add-WARN-to-deal-with-misbehaving-backe.patch b/patches.xen/pvops-xen-blkfront-Add-WARN-to-deal-with-misbehaving-backe.patch new file mode 100644 index 0000000..96968dd --- /dev/null +++ b/patches.xen/pvops-xen-blkfront-Add-WARN-to-deal-with-misbehaving-backe.patch @@ -0,0 +1,141 @@ +From 6878c32e5cc0e40980abe51d1f02fb453e27493e Mon Sep 17 00:00:00 2001 +From: Konrad Rzeszutek Wilk +Date: Fri, 25 May 2012 17:34:51 -0400 +Subject: [PATCH 3/3] xen/blkfront: Add WARN to deal with misbehaving + backends. + +Part of the ring structure is the 'id' field which is under +control of the frontend. The frontend stamps it with "some" +value (this some in this implementation being a value less +than BLK_RING_SIZE), and when it gets a response expects +said value to be in the response structure. We have a check +for the id field when spolling new requests but not when +de-spolling responses. + +We also add an extra check in add_id_to_freelist to make +sure that the 'struct request' was not NULL - as we cannot +pass a NULL to __blk_end_request_all, otherwise that crashes +(and all the operations that the response is dealing with +end up with __blk_end_request_all). + +Lastly we also print the name of the operation that failed. + +[v1: s/BUG/WARN/ suggested by Stefano] +[v2: Add extra check in add_id_to_freelist] +[v3: Redid op_name per Jan's suggestion] +[v4: add const * and add WARN on failure returns] +Acked-by: Jan Beulich +Acked-by: Stefano Stabellini +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/block/xen-blkfront.c | 58 +++++++++++++++++++++++++++++++++-------- + 1 files changed, 46 insertions(+), 12 deletions(-) + +diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c +index 60eed4b..e4fb337 100644 +--- a/drivers/block/xen-blkfront.c ++++ b/drivers/block/xen-blkfront.c +@@ -141,14 +141,36 @@ static int get_id_from_freelist(struct blkfront_info *info) + return free; + } + +-static void add_id_to_freelist(struct blkfront_info *info, ++static int add_id_to_freelist(struct blkfront_info *info, + unsigned long id) + { ++ if (info->shadow[id].req.u.rw.id != id) ++ return -EINVAL; ++ if (info->shadow[id].request == NULL) ++ return -EINVAL; + info->shadow[id].req.u.rw.id = info->shadow_free; + info->shadow[id].request = NULL; + info->shadow_free = id; ++ return 0; + } + ++static const char *op_name(int op) ++{ ++ static const char *const names[] = { ++ [BLKIF_OP_READ] = "read", ++ [BLKIF_OP_WRITE] = "write", ++ [BLKIF_OP_WRITE_BARRIER] = "barrier", ++ [BLKIF_OP_FLUSH_DISKCACHE] = "flush", ++ [BLKIF_OP_DISCARD] = "discard" }; ++ ++ if (op < 0 || op >= ARRAY_SIZE(names)) ++ return "unknown"; ++ ++ if (!names[op]) ++ return "reserved"; ++ ++ return names[op]; ++} + static int xlbd_reserve_minors(unsigned int minor, unsigned int nr) + { + unsigned int end = minor + nr; +@@ -746,20 +768,36 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) + + bret = RING_GET_RESPONSE(&info->ring, i); + id = bret->id; ++ /* ++ * The backend has messed up and given us an id that we would ++ * never have given to it (we stamp it up to BLK_RING_SIZE - ++ * look in get_id_from_freelist. ++ */ ++ if (id >= BLK_RING_SIZE) { ++ WARN(1, "%s: response to %s has incorrect id (%ld)\n", ++ info->gd->disk_name, op_name(bret->operation), id); ++ /* We can't safely get the 'struct request' as ++ * the id is busted. */ ++ continue; ++ } + req = info->shadow[id].request; + + if (bret->operation != BLKIF_OP_DISCARD) + blkif_completion(&info->shadow[id]); + +- add_id_to_freelist(info, id); ++ if (add_id_to_freelist(info, id)) { ++ WARN(1, "%s: response to %s (id %ld) couldn't be recycled!\n", ++ info->gd->disk_name, op_name(bret->operation), id); ++ continue; ++ } + + error = (bret->status == BLKIF_RSP_OKAY) ? 0 : -EIO; + switch (bret->operation) { + case BLKIF_OP_DISCARD: + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { + struct request_queue *rq = info->rq; +- printk(KERN_WARNING "blkfront: %s: discard op failed\n", +- info->gd->disk_name); ++ printk(KERN_WARNING "blkfront: %s: %s op failed\n", ++ info->gd->disk_name, op_name(bret->operation)); + error = -EOPNOTSUPP; + info->feature_discard = 0; + info->feature_secdiscard = 0; +@@ -771,18 +809,14 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) + case BLKIF_OP_FLUSH_DISKCACHE: + case BLKIF_OP_WRITE_BARRIER: + if (unlikely(bret->status == BLKIF_RSP_EOPNOTSUPP)) { +- printk(KERN_WARNING "blkfront: %s: write %s op failed\n", +- info->flush_op == BLKIF_OP_WRITE_BARRIER ? +- "barrier" : "flush disk cache", +- info->gd->disk_name); ++ printk(KERN_WARNING "blkfront: %s: %s op failed\n", ++ info->gd->disk_name, op_name(bret->operation)); + error = -EOPNOTSUPP; + } + if (unlikely(bret->status == BLKIF_RSP_ERROR && + info->shadow[id].req.u.rw.nr_segments == 0)) { +- printk(KERN_WARNING "blkfront: %s: empty write %s op failed\n", +- info->flush_op == BLKIF_OP_WRITE_BARRIER ? +- "barrier" : "flush disk cache", +- info->gd->disk_name); ++ printk(KERN_WARNING "blkfront: %s: empty %s op failed\n", ++ info->gd->disk_name, op_name(bret->operation)); + error = -EOPNOTSUPP; + } + if (unlikely(error)) { +-- +1.7.4.4 + diff --git a/patches.xen/pvops-xen-blkfront-module-exit-handling-adjustments.patch b/patches.xen/pvops-xen-blkfront-module-exit-handling-adjustments.patch new file mode 100644 index 0000000..c3479ff --- /dev/null +++ b/patches.xen/pvops-xen-blkfront-module-exit-handling-adjustments.patch @@ -0,0 +1,33 @@ +From 8605067fb9b8e34aecf44ec258657c9cc009fc5a Mon Sep 17 00:00:00 2001 +From: Jan Beulich +Date: Thu, 5 Apr 2012 16:04:52 +0100 +Subject: [PATCH 1/3] xen-blkfront: module exit handling adjustments + +The blkdev major must be released upon exit, or else the module can't +attach to devices using the same majors upon being loaded again. Also +avoid leaking the minor tracking bitmap. + +Signed-off-by: Jan Beulich +Signed-off-by: Konrad Rzeszutek Wilk +--- + drivers/block/xen-blkfront.c | 4 +++- + 1 files changed, 3 insertions(+), 1 deletions(-) + +diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c +index 4f2b460..60eed4b 100644 +--- a/drivers/block/xen-blkfront.c ++++ b/drivers/block/xen-blkfront.c +@@ -1500,7 +1500,9 @@ module_init(xlblk_init); + + static void __exit xlblk_exit(void) + { +- return xenbus_unregister_driver(&blkfront_driver); ++ xenbus_unregister_driver(&blkfront_driver); ++ unregister_blkdev(XENVBD_MAJOR, DEV_NAME); ++ kfree(minors); + } + module_exit(xlblk_exit); + +-- +1.7.4.4 + diff --git a/patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch b/patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch new file mode 100644 index 0000000..106ff6b --- /dev/null +++ b/patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch @@ -0,0 +1,56 @@ +From 5e626254206a709c6e937f3dda69bf26c7344f6f Mon Sep 17 00:00:00 2001 +From: Andre Przywara +Date: Tue, 29 May 2012 13:07:31 +0200 +Subject: [PATCH] xen/setup: filter APERFMPERF cpuid feature out + +Xen PV kernels allow access to the APERF/MPERF registers to read the +effective frequency. Access to the MSRs is however redirected to the +currently scheduled physical CPU, making consecutive read and +compares unreliable. In addition each rdmsr traps into the hypervisor. +So to avoid bogus readouts and expensive traps, disable the kernel +internal feature flag for APERF/MPERF if running under Xen. +This will +a) remove the aperfmperf flag from /proc/cpuinfo +b) not mislead the power scheduler (arch/x86/kernel/cpu/sched.c) to + use the feature to improve scheduling (by default disabled) +c) not mislead the cpufreq driver to use the MSRs + +This does not cover userland programs which access the MSRs via the +device file interface, but this will be addressed separately. + +Signed-off-by: Andre Przywara +Cc: stable@vger.kernel.org # v3.0+ +Signed-off-by: Konrad Rzeszutek Wilk +--- + arch/x86/xen/enlighten.c | 8 ++++++++ + 1 files changed, 8 insertions(+), 0 deletions(-) + +diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c +index d1f9a04..272ebd0 100644 +--- a/arch/x86/xen/enlighten.c ++++ b/arch/x86/xen/enlighten.c +@@ -208,6 +208,9 @@ static void __init xen_banner(void) + xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); + } + ++#define CPUID_THERM_POWER_LEAF 6 ++#define APERFMPERF_PRESENT 0 ++ + static __read_mostly unsigned int cpuid_leaf1_edx_mask = ~0; + static __read_mostly unsigned int cpuid_leaf1_ecx_mask = ~0; + +@@ -241,6 +244,11 @@ static void xen_cpuid(unsigned int *ax, unsigned int *bx, + *dx = cpuid_leaf5_edx_val; + return; + ++ case CPUID_THERM_POWER_LEAF: ++ /* Disabling APERFMPERF for kernel usage */ ++ maskecx = ~(1 << APERFMPERF_PRESENT); ++ break; ++ + case 0xb: + /* Suppress extended topology stuff */ + maskebx = 0; +-- +1.7.4.4 + diff --git a/series-pvops.conf b/series-pvops.conf index 6c8b00f..ecaa364 100644 --- a/series-pvops.conf +++ b/series-pvops.conf @@ -1,7 +1,16 @@ +# ACPI S3 patches.xen/pvops-3.4-0001-x86-acpi-sleep-Provide-registration-for-acpi_suspend.patch patches.xen/pvops-3.4-0002-xen-acpi-sleep-Enable-ACPI-sleep-via-the-__acpi_os_p.patch patches.xen/pvops-3.4-0003-xen-acpi-sleep-Register-to-the-acpi_suspend_lowlevel.patch + +# Fixes which should will go in 3.5 +patches.xen/pvops-xen-blkfront-module-exit-handling-adjustments.patch +patches.xen/pvops-xen-blkback-Copy-id-field-when-doing-BLKIF_DISCARD.patch +patches.xen/pvops-xen-blkfront-Add-WARN-to-deal-with-misbehaving-backe.patch +patches.xen/pvops-xen-setup-filter-APERFMPERF-cpuid-feature-out.patch patches.xen/pvops-3.4-enable-netfront-in-dom0.patch patches.xen/pvops-netback-calculate-correctly-the-SKB-slots.patch + +# Additional features patches.xen/pvops-3.4-0100-usb-xen-pvusb-driver.patch patches.xen/pvops-blkfront-removable-flag.patch