qubes-linux-kernel/patches.suse/dm-mpath-evaluate-request-result-and-sense
2011-04-19 22:09:59 +02:00

159 lines
4.4 KiB
Plaintext

From: Hannes Reinecke <hare@suse.de>
Subject: multipath: Evaluate request result and sense code
References: FATE#303695,bnc#433920,bnc#442001
Patch-mainline: not yet
Currently we're updating the request result upon completion
only for BLK_PC requests. This makes it impossible for the
upper layers to reliably detect the real cause of an
I/O failure. By attaching the result and the sense data to all
requests we can update multipathing to make some more elaborate
choices on how to handle I/O errors.
This also solves a potential data corruption with multipathing
and persistent reservations. When queue_if_no_path is active,
multipath will queue any I/O failure (including those failed
with RESERVATION CONFLICT) until the reservation status changes.
But by then I/O might have been ongoing on the other paths,
thus the delayed submission will severely corrupt your data.
Signed-off-by: Hannes Reinecke <hare@suse.de>
---
drivers/md/dm-mpath.c | 51 ++++++++++++++++++++++++++++++++++++++++++++++++
drivers/scsi/scsi_lib.c | 28 +++++++++++---------------
2 files changed, 63 insertions(+), 16 deletions(-)
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -19,6 +19,7 @@
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
+#include <scsi/scsi_eh.h>
#include <asm/atomic.h>
#define DM_MSG_PREFIX "multipath"
@@ -104,6 +105,7 @@ struct multipath {
struct dm_mpath_io {
struct pgpath *pgpath;
size_t nr_bytes;
+ char sense[SCSI_SENSE_BUFFERSIZE];
};
typedef int (*action_fn) (struct pgpath *pgpath);
@@ -997,6 +999,9 @@ static int multipath_map(struct dm_targe
map_context->ptr = mpio;
clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+ /* Always attach a sense buffer */
+ if (!clone->sense)
+ clone->sense = mpio->sense;
r = map_io(m, clone, mpio, 0);
if (r < 0 || r == DM_MAPIO_REQUEUE)
mempool_free(mpio, m->mpio_pool);
@@ -1295,6 +1300,44 @@ static void activate_path(struct work_st
}
/*
+ * Evaluate scsi return code
+ */
+static int eval_scsi_error(int result, char *sense, int sense_len)
+{
+	struct scsi_sense_hdr sshdr;
+	int r = DM_ENDIO_REQUEUE;
+
+	if (host_byte(result) != DID_OK)
+		return r;
+
+	if (msg_byte(result) != COMMAND_COMPLETE)
+		return r;
+
+	if (status_byte(result) == RESERVATION_CONFLICT)
+		/* Do not retry here, possible data corruption */
+		return -EIO;
+
+#if defined(CONFIG_SCSI) || defined(CONFIG_SCSI_MODULE)
+	/* Only trust sshdr when scsi_normalize_sense() reports valid sense */
+	if (status_byte(result) == CHECK_CONDITION &&
+	    scsi_normalize_sense(sense, sense_len, &sshdr)) {
+		switch (sshdr.sense_key) {
+		case MEDIUM_ERROR:
+		case DATA_PROTECT:
+		case BLANK_CHECK:
+		case COPY_ABORTED:
+		case VOLUME_OVERFLOW:
+		case MISCOMPARE:
+			r = -EIO;
+			break;
+		}
+	}
+#endif
+
+	return r;
+}
+
+/*
* end_io handling
*/
static int do_end_io(struct multipath *m, struct request *clone,
@@ -1320,6 +1363,10 @@ static int do_end_io(struct multipath *m
if (error == -EOPNOTSUPP)
return error;
+ r = eval_scsi_error(clone->errors, clone->sense, clone->sense_len);
+ if (r != DM_ENDIO_REQUEUE)
+ return r;
+
if (clone->cmd_flags & REQ_DISCARD)
/*
* Pass all discard request failures up.
@@ -1355,6 +1402,10 @@ static int multipath_end_io(struct dm_ta
if (ps->type->end_io)
ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
}
+ if (clone->sense == mpio->sense) {
+ clone->sense = NULL;
+ clone->sense_len = 0;
+ }
mempool_free(mpio, m->mpio_pool);
return r;
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -722,23 +722,19 @@ void scsi_io_completion(struct scsi_cmnd
sense_deferred = scsi_sense_is_deferred(&sshdr);
}
- if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
- req->errors = result;
- if (result) {
- if (sense_valid && req->sense) {
- /*
- * SG_IO wants current and deferred errors
- */
- int len = 8 + cmd->sense_buffer[7];
+ req->errors = result;
+ if (sense_valid && req->sense) {
+ int len = 8 + cmd->sense_buffer[7];
- if (len > SCSI_SENSE_BUFFERSIZE)
- len = SCSI_SENSE_BUFFERSIZE;
- memcpy(req->sense, cmd->sense_buffer, len);
- req->sense_len = len;
- }
- if (!sense_deferred)
- error = -EIO;
- }
+ if (len > SCSI_SENSE_BUFFERSIZE)
+ len = SCSI_SENSE_BUFFERSIZE;
+ memcpy(req->sense, cmd->sense_buffer, len);
+ req->sense_len = len;
+ }
+
+ if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
+ if ((result) && (!sense_deferred))
+ error = -EIO;
req->resid_len = scsi_get_resid(cmd);