From: Hannes Reinecke
Subject: multipath: Evaluate request result and sense code
References: FATE#303695,bnc#433920,bnc#442001
Patch-mainline: not yet

Currently we're updating the request result upon completion
only for BLK_PC requests. This makes it impossible for the
upper layers to reliably detect the real cause for an
I/O failure. By attaching the result and the sense to all
requests we can update multipathing to make some more elaborate
choices on how to handle I/O errors.

This also solves a potential data corruption with multipathing
and persistent reservations. When queue_if_no_path is active
multipath will queue any I/O failure (including those failed
with RESERVATION CONFLICT) until the reservation status changes.
But by then I/O might have been ongoing on the other paths,
thus the delayed submission will severely corrupt your data.

Signed-off-by: Hannes Reinecke

---
 drivers/md/dm-mpath.c   |   57 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/scsi_lib.c |   28 +++++++++++---------------
 2 files changed, 69 insertions(+), 16 deletions(-)

--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -19,6 +19,7 @@
 #include <linux/time.h>
 #include <linux/workqueue.h>
 #include <scsi/scsi_dh.h>
+#include <scsi/scsi_eh.h>
 #include <asm/atomic.h>
 
 #define DM_MSG_PREFIX "multipath"
@@ -104,6 +105,7 @@ struct multipath {
 struct dm_mpath_io {
 	struct pgpath *pgpath;
 	size_t nr_bytes;
+	char sense[SCSI_SENSE_BUFFERSIZE];
 };
 
 typedef int (*action_fn) (struct pgpath *pgpath);
@@ -997,6 +999,9 @@ static int multipath_map(struct dm_targe
 
 	map_context->ptr = mpio;
 	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
+	/* Always attach a sense buffer */
+	if (!clone->sense)
+		clone->sense = mpio->sense;
 	r = map_io(m, clone, mpio, 0);
 	if (r < 0 || r == DM_MAPIO_REQUEUE)
 		mempool_free(mpio, m->mpio_pool);
@@ -1295,6 +1300,50 @@ static void activate_path(struct work_st
 }
 
 /*
+ * Evaluate scsi return code
+ *
+ * Returns -EIO when the failure must be passed up without a retry
+ * (RESERVATION CONFLICT, or CHECK CONDITION with one of the sense
+ * keys listed below); returns DM_ENDIO_REQUEUE in every other case
+ * so that do_end_io() continues with its normal path handling.
+ */
+static int eval_scsi_error(int result, char *sense, int sense_len)
+{
+	struct scsi_sense_hdr sshdr;
+	int r = DM_ENDIO_REQUEUE;
+
+	if (host_byte(result) != DID_OK)
+		return r;
+
+	if (msg_byte(result) != COMMAND_COMPLETE)
+		return r;
+
+	if (status_byte(result) == RESERVATION_CONFLICT)
+		/* Do not retry here, possible data corruption */
+		return -EIO;
+
+#if defined(CONFIG_SCSI) || defined(CONFIG_SCSI_MODULE)
+	/* scsi_normalize_sense() returns non-zero for valid sense data */
+	if (status_byte(result) == CHECK_CONDITION &&
+	    scsi_normalize_sense(sense, sense_len, &sshdr)) {
+
+		switch (sshdr.sense_key) {
+		case MEDIUM_ERROR:
+		case DATA_PROTECT:
+		case BLANK_CHECK:
+		case COPY_ABORTED:
+		case VOLUME_OVERFLOW:
+		case MISCOMPARE:
+			r = -EIO;
+			break;
+		}
+	}
+#endif
+
+	return r;
+}
+
+/*
  * end_io handling
  */
 static int do_end_io(struct multipath *m, struct request *clone,
@@ -1320,6 +1369,10 @@ static int do_end_io(struct multipath *m
 	if (error == -EOPNOTSUPP)
 		return error;
 
+	r = eval_scsi_error(clone->errors, clone->sense, clone->sense_len);
+	if (r != DM_ENDIO_REQUEUE)
+		return r;
+
 	if (clone->cmd_flags & REQ_DISCARD)
 		/*
 		 * Pass all discard request failures up.
@@ -1355,6 +1408,10 @@ static int multipath_end_io(struct dm_ta
 		if (ps->type->end_io)
 			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
 	}
+	if (clone->sense == mpio->sense) {
+		clone->sense = NULL;
+		clone->sense_len = 0;
+	}
 	mempool_free(mpio, m->mpio_pool);
 
 	return r;
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -722,23 +722,19 @@ void scsi_io_completion(struct scsi_cmnd
 			sense_deferred = scsi_sense_is_deferred(&sshdr);
 	}
 
-	if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
-		req->errors = result;
-		if (result) {
-			if (sense_valid && req->sense) {
-				/*
-				 * SG_IO wants current and deferred errors
-				 */
-				int len = 8 + cmd->sense_buffer[7];
+	req->errors = result;
+	if (sense_valid && req->sense) {
+		int len = 8 + cmd->sense_buffer[7];
 
-				if (len > SCSI_SENSE_BUFFERSIZE)
-					len = SCSI_SENSE_BUFFERSIZE;
-				memcpy(req->sense, cmd->sense_buffer, len);
-				req->sense_len = len;
-			}
-			if (!sense_deferred)
-				error = -EIO;
-		}
+		if (len > SCSI_SENSE_BUFFERSIZE)
+			len = SCSI_SENSE_BUFFERSIZE;
+		memcpy(req->sense, cmd->sense_buffer, len);
+		req->sense_len = len;
+	}
+
+	if (req->cmd_type == REQ_TYPE_BLOCK_PC) { /* SG_IO ioctl from block level */
+		if ((result) && (!sense_deferred))
+			error = -EIO;
 
 		req->resid_len = scsi_get_resid(cmd);
 