1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright 2024 RackTop Systems, Inc.
25  */
26 
27 /*
28  * Implementation of "scsi_vhci_f_tpgs" T10 standard based failover_ops.
29  *
30  * NOTE: for non-sequential devices only.
31  */
32 
33 #include <sys/conf.h>
34 #include <sys/file.h>
35 #include <sys/ddi.h>
36 #include <sys/sunddi.h>
37 #include <sys/scsi/scsi.h>
38 #include <sys/scsi/adapters/scsi_vhci.h>
39 #include <sys/scsi/adapters/scsi_vhci_tpgs.h>
40 
/*
 * Supported device table.  It holds a single NULL entry, i.e. no device
 * identifiers are listed here.
 */
char	*std_dev_table[] = {
	NULL
};

/*
 * Failover module plumbing: SCSI_FAILOVER_OP is invoked with the
 * SFO_NAME_TPGS name and the "std" prefix used by the routines in this file.
 */
SCSI_FAILOVER_OP(SFO_NAME_TPGS, std);
46 
/* Delay before retrying after a command/transport error, in microseconds. */
#define	STD_FO_CMD_RETRY_DELAY	1000000 /* 1 second */
/* Delay between polls while waiting for the failover, in microseconds. */
#define	STD_FO_RETRY_DELAY	2000000 /* 2 seconds */
/*
 * The maximum time allowed for a failover to complete is 3 minutes.  The
 * retry count is derived from the retry delay so that we wait for at least
 * 3 minutes.
 *
 * The whole expansion is parenthesized so that the macro evaluates as a
 * single value regardless of the operators that surround its use.
 */
#define	STD_FO_MAX_RETRIES	((3 * 60 * 1000000) / STD_FO_RETRY_DELAY)
55 
56 
57 /* ARGSUSED */
58 static int
std_device_probe(struct scsi_device * sd,struct scsi_inquiry * inq,void ** ctpriv)59 std_device_probe(struct scsi_device *sd, struct scsi_inquiry *inq,
60     void **ctpriv)
61 {
62 	int		mode, state, xlf, preferred = 0;
63 
64 	VHCI_DEBUG(6, (CE_NOTE, NULL, "std_device_probe: vidpid %s\n",
65 	    inq->inq_vid));
66 
67 	if (inq->inq_tpgs == TPGS_FAILOVER_NONE) {
68 		VHCI_DEBUG(4, (CE_WARN, NULL,
69 		    "!std_device_probe: not a standard tpgs device"));
70 		return (SFO_DEVICE_PROBE_PHCI);
71 	}
72 
73 	if (inq->inq_dtype == DTYPE_SEQUENTIAL) {
74 		VHCI_DEBUG(4, (CE_NOTE, NULL,
75 		    "!std_device_probe: Detected a "
76 		    "Standard Asymmetric device "
77 		    "not yet supported\n"));
78 		return (SFO_DEVICE_PROBE_PHCI);
79 	}
80 
81 	if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
82 		VHCI_DEBUG(4, (CE_WARN, NULL, "!unable to fetch fo "
83 		    "mode: sd(%p)", (void *) sd));
84 		return (SFO_DEVICE_PROBE_PHCI);
85 	}
86 
87 	if (inq->inq_tpgs == TPGS_FAILOVER_IMPLICIT) {
88 		VHCI_DEBUG(1, (CE_NOTE, NULL,
89 		    "!std_device_probe: Detected a "
90 		    "Standard Asymmetric device "
91 		    "with implicit failover\n"));
92 		return (SFO_DEVICE_PROBE_VHCI);
93 	}
94 	if (inq->inq_tpgs == TPGS_FAILOVER_EXPLICIT) {
95 		VHCI_DEBUG(1, (CE_NOTE, NULL,
96 		    "!std_device_probe: Detected a "
97 		    "Standard Asymmetric device "
98 		    "with explicit failover\n"));
99 		return (SFO_DEVICE_PROBE_VHCI);
100 	}
101 	if (inq->inq_tpgs == TPGS_FAILOVER_BOTH) {
102 		VHCI_DEBUG(1, (CE_NOTE, NULL,
103 		    "!std_device_probe: Detected a "
104 		    "Standard Asymmetric device "
105 		    "which supports both implicit and explicit failover\n"));
106 		return (SFO_DEVICE_PROBE_VHCI);
107 	}
108 	VHCI_DEBUG(1, (CE_WARN, NULL,
109 	    "!std_device_probe: "
110 	    "Unknown tpgs_bits: %x", inq->inq_tpgs));
111 	return (SFO_DEVICE_PROBE_PHCI);
112 }
113 
/*
 * Placeholder: performs no work and is reserved for future use.
 */
/* ARGSUSED */
static void
std_device_unprobe(struct scsi_device *sd, void *ctpriv)
{
}
122 
123 /* ARGSUSED */
124 static int
std_activate_explicit(struct scsi_device * sd,int xlf_capable)125 std_activate_explicit(struct scsi_device *sd, int xlf_capable)
126 {
127 	cmn_err(CE_NOTE, "Explicit Activation is done by "
128 	    "vhci_tpgs_set_target_groups() call from MPAPI");
129 	return (1);
130 }
131 
132 /*
133  * Process the packet reason of CMD_PKT_CMPLT - return 0 if no
134  * retry and 1 if a retry should be done
135  */
136 static int
std_process_cmplt_pkt(struct scsi_device * sd,struct scsi_pkt * pkt,int * retry_cnt,int * retval)137 std_process_cmplt_pkt(struct scsi_device *sd, struct scsi_pkt *pkt,
138     int *retry_cnt, int *retval)
139 {
140 	*retval = 1; /* fail */
141 
142 	switch (SCBP_C(pkt)) {
143 		case STATUS_GOOD:
144 			*retval = 0;
145 			break;
146 		case STATUS_CHECK:
147 			if (pkt->pkt_state & STATE_ARQ_DONE) {
148 				uint8_t *sns, skey, asc, ascq;
149 				sns = (uint8_t *)
150 				    &(((struct scsi_arq_status *)(uintptr_t)
151 				    (pkt->pkt_scbp))->sts_sensedata);
152 				skey = scsi_sense_key(sns);
153 				asc = scsi_sense_asc(sns);
154 				ascq = scsi_sense_ascq(sns);
155 				if (skey == KEY_UNIT_ATTENTION) {
156 					/*
157 					 * tpgs access state changed
158 					 */
159 					if (asc == STD_SCSI_ASC_STATE_CHG &&
160 					    ascq ==
161 					    STD_SCSI_ASCQ_STATE_CHG_SUCC) {
162 						/* XXX: update path info? */
163 						cmn_err(CE_WARN,
164 						    "!Device failover"
165 						    " state change");
166 					}
167 					return (1);
168 				} else if (skey == KEY_NOT_READY) {
169 					if (asc ==
170 					    STD_LOGICAL_UNIT_NOT_ACCESSIBLE &&
171 					    ascq == STD_TGT_PORT_STANDBY) {
172 						/*
173 						 * Don't retry on the path
174 						 * which is indicated as
175 						 * standby, return failure.
176 						 */
177 						return (0);
178 					} else if ((*retry_cnt)++ >=
179 					    STD_FO_MAX_RETRIES) {
180 						cmn_err(CE_WARN,
181 						    "!Device failover failed: "
182 						    "timed out waiting for "
183 						    "path to become active");
184 						return (0);
185 					}
186 					VHCI_DEBUG(6, (CE_NOTE, NULL,
187 					    "!(sd:%p)lun becoming active...\n",
188 					    (void *)sd));
189 					drv_usecwait(STD_FO_RETRY_DELAY);
190 					return (1);
191 				}
192 				cmn_err(CE_NOTE, "!Failover failed;"
193 				    " sense key:%x, ASC: %x, "
194 				    "ASCQ:%x", skey, asc, ascq);
195 				return (0);
196 			}
197 			VHCI_DEBUG(4, (CE_WARN, NULL,
198 			    "!(sd:%p):"
199 			    " status returned CHECK during std"
200 			    " path activation", (void *)sd));
201 			return (0);
202 		case STATUS_QFULL:
203 			VHCI_DEBUG(6, (CE_NOTE, NULL, "QFULL "
204 			    "status returned QFULL during std "
205 			    "path activation for %p\n", (void *)sd));
206 			drv_usecwait(5000);
207 			return (1);
208 		case STATUS_BUSY:
209 			VHCI_DEBUG(6, (CE_NOTE, NULL, "BUSY "
210 			    "status returned BUSY during std "
211 			    "path activation for %p\n", (void *)sd));
212 			drv_usecwait(5000);
213 			return (1);
214 		default:
215 			VHCI_DEBUG(4, (CE_WARN, NULL,
216 			    "!(sd:%p) Bad status returned during std "
217 			    "activation (pkt %p, status %x)",
218 			    (void *)sd, (void *)pkt, SCBP_C(pkt)));
219 			return (0);
220 	}
221 	return (0);
222 }
223 
224 /*
225  * For now we are going to use primary/online and secondary/online.
226  * There is no standby path returned by the dsp and we may have
227  * to do something different for other devices that use standby
228  */
229 /* ARGSUSED */
230 static int
std_path_activate(struct scsi_device * sd,char * pathclass,void * ctpriv)231 std_path_activate(struct scsi_device *sd, char *pathclass,
232     void *ctpriv)
233 {
234 	struct buf			*bp;
235 	struct scsi_pkt			*pkt;
236 	struct scsi_address		*ap;
237 	int				err, retry_cnt, retry_cmd_cnt;
238 	int				mode, state, retval, xlf, preferred;
239 	size_t				blksize;
240 
241 	ap = &sd->sd_address;
242 
243 	mode = state = 0;
244 
245 	blksize = vhci_get_blocksize(sd->sd_dev);
246 
247 	if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
248 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_path_activate:"
249 		    " failed vhci_tpgs_get_target_fo_mode\n"));
250 		return (1);
251 	}
252 	if ((state == STD_ACTIVE_OPTIMIZED) ||
253 	    (state == STD_ACTIVE_NONOPTIMIZED)) {
254 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!path already active for %p\n",
255 		    (void *)sd));
256 		return (0);
257 	}
258 
259 	if (mode == SCSI_EXPLICIT_FAILOVER) {
260 		VHCI_DEBUG(4, (CE_NOTE, NULL,
261 		    "!mode is EXPLICIT for %p xlf %x\n",
262 		    (void *)sd, xlf));
263 		retval = std_activate_explicit(sd, xlf);
264 		if (retval != 0) {
265 			VHCI_DEBUG(4, (CE_NOTE, NULL,
266 			    "!(sd:%p)std_path_activate failed(1)\n",
267 			    (void *)sd));
268 			return (1);
269 		}
270 	} else {
271 		VHCI_DEBUG(4, (CE_NOTE, NULL, "STD mode is IMPLICIT for %p\n",
272 		    (void *)sd));
273 	}
274 
275 	bp = scsi_alloc_consistent_buf(ap, (struct buf *)NULL, blksize, B_READ,
276 	    NULL, NULL);
277 	if (!bp) {
278 		VHCI_DEBUG(4, (CE_WARN, NULL,
279 		    "!(sd:%p)std_path_activate failed to alloc buffer",
280 		    (void *)sd));
281 		return (1);
282 	}
283 
284 	pkt = scsi_init_pkt(ap, NULL, bp, CDB_GROUP1,
285 	    sizeof (struct scsi_arq_status), 0, PKT_CONSISTENT, NULL, NULL);
286 	if (!pkt) {
287 		VHCI_DEBUG(4, (CE_WARN, NULL,
288 		    "!(sd:%p)std_path_activate failed to initialize packet",
289 		    (void *)sd));
290 		scsi_free_consistent_buf(bp);
291 		return (1);
292 	}
293 
294 	(void) scsi_setup_cdb((union scsi_cdb *)(uintptr_t)pkt->pkt_cdbp,
295 	    SCMD_READ_G1, 1, 1, 0);
296 	pkt->pkt_time = 3*30;
297 	pkt->pkt_flags |= FLAG_NOINTR;
298 
299 	retry_cnt = 0;
300 	retry_cmd_cnt = 0;
301 retry:
302 	err = scsi_transport(pkt);
303 	if (err != TRAN_ACCEPT) {
304 		/*
305 		 * Retry TRAN_BUSY till STD_FO_MAX_RETRIES is exhausted.
306 		 * All other errors are fatal and should not be retried.
307 		 */
308 		if ((err == TRAN_BUSY) &&
309 		    (retry_cnt++ < STD_FO_MAX_RETRIES)) {
310 			drv_usecwait(STD_FO_RETRY_DELAY);
311 			goto retry;
312 		}
313 		cmn_err(CE_WARN, "Failover failed, "
314 		    "couldn't transport packet");
315 		scsi_destroy_pkt(pkt);
316 		scsi_free_consistent_buf(bp);
317 		return (1);
318 	}
319 	switch (pkt->pkt_reason) {
320 		case CMD_CMPLT:
321 			/*
322 			 * Re-initialize retry_cmd_cnt. Allow transport and
323 			 * cmd errors to go through a full retry count when
324 			 * these are encountered.  This way TRAN/CMD errors
325 			 * retry count is not exhausted due to CMD_CMPLTs
326 			 * delay. This allows the system
327 			 * to brave a hick-up on the link at any given time,
328 			 * while waiting for the fo to complete.
329 			 */
330 			retry_cmd_cnt = 0;
331 			if (std_process_cmplt_pkt(sd, pkt, &retry_cnt,
332 			    &retval) != 0) {
333 				goto retry;
334 			}
335 			break;
336 		case CMD_TIMEOUT:
337 			cmn_err(CE_WARN, "!Failover failed: timed out ");
338 			retval = 1;
339 			break;
340 		case CMD_INCOMPLETE:
341 		case CMD_RESET:
342 		case CMD_ABORTED:
343 		case CMD_TRAN_ERR:
344 			/*
345 			 * Increased the number of retries when these error
346 			 * cases are encountered.  Also added a 1 sec wait
347 			 * before retrying.
348 			 */
349 			if (retry_cmd_cnt++ < STD_FO_MAX_CMD_RETRIES) {
350 				drv_usecwait(STD_FO_CMD_RETRY_DELAY);
351 				VHCI_DEBUG(4, (CE_WARN, NULL,
352 				    "!Retrying path activation due to "
353 				    "pkt reason:%x, retry cnt:%d",
354 				    pkt->pkt_reason, retry_cmd_cnt));
355 				goto retry;
356 			}
357 			/* FALLTHROUGH */
358 		default:
359 			cmn_err(CE_WARN, "!Path activation did not "
360 			    "complete successfully,"
361 			    "(pkt reason %x)", pkt->pkt_reason);
362 			retval = 1;
363 			break;
364 	}
365 
366 	scsi_destroy_pkt(pkt);
367 	scsi_free_consistent_buf(bp);
368 	return (retval);
369 }
370 
/*
 * Path deactivation requires no action in this module; success (0) is
 * always reported.
 */
/* ARGSUSED */
static int
std_path_deactivate(struct scsi_device *sd, char *pathclass,
    void *ctpriv)
{
	return (0);
}
377 
378 /* ARGSUSED */
379 static int
std_path_get_opinfo(struct scsi_device * sd,struct scsi_path_opinfo * opinfo,void * ctpriv)380 std_path_get_opinfo(struct scsi_device *sd, struct scsi_path_opinfo *opinfo,
381     void *ctpriv)
382 {
383 	int			mode, preferred, state, xlf;
384 
385 	opinfo->opinfo_rev = OPINFO_REV;
386 
387 	if (vhci_tpgs_get_target_fo_mode(sd, &mode, &state, &xlf, &preferred)) {
388 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_path_getopinfo:"
389 		    " failed vhci_tpgs_get_target_fo_mode\n"));
390 		return (1);
391 	}
392 
393 	if (state == STD_ACTIVE_OPTIMIZED) {
394 		opinfo->opinfo_path_state = SCSI_PATH_ACTIVE;
395 	} else if (state == STD_ACTIVE_NONOPTIMIZED) {
396 		opinfo->opinfo_path_state = SCSI_PATH_ACTIVE_NONOPT;
397 	} else if (state == STD_STANDBY) {
398 		opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
399 	} else if (state == STD_UNAVAILABLE) {
400 		opinfo->opinfo_path_state = SCSI_PATH_INACTIVE;
401 	}
402 	if (preferred) {
403 		(void) strcpy(opinfo->opinfo_path_attr, PCLASS_PRIMARY);
404 	} else {
405 		(void) strcpy(opinfo->opinfo_path_attr, PCLASS_SECONDARY);
406 	}
407 	VHCI_DEBUG(4, (CE_NOTE, NULL, "std_path_get_opinfo: "
408 	    "class: %s state: %s\n", opinfo->opinfo_path_attr,
409 	    opinfo->opinfo_path_state == SCSI_PATH_ACTIVE ?
410 	    "ACTIVE" : "INACTIVE"));
411 	opinfo->opinfo_xlf_capable = 0;
412 	opinfo->opinfo_pswtch_best = 30;
413 	opinfo->opinfo_pswtch_worst = 3*30;
414 	opinfo->opinfo_preferred = (uint16_t)preferred;
415 	opinfo->opinfo_mode = (uint16_t)mode;
416 
417 	return (0);
418 }
419 
/*
 * Placeholder reserved for future use; it performs no I/O and always
 * returns 1.
 */
/* ARGSUSED */
static int
std_path_ping(struct scsi_device *sd, void *ctpriv)
{
	return (1);
}
428 
429 /*
430  * Analyze the sense code to determine whether failover process
431  */
432 /* ARGSUSED */
433 static int
std_analyze_sense(struct scsi_device * sd,uint8_t * sense,void * ctpriv)434 std_analyze_sense(struct scsi_device *sd, uint8_t *sense,
435     void *ctpriv)
436 {
437 	int rval = SCSI_SENSE_UNKNOWN;
438 
439 	uint8_t skey, asc, ascq;
440 
441 	skey = scsi_sense_key(sense);
442 	asc = scsi_sense_asc(sense);
443 	ascq = scsi_sense_ascq(sense);
444 
445 	if ((skey == KEY_UNIT_ATTENTION) &&
446 	    (asc == STD_SCSI_ASC_STATE_CHG) &&
447 	    (ascq == STD_SCSI_ASCQ_STATE_CHG_SUCC)) {
448 		rval = SCSI_SENSE_STATE_CHANGED;
449 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_analyze_sense:"
450 		    " sense_key:%x, add_code: %x, qual_code:%x"
451 		    " sense:%x\n", skey, asc, ascq, rval));
452 	} else if ((skey == KEY_NOT_READY) &&
453 	    (asc == STD_LOGICAL_UNIT_NOT_ACCESSIBLE) &&
454 	    ((ascq == STD_TGT_PORT_UNAVAILABLE) ||
455 	    (ascq == STD_TGT_PORT_STANDBY))) {
456 		rval = SCSI_SENSE_INACTIVE;
457 		VHCI_DEBUG(4, (CE_NOTE, NULL, "!std_analyze_sense:"
458 		    " sense_key:%x, add_code: %x, qual_code:%x"
459 		    " sense:%x\n", skey, asc, ascq, rval));
460 	} else if ((skey == KEY_ILLEGAL_REQUEST) &&
461 	    (asc == STD_SCSI_ASC_INVAL_PARAM_LIST)) {
462 		rval = SCSI_SENSE_NOFAILOVER;
463 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_analyze_sense:"
464 		    " sense_key:%x, add_code: %x, qual_code:%x"
465 		    " sense:%x\n", skey, asc, ascq, rval));
466 	} else if ((skey == KEY_ILLEGAL_REQUEST) &&
467 	    (asc == STD_SCSI_ASC_INVAL_CMD_OPCODE)) {
468 		rval = SCSI_SENSE_NOFAILOVER;
469 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!std_analyze_sense:"
470 		    " sense_key:%x, add_code: %x, qual_code:%x"
471 		    " sense:%x\n", skey, asc, ascq, rval));
472 	} else {
473 		/*
474 		 * At this point sense data may be for power-on-reset
475 		 * UNIT ATTN hardware errors, vendor unqiue sense data etc.
476 		 * For all these cases, return SCSI_SENSE_UNKNOWN.
477 		 */
478 		VHCI_DEBUG(1, (CE_NOTE, NULL, "!Analyze sense UNKNOWN:"
479 		    " sense key:%x, ASC:%x, ASCQ:%x\n", skey, asc, ascq));
480 	}
481 
482 	return (rval);
483 }
484 
485 /* ARGSUSED */
486 static int
std_pathclass_next(char * cur,char ** nxt,void * ctpriv)487 std_pathclass_next(char *cur, char **nxt, void *ctpriv)
488 {
489 	/*
490 	 * The first phase does not have a standby path so
491 	 * there will be no explicit failover - when standard tpgs.
492 	 * standard defines preferred flag then we should start
493 	 * using this as the selection mechanism - there can be
494 	 * preferred primary standby that we should fail to first and then
495 	 * nonpreferred secondary standby.
496 	 */
497 	if (cur == NULL) {
498 		*nxt = PCLASS_PRIMARY;
499 		return (0);
500 	} else if (strcmp(cur, PCLASS_PRIMARY) == 0) {
501 		*nxt = PCLASS_SECONDARY;
502 		return (0);
503 	} else if (strcmp(cur, PCLASS_SECONDARY) == 0) {
504 		return (ENOENT);
505 	} else {
506 		return (EINVAL);
507 	}
508 }
509