1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 2014 Nexenta Systems Inc. All rights reserved.
24 * Copyright (c) 2018, Joyent, Inc.
25 */
26
27 /*
28 * Multipath driver interface (MDI) implementation; see mdi_impldefs.h for a
29 * more detailed discussion of the overall mpxio architecture.
30 *
31 * Default locking order:
32 *
33 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_phci_mutex);
34 * _NOTE(LOCK_ORDER(mdi_mutex, mdi_vhci:vh_client_mutex);
35 * _NOTE(LOCK_ORDER(mdi_vhci:vh_phci_mutex, mdi_phci::ph_mutex);
36 * _NOTE(LOCK_ORDER(mdi_vhci:vh_client_mutex, mdi_client::ct_mutex);
37 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
38 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_client::ct_mutex))
39 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
40 */
41
42 #include <sys/note.h>
43 #include <sys/types.h>
44 #include <sys/varargs.h>
45 #include <sys/param.h>
46 #include <sys/errno.h>
47 #include <sys/uio.h>
48 #include <sys/buf.h>
49 #include <sys/modctl.h>
50 #include <sys/open.h>
51 #include <sys/kmem.h>
52 #include <sys/poll.h>
53 #include <sys/conf.h>
54 #include <sys/bootconf.h>
55 #include <sys/cmn_err.h>
56 #include <sys/stat.h>
57 #include <sys/ddi.h>
58 #include <sys/sunddi.h>
59 #include <sys/ddipropdefs.h>
60 #include <sys/sunndi.h>
61 #include <sys/ndi_impldefs.h>
62 #include <sys/promif.h>
63 #include <sys/sunmdi.h>
64 #include <sys/mdi_impldefs.h>
65 #include <sys/taskq.h>
66 #include <sys/epm.h>
67 #include <sys/sunpm.h>
68 #include <sys/modhash.h>
69 #include <sys/disp.h>
70 #include <sys/autoconf.h>
71 #include <sys/sysmacros.h>
72
#ifdef DEBUG
#include <sys/debug.h>
int	mdi_debug = 1;
int	mdi_debug_logonly = 0;
/*
 * MDI_DEBUG(level, (MDI_WARN | MDI_NOTE | MDI_CONT, dip, fmt, ...)):
 *	Call i_mdi_log() with the parenthesized argument list 'pargs' when
 *	mdi_debug is at least 'level'.
 *
 *	The expansion is wrapped in do { } while (0) so the macro behaves as
 *	exactly one statement.  With a bare "if (...) i_mdi_log pargs"
 *	expansion, an "else" written after "if (x) MDI_DEBUG(...);" would
 *	silently pair with the macro's internal test instead of with "if (x)".
 */
#define	MDI_DEBUG(dbglevel, pargs)		\
	do {					\
		if (mdi_debug >= (dbglevel))	\
			i_mdi_log pargs;	\
	} while (0)
/* Each of these supplies the first two arguments of i_mdi_log(). */
#define	MDI_WARN	CE_WARN, __func__
#define	MDI_NOTE	CE_NOTE, __func__
#define	MDI_CONT	CE_CONT, __func__
static void i_mdi_log(int, const char *, dev_info_t *, const char *, ...);
#else	/* !DEBUG */
/* Non-DEBUG builds compile every MDI_DEBUG() invocation away. */
#define	MDI_DEBUG(dbglevel, pargs)
#endif	/* DEBUG */
85 int mdi_debug_consoleonly = 0;
86 int mdi_delay = 3;
87
88 extern pri_t minclsyspri;
89 extern int modrootloaded;
90
91 /*
92 * Global mutex:
93 * Protects vHCI list and structure members.
94 */
95 kmutex_t mdi_mutex;
96
97 /*
98 * Registered vHCI class driver lists
99 */
100 int mdi_vhci_count;
101 mdi_vhci_t *mdi_vhci_head;
102 mdi_vhci_t *mdi_vhci_tail;
103
104 /*
105 * Client Hash Table size
106 */
107 static int mdi_client_table_size = CLIENT_HASH_TABLE_SIZE;
108
109 /*
110 * taskq interface definitions
111 */
112 #define MDI_TASKQ_N_THREADS 8
113 #define MDI_TASKQ_PRI minclsyspri
114 #define MDI_TASKQ_MINALLOC (4*mdi_taskq_n_threads)
115 #define MDI_TASKQ_MAXALLOC (500*mdi_taskq_n_threads)
116
117 taskq_t *mdi_taskq;
118 static uint_t mdi_taskq_n_threads = MDI_TASKQ_N_THREADS;
119
120 #define TICKS_PER_SECOND (drv_usectohz(1000000))
121
122 /*
123 * The data should be "quiet" for this interval (in seconds) before the
124 * vhci cached data is flushed to the disk.
125 */
126 static int mdi_vhcache_flush_delay = 10;
127
128 /* number of seconds the vhcache flush daemon will sleep idle before exiting */
129 static int mdi_vhcache_flush_daemon_idle_time = 60;
130
131 /*
132 * MDI falls back to discovery of all paths when a bus_config_one fails.
133 * The following parameters can be used to tune this operation.
134 *
135 * mdi_path_discovery_boot
136 * Number of times path discovery will be attempted during early boot.
137 * Probably there is no reason to ever set this value to greater than one.
138 *
139 * mdi_path_discovery_postboot
140 * Number of times path discovery will be attempted after early boot.
141 * Set it to a minimum of two to allow for discovery of iscsi paths which
142 * may happen very late during booting.
143 *
144 * mdi_path_discovery_interval
145 * Minimum number of seconds MDI will wait between successive discovery
146 * of all paths. Set it to -1 to disable discovery of all paths.
147 */
148 static int mdi_path_discovery_boot = 1;
149 static int mdi_path_discovery_postboot = 2;
150 static int mdi_path_discovery_interval = 10;
151
152 /*
153 * number of seconds the asynchronous configuration thread will sleep idle
154 * before exiting.
155 */
156 static int mdi_async_config_idle_time = 600;
157
158 static int mdi_bus_config_cache_hash_size = 256;
159
160 /* turns off multithreaded configuration for certain operations */
161 static int mdi_mtc_off = 0;
162
163 /*
164 * The "path" to a pathinfo node is identical to the /devices path to a
165 * devinfo node had the device been enumerated under a pHCI instead of
166 * a vHCI. This pathinfo "path" is associated with a 'path_instance'.
167 * This association persists across create/delete of the pathinfo nodes,
168 * but not across reboot.
169 */
170 static uint_t mdi_pathmap_instance = 1; /* 0 -> any path */
171 static int mdi_pathmap_hash_size = 256;
172 static kmutex_t mdi_pathmap_mutex;
173 static mod_hash_t *mdi_pathmap_bypath; /* "path"->instance */
174 static mod_hash_t *mdi_pathmap_byinstance; /* instance->"path" */
175 static mod_hash_t *mdi_pathmap_sbyinstance; /* inst->shortpath */
176
177 /*
178 * MDI component property name/value string definitions
179 */
180 const char *mdi_component_prop = "mpxio-component";
181 const char *mdi_component_prop_vhci = "vhci";
182 const char *mdi_component_prop_phci = "phci";
183 const char *mdi_component_prop_client = "client";
184
185 /*
186 * MDI client global unique identifier property name
187 */
188 const char *mdi_client_guid_prop = "client-guid";
189
190 /*
191 * MDI client load balancing property name/value string definitions
192 */
193 const char *mdi_load_balance = "load-balance";
194 const char *mdi_load_balance_none = "none";
195 const char *mdi_load_balance_rr = "round-robin";
196 const char *mdi_load_balance_lba = "logical-block";
197
198 /*
199 * Obsolete vHCI class definition; to be removed after Leadville update
200 */
201 const char *mdi_vhci_class_scsi = MDI_HCI_CLASS_SCSI;
202
203 static char vhci_greeting[] =
204 "\tThere already exists one vHCI driver for class %s\n"
205 "\tOnly one vHCI driver for each class is allowed\n";
206
207 /*
208 * Static function prototypes
209 */
210 static int i_mdi_phci_offline(dev_info_t *, uint_t);
211 static int i_mdi_client_offline(dev_info_t *, uint_t);
212 static int i_mdi_phci_pre_detach(dev_info_t *, ddi_detach_cmd_t);
213 static void i_mdi_phci_post_detach(dev_info_t *,
214 ddi_detach_cmd_t, int);
215 static int i_mdi_client_pre_detach(dev_info_t *,
216 ddi_detach_cmd_t);
217 static void i_mdi_client_post_detach(dev_info_t *,
218 ddi_detach_cmd_t, int);
219 static void i_mdi_pm_hold_pip(mdi_pathinfo_t *);
220 static void i_mdi_pm_rele_pip(mdi_pathinfo_t *);
221 static int i_mdi_lba_lb(mdi_client_t *ct,
222 mdi_pathinfo_t **ret_pip, struct buf *buf);
223 static void i_mdi_pm_hold_client(mdi_client_t *, int);
224 static void i_mdi_pm_rele_client(mdi_client_t *, int);
225 static void i_mdi_pm_reset_client(mdi_client_t *);
226 static int i_mdi_power_all_phci(mdi_client_t *);
227 static void i_mdi_log_sysevent(dev_info_t *, char *, char *);
228
229
230 /*
231 * Internal mdi_pathinfo node functions
232 */
233 static void i_mdi_pi_kstat_destroy(mdi_pathinfo_t *);
234
235 static mdi_vhci_t *i_mdi_vhci_class2vhci(char *);
236 static mdi_vhci_t *i_devi_get_vhci(dev_info_t *);
237 static mdi_phci_t *i_devi_get_phci(dev_info_t *);
238 static void i_mdi_phci_lock(mdi_phci_t *, mdi_pathinfo_t *);
239 static void i_mdi_phci_unlock(mdi_phci_t *);
240 static mdi_pathinfo_t *i_mdi_pi_alloc(mdi_phci_t *, char *, mdi_client_t *);
241 static void i_mdi_phci_add_path(mdi_phci_t *, mdi_pathinfo_t *);
242 static void i_mdi_client_add_path(mdi_client_t *, mdi_pathinfo_t *);
243 static void i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *,
244 mdi_client_t *);
245 static void i_mdi_phci_remove_path(mdi_phci_t *, mdi_pathinfo_t *);
246 static void i_mdi_client_remove_path(mdi_client_t *,
247 mdi_pathinfo_t *);
248
249 static int i_mdi_pi_state_change(mdi_pathinfo_t *,
250 mdi_pathinfo_state_t, int);
251 static int i_mdi_pi_offline(mdi_pathinfo_t *, int);
252 static dev_info_t *i_mdi_devinfo_create(mdi_vhci_t *, char *, char *,
253 char **, int);
254 static dev_info_t *i_mdi_devinfo_find(mdi_vhci_t *, char *, char *);
255 static int i_mdi_devinfo_remove(dev_info_t *, dev_info_t *, int);
256 static int i_mdi_is_child_present(dev_info_t *, dev_info_t *);
257 static mdi_client_t *i_mdi_client_alloc(mdi_vhci_t *, char *, char *);
258 static void i_mdi_client_enlist_table(mdi_vhci_t *, mdi_client_t *);
259 static void i_mdi_client_delist_table(mdi_vhci_t *, mdi_client_t *);
260 static mdi_client_t *i_mdi_client_find(mdi_vhci_t *, char *, char *);
261 static void i_mdi_client_update_state(mdi_client_t *);
262 static int i_mdi_client_compute_state(mdi_client_t *,
263 mdi_phci_t *);
264 static void i_mdi_client_lock(mdi_client_t *, mdi_pathinfo_t *);
265 static void i_mdi_client_unlock(mdi_client_t *);
266 static int i_mdi_client_free(mdi_vhci_t *, mdi_client_t *);
267 static mdi_client_t *i_devi_get_client(dev_info_t *);
268 /*
269 * NOTE: this will be removed once the NWS files are changed to use the new
270 * mdi_{enable,disable}_path interfaces
271 */
272 static int i_mdi_pi_enable_disable(dev_info_t *, dev_info_t *,
273 int, int);
274 static mdi_pathinfo_t *i_mdi_enable_disable_path(mdi_pathinfo_t *pip,
275 mdi_vhci_t *vh, int flags, int op);
276 /*
277 * Failover related function prototypes
278 */
279 static int i_mdi_failover(void *);
280
281 /*
282 * misc internal functions
283 */
284 static int i_mdi_get_hash_key(char *);
285 static int i_map_nvlist_error_to_mdi(int);
286 static void i_mdi_report_path_state(mdi_client_t *,
287 mdi_pathinfo_t *);
288
289 static void setup_vhci_cache(mdi_vhci_t *);
290 static int destroy_vhci_cache(mdi_vhci_t *);
291 static int stop_vhcache_async_threads(mdi_vhci_config_t *);
292 static boolean_t stop_vhcache_flush_thread(void *, int);
293 static void free_string_array(char **, int);
294 static void free_vhcache_phci(mdi_vhcache_phci_t *);
295 static void free_vhcache_pathinfo(mdi_vhcache_pathinfo_t *);
296 static void free_vhcache_client(mdi_vhcache_client_t *);
297 static int mainnvl_to_vhcache(mdi_vhci_cache_t *, nvlist_t *);
298 static nvlist_t *vhcache_to_mainnvl(mdi_vhci_cache_t *);
299 static void vhcache_phci_add(mdi_vhci_config_t *, mdi_phci_t *);
300 static void vhcache_phci_remove(mdi_vhci_config_t *, mdi_phci_t *);
301 static void vhcache_pi_add(mdi_vhci_config_t *,
302 struct mdi_pathinfo *);
303 static void vhcache_pi_remove(mdi_vhci_config_t *,
304 struct mdi_pathinfo *);
305 static void free_phclient_path_list(mdi_phys_path_t *);
306 static void sort_vhcache_paths(mdi_vhcache_client_t *);
307 static int flush_vhcache(mdi_vhci_config_t *, int);
308 static void vhcache_dirty(mdi_vhci_config_t *);
309 static void free_async_client_config(mdi_async_client_config_t *);
310 static void single_threaded_vhconfig_enter(mdi_vhci_config_t *);
311 static void single_threaded_vhconfig_exit(mdi_vhci_config_t *);
312 static nvlist_t *read_on_disk_vhci_cache(char *);
313 extern int fread_nvlist(char *, nvlist_t **);
314 extern int fwrite_nvlist(char *, nvlist_t *);
315
316 /* called once when first vhci registers with mdi */
317 static void
i_mdi_init()318 i_mdi_init()
319 {
320 static int initialized = 0;
321
322 if (initialized)
323 return;
324 initialized = 1;
325
326 mutex_init(&mdi_mutex, NULL, MUTEX_DEFAULT, NULL);
327
328 /* Create our taskq resources */
329 mdi_taskq = taskq_create("mdi_taskq", mdi_taskq_n_threads,
330 MDI_TASKQ_PRI, MDI_TASKQ_MINALLOC, MDI_TASKQ_MAXALLOC,
331 TASKQ_PREPOPULATE | TASKQ_CPR_SAFE);
332 ASSERT(mdi_taskq != NULL); /* taskq_create never fails */
333
334 /* Allocate ['path_instance' <-> "path"] maps */
335 mutex_init(&mdi_pathmap_mutex, NULL, MUTEX_DRIVER, NULL);
336 mdi_pathmap_bypath = mod_hash_create_strhash(
337 "mdi_pathmap_bypath", mdi_pathmap_hash_size,
338 mod_hash_null_valdtor);
339 mdi_pathmap_byinstance = mod_hash_create_idhash(
340 "mdi_pathmap_byinstance", mdi_pathmap_hash_size,
341 mod_hash_null_valdtor);
342 mdi_pathmap_sbyinstance = mod_hash_create_idhash(
343 "mdi_pathmap_sbyinstance", mdi_pathmap_hash_size,
344 mod_hash_null_valdtor);
345 }
346
347 /*
348 * mdi_get_component_type():
349 * Return mpxio component type
350 * Return Values:
351 * MDI_COMPONENT_NONE
352 * MDI_COMPONENT_VHCI
353 * MDI_COMPONENT_PHCI
354 * MDI_COMPONENT_CLIENT
355 * XXX This doesn't work under multi-level MPxIO and should be
356 * removed when clients migrate mdi_component_is_*() interfaces.
357 */
358 int
mdi_get_component_type(dev_info_t * dip)359 mdi_get_component_type(dev_info_t *dip)
360 {
361 return (DEVI(dip)->devi_mdi_component);
362 }
363
364 /*
365 * mdi_vhci_register():
366 * Register a vHCI module with the mpxio framework
367 * mdi_vhci_register() is called by vHCI drivers to register the
368 * 'class_driver' vHCI driver and its MDI entrypoints with the
369 * mpxio framework. The vHCI driver must call this interface as
370 * part of its attach(9e) handler.
371 * Competing threads may try to attach mdi_vhci_register() as
372 * the vHCI drivers are loaded and attached as a result of pHCI
373 * driver instance registration (mdi_phci_register()) with the
374 * framework.
375 * Return Values:
376 * MDI_SUCCESS
377 * MDI_FAILURE
378 */
379 /*ARGSUSED*/
380 int
mdi_vhci_register(char * class,dev_info_t * vdip,mdi_vhci_ops_t * vops,int flags)381 mdi_vhci_register(char *class, dev_info_t *vdip, mdi_vhci_ops_t *vops,
382 int flags)
383 {
384 mdi_vhci_t *vh = NULL;
385
386 /* Registrant can't be older */
387 ASSERT(vops->vo_revision <= MDI_VHCI_OPS_REV);
388
389 #ifdef DEBUG
390 /*
391 * IB nexus driver is loaded only when IB hardware is present.
392 * In order to be able to do this there is a need to drive the loading
393 * and attaching of the IB nexus driver (especially when an IB hardware
394 * is dynamically plugged in) when an IB HCA driver (PHCI)
395 * is being attached. Unfortunately this gets into the limitations
396 * of devfs as there seems to be no clean way to drive configuration
397 * of a subtree from another subtree of a devfs. Hence, do not ASSERT
398 * for IB.
399 */
400 if (strcmp(class, MDI_HCI_CLASS_IB) != 0)
401 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
402 #endif
403
404 i_mdi_init();
405
406 mutex_enter(&mdi_mutex);
407 /*
408 * Scan for already registered vhci
409 */
410 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
411 if (strcmp(vh->vh_class, class) == 0) {
412 /*
413 * vHCI has already been created. Check for valid
414 * vHCI ops registration. We only support one vHCI
415 * module per class
416 */
417 if (vh->vh_ops != NULL) {
418 mutex_exit(&mdi_mutex);
419 cmn_err(CE_NOTE, vhci_greeting, class);
420 return (MDI_FAILURE);
421 }
422 break;
423 }
424 }
425
426 /*
427 * if not yet created, create the vHCI component
428 */
429 if (vh == NULL) {
430 struct client_hash *hash = NULL;
431 char *load_balance;
432
433 /*
434 * Allocate and initialize the mdi extensions
435 */
436 vh = kmem_zalloc(sizeof (mdi_vhci_t), KM_SLEEP);
437 hash = kmem_zalloc(mdi_client_table_size * sizeof (*hash),
438 KM_SLEEP);
439 vh->vh_client_table = hash;
440 vh->vh_class = kmem_zalloc(strlen(class) + 1, KM_SLEEP);
441 (void) strcpy(vh->vh_class, class);
442 vh->vh_lb = LOAD_BALANCE_RR;
443 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, vdip,
444 0, LOAD_BALANCE_PROP, &load_balance) == DDI_SUCCESS) {
445 if (strcmp(load_balance, LOAD_BALANCE_PROP_NONE) == 0) {
446 vh->vh_lb = LOAD_BALANCE_NONE;
447 } else if (strcmp(load_balance, LOAD_BALANCE_PROP_LBA)
448 == 0) {
449 vh->vh_lb = LOAD_BALANCE_LBA;
450 }
451 ddi_prop_free(load_balance);
452 }
453
454 mutex_init(&vh->vh_phci_mutex, NULL, MUTEX_DEFAULT, NULL);
455 mutex_init(&vh->vh_client_mutex, NULL, MUTEX_DEFAULT, NULL);
456
457 /*
458 * Store the vHCI ops vectors
459 */
460 vh->vh_dip = vdip;
461 vh->vh_ops = vops;
462
463 setup_vhci_cache(vh);
464
465 if (mdi_vhci_head == NULL) {
466 mdi_vhci_head = vh;
467 }
468 if (mdi_vhci_tail) {
469 mdi_vhci_tail->vh_next = vh;
470 }
471 mdi_vhci_tail = vh;
472 mdi_vhci_count++;
473 }
474
475 /*
476 * Claim the devfs node as a vhci component
477 */
478 DEVI(vdip)->devi_mdi_component |= MDI_COMPONENT_VHCI;
479
480 /*
481 * Initialize our back reference from dev_info node
482 */
483 DEVI(vdip)->devi_mdi_xhci = (caddr_t)vh;
484 mutex_exit(&mdi_mutex);
485 return (MDI_SUCCESS);
486 }
487
488 /*
489 * mdi_vhci_unregister():
490 * Unregister a vHCI module from mpxio framework
491 * mdi_vhci_unregister() is called from the detach(9E) entrypoint
492 * of a vhci to unregister it from the framework.
493 * Return Values:
494 * MDI_SUCCESS
495 * MDI_FAILURE
496 */
497 /*ARGSUSED*/
498 int
mdi_vhci_unregister(dev_info_t * vdip,int flags)499 mdi_vhci_unregister(dev_info_t *vdip, int flags)
500 {
501 mdi_vhci_t *found, *vh, *prev = NULL;
502
503 ASSERT(DEVI_BUSY_OWNED(ddi_get_parent(vdip)));
504
505 /*
506 * Check for invalid VHCI
507 */
508 if ((vh = i_devi_get_vhci(vdip)) == NULL)
509 return (MDI_FAILURE);
510
511 /*
512 * Scan the list of registered vHCIs for a match
513 */
514 mutex_enter(&mdi_mutex);
515 for (found = mdi_vhci_head; found != NULL; found = found->vh_next) {
516 if (found == vh)
517 break;
518 prev = found;
519 }
520
521 if (found == NULL) {
522 mutex_exit(&mdi_mutex);
523 return (MDI_FAILURE);
524 }
525
526 /*
527 * Check the vHCI, pHCI and client count. All the pHCIs and clients
528 * should have been unregistered, before a vHCI can be
529 * unregistered.
530 */
531 MDI_VHCI_PHCI_LOCK(vh);
532 if (vh->vh_refcnt || vh->vh_phci_count || vh->vh_client_count) {
533 MDI_VHCI_PHCI_UNLOCK(vh);
534 mutex_exit(&mdi_mutex);
535 return (MDI_FAILURE);
536 }
537 MDI_VHCI_PHCI_UNLOCK(vh);
538
539 if (destroy_vhci_cache(vh) != MDI_SUCCESS) {
540 mutex_exit(&mdi_mutex);
541 return (MDI_FAILURE);
542 }
543
544 /*
545 * Remove the vHCI from the global list
546 */
547 if (vh == mdi_vhci_head) {
548 mdi_vhci_head = vh->vh_next;
549 } else {
550 prev->vh_next = vh->vh_next;
551 }
552 if (vh == mdi_vhci_tail) {
553 mdi_vhci_tail = prev;
554 }
555 mdi_vhci_count--;
556 mutex_exit(&mdi_mutex);
557
558 vh->vh_ops = NULL;
559 DEVI(vdip)->devi_mdi_component &= ~MDI_COMPONENT_VHCI;
560 DEVI(vdip)->devi_mdi_xhci = NULL;
561 kmem_free(vh->vh_class, strlen(vh->vh_class)+1);
562 kmem_free(vh->vh_client_table,
563 mdi_client_table_size * sizeof (struct client_hash));
564 mutex_destroy(&vh->vh_phci_mutex);
565 mutex_destroy(&vh->vh_client_mutex);
566
567 kmem_free(vh, sizeof (mdi_vhci_t));
568 return (MDI_SUCCESS);
569 }
570
571 /*
572 * i_mdi_vhci_class2vhci():
573 * Look for a matching vHCI module given a vHCI class name
574 * Return Values:
575 * Handle to a vHCI component
576 * NULL
577 */
578 static mdi_vhci_t *
i_mdi_vhci_class2vhci(char * class)579 i_mdi_vhci_class2vhci(char *class)
580 {
581 mdi_vhci_t *vh = NULL;
582
583 ASSERT(!MUTEX_HELD(&mdi_mutex));
584
585 mutex_enter(&mdi_mutex);
586 for (vh = mdi_vhci_head; vh != NULL; vh = vh->vh_next) {
587 if (strcmp(vh->vh_class, class) == 0) {
588 break;
589 }
590 }
591 mutex_exit(&mdi_mutex);
592 return (vh);
593 }
594
595 /*
596 * i_devi_get_vhci():
597 * Utility function to get the handle to a vHCI component
598 * Return Values:
599 * Handle to a vHCI component
600 * NULL
601 */
602 mdi_vhci_t *
i_devi_get_vhci(dev_info_t * vdip)603 i_devi_get_vhci(dev_info_t *vdip)
604 {
605 mdi_vhci_t *vh = NULL;
606 if (MDI_VHCI(vdip)) {
607 vh = (mdi_vhci_t *)DEVI(vdip)->devi_mdi_xhci;
608 }
609 return (vh);
610 }
611
612 /*
613 * mdi_phci_register():
614 * Register a pHCI module with mpxio framework
615 * mdi_phci_register() is called by pHCI drivers to register with
616 * the mpxio framework and a specific 'class_driver' vHCI. The
617 * pHCI driver must call this interface as part of its attach(9e)
618 * handler.
619 * Return Values:
620 * MDI_SUCCESS
621 * MDI_FAILURE
622 */
623 /*ARGSUSED*/
624 int
mdi_phci_register(char * class,dev_info_t * pdip,int flags)625 mdi_phci_register(char *class, dev_info_t *pdip, int flags)
626 {
627 mdi_phci_t *ph;
628 mdi_vhci_t *vh;
629 char *data;
630
631 /*
632 * Some subsystems, like fcp, perform pHCI registration from a
633 * different thread than the one doing the pHCI attach(9E) - the
634 * driver attach code is waiting for this other thread to complete.
635 * This means we can only ASSERT DEVI_BUSY_CHANGING of parent
636 * (indicating that some thread has done an ndi_devi_enter of parent)
637 * not DEVI_BUSY_OWNED (which would indicate that we did the enter).
638 */
639 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
640
641 /*
642 * Check for mpxio-disable property. Enable mpxio if the property is
643 * missing or not set to "yes".
644 * If the property is set to "yes" then emit a brief message.
645 */
646 if ((ddi_prop_lookup_string(DDI_DEV_T_ANY, pdip, 0, "mpxio-disable",
647 &data) == DDI_SUCCESS)) {
648 if (strcmp(data, "yes") == 0) {
649 MDI_DEBUG(1, (MDI_CONT, pdip,
650 "?multipath capabilities disabled via %s.conf.",
651 ddi_driver_name(pdip)));
652 ddi_prop_free(data);
653 return (MDI_FAILURE);
654 }
655 ddi_prop_free(data);
656 }
657
658 /*
659 * Search for a matching vHCI
660 */
661 vh = (mdi_vhci_t *)i_mdi_vhci_class2vhci(class);
662 if (vh == NULL) {
663 return (MDI_FAILURE);
664 }
665
666 ph = kmem_zalloc(sizeof (mdi_phci_t), KM_SLEEP);
667 mutex_init(&ph->ph_mutex, NULL, MUTEX_DEFAULT, NULL);
668 ph->ph_dip = pdip;
669 ph->ph_vhci = vh;
670 ph->ph_next = NULL;
671 ph->ph_unstable = 0;
672 ph->ph_vprivate = 0;
673 cv_init(&ph->ph_unstable_cv, NULL, CV_DRIVER, NULL);
674
675 MDI_PHCI_LOCK(ph);
676 MDI_PHCI_SET_POWER_UP(ph);
677 MDI_PHCI_UNLOCK(ph);
678 DEVI(pdip)->devi_mdi_component |= MDI_COMPONENT_PHCI;
679 DEVI(pdip)->devi_mdi_xhci = (caddr_t)ph;
680
681 vhcache_phci_add(vh->vh_config, ph);
682
683 MDI_VHCI_PHCI_LOCK(vh);
684 if (vh->vh_phci_head == NULL) {
685 vh->vh_phci_head = ph;
686 }
687 if (vh->vh_phci_tail) {
688 vh->vh_phci_tail->ph_next = ph;
689 }
690 vh->vh_phci_tail = ph;
691 vh->vh_phci_count++;
692 MDI_VHCI_PHCI_UNLOCK(vh);
693
694 i_mdi_log_sysevent(pdip, class, ESC_DDI_INITIATOR_REGISTER);
695 return (MDI_SUCCESS);
696 }
697
698 /*
699 * mdi_phci_unregister():
700 * Unregister a pHCI module from mpxio framework
701 * mdi_phci_unregister() is called by the pHCI drivers from their
702 * detach(9E) handler to unregister their instances from the
703 * framework.
704 * Return Values:
705 * MDI_SUCCESS
706 * MDI_FAILURE
707 */
708 /*ARGSUSED*/
709 int
mdi_phci_unregister(dev_info_t * pdip,int flags)710 mdi_phci_unregister(dev_info_t *pdip, int flags)
711 {
712 mdi_vhci_t *vh;
713 mdi_phci_t *ph;
714 mdi_phci_t *tmp;
715 mdi_phci_t *prev = NULL;
716 mdi_pathinfo_t *pip;
717
718 ASSERT(DEVI_BUSY_CHANGING(ddi_get_parent(pdip)));
719
720 ph = i_devi_get_phci(pdip);
721 if (ph == NULL) {
722 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid pHCI"));
723 return (MDI_FAILURE);
724 }
725
726 vh = ph->ph_vhci;
727 ASSERT(vh != NULL);
728 if (vh == NULL) {
729 MDI_DEBUG(1, (MDI_WARN, pdip, "!not a valid vHCI"));
730 return (MDI_FAILURE);
731 }
732
733 MDI_VHCI_PHCI_LOCK(vh);
734 tmp = vh->vh_phci_head;
735 while (tmp) {
736 if (tmp == ph) {
737 break;
738 }
739 prev = tmp;
740 tmp = tmp->ph_next;
741 }
742
743 if (ph == vh->vh_phci_head) {
744 vh->vh_phci_head = ph->ph_next;
745 } else {
746 prev->ph_next = ph->ph_next;
747 }
748
749 if (ph == vh->vh_phci_tail) {
750 vh->vh_phci_tail = prev;
751 }
752
753 vh->vh_phci_count--;
754 MDI_VHCI_PHCI_UNLOCK(vh);
755
756 /* Walk remaining pathinfo nodes and disassociate them from pHCI */
757 MDI_PHCI_LOCK(ph);
758 for (pip = (mdi_pathinfo_t *)ph->ph_path_head; pip;
759 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link)
760 MDI_PI(pip)->pi_phci = NULL;
761 MDI_PHCI_UNLOCK(ph);
762
763 i_mdi_log_sysevent(pdip, ph->ph_vhci->vh_class,
764 ESC_DDI_INITIATOR_UNREGISTER);
765 vhcache_phci_remove(vh->vh_config, ph);
766 cv_destroy(&ph->ph_unstable_cv);
767 mutex_destroy(&ph->ph_mutex);
768 kmem_free(ph, sizeof (mdi_phci_t));
769 DEVI(pdip)->devi_mdi_component &= ~MDI_COMPONENT_PHCI;
770 DEVI(pdip)->devi_mdi_xhci = NULL;
771 return (MDI_SUCCESS);
772 }
773
774 /*
775 * i_devi_get_phci():
776 * Utility function to return the phci extensions.
777 */
778 static mdi_phci_t *
i_devi_get_phci(dev_info_t * pdip)779 i_devi_get_phci(dev_info_t *pdip)
780 {
781 mdi_phci_t *ph = NULL;
782
783 if (MDI_PHCI(pdip)) {
784 ph = (mdi_phci_t *)DEVI(pdip)->devi_mdi_xhci;
785 }
786 return (ph);
787 }
788
789 /*
790 * Single thread mdi entry into devinfo node for modifying its children.
791 * If necessary we perform an ndi_devi_enter of the vHCI before doing
792 * an ndi_devi_enter of 'dip'. We maintain circular in two parts: one
793 * for the vHCI and one for the pHCI.
794 */
795 void
mdi_devi_enter(dev_info_t * phci_dip,int * circular)796 mdi_devi_enter(dev_info_t *phci_dip, int *circular)
797 {
798 dev_info_t *vdip;
799 int vcircular, pcircular;
800
801 /* Verify calling context */
802 ASSERT(MDI_PHCI(phci_dip));
803 vdip = mdi_devi_get_vdip(phci_dip);
804 ASSERT(vdip); /* A pHCI always has a vHCI */
805
806 /*
807 * If pHCI is detaching then the framework has already entered the
808 * vHCI on a threads that went down the code path leading to
809 * detach_node(). This framework enter of the vHCI during pHCI
810 * detach is done to avoid deadlock with vHCI power management
811 * operations which enter the vHCI and the enter down the path
812 * to the pHCI. If pHCI is detaching then we piggyback this calls
813 * enter of the vHCI on frameworks vHCI enter that has already
814 * occurred - this is OK because we know that the framework thread
815 * doing detach is waiting for our completion.
816 *
817 * We should DEVI_IS_DETACHING under an enter of the parent to avoid
818 * race with detach - but we can't do that because the framework has
819 * already entered the parent, so we have some complexity instead.
820 */
821 for (;;) {
822 if (ndi_devi_tryenter(vdip, &vcircular)) {
823 ASSERT(vcircular != -1);
824 if (DEVI_IS_DETACHING(phci_dip)) {
825 ndi_devi_exit(vdip, vcircular);
826 vcircular = -1;
827 }
828 break;
829 } else if (DEVI_IS_DETACHING(phci_dip)) {
830 vcircular = -1;
831 break;
832 } else if (servicing_interrupt()) {
833 /*
834 * Don't delay an interrupt (and ensure adaptive
835 * mutex inversion support).
836 */
837 ndi_devi_enter(vdip, &vcircular);
838 break;
839 } else {
840 delay_random(mdi_delay);
841 }
842 }
843
844 ndi_devi_enter(phci_dip, &pcircular);
845 *circular = (vcircular << 16) | (pcircular & 0xFFFF);
846 }
847
848 /*
849 * Attempt to mdi_devi_enter.
850 */
851 int
mdi_devi_tryenter(dev_info_t * phci_dip,int * circular)852 mdi_devi_tryenter(dev_info_t *phci_dip, int *circular)
853 {
854 dev_info_t *vdip;
855 int vcircular, pcircular;
856
857 /* Verify calling context */
858 ASSERT(MDI_PHCI(phci_dip));
859 vdip = mdi_devi_get_vdip(phci_dip);
860 ASSERT(vdip); /* A pHCI always has a vHCI */
861
862 if (ndi_devi_tryenter(vdip, &vcircular)) {
863 if (ndi_devi_tryenter(phci_dip, &pcircular)) {
864 *circular = (vcircular << 16) | (pcircular & 0xFFFF);
865 return (1); /* locked */
866 }
867 ndi_devi_exit(vdip, vcircular);
868 }
869 return (0); /* busy */
870 }
871
872 /*
873 * Release mdi_devi_enter or successful mdi_devi_tryenter.
874 */
875 void
mdi_devi_exit(dev_info_t * phci_dip,int circular)876 mdi_devi_exit(dev_info_t *phci_dip, int circular)
877 {
878 dev_info_t *vdip;
879 int vcircular, pcircular;
880
881 /* Verify calling context */
882 ASSERT(MDI_PHCI(phci_dip));
883 vdip = mdi_devi_get_vdip(phci_dip);
884 ASSERT(vdip); /* A pHCI always has a vHCI */
885
886 /* extract two circular recursion values from single int */
887 pcircular = (short)(circular & 0xFFFF);
888 vcircular = (short)((circular >> 16) & 0xFFFF);
889
890 ndi_devi_exit(phci_dip, pcircular);
891 if (vcircular != -1)
892 ndi_devi_exit(vdip, vcircular);
893 }
894
895 /*
896 * The functions mdi_devi_exit_phci() and mdi_devi_enter_phci() are used
897 * around a pHCI drivers calls to mdi_pi_online/offline, after holding
898 * the pathinfo node via mdi_hold_path/mdi_rele_path, to avoid deadlock
899 * with vHCI power management code during path online/offline. Each
900 * mdi_devi_exit_phci must have a matching mdi_devi_enter_phci, and both must
901 * occur within the scope of an active mdi_devi_enter that establishes the
902 * circular value.
903 */
904 void
mdi_devi_exit_phci(dev_info_t * phci_dip,int circular)905 mdi_devi_exit_phci(dev_info_t *phci_dip, int circular)
906 {
907 int pcircular;
908
909 /* Verify calling context */
910 ASSERT(MDI_PHCI(phci_dip));
911
912 /* Keep hold on pHCI until we reenter in mdi_devi_enter_phci */
913 ndi_hold_devi(phci_dip);
914
915 pcircular = (short)(circular & 0xFFFF);
916 ndi_devi_exit(phci_dip, pcircular);
917 }
918
919 void
mdi_devi_enter_phci(dev_info_t * phci_dip,int * circular)920 mdi_devi_enter_phci(dev_info_t *phci_dip, int *circular)
921 {
922 int pcircular;
923
924 /* Verify calling context */
925 ASSERT(MDI_PHCI(phci_dip));
926
927 ndi_devi_enter(phci_dip, &pcircular);
928
929 /* Drop hold from mdi_devi_exit_phci. */
930 ndi_rele_devi(phci_dip);
931
932 /* verify matching mdi_devi_exit_phci/mdi_devi_enter_phci use */
933 ASSERT(pcircular == ((short)(*circular & 0xFFFF)));
934 }
935
936 /*
937 * mdi_devi_get_vdip():
938 * given a pHCI dip return vHCI dip
939 */
940 dev_info_t *
mdi_devi_get_vdip(dev_info_t * pdip)941 mdi_devi_get_vdip(dev_info_t *pdip)
942 {
943 mdi_phci_t *ph;
944
945 ph = i_devi_get_phci(pdip);
946 if (ph && ph->ph_vhci)
947 return (ph->ph_vhci->vh_dip);
948 return (NULL);
949 }
950
951 /*
952 * mdi_devi_pdip_entered():
953 * Return 1 if we are vHCI and have done an ndi_devi_enter
954 * of a pHCI
955 */
956 int
mdi_devi_pdip_entered(dev_info_t * vdip)957 mdi_devi_pdip_entered(dev_info_t *vdip)
958 {
959 mdi_vhci_t *vh;
960 mdi_phci_t *ph;
961
962 vh = i_devi_get_vhci(vdip);
963 if (vh == NULL)
964 return (0);
965
966 MDI_VHCI_PHCI_LOCK(vh);
967 ph = vh->vh_phci_head;
968 while (ph) {
969 if (ph->ph_dip && DEVI_BUSY_OWNED(ph->ph_dip)) {
970 MDI_VHCI_PHCI_UNLOCK(vh);
971 return (1);
972 }
973 ph = ph->ph_next;
974 }
975 MDI_VHCI_PHCI_UNLOCK(vh);
976 return (0);
977 }
978
979 /*
980 * mdi_phci_path2devinfo():
981 * Utility function to search for a valid phci device given
982 * the devfs pathname.
983 */
984 dev_info_t *
mdi_phci_path2devinfo(dev_info_t * vdip,caddr_t pathname)985 mdi_phci_path2devinfo(dev_info_t *vdip, caddr_t pathname)
986 {
987 char *temp_pathname;
988 mdi_vhci_t *vh;
989 mdi_phci_t *ph;
990 dev_info_t *pdip = NULL;
991
992 vh = i_devi_get_vhci(vdip);
993 ASSERT(vh != NULL);
994
995 if (vh == NULL) {
996 /*
997 * Invalid vHCI component, return failure
998 */
999 return (NULL);
1000 }
1001
1002 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1003 MDI_VHCI_PHCI_LOCK(vh);
1004 ph = vh->vh_phci_head;
1005 while (ph != NULL) {
1006 pdip = ph->ph_dip;
1007 ASSERT(pdip != NULL);
1008 *temp_pathname = '\0';
1009 (void) ddi_pathname(pdip, temp_pathname);
1010 if (strcmp(temp_pathname, pathname) == 0) {
1011 break;
1012 }
1013 ph = ph->ph_next;
1014 }
1015 if (ph == NULL) {
1016 pdip = NULL;
1017 }
1018 MDI_VHCI_PHCI_UNLOCK(vh);
1019 kmem_free(temp_pathname, MAXPATHLEN);
1020 return (pdip);
1021 }
1022
1023 /*
1024 * mdi_phci_get_path_count():
1025 * get number of path information nodes associated with a given
1026 * pHCI device.
1027 */
1028 int
mdi_phci_get_path_count(dev_info_t * pdip)1029 mdi_phci_get_path_count(dev_info_t *pdip)
1030 {
1031 mdi_phci_t *ph;
1032 int count = 0;
1033
1034 ph = i_devi_get_phci(pdip);
1035 if (ph != NULL) {
1036 count = ph->ph_path_count;
1037 }
1038 return (count);
1039 }
1040
1041 /*
1042 * i_mdi_phci_lock():
1043 * Lock a pHCI device
1044 * Return Values:
1045 * None
1046 * Note:
1047 * The default locking order is:
1048 * _NOTE(LOCK_ORDER(mdi_phci::ph_mutex mdi_pathinfo::pi_mutex))
1049 * But there are number of situations where locks need to be
1050 * grabbed in reverse order. This routine implements try and lock
1051 * mechanism depending on the requested parameter option.
1052 */
1053 static void
i_mdi_phci_lock(mdi_phci_t * ph,mdi_pathinfo_t * pip)1054 i_mdi_phci_lock(mdi_phci_t *ph, mdi_pathinfo_t *pip)
1055 {
1056 if (pip) {
1057 /* Reverse locking is requested. */
1058 while (MDI_PHCI_TRYLOCK(ph) == 0) {
1059 if (servicing_interrupt()) {
1060 MDI_PI_HOLD(pip);
1061 MDI_PI_UNLOCK(pip);
1062 MDI_PHCI_LOCK(ph);
1063 MDI_PI_LOCK(pip);
1064 MDI_PI_RELE(pip);
1065 break;
1066 } else {
1067 /*
1068 * tryenter failed. Try to grab again
1069 * after a small delay
1070 */
1071 MDI_PI_HOLD(pip);
1072 MDI_PI_UNLOCK(pip);
1073 delay_random(mdi_delay);
1074 MDI_PI_LOCK(pip);
1075 MDI_PI_RELE(pip);
1076 }
1077 }
1078 } else {
1079 MDI_PHCI_LOCK(ph);
1080 }
1081 }
1082
1083 /*
1084 * i_mdi_phci_unlock():
1085 * Unlock the pHCI component
1086 */
1087 static void
i_mdi_phci_unlock(mdi_phci_t * ph)1088 i_mdi_phci_unlock(mdi_phci_t *ph)
1089 {
1090 MDI_PHCI_UNLOCK(ph);
1091 }
1092
1093 /*
1094 * i_mdi_devinfo_create():
1095 * create client device's devinfo node
1096 * Return Values:
1097 * dev_info
1098 * NULL
1099 * Notes:
1100 */
1101 static dev_info_t *
i_mdi_devinfo_create(mdi_vhci_t * vh,char * name,char * guid,char ** compatible,int ncompatible)1102 i_mdi_devinfo_create(mdi_vhci_t *vh, char *name, char *guid,
1103 char **compatible, int ncompatible)
1104 {
1105 dev_info_t *cdip = NULL;
1106
1107 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1108
1109 /* Verify for duplicate entry */
1110 cdip = i_mdi_devinfo_find(vh, name, guid);
1111 ASSERT(cdip == NULL);
1112 if (cdip) {
1113 cmn_err(CE_WARN,
1114 "i_mdi_devinfo_create: client %s@%s already exists",
1115 name ? name : "", guid ? guid : "");
1116 }
1117
1118 ndi_devi_alloc_sleep(vh->vh_dip, name, DEVI_SID_NODEID, &cdip);
1119 if (cdip == NULL)
1120 goto fail;
1121
1122 /*
1123 * Create component type and Global unique identifier
1124 * properties
1125 */
1126 if (ndi_prop_update_string(DDI_DEV_T_NONE, cdip,
1127 MDI_CLIENT_GUID_PROP, guid) != DDI_PROP_SUCCESS) {
1128 goto fail;
1129 }
1130
1131 /* Decorate the node with compatible property */
1132 if (compatible &&
1133 (ndi_prop_update_string_array(DDI_DEV_T_NONE, cdip,
1134 "compatible", compatible, ncompatible) != DDI_PROP_SUCCESS)) {
1135 goto fail;
1136 }
1137
1138 return (cdip);
1139
1140 fail:
1141 if (cdip) {
1142 (void) ndi_prop_remove_all(cdip);
1143 (void) ndi_devi_free(cdip);
1144 }
1145 return (NULL);
1146 }
1147
1148 /*
1149 * i_mdi_devinfo_find():
1150 * Find a matching devinfo node for given client node name
1151 * and its guid.
1152 * Return Values:
1153 * Handle to a dev_info node or NULL
1154 */
1155 static dev_info_t *
i_mdi_devinfo_find(mdi_vhci_t * vh,caddr_t name,char * guid)1156 i_mdi_devinfo_find(mdi_vhci_t *vh, caddr_t name, char *guid)
1157 {
1158 char *data;
1159 dev_info_t *cdip = NULL;
1160 dev_info_t *ndip = NULL;
1161 int circular;
1162
1163 ndi_devi_enter(vh->vh_dip, &circular);
1164 ndip = (dev_info_t *)DEVI(vh->vh_dip)->devi_child;
1165 while ((cdip = ndip) != NULL) {
1166 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1167
1168 if (strcmp(DEVI(cdip)->devi_node_name, name)) {
1169 continue;
1170 }
1171
1172 if (ddi_prop_lookup_string(DDI_DEV_T_ANY, cdip,
1173 DDI_PROP_DONTPASS, MDI_CLIENT_GUID_PROP,
1174 &data) != DDI_PROP_SUCCESS) {
1175 continue;
1176 }
1177
1178 if (strcmp(data, guid) != 0) {
1179 ddi_prop_free(data);
1180 continue;
1181 }
1182 ddi_prop_free(data);
1183 break;
1184 }
1185 ndi_devi_exit(vh->vh_dip, circular);
1186 return (cdip);
1187 }
1188
1189 /*
1190 * i_mdi_devinfo_remove():
1191 * Remove a client device node
1192 */
1193 static int
i_mdi_devinfo_remove(dev_info_t * vdip,dev_info_t * cdip,int flags)1194 i_mdi_devinfo_remove(dev_info_t *vdip, dev_info_t *cdip, int flags)
1195 {
1196 int rv = MDI_SUCCESS;
1197
1198 if (i_mdi_is_child_present(vdip, cdip) == MDI_SUCCESS ||
1199 (flags & MDI_CLIENT_FLAGS_DEV_NOT_SUPPORTED)) {
1200 rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN | NDI_DEVI_REMOVE);
1201 if (rv != NDI_SUCCESS) {
1202 MDI_DEBUG(1, (MDI_NOTE, cdip,
1203 "!failed: cdip %p", (void *)cdip));
1204 }
1205 /*
1206 * Convert to MDI error code
1207 */
1208 switch (rv) {
1209 case NDI_SUCCESS:
1210 rv = MDI_SUCCESS;
1211 break;
1212 case NDI_BUSY:
1213 rv = MDI_BUSY;
1214 break;
1215 default:
1216 rv = MDI_FAILURE;
1217 break;
1218 }
1219 }
1220 return (rv);
1221 }
1222
1223 /*
1224 * i_devi_get_client()
1225 * Utility function to get mpxio component extensions
1226 */
1227 static mdi_client_t *
i_devi_get_client(dev_info_t * cdip)1228 i_devi_get_client(dev_info_t *cdip)
1229 {
1230 mdi_client_t *ct = NULL;
1231
1232 if (MDI_CLIENT(cdip)) {
1233 ct = (mdi_client_t *)DEVI(cdip)->devi_mdi_client;
1234 }
1235 return (ct);
1236 }
1237
1238 /*
1239 * i_mdi_is_child_present():
1240 * Search for the presence of client device dev_info node
1241 */
1242 static int
i_mdi_is_child_present(dev_info_t * vdip,dev_info_t * cdip)1243 i_mdi_is_child_present(dev_info_t *vdip, dev_info_t *cdip)
1244 {
1245 int rv = MDI_FAILURE;
1246 struct dev_info *dip;
1247 int circular;
1248
1249 ndi_devi_enter(vdip, &circular);
1250 dip = DEVI(vdip)->devi_child;
1251 while (dip) {
1252 if (dip == DEVI(cdip)) {
1253 rv = MDI_SUCCESS;
1254 break;
1255 }
1256 dip = dip->devi_sibling;
1257 }
1258 ndi_devi_exit(vdip, circular);
1259 return (rv);
1260 }
1261
1262
1263 /*
1264 * i_mdi_client_lock():
1265 * Grab client component lock
1266 * Return Values:
1267 * None
1268 * Note:
1269 * The default locking order is:
1270 * _NOTE(LOCK_ORDER(mdi_client::ct_mutex mdi_pathinfo::pi_mutex))
1271 * But there are number of situations where locks need to be
1272 * grabbed in reverse order. This routine implements try and lock
1273 * mechanism depending on the requested parameter option.
1274 */
1275 static void
i_mdi_client_lock(mdi_client_t * ct,mdi_pathinfo_t * pip)1276 i_mdi_client_lock(mdi_client_t *ct, mdi_pathinfo_t *pip)
1277 {
1278 if (pip) {
1279 /*
1280 * Reverse locking is requested.
1281 */
1282 while (MDI_CLIENT_TRYLOCK(ct) == 0) {
1283 if (servicing_interrupt()) {
1284 MDI_PI_HOLD(pip);
1285 MDI_PI_UNLOCK(pip);
1286 MDI_CLIENT_LOCK(ct);
1287 MDI_PI_LOCK(pip);
1288 MDI_PI_RELE(pip);
1289 break;
1290 } else {
1291 /*
1292 * tryenter failed. Try to grab again
1293 * after a small delay
1294 */
1295 MDI_PI_HOLD(pip);
1296 MDI_PI_UNLOCK(pip);
1297 delay_random(mdi_delay);
1298 MDI_PI_LOCK(pip);
1299 MDI_PI_RELE(pip);
1300 }
1301 }
1302 } else {
1303 MDI_CLIENT_LOCK(ct);
1304 }
1305 }
1306
1307 /*
1308 * i_mdi_client_unlock():
1309 * Unlock a client component
1310 */
1311 static void
i_mdi_client_unlock(mdi_client_t * ct)1312 i_mdi_client_unlock(mdi_client_t *ct)
1313 {
1314 MDI_CLIENT_UNLOCK(ct);
1315 }
1316
1317 /*
1318 * i_mdi_client_alloc():
1319 * Allocate and initialize a client structure. Caller should
1320 * hold the vhci client lock.
1321 * Return Values:
1322 * Handle to a client component
1323 */
1324 /*ARGSUSED*/
1325 static mdi_client_t *
i_mdi_client_alloc(mdi_vhci_t * vh,char * name,char * lguid)1326 i_mdi_client_alloc(mdi_vhci_t *vh, char *name, char *lguid)
1327 {
1328 mdi_client_t *ct;
1329
1330 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1331
1332 /*
1333 * Allocate and initialize a component structure.
1334 */
1335 ct = kmem_zalloc(sizeof (*ct), KM_SLEEP);
1336 mutex_init(&ct->ct_mutex, NULL, MUTEX_DEFAULT, NULL);
1337 ct->ct_hnext = NULL;
1338 ct->ct_hprev = NULL;
1339 ct->ct_dip = NULL;
1340 ct->ct_vhci = vh;
1341 ct->ct_drvname = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1342 (void) strcpy(ct->ct_drvname, name);
1343 ct->ct_guid = kmem_alloc(strlen(lguid) + 1, KM_SLEEP);
1344 (void) strcpy(ct->ct_guid, lguid);
1345 ct->ct_cprivate = NULL;
1346 ct->ct_vprivate = NULL;
1347 ct->ct_flags = 0;
1348 ct->ct_state = MDI_CLIENT_STATE_FAILED;
1349 MDI_CLIENT_LOCK(ct);
1350 MDI_CLIENT_SET_OFFLINE(ct);
1351 MDI_CLIENT_SET_DETACH(ct);
1352 MDI_CLIENT_SET_POWER_UP(ct);
1353 MDI_CLIENT_UNLOCK(ct);
1354 ct->ct_failover_flags = 0;
1355 ct->ct_failover_status = 0;
1356 cv_init(&ct->ct_failover_cv, NULL, CV_DRIVER, NULL);
1357 ct->ct_unstable = 0;
1358 cv_init(&ct->ct_unstable_cv, NULL, CV_DRIVER, NULL);
1359 cv_init(&ct->ct_powerchange_cv, NULL, CV_DRIVER, NULL);
1360 ct->ct_lb = vh->vh_lb;
1361 ct->ct_lb_args = kmem_zalloc(sizeof (client_lb_args_t), KM_SLEEP);
1362 ct->ct_lb_args->region_size = LOAD_BALANCE_DEFAULT_REGION_SIZE;
1363 ct->ct_path_count = 0;
1364 ct->ct_path_head = NULL;
1365 ct->ct_path_tail = NULL;
1366 ct->ct_path_last = NULL;
1367
1368 /*
1369 * Add this client component to our client hash queue
1370 */
1371 i_mdi_client_enlist_table(vh, ct);
1372 return (ct);
1373 }
1374
1375 /*
1376 * i_mdi_client_enlist_table():
1377 * Attach the client device to the client hash table. Caller
1378 * should hold the vhci client lock.
1379 */
1380 static void
i_mdi_client_enlist_table(mdi_vhci_t * vh,mdi_client_t * ct)1381 i_mdi_client_enlist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1382 {
1383 int index;
1384 struct client_hash *head;
1385
1386 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1387
1388 index = i_mdi_get_hash_key(ct->ct_guid);
1389 head = &vh->vh_client_table[index];
1390 ct->ct_hnext = (mdi_client_t *)head->ct_hash_head;
1391 head->ct_hash_head = ct;
1392 head->ct_hash_count++;
1393 vh->vh_client_count++;
1394 }
1395
1396 /*
1397 * i_mdi_client_delist_table():
1398 * Attach the client device to the client hash table.
1399 * Caller should hold the vhci client lock.
1400 */
1401 static void
i_mdi_client_delist_table(mdi_vhci_t * vh,mdi_client_t * ct)1402 i_mdi_client_delist_table(mdi_vhci_t *vh, mdi_client_t *ct)
1403 {
1404 int index;
1405 char *guid;
1406 struct client_hash *head;
1407 mdi_client_t *next;
1408 mdi_client_t *last;
1409
1410 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1411
1412 guid = ct->ct_guid;
1413 index = i_mdi_get_hash_key(guid);
1414 head = &vh->vh_client_table[index];
1415
1416 last = NULL;
1417 next = (mdi_client_t *)head->ct_hash_head;
1418 while (next != NULL) {
1419 if (next == ct) {
1420 break;
1421 }
1422 last = next;
1423 next = next->ct_hnext;
1424 }
1425
1426 if (next) {
1427 head->ct_hash_count--;
1428 if (last == NULL) {
1429 head->ct_hash_head = ct->ct_hnext;
1430 } else {
1431 last->ct_hnext = ct->ct_hnext;
1432 }
1433 ct->ct_hnext = NULL;
1434 vh->vh_client_count--;
1435 }
1436 }
1437
1438
1439 /*
1440 * i_mdi_client_free():
1441 * Free a client component
1442 */
1443 static int
i_mdi_client_free(mdi_vhci_t * vh,mdi_client_t * ct)1444 i_mdi_client_free(mdi_vhci_t *vh, mdi_client_t *ct)
1445 {
1446 int rv = MDI_SUCCESS;
1447 int flags = ct->ct_flags;
1448 dev_info_t *cdip;
1449 dev_info_t *vdip;
1450
1451 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1452
1453 vdip = vh->vh_dip;
1454 cdip = ct->ct_dip;
1455
1456 (void) ndi_prop_remove(DDI_DEV_T_NONE, cdip, MDI_CLIENT_GUID_PROP);
1457 DEVI(cdip)->devi_mdi_component &= ~MDI_COMPONENT_CLIENT;
1458 DEVI(cdip)->devi_mdi_client = NULL;
1459
1460 /*
1461 * Clear out back ref. to dev_info_t node
1462 */
1463 ct->ct_dip = NULL;
1464
1465 /*
1466 * Remove this client from our hash queue
1467 */
1468 i_mdi_client_delist_table(vh, ct);
1469
1470 /*
1471 * Uninitialize and free the component
1472 */
1473 kmem_free(ct->ct_drvname, strlen(ct->ct_drvname) + 1);
1474 kmem_free(ct->ct_guid, strlen(ct->ct_guid) + 1);
1475 kmem_free(ct->ct_lb_args, sizeof (client_lb_args_t));
1476 cv_destroy(&ct->ct_failover_cv);
1477 cv_destroy(&ct->ct_unstable_cv);
1478 cv_destroy(&ct->ct_powerchange_cv);
1479 mutex_destroy(&ct->ct_mutex);
1480 kmem_free(ct, sizeof (*ct));
1481
1482 MDI_VHCI_CLIENT_UNLOCK(vh);
1483 (void) i_mdi_devinfo_remove(vdip, cdip, flags);
1484 MDI_VHCI_CLIENT_LOCK(vh);
1485
1486 return (rv);
1487 }
1488
1489 /*
1490 * i_mdi_client_find():
1491 * Find the client structure corresponding to a given guid
1492 * Caller should hold the vhci client lock.
1493 */
1494 static mdi_client_t *
i_mdi_client_find(mdi_vhci_t * vh,char * cname,char * guid)1495 i_mdi_client_find(mdi_vhci_t *vh, char *cname, char *guid)
1496 {
1497 int index;
1498 struct client_hash *head;
1499 mdi_client_t *ct;
1500
1501 ASSERT(MDI_VHCI_CLIENT_LOCKED(vh));
1502
1503 index = i_mdi_get_hash_key(guid);
1504 head = &vh->vh_client_table[index];
1505
1506 ct = head->ct_hash_head;
1507 while (ct != NULL) {
1508 if (strcmp(ct->ct_guid, guid) == 0 &&
1509 (cname == NULL || strcmp(ct->ct_drvname, cname) == 0)) {
1510 break;
1511 }
1512 ct = ct->ct_hnext;
1513 }
1514 return (ct);
1515 }
1516
1517 /*
1518 * i_mdi_client_update_state():
1519 * Compute and update client device state
1520 * Notes:
1521 * A client device can be in any of three possible states:
1522 *
1523 * MDI_CLIENT_STATE_OPTIMAL - Client in optimal state with more
1524 * one online/standby paths. Can tolerate failures.
1525 * MDI_CLIENT_STATE_DEGRADED - Client device in degraded state with
1526 * no alternate paths available as standby. A failure on the online
1527 * would result in loss of access to device data.
1528 * MDI_CLIENT_STATE_FAILED - Client device in failed state with
1529 * no paths available to access the device.
1530 */
1531 static void
i_mdi_client_update_state(mdi_client_t * ct)1532 i_mdi_client_update_state(mdi_client_t *ct)
1533 {
1534 int state;
1535
1536 ASSERT(MDI_CLIENT_LOCKED(ct));
1537 state = i_mdi_client_compute_state(ct, NULL);
1538 MDI_CLIENT_SET_STATE(ct, state);
1539 }
1540
1541 /*
1542 * i_mdi_client_compute_state():
1543 * Compute client device state
1544 *
1545 * mdi_phci_t * Pointer to pHCI structure which should
1546 * while computing the new value. Used by
1547 * i_mdi_phci_offline() to find the new
1548 * client state after DR of a pHCI.
1549 */
1550 static int
i_mdi_client_compute_state(mdi_client_t * ct,mdi_phci_t * ph)1551 i_mdi_client_compute_state(mdi_client_t *ct, mdi_phci_t *ph)
1552 {
1553 int state;
1554 int online_count = 0;
1555 int standby_count = 0;
1556 mdi_pathinfo_t *pip, *next;
1557
1558 ASSERT(MDI_CLIENT_LOCKED(ct));
1559 pip = ct->ct_path_head;
1560 while (pip != NULL) {
1561 MDI_PI_LOCK(pip);
1562 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
1563 if (MDI_PI(pip)->pi_phci == ph) {
1564 MDI_PI_UNLOCK(pip);
1565 pip = next;
1566 continue;
1567 }
1568
1569 if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1570 == MDI_PATHINFO_STATE_ONLINE)
1571 online_count++;
1572 else if ((MDI_PI(pip)->pi_state & MDI_PATHINFO_STATE_MASK)
1573 == MDI_PATHINFO_STATE_STANDBY)
1574 standby_count++;
1575 MDI_PI_UNLOCK(pip);
1576 pip = next;
1577 }
1578
1579 if (online_count == 0) {
1580 if (standby_count == 0) {
1581 state = MDI_CLIENT_STATE_FAILED;
1582 MDI_DEBUG(2, (MDI_NOTE, ct->ct_dip,
1583 "client state failed: ct = %p", (void *)ct));
1584 } else if (standby_count == 1) {
1585 state = MDI_CLIENT_STATE_DEGRADED;
1586 } else {
1587 state = MDI_CLIENT_STATE_OPTIMAL;
1588 }
1589 } else if (online_count == 1) {
1590 if (standby_count == 0) {
1591 state = MDI_CLIENT_STATE_DEGRADED;
1592 } else {
1593 state = MDI_CLIENT_STATE_OPTIMAL;
1594 }
1595 } else {
1596 state = MDI_CLIENT_STATE_OPTIMAL;
1597 }
1598 return (state);
1599 }
1600
1601 /*
1602 * i_mdi_client2devinfo():
1603 * Utility function
1604 */
1605 dev_info_t *
i_mdi_client2devinfo(mdi_client_t * ct)1606 i_mdi_client2devinfo(mdi_client_t *ct)
1607 {
1608 return (ct->ct_dip);
1609 }
1610
1611 /*
1612 * mdi_client_path2_devinfo():
1613 * Given the parent devinfo and child devfs pathname, search for
1614 * a valid devfs node handle.
1615 */
1616 dev_info_t *
mdi_client_path2devinfo(dev_info_t * vdip,char * pathname)1617 mdi_client_path2devinfo(dev_info_t *vdip, char *pathname)
1618 {
1619 dev_info_t *cdip = NULL;
1620 dev_info_t *ndip = NULL;
1621 char *temp_pathname;
1622 int circular;
1623
1624 /*
1625 * Allocate temp buffer
1626 */
1627 temp_pathname = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1628
1629 /*
1630 * Lock parent against changes
1631 */
1632 ndi_devi_enter(vdip, &circular);
1633 ndip = (dev_info_t *)DEVI(vdip)->devi_child;
1634 while ((cdip = ndip) != NULL) {
1635 ndip = (dev_info_t *)DEVI(cdip)->devi_sibling;
1636
1637 *temp_pathname = '\0';
1638 (void) ddi_pathname(cdip, temp_pathname);
1639 if (strcmp(temp_pathname, pathname) == 0) {
1640 break;
1641 }
1642 }
1643 /*
1644 * Release devinfo lock
1645 */
1646 ndi_devi_exit(vdip, circular);
1647
1648 /*
1649 * Free the temp buffer
1650 */
1651 kmem_free(temp_pathname, MAXPATHLEN);
1652 return (cdip);
1653 }
1654
1655 /*
1656 * mdi_client_get_path_count():
1657 * Utility function to get number of path information nodes
1658 * associated with a given client device.
1659 */
1660 int
mdi_client_get_path_count(dev_info_t * cdip)1661 mdi_client_get_path_count(dev_info_t *cdip)
1662 {
1663 mdi_client_t *ct;
1664 int count = 0;
1665
1666 ct = i_devi_get_client(cdip);
1667 if (ct != NULL) {
1668 count = ct->ct_path_count;
1669 }
1670 return (count);
1671 }
1672
1673
1674 /*
1675 * i_mdi_get_hash_key():
1676 * Create a hash using strings as keys
1677 *
1678 */
1679 static int
i_mdi_get_hash_key(char * str)1680 i_mdi_get_hash_key(char *str)
1681 {
1682 uint32_t g, hash = 0;
1683 char *p;
1684
1685 for (p = str; *p != '\0'; p++) {
1686 g = *p;
1687 hash += g;
1688 }
1689 return (hash % (CLIENT_HASH_TABLE_SIZE - 1));
1690 }
1691
1692 /*
1693 * mdi_get_lb_policy():
1694 * Get current load balancing policy for a given client device
1695 */
1696 client_lb_t
mdi_get_lb_policy(dev_info_t * cdip)1697 mdi_get_lb_policy(dev_info_t *cdip)
1698 {
1699 client_lb_t lb = LOAD_BALANCE_NONE;
1700 mdi_client_t *ct;
1701
1702 ct = i_devi_get_client(cdip);
1703 if (ct != NULL) {
1704 lb = ct->ct_lb;
1705 }
1706 return (lb);
1707 }
1708
1709 /*
1710 * mdi_set_lb_region_size():
1711 * Set current region size for the load-balance
1712 */
1713 int
mdi_set_lb_region_size(dev_info_t * cdip,int region_size)1714 mdi_set_lb_region_size(dev_info_t *cdip, int region_size)
1715 {
1716 mdi_client_t *ct;
1717 int rv = MDI_FAILURE;
1718
1719 ct = i_devi_get_client(cdip);
1720 if (ct != NULL && ct->ct_lb_args != NULL) {
1721 ct->ct_lb_args->region_size = region_size;
1722 rv = MDI_SUCCESS;
1723 }
1724 return (rv);
1725 }
1726
1727 /*
1728 * mdi_Set_lb_policy():
1729 * Set current load balancing policy for a given client device
1730 */
1731 int
mdi_set_lb_policy(dev_info_t * cdip,client_lb_t lb)1732 mdi_set_lb_policy(dev_info_t *cdip, client_lb_t lb)
1733 {
1734 mdi_client_t *ct;
1735 int rv = MDI_FAILURE;
1736
1737 ct = i_devi_get_client(cdip);
1738 if (ct != NULL) {
1739 ct->ct_lb = lb;
1740 rv = MDI_SUCCESS;
1741 }
1742 return (rv);
1743 }
1744
/*
 * mdi_failover_cb():
 *		void-returning adapter around i_mdi_failover(); it is the
 *		function mdi_failover() hands to taskq_dispatch() for
 *		asynchronous failovers. The failover status is discarded here.
 */
static void
mdi_failover_cb(void *arg)
{
	(void) i_mdi_failover(arg);
}
1750
1751 /*
1752 * mdi_failover():
1753 * failover function called by the vHCI drivers to initiate
1754 * a failover operation. This is typically due to non-availability
1755 * of online paths to route I/O requests. Failover can be
1756 * triggered through user application also.
1757 *
1758 * The vHCI driver calls mdi_failover() to initiate a failover
1759 * operation. mdi_failover() calls back into the vHCI driver's
1760 * vo_failover() entry point to perform the actual failover
1761 * operation. The reason for requiring the vHCI driver to
1762 * initiate failover by calling mdi_failover(), instead of directly
1763 * executing vo_failover() itself, is to ensure that the mdi
1764 * framework can keep track of the client state properly.
1765 * Additionally, mdi_failover() provides as a convenience the
1766 * option of performing the failover operation synchronously or
1767 * asynchronously
1768 *
1769 * Upon successful completion of the failover operation, the
1770 * paths that were previously ONLINE will be in the STANDBY state,
1771 * and the newly activated paths will be in the ONLINE state.
1772 *
1773 * The flags modifier determines whether the activation is done
1774 * synchronously: MDI_FAILOVER_SYNC
1775 * Return Values:
1776 * MDI_SUCCESS
1777 * MDI_FAILURE
1778 * MDI_BUSY
1779 */
1780 /*ARGSUSED*/
1781 int
mdi_failover(dev_info_t * vdip,dev_info_t * cdip,int flags)1782 mdi_failover(dev_info_t *vdip, dev_info_t *cdip, int flags)
1783 {
1784 int rv;
1785 mdi_client_t *ct;
1786
1787 ct = i_devi_get_client(cdip);
1788 ASSERT(ct != NULL);
1789 if (ct == NULL) {
1790 /* cdip is not a valid client device. Nothing more to do. */
1791 return (MDI_FAILURE);
1792 }
1793
1794 MDI_CLIENT_LOCK(ct);
1795
1796 if (MDI_CLIENT_IS_PATH_FREE_IN_PROGRESS(ct)) {
1797 /* A path to the client is being freed */
1798 MDI_CLIENT_UNLOCK(ct);
1799 return (MDI_BUSY);
1800 }
1801
1802
1803 if (MDI_CLIENT_IS_FAILED(ct)) {
1804 /*
1805 * Client is in failed state. Nothing more to do.
1806 */
1807 MDI_CLIENT_UNLOCK(ct);
1808 return (MDI_FAILURE);
1809 }
1810
1811 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
1812 /*
1813 * Failover is already in progress; return BUSY
1814 */
1815 MDI_CLIENT_UNLOCK(ct);
1816 return (MDI_BUSY);
1817 }
1818 /*
1819 * Make sure that mdi_pathinfo node state changes are processed.
1820 * We do not allow failovers to progress while client path state
1821 * changes are in progress
1822 */
1823 if (ct->ct_unstable) {
1824 if (flags == MDI_FAILOVER_ASYNC) {
1825 MDI_CLIENT_UNLOCK(ct);
1826 return (MDI_BUSY);
1827 } else {
1828 while (ct->ct_unstable)
1829 cv_wait(&ct->ct_unstable_cv, &ct->ct_mutex);
1830 }
1831 }
1832
1833 /*
1834 * Client device is in stable state. Before proceeding, perform sanity
1835 * checks again.
1836 */
1837 if ((MDI_CLIENT_IS_DETACHED(ct)) || (MDI_CLIENT_IS_FAILED(ct)) ||
1838 (!i_ddi_devi_attached(cdip))) {
1839 /*
1840 * Client is in failed state. Nothing more to do.
1841 */
1842 MDI_CLIENT_UNLOCK(ct);
1843 return (MDI_FAILURE);
1844 }
1845
1846 /*
1847 * Set the client state as failover in progress.
1848 */
1849 MDI_CLIENT_SET_FAILOVER_IN_PROGRESS(ct);
1850 ct->ct_failover_flags = flags;
1851 MDI_CLIENT_UNLOCK(ct);
1852
1853 if (flags == MDI_FAILOVER_ASYNC) {
1854 /*
1855 * Submit the initiate failover request via CPR safe
1856 * taskq threads.
1857 */
1858 (void) taskq_dispatch(mdi_taskq, mdi_failover_cb, ct, KM_SLEEP);
1859 return (MDI_ACCEPT);
1860 } else {
1861 /*
1862 * Synchronous failover mode. Typically invoked from the user
1863 * land.
1864 */
1865 rv = i_mdi_failover(ct);
1866 }
1867 return (rv);
1868 }
1869
1870 /*
1871 * i_mdi_failover():
1872 * internal failover function. Invokes vHCI drivers failover
1873 * callback function and process the failover status
1874 * Return Values:
1875 * None
1876 *
1877 * Note: A client device in failover state can not be detached or freed.
1878 */
1879 static int
i_mdi_failover(void * arg)1880 i_mdi_failover(void *arg)
1881 {
1882 int rv = MDI_SUCCESS;
1883 mdi_client_t *ct = (mdi_client_t *)arg;
1884 mdi_vhci_t *vh = ct->ct_vhci;
1885
1886 ASSERT(!MDI_CLIENT_LOCKED(ct));
1887
1888 if (vh->vh_ops->vo_failover != NULL) {
1889 /*
1890 * Call vHCI drivers callback routine
1891 */
1892 rv = (*vh->vh_ops->vo_failover)(vh->vh_dip, ct->ct_dip,
1893 ct->ct_failover_flags);
1894 }
1895
1896 MDI_CLIENT_LOCK(ct);
1897 MDI_CLIENT_CLEAR_FAILOVER_IN_PROGRESS(ct);
1898
1899 /*
1900 * Save the failover return status
1901 */
1902 ct->ct_failover_status = rv;
1903
1904 /*
1905 * As a result of failover, client status would have been changed.
1906 * Update the client state and wake up anyone waiting on this client
1907 * device.
1908 */
1909 i_mdi_client_update_state(ct);
1910
1911 cv_broadcast(&ct->ct_failover_cv);
1912 MDI_CLIENT_UNLOCK(ct);
1913 return (rv);
1914 }
1915
1916 /*
1917 * Load balancing is logical block.
1918 * IOs within the range described by region_size
1919 * would go on the same path. This would improve the
1920 * performance by cache-hit on some of the RAID devices.
1921 * Search only for online paths(At some point we
1922 * may want to balance across target ports).
1923 * If no paths are found then default to round-robin.
1924 */
1925 static int
i_mdi_lba_lb(mdi_client_t * ct,mdi_pathinfo_t ** ret_pip,struct buf * bp)1926 i_mdi_lba_lb(mdi_client_t *ct, mdi_pathinfo_t **ret_pip, struct buf *bp)
1927 {
1928 int path_index = -1;
1929 int online_path_count = 0;
1930 int online_nonpref_path_count = 0;
1931 int region_size = ct->ct_lb_args->region_size;
1932 mdi_pathinfo_t *pip;
1933 mdi_pathinfo_t *next;
1934 int preferred, path_cnt;
1935
1936 pip = ct->ct_path_head;
1937 while (pip) {
1938 MDI_PI_LOCK(pip);
1939 if (MDI_PI(pip)->pi_state ==
1940 MDI_PATHINFO_STATE_ONLINE && MDI_PI(pip)->pi_preferred) {
1941 online_path_count++;
1942 } else if (MDI_PI(pip)->pi_state ==
1943 MDI_PATHINFO_STATE_ONLINE && !MDI_PI(pip)->pi_preferred) {
1944 online_nonpref_path_count++;
1945 }
1946 next = (mdi_pathinfo_t *)
1947 MDI_PI(pip)->pi_client_link;
1948 MDI_PI_UNLOCK(pip);
1949 pip = next;
1950 }
1951 /* if found any online/preferred then use this type */
1952 if (online_path_count > 0) {
1953 path_cnt = online_path_count;
1954 preferred = 1;
1955 } else if (online_nonpref_path_count > 0) {
1956 path_cnt = online_nonpref_path_count;
1957 preferred = 0;
1958 } else {
1959 path_cnt = 0;
1960 }
1961 if (path_cnt) {
1962 path_index = (bp->b_blkno >> region_size) % path_cnt;
1963 pip = ct->ct_path_head;
1964 while (pip && path_index != -1) {
1965 MDI_PI_LOCK(pip);
1966 if (path_index == 0 &&
1967 (MDI_PI(pip)->pi_state ==
1968 MDI_PATHINFO_STATE_ONLINE) &&
1969 MDI_PI(pip)->pi_preferred == preferred) {
1970 MDI_PI_HOLD(pip);
1971 MDI_PI_UNLOCK(pip);
1972 *ret_pip = pip;
1973 return (MDI_SUCCESS);
1974 }
1975 path_index --;
1976 next = (mdi_pathinfo_t *)
1977 MDI_PI(pip)->pi_client_link;
1978 MDI_PI_UNLOCK(pip);
1979 pip = next;
1980 }
1981 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
1982 "lba %llx: path %s %p",
1983 bp->b_lblkno, mdi_pi_spathname(pip), (void *)pip));
1984 }
1985 return (MDI_FAILURE);
1986 }
1987
1988 /*
1989 * mdi_select_path():
1990 * select a path to access a client device.
1991 *
1992 * mdi_select_path() function is called by the vHCI drivers to
1993 * select a path to route the I/O request to. The caller passes
1994 * the block I/O data transfer structure ("buf") as one of the
1995 * parameters. The mpxio framework uses the buf structure
1996 * contents to maintain per path statistics (total I/O size /
1997 * count pending). If more than one online paths are available to
1998 * select, the framework automatically selects a suitable path
1999 * for routing I/O request. If a failover operation is active for
2000 * this client device the call shall be failed with MDI_BUSY error
2001 * code.
2002 *
2003 * By default this function returns a suitable path in online
2004 * state based on the current load balancing policy. Currently
2005 * we support LOAD_BALANCE_NONE (Previously selected online path
2006 * will continue to be used till the path is usable) and
2007 * LOAD_BALANCE_RR (Online paths will be selected in a round
2008 * robin fashion), LOAD_BALANCE_LB(Online paths will be selected
 *		based on the logical block). The load balancing policy is
 *		set through the vHCI driver's configuration file (driver.conf).
2011 *
2012 * vHCI drivers may override this default behavior by specifying
 *		appropriate flags. The meaning of the third argument depends
2014 * on the flags specified. If MDI_SELECT_PATH_INSTANCE is set
2015 * then the argument is the "path instance" of the path to select.
2016 * If MDI_SELECT_PATH_INSTANCE is not set then the argument is
2017 * "start_pip". A non NULL "start_pip" is the starting point to
2018 * walk and find the next appropriate path. The following values
2019 * are currently defined: MDI_SELECT_ONLINE_PATH (to select an
2020 * ONLINE path) and/or MDI_SELECT_STANDBY_PATH (to select an
2021 * STANDBY path).
2022 *
2023 * The non-standard behavior is used by the scsi_vhci driver,
2024 * whenever it has to use a STANDBY/FAULTED path. Eg. during
2025 * attach of client devices (to avoid an unnecessary failover
2026 * when the STANDBY path comes up first), during failover
2027 * (to activate a STANDBY path as ONLINE).
2028 *
 *		The selected path is returned in a mdi_hold_path() state
2030 * (pi_ref_cnt). Caller should release the hold by calling
2031 * mdi_rele_path().
2032 *
2033 * Return Values:
2034 * MDI_SUCCESS - Completed successfully
2035 * MDI_BUSY - Client device is busy failing over
2036 * MDI_NOPATH - Client device is online, but no valid path are
2037 * available to access this client device
2038 * MDI_FAILURE - Invalid client device or state
2039 * MDI_DEVI_ONLINING
2040 * - Client device (struct dev_info state) is in
2041 * onlining state.
2042 */
2043
2044 /*ARGSUSED*/
2045 int
mdi_select_path(dev_info_t * cdip,struct buf * bp,int flags,void * arg,mdi_pathinfo_t ** ret_pip)2046 mdi_select_path(dev_info_t *cdip, struct buf *bp, int flags,
2047 void *arg, mdi_pathinfo_t **ret_pip)
2048 {
2049 mdi_client_t *ct;
2050 mdi_pathinfo_t *pip;
2051 mdi_pathinfo_t *next;
2052 mdi_pathinfo_t *head;
2053 mdi_pathinfo_t *start;
2054 client_lb_t lbp; /* load balancing policy */
2055 int sb = 1; /* standard behavior */
2056 int preferred = 1; /* preferred path */
2057 int cond, cont = 1;
2058 int retry = 0;
2059 mdi_pathinfo_t *start_pip; /* request starting pathinfo */
2060 int path_instance; /* request specific path instance */
2061
2062 /* determine type of arg based on flags */
2063 if (flags & MDI_SELECT_PATH_INSTANCE) {
2064 path_instance = (int)(intptr_t)arg;
2065 start_pip = NULL;
2066 } else {
2067 path_instance = 0;
2068 start_pip = (mdi_pathinfo_t *)arg;
2069 }
2070
2071 if (flags != 0) {
2072 /*
2073 * disable default behavior
2074 */
2075 sb = 0;
2076 }
2077
2078 *ret_pip = NULL;
2079 ct = i_devi_get_client(cdip);
2080 if (ct == NULL) {
2081 /* mdi extensions are NULL, Nothing more to do */
2082 return (MDI_FAILURE);
2083 }
2084
2085 MDI_CLIENT_LOCK(ct);
2086
2087 if (sb) {
2088 if (MDI_CLIENT_IS_FAILED(ct)) {
2089 /*
2090 * Client is not ready to accept any I/O requests.
2091 * Fail this request.
2092 */
2093 MDI_DEBUG(2, (MDI_NOTE, cdip,
2094 "client state offline ct = %p", (void *)ct));
2095 MDI_CLIENT_UNLOCK(ct);
2096 return (MDI_FAILURE);
2097 }
2098
2099 if (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct)) {
2100 /*
2101 * Check for Failover is in progress. If so tell the
2102 * caller that this device is busy.
2103 */
2104 MDI_DEBUG(2, (MDI_NOTE, cdip,
2105 "client failover in progress ct = %p",
2106 (void *)ct));
2107 MDI_CLIENT_UNLOCK(ct);
2108 return (MDI_BUSY);
2109 }
2110
2111 /*
2112 * Check to see whether the client device is attached.
2113 * If not so, let the vHCI driver manually select a path
2114 * (standby) and let the probe/attach process to continue.
2115 */
2116 if (MDI_CLIENT_IS_DETACHED(ct) || !i_ddi_devi_attached(cdip)) {
2117 MDI_DEBUG(4, (MDI_NOTE, cdip,
2118 "devi is onlining ct = %p", (void *)ct));
2119 MDI_CLIENT_UNLOCK(ct);
2120 return (MDI_DEVI_ONLINING);
2121 }
2122 }
2123
2124 /*
2125 * Cache in the client list head. If head of the list is NULL
2126 * return MDI_NOPATH
2127 */
2128 head = ct->ct_path_head;
2129 if (head == NULL) {
2130 MDI_CLIENT_UNLOCK(ct);
2131 return (MDI_NOPATH);
2132 }
2133
2134 /* Caller is specifying a specific pathinfo path by path_instance */
2135 if (path_instance) {
2136 /* search for pathinfo with correct path_instance */
2137 for (pip = head;
2138 pip && (mdi_pi_get_path_instance(pip) != path_instance);
2139 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link)
2140 ;
2141
2142 /* If path can't be selected then MDI_NOPATH is returned. */
2143 if (pip == NULL) {
2144 MDI_CLIENT_UNLOCK(ct);
2145 return (MDI_NOPATH);
2146 }
2147
2148 /*
2149 * Verify state of path. When asked to select a specific
2150 * path_instance, we select the requested path in any
2151 * state (ONLINE, OFFLINE, STANDBY, FAULT) other than INIT.
2152 * We don't however select paths where the pHCI has detached.
2153 * NOTE: last pathinfo node of an opened client device may
2154 * exist in an OFFLINE state after the pHCI associated with
2155 * that path has detached (but pi_phci will be NULL if that
2156 * has occurred).
2157 */
2158 MDI_PI_LOCK(pip);
2159 if ((MDI_PI(pip)->pi_state == MDI_PATHINFO_STATE_INIT) ||
2160 (MDI_PI(pip)->pi_phci == NULL)) {
2161 MDI_PI_UNLOCK(pip);
2162 MDI_CLIENT_UNLOCK(ct);
2163 return (MDI_FAILURE);
2164 }
2165
2166 /* Return MDI_BUSY if we have a transient condition */
2167 if (MDI_PI_IS_TRANSIENT(pip)) {
2168 MDI_PI_UNLOCK(pip);
2169 MDI_CLIENT_UNLOCK(ct);
2170 return (MDI_BUSY);
2171 }
2172
2173 /*
2174 * Return the path in hold state. Caller should release the
2175 * lock by calling mdi_rele_path()
2176 */
2177 MDI_PI_HOLD(pip);
2178 MDI_PI_UNLOCK(pip);
2179 *ret_pip = pip;
2180 MDI_CLIENT_UNLOCK(ct);
2181 return (MDI_SUCCESS);
2182 }
2183
2184 /*
2185 * for non default behavior, bypass current
2186 * load balancing policy and always use LOAD_BALANCE_RR
2187 * except that the start point will be adjusted based
2188 * on the provided start_pip
2189 */
2190 lbp = sb ? ct->ct_lb : LOAD_BALANCE_RR;
2191
2192 switch (lbp) {
2193 case LOAD_BALANCE_NONE:
2194 /*
2195 * Load balancing is None or Alternate path mode
2196 * Start looking for a online mdi_pathinfo node starting from
2197 * last known selected path
2198 */
2199 preferred = 1;
2200 pip = (mdi_pathinfo_t *)ct->ct_path_last;
2201 if (pip == NULL) {
2202 pip = head;
2203 }
2204 start = pip;
2205 do {
2206 MDI_PI_LOCK(pip);
2207 /*
2208 * No need to explicitly check if the path is disabled.
2209 * Since we are checking for state == ONLINE and the
2210 * same variable is used for DISABLE/ENABLE information.
2211 */
2212 if ((MDI_PI(pip)->pi_state ==
2213 MDI_PATHINFO_STATE_ONLINE) &&
2214 preferred == MDI_PI(pip)->pi_preferred) {
2215 /*
2216 * Return the path in hold state. Caller should
2217 * release the lock by calling mdi_rele_path()
2218 */
2219 MDI_PI_HOLD(pip);
2220 MDI_PI_UNLOCK(pip);
2221 ct->ct_path_last = pip;
2222 *ret_pip = pip;
2223 MDI_CLIENT_UNLOCK(ct);
2224 return (MDI_SUCCESS);
2225 }
2226
2227 /*
2228 * Path is busy.
2229 */
2230 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2231 MDI_PI_IS_TRANSIENT(pip))
2232 retry = 1;
2233 /*
2234 * Keep looking for a next available online path
2235 */
2236 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2237 if (next == NULL) {
2238 next = head;
2239 }
2240 MDI_PI_UNLOCK(pip);
2241 pip = next;
2242 if (start == pip && preferred) {
2243 preferred = 0;
2244 } else if (start == pip && !preferred) {
2245 cont = 0;
2246 }
2247 } while (cont);
2248 break;
2249
2250 case LOAD_BALANCE_LBA:
2251 /*
2252 * Make sure we are looking
2253 * for an online path. Otherwise, if it is for a STANDBY
2254 * path request, it will go through and fetch an ONLINE
2255 * path which is not desirable.
2256 */
2257 if ((ct->ct_lb_args != NULL) &&
2258 (ct->ct_lb_args->region_size) && bp &&
2259 (sb || (flags == MDI_SELECT_ONLINE_PATH))) {
2260 if (i_mdi_lba_lb(ct, ret_pip, bp)
2261 == MDI_SUCCESS) {
2262 MDI_CLIENT_UNLOCK(ct);
2263 return (MDI_SUCCESS);
2264 }
2265 }
2266 /* FALLTHROUGH */
2267 case LOAD_BALANCE_RR:
2268 /*
2269 * Load balancing is Round Robin. Start looking for a online
2270 * mdi_pathinfo node starting from last known selected path
2271 * as the start point. If override flags are specified,
2272 * process accordingly.
2273 * If the search is already in effect(start_pip not null),
2274 * then lets just use the same path preference to continue the
2275 * traversal.
2276 */
2277
2278 if (start_pip != NULL) {
2279 preferred = MDI_PI(start_pip)->pi_preferred;
2280 } else {
2281 preferred = 1;
2282 }
2283
2284 start = sb ? (mdi_pathinfo_t *)ct->ct_path_last : start_pip;
2285 if (start == NULL) {
2286 pip = head;
2287 } else {
2288 pip = (mdi_pathinfo_t *)MDI_PI(start)->pi_client_link;
2289 if (pip == NULL) {
2290 if ( flags & MDI_SELECT_NO_PREFERRED) {
2291 /*
2292 * Return since we hit the end of list
2293 */
2294 MDI_CLIENT_UNLOCK(ct);
2295 return (MDI_NOPATH);
2296 }
2297
2298 if (!sb) {
2299 if (preferred == 0) {
2300 /*
2301 * Looks like we have completed
2302 * the traversal as preferred
2303 * value is 0. Time to bail out.
2304 */
2305 *ret_pip = NULL;
2306 MDI_CLIENT_UNLOCK(ct);
2307 return (MDI_NOPATH);
2308 } else {
2309 /*
2310 * Looks like we reached the
2311 * end of the list. Lets enable
2312 * traversal of non preferred
2313 * paths.
2314 */
2315 preferred = 0;
2316 }
2317 }
2318 pip = head;
2319 }
2320 }
2321 start = pip;
2322 do {
2323 MDI_PI_LOCK(pip);
2324 if (sb) {
2325 cond = ((MDI_PI(pip)->pi_state ==
2326 MDI_PATHINFO_STATE_ONLINE &&
2327 MDI_PI(pip)->pi_preferred ==
2328 preferred) ? 1 : 0);
2329 } else {
2330 if (flags == MDI_SELECT_ONLINE_PATH) {
2331 cond = ((MDI_PI(pip)->pi_state ==
2332 MDI_PATHINFO_STATE_ONLINE &&
2333 MDI_PI(pip)->pi_preferred ==
2334 preferred) ? 1 : 0);
2335 } else if (flags == MDI_SELECT_STANDBY_PATH) {
2336 cond = ((MDI_PI(pip)->pi_state ==
2337 MDI_PATHINFO_STATE_STANDBY &&
2338 MDI_PI(pip)->pi_preferred ==
2339 preferred) ? 1 : 0);
2340 } else if (flags == (MDI_SELECT_ONLINE_PATH |
2341 MDI_SELECT_STANDBY_PATH)) {
2342 cond = (((MDI_PI(pip)->pi_state ==
2343 MDI_PATHINFO_STATE_ONLINE ||
2344 (MDI_PI(pip)->pi_state ==
2345 MDI_PATHINFO_STATE_STANDBY)) &&
2346 MDI_PI(pip)->pi_preferred ==
2347 preferred) ? 1 : 0);
2348 } else if (flags ==
2349 (MDI_SELECT_STANDBY_PATH |
2350 MDI_SELECT_ONLINE_PATH |
2351 MDI_SELECT_USER_DISABLE_PATH)) {
2352 cond = (((MDI_PI(pip)->pi_state ==
2353 MDI_PATHINFO_STATE_ONLINE ||
2354 (MDI_PI(pip)->pi_state ==
2355 MDI_PATHINFO_STATE_STANDBY) ||
2356 (MDI_PI(pip)->pi_state ==
2357 (MDI_PATHINFO_STATE_ONLINE|
2358 MDI_PATHINFO_STATE_USER_DISABLE)) ||
2359 (MDI_PI(pip)->pi_state ==
2360 (MDI_PATHINFO_STATE_STANDBY |
2361 MDI_PATHINFO_STATE_USER_DISABLE)))&&
2362 MDI_PI(pip)->pi_preferred ==
2363 preferred) ? 1 : 0);
2364 } else if (flags ==
2365 (MDI_SELECT_STANDBY_PATH |
2366 MDI_SELECT_ONLINE_PATH |
2367 MDI_SELECT_NO_PREFERRED)) {
2368 cond = (((MDI_PI(pip)->pi_state ==
2369 MDI_PATHINFO_STATE_ONLINE) ||
2370 (MDI_PI(pip)->pi_state ==
2371 MDI_PATHINFO_STATE_STANDBY))
2372 ? 1 : 0);
2373 } else {
2374 cond = 0;
2375 }
2376 }
2377 /*
2378 * No need to explicitly check if the path is disabled.
2379 * Since we are checking for state == ONLINE and the
2380 * same variable is used for DISABLE/ENABLE information.
2381 */
2382 if (cond) {
2383 /*
2384 * Return the path in hold state. Caller should
2385 * release the lock by calling mdi_rele_path()
2386 */
2387 MDI_PI_HOLD(pip);
2388 MDI_PI_UNLOCK(pip);
2389 if (sb)
2390 ct->ct_path_last = pip;
2391 *ret_pip = pip;
2392 MDI_CLIENT_UNLOCK(ct);
2393 return (MDI_SUCCESS);
2394 }
2395 /*
2396 * Path is busy.
2397 */
2398 if (MDI_PI_IS_DRV_DISABLE_TRANSIENT(pip) ||
2399 MDI_PI_IS_TRANSIENT(pip))
2400 retry = 1;
2401
2402 /*
2403 * Keep looking for a next available online path
2404 */
2405 do_again:
2406 next = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2407 if (next == NULL) {
2408 if ( flags & MDI_SELECT_NO_PREFERRED) {
2409 /*
2410 * Bail out since we hit the end of list
2411 */
2412 MDI_PI_UNLOCK(pip);
2413 break;
2414 }
2415
2416 if (!sb) {
2417 if (preferred == 1) {
2418 /*
2419 * Looks like we reached the
2420 * end of the list. Lets enable
2421 * traversal of non preferred
2422 * paths.
2423 */
2424 preferred = 0;
2425 next = head;
2426 } else {
2427 /*
2428 * We have done both the passes
2429 * Preferred as well as for
2430 * Non-preferred. Bail out now.
2431 */
2432 cont = 0;
2433 }
2434 } else {
2435 /*
2436 * Standard behavior case.
2437 */
2438 next = head;
2439 }
2440 }
2441 MDI_PI_UNLOCK(pip);
2442 if (cont == 0) {
2443 break;
2444 }
2445 pip = next;
2446
2447 if (!sb) {
2448 /*
2449 * We need to handle the selection of
2450 * non-preferred path in the following
2451 * case:
2452 *
2453 * +------+ +------+ +------+ +-----+
2454 * | A : 1| - | B : 1| - | C : 0| - |NULL |
2455 * +------+ +------+ +------+ +-----+
2456 *
2457 * If we start the search with B, we need to
2458 * skip beyond B to pick C which is non -
2459 * preferred in the second pass. The following
2460 * test, if true, will allow us to skip over
2461 * the 'start'(B in the example) to select
2462 * other non preferred elements.
2463 */
2464 if ((start_pip != NULL) && (start_pip == pip) &&
2465 (MDI_PI(start_pip)->pi_preferred
2466 != preferred)) {
2467 /*
2468 * try again after going past the start
2469 * pip
2470 */
2471 MDI_PI_LOCK(pip);
2472 goto do_again;
2473 }
2474 } else {
2475 /*
2476 * Standard behavior case
2477 */
2478 if (start == pip && preferred) {
2479 /* look for nonpreferred paths */
2480 preferred = 0;
2481 } else if (start == pip && !preferred) {
2482 /*
2483 * Exit condition
2484 */
2485 cont = 0;
2486 }
2487 }
2488 } while (cont);
2489 break;
2490 }
2491
2492 MDI_CLIENT_UNLOCK(ct);
2493 if (retry == 1) {
2494 return (MDI_BUSY);
2495 } else {
2496 return (MDI_NOPATH);
2497 }
2498 }
2499
2500 /*
2501 * For a client, return the next available path to any phci
2502 *
2503 * Note:
2504 * Caller should hold the branch's devinfo node to get a consistent
2505 * snap shot of the mdi_pathinfo nodes.
2506 *
2507 * Please note that even the list is stable the mdi_pathinfo
2508 * node state and properties are volatile. The caller should lock
2509 * and unlock the nodes by calling mdi_pi_lock() and
2510 * mdi_pi_unlock() functions to get a stable properties.
2511 *
2512 * If there is a need to use the nodes beyond the hold of the
2513 * devinfo node period (For ex. I/O), then mdi_pathinfo node
2514 * need to be held against unexpected removal by calling
2515 * mdi_hold_path() and should be released by calling
2516 * mdi_rele_path() on completion.
2517 */
2518 mdi_pathinfo_t *
mdi_get_next_phci_path(dev_info_t * ct_dip,mdi_pathinfo_t * pip)2519 mdi_get_next_phci_path(dev_info_t *ct_dip, mdi_pathinfo_t *pip)
2520 {
2521 mdi_client_t *ct;
2522
2523 if (!MDI_CLIENT(ct_dip))
2524 return (NULL);
2525
2526 /*
2527 * Walk through client link
2528 */
2529 ct = (mdi_client_t *)DEVI(ct_dip)->devi_mdi_client;
2530 ASSERT(ct != NULL);
2531
2532 if (pip == NULL)
2533 return ((mdi_pathinfo_t *)ct->ct_path_head);
2534
2535 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link);
2536 }
2537
2538 /*
2539 * For a phci, return the next available path to any client
2540 * Note: ditto mdi_get_next_phci_path()
2541 */
2542 mdi_pathinfo_t *
mdi_get_next_client_path(dev_info_t * ph_dip,mdi_pathinfo_t * pip)2543 mdi_get_next_client_path(dev_info_t *ph_dip, mdi_pathinfo_t *pip)
2544 {
2545 mdi_phci_t *ph;
2546
2547 if (!MDI_PHCI(ph_dip))
2548 return (NULL);
2549
2550 /*
2551 * Walk through pHCI link
2552 */
2553 ph = (mdi_phci_t *)DEVI(ph_dip)->devi_mdi_xhci;
2554 ASSERT(ph != NULL);
2555
2556 if (pip == NULL)
2557 return ((mdi_pathinfo_t *)ph->ph_path_head);
2558
2559 return ((mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link);
2560 }
2561
2562 /*
2563 * mdi_hold_path():
2564 * Hold the mdi_pathinfo node against unwanted unexpected free.
2565 * Return Values:
2566 * None
2567 */
2568 void
mdi_hold_path(mdi_pathinfo_t * pip)2569 mdi_hold_path(mdi_pathinfo_t *pip)
2570 {
2571 if (pip) {
2572 MDI_PI_LOCK(pip);
2573 MDI_PI_HOLD(pip);
2574 MDI_PI_UNLOCK(pip);
2575 }
2576 }
2577
2578
2579 /*
2580 * mdi_rele_path():
2581 * Release the mdi_pathinfo node which was selected
2582 * through mdi_select_path() mechanism or manually held by
2583 * calling mdi_hold_path().
2584 * Return Values:
2585 * None
2586 */
2587 void
mdi_rele_path(mdi_pathinfo_t * pip)2588 mdi_rele_path(mdi_pathinfo_t *pip)
2589 {
2590 if (pip) {
2591 MDI_PI_LOCK(pip);
2592 MDI_PI_RELE(pip);
2593 if (MDI_PI(pip)->pi_ref_cnt == 0) {
2594 cv_broadcast(&MDI_PI(pip)->pi_ref_cv);
2595 }
2596 MDI_PI_UNLOCK(pip);
2597 }
2598 }
2599
2600 /*
2601 * mdi_pi_lock():
2602 * Lock the mdi_pathinfo node.
2603 * Note:
2604 * The caller should release the lock by calling mdi_pi_unlock()
2605 */
2606 void
mdi_pi_lock(mdi_pathinfo_t * pip)2607 mdi_pi_lock(mdi_pathinfo_t *pip)
2608 {
2609 ASSERT(pip != NULL);
2610 if (pip) {
2611 MDI_PI_LOCK(pip);
2612 }
2613 }
2614
2615
2616 /*
2617 * mdi_pi_unlock():
2618 * Unlock the mdi_pathinfo node.
2619 * Note:
2620 * The mdi_pathinfo node should have been locked with mdi_pi_lock()
2621 */
2622 void
mdi_pi_unlock(mdi_pathinfo_t * pip)2623 mdi_pi_unlock(mdi_pathinfo_t *pip)
2624 {
2625 ASSERT(pip != NULL);
2626 if (pip) {
2627 MDI_PI_UNLOCK(pip);
2628 }
2629 }
2630
2631 /*
2632 * mdi_pi_find():
2633 * Search the list of mdi_pathinfo nodes attached to the
2634 * pHCI/Client device node whose path address matches "paddr".
2635 * Returns a pointer to the mdi_pathinfo node if a matching node is
2636 * found.
2637 * Return Values:
2638 * mdi_pathinfo node handle
2639 * NULL
2640 * Notes:
2641 * Caller need not hold any locks to call this function.
2642 */
2643 mdi_pathinfo_t *
mdi_pi_find(dev_info_t * pdip,char * caddr,char * paddr)2644 mdi_pi_find(dev_info_t *pdip, char *caddr, char *paddr)
2645 {
2646 mdi_phci_t *ph;
2647 mdi_vhci_t *vh;
2648 mdi_client_t *ct;
2649 mdi_pathinfo_t *pip = NULL;
2650
2651 MDI_DEBUG(2, (MDI_NOTE, pdip,
2652 "caddr@%s paddr@%s", caddr ? caddr : "", paddr ? paddr : ""));
2653 if ((pdip == NULL) || (paddr == NULL)) {
2654 return (NULL);
2655 }
2656 ph = i_devi_get_phci(pdip);
2657 if (ph == NULL) {
2658 /*
2659 * Invalid pHCI device, Nothing more to do.
2660 */
2661 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid phci"));
2662 return (NULL);
2663 }
2664
2665 vh = ph->ph_vhci;
2666 if (vh == NULL) {
2667 /*
2668 * Invalid vHCI device, Nothing more to do.
2669 */
2670 MDI_DEBUG(2, (MDI_WARN, pdip, "invalid vhci"));
2671 return (NULL);
2672 }
2673
2674 /*
2675 * Look for pathinfo node identified by paddr.
2676 */
2677 if (caddr == NULL) {
2678 /*
2679 * Find a mdi_pathinfo node under pHCI list for a matching
2680 * unit address.
2681 */
2682 MDI_PHCI_LOCK(ph);
2683 if (MDI_PHCI_IS_OFFLINE(ph)) {
2684 MDI_DEBUG(2, (MDI_WARN, pdip,
2685 "offline phci %p", (void *)ph));
2686 MDI_PHCI_UNLOCK(ph);
2687 return (NULL);
2688 }
2689 pip = (mdi_pathinfo_t *)ph->ph_path_head;
2690
2691 while (pip != NULL) {
2692 if (strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2693 break;
2694 }
2695 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_phci_link;
2696 }
2697 MDI_PHCI_UNLOCK(ph);
2698 MDI_DEBUG(2, (MDI_NOTE, pdip,
2699 "found %s %p", mdi_pi_spathname(pip), (void *)pip));
2700 return (pip);
2701 }
2702
2703 /*
2704 * XXX - Is the rest of the code in this function really necessary?
2705 * The consumers of mdi_pi_find() can search for the desired pathinfo
2706 * node by calling mdi_pi_find(pdip, NULL, paddr). Irrespective of
2707 * whether the search is based on the pathinfo nodes attached to
2708 * the pHCI or the client node, the result will be the same.
2709 */
2710
2711 /*
2712 * Find the client device corresponding to 'caddr'
2713 */
2714 MDI_VHCI_CLIENT_LOCK(vh);
2715
2716 /*
2717 * XXX - Passing NULL to the following function works as long as the
2718 * the client addresses (caddr) are unique per vhci basis.
2719 */
2720 ct = i_mdi_client_find(vh, NULL, caddr);
2721 if (ct == NULL) {
2722 /*
2723 * Client not found, Obviously mdi_pathinfo node has not been
2724 * created yet.
2725 */
2726 MDI_VHCI_CLIENT_UNLOCK(vh);
2727 MDI_DEBUG(2, (MDI_NOTE, pdip,
2728 "client not found for caddr @%s", caddr ? caddr : ""));
2729 return (NULL);
2730 }
2731
2732 /*
2733 * Hold the client lock and look for a mdi_pathinfo node with matching
2734 * pHCI and paddr
2735 */
2736 MDI_CLIENT_LOCK(ct);
2737
2738 /*
2739 * Release the global mutex as it is no more needed. Note: We always
2740 * respect the locking order while acquiring.
2741 */
2742 MDI_VHCI_CLIENT_UNLOCK(vh);
2743
2744 pip = (mdi_pathinfo_t *)ct->ct_path_head;
2745 while (pip != NULL) {
2746 /*
2747 * Compare the unit address
2748 */
2749 if ((MDI_PI(pip)->pi_phci == ph) &&
2750 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2751 break;
2752 }
2753 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2754 }
2755 MDI_CLIENT_UNLOCK(ct);
2756 MDI_DEBUG(2, (MDI_NOTE, pdip,
2757 "found: %s %p", mdi_pi_spathname(pip), (void *)pip));
2758 return (pip);
2759 }
2760
2761 /*
2762 * mdi_pi_alloc():
2763 * Allocate and initialize a new instance of a mdi_pathinfo node.
2764 * The mdi_pathinfo node returned by this function identifies a
2765 * unique device path is capable of having properties attached
2766 * and passed to mdi_pi_online() to fully attach and online the
2767 * path and client device node.
2768 * The mdi_pathinfo node returned by this function must be
2769 * destroyed using mdi_pi_free() if the path is no longer
2770 * operational or if the caller fails to attach a client device
2771 * node when calling mdi_pi_online(). The framework will not free
2772 * the resources allocated.
2773 * This function can be called from both interrupt and kernel
2774 * contexts. DDI_NOSLEEP flag should be used while calling
2775 * from interrupt contexts.
2776 * Return Values:
2777 * MDI_SUCCESS
2778 * MDI_FAILURE
2779 * MDI_NOMEM
2780 */
2781 /*ARGSUSED*/
2782 int
mdi_pi_alloc_compatible(dev_info_t * pdip,char * cname,char * caddr,char * paddr,char ** compatible,int ncompatible,int flags,mdi_pathinfo_t ** ret_pip)2783 mdi_pi_alloc_compatible(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2784 char **compatible, int ncompatible, int flags, mdi_pathinfo_t **ret_pip)
2785 {
2786 mdi_vhci_t *vh;
2787 mdi_phci_t *ph;
2788 mdi_client_t *ct;
2789 mdi_pathinfo_t *pip = NULL;
2790 dev_info_t *cdip;
2791 int rv = MDI_NOMEM;
2792 int path_allocated = 0;
2793
2794 MDI_DEBUG(2, (MDI_NOTE, pdip,
2795 "cname %s: caddr@%s paddr@%s",
2796 cname ? cname : "", caddr ? caddr : "", paddr ? paddr : ""));
2797
2798 if (pdip == NULL || cname == NULL || caddr == NULL || paddr == NULL ||
2799 ret_pip == NULL) {
2800 /* Nothing more to do */
2801 return (MDI_FAILURE);
2802 }
2803
2804 *ret_pip = NULL;
2805
2806 /* No allocations on detaching pHCI */
2807 if (DEVI_IS_DETACHING(pdip)) {
2808 /* Invalid pHCI device, return failure */
2809 MDI_DEBUG(1, (MDI_WARN, pdip,
2810 "!detaching pHCI=%p", (void *)pdip));
2811 return (MDI_FAILURE);
2812 }
2813
2814 ph = i_devi_get_phci(pdip);
2815 ASSERT(ph != NULL);
2816 if (ph == NULL) {
2817 /* Invalid pHCI device, return failure */
2818 MDI_DEBUG(1, (MDI_WARN, pdip,
2819 "!invalid pHCI=%p", (void *)pdip));
2820 return (MDI_FAILURE);
2821 }
2822
2823 MDI_PHCI_LOCK(ph);
2824 vh = ph->ph_vhci;
2825 if (vh == NULL) {
2826 /* Invalid vHCI device, return failure */
2827 MDI_DEBUG(1, (MDI_WARN, pdip,
2828 "!invalid vHCI=%p", (void *)pdip));
2829 MDI_PHCI_UNLOCK(ph);
2830 return (MDI_FAILURE);
2831 }
2832
2833 if (MDI_PHCI_IS_READY(ph) == 0) {
2834 /*
2835 * Do not allow new node creation when pHCI is in
2836 * offline/suspended states
2837 */
2838 MDI_DEBUG(1, (MDI_WARN, pdip,
2839 "pHCI=%p is not ready", (void *)ph));
2840 MDI_PHCI_UNLOCK(ph);
2841 return (MDI_BUSY);
2842 }
2843 MDI_PHCI_UNSTABLE(ph);
2844 MDI_PHCI_UNLOCK(ph);
2845
2846 /* look for a matching client, create one if not found */
2847 MDI_VHCI_CLIENT_LOCK(vh);
2848 ct = i_mdi_client_find(vh, cname, caddr);
2849 if (ct == NULL) {
2850 ct = i_mdi_client_alloc(vh, cname, caddr);
2851 ASSERT(ct != NULL);
2852 }
2853
2854 if (ct->ct_dip == NULL) {
2855 /*
2856 * Allocate a devinfo node
2857 */
2858 ct->ct_dip = i_mdi_devinfo_create(vh, cname, caddr,
2859 compatible, ncompatible);
2860 if (ct->ct_dip == NULL) {
2861 (void) i_mdi_client_free(vh, ct);
2862 goto fail;
2863 }
2864 }
2865 cdip = ct->ct_dip;
2866
2867 DEVI(cdip)->devi_mdi_component |= MDI_COMPONENT_CLIENT;
2868 DEVI(cdip)->devi_mdi_client = (caddr_t)ct;
2869
2870 MDI_CLIENT_LOCK(ct);
2871 pip = (mdi_pathinfo_t *)ct->ct_path_head;
2872 while (pip != NULL) {
2873 /*
2874 * Compare the unit address
2875 */
2876 if ((MDI_PI(pip)->pi_phci == ph) &&
2877 strcmp(MDI_PI(pip)->pi_addr, paddr) == 0) {
2878 break;
2879 }
2880 pip = (mdi_pathinfo_t *)MDI_PI(pip)->pi_client_link;
2881 }
2882 MDI_CLIENT_UNLOCK(ct);
2883
2884 if (pip == NULL) {
2885 /*
2886 * This is a new path for this client device. Allocate and
2887 * initialize a new pathinfo node
2888 */
2889 pip = i_mdi_pi_alloc(ph, paddr, ct);
2890 ASSERT(pip != NULL);
2891 path_allocated = 1;
2892 }
2893 rv = MDI_SUCCESS;
2894
2895 fail:
2896 /*
2897 * Release the global mutex.
2898 */
2899 MDI_VHCI_CLIENT_UNLOCK(vh);
2900
2901 /*
2902 * Mark the pHCI as stable
2903 */
2904 MDI_PHCI_LOCK(ph);
2905 MDI_PHCI_STABLE(ph);
2906 MDI_PHCI_UNLOCK(ph);
2907 *ret_pip = pip;
2908
2909 MDI_DEBUG(2, (MDI_NOTE, pdip,
2910 "alloc %s %p", mdi_pi_spathname(pip), (void *)pip));
2911
2912 if (path_allocated)
2913 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
2914
2915 return (rv);
2916 }
2917
2918 /*ARGSUSED*/
2919 int
mdi_pi_alloc(dev_info_t * pdip,char * cname,char * caddr,char * paddr,int flags,mdi_pathinfo_t ** ret_pip)2920 mdi_pi_alloc(dev_info_t *pdip, char *cname, char *caddr, char *paddr,
2921 int flags, mdi_pathinfo_t **ret_pip)
2922 {
2923 return (mdi_pi_alloc_compatible(pdip, cname, caddr, paddr, NULL, 0,
2924 flags, ret_pip));
2925 }
2926
2927 /*
2928 * i_mdi_pi_alloc():
2929 * Allocate a mdi_pathinfo node and add to the pHCI path list
2930 * Return Values:
2931 * mdi_pathinfo
2932 */
2933 /*ARGSUSED*/
2934 static mdi_pathinfo_t *
i_mdi_pi_alloc(mdi_phci_t * ph,char * paddr,mdi_client_t * ct)2935 i_mdi_pi_alloc(mdi_phci_t *ph, char *paddr, mdi_client_t *ct)
2936 {
2937 mdi_pathinfo_t *pip;
2938 int ct_circular;
2939 int ph_circular;
2940 static char path[MAXPATHLEN]; /* mdi_pathmap_mutex protects */
2941 char *path_persistent;
2942 int path_instance;
2943 mod_hash_val_t hv;
2944
2945 ASSERT(MDI_VHCI_CLIENT_LOCKED(ph->ph_vhci));
2946
2947 pip = kmem_zalloc(sizeof (struct mdi_pathinfo), KM_SLEEP);
2948 mutex_init(&MDI_PI(pip)->pi_mutex, NULL, MUTEX_DEFAULT, NULL);
2949 MDI_PI(pip)->pi_state = MDI_PATHINFO_STATE_INIT |
2950 MDI_PATHINFO_STATE_TRANSIENT;
2951
2952 if (MDI_PHCI_IS_USER_DISABLED(ph))
2953 MDI_PI_SET_USER_DISABLE(pip);
2954
2955 if (MDI_PHCI_IS_DRV_DISABLED_TRANSIENT(ph))
2956 MDI_PI_SET_DRV_DISABLE_TRANS(pip);
2957
2958 if (MDI_PHCI_IS_DRV_DISABLED(ph))
2959 MDI_PI_SET_DRV_DISABLE(pip);
2960
2961 MDI_PI(pip)->pi_old_state = MDI_PATHINFO_STATE_INIT;
2962 cv_init(&MDI_PI(pip)->pi_state_cv, NULL, CV_DEFAULT, NULL);
2963 MDI_PI(pip)->pi_client = ct;
2964 MDI_PI(pip)->pi_phci = ph;
2965 MDI_PI(pip)->pi_addr = kmem_alloc(strlen(paddr) + 1, KM_SLEEP);
2966 (void) strcpy(MDI_PI(pip)->pi_addr, paddr);
2967
2968 /*
2969 * We form the "path" to the pathinfo node, and see if we have
2970 * already allocated a 'path_instance' for that "path". If so,
2971 * we use the already allocated 'path_instance'. If not, we
2972 * allocate a new 'path_instance' and associate it with a copy of
2973 * the "path" string (which is never freed). The association
2974 * between a 'path_instance' this "path" string persists until
2975 * reboot.
2976 */
2977 mutex_enter(&mdi_pathmap_mutex);
2978 (void) ddi_pathname(ph->ph_dip, path);
2979 (void) sprintf(path + strlen(path), "/%s@%s",
2980 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2981 if (mod_hash_find(mdi_pathmap_bypath, (mod_hash_key_t)path, &hv) == 0) {
2982 path_instance = (uint_t)(intptr_t)hv;
2983 } else {
2984 /* allocate a new 'path_instance' and persistent "path" */
2985 path_instance = mdi_pathmap_instance++;
2986 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2987 (void) mod_hash_insert(mdi_pathmap_bypath,
2988 (mod_hash_key_t)path_persistent,
2989 (mod_hash_val_t)(intptr_t)path_instance);
2990 (void) mod_hash_insert(mdi_pathmap_byinstance,
2991 (mod_hash_key_t)(intptr_t)path_instance,
2992 (mod_hash_val_t)path_persistent);
2993
2994 /* create shortpath name */
2995 (void) snprintf(path, sizeof(path), "%s%d/%s@%s",
2996 ddi_driver_name(ph->ph_dip), ddi_get_instance(ph->ph_dip),
2997 mdi_pi_get_node_name(pip), mdi_pi_get_addr(pip));
2998 path_persistent = i_ddi_strdup(path, KM_SLEEP);
2999 (void) mod_hash_insert(mdi_pathmap_sbyinstance,
3000 (mod_hash_key_t)(intptr_t)path_instance,
3001 (mod_hash_val_t)path_persistent);
3002 }
3003 mutex_exit(&mdi_pathmap_mutex);
3004 MDI_PI(pip)->pi_path_instance = path_instance;
3005
3006 (void) nvlist_alloc(&MDI_PI(pip)->pi_prop, NV_UNIQUE_NAME, KM_SLEEP);
3007 ASSERT(MDI_PI(pip)->pi_prop != NULL);
3008 MDI_PI(pip)->pi_pprivate = NULL;
3009 MDI_PI(pip)->pi_cprivate = NULL;
3010 MDI_PI(pip)->pi_vprivate = NULL;
3011 MDI_PI(pip)->pi_client_link = NULL;
3012 MDI_PI(pip)->pi_phci_link = NULL;
3013 MDI_PI(pip)->pi_ref_cnt = 0;
3014 MDI_PI(pip)->pi_kstats = NULL;
3015 MDI_PI(pip)->pi_preferred = 1;
3016 cv_init(&MDI_PI(pip)->pi_ref_cv, NULL, CV_DEFAULT, NULL);
3017
3018 /*
3019 * Lock both dev_info nodes against changes in parallel.
3020 *
3021 * The ndi_devi_enter(Client), is atypical since the client is a leaf.
3022 * This atypical operation is done to synchronize pathinfo nodes
3023 * during devinfo snapshot (see di_register_pip) by 'pretending' that
3024 * the pathinfo nodes are children of the Client.
3025 */
3026 ndi_devi_enter(ct->ct_dip, &ct_circular);
3027 ndi_devi_enter(ph->ph_dip, &ph_circular);
3028
3029 i_mdi_phci_add_path(ph, pip);
3030 i_mdi_client_add_path(ct, pip);
3031
3032 ndi_devi_exit(ph->ph_dip, ph_circular);
3033 ndi_devi_exit(ct->ct_dip, ct_circular);
3034
3035 return (pip);
3036 }
3037
3038 /*
3039 * mdi_pi_pathname_by_instance():
3040 * Lookup of "path" by 'path_instance'. Return "path".
3041 * NOTE: returned "path" remains valid forever (until reboot).
3042 */
3043 char *
mdi_pi_pathname_by_instance(int path_instance)3044 mdi_pi_pathname_by_instance(int path_instance)
3045 {
3046 char *path;
3047 mod_hash_val_t hv;
3048
3049 /* mdi_pathmap lookup of "path" by 'path_instance' */
3050 mutex_enter(&mdi_pathmap_mutex);
3051 if (mod_hash_find(mdi_pathmap_byinstance,
3052 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3053 path = (char *)hv;
3054 else
3055 path = NULL;
3056 mutex_exit(&mdi_pathmap_mutex);
3057 return (path);
3058 }
3059
3060 /*
3061 * mdi_pi_spathname_by_instance():
3062 * Lookup of "shortpath" by 'path_instance'. Return "shortpath".
3063 * NOTE: returned "shortpath" remains valid forever (until reboot).
3064 */
3065 char *
mdi_pi_spathname_by_instance(int path_instance)3066 mdi_pi_spathname_by_instance(int path_instance)
3067 {
3068 char *path;
3069 mod_hash_val_t hv;
3070
3071 /* mdi_pathmap lookup of "path" by 'path_instance' */
3072 mutex_enter(&mdi_pathmap_mutex);
3073 if (mod_hash_find(mdi_pathmap_sbyinstance,
3074 (mod_hash_key_t)(intptr_t)path_instance, &hv) == 0)
3075 path = (char *)hv;
3076 else
3077 path = NULL;
3078 mutex_exit(&mdi_pathmap_mutex);
3079 return (path);
3080 }
3081
3082
3083 /*
3084 * i_mdi_phci_add_path():
3085 * Add a mdi_pathinfo node to pHCI list.
3086 * Notes:
3087 * Caller should per-pHCI mutex
3088 */
3089 static void
i_mdi_phci_add_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3090 i_mdi_phci_add_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3091 {
3092 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3093
3094 MDI_PHCI_LOCK(ph);
3095 if (ph->ph_path_head == NULL) {
3096 ph->ph_path_head = pip;
3097 } else {
3098 MDI_PI(ph->ph_path_tail)->pi_phci_link = MDI_PI(pip);
3099 }
3100 ph->ph_path_tail = pip;
3101 ph->ph_path_count++;
3102 MDI_PHCI_UNLOCK(ph);
3103 }
3104
3105 /*
3106 * i_mdi_client_add_path():
3107 * Add mdi_pathinfo node to client list
3108 */
3109 static void
i_mdi_client_add_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3110 i_mdi_client_add_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3111 {
3112 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3113
3114 MDI_CLIENT_LOCK(ct);
3115 if (ct->ct_path_head == NULL) {
3116 ct->ct_path_head = pip;
3117 } else {
3118 MDI_PI(ct->ct_path_tail)->pi_client_link = MDI_PI(pip);
3119 }
3120 ct->ct_path_tail = pip;
3121 ct->ct_path_count++;
3122 MDI_CLIENT_UNLOCK(ct);
3123 }
3124
3125 /*
3126 * mdi_pi_free():
3127 * Free the mdi_pathinfo node and also client device node if this
3128 * is the last path to the device
3129 * Return Values:
3130 * MDI_SUCCESS
3131 * MDI_FAILURE
3132 * MDI_BUSY
3133 */
3134 /*ARGSUSED*/
3135 int
mdi_pi_free(mdi_pathinfo_t * pip,int flags)3136 mdi_pi_free(mdi_pathinfo_t *pip, int flags)
3137 {
3138 int rv;
3139 mdi_vhci_t *vh;
3140 mdi_phci_t *ph;
3141 mdi_client_t *ct;
3142 int (*f)();
3143 int client_held = 0;
3144
3145 MDI_PI_LOCK(pip);
3146 ph = MDI_PI(pip)->pi_phci;
3147 ASSERT(ph != NULL);
3148 if (ph == NULL) {
3149 /*
3150 * Invalid pHCI device, return failure
3151 */
3152 MDI_DEBUG(1, (MDI_WARN, NULL,
3153 "!invalid pHCI: pip %s %p",
3154 mdi_pi_spathname(pip), (void *)pip));
3155 MDI_PI_UNLOCK(pip);
3156 return (MDI_FAILURE);
3157 }
3158
3159 vh = ph->ph_vhci;
3160 ASSERT(vh != NULL);
3161 if (vh == NULL) {
3162 /* Invalid pHCI device, return failure */
3163 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3164 "!invalid vHCI: pip %s %p",
3165 mdi_pi_spathname(pip), (void *)pip));
3166 MDI_PI_UNLOCK(pip);
3167 return (MDI_FAILURE);
3168 }
3169
3170 ct = MDI_PI(pip)->pi_client;
3171 ASSERT(ct != NULL);
3172 if (ct == NULL) {
3173 /*
3174 * Invalid Client device, return failure
3175 */
3176 MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
3177 "!invalid client: pip %s %p",
3178 mdi_pi_spathname(pip), (void *)pip));
3179 MDI_PI_UNLOCK(pip);
3180 return (MDI_FAILURE);
3181 }
3182
3183 /*
3184 * Check to see for busy condition. A mdi_pathinfo can only be freed
3185 * if the node state is either offline or init and the reference count
3186 * is zero.
3187 */
3188 if (!(MDI_PI_IS_OFFLINE(pip) || MDI_PI_IS_INIT(pip) ||
3189 MDI_PI_IS_INITING(pip))) {
3190 /*
3191 * Node is busy
3192 */
3193 MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
3194 "!busy: pip %s %p", mdi_pi_spathname(pip), (void *)pip));
3195 MDI_PI_UNLOCK(pip);
3196 return (MDI_BUSY);
3197 }
3198
3199 while (MDI_PI(pip)->pi_ref_cnt != 0) {
3200 /*
3201 * Give a chance for pending I/Os to complete.
3202 */
3203 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3204 "!%d cmds still pending on path: %s %p",
3205 MDI_PI(pip)->pi_ref_cnt,
3206 mdi_pi_spathname(pip), (void *)pip));
3207 if (cv_reltimedwait(&MDI_PI(pip)->pi_ref_cv,
3208 &MDI_PI(pip)->pi_mutex, drv_usectohz(60 * 1000000),
3209 TR_CLOCK_TICK) == -1) {
3210 /*
3211 * The timeout time reached without ref_cnt being zero
3212 * being signaled.
3213 */
3214 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3215 "!Timeout reached on path %s %p without the cond",
3216 mdi_pi_spathname(pip), (void *)pip));
3217 MDI_DEBUG(1, (MDI_NOTE, ct->ct_dip,
3218 "!%d cmds still pending on path %s %p",
3219 MDI_PI(pip)->pi_ref_cnt,
3220 mdi_pi_spathname(pip), (void *)pip));
3221 MDI_PI_UNLOCK(pip);
3222 return (MDI_BUSY);
3223 }
3224 }
3225 if (MDI_PI(pip)->pi_pm_held) {
3226 client_held = 1;
3227 }
3228 MDI_PI_UNLOCK(pip);
3229
3230 vhcache_pi_remove(vh->vh_config, MDI_PI(pip));
3231
3232 MDI_CLIENT_LOCK(ct);
3233
3234 /* Prevent further failovers till MDI_VHCI_CLIENT_LOCK is held */
3235 MDI_CLIENT_SET_PATH_FREE_IN_PROGRESS(ct);
3236
3237 /*
3238 * Wait till failover is complete before removing this node.
3239 */
3240 while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
3241 cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);
3242
3243 MDI_CLIENT_UNLOCK(ct);
3244 MDI_VHCI_CLIENT_LOCK(vh);
3245 MDI_CLIENT_LOCK(ct);
3246 MDI_CLIENT_CLEAR_PATH_FREE_IN_PROGRESS(ct);
3247
3248 rv = MDI_SUCCESS;
3249 if (!MDI_PI_IS_INITING(pip)) {
3250 f = vh->vh_ops->vo_pi_uninit;
3251 if (f != NULL) {
3252 rv = (*f)(vh->vh_dip, pip, 0);
3253 }
3254 }
3255
3256 /*
3257 * If vo_pi_uninit() completed successfully.
3258 */
3259 if (rv == MDI_SUCCESS) {
3260 if (client_held) {
3261 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3262 "i_mdi_pm_rele_client\n"));
3263 i_mdi_pm_rele_client(ct, 1);
3264 }
3265 i_mdi_pi_free(ph, pip, ct);
3266 if (ct->ct_path_count == 0) {
3267 /*
3268 * Client lost its last path.
3269 * Clean up the client device
3270 */
3271 MDI_CLIENT_UNLOCK(ct);
3272 (void) i_mdi_client_free(ct->ct_vhci, ct);
3273 MDI_VHCI_CLIENT_UNLOCK(vh);
3274 return (rv);
3275 }
3276 }
3277 MDI_CLIENT_UNLOCK(ct);
3278 MDI_VHCI_CLIENT_UNLOCK(vh);
3279
3280 if (rv == MDI_FAILURE)
3281 vhcache_pi_add(vh->vh_config, MDI_PI(pip));
3282
3283 return (rv);
3284 }
3285
3286 /*
3287 * i_mdi_pi_free():
3288 * Free the mdi_pathinfo node
3289 */
3290 static void
i_mdi_pi_free(mdi_phci_t * ph,mdi_pathinfo_t * pip,mdi_client_t * ct)3291 i_mdi_pi_free(mdi_phci_t *ph, mdi_pathinfo_t *pip, mdi_client_t *ct)
3292 {
3293 int ct_circular;
3294 int ph_circular;
3295
3296 ASSERT(MDI_CLIENT_LOCKED(ct));
3297
3298 /*
3299 * remove any per-path kstats
3300 */
3301 i_mdi_pi_kstat_destroy(pip);
3302
3303 /* See comments in i_mdi_pi_alloc() */
3304 ndi_devi_enter(ct->ct_dip, &ct_circular);
3305 ndi_devi_enter(ph->ph_dip, &ph_circular);
3306
3307 i_mdi_client_remove_path(ct, pip);
3308 i_mdi_phci_remove_path(ph, pip);
3309
3310 ndi_devi_exit(ph->ph_dip, ph_circular);
3311 ndi_devi_exit(ct->ct_dip, ct_circular);
3312
3313 mutex_destroy(&MDI_PI(pip)->pi_mutex);
3314 cv_destroy(&MDI_PI(pip)->pi_state_cv);
3315 cv_destroy(&MDI_PI(pip)->pi_ref_cv);
3316 if (MDI_PI(pip)->pi_addr) {
3317 kmem_free(MDI_PI(pip)->pi_addr,
3318 strlen(MDI_PI(pip)->pi_addr) + 1);
3319 MDI_PI(pip)->pi_addr = NULL;
3320 }
3321
3322 if (MDI_PI(pip)->pi_prop) {
3323 (void) nvlist_free(MDI_PI(pip)->pi_prop);
3324 MDI_PI(pip)->pi_prop = NULL;
3325 }
3326 kmem_free(pip, sizeof (struct mdi_pathinfo));
3327 }
3328
3329
3330 /*
3331 * i_mdi_phci_remove_path():
3332 * Remove a mdi_pathinfo node from pHCI list.
3333 * Notes:
3334 * Caller should hold per-pHCI mutex
3335 */
3336 static void
i_mdi_phci_remove_path(mdi_phci_t * ph,mdi_pathinfo_t * pip)3337 i_mdi_phci_remove_path(mdi_phci_t *ph, mdi_pathinfo_t *pip)
3338 {
3339 mdi_pathinfo_t *prev = NULL;
3340 mdi_pathinfo_t *path = NULL;
3341
3342 ASSERT(DEVI_BUSY_OWNED(ph->ph_dip));
3343
3344 MDI_PHCI_LOCK(ph);
3345 path = ph->ph_path_head;
3346 while (path != NULL) {
3347 if (path == pip) {
3348 break;
3349 }
3350 prev = path;
3351 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3352 }
3353
3354 if (path) {
3355 ph->ph_path_count--;
3356 if (prev) {
3357 MDI_PI(prev)->pi_phci_link = MDI_PI(path)->pi_phci_link;
3358 } else {
3359 ph->ph_path_head =
3360 (mdi_pathinfo_t *)MDI_PI(path)->pi_phci_link;
3361 }
3362 if (ph->ph_path_tail == path) {
3363 ph->ph_path_tail = prev;
3364 }
3365 }
3366
3367 /*
3368 * Clear the pHCI link
3369 */
3370 MDI_PI(pip)->pi_phci_link = NULL;
3371 MDI_PI(pip)->pi_phci = NULL;
3372 MDI_PHCI_UNLOCK(ph);
3373 }
3374
3375 /*
3376 * i_mdi_client_remove_path():
3377 * Remove a mdi_pathinfo node from client path list.
3378 */
3379 static void
i_mdi_client_remove_path(mdi_client_t * ct,mdi_pathinfo_t * pip)3380 i_mdi_client_remove_path(mdi_client_t *ct, mdi_pathinfo_t *pip)
3381 {
3382 mdi_pathinfo_t *prev = NULL;
3383 mdi_pathinfo_t *path;
3384
3385 ASSERT(DEVI_BUSY_OWNED(ct->ct_dip));
3386
3387 ASSERT(MDI_CLIENT_LOCKED(ct));
3388 path = ct->ct_path_head;
3389 while (path != NULL) {
3390 if (path == pip) {
3391 break;
3392 }
3393 prev = path;
3394 path = (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3395 }
3396
3397 if (path) {
3398 ct->ct_path_count--;
3399 if (prev) {
3400 MDI_PI(prev)->pi_client_link =
3401 MDI_PI(path)->pi_client_link;
3402 } else {
3403 ct->ct_path_head =
3404 (mdi_pathinfo_t *)MDI_PI(path)->pi_client_link;
3405 }
3406 if (ct->ct_path_tail == path) {
3407 ct->ct_path_tail = prev;
3408 }
3409 if (ct->ct_path_last == path) {
3410 ct->ct_path_last = ct->ct_path_head;
3411 }
3412 }
3413 MDI_PI(pip)->pi_client_link = NULL;
3414 MDI_PI(pip)->pi_client = NULL;
3415 }
3416
/*
 * i_mdi_pi_state_change():
 *		Drive a mdi_pathinfo node to the requested state.  This is the
 *		worker called by mdi_pi_online(), mdi_pi_standby(),
 *		mdi_pi_fault() and mdi_pi_offline().
 *
 *		Outline:
 *		  - validate the pHCI, vHCI and client back pointers;
 *		  - if the path is still initializing, call the vHCI's
 *		    vo_pi_init() entry point;
 *		  - refuse the request with MDI_BUSY if the pHCI is not ready;
 *		  - wait for any transient path state and any client failover
 *		    to finish;
 *		  - mark the path with the transient state that matches the
 *		    request and call the vHCI's vo_pi_state_change();
 *		  - on success, once the client is no longer unstable, update
 *		    the client state and online/offline the client devinfo node
 *		    as needed.
 *
 *		pip   - path whose state is to be changed
 *		state - requested MDI_PATHINFO_STATE_* value
 *		flag  - NDI flags; NDI_USER_REQ marks a user initiated request
 *
 * Return Values:
 *		MDI_SUCCESS
 *		MDI_FAILURE
 *		MDI_BUSY
 *		any other non-success value returned by vo_pi_state_change()
 *		(for example MDI_NOT_SUPPORTED) is passed back unchanged
 */
/*ARGSUSED*/
static int
i_mdi_pi_state_change(mdi_pathinfo_t *pip, mdi_pathinfo_state_t state, int flag)
{
	int		rv = MDI_SUCCESS;
	mdi_vhci_t	*vh;
	mdi_phci_t	*ph;
	mdi_client_t	*ct;
	int		(*f)();
	dev_info_t	*cdip;

	MDI_PI_LOCK(pip);

	/*
	 * Each back pointer below is asserted and also checked explicitly;
	 * a NULL pointer fails the request after dropping the pip lock.
	 */
	ph = MDI_PI(pip)->pi_phci;
	ASSERT(ph);
	if (ph == NULL) {
		/*
		 * Invalid pHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, NULL,
		    "!invalid phci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	vh = ph->ph_vhci;
	ASSERT(vh);
	if (vh == NULL) {
		/*
		 * Invalid vHCI device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid vhci: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	ct = MDI_PI(pip)->pi_client;
	ASSERT(ct != NULL);
	if (ct == NULL) {
		/*
		 * Invalid client device, fail the request
		 */
		MDI_PI_UNLOCK(pip);
		MDI_DEBUG(1, (MDI_WARN, ph->ph_dip,
		    "!invalid client: pip %s %p",
		    mdi_pi_spathname(pip), (void *)pip));
		return (MDI_FAILURE);
	}

	/*
	 * If this path has not been initialized yet, call back the vHCI
	 * driver's pathinfo node initialize entry point.  The pip lock is
	 * dropped across the callback and re-taken afterwards.
	 */

	if (MDI_PI_IS_INITING(pip)) {
		MDI_PI_UNLOCK(pip);
		f = vh->vh_ops->vo_pi_init;
		if (f != NULL) {
			rv = (*f)(vh->vh_dip, pip, 0);
			if (rv != MDI_SUCCESS) {
				MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
				    "!vo_pi_init failed: vHCI %p, pip %s %p",
				    (void *)vh, mdi_pi_spathname(pip),
				    (void *)pip));
				/*
				 * The pip lock is already released and the
				 * pHCI has not been marked unstable yet, so
				 * there is nothing to undo here.
				 */
				return (MDI_FAILURE);
			}
		}
		MDI_PI_LOCK(pip);
		MDI_PI_CLEAR_TRANSIENT(pip);
	}

	/*
	 * Do not allow state transition when pHCI is in offline/suspended
	 * states.  The pip lock is held at this point and pip is handed to
	 * i_mdi_phci_lock().
	 * NOTE(review): presumably so the pHCI lock can be taken out of the
	 * default order, as i_mdi_client_lock() does below - confirm.
	 */
	i_mdi_phci_lock(ph, pip);
	if (MDI_PHCI_IS_READY(ph) == 0) {
		MDI_DEBUG(1, (MDI_WARN, ct->ct_dip,
		    "!pHCI not ready, pHCI=%p", (void *)ph));
		MDI_PI_UNLOCK(pip);
		i_mdi_phci_unlock(ph);
		return (MDI_BUSY);
	}
	/*
	 * Every exit path from here on must go through state_change_exit so
	 * that the pHCI is marked stable again.
	 */
	MDI_PHCI_UNSTABLE(ph);
	i_mdi_phci_unlock(ph);

	/*
	 * Check if mdi_pathinfo state is in transient state.
	 * If yes, offlining is in progress and wait till transient state is
	 * cleared.
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		while (MDI_PI_IS_TRANSIENT(pip)) {
			cv_wait(&MDI_PI(pip)->pi_state_cv,
			    &MDI_PI(pip)->pi_mutex);
		}
	}

	/*
	 * Grab the client lock in reverse order sequence and release the
	 * mdi_pathinfo mutex.
	 */
	i_mdi_client_lock(ct, pip);
	MDI_PI_UNLOCK(pip);

	/*
	 * Wait till failover state is cleared
	 */
	while (MDI_CLIENT_IS_FAILOVER_IN_PROGRESS(ct))
		cv_wait(&ct->ct_failover_cv, &ct->ct_mutex);

	/*
	 * Mark the mdi_pathinfo node state as transient.  Every case leaves
	 * the switch with both the client lock and the pip lock held; a state
	 * value with no matching case leaves the path state untouched.
	 */
	MDI_PI_LOCK(pip);
	switch (state) {
	case MDI_PATHINFO_STATE_ONLINE:
		MDI_PI_SET_ONLINING(pip);
		break;

	case MDI_PATHINFO_STATE_STANDBY:
		MDI_PI_SET_STANDBYING(pip);
		break;

	case MDI_PATHINFO_STATE_FAULT:
		/*
		 * Mark the pathinfo state as FAULTED
		 */
		MDI_PI_SET_FAULTING(pip);
		MDI_PI_ERRSTAT(pip, MDI_PI_HARDERR);
		break;

	case MDI_PATHINFO_STATE_OFFLINE:
		/*
		 * ndi_devi_offline() cannot hold pip or ct locks.
		 */
		MDI_PI_UNLOCK(pip);

		/*
		 * If this is a user initiated path online->offline operation
		 * whose success would transition a client from DEGRADED to
		 * FAILED then only proceed if we can offline the client first.
		 */
		cdip = ct->ct_dip;
		if ((flag & NDI_USER_REQ) &&
		    MDI_PI_IS_ONLINE(pip) &&
		    (MDI_CLIENT_STATE(ct) == MDI_CLIENT_STATE_DEGRADED)) {
			i_mdi_client_unlock(ct);
			rv = ndi_devi_offline(cdip, NDI_DEVFS_CLEAN);
			if (rv != NDI_SUCCESS) {
				/*
				 * Convert to MDI error code
				 */
				switch (rv) {
				case NDI_BUSY:
					rv = MDI_BUSY;
					break;
				default:
					rv = MDI_FAILURE;
					break;
				}
				/*
				 * Neither the client lock nor the pip lock is
				 * held here; only the pHCI needs to be marked
				 * stable again.
				 */
				goto state_change_exit;
			} else {
				i_mdi_client_lock(ct, NULL);
			}
		}
		/*
		 * Mark the mdi_pathinfo node state as transient
		 */
		MDI_PI_LOCK(pip);
		MDI_PI_SET_OFFLINING(pip);
		break;
	}
	MDI_PI_UNLOCK(pip);
	MDI_CLIENT_UNSTABLE(ct);
	i_mdi_client_unlock(ct);

	/*
	 * Call the vHCI's state change entry point with neither the client
	 * lock nor the pip lock held.  Without an entry point rv keeps its
	 * current value.
	 */
	f = vh->vh_ops->vo_pi_state_change;
	if (f != NULL)
		rv = (*f)(vh->vh_dip, pip, state, 0, flag);

	/*
	 * Re-take the locks in the default order: client first, then
	 * mdi_pathinfo.
	 */
	MDI_CLIENT_LOCK(ct);
	MDI_PI_LOCK(pip);
	if (rv == MDI_NOT_SUPPORTED) {
		MDI_CLIENT_SET_DEV_NOT_SUPPORTED(ct);
	}
	if (rv != MDI_SUCCESS) {
		MDI_DEBUG(2, (MDI_WARN, ct->ct_dip,
		    "vo_pi_state_change failed: rv %x", rv));
	}
	/*
	 * On success the transient marking is cleared; on failure the path
	 * state is put back to the value given by MDI_PI_OLD_STATE().
	 */
	if (MDI_PI_IS_TRANSIENT(pip)) {
		if (rv == MDI_SUCCESS) {
			MDI_PI_CLEAR_TRANSIENT(pip);
		} else {
			MDI_PI(pip)->pi_state = MDI_PI_OLD_STATE(pip);
		}
	}

	/*
	 * Wake anyone waiting for this mdi_pathinfo node
	 */
	cv_broadcast(&MDI_PI(pip)->pi_state_cv);
	MDI_PI_UNLOCK(pip);

	/*
	 * Mark the client device as stable
	 */
	MDI_CLIENT_STABLE(ct);
	if (rv == MDI_SUCCESS) {
		/*
		 * The client state is only re-evaluated once ct_unstable has
		 * dropped back to zero.
		 */
		if (ct->ct_unstable == 0) {
			cdip = ct->ct_dip;

			/*
			 * Changing the mdi_pathinfo node state will impact
			 * the client state.  Update the client and dev_info
			 * node state accordingly.  From here until the
			 * conversion below rv carries an NDI code.
			 */
			rv = NDI_SUCCESS;
			i_mdi_client_update_state(ct);
			switch (MDI_CLIENT_STATE(ct)) {
			case MDI_CLIENT_STATE_OPTIMAL:
			case MDI_CLIENT_STATE_DEGRADED:
				if (cdip && !i_ddi_devi_attached(cdip) &&
				    ((state == MDI_PATHINFO_STATE_ONLINE) ||
				    (state == MDI_PATHINFO_STATE_STANDBY))) {

					/*
					 * Must do ndi_devi_online() through
					 * hotplug thread for deferred
					 * attach mechanism to work
					 */
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_online(cdip, 0);
					MDI_CLIENT_LOCK(ct);
					if ((rv != NDI_SUCCESS) &&
					    (MDI_CLIENT_STATE(ct) ==
					    MDI_CLIENT_STATE_DEGRADED)) {
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_online failed "
						    "error %x", rv));
					}
					/*
					 * A failed ndi_devi_online() is only
					 * logged; it does not fail the path
					 * state change.
					 */
					rv = NDI_SUCCESS;
				}
				break;

			case MDI_CLIENT_STATE_FAILED:
				/*
				 * This is the last path case for
				 * non-user initiated events.
				 */
				if (((flag & NDI_USER_REQ) == 0) &&
				    cdip && (i_ddi_node_state(cdip) >=
				    DS_INITIALIZED)) {
					MDI_CLIENT_UNLOCK(ct);
					rv = ndi_devi_offline(cdip,
					    NDI_DEVFS_CLEAN);
					MDI_CLIENT_LOCK(ct);

					if (rv != NDI_SUCCESS) {
						/*
						 * ndi_devi_offline failed.
						 * Reset client flags to
						 * online as the path could not
						 * be offlined.
						 */
						MDI_DEBUG(1, (MDI_WARN, cdip,
						    "!ndi_devi_offline failed: "
						    "error %x", rv));
						MDI_CLIENT_SET_ONLINE(ct);
					}
				}
				break;
			}
			/*
			 * Convert to MDI error code.  The path state is
			 * reported only when the client update succeeded.
			 */
			switch (rv) {
			case NDI_SUCCESS:
				MDI_CLIENT_SET_REPORT_DEV_NEEDED(ct);
				i_mdi_report_path_state(ct, pip);
				rv = MDI_SUCCESS;
				break;
			case NDI_BUSY:
				rv = MDI_BUSY;
				break;
			default:
				rv = MDI_FAILURE;
				break;
			}
		}
	}
	MDI_CLIENT_UNLOCK(ct);

state_change_exit:
	/*
	 * Mark the pHCI as stable again.
	 */
	MDI_PHCI_LOCK(ph);
	MDI_PHCI_STABLE(ph);
	MDI_PHCI_UNLOCK(ph);
	return (rv);
}
3730
3731 /*
3732 * mdi_pi_online():
3733 * Place the path_info node in the online state. The path is
3734 * now available to be selected by mdi_select_path() for
3735 * transporting I/O requests to client devices.
3736 * Return Values:
3737 * MDI_SUCCESS
3738 * MDI_FAILURE
3739 */
3740 int
mdi_pi_online(mdi_pathinfo_t * pip,int flags)3741 mdi_pi_online(mdi_pathinfo_t *pip, int flags)
3742 {
3743 mdi_client_t *ct = MDI_PI(pip)->pi_client;
3744 int client_held = 0;
3745 int rv;
3746
3747 ASSERT(ct != NULL);
3748 rv = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_ONLINE, flags);
3749 if (rv != MDI_SUCCESS)
3750 return (rv);
3751
3752 MDI_PI_LOCK(pip);
3753 if (MDI_PI(pip)->pi_pm_held == 0) {
3754 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3755 "i_mdi_pm_hold_pip %p", (void *)pip));
3756 i_mdi_pm_hold_pip(pip);
3757 client_held = 1;
3758 }
3759 MDI_PI_UNLOCK(pip);
3760
3761 if (client_held) {
3762 MDI_CLIENT_LOCK(ct);
3763 if (ct->ct_power_cnt == 0) {
3764 rv = i_mdi_power_all_phci(ct);
3765 }
3766
3767 MDI_DEBUG(4, (MDI_NOTE, ct->ct_dip,
3768 "i_mdi_pm_hold_client %p", (void *)ct));
3769 i_mdi_pm_hold_client(ct, 1);
3770 MDI_CLIENT_UNLOCK(ct);
3771 }
3772
3773 return (rv);
3774 }
3775
3776 /*
3777 * mdi_pi_standby():
3778 * Place the mdi_pathinfo node in standby state
3779 *
3780 * Return Values:
3781 * MDI_SUCCESS
3782 * MDI_FAILURE
3783 */
3784 int
mdi_pi_standby(mdi_pathinfo_t * pip,int flags)3785 mdi_pi_standby(mdi_pathinfo_t *pip, int flags)
3786 {
3787 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_STANDBY, flags));
3788 }
3789
3790 /*
3791 * mdi_pi_fault():
3792 * Place the mdi_pathinfo node in fault'ed state
3793 * Return Values:
3794 * MDI_SUCCESS
3795 * MDI_FAILURE
3796 */
3797 int
mdi_pi_fault(mdi_pathinfo_t * pip,int flags)3798 mdi_pi_fault(mdi_pathinfo_t *pip, int flags)
3799 {
3800 return (i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_FAULT, flags));
3801 }
3802
3803 /*
3804 * mdi_pi_offline():
3805 * Offline a mdi_pathinfo node.
3806 * Return Values:
3807 * MDI_SUCCESS
3808 * MDI_FAILURE
3809 */
3810 int
mdi_pi_offline(mdi_pathinfo_t * pip,int flags)3811 mdi_pi_offline(mdi_pathinfo_t *pip, int flags)
3812 {
3813 int ret, client_held = 0;
3814 mdi_client_t *ct;
3815
3816 /*
3817 * Original code overloaded NDI_DEVI_REMOVE to this interface, and
3818 * used it to mean "user initiated operation" (i.e. devctl). Callers
3819 * should now just use NDI_USER_REQ.
3820 */
3821 if (flags & NDI_DEVI_REMOVE) {
3822 flags &= ~NDI_DEVI_REMOVE;
3823 flags |= NDI_USER_REQ;
3824 }
3825
3826 ret = i_mdi_pi_state_change(pip, MDI_PATHINFO_STATE_OFFLINE, flags);
3827
3828 if (ret == MDI_SUCCESS) {
3829 MDI_PI_LOCK(pip);
3830 if (MDI_PI(pip)->pi_pm_held) {
3831 client_held = 1;
3832 }
3833 MDI_PI_UNLOCK(pip);
3834
3835 if (client_held) {
3836 ct =